async def ping(uri: str, message: str) -> None:
    """Ping odin at uri and send message.

    :param uri: The location of the server
    :param message: The message you expect to see back
    :raises RuntimeError: If the server returns an error
    """
    async with websockets.connect(uri) as websocket:
        await websocket.send(json.dumps({APIField.COMMAND: 'PING', APIField.REQUEST: message}))
        resp = json.loads(await websocket.recv())
        if resp[APIField.STATUS] == APIStatus.ERROR:
            LOGGER.error(resp)
            raise RuntimeError(resp)
        LOGGER.info(resp[APIField.RESPONSE])
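# A hedged usage sketch for the coroutine above: drive it from a synchronous entry
# point with asyncio. The helper name, server address, and message are placeholders
# for illustration, not values from the original source.
def example_ping() -> None:
    import asyncio
    asyncio.run(ping('ws://localhost:9999', 'hello odin'))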
async def request_generate_config(ws, config):
    """Use async to open a connection to serve.py and generate a config."""
    async with websockets.connect(ws) as websocket:
        await websocket.send(
            json.dumps({
                APIField.COMMAND: 'GENERATE',
                APIField.REQUEST: config
            })
        )
        result = json.loads(await websocket.recv())
        if result[APIField.STATUS] == APIStatus.ERROR:
            LOGGER.error(result)
            return
        if result[APIField.STATUS] == APIStatus.OK:
            LOGGER.info('Generated pipeline is called %s', result[APIField.RESPONSE])
def submit(self, task: Task) -> str:
    """Submit a multi-worker PyTorchJob Task.

    :param task: The task definition
    :type task: Task
    :return: A string handle name
    :rtype: str
    """
    secrets = self._reference_secrets(task)
    configmaps = self._generate_configmaps(task)
    task.num_gpus = 1
    pod_spec = task_to_pod_spec(task, container_name="pytorch-elasticjob", secrets=secrets, configmaps=configmaps)
    template_metadata = client.V1ObjectMeta(name=task.name)
    template = client.V1PodTemplateSpec(metadata=template_metadata, spec=pod_spec)

    worker_replica_spec = {}
    worker_replica_spec['replicas'] = task.num_workers
    worker_replica_spec['restartPolicy'] = PyTorchElasticJobHandler.EXIT_CODE
    worker_replica_spec['template'] = template

    spec = {}
    spec['replicaSpecs'] = {}
    spec['replicaSpecs']['Worker'] = worker_replica_spec
    spec['minReplicas'] = task.num_workers
    spec['maxReplicas'] = task.num_workers

    etcd_svc = getenv('PYTORCH_ELASTIC_ETCD_SVC')
    if not etcd_svc:
        LOGGER.warning("No environment variable set for etcd service, looking for first available in elastic-job namespace")
        api = client.CoreV1Api()
        etcd_svc = [x for x in api.list_namespaced_service('elastic-job').items
                    if x.metadata.name == 'etcd-service'][0].spec.cluster_ip
    LOGGER.info("Using etcd service on %s:%d", etcd_svc, PyTorchElasticJobHandler.ETCD_PORT)
    spec['rdzvEndpoint'] = f'{etcd_svc}:{PyTorchElasticJobHandler.ETCD_PORT}'

    pytorch_job_spec = {}
    pytorch_job_spec['kind'] = PyTorchElasticJobHandler.NAME
    pytorch_job_spec['apiVersion'] = f'{PyTorchElasticJobHandler.GROUP}/{PyTorchElasticJobHandler.VERSION}'
    pytorch_job_spec['metadata'] = client.V1ObjectMeta(generate_name=task.name)
    pytorch_job_spec['spec'] = spec

    pytorch_job = self.api.create_namespaced_custom_object(
        PyTorchElasticJobHandler.GROUP,
        PyTorchElasticJobHandler.VERSION,
        self.namespace,
        PyTorchElasticJobHandler.PLURAL,
        pytorch_job_spec,
    )
    return pytorch_job['metadata']['name']
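# Hedged sketch: submit() above falls back to an in-cluster service lookup when the
# PYTORCH_ELASTIC_ETCD_SVC environment variable is unset. Pre-setting it avoids that
# lookup. The helper name and the address below are illustrative, not from the source.
def example_set_etcd_endpoint(cluster_ip: str = '10.23.4.56') -> None:
    import os
    os.environ['PYTORCH_ELASTIC_ETCD_SVC'] = cluster_ip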
def create_user_http(url: str, jwt_token: str, username: str, password: str, firstname: str, lastname: str) -> None:
    """Create or update a user over HTTP.

    :param url: the base URL
    :param jwt_token: The JWT token representing this authentication
    :param username: The user ID
    :param password: The updated password
    :param firstname: The firstname
    :param lastname: The lastname
    """
    user = {"username": username, "password": password}
    if firstname:
        user['firstname'] = firstname
    if lastname:
        user['lastname'] = lastname
    headers = {'Authorization': f'Bearer {jwt_token}'}
    try:
        response = requests.get(f'{url}/v1/users/{username}')
        if response.status_code == 401:
            raise ValueError("Invalid login")
        if response.status_code != 200:
            # No such user exists so do a POST
            response = requests.post(f'{url}/v1/users', headers=headers, json={"user": user})
            if response.status_code != 200:
                raise Exception(f"Failed to create user: {username}")
            results = response.json()
            LOGGER.info("Created new user")
            LOGGER.info(json.dumps(results))
            return
        results = response.json()
        LOGGER.info("Found existing user")
        LOGGER.info(json.dumps(results))
    except Exception as ex:
        LOGGER.error(ex)
        return
    # The user already exists, so update it with a PUT
    response = requests.put(f'{url}/v1/users/{username}', json=user, headers=headers)
    results = response.json()
    LOGGER.info(json.dumps(results))
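# A hedged usage sketch for create_user_http(). The helper name, base URL, and user
# fields are placeholders for illustration, not values from the original source; the
# JWT token is assumed to have been obtained separately.
def example_create_user(jwt_token: str) -> None:
    create_user_http('http://localhost:8080', jwt_token, 'ada', 's3cret', 'Ada', 'Lovelace')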
async def schedule_pipeline(ws, work) -> None:
    """Use async to open a connection to serve.py and launch work.

    Blocks until the job completes (and websocket stays open)
    """
    async with websockets.connect(ws) as websocket:
        await websocket.send(
            json.dumps({
                APIField.COMMAND: 'START',
                APIField.REQUEST: work
            })
        )
        result = json.loads(await websocket.recv())
        while result[APIField.STATUS] != APIStatus.END:
            if result[APIField.STATUS] == APIStatus.ERROR:
                LOGGER.error(result)
                return
            if result[APIField.RESPONSE].startswith('PIPE_ID'):
                # The response is a string like "PIPE_ID <name>"; keep only the name
                pipe_id = result[APIField.RESPONSE].split(' ')[-1]
                LOGGER.info('Started %s', pipe_id)
            else:
                LOGGER.info(result[APIField.RESPONSE])
            result = json.loads(await websocket.recv())
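# A hedged end-to-end sketch tying the coroutines above together: generate a config,
# then schedule the work and block until the pipeline reports completion. The helper
# name, websocket address, and request payloads are placeholders, not values from the
# original source.
def example_generate_and_schedule(config, work) -> None:
    import asyncio

    async def _run() -> None:
        await request_generate_config('ws://localhost:9999', config)
        await schedule_pipeline('ws://localhost:9999', work)

    asyncio.run(_run())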