def list_utilization(self, timeout=None) -> List[clara_types.ClaraUtilizationDetails]:
    """Acquire a snapshot of GPU utilization information for Clara as a list.

    Args:
        timeout: Optional timeout (seconds) forwarded to the gRPC call.

    Returns:
        List[clara_types.ClaraUtilizationDetails], one entry per streamed
        response, each holding a snapshot of GPU utilization details for
        Clara GPUs.

    Raises:
        Exception: If the connection is currently closed.
    """
    if (self._channel is None) or (self._stub is None):
        raise Exception(
            "Connection is currently closed. Please run reconnect() to reopen connection"
        )

    request = clara_pb2.ClaraUtilizationRequest(
        header=self.get_request_header(), watch=False)
    response = self._stub.Utilization(request, timeout=timeout)

    utilization_list = []
    header_check = False
    for resp in response:
        # Validate the response header only once, on the first streamed message.
        if not header_check:
            self.check_response_header(header=resp.header)
            header_check = True
        clara_utilization_details = clara_types.ClaraUtilizationDetails()
        for item in resp.gpu_metrics:
            gpu_utilization = clara_types.ClaraGpuUtilization(
                node_id=item.node_id,
                pcie_id=item.pcie_id,
                compute_utilization=item.compute_utilization,
                memory_free=item.memory_free,
                memory_used=item.memory_used,
                memory_utilization=item.memory_utilization,
                timestamp=self.get_timestamp(item.timestamp),
            )
            for proc_info in item.process_details:
                process_details = clara_types.ClaraProcessDetails(
                    name=proc_info.name,
                )
                # job_id is optional on the wire; attach it only when present.
                if proc_info.job_id.value:
                    process_details.job_id = job_types.JobId(
                        proc_info.job_id.value)
                gpu_utilization.process_details.append(process_details)
            clara_utilization_details.gpu_metrics.append(gpu_utilization)
        utilization_list.append(clara_utilization_details)
    return utilization_list
def test_read_logs():
    """ReadLogs responses streamed in two chunks are flattened into one log list."""
    uuid = '432b274a8f754968888807fe1eba237b'
    request_list = [
        jobs_pb2.JobsReadLogsRequest(
            header=BaseClient.get_request_header(),
            job_id=common_pb2.Identifier(value=uuid),
            operator_name="dicom-reader"
        )
    ]
    ok_header = common_pb2.ResponseHeader(code=0, messages=[])
    response_list = [
        jobs_pb2.JobsReadLogsResponse(
            header=ok_header,
            job_id=common_pb2.Identifier(value=uuid),
            operator_name="Dicom Reader",
            logs=["Log_String_0", "Log_String_1"]
        ),
        jobs_pb2.JobsReadLogsResponse(
            header=ok_header,
            job_id=common_pb2.Identifier(value=uuid),
            operator_name="Dicom Reader",
            logs=["Log_String_2", "Log_String_3"]
        ),
    ]
    MockClaraJobsServiceClient.stub_method_handlers = [
        ('ReadLogs', 'unary_stream', (request_list, response_list))
    ]
    with MockClaraJobsServiceClient('10.0.0.1:50051') as client:
        job_logs = client.job_logs(
            job_id=job_types.JobId(value=uuid),
            operator_name="dicom-reader"
        )
        print(len(job_logs))
        assert len(job_logs) == 4
        for index in range(4):
            assert job_logs[index] == "Log_String_" + str(index)
def get_status(self, job_id: job_types.JobId, timeout=None) -> job_types.JobDetails:
    """Get the status of a job.

    Args:
        job_id (job_types.JobId): Unique identifier of the job to get the
            status of.
        timeout: Optional timeout (seconds) forwarded to the gRPC call.

    Returns:
        job_types.JobDetails including the status of a known job.

    Raises:
        Exception: If the connection is closed or ``job_id`` is not a
            non-null, non-empty identifier.
    """
    if (self._channel is None) or (self._stub is None):
        raise Exception(
            "Connection is currently closed. Please run reconnect() to reopen connection"
        )
    # Reject empty identifiers as well, consistent with cancel_job's validation.
    if (job_id.value is None) or (job_id.value == ""):
        raise Exception(
            "Job identifier must have instantiated non-null instance")

    request = jobs_pb2.JobsStatusRequest(header=self.get_request_header(),
                                         job_id=job_id.to_grpc_value())
    response = self._stub.Status(request, timeout=timeout)
    self.check_response_header(header=response.header)

    # Flatten per-operator timing/status records into a plain nested dict.
    operator_details = {
        item.name: {
            "created": item.created,
            "started": item.started,
            "stopped": item.stopped,
            "status": item.status,
        }
        for item in response.operator_details
    }

    result = job_types.JobDetails(
        job_id=job_types.JobId(response.job_id.value),
        job_priority=response.priority,
        job_state=response.state,
        job_status=response.status,
        name=response.name,
        payload_id=payload_types.PayloadId(response.payload_id.value),
        pipeline_id=pipeline_types.PipelineId(response.pipeline_id.value),
        date_created=self.get_timestamp(response.created),
        date_started=self.get_timestamp(response.started),
        date_stopped=self.get_timestamp(response.stopped),
        operator_details=operator_details,
        messages=response.messages,
        metadata=response.metadata)
    return result
def test_cancel_job():
    """A cancelled job yields a token in the stopped/canceled state."""
    uuid = '432b274a8f754968888807fe1eba237b'
    request_list = [
        jobs_pb2.JobsCancelRequest(
            header=BaseClient.get_request_header(),
            job_id=common_pb2.Identifier(value=uuid)
        )
    ]
    response_list = [
        jobs_pb2.JobsCancelResponse(
            header=common_pb2.ResponseHeader(code=0, messages=[]),
            job_id=common_pb2.Identifier(value=uuid),
            job_state=jobs_pb2.JOB_STATE_STOPPED,
            job_status=jobs_pb2.JOB_STATUS_CANCELED
        )
    ]
    MockClaraJobsServiceClient.stub_method_handlers = [
        ('Cancel', 'unary_unary', (request_list, response_list))
    ]
    with MockClaraJobsServiceClient('10.0.0.1:50051') as client:
        token = client.cancel_job(job_id=job_types.JobId(value=uuid))
        print(token.job_id.value, token.job_state, token.job_status)
        assert token.job_id.value == uuid
        assert token.job_state == 3
        assert token.job_status == 3
def test_start_job():
    """Starting a job yields a token in the running/healthy state."""
    uuid = '432b274a8f754968888807fe1eba237b'
    request_list = [
        jobs_pb2.JobsStartRequest(
            header=BaseClient.get_request_header(),
            job_id=common_pb2.Identifier(value=uuid)
        )
    ]
    response_list = [
        jobs_pb2.JobsStartResponse(
            header=common_pb2.ResponseHeader(code=0, messages=[]),
            state=jobs_pb2.JOB_STATE_RUNNING,
            status=jobs_pb2.JOB_STATUS_HEALTHY,
            priority=jobs_pb2.JOB_PRIORITY_NORMAL
        )
    ]
    MockClaraJobsServiceClient.stub_method_handlers = [
        ('Start', 'unary_unary', (request_list, response_list))
    ]
    with MockClaraJobsServiceClient('10.0.0.1:50051') as client:
        token = client.start_job(job_id=job_types.JobId(value=uuid))
        print(token.job_id.value, token.job_state, token.job_status)
        assert token.job_id.value == uuid
        assert token.job_state == 2
        assert token.job_status == 1
def cancel_job(self, job_id: job_types.JobId, reason=None, timeout=None) -> job_types.JobToken:
    """Cancel a pipeline job, preventing it from being executed.

    Has no effect on executing or terminated jobs.

    Args:
        job_id (job_types.JobId): Unique identity of the job to be cancelled.
        reason: Optional reason as to why the job was cancelled.
        timeout: Optional timeout (seconds) forwarded to the gRPC call.

    Returns:
        job_types.JobToken of the cancelled job.

    Raises:
        Exception: If the connection is closed or ``job_id`` has no value.
    """
    if (self._channel is None) or (self._stub is None):
        raise Exception(
            "Connection is currently closed. Please run reconnect() to reopen connection"
        )
    if (job_id.value is None) or (job_id.value == ""):
        raise Exception("Job identifier must have instantiated value")

    cancel_request = jobs_pb2.JobsCancelRequest(
        header=self.get_request_header(),
        job_id=job_id.to_grpc_value(),
        reason=reason)
    cancel_response = self._stub.Cancel(cancel_request, timeout=timeout)
    self.check_response_header(header=cancel_response.header)

    return job_types.JobToken(
        job_id=job_types.JobId(cancel_response.job_id.value),
        job_state=cancel_response.job_state,
        job_status=cancel_response.job_status)
def test_get_status():
    """Status fields of a mocked JobsStatusResponse map onto JobDetails."""
    uuid = '432b274a8f754968888807fe1eba237b'
    pipeline_uuid = '92656d79fa414db6b294069c0e9e6df5'
    payload_uuid = '7ac5c691e13d4f45894a3a70d9925936'
    # Seconds counted from year 1 (server epoch); 62135596800 is the offset
    # between year-1 and the Unix epoch.
    fake_seconds_from_epoch = 63763345820
    request_list = [
        jobs_pb2.JobsStatusRequest(
            header=BaseClient.get_request_header(),
            job_id=common_pb2.Identifier(value=uuid)
        )
    ]
    response_list = [
        jobs_pb2.JobsStatusResponse(
            header=common_pb2.ResponseHeader(code=0, messages=[]),
            name="job_1",
            job_id=common_pb2.Identifier(value=uuid),
            pipeline_id=common_pb2.Identifier(value=pipeline_uuid),
            payload_id=common_pb2.Identifier(value=payload_uuid),
            state=jobs_pb2.JOB_STATE_RUNNING,
            status=jobs_pb2.JOB_STATUS_HEALTHY,
            created=common_pb2.Timestamp(value=fake_seconds_from_epoch)
        )
    ]
    MockClaraJobsServiceClient.stub_method_handlers = [
        ('Status', 'unary_unary', (request_list, response_list))
    ]
    with MockClaraJobsServiceClient('10.0.0.1:50051') as client:
        job_details = client.get_status(job_id=job_types.JobId(value=uuid))
        print(job_details.job_id.value, job_details.job_state, job_details.job_status)
        print(job_details.date_created)
        print(datetime.datetime.fromtimestamp(float(fake_seconds_from_epoch) - 62135596800))
        assert job_details.name == "job_1"
        assert job_details.job_id.value == uuid
        assert job_details.pipeline_id.value == pipeline_uuid
        assert job_details.payload_id.value == payload_uuid
        assert job_details.job_state == 2
        assert job_details.job_status == 1
        expected_created = datetime.datetime.fromtimestamp(
            float(fake_seconds_from_epoch) - 62135596800
        ).astimezone(datetime.timezone.utc)
        assert job_details.date_created == expected_created
def stream_jobs(self, job_filter: job_types.JobFilter = None, timeout=None):
    """Provide a generator that streams current jobs on the platform.

    Args:
        job_filter (job_types.JobFilter): Optional filter used to limit the
            number of pipeline job records returned.
        timeout: Optional timeout (seconds) forwarded to the gRPC call.

    Yields:
        job_types.JobInfo with known pipeline job details from the server.

    Raises:
        Exception: If the connection is closed or a filter value is out of
            its valid range.
    """
    if (self._channel is None) or (self._stub is None):
        raise Exception(
            "Connection is currently closed. Please run reconnect() to reopen connection"
        )

    def _seconds_since_year_one(moment):
        # Server timestamps count whole seconds from 0001-01-01; use a
        # timezone-aware epoch when the supplied datetime is aware so the
        # subtraction is legal and the offset is preserved.
        epoch = datetime.datetime(1, 1, 1)
        if moment.tzinfo is not None \
                and moment.tzinfo.utcoffset(moment) is not None:
            epoch = datetime.datetime(1, 1, 1, tzinfo=moment.tzinfo)
        return int((moment - epoch).total_seconds())

    request = jobs_pb2.JobsListRequest(header=self.get_request_header())
    if job_filter is not None and job_filter != job_types.JobFilter():
        if job_filter.completed_before is not None:
            request.filter.completed_before.value = _seconds_since_year_one(
                job_filter.completed_before)
        if job_filter.created_after is not None:
            request.filter.created_after.value = _seconds_since_year_one(
                job_filter.created_after)
        # Empty or None state/status/pipeline lists mean "no constraint".
        if job_filter.has_job_state:
            for state in job_filter.has_job_state:
                if (state.value < job_types.JobState.Minimum.value) or (
                        state.value > job_types.JobState.Maximum.value):
                    raise Exception(
                        "Job states in filter must be within " +
                        str(job_types.JobState.Minimum) + " and " +
                        str(job_types.JobState.Maximum) + ", found:" +
                        str(state))
                request.filter.has_state.append(state.value)
        if job_filter.has_job_status:
            for status in job_filter.has_job_status:
                if (status.value < job_types.JobStatus.Minimum.value) or (
                        status.value > job_types.JobStatus.Maximum.value):
                    raise Exception(
                        "Job status in filter must be within " +
                        str(job_types.JobStatus.Minimum) + " and " +
                        str(job_types.JobStatus.Maximum) + ", found:" +
                        str(status))
                request.filter.has_status.append(status.value)
        if job_filter.pipeline_ids:
            for pipe_id in job_filter.pipeline_ids:
                request.filter.pipeline_id.append(pipe_id.to_grpc_value())

    response = self._stub.List(request, timeout=timeout)
    check_header = True
    for item in response:
        # Validate the response header only once, on the first message.
        if check_header:
            self.check_response_header(header=item.header)
            check_header = False
        # Skip placeholder entries with no job identifier.
        if (item.job_details is None) or (item.job_details.job_id.value == ''):
            continue
        yield job_types.JobInfo(
            job_id=job_types.JobId(item.job_details.job_id.value),
            job_priority=item.job_details.priority,
            job_state=item.job_details.state,
            job_status=item.job_details.status,
            name=item.job_details.job_name,
            payload_id=payload_types.PayloadId(
                item.job_details.payload_id.value),
            pipeline_id=pipeline_types.PipelineId(
                item.job_details.pipeline_id.value),
            date_created=self.get_timestamp(item.job_details.created),
            date_started=self.get_timestamp(item.job_details.started),
            date_stopped=self.get_timestamp(item.job_details.stopped),
            metadata=item.job_details.metadata)
def create_job(
        self,
        pipeline_id: pipeline_types.PipelineId,
        job_name: str,
        input_payloads: List[payload_types.PayloadId] = None,
        job_priority: job_types.JobPriority = job_types.JobPriority.Normal,
        metadata: Mapping[str, str] = None,
        timeout=None) -> job_types.JobInfo:
    """Create a new pipeline job record and associated storage payload.

    Jobs are created in a "JobState.Pending" state. Use
    "StartJob(JobId, Map{KeyValuePair{string, string}}" to cause the job to
    start executing.

    Args:
        pipeline_id (pipeline_types.PipelineId): Unique identifier of the
            pipeline which the job should be instanced from.
        job_name (str): Human readable name of the job.
        input_payloads (List[payload_types.PayloadId]): [Optional Parameter]
            List of static payloads to include as input for the job.
        job_priority (job_types.JobPriority): Optional priority of the job.
            Affects how and when the server will schedule the job.
        metadata (Mapping[str, str]): [Optional Parameter] Metadata (set of
            key/value pairs) associated with the job.
        timeout: Optional timeout (seconds) forwarded to the gRPC call.

    Returns:
        job_types.JobInfo about the newly created pipeline job.

    Raises:
        Exception: If the connection is closed, or any of the required
            arguments is missing or out of range.
    """
    if (self._channel is None) or (self._stub is None):
        raise Exception(
            "Connection is currently closed. Please run reconnect() to reopen connection"
        )
    if pipeline_id.value is None:
        raise Exception(
            "Pipeline identifier must have instantiated non-null instance")
    if (job_name is None) or (job_name == ""):
        raise Exception(
            "Job name must be initialized to non-null/non-empty string")
    if (job_priority.value < job_types.JobPriority.Minimum.value) or (
            job_priority.value > job_types.JobPriority.Maximum.value):
        raise Exception(
            "Job priority must contain valid value between minimum and maximum job priority bounds"
        )

    # None (rather than an empty list) tells the request "no input payloads".
    input_payloads_identifiers = None
    if input_payloads is not None:
        input_payloads_identifiers = [
            pay_id.to_grpc_value() for pay_id in input_payloads
        ]

    request = jobs_pb2.JobsCreateRequest(
        header=self.get_request_header(),
        name=job_name,
        pipeline_id=pipeline_id.to_grpc_value(),
        priority=job_priority.value,
        input_payloads=input_payloads_identifiers)
    if metadata is not None:
        request.metadata.update(metadata)

    response = self._stub.Create(request, timeout=timeout)
    self.check_response_header(header=response.header)

    result = job_types.JobInfo(
        job_id=job_types.JobId(response.job_id.value),
        job_priority=job_priority,
        job_state=job_types.JobState.Pending,
        job_status=job_types.JobStatus.Healthy,
        name=job_name,
        payload_id=payload_types.PayloadId(value=response.payload_id.value),
        pipeline_id=pipeline_id,
        metadata=metadata)
    return result