async def _task_state_change_handler(self, event: str) -> None:
    """Handle a raw (JSON) task state event.

    Decodes the event, emits a "service_started" instrumentation message the
    moment the task transitions to STARTED, and persists the new state for
    the corresponding project task in the database.
    """
    task_state_event = TaskStateEvent.parse_raw(event)
    logger.debug(
        "received task state update: %s",
        task_state_event,
    )
    # the dask job id encodes everything needed to locate the task
    (
        service_key,
        service_version,
        user_id,
        project_id,
        node_id,
    ) = parse_dask_job_id(task_state_event.job_id)

    if task_state_event.state == RunningState.STARTED:
        # notify instrumentation listeners that the computational service started
        started_message = InstrumentationRabbitMessage(
            metrics="service_started",
            user_id=user_id,
            project_id=project_id,
            node_id=node_id,
            service_uuid=node_id,
            service_type=NodeClass.COMPUTATIONAL,
            service_key=service_key,
            service_tag=service_version,
        )
        await self.rabbitmq_client.publish_message(started_message)

    # always persist the reported state
    await CompTasksRepository(self.db_engine).set_project_tasks_state(
        project_id, [node_id], task_state_event.state
    )
async def _process_task_result(
    self, task: CompTaskAtDB, result: Union[Exception, TaskOutputData]
) -> None:
    """Finalize a computational task once dask hands back its result.

    On success (``TaskOutputData``) the outputs are parsed and stored; on
    cancellation/failure any invalid output/log files are cleaned up. In both
    cases an instrumentation "service_stopped" message is published and the
    final state is persisted in the comp_tasks table.
    """
    logger.debug("received %s result: %s", f"{task=}", f"{result=}")
    # default unless we can prove success or cancellation below
    task_final_state = RunningState.FAILED
    if task.job_id is not None:
        # the job id redundantly encodes the task coordinates; the asserts
        # below cross-check it against the DB row
        (
            service_key,
            service_version,
            user_id,
            project_id,
            node_id,
        ) = parse_dask_job_id(task.job_id)
        assert task.project_id == project_id  # nosec
        assert task.node_id == node_id  # nosec
        if isinstance(result, TaskOutputData):
            # success!
            task_final_state = RunningState.SUCCESS
            await parse_output_data(
                self.db_engine,
                task.job_id,
                result,
            )
        else:
            if isinstance(result, TaskCancelledError):
                task_final_state = RunningState.ABORTED
            else:
                task_final_state = RunningState.FAILED
            # we need to remove any invalid files in the storage
            await clean_task_output_and_log_files_if_invalid(
                self.db_engine, user_id, project_id, node_id
            )
        # instrumentation
        message = InstrumentationRabbitMessage(
            metrics="service_stopped",
            user_id=user_id,
            project_id=task.project_id,
            node_id=task.node_id,
            service_uuid=task.node_id,
            service_type=NodeClass.COMPUTATIONAL,
            service_key=service_key,
            service_tag=service_version,
            result=task_final_state,
        )
        await self.rabbitmq_client.publish_message(message)
    # persisted even when job_id is missing (state stays FAILED in that case)
    await CompTasksRepository(self.db_engine).set_project_tasks_state(
        task.project_id, [task.node_id], task_final_state
    )
async def instrumentation_message_parser(app: web.Application, data: bytes) -> None:
    """Dispatch a raw instrumentation rabbit message to the matching handler.

    Decodes ``data`` into an ``InstrumentationRabbitMessage`` and forwards the
    relevant label values to ``service_started`` or ``service_stopped``
    depending on the ``metrics`` field; any other value is ignored.
    """
    rabbit_message = InstrumentationRabbitMessage.parse_raw(data)
    # serialize the model once: the original called .dict() again for every
    # single label key inside the comprehensions
    message_data = rabbit_message.dict()
    if rabbit_message.metrics == "service_started":
        service_started(
            app, **{key: message_data[key] for key in SERVICE_STARTED_LABELS}
        )
    elif rabbit_message.metrics == "service_stopped":
        service_stopped(
            app, **{key: message_data[key] for key in SERVICE_STOPPED_LABELS}
        )
async def _on_task_completed(self, event: TaskStateEvent) -> None:
    """Process a terminal task state event.

    Parses output data on success or cleans invalid storage files otherwise,
    persists the final state, publishes a "service_stopped" instrumentation
    message and nudges the scheduler to run again.
    """
    logger.debug(
        "received task completion: %s",
        event,
    )
    # the dask job id encodes the full task coordinates
    (
        service_key,
        service_version,
        user_id,
        project_id,
        node_id,
    ) = parse_dask_job_id(event.job_id)

    assert event.state in COMPLETED_STATES  # nosec

    logger.info(
        "task %s completed with state: %s\n%s",
        event.job_id,
        f"{event.state.value}".lower(),
        event.msg,
    )

    if event.state == RunningState.SUCCESS:
        # we need to parse the results
        assert event.msg  # nosec
        await parse_output_data(
            self.db_engine,
            event.job_id,
            TaskOutputData.parse_raw(event.msg),
        )
    else:
        # we need to remove any invalid files in the storage
        await clean_task_output_and_log_files_if_invalid(
            self.db_engine, user_id, project_id, node_id
        )

    await CompTasksRepository(self.db_engine).set_project_tasks_state(
        project_id, [node_id], event.state
    )

    # instrumentation
    stopped_message = InstrumentationRabbitMessage(
        metrics="service_stopped",
        user_id=user_id,
        project_id=project_id,
        node_id=node_id,
        service_uuid=node_id,
        service_type=NodeClass.COMPUTATIONAL,
        service_key=service_key,
        service_tag=service_version,
        result=event.state,
    )
    await self.rabbitmq_client.publish_message(stopped_message)

    self._wake_up_scheduler_now()
async def _publish_in_rabbit(
    user_id: int,
    project_id: UUIDStr,
    node_uuid: UUIDStr,
    num_messages: int,
    rabbit_exchanges: RabbitExchanges,
) -> Tuple[LogMessages, ProgressMessages, InstrumMessages]:
    """Publish a batch of log/progress/instrumentation messages for a node.

    Sends a "service_started" message, ``num_messages`` log and progress
    messages, then a "service_stopped" message, and returns all the messages
    that were published so callers can assert on them.
    """
    log_messages = [
        LoggerRabbitMessage(
            user_id=user_id,
            project_id=project_id,
            node_id=node_uuid,
            messages=[f"log number {n}"],
        )
        for n in range(num_messages)
    ]
    progress_messages = [
        ProgressRabbitMessage(
            user_id=user_id,
            project_id=project_id,
            node_id=node_uuid,
            progress=float(n) / float(num_messages),
        )
        for n in range(num_messages)
    ]

    # BUG FIX: previously start and stop were the SAME object
    # (`start = (stop) = InstrumentationRabbitMessage(...)`) and the object was
    # mutated to "service_stopped" before the start message was published, so
    # the "service_started" publish actually carried stop data. Build two
    # independent messages instead.
    # indicate container is started
    instrumentation_start_message = InstrumentationRabbitMessage(
        metrics="service_started",
        user_id=user_id,
        project_id=project_id,
        node_id=node_uuid,
        service_uuid=node_uuid,
        service_type="COMPUTATIONAL",
        service_key="some/service/awesome/key",
        service_tag="some-awesome-tag",
    )
    instrumentation_stop_message = InstrumentationRabbitMessage(
        metrics="service_stopped",
        user_id=user_id,
        project_id=project_id,
        node_id=node_uuid,
        service_uuid=node_uuid,
        service_type="COMPUTATIONAL",
        service_key="some/service/awesome/key",
        service_tag="some-awesome-tag",
        result=RunningState.SUCCESS,
    )
    instrumentation_messages = [
        instrumentation_start_message,
        instrumentation_stop_message,
    ]
    await rabbit_exchanges.instrumentation.publish(
        aio_pika.Message(
            body=instrumentation_start_message.json().encode(),
            content_type="text/json",
        ),
        routing_key="",
    )

    for n in range(num_messages):
        await rabbit_exchanges.logs.publish(
            aio_pika.Message(
                body=log_messages[n].json().encode(), content_type="text/json"
            ),
            routing_key="",
        )
        await rabbit_exchanges.progress.publish(
            aio_pika.Message(
                body=progress_messages[n].json().encode(), content_type="text/json"
            ),
            routing_key="",
        )

    # indicate container is stopped
    await rabbit_exchanges.instrumentation.publish(
        aio_pika.Message(
            body=instrumentation_stop_message.json().encode(),
            content_type="text/json",
        ),
        routing_key="",
    )

    return (log_messages, progress_messages, instrumentation_messages)