def _generate_task(
    self, metadata_handler: metadata.Metadata,
    node: pipeline_pb2.PipelineNode) -> Optional[task_pb2.Task]:
  """Generates a node execution task.

  If a node execution is not feasible, `None` is returned.

  Args:
    metadata_handler: A handler to access MLMD db.
    node: The pipeline node for which to generate a task.

  Returns:
    Returns a `Task` or `None` if task generation is deemed infeasible.
  """
  if not task_gen_utils.is_feasible_node(node):
    return None

  executions = task_gen_utils.get_executions(metadata_handler, node)
  # If the node already has an active execution, resume it instead of
  # starting a new one.
  result = task_gen_utils.generate_task_from_active_execution(
      self._pipeline, node, executions)
  if result:
    return result

  resolved_info = task_gen_utils.generate_resolved_info(metadata_handler, node)
  # Guard against both a missing input dict and a dict whose values are all
  # empty lists; registering an execution with no inputs is meaningless and
  # the downstream artifact-id comparison would be vacuous.
  if resolved_info.input_artifacts is None or not any(
      resolved_info.input_artifacts.values()):
    logging.info(
        'Task cannot be generated for node %s since no input artifacts '
        'are resolved.', node.node_info.id)
    return None

  # If the latest successful execution had the same resolved input artifacts,
  # the component should not be triggered, so task is not generated.
  # TODO(b/170231077): This logic should be handled by the resolver when it's
  # implemented. Also, currently only the artifact ids of previous execution
  # are checked to decide if a new execution is warranted but it may also be
  # necessary to factor in the difference of execution properties.
  latest_exec = task_gen_utils.get_latest_successful_execution(executions)
  if latest_exec:
    artifact_ids_by_event_type = (
        execution_lib.get_artifact_ids_by_event_type_for_execution_id(
            metadata_handler, latest_exec.id))
    latest_exec_input_artifact_ids = artifact_ids_by_event_type.get(
        metadata_store_pb2.Event.INPUT, set())
    current_exec_input_artifact_ids = set(
        a.id for a in itertools.chain(*resolved_info.input_artifacts.values()))
    if latest_exec_input_artifact_ids == current_exec_input_artifact_ids:
      return None

  execution = execution_publish_utils.register_execution(
      metadata_handler=metadata_handler,
      execution_type=node.node_info.type,
      contexts=resolved_info.contexts,
      input_artifacts=resolved_info.input_artifacts,
      exec_properties=resolved_info.exec_properties)
  return task_gen_utils.create_task(self._pipeline, node, execution)
def test_get_latest_successful_execution(self):
  """The latest FAILED execution must be skipped in favor of the newest success."""
  for _ in range(3):
    otu.fake_transform_output(self._mlmd_connection, self._transform)
  with self._mlmd_connection as m:
    all_execs = sorted(m.store.get_executions(), key=lambda e: e.id)
    # Flip the most recent execution to FAILED so it cannot be "successful".
    newest = all_execs[2]
    newest.last_known_state = metadata_store_pb2.Execution.FAILED
    m.store.put_executions([newest])
    node_execs = sorted(
        task_gen_utils.get_executions(m, self._transform), key=lambda e: e.id)
    # With the newest execution failed, the middle one is the latest success.
    self.assertEqual(node_execs[1],
                     task_gen_utils.get_latest_successful_execution(node_execs))
def _generate_task(
    self, metadata_handler: metadata.Metadata,
    node: pipeline_pb2.PipelineNode) -> Optional[task_lib.Task]:
  """Generates a node execution task.

  If a node execution is not feasible, `None` is returned.

  Args:
    metadata_handler: A handler to access MLMD db.
    node: The pipeline node for which to generate a task.

  Returns:
    Returns a `Task` or `None` if task generation is deemed infeasible.
  """
  executions = task_gen_utils.get_executions(metadata_handler, node)
  # If the node already has an active execution, resume it instead of
  # starting a new one.
  result = task_gen_utils.generate_task_from_active_execution(
      metadata_handler, self._pipeline, node, executions)
  if result:
    return result

  resolved_info = task_gen_utils.generate_resolved_info(metadata_handler, node)
  if resolved_info.input_artifacts is None or not any(
      resolved_info.input_artifacts.values()):
    logging.info(
        'Task cannot be generated for node %s since no input artifacts '
        'are resolved.', node.node_info.id)
    return None

  # If the latest successful execution had the same resolved input artifacts,
  # the component should not be triggered, so task is not generated.
  # TODO(b/170231077): This logic should be handled by the resolver when it's
  # implemented. Also, currently only the artifact ids of previous execution
  # are checked to decide if a new execution is warranted but it may also be
  # necessary to factor in the difference of execution properties.
  latest_exec = task_gen_utils.get_latest_successful_execution(executions)
  if latest_exec:
    artifact_ids_by_event_type = (
        execution_lib.get_artifact_ids_by_event_type_for_execution_id(
            metadata_handler, latest_exec.id))
    latest_exec_input_artifact_ids = artifact_ids_by_event_type.get(
        metadata_store_pb2.Event.INPUT, set())
    current_exec_input_artifact_ids = set(
        a.id for a in itertools.chain(*resolved_info.input_artifacts.values()))
    if latest_exec_input_artifact_ids == current_exec_input_artifact_ids:
      return None

  node_uid = task_lib.NodeUid.from_pipeline_node(self._pipeline, node)

  # For mixed service nodes, ensure node services and check service status
  # BEFORE registering a new execution; aborting after registration would
  # leave a freshly registered execution dangling in MLMD.
  service_status = self._ensure_node_services_if_mixed(node.node_info.id)
  if service_status is not None:
    if service_status != service_jobs.ServiceStatus.RUNNING:
      return self._abort_node_task(node_uid)

  execution = execution_publish_utils.register_execution(
      metadata_handler=metadata_handler,
      execution_type=node.node_info.type,
      contexts=resolved_info.contexts,
      input_artifacts=resolved_info.input_artifacts,
      exec_properties=resolved_info.exec_properties)
  outputs_resolver = outputs_utils.OutputsResolver(
      node, self._pipeline.pipeline_info, self._pipeline.runtime_spec,
      self._pipeline.execution_mode)

  return task_lib.ExecNodeTask(
      node_uid=node_uid,
      execution=execution,
      contexts=resolved_info.contexts,
      input_artifacts=resolved_info.input_artifacts,
      exec_properties=resolved_info.exec_properties,
      output_artifacts=outputs_resolver.generate_output_artifacts(
          execution.id),
      executor_output_uri=outputs_resolver.get_executor_output_uri(
          execution.id),
      stateful_working_dir=outputs_resolver.get_stateful_working_directory(
          execution.id),
      pipeline=self._pipeline)