Example 1
    def _generate_task(
            self, metadata_handler: metadata.Metadata,
            node: pipeline_pb2.PipelineNode) -> Optional[task_pb2.Task]:
        """Generates a node execution task.

        If a node execution is not feasible, `None` is returned.

        Args:
          metadata_handler: A handler to access MLMD db.
          node: The pipeline node for which to generate a task.

        Returns:
          Returns a `Task` or `None` if task generation is deemed infeasible.
        """
        if not task_gen_utils.is_feasible_node(node):
            return None

        executions = task_gen_utils.get_executions(metadata_handler, node)
        result = task_gen_utils.generate_task_from_active_execution(
            self._pipeline, node, executions)
        if result:
            return result

        resolved_info = task_gen_utils.generate_resolved_info(
            metadata_handler, node)
        if resolved_info.input_artifacts is None:
            logging.info(
                'Task cannot be generated for node %s since no input artifacts '
                'are resolved.', node.node_info.id)
            return None

        # If the latest successful execution had the same resolved input artifacts,
        # the component should not be triggered, so no task is generated.
        # TODO(b/170231077): This logic should be handled by the resolver once it's
        # implemented. Also, currently only the artifact ids of the previous
        # execution are checked to decide whether a new execution is warranted, but
        # it may also be necessary to factor in differences in execution properties.
        latest_exec = task_gen_utils.get_latest_successful_execution(
            executions)
        if latest_exec:
            artifact_ids_by_event_type = (
                execution_lib.get_artifact_ids_by_event_type_for_execution_id(
                    metadata_handler, latest_exec.id))
            latest_exec_input_artifact_ids = artifact_ids_by_event_type.get(
                metadata_store_pb2.Event.INPUT, set())
            current_exec_input_artifact_ids = set(
                a.id for a in itertools.chain(
                    *resolved_info.input_artifacts.values()))
            if latest_exec_input_artifact_ids == current_exec_input_artifact_ids:
                return None

        execution = execution_publish_utils.register_execution(
            metadata_handler=metadata_handler,
            execution_type=node.node_info.type,
            contexts=resolved_info.contexts,
            input_artifacts=resolved_info.input_artifacts,
            exec_properties=resolved_info.exec_properties)
        return task_gen_utils.create_task(self._pipeline, node, execution)
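
The TODO in the comment above notes that only artifact ids are compared, while execution properties are ignored. As a rough illustration of the extension it suggests, the helper below checks both; its name and signature are hypothetical and not part of TFX:

from typing import Any, Mapping, Set


def _would_repeat_previous_execution(
        latest_input_artifact_ids: Set[int],
        current_input_artifact_ids: Set[int],
        latest_exec_properties: Mapping[str, Any],
        current_exec_properties: Mapping[str, Any]) -> bool:
    # Hypothetical sketch: skip a new execution only when both the resolved
    # input artifact ids and the execution properties match the previous run.
    return (latest_input_artifact_ids == current_input_artifact_ids
            and latest_exec_properties == current_exec_properties)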
Example 2
    def test_get_latest_successful_execution(self):
        otu.fake_transform_output(self._mlmd_connection, self._transform)
        otu.fake_transform_output(self._mlmd_connection, self._transform)
        otu.fake_transform_output(self._mlmd_connection, self._transform)
        with self._mlmd_connection as m:
            execs = sorted(m.store.get_executions(), key=lambda e: e.id)
            execs[2].last_known_state = metadata_store_pb2.Execution.FAILED
            m.store.put_executions([execs[2]])
            execs = sorted(task_gen_utils.get_executions(m, self._transform),
                           key=lambda e: e.id)
            self.assertEqual(
                execs[1],
                task_gen_utils.get_latest_successful_execution(execs))
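
The test marks the newest of three executions as FAILED and expects the second one back. A minimal sketch of the behaviour the test relies on, assuming "successful" means a COMPLETE last known state and that the newest execution can be approximated by the largest id; the real helper lives in `task_gen_utils` and may differ:

from typing import Optional, Sequence

from ml_metadata.proto import metadata_store_pb2


def get_latest_successful_execution(
        executions: Sequence[metadata_store_pb2.Execution]
) -> Optional[metadata_store_pb2.Execution]:
    # Illustrative only: keep executions whose last known state is COMPLETE
    # and return the newest of them (largest id), or None if there are none.
    successful = [
        e for e in executions
        if e.last_known_state == metadata_store_pb2.Execution.COMPLETE
    ]
    return max(successful, key=lambda e: e.id) if successful else None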
Example 3
    def _generate_task(
            self, metadata_handler: metadata.Metadata,
            node: pipeline_pb2.PipelineNode) -> Optional[task_lib.Task]:
        """Generates a node execution task.

        If a node execution is not feasible, `None` is returned.

        Args:
          metadata_handler: A handler to access MLMD db.
          node: The pipeline node for which to generate a task.

        Returns:
          Returns a `Task` or `None` if task generation is deemed infeasible.
        """
        executions = task_gen_utils.get_executions(metadata_handler, node)
        result = task_gen_utils.generate_task_from_active_execution(
            metadata_handler, self._pipeline, node, executions)
        if result:
            return result

        resolved_info = task_gen_utils.generate_resolved_info(
            metadata_handler, node)
        if resolved_info.input_artifacts is None or not any(
                resolved_info.input_artifacts.values()):
            logging.info(
                'Task cannot be generated for node %s since no input artifacts '
                'are resolved.', node.node_info.id)
            return None

        # If the latest successful execution had the same resolved input artifacts,
        # the component should not be triggered, so no task is generated.
        # TODO(b/170231077): This logic should be handled by the resolver once it's
        # implemented. Also, currently only the artifact ids of the previous
        # execution are checked to decide whether a new execution is warranted, but
        # it may also be necessary to factor in differences in execution properties.
        latest_exec = task_gen_utils.get_latest_successful_execution(
            executions)
        if latest_exec:
            artifact_ids_by_event_type = (
                execution_lib.get_artifact_ids_by_event_type_for_execution_id(
                    metadata_handler, latest_exec.id))
            latest_exec_input_artifact_ids = artifact_ids_by_event_type.get(
                metadata_store_pb2.Event.INPUT, set())
            current_exec_input_artifact_ids = set(
                a.id for a in itertools.chain(
                    *resolved_info.input_artifacts.values()))
            if latest_exec_input_artifact_ids == current_exec_input_artifact_ids:
                return None

        node_uid = task_lib.NodeUid.from_pipeline_node(self._pipeline, node)
        execution = execution_publish_utils.register_execution(
            metadata_handler=metadata_handler,
            execution_type=node.node_info.type,
            contexts=resolved_info.contexts,
            input_artifacts=resolved_info.input_artifacts,
            exec_properties=resolved_info.exec_properties)
        outputs_resolver = outputs_utils.OutputsResolver(
            node, self._pipeline.pipeline_info, self._pipeline.runtime_spec,
            self._pipeline.execution_mode)

        # For mixed service nodes, we ensure node services and check service
        # status; the node is aborted if its service jobs have failed.
        service_status = self._ensure_node_services_if_mixed(node.node_info.id)
        if service_status is not None:
            if service_status != service_jobs.ServiceStatus.RUNNING:
                return self._abort_node_task(node_uid)

        return task_lib.ExecNodeTask(
            node_uid=node_uid,
            execution=execution,
            contexts=resolved_info.contexts,
            input_artifacts=resolved_info.input_artifacts,
            exec_properties=resolved_info.exec_properties,
            output_artifacts=outputs_resolver.generate_output_artifacts(
                execution.id),
            executor_output_uri=outputs_resolver.get_executor_output_uri(
                execution.id),
            stateful_working_dir=outputs_resolver.get_stateful_working_directory(
                execution.id),
            pipeline=self._pipeline)
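
For context on how this method gets used, a hedged sketch of a driver that walks the pipeline IR and collects one task per node; the `generator` and `metadata_handler` arguments and the call to the private method are assumptions for illustration, not TFX API:

from typing import List

from tfx.proto.orchestration import pipeline_pb2


def generate_tasks(generator, metadata_handler,
                   pipeline: pipeline_pb2.Pipeline) -> List:
    # Illustrative driver: each `nodes` entry wraps either a node or a
    # sub-pipeline; only plain nodes are handled in this sketch.
    tasks = []
    for pipeline_or_node in pipeline.nodes:
        node = pipeline_or_node.pipeline_node
        task = generator._generate_task(metadata_handler, node)
        if task is not None:
            tasks.append(task)
    return tasks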