Ejemplo n.º 1
0
    def _generate_task(
            self, metadata_handler: metadata.Metadata,
            node: pipeline_pb2.PipelineNode) -> Optional[task_pb2.Task]:
        """Builds the execution task for `node`, if one should be issued.

        The steps, in order:
          1. A node rejected by `task_gen_utils.is_feasible_node` yields `None`.
          2. If `task_gen_utils.generate_task_from_active_execution` produces a
             task from the node's existing executions, it is returned as is.
          3. Inputs are resolved; unresolved (`None`) input artifacts yield
             `None`.
          4. If the latest successful execution consumed exactly the artifact
             ids that were just resolved, no task is generated (`None`).
          5. Otherwise a new execution is registered and a task is created
             for it.

        Args:
          metadata_handler: A handler to access MLMD db.
          node: The pipeline node for which to generate a task.

        Returns:
          A `Task`, or `None` when no task is generated for the node.
        """
        if not task_gen_utils.is_feasible_node(node):
            return None

        node_executions = task_gen_utils.get_executions(metadata_handler, node)
        active_task = task_gen_utils.generate_task_from_active_execution(
            self._pipeline, node, node_executions)
        if active_task:
            return active_task

        resolved = task_gen_utils.generate_resolved_info(metadata_handler, node)
        if resolved.input_artifacts is None:
            logging.info(
                'Task cannot be generated for node %s since no input artifacts '
                'are resolved.', node.node_info.id)
            return None

        # Skip triggering the component when the most recent successful
        # execution already consumed the very same set of input artifacts.
        # TODO(b/170231077): The resolver should own this logic once it is
        # implemented. Note that only artifact ids are compared here; a
        # difference in execution properties may also need to be considered.
        last_success = task_gen_utils.get_latest_successful_execution(
            node_executions)
        if last_success:
            ids_by_event_type = (
                execution_lib.get_artifact_ids_by_event_type_for_execution_id(
                    metadata_handler, last_success.id))
            previous_input_ids = ids_by_event_type.get(
                metadata_store_pb2.Event.INPUT, set())
            resolved_input_ids = {
                artifact.id
                for artifacts in resolved.input_artifacts.values()
                for artifact in artifacts
            }
            if previous_input_ids == resolved_input_ids:
                return None

        new_execution = execution_publish_utils.register_execution(
            metadata_handler=metadata_handler,
            execution_type=node.node_info.type,
            contexts=resolved.contexts,
            input_artifacts=resolved.input_artifacts,
            exec_properties=resolved.exec_properties)
        return task_gen_utils.create_task(self._pipeline, node, new_execution)
Ejemplo n.º 2
0
    def _generate_task(
            self, metadata_handler: metadata.Metadata,
            node: pipeline_pb2.PipelineNode) -> Optional[task_lib.Task]:
        """Builds an `ExecNodeTask` for `node`, if one can be issued.

        A node rejected by `task_gen_utils.is_feasible_node` yields `None`.
        If `task_gen_utils.generate_task_from_active_execution` produces a
        task from the node's existing executions, that task is returned as
        is. Otherwise the node's inputs are resolved, a new execution is
        registered, and an `ExecNodeTask` is assembled whose output
        artifacts, executor output URI and stateful working directory come
        from an `OutputsResolver` keyed by the new execution's id.

        Args:
          metadata_handler: A handler to access MLMD db.
          node: The pipeline node for which to generate a task.

        Returns:
          A `Task`, or `None` when the node is not feasible or its input
          artifacts could not be resolved.
        """
        if not task_gen_utils.is_feasible_node(node):
            return None

        node_executions = task_gen_utils.get_executions(metadata_handler, node)
        active_task = task_gen_utils.generate_task_from_active_execution(
            metadata_handler, self._pipeline, node, node_executions)
        if active_task:
            return active_task

        resolved = task_gen_utils.generate_resolved_info(metadata_handler, node)
        if resolved.input_artifacts is None:
            # TODO(goutham): If the pipeline can't make progress, there should
            # be a standard mechanism to surface it to the user.
            logging.warning(
                'Task cannot be generated for node %s since no input artifacts '
                'are resolved.', node.node_info.id)
            return None

        new_execution = execution_publish_utils.register_execution(
            metadata_handler=metadata_handler,
            execution_type=node.node_info.type,
            contexts=resolved.contexts,
            input_artifacts=resolved.input_artifacts,
            exec_properties=resolved.exec_properties)

        pipeline = self._pipeline
        resolver = outputs_utils.OutputsResolver(
            node, pipeline.pipeline_info, pipeline.runtime_spec,
            pipeline.execution_mode)

        # The values below are computed in the same order in which the task
        # constructor's keyword arguments were originally evaluated.
        node_uid = task_lib.NodeUid.from_pipeline_node(self._pipeline, node)
        output_artifacts = resolver.generate_output_artifacts(new_execution.id)
        executor_output_uri = resolver.get_executor_output_uri(
            new_execution.id)
        stateful_working_dir = resolver.get_stateful_working_directory(
            new_execution.id)

        return task_lib.ExecNodeTask(
            node_uid=node_uid,
            execution=new_execution,
            contexts=resolved.contexts,
            input_artifacts=resolved.input_artifacts,
            exec_properties=resolved.exec_properties,
            output_artifacts=output_artifacts,
            executor_output_uri=executor_output_uri,
            stateful_working_dir=stateful_working_dir)
Ejemplo n.º 3
0
    def _generate_task(
            self, metadata_handler: metadata.Metadata,
            node: pipeline_pb2.PipelineNode) -> Optional[task_pb2.Task]:
        """Builds the execution task for `node`, if one can be issued.

        A node rejected by `task_gen_utils.is_feasible_node` yields `None`.
        If `task_gen_utils.generate_task_from_active_execution` produces a
        task from the node's existing executions, that task is returned as
        is. Otherwise the node's inputs are resolved, a new execution is
        registered in MLMD, and a task is created for that execution.

        Args:
          metadata_handler: A handler to access MLMD db.
          node: The pipeline node for which to generate a task.

        Returns:
          A `Task`, or `None` when the node is not feasible or its input
          artifacts could not be resolved.
        """
        if not task_gen_utils.is_feasible_node(node):
            return None

        node_executions = task_gen_utils.get_executions(metadata_handler, node)
        active_task = task_gen_utils.generate_task_from_active_execution(
            self._pipeline, node, node_executions)
        if active_task:
            return active_task

        resolved = task_gen_utils.generate_resolved_info(metadata_handler, node)
        if resolved.input_artifacts is None:
            # TODO(goutham): If the pipeline can't make progress, there should
            # be a standard mechanism to surface it to the user.
            logging.warning(
                'Task cannot be generated for node %s since no input artifacts '
                'are resolved.', node.node_info.id)
            return None

        new_execution = execution_publish_utils.register_execution(
            metadata_handler=metadata_handler,
            execution_type=node.node_info.type,
            contexts=resolved.contexts,
            input_artifacts=resolved.input_artifacts,
            exec_properties=resolved.exec_properties)
        return task_gen_utils.create_task(self._pipeline, node, new_execution)
Ejemplo n.º 4
0
def _maybe_enqueue_cancellation_task(mlmd_handle: metadata.Metadata,
                                     pipeline: pipeline_pb2.Pipeline,
                                     node: pipeline_pb2.PipelineNode,
                                     task_queue: tq.TaskQueue) -> bool:
    """Enqueues a cancellation task for `node` unless none is needed.

    Two situations lead to a task being enqueued:

    * The node's ExecNodeTask id is present in the task queue: a
      `CancelNodeTask` for the node is enqueued.
    * No such task is queued, but
      `task_gen_utils.generate_task_from_active_execution` (called with
      `is_cancelled=True`) produces a task from the node's executions in
      MLMD. This may happen when the orchestrator restarted after stopping
      was initiated but before the schedulers could finish; enqueuing the
      task gives the scheduler a chance to finish gracefully.

    Args:
      mlmd_handle: A handle to the MLMD db.
      pipeline: The pipeline containing the node to cancel.
      node: The node to cancel.
      task_queue: A `TaskQueue` instance into which any cancellation tasks
        will be enqueued.

    Returns:
      `True` if a task was enqueued; `False` if the node is not feasible or
      no cancellation was required.
    """
    if not task_gen_utils.is_feasible_node(node):
        return False

    queued_task_id = task_lib.exec_node_task_id_from_pipeline_node(
        pipeline, node)
    if task_queue.contains_task_id(queued_task_id):
        cancel_task = task_lib.CancelNodeTask(
            node_uid=task_lib.NodeUid.from_pipeline_node(pipeline, node))
        task_queue.enqueue(cancel_task)
        return True

    # Nothing is queued for this node; fall back to its executions in MLMD.
    node_executions = task_gen_utils.get_executions(mlmd_handle, node)
    cancelled_exec_task = task_gen_utils.generate_task_from_active_execution(
        mlmd_handle, pipeline, node, node_executions, is_cancelled=True)
    if not cancelled_exec_task:
        return False

    task_queue.enqueue(cancelled_exec_task)
    return True