Example #1: generate an `ExecNodeTask` from an existing execution, creating its output directories with `outputs_utils.make_output_dirs`.
def _generate_task_from_execution(metadata_handler: metadata.Metadata,
                                  pipeline: pipeline_pb2.Pipeline,
                                  node: pipeline_pb2.PipelineNode,
                                  execution: metadata_store_pb2.Execution,
                                  is_cancelled: bool = False) -> task_lib.Task:
    """Generates `ExecNodeTask` given execution."""
    contexts = metadata_handler.store.get_contexts_by_execution(execution.id)
    exec_properties = extract_properties(execution)
    input_artifacts = execution_lib.get_artifacts_dict(
        metadata_handler, execution.id, [metadata_store_pb2.Event.INPUT])
    outputs_resolver = outputs_utils.OutputsResolver(node,
                                                     pipeline.pipeline_info,
                                                     pipeline.runtime_spec,
                                                     pipeline.execution_mode)
    output_artifacts = outputs_resolver.generate_output_artifacts(execution.id)
    outputs_utils.make_output_dirs(output_artifacts)
    return task_lib.ExecNodeTask(
        node_uid=task_lib.NodeUid.from_pipeline_node(pipeline, node),
        execution_id=execution.id,
        contexts=contexts,
        exec_properties=exec_properties,
        input_artifacts=input_artifacts,
        output_artifacts=output_artifacts,
        executor_output_uri=outputs_resolver.get_executor_output_uri(
            execution.id),
        stateful_working_dir=outputs_resolver.get_stateful_working_directory(
            execution.id),
        tmp_dir=outputs_resolver.make_tmp_dir(execution.id),
        pipeline=pipeline,
        is_cancelled=is_cancelled)
Example #2: test that `make_output_dirs` creates every artifact URI and that `remove_output_dirs` deletes them.
  def testMakeOutputDirsAndRemoveOutputDirs(self):
    output_artifacts = self._output_resolver.generate_output_artifacts(1)
    outputs_utils.make_output_dirs(output_artifacts)
    for _, artifact_list in output_artifacts.items():
      for artifact in artifact_list:
        self.assertTrue(tf.io.gfile.exists(artifact.uri))

    outputs_utils.remove_output_dirs(output_artifacts)
    for _, artifact_list in output_artifacts.items():
      for artifact in artifact_list:
        self.assertFalse(tf.io.gfile.exists(artifact.uri))
Example #3: executor wrapper that creates output directories before running and removes them if the executor raises.
    def _run_executor(
        self, execution_info: data_types.ExecutionInfo
    ) -> execution_result_pb2.ExecutorOutput:
        """Executes underlying component implementation."""

        logging.info('Going to run a new execution: %s', execution_info)

        outputs_utils.make_output_dirs(execution_info.output_dict)
        try:
            return self._executor_operator.run_executor(execution_info)
        except Exception:  # pylint: disable=broad-except
            outputs_utils.remove_output_dirs(execution_info.output_dict)
            raise
Example #4: test that value artifacts are created as files while other artifacts are created as directories.
    def testMakeOutputDirsAndRemoveOutputDirs(self):
        output_artifacts = self._output_resolver.generate_output_artifacts(1)
        outputs_utils.make_output_dirs(output_artifacts)
        for _, artifact_list in output_artifacts.items():
            for artifact in artifact_list:
                if isinstance(artifact, ValueArtifact):
                    self.assertFalse(fileio.isdir(artifact.uri))
                else:
                    self.assertTrue(fileio.isdir(artifact.uri))
                self.assertTrue(fileio.exists(artifact.uri))

        outputs_utils.remove_output_dirs(output_artifacts)
        for _, artifact_list in output_artifacts.items():
            for artifact in artifact_list:
                self.assertFalse(fileio.exists(artifact.uri))
Example #5: executor wrapper that logs the error and stack trace, cleans up output directories, and re-raises.
    def _run_executor(
        self, execution_info: base_executor_operator.ExecutionInfo
    ) -> execution_result_pb2.ExecutorOutput:
        """Executes underlying component implementation."""

        logging.info('Going to run a new execution: %s', execution_info)

        outputs_utils.make_output_dirs(execution_info.output_dict)
        try:
            return self._executor_operator.run_executor(execution_info)
        except Exception as e:  # pylint: disable=broad-except
            outputs_utils.remove_output_dirs(execution_info.output_dict)
            logging.error(
                'Execution failed with error %s '
                'and this is the stack trace \n %s', e, traceback.format_exc())
            raise
Example #6: test that calling `make_output_dirs` again does not overwrite content already written to the artifacts.
    def testMakeOutputDirsArtifactAlreadyExists(self):
        output_artifacts = self._output_resolver().generate_output_artifacts(1)
        outputs_utils.make_output_dirs(output_artifacts)
        for _, artifact_list in output_artifacts.items():
            for artifact in artifact_list:
                if isinstance(artifact, ValueArtifact):
                    with fileio.open(artifact.uri, 'w') as f:
                        f.write('test')
                else:
                    with fileio.open(os.path.join(artifact.uri, 'output'),
                                     'w') as f:
                        f.write('test')
        outputs_utils.make_output_dirs(output_artifacts)
        for _, artifact_list in output_artifacts.items():
            for artifact in artifact_list:
                if isinstance(artifact, ValueArtifact):
                    with fileio.open(artifact.uri, 'r') as f:
                        self.assertEqual(f.read(), 'test')
                else:
                    with fileio.open(os.path.join(artifact.uri, 'output'),
                                     'r') as f:
                        self.assertEqual(f.read(), 'test')
Example #7: executor wrapper that also removes output directories when the executor reports a non-zero result code.
  def _run_executor(
      self, execution_info: data_types.ExecutionInfo
  ) -> execution_result_pb2.ExecutorOutput:
    """Executes underlying component implementation."""

    logging.info('Going to run a new execution: %s', execution_info)

    outputs_utils.make_output_dirs(execution_info.output_dict)
    try:
      executor_output = self._executor_operator.run_executor(execution_info)
      code = executor_output.execution_result.code
      if code != 0:
        result_message = executor_output.execution_result.result_message
        err = (f'Execution {execution_info.execution_id} '
               f'failed with error code {code} and '
               f'error message {result_message}')
        logging.error(err)
        raise _ExecutionFailedError(err, executor_output)
      return executor_output
    except Exception:  # pylint: disable=broad-except
      outputs_utils.remove_output_dirs(execution_info.output_dict)
      raise
Example #8: test covering `make_output_dirs`, `clear_output_dirs`, and `remove_output_dirs`.
    def testMakeClearAndRemoveOutputDirs(self):
        output_artifacts = self._output_resolver().generate_output_artifacts(1)
        outputs_utils.make_output_dirs(output_artifacts)
        for _, artifact_list in output_artifacts.items():
            for artifact in artifact_list:
                if isinstance(artifact, ValueArtifact):
                    self.assertFalse(fileio.isdir(artifact.uri))
                else:
                    self.assertTrue(fileio.isdir(artifact.uri))
                    with fileio.open(os.path.join(artifact.uri, 'output'),
                                     'w') as f:
                        f.write('')
                self.assertTrue(fileio.exists(artifact.uri))

        outputs_utils.clear_output_dirs(output_artifacts)
        for _, artifact_list in output_artifacts.items():
            for artifact in artifact_list:
                if not isinstance(artifact, ValueArtifact):
                    self.assertEqual(fileio.listdir(artifact.uri), [])

        outputs_utils.remove_output_dirs(output_artifacts)
        for _, artifact_list in output_artifacts.items():
            for artifact in artifact_list:
                self.assertFalse(fileio.exists(artifact.uri))
Example #9: sync pipeline task generation: resolve inputs, register the execution, create output directories, and emit an `ExecNodeTask`.
    def _resolve_inputs_and_generate_tasks_for_node(
        self,
        node: pipeline_pb2.PipelineNode,
    ) -> List[task_lib.Task]:
        """Generates tasks for a node by freshly resolving inputs."""
        result = []
        node_uid = task_lib.NodeUid.from_pipeline_node(self._pipeline, node)
        resolved_info = task_gen_utils.generate_resolved_info(
            self._mlmd_handle, node)
        if resolved_info is None:
            result.append(
                task_lib.UpdateNodeStateTask(node_uid=node_uid,
                                             state=pstate.NodeState.SKIPPED))
            return result

        if not resolved_info.input_artifacts:
            error_msg = f'failure to resolve inputs; node uid: {node_uid}'
            result.append(
                task_lib.UpdateNodeStateTask(node_uid=node_uid,
                                             state=pstate.NodeState.FAILED,
                                             status=status_lib.Status(
                                                 code=status_lib.Code.ABORTED,
                                                 message=error_msg)))
            return result
        # TODO(b/207038460): Update sync pipeline to support ForEach.
        input_artifacts = resolved_info.input_artifacts[0]

        execution = execution_publish_utils.register_execution(
            metadata_handler=self._mlmd_handle,
            execution_type=node.node_info.type,
            contexts=resolved_info.contexts,
            input_artifacts=input_artifacts,
            exec_properties=resolved_info.exec_properties)
        outputs_resolver = outputs_utils.OutputsResolver(
            node, self._pipeline.pipeline_info, self._pipeline.runtime_spec,
            self._pipeline.execution_mode)
        output_artifacts = outputs_resolver.generate_output_artifacts(
            execution.id)

        # For mixed service nodes, we ensure node services and check service
        # status; pipeline is aborted if the service jobs have failed.
        service_status = self._ensure_node_services_if_mixed(node.node_info.id)
        if service_status == service_jobs.ServiceStatus.FAILED:
            error_msg = f'associated service job failed; node uid: {node_uid}'
            result.append(
                task_lib.UpdateNodeStateTask(node_uid=node_uid,
                                             state=pstate.NodeState.FAILED,
                                             status=status_lib.Status(
                                                 code=status_lib.Code.ABORTED,
                                                 message=error_msg)))
            return result

        outputs_utils.make_output_dirs(output_artifacts)
        result.append(
            task_lib.UpdateNodeStateTask(node_uid=node_uid,
                                         state=pstate.NodeState.RUNNING))
        result.append(
            task_lib.ExecNodeTask(
                node_uid=node_uid,
                execution_id=execution.id,
                contexts=resolved_info.contexts,
                input_artifacts=input_artifacts,
                exec_properties=resolved_info.exec_properties,
                output_artifacts=output_artifacts,
                executor_output_uri=outputs_resolver.get_executor_output_uri(
                    execution.id),
                stateful_working_dir=outputs_resolver.
                get_stateful_working_directory(execution.id),
                tmp_dir=outputs_resolver.make_tmp_dir(execution.id),
                pipeline=self._pipeline))
        return result
Example #10: async pipeline task generation, which skips creating a new execution when the latest one had the same inputs, execution properties, and executor spec.
    def _generate_tasks_for_node(
            self, metadata_handler: metadata.Metadata,
            node: pipeline_pb2.PipelineNode) -> List[task_lib.Task]:
        """Generates a node execution task.

    If a node execution is not feasible, `None` is returned.

    Args:
      metadata_handler: A handler to access MLMD db.
      node: The pipeline node for which to generate a task.

    Returns:
      Returns a `Task` or `None` if task generation is deemed infeasible.
    """
        result = []
        node_uid = task_lib.NodeUid.from_pipeline_node(self._pipeline, node)

        executions = task_gen_utils.get_executions(metadata_handler, node)
        exec_node_task = task_gen_utils.generate_task_from_active_execution(
            metadata_handler, self._pipeline, node, executions)
        if exec_node_task:
            result.append(
                task_lib.UpdateNodeStateTask(node_uid=node_uid,
                                             state=pstate.NodeState.RUNNING))
            result.append(exec_node_task)
            return result

        resolved_info = task_gen_utils.generate_resolved_info(
            metadata_handler, node)
        # TODO(b/207038460): Update async pipeline to support ForEach.
        if (resolved_info is None or not resolved_info.input_artifacts
                or resolved_info.input_artifacts[0] is None
                or not any(resolved_info.input_artifacts[0].values())):
            logging.info(
                'Task cannot be generated for node %s since no input artifacts '
                'are resolved.', node.node_info.id)
            return result
        input_artifact = resolved_info.input_artifacts[0]

        executor_spec_fingerprint = hashlib.sha256()
        executor_spec = task_gen_utils.get_executor_spec(
            self._pipeline_state.pipeline, node.node_info.id)
        if executor_spec is not None:
            executor_spec_fingerprint.update(
                executor_spec.SerializeToString(deterministic=True))
        resolved_info.exec_properties[
            constants.EXECUTOR_SPEC_FINGERPRINT_KEY] = (
                executor_spec_fingerprint.hexdigest())

        # If the latest execution had the same resolved input artifacts, execution
        # properties and executor specs, we should not trigger a new execution.
        latest_exec = task_gen_utils.get_latest_execution(executions)
        if latest_exec:
            artifact_ids_by_event_type = (
                execution_lib.get_artifact_ids_by_event_type_for_execution_id(
                    metadata_handler, latest_exec.id))
            latest_exec_input_artifact_ids = artifact_ids_by_event_type.get(
                metadata_store_pb2.Event.INPUT, set())
            current_exec_input_artifact_ids = set(
                a.id for a in itertools.chain(*input_artifact.values()))
            latest_exec_properties = task_gen_utils.extract_properties(
                latest_exec)
            current_exec_properties = resolved_info.exec_properties
            latest_exec_executor_spec_fp = latest_exec_properties[
                constants.EXECUTOR_SPEC_FINGERPRINT_KEY]
            current_exec_executor_spec_fp = resolved_info.exec_properties[
                constants.EXECUTOR_SPEC_FINGERPRINT_KEY]
            if (latest_exec_input_artifact_ids
                    == current_exec_input_artifact_ids
                    and _exec_properties_match(latest_exec_properties,
                                               current_exec_properties)
                    and latest_exec_executor_spec_fp
                    == current_exec_executor_spec_fp):
                result.append(
                    task_lib.UpdateNodeStateTask(
                        node_uid=node_uid, state=pstate.NodeState.STARTED))
                return result

        execution = execution_publish_utils.register_execution(
            metadata_handler=metadata_handler,
            execution_type=node.node_info.type,
            contexts=resolved_info.contexts,
            input_artifacts=input_artifact,
            exec_properties=resolved_info.exec_properties)
        outputs_resolver = outputs_utils.OutputsResolver(
            node, self._pipeline.pipeline_info, self._pipeline.runtime_spec,
            self._pipeline.execution_mode)

        # For mixed service nodes, we ensure node services and check service
        # status; the node is aborted if its service jobs have failed.
        service_status = self._ensure_node_services_if_mixed(node.node_info.id)
        if service_status is not None:
            if service_status != service_jobs.ServiceStatus.RUNNING:
                error_msg = f'associated service job failed; node uid: {node_uid}'
                result.append(
                    task_lib.UpdateNodeStateTask(
                        node_uid=node_uid,
                        state=pstate.NodeState.FAILED,
                        status=status_lib.Status(code=status_lib.Code.ABORTED,
                                                 message=error_msg)))
                return result

        output_artifacts = outputs_resolver.generate_output_artifacts(
            execution.id)
        outputs_utils.make_output_dirs(output_artifacts)
        result.append(
            task_lib.UpdateNodeStateTask(node_uid=node_uid,
                                         state=pstate.NodeState.RUNNING))
        result.append(
            task_lib.ExecNodeTask(
                node_uid=node_uid,
                execution_id=execution.id,
                contexts=resolved_info.contexts,
                input_artifacts=input_artifact,
                exec_properties=resolved_info.exec_properties,
                output_artifacts=output_artifacts,
                executor_output_uri=outputs_resolver.get_executor_output_uri(
                    execution.id),
                stateful_working_dir=outputs_resolver.
                get_stateful_working_directory(execution.id),
                tmp_dir=outputs_resolver.make_tmp_dir(execution.id),
                pipeline=self._pipeline))
        return result
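
Taken together, the examples above follow one lifecycle: resolve the output artifacts for an execution, create their directories with `outputs_utils.make_output_dirs` before the executor runs, and delete them with `remove_output_dirs` if the run fails. The sketch below is a minimal illustration of that pattern, not code from any of the snippets above; `run_component` is a hypothetical stand-in for the executor call, and the `tfx.orchestration.portable.outputs_utils` import path is an assumption.

from tfx.orchestration.portable import outputs_utils  # assumed module path


def run_with_output_dirs(node, pipeline, execution_id, run_component):
    """Minimal sketch of the make/remove lifecycle shown in the examples."""
    outputs_resolver = outputs_utils.OutputsResolver(node,
                                                     pipeline.pipeline_info,
                                                     pipeline.runtime_spec,
                                                     pipeline.execution_mode)
    output_artifacts = outputs_resolver.generate_output_artifacts(execution_id)
    # Create every output URI before the executor runs (Examples 1, 3, 9, 10).
    outputs_utils.make_output_dirs(output_artifacts)
    try:
        # `run_component` is a hypothetical callable standing in for
        # `executor_operator.run_executor` in Examples 3, 5, and 7.
        return run_component(output_artifacts)
    except Exception:
        # On failure, remove whatever was created so a retry starts clean
        # (Examples 3, 5, and 7).
        outputs_utils.remove_output_dirs(output_artifacts)
        raise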