Example #1
    def publish_execution(
            self,
            component_info: data_types.ComponentInfo,
            output_artifacts: Optional[Dict[Text,
                                            List[types.Artifact]]] = None,
            exec_properties: Optional[Dict[Text, Any]] = None):
        """Publishes a component execution to metadata.

    This function will do two things:
    1. update the execution that was previously registered before execution to
       complete or skipped state, depending on whether cached results are used.
    2. for each input and output artifact, publish an event that associate the
       artifact to the execution, with type INPUT or OUTPUT respectively

    Args:
      component_info: the information of the component
      output_artifacts: optional key -> Artifacts to be published as outputs
        of the execution
      exec_properties: optional execution properties to be published for the
        execution

    Returns:
      A dict containing output artifacts.
    """
        outputs_utils.tag_output_artifacts_with_version(output_artifacts)

        logging.debug('Outputs: %s', output_artifacts)
        logging.debug('Execution properties: %s', exec_properties)

        self._metadata_handler.publish_execution(
            component_info=component_info,
            output_artifacts=output_artifacts,
            exec_properties=exec_properties)
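
This snippet follows a simple "tag the outputs, then delegate to the metadata handler" shape. Below is a minimal, self-contained sketch of that call pattern; _FakeMetadataHandler, _tag_with_version, and the dict-based artifacts are illustrative stand-ins, not TFX APIs.

from typing import Any, Dict, List, Optional


class _FakeMetadataHandler:
    """Illustrative stand-in for the metadata handler used above."""

    def publish_execution(self, component_info, output_artifacts, exec_properties):
        print('publishing', component_info, output_artifacts, exec_properties)


def _tag_with_version(output_artifacts: Optional[Dict[str, List[Any]]]) -> None:
    # Stand-in for outputs_utils.tag_output_artifacts_with_version().
    for artifacts in (output_artifacts or {}).values():
        for artifact in artifacts:
            artifact['tfx_version'] = '0.0.0'  # hypothetical version tag


def publish_execution(handler, component_info, output_artifacts=None,
                      exec_properties=None):
    # Same shape as the method above: tag first, then delegate to the handler.
    _tag_with_version(output_artifacts)
    handler.publish_execution(
        component_info=component_info,
        output_artifacts=output_artifacts,
        exec_properties=exec_properties)


publish_execution(_FakeMetadataHandler(), 'MyComponent',
                  output_artifacts={'examples': [{'uri': '/tmp/examples'}]},
                  exec_properties={'seed': 42})
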
Example #2
    def launch(self) -> Optional[metadata_store_pb2.Execution]:
        """Executes the component, includes driver, executor and publisher.

    Returns:
      The metadata of this execution that is registered in MLMD. It can be None
      if the driver decides not to run the execution.

    Raises:
      Exception: If the executor fails.
    """
        logging.info('Running launcher for %s', self._pipeline_node)
        if self._system_node_handler:
            # If this is a system node, run it and return directly.
            return self._system_node_handler.run(self._mlmd_connection,
                                                 self._pipeline_node,
                                                 self._pipeline_info,
                                                 self._pipeline_runtime_spec)

        # Runs as a normal node.
        prepare_execution_result = self._prepare_execution()
        (execution_info, contexts,
         is_execution_needed) = (prepare_execution_result.execution_info,
                                 prepare_execution_result.contexts,
                                 prepare_execution_result.is_execution_needed)
        if is_execution_needed:
            try:
                executor_output = self._run_executor(execution_info)
            except Exception as e:  # pylint: disable=broad-except
                execution_output = (e.executor_output if isinstance(
                    e, _ExecutionFailedError) else None)
                self._publish_failed_execution(execution_info.execution_id,
                                               contexts, execution_output)
                logging.error('Execution %d failed.',
                              execution_info.execution_id)
                raise
            finally:
                self._clean_up_stateless_execution_info(execution_info)

            logging.info('Execution %d succeeded.',
                         execution_info.execution_id)
            self._clean_up_stateful_execution_info(execution_info)

            # TODO(b/182316162): Unify publisher handling so that post-execution
            # artifact logic is more cleanly handled.
            # Note that currently both the ExecutionInfo and ExecutorOutput are
            # consulted in `execution_publish_utils.publish_succeeded_execution()`.
            outputs_utils.tag_executor_output_with_version(executor_output)
            outputs_utils.tag_output_artifacts_with_version(
                execution_info.output_dict)
            logging.info('Publishing output artifacts %s for execution %s',
                         execution_info.output_dict,
                         execution_info.execution_id)
            self._publish_successful_execution(execution_info.execution_id,
                                               contexts,
                                               execution_info.output_dict,
                                               executor_output)
        return prepare_execution_result.execution_metadata
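
The error path above (publish the failed execution, log, re-raise, and always clean up stateless state) is worth isolating. The following is a minimal sketch of that try/except/finally shape in plain Python; _run_executor, _publish_failed, and _clean_up are hypothetical placeholders for the launcher methods.

import logging


def _run_executor(execution_id: int) -> str:
    # Stand-in for the real executor call; raise here to exercise the error path.
    return f'output-of-{execution_id}'


def _publish_failed(execution_id: int) -> None:
    logging.error('Execution %d failed.', execution_id)


def _clean_up(execution_id: int) -> None:
    logging.info('Cleaned up stateless info for execution %d.', execution_id)


def launch(execution_id: int) -> str:
    try:
        executor_output = _run_executor(execution_id)
    except Exception:
        # Publish the failure first, then re-raise so callers still see the error.
        _publish_failed(execution_id)
        raise
    finally:
        # Runs on both the success and failure paths, before any exception propagates.
        _clean_up(execution_id)
    logging.info('Execution %d succeeded.', execution_id)
    return executor_output


print(launch(42))
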
Example #3
def _publish_execution_results(mlmd_handle: metadata.Metadata,
                               task: task_lib.ExecNodeTask,
                               result: ts.TaskSchedulerResult) -> None:
  """Publishes execution results to MLMD."""

  def _update_state(status: status_lib.Status) -> None:
    assert status.code != status_lib.Code.OK
    if status.code == status_lib.Code.CANCELLED:
      logging.info('Cancelling execution (id: %s); task id: %s; status: %s',
                   task.execution_id, task.task_id, status)
      execution_state = metadata_store_pb2.Execution.CANCELED
    else:
      logging.info(
          'Aborting execution (id: %s) due to error (code: %s); task id: %s',
          task.execution_id, status.code, task.task_id)
      execution_state = metadata_store_pb2.Execution.FAILED
    _update_execution_state_in_mlmd(mlmd_handle, task.execution_id,
                                    execution_state, status.message)
    pipeline_state.record_state_change_time()

  if result.status.code != status_lib.Code.OK:
    _update_state(result.status)
    return

  # TODO(b/182316162): Unify publisher handling so that post-execution artifact
  # logic is more cleanly handled.
  outputs_utils.tag_output_artifacts_with_version(task.output_artifacts)
  publish_params = dict(output_artifacts=task.output_artifacts)
  if result.output_artifacts is not None:
    # TODO(b/182316162): Unify publisher handling so that post-execution artifact
    # logic is more cleanly handled.
    outputs_utils.tag_output_artifacts_with_version(result.output_artifacts)
    publish_params['output_artifacts'] = result.output_artifacts
  elif result.executor_output is not None:
    if result.executor_output.execution_result.code != status_lib.Code.OK:
      _update_state(
          status_lib.Status(
              code=result.executor_output.execution_result.code,
              message=result.executor_output.execution_result.result_message))
      return
    # TODO(b/182316162): Unify publisher handling so that post-execution artifact
    # logic is more cleanly handled.
    outputs_utils.tag_executor_output_with_version(result.executor_output)
    publish_params['executor_output'] = result.executor_output

  execution_publish_utils.publish_succeeded_execution(mlmd_handle,
                                                      task.execution_id,
                                                      task.contexts,
                                                      **publish_params)
  pipeline_state.record_state_change_time()
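
Two ideas in this snippet generalize well: map a non-OK status code to a terminal execution state, and assemble the optional publish arguments in a dict that is splatted into the final call. Below is a minimal sketch of both, using an Enum and plain functions as stand-ins for the status_lib and MLMD types.

import enum
from typing import Any, Dict, Optional


class Code(enum.Enum):  # stand-in for status_lib.Code
    OK = 0
    CANCELLED = 1
    INTERNAL = 13


class ExecutionState(enum.Enum):  # stand-in for metadata_store_pb2.Execution states
    COMPLETE = 'COMPLETE'
    CANCELED = 'CANCELED'
    FAILED = 'FAILED'


def terminal_state_for(code: Code) -> ExecutionState:
    # Cancellation is not an error; every other non-OK code counts as a failure.
    return ExecutionState.CANCELED if code == Code.CANCELLED else ExecutionState.FAILED


def publish_succeeded(execution_id: int, *, output_artifacts=None,
                      executor_output=None) -> None:
    print('publish', execution_id, output_artifacts, executor_output)


def publish_results(execution_id: int, status_code: Code,
                    output_artifacts: Optional[Dict[str, Any]] = None,
                    executor_output: Optional[Dict[str, Any]] = None) -> None:
    if status_code != Code.OK:
        print('terminal state:', terminal_state_for(status_code))
        return
    # Only include the optional pieces that are actually present.
    publish_params = {'output_artifacts': output_artifacts}
    if executor_output is not None:
        publish_params['executor_output'] = executor_output
    publish_succeeded(execution_id, **publish_params)


publish_results(7, Code.OK, output_artifacts={'examples': ['a']})
publish_results(8, Code.CANCELLED)
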
Example #4
def _publish_execution_results(mlmd_handle: metadata.Metadata,
                               task: task_lib.ExecNodeTask,
                               result: ts.TaskSchedulerResult) -> None:
    """Publishes execution results to MLMD."""
    def _update_state(status: status_lib.Status) -> None:
        assert status.code != status_lib.Code.OK
        _remove_output_dirs(task, result)
        _remove_task_dirs(task)
        if status.code == status_lib.Code.CANCELLED:
            logging.info(
                'Cancelling execution (id: %s); task id: %s; status: %s',
                task.execution_id, task.task_id, status)
            execution_state = metadata_store_pb2.Execution.CANCELED
        else:
            logging.info(
                'Aborting execution (id: %s) due to error (code: %s); task id: %s',
                task.execution_id, status.code, task.task_id)
            execution_state = metadata_store_pb2.Execution.FAILED
        _update_execution_state_in_mlmd(mlmd_handle, task.execution_id,
                                        execution_state, status.message)
        pipeline_state.record_state_change_time()

    if result.status.code != status_lib.Code.OK:
        _update_state(result.status)
        return

    # TODO(b/182316162): Unify publisher handling so that post-execution artifact
    # logic is more cleanly handled.
    outputs_utils.tag_output_artifacts_with_version(task.output_artifacts)
    if isinstance(result.output, ts.ExecutorNodeOutput):
        executor_output = result.output.executor_output
        if executor_output is not None:
            if executor_output.execution_result.code != status_lib.Code.OK:
                _update_state(
                    status_lib.Status(
                        code=executor_output.execution_result.code,
                        message=executor_output.execution_result.result_message
                    ))
                return
            # TODO(b/182316162): Unify publisher handling so that post-execution
            # artifact logic is more cleanly handled.
            outputs_utils.tag_executor_output_with_version(executor_output)
        _remove_task_dirs(task)
        execution_publish_utils.publish_succeeded_execution(
            mlmd_handle,
            execution_id=task.execution_id,
            contexts=task.contexts,
            output_artifacts=task.output_artifacts,
            executor_output=executor_output)
    elif isinstance(result.output, ts.ImporterNodeOutput):
        output_artifacts = result.output.output_artifacts
        # TODO(b/182316162): Unify publisher handling so that post-execution artifact
        # logic is more cleanly handled.
        outputs_utils.tag_output_artifacts_with_version(output_artifacts)
        _remove_task_dirs(task)
        execution_publish_utils.publish_succeeded_execution(
            mlmd_handle,
            execution_id=task.execution_id,
            contexts=task.contexts,
            output_artifacts=output_artifacts)
    elif isinstance(result.output, ts.ResolverNodeOutput):
        resolved_input_artifacts = result.output.resolved_input_artifacts
        execution_publish_utils.publish_internal_execution(
            mlmd_handle,
            execution_id=task.execution_id,
            contexts=task.contexts,
            output_artifacts=resolved_input_artifacts)
    else:
        raise TypeError(f'Unable to process task scheduler result: {result}')

    pipeline_state.record_state_change_time()
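
This version replaces the field-presence checks of Example #3 with an isinstance dispatch on the scheduler output type, failing loudly on anything it does not recognize. Below is a minimal sketch of that dispatch, with hypothetical dataclasses standing in for ts.ExecutorNodeOutput, ts.ImporterNodeOutput, and ts.ResolverNodeOutput.

import dataclasses
from typing import Any, Dict, Optional


@dataclasses.dataclass
class ExecutorNodeOutput:  # stand-in for ts.ExecutorNodeOutput
    executor_output: Optional[Dict[str, Any]] = None


@dataclasses.dataclass
class ImporterNodeOutput:  # stand-in for ts.ImporterNodeOutput
    output_artifacts: Dict[str, Any] = dataclasses.field(default_factory=dict)


@dataclasses.dataclass
class ResolverNodeOutput:  # stand-in for ts.ResolverNodeOutput
    resolved_input_artifacts: Dict[str, Any] = dataclasses.field(default_factory=dict)


def publish(output: Any) -> str:
    # Each branch would call a different publish helper; here we only report which one.
    if isinstance(output, ExecutorNodeOutput):
        return f'publish_succeeded_execution(executor_output={output.executor_output})'
    elif isinstance(output, ImporterNodeOutput):
        return f'publish_succeeded_execution(output_artifacts={output.output_artifacts})'
    elif isinstance(output, ResolverNodeOutput):
        return f'publish_internal_execution({output.resolved_input_artifacts})'
    else:
        # Fail loudly on output types this publisher does not understand.
        raise TypeError(f'Unable to process task scheduler result: {output}')


print(publish(ExecutorNodeOutput(executor_output={'code': 0})))
print(publish(ImporterNodeOutput(output_artifacts={'result': ['a']})))
print(publish(ResolverNodeOutput(resolved_input_artifacts={'examples': []})))
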
Example #5
    def launch(self) -> Optional[data_types.ExecutionInfo]:
        """Executes the component, includes driver, executor and publisher.

    Returns:
      The metadata of this execution that is registered in MLMD. It can be None
      if the driver decides not to run the execution.

    Raises:
      Exception: If the executor fails.
    """
        logging.info('Running launcher for %s', self._pipeline_node)
        if self._system_node_handler:
            # If this is a system node, run it and return directly.
            return self._system_node_handler.run(self._mlmd_connection,
                                                 self._pipeline_node,
                                                 self._pipeline_info,
                                                 self._pipeline_runtime_spec)

        # Runs as a normal node.
        execution_preparation_result = self._prepare_execution()
        (execution_info, contexts, is_execution_needed) = (
            execution_preparation_result.execution_info,
            execution_preparation_result.contexts,
            execution_preparation_result.is_execution_needed)
        if is_execution_needed:
            try:
                executor_watcher = None
                if self._executor_operator:
                    # Create an execution watcher and pass an in-memory copy of the
                    # Execution object to it. The launcher calls the executor operator
                    # in process, so there is no race condition between the execution
                    # watcher and the launcher when writing to MLMD.
                    executor_watcher = execution_watcher.ExecutionWatcher(
                        port=portpicker.pick_unused_port(),
                        mlmd_connection=self._mlmd_connection,
                        execution=execution_preparation_result.
                        execution_metadata,
                        creds=grpc.local_server_credentials())
                    self._executor_operator.with_execution_watcher(
                        executor_watcher.address)
                    executor_watcher.start()
                executor_output = self._run_executor(execution_info)
            except Exception as e:  # pylint: disable=broad-except
                execution_output = (e.executor_output if isinstance(
                    e, _ExecutionFailedError) else None)
                self._publish_failed_execution(execution_info.execution_id,
                                               contexts, execution_output)
                logging.error('Execution %d failed.',
                              execution_info.execution_id)
                raise
            finally:
                self._clean_up_stateless_execution_info(execution_info)
                if executor_watcher:
                    executor_watcher.stop()

            logging.info('Execution %d succeeded.',
                         execution_info.execution_id)
            self._clean_up_stateful_execution_info(execution_info)

            # TODO(b/182316162): Unify publisher handling so that post-execution
            # artifact logic is more cleanly handled.
            # Note that currently both the ExecutionInfo and ExecutorOutput are
            # consulted in `execution_publish_utils.publish_succeeded_execution()`.
            outputs_utils.tag_executor_output_with_version(executor_output)
            outputs_utils.tag_output_artifacts_with_version(
                execution_info.output_dict)
            logging.info('Publishing output artifacts %s for execution %s',
                         execution_info.output_dict,
                         execution_info.execution_id)
            self._publish_successful_execution(execution_info.execution_id,
                                               contexts,
                                               execution_info.output_dict,
                                               executor_output)
        return execution_info
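
Compared with Example #2, this variant starts an execution watcher before the executor runs and stops it in the finally block, so the watcher can never outlive the execution. Below is a minimal sketch of that lifecycle, with a hypothetical _Watcher class in place of execution_watcher.ExecutionWatcher.

class _Watcher:
    """Hypothetical stand-in for execution_watcher.ExecutionWatcher."""

    def __init__(self) -> None:
        self.running = False

    def start(self) -> None:
        self.running = True
        print('watcher started')

    def stop(self) -> None:
        self.running = False
        print('watcher stopped')


def run_with_watcher(run_executor, use_watcher: bool = True):
    watcher = None
    try:
        if use_watcher:
            watcher = _Watcher()
            watcher.start()
        return run_executor()
    finally:
        # Stop the watcher on both the success and failure paths.
        if watcher:
            watcher.stop()


print(run_with_watcher(lambda: 'executor output'))
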
Example #6
def _run_executor(args: argparse.Namespace, beam_args: List[str]) -> None:
  """Selects a particular executor and run it based on name.

  Args:
    args:
      --executor_class_path: The import path of the executor class.
      --json_serialized_invocation_args: Full JSON-serialized parameters for
        this execution.
    beam_args: Optional parameter that maps to the optional_pipeline_args
      parameter in the pipeline, which provides additional configuration options
      for apache-beam and tensorflow.logging.
    For more about the Beam arguments, please refer to:
    https://cloud.google.com/dataflow/docs/guides/specifying-exec-params
  """
  logging.set_verbosity(logging.INFO)

  # Rehydrate inputs/outputs/exec_properties from the serialized metadata.
  executor_input = pipeline_spec_pb2.ExecutorInput()
  json_format.Parse(
      args.json_serialized_invocation_args,
      executor_input,
      ignore_unknown_fields=True)

  inputs_dict = executor_input.inputs.artifacts
  outputs_dict = executor_input.outputs.artifacts
  inputs_parameter = executor_input.inputs.parameters

  if fileio.exists(executor_input.outputs.output_file):
    # It has a driver that outputs the updated exec_properties in this file.
    with fileio.open(executor_input.outputs.output_file,
                     'rb') as output_meta_json:
      output_metadata = pipeline_spec_pb2.ExecutorOutput()
      json_format.Parse(
          output_meta_json.read(), output_metadata, ignore_unknown_fields=True)
      # Append/overwrite exec_properties.
      for k, v in output_metadata.parameters.items():
        inputs_parameter[k].CopyFrom(v)

  name_from_id = {}

  inputs = kubeflow_v2_entrypoint_utils.parse_raw_artifact_dict(
      inputs_dict, name_from_id)
  outputs = kubeflow_v2_entrypoint_utils.parse_raw_artifact_dict(
      outputs_dict, name_from_id)
  exec_properties = kubeflow_v2_entrypoint_utils.parse_execution_properties(
      inputs_parameter)
  logging.info('Executor %s do: inputs: %s, outputs: %s, exec_properties: %s',
               args.executor_class_path, inputs, outputs, exec_properties)
  executor_cls = import_utils.import_class_by_path(args.executor_class_path)
  if issubclass(executor_cls, base_beam_executor.BaseBeamExecutor):
    executor_context = base_beam_executor.BaseBeamExecutor.Context(
        beam_pipeline_args=beam_args, unique_id='', tmp_dir='/tmp')
  else:
    executor_context = base_executor.BaseExecutor.Context(
        extra_flags=beam_args, unique_id='', tmp_dir='/tmp')
  executor = executor_cls(executor_context)
  logging.info('Starting executor')
  executor.Do(inputs, outputs, exec_properties)

  # TODO(b/182316162): Unify publisher handling so that post-execution artifact
  # logic is more cleanly handled.
  outputs_utils.tag_output_artifacts_with_version(outputs)  # pylint: disable=protected-access

  # TODO(b/169583143): Remove this workaround when TFX migrates to use str-typed
  # id/name to identify artifacts.
  # Convert ModelBlessing artifact to use managed MLMD resource name.
  if (issubclass(executor_cls, evaluator_executor.Executor) and
      standard_component_specs.BLESSING_KEY in outputs):
    # Parse the parent prefix for managed MLMD resource name.
    kubeflow_v2_entrypoint_utils.refactor_model_blessing(
        artifact_utils.get_single_instance(
            outputs[standard_component_specs.BLESSING_KEY]), name_from_id)

  # Log the output metadata to a file so that it can be picked up by MP.
  metadata_uri = executor_input.outputs.output_file
  executor_output = pipeline_spec_pb2.ExecutorOutput()
  for k, v in kubeflow_v2_entrypoint_utils.translate_executor_output(
      outputs, name_from_id).items():
    executor_output.artifacts[k].CopyFrom(v)

  fileio.makedirs(os.path.dirname(metadata_uri))
  with fileio.open(metadata_uri, 'wb') as f:
    f.write(json_format.MessageToJson(executor_output))
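
The overall flow above is: rehydrate the invocation from JSON, merge any parameters a driver already wrote to the output file, run the executor, then serialize the output metadata back to the same location. Below is a minimal sketch of that flow using plain dicts and the json module in place of the ExecutorInput/ExecutorOutput protos and fileio; the key names are illustrative only.

import json
import os
import tempfile


def run(serialized_invocation: str, output_file: str) -> None:
    invocation = json.loads(serialized_invocation)
    parameters = dict(invocation.get('parameters', {}))

    # If a driver already ran, it may have written updated parameters to the
    # output file; they append to / overwrite the ones from the invocation.
    if os.path.exists(output_file):
        with open(output_file) as f:
            parameters.update(json.load(f).get('parameters', {}))

    # Stand-in for executor.Do(inputs, outputs, exec_properties).
    outputs = {'result': f'ran with {parameters}'}

    # Write the output metadata so a downstream process can pick it up.
    os.makedirs(os.path.dirname(output_file), exist_ok=True)
    with open(output_file, 'w') as f:
        json.dump({'artifacts': outputs}, f)


with tempfile.TemporaryDirectory() as tmp:
    out = os.path.join(tmp, 'outputs', 'executor_output.json')
    run('{"parameters": {"num_steps": 10}}', out)
    print(open(out).read())
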
Example #7
    def run(
        self, mlmd_connection: metadata.Metadata,
        pipeline_node: pipeline_pb2.PipelineNode,
        pipeline_info: pipeline_pb2.PipelineInfo,
        pipeline_runtime_spec: pipeline_pb2.PipelineRuntimeSpec
    ) -> data_types.ExecutionInfo:
        """Runs Importer specific logic.

    Args:
      mlmd_connection: ML metadata connection.
      pipeline_node: The specification of the node that this launcher lauches.
      pipeline_info: The information of the pipeline that this node runs in.
      pipeline_runtime_spec: The runtime information of the pipeline that this
        node runs in.

    Returns:
      The execution of the run.
    """
        logging.info('Running as an importer node.')
        with mlmd_connection as m:
            # 1. Prepares all contexts.
            contexts = context_lib.prepare_contexts(
                metadata_handler=m, node_contexts=pipeline_node.contexts)

            # 2. Resolves execution properties. Note that importers have no
            # inputs.
            exec_properties = data_types_utils.build_parsed_value_dict(
                inputs_utils.resolve_parameters_with_schema(
                    node_parameters=pipeline_node.parameters))

            # 3. Registers execution in metadata.
            execution = execution_publish_utils.register_execution(
                metadata_handler=m,
                execution_type=pipeline_node.node_info.type,
                contexts=contexts,
                exec_properties=exec_properties)

            # 4. Generate output artifacts to represent the imported artifacts.
            output_spec = pipeline_node.outputs.outputs[
                importer.IMPORT_RESULT_KEY]
            properties = self._extract_proto_map(
                output_spec.artifact_spec.additional_properties)
            custom_properties = self._extract_proto_map(
                output_spec.artifact_spec.additional_custom_properties)
            output_artifact_class = types.Artifact(
                output_spec.artifact_spec.type).type
            output_artifacts = importer.generate_output_dict(
                metadata_handler=m,
                uri=str(exec_properties[importer.SOURCE_URI_KEY]),
                properties=properties,
                custom_properties=custom_properties,
                reimport=bool(exec_properties[importer.REIMPORT_OPTION_KEY]),
                output_artifact_class=output_artifact_class,
                mlmd_artifact_type=output_spec.artifact_spec.type)

            result = data_types.ExecutionInfo(execution_id=execution.id,
                                              input_dict={},
                                              output_dict=output_artifacts,
                                              exec_properties=exec_properties,
                                              pipeline_node=pipeline_node,
                                              pipeline_info=pipeline_info)

            # TODO(b/182316162): consider letting the launcher do the publish for
            # system nodes, so that the version tagging logic doesn't need to be
            # handled per system node.
            outputs_utils.tag_output_artifacts_with_version(result.output_dict)

            # 5. Publish the output artifacts. If artifacts are reimported, the
            # execution is published as CACHED. Otherwise it is published as COMPLETE.
            if _is_artifact_reimported(output_artifacts):
                execution_publish_utils.publish_cached_execution(
                    metadata_handler=m,
                    contexts=contexts,
                    execution_id=execution.id,
                    output_artifacts=output_artifacts)
            else:
                execution_publish_utils.publish_succeeded_execution(
                    metadata_handler=m,
                    execution_id=execution.id,
                    contexts=contexts,
                    output_artifacts=output_artifacts)

            return result
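
The last step is the interesting branch: reimported artifacts lead to a CACHED publish, fresh imports to a COMPLETE one. Below is a minimal sketch of that decision, with hypothetical helpers in place of execution_publish_utils and the reimport check; the id-based heuristic is only an illustration.

from typing import Any, Dict, List


def _is_reimported(output_artifacts: Dict[str, List[Dict[str, Any]]]) -> bool:
    # Hypothetical check: treat artifacts that already carry an id as reimported.
    return any(a.get('id') is not None
               for artifacts in output_artifacts.values() for a in artifacts)


def publish_import(execution_id: int,
                   output_artifacts: Dict[str, List[Dict[str, Any]]]) -> str:
    if _is_reimported(output_artifacts):
        # Reuse the existing artifacts; the execution is recorded as CACHED.
        return f'publish_cached_execution({execution_id})'
    # Brand-new artifacts; the execution is recorded as COMPLETE.
    return f'publish_succeeded_execution({execution_id})'


print(publish_import(1, {'result': [{'uri': '/data/schema', 'id': 42}]}))
print(publish_import(2, {'result': [{'uri': '/data/schema', 'id': None}]}))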