def load_source_path_class(source: Text) -> Type:
    """Loads a Python class from a source path that may carry a pin.

    Args:
        source (str): class_source e.g. this.module.Class[@sha]. The part
            before the first '@' is the importable class path; the part after
            the last '@' is the pin.

    Returns:
        The class returned by `import_class_by_path` for the class path.

    Raises:
        Exception: If the source is pinned to a non-standard pin and
            `check_module_clean` reports the module as not clean. Any error
            raised while importing the class propagates unchanged, after the
            git checkout has been undone.
    """
    # Split *before* anything is overwritten. Previously `source` was replaced
    # by its class-path part first, which made `pin` equal to the class path
    # and left both pinned branches below unreachable.
    has_pin = '@' in source
    parts = source.split('@')
    class_path = parts[0]
    pin = parts[-1]
    # Only classify the pin when one is actually present.
    is_standard = has_pin and is_standard_pin(pin)

    if has_pin and not is_standard:
        logger.debug('Pinned step found with git sha. '
                     'Loading class from git history.')
        wrapper: GitWrapper = Repository.get_instance().get_git_wrapper()

        module_source = get_module_source_from_source(class_path)
        relative_module_path = get_relative_path_from_module_source(
            module_source)

        logger.warning('Found source with a pinned sha. Will now checkout '
                       f'module: {module_source}')

        # critical step
        if not wrapper.check_module_clean(class_path):
            raise Exception(f'One of the files at {relative_module_path} '
                            f'is not committed and we '
                            f'are trying to load that directory from git '
                            f'history due to a pinned step in the pipeline. '
                            f'Please commit the file and then run the '
                            f'pipeline.')

        # Check out the directory at that sha
        wrapper.checkout(sha_or_branch=pin, directory=relative_module_path)

        # From here on the checkout is always undone, whether or not the
        # import succeeds; a failure keeps its original type and traceback.
        try:
            class_ = import_class_by_path(class_path)
        finally:
            wrapper.reset(relative_module_path)
            wrapper.checkout(directory=relative_module_path)
    elif has_pin:
        logger.debug(f'Default {APP_NAME} class used. Loading directly.')
        # TODO: [LOW] Check if ZenML version is installed before loading.
        class_ = import_class_by_path(class_path)
    else:
        logger.debug('Unpinned step found with no git sha. Attempting to '
                     'load class from current repository state.')
        class_ = import_class_by_path(class_path)

    return class_
def __init__(
    self,
    executor_class_path,
    name,
    input_dict,
    outputs,
    exec_properties,
):
    """Creates the executor and records what is needed to run it.

    Args:
        executor_class_path: Import path handed to
            `import_utils.import_class_by_path` to obtain the executor class.
        name: Component name; stored in snake_case form.
        input_dict: Executor inputs, stored as given.
        outputs: Serialized outputs, decoded with
            `types.parse_tfx_type_dict`.
        exec_properties: Execution properties, stored as given. The optional
            'beam_pipeline_args' entry supplies extra Beam pipeline args.
    """
    raw_args = exec_properties.get('beam_pipeline_args', [])

    # Beam expects str types for its pipeline args. `type(u'')` is `unicode`
    # on Python 2 and `str` on Python 3, so this converts Python 2 unicode
    # values without referencing the `unicode` name, which does not exist on
    # Python 3 and previously raised NameError there.
    text_type = type(u'')
    beam_pipeline_args = []
    for arg in raw_args:
        if isinstance(arg, text_type) and not isinstance(arg, str):
            arg = arg.encode('ascii', 'ignore')
        beam_pipeline_args.append(arg)

    # TODO(zhitaoli): Revisit usage of setup_file here.
    module_dir = os.path.dirname(os.path.dirname(tfx.__file__))
    setup_file = os.path.join(module_dir, 'setup.py')
    beam_pipeline_args.append('--setup_file={}'.format(setup_file))

    executor_cls = import_utils.import_class_by_path(executor_class_path)
    self._executor = executor_cls(beam_pipeline_args=beam_pipeline_args)
    self._input_dict = input_dict
    self._output_dict = types.parse_tfx_type_dict(outputs)
    self._exec_properties = exec_properties
    self._component_name = to_snake_case(name)
def _run_executor(args, pipeline_args):
    """Runs the executor class named in `args` on the supplied artifacts.

    Args:
        args: Parsed command-line namespace. Reads `inputs`, `outputs` and
            `exec_properties` (or their `*_base64` counterparts when the plain
            value is falsy), `executor`, `executor_class_path` and
            `write_outputs_stdout`.
        pipeline_args: Passed to the executor as `beam_pipeline_args`.
    """
    tf.logging.set_verbosity(tf.logging.INFO)

    # A plain-text payload wins; otherwise fall back to the base64 flag.
    raw_inputs = args.inputs or base64.b64decode(args.inputs_base64)
    raw_outputs = args.outputs or base64.b64decode(args.outputs_base64)
    raw_exec_properties = (
        args.exec_properties or
        base64.b64decode(args.exec_properties_base64))

    inputs = types.parse_tfx_type_dict(raw_inputs)
    outputs = types.parse_tfx_type_dict(raw_outputs)
    exec_properties = json.loads(raw_exec_properties)

    summary = (
        'Executor {} do: inputs: {}, outputs: {}, exec_properties: {}'.format(
            args.executor, inputs, outputs, exec_properties))
    tf.logging.info(summary)

    executor_cls = import_utils.import_class_by_path(args.executor_class_path)
    executor = executor_cls(beam_pipeline_args=pipeline_args)
    tf.logging.info('Starting executor')
    executor.Do(inputs, outputs, exec_properties)

    # The last line of stdout will be pushed to xcom by Airflow.
    if args.write_outputs_stdout:
        print(types.jsonify_tfx_type_dict(outputs))
def main():
    """Container entrypoint: launches one serialized component.

    Parses the command-line flags, deserializes the component and its config,
    imports and validates the launcher class, launches the component and then
    dumps the UI metadata. The run id is read from the `WORKFLOW_ID`
    environment variable.

    Raises:
        TypeError: If the class at `--component_launcher_class_path` is not a
            subclass of `base_component_launcher.BaseComponentLauncher`.
    """
    # Log to the container's stdout so Kubeflow Pipelines UI can display logs
    # to the user.
    logging.basicConfig(stream=sys.stdout, level=logging.INFO)
    logging.getLogger().setLevel(logging.INFO)

    parser = argparse.ArgumentParser()
    # Flags are registered in the same order as before; `--enable_cache` sits
    # between the two groups of required string flags.
    for flag in ('--pipeline_name',
                 '--pipeline_root',
                 '--kubeflow_metadata_config',
                 '--beam_pipeline_args',
                 '--additional_pipeline_args',
                 '--component_launcher_class_path'):
        parser.add_argument(flag, type=str, required=True)
    parser.add_argument('--enable_cache', action='store_true')
    for flag in ('--serialized_component', '--component_config'):
        parser.add_argument(flag, type=str, required=True)
    args = parser.parse_args()

    component = json_utils.loads(args.serialized_component)
    component_config = json_utils.loads(args.component_config)

    launcher_cls = import_utils.import_class_by_path(
        args.component_launcher_class_path)
    if not issubclass(launcher_cls,
                      base_component_launcher.BaseComponentLauncher):
        raise TypeError(
            'component_launcher_class "%s" is not subclass of base_component_launcher.BaseComponentLauncher'
            % launcher_cls)

    kubeflow_metadata_config = kubeflow_pb2.KubeflowMetadataConfig()
    json_format.Parse(args.kubeflow_metadata_config, kubeflow_metadata_config)
    connection_config = _get_metadata_connection_config(
        kubeflow_metadata_config)
    metadata_connection = kubeflow_metadata_adapter.KubeflowMetadataAdapter(
        connection_config)

    driver_args = data_types.DriverArgs(enable_cache=args.enable_cache)
    beam_pipeline_args = _make_beam_pipeline_args(args.beam_pipeline_args)
    additional_pipeline_args = json.loads(args.additional_pipeline_args)

    pipeline_info = data_types.PipelineInfo(
        pipeline_name=args.pipeline_name,
        pipeline_root=args.pipeline_root,
        run_id=os.environ['WORKFLOW_ID'])
    launcher = launcher_cls.create(
        component=component,
        pipeline_info=pipeline_info,
        driver_args=driver_args,
        metadata_connection=metadata_connection,
        beam_pipeline_args=beam_pipeline_args,
        additional_pipeline_args=additional_pipeline_args,
        component_config=component_config)

    execution_info = launcher.launch()

    # Dump the UI metadata.
    _dump_ui_metadata(component, execution_info)
def main():
    """Container entrypoint: launches one serialized component.

    Parses the command-line flags, deserializes the component and its config,
    imports and validates the launcher class, then launches the component
    inside a telemetry label scope that marks the runner as 'kubernetes'.

    Raises:
        TypeError: If the class at `--component_launcher_class_path` is not a
            subclass of `base_component_launcher.BaseComponentLauncher`.
    """
    # Log to the container's stdout so it can be streamed by the orchestrator.
    logging.basicConfig(stream=sys.stdout, level=logging.INFO)
    logging.getLogger().setLevel(logging.INFO)

    parser = argparse.ArgumentParser()
    # Flags are registered in the same order as before; `--enable_cache` sits
    # between the two groups of required string flags.
    for flag in ('--pipeline_name',
                 '--pipeline_root',
                 '--run_id',
                 '--metadata_config',
                 '--beam_pipeline_args',
                 '--additional_pipeline_args',
                 '--component_launcher_class_path'):
        parser.add_argument(flag, type=str, required=True)
    parser.add_argument('--enable_cache', action='store_true')
    for flag in ('--serialized_component', '--component_config'):
        parser.add_argument(flag, type=str, required=True)
    args = parser.parse_args()

    component = json_utils.loads(args.serialized_component)
    component_config = json_utils.loads(args.component_config)

    launcher_cls = import_utils.import_class_by_path(
        args.component_launcher_class_path)
    if not issubclass(launcher_cls,
                      base_component_launcher.BaseComponentLauncher):
        raise TypeError(
            'component_launcher_class "%s" is not subclass of base_component_launcher.BaseComponentLauncher'
            % launcher_cls)

    metadata_config = metadata_store_pb2.ConnectionConfig()
    json_format.Parse(args.metadata_config, metadata_config)

    driver_args = data_types.DriverArgs(enable_cache=args.enable_cache)
    beam_pipeline_args = json.loads(args.beam_pipeline_args)
    additional_pipeline_args = json.loads(args.additional_pipeline_args)

    pipeline_info = data_types.PipelineInfo(
        pipeline_name=args.pipeline_name,
        pipeline_root=args.pipeline_root,
        run_id=args.run_id,
    )
    metadata_connection = metadata.Metadata(connection_config=metadata_config)
    launcher = launcher_cls.create(
        component=component,
        pipeline_info=pipeline_info,
        driver_args=driver_args,
        metadata_connection=metadata_connection,
        beam_pipeline_args=beam_pipeline_args,
        additional_pipeline_args=additional_pipeline_args,
        component_config=component_config)

    # Attach necessary labels to distinguish different runner and DSL.
    runner_labels = {telemetry_utils.LABEL_TFX_RUNNER: 'kubernetes'}
    with telemetry_utils.scoped_labels(runner_labels):
        launcher.launch()
def __init__(self, driver_spec: message.Message,
             mlmd_connection: metadata.Metadata):
    """Builds the driver described by `driver_spec`.

    Args:
        driver_spec: The specification of how to initialize the driver. It is
            treated as a `PythonClassExecutorSpec` and its `class_path` names
            the driver class to import.
        mlmd_connection: ML metadata connection.

    Raises:
        RuntimeError: if the driver_spec is not supported.
            NOTE(review): nothing in this method raises it directly;
            presumably it comes from the base class - confirm.
    """
    super().__init__(driver_spec, mlmd_connection)

    # `cast` is a typing aid only; no runtime conversion or check happens.
    spec = cast(pipeline_pb2.ExecutorSpec.PythonClassExecutorSpec, driver_spec)
    driver_cls = import_utils.import_class_by_path(spec.class_path)
    self._driver = driver_cls(self._mlmd_connection)
def __init__(self,
             executor_spec: message.Message,
             platform_spec: Optional[message.Message] = None):
    """Initializes a PythonExecutorOperator.

    Args:
        executor_spec: The specification of how to initialize the executor.
            Its `class_path` names the executor class to import.
        platform_spec: The specification of how to allocate resource for the
            executor. Accepted for interface compatibility and discarded.
    """
    # Python executors run locally, so platform_spec is not used.
    del platform_spec

    super(PythonExecutorOperator, self).__init__(executor_spec)

    # `cast` is a typing aid only; no runtime conversion or check happens.
    spec = cast(pipeline_pb2.ExecutorSpec.PythonClassExecutorSpec,
                self._executor_spec)
    self._executor_cls = import_utils.import_class_by_path(spec.class_path)
def __init__(
    self,
    executor_class_path: Text,
    name: Text,
    input_dict: Dict[Text, List[types.TfxArtifact]],
    outputs: Text,
    exec_properties: Dict[Text, Any],
):
    """Creates the executor and records what is needed to run it.

    Args:
        executor_class_path: Import path of the executor class.
        name: Component name; stored in snake_case form.
        input_dict: Executor inputs, stored as given.
        outputs: Serialized outputs, decoded with
            `types.parse_tfx_type_dict`.
        exec_properties: Execution properties. 'output_dir' must be present;
            'beam_pipeline_args' is optional.

    Raises:
        KeyError: If `exec_properties` has no 'output_dir' entry or the
            `WORKFLOW_ID` environment variable is not set.
    """
    self._input_dict = input_dict
    self._output_dict = types.parse_tfx_type_dict(outputs)
    self._component_name = to_snake_case(name)
    self._exec_properties = exec_properties
    self._output_dir = self._exec_properties['output_dir']
    self._workflow_id = os.environ['WORKFLOW_ID']

    def _to_native_str(arg):
        # Beam expects str types for its pipeline args. Only Python 2 has a
        # separate `unicode` type, so the name is touched only there.
        if six.PY2 and isinstance(arg, unicode):
            return arg.encode('ascii', 'ignore')
        return arg

    beam_pipeline_args = [
        _to_native_str(arg)
        for arg in self._exec_properties.get('beam_pipeline_args', [])
    ]

    # TODO(zhitaoli): Revisit usage of setup_file here.
    package_root = os.path.dirname(os.path.dirname(version.__file__))
    setup_file = os.path.join(package_root, 'setup.py')
    tf.logging.info('Using setup_file \'%s\' to capture TFX dependencies',
                    setup_file)
    beam_pipeline_args.append('--setup_file={}'.format(setup_file))

    executor_cls = import_utils.import_class_by_path(executor_class_path)

    # TODO(swoonna): Switch to execution_id when available
    unique_id = '{}_{}'.format(self._component_name, self._workflow_id)
    # TODO(swoonna): Add tmp_dir to additional_pipeline_args
    tmp_dir = os.path.join(self._output_dir, '.temp', '')
    executor_context = base_executor.BaseExecutor.Context(
        beam_pipeline_args=beam_pipeline_args,
        tmp_dir=tmp_dir,
        unique_id=unique_id)
    self._executor = executor_cls(executor_context)
def __init__(self,
             executor_spec: message.Message,
             platform_config: Optional[message.Message] = None):
    """Initializes a BeamExecutorOperator.

    Args:
        executor_spec: The specification of how to initialize the executor.
            It is treated as a `BeamExecutableSpec`; the executor class path
            and extra flags come from its `python_executor_spec`.
        platform_config: The specification of how to allocate resource for the
            executor. Accepted for interface compatibility and discarded.
    """
    del platform_config
    super().__init__(executor_spec)

    # `cast` is a typing aid only; no runtime conversion or check happens.
    beam_spec = cast(executable_spec_pb2.BeamExecutableSpec,
                     self._executor_spec)
    python_spec = beam_spec.python_executor_spec

    self._executor_cls = import_utils.import_class_by_path(
        python_spec.class_path)
    # Copy into plain lists so the operator owns its own flag collections.
    self.extra_flags = list(python_spec.extra_flags)
    self.beam_pipeline_args = list(beam_spec.beam_pipeline_args)
def __init__(self,
             executor_spec: message.Message,
             platform_config: Optional[message.Message] = None):
    """Initializes a PythonExecutorOperator.

    Args:
        executor_spec: The specification of how to initialize the executor.
            It is treated as a `PythonClassExecutableSpec`; its `class_path`
            names the executor class and its `extra_flags` seed the operator's
            flags.
        platform_config: The specification of how to allocate resource for the
            executor. Accepted for interface compatibility and discarded.
    """
    # Python executors run locally, so platform_config is not used.
    del platform_config
    super().__init__(executor_spec)

    # `cast` is a typing aid only; no runtime conversion or check happens.
    spec = cast(executable_spec_pb2.PythonClassExecutableSpec,
                self._executor_spec)
    self._executor_cls = import_utils.import_class_by_path(spec.class_path)

    # Flags from the spec come first, followed by the process's own
    # command-line arguments (without the program name).
    self.extra_flags = [*spec.extra_flags, *sys.argv[1:]]
def _parse_raw_artifact(
        artifact_pb: pipeline_pb2.RuntimeArtifact,
        name_from_id: MutableMapping[int, str]) -> artifact.Artifact:
    """Parses RuntimeArtifact proto message without artifact_type.

    Args:
        artifact_pb: The runtime artifact to convert. Its `type` must carry a
            populated `instance_schema`.
        name_from_id: Mapping handed to `_get_hashed_id` when the artifact has
            a name.

    Returns:
        An instance of the artifact class resolved from the instance schema,
        backed by an MLMD artifact holding the id (if named), uri, properties
        and custom properties of `artifact_pb`.
    """
    # This parser can only reserve what's inside the RuntimeArtifact pb
    # message. The type information is recovered from the artifact type
    # schema.
    # TODO(b/170261670): Replace this workaround by a more resilient
    # implementation. Currently custom artifact type can hardly be supported.
    assert (artifact_pb.type and
            artifact_pb.type.WhichOneof('kind') == 'instance_schema' and
            artifact_pb.type.instance_schema), (
                'RuntimeArtifact is expected to have '
                'instance_schema populated.')

    # 1. Import the artifact class from preloaded TFX library.
    artifact_cls = import_utils.import_class_by_path(
        _retrieve_class_path(artifact_pb.type.instance_schema))

    # 2. Copy properties and custom properties to the MLMD artifact pb.
    mlmd_artifact = metadata_store_pb2.Artifact()
    # TODO(b/135056715): Change to a unified getter/setter of Artifact type
    # once it's ready.
    if artifact_pb.name:
        # TODO(b/169583143): Remove this workaround when TFX migrates to use
        # str-typed id/name to identify artifacts.
        # Convert and populate the MLMD artifact ID.
        mlmd_artifact.id = _get_hashed_id(artifact_pb.name, name_from_id)
    mlmd_artifact.uri = artifact_pb.uri

    # Regular properties are copied first, then custom properties.
    property_maps = (
        (artifact_pb.properties, mlmd_artifact.properties),
        (artifact_pb.custom_properties, mlmd_artifact.custom_properties),
    )
    for source_map, target_map in property_maps:
        for key, value in source_map.items():
            target_map[key].CopyFrom(compiler_utils.get_mlmd_value(value))

    # 3. Instantiate the artifact Python object.
    parsed = artifact_cls()
    parsed.set_mlmd_artifact(mlmd_artifact)
    return parsed
def __init__(self,
             driver_spec: message.Message,
             mlmd_connection: metadata.Metadata,
             pipeline_info: pipeline_pb2.PipelineInfo,
             pipeline_node: pipeline_pb2.PipelineNode):
    """Builds the driver described by `driver_spec`.

    Args:
        driver_spec: The specification of how to initialize the driver. It is
            treated as a `PythonClassExecutorSpec` and its `class_path` names
            the driver class to import.
        mlmd_connection: ML metadata connection.
        pipeline_info: The information of the pipeline that this driver is in.
        pipeline_node: The specification of the node that this driver is in.

    Raises:
        RuntimeError: if the driver_spec is not supported.
            NOTE(review): nothing in this method raises it directly;
            presumably it comes from the base class - confirm.
    """
    super(PythonDriverOperator, self).__init__(
        driver_spec, mlmd_connection, pipeline_info, pipeline_node)

    # `cast` is a typing aid only; no runtime conversion or check happens.
    spec = cast(pipeline_pb2.ExecutorSpec.PythonClassExecutorSpec, driver_spec)
    driver_cls = import_utils.import_class_by_path(spec.class_path)
    self._driver = driver_cls(self._mlmd_connection, self._pipeline_info,
                              self._pipeline_node)
def _run_executor(args: argparse.Namespace, beam_args: List[str]) -> None:
    """Selects a particular executor and runs it based on its class path.

    The serialized invocation is parsed into inputs, outputs and execution
    properties, the executor's `Do` is called, and the resulting output
    artifacts are written as JSON to the invocation's output file.

    Args:
        args:
            --executor_class_path: The import path of the executor class.
            --json_serialized_invocation_args: Full JSON-serialized parameters
              for this execution.
        beam_args: Optional parameter that maps to the optional_pipeline_args
            parameter in the pipeline, which provides additional configuration
            options for apache-beam and tensorflow.logging. Beam executors
            receive it as `beam_pipeline_args`, all other executors as
            `extra_flags`. For more about the beam arguments please refer to:
            https://cloud.google.com/dataflow/docs/guides/specifying-exec-params
    """
    logging.set_verbosity(logging.INFO)

    # Rehydrate inputs/outputs/exec_properties from the serialized metadata.
    executor_input = pipeline_spec_pb2.ExecutorInput()
    json_format.Parse(
        args.json_serialized_invocation_args,
        executor_input,
        ignore_unknown_fields=True)

    input_artifacts = executor_input.inputs.artifacts
    output_artifacts = executor_input.outputs.artifacts
    input_parameters = executor_input.inputs.parameters
    output_file = executor_input.outputs.output_file

    if fileio.exists(output_file):
        # It has a driver that outputs the updated exec_properties in this
        # file.
        with fileio.open(output_file, 'rb') as driver_output:
            serialized_driver_output = driver_output.read()
        driver_result = pipeline_spec_pb2.ExecutorOutput()
        json_format.Parse(
            serialized_driver_output,
            driver_result,
            ignore_unknown_fields=True)
        # Append/Overwrite exec_properties.
        for param_name, param_value in driver_result.parameters.items():
            input_parameters[param_name].CopyFrom(param_value)

    name_from_id = {}

    inputs = kubeflow_v2_entrypoint_utils.parse_raw_artifact_dict(
        input_artifacts, name_from_id)
    outputs = kubeflow_v2_entrypoint_utils.parse_raw_artifact_dict(
        output_artifacts, name_from_id)
    exec_properties = kubeflow_v2_entrypoint_utils.parse_execution_properties(
        input_parameters)
    logging.info('Executor %s do: inputs: %s, outputs: %s, exec_properties: %s',
                 args.executor_class_path, inputs, outputs, exec_properties)

    executor_cls = import_utils.import_class_by_path(args.executor_class_path)
    # Only the keyword carrying `beam_args` differs between the two contexts.
    if issubclass(executor_cls, base_beam_executor.BaseBeamExecutor):
        context_cls = base_beam_executor.BaseBeamExecutor.Context
        context_kwargs = {'beam_pipeline_args': beam_args}
    else:
        context_cls = base_executor.BaseExecutor.Context
        context_kwargs = {'extra_flags': beam_args}
    executor_context = context_cls(
        unique_id='', tmp_dir='/tmp', **context_kwargs)
    executor = executor_cls(executor_context)

    logging.info('Starting executor')
    executor.Do(inputs, outputs, exec_properties)

    # TODO(b/182316162): Unify publisher handling so that post-execution
    # artifact logic is more cleanly handled.
    outputs_utils.tag_output_artifacts_with_version(outputs)  # pylint: disable=protected-access

    # TODO(b/169583143): Remove this workaround when TFX migrates to use
    # str-typed id/name to identify artifacts.
    # Convert ModelBlessing artifact to use managed MLMD resource name.
    is_evaluator = issubclass(executor_cls, evaluator_executor.Executor)
    if is_evaluator and standard_component_specs.BLESSING_KEY in outputs:
        # Parse the parent prefix for managed MLMD resource name.
        blessing = artifact_utils.get_single_instance(
            outputs[standard_component_specs.BLESSING_KEY])
        kubeflow_v2_entrypoint_utils.refactor_model_blessing(
            blessing, name_from_id)

    # Log the output metadata to a file. So that it can be picked up by MP.
    executor_output = pipeline_spec_pb2.ExecutorOutput()
    translated = kubeflow_v2_entrypoint_utils.translate_executor_output(
        outputs, name_from_id)
    for output_key, artifact_list in translated.items():
        executor_output.artifacts[output_key].CopyFrom(artifact_list)

    fileio.makedirs(os.path.dirname(output_file))
    with fileio.open(output_file, 'wb') as metadata_file:
        metadata_file.write(json_format.MessageToJson(executor_output))
def _run_executor(args, pipeline_args) -> None:
    r"""Select a particular executor and run it based on its class path.

    # pylint: disable=line-too-long
    _run_executor() is used to invoke a class subclassing
    tfx.components.base.base_executor.BaseExecutor. It serves both for
    invoking the executor on remote environments and for unit testing of
    executors.

    How to invoke an executor as standalone:
    # TODO(b/132958430): Create utility script to generate arguments for run_executor.py
    First, the input data needs to be prepared. An easy way to generate the
    test data is to fully run the pipeline once. This generates the data to be
    used for testing and logs the artifacts to be used as input parameters.
    Each executed component logs entries similar to the ones below, where
    the JSON payloads are exactly the values shown in the example command
    further down:

    ```
    [2019-05-16 08:59:27,117] {logging_mixin.py:95} INFO - [2019-05-16 08:59:27,116] {base_executor.py:72} INFO - Starting Executor execution.
    [2019-05-16 08:59:27,117] {logging_mixin.py:95} INFO - [2019-05-16 08:59:27,117] {base_executor.py:74} INFO - Inputs for Executor is: {...inputs JSON...}
    [2019-05-16 08:59:27,117] {logging_mixin.py:95} INFO - [2019-05-16 08:59:27,117] {base_executor.py:76} INFO - Outputs for Executor is: {...outputs JSON...}
    [2019-05-16 08:59:27,117] {logging_mixin.py:95} INFO - [2019-05-16 08:59:27,117] {base_executor.py:78} INFO - Execution properties for Executor is: {...exec_properties JSON...}
    ```

    Each of these map directly to the input parameters expected by
    run_executor():

    ```
    python scripts/run_executor.py \
        --executor_class_path=tfx.components.example_gen.big_query_example_gen.executor.Executor \
        --inputs={"input_base": [{"artifact": {"id": "1", "typeId": "1", "uri": "/usr/local/google/home/khaas/taxi/data/simple", "properties": {"split": {"stringValue": ""}, "state": {"stringValue": "published"}, "span": {"intValue": "1"}, "type_name": {"stringValue": "ExternalPath"}}}, "artifact_type": {"id": "1", "name": "ExternalPath", "properties": {"span": "INT", "name": "STRING", "type_name": "STRING", "split": "STRING", "state": "STRING"}}}]} \
        --outputs={"examples": [{"artifact": {"uri": "/usr/local/google/home/khaas/tfx/pipelines/chicago_taxi_simple/CsvExampleGen/examples/1/train/", "properties": {"type_name": {"stringValue": "ExamplesPath"}, "split": {"stringValue": "train"}, "span": {"intValue": "1"}}}, "artifact_type": {"name": "ExamplesPath", "properties": {"name": "STRING", "type_name": "STRING", "split": "STRING", "state": "STRING", "span": "INT"}}}, {"artifact": {"uri": "/usr/local/google/home/khaas/tfx/pipelines/chicago_taxi_simple/CsvExampleGen/examples/1/eval/", "properties": {"type_name": {"stringValue": "ExamplesPath"}, "split": {"stringValue": "eval"}, "span": {"intValue": "1"}}}, "artifact_type": {"name": "ExamplesPath", "properties": {"name": "STRING", "type_name": "STRING", "split": "STRING", "state": "STRING", "span": "INT"}}}]} \
        --exec-properties={"output": "{ \"splitConfig\": {\"splits\": [{\"name\": \"train\", \"hashBuckets\": 2}, {\"name\": \"eval\",\"hashBuckets\": 1}]}}"}
    ```
    # pylint: disable=line-too-long

    Args:
      args:
        - inputs: The input artifacts for this execution, serialized as JSON.
        - outputs: The output artifacts to be generated by this execution,
          serialized as JSON.
        - exec_properties: The execution properties to be used by this
          execution, serialized as JSON. Technically all the exec_properties
          values should be a primitive, and nested exec_properties needs to be
          JSON-encoded as a string. But as a convenience, the script allows
          you to feed in non-serialized values of exec_properties, which is
          then automatically serialized.
      pipeline_args: Optional parameter that maps to the
        optional_pipeline_args parameter in the pipeline, which provides
        additional configuration options for apache-beam and
        tensorflow.logging.

    Returns:
      None

    Raises:
      None
    """
    # A plain-text payload wins; otherwise fall back to the base64 flag.
    raw_inputs = args.inputs or base64.b64decode(args.inputs_base64)
    raw_outputs = args.outputs or base64.b64decode(args.outputs_base64)
    raw_exec_properties = (
        args.exec_properties or
        base64.b64decode(args.exec_properties_base64))

    inputs = artifact_utils.parse_artifact_dict(raw_inputs)
    outputs = artifact_utils.parse_artifact_dict(raw_outputs)
    exec_properties = json.loads(raw_exec_properties)

    # Technically exec_properties value can only be a primitive (e.g. string),
    # and one of our conventions is to pass a proto object by JSON-serializing
    # it. Because run_executor.py itself takes serialized exec_properties,
    # such a value would have to be serialized twice, which is inconvenient
    # when constructing exec_properties by hand. Non-serialized dict/list
    # values are therefore accepted and serialized here.
    reserialized = {
        key: json.dumps(value)
        for key, value in exec_properties.items()
        if isinstance(value, (dict, list))
    }
    exec_properties.update(reserialized)

    logging.info(
        'Executor %s do: inputs: %s, outputs: %s, exec_properties: %s',
        args.executor_class_path, inputs, outputs, exec_properties)

    executor_cls = import_utils.import_class_by_path(args.executor_class_path)
    executor_context = base_executor.BaseExecutor.Context(
        beam_pipeline_args=pipeline_args,
        tmp_dir=args.temp_directory_path,
        unique_id='')
    executor = executor_cls(executor_context)
    logging.info('Starting executor')
    executor.Do(inputs, outputs, exec_properties)

    # The last line of stdout will be pushed to xcom by Airflow.
    if args.write_outputs_stdout:
        print(artifact_utils.jsonify_artifact_dict(outputs))
def _run_executor(args, pipeline_args) -> None:
    r"""Select a particular executor and run it based on its class path.

    # pylint: disable=line-too-long
    _run_executor() is used to invoke a class subclassing
    tfx.components.base.base_executor.BaseExecutor. It serves both for
    invoking the executor on remote environments and for unit testing of
    executors.

    How to invoke an executor as standalone:
    # TODO(b/132958430): Create utility script to generate arguments for run_executor.py
    First, the input data needs to be prepared. An easy way to generate the
    test data is to fully run the pipeline once. This generates the data to be
    used for testing and logs the artifacts to be used as input parameters.
    Each executed component logs entries similar to the ones below, where
    the JSON payloads are exactly the values shown in the example command
    further down:

    ```
    [2019-05-16 08:59:27,117] {logging_mixin.py:95} INFO - [2019-05-16 08:59:27,116] {base_executor.py:72} INFO - Starting Executor execution.
    [2019-05-16 08:59:27,117] {logging_mixin.py:95} INFO - [2019-05-16 08:59:27,117] {base_executor.py:74} INFO - Inputs for Executor is: {...inputs JSON...}
    [2019-05-16 08:59:27,117] {logging_mixin.py:95} INFO - [2019-05-16 08:59:27,117] {base_executor.py:76} INFO - Outputs for Executor is: {...outputs JSON...}
    [2019-05-16 08:59:27,117] {logging_mixin.py:95} INFO - [2019-05-16 08:59:27,117] {base_executor.py:78} INFO - Execution properties for Executor is: {...exec_properties JSON...}
    ```

    Each of these map directly to the input parameters expected by
    run_executor():

    ```
    python scripts/run_executor.py \
        --executor_class_path=tfx.components.example_gen.big_query_example_gen.executor.Executor \
        --inputs={"input_base": [{"artifact": {"id": "1", "typeId": "1", "uri": "/usr/local/google/home/khaas/taxi/data/simple", "properties": {"split": {"stringValue": ""}, "state": {"stringValue": "published"}, "span": {"intValue": "1"}, "type_name": {"stringValue": "ExternalPath"}}}, "artifact_type": {"id": "1", "name": "ExternalPath", "properties": {"span": "INT", "name": "STRING", "type_name": "STRING", "split": "STRING", "state": "STRING"}}}]} \
        --outputs={"examples": [{"artifact": {"uri": "/usr/local/google/home/khaas/tfx/pipelines/chicago_taxi_simple/CsvExampleGen/examples/1/train/", "properties": {"type_name": {"stringValue": "ExamplesPath"}, "split": {"stringValue": "train"}, "span": {"intValue": "1"}}}, "artifact_type": {"name": "ExamplesPath", "properties": {"name": "STRING", "type_name": "STRING", "split": "STRING", "state": "STRING", "span": "INT"}}}, {"artifact": {"uri": "/usr/local/google/home/khaas/tfx/pipelines/chicago_taxi_simple/CsvExampleGen/examples/1/eval/", "properties": {"type_name": {"stringValue": "ExamplesPath"}, "split": {"stringValue": "eval"}, "span": {"intValue": "1"}}}, "artifact_type": {"name": "ExamplesPath", "properties": {"name": "STRING", "type_name": "STRING", "split": "STRING", "state": "STRING", "span": "INT"}}}]} \
        --exec-properties={"output": "{ \"splitConfig\": {\"splits\": [{\"name\": \"train\", \"hashBuckets\": 2}, {\"name\": \"eval\",\"hashBuckets\": 1}]}}"}
    ```
    # pylint: disable=line-too-long

    Args:
      args:
        - inputs: The input artifacts for this execution, serialized as JSON.
        - outputs: The output artifacts to be generated by this execution,
          serialized as JSON.
        - exec_properties: The execution properties to be used by this
          execution, serialized as JSON.
      pipeline_args: Optional parameter that maps to the
        optional_pipeline_args parameter in the pipeline, which provides
        additional configuration options for apache-beam and
        tensorflow.logging.

    Returns:
      None

    Raises:
      None
    """
    tf.logging.set_verbosity(tf.logging.INFO)

    # A plain-text payload wins; otherwise fall back to the base64 flag.
    raw_inputs = args.inputs or base64.b64decode(args.inputs_base64)
    raw_outputs = args.outputs or base64.b64decode(args.outputs_base64)
    raw_exec_properties = (
        args.exec_properties or
        base64.b64decode(args.exec_properties_base64))

    inputs = artifact_utils.parse_artifact_dict(raw_inputs)
    outputs = artifact_utils.parse_artifact_dict(raw_outputs)
    exec_properties = json.loads(raw_exec_properties)

    summary = (
        'Executor {} do: inputs: {}, outputs: {}, exec_properties: {}'.format(
            args.executor_class_path, inputs, outputs, exec_properties))
    tf.logging.info(summary)

    executor_cls = import_utils.import_class_by_path(args.executor_class_path)
    executor_context = base_executor.BaseExecutor.Context(
        beam_pipeline_args=pipeline_args,
        tmp_dir=args.temp_directory_path,
        unique_id='')
    executor = executor_cls(executor_context)
    tf.logging.info('Starting executor')
    executor.Do(inputs, outputs, exec_properties)

    # The last line of stdout will be pushed to xcom by Airflow.
    if args.write_outputs_stdout:
        print(artifact_utils.jsonify_artifact_dict(outputs))
def _parse_raw_artifact(
    artifact_pb: pipeline_pb2.RuntimeArtifact,
    name_from_id: MutableMapping[int, str]) -> artifact.Artifact:
    """Builds an Artifact object from a RuntimeArtifact proto without artifact_type.

    The artifact class is resolved from the type recorded on the proto. An MLMD
    artifact proto is then populated from the proto's name, uri, properties,
    custom properties and metadata, and attached to a fresh instance of the
    resolved class.

    Args:
      artifact_pb: The RuntimeArtifact message to parse; its `type` must be set.
      name_from_id: Mapping handed to `_get_hashed_id` when the artifact name is
        converted into an MLMD artifact id.

    Returns:
      A new instance of the resolved artifact class carrying the MLMD artifact.
    """
    # Only what the RuntimeArtifact message itself carries can be recovered
    # here; the type information comes from the artifact type schema.
    # TODO(b/170261670): Replace this workaround by a more resilient
    # implementation. Currently custom artifact type can hardly be supported.
    assert artifact_pb.type, 'RuntimeArtifact is expected to have a type.'

    # Step 1: resolve the artifact class from the class path derived from the
    # proto's type.
    artifact_cls = import_utils.import_class_by_path(
        _retrieve_class_path(artifact_pb.type))

    # Step 2: mirror id, uri, properties and custom properties onto an MLMD
    # artifact proto.
    mlmd_pb = metadata_store_pb2.Artifact()
    # TODO(b/135056715): Change to a unified getter/setter of Artifact type
    # once it's ready.
    if artifact_pb.name:
        # TODO(b/169583143): Remove this workaround when TFX migrates to use
        # str-typed id/name to identify artifacts.
        mlmd_pb.id = _get_hashed_id(artifact_pb.name, name_from_id)
    mlmd_pb.uri = artifact_pb.uri
    for key, value in artifact_pb.properties.items():
        mlmd_pb.properties[key].CopyFrom(compiler_utils.get_mlmd_value(value))
    for key, value in artifact_pb.custom_properties.items():
        mlmd_pb.custom_properties[key].CopyFrom(
            compiler_utils.get_mlmd_value(value))

    # Metadata entries become typed properties when they fit the declared
    # schema, and custom properties otherwise.
    type_schema = artifact_cls().artifact_type
    metadata_items = json_format.MessageToDict(artifact_pb.metadata)
    for key, value in metadata_items.items():
        if key in type_schema.properties:
            declared_type = type_schema.properties[key]
            # The property map entry is only touched inside a matching branch,
            # so a value that does not fit leaves `properties` untouched.
            if (declared_type == metadata_store_pb2.INT
                    and isinstance(value, float)):
                mlmd_pb.properties[key].int_value = int(value)
                continue
            if (declared_type == metadata_store_pb2.DOUBLE
                    and isinstance(value, float)):
                mlmd_pb.properties[key].double_value = value
                continue
            if (declared_type == metadata_store_pb2.STRING
                    and isinstance(value, str)):
                mlmd_pb.properties[key].string_value = value
                continue
            if declared_type == metadata_store_pb2.STRUCT:
                mlmd_pb.properties[key].struct_value.CopyFrom(
                    artifact._encode_struct_value(value))  # pylint: disable=protected-access
                continue
            # Reaching this point means the value does not fit the declared
            # schema, so it is stored as a custom property below.

        # A key carrying the custom-property prefix is stored under its bare
        # name when that bare name is a declared property of the type.
        custom_key = key
        if key.startswith(artifact.CUSTOM_PROPERTIES_PREFIX):
            bare_key = key[len(artifact.CUSTOM_PROPERTIES_PREFIX):]
            if bare_key in type_schema.properties:
                custom_key = bare_key
        mlmd_pb.custom_properties[custom_key].struct_value.CopyFrom(
            artifact._encode_struct_value(value))  # pylint: disable=protected-access

    # Step 3: wrap the MLMD proto in a new artifact instance.
    parsed = artifact_cls()
    parsed.set_mlmd_artifact(mlmd_pb)
    return parsed
def test_import_class_by_path(self):
    """Checks that this test class can be imported back from its dotted path."""
    expected_cls = ImportUtilsTest
    dotted_path = f'{expected_cls.__module__}.{expected_cls.__name__}'
    self.assertEqual(
        expected_cls, import_utils.import_class_by_path(dotted_path))
def testImportClassByPath(self):
    """Checks that `test_fn.TestClass` round-trips through its dotted path."""
    expected_cls = test_fn.TestClass
    # "<module>.<class name>" is the path format handed to the importer.
    dotted_path = '.'.join((expected_cls.__module__, expected_cls.__name__))
    self.assertEqual(
        expected_cls, import_utils.import_class_by_path(dotted_path))
def _reconstruct_from_executor_class_path(executor_class_path):
    """Rebuilds an ExecutorClassSpec from the import path of its executor class.

    Args:
      executor_class_path: Path string passed to
        `import_utils.import_class_by_path` to load the executor class.

    Returns:
      An ExecutorClassSpec wrapping the imported class.
    """
    return ExecutorClassSpec(
        import_utils.import_class_by_path(executor_class_path))
def run_component(
    full_component_class_name: Text,
    temp_directory_path: Text = None,
    beam_pipeline_args: List[Text] = None,
    **arguments
):
    r"""Loads a component, instantiates it with arguments and runs its executor.

    The component class is instantiated, so the component code is executed,
    not just the executor code.

    To pass artifact URI, use <input_name>_uri argument name
    (<input_name>_path is accepted as well).
    To pass artifact property, use <input_name>_<property> argument name.
    Output URIs are read from output_<output_name>_uri, <output_name>_uri or
    <output_name>_path, in that order.
    Protobuf property values can be passed as JSON-serialized protobufs.

    # pylint: disable=line-too-long

    Example::

      # When run as a script:
      python3 scripts/run_component.py \
        --full-component-class-name tfx.components.StatisticsGen \
        --examples-uri gs://my_bucket/chicago_taxi_simple/CsvExamplesGen/examples/1/ \
        --examples-split-names '["train", "eval"]' \
        --output-uri gs://my_bucket/chicago_taxi_simple/StatisticsGen/output/1/

      # When run as a function:
      run_component(
        full_component_class_name='tfx.components.StatisticsGen',
        examples_uri='gs://my_bucket/chicago_taxi_simple/CsvExamplesGen/examples/1/',
        examples_split_names='["train", "eval"]',
        output_uri='gs://my_bucket/chicago_taxi_simple/StatisticsGen/output/1/',
      )

    Args:
      full_component_class_name: The component class name including module name.
      temp_directory_path: Optional. Temporary directory path for the executor.
      beam_pipeline_args: Optional. Arguments to pass to the Beam pipeline.
      **arguments: Key-value pairs with component arguments.
    """
    component_class = import_utils.import_class_by_path(
        full_component_class_name)

    component_arguments = {}

    # Execution parameters: proto-typed parameters arrive JSON-serialized and
    # are parsed into a message instance; everything else is passed through.
    for name, execution_param in component_class.SPEC_CLASS.PARAMETERS.items():
        argument_value = arguments.get(name, None)
        if argument_value is None:
            continue
        param_type = execution_param.type
        if (isinstance(param_type, type) and
                issubclass(param_type, message.Message)):
            argument_value_obj = param_type()
            json_format.Parse(argument_value, argument_value_obj)
        else:
            argument_value_obj = argument_value
        component_arguments[name] = argument_value_obj

    # Input channels: one artifact per input for which a URI was supplied.
    for input_name, channel_param in component_class.SPEC_CLASS.INPUTS.items():
        uri = (arguments.get(input_name + '_uri') or
               arguments.get(input_name + '_path'))
        if uri:
            artifact = channel_param.type()
            artifact.uri = uri
            # Setting the artifact properties. PROPERTIES may be None for an
            # artifact type that declares none, so fall back to an empty
            # iterable instead of failing on iteration.
            for property_name in (channel_param.type.PROPERTIES or ()):
                property_arg_name = input_name + '_' + property_name
                if property_arg_name in arguments:
                    setattr(artifact, property_name,
                            arguments[property_arg_name])
            component_arguments[input_name] = channel_utils.as_channel(
                [artifact])

    component_instance = component_class(**component_arguments)

    input_dict = channel_utils.unwrap_channel_dict(
        component_instance.inputs.get_all())
    output_dict = channel_utils.unwrap_channel_dict(
        component_instance.outputs.get_all())
    exec_properties = component_instance.exec_properties

    # Generating paths for output artifacts
    for output_name, artifacts in output_dict.items():
        uri = (arguments.get('output_' + output_name + '_uri') or
               arguments.get(output_name + '_uri') or
               arguments.get(output_name + '_path'))
        if uri:
            for artifact in artifacts:
                artifact.uri = uri

    executor_context = base_executor.BaseExecutor.Context(
        beam_pipeline_args=beam_pipeline_args,
        tmp_dir=temp_directory_path,
        unique_id='',
    )
    executor = component_instance.executor_spec.executor_class(
        executor_context)
    executor.Do(
        input_dict=input_dict,
        output_dict=output_dict,
        exec_properties=exec_properties,
    )
def main():
    """Parses container flags, builds a component launcher and launches it.

    All flags except `--enable_cache` are required strings. Inputs, outputs,
    execution properties, the executor spec and the Kubeflow metadata config
    are deserialized from their flag values; the driver and launcher classes
    are imported from their class paths. The pipeline run id is read from the
    `WORKFLOW_ID` environment variable.

    Raises:
      TypeError: If the imported launcher class is not a subclass of
        `base_component_launcher.BaseComponentLauncher`.
    """
    # Log to the container's stdout so Kubeflow Pipelines UI can display logs
    # to the user.
    logging.basicConfig(stream=sys.stdout, level=logging.INFO)
    logging.getLogger().setLevel(logging.INFO)

    parser = argparse.ArgumentParser()
    required_string_flags = (
        '--pipeline_name',
        '--pipeline_root',
        '--kubeflow_metadata_config',
        '--additional_pipeline_args',
        '--component_id',
        '--component_type',
        '--driver_class_path',
        '--executor_spec',
        '--component_launcher_class_path',
        '--inputs',
        '--outputs',
        '--exec_properties',
    )
    for flag in required_string_flags:
        parser.add_argument(flag, type=str, required=True)
    parser.add_argument('--enable_cache', action='store_true')
    args = parser.parse_args()

    # Deserialize the artifact dictionaries and wrap them into channels.
    input_dict = _make_channel_dict(
        artifact_utils.parse_artifact_dict(args.inputs))
    output_dict = _make_channel_dict(
        artifact_utils.parse_artifact_dict(args.outputs))
    exec_properties = json.loads(args.exec_properties)

    driver_class = import_utils.import_class_by_path(args.driver_class_path)
    executor_spec = json_utils.loads(args.executor_spec)

    launcher_cls = import_utils.import_class_by_path(
        args.component_launcher_class_path)
    is_launcher = issubclass(
        launcher_cls, base_component_launcher.BaseComponentLauncher)
    if not is_launcher:
        raise TypeError(
            'component_launcher_class "%s" is not subclass of base_component_launcher.BaseComponentLauncher'
            % launcher_cls)

    metadata_config_pb = kubeflow_pb2.KubeflowMetadataConfig()
    json_format.Parse(args.kubeflow_metadata_config, metadata_config_pb)
    connection_config = _get_metadata_connection_config(metadata_config_pb)

    component_info = data_types.ComponentInfo(
        component_type=args.component_type, component_id=args.component_id)
    driver_args = data_types.DriverArgs(enable_cache=args.enable_cache)
    additional_pipeline_args = _make_additional_pipeline_args(
        args.additional_pipeline_args)
    pipeline_info = data_types.PipelineInfo(
        pipeline_name=args.pipeline_name,
        pipeline_root=args.pipeline_root,
        run_id=os.environ['WORKFLOW_ID'])

    # TODO(hongyes): create a classmethod to create launcher from a
    # deserialized component.
    launcher = launcher_cls(
        component_info=component_info,
        driver_class=driver_class,
        component_executor_spec=executor_spec,
        input_dict=input_dict,
        output_dict=output_dict,
        exec_properties=exec_properties,
        pipeline_info=pipeline_info,
        driver_args=driver_args,
        metadata_connection_config=connection_config,
        additional_pipeline_args=additional_pipeline_args)

    launcher.launch()
def main():
    """Parses container flags, launches the serialized component, dumps UI metadata.

    The component and its config are deserialized from their flag values, the
    launcher class is imported from its class path and created through its
    `create` factory, and the launch runs inside a telemetry label scope that
    marks the runner as 'kfp'. The pipeline run id is read from the
    `WORKFLOW_ID` environment variable.

    Raises:
      TypeError: If the imported launcher class is not a subclass of
        `base_component_launcher.BaseComponentLauncher`.
    """
    # Log to the container's stdout so Kubeflow Pipelines UI can display logs
    # to the user.
    logging.basicConfig(stream=sys.stdout, level=logging.INFO)
    logging.getLogger().setLevel(logging.INFO)

    parser = argparse.ArgumentParser()
    # Flags are registered in the same order as before: six required strings,
    # the cache switch, then the two remaining required strings.
    for flag in ('--pipeline_name',
                 '--pipeline_root',
                 '--kubeflow_metadata_config',
                 '--beam_pipeline_args',
                 '--additional_pipeline_args',
                 '--component_launcher_class_path'):
        parser.add_argument(flag, type=str, required=True)
    parser.add_argument('--enable_cache', action='store_true')
    for flag in ('--serialized_component', '--component_config'):
        parser.add_argument(flag, type=str, required=True)
    args = parser.parse_args()

    component = json_utils.loads(args.serialized_component)
    component_config = json_utils.loads(args.component_config)

    launcher_cls = import_utils.import_class_by_path(
        args.component_launcher_class_path)
    is_launcher = issubclass(
        launcher_cls, base_component_launcher.BaseComponentLauncher)
    if not is_launcher:
        raise TypeError(
            'component_launcher_class "%s" is not subclass of base_component_launcher.BaseComponentLauncher'
            % launcher_cls)

    metadata_config_pb = kubeflow_pb2.KubeflowMetadataConfig()
    json_format.Parse(args.kubeflow_metadata_config, metadata_config_pb)
    metadata_connection = kubeflow_metadata_adapter.KubeflowMetadataAdapter(
        _get_metadata_connection_config(metadata_config_pb))

    driver_args = data_types.DriverArgs(enable_cache=args.enable_cache)
    beam_pipeline_args = json.loads(args.beam_pipeline_args)
    additional_pipeline_args = json.loads(args.additional_pipeline_args)
    pipeline_info = data_types.PipelineInfo(
        pipeline_name=args.pipeline_name,
        pipeline_root=args.pipeline_root,
        run_id=os.environ['WORKFLOW_ID'])

    launcher = launcher_cls.create(
        component=component,
        pipeline_info=pipeline_info,
        driver_args=driver_args,
        metadata_connection=metadata_connection,
        beam_pipeline_args=beam_pipeline_args,
        additional_pipeline_args=additional_pipeline_args,
        component_config=component_config)

    # Attach necessary labels to distinguish different runner and DSL.
    # TODO(zhitaoli): Pass this from KFP runner side when the same container
    # entrypoint can be used by a different runner.
    runner_labels = {telemetry_utils.LABEL_TFX_RUNNER: 'kfp'}
    with telemetry_utils.scoped_labels(runner_labels):
        execution_info = launcher.launch()

    # Dump the UI metadata.
    _dump_ui_metadata(component, execution_info)
def run_component(full_component_class_name: str,
                  temp_directory_path: Optional[str] = None,
                  beam_pipeline_args: Optional[List[str]] = None,
                  **arguments):
    r"""Loads a component, instantiates it with arguments and runs its executor.

    The component class is instantiated, so the component code is executed,
    not just the executor code.

    To pass artifact URI, use <input_name>_uri argument name
    (<input_name>_path is accepted as well).
    To pass artifact property, use <input_name>_<property> argument name.
    Output URIs are read from output_<output_name>_uri, <output_name>_uri or
    <output_name>_path, in that order.
    To have an output artifact property written to a file after execution,
    pass the file path as <output_name>_<property>_path.
    Protobuf property values can be passed as JSON-serialized protobufs.

    # pylint: disable=line-too-long

    Example::

      # When run as a script:
      python3 scripts/run_component.py \
        --full-component-class-name tfx.components.StatisticsGen \
        --examples-uri gs://my_bucket/chicago_taxi_simple/CsvExamplesGen/examples/1/ \
        --examples-split-names '["train", "eval"]' \
        --output-uri gs://my_bucket/chicago_taxi_simple/StatisticsGen/output/1/

      # When run as a function:
      run_component(
        full_component_class_name='tfx.components.StatisticsGen',
        examples_uri='gs://my_bucket/chicago_taxi_simple/CsvExamplesGen/examples/1/',
        examples_split_names='["train", "eval"]',
        output_uri='gs://my_bucket/chicago_taxi_simple/StatisticsGen/output/1/',
      )

    Args:
      full_component_class_name: The component class name including module name.
      temp_directory_path: Optional. Temporary directory path for the executor.
      beam_pipeline_args: Optional. Arguments to pass to the Beam pipeline.
      **arguments: Key-value pairs with component arguments.
    """
    component_class = import_utils.import_class_by_path(
        full_component_class_name)

    component_arguments = {}

    # Execution parameters: proto-typed values arrive JSON-serialized, int and
    # float parameters are coerced, everything else is passed through.
    for name, execution_param in component_class.SPEC_CLASS.PARAMETERS.items():
        argument_value = arguments.get(name, None)
        if argument_value is None:
            continue
        param_type = execution_param.type
        if (isinstance(param_type, type) and
                issubclass(param_type, message.Message)):
            argument_value_obj = param_type()
            proto_utils.json_to_proto(argument_value, argument_value_obj)
        elif param_type is int:
            argument_value_obj = int(argument_value)
        elif param_type is float:
            argument_value_obj = float(argument_value)
        else:
            argument_value_obj = argument_value
        component_arguments[name] = argument_value_obj

    # Input channels: one artifact per input for which a URI was supplied.
    for input_name, channel_param in component_class.SPEC_CLASS.INPUTS.items():
        uri = (arguments.get(input_name + '_uri') or
               arguments.get(input_name + '_path'))
        if uri:
            artifact = channel_param.type()
            artifact.uri = uri
            # Setting the artifact properties
            for property_name, property_spec in (
                    channel_param.type.PROPERTIES or {}).items():
                property_arg_name = input_name + '_' + property_name
                if property_arg_name in arguments:
                    property_value = arguments[property_arg_name]
                    # A property has a single type, so at most one coercion
                    # applies.
                    if property_spec.type == PropertyType.INT:
                        property_value = int(property_value)
                    elif property_spec.type == PropertyType.FLOAT:
                        property_value = float(property_value)
                    setattr(artifact, property_name, property_value)
            component_arguments[input_name] = channel_utils.as_channel(
                [artifact])

    component_instance = component_class(**component_arguments)

    input_dict = channel_utils.unwrap_channel_dict(component_instance.inputs)
    output_dict = channel_utils.unwrap_channel_dict(component_instance.outputs)
    exec_properties = component_instance.exec_properties

    # Generating paths for output artifacts
    for output_name, channel_param in (
            component_class.SPEC_CLASS.OUTPUTS.items()):
        uri = (arguments.get('output_' + output_name + '_uri') or
               arguments.get(output_name + '_uri') or
               arguments.get(output_name + '_path'))
        if uri:
            artifacts = output_dict[output_name]
            # An output with no artifact yet gets one so the URI can be set.
            if not artifacts:
                artifacts.append(channel_param.type())
            for artifact in artifacts:
                artifact.uri = uri

    # Beam-based executors take the pipeline args as Beam args; other
    # executors receive the same list as extra flags.
    if issubclass(component_instance.executor_spec.executor_class,
                  base_beam_executor.BaseBeamExecutor):
        executor_context = base_beam_executor.BaseBeamExecutor.Context(
            beam_pipeline_args=beam_pipeline_args,
            tmp_dir=temp_directory_path,
            unique_id='',
        )
    else:
        executor_context = base_executor.BaseExecutor.Context(
            extra_flags=beam_pipeline_args,
            tmp_dir=temp_directory_path,
            unique_id='',
        )
    executor = component_instance.executor_spec.executor_class(
        executor_context)
    executor.Do(
        input_dict=input_dict,
        output_dict=output_dict,
        exec_properties=exec_properties,
    )

    # Writing out the output artifact properties
    for output_name, channel_param in (
            component_class.SPEC_CLASS.OUTPUTS.items()):
        for property_name in channel_param.type.PROPERTIES or []:
            property_path_arg_name = (
                output_name + '_' + property_name + '_path')
            property_path = arguments.get(property_path_arg_name)
            if property_path:
                artifacts = output_dict[output_name]
                for artifact in artifacts:
                    property_value = getattr(artifact, property_name)
                    # A bare file name has an empty dirname, and
                    # os.makedirs('') raises, so only create the parent
                    # directory when there is one.
                    property_dir = os.path.dirname(property_path)
                    if property_dir:
                        os.makedirs(property_dir, exist_ok=True)
                    with open(property_path, 'w') as f:
                        f.write(str(property_value))
def _run_executor(args: argparse.Namespace, beam_args: List[str]) -> None:
    """Selects a particular executor and run it based on name.

    After the executor finishes, the translated output artifacts are written
    as a JSON-serialized ExecutorOutput message to the output file named in
    the executor input.

    Args:
      args:
        --executor_class_path: The import path of the executor class.
        --json_serialized_invocation_args: Full JSON-serialized parameters for
          this execution. See go/mp-alpha-placeholder for details.
      beam_args: Optional parameter that maps to the optional_pipeline_args
        parameter in the pipeline, which provides additional configuration
        options for apache-beam and tensorflow.logging.
        For more about the beam arguments please refer to:
        https://cloud.google.com/dataflow/docs/guides/specifying-exec-params
    """
    logging.set_verbosity(logging.INFO)

    # Rehydrate inputs/outputs/exec_properties from the serialized metadata.
    executor_input = pipeline_pb2.ExecutorInput()
    json_format.Parse(
        args.json_serialized_invocation_args,
        executor_input,
        ignore_unknown_fields=True)

    inputs_dict = executor_input.inputs.artifacts
    outputs_dict = executor_input.outputs.artifacts
    inputs_parameter = executor_input.inputs.parameters

    # Shared between input and output parsing and reused below when the
    # outputs are translated back.
    name_from_id = {}

    inputs = kubeflow_v2_entrypoint_utils.parse_raw_artifact_dict(
        inputs_dict, name_from_id)
    outputs = kubeflow_v2_entrypoint_utils.parse_raw_artifact_dict(
        outputs_dict, name_from_id)
    exec_properties = kubeflow_v2_entrypoint_utils.parse_execution_properties(
        inputs_parameter)
    logging.info(
        'Executor %s do: inputs: %s, outputs: %s, exec_properties: %s',
        args.executor_class_path, inputs, outputs, exec_properties)

    executor_cls = import_utils.import_class_by_path(args.executor_class_path)
    executor_context = base_executor.BaseExecutor.Context(
        beam_pipeline_args=beam_args, unique_id='')
    executor = executor_cls(executor_context)
    logging.info('Starting executor')
    executor.Do(inputs, outputs, exec_properties)

    # TODO(b/169583143): Remove this workaround when TFX migrates to use
    # str-typed id/name to identify artifacts.
    # Convert ModelBlessing artifact to use managed MLMD resource name.
    if (issubclass(executor_cls, evaluator_executor.Executor) and
            constants.BLESSING_KEY in outputs):
        # Parse the parent prefix for managed MLMD resource name.
        kubeflow_v2_entrypoint_utils.refactor_model_blessing(
            artifact_utils.get_single_instance(
                outputs[constants.BLESSING_KEY]), name_from_id)

    # Log the output metadata to a file. So that it can be picked up by MP.
    metadata_uri = executor_input.outputs.output_file
    executor_output = pipeline_pb2.ExecutorOutput()
    for k, v in kubeflow_v2_entrypoint_utils.translate_executor_output(
            outputs, name_from_id).items():
        executor_output.artifacts[k].CopyFrom(v)

    # Use a context manager so the handle is closed (and its contents flushed)
    # even if the write raises, instead of relying on garbage collection.
    with fileio.open(metadata_uri, 'wb') as f:
        f.write(json_format.MessageToJson(executor_output))