def main(args):
    """Deserialize the ExecutorInput carried by ``args`` and run the driver.

    Args:
        args: Object exposing a ``json_serialized_invocation_args`` attribute
            holding the JSON-serialized ExecutorInput message.
    """
    invocation = pipeline_spec_pb2.ExecutorInput()
    # Fields present in the JSON but unknown to the proto are skipped instead
    # of failing the parse.
    json_format.Parse(
        text=args.json_serialized_invocation_args,
        message=invocation,
        ignore_unknown_fields=True,
    )
    _run_driver(invocation)
def testParseExecutionPropertiesMapsInputBaseUri(self):
    """Checks that the 'input_base_uri' parameter comes back as 'input_base'."""
    # Arrange: a single string parameter keyed by 'input_base_uri'.
    executor_input = pipeline_pb2.ExecutorInput()
    raw_parameters = executor_input.inputs.parameters
    raw_parameters['input_base_uri'].string_value = 'gs://input/base'

    # Act.
    parsed = kubeflow_v2_entrypoint_utils.parse_execution_properties(
        raw_parameters)

    # Assert: same value, exposed under the 'input_base' key.
    self.assertDictEqual({'input_base': 'gs://input/base'}, parsed)
def setUp(self):
    """Fills in the expected artifacts and loads the JSON fixtures.

    Mutates the module-level ``_ARTIFACT_1``/``_ARTIFACT_2``/``_ARTIFACT_3``
    objects, stores the expected key -> artifacts mapping on
    ``self._expected_dict`` and reads three ExecutorInput fixtures from the
    ``testdata`` directory next to this file.
    """
    super(KubeflowV2EntrypointUtilsTest, self).setUp()

    _ARTIFACT_1.uri = 'gs://root/string/'
    # Hash value of
    # 'projects/123456789/locations/us-central1/metadataStores/default/artifacts/11111'
    _ARTIFACT_1.id = 9171918664759481579
    _ARTIFACT_1.set_string_custom_property(
        key='my_property_1', value='Test string.')

    _ARTIFACT_2.uri = 'gs://root/model/'
    # Hash value of
    # 'projects/123456789/locations/us-central1/metadataStores/default/artifacts/22222'
    _ARTIFACT_2.id = 6826273797600318744
    _ARTIFACT_2.set_float_custom_property(key='my_property_2', value=42.0)

    _ARTIFACT_3.uri = 'gs://root/examples/'
    _ARTIFACT_3.span = 9000
    # Hash value of
    # 'projects/123456789/locations/us-central1/metadataStores/default/artifacts/33333'
    _ARTIFACT_3.id = 27709763105391302

    self._expected_dict = {
        _KEY_1: [_ARTIFACT_1],
        _KEY_2: [_ARTIFACT_2],
        _KEY_3: [_ARTIFACT_3],
    }

    testdata_dir = os.path.join(os.path.dirname(__file__), 'testdata')

    def _load_executor_input(file_name):
        # Each fixture is parsed into its own, fresh ExecutorInput message.
        message = pipeline_pb2.ExecutorInput()
        io_utils.parse_json_file(
            os.path.join(testdata_dir, file_name), message)
        return message

    # Input artifacts in the current serialization format.
    self._artifacts = _load_executor_input(
        'artifacts.json').inputs.artifacts

    # Test legacy properties/custom properties deserialization.
    self._artifacts_legacy = _load_executor_input(
        'artifacts_legacy.json').inputs.artifacts

    # Execution properties are stored as the input parameters of the message.
    self._properties = _load_executor_input(
        'exec_properties.json').inputs.parameters
def main(args):
    """Parse the ExecutorInput in ``args`` and run the driver on its contents.

    Args:
        args: Object exposing a ``json_serialized_invocation_args`` attribute
            holding the JSON-serialized ExecutorInput message.
    """
    invocation = pipeline_spec_pb2.ExecutorInput()
    json_format.Parse(
        text=args.json_serialized_invocation_args,
        message=invocation,
        ignore_unknown_fields=True,
    )

    # Shared dict handed to both the artifact parser and the driver.
    # NOTE(review): presumably populated by parse_raw_artifact_dict - confirm.
    name_from_id = {}

    exec_properties = kubeflow_v2_entrypoint_utils.parse_execution_properties(
        invocation.inputs.parameters)
    output_artifacts = kubeflow_v2_entrypoint_utils.parse_raw_artifact_dict(
        invocation.outputs.artifacts, name_from_id)

    _run_driver(
        exec_properties,
        output_artifacts,
        invocation.outputs.output_file,
        name_from_id,
    )
def main(
        executor_input_str: str,
        function_name: str,
        output_metadata_path: Optional[str] = None):
    """Container entrypoint used by KFP Python function based component.

    Args:
        executor_input_str: A serialized ExecutorInput proto message.
        function_name: The name of the user-defined function.
        output_metadata_path: A local path where the output metadata JSON file
            should be written to. When not given (or empty), the
            ``outputs.output_file`` field of the parsed ExecutorInput is used.
    """
    executor_input = pipeline_spec_pb2.ExecutorInput()
    json_format.Parse(text=executor_input_str, message=executor_input)
    if not output_metadata_path:
        output_metadata_path = executor_input.outputs.output_file

    # Keyword arguments that will be passed to the user-defined function.
    fn_kwargs = {}
    for param_name, param_value in executor_input.inputs.parameters.items():
        fn_kwargs[param_name] = entrypoint_utils.get_python_value(param_value)

    # Only the first artifact of each input artifact list is handed over.
    for input_name, artifact_list in executor_input.inputs.artifacts.items():
        fn_kwargs[input_name] = artifact.Artifact.get_from_runtime_artifact(
            artifact_list.artifacts[0])

    # Also, determine a way to inspect the function signature to decide the
    # type of output artifacts.
    user_fn = entrypoint_utils.import_func_from_source(FN_SOURCE, function_name)

    # In the ExecutorInput message passed into the entrypoint, the output
    # artifact URIs are already specified. The output artifact is constructed
    # according to the specified URIs + type information retrieved from the
    # function signature.
    output_uris = {
        output_name: artifact_list.artifacts[0].uri
        for output_name, artifact_list
        in executor_input.outputs.artifacts.items()
    }
    output_artifacts = entrypoint_utils.get_output_artifacts(
        user_fn, output_uris)
    for output_name, output_artifact in output_artifacts.items():
        fn_kwargs[output_name] = output_artifact

    # Execute the user function. fn_res is expected to contain output
    # parameters only. It's either a namedtuple or a single primitive value.
    fn_res = user_fn(**fn_kwargs)

    _write_output_metadata_file(
        fn_res=fn_res,
        output_artifacts=output_artifacts,
        output_metadata_path=output_metadata_path)
def setUp(self):
    """Builds the in-memory executor invocation and loads the file fixtures.

    Sets up:
      * ``self._executor_invocation``: an ExecutorInput with the output file,
        the 'input_base' / 'output_config' / 'input_config' parameters and
        one 'examples' output artifact.
      * ``self._executor_invocation_from_file`` and
        ``self._expected_result_from_file``: raw text of the JSON fixtures in
        the ``testdata`` directory next to this file.

    The working directory is switched to ``self.tmp_dir`` only after the
    fixtures are read.
    """
    super().setUp()

    invocation = pipeline_pb2.ExecutorInput()
    invocation.outputs.output_file = _TEST_OUTPUT_METADATA_JSON
    invocation.inputs.parameters['input_base'].string_value = _TEST_INPUT_DIR
    invocation.inputs.parameters['output_config'].string_value = '{}'
    invocation.inputs.parameters[
        'input_config'].string_value = json_format.MessageToJson(
            example_gen_pb2.Input(splits=[
                example_gen_pb2.Input.Split(
                    name='s1', pattern='span{SPAN}/split1/*'),
                example_gen_pb2.Input.Split(
                    name='s2', pattern='span{SPAN}/split2/*'),
            ]))
    invocation.outputs.artifacts['examples'].artifacts.append(
        pipeline_pb2.RuntimeArtifact(
            type=pipeline_pb2.ArtifactTypeSchema(
                instance_schema=compiler_utils.get_artifact_schema(
                    standard_artifacts.Examples))))
    self._executor_invocation = invocation

    testdata_dir = os.path.join(os.path.dirname(__file__), 'testdata')

    def _read_testdata(file_name):
        # Use a context manager so the handle is closed deterministically
        # instead of being left to the garbage collector.
        with fileio.open(os.path.join(testdata_dir, file_name), 'r') as f:
            return f.read()

    self._executor_invocation_from_file = _read_testdata(
        'executor_invocation.json')
    logging.debug('Executor invocation under test: %s',
                  self._executor_invocation_from_file)

    self._expected_result_from_file = _read_testdata(
        'expected_output_metadata.json')
    logging.debug('Expecting output metadata JSON: %s',
                  self._expected_result_from_file)

    # Change working directory after all the testdata files have been read.
    self.enter_context(test_case_utils.change_working_dir(self.tmp_dir))

    fileio.makedirs(os.path.dirname(_TEST_INPUT_DIR))
def _run_executor(args: argparse.Namespace, beam_args: List[str]) -> None:
    """Selects a particular executor and runs it based on name.

    The executor inputs are rehydrated from the serialized ExecutorInput,
    optionally overlaid with parameters found in an already existing output
    metadata file, and the resulting output artifacts are written back to that
    same file as a JSON-serialized ExecutorOutput.

    Args:
        args:
            --executor_class_path: The import path of the executor class.
            --json_serialized_invocation_args: Full JSON-serialized parameters
              for this execution.
        beam_args: Optional parameter that maps to the optional_pipeline_args
            parameter in the pipeline, which provides additional configuration
            options for apache-beam and tensorflow.logging.
            For more about the beam arguments please refer to:
            https://cloud.google.com/dataflow/docs/guides/specifying-exec-params
    """
    logging.set_verbosity(logging.INFO)

    # Rehydrate inputs/outputs/exec_properties from the serialized metadata.
    executor_input = pipeline_spec_pb2.ExecutorInput()
    json_format.Parse(
        args.json_serialized_invocation_args,
        executor_input,
        ignore_unknown_fields=True)

    raw_inputs = executor_input.inputs.artifacts
    raw_outputs = executor_input.outputs.artifacts
    raw_parameters = executor_input.inputs.parameters
    output_file = executor_input.outputs.output_file

    if fileio.exists(output_file):
        # It has a driver that outputs the updated exec_properties in this
        # file.
        with fileio.open(output_file, 'rb') as driver_output_file:
            driver_output = pipeline_spec_pb2.ExecutorOutput()
            json_format.Parse(
                driver_output_file.read(),
                driver_output,
                ignore_unknown_fields=True)
            # Append/Overwrite exec_properties.
            for param_name, param_value in driver_output.parameters.items():
                raw_parameters[param_name].CopyFrom(param_value)

    # Shared between both artifact-parsing calls and the output translation.
    name_from_id = {}

    inputs = kubeflow_v2_entrypoint_utils.parse_raw_artifact_dict(
        raw_inputs, name_from_id)
    outputs = kubeflow_v2_entrypoint_utils.parse_raw_artifact_dict(
        raw_outputs, name_from_id)
    exec_properties = kubeflow_v2_entrypoint_utils.parse_execution_properties(
        raw_parameters)
    logging.info(
        'Executor %s do: inputs: %s, outputs: %s, exec_properties: %s',
        args.executor_class_path, inputs, outputs, exec_properties)

    executor_cls = import_utils.import_class_by_path(args.executor_class_path)

    # Beam executors take the extra args as beam pipeline args; every other
    # executor receives them as extra flags.
    shared_context_kwargs = {'unique_id': '', 'tmp_dir': '/tmp'}
    if issubclass(executor_cls, base_beam_executor.BaseBeamExecutor):
        executor_context = base_beam_executor.BaseBeamExecutor.Context(
            beam_pipeline_args=beam_args, **shared_context_kwargs)
    else:
        executor_context = base_executor.BaseExecutor.Context(
            extra_flags=beam_args, **shared_context_kwargs)

    executor = executor_cls(executor_context)
    logging.info('Starting executor')
    executor.Do(inputs, outputs, exec_properties)

    # TODO(b/182316162): Unify publisher handling so that post-execution
    # artifact logic is more cleanly handled.
    outputs_utils.tag_output_artifacts_with_version(outputs)  # pylint: disable=protected-access

    # TODO(b/169583143): Remove this workaround when TFX migrates to use
    # str-typed id/name to identify artifacts.
    # Convert ModelBlessing artifact to use managed MLMD resource name.
    is_evaluator = issubclass(executor_cls, evaluator_executor.Executor)
    if is_evaluator and standard_component_specs.BLESSING_KEY in outputs:
        # Parse the parent prefix for managed MLMD resource name.
        blessing = artifact_utils.get_single_instance(
            outputs[standard_component_specs.BLESSING_KEY])
        kubeflow_v2_entrypoint_utils.refactor_model_blessing(
            blessing, name_from_id)

    # Log the output metadata to a file. So that it can be picked up by MP.
    executor_output = pipeline_spec_pb2.ExecutorOutput()
    translated_outputs = kubeflow_v2_entrypoint_utils.translate_executor_output(
        outputs, name_from_id)
    for output_name, artifact_list in translated_outputs.items():
        executor_output.artifacts[output_name].CopyFrom(artifact_list)

    fileio.makedirs(os.path.dirname(output_file))
    # NOTE(review): MessageToJson returns str while the file is opened in
    # binary mode; presumably the fileio file object accepts str - confirm.
    with fileio.open(output_file, 'wb') as metadata_file:
        metadata_file.write(json_format.MessageToJson(executor_output))