def setUp(self):
  super(KubeflowV2EntrypointUtilsTest, self).setUp()
  _ARTIFACT_1.uri = 'gs://root/string/'
  # Hash value of
  # 'projects/123456789/locations/us-central1/metadataStores/default/artifacts/11111'
  _ARTIFACT_1.id = 9171918664759481579
  _ARTIFACT_1.set_string_custom_property(
      key='my_property_1', value='Test string.')
  _ARTIFACT_2.uri = 'gs://root/model/'
  # Hash value of
  # 'projects/123456789/locations/us-central1/metadataStores/default/artifacts/22222'
  _ARTIFACT_2.id = 6826273797600318744
  _ARTIFACT_2.set_int_custom_property(key='my_property_2', value=42)
  _ARTIFACT_3.uri = 'gs://root/examples/'
  # Hash value of
  # 'projects/123456789/locations/us-central1/metadataStores/default/artifacts/33333'
  _ARTIFACT_3.id = 27709763105391302
  self._expected_dict = {
      _KEY_1: [_ARTIFACT_1],
      _KEY_2: [_ARTIFACT_2],
      _KEY_3: [_ARTIFACT_3],
  }
  source_data_dir = os.path.join(os.path.dirname(__file__), 'testdata')
  # Use two protos to store the testdata.
  artifacts_pb = pipeline_pb2.ExecutorInput()
  io_utils.parse_json_file(
      os.path.join(source_data_dir, 'artifacts.json'), artifacts_pb)
  self._artifacts = artifacts_pb.inputs.artifacts

  properties_pb = pipeline_pb2.ExecutorInput()
  io_utils.parse_json_file(
      os.path.join(source_data_dir, 'exec_properties.json'), properties_pb)
  self._properties = properties_pb.inputs.parameters
def setUp(self):
  super().setUp()
  self._test_dir = tempfile.mkdtemp()

  self._executor_invocation = pipeline_pb2.ExecutorInput()
  self._executor_invocation.outputs.output_file = _TEST_OUTPUT_METADATA_JSON
  self._executor_invocation.inputs.parameters[
      'input_base_uri'].string_value = _TEST_INPUT_DIR
  self._executor_invocation.inputs.parameters[
      'input_config'].string_value = json_format.MessageToJson(
          example_gen_pb2.Input(splits=[
              example_gen_pb2.Input.Split(
                  name='s1', pattern='span{SPAN}/split1/*'),
              example_gen_pb2.Input.Split(
                  name='s2', pattern='span{SPAN}/split2/*')
          ]))
  self._executor_invocation.outputs.artifacts['examples'].artifacts.append(
      pipeline_pb2.RuntimeArtifact(
          type=pipeline_pb2.ArtifactTypeSchema(
              instance_schema=compiler_utils.get_artifact_schema(
                  standard_artifacts.Examples()))))

  self._executor_invocation_from_file = fileio.open(
      os.path.join(
          os.path.dirname(__file__), 'testdata', 'executor_invocation.json'),
      'r').read()
  self._expected_result_from_file = fileio.open(
      os.path.join(
          os.path.dirname(__file__), 'testdata',
          'expected_output_metadata.json'), 'r').read()

  self._olddir = os.getcwd()
  os.chdir(self._test_dir)
  fileio.makedirs(os.path.dirname(_TEST_OUTPUT_METADATA_JSON))
  fileio.makedirs(os.path.dirname(_TEST_INPUT_DIR))
def testParseExecutionPropertiesMapsInputBaseUri(self):
  properties_pb = pipeline_pb2.ExecutorInput()
  properties_pb.inputs.parameters[
      'input_base_uri'].string_value = 'gs://input/base'
  self.assertDictEqual(
      {'input_base': 'gs://input/base'},
      kubeflow_v2_entrypoint_utils.parse_execution_properties(
          properties_pb.inputs.parameters))
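# The test above pins down the key-renaming behavior: the Kubeflow V2 parameter
# name 'input_base_uri' maps back to the TFX executor property 'input_base',
# and the proto Value is unwrapped into a plain Python value. The sketch below
# is illustrative only; it assumes a simple oneof dispatch and is NOT the
# actual implementation of kubeflow_v2_entrypoint_utils.parse_execution_properties.
def _parse_execution_properties_sketch(parameters):
  """Hypothetical helper: converts a map of proto Values to a plain dict."""
  result = {}
  for name, param in parameters.items():
    # Rename the '*_uri' style key back to the name the executor expects.
    if name == 'input_base_uri':
      name = 'input_base'
    # Pick whichever scalar field of the Value oneof is populated (assumed to
    # be named 'value' with int_value/double_value/string_value members).
    which = param.WhichOneof('value')
    result[name] = getattr(param, which) if which else None
  return result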
def main(args):
  executor_input = pipeline_pb2.ExecutorInput()
  json_format.Parse(
      args.json_serialized_invocation_args,
      executor_input,
      ignore_unknown_fields=True)

  name_from_id = {}

  exec_properties = kubeflow_v2_entrypoint_utils.parse_execution_properties(
      executor_input.inputs.parameters)
  outputs_dict = kubeflow_v2_entrypoint_utils.parse_raw_artifact_dict(
      executor_input.outputs.artifacts, name_from_id)

  _run_driver(exec_properties, outputs_dict,
              executor_input.outputs.output_file, name_from_id)
def setUp(self):
  self._executor_invocation = pipeline_pb2.ExecutorInput()
  self._executor_invocation.outputs.output_file = _TEST_OUTPUT_METADATA_JSON
  self._executor_invocation.inputs.parameters[
      'input_base_uri'].string_value = _TEST_INPUT_DIR
  self._executor_invocation.inputs.parameters[
      'input_config'].string_value = json_format.MessageToJson(
          example_gen_pb2.Input(splits=[
              example_gen_pb2.Input.Split(
                  name='s1', pattern='span{SPAN}/split1/*'),
              example_gen_pb2.Input.Split(
                  name='s2', pattern='span{SPAN}/split2/*')
          ]))
  self._executor_invocation.outputs.artifacts['examples'].artifacts.append(
      pipeline_pb2.RuntimeArtifact(
          type=pipeline_pb2.ArtifactTypeSchema(
              instance_schema=compiler_utils.get_artifact_schema(
                  standard_artifacts.Examples()))))

  self._executor_invocation_from_file = fileio.open(
      os.path.join(
          os.path.dirname(__file__), 'testdata', 'executor_invocation.json'),
      'r').read()
  logging.debug('Executor invocation under test: %s',
                self._executor_invocation_from_file)
  self._expected_result_from_file = fileio.open(
      os.path.join(
          os.path.dirname(__file__), 'testdata',
          'expected_output_metadata.json'), 'r').read()
  logging.debug('Expecting output metadata JSON: %s',
                self._expected_result_from_file)

  # The initialization of TempWorkingDirTestCase has to be called after all
  # the testdata files have been read. Otherwise the original testdata files
  # are not accessible after cwd is changed.
  super().setUp()

  fileio.makedirs(os.path.dirname(_TEST_OUTPUT_METADATA_JSON))
  fileio.makedirs(os.path.dirname(_TEST_INPUT_DIR))
def main(argv):
  parser = argparse.ArgumentParser()
  parser.add_argument(
      '--json_serialized_invocation_args',
      type=str,
      required=True,
      help='JSON-serialized metadata for this execution.')
  args, _ = parser.parse_known_args(argv)

  executor_input = pipeline_pb2.ExecutorInput()
  json_format.Parse(
      args.json_serialized_invocation_args,
      executor_input,
      ignore_unknown_fields=True)

  name_from_id = {}

  exec_properties = kubeflow_v2_entrypoint_utils.parse_execution_properties(
      executor_input.inputs.parameters)
  outputs_dict = kubeflow_v2_entrypoint_utils.parse_raw_artifact_dict(
      executor_input.outputs.artifacts, name_from_id)

  _run_driver(exec_properties, outputs_dict,
              executor_input.outputs.output_file, name_from_id)
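# For reference, a driver entrypoint like main() above is exercised by handing
# it a JSON-serialized ExecutorInput through --json_serialized_invocation_args.
# The sketch below is illustrative only; the helper name, the output path, and
# the GCS URI are made up, and it relies on the pipeline_pb2/json_format
# imports already used by main() in this module.
def _example_local_invocation_sketch():
  """Hypothetical example of driving main() with a serialized ExecutorInput."""
  invocation = pipeline_pb2.ExecutorInput()
  invocation.outputs.output_file = '/tmp/output_metadata.json'  # Made-up path.
  invocation.inputs.parameters[
      'input_base_uri'].string_value = 'gs://bucket/data'  # Made-up URI.
  main(['--json_serialized_invocation_args',
        json_format.MessageToJson(invocation)])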
def setUp(self):
  super().setUp()
  self._executor_invocation = pipeline_pb2.ExecutorInput()
  self._executor_invocation.outputs.output_file = _TEST_OUTPUT_METADATA_JSON
  self._executor_invocation.inputs.parameters[
      'input_base_uri'].string_value = _TEST_INPUT_DIR
  self._executor_invocation.inputs.parameters[
      'input_config'].string_value = json_format.MessageToJson(
          example_gen_pb2.Input(splits=[
              example_gen_pb2.Input.Split(
                  name='s1', pattern='span{SPAN}/split1/*'),
              example_gen_pb2.Input.Split(
                  name='s2', pattern='span{SPAN}/split2/*')
          ]))
  self._executor_invocation.outputs.artifacts['examples'].artifacts.append(
      pipeline_pb2.RuntimeArtifact(
          type=pipeline_pb2.ArtifactTypeSchema(
              instance_schema=compiler_utils.get_artifact_schema(
                  standard_artifacts.Examples()))))

  self._executor_invocation_from_file = fileio.open(
      os.path.join(
          os.path.dirname(__file__), 'testdata', 'executor_invocation.json'),
      'r').read()
  logging.debug('Executor invocation under test: %s',
                self._executor_invocation_from_file)
  self._expected_result_from_file = fileio.open(
      os.path.join(
          os.path.dirname(__file__), 'testdata',
          'expected_output_metadata.json'), 'r').read()
  logging.debug('Expecting output metadata JSON: %s',
                self._expected_result_from_file)

  # Change working directory after all the testdata files have been read.
  self.enter_context(test_case_utils.change_working_dir(self.tmp_dir))

  fileio.makedirs(os.path.dirname(_TEST_INPUT_DIR))
def _run_executor(args: argparse.Namespace, beam_args: List[str]) -> None:
  """Selects a particular executor and runs it based on name.

  Args:
    args:
      --executor_class_path: The import path of the executor class.
      --json_serialized_invocation_args: Full JSON-serialized parameters for
        this execution.
    beam_args: Optional parameter that maps to the optional_pipeline_args
      parameter in the pipeline, which provides additional configuration
      options for apache-beam and tensorflow.logging. For more about the beam
      arguments please refer to:
      https://cloud.google.com/dataflow/docs/guides/specifying-exec-params
  """
  logging.set_verbosity(logging.INFO)

  # Rehydrate inputs/outputs/exec_properties from the serialized metadata.
  executor_input = pipeline_pb2.ExecutorInput()
  json_format.Parse(
      args.json_serialized_invocation_args,
      executor_input,
      ignore_unknown_fields=True)

  inputs_dict = executor_input.inputs.artifacts
  outputs_dict = executor_input.outputs.artifacts
  inputs_parameter = executor_input.inputs.parameters

  name_from_id = {}

  inputs = kubeflow_v2_entrypoint_utils.parse_raw_artifact_dict(
      inputs_dict, name_from_id)
  outputs = kubeflow_v2_entrypoint_utils.parse_raw_artifact_dict(
      outputs_dict, name_from_id)
  exec_properties = kubeflow_v2_entrypoint_utils.parse_execution_properties(
      inputs_parameter)
  logging.info(
      'Executor %s invoked with inputs: %s, outputs: %s, exec_properties: %s',
      args.executor_class_path, inputs, outputs, exec_properties)

  executor_cls = import_utils.import_class_by_path(args.executor_class_path)
  executor_context = base_executor.BaseExecutor.Context(
      beam_pipeline_args=beam_args, unique_id='')
  executor = executor_cls(executor_context)
  logging.info('Starting executor')
  executor.Do(inputs, outputs, exec_properties)

  # TODO(b/169583143): Remove this workaround when TFX migrates to use
  # str-typed id/name to identify artifacts.
  # Convert ModelBlessing artifact to use managed MLMD resource name.
  if (issubclass(executor_cls, evaluator_executor.Executor) and
      BLESSING_KEY in outputs):
    # Parse the parent prefix for managed MLMD resource name.
    kubeflow_v2_entrypoint_utils.refactor_model_blessing(
        artifact_utils.get_single_instance(outputs[BLESSING_KEY]),
        name_from_id)

  # Log the output metadata to a file, so that it can be picked up by MP.
  metadata_uri = executor_input.outputs.output_file
  executor_output = pipeline_pb2.ExecutorOutput()
  for k, v in kubeflow_v2_entrypoint_utils.translate_executor_output(
      outputs, name_from_id).items():
    executor_output.artifacts[k].CopyFrom(v)

  fileio.makedirs(os.path.dirname(metadata_uri))
  with fileio.open(metadata_uri, 'wb') as f:
    f.write(json_format.MessageToJson(executor_output))
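# _run_executor() above expects an argparse Namespace carrying
# --executor_class_path and --json_serialized_invocation_args, plus the
# leftover argv forwarded as Beam/logging flags. A minimal sketch of that
# wiring is shown below; it assumes the flag names from the docstring and is
# not necessarily how the actual container entrypoint parses its arguments.
def _executor_entrypoint_sketch(argv):
  """Hypothetical CLI wrapper around _run_executor, for illustration only."""
  parser = argparse.ArgumentParser()
  parser.add_argument('--executor_class_path', type=str, required=True)
  parser.add_argument(
      '--json_serialized_invocation_args', type=str, required=True)
  # Anything argparse does not recognize is treated as Beam pipeline args.
  args, beam_args = parser.parse_known_args(argv)
  _run_executor(args, beam_args)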