def setUp(self):
  """Builds one populated and one empty `ResolutionContext` test fixture."""
  super(PlaceholderUtilsTest, self).setUp()

  def _infra_validator_node():
    # Returns a fresh proto on each call so the two contexts built below
    # never share a PipelineNode instance.
    return pipeline_pb2.PipelineNode(
        node_info=pipeline_pb2.NodeInfo(
            type=metadata_store_pb2.ExecutionType(name="infra_validator")))

  def _test_pipeline_info():
    return pipeline_pb2.PipelineInfo(id="test_pipeline_id")

  example = standard_artifacts.Examples()
  example.uri = "/tmp"
  example.split_names = artifact_utils.encode_split_names(["train", "eval"])
  examples = [example]

  self._serving_spec = infra_validator_pb2.ServingSpec()
  self._serving_spec.tensorflow_serving.tags.extend(["latest", "1.15.0-gpu"])

  populated_exec_info = data_types.ExecutionInfo(
      input_dict={
          "model": [standard_artifacts.Model()],
          "examples": examples,
      },
      output_dict={"blessing": [standard_artifacts.ModelBlessing()]},
      exec_properties={
          # The serving spec is stored as its JSON serialization.
          "proto_property":
              json_format.MessageToJson(
                  message=self._serving_spec,
                  sort_keys=True,
                  preserving_proto_field_name=True,
                  indent=0)
      },
      execution_output_uri="test_executor_output_uri",
      stateful_working_dir="test_stateful_working_dir",
      pipeline_node=_infra_validator_node(),
      pipeline_info=_test_pipeline_info())
  self._resolution_context = placeholder_utils.ResolutionContext(
      exec_info=populated_exec_info,
      executor_spec=executable_spec_pb2.PythonClassExecutableSpec(
          class_path="test_class_path"),
  )

  # Resolution context to simulate missing optional values.
  empty_exec_info = data_types.ExecutionInfo(
      input_dict={
          "model": [],
          "examples": [],
      },
      output_dict={"blessing": []},
      exec_properties={},
      pipeline_node=_infra_validator_node(),
      pipeline_info=_test_pipeline_info())
  self._none_resolution_context = placeholder_utils.ResolutionContext(
      exec_info=empty_exec_info, executor_spec=None, platform_config=None)
def testRunExecutor_with_InprocessExecutor(self):
  """Runs `InprocessExecutor` through the operator and checks its output."""
  executor_spec = text_format.Parse(
      ' class_path: "tfx.orchestration.portable.python_executor_operator_test.InprocessExecutor" ',
      executable_spec_pb2.PythonClassExecutableSpec())
  operator = python_executor_operator.PythonExecutorOperator(executor_spec)

  execution_info = data_types.ExecutionInfo(
      execution_id=1,
      input_dict={'input_key': [standard_artifacts.Examples()]},
      output_dict={'output_key': [standard_artifacts.Model()]},
      exec_properties={'key': 'value'},
      stateful_working_dir=os.path.join(self.tmp_dir, 'stateful_working_dir'),
      execution_output_uri=os.path.join(self.tmp_dir, 'executor_output'))
  executor_output = operator.run_executor(execution_info)

  self.assertProtoPartiallyEquals(
      ' output_artifacts { key: "output_key" value { artifacts { } } }',
      executor_output)
def testRunExecutor_with_InplaceUpdateExecutor(self):
  """Runs `InplaceUpdateExecutor` and checks the reported executor output."""
  executor_spec = text_format.Parse(
      ' class_path: "tfx.orchestration.portable.python_executor_operator_test.InplaceUpdateExecutor" ',
      executable_spec_pb2.PythonClassExecutableSpec())
  operator = python_executor_operator.PythonExecutorOperator(executor_spec)

  properties = {
      'string': 'value',
      'int': 1,
      'float': 0.0,
      # This should not happen on production and will be
      # dropped.
      'proto': execution_result_pb2.ExecutorOutput()
  }
  execution_info = data_types.ExecutionInfo(
      execution_id=1,
      input_dict={'input_key': [standard_artifacts.Examples()]},
      output_dict={'output_key': [standard_artifacts.Model()]},
      exec_properties=properties,
      stateful_working_dir=os.path.join(self.tmp_dir, 'stateful_working_dir'),
      execution_output_uri=os.path.join(self.tmp_dir, 'executor_output'))
  executor_output = operator.run_executor(execution_info)

  # Adjacent literals concatenate into a single text-format proto string.
  expected_output = (
      ' execution_properties { key: "float" value { double_value: 0.0 } }'
      ' execution_properties { key: "int" value { int_value: 1 } }'
      ' execution_properties { key: "string" value { string_value: "value" } }'
      ' output_artifacts { key: "output_key" value { artifacts {'
      ' custom_properties { key: "name" value { string_value: "my_model" } }'
      ' } } }')
  self.assertProtoPartiallyEquals(expected_output, executor_output)
def testQueryBasedDriver(self):
  """Runs `QueryBasedDriver` with a static span range of 2 and checks results."""
  # Create exec properties.
  input_config = example_gen_pb2.Input(splits=[
      example_gen_pb2.Input.Split(
          name='s1',
          pattern="select * from table where span={SPAN} and split='s1'"),
      example_gen_pb2.Input.Split(
          name='s2',
          pattern="select * from table where span={SPAN} and split='s2'")
  ])
  range_config = range_config_pb2.RangeConfig(
      static_range=range_config_pb2.StaticRange(
          start_span_number=2, end_span_number=2))
  exec_properties = {
      standard_component_specs.INPUT_CONFIG_KEY:
          proto_utils.proto_to_json(input_config),
      standard_component_specs.RANGE_CONFIG_KEY:
          proto_utils.proto_to_json(range_config),
  }

  # Prepare output_dict
  example = standard_artifacts.Examples()
  example.uri = 'my_uri'
  output_dict = {standard_component_specs.EXAMPLES_KEY: [example]}

  query_based_driver = driver.QueryBasedDriver(self._mock_metadata)
  execution_info = portable_data_types.ExecutionInfo(
      output_dict=output_dict, exec_properties=exec_properties)
  result = query_based_driver.run(execution_info)

  # The assertions below read the same dict object that was handed to the
  # driver.
  self.assertEqual(exec_properties[utils.SPAN_PROPERTY_NAME], 2)
  self.assertIsNone(exec_properties[utils.VERSION_PROPERTY_NAME])
  self.assertIsNone(exec_properties[utils.FINGERPRINT_PROPERTY_NAME])

  updated_input_config = example_gen_pb2.Input()
  proto_utils.json_to_proto(
      exec_properties[standard_component_specs.INPUT_CONFIG_KEY],
      updated_input_config)
  expected_input_config = (
      """ splits { name: "s1" pattern: "select * from table where span=2 and split='s1'" }"""
      """ splits { name: "s2" pattern: "select * from table where span=2 and split='s2'" }""")
  self.assertProtoEquals(expected_input_config, updated_input_config)

  output_examples = result.output_artifacts[
      standard_component_specs.EXAMPLES_KEY].artifacts
  self.assertLen(output_examples, 1)
  output_example = output_examples[0]
  self.assertEqual(output_example.uri, example.uri)
  span_property = output_example.custom_properties[utils.SPAN_PROPERTY_NAME]
  self.assertEqual(span_property.string_value, '2')
def run(
    self, mlmd_connection: metadata.Metadata,
    pipeline_node: pipeline_pb2.PipelineNode,
    pipeline_info: pipeline_pb2.PipelineInfo,
    pipeline_runtime_spec: pipeline_pb2.PipelineRuntimeSpec
) -> data_types.ExecutionInfo:
  """Runs Resolver specific logic.

  Args:
    mlmd_connection: ML metadata connection.
    pipeline_node: The specification of the node that this launcher launches.
    pipeline_info: The information of the pipeline that this node runs in.
    pipeline_runtime_spec: The runtime information of the pipeline that this
      node runs in. Not read by this method.

  Returns:
    An ExecutionInfo holding the registered execution id, the resolved
    execution properties, and the resolved input artifacts as both
    `input_dict` and `output_dict`.
  """
  logging.info('Running as an resolver node.')
  with mlmd_connection as metadata_handler:
    # 1. Prepares all contexts.
    contexts = context_lib.prepare_contexts(
        metadata_handler=metadata_handler,
        node_contexts=pipeline_node.contexts)

    # 2. Resolves inputs and execution properties.
    exec_properties = inputs_utils.resolve_parameters(
        node_parameters=pipeline_node.parameters)
    resolved_artifacts = inputs_utils.resolve_input_artifacts(
        metadata_handler=metadata_handler, node_inputs=pipeline_node.inputs)

    # 3. Registers execution in metadata.
    execution = execution_publish_utils.register_execution(
        metadata_handler=metadata_handler,
        execution_type=pipeline_node.node_info.type,
        contexts=contexts,
        exec_properties=exec_properties)

    # 4. Publishes the execution, reporting the resolved input artifacts as
    # its output artifacts.
    execution_publish_utils.publish_internal_execution(
        metadata_handler=metadata_handler,
        contexts=contexts,
        execution_id=execution.id,
        output_artifacts=resolved_artifacts)

    return data_types.ExecutionInfo(
        execution_id=execution.id,
        input_dict=resolved_artifacts,
        output_dict=resolved_artifacts,
        exec_properties=exec_properties,
        pipeline_node=pipeline_node,
        pipeline_info=pipeline_info)
def _set_up_test_execution_info(self,
                                input_dict=None,
                                output_dict=None,
                                exec_properties=None):
  """Returns an ExecutionInfo fixture for a node of type 'Docker_executor'.

  Args:
    input_dict: Input artifacts; any falsy value is replaced by `{}`.
    output_dict: Output artifacts; any falsy value is replaced by `{}`.
    exec_properties: Execution properties; any falsy value is replaced by
      `{}`.

  Returns:
    A data_types.ExecutionInfo with fixed testing URIs and pipeline id.
  """
  docker_node = pipeline_pb2.PipelineNode(
      node_info=pipeline_pb2.NodeInfo(
          type=metadata_store_pb2.ExecutionType(name='Docker_executor')))
  return data_types.ExecutionInfo(
      input_dict=input_dict if input_dict else {},
      output_dict=output_dict if output_dict else {},
      exec_properties=exec_properties if exec_properties else {},
      execution_output_uri='/testing/executor/output/',
      stateful_working_dir='/testing/stateful/dir',
      pipeline_node=docker_node,
      pipeline_info=pipeline_pb2.PipelineInfo(id='test_pipeline_id'))
def testRunExecutorWithBeamPipelineArgs(self):
  """Runs `ValidateBeamPipelineArgsExecutor` with an extra runner flag."""
  # Adjacent literals concatenate into a single text-format proto string.
  spec_text = (
      ' class_path: "tfx.orchestration.portable.python_executor_operator_test.ValidateBeamPipelineArgsExecutor"'
      ' extra_flags: "--runner=DirectRunner" ')
  executor_spec = text_format.Parse(
      spec_text, executable_spec_pb2.PythonClassExecutableSpec())
  operator = python_executor_operator.PythonExecutorOperator(executor_spec)

  execution_info = data_types.ExecutionInfo(
      input_dict={},
      output_dict={},
      exec_properties={},
      execution_output_uri=os.path.join(self.tmp_dir, 'executor_output'))
  # No assertion is made here on the returned executor output.
  operator.run_executor(execution_info)
def _set_up_test_execution_info(self,
                                input_dict=None,
                                output_dict=None,
                                exec_properties=None):
  """Returns an ExecutionInfo fixture for node 'fakecomponent-fakecomponent'.

  Args:
    input_dict: Input artifacts; any falsy value is replaced by `{}`.
    output_dict: Output artifacts; any falsy value is replaced by `{}`.
    exec_properties: Execution properties; any falsy value is replaced by
      `{}`.

  Returns:
    A data_types.ExecutionInfo with execution id 123, fixed testing URIs,
    pipeline id 'Test' and pipeline run id '123'.
  """
  fake_node = pipeline_pb2.PipelineNode(
      node_info=pipeline_pb2.NodeInfo(id='fakecomponent-fakecomponent'))
  return data_types.ExecutionInfo(
      execution_id=123,
      input_dict=input_dict if input_dict else {},
      output_dict=output_dict if output_dict else {},
      exec_properties=exec_properties if exec_properties else {},
      execution_output_uri='/testing/executor/output/',
      stateful_working_dir='/testing/stateful/dir',
      pipeline_node=fake_node,
      pipeline_info=pipeline_pb2.PipelineInfo(id='Test'),
      pipeline_run_id='123')
def testRunExecutorWithBeamPipelineArgs(self):
  """Runs `ValidateBeamPipelineArgsExecutor` through the Beam operator."""
  # Adjacent literals concatenate into a single text-format proto string.
  spec_text = (
      ' python_executor_spec: {'
      ' class_path: "tfx.orchestration.portable.beam_executor_operator_test.ValidateBeamPipelineArgsExecutor"'
      ' }'
      ' beam_pipeline_args: "--runner=DirectRunner" ')
  executor_spec = text_format.Parse(spec_text,
                                    executable_spec_pb2.BeamExecutableSpec())
  operator = beam_executor_operator.BeamExecutorOperator(executor_spec)

  execution_info = data_types.ExecutionInfo(
      execution_id=1,
      input_dict={},
      output_dict={},
      exec_properties={},
      execution_output_uri=os.path.join(self.tmp_dir, 'executor_output'))
  # No assertion is made here on the returned executor output.
  operator.run_executor(execution_info)
def resolve_artifacts(
    self, metadata_handler: metadata.Metadata,
    input_dict: Dict[str, List[types.Artifact]]
) -> Optional[Dict[str, List[types.Artifact]]]:
  """Returns `input_dict` unchanged if every predicate resolves to True.

  Each placeholder expression in `self._predicates` is resolved against a
  resolution context whose execution info carries only `input_dict`.

  Args:
    metadata_handler: Not read by this method.
    input_dict: The artifacts the predicates are evaluated against.

  Returns:
    The same `input_dict` that was passed in.

  Raises:
    ValueError: If a predicate resolves to something other than a bool.
    exceptions.SkipSignal: If a predicate resolves to False.
  """
  for predicate_pb in self._predicates:
    resolution_context = placeholder_utils.ResolutionContext(
        exec_info=portable_data_types.ExecutionInfo(input_dict=input_dict))
    outcome = placeholder_utils.resolve_placeholder_expression(
        predicate_pb, resolution_context)
    if not isinstance(outcome, bool):
      raise ValueError("Predicate evaluates to a non-boolean result.")
    if outcome:
      continue
    raise exceptions.SkipSignal("Predicate evaluates to False.")
  return input_dict
def _get_execution_info(self, input_dict, output_dict, exec_properties):
  """Returns an ExecutionInfo fixture for node 'MyPythonNode'.

  Args:
    input_dict: Input artifacts, passed through as given.
    output_dict: Output artifacts, passed through as given.
    exec_properties: Execution properties, passed through as given.

  Returns:
    A data_types.ExecutionInfo with execution id 1 whose working directory
    and executor output URI are located under `self.tmp_dir`.
  """
  node = pipeline_pb2.PipelineNode(node_info={'id': 'MyPythonNode'})
  info = pipeline_pb2.PipelineInfo(id='MyPipeline')
  return data_types.ExecutionInfo(
      execution_id=1,
      input_dict=input_dict,
      output_dict=output_dict,
      exec_properties=exec_properties,
      stateful_working_dir=os.path.join(self.tmp_dir, 'stateful_working_dir'),
      execution_output_uri=os.path.join(self.tmp_dir, 'executor_output'),
      pipeline_node=node,
      pipeline_info=info,
      pipeline_run_id=99)
def deserialize_execution_info(
    execution_info_b64: str) -> data_types.ExecutionInfo:
  """De-serializes an ExecutionInfo from a base64-encoded string.

  Args:
    execution_info_b64: Base64 encoding of a serialized
      `ExecutorInvocation` proto.

  Returns:
    A data_types.ExecutionInfo populated from the decoded proto.
  """
  invocation = executor_invocation_pb2.ExecutorInvocation.FromString(
      base64.b64decode(execution_info_b64))

  # Fields that map one-to-one go through the constructor; the dict-valued
  # fields are converted by helpers and assigned afterwards.
  execution_info = data_types.ExecutionInfo(
      execution_output_uri=invocation.output_metadata_uri,
      stateful_working_dir=invocation.stateful_working_dir,
      pipeline_info=invocation.pipeline_info,
      pipeline_node=invocation.pipeline_node)
  execution_info.exec_properties = _build_exec_property_dict(
      invocation.execution_properties)
  execution_info.input_dict = _build_artifact_dict(invocation.input_dict)
  execution_info.output_dict = _build_artifact_dict(invocation.output_dict)
  return execution_info
def testExecutionInfoSerialization(self):
  """Round-trips an ExecutionInfo through serialize and deserialize."""
  artifact = _MyArtifact()
  artifact.int1 = 111
  expected_output_uri = 'output/uri'
  expected_working_dir = 'workding/dir'
  expected_properties = {
      'property1': 'value1',
      'property2': 'value2',
  }

  original = data_types.ExecutionInfo(
      input_dict={'input': [artifact]},
      output_dict={'output': [artifact]},
      exec_properties=expected_properties,
      execution_output_uri=expected_output_uri,
      stateful_working_dir=expected_working_dir,
      pipeline_info=pipeline_pb2.PipelineInfo(id='my_pipeline'),
      pipeline_node=text_format.Parse(" node_info { id: 'my_node' } ",
                                      pipeline_pb2.PipelineNode()))

  rehydrated = python_execution_binary_utils.deserialize_execution_info(
      python_execution_binary_utils.serialize_execution_info(original))

  self.CheckArtifactDict(rehydrated.input_dict, {'input': [artifact]})
  self.CheckArtifactDict(rehydrated.output_dict, {'output': [artifact]})
  self.assertEqual(rehydrated.exec_properties, expected_properties)
  self.assertEqual(rehydrated.execution_output_uri, expected_output_uri)
  self.assertEqual(rehydrated.stateful_working_dir, expected_working_dir)
  self.assertProtoEquals(rehydrated.pipeline_info, original.pipeline_info)
  self.assertProtoEquals(rehydrated.pipeline_node, original.pipeline_node)
def testLauncher_resolver_node(self):
  """Checks that launching a Resolver node delegates to its system handler.

  A mock handler class is registered for the Resolver class path, the
  launcher is run, and the test verifies that the handler's `run` was called
  once with the launcher's arguments and that its return value is what
  `launch()` returns.
  """
  mock_resolver_node_handler_class = mock.create_autospec(
      system_node_handler.SystemNodeHandler)
  mock_resolver_node_handler = mock.create_autospec(
      system_node_handler.SystemNodeHandler, instance=True)
  mock_resolver_node_handler_class.return_value = mock_resolver_node_handler
  expected_execution_info = data_types.ExecutionInfo()
  expected_execution_info.execution_id = 123
  mock_resolver_node_handler.run.return_value = expected_execution_info

  # `_SYSTEM_NODE_HANDLERS` is module-level state. Patching it with
  # `patch.dict` restores the original entries when the block exits, so the
  # mock handler cannot leak into other tests.
  with mock.patch.dict(
      launcher._SYSTEM_NODE_HANDLERS,
      {
          'tfx.dsl.components.common.resolver.Resolver':
              mock_resolver_node_handler_class
      }):
    test_launcher = launcher.Launcher(
        pipeline_node=self._resolver,
        mlmd_connection=self._mlmd_connection,
        pipeline_info=self._pipeline_info,
        pipeline_runtime_spec=self._pipeline_runtime_spec)
    execution_info = test_launcher.launch()

  mock_resolver_node_handler.run.assert_called_once_with(
      self._mlmd_connection, self._resolver, self._pipeline_info,
      self._pipeline_runtime_spec)
  self.assertEqual(execution_info, expected_execution_info)
def testRunExecutorWithBeamPipelineArgs(self):
  """Runs the Beam operator and checks the name reported on the output."""
  # Adjacent literals concatenate into a single text-format proto string.
  spec_text = (
      ' python_executor_spec: {'
      ' class_path: "tfx.orchestration.portable.beam_executor_operator_test.ValidateBeamPipelineArgsExecutor"'
      ' }'
      ' beam_pipeline_args: "--runner=DirectRunner" ')
  executor_spec = text_format.Parse(spec_text,
                                    executable_spec_pb2.BeamExecutableSpec())
  operator = beam_executor_operator.BeamExecutorOperator(executor_spec)

  node = pipeline_pb2.PipelineNode(node_info={'id': 'MyBeamNode'})
  info = pipeline_pb2.PipelineInfo(id='MyPipeline')
  execution_info = data_types.ExecutionInfo(
      execution_id=1,
      input_dict={'input_key': [standard_artifacts.Examples()]},
      output_dict={'output_key': [standard_artifacts.Model()]},
      exec_properties={},
      execution_output_uri=os.path.join(self.tmp_dir, 'executor_output'),
      pipeline_node=node,
      pipeline_info=info,
      pipeline_run_id=99)
  executor_output = operator.run_executor(execution_info)

  expected_output = (
      ' output_artifacts { key: "output_key" value { artifacts {'
      ' custom_properties { key: "name" value {'
      ' string_value: "MyPipeline.MyBeamNode.my_model" } }'
      ' } } }')
  self.assertProtoPartiallyEquals(expected_output, executor_output)
def _prepare_execution(self) -> _PrepareExecutionResult:
  """Prepares inputs, outputs and execution properties for actual execution.

  Returns:
    A _PrepareExecutionResult. `is_execution_needed` is False when the input
    artifacts resolve to None (nothing is registered or published) and when a
    cached result is published; otherwise it is True and `execution_info` is
    fully populated for the executor.
  """
  # TODO(b/150979622): handle the edge case that the component get evicted
  # between successful publish and stateful working dir being clean up.
  # Otherwise following retries will keep failing because of duplicate
  # publishes.
  with self._mlmd_connection as metadata_handler:
    # 1. Prepares all contexts.
    contexts = context_lib.register_contexts_if_not_exists(
        metadata_handler=metadata_handler,
        node_contexts=self._pipeline_node.contexts)

    # 2. Resolves inputs and execution properties.
    exec_properties = inputs_utils.resolve_parameters(
        node_parameters=self._pipeline_node.parameters)
    input_artifacts = inputs_utils.resolve_input_artifacts(
        metadata_handler=metadata_handler,
        node_inputs=self._pipeline_node.inputs)

    # 3. If not all required inputs are met. Return ExecutionInfo with
    # is_execution_needed being false. No publish will happen so down stream
    # nodes won't be triggered.
    if input_artifacts is None:
      return _PrepareExecutionResult(
          execution_info=data_types.ExecutionInfo(),
          contexts=contexts,
          is_execution_needed=False)

    # 4. Registers execution in metadata.
    execution = execution_publish_utils.register_execution(
        metadata_handler=metadata_handler,
        execution_type=self._pipeline_node.node_info.type,
        contexts=contexts,
        input_artifacts=input_artifacts,
        exec_properties=exec_properties)

  # 5. Resolve output
  output_artifacts = self._output_resolver.generate_output_artifacts(
      execution.id)

  # If there is a custom driver, runs it.
  if self._driver_operator:
    driver_execution_info = data_types.ExecutionInfo(
        input_dict=input_artifacts,
        output_dict=output_artifacts,
        exec_properties=exec_properties,
        execution_output_uri=self._output_resolver.get_driver_output_uri())
    driver_output = self._driver_operator.run_driver(driver_execution_info)
    self._update_with_driver_output(driver_output, exec_properties,
                                    output_artifacts)

  # We reconnect to MLMD here because the custom driver closes MLMD connection
  # on returning.
  with self._mlmd_connection as metadata_handler:
    # 6. Check cached result
    cache_context = cache_utils.get_cache_context(
        metadata_handler=metadata_handler,
        pipeline_node=self._pipeline_node,
        pipeline_info=self._pipeline_info,
        input_artifacts=input_artifacts,
        output_artifacts=output_artifacts,
        parameters=exec_properties)
    contexts.append(cache_context)
    cached_outputs = cache_utils.get_cached_outputs(
        metadata_handler=metadata_handler, cache_context=cache_context)

    # 7. Should cache be used?
    caching_options = self._pipeline_node.execution_options.caching_options
    if caching_options.enable_cache and cached_outputs:
      # Publishes cache result
      execution_publish_utils.publish_cached_execution(
          metadata_handler=metadata_handler,
          contexts=contexts,
          execution_id=execution.id,
          output_artifacts=cached_outputs)
      return _PrepareExecutionResult(
          execution_info=data_types.ExecutionInfo(execution_id=execution.id),
          execution_metadata=execution,
          contexts=contexts,
          is_execution_needed=False)

    run_id_value = self._pipeline_runtime_spec.pipeline_run_id.field_value
    pipeline_run_id = run_id_value.string_value

    # 8. Going to trigger executor.
    executor_execution_info = data_types.ExecutionInfo(
        execution_id=execution.id,
        input_dict=input_artifacts,
        output_dict=output_artifacts,
        exec_properties=exec_properties,
        execution_output_uri=self._output_resolver.get_executor_output_uri(
            execution.id),
        stateful_working_dir=(
            self._output_resolver.get_stateful_working_directory()),
        tmp_dir=self._output_resolver.make_tmp_dir(execution.id),
        pipeline_node=self._pipeline_node,
        pipeline_info=self._pipeline_info,
        pipeline_run_id=pipeline_run_id)
    return _PrepareExecutionResult(
        execution_info=executor_execution_info,
        execution_metadata=execution,
        contexts=contexts,
        is_execution_needed=True)
def run(
    self, mlmd_connection: metadata.Metadata,
    pipeline_node: pipeline_pb2.PipelineNode,
    pipeline_info: pipeline_pb2.PipelineInfo,
    pipeline_runtime_spec: pipeline_pb2.PipelineRuntimeSpec
) -> data_types.ExecutionInfo:
  """Runs Importer specific logic.

  Args:
    mlmd_connection: ML metadata connection.
    pipeline_node: The specification of the node that this launcher launches.
    pipeline_info: The information of the pipeline that this node runs in.
    pipeline_runtime_spec: The runtime information of the pipeline that this
      node runs in. Not read by this method.

  Returns:
    An ExecutionInfo holding the registered execution id, an empty
    `input_dict`, the generated output artifacts and the parsed execution
    properties.
  """
  logging.info('Running as an importer node.')
  with mlmd_connection as metadata_handler:
    # 1. Prepares all contexts.
    contexts = context_lib.prepare_contexts(
        metadata_handler=metadata_handler,
        node_contexts=pipeline_node.contexts)

    # 2. Resolves execution properties, please note that importers has no
    # input.
    parameters_with_schema = inputs_utils.resolve_parameters_with_schema(
        node_parameters=pipeline_node.parameters)
    exec_properties = data_types_utils.build_parsed_value_dict(
        parameters_with_schema)

    # 3. Registers execution in metadata.
    execution = execution_publish_utils.register_execution(
        metadata_handler=metadata_handler,
        execution_type=pipeline_node.node_info.type,
        contexts=contexts,
        exec_properties=exec_properties)

    # 4. Generate output artifacts to represent the imported artifacts.
    output_spec = pipeline_node.outputs.outputs[importer.IMPORT_RESULT_KEY]
    properties = self._extract_proto_map(
        output_spec.artifact_spec.additional_properties)
    custom_properties = self._extract_proto_map(
        output_spec.artifact_spec.additional_custom_properties)
    output_artifact_class = types.Artifact(
        output_spec.artifact_spec.type).type
    output_artifacts = importer.generate_output_dict(
        metadata_handler=metadata_handler,
        uri=str(exec_properties[importer.SOURCE_URI_KEY]),
        properties=properties,
        custom_properties=custom_properties,
        reimport=bool(exec_properties[importer.REIMPORT_OPTION_KEY]),
        output_artifact_class=output_artifact_class,
        mlmd_artifact_type=output_spec.artifact_spec.type)

    result = data_types.ExecutionInfo(
        execution_id=execution.id,
        input_dict={},
        output_dict=output_artifacts,
        exec_properties=exec_properties,
        pipeline_node=pipeline_node,
        pipeline_info=pipeline_info)

    # TODO(b/182316162): consider let the launcher level do the publish
    # for system nodes. So that the version tagging logic doesn't need to be
    # handled per system node.
    outputs_utils.tag_output_artifacts_with_version(result.output_dict)

    # 5. Publish the output artifacts. If artifacts are reimported, the
    # execution is published as CACHED. Otherwise it is published as COMPLETE.
    if not _is_artifact_reimported(output_artifacts):
      execution_publish_utils.publish_succeeded_execution(
          metadata_handler=metadata_handler,
          execution_id=execution.id,
          contexts=contexts,
          output_artifacts=output_artifacts)
    else:
      execution_publish_utils.publish_cached_execution(
          metadata_handler=metadata_handler,
          contexts=contexts,
          execution_id=execution.id,
          output_artifacts=output_artifacts)

    return result
def testDriverRunFn(self):
  """Runs `FileBasedDriver` over one span with two splits and checks results."""
  # Create input dir.
  self._input_base_path = os.path.join(self._test_dir, 'input_base')
  fileio.makedirs(self._input_base_path)

  # Fake previous outputs
  for split_dir, content in (('split1', 'testing11'),
                             ('split2', 'testing12')):
    io_utils.write_string_file(
        os.path.join(self._input_base_path, 'span01', split_dir, 'data'),
        content)

  ir_driver = driver.FileBasedDriver(self._mock_metadata)
  example = standard_artifacts.Examples()

  # Prepare output_dic
  example.uri = 'my_uri'  # Will verify that this uri is not changed.
  output_dic = {standard_component_specs.EXAMPLES_KEY: [example]}

  # Prepare exec_properties.
  input_config = example_gen_pb2.Input(splits=[
      example_gen_pb2.Input.Split(name='s1', pattern='span{SPAN:2}/split1/*'),
      example_gen_pb2.Input.Split(name='s2', pattern='span{SPAN:2}/split2/*')
  ])
  exec_properties = {
      standard_component_specs.INPUT_BASE_KEY:
          self._input_base_path,
      standard_component_specs.INPUT_CONFIG_KEY:
          proto_utils.proto_to_json(input_config),
  }
  result = ir_driver.run(
      portable_data_types.ExecutionInfo(
          output_dict=output_dic, exec_properties=exec_properties))

  # The same fingerprint pattern is expected both in the execution properties
  # and on the output artifact.
  fingerprint_pattern = r'split:s1,num_files:1,total_bytes:9,xor_checksum:.*,sum_checksum:.*\nsplit:s2,num_files:1,total_bytes:9,xor_checksum:.*,sum_checksum:.*'

  # Assert exec_properties' values
  exec_properties = result.exec_properties
  self.assertEqual(exec_properties[utils.SPAN_PROPERTY_NAME].int_value, 1)
  updated_input_config = example_gen_pb2.Input()
  proto_utils.json_to_proto(
      exec_properties[standard_component_specs.INPUT_CONFIG_KEY].string_value,
      updated_input_config)
  expected_input_config = (
      ' splits { name: "s1" pattern: "span01/split1/*" }'
      ' splits { name: "s2" pattern: "span01/split2/*" }')
  self.assertProtoEquals(expected_input_config, updated_input_config)
  self.assertRegex(
      exec_properties[utils.FINGERPRINT_PROPERTY_NAME].string_value,
      fingerprint_pattern)

  # Assert output_artifacts' values
  output_examples = result.output_artifacts[
      standard_component_specs.EXAMPLES_KEY].artifacts
  self.assertLen(output_examples, 1)
  output_example = output_examples[0]
  self.assertEqual(output_example.uri, example.uri)
  self.assertEqual(
      output_example.custom_properties[utils.SPAN_PROPERTY_NAME].string_value,
      '1')
  self.assertRegex(
      output_example.custom_properties[
          utils.FINGERPRINT_PROPERTY_NAME].string_value, fingerprint_pattern)
def run(
    self, mlmd_connection: metadata.Metadata,
    pipeline_node: pipeline_pb2.PipelineNode,
    pipeline_info: pipeline_pb2.PipelineInfo,
    pipeline_runtime_spec: pipeline_pb2.PipelineRuntimeSpec
) -> data_types.ExecutionInfo:
  """Runs Importer specific logic.

  Args:
    mlmd_connection: ML metadata connection.
    pipeline_node: The specification of the node that this launcher launches.
    pipeline_info: The information of the pipeline that this node runs in.
    pipeline_runtime_spec: The runtime information of the pipeline that this
      node runs in. Not read by this method.

  Returns:
    An ExecutionInfo holding the registered execution id, an empty
    `input_dict`, the generated output artifacts and the resolved execution
    properties.
  """
  logging.info('Running as an importer node.')
  with mlmd_connection as metadata_handler:
    # 1. Prepares all contexts.
    contexts = context_lib.prepare_contexts(
        metadata_handler=metadata_handler,
        node_contexts=pipeline_node.contexts)

    # 2. Resolves execution properties, please note that importers has no
    # input.
    exec_properties = inputs_utils.resolve_parameters(
        node_parameters=pipeline_node.parameters)

    # 3. Registers execution in metadata.
    execution = execution_publish_utils.register_execution(
        metadata_handler=metadata_handler,
        execution_type=pipeline_node.node_info.type,
        contexts=contexts,
        exec_properties=exec_properties)

    # 4. Generate output artifacts to represent the imported artifacts.
    output_spec = pipeline_node.outputs.outputs[importer.IMPORT_RESULT_KEY]
    properties = self._extract_proto_map(
        output_spec.artifact_spec.additional_properties)
    custom_properties = self._extract_proto_map(
        output_spec.artifact_spec.additional_custom_properties)
    output_artifact_class = types.Artifact(
        output_spec.artifact_spec.type).type
    output_artifacts = importer.generate_output_dict(
        metadata_handler=metadata_handler,
        uri=str(exec_properties[importer.SOURCE_URI_KEY]),
        properties=properties,
        custom_properties=custom_properties,
        reimport=bool(exec_properties[importer.REIMPORT_OPTION_KEY]),
        output_artifact_class=output_artifact_class,
        mlmd_artifact_type=output_spec.artifact_spec.type)

    # 5. Publish the output artifacts.
    execution_publish_utils.publish_succeeded_execution(
        metadata_handler=metadata_handler,
        execution_id=execution.id,
        contexts=contexts,
        output_artifacts=output_artifacts)

    result = data_types.ExecutionInfo(
        execution_id=execution.id,
        input_dict={},
        output_dict=output_artifacts,
        exec_properties=exec_properties,
        pipeline_node=pipeline_node,
        pipeline_info=pipeline_info)
    return result
def run(
    self, mlmd_connection: metadata.Metadata,
    pipeline_node: pipeline_pb2.PipelineNode,
    pipeline_info: pipeline_pb2.PipelineInfo,
    pipeline_runtime_spec: pipeline_pb2.PipelineRuntimeSpec
) -> data_types.ExecutionInfo:
  """Runs Resolver specific logic.

  Args:
    mlmd_connection: ML metadata connection.
    pipeline_node: The specification of the node that this launcher launches.
    pipeline_info: The information of the pipeline that this node runs in.
    pipeline_runtime_spec: The runtime information of the pipeline that this
      node runs in. Not read by this method.

  Returns:
    An ExecutionInfo. It is empty when input resolution yields a Skip. After
    a failed execution is published it carries only the execution id,
    execution properties, node and pipeline info. On success it additionally
    carries the resolved artifacts as both `input_dict` and `output_dict`.
  """
  logging.info('Running as an resolver node.')
  with mlmd_connection as metadata_handler:
    # 1. Prepares all contexts.
    contexts = context_lib.prepare_contexts(
        metadata_handler=metadata_handler,
        node_contexts=pipeline_node.contexts)

    # 2. Resolves inputs and execution properties.
    parameters_with_schema = inputs_utils.resolve_parameters_with_schema(
        node_parameters=pipeline_node.parameters)
    exec_properties = data_types_utils.build_parsed_value_dict(
        parameters_with_schema)

    try:
      resolved_inputs = inputs_utils.resolve_input_artifacts_v2(
          pipeline_node=pipeline_node, metadata_handler=metadata_handler)
    except exceptions.InputResolutionError as error:
      # Input resolution failed: register the execution and publish it as
      # failed with the error's gRPC code.
      execution = execution_publish_utils.register_execution(
          metadata_handler=metadata_handler,
          execution_type=pipeline_node.node_info.type,
          contexts=contexts,
          exec_properties=exec_properties)
      execution_publish_utils.publish_failed_execution(
          metadata_handler=metadata_handler,
          contexts=contexts,
          execution_id=execution.id,
          executor_output=self._build_error_output(
              code=error.grpc_code_value))
      return data_types.ExecutionInfo(
          execution_id=execution.id,
          exec_properties=exec_properties,
          pipeline_node=pipeline_node,
          pipeline_info=pipeline_info)

    # 2a. If Skip (i.e. inside conditional), no execution should be made.
    # TODO(b/197907821): Publish special execution for Skip?
    if isinstance(resolved_inputs, inputs_utils.Skip):
      return data_types.ExecutionInfo()

    # 3. Registers execution in metadata.
    execution = execution_publish_utils.register_execution(
        metadata_handler=metadata_handler,
        execution_type=pipeline_node.node_info.type,
        contexts=contexts,
        exec_properties=exec_properties)

    # TODO(b/197741942): Support len > 1.
    if len(resolved_inputs) > 1:
      unimplemented_output = self._build_error_output(
          _ERROR_CODE_UNIMPLEMENTED,
          'Handling more than one input dicts not implemented yet.')
      execution_publish_utils.publish_failed_execution(
          metadata_handler=metadata_handler,
          contexts=contexts,
          execution_id=execution.id,
          executor_output=unimplemented_output)
      return data_types.ExecutionInfo(
          execution_id=execution.id,
          exec_properties=exec_properties,
          pipeline_node=pipeline_node,
          pipeline_info=pipeline_info)

    # NOTE(review): an empty `resolved_inputs` would raise IndexError here --
    # confirm resolve_input_artifacts_v2 never returns an empty sequence.
    input_artifacts = resolved_inputs[0]

    # 4. Publishes the execution, reporting the resolved input artifacts as
    # its output artifacts.
    execution_publish_utils.publish_internal_execution(
        metadata_handler=metadata_handler,
        contexts=contexts,
        execution_id=execution.id,
        output_artifacts=input_artifacts)

    return data_types.ExecutionInfo(
        execution_id=execution.id,
        input_dict=input_artifacts,
        output_dict=input_artifacts,
        exec_properties=exec_properties,
        pipeline_node=pipeline_node,
        pipeline_info=pipeline_info)