def _build_importer_spec(self) -> ImporterSpec:
  """Builds ImporterSpec."""
  assert isinstance(self._node, importer.Importer)
  output_channel = self._node.outputs[importer.IMPORT_RESULT_KEY]
  result = ImporterSpec()

  # Importer's output channel contains one artifact instance with
  # additional properties.
  artifact_instance = list(output_channel.get())[0]
  struct_proto = compiler_utils.pack_artifact_properties(artifact_instance)
  if struct_proto:
    result.metadata.CopyFrom(struct_proto)

  result.reimport = bool(self._exec_properties[importer.REIMPORT_OPTION_KEY])
  result.artifact_uri.CopyFrom(
      compiler_utils.value_converter(
          self._exec_properties[importer.SOURCE_URI_KEY]))
  single_artifact = artifact_utils.get_single_instance(
      list(output_channel.get()))
  result.type_schema.CopyFrom(
      pipeline_pb2.ArtifactTypeSchema(
          instance_schema=compiler_utils.get_artifact_schema(
              single_artifact)))
  return result
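# Hedged sketch (illustrative, not from the original sources): the metadata
# packing above presumably flattens the output artifact's additional
# properties into a google.protobuf.Struct before copying it into
# `result.metadata`. A minimal stand-in for that packing could look like the
# following; the keys 'span' and 'split_names' are example values only, not
# guaranteed by pack_artifact_properties.
from google.protobuf import json_format
from google.protobuf import struct_pb2

struct_proto = struct_pb2.Struct()
json_format.ParseDict({'span': 2, 'split_names': '["train", "eval"]'},
                      struct_proto)
# result.metadata.CopyFrom(struct_proto) would then surface these key/value
# pairs as the importer's artifact metadata in the compiled spec.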
def setUp(self):
  super().setUp()
  self._test_dir = tempfile.mkdtemp()

  self._executor_invocation = pipeline_pb2.ExecutorInput()
  self._executor_invocation.outputs.output_file = _TEST_OUTPUT_METADATA_JSON
  self._executor_invocation.inputs.parameters[
      'input_base_uri'].string_value = _TEST_INPUT_DIR
  self._executor_invocation.inputs.parameters[
      'input_config'].string_value = json_format.MessageToJson(
          example_gen_pb2.Input(splits=[
              example_gen_pb2.Input.Split(
                  name='s1', pattern='span{SPAN}/split1/*'),
              example_gen_pb2.Input.Split(
                  name='s2', pattern='span{SPAN}/split2/*')
          ]))
  self._executor_invocation.outputs.artifacts['examples'].artifacts.append(
      pipeline_pb2.RuntimeArtifact(
          type=pipeline_pb2.ArtifactTypeSchema(
              instance_schema=compiler_utils.get_artifact_schema(
                  standard_artifacts.Examples()))))

  self._executor_invocation_from_file = fileio.open(
      os.path.join(
          os.path.dirname(__file__), 'testdata', 'executor_invocation.json'),
      'r').read()
  self._expected_result_from_file = fileio.open(
      os.path.join(
          os.path.dirname(__file__), 'testdata',
          'expected_output_metadata.json'), 'r').read()

  self._olddir = os.getcwd()
  os.chdir(self._test_dir)
  fileio.makedirs(os.path.dirname(_TEST_OUTPUT_METADATA_JSON))
  fileio.makedirs(os.path.dirname(_TEST_INPUT_DIR))
def _build_importer_spec(self) -> ImporterSpec:
  """Builds ImporterSpec."""
  assert isinstance(self._node, importer.Importer)
  output_channel = self._node.outputs[importer.IMPORT_RESULT_KEY]
  result = ImporterSpec()

  # Importer's output channel contains one artifact instance with
  # additional properties.
  if output_channel.additional_properties:
    result.metadata.update(output_channel.additional_properties)
  if output_channel.additional_custom_properties:
    result.metadata.update(output_channel.additional_custom_properties)

  result.reimport = bool(self._exec_properties[importer.REIMPORT_OPTION_KEY])

  # The 'artifact_uri' property of an Importer node should be a string, but
  # the type is not checked (beyond the pytype hint) in the Importer node
  # itself. It is therefore possible to escape the type constraint and pass a
  # RuntimeParameter. If that happens, we overwrite the runtime parameter name
  # with 'artifact_uri' instead of using the name of the user-provided runtime
  # parameter.
  if isinstance(self._exec_properties[importer.SOURCE_URI_KEY],
                data_types.RuntimeParameter):
    result.artifact_uri.runtime_parameter = importer.SOURCE_URI_KEY
  else:
    result.artifact_uri.CopyFrom(
        compiler_utils.value_converter(
            self._exec_properties[importer.SOURCE_URI_KEY]))

  result.type_schema.CopyFrom(
      pipeline_pb2.ArtifactTypeSchema(
          instance_schema=compiler_utils.get_artifact_schema(
              output_channel.type)))
  return result
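# Hedged sketch (illustrative, not from the original sources): how the
# RuntimeParameter branch above can be reached. Importer only type-hints
# `source_uri` as str, so a RuntimeParameter can slip through at construction
# time; the compiled spec then keys the parameter as importer.SOURCE_URI_KEY
# rather than the user-chosen name. The parameter name 'my_uri' is
# hypothetical.
from tfx.dsl.components.common import importer
from tfx.orchestration import data_types
from tfx.types import standard_artifacts

uri_param = data_types.RuntimeParameter(name='my_uri', ptype=str)
importer_node = importer.Importer(
    source_uri=uri_param,  # escapes the str type hint
    artifact_type=standard_artifacts.Examples)
# After _build_importer_spec() runs for this node,
# result.artifact_uri.runtime_parameter == importer.SOURCE_URI_KEY, so the
# user-facing name 'my_uri' never reaches the compiled spec.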
def testArtifactSchemaMapping(self):
  # Test first-party standard artifact.
  example_schema = compiler_utils.get_artifact_schema(
      standard_artifacts.Examples)
  expected_example_schema = fileio.open(
      os.path.join(self._schema_base_dir, 'Examples.yaml'), 'rb').read()
  self.assertEqual(expected_example_schema, example_schema)

  # Test Kubeflow simple artifact.
  file_schema = compiler_utils.get_artifact_schema(simple_artifacts.File)
  expected_file_schema = fileio.open(
      os.path.join(self._schema_base_dir, 'File.yaml'), 'rb').read()
  self.assertEqual(expected_file_schema, file_schema)

  # Test custom artifact type.
  my_artifact_schema = compiler_utils.get_artifact_schema(_MyArtifact)
  self.assertDictEqual(
      yaml.safe_load(my_artifact_schema),
      yaml.safe_load(_EXPECTED_MY_ARTIFACT_SCHEMA))
def testCustomArtifactMappingFails(self):
  my_bad_artifact = _MyBadArtifact()
  my_bad_artifact_schema = compiler_utils.get_artifact_schema(my_bad_artifact)
  self.assertDictEqual(
      yaml.safe_load(my_bad_artifact_schema),
      yaml.safe_load(_EXPECTED_MY_BAD_ARTIFACT_SCHEMA))

  my_bad_artifact.int1 = 42
  with self.assertRaisesRegex(KeyError, 'Actual property:'):
    _ = compiler_utils.build_output_artifact_spec(
        channel_utils.as_channel([my_bad_artifact]))
def _build_importer_spec(self) -> ImporterSpec:
  """Builds ImporterSpec."""
  assert isinstance(self._node, importer_node.ImporterNode)
  result = ImporterSpec(
      properties=compiler_utils.convert_from_tfx_properties(
          self._exec_properties[importer_node.PROPERTIES_KEY]),
      custom_properties=compiler_utils.convert_from_tfx_properties(
          self._exec_properties[importer_node.CUSTOM_PROPERTIES_KEY]))
  result.reimport = bool(
      self._exec_properties[importer_node.REIMPORT_OPTION_KEY])
  result.artifact_uri.CopyFrom(
      compiler_utils.value_converter(
          self._exec_properties[importer_node.SOURCE_URI_KEY]))
  single_artifact = artifact_utils.get_single_instance(
      list(self._node.outputs[importer_node.IMPORT_RESULT_KEY].get()))
  result.type_schema.CopyFrom(
      pipeline_pb2.ArtifactTypeSchema(
          instance_schema=compiler_utils.get_artifact_schema(
              single_artifact)))
  return result
def setUp(self):
  self._executor_invocation = pipeline_pb2.ExecutorInput()
  self._executor_invocation.outputs.output_file = _TEST_OUTPUT_METADATA_JSON
  self._executor_invocation.inputs.parameters[
      'input_base_uri'].string_value = _TEST_INPUT_DIR
  self._executor_invocation.inputs.parameters[
      'input_config'].string_value = json_format.MessageToJson(
          example_gen_pb2.Input(splits=[
              example_gen_pb2.Input.Split(
                  name='s1', pattern='span{SPAN}/split1/*'),
              example_gen_pb2.Input.Split(
                  name='s2', pattern='span{SPAN}/split2/*')
          ]))
  self._executor_invocation.outputs.artifacts['examples'].artifacts.append(
      pipeline_pb2.RuntimeArtifact(
          type=pipeline_pb2.ArtifactTypeSchema(
              instance_schema=compiler_utils.get_artifact_schema(
                  standard_artifacts.Examples()))))

  self._executor_invocation_from_file = fileio.open(
      os.path.join(
          os.path.dirname(__file__), 'testdata', 'executor_invocation.json'),
      'r').read()
  logging.debug('Executor invocation under test: %s',
                self._executor_invocation_from_file)
  self._expected_result_from_file = fileio.open(
      os.path.join(
          os.path.dirname(__file__), 'testdata',
          'expected_output_metadata.json'), 'r').read()
  logging.debug('Expecting output metadata JSON: %s',
                self._expected_result_from_file)

  # The initialization of TempWorkingDirTestCase has to be called after all
  # the testdata files have been read. Otherwise the original testdata files
  # are not accessible after cwd is changed.
  super().setUp()

  fileio.makedirs(os.path.dirname(_TEST_OUTPUT_METADATA_JSON))
  fileio.makedirs(os.path.dirname(_TEST_INPUT_DIR))
def setUp(self):
  super().setUp()

  self._executor_invocation = pipeline_pb2.ExecutorInput()
  self._executor_invocation.outputs.output_file = _TEST_OUTPUT_METADATA_JSON
  self._executor_invocation.inputs.parameters[
      'input_base'].string_value = _TEST_INPUT_DIR
  self._executor_invocation.inputs.parameters[
      'output_config'].string_value = '{}'
  self._executor_invocation.inputs.parameters[
      'input_config'].string_value = json_format.MessageToJson(
          example_gen_pb2.Input(splits=[
              example_gen_pb2.Input.Split(
                  name='s1', pattern='span{SPAN}/split1/*'),
              example_gen_pb2.Input.Split(
                  name='s2', pattern='span{SPAN}/split2/*')
          ]))
  self._executor_invocation.outputs.artifacts['examples'].artifacts.append(
      pipeline_pb2.RuntimeArtifact(
          type=pipeline_pb2.ArtifactTypeSchema(
              instance_schema=compiler_utils.get_artifact_schema(
                  standard_artifacts.Examples))))

  self._executor_invocation_from_file = fileio.open(
      os.path.join(
          os.path.dirname(__file__), 'testdata', 'executor_invocation.json'),
      'r').read()
  logging.debug('Executor invocation under test: %s',
                self._executor_invocation_from_file)
  self._expected_result_from_file = fileio.open(
      os.path.join(
          os.path.dirname(__file__), 'testdata',
          'expected_output_metadata.json'), 'r').read()
  logging.debug('Expecting output metadata JSON: %s',
                self._expected_result_from_file)

  # Change the working directory only after all the testdata files have been
  # read.
  self.enter_context(test_case_utils.change_working_dir(self.tmp_dir))
  fileio.makedirs(os.path.dirname(_TEST_INPUT_DIR))