def setUp(self):
  super().setUp()
  self._test_dir = tempfile.mkdtemp()

  self._executor_invocation = pipeline_pb2.ExecutorInput()
  self._executor_invocation.outputs.output_file = _TEST_OUTPUT_METADATA_JSON
  self._executor_invocation.inputs.parameters[
      'input_base_uri'].string_value = _TEST_INPUT_DIR
  self._executor_invocation.inputs.parameters[
      'input_config'].string_value = json_format.MessageToJson(
          example_gen_pb2.Input(splits=[
              example_gen_pb2.Input.Split(
                  name='s1', pattern='span{SPAN}/split1/*'),
              example_gen_pb2.Input.Split(
                  name='s2', pattern='span{SPAN}/split2/*')
          ]))
  self._executor_invocation.outputs.artifacts['examples'].artifacts.append(
      pipeline_pb2.RuntimeArtifact(
          type=pipeline_pb2.ArtifactTypeSchema(
              instance_schema=compiler_utils.get_artifact_schema(
                  standard_artifacts.Examples()))))

  # Read the testdata files before changing the working directory; they are
  # resolved relative to this test module and become unreachable after chdir.
  self._executor_invocation_from_file = fileio.open(
      os.path.join(
          os.path.dirname(__file__), 'testdata',
          'executor_invocation.json'), 'r').read()
  self._expected_result_from_file = fileio.open(
      os.path.join(
          os.path.dirname(__file__), 'testdata',
          'expected_output_metadata.json'), 'r').read()

  self._olddir = os.getcwd()
  os.chdir(self._test_dir)

  fileio.makedirs(os.path.dirname(_TEST_OUTPUT_METADATA_JSON))
  fileio.makedirs(os.path.dirname(_TEST_INPUT_DIR))

def _build_importer_spec(self) -> ImporterSpec:
  """Builds ImporterSpec."""
  assert isinstance(self._node, importer.Importer)
  output_channel = self._node.outputs[importer.IMPORT_RESULT_KEY]
  result = ImporterSpec()

  # Importer's output channel contains one artifact instance with
  # additional properties.
  artifact_instance = list(output_channel.get())[0]
  struct_proto = compiler_utils.pack_artifact_properties(artifact_instance)
  if struct_proto:
    result.metadata.CopyFrom(struct_proto)

  result.reimport = bool(self._exec_properties[importer.REIMPORT_OPTION_KEY])
  result.artifact_uri.CopyFrom(
      compiler_utils.value_converter(
          self._exec_properties[importer.SOURCE_URI_KEY]))
  single_artifact = artifact_utils.get_single_instance(
      list(output_channel.get()))
  result.type_schema.CopyFrom(
      pipeline_pb2.ArtifactTypeSchema(
          instance_schema=compiler_utils.get_artifact_schema(single_artifact)))
  return result

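# A minimal sketch (not part of the original module) of the exec_properties
# dict this builder reads. The keys come from the `importer` module used
# above; the URI value is hypothetical.
_example_exec_properties = {
    importer.SOURCE_URI_KEY: 'gs://my-bucket/my-artifact',  # hypothetical URI
    importer.REIMPORT_OPTION_KEY: False,  # do not force a reimport
}
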
def build_input_artifact_spec(
    channel_spec: channel.Channel
) -> pipeline_pb2.ComponentInputsSpec.ArtifactSpec:
  """Builds artifact type spec for an input channel."""
  artifact_instance = channel_spec.type()
  result = pipeline_pb2.ComponentInputsSpec.ArtifactSpec()
  result.artifact_type.CopyFrom(
      pipeline_pb2.ArtifactTypeSchema(
          instance_schema=get_artifact_schema(artifact_instance)))
  _validate_properties_schema(
      instance_schema=result.artifact_type.instance_schema,
      properties=channel_spec.type.PROPERTIES)
  return result

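# A minimal usage sketch, assuming the module-level imports used elsewhere in
# this file (`channel` from tfx.types and `standard_artifacts`); the channel
# below is illustrative, not from the original code.
examples_channel = channel.Channel(type=standard_artifacts.Examples)
input_spec = build_input_artifact_spec(examples_channel)
# input_spec.artifact_type.instance_schema now holds the YAML schema derived
# from the Examples artifact type.
print(input_spec.artifact_type.instance_schema)
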
def build_output_artifact_spec(
    channel_spec: channel.Channel
) -> pipeline_pb2.TaskOutputsSpec.OutputArtifactSpec:
  """Builds the Kubeflow pipeline output artifact spec from TFX channel spec."""
  artifact_instance = channel_spec.type()
  result = pipeline_pb2.TaskOutputsSpec.OutputArtifactSpec()
  result.artifact_type.CopyFrom(
      pipeline_pb2.ArtifactTypeSchema(
          instance_schema=get_artifact_schema(artifact_instance)))
  # Mirror the MLMD properties and custom properties of the artifact instance
  # into the output spec.
  for k, v in convert_from_tfx_properties(
      artifact_instance.mlmd_artifact.properties).items():
    result.properties[k].CopyFrom(v)
  for k, v in convert_from_tfx_properties(
      artifact_instance.mlmd_artifact.custom_properties).items():
    result.custom_properties[k].CopyFrom(v)
  return result

def _build_importer_spec(self) -> ImporterSpec:
  """Builds ImporterSpec."""
  assert isinstance(self._node, importer_node.ImporterNode)
  result = ImporterSpec(
      properties=compiler_utils.convert_from_tfx_properties(
          self._exec_properties[importer_node.PROPERTIES_KEY]),
      custom_properties=compiler_utils.convert_from_tfx_properties(
          self._exec_properties[importer_node.CUSTOM_PROPERTIES_KEY]))
  result.reimport = bool(
      self._exec_properties[importer_node.REIMPORT_OPTION_KEY])
  result.artifact_uri.CopyFrom(
      compiler_utils.value_converter(
          self._exec_properties[importer_node.SOURCE_URI_KEY]))
  single_artifact = artifact_utils.get_single_instance(
      list(self._node.outputs[importer_node.IMPORT_RESULT_KEY].get()))
  result.type_schema.CopyFrom(
      pipeline_pb2.ArtifactTypeSchema(
          instance_schema=compiler_utils.get_artifact_schema(single_artifact)))
  return result

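# Sketch (illustrative, not from the original code) of the exec_properties
# consumed by this importer_node variant, which additionally carries explicit
# (custom) property dicts; all values here are hypothetical.
_example_exec_properties = {
    importer_node.SOURCE_URI_KEY: 'gs://my-bucket/my-schema',
    importer_node.REIMPORT_OPTION_KEY: False,
    importer_node.PROPERTIES_KEY: {'name': 'my_artifact'},
    importer_node.CUSTOM_PROPERTIES_KEY: {'owner': 'my-team'},
}
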
def setUp(self):
  self._executor_invocation = pipeline_pb2.ExecutorInput()
  self._executor_invocation.outputs.output_file = _TEST_OUTPUT_METADATA_JSON
  self._executor_invocation.inputs.parameters[
      'input_base_uri'].string_value = _TEST_INPUT_DIR
  self._executor_invocation.inputs.parameters[
      'input_config'].string_value = json_format.MessageToJson(
          example_gen_pb2.Input(splits=[
              example_gen_pb2.Input.Split(
                  name='s1', pattern='span{SPAN}/split1/*'),
              example_gen_pb2.Input.Split(
                  name='s2', pattern='span{SPAN}/split2/*')
          ]))
  self._executor_invocation.outputs.artifacts['examples'].artifacts.append(
      pipeline_pb2.RuntimeArtifact(
          type=pipeline_pb2.ArtifactTypeSchema(
              instance_schema=compiler_utils.get_artifact_schema(
                  standard_artifacts.Examples()))))

  self._executor_invocation_from_file = fileio.open(
      os.path.join(
          os.path.dirname(__file__), 'testdata',
          'executor_invocation.json'), 'r').read()
  logging.debug('Executor invocation under test: %s',
                self._executor_invocation_from_file)
  self._expected_result_from_file = fileio.open(
      os.path.join(
          os.path.dirname(__file__), 'testdata',
          'expected_output_metadata.json'), 'r').read()
  logging.debug('Expecting output metadata JSON: %s',
                self._expected_result_from_file)

  # The initialization of TempWorkingDirTestCase has to be called after all
  # the testdata files have been read. Otherwise the original testdata files
  # are not accessible after cwd is changed.
  super().setUp()

  fileio.makedirs(os.path.dirname(_TEST_OUTPUT_METADATA_JSON))
  fileio.makedirs(os.path.dirname(_TEST_INPUT_DIR))

def setUp(self):
  super().setUp()

  self._executor_invocation = pipeline_pb2.ExecutorInput()
  self._executor_invocation.outputs.output_file = _TEST_OUTPUT_METADATA_JSON
  self._executor_invocation.inputs.parameters[
      'input_base_uri'].string_value = _TEST_INPUT_DIR
  self._executor_invocation.inputs.parameters[
      'input_config'].string_value = json_format.MessageToJson(
          example_gen_pb2.Input(splits=[
              example_gen_pb2.Input.Split(
                  name='s1', pattern='span{SPAN}/split1/*'),
              example_gen_pb2.Input.Split(
                  name='s2', pattern='span{SPAN}/split2/*')
          ]))
  self._executor_invocation.outputs.artifacts['examples'].artifacts.append(
      pipeline_pb2.RuntimeArtifact(
          type=pipeline_pb2.ArtifactTypeSchema(
              instance_schema=compiler_utils.get_artifact_schema(
                  standard_artifacts.Examples()))))

  self._executor_invocation_from_file = fileio.open(
      os.path.join(
          os.path.dirname(__file__), 'testdata',
          'executor_invocation.json'), 'r').read()
  logging.debug('Executor invocation under test: %s',
                self._executor_invocation_from_file)
  self._expected_result_from_file = fileio.open(
      os.path.join(
          os.path.dirname(__file__), 'testdata',
          'expected_output_metadata.json'), 'r').read()
  logging.debug('Expecting output metadata JSON: %s',
                self._expected_result_from_file)

  # Change working directory after all the testdata files have been read.
  self.enter_context(test_case_utils.change_working_dir(self.tmp_dir))

  fileio.makedirs(os.path.dirname(_TEST_INPUT_DIR))

def build_output_artifact_spec(
    channel_spec: channel.Channel
) -> pipeline_pb2.ComponentOutputsSpec.ArtifactSpec:
  """Builds artifact type spec for an output channel."""
  # We use the first artifact instance if available from channel, otherwise
  # create one.
  artifacts = list(channel_spec.get())
  artifact_instance = artifacts[0] if artifacts else channel_spec.type()

  result = pipeline_pb2.ComponentOutputsSpec.ArtifactSpec()
  result.artifact_type.CopyFrom(
      pipeline_pb2.ArtifactTypeSchema(
          instance_schema=get_artifact_schema(artifact_instance)))
  _validate_properties_schema(
      instance_schema=result.artifact_type.instance_schema,
      properties=channel_spec.type.PROPERTIES)

  struct_proto = pack_artifact_properties(artifact_instance)
  if struct_proto:
    result.metadata.CopyFrom(struct_proto)
  return result

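# Usage sketch, assuming this era's Channel API (`set_artifacts`) and the
# standard Examples artifact; the property value below is illustrative.
examples = standard_artifacts.Examples()
examples.span = 2  # `span` is a declared property of Examples
output_spec = build_output_artifact_spec(
    channel.Channel(type=standard_artifacts.Examples).set_artifacts([examples]))
# The packed properties of the provided artifact instance surface in
# output_spec.metadata; with no artifact in the channel, metadata stays empty.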