Example #1
  def _build_importer_spec(self) -> ImporterSpec:
    """Builds ImporterSpec."""
    assert isinstance(self._node, importer.Importer)
    output_channel = self._node.outputs[importer.IMPORT_RESULT_KEY]
    result = ImporterSpec()

    # Importer's output channel contains exactly one artifact instance, which
    # may carry additional (custom) properties.
    artifact_instance = artifact_utils.get_single_instance(
        list(output_channel.get()))
    struct_proto = compiler_utils.pack_artifact_properties(artifact_instance)
    if struct_proto:
      result.metadata.CopyFrom(struct_proto)

    result.reimport = bool(self._exec_properties[importer.REIMPORT_OPTION_KEY])
    result.artifact_uri.CopyFrom(
        compiler_utils.value_converter(
            self._exec_properties[importer.SOURCE_URI_KEY]))
    result.type_schema.CopyFrom(
        pipeline_pb2.ArtifactTypeSchema(
            instance_schema=compiler_utils.get_artifact_schema(
                artifact_instance)))

    return result
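
For context, a minimal sketch of the metadata payload involved here, assuming pack_artifact_properties flattens the artifact's properties into a google.protobuf.Struct (the key names below are illustrative only):

from google.protobuf import struct_pb2

# Illustrative only: a Struct carrying two artifact properties.
struct_proto = struct_pb2.Struct()
struct_proto.update({'version': 1, 'split_names': '["train", "eval"]'})
# result.metadata.CopyFrom(struct_proto) would then embed these key/value
# pairs in the compiled ImporterSpec.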
Example #2
    def setUp(self):
        super().setUp()
        self._test_dir = tempfile.mkdtemp()

        self._executor_invocation = pipeline_pb2.ExecutorInput()
        self._executor_invocation.outputs.output_file = _TEST_OUTPUT_METADATA_JSON
        self._executor_invocation.inputs.parameters[
            'input_base_uri'].string_value = _TEST_INPUT_DIR
        self._executor_invocation.inputs.parameters[
            'input_config'].string_value = json_format.MessageToJson(
                example_gen_pb2.Input(splits=[
                    example_gen_pb2.Input.Split(name='s1',
                                                pattern='span{SPAN}/split1/*'),
                    example_gen_pb2.Input.Split(name='s2',
                                                pattern='span{SPAN}/split2/*')
                ]))
        self._executor_invocation.outputs.artifacts[
            'examples'].artifacts.append(
                pipeline_pb2.RuntimeArtifact(
                    type=pipeline_pb2.ArtifactTypeSchema(
                        instance_schema=compiler_utils.get_artifact_schema(
                            standard_artifacts.Examples()))))

        self._executor_invocation_from_file = fileio.open(
            os.path.join(os.path.dirname(__file__), 'testdata',
                         'executor_invocation.json'), 'r').read()
        self._expected_result_from_file = fileio.open(
            os.path.join(os.path.dirname(__file__), 'testdata',
                         'expected_output_metadata.json'), 'r').read()

        self._olddir = os.getcwd()
        os.chdir(self._test_dir)
        fileio.makedirs(os.path.dirname(_TEST_OUTPUT_METADATA_JSON))
        fileio.makedirs(os.path.dirname(_TEST_INPUT_DIR))
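
A test body built on this setUp() can round-trip the JSON testdata back into a proto for comparison; a minimal sketch, assuming executor_invocation.json serializes the same ExecutorInput message (the helper name is hypothetical):

from google.protobuf import json_format

def parse_invocation(json_text: str) -> pipeline_pb2.ExecutorInput:
  """Hypothetical helper: parses serialized ExecutorInput JSON into a proto."""
  return json_format.Parse(json_text, pipeline_pb2.ExecutorInput())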
Example #3
  def _build_importer_spec(self) -> ImporterSpec:
    """Builds ImporterSpec."""
    assert isinstance(self._node, importer.Importer)
    output_channel = self._node.outputs[importer.IMPORT_RESULT_KEY]
    result = ImporterSpec()

    # The Importer's output channel carries the additional (custom) properties
    # to attach to the imported artifact.
    if output_channel.additional_properties:
      result.metadata.update(output_channel.additional_properties)
    if output_channel.additional_custom_properties:
      result.metadata.update(output_channel.additional_custom_properties)

    result.reimport = bool(self._exec_properties[importer.REIMPORT_OPTION_KEY])

    # The 'artifact_uri' property of an Importer node should be a string, but
    # its type is only hinted (via pytype), not enforced, so a
    # RuntimeParameter can slip past the constraint. If that happens, we
    # overwrite the runtime parameter name with 'artifact_uri' instead of
    # using the name of the user-provided runtime parameter.
    if isinstance(self._exec_properties[importer.SOURCE_URI_KEY],
                  data_types.RuntimeParameter):
      result.artifact_uri.runtime_parameter = importer.SOURCE_URI_KEY
    else:
      result.artifact_uri.CopyFrom(
          compiler_utils.value_converter(
              self._exec_properties[importer.SOURCE_URI_KEY]))

    result.type_schema.CopyFrom(
        pipeline_pb2.ArtifactTypeSchema(
            instance_schema=compiler_utils.get_artifact_schema(
                output_channel.type)))

    return result
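
To make the escape hatch described in the comment concrete, here is a hypothetical sketch ('my_uri' is an illustrative name) of a RuntimeParameter reaching SOURCE_URI_KEY:

# Illustrative only: a runtime parameter passed in place of a string URI.
uri_param = data_types.RuntimeParameter(name='my_uri', ptype=str)
exec_properties = {importer.SOURCE_URI_KEY: uri_param}

# The isinstance() branch above then rebinds the name, so the compiled spec
# ends up with result.artifact_uri.runtime_parameter == 'artifact_uri',
# not 'my_uri'.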
Example #4
    def testArtifactSchemaMapping(self):
        # Test first party standard artifact.
        example_schema = compiler_utils.get_artifact_schema(
            standard_artifacts.Examples)
        expected_example_schema = fileio.open(
            os.path.join(self._schema_base_dir, 'Examples.yaml'), 'rb').read()
        self.assertEqual(expected_example_schema, example_schema)

        # Test Kubeflow simple artifact.
        file_schema = compiler_utils.get_artifact_schema(simple_artifacts.File)
        expected_file_schema = fileio.open(
            os.path.join(self._schema_base_dir, 'File.yaml'), 'rb').read()
        self.assertEqual(expected_file_schema, file_schema)

        # Test custom artifact type.
        my_artifact_schema = compiler_utils.get_artifact_schema(_MyArtifact)
        self.assertDictEqual(yaml.safe_load(my_artifact_schema),
                             yaml.safe_load(_EXPECTED_MY_ARTIFACT_SCHEMA))
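
For reference, a minimal sketch of what a custom artifact type such as _MyArtifact might look like; the property set is a hypothetical stand-in, not the actual test fixture:

from tfx.types import artifact

class _MyArtifact(artifact.Artifact):
  # Hypothetical custom artifact type; the property set is illustrative only.
  TYPE_NAME = 'MyArtifact'
  PROPERTIES = {
      'int1': artifact.Property(type=artifact.PropertyType.INT),
  }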
Example #5
    def testCustomArtifactMappingFails(self):
        my_bad_artifact = _MyBadArtifact()
        my_bad_artifact_schema = compiler_utils.get_artifact_schema(
            my_bad_artifact)
        self.assertDictEqual(yaml.safe_load(my_bad_artifact_schema),
                             yaml.safe_load(_EXPECTED_MY_BAD_ARTIFACT_SCHEMA))

        my_bad_artifact.int1 = 42
        with self.assertRaisesRegex(KeyError, 'Actual property:'):
            _ = compiler_utils.build_output_artifact_spec(
                channel_utils.as_channel([my_bad_artifact]))
Example #6
    def _build_importer_spec(self) -> ImporterSpec:
        """Builds ImporterSpec."""
        assert isinstance(self._node, importer_node.ImporterNode)
        result = ImporterSpec(
            properties=compiler_utils.convert_from_tfx_properties(
                self._exec_properties[importer_node.PROPERTIES_KEY]),
            custom_properties=compiler_utils.convert_from_tfx_properties(
                self._exec_properties[importer_node.CUSTOM_PROPERTIES_KEY]))
        result.reimport = bool(
            self._exec_properties[importer_node.REIMPORT_OPTION_KEY])
        result.artifact_uri.CopyFrom(
            compiler_utils.value_converter(
                self._exec_properties[importer_node.SOURCE_URI_KEY]))
        single_artifact = artifact_utils.get_single_instance(
            list(self._node.outputs[importer_node.IMPORT_RESULT_KEY].get()))
        result.type_schema.CopyFrom(
            pipeline_pb2.ArtifactTypeSchema(
                instance_schema=compiler_utils.get_artifact_schema(
                    single_artifact)))

        return result
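
A hypothetical sketch of the exec_properties this builder consumes; the keys come from the snippet above, while the values are illustrative only:

exec_properties = {
    importer_node.SOURCE_URI_KEY: 'gs://bucket/some/path',
    importer_node.REIMPORT_OPTION_KEY: False,
    importer_node.PROPERTIES_KEY: {'split_names': '["train", "eval"]'},
    importer_node.CUSTOM_PROPERTIES_KEY: {'tag': 'nightly'},
}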
Example #7
  def setUp(self):
    self._executor_invocation = pipeline_pb2.ExecutorInput()
    self._executor_invocation.outputs.output_file = _TEST_OUTPUT_METADATA_JSON
    self._executor_invocation.inputs.parameters[
        'input_base_uri'].string_value = _TEST_INPUT_DIR
    self._executor_invocation.inputs.parameters[
        'input_config'].string_value = json_format.MessageToJson(
            example_gen_pb2.Input(splits=[
                example_gen_pb2.Input.Split(
                    name='s1', pattern='span{SPAN}/split1/*'),
                example_gen_pb2.Input.Split(
                    name='s2', pattern='span{SPAN}/split2/*')
            ]))
    self._executor_invocation.outputs.artifacts['examples'].artifacts.append(
        pipeline_pb2.RuntimeArtifact(
            type=pipeline_pb2.ArtifactTypeSchema(
                instance_schema=compiler_utils.get_artifact_schema(
                    standard_artifacts.Examples()))))

    self._executor_invocation_from_file = fileio.open(
        os.path.join(
            os.path.dirname(__file__), 'testdata', 'executor_invocation.json'),
        'r').read()

    logging.debug('Executor invocation under test: %s',
                  self._executor_invocation_from_file)
    self._expected_result_from_file = fileio.open(
        os.path.join(
            os.path.dirname(__file__), 'testdata',
            'expected_output_metadata.json'), 'r').read()
    logging.debug('Expecting output metadata JSON: %s',
                  self._expected_result_from_file)

    # TempWorkingDirTestCase's setUp() must run only after all the testdata
    # files have been read; otherwise the original testdata files are no
    # longer accessible once the working directory changes.
    super().setUp()

    fileio.makedirs(os.path.dirname(_TEST_OUTPUT_METADATA_JSON))
    fileio.makedirs(os.path.dirname(_TEST_INPUT_DIR))
Example #8
    def setUp(self):
        super().setUp()

        self._executor_invocation = pipeline_pb2.ExecutorInput()
        self._executor_invocation.outputs.output_file = _TEST_OUTPUT_METADATA_JSON
        self._executor_invocation.inputs.parameters[
            'input_base'].string_value = _TEST_INPUT_DIR
        self._executor_invocation.inputs.parameters[
            'output_config'].string_value = '{}'
        self._executor_invocation.inputs.parameters[
            'input_config'].string_value = json_format.MessageToJson(
                example_gen_pb2.Input(splits=[
                    example_gen_pb2.Input.Split(name='s1',
                                                pattern='span{SPAN}/split1/*'),
                    example_gen_pb2.Input.Split(name='s2',
                                                pattern='span{SPAN}/split2/*')
                ]))
        self._executor_invocation.outputs.artifacts[
            'examples'].artifacts.append(
                pipeline_pb2.RuntimeArtifact(
                    type=pipeline_pb2.ArtifactTypeSchema(
                        instance_schema=compiler_utils.get_artifact_schema(
                            standard_artifacts.Examples))))

        self._executor_invocation_from_file = fileio.open(
            os.path.join(os.path.dirname(__file__), 'testdata',
                         'executor_invocation.json'), 'r').read()

        logging.debug('Executor invocation under test: %s',
                      self._executor_invocation_from_file)
        self._expected_result_from_file = fileio.open(
            os.path.join(os.path.dirname(__file__), 'testdata',
                         'expected_output_metadata.json'), 'r').read()
        logging.debug('Expecting output metadata JSON: %s',
                      self._expected_result_from_file)

        # Change working directory after all the testdata files have been read.
        self.enter_context(test_case_utils.change_working_dir(self.tmp_dir))

        fileio.makedirs(os.path.dirname(_TEST_INPUT_DIR))
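
A minimal sketch of the change_working_dir pattern used above, assuming it is an ordinary context manager that restores the original working directory on exit (enter_context() then ties that cleanup to the test's lifetime):

import contextlib
import os

@contextlib.contextmanager
def change_working_dir(new_dir):
  """Chdirs into new_dir and restores the previous cwd on exit."""
  old_dir = os.getcwd()
  os.chdir(new_dir)
  try:
    yield new_dir
  finally:
    os.chdir(old_dir)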