Esempio n. 1
0
    def test_pop_input_from_component_spec(self):
        # Pops inputs one by one from a spec holding one artifact input and
        # two parameter inputs, comparing the spec against the expected proto
        # after every removal; finishes by popping a name that never existed.

        def _parse_spec(spec_dict):
            # Turn a JSON-style dict into a ComponentSpec proto.
            parsed = pipeline_spec_pb2.ComponentSpec()
            json_format.ParseDict(spec_dict, parsed)
            return parsed

        component_spec = pipeline_spec_pb2.ComponentSpec(
            executor_label='exec-component1')
        input_defs = component_spec.input_definitions
        input_defs.artifacts[
            'input1'].artifact_type.schema_title = 'system.Dataset'
        input_defs.parameters[
            'input2'].type = pipeline_spec_pb2.PrimitiveType.STRING
        input_defs.parameters[
            'input3'].type = pipeline_spec_pb2.PrimitiveType.DOUBLE

        # Removing the artifact leaves both parameters in place.
        dsl_component_spec.pop_input_from_component_spec(
            component_spec, 'input1')
        expected_spec = _parse_spec({
            'inputDefinitions': {
                'parameters': {
                    'input2': {
                        'type': 'STRING'
                    },
                    'input3': {
                        'type': 'DOUBLE'
                    }
                }
            },
            'executorLabel': 'exec-component1'
        })
        self.assertEqual(expected_spec, component_spec)

        # Removing one parameter leaves the other one in place.
        dsl_component_spec.pop_input_from_component_spec(
            component_spec, 'input2')
        expected_spec = _parse_spec({
            'inputDefinitions': {
                'parameters': {
                    'input3': {
                        'type': 'DOUBLE'
                    }
                }
            },
            'executorLabel': 'exec-component1'
        })
        self.assertEqual(expected_spec, component_spec)

        # Removing the last input: no inputDefinitions are expected at all.
        dsl_component_spec.pop_input_from_component_spec(
            component_spec, 'input3')
        expected_spec = _parse_spec({'executorLabel': 'exec-component1'})
        self.assertEqual(expected_spec, component_spec)

        # Popping an unknown input is expected to leave the spec unchanged,
        # so the previous expectation is reused.
        dsl_component_spec.pop_input_from_component_spec(
            component_spec, 'input4')
        self.assertEqual(expected_spec, component_spec)
Esempio n. 2
0
    def testBuildLatestBlessedModelStrategySucceed(self):
        # Builds the steps for a Resolver that uses LatestBlessedModelStrategy
        # and checks that two steps come out (a model-blessing resolver and a
        # model resolver), each matching the expected protos from test data.
        resolver_node = resolver.Resolver(
            strategy_class=latest_blessed_model_strategy.
            LatestBlessedModelStrategy,
            model=channel.Channel(type=standard_artifacts.Model),
            model_blessing=channel.Channel(
                type=standard_artifacts.ModelBlessing)).with_id('my_resolver2')
        pipeline_info = data_types.PipelineInfo(
            pipeline_name='test-pipeline',
            pipeline_root='gs://path/to/my/root')

        deployment_config = pipeline_pb2.PipelineDeploymentConfig()
        component_defs = {}
        builder = step_builder.StepBuilder(
            node=resolver_node,
            deployment_config=deployment_config,
            pipeline_info=pipeline_info,
            component_defs=component_defs)
        step_specs = builder.build()

        blessing_id = 'my_resolver2-model-blessing-resolver'
        model_id = 'my_resolver2-model-resolver'
        self.assertSameElements(step_specs.keys(), [blessing_id, model_id])

        # Model-blessing resolver: component definition, then task spec.
        expected_proto = test_utils.get_proto_from_test_data(
            'expected_latest_blessed_model_resolver_component_1.pbtxt',
            pipeline_pb2.ComponentSpec())
        self.assertProtoEquals(expected_proto, component_defs[blessing_id])

        expected_proto = test_utils.get_proto_from_test_data(
            'expected_latest_blessed_model_resolver_task_1.pbtxt',
            pipeline_pb2.PipelineTaskSpec())
        self.assertProtoEquals(expected_proto, step_specs[blessing_id])

        # Model resolver: component definition, then task spec.
        expected_proto = test_utils.get_proto_from_test_data(
            'expected_latest_blessed_model_resolver_component_2.pbtxt',
            pipeline_pb2.ComponentSpec())
        self.assertProtoEquals(expected_proto, component_defs[model_id])

        expected_proto = test_utils.get_proto_from_test_data(
            'expected_latest_blessed_model_resolver_task_2.pbtxt',
            pipeline_pb2.PipelineTaskSpec())
        self.assertProtoEquals(expected_proto, step_specs[model_id])

        # The deployment config populated by the builder.
        expected_proto = test_utils.get_proto_from_test_data(
            'expected_latest_blessed_model_resolver_executor.pbtxt',
            pipeline_pb2.PipelineDeploymentConfig())
        self.assertProtoEquals(expected_proto, deployment_config)
Esempio n. 3
0
  def test_build_importer_component_spec(self):
    # Checks the component spec built for an importer named 'importer-1'
    # with a 'system.Artifact' schema title against a hand-written proto.
    self.maxDiff = None  # Show the full diff on mismatch.

    expected_spec = pb.ComponentSpec()
    json_format.ParseDict(
        {
            'inputDefinitions': {
                'parameters': {
                    'uri': {
                        'type': 'STRING'
                    }
                }
            },
            'outputDefinitions': {
                'artifacts': {
                    'artifact': {
                        'artifactType': {
                            'schemaTitle': 'system.Artifact'
                        }
                    }
                }
            },
            'executorLabel': 'exec-importer-1'
        }, expected_spec)

    artifact_schema = pb.ArtifactTypeSchema(schema_title='system.Artifact')
    actual_spec = importer_node._build_importer_component_spec(
        importer_base_name='importer-1',
        artifact_type_schema=artifact_schema)

    self.assertEqual(expected_spec, actual_spec)
Esempio n. 4
0
  def _populate_metrics_in_dag_outputs(
      self,
      ops: List[dsl.ContainerOp],
      op_to_parent_groups: Dict[str, List[str]],
      pipeline_spec: pipeline_spec_pb2.PipelineSpec,
  ) -> None:
    """Exposes metrics artifacts of ops as outputs of their enclosing DAGs.

    For every output artifact of an op whose schema title equals the Metrics
    or ClassificationMetrics type name, an output definition and a DAG output
    artifact selector are added to the component spec of each parent group
    and to the pipeline root, chaining from the op outwards.

    Args:
      ops: The ops whose component specs are scanned for metrics outputs.
      op_to_parent_groups: Maps an op name to the names of its ancestor
        groups, ordered from the farthest group down to the op itself, which
        is the last element.
      pipeline_spec: The pipeline spec; it is modified in place.
    """
    for op in ops:
      task_spec = getattr(op, 'task_spec',
                          pipeline_spec_pb2.PipelineTaskSpec())
      component_spec = getattr(op, 'component_spec',
                               pipeline_spec_pb2.ComponentSpec())

      # (component_name, task_name) for every enclosing group. The slice
      # drops the root group (it cannot be retrieved via name and is
      # represented by the '_root' entry instead) and the op itself.
      enclosing_groups = [('_root', '')] + [
          (dsl_utils.sanitize_component_name(group_name),
           dsl_utils.sanitize_task_name(group_name))
          for group_name in op_to_parent_groups[op.name][1:-1]
      ]
      # Walk from the nearest group outwards; the root comes last.
      enclosing_groups = enclosing_groups[::-1]

      for output_name, artifact_spec in (
          component_spec.output_definitions.artifacts.items()):
        artifact_type = artifact_spec.artifact_type
        if artifact_type.WhichOneof('kind') != 'schema_title':
          continue
        if artifact_type.schema_title not in [
            io_types.Metrics.TYPE_NAME,
            io_types.ClassificationMetrics.TYPE_NAME,
        ]:
          continue

        unique_output_name = '{}-{}'.format(task_spec.task_info.name,
                                            output_name)

        # At the innermost level the producer is the op's own task and
        # output; each outer level then points at the level below it.
        producer_task = task_spec.task_info.name
        producer_output_key = output_name
        for component_name, task_name in enclosing_groups:
          if component_name == '_root':
            group_spec = pipeline_spec.root
          else:
            group_spec = pipeline_spec.components[component_name]

          group_spec.output_definitions.artifacts[
              unique_output_name].CopyFrom(artifact_spec)
          selector = pipeline_spec_pb2.DagOutputsSpec.ArtifactSelectorSpec(
              producer_subtask=producer_task,
              output_artifact_key=producer_output_key,
          )
          group_spec.dag.outputs.artifacts[
              unique_output_name].artifact_selectors.append(selector)

          producer_task = task_name
          producer_output_key = unique_output_name
    def test_build_importer_component_spec(self):
        # Checks the component spec built for an importer that imports
        # 'input1' with the instance schema 'title: kfp.Artifact'.
        self.maxDiff = None  # Show the full diff on mismatch.

        expected_spec = pb.ComponentSpec()
        json_format.ParseDict(
            {
                'inputDefinitions': {
                    'parameters': {
                        'input1': {
                            'type': 'STRING'
                        }
                    }
                },
                'outputDefinitions': {
                    'artifacts': {
                        'result': {
                            'artifactType': {
                                'instanceSchema': 'title: kfp.Artifact'
                            }
                        }
                    }
                },
                'executorLabel': 'exec-importer-task0-input1'
            }, expected_spec)

        actual_spec = importer_node.build_importer_component_spec(
            importer_base_name='importer-task0-input1',
            input_name='input1',
            input_type_schema='title: kfp.Artifact')

        self.assertEqual(expected_spec, actual_spec)
Esempio n. 6
0
    def testBuildFileBasedExampleGen(self):
        # Builds the step for a CsvExampleGen with Beam pipeline args and
        # compares the component definition, task spec and deployment config
        # against the expected protos from test data.
        csv_example_gen = components.CsvExampleGen(
            input_base='path/to/data/root').with_beam_pipeline_args(
                ['--runner=DataflowRunner'])
        deployment_config = pipeline_pb2.PipelineDeploymentConfig()
        component_defs = {}
        builder = step_builder.StepBuilder(
            node=csv_example_gen,
            image='gcr.io/tensorflow/tfx:latest',
            image_cmds=_TEST_CMDS,
            deployment_config=deployment_config,
            component_defs=component_defs)
        built_step_spec = self._sole(builder.build())
        built_component_def = self._sole(component_defs)

        expected_proto = test_utils.get_proto_from_test_data(
            'expected_csv_example_gen_component.pbtxt',
            pipeline_pb2.ComponentSpec())
        self.assertProtoEquals(expected_proto, built_component_def)

        expected_proto = test_utils.get_proto_from_test_data(
            'expected_csv_example_gen_task.pbtxt',
            pipeline_pb2.PipelineTaskSpec())
        self.assertProtoEquals(expected_proto, built_step_spec)

        expected_proto = test_utils.get_proto_from_test_data(
            'expected_csv_example_gen_executor.pbtxt',
            pipeline_pb2.PipelineDeploymentConfig())
        self.assertProtoEquals(expected_proto, deployment_config)
Esempio n. 7
0
    def testBuildImporterWithRuntimeParam(self):
        # Builds the step for an Importer whose source URI is a runtime
        # parameter, inside a ParameterContext, and checks both the built
        # protos and the parameters recorded by the context.
        runtime_param = data_types.RuntimeParameter(
            name='runtime_flag', ptype=str)
        importer_node = importer.Importer(
            source_uri=runtime_param,
            artifact_type=standard_artifacts.Examples).with_id('my_importer')
        deployment_config = pipeline_pb2.PipelineDeploymentConfig()
        component_defs = {}
        with parameter_utils.ParameterContext() as param_context:
            builder = step_builder.StepBuilder(
                node=importer_node,
                deployment_config=deployment_config,
                component_defs=component_defs)
            built_step_spec = self._sole(builder.build())
        built_component_def = self._sole(component_defs)

        expected_proto = test_utils.get_proto_from_test_data(
            'expected_importer_component_with_runtime_param.pbtxt',
            pipeline_pb2.ComponentSpec())
        self.assertProtoEquals(expected_proto, built_component_def)

        expected_proto = test_utils.get_proto_from_test_data(
            'expected_importer_task_with_runtime_param.pbtxt',
            pipeline_pb2.PipelineTaskSpec())
        self.assertProtoEquals(expected_proto, built_step_spec)

        expected_proto = test_utils.get_proto_from_test_data(
            'expected_importer_executor_with_runtime_param.pbtxt',
            pipeline_pb2.PipelineDeploymentConfig())
        self.assertProtoEquals(expected_proto, deployment_config)

        self.assertListEqual([runtime_param], param_context.parameters)
Esempio n. 8
0
def build_root_spec_from_pipeline_params(
    pipeline_params: List[dsl.PipelineParam],
) -> pipeline_spec_pb2.ComponentSpec:
    """Builds the root component spec instance from pipeline params.

  This is useful when building the component spec for a pipeline (aka pipeline
  root). Only input_definitions are filled in here: parameter-typed params
  become input parameters, everything else becomes an input artifact carrying
  an instance schema.

  Args:
    pipeline_params: The list of pipeline params. A falsy value (e.g. None or
      an empty list) yields an empty spec.

  Returns:
    An instance of IR ComponentSpec.
  """
    root_spec = pipeline_spec_pb2.ComponentSpec()
    for pipeline_param in pipeline_params or []:
        if not type_utils.is_parameter_type(pipeline_param.param_type):
            # Not a parameter type: register the param as an input artifact.
            instance_schema = type_utils.get_artifact_type_schema(
                pipeline_param.param_type)
            root_spec.input_definitions.artifacts[
                pipeline_param.name].artifact_type.instance_schema = (
                    instance_schema)
            continue

        parameter_type = type_utils.get_parameter_type(
            pipeline_param.param_type)
        root_spec.input_definitions.parameters[
            pipeline_param.name].type = parameter_type

    return root_spec
Esempio n. 9
0
    def testBuildExitHandler(self):
        # Builds the step for a dummy producer component with
        # is_exit_handler=True and compares the built protos against the
        # expected ones from test data.
        exit_task = test_utils.dummy_producer_component(
            param1=decorators.FinalStatusStr('value1'), )
        deployment_config = pipeline_pb2.PipelineDeploymentConfig()
        component_defs = {}
        builder = step_builder.StepBuilder(
            node=exit_task,
            image='gcr.io/tensorflow/tfx:latest',
            deployment_config=deployment_config,
            component_defs=component_defs,
            is_exit_handler=True)
        built_step_spec = self._sole(builder.build())
        built_component_def = self._sole(component_defs)

        expected_proto = test_utils.get_proto_from_test_data(
            'expected_dummy_exit_handler_component.pbtxt',
            pipeline_pb2.ComponentSpec())
        self.assertProtoEquals(expected_proto, built_component_def)

        expected_proto = test_utils.get_proto_from_test_data(
            'expected_dummy_exit_handler_task.pbtxt',
            pipeline_pb2.PipelineTaskSpec())
        self.assertProtoEquals(expected_proto, built_step_spec)

        expected_proto = test_utils.get_proto_from_test_data(
            'expected_dummy_exit_handler_executor.pbtxt',
            pipeline_pb2.PipelineDeploymentConfig())
        self.assertProtoEquals(expected_proto, deployment_config)
Esempio n. 10
0
    def testBuildLatestArtifactResolverSucceed(self):
        # Builds the step for a Resolver that uses LatestArtifactStrategy and
        # compares the built protos against the expected ones from test data.
        resolver_node = resolver.Resolver(
            strategy_class=latest_artifact_strategy.LatestArtifactStrategy,
            model=channel.Channel(type=standard_artifacts.Model),
            examples=channel.Channel(
                type=standard_artifacts.Examples)).with_id('my_resolver')
        deployment_config = pipeline_pb2.PipelineDeploymentConfig()
        component_defs = {}
        pipeline_info = data_types.PipelineInfo(
            pipeline_name='test-pipeline',
            pipeline_root='gs://path/to/my/root')
        builder = step_builder.StepBuilder(
            node=resolver_node,
            deployment_config=deployment_config,
            pipeline_info=pipeline_info,
            component_defs=component_defs)
        built_step_spec = self._sole(builder.build())
        built_component_def = self._sole(component_defs)

        expected_proto = test_utils.get_proto_from_test_data(
            'expected_latest_artifact_resolver_component.pbtxt',
            pipeline_pb2.ComponentSpec())
        self.assertProtoEquals(expected_proto, built_component_def)

        expected_proto = test_utils.get_proto_from_test_data(
            'expected_latest_artifact_resolver_task.pbtxt',
            pipeline_pb2.PipelineTaskSpec())
        self.assertProtoEquals(expected_proto, built_step_spec)

        expected_proto = test_utils.get_proto_from_test_data(
            'expected_latest_artifact_resolver_executor.pbtxt',
            pipeline_pb2.PipelineDeploymentConfig())
        self.assertProtoEquals(expected_proto, deployment_config)
Esempio n. 11
0
    def testBuildTask(self):
        # Builds the step for a BigQueryExampleGen with caching enabled and
        # compares the built protos against the expected ones from test data.
        bq_example_gen = big_query_example_gen_component.BigQueryExampleGen(
            query='SELECT * FROM TABLE')
        deployment_config = pipeline_pb2.PipelineDeploymentConfig()
        component_defs = {}
        builder = step_builder.StepBuilder(
            node=bq_example_gen,
            image='gcr.io/tensorflow/tfx:latest',
            deployment_config=deployment_config,
            component_defs=component_defs,
            enable_cache=True)
        built_step_spec = self._sole(builder.build())
        built_component_def = self._sole(component_defs)

        expected_proto = test_utils.get_proto_from_test_data(
            'expected_bq_example_gen_component.pbtxt',
            pipeline_pb2.ComponentSpec())
        self.assertProtoEquals(expected_proto, built_component_def)

        expected_proto = test_utils.get_proto_from_test_data(
            'expected_bq_example_gen_task.pbtxt',
            pipeline_pb2.PipelineTaskSpec())
        self.assertProtoEquals(expected_proto, built_step_spec)

        expected_proto = test_utils.get_proto_from_test_data(
            'expected_bq_example_gen_executor.pbtxt',
            pipeline_pb2.PipelineDeploymentConfig())
        self.assertProtoEquals(expected_proto, deployment_config)
Esempio n. 12
0
def build_importer_component_spec(
    importer_base_name: str,
    input_name: str,
    input_type_schema: str,
) -> pipeline_spec_pb2.ComponentSpec:
    """Builds an importer component spec.

  The spec gets an executor label derived from the importer base name, a
  single STRING input parameter named after the input, and a single output
  artifact (keyed by OUTPUT_KEY) carrying the given instance schema.

  Args:
    importer_base_name: The base name of the importer node.
    input_name: The name of the input artifact that needs to be imported.
    input_type_schema: The type of the input artifact.

  Returns:
    An importer node component spec.
  """
    executor_label = dsl_utils.sanitize_executor_label(importer_base_name)

    importer_spec = pipeline_spec_pb2.ComponentSpec()
    importer_spec.executor_label = executor_label

    input_parameter = importer_spec.input_definitions.parameters[input_name]
    input_parameter.type = pipeline_spec_pb2.PrimitiveType.STRING

    output_artifact = importer_spec.output_definitions.artifacts[OUTPUT_KEY]
    output_artifact.artifact_type.instance_schema = input_type_schema

    return importer_spec
Esempio n. 13
0
def build_component_spec_from_structure(
    component_spec: structures.ComponentSpec,
) -> pipeline_spec_pb2.ComponentSpec:
  """Converts a structures.ComponentSpec into an IR ComponentSpec.

  The executor label is derived from the component name. Each input and
  output becomes a parameter definition when its type is a parameter type,
  and an artifact definition carrying an instance schema otherwise.

  Args:
    component_spec: The structure component spec. Its inputs and outputs may
      be empty or None.

  Returns:
    An instance of IR ComponentSpec.
  """
  ir_spec = pipeline_spec_pb2.ComponentSpec()
  ir_spec.executor_label = dsl_utils.sanitize_executor_label(
      component_spec.name)

  for in_spec in component_spec.inputs or []:
    if not type_utils.is_parameter_type(in_spec.type):
      in_schema = type_utils.get_artifact_type_schema(in_spec.type)
      ir_spec.input_definitions.artifacts[
          in_spec.name].artifact_type.instance_schema = in_schema
      continue
    in_param_type = type_utils.get_parameter_type(in_spec.type)
    ir_spec.input_definitions.parameters[in_spec.name].type = in_param_type

  for out_spec in component_spec.outputs or []:
    if not type_utils.is_parameter_type(out_spec.type):
      out_schema = type_utils.get_artifact_type_schema(out_spec.type)
      ir_spec.output_definitions.artifacts[
          out_spec.name].artifact_type.instance_schema = out_schema
      continue
    out_param_type = type_utils.get_parameter_type(out_spec.type)
    ir_spec.output_definitions.parameters[out_spec.name].type = out_param_type

  return ir_spec
Esempio n. 14
0
    def testBuildContainerTask(self):
        # Builds the step for a DummyProducerComponent and compares the built
        # protos against the expected ones from test data.
        model_channel = channel_utils.as_channel([standard_artifacts.Model()])
        producer_task = test_utils.DummyProducerComponent(
            output1=model_channel,
            param1='value1',
        )
        deployment_config = pipeline_pb2.PipelineDeploymentConfig()
        component_defs = {}
        builder = step_builder.StepBuilder(
            node=producer_task,
            # Note this has no effect here.
            image='gcr.io/tensorflow/tfx:latest',
            deployment_config=deployment_config,
            component_defs=component_defs)
        built_step_spec = self._sole(builder.build())
        built_component_def = self._sole(component_defs)

        expected_proto = test_utils.get_proto_from_test_data(
            'expected_dummy_container_spec_component.pbtxt',
            pipeline_pb2.ComponentSpec())
        self.assertProtoEquals(expected_proto, built_component_def)

        expected_proto = test_utils.get_proto_from_test_data(
            'expected_dummy_container_spec_task.pbtxt',
            pipeline_pb2.PipelineTaskSpec())
        self.assertProtoEquals(expected_proto, built_step_spec)

        expected_proto = test_utils.get_proto_from_test_data(
            'expected_dummy_container_spec_executor.pbtxt',
            pipeline_pb2.PipelineDeploymentConfig())
        self.assertProtoEquals(expected_proto, deployment_config)
Esempio n. 15
0
    def testBuildImporter(self):
        # Builds the step for an Importer with a literal source URI plus
        # properties and custom properties, and compares the built protos
        # against the expected ones from test data.
        artifact_properties = {
            'split_names': '["train", "eval"]',
        }
        artifact_custom_properties = {
            'str_custom_property': 'abc',
            'int_custom_property': 123,
        }
        importer_node = importer.Importer(
            source_uri='m/y/u/r/i',
            properties=artifact_properties,
            custom_properties=artifact_custom_properties,
            artifact_type=standard_artifacts.Examples).with_id('my_importer')
        deployment_config = pipeline_pb2.PipelineDeploymentConfig()
        component_defs = {}
        builder = step_builder.StepBuilder(
            node=importer_node,
            deployment_config=deployment_config,
            component_defs=component_defs)
        built_step_spec = self._sole(builder.build())
        built_component_def = self._sole(component_defs)

        expected_proto = test_utils.get_proto_from_test_data(
            'expected_importer_component.pbtxt',
            pipeline_pb2.ComponentSpec())
        self.assertProtoEquals(expected_proto, built_component_def)

        expected_proto = test_utils.get_proto_from_test_data(
            'expected_importer_task.pbtxt',
            pipeline_pb2.PipelineTaskSpec())
        self.assertProtoEquals(expected_proto, built_step_spec)

        expected_proto = test_utils.get_proto_from_test_data(
            'expected_importer_executor.pbtxt',
            pipeline_pb2.PipelineDeploymentConfig())
        self.assertProtoEquals(expected_proto, deployment_config)
Esempio n. 16
0
    def testBuildFileBasedExampleGenWithInputConfig(self):
        # Builds the step for an ImportExampleGen configured with explicit
        # train/eval input splits and compares the built protos against the
        # expected ones from test data.
        train_split = example_gen_pb2.Input.Split(
            name='train', pattern='*train.tfr')
        eval_split = example_gen_pb2.Input.Split(
            name='eval', pattern='*test.tfr')
        input_config = example_gen_pb2.Input(splits=[train_split, eval_split])
        import_example_gen = components.ImportExampleGen(
            input_base='path/to/data/root', input_config=input_config)
        deployment_config = pipeline_pb2.PipelineDeploymentConfig()
        component_defs = {}
        builder = step_builder.StepBuilder(
            node=import_example_gen,
            image='gcr.io/tensorflow/tfx:latest',
            deployment_config=deployment_config,
            component_defs=component_defs)
        built_step_spec = self._sole(builder.build())
        built_component_def = self._sole(component_defs)

        expected_proto = test_utils.get_proto_from_test_data(
            'expected_import_example_gen_component.pbtxt',
            pipeline_pb2.ComponentSpec())
        self.assertProtoEquals(expected_proto, built_component_def)

        expected_proto = test_utils.get_proto_from_test_data(
            'expected_import_example_gen_task.pbtxt',
            pipeline_pb2.PipelineTaskSpec())
        self.assertProtoEquals(expected_proto, built_step_spec)

        expected_proto = test_utils.get_proto_from_test_data(
            'expected_import_example_gen_executor.pbtxt',
            pipeline_pb2.PipelineDeploymentConfig())
        self.assertProtoEquals(expected_proto, deployment_config)
    def test_build_component_spec_from_structure(self):
        # Converts a structures.ComponentSpec with one artifact input, two
        # parameter inputs and one artifact output, and compares the result
        # against a hand-written IR ComponentSpec.
        dataset_input = structures.InputSpec(name='input1',
                                             description='input1 desc',
                                             type='Dataset')
        string_input = structures.InputSpec(name='input2',
                                            description='input2 desc',
                                            type='String')
        integer_input = structures.InputSpec(name='input3',
                                             description='input3 desc',
                                             type='Integer')
        model_output = structures.OutputSpec(name='output1',
                                             description='output1 desc',
                                             type='Model')
        structure_spec = structures.ComponentSpec(
            name='component1',
            description='component1 desc',
            inputs=[dataset_input, string_input, integer_input],
            outputs=[model_output])

        expected_spec = pipeline_spec_pb2.ComponentSpec()
        json_format.ParseDict(
            {
                'inputDefinitions': {
                    'artifacts': {
                        'input1': {
                            'artifactType': {
                                'instanceSchema':
                                'properties:\ntitle: kfp.Dataset\ntype: object\n'
                            }
                        }
                    },
                    'parameters': {
                        'input2': {
                            'type': 'STRING'
                        },
                        'input3': {
                            'type': 'INT'
                        }
                    }
                },
                'outputDefinitions': {
                    'artifacts': {
                        'output1': {
                            'artifactType': {
                                'instanceSchema':
                                'properties:\ntitle: kfp.Model\ntype: object\n'
                            }
                        }
                    }
                },
                'executorLabel': 'exec-component1'
            }, expected_spec)

        actual_spec = dsl_component_spec.build_component_spec_from_structure(
            structure_spec)

        self.assertEqual(expected_spec, actual_spec)
Esempio n. 18
0
def build_component_spec_for_task(
    task: pipeline_task.PipelineTask,
    is_exit_task: bool = False,
) -> pipeline_spec_pb2.ComponentSpec:
    """Creates the IR ComponentSpec that describes one pipeline task.

    The executor label is derived from the task name. Every input and output
    declared on `task.component_spec` is registered either as a parameter or
    as an artifact, depending on `type_utils.is_parameter_type`.

    Args:
        task: The pipeline task whose interface is being described.
        is_exit_task: Whether the task is used as exit task in Exit Handler.
            Inputs of the PipelineTaskFinalStatus type are accepted only
            when this is True.

    Returns:
        The populated ComponentSpec message.

    Raises:
        ValueError: If an input has the PipelineTaskFinalStatus type while
            `is_exit_task` is False.
    """
    spec = pipeline_spec_pb2.ComponentSpec()
    spec.executor_label = component_utils.sanitize_executor_label(task.name)

    declared_inputs = task.component_spec.inputs or {}
    for name, declared in declared_inputs.items():

        # PipelineTaskFinalStatus is handled before anything else: it is
        # always emitted as a STRUCT parameter, and only for exit tasks.
        if type_utils.is_task_final_status_type(declared.type):
            if is_exit_task:
                spec.input_definitions.parameters[name].parameter_type = (
                    pipeline_spec_pb2.ParameterType.STRUCT)
                continue
            raise ValueError(
                'PipelineTaskFinalStatus can only be used in an exit task.')

        # Workaround for optional inputs: an input that was neither passed
        # to the task nor given a default is left out of the spec.
        is_available = name in task.inputs or declared.default is not None
        if not is_available:
            continue

        if not type_utils.is_parameter_type(declared.type):
            spec.input_definitions.artifacts[name].artifact_type.CopyFrom(
                type_utils.get_artifact_type_schema(declared.type))
            continue

        parameter_type = type_utils.get_parameter_type(declared.type)
        parameter = spec.input_definitions.parameters[name]
        parameter.parameter_type = parameter_type
        if declared.default is not None:
            parameter.default_value.CopyFrom(
                _to_protobuf_value(declared.default))

    declared_outputs = task.component_spec.outputs or {}
    for name, declared in declared_outputs.items():
        if type_utils.is_parameter_type(declared.type):
            parameter_type = type_utils.get_parameter_type(declared.type)
            spec.output_definitions.parameters[
                name].parameter_type = parameter_type
        else:
            spec.output_definitions.artifacts[name].artifact_type.CopyFrom(
                type_utils.get_artifact_type_schema(declared.type))

    return spec
Esempio n. 19
0
    def test_build_component_inputs_spec(self, is_root_component,
                                         expected_result):
        """Checks the inputs spec built from four typed pipeline params.

        `expected_result` is the dict form of the ComponentSpec expected for
        the given `is_root_component` flag.
        """
        expected_spec = json_format.ParseDict(
            expected_result, pipeline_spec_pb2.ComponentSpec())

        pipeline_params = [
            _pipeline_param.PipelineParam(name=name, param_type=param_type)
            for name, param_type in (
                ('input1', 'Dataset'),
                ('input2', 'Integer'),
                ('input3', 'String'),
                ('input4', 'Float'),
            )
        ]
        actual_spec = pipeline_spec_pb2.ComponentSpec()
        dsl_component_spec.build_component_inputs_spec(
            actual_spec, pipeline_params, is_root_component)

        self.assertEqual(expected_spec, actual_spec)
Esempio n. 20
0
  def test_build_component_spec_from_structure(self):
    """Checks conversion of a structures.ComponentSpec into an IR spec.

    Only `input1`..`input3` are passed as actual inputs, so the optional
    `input4` does not appear in the expected input definitions.
    """
    declared_inputs = [
        structures.InputSpec(
            name='input1', description='input1 desc', type='Dataset'),
        structures.InputSpec(
            name='input2', description='input2 desc', type='String'),
        structures.InputSpec(
            name='input3', description='input3 desc', type='Integer'),
        structures.InputSpec(
            name='input4', description='optional inputs', optional=True),
    ]
    declared_outputs = [
        structures.OutputSpec(
            name='output1', description='output1 desc', type='Model'),
    ]
    structure_spec = structures.ComponentSpec(
        name='component1',
        description='component1 desc',
        inputs=declared_inputs,
        outputs=declared_outputs)

    expected_inputs = {
        'artifacts': {
            'input1': {'artifactType': {'schemaTitle': 'system.Dataset'}},
        },
        'parameters': {
            'input2': {'type': 'STRING'},
            'input3': {'type': 'INT'},
        },
    }
    expected_outputs = {
        'artifacts': {
            'output1': {'artifactType': {'schemaTitle': 'system.Model'}},
        },
    }
    expected_spec = json_format.ParseDict(
        {
            'inputDefinitions': expected_inputs,
            'outputDefinitions': expected_outputs,
            'executorLabel': 'exec-component1',
        }, pipeline_spec_pb2.ComponentSpec())

    actual_spec = dsl_component_spec.build_component_spec_from_structure(
        component_spec=structure_spec,
        executor_label='exec-component1',
        actual_inputs=['input1', 'input2', 'input3'],
    )

    self.assertEqual(expected_spec, actual_spec)
Esempio n. 21
0
    def test_build_component_outputs_spec(self):
        """Checks the outputs spec built from four typed pipeline params.

        The Dataset output is expected as an artifact carrying an instance
        schema; the Integer, String and Float outputs are expected as INT,
        STRING and DOUBLE parameters.
        """
        dataset_schema = (
            'title: kfp.Dataset\ntype: object\nproperties:\n  '
            'payload_format:\n    type: string\n  '
            'container_format:\n    type: string\n')
        expected_spec = json_format.ParseDict(
            {
                'outputDefinitions': {
                    'artifacts': {
                        'output1': {
                            'artifactType': {
                                'instanceSchema': dataset_schema,
                            },
                        },
                    },
                    'parameters': {
                        'output2': {'type': 'INT'},
                        'output3': {'type': 'STRING'},
                        'output4': {'type': 'DOUBLE'},
                    },
                },
            }, pipeline_spec_pb2.ComponentSpec())

        pipeline_params = [
            _pipeline_param.PipelineParam(name=name, param_type=param_type)
            for name, param_type in (
                ('output1', 'Dataset'),
                ('output2', 'Integer'),
                ('output3', 'String'),
                ('output4', 'Float'),
            )
        ]
        actual_spec = pipeline_spec_pb2.ComponentSpec()
        dsl_component_spec.build_component_outputs_spec(
            actual_spec, pipeline_params)

        self.assertEqual(expected_spec, actual_spec)
Esempio n. 22
0
    def test_build_component_outputs_spec(self):
        """Checks the outputs spec built from four typed pipeline params.

        The Dataset output is expected as a `system.Dataset` artifact at
        schema version 0.0.1; the Integer, String and Float outputs are
        expected as NUMBER_INTEGER, STRING and NUMBER_DOUBLE parameters.
        """
        expected_artifacts = {
            'output1': {
                'artifactType': {
                    'schemaTitle': 'system.Dataset',
                    'schemaVersion': '0.0.1',
                },
            },
        }
        expected_parameters = {
            'output2': {'parameterType': 'NUMBER_INTEGER'},
            'output3': {'parameterType': 'STRING'},
            'output4': {'parameterType': 'NUMBER_DOUBLE'},
        }
        expected_spec = json_format.ParseDict(
            {
                'outputDefinitions': {
                    'artifacts': expected_artifacts,
                    'parameters': expected_parameters,
                },
            }, pipeline_spec_pb2.ComponentSpec())

        pipeline_params = [
            _pipeline_param.PipelineParam(name=name, param_type=param_type)
            for name, param_type in (
                ('output1', 'Dataset'),
                ('output2', 'Integer'),
                ('output3', 'String'),
                ('output4', 'Float'),
            )
        ]
        actual_spec = pipeline_spec_pb2.ComponentSpec()
        dsl_component_spec.build_component_outputs_spec(
            actual_spec, pipeline_params)

        self.assertEqual(expected_spec, actual_spec)
Esempio n. 23
0
    def testBuildDummyConsumerWithCondition(self):
        """Builds a consumer nested in two conditions and checks the protos.

        The component def, task spec and deployment config produced by the
        step builder are compared against the golden `.pbtxt` test data.
        """
        first_producer = test_utils.dummy_producer_component(
            output1=channel_utils.as_channel([standard_artifacts.Model()]),
            param1='value1',
        ).with_id('producer_task_1')
        second_producer = test_utils.dummy_producer_component_2(
            output1=channel_utils.as_channel([standard_artifacts.Model()]),
            param1='value2',
        ).with_id('producer_task_2')

        # Two behaviours are exercised here:
        # 1. Nested conditions. The condition string of the consumer should
        #    contain both predicates.
        # 2. Implicit channels. The consumer only takes the first producer's
        #    output, but the second producer is used in a condition, hence it
        #    should be added to the dependencies of the consumer.
        # See testdata for detail.
        with conditional.Cond(
                first_producer.outputs['output1'].future()[0].uri != 'uri'):
            with conditional.Cond(second_producer.outputs['output1'].future()
                                  [0].property('property') == 'value1'):
                consumer = test_utils.dummy_consumer_component(
                    input1=first_producer.outputs['output1'],
                    param1=1,
                )

        # A pipeline has to be constructed to set producer_component_id; the
        # pipeline object itself is not used afterwards.
        _pipeline = tfx.dsl.Pipeline(
            pipeline_name='pipeline-with-condition',
            pipeline_root='',
            components=[first_producer, second_producer, consumer],
        )

        deployment_config = pipeline_pb2.PipelineDeploymentConfig()
        component_defs = {}
        builder = step_builder.StepBuilder(
            node=consumer,
            image='gcr.io/tensorflow/tfx:latest',
            deployment_config=deployment_config,
            component_defs=component_defs)
        actual_step_spec = self._sole(builder.build())
        actual_component_def = self._sole(component_defs)

        # (golden file, proto class used to parse it, actual proto)
        expectations = (
            ('expected_dummy_consumer_with_condition_component.pbtxt',
             pipeline_pb2.ComponentSpec, actual_component_def),
            ('expected_dummy_consumer_with_condition_task.pbtxt',
             pipeline_pb2.PipelineTaskSpec, actual_step_spec),
            ('expected_dummy_consumer_with_condition_executor.pbtxt',
             pipeline_pb2.PipelineDeploymentConfig, deployment_config),
        )
        for golden_file, proto_cls, actual_proto in expectations:
            self.assertProtoEquals(
                test_utils.get_proto_from_test_data(golden_file, proto_cls()),
                actual_proto)
Esempio n. 24
0
  def _build_resolver_for_latest_blessed_model(
      self, model_channel_key: str, model_blessing_resolver_name: str,
      model_blessing_channel_key: str) -> pipeline_pb2.PipelineTaskSpec:
    """Creates the resolver task for the latest blessed Model artifact.

    Besides returning the task spec, this registers a component definition in
    `self._component_defs` and a resolver executor in
    `self._deployment_config.executors`.

    Args:
      model_channel_key: Output key of the Model artifact; also used as the
        key of the resolver's artifact query.
      model_blessing_resolver_name: Name of the producer task whose output
        artifact is wired into this resolver's input.
      model_blessing_channel_key: Output key of that producer's artifact.

    Returns:
      The PipelineTaskSpec of the resolver task.
    """
    resolver_name = '{}{}'.format(self._name, _MODEL_RESOLVER_SUFFIX)
    executor_label = _EXECUTOR_LABEL_PATTERN.format(resolver_name)

    # Component def: one blessing input and one model output.
    component_def = pipeline_pb2.ComponentSpec()
    component_def.executor_label = executor_label
    blessing_input_def = compiler_utils.build_input_artifact_spec(
        self._outputs[model_blessing_channel_key])
    component_def.input_definitions.artifacts[
        _MODEL_RESOLVER_INPUT_KEY].CopyFrom(blessing_input_def)
    model_output_def = compiler_utils.build_output_artifact_spec(
        self._outputs[model_channel_key])
    component_def.output_definitions.artifacts[model_channel_key].CopyFrom(
        model_output_def)
    self._component_defs[resolver_name] = component_def

    # Task spec: the input is the output artifact of the blessing resolver.
    task_spec = pipeline_pb2.PipelineTaskSpec()
    task_spec.task_info.name = resolver_name
    task_spec.component_ref.name = resolver_name
    blessing_source = pipeline_pb2.TaskInputsSpec.InputArtifactSpec()
    blessing_source.task_output_artifact.producer_task = (
        model_blessing_resolver_name)
    blessing_source.task_output_artifact.output_artifact_key = (
        model_blessing_channel_key)
    task_spec.inputs.artifacts[_MODEL_RESOLVER_INPUT_KEY].CopyFrom(
        blessing_source)

    # Resolver executor spec. The quadrupled braces survive `.format()` as a
    # literal `{{ ... }}` around the input-artifact metadata reference.
    filter_template = (
        'schema_title="{type}" AND '
        'state={state} AND '
        'name="{{{{$.inputs.artifacts[\'{input_key}\']'
        '.metadata[\'{property_key}\']}}}}"')
    query_filter = filter_template.format(
        type=compiler_utils.get_artifact_title(standard_artifacts.Model),
        state=metadata_store_pb2.Artifact.State.Name(
            metadata_store_pb2.Artifact.LIVE),
        input_key=_MODEL_RESOLVER_INPUT_KEY,
        property_key=constants.ARTIFACT_PROPERTY_CURRENT_MODEL_ID_KEY)
    executor = pipeline_pb2.PipelineDeploymentConfig.ExecutorSpec()
    executor.resolver.CopyFrom(
        ResolverSpec(
            output_artifact_queries={
                model_channel_key:
                    ResolverSpec.ArtifactQuerySpec(filter=query_filter),
            }))
    self._deployment_config.executors[executor_label].CopyFrom(executor)

    return task_spec
Esempio n. 25
0
def build_component_spec_from_structure(
    component_spec: structures.ComponentSpec,
    executor_label: str,
    actual_inputs: List[str],
) -> pipeline_spec_pb2.ComponentSpec:
    """Converts a structures.ComponentSpec into an IR ComponentSpec.

    Args:
        component_spec: The structure component spec.
        executor_label: The executor label.
        actual_inputs: The actual arguments passed to the task. Declared
            inputs whose name is not listed here are skipped; this is a
            short term workaround to support optional inputs in component
            spec IR.

    Returns:
        An instance of IR ComponentSpec.
    """
    ir_spec = pipeline_spec_pb2.ComponentSpec()
    ir_spec.executor_label = executor_label

    # Only inputs that were actually passed to the task are emitted.
    provided_inputs = (
        declared for declared in component_spec.inputs or []
        if declared.name in actual_inputs)
    for declared in provided_inputs:
        if not type_utils.is_parameter_type(declared.type):
            ir_spec.input_definitions.artifacts[
                declared.name].artifact_type.CopyFrom(
                    type_utils.get_artifact_type_schema_message(
                        declared.type))
            continue
        parameter_type = type_utils.get_parameter_type(declared.type)
        ir_spec.input_definitions.parameters[
            declared.name].type = parameter_type

    for declared in component_spec.outputs or []:
        if not type_utils.is_parameter_type(declared.type):
            ir_spec.output_definitions.artifacts[
                declared.name].artifact_type.CopyFrom(
                    type_utils.get_artifact_type_schema_message(
                        declared.type))
            continue
        parameter_type = type_utils.get_parameter_type(declared.type)
        ir_spec.output_definitions.parameters[
            declared.name].type = parameter_type

    return ir_spec
    def test_fill_in_component_input_default_value(self, parameter_type,
                                                   default_value, expected):
        """Checks the default value written onto a single parameter input.

        A spec with one parameter `input1` of `parameter_type` is built, the
        default is filled in, and the stored default is compared to
        `expected`.
        """
        input1_spec = pipeline_spec_pb2.ComponentInputsSpec.ParameterSpec(
            parameter_type=parameter_type)
        component_spec = pipeline_spec_pb2.ComponentSpec(
            input_definitions=pipeline_spec_pb2.ComponentInputsSpec(
                parameters={'input1': input1_spec}))

        pipeline_spec_builder._fill_in_component_input_default_value(
            component_spec=component_spec,
            input_name='input1',
            default_value=default_value)

        stored_default = (
            component_spec.input_definitions.parameters['input1'].
            default_value)
        self.assertEqual(expected, stored_default)
Esempio n. 27
0
def build_component_spec_for_task(
        task: pipeline_task.PipelineTask) -> pipeline_spec_pb2.ComponentSpec:
    """Creates the IR ComponentSpec that describes one pipeline task.

    The executor label is derived from the task name. Inputs and outputs
    declared on `task.component_spec` are registered as parameters or as
    artifacts, depending on `type_utils.is_parameter_type`.

    Args:
        task: The pipeline task whose interface is being described.

    Returns:
        The populated ComponentSpec message.
    """
    spec = pipeline_spec_pb2.ComponentSpec()
    spec.executor_label = component_utils.sanitize_executor_label(task.name)

    declared_inputs = task.component_spec.inputs or {}
    for name, declared in declared_inputs.items():

        # Workaround for optional inputs: only inputs that were actually
        # passed to the task are emitted.
        if name in task.inputs:
            if type_utils.is_parameter_type(declared.type):
                parameter_type = type_utils.get_parameter_type(declared.type)
                spec.input_definitions.parameters[
                    name].parameter_type = parameter_type
            else:
                spec.input_definitions.artifacts[name].artifact_type.CopyFrom(
                    type_utils.get_artifact_type_schema(declared.type))

    declared_outputs = task.component_spec.outputs or {}
    for name, declared in declared_outputs.items():
        if type_utils.is_parameter_type(declared.type):
            parameter_type = type_utils.get_parameter_type(declared.type)
            spec.output_definitions.parameters[
                name].parameter_type = parameter_type
        else:
            spec.output_definitions.artifacts[name].artifact_type.CopyFrom(
                type_utils.get_artifact_type_schema(declared.type))

    return spec
Esempio n. 28
0
def build_component_spec_for_group(
    pipeline_channels: List[pipeline_channel.PipelineChannel],
    is_root_group: bool,
) -> pipeline_spec_pb2.ComponentSpec:
    """Builds ComponentSpec for a TasksGroup.

    Every channel becomes one entry of the spec's input definitions:
    artifact channels as artifacts, all other channels as parameters.

    Args:
        pipeline_channels: The list of pipeline channels referenced by the
            group.
        is_root_group: Whether the group is the root group. For the root
            group an input is named after its channel and, for parameter
            inputs, `channel.value` is passed on as the default value. For
            any other group the input name comes from
            `_additional_input_name_for_pipeline_channel` and no default is
            filled in.

    Returns:
        A ComponentSpec object whose input definitions describe the given
        channels.
    """
    component_spec = pipeline_spec_pb2.ComponentSpec()

    for channel in pipeline_channels:

        input_name = (
            channel.name if is_root_group else
            _additional_input_name_for_pipeline_channel(channel))

        if isinstance(channel, pipeline_channel.PipelineArtifactChannel):
            component_spec.input_definitions.artifacts[
                input_name].artifact_type.CopyFrom(
                    type_utils.get_artifact_type_schema(channel.channel_type))
            continue

        # channel is one of PipelineParameterChannel, LoopArgument, or
        # LoopArgumentVariable.
        component_spec.input_definitions.parameters[
            input_name].parameter_type = type_utils.get_parameter_type(
                channel.channel_type)

        # TODO: should we fill in default value for all groups and tasks?
        if is_root_group:
            _fill_in_component_input_default_value(
                component_spec=component_spec,
                input_name=input_name,
                default_value=channel.value,
            )

    return component_spec
Esempio n. 29
0
  def _build_resolver_for_latest_model_blessing(
      self, model_blessing_channel_key: str) -> pipeline_pb2.PipelineTaskSpec:
    """Creates the resolver task for the latest valid ModelBlessing artifact.

    Besides returning the task spec, this registers a component definition in
    `self._component_defs` and a resolver executor in
    `self._deployment_config.executors`.

    Args:
      model_blessing_channel_key: Output key of the ModelBlessing artifact;
        also used as the key of the resolver's artifact query.

    Returns:
      The PipelineTaskSpec of the resolver task.
    """
    resolver_name = '{}{}'.format(self._name, _MODEL_BLESSING_RESOLVER_SUFFIX)
    executor_label = _EXECUTOR_LABEL_PATTERN.format(resolver_name)

    # Component def: a single ModelBlessing output and no inputs.
    component_def = pipeline_pb2.ComponentSpec()
    component_def.executor_label = executor_label
    blessing_output_def = compiler_utils.build_output_artifact_spec(
        self._outputs[model_blessing_channel_key])
    component_def.output_definitions.artifacts[
        model_blessing_channel_key].CopyFrom(blessing_output_def)
    self._component_defs[resolver_name] = component_def

    # Task spec.
    task_spec = pipeline_pb2.PipelineTaskSpec()
    task_spec.task_info.name = resolver_name
    task_spec.component_ref.name = resolver_name

    # Resolver executor spec: query for live artifacts of the ModelBlessing
    # type whose blessed property equals the blessed value.
    filter_template = ('artifact_type="{type}" and state={state}'
                       ' and metadata.{key}.number_value={value}')
    query_filter = filter_template.format(
        type=compiler_utils.get_artifact_title(
            standard_artifacts.ModelBlessing),
        state=metadata_store_pb2.Artifact.State.Name(
            metadata_store_pb2.Artifact.LIVE),
        key=constants.ARTIFACT_PROPERTY_BLESSED_KEY,
        value=constants.BLESSED_VALUE)
    executor = pipeline_pb2.PipelineDeploymentConfig.ExecutorSpec()
    executor.resolver.CopyFrom(
        ResolverSpec(
            output_artifact_queries={
                model_blessing_channel_key:
                    ResolverSpec.ArtifactQuerySpec(filter=query_filter),
            }))
    self._deployment_config.executors[executor_label].CopyFrom(executor)

    return task_spec
Esempio n. 30
0
  def test_build_root_spec_from_pipeline_params(self):
    """Checks the root spec built from four typed pipeline params.

    The Dataset param is expected as an artifact carrying an instance schema;
    the Integer, String and Float params are expected as INT, STRING and
    DOUBLE parameters.
    """
    expected_inputs = {
        'artifacts': {
            'input1': {
                'artifactType': {
                    'instanceSchema':
                        'properties:\ntitle: kfp.Dataset\ntype: object\n',
                },
            },
        },
        'parameters': {
            'input2': {'type': 'INT'},
            'input3': {'type': 'STRING'},
            'input4': {'type': 'DOUBLE'},
        },
    }
    expected_spec = json_format.ParseDict(
        {'inputDefinitions': expected_inputs},
        pipeline_spec_pb2.ComponentSpec())

    pipeline_params = [
        dsl.PipelineParam(name=name, param_type=param_type)
        for name, param_type in (
            ('input1', 'Dataset'),
            ('input2', 'Integer'),
            ('input3', 'String'),
            ('input4', 'Float'),
        )
    ]
    actual_spec = dsl_component_spec.build_root_spec_from_pipeline_params(
        pipeline_params)

    self.assertEqual(expected_spec, actual_spec)