Esempio n. 1
0
    def test_component_metadata(self):
        """Check the metadata that the ``@component`` decorator produces.

        A decorated function with dict-typed, ``Integer()``-typed and
        defaulted parameters is invoked, and the ``_metadata`` stored on the
        returned op is compared against a hand-written ``ComponentSpec``.
        """
        class MockContainerOp:
            """Minimal op stand-in that remembers the metadata it is given."""

            def _set_metadata(self, component_meta):
                self._metadata = component_meta

        # The function name is kept as-is: the expected spec name below
        # ('ComponentA') corresponds to it.
        @component
        def componentA(
            a: {'ArtifactA': {'file_type': 'csv'}},
            b: Integer() = 12,
            c: {'ArtifactB': {'path_type': 'file', 'file_type': 'tsv'}} = 'gs://hello/world'
        ) -> {'model': Integer()}:
            return MockContainerOp()

        container_op = componentA(1, 2, c=3)

        expected_inputs = [
            InputSpec(name='a', type={'ArtifactA': {'file_type': 'csv'}}),
            InputSpec(
                name='b',
                type={'Integer': {'openapi_schema_validator': {"type": "integer"}}},
                default="12",
                optional=True,
            ),
            InputSpec(
                name='c',
                type={'ArtifactB': {'path_type': 'file', 'file_type': 'tsv'}},
                default='gs://hello/world',
                optional=True,
            ),
        ]
        expected_outputs = [
            OutputSpec(
                name='model',
                type={'Integer': {'openapi_schema_validator': {"type": "integer"}}},
            ),
        ]
        expected_meta = ComponentSpec(
            name='ComponentA',
            inputs=expected_inputs,
            outputs=expected_outputs,
        )

        self.assertEqual(container_op._metadata, expected_meta)
    def test_handle_constructing_graph_component(self):
        """Construct a three-task graph component spec.

        There are no assertions: the test passes as long as building the
        task specs, the graph and the enclosing ``ComponentSpec`` does not
        raise.
        """
        first_task = TaskSpec(
            component_ref=ComponentReference(name='comp 1'),
            arguments={'in1 1': 11},
        )
        # Second task consumes an output of the first one.
        second_task = TaskSpec(
            component_ref=ComponentReference(name='comp 2'),
            arguments={
                'in2 1': 21,
                'in2 2': TaskOutputArgument.construct(
                    task_id='task 1', output_name='out1 1'),
            },
        )
        # Third task mixes an upstream task output with a graph-level input.
        third_task = TaskSpec(
            component_ref=ComponentReference(name='comp 3'),
            arguments={
                'in3 1': TaskOutputArgument.construct(
                    task_id='task 2', output_name='out2 1'),
                'in3 2': GraphInputReference(
                    input_name='graph in 1').as_argument(),
            },
        )

        graph_tasks = {
            'task 1': first_task,
            'task 2': second_task,
            'task 3': third_task,
        }
        graph_outputs = {
            'graph out 1': TaskOutputArgument.construct(
                task_id='task 3', output_name='out3 1'),
            'graph out 2': TaskOutputArgument.construct(
                task_id='task 1', output_name='out1 2'),
        }
        graph = GraphSpec(tasks=graph_tasks, output_values=graph_outputs)

        graph_component = ComponentSpec(
            inputs=[
                InputSpec(name='graph in 1'),
                InputSpec(name='graph in 2'),
            ],
            outputs=[
                OutputSpec(name='graph out 1'),
                OutputSpec(name='graph out 2'),
            ],
            implementation=GraphImplementation(graph=graph),
        )
    def _create_io_from_component_spec(
            spec: Type[SageMakerComponentSpec]) -> IOArgs:
        """Translate a component spec's inputs and outputs into YAML-spec form.

        Every attribute of ``spec.INPUTS`` becomes an ``InputSpec`` plus a
        ``--<name>`` flag followed by an input-value placeholder. Every
        attribute of ``spec.OUTPUTS`` becomes an ``OutputSpec`` plus a
        ``--<name><OUTPUT_ARGUMENT_SUFFIX>`` flag followed by an output-path
        placeholder. Input arguments always precede output arguments.

        Args:
            spec: A component specification definition.

        Returns:
            IOArgs: The collected input specs, output specs and command-line
                arguments.
        """
        input_specs = []
        cli_args = []

        for name, raw_input in spec.INPUTS.__dict__.items():
            # The spec has been validated beforehand, so each value is known
            # to be an input validator; the cast only informs type checkers.
            validator: SageMakerComponentInputValidator = cast(
                SageMakerComponentInputValidator, raw_input)

            # Argument types with no known KFP equivalent fall back to String.
            kfp_type = SageMakerComponentCompiler.KFP_TYPE_FROM_ARGS.get(
                validator.input_type, "String")
            entry = InputSpec(
                name=name,
                description=validator.description,
                type=kfp_type,
            )

            if validator.default is not None:
                entry.__dict__["default"] = str(validator.default)
            elif not validator.required:
                # Optional input without a default: use an empty string.
                entry.__dict__["default"] = ""
            input_specs.append(entry)

            cli_args.extend(
                (f"--{name}", InputValuePlaceholder(input_name=name)))

        output_specs = []
        for name, raw_output in spec.OUTPUTS.__dict__.items():
            out_validator: SageMakerComponentOutputValidator = cast(
                SageMakerComponentOutputValidator, raw_output)
            output_specs.append(
                OutputSpec(name=name, description=out_validator.description))

            # Each output contributes a flag and a path placeholder to the
            # same argument list as the inputs.
            cli_args.extend((
                f"--{name}{SageMakerComponentSpec.OUTPUT_ARGUMENT_SUFFIX}",
                OutputPathPlaceholder(output_name=name),
            ))

        return IOArgs(inputs=input_specs, outputs=output_specs, args=cli_args)
Esempio n. 4
0
def convert_method_to_component(
    cls: aiplatform.base.VertexAiResourceNoun, method: Callable
) -> Callable:
    """Converts a MB SDK Method to a Component wrapper.

    The wrapper enforces the correct signature w.r.t the MB SDK. The signature
    is also available to inspect.

    For example:

    aiplatform.Model.deploy is converted to ModelDeployOp

    Which can be called:
        model_deploy_step = ModelDeployOp(
            project=project,  # Pipeline parameter
            endpoint=endpoint_create_step.outputs['endpoint'],
            model=model_upload_step.outputs['model'],
            deployed_model_display_name='my-deployed-model',
            machine_type='n1-standard-4',
        )

    Generates and invokes a Component along these lines (illustrative: the
    image is DEFAULT_CONTAINER_IMAGE and the artifact types come from
    map_resource_to_metadata_type, neither of which is defined here):

    name: Model-deploy
    inputs:
    - {name: project, type: String}
    - {name: endpoint, type: Artifact}
    - {name: model, type: Model}
    outputs:
    - {name: endpoint, type: Artifact}
    implementation:
      container:
        image: <DEFAULT_CONTAINER_IMAGE>
        command:
        - python3
        - -m
        - google_cloud_pipeline_components.aiplatform.remote_runner
        - --cls_name
        - Model
        - --method_name
        - deploy
        args:
        - --method.deployed_model_display_name
        - my-deployed-model
        - --method.machine_type
        - n1-standard-4
        - --executor_input
        - '{{$}}'
        - --resource_name_output_artifact_uri
        - {outputUri: endpoint}
        - --init.project
        - {inputValue: project}
        - --method.endpoint
        - {inputUri: endpoint}
        - --init.model_name
        - {inputUri: model}

    Constructor parameters are only folded into the component when `method`
    is a plain function as seen by `inspect.isfunction` (i.e. it was accessed
    on the class without being bound); otherwise only the method's own
    parameters are exposed.

    Args:
        cls (aiplatform.base.VertexAiResourceNoun): The MB SDK class that
            owns `method`. Its name and `__init__` signature are used.
        method (Callable): A MB SDK Method
    Returns:
        A Component wrapper that accepts the MB SDK params and returns a Task.
    """
    # Local import: only needed to build the temporary component file path.
    import os

    method_name = method.__name__
    method_signature = inspect.signature(method)

    cls_name = cls.__name__
    init_method = cls.__init__
    init_signature = inspect.signature(init_method)

    should_serialize_init = inspect.isfunction(method)

    # map to store parameter names that are changed in components
    # this is generally used for constructor where the mb sdk takes
    # a resource name but the component takes a metadata entry
    # ie: model: system.Model -> model_name: str
    # NOTE(review): presumably populated in place by filter_signature below
    # -- confirm against that helper.
    component_param_name_to_mb_sdk_param_name = {}
    # remove unused parameters
    method_signature = filter_signature(method_signature)
    init_signature = filter_signature(
        init_signature,
        is_init_signature=True,
        self_type=cls,
        component_param_name_to_mb_sdk_param_name=
        component_param_name_to_mb_sdk_param_name
    )

    # use this to partition args to method or constructor
    init_arg_names = (
        set(init_signature.parameters.keys())
        if should_serialize_init else set()
    )

    # determines outputs for this component
    output_type = resolve_annotation(method_signature.return_annotation)
    output_specs = []
    output_args = []
    if output_type:
        output_metadata_name, output_metadata_type = map_resource_to_metadata_type(
            output_type
        )
        output_specs.append(
            OutputSpec(
                name=output_metadata_name,
                type=output_metadata_type,
            )
        )

        output_args = [
            '--executor_input',
            '{{$}}',
            '--resource_name_output_artifact_uri',
            OutputUriPlaceholder(output_name=output_metadata_name),
        ]

    def make_args(args_to_serialize: Dict[str, Dict[str, Any]]) -> List[str]:
        """Takes the args dictionary and returns command-line args.

        Args:
            args_to_serialize: Dictionary of format
                {'init': {'param_name_1': param_1}, 'method': {'param_name_2': param_2}}
        Returns:
            Serialized args compatible with Component YAML: a flat list of
            alternating '--<group>.<param_name>' flags and their values.
        """
        additional_args = []
        for key, args in args_to_serialize.items():
            for arg_key, value in args.items():
                additional_args.append(f'--{key}.{arg_key}')
                additional_args.append(value)
        return additional_args

    def component_yaml_generator(**kwargs):
        input_specs = []
        input_args = []
        input_kwargs = {}

        serialized_args = {INIT_KEY: {}, METHOD_KEY: {}}

        init_kwargs = {}
        method_kwargs = {}

        for key, value in kwargs.items():
            if key in init_arg_names:
                prefix_key = INIT_KEY
                init_kwargs[key] = value
                signature = init_signature
            else:
                prefix_key = METHOD_KEY
                method_kwargs[key] = value
                signature = method_signature

            # no need to add this argument because it's optional
            # this param is validated against the signature because
            # of init_kwargs, method_kwargs
            if value is None:
                continue

            param_type = signature.parameters[key].annotation
            param_type = resolve_annotation(param_type)
            serializer = get_serializer(param_type)
            if serializer:
                param_type = str
                value = serializer(value)

            # The command-line flag uses the MB SDK-side parameter name, while
            # the component input keeps the name the caller passed (`key`).
            mb_sdk_param_name = component_param_name_to_mb_sdk_param_name.get(
                key, key
            )

            # TODO remove PipelineParam check when Metadata Importer component available
            # if we serialize we need to include the argument as input
            # perhaps, another option is to embed in yaml as json serialized list
            if isinstance(value,
                          kfp.dsl._pipeline_param.PipelineParam) or serializer:
                is_resource_noun = is_mb_sdk_resource_noun_type(param_type)
                if is_resource_noun:
                    component_param_type = map_resource_to_metadata_type(
                        param_type
                    )[1]
                else:
                    component_param_type = 'String'

                input_specs.append(
                    InputSpec(
                        name=key,
                        type=component_param_type,
                    )
                )
                input_args.append(f'--{prefix_key}.{mb_sdk_param_name}')
                if is_resource_noun:
                    input_args.append(InputUriPlaceholder(input_name=key))
                else:
                    input_args.append(InputValuePlaceholder(input_name=key))

                input_kwargs[key] = value
            else:
                # Serialized arguments must always be strings
                value = str(value)
                serialized_args[prefix_key][mb_sdk_param_name] = value

        # validate parameters
        if should_serialize_init:
            init_signature.bind(**init_kwargs)
        method_signature.bind(**method_kwargs)

        component_spec = ComponentSpec(
            name=f'{cls_name}-{method_name}',
            inputs=input_specs,
            outputs=output_specs,
            implementation=ContainerImplementation(
                container=ContainerSpec(
                    image=DEFAULT_CONTAINER_IMAGE,
                    command=[
                        'python3',
                        '-m',
                        'google_cloud_pipeline_components.aiplatform.remote_runner',
                        '--cls_name',
                        cls_name,
                        '--method_name',
                        method_name,
                    ],
                    args=make_args(serialized_args) + output_args + input_args,
                )
            )
        )

        # The spec is round-tripped through a file because it is loaded by
        # path. A private temporary directory replaces the deprecated,
        # race-prone tempfile.mktemp() and is removed once the task has been
        # created, so no stray files accumulate.
        with tempfile.TemporaryDirectory() as component_dir:
            component_path = os.path.join(component_dir, 'component.yaml')
            component_spec.save(component_path)

            return components.load_component_from_file(component_path)(
                **input_kwargs
            )

    component_yaml_generator.__signature__ = signatures_union(
        init_signature, method_signature
    ) if should_serialize_init else method_signature

    # Create a docstring based on the new signature.
    new_args_dict = {}
    new_args_dict.update(
        filter_docstring_args(
            signature=method_signature,
            docstring=inspect.getdoc(method),
            is_init_signature=False
        )
    )
    if should_serialize_init:
        new_args_dict.update(
            filter_docstring_args(
                signature=init_signature,
                docstring=inspect.getdoc(init_method),
                is_init_signature=True
            )
        )
    component_yaml_generator.__doc__ = generate_docstring(
        args_dict=new_args_dict,
        signature=component_yaml_generator.__signature__,
        method_docstring=inspect.getdoc(method)
    )

    # TODO Possibly rename method

    return component_yaml_generator
Esempio n. 5
0
class ComponentCompilerTestCase(unittest.TestCase):
    """Unit tests for the static helpers of ``SageMakerComponentCompiler``."""

    # These should always match the dummy spec
    DUMMY_IO_ARGS = IOArgs(
        inputs=[
            InputSpec(
                name="input1",
                description="The first input.",
                type="String",
                default="input1-default",
            ),
            InputSpec(name="input2",
                      description="The second input.",
                      type="Integer"),
        ],
        outputs=[
            OutputSpec(name="output1", description="The first output."),
            OutputSpec(name="output2", description="The second output."),
        ],
        args=[
            "--input1",
            InputValuePlaceholder(input_name="input1"),
            "--input2",
            InputValuePlaceholder(input_name="input2"),
            "--output1_output_path",
            OutputPathPlaceholder(output_name="output1"),
            "--output2_output_path",
            OutputPathPlaceholder(output_name="output2"),
        ],
    )

    # Spec expected when the dummy component is compiled with image
    # "my-image", tag "my-tag" and file path "fake-path".
    DUMMY_COMPONENT_SPEC = ComponentSpec(
        name="Dummy component",
        description="Dummy description",
        inputs=DUMMY_IO_ARGS.inputs,
        outputs=DUMMY_IO_ARGS.outputs,
        implementation=ContainerImplementation(container=ContainerSpec(
            image="my-image:my-tag",
            command=["python3"],
            args=[
                "fake-path",
                "--input1",
                InputValuePlaceholder(input_name="input1"),
                "--input2",
                InputValuePlaceholder(input_name="input2"),
                "--output1_output_path",
                OutputPathPlaceholder(output_name="output1"),
                "--output2_output_path",
                OutputPathPlaceholder(output_name="output2"),
            ],
        )),
    )

    # Expected IO for the spec exercising every supported input type,
    # including optional inputs with and without a default.
    EXTRA_IO_ARGS = IOArgs(
        inputs=[
            InputSpec(name="inputStr", description="str", type="String"),
            InputSpec(name="inputInt", description="int", type="Integer"),
            InputSpec(name="inputBool", description="bool", type="Bool"),
            InputSpec(name="inputDict", description="dict", type="JsonObject"),
            InputSpec(name="inputList", description="list", type="JsonArray"),
            InputSpec(
                name="inputOptional",
                description="optional",
                type="String",
                default="default-string",
            ),
            InputSpec(
                name="inputOptionalNoDefault",
                description="optional",
                type="String",
                default="",
            ),
        ],
        outputs=[],
        args=[
            "--inputStr",
            InputValuePlaceholder(input_name="inputStr"),
            "--inputInt",
            InputValuePlaceholder(input_name="inputInt"),
            "--inputBool",
            InputValuePlaceholder(input_name="inputBool"),
            "--inputDict",
            InputValuePlaceholder(input_name="inputDict"),
            "--inputList",
            InputValuePlaceholder(input_name="inputList"),
            "--inputOptional",
            InputValuePlaceholder(input_name="inputOptional"),
            "--inputOptionalNoDefault",
            InputValuePlaceholder(input_name="inputOptionalNoDefault"),
        ],
    )

    @classmethod
    def setUpClass(cls):
        cls.compiler = SageMakerComponentCompiler()

    def test_create_io_from_component_spec(self):
        """IO extracted from the dummy spec matches ``DUMMY_IO_ARGS``."""
        response = SageMakerComponentCompiler._create_io_from_component_spec(
            DummySpec)  # type: ignore

        self.assertEqual(self.DUMMY_IO_ARGS, response)

    def test_create_io_from_component_spec_extra_types(self):
        """IO extracted from the extra-types spec matches ``EXTRA_IO_ARGS``."""
        response = SageMakerComponentCompiler._create_io_from_component_spec(
            ExtraSpec)  # type: ignore

        self.assertEqual(self.EXTRA_IO_ARGS, response)

    def test_create_component_spec_composes_correctly(self):
        """The composed spec combines image, tag, file path and IO args."""
        # These values are the ones baked into DUMMY_COMPONENT_SPEC
        # ("my-image:my-tag" and the leading "fake-path" argument).
        image_uri = "my-image"
        image_tag = "my-tag"
        file_path = "fake-path"

        # IO extraction is covered by its own tests, so it is stubbed out to
        # isolate the composition logic.
        with patch(
                "common.component_compiler.SageMakerComponentCompiler._create_io_from_component_spec",
                MagicMock(return_value=self.DUMMY_IO_ARGS),
        ):
            response = SageMakerComponentCompiler._create_component_spec(
                DummyComponent, file_path, image_uri, image_tag)

        self.assertEqual(self.DUMMY_COMPONENT_SPEC, response)

    def test_write_component(self):
        """``_write_component`` saves the component to the given path."""
        # Patch ``save`` only for the duration of this test. Assigning the
        # mock directly on the class would leak it into every later test that
        # uses DummyComponent. ``create=True`` covers the case where the
        # class does not define ``save`` itself.
        with patch.object(DummyComponent, "save", create=True) as mock_save:
            SageMakerComponentCompiler._write_component(DummyComponent,
                                                        "/tmp/fake-path")

        mock_save.assert_called_once_with("/tmp/fake-path")
Esempio n. 6
0
    def test_extract_component_interface(self):
        """Verify the interface extracted from an annotated Python function.

        The sample function mixes un-annotated, builtin-typed, string-typed
        and dict-typed parameters with a ``NamedTuple`` return annotation.
        The extracted inputs, outputs and dict form are each compared with
        hand-written expectations.
        """
        from typing import NamedTuple

        # The signature and docstring of this function are the test data, so
        # it is deliberately left exactly as written.
        def my_func(  # noqa: F722
            required_param,
            int_param: int = 42,
            float_param: float = 3.14,
            str_param: str = 'string',
            bool_param: bool = True,
            none_param=None,
            custom_type_param: 'Custom type' = None,
            custom_struct_type_param: {
                'CustomType': {
                    'param1': 'value1',
                    'param2': 'value2'
                }
            } = None,
        ) -> NamedTuple(
                'DummyName',
            [
                #('required_param',), # All typing.NamedTuple fields must have types
                ('int_param', int),
                ('float_param', float),
                ('str_param', str),
                ('bool_param', bool),
                #('custom_type_param', 'Custom type'), #SyntaxError: Forward reference must be an expression -- got 'Custom type'
                ('custom_type_param', 'CustomType'),
                #('custom_struct_type_param', {'CustomType': {'param1': 'value1', 'param2': 'value2'}}), # TypeError: NamedTuple('Name', [(f0, t0), (f1, t1), ...]); each t must be a type Got {'CustomType': {'param1': 'value1', 'param2': 'value2'}}
            ]):
            '''Function docstring'''
            pass

        extracted = comp._python_op._extract_component_interface(my_func)

        from kfp.components.structures import InputSpec, OutputSpec

        expected_inputs = [
            InputSpec(name='required_param'),
            InputSpec(
                name='int_param', type='Integer', default='42', optional=True),
            InputSpec(
                name='float_param', type='Float', default='3.14', optional=True),
            InputSpec(
                name='str_param', type='String', default='string', optional=True),
            InputSpec(
                name='bool_param', type='Boolean', default='True', optional=True),
            # A None default is not recorded as default='None'.
            InputSpec(name='none_param', optional=True),
            InputSpec(
                name='custom_type_param', type='Custom type', optional=True),
            InputSpec(
                name='custom_struct_type_param',
                type={'CustomType': {'param1': 'value1', 'param2': 'value2'}},
                optional=True,
            ),
        ]
        self.assertEqual(extracted.inputs, expected_inputs)

        # Only the fields that could be declared in the NamedTuple appear as
        # outputs; the struct-typed field is absent.
        expected_outputs = [
            OutputSpec(name='int_param', type='Integer'),
            OutputSpec(name='float_param', type='Float'),
            OutputSpec(name='str_param', type='String'),
            OutputSpec(name='bool_param', type='Boolean'),
            OutputSpec(name='custom_type_param', type='CustomType'),
        ]
        self.assertEqual(extracted.outputs, expected_outputs)

        self.maxDiff = None
        expected_dict = {
            'name': 'My func',
            'description': 'Function docstring',
            'inputs': [
                {'name': 'required_param'},
                {
                    'name': 'int_param',
                    'type': 'Integer',
                    'default': '42',
                    'optional': True,
                },
                {
                    'name': 'float_param',
                    'type': 'Float',
                    'default': '3.14',
                    'optional': True,
                },
                {
                    'name': 'str_param',
                    'type': 'String',
                    'default': 'string',
                    'optional': True,
                },
                {
                    'name': 'bool_param',
                    'type': 'Boolean',
                    'default': 'True',
                    'optional': True,
                },
                # A None default is not recorded as default='None'.
                {'name': 'none_param', 'optional': True},
                {
                    'name': 'custom_type_param',
                    'type': 'Custom type',
                    'optional': True,
                },
                {
                    'name': 'custom_struct_type_param',
                    'type': {
                        'CustomType': {'param1': 'value1', 'param2': 'value2'},
                    },
                    'optional': True,
                },
            ],
            'outputs': [
                {'name': 'int_param', 'type': 'Integer'},
                {'name': 'float_param', 'type': 'Float'},
                {'name': 'str_param', 'type': 'String'},
                {'name': 'bool_param', 'type': 'Boolean'},
                {'name': 'custom_type_param', 'type': 'CustomType'},
            ],
        }
        self.assertDictEqual(extracted.to_dict(), expected_dict)
Esempio n. 7
0
 def test_to_dict(self):
     """``ComponentSpec.to_dict`` yields the plain-dict form of the spec.

     Covers inputs with dict-valued and string-valued types plus a single
     dict-typed output.
     """
     spec_inputs = [
         InputSpec(
             name='input1',
             description='input1 desc',
             type={'GCSPath': {'bucket_type': 'directory', 'file_type': 'csv'}},
             default='default1',
         ),
         InputSpec(
             name='input2',
             description='input2 desc',
             type={'TFModel': {'input_data': 'tensor', 'version': '1.8.0'}},
             default='default2',
         ),
         InputSpec(
             name='input3',
             description='input3 desc',
             type='Integer',
             default='default3',
         ),
     ]
     spec_outputs = [
         OutputSpec(
             name='output1',
             description='output1 desc',
             type={'Schema': {'file_type': 'tsv'}},
         ),
     ]
     spec = ComponentSpec(
         name='foobar',
         description='foobar example',
         inputs=spec_inputs,
         outputs=spec_outputs,
     )

     expected = {
         'name': 'foobar',
         'description': 'foobar example',
         'inputs': [
             {
                 'name': 'input1',
                 'description': 'input1 desc',
                 'type': {
                     'GCSPath': {
                         'bucket_type': 'directory',
                         'file_type': 'csv',
                     },
                 },
                 'default': 'default1',
             },
             {
                 'name': 'input2',
                 'description': 'input2 desc',
                 'type': {
                     'TFModel': {
                         'input_data': 'tensor',
                         'version': '1.8.0',
                     },
                 },
                 'default': 'default2',
             },
             {
                 'name': 'input3',
                 'description': 'input3 desc',
                 'type': 'Integer',
                 'default': 'default3',
             },
         ],
         'outputs': [
             {
                 'name': 'output1',
                 'description': 'output1 desc',
                 'type': {'Schema': {'file_type': 'tsv'}},
             },
         ],
     }
     self.assertEqual(spec.to_dict(), expected)