    def test_decorator_metadata(self):
        """Test @pipeline decorator with metadata."""
        @pipeline(name='p1', description='description1')
        def my_pipeline1(a: {'Schema':
                             {
                                 'file_type': 'csv'
                             }} = 'good',
                         b: Integer() = 12):
            pass

        golden_meta = ComponentSpec(name='p1',
                                    description='description1',
                                    inputs=[])
        golden_meta.inputs.append(
            InputSpec(name='a',
                      type={'Schema': {
                          'file_type': 'csv'
                      }},
                      default='good',
                      optional=True))
        golden_meta.inputs.append(
            InputSpec(name='b',
                      type={
                          'Integer': {
                              'openapi_schema_validator': {
                                  "type": "integer"
                              }
                          }
                      },
                      default="12",
                      optional=True))

        pipeline_meta = _extract_pipeline_metadata(my_pipeline1)
        self.assertEqual(pipeline_meta, golden_meta)
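
Two serialization details make the equality above hold: the Integer() default 12 is recorded as the string '12', and any parameter with a default is marked optional=True. A minimal inspection sketch, reusing only names defined in this test:

# Sketch: reuses my_pipeline1 and _extract_pipeline_metadata from the test
# above; defaults come back stringified and imply optional=True.
meta = _extract_pipeline_metadata(my_pipeline1)
for spec in meta.inputs:
    print(spec.name, spec.type, repr(spec.default), spec.optional)
# a {'Schema': {'file_type': 'csv'}} 'good' True
# b {'Integer': {'openapi_schema_validator': {'type': 'integer'}}} '12' True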
Example #2
    def test_component_metadata_standard_type_annotation(self):
        """Test component decorator metadata."""

        class MockContainerOp:

            def _set_metadata(self, component_meta):
                self._metadata = component_meta

        @component
        def componentA(a: float, b: List[int], c: Optional[str] = None) -> None:
            return MockContainerOp()

        containerOp = componentA('str_value', '[1,2,3]')

        golden_meta = ComponentSpec(name='ComponentA', inputs=[], outputs=None)
        golden_meta.inputs.append(InputSpec(name='a', type='Float'))
        golden_meta.inputs.append(
            InputSpec(
                name='b',
                type='typing.List[int]' if sys.version_info >=
                (3, 7) else 'List'))
        golden_meta.inputs.append(
            InputSpec(name='c', type='String', default=None, optional=True))

        self.assertEqual(containerOp._metadata, golden_meta)
Example #3
    def test_component_metadata(self):
        """Test component decorator metadata."""
        class MockContainerOp:
            def _set_metadata(self, component_meta):
                self._metadata = component_meta

        @component
        def componentA(
            a: {'ArtifactA': {
                'file_type': 'csv'
            }},
            b: Integer() = 12,
            c: {'ArtifactB': {
                'path_type': 'file',
                'file_type': 'tsv'
            }} = 'gs://hello/world'
        ) -> {
                'model': Integer()
        }:
            return MockContainerOp()

        containerOp = componentA(1, 2, c=3)

        golden_meta = ComponentSpec(name='ComponentA', inputs=[], outputs=[])
        golden_meta.inputs.append(
            InputSpec(name='a', type={'ArtifactA': {
                'file_type': 'csv'
            }}))
        golden_meta.inputs.append(
            InputSpec(name='b',
                      type={
                          'Integer': {
                              'openapi_schema_validator': {
                                  "type": "integer"
                              }
                          }
                      },
                      default="12",
                      optional=True))
        golden_meta.inputs.append(
            InputSpec(
                name='c',
                type={'ArtifactB': {
                    'path_type': 'file',
                    'file_type': 'tsv'
                }},
                default='gs://hello/world',
                optional=True))
        golden_meta.outputs.append(
            OutputSpec(name='model',
                       type={
                           'Integer': {
                               'openapi_schema_validator': {
                                   "type": "integer"
                               }
                           }
                       }))

        self.assertEqual(containerOp._metadata, golden_meta)
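
The return annotation -> {'model': Integer()} is what populates outputs: each key becomes an OutputSpec. A small sketch of that correspondence, assuming the @component decorator preserves __annotations__ (e.g. via functools.wraps):

# Sketch: the keys of the return-annotation dict line up with the golden
# outputs. Assumes the decorator preserves __annotations__ on componentA.
return_annotation = componentA.__annotations__['return']  # {'model': Integer()}
assert list(return_annotation) == [out.name for out in golden_meta.outputs]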
    @staticmethod
    def _create_io_from_component_spec(
            spec: Type[SageMakerComponentSpec]) -> IOArgs:
        """Parses the set of inputs and outputs from a component spec into the
        YAML spec form.

        Args:
            spec: A component specification definition.

        Returns:
            IOArgs: The IO arguments object filled with the fields from the
                component spec definition.
        """
        inputs = []
        outputs = []
        args = []

        # Iterate through all inputs adding them to the argument list
        for key, _input in spec.INPUTS.__dict__.items():
            # We know all of these values are validators as we have validated the spec
            input_validator: SageMakerComponentInputValidator = cast(
                SageMakerComponentInputValidator, _input)

            # Map from the argparse definition to the KFP component input
            input_spec = InputSpec(
                name=key,
                description=input_validator.description,
                type=SageMakerComponentCompiler.KFP_TYPE_FROM_ARGS.get(
                    input_validator.input_type, "String"),
            )

            # Add optional fields
            if input_validator.default is not None:
                input_spec.__dict__["default"] = str(input_validator.default)
            elif not input_validator.required:
                # If not required and has no default, add empty string
                input_spec.__dict__["default"] = ""
            inputs.append(input_spec)

            # Add the input's flag and placeholder to the argument list
            args.append(f"--{key}")
            args.append(InputValuePlaceholder(input_name=key))

        for key, _output in spec.OUTPUTS.__dict__.items():
            output_validator: SageMakerComponentOutputValidator = cast(
                SageMakerComponentOutputValidator, _output)
            outputs.append(
                OutputSpec(name=key, description=output_validator.description))

            # Add the output's flag and placeholder to the argument list
            args.append(
                f"--{key}{SageMakerComponentSpec.OUTPUT_ARGUMENT_SUFFIX}")
            args.append(OutputPathPlaceholder(output_name=key))

        return IOArgs(inputs=inputs, outputs=outputs, args=args)
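
The method reads only four validator attributes (input_type, description, default, required for inputs; description for outputs), so hypothetical stand-ins suffice to sketch a call. Everything below is illustrative, not the real fixture machinery:

from types import SimpleNamespace

# Hypothetical stand-ins that mimic only the attributes read above; the real
# validator classes carry more behavior.
fake_input = SimpleNamespace(input_type=str, description="An example input.",
                             default=None, required=True)
fake_output = SimpleNamespace(description="An example output.")
fake_spec = SimpleNamespace(INPUTS=SimpleNamespace(example_in=fake_input),
                            OUTPUTS=SimpleNamespace(example_out=fake_output))

io_args = SageMakerComponentCompiler._create_io_from_component_spec(fake_spec)  # type: ignore
# Expected shape:
#   inputs  -> [InputSpec(name='example_in', type='String', ...)]
#   outputs -> [OutputSpec(name='example_out', description='An example output.')]
#   args    -> ['--example_in', InputValuePlaceholder(input_name='example_in'),
#               '--example_out_output_path', OutputPathPlaceholder(output_name='example_out')]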
    def test_handle_constructing_graph_component(self):
        task1 = TaskSpec(component_ref=ComponentReference(name='comp 1'),
                         arguments={'in1 1': 11})
        task2 = TaskSpec(component_ref=ComponentReference(name='comp 2'),
                         arguments={
                             'in2 1':
                             21,
                             'in2 2':
                             TaskOutputArgument.construct(task_id='task 1',
                                                          output_name='out1 1')
                         })
        task3 = TaskSpec(
            component_ref=ComponentReference(name='comp 3'),
            arguments={
                'in3 1':
                TaskOutputArgument.construct(task_id='task 2',
                                             output_name='out2 1'),
                'in3 2':
                GraphInputReference(input_name='graph in 1').as_argument()
            })

        graph_component1 = ComponentSpec(
            inputs=[
                InputSpec(name='graph in 1'),
                InputSpec(name='graph in 2'),
            ],
            outputs=[
                OutputSpec(name='graph out 1'),
                OutputSpec(name='graph out 2'),
            ],
            implementation=GraphImplementation(graph=GraphSpec(
                tasks={
                    'task 1': task1,
                    'task 2': task2,
                    'task 3': task3,
                },
                output_values={
                    'graph out 1':
                    TaskOutputArgument.construct(task_id='task 3',
                                                 output_name='out3 1'),
                    'graph out 2':
                    TaskOutputArgument.construct(task_id='task 1',
                                                 output_name='out1 2'),
                })))
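
This excerpt builds the graph spec but stops before asserting anything; a typical follow-up is a dict round-trip, assuming kfp v1 structures inherit to_dict/from_dict from ModelBase:

# Sketch: round-trip the graph component through its dict form. Assumes
# ComponentSpec.from_dict exists (kfp v1 ModelBase); equality is structural.
component_dict = graph_component1.to_dict()
restored = ComponentSpec.from_dict(component_dict)
assert restored == graph_component1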
Example #6
  def _create_workflow(self,
      pipeline_func: Callable,
      pipeline_name: Text = None,
      pipeline_description: Text = None,
      params_list: List[dsl.PipelineParam] = None,
      pipeline_conf: dsl.PipelineConf = None,
      ) -> List[Dict[Text, Any]]:  # Tekton change, signature
    """ Internal implementation of create_workflow."""
    params_list = params_list or []
    argspec = inspect.getfullargspec(pipeline_func)

    # Create the arg list with no default values and call pipeline function.
    # Assign type information to the PipelineParam
    pipeline_meta = _extract_pipeline_metadata(pipeline_func)
    pipeline_meta.name = pipeline_name or pipeline_meta.name
    pipeline_meta.description = pipeline_description or pipeline_meta.description
    pipeline_name = sanitize_k8s_name(pipeline_meta.name)

    # Need to first clear the default value of dsl.PipelineParams. Otherwise, it
    # will be resolved immediately in place when being passed to each component.
    default_param_values = {}
    for param in params_list:
      default_param_values[param.name] = param.value
      param.value = None

    # Currently, pipeline params may only be specified in one place.
    if params_list and pipeline_meta.inputs:
      raise ValueError('Either specify pipeline params in the pipeline function, or in "params_list", but not both.')

    args_list = []
    for arg_name in argspec.args:
      arg_type = None
      for input in pipeline_meta.inputs or []:
        if arg_name == input.name:
          arg_type = input.type
          break
      args_list.append(dsl.PipelineParam(sanitize_k8s_name(arg_name, True), param_type=arg_type))

    with dsl.Pipeline(pipeline_name) as dsl_pipeline:
      pipeline_func(*args_list)

    # Configuration passed to the compiler takes precedence. Unfortunately, it is
    # not trivial to detect whether the dsl_pipeline.conf was ever modified.
    pipeline_conf = pipeline_conf or dsl_pipeline.conf

    self._validate_exit_handler(dsl_pipeline)
    self._sanitize_and_inject_artifact(dsl_pipeline, pipeline_conf)

    # Fill in the default values.
    args_list_with_defaults = []
    if pipeline_meta.inputs:
      args_list_with_defaults = [dsl.PipelineParam(sanitize_k8s_name(arg_name, True))
                                 for arg_name in argspec.args]
      if argspec.defaults:
        for arg, default in zip(reversed(args_list_with_defaults), reversed(argspec.defaults)):
          arg.value = default.value if isinstance(default, dsl.PipelineParam) else default
    elif params_list:
      # Or, if args are provided by params_list, fill in pipeline_meta.
      for param in params_list:
        param.value = default_param_values[param.name]

      args_list_with_defaults = params_list
      pipeline_meta.inputs = [
        InputSpec(
            name=param.name,
            type=param.param_type,
            default=param.value) for param in params_list]

    op_transformers = [add_pod_env]
    op_transformers.extend(pipeline_conf.op_transformers)

    workflow = self._create_pipeline_workflow(
        args_list_with_defaults,
        dsl_pipeline,
        op_transformers,
        pipeline_conf,
    )

    from ._data_passing_rewriter import fix_big_data_passing
    workflow = fix_big_data_passing(workflow)

    import json
    pipeline = [item for item in workflow if item["kind"] == "Pipeline"][0]  # Tekton change
    pipeline.setdefault('metadata', {}).setdefault('annotations', {})['pipelines.kubeflow.org/pipeline_spec'] = json.dumps(pipeline_meta.to_dict(), sort_keys=True)

    return workflow
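
Unlike the Argo-style variants later in this list, this Tekton version returns a list of Kubernetes resources rather than a single workflow dict. A hypothetical caller sketch, where tekton_compiler stands in for an instance of the compiler class defining _create_workflow:

import kfp.dsl as dsl

# Hypothetical usage sketch; tekton_compiler is assumed, not defined here.
@dsl.pipeline(name='echo', description='A trivial pipeline.')
def echo_pipeline(message: str = 'hello'):
    dsl.ContainerOp(name='echo', image='alpine', command=['echo', message])

workflow = tekton_compiler._create_workflow(echo_pipeline)
# Mirror the lookup done inside the method to find the Pipeline resource.
pipeline_res = [item for item in workflow if item['kind'] == 'Pipeline'][0]
print(pipeline_res['metadata']['annotations']['pipelines.kubeflow.org/pipeline_spec'])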
Example #7
    def component_yaml_generator(**kwargs):
        input_specs = []
        input_args = []
        input_kwargs = {}

        serialized_args = {INIT_KEY: {}, METHOD_KEY: {}}

        init_kwargs = {}
        method_kwargs = {}

        for key, value in kwargs.items():
            if key in init_arg_names:
                prefix_key = INIT_KEY
                init_kwargs[key] = value
                signature = init_signature
            else:
                prefix_key = METHOD_KEY
                method_kwargs[key] = value
                signature = method_signature

            # Skip None values: the argument is optional, and it is still
            # validated against the signature later via init_kwargs /
            # method_kwargs.
            if value is None:
                continue

            param_type = signature.parameters[key].annotation
            param_type = resolve_annotation(param_type)
            serializer = get_serializer(param_type)
            if serializer:
                param_type = str
                value = serializer(value)

            # TODO remove PipelineParam check when Metadata Importer component available
            # if we serialize we need to include the argument as input
            # perhaps, another option is to embed in yaml as json serialized list
            component_param_name = component_param_name_to_mb_sdk_param_name.get(
                key, key
            )
            if isinstance(value,
                          kfp.dsl._pipeline_param.PipelineParam) or serializer:
                if is_mb_sdk_resource_noun_type(param_type):
                    metadata_type = map_resource_to_metadata_type(param_type)[1]
                    component_param_type = metadata_type
                else:
                    component_param_type = 'String'

                input_specs.append(
                    InputSpec(
                        name=key,
                        type=component_param_type,
                    )
                )
                input_args.append(f'--{prefix_key}.{component_param_name}')
                if is_mb_sdk_resource_noun_type(param_type):
                    input_args.append(InputUriPlaceholder(input_name=key))
                else:
                    input_args.append(InputValuePlaceholder(input_name=key))

                input_kwargs[key] = value
            else:
                # Serialized arguments must always be strings
                value = str(value)
                serialized_args[prefix_key][component_param_name] = value

        # validate parameters
        if should_serialize_init:
            init_signature.bind(**init_kwargs)
        method_signature.bind(**method_kwargs)

        component_spec = ComponentSpec(
            name=f'{cls_name}-{method_name}',
            inputs=input_specs,
            outputs=output_specs,
            implementation=ContainerImplementation(
                container=ContainerSpec(
                    image=DEFAULT_CONTAINER_IMAGE,
                    command=[
                        'python3',
                        '-m',
                        'google_cloud_pipeline_components.aiplatform.remote_runner',
                        '--cls_name',
                        cls_name,
                        '--method_name',
                        method_name,
                    ],
                    args=make_args(serialized_args) + output_args + input_args,
                )
            )
        )
        # mktemp only generates a unique path; ComponentSpec.save creates the file.
        component_path = tempfile.mktemp()
        component_spec.save(component_path)

        return components.load_component_from_file(component_path)(
            **input_kwargs
        )
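
Arguments that are neither PipelineParams nor handled by a serializer are baked into the container command through serialized_args. A sketch of the structure it might take for a hypothetical call (the 'init' and 'method' keys assume INIT_KEY == 'init' and METHOD_KEY == 'method'):

# Illustrative only: serialized_args for a hypothetical call with one init
# argument and one method argument.
serialized_args = {
    'init': {'project': 'my-project'},      # assuming INIT_KEY == 'init'
    'method': {'display_name': 'my-job'},   # assuming METHOD_KEY == 'method'
}
# PipelineParam or serializer-handled values instead become InputSpecs plus
# '--init.<name>' / '--method.<name>' flags with value or URI placeholders.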
Example #8
class ComponentCompilerTestCase(unittest.TestCase):
    # These should always match the dummy spec
    DUMMY_IO_ARGS = IOArgs(
        inputs=[
            InputSpec(
                name="input1",
                description="The first input.",
                type="String",
                default="input1-default",
            ),
            InputSpec(name="input2",
                      description="The second input.",
                      type="Integer"),
        ],
        outputs=[
            OutputSpec(name="output1", description="The first output."),
            OutputSpec(name="output2", description="The second output."),
        ],
        args=[
            "--input1",
            InputValuePlaceholder(input_name="input1"),
            "--input2",
            InputValuePlaceholder(input_name="input2"),
            "--output1_output_path",
            OutputPathPlaceholder(output_name="output1"),
            "--output2_output_path",
            OutputPathPlaceholder(output_name="output2"),
        ],
    )

    DUMMY_COMPONENT_SPEC = ComponentSpec(
        name="Dummy component",
        description="Dummy description",
        inputs=DUMMY_IO_ARGS.inputs,
        outputs=DUMMY_IO_ARGS.outputs,
        implementation=ContainerImplementation(container=ContainerSpec(
            image="my-image:my-tag",
            command=["python3"],
            args=[
                "fake-path",
                "--input1",
                InputValuePlaceholder(input_name="input1"),
                "--input2",
                InputValuePlaceholder(input_name="input2"),
                "--output1_output_path",
                OutputPathPlaceholder(output_name="output1"),
                "--output2_output_path",
                OutputPathPlaceholder(output_name="output2"),
            ],
        )),
    )

    EXTRA_IO_ARGS = IOArgs(
        inputs=[
            InputSpec(name="inputStr", description="str", type="String"),
            InputSpec(name="inputInt", description="int", type="Integer"),
            InputSpec(name="inputBool", description="bool", type="Bool"),
            InputSpec(name="inputDict", description="dict", type="JsonObject"),
            InputSpec(name="inputList", description="list", type="JsonArray"),
            InputSpec(
                name="inputOptional",
                description="optional",
                type="String",
                default="default-string",
            ),
            InputSpec(
                name="inputOptionalNoDefault",
                description="optional",
                type="String",
                default="",
            ),
        ],
        outputs=[],
        args=[
            "--inputStr",
            InputValuePlaceholder(input_name="inputStr"),
            "--inputInt",
            InputValuePlaceholder(input_name="inputInt"),
            "--inputBool",
            InputValuePlaceholder(input_name="inputBool"),
            "--inputDict",
            InputValuePlaceholder(input_name="inputDict"),
            "--inputList",
            InputValuePlaceholder(input_name="inputList"),
            "--inputOptional",
            InputValuePlaceholder(input_name="inputOptional"),
            "--inputOptionalNoDefault",
            InputValuePlaceholder(input_name="inputOptionalNoDefault"),
        ],
    )

    @classmethod
    def setUpClass(cls):
        cls.compiler = SageMakerComponentCompiler()

    def test_create_io_from_component_spec(self):
        response = SageMakerComponentCompiler._create_io_from_component_spec(
            DummySpec)  # type: ignore

        self.assertEqual(self.DUMMY_IO_ARGS, response)

    def test_create_io_from_component_spec_extra_types(self):
        response = SageMakerComponentCompiler._create_io_from_component_spec(
            ExtraSpec)  # type: ignore

        self.assertEqual(self.EXTRA_IO_ARGS, response)

    def test_create_component_spec_composes_correctly(self):
        image_uri = "my-image"
        image_tag = "my-tag"
        file_path = "fake-path"

        expected = ComponentSpec(
            name="Dummy component",
            description="Dummy description",
            inputs=self.DUMMY_IO_ARGS.inputs,
            outputs=self.DUMMY_IO_ARGS.outputs,
            implementation=ContainerImplementation(container=ContainerSpec(
                image="my-image:my-tag",
                command=["python3"],
                args=[
                    "fake-path",
                    "--input1",
                    InputValuePlaceholder(input_name="input1"),
                    "--input2",
                    InputValuePlaceholder(input_name="input2"),
                    "--output1_output_path",
                    OutputPathPlaceholder(output_name="output1"),
                    "--output2_output_path",
                    OutputPathPlaceholder(output_name="output2"),
                ],
            )),
        )

        with patch(
                "common.component_compiler.SageMakerComponentCompiler._create_io_from_component_spec",
                MagicMock(return_value=self.DUMMY_IO_ARGS),
        ):
            response = SageMakerComponentCompiler._create_component_spec(
                DummyComponent, file_path, image_uri, image_tag)

        self.assertEqual(expected, response)

    def test_write_component(self):
        DummyComponent.save = MagicMock()
        SageMakerComponentCompiler._write_component(DummyComponent,
                                                    "/tmp/fake-path")

        DummyComponent.save.assert_called_once_with("/tmp/fake-path")
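
For reference, saving DUMMY_COMPONENT_SPEC should yield roughly the following component YAML; this is a hand-written sketch, so key order and quoting may differ from the real serializer:

DUMMY_COMPONENT_YAML = """\
name: Dummy component
description: Dummy description
inputs:
- {name: input1, description: The first input., type: String, default: input1-default}
- {name: input2, description: The second input., type: Integer}
outputs:
- {name: output1, description: The first output.}
- {name: output2, description: The second output.}
implementation:
  container:
    image: my-image:my-tag
    command: [python3]
    args:
    - fake-path
    - --input1
    - {inputValue: input1}
    - --input2
    - {inputValue: input2}
    - --output1_output_path
    - {outputPath: output1}
    - --output2_output_path
    - {outputPath: output2}
"""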
    def test_extract_component_interface(self):
        from typing import NamedTuple

        def my_func(  # noqa: F722
            required_param,
            int_param: int = 42,
            float_param: float = 3.14,
            str_param: str = 'string',
            bool_param: bool = True,
            none_param=None,
            custom_type_param: 'Custom type' = None,
            custom_struct_type_param: {
                'CustomType': {
                    'param1': 'value1',
                    'param2': 'value2'
                }
            } = None,
        ) -> NamedTuple(
                'DummyName',
            [
                #('required_param',), # All typing.NamedTuple fields must have types
                ('int_param', int),
                ('float_param', float),
                ('str_param', str),
                ('bool_param', bool),
                #('custom_type_param', 'Custom type'), #SyntaxError: Forward reference must be an expression -- got 'Custom type'
                ('custom_type_param', 'CustomType'),
                #('custom_struct_type_param', {'CustomType': {'param1': 'value1', 'param2': 'value2'}}), # TypeError: NamedTuple('Name', [(f0, t0), (f1, t1), ...]); each t must be a type Got {'CustomType': {'param1': 'value1', 'param2': 'value2'}}
            ]):
            '''Function docstring'''
            pass

        component_spec = comp._python_op._extract_component_interface(my_func)

        from kfp.components.structures import InputSpec, OutputSpec
        self.assertEqual(
            component_spec.inputs,
            [
                InputSpec(name='required_param'),
                InputSpec(name='int_param',
                          type='Integer',
                          default='42',
                          optional=True),
                InputSpec(name='float_param',
                          type='Float',
                          default='3.14',
                          optional=True),
                InputSpec(name='str_param',
                          type='String',
                          default='string',
                          optional=True),
                InputSpec(name='bool_param',
                          type='Boolean',
                          default='True',
                          optional=True),
                InputSpec(name='none_param',
                          optional=True),  # No default='None'
                InputSpec(name='custom_type_param',
                          type='Custom type',
                          optional=True),
                InputSpec(name='custom_struct_type_param',
                          type={
                              'CustomType': {
                                  'param1': 'value1',
                                  'param2': 'value2'
                              }
                          },
                          optional=True),
            ])
        self.assertEqual(
            component_spec.outputs,
            [
                OutputSpec(name='int_param', type='Integer'),
                OutputSpec(name='float_param', type='Float'),
                OutputSpec(name='str_param', type='String'),
                OutputSpec(name='bool_param', type='Boolean'),
                #OutputSpec(name='custom_type_param', type='Custom type', default='None'),
                OutputSpec(name='custom_type_param', type='CustomType'),
                #OutputSpec(name='custom_struct_type_param', type={'CustomType': {'param1': 'value1', 'param2': 'value2'}}, optional=True),
            ])

        self.maxDiff = None
        self.assertDictEqual(
            component_spec.to_dict(),
            {
                'name':
                'My func',
                'description':
                'Function docstring',
                'inputs': [
                    {
                        'name': 'required_param'
                    },
                    {
                        'name': 'int_param',
                        'type': 'Integer',
                        'default': '42',
                        'optional': True
                    },
                    {
                        'name': 'float_param',
                        'type': 'Float',
                        'default': '3.14',
                        'optional': True
                    },
                    {
                        'name': 'str_param',
                        'type': 'String',
                        'default': 'string',
                        'optional': True
                    },
                    {
                        'name': 'bool_param',
                        'type': 'Boolean',
                        'default': 'True',
                        'optional': True
                    },
                    {
                        'name': 'none_param',
                        'optional': True
                    },  # No default='None'
                    {
                        'name': 'custom_type_param',
                        'type': 'Custom type',
                        'optional': True
                    },
                    {
                        'name': 'custom_struct_type_param',
                        'type': {
                            'CustomType': {
                                'param1': 'value1',
                                'param2': 'value2'
                            }
                        },
                        'optional': True
                    },
                ],
                'outputs': [
                    {
                        'name': 'int_param',
                        'type': 'Integer'
                    },
                    {
                        'name': 'float_param',
                        'type': 'Float'
                    },
                    {
                        'name': 'str_param',
                        'type': 'String'
                    },
                    {
                        'name': 'bool_param',
                        'type': 'Boolean'
                    },
                    {
                        'name': 'custom_type_param',
                        'type': 'CustomType'
                    },
                    #{'name': 'custom_struct_type_param', 'type': {'CustomType': {'param1': 'value1', 'param2': 'value2'}}, 'optional': True},
                ]
            })
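
Note how my_func is humanized to the display name 'My func' and how each NamedTuple field surfaces as an output. A short sketch of turning the same function into a runnable component, assuming kfp v1's public create_component_from_func factory:

# Sketch, assuming the public factory; the test above exercises the private
# _extract_component_interface helper directly instead.
my_op = comp.create_component_from_func(my_func)
print(my_op.component_spec.name)                      # 'My func'
print([out.name for out in my_op.component_spec.outputs])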
Example #10
    def _create_workflow(
        self,
        pipeline_func: Callable,
        pipeline_name: Text = None,
        pipeline_description: Text = None,
        params_list: List[dsl.PipelineParam] = None,
        pipeline_conf: dsl.PipelineConf = None,
    ) -> Dict[Text, Any]:
        """ Internal implementation of create_workflow."""
        params_list = params_list or []
        argspec = inspect.getfullargspec(pipeline_func)

        # Create the arg list with no default values and call pipeline function.
        # Assign type information to the PipelineParam
        pipeline_meta = _extract_pipeline_metadata(pipeline_func)
        pipeline_meta.name = pipeline_name or pipeline_meta.name
        pipeline_meta.description = pipeline_description or pipeline_meta.description
        pipeline_name = sanitize_k8s_name(pipeline_meta.name)

        # Need to first clear the default value of dsl.PipelineParams. Otherwise, it
        # will be resolved immediately in place when being passed to each component.
        default_param_values = {}
        for param in params_list:
            default_param_values[param.name] = param.value
            param.value = None

        # Currently, pipeline params may only be specified in one place.
        if params_list and pipeline_meta.inputs:
            raise ValueError(
                'Either specify pipeline params in the pipeline function, or in "params_list", but not both.'
            )

        args_list = []
        for arg_name in argspec.args:
            arg_type = None
            for input in pipeline_meta.inputs or []:
                if arg_name == input.name:
                    arg_type = input.type
                    break
            args_list.append(
                dsl.PipelineParam(sanitize_k8s_name(arg_name, True),
                                  param_type=arg_type))

        with dsl.Pipeline(pipeline_name) as dsl_pipeline:
            pipeline_func(*args_list)

        # Configuration passed to the compiler takes precedence. Unfortunately, it is
        # not trivial to detect whether the dsl_pipeline.conf was ever modified.
        pipeline_conf = pipeline_conf or dsl_pipeline.conf

        self._validate_exit_handler(dsl_pipeline)
        self._sanitize_and_inject_artifact(dsl_pipeline, pipeline_conf)

        # Fill in the default values.
        args_list_with_defaults = []
        if pipeline_meta.inputs:
            args_list_with_defaults = [
                dsl.PipelineParam(sanitize_k8s_name(arg_name, True))
                for arg_name in argspec.args
            ]
            if argspec.defaults:
                for arg, default in zip(reversed(args_list_with_defaults),
                                        reversed(argspec.defaults)):
                    arg.value = default.value if isinstance(
                        default, dsl.PipelineParam) else default
        elif params_list:
            # Or, if args are provided by params_list, fill in pipeline_meta.
            for param in params_list:
                param.value = default_param_values[param.name]

            args_list_with_defaults = params_list
            pipeline_meta.inputs = [
                InputSpec(name=param.name,
                          type=param.param_type,
                          default=param.value) for param in params_list
            ]

        op_transformers = [add_pod_env]

        # # By default adds telemetry instruments. Users can opt out toggling
        # # allow_telemetry.
        # # Also, TFX pipelines will be bypassed for pipeline compiled by tfx>0.21.4.
        # if allow_telemetry:
        #   pod_labels = get_default_telemetry_labels()
        #   op_transformers.append(add_pod_labels(pod_labels))

        op_transformers.extend(pipeline_conf.op_transformers)

        workflow = self._create_pipeline_workflow(
            args_list_with_defaults,
            dsl_pipeline,
            op_transformers,
            pipeline_conf,
        )

        workflow = fix_big_data_passing(workflow)

        workflow.setdefault('metadata', {}).setdefault('annotations', {})['pipelines.kubeflow.org/pipeline_spec'] = \
          json.dumps(pipeline_meta.to_dict(), sort_keys=True)

        # Recursively strip empty lists. DANGER: this may remove necessary
        # empty elements.
        def remove_empty_elements(obj) -> Any:
            if not isinstance(obj, (dict, list)):
                return obj
            if isinstance(obj, list):
                return [remove_empty_elements(o) for o in obj if o != []]
            return {
                k: remove_empty_elements(v)
                for k, v in obj.items() if v != []
            }

        workflow = remove_empty_elements(workflow)

        return workflow
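
remove_empty_elements prunes empty lists recursively but leaves empty dicts and None untouched. A quick illustration, assuming the helper is hoisted out of the method:

# Quick illustration of remove_empty_elements: only empty lists are stripped;
# empty dicts and None survive.
doc = {'a': [], 'b': {'c': [], 'd': 1}, 'e': [[], [2]], 'f': None, 'g': {}}
assert remove_empty_elements(doc) == {'b': {'d': 1}, 'e': [[2]], 'f': None, 'g': {}}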
Example #11
  def _create_workflow(
      self,
      pipeline_func: Callable,
      pipeline_name: Optional[Text] = None,
      pipeline_description: Optional[Text] = None,
      params_list: Optional[List[dsl.PipelineParam]] = None,
      pipeline_conf: Optional[dsl.PipelineConf] = None,
  ) -> Dict[Text, Any]:
    """ Internal implementation of create_workflow."""
    params_list = params_list or []

    # Create the arg list with no default values and call pipeline function.
    # Assign type information to the PipelineParam
    pipeline_meta = _extract_pipeline_metadata(pipeline_func)
    pipeline_meta.name = pipeline_name or pipeline_meta.name
    pipeline_meta.description = pipeline_description or pipeline_meta.description
    pipeline_name = sanitize_k8s_name(pipeline_meta.name)

    # Need to first clear the default value of dsl.PipelineParams. Otherwise, it
    # will be resolved immediately in place when being passed to each component.
    default_param_values = OrderedDict()

    if self._pipeline_root_param:
      params_list.append(self._pipeline_root_param)
    if self._pipeline_name_param:
      params_list.append(self._pipeline_name_param)

    for param in params_list:
      default_param_values[param.name] = param.value
      param.value = None

    args_list = []
    kwargs_dict = dict()
    signature = inspect.signature(pipeline_func)
    for arg_name, arg in signature.parameters.items():
      arg_type = None
      for input in pipeline_meta.inputs or []:
        if arg_name == input.name:
          arg_type = input.type
          break
      param = dsl.PipelineParam(sanitize_k8s_name(arg_name, True), param_type=arg_type)
      if arg.kind == inspect.Parameter.KEYWORD_ONLY:
        kwargs_dict[arg_name] = param
      else:
        args_list.append(param)

    with dsl.Pipeline(pipeline_name) as dsl_pipeline:
      pipeline_func(*args_list, **kwargs_dict)

    # Configuration passed to the compiler takes precedence. Unfortunately, it is
    # not trivial to detect whether the dsl_pipeline.conf was ever modified.
    pipeline_conf = pipeline_conf or dsl_pipeline.conf

    self._validate_exit_handler(dsl_pipeline)
    self._sanitize_and_inject_artifact(dsl_pipeline, pipeline_conf)

    # Fill in the default values by merging two param lists.
    args_list_with_defaults = OrderedDict()
    if pipeline_meta.inputs:
      args_list_with_defaults = OrderedDict([
        (sanitize_k8s_name(input_spec.name, True), input_spec.default)
        for input_spec in pipeline_meta.inputs
      ])

    if params_list:
      # Or, if args are provided by params_list, fill in pipeline_meta.
      for k, v in default_param_values.items():
        args_list_with_defaults[k] = v

      pipeline_meta.inputs = pipeline_meta.inputs or []
      for param in params_list:
        pipeline_meta.inputs.append(
            InputSpec(
                name=param.name,
                type=param.param_type,
                default=default_param_values[param.name]))

    op_transformers = [add_pod_env]
    pod_labels = {_SDK_VERSION_LABEL: kfp.__version__, _SDK_ENV_LABEL: _SDK_ENV_DEFAULT}
    op_transformers.append(add_pod_labels(pod_labels))
    op_transformers.extend(pipeline_conf.op_transformers)

    if self._mode == dsl.PipelineExecutionMode.V2_COMPATIBLE:
      # Add self._pipeline_name_param and self._pipeline_root_param to ops inputs
      # if they don't exist already.
      for op in dsl_pipeline.ops.values():
        insert_pipeline_name_param = True
        insert_pipeline_root_param = True
        for param in op.inputs:
          if param.name == self._pipeline_name_param.name:
            insert_pipeline_name_param = False
          elif param.name == self._pipeline_root_param.name:
            insert_pipeline_root_param = False

        if insert_pipeline_name_param:
          op.inputs.append(self._pipeline_name_param)
        if insert_pipeline_root_param:
          op.inputs.append(self._pipeline_root_param)

    workflow = self._create_pipeline_workflow(
        args_list_with_defaults,
        dsl_pipeline,
        op_transformers,
        pipeline_conf,
    )

    from ._data_passing_rewriter import fix_big_data_passing
    workflow = fix_big_data_passing(workflow)

    workflow = _data_passing_rewriter.add_pod_name_passing(
        workflow, str(self._pipeline_root_param or None))

    if pipeline_conf and pipeline_conf.data_passing_method is not None:
      workflow = pipeline_conf.data_passing_method(workflow)

    metadata = workflow.setdefault('metadata', {})
    annotations = metadata.setdefault('annotations', {})
    labels = metadata.setdefault('labels', {})

    annotations[_SDK_VERSION_LABEL] = kfp.__version__
    annotations['pipelines.kubeflow.org/pipeline_compilation_time'] = datetime.datetime.now().isoformat()
    annotations['pipelines.kubeflow.org/pipeline_spec'] = json.dumps(pipeline_meta.to_dict(), sort_keys=True)

    if self._mode == dsl.PipelineExecutionMode.V2_COMPATIBLE:
      annotations['pipelines.kubeflow.org/v2_pipeline'] = "true"
      labels['pipelines.kubeflow.org/v2_pipeline'] = "true"


    # Labels might be logged better than annotations so adding some information here as well
    labels[_SDK_VERSION_LABEL] = kfp.__version__

    return workflow
Example #12
    def test_to_dict(self):
        component_meta = ComponentSpec(
            name='foobar',
            description='foobar example',
            inputs=[
                InputSpec(name='input1',
                          description='input1 desc',
                          type={
                              'GCSPath': {
                                  'bucket_type': 'directory',
                                  'file_type': 'csv'
                              }
                          },
                          default='default1'),
                InputSpec(name='input2',
                          description='input2 desc',
                          type={
                              'TFModel': {
                                  'input_data': 'tensor',
                                  'version': '1.8.0'
                              }
                          },
                          default='default2'),
                InputSpec(name='input3',
                          description='input3 desc',
                          type='Integer',
                          default='default3'),
            ],
            outputs=[
                OutputSpec(
                    name='output1',
                    description='output1 desc',
                    type={'Schema': {
                        'file_type': 'tsv'
                    }},
                )
            ])
        golden_meta = {
            'name':
            'foobar',
            'description':
            'foobar example',
            'inputs': [{
                'name': 'input1',
                'description': 'input1 desc',
                'type': {
                    'GCSPath': {
                        'bucket_type': 'directory',
                        'file_type': 'csv'
                    }
                },
                'default': 'default1'
            }, {
                'name': 'input2',
                'description': 'input2 desc',
                'type': {
                    'TFModel': {
                        'input_data': 'tensor',
                        'version': '1.8.0'
                    }
                },
                'default': 'default2'
            }, {
                'name': 'input3',
                'description': 'input3 desc',
                'type': 'Integer',
                'default': 'default3'
            }],
            'outputs': [{
                'name': 'output1',
                'description': 'output1 desc',
                'type': {
                    'Schema': {
                        'file_type': 'tsv'
                    }
                },
            }]
        }
        self.assertEqual(component_meta.to_dict(), golden_meta)
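
Specs like this are typically persisted as YAML. A short dump sketch of the dict form, reusing component_meta from the test above and assuming PyYAML is installed:

# Sketch: serialize the dict form to YAML (requires PyYAML >= 5.1 for sort_keys).
import yaml
print(yaml.safe_dump(component_meta.to_dict(), sort_keys=False))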