def test_decorator_metadata(self):
        """Test @pipeline decorator with metadata."""
        @pipeline(name='p1', description='description1')
        def my_pipeline1(a: {'Schema':
                             {
                                 'file_type': 'csv'
                             }} = 'good',
                         b: Integer() = 12):
            pass

        golden_meta = ComponentSpec(name='p1',
                                    description='description1',
                                    inputs=[])
        golden_meta.inputs.append(
            InputSpec(name='a',
                      type={'Schema': {
                          'file_type': 'csv'
                      }},
                      default='good',
                      optional=True))
        golden_meta.inputs.append(
            InputSpec(name='b',
                      type={
                          'Integer': {
                              'openapi_schema_validator': {
                                  "type": "integer"
                              }
                          }
                      },
                      default="12",
                      optional=True))

        pipeline_meta = _extract_pipeline_metadata(my_pipeline1)
        self.assertEqual(pipeline_meta, golden_meta)
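
The extraction in this test walks the function signature and turns each annotated parameter into an input spec. A minimal stdlib-only sketch of that mechanism (illustrative only; the real _extract_pipeline_metadata lives in the KFP SDK and returns ComponentSpec/InputSpec objects):

import inspect

def describe_inputs(func):
    """Map each parameter's annotation and default to a plain spec dict."""
    specs = []
    for name, param in inspect.signature(func).parameters.items():
        has_default = param.default is not inspect.Parameter.empty
        specs.append({
            'name': name,
            'type': (None if param.annotation is inspect.Parameter.empty
                     else param.annotation),
            'default': param.default if has_default else None,
            'optional': has_default,
        })
    return specs

def my_pipeline1(a: {'Schema': {'file_type': 'csv'}} = 'good', b: int = 12):
    pass

print(describe_inputs(my_pipeline1)[0])
# {'name': 'a', 'type': {'Schema': {'file_type': 'csv'}}, 'default': 'good', 'optional': True}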
Example 2
    def test_decorator_metadata(self):
        """Test @pipeline decorator with metadata."""
        @pipeline(name='p1', description='description1')
        def my_pipeline1(a: {'Schema':
                             {
                                 'file_type': 'csv'
                             }} = 'good',
                         b: Integer() = 12):
            pass

        golden_meta = PipelineMeta(name='p1', description='description1')
        golden_meta.inputs.append(
            ParameterMeta(name='a',
                          description='',
                          param_type={'Schema': {
                              'file_type': 'csv'
                          }},
                          default='good'))
        golden_meta.inputs.append(
            ParameterMeta(name='b',
                          description='',
                          param_type={
                              'Integer': {
                                  'openapi_schema_validator': {
                                      "type": "integer"
                                  }
                              }
                          },
                          default=12))

        pipeline_meta = _extract_pipeline_metadata(my_pipeline1)
        self.assertEqual(pipeline_meta, golden_meta)
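
The Integer() annotation used in both tests expands to a dict wrapping an OpenAPI schema fragment. A minimal sketch of a DSL type that serializes this way (illustrative; not the SDK's actual implementation):

class Integer:
    """Illustrative DSL type that serializes to the dict seen in the tests."""
    openapi_schema_validator = {'type': 'integer'}

    def to_dict(self):
        # Key the schema by the class name, matching the golden metadata above.
        return {type(self).__name__: {
            'openapi_schema_validator': self.openapi_schema_validator}}

print(Integer().to_dict())
# {'Integer': {'openapi_schema_validator': {'type': 'integer'}}}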
Example 3
  def _create_workflow(self,
      pipeline_func: Callable,
      pipeline_name: Text=None,
      pipeline_description: Text=None,
      params_list: List[dsl.PipelineParam]=None,
      pipeline_conf: dsl.PipelineConf = None,
      ) -> List[Dict[Text, Any]]:  # Tekton change, signature
    """ Internal implementation of create_workflow."""
    params_list = params_list or []
    argspec = inspect.getfullargspec(pipeline_func)

    # Create the arg list with no default values and call pipeline function.
    # Assign type information to the PipelineParam
    pipeline_meta = _extract_pipeline_metadata(pipeline_func)
    pipeline_meta.name = pipeline_name or pipeline_meta.name
    pipeline_meta.description = pipeline_description or pipeline_meta.description
    pipeline_name = sanitize_k8s_name(pipeline_meta.name)

    # Need to first clear the default value of dsl.PipelineParams. Otherwise, it
    # will be resolved immediately in place when being passed to each component.
    default_param_values = {}
    for param in params_list:
      default_param_values[param.name] = param.value
      param.value = None

    # Currently only allow specifying pipeline params at one place.
    if params_list and pipeline_meta.inputs:
      raise ValueError('Either specify pipeline params in the pipeline function, or in "params_list", but not both.')

    args_list = []
    for arg_name in argspec.args:
      arg_type = None
      for input in pipeline_meta.inputs or []:
        if arg_name == input.name:
          arg_type = input.type
          break
      args_list.append(dsl.PipelineParam(sanitize_k8s_name(arg_name, True), param_type=arg_type))

    with dsl.Pipeline(pipeline_name) as dsl_pipeline:
      pipeline_func(*args_list)

    # Configuration passed to the compiler is overriding. Unfortunately, it is
    # not trivial to detect whether the dsl_pipeline.conf was ever modified.
    pipeline_conf = pipeline_conf or dsl_pipeline.conf

    self._validate_exit_handler(dsl_pipeline)
    self._sanitize_and_inject_artifact(dsl_pipeline, pipeline_conf)

    # Fill in the default values.
    args_list_with_defaults = []
    if pipeline_meta.inputs:
      args_list_with_defaults = [dsl.PipelineParam(sanitize_k8s_name(arg_name, True))
                                 for arg_name in argspec.args]
      if argspec.defaults:
        for arg, default in zip(reversed(args_list_with_defaults), reversed(argspec.defaults)):
          arg.value = default.value if isinstance(default, dsl.PipelineParam) else default
    elif params_list:
      # Or, if args are provided by params_list, fill in pipeline_meta.
      for param in params_list:
        param.value = default_param_values[param.name]

      args_list_with_defaults = params_list
      pipeline_meta.inputs = [
        InputSpec(
            name=param.name,
            type=param.param_type,
            default=param.value) for param in params_list]

    op_transformers = [add_pod_env]
    op_transformers.extend(pipeline_conf.op_transformers)

    workflow = self._create_pipeline_workflow(
        args_list_with_defaults,
        dsl_pipeline,
        op_transformers,
        pipeline_conf,
    )

    from ._data_passing_rewriter import fix_big_data_passing
    workflow = fix_big_data_passing(workflow)

    import json
    pipeline = [item for item in workflow if item["kind"] == "Pipeline"][0]  # Tekton change
    pipeline.setdefault('metadata', {}).setdefault('annotations', {})['pipelines.kubeflow.org/pipeline_spec'] = json.dumps(pipeline_meta.to_dict(), sort_keys=True)

    return workflow
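
Because this Tekton variant returns a list of Kubernetes resources rather than a single Argo workflow, the pipeline_spec annotation is attached only to the resource of kind "Pipeline". A standalone sketch of that post-processing step, using plain dicts in place of real compiler output:

import json

workflow = [
    {'kind': 'PipelineRun', 'metadata': {'name': 'p1-run'}},   # hypothetical resource
    {'kind': 'Pipeline', 'metadata': {'name': 'p1'}},          # hypothetical resource
]
pipeline_meta = {'name': 'p1', 'description': 'description1'}  # stand-in for to_dict()

pipeline = [item for item in workflow if item['kind'] == 'Pipeline'][0]
pipeline.setdefault('metadata', {}).setdefault('annotations', {})[
    'pipelines.kubeflow.org/pipeline_spec'] = json.dumps(pipeline_meta, sort_keys=True)

print(pipeline['metadata']['annotations'])
# {'pipelines.kubeflow.org/pipeline_spec': '{"description": "description1", "name": "p1"}'}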
Example 4
    def _create_workflow(
        self,
        pipeline_func: Callable,
        pipeline_name: Text = None,
        pipeline_description: Text = None,
        params_list: List[dsl.PipelineParam] = None,
        pipeline_conf: dsl.PipelineConf = None,
    ) -> Dict[Text, Any]:
        """ Internal implementation of create_workflow."""
        params_list = params_list or []
        argspec = inspect.getfullargspec(pipeline_func)

        # Create the arg list with no default values and call pipeline function.
        # Assign type information to the PipelineParam
        pipeline_meta = _extract_pipeline_metadata(pipeline_func)
        pipeline_meta.name = pipeline_name or pipeline_meta.name
        pipeline_meta.description = pipeline_description or pipeline_meta.description
        pipeline_name = sanitize_k8s_name(pipeline_meta.name)

        # Need to first clear the default value of dsl.PipelineParams. Otherwise, it
        # will be resolved immediately in place when being passed to each component.
        default_param_values = {}
        for param in params_list:
            default_param_values[param.name] = param.value
            param.value = None

        # Currently only allow specifying pipeline params at one place.
        if params_list and pipeline_meta.inputs:
            raise ValueError(
                'Either specify pipeline params in the pipeline function, or in "params_list", but not both.'
            )

        args_list = []
        for arg_name in argspec.args:
            arg_type = None
            for input in pipeline_meta.inputs or []:
                if arg_name == input.name:
                    arg_type = input.type
                    break
            args_list.append(
                dsl.PipelineParam(sanitize_k8s_name(arg_name, True),
                                  param_type=arg_type))

        with dsl.Pipeline(pipeline_name) as dsl_pipeline:
            pipeline_func(*args_list)

        # Configuration passed to the compiler is overriding. Unfortunately, it is
        # not trivial to detect whether the dsl_pipeline.conf was ever modified.
        pipeline_conf = pipeline_conf or dsl_pipeline.conf

        self._validate_exit_handler(dsl_pipeline)
        self._sanitize_and_inject_artifact(dsl_pipeline, pipeline_conf)

        # Fill in the default values.
        args_list_with_defaults = []
        if pipeline_meta.inputs:
            args_list_with_defaults = [
                dsl.PipelineParam(sanitize_k8s_name(arg_name, True))
                for arg_name in argspec.args
            ]
            if argspec.defaults:
                for arg, default in zip(reversed(args_list_with_defaults),
                                        reversed(argspec.defaults)):
                    arg.value = default.value if isinstance(
                        default, dsl.PipelineParam) else default
        elif params_list:
            # Or, if args are provided by params_list, fill in pipeline_meta.
            for param in params_list:
                param.value = default_param_values[param.name]

            args_list_with_defaults = params_list
            pipeline_meta.inputs = [
                InputSpec(name=param.name,
                          type=param.param_type,
                          default=param.value) for param in params_list
            ]

        op_transformers = [add_pod_env]

        # # By default adds telemetry instruments. Users can opt out toggling
        # # allow_telemetry.
        # # Also, TFX pipelines will be bypassed for pipeline compiled by tfx>0.21.4.
        # if allow_telemetry:
        #   pod_labels = get_default_telemetry_labels()
        #   op_transformers.append(add_pod_labels(pod_labels))

        op_transformers.extend(pipeline_conf.op_transformers)

        workflow = self._create_pipeline_workflow(
            args_list_with_defaults,
            dsl_pipeline,
            op_transformers,
            pipeline_conf,
        )

        workflow = fix_big_data_passing(workflow)

        workflow.setdefault('metadata', {}).setdefault('annotations', {})['pipelines.kubeflow.org/pipeline_spec'] = \
          json.dumps(pipeline_meta.to_dict(), sort_keys=True)

        # Recursively strip empty lists. Caution: this may remove empty
        # elements that are actually required.
        def remove_empty_elements(obj) -> dict:
            if not isinstance(obj, (dict, list)):
                return obj
            if isinstance(obj, list):
                return [remove_empty_elements(o) for o in obj if o != []]
            return {
                k: remove_empty_elements(v)
                for k, v in obj.items() if v != []
            }

        workflow = remove_empty_elements(workflow)

        return workflow
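
A quick standalone check of remove_empty_elements shows why the caution above is warranted: only empty lists are dropped, while empty strings, zeros, and empty dicts survive, so a consumer that requires an empty list to be present would break.

def remove_empty_elements(obj):
    if not isinstance(obj, (dict, list)):
        return obj
    if isinstance(obj, list):
        return [remove_empty_elements(o) for o in obj if o != []]
    return {k: remove_empty_elements(v) for k, v in obj.items() if v != []}

doc = {'spec': {'steps': [], 'params': [{'name': 'a', 'default': ''}]},
       'status': []}
print(remove_empty_elements(doc))
# {'spec': {'params': [{'name': 'a', 'default': ''}]}}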
Example 5
  def _create_workflow(
      self,
      pipeline_func: Callable,
      pipeline_name: Optional[Text] = None,
      pipeline_description: Optional[Text] = None,
      params_list: Optional[List[dsl.PipelineParam]] = None,
      pipeline_conf: Optional[dsl.PipelineConf] = None,
  ) -> Dict[Text, Any]:
    """ Internal implementation of create_workflow."""
    params_list = params_list or []

    # Create the arg list with no default values and call pipeline function.
    # Assign type information to the PipelineParam
    pipeline_meta = _extract_pipeline_metadata(pipeline_func)
    pipeline_meta.name = pipeline_name or pipeline_meta.name
    pipeline_meta.description = pipeline_description or pipeline_meta.description
    pipeline_name = sanitize_k8s_name(pipeline_meta.name)

    # Need to first clear the default value of dsl.PipelineParams. Otherwise, it
    # will be resolved immediately in place when being passed to each component.
    default_param_values = OrderedDict()

    if self._pipeline_root_param:
      params_list.append(self._pipeline_root_param)
    if self._pipeline_name_param:
      params_list.append(self._pipeline_name_param)

    for param in params_list:
      default_param_values[param.name] = param.value
      param.value = None

    args_list = []
    kwargs_dict = dict()
    signature = inspect.signature(pipeline_func)
    for arg_name, arg in signature.parameters.items():
      arg_type = None
      for input in pipeline_meta.inputs or []:
        if arg_name == input.name:
          arg_type = input.type
          break
      param = dsl.PipelineParam(sanitize_k8s_name(arg_name, True), param_type=arg_type)
      if arg.kind == inspect.Parameter.KEYWORD_ONLY:
        kwargs_dict[arg_name] = param
      else:
        args_list.append(param)

    with dsl.Pipeline(pipeline_name) as dsl_pipeline:
      pipeline_func(*args_list, **kwargs_dict)

    # Configuration passed to the compiler is overriding. Unfortunately, it is
    # not trivial to detect whether the dsl_pipeline.conf was ever modified.
    pipeline_conf = pipeline_conf or dsl_pipeline.conf

    self._validate_exit_handler(dsl_pipeline)
    self._sanitize_and_inject_artifact(dsl_pipeline, pipeline_conf)

    # Fill in the default values by merging two param lists.
    args_list_with_defaults = OrderedDict()
    if pipeline_meta.inputs:
      args_list_with_defaults = OrderedDict([
        (sanitize_k8s_name(input_spec.name, True), input_spec.default)
        for input_spec in pipeline_meta.inputs
      ])

    if params_list:
      # Or, if args are provided by params_list, fill in pipeline_meta.
      for k, v in default_param_values.items():
        args_list_with_defaults[k] = v

      pipeline_meta.inputs = pipeline_meta.inputs or []
      for param in params_list:
        pipeline_meta.inputs.append(
            InputSpec(
                name=param.name,
                type=param.param_type,
                default=default_param_values[param.name]))

    op_transformers = [add_pod_env]
    pod_labels = {_SDK_VERSION_LABEL: kfp.__version__, _SDK_ENV_LABEL: _SDK_ENV_DEFAULT}
    op_transformers.append(add_pod_labels(pod_labels))
    op_transformers.extend(pipeline_conf.op_transformers)

    if self._mode == dsl.PipelineExecutionMode.V2_COMPATIBLE:
      # Add self._pipeline_name_param and self._pipeline_root_param to ops inputs
      # if they don't exist already.
      for op in dsl_pipeline.ops.values():
        insert_pipeline_name_param = True
        insert_pipeline_root_param = True
        for param in op.inputs:
          if param.name == self._pipeline_name_param.name:
            insert_pipeline_name_param = False
          elif param.name == self._pipeline_root_param.name:
            insert_pipeline_root_param = False

        if insert_pipeline_name_param:
          op.inputs.append(self._pipeline_name_param)
        if insert_pipeline_root_param:
          op.inputs.append(self._pipeline_root_param)

    workflow = self._create_pipeline_workflow(
        args_list_with_defaults,
        dsl_pipeline,
        op_transformers,
        pipeline_conf,
    )

    from . import _data_passing_rewriter
    workflow = _data_passing_rewriter.fix_big_data_passing(workflow)

    workflow = _data_passing_rewriter.add_pod_name_passing(
        workflow, str(self._pipeline_root_param or None))

    if pipeline_conf and pipeline_conf.data_passing_method is not None:
      workflow = pipeline_conf.data_passing_method(workflow)

    metadata = workflow.setdefault('metadata', {})
    annotations = metadata.setdefault('annotations', {})
    labels = metadata.setdefault('labels', {})

    annotations[_SDK_VERSION_LABEL] = kfp.__version__
    annotations['pipelines.kubeflow.org/pipeline_compilation_time'] = datetime.datetime.now().isoformat()
    annotations['pipelines.kubeflow.org/pipeline_spec'] = json.dumps(pipeline_meta.to_dict(), sort_keys=True)

    if self._mode == dsl.PipelineExecutionMode.V2_COMPATIBLE:
      annotations['pipelines.kubeflow.org/v2_pipeline'] = "true"
      labels['pipelines.kubeflow.org/v2_pipeline'] = "true"


    # Labels might be logged better than annotations, so add some of this information as a label too.
    labels[_SDK_VERSION_LABEL] = kfp.__version__

    return workflow
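
The resulting workflow carries the pipeline spec and SDK version in its metadata. An illustrative sketch of the final shape (the version string and the assumed value of _SDK_VERSION_LABEL are example values, not read from the SDK):

import datetime
import json

workflow = {'kind': 'Workflow'}
metadata = workflow.setdefault('metadata', {})
annotations = metadata.setdefault('annotations', {})
labels = metadata.setdefault('labels', {})

SDK_VERSION_LABEL = 'pipelines.kubeflow.org/kfp_sdk_version'  # assumed value of _SDK_VERSION_LABEL
annotations[SDK_VERSION_LABEL] = '1.8.0'                      # example version string
annotations['pipelines.kubeflow.org/pipeline_compilation_time'] = \
    datetime.datetime.now().isoformat()
annotations['pipelines.kubeflow.org/pipeline_spec'] = json.dumps({'name': 'p1'}, sort_keys=True)
labels[SDK_VERSION_LABEL] = '1.8.0'

print(json.dumps(workflow, indent=2))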