Ejemplo n.º 1
0
    def _create_pipeline(
        self,
        pipeline_func: Callable[..., Any],
        pipeline_name: Optional[str] = None,
    ) -> pipeline_spec_pb2.PipelineSpec:
        """Calls the pipeline function and builds the pipeline spec from it.

        The pipeline function is invoked inside a `dsl.Pipeline` context with
        one value-less `dsl.PipelineParam` per signature parameter. A second
        list of parameters, carrying the declared defaults, is then handed to
        `self._create_pipeline_spec` together with the pipeline object.

        Args:
          pipeline_func: Pipeline function with @dsl.pipeline decorator.
          pipeline_name: The name of the pipeline. Optional; when falsy, the
            name from the extracted component interface is used instead.

        Returns:
          The pipeline spec returned by `self._create_pipeline_spec`.
        """
        interface = _python_op._extract_component_interface(pipeline_func)
        pipeline_name = pipeline_name or interface.name
        declared_inputs = interface.inputs or []

        # Typed placeholders without values: these are what the pipeline
        # function receives when it is called below.
        placeholders = []
        for param_name in inspect.signature(pipeline_func).parameters:
            # Type of the first declared input with the same name, else None.
            matched_type = next(
                (declared.type
                 for declared in declared_inputs
                 if declared.name == param_name),
                None,
            )
            placeholders.append(
                dsl.PipelineParam(
                    sanitize_k8s_name(param_name, True),
                    param_type=matched_type,
                ))

        with dsl.Pipeline(pipeline_name) as dsl_pipeline:
            pipeline_func(*placeholders)

        # Same parameters again, this time carrying their default values.
        params_with_defaults = [
            dsl.PipelineParam(
                sanitize_k8s_name(declared.name, True),
                param_type=declared.type,
                value=declared.default,
            ) for declared in declared_inputs
        ]

        return self._create_pipeline_spec(params_with_defaults, dsl_pipeline)
Ejemplo n.º 2
0
def _extract_pipeline_metadata(func):
    """Validates argument defaults of `func` and returns its interface.

    For every positional argument whose annotation resolves to a type
    dictionary carrying an "openapi_schema_validator" property, a non-None
    default value is validated against that schema with `jsonschema`.
    Defaults given as `PipelineParam` instances trigger a deprecation-style
    warning and are unwrapped to their `.value` before validation.

    Args:
      func: The pipeline function to inspect.

    Returns:
      The component interface extracted from `func` by
      `kfp.components._python_op._extract_component_interface`.
    """

    # Most of this code is only needed for verifying the default values against
    # "openapi_schema_validator" type properties.
    # TODO: Move the value verification code to some other place

    from ._pipeline_param import PipelineParam

    import inspect
    spec = inspect.getfullargspec(func)

    # Defaults align with the *trailing* positional arguments, so pair both
    # sequences from the end.
    defaults_by_name = dict(
        zip(reversed(spec.args), reversed(spec.defaults or ())))

    for name in spec.args:
        default = defaults_by_name.get(name)
        if isinstance(default, PipelineParam):
            warnings.warn(
                'Explicit creation of `kfp.dsl.PipelineParam`s by the users is '
                'deprecated. The users should define the parameter type and default '
                'values using standard pythonic constructs: '
                'def my_func(a: int = 1, b: str = "default"):')
            default = default.value

        type_meta = None
        if name in spec.annotations:
            type_meta = _annotation_to_typemeta(spec.annotations[name])

        # A dict-shaped type maps the type name to its properties; any other
        # shape carries no properties to validate against.
        if isinstance(type_meta, dict):
            properties = list(type_meta.values())[0]
        else:
            properties = {}

        if 'openapi_schema_validator' not in properties or default is None:
            continue

        from jsonschema import validate
        import json
        schema = properties['openapi_schema_validator']
        if isinstance(schema, str):
            # The schema validator property may be given as a JSON string
            # instead of a dict.
            schema = json.loads(schema)
        # Only validating non-serialized values
        validate(instance=default, schema=schema)

    from kfp.components._python_op import _extract_component_interface
    return _extract_component_interface(func)
Ejemplo n.º 3
0
def get_output_artifacts(
        fn: Callable, output_uris: Dict[str,
                                        str]) -> Dict[str, artifact.Artifact]:
    """Gets the output artifacts from function signature and provided URIs.

    Only outputs whose `_passing_style` is `_python_op.OutputArtifact` and
    that declare a type are considered. The artifact class is looked up by
    type name in the `artifact.KFP_ARTIFACT_ONTOLOGY_MODULE` module; when it
    cannot be loaded, a warning is logged and the generic `artifact.Artifact`
    is used instead.

    Args:
      fn: A user-provided function, whose signature annotates the type of
        output artifacts.
      output_uris: The mapping from output artifact name to its URI.

    Returns:
      A mapping from output artifact name to Python artifact objects. The
      mapping is empty when the function declares no output artifacts.

    Raises:
      KeyError: If an output artifact has no entry in `output_uris`.
    """
    # Inspect the function signature to determine the set of output artifacts.
    spec = _python_op._extract_component_interface(fn)

    result = {}  # Mapping from output name to artifacts.
    # The interface may report `outputs` as None rather than an empty list
    # when the function declares no outputs, so guard the iteration.
    for output in spec.outputs or []:
        if getattr(output, '_passing_style',
                   None) != _python_op.OutputArtifact:
            continue

        # The artifact class is selected by the output's declared type name.
        type_name = getattr(output, 'type', None)
        if not type_name:
            continue

        try:
            artifact_cls = getattr(
                importlib.import_module(artifact.KFP_ARTIFACT_ONTOLOGY_MODULE),
                type_name)
        except (AttributeError, ImportError, ValueError):
            logging.warning((
                'Could not load artifact class %s.%s; using fallback deserialization'
                ' for the relevant artifact. Please make sure that any artifact '
                'classes can be imported within your container or environment.'
            ), artifact.KFP_ARTIFACT_ONTOLOGY_MODULE, type_name)
            artifact_cls = artifact.Artifact

        if artifact_cls == artifact.Artifact:
            # The generic artifact needs an explicit schema; use the default.
            art = artifact_cls(
                instance_schema=artifact.DEFAULT_ARTIFACT_SCHEMA)
        else:
            art = artifact_cls()

        art.uri = output_uris[output.name]
        result[output.name] = art

    return result
Ejemplo n.º 4
0
  def _create_pipeline_v2(
      self,
      pipeline_func: Callable[..., Any],
      pipeline_root: Optional[str] = None,
      pipeline_name: Optional[str] = None,
      pipeline_parameters_override: Optional[Mapping[str, Any]] = None,
  ) -> pipeline_spec_pb2.PipelineJob:
    """Calls the pipeline function and wraps the resulting spec in a job.

    Args:
      pipeline_func: Pipeline function with @dsl.pipeline decorator.
      pipeline_root: The root of the pipeline outputs. Optional; when falsy,
        the `output_directory` attribute of `pipeline_func` is used if present.
      pipeline_name: The name of the pipeline. Optional; when falsy, the name
        from the extracted component interface is used.
      pipeline_parameters_override: The mapping from parameter names to values.
        Optional.

    Returns:
      A PipelineJob proto representing the compiled pipeline.

    Raises:
      ValueError: If an override key does not name a pipeline parameter.
    """
    interface = _python_op._extract_component_interface(pipeline_func)
    pipeline_name = pipeline_name or interface.name
    declared_inputs = interface.inputs or []

    if not pipeline_root:
      # Fall back to the directory attached to the function, if any.
      pipeline_root = getattr(pipeline_func, 'output_directory', None)
      if not pipeline_root:
        warnings.warn('pipeline_root is None or empty. A valid pipeline_root '
                      'must be provided at job submission.')

    # Typed placeholders without values: these are what the pipeline function
    # receives when it is called below.
    placeholders = []
    for param_name in inspect.signature(pipeline_func).parameters:
      # Type of the first declared input with the same name, else None.
      matched_type = next(
          (declared.type
           for declared in declared_inputs
           if declared.name == param_name),
          None,
      )
      placeholders.append(
          dsl.PipelineParam(
              sanitize_k8s_name(param_name, True), param_type=matched_type))

    with dsl.Pipeline(pipeline_name) as dsl_pipeline:
      pipeline_func(*placeholders)

    self._sanitize_and_inject_artifact(dsl_pipeline)

    # Same parameters again, this time carrying their default values.
    params_with_defaults = [
        dsl.PipelineParam(
            sanitize_k8s_name(declared.name, True),
            param_type=declared.type,
            value=declared.default) for declared in declared_inputs
    ]

    # Making the pipeline group name unique to prevent name clashes with templates
    root_group = dsl_pipeline.groups[0]
    root_group.name = uuid.uuid4().hex

    pipeline_spec = self._create_pipeline_spec(params_with_defaults,
                                               dsl_pipeline)

    params_by_name = {param.name: param for param in params_with_defaults}
    # Apply caller-supplied overrides on top of the defaults, rejecting any
    # key that is not a known parameter.
    overrides = pipeline_parameters_override or {}
    for override_name, override_value in overrides.items():
      if override_name not in params_by_name:
        raise ValueError('Pipeline parameter {} does not match any known '
                         'pipeline argument.'.format(override_name))
      params_by_name[override_name].value = override_value

    runtime_config = compiler_utils.build_runtime_config_spec(
        output_directory=pipeline_root, pipeline_parameters=params_by_name)
    pipeline_job = pipeline_spec_pb2.PipelineJob(runtime_config=runtime_config)
    pipeline_job.pipeline_spec.update(json_format.MessageToDict(pipeline_spec))

    return pipeline_job
Ejemplo n.º 5
0
    def _create_pipeline(
        self,
        pipeline_func: Callable[..., Any],
        output_directory: str,
        pipeline_name: Optional[str] = None,
        pipeline_parameters_override: Optional[Mapping[str, Any]] = None,
    ) -> pipeline_spec_pb2.PipelineJob:
        """Calls the pipeline function and wraps the resulting spec in a job.

        Args:
          pipeline_func: Pipeline function with @dsl.pipeline decorator.
          output_directory: The root of the pipeline outputs.
          pipeline_name: The name of the pipeline. Optional; when falsy, the
            name from the extracted component interface is used.
          pipeline_parameters_override: The mapping from parameter names to
            values. Optional; entries are merged over the declared defaults.

        Returns:
          A PipelineJob proto representing the compiled pipeline.
        """
        interface = _python_op._extract_component_interface(pipeline_func)
        pipeline_name = pipeline_name or interface.name
        declared_inputs = interface.inputs or []

        # Typed placeholders without values: these are what the pipeline
        # function receives when it is called below.
        placeholders = []
        for param_name in inspect.signature(pipeline_func).parameters:
            # Type of the first declared input with the same name, else None.
            matched_type = next(
                (declared.type
                 for declared in declared_inputs
                 if declared.name == param_name),
                None,
            )
            placeholders.append(
                dsl.PipelineParam(
                    sanitize_k8s_name(param_name, True),
                    param_type=matched_type,
                ))

        with dsl.Pipeline(pipeline_name) as dsl_pipeline:
            pipeline_func(*placeholders)

        # Same parameters again, this time carrying their default values.
        params_with_defaults = [
            dsl.PipelineParam(
                sanitize_k8s_name(declared.name, True),
                param_type=declared.type,
                value=declared.default,
            ) for declared in declared_inputs
        ]

        pipeline_spec = self._create_pipeline_spec(params_with_defaults,
                                                   dsl_pipeline)

        # Start from the declared defaults, then layer any overrides on top.
        parameter_values = {
            param.name: param.value for param in params_with_defaults
        }
        if pipeline_parameters_override:
            parameter_values.update(pipeline_parameters_override)

        runtime_config = compiler_utils.build_runtime_config_spec(
            output_directory=output_directory,
            pipeline_parameters=parameter_values)
        pipeline_job = pipeline_spec_pb2.PipelineJob(
            runtime_config=runtime_config)
        pipeline_job.pipeline_spec.update(
            json_format.MessageToDict(pipeline_spec))

        return pipeline_job