Example #1
    def helper_test_component_against_func_using_local_call(
            self, func: Callable, op: Callable, arguments: dict):
        # ! This function cannot be used when the component has output types
        # that use custom serialization, since it would compare non-serialized
        # function outputs with serialized component outputs.
        # Evaluating the function to get the expected output values
        expected_output_values_list = func(**arguments)
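        # A function with a single output returns a bare value (or a string);
        # wrap it in a list so it can be zipped with the output names below.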
        if not isinstance(expected_output_values_list, Sequence) or isinstance(
                expected_output_values_list, str):
            expected_output_values_list = [str(expected_output_values_list)]
        expected_output_values_list = [
            str(value) for value in expected_output_values_list
        ]

        output_names = [output.name for output in op.component_spec.outputs]
        from kfp.components._naming import (
            generate_unique_name_conversion_table, _sanitize_python_function_name)
        output_name_to_pythonic = generate_unique_name_conversion_table(
            output_names, _sanitize_python_function_name)
        pythonic_output_names = [
            output_name_to_pythonic[name] for name in output_names
        ]
        from collections import OrderedDict
        expected_output_values_dict = OrderedDict(
            zip(pythonic_output_names, expected_output_values_list))

        self.helper_test_component_using_local_call(
            op, arguments, expected_output_values_dict)
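
A minimal usage sketch for the helper above (not from the original tests): it
assumes the v1 SDK's kfp.components.create_component_from_func and a test class
that defines the helper; the add function and its arguments are hypothetical.

# Hypothetical usage sketch (assumptions noted above).
from typing import NamedTuple

from kfp.components import create_component_from_func


def add(a: float, b: float) -> NamedTuple('Outputs', [('sum', float)]):
    """Returns the sum of two numbers."""
    return (a + b,)


add_op = create_component_from_func(add)

# Inside a test case that defines the helper, one would write:
# self.helper_test_component_against_func_using_local_call(
#     add, add_op, arguments={'a': 1.0, 'b': 2.5})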
Example #2
def _create_container_op_from_component_and_arguments(
    component_spec: _structures.ComponentSpec,
    arguments: Mapping[str, Any],
    component_ref: Optional[_structures.ComponentReference] = None,
) -> _container_op.ContainerOp:
  """Instantiates ContainerOp object.

  Args:
    component_spec: The component spec object.
    arguments: The dictionary of component arguments.
    component_ref: (only for v1) The component reference.

  Returns:
    A ContainerOp instance.
  """

  # Add component inputs with default values to the arguments dict if they are
  # not in the arguments dict already.
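  # (Defaults arrive as strings from the component spec; e.g. a component
  # input declared with type Integer and default '5' is coerced to 5 below.)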
  arguments = arguments.copy()
  for input_spec in component_spec.inputs or []:
    if input_spec.name not in arguments and input_spec.default is not None:
      default_value = input_spec.default
      if input_spec.type == 'Integer':
        default_value = int(default_value)
      elif input_spec.type == 'Float':
        default_value = float(default_value)
      arguments[input_spec.name] = default_value

  # Check types of the reference arguments and serialize PipelineParams
  original_arguments = arguments
  arguments = arguments.copy()
  for input_name, argument_value in arguments.items():
    if isinstance(argument_value, _pipeline_param.PipelineParam):
      input_type = component_spec._inputs_dict[input_name].type
      argument_type = argument_value.param_type
      types.verify_type_compatibility(
          argument_type, input_type,
          'Incompatible argument passed to the input "{}" of component "{}": '
          .format(input_name, component_spec.name))

      arguments[input_name] = str(argument_value)
    if isinstance(argument_value, _container_op.ContainerOp):
      raise TypeError(
          'ContainerOp object was passed to component as an input argument. '
          'Pass a single output instead.')
  placeholder_resolver = ExtraPlaceholderResolver()
  resolved_cmd = _components._resolve_command_line_and_paths(
      component_spec=component_spec,
      arguments=arguments,
      placeholder_resolver=placeholder_resolver.resolve_placeholder,
  )

  container_spec = component_spec.implementation.container

  old_warn_value = _container_op.ContainerOp._DISABLE_REUSABLE_COMPONENT_WARNING
  _container_op.ContainerOp._DISABLE_REUSABLE_COMPONENT_WARNING = True

  output_paths = collections.OrderedDict(resolved_cmd.output_paths or {})
  output_paths.update(placeholder_resolver.output_paths)
  input_paths = collections.OrderedDict(resolved_cmd.input_paths or {})
  input_paths.update(placeholder_resolver.input_paths)

  artifact_argument_paths = [
      dsl.InputArgumentPath(
          argument=arguments[input_name],
          input=input_name,
          path=path,
      ) for input_name, path in input_paths.items()
  ]

  task = _container_op.ContainerOp(
      name=component_spec.name or _components._default_component_name,
      image=container_spec.image,
      command=resolved_cmd.command,
      arguments=resolved_cmd.args,
      file_outputs=output_paths,
      artifact_argument_paths=artifact_argument_paths,
  )
  _container_op.ContainerOp._DISABLE_REUSABLE_COMPONENT_WARNING = old_warn_value

  component_meta = copy.copy(component_spec)
  task._set_metadata(component_meta)
  if component_ref:
    component_ref_without_spec = copy.copy(component_ref)
    component_ref_without_spec.spec = None
    task._component_ref = component_ref_without_spec

  task._parameter_arguments = resolved_cmd.inputs_consumed_by_value

  # Previously, ContainerOp had strict requirements for the output names, so we
  # had to convert all the names before passing them to the ContainerOp
  # constructor.
  # Outputs with non-pythonic names could not be accessed using their original
  # names. Now ContainerOp supports any output names, so we're now using the
  # original output names. However to support legacy pipelines, we're also
  # adding output references with pythonic names.
  # TODO: Add warning when people use the legacy output names.
  output_names = [
      output_spec.name for output_spec in component_spec.outputs or []
  ]  # Stabilizing the ordering
  output_name_to_python = _naming.generate_unique_name_conversion_table(
      output_names, _naming._sanitize_python_function_name)
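  # For example, an output named 'Output data' gets the pythonic name
  # 'output_data'; the loop below aliases task.outputs['output_data'] to
  # task.outputs['Output data'].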
  for output_name in output_names:
    pythonic_output_name = output_name_to_python[output_name]
    # Note: Some component outputs are currently missing from task.outputs
    # (e.g. MLPipeline UI Metadata)
    if pythonic_output_name not in task.outputs and output_name in task.outputs:
      task.outputs[pythonic_output_name] = task.outputs[output_name]

  if container_spec.env:
    from kubernetes import client as k8s_client
    for name, value in container_spec.env.items():
      task.container.add_env_variable(
          k8s_client.V1EnvVar(name=name, value=value))

  if component_spec.metadata:
    annotations = component_spec.metadata.annotations or {}
    for key, value in annotations.items():
      task.add_pod_annotation(key, value)
    for key, value in (component_spec.metadata.labels or {}).items():
      task.add_pod_label(key, value)
    # Disabling the caching for the volatile components by default
    if annotations.get('volatile_component', 'false') == 'true':
      task.execution_options.caching_strategy.max_cache_staleness = 'P0D'

  _attach_v2_specs(task, component_spec, original_arguments)

  return task
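
For orientation, a minimal sketch of the call path this bridge serves, assuming
the v1 kfp SDK; the component text below is hypothetical. Calling echo_op(...)
inside a pipeline goes through the generated task factory, which in turn calls
_create_container_op_from_component_and_arguments to build the ContainerOp.

# Hypothetical sketch (assumptions noted above).
import kfp
from kfp import dsl

echo_op = kfp.components.load_component_from_text('''
name: Echo
inputs:
- {name: message, type: String}
implementation:
  container:
    image: alpine
    command: [echo, {inputValue: message}]
''')


@dsl.pipeline(name='echo-pipeline')
def echo_pipeline(message: str = 'hello'):
    echo_task = echo_op(message=message)  # a ContainerOp built by the bridge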
Example #3
def create_container_op_from_component_and_arguments(
    component_spec: structures.ComponentSpec,
    arguments: Mapping[str, Any],
    component_ref: Optional[structures.ComponentReference] = None,
) -> container_op.ContainerOp:
    """Instantiates ContainerOp object.

    Args:
      component_spec: The component spec object.
      arguments: The dictionary of component arguments.
      component_ref: The component reference. Optional.

    Returns:
      A ContainerOp instance.
    """

    pipeline_task_spec = pipeline_spec_pb2.PipelineTaskSpec()
    pipeline_task_spec.task_info.name = component_spec.name
    # Might need to append a suffix to executor_label to ensure its uniqueness?
    pipeline_task_spec.executor_label = component_spec.name

    # Keep track of auto-injected importer spec.
    importer_spec = {}

    # Check types of the reference arguments and serialize PipelineParams
    arguments = arguments.copy()
    for input_name, argument_value in arguments.items():
        if isinstance(argument_value, dsl.PipelineParam):
            input_type = component_spec._inputs_dict[input_name].type
            reference_type = argument_value.param_type
            types.verify_type_compatibility(
                reference_type, input_type,
                'Incompatible argument passed to the input "{}" of component "{}": '
                .format(input_name, component_spec.name))

            arguments[input_name] = str(argument_value)

            if type_utils.is_parameter_type(input_type):
                if argument_value.op_name:
                    pipeline_task_spec.inputs.parameters[
                        input_name].task_output_parameter.producer_task = (
                            argument_value.op_name)
                    pipeline_task_spec.inputs.parameters[
                        input_name].task_output_parameter.output_parameter_key = (
                            argument_value.name)
                else:
                    pipeline_task_spec.inputs.parameters[
                        input_name].runtime_value.runtime_parameter = argument_value.name
            else:
                if argument_value.op_name:
                    pipeline_task_spec.inputs.artifacts[
                        input_name].producer_task = (argument_value.op_name)
                    pipeline_task_spec.inputs.artifacts[
                        input_name].output_artifact_key = (argument_value.name)
                else:
                    # argument_value.op_name is None here, so an importer node
                    # will be inserted later.
                    pipeline_task_spec.inputs.artifacts[
                        input_name].producer_task = ''
                    type_schema = type_utils.get_input_artifact_type_schema(
                        input_name, component_spec.inputs)
                    importer_spec[
                        input_name] = importer_node.build_importer_spec(
                            input_type_schema=type_schema,
                            pipeline_param_name=argument_value.name)
        elif isinstance(argument_value, str):
            input_type = component_spec._inputs_dict[input_name].type
            if type_utils.is_parameter_type(input_type):
                pipeline_task_spec.inputs.parameters[
                    input_name].runtime_value.constant_value.string_value = (
                        argument_value)
            else:
                # An importer node with constant value artifact_uri will be inserted.
                pipeline_task_spec.inputs.artifacts[
                    input_name].producer_task = ''
                type_schema = type_utils.get_input_artifact_type_schema(
                    input_name, component_spec.inputs)
                importer_spec[input_name] = importer_node.build_importer_spec(
                    input_type_schema=type_schema,
                    constant_value=argument_value)
        elif isinstance(argument_value, int):
            pipeline_task_spec.inputs.parameters[
                input_name].runtime_value.constant_value.int_value = argument_value
        elif isinstance(argument_value, float):
            pipeline_task_spec.inputs.parameters[
                input_name].runtime_value.constant_value.double_value = argument_value
        elif isinstance(argument_value, dsl.ContainerOp):
            raise TypeError(
                'ContainerOp object was passed to component as the input '
                'argument "{}". Pass a single output instead.'.format(input_name))
        else:
            raise NotImplementedError(
                'Input argument supports only the following types: PipelineParam'
                ', str, int, float. Got: "{}".'.format(argument_value))

    for output in component_spec.outputs or []:
        if type_utils.is_parameter_type(output.type):
            pipeline_task_spec.outputs.parameters[
                output.name].type = type_utils.get_parameter_type(output.type)
        else:
            pipeline_task_spec.outputs.artifacts[
                output.name].artifact_type.instance_schema = (
                    type_utils.get_artifact_type_schema(output.type))

    inputs_dict = {
        input_spec.name: input_spec
        for input_spec in component_spec.inputs or []
    }
    outputs_dict = {
        output_spec.name: output_spec
        for output_spec in component_spec.outputs or []
    }

    def _input_artifact_uri_placeholder(input_key: str) -> str:
        if type_utils.is_parameter_type(inputs_dict[input_key].type):
            raise TypeError(
                'Input "{}" with type "{}" cannot be paired with InputUriPlaceholder.'
                .format(input_key, inputs_dict[input_key].type))
        else:
            return "{{{{$.inputs.artifacts['{}'].uri}}}}".format(input_key)

    def _input_artifact_path_placeholder(input_key: str) -> str:
        if type_utils.is_parameter_type(inputs_dict[input_key].type):
            raise TypeError(
                'Input "{}" with type "{}" cannot be paired with InputPathPlaceholder.'
                .format(input_key, inputs_dict[input_key].type))
        else:
            return "{{{{$.inputs.artifacts['{}'].path}}}}".format(input_key)

    def _input_parameter_placeholder(input_key: str) -> str:
        if type_utils.is_parameter_type(inputs_dict[input_key].type):
            return "{{{{$.inputs.parameters['{}']}}}}".format(input_key)
        else:
            raise TypeError(
                'Input "{}" with type "{}" cannot be paired with InputValuePlaceholder.'
                .format(input_key, inputs_dict[input_key].type))

    def _output_artifact_uri_placeholder(output_key: str) -> str:
        if type_utils.is_parameter_type(outputs_dict[output_key].type):
            raise TypeError(
                'Output "{}" with type "{}" cannot be paired with OutputUriPlaceholder.'
                .format(output_key, outputs_dict[output_key].type))
        else:
            return "{{{{$.outputs.artifacts['{}'].uri}}}}".format(output_key)

    def _output_artifact_path_placeholder(output_key: str) -> str:
        return "{{{{$.outputs.artifacts['{}'].path}}}}".format(output_key)

    def _output_parameter_path_placeholder(output_key: str) -> str:
        return "{{{{$.outputs.parameters['{}'].output_file}}}}".format(
            output_key)

    def _resolve_output_path_placeholder(output_key: str) -> str:
        if type_utils.is_parameter_type(outputs_dict[output_key].type):
            return _output_parameter_path_placeholder(output_key)
        else:
            return _output_artifact_path_placeholder(output_key)

    resolved_cmd = _resolve_command_line_and_paths(
        component_spec=component_spec,
        arguments=arguments,
        input_value_generator=_input_parameter_placeholder,
        input_uri_generator=_input_artifact_uri_placeholder,
        output_uri_generator=_output_artifact_uri_placeholder,
        input_path_generator=_input_artifact_path_placeholder,
        output_path_generator=_resolve_output_path_placeholder,
    )

    container_spec = component_spec.implementation.container

    pipeline_container_spec = (
        pipeline_spec_pb2.PipelineDeploymentConfig.PipelineContainerSpec())
    pipeline_container_spec.image = container_spec.image
    pipeline_container_spec.command.extend(resolved_cmd.command)
    pipeline_container_spec.args.extend(resolved_cmd.args)

    output_uris_and_paths = resolved_cmd.output_uris.copy()
    output_uris_and_paths.update(resolved_cmd.output_paths)
    input_uris_and_paths = resolved_cmd.input_uris.copy()
    input_uris_and_paths.update(resolved_cmd.input_paths)

    old_warn_value = dsl.ContainerOp._DISABLE_REUSABLE_COMPONENT_WARNING
    dsl.ContainerOp._DISABLE_REUSABLE_COMPONENT_WARNING = True
    task = container_op.ContainerOp(
        name=component_spec.name or _default_component_name,
        image=container_spec.image,
        command=resolved_cmd.command,
        arguments=resolved_cmd.args,
        file_outputs=output_uris_and_paths,
        artifact_argument_paths=[
            dsl.InputArgumentPath(
                argument=arguments[input_name],
                input=input_name,
                path=path,
            ) for input_name, path in input_uris_and_paths.items()
        ],
    )

    task.task_spec = pipeline_task_spec
    task.importer_spec = importer_spec
    task.container_spec = pipeline_container_spec
    dsl.ContainerOp._DISABLE_REUSABLE_COMPONENT_WARNING = old_warn_value

    component_meta = copy.copy(component_spec)
    task._set_metadata(component_meta)
    if component_ref:
        component_ref_without_spec = copy.copy(component_ref)
        component_ref_without_spec.spec = None
        task._component_ref = component_ref_without_spec

    # Previously, ContainerOp had strict requirements for the output names, so we
    # had to convert all the names before passing them to the ContainerOp
    # constructor. Outputs with non-pythonic names could not be accessed using
    # their original names. Now ContainerOp supports any output names, so we're
    # now using the original output names. However to support legacy pipelines,
    # we're also adding output references with pythonic names.
    # TODO: Add warning when people use the legacy output names.
    output_names = [
        output_spec.name for output_spec in component_spec.outputs or []
    ]  # Stabilizing the ordering
    output_name_to_python = generate_unique_name_conversion_table(
        output_names, _sanitize_python_function_name)
    for output_name in output_names:
        pythonic_output_name = output_name_to_python[output_name]
        # Note: Some component outputs are currently missing from task.outputs
        # (e.g. MLPipeline UI Metadata)
        if pythonic_output_name not in task.outputs and output_name in task.outputs:
            task.outputs[pythonic_output_name] = task.outputs[output_name]

    if component_spec.metadata:
        annotations = component_spec.metadata.annotations or {}
        for key, value in annotations.items():
            task.add_pod_annotation(key, value)
        for key, value in (component_spec.metadata.labels or {}).items():
            task.add_pod_label(key, value)
        # Disabling the caching for the volatile components by default
        if annotations.get('volatile_component', 'false') == 'true':
            task.execution_options.caching_strategy.max_cache_staleness = 'P0D'

    return task
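
To make the placeholder generators above concrete, a small illustration of the
strings they emit; the input and output names are made up. The quadrupled
braces in the source are escapes for str.format, so the resolved command line
contains double-braced placeholders.

# Hypothetical illustration of the v2 placeholder strings.
input_key, output_key = 'text', 'model'

input_value = "{{{{$.inputs.parameters['{}']}}}}".format(input_key)
output_path = "{{{{$.outputs.artifacts['{}'].path}}}}".format(output_key)

print(input_value)   # {{$.inputs.parameters['text']}}
print(output_path)   # {{$.outputs.artifacts['model'].path}}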
Example #4
def _create_task_factory_from_component_spec(
        component_spec: structures.ComponentSpec,
        component_filename=None,
        component_ref: Optional[structures.ComponentReference] = None):
    name = component_spec.name or _default_component_name

    func_docstring_lines = []
    if component_spec.name:
        func_docstring_lines.append(component_spec.name)
    if component_spec.description:
        func_docstring_lines.append(component_spec.description)

    inputs_list = component_spec.inputs or []  # List[InputSpec]
    input_names = [input.name for input in inputs_list]

    # Creating the name translation tables: Original <-> Pythonic
    input_name_to_pythonic = generate_unique_name_conversion_table(
        input_names, _sanitize_python_function_name)
    pythonic_name_to_input_name = {
        v: k
        for k, v in input_name_to_pythonic.items()
    }

    if component_ref is None:
        component_ref = structures.ComponentReference(spec=component_spec,
                                                      url=component_filename)
    else:
        component_ref.spec = component_spec

    digest = getattr(component_spec, '_digest', None)
    # TODO: Calculate the digest if missing
    if digest:
        # TODO: Report possible digest conflicts
        component_ref.digest = digest

    def create_task_object_from_component_and_pythonic_arguments(
            pythonic_arguments):
        arguments = {
            pythonic_name_to_input_name[argument_name]: argument_value
            for argument_name, argument_value in pythonic_arguments.items()
            if not isinstance(
                argument_value, _DefaultValue
            )  # Skipping passing arguments for optional values that have not been overridden.
        }
        return _create_task_object_from_component_and_arguments(
            component_spec=component_spec,
            arguments=arguments,
            component_ref=component_ref,
        )

    # Reordering the inputs since in Python optional parameters must come
    # after required parameters.
    reordered_input_list = [
        input for input in inputs_list
        if input.default is None and not input.optional
    ] + [
        input for input in inputs_list
        if not (input.default is None and not input.optional)
    ]

    def component_default_to_func_default(component_default: str,
                                          is_optional: bool):
        if is_optional:
            return _DefaultValue(component_default)
        if component_default is not None:
            return component_default
        return inspect.Parameter.empty

    input_parameters = [
        _dynamic.KwParameter(
            input_name_to_pythonic[port.name],
            annotation=(get_canonical_type_for_type_struct(str(port.type))
                        or str(port.type)
                        if port.type else inspect.Parameter.empty),
            default=component_default_to_func_default(port.default,
                                                      port.optional),
        ) for port in reordered_input_list
    ]
    # Outputs are no longer part of the task factory function signature.
    # The paths are always generated by the system.
    factory_function_parameters = input_parameters

    task_factory = _dynamic.create_function_from_parameters(
        create_task_object_from_component_and_pythonic_arguments,
        factory_function_parameters,
        documentation='\n'.join(func_docstring_lines),
        func_name=name,
        func_filename=component_filename if
        (component_filename and
         (component_filename.endswith('.yaml')
          or component_filename.endswith('.yml'))) else None,
    )
    task_factory.component_spec = component_spec
    return task_factory
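
A short sketch of the generated factory from the caller's side, assuming the
v1 SDK; the component text is hypothetical. The factory's signature mirrors
the component inputs under their pythonic names.

# Hypothetical sketch (assumptions noted above).
import inspect

import kfp

train_op = kfp.components.load_component_from_text('''
name: Train model
inputs:
- {name: Learning rate, type: Float, default: '0.1'}
implementation:
  container:
    image: alpine
    command: [echo, {inputValue: Learning rate}]
''')

# 'Learning rate' is sanitized to the pythonic parameter name 'learning_rate'.
print(inspect.signature(train_op))

# Inside a @dsl.pipeline function one could then write:
# train_task = train_op(learning_rate=0.01)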