Example #1
def build_component_spec_for_task(
    task: pipeline_task.PipelineTask,
    is_exit_task: bool = False,
) -> pipeline_spec_pb2.ComponentSpec:
    """Builds ComponentSpec for a pipeline task.

    Args:
        task: The task to build a ComponentSpec for.
        is_exit_task: Whether the task is used as an exit task in an ExitHandler.

    Returns:
        A ComponentSpec object for the task.
    """
    component_spec = pipeline_spec_pb2.ComponentSpec()
    component_spec.executor_label = component_utils.sanitize_executor_label(
        task.name)

    for input_name, input_spec in (task.component_spec.inputs or {}).items():

        # Special handling for PipelineTaskFinalStatus first.
        if type_utils.is_task_final_status_type(input_spec.type):
            if not is_exit_task:
                raise ValueError(
                    'PipelineTaskFinalStatus can only be used in an exit task.'
                )
            component_spec.input_definitions.parameters[
                input_name].parameter_type = pipeline_spec_pb2.ParameterType.STRUCT
            continue

        # skip inputs not present, as a workaround to support optional inputs.
        if input_name not in task.inputs:
            continue

        if type_utils.is_parameter_type(input_spec.type):
            component_spec.input_definitions.parameters[
                input_name].parameter_type = type_utils.get_parameter_type(
                    input_spec.type)
        else:
            component_spec.input_definitions.artifacts[
                input_name].artifact_type.CopyFrom(
                    type_utils.get_artifact_type_schema(input_spec.type))

    for output_name, output_spec in (task.component_spec.outputs
                                     or {}).items():
        if type_utils.is_parameter_type(output_spec.type):
            component_spec.output_definitions.parameters[
                output_name].parameter_type = type_utils.get_parameter_type(
                    output_spec.type)
        else:
            component_spec.output_definitions.artifacts[
                output_name].artifact_type.CopyFrom(
                    type_utils.get_artifact_type_schema(output_spec.type))

    return component_spec
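The parameter-versus-artifact dispatch above is the pattern that runs through every example on this page. Below is a minimal, self-contained sketch of it; the PARAMETER_TYPES set and helper names are illustrative stand-ins, not the real KFP definitions.

PARAMETER_TYPES = {'String', 'Integer', 'Float', 'Boolean', 'Dict', 'List'}

def is_parameter_type(type_name) -> bool:
    # Stand-in: KFP treats a small, closed set of primitive type names as
    # parameters; everything else falls through to the artifact branch.
    return str(type_name) in PARAMETER_TYPES

def split_io(specs: dict) -> tuple:
    """Splits {name: type} specs the same way the two loops above do."""
    parameters, artifacts = {}, {}
    for name, type_name in specs.items():
        target = parameters if is_parameter_type(type_name) else artifacts
        target[name] = type_name
    return parameters, artifacts

params, arts = split_io({'epochs': 'Integer', 'model': 'Model'})
assert params == {'epochs': 'Integer'}
assert arts == {'model': 'Model'}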
Example #2
def build_component_inputs_spec(
    component_spec: pipeline_spec_pb2.ComponentSpec,
    pipeline_params: List[_pipeline_param.PipelineParam],
    is_root_component: bool,
) -> None:
    """Builds component inputs spec from pipeline params.

    Args:
      component_spec: The component spec to fill in its inputs spec.
      pipeline_params: The list of pipeline params.
      is_root_component: Whether the component is the root.
    """
    for param in pipeline_params:
        param_name = param.full_name
        if _for_loop.LoopArguments.name_is_loop_argument(param_name):
            param.param_type = param.param_type or 'String'

        input_name = (
            param_name if is_root_component else
            additional_input_name_for_pipelineparam(param_name))

        if type_utils.is_parameter_type(param.param_type):
            component_spec.input_definitions.parameters[
                input_name].parameter_type = type_utils.get_parameter_type(
                    param.param_type)
        elif input_name not in getattr(component_spec.input_definitions,
                                       'parameters', []):
            component_spec.input_definitions.artifacts[
                input_name].artifact_type.CopyFrom(
                    type_utils.get_artifact_type_schema(param.param_type))
Example #3
def create_pipeline_channel(
    name: str,
    channel_type: Union[str, Dict],
    task_name: Optional[str],
    value: Optional[type_utils.PARAMETER_TYPES] = None,
) -> PipelineChannel:
    """Creates a PipelineChannel object.

    Args:
        name: The name of the channel.
        channel_type: The type of the channel, which decides whether it is a
            PipelineParameterChannel or a PipelineArtifactChannel.
        task_name: Optional; the task that produced the channel.
        value: Optional; the realized value for a channel.

    Returns:
        A PipelineParameterChannel or PipelineArtifactChannel object.
    """
    if type_utils.is_parameter_type(channel_type):
        return PipelineParameterChannel(
            name=name,
            channel_type=channel_type,
            task_name=task_name,
            value=value,
        )
    else:
        return PipelineArtifactChannel(
            name=name,
            channel_type=channel_type,
            task_name=task_name,
        )
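A hypothetical call to this factory (the type strings and task name below are illustrative): a parameter type string yields a PipelineParameterChannel, anything else a PipelineArtifactChannel.

lr_channel = create_pipeline_channel(
    name='learning_rate',
    channel_type='Float',  # parameter type -> PipelineParameterChannel
    task_name=None,
    value=0.01,
)
model_channel = create_pipeline_channel(
    name='model',
    channel_type='system.Model@0.0.1',  # artifact type -> PipelineArtifactChannel
    task_name='train-op',
)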
Example #4
def verify_type_compatibility(given_type: TypeSpecType,
                              expected_type: TypeSpecType,
                              error_message_prefix: str = ""):
    """verify_type_compatibility verifies that the given argument type is
    compatible with the expected input type.

    Args:
            given_type (str/dict): The type of the argument passed to the
              input
            expected_type (str/dict): The declared type of the input
    """
    # A missing type on either side is treated as compatible with anything.
    if given_type is None or expected_type is None:
        return True

    # Generic artifacts resulting from a missing type or an explicit "Artifact"
    # type can be passed to inputs expecting any artifact type.
    # However, generic artifacts resulting from arbitrary unknown types do not
    # get this special compatibility.
    if not type_utils.is_parameter_type(str(expected_type)) and (
            given_type is None or str(given_type).lower() == "artifact"):
        return True

    types_are_compatible = check_types(given_type, expected_type)

    if not types_are_compatible:
        error_text = error_message_prefix + (
            'Argument type "{}" is incompatible with the input type "{}"'
        ).format(str(given_type), str(expected_type))
        import kfp
        if kfp.TYPE_CHECK:
            raise InconsistentTypeException(error_text)
        else:
            warnings.warn(InconsistentTypeWarning(error_text))
    return types_are_compatible
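The raise-versus-warn toggle at the end is easy to miss: with type checking enabled a mismatch is fatal, otherwise it only warns and the function returns False. A self-contained toy of just that toggle, with stand-ins for the kfp names:

import warnings

TYPE_CHECK = True  # stand-in for kfp.TYPE_CHECK

class InconsistentTypeException(Exception):
    pass

class InconsistentTypeWarning(UserWarning):
    pass

def report_type_mismatch(error_text: str) -> bool:
    if TYPE_CHECK:
        raise InconsistentTypeException(error_text)
    warnings.warn(InconsistentTypeWarning(error_text))
    return False  # mirrors returning types_are_compatible == False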
Example #5
    def __init__(
        self,
        name: str,
        channel_type: Union[str, Dict],
        task_name: str,
    ):
        """Initializes a PipelineArtifactChannel instance.

        Args:
          name: The name of the pipeline channel.
          channel_type: The type of the pipeline channel.
          task_name: The name of the task that produces the pipeline channel.

        Raises:
          ValueError: If name or task_name contains invalid characters.
          TypeError: If the channel type is not an artifact type.
        """
        if type_utils.is_parameter_type(channel_type):
            raise TypeError(f'{channel_type} is not an artifact type.')

        super(PipelineArtifactChannel, self).__init__(
            name=name,
            channel_type=channel_type,
            task_name=task_name,
        )
Example #6
    def __init__(
        self,
        name: str,
        channel_type: Union[str, Dict],
        task_name: Optional[str] = None,
        value: Optional[type_utils.PARAMETER_TYPES] = None,
    ):
        """Initializes a PipelineArtifactChannel instance.

        Args:
          name: The name of the pipeline channel.
          channel_type: The type of the pipeline channel.
          task_name: Optional; The name of the task that produces the pipeline
            channel.
          value: Optional; The actual value of the pipeline channel.

        Raises:
          ValueError: If name or task_name contains invalid characters.
          ValueError: If both task_name and value are set.
          TypeError: If the channel type is not a parameter type.
        """
        if task_name and value:
            raise ValueError('task_name and value cannot both be set.')

        if not type_utils.is_parameter_type(channel_type):
            raise TypeError(f'{channel_type} is not a parameter type.')

        self.value = value

        super(PipelineParameterChannel, self).__init__(
            name=name,
            channel_type=channel_type,
            task_name=task_name,
        )
Example #7
def _input_parameter_placeholder(input_key: str) -> str:
    if kfp.COMPILING_FOR_V2 and not type_utils.is_parameter_type(
            inputs_dict[input_key].type):
        raise TypeError(
            'Input "{}" with type "{}" cannot be paired with '
            'InputValuePlaceholder.'.format(
                input_key, inputs_dict[input_key].type))
    else:
        return "{{{{$.inputs.parameters['{}']}}}}".format(input_key)
Example #8
def _output_artifact_uri_placeholder(output_key: str) -> str:
    if kfp.COMPILING_FOR_V2 and type_utils.is_parameter_type(
            outputs_dict[output_key].type):
        raise TypeError(
            'Output "{}" with type "{}" cannot be paired with '
            'OutputUriPlaceholder.'.format(
                output_key, outputs_dict[output_key].type))
    else:
        return _generate_output_uri_placeholder(output_key)
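The quadruple braces in these templates are str.format escapes: each '{{' renders as one literal brace, so the emitted IR placeholder keeps its double-brace delimiters. A quick check:

template = "{{{{$.inputs.parameters['{}']}}}}"
assert template.format('epochs') == "{{$.inputs.parameters['epochs']}}"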
Example #9
def build_component_spec_from_structure(
    component_spec: structures.ComponentSpec,
    executor_label: str,
    actual_inputs: List[str],
) -> pipeline_spec_pb2.ComponentSpec:
    """Builds an IR ComponentSpec instance from structures.ComponentSpec.

    Args:
      component_spec: The structure component spec.
      executor_label: The executor label.
      actual_inputs: The actual arguments passed to the task. This is used as a
        short-term workaround to support optional inputs in the component spec IR.

    Returns:
      An instance of IR ComponentSpec.
    """
    result = pipeline_spec_pb2.ComponentSpec()
    result.executor_label = executor_label

    for input_spec in component_spec.inputs or []:
        # skip inputs not present
        if input_spec.name not in actual_inputs:
            continue
        if type_utils.is_parameter_type(input_spec.type):
            result.input_definitions.parameters[
                input_spec.name].parameter_type = type_utils.get_parameter_type(
                    input_spec.type)
        else:
            result.input_definitions.artifacts[
                input_spec.name].artifact_type.CopyFrom(
                    type_utils.get_artifact_type_schema(input_spec.type))

    for output_spec in component_spec.outputs or []:
        if type_utils.is_parameter_type(output_spec.type):
            result.output_definitions.parameters[
                output_spec
                .name].parameter_type = type_utils.get_parameter_type(
                    output_spec.type)
        else:
            result.output_definitions.artifacts[
                output_spec.name].artifact_type.CopyFrom(
                    type_utils.get_artifact_type_schema(output_spec.type))

    return result
Example #10
def build_task_inputs_spec(
    task_spec: pipeline_spec_pb2.PipelineTaskSpec,
    pipeline_params: List[_pipeline_param.PipelineParam],
    tasks_in_current_dag: List[str],
    is_parent_component_root: bool,
) -> None:
    """Builds task inputs spec from pipeline params.

    Args:
      task_spec: The task spec to fill in its inputs spec.
      pipeline_params: The list of pipeline params.
      tasks_in_current_dag: The list of task names for tasks in the same DAG.
      is_parent_component_root: Whether the task is in the root component.
    """
    for param in pipeline_params or []:

        param_full_name, subvar_name = _exclude_loop_arguments_variables(param)
        input_name = additional_input_name_for_pipelineparam(param.full_name)

        param_name = param.name
        if subvar_name:
            task_spec.inputs.parameters[
                input_name].parameter_expression_selector = (
                    'parseJson(string_value)["{}"]'.format(subvar_name))
            param_name = _for_loop.LoopArguments.remove_loop_item_base_name(
                _exclude_loop_arguments_variables(param_name)[0])

        if type_utils.is_parameter_type(param.param_type):
            if param.op_name and dsl_utils.sanitize_task_name(
                    param.op_name) in tasks_in_current_dag:
                task_spec.inputs.parameters[
                    input_name].task_output_parameter.producer_task = (
                        dsl_utils.sanitize_task_name(param.op_name))
                task_spec.inputs.parameters[
                    input_name].task_output_parameter.output_parameter_key = (
                        param_name)
            else:
                task_spec.inputs.parameters[
                    input_name].component_input_parameter = (
                        param_full_name if is_parent_component_root else
                        additional_input_name_for_pipelineparam(param_full_name)
                    )
        else:
            if param.op_name and dsl_utils.sanitize_task_name(
                    param.op_name) in tasks_in_current_dag:
                task_spec.inputs.artifacts[
                    input_name].task_output_artifact.producer_task = (
                        dsl_utils.sanitize_task_name(param.op_name))
                task_spec.inputs.artifacts[
                    input_name].task_output_artifact.output_artifact_key = (
                        param_name)
            else:
                task_spec.inputs.artifacts[
                    input_name].component_input_artifact = (
                        param_full_name
                        if is_parent_component_root else input_name)
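The four wiring branches above boil down to one rule, applied once for parameters and once for artifacts: if the producer task lives in the current DAG, reference its output directly; otherwise route through an input surfaced on the parent component. A minimal sketch against the kfp-pipeline-spec protos, with hypothetical task and key names:

from kfp.pipeline_spec import pipeline_spec_pb2

task_spec = pipeline_spec_pb2.PipelineTaskSpec()

# Producer is in the same DAG: consume its output parameter directly.
task_spec.inputs.parameters[
    'metric'].task_output_parameter.producer_task = 'train-op'
task_spec.inputs.parameters[
    'metric'].task_output_parameter.output_parameter_key = 'accuracy'

# Producer is elsewhere: fall back to an input of the parent component.
task_spec.inputs.parameters[
    'lr'].component_input_parameter = 'learning_rate'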
Example #11
def build_component_spec_for_task(
        task: pipeline_task.PipelineTask) -> pipeline_spec_pb2.ComponentSpec:
    """Builds ComponentSpec for a pipeline task.

    Args:
        task: The task to build a ComponentSpec for.

    Returns:
        A ComponentSpec object for the task.
    """
    component_spec = pipeline_spec_pb2.ComponentSpec()
    component_spec.executor_label = component_utils.sanitize_executor_label(
        task.name)

    for input_name, input_spec in (task.component_spec.inputs or {}).items():

        # skip inputs not present, as a workaround to support optional inputs.
        if input_name not in task.inputs:
            continue

        if type_utils.is_parameter_type(input_spec.type):
            component_spec.input_definitions.parameters[
                input_name].parameter_type = type_utils.get_parameter_type(
                    input_spec.type)
        else:
            component_spec.input_definitions.artifacts[
                input_name].artifact_type.CopyFrom(
                    type_utils.get_artifact_type_schema(input_spec.type))

    for output_name, output_spec in (task.component_spec.outputs
                                     or {}).items():
        if type_utils.is_parameter_type(output_spec.type):
            component_spec.output_definitions.parameters[
                output_name].parameter_type = type_utils.get_parameter_type(
                    output_spec.type)
        else:
            component_spec.output_definitions.artifacts[
                output_name].artifact_type.CopyFrom(
                    type_utils.get_artifact_type_schema(output_spec.type))

    return component_spec
Example #12
def extract_pipeline_channels_from_string(
        payload: str) -> List[PipelineChannel]:
    """Extracts a list of PipelineChannel instances from the payload string.

    Note: this function removes all duplicate matches.

    Args:
      payload: A string that may contain serialized PipelineChannels.

    Returns:
      A list of PipelineChannels found from the payload.
    """
    matches = re.findall(_PIPELINE_CHANNEL_PLACEHOLDER_REGEX, payload)
    unique_channels = set()
    for match in matches:
        task_name, name, channel_type = match

        # channel_type could be either a string (e.g. "Integer") or a dictionary
        # (e.g. {"custom_type": {"custom_property": "some_value"}}).
        # Try loading it as a dictionary; if that fails, channel_type is a
        # string.
        try:
            channel_type = json.loads(channel_type)
        except json.JSONDecodeError:
            pass

        if type_utils.is_parameter_type(channel_type):
            pipeline_channel = PipelineParameterChannel(
                name=name,
                channel_type=channel_type,
                task_name=task_name,
            )
        else:
            pipeline_channel = PipelineArtifactChannel(
                name=name,
                channel_type=channel_type,
                task_name=task_name,
            )
        unique_channels.add(pipeline_channel)

    return list(unique_channels)
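The string-or-dict parsing trick stands alone nicely: JSON payloads become dictionaries, while plain type names fail to parse and stay strings.

import json

def parse_channel_type(raw: str):
    # Mirrors the try/except in extract_pipeline_channels_from_string.
    try:
        return json.loads(raw)
    except json.JSONDecodeError:
        return raw

assert parse_channel_type('Integer') == 'Integer'
assert parse_channel_type('{"custom_type": {"custom_property": "v"}}') == {
    'custom_type': {'custom_property': 'v'}
}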
Example #13
def build_component_outputs_spec(
    component_spec: pipeline_spec_pb2.ComponentSpec,
    pipeline_params: List[_pipeline_param.PipelineParam],
) -> None:
    """Builds component outputs spec from pipeline params.

    Args:
      component_spec: The component spec to fill in its outputs spec.
      pipeline_params: The list of pipeline params.
    """
    for param in pipeline_params or []:
        output_name = param.full_name
        if type_utils.is_parameter_type(param.param_type):
            component_spec.output_definitions.parameters[
                output_name].parameter_type = type_utils.get_parameter_type(
                    param.param_type)
        elif output_name not in getattr(component_spec.output_definitions,
                                        'parameters', []):
            component_spec.output_definitions.artifacts[
                output_name].artifact_type.CopyFrom(
                    type_utils.get_artifact_type_schema(param.param_type))
Example #14
    def _create_pipeline_v2(
        self,
        pipeline_func: Callable[..., Any],
        pipeline_name: Optional[str] = None,
        pipeline_parameters_override: Optional[Mapping[str, Any]] = None,
    ) -> pipeline_spec_pb2.PipelineSpec:
        """Creates a pipeline instance and constructs the pipeline spec from
        it.

        Args:
            pipeline_func: The pipeline function with @dsl.pipeline decorator.
            pipeline_name: Optional; the name of the pipeline.
            pipeline_parameters_override: Optional; the mapping from parameter
                names to values.

        Returns:
            A PipelineSpec proto representing the compiled pipeline.
        """

        # Create the arg list with no default values and call the pipeline
        # function, assigning type information to the PipelineChannels.
        pipeline_meta = component_factory.extract_component_interface(
            pipeline_func)
        pipeline_name = pipeline_name or pipeline_meta.name

        pipeline_root = getattr(pipeline_func, 'pipeline_root', None)

        args_list = []
        signature = inspect.signature(pipeline_func)

        for arg_name in signature.parameters:
            arg_type = pipeline_meta.inputs[arg_name].type
            if not type_utils.is_parameter_type(arg_type):
                raise TypeError(
                    'The pipeline argument "{arg_name}" is viewed as an artifact'
                    ' due to its type "{arg_type}". And we currently do not '
                    'support passing artifacts as pipeline inputs. Consider type'
                    ' annotating the argument with a primitive type, such as '
                    '"str", "int", "float", "bool", "dict", and "list".'.format(
                        arg_name=arg_name, arg_type=arg_type))
            args_list.append(
                dsl.PipelineParameterChannel(
                    name=arg_name, channel_type=arg_type))

        with pipeline_context.Pipeline(pipeline_name) as dsl_pipeline:
            pipeline_func(*args_list)

        if not dsl_pipeline.tasks:
            raise ValueError('Task is missing from pipeline.')

        self._validate_exit_handler(dsl_pipeline)

        pipeline_inputs = pipeline_meta.inputs or {}

        # Verify that pipeline_parameters_override contains only input names
        # that match the pipeline inputs definition.
        pipeline_parameters_override = pipeline_parameters_override or {}
        for input_name in pipeline_parameters_override:
            if input_name not in pipeline_inputs:
                raise ValueError(
                    'Pipeline parameter {} does not match any known '
                    'pipeline argument.'.format(input_name))

        # Fill in the default values.
        args_list_with_defaults = [
            dsl.PipelineParameterChannel(
                name=input_name,
                channel_type=input_spec.type,
                value=pipeline_parameters_override.get(input_name) or
                input_spec.default,
            ) for input_name, input_spec in pipeline_inputs.items()
        ]

        # Making the pipeline group name unique to prevent name clashes with
        # templates.
        pipeline_group = dsl_pipeline.groups[0]
        pipeline_group.name = uuid.uuid4().hex

        pipeline_spec = self._create_pipeline_spec(
            pipeline_args=args_list_with_defaults,
            pipeline=dsl_pipeline,
        )

        if pipeline_root:
            pipeline_spec.default_pipeline_root = pipeline_root

        return pipeline_spec
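The signature walk near the top of this method is plain inspect machinery. A toy version, assuming for simplicity that type names come straight from the annotations rather than from the extracted component interface:

import inspect

def my_pipeline(learning_rate: float, epochs: int):
    ...

for arg_name, param in inspect.signature(my_pipeline).parameters.items():
    # The real method looks up pipeline_meta.inputs[arg_name].type instead.
    print(arg_name, '->', param.annotation.__name__)
# learning_rate -> float
# epochs -> int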
Example #15
def _is_output_parameter(output_key: str) -> bool:
    for output in component_spec.component_spec.outputs:
        if output.name == output_key:
            return type_utils.is_parameter_type(output.type)
    return False
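A self-contained version of this lookup, with a stand-in OutputSpec and a toy parameter check in place of type_utils.is_parameter_type:

from typing import NamedTuple, Optional

class OutputSpec(NamedTuple):  # stand-in for structures.OutputSpec
    name: str
    type: Optional[str]

outputs = [OutputSpec('accuracy', 'Float'), OutputSpec('model', 'Model')]

def is_output_parameter(output_key: str) -> bool:
    for output in outputs:
        if output.name == output_key:
            return output.type in {'String', 'Integer', 'Float'}  # toy check
    return False

assert is_output_parameter('accuracy') and not is_output_parameter('model')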
Example #16
        def expand_command_part(arg) -> Union[str, List[str], None]:
            if arg is None:
                return None

            if isinstance(arg, (str, int, float, bool)):
                return str(arg)

            elif isinstance(arg, (dict, list)):
                return json.dumps(arg)

            elif isinstance(arg, structures.InputValuePlaceholder):
                input_name = arg.input_name
                if not type_utils.is_parameter_type(
                        inputs_dict[input_name].type):
                    raise TypeError(
                        f'Input "{input_name}" with type '
                        f'"{inputs_dict[input_name].type}" cannot be paired with '
                        'InputValuePlaceholder.')

                if input_name in arguments:
                    return placeholders.input_parameter_placeholder(input_name)
                else:
                    input_spec = inputs_dict[input_name]
                    if input_spec.default is not None:
                        return None
                    else:
                        raise ValueError(
                            f'No value provided for input: {input_name}.')

            elif isinstance(arg, structures.InputUriPlaceholder):
                input_name = arg.input_name
                if type_utils.is_parameter_type(inputs_dict[input_name].type):
                    raise TypeError(
                        f'Input "{input_name}" with type '
                        f'"{inputs_dict[input_name].type}" cannot be paired with '
                        'InputUriPlaceholder.')

                if input_name in arguments:
                    input_uri = placeholders.input_artifact_uri_placeholder(
                        input_name)
                    return input_uri
                else:
                    input_spec = inputs_dict[input_name]
                    if input_spec.default is not None:
                        return None
                    else:
                        raise ValueError(
                            f'No value provided for input: {input_name}.')

            elif isinstance(arg, structures.InputPathPlaceholder):
                input_name = arg.input_name
                if type_utils.is_parameter_type(inputs_dict[input_name].type):
                    raise TypeError(
                        f'Input "{input_name}" with type '
                        f'"{inputs_dict[input_name].type}" cannot be paired with '
                        'InputPathPlaceholder.')

                if input_name in arguments:
                    input_path = placeholders.input_artifact_path_placeholder(
                        input_name)
                    return input_path
                else:
                    input_spec = inputs_dict[input_name]
                    if input_spec.optional:
                        return None
                    else:
                        raise ValueError(
                            f'No value provided for input: {input_name}.')

            elif isinstance(arg, structures.OutputUriPlaceholder):
                output_name = arg.output_name
                if type_utils.is_parameter_type(outputs_dict[output_name].type):
                    raise TypeError(
                        f'Output "{output_name}" with type '
                        f'"{outputs_dict[output_name].type}" cannot be paired with '
                        'OutputUriPlaceholder.')

                output_uri = placeholders.output_artifact_uri_placeholder(
                    output_name)
                return output_uri

            elif isinstance(arg, structures.OutputPathPlaceholder):
                output_name = arg.output_name

                if type_utils.is_parameter_type(outputs_dict[output_name].type):
                    output_path = placeholders.output_parameter_path_placeholder(
                        output_name)
                else:
                    output_path = placeholders.output_artifact_path_placeholder(
                        output_name)
                return output_path

            elif isinstance(arg, structures.ConcatPlaceholder):
                expanded_argument_strings = expand_argument_list(arg.items)
                return ''.join(expanded_argument_strings)

            elif isinstance(arg, structures.IfPresentPlaceholder):
                if arg.if_structure.input_name in argument_values:
                    result_node = arg.if_structure.then
                else:
                    result_node = arg.if_structure.otherwise

                if result_node is None:
                    return []

                if isinstance(result_node, list):
                    expanded_result = expand_argument_list(result_node)
                else:
                    expanded_result = expand_command_part(result_node)
                return expanded_result

            else:
                raise TypeError('Unrecognized argument type: {}'.format(arg))
Example #17
def _resolve_output_path_placeholder(output_key: str) -> str:
    if type_utils.is_parameter_type(outputs_dict[output_key].type):
        return _output_parameter_path_placeholder(output_key)
    else:
        return _output_artifact_path_placeholder(output_key)
Example #18
    def test_is_parameter_type(self):
        for type_name in _PARAMETER_TYPES:
            self.assertTrue(type_utils.is_parameter_type(type_name))
        for type_name in _KNOWN_ARTIFACT_TYPES + _UNKNOWN_ARTIFACT_TYPES:
            self.assertFalse(type_utils.is_parameter_type(type_name))
Example #19
def _attach_v2_specs(
    task: _container_op.ContainerOp,
    component_spec: _structures.ComponentSpec,
    arguments: Mapping[str, Any],
) -> None:
    """Attaches v2 specs to a ContainerOp object.

    Attaches v2 specs to the ContainerOp object regardless of whether the
    pipeline is being compiled to v1 (Argo YAML) or v2 (IR JSON). However, the
    two cases behave differently in areas such as the resolved commands and
    arguments, and error handling.
    Regarding the difference in error handling, v2 has a stricter requirement on
    input type annotation. For instance, an input without any type annotation is
    viewed as an artifact, and if it's paired with InputValuePlaceholder, an
    error will be thrown at compile time. However, we cannot raise such an error
    in v1, as it wouldn't break existing pipelines.

    Args:
      task: The ContainerOp object to attach IR specs.
      component_spec: The component spec object.
      arguments: The dictionary of component arguments.
    """
    def _resolve_commands_and_args_v2(
        component_spec: _structures.ComponentSpec,
        arguments: Mapping[str, Any],
    ) -> _components._ResolvedCommandLineAndPaths:
        """Resolves the command line argument placeholders for v2 (IR).

        Args:
          component_spec: The component spec object.
          arguments: The dictionary of component arguments.

        Returns:
          A named tuple: _components._ResolvedCommandLineAndPaths.
        """
        inputs_dict = {
            input_spec.name: input_spec
            for input_spec in component_spec.inputs or []
        }
        outputs_dict = {
            output_spec.name: output_spec
            for output_spec in component_spec.outputs or []
        }

        def _input_artifact_uri_placeholder(input_key: str) -> str:
            if kfp.COMPILING_FOR_V2 and type_utils.is_parameter_type(
                    inputs_dict[input_key].type):
                raise TypeError(
                    'Input "{}" with type "{}" cannot be paired with '
                    'InputUriPlaceholder.'.format(input_key,
                                                  inputs_dict[input_key].type))
            else:
                return _generate_input_uri_placeholder(input_key)

        def _input_artifact_path_placeholder(input_key: str) -> str:
            if kfp.COMPILING_FOR_V2 and type_utils.is_parameter_type(
                    inputs_dict[input_key].type):
                raise TypeError(
                    'Input "{}" with type "{}" cannot be paired with '
                    'InputPathPlaceholder.'.format(
                        input_key, inputs_dict[input_key].type))
            else:
                return "{{{{$.inputs.artifacts['{}'].path}}}}".format(
                    input_key)

        def _input_parameter_placeholder(input_key: str) -> str:
            if kfp.COMPILING_FOR_V2 and not type_utils.is_parameter_type(
                    inputs_dict[input_key].type):
                raise TypeError(
                    'Input "{}" with type "{}" cannot be paired with '
                    'InputValuePlaceholder.'.format(
                        input_key, inputs_dict[input_key].type))
            else:
                return "{{{{$.inputs.parameters['{}']}}}}".format(input_key)

        def _output_artifact_uri_placeholder(output_key: str) -> str:
            if kfp.COMPILING_FOR_V2 and type_utils.is_parameter_type(
                    outputs_dict[output_key].type):
                raise TypeError(
                    'Output "{}" with type "{}" cannot be paired with '
                    'OutputUriPlaceholder.'.format(
                        output_key, outputs_dict[output_key].type))
            else:
                return _generate_output_uri_placeholder(output_key)

        def _output_artifact_path_placeholder(output_key: str) -> str:
            return "{{{{$.outputs.artifacts['{}'].path}}}}".format(output_key)

        def _output_parameter_path_placeholder(output_key: str) -> str:
            return "{{{{$.outputs.parameters['{}'].output_file}}}}".format(
                output_key)

        def _resolve_output_path_placeholder(output_key: str) -> str:
            if type_utils.is_parameter_type(outputs_dict[output_key].type):
                return _output_parameter_path_placeholder(output_key)
            else:
                return _output_artifact_path_placeholder(output_key)

        placeholder_resolver = ExtraPlaceholderResolver()

        def _resolve_ir_placeholders_v2(
            arg,
            component_spec: _structures.ComponentSpec,
            arguments: dict,
        ) -> str:
            inputs_dict = {
                input_spec.name: input_spec
                for input_spec in component_spec.inputs or []
            }
            if isinstance(arg, _structures.InputValuePlaceholder):
                input_name = arg.input_name
                input_value = arguments.get(input_name, None)
                if input_value is not None:
                    return _input_parameter_placeholder(input_name)
                else:
                    input_spec = inputs_dict[input_name]
                    if input_spec.optional:
                        return None
                    else:
                        raise ValueError(
                            'No value provided for input {}'.format(
                                input_name))

            elif isinstance(arg, _structures.InputUriPlaceholder):
                input_name = arg.input_name
                if input_name in arguments:
                    input_uri = _input_artifact_uri_placeholder(input_name)
                    return input_uri
                else:
                    input_spec = inputs_dict[input_name]
                    if input_spec.optional:
                        return None
                    else:
                        raise ValueError(
                            'No value provided for input {}'.format(
                                input_name))

            elif isinstance(arg, _structures.OutputUriPlaceholder):
                output_name = arg.output_name
                output_uri = _output_artifact_uri_placeholder(output_name)
                return output_uri

            return placeholder_resolver.resolve_placeholder(
                arg=arg,
                component_spec=component_spec,
                arguments=arguments,
            )

        resolved_cmd = _components._resolve_command_line_and_paths(
            component_spec=component_spec,
            arguments=arguments,
            input_path_generator=_input_artifact_path_placeholder,
            output_path_generator=_resolve_output_path_placeholder,
            placeholder_resolver=_resolve_ir_placeholders_v2,
        )
        return resolved_cmd

    pipeline_task_spec = pipeline_spec_pb2.PipelineTaskSpec()

    # Check types of the reference arguments and serialize PipelineParams
    arguments = arguments.copy()

    # Preserve input params for ContainerOp.inputs
    input_params_set = set([
        param for param in arguments.values()
        if isinstance(param, _pipeline_param.PipelineParam)
    ])

    for input_name, argument_value in arguments.items():
        input_type = component_spec._inputs_dict[input_name].type
        argument_type = None

        if isinstance(argument_value, _pipeline_param.PipelineParam):
            argument_type = argument_value.param_type

            types.verify_type_compatibility(
                argument_type, input_type,
                'Incompatible argument passed to the input "{}" of component "{}": '
                .format(input_name, component_spec.name))

            # Loop arguments default to 'String' type if the type is unknown.
            # This has to be done after the type compatibility check.
            if argument_type is None and isinstance(
                    argument_value,
                (_for_loop.LoopArguments, _for_loop.LoopArgumentVariable)):
                argument_type = 'String'

            arguments[input_name] = str(argument_value)

            if type_utils.is_parameter_type(input_type):
                if argument_value.op_name:
                    pipeline_task_spec.inputs.parameters[
                        input_name].task_output_parameter.producer_task = (
                            dsl_utils.sanitize_task_name(
                                argument_value.op_name))
                    pipeline_task_spec.inputs.parameters[
                        input_name].task_output_parameter.output_parameter_key = (
                            argument_value.name)
                else:
                    pipeline_task_spec.inputs.parameters[
                        input_name].component_input_parameter = argument_value.name
            else:
                if argument_value.op_name:
                    pipeline_task_spec.inputs.artifacts[
                        input_name].task_output_artifact.producer_task = (
                            dsl_utils.sanitize_task_name(
                                argument_value.op_name))
                    pipeline_task_spec.inputs.artifacts[
                        input_name].task_output_artifact.output_artifact_key = (
                            argument_value.name)
        elif isinstance(argument_value, str):
            argument_type = 'String'
            pipeline_params = _pipeline_param.extract_pipelineparams_from_any(
                argument_value)
            if pipeline_params and kfp.COMPILING_FOR_V2:
                # argument_value contains PipelineParam placeholders which need
                # to be replaced, and the input needs to be added to the task spec.
                for param in pipeline_params:
                    # Form the name for the compiler-injected input, and make
                    # sure it doesn't collide with any existing input names.
                    additional_input_name = (
                        dsl_component_spec.
                        additional_input_name_for_pipelineparam(param))
                    for existing_input_name, _ in arguments.items():
                        if existing_input_name == additional_input_name:
                            raise ValueError(
                                'Name collision between existing input name '
                                '{} and compiler injected input name {}'.
                                format(existing_input_name,
                                       additional_input_name))

                    # Add the additional param to the input params set. Otherwise, it will
                    # not be included when the params set is not empty.
                    input_params_set.add(param)
                    additional_input_placeholder = (
                        "{{{{$.inputs.parameters['{}']}}}}".format(
                            additional_input_name))
                    argument_value = argument_value.replace(
                        param.pattern, additional_input_placeholder)

                    # The output references are subject to change -- the
                    # producer task may not be within the same DAG.
                    if param.op_name:
                        pipeline_task_spec.inputs.parameters[
                            additional_input_name].task_output_parameter.producer_task = (
                                dsl_utils.sanitize_task_name(param.op_name))
                        pipeline_task_spec.inputs.parameters[
                            additional_input_name].task_output_parameter.output_parameter_key = param.name
                    else:
                        pipeline_task_spec.inputs.parameters[
                            additional_input_name].component_input_parameter = param.full_name

            input_type = component_spec._inputs_dict[input_name].type
            if type_utils.is_parameter_type(input_type):
                pipeline_task_spec.inputs.parameters[
                    input_name].runtime_value.constant.string_value = argument_value
        elif isinstance(argument_value, int):
            argument_type = 'Integer'
            pipeline_task_spec.inputs.parameters[
                input_name].runtime_value.constant.number_value = argument_value
        elif isinstance(argument_value, float):
            argument_type = 'Float'
            pipeline_task_spec.inputs.parameters[
                input_name].runtime_value.constant.number_value = argument_value
        elif isinstance(argument_value, bool):
            argument_type = 'Bool'
            pipeline_task_spec.inputs.parameters[
                input_name].runtime_value.constant.bool_value = argument_value
        elif isinstance(argument_value, list):
            argument_type = 'List'

            # Convert any PipelineParams to strings.
            argument_value = map(
                lambda x: str(x)
                if isinstance(x, dsl.PipelineParam) else x, argument_value)

            pipeline_task_spec.inputs.parameters[
                input_name].runtime_value.constant.list_value.extend(
                    argument_value)
        elif isinstance(argument_value, dict):
            argument_type = 'Dict'
            pipeline_task_spec.inputs.parameters[
                input_name].runtime_value.constant.struct_value.update(
                    argument_value)
        elif isinstance(argument_value, _container_op.ContainerOp):
            raise TypeError(
                f'ContainerOp object {input_name} was passed to component as an '
                'input argument. Pass a single output instead.')
        else:
            if kfp.COMPILING_FOR_V2:
                raise NotImplementedError(
                    'Input argument supports only the following types: '
                    'PipelineParam, str, int, float, bool, dict, and list. Got: '
                    f'"{argument_value}".')

        argument_is_parameter_type = type_utils.is_parameter_type(
            argument_type)
        input_is_parameter_type = type_utils.is_parameter_type(input_type)
        if kfp.COMPILING_FOR_V2 and (argument_is_parameter_type !=
                                     input_is_parameter_type):
            if isinstance(argument_value, dsl.PipelineParam):
                param_or_value_msg = 'PipelineParam "{}"'.format(
                    argument_value.full_name)
            else:
                param_or_value_msg = 'value "{}"'.format(argument_value)

            raise TypeError(
                'Passing '
                '{param_or_value} with type "{arg_type}" (as "{arg_category}") to '
                'component input '
                '"{input_name}" with type "{input_type}" (as "{input_category}") is '
                'incompatible. Please fix the type of the component input.'.
                format(
                    param_or_value=param_or_value_msg,
                    arg_type=argument_type,
                    arg_category='Parameter'
                    if argument_is_parameter_type else 'Artifact',
                    input_name=input_name,
                    input_type=input_type,
                    input_category='Parameter'
                    if input_is_parameter_type else 'Artifact',
                ))

    if not component_spec.name:
        component_spec.name = _components._default_component_name

    resolved_cmd = _resolve_commands_and_args_v2(component_spec=component_spec,
                                                 arguments=arguments)

    task.container_spec = (
        pipeline_spec_pb2.PipelineDeploymentConfig.PipelineContainerSpec(
            image=component_spec.implementation.container.image,
            command=resolved_cmd.command,
            args=resolved_cmd.args,
            env=[
                pipeline_spec_pb2.PipelineDeploymentConfig.
                PipelineContainerSpec.EnvVar(name=name, value=value)
                for name, value in task.container.env_dict.items()
            ],
        ))

    # TODO(chensun): dedupe IR component_spec and container_spec
    pipeline_task_spec.component_ref.name = (dsl_utils.sanitize_component_name(
        task.name))
    executor_label = dsl_utils.sanitize_executor_label(task.name)

    task.component_spec = dsl_component_spec.build_component_spec_from_structure(
        component_spec, executor_label, arguments.keys())

    task.task_spec = pipeline_task_spec

    # Override command and arguments if compiling to v2.
    if kfp.COMPILING_FOR_V2:
        task.command = resolved_cmd.command
        task.arguments = resolved_cmd.args

        # limit this to v2 compiling only to avoid possible behavior change in v1.
        task.inputs = list(input_params_set)
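The constant-argument handling in the middle of this function maps each Python value onto a field of the google.protobuf.Value held in runtime_value.constant. A condensed sketch, assuming the kfp-pipeline-spec protos; the input names are hypothetical:

from kfp.pipeline_spec import pipeline_spec_pb2

spec = pipeline_spec_pb2.PipelineTaskSpec()
params = spec.inputs.parameters
params['name'].runtime_value.constant.string_value = 'resnet'        # str
params['epochs'].runtime_value.constant.number_value = 10            # int/float
params['debug'].runtime_value.constant.bool_value = False            # bool
params['layers'].runtime_value.constant.list_value.extend([64, 32])  # list
params['tags'].runtime_value.constant.struct_value.update({'team': 'ml'})  # dict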
Example #20
def _create_container_op_from_component_and_arguments(
    component_spec: _structures.ComponentSpec,
    arguments: Mapping[str, Any],
    component_ref: Optional[_structures.ComponentReference] = None,
) -> _container_op.ContainerOp:
    """Instantiates ContainerOp object.

    Args:
      component_spec: The component spec object.
      arguments: The dictionary of component arguments.
      component_ref: (v1 only) The component reference.

    Returns:
      A ContainerOp instance.
    """
    # Add component inputs with default value to the arguments dict if they are not
    # in the arguments dict already.
    arguments = arguments.copy()
    for input_spec in component_spec.inputs or []:
        if input_spec.name not in arguments and input_spec.default is not None:
            default_value = input_spec.default
            if input_spec.type == 'Integer':
                default_value = int(default_value)
            elif input_spec.type == 'Float':
                default_value = float(default_value)
            elif (type_utils.is_parameter_type(input_spec.type)
                  and kfp.COMPILING_FOR_V2):
                parameter_type = type_utils.get_parameter_type(input_spec.type)
                default_value = type_utils.deserialize_parameter_value(
                    value=default_value, parameter_type=parameter_type)

            arguments[input_spec.name] = default_value

    # Check types of the reference arguments and serialize PipelineParams
    original_arguments = arguments
    arguments = arguments.copy()
    for input_name, argument_value in arguments.items():
        if isinstance(argument_value, _pipeline_param.PipelineParam):
            input_type = component_spec._inputs_dict[input_name].type
            argument_type = argument_value.param_type
            types.verify_type_compatibility(
                argument_type, input_type,
                'Incompatible argument passed to the input "{}" of component "{}": '
                .format(input_name, component_spec.name))

            arguments[input_name] = str(argument_value)
        if isinstance(argument_value, _container_op.ContainerOp):
            raise TypeError(
                'ContainerOp object was passed to component as an input argument. '
                'Pass a single output instead.')
    placeholder_resolver = ExtraPlaceholderResolver()
    resolved_cmd = _components._resolve_command_line_and_paths(
        component_spec=component_spec,
        arguments=arguments,
        placeholder_resolver=placeholder_resolver.resolve_placeholder,
    )

    container_spec = component_spec.implementation.container

    old_warn_value = _container_op.ContainerOp._DISABLE_REUSABLE_COMPONENT_WARNING
    _container_op.ContainerOp._DISABLE_REUSABLE_COMPONENT_WARNING = True

    output_paths = collections.OrderedDict(resolved_cmd.output_paths or {})
    output_paths.update(placeholder_resolver.output_paths)
    input_paths = collections.OrderedDict(resolved_cmd.input_paths or {})
    input_paths.update(placeholder_resolver.input_paths)

    artifact_argument_paths = [
        dsl.InputArgumentPath(
            argument=arguments[input_name],
            input=input_name,
            path=path,
        ) for input_name, path in input_paths.items()
    ]

    task = _container_op.ContainerOp(
        name=component_spec.name or _components._default_component_name,
        image=container_spec.image,
        command=resolved_cmd.command,
        arguments=resolved_cmd.args,
        file_outputs=output_paths,
        artifact_argument_paths=artifact_argument_paths,
    )
    _container_op.ContainerOp._DISABLE_REUSABLE_COMPONENT_WARNING = old_warn_value

    component_meta = copy.copy(component_spec)
    task._set_metadata(component_meta, original_arguments)
    if component_ref:
        component_ref_without_spec = copy.copy(component_ref)
        component_ref_without_spec.spec = None
        task._component_ref = component_ref_without_spec

    task._parameter_arguments = resolved_cmd.inputs_consumed_by_value
    name_to_spec_type = {}
    if component_meta.inputs:
        name_to_spec_type = {
            input.name: {
                'type': input.type,
                'default': input.default,
            }
            for input in component_meta.inputs
        }

    if kfp.COMPILING_FOR_V2:
        for name, spec_type in name_to_spec_type.items():
            if (name in original_arguments
                    and type_utils.is_parameter_type(spec_type['type'])):
                if isinstance(original_arguments[name], (list, dict)):
                    task._parameter_arguments[name] = json.dumps(
                        original_arguments[name])
                else:
                    task._parameter_arguments[name] = str(
                        original_arguments[name])

    for name in list(task.artifact_arguments.keys()):
        if name in task._parameter_arguments:
            del task.artifact_arguments[name]

    for name in list(task.input_artifact_paths.keys()):
        if name in task._parameter_arguments:
            del task.input_artifact_paths[name]

    # Previously, ContainerOp had strict requirements for the output names, so we
    # had to convert all the names before passing them to the ContainerOp
    # constructor.
    # Outputs with non-pythonic names could not be accessed using their original
    # names. Now ContainerOp supports any output names, so we're now using the
    # original output names. However to support legacy pipelines, we're also
    # adding output references with pythonic names.
    # TODO: Add warning when people use the legacy output names.
    output_names = [
        output_spec.name for output_spec in component_spec.outputs or []
    ]  # Stabilizing the ordering
    output_name_to_python = _naming.generate_unique_name_conversion_table(
        output_names, _naming._sanitize_python_function_name)
    for output_name in output_names:
        pythonic_output_name = output_name_to_python[output_name]
        # Note: Some component outputs are currently missing from task.outputs
        # (e.g. MLPipeline UI Metadata)
        if pythonic_output_name not in task.outputs and output_name in task.outputs:
            task.outputs[pythonic_output_name] = task.outputs[output_name]

    if container_spec.env:
        from kubernetes import client as k8s_client
        for name, value in container_spec.env.items():
            task.container.add_env_variable(
                k8s_client.V1EnvVar(name=name, value=value))

    if component_spec.metadata:
        annotations = component_spec.metadata.annotations or {}
        for key, value in annotations.items():
            task.add_pod_annotation(key, value)
        for key, value in (component_spec.metadata.labels or {}).items():
            task.add_pod_label(key, value)
        # Disabling the caching for the volatile components by default
        if annotations.get('volatile_component', 'false') == 'true':
            task.execution_options.caching_strategy.max_cache_staleness = 'P0D'

    _attach_v2_specs(task, component_spec, original_arguments)

    return task
Example #21
    def test_is_parameter_type_true(self, type_name, expected_result):
        self.assertEqual(expected_result,
                         type_utils.is_parameter_type(type_name))
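The two-argument test above reads like a parameterized case. A minimal sketch of how it might be declared, assuming the absl parameterized library and a toy stand-in for type_utils:

from absl.testing import parameterized

def is_parameter_type(type_name) -> bool:  # toy stand-in
    return str(type_name) in {'String', 'Integer', 'Float'}

class TypeUtilsTest(parameterized.TestCase):

    @parameterized.parameters(
        ('String', True),
        ('Integer', True),
        ('Model', False),
    )
    def test_is_parameter_type_true(self, type_name, expected_result):
        self.assertEqual(expected_result, is_parameter_type(type_name))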