Example #1
    def test_get_parameter_type(self, given_type, expected_type):
        self.assertEqual(expected_type,
                         type_utils.get_parameter_type(given_type))

        # Test get parameter by Python type.
        self.assertEqual(pb.ParameterType.NUMBER_INTEGER,
                         type_utils.get_parameter_type(int))
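The snippet above is a fragment of a parameterized test case; the mapping it exercises can also be checked directly. A minimal sketch, assuming the `kfp-pipeline-spec` proto package is installed; the `type_utils` module path is an assumption and differs between KFP SDK versions.

# Minimal sketch; module path for type_utils is an assumption.
from kfp.pipeline_spec import pipeline_spec_pb2 as pb
from kfp.v2.components.types import type_utils  # path may vary by SDK version

# The helper accepts either a Python type or a type-name string.
assert type_utils.get_parameter_type(int) == pb.ParameterType.NUMBER_INTEGER
assert type_utils.get_parameter_type('String') == pb.ParameterType.STRING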
Example #2
def build_component_spec_for_task(
    task: pipeline_task.PipelineTask,
    is_exit_task: bool = False,
) -> pipeline_spec_pb2.ComponentSpec:
    """Builds ComponentSpec for a pipeline task.

    Args:
        task: The task to build a ComponentSpec for.
        is_exit_task: Whether the task is used as exit task in Exit Handler.

    Returns:
        A ComponentSpec object for the task.
    """
    component_spec = pipeline_spec_pb2.ComponentSpec()
    component_spec.executor_label = component_utils.sanitize_executor_label(
        task.name)

    for input_name, input_spec in (task.component_spec.inputs or {}).items():

        # Special handling for PipelineTaskFinalStatus first.
        if type_utils.is_task_final_status_type(input_spec.type):
            if not is_exit_task:
                raise ValueError(
                    'PipelineTaskFinalStatus can only be used in an exit task.'
                )
            component_spec.input_definitions.parameters[
                input_name].parameter_type = pipeline_spec_pb2.ParameterType.STRUCT
            continue

        # skip inputs not present, as a workaround to support optional inputs.
        if input_name not in task.inputs:
            continue

        if type_utils.is_parameter_type(input_spec.type):
            component_spec.input_definitions.parameters[
                input_name].parameter_type = type_utils.get_parameter_type(
                    input_spec.type)
        else:
            component_spec.input_definitions.artifacts[
                input_name].artifact_type.CopyFrom(
                    type_utils.get_artifact_type_schema(input_spec.type))

    for output_name, output_spec in (task.component_spec.outputs
                                     or {}).items():
        if type_utils.is_parameter_type(output_spec.type):
            component_spec.output_definitions.parameters[
                output_name].parameter_type = type_utils.get_parameter_type(
                    output_spec.type)
        else:
            component_spec.output_definitions.artifacts[
                output_name].artifact_type.CopyFrom(
                    type_utils.get_artifact_type_schema(output_spec.type))

    return component_spec
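For reference, the proto that the loops above populate can also be built by hand. A minimal sketch assuming the `kfp.pipeline_spec.pipeline_spec_pb2` package; the input/output names and the `system.Dataset` schema title are illustrative assumptions, not values taken from the function above.

from kfp.pipeline_spec import pipeline_spec_pb2

# Hand-built equivalent of the structure produced above: one parameter input,
# one artifact input, and one parameter output (names are made up).
spec = pipeline_spec_pb2.ComponentSpec()
spec.executor_label = 'exec-train'
spec.input_definitions.parameters[
    'num_epochs'].parameter_type = pipeline_spec_pb2.ParameterType.NUMBER_INTEGER
spec.input_definitions.artifacts[
    'dataset'].artifact_type.schema_title = 'system.Dataset'  # assumed schema title
spec.output_definitions.parameters[
    'accuracy'].parameter_type = pipeline_spec_pb2.ParameterType.NUMBER_DOUBLE
print(spec)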
Example #3
def build_component_inputs_spec(
    component_spec: pipeline_spec_pb2.ComponentSpec,
    pipeline_params: List[_pipeline_param.PipelineParam],
    is_root_component: bool,
) -> None:
    """Builds component inputs spec from pipeline params.

    Args:
      component_spec: The component spec to fill in its inputs spec.
      pipeline_params: The list of pipeline params.
      is_root_component: Whether the component is the root.
    """
    for param in pipeline_params:
        param_name = param.full_name
        if _for_loop.LoopArguments.name_is_loop_argument(param_name):
            param.param_type = param.param_type or 'String'

        input_name = (
            param_name if is_root_component else
            additional_input_name_for_pipelineparam(param_name))

        if type_utils.is_parameter_type(param.param_type):
            component_spec.input_definitions.parameters[
                input_name].parameter_type = type_utils.get_parameter_type(
                    param.param_type)
        elif input_name not in getattr(component_spec.input_definitions,
                                       'parameters', []):
            component_spec.input_definitions.artifacts[
                input_name].artifact_type.CopyFrom(
                    type_utils.get_artifact_type_schema(param.param_type))
Example #4
def build_component_spec_from_structure(
    component_spec: structures.ComponentSpec,
    executor_label: str,
    actual_inputs: List[str],
) -> pipeline_spec_pb2.ComponentSpec:
    """Builds an IR ComponentSpec instance from structures.ComponentSpec.

    Args:
      component_spec: The structure component spec.
      executor_label: The executor label.
      actual_inputs: The actual arguments passed to the task. This is used as a
        short term workaround to support optional inputs in component spec IR.

    Returns:
      An instance of IR ComponentSpec.
    """
    result = pipeline_spec_pb2.ComponentSpec()
    result.executor_label = executor_label

    for input_spec in component_spec.inputs or []:
        # skip inputs not present
        if input_spec.name not in actual_inputs:
            continue
        if type_utils.is_parameter_type(input_spec.type):
            result.input_definitions.parameters[
                input_spec.name].parameter_type = type_utils.get_parameter_type(
                    input_spec.type)
        else:
            result.input_definitions.artifacts[
                input_spec.name].artifact_type.CopyFrom(
                    type_utils.get_artifact_type_schema(input_spec.type))

    for output_spec in component_spec.outputs or []:
        if type_utils.is_parameter_type(output_spec.type):
            result.output_definitions.parameters[
                output_spec
                .name].parameter_type = type_utils.get_parameter_type(
                    output_spec.type)
        else:
            result.output_definitions.artifacts[
                output_spec.name].artifact_type.CopyFrom(
                    type_utils.get_artifact_type_schema(output_spec.type))

    return result
Example #5
def build_component_spec_for_task(
        task: pipeline_task.PipelineTask) -> pipeline_spec_pb2.ComponentSpec:
    """Builds ComponentSpec for a pipeline task.

    Args:
        task: The task to build a ComponentSpec for.

    Returns:
        A ComponentSpec object for the task.
    """
    component_spec = pipeline_spec_pb2.ComponentSpec()
    component_spec.executor_label = component_utils.sanitize_executor_label(
        task.name)

    for input_name, input_spec in (task.component_spec.inputs or {}).items():

        # skip inputs not present, as a workaround to support optional inputs.
        if input_name not in task.inputs:
            continue

        if type_utils.is_parameter_type(input_spec.type):
            component_spec.input_definitions.parameters[
                input_name].parameter_type = type_utils.get_parameter_type(
                    input_spec.type)
        else:
            component_spec.input_definitions.artifacts[
                input_name].artifact_type.CopyFrom(
                    type_utils.get_artifact_type_schema(input_spec.type))

    for output_name, output_spec in (task.component_spec.outputs
                                     or {}).items():
        if type_utils.is_parameter_type(output_spec.type):
            component_spec.output_definitions.parameters[
                output_name].parameter_type = type_utils.get_parameter_type(
                    output_spec.type)
        else:
            component_spec.output_definitions.artifacts[
                output_name].artifact_type.CopyFrom(
                    type_utils.get_artifact_type_schema(output_spec.type))

    return component_spec
def build_component_spec_for_group(
    pipeline_channels: List[pipeline_channel.PipelineChannel],
    is_root_group: bool,
) -> pipeline_spec_pb2.ComponentSpec:
    """Builds ComponentSpec for a TasksGroup.

    Args:
        pipeline_channels: The list of pipeline channels referenced by the group.
        is_root_group: Whether the group is the root group of the pipeline.

    Returns:
        A ComponentSpec object for the group.
    """
    component_spec = pipeline_spec_pb2.ComponentSpec()

    for channel in pipeline_channels:

        input_name = (
            channel.name if is_root_group else
            _additional_input_name_for_pipeline_channel(channel))

        if isinstance(channel, pipeline_channel.PipelineArtifactChannel):
            component_spec.input_definitions.artifacts[
                input_name].artifact_type.CopyFrom(
                    type_utils.get_artifact_type_schema(channel.channel_type))
        else:
            # channel is one of PipelineParameterChannel, LoopArgument, or
            # LoopArgumentVariable.
            component_spec.input_definitions.parameters[
                input_name].parameter_type = type_utils.get_parameter_type(
                    channel.channel_type)

            # TODO: should we fill in default value for all groups and tasks?
            if is_root_group:
                _fill_in_component_input_default_value(
                    component_spec=component_spec,
                    input_name=input_name,
                    default_value=channel.value,
                )

    return component_spec
Example #7
def build_component_outputs_spec(
    component_spec: pipeline_spec_pb2.ComponentSpec,
    pipeline_params: List[_pipeline_param.PipelineParam],
) -> None:
    """Builds component outputs spec from pipeline params.

    Args:
      component_spec: The component spec to fill in its outputs spec.
      pipeline_params: The list of pipeline params.
    """
    for param in pipeline_params or []:
        output_name = param.full_name
        if type_utils.is_parameter_type(param.param_type):
            component_spec.output_definitions.parameters[
                output_name].parameter_type = type_utils.get_parameter_type(
                    param.param_type)
        elif output_name not in getattr(component_spec.output_definitions,
                                        'parameters', []):
            component_spec.output_definitions.artifacts[
                output_name].artifact_type.CopyFrom(
                    type_utils.get_artifact_type_schema(param.param_type))
Example #8
    def _get_value(param: _pipeline_param.PipelineParam) -> struct_pb2.Value:
        assert param.value is not None, 'None values should be filtered out.'

        result = struct_pb2.Value()
        # TODO(chensun): remove defaulting to 'String' for None param_type once we
        # fix importer behavior.
        param_type = type_utils.get_parameter_type(param.param_type
                                                   or 'String')
        if (param_type == pipeline_spec_pb2.ParameterType.NUMBER_INTEGER or
                param_type == pipeline_spec_pb2.ParameterType.NUMBER_DOUBLE):
            result.number_value = float(param.value)
        elif param_type == pipeline_spec_pb2.ParameterType.STRING:
            result.string_value = param.value
        elif param_type == pipeline_spec_pb2.ParameterType.BOOLEAN:
            result.bool_value = param.value
        elif param_type == pipeline_spec_pb2.ParameterType.LIST:
            result.list_value.extend(param.value)
        elif param_type == pipeline_spec_pb2.ParameterType.STRUCT:
            result.struct_value.update(param.value)
        else:
            raise ValueError('Unknown type for PipelineParam {}'.format(param))

        return result
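The `struct_pb2.Value` well-known type used by `_get_value` exposes one field per JSON kind. A minimal sketch of the same mapping using only the protobuf library (the values are arbitrary):

from google.protobuf import struct_pb2

# One struct_pb2.Value per parameter kind, mirroring the branches above.
number = struct_pb2.Value(number_value=42.0)   # NUMBER_INTEGER / NUMBER_DOUBLE
text = struct_pb2.Value(string_value='hello')  # STRING
flag = struct_pb2.Value(bool_value=True)       # BOOLEAN

items = struct_pb2.Value()
items.list_value.extend([1, 'two', False])     # LIST (ListValue supports extend)

record = struct_pb2.Value()
record.struct_value.update({'lr': 0.01})       # STRUCT (Struct supports update)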
def _resolve_condition_operands(
    left_operand: Union[str, pipeline_channel.PipelineChannel],
    right_operand: Union[str, pipeline_channel.PipelineChannel],
) -> Tuple[str, str]:
    """Resolves values and PipelineChannels for condition operands.

    Args:
        left_operand: The left operand of a condition expression.
        right_operand: The right operand of a condition expression.

    Returns:
        A tuple of the resolved operands values:
        (left_operand_value, right_operand_value).
    """

    # Pre-scan the operands to get the type of the constant value, if any.
    # The value_type can be used to backfill missing PipelineChannel.channel_type.
    value_type = None
    for value_or_reference in [left_operand, right_operand]:
        if isinstance(value_or_reference, pipeline_channel.PipelineChannel):
            parameter_type = type_utils.get_parameter_type(
                value_or_reference.channel_type)
            if parameter_type in [
                    pipeline_spec_pb2.ParameterType.STRUCT,
                    pipeline_spec_pb2.ParameterType.LIST,
                    pipeline_spec_pb2.ParameterType
                    .PARAMETER_TYPE_ENUM_UNSPECIFIED,
            ]:
                input_name = _additional_input_name_for_pipeline_channel(
                    value_or_reference)
                raise ValueError('Conditional requires scalar parameter values'
                                 ' for comparison. Found input "{}" of type {}'
                                 ' in pipeline definition instead.'.format(
                                     input_name,
                                     value_or_reference.channel_type))
    parameter_types = set()
    for value_or_reference in [left_operand, right_operand]:
        if isinstance(value_or_reference, pipeline_channel.PipelineChannel):
            parameter_type = type_utils.get_parameter_type(
                value_or_reference.channel_type)
        else:
            parameter_type = type_utils.get_parameter_type(
                type(value_or_reference).__name__)

        parameter_types.add(parameter_type)

    if len(parameter_types) == 2:
        # Two different types being compared. The only possible types are
        # String, Boolean, Double and Integer. We'll promote the other type
        # using the following precedence:
        # String > Boolean > Double > Integer
        if pipeline_spec_pb2.ParameterType.STRING in parameter_types:
            canonical_parameter_type = pipeline_spec_pb2.ParameterType.STRING
        elif pipeline_spec_pb2.ParameterType.BOOLEAN in parameter_types:
            canonical_parameter_type = pipeline_spec_pb2.ParameterType.BOOLEAN
        else:
            # Must be a double and int, promote to double.
            assert pipeline_spec_pb2.ParameterType.NUMBER_DOUBLE in parameter_types, \
                'Types: {} [{} {}]'.format(
                parameter_types, left_operand, right_operand)
            assert pipeline_spec_pb2.ParameterType.NUMBER_INTEGER in parameter_types, \
                'Types: {} [{} {}]'.format(
                parameter_types, left_operand, right_operand)
            canonical_parameter_type = pipeline_spec_pb2.ParameterType.NUMBER_DOUBLE
    elif len(parameter_types) == 1:  # Both operands are the same type.
        canonical_parameter_type = parameter_types.pop()
    else:
        # Probably shouldn't happen.
        raise ValueError('Unable to determine operand types for'
                         ' "{}" and "{}"'.format(left_operand, right_operand))

    operand_values = []
    for value_or_reference in [left_operand, right_operand]:
        if isinstance(value_or_reference, pipeline_channel.PipelineChannel):
            input_name = _additional_input_name_for_pipeline_channel(
                value_or_reference)
            operand_value = "inputs.parameter_values['{input_name}']".format(
                input_name=input_name)
            parameter_type = type_utils.get_parameter_type(
                value_or_reference.channel_type)
            if parameter_type == pipeline_spec_pb2.ParameterType.NUMBER_INTEGER:
                operand_value = 'int({})'.format(operand_value)
        elif isinstance(value_or_reference, str):
            operand_value = "'{}'".format(value_or_reference)
            parameter_type = pipeline_spec_pb2.ParameterType.STRING
        elif isinstance(value_or_reference, bool):
            # Booleans need to be compared as 'true' or 'false' in CEL.
            operand_value = str(value_or_reference).lower()
            parameter_type = pipeline_spec_pb2.ParameterType.BOOLEAN
        elif isinstance(value_or_reference, int):
            operand_value = str(value_or_reference)
            parameter_type = pipeline_spec_pb2.ParameterType.NUMBER_INTEGER
        else:
            assert isinstance(value_or_reference, float), value_or_reference
            operand_value = str(value_or_reference)
            parameter_type = pipeline_spec_pb2.ParameterType.NUMBER_DOUBLE

        if parameter_type != canonical_parameter_type:
            # Type-cast so CEL does not complain.
            if canonical_parameter_type == pipeline_spec_pb2.ParameterType.STRING:
                assert parameter_type in [
                    pipeline_spec_pb2.ParameterType.BOOLEAN,
                    pipeline_spec_pb2.ParameterType.NUMBER_INTEGER,
                    pipeline_spec_pb2.ParameterType.NUMBER_DOUBLE,
                ]
                operand_value = "'{}'".format(operand_value)
            elif canonical_parameter_type == pipeline_spec_pb2.ParameterType.BOOLEAN:
                assert parameter_type in [
                    pipeline_spec_pb2.ParameterType.NUMBER_INTEGER,
                    pipeline_spec_pb2.ParameterType.NUMBER_DOUBLE,
                ]
                operand_value = 'true' if int(operand_value) == 0 else 'false'
            else:
                assert canonical_parameter_type == pipeline_spec_pb2.ParameterType.NUMBER_DOUBLE
                assert parameter_type == pipeline_spec_pb2.ParameterType.NUMBER_INTEGER
                operand_value = 'double({})'.format(operand_value)

        operand_values.append(operand_value)

    return tuple(operand_values)
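The type-promotion logic above reduces to a single precedence order (String > Boolean > Double > Integer). A hypothetical helper, not part of the KFP SDK, that captures the same rule:

from kfp.pipeline_spec import pipeline_spec_pb2

# Illustrative only: same precedence as the branches above.
_PROMOTION_ORDER = [
    pipeline_spec_pb2.ParameterType.NUMBER_INTEGER,
    pipeline_spec_pb2.ParameterType.NUMBER_DOUBLE,
    pipeline_spec_pb2.ParameterType.BOOLEAN,
    pipeline_spec_pb2.ParameterType.STRING,
]


def canonical_type(parameter_types):
    """Returns the type a set of mixed operand types gets promoted to."""
    return max(parameter_types, key=_PROMOTION_ORDER.index)


assert canonical_type({
    pipeline_spec_pb2.ParameterType.NUMBER_INTEGER,
    pipeline_spec_pb2.ParameterType.NUMBER_DOUBLE,
}) == pipeline_spec_pb2.ParameterType.NUMBER_DOUBLE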
def _create_container_op_from_component_and_arguments(
    component_spec: _structures.ComponentSpec,
    arguments: Mapping[str, Any],
    component_ref: Optional[_structures.ComponentReference] = None,
) -> _container_op.ContainerOp:
    """Instantiates ContainerOp object.

    Args:
      component_spec: The component spec object.
      arguments: The dictionary of component arguments.
      component_ref: (only for v1) The component reference.

    Returns:
      A ContainerOp instance.
    """
    # Add component inputs with default value to the arguments dict if they are not
    # in the arguments dict already.
    arguments = arguments.copy()
    for input_spec in component_spec.inputs or []:
        if input_spec.name not in arguments and input_spec.default is not None:
            default_value = input_spec.default
            if input_spec.type == 'Integer':
                default_value = int(default_value)
            elif input_spec.type == 'Float':
                default_value = float(default_value)
            elif (type_utils.is_parameter_type(input_spec.type)
                  and kfp.COMPILING_FOR_V2):
                parameter_type = type_utils.get_parameter_type(input_spec.type)
                default_value = type_utils.deserialize_parameter_value(
                    value=default_value, parameter_type=parameter_type)

            arguments[input_spec.name] = default_value

    # Check types of the reference arguments and serialize PipelineParams
    original_arguments = arguments
    arguments = arguments.copy()
    for input_name, argument_value in arguments.items():
        if isinstance(argument_value, _pipeline_param.PipelineParam):
            input_type = component_spec._inputs_dict[input_name].type
            argument_type = argument_value.param_type
            types.verify_type_compatibility(
                argument_type, input_type,
                'Incompatible argument passed to the input "{}" of component "{}": '
                .format(input_name, component_spec.name))

            arguments[input_name] = str(argument_value)
        if isinstance(argument_value, _container_op.ContainerOp):
            raise TypeError(
                'ContainerOp object was passed to component as an input argument. '
                'Pass a single output instead.')
    placeholder_resolver = ExtraPlaceholderResolver()
    resolved_cmd = _components._resolve_command_line_and_paths(
        component_spec=component_spec,
        arguments=arguments,
        placeholder_resolver=placeholder_resolver.resolve_placeholder,
    )

    container_spec = component_spec.implementation.container

    old_warn_value = _container_op.ContainerOp._DISABLE_REUSABLE_COMPONENT_WARNING
    _container_op.ContainerOp._DISABLE_REUSABLE_COMPONENT_WARNING = True

    output_paths = collections.OrderedDict(resolved_cmd.output_paths or {})
    output_paths.update(placeholder_resolver.output_paths)
    input_paths = collections.OrderedDict(resolved_cmd.input_paths or {})
    input_paths.update(placeholder_resolver.input_paths)

    artifact_argument_paths = [
        dsl.InputArgumentPath(
            argument=arguments[input_name],
            input=input_name,
            path=path,
        ) for input_name, path in input_paths.items()
    ]

    task = _container_op.ContainerOp(
        name=component_spec.name or _components._default_component_name,
        image=container_spec.image,
        command=resolved_cmd.command,
        arguments=resolved_cmd.args,
        file_outputs=output_paths,
        artifact_argument_paths=artifact_argument_paths,
    )
    _container_op.ContainerOp._DISABLE_REUSABLE_COMPONENT_WARNING = old_warn_value

    component_meta = copy.copy(component_spec)
    task._set_metadata(component_meta, original_arguments)
    if component_ref:
        component_ref_without_spec = copy.copy(component_ref)
        component_ref_without_spec.spec = None
        task._component_ref = component_ref_without_spec

    task._parameter_arguments = resolved_cmd.inputs_consumed_by_value
    name_to_spec_type = {}
    if component_meta.inputs:
        name_to_spec_type = {
            input.name: {
                'type': input.type,
                'default': input.default,
            }
            for input in component_meta.inputs
        }

    if kfp.COMPILING_FOR_V2:
        for name, spec_type in name_to_spec_type.items():
            if (name in original_arguments
                    and type_utils.is_parameter_type(spec_type['type'])):
                if isinstance(original_arguments[name], (list, dict)):
                    task._parameter_arguments[name] = json.dumps(
                        original_arguments[name])
                else:
                    task._parameter_arguments[name] = str(
                        original_arguments[name])

    for name in list(task.artifact_arguments.keys()):
        if name in task._parameter_arguments:
            del task.artifact_arguments[name]

    for name in list(task.input_artifact_paths.keys()):
        if name in task._parameter_arguments:
            del task.input_artifact_paths[name]

    # Previously, ContainerOp had strict requirements for the output names, so we
    # had to convert all the names before passing them to the ContainerOp
    # constructor.
    # Outputs with non-pythonic names could not be accessed using their original
    # names. Now ContainerOp supports any output names, so we're now using the
    # original output names. However to support legacy pipelines, we're also
    # adding output references with pythonic names.
    # TODO: Add warning when people use the legacy output names.
    output_names = [
        output_spec.name for output_spec in component_spec.outputs or []
    ]  # Stabilizing the ordering
    output_name_to_python = _naming.generate_unique_name_conversion_table(
        output_names, _naming._sanitize_python_function_name)
    for output_name in output_names:
        pythonic_output_name = output_name_to_python[output_name]
        # Note: Some component outputs are currently missing from task.outputs
        # (e.g. MLPipeline UI Metadata)
        if pythonic_output_name not in task.outputs and output_name in task.outputs:
            task.outputs[pythonic_output_name] = task.outputs[output_name]

    if container_spec.env:
        from kubernetes import client as k8s_client
        for name, value in container_spec.env.items():
            task.container.add_env_variable(
                k8s_client.V1EnvVar(name=name, value=value))

    if component_spec.metadata:
        annotations = component_spec.metadata.annotations or {}
        for key, value in annotations.items():
            task.add_pod_annotation(key, value)
        for key, value in (component_spec.metadata.labels or {}).items():
            task.add_pod_label(key, value)
        # Disabling the caching for the volatile components by default
        if annotations.get('volatile_component', 'false') == 'true':
            task.execution_options.caching_strategy.max_cache_staleness = 'P0D'

    _attach_v2_specs(task, component_spec, original_arguments)

    return task
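In the v1 SDK, a private helper like `_create_container_op_from_component_and_arguments` is normally reached indirectly, by loading a component and calling the resulting factory inside a pipeline. A minimal, hedged sketch (the component YAML and names are made up; exactly how the factory bridges into this helper varies by SDK version):

import kfp
from kfp import components

echo_op = components.load_component_from_text("""
name: Echo
inputs:
- {name: message, type: String, default: 'hello'}
implementation:
  container:
    image: alpine:3.16
    command: [echo, {inputValue: message}]
""")


@kfp.dsl.pipeline(name='echo-pipeline')
def echo_pipeline(message: str = 'hi'):
    # Calling the loaded-component factory constructs the underlying task.
    echo_op(message=message)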