コード例 #1
0
    def test_build_importer_spec_with_invalid_inputs_should_fail(self):
        with self.assertRaisesRegex(
                AssertionError,
                'importer spec should be built using either pipeline_param_name or '
                'constant_value'):
            importer_node.build_importer_spec(
                input_type_schema='title: kfp.Artifact',
                pipeline_param_name='param1',
                constant_value='some_uri')

        with self.assertRaisesRegex(
                AssertionError,
                'importer spec should be built using either pipeline_param_name or '
                'constant_value'):
            importer_node.build_importer_spec(
                input_type_schema='title: kfp.Artifact')
コード例 #2
0
    def test_build_importer_spec_from_pipeline_param(self):
        expected_importer = {
            'artifactUri': {
                'runtimeParameter': 'param1'
            },
            'typeSchema': {
                'instanceSchema': 'title: kfp.Artifact'
            }
        }
        expected_importer_spec = pb.PipelineDeploymentConfig.ImporterSpec()
        json_format.ParseDict(expected_importer, expected_importer_spec)
        importer_spec = importer_node.build_importer_spec(
            input_type_schema='title: kfp.Artifact',
            pipeline_param_name='param1')

        self.maxDiff = None
        self.assertEqual(expected_importer_spec, importer_spec)
コード例 #3
0
    def test_build_importer_spec_from_constant_value(self):
        expected_importer = {
            'artifactUri': {
                'constantValue': {
                    'stringValue': 'some_uri'
                }
            },
            'typeSchema': {
                'instanceSchema': 'title: kfp.Artifact'
            }
        }
        expected_importer_spec = pb.PipelineDeploymentConfig.ImporterSpec()
        json_format.ParseDict(expected_importer, expected_importer_spec)
        importer_spec = importer_node.build_importer_spec(
            input_type_schema='title: kfp.Artifact', constant_value='some_uri')

        self.maxDiff = None
        self.assertEqual(expected_importer_spec, importer_spec)
コード例 #4
0
def _attach_v2_specs(
    task: _container_op.ContainerOp,
    component_spec: _structures.ComponentSpec,
    arguments: Mapping[str, Any],
) -> None:
    """Attaches v2 specs to a ContainerOp object.

    Args:
      task: The ContainerOp object to attach IR specs.
      component_spec: The component spec object.
      arguments: The dictionary of component arguments.
  """

    # Attach v2_specs to the ContainerOp object regardless whether the pipeline is
    # being compiled to v1 (Argo yaml) or v2 (IR json).
    # However, there're different behaviors for the two cases. Namely, resolved
    # commands and arguments, error handling, etc.
    # Regarding the difference in error handling, v2 has a stricter requirement on
    # input type annotation. For instance, an input without any type annotation is
    # viewed as an artifact, and if it's paired with InputValuePlaceholder, an
    # error will be thrown at compile time. However, we cannot raise such an error
    # in v1, as it wouldn't break existing pipelines.
    is_compiling_for_v2 = False
    for frame in inspect.stack():
        if '_create_pipeline_v2' in frame:
            is_compiling_for_v2 = True
            break

    def _resolve_commands_and_args_v2(
        component_spec: _structures.ComponentSpec,
        arguments: Mapping[str, Any],
    ) -> _components._ResolvedCommandLineAndPaths:
        """Resolves the command line argument placeholders for v2 (IR).

    Args:
      component_spec: The component spec object.
      arguments: The dictionary of component arguments.

    Returns:
      A named tuple: _components._ResolvedCommandLineAndPaths.
    """
        inputs_dict = {
            input_spec.name: input_spec
            for input_spec in component_spec.inputs or []
        }
        outputs_dict = {
            output_spec.name: output_spec
            for output_spec in component_spec.outputs or []
        }

        def _input_artifact_uri_placeholder(input_key: str) -> str:
            if is_compiling_for_v2 and type_utils.is_parameter_type(
                    inputs_dict[input_key].type):
                raise TypeError(
                    'Input "{}" with type "{}" cannot be paired with '
                    'InputUriPlaceholder.'.format(input_key,
                                                  inputs_dict[input_key].type))
            else:
                return "{{{{$.inputs.artifacts['{}'].uri}}}}".format(input_key)

        def _input_artifact_path_placeholder(input_key: str) -> str:
            if is_compiling_for_v2 and type_utils.is_parameter_type(
                    inputs_dict[input_key].type):
                raise TypeError(
                    'Input "{}" with type "{}" cannot be paired with '
                    'InputPathPlaceholder.'.format(
                        input_key, inputs_dict[input_key].type))
            elif is_compiling_for_v2 and input_key in importer_specs:
                raise TypeError(
                    'Input "{}" with type "{}" is not connected to any upstream output. '
                    'However it is used with InputPathPlaceholder. '
                    'If you want to import an existing artifact using a system-connected'
                    ' importer node, use InputUriPlaceholder instead. '
                    'Or if you just want to pass a string parameter, use string type and'
                    ' InputValuePlaceholder instead.'.format(
                        input_key, inputs_dict[input_key].type))
            else:
                return "{{{{$.inputs.artifacts['{}'].path}}}}".format(
                    input_key)

        def _input_parameter_placeholder(input_key: str) -> str:
            if is_compiling_for_v2 and not type_utils.is_parameter_type(
                    inputs_dict[input_key].type):
                raise TypeError(
                    'Input "{}" with type "{}" cannot be paired with '
                    'InputValuePlaceholder.'.format(
                        input_key, inputs_dict[input_key].type))
            else:
                return "{{{{$.inputs.parameters['{}']}}}}".format(input_key)

        def _output_artifact_uri_placeholder(output_key: str) -> str:
            if is_compiling_for_v2 and type_utils.is_parameter_type(
                    outputs_dict[output_key].type):
                raise TypeError(
                    'Output "{}" with type "{}" cannot be paired with '
                    'OutputUriPlaceholder.'.format(
                        output_key, outputs_dict[output_key].type))
            else:
                return "{{{{$.outputs.artifacts['{}'].uri}}}}".format(
                    output_key)

        def _output_artifact_path_placeholder(output_key: str) -> str:
            return "{{{{$.outputs.artifacts['{}'].path}}}}".format(output_key)

        def _output_parameter_path_placeholder(output_key: str) -> str:
            return "{{{{$.outputs.parameters['{}'].output_file}}}}".format(
                output_key)

        def _resolve_output_path_placeholder(output_key: str) -> str:
            if type_utils.is_parameter_type(outputs_dict[output_key].type):
                return _output_parameter_path_placeholder(output_key)
            else:
                return _output_artifact_path_placeholder(output_key)

        placeholder_resolver = ExtraPlaceholderResolver()

        def _resolve_ir_placeholders_v2(
            arg,
            component_spec: _structures.ComponentSpec,
            arguments: dict,
        ) -> str:
            inputs_dict = {
                input_spec.name: input_spec
                for input_spec in component_spec.inputs or []
            }
            if isinstance(arg, _structures.InputValuePlaceholder):
                input_name = arg.input_name
                input_value = arguments.get(input_name, None)
                if input_value is not None:
                    return _input_parameter_placeholder(input_name)
                else:
                    input_spec = inputs_dict[input_name]
                    if input_spec.optional:
                        return None
                    else:
                        raise ValueError(
                            'No value provided for input {}'.format(
                                input_name))

            elif isinstance(arg, _structures.InputUriPlaceholder):
                input_name = arg.input_name
                if input_name in arguments:
                    input_uri = _input_artifact_uri_placeholder(input_name)
                    return input_uri
                else:
                    input_spec = inputs_dict[input_name]
                    if input_spec.optional:
                        return None
                    else:
                        raise ValueError(
                            'No value provided for input {}'.format(
                                input_name))

            elif isinstance(arg, _structures.OutputUriPlaceholder):
                output_name = arg.output_name
                output_uri = _output_artifact_uri_placeholder(output_name)
                return output_uri

            return placeholder_resolver.resolve_placeholder(
                arg=arg,
                component_spec=component_spec,
                arguments=arguments,
            )

        resolved_cmd = _components._resolve_command_line_and_paths(
            component_spec=component_spec,
            arguments=arguments,
            input_path_generator=_input_artifact_path_placeholder,
            output_path_generator=_resolve_output_path_placeholder,
            placeholder_resolver=_resolve_ir_placeholders_v2,
        )
        return resolved_cmd

    pipeline_task_spec = pipeline_spec_pb2.PipelineTaskSpec()

    # Keep track of auto-injected importer spec.
    importer_specs = {}

    # Check types of the reference arguments and serialize PipelineParams
    original_arguments = arguments
    arguments = arguments.copy()

    # Preserver input params for ContainerOp.inputs
    input_params = list(
        set([
            param for param in arguments.values()
            if isinstance(param, _pipeline_param.PipelineParam)
        ]))

    for input_name, argument_value in arguments.items():
        if isinstance(argument_value, _pipeline_param.PipelineParam):
            input_type = component_spec._inputs_dict[input_name].type
            reference_type = argument_value.param_type
            types.verify_type_compatibility(
                reference_type, input_type,
                'Incompatible argument passed to the input "{}" of component "{}": '
                .format(input_name, component_spec.name))

            arguments[input_name] = str(argument_value)

            if type_utils.is_parameter_type(input_type):
                if argument_value.op_name:
                    pipeline_task_spec.inputs.parameters[
                        input_name].task_output_parameter.producer_task = (
                            dsl_utils.sanitize_task_name(
                                argument_value.op_name))
                    pipeline_task_spec.inputs.parameters[
                        input_name].task_output_parameter.output_parameter_key = (
                            argument_value.name)
                else:
                    pipeline_task_spec.inputs.parameters[
                        input_name].component_input_parameter = argument_value.name
            else:
                if argument_value.op_name:
                    pipeline_task_spec.inputs.artifacts[
                        input_name].task_output_artifact.producer_task = (
                            dsl_utils.sanitize_task_name(
                                argument_value.op_name))
                    pipeline_task_spec.inputs.artifacts[
                        input_name].task_output_artifact.output_artifact_key = (
                            argument_value.name)
                elif is_compiling_for_v2:
                    # argument_value.op_name could be none, in which case an importer node
                    # will be inserted later.
                    # Importer node is only applicable for v2 engine.
                    pipeline_task_spec.inputs.artifacts[
                        input_name].task_output_artifact.producer_task = ''
                    type_schema = type_utils.get_input_artifact_type_schema(
                        input_name, component_spec.inputs)
                    importer_specs[
                        input_name] = importer_node.build_importer_spec(
                            input_type_schema=type_schema,
                            pipeline_param_name=argument_value.name)
        elif isinstance(argument_value, str):
            pipeline_params = _pipeline_param.extract_pipelineparams_from_any(
                argument_value)
            if pipeline_params and is_compiling_for_v2:
                # argument_value contains PipelineParam placeholders which needs to be
                # replaced. And the input needs to be added to the task spec.
                for param in pipeline_params:
                    # Form the name for the compiler injected input, and make sure it
                    # doesn't collide with any existing input names.
                    additional_input_name = (
                        dsl_component_spec.
                        additional_input_name_for_pipelineparam(param))
                    for existing_input_name, _ in arguments.items():
                        if existing_input_name == additional_input_name:
                            raise ValueError(
                                'Name collision between existing input name '
                                '{} and compiler injected input name {}'.
                                format(existing_input_name,
                                       additional_input_name))

                    additional_input_placeholder = (
                        "{{{{$.inputs.parameters['{}']}}}}".format(
                            additional_input_name))
                    argument_value = argument_value.replace(
                        param.pattern, additional_input_placeholder)

                    # The output references are subject to change -- the producer task may
                    # not be whitin the same DAG.
                    if param.op_name:
                        pipeline_task_spec.inputs.parameters[
                            additional_input_name].task_output_parameter.producer_task = (
                                dsl_utils.sanitize_task_name(param.op_name))
                        pipeline_task_spec.inputs.parameters[
                            additional_input_name].task_output_parameter.output_parameter_key = param.name
                    else:
                        pipeline_task_spec.inputs.parameters[
                            additional_input_name].component_input_parameter = param.full_name

            input_type = component_spec._inputs_dict[input_name].type
            if type_utils.is_parameter_type(input_type):
                pipeline_task_spec.inputs.parameters[
                    input_name].runtime_value.constant_value.string_value = (
                        argument_value)
            elif is_compiling_for_v2:
                # An importer node with constant value artifact_uri will be inserted.
                # Importer node is only applicable for v2 engine.
                pipeline_task_spec.inputs.artifacts[
                    input_name].task_output_artifact.producer_task = ''
                type_schema = type_utils.get_input_artifact_type_schema(
                    input_name, component_spec.inputs)
                importer_specs[input_name] = importer_node.build_importer_spec(
                    input_type_schema=type_schema,
                    constant_value=argument_value)
        elif isinstance(argument_value, int):
            pipeline_task_spec.inputs.parameters[
                input_name].runtime_value.constant_value.int_value = argument_value
        elif isinstance(argument_value, float):
            pipeline_task_spec.inputs.parameters[
                input_name].runtime_value.constant_value.double_value = argument_value
        elif isinstance(argument_value, _container_op.ContainerOp):
            raise TypeError(
                'ContainerOp object {} was passed to component as an input argument. '
                'Pass a single output instead.'.format(input_name))
        else:
            if is_compiling_for_v2:
                raise NotImplementedError(
                    'Input argument supports only the following types: PipelineParam'
                    ', str, int, float. Got: "{}".'.format(argument_value))

    if not component_spec.name:
        component_spec.name = _components._default_component_name

    # task.name is unique at this point.
    pipeline_task_spec.task_info.name = (dsl_utils.sanitize_task_name(
        task.name))

    resolved_cmd = _resolve_commands_and_args_v2(component_spec=component_spec,
                                                 arguments=original_arguments)

    task.container_spec = (
        pipeline_spec_pb2.PipelineDeploymentConfig.PipelineContainerSpec(
            image=component_spec.implementation.container.image,
            command=resolved_cmd.command,
            args=resolved_cmd.args))

    # TODO(chensun): dedupe IR component_spec and contaienr_spec
    pipeline_task_spec.component_ref.name = (dsl_utils.sanitize_component_name(
        task.name))
    executor_label = dsl_utils.sanitize_executor_label(task.name)

    task.component_spec = dsl_component_spec.build_component_spec_from_structure(
        component_spec, executor_label, arguments.keys())

    task.task_spec = pipeline_task_spec
    task.importer_specs = importer_specs

    # Override command and arguments if compiling to v2.
    if is_compiling_for_v2:
        task.command = resolved_cmd.command
        task.arguments = resolved_cmd.args

        # limit this to v2 compiling only to avoid possible behavior change in v1.
        task.inputs = input_params