def test_extract_pipelineparams_from_dict(self):
        """Params used as a ConfigMap data key and value are both extracted."""
        key_param = PipelineParam(name='param1', op_name='op1')
        value_param = PipelineParam(name='param2')

        # One param is serialized into the dict key, the other into the value.
        config_data = {str(key_param): str(value_param)}
        config_map = V1ConfigMap(data=config_data)

        extracted = extract_pipelineparams_from_any(config_map)
        self.assertListEqual(sorted([key_param, value_param]),
                             sorted(extracted))
    def test_extract_pipelineparams_from_any(self):
        """Params in a container's name, image and env value are extracted."""
        name_param = PipelineParam(name='param1', op_name='op1')
        image_param = PipelineParam(name='param2')
        valued_param = PipelineParam(name='param3', value='value3')
        all_params = [name_param, image_param, valued_param]

        # Serialize every param into a single string with filler text between
        # them, so extraction has to find params embedded in a larger value.
        env_value = ' between '.join(str(param) for param in all_params)
        env_var = V1EnvVar(name="foo", value=env_value)
        container = V1Container(name=name_param,
                                image=image_param,
                                env=[env_var])

        extracted = extract_pipelineparams_from_any(container)
        self.assertListEqual(sorted(all_params), sorted(extracted))
Example #3
0
def _attach_v2_specs(
    task: _container_op.ContainerOp,
    component_spec: _structures.ComponentSpec,
    arguments: Mapping[str, Any],
) -> None:
  """Attaches v2 specs to a ContainerOp object.

  Attach v2_specs to the ContainerOp object regardless whether the pipeline is
  being compiled to v1 (Argo yaml) or v2 (IR json).
  However, there're different behaviors for the two cases. Namely, resolved
  commands and arguments, error handling, etc.
  Regarding the difference in error handling, v2 has a stricter requirement on
  input type annotation. For instance, an input without any type annotation is
  viewed as an artifact, and if it's paired with InputValuePlaceholder, an
  error will be thrown at compile time. However, we cannot raise such an error
  in v1, as it would break existing pipelines.

  Side effects: sets `task.container_spec`, `task.component_spec` and
  `task.task_spec`; fills in `component_spec.name` with the default component
  name when it is empty; and, only when `kfp.COMPILING_FOR_V2` is set,
  overrides `task.command`, `task.arguments` and `task.inputs`. The caller's
  `arguments` mapping is copied and never modified.

  Args:
    task: The ContainerOp object to attach IR specs.
    component_spec: The component spec object.
    arguments: The dictionary of component arguments.

  Raises:
    TypeError: If a ContainerOp is passed as an argument, or (v2 only) if a
      parameter-typed argument is passed to an artifact-typed input or vice
      versa, or a placeholder is paired with an input/output of the wrong
      category.
    ValueError: If a compiler-injected input name collides with an existing
      input name, or a required input has no value.
    NotImplementedError: (v2 only) If an argument is not a PipelineParam,
      str, int or float.
  """

  def _resolve_commands_and_args_v2(
      component_spec: _structures.ComponentSpec,
      arguments: Mapping[str, Any],
  ) -> _components._ResolvedCommandLineAndPaths:
    """Resolves the command line argument placeholders for v2 (IR).

    Args:
      component_spec: The component spec object.
      arguments: The dictionary of component arguments.

    Returns:
      A named tuple: _components._ResolvedCommandLineAndPaths.
    """
    inputs_dict = {
        input_spec.name: input_spec
        for input_spec in component_spec.inputs or []
    }
    outputs_dict = {
        output_spec.name: output_spec
        for output_spec in component_spec.outputs or []
    }

    # Each generator below returns the IR runtime placeholder string for one
    # input/output. The type checks are enforced for v2 compilation only.

    def _input_artifact_uri_placeholder(input_key: str) -> str:
      if kfp.COMPILING_FOR_V2 and type_utils.is_parameter_type(
          inputs_dict[input_key].type):
        raise TypeError('Input "{}" with type "{}" cannot be paired with '
                        'InputUriPlaceholder.'.format(
                            input_key, inputs_dict[input_key].type))
      return _generate_input_uri_placeholder(input_key)

    def _input_artifact_path_placeholder(input_key: str) -> str:
      if kfp.COMPILING_FOR_V2 and type_utils.is_parameter_type(
          inputs_dict[input_key].type):
        raise TypeError('Input "{}" with type "{}" cannot be paired with '
                        'InputPathPlaceholder.'.format(
                            input_key, inputs_dict[input_key].type))
      return "{{{{$.inputs.artifacts['{}'].path}}}}".format(input_key)

    def _input_parameter_placeholder(input_key: str) -> str:
      if kfp.COMPILING_FOR_V2 and not type_utils.is_parameter_type(
          inputs_dict[input_key].type):
        raise TypeError('Input "{}" with type "{}" cannot be paired with '
                        'InputValuePlaceholder.'.format(
                            input_key, inputs_dict[input_key].type))
      return "{{{{$.inputs.parameters['{}']}}}}".format(input_key)

    def _output_artifact_uri_placeholder(output_key: str) -> str:
      if kfp.COMPILING_FOR_V2 and type_utils.is_parameter_type(
          outputs_dict[output_key].type):
        raise TypeError('Output "{}" with type "{}" cannot be paired with '
                        'OutputUriPlaceholder.'.format(
                            output_key, outputs_dict[output_key].type))
      return _generate_output_uri_placeholder(output_key)

    def _output_artifact_path_placeholder(output_key: str) -> str:
      return "{{{{$.outputs.artifacts['{}'].path}}}}".format(output_key)

    def _output_parameter_path_placeholder(output_key: str) -> str:
      return "{{{{$.outputs.parameters['{}'].output_file}}}}".format(output_key)

    def _resolve_output_path_placeholder(output_key: str) -> str:
      # Parameter outputs are written to a file; artifact outputs to a path.
      if type_utils.is_parameter_type(outputs_dict[output_key].type):
        return _output_parameter_path_placeholder(output_key)
      return _output_artifact_path_placeholder(output_key)

    placeholder_resolver = ExtraPlaceholderResolver()

    def _resolve_ir_placeholders_v2(
        arg,
        component_spec: _structures.ComponentSpec,
        arguments: dict,
    ) -> str:
      """Resolves value/URI placeholders; defers the rest to the resolver.

      Returns None for an optional input that has no argument.
      """
      inputs_dict = {
          input_spec.name: input_spec
          for input_spec in component_spec.inputs or []
      }
      if isinstance(arg, _structures.InputValuePlaceholder):
        input_name = arg.input_name
        # An explicit None argument is treated the same as a missing one.
        if arguments.get(input_name, None) is not None:
          return _input_parameter_placeholder(input_name)
        if inputs_dict[input_name].optional:
          return None
        raise ValueError('No value provided for input {}'.format(input_name))

      elif isinstance(arg, _structures.InputUriPlaceholder):
        input_name = arg.input_name
        if input_name in arguments:
          return _input_artifact_uri_placeholder(input_name)
        if inputs_dict[input_name].optional:
          return None
        raise ValueError('No value provided for input {}'.format(input_name))

      elif isinstance(arg, _structures.OutputUriPlaceholder):
        return _output_artifact_uri_placeholder(arg.output_name)

      return placeholder_resolver.resolve_placeholder(
          arg=arg,
          component_spec=component_spec,
          arguments=arguments,
      )

    resolved_cmd = _components._resolve_command_line_and_paths(
        component_spec=component_spec,
        arguments=arguments,
        input_path_generator=_input_artifact_path_placeholder,
        output_path_generator=_resolve_output_path_placeholder,
        placeholder_resolver=_resolve_ir_placeholders_v2,
    )
    return resolved_cmd

  pipeline_task_spec = pipeline_spec_pb2.PipelineTaskSpec()

  # Check types of the reference arguments and serialize PipelineParams.
  # Work on a copy so the caller's mapping is left untouched.
  arguments = arguments.copy()

  # Preserve input params for ContainerOp.inputs
  input_params_set = {
      param for param in arguments.values()
      if isinstance(param, _pipeline_param.PipelineParam)
  }

  # Only values of existing keys are reassigned inside this loop, so iterating
  # over `arguments` while updating it is safe (the dict never changes size).
  for input_name, argument_value in arguments.items():
    input_type = component_spec._inputs_dict[input_name].type
    argument_type = None

    if isinstance(argument_value, _pipeline_param.PipelineParam):
      argument_type = argument_value.param_type

      types.verify_type_compatibility(
          argument_type, input_type,
          'Incompatible argument passed to the input "{}" of component "{}": '
          .format(input_name, component_spec.name))

      # Loop arguments defaults to 'String' type if type is unknown.
      # This has to be done after the type compatibility check.
      if argument_type is None and isinstance(
          argument_value, (_for_loop.LoopArguments,
                           _for_loop.LoopArgumentVariable)):
        argument_type = 'String'

      arguments[input_name] = str(argument_value)

      if type_utils.is_parameter_type(input_type):
        if argument_value.op_name:
          pipeline_task_spec.inputs.parameters[
              input_name].task_output_parameter.producer_task = (
                  dsl_utils.sanitize_task_name(argument_value.op_name))
          pipeline_task_spec.inputs.parameters[
              input_name].task_output_parameter.output_parameter_key = (
                  argument_value.name)
        else:
          pipeline_task_spec.inputs.parameters[
              input_name].component_input_parameter = argument_value.name
      else:
        if argument_value.op_name:
          pipeline_task_spec.inputs.artifacts[
              input_name].task_output_artifact.producer_task = (
                  dsl_utils.sanitize_task_name(argument_value.op_name))
          pipeline_task_spec.inputs.artifacts[
              input_name].task_output_artifact.output_artifact_key = (
                  argument_value.name)
    elif isinstance(argument_value, str):
      argument_type = 'String'
      pipeline_params = _pipeline_param.extract_pipelineparams_from_any(
          argument_value)
      if pipeline_params and kfp.COMPILING_FOR_V2:
        # argument_value contains PipelineParam placeholders which needs to be
        # replaced. And the input needs to be added to the task spec.
        for param in pipeline_params:
          # Form the name for the compiler injected input, and make sure it
          # doesn't collide with any existing input names.
          additional_input_name = (
              dsl_component_spec.additional_input_name_for_pipelineparam(param))
          if additional_input_name in arguments:
            raise ValueError('Name collision between existing input name '
                             '{} and compiler injected input name {}'.format(
                                 additional_input_name, additional_input_name))

          # Add the additional param to the input params set. Otherwise, it will
          # not be included when the params set is not empty.
          input_params_set.add(param)
          additional_input_placeholder = (
              "{{{{$.inputs.parameters['{}']}}}}".format(additional_input_name))
          argument_value = argument_value.replace(param.pattern,
                                                  additional_input_placeholder)

          # The output references are subject to change -- the producer task may
          # not be within the same DAG.
          if param.op_name:
            pipeline_task_spec.inputs.parameters[
                additional_input_name].task_output_parameter.producer_task = (
                    dsl_utils.sanitize_task_name(param.op_name))
            pipeline_task_spec.inputs.parameters[
                additional_input_name].task_output_parameter.output_parameter_key = param.name
          else:
            pipeline_task_spec.inputs.parameters[
                additional_input_name].component_input_parameter = param.full_name

      if type_utils.is_parameter_type(input_type):
        pipeline_task_spec.inputs.parameters[
            input_name].runtime_value.constant_value.string_value = (
                argument_value)
    elif isinstance(argument_value, int):
      argument_type = 'Integer'
      pipeline_task_spec.inputs.parameters[
          input_name].runtime_value.constant_value.int_value = argument_value
    elif isinstance(argument_value, float):
      argument_type = 'Float'
      pipeline_task_spec.inputs.parameters[
          input_name].runtime_value.constant_value.double_value = argument_value
    elif isinstance(argument_value, _container_op.ContainerOp):
      raise TypeError(
          'ContainerOp object {} was passed to component as an input argument. '
          'Pass a single output instead.'.format(input_name))
    else:
      # Unsupported argument types are tolerated in v1 to avoid breaking
      # existing pipelines.
      if kfp.COMPILING_FOR_V2:
        raise NotImplementedError(
            'Input argument supports only the following types: PipelineParam'
            ', str, int, float. Got: "{}".'.format(argument_value))

    # v2 only: a parameter must feed a parameter input and an artifact must
    # feed an artifact input.
    argument_is_parameter_type = type_utils.is_parameter_type(argument_type)
    input_is_parameter_type = type_utils.is_parameter_type(input_type)
    if kfp.COMPILING_FOR_V2 and (argument_is_parameter_type !=
                                input_is_parameter_type):
      if isinstance(argument_value, dsl.PipelineParam):
        param_or_value_msg = 'PipelineParam "{}"'.format(
            argument_value.full_name)
      else:
        param_or_value_msg = 'value "{}"'.format(argument_value)

      raise TypeError(
          'Passing '
          '{param_or_value} with type "{arg_type}" (as "{arg_category}") to '
          'component input '
          '"{input_name}" with type "{input_type}" (as "{input_category}") is '
          'incompatible. Please fix the type of the component input.'.format(
              param_or_value=param_or_value_msg,
              arg_type=argument_type,
              arg_category='Parameter'
              if argument_is_parameter_type else 'Artifact',
              input_name=input_name,
              input_type=input_type,
              input_category='Parameter'
              if input_is_parameter_type else 'Artifact',
          ))

  if not component_spec.name:
    component_spec.name = _components._default_component_name

  # task.name is unique at this point.
  pipeline_task_spec.task_info.name = (dsl_utils.sanitize_task_name(task.name))

  resolved_cmd = _resolve_commands_and_args_v2(
      component_spec=component_spec, arguments=arguments)

  task.container_spec = (
      pipeline_spec_pb2.PipelineDeploymentConfig.PipelineContainerSpec(
          image=component_spec.implementation.container.image,
          command=resolved_cmd.command,
          args=resolved_cmd.args))

  # TODO(chensun): dedupe IR component_spec and container_spec
  pipeline_task_spec.component_ref.name = (
      dsl_utils.sanitize_component_name(task.name))
  executor_label = dsl_utils.sanitize_executor_label(task.name)

  task.component_spec = dsl_component_spec.build_component_spec_from_structure(
      component_spec, executor_label, arguments.keys())

  task.task_spec = pipeline_task_spec

  # Override command and arguments if compiling to v2.
  if kfp.COMPILING_FOR_V2:
    task.command = resolved_cmd.command
    task.arguments = resolved_cmd.args

    # limit this to v2 compiling only to avoid possible behavior change in v1.
    task.inputs = list(input_params_set)
def _attach_v2_specs(
    task: _container_op.ContainerOp,
    component_spec: _structures.ComponentSpec,
    arguments: Mapping[str, Any],
) -> None:
    """Attaches v2 specs to a ContainerOp object.

    Side effects: sets `task.container_spec`, `task.component_spec`,
    `task.task_spec` and `task.importer_specs`; fills in
    `component_spec.name` with the default component name when it is empty;
    and, only when compiling for v2, overrides `task.command`,
    `task.arguments` and `task.inputs`. The caller's `arguments` mapping is
    copied and never modified.

    Args:
      task: The ContainerOp object to attach IR specs.
      component_spec: The component spec object.
      arguments: The dictionary of component arguments.

    Raises:
      TypeError: If a ContainerOp is passed as an argument, or (v2 only) if
        a placeholder is paired with an input/output of the wrong category.
      ValueError: If a compiler-injected input name collides with an
        existing input name, or a required input has no value.
      NotImplementedError: (v2 only) If an argument is not a PipelineParam,
        str, int or float.
    """

    # Attach v2_specs to the ContainerOp object regardless whether the pipeline is
    # being compiled to v1 (Argo yaml) or v2 (IR json).
    # However, there're different behaviors for the two cases. Namely, resolved
    # commands and arguments, error handling, etc.
    # Regarding the difference in error handling, v2 has a stricter requirement on
    # input type annotation. For instance, an input without any type annotation is
    # viewed as an artifact, and if it's paired with InputValuePlaceholder, an
    # error will be thrown at compile time. However, we cannot raise such an error
    # in v1, as it would break existing pipelines.
    #
    # v2 compilation is detected by looking for a function named
    # `_create_pipeline_v2` on the call stack. context=0 skips loading source
    # lines for every frame, which a function-name lookup does not need.
    is_compiling_for_v2 = any(
        frame_info.function == '_create_pipeline_v2'
        for frame_info in inspect.stack(context=0))

    def _resolve_commands_and_args_v2(
        component_spec: _structures.ComponentSpec,
        arguments: Mapping[str, Any],
    ) -> _components._ResolvedCommandLineAndPaths:
        """Resolves the command line argument placeholders for v2 (IR).

        Args:
          component_spec: The component spec object.
          arguments: The dictionary of component arguments.

        Returns:
          A named tuple: _components._ResolvedCommandLineAndPaths.
        """
        inputs_dict = {
            input_spec.name: input_spec
            for input_spec in component_spec.inputs or []
        }
        outputs_dict = {
            output_spec.name: output_spec
            for output_spec in component_spec.outputs or []
        }

        # Each generator below returns the IR runtime placeholder string for
        # one input/output. The type checks are enforced for v2 only.

        def _input_artifact_uri_placeholder(input_key: str) -> str:
            if is_compiling_for_v2 and type_utils.is_parameter_type(
                    inputs_dict[input_key].type):
                raise TypeError(
                    'Input "{}" with type "{}" cannot be paired with '
                    'InputUriPlaceholder.'.format(input_key,
                                                  inputs_dict[input_key].type))
            return "{{{{$.inputs.artifacts['{}'].uri}}}}".format(input_key)

        def _input_artifact_path_placeholder(input_key: str) -> str:
            if is_compiling_for_v2 and type_utils.is_parameter_type(
                    inputs_dict[input_key].type):
                raise TypeError(
                    'Input "{}" with type "{}" cannot be paired with '
                    'InputPathPlaceholder.'.format(
                        input_key, inputs_dict[input_key].type))
            # `importer_specs` is assigned in the enclosing function before
            # this closure is ever called.
            if is_compiling_for_v2 and input_key in importer_specs:
                raise TypeError(
                    'Input "{}" with type "{}" is not connected to any upstream output. '
                    'However it is used with InputPathPlaceholder. '
                    'If you want to import an existing artifact using a system-connected'
                    ' importer node, use InputUriPlaceholder instead. '
                    'Or if you just want to pass a string parameter, use string type and'
                    ' InputValuePlaceholder instead.'.format(
                        input_key, inputs_dict[input_key].type))
            return "{{{{$.inputs.artifacts['{}'].path}}}}".format(input_key)

        def _input_parameter_placeholder(input_key: str) -> str:
            if is_compiling_for_v2 and not type_utils.is_parameter_type(
                    inputs_dict[input_key].type):
                raise TypeError(
                    'Input "{}" with type "{}" cannot be paired with '
                    'InputValuePlaceholder.'.format(
                        input_key, inputs_dict[input_key].type))
            return "{{{{$.inputs.parameters['{}']}}}}".format(input_key)

        def _output_artifact_uri_placeholder(output_key: str) -> str:
            if is_compiling_for_v2 and type_utils.is_parameter_type(
                    outputs_dict[output_key].type):
                raise TypeError(
                    'Output "{}" with type "{}" cannot be paired with '
                    'OutputUriPlaceholder.'.format(
                        output_key, outputs_dict[output_key].type))
            return "{{{{$.outputs.artifacts['{}'].uri}}}}".format(output_key)

        def _output_artifact_path_placeholder(output_key: str) -> str:
            return "{{{{$.outputs.artifacts['{}'].path}}}}".format(output_key)

        def _output_parameter_path_placeholder(output_key: str) -> str:
            return "{{{{$.outputs.parameters['{}'].output_file}}}}".format(
                output_key)

        def _resolve_output_path_placeholder(output_key: str) -> str:
            # Parameter outputs are written to a file; artifacts to a path.
            if type_utils.is_parameter_type(outputs_dict[output_key].type):
                return _output_parameter_path_placeholder(output_key)
            return _output_artifact_path_placeholder(output_key)

        placeholder_resolver = ExtraPlaceholderResolver()

        def _resolve_ir_placeholders_v2(
            arg,
            component_spec: _structures.ComponentSpec,
            arguments: dict,
        ) -> str:
            """Resolves value/URI placeholders; defers the rest.

            Returns None for an optional input that has no argument.
            """
            inputs_dict = {
                input_spec.name: input_spec
                for input_spec in component_spec.inputs or []
            }
            if isinstance(arg, _structures.InputValuePlaceholder):
                input_name = arg.input_name
                # An explicit None argument is treated like a missing one.
                if arguments.get(input_name, None) is not None:
                    return _input_parameter_placeholder(input_name)
                if inputs_dict[input_name].optional:
                    return None
                raise ValueError(
                    'No value provided for input {}'.format(input_name))

            elif isinstance(arg, _structures.InputUriPlaceholder):
                input_name = arg.input_name
                if input_name in arguments:
                    return _input_artifact_uri_placeholder(input_name)
                if inputs_dict[input_name].optional:
                    return None
                raise ValueError(
                    'No value provided for input {}'.format(input_name))

            elif isinstance(arg, _structures.OutputUriPlaceholder):
                return _output_artifact_uri_placeholder(arg.output_name)

            return placeholder_resolver.resolve_placeholder(
                arg=arg,
                component_spec=component_spec,
                arguments=arguments,
            )

        resolved_cmd = _components._resolve_command_line_and_paths(
            component_spec=component_spec,
            arguments=arguments,
            input_path_generator=_input_artifact_path_placeholder,
            output_path_generator=_resolve_output_path_placeholder,
            placeholder_resolver=_resolve_ir_placeholders_v2,
        )
        return resolved_cmd

    pipeline_task_spec = pipeline_spec_pb2.PipelineTaskSpec()

    # Keep track of auto-injected importer spec.
    importer_specs = {}

    # Check types of the reference arguments and serialize PipelineParams.
    # The untouched mapping is kept for command-line resolution below; the
    # copy receives the serialized PipelineParam strings.
    original_arguments = arguments
    arguments = arguments.copy()

    # Preserve input params for ContainerOp.inputs
    input_params = list({
        param for param in arguments.values()
        if isinstance(param, _pipeline_param.PipelineParam)
    })

    # Only values of existing keys are reassigned inside this loop, so
    # iterating over `arguments` while updating it is safe.
    for input_name, argument_value in arguments.items():
        if isinstance(argument_value, _pipeline_param.PipelineParam):
            input_type = component_spec._inputs_dict[input_name].type
            reference_type = argument_value.param_type
            types.verify_type_compatibility(
                reference_type, input_type,
                'Incompatible argument passed to the input "{}" of component "{}": '
                .format(input_name, component_spec.name))

            arguments[input_name] = str(argument_value)

            if type_utils.is_parameter_type(input_type):
                if argument_value.op_name:
                    pipeline_task_spec.inputs.parameters[
                        input_name].task_output_parameter.producer_task = (
                            dsl_utils.sanitize_task_name(
                                argument_value.op_name))
                    pipeline_task_spec.inputs.parameters[
                        input_name].task_output_parameter.output_parameter_key = (
                            argument_value.name)
                else:
                    pipeline_task_spec.inputs.parameters[
                        input_name].component_input_parameter = argument_value.name
            else:
                if argument_value.op_name:
                    pipeline_task_spec.inputs.artifacts[
                        input_name].task_output_artifact.producer_task = (
                            dsl_utils.sanitize_task_name(
                                argument_value.op_name))
                    pipeline_task_spec.inputs.artifacts[
                        input_name].task_output_artifact.output_artifact_key = (
                            argument_value.name)
                elif is_compiling_for_v2:
                    # argument_value.op_name could be none, in which case an importer node
                    # will be inserted later.
                    # Importer node is only applicable for v2 engine.
                    pipeline_task_spec.inputs.artifacts[
                        input_name].task_output_artifact.producer_task = ''
                    type_schema = type_utils.get_input_artifact_type_schema(
                        input_name, component_spec.inputs)
                    importer_specs[
                        input_name] = importer_node.build_importer_spec(
                            input_type_schema=type_schema,
                            pipeline_param_name=argument_value.name)
        elif isinstance(argument_value, str):
            pipeline_params = _pipeline_param.extract_pipelineparams_from_any(
                argument_value)
            if pipeline_params and is_compiling_for_v2:
                # argument_value contains PipelineParam placeholders which needs to be
                # replaced. And the input needs to be added to the task spec.
                for param in pipeline_params:
                    # Form the name for the compiler injected input, and make sure it
                    # doesn't collide with any existing input names.
                    additional_input_name = (
                        dsl_component_spec.
                        additional_input_name_for_pipelineparam(param))
                    if additional_input_name in arguments:
                        raise ValueError(
                            'Name collision between existing input name '
                            '{} and compiler injected input name {}'.
                            format(additional_input_name,
                                   additional_input_name))

                    additional_input_placeholder = (
                        "{{{{$.inputs.parameters['{}']}}}}".format(
                            additional_input_name))
                    argument_value = argument_value.replace(
                        param.pattern, additional_input_placeholder)

                    # The output references are subject to change -- the producer task may
                    # not be within the same DAG.
                    if param.op_name:
                        pipeline_task_spec.inputs.parameters[
                            additional_input_name].task_output_parameter.producer_task = (
                                dsl_utils.sanitize_task_name(param.op_name))
                        pipeline_task_spec.inputs.parameters[
                            additional_input_name].task_output_parameter.output_parameter_key = param.name
                    else:
                        pipeline_task_spec.inputs.parameters[
                            additional_input_name].component_input_parameter = param.full_name

            input_type = component_spec._inputs_dict[input_name].type
            if type_utils.is_parameter_type(input_type):
                pipeline_task_spec.inputs.parameters[
                    input_name].runtime_value.constant_value.string_value = (
                        argument_value)
            elif is_compiling_for_v2:
                # An importer node with constant value artifact_uri will be inserted.
                # Importer node is only applicable for v2 engine.
                pipeline_task_spec.inputs.artifacts[
                    input_name].task_output_artifact.producer_task = ''
                type_schema = type_utils.get_input_artifact_type_schema(
                    input_name, component_spec.inputs)
                importer_specs[input_name] = importer_node.build_importer_spec(
                    input_type_schema=type_schema,
                    constant_value=argument_value)
        elif isinstance(argument_value, int):
            pipeline_task_spec.inputs.parameters[
                input_name].runtime_value.constant_value.int_value = argument_value
        elif isinstance(argument_value, float):
            pipeline_task_spec.inputs.parameters[
                input_name].runtime_value.constant_value.double_value = argument_value
        elif isinstance(argument_value, _container_op.ContainerOp):
            raise TypeError(
                'ContainerOp object {} was passed to component as an input argument. '
                'Pass a single output instead.'.format(input_name))
        else:
            # Unsupported argument types are tolerated in v1 to avoid
            # breaking existing pipelines.
            if is_compiling_for_v2:
                raise NotImplementedError(
                    'Input argument supports only the following types: PipelineParam'
                    ', str, int, float. Got: "{}".'.format(argument_value))

    if not component_spec.name:
        component_spec.name = _components._default_component_name

    # task.name is unique at this point.
    pipeline_task_spec.task_info.name = (dsl_utils.sanitize_task_name(
        task.name))

    resolved_cmd = _resolve_commands_and_args_v2(component_spec=component_spec,
                                                 arguments=original_arguments)

    task.container_spec = (
        pipeline_spec_pb2.PipelineDeploymentConfig.PipelineContainerSpec(
            image=component_spec.implementation.container.image,
            command=resolved_cmd.command,
            args=resolved_cmd.args))

    # TODO(chensun): dedupe IR component_spec and container_spec
    pipeline_task_spec.component_ref.name = (dsl_utils.sanitize_component_name(
        task.name))
    executor_label = dsl_utils.sanitize_executor_label(task.name)

    task.component_spec = dsl_component_spec.build_component_spec_from_structure(
        component_spec, executor_label, arguments.keys())

    task.task_spec = pipeline_task_spec
    task.importer_specs = importer_specs

    # Override command and arguments if compiling to v2.
    if is_compiling_for_v2:
        task.command = resolved_cmd.command
        task.arguments = resolved_cmd.args

        # limit this to v2 compiling only to avoid possible behavior change in v1.
        task.inputs = input_params
Example #5
0
  def _group_to_dag_template(self, group, inputs, outputs, dependencies):
    """Generate template given an OpsGroup.

    inputs, outputs, dependencies are all helper dicts.
    """
    template = {'name': group.name}
    if group.parallelism != None:
      template["parallelism"] = group.parallelism

    # Generate inputs section.
    if inputs.get(group.name, None):
      template_inputs = [{'name': x[0]} for x in inputs[group.name]]
      template_inputs.sort(key=lambda x: x['name'])
      template['inputs'] = {
        'parameters': template_inputs
      }

    # Generate outputs section.
    if outputs.get(group.name, None):
      template_outputs = []
      for param_name, dependent_name in outputs[group.name]:
        template_outputs.append({
          'name': param_name,
          'valueFrom': {
            'parameter': '{{tasks.%s.outputs.parameters.%s}}' % (dependent_name, param_name)
          }
        })
      template_outputs.sort(key=lambda x: x['name'])
      template['outputs'] = {'parameters': template_outputs}

    # Generate tasks section.
    tasks = []
    sub_groups = group.groups + group.ops
    for sub_group in sub_groups:
      is_recursive_subgroup = (isinstance(sub_group, OpsGroup) and sub_group.recursive_ref)
      # Special handling for recursive subgroup: use the existing opsgroup name
      if is_recursive_subgroup:
        task = {
            'name': sub_group.recursive_ref.name,
            'template': sub_group.recursive_ref.name,
        }
      else:
        task = {
          'name': sub_group.name,
          'template': sub_group.name,
        }
      if isinstance(sub_group, dsl.OpsGroup) and sub_group.type == 'condition':
        subgroup_inputs = inputs.get(sub_group.name, [])
        condition = sub_group.condition
        operand1_value = self._resolve_value_or_reference(condition.operand1, subgroup_inputs)
        operand2_value = self._resolve_value_or_reference(condition.operand2, subgroup_inputs)
        if condition.operator in ['==', '!=']:
          operand1_value = '"' + operand1_value + '"'
          operand2_value = '"' + operand2_value + '"'
        task['when'] = '{} {} {}'.format(operand1_value, condition.operator, operand2_value)

      # Generate dependencies section for this task.
      if dependencies.get(sub_group.name, None):
        group_dependencies = list(dependencies[sub_group.name])
        group_dependencies.sort()
        task['dependencies'] = group_dependencies

      # Generate arguments section for this task.
      if inputs.get(sub_group.name, None):
        task['arguments'] = {'parameters': self.get_arguments_for_sub_group(sub_group, is_recursive_subgroup, inputs)}

      # additional task modifications for withItems and withParam
      if isinstance(sub_group, dsl.ParallelFor):
        if sub_group.items_is_pipeline_param:
          # these loop args are a 'withParam' rather than 'withItems'.
          # i.e., rather than a static list, they are either the output of another task or were input
          # as global pipeline parameters

          pipeline_param = sub_group.loop_args.items_or_pipeline_param
          withparam_value = self._resolve_task_pipeline_param(pipeline_param, group.type)
          if pipeline_param.op_name:
            # these loop args are the output of another task
            if 'dependencies' not in task or task['dependencies'] is None:
              task['dependencies'] = []
            if sanitize_k8s_name(
                pipeline_param.op_name) not in task['dependencies'] and group.type != 'subgraph':
              task['dependencies'].append(
                  sanitize_k8s_name(pipeline_param.op_name))

          task['withParam'] = withparam_value
        else:
          # Need to sanitize the dict keys for consistency.
          loop_tasks = sub_group.loop_args.to_list_for_task_yaml()
          nested_pipeline_params = extract_pipelineparams_from_any(loop_tasks)

          # Set dependencies in case of nested pipeline_params
          map_to_tmpl_var = {str(p): self._resolve_task_pipeline_param(p, group.type) for p in nested_pipeline_params}
          for pipeline_param in nested_pipeline_params:
            if pipeline_param.op_name:
              # these pipeline_param are the output of another task
              if 'dependencies' not in task or task['dependencies'] is None:
                task['dependencies'] = []
              if sanitize_k8s_name(
                  pipeline_param.op_name) not in task['dependencies']:
                task['dependencies'].append(
                    sanitize_k8s_name(pipeline_param.op_name))

          sanitized_tasks = []
          if isinstance(loop_tasks[0], dict):
            for argument_set in loop_tasks:
              c_dict = {}
              for k, v in argument_set.items():
                c_dict[sanitize_k8s_name(k, True)] = v
              sanitized_tasks.append(c_dict)
          else:
            sanitized_tasks = loop_tasks
          # Replace pipeline param if map_to_tmpl_var not empty
          task['withItems'] = _process_obj(sanitized_tasks, map_to_tmpl_var) if map_to_tmpl_var else sanitized_tasks

        # We will sort dependencies to have determinitc yaml and thus stable tests
        if task.get('dependencies'):
          task['dependencies'].sort()

      tasks.append(task)
    tasks.sort(key=lambda x: x['name'])
    template['dag'] = {'tasks': tasks}
    return template
Example #6
0
def create_container_op_from_component_and_arguments(
    component_spec: structures.ComponentSpec,
    arguments: Mapping[str, Any],
    component_ref: Optional[structures.ComponentReference] = None,
) -> container_op.ContainerOp:
    """Instantiates a ContainerOp object and attaches its v2 (IR) specs.

    Each argument is recorded in a PipelineTaskSpec as a parameter or artifact
    input, the command line is resolved with v2 placeholders, and the
    resulting ContainerOp gets task_spec, importer_specs, component_spec and
    container_spec attached.

    Args:
      component_spec: The component spec object.
      arguments: The dictionary of component arguments. Supported value types
        are PipelineParam, str, int and float.
      component_ref: (not used in v2)

    Returns:
      A ContainerOp instance.

    Raises:
      NotImplementedError: If a str argument contains PipelineParam
        placeholders, or an argument has an unsupported type.
      TypeError: If a ContainerOp is passed as an argument, or if one of the
        placeholder generators handed to `_resolve_command_line_and_paths`
        is used with an input/output of the wrong kind.
    """

    pipeline_task_spec = pipeline_spec_pb2.PipelineTaskSpec()

    # Keep track of auto-injected importer spec.
    importer_specs = {}

    # Check types of the reference arguments and serialize PipelineParams
    arguments = arguments.copy()
    # Preserve input params for ContainerOp.inputs. dict.fromkeys dedupes by
    # hash just like a set, but keeps first-seen order instead of a
    # hash-dependent one.
    input_params = list(
        dict.fromkeys(
            param for param in arguments.values()
            if isinstance(param, dsl.PipelineParam)))
    for input_name, argument_value in arguments.items():
        if isinstance(argument_value, dsl.PipelineParam):
            input_type = component_spec._inputs_dict[input_name].type
            reference_type = argument_value.param_type
            types.verify_type_compatibility(
                reference_type, input_type,
                'Incompatible argument passed to the input "{}" of component "{}": '
                .format(input_name, component_spec.name))

            # From here on the argument is carried as its string form.
            arguments[input_name] = str(argument_value)

            if type_utils.is_parameter_type(input_type):
                if argument_value.op_name:
                    pipeline_task_spec.inputs.parameters[
                        input_name].task_output_parameter.producer_task = (
                            dsl_utils.sanitize_task_name(
                                argument_value.op_name))
                    pipeline_task_spec.inputs.parameters[
                        input_name].task_output_parameter.output_parameter_key = (
                            argument_value.name)
                else:
                    pipeline_task_spec.inputs.parameters[
                        input_name].component_input_parameter = argument_value.name
            else:
                if argument_value.op_name:
                    pipeline_task_spec.inputs.artifacts[
                        input_name].task_output_artifact.producer_task = (
                            dsl_utils.sanitize_task_name(
                                argument_value.op_name))
                    pipeline_task_spec.inputs.artifacts[
                        input_name].task_output_artifact.output_artifact_key = (
                            argument_value.name)
                else:
                    # argument_value.op_name could be none, in which case an importer node
                    # will be inserted later.
                    pipeline_task_spec.inputs.artifacts[
                        input_name].task_output_artifact.producer_task = ''
                    type_schema = type_utils.get_input_artifact_type_schema(
                        input_name, component_spec.inputs)
                    importer_specs[
                        input_name] = importer_node.build_importer_spec(
                            input_type_schema=type_schema,
                            pipeline_param_name=argument_value.name)
        elif isinstance(argument_value, str):
            pipeline_params = _pipeline_param.extract_pipelineparams_from_any(
                argument_value)
            if pipeline_params:
                # argument_value contains PipelineParam placeholders.
                raise NotImplementedError(
                    'Currently, a component input can only accept either a constant '
                    'value or a reference to another pipeline parameter. It cannot be a '
                    'combination of both. Got: {} for input {}'.format(
                        argument_value, input_name))

            input_type = component_spec._inputs_dict[input_name].type
            if type_utils.is_parameter_type(input_type):
                pipeline_task_spec.inputs.parameters[
                    input_name].runtime_value.constant_value.string_value = (
                        argument_value)
            else:
                # An importer node with constant value artifact_uri will be inserted.
                pipeline_task_spec.inputs.artifacts[
                    input_name].task_output_artifact.producer_task = ''
                type_schema = type_utils.get_input_artifact_type_schema(
                    input_name, component_spec.inputs)
                importer_specs[input_name] = importer_node.build_importer_spec(
                    input_type_schema=type_schema,
                    constant_value=argument_value)
        elif isinstance(argument_value, int):
            # NOTE(review): bool is a subclass of int, so True/False also land
            # here and are stored as int_value -- confirm that is intended.
            pipeline_task_spec.inputs.parameters[
                input_name].runtime_value.constant_value.int_value = argument_value
        elif isinstance(argument_value, float):
            pipeline_task_spec.inputs.parameters[
                input_name].runtime_value.constant_value.double_value = argument_value
        elif isinstance(argument_value, dsl.ContainerOp):
            raise TypeError(
                'ContainerOp object {} was passed to component as an input argument. '
                'Pass a single output instead.'.format(input_name))
        else:
            raise NotImplementedError(
                'Input argument supports only the following types: PipelineParam'
                ', str, int, float. Got: "{}".'.format(argument_value))

    inputs_dict = {
        input_spec.name: input_spec
        for input_spec in component_spec.inputs or []
    }
    outputs_dict = {
        output_spec.name: output_spec
        for output_spec in component_spec.outputs or []
    }

    # The helpers below produce the v2 placeholder string for an input/output
    # key, rejecting placeholder kinds that do not match the declared type.

    def _input_artifact_uri_placeholder(input_key: str) -> str:
        if type_utils.is_parameter_type(inputs_dict[input_key].type):
            raise TypeError(
                'Input "{}" with type "{}" cannot be paired with InputUriPlaceholder.'
                .format(input_key, inputs_dict[input_key].type))
        else:
            return "{{{{$.inputs.artifacts['{}'].uri}}}}".format(input_key)

    def _input_artifact_path_placeholder(input_key: str) -> str:
        if type_utils.is_parameter_type(inputs_dict[input_key].type):
            raise TypeError(
                'Input "{}" with type "{}" cannot be paired with InputPathPlaceholder.'
                .format(input_key, inputs_dict[input_key].type))
        elif input_key in importer_specs:
            raise TypeError(
                'Input "{}" with type "{}" is not connected to any upstream output. '
                'However it is used with InputPathPlaceholder. '
                'If you want to import an existing artifact using a system-connected '
                'importer node, use InputUriPlaceholder instead. '
                'Or if you just want to pass a string parameter, use string type and '
                'InputValuePlaceholder instead.'.format(
                    input_key, inputs_dict[input_key].type))
        else:
            return "{{{{$.inputs.artifacts['{}'].path}}}}".format(input_key)

    def _input_parameter_placeholder(input_key: str) -> str:
        if type_utils.is_parameter_type(inputs_dict[input_key].type):
            return "{{{{$.inputs.parameters['{}']}}}}".format(input_key)
        else:
            raise TypeError(
                'Input "{}" with type "{}" cannot be paired with InputValuePlaceholder.'
                .format(input_key, inputs_dict[input_key].type))

    def _output_artifact_uri_placeholder(output_key: str) -> str:
        if type_utils.is_parameter_type(outputs_dict[output_key].type):
            raise TypeError(
                'Output "{}" with type "{}" cannot be paired with OutputUriPlaceholder.'
                .format(output_key, outputs_dict[output_key].type))
        else:
            return "{{{{$.outputs.artifacts['{}'].uri}}}}".format(output_key)

    def _output_artifact_path_placeholder(output_key: str) -> str:
        return "{{{{$.outputs.artifacts['{}'].path}}}}".format(output_key)

    def _output_parameter_path_placeholder(output_key: str) -> str:
        return "{{{{$.outputs.parameters['{}'].output_file}}}}".format(
            output_key)

    def _resolve_output_path_placeholder(output_key: str) -> str:
        if type_utils.is_parameter_type(outputs_dict[output_key].type):
            return _output_parameter_path_placeholder(output_key)
        else:
            return _output_artifact_path_placeholder(output_key)

    resolved_cmd = _resolve_command_line_and_paths(
        component_spec=component_spec,
        arguments=arguments,
        input_value_generator=_input_parameter_placeholder,
        input_uri_generator=_input_artifact_uri_placeholder,
        output_uri_generator=_output_artifact_uri_placeholder,
        input_path_generator=_input_artifact_path_placeholder,
        output_path_generator=_resolve_output_path_placeholder,
    )

    container_spec = component_spec.implementation.container

    output_uris_and_paths = resolved_cmd.output_uris.copy()
    output_uris_and_paths.update(resolved_cmd.output_paths)
    input_uris_and_paths = resolved_cmd.input_uris.copy()
    input_uris_and_paths.update(resolved_cmd.input_paths)

    # The flag lives on the ContainerOp class, so it must be restored even if
    # construction fails; otherwise it would stay switched on for every
    # ContainerOp created afterwards.
    old_warn_value = dsl.ContainerOp._DISABLE_REUSABLE_COMPONENT_WARNING
    dsl.ContainerOp._DISABLE_REUSABLE_COMPONENT_WARNING = True
    try:
        task = container_op.ContainerOp(
            name=component_spec.name or _default_component_name,
            image=container_spec.image,
            command=resolved_cmd.command,
            arguments=resolved_cmd.args,
            file_outputs=output_uris_and_paths,
            artifact_argument_paths=[
                dsl.InputArgumentPath(
                    argument=arguments[input_name],
                    input=input_name,
                    path=path,
                ) for input_name, path in input_uris_and_paths.items()
            ],
        )

        # task.name is unique at this point.
        pipeline_task_spec.task_info.name = (dsl_utils.sanitize_task_name(
            task.name))
        pipeline_task_spec.component_ref.name = (
            dsl_utils.sanitize_component_name(component_spec.name))

        task.task_spec = pipeline_task_spec
        task.importer_specs = importer_specs
        task.component_spec = (
            dsl_component_spec.build_component_spec_from_structure(
                component_spec))
        task.container_spec = (
            pipeline_spec_pb2.PipelineDeploymentConfig.PipelineContainerSpec(
                image=container_spec.image,
                command=resolved_cmd.command,
                args=resolved_cmd.args))
    finally:
        dsl.ContainerOp._DISABLE_REUSABLE_COMPONENT_WARNING = old_warn_value

    component_meta = copy.copy(component_spec)
    task._set_metadata(component_meta)

    task.inputs = input_params

    # Previously, ContainerOp had strict requirements for the output names, so we
    # had to convert all the names before passing them to the ContainerOp
    # constructor. Outputs with non-pythonic names could not be accessed using
    # their original names. Now ContainerOp supports any output names, so we're
    # now using the original output names. However to support legacy pipelines,
    # we're also adding output references with pythonic names.
    # TODO: Add warning when people use the legacy output names.
    output_names = [
        output_spec.name for output_spec in component_spec.outputs or []
    ]  # Stabilizing the ordering
    output_name_to_python = generate_unique_name_conversion_table(
        output_names, _sanitize_python_function_name)
    for output_name in output_names:
        pythonic_output_name = output_name_to_python[output_name]
        # Note: Some component outputs are currently missing from task.outputs
        # (e.g. MLPipeline UI Metadata)
        if pythonic_output_name not in task.outputs and output_name in task.outputs:
            task.outputs[pythonic_output_name] = task.outputs[output_name]

    if component_spec.metadata:
        annotations = component_spec.metadata.annotations or {}
        for key, value in annotations.items():
            task.add_pod_annotation(key, value)
        for key, value in (component_spec.metadata.labels or {}).items():
            task.add_pod_label(key, value)
        # Disabling the caching for the volatile components by default
        if annotations.get('volatile_component', 'false') == 'true':
            task.execution_options.caching_strategy.max_cache_staleness = 'P0D'

    return task