Beispiel #1
0
 def test_get_input_artifact_type_schema(self):
   input_specs = [
       structures.InputSpec(name='input1', type='String'),
       structures.InputSpec(name='input2', type='GCSPath'),
   ]
   # input not found.
   self.assertEqual(
       None, type_utils.get_input_artifact_type_schema('input0', input_specs))
   # input found, but it doesn't map to an artifact type.
   self.assertEqual(
       None, type_utils.get_input_artifact_type_schema('input1', input_specs))
   # input found, and a matching artifact type schema returned.
   self.assertEqual(
       'title: Artifact\ntype: object\nproperties:\n',
       type_utils.get_input_artifact_type_schema('input2', input_specs))
    def test_get_input_artifact_type_schema(self):
        input_specs = [
            structures.InputSpec(name='input1', type='String'),
            structures.InputSpec(name='input2', type='Model'),
            structures.InputSpec(name='input3', type=None),
        ]
        # input not found.
        with self.assertRaises(AssertionError) as cm:
            type_utils.get_input_artifact_type_schema('input0', input_specs)
            self.assertEqual('Input not found.', str(cm))

        # input found, but it doesn't map to an artifact type.
        with self.assertRaises(AssertionError) as cm:
            type_utils.get_input_artifact_type_schema('input1', input_specs)
            self.assertEqual('Input is not an artifact type.', str(cm))

        # input found, and a matching artifact type schema returned.
        self.assertEqual(
            'properties:\ntitle: kfp.Model\ntype: object\n',
            type_utils.get_input_artifact_type_schema('input2', input_specs))

        # input found, and the default artifact type schema returned.
        self.assertEqual(
            'properties:\ntitle: kfp.Artifact\ntype: object\n',
            type_utils.get_input_artifact_type_schema('input3', input_specs))
Beispiel #3
0
def create_container_op_from_component_and_arguments(
    component_spec: structures.ComponentSpec,
    arguments: Mapping[str, Any],
    component_ref: structures.ComponentReference = None,
) -> container_op.ContainerOp:
    """Instantiates ContainerOp object.

  Args:
    component_spec: The component spec object.
    arguments: The dictionary of component arguments.
    component_ref: The component reference. Optional.

  Returns:
    A ContainerOp instance.
  """

    pipeline_task_spec = pipeline_spec_pb2.PipelineTaskSpec()
    pipeline_task_spec.task_info.name = component_spec.name
    # might need to append suffix to exuector_label to ensure its uniqueness?
    pipeline_task_spec.executor_label = component_spec.name

    # Keep track of auto-injected importer spec.
    importer_spec = {}

    # Check types of the reference arguments and serialize PipelineParams
    arguments = arguments.copy()
    for input_name, argument_value in arguments.items():
        if isinstance(argument_value, dsl.PipelineParam):
            input_type = component_spec._inputs_dict[input_name].type
            reference_type = argument_value.param_type
            types.verify_type_compatibility(
                reference_type, input_type,
                'Incompatible argument passed to the input "{}" of component "{}": '
                .format(input_name, component_spec.name))

            arguments[input_name] = str(argument_value)

            if type_utils.is_parameter_type(input_type):
                if argument_value.op_name:
                    pipeline_task_spec.inputs.parameters[
                        input_name].task_output_parameter.producer_task = (
                            argument_value.op_name)
                    pipeline_task_spec.inputs.parameters[
                        input_name].task_output_parameter.output_parameter_key = (
                            argument_value.name)
                else:
                    pipeline_task_spec.inputs.parameters[
                        input_name].runtime_value.runtime_parameter = argument_value.name
            else:
                if argument_value.op_name:
                    pipeline_task_spec.inputs.artifacts[
                        input_name].producer_task = (argument_value.op_name)
                    pipeline_task_spec.inputs.artifacts[
                        input_name].output_artifact_key = (argument_value.name)
                else:
                    # argument_value.op_name could be none, in which case an importer node
                    # will be inserted later.
                    pipeline_task_spec.inputs.artifacts[
                        input_name].producer_task = ''
                    type_schema = type_utils.get_input_artifact_type_schema(
                        input_name, component_spec.inputs)
                    importer_spec[
                        input_name] = importer_node.build_importer_spec(
                            input_type_schema=type_schema,
                            pipeline_param_name=argument_value.name)
        elif isinstance(argument_value, str):
            input_type = component_spec._inputs_dict[input_name].type
            if type_utils.is_parameter_type(input_type):
                pipeline_task_spec.inputs.parameters[
                    input_name].runtime_value.constant_value.string_value = (
                        argument_value)
            else:
                # An importer node with constant value artifact_uri will be inserted.
                pipeline_task_spec.inputs.artifacts[
                    input_name].producer_task = ''
                type_schema = type_utils.get_input_artifact_type_schema(
                    input_name, component_spec.inputs)
                importer_spec[input_name] = importer_node.build_importer_spec(
                    input_type_schema=type_schema,
                    constant_value=argument_value)
        elif isinstance(argument_value, int):
            pipeline_task_spec.inputs.parameters[
                input_name].runtime_value.constant_value.int_value = argument_value
        elif isinstance(argument_value, float):
            pipeline_task_spec.inputs.parameters[
                input_name].runtime_value.constant_value.double_value = argument_value
        elif isinstance(argument_value, dsl.ContainerOp):
            raise TypeError(
                'ContainerOp object {} was passed to component as an input argument. '
                'Pass a single output instead.'.format(input_name))
        else:
            raise NotImplementedError(
                'Input argument supports only the following types: PipelineParam'
                ', str, int, float. Got: "{}".'.format(argument_value))

    for output in component_spec.outputs or []:
        if type_utils.is_parameter_type(output.type):
            pipeline_task_spec.outputs.parameters[
                output.name].type = type_utils.get_parameter_type(output.type)
        else:
            pipeline_task_spec.outputs.artifacts[
                output.name].artifact_type.instance_schema = (
                    type_utils.get_artifact_type_schema(output.type))

    inputs_dict = {
        input_spec.name: input_spec
        for input_spec in component_spec.inputs or []
    }
    outputs_dict = {
        output_spec.name: output_spec
        for output_spec in component_spec.outputs or []
    }

    def _input_artifact_uri_placeholder(input_key: str) -> str:
        if type_utils.is_parameter_type(inputs_dict[input_key].type):
            raise TypeError(
                'Input "{}" with type "{}" cannot be paired with InputUriPlaceholder.'
                .format(input_key, inputs_dict[input_key].type))
        else:
            return "{{{{$.inputs.artifacts['{}'].uri}}}}".format(input_key)

    def _input_artifact_path_placeholder(input_key: str) -> str:
        if type_utils.is_parameter_type(inputs_dict[input_key].type):
            raise TypeError(
                'Input "{}" with type "{}" cannot be paired with InputPathPlaceholder.'
                .format(input_key, inputs_dict[input_key].type))
        else:
            return "{{{{$.inputs.artifacts['{}'].path}}}}".format(input_key)

    def _input_parameter_placeholder(input_key: str) -> str:
        if type_utils.is_parameter_type(inputs_dict[input_key].type):
            return "{{{{$.inputs.parameters['{}']}}}}".format(input_key)
        else:
            raise TypeError(
                'Input "{}" with type "{}" cannot be paired with InputValuePlaceholder.'
                .format(input_key, inputs_dict[input_key].type))

    def _output_artifact_uri_placeholder(output_key: str) -> str:
        if type_utils.is_parameter_type(outputs_dict[output_key].type):
            raise TypeError(
                'Output "{}" with type "{}" cannot be paired with OutputUriPlaceholder.'
                .format(output_key, outputs_dict[output_key].type))
        else:
            return "{{{{$.outputs.artifacts['{}'].uri}}}}".format(output_key)

    def _output_artifact_path_placeholder(output_key: str) -> str:
        return "{{{{$.outputs.artifacts['{}'].path}}}}".format(output_key)

    def _output_parameter_path_placeholder(output_key: str) -> str:
        return "{{{{$.outputs.parameters['{}'].output_file}}}}".format(
            output_key)

    def _resolve_output_path_placeholder(output_key: str) -> str:
        if type_utils.is_parameter_type(outputs_dict[output_key].type):
            return _output_parameter_path_placeholder(output_key)
        else:
            return _output_artifact_path_placeholder(output_key)

    resolved_cmd = _resolve_command_line_and_paths(
        component_spec=component_spec,
        arguments=arguments,
        input_value_generator=_input_parameter_placeholder,
        input_uri_generator=_input_artifact_uri_placeholder,
        output_uri_generator=_output_artifact_uri_placeholder,
        input_path_generator=_input_artifact_path_placeholder,
        output_path_generator=_resolve_output_path_placeholder,
    )

    container_spec = component_spec.implementation.container

    pipeline_container_spec = (
        pipeline_spec_pb2.PipelineDeploymentConfig.PipelineContainerSpec())
    pipeline_container_spec.image = container_spec.image
    pipeline_container_spec.command.extend(resolved_cmd.command)
    pipeline_container_spec.args.extend(resolved_cmd.args)

    output_uris_and_paths = resolved_cmd.output_uris.copy()
    output_uris_and_paths.update(resolved_cmd.output_paths)
    input_uris_and_paths = resolved_cmd.input_uris.copy()
    input_uris_and_paths.update(resolved_cmd.input_paths)

    old_warn_value = dsl.ContainerOp._DISABLE_REUSABLE_COMPONENT_WARNING
    dsl.ContainerOp._DISABLE_REUSABLE_COMPONENT_WARNING = True
    task = container_op.ContainerOp(
        name=component_spec.name or _default_component_name,
        image=container_spec.image,
        command=resolved_cmd.command,
        arguments=resolved_cmd.args,
        file_outputs=output_uris_and_paths,
        artifact_argument_paths=[
            dsl.InputArgumentPath(
                argument=arguments[input_name],
                input=input_name,
                path=path,
            ) for input_name, path in input_uris_and_paths.items()
        ],
    )

    task.task_spec = pipeline_task_spec
    task.importer_spec = importer_spec
    task.container_spec = pipeline_container_spec
    dsl.ContainerOp._DISABLE_REUSABLE_COMPONENT_WARNING = old_warn_value

    component_meta = copy.copy(component_spec)
    task._set_metadata(component_meta)
    component_ref_without_spec = copy.copy(component_ref)
    component_ref_without_spec.spec = None
    task._component_ref = component_ref_without_spec

    # Previously, ContainerOp had strict requirements for the output names, so we
    # had to convert all the names before passing them to the ContainerOp
    # constructor. Outputs with non-pythonic names could not be accessed using
    # their original names. Now ContainerOp supports any output names, so we're
    # now using the original output names. However to support legacy pipelines,
    # we're also adding output references with pythonic names.
    # TODO: Add warning when people use the legacy output names.
    output_names = [
        output_spec.name for output_spec in component_spec.outputs or []
    ]  # Stabilizing the ordering
    output_name_to_python = generate_unique_name_conversion_table(
        output_names, _sanitize_python_function_name)
    for output_name in output_names:
        pythonic_output_name = output_name_to_python[output_name]
        # Note: Some component outputs are currently missing from task.outputs
        # (e.g. MLPipeline UI Metadata)
        if pythonic_output_name not in task.outputs and output_name in task.outputs:
            task.outputs[pythonic_output_name] = task.outputs[output_name]

    if component_spec.metadata:
        annotations = component_spec.metadata.annotations or {}
        for key, value in annotations.items():
            task.add_pod_annotation(key, value)
        for key, value in (component_spec.metadata.labels or {}).items():
            task.add_pod_label(key, value)
            # Disabling the caching for the volatile components by default
        if annotations.get('volatile_component', 'false') == 'true':
            task.execution_options.caching_strategy.max_cache_staleness = 'P0D'

    return task
Beispiel #4
0
    def _create_pipeline_spec(
        self,
        args: List[dsl.PipelineParam],
        pipeline: dsl.Pipeline,
    ) -> pipeline_spec_pb2.PipelineSpec:
        """Creates the pipeline spec object.

    Args:
      args: The list of pipeline arguments.
      pipeline: The instantiated pipeline object.

    Returns:
      A PipelineSpec proto representing the compiled pipeline.

    Raises:
      NotImplementedError if the argument is of unsupported types.
    """
        if not pipeline.name:
            raise ValueError('Pipeline name is required.')

        pipeline_spec = pipeline_spec_pb2.PipelineSpec(
            runtime_parameters=compiler_utils.build_runtime_parameter_spec(
                args))

        pipeline_spec.pipeline_info.name = pipeline.name
        pipeline_spec.sdk_version = 'kfp-{}'.format(kfp.__version__)
        pipeline_spec.schema_version = 'v2alpha1'

        deployment_config = pipeline_spec_pb2.PipelineDeploymentConfig()
        importer_tasks = []

        for op in pipeline.ops.values():
            component_spec = op._metadata
            task = pipeline_spec.tasks.add()
            task.CopyFrom(op.task_spec)
            deployment_config.executors[
                task.executor_label].container.CopyFrom(op.container_spec)

            # Check if need to insert importer node
            for input_name in task.inputs.artifacts:
                if not task.inputs.artifacts[input_name].producer_task:
                    type_schema = type_utils.get_input_artifact_type_schema(
                        input_name, component_spec.inputs)

                    importer_task = importer_node.build_importer_task_spec(
                        dependent_task=task,
                        input_name=input_name,
                        input_type_schema=type_schema)
                    importer_tasks.append(importer_task)

                    task.inputs.artifacts[
                        input_name].producer_task = importer_task.task_info.name
                    task.inputs.artifacts[
                        input_name].output_artifact_key = importer_node.OUTPUT_KEY

                    # Retrieve the pre-built importer spec
                    importer_spec = op.importer_spec[input_name]
                    deployment_config.executors[
                        importer_task.executor_label].importer.CopyFrom(
                            importer_spec)

        pipeline_spec.deployment_config.Pack(deployment_config)
        pipeline_spec.tasks.extend(importer_tasks)

        return pipeline_spec
Beispiel #5
0
    def _create_pipeline_spec(
        self,
        args: List[dsl.PipelineParam],
        pipeline: dsl.Pipeline,
    ) -> pipeline_spec_pb2.PipelineSpec:
        """Creates the pipeline spec object.

    Args:
      args: The list of pipeline arguments.
      pipeline: The instantiated pipeline object.

    Returns:
      A PipelineSpec proto representing the compiled pipeline.

    Raises:
      NotImplementedError if the argument is of unsupported types.
    """
        pipeline_spec = pipeline_spec_pb2.PipelineSpec()
        if not pipeline.name:
            raise ValueError('Pipeline name is required.')
        pipeline_spec.pipeline_info.name = pipeline.name
        pipeline_spec.sdk_version = 'kfp-{}'.format(kfp.__version__)
        pipeline_spec.schema_version = 'v2alpha1'

        # Pipeline Parameters
        for arg in args:
            if arg.value is not None:
                if isinstance(arg.value, int):
                    pipeline_spec.runtime_parameters[
                        arg.name].type = pipeline_spec_pb2.PrimitiveType.INT
                    pipeline_spec.runtime_parameters[
                        arg.name].default_value.int_value = arg.value
                elif isinstance(arg.value, float):
                    pipeline_spec.runtime_parameters[
                        arg.name].type = pipeline_spec_pb2.PrimitiveType.DOUBLE
                    pipeline_spec.runtime_parameters[
                        arg.name].default_value.double_value = arg.value
                elif isinstance(arg.value, str):
                    pipeline_spec.runtime_parameters[
                        arg.name].type = pipeline_spec_pb2.PrimitiveType.STRING
                    pipeline_spec.runtime_parameters[
                        arg.name].default_value.string_value = arg.value
                else:
                    raise NotImplementedError(
                        'Unexpected parameter type with: "{}".'.format(
                            str(arg.value)))

        deployment_config = pipeline_spec_pb2.PipelineDeploymentConfig()
        importer_tasks = []

        for op in pipeline.ops.values():
            component_spec = op._metadata
            task = pipeline_spec.tasks.add()
            task.CopyFrom(op.task_spec)
            deployment_config.executors[
                task.executor_label].container.CopyFrom(op.container_spec)

            # Check if need to insert importer node
            for input_name in task.inputs.artifacts:
                if not task.inputs.artifacts[input_name].producer_task:
                    artifact_type = type_utils.get_input_artifact_type_schema(
                        input_name, component_spec.inputs)

                    importer_task, importer_spec = importer_node.build_importer_spec(
                        task, input_name, artifact_type)
                    importer_tasks.append(importer_task)

                    task.inputs.artifacts[
                        input_name].producer_task = importer_task.task_info.name
                    task.inputs.artifacts[
                        input_name].output_artifact_key = importer_node.OUTPUT_KEY

                    deployment_config.executors[
                        importer_task.executor_label].importer.CopyFrom(
                            importer_spec)

        pipeline_spec.deployment_config.Pack(deployment_config)
        pipeline_spec.tasks.extend(importer_tasks)

        return pipeline_spec
Beispiel #6
0
  def _group_to_dag_spec(
      self,
      group: dsl.OpsGroup,
      inputs: Dict[str, List[Tuple[dsl.PipelineParam, str]]],
      outputs: Dict[str, List[Tuple[dsl.PipelineParam, str]]],
      dependencies: Dict[str, List[_GroupOrOp]],
      pipeline_spec: pipeline_spec_pb2.PipelineSpec,
      deployment_config: pipeline_spec_pb2.PipelineDeploymentConfig,
      rootgroup_name: str,
  ) -> None:
    """Generate IR spec given an OpsGroup.

    Args:
      group: The OpsGroup to generate spec for.
      inputs: The inputs dictionary. The keys are group/op names and values are
        lists of tuples (param, producing_op_name).
      outputs: The outputs dictionary. The keys are group/op names and values
        are lists of tuples (param, producing_op_name).
      dependencies: The group dependencies dictionary. The keys are group/op
        names, and the values are lists of dependent groups/ops.
      pipeline_spec: The pipeline_spec to update in-place.
      deployment_config: The deployment_config to hold all executors.
      rootgroup_name: The name of the group root. Used to determine whether the
        component spec for the current group should be the root dag.
    """
    group_component_name = dsl_utils.sanitize_component_name(group.name)

    if group.name == rootgroup_name:
      group_component_spec = pipeline_spec.root
    else:
      group_component_spec = pipeline_spec.components[group_component_name]

    # Generate component inputs spec.
    if inputs.get(group.name, None):
      dsl_component_spec.build_component_inputs_spec(
          group_component_spec, [param for param, _ in inputs[group.name]])

    # Generate component outputs spec.
    if outputs.get(group.name, None):
      group_component_spec.output_definitions.CopyFrom(
          dsl_component_spec.build_component_outputs_spec(
              [param for param, _ in outputs[group.name]]))

    # Generate task specs and component specs for the dag.
    subgroups = group.groups + group.ops
    for subgroup in subgroups:
      subgroup_task_spec = getattr(subgroup, 'task_spec',
                                   pipeline_spec_pb2.PipelineTaskSpec())
      subgroup_component_spec = getattr(subgroup, 'component_spec',
                                        pipeline_spec_pb2.ComponentSpec())
      is_loop_subgroup = (isinstance(group, dsl.ParallelFor))
      is_recursive_subgroup = (
          isinstance(subgroup, dsl.OpsGroup) and subgroup.recursive_ref)

      # Special handling for recursive subgroup: use the existing opsgroup name
      if is_recursive_subgroup:
        subgroup_key = subgroup.recursive_ref.name
      else:
        subgroup_key = subgroup.name

      subgroup_task_spec.task_info.name = dsl_utils.sanitize_task_name(
          subgroup_key)
      # human_name exists for ops only, and is used to de-dupe component spec.
      subgroup_component_name = dsl_utils.sanitize_component_name(
          getattr(subgroup, 'human_name', subgroup_key))
      subgroup_task_spec.component_ref.name = subgroup_component_name

      if isinstance(subgroup, dsl.OpsGroup) and subgroup.type == 'condition':
        condition = subgroup.condition
        operand_values = []
        subgroup_inputs = inputs.get(subgroup.name, [])
        subgroup_params = [param for param, _ in subgroup_inputs]
        tasks_in_current_dag = [subgroup.name for subgroup in subgroups]

        dsl_component_spec.build_component_inputs_spec(
            subgroup_component_spec,
            subgroup_params,
        )
        dsl_component_spec.build_task_inputs_spec(
            subgroup_task_spec,
            subgroup_params,
            tasks_in_current_dag,
        )

        for operand in [condition.operand1, condition.operand2]:
          operand_values.append(self._resolve_value_or_reference(operand))

        condition_string = '{} {} {}'.format(operand_values[0],
                                             condition.operator,
                                             operand_values[1])

        subgroup_task_spec.trigger_policy.CopyFrom(
            pipeline_spec_pb2.PipelineTaskSpec.TriggerPolicy(
                condition=condition_string))

      # Generate dependencies section for this task.
      if dependencies.get(subgroup.name, None):
        group_dependencies = list(dependencies[subgroup.name])
        group_dependencies.sort()
        subgroup_task_spec.dependent_tasks.extend(
            [dsl_utils.sanitize_task_name(dep) for dep in group_dependencies])

      if isinstance(subgroup, dsl.ParallelFor):
        # Remove loop arguments related inputs from parent group component spec.
        input_names = [param.full_name for param, _ in inputs[subgroup.name]]
        for input_name in input_names:
          if _for_loop.LoopArguments.name_is_loop_argument(input_name):
            dsl_component_spec.pop_input_from_component_spec(
                group_component_spec, input_name)

        if subgroup.items_is_pipeline_param:
          # These loop args are a 'withParam' rather than 'withItems'.
          # i.e., rather than a static list, they are either the output of
          # another task or were input as global pipeline parameters.

          pipeline_param = subgroup.loop_args.items_or_pipeline_param
          input_parameter_name = pipeline_param.full_name

          if pipeline_param.op_name:
            subgroup_task_spec.inputs.parameters[
                input_parameter_name].task_output_parameter.producer_task = (
                    dsl_utils.sanitize_task_name(pipeline_param.op_name))
            subgroup_task_spec.inputs.parameters[
                input_parameter_name].task_output_parameter.output_parameter_key = (
                    pipeline_param.name)
          else:
            subgroup_task_spec.inputs.parameters[
                input_parameter_name].component_input_parameter = (
                    input_parameter_name)

          # Correct loop argument input type in the parent component spec.
          # The loop argument was categorized as an artifact due to its missing
          # or non-primitive type annotation. But it should always be String
          # typed, as its value is a serialized JSON string.
          dsl_component_spec.pop_input_from_component_spec(
              group_component_spec, input_parameter_name)
          group_component_spec.input_definitions.parameters[
              input_parameter_name].type = pipeline_spec_pb2.PrimitiveType.STRING

      # Additional spec modifications for dsl.ParallelFor's subgroups.
      if is_loop_subgroup:
        self._update_loop_specs(group, subgroup, group_component_spec,
                                subgroup_component_spec, subgroup_task_spec)

      # Add importer node when applicable
      for input_name in subgroup_task_spec.inputs.artifacts:
        if not subgroup_task_spec.inputs.artifacts[
            input_name].task_output_artifact.producer_task:
          type_schema = type_utils.get_input_artifact_type_schema(
              input_name, subgroup._metadata.inputs)

          importer_name = importer_node.generate_importer_base_name(
              dependent_task_name=subgroup_task_spec.task_info.name,
              input_name=input_name)
          importer_task_spec = importer_node.build_importer_task_spec(
              importer_name)
          importer_comp_spec = importer_node.build_importer_component_spec(
              importer_base_name=importer_name,
              input_name=input_name,
              input_type_schema=type_schema)
          importer_task_name = importer_task_spec.task_info.name
          importer_comp_name = importer_task_spec.component_ref.name
          importer_exec_label = importer_comp_spec.executor_label
          group_component_spec.dag.tasks[importer_task_name].CopyFrom(
              importer_task_spec)
          pipeline_spec.components[importer_comp_name].CopyFrom(
              importer_comp_spec)

          subgroup_task_spec.inputs.artifacts[
              input_name].task_output_artifact.producer_task = (
                  importer_task_name)
          subgroup_task_spec.inputs.artifacts[
              input_name].task_output_artifact.output_artifact_key = (
                  importer_node.OUTPUT_KEY)

          # Retrieve the pre-built importer spec
          importer_spec = subgroup.importer_specs[input_name]
          deployment_config.executors[importer_exec_label].importer.CopyFrom(
              importer_spec)

      # Add component spec if not exists
      if subgroup_component_name not in pipeline_spec.components:
        pipeline_spec.components[subgroup_component_name].CopyFrom(
            subgroup_component_spec)

      # Add task spec
      group_component_spec.dag.tasks[
          subgroup_task_spec.task_info.name].CopyFrom(subgroup_task_spec)

      # Add executor spec, if applicable.
      container_spec = getattr(subgroup, 'container_spec', None)
      if container_spec:
        if compiler_utils.is_v2_component(subgroup):
          compiler_utils.refactor_v2_container_spec(container_spec)
        executor_label = subgroup_component_spec.executor_label

        if executor_label not in deployment_config.executors:
          deployment_config.executors[executor_label].container.CopyFrom(
              container_spec)

      # Add AIPlatformCustomJobSpec, if applicable.
      custom_job_spec = getattr(subgroup, 'custom_job_spec', None)
      if custom_job_spec:
        executor_label = subgroup_component_spec.executor_label
        if executor_label not in deployment_config.executors:
          deployment_config.executors[
            executor_label].custom_job.custom_job.update(custom_job_spec)

    pipeline_spec.deployment_spec.update(
        json_format.MessageToDict(deployment_config))
Beispiel #7
0
    def _create_pipeline_spec(
        self,
        args: List[dsl.PipelineParam],
        pipeline: dsl.Pipeline,
    ) -> pipeline_spec_pb2.PipelineSpec:
        """Creates the pipeline spec object.

    Args:
      args: The list of pipeline arguments.
      pipeline: The instantiated pipeline object.

    Returns:
      A PipelineSpec proto representing the compiled pipeline.

    Raises:
      NotImplementedError if the argument is of unsupported types.
    """
        compiler_utils.validate_pipeline_name(pipeline.name)

        pipeline_spec = pipeline_spec_pb2.PipelineSpec()

        pipeline_spec.pipeline_info.name = pipeline.name
        pipeline_spec.sdk_version = 'kfp-{}'.format(kfp.__version__)
        # Schema version 2.0.0 is required for kfp-pipeline-spec>0.1.3.1
        pipeline_spec.schema_version = '2.0.0'

        pipeline_spec.root.CopyFrom(
            dsl_component_spec.build_root_spec_from_pipeline_params(args))

        deployment_config = pipeline_spec_pb2.PipelineDeploymentConfig()

        for op in pipeline.ops.values():
            task_name = op.task_spec.task_info.name
            component_name = op.task_spec.component_ref.name
            executor_label = op.component_spec.executor_label

            pipeline_spec.root.dag.tasks[task_name].CopyFrom(op.task_spec)
            pipeline_spec.components[component_name].CopyFrom(
                op.component_spec)
            deployment_config.executors[executor_label].container.CopyFrom(
                op.container_spec)

            task = pipeline_spec.root.dag.tasks[task_name]
            # A task may have explicit depdency on other tasks even though they may
            # not have inputs/outputs dependency. e.g.: op2.after(op1)
            if op.dependent_names:
                op.dependent_names = [
                    dsl_utils.sanitize_task_name(name)
                    for name in op.dependent_names
                ]
                task.dependent_tasks.extend(op.dependent_names)

            # Check if need to insert importer node
            for input_name in task.inputs.artifacts:
                if not task.inputs.artifacts[
                        input_name].task_output_artifact.producer_task:
                    type_schema = type_utils.get_input_artifact_type_schema(
                        input_name, op._metadata.inputs)

                    importer_name = importer_node.generate_importer_base_name(
                        dependent_task_name=task_name, input_name=input_name)
                    importer_task_spec = importer_node.build_importer_task_spec(
                        importer_name)
                    importer_comp_spec = importer_node.build_importer_component_spec(
                        importer_base_name=importer_name,
                        input_name=input_name,
                        input_type_schema=type_schema)
                    importer_task_name = importer_task_spec.task_info.name
                    importer_comp_name = importer_task_spec.component_ref.name
                    importer_exec_label = importer_comp_spec.executor_label
                    pipeline_spec.root.dag.tasks[importer_task_name].CopyFrom(
                        importer_task_spec)
                    pipeline_spec.components[importer_comp_name].CopyFrom(
                        importer_comp_spec)

                    task.inputs.artifacts[
                        input_name].task_output_artifact.producer_task = (
                            importer_task_name)
                    task.inputs.artifacts[
                        input_name].task_output_artifact.output_artifact_key = (
                            importer_node.OUTPUT_KEY)

                    # Retrieve the pre-built importer spec
                    importer_spec = op.importer_specs[input_name]
                    deployment_config.executors[
                        importer_exec_label].importer.CopyFrom(importer_spec)

        pipeline_spec.deployment_spec.update(
            json_format.MessageToDict(deployment_config))

        return pipeline_spec
    def _group_to_dag_spec(
        self,
        group: dsl.OpsGroup,
        inputs: Dict[str, List[Tuple[dsl.PipelineParam, str]]],
        outputs: Dict[str, List[Tuple[dsl.PipelineParam, str]]],
        dependencies: Dict[str, List[_GroupOrOp]],
        pipeline_spec: pipeline_spec_pb2.PipelineSpec,
        rootgroup_name: str,
    ) -> None:
        """Generate IR spec given an OpsGroup.

    Args:
      group: The OpsGroup to generate spec for.
      inputs: The inputs dictionary. The keys are group/op names and values are
        lists of tuples (param, producing_op_name).
      outputs: The outputs dictionary. The keys are group/op names and values
        are lists of tuples (param, producing_op_name).
      dependencies: The group dependencies dictionary. The keys are group/op
        names, and the values are lists of dependent groups/ops.
      pipeline_spec: The pipeline_spec to update in-place.
      rootgroup_name: The name of the group root. Used to determine whether the
        component spec for the current group should be the root dag.
    """
        group_component_name = dsl_utils.sanitize_component_name(group.name)

        if group.name == rootgroup_name:
            group_component_spec = pipeline_spec.root
        else:
            group_component_spec = pipeline_spec.components[
                group_component_name]

        deployment_config = pipeline_spec_pb2.PipelineDeploymentConfig()

        # Generate component inputs spec.
        if inputs.get(group.name, None):
            dsl_component_spec.build_component_inputs_spec(
                group_component_spec,
                [param for param, _ in inputs[group.name]])

        # Generate component outputs spec.
        if outputs.get(group.name, None):
            group_component_spec.output_definitions.CopyFrom(
                dsl_component_spec.build_component_outputs_spec(
                    [param for param, _ in outputs[group.name]]))

        # Generate task specs and component specs for the dag.
        subgroups = group.groups + group.ops
        for subgroup in subgroups:
            subgroup_task_spec = getattr(subgroup, 'task_spec',
                                         pipeline_spec_pb2.PipelineTaskSpec())
            subgroup_component_spec = getattr(
                subgroup, 'component_spec', pipeline_spec_pb2.ComponentSpec())
            is_recursive_subgroup = (isinstance(subgroup, dsl.OpsGroup)
                                     and subgroup.recursive_ref)
            # Special handling for recursive subgroup: use the existing opsgroup name
            if is_recursive_subgroup:
                subgroup_key = subgroup.recursive_ref.name
            else:
                subgroup_key = subgroup.name

            subgroup_task_spec.task_info.name = dsl_utils.sanitize_task_name(
                subgroup_key)
            # human_name exists for ops only, and is used to de-dupe component spec.
            subgroup_component_name = dsl_utils.sanitize_component_name(
                getattr(subgroup, 'human_name', subgroup_key))
            subgroup_task_spec.component_ref.name = subgroup_component_name

            if isinstance(subgroup,
                          dsl.OpsGroup) and subgroup.type == 'condition':
                condition = subgroup.condition
                operand_values = []
                subgroup_inputs = inputs.get(subgroup.name, [])
                subgroup_params = [param for param, _ in subgroup_inputs]
                tasks_in_current_dag = [
                    subgroup.name for subgroup in subgroups
                ]

                dsl_component_spec.build_component_inputs_spec(
                    subgroup_component_spec,
                    subgroup_params,
                )
                dsl_component_spec.build_task_inputs_spec(
                    subgroup_task_spec,
                    subgroup_params,
                    tasks_in_current_dag,
                )

                for operand in [condition.operand1, condition.operand2]:
                    operand_values.append(
                        self._resolve_value_or_reference(operand))

                condition_string = '{} {} {}'.format(operand_values[0],
                                                     condition.operator,
                                                     operand_values[1])

                subgroup_task_spec.trigger_policy.CopyFrom(
                    pipeline_spec_pb2.PipelineTaskSpec.TriggerPolicy(
                        condition=condition_string))

            # Generate dependencies section for this task.
            if dependencies.get(subgroup.name, None):
                group_dependencies = list(dependencies[subgroup.name])
                group_dependencies.sort()
                subgroup_task_spec.dependent_tasks.extend([
                    dsl_utils.sanitize_task_name(dep)
                    for dep in group_dependencies
                ])

            # Add importer node when applicable
            for input_name in subgroup_task_spec.inputs.artifacts:
                if not subgroup_task_spec.inputs.artifacts[
                        input_name].task_output_artifact.producer_task:
                    type_schema = type_utils.get_input_artifact_type_schema(
                        input_name, subgroup._metadata.inputs)

                    importer_name = importer_node.generate_importer_base_name(
                        dependent_task_name=subgroup_task_spec.task_info.name,
                        input_name=input_name)
                    importer_task_spec = importer_node.build_importer_task_spec(
                        importer_name)
                    importer_comp_spec = importer_node.build_importer_component_spec(
                        importer_base_name=importer_name,
                        input_name=input_name,
                        input_type_schema=type_schema)
                    importer_task_name = importer_task_spec.task_info.name
                    importer_comp_name = importer_task_spec.component_ref.name
                    importer_exec_label = importer_comp_spec.executor_label
                    group_component_spec.dag.tasks[
                        importer_task_name].CopyFrom(importer_task_spec)
                    pipeline_spec.components[importer_comp_name].CopyFrom(
                        importer_comp_spec)

                    subgroup_task_spec.inputs.artifacts[
                        input_name].task_output_artifact.producer_task = (
                            importer_task_name)
                    subgroup_task_spec.inputs.artifacts[
                        input_name].task_output_artifact.output_artifact_key = (
                            importer_node.OUTPUT_KEY)

                    # Retrieve the pre-built importer spec
                    importer_spec = subgroup.importer_specs[input_name]
                    deployment_config.executors[
                        importer_exec_label].importer.CopyFrom(importer_spec)

            # Add component spec if not exists
            if subgroup_component_name not in pipeline_spec.components:
                pipeline_spec.components[subgroup_component_name].CopyFrom(
                    subgroup_component_spec)

            # Add task spec
            group_component_spec.dag.tasks[
                subgroup_task_spec.task_info.name].CopyFrom(subgroup_task_spec)

            # Add executor spec
            container_spec = getattr(subgroup, 'container_spec', None)
            if container_spec:
                if compiler_utils.is_v2_component(subgroup):
                    compiler_utils.refactor_v2_container_spec(container_spec)
                executor_label = subgroup_component_spec.executor_label

                if executor_label not in deployment_config.executors:
                    deployment_config.executors[
                        executor_label].container.CopyFrom(container_spec)

        pipeline_spec.deployment_spec.update(
            json_format.MessageToDict(deployment_config))