Ejemplo n.º 1
0
def build_component_spec_from_structure(
    component_spec: structures.ComponentSpec,
) -> pipeline_spec_pb2.ComponentSpec:
  """Converts a structures.ComponentSpec into its IR ComponentSpec form.

  Args:
    component_spec: The structure component spec.

  Returns:
    An instance of IR ComponentSpec.
  """
  ir_spec = pipeline_spec_pb2.ComponentSpec()
  ir_spec.executor_label = dsl_utils.sanitize_executor_label(component_spec.name)

  def _record(io_definitions, spec):
    # Each spec lands in the parameters or artifacts section by its type.
    if type_utils.is_parameter_type(spec.type):
      io_definitions.parameters[spec.name].type = (
          type_utils.get_parameter_type(spec.type))
    else:
      io_definitions.artifacts[spec.name].artifact_type.instance_schema = (
          type_utils.get_artifact_type_schema(spec.type))

  for input_spec in component_spec.inputs or []:
    _record(ir_spec.input_definitions, input_spec)

  for output_spec in component_spec.outputs or []:
    _record(ir_spec.output_definitions, output_spec)

  return ir_spec
Ejemplo n.º 2
0
 def test_get_artifact_type_schema(self):
     """Checks known artifact schemas and the fallback for unknown types."""
     model_schema = type_utils.get_artifact_type_schema('Model')
     self.assertEqual(
         'title: kfp.Model\ntype: object\nproperties:\n  framework:\n    type: string\n  framework_version:\n    type: string\n',
         model_schema)
     dataset_schema = type_utils.get_artifact_type_schema('Dataset')
     self.assertEqual(
         'title: kfp.Dataset\ntype: object\nproperties:\n  payload_format:\n    type: string\n  container_format:\n    type: string',
         dataset_schema)
     # Any unrecognized type name falls back to the generic Artifact schema.
     for unknown_name in _UNKNOWN_ARTIFACT_TYPES:
         self.assertEqual('title: kfp.Artifact\ntype: object\n',
                          type_utils.get_artifact_type_schema(unknown_name))
Ejemplo n.º 3
0
def build_component_inputs_spec(
    component_spec: pipeline_spec_pb2.ComponentSpec,
    pipeline_params: List[_pipeline_param.PipelineParam],
    is_root_component: bool,
) -> None:
  """Populates a component spec's input definitions from pipeline params.

  Args:
    component_spec: The component spec to fill in its inputs spec.
    pipeline_params: The list of pipeline params.
    is_root_component: Whether the component is the root.
  """
  input_defs = component_spec.input_definitions
  for param in pipeline_params:
    full_name = param.full_name
    # Loop arguments default to 'String' when no type was declared.
    if _for_loop.LoopArguments.name_is_loop_argument(full_name):
      param.param_type = param.param_type or 'String'

    if is_root_component:
      input_name = full_name
    else:
      input_name = additional_input_name_for_pipelineparam(full_name)

    if type_utils.is_parameter_type(param.param_type):
      input_defs.parameters[input_name].type = type_utils.get_parameter_type(
          param.param_type)
    elif input_name not in getattr(input_defs, 'parameters', []):
      # Non-parameter inputs become artifacts, unless a parameter already
      # exists under the same name.
      input_defs.artifacts[input_name].artifact_type.CopyFrom(
          type_utils.get_artifact_type_schema(param.param_type))
Ejemplo n.º 4
0
    def test_get_artifact_type_schema(self):
        """Known types carry their kfp title; unknown types use Artifact."""
        expected_titles = {
            'Model': 'title: kfp.Model',
            'Dataset': 'title: kfp.Dataset',
            'Metrics': 'title: kfp.Metrics',
            'ClassificationMetrics': 'title: kfp.ClassificationMetrics',
            'SlicedClassificationMetrics':
                'title: kfp.SlicedClassificationMetrics',
        }
        for type_name, title in expected_titles.items():
            self.assertTrue(
                title in type_utils.get_artifact_type_schema(type_name))

        for type_name in _UNKNOWN_ARTIFACT_TYPES:
            self.assertEqual('title: kfp.Artifact\ntype: object\n',
                             type_utils.get_artifact_type_schema(type_name))
Ejemplo n.º 5
0
def build_component_spec_from_structure(
    component_spec: structures.ComponentSpec,
    executor_label: str,
    actual_inputs: List[str],
) -> pipeline_spec_pb2.ComponentSpec:
    """Converts a structures.ComponentSpec into its IR ComponentSpec form.

    Args:
      component_spec: The structure component spec.
      executor_label: The executor label.
      actual_inputs: The actual arguments passed to the task. This is used as
        a short-term workaround to support optional inputs in component spec
        IR.

    Returns:
      An instance of IR ComponentSpec.
    """
    ir_spec = pipeline_spec_pb2.ComponentSpec()
    ir_spec.executor_label = executor_label

    def _record(io_definitions, spec):
        # A spec lands in parameters or artifacts depending on its type.
        if type_utils.is_parameter_type(spec.type):
            io_definitions.parameters[spec.name].type = (
                type_utils.get_parameter_type(spec.type))
        else:
            io_definitions.artifacts[spec.name].artifact_type.CopyFrom(
                type_utils.get_artifact_type_schema(spec.type))

    for input_spec in component_spec.inputs or []:
        # Inputs the task was not actually given are omitted (optional-input
        # workaround).
        if input_spec.name in actual_inputs:
            _record(ir_spec.input_definitions, input_spec)

    for output_spec in component_spec.outputs or []:
        _record(ir_spec.output_definitions, output_spec)

    return ir_spec
Ejemplo n.º 6
0
def importer(artifact_uri: Union[_pipeline_param.PipelineParam, str],
             artifact_class: Type[io_types.Artifact],
             reimport: bool = False) -> _container_op.ContainerOp:
    """dsl.importer for importing an existing artifact. Only for v2 pipeline.

  Args:
    artifact_uri: The artifact uri to import from.
    artifact_class: The artifact class of the artifact to be imported.
    reimport: Whether to reimport the artifact. Defaults to False.
      NOTE(review): currently unused in this body — confirm whether it should
      be forwarded to the importer spec.

  Returns:
    A ContainerOp instance.

  Raises:
    ValueError if the passed in artifact_uri is neither a PipelineParam nor a
      constant string value.
  """

    if isinstance(artifact_uri, _pipeline_param.PipelineParam):
        input_param = artifact_uri
    elif isinstance(artifact_uri, str):
        # Wrap a constant uri in a PipelineParam so downstream code can treat
        # both cases uniformly.
        input_param = _pipeline_param.PipelineParam(name='uri',
                                                    value=artifact_uri,
                                                    param_type='String')
    else:
        raise ValueError(
            'Importer got unexpected artifact_uri: {} of type: {}.'.format(
                artifact_uri, type(artifact_uri)))

    # Temporarily silence the reusable-component warning while constructing
    # the ContainerOp. Restore the previous value even if construction raises,
    # so the class-level flag is never left clobbered.
    old_warn_value = _container_op.ContainerOp._DISABLE_REUSABLE_COMPONENT_WARNING
    _container_op.ContainerOp._DISABLE_REUSABLE_COMPONENT_WARNING = True
    try:
        task = _container_op.ContainerOp(
            name='importer',
            image='importer_image',  # TODO: need a v1 implementation of importer.
            file_outputs={
                OUTPUT_KEY:
                "{{{{$.outputs.artifacts['{}'].uri}}}}".format(OUTPUT_KEY)
            },
        )
    finally:
        _container_op.ContainerOp._DISABLE_REUSABLE_COMPONENT_WARNING = old_warn_value

    artifact_type_schema = type_utils.get_artifact_type_schema(artifact_class)
    task.importer_spec = _build_importer_spec(
        artifact_uri=artifact_uri, artifact_type_schema=artifact_type_schema)
    task.task_spec = _build_importer_task_spec(importer_base_name=task.name,
                                               artifact_uri=artifact_uri)
    task.component_spec = _build_importer_component_spec(
        importer_base_name=task.name,
        artifact_type_schema=artifact_type_schema)
    task.inputs = [input_param]

    return task
Ejemplo n.º 7
0
def build_component_outputs_spec(
    component_spec: pipeline_spec_pb2.ComponentSpec,
    pipeline_params: List[_pipeline_param.PipelineParam],
) -> None:
  """Populates a component spec's output definitions from pipeline params.

  Args:
    component_spec: The component spec to fill in its outputs spec.
    pipeline_params: The list of pipeline params.
  """
  output_defs = component_spec.output_definitions
  for pipeline_param in pipeline_params or []:
    name = pipeline_param.full_name
    if type_utils.is_parameter_type(pipeline_param.param_type):
      output_defs.parameters[name].type = type_utils.get_parameter_type(
          pipeline_param.param_type)
    elif name not in getattr(output_defs, 'parameters', []):
      # Non-parameter outputs are recorded as artifacts, unless a parameter
      # already exists under the same name.
      output_defs.artifacts[name].artifact_type.CopyFrom(
          type_utils.get_artifact_type_schema(pipeline_param.param_type))
Ejemplo n.º 8
0
def build_component_inputs_spec(
    component_spec: pipeline_spec_pb2.ComponentSpec,
    pipeline_params: List[_pipeline_param.PipelineParam],
) -> None:
    """Populates a component spec's input definitions from pipeline params.

    Args:
      component_spec: The component spec to fill in its inputs spec.
      pipeline_params: The list of pipeline params.
    """
    input_defs = component_spec.input_definitions
    for pipeline_param in pipeline_params:
        name = pipeline_param.full_name
        if type_utils.is_parameter_type(pipeline_param.param_type):
            input_defs.parameters[name].type = (
                type_utils.get_parameter_type(pipeline_param.param_type))
        else:
            # Anything that is not a parameter type is treated as an artifact.
            input_defs.artifacts[name].artifact_type.instance_schema = (
                type_utils.get_artifact_type_schema(pipeline_param.param_type))
Ejemplo n.º 9
0
 def test_get_artifact_type_schema(self, artifact_class_or_type_name,
                                   expected_result):
     """Parameterized check: computed schema matches the expected one."""
     actual = type_utils.get_artifact_type_schema(artifact_class_or_type_name)
     self.assertEqual(expected_result, actual)
Ejemplo n.º 10
0
def _get_custom_job_op(
    task_name: str,
    job_spec: Dict[str, Any],
    input_artifacts: Optional[Dict[str, dsl.PipelineParam]] = None,
    input_parameters: Optional[Dict[str, _ValueOrPipelineParam]] = None,
    output_artifacts: Optional[Dict[str, Type[io_types.Artifact]]] = None,
    output_parameters: Optional[Dict[str, Any]] = None,
) -> AiPlatformCustomJobOp:
    """Gets an AiPlatformCustomJobOp from job spec and I/O definition.

    Args:
      task_name: The name of the task.
      job_spec: The custom job spec dict.
      input_artifacts: Mapping of input name to the upstream PipelineParam
        producing the artifact.
      input_parameters: Mapping of input name to a constant value or a
        PipelineParam.
      output_artifacts: Mapping of output name to the artifact class.
      output_parameters: Mapping of output name to the parameter type.

    Returns:
      An AiPlatformCustomJobOp carrying the built component and task specs.

    Raises:
      RuntimeError: If an input artifact is not a PipelineParam.
      KeyError: If an output (or input) parameter name collides with an
        artifact name.
    """
    # Fix: the I/O dicts default to None but are iterated unconditionally
    # below; normalize omitted arguments to empty dicts.
    input_artifacts = input_artifacts or {}
    input_parameters = input_parameters or {}
    output_artifacts = output_artifacts or {}
    output_parameters = output_parameters or {}

    pipeline_task_spec = pipeline_spec_pb2.PipelineTaskSpec()
    pipeline_component_spec = pipeline_spec_pb2.ComponentSpec()

    pipeline_task_spec.task_info.CopyFrom(
        pipeline_spec_pb2.PipelineTaskInfo(
            name=dsl_utils.sanitize_task_name(task_name)))

    # Iterate through the inputs/outputs declaration to get pipeline component
    # spec.
    for input_name, param in input_parameters.items():
        if isinstance(param, dsl.PipelineParam):
            pipeline_component_spec.input_definitions.parameters[
                input_name].type = type_utils.get_parameter_type(
                    param.param_type)
        else:
            # Constant value: infer the parameter type from its Python type.
            pipeline_component_spec.input_definitions.parameters[
                input_name].type = type_utils.get_parameter_type(type(param))

    for input_name, art in input_artifacts.items():
        if not isinstance(art, dsl.PipelineParam):
            raise RuntimeError(
                'Get unresolved input artifact for input %s. Input '
                'artifacts must be connected to a producer task.' % input_name)
        pipeline_component_spec.input_definitions.artifacts[
            input_name].artifact_type.CopyFrom(
                type_utils.get_artifact_type_schema(art.param_type))

    for output_name, param_type in output_parameters.items():
        pipeline_component_spec.output_definitions.parameters[
            output_name].type = type_utils.get_parameter_type(param_type)

    for output_name, artifact_type in output_artifacts.items():
        pipeline_component_spec.output_definitions.artifacts[
            output_name].artifact_type.CopyFrom(
                type_utils.get_artifact_type_schema(artifact_type))

    pipeline_component_spec.executor_label = dsl_utils.sanitize_executor_label(
        task_name)

    # Iterate through the inputs/outputs specs to get pipeline task spec.
    for input_name, param in input_parameters.items():
        if isinstance(param, dsl.PipelineParam) and param.op_name:
            # If the param has a valid op_name, this should be a pipeline parameter
            # produced by an upstream task.
            pipeline_task_spec.inputs.parameters[input_name].CopyFrom(
                pipeline_spec_pb2.TaskInputsSpec.InputParameterSpec(
                    task_output_parameter=pipeline_spec_pb2.TaskInputsSpec.
                    InputParameterSpec.TaskOutputParameterSpec(
                        producer_task=dsl_utils.sanitize_task_name(
                            param.op_name),
                        output_parameter_key=param.name)))
        elif isinstance(param, dsl.PipelineParam) and not param.op_name:
            # If a valid op_name is missing, this should be a pipeline parameter.
            pipeline_task_spec.inputs.parameters[input_name].CopyFrom(
                pipeline_spec_pb2.TaskInputsSpec.InputParameterSpec(
                    component_input_parameter=param.name))
        else:
            # If this is not a pipeline param, then it should be a value.
            pipeline_task_spec.inputs.parameters[input_name].CopyFrom(
                pipeline_spec_pb2.TaskInputsSpec.InputParameterSpec(
                    runtime_value=pipeline_spec_pb2.ValueOrRuntimeParameter(
                        constant_value=dsl_utils.get_value(param))))

    for input_name, art in input_artifacts.items():
        if art.op_name:
            # If the param has a valid op_name, this should be an artifact produced
            # by an upstream task.
            pipeline_task_spec.inputs.artifacts[input_name].CopyFrom(
                pipeline_spec_pb2.TaskInputsSpec.InputArtifactSpec(
                    task_output_artifact=pipeline_spec_pb2.TaskInputsSpec.
                    InputArtifactSpec.TaskOutputArtifactSpec(
                        producer_task=dsl_utils.sanitize_task_name(
                            art.op_name),
                        output_artifact_key=art.name)))
        else:
            # Otherwise, this should be from the input of the subdag.
            pipeline_task_spec.inputs.artifacts[input_name].CopyFrom(
                pipeline_spec_pb2.TaskInputsSpec.InputArtifactSpec(
                    component_input_artifact=art.name))

    # TODO: Add task dependencies/trigger policies/caching/iterator
    pipeline_task_spec.component_ref.name = dsl_utils.sanitize_component_name(
        task_name)

    # Construct dummy I/O declaration for the op.
    # TODO: resolve name conflict instead of raising errors.
    dummy_outputs = collections.OrderedDict()
    for output_name, _ in output_artifacts.items():
        dummy_outputs[output_name] = _DUMMY_PATH

    for output_name, _ in output_parameters.items():
        if output_name in dummy_outputs:
            raise KeyError(
                'Got name collision for output key %s. Consider renaming '
                'either output parameters or output '
                'artifacts.' % output_name)
        dummy_outputs[output_name] = _DUMMY_PATH

    dummy_inputs = collections.OrderedDict()
    for input_name, art in input_artifacts.items():
        dummy_inputs[input_name] = _DUMMY_PATH
    for input_name, param in input_parameters.items():
        if input_name in dummy_inputs:
            raise KeyError(
                'Got name collision for input key %s. Consider renaming '
                'either input parameters or input '
                'artifacts.' % input_name)
        dummy_inputs[input_name] = _DUMMY_PATH

    # Construct the AIP (Unified) custom job op.
    return AiPlatformCustomJobOp(
        name=task_name,
        custom_job_spec=job_spec,
        component_spec=pipeline_component_spec,
        task_spec=pipeline_task_spec,
        task_inputs=[
            dsl.InputArgumentPath(
                argument=dummy_inputs[input_name],
                input=input_name,
                path=path,
            ) for input_name, path in dummy_inputs.items()
        ],
        task_outputs=dummy_outputs)