Code example #1
def pipeline(project_id='loan-predict'):
    preprocessor = dsl.ContainerOp(
        name='preprocessor',
        image='praveen049/loan-predict-logreg-preproc',
        command=['python', 'preprocessor.py'],
        arguments=[
            '--output-x', '/x.pkl',
            '--output-y', '/y.pkl',
        ],
        file_outputs={
            'x-output': '/x.pkl',
            'y-output': '/y.pkl',
        }
    )
    trainer = dsl.ContainerOp(
        name='trainer',
        image='praveen049/loan-predict-logreg-train',
        command=['python', 'train.py'],
        arguments=[
            '--input_x_path_file', dsl.InputArgumentPath(preprocessor.outputs['x-output']),
            '--input_y_path_file', dsl.InputArgumentPath(preprocessor.outputs['y-output']),
            '--output_model', '/model.pkl',
            '--output_model_path_file', '/model.txt',
        ],
        file_outputs={
            'model': '/model.pkl',
        }
    )
    trainer.after(preprocessor)
Code example #2
def component_with_inline_input_artifact(text: str):
    return dsl.ContainerOp(
        name='component_with_inline_input_artifact',
        image='alpine',
        command=[
            'cat',
            dsl.InputArgumentPath(
                text, path='/tmp/inputs/text/data', input='text')
        ],  # path and input are optional
    )
Code example #3
def component_with_input_artifact(text):
    '''A component that passes text as an input artifact.'''

    return dsl.ContainerOp(
        name='component_with_input_artifact',
        artifact_argument_paths=[
            dsl.InputArgumentPath(argument=text, path='/tmp/inputs/text/data', input='text'), # path and input are optional
        ],
        image='alpine',
        command=['cat', '/tmp/inputs/text/data'],
    )
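Examples #2 and #3 only define the components. Below is a minimal sketch of how such a component could be wired into a pipeline and compiled; it assumes the KFP v1 SDK (kfp.dsl / kfp.compiler), and the pipeline name and output file name are illustrative placeholders, not part of the original examples.

import kfp
from kfp import dsl


@dsl.pipeline(
    name='input-artifact-demo',
    description='Passes a pipeline parameter to a component as an input artifact.')
def input_artifact_pipeline(text: str = 'hello world'):
    # Inside the pipeline function, `text` is a PipelineParam; the DSL
    # materializes it as a file at the artifact path declared by the
    # component (here /tmp/inputs/text/data).
    component_with_input_artifact(text)


if __name__ == '__main__':
    # The output package name is arbitrary for this sketch.
    kfp.compiler.Compiler().compile(input_artifact_pipeline,
                                    'input_artifact_pipeline.yaml')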
Code example #4
def prepare_data_op(input_path, output_path, pvc_path, vol, TAG):
    return dsl.ContainerOp(name='prepare_data',
                           image=f'rsthesis/prepare_data_image:{TAG}',
                           arguments=[
                               '--input_path',
                               dsl.InputArgumentPath(input_path),
                               '--output_path', output_path
                           ],
                           command=["python", "prepare_data.py"],
                           file_outputs={'data_output': output_path},
                           pvolumes={pvc_path: vol},
                           container_kwargs={"image_pull_policy": "Always"})
Code example #5
def pipeline_demo():

    # Each component is an object of type dsl.ContainerOp, which comes from the KFP SDK.
    preprocess_op = dsl.ContainerOp(
        name='Preprocess Data',
        image='ghcr.io/jaredallencarterjac/preprocess:latest',
        arguments=[],
        # /app comes from the WORKDIR of the Dockerfile, where the .npy files are placed.
        file_outputs={
            'x_train': '/app/x_train.npy',
            'x_test': '/app/x_test.npy',
            'y_train': '/app/y_train.npy',
            'y_test': '/app/y_test.npy',
        })
    preprocess_op.set_image_pull_policy("Always")

    train_op = dsl.ContainerOp(
        name='Train Model',
        image='ghcr.io/jaredallencarterjac/train:latest',
        arguments=[
            '--x_train',
            dsl.InputArgumentPath(preprocess_op.outputs['x_train']),
            '--y_train',
            dsl.InputArgumentPath(preprocess_op.outputs['y_train'])
        ],
        # The trained model is packaged here and passed to the test step.
        file_outputs={'model': '/app/model.pkl'},
    )

    train_op.set_image_pull_policy("Always")

    test_op = dsl.ContainerOp(
        name='Test Model',
        image='ghcr.io/jaredallencarterjac/test:latest',
        arguments=[
            '--x_test',
            dsl.InputArgumentPath(preprocess_op.outputs['x_test']), '--y_test',
            dsl.InputArgumentPath(preprocess_op.outputs['y_test']), '--model',
            dsl.InputArgumentPath(train_op.outputs['model'])
        ],
        file_outputs={'mean_squared_error': '/app/output.txt'},
    )

    test_op.set_image_pull_policy("Always")

    deploy_op = dsl.ContainerOp(
        name='Deploy Model',
        image='ghcr.io/jaredallencarterjac/deploy:latest',
        arguments=[
            '--model',
            dsl.InputArgumentPath(train_op.outputs['model'])
        ]).after(test_op)
    deploy_op.set_image_pull_policy("Always")
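For reference, a minimal sketch of submitting a pipeline like the one above; it assumes pipeline_demo is decorated with @dsl.pipeline and that a KFP API server is reachable (the host URL below is a placeholder, not from the original example).

import kfp

if __name__ == '__main__':
    # Placeholder endpoint; replace with the real KFP API server address.
    client = kfp.Client(host='http://localhost:8080')
    # Runs the pipeline directly from the Python function, without a
    # separate compile step.
    client.create_run_from_pipeline_func(pipeline_demo, arguments={})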
Code example #6
def presidential_elections_pipeline():
    _preprocess_op = preprocess_op()

    _train_op = train_op(
        dsl.InputArgumentPath(_preprocess_op.outputs['x_train']),
        dsl.InputArgumentPath(
            _preprocess_op.outputs['y_train'])).after(_preprocess_op)

    _test_op = test_op(dsl.InputArgumentPath(_preprocess_op.outputs['x_test']),
                       dsl.InputArgumentPath(_preprocess_op.outputs['y_test']),
                       dsl.InputArgumentPath(
                           _train_op.outputs['model'])).after(_train_op)

    deploy_model_op(dsl.InputArgumentPath(
        _train_op.outputs['model'])).after(_test_op)
Code example #7
def _get_custom_job_op(
    task_name: str,
    job_spec: Dict[str, Any],
    input_artifacts: Optional[Dict[str, dsl.PipelineParam]] = None,
    input_parameters: Optional[Dict[str, _ValueOrPipelineParam]] = None,
    output_artifacts: Optional[Dict[str, Type[artifact.Artifact]]] = None,
    output_parameters: Optional[Dict[str, Any]] = None,
) -> AiPlatformCustomJobOp:
    """Gets an AiPlatformCustomJobOp from job spec and I/O definition."""
    pipeline_task_spec = pipeline_spec_pb2.PipelineTaskSpec()
    pipeline_component_spec = pipeline_spec_pb2.ComponentSpec()

    pipeline_task_spec.task_info.CopyFrom(
        pipeline_spec_pb2.PipelineTaskInfo(name=task_name))

    # Iterate through the inputs/outputs declaration to get pipeline component
    # spec.
    for input_name, param in input_parameters.items():
        if isinstance(param, dsl.PipelineParam):
            pipeline_component_spec.input_definitions.parameters[
                input_name].type = type_utils.get_parameter_type(
                    param.param_type)
        else:
            pipeline_component_spec.input_definitions.parameters[
                input_name].type = type_utils.get_parameter_type(type(param))

    for input_name, art in input_artifacts.items():
        if not isinstance(art, dsl.PipelineParam):
            raise RuntimeError(
                'Get unresolved input artifact for input %s. Input '
                'artifacts must be connected to a producer task.' % input_name)
        pipeline_component_spec.input_definitions.artifacts[
            input_name].artifact_type.CopyFrom(
                type_utils.get_artifact_type_schema_message(art.param_type))

    for output_name, param_type in output_parameters.items():
        pipeline_component_spec.output_definitions.parameters[
            output_name].type = type_utils.get_parameter_type(param_type)

    for output_name, artifact_type in output_artifacts.items():
        pipeline_component_spec.output_definitions.artifacts[
            output_name].artifact_type.CopyFrom(artifact_type.get_ir_type())

    pipeline_component_spec.executor_label = dsl_utils.sanitize_executor_label(
        task_name)

    # Iterate through the inputs/outputs specs to get pipeline task spec.
    for input_name, param in input_parameters.items():
        if isinstance(param, dsl.PipelineParam) and param.op_name:
            # If the param has a valid op_name, this should be a pipeline parameter
            # produced by an upstream task.
            pipeline_task_spec.inputs.parameters[input_name].CopyFrom(
                pipeline_spec_pb2.TaskInputsSpec.InputParameterSpec(
                    task_output_parameter=pipeline_spec_pb2.TaskInputsSpec.
                    InputParameterSpec.TaskOutputParameterSpec(
                        producer_task='task-{}'.format(param.op_name),
                        output_parameter_key=param.name)))
        elif isinstance(param, dsl.PipelineParam) and not param.op_name:
            # If a valid op_name is missing, this should be a pipeline parameter.
            pipeline_task_spec.inputs.parameters[input_name].CopyFrom(
                pipeline_spec_pb2.TaskInputsSpec.InputParameterSpec(
                    component_input_parameter=param.name))
        else:
            # If this is not a pipeline param, then it should be a value.
            pipeline_task_spec.inputs.parameters[input_name].CopyFrom(
                pipeline_spec_pb2.TaskInputsSpec.InputParameterSpec(
                    runtime_value=pipeline_spec_pb2.ValueOrRuntimeParameter(
                        constant_value=dsl_utils.get_value(param))))

    for input_name, art in input_artifacts.items():
        if art.op_name:
            # If the param has a valid op_name, this should be an artifact produced
            # by an upstream task.
            pipeline_task_spec.inputs.artifacts[input_name].CopyFrom(
                pipeline_spec_pb2.TaskInputsSpec.InputArtifactSpec(
                    task_output_artifact=pipeline_spec_pb2.TaskInputsSpec.
                    InputArtifactSpec.TaskOutputArtifactSpec(
                        producer_task='task-{}'.format(art.op_name),
                        output_artifact_key=art.name)))
        else:
            # Otherwise, this should be from the input of the subdag.
            pipeline_task_spec.inputs.artifacts[input_name].CopyFrom(
                pipeline_spec_pb2.TaskInputsSpec.InputArtifactSpec(
                    component_input_artifact=art.name))

    # TODO: Add task dependencies/trigger policies/caching/iterator
    pipeline_task_spec.component_ref.name = dsl_utils.sanitize_component_name(
        task_name)

    # Construct dummy I/O declaration for the op.
    # TODO: resolve name conflict instead of raising errors.
    dummy_outputs = collections.OrderedDict()
    for output_name, _ in output_artifacts.items():
        dummy_outputs[output_name] = _DUMMY_PATH

    for output_name, _ in output_parameters.items():
        if output_name in dummy_outputs:
            raise KeyError(
                'Got name collision for output key %s. Consider renaming '
                'either output parameters or output '
                'artifacts.' % output_name)
        dummy_outputs[output_name] = _DUMMY_PATH

    dummy_inputs = collections.OrderedDict()
    for input_name, art in input_artifacts.items():
        dummy_inputs[input_name] = _DUMMY_PATH
    for input_name, param in input_parameters.items():
        if input_name in dummy_inputs:
            raise KeyError(
                'Got name collision for input key %s. Consider renaming '
                'either input parameters or input '
                'artifacts.' % input_name)
        dummy_inputs[input_name] = _DUMMY_PATH

    # Construct the AIP (Unified) custom job op.
    return AiPlatformCustomJobOp(
        name=task_name,
        custom_job_spec=job_spec,
        component_spec=pipeline_component_spec,
        task_spec=pipeline_task_spec,
        task_inputs=[
            dsl.InputArgumentPath(
                argument=dummy_inputs[input_name],
                input=input_name,
                path=path,
            ) for input_name, path in dummy_inputs.items()
        ],
        task_outputs=dummy_outputs)
Code example #8
def _create_container_op_from_component_and_arguments(
    component_spec: _structures.ComponentSpec,
    arguments: Mapping[str, Any],
    component_ref: Optional[_structures.ComponentReference] = None,
) -> _container_op.ContainerOp:
  """Instantiates ContainerOp object.

  Args:
    component_spec: The component spec object.
    arguments: The dictionary of component arguments.
    component_ref: (only for v1) The component reference.

  Returns:
    A ContainerOp instance.
  """

  # Add component inputs with default value to the arguments dict if they are not
  # in the arguments dict already.
  arguments = arguments.copy()
  for input_spec in component_spec.inputs or []:
    if input_spec.name not in arguments and input_spec.default is not None:
      default_value = input_spec.default
      if input_spec.type == 'Integer':
        default_value = int(default_value)
      elif input_spec.type == 'Float':
        default_value = float(default_value)
      arguments[input_spec.name] = default_value

  # Check types of the reference arguments and serialize PipelineParams
  original_arguments = arguments
  arguments = arguments.copy()
  for input_name, argument_value in arguments.items():
    if isinstance(argument_value, _pipeline_param.PipelineParam):
      input_type = component_spec._inputs_dict[input_name].type
      argument_type = argument_value.param_type
      types.verify_type_compatibility(
          argument_type, input_type,
          'Incompatible argument passed to the input "{}" of component "{}": '
          .format(input_name, component_spec.name))

      arguments[input_name] = str(argument_value)
    if isinstance(argument_value, _container_op.ContainerOp):
      raise TypeError(
          'ContainerOp object was passed to component as an input argument. '
          'Pass a single output instead.')
  placeholder_resolver = ExtraPlaceholderResolver()
  resolved_cmd = _components._resolve_command_line_and_paths(
      component_spec=component_spec,
      arguments=arguments,
      placeholder_resolver=placeholder_resolver.resolve_placeholder,
  )

  container_spec = component_spec.implementation.container

  old_warn_value = _container_op.ContainerOp._DISABLE_REUSABLE_COMPONENT_WARNING
  _container_op.ContainerOp._DISABLE_REUSABLE_COMPONENT_WARNING = True

  output_paths = collections.OrderedDict(resolved_cmd.output_paths or {})
  output_paths.update(placeholder_resolver.output_paths)
  input_paths = collections.OrderedDict(resolved_cmd.input_paths or {})
  input_paths.update(placeholder_resolver.input_paths)

  artifact_argument_paths = [
      dsl.InputArgumentPath(
          argument=arguments[input_name],
          input=input_name,
          path=path,
      ) for input_name, path in input_paths.items()
  ]

  task = _container_op.ContainerOp(
      name=component_spec.name or _components._default_component_name,
      image=container_spec.image,
      command=resolved_cmd.command,
      arguments=resolved_cmd.args,
      file_outputs=output_paths,
      artifact_argument_paths=artifact_argument_paths,
  )
  _container_op.ContainerOp._DISABLE_REUSABLE_COMPONENT_WARNING = old_warn_value

  component_meta = copy.copy(component_spec)
  task._set_metadata(component_meta)
  if component_ref:
    component_ref_without_spec = copy.copy(component_ref)
    component_ref_without_spec.spec = None
    task._component_ref = component_ref_without_spec

  task._parameter_arguments = resolved_cmd.inputs_consumed_by_value

  # Previously, ContainerOp had strict requirements for the output names, so we
  # had to convert all the names before passing them to the ContainerOp
  # constructor.
  # Outputs with non-pythonic names could not be accessed using their original
  # names. Now ContainerOp supports any output names, so we're now using the
  # original output names. However to support legacy pipelines, we're also
  # adding output references with pythonic names.
  # TODO: Add warning when people use the legacy output names.
  output_names = [
      output_spec.name for output_spec in component_spec.outputs or []
  ]  # Stabilizing the ordering
  output_name_to_python = _naming.generate_unique_name_conversion_table(
      output_names, _naming._sanitize_python_function_name)
  for output_name in output_names:
    pythonic_output_name = output_name_to_python[output_name]
    # Note: Some component outputs are currently missing from task.outputs
    # (e.g. MLPipeline UI Metadata)
    if pythonic_output_name not in task.outputs and output_name in task.outputs:
      task.outputs[pythonic_output_name] = task.outputs[output_name]

  if container_spec.env:
    from kubernetes import client as k8s_client
    for name, value in container_spec.env.items():
      task.container.add_env_variable(
          k8s_client.V1EnvVar(name=name, value=value))

  if component_spec.metadata:
    annotations = component_spec.metadata.annotations or {}
    for key, value in annotations.items():
      task.add_pod_annotation(key, value)
    for key, value in (component_spec.metadata.labels or {}).items():
      task.add_pod_label(key, value)
    # Disabling the caching for the volatile components by default
    if annotations.get('volatile_component', 'false') == 'true':
      task.execution_options.caching_strategy.max_cache_staleness = 'P0D'

  _attach_v2_specs(task, component_spec, original_arguments)

  return task
Code example #9
def create_container_op_from_component_and_arguments(
    component_spec: structures.ComponentSpec,
    arguments: Mapping[str, Any],
    component_ref: structures.ComponentReference = None,
) -> container_op.ContainerOp:
    """Instantiates ContainerOp object.

  Args:
    component_spec: The component spec object.
    arguments: The dictionary of component arguments.
    component_ref: The component reference. Optional.

  Returns:
    A ContainerOp instance.
  """

    pipeline_task_spec = pipeline_spec_pb2.PipelineTaskSpec()
    pipeline_task_spec.task_info.name = component_spec.name
    # Might need to append a suffix to executor_label to ensure its uniqueness?
    pipeline_task_spec.executor_label = component_spec.name

    # Keep track of auto-injected importer spec.
    importer_spec = {}

    # Check types of the reference arguments and serialize PipelineParams
    arguments = arguments.copy()
    for input_name, argument_value in arguments.items():
        if isinstance(argument_value, dsl.PipelineParam):
            input_type = component_spec._inputs_dict[input_name].type
            reference_type = argument_value.param_type
            types.verify_type_compatibility(
                reference_type, input_type,
                'Incompatible argument passed to the input "{}" of component "{}": '
                .format(input_name, component_spec.name))

            arguments[input_name] = str(argument_value)

            if type_utils.is_parameter_type(input_type):
                if argument_value.op_name:
                    pipeline_task_spec.inputs.parameters[
                        input_name].task_output_parameter.producer_task = (
                            argument_value.op_name)
                    pipeline_task_spec.inputs.parameters[
                        input_name].task_output_parameter.output_parameter_key = (
                            argument_value.name)
                else:
                    pipeline_task_spec.inputs.parameters[
                        input_name].runtime_value.runtime_parameter = argument_value.name
            else:
                if argument_value.op_name:
                    pipeline_task_spec.inputs.artifacts[
                        input_name].producer_task = (argument_value.op_name)
                    pipeline_task_spec.inputs.artifacts[
                        input_name].output_artifact_key = (argument_value.name)
                else:
                    # argument_value.op_name could be none, in which case an importer node
                    # will be inserted later.
                    pipeline_task_spec.inputs.artifacts[
                        input_name].producer_task = ''
                    type_schema = type_utils.get_input_artifact_type_schema(
                        input_name, component_spec.inputs)
                    importer_spec[
                        input_name] = importer_node.build_importer_spec(
                            input_type_schema=type_schema,
                            pipeline_param_name=argument_value.name)
        elif isinstance(argument_value, str):
            input_type = component_spec._inputs_dict[input_name].type
            if type_utils.is_parameter_type(input_type):
                pipeline_task_spec.inputs.parameters[
                    input_name].runtime_value.constant_value.string_value = (
                        argument_value)
            else:
                # An importer node with constant value artifact_uri will be inserted.
                pipeline_task_spec.inputs.artifacts[
                    input_name].producer_task = ''
                type_schema = type_utils.get_input_artifact_type_schema(
                    input_name, component_spec.inputs)
                importer_spec[input_name] = importer_node.build_importer_spec(
                    input_type_schema=type_schema,
                    constant_value=argument_value)
        elif isinstance(argument_value, int):
            pipeline_task_spec.inputs.parameters[
                input_name].runtime_value.constant_value.int_value = argument_value
        elif isinstance(argument_value, float):
            pipeline_task_spec.inputs.parameters[
                input_name].runtime_value.constant_value.double_value = argument_value
        elif isinstance(argument_value, dsl.ContainerOp):
            raise TypeError(
                'ContainerOp object {} was passed to component as an input argument. '
                'Pass a single output instead.'.format(input_name))
        else:
            raise NotImplementedError(
                'Input argument supports only the following types: PipelineParam'
                ', str, int, float. Got: "{}".'.format(argument_value))

    for output in component_spec.outputs or []:
        if type_utils.is_parameter_type(output.type):
            pipeline_task_spec.outputs.parameters[
                output.name].type = type_utils.get_parameter_type(output.type)
        else:
            pipeline_task_spec.outputs.artifacts[
                output.name].artifact_type.instance_schema = (
                    type_utils.get_artifact_type_schema(output.type))

    inputs_dict = {
        input_spec.name: input_spec
        for input_spec in component_spec.inputs or []
    }
    outputs_dict = {
        output_spec.name: output_spec
        for output_spec in component_spec.outputs or []
    }

    def _input_artifact_uri_placeholder(input_key: str) -> str:
        if type_utils.is_parameter_type(inputs_dict[input_key].type):
            raise TypeError(
                'Input "{}" with type "{}" cannot be paired with InputUriPlaceholder.'
                .format(input_key, inputs_dict[input_key].type))
        else:
            return "{{{{$.inputs.artifacts['{}'].uri}}}}".format(input_key)

    def _input_artifact_path_placeholder(input_key: str) -> str:
        if type_utils.is_parameter_type(inputs_dict[input_key].type):
            raise TypeError(
                'Input "{}" with type "{}" cannot be paired with InputPathPlaceholder.'
                .format(input_key, inputs_dict[input_key].type))
        else:
            return "{{{{$.inputs.artifacts['{}'].path}}}}".format(input_key)

    def _input_parameter_placeholder(input_key: str) -> str:
        if type_utils.is_parameter_type(inputs_dict[input_key].type):
            return "{{{{$.inputs.parameters['{}']}}}}".format(input_key)
        else:
            raise TypeError(
                'Input "{}" with type "{}" cannot be paired with InputValuePlaceholder.'
                .format(input_key, inputs_dict[input_key].type))

    def _output_artifact_uri_placeholder(output_key: str) -> str:
        if type_utils.is_parameter_type(outputs_dict[output_key].type):
            raise TypeError(
                'Output "{}" with type "{}" cannot be paired with OutputUriPlaceholder.'
                .format(output_key, outputs_dict[output_key].type))
        else:
            return "{{{{$.outputs.artifacts['{}'].uri}}}}".format(output_key)

    def _output_artifact_path_placeholder(output_key: str) -> str:
        return "{{{{$.outputs.artifacts['{}'].path}}}}".format(output_key)

    def _output_parameter_path_placeholder(output_key: str) -> str:
        return "{{{{$.outputs.parameters['{}'].output_file}}}}".format(
            output_key)

    def _resolve_output_path_placeholder(output_key: str) -> str:
        if type_utils.is_parameter_type(outputs_dict[output_key].type):
            return _output_parameter_path_placeholder(output_key)
        else:
            return _output_artifact_path_placeholder(output_key)

    resolved_cmd = _resolve_command_line_and_paths(
        component_spec=component_spec,
        arguments=arguments,
        input_value_generator=_input_parameter_placeholder,
        input_uri_generator=_input_artifact_uri_placeholder,
        output_uri_generator=_output_artifact_uri_placeholder,
        input_path_generator=_input_artifact_path_placeholder,
        output_path_generator=_resolve_output_path_placeholder,
    )

    container_spec = component_spec.implementation.container

    pipeline_container_spec = (
        pipeline_spec_pb2.PipelineDeploymentConfig.PipelineContainerSpec())
    pipeline_container_spec.image = container_spec.image
    pipeline_container_spec.command.extend(resolved_cmd.command)
    pipeline_container_spec.args.extend(resolved_cmd.args)

    output_uris_and_paths = resolved_cmd.output_uris.copy()
    output_uris_and_paths.update(resolved_cmd.output_paths)
    input_uris_and_paths = resolved_cmd.input_uris.copy()
    input_uris_and_paths.update(resolved_cmd.input_paths)

    old_warn_value = dsl.ContainerOp._DISABLE_REUSABLE_COMPONENT_WARNING
    dsl.ContainerOp._DISABLE_REUSABLE_COMPONENT_WARNING = True
    task = container_op.ContainerOp(
        name=component_spec.name or _default_component_name,
        image=container_spec.image,
        command=resolved_cmd.command,
        arguments=resolved_cmd.args,
        file_outputs=output_uris_and_paths,
        artifact_argument_paths=[
            dsl.InputArgumentPath(
                argument=arguments[input_name],
                input=input_name,
                path=path,
            ) for input_name, path in input_uris_and_paths.items()
        ],
    )

    task.task_spec = pipeline_task_spec
    task.importer_spec = importer_spec
    task.container_spec = pipeline_container_spec
    dsl.ContainerOp._DISABLE_REUSABLE_COMPONENT_WARNING = old_warn_value

    component_meta = copy.copy(component_spec)
    task._set_metadata(component_meta)
    component_ref_without_spec = copy.copy(component_ref)
    component_ref_without_spec.spec = None
    task._component_ref = component_ref_without_spec

    # Previously, ContainerOp had strict requirements for the output names, so we
    # had to convert all the names before passing them to the ContainerOp
    # constructor. Outputs with non-pythonic names could not be accessed using
    # their original names. Now ContainerOp supports any output names, so we're
    # now using the original output names. However to support legacy pipelines,
    # we're also adding output references with pythonic names.
    # TODO: Add warning when people use the legacy output names.
    output_names = [
        output_spec.name for output_spec in component_spec.outputs or []
    ]  # Stabilizing the ordering
    output_name_to_python = generate_unique_name_conversion_table(
        output_names, _sanitize_python_function_name)
    for output_name in output_names:
        pythonic_output_name = output_name_to_python[output_name]
        # Note: Some component outputs are currently missing from task.outputs
        # (e.g. MLPipeline UI Metadata)
        if pythonic_output_name not in task.outputs and output_name in task.outputs:
            task.outputs[pythonic_output_name] = task.outputs[output_name]

    if component_spec.metadata:
        annotations = component_spec.metadata.annotations or {}
        for key, value in annotations.items():
            task.add_pod_annotation(key, value)
        for key, value in (component_spec.metadata.labels or {}).items():
            task.add_pod_label(key, value)
        # Disabling the caching for the volatile components by default
        if annotations.get('volatile_component', 'false') == 'true':
            task.execution_options.caching_strategy.max_cache_staleness = 'P0D'

    return task
Code example #10
def iris_train_pipeline(
        kernel: dsl.PipelineParam = dsl.PipelineParam(
            name='kernel', value='linear, poly, rbf, sigmoid or precomputed'),
        C: dsl.PipelineParam = dsl.PipelineParam(
            name='C', value='Float value, default value is 1'),
        n_neighbors: dsl.PipelineParam = dsl.PipelineParam(name='n_neighbors',
                                                           value='int value'),
        n_splits: dsl.PipelineParam = dsl.PipelineParam(
            name='n_splits', value="Number of splits for fold"),
        location: dsl.PipelineParam = dsl.PipelineParam(
            name='location', value='FOLDER_NAME_TO_MODELS'),
        svm_filename: dsl.PipelineParam = dsl.PipelineParam(
            name='svm-filename', value='SVM_NAME'),
        lr_filename: dsl.PipelineParam = dsl.PipelineParam(
            name='logistic-regression-filename',
            value='LOGISTIC_REGRESSION_NAME'),
        dt_filename: dsl.PipelineParam = dsl.PipelineParam(
            name='decision-tree-filename', value='DECISION_TREE_NAME'),
        knn_filename: dsl.PipelineParam = dsl.PipelineParam(
            name='knn-filename', value='KNN_NAME'),
        label1: dsl.PipelineParam = dsl.PipelineParam(name='labels',
                                                      value='Label 1'),
        label2: dsl.PipelineParam = dsl.PipelineParam(name='labels',
                                                      value='Label 2'),
        label3: dsl.PipelineParam = dsl.PipelineParam(name='labels',
                                                      value='Label 3')):
    _load_data = load_op()

    _transform = transform_op(dsl.InputArgumentPath(
        _load_data.outputs['iris'])).after(_load_data)

    _svm = svm_op(
        str(svm_filename) + '.pkl',
        dsl.InputArgumentPath(_transform.outputs['X_train']),
        dsl.InputArgumentPath(_transform.outputs['y_train']),
        dsl.InputArgumentPath(_transform.outputs['X_test']), kernel, C,
        n_splits).after(_transform)

    _lr = lr_op(dsl.InputArgumentPath(_transform.outputs['X_train']),
                dsl.InputArgumentPath(_transform.outputs['y_train']),
                dsl.InputArgumentPath(_transform.outputs['X_test']),
                str(lr_filename) + '.pkl', n_splits).after(_transform)

    _dt = dt_op(dsl.InputArgumentPath(_transform.outputs['X_train']),
                dsl.InputArgumentPath(_transform.outputs['y_train']),
                dsl.InputArgumentPath(_transform.outputs['X_test']),
                str(dt_filename) + '.pkl', n_splits).after(_transform)

    _knn = knn_op(
        dsl.InputArgumentPath(_transform.outputs['X_train']),
        dsl.InputArgumentPath(_transform.outputs['y_train']),
        dsl.InputArgumentPath(_transform.outputs['X_test']),
        n_neighbors,
        n_splits,
        str(knn_filename) + '.pkl',
    ).after(_transform)

    models = [
        dsl.InputArgumentPath(_svm.outputs['svm_model']),
        dsl.InputArgumentPath(_lr.outputs['lr_model']),
        dsl.InputArgumentPath(_dt.outputs['dt_model']),
        dsl.InputArgumentPath(_knn.outputs['knn_model']),
    ]
    _save_s3 = save_s3_op(
        models, location,
        [svm_filename, lr_filename, dt_filename, knn_filename]).after(
            _svm, _lr, _dt,
            _knn).apply(aws.use_aws_secret(secret_name='s3-secrets'))

    _evaluation_knn = evaluation_op(
        dsl.InputArgumentPath(_knn.outputs['knn_predict']),
        dsl.InputArgumentPath(_transform.outputs['y_test']),
        [label1, label2, label3],
        dsl.InputArgumentPath(_transform.outputs['y_train']),
        dsl.InputArgumentPath(_knn.outputs['knn_y_scores'])).after(_knn)
    _evaluation_dt = evaluation_op(
        dsl.InputArgumentPath(_dt.outputs['dt_predict']),
        dsl.InputArgumentPath(_transform.outputs['y_test']),
        [label1, label2, label3],
        dsl.InputArgumentPath(_transform.outputs['y_train']),
        dsl.InputArgumentPath(_dt.outputs['dt_y_scores'])).after(_dt)
    _evaluation_svm = evaluation_op(
        dsl.InputArgumentPath(_svm.outputs['svm_predict']),
        dsl.InputArgumentPath(_transform.outputs['y_test']),
        [label1, label2, label3],
        dsl.InputArgumentPath(_transform.outputs['y_train']),
        dsl.InputArgumentPath(_svm.outputs['svm_y_scores'])).after(_svm)
    _evaluation_lr = evaluation_op(
        dsl.InputArgumentPath(_lr.outputs['lr_predict']),
        dsl.InputArgumentPath(_transform.outputs['y_test']),
        [label1, label2, label3],
        dsl.InputArgumentPath(_transform.outputs['y_train']),
        dsl.InputArgumentPath(_lr.outputs['lr_y_scores'])).after(_lr)