def test_command_and_args(self):
        obj = structures.ContainerSpec(
            image='image', command=['command'], args=['args'])
        self.assertEqual(obj.command, ['command'])
        self.assertEqual(obj.args, ['args'])

        obj = structures.ContainerSpec(image='image', command=[], args=[])
        self.assertEqual(obj.command, None)
        self.assertEqual(obj.args, None)

    def test_env(self):
        obj = structures.ContainerSpec(
            image='image',
            command=['command'],
            args=['args'],
            env={'env': 'env'})
        self.assertEqual(obj.env, {'env': 'env'})

        obj = structures.ContainerSpec(
            image='image', command=[], args=[], env={})
        self.assertEqual(obj.env, None)

    def test_simple_component_spec_save_to_component_yaml(self):
        # Tests writing in the old (less verbose) style and reading back in
        # the new (more verbose) style.
        original_component_spec = structures.ComponentSpec(
            name='component_1',
            implementation=structures.Implementation(
                container=structures.ContainerSpec(
                    image='alpine',
                    command=[
                        'sh',
                        '-c',
                        'set -ex\necho "$0" > "$1"',
                        structures.InputValuePlaceholder(input_name='input1'),
                        structures.OutputParameterPlaceholder(
                            output_name='output1'),
                    ],
                )),
            inputs={'input1': structures.InputSpec(type='String')},
            outputs={'output1': structures.OutputSpec(type='String')},
        )
        from kfp.components import yaml_component
        component = yaml_component.YamlComponent(
            component_spec=original_component_spec)
        with tempfile.TemporaryDirectory() as tempdir:
            output_path = os.path.join(tempdir, 'component.yaml')
            compiler.Compiler().compile(component, output_path)

            # test that it can be read back correctly
            with open(output_path, 'r') as f:
                contents = f.read()
            new_component_spec = structures.ComponentSpec.load_from_component_yaml(
                contents)

        self.assertEqual(original_component_spec, new_component_spec)

    def test_component_spec_with_placeholder_referencing_nonexisting_input_output(
            self):
        with self.assertRaisesRegex(
                ValueError,
                r'^Argument \"InputValuePlaceholder[\s\S]*\'input000\'[\s\S]*references non-existing input.'
        ):
            structures.ComponentSpec(
                name='component_1',
                implementation=structures.Implementation(
                    container=structures.ContainerSpec(
                        image='alpine',
                        command=[
                            'sh',
                            '-c',
                            'set -ex\necho "$0" > "$1"',
                            structures.InputValuePlaceholder(
                                input_name='input000'),
                            structures.OutputPathPlaceholder(
                                output_name='output1'),
                        ],
                    )),
                inputs={'input1': structures.InputSpec(type='String')},
                outputs={'output1': structures.OutputSpec(type='String')},
            )

        with self.assertRaisesRegex(
                ValueError,
                r'^Argument \"OutputPathPlaceholder[\s\S]*\'output000\'[\s\S]*references non-existing output.'
        ):
            structures.ComponentSpec(
                name='component_1',
                implementation=structures.Implementation(
                    container=structures.ContainerSpec(
                        image='alpine',
                        command=[
                            'sh',
                            '-c',
                            'set -ex\necho "$0" > "$1"',
                            structures.InputValuePlaceholder(
                                input_name='input1'),
                            structures.OutputPathPlaceholder(
                                output_name='output000'),
                        ],
                    )),
                inputs={'input1': structures.InputSpec(type='String')},
                outputs={'output1': structures.OutputSpec(type='String')},
            )

Example #5

    def test_create_pipeline_task_valid(self):
        expected_component_spec = structures.ComponentSpec(
            name='component1',
            implementation=structures.Implementation(
                container=structures.ContainerSpec(
                    image='alpine',
                    command=['sh', '-c', 'echo "$0" >> "$1"'],
                    args=[
                        structures.InputValuePlaceholder(input_name='input1'),
                        structures.OutputPathPlaceholder(
                            output_name='output1'),
                    ],
                )),
            inputs={
                'input1': structures.InputSpec(type='String'),
            },
            outputs={
                'output1': structures.OutputSpec(type='Artifact'),
            },
        )
        expected_task_spec = structures.TaskSpec(
            name='component1',
            inputs={'input1': 'value'},
            dependent_tasks=[],
            component_ref='component1',
        )
        expected_container_spec = structures.ContainerSpec(
            image='alpine',
            command=['sh', '-c', 'echo "$0" >> "$1"'],
            args=[
                "{{$.inputs.parameters['input1']}}",
                "{{$.outputs.artifacts['output1'].path}}",
            ],
        )

        task = pipeline_task.PipelineTask(
            component_spec=structures.ComponentSpec.load_from_component_yaml(
                V2_YAML),
            args={'input1': 'value'},
        )
        self.assertEqual(task.task_spec, expected_task_spec)
        self.assertEqual(task.component_spec, expected_component_spec)
        self.assertEqual(task.container_spec, expected_container_spec)

Example #6

    def test_simple_component_spec_save_to_component_yaml(self):
        open_mock = mock.mock_open()
        expected_yaml = textwrap.dedent("""\
        implementation:
          container:
            command:
            - sh
            - -c
            - 'set -ex

              echo "$0" > "$1"'
            - {inputValue: input1}
            - {outputPath: output1}
            image: alpine
        inputs:
          input1: {type: String}
        name: component_1
        outputs:
          output1: {type: String}
        """)

        with mock.patch(
                "builtins.open", open_mock, create=True), self.assertWarnsRegex(
                    DeprecationWarning, r"Compiling to JSON is deprecated"):
            structures.ComponentSpec(
                name='component_1',
                implementation=structures.Implementation(
                    container=structures.ContainerSpec(
                        image='alpine',
                        command=[
                            'sh',
                            '-c',
                            'set -ex\necho "$0" > "$1"',
                            structures.InputValuePlaceholder(
                                input_name='input1'),
                            structures.OutputPathPlaceholder(
                                output_name='output1'),
                        ],
                    )),
                inputs={
                    'input1': structures.InputSpec(type='String')
                },
                outputs={
                    'output1': structures.OutputSpec(type='String')
                },
            ).save_to_component_yaml('test_save_file.json')

        open_mock.assert_called_once_with('test_save_file.json', 'w')

    def test_from_container_dict_no_placeholders(self):
        component_spec = structures.ComponentSpec(
            name='test',
            implementation=structures.Implementation(
                container=structures.ContainerSpec(
                    image='python:3.7',
                    command=[
                        'sh', '-c',
                        '\nif ! [ -x "$(command -v pip)" ]; then\n    python3 -m ensurepip || python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet     --no-warn-script-location \'kfp==2.0.0-alpha.2\' && "$0" "$@"\n',
                        'sh', '-ec',
                        'program_path=$(mktemp -d)\nprintf "%s" "$0" > "$program_path/ephemeral_component.py"\npython3 -m kfp.components.executor_main                         --component_module_path                         "$program_path/ephemeral_component.py"                         "$@"\n',
                        '\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import *\n\ndef concat_message(first: str, second: str) -> str:\n    return first + second\n\n'
                    ],
                    args=[
                        '--executor_input', '{{$}}', '--function_to_execute',
                        'concat_message'
                    ],
                    env=None,
                    resources=None),
                graph=None,
                importer=None),
            description=None,
            inputs={
                'first': structures.InputSpec(type='String', default=None),
                'second': structures.InputSpec(type='String', default=None)
            },
            outputs={'Output': structures.OutputSpec(type='String')})
        container_dict = {
            'args': [
                '--executor_input', '{{$}}', '--function_to_execute', 'fail_op'
            ],
            'command': [
                'sh', '-c',
                '\nif ! [ -x "$(command -v pip)" ]; then\n    python3 -m ensurepip || python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet     --no-warn-script-location \'kfp==2.0.0-alpha.2\' && "$0" "$@"\n',
                'sh', '-ec',
                'program_path=$(mktemp -d)\nprintf "%s" "$0" > "$program_path/ephemeral_component.py"\npython3 -m kfp.components.executor_main                         --component_module_path                         "$program_path/ephemeral_component.py"                         "$@"\n',
                '\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import *\n\ndef fail_op(message: str):\n    """Fails."""\n    import sys\n    print(message)\n    sys.exit(1)\n\n'
            ],
            'image': 'python:3.7'
        }

        loaded_container_spec = structures.ContainerSpec.from_container_dict(
            container_dict)
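        # Minimal sanity check (an assumed completion; the captured snippet
        # ends without an assertion), verifying that a plain field survives
        # the round trip from the dict:
        self.assertEqual(loaded_container_spec.image, 'python:3.7')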

Example #8

    def test_simple_component_spec_load_from_v2_component_yaml(self):
        component_yaml_v2 = textwrap.dedent("""\
        name: component_1
        inputs:
          input1:
            type: String
        outputs:
          output1:
            type: String
        implementation:
          container:
            image: alpine
            command:
            - sh
            - -c
            - 'set -ex

                echo "$0" > "$1"'
            - inputValue: input1
            - outputPath: output1
        """)

        generated_spec = structures.ComponentSpec.load_from_component_yaml(
            component_yaml_v2)

        expected_spec = structures.ComponentSpec(
            name='component_1',
            implementation=structures.Implementation(
                container=structures.ContainerSpec(
                    image='alpine',
                    command=[
                        'sh',
                        '-c',
                        'set -ex\necho "$0" > "$1"',
                        structures.InputValuePlaceholder(input_name='input1'),
                        structures.OutputPathPlaceholder(
                            output_name='output1'),
                    ],
                )),
            inputs={'input1': structures.InputSpec(type='String')},
            outputs={'output1': structures.OutputSpec(type='String')})
        self.assertEqual(generated_spec, expected_spec)

    def test_resolve_concat_placeholder(self):
        expected_container_spec = structures.ContainerSpec(
            image='alpine',
            command=[
                'sh',
                '-c',
                'echo "$0"',
                "{{$.inputs.parameters['input1']}}+{{$.inputs.parameters['input2']}}",
            ],
        )

        task = pipeline_task.PipelineTask(
            component_spec=structures.ComponentSpec.load_from_component_yaml(
                V2_YAML_CONCAT_PLACEHOLDER),
            args={
                'input1': '1',
                'input2': '2',
            },
        )
        self.assertEqual(task.container_spec, expected_container_spec)

Example #10

    def to_v1_component_spec(self) -> v1_structures.ComponentSpec:
        """Converts to v1 ComponentSpec.

        Needed until downstream consumers accept the new ComponentSpec.

        Returns:
            Component spec in the form of a v1 ComponentSpec.
        """

        def _transform_arg(arg: ValidCommandArgs) -> Any:
            if isinstance(arg, str):
                return arg
            if isinstance(arg, InputValuePlaceholder):
                return v1_structures.InputValuePlaceholder(arg.input_name)
            if isinstance(arg, InputPathPlaceholder):
                return v1_structures.InputPathPlaceholder(arg.input_name)
            if isinstance(arg, InputUriPlaceholder):
                return v1_structures.InputUriPlaceholder(arg.input_name)
            if isinstance(arg, OutputPathPlaceholder):
                return v1_structures.OutputPathPlaceholder(arg.output_name)
            if isinstance(arg, OutputUriPlaceholder):
                return v1_structures.OutputUriPlaceholder(arg.output_name)
            if isinstance(arg, IfPresentPlaceholder):
                return v1_structures.IfPlaceholder(arg.if_structure)
            if isinstance(arg, ConcatPlaceholder):
                return v1_structures.ConcatPlaceholder(arg.concat)
            raise ValueError(
                f'Unexpected command/argument type: "{arg}" of type "{type(arg)}".'
            )

        return v1_structures.ComponentSpec(
            name=self.name,
            inputs=[
                v1_structures.InputSpec(
                    name=name,
                    type=input_spec.type,
                    default=input_spec.default,
                ) for name, input_spec in self.inputs.items()
            ],
            outputs=[
                v1_structures.OutputSpec(
                    name=name,
                    type=output_spec.type,
                ) for name, output_spec in self.outputs.items()
            ],
            implementation=v1_structures.
            ContainerImplementation(container=v1_structures.ContainerSpec(
                image=self.implementation.container.image,
                command=[
                    _transform_arg(cmd)
                    for cmd in self.implementation.container.commands or []
                ],
                args=[
                    _transform_arg(arg)
                    for arg in self.implementation.container.arguments or []
                ],
                env={
                    name: _transform_arg(value) for name, value in
                    (self.implementation.container.env or {}).items()
                },
            )),
        )

Example #11

def create_custom_training_job_op_from_component(
    component_spec: Callable,  # pylint: disable=g-bare-generic
    display_name: Optional[str] = '',
    replica_count: Optional[int] = 1,
    machine_type: Optional[str] = 'n1-standard-4',
    accelerator_type: Optional[str] = '',
    accelerator_count: Optional[int] = 1,
    boot_disk_type: Optional[str] = 'pd-ssd',
    boot_disk_size_gb: Optional[int] = 100,
    timeout: Optional[str] = '',
    restart_job_on_worker_restart: Optional[bool] = False,
    service_account: Optional[str] = '',
    network: Optional[str] = '',
    encryption_spec_key_name: Optional[str] = '',
    tensorboard: Optional[str] = '',
    enable_web_access: Optional[bool] = False,
    base_output_directory: Optional[str] = '',
    labels: Optional[Dict[str, str]] = None,
) -> Callable:  # pylint: disable=g-bare-generic
    """Create a component spec that runs a custom training in Vertex AI.

  This utility converts a given component to a CustomTrainingJobOp that runs a
  custom training in Vertex AI. This simplifies the creation of custom training
  jobs. All Inputs and Outputs of the supplied component will be copied over to
  the constructed training job.

  Note that this utility constructs a ClusterSpec where the master and all the
  workers use the same spec, meaning all disk/machine spec related parameters
  will apply to all replicas. This is suitable for use cases such as training
  with MultiWorkerMirroredStrategy or MirroredStrategy.

  This component does not support Vertex AI Python training applications.

  For more details on Vertex AI Training service, please refer to
  https://cloud.google.com/vertex-ai/docs/training/create-custom-job

  Args:
    component_spec: The task (ContainerOp) object to run as Vertex AI custom
      job.
    display_name (Optional[str]): The name of the custom job. If not provided
      the component_spec.name will be used instead.
    replica_count (Optional[int]): The count of instances in the cluster. One
      replica always counts towards the master in worker_pool_spec[0] and the
      remaining replicas will be allocated in worker_pool_spec[1]. For more
      details see
      https://cloud.google.com/vertex-ai/docs/training/distributed-training#configure_a_distributed_training_job.
    machine_type (Optional[str]): The type of the machine to run the custom job.
      The default value is "n1-standard-4".  For more details about this input
      config, see
      https://cloud.google.com/vertex-ai/docs/training/configure-compute#machine-types.
    accelerator_type (Optional[str]): The type of accelerator(s) that may be
      attached to the machine as per accelerator_count.  For more details about
      this input config, see
      https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec#acceleratortype.
    accelerator_count (Optional[int]): The number of accelerators to attach to
      the machine. Defaults to 1 if accelerator_type is set.
    boot_disk_type (Optional[str]):
      Type of the boot disk (default is "pd-ssd"). Valid values: "pd-ssd"
        (Persistent Disk Solid State Drive) or "pd-standard" (Persistent Disk
        Hard Disk Drive).
    boot_disk_size_gb (Optional[int]): Size in GB of the boot disk (default is
      100GB).
    timeout (Optional[str]): The maximum job running time. The default is 7
      days. A duration in seconds with up to nine fractional digits, terminated
      by 's', for example: "3.5s".
    restart_job_on_worker_restart (Optional[bool]): Restarts the entire
      CustomJob if a worker gets restarted. This feature can be used by
      distributed training jobs that are not resilient to workers leaving and
      joining a job.
    service_account (Optional[str]): Sets the default service account for
      workload run-as account. The service account running the pipeline
      (https://cloud.google.com/vertex-ai/docs/pipelines/configure-project#service-account)
      submitting jobs must have act-as permission on this run-as account. If
      unspecified, the Vertex AI Custom Code Service Agent
      (https://cloud.google.com/vertex-ai/docs/general/access-control#service-agents)
      for the CustomJob's project is used.
    network (Optional[str]): The full name of the Compute Engine network to
      which the job should be peered. For example,
      projects/12345/global/networks/myVPC. Format is of the form
      projects/{project}/global/networks/{network}. Where {project} is a project
      number, as in 12345, and {network} is a network name. Private services
      access must already be configured for the network. If left unspecified,
      the job is not peered with any network.
    encryption_spec_key_name (Optional[str]): Customer-managed encryption key
      options for the CustomJob. If this is set, then all resources created by
      the CustomJob will be encrypted with the provided encryption key.
    tensorboard (Optional[str]): The name of a Vertex AI Tensorboard resource to
      which this CustomJob will upload Tensorboard logs.
    enable_web_access (Optional[bool]): Whether you want Vertex AI to enable
      [interactive shell access](https://cloud.google.com/vertex-ai/docs/training/monitor-debug-interactive-shell)
      to training containers.
      If set to `true`, you can access interactive shells at the URIs given
      by [CustomJob.web_access_uris][].
    base_output_directory (Optional[str]): The Cloud Storage location to store
      the output of this CustomJob or HyperparameterTuningJob. For more
      details, see
      https://cloud.google.com/vertex-ai/docs/reference/rest/v1/GcsDestination
    labels (Optional[Dict[str, str]]): The labels with user-defined metadata to
      organize CustomJobs.
      See https://goo.gl/xmQnxf for more information.

  Returns:
    A Custom Job component operator corresponding to the input component
    operator.

  """
    job_spec = {}
    input_specs = []
    output_specs = []

    # pytype: disable=attribute-error

    if component_spec.component_spec.inputs:
        input_specs = component_spec.component_spec.inputs
    if component_spec.component_spec.outputs:
        output_specs = component_spec.component_spec.outputs

    def _is_output_parameter(output_key: str) -> bool:
        for output in component_spec.component_spec.outputs:
            if output.name == output_key:
                return type_utils.is_parameter_type(output.type)
        return False

    worker_pool_spec = {
        'machine_spec': {
            'machine_type': machine_type
        },
        'replica_count': 1,
        'container_spec': {
            'image_uri':
            component_spec.component_spec.implementation.container.image,
        }
    }
    if component_spec.component_spec.implementation.container.command:
        container_command_copy = component_spec.component_spec.implementation.container.command.copy(
        )
        dsl_utils.resolve_cmd_lines(container_command_copy,
                                    _is_output_parameter)
        # Replace the executor placeholder with the JSON-escaped placeholder.
        for idx, val in enumerate(container_command_copy):
            if val == '{{{{$}}}}':
                container_command_copy[
                    idx] = _EXECUTOR_PLACE_HOLDER_REPLACEMENT
        worker_pool_spec['container_spec']['command'] = container_command_copy

    if component_spec.component_spec.implementation.container.args:
        container_args_copy = component_spec.component_spec.implementation.container.args.copy(
        )
        dsl_utils.resolve_cmd_lines(container_args_copy, _is_output_parameter)
        # Replace the executor placeholder with the JSON-escaped placeholder.
        for idx, val in enumerate(container_args_copy):
            if val == '{{{{$}}}}':
                container_args_copy[idx] = _EXECUTOR_PLACE_HOLDER_REPLACEMENT
        worker_pool_spec['container_spec']['args'] = container_args_copy
    if accelerator_type:
        worker_pool_spec['machine_spec']['accelerator_type'] = accelerator_type
        worker_pool_spec['machine_spec'][
            'accelerator_count'] = accelerator_count
    if boot_disk_type:
        if 'disk_spec' not in worker_pool_spec:
            worker_pool_spec['disk_spec'] = {}
        worker_pool_spec['disk_spec']['boot_disk_type'] = boot_disk_type
    if boot_disk_size_gb:
        if 'disk_spec' not in worker_pool_spec:
            worker_pool_spec['disk_spec'] = {}
        worker_pool_spec['disk_spec']['boot_disk_size_gb'] = boot_disk_size_gb

    job_spec['worker_pool_specs'] = [worker_pool_spec]
    if int(replica_count) > 1:
        additional_worker_pool_spec = copy.deepcopy(worker_pool_spec)
        additional_worker_pool_spec['replica_count'] = str(replica_count - 1)
        job_spec['worker_pool_specs'].append(additional_worker_pool_spec)

    # TODO(chavoshi): Use input parameter instead of hard coded string label.
    # This requires Dictionary input type to be supported in V2.
    if labels is not None:
        job_spec['labels'] = labels

    if timeout:
        if 'scheduling' not in job_spec:
            job_spec['scheduling'] = {}
        job_spec['scheduling']['timeout'] = timeout
    if restart_job_on_worker_restart:
        if 'scheduling' not in job_spec:
            job_spec['scheduling'] = {}
        job_spec['scheduling'][
            'restart_job_on_worker_restart'] = restart_job_on_worker_restart
    if enable_web_access:
        job_spec['enable_web_access'] = enable_web_access

    if encryption_spec_key_name:
        job_spec['encryption_spec'] = {}
        job_spec['encryption_spec'][
            'kms_key_name'] = "{{$.inputs.parameters['encryption_spec_key_name']}}"
        input_specs.append(
            structures.InputSpec(name='encryption_spec_key_name',
                                 type='String',
                                 optional=True,
                                 default=encryption_spec_key_name), )

    # Remove any existing service_account, network, tensorboard, and
    # base_output_directory entries from the component input list.
    input_specs[:] = [
        input_spec for input_spec in input_specs
        if input_spec.name not in ('service_account', 'network', 'tensorboard',
                                   'base_output_directory')
    ]
    job_spec['service_account'] = "{{$.inputs.parameters['service_account']}}"
    job_spec['network'] = "{{$.inputs.parameters['network']}}"

    job_spec['tensorboard'] = "{{$.inputs.parameters['tensorboard']}}"
    job_spec['base_output_directory'] = {}
    job_spec['base_output_directory'][
        'output_uri_prefix'] = "{{$.inputs.parameters['base_output_directory']}}"
    custom_job_payload = {
        'display_name': display_name or component_spec.component_spec.name,
        'job_spec': job_spec
    }

    custom_job_component_spec = structures.ComponentSpec(
        name=component_spec.component_spec.name,
        inputs=input_specs + [
            structures.InputSpec(name='base_output_directory',
                                 type='String',
                                 optional=True,
                                 default=base_output_directory),
            structures.InputSpec(name='tensorboard',
                                 type='String',
                                 optional=True,
                                 default=tensorboard),
            structures.InputSpec(
                name='network', type='String', optional=True, default=network),
            structures.InputSpec(name='service_account',
                                 type='String',
                                 optional=True,
                                 default=service_account),
            structures.InputSpec(name='project', type='String'),
            structures.InputSpec(name='location', type='String')
        ],
        outputs=output_specs +
        [structures.OutputSpec(name='gcp_resources', type='String')],
        implementation=structures.
        ContainerImplementation(container=structures.ContainerSpec(
            image=_DEFAULT_CUSTOM_JOB_CONTAINER_IMAGE,
            command=[
                'python3', '-u', '-m',
                'google_cloud_pipeline_components.container.v1.gcp_launcher.launcher'
            ],
            args=[
                '--type',
                'CustomJob',
                '--payload',
                json.dumps(custom_job_payload),
                '--project',
                structures.InputValuePlaceholder(input_name='project'),
                '--location',
                structures.InputValuePlaceholder(input_name='location'),
                '--gcp_resources',
                structures.OutputPathPlaceholder(output_name='gcp_resources'),
            ],
        )))

    # pytype: enable=attribute-error

    component_path = tempfile.mktemp()
    custom_job_component_spec.save(component_path)
    return components.load_component_from_file(component_path)
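
A usage sketch, not from the source: wrapping a component loaded from a
hypothetical YAML file into a Vertex AI custom training job op. The keyword
arguments mirror the signature defined above.

my_training_component = components.load_component_from_file(
    'training_component.yaml')  # hypothetical component definition
custom_job_op = create_custom_training_job_op_from_component(
    component_spec=my_training_component,
    display_name='my-training-job',
    replica_count=2,
    machine_type='n1-standard-8',
    accelerator_type='NVIDIA_TESLA_T4',
    accelerator_count=1,
)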

class TestComponent(base_component.BaseComponent):
    def execute(self, *args, **kwargs):
        pass


component_op = TestComponent(component_spec=structures.ComponentSpec(
    name='component_1',
    implementation=structures.Implementation(
        container=structures.ContainerSpec(
            image='alpine',
            command=[
                'sh',
                '-c',
                'set -ex\necho "$0" "$1" "$2" > "$3"',
                structures.InputValuePlaceholder(input_name='input1'),
                structures.InputValuePlaceholder(input_name='input2'),
                structures.InputValuePlaceholder(input_name='input3'),
                structures.OutputPathPlaceholder(output_name='output1'),
            ],
        )),
    inputs={
        'input1': structures.InputSpec(type='String'),
        'input2': structures.InputSpec(type='Integer'),
        'input3': structures.InputSpec(type='Float', default=3.14),
        'input4': structures.InputSpec(type='Optional[Float]', default=None),
    },
    outputs={
        'output1': structures.OutputSpec(type='String'),
    },
))

Example #13

    def test_if_placeholder(self):
        compiled_yaml = textwrap.dedent("""
components:
  comp-if:
    executorLabel: exec-if
    inputDefinitions:
      parameters:
        optional_input_1:
          parameterType: STRING
deploymentSpec:
  executors:
    exec-if:
      container:
        args:
        - 'input: '
        - '{{$.inputs.parameters[''optional_input_1'']}}'
        command:
        - sh
        - -c
        - echo "$0" "$1"
        image: alpine
pipelineInfo:
  name: if
root:
  dag:
    tasks:
      if:
        cachingOptions:
          enableCache: true
        componentRef:
          name: comp-if
        inputs:
          parameters:
            optional_input_1:
              componentInputParameter: optional_input_1
        taskInfo:
          name: if
  inputDefinitions:
    parameters:
      optional_input_1:
        parameterType: STRING
schemaVersion: 2.1.0
sdkVersion: kfp-2.0.0-alpha.2""")
        loaded_component_spec = structures.ComponentSpec.load_from_component_yaml(
            compiled_yaml)
        component_spec = structures.ComponentSpec(
            name='if',
            implementation=structures.Implementation(
                container=structures.ContainerSpec(
                    image='alpine',
                    command=['sh', '-c', 'echo "$0" "$1"'],
                    args=[
                        'input: ',
                        structures.InputValuePlaceholder(
                            input_name='optional_input_1')
                    ],
                    env=None,
                    resources=None),
                graph=None,
                importer=None),
            description=None,
            inputs={
                'optional_input_1':
                    structures.InputSpec(type='String', default=None)
            },
            outputs=None)
        self.assertEqual(loaded_component_spec, component_spec)

Example #14

    def test_simple_placeholder(self):
        compiled_yaml = textwrap.dedent("""
components:
  comp-component1:
    executorLabel: exec-component1
    inputDefinitions:
      parameters:
        input1:
          parameterType: STRING
    outputDefinitions:
      artifacts:
        output1:
          artifactType:
            schemaTitle: system.Artifact
            schemaVersion: 0.0.1
deploymentSpec:
  executors:
    exec-component1:
      container:
        args:
        - '{{$.inputs.parameters[''input1'']}}'
        - '{{$.outputs.artifacts[''output1''].path}}'
        command:
        - sh
        - -c
        - echo "$0" >> "$1"
        image: alpine
pipelineInfo:
  name: component1
root:
  dag:
    tasks:
      component1:
        cachingOptions:
          enableCache: true
        componentRef:
          name: comp-component1
        inputs:
          parameters:
            input1:
              componentInputParameter: input1
        taskInfo:
          name: component1
  inputDefinitions:
    parameters:
      input1:
        parameterType: STRING
schemaVersion: 2.1.0
sdkVersion: kfp-2.0.0-alpha.2""")
        loaded_component_spec = structures.ComponentSpec.load_from_component_yaml(
            compiled_yaml)
        component_spec = structures.ComponentSpec(
            name='component1',
            implementation=structures.Implementation(
                container=structures.ContainerSpec(
                    image='alpine',
                    command=['sh', '-c', 'echo "$0" >> "$1"'],
                    args=[
                        structures.InputValuePlaceholder(input_name='input1'),
                        structures.OutputPathPlaceholder(output_name='output1')
                    ],
                    env=None,
                    resources=None),
                graph=None,
                importer=None),
            description=None,
            inputs={
                'input1': structures.InputSpec(type='String', default=None)
            },
            outputs={'output1': structures.OutputSpec(type='Artifact')})
        self.assertEqual(loaded_component_spec, component_spec)

Example #15

def create_component_from_func(func: Callable,
                               base_image: Optional[str] = None,
                               target_image: Optional[str] = None,
                               packages_to_install: Optional[List[str]] = None,
                               output_component_file: Optional[str] = None,
                               install_kfp_package: bool = True,
                               kfp_package_path: Optional[str] = None):
    """Implementation for the @component decorator.

    The decorator is defined under component_decorator.py. See the decorator
    for the canonical documentation for this function.
    """
    packages_to_install = packages_to_install or []

    if install_kfp_package and target_image is None:
        if kfp_package_path is None:
            kfp_package_path = _get_default_kfp_package_path()
        packages_to_install.append(kfp_package_path)

    packages_to_install_command = _get_packages_to_install_command(
        package_list=packages_to_install)

    command = []
    args = []
    if base_image is None:
        base_image = _DEFAULT_BASE_IMAGE

    component_image = base_image

    if target_image:
        component_image = target_image
        command, args = _get_command_and_args_for_containerized_component(
            function_name=func.__name__, )
    else:
        command, args = _get_command_and_args_for_lightweight_component(
            func=func)

    component_spec = extract_component_interface(func)
    component_spec.implementation = structures.ContainerImplementation(
        container=structures.ContainerSpec(
            image=component_image,
            command=packages_to_install_command + command,
            args=args,
        ))

    module_path = pathlib.Path(inspect.getsourcefile(func))
    module_path = module_path.resolve()

    component_name = _python_function_name_to_component_name(func.__name__)
    component_info = ComponentInfo(name=component_name,
                                   function_name=func.__name__,
                                   func=func,
                                   target_image=target_image,
                                   module_path=module_path,
                                   component_spec=component_spec,
                                   output_component_file=output_component_file,
                                   base_image=base_image)

    if REGISTERED_MODULES is not None:
        REGISTERED_MODULES[component_name] = component_info

    if output_component_file:
        component_spec.save(output_component_file)

    # TODO(KFPv2): Replace with v2 BaseComponent.
    task_factory = _components._create_task_factory_from_component_spec(
        component_spec)

    # TODO(KFPv2): Once this returns a BaseComponent, we should check for this
    # in the Executor, and get the appropriate callable. For now, we'll look for
    # this special attribute to hold the Python function in the task factory
    # during runtime.
    setattr(task_factory, 'python_func', func)

    return task_factory
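
A usage sketch, not from the source: wrapping a plain Python function with the
factory above. The function and package pin are hypothetical.

def add(a: float, b: float) -> float:
    return a + b

add_op = create_component_from_func(
    func=add,
    base_image='python:3.9',
    packages_to_install=['numpy==1.21.6'],
)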

Example #16

    def test_component_spec_load_from_v1_component_yaml(self):
        component_yaml_v1 = textwrap.dedent("""\
        name: Component with 2 inputs and 2 outputs
        inputs:
        - {name: Input parameter, type: String}
        - {name: Input artifact}
        outputs:
        - {name: Output 1}
        - {name: Output 2}
        implementation:
          container:
            image: busybox
            command: [sh, -c, '
                mkdir -p $(dirname "$2")
                mkdir -p $(dirname "$3")
                echo "$0" > "$2"
                cp "$1" "$3"
                '
            ]
            args:
            - {inputValue: Input parameter}
            - {inputPath: Input artifact}
            - {outputPath: Output 1}
            - {outputPath: Output 2}
        """)

        generated_spec = structures.ComponentSpec.load_from_component_yaml(
            component_yaml_v1)

        expected_spec = structures.ComponentSpec(
            name='Component with 2 inputs and 2 outputs',
            implementation=structures.Implementation(
                container=structures.ContainerSpec(
                    image='busybox',
                    command=[
                        'sh',
                        '-c',
                        (' mkdir -p $(dirname "$2") mkdir -p $(dirname "$3") '
                         'echo "$0" > "$2" cp "$1" "$3" '),
                    ],
                    args=[
                        structures.InputValuePlaceholder(
                            input_name='input_parameter'),
                        structures.InputPathPlaceholder(
                            input_name='input_artifact'),
                        structures.OutputPathPlaceholder(
                            output_name='output_1'),
                        structures.OutputPathPlaceholder(
                            output_name='output_2'),
                    ],
                    env={},
                )),
            inputs={
                'input_parameter': structures.InputSpec(type='String'),
                'input_artifact': structures.InputSpec(type='Artifact')
            },
            outputs={
                'output_1': structures.OutputSpec(type='Artifact'),
                'output_2': structures.OutputSpec(type='Artifact'),
            })
        self.assertEqual(generated_spec, expected_spec)

Example #17

    def test_simple_component_spec_load_from_v2_component_yaml(self):
        component_yaml_v2 = textwrap.dedent("""\
components:
  comp-component-1:
    executorLabel: exec-component-1
    inputDefinitions:
      parameters:
        input1:
          parameterType: STRING
    outputDefinitions:
      parameters:
        output1:
          parameterType: STRING
deploymentSpec:
  executors:
    exec-component-1:
      container:
        command:
        - sh
        - -c
        - 'set -ex

          echo "$0" > "$1"'
        - '{{$.inputs.parameters[''input1'']}}'
        - '{{$.outputs.parameters[''output1''].output_file}}'
        image: alpine
pipelineInfo:
  name: component-1
root:
  dag:
    tasks:
      component-1:
        cachingOptions:
          enableCache: true
        componentRef:
          name: comp-component-1
        inputs:
          parameters:
            input1:
              componentInputParameter: input1
        taskInfo:
          name: component-1
  inputDefinitions:
    parameters:
      input1:
        parameterType: STRING
schemaVersion: 2.1.0
sdkVersion: kfp-2.0.0-alpha.2
        """)

        generated_spec = structures.ComponentSpec.load_from_component_yaml(
            component_yaml_v2)

        expected_spec = structures.ComponentSpec(
            name='component-1',
            implementation=structures.Implementation(
                container=structures.ContainerSpec(
                    image='alpine',
                    command=[
                        'sh',
                        '-c',
                        'set -ex\necho "$0" > "$1"',
                        structures.InputValuePlaceholder(input_name='input1'),
                        structures.OutputParameterPlaceholder(
                            output_name='output1'),
                    ],
                )),
            inputs={'input1': structures.InputSpec(type='String')},
            outputs={'output1': structures.OutputSpec(type='String')})

        self.assertEqual(generated_spec, expected_spec)

Example #18

    def component_yaml_generator(**kwargs):
        input_specs = []
        input_args = []
        input_kwargs = {}

        serialized_args = {INIT_KEY: {}, METHOD_KEY: {}}

        init_kwargs = {}
        method_kwargs = {}

        for key, value in kwargs.items():
            if key in init_arg_names:
                prefix_key = INIT_KEY
                init_kwargs[key] = value
                signature = init_signature
            else:
                prefix_key = METHOD_KEY
                method_kwargs[key] = value
                signature = method_signature

            # No need to add this argument because it's optional; the
            # parameter is still validated against the signature via
            # init_kwargs/method_kwargs below.
            if value is None:
                continue

            param_type = signature.parameters[key].annotation
            param_type = resolve_annotation(param_type)
            serializer = get_serializer(param_type)
            if serializer:
                param_type = str
                if not isinstance(value,
                                  kfp.dsl._pipeline_param.PipelineParam):
                    value = serializer(value)

            # TODO: remove the PipelineParam check when the Metadata Importer
            # component is available. If we serialize, we need to include the
            # argument as an input; another option would be to embed it in
            # the YAML as a JSON-serialized list.
            component_param_name = component_param_name_to_mb_sdk_param_name.get(
                key, key)
            component_param_type = None
            if isinstance(value,
                          kfp.dsl._pipeline_param.PipelineParam) or serializer:
                if is_mb_sdk_resource_noun_type(param_type):
                    metadata_type = map_resource_to_metadata_type(
                        param_type)[1]
                    component_param_type = metadata_type
                else:
                    if param_type == int:
                        component_param_type = 'Integer'
                    elif param_type == float:
                        component_param_type = 'Float'
                    elif param_type == bool:
                        component_param_type = 'Bool'
                    elif param_type in (list, collections.abc.Sequence,
                                        Sequence):
                        component_param_type = 'List'
                    elif param_type in (dict, Dict):
                        component_param_type = 'Dict'
                    elif param_type in PROTO_PLUS_CLASS_TYPES:
                        component_param_type = 'String'
                    else:
                        component_param_type = 'String'

                input_specs.append(
                    structures.InputSpec(
                        name=key,
                        type=component_param_type,
                    ))
                input_args.append(f'--{prefix_key}.{component_param_name}')
                if is_mb_sdk_resource_noun_type(param_type):
                    input_args.append(
                        f'{{{{$.inputs.artifacts[\'{key}\'].metadata[\'resourceName\']}}}}'
                    )
                else:
                    input_args.append(
                        structures.InputValuePlaceholder(input_name=key))

                input_kwargs[key] = value
            else:
                # Serialized arguments must always be strings
                value = str(value)
                serialized_args[prefix_key][component_param_name] = value

        # validate parameters
        if should_serialize_init:
            init_signature.bind(**init_kwargs)
        method_signature.bind(**method_kwargs)

        component_spec = structures.ComponentSpec(
            name=f'{cls_name}-{method_name}',
            inputs=input_specs,
            outputs=output_specs,
            implementation=structures.
            ContainerImplementation(container=structures.ContainerSpec(
                image=DEFAULT_CONTAINER_IMAGE,
                command=[
                    'python3',
                    '-m',
                    'google_cloud_pipeline_components.container.aiplatform.remote_runner',
                    '--cls_name',
                    cls_name,
                    '--method_name',
                    method_name,
                ],
                args=make_args(serialized_args) + output_args + input_args,
            )))
        component_path = tempfile.mktemp()
        component_spec.save(component_path)

        return components.load_component_from_file(component_path)(
            **input_kwargs)

Example #19

def create_component_from_func(func: Callable,
                               base_image: Optional[str] = None,
                               target_image: Optional[str] = None,
                               packages_to_install: Optional[List[str]] = None,
                               pip_index_urls: Optional[List[str]] = None,
                               output_component_file: Optional[str] = None,
                               install_kfp_package: bool = True,
                               kfp_package_path: Optional[str] = None):
    """Implementation for the @component decorator.

    The decorator is defined under component_decorator.py. See the
    decorator for the canonical documentation for this function.
    """
    packages_to_install = packages_to_install or []

    if install_kfp_package and target_image is None:
        if kfp_package_path is None:
            kfp_package_path = _get_default_kfp_package_path()
        packages_to_install.append(kfp_package_path)

    packages_to_install_command = _get_packages_to_install_command(
        package_list=packages_to_install, pip_index_urls=pip_index_urls)

    command = []
    args = []
    if base_image is None:
        base_image = _DEFAULT_BASE_IMAGE

    component_image = base_image

    if target_image:
        component_image = target_image
        command, args = _get_command_and_args_for_containerized_component(
            function_name=func.__name__, )
    else:
        command, args = _get_command_and_args_for_lightweight_component(
            func=func)

    component_spec = extract_component_interface(func)
    component_spec.implementation = structures.Implementation(
        container=structures.ContainerSpec(
            image=component_image,
            command=packages_to_install_command + command,
            args=args,
        ))

    module_path = pathlib.Path(inspect.getsourcefile(func))
    module_path = module_path.resolve()

    component_name = _python_function_name_to_component_name(func.__name__)
    component_info = ComponentInfo(name=component_name,
                                   function_name=func.__name__,
                                   func=func,
                                   target_image=target_image,
                                   module_path=module_path,
                                   component_spec=component_spec,
                                   output_component_file=output_component_file,
                                   base_image=base_image)

    if REGISTERED_MODULES is not None:
        REGISTERED_MODULES[component_name] = component_info

    if output_component_file:
        component_spec.save_to_component_yaml(output_component_file)

    return python_component.PythonComponent(component_spec=component_spec,
                                            python_func=func)
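
A usage sketch, not from the source: the same factory, this time routing
package installation through a private index via `pip_index_urls`. The
function, package, and index URL are hypothetical.

def multiply(a: float, b: float) -> float:
    return a * b

multiply_op = create_component_from_func(
    func=multiply,
    base_image='python:3.9',
    packages_to_install=['pandas'],
    pip_index_urls=['https://pypi.example.org/simple'],
)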

Example #20

    inputs:
    - {name: optional_input_1, optional: true, type: String}
    name: component_if
    """)

COMPONENT_SPEC_IF_PLACEHOLDER = structures.ComponentSpec(
    name='component_if',
    implementation=structures.Implementation(
        container=structures.ContainerSpec(
            image='alpine',
            args=[
                structures.IfPresentPlaceholder(
                    if_structure=structures.IfPresentPlaceholderStructure(
                        input_name='optional_input_1',
                        then=[
                            '--arg1',
                            structures.InputUriPlaceholder(
                                input_name='optional_input_1'),
                        ],
                        otherwise=[
                            '--arg2',
                            'default',
                        ]))
            ])),
    inputs={
        'optional_input_1': structures.InputSpec(type='String', default=None)
    },
)

V1_YAML_CONCAT_PLACEHOLDER = textwrap.dedent("""\
    name: component_concat
    implementation:

Example #21

def run_as_vertex_ai_custom_job(
    component_spec: Callable,
    display_name: Optional[str] = None,
    replica_count: Optional[int] = None,
    machine_type: Optional[str] = None,
    accelerator_type: Optional[str] = None,
    accelerator_count: Optional[int] = None,
    boot_disk_type: Optional[str] = None,
    boot_disk_size_gb: Optional[int] = None,
    timeout: Optional[str] = None,
    restart_job_on_worker_restart: Optional[bool] = None,
    service_account: Optional[str] = None,
    network: Optional[str] = None,
    worker_pool_specs: Optional[List[Mapping[str, Any]]] = None,
) -> Callable:
    """Run a pipeline task using AI Platform (Unified) custom training job.

    For detailed doc of the service, please refer to
    https://cloud.google.com/ai-platform-unified/docs/training/create-custom-job

    Args:
      component_spec: The task (ContainerOp) object to run as aiplatform custom job.
      display_name: Optional. The name of the custom job. If not provided the
        component_spec.name will be used instead.
      replica_count: Optional. The number of replicas to be split between master
        workerPoolSpec and worker workerPoolSpec. (master always has 1 replica).
      machine_type: Optional. The type of the machine to run the custom job. The
        default value is "n1-standard-4".
      accelerator_type: Optional. The type of accelerator(s) that may be attached
        to the machine as per accelerator_count. Optional.
      accelerator_count: Optional. The number of accelerators to attach to the
        machine.
      boot_disk_type: Optional. Type of the boot disk (default is "pd-ssd"). Valid
        values: "pd-ssd" (Persistent Disk Solid State Drive) or "pd-standard"
          (Persistent Disk Hard Disk Drive).
      boot_disk_size_gb: Optional. Size in GB of the boot disk (default is 100GB).
      timeout: Optional. The maximum job running time. The default is 7 days. A
        duration in seconds with up to nine fractional digits, terminated by 's'.
        Example: "3.5s"
      restart_job_on_worker_restart: Optional. Restarts the entire CustomJob if a
        worker gets restarted. This feature can be used by distributed training
        jobs that are not resilient to workers leaving and joining a job.
      service_account: Optional. Specifies the service account for workload run-as
        account.
      network: Optional. The full name of the Compute Engine network to which the
        job should be peered. For example, projects/12345/global/networks/myVPC.
      worker_pool_specs: Optional. The worker_pool_specs for distributed
        training. This will overwrite all other cluster configurations. For
        details, please see:
        https://cloud.google.com/ai-platform-unified/docs/training/distributed-training
    Returns:
      A Custom Job component op corresponding to the input component op.
    """
    job_spec = {}

    # As a temporary workaround for an issue with the KFP v2-based compiler,
    # which expects placeholders in their original form in args instead of
    # using fields from outputs, we add the args from the original component
    # back to the custom job component. These args will be ignored by the
    # remote launcher.
    copy_of_original_args = []

    if worker_pool_specs is not None:
        worker_pool_specs = copy.deepcopy(worker_pool_specs)

        def _is_output_parameter(output_key: str) -> bool:
            return output_key in (component_spec.component_spec.
                                  output_definitions.parameters.keys())

        for worker_pool_spec in worker_pool_specs:
            if 'container_spec' in worker_pool_spec:
                container_spec = worker_pool_spec['container_spec']
                if 'command' in container_spec:
                    dsl_utils.resolve_cmd_lines(container_spec['command'],
                                                _is_output_parameter)
                if 'args' in container_spec:
                    copy_of_original_args = container_spec['args'].copy()
                    dsl_utils.resolve_cmd_lines(container_spec['args'],
                                                _is_output_parameter)

            elif 'python_package_spec' in worker_pool_spec:
                # For custom Python training, resolve placeholders in args only.
                python_spec = worker_pool_spec['python_package_spec']
                if 'args' in python_spec:
                    dsl_utils.resolve_cmd_lines(python_spec['args'],
                                                _is_output_parameter)

            else:
                raise ValueError(
                    'Expect either "container_spec" or "python_package_spec" in each '
                    'workerPoolSpec. Got: {}'.format(worker_pool_spec))

        job_spec['worker_pool_specs'] = worker_pool_specs

    else:

        def _is_output_parameter(output_key: str) -> bool:
            for output in component_spec.component_spec.outputs:
                if output.name == output_key:
                    return type_utils.is_parameter_type(output.type)
            return False

        worker_pool_spec = {
            'machine_spec': {
                'machine_type': machine_type
                or _DEFAULT_CUSTOM_JOB_MACHINE_TYPE
            },
            'replica_count': 1,
            'container_spec': {
                'image_uri':
                component_spec.component_spec.implementation.container.image,
            }
        }
        if component_spec.component_spec.implementation.container.command:
            container_command_copy = component_spec.component_spec.implementation.container.command.copy(
            )
            dsl_utils.resolve_cmd_lines(container_command_copy,
                                        _is_output_parameter)
            worker_pool_spec['container_spec'][
                'command'] = container_command_copy

        if component_spec.component_spec.implementation.container.args:
            container_args_copy = component_spec.component_spec.implementation.container.args.copy(
            )
            copy_of_original_args = component_spec.component_spec.implementation.container.args.copy(
            )
            dsl_utils.resolve_cmd_lines(container_args_copy,
                                        _is_output_parameter)
            worker_pool_spec['container_spec']['args'] = container_args_copy
        if accelerator_type is not None:
            worker_pool_spec['machine_spec'][
                'accelerator_type'] = accelerator_type
        if accelerator_count is not None:
            worker_pool_spec['machine_spec'][
                'accelerator_count'] = accelerator_count
        if boot_disk_type is not None:
            if 'disk_spec' not in worker_pool_spec:
                worker_pool_spec['disk_spec'] = {}
            worker_pool_spec['disk_spec']['boot_disk_type'] = boot_disk_type
        if boot_disk_size_gb is not None:
            if 'disk_spec' not in worker_pool_spec:
                worker_pool_spec['disk_spec'] = {}
            worker_pool_spec['disk_spec'][
                'boot_disk_size_gb'] = boot_disk_size_gb

        job_spec['worker_pool_specs'] = [worker_pool_spec]
        if replica_count is not None and replica_count > 1:
            additional_worker_pool_spec = copy.deepcopy(worker_pool_spec)
            additional_worker_pool_spec['replica_count'] = str(replica_count -
                                                               1)
            job_spec['worker_pool_specs'].append(additional_worker_pool_spec)
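        # For illustration: with machine_type='n1-standard-4' and
        # replica_count=3, the block above produces one chief pool plus one
        # worker pool (note the additional pool's replica_count is the string
        # str(replica_count - 1)):
        #
        #   job_spec['worker_pool_specs'] == [
        #       {'machine_spec': {'machine_type': 'n1-standard-4'},
        #        'replica_count': 1,
        #        'container_spec': {'image_uri': ...}},
        #       {'machine_spec': {'machine_type': 'n1-standard-4'},
        #        'replica_count': '2',
        #        'container_spec': {'image_uri': ...}},
        #   ]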

    if timeout is not None:
        if 'scheduling' not in job_spec:
            job_spec['scheduling'] = {}
        job_spec['scheduling']['timeout'] = timeout
    if restart_job_on_worker_restart is not None:
        if 'scheduling' not in job_spec:
            job_spec['scheduling'] = {}
        job_spec['scheduling'][
            'restart_job_on_worker_restart'] = restart_job_on_worker_restart
    if service_account is not None:
        job_spec['service_account'] = service_account
    if network is not None:
        job_spec['network'] = network

    custom_job_payload = {
        'display_name': display_name or component_spec.component_spec.name,
        'job_spec': job_spec
    }

    custom_job_component_spec = structures.ComponentSpec(
        name=component_spec.component_spec.name,
        inputs=component_spec.component_spec.inputs + [
            structures.InputSpec(name='gcp_project', type='String'),
            structures.InputSpec(name='gcp_region', type='String')
        ],
        outputs=component_spec.component_spec.outputs +
        [structures.OutputSpec(name='GCP_RESOURCES', type='String')],
        implementation=structures.ContainerImplementation(
            container=structures.ContainerSpec(
                image=_DEFAULT_CUSTOM_JOB_CONTAINER_IMAGE,
                command=["python", "-u", "-m", "launcher"],
                args=[
                    '--type',
                    'CustomJob',
                    '--gcp_project',
                    structures.InputValuePlaceholder(input_name='gcp_project'),
                    '--gcp_region',
                    structures.InputValuePlaceholder(input_name='gcp_region'),
                    '--payload',
                    json.dumps(custom_job_payload),
                    '--gcp_resources',
                    structures.OutputPathPlaceholder(
                        output_name='GCP_RESOURCES'),
                ] + copy_of_original_args,
            )))
    # tempfile.mktemp() is deprecated and race-prone; mkstemp() creates the
    # file securely, and save() then overwrites it.
    fd, component_path = tempfile.mkstemp(suffix='.yaml')
    os.close(fd)
    custom_job_component_spec.save(component_path)

    return components.load_component_from_file(component_path)
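
# A minimal usage sketch for the wrapper above (assuming the enclosing helper
# is google_cloud_pipeline_components' run_as_aiplatform_custom_job; the
# component file name and machine settings here are hypothetical):
#
#     train_op = components.load_component_from_file('train/component.yaml')
#     custom_train_op = run_as_aiplatform_custom_job(
#         train_op,
#         machine_type='n1-standard-8',
#         replica_count=2,
#     )
#
# The returned component gains 'gcp_project' and 'gcp_region' inputs plus a
# 'GCP_RESOURCES' output, and its launcher submits the wrapped container as a
# Vertex AI CustomJob.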
class PipelineTaskTest(parameterized.TestCase):

    def test_create_pipeline_task_valid(self):
        expected_component_spec = structures.ComponentSpec(
            name='component1',
            implementation=structures.Implementation(
                container=structures.ContainerSpec(
                    image='alpine',
                    command=['sh', '-c', 'echo "$0" >> "$1"'],
                    args=[
                        structures.InputValuePlaceholder(input_name='input1'),
                        structures.OutputPathPlaceholder(output_name='output1'),
                    ],
                )),
            inputs={
                'input1': structures.InputSpec(type='String'),
            },
            outputs={
                'output1': structures.OutputSpec(type='Artifact'),
            },
        )
        expected_task_spec = structures.TaskSpec(
            name='component1',
            inputs={'input1': 'value'},
            dependent_tasks=[],
            component_ref='component1',
        )
        expected_container_spec = structures.ContainerSpec(
            image='alpine',
            command=['sh', '-c', 'echo "$0" >> "$1"'],
            args=[
                "{{$.inputs.parameters['input1']}}",
                "{{$.outputs.artifacts['output1'].path}}",
            ],
        )

        task = pipeline_task.PipelineTask(
            component_spec=structures.ComponentSpec.load_from_component_yaml(
                V2_YAML),
            args={'input1': 'value'},
        )
        self.assertEqual(task.task_spec, expected_task_spec)
        self.assertEqual(task.component_spec, expected_component_spec)
        self.assertEqual(task.container_spec, expected_container_spec)
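        # Note how the placeholders resolve: InputValuePlaceholder('input1')
        # becomes "{{$.inputs.parameters['input1']}}" and
        # OutputPathPlaceholder('output1') becomes
        # "{{$.outputs.artifacts['output1'].path}}", matching
        # expected_container_spec above.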

    def test_create_pipeline_task_invalid_missing_required_input(self):
        with self.assertRaisesRegex(ValueError,
                                    'No value provided for input: input1.'):
            task = pipeline_task.PipelineTask(
                component_spec=structures.ComponentSpec
                .load_from_component_yaml(V2_YAML),
                args={},
            )

    def test_create_pipeline_task_invalid_wrong_input(self):
        with self.assertRaisesRegex(
                ValueError,
                'Component "component1" got an unexpected input: input0.'):
            task = pipeline_task.PipelineTask(
                component_spec=structures.ComponentSpec
                .load_from_component_yaml(V2_YAML),
                args={
                    'input1': 'value',
                    'input0': 'abc',
                },
            )

    @parameterized.parameters(
        {
            'component_yaml':
                V2_YAML_IF_PLACEHOLDER,
            'args': {
                'optional_input_1': 'value'
            },
            'expected_container_spec':
                structures.ContainerSpec(
                    image='alpine',
                    command=['sh', '-c', 'echo "$0" "$1"'],
                    args=[
                        'input: ',
                        "{{$.inputs.parameters['optional_input_1']}}",
                    ],
                )
        },
        {
            'component_yaml':
                V2_YAML_IF_PLACEHOLDER,
            'args': {},
            'expected_container_spec':
                structures.ContainerSpec(
                    image='alpine',
                    command=['sh', '-c', 'echo "$0" "$1"'],
                    args=[
                        'default: ',
                        'Hello world!',
                    ],
                )
        },
    )
    def test_resolve_if_placeholder(
        self,
        component_yaml: str,
        args: dict,
        expected_container_spec: structures.ContainerSpec,
    ):
        task = pipeline_task.PipelineTask(
            component_spec=structures.ComponentSpec.load_from_component_yaml(
                component_yaml),
            args=args,
        )
        self.assertEqual(task.container_spec, expected_container_spec)
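        # The two cases above exercise the if-present placeholder: when
        # 'optional_input_1' is supplied, its 'then' arguments are used
        # ('input: ' plus the resolved parameter); otherwise its 'else'
        # arguments supply the literal default ('default: ', 'Hello world!').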

    def test_resolve_concat_placeholder(self):
        expected_container_spec = structures.ContainerSpec(
            image='alpine',
            command=[
                'sh',
                '-c',
                'echo "$0"',
                "{{$.inputs.parameters['input1']}}+{{$.inputs.parameters['input2']}}",
            ],
        )

        task = pipeline_task.PipelineTask(
            component_spec=structures.ComponentSpec.load_from_component_yaml(
                V2_YAML_CONCAT_PLACEHOLDER),
            args={
                'input1': '1',
                'input2': '2',
            },
        )
        self.assertEqual(task.container_spec, expected_container_spec)
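        # The concat placeholder joins its items into a single command-line
        # string; here the two resolved parameter placeholders are joined
        # with '+'.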

    def test_set_caching_options(self):
        task = pipeline_task.PipelineTask(
            component_spec=structures.ComponentSpec.load_from_component_yaml(
                V2_YAML),
            args={'input1': 'value'},
        )
        task.set_caching_options(False)
        self.assertEqual(False, task.task_spec.enable_caching)

    @parameterized.parameters(
        {
            'cpu_limit': '123',
            'expected_cpu_number': 123,
        },
        {
            'cpu_limit': '123m',
            'expected_cpu_number': 0.123,
        },
        {
            'cpu_limit': '123.0',
            'expected_cpu_number': 123,
        },
        {
            'cpu_limit': '123.0m',
            'expected_cpu_number': 0.123,
        },
    )
    def test_set_valid_cpu_limit(self, cpu_limit: str,
                                 expected_cpu_number: float):
        task = pipeline_task.PipelineTask(
            component_spec=structures.ComponentSpec.load_from_component_yaml(
                V2_YAML),
            args={'input1': 'value'},
        )
        task.set_cpu_limit(cpu_limit)
        self.assertEqual(expected_cpu_number,
                         task.container_spec.resources.cpu_limit)
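        # The 'm' suffix follows the Kubernetes milli-CPU convention, so
        # '123m' and '123.0m' both resolve to 0.123 CPUs, while plain '123'
        # and '123.0' resolve to 123 CPUs.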

    @parameterized.parameters(
        {
            'gpu_limit': '666',
            'expected_gpu_number': 666,
        },)
    def test_set_valid_gpu_limit(self, gpu_limit: str,
                                 expected_gpu_number: int):
        task = pipeline_task.PipelineTask(
            component_spec=structures.ComponentSpec.load_from_component_yaml(
                V2_YAML),
            args={'input1': 'value'},
        )
        task.set_gpu_limit(gpu_limit)
        self.assertEqual(expected_gpu_number,
                         task.container_spec.resources.accelerator_count)

    @parameterized.parameters(
        {
            'memory': '1E',
            'expected_memory_number': 1000000000,
        },
        {
            'memory': '15Ei',
            'expected_memory_number': 17293822569.102703,
        },
        {
            'memory': '2P',
            'expected_memory_number': 2000000,
        },
        {
            'memory': '25Pi',
            'expected_memory_number': 28147497.6710656,
        },
        {
            'memory': '3T',
            'expected_memory_number': 3000,
        },
        {
            'memory': '35Ti',
            'expected_memory_number': 38482.90697216,
        },
        {
            'memory': '4G',
            'expected_memory_number': 4,
        },
        {
            'memory': '45Gi',
            'expected_memory_number': 48.31838208,
        },
        {
            'memory': '5M',
            'expected_memory_number': 0.005,
        },
        {
            'memory': '55Mi',
            'expected_memory_number': 0.05767168,
        },
        {
            'memory': '6K',
            'expected_memory_number': 0.000006,
        },
        {
            'memory': '65Ki',
            'expected_memory_number': 0.00006656,
        },
        {
            'memory': '7000',
            'expected_memory_number': 0.000007,
        },
    )
    def test_set_memory_limit(self, memory: str,
                              expected_memory_number: float):
        task = pipeline_task.PipelineTask(
            component_spec=structures.ComponentSpec.load_from_component_yaml(
                V2_YAML),
            args={'input1': 'value'},
        )
        task.set_memory_limit(memory)
        self.assertEqual(expected_memory_number,
                         task.container_spec.resources.memory_limit)
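        # The cases above cover both decimal (K, M, G, T, P, E) and binary
        # (Ki, Mi, Gi, Ti, Pi, Ei) suffixes; memory_limit is normalized to
        # gigabytes, e.g. '45Gi' == 45 * 2**30 bytes == 48.31838208 G.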

    def test_add_node_selector_constraint_type_only(self):
        task = pipeline_task.PipelineTask(
            component_spec=structures.ComponentSpec.load_from_component_yaml(
                V2_YAML),
            args={'input1': 'value'},
        )
        task.add_node_selector_constraint('NVIDIA_TESLA_K80')
        self.assertEqual(
            structures.ResourceSpec(
                accelerator_type='NVIDIA_TESLA_K80', accelerator_count=1),
            task.container_spec.resources)

    def test_add_node_selector_constraint_accelerator_count(self):
        task = pipeline_task.PipelineTask(
            component_spec=structures.ComponentSpec.load_from_component_yaml(
                V2_YAML),
            args={'input1': 'value'},
        )
        task.set_gpu_limit('5').add_node_selector_constraint('TPU_V3')
        self.assertEqual(
            structures.ResourceSpec(
                accelerator_type='TPU_V3', accelerator_count=5),
            task.container_spec.resources)
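        # As shown above, add_node_selector_constraint preserves an
        # accelerator_count previously set via set_gpu_limit (here 5) while
        # replacing the accelerator type; without a prior limit it defaults
        # the count to 1.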

    def test_set_env_variable(self):
        task = pipeline_task.PipelineTask(
            component_spec=structures.ComponentSpec.load_from_component_yaml(
                V2_YAML),
            args={'input1': 'value'},
        )
        task.set_env_variable('env_name', 'env_value')
        self.assertEqual({'env_name': 'env_value'}, task.container_spec.env)

    def test_set_display_name(self):
        task = pipeline_task.PipelineTask(
            component_spec=structures.ComponentSpec.load_from_component_yaml(
                V2_YAML),
            args={'input1': 'value'},
        )
        task.set_display_name('test_name')
        self.assertEqual('test_name', task.task_spec.display_name)