def test_get_input_artifact_type_schema(self):
        """Checks get_input_artifact_type_schema for missing, parameter and artifact inputs.

        Covers four cases: an unknown input name, an input whose type is not an
        artifact type, an input with an explicit artifact type, and an input
        with ``type=None`` for which the default artifact schema is expected.
        """
        input_specs = [
            structures.InputSpec(name='input1', type='String'),
            structures.InputSpec(name='input2', type='Model'),
            structures.InputSpec(name='input3', type=None),
        ]
        # input not found.
        with self.assertRaises(AssertionError) as cm:
            type_utils.get_input_artifact_type_schema('input0', input_specs)
        # The message check has to live outside the `with` block: anything
        # placed after the raising call inside the block never executes. The
        # caught exception is available as `cm.exception`, not `cm` itself.
        self.assertEqual('Input not found.', str(cm.exception))

        # input found, but it doesn't map to an artifact type.
        with self.assertRaises(AssertionError) as cm:
            type_utils.get_input_artifact_type_schema('input1', input_specs)
        self.assertEqual('Input is not an artifact type.', str(cm.exception))

        # input found, and a matching artifact type schema returned.
        self.assertEqual(
            'properties:\ntitle: kfp.Model\ntype: object\n',
            type_utils.get_input_artifact_type_schema('input2', input_specs))

        # input found, and the default artifact type schema returned.
        self.assertEqual(
            'properties:\ntitle: kfp.Artifact\ntype: object\n',
            type_utils.get_input_artifact_type_schema('input3', input_specs))
Example #2
0
    def test_optional(self):
        """Checks how passing (or omitting) ``default`` drives ``_optional``.

        An explicitly supplied default, even ``None``, is expected to mark the
        input as optional; omitting it is expected to leave it required.
        """
        # (extra constructor kwargs, expected `default`, expected `_optional`)
        scenarios = (
            ({'default': 'test'}, 'test', True),
            ({'default': None}, None, True),
            ({}, None, False),
        )
        for extra_kwargs, want_default, want_optional in scenarios:
            spec = structures.InputSpec(type='str', **extra_kwargs)
            self.assertEqual(spec.default, want_default)
            self.assertEqual(spec._optional, want_optional)
Example #3
0
 def test_equality(self):
     """Checks InputSpec equality across defaults and spellings of the List type."""
     new_spec = structures.InputSpec

     # Same type and default compare equal; a different default does not.
     self.assertEqual(
         new_spec(type='str', default=None),
         new_spec(type='str', default=None))
     self.assertNotEqual(
         new_spec(type='str', default=None),
         new_spec(type='str', default='test'))

     # 'List' is expected to equal the typing-qualified and parameterized forms.
     list_equivalents = (
         ({'type': 'List', 'default': None},
          {'type': 'typing.List', 'default': None}),
         ({'type': 'List', 'default': None},
          {'type': 'typing.List[int]', 'default': None}),
         ({'type': 'List'},
          {'type': 'typing.List[typing.Dict[str, str]]'}),
     )
     for left_kwargs, right_kwargs in list_equivalents:
         self.assertEqual(new_spec(**left_kwargs), new_spec(**right_kwargs))
Example #4
0
 def test_get_input_artifact_type_schema(self):
   """Checks the schema lookup for unknown, parameter and artifact inputs.

   In this variant the lookup is expected to return None (rather than raise)
   when the input is missing or is not an artifact type.
   """
   string_input = structures.InputSpec(name='input1', type='String')
   gcs_path_input = structures.InputSpec(name='input2', type='GCSPath')
   input_specs = [string_input, gcs_path_input]

   # input not found.
   missing_schema = type_utils.get_input_artifact_type_schema(
       'input0', input_specs)
   self.assertEqual(None, missing_schema)

   # input found, but it doesn't map to an artifact type.
   parameter_schema = type_utils.get_input_artifact_type_schema(
       'input1', input_specs)
   self.assertEqual(None, parameter_schema)

   # input found, and a matching artifact type schema returned.
   artifact_schema = type_utils.get_input_artifact_type_schema(
       'input2', input_specs)
   self.assertEqual('title: Artifact\ntype: object\nproperties:\n',
                    artifact_schema)
Example #5
0
    def test_simple_component_spec_save_to_component_yaml(self):
        """Round-trips a ComponentSpec through the compiler and the YAML loader.

        The spec is wrapped in a YamlComponent, compiled to a temporary file,
        read back with ``load_from_component_yaml`` and compared to the
        original.
        """
        # tests writing old style (less verbose) and reading in new style (more verbose)
        shell_command = [
            'sh',
            '-c',
            'set -ex\necho "$0" > "$1"',
            structures.InputValuePlaceholder(input_name='input1'),
            structures.OutputParameterPlaceholder(output_name='output1'),
        ]
        container = structures.ContainerSpec(
            image='alpine', command=shell_command)
        original_component_spec = structures.ComponentSpec(
            name='component_1',
            implementation=structures.Implementation(container=container),
            inputs={'input1': structures.InputSpec(type='String')},
            outputs={'output1': structures.OutputSpec(type='String')},
        )

        from kfp.components import yaml_component
        component = yaml_component.YamlComponent(
            component_spec=original_component_spec)

        with tempfile.TemporaryDirectory() as workdir:
            compiled_path = os.path.join(workdir, 'component.yaml')
            compiler.Compiler().compile(component, compiled_path)

            # test that it can be read back correctly
            with open(compiled_path, 'r') as compiled_file:
                compiled_text = compiled_file.read()
            new_component_spec = structures.ComponentSpec.load_from_component_yaml(
                compiled_text)

        self.assertEqual(original_component_spec, new_component_spec)
Example #6
0
    def test_component_spec_with_placeholder_referencing_nonexisting_input_output(
            self):
        """Expects ValueError when a placeholder names an undeclared input or output."""

        def build_spec(input_name, output_name):
            # Only 'input1' and 'output1' are declared; the placeholder names
            # are supplied by the caller so they can point at something else.
            return structures.ComponentSpec(
                name='component_1',
                implementation=structures.Implementation(
                    container=structures.ContainerSpec(
                        image='alpine',
                        command=[
                            'sh',
                            '-c',
                            'set -ex\necho "$0" > "$1"',
                            structures.InputValuePlaceholder(
                                input_name=input_name),
                            structures.OutputPathPlaceholder(
                                output_name=output_name),
                        ],
                    )),
                inputs={'input1': structures.InputSpec(type='String')},
                outputs={'output1': structures.OutputSpec(type='String')},
            )

        unknown_input_pattern = r'^Argument \"InputValuePlaceholder[\s\S]*\'input000\'[\s\S]*references non-existing input.'
        unknown_output_pattern = r'^Argument \"OutputPathPlaceholder[\s\S]*\'output000\'[\s\S]*references non-existing output.'

        # Placeholder refers to an input that was never declared.
        with self.assertRaisesRegex(ValueError, unknown_input_pattern):
            build_spec('input000', 'output1')

        # Placeholder refers to an output that was never declared.
        with self.assertRaisesRegex(ValueError, unknown_output_pattern):
            build_spec('input1', 'output000')
Example #7
0
    def test_from_container_dict_no_placeholders(self):
        """Calls ContainerSpec.from_container_dict on a dict without placeholder objects.

        All command/args entries in the dict are plain strings.
        """
        # NOTE(review): component_spec is constructed but never referenced
        # again in this test. It also embeds a different function
        # (concat_message) than container_dict below (fail_op), so the two are
        # not directly comparable.
        component_spec = structures.ComponentSpec(
            name='test',
            implementation=structures.Implementation(
                container=structures.ContainerSpec(
                    image='python:3.7',
                    command=[
                        'sh', '-c',
                        '\nif ! [ -x "$(command -v pip)" ]; then\n    python3 -m ensurepip || python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet     --no-warn-script-location \'kfp==2.0.0-alpha.2\' && "$0" "$@"\n',
                        'sh', '-ec',
                        'program_path=$(mktemp -d)\nprintf "%s" "$0" > "$program_path/ephemeral_component.py"\npython3 -m kfp.components.executor_main                         --component_module_path                         "$program_path/ephemeral_component.py"                         "$@"\n',
                        '\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import *\n\ndef concat_message(first: str, second: str) -> str:\n    return first + second\n\n'
                    ],
                    args=[
                        '--executor_input', '{{$}}', '--function_to_execute',
                        'concat_message'
                    ],
                    env=None,
                    resources=None),
                graph=None,
                importer=None),
            description=None,
            inputs={
                'first': structures.InputSpec(type='String', default=None),
                'second': structures.InputSpec(type='String', default=None)
            },
            outputs={'Output': structures.OutputSpec(type='String')})
        # Raw container definition: args, command and image only.
        container_dict = {
            'args': [
                '--executor_input', '{{$}}', '--function_to_execute', 'fail_op'
            ],
            'command': [
                'sh', '-c',
                '\nif ! [ -x "$(command -v pip)" ]; then\n    python3 -m ensurepip || python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet     --no-warn-script-location \'kfp==2.0.0-alpha.2\' && "$0" "$@"\n',
                'sh', '-ec',
                'program_path=$(mktemp -d)\nprintf "%s" "$0" > "$program_path/ephemeral_component.py"\npython3 -m kfp.components.executor_main                         --component_module_path                         "$program_path/ephemeral_component.py"                         "$@"\n',
                '\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import *\n\ndef fail_op(message: str):\n    """Fails."""\n    import sys\n    print(message)\n    sys.exit(1)\n\n'
            ],
            'image': 'python:3.7'
        }

        # NOTE(review): nothing is asserted about loaded_container_spec; as
        # written the test only verifies that the call completes without
        # raising. Confirm whether an equality assertion was intended here.
        loaded_container_spec = structures.ContainerSpec.from_container_dict(
            container_dict)
Example #8
0
    def test_simple_component_spec_save_to_component_yaml(self):
        """Saves a ComponentSpec to a ``.json`` path with ``open`` mocked out.

        Expects a DeprecationWarning about compiling to JSON and exactly one
        ``open(path, 'w')`` call.
        """
        open_mock = mock.mock_open()
        # NOTE(review): expected_yaml is built but never compared against what
        # is written; only the open() call is verified below.
        expected_yaml = textwrap.dedent("""\
        implementation:
          container:
            command:
            - sh
            - -c
            - 'set -ex

              echo "$0" > "$1"'
            - {inputValue: input1}
            - {outputPath: output1}
            image: alpine
        inputs:
          input1: {type: String}
        name: component_1
        outputs:
          output1: {type: String}
        """)

        target_path = 'test_save_file.json'
        with mock.patch("builtins.open", open_mock, create=True):
            with self.assertWarnsRegex(DeprecationWarning,
                                       r"Compiling to JSON is deprecated"):
                shell_command = [
                    'sh',
                    '-c',
                    'set -ex\necho "$0" > "$1"',
                    structures.InputValuePlaceholder(input_name='input1'),
                    structures.OutputPathPlaceholder(output_name='output1'),
                ]
                container = structures.ContainerSpec(
                    image='alpine', command=shell_command)
                spec = structures.ComponentSpec(
                    name='component_1',
                    implementation=structures.Implementation(
                        container=container),
                    inputs={'input1': structures.InputSpec(type='String')},
                    outputs={'output1': structures.OutputSpec(type='String')},
                )
                spec.save_to_component_yaml(target_path)

        open_mock.assert_called_once_with('test_save_file.json', 'w')
Example #9
0
    def test_create_pipeline_task_valid(self):
        """Builds a PipelineTask from V2_YAML and checks its three derived specs.

        Compares the task's ``task_spec``, ``component_spec`` and
        ``container_spec`` against hand-built expectations.
        """
        # Component-level container: args are still placeholder objects.
        component_container = structures.ContainerSpec(
            image='alpine',
            command=['sh', '-c', 'echo "$0" >> "$1"'],
            args=[
                structures.InputValuePlaceholder(input_name='input1'),
                structures.OutputPathPlaceholder(output_name='output1'),
            ],
        )
        expected_component_spec = structures.ComponentSpec(
            name='component1',
            implementation=structures.Implementation(
                container=component_container),
            inputs={'input1': structures.InputSpec(type='String')},
            outputs={'output1': structures.OutputSpec(type='Artifact')},
        )
        expected_task_spec = structures.TaskSpec(
            name='component1',
            inputs={'input1': 'value'},
            dependent_tasks=[],
            component_ref='component1',
        )
        # Task-level container: args are expected as runtime placeholder strings.
        expected_container_spec = structures.ContainerSpec(
            image='alpine',
            command=['sh', '-c', 'echo "$0" >> "$1"'],
            args=[
                "{{$.inputs.parameters['input1']}}",
                "{{$.outputs.artifacts['output1'].path}}",
            ],
        )

        loaded_spec = structures.ComponentSpec.load_from_component_yaml(
            V2_YAML)
        task = pipeline_task.PipelineTask(
            component_spec=loaded_spec,
            args={'input1': 'value'},
        )

        expectations = (
            ('task_spec', expected_task_spec),
            ('component_spec', expected_component_spec),
            ('container_spec', expected_container_spec),
        )
        for attribute, expected in expectations:
            self.assertEqual(getattr(task, attribute), expected)
Example #10
0
    def test_simple_component_spec_load_from_v2_component_yaml(self):
        """Loads a component YAML and compares it to the equivalent ComponentSpec.

        The YAML uses dict-keyed inputs/outputs and ``inputValue`` /
        ``outputPath`` command entries.
        """
        component_yaml_v2 = textwrap.dedent("""\
        name: component_1
        inputs:
          input1:
            type: String
        outputs:
          output1:
            type: String
        implementation:
          container:
            image: alpine
            command:
            - sh
            - -c
            - 'set -ex

                echo "$0" > "$1"'
            - inputValue: input1
            - outputPath: output1
        """)

        generated_spec = structures.ComponentSpec.load_from_component_yaml(
            component_yaml_v2)

        # The inputValue/outputPath entries are expected as placeholder objects.
        expected_command = [
            'sh',
            '-c',
            'set -ex\necho "$0" > "$1"',
            structures.InputValuePlaceholder(input_name='input1'),
            structures.OutputPathPlaceholder(output_name='output1'),
        ]
        expected_container = structures.ContainerSpec(
            image='alpine', command=expected_command)
        expected_spec = structures.ComponentSpec(
            name='component_1',
            implementation=structures.Implementation(
                container=expected_container),
            inputs={'input1': structures.InputSpec(type='String')},
            outputs={'output1': structures.OutputSpec(type='String')})

        self.assertEqual(generated_spec, expected_spec)
Example #11
0
def importer(
    artifact_uri: Union[pipeline_channel.PipelineParameterChannel, str],
    artifact_class: Type[artifact_types.Artifact],
    reimport: bool = False,
    metadata: Optional[Mapping[str, Any]] = None,
) -> pipeline_task.PipelineTask:
    """dsl.importer for importing an existing artifact. Only for v2 pipeline.

    Builds a ComponentSpec named 'importer' with one String input and one
    output typed after ``artifact_class``, wraps it in an ImporterComponent and
    calls that component with ``artifact_uri``.

    Args:
      artifact_uri: The artifact uri to import from. Forwarded to the importer
        component as its ``uri`` argument.
      artifact_class: The artifact class to import as. Its ``TYPE_NAME`` is
        used as the importer's type schema and its ``__name__`` as the output
        type.
      reimport: Whether to reimport the artifact. Defaults to False.
      metadata: Properties of the artifact.

    Returns:
      The result of calling the importer component (annotated as a
      PipelineTask instance).

    NOTE(review): this function performs no explicit validation of
    ``artifact_uri``; any error for an unsupported value would have to come
    from the objects constructed or called here -- confirm.
    """
    uri_placeholder = structures.InputValuePlaceholder(
        INPUT_KEY).to_placeholder()
    importer_spec = structures.ImporterSpec(
        artifact_uri=uri_placeholder,
        type_schema=artifact_class.TYPE_NAME,
        reimport=reimport,
        metadata=metadata)
    implementation = structures.Implementation(importer=importer_spec)

    declared_inputs = {INPUT_KEY: structures.InputSpec(type='String')}
    declared_outputs = {
        OUTPUT_KEY: structures.OutputSpec(type=artifact_class.__name__)
    }
    component_spec = structures.ComponentSpec(
        name='importer',
        implementation=implementation,
        inputs=declared_inputs,
        outputs=declared_outputs,
    )

    import_op = importer_component.ImporterComponent(
        component_spec=component_spec)
    return import_op(uri=artifact_uri)
Example #12
0
    def test_simple_component_spec_load_from_v2_component_yaml(self):
        component_yaml_v2 = textwrap.dedent("""\
components:
  comp-component-1:
    executorLabel: exec-component-1
    inputDefinitions:
      parameters:
        input1:
          parameterType: STRING
    outputDefinitions:
      parameters:
        output1:
          parameterType: STRING
deploymentSpec:
  executors:
    exec-component-1:
      container:
        command:
        - sh
        - -c
        - 'set -ex

          echo "$0" > "$1"'
        - '{{$.inputs.parameters[''input1'']}}'
        - '{{$.outputs.parameters[''output1''].output_file}}'
        image: alpine
pipelineInfo:
  name: component-1
root:
  dag:
    tasks:
      component-1:
        cachingOptions:
          enableCache: true
        componentRef:
          name: comp-component-1
        inputs:
          parameters:
            input1:
              componentInputParameter: input1
        taskInfo:
          name: component-1
  inputDefinitions:
    parameters:
      input1:
        parameterType: STRING
schemaVersion: 2.1.0
sdkVersion: kfp-2.0.0-alpha.2
        """)

        generated_spec = structures.ComponentSpec.load_from_component_yaml(
            component_yaml_v2)

        expected_spec = structures.ComponentSpec(
            name='component-1',
            implementation=structures.Implementation(
                container=structures.ContainerSpec(
                    image='alpine',
                    command=[
                        'sh',
                        '-c',
                        'set -ex\necho "$0" > "$1"',
                        structures.InputValuePlaceholder(input_name='input1'),
                        structures.OutputParameterPlaceholder(
                            output_name='output1'),
                    ],
                )),
            inputs={'input1': structures.InputSpec(type='String')},
            outputs={'output1': structures.OutputSpec(type='String')})

        self.assertEqual(generated_spec, expected_spec)
Example #13
0
    def to_v1_component_spec(self) -> v1_structures.ComponentSpec:
        """Converts to v1 ComponentSpec.

        Needed until downstream accept new ComponentSpec.

        Returns:
            Component spec in the form of V1 ComponentSpec.

        Raises:
            ValueError: If a command, argument or env value is neither a
                string nor one of the placeholder types handled below.
        """

        def _transform_arg(arg: ValidCommandArgs) -> Any:
            """Maps one command/argument/env value to its v1 counterpart."""
            if isinstance(arg, str):
                return arg
            if isinstance(arg, InputValuePlaceholder):
                return v1_structures.InputValuePlaceholder(arg.input_name)
            if isinstance(arg, InputPathPlaceholder):
                return v1_structures.InputPathPlaceholder(arg.input_name)
            if isinstance(arg, InputUriPlaceholder):
                return v1_structures.InputUriPlaceholder(arg.input_name)
            if isinstance(arg, OutputPathPlaceholder):
                return v1_structures.OutputPathPlaceholder(arg.output_name)
            if isinstance(arg, OutputUriPlaceholder):
                return v1_structures.OutputUriPlaceholder(arg.output_name)
            if isinstance(arg, IfPresentPlaceholder):
                return v1_structures.IfPlaceholder(arg.if_structure)
            if isinstance(arg, ConcatPlaceholder):
                return v1_structures.ConcatPlaceholder(arg.concat)
            raise ValueError(
                f'Unexpected command/argument type: "{arg}" of type "{type(arg)}".'
            )

        container = self.implementation.container

        return v1_structures.ComponentSpec(
            name=self.name,
            inputs=[
                v1_structures.InputSpec(
                    name=name,
                    type=input_spec.type,
                    default=input_spec.default,
                ) for name, input_spec in self.inputs.items()
            ],
            outputs=[
                v1_structures.OutputSpec(
                    name=name,
                    type=output_spec.type,
                ) for name, output_spec in self.outputs.items()
            ],
            implementation=v1_structures.ContainerImplementation(
                container=v1_structures.ContainerSpec(
                    image=container.image,
                    command=[
                        _transform_arg(cmd) for cmd in container.commands or []
                    ],
                    args=[
                        _transform_arg(arg)
                        for arg in container.arguments or []
                    ],
                    # Iterate key/value pairs explicitly: looping over the
                    # mapping itself yields only keys, which cannot be
                    # unpacked into (name, value).
                    env={
                        name: _transform_arg(value)
                        for name, value in (container.env or {}).items()
                    },
                )),
        )
Example #14
0
def create_custom_training_job_op_from_component(
    component_spec: Callable,  # pylint: disable=g-bare-generic
    display_name: Optional[str] = '',
    replica_count: Optional[int] = 1,
    machine_type: Optional[str] = 'n1-standard-4',
    accelerator_type: Optional[str] = '',
    accelerator_count: Optional[int] = 1,
    boot_disk_type: Optional[str] = 'pd-ssd',
    boot_disk_size_gb: Optional[int] = 100,
    timeout: Optional[str] = '',
    restart_job_on_worker_restart: Optional[bool] = False,
    service_account: Optional[str] = '',
    network: Optional[str] = '',
    encryption_spec_key_name: Optional[str] = '',
    tensorboard: Optional[str] = '',
    enable_web_access: Optional[bool] = False,
    base_output_directory: Optional[str] = '',
    labels: Optional[Dict[str, str]] = None,
) -> Callable:  # pylint: disable=g-bare-generic
    """Create a component spec that runs a custom training in Vertex AI.

    This utility converts a given component to a CustomTrainingJobOp that runs
    a custom training in Vertex AI. This simplifies the creation of custom
    training jobs. All Inputs and Outputs of the supplied component will be
    copied over to the constructed training job.

    Note that this utility constructs a ClusterSpec where the master and all
    the workers use the same spec, meaning all disk/machine spec related
    parameters will apply to all replicas. This is suitable for use cases such
    as training with MultiWorkerMirroredStrategy or Mirrored Strategy.

    This component does not support Vertex AI Python training application.

    For more details on Vertex AI Training service, please refer to
    https://cloud.google.com/vertex-ai/docs/training/create-custom-job

    The input and output lists of the supplied component are copied before
    being adjusted, so the supplied component's own spec is left unmodified.

    Args:
      component_spec: The task (ContainerOp) object to run as Vertex AI custom
        job.
      display_name (Optional[str]): The name of the custom job. If not provided
        the component_spec.name will be used instead.
      replica_count (Optional[int]): The count of instances in the cluster. One
        replica always counts towards the master in worker_pool_spec[0] and the
        remaining replicas will be allocated in worker_pool_spec[1]. For more
        details see
        https://cloud.google.com/vertex-ai/docs/training/distributed-training#configure_a_distributed_training_job.
      machine_type (Optional[str]): The type of the machine to run the custom
        job. The default value is "n1-standard-4". For more details about this
        input config, see
        https://cloud.google.com/vertex-ai/docs/training/configure-compute#machine-types.
      accelerator_type (Optional[str]): The type of accelerator(s) that may be
        attached to the machine as per accelerator_count. For more details
        about this input config, see
        https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec#acceleratortype.
      accelerator_count (Optional[int]): The number of accelerators to attach
        to the machine. Defaults to 1 if accelerator_type is set.
      boot_disk_type (Optional[str]): Type of the boot disk (default is
        "pd-ssd"). Valid values: "pd-ssd" (Persistent Disk Solid State Drive)
        or "pd-standard" (Persistent Disk Hard Disk Drive).
      boot_disk_size_gb (Optional[int]): Size in GB of the boot disk (default
        is 100GB). Only applied when boot_disk_type is set.
      timeout (Optional[str]): The maximum job running time. The default is 7
        days. A duration in seconds with up to nine fractional digits,
        terminated by 's', for example: "3.5s".
      restart_job_on_worker_restart (Optional[bool]): Restarts the entire
        CustomJob if a worker gets restarted. This feature can be used by
        distributed training jobs that are not resilient to workers leaving
        and joining a job.
      service_account (Optional[str]): Sets the default service account for
        workload run-as account. The service account running the pipeline
        (https://cloud.google.com/vertex-ai/docs/pipelines/configure-project#service-account)
        submitting jobs must have act-as permission on this run-as account. If
        unspecified, the Vertex AI Custom Code Service Agent
        (https://cloud.google.com/vertex-ai/docs/general/access-control#service-agents)
        for the CustomJob's project.
      network (Optional[str]): The full name of the Compute Engine network to
        which the job should be peered. For example,
        projects/12345/global/networks/myVPC. Format is of the form
        projects/{project}/global/networks/{network}. Where {project} is a
        project number, as in 12345, and {network} is a network name. Private
        services access must already be configured for the network. If left
        unspecified, the job is not peered with any network.
      encryption_spec_key_name (Optional[str]): Customer-managed encryption key
        options for the CustomJob. If this is set, then all resources created
        by the CustomJob will be encrypted with the provided encryption key.
      tensorboard (Optional[str]): The name of a Vertex AI Tensorboard resource
        to which this CustomJob will upload Tensorboard logs.
      enable_web_access (Optional[bool]): Whether you want Vertex AI to enable
        [interactive shell access](https://cloud.google.com/vertex-ai/docs/training/monitor-debug-interactive-shell)
        to training containers. If set to `true`, you can access interactive
        shells at the URIs given by [CustomJob.web_access_uris][].
      base_output_directory (Optional[str]): The Cloud Storage location to
        store the output of this CustomJob or HyperparameterTuningJob. See
        below for more details:
        https://cloud.google.com/vertex-ai/docs/reference/rest/v1/GcsDestination
      labels (Optional[Dict[str, str]]): The labels with user-defined metadata
        to organize CustomJobs. See https://goo.gl/xmQnxf for more
        information.

    Returns:
      A Custom Job component operator corresponding to the input component
      operator.
    """
    job_spec = {}

    # pytype: disable=attribute-error

    source_spec = component_spec.component_spec
    source_container = source_spec.implementation.container

    # Copy the lists so the append/filter steps below never alter the spec of
    # the component that was passed in.
    input_specs = list(source_spec.inputs) if source_spec.inputs else []
    output_specs = list(source_spec.outputs) if source_spec.outputs else []

    def _is_output_parameter(output_key: str) -> bool:
        # Iterates the local copy, which is an empty list when the source
        # component declares no outputs.
        for output in output_specs:
            if output.name == output_key:
                return type_utils.is_parameter_type(output.type)
        return False

    def _resolved_cmd_lines(cmd_lines):
        """Returns a resolved copy of cmd_lines with the executor placeholder escaped."""
        resolved = cmd_lines.copy()
        dsl_utils.resolve_cmd_lines(resolved, _is_output_parameter)
        # Replace executor place holder with the json escaped placeholder.
        for idx, val in enumerate(resolved):
            if val == '{{{{$}}}}':
                resolved[idx] = _EXECUTOR_PLACE_HOLDER_REPLACEMENT
        return resolved

    worker_pool_spec = {
        'machine_spec': {
            'machine_type': machine_type
        },
        'replica_count': 1,
        'container_spec': {
            'image_uri': source_container.image,
        }
    }
    if source_container.command:
        worker_pool_spec['container_spec']['command'] = _resolved_cmd_lines(
            source_container.command)
    if source_container.args:
        worker_pool_spec['container_spec']['args'] = _resolved_cmd_lines(
            source_container.args)

    if accelerator_type:
        worker_pool_spec['machine_spec']['accelerator_type'] = accelerator_type
        worker_pool_spec['machine_spec'][
            'accelerator_count'] = accelerator_count
    if boot_disk_type:
        # The disk size is only recorded together with a disk type.
        worker_pool_spec['disk_spec'] = {
            'boot_disk_type': boot_disk_type,
            'boot_disk_size_gb': boot_disk_size_gb,
        }

    job_spec['worker_pool_specs'] = [worker_pool_spec]
    # Convert once so the comparison and the subtraction agree even when the
    # count arrives as a numeric string.
    total_replicas = int(replica_count)
    if total_replicas > 1:
        additional_worker_pool_spec = copy.deepcopy(worker_pool_spec)
        additional_worker_pool_spec['replica_count'] = str(total_replicas - 1)
        job_spec['worker_pool_specs'].append(additional_worker_pool_spec)

    # TODO(chavoshi): Use input parameter instead of hard coded string label.
    # This requires Dictionary input type to be supported in V2.
    if labels is not None:
        job_spec['labels'] = labels

    if timeout:
        job_spec.setdefault('scheduling', {})['timeout'] = timeout
    if restart_job_on_worker_restart:
        job_spec.setdefault('scheduling', {})[
            'restart_job_on_worker_restart'] = restart_job_on_worker_restart
    if enable_web_access:
        job_spec['enable_web_access'] = enable_web_access

    if encryption_spec_key_name:
        job_spec['encryption_spec'] = {
            'kms_key_name':
                "{{$.inputs.parameters['encryption_spec_key_name']}}"
        }
        input_specs.append(
            structures.InputSpec(name='encryption_spec_key_name',
                                 type='String',
                                 optional=True,
                                 default=encryption_spec_key_name))

    # Drop any inputs of the source component that clash with the inputs this
    # wrapper always adds itself further down.
    wrapper_owned_inputs = ('service_account', 'network', 'tensorboard',
                            'base_output_directory')
    input_specs = [
        input_spec for input_spec in input_specs
        if input_spec.name not in wrapper_owned_inputs
    ]
    job_spec['service_account'] = "{{$.inputs.parameters['service_account']}}"
    job_spec['network'] = "{{$.inputs.parameters['network']}}"

    job_spec['tensorboard'] = "{{$.inputs.parameters['tensorboard']}}"
    job_spec['base_output_directory'] = {
        'output_uri_prefix':
            "{{$.inputs.parameters['base_output_directory']}}"
    }
    custom_job_payload = {
        'display_name': display_name or source_spec.name,
        'job_spec': job_spec
    }

    custom_job_component_spec = structures.ComponentSpec(
        name=source_spec.name,
        inputs=input_specs + [
            structures.InputSpec(name='base_output_directory',
                                 type='String',
                                 optional=True,
                                 default=base_output_directory),
            structures.InputSpec(name='tensorboard',
                                 type='String',
                                 optional=True,
                                 default=tensorboard),
            structures.InputSpec(
                name='network', type='String', optional=True, default=network),
            structures.InputSpec(name='service_account',
                                 type='String',
                                 optional=True,
                                 default=service_account),
            structures.InputSpec(name='project', type='String'),
            structures.InputSpec(name='location', type='String')
        ],
        outputs=output_specs +
        [structures.OutputSpec(name='gcp_resources', type='String')],
        implementation=structures.ContainerImplementation(
            container=structures.ContainerSpec(
                image=_DEFAULT_CUSTOM_JOB_CONTAINER_IMAGE,
                command=[
                    'python3', '-u', '-m',
                    'google_cloud_pipeline_components.container.v1.gcp_launcher.launcher'
                ],
                args=[
                    '--type',
                    'CustomJob',
                    '--payload',
                    json.dumps(custom_job_payload),
                    '--project',
                    structures.InputValuePlaceholder(input_name='project'),
                    '--location',
                    structures.InputValuePlaceholder(input_name='location'),
                    '--gcp_resources',
                    structures.OutputPathPlaceholder(
                        output_name='gcp_resources'),
                ],
            )))

    # pytype: enable=attribute-error

    # Reserve the path by actually creating the file: tempfile.mktemp() only
    # returns a name, which another process could claim before it is written.
    # delete=False keeps the file on disk after the handle is closed.
    with tempfile.NamedTemporaryFile(delete=False) as component_file:
        component_path = component_file.name
    custom_job_component_spec.save(component_path)
    return components.load_component_from_file(component_path)
Example #15
0
    def component_yaml_generator(**kwargs):
        """Builds a container component for the given arguments and calls it.

        Each keyword argument is attributed to either the init signature (when
        its name is in ``init_arg_names``) or the method signature. Arguments
        whose value is ``None`` are skipped. Arguments that are
        ``PipelineParam`` instances, or whose resolved annotation has a
        serializer, are exposed as component inputs; all other arguments are
        converted with ``str()`` and embedded in the serialized args.

        Args:
            **kwargs: Arguments destined for the wrapped class constructor
                and/or method.

        Returns:
            The result of invoking the loaded component with the arguments
            that were exposed as component inputs.
        """
        input_specs = []
        input_args = []
        input_kwargs = {}

        serialized_args = {INIT_KEY: {}, METHOD_KEY: {}}

        init_kwargs = {}
        method_kwargs = {}

        for key, value in kwargs.items():
            if key in init_arg_names:
                prefix_key = INIT_KEY
                init_kwargs[key] = value
                signature = init_signature
            else:
                prefix_key = METHOD_KEY
                method_kwargs[key] = value
                signature = method_signature

            # no need to add this argument because it's optional
            # this param is validated against the signature because
            # of init_kwargs, method_kwargs
            if value is None:
                continue

            param_type = signature.parameters[key].annotation
            param_type = resolve_annotation(param_type)
            serializer = get_serializer(param_type)
            if serializer:
                # Serialized values travel as strings; PipelineParams are left
                # untouched and passed through as-is.
                param_type = str
                if not isinstance(value,
                                  kfp.dsl._pipeline_param.PipelineParam):
                    value = serializer(value)

            # TODO remove PipelineParam check when Metadata Importer component available
            # if we serialize we need to include the argument as input
            # perhaps, another option is to embed in yaml as json serialized list
            component_param_name = component_param_name_to_mb_sdk_param_name.get(
                key, key)
            component_param_type = None
            if isinstance(value,
                          kfp.dsl._pipeline_param.PipelineParam) or serializer:
                is_resource_noun = is_mb_sdk_resource_noun_type(param_type)
                if is_resource_noun:
                    metadata_type = map_resource_to_metadata_type(
                        param_type)[1]
                    component_param_type = metadata_type
                elif param_type == int:
                    component_param_type = 'Integer'
                elif param_type == float:
                    component_param_type = 'Float'
                elif param_type == bool:
                    component_param_type = 'Bool'
                elif param_type in (list, collections.abc.Sequence, Sequence):
                    component_param_type = 'List'
                elif param_type in (dict, Dict):
                    component_param_type = 'Dict'
                else:
                    # Everything else (including proto-plus message classes)
                    # is passed as a string.
                    component_param_type = 'String'

                input_specs.append(
                    structures.InputSpec(
                        name=key,
                        type=component_param_type,
                    ))
                input_args.append(f'--{prefix_key}.{component_param_name}')
                if is_resource_noun:
                    # Resource nouns are artifacts: pass the resource name
                    # stored in the artifact metadata rather than the value.
                    input_args.append(
                        f'{{{{$.inputs.artifacts[\'{key}\'].metadata[\'resourceName\']}}}}'
                    )
                else:
                    input_args.append(
                        structures.InputValuePlaceholder(input_name=key))

                input_kwargs[key] = value
            else:
                # Serialized arguments must always be strings
                value = str(value)
                serialized_args[prefix_key][component_param_name] = value

        # validate parameters
        if should_serialize_init:
            init_signature.bind(**init_kwargs)
        method_signature.bind(**method_kwargs)

        component_spec = structures.ComponentSpec(
            name=f'{cls_name}-{method_name}',
            inputs=input_specs,
            outputs=output_specs,
            implementation=structures.
            ContainerImplementation(container=structures.ContainerSpec(
                image=DEFAULT_CONTAINER_IMAGE,
                command=[
                    'python3',
                    '-m',
                    'google_cloud_pipeline_components.container.aiplatform.remote_runner',
                    '--cls_name',
                    cls_name,
                    '--method_name',
                    method_name,
                ],
                args=make_args(serialized_args) + output_args + input_args,
            )))

        # tempfile.mktemp() is deprecated and race-prone: it only returns a
        # name, so another process can claim the path first. Atomically create
        # the file instead and keep it (delete=False) so it can be written to
        # and re-read by path below.
        with tempfile.NamedTemporaryFile(delete=False) as temp_file:
            component_path = temp_file.name
        component_spec.save(component_path)

        return components.load_component_from_file(component_path)(
            **input_kwargs)
    name='component_1',
    implementation=structures.Implementation(
        container=structures.ContainerSpec(
            image='alpine',
            command=[
                'sh',
                '-c',
                'set -ex\necho "$0" "$1" "$2" > "$3"',
                structures.InputValuePlaceholder(input_name='input1'),
                structures.InputValuePlaceholder(input_name='input2'),
                structures.InputValuePlaceholder(input_name='input3'),
                structures.OutputPathPlaceholder(output_name='output1'),
            ],
        )),
    inputs={
        'input1': structures.InputSpec(type='String'),
        'input2': structures.InputSpec(type='Integer'),
        'input3': structures.InputSpec(type='Float', default=3.14),
        'input4': structures.InputSpec(type='Optional[Float]', default=None),
    },
    outputs={
        'output1': structures.OutputSpec(type='String'),
    },
))


class BaseComponentTest(unittest.TestCase):
    @patch.object(pipeline_task, 'create_pipeline_task', autospec=True)
    def test_instantiate_component_with_keyword_arguments(
            self, mock_create_pipeline_task):
Example #17
0
    def test_if_placeholder(self):
        compiled_yaml = textwrap.dedent("""
components:
  comp-if:
    executorLabel: exec-if
    inputDefinitions:
      parameters:
        optional_input_1:
          parameterType: STRING
deploymentSpec:
  executors:
    exec-if:
      container:
        args:
        - 'input: '
        - '{{$.inputs.parameters[''optional_input_1'']}}'
        command:
        - sh
        - -c
        - echo "$0" "$1"
        image: alpine
pipelineInfo:
  name: if
root:
  dag:
    tasks:
      if:
        cachingOptions:
          enableCache: true
        componentRef:
          name: comp-if
        inputs:
          parameters:
            optional_input_1:
              componentInputParameter: optional_input_1
        taskInfo:
          name: if
  inputDefinitions:
    parameters:
      optional_input_1:
        parameterType: STRING
schemaVersion: 2.1.0
sdkVersion: kfp-2.0.0-alpha.2""")
        loaded_component_spec = structures.ComponentSpec.load_from_component_yaml(
            compiled_yaml)
        component_spec = structures.ComponentSpec(
            name='if',
            implementation=structures.Implementation(
                container=structures.ContainerSpec(
                    image='alpine',
                    command=['sh', '-c', 'echo "$0" "$1"'],
                    args=[
                        'input: ',
                        structures.InputValuePlaceholder(
                            input_name='optional_input_1')
                    ],
                    env=None,
                    resources=None),
                graph=None,
                importer=None),
            description=None,
            inputs={
                'optional_input_1':
                    structures.InputSpec(type='String', default=None)
            },
            outputs=None)
        self.assertEqual(loaded_component_spec, component_spec)
Example #18
0
    def test_simple_placeholder(self):
        compiled_yaml = textwrap.dedent("""
components:
  comp-component1:
    executorLabel: exec-component1
    inputDefinitions:
      parameters:
        input1:
          parameterType: STRING
    outputDefinitions:
      artifacts:
        output1:
          artifactType:
            schemaTitle: system.Artifact
            schemaVersion: 0.0.1
deploymentSpec:
  executors:
    exec-component1:
      container:
        args:
        - '{{$.inputs.parameters[''input1'']}}'
        - '{{$.outputs.artifacts[''output1''].path}}'
        command:
        - sh
        - -c
        - echo "$0" >> "$1"
        image: alpine
pipelineInfo:
  name: component1
root:
  dag:
    tasks:
      component1:
        cachingOptions:
          enableCache: true
        componentRef:
          name: comp-component1
        inputs:
          parameters:
            input1:
              componentInputParameter: input1
        taskInfo:
          name: component1
  inputDefinitions:
    parameters:
      input1:
        parameterType: STRING
schemaVersion: 2.1.0
sdkVersion: kfp-2.0.0-alpha.2""")
        loaded_component_spec = structures.ComponentSpec.load_from_component_yaml(
            compiled_yaml)
        component_spec = structures.ComponentSpec(
            name='component1',
            implementation=structures.Implementation(
                container=structures.ContainerSpec(
                    image='alpine',
                    command=['sh', '-c', 'echo "$0" >> "$1"'],
                    args=[
                        structures.InputValuePlaceholder(input_name='input1'),
                        structures.OutputPathPlaceholder(output_name='output1')
                    ],
                    env=None,
                    resources=None),
                graph=None,
                importer=None),
            description=None,
            inputs={
                'input1': structures.InputSpec(type='String', default=None)
            },
            outputs={'output1': structures.OutputSpec(type='Artifact')})
        self.assertEqual(loaded_component_spec, component_spec)
Example #19
0
def extract_component_interface(func: Callable) -> structures.ComponentSpec:
    """Derives a component interface from a Python function's signature.

    Every parameter becomes either an input or an output of the component,
    depending on its annotation; the return annotation contributes additional
    outputs. Parameter descriptions are taken from the function's docstring.

    Args:
        func: The function whose signature, annotations and docstring are
            inspected.

    Returns:
        A ComponentSpec with the name, description, inputs and outputs filled
        in and an empty placeholder implementation.

    Raises:
        ValueError: If an Input[T]/Output[T] annotation uses a T that is not
            an Artifact subclass, if an artifact parameter has a default
            value, or if a path parameter has a default other than ``None``
            on an InputPath.
        TypeError: If no type struct can be derived for a parameter.
    """
    single_output_name_const = 'Output'

    signature = inspect.signature(func)
    parameters = list(signature.parameters.values())

    parsed_docstring = docstring_parser.parse(inspect.getdoc(func))
    doc_dict = {p.arg_name: p.description for p in parsed_docstring.params}

    inputs = {}
    outputs = {}

    input_names = set()
    output_names = set()
    for parameter in parameters:
        parameter_type = type_annotations.maybe_strip_optional_from_annotation(
            parameter.annotation)
        passing_style = None
        io_name = parameter.name

        if type_annotations.is_artifact_annotation(parameter_type):
            # passing_style is either type_annotations.InputAnnotation or
            # type_annotations.OutputAnnotation.
            passing_style = type_annotations.get_io_artifact_annotation(
                parameter_type)

            # parameter_type is type_annotations.Artifact or one of its subclasses.
            parameter_type = type_annotations.get_io_artifact_class(
                parameter_type)
            if not issubclass(parameter_type, artifact_types.Artifact):
                raise ValueError(
                    'Input[T] and Output[T] are only supported when T is a '
                    'subclass of Artifact. Found `{} with type {}`'.format(
                        io_name, parameter_type))

            if parameter.default is not inspect.Parameter.empty:
                raise ValueError(
                    'Default values for Input/Output artifacts are not supported.'
                )
        elif isinstance(
                parameter_type,
            (type_annotations.InputPath, type_annotations.OutputPath)):
            passing_style = type(parameter_type)
            parameter_type = parameter_type.type
            # The only default allowed on a path parameter is None, and only
            # for InputPath.
            if parameter.default is not inspect.Parameter.empty and not (
                    passing_style == type_annotations.InputPath
                    and parameter.default is None):
                raise ValueError(
                    'Path inputs only support default values of None. Default'
                    ' values for outputs are not supported.')

        type_struct = _annotation_to_type_struct(parameter_type)
        if type_struct is None:
            raise TypeError('Missing type annotation for argument: {}'.format(
                parameter.name))

        if passing_style in [
                type_annotations.OutputAnnotation, type_annotations.OutputPath
        ]:
            io_name = _maybe_make_unique(io_name, output_names)
            output_names.add(io_name)
            output_spec = structures.OutputSpec(type=type_struct,
                                                description=doc_dict.get(
                                                    parameter.name))
            outputs[io_name] = output_spec
        else:
            io_name = _maybe_make_unique(io_name, input_names)
            input_names.add(io_name)
            # `default` is only passed when the parameter actually declares
            # one, so an explicit default of None stays distinguishable from
            # "no default".
            if parameter.default is not inspect.Parameter.empty:
                input_spec = structures.InputSpec(
                    type=type_struct,
                    description=doc_dict.get(parameter.name),
                    default=parameter.default,
                )
            else:
                input_spec = structures.InputSpec(
                    type=type_struct,
                    description=doc_dict.get(parameter.name),
                )

            inputs[io_name] = input_spec

    # Analyzing the return type annotations.
    return_ann = signature.return_annotation
    if hasattr(return_ann, '_fields'):  # NamedTuple
        # Getting field type annotations.
        # __annotations__ does not exist in python 3.5 and earlier
        # _field_types does not exist in python 3.9 and later
        field_annotations = getattr(return_ann,
                                    '__annotations__', None) or getattr(
                                        return_ann, '_field_types', None)
        for field_name in return_ann._fields:
            type_struct = None
            if field_annotations:
                type_struct = _annotation_to_type_struct(
                    field_annotations.get(field_name, None))

            output_name = _maybe_make_unique(field_name, output_names)
            output_names.add(output_name)
            output_spec = structures.OutputSpec(type=type_struct)
            outputs[output_name] = output_spec
    # Deprecated dict-based way of declaring multiple outputs. Was only used by
    # the @component decorator
    elif isinstance(return_ann, dict):
        warnings.warn(
            'The ability to specify multiple outputs using the dict syntax'
            ' has been deprecated. It will be removed soon after release'
            ' 0.1.32. Please use typing.NamedTuple to declare multiple'
            ' outputs.')
        for output_name, output_type_annotation in return_ann.items():
            output_type_struct = _annotation_to_type_struct(
                output_type_annotation)
            # Key by the declared output name (the previous code referenced an
            # undefined `name` and raised NameError), de-duplicated against
            # already-registered outputs like the other branches do.
            unique_output_name = _maybe_make_unique(output_name, output_names)
            output_names.add(unique_output_name)
            output_spec = structures.OutputSpec(type=output_type_struct)
            outputs[unique_output_name] = output_spec
    elif signature.return_annotation is not None and signature.return_annotation != inspect.Parameter.empty:
        output_name = _maybe_make_unique(single_output_name_const,
                                         output_names)
        # Fixes exotic, but possible collision:
        #   `def func(output_path: OutputPath()) -> str: ...`
        output_names.add(output_name)
        type_struct = _annotation_to_type_struct(signature.return_annotation)
        output_spec = structures.OutputSpec(type=type_struct)
        outputs[output_name] = output_spec

    # Component name and description are derived from the function's name and
    # docstring.  The name can be overridden by setting the func.__name__
    # attribute (or the legacy func._component_human_name attribute).  The
    # description can be overridden by setting the func.__doc__ attribute (or
    # the legacy func._component_description attribute).
    component_name = getattr(func, '_component_human_name',
                             None) or _python_function_name_to_component_name(
                                 func.__name__)
    description = getattr(func, '_component_description',
                          None) or parsed_docstring.short_description
    if description:
        description = description.strip()

    component_spec = structures.ComponentSpec(
        name=component_name,
        description=description,
        inputs=inputs if inputs else None,
        outputs=outputs if outputs else None,
        # Dummy implementation to bypass model validation.
        implementation=structures.Implementation(),
    )
    return component_spec
Example #20
0
    def test_component_spec_load_from_v1_component_yaml(self):
        """A v1 component YAML loads into the equivalent ComponentSpec."""
        v1_yaml = textwrap.dedent("""\
        name: Component with 2 inputs and 2 outputs
        inputs:
        - {name: Input parameter, type: String}
        - {name: Input artifact}
        outputs:
        - {name: Output 1}
        - {name: Output 2}
        implementation:
          container:
            image: busybox
            command: [sh, -c, '
                mkdir -p $(dirname "$2")
                mkdir -p $(dirname "$3")
                echo "$0" > "$2"
                cp "$1" "$3"
                '
            ]
            args:
            - {inputValue: Input parameter}
            - {inputPath: Input artifact}
            - {outputPath: Output 1}
            - {outputPath: Output 2}
        """)

        loaded_spec = structures.ComponentSpec.load_from_component_yaml(
            v1_yaml)

        # The expected shell script is the YAML's multi-line quoted scalar as
        # a single line.
        expected_script = (
            ' mkdir -p $(dirname "$2") mkdir -p $(dirname "$3") '
            'echo "$0" > "$2" cp "$1" "$3" ')
        expected_container = structures.ContainerSpec(
            image='busybox',
            command=['sh', '-c', expected_script],
            args=[
                structures.InputValuePlaceholder(input_name='input_parameter'),
                structures.InputPathPlaceholder(input_name='input_artifact'),
                structures.OutputPathPlaceholder(output_name='output_1'),
                structures.OutputPathPlaceholder(output_name='output_2'),
            ],
            env={},
        )
        expected_inputs = {
            'input_parameter': structures.InputSpec(type='String'),
            'input_artifact': structures.InputSpec(type='Artifact'),
        }
        expected_outputs = {
            'output_1': structures.OutputSpec(type='Artifact'),
            'output_2': structures.OutputSpec(type='Artifact'),
        }
        wanted_spec = structures.ComponentSpec(
            name='Component with 2 inputs and 2 outputs',
            implementation=structures.Implementation(
                container=expected_container),
            inputs=expected_inputs,
            outputs=expected_outputs)

        self.assertEqual(loaded_spec, wanted_spec)
Example #21
0
def run_as_vertex_ai_custom_job(
    component_spec: Callable,
    display_name: Optional[str] = None,
    replica_count: Optional[int] = None,
    machine_type: Optional[str] = None,
    accelerator_type: Optional[str] = None,
    accelerator_count: Optional[int] = None,
    boot_disk_type: Optional[str] = None,
    boot_disk_size_gb: Optional[int] = None,
    timeout: Optional[str] = None,
    restart_job_on_worker_restart: Optional[bool] = None,
    service_account: Optional[str] = None,
    network: Optional[str] = None,
    worker_pool_specs: Optional[List[Mapping[str, Any]]] = None,
) -> Callable:
    """Run a pipeline task using AI Platform (Unified) custom training job.

    For detailed doc of the service, please refer to
    https://cloud.google.com/ai-platform-unified/docs/training/create-custom-job

    Args:
      component_spec: The task (ContainerOp) object to run as aiplatform custom job.
      display_name: Optional. The name of the custom job. If not provided the
        component_spec.name will be used instead.
      replica_count: Optional. The number of replicas to be split between master
        workerPoolSpec and worker workerPoolSpec. (master always has 1 replica).
      machine_type: Optional. The type of the machine to run the custom job. The
        default value is "n1-standard-4".
      accelerator_type: Optional. The type of accelerator(s) that may be attached
        to the machine as per accelerator_count.
      accelerator_count: Optional. The number of accelerators to attach to the
        machine.
      boot_disk_type: Optional. Type of the boot disk (default is "pd-ssd"). Valid
        values: "pd-ssd" (Persistent Disk Solid State Drive) or "pd-standard"
          (Persistent Disk Hard Disk Drive).
      boot_disk_size_gb: Optional. Size in GB of the boot disk (default is 100GB).
      timeout: Optional. The maximum job running time. The default is 7 days. A
        duration in seconds with up to nine fractional digits, terminated by 's'.
        Example: "3.5s"
      restart_job_on_worker_restart: Optional. Restarts the entire CustomJob if a
        worker gets restarted. This feature can be used by distributed training
        jobs that are not resilient to workers leaving and joining a job.
      service_account: Optional. Specifies the service account for workload run-as
        account.
      network: Optional. The full name of the Compute Engine network to which the
        job should be peered. For example, projects/12345/global/networks/myVPC.
      worker_pool_specs: Optional, worker_pool_specs for distributed training. This
        will overwrite all other cluster configurations. For details, please see:
        https://cloud.google.com/ai-platform-unified/docs/training/distributed-training

    Returns:
      A Custom Job component OP corresponding to the input component OP.

    Raises:
      ValueError: If an entry of worker_pool_specs contains neither
        "container_spec" nor "python_package_spec".
    """
    job_spec = {}

    # A component without inputs/outputs may carry None instead of a list;
    # normalize once so the iteration and concatenations below never fail.
    original_inputs = component_spec.component_spec.inputs or []
    original_outputs = component_spec.component_spec.outputs or []

    # As a temporary workaround for an issue with the kfp v2 based compiler,
    # where the compiler expects placeholders in their original form in args
    # instead of using fields from outputs, we add back the args from the
    # original component to the custom job component. These args will be
    # ignored by the remote launcher.
    copy_of_original_args = []

    if worker_pool_specs is not None:
        # Work on a deep copy so the caller's specs are not mutated when the
        # placeholders are resolved in place.
        worker_pool_specs = copy.deepcopy(worker_pool_specs)

        def _is_output_parameter(output_key: str) -> bool:
            return output_key in (component_spec.component_spec.
                                  output_definitions.parameters.keys())

        for worker_pool_spec in worker_pool_specs:
            if 'container_spec' in worker_pool_spec:
                container_spec = worker_pool_spec['container_spec']
                if 'command' in container_spec:
                    dsl_utils.resolve_cmd_lines(container_spec['command'],
                                                _is_output_parameter)
                if 'args' in container_spec:
                    copy_of_original_args = container_spec['args'].copy()
                    dsl_utils.resolve_cmd_lines(container_spec['args'],
                                                _is_output_parameter)

            elif 'python_package_spec' in worker_pool_spec:
                # For custom Python training, resolve placeholders in args only.
                python_spec = worker_pool_spec['python_package_spec']
                if 'args' in python_spec:
                    dsl_utils.resolve_cmd_lines(python_spec['args'],
                                                _is_output_parameter)

            else:
                raise ValueError(
                    'Expect either "container_spec" or "python_package_spec" in each '
                    'workerPoolSpec. Got: {}'.format(worker_pool_spec))

        job_spec['worker_pool_specs'] = worker_pool_specs

    else:

        def _is_output_parameter(output_key: str) -> bool:
            for output in original_outputs:
                if output.name == output_key:
                    return type_utils.is_parameter_type(output.type)
            return False

        container = component_spec.component_spec.implementation.container
        worker_pool_spec = {
            'machine_spec': {
                'machine_type': machine_type
                or _DEFAULT_CUSTOM_JOB_MACHINE_TYPE
            },
            'replica_count': 1,
            'container_spec': {
                'image_uri': container.image,
            }
        }
        if container.command:
            # Resolve placeholders on a copy; the original component's command
            # list must stay untouched.
            container_command_copy = container.command.copy()
            dsl_utils.resolve_cmd_lines(container_command_copy,
                                        _is_output_parameter)
            worker_pool_spec['container_spec'][
                'command'] = container_command_copy

        if container.args:
            container_args_copy = container.args.copy()
            copy_of_original_args = container.args.copy()
            dsl_utils.resolve_cmd_lines(container_args_copy,
                                        _is_output_parameter)
            worker_pool_spec['container_spec']['args'] = container_args_copy
        if accelerator_type is not None:
            worker_pool_spec['machine_spec'][
                'accelerator_type'] = accelerator_type
        if accelerator_count is not None:
            worker_pool_spec['machine_spec'][
                'accelerator_count'] = accelerator_count
        if boot_disk_type is not None:
            worker_pool_spec.setdefault('disk_spec',
                                        {})['boot_disk_type'] = boot_disk_type
        if boot_disk_size_gb is not None:
            worker_pool_spec.setdefault(
                'disk_spec', {})['boot_disk_size_gb'] = boot_disk_size_gb

        job_spec['worker_pool_specs'] = [worker_pool_spec]
        if replica_count is not None and replica_count > 1:
            # The first pool always has one replica; the remaining replicas go
            # into a second, otherwise identical pool.
            additional_worker_pool_spec = copy.deepcopy(worker_pool_spec)
            # NOTE(review): stored as a string while the primary pool uses an
            # int; kept as-is, confirm this is intended.
            additional_worker_pool_spec['replica_count'] = str(replica_count -
                                                               1)
            job_spec['worker_pool_specs'].append(additional_worker_pool_spec)

    if timeout is not None:
        job_spec.setdefault('scheduling', {})['timeout'] = timeout
    if restart_job_on_worker_restart is not None:
        job_spec.setdefault('scheduling', {})[
            'restart_job_on_worker_restart'] = restart_job_on_worker_restart
    if service_account is not None:
        job_spec['service_account'] = service_account
    if network is not None:
        job_spec['network'] = network

    custom_job_payload = {
        'display_name': display_name or component_spec.component_spec.name,
        'job_spec': job_spec
    }

    custom_job_component_spec = structures.ComponentSpec(
        name=component_spec.component_spec.name,
        inputs=original_inputs + [
            structures.InputSpec(name='gcp_project', type='String'),
            structures.InputSpec(name='gcp_region', type='String')
        ],
        outputs=original_outputs +
        [structures.OutputSpec(name='GCP_RESOURCES', type='String')],
        implementation=structures.ContainerImplementation(
            container=structures.ContainerSpec(
                image=_DEFAULT_CUSTOM_JOB_CONTAINER_IMAGE,
                command=["python", "-u", "-m", "launcher"],
                args=[
                    '--type',
                    'CustomJob',
                    '--gcp_project',
                    structures.InputValuePlaceholder(input_name='gcp_project'),
                    '--gcp_region',
                    structures.InputValuePlaceholder(input_name='gcp_region'),
                    '--payload',
                    json.dumps(custom_job_payload),
                    '--gcp_resources',
                    structures.OutputPathPlaceholder(
                        output_name='GCP_RESOURCES'),
                ] + copy_of_original_args,
            )))

    # tempfile.mktemp() is deprecated and race-prone: it only returns a name,
    # so another process can claim the path first. Atomically create the file
    # instead and keep it (delete=False) so it can be written and re-read by
    # path below.
    with tempfile.NamedTemporaryFile(delete=False) as temp_file:
        component_path = temp_file.name
    custom_job_component_spec.save(component_path)

    return components.load_component_from_file(component_path)
Example #22
0
            args=[
                structures.IfPresentPlaceholder(
                    if_structure=structures.IfPresentPlaceholderStructure(
                        input_name='optional_input_1',
                        then=[
                            '--arg1',
                            structures.InputUriPlaceholder(
                                input_name='optional_input_1'),
                        ],
                        otherwise=[
                            '--arg2',
                            'default',
                        ]))
            ])),
    inputs={
        'optional_input_1': structures.InputSpec(type='String', default=None)
    },
)

# V1-format component YAML for `component_concat`: an alpine container whose
# single argument is a `concat` placeholder joining the literal '--arg1' with
# the value of the String input `input_prefix`.
V1_YAML_CONCAT_PLACEHOLDER = textwrap.dedent("""\
    name: component_concat
    implementation:
      container:
        args:
        - concat: ['--arg1', {inputValue: input_prefix}]
        image: alpine
    inputs:
    - {name: input_prefix, type: String}
    """)

COMPONENT_SPEC_CONCAT_PLACEHOLDER = structures.ComponentSpec(