예제 #1
0
 def a_op(field_m: {
     'GCSPath': {
         'path_type': 'file',
         'file_type': 'tsv'
     }
 }, field_o: 'Integer'):
     """Create the 'operator a' ContainerOp.

     ``field_m`` is forwarded right after the ``--field-l`` flag and
     ``field_o`` right after the ``--field-o`` flag.
     """
     cli_arguments = ['--field-l', field_m, '--field-o', field_o]
     return ContainerOp(
         name='operator a',
         image='gcr.io/ml-pipeline/component-b',
         arguments=cli_arguments,
     )
예제 #2
0
    def test_basic(self):
        """Check inputs, outputs, init containers, sidecars and env of an op."""
        param1 = PipelineParam('param1')
        param2 = PipelineParam('param2')
        op1 = ContainerOp(
            name='op1',
            image='image',
            arguments=['%s hello %s %s' % (param1, param2, param1)],
            init_containers=[
                UserContainer(name='initcontainer0', image='initimage0')
            ],
            sidecars=[Sidecar(name='sidecar0', image='image0')],
            container_kwargs={'env': [V1EnvVar(name='env1', value='value1')]},
            file_outputs={'out1': '/tmp/b'})
        # Re-bind after every builder call so op1 is whatever the last call
        # returned, exactly as with a single chained expression.
        op1 = op1.add_init_container(
            UserContainer(name='initcontainer1', image='initimage1'))
        op1 = op1.add_init_container(
            UserContainer(name='initcontainer2', image='initimage2'))
        op1 = op1.add_sidecar(Sidecar(name='sidecar1', image='image1'))
        op1 = op1.add_sidecar(Sidecar(name='sidecar2', image='image2'))

        # Inputs and outputs.
        input_names = [x.name for x in op1.inputs]
        self.assertCountEqual(input_names, ['param1', 'param2'])
        self.assertCountEqual(list(op1.outputs.keys()), ['out1'])
        output_op_names = [x.op_name for x in op1.outputs.values()]
        self.assertCountEqual(output_op_names, [op1.name])
        self.assertEqual(op1.output.name, 'out1')

        # Init containers.
        init_names = [
            init_container.name for init_container in op1.init_containers
        ]
        self.assertCountEqual(
            init_names, ['initcontainer0', 'initcontainer1', 'initcontainer2'])
        init_images = [
            init_container.image for init_container in op1.init_containers
        ]
        self.assertCountEqual(init_images,
                              ['initimage0', 'initimage1', 'initimage2'])

        # Sidecars.
        sidecar_names = [sidecar.name for sidecar in op1.sidecars]
        self.assertCountEqual(sidecar_names,
                              ['sidecar0', 'sidecar1', 'sidecar2'])
        sidecar_images = [sidecar.image for sidecar in op1.sidecars]
        self.assertCountEqual(sidecar_images, ['image0', 'image1', 'image2'])

        # Environment variables set through container_kwargs.
        env_names = [env.name for env in op1.container.env]
        self.assertCountEqual(env_names, ['env1'])
                              ['env1'])
예제 #3
0
    def test_basic(self):
        """Check inputs and outputs of an op created inside a Pipeline."""
        with Pipeline('somename') as pipeline:
            param1 = PipelineParam('param1')
            param2 = PipelineParam('param2')
            rendered_argument = '%s hello %s %s' % (param1, param2, param1)
            op1 = ContainerOp(name='op1',
                              image='image',
                              arguments=[rendered_argument],
                              file_outputs={'out1': '/tmp/b'})

        input_names = [x.name for x in op1.inputs]
        self.assertCountEqual(input_names, ['param1', 'param2'])

        output_keys = list(op1.outputs.keys())
        self.assertCountEqual(output_keys, ['out1'])

        producer_names = [x.op_name for x in op1.outputs.values()]
        self.assertCountEqual(producer_names, ['op1'])

        self.assertEqual(op1.output.name, 'out1')
예제 #4
0
 def b_op(field_x: {'customized_type': {'openapi_schema_validator': '{"type": "string", "pattern": "^gcs://.*$"}'}},
     field_y: Integer(),
     field_z: GCSPath()) -> {'output_model_uri': 'GcsUri'}:
     """Create the 'operator b' ContainerOp.

     ``field_x`` is run with ``python3``; ``field_y`` and ``field_z`` are
     passed after ``--field-y`` / ``--field-z``. The op exposes one file
     output, ``output_model_uri``, read from ``/schema.txt``.
     """
     entrypoint = ['python3', field_x]
     cli_arguments = ['--field-y', field_y, '--field-z', field_z]
     declared_outputs = {'output_model_uri': '/schema.txt'}
     return ContainerOp(
         name='operator b',
         image='gcr.io/ml-pipeline/component-a',
         command=entrypoint,
         arguments=cli_arguments,
         file_outputs=declared_outputs,
     )
예제 #5
0
 def b_op(field_x: {'customized_type': {'property_a': 'value_a', 'property_b': 'value_b'}},
     field_y: 'GcsUri',
     field_z: GCSPath(path_type='file', file_type='tsv')) -> {'output_model_uri': 'GcsUri'}:
     """Create the 'operator b' ContainerOp.

     ``field_x`` is run with ``python3``; ``field_y`` and ``field_z`` are
     passed after ``--field-y`` / ``--field-z``. The op exposes one file
     output, ``output_model_uri``, read from ``/schema.txt``.
     """
     entrypoint = ['python3', field_x]
     cli_arguments = ['--field-y', field_y, '--field-z', field_z]
     declared_outputs = {'output_model_uri': '/schema.txt'}
     return ContainerOp(
         name='operator b',
         image='gcr.io/ml-pipeline/component-b',
         command=entrypoint,
         arguments=cli_arguments,
         file_outputs=declared_outputs,
     )
예제 #6
0
 def b_op(field_x,
     field_y: Integer(),
     field_z: GCSPath(path_type='file', file_type='tsv')) -> {'output_model_uri': 'GcsUri'}:
     """Create the 'operator b' ContainerOp.

     ``field_x`` (unannotated) is run with ``python3``; ``field_y`` and
     ``field_z`` are passed after ``--field-y`` / ``--field-z``. The op
     exposes one file output, ``output_model_uri``, read from
     ``/schema.txt``.
     """
     entrypoint = ['python3', field_x]
     cli_arguments = ['--field-y', field_y, '--field-z', field_z]
     declared_outputs = {'output_model_uri': '/schema.txt'}
     return ContainerOp(
         name='operator b',
         image='gcr.io/ml-pipeline/component-a',
         command=entrypoint,
         arguments=cli_arguments,
         file_outputs=declared_outputs,
     )
예제 #7
0
 def a_op(
         field_m: {
             'GCRPath': {
                 'openapi_schema_validator': {
                     "type": "string",
                     "pattern": "^.*gcr\\.io/.*$"
                 }
             }
         }, field_o: 'Integer'):
     """Create the 'operator a' ContainerOp.

     ``field_m`` is forwarded right after the ``--field-l`` flag and
     ``field_o`` right after the ``--field-o`` flag.
     """
     cli_arguments = ['--field-l', field_m, '--field-o', field_o]
     return ContainerOp(
         name='operator a',
         image='gcr.io/ml-pipeline/component-b',
         arguments=cli_arguments,
     )
예제 #8
0
    def __init__(
            self,
            kind=None,
            command=None,
            args=None,
            image=None,
            handler=None,
            metadata=None,
            build=None,
            volumes=None,
            volume_mounts=None,
            env=None,
            resources=None,
            image_pull_policy=None,
            service_account=None):
        """Set up the runtime and attach a placeholder ContainerOp.

        Raises:
            ImportError: if ``kfp.dsl`` cannot be imported; an install hint
                is printed before the error is re-raised.
        """
        try:
            from kfp.dsl import ContainerOp
        except ImportError as import_error:
            print('KubeFlow pipelines sdk is not installed, use "pip install kfp"')
            raise import_error

        super().__init__(kind, command, args, image, handler, metadata, None)

        # _build is reset first, then build is assigned (same order as before).
        self._build = None
        self.build = build

        # Falsy collection arguments are replaced by fresh empty lists.
        self.volumes = volumes if volumes else []
        self.volume_mounts = volume_mounts if volume_mounts else []
        self.env = env if env else []

        self.resources = resources
        self.image_pull_policy = image_pull_policy
        self.service_account = service_account
        self._cop = ContainerOp('name', 'image')
예제 #9
0
 def a_op(field_l: Integer()) -> {
         'field_m': 'GCSPath',
         'field_n': {
             'customized_type': {
                 'property_a': 'value_a',
                 'property_b': 'value_b'
             }
         },
         'field_o': 'Integer'
 }:
     """Create the 'operator a' ContainerOp with three file outputs.

     ``field_l`` is passed after the ``--field-l`` flag. The outputs
     ``field_m``, ``field_n`` and ``field_o`` are read from ``/schema.txt``,
     ``/feature.txt`` and ``/output.txt`` respectively.
     """
     output_files = {
         'field_m': '/schema.txt',
         'field_n': '/feature.txt',
         'field_o': '/output.txt',
     }
     return ContainerOp(
         name='operator a',
         image='gcr.io/ml-pipeline/component-b',
         arguments=['--field-l', field_l],
         file_outputs=output_files,
     )
예제 #10
0
 def a_op(field_l: Integer()) -> {
         'field_m': 'GCSPath',
         'field_n': {
             'customized_type': {
                 'openapi_schema_validator':
                 '{"type": "string", "pattern": "^gs://.*$"}'
             }
         },
         'field_o': 'Integer'
 }:
     """Create the 'operator a' ContainerOp with three file outputs.

     ``field_l`` is passed after the ``--field-l`` flag. The outputs
     ``field_m``, ``field_n`` and ``field_o`` are read from ``/schema.txt``,
     ``/feature.txt`` and ``/output.txt`` respectively.
     """
     output_files = {
         'field_m': '/schema.txt',
         'field_n': '/feature.txt',
         'field_o': '/output.txt',
     }
     return ContainerOp(
         name='operator a',
         image='gcr.io/ml-pipeline/component-b',
         arguments=['--field-l', field_l],
         file_outputs=output_files,
     )
예제 #11
0
def training_op(script, image=None, arguments=None, file_outputs=None):
    """Build a ContainerOp that runs ``script`` with ``/usr/local/bin/python``.

    A template function to encapsulate similar container ops.

    Args:
        script: Script handed to the Python interpreter. The op name is the
            lower-cased script name without its extension, with every run
            of non-word characters or underscores replaced by ``-``.
        image: Container image. When falsy and running under IPython, the
            ``TRAINING_IMAGE`` variable of the IPython user namespace is
            used instead.
        arguments: Arguments forwarded to the op. Defaults to a new empty
            list on every call.
        file_outputs: File outputs forwarded to the op. Defaults to a new
            empty dict on every call.

    Returns:
        The configured ContainerOp.

    Raises:
        ValueError: if no image was given and none could be discovered.
    """
    # Fresh containers per call: a mutable default would be shared between
    # every op created without explicit arguments / file_outputs.
    if arguments is None:
        arguments = []
    if file_outputs is None:
        file_outputs = {}

    if not image and _is_ipython():
        from IPython import get_ipython
        image = get_ipython().user_ns.get('TRAINING_IMAGE')

    if not image:
        raise ValueError("""
            `image` parameter is missing.
            If you run in Jupyter Notebook you can also define a global var TRAINING_IMAGE
        """)

    script_stem = os.path.splitext(script.lower())[0]
    return ContainerOp(
        name=re.sub(r'[\W_]+', '-', script_stem),
        image=image,
        command=['/usr/local/bin/python', script],
        arguments=arguments,
        file_outputs=file_outputs,
    )
예제 #12
0
    def test_deprecation_warnings(self):
        """Check that the legacy ContainerOp API warns about pending removal."""
        op = ContainerOp(name='op1', image='image')
        expected_warning = PendingDeprecationWarning

        # Attribute assignments.
        with self.assertWarns(expected_warning):
            op.env_variables = [V1EnvVar(name="foo", value="bar")]

        with self.assertWarns(expected_warning):
            op.image = 'image2'

        # Resource setters, exercised in the same order as before.
        resource_calls = (
            ('set_memory_request', '10M'),
            ('set_memory_limit', '10M'),
            ('set_cpu_request', '100m'),
            ('set_cpu_limit', '1'),
            ('set_gpu_limit', '1'),
        )
        for method_name, value in resource_calls:
            with self.assertWarns(expected_warning):
                getattr(op, method_name)(value)

        # Env variable and volume mount helpers.
        with self.assertWarns(expected_warning):
            op.add_env_variable(V1EnvVar(name="foo", value="bar"))

        with self.assertWarns(expected_warning):
            op.add_volume_mount(
                V1VolumeMount(mount_path='/secret/gcp-credentials',
                              name='gcp-credentials'))
예제 #13
0
 def test_after_op(self):
     """Test that after() lists the upstream op among the dependent names."""
     upstream = ContainerOp(name='op1', image='image')
     downstream = ContainerOp(name='op2', image='image')
     downstream.after(upstream)
     expected_dependencies = [upstream.name]
     self.assertCountEqual(downstream.dependent_names, expected_dependencies)
예제 #14
0
def update_op(op: dsl.ContainerOp,
              pipeline_name: dsl.PipelineParam,
              pipeline_root: dsl.PipelineParam,
              launcher_image: Optional[str] = None) -> None:
    """Updates the passed in Op for running in v2-compatible mode.

    The op is modified in place: a launcher init container, volume and
    volume mount are added, the command is replaced by the launcher
    invocation, the container spec's command and args become the op's
    arguments, and runtime metadata is attached through environment
    variables, a pod annotation and a pod label.

    Args:
      op: The Op to update.
      pipeline_name: The pipeline name, passed to the launcher as the value
        of `--pipeline_name`.
      pipeline_root: The root output directory for pipeline artifacts.
      launcher_image: An optional launcher image. Useful for tests. When
        falsy, `_DEFAULT_LAUNCHER_IMAGE` is used.
    """
    op.is_v2 = True
    # Inject the launcher binary and overwrite the entrypoint.
    image_name = launcher_image or _DEFAULT_LAUNCHER_IMAGE
    launcher_container = dsl.UserContainer(
        name="kfp-launcher",
        image=image_name,
        command=["launcher", "--copy", "/kfp-launcher/launch"],
        mirror_volume_mounts=True)

    # The init container and the main container share the 'kfp-launcher'
    # volume, mounted at /kfp-launcher.
    op.add_init_container(launcher_container)
    op.add_volume(k8s_client.V1Volume(name='kfp-launcher'))
    op.add_volume_mount(
        k8s_client.V1VolumeMount(name='kfp-launcher',
                                 mount_path='/kfp-launcher'))

    # op.command + op.args will have the following sections:
    # 1. args passed to kfp-launcher
    # 2. a separator "--"
    # 3. parameters in format "key1=value1", "key2=value2", ...
    # 4. a separator "--" as end of arguments passed to launcher
    # 5. (start of op.args) arguments of the original user program command + args
    #
    # example (abbreviated; the list assigned below is authoritative):
    # - command:
    # - /kfp-launcher/launch
    # - '--mlmd_server_address'
    # - $(METADATA_GRPC_SERVICE_HOST)
    # - '--mlmd_server_port'
    # - $(METADATA_GRPC_SERVICE_PORT)
    # - ... # more launcher params
    # - '--pod_name'
    # - $(KFP_POD_NAME)
    # - '--pipeline_root'
    # - ''
    # - ... # more launcher params
    # - '--' # start of parameter values
    # - first=first
    # - second=second
    # - '--' # start of user command and args
    # args:
    # - sh
    # - '-ec'
    # - |
    #     program_path=$(mktemp)
    #     printf "%s" "$0" > "$program_path"
    #     python3 -u "$program_path" "$@"
    # - >
    #     import json
    #     import xxx
    #     ...
    op.command = [
        "/kfp-launcher/launch",
        "--mlmd_server_address",
        "$(METADATA_GRPC_SERVICE_HOST)",
        "--mlmd_server_port",
        "$(METADATA_GRPC_SERVICE_PORT)",
        "--runtime_info_json",
        "$(KFP_V2_RUNTIME_INFO)",
        "--container_image",
        "$(KFP_V2_IMAGE)",
        "--task_name",
        op.name,
        "--pipeline_name",
        pipeline_name,
        "--run_id",
        "$(KFP_RUN_ID)",
        "--run_resource",
        "workflows.argoproj.io/$(WORKFLOW_ID)",
        "--namespace",
        "$(KFP_NAMESPACE)",
        "--pod_name",
        "$(KFP_POD_NAME)",
        "--pod_uid",
        "$(KFP_POD_UID)",
        "--pipeline_root",
        pipeline_root,
        "--enable_caching",
        "$(ENABLE_CACHING)",
    ]

    # Mount necessary environment variables.
    op.apply(_default_transformers.add_kfp_pod_env)
    op.container.add_env_variable(
        k8s_client.V1EnvVar(name="KFP_V2_IMAGE", value=op.container.image))

    # Environment is also sourced from the optional
    # 'metadata-grpc-configmap' config map.
    config_map_ref = k8s_client.V1ConfigMapEnvSource(
        name='metadata-grpc-configmap', optional=True)
    op.container.add_env_from(
        k8s_client.V1EnvFromSource(config_map_ref=config_map_ref))

    # The container spec's command and args are moved into the op's
    # arguments, i.e. they follow the launcher command assigned above.
    op.arguments = list(op.container_spec.command) + list(
        op.container_spec.args)

    # Collected below and serialized as JSON into the KFP_V2_RUNTIME_INFO
    # environment variable.
    runtime_info = {
        "inputParameters": collections.OrderedDict(),
        "inputArtifacts": collections.OrderedDict(),
        "outputParameters": collections.OrderedDict(),
        "outputArtifacts": collections.OrderedDict(),
    }

    # First "--": start of the "name=value" input parameter section.
    op.command += ["--"]
    component_spec = op.component_spec
    for parameter, spec in sorted(
            component_spec.input_definitions.parameters.items()):
        parameter_info = {
            "type":
            pipeline_spec_pb2.PrimitiveType.PrimitiveTypeEnum.Name(spec.type),
        }
        op.command += [f"{parameter}={op._parameter_arguments[parameter]}"]
        runtime_info["inputParameters"][parameter] = parameter_info
    # Second "--": end of the arguments passed to the launcher.
    op.command += ["--"]

    # Input artifacts: path plus schema information, keyed by name.
    for artifact_name, spec in sorted(
            component_spec.input_definitions.artifacts.items()):
        artifact_info = {
            "metadataPath": op.input_artifact_paths[artifact_name],
            "schemaTitle": spec.artifact_type.schema_title,
            "instanceSchema": spec.artifact_type.instance_schema,
        }
        runtime_info["inputArtifacts"][artifact_name] = artifact_info

    # Output parameters: primitive type name plus the op's file output path.
    for parameter, spec in sorted(
            component_spec.output_definitions.parameters.items()):
        parameter_info = {
            "type":
            pipeline_spec_pb2.PrimitiveType.PrimitiveTypeEnum.Name(spec.type),
            "path":
            op.file_outputs[parameter],
        }
        runtime_info["outputParameters"][parameter] = parameter_info

    for artifact_name, spec in sorted(
            component_spec.output_definitions.artifacts.items()):
        # TODO: Assert instance_schema.
        artifact_info = {
            # Type used to register output artifacts.
            "schemaTitle": spec.artifact_type.schema_title,
            "instanceSchema": spec.artifact_type.instance_schema,
            # File used to write out the registered artifact ID.
            "metadataPath": op.file_outputs[artifact_name],
        }
        runtime_info["outputArtifacts"][artifact_name] = artifact_info

    op.container.add_env_variable(
        k8s_client.V1EnvVar(name="KFP_V2_RUNTIME_INFO",
                            value=json.dumps(runtime_info)))

    # Mark the pod as a v2 component through both an annotation and a label.
    op.pod_annotations['pipelines.kubeflow.org/v2_component'] = "true"
    op.pod_labels['pipelines.kubeflow.org/v2_component'] = "true"
예제 #15
0
 def copy(self):
     """Return a deep copy of this object with its own placeholder ContainerOp.

     ``_cop`` is cleared before ``deepcopy`` so it is not part of the copy
     (presumably because the ContainerOp should not be deep-copied --
     TODO confirm). Afterwards both this object and the copy receive a new
     placeholder ContainerOp.
     """
     self._cop = None
     try:
         fn = deepcopy(self)
     finally:
         # Re-create the placeholder even when deepcopy raises, so a failed
         # copy never leaves this object with ``_cop`` set to None.
         self._cop = ContainerOp("name", "image")
     fn._cop = ContainerOp("name", "image")
     return fn
예제 #16
0
    def container(
        name: str,
        arguments: str,
        inputs: Optional[List[Tuple[InputArgumentPath, str]]] = None,
        outputs: Optional[Dict[str, str]] = None,
    ) -> Tuple[ContainerOp, Dict[str, Tuple[InputArgumentPath, str]]]:
        """Build a bash-based ContainerOp plus a mapping of its consumable outputs.

        The generated shell script consists of, in order: `set -euo pipefail`,
        the input copy commands, `arguments`, and the output copy commands.

        Args:
            name: Name given to the ContainerOp.
            arguments: Shell script text to run between the generated
                input-copy and output-copy commands.
            inputs: Optional list of (artifact, target location) pairs. Each
                artifact is wrapped in `InputArgumentPath` and passed to the
                op; its path inside the container is copied to the target
                location before `arguments` runs.
            outputs: Optional mapping of output name to the path produced by
                the script. Each path is copied to `Pipeline.out_dir(path)`,
                which is registered as the op's file output of that name.

        Returns:
            A tuple of the ContainerOp and a dict mapping every output name
            to `(ctr.outputs[name], outputs[name])`.
        """
        # Set the correct shell parameters
        prepare_args = "set -euo pipefail\n"

        # Copy the output artifacts correctly
        file_outputs = {}
        output_artifact_copy_args = ""
        if outputs:
            for k, v in outputs.items():
                out = Pipeline.out_dir(v)
                file_outputs[k] = out
                output_artifact_copy_args += dedent("""
                    mkdir -p {d}
                    cp -r {fr} {to}
                """.format(
                    d=os.path.dirname(out),
                    fr=v,
                    to=out,
                )).lstrip()

        # Create the container
        ctr = ContainerOp(
            image=Pipeline.IMAGE,
            name=name,
            command=["bash", "-c"],
            output_artifact_paths=Pipeline.default_artifact_path(),
            file_outputs=file_outputs,
            artifact_argument_paths=[InputArgumentPath(x[0])
                                     for x in inputs] if inputs else None,
        )
        ctr.container.set_image_pull_policy("Always")

        # Copy input artifacts correctly
        input_artifact_copy_args = ""
        in_repo = False
        # NOTE: relies on ctr.input_artifact_paths iterating in the same
        # order as `inputs`, since the target is looked up by position.
        for i, path in enumerate(ctr.input_artifact_paths.values()):
            target_location = inputs[i][1]
            input_artifact_copy_args += "cp -r {fr} {to}\n".format(
                fr=path, to=target_location)

            # Change to the repository path if available
            if target_location == Pipeline.REPO:
                in_repo = True
                input_artifact_copy_args += "cd {}\n".format(Pipeline.REPO)
        # Show the git diff to validate
        if in_repo:
            input_artifact_copy_args += dedent("""
                echo "git diff:"
                git diff --name-only
            """)

        # Assemble the command
        ctr.arguments = prepare_args + \
            input_artifact_copy_args + \
            arguments + \
            "\n" + \
            output_artifact_copy_args

        # Output Artifacts
        # An emptyDir volume mounted at Pipeline.OUT_DIR.
        vol = "output-artifacts"
        ctr.add_volume(
            k8s.V1Volume(name=vol, empty_dir=k8s.V1EmptyDirVolumeSource()))
        ctr.container.add_volume_mount(
            k8s.V1VolumeMount(name=vol, mount_path=Pipeline.OUT_DIR))

        # GitHub Token
        # Secret "github-token", mounted read-only.
        gh_token = "github-token"
        ctr.add_volume(
            k8s.V1Volume(
                name=gh_token,
                secret=k8s.V1SecretVolumeSource(secret_name=gh_token)))
        ctr.container.add_volume_mount(
            k8s.V1VolumeMount(name=gh_token,
                              read_only=True,
                              mount_path=Pipeline.GITHUB_TOKEN_MOUNT_PATH))

        # Quay Login
        # Secret "quay", mounted read-only.
        quay = "quay"
        ctr.add_volume(
            k8s.V1Volume(name=quay,
                         secret=k8s.V1SecretVolumeSource(secret_name=quay)))
        ctr.container.add_volume_mount(
            k8s.V1VolumeMount(name=quay,
                              read_only=True,
                              mount_path=Pipeline.QUAY_SECRET_MOUNT_PATH))

        # SSH Key
        # Secret "ssh-key" with default mode 0o600, mounted read-only at
        # /root/.ssh.
        ssh_key = "ssh-key"
        ctr.add_volume(
            k8s.V1Volume(name=ssh_key,
                         secret=k8s.V1SecretVolumeSource(default_mode=0o600,
                                                         secret_name=ssh_key)))
        ctr.container.add_volume_mount(
            k8s.V1VolumeMount(name=ssh_key,
                              read_only=True,
                              mount_path="/root/.ssh"))

        # Assemble the inputs for the next stage
        # Each entry pairs the op output with the original (pre-copy) path.
        consumable_inputs = {}
        for k, v in file_outputs.items():
            consumable_inputs[k] = (ctr.outputs[k], outputs[k])

        return ctr, consumable_inputs
 def _add_common_labels(op: dsl.ContainerOp) -> dsl.ContainerOp:
     """Add the pod label 'param' to ``op``, using ``param`` from the outer scope."""
     labelled_op = op.add_pod_label('param', param)
     return labelled_op
예제 #18
0
 def __init__(self, spec=None, metadata=None):
     """Initialise the base class and attach a placeholder ContainerOp."""
     # The base initialiser receives metadata first, then spec.
     super().__init__(metadata, spec)
     self.verbose = False
     self._cop = ContainerOp("name", "image")
예제 #19
0
def update_op(op: dsl.ContainerOp,
              pipeline_name: dsl.PipelineParam,
              pipeline_root: dsl.PipelineParam,
              launcher_image: Optional[str] = None) -> None:
  """Updates the passed in Op for running in v2-compatible mode.

  The op is modified in place: a launcher init container, volume and volume
  mount are added, the command is replaced by the launcher invocation, the
  container spec's command and args become the op's arguments, and runtime
  metadata is attached through environment variables and a pod annotation.

  Args:
    op: The Op to update.
    pipeline_name: The pipeline name, passed to the launcher as the value of
      `--pipeline_name`.
    pipeline_root: The root output directory for pipeline artifacts.
    launcher_image: An optional launcher image. Useful for tests. When
      falsy, `_DEFAULT_LAUNCHER_IMAGE` is used.
  """
  # Inject the launcher binary and overwrite the entrypoint.
  image_name = launcher_image or _DEFAULT_LAUNCHER_IMAGE
  launcher_container = dsl.UserContainer(name="kfp-launcher",
                                         image=image_name,
                                         command="/bin/mount_launcher.sh",
                                         mirror_volume_mounts=True)

  # The init container and the main container share the 'kfp-launcher'
  # volume, mounted at /kfp-launcher.
  op.add_init_container(launcher_container)
  op.add_volume(k8s_client.V1Volume(name='kfp-launcher'))
  op.add_volume_mount(
      k8s_client.V1VolumeMount(name='kfp-launcher', mount_path='/kfp-launcher'))

  # Launcher invocation; the $(...) entries refer to environment variables
  # such as KFP_V2_RUNTIME_INFO and KFP_V2_IMAGE, which are set below.
  op.command = [
      "/kfp-launcher/launch",
      "--mlmd_server_address",
      "$(METADATA_GRPC_SERVICE_HOST)",
      "--mlmd_server_port",
      "$(METADATA_GRPC_SERVICE_PORT)",
      "--runtime_info_json",
      "$(KFP_V2_RUNTIME_INFO)",
      "--container_image",
      "$(KFP_V2_IMAGE)",
      "--task_name",
      op.name,
      "--pipeline_name",
      pipeline_name,
      "--pipeline_run_id",
      "$(WORKFLOW_ID)",
      "--pipeline_task_id",
      "$(KFP_POD_NAME)",
      "--pipeline_root",
      pipeline_root,
  ]

  # Mount necessary environment variables.
  op.apply(_default_transformers.add_kfp_pod_env)
  op.container.add_env_variable(
      k8s_client.V1EnvVar(name="KFP_V2_IMAGE", value=op.container.image))

  # Environment is also sourced from the optional 'metadata-grpc-configmap'
  # config map.
  config_map_ref = k8s_client.V1ConfigMapEnvSource(
      name='metadata-grpc-configmap', optional=True)
  op.container.add_env_from(
      k8s_client.V1EnvFromSource(config_map_ref=config_map_ref))

  # The container spec's command and args are moved into the op's arguments,
  # i.e. they follow the launcher command assigned above.
  op.arguments = list(op.container_spec.command) + list(op.container_spec.args)

  # Collected below and serialized as JSON into the KFP_V2_RUNTIME_INFO
  # environment variable.
  runtime_info = {
      "inputParameters": collections.OrderedDict(),
      "inputArtifacts": collections.OrderedDict(),
      "outputParameters": collections.OrderedDict(),
      "outputArtifacts": collections.OrderedDict(),
  }

  component_spec = op.component_spec
  # Input parameters: primitive type name plus the argument value.
  for parameter, spec in sorted(
      component_spec.input_definitions.parameters.items()):
    parameter_info = {
        "parameterType":
            pipeline_spec_pb2.PrimitiveType.PrimitiveTypeEnum.Name(spec.type),
        "parameterValue":
            op._parameter_arguments[parameter],
    }
    runtime_info["inputParameters"][parameter] = parameter_info

  # Input artifacts: only the input path is recorded.
  for artifact_name, spec in sorted(
      component_spec.input_definitions.artifacts.items()):
    artifact_info = {"fileInputPath": op.input_artifact_paths[artifact_name]}
    runtime_info["inputArtifacts"][artifact_name] = artifact_info

  # Output parameters: primitive type name plus the op's file output path.
  for parameter, spec in sorted(
      component_spec.output_definitions.parameters.items()):
    parameter_info = {
        "parameterType":
            pipeline_spec_pb2.PrimitiveType.PrimitiveTypeEnum.Name(spec.type),
        "fileOutputPath":
            op.file_outputs[parameter],
    }
    runtime_info["outputParameters"][parameter] = parameter_info

  for artifact_name, spec in sorted(
      component_spec.output_definitions.artifacts.items()):
    # TODO: Assert instance_schema.
    artifact_info = {
        # Type used to register output artifacts.
        "artifactSchema": spec.artifact_type.instance_schema,
        # File used to write out the registered artifact ID.
        "fileOutputPath": op.file_outputs[artifact_name],
    }
    runtime_info["outputArtifacts"][artifact_name] = artifact_info

  op.container.add_env_variable(
      k8s_client.V1EnvVar(name="KFP_V2_RUNTIME_INFO",
                          value=json.dumps(runtime_info)))

  # Mark the pod as a v2 component.
  op.pod_annotations['pipelines.kubeflow.org/v2_component'] = "true"
예제 #20
0
def run_as_aiplatform_custom_job(
    op: dsl.ContainerOp,
    display_name: Optional[str] = None,
    replica_count: Optional[int] = None,
    machine_type: Optional[str] = None,
    accelerator_type: Optional[str] = None,
    accelerator_count: Optional[int] = None,
    boot_disk_type: Optional[str] = None,
    boot_disk_size_gb: Optional[int] = None,
    timeout: Optional[str] = None,
    restart_job_on_worker_restart: Optional[bool] = None,
    service_account: Optional[str] = None,
    network: Optional[str] = None,
    output_uri_prefix: Optional[str] = None,
    worker_pool_specs: Optional[List[Mapping[str, Any]]] = None,
) -> None:
    """Run a pipeline task using AI Platform (Unified) custom training job.

    For detailed doc of the service, please refer to
    https://cloud.google.com/ai-platform-unified/docs/training/create-custom-job

    The resulting job description is stored on `op.custom_job_spec`; nothing
    is returned.

    Args:
      op: The task (ContainerOp) object to run as aiplatform custom job.
      display_name: Optional. The name of the custom job. Defaults to the
        name of `op`.
      replica_count: Optional. The number of replicas to be split between master
        workerPoolSpec and worker workerPoolSpec. (master always has 1 replica).
      machine_type: Optional. The type of the machine to run the custom job.
        When not given, `_DEFAULT_CUSTOM_JOB_MACHINE_TYPE` is used.
      accelerator_type: Optional. The type of accelerator(s) that may be attached
        to the machine as per acceleratorCount.
      accelerator_count: Optional. The number of accelerators to attach to the
        machine.
      boot_disk_type: Optional. Type of the boot disk (default is "pd-ssd"). Valid
        values: "pd-ssd" (Persistent Disk Solid State Drive) or "pd-standard"
          (Persistent Disk Hard Disk Drive).
      boot_disk_size_gb: Optional. Size in GB of the boot disk (default is 100GB).
      timeout: Optional. The maximum job running time. The default is 7 days. A
        duration in seconds with up to nine fractional digits, terminated by 's'.
        Example: "3.5s"
      restart_job_on_worker_restart: Optional. Restarts the entire CustomJob if a
        worker gets restarted. This feature can be used by distributed training
        jobs that are not resilient to workers leaving and joining a job.
      service_account: Optional. Specifies the service account for workload run-as
        account.
      network: Optional. The full name of the Compute Engine network to which the
        job should be peered. For example, projects/12345/global/networks/myVPC.
      output_uri_prefix: Optional. Google Cloud Storage URI to output directory.
      worker_pool_specs: Optional. Complete workerPoolSpecs for the job, e.g.
        for distributed training. When given, they are used (after
        placeholder resolution) instead of the single-container spec derived
        from `op`, and the machine, accelerator, disk and replica arguments
        are ignored. For details, please see:
        https://cloud.google.com/ai-platform-unified/docs/training/distributed-training

    Raises:
      ValueError: if an entry of `worker_pool_specs` contains neither
        "containerSpec" nor "pythonPackageSpec".
    """
    job_spec = {}

    if worker_pool_specs is not None:
        # Work on a copy so the caller's specs are not mutated while
        # placeholders are resolved.
        worker_pool_specs = copy.deepcopy(worker_pool_specs)

        def _is_output_parameter(output_key: str) -> bool:
            return output_key in (
                op.component_spec.output_definitions.parameters.keys())

        for worker_pool_spec in worker_pool_specs:
            if 'containerSpec' in worker_pool_spec:
                container_spec = worker_pool_spec['containerSpec']
                if 'command' in container_spec:
                    dsl_utils.resolve_cmd_lines(container_spec['command'],
                                                _is_output_parameter)
                if 'args' in container_spec:
                    dsl_utils.resolve_cmd_lines(container_spec['args'],
                                                _is_output_parameter)

            elif 'pythonPackageSpec' in worker_pool_spec:
                # For custom Python training, resolve placeholders in args only.
                python_spec = worker_pool_spec['pythonPackageSpec']
                if 'args' in python_spec:
                    dsl_utils.resolve_cmd_lines(python_spec['args'],
                                                _is_output_parameter)

            else:
                # Report the offending spec. The previous code formatted an
                # undefined name here and failed with NameError instead.
                raise ValueError(
                    'Expect either "containerSpec" or "pythonPackageSpec" in each '
                    'workerPoolSpec. Got: {}'.format(worker_pool_spec))

        job_spec['workerPoolSpecs'] = worker_pool_specs

    else:
        # Derive a single-container spec from the op itself.
        worker_pool_spec = {
            'machineSpec': {
                'machineType': machine_type or _DEFAULT_CUSTOM_JOB_MACHINE_TYPE
            },
            'replicaCount': '1',
            'containerSpec': {
                'imageUri': op.container.image,
            }
        }
        if op.container.command:
            worker_pool_spec['containerSpec']['command'] = op.container.command
        if op.container.args:
            worker_pool_spec['containerSpec']['args'] = op.container.args
        if accelerator_type is not None:
            worker_pool_spec['machineSpec'][
                'acceleratorType'] = accelerator_type
        if accelerator_count is not None:
            worker_pool_spec['machineSpec'][
                'acceleratorCount'] = accelerator_count
        if boot_disk_type is not None:
            worker_pool_spec.setdefault('diskSpec',
                                        {})['bootDiskType'] = boot_disk_type
        if boot_disk_size_gb is not None:
            worker_pool_spec.setdefault(
                'diskSpec', {})['bootDiskSizeGb'] = boot_disk_size_gb

        job_spec['workerPoolSpecs'] = [worker_pool_spec]
        if replica_count is not None and replica_count > 1:
            # The first pool always has one replica; the remaining replicas
            # go into a second, otherwise identical pool.
            additional_worker_pool_spec = copy.deepcopy(worker_pool_spec)
            additional_worker_pool_spec['replicaCount'] = str(replica_count -
                                                              1)
            job_spec['workerPoolSpecs'].append(additional_worker_pool_spec)

    if timeout is not None:
        job_spec.setdefault('scheduling', {})['timeout'] = timeout
    if restart_job_on_worker_restart is not None:
        job_spec.setdefault('scheduling', {})[
            'restartJobOnWorkerRestart'] = restart_job_on_worker_restart
    if service_account is not None:
        job_spec['serviceAccount'] = service_account
    if network is not None:
        job_spec['network'] = network
    if output_uri_prefix is not None:
        job_spec['baseOutputDirectory'] = {
            'outputUriPrefix': output_uri_prefix
        }

    op.custom_job_spec = {
        'displayName': display_name or op.name,
        'jobSpec': job_spec
    }
예제 #21
0
 def copy(self):
     """Return a deep copy of this object with its own placeholder ContainerOp.

     ``_cop`` is cleared before ``deepcopy`` so it is not part of the copy
     (presumably because the ContainerOp should not be deep-copied --
     TODO confirm). Afterwards both this object and the copy receive a new
     placeholder ContainerOp.
     """
     self._cop = None
     try:
         fn = deepcopy(self)
     finally:
         # Re-create the placeholder even when deepcopy raises, so a failed
         # copy never leaves this object with ``_cop`` set to None.
         self._cop = ContainerOp('name', 'image')
     fn._cop = ContainerOp('name', 'image')
     return fn