def test_container_based_custom_job_op_compile(self):
    """Compile a pipeline that uses the wrapped op and compare it to the expected JSON.

    The container component is wrapped into a custom training job op, invoked
    once inside a pipeline, compiled to ``self._package_path``, and the
    compiled JSON is compared with the checked-in testdata file.
    """
    wrapped_op = utils.create_custom_training_job_op_from_component(
        self._container_component)

    @kfp.dsl.pipeline(name="training-test")
    def pipeline():
        wrapped_op(
            self._test_input_string,
            project=self._project,
            location=self._location)

    compiler.Compiler().compile(
        pipeline_func=pipeline, package_path=self._package_path)

    with open(self._package_path) as compiled_file:
        compiled_spec = json.load(compiled_file, strict=False)

    expected_path = os.path.join(
        os.path.dirname(__file__),
        "../testdata/custom_training_job_wrapper_pipeline.json")
    with open(expected_path) as expected_file:
        expected_spec = json.load(expected_file, strict=False)

    # Ignore the kfp SDK and schema versions during comparison: they are
    # removed from the compiled output only.
    for version_key in ("sdkVersion", "schemaVersion"):
        del compiled_spec["pipelineSpec"][version_key]

    self.assertEqual(compiled_spec, expected_spec)
def test_run_as_vertex_ai_custom_with_replica_count_greater_than_1_converts_correctly(
        self):
    """Check the launcher container spec generated for ``replica_count=2``.

    The expected payload lists two worker pools: the first with
    ``"replica_count": 1`` and the second with ``"replica_count": "1"``.

    NOTE(review): another test with this exact name is defined later in this
    file (the ``replica_count=5`` variant). If both live in the same class,
    the later definition replaces this one and this test never runs. The
    expectations below (for example the ``test_launcher_image`` image) may be
    stale -- confirm before renaming this method to re-enable it.
    """
    wrapped_op = utils.create_custom_training_job_op_from_component(
        self._create_a_container_based_component(), replica_count=2)

    # Serialized CustomJob payload expected as the value of --payload.
    expected_payload = (
        '{"display_name": "ContainerComponent", "job_spec": '
        '{"worker_pool_specs": [{"machine_spec": {"machine_type": '
        '"n1-standard-4"}, "replica_count": 1, "container_spec": '
        '{"image_uri": "google/cloud-sdk:latest", "command": '
        '["sh", "-c", "set -e -x\\necho \\"$0, this is an output '
        'parameter\\"\\n", '
        '"{{$.inputs.parameters[\'input_text\']}}", '
        '"{{$.outputs.parameters[\'output_value\'].output_file}}"]},'
        ' "disk_spec": {"boot_disk_type": "pd-ssd", '
        '"boot_disk_size_gb": 100}}, {"machine_spec": '
        '{"machine_type": "n1-standard-4"}, "replica_count": "1", '
        '"container_spec": {"image_uri": '
        '"google/cloud-sdk:latest", "command": ["sh", "-c", "set '
        '-e -x\\necho \\"$0, this is an output parameter\\"\\n", '
        '"{{$.inputs.parameters[\'input_text\']}}", '
        '"{{$.outputs.parameters[\'output_value\'].output_file}}"]},'
        ' "disk_spec": {"boot_disk_type": "pd-ssd", '
        '"boot_disk_size_gb": 100}}], "service_account": '
        '"{{$.inputs.parameters[\'service_account\']}}", '
        '"network": "{{$.inputs.parameters[\'network\']}}", '
        '"tensorboard": '
        '"{{$.inputs.parameters[\'tensorboard\']}}", '
        '"base_output_directory": {"output_uri_prefix": '
        '"{{$.inputs.parameters[\'base_output_directory\']}}"}}}'
    )

    launcher_command = [
        'python3',
        '-u',
        '-m',
        'google_cloud_pipeline_components.container.v1.gcp_launcher.launcher',
    ]
    launcher_args = [
        '--type',
        'CustomJob',
        '--payload',
        expected_payload,
        '--project',
        {'inputValue': 'project'},
        '--location',
        {'inputValue': 'location'},
        '--gcp_resources',
        {'outputPath': 'gcp_resources'},
    ]
    expected_subset = {
        'implementation': {
            'container': {
                'image': 'test_launcher_image',
                'command': launcher_command,
                'args': launcher_args,
            },
        },
    }

    self.assertDictContainsSubset(
        subset=expected_subset,
        dictionary=wrapped_op.component_spec.to_dict())
def test_run_as_vertex_ai_custom_with_network_converts_correctly(self):
    """A ``network`` argument becomes the default of the ``network`` input."""
    wrapped_op = utils.create_custom_training_job_op_from_component(
        self._create_a_container_based_component(), network='test_network')

    actual_inputs = wrapped_op.component_spec.inputs
    expected_network_input = components.structures.InputSpec(
        name='network',
        type='String',
        description=None,
        default='test_network',
        optional=True,
        annotations=None)

    self.assertContainsSubsequence(actual_inputs, [expected_network_input])
def test_run_as_vertex_ai_custom_with_labels_converts_correctly(self):
    """A ``labels`` dict becomes the JSON-encoded default of the ``labels`` input."""
    wrapped_op = utils.create_custom_training_job_op_from_component(
        self._create_a_container_based_component(),
        labels={'test_key': 'test_value'})

    actual_inputs = wrapped_op.component_spec.inputs
    expected_labels_input = components.structures.InputSpec(
        name='labels',
        type='JsonObject',
        description=None,
        default='{"test_key": "test_value"}',
        optional=True,
        annotations=None)

    self.assertContainsSubsequence(actual_inputs, [expected_labels_input])
def test_run_as_vertex_ai_custom_with_reserved_ip_ranges(self):
    """A ``reserved_ip_ranges`` list becomes the JSON-encoded input default."""
    wrapped_op = utils.create_custom_training_job_op_from_component(
        self._create_a_container_based_component(),
        reserved_ip_ranges=['test_ip_range_network'])

    actual_inputs = wrapped_op.component_spec.inputs
    expected_ip_ranges_input = components.structures.InputSpec(
        name='reserved_ip_ranges',
        type='JsonArray',
        description=None,
        default='["test_ip_range_network"]',
        optional=True,
        annotations=None)

    self.assertContainsSubsequence(actual_inputs, [expected_ip_ranges_input])
def test_run_as_vertex_ai_custom_with_replica_count_greater_than_1_converts_correctly(
        self):
    """Check the ``worker_pool_specs`` default generated for ``replica_count=5``.

    The expected default holds two worker pools: the first with
    ``"replica_count": 1`` and the second with ``"replica_count": "4"``
    (a string in the expected JSON).

    NOTE(review): an earlier test in this file uses this exact method name;
    if both are in the same class, only this later definition is kept.
    """
    wrapped_op = utils.create_custom_training_job_op_from_component(
        self._create_a_container_based_component(), replica_count=5)

    actual_inputs = wrapped_op.component_spec.inputs

    # Adjacent literals concatenate to the single JSON string expected as the
    # input default; the pieces are split only for readability.
    expected_worker_pool_specs = (
        '[{"machine_spec": {"machine_type": "n1-standard-4"}, '
        '"replica_count": 1, '
        '"container_spec": {"image_uri": "google/cloud-sdk:latest", '
        '"command": ["sh", "-c", '
        '"set -e -x\\necho \\"$0, this is an output parameter\\"\\n", '
        '"{{$.inputs.parameters[\'input_text\']}}", '
        '"{{$.outputs.parameters[\'output_value\'].output_file}}"]}, '
        '"disk_spec": {"boot_disk_type": "pd-ssd", '
        '"boot_disk_size_gb": 100}}, '
        '{"machine_spec": {"machine_type": "n1-standard-4"}, '
        '"replica_count": "4", '
        '"container_spec": {"image_uri": "google/cloud-sdk:latest", '
        '"command": ["sh", "-c", '
        '"set -e -x\\necho \\"$0, this is an output parameter\\"\\n", '
        '"{{$.inputs.parameters[\'input_text\']}}", '
        '"{{$.outputs.parameters[\'output_value\'].output_file}}"]}, '
        '"disk_spec": {"boot_disk_type": "pd-ssd", '
        '"boot_disk_size_gb": 100}}]'
    )
    expected_pool_input = components.structures.InputSpec(
        name='worker_pool_specs',
        type='JsonArray',
        description=None,
        default=expected_worker_pool_specs,
        optional=True,
        annotations=None)

    self.assertContainsSubsequence(actual_inputs, [expected_pool_input])
def test_run_as_vertex_ai_custom_job_on_container_spec_with_defualts_values_converts_correctly(
        self):
    """Check the full component spec produced when only the component is passed.

    The container-based component is wrapped with
    ``utils.create_custom_training_job_op_from_component`` using no extra
    arguments, and the resulting ``component_spec.to_dict()`` must equal
    ``expected_results`` exactly (name, description, inputs, outputs and
    launcher container implementation).
    """
    # Expected serialized component spec; compared for exact equality below,
    # so every string (including whitespace and spelling) is significant.
    expected_results = {
        'name':
            'ContainerComponent',
        'description':
            'Launch a Custom training job using Vertex CustomJob API.\n\n '
            'Args:\n project (str):\n Required. Project to '
            'create the custom training job in.\n location '
            '(Optional[str]):\n Location for creating the custom '
            'training job. If not set,\n default to us-central1.\n'
            ' display_name (str): The name of the custom training job.\n'
            ' worker_pool_specs (Optional[Sequence[str]]): Serialized '
            'json spec of the worker pools\n including machine type '
            'and Docker image. All worker pools except the first one are\n'
            ' optional and can be skipped by providing an empty '
            'value.\n\n For more details about the WorkerPoolSpec, '
            'see\n '
            'https://cloud.google.com/vertex-ai/docs/reference/rest/v1/CustomJobSpec#WorkerPoolSpec\n'
            ' timeout (Optional[str]): The maximum job running time. The '
            'default is 7\n days. A duration in seconds with up to '
            'nine fractional digits, terminated\n by \'s\', for '
            'example: "3.5s".\n restart_job_on_worker_restart '
            '(Optional[bool]): Restarts the entire\n CustomJob if a '
            'worker gets restarted. This feature can be used by\n '
            'distributed training jobs that are not resilient to workers '
            'leaving and\n joining a job.\n service_account '
            '(Optional[str]): Sets the default service account for\n '
            'workload run-as account. The service account running the '
            'pipeline\n '
            '(https://cloud.google.com/vertex-ai/docs/pipelines/configure-project#service-account)\n'
            ' submitting jobs must have act-as permission on this'
            ' run-as account. If\n unspecified, the Vertex AI '
            'Custom Code Service\n '
            'Agent(https://cloud.google.com/vertex-ai/docs/general/access-control#service-agents)\n'
            ' for the CustomJob\'s project.\n tensorboard '
            '(Optional[str]): The name of a Vertex AI Tensorboard resource '
            'to\n which this CustomJob will upload Tensorboard '
            'logs.\n enable_web_access (Optional[bool]): Whether you want'
            ' Vertex AI to enable\n [interactive shell '
            'access](https://cloud.google.com/vertex-ai/docs/training/monitor-debug-interactive-shell)\n'
            ' to training containers.\n If set to `true`, '
            'you can access interactive shells at the URIs given\n by'
            ' [CustomJob.web_access_uris][].\n network (Optional[str]): '
            'The full name of the Compute Engine network to\n which '
            'the job should be peered. For example,\n '
            'projects/12345/global/networks/myVPC. Format is of the form\n'
            ' projects/{project}/global/networks/{network}. Where '
            '{project} is a project\n number, as in 12345, and '
            '{network} is a network name. Private services\n access '
            'must already be configured for the network. If left '
            'unspecified,\n the job is not peered with any network.\n'
            ' reserved_ip_ranges (Optional[Sequence[str]]): A list of '
            'names for the reserved ip ranges\n under the VPC network'
            ' that can be used for this job.\n If set, we will deploy'
            ' the job within the provided ip ranges. Otherwise,\n the'
            ' job will be deployed to any ip ranges under the provided VPC '
            'network.\n base_output_directory (Optional[str]): The Cloud '
            'Storage location to store\n the output of this CustomJob'
            ' or HyperparameterTuningJob. see below for more details:\n'
            ' '
            'https://cloud.google.com/vertex-ai/docs/reference/rest/v1/GcsDestination\n'
            ' labels (Optional[Dict[str, str]]): The labels with '
            'user-defined metadata to organize CustomJobs.\n See '
            'https://goo.gl/xmQnxf for more information.\n '
            'encryption_spec_key_name (Optional[str]): Customer-managed '
            'encryption key\n options for the CustomJob. If this is '
            'set, then all resources created by\n the CustomJob will '
            'be encrypted with the provided encryption key.\n\n Returns:\n'
            ' gcp_resources (str):\n Serialized gcp_resources '
            'proto tracking the custom training job.\n For more '
            'details, see '
            'https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.\n',
        # Expected inputs, in order; the last entry (input_text) carries the
        # description 'Represents an input parameter.'.
        'inputs': [{
            'name': 'project',
            'type': 'String'
        }, {
            'name': 'location',
            'type': 'String',
            'default': 'us-central1'
        }, {
            'name': 'display_name',
            'type': 'String',
            'default': 'ContainerComponent',
            'optional': True
        }, {
            'name': 'worker_pool_specs',
            'type': 'JsonArray',
            # With no replica_count given, a single worker pool with
            # "replica_count": 1 is expected.
            'default':
                '[{"machine_spec": {"machine_type": "n1-standard-4"}, '
                '"replica_count": 1, "container_spec": {"image_uri": '
                '"google/cloud-sdk:latest", "command": ["sh", "-c", "set -e '
                '-x\\necho \\"$0, this is an output parameter\\"\\n", '
                '"{{$.inputs.parameters[\'input_text\']}}", '
                '"{{$.outputs.parameters[\'output_value\'].output_file}}"]}, '
                '"disk_spec": {"boot_disk_type": "pd-ssd", '
                '"boot_disk_size_gb": 100}}]',
            'optional': True
        }, {
            'name': 'timeout',
            'type': 'String',
            'default': '604800s',
            'optional': True
        }, {
            'name': 'restart_job_on_worker_restart',
            'type': 'Boolean',
            'default': 'false',
            'optional': True
        }, {
            'name': 'service_account',
            'type': 'String',
            'default': '',
            'optional': True
        }, {
            'name': 'tensorboard',
            'type': 'String',
            'default': '',
            'optional': True
        }, {
            'name': 'enable_web_access',
            'type': 'Boolean',
            'default': 'false',
            'optional': True
        }, {
            'name': 'network',
            'type': 'String',
            'default': '',
            'optional': True
        }, {
            'name': 'reserved_ip_ranges',
            'type': 'JsonArray',
            'default': '[]',
            'optional': True
        }, {
            'name': 'base_output_directory',
            'type': 'String',
            'default': '',
            'optional': True
        }, {
            'name': 'labels',
            'type': 'JsonObject',
            'default': '{}',
            'optional': True
        }, {
            'name': 'encryption_spec_key_name',
            'type': 'String',
            'default': '',
            'optional': True
        }, {
            'name': 'input_text',
            'type': 'String',
            'description': 'Represents an input parameter.'
        }],
        'outputs': [{
            'name': 'gcp_resources',
            'type': 'String'
        }, {
            'name': 'output_value',
            'type': 'String',
            # NOTE(review): 'paramter' is part of the expected fixture value;
            # it presumably mirrors the component definition -- do not "fix"
            # it here without changing the component too.
            'description': 'Represents an output paramter.'
        }],
        'implementation': {
            'container': {
                'image':
                    'gcr.io/ml-pipeline/google-cloud-pipeline-components:latest',
                'command': [
                    'python3', '-u', '-m',
                    'google_cloud_pipeline_components.container.v1.gcp_launcher.launcher'
                ],
                'args': [
                    '--type', 'CustomJob', '--payload', {
                        # The payload is expected as a 'concat' of JSON
                        # fragments interleaved with input placeholders.
                        'concat': [
                            '{', '"display_name": "', {
                                'inputValue': 'display_name'
                            }, '"', ', "job_spec": {', '"worker_pool_specs": ', {
                                'inputValue': 'worker_pool_specs'
                            }, ', "scheduling": {', '"timeout": "', {
                                'inputValue': 'timeout'
                            }, '"', ', "restart_job_on_worker_restart": "', {
                                'inputValue': 'restart_job_on_worker_restart'
                            }, '"', '}', ', "service_account": "', {
                                'inputValue': 'service_account'
                            }, '"', ', "tensorboard": "', {
                                'inputValue': 'tensorboard'
                            }, '"', ', "enable_web_access": "', {
                                'inputValue': 'enable_web_access'
                            }, '"', ', "network": "', {
                                'inputValue': 'network'
                            }, '"', ', "reserved_ip_ranges": ', {
                                'inputValue': 'reserved_ip_ranges'
                            }, ', "base_output_directory": {',
                            '"output_uri_prefix": "', {
                                'inputValue': 'base_output_directory'
                            }, '"', '}', '}', ', "labels": ', {
                                'inputValue': 'labels'
                            }, ', "encryption_spec": {"kms_key_name":"', {
                                'inputValue': 'encryption_spec_key_name'
                            }, '"}', '}'
                        ]
                    }, '--project', {
                        'inputValue': 'project'
                    }, '--location', {
                        'inputValue': 'location'
                    }, '--gcp_resources', {
                        'outputPath': 'gcp_resources'
                    }
                ]
            }
        }
    }
    component_factory_function = self._create_a_container_based_component()
    # No optional wrapper arguments are passed, so every default is exercised.
    custom_job_spec = utils.create_custom_training_job_op_from_component(
        component_factory_function)
    self.assertDictEqual(custom_job_spec.component_spec.to_dict(),
                         expected_results)