def test_deploy_pod_fails(self, unused_mock_cluster_exists,
                          unused_mock_get_pod_status, unused_mock_call,
                          unused_mock_sleep):
  with self.assertRaises(RuntimeError):
    gke_cluster.GkeCluster('foo-cluster', cluster_zone='foo-zone').deploy_pod(
        pod_config='foo-config', pod_name='foo-pod')

def test_delete_cluster_in_provisioning_state(self, mock_call,
                                              unused_mock_sleep):
  mock_call.side_effect = [
      'foo-cluster', None, 'RUNNING', 'foo-cluster', 'PROVISIONING', 'RUNNING',
      'RUNNING'
  ]
  gke_cluster.GkeCluster(
      'foo-cluster', cluster_zone='foo-zone').delete_cluster(wait=False)
  self.assertEqual(mock_call.call_count, 7)

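# delete_cluster(wait=False) has to cope with a cluster that is still
# PROVISIONING: the mocked call sequence above walks it through that state
# before the delete goes out. A minimal sketch of that polling idea, with
# hypothetical helpers passed in (this is not GkeCluster's actual code):
def _example_delete_cluster(cluster_name, cluster_zone, get_status,
                            run_gcloud, wait=True, poll_interval_sec=60):
  """Waits for transient states to clear, then issues the cluster delete."""
  import time
  # Deleting a cluster mid-provisioning is rejected, so wait the state out.
  while get_status() == 'PROVISIONING':
    time.sleep(poll_interval_sec)
  args = ['gcloud', 'container', 'clusters', 'delete', cluster_name,
          '--zone', cluster_zone, '--quiet']
  if not wait:
    args.append('--async')
  run_gcloud(args)
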
def test_create_new_cluster(self, mock_call):
  gke_cluster.GkeCluster('foo-cluster', cluster_zone='foo-zone',
                         alpha_cluster=True)
  mock_call.assert_called_with(
      ['gcloud', 'alpha', 'container', 'clusters', 'create', 'foo-cluster',
       '--zone', 'foo-zone'],
      retry_delay_sec=1,
      retries=1)

def test_delete_pod(self, unused_mock_cluster_exists, unused_mock_pod_exists,
                    mock_call):
  gke_cluster.GkeCluster('foo-cluster', cluster_zone='foo-zone').delete_pod(
      pod_name='foo-pod', wait=True)
  mock_call.assert_any_call(
      ['kubectl', 'delete', 'pod', 'foo-pod', '--wait'],
      std_input=None,
      retries=1,
      retry_delay_sec=0)

def test_deploy_pod(self, unused_mock_cluster_exists,
                    unused_mock_get_pod_status, mock_call, unused_mock_sleep):
  gke_cluster.GkeCluster('foo-cluster', cluster_zone='foo-zone').deploy_pod(
      pod_config='foo-config', pod_name='foo-pod')
  mock_call.assert_any_call(
      ['kubectl', 'create', '-f', '-'],
      std_input='foo-config',
      retries=1,
      retry_delay_sec=1)

def test_get_pod_status(self, unused_mock_cluster_exists, mock_call):
  self.assertEqual(
      gke_cluster.GkeCluster(
          'foo-cluster', cluster_zone='foo-zone').get_pod_status(
              pod_name='foo-pod'),
      gke_cluster.PodStatus.SUCCEEDED)
  mock_call.assert_any_call(
      ['kubectl', 'get', 'pods', 'foo-pod', '-o',
       'jsonpath={.status.phase}'],
      std_input=None,
      retries=0,
      retry_delay_sec=1)

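# The test above pins the exact kubectl invocation; how the phase string gets
# mapped to gke_cluster.PodStatus is not shown here. A minimal sketch of that
# mapping, assuming kubectl is on PATH and credentials are already configured
# (function names below are illustrative, not the module's API):
def _example_get_pod_phase(pod_name):
  """Returns the raw pod phase string reported by kubectl."""
  import subprocess
  output = subprocess.check_output(
      ['kubectl', 'get', 'pods', pod_name, '-o', 'jsonpath={.status.phase}'])
  return output.decode('utf-8').strip()


def _example_phase_to_status(phase):
  """Maps a Kubernetes pod phase to a coarse SUCCEEDED/FAILED/RUNNING label."""
  if phase == 'Succeeded':
    return 'SUCCEEDED'
  if phase == 'Failed':
    return 'FAILED'
  # Pending/Running/Unknown are treated as still in progress in this sketch.
  return 'RUNNING'
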
def _run_call_variants_with_kubernetes(pipeline_args):
  """Runs the call_variants step with Kubernetes."""
  # Set up the Kubernetes cluster.
  if pipeline_args.gke_cluster_name:
    # Reuse the provided GKE cluster.
    new_cluster_created = False
    cluster = gke_cluster.GkeCluster(pipeline_args.gke_cluster_name,
                                     pipeline_args.gke_cluster_region,
                                     pipeline_args.gke_cluster_zone)
  else:
    # Create a new GKE cluster.
    job_name_label = pipeline_args.job_name_prefix + _CALL_VARIANTS_JOB_NAME
    extra_args = [
        '--cluster-version=' + pipeline_args.gke_cluster_version,
        '--num-nodes=1', '--enable-kubernetes-alpha', '--enable-ip-alias',
        '--create-subnetwork=', '--node-labels=job_name=' + job_name_label,
        '--scopes=cloud-platform', '--enable-tpu', '--no-enable-autorepair',
        '--project', pipeline_args.project, '--quiet'
    ]
    cluster_name = 'deepvariant-' + _NOW_STR + uuid.uuid4().hex[:5]
    cluster = gke_cluster.GkeCluster(
        cluster_name,
        pipeline_args.gke_cluster_region,
        pipeline_args.gke_cluster_zone,
        alpha_cluster=True,
        extra_args=extra_args)
    new_cluster_created = True

  # Deploy the call_variants pod.
  pod_name = 'deepvariant-' + _NOW_STR + '-' + uuid.uuid4().hex[:5]
  try:
    _deploy_call_variants_pod(pod_name, cluster, pipeline_args)
  except KeyboardInterrupt:
    cluster.delete_pod(pod_name)
    raise RuntimeError('Job cancelled by user.')
  finally:
    if new_cluster_created:
      cluster.delete_cluster(wait=False)

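# For reference, with extra_args as above the new alpha cluster is expected to
# come down to a gcloud invocation roughly like the following (exact flag
# order and the zone/region flag depend on GkeCluster; angle brackets mark
# placeholders, so treat this as an illustration only):
#
#   gcloud alpha container clusters create deepvariant-<timestamp><suffix> \
#       --zone <gke_cluster_zone> \
#       --cluster-version=<gke_cluster_version> --num-nodes=1 \
#       --enable-kubernetes-alpha --enable-ip-alias --create-subnetwork= \
#       --node-labels=job_name=<job_name_prefix><_CALL_VARIANTS_JOB_NAME> \
#       --scopes=cloud-platform --enable-tpu --no-enable-autorepair \
#       --project <project> --quiet
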
def test_deploy_pod_retries(self, unused_mock_cluster_exists,
                            unused_mock_delete_pod, mock_get_pod_status,
                            mock_call, unused_mock_sleep):
  mock_get_pod_status.side_effect = [
      gke_cluster.PodStatus.FAILED, gke_cluster.PodStatus.SUCCEEDED
  ]
  gke_cluster.GkeCluster('foo-cluster', cluster_zone='foo-zone').deploy_pod(
      pod_config='foo-config', pod_name='foo-pod', retries=1)
  mock_call.assert_any_call(
      ['kubectl', 'create', '-f', '-'],
      std_input='foo-config',
      retries=1,
      retry_delay_sec=1)
  mock_call.assert_any_call(
      ['kubectl', 'replace', '--force', '-f', '-'],
      std_input='foo-config',
      retries=1,
      retry_delay_sec=1)

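# The test above fixes the two kubectl commands involved, but the retry loop
# itself is not shown. A minimal sketch of the deploy-with-retries idea,
# assuming hypothetical run_kubectl/get_status helpers are injected (this is
# an illustration, not deploy_pod's actual implementation):
def _example_deploy_pod_with_retries(pod_config, run_kubectl, get_status,
                                     retries, poll_interval_sec=30):
  """Creates a pod, polls its phase, and force-replaces it on failure."""
  import time
  run_kubectl(['kubectl', 'create', '-f', '-'], std_input=pod_config)
  for attempt in range(retries + 1):
    status = get_status()
    while status not in ('SUCCEEDED', 'FAILED'):
      time.sleep(poll_interval_sec)
      status = get_status()
    if status == 'SUCCEEDED':
      return
    if attempt == retries:
      break
    # The pod failed and retries remain: recreate it from the same config.
    run_kubectl(['kubectl', 'replace', '--force', '-f', '-'],
                std_input=pod_config)
  raise RuntimeError('Pod failed after %d retries.' % retries)
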
def test_reuse_existing_cluster(self, mock_call):
  gke_cluster.GkeCluster('foo-cluster', cluster_zone='foo-zone')
  mock_call.assert_any_call(
      ['gcloud', 'container', 'clusters', 'list', '--format=value(name)',
       '--zone', 'foo-zone'],
      retry_delay_sec=1,
      retries=1)
  mock_call.assert_any_call(
      ['gcloud', 'container', 'clusters', 'get-credentials', 'foo-cluster',
       '--zone', 'foo-zone'],
      retry_delay_sec=1,
      retries=1)
  mock_call.assert_any_call(
      ['gcloud', 'container', 'clusters', 'describe', 'foo-cluster',
       '--format=value(status)', '--zone', 'foo-zone'],
      retry_delay_sec=1,
      retries=1)

def test_create_new_cluster_with_keyboard_interrupt(
    self, unused_mock_cluster_exists, mock_call):
  gke_cluster.GkeCluster('foo-cluster', cluster_zone='foo-zone',
                         alpha_cluster=True)
  mock_call.assert_any_call(
      ['gcloud', 'alpha', 'container', 'clusters', 'create', 'foo-cluster',
       '--zone', 'foo-zone'],
      retry_delay_sec=1,
      retries=1)
  mock_call.assert_any_call(
      ['gcloud', 'container', 'clusters', 'describe', 'foo-cluster',
       '--format=value(status)', '--zone', 'foo-zone'],
      retry_delay_sec=1,
      retries=1)
  mock_call.assert_any_call(
      ['gcloud', 'container', 'clusters', 'describe', 'foo-cluster',
       '--format=value(status)', '--zone', 'foo-zone'],
      retry_delay_sec=1,
      retries=1)

def test_get_pod_status_pulling_image_failed(self, unused_mock_cluster_exists,
                                             mock_call):
  self.assertEqual(
      gke_cluster.GkeCluster(
          'foo-cluster', cluster_zone='foo-zone').get_pod_status(
              pod_name='foo-pod'),
      gke_cluster.PodStatus.FAILED)
  mock_call.assert_any_call(
      ['kubectl', 'get', 'pods', 'foo-pod', '-o',
       'jsonpath={.status.phase}'],
      std_input=None,
      retries=0,
      retry_delay_sec=1)
  mock_call.assert_any_call(
      ['kubectl', 'get', 'pods', 'foo-pod', '-o',
       'go-template="{{range .status.containerStatuses}}{{if not .ready}}'
       '{{.state.waiting.reason}}{{end}}{{end}}"'],
      std_input=None,
      retries=0,
      retry_delay_sec=1)

def test_get_cluster_status(self, mock_call):
  self.assertEqual(
      gke_cluster.GkeCluster(
          'foo-cluster', cluster_zone='foo-zone')._get_cluster_status(),
      gke_cluster.ClusterStatus.RUNNING)
  mock_call.assert_any_call(
      ['gcloud', 'container', 'clusters', 'list', '--format=value(name)',
       '--zone', 'foo-zone'],
      retry_delay_sec=1,
      retries=1)
  mock_call.assert_any_call(
      ['gcloud', 'container', 'clusters', 'get-credentials', 'foo-cluster',
       '--zone', 'foo-zone'],
      retry_delay_sec=1,
      retries=1)
  mock_call.assert_any_call(
      ['gcloud', 'container', 'clusters', 'describe', 'foo-cluster',
       '--format=value(status)', '--zone', 'foo-zone'],
      retry_delay_sec=1,
      retries=1)

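# The three gcloud commands above are pinned by the test; the status parsing
# is not shown here. A minimal sketch of the describe-and-map step, assuming
# gcloud is installed and authenticated (names below are illustrative only):
def _example_get_cluster_status(cluster_name, cluster_zone):
  """Returns the cluster status string reported by gcloud, e.g. 'RUNNING'."""
  import subprocess
  output = subprocess.check_output([
      'gcloud', 'container', 'clusters', 'describe', cluster_name,
      '--format=value(status)', '--zone', cluster_zone
  ])
  status = output.decode('utf-8').strip()
  # Known GKE cluster states include PROVISIONING, RUNNING, RECONCILING,
  # STOPPING, ERROR, and DEGRADED; anything else is treated as UNKNOWN.
  known = {'PROVISIONING', 'RUNNING', 'RECONCILING', 'STOPPING', 'ERROR',
           'DEGRADED'}
  return status if status in known else 'UNKNOWN'
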
def _validate_and_complete_args(pipeline_args):
  """Validates pipeline arguments and fills in missing args (if any)."""
  # Basic validation logic. More detailed validation is done by the
  # Pipelines API.
  if pipeline_args.preemptible and pipeline_args.max_preemptible_tries <= 0:
    raise ValueError('--max_preemptible_tries must be greater than zero.')
  if pipeline_args.max_non_preemptible_tries <= 0:
    raise ValueError('--max_non_preemptible_tries must be greater than zero.')
  if pipeline_args.make_examples_workers <= 0:
    raise ValueError('--make_examples_workers must be greater than zero.')
  if pipeline_args.call_variants_workers <= 0:
    raise ValueError('--call_variants_workers must be greater than zero.')
  if pipeline_args.shards <= 0:
    raise ValueError('--shards must be greater than zero.')
  if pipeline_args.shards % pipeline_args.make_examples_workers != 0:
    raise ValueError('--shards must be divisible by --make_examples_workers')
  if pipeline_args.shards % pipeline_args.call_variants_workers != 0:
    raise ValueError('--shards must be divisible by --call_variants_workers')
  if pipeline_args.call_variants_workers > pipeline_args.make_examples_workers:
    logging.warning(
        '--call_variants_workers cannot be greater than '
        '--make_examples_workers. Setting call_variants_workers to %d',
        pipeline_args.make_examples_workers)
    pipeline_args.call_variants_workers = pipeline_args.make_examples_workers
  if pipeline_args.gpu and not pipeline_args.docker_image_gpu:
    raise ValueError('--docker_image_gpu must be provided with --gpu')
  if (pipeline_args.call_variants_cores_per_worker <
      pipeline_args.call_variants_cores_per_shard):
    raise ValueError('--call_variants_cores_per_worker must be at least '
                     'as large as --call_variants_cores_per_shard')
  if (pipeline_args.gvcf_gq_binsize is not None and
      not pipeline_args.gvcf_outfile):
    raise ValueError('--gvcf_outfile must be provided with --gvcf_gq_binsize')
  if (pipeline_args.gvcf_gq_binsize is not None and
      pipeline_args.gvcf_gq_binsize < 1):
    raise ValueError('--gvcf_gq_binsize must be greater than or equal to 1')
  if pipeline_args.gpu and pipeline_args.tpu:
    raise ValueError('Both --gpu and --tpu cannot be set.')
  # TODO(nmousavi): Support multiple TPUs for call_variants if there is an
  # interest.
  if pipeline_args.tpu and pipeline_args.call_variants_workers != 1:
    raise ValueError(
        '--call_variants_workers must be equal to one when --tpu is set.')
  if pipeline_args.tpu and bool(pipeline_args.gke_cluster_region) == bool(
      pipeline_args.gke_cluster_zone):
    raise ValueError('Exactly one of --gke_cluster_region or '
                     '--gke_cluster_zone must be specified if --tpu is set.')

  # Verify the existing GKE cluster is up and running.
  if pipeline_args.gke_cluster_name:
    try:
      _ = gke_cluster.GkeCluster(
          pipeline_args.gke_cluster_name,
          pipeline_args.gke_cluster_region,
          pipeline_args.gke_cluster_zone,
          create_if_not_exist=False)
    except ValueError:
      raise ValueError('Given --gke_cluster_name does not exist: %s' %
                       pipeline_args.gke_cluster_name)

  # Automatically generate default values for missing args (if any).
  if not pipeline_args.logging:
    pipeline_args.logging = os.path.join(pipeline_args.staging, 'logs')
  if not pipeline_args.ref_fai:
    pipeline_args.ref_fai = pipeline_args.ref + _FAI_FILE_SUFFIX
  if not pipeline_args.ref_gzi and pipeline_args.ref.endswith(_GZ_FILE_SUFFIX):
    pipeline_args.ref_gzi = pipeline_args.ref + _GZI_FILE_SUFFIX
  if not pipeline_args.bai:
    pipeline_args.bai = pipeline_args.bam + _BAI_FILE_SUFFIX

  # Ensure all input files exist...
  if not _gcs_object_exist(pipeline_args.ref):
    raise ValueError('Given reference file via --ref does not exist')
  if not _gcs_object_exist(pipeline_args.ref_fai):
    raise ValueError('Given FAI index file via --ref_fai does not exist')
  if (pipeline_args.ref_gzi and
      not _gcs_object_exist(pipeline_args.ref_gzi)):
    raise ValueError('Given GZI index file via --ref_gzi does not exist')
  if not _gcs_object_exist(pipeline_args.bam):
    raise ValueError('Given BAM file via --bam does not exist')
  if not _gcs_object_exist(pipeline_args.bai):
    raise ValueError('Given BAM index file via --bai does not exist')
  # ...and we can write to output buckets.
  if not _can_write_to_bucket(_get_gcs_bucket(pipeline_args.staging)):
    raise ValueError('Cannot write to staging bucket, change --staging value')
  if not _can_write_to_bucket(_get_gcs_bucket(pipeline_args.outfile)):
    raise ValueError('Cannot write to output bucket, change --outfile value')

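# The helpers used above (_gcs_object_exist, _can_write_to_bucket,
# _get_gcs_bucket) are not shown in this excerpt. One way such checks can be
# implemented is via gsutil; the sketch below illustrates that approach and is
# not the module's actual implementation:
def _example_gcs_object_exists(gcs_path):
  """Returns True if the gs:// object exists (gsutil stat exits with 0)."""
  import subprocess
  return subprocess.call(['gsutil', '-q', 'stat', gcs_path]) == 0


def _example_can_write_to_bucket(bucket_name):
  """Returns True if a small probe object can be written to the bucket."""
  import subprocess
  probe = 'gs://%s/.write_probe' % bucket_name
  # Stream an empty object into the bucket and clean it up; a failed copy
  # indicates missing write permission.
  if subprocess.call('echo | gsutil -q cp - %s' % probe, shell=True) != 0:
    return False
  subprocess.call(['gsutil', '-q', 'rm', probe])
  return True
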
def test_cluster_exists_for_non_existent_cluster(
    self, unused_mock_call, unused_mock_create_cluster):
  self.assertEqual(
      gke_cluster.GkeCluster('foo-cluster',
                             cluster_zone='foo-zone')._cluster_exists(),
      False)

def test_cluster_exists(self, unused_mock_call, unused_mock_create_cluster):
  self.assertEqual(
      gke_cluster.GkeCluster('foo-cluster',
                             cluster_zone='foo-zone')._cluster_exists(),
      True)

def test_get_cluster_unknown_status(self, unused_mock_call):
  self.assertEqual(
      gke_cluster.GkeCluster(
          'foo-cluster', cluster_zone='foo-zone')._get_cluster_status(),
      gke_cluster.ClusterStatus.UNKNOWN)