Exemple #1
0
 def test_deploy_pod_fails(self, unused_mock_cluster_exists,
                           unused_mock_get_pod_status, unused_mock_call,
                           unused_mock_sleep):
     """Expects building the cluster and deploying a pod to raise RuntimeError.

     The mock arguments are injected by patch decorators that are not
     visible here; presumably they make the pod report a failure -- TODO
     confirm against the decorators.
     """
     deploy_kwargs = {'pod_config': 'foo-config', 'pod_name': 'foo-pod'}
     # Construction stays inside the context manager so a RuntimeError from
     # either the constructor or deploy_pod satisfies the assertion.
     with self.assertRaises(RuntimeError):
         cluster = gke_cluster.GkeCluster('foo-cluster',
                                          cluster_zone='foo-zone')
         cluster.deploy_pod(**deploy_kwargs)
Exemple #2
0
 def test_delete_cluster_in_provisioning_state(self, mock_call,
                                               unused_mock_sleep):
     """Deletes a cluster while the scripted status passes through PROVISIONING.

     `mock_call` hands back one scripted output per invocation; the test
     then checks that exactly seven invocations were made.
     """
     scripted_outputs = [
         'foo-cluster', None, 'RUNNING', 'foo-cluster', 'PROVISIONING',
         'RUNNING', 'RUNNING'
     ]
     mock_call.side_effect = scripted_outputs

     cluster = gke_cluster.GkeCluster('foo-cluster', cluster_zone='foo-zone')
     cluster.delete_cluster(wait=False)

     self.assertEqual(mock_call.call_count, 7)
Exemple #3
0
 def test_create_new_cluster(self, mock_call):
     """Checks the most recent command issued when building an alpha cluster.

     After constructing a GkeCluster with `alpha_cluster=True`, the last
     call recorded on `mock_call` must be the `gcloud alpha ... create`
     command for 'foo-cluster' in 'foo-zone'.
     """
     expected_cmd = [
         'gcloud', 'alpha', 'container', 'clusters', 'create',
         'foo-cluster', '--zone', 'foo-zone'
     ]
     gke_cluster.GkeCluster('foo-cluster',
                            cluster_zone='foo-zone',
                            alpha_cluster=True)
     mock_call.assert_called_with(expected_cmd, retries=1, retry_delay_sec=1)
Exemple #4
0
 def test_delete_pod(self, unused_mock_cluster_exists,
                     unused_mock_pod_exists, mock_call):
     """Checks that delete_pod(wait=True) issues `kubectl delete pod --wait`."""
     cluster = gke_cluster.GkeCluster('foo-cluster', cluster_zone='foo-zone')
     cluster.delete_pod(pod_name='foo-pod', wait=True)

     expected_cmd = ['kubectl', 'delete', 'pod', 'foo-pod', '--wait']
     mock_call.assert_any_call(expected_cmd,
                               std_input=None,
                               retries=1,
                               retry_delay_sec=0)
Exemple #5
0
 def test_deploy_pod(self, unused_mock_cluster_exists,
                     unused_mock_get_pod_status, mock_call,
                     unused_mock_sleep):
     """Checks that deploy_pod pipes the pod config into `kubectl create`."""
     cluster = gke_cluster.GkeCluster('foo-cluster', cluster_zone='foo-zone')
     cluster.deploy_pod(pod_config='foo-config', pod_name='foo-pod')

     # The config is expected on stdin, hence the '-' file argument.
     expected_cmd = ['kubectl', 'create', '-f', '-']
     mock_call.assert_any_call(expected_cmd,
                               std_input='foo-config',
                               retries=1,
                               retry_delay_sec=1)
Exemple #6
0
 def test_get_pod_status(self, unused_mock_cluster_exists, mock_call):
     """Checks get_pod_status reports SUCCEEDED and queries the pod phase.

     What `mock_call` returns is configured by a decorator outside this
     view.
     """
     cluster = gke_cluster.GkeCluster('foo-cluster', cluster_zone='foo-zone')
     observed_status = cluster.get_pod_status(pod_name='foo-pod')
     self.assertEqual(observed_status, gke_cluster.PodStatus.SUCCEEDED)

     phase_query = [
         'kubectl', 'get', 'pods', 'foo-pod', '-o',
         'jsonpath={.status.phase}'
     ]
     mock_call.assert_any_call(phase_query,
                               std_input=None,
                               retries=0,
                               retry_delay_sec=1)
def _run_call_variants_with_kubernetes(pipeline_args):
    """Runs the call_variants step as a pod on a GKE cluster.

    When `pipeline_args.gke_cluster_name` is set, that cluster is reused.
    Otherwise a new alpha cluster with a generated name is created, and its
    deletion is requested (without waiting) once the pod deployment returns
    or raises.

    Args:
      pipeline_args: parsed pipeline arguments. Reads `gke_cluster_name`,
        `gke_cluster_region` and `gke_cluster_zone`, plus `job_name_prefix`,
        `gke_cluster_version` and `project` when a cluster must be created.

    Raises:
      RuntimeError: if the deployment is interrupted by KeyboardInterrupt;
        the pod is deleted before the error is raised.
    """
    new_cluster_created = not pipeline_args.gke_cluster_name

    if new_cluster_created:
        # No cluster was supplied: assemble the creation flags for a new one.
        job_name_label = pipeline_args.job_name_prefix + _CALL_VARIANTS_JOB_NAME
        creation_flags = [
            '--cluster-version=' + pipeline_args.gke_cluster_version,
            '--num-nodes=1',
            '--enable-kubernetes-alpha',
            '--enable-ip-alias',
            '--create-subnetwork=',
            '--node-labels=job_name=' + job_name_label,
            '--scopes=cloud-platform',
            '--enable-tpu',
            '--no-enable-autorepair',
            '--project',
            pipeline_args.project,
            '--quiet',
        ]
        cluster_name = 'deepvariant-' + _NOW_STR + uuid.uuid4().hex[:5]
        cluster_options = {'alpha_cluster': True, 'extra_args': creation_flags}
    else:
        # Reuse the GKE cluster named on the command line.
        cluster_name = pipeline_args.gke_cluster_name
        cluster_options = {}

    cluster = gke_cluster.GkeCluster(cluster_name,
                                     pipeline_args.gke_cluster_region,
                                     pipeline_args.gke_cluster_zone,
                                     **cluster_options)

    # Deploy the call_variants pod under a unique name.
    pod_name = 'deepvariant-' + _NOW_STR + '-' + uuid.uuid4().hex[:5]
    try:
        try:
            _deploy_call_variants_pod(pod_name, cluster, pipeline_args)
        except KeyboardInterrupt:
            # Clean up the pod before reporting the cancellation.
            cluster.delete_pod(pod_name)
            raise RuntimeError('Job cancelled by user.')
    finally:
        # Only clusters created by this function are deleted.
        if new_cluster_created:
            cluster.delete_cluster(wait=False)
Exemple #8
0
 def test_deploy_pod_retries(self, unused_mock_cluster_exists,
                             unused_mock_delete_pod, mock_get_pod_status,
                             mock_call, unused_mock_sleep):
     """Checks that a failed pod is redeployed when one retry is allowed.

     The pod status is scripted as FAILED then SUCCEEDED; both the initial
     `kubectl create` and the follow-up `kubectl replace --force` must have
     been issued with the pod config on stdin.
     """
     mock_get_pod_status.side_effect = [
         gke_cluster.PodStatus.FAILED, gke_cluster.PodStatus.SUCCEEDED
     ]

     cluster = gke_cluster.GkeCluster('foo-cluster', cluster_zone='foo-zone')
     cluster.deploy_pod(pod_config='foo-config',
                        pod_name='foo-pod',
                        retries=1)

     expected_cmds = (
         ['kubectl', 'create', '-f', '-'],
         ['kubectl', 'replace', '--force', '-f', '-'],
     )
     for kubectl_cmd in expected_cmds:
         mock_call.assert_any_call(kubectl_cmd,
                                   std_input='foo-config',
                                   retries=1,
                                   retry_delay_sec=1)
Exemple #9
0
 def test_reuse_existing_cluster(self, mock_call):
     """Checks the gcloud commands issued when constructing a GkeCluster.

     Constructing the cluster object must have listed clusters, fetched
     credentials for 'foo-cluster', and described its status.
     """
     gke_cluster.GkeCluster('foo-cluster', cluster_zone='foo-zone')

     zone_flags = ['--zone', 'foo-zone']
     expected_cmds = (
         ['gcloud', 'container', 'clusters', 'list', '--format=value(name)'],
         ['gcloud', 'container', 'clusters', 'get-credentials', 'foo-cluster'],
         ['gcloud', 'container', 'clusters', 'describe', 'foo-cluster',
          '--format=value(status)'],
     )
     for cmd_prefix in expected_cmds:
         mock_call.assert_any_call(cmd_prefix + zone_flags,
                                   retry_delay_sec=1,
                                   retries=1)
Exemple #10
0
 def test_create_new_cluster_with_keyboard_interrupt(
         self, unused_mock_cluster_exists, mock_call):
     """Checks the commands issued while creating an alpha cluster.

     Constructing a GkeCluster with `alpha_cluster=True` must have issued
     the `gcloud alpha ... create` command and a `describe` status query.
     How the keyboard interrupt is simulated is configured by decorators
     outside this view -- TODO confirm.
     """
     gke_cluster.GkeCluster('foo-cluster',
                            cluster_zone='foo-zone',
                            alpha_cluster=True)
     mock_call.assert_any_call([
         'gcloud', 'alpha', 'container', 'clusters', 'create',
         'foo-cluster', '--zone', 'foo-zone'
     ],
                               retry_delay_sec=1,
                               retries=1)
     # assert_any_call passes if at least one recorded call matches, so the
     # describe command only needs to be asserted once.
     mock_call.assert_any_call([
         'gcloud', 'container', 'clusters', 'describe', 'foo-cluster',
         '--format=value(status)', '--zone', 'foo-zone'
     ],
                               retry_delay_sec=1,
                               retries=1)
 def test_get_pod_status_pulling_image_failed(self,
                                              unused_mock_cluster_exists,
                                              mock_call):
     """Checks get_pod_status reports FAILED and inspects container states.

     Besides the pod phase query, the waiting reason of every non-ready
     container must have been requested. What `mock_call` returns is
     configured by a decorator outside this view.
     """
     cluster = gke_cluster.GkeCluster('foo-cluster', cluster_zone='foo-zone')
     observed_status = cluster.get_pod_status(pod_name='foo-pod')
     self.assertEqual(observed_status, gke_cluster.PodStatus.FAILED)

     kubectl_get_pod = ['kubectl', 'get', 'pods', 'foo-pod', '-o']
     output_formats = (
         'jsonpath={.status.phase}',
         'go-template="{{range .status.containerStatuses}}{{if not .ready}}'
         '{{.state.waiting.reason}}{{end}}{{end}}"',
     )
     for output_format in output_formats:
         mock_call.assert_any_call(kubectl_get_pod + [output_format],
                                   std_input=None,
                                   retries=0,
                                   retry_delay_sec=1)
Exemple #12
0
    def test_get_cluster_status(self, mock_call):
        """Checks _get_cluster_status reports RUNNING and the commands used.

        The list, get-credentials and describe gcloud commands must all
        have been issued for 'foo-cluster' in 'foo-zone'.
        """
        cluster = gke_cluster.GkeCluster('foo-cluster',
                                         cluster_zone='foo-zone')
        observed_status = cluster._get_cluster_status()
        self.assertEqual(observed_status, gke_cluster.ClusterStatus.RUNNING)

        zone_flags = ['--zone', 'foo-zone']
        expected_cmds = (
            ['gcloud', 'container', 'clusters', 'list',
             '--format=value(name)'],
            ['gcloud', 'container', 'clusters', 'get-credentials',
             'foo-cluster'],
            ['gcloud', 'container', 'clusters', 'describe', 'foo-cluster',
             '--format=value(status)'],
        )
        for cmd_prefix in expected_cmds:
            mock_call.assert_any_call(cmd_prefix + zone_flags,
                                      retry_delay_sec=1,
                                      retries=1)
def _validate_and_complete_args(pipeline_args):
    """Validates pipeline arguments and fills some missing args (if any).

    Checks run in a fixed order: numeric and flag consistency first, then the
    existence of a user-supplied GKE cluster, then (after defaults have been
    filled in) the existence of input files and writability of the staging
    and output buckets. The first failing check raises.

    Args:
      pipeline_args: parsed pipeline arguments. Mutated in place:
        `call_variants_workers` is capped at `make_examples_workers`, and
        `logging`, `ref_fai`, `ref_gzi` and `bai` receive derived defaults
        when they are not set.

    Raises:
      ValueError: if any argument is invalid or inconsistent, the given GKE
        cluster does not exist, an input file is missing, or a bucket is not
        writable.
    """
    # Basic validation logic. More detailed validation is done by pipelines API.
    if pipeline_args.preemptible and pipeline_args.max_preemptible_tries <= 0:
        raise ValueError('--max_preemptible_tries must be greater than zero.')
    if pipeline_args.max_non_preemptible_tries <= 0:
        raise ValueError(
            '--max_non_preemptible_tries must be greater than zero.')
    # The worker counts are checked for positivity before they are used as
    # divisors below.
    if pipeline_args.make_examples_workers <= 0:
        raise ValueError('--make_examples_workers must be greater than zero.')
    if pipeline_args.call_variants_workers <= 0:
        raise ValueError('--call_variants_workers must be greater than zero.')
    if pipeline_args.shards <= 0:
        raise ValueError('--shards must be greater than zero.')
    if pipeline_args.shards % pipeline_args.make_examples_workers != 0:
        raise ValueError(
            '--shards must be divisible by --make_examples_workers')
    if pipeline_args.shards % pipeline_args.call_variants_workers != 0:
        raise ValueError(
            '--shards must be divisible by --call_variants_workers')
    if pipeline_args.call_variants_workers > pipeline_args.make_examples_workers:
        # Not an error: the value is capped and the user is told about it.
        logging.warning(
            '--call_variants_workers cannot be greater than '
            '--make_examples_workers. Setting call_variants_workers to %d',
            pipeline_args.make_examples_workers)
        pipeline_args.call_variants_workers = pipeline_args.make_examples_workers

    if pipeline_args.gpu and not pipeline_args.docker_image_gpu:
        raise ValueError('--docker_image_gpu must be provided with --gpu')
    if (pipeline_args.call_variants_cores_per_worker <
            pipeline_args.call_variants_cores_per_shard):
        raise ValueError('--call_variants_cores_per_worker must be at least '
                         'as large as --call_variants_cores_per_shard')
    if (pipeline_args.gvcf_gq_binsize is not None
            and not pipeline_args.gvcf_outfile):
        raise ValueError(
            '--gvcf_outfile must be provided with --gvcf_gq_binsize')
    if (pipeline_args.gvcf_gq_binsize is not None
            and pipeline_args.gvcf_gq_binsize < 1):
        raise ValueError('--gvcf_gq_binsize must be greater or equal to 1')
    if pipeline_args.gpu and pipeline_args.tpu:
        raise ValueError('Both --gpu and --tpu cannot be set.')
    # TODO(nmousavi): Support multiple TPUs for call_variants if there is an
    # interest.
    if pipeline_args.tpu and pipeline_args.call_variants_workers != 1:
        raise ValueError(
            '--call_variants_workers must be equal to one when --tpu is set.')
    # Equal truthiness means both or neither of region/zone were given.
    if pipeline_args.tpu and bool(pipeline_args.gke_cluster_region) == bool(
            pipeline_args.gke_cluster_zone):
        raise ValueError(
            'Exactly one of --gke_cluster_region or '
            '--gke_cluster_zone must be specified if --tpu is set.')

    # Verify the existing gke cluster is up and running.
    if pipeline_args.gke_cluster_name:
        try:
            _ = gke_cluster.GkeCluster(pipeline_args.gke_cluster_name,
                                       pipeline_args.gke_cluster_region,
                                       pipeline_args.gke_cluster_zone,
                                       create_if_not_exist=False)
        except ValueError:
            raise ValueError('Given --gke_cluster_name does not exist: %s' %
                             pipeline_args.gke_cluster_name)

    # Automatically generate default values for missing args (if any).
    if not pipeline_args.logging:
        pipeline_args.logging = os.path.join(pipeline_args.staging, 'logs')
    if not pipeline_args.ref_fai:
        pipeline_args.ref_fai = pipeline_args.ref + _FAI_FILE_SUFFIX
    # A GZI index is only derived when the reference has the gz suffix.
    if not pipeline_args.ref_gzi and pipeline_args.ref.endswith(
            _GZ_FILE_SUFFIX):
        pipeline_args.ref_gzi = pipeline_args.ref + _GZI_FILE_SUFFIX
    if not pipeline_args.bai:
        pipeline_args.bai = pipeline_args.bam + _BAI_FILE_SUFFIX

    # Ensuring all input files exist...
    if not _gcs_object_exist(pipeline_args.ref):
        raise ValueError('Given reference file via --ref does not exist')
    if not _gcs_object_exist(pipeline_args.ref_fai):
        raise ValueError('Given FAI index file via --ref_fai does not exist')
    if (pipeline_args.ref_gzi
            and not _gcs_object_exist(pipeline_args.ref_gzi)):
        raise ValueError('Given GZI index file via --ref_gzi does not exist')
    if not _gcs_object_exist(pipeline_args.bam):
        raise ValueError('Given BAM file via --bam does not exist')
    if not _gcs_object_exist(pipeline_args.bai):
        raise ValueError('Given BAM index file via --bai does not exist')
    # ...and we can write to output buckets.
    if not _can_write_to_bucket(_get_gcs_bucket(pipeline_args.staging)):
        raise ValueError(
            'Cannot write to staging bucket, change --staging value')
    if not _can_write_to_bucket(_get_gcs_bucket(pipeline_args.outfile)):
        raise ValueError(
            'Cannot write to output bucket, change --outfile value')
Exemple #14
0
 def test_cluster_exists_for_non_existent_cluster(
         self, unused_mock_call, unused_unused_mock_create_cluster):
     """Checks that _cluster_exists() returns False.

     The command output that drives the result is configured by decorators
     outside this view.
     """
     cluster = gke_cluster.GkeCluster('foo-cluster', cluster_zone='foo-zone')
     cluster_found = cluster._cluster_exists()
     self.assertEqual(cluster_found, False)
Exemple #15
0
 def test_cluster_exists(self, unused_mock_call,
                         unused_mock_create_cluster):
     """Checks that _cluster_exists() returns True.

     The command output that drives the result is configured by decorators
     outside this view.
     """
     cluster = gke_cluster.GkeCluster('foo-cluster', cluster_zone='foo-zone')
     cluster_found = cluster._cluster_exists()
     self.assertEqual(cluster_found, True)
Exemple #16
0
 def test_get_cluster_unknown_status(self, unused_mock_call):
     """Checks that _get_cluster_status() reports ClusterStatus.UNKNOWN.

     The command output that drives the result is configured by a decorator
     outside this view.
     """
     cluster = gke_cluster.GkeCluster('foo-cluster', cluster_zone='foo-zone')
     observed_status = cluster._get_cluster_status()
     self.assertEqual(observed_status, gke_cluster.ClusterStatus.UNKNOWN)