コード例 #1
0
    def _Create(self):
        """Creates the cluster by issuing a gcloud `clusters create` command.

        When GPUs are requested, the command goes through the `beta` gcloud
        track and carries an accelerator flag. Autoscaling bounds are only
        passed when autoscaling is enabled on this resource.
        """
        gcloud_args = ['container', 'clusters', 'create', self.name]
        use_gpus = bool(self.gpu_count)
        if use_gpus:
            # TODO(ferneyhough): Make cluster version a flag, and allow it
            # to be specified in the spec (this will require a new spec class
            # for google_container_engine however).
            gcloud_args.insert(0, 'beta')

        cmd = util.GcloudCommand(self, *gcloud_args)
        flags = cmd.flags

        if use_gpus:
            flags['accelerator'] = (
                gce_virtual_machine.GenerateAcceleratorSpecString(
                    self.gpu_type, self.gpu_count))

        flags['cluster-version'] = self.cluster_version
        flags['scopes'] = 'cloud-platform'

        if self.enable_autoscaling:
            cmd.args.append('--enable-autoscaling')
            flags['max-nodes'] = self.max_nodes
            flags['min-nodes'] = self.min_nodes

        flags['num-nodes'] = self.num_nodes
        flags['machine-type'] = self.machine_type

        # Provisioning a large GPU-accelerated GKE cluster can take many
        # minutes, hence the generous timeout.
        cmd.Issue(timeout=900, env=self._GetRequiredGkeEnv())
コード例 #2
0
    def _AddNodeParamsToCmd(self, vm_config, num_nodes, name, cmd):
        """Modifies cmd to include node specific command arguments."""

        if vm_config.gpu_count:
            cmd.flags['accelerator'] = (
                gce_virtual_machine.GenerateAcceleratorSpecString(
                    vm_config.gpu_type, vm_config.gpu_count))
        if vm_config.min_cpu_platform:
            cmd.flags['min-cpu-platform'] = vm_config.min_cpu_platform

        if vm_config.threads_per_core:
            # TODO(user): Remove when threads-per-core is available in GA
            cmd.use_alpha_gcloud = True
            cmd.flags['threads-per-core'] = vm_config.threads_per_core

        if vm_config.boot_disk_size:
            cmd.flags['disk-size'] = vm_config.boot_disk_size
        if vm_config.boot_disk_type:
            cmd.flags['disk-type'] = vm_config.boot_disk_type
        if vm_config.max_local_disks:
            # TODO(pclay): Switch to local-ssd-volumes which support NVME when it
            # leaves alpha. See
            # https://cloud.google.com/sdk/gcloud/reference/alpha/container/clusters/create
            cmd.flags['local-ssd-count'] = vm_config.max_local_disks

        cmd.flags['num-nodes'] = num_nodes

        if vm_config.machine_type is None:
            cmd.flags['machine-type'] = 'custom-{0}-{1}'.format(
                vm_config.cpus, vm_config.memory_mib)
        else:
            cmd.flags['machine-type'] = vm_config.machine_type

        cmd.flags['node-labels'] = f'pkb_nodepool={name}'
コード例 #3
0
    def _Create(self):
        """Creates the cluster.

        Builds a gcloud `container clusters create` command (through the
        `beta` track when a minimum CPU platform or GPUs are requested),
        issues it, and converts a non-zero exit status into an exception.

        Raises:
            errors.Benchmarks.InsufficientCapacityCloudFailure: if the command
                fails and stderr mentions ZONE_RESOURCE_POOL_EXHAUSTED.
            errors.Resource.CreationError: if the command fails for any other
                reason.
        """
        if self.min_cpu_platform or self.gpu_count:
            cmd = util.GcloudCommand(self, 'beta', 'container', 'clusters',
                                     'create', self.name)
        else:
            cmd = util.GcloudCommand(self, 'container', 'clusters', 'create',
                                     self.name)

        cmd.flags['cluster-version'] = self.cluster_version
        if FLAGS.gke_enable_alpha:
            cmd.args.append('--enable-kubernetes-alpha')
            cmd.args.append('--no-enable-autorepair')
            cmd.args.append('--no-enable-autoupgrade')

        # Pick the identity for the cluster: an explicitly configured service
        # account wins, then a default user that is itself a service account,
        # and otherwise only the cloud-platform scope is requested.
        user = util.GetDefaultUser()
        if FLAGS.gcp_service_account:
            cmd.flags['service-account'] = FLAGS.gcp_service_account
        elif 'gserviceaccount.com' in user:
            cmd.flags['service-account'] = user
            self.use_application_default_credentials = False
        else:
            cmd.flags['scopes'] = 'cloud-platform'

        if self.gpu_count:
            cmd.flags['accelerator'] = (
                gce_virtual_machine.GenerateAcceleratorSpecString(
                    self.gpu_type, self.gpu_count))
        if self.min_cpu_platform:
            cmd.flags['min-cpu-platform'] = self.min_cpu_platform

        # Autoscaling is requested whenever either bound differs from the
        # initial node count.
        if self.min_nodes != self.num_nodes or self.max_nodes != self.num_nodes:
            cmd.args.append('--enable-autoscaling')
            cmd.flags['max-nodes'] = self.max_nodes
            cmd.flags['min-nodes'] = self.min_nodes

        cmd.flags['num-nodes'] = self.num_nodes

        # Without an explicit machine type, describe a custom one from the
        # configured cpus and memory.
        if self.machine_type is None:
            cmd.flags['machine-type'] = 'custom-{0}-{1}'.format(
                self.cpus, self.memory)
        else:
            cmd.flags['machine-type'] = self.machine_type

        cmd.flags['metadata'] = util.MakeFormattedDefaultTags()
        cmd.flags['labels'] = util.MakeFormattedDefaultTags()

        # This command needs a long timeout due to the many minutes it
        # can take to provision a large GPU-accelerated GKE cluster.
        _, stderr, retcode = cmd.Issue(timeout=900,
                                       env=self._GetRequiredGkeEnv(),
                                       raise_on_failure=False)
        if retcode != 0:
            # Log specific type of failure, if known.
            if 'ZONE_RESOURCE_POOL_EXHAUSTED' in stderr:
                # logging.error rather than logging.exception: no exception
                # is being handled here, so there is no traceback to attach.
                logging.error('Container resources exhausted: %s', stderr)
                raise errors.Benchmarks.InsufficientCapacityCloudFailure(
                    'Container resources exhausted in zone %s: %s' %
                    (self.zone, stderr))
            raise errors.Resource.CreationError(stderr)
コード例 #4
0
    def _Create(self):
        """Creates the cluster via a gcloud `clusters create` command.

        The `beta` gcloud track is selected when either a minimum CPU
        platform or GPUs are requested. Autoscaling bounds are passed only
        when autoscaling is enabled.
        """
        gcloud_args = ['container', 'clusters', 'create', self.name]
        if self.min_cpu_platform or self.gpu_count:
            gcloud_args.insert(0, 'beta')
        cmd = util.GcloudCommand(self, *gcloud_args)
        flags = cmd.flags

        flags['cluster-version'] = self.cluster_version
        flags['scopes'] = 'cloud-platform'

        if self.gpu_count:
            flags['accelerator'] = (
                gce_virtual_machine.GenerateAcceleratorSpecString(
                    self.gpu_type, self.gpu_count))

        min_cpu_platform = self.min_cpu_platform
        if min_cpu_platform:
            flags['min-cpu-platform'] = min_cpu_platform

        if self.enable_autoscaling:
            cmd.args.append('--enable-autoscaling')
            flags['max-nodes'] = self.max_nodes
            flags['min-nodes'] = self.min_nodes

        flags['num-nodes'] = self.num_nodes

        # No explicit machine type means a custom type assembled from the
        # configured cpus and memory.
        machine_type = self.machine_type
        if machine_type is None:
            machine_type = "custom-{0}-{1}".format(self.cpus, self.memory)
        flags['machine-type'] = machine_type

        # Provisioning a large GPU-accelerated GKE cluster can take many
        # minutes, hence the generous timeout.
        cmd.Issue(timeout=900, env=self._GetRequiredGkeEnv())
コード例 #5
0
  def _AddNodeParamsToCmd(self, vm_config, num_nodes, name, cmd):
    """Populates cmd with the node-level flags and args for a node pool.

    Args:
      vm_config: object exposing the node settings read below (gpu_count,
        gpu_type, min_cpu_platform, threads_per_core, boot_disk_size,
        boot_disk_type, max_local_disks, zone, machine_type, cpus,
        memory_mib).
      num_nodes: value stored under the 'num-nodes' flag.
      name: node pool name, recorded in the 'pkb_nodepool' node label.
      cmd: command object whose `flags` mapping and `args` list (and, for
        threads-per-core, `use_alpha_gcloud` attribute) are mutated in place.
    """
    flags = cmd.flags

    if vm_config.gpu_count:
      flags['accelerator'] = (
          gce_virtual_machine.GenerateAcceleratorSpecString(
              vm_config.gpu_type, vm_config.gpu_count))

    min_cpu_platform = vm_config.min_cpu_platform
    if min_cpu_platform:
      flags['min-cpu-platform'] = min_cpu_platform

    threads_per_core = vm_config.threads_per_core
    if threads_per_core:
      # TODO(user): Remove when threads-per-core is available in GA
      cmd.use_alpha_gcloud = True
      flags['threads-per-core'] = threads_per_core

    # Optional disk settings; each flag is emitted only for a truthy value.
    # TODO(pclay): Switch local-ssd-count to local-ssd-volumes, which support
    # NVME, when it leaves alpha. See
    # https://cloud.google.com/sdk/gcloud/reference/alpha/container/clusters/create
    optional_disk_flags = (
        ('disk-size', vm_config.boot_disk_size),
        ('disk-type', vm_config.boot_disk_type),
        ('local-ssd-count', vm_config.max_local_disks),
    )
    for flag_name, flag_value in optional_disk_flags:
      if flag_value:
        flags[flag_name] = flag_value

    flags['num-nodes'] = num_nodes

    # vm_config.zone may be a comma-separated list of zones.
    node_locations = vm_config.zone
    if node_locations:
      flags['node-locations'] = node_locations

    # Fall back to a custom machine type built from cpus/memory when no
    # explicit machine type is configured.
    machine_type = vm_config.machine_type
    if machine_type is None:
      machine_type = 'custom-{0}-{1}'.format(
          vm_config.cpus, vm_config.memory_mib)
    flags['machine-type'] = machine_type

    # gVNIC is always stated explicitly, either enabled or disabled.
    cmd.args.append(
        '--enable-gvnic' if FLAGS.gke_enable_gvnic else '--no-enable-gvnic')

    # If using a fixed version (or the default) do not enable upgrades.
    if self.cluster_version not in RELEASE_CHANNELS:
      cmd.args.append('--no-enable-autoupgrade')

    flags['node-labels'] = f'pkb_nodepool={name}'
コード例 #6
0
    def _Create(self):
        """Creates the cluster via a gcloud `clusters create` command.

        The `beta` gcloud track is selected when either a minimum CPU
        platform or GPUs are requested. If the default user looks like a
        service account it is reused for the cluster; otherwise only the
        cloud-platform scope is requested.
        """
        gcloud_args = ['container', 'clusters', 'create', self.name]
        if self.min_cpu_platform or self.gpu_count:
            gcloud_args.insert(0, 'beta')
        cmd = util.GcloudCommand(self, *gcloud_args)
        flags = cmd.flags

        flags['cluster-version'] = self.cluster_version
        if FLAGS.gke_enable_alpha:
            cmd.args.extend([
                '--enable-kubernetes-alpha',
                '--no-enable-autorepair',
                '--no-enable-autoupgrade',
            ])

        user = util.GetDefaultUser()
        if 'gserviceaccount.com' not in user:
            flags['scopes'] = 'cloud-platform'
        else:
            flags['service-account'] = user
            self.use_application_default_credentials = False

        if self.gpu_count:
            flags['accelerator'] = (
                gce_virtual_machine.GenerateAcceleratorSpecString(
                    self.gpu_type, self.gpu_count))

        min_cpu_platform = self.min_cpu_platform
        if min_cpu_platform:
            flags['min-cpu-platform'] = min_cpu_platform

        # Autoscaling is requested whenever either bound differs from the
        # initial node count.
        autoscale = (self.min_nodes != self.num_nodes or
                     self.max_nodes != self.num_nodes)
        if autoscale:
            cmd.args.append('--enable-autoscaling')
            flags['max-nodes'] = self.max_nodes
            flags['min-nodes'] = self.min_nodes

        flags['num-nodes'] = self.num_nodes

        # No explicit machine type means a custom type assembled from the
        # configured cpus and memory.
        machine_type = self.machine_type
        if machine_type is None:
            machine_type = 'custom-{0}-{1}'.format(self.cpus, self.memory)
        flags['machine-type'] = machine_type

        flags['metadata'] = util.MakeFormattedDefaultTags()
        flags['labels'] = util.MakeFormattedDefaultTags()

        # Provisioning a large GPU-accelerated GKE cluster can take many
        # minutes, hence the generous timeout.
        cmd.Issue(timeout=900, env=self._GetRequiredGkeEnv())
コード例 #7
0
    def _Create(self):
        """Creates the cluster via a gcloud `clusters create` command.

        GPU clusters go through the `beta` gcloud track with a pinned
        cluster version and an accelerator flag; other clusters use the
        default track with no version specified.
        """
        gcloud_args = ['container', 'clusters', 'create', self.name]
        use_gpus = bool(self.gpu_count)
        if use_gpus:
            # TODO(ferneyhough): Make cluster version a flag, and allow it
            # to be specified in the spec (this will require a new spec class
            # for google_container_engine however).
            gcloud_args = (['beta'] + gcloud_args +
                           ['--cluster-version', '1.9.2-gke.1'])

        cmd = util.GcloudCommand(self, *gcloud_args)
        flags = cmd.flags

        if use_gpus:
            flags['accelerator'] = (
                gce_virtual_machine.GenerateAcceleratorSpecString(
                    self.gpu_type, self.gpu_count))

        flags['num-nodes'] = self.num_nodes
        flags['machine-type'] = self.machine_type

        cmd.Issue(timeout=600, env=self._GetRequiredGkeEnv())
コード例 #8
0
    def _Create(self):
        """Creates the cluster.

        Builds a gcloud `container clusters create` command from this
        resource's settings and its `vm_config`, issues it, and converts a
        non-zero exit status into an exception.

        Raises:
            errors.Benchmarks.InsufficientCapacityCloudFailure: if the command
                fails and stderr mentions ZONE_RESOURCE_POOL_EXHAUSTED.
            errors.Resource.CreationError: if the command fails and
                util.CheckGcloudResponseKnownFailures did not raise first.
        """
        cmd = util.GcloudCommand(self, 'container', 'clusters', 'create',
                                 self.name)

        cmd.flags['cluster-version'] = self.cluster_version
        if FLAGS.gke_enable_alpha:
            cmd.args.append('--enable-kubernetes-alpha')
            cmd.args.append('--no-enable-autorepair')
            cmd.args.append('--no-enable-autoupgrade')

        user = util.GetDefaultUser()
        if FLAGS.gcp_service_account:
            cmd.flags['service-account'] = FLAGS.gcp_service_account
        # Matches service accounts that either definitely belong to this
        # project or are a GCP managed service account like the GCE default
        # service account, which we can't tell to which project they belong.
        elif re.match(SERVICE_ACCOUNT_PATTERN, user):
            logging.info(
                'Re-using configured service-account for GKE Cluster: %s',
                user)
            cmd.flags['service-account'] = user
            self.use_application_default_credentials = False
        else:
            logging.info('Using default GCE service account for GKE cluster')
            cmd.flags['scopes'] = 'cloud-platform'

        if self.vm_config.gpu_count:
            cmd.flags['accelerator'] = (
                gce_virtual_machine.GenerateAcceleratorSpecString(
                    self.vm_config.gpu_type, self.vm_config.gpu_count))
        if self.vm_config.min_cpu_platform:
            cmd.flags['min-cpu-platform'] = self.vm_config.min_cpu_platform

        if self.vm_config.boot_disk_size:
            cmd.flags['disk-size'] = self.vm_config.boot_disk_size
        if self.vm_config.boot_disk_type:
            cmd.flags['disk-type'] = self.vm_config.boot_disk_type
        if self.vm_config.max_local_disks:
            # TODO(pclay): Switch to local-ssd-volumes which support NVME when it
            # leaves alpha. See
            # https://cloud.google.com/sdk/gcloud/reference/alpha/container/clusters/create
            cmd.flags['local-ssd-count'] = self.vm_config.max_local_disks

        # Autoscaling is requested whenever either bound differs from the
        # initial node count.
        if self.min_nodes != self.num_nodes or self.max_nodes != self.num_nodes:
            cmd.args.append('--enable-autoscaling')
            cmd.flags['max-nodes'] = self.max_nodes
            cmd.flags['min-nodes'] = self.min_nodes

        cmd.flags['num-nodes'] = self.num_nodes

        # Without an explicit machine type, describe a custom one from the
        # configured cpus and memory.
        if self.vm_config.machine_type is None:
            cmd.flags['machine-type'] = 'custom-{0}-{1}'.format(
                self.vm_config.cpus, self.vm_config.memory_mib)
        else:
            cmd.flags['machine-type'] = self.vm_config.machine_type

        cmd.flags['metadata'] = util.MakeFormattedDefaultTags()
        cmd.flags['labels'] = util.MakeFormattedDefaultTags()

        # This command needs a long timeout due to the many minutes it
        # can take to provision a large GPU-accelerated GKE cluster.
        _, stderr, retcode = cmd.Issue(timeout=1200, raise_on_failure=False)
        if retcode:
            # Log specific type of failure, if known.
            if 'ZONE_RESOURCE_POOL_EXHAUSTED' in stderr:
                # logging.error rather than logging.exception: no exception
                # is being handled here, so there is no traceback to attach.
                logging.error('Container resources exhausted: %s', stderr)
                raise errors.Benchmarks.InsufficientCapacityCloudFailure(
                    'Container resources exhausted in zone %s: %s' %
                    (self.zone, stderr))
            # NOTE(review): presumably raises a more specific error for
            # recognized gcloud failures - confirm against util.
            util.CheckGcloudResponseKnownFailures(stderr, retcode)
            raise errors.Resource.CreationError(stderr)