class GkeCluster(KubernetesCluster):
    """Google Kubernetes Engine (GKE) backed Kubernetes cluster.

    Drives cluster lifecycle (create, node-pool management, kubectl config,
    destroy) through the ``gcloud`` CLI, throttling GKE API traffic via an
    ``ApiCallRateLimiter`` started in ``__init__``.
    """

    # Name of the pool GKE deploys together with the cluster itself.
    AUXILIARY_POOL_NAME = 'default-pool'  # This is default pool that is deployed with the cluster
    # Node label GKE applies to every node to identify its node pool.
    POOL_LABEL_NAME = 'cloud.google.com/gke-nodepool'

    # Registry of node pools keyed by pool name (populated via _add_pool).
    pools: Dict[str, GkeNodePool]

    # pylint: disable=too-many-arguments
    def __init__(self,
                 gke_cluster_version,
                 gke_k8s_release_channel,
                 gce_image_type,
                 gce_image_size,
                 gce_network,
                 services,
                 gce_instance_type='n1-standard-4',
                 user_prefix=None,
                 params=None,
                 gce_datacenter=None,
                 cluster_uuid=None,
                 n_nodes=1):
        """Store GKE/GCE settings and start the API call rate limiter.

        :param gke_cluster_version: K8S version for the cluster, e.g. ``1.21``.
        :param gke_k8s_release_channel: GKE release channel name; empty string
            selects the static channel (which permits disabling autoupgrade).
        :param gce_image_type: disk type for node boot disks.
        :param gce_image_size: disk size (GB) for node boot disks.
        :param gce_network: GCE network to attach the cluster to.
        :param services: GCE service objects; the first one supplies the
            project and user key.
        :param gce_instance_type: machine type for the default pool nodes.
        :param gce_datacenter: list of zones; the first one is used.
        :param n_nodes: node count for the default (auxiliary) pool.
        """
        super().__init__(
            params=params,
            cluster_uuid=cluster_uuid,
            user_prefix=user_prefix,
        )
        self.gke_cluster_version = gke_cluster_version
        # Strip so that a whitespace-only channel means "static channel".
        self.gke_k8s_release_channel = gke_k8s_release_channel.strip()
        self.gce_image_type = gce_image_type
        self.gce_image_size = gce_image_size
        self.gce_network = gce_network
        self.gce_services = services
        self.gce_instance_type = gce_instance_type
        self.n_nodes = n_nodes
        self.gce_project = services[0].project
        self.gce_user = services[0].key
        self.gce_zone = gce_datacenter[0]
        self.gke_cluster_created = False
        # Throttle all GKE API calls; started immediately and stopped in destroy().
        self.api_call_rate_limiter = ApiCallRateLimiter(
            rate_limit=GKE_API_CALL_RATE_LIMIT,
            queue_size=GKE_API_CALL_QUEUE_SIZE,
            urllib_retry=GKE_URLLIB_RETRY,
            urllib_backoff_factor=GKE_URLLIB_BACKOFF_FACTOR,
        )
        self.api_call_rate_limiter.start()

    def __str__(self):
        return f"{type(self).__name__} {self.name} | Zone: {self.gce_zone} | Version: {self.gke_cluster_version}"

    def deploy(self):
        """Create the GKE cluster, register its default pool and set up RBAC."""
        LOGGER.info("Create GKE cluster `%s' with %d node(s) in %s",
                    self.short_cluster_name, self.n_nodes, self.AUXILIARY_POOL_NAME)
        tags = ",".join(f"{key}={value}" for key, value in self.tags.items())
        with self.gcloud as gcloud:
            # NOTE: only static K8S release channel supports disabling of autoupgrade
            gcloud.run(f"container --project {self.gce_project} clusters create {self.short_cluster_name}"
                       f" --no-enable-basic-auth"
                       f" --zone {self.gce_zone}"
                       f" --cluster-version {self.gke_cluster_version}"
                       f"{' --release-channel ' + self.gke_k8s_release_channel if self.gke_k8s_release_channel else ''}"
                       f" --network {self.gce_network}"
                       f" --num-nodes {self.n_nodes}"
                       f" --machine-type {self.gce_instance_type}"
                       f" --image-type UBUNTU"
                       f" --disk-type {self.gce_image_type}"
                       f" --disk-size {self.gce_image_size}"
                       f" --enable-stackdriver-kubernetes"
                       f"{'' if self.gke_k8s_release_channel else ' --no-enable-autoupgrade'}"
                       f"{'' if self.gke_k8s_release_channel else ' --no-enable-autorepair'}"
                       f" --metadata {tags}")
        self.patch_kubectl_config()
        # The default pool already exists (created with the cluster), so it is
        # registered with is_deployed=True and no extra gcloud call is made.
        self.deploy_node_pool(GkeNodePool(
            name=self.AUXILIARY_POOL_NAME,
            num_nodes=self.n_nodes,
            disk_size=self.gce_image_size,
            disk_type=self.gce_image_type,
            k8s_cluster=self,
            instance_type=self.gce_instance_type,
            is_deployed=True,
        ))
        LOGGER.info("Setup RBAC for GKE cluster `%s'", self.name)
        self.kubectl("create clusterrolebinding cluster-admin-binding --clusterrole cluster-admin "
                     f"--user {self.gce_user}")

    @cached_property
    def gcloud(self) -> GcloudContextManager:  # pylint: disable=no-self-use
        # Shared gcloud context manager provided by the test's localhost node.
        return self.test_config.tester_obj().localhost.gcloud

    def deploy_node_pool(self, pool: GkeNodePool, wait_till_ready=True) -> None:
        """Register *pool* and deploy it unless it already exists on GKE.

        :param wait_till_ready: block until the pool (and the K8S API) is stable.
        """
        self._add_pool(pool)
        if pool.is_deployed:
            return
        LOGGER.info("Create %s pool with %d node(s) in GKE cluster `%s'",
                    pool.name, pool.num_nodes, self.name)
        if wait_till_ready:
            with self.api_call_rate_limiter.pause:
                pool.deploy_and_wait_till_ready()
                self.api_call_rate_limiter.wait_till_api_become_stable(self)
        else:
            pool.deploy()

    def wait_all_node_pools_to_be_ready(self):
        # Pause rate limiting while polling, then wait for API stability.
        with self.api_call_rate_limiter.pause:
            super().wait_all_node_pools_to_be_ready()
            self.api_call_rate_limiter.wait_till_api_become_stable(self)

    def resize_node_pool(self, name: str, num_nodes: int) -> None:
        """Resize the node pool called *name* to *num_nodes* nodes."""
        with self.api_call_rate_limiter.pause:
            self.pools[name].resize(num_nodes)
            self.api_call_rate_limiter.wait_till_api_become_stable(self)

    def get_instance_group_name_for_pool(self, pool_name: str, default=None) -> str:
        """Return the managed instance group name backing *pool_name*.

        :param default: value returned instead of raising when lookup fails.
        :raises RuntimeError: when the lookup fails and no *default* is given.
        """
        try:
            # BUGFIX: was yaml.load() with no explicit Loader — unsafe on
            # untrusted input and a TypeError with PyYAML >= 6.  safe_load
            # parses the plain-data gcloud YAML output just as well.
            group_link = yaml.safe_load(
                self.gcloud.run(
                    f'container node-pools describe {pool_name} '
                    f'--zone {self.gce_zone} --project {self.gce_project} '
                    f'--cluster {self.short_cluster_name}')
            ).get('instanceGroupUrls')[0]
            # The group name is the last path segment of the instance group URL.
            return group_link.split('/')[-1]
        except Exception as exc:  # pylint: disable=broad-except
            if default is not None:
                return default
            raise RuntimeError(f"Can't get instance group name due to the: {exc}") from exc

    def delete_instance_that_belong_to_instance_group(self, group_name: str, instance_name: str):
        """Delete *instance_name* from the managed instance group *group_name*."""
        self.gcloud.run(f'compute instance-groups managed delete-instances {group_name} '
                        f'--zone={self.gce_zone} --instances={instance_name}')

    def create_token_update_thread(self):
        # Thread that keeps the kubectl auth token refreshed via gcloud.
        return GcloudTokenUpdateThread(self.gcloud, self.kubectl_token_path)

    def create_kubectl_config(self):
        """Fetch cluster credentials into the local kubectl configuration."""
        self.gcloud.run(f"container clusters get-credentials {self.short_cluster_name} --zone {self.gce_zone}")

    def destroy(self):
        """Stop background machinery (rate limiter and token refresh thread)."""
        self.api_call_rate_limiter.stop()
        self.stop_token_update_thread()

    def deploy_scylla_manager(self, pool_name: str = None) -> None:
        """Deploy Scylla Manager, providing it a MinIO S3 backend first."""
        self.deploy_minio_s3_backend()
        super().deploy_scylla_manager(pool_name=pool_name)
class GkeCluster(KubernetesCluster):
    """Google Kubernetes Engine (GKE) backed Kubernetes cluster.

    Drives cluster lifecycle (create, node-pool management, upgrade, kubectl
    config, destroy) through the ``gcloud`` CLI, throttling GKE API traffic
    via an ``ApiCallRateLimiter`` started in ``__init__``.

    NOTE(review): this redefines a ``GkeCluster`` declared earlier in the
    file; at import time this later definition is the one that takes effect.
    """

    # Name of the pool GKE deploys together with the cluster itself.
    AUXILIARY_POOL_NAME = 'default-pool'  # This is default pool that is deployed with the cluster
    # Node label GKE applies to every node to identify its node pool.
    POOL_LABEL_NAME = 'cloud.google.com/gke-nodepool'
    # Performance tuning of Scylla nodes is supported on this backend.
    IS_NODE_TUNING_SUPPORTED = True

    # Registry of node pools keyed by pool name (populated via _add_pool).
    pools: Dict[str, GkeNodePool]

    # pylint: disable=too-many-arguments
    def __init__(
            self,
            gke_cluster_version,
            gke_k8s_release_channel,
            gce_image_type,
            gce_image_size,
            gce_network,
            services,
            gce_instance_type='n1-standard-2',
            user_prefix=None,
            params=None,
            gce_datacenter=None,
            cluster_uuid=None,
            n_nodes=2,
    ):
        """Store GKE/GCE settings and start the API call rate limiter.

        :param gke_cluster_version: K8S version, e.g. ``1.21.3-gke.N`` or ``1.21``.
        :param gke_k8s_release_channel: GKE release channel name; empty string
            selects the static channel (which permits disabling autoupgrade).
        :param gce_image_type: disk type for node boot disks.
        :param gce_image_size: disk size (GB) for node boot disks.
        :param gce_network: GCE network to attach the cluster to.
        :param services: GCE service objects; the first one supplies the
            project and user key.
        :param gce_instance_type: machine type for the default pool nodes.
        :param gce_datacenter: list of zones; the first one is used.
        :param n_nodes: node count for the default (auxiliary) pool.
        """
        super().__init__(params=params, cluster_uuid=cluster_uuid, user_prefix=user_prefix)
        self.gke_cluster_version = gke_cluster_version
        # Strip so that a whitespace-only channel means "static channel".
        self.gke_k8s_release_channel = gke_k8s_release_channel.strip()
        self.gce_image_type = gce_image_type
        self.gce_image_size = gce_image_size
        self.gce_network = gce_network
        self.gce_services = services
        self.gce_instance_type = gce_instance_type
        self.n_nodes = n_nodes
        self.gce_project = services[0].project
        self.gce_user = services[0].key
        self.gce_zone = gce_datacenter[0]
        self.gke_cluster_created = False
        # Throttle all GKE API calls; started immediately and stopped in destroy().
        self.api_call_rate_limiter = ApiCallRateLimiter(
            rate_limit=GKE_API_CALL_RATE_LIMIT,
            queue_size=GKE_API_CALL_QUEUE_SIZE,
            urllib_retry=GKE_URLLIB_RETRY,
            urllib_backoff_factor=GKE_URLLIB_BACKOFF_FACTOR,
        )
        self.api_call_rate_limiter.start()

    @cached_property
    def allowed_labels_on_scylla_node(self) -> list:
        """(label, value) pairs of pods allowed to run on Scylla K8S nodes."""
        allowed_labels_on_scylla_node = [
            ('name', 'cpu-policy'),
            ('app', 'local-volume-provisioner'),
            ('name', 'raid-local-disks'),
            ('k8s-app', 'fluentbit-gke'),
            ('k8s-app', 'gke-metrics-agent'),
            ('component', 'kube-proxy'),
            ('k8s-app', 'gcp-compute-persistent-disk-csi-driver'),
            ('scylla/cluster', self.k8s_scylla_cluster_name),
        ]
        if self.is_performance_tuning_enabled:
            # NOTE: add performance tuning related pods only if we expect it to be.
            #       When we have tuning disabled it must not exist.
            allowed_labels_on_scylla_node.extend(self.perf_pods_labels)
        return allowed_labels_on_scylla_node

    def __str__(self):
        return f"{type(self).__name__} {self.name} | Zone: {self.gce_zone} | Version: {self.gke_cluster_version}"

    def deploy(self):
        """Create the GKE cluster, register its default pool and set up RBAC."""
        LOGGER.info("Create GKE cluster `%s' with %d node(s) in %s",
                    self.short_cluster_name, self.n_nodes, self.AUXILIARY_POOL_NAME)
        tags = ",".join(f"{key}={value}" for key, value in self.tags.items())
        with self.gcloud as gcloud:
            # NOTE: only static K8S release channel supports disabling of autoupgrade
            gcloud.run(
                f"container --project {self.gce_project} clusters create {self.short_cluster_name}"
                f" --no-enable-basic-auth"
                f" --zone {self.gce_zone}"
                f" --cluster-version {self.gke_cluster_version}"
                f"{' --release-channel ' + self.gke_k8s_release_channel if self.gke_k8s_release_channel else ''}"
                f" --network {self.gce_network}"
                f" --num-nodes {self.n_nodes}"
                f" --machine-type {self.gce_instance_type}"
                f" --image-type UBUNTU"
                f" --disk-type {self.gce_image_type}"
                f" --disk-size {self.gce_image_size}"
                f" --enable-stackdriver-kubernetes"
                f"{'' if self.gke_k8s_release_channel else ' --no-enable-autoupgrade'}"
                f"{'' if self.gke_k8s_release_channel else ' --no-enable-autorepair'}"
                f" --metadata {tags}")
        self.patch_kubectl_config()
        # The default pool already exists (created with the cluster), so it is
        # registered with is_deployed=True and no extra gcloud call is made.
        self.deploy_node_pool(
            GkeNodePool(name=self.AUXILIARY_POOL_NAME,
                        num_nodes=self.n_nodes,
                        disk_size=self.gce_image_size,
                        disk_type=self.gce_image_type,
                        k8s_cluster=self,
                        instance_type=self.gce_instance_type,
                        is_deployed=True))
        LOGGER.info("Setup RBAC for GKE cluster `%s'", self.name)
        self.kubectl(
            "create clusterrolebinding cluster-admin-binding --clusterrole cluster-admin "
            f"--user {self.gce_user}")

    @cached_property
    def gcloud(self) -> GcloudContextManager:  # pylint: disable=no-self-use
        # Shared gcloud context manager provided by the test's localhost node.
        return self.test_config.tester_obj().localhost.gcloud

    def deploy_node_pool(self, pool: GkeNodePool, wait_till_ready=True) -> None:
        """Register *pool* and deploy it unless it already exists on GKE.

        :param wait_till_ready: block until the pool (and the K8S API) is stable.
        """
        self._add_pool(pool)
        if pool.is_deployed:
            return
        LOGGER.info("Create %s pool with %d node(s) in GKE cluster `%s'",
                    pool.name, pool.num_nodes, self.name)
        if wait_till_ready:
            with self.api_call_rate_limiter.pause:
                pool.deploy_and_wait_till_ready()
                self.api_call_rate_limiter.wait_till_api_become_stable(self)
        else:
            pool.deploy()

    def wait_all_node_pools_to_be_ready(self):
        # Pause rate limiting while polling, then wait for API stability.
        with self.api_call_rate_limiter.pause:
            super().wait_all_node_pools_to_be_ready()
            self.api_call_rate_limiter.wait_till_api_become_stable(self)

    def resize_node_pool(self, name: str, num_nodes: int) -> None:
        """Resize the node pool called *name* to *num_nodes* nodes."""
        with self.api_call_rate_limiter.pause:
            self.pools[name].resize(num_nodes)
            self.api_call_rate_limiter.wait_till_api_become_stable(self)

    def get_instance_group_name_for_pool(self, pool_name: str, default=None) -> str:
        """Return the managed instance group name backing *pool_name*.

        :param default: value returned instead of raising when lookup fails.
        :raises RuntimeError: when the lookup fails and no *default* is given.
        """
        try:
            group_link = yaml.safe_load(
                self.gcloud.run(
                    f'container node-pools describe {pool_name} '
                    f'--zone {self.gce_zone} --project {self.gce_project} '
                    f'--cluster {self.short_cluster_name}')).get(
                        'instanceGroupUrls')[0]
            # The group name is the last path segment of the instance group URL.
            return group_link.split('/')[-1]
        except Exception as exc:
            if default is not None:
                return default
            raise RuntimeError(
                f"Can't get instance group name due to the: {exc}") from exc

    def delete_instance_that_belong_to_instance_group(self, group_name: str, instance_name: str):
        """Delete *instance_name* from the managed instance group *group_name*."""
        self.gcloud.run(
            f'compute instance-groups managed delete-instances {group_name} '
            f'--zone={self.gce_zone} --instances={instance_name}')

    def create_token_update_thread(self):
        # Thread that keeps the kubectl auth token refreshed via gcloud.
        return GcloudTokenUpdateThread(self.gcloud, self.kubectl_token_path)

    def create_kubectl_config(self):
        """Fetch cluster credentials into the local kubectl configuration."""
        self.gcloud.run(
            f"container clusters get-credentials {self.short_cluster_name} --zone {self.gce_zone}"
        )

    def destroy(self):
        """Stop background machinery (rate limiter and token refresh thread)."""
        self.api_call_rate_limiter.stop()
        self.stop_token_update_thread()

    def deploy_scylla_manager(self, pool_name: str = None) -> None:
        """Deploy Scylla Manager, providing it a MinIO S3 backend first."""
        self.deploy_minio_s3_backend()
        super().deploy_scylla_manager(pool_name=pool_name)

    # NOTE: blocked by https://github.com/scylladb/scylla-operator/issues/760
    def upgrade_kubernetes_platform(self) -> str:
        """Upgrade control plane and scylla-related node pools one minor version up.

        Returns the target version string, e.g. ``1.22`` when current is ``1.21.x``.
        """
        # NOTE: 'self.gke_cluster_version' can be like 1.21.3-gke.N or 1.21
        upgrade_version = f"1.{int(self.gke_cluster_version.split('.')[1]) + 1}"
        with self.gcloud as gcloud:
            # Upgrade control plane (API, scheduler, manager and so on ...)
            LOGGER.info("Upgrading K8S control plane to the '%s' version", upgrade_version)
            gcloud.run(
                f"container clusters upgrade {self.short_cluster_name} "
                f"--master --quiet --project {self.gce_project} --zone {self.gce_zone} "
                f"--cluster-version {upgrade_version}")
            # Upgrade scylla-related node pools
            for node_pool in (self.AUXILIARY_POOL_NAME, self.SCYLLA_POOL_NAME):
                LOGGER.info("Upgrading '%s' node pool to the '%s' version", node_pool, upgrade_version)
                # NOTE: one node upgrade takes about 10 minutes
                gcloud.run(
                    f"container clusters upgrade {self.short_cluster_name} "
                    f"--quiet --project {self.gce_project} --zone {self.gce_zone} "
                    f"--node-pool={node_pool}")
        return upgrade_version