Code example #1
class GkeCluster(KubernetesCluster):
    AUXILIARY_POOL_NAME = 'default-pool'  # The default pool that is deployed with the cluster
    POOL_LABEL_NAME = 'cloud.google.com/gke-nodepool'
    pools: Dict[str, GkeNodePool]

    # pylint: disable=too-many-arguments
    def __init__(self,
                 gke_cluster_version,
                 gke_k8s_release_channel,
                 gce_image_type,
                 gce_image_size,
                 gce_network,
                 services,
                 gce_instance_type='n1-standard-4',
                 user_prefix=None,
                 params=None,
                 gce_datacenter=None,
                 cluster_uuid=None,
                 n_nodes=1
                 ):
        super().__init__(
            params=params,
            cluster_uuid=cluster_uuid,
            user_prefix=user_prefix
        )
        self.gke_cluster_version = gke_cluster_version
        self.gke_k8s_release_channel = gke_k8s_release_channel.strip()
        self.gce_image_type = gce_image_type
        self.gce_image_size = gce_image_size
        self.gce_network = gce_network
        self.gce_services = services
        self.gce_instance_type = gce_instance_type
        self.n_nodes = n_nodes
        self.gce_project = services[0].project
        self.gce_user = services[0].key
        self.gce_zone = gce_datacenter[0]
        self.gke_cluster_created = False

        self.api_call_rate_limiter = ApiCallRateLimiter(
            rate_limit=GKE_API_CALL_RATE_LIMIT,
            queue_size=GKE_API_CALL_QUEUE_SIZE,
            urllib_retry=GKE_URLLIB_RETRY,
            urllib_backoff_factor=GKE_URLLIB_BACKOFF_FACTOR,
        )
        self.api_call_rate_limiter.start()

    def __str__(self):
        return f"{type(self).__name__} {self.name} | Zone: {self.gce_zone} | Version: {self.gke_cluster_version}"

    def deploy(self):
        LOGGER.info("Create GKE cluster `%s' with %d node(s) in %s",
                    self.short_cluster_name, self.n_nodes, self.AUXILIARY_POOL_NAME)
        tags = ",".join(f"{key}={value}" for key, value in self.tags.items())
        with self.gcloud as gcloud:
            # NOTE: only the static K8S release channel supports disabling autoupgrade
            gcloud.run(f"container --project {self.gce_project} clusters create {self.short_cluster_name}"
                       f" --no-enable-basic-auth"
                       f" --zone {self.gce_zone}"
                       f" --cluster-version {self.gke_cluster_version}"
                       f"{' --release-channel ' + self.gke_k8s_release_channel if self.gke_k8s_release_channel else ''}"
                       f" --network {self.gce_network}"
                       f" --num-nodes {self.n_nodes}"
                       f" --machine-type {self.gce_instance_type}"
                       f" --image-type UBUNTU"
                       f" --disk-type {self.gce_image_type}"
                       f" --disk-size {self.gce_image_size}"
                       f" --enable-stackdriver-kubernetes"
                       f"{'' if self.gke_k8s_release_channel else ' --no-enable-autoupgrade'}"
                       f"{'' if self.gke_k8s_release_channel else ' --no-enable-autorepair'}"
                       f" --metadata {tags}")
            self.patch_kubectl_config()
            self.deploy_node_pool(GkeNodePool(
                name=self.AUXILIARY_POOL_NAME,
                num_nodes=self.n_nodes,
                disk_size=self.gce_image_size,
                disk_type=self.gce_image_type,
                k8s_cluster=self,
                instance_type=self.gce_instance_type,
                is_deployed=True
            ))

        LOGGER.info("Setup RBAC for GKE cluster `%s'", self.name)
        self.kubectl("create clusterrolebinding cluster-admin-binding --clusterrole cluster-admin "
                     f"--user {self.gce_user}")

    @cached_property
    def gcloud(self) -> GcloudContextManager:  # pylint: disable=no-self-use
        return self.test_config.tester_obj().localhost.gcloud

    def deploy_node_pool(self, pool: GkeNodePool, wait_till_ready=True) -> None:
        self._add_pool(pool)
        if pool.is_deployed:
            return
        LOGGER.info("Create %s pool with %d node(s) in GKE cluster `%s'", pool.name, pool.num_nodes, self.name)
        if wait_till_ready:
            with self.api_call_rate_limiter.pause:
                pool.deploy_and_wait_till_ready()
                self.api_call_rate_limiter.wait_till_api_become_stable(self)
        else:
            pool.deploy()

    def wait_all_node_pools_to_be_ready(self):
        with self.api_call_rate_limiter.pause:
            super().wait_all_node_pools_to_be_ready()
            self.api_call_rate_limiter.wait_till_api_become_stable(self)

    def resize_node_pool(self, name: str, num_nodes: int) -> None:
        with self.api_call_rate_limiter.pause:
            self.pools[name].resize(num_nodes)
            self.api_call_rate_limiter.wait_till_api_become_stable(self)

    def get_instance_group_name_for_pool(self, pool_name: str, default=None) -> str:
        try:
            group_link = yaml.safe_load(
                self.gcloud.run(
                    f'container node-pools describe {pool_name} '
                    f'--zone {self.gce_zone} --project {self.gce_project} '
                    f'--cluster {self.short_cluster_name}')
            ).get('instanceGroupUrls')[0]
            return group_link.split('/')[-1]
        except Exception as exc:
            if default is not None:
                return default
            raise RuntimeError(f"Can't get instance group name due to: {exc}") from exc

    def delete_instance_that_belong_to_instance_group(self, group_name: str, instance_name: str):
        self.gcloud.run(f'compute instance-groups managed delete-instances {group_name} '
                        f'--zone={self.gce_zone} --instances={instance_name}')

    def create_token_update_thread(self):
        return GcloudTokenUpdateThread(self.gcloud, self.kubectl_token_path)

    def create_kubectl_config(self):
        self.gcloud.run(f"container clusters get-credentials {self.short_cluster_name} --zone {self.gce_zone}")

    def destroy(self):
        self.api_call_rate_limiter.stop()
        self.stop_token_update_thread()

    def deploy_scylla_manager(self, pool_name: str = None) -> None:
        self.deploy_minio_s3_backend()
        super().deploy_scylla_manager(pool_name=pool_name)
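
A minimal usage sketch for the class above (not taken from the project): the argument values are placeholders, and gce_services / sct_params stand in for objects that the test framework would normally provide.

# Hypothetical usage of GkeCluster; the values below are illustrative only.
cluster = GkeCluster(
    gke_cluster_version="1.21",      # hypothetical GKE version
    gke_k8s_release_channel="",      # empty -> no release channel (static), so autoupgrade/autorepair get disabled
    gce_image_type="pd-ssd",
    gce_image_size=100,
    gce_network="default",
    services=gce_services,           # assumed: GCE service objects exposing .project and .key
    gce_datacenter=["us-east1-b"],
    params=sct_params,               # assumed: test configuration object
    n_nodes=3,
)
cluster.deploy()
cluster.wait_all_node_pools_to_be_ready()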
Code example #2
File: gke.py  Project: eliransin/scylla-cluster-tests
class GkeCluster(KubernetesCluster):
    AUXILIARY_POOL_NAME = 'default-pool'  # The default pool that is deployed with the cluster
    POOL_LABEL_NAME = 'cloud.google.com/gke-nodepool'
    IS_NODE_TUNING_SUPPORTED = True
    pools: Dict[str, GkeNodePool]

    # pylint: disable=too-many-arguments
    def __init__(
        self,
        gke_cluster_version,
        gke_k8s_release_channel,
        gce_image_type,
        gce_image_size,
        gce_network,
        services,
        gce_instance_type='n1-standard-2',
        user_prefix=None,
        params=None,
        gce_datacenter=None,
        cluster_uuid=None,
        n_nodes=2,
    ):
        super().__init__(params=params,
                         cluster_uuid=cluster_uuid,
                         user_prefix=user_prefix)
        self.gke_cluster_version = gke_cluster_version
        self.gke_k8s_release_channel = gke_k8s_release_channel.strip()
        self.gce_image_type = gce_image_type
        self.gce_image_size = gce_image_size
        self.gce_network = gce_network
        self.gce_services = services
        self.gce_instance_type = gce_instance_type
        self.n_nodes = n_nodes
        self.gce_project = services[0].project
        self.gce_user = services[0].key
        self.gce_zone = gce_datacenter[0]
        self.gke_cluster_created = False
        self.api_call_rate_limiter = ApiCallRateLimiter(
            rate_limit=GKE_API_CALL_RATE_LIMIT,
            queue_size=GKE_API_CALL_QUEUE_SIZE,
            urllib_retry=GKE_URLLIB_RETRY,
            urllib_backoff_factor=GKE_URLLIB_BACKOFF_FACTOR,
        )
        self.api_call_rate_limiter.start()

    @cached_property
    def allowed_labels_on_scylla_node(self) -> list:
        allowed_labels_on_scylla_node = [
            ('name', 'cpu-policy'),
            ('app', 'local-volume-provisioner'),
            ('name', 'raid-local-disks'),
            ('k8s-app', 'fluentbit-gke'),
            ('k8s-app', 'gke-metrics-agent'),
            ('component', 'kube-proxy'),
            ('k8s-app', 'gcp-compute-persistent-disk-csi-driver'),
            ('scylla/cluster', self.k8s_scylla_cluster_name),
        ]
        if self.is_performance_tuning_enabled:
            # NOTE: add performance-tuning-related pods only when tuning is expected
            #       to be enabled. When tuning is disabled, these pods must not exist.
            allowed_labels_on_scylla_node.extend(self.perf_pods_labels)
        return allowed_labels_on_scylla_node

    def __str__(self):
        return f"{type(self).__name__} {self.name} | Zone: {self.gce_zone} | Version: {self.gke_cluster_version}"

    def deploy(self):
        LOGGER.info("Create GKE cluster `%s' with %d node(s) in %s",
                    self.short_cluster_name, self.n_nodes,
                    self.AUXILIARY_POOL_NAME)
        tags = ",".join(f"{key}={value}" for key, value in self.tags.items())
        with self.gcloud as gcloud:
            # NOTE: only the static K8S release channel supports disabling autoupgrade
            gcloud.run(
                f"container --project {self.gce_project} clusters create {self.short_cluster_name}"
                f" --no-enable-basic-auth"
                f" --zone {self.gce_zone}"
                f" --cluster-version {self.gke_cluster_version}"
                f"{' --release-channel ' + self.gke_k8s_release_channel if self.gke_k8s_release_channel else ''}"
                f" --network {self.gce_network}"
                f" --num-nodes {self.n_nodes}"
                f" --machine-type {self.gce_instance_type}"
                f" --image-type UBUNTU"
                f" --disk-type {self.gce_image_type}"
                f" --disk-size {self.gce_image_size}"
                f" --enable-stackdriver-kubernetes"
                f"{'' if self.gke_k8s_release_channel else ' --no-enable-autoupgrade'}"
                f"{'' if self.gke_k8s_release_channel else ' --no-enable-autorepair'}"
                f" --metadata {tags}")
            self.patch_kubectl_config()
            self.deploy_node_pool(
                GkeNodePool(name=self.AUXILIARY_POOL_NAME,
                            num_nodes=self.n_nodes,
                            disk_size=self.gce_image_size,
                            disk_type=self.gce_image_type,
                            k8s_cluster=self,
                            instance_type=self.gce_instance_type,
                            is_deployed=True))

        LOGGER.info("Setup RBAC for GKE cluster `%s'", self.name)
        self.kubectl(
            "create clusterrolebinding cluster-admin-binding --clusterrole cluster-admin "
            f"--user {self.gce_user}")

    @cached_property
    def gcloud(self) -> GcloudContextManager:  # pylint: disable=no-self-use
        return self.test_config.tester_obj().localhost.gcloud

    def deploy_node_pool(self,
                         pool: GkeNodePool,
                         wait_till_ready=True) -> None:
        self._add_pool(pool)
        if pool.is_deployed:
            return
        LOGGER.info("Create %s pool with %d node(s) in GKE cluster `%s'",
                    pool.name, pool.num_nodes, self.name)
        if wait_till_ready:
            with self.api_call_rate_limiter.pause:
                pool.deploy_and_wait_till_ready()
                self.api_call_rate_limiter.wait_till_api_become_stable(self)
        else:
            pool.deploy()

    def wait_all_node_pools_to_be_ready(self):
        with self.api_call_rate_limiter.pause:
            super().wait_all_node_pools_to_be_ready()
            self.api_call_rate_limiter.wait_till_api_become_stable(self)

    def resize_node_pool(self, name: str, num_nodes: int) -> None:
        with self.api_call_rate_limiter.pause:
            self.pools[name].resize(num_nodes)
            self.api_call_rate_limiter.wait_till_api_become_stable(self)

    def get_instance_group_name_for_pool(self,
                                         pool_name: str,
                                         default=None) -> str:
        try:
            group_link = yaml.safe_load(
                self.gcloud.run(
                    f'container node-pools describe {pool_name} '
                    f'--zone {self.gce_zone} --project {self.gce_project} '
                    f'--cluster {self.short_cluster_name}')).get(
                        'instanceGroupUrls')[0]
            return group_link.split('/')[-1]
        except Exception as exc:
            if default is not None:
                return default
            raise RuntimeError(
                f"Can't get instance group name due to: {exc}") from exc

    def delete_instance_that_belong_to_instance_group(self, group_name: str,
                                                      instance_name: str):
        self.gcloud.run(
            f'compute instance-groups managed delete-instances {group_name} '
            f'--zone={self.gce_zone} --instances={instance_name}')

    def create_token_update_thread(self):
        return GcloudTokenUpdateThread(self.gcloud, self.kubectl_token_path)

    def create_kubectl_config(self):
        self.gcloud.run(
            f"container clusters get-credentials {self.short_cluster_name} --zone {self.gce_zone}"
        )

    def destroy(self):
        self.api_call_rate_limiter.stop()
        self.stop_token_update_thread()

    def deploy_scylla_manager(self, pool_name: str = None) -> None:
        self.deploy_minio_s3_backend()
        super().deploy_scylla_manager(pool_name=pool_name)

    # NOTE: blocked by https://github.com/scylladb/scylla-operator/issues/760
    def upgrade_kubernetes_platform(self) -> str:
        # NOTE: 'self.gke_cluster_version' can be like 1.21.3-gke.N or 1.21
        upgrade_version = f"1.{int(self.gke_cluster_version.split('.')[1]) + 1}"

        with self.gcloud as gcloud:
            # Upgrade control plane (API, scheduler, manager and so on ...)
            LOGGER.info("Upgrading K8S control plane to the '%s' version",
                        upgrade_version)
            gcloud.run(
                f"container clusters upgrade {self.short_cluster_name} "
                f"--master --quiet --project {self.gce_project} --zone {self.gce_zone} "
                f"--cluster-version {upgrade_version}")

            # Upgrade scylla-related node pools
            for node_pool in (self.AUXILIARY_POOL_NAME, self.SCYLLA_POOL_NAME):
                LOGGER.info("Upgrading '%s' node pool to the '%s' version",
                            node_pool, upgrade_version)
                # NOTE: one node upgrade takes about 10 minutes
                gcloud.run(
                    f"container clusters upgrade {self.short_cluster_name} "
                    f"--quiet --project {self.gce_project} --zone {self.gce_zone} "
                    f"--node-pool={node_pool}")
        return upgrade_version
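
A side note on upgrade_kubernetes_platform(): the target version is derived only from the minor component of gke_cluster_version, so both "1.21" and "1.21.3-gke.N" style strings bump to "1.22". A standalone illustration of that parsing (the version strings are hypothetical examples):

# Minor-version bump as computed in upgrade_kubernetes_platform(); inputs are made-up examples.
for version in ("1.21", "1.21.3-gke.100"):
    upgrade_version = f"1.{int(version.split('.')[1]) + 1}"
    assert upgrade_version == "1.22"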