from datetime import datetime, timedelta, timezone
from logging import getLogger

import gevent
from fcache.cache import FileCache
from kubernetes import client, config
from kubernetes.client.rest import ApiException
# Client and APIError are assumed to come from the pyrabbit2 management client;
# ConfigLoader is a project-local helper whose import path is not shown here.
from pyrabbit2.api import Client, APIError

# Module-level shutdown flag, expected to be flipped by a signal handler
# (see the wiring sketch at the end of this file).
interrupted = False


class ScaleRunner:

    def __init__(self):
        self.config = ConfigLoader()
        self.interval = FileCache('interval', flag='cs')
        self.logger = getLogger(self.__class__.__name__)

    def scaling_deployment(self, deployment_name, namespace='default'):
        """Continuously scale one deployment, re-checking every `interval` seconds.

        :param str deployment_name: deployment to scale
        :param str namespace: Kubernetes namespace of the deployment
        """
        while True:
            # A new WorkerScaler each pass so the queue snapshot is refreshed.
            scaler = WorkerScaler(deployment_name=deployment_name)
            interval = self.config.get_deployment_properties(
                deployment_name=deployment_name)['interval']
            desired_pods = scaler.calculate_desired_pods()
            if desired_pods is None:
                self.logger.info(
                    f"Deployment {deployment_name} does not need to be scaled"
                )
            else:
                scaler.set_deployment_replicas(deployment_name=deployment_name,
                                               namespace=namespace,
                                               replicas_number=desired_pods)

            # `interrupted` is the module-level shutdown flag defined above.
            if interrupted:
                self.logger.info("We're done here! Bye bye")
                break

            gevent.sleep(interval)

    def asynchronous(self):
        threads = [
            gevent.spawn(self.scaling_deployment, deployment)
            for deployment in self.config.get_deployments()
        ]
        gevent.joinall(threads)


class QueuesScraper:

    def __init__(self, deployment_name):
        self.logger = getLogger(self.__class__.__name__)
        self.deployment_name = deployment_name
        self.rabbit_config = ConfigLoader()
        properties = self.rabbit_config.get_deployment_properties(deployment_name=deployment_name)
        self.queue_list_config = properties['queues']
        self.vhost_config = properties['vHost']
        self.queues_body = self.get_queues_body(vhost=self.vhost_config, queue_list=self.queue_list_config)

    def get_queues_body(self, vhost, queue_list):
        body_list = []
        host = self.get_rabbit_host_from_vhost(vhost)
        rabbit = self.rabbit_login(host)
        for queue in queue_list:
            body_list.append(rabbit.get_queue(vhost=vhost, name=queue))
        return body_list

    def total_messages(self):
        message_list = list()
        for body in self.exclude_idle_queue_from_list():
            message_list.append(body['messages'])
        return sum(message_list)

    def get_queues_average_consumer_utilisation(self):
        consumer_utilisation_list = []
        for queue_body in self.exclude_idle_queue_from_list():
            # The management API may report consumer_utilisation as null;
            # treat that as 0 so the average stays computable.
            consumer_utilisation_list.append(queue_body.get('consumer_utilisation') or 0)
        return sum(consumer_utilisation_list) / len(consumer_utilisation_list)

    def exclude_idle_queue_from_list(self):
        try:
            ttl = self.rabbit_config.get_deployment_properties(deployment_name=self.deployment_name)['ttl']
        except KeyError:
            self.logger.info("ttl not found in deployment %s config\nUsing default ttl = 1.0" % self.deployment_name)
            ttl = 1.0

        non_idle_queues_body = []
        for queue_body in self.queues_body:
            if self.check_queue_non_idling(queue_body=queue_body, ttl=ttl):
                non_idle_queues_body.append(queue_body)
        return non_idle_queues_body

    def check_queue_non_idling(self, queue_body, ttl):
        """Return True if the queue should still be treated as active."""
        fmt = "%Y-%m-%d %H:%M:%S"

        if self.detect_stuck_messages_queue(queue_body=queue_body, ttl=ttl):
            return False

        try:
            # idle_since is parsed with fmt and compared against a naive UTC "now".
            idle_since = queue_body['idle_since']
            idle_since_time = datetime.strptime(idle_since, fmt)
            current_time = datetime.now(timezone.utc).replace(tzinfo=None)
            idle_minutes = (current_time - idle_since_time).total_seconds() / 60
            if idle_minutes > ttl and queue_body['consumers'] > 0:
                # Idle longer than ttl minutes with consumers attached: exclude it.
                return False
            else:
                # Recently idle: keep the queue but count its utilisation as 0.
                queue_body['consumer_utilisation'] = 0
                return True
        except KeyError:
            # No idle_since field means the queue has been active.
            return True

    @staticmethod
    def detect_stuck_messages_queue(queue_body, ttl):
        """Return True if the queue's message count has not moved for more than ttl minutes."""
        past_queue = FileCache('message-queue', flag='cs')
        queue_name = queue_body['name']
        current_messages = queue_body['messages']
        current_consumers = queue_body['consumers']

        current_time = datetime.now(timezone.utc).replace(tzinfo=None)

        if past_queue.get(queue_name):
            time_range_minutes = (current_time - past_queue[queue_name]['time_catch']).total_seconds() / 60
            if past_queue[queue_name]['messages'] == current_messages:
                # Message count unchanged: consider it stuck once ttl has elapsed.
                return time_range_minutes > ttl
            # Message count moved: refresh the snapshot and report not stuck.
            past_queue[queue_name] = {'messages': current_messages, 'time_catch': current_time,
                                      'consumers': current_consumers}
            return False
        else:
            past_queue[queue_name] = {'messages': current_messages, 'time_catch': current_time,
                                      'consumers': current_consumers}
            return False
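
    # The 'message-queue' FileCache keeps one snapshot per queue, roughly:
    #   past_queue['tasks'] == {'messages': 120, 'consumers': 3,
    #                           'time_catch': datetime(2024, 1, 5, 10, 21)}
    # (queue name and values are illustrative; only the keys come from the code above)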

    def get_rabbit_host_from_vhost(self, vhost, caching=True):
        if caching:
            vhost_host_cache = FileCache('vhost-host', flag='cs')
            if vhost_host_cache.get(vhost):
                return vhost_host_cache[vhost]
            else:
                vhost_host_cache[vhost] = self.get_host_action(vhost)
                return vhost_host_cache[vhost]
        else:
            return self.get_host_action(vhost)

    def rabbit_login(self, host):
        """
        :param str host: RabbitMQ management host
        :return: authenticated management API client
        """
        return Client(f'{host}:15672', self.rabbit_config.username, self.rabbit_config.password)

    def get_host_action(self, vhost):
        # Probe each configured host until one answers for the vhost.
        for host in self.rabbit_config.hosts:
            cl = Client(f'{host}:15672', self.rabbit_config.username, self.rabbit_config.password)
            try:
                cl.is_alive(vhost)
                return host
            except APIError:
                continue
        self.logger.error("No reachable RabbitMQ host found for vhost %s" % vhost)
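

# For reference, ConfigLoader is expected to expose roughly the interface used
# above: `hosts`, `username` and `password` attributes, `get_deployments()`
# returning deployment names, and `get_deployment_properties(deployment_name=...)`
# returning a per-deployment dict. A sketch of that dict (field names taken from
# the lookups in this file, values purely illustrative):
#
# {
#     'vHost': '/my-vhost',           # RabbitMQ vhost holding the queues
#     'queues': ['tasks', 'emails'],  # queues whose backlog drives scaling
#     'interval': 30,                 # seconds between scaling passes
#     'minPod': 0,                    # optional, defaults to 0
#     'maxPod': 10,
#     'accumulativeLimit': 1,         # optional, pods added per scale-up step
#     'ttl': 1.0,                     # optional, idle/stuck threshold in minutes
# }
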
class WorkerScaler:

    def __init__(self, deployment_name):
        config.load_kube_config()
        self.logger = getLogger(self.__class__.__name__)
        self.v1 = client.AppsV1Api()
        self.rabbit_config = ConfigLoader()
        self.deployment_name = deployment_name
        self.scraper = QueuesScraper(deployment_name=deployment_name)

    def calculate_desired_pods(self, namespace='default'):
        """
        :param str namespace:
        :return: number of desired pods
        """
        deployment_properties = self.rabbit_config.get_deployment_properties(deployment_name=self.deployment_name)
        max_pod = deployment_properties['maxPod']
        queues = self.scraper.exclude_idle_queue_from_list()

        try:
            accumulative_limit = deployment_properties['accumulativeLimit']
        except KeyError:
            self.logger.info("accumulativeLimit not found in deployment %s config\nUsing default accumulativeLimit = 1"
                             % self.deployment_name)
            accumulative_limit = 1

        try:
            min_pod = deployment_properties['minPod']
        except KeyError:
            self.logger.info(
                "minPod not found in deployment %s config, using default minPod = 0" % self.deployment_name)
            min_pod = 0

        current_pods = self.get_deployment_replicas(deployment_name=self.deployment_name, namespace=namespace)

        if not queues:
            self.logger.info("All queues are idle")
            if current_pods == min_pod:
                self.logger.info(f"Deployment {self.deployment_name} is already at minPod ({min_pod})")
                return None
            else:
                self.logger.info(f"Scale {self.deployment_name} from {current_pods} to {min_pod}")
                return min_pod

        average_consumer_utilisation = self.scraper.get_queues_average_consumer_utilisation()

        desired_pods = current_pods
        if current_pods < min_pod:
            desired_pods = min_pod
        elif min_pod <= current_pods < max_pod:
            if average_consumer_utilisation < 0.9:
                # Consumers are not keeping up: add pods, but never beyond maxPod.
                desired_pods = min(current_pods + accumulative_limit, max_pod)
        elif current_pods >= max_pod:
            desired_pods = max_pod

        if desired_pods == current_pods == max_pod:
            self.logger.info(f"Current pods of {self.deployment_name} hit max threshold: {max_pod}")
            return None
        elif desired_pods == current_pods < max_pod:
            self.logger.info(f"Current pods of {self.deployment_name} are suitable: {current_pods}")
            return None
        else:
            self.logger.info(f"Scale {self.deployment_name} from {current_pods} to {desired_pods}")
            return desired_pods
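
    # A quick trace of calculate_desired_pods() with minPod=1, maxPod=5 and
    # accumulativeLimit=2 (all values illustrative):
    #   * all queues idle, current=3          -> scale down to minPod (1)
    #   * current=2, average utilisation 0.95 -> None, consumers keep up
    #   * current=2, average utilisation 0.60 -> scale up to min(2 + 2, 5) = 4
    #   * current=5 (== maxPod)               -> None, already at the ceiling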

    def set_deployment_replicas(self, deployment_name, namespace='default', replicas_number=1):
        """
        :param str deployment_name:
        :param str namespace:
        :param int replicas_number:
        :return: deployment body
        """
        try:
            body = self.v1.read_namespaced_deployment_scale(name=deployment_name, namespace=namespace)
            body.spec.replicas = replicas_number
            api_response = self.v1.patch_namespaced_deployment_scale(name=deployment_name, namespace=namespace,
                                                                     body=body)
            return api_response
        except ApiException as e:
            self.logger.error("Exception when calling AppsV1Api->patch_namespaced_deployment_scale: %s" % e)

    def get_deployment_replicas(self, deployment_name, namespace='default'):
        """
        :param str deployment_name:
        :param str namespace:
        :return: deployment replicas
        """
        try:
            body = self.v1.read_namespaced_deployment_scale(name=deployment_name, namespace=namespace)
            return body.status.replicas
        except ApiException as e:
            self.logger.error("Exception when calling AppsV1Api->read_namespaced_deployment_scale: %s" % e)