class ScaleRunner:
    """Drives the autoscaling loop for every configured deployment."""

    def __init__(self):
        self.config = ConfigLoader()
        self.interval = FileCache('interval', flag='cs')
        self.logger = getLogger(name=self.__class__.__name__)

    def scaling_deployment(self, deployment_name, namespace='default'):
        """
        Poll one deployment forever: recompute the desired replica count,
        apply it, then sleep for the configured interval before the next round.

        :param str deployment_name:
        :param str namespace:
        :return:
        """
        while True:
            # A fresh WorkerScaler re-scrapes queue state on construction.
            worker = WorkerScaler(deployment_name=deployment_name)
            poll_interval = self.config.get_deployment_properties(
                deployment_name=deployment_name)['interval']
            target = worker.calculate_desired_pods()
            if target is not None:
                worker.set_deployment_replicas(deployment_name=deployment_name,
                                               namespace=namespace,
                                               replicas_number=target)
            else:
                self.logger.info(
                    f"Condition of deployment {deployment_name} no need to scale"
                )
            # `interrupted` is a module-level flag set elsewhere (shutdown signal).
            if interrupted:
                self.logger.info("We done here! Bye Bye")
                break
            gevent.sleep(poll_interval)

    def asynchronous(self):
        """Spawn one polling greenlet per configured deployment and block
        until they all finish."""
        greenlets = []
        for deployment in self.config.get_deployments():
            greenlets.append(gevent.spawn(self.scaling_deployment, deployment))
        gevent.joinall(greenlets)
class QueuesScraper:
    """Fetches queue state for one deployment from RabbitMQ's management API
    and classifies each queue as active, idle, or stuck."""

    def __init__(self, deployment_name):
        """
        :param str deployment_name: key into the deployments section of the config
        """
        self.logger = getLogger(self.__class__.__name__)
        self.deployment_name = deployment_name
        self.rabbit_config = ConfigLoader()
        # Look the deployment properties up once instead of once per field.
        properties = self.rabbit_config.get_deployment_properties(
            deployment_name=deployment_name)
        self.queue_list_config = properties['queues']
        self.vhost_config = properties['vHost']
        self.queues_body = self.get_queues_body(vhost=self.vhost_config,
                                                queue_list=self.queue_list_config)

    def get_queues_body(self, vhost, queue_list):
        """
        Fetch the management-API body of every configured queue.

        :param str vhost:
        :param list queue_list: queue names to fetch
        :return: list of queue body dicts
        """
        host = self.get_rabbit_host_from_vhost(vhost)
        rabbit = self.rabbit_login(host)
        return [rabbit.get_queue(vhost=vhost, name=queue) for queue in queue_list]

    def total_messages(self):
        """:return: total 'messages' count across all non-idle queues"""
        return sum(body['messages'] for body in self.exclude_idle_queue_from_list())

    def get_queues_average_consumer_utilisation(self):
        """
        :return: mean 'consumer_utilisation' over the non-idle queues, or 0.0
                 when every queue is idle (the original raised ZeroDivisionError
                 here; reachable because the idle filter is time-dependent).
        """
        utilisations = [body['consumer_utilisation']
                        for body in self.exclude_idle_queue_from_list()]
        if not utilisations:
            return 0.0
        return sum(utilisations) / len(utilisations)

    def exclude_idle_queue_from_list(self):
        """
        :return: the subset of self.queues_body that is neither idle nor stuck
        """
        try:
            ttl = self.rabbit_config.get_deployment_properties(
                deployment_name=self.deployment_name)['ttl']
        except KeyError:
            self.logger.info("ttl not found in deployment %s config\nUsing default ttl = 1.0" % self.deployment_name)
            ttl = 1.0
        return [queue_body for queue_body in self.queues_body
                if self.check_queue_non_idling(queue_body=queue_body, ttl=ttl)]

    def check_queue_non_idling(self, queue_body, ttl):
        """
        :param dict queue_body: management-API queue body
        :param float ttl: idle threshold in minutes
        :return: True when the queue should be kept (non-idle), False otherwise
        """
        fmt = "%Y-%m-%d %H:%M:%S"
        if self.detect_stuck_messages_queue(queue_body=queue_body, ttl=ttl):
            return False
        try:
            idle_since = queue_body['idle_since']
            idle_since_time = datetime.strptime(idle_since, fmt)
            # 'idle_since' is naive; compare against naive UTC now.
            current_time = datetime.now(timezone.utc).replace(tzinfo=None)
            idle_minutes = (current_time - idle_since_time).total_seconds() / 60
            if idle_minutes > ttl and queue_body['consumers'] > 0:
                # Idle long enough and already has consumers: exclude it.
                return False
            # Recently idle, or idle with no consumers: keep it and report
            # zero utilisation so the scaler is pushed to add consumers.
            queue_body['consumer_utilisation'] = 0
            return True
        except KeyError:
            # No 'idle_since' field: the queue is active.
            return True

    @staticmethod
    def detect_stuck_messages_queue(queue_body, ttl):
        """
        Compare the queue against its cached snapshot: if the message count
        has not moved for more than `ttl` minutes, it is considered stuck.

        :param dict queue_body:
        :param float ttl: threshold in minutes
        :return: True when the queue is stuck, False otherwise
        """
        past_queue = FileCache('message-queue', flag='cs')
        queue_name = queue_body['name']
        current_messages = queue_body['messages']
        current_consumers = queue_body['consumers']
        current_time = datetime.now(timezone.utc).replace(tzinfo=None)
        snapshot = {'messages': current_messages,
                    'time_catch': current_time,
                    'consumers': current_consumers}
        previous = past_queue.get(queue_name)
        if not previous:
            # First sighting: record and assume not stuck.
            past_queue[queue_name] = snapshot
            return False
        time_range_minutes = (current_time - previous['time_catch']).total_seconds() / 60
        if previous['messages'] == current_messages:
            # Bug fix: the original fell through with an implicit None when
            # time_range_minutes == ttl exactly; make the boundary explicit
            # (equal to ttl counts as not-yet-stuck, matching the old falsy result).
            return time_range_minutes > ttl
        # Message count moved: refresh the snapshot.
        past_queue[queue_name] = snapshot
        return False

    def get_rabbit_host_from_vhost(self, vhost, caching=True):
        """
        :param str vhost:
        :param bool caching: when True, memoize the vhost->host mapping on disk
        :return: the RabbitMQ host serving `vhost`
        """
        if not caching:
            return self.get_host_action(vhost)
        vhost_host_cache = FileCache('vhost-host', flag='cs')
        if not vhost_host_cache.get(vhost):
            vhost_host_cache[vhost] = self.get_host_action(vhost)
        return vhost_host_cache[vhost]

    def rabbit_login(self, host):
        """
        :param str host:
        :return: authenticated management-API client
        """
        return Client(f'{host}:15672', self.rabbit_config.username, self.rabbit_config.password)

    def get_host_action(self, vhost):
        """
        Probe each configured host and return the first one alive for `vhost`.

        :param str vhost:
        :return: host name, or None when no host responds
        """
        for host in self.rabbit_config.hosts:
            cl = Client(f'{host}:15672', self.rabbit_config.username, self.rabbit_config.password)
            try:
                cl.is_alive(vhost)
                return host
            except APIError:
                pass
        # NOTE(review): implicitly returned None in the original too; callers
        # would then build a 'None:15672' URL — confirm whether raising is better.
        return None
class WorkerScaler:
    """Computes and applies the desired replica count for one deployment
    based on its RabbitMQ queue state."""

    def __init__(self, deployment_name):
        """
        :param str deployment_name: key into the deployments section of the config
        """
        config.load_kube_config()
        self.logger = getLogger(self.__class__.__name__)
        self.v1 = client.AppsV1Api()
        self.rabbit_config = ConfigLoader()
        self.deployment_name = deployment_name
        self.scraper = QueuesScraper(deployment_name=deployment_name)

    def calculate_desired_pods(self, namespace='default'):
        """
        Decide how many replicas this deployment should run.

        :param str namespace:
        :return: number of desired pods, or None when no scaling is needed
        """
        deployment_properties = self.rabbit_config.get_deployment_properties(
            deployment_name=self.deployment_name)
        max_pod = deployment_properties['maxPod']
        queues = self.scraper.exclude_idle_queue_from_list()
        try:
            accumulative_limit = deployment_properties['accumulativeLimit']
        except KeyError:
            self.logger.info("accumulativeLimit not found in deployment %s config\nUsing default accumulativeLimit = 1" % self.deployment_name)
            accumulative_limit = 1
        try:
            min_pod = deployment_properties['minPod']
        except KeyError:
            self.logger.info("minPod not found in deployment %s config\nUsing default minPod = 0" % self.deployment_name)
            min_pod = 0
        current_pods = self.get_deployment_replicas(deployment_name=self.deployment_name,
                                                    namespace=namespace)
        if not queues:
            # Every queue is idle: shrink to the floor (or do nothing if there).
            self.logger.info("All queues are idle")
            if current_pods == min_pod:
                self.logger.info(f"current pods of {self.deployment_name} is min pods")
                return None
            self.logger.info(f"Scale {self.deployment_name} from {current_pods} to {min_pod}")
            return min_pod
        average_consumer_utilisation = self.scraper.get_queues_average_consumer_utilisation()
        desired_pods = current_pods
        if current_pods < min_pod:
            desired_pods = min_pod
        elif current_pods >= max_pod:
            desired_pods = max_pod
        elif average_consumer_utilisation < 0.9:
            # Consumers are saturated: step up by accumulative_limit.
            # Bug fix: clamp to max_pod — the original could propose
            # current_pods + accumulative_limit above the maxPod threshold.
            desired_pods = min(current_pods + accumulative_limit, max_pod)
        if desired_pods == current_pods == max_pod:
            self.logger.info(f"Current pods of {self.deployment_name} hit max threshold: {max_pod}")
            return None
        if desired_pods == current_pods < max_pod:
            self.logger.info(f"Current pods of {self.deployment_name} are suitable: {current_pods}")
            return None
        self.logger.info(f"Scale {self.deployment_name} from {current_pods} to {desired_pods}")
        return desired_pods

    def set_deployment_replicas(self, deployment_name, namespace='default', replicas_number=1):
        """
        Patch the deployment's scale subresource.

        :param str deployment_name:
        :param str namespace:
        :param int replicas_number:
        :return: deployment body, or None when the API call fails
        """
        body = self.v1.read_namespaced_deployment_scale(name=deployment_name, namespace=namespace)
        body.spec.replicas = replicas_number
        try:
            api_response = self.v1.patch_namespaced_deployment_scale(name=deployment_name,
                                                                     namespace=namespace,
                                                                     body=body)
            return api_response
        except ApiException as e:
            self.logger.error("Exception when calling AppsV1Api->patch_namespaced_deployment_scale: %s\n" % e)

    def get_deployment_replicas(self, deployment_name, namespace='default'):
        """
        :param str deployment_name:
        :param str namespace:
        :return: deployment replicas, or None when the API call fails
        """
        try:
            body = self.v1.read_namespaced_deployment_scale(name=deployment_name, namespace=namespace)
            return body.status.replicas
        except ApiException as e:
            self.logger.error("Exception when calling AppsV1Api-->read_namespaced_deployment_scale: %s\n" % e)