def running(self):
    """Load the "running" queue directory and return its tasks wrapped in a _TaskProperty."""
    self._QUEUE_TYPE = "running"
    path = self._get_path()
    STREAM.debug("Get running tasks:")
    # Accumulate freshly loaded tasks into the shared running-task list.
    self._RUNNING_TASKS += self._load_tasks(path)
    return _TaskProperty(self._RUNNING_TASKS)
def waiting(self):
    """Load the "waiting" queue directory and return its tasks wrapped in a _TaskProperty."""
    self._QUEUE_TYPE = "waiting"
    path = self._get_path()
    STREAM.debug("Get waiting tasks:")
    # Accumulate freshly loaded tasks into the shared waiting-task list.
    self._WAITING_TASKS += self._load_tasks(path)
    return _TaskProperty(self._WAITING_TASKS)
def __next__(self):
    """Pick the next task to dispatch by precedence.

    Precedence order: system group > high priority > normal priority >
    low priority > guest group. Every filter is still evaluated (and its
    candidate logged at debug level) exactly as before; the first truthy
    candidate in precedence order is returned, or None when the waiting
    queue has no matching task.
    """
    def _uuid(task):
        # Render a missing candidate as [] in debug output (original behavior).
        return task.uuid if task else []

    # (log label, filter kwargs) in precedence order — replaces the
    # original lambda assignment and five-way if/elif chain.
    precedence = (
        ("System task", {"group": "system"}),
        ("High priority task", {"priority": "high"}),
        ("Normal priority task", {"priority": "normal"}),
        ("Low priority task", {"priority": "low"}),
        ("Guest task", {"group": "guest"}),
    )
    chosen = None
    for label, query in precedence:
        candidate = self.get_first_task_by_filter(**query)
        STREAM.debug("%s: %s" % (label, _uuid(candidate)))
        if chosen is None and candidate:
            chosen = candidate
    return chosen
def _get_host(self, hostname):
    """Look up an enabled (status 0) Zabbix host by name.

    Returns the raw API response when the host exists, otherwise logs an
    error and returns None.
    """
    response = self.server.do_request("host.get", {
        "output": "extend",
        "filter": {"host": hostname, "status": 0},
    })
    if not response["result"]:
        STREAM.error("Cannot find specific host: {}".format(hostname))
        return None
    return response
def _compare_listings(self, listing, cached_listing):
    """Diff the current directory listing against the cached one.

    Returns a pair of lists: tasks that newly arrived (present now, absent
    from cache) and tasks that should be deleted (present in cache, absent
    now).
    """
    current, cached = set(listing), set(cached_listing)
    arrived = list(current - cached)
    STREAM.debug(" -> new arrived tasks: %s" % arrived)
    deleted = list(cached - current)
    STREAM.debug(" -> deleted tasks: %s" % deleted)
    return arrived, deleted
def _get_metric(self, host_id, metric):
    """Find a Zabbix item on *host_id* whose key matches *metric*.

    Returns the raw API response when found, otherwise logs an error and
    returns None.
    """
    request = {
        "output": "extend",
        "hostids": host_id,
        "search": {"key_": metric},
    }
    response = self.server.do_request("item.get", request)
    if not response["result"]:
        STREAM.error("Cannot find specific metric: {}".format(metric))
        return None
    return response
def _get_metric_value(self, metric_id):
    """Fetch the most recent history value for a Zabbix item.

    Returns the raw API response when history exists for *metric_id*,
    otherwise logs an error and returns None.
    """
    value = self.server.do_request(
        "history.get", {
            "output": "extend",
            "history": 0,
            "itemids": metric_id,
            "sortfield": "clock",
            "sortorder": "DESC",
            "limit": 1
        })
    if value["result"]:
        return value
    # Bug fix: the original formatted the whole response object into the
    # message; the item id is what identifies the failing lookup.
    STREAM.error("Cannot retrieve value for item: {}".format(metric_id))
    return None
def _start_exporters(self):
    """Start every configured metric exporter in its own daemon subprocess.

    Exporters lacking a callable ``run`` attribute are skipped — the
    original skipped them silently; now a warning is logged so
    misconfiguration is visible.
    """
    STREAM.info("Loading metric exporters...")
    if not settings.EXPORTERS:
        return
    for exporter in settings.EXPORTERS:
        if not callable(getattr(exporter, "run", None)):
            STREAM.warning(" -> skipped (no run() method): %s"
                           % exporter.__class__.__name__)
            continue
        p = Process(target=exporter.run)
        p.daemon = True
        p.start()
        STREAM.info(" -> loaded: %s" % exporter.__class__.__name__)
    # Give exporters a moment to come up before initialization continues.
    # NOTE(review): the flattened source is ambiguous about whether this
    # sleep was per-exporter or after the loop — confirm placement.
    sleep(5)
def get_first_task_by_filter(self, **kwargs):
    """Return the waiting task matching **kwargs with the earliest
    build_timestamp, or False when nothing valid matches.

    Tasks missing the build_timestamp attribute are warned about and
    skipped. The sentinel carries the current time, so only tasks stamped
    earlier than "now" can be selected (original behavior preserved).
    """
    candidates = self.loader.waiting.filter_tasks(**kwargs)
    if not candidates:
        return False
    # Throwaway sentinel type: any real task must beat the current time.
    earliest = type("first_task", (), {"build_timestamp": time.time()})
    for candidate in candidates:
        try:
            if int(candidate.build_timestamp) < int(earliest.build_timestamp):
                earliest = candidate
        except AttributeError:
            STREAM.warning(
                "Task %s: Wrong format, attribute 'build_timestamp' not found."
                % candidate.uuid)
    return earliest if hasattr(earliest, "uuid") else False
def _get_task_files(self, directory):
    """Return absolute paths of task files newly added to *directory*.

    Compares the current listing with a cached one so only files added
    since the previous call are returned, cutting down on IO reads.
    Hidden (dot-prefixed) files are ignored.
    """
    listing = [name for name in os.listdir(directory)
               if not name.startswith(".")]
    STREAM.debug(" -> listdir tasks: %s" % listing)
    cached = self._get_listing_from_cache()
    STREAM.debug(" -> cached tasks: %s" % cached)
    arrived, deleted = self._compare_listings(listing, cached)
    self._remove_deleted_tasks(deleted)
    self._save_listing_to_cache(listing)
    return [os.path.join(directory, name) for name in arrived]
def get_manual_queue_control_status(self):
    """Read the manual queue-control flag file.

    Returns True when distribution is enabled ("start", or the flag file
    is absent), False otherwise (only system tasks get distributed).
    """
    STREAM.debug("Check manual queue control flag: %s" % self.QUEUE_CONTROL_FILE)
    if os.path.isfile(self.QUEUE_CONTROL_FILE):
        with open(self.QUEUE_CONTROL_FILE, "r") as f:
            queue_control_status = f.read().rstrip()
    else:
        # A missing flag file defaults to distribution enabled.
        queue_control_status = "start"
    if queue_control_status == "start":
        STREAM.info("-> Manual control: distribution is: ON")
        return True
    STREAM.warning("-> Manual control: distribution is: OFF")
    # Bug fix: typo "wiil" -> "will" in the log message.
    STREAM.warning("-> Only system tasks will be distributed.")
    return False
def __init__(self):
    """Set up the dispatcher: validate settings, start exporters, build adapters."""
    LoggerOptions.set_component("Dispatcher")
    for banner_line in ("******************************",
                        "Initialization...",
                        "******************************"):
        STREAM.info(banner_line)
    self.check_and_print_settings()
    self._start_exporters()
    # Strategy objects come from settings and are wrapped by adapters.
    self.queue = TaskQueueAdapter(settings.QUEUE_STRATEGY)
    self.distribution = TaskDistributionAdapter(settings.DISTRIBUTE_STRATEGY)
def push_to_running(self):
    """Move this task's file from the "waiting" queue to the "running" queue.

    A task whose status is not "waiting" is assumed to already be running
    and only triggers a warning.
    """
    if self.status != "waiting":
        # Bug fix: the original warning used a %s placeholder without
        # supplying self.uuid, logging the literal format string.
        STREAM.warning("Task: %s : Already running" % self.uuid)
        return
    try:
        shutil.move(
            os.path.join(self.queue_directory, "waiting", self.uuid),
            os.path.join(self.queue_directory, "running", self.uuid))
        STREAM.info("Task: %s : Moved to running" % self.uuid)
    except FileNotFoundError:
        STREAM.warning("Task: %s : Can't run! Maybe already running"
                       % self.uuid)
build_name=parameters[2], build_number=parameters[3], build_email=parameters[4], build_url=parameters[5], build_timestamp=parameters[6])) return TASKS @property def waiting(self): self._QUEUE_TYPE = "waiting" waiting_path = self._get_path() STREAM.debug("Get waiting tasks:") tasks = self._load_tasks(waiting_path) self._WAITING_TASKS += tasks return _TaskProperty(self._WAITING_TASKS) @property def running(self): self._QUEUE_TYPE = "running" running_path = self._get_path() STREAM.debug("Get running tasks:") tasks = self._load_tasks(running_path) self._RUNNING_TASKS += tasks return _TaskProperty(self._RUNNING_TASKS) if __name__ == '__main__': loader = TasksLoader() STREAM.info(loader.waiting.tasks) STREAM.info(loader.running.tasks)
def check_and_print_settings(self):
    """Validate required settings and log the effective configuration.

    Raises AssertionError when a setting has the wrong type, points to a
    missing path, or holds an object of the wrong base class.
    """
    # check vars
    assert isinstance(settings.LOGLEVEL, int), "settings.LOGLEVEL: expected int(10-50)"
    assert isinstance(settings.TASK_GROUPS, dict), "settings.TASK_GROUPS: expected dict"
    assert isinstance(settings.QUEUE_IDLE_INTERVAL, int), "settings.QUEUE_IDLE_INTERVAL: expected int"
    assert isinstance(settings.CLUSTER_NODE_GROUPS, dict), "settings.CLUSTER_NODE_GROUPS: expected dict"
    # isdir()/isfile() already imply existence; the original's
    # "exists() and ..." wrapped in "True if ... else False" was redundant.
    assert os.path.isdir(settings.BASE_QUEUE_PATH),\
        "settings.BASE_QUEUE_PATH: expected directory_path"
    assert os.path.isfile(settings.QUEUE_CONTROL_FILE),\
        "settings.QUEUE_CONTROL_FILE: expected file_path"
    # Check objects (direct-base check kept from the original contract)
    assert QueueStrategy in settings.QUEUE_STRATEGY.__class__.__bases__,\
        "settings.QUEUE_STRATEGY: object must be inherited from class QueueStrategy"
    assert DistributionStrategy in settings.DISTRIBUTE_STRATEGY.__class__.__bases__,\
        "settings.DISTRIBUTE_STRATEGY: object must be inherited from class DistributionStrategy"
    assert isinstance(settings.EXPORTERS, list), "settings.EXPORTERS: expected list of objects"
    for exporter in settings.EXPORTERS:
        assert MetricsExporter in exporter.__class__.__bases__,\
            "settings.EXPORTERS: Exporters objects must be inherited from class MetricsExporter"
    # print settings
    STREAM.info("Using settings:")
    STREAM.info(" -> LOGLEVEL = %s" % settings.LOGLEVEL)
    STREAM.info(" -> BASE_QUEUE_PATH = %s" % settings.BASE_QUEUE_PATH)
    STREAM.info(" -> QUEUE_CONTROL_FILE = %s" % settings.QUEUE_CONTROL_FILE)
    STREAM.info(" -> QUEUE_IDLE_INTERVAL = %s" % settings.QUEUE_IDLE_INTERVAL)
    STREAM.info(" -> TASK_GROUPS = %s" % list(settings.TASK_GROUPS.values()))
    STREAM.info(" -> QUEUE_STRATEGY = %s" % settings.QUEUE_STRATEGY.__class__.__name__)
    STREAM.info(" -> DISTRIBUTE_STRATEGY = %s" %
                settings.DISTRIBUTE_STRATEGY.__class__.__name__)
    STREAM.info(" ")
    STREAM.info("Serviced clusters:")
    for cluster, nodes in settings.CLUSTER_NODE_GROUPS.items():
        STREAM.info(" -> %s: nodes(%s)" % (cluster, len(nodes)))
def run(self):
    """Dispatcher main loop: pull tasks off the queue and hand them to distribution."""
    STREAM.info("")
    STREAM.info("******************************")
    STREAM.info("Automation Dispatcher started.")
    STREAM.info("******************************")
    while True:
        LoggerOptions.set_component("Queue")
        task = next(self.queue)
        if task is None:
            # Empty queue: idle for the configured interval, then poll again.
            STREAM.info("Get task from queue: Queue is empty.")
            STREAM.info("Standby before next queue check: %s sec(s)"
                        % settings.QUEUE_IDLE_INTERVAL)
            sleep(settings.QUEUE_IDLE_INTERVAL)
            continue
        STREAM.info("Get task from queue: %s" % task.uuid)
        LoggerOptions.set_component("Distribution")
        self.distribution.distribute_task(task)
p.start() STREAM.info(" -> loaded: %s" % exporter.__class__.__name__) sleep(5) def run(self): STREAM.info("") STREAM.info("******************************") STREAM.info("Automation Dispatcher started.") STREAM.info("******************************") while True: LoggerOptions.set_component("Queue") task = next(self.queue) if task is None: STREAM.info("Get task from queue: Queue is empty.") STREAM.info("Standby before next queue check: %s sec(s)" % settings.QUEUE_IDLE_INTERVAL) sleep(settings.QUEUE_IDLE_INTERVAL) else: STREAM.info("Get task from queue: %s" % task.uuid) LoggerOptions.set_component("Distribution") self.distribution.distribute_task(task) if __name__ == "__main__": dispatcher = Dispatcher() try: dispatcher.run() except KeyboardInterrupt: STREAM.info("Automation Dispatcher stopped.") exit(0)
def distribute_task(self, task):
    """Block until *task* can be run, then run it once.

    Loops forever: while manual distribution is OFF only "system" tasks
    are run; otherwise the task runs as soon as every cluster required by
    its resource type reports ready. Each failed attempt waits
    REDISTRIBUTE_INTERVAL seconds before retrying.
    """
    STREAM.info("Distribute task: %s" % task.uuid)
    # Clusters that must be ready per resource type; any other resource
    # value requires all three (original else-branch behavior).
    requirements = {
        "vm": ("master", "openstack"),
        "con": ("master", "openshift"),
    }
    required = requirements.get(task.resource,
                                ("master", "openstack", "openshift"))
    while True:
        STREAM.info("Check manual mode")
        if not self.get_manual_queue_control_status():
            if task.group == "system":
                STREAM.info("Task: %s : system task" % task.uuid)
                self.run_task(task)
                # Bug fix: the original fell through to the cluster check
                # here and could distribute the same task a second time.
                return
            self._standby_before_retry()
            continue
        STREAM.info("Check cluster's status")
        STREAM.info("Task: %s : Requires resources: %s"
                    % (task.uuid, task.resource))
        # all() short-circuits exactly like the original chained "and".
        if all(self.get_cluster_status(cluster) for cluster in required):
            self.run_task(task)
            return
        self._standby_before_retry()

def _standby_before_retry(self):
    """Log and sleep before the next distribution attempt."""
    STREAM.info("Standby before retry to distribute task: %s sec(s)"
                % self.REDISTRIBUTE_INTERVAL)
    sleep(self.REDISTRIBUTE_INTERVAL)
def run_task(self, task):
    """Move *task* into the running queue, then pause before the next distribution."""
    task.push_to_running()
    message = ("Standby before distribute next task: %s sec(s)"
               % self.UPDATE_INTERVAL)
    STREAM.info(message)
    sleep(self.UPDATE_INTERVAL)
def get_cluster_status(self, cluster):
    """Return True when *cluster* workload is under its thresholds.

    Returns False when any threshold is exceeded, or when *cluster* has no
    metrics loader attribute (unknown cluster).
    """
    STREAM.debug("Check cluster status: %s" % cluster)
    try:
        cluster_metrics = getattr(self.loader, cluster)
    except AttributeError:
        STREAM.error(
            "Cluster Error: cluster %s not in group: CLUSTER_NODE_GROUPS"
            % cluster)
        # Bug fix: return False instead of None so callers always get a
        # consistent boolean (both are falsy, so behavior is compatible).
        return False
    cpu = cluster_metrics.cpu
    memory = cluster_metrics.memory
    iowait = cluster_metrics.iowait
    workload = ("Cluster: %s -> CPU:%s -> MEMORY:%s -> IOWAIT:%s"
                % (cluster, cpu, memory, iowait))
    STREAM.debug(workload)
    thresholds = self.CLUSTERS_WORKLOAD_THRESHOLDS[cluster]
    # "memory" is free memory (see the zabbix_*_memory_free gauges), so a
    # value BELOW the threshold means not enough headroom.
    if cpu > thresholds["cpu"] or \
            memory < thresholds["memory"] or \
            iowait > thresholds["iowait"]:
        STREAM.warning(workload)
        STREAM.warning(
            "Cluster: %s: Thresholds exceed, not ready to get tasks"
            % cluster)
        return False
    STREAM.info("-> Cluster %s: Ready to get tasks" % cluster)
    return True
def run(self):
    """Exporter main loop: serve Prometheus metrics and refresh them forever.

    Starts an HTTP endpoint on port 8000 for Prometheus to scrape, then
    periodically reloads the task queues and cluster metrics and pushes the
    values into the gauge attributes defined on this exporter instance.
    Never returns.
    """
    # Start the HTTP server Prometheus will scrape metrics from.
    port = 8000
    start_http_server(port)
    LoggerOptions.set_component("prometheus")
    STREAM.debug("Started Prometheus http server on port: %s" % port)
    LoggerOptions.switchback_component()
    tasks = TasksLoader()
    cluster = ClustersLoader()
    while True:
        LoggerOptions.set_component("prometheus")
        # Fetch "waiting" and "running" queue statistics.
        waiting = tasks.waiting
        running = tasks.running
        # Fetch "openshift" and "openstack" cluster statistics.
        openshift = cluster.openshift
        openstack = cluster.openstack
        master = cluster.master
        LoggerOptions.switchback_component()
        # --------------------------------------------------------------------------------------------------
        # Update Prometheus metrics for the "waiting" queue.
        self.common_waiting_guest.set(waiting.count_tasks(group="guest"))
        self.common_waiting_system.set(waiting.count_tasks(group="system"))
        # Tasks with resource "all" count toward both the vm (openstack)
        # and con (openshift) totals — same pattern throughout below.
        vm_res = waiting.count_tasks(resource="vm")
        con_res = waiting.count_tasks(resource="con")
        all_res = waiting.count_tasks(resource="all")
        vm_res += all_res
        con_res += all_res
        self.common_waiting_openstack.set(vm_res)
        self.common_waiting_openshift.set(con_res)
        self.guest_waiting_count.set(waiting.count_tasks(group="guest"))
        vm_res = waiting.count_tasks(group="guest", resource="vm")
        con_res = waiting.count_tasks(group="guest", resource="con")
        all_res = waiting.count_tasks(group="guest", resource="all")
        vm_res += all_res
        con_res += all_res
        self.guest_waiting_openstack.set(vm_res)
        self.guest_waiting_openshift.set(con_res)
        self.system_waiting_count.set(waiting.count_tasks(group="system"))
        vm_res = waiting.count_tasks(group="system", resource="vm")
        con_res = waiting.count_tasks(group="system", resource="con")
        all_res = waiting.count_tasks(group="system", resource="all")
        vm_res += all_res
        con_res += all_res
        self.system_waiting_openstack.set(vm_res)
        self.system_waiting_openshift.set(con_res)
        self.unknown_waiting_count.set(waiting.count_tasks(group="unknown"))
        self.unknown_waiting_high.set(waiting.count_tasks(group="unknown", priority="high"))
        self.unknown_waiting_normal.set(waiting.count_tasks(group="unknown", priority="normal"))
        self.unknown_waiting_low.set(waiting.count_tasks(group="unknown", priority="low"))
        vm_res = waiting.count_tasks(group="unknown", resource="vm")
        con_res = waiting.count_tasks(group="unknown", resource="con")
        all_res = waiting.count_tasks(group="unknown", resource="all")
        vm_res += all_res
        con_res += all_res
        self.unknown_waiting_openstack.set(vm_res)
        self.unknown_waiting_openshift.set(con_res)
        # --------------------------------------------------------------------------------------------------
        # Update Prometheus metrics for the "running" queue.
        self.common_running_count.set(running.count_tasks())
        self.common_running_high.set(running.count_tasks(priority="high"))
        self.common_running_normal.set(running.count_tasks(priority="normal"))
        self.common_running_low.set(running.count_tasks(priority="low"))
        self.common_running_guest.set(running.count_tasks(group="guest"))
        self.common_running_system.set(running.count_tasks(group="system"))
        vm_res = running.count_tasks(resource="vm")
        con_res = running.count_tasks(resource="con")
        all_res = running.count_tasks(resource="all")
        vm_res += all_res
        con_res += all_res
        self.common_running_openstack.set(vm_res)
        self.common_running_openshift.set(con_res)
        self.guest_running_count.set(running.count_tasks(group="guest"))
        vm_res = running.count_tasks(group="guest", resource="vm")
        con_res = running.count_tasks(group="guest", resource="con")
        all_res = running.count_tasks(group="guest", resource="all")
        vm_res += all_res
        con_res += all_res
        self.guest_running_openstack.set(vm_res)
        self.guest_running_openshift.set(con_res)
        self.system_running_count.set(running.count_tasks(group="system"))
        vm_res = running.count_tasks(group="system", resource="vm")
        con_res = running.count_tasks(group="system", resource="con")
        all_res = running.count_tasks(group="system", resource="all")
        vm_res += all_res
        con_res += all_res
        self.system_running_openstack.set(vm_res)
        self.system_running_openshift.set(con_res)
        self.unknown_running_count.set(running.count_tasks(group="unknown"))
        self.unknown_running_high.set(running.count_tasks(group="unknown", priority="high"))
        self.unknown_running_normal.set(running.count_tasks(group="unknown", priority="normal"))
        self.unknown_running_low.set(running.count_tasks(group="unknown", priority="low"))
        vm_res = running.count_tasks(group="unknown", resource="vm")
        con_res = running.count_tasks(group="unknown", resource="con")
        all_res = running.count_tasks(group="unknown", resource="all")
        vm_res += all_res
        con_res += all_res
        self.unknown_running_openstack.set(vm_res)
        self.unknown_running_openshift.set(con_res)
        # --------------------------------------------------------------------------------------------------
        # Update Prometheus metrics for cluster workload.
        openstack_thresholds = SimpleStrategy.CLUSTERS_WORKLOAD_THRESHOLDS["openstack"]
        openshift_thresholds = SimpleStrategy.CLUSTERS_WORKLOAD_THRESHOLDS["openshift"]
        master_thresholds = SimpleStrategy.CLUSTERS_WORKLOAD_THRESHOLDS["master"]
        # Status gauges: 0 when the cluster status check passes, 1 otherwise.
        if self.get_cluster_status(openstack, openstack_thresholds):
            self.common_openstack_status.set(0)
        else:
            self.common_openstack_status.set(1)
        if self.get_cluster_status(openshift, openshift_thresholds):
            self.common_openshift_status.set(0)
        else:
            self.common_openshift_status.set(1)
        if self.get_cluster_status(master, master_thresholds):
            self.common_master_status.set(0)
        else:
            self.common_master_status.set(1)
        self.zabbix_openstack_cpu_usage.set(openstack.cpu)
        self.zabbix_openstack_memory_free.set(openstack.memory)
        self.zabbix_openstack_iowait_usage.set(openstack.iowait)
        self.zabbix_openshift_cpu_usage.set(openshift.cpu)
        self.zabbix_openshift_memory_free.set(openshift.memory)
        self.zabbix_openshift_iowait_usage.set(openshift.iowait)
        # --------------------------------------------------------------------------------------------------
        # Update the manual queue-control metric.
        if os.path.isfile(self.QUEUE_CONTROL_FILE):
            with open(self.QUEUE_CONTROL_FILE, "r") as f:
                queue_control_status = f.read().rstrip()
        else:
            # A missing flag file means "start" (distribution enabled).
            queue_control_status = "start"
        if queue_control_status == "start":
            self.common_queue_control.set(0)
        else:
            self.common_queue_control.set(1)
        sleep(self.update_interval)
def __init__(self, url, user, password):
    """Open a Zabbix API session; terminate the process if it cannot be established."""
    try:
        self.server = ZabbixAPI(url=url, user=user, password=password)
    except Exception as exc:
        # Without a Zabbix session the client is useless — bail out.
        STREAM.error("Cannot establish Zabbix session: {}".format(exc))
        exit(1)
def discard(self):
    """Delete this task's queue file; warn if it is already gone."""
    task_file = os.path.join(self.queue_directory, self.status, self.uuid)
    try:
        os.remove(task_file)
    except FileNotFoundError:
        STREAM.warning("Task: %s : File not found!" % self.uuid)