def reincarnate(self, process):
    """
    :param process: the process to reincarnate
    :type process: Process or None
    """
    close_old_django_connections()
    if process == self.monitor:
        self.monitor = self.spawn_monitor()
        logger.error(_(f"reincarnated monitor {process.name} after sudden death"))
    elif process == self.pusher:
        self.pusher = self.spawn_pusher()
        logger.error(_(f"reincarnated pusher {process.name} after sudden death"))
    else:
        self.pool.remove(process)
        self.spawn_worker()
        if process.timer.value == 0:
            # only need to terminate on timeout, otherwise we risk destabilizing the queues
            process.terminate()
            logger.warning(_(f"reincarnated worker {process.name} after timeout"))
        elif int(process.timer.value) == -2:
            logger.info(_(f"recycled worker {process.name}"))
        else:
            logger.error(_(f"reincarnated worker {process.name} after death"))
    self.reincarnations += 1

def guard(self):
    logger.info(_('{} guarding cluster at {}').format(current_process().name, self.pid))
    self.start_event.set()
    Stat(self).save()
    logger.info(_('Q Cluster-{} running.').format(self.parent_pid))
    scheduler(broker=self.broker)
    counter = 0
    cycle = Conf.GUARD_CYCLE  # guard loop sleep in seconds
    # Guard loop. Runs at least once
    while not self.stop_event.is_set() or not counter:
        # Check Workers
        for p in self.pool:
            # Are you alive?
            if not p.is_alive() or (self.timeout and p.timer.value == 0):
                self.reincarnate(p)
                continue
            # Decrement timer if work is being done
            if self.timeout and p.timer.value > 0:
                p.timer.value -= cycle
        # Check Monitor
        if not self.monitor.is_alive():
            self.reincarnate(self.monitor)
        # Check Pusher
        if not self.pusher.is_alive():
            self.reincarnate(self.pusher)
        # Call scheduler once a minute (or so)
        counter += cycle
        if counter >= 30 and Conf.SCHEDULER:
            counter = 0
            scheduler(broker=self.broker)
        # Save current status
        Stat(self).save()
        sleep(cycle)
    self.stop()

def reincarnate(self, process):
    """
    :param process: the process to reincarnate
    :type process: Process or None
    """
    db.connections.close_all()  # Close any old connections
    if process == self.monitor:
        self.monitor = self.spawn_monitor()
        logger.error(_("reincarnated monitor {} after sudden death").format(process.name))
    elif process == self.pusher:
        self.pusher = self.spawn_pusher()
        logger.error(_("reincarnated pusher {} after sudden death").format(process.name))
    else:
        self.pool.remove(process)
        self.spawn_worker()
        if self.timeout and int(process.timer.value) == 0:
            # only need to terminate on timeout, otherwise we risk destabilizing the queues
            process.terminate()
            logger.warning(_("reincarnated worker {} after timeout").format(process.name))
        elif int(process.timer.value) == -2:
            logger.info(_("recycled worker {}").format(process.name))
        else:
            logger.error(_("reincarnated worker {} after death").format(process.name))
    self.reincarnations += 1

def pusher(task_queue, e, list_key=Conf.Q_LIST, r=redis_client):
    """
    Pulls tasks off the Redis List and puts them in the task queue
    :type task_queue: multiprocessing.Queue
    :type e: multiprocessing.Event
    :type list_key: str
    """
    logger.info(_('{} pushing tasks at {}').format(current_process().name, current_process().pid))
    while True:
        try:
            task = r.blpop(list_key, 1)
        except Exception as exc:  # don't shadow the stop event `e`
            logger.error(exc)
            # redis probably crashed. Let the sentinel handle it.
            sleep(10)
            break
        if task:
            task = task[1]
            task_queue.put(task)
            logger.debug(_('queueing from {}').format(list_key))
        if e.is_set():
            break
    logger.info(_("{} stopped pushing tasks").format(current_process().name))

def pusher(task_queue, event, broker=None):
    """
    Pulls tasks off the broker and puts them in the task queue
    :type task_queue: multiprocessing.Queue
    :type event: multiprocessing.Event
    """
    if not broker:
        broker = get_broker()
    logger.info(_('{} pushing tasks at {}').format(current_process().name, current_process().pid))
    while True:
        try:
            task_set = broker.dequeue()
        except Exception as e:
            logger.error(e)
            # broker probably crashed. Let the sentinel handle it.
            sleep(10)
            break
        if task_set:
            for task in task_set:
                ack_id = task[0]
                # unpack the task
                try:
                    task = signing.SignedPackage.loads(task[1])
                except (TypeError, signing.BadSignature) as e:
                    logger.error(e)
                    broker.fail(ack_id)
                    continue
                task['ack_id'] = ack_id
                task_queue.put(task)
            logger.debug(_('queueing from {}').format(broker.list_key))
        if event.is_set():
            break
    logger.info(_("{} stopped pushing tasks").format(current_process().name))

def pusher(task_queue: Queue, event: Event, broker: Broker = None):
    """
    Pulls tasks off the broker and puts them in the task queue
    :type task_queue: multiprocessing.Queue
    :type event: multiprocessing.Event
    :type broker: brokers.Broker
    """
    if not broker:
        broker = get_broker()
    logger.info(_(f"{current_process().name} pushing tasks at {current_process().pid}"))
    while True:
        try:
            task_set = broker.dequeue()
        except Exception as e:
            logger.error(e, traceback.format_exc())
            # broker probably crashed. Let the sentinel handle it.
            sleep(10)
            break
        if task_set:
            for task in task_set:
                ack_id = task[0]
                # unpack the task
                try:
                    task = SignedPackage.loads(task[1])
                except (TypeError, BadSignature) as e:
                    logger.error(e, traceback.format_exc())
                    broker.fail(ack_id)
                    continue
                task["ack_id"] = ack_id
                task_queue.put(task)
            logger.debug(_(f"queueing from {broker.list_key}"))
        if event.is_set():
            break
    logger.info(_(f"{current_process().name} stopped pushing tasks"))

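# The pusher variants above hand each payload to SignedPackage before it crosses the
# broker. A minimal round-trip sketch (assumes django_q is installed and a Django
# settings module with SECRET_KEY is configured; the task dict below is illustrative,
# not taken from the source):
from django_q.signing import SignedPackage

task = {"id": "abc123", "name": "demo", "func": "math.floor", "args": (1.5,), "kwargs": {}}
pack = SignedPackage.dumps(task)           # signed, serialized string enqueued on the broker
assert SignedPackage.loads(pack) == task   # what pusher/worker recover on the other side
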
def import_urls(user, fresh_urls, mark_read):
    group = uuid()
    size = len(fresh_urls)
    for url in fresh_urls:
        async(subscribe_to_imported_url, user, url, mark_read, group=group)
    start = time.time()
    while True:
        # print("Time", time.time() - start, "count", count_group(group))
        if (time.time() - start) > IMPORT_WAIT:
            # print("TIME!")
            break
        if count_group(group) == size:
            # print("COUNT!")
            break
        time.sleep(1)
    import_results = Counter(result_group(group))
    pretty_results = ', '.join("{}: {}".format(*x) for x in import_results.items())
    num_added = import_results['added']
    num_existed = import_results['existed']
    num_errors = import_results['error']
    if num_added:
        async_messages.success(user, "Import complete - you subscribed to {sub} feed{s}.".format(sub=num_added, s=pluralize(num_added)))
    else:
        async_messages.info(user, "Import complete - no new subscriptions were added.")
    if num_existed:
        async_messages.info(user, "You were already subscribed to {sub_exists} imported feed{s}.".format(sub_exists=num_existed, s=pluralize(num_existed)))
    if num_errors:
        async_messages.error(user, "There was an error subscribing to {errors} imported feed{s}.".format(errors=num_errors, s=pluralize(num_errors)))
    logger.info('User %(user)s OPML import complete - %(results)s', {'user': user, 'results': pretty_results})
    delete_group(group, tasks=True)
    return pretty_results

def guard(self):
    logger.info(_('{} guarding cluster at {}').format(current_process().name, self.pid))
    self.start_event.set()
    Stat(self).save()
    logger.info(_('Q Cluster-{} running.').format(self.parent_pid))
    scheduler(broker=self.broker)
    counter = 0
    cycle = Conf.GUARD_CYCLE  # guard loop sleep in seconds
    # Guard loop. Runs at least once
    while not self.stop_event.is_set() or not counter:
        # Check Workers
        for p in self.pool:
            with p.timer.get_lock():
                # Are you alive?
                if not p.is_alive() or p.timer.value == 0:
                    self.reincarnate(p)
                    continue
                # Decrement timer if work is being done
                if p.timer.value > 0:
                    p.timer.value -= cycle
        # Check Monitor
        if not self.monitor.is_alive():
            self.reincarnate(self.monitor)
        # Check Pusher
        if not self.pusher.is_alive():
            self.reincarnate(self.pusher)
        # Call scheduler once a minute (or so)
        counter += cycle
        if counter >= 30 and Conf.SCHEDULER:
            counter = 0
            scheduler(broker=self.broker)
        # Save current status
        Stat(self).save()
        sleep(cycle)
    self.stop()

def guard(self):
    logger.info(_('{} guarding cluster at {}').format(current_process().name, self.pid))
    self.start_event.set()
    Stat(self).save()
    logger.info(_('Q Cluster-{} running.').format(self.parent_pid))
    scheduler(list_key=self.list_key)
    counter = 0
    cycle = 0.5  # guard loop sleep in seconds
    # Guard loop. Runs at least once
    while not self.stop_event.is_set() or not counter:
        # Check Workers
        for p in self.pool:
            # Are you alive?
            if not p.is_alive() or (self.timeout and p.timer.value == 0):
                self.reincarnate(p)
                continue
            # Decrement timer if work is being done
            if self.timeout and p.timer.value > 0:
                p.timer.value -= cycle
        # Check Monitor
        if not self.monitor.is_alive():
            self.reincarnate(self.monitor)
        # Check Pusher
        if not self.pusher.is_alive():
            self.reincarnate(self.pusher)
        # Call scheduler once a minute (or so)
        counter += cycle
        if counter == 30:
            counter = 0
            scheduler(list_key=self.list_key)
        # Save current status
        Stat(self).save()
        sleep(cycle)
    self.stop()

def set_cpu_affinity(n, process_ids, actual=not Conf.TESTING):
    """
    Sets the cpu affinity for the supplied processes.
    Requires the optional psutil module.
    :param int n: affinity
    :param list process_ids: a list of pids
    :param bool actual: Test workaround for Travis not supporting cpu affinity
    """
    # check if we have the psutil module
    if not psutil:
        logger.warning('Skipping cpu affinity because psutil was not found.')
        return
    # check if the platform supports cpu_affinity
    if actual and not hasattr(psutil.Process(process_ids[0]), 'cpu_affinity'):
        logger.warning('Faking cpu affinity because it is not supported on this platform')
        actual = False
    # get the available processors
    cpu_list = list(range(psutil.cpu_count()))
    # affinities of 0 or gte cpu_count equal no affinity
    if not n or n >= len(cpu_list):
        return
    # spread the workers over the available processors.
    index = 0
    for pid in process_ids:
        affinity = []
        for k in range(n):
            if index == len(cpu_list):
                index = 0
            affinity.append(cpu_list[index])
            index += 1
        if psutil.pid_exists(pid):
            p = psutil.Process(pid)
            if actual:
                p.cpu_affinity(affinity)
            logger.info(_('{} will use cpu {}').format(pid, affinity))

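# A pure-Python sketch of the round-robin spreading used by set_cpu_affinity above,
# kept free of psutil so it can run anywhere; the pids and counts are made up:
def spread_affinity(n, pids, cpu_count):
    cpu_list = list(range(cpu_count))
    index = 0
    plan = {}
    for pid in pids:
        affinity = []
        for _ in range(n):
            if index == len(cpu_list):
                index = 0
            affinity.append(cpu_list[index])
            index += 1
        plan[pid] = affinity
    return plan

# spread_affinity(2, [101, 102, 103], cpu_count=4) == {101: [0, 1], 102: [2, 3], 103: [0, 1]}
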
def set_cpu_affinity(n, process_ids, actual=not Conf.TESTING):
    """
    Sets the cpu affinity for the supplied processes.
    Requires the optional psutil module.
    :param int n: affinity
    :param list process_ids: a list of pids
    :param bool actual: Test workaround for Travis not supporting cpu affinity
    """
    # check if we have the psutil module
    if not psutil:
        return
    # get the available processors
    cpu_list = list(range(psutil.cpu_count()))
    # affinities of 0 or gte cpu_count equal no affinity
    if not n or n >= len(cpu_list):
        return
    # spread the workers over the available processors.
    index = 0
    for pid in process_ids:
        affinity = []
        for k in range(n):
            if index == len(cpu_list):
                index = 0
            affinity.append(cpu_list[index])
            index += 1
        if psutil.pid_exists(pid):
            p = psutil.Process(pid)
            if actual:
                p.cpu_affinity(affinity)
            logger.info('{} will use cpu {}'.format(pid, affinity))

def scheduler(list_key=Conf.Q_LIST):
    """
    Creates a task from a schedule at the scheduled time and schedules next run
    """
    for s in Schedule.objects.exclude(repeats=0).filter(next_run__lt=timezone.now()):
        args = ()
        kwargs = {}
        # get args, kwargs and hook
        if s.kwargs:
            try:
                # eval should be safe here because dict()
                kwargs = eval('dict({})'.format(s.kwargs))
            except SyntaxError:
                kwargs = {}
        if s.args:
            args = ast.literal_eval(s.args)
            # single value won't eval to tuple, so:
            if type(args) != tuple:
                args = (args,)
        q_options = kwargs.get('q_options', {})
        if s.hook:
            q_options['hook'] = s.hook
        # set up the next run time
        if not s.schedule_type == s.ONCE:
            next_run = arrow.get(s.next_run)
            if s.schedule_type == s.HOURLY:
                next_run = next_run.replace(hours=+1)
            elif s.schedule_type == s.DAILY:
                next_run = next_run.replace(days=+1)
            elif s.schedule_type == s.WEEKLY:
                next_run = next_run.replace(weeks=+1)
            elif s.schedule_type == s.MONTHLY:
                next_run = next_run.replace(months=+1)
            elif s.schedule_type == s.QUARTERLY:
                next_run = next_run.replace(months=+3)
            elif s.schedule_type == s.YEARLY:
                next_run = next_run.replace(years=+1)
            s.next_run = next_run.datetime
            s.repeats += -1
        # send it to the cluster
        q_options['list_key'] = list_key
        q_options['group'] = s.name or s.id
        kwargs['q_options'] = q_options
        s.task = tasks.async(s.func, *args, **kwargs)
        # log it
        if not s.task:
            logger.error(
                _('{} failed to create a task from schedule [{}]').format(current_process().name, s.name or s.id))
        else:
            logger.info(
                _('{} created a task from schedule [{}]').format(current_process().name, s.name or s.id))
        # default behavior is to delete a ONCE schedule
        if s.schedule_type == s.ONCE:
            if s.repeats < 0:
                s.delete()
                return
            # but not if it has a positive repeats
            s.repeats = 0
        # save the schedule
        s.save()

def pusher(task_queue, event, list_key=Conf.Q_LIST, r=redis_client):
    """
    Pulls tasks off the Redis List and puts them in the task queue
    :type task_queue: multiprocessing.Queue
    :type event: multiprocessing.Event
    :type list_key: str
    """
    logger.info(_("{} pushing tasks at {}").format(current_process().name, current_process().pid))
    while True:
        try:
            task = r.blpop(list_key, 1)
        except Exception as e:
            logger.error(e)
            # redis probably crashed. Let the sentinel handle it.
            sleep(10)
            break
        if task:
            # unpack the task
            try:
                task = signing.SignedPackage.loads(task[1])
            except (TypeError, signing.BadSignature) as e:
                logger.error(e)
                continue
            task_queue.put(task)
            logger.debug(_("queueing from {}").format(list_key))
        if event.is_set():
            break
    logger.info(_("{} stopped pushing tasks").format(current_process().name))

def async_task(func, *args, **kwargs):
    """Queue a task for the cluster."""
    keywords = kwargs.copy()
    opt_keys = (
        "hook",
        "group",
        "save",
        "sync",
        "cached",
        "ack_failure",
        "iter_count",
        "iter_cached",
        "chain",
        "broker",
        "timeout",
    )
    q_options = keywords.pop("q_options", {})
    # get an id
    tag = uuid()
    # build the task package
    task = {
        "id": tag[1],
        "name": keywords.pop("task_name", None) or q_options.pop("task_name", None) or tag[0],
        "func": func,
        "args": args,
    }
    # push optionals
    for key in opt_keys:
        if q_options and key in q_options:
            task[key] = q_options[key]
        elif key in keywords:
            task[key] = keywords.pop(key)
    # don't serialize the broker
    broker = task.pop("broker", get_broker())
    # overrides
    if "cached" not in task and Conf.CACHED:
        task["cached"] = Conf.CACHED
    if "sync" not in task and Conf.SYNC:
        task["sync"] = Conf.SYNC
    if "ack_failure" not in task and Conf.ACK_FAILURES:
        task["ack_failure"] = Conf.ACK_FAILURES
    # finalize
    task["kwargs"] = keywords
    task["started"] = timezone.now()
    # signal it
    pre_enqueue.send(sender="django_q", task=task)
    # sign it
    pack = SignedPackage.dumps(task)
    if task.get("sync", False):
        return _sync(pack)
    # push it
    enqueue_id = broker.enqueue(pack)
    logger.info(f"Enqueued {enqueue_id}")
    logger.debug(f"Pushed {tag}")
    return task["id"]

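# A hedged usage sketch for the async_task above: assumes a running cluster, a configured
# Django project with django_q installed, and a reachable broker; the hook path and group
# name are illustrative, not taken from the source.
from django_q.tasks import async_task, result

task_id = async_task(
    "math.copysign", 2, -2,
    group="demo",
    hook="myapp.hooks.print_result",   # hypothetical hook, resolved from its dotted path like task["func"]
    q_options={"timeout": 60},         # per-task override, picked up via opt_keys above
)
print(result(task_id, wait=500))       # poll the saved result for up to 500 ms
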
def stop(self):
    if not self.sentinel.is_alive():
        return False
    logger.info(_('Q Cluster-{} stopping.').format(self.pid))
    self.stop_event.set()
    self.sentinel.join()
    logger.info(_('Q Cluster-{} has stopped.').format(self.pid))
    self.start_event = None
    self.stop_event = None
    return True

def start(self):
    # Start Sentinel
    self.stop_event = Event()
    self.start_event = Event()
    self.sentinel = Process(target=Sentinel, args=(self.stop_event, self.start_event, self.broker, self.timeout))
    self.sentinel.start()
    logger.info(_("Q Cluster-{} starting.").format(self.pid))
    while not self.start_event.is_set():
        sleep(0.1)
    return self.pid

def stop(self):
    if not self.sentinel.is_alive():
        return False
    logger.info(_(f"Q Cluster {self.name} stopping."))
    self.stop_event.set()
    self.sentinel.join()
    logger.info(_(f"Q Cluster {self.name} has stopped."))
    self.start_event = None
    self.stop_event = None
    return True

def start(self):
    # Start Sentinel
    self.stop_event = Event()
    self.start_event = Event()
    self.sentinel = Process(target=Sentinel, args=(self.stop_event, self.start_event, self.broker, self.timeout))
    self.sentinel.start()
    logger.info(_('Q Cluster-{} starting.').format(self.pid))
    while not self.start_event.is_set():
        sleep(0.1)
    return self.pid

def stop(self) -> bool:
    if not self.sentinel.is_alive():
        return False
    logger.info(_(f"Q Cluster {self.name} stopping."))
    self.stop_event.set()
    self.sentinel.join()
    logger.info(_(f"Q Cluster {self.name} has stopped."))
    self.start_event = None
    self.stop_event = None
    # Delete cluster instance (will also remove associated workers)
    ClusterModel.objects.filter(id=self.cluster_id).delete()
    return True

def add_async_task(func, *args, **kwargs):
    # Wrapper method to add a task with awareness of schemapack
    if "schema_name" not in kwargs:
        kwargs.update({"schema_name": connection.schema_name})
    tag, task, broker, pack = QUtilities.prepare_task(func, *args, **kwargs)
    if task.get("sync", False):
        return QUtilities.run_synchronously(pack)
    enqueue_id = broker.enqueue(pack)
    logger.info(f"Enqueued {enqueue_id}")
    logger.debug(f"Pushed {tag}")
    return task["id"]

def async_task(func, *args, **kwargs):
    """Queue a task for the cluster."""
    keywords = kwargs.copy()
    opt_keys = ('hook', 'group', 'save', 'sync', 'cached', 'ack_failure',
                'iter_count', 'iter_cached', 'chain', 'broker')
    q_options = keywords.pop('q_options', {})
    # get an id
    tag = uuid()
    # build the task package
    task = {
        'id': tag[1],
        'name': keywords.pop('task_name', None) or q_options.pop('task_name', None) or tag[0],
        'func': func,
        'args': args
    }
    # push optionals
    for key in opt_keys:
        if q_options and key in q_options:
            task[key] = q_options[key]
        elif key in keywords:
            task[key] = keywords.pop(key)
    # don't serialize the broker
    broker = task.pop('broker', get_broker())
    # overrides
    if 'cached' not in task and Conf.CACHED:
        task['cached'] = Conf.CACHED
    if 'sync' not in task and Conf.SYNC:
        task['sync'] = Conf.SYNC
    if 'ack_failure' not in task and Conf.ACK_FAILURES:
        task['ack_failure'] = Conf.ACK_FAILURES
    # finalize
    task['kwargs'] = keywords
    task['started'] = timezone.now()
    # signal it
    pre_enqueue.send(sender="django_q", task=task)
    # sign it
    pack = SignedPackage.dumps(task)
    if task.get('sync', False):
        return _sync(pack)
    # push it
    enqueue_id = broker.enqueue(pack)
    logger.info('Enqueued {}'.format(enqueue_id))
    logger.debug('Pushed {}'.format(tag))
    return task['id']

def guard(self):
    logger.info(_(f"{current_process().name} guarding cluster {humanize(self.cluster_id.hex)}"))
    self.start_event.set()
    Stat(self).save()
    logger.info(_(f"Q Cluster {humanize(self.cluster_id.hex)} running."))
    counter = 0
    cycle = Conf.GUARD_CYCLE  # guard loop sleep in seconds
    # Guard loop. Runs at least once
    while not self.stop_event.is_set() or not counter:
        # Check Workers
        for p in self.pool:
            with p.timer.get_lock():
                # Check if worker is alive or timed out
                if not p.is_alive() or p.timer.value == 0:
                    self.reincarnate_worker(p)
                    continue
                # Decrement timer if work is being done
                if p.timer.value > 0:
                    p.timer.value = 0 if p.timer.value < cycle else p.timer.value - cycle
        # Check Monitor
        if not self.monitor.is_alive():
            self.reincarnate_monitor()
        # Check Pusher
        if not self.pusher.is_alive():
            self.reincarnate_pusher()
        counter += cycle
        # Run every 30 seconds
        if counter >= 30:
            # Update cluster heartbeat time
            now_time = timezone.now()
            self.cluster_model.heartbeat_time = now_time
            self.cluster_model.save(update_fields=['heartbeat_time'])
            # Clean up other cluster instances that have died
            self.cluster_model.filter(heartbeat_time__lt=now_time - timedelta(minutes=1)).delete()
            # Call scheduler
            if Conf.SCHEDULER:
                scheduler(broker=self.broker)
            counter = 0
        # Save current status
        Stat(self).save()
        sleep(cycle)
    self.stop()

def start(self):
    # This is just for PyCharm to not crash. Ignore it.
    if not hasattr(sys.stdin, 'close'):
        def dummy_close():
            pass
        sys.stdin.close = dummy_close
    # Start Sentinel
    self.stop_event = Event()
    self.start_event = Event()
    self.sentinel = Process(target=Sentinel, args=(self.stop_event, self.start_event, self.list_key, self.timeout))
    self.sentinel.start()
    logger.info(_('Q Cluster-{} starting.').format(self.pid))
    while not self.start_event.is_set():
        sleep(0.1)
    return self.pid

def start(self):
    # This is just for PyCharm to not crash. Ignore it.
    if not hasattr(sys.stdin, 'close'):
        def dummy_close():
            pass
        sys.stdin.close = dummy_close
    # Start Sentinel
    self.stop_event = Event()
    self.start_event = Event()
    self.sentinel = Process(target=Sentinel, args=(self.stop_event, self.start_event, self.list_key, self.timeout))
    self.sentinel.start()
    logger.info(_('Q Cluster-{} starting.').format(self.pid))
    while not self.start_event.is_set():
        sleep(0.2)
    return self.pid

def start(self):
    # Start Sentinel
    self.stop_event = Event()
    self.start_event = Event()
    self.sentinel = Process(
        target=Sentinel,
        args=(
            self.stop_event,
            self.start_event,
            self.cluster_id,
            self.broker,
            self.timeout,
        ),
    )
    self.sentinel.start()
    logger.info(_(f"Q Cluster {self.name} starting."))
    while not self.start_event.is_set():
        sleep(0.1)
    return self.pid

def async_task(func, *args, **kwargs):
    """Queue a task for the cluster."""
    keywords = kwargs.copy()
    opt_keys = ('hook', 'group', 'save', 'sync', 'cached', 'ack_failure',
                'iter_count', 'iter_cached', 'chain', 'broker')
    q_options = keywords.pop('q_options', {})
    # get an id
    tag = uuid()
    # build the task package
    task = {'id': tag[1],
            'name': keywords.pop('task_name', None) or q_options.pop('task_name', None) or tag[0],
            'func': func,
            'args': args}
    # push optionals
    for key in opt_keys:
        if q_options and key in q_options:
            task[key] = q_options[key]
        elif key in keywords:
            task[key] = keywords.pop(key)
    # don't serialize the broker
    broker = task.pop('broker', get_broker())
    # overrides
    if 'cached' not in task and Conf.CACHED:
        task['cached'] = Conf.CACHED
    if 'sync' not in task and Conf.SYNC:
        task['sync'] = Conf.SYNC
    if 'ack_failure' not in task and Conf.ACK_FAILURES:
        task['ack_failure'] = Conf.ACK_FAILURES
    # finalize
    task['kwargs'] = keywords
    task['started'] = timezone.now()
    # signal it
    pre_enqueue.send(sender="django_q", task=task)
    # sign it
    pack = SignedPackage.dumps(task)
    if task.get('sync', False):
        return _sync(pack)
    # push it
    enqueue_id = broker.enqueue(pack)
    logger.info('Enqueued {}'.format(enqueue_id))
    logger.debug('Pushed {}'.format(tag))
    return task['id']

def reincarnate_worker(self, process):
    """
    :param process: the process to reincarnate
    :type process: Process or None
    """
    close_old_django_connections()
    self.pool.remove(process)
    # Delete Worker model entry
    WorkerModel.objects.filter(id=process.id).delete()
    self.spawn_worker()
    if process.timer.value == 0:
        # only need to terminate on timeout, otherwise we risk destabilizing the queues
        process.terminate()
        logger.warning(_(f"reincarnated worker {process.name} after timeout"))
    elif int(process.timer.value) == -2:
        logger.info(_(f"recycled worker {process.name}"))
    else:
        logger.error(_(f"reincarnated worker {process.name} after death"))
    self.reincarnations += 1

def monitor(result_queue, broker=None):
    """
    Gets finished tasks from the result queue and saves them to Django
    :type result_queue: multiprocessing.Queue
    """
    if not broker:
        broker = get_broker()
    name = current_process().name
    logger.info(_("{} monitoring at {}").format(name, current_process().pid))
    for task in iter(result_queue.get, "STOP"):
        # acknowledge
        ack_id = task.pop("ack_id", False)
        if ack_id:
            broker.acknowledge(ack_id)
        # save the result
        if task.get("cached", False):
            save_cached(task, broker)
        else:
            save_task(task)
        # log the result
        if task["success"]:
            logger.info(_("Processed [{}]").format(task["name"]))
        else:
            logger.error(_("Failed [{}] - {}").format(task["name"], task["result"]))
    logger.info(_("{} stopped monitoring results").format(name))

def monitor(result_queue, broker=None):
    """
    Gets finished tasks from the result queue and saves them to Django
    :type result_queue: multiprocessing.Queue
    """
    if not broker:
        broker = get_broker()
    name = current_process().name
    logger.info(_("{} monitoring at {}").format(name, current_process().pid))
    for task in iter(result_queue.get, 'STOP'):
        # acknowledge
        ack_id = task.pop('ack_id', False)
        if ack_id:
            broker.acknowledge(ack_id)
        # save the result
        if task.get('cached', False):
            save_cached(task, broker)
        else:
            save_task(task, broker)
        # log the result
        if task['success']:
            logger.info(_("Processed [{}]").format(task['name']))
        else:
            logger.error(_("Failed [{}] - {}").format(task['name'], task['result']))
    logger.info(_("{} stopped monitoring results").format(name))

def monitor(result_queue, broker=None):
    """
    Gets finished tasks from the result queue and saves them to Django
    :type result_queue: multiprocessing.Queue
    """
    if not broker:
        broker = get_broker()
    name = current_process().name
    logger.info(_(f"{name} monitoring at {current_process().pid}"))
    for task in iter(result_queue.get, "STOP"):
        # save the result
        if task.get("cached", False):
            save_cached(task, broker)
        else:
            save_task(task, broker)
        # acknowledge result
        ack_id = task.pop("ack_id", False)
        if ack_id and (task["success"] or task.get("ack_failure", False)):
            broker.acknowledge(ack_id)
        # log the result
        if task["success"]:
            # log success
            logger.info(_(f"Processed [{task['name']}]"))
        else:
            # log failure
            logger.error(_(f"Failed [{task['name']}] - {task['result']}"))
    logger.info(_(f"{name} stopped monitoring results"))

def worker(task_queue, result_queue, timer, timeout=Conf.TIMEOUT):
    """
    Takes a task from the task queue, tries to execute it and puts the result back in the result queue
    :type task_queue: multiprocessing.Queue
    :type result_queue: multiprocessing.Queue
    :type timer: multiprocessing.Value
    """
    name = current_process().name
    logger.info(_('{} ready for work at {}').format(name, current_process().pid))
    task_count = 0
    # Start reading the task queue
    for task in iter(task_queue.get, 'STOP'):
        result = None
        timer.value = -1  # Idle
        task_count += 1
        # Get the function from the task
        logger.info(_('{} processing [{}]').format(name, task['name']))
        f = task['func']
        # if it's not an instance try to get it from the string
        if not callable(task['func']):
            try:
                module, func = f.rsplit('.', 1)
                m = importlib.import_module(module)
                f = getattr(m, func)
            except (ValueError, ImportError, AttributeError) as e:
                result = (e, False)
                if error_reporter:
                    error_reporter.report()
                if rollbar:
                    rollbar.report_exc_info()
        # We're still going
        if not result:
            db.close_old_connections()
            timer_value = task['kwargs'].pop('timeout', timeout or 0)
            # signal execution
            pre_execute.send(sender="django_q", func=f, task=task)
            # execute the payload
            timer.value = timer_value  # Busy
            try:
                res = f(*task['args'], **task['kwargs'])
                result = (res, True)
            except Exception as e:
                result = ('{}'.format(e), False)
                if error_reporter:
                    error_reporter.report()
                if rollbar:
                    rollbar.report_exc_info()
        # Process result
        task['result'] = result[0]
        task['success'] = result[1]
        task['stopped'] = timezone.now()
        result_queue.put(task)
        timer.value = -1  # Idle
        # Recycle
        if task_count == Conf.RECYCLE:
            timer.value = -2  # Recycled
            break
    logger.info(_('{} stopped doing work').format(name))

def monitor(result_queue, broker=None):
    """
    Gets finished tasks from the result queue and saves them to Django
    :type result_queue: multiprocessing.Queue
    """
    if not broker:
        broker = get_broker()
    name = current_process().name
    logger.info(_("{} monitoring at {}").format(name, current_process().pid))
    for task in iter(result_queue.get, 'STOP'):
        # save the result
        if task.get('cached', False):
            save_cached(task, broker)
        else:
            save_task(task, broker)
        # acknowledge and log the result
        if task['success']:
            # acknowledge
            ack_id = task.pop('ack_id', False)
            if ack_id:
                broker.acknowledge(ack_id)
            # log success
            logger.info(_("Processed [{}]").format(task['name']))
        else:
            # log failure
            logger.error(_("Failed [{}] - {}").format(task['name'], task['result']))
    logger.info(_("{} stopped monitoring results").format(name))

def stop(self):
    Stat(self).save()
    name = current_process().name
    logger.info('{} stopping cluster processes'.format(name))
    # Stopping pusher
    self.event_out.set()
    # Wait for it to stop
    while self.pusher.is_alive():
        sleep(0.1)
        Stat(self).save()
    # Put poison pills in the queue
    for _ in range(len(self.pool)):
        self.task_queue.put('STOP')
    self.task_queue.close()
    # wait for the task queue to empty
    self.task_queue.join_thread()
    # Wait for all the workers to exit
    while len(self.pool):
        for p in self.pool:
            if not p.is_alive():
                self.pool.remove(p)
        sleep(0.1)
        Stat(self).save()
    # Finally stop the monitor
    self.result_queue.put('STOP')
    self.result_queue.close()
    # Wait for the result queue to empty
    self.result_queue.join_thread()
    logger.info('{} waiting for the monitor.'.format(name))
    # Wait for everything to close or time out
    count = 0
    if not self.timeout:
        self.timeout = 30
    while self.status() == Conf.STOPPING and count < self.timeout * 10:
        sleep(0.1)
        Stat(self).save()
        count += 1
    # Final status
    Stat(self).save()

def pusher(task_queue, e, list_key=Conf.Q_LIST, r=redis_client):
    """
    Pulls tasks off the Redis List and puts them in the task queue
    :type task_queue: multiprocessing.Queue
    :type e: multiprocessing.Event
    :type list_key: str
    """
    logger.info(_('{} pushing tasks at {}').format(current_process().name, current_process().pid))
    while True:
        try:
            task = r.blpop(list_key, 1)
        except Exception as exc:  # don't shadow the stop event `e`
            logger.error(exc)
            # redis probably crashed. Let the sentinel handle it.
            sleep(10)
            break
        if task:
            task_queue.put(task[1])
            logger.debug(_('queueing from {}').format(list_key))
        if e.is_set():
            break
    logger.info(_("{} stopped pushing tasks").format(current_process().name))

def worker(task_queue, result_queue, timer, timeout=Conf.TIMEOUT):
    """
    Takes a task from the task queue, tries to execute it and puts the result back in the result queue
    :type task_queue: multiprocessing.Queue
    :type result_queue: multiprocessing.Queue
    :type timer: multiprocessing.Value
    """
    name = current_process().name
    logger.info(_(f"{name} ready for work at {current_process().pid}"))
    task_count = 0
    if timeout is None:
        timeout = -1
    # Start reading the task queue
    for task in iter(task_queue.get, "STOP"):
        result = None
        timer.value = -1  # Idle
        task_count += 1
        # Get the function from the task
        logger.info(_(f'{name} processing [{task["name"]}]'))
        f = task["func"]
        # if it's not an instance try to get it from the string
        if not callable(task["func"]):
            try:
                module, func = f.rsplit(".", 1)
                m = importlib.import_module(module)
                f = getattr(m, func)
            except (ValueError, ImportError, AttributeError) as e:
                result = (e, False)
                if error_reporter:
                    error_reporter.report()
        # We're still going
        if not result:
            close_old_django_connections()
            timer_value = task.pop("timeout", timeout)
            # signal execution
            pre_execute.send(sender="django_q", func=f, task=task)
            # execute the payload
            timer.value = timer_value  # Busy
            try:
                res = f(*task["args"], **task["kwargs"])
                result = (res, True)
            except Exception as e:
                result = (f"{e} : {traceback.format_exc()}", False)
                if error_reporter:
                    error_reporter.report()
                if task.get("sync", False):
                    raise
        with timer.get_lock():
            # Process result
            task["result"] = result[0]
            task["success"] = result[1]
            task["stopped"] = timezone.now()
            result_queue.put(task)
            timer.value = -1  # Idle
            # Recycle
            if task_count == Conf.RECYCLE:
                timer.value = -2  # Recycled
                break
    logger.info(_(f"{name} stopped doing work"))

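# The shared `timer` Value above doubles as a status flag that guard() reads: -1 means idle,
# -2 means the worker hit Conf.RECYCLE and wants replacing, and any positive value is the
# remaining timeout budget that guard() decrements each cycle. A minimal sketch of sharing
# such a flag (multiprocessing only; the 'f' typecode is an assumption matching the float
# timeouts used above):
from multiprocessing import Value

timer = Value("f", -1)        # starts idle
with timer.get_lock():        # same locking discipline as the worker/guard variants above
    timer.value = 30.0        # busy, with a 30 second budget
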
def stop(self):
    Stat(self).save()
    name = current_process().name
    logger.info(_('{} stopping cluster processes').format(name))
    # Stopping pusher
    self.event_out.set()
    # Wait for it to stop
    while self.pusher.is_alive():
        sleep(0.1)
        Stat(self).save()
    # Put poison pills in the queue
    for __ in range(len(self.pool)):
        self.task_queue.put('STOP')
    self.task_queue.close()
    # wait for the task queue to empty
    self.task_queue.join_thread()
    # Wait for all the workers to exit
    while len(self.pool):
        for p in self.pool:
            if not p.is_alive():
                self.pool.remove(p)
        sleep(0.1)
        Stat(self).save()
    # Finally stop the monitor
    self.result_queue.put('STOP')
    self.result_queue.close()
    # Wait for the result queue to empty
    self.result_queue.join_thread()
    logger.info(_('{} waiting for the monitor.').format(name))
    # Wait for everything to close or time out
    count = 0
    if not self.timeout:
        self.timeout = 30
    while self.status() == Conf.STOPPING and count < self.timeout * 10:
        sleep(0.1)
        Stat(self).save()
        count += 1
    # Final status
    Stat(self).save()

def worker(task_queue, result_queue, timer, timeout=Conf.TIMEOUT):
    """
    Takes a task from the task queue, tries to execute it and puts the result back in the result queue
    :type task_queue: multiprocessing.Queue
    :type result_queue: multiprocessing.Queue
    :type timer: multiprocessing.Value
    """
    name = current_process().name
    logger.info(_('{} ready for work at {}').format(name, current_process().pid))
    task_count = 0
    # Start reading the task queue
    for task in iter(task_queue.get, 'STOP'):
        result = None
        timer.value = -1  # Idle
        task_count += 1
        # Get the function from the task
        logger.info(_('{} processing [{}]').format(name, task['name']))
        f = task['func']
        # if it's not an instance try to get it from the string
        if not callable(task['func']):
            try:
                module, func = f.rsplit('.', 1)
                m = importlib.import_module(module)
                f = getattr(m, func)
            except (ValueError, ImportError, AttributeError) as e:
                result = (e, False)
                if error_reporter:
                    error_reporter.report()
                if rollbar:
                    rollbar.report_exc_info()
        # We're still going
        if not result:
            db.close_old_connections()
            timer_value = task['kwargs'].pop('timeout', timeout or 0)
            # signal execution
            pre_execute.send(sender="django_q", func=f, task=task)
            # execute the payload
            timer.value = timer_value  # Busy
            try:
                res = f(*task['args'], **task['kwargs'])
                result = (res, True)
            except Exception as e:
                result = ('{}'.format(e), False)
                if error_reporter:
                    error_reporter.report()
                if rollbar:
                    rollbar.report_exc_info()
        # Process result
        task['result'] = result[0]
        task['success'] = result[1]
        task['stopped'] = timezone.now()
        result_queue.put(task)
        timer.value = -1  # Idle
        # Recycle
        if task_count == Conf.RECYCLE:
            timer.value = -2  # Recycled
            break
    logger.info(_('{} stopped doing work').format(name))

def guard(self):
    logger.info(_(f"{current_process().name} guarding cluster {humanize(self.cluster_id.hex)}"))
    self.start_event.set()
    Stat(self).save()
    logger.info(_(f"Q Cluster {humanize(self.cluster_id.hex)} running."))
    counter = 0
    cycle = Conf.GUARD_CYCLE  # guard loop sleep in seconds
    # Guard loop. Runs at least once
    while not self.stop_event.is_set() or not counter:
        # Check Workers
        for p in self.pool:
            with p.timer.get_lock():
                # Are you alive?
                if not p.is_alive() or p.timer.value == 0:
                    self.reincarnate(p)
                    continue
                # Decrement timer if work is being done
                if p.timer.value > 0:
                    p.timer.value -= cycle
        # Check Monitor
        if not self.monitor.is_alive():
            self.reincarnate(self.monitor)
        # Check Pusher
        if not self.pusher.is_alive():
            self.reincarnate(self.pusher)
        # Call scheduler once a minute (or so)
        counter += cycle
        if counter >= 30 and Conf.SCHEDULER:
            counter = 0
            scheduler(broker=self.broker)
        # Save current status
        Stat(self).save()
        sleep(cycle)
    self.stop()

def start(self) -> int:
    # Start Sentinel
    self.stop_event = Event()
    self.start_event = Event()
    self.sentinel = Process(
        target=Sentinel,
        args=(
            self.stop_event,
            self.start_event,
            self.cluster_id,
            self.broker,
            self.timeout,
        ),
    )
    self.sentinel.start()
    logger.info(_(f"Q Cluster {self.name} starting."))
    while not self.start_event.is_set():
        sleep(0.1)
    # Create cluster instance
    ClusterModel.objects.create(id=self.cluster_id, start_time=timezone.now(), hostname=self.host, pid=self.pid)
    return self.pid

def guard(self):
    logger.info(_('{} guarding cluster at {}').format(current_process().name, self.pid))
    self.start_event.set()
    Stat(self).save()
    logger.info(_('Q Cluster-{} running.').format(self.parent_pid))
    scheduler(list_key=self.list_key)
    counter = 0
    # Guard loop. Runs at least once
    while not self.stop_event.is_set() or not counter:
        # Check Workers
        for p in self.pool:
            # Are you alive?
            if not p.is_alive() or (self.timeout and int(p.timer.value) == 0):
                self.reincarnate(p)
                continue
            # Decrement timer if work is being done
            if p.timer.value > 0:
                p.timer.value -= 1
        # Check Monitor
        if not self.monitor.is_alive():
            self.reincarnate(self.monitor)
        # Check Pusher
        if not self.pusher.is_alive():
            self.reincarnate(self.pusher)
        # Call scheduler once a minute (or so)
        counter += 1
        if counter > 120:
            counter = 0
            scheduler(list_key=self.list_key)
        # Save current status
        Stat(self).save()
        sleep(0.5)
    self.stop()

def start(self):
    # Start Sentinel
    if isinstance(self.broker, ORM):
        logger.info(_("Django ORM broker is not supported"))
        return
    self.stop_event = Event()
    self.start_event = Event()
    self.sentinel = Process(
        target=Sentinel,
        args=(
            self.stop_event,
            self.start_event,
            self.cluster_id,
            self.broker,
            self.timeout,
        ),
    )
    self.sentinel.start()
    logger.info(_(f"Q Cluster {self.name} starting."))
    while not self.start_event.is_set():
        sleep(0.1)
    return self.pid

def worker(task_queue, result_queue, timer, timeout=Conf.TIMEOUT):
    """
    Takes a task from the task queue, tries to execute it and puts the result back in the result queue
    :type task_queue: multiprocessing.Queue
    :type result_queue: multiprocessing.Queue
    :type timer: multiprocessing.Value
    """
    name = current_process().name
    logger.info(_('{} ready for work at {}').format(name, current_process().pid))
    task_count = 0
    # Start reading the task queue
    for pack in iter(task_queue.get, 'STOP'):
        result = None
        timer.value = -1  # Idle
        task_count += 1
        # unpickle the task
        try:
            task = signing.SignedPackage.loads(pack)
        except (TypeError, signing.BadSignature) as e:
            logger.error(e)
            continue
        # Get the function from the task
        logger.info(_('{} processing [{}]').format(name, task['name']))
        f = task['func']
        # if it's not an instance try to get it from the string
        if not callable(task['func']):
            try:
                module, func = f.rsplit('.', 1)
                m = importlib.import_module(module)
                f = getattr(m, func)
            except (ValueError, ImportError, AttributeError) as e:
                result = (e, False)
        # We're still going
        if not result:
            # execute the payload
            timer.value = task['kwargs'].pop('timeout', timeout or 0)  # Busy
            try:
                res = f(*task['args'], **task['kwargs'])
                result = (res, True)
            except Exception as e:
                result = (e, False)
        # Process result
        task['result'] = result[0]
        task['success'] = result[1]
        task['stopped'] = timezone.now()
        result_queue.put(task)
        timer.value = -1  # Idle
        # Recycle
        if task_count == Conf.RECYCLE:
            timer.value = -2  # Recycled
            break
    logger.info(_('{} stopped doing work').format(name))

def worker(task_queue, result_queue, timer, timeout=Conf.TIMEOUT):
    """
    Takes a task from the task queue, tries to execute it and puts the result back in the result queue
    :type task_queue: multiprocessing.Queue
    :type result_queue: multiprocessing.Queue
    :type timer: multiprocessing.Value
    """
    name = current_process().name
    logger.info(_('{} ready for work at {}').format(name, current_process().pid))
    task_count = 0
    # Start reading the task queue
    for pack in iter(task_queue.get, 'STOP'):
        result = None
        timer.value = -1  # Idle
        task_count += 1
        # unpickle the task
        try:
            task = signing.SignedPackage.loads(pack)
        except (TypeError, signing.BadSignature) as e:
            logger.error(e)
            continue
        # Get the function from the task
        logger.info(_('{} processing [{}]').format(name, task['name']))
        f = task['func']
        # if it's not an instance try to get it from the string
        if not callable(task['func']):
            try:
                module, func = f.rsplit('.', 1)
                m = importlib.import_module(module)
                f = getattr(m, func)
            except (ValueError, ImportError, AttributeError) as e:
                result = (e, False)
        # We're still going
        if not result:
            # execute the payload
            timer.value = task['kwargs'].pop('timeout', timeout or 0)  # Busy
            try:
                res = f(*task['args'], **task['kwargs'])
                result = (res, True)
            except Exception as e:
                result = (e, False)
        # Process result
        task['result'] = result[0]
        task['success'] = result[1]
        task['stopped'] = timezone.now()
        result_queue.put(task)
        timer.value = -1  # Idle
        # Recycle
        if task_count == Conf.RECYCLE:
            timer.value = -2  # Recycled
            break
    logger.info(_('{} stopped doing work').format(name))

def monitor(result_queue):
    """
    Gets finished tasks from the result queue and saves them to Django
    :type result_queue: multiprocessing.Queue
    """
    name = current_process().name
    logger.info(_("{} monitoring at {}").format(name, current_process().pid))
    for task in iter(result_queue.get, 'STOP'):
        save_task(task)
        if task['success']:
            logger.info(_("Processed [{}]").format(task['name']))
        else:
            logger.error(_("Failed [{}] - {}").format(task['name'], task['result']))
    logger.info(_("{} stopped monitoring results").format(name))

def monitor(result_queue):
    """
    Gets finished tasks from the result queue and saves them to Django
    :type result_queue: multiprocessing.Queue
    """
    name = current_process().name
    logger.info(_("{} monitoring at {}").format(name, current_process().pid))
    db.close_old_connections()
    for task in iter(result_queue.get, "STOP"):
        save_task(task)
        if task["success"]:
            logger.info(_("Processed [{}]").format(task["name"]))
        else:
            logger.error(_("Failed [{}] - {}").format(task["name"], task["result"]))
    logger.info(_("{} stopped monitoring results").format(name))

def monitor(result_queue):
    """
    Gets finished tasks from the result queue and saves them to Django
    :type result_queue: multiprocessing.Queue
    """
    name = current_process().name
    logger.info(_("{} monitoring at {}").format(name, current_process().pid))
    for task in iter(result_queue.get, 'STOP'):
        save_task(task)
        if task['success']:
            logger.info(_("Processed [{}]").format(task['name']))
        else:
            logger.error(_("Failed [{}] - {}").format(task['name'], task['result']))
    logger.info(_("{} stopped monitoring results").format(name))

def worker(task_queue, result_queue, timer, timeout=Conf.TIMEOUT):
    """
    Takes a task from the task queue, tries to execute it and puts the result back in the result queue
    :type task_queue: multiprocessing.Queue
    :type result_queue: multiprocessing.Queue
    :type timer: multiprocessing.Value
    """
    name = current_process().name
    logger.info(_("{} ready for work at {}").format(name, current_process().pid))
    task_count = 0
    # Start reading the task queue
    for task in iter(task_queue.get, "STOP"):
        result = None
        timer.value = -1  # Idle
        task_count += 1
        # Get the function from the task
        logger.info(_("{} processing [{}]").format(name, task["name"]))
        f = task["func"]
        # if it's not an instance try to get it from the string
        if not callable(task["func"]):
            try:
                module, func = f.rsplit(".", 1)
                m = importlib.import_module(module)
                f = getattr(m, func)
            except (ValueError, ImportError, AttributeError) as e:
                result = (e, False)
        # We're still going
        if not result:
            db.close_old_connections()
            # execute the payload
            timer.value = task["kwargs"].pop("timeout", timeout or 0)  # Busy
            try:
                res = f(*task["args"], **task["kwargs"])
                result = (res, True)
            except Exception as e:
                result = (e, False)
        # Process result
        task["result"] = result[0]
        task["success"] = result[1]
        task["stopped"] = timezone.now()
        result_queue.put(task)
        timer.value = -1  # Idle
        # Recycle
        if task_count == Conf.RECYCLE:
            timer.value = -2  # Recycled
            break
    logger.info(_("{} stopped doing work").format(name))

def worker(task_queue, result_queue, timer, timeout=Conf.TIMEOUT):
    """
    Takes a task from the task queue, tries to execute it and puts the result back in the result queue
    :type task_queue: multiprocessing.Queue
    :type result_queue: multiprocessing.Queue
    :type timer: multiprocessing.Value
    """
    name = current_process().name
    logger.info(_('{} ready for work at {}').format(name, current_process().pid))
    task_count = 0
    # Start reading the task queue
    for task in iter(task_queue.get, 'STOP'):
        result = None
        timer.value = -1  # Idle
        task_count += 1
        # record the current worker's PID, so we can kill it later
        task['worker_process_pid'] = os.getpid()
        logger.info("Got job for worker {}".format(os.getpid()))
        Task.objects.filter(id=task['id']).update(worker_process_pid=task['worker_process_pid'])
        # mark the task as being run now.
        Task.objects.filter(id=task['id']).update(task_status=Task.INPROGRESS)
        # Get the function from the task
        logger.info(_('{} processing [{}]').format(name, task['name']))
        f = task['func']
        # if it's not an instance try to get it from the string
        if not callable(task['func']):
            try:
                module, func = f.rsplit('.', 1)
                m = importlib.import_module(module)
                f = getattr(m, func)
            except (ValueError, ImportError, AttributeError) as e:
                result = (e, False, Task.FAILED)
                if rollbar:
                    rollbar.report_exc_info()
        # We're still going
        if not result:
            db.close_old_connections()
            timer_value = task['kwargs'].pop('timeout', timeout or 0)
            # signal execution
            pre_execute.send(sender="django_q", func=f, task=task)
            if task['is_progress_updating']:
                task['kwargs']['update_state'] = partial(update_task_progress, task)
            # execute the payload
            timer.value = timer_value  # Busy
            try:
                res = f(*task['args'], **task['kwargs'])
                result = (res, True, Task.SUCCESS)
            except Exception as e:
                e.traceback = traceback.format_exc()
                result = (e, False, Task.FAILED)
                if rollbar:
                    rollbar.report_exc_info()
            # make sure to remove the update_state func before shuffling across
            # process boundaries (through the result_queue), since its globals()
            # contains multiprocessing.Queue objects, which are unpickleable
            task['kwargs'].pop('update_state', None)
        # Process result
        task['result'] = result[0]
        task['success'] = result[1]
        task['task_status'] = result[2]
        task['stopped'] = timezone.now()
        result_queue.put(task)
        timer.value = -1  # Idle
        # Recycle
        if task_count == Conf.RECYCLE:
            timer.value = -2  # Recycled
            break
    logger.info(_('{} stopped doing work').format(name))

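# A hedged sketch of a task function written for the progress-updating worker above:
# when `is_progress_updating` is set, the worker injects an `update_state` callable into
# kwargs. The callback's exact signature is an assumption here; a dict payload and a no-op
# default keep the function usable outside the cluster as well.
def import_rows(path, update_state=lambda state: None):
    rows = ["r1", "r2", "r3"]                             # illustrative payload, not from the source
    for done, _row in enumerate(rows, start=1):
        update_state({"done": done, "total": len(rows)})  # report progress back to the Task record
    return len(rows)
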
def scheduler(broker=None):
    """
    Creates a task from a schedule at the scheduled time and schedules next run
    """
    if not broker:
        broker = get_broker()
    db.close_old_connections()
    try:
        for s in Schedule.objects.exclude(repeats=0).filter(next_run__lt=timezone.now()):
            args = ()
            kwargs = {}
            # get args, kwargs and hook
            if s.kwargs:
                try:
                    # eval should be safe here because dict()
                    kwargs = eval('dict({})'.format(s.kwargs))
                except SyntaxError:
                    kwargs = {}
            if s.args:
                args = ast.literal_eval(s.args)
                # single value won't eval to tuple, so:
                if type(args) != tuple:
                    args = (args,)
            q_options = kwargs.get('q_options', {})
            if s.hook:
                q_options['hook'] = s.hook
            # set up the next run time
            if not s.schedule_type == s.ONCE:
                next_run = arrow.get(s.next_run)
                while True:
                    if s.schedule_type == s.MINUTES:
                        next_run = next_run.replace(minutes=+(s.minutes or 1))
                    elif s.schedule_type == s.HOURLY:
                        next_run = next_run.replace(hours=+1)
                    elif s.schedule_type == s.DAILY:
                        next_run = next_run.replace(days=+1)
                    elif s.schedule_type == s.WEEKLY:
                        next_run = next_run.replace(weeks=+1)
                    elif s.schedule_type == s.MONTHLY:
                        next_run = next_run.replace(months=+1)
                    elif s.schedule_type == s.QUARTERLY:
                        next_run = next_run.replace(months=+3)
                    elif s.schedule_type == s.YEARLY:
                        next_run = next_run.replace(years=+1)
                    if Conf.CATCH_UP or next_run > arrow.utcnow():
                        break
                s.next_run = next_run.datetime
                s.repeats += -1
            # send it to the cluster
            q_options['broker'] = broker
            q_options['group'] = q_options.get('group', s.name or s.id)
            kwargs['q_options'] = q_options
            s.task = tasks.async(s.func, *args, **kwargs)
            # log it
            if not s.task:
                logger.error(
                    _('{} failed to create a task from schedule [{}]').format(current_process().name, s.name or s.id))
            else:
                logger.info(
                    _('{} created a task from schedule [{}]').format(current_process().name, s.name or s.id))
            # default behavior is to delete a ONCE schedule
            if s.schedule_type == s.ONCE:
                if s.repeats < 0:
                    s.delete()
                    continue
                # but not if it has a positive repeats
                s.repeats = 0
            # save the schedule
            s.save()
    except Exception as e:
        logger.error(e)

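# A hedged sketch of creating the kind of Schedule row the scheduler above consumes;
# assumes django_q's Schedule model (field names mirror the attributes read above) inside
# a configured Django project:
from django.utils import timezone
from django_q.models import Schedule

Schedule.objects.create(
    func="math.copysign",            # dotted path, resolved by the worker at run time
    args="(2, -2)",                  # literal_eval'd into a tuple, as in the scheduler above
    schedule_type=Schedule.DAILY,
    repeats=-1,                      # negative repeats never hit the repeats=0 exclusion
    next_run=timezone.now(),
)
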