def __init__(self, queues, host='localhost', workers=None, interval=60,
             workdir='.', resume=False, **kwargs):
    _host, _, _port = host.partition(':')
    _port = int(_port or 6379)
    _password = kwargs.get('password')
    self.host = _host
    self.port = _port
    self.password = _password
    self.client = qless.client(self.host, self.port, password=self.password)
    self.count = workers or psutil.NUM_CPUS
    self.interval = interval
    self.queues = queues
    self.resume = resume
    self.sandboxes = {}
    # This is for filesystem sandboxing. Each worker has a directory
    # associated with it, which it should make sure is the working
    # directory in which it runs each of the jobs. It should also
    # ensure that the directory exists, and clobbers files before
    # each run, and after.
    self.workdir = os.path.abspath(workdir)
    self.sandbox = self.workdir
    # I'm the parent, so I have a negative worker id
    self.worker_id = -1
    self.master = True
    # These are the job ids that I should get to first, before
    # picking off other jobs
    self.jids = []
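
# Usage sketch (not from the source): the enclosing class isn't named in
# this snippet, so `Worker` below is a stand-in. It shows the 'host:port'
# form the constructor parses, plus the password passed through **kwargs.
worker = Worker(['testing', 'staging'], host='localhost:6380', workers=4,
                interval=30, workdir='/tmp/qless-sandboxes',
                password='secret')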
def work(self):
    # We should probably open up our own redis client
    self.client = qless.client(self.host, self.port)
    self.queues = [self.client.queues[q] for q in self.queues]

    if not os.path.isdir(self.sandbox):
        os.makedirs(self.sandbox)

    from gevent.pool import Pool
    from gevent import sleep, Greenlet
    pool = Pool(self.pool_size)
    while True:
        try:
            seen = False
            for queue in self.queues:
                # Wait until a greenlet is available
                pool.wait_available()
                job = queue.pop()
                if job:
                    # For whatever reason, doing imports within a greenlet
                    # (there's one implicitly invoked in job.process) was
                    # throwing exceptions. The relatively ghetto way to get
                    # around this is to force the import to happen before
                    # the greenlet is spawned.
                    _module = job.klass
                    seen = True
                    pool.start(Greenlet(job.process))

            if not seen:
                logger.debug('Sleeping for %fs' % self.interval)
                sleep(self.interval)
        except KeyboardInterrupt:
            return
def check(cluster, config):
    global QNAMES, THRESHOLD

    # Get a Qless connection
    try:
        master = qless.client(
            host=config["clusters"][cluster]["qless"]["master"]["host"],
            port=config["clusters"][cluster]["qless"]["master"]["port"],
        )
    except (KeyError, TypeError):
        # Should not happen.
        return "Bad cluster."

    # Initialize some things
    njobs = 0
    found = False

    # Determine how many jobs are waiting in the watched queues
    delim = ""
    ret = ""
    for queue in master.queues.counts:
        if queue["name"] in QNAMES:
            ret += delim + queue["name"] + ": " + str(queue["waiting"])
            delim = ", "
            njobs += queue["waiting"]
            found = True

    # This should not happen...
    if not found:
        return "No data."

    # Return backlog counts or 'OK' if no backlog.
    return ret if njobs > THRESHOLD else "OK"
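
# Illustration (assumed, not from the source): the module-level globals and
# nested config shape that check() above indexes into. The names QNAMES and
# THRESHOLD come from the function; the concrete values here are invented.
QNAMES = set(["awaiting-upload", "awaiting-render"])
THRESHOLD = 100
config = {
    "clusters": {
        "prod": {
            "qless": {
                "master": {"host": "qless-master.example.com", "port": 6379},
            },
        },
    },
}
print check("prod", config)  # e.g. "awaiting-upload: 250, ..." or "OK"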
def work(self):
    # We should probably open up our own redis client
    self.client = qless.client(url=self.host)
    self.queues = [self.client.queues[q] for q in self.queues]

    if not os.path.isdir(self.sandbox):
        os.makedirs(self.sandbox)

    from gevent.pool import Pool
    from gevent import sleep, Greenlet
    pool = Pool(self.pool_size)
    while True:
        try:
            seen = False
            for queue in self.queues:
                # Wait until a greenlet is available
                pool.wait_available()
                job = queue.pop()
                if job:
                    # For whatever reason, doing imports within a greenlet
                    # (there's one implicitly invoked in job.process) was
                    # throwing exceptions. The relatively ghetto way to get
                    # around this is to force the import to happen before
                    # the greenlet is spawned.
                    _module = job.klass
                    seen = True
                    pool.start(Greenlet(job.process))

            if not seen:
                logger.debug('Sleeping for %fs' % self.interval)
                sleep(self.interval)
        except KeyboardInterrupt:
            return
def __init__(self, queues, host='localhost', workers=None, interval=60,
             workdir='.', resume=False, stop_on_idle=False, **kwargs):
    if host.startswith('redis://'):
        self.host = host
    else:
        # Normalize 'host' or 'host:port' to a redis:// url, defaulting
        # the port to 6379
        _host, _, _port = host.partition(':')
        self.host = 'redis://%s:%d' % (_host, int(_port or 6379))
    self.client = qless.client(url=self.host)
    self.count = workers or psutil.NUM_CPUS
    self.interval = interval
    self.queues = queues
    self.resume = resume
    self.stop_on_idle = stop_on_idle
    self.sandboxes = {}
    # This is for filesystem sandboxing. Each worker has a directory
    # associated with it, which it should make sure is the working
    # directory in which it runs each of the jobs. It should also
    # ensure that the directory exists, and clobbers files before
    # each run, and after.
    self.workdir = os.path.abspath(workdir)
    self.sandbox = self.workdir
    # I'm the parent, so I have a negative worker id
    self.worker_id = -1
    self.master = True
    # These are the job ids that I should get to first, before
    # picking off other jobs
    self.jids = []
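
# Usage sketch (assumed): the two host forms this constructor accepts.
# `Worker` is again a stand-in for the unnamed enclosing class.
w1 = Worker(['testing'], host='redis://localhost:6379')  # taken as-is
w2 = Worker(['testing'], host='localhost')  # normalized to redis://localhost:6379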
def work(self):
    # We should probably open up our own redis client
    self.client = qless.client(url=self.host)
    self.queues = [self.client.queues[q] for q in self.queues]

    if not os.path.isdir(self.sandbox):
        os.makedirs(self.sandbox)
    self.clean()

    # First things first, we should clear out any jobs that
    # we're responsible for off-hand
    while len(self.jids):
        try:
            job = self.client.jobs[self.jids.pop(0)]
            # If we still have access to it, then we should process it
            if job.heartbeat():
                logger.info('Resuming %s' % job.jid)
                self.setproctitle('Working %s (%s)' % (job.jid, job.klass_name))
                job.process()
                self.clean()
            else:
                logger.warn('Lost heart on would-be resumed job %s' % job.jid)
        except KeyboardInterrupt:
            return

    sleep_cycles = 0
    while True:
        try:
            for queue in self.queues:
                job = queue.pop()
                if job:
                    sleep_cycles = -1
                    self.setproctitle('Working %s (%s)' % (job.jid, job.klass_name))
                    job.process()
                    self.clean()

            if self.stop_on_idle and sleep_cycles >= 2:
                logger.info("Idle for too long, quitting")
                import sys
                sys.exit(self.IDLE_EXIT_STATUS)
            if sleep_cycles >= 0:
                self.setproctitle('sleeping...')
                logger.debug('Sleeping for %fs' % self.interval)
                time.sleep(self.interval)
                sleep_cycles += 1
            else:
                sleep_cycles = 0
        except KeyboardInterrupt:
            return
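
# Usage sketch (assumed): with stop_on_idle=True, the loop above calls
# sys.exit(self.IDLE_EXIT_STATUS) after two consecutive idle polling
# cycles, so a supervisor can tell "idled out" from a crash by exit code.
# IDLE_EXIT_STATUS is a class attribute assumed to be defined elsewhere.
worker = Worker(['testing'], host='redis://localhost:6379',
                interval=60, stop_on_idle=True)
worker.work()  # returns only via KeyboardInterrupt or the idle sys.exit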
def check(cluster, config):
    global THRESHOLD

    # Get a Qless connection
    try:
        master = qless.client(
            host=config['clusters'][cluster]['qless']['master']['host'],
            port=config['clusters'][cluster]['qless']['master']['port']
        )
    except (KeyError, TypeError):
        # Should not happen.
        return "Bad cluster."

    # Determine how many jobs are backlogged
    njobs = len(master.queues['awaiting-upload'].peek(100000))

    # Return backlog counts or 'OK' if no backlog.
    return ('Backlogged: %d' % njobs) if njobs > THRESHOLD else 'OK'
def work(self):
    # We should probably open up our own redis client
    self.client = qless.client(self.host, self.port)
    self.queues = [self.client.queues[q] for q in self.queues]

    if not os.path.isdir(self.sandbox):
        os.makedirs(self.sandbox)
    self.clean()

    # First things first, we should clear out any jobs that
    # we're responsible for off-hand
    while len(self.jids):
        try:
            job = self.client.jobs[self.jids.pop(0)]
            # If we still have access to it, then we should process it
            if job.heartbeat():
                logger.info('Resuming %s' % job.jid)
                self.setproctitle('Working %s (%s)' % (job.jid, job.klass_name))
                job.process()
                self.clean()
            else:
                logger.warn('Lost heart on would-be resumed job %s' % job.jid)
        except KeyboardInterrupt:
            return

    while True:
        try:
            seen = False
            for queue in self.queues:
                job = queue.pop()
                if job:
                    seen = True
                    self.setproctitle('Working %s (%s)' % (job.jid, job.klass_name))
                    job.process()
                    self.clean()

            if not seen:
                self.setproctitle('sleeping...')
                logger.debug('Sleeping for %fs' % self.interval)
                time.sleep(self.interval)
        except KeyboardInterrupt:
            return
def work(self):
    # We should probably open up our own redis client
    self.client = qless.client(self.host, self.port, password=self.password)
    self.queues = [self.client.queues[q] for q in self.queues]

    if not os.path.isdir(self.sandbox):
        os.makedirs(self.sandbox)
    self.clean()

    # First things first, we should clear out any jobs that
    # we're responsible for off-hand
    while len(self.jids):
        try:
            job = self.client.jobs[self.jids.pop(0)]
            # If we still have access to it, then we should process it
            if job.heartbeat():
                logger.info('Resuming %s' % job.jid)
                self.setproctitle('Working %s (%s)' % (job.jid, job.klass_name))
                job.process()
                self.clean()
            else:
                logger.warn('Lost heart on would-be resumed job %s' % job.jid)
        except KeyboardInterrupt:
            return

    while True:
        try:
            seen = False
            for queue in self.queues:
                job = queue.pop()
                if job:
                    seen = True
                    self.setproctitle('Working %s (%s)' % (job.jid, job.klass_name))
                    job.process()
                    self.clean()

            if not seen:
                self.setproctitle('sleeping...')
                logger.debug('Sleeping for %fs' % self.interval)
                time.sleep(self.interval)
        except KeyboardInterrupt:
            return
def __init__(self, *a, **kw):
    threading.Thread.__init__(self, *a, **kw)
    # This is to fake out thread-level workers
    tmp = qless.client(host=args.host, port=args.port)
    tmp.worker += "-" + self.getName()
    self.q = tmp.queue("testing")
import logging
import threading

logger = logging.getLogger("qless-bench")
formatter = logging.Formatter("[%(asctime)s] %(threadName)s => %(message)s")
handler = logging.StreamHandler()
handler.setLevel(logging.DEBUG)
handler.setFormatter(formatter)
logger.addHandler(handler)
if args.verbose:
    logger.setLevel(logging.DEBUG)
else:
    logger.setLevel(logging.WARN)

# Our qless client
client = qless.client(host=args.host, port=args.port)


class ForgetfulWorker(threading.Thread):
    def __init__(self, *a, **kw):
        threading.Thread.__init__(self, *a, **kw)
        # This is to fake out thread-level workers
        tmp = qless.client(host=args.host, port=args.port)
        tmp.worker += "-" + self.getName()
        self.q = tmp.queue("testing")

    def run(self):
        while len(self.q):
            job = self.q.pop()
            if not job:
                # Sleep a little bit
        time.sleep(random.random() * 0.5)
        job['underpants'] = {'collected': random.randrange(0, 200)}
        job.complete('unknown')

    @staticmethod
    def unknown(job):
        # After all, it is the unknown stage
        if random.random() < 0.05:
            raise Exception('The most uncertain plans of mice and men...')
        else:
            # We'll scratch our heads a little bit about what to do here
            time.sleep(random.random() * 2.0)
            job['unknown'] = ['?'] * int(random.random() * 10)
            job.complete('profit')

    @staticmethod
    def profit(job):
        # How much profits did we get?!
        job['profit'] = '$%.2f' % (
            random.uniform(0.5, 1.5) * job['underpants']['collected'])
        job.complete()


if __name__ == '__main__':
    import qless
    from qless import gnomes
    client = qless.client()
    underpants = client.queue('underpants')
    for i in range(1000):
        underpants.put(gnomes.GnomesJob, {})
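
# Note (assumed semantics): job.complete('unknown') completes the job in
# its current queue and advances it into the 'unknown' queue, which is how
# the three gnome stages above chain: underpants -> unknown -> profit.
# A sketch of a worker draining those queues, one stage per hop:
for name in ('underpants', 'unknown', 'profit'):
    job = client.queue(name).pop()
    if job:
        job.process()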
def __init__(self, *a, **kw):
    threading.Thread.__init__(self, *a, **kw)
    # This is to fake out thread-level workers
    tmp = qless.client(host=args.host, port=args.port)
    tmp.worker += '-' + self.getName()
    self.q = tmp.queue('testing')
import logging
import threading

logger = logging.getLogger('qless-bench')
formatter = logging.Formatter('[%(asctime)s] %(threadName)s => %(message)s')
handler = logging.StreamHandler()
handler.setLevel(logging.DEBUG)
handler.setFormatter(formatter)
logger.addHandler(handler)
if args.verbose:
    logger.setLevel(logging.DEBUG)
else:
    logger.setLevel(logging.WARN)

# Our qless client
client = qless.client(host=args.host, port=args.port)


class ForgetfulWorker(threading.Thread):
    def __init__(self, *a, **kw):
        threading.Thread.__init__(self, *a, **kw)
        # This is to fake out thread-level workers
        tmp = qless.client(host=args.host, port=args.port)
        tmp.worker += '-' + self.getName()
        self.q = tmp.queue('testing')

    def run(self):
        while len(self.q):
            job = self.q.pop()
            if not job:
                # Sleep a little bit
                time.sleep(0.1)
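
# Sketch (assumed): how the benchmark presumably drives these workers. The
# thread count and the start/join loop are not shown in the snippet above.
workers = [ForgetfulWorker() for _ in range(10)]
for w in workers:
    w.start()
for w in workers:
    w.join()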
def check(cluster, config):
    global QNAME, NOTFOUND, CLUSTERS

    # Get number of crawlers we expect based on cluster
    ncrawlers = CLUSTERS[cluster]

    # Get a Qless connection
    try:
        master = qless.client(
            host=config['clusters'][cluster]['qless']['master']['host'],
            port=config['clusters'][cluster]['qless']['master']['port']
        )
    except (KeyError, TypeError):
        # Should not happen.
        return "Bad cluster."

    # Initialize some things
    njobs = NOTFOUND

    # Determine how many jobs are running
    for queue in master.queues.counts:
        if queue['name'] == QNAME:
            njobs = queue['running']
            break

    # This should not happen...
    if njobs == NOTFOUND:
        return "No data."

    # Each crawler must be running at least one third its fair share, else
    # we proclaim it lazy and tattle on it.
    minjobs = njobs / ncrawlers / 3

    # Determine how many jobs are running on each crawler
    prefix = cluster + '-crawler-'
    crawlers = {}
    for node in master.workers.counts:
        if node['name'].startswith(prefix):
            crawlers[node['name']] = (node.get('jobs', 0), node.get('stalled', 0))

    # Tell on any lazy crawlers. Something of a PITA as Zabbix forces us to
    # be über-terse.
    lazy = []
    mia = []
    for i in range(1, ncrawlers + 1):
        key = prefix + '%02d' % i
        try:
            if crawlers[key][0] < minjobs:
                lazy.append(key)
        except KeyError:
            mia.append(key)

    # If OK, say so (Zabbix hates empty strings).
    if len(lazy) == 0 and len(mia) == 0:
        return 'OK'

    # Else issue a terse summary of what's wrong.
    ret = ''
    delim = ''
    if len(lazy) != 0:
        ret = 'Lazy: ' + ', '.join(lazy)
        delim = '; '
    if len(mia) != 0:
        ret += delim + 'MIA: ' + ', '.join(mia)
    return ret
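
# Illustration (assumed, not from the source): the globals check() above
# relies on; the concrete values are invented stand-ins.
QNAME = 'crawl'          # queue whose 'running' count we inspect
NOTFOUND = -1            # sentinel meaning "queue not seen in counts"
CLUSTERS = {'prod': 8}   # cluster name -> expected number of crawlers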