Beispiel #1
0
 def __init__(self, queues, host='localhost', workers=None, interval=60, workdir='.', resume=False, **kwargs):
     """Set up the parent worker: connection details, worker count, sandbox.

     `host` is either ``'name'`` or ``'name:port'``; when no port is given
     6379 is used. A ``password`` keyword argument, if present, is passed
     on to the qless client.
     """
     # Split "host[:port]" and settle on a port before storing anything
     hostname, _, port_text = host.partition(':')
     port_number = int(port_text or 6379)
     self.host = hostname
     self.port = port_number
     self.password = kwargs.get('password')
     self.client = qless.client(self.host, self.port, password=self.password)

     # Fall back to the CPU count when no worker count was requested
     self.count = workers if workers else psutil.NUM_CPUS
     self.interval = interval
     self.queues = queues
     self.resume = resume
     self.sandboxes = {}

     # Filesystem sandboxing: every worker owns a directory that must be
     # its working directory while it runs a job. The worker is also
     # expected to make sure that directory exists and to clobber its
     # contents before and after each run.
     self.workdir = os.path.abspath(workdir)
     self.sandbox = self.workdir

     # The parent is marked with a negative worker id
     self.worker_id = -1
     self.master = True

     # Job ids to be handled first, before popping anything else
     self.jids = []
Beispiel #2
0
    def work(self):
        """Pop jobs from every configured queue and run each in a greenlet.

        Replaces ``self.client`` with a fresh qless client and
        ``self.queues`` with the matching queue objects, makes sure the
        sandbox directory exists, then loops forever. Whenever a full pass
        over the queues yields no job, sleeps for ``self.interval``
        seconds. Returns on KeyboardInterrupt.
        """
        # We should probably open up our own redis client
        self.client = qless.client(self.host, self.port)
        self.queues = [self.client.queues[name] for name in self.queues]

        if not os.path.isdir(self.sandbox):
            os.makedirs(self.sandbox)

        from gevent.pool import Pool
        from gevent import sleep, Greenlet
        greenlets = Pool(self.pool_size)
        while True:
            try:
                popped_any = False
                for queue in self.queues:
                    # Hold off popping until the pool has a free slot
                    greenlets.wait_available()
                    job = queue.pop()
                    if not job:
                        continue
                    # Importing inside a greenlet (job.process does so
                    # implicitly) was raising exceptions. As a workaround,
                    # touch job.klass here so the import has already
                    # happened before the greenlet is spawned.
                    _ = job.klass
                    popped_any = True
                    greenlets.start(Greenlet(job.process))

                if popped_any:
                    continue
                logger.debug('Sleeping for %fs' % self.interval)
                sleep(self.interval)
            except KeyboardInterrupt:
                return
Beispiel #3
0
def check(cluster, config):
    """Summarise the waiting-job backlog of the queues listed in QNAMES.

    Returns:
        "Bad cluster." if the qless master host/port cannot be read from
        `config` for `cluster`; "No data." if none of the reported queues
        is in QNAMES; a "name: waiting, ..." summary if the total number
        of waiting jobs exceeds THRESHOLD; "OK" otherwise.
    """
    # Get a Qless connection
    try:
        endpoint = config["clusters"][cluster]["qless"]["master"]
        master = qless.client(host=endpoint["host"], port=endpoint["port"])
    except (KeyError, TypeError):
        # Should not happen.
        return "Bad cluster."

    # Collect one "name: waiting" entry per monitored queue and the total
    summaries = []
    backlog = 0
    for queue in master.queues.counts:
        if queue["name"] not in QNAMES:
            continue
        summaries.append(queue["name"] + ": " + str(queue["waiting"]))
        backlog += queue["waiting"]

    # This should not happen...
    if not summaries:
        return "No data."

    # Return backlog counts or 'OK' if no backlog.
    if backlog > THRESHOLD:
        return ", ".join(summaries)
    return "OK"
Beispiel #4
0
 def work(self):
     """Pop jobs from every configured queue and run each in a greenlet.

     Builds a fresh qless client from the ``self.host`` URL, swaps the
     entries of ``self.queues`` for queue objects, makes sure the sandbox
     directory exists, then loops forever. A pass over the queues that
     yields no job is followed by a sleep of ``self.interval`` seconds.
     Returns on KeyboardInterrupt.
     """
     # We should probably open up our own redis client
     self.client = qless.client(url=self.host)
     self.queues = [self.client.queues[name] for name in self.queues]

     if not os.path.isdir(self.sandbox):
         os.makedirs(self.sandbox)

     from gevent.pool import Pool
     from gevent import sleep, Greenlet
     greenlets = Pool(self.pool_size)
     while True:
         try:
             popped_any = False
             for queue in self.queues:
                 # Hold off popping until the pool has a free slot
                 greenlets.wait_available()
                 job = queue.pop()
                 if not job:
                     continue
                 # Importing inside a greenlet (job.process does so
                 # implicitly) was raising exceptions. As a workaround,
                 # touch job.klass here so the import has already
                 # happened before the greenlet is spawned.
                 _ = job.klass
                 popped_any = True
                 greenlets.start(Greenlet(job.process))

             if popped_any:
                 continue
             logger.debug('Sleeping for %fs' % self.interval)
             sleep(self.interval)
         except KeyboardInterrupt:
             return
Beispiel #5
0
 def __init__(self, queues, host='localhost', workers=None, interval=60, workdir='.', resume=False, stop_on_idle=False, **kwargs):
     """Set up the parent worker: redis URL, worker count and sandbox.

     `host` may be a full ``redis://`` URL, which is used as is, or a
     ``'name'`` / ``'name:port'`` string, which is turned into a
     ``redis://name:port`` URL. The port defaults to 6379 when it is
     missing or empty.

     Raises:
         ValueError: if a port is given but is not an integer.
     """
     if host.startswith('redis://'):
         self.host       = host
     else:
         _split_host = host.split(':')
         # A bare hostname (including the default 'localhost') has no
         # second element; indexing [1] unconditionally raised IndexError
         # and made the 6379 fallback unreachable.
         _port = _split_host[1] if len(_split_host) > 1 else ''
         self.host = 'redis://%s:%d' % (_split_host[0], int(_port or 6379))
     self.client     = qless.client(url=self.host)
     self.count      = workers or psutil.NUM_CPUS
     self.interval   = interval
     self.queues     = queues
     self.resume     = resume
     self.stop_on_idle = stop_on_idle
     self.sandboxes  = {}
     # This is for filesystem sandboxing. Each worker has
     # a directory associated with it, which it should make
     # sure is the working directory in which it runs each
     # of the jobs. It should also ensure that the directory
     # exists, and clobbers files before each run, and after.
     self.workdir    = os.path.abspath(workdir)
     self.sandbox    = self.workdir
     # I'm the parent, so I have a negative worker id
     self.worker_id  = -1
     self.master     = True
     # These are the job ids that I should get to first, before
     # picking off other jobs
     self.jids       = []
Beispiel #6
0
 def work(self):
     """Run this worker's job loop until interrupted or idle for too long.

     First works through the job ids in ``self.jids`` (a job whose
     heartbeat fails is logged and skipped). After that it repeatedly
     makes a pass over ``self.queues``, popping and processing at most
     one job per queue per pass. A pass in which no job was processed is
     followed by a sleep of ``self.interval`` seconds. When
     ``self.stop_on_idle`` is set and two such sleeps have happened in a
     row, a further pass that also finds no job ends the process via
     ``sys.exit(self.IDLE_EXIT_STATUS)``.

     Returns on KeyboardInterrupt.
     """
     # We should probably open up our own redis client
     self.client = qless.client(url=self.host)
     # From here on self.queues holds queue objects instead of names
     self.queues = [self.client.queues[q] for q in self.queues]
     
     if not os.path.isdir(self.sandbox):
         os.makedirs(self.sandbox)
     # NOTE(review): clean() is defined elsewhere; presumably it empties
     # the sandbox directory -- confirm
     self.clean()
     # First things first, we should clear out any jobs that
     # we're responsible for off-hand
     while len(self.jids):
         try:
             job = self.client.jobs[self.jids.pop(0)]
             # If we still have access to it, then we should process it
             if job.heartbeat():
                 logger.info('Resuming %s' % job.jid)
                 self.setproctitle('Working %s (%s)' % (job.jid, job.klass_name))
                 job.process()
                 self.clean()
             else:
                 logger.warn('Lost heart on would-be resumed job %s' % job.jid)
         except KeyboardInterrupt:
             return
     
     # Number of consecutive sleeps so far; -1 is a marker meaning "a job
     # was processed during the current pass"
     sleep_cycles = 0
     while True:
         try:
             for queue in self.queues:
                 job = queue.pop()
                 if job:
                     sleep_cycles = -1
                     self.setproctitle('Working %s (%s)' % (job.jid, job.klass_name))
                     job.process()
                     self.clean()
             
             # Only reachable with sleep_cycles >= 2 when this pass found
             # nothing and the two previous passes each ended in a sleep
             if self.stop_on_idle and sleep_cycles >= 2:
                 logger.info("Idle for too long, quiting")
                 import sys
                 sys.exit(self.IDLE_EXIT_STATUS)
             if sleep_cycles >= 0:
                 # Nothing was processed in this pass: sleep and count it
                 self.setproctitle('sleeping...')
                 logger.debug('Sleeping for %fs' % self.interval)
                 time.sleep(self.interval)
                 sleep_cycles += 1
             else:
                 # Work was done: skip the sleep and restart the idle count
                 sleep_cycles = 0
         except KeyboardInterrupt:
             return
Beispiel #7
0
def check(cluster, config):
    """Report how many jobs are backlogged in the 'awaiting-upload' queue.

    Returns:
        "Bad cluster." if the qless master host/port cannot be read from
        `config` for `cluster`; "Backlogged: N" if the number of peeked
        jobs N exceeds THRESHOLD; "OK" otherwise.
    """
    # Get a Qless connection
    try:
        endpoint = config['clusters'][cluster]['qless']['master']
        master = qless.client(host=endpoint['host'], port=endpoint['port'])
    except (KeyError, TypeError):
        # Should not happen.
        return "Bad cluster."

    # Determine how many jobs are backlogged (peeks at up to 100000 jobs)
    pending = master.queues['awaiting-upload'].peek(100000)
    njobs = len(pending)

    # Return backlog counts or 'OK' if no backlog.
    if njobs > THRESHOLD:
        return 'Backlogged: %d' % njobs
    return 'OK'
Beispiel #8
0
    def work(self):
        """Process resumed jobs first, then pop and run jobs one at a time.

        Replaces ``self.client`` with a fresh qless client and
        ``self.queues`` with the matching queue objects, makes sure the
        sandbox directory exists and calls ``self.clean()``. Job ids in
        ``self.jids`` are handled first; afterwards the queues are polled
        forever, sleeping ``self.interval`` seconds after any pass that
        processed nothing. Returns on KeyboardInterrupt.
        """
        # We should probably open up our own redis client
        self.client = qless.client(self.host, self.port)
        self.queues = [self.client.queues[name] for name in self.queues]

        if not os.path.isdir(self.sandbox):
            os.makedirs(self.sandbox)
        self.clean()

        # First things first, we should clear out any jobs that
        # we're responsible for off-hand
        while self.jids:
            try:
                job = self.client.jobs[self.jids.pop(0)]
                if not job.heartbeat():
                    # We no longer have access to this job
                    logger.warn('Lost heart on would-be resumed job %s' %
                                job.jid)
                    continue
                logger.info('Resuming %s' % job.jid)
                title = 'Working %s (%s)' % (job.jid, job.klass_name)
                self.setproctitle(title)
                job.process()
                self.clean()
            except KeyboardInterrupt:
                return

        while True:
            try:
                worked = False
                for queue in self.queues:
                    job = queue.pop()
                    if not job:
                        continue
                    worked = True
                    title = 'Working %s (%s)' % (job.jid, job.klass_name)
                    self.setproctitle(title)
                    job.process()
                    self.clean()

                if worked:
                    continue
                # Nothing was available on any queue during this pass
                self.setproctitle('sleeping...')
                logger.debug('Sleeping for %fs' % self.interval)
                time.sleep(self.interval)
            except KeyboardInterrupt:
                return
Beispiel #9
0
 def work(self):
     """Process resumed jobs first, then pop and run jobs one at a time.

     Creates a fresh, password-aware qless client, swaps the entries of
     ``self.queues`` for queue objects, makes sure the sandbox directory
     exists and calls ``self.clean()``. Job ids in ``self.jids`` are
     handled first; afterwards the queues are polled forever, sleeping
     ``self.interval`` seconds after any pass that processed nothing.
     Returns on KeyboardInterrupt.
     """
     # We should probably open up our own redis client
     self.client = qless.client(self.host, self.port, password=self.password)
     self.queues = [self.client.queues[name] for name in self.queues]

     if not os.path.isdir(self.sandbox):
         os.makedirs(self.sandbox)
     self.clean()

     # First things first, we should clear out any jobs that
     # we're responsible for off-hand
     while self.jids:
         try:
             job = self.client.jobs[self.jids.pop(0)]
             if not job.heartbeat():
                 # We no longer have access to this job
                 logger.warn('Lost heart on would-be resumed job %s' % job.jid)
                 continue
             logger.info('Resuming %s' % job.jid)
             self.setproctitle('Working %s (%s)' % (job.jid, job.klass_name))
             job.process()
             self.clean()
         except KeyboardInterrupt:
             return

     while True:
         try:
             worked = False
             for queue in self.queues:
                 job = queue.pop()
                 if not job:
                     continue
                 worked = True
                 self.setproctitle('Working %s (%s)' % (job.jid, job.klass_name))
                 job.process()
                 self.clean()

             if worked:
                 continue
             # Nothing was available on any queue during this pass
             self.setproctitle('sleeping...')
             logger.debug('Sleeping for %fs' % self.interval)
             time.sleep(self.interval)
         except KeyboardInterrupt:
             return
Beispiel #10
0
 def __init__(self, *a, **kw):
     """Initialise the thread and attach it to the "testing" queue.

     All arguments go straight to ``threading.Thread.__init__``. The
     thread gets its own qless client whose worker name is suffixed with
     the thread's name.
     """
     threading.Thread.__init__(self, *a, **kw)
     # This is to fake out thread-level workers
     thread_client = qless.client(host=args.host, port=args.port)
     thread_client.worker = thread_client.worker + "-" + self.getName()
     self.q = thread_client.queue("testing")
Beispiel #11
0
import logging
import threading

# Benchmark logger: one stream handler whose records carry a timestamp and
# the name of the emitting thread.
formatter = logging.Formatter("[%(asctime)s] %(threadName)s => %(message)s")
handler = logging.StreamHandler()
handler.setFormatter(formatter)
handler.setLevel(logging.DEBUG)
logger = logging.getLogger("qless-bench")
logger.addHandler(handler)
# The handler is set to DEBUG; the logger's own level depends on --verbose
logger.setLevel(logging.DEBUG if args.verbose else logging.WARN)

# Our qless client
client = qless.client(host=args.host, port=args.port)


class ForgetfulWorker(threading.Thread):
    def __init__(self, *a, **kw):
        """Initialise the thread and attach it to the "testing" queue.

        All arguments go straight to ``threading.Thread.__init__``. The
        thread gets its own qless client whose worker name is suffixed
        with the thread's name.
        """
        threading.Thread.__init__(self, *a, **kw)
        # This is to fake out thread-level workers
        thread_client = qless.client(host=args.host, port=args.port)
        thread_client.worker = thread_client.worker + "-" + self.getName()
        self.q = thread_client.queue("testing")

    def run(self):
        while len(self.q):
            job = self.q.pop()
            if not job:
                # Sleep a little bit
Beispiel #12
0
            time.sleep(random.random() * 0.5)
            job['underpants'] = {
                'collected': random.randrange(0, 200)
            }
            job.complete('unknown')
    
    @staticmethod
    def unknown(job):
        """Run the 'unknown' stage: fail sometimes, otherwise stall and advance.

        Raises a plain Exception when a random draw falls below 0.05.
        Otherwise sleeps for a random time of up to two seconds, stores a
        list of '?' entries under ``job['unknown']`` and completes the job
        into 'profit'.
        """
        # After all, it is the unknown stage
        if random.random() < 0.05:
            raise Exception('The most uncertain plans of mice and men...')

        # We'll scratch our heads a little bit about what to do here
        pause = random.random() * 2.0
        time.sleep(pause)
        question_marks = int(random.random() * 10)
        job['unknown'] = ['?'] * question_marks
        job.complete('profit')
    
    @staticmethod
    def profit(job):
        """Record a dollar-formatted profit on the job and complete it.

        The amount is the job's ``['underpants']['collected']`` value
        scaled by a random factor drawn from ``random.uniform(0.5, 1.5)``,
        formatted with two decimals.
        """
        # How much profits did we get?!
        multiplier = random.uniform(0.5, 1.5)
        collected = job['underpants']['collected']
        job['profit'] = '$%.2f' % (multiplier * collected)
        job.complete()

if __name__ == '__main__':
    # Run as a script: enqueue 1000 GnomesJob jobs, each with empty data,
    # on the 'underpants' queue of a default qless client.
    import qless
    from qless import gnomes

    client = qless.client()
    underpants = client.queue('underpants')
    for _ in range(1000):
        underpants.put(gnomes.GnomesJob, {})
Beispiel #13
0
 def __init__(self, *a, **kw):
     """Initialise the thread and attach it to the 'testing' queue.

     All arguments go straight to ``threading.Thread.__init__``. The
     thread gets its own qless client whose worker name is suffixed with
     the thread's name.
     """
     threading.Thread.__init__(self, *a, **kw)
     # This is to fake out thread-level workers
     thread_client = qless.client(host=args.host, port=args.port)
     thread_client.worker = thread_client.worker + '-' + self.getName()
     self.q = thread_client.queue('testing')
Beispiel #14
0
import logging
import threading

# Benchmark logger: one stream handler whose records carry a timestamp and
# the name of the emitting thread.
formatter = logging.Formatter('[%(asctime)s] %(threadName)s => %(message)s')
handler = logging.StreamHandler()
handler.setFormatter(formatter)
handler.setLevel(logging.DEBUG)
logger = logging.getLogger('qless-bench')
logger.addHandler(handler)
# The handler is set to DEBUG; the logger's own level depends on --verbose
logger.setLevel(logging.DEBUG if args.verbose else logging.WARN)

# Our qless client
client = qless.client(host=args.host, port=args.port)

class ForgetfulWorker(threading.Thread):
    """Thread that pops jobs from the 'testing' queue using its own client.

    NOTE(review): in this excerpt `run` never uses the popped job, so the
    method body may be cut off -- confirm against the full file.
    """

    def __init__(self, *a, **kw):
        """Initialise the thread and attach it to the 'testing' queue.

        All arguments are passed through to ``threading.Thread.__init__``.
        """
        threading.Thread.__init__(self, *a, **kw)
        # This is to fake out thread-level workers
        tmp = qless.client(host=args.host, port=args.port)
        # Suffix the client's worker name with this thread's name
        tmp.worker += '-' + self.getName()
        self.q = tmp.queue('testing')
    
    def run(self):
        """Keep popping from the queue for as long as its length is non-zero."""
        while len(self.q):
            job = self.q.pop()
            if not job:
                # Sleep a little bit
                time.sleep(0.1)
Beispiel #15
0
def check(cluster, config):
    """Report crawlers that run too few jobs or are missing altogether.

    The expected crawler count comes from ``CLUSTERS[cluster]``; that
    lookup happens before any error handling, so an unknown cluster raises
    KeyError there.

    Returns:
        "Bad cluster." if the qless master host/port cannot be read from
        `config`; "No data." if no queue named QNAME is reported; "OK" if
        every expected crawler is present and runs at least a third of
        its fair share; otherwise a terse "Lazy: ...; MIA: ..." summary.
    """
    # Get number of crawlers we expect based on cluster
    expected = CLUSTERS[cluster]

    # Get a Qless connection
    try:
        endpoint = config['clusters'][cluster]['qless']['master']
        master = qless.client(host=endpoint['host'], port=endpoint['port'])
    except (KeyError, TypeError):
        # Should not happen.
        return "Bad cluster."

    # Determine how many jobs are running: take the first queue named
    # QNAME, or NOTFOUND if there is none
    running = next(
        (queue['running'] for queue in master.queues.counts
         if queue['name'] == QNAME),
        NOTFOUND,
    )

    # This should not happen...
    if running == NOTFOUND:
        return "No data."

    # Each crawler must be running at least one third its fair share, else we
    # proclaim it lazy and tattle on it.
    minjobs = running / expected / 3

    # Determine how many jobs are running on each crawler, stored as
    # (jobs, stalled) per worker name
    prefix = cluster + '-crawler-'
    crawlers = {
        node['name']: (node.get('jobs', 0), node.get('stalled', 0))
        for node in master.workers.counts
        if node['name'].startswith(prefix)
    }

    # Tell on any lazy crawlers. Something of a PITA as Zabbix forces us to
    # be extremely terse.
    lazy = []
    mia = []
    for index in range(1, expected + 1):
        key = prefix + '%02d' % index
        if key not in crawlers:
            mia.append(key)
        elif crawlers[key][0] < minjobs:
            lazy.append(key)

    # Build the terse summary of what's wrong, one part per problem kind
    problems = []
    if lazy:
        problems.append('Lazy: ' + ', '.join(lazy))
    if mia:
        problems.append('MIA: ' + ', '.join(mia))

    # If OK, say so (Zabbix hates empty strings).
    if not problems:
        return 'OK'
    return '; '.join(problems)