# Example 1
class Consumer():
    """Consumes jobs from a beanstalkd tube, delegating each to process_job()."""

    def __init__(self, beanstalk_host, beanstalk_port):
        """Open a beanstalkd connection to the given host and port."""
        self.beanstalk = Connection(host=beanstalk_host, port=beanstalk_port)

    def consume(self, max_jobs, src_queue):
        """Consume from the incoming job queue.

        max_jobs semantics:
          * max_jobs > 0  -- stop after max_jobs jobs (or a 10s reserve timeout).
          * max_jobs == 0 -- run until a 10s reserve timeout (drain the queue).
          * max_jobs < 0  -- run forever (reserve blocks with no timeout).

        Each job is deleted when process_job() returns True, otherwise buried.
        """
        # Watch only the source tube; drop the implicit 'default' tube.
        self.beanstalk.watch(src_queue)
        self.beanstalk.ignore('default')
        print("now watching", self.beanstalk.watching())

        # Block forever only in the explicit "run forever" (max_jobs < 0) mode.
        queue_timeout = 10 if max_jobs >= 0 else None

        done = 0
        try:
            while max_jobs <= 0 or done < max_jobs:
                job = self.beanstalk.reserve(timeout=queue_timeout)

                if job is None:
                    break  # reserve() timed out: nothing left to do

                try:
                    res = self.process_job(job.body)
                    if res is True:
                        job.delete()
                    else:
                        # Bury instead of delete so failed jobs can be
                        # inspected or kicked back later.
                        job.bury()
                except Exception as e:
                    print("error from process_job()", e)
                    job.bury()
                finally:
                    done += 1
        finally:
            # Close even if reserve()/the socket raises mid-loop; the
            # original leaked the connection on that path.
            self.beanstalk.close()

    def process_job(self, json_job):
        """Default job handler: do nothing and report failure (job gets buried)."""
        return False
# Example 2
def push_city_jobs(city, sample_order):
    """Get image download jobs for a city from the job API, then push them
    to that city's image download backlog tube.

    Returns the number of jobs pushed (0 if the API call failed).
    """
    # e.g. "New York" -> "backlog_new_york"
    dst_tube = 'backlog_' + city.replace(' ', '_').lower()
    beanstalk = Connection(host='localhost', port=11300)
    try:
        print("tubes:", beanstalk.tubes())
        print("switching to", beanstalk.use(dst_tube))
        print("now using", beanstalk.using())

        job_api = API()
        ok, jobs = job_api.jobs(city, sample_order)
        if not ok:
            # API failure: push nothing. Connection is still closed below;
            # the original leaked it on this early-return path.
            return 0

        for job in jobs:
            job_json = json.dumps(job)
            beanstalk.put(job_json)
            print("pushed {}_{}_{}_{}".format(job['city'], job['osm_way_id'],
                                              job['sequence'], job['cam_dir']))

        return len(jobs)
    finally:
        # Always release the socket, even if the API call or a put() raises.
        beanstalk.close()
# Example 3
class BeanstalkdBroker(BaseBroker):
    """Beanstalkd-backed task broker.

    Watches and uses the same tube (*queue_name*), so tasks are both
    produced to and consumed from that single queue. Usable as a context
    manager; the connection is closed on exit.
    """

    def __init__(self, queue_name: str):
        self.queue_name = queue_name
        self.connection = Connection(host=settings.beanstalkd_host,
                                     port=settings.beanstalkd_port)
        # Consume from and produce to the same tube.
        self.connection.watch(name=queue_name)
        self.connection.use(name=queue_name)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Always release the socket, even if the with-body raised.
        self.connection.close()

    def add_to_queue(self, task_uuid: uuid.UUID):
        """Enqueue a task, storing its UUID as the job body string."""
        self.connection.put(body=str(task_uuid))

    def reserve(self, timeout: "int | None" = None) -> pystalkd.Job:
        """Reserve the next job; block up to *timeout* seconds (None = forever).

        Note: the default is None, so the annotation must be Optional —
        the original's `timeout: int = None` was incorrect.
        """
        job = self.connection.reserve(timeout=timeout)
        return job

    def delete(self, job: pystalkd.Job):
        """Acknowledge a finished job by deleting it from the tube."""
        job.delete()
# Example 4
class Shovel():
    """Moves ("shovels") jobs between beanstalkd tubes.

    drain() spreads a bounded shovel quota across per-city backlog tubes,
    proportionally to each tube's ready-job count, feeding a single target
    queue via move_jobs().
    """

    def __init__(self, beanstalk_host, beanstalk_port):
        # One shared connection is used for all shovel operations.
        self.logger = logging.getLogger(__name__)
        self.beanstalk = Connection(host=beanstalk_host, port=beanstalk_port)
        self.logger.info("host: {} {}".format(beanstalk_host, beanstalk_port))

    def watch_single_tube(self, tube):
        """Watch *tube* and ignore every other currently-watched tube."""
        # todo: is this necessary?
        self.beanstalk.watch(tube)
        # Ignore everything else we were watching (including 'default'),
        # so subsequent reserve() calls only pull from *tube*.
        watching = [x for x in self.beanstalk.watching() if x != tube]
        for x in watching:
            self.beanstalk.ignore(x)
        self.logger.info("now watching {}".format(self.beanstalk.watching()))

    def move_jobs(self, src_tube, dst_tube, n=0):
        """Move n jobs from one tube to another.

        Reserves up to *n* jobs from *src_tube* first, then re-puts their
        bodies into *dst_tube* and deletes the originals. With n <= 0 this
        is a no-op apart from the watch/use calls. If reserve() times out
        it returns early: jobs already reserved at that point are neither
        re-queued nor deleted here.
        """
        self.watch_single_tube(src_tube)
        # NOTE(review): redundant — watch_single_tube() above already
        # watches src_tube; harmless, but could be removed.
        self.beanstalk.watch(src_tube)
        self.beanstalk.use(dst_tube)
        # BATCH DRAIN INTO THIS (note that this bit is not persistent!)
        # All n reserved jobs are held in memory before any is re-put.
        lifo = []
        while (n > 0):
            job = self.beanstalk.reserve(timeout=60)
            if job is None:
                print("timed out. nothing to do?!")
                return
            lifo.append(job)
            n -= 1

        stack_len = len(lifo)

        # dump stack into destination work queue.
        # Popping reverses order: the last-reserved job is re-put first.
        while (len(lifo) > 0):
            job = lifo.pop()
            self.beanstalk.put(job.body)
            job.delete()

        self.logger.info("drained {} jobs".format(stack_len))

    def drain(self, total_shovel, target_queue, queue_prefix="backlog"):
        """Shovel up to *total_shovel* jobs from every '<queue_prefix>_*'
        tube into *target_queue*, proportionally to each tube's backlog.

        NOTE(review): if no matching tube has ready jobs, total_jobs is 0
        and the per-city weight division below would raise — confirm
        callers never invoke this with an empty backlog.
        """
        self.logger.info(
            "total_shovel: [{}] target_queue: [{}] queue_prefix: [{}]".format(
                total_shovel, target_queue, queue_prefix))
        # Per-tube stats dicts; 'name' and 'current-jobs-ready' are read below.
        backlog = [
            self.beanstalk.stats_tube(x) for x in self.beanstalk.tubes()
            if x.startswith(queue_prefix + "_")
        ]
        # shuffle cities.
        # we do this because there is a chance that some of the jobs in the last
        # city to be processed may be left on backlog if number of jobs shoveled
        # so far exceeds maximum processing limit. this happens due to
        # accumulation of rounding error.
        shuffle(backlog)

        total_jobs = sum(city['current-jobs-ready'] for city in backlog)
        # Never try to shovel more jobs than actually exist.
        total_shovel = min(total_jobs, total_shovel)

        self.logger.info("jobs remaining: [{}] jobs to shovel: [{}]".format(
            total_jobs, total_shovel))

        done = 0
        for city in backlog:
            name, jobs = city['name'], city['current-jobs-ready']
            weight = jobs / total_jobs
            # ceil() rounds each city's share up, so the running total can
            # overshoot the quota; the excess is clamped off below.
            shovel = ceil(weight * total_shovel)
            done += shovel

            if done > total_shovel:
                excess = done - total_shovel
                shovel = max(0, shovel - excess)

            self.logger.info(
                "tube: {} jobs: {} weight: {:0.1f}%, shovel: {}".format(
                    name, jobs, 100 * weight, shovel))

            self.move_jobs(name, target_queue, shovel)

        self.beanstalk.close()