Esempio n. 1
0
def connect(host, port, dst_tube):
    """Open a beanstalk connection and select ``dst_tube`` for producing.

    Prints the server's tube list, the result of switching to ``dst_tube``
    and the tube reported as in use afterwards.

    Args:
        host: beanstalk server host.
        port: beanstalk server port.
        dst_tube: name of the tube to ``use`` on the new connection.

    Returns:
        The open ``Connection``. It is not closed here; the caller owns it.
    """
    beanstalk = Connection(host, port)
    print("tubes:", beanstalk.tubes())
    print("switching to", beanstalk.use(dst_tube))
    print("now using", beanstalk.using())
    return beanstalk
Esempio n. 2
0
def push_city_jobs(city, sample_order, host='localhost', port=11300):
    """Fetch image download jobs for a city and push them to its backlog tube.

    The destination tube is ``backlog_<city>``, where the city name is
    lower-cased and its spaces are replaced by underscores. Each job is
    serialized with ``json.dumps`` before being put on the tube.

    Args:
        city: city name, passed to the job API and used to derive the tube.
        sample_order: passed through unchanged to ``API.jobs``.
        host: beanstalk server host (defaults to the previously hard-coded
            ``'localhost'``).
        port: beanstalk server port (defaults to the previously hard-coded
            ``11300``).

    Returns:
        The number of jobs pushed, or 0 when the job API reports failure.
    """
    dst_tube = 'backlog_' + city.replace(' ', '_').lower()
    beanstalk = Connection(host=host, port=port)
    # try/finally so the connection is released on the early return and on
    # any error raised by the job API or while pushing.
    try:
        print("tubes:", beanstalk.tubes())
        print("switching to", beanstalk.use(dst_tube))
        print("now using", beanstalk.using())

        job_api = API()
        ok, jobs = job_api.jobs(city, sample_order)
        if not ok:
            return 0

        for job in jobs:
            beanstalk.put(json.dumps(job))
            # Each job is expected to carry these four keys; they are only
            # read here to build the progress line.
            print("pushed {}_{}_{}_{}".format(job['city'], job['osm_way_id'],
                                              job['sequence'], job['cam_dir']))

        return len(jobs)
    finally:
        beanstalk.close()
Esempio n. 3
0
from pystalkd.Beanstalkd import Connection

# Minimal consumer: watch the 'manchester' tube and delete every job that
# arrives, printing its body first.
beanstalk = Connection(host='localhost', port=11300)
try:
    print("tubes:", beanstalk.tubes())
    print("switching to", beanstalk.watch('manchester'))
    print("now watching", beanstalk.watching())

    # Runs until interrupted; reserve() is called without a timeout.
    while True:
        print("blocking...")
        job = beanstalk.reserve()
        print("got job:", job.body)
        job.delete()
finally:
    # Release the socket when the loop is left via Ctrl-C or an error.
    beanstalk.close()

Esempio n. 4
0
class Shovel:
    """Move ready jobs from per-city backlog tubes into a target tube.

    One beanstalk connection is opened in ``__init__`` and shared by all
    methods. ``drain`` closes that connection when it finishes.
    """

    def __init__(self, beanstalk_host, beanstalk_port):
        """Connect to beanstalk at ``beanstalk_host:beanstalk_port``."""
        self.logger = logging.getLogger(__name__)
        self.beanstalk = Connection(host=beanstalk_host, port=beanstalk_port)
        self.logger.info("host: {} {}".format(beanstalk_host, beanstalk_port))

    def watch_single_tube(self, tube):
        """Make ``tube`` the only tube this connection reserves from."""
        # Watch the wanted tube first, then ignore every other one. This is
        # needed because a beanstalkd connection starts out watching the
        # "default" tube, and the server refuses to ignore the last watched
        # tube, so the order matters.
        self.beanstalk.watch(tube)
        for other in self.beanstalk.watching():
            if other != tube:
                self.beanstalk.ignore(other)
        self.logger.info("now watching {}".format(self.beanstalk.watching()))

    def move_jobs(self, src_tube, dst_tube, n=0):
        """Move up to ``n`` jobs from ``src_tube`` to ``dst_tube``.

        Jobs are first reserved into an in-memory stack and then re-put on
        the destination in reverse (LIFO) order; each source job is deleted
        only after its copy has been put. The stack is not persistent: jobs
        held in it when the process dies stay reserved on the server.

        If a reserve times out (60 s) before ``n`` jobs were collected, the
        jobs reserved so far are still moved instead of being left reserved.

        Args:
            src_tube: tube to take jobs from.
            dst_tube: tube to put the job bodies on.
            n: maximum number of jobs to move; 0 moves nothing.

        Returns:
            The number of jobs actually moved.
        """
        # watch_single_tube already watches src_tube, no second watch needed.
        self.watch_single_tube(src_tube)
        self.beanstalk.use(dst_tube)

        reserved = []
        while len(reserved) < n:
            job = self.beanstalk.reserve(timeout=60)
            if job is None:
                self.logger.warning("timed out. nothing to do?!")
                break
            reserved.append(job)

        moved = len(reserved)

        # Dump the stack into the destination work queue.
        while reserved:
            job = reserved.pop()
            self.beanstalk.put(job.body)
            job.delete()

        self.logger.info("drained {} jobs".format(moved))
        return moved

    def drain(self, total_shovel, target_queue, queue_prefix="backlog"):
        """Shovel jobs from all ``<queue_prefix>_*`` tubes into ``target_queue``.

        At most ``total_shovel`` jobs are moved in total. Each backlog tube
        contributes in proportion to its share of ready jobs (rounded up),
        capped by whatever is left of the overall budget. The connection is
        closed before returning, also when an error occurs.

        Args:
            total_shovel: upper bound on the number of jobs to move.
            target_queue: name of the destination tube.
            queue_prefix: tubes named ``<queue_prefix>_...`` are drained.
        """
        self.logger.info(
            "total_shovel: [{}] target_queue: [{}] queue_prefix: [{}]".format(
                total_shovel, target_queue, queue_prefix))
        try:
            backlog = [
                self.beanstalk.stats_tube(x) for x in self.beanstalk.tubes()
                if x.startswith(queue_prefix + "_")
            ]
            # Shuffle cities. Rounding every share up can exhaust the budget
            # before the last tubes are reached, so randomizing the order
            # avoids always short-changing the same city.
            shuffle(backlog)

            total_jobs = sum(city['current-jobs-ready'] for city in backlog)
            total_shovel = min(total_jobs, total_shovel)

            self.logger.info(
                "jobs remaining: [{}] jobs to shovel: [{}]".format(
                    total_jobs, total_shovel))

            if total_jobs <= 0:
                # Nothing is ready anywhere; the weights below would divide
                # by zero.
                return

            remaining = max(0, total_shovel)
            for city in backlog:
                name, jobs = city['name'], city['current-jobs-ready']
                weight = jobs / total_jobs
                # Never hand out more than what is left of the budget.
                shovel = min(ceil(weight * total_shovel), remaining)
                remaining -= shovel

                self.logger.info(
                    "tube: {} jobs: {} weight: {:0.1f}%, shovel: {}".format(
                        name, jobs, 100 * weight, shovel))

                self.move_jobs(name, target_queue, shovel)
        finally:
            self.beanstalk.close()