Example #1
0
class Headquarters(object):
    """Mini Headquarters object with just one incoming queue.

    Holds a MongoDB connection, the job configurations read from its
    ``crawl`` database, and a lazily populated cache of ``CrawlJob``
    objects keyed by job name.
    """

    def __init__(self):
        # jobname -> CrawlJob cache; guarded by jobslock.
        self.jobs = {}
        self.jobslock = threading.RLock()
        self.mongo = pymongo.Connection(hqconfig.get('mongo'))
        self.configdb = self.mongo.crawl
        self.jobconfigs = JobConfigs(self.configdb)
        #self.coordinator = Coordinator(hqconfig.get('zkhosts'))
        # Configured in MB (default 4); scaled by 1000*1000 in get_job().
        self.maxinqueuesize = hqconfig.get(('inq', 'maxqueuesize'), 4)

    def shutdown(self):
        """Shut down every cached job, then release the MongoDB connection.

        The connection is released even when a job's ``shutdown()`` raises;
        the exception still propagates to the caller.
        """
        # Snapshot under the lock so a concurrent get_job() cannot resize
        # the dict while it is being walked.
        with self.jobslock:
            jobs = list(self.jobs.values())
        try:
            for job in jobs:
                job.shutdown()
        finally:
            self.configdb = None
            self.mongo.disconnect()

    def get_job(self, jobname):
        """Return the CrawlJob for ``jobname``, creating it on first use.

        Creation and lookup happen under ``jobslock`` so that each job name
        maps to a single CrawlJob instance.
        """
        with self.jobslock:
            job = self.jobs.get(jobname)
            if job is None:
                # too small maxqueuesize would be a problem, so config
                # parameter is in MB (values below 1 are raised to 1).
                job = self.jobs[jobname] = CrawlJob(
                    self.jobconfigs.get_job(jobname),
                    maxqueuesize=int(max(1, self.maxinqueuesize)*1000*1000))
                #self.coordinator.publish_job(job)
            return job
Example #2
0
class QuarterMaster(object):
    """Holds the optional coordinator, job configs and per-job distributors.

    The coordinator is created only when ``zkhosts`` is configured;
    otherwise ``self.coord`` is None and coordinator-backed operations
    are skipped.
    """

    def __init__(self):
        zkhosts = hqconfig.get('zkhosts', None)
        # logging.warn is a deprecated alias of logging.warning.
        logging.warning('zkhosts=%s', zkhosts)
        self.coord = Coordinator(zkhosts, alivenode='master') if zkhosts else None
        self.mongo = pymongo.Connection(host=hqconfig.get('mongo'))
        self.jobconfigs = JobConfigs(self.mongo.crawl)

        # crawlinfo is historically named 'wide' but not really wide crawl
        # specific.
        #self.crawlinfo = CrawlInfo('wide')

        # One mapper instance is passed to every Distributor created below.
        self.worksetmapper = WorksetMapper(hqconfig.NWORKSETS_BITS)
        # distributor for each job (job name -> Distributor)
        self.distributors = {}

    def shutdown(self):
        """Shut down the coordinator, if one was configured."""
        # self.coord is None when 'zkhosts' is not configured; calling
        # shutdown() on it unconditionally would raise AttributeError.
        if self.coord:
            self.coord.shutdown()
        #self.crawlinfo.shutdown()

    @property
    def servers(self):
        """Result of ``coord.get_servers()``, or None without a coordinator."""
        return self.coord and self.coord.get_servers()

    @property
    def servers_status(self):
        """Result of ``coord.get_servers_status()``, or None without a coordinator."""
        return self.coord and self.coord.get_servers_status()

    def get_distributor(self, job):
        """Return the Distributor for ``job``, creating and caching it on first use."""
        if job not in self.distributors:
            self.distributors[job] = Distributor(self.jobconfigs.get_job(job),
                                                 self.worksetmapper)
        return self.distributors[job]
Example #3
0
File: inq.py Project: travisfw/hq
class Headquarters(object):
    """Mini Headquarters object with just one incoming queue.

    Holds a MongoDB connection, the job configurations read from its
    ``crawl`` database, and a lazily populated cache of ``CrawlJob``
    objects keyed by job name.
    """

    def __init__(self):
        # jobname -> CrawlJob cache; guarded by jobslock.
        self.jobs = {}
        self.jobslock = threading.RLock()
        self.mongo = pymongo.Connection(hqconfig.get('mongo'))
        self.configdb = self.mongo.crawl
        self.jobconfigs = JobConfigs(self.configdb)
        #self.coordinator = Coordinator(hqconfig.get('zkhosts'))

    def shutdown(self):
        """Shut down every cached job, then release the MongoDB connection.

        The connection is released even when a job's ``shutdown()`` raises;
        the exception still propagates to the caller.
        """
        # Snapshot under the lock so a concurrent get_job() cannot resize
        # the dict while it is being walked.
        with self.jobslock:
            jobs = list(self.jobs.values())
        try:
            for job in jobs:
                job.shutdown()
        finally:
            self.configdb = None
            self.mongo.disconnect()

    def get_job(self, jobname):
        """Return the CrawlJob for ``jobname``, creating it on first use.

        Creation and lookup happen under ``jobslock`` so that each job name
        maps to a single CrawlJob instance.
        """
        with self.jobslock:
            job = self.jobs.get(jobname)
            if job is None:
                job = self.jobs[jobname] = CrawlJob(
                    self.jobconfigs.get_job(jobname))
                #self.coordinator.publish_job(job)
            return job