Created on 2014.1.1
@author: gaolichuang '''
# NOTE(review): the opening ''' of the module docstring lies before this
# chunk — this file is a partial view; confirm against the full source.
import random

from oslo.config import cfg
from eventlet import greenthread

from miracle.common.manager import periodic_task
from miracle.common.manager import manager
from miracle.common.utils.gettextutils import _  # noqa
from miracle.common.base import log as logging

CONF = cfg.CONF
# Pull in the periodic-report interval option declared by the service module.
CONF.import_opt('periodic_report_tasks_interval', 'miracle.common.service.service')

LOG = logging.getLogger(__name__)


class ManagerContainer(periodic_task.PeriodicTasks):
    ''' ManagerContainer can contain many Manager

    Holds a pool of `number` instances created from the given manager
    class/factory, plus (currently unused) input/output queue slots.
    '''

    def __init__(self, manager = None, number = 0):
        # Queue handles are placeholders here; presumably wired up later
        # by code outside this chunk — TODO confirm.
        self._input_queue = None
        self._output_queue = None
        # Instantiated manager objects, one per requested slot.
        self.managers = []
        self.number = number
        LOG.info(_("=====================Start %s number:%s===================="% (manager,self.number)))
        i = 0
        # NOTE(review): `manager` is a callable (class/factory), but its
        # default is None — calling manager() with number > 0 and the
        # default would raise TypeError; verify callers always pass one.
        while i < self.number:
            _manager = manager()
            self.managers.append(_manager)
            # NOTE(review): chunk is truncated here — the loop increment
            # (e.g. i += 1) is not visible in this view; as shown the loop
            # would never terminate. Confirm against the full file.
# NOTE(review): this chunk begins mid-list — the `scheduler_opt = [` opener
# (and any earlier entries) lies before this view; confirm against the full file.
cfg.IntOpt('max_fail_retry_time', default=2, help='crawl fail retry time'),
cfg.IntOpt('max_timeout_retry_time', default=2, help='crawl timeout retry time'),
cfg.IntOpt('read_batch_num', default=80, help='read batch number like db limit'),
cfg.IntOpt('crawl_timeout', default=3600, help='time out interval'),
cfg.StrOpt('init_url', default='', help='init url to crawl'),
cfg.StrOpt('init_url_file', default='', help='init url file to crawl'),
]

CONF = cfg.CONF
# Register the scheduler options declared above with the global config.
CONF.register_opts(scheduler_opt)

LOG = logging.getLogger(__name__)


class DummySchedulerManager(manager.CrawlManager):
    # Minimal scheduler: on each periodic-report tick it builds one
    # hard-coded seed CrawlDoc (see run_periodic_report_tasks).

    def __init__(self):
        super(DummySchedulerManager, self).__init__()

    def run_periodic_report_tasks(self, service):
        '''TODO: fill url host level and something, make some necessary checks.

        Builds a seed CrawlDoc for a fixed URL; `CrawlDoc` and `mmh3` are
        imported elsewhere in this file (outside this view).
        '''
        doc = CrawlDoc()
        doc.request_url = 'http://roll.sohu.com/'
        # doc.request_url = 'http://www.163.com/'
        doc.url = doc.request_url
        # docid is the MurmurHash3 of the URL — used as a stable identifier.
        doc.docid = mmh3.hash(doc.url)
        doc.level = 1
        # NOTE(review): chunk is truncated here — the method presumably goes
        # on to enqueue/dispatch `doc`; confirm against the full file.