def __init__(self, config):
    """Initialise the base scheduler, then open a Redis client from *config*.

    Reads ``REDIS_HOST``, ``REDIS_PORT`` and ``REDIS_PASSWORD`` (the password
    falls back to an empty string) and always selects database 0.
    """
    SpiderScheduler.__init__(self, config)

    redis_host = config.get('REDIS_HOST')
    redis_port = config.get('REDIS_PORT')
    redis_password = config.get('REDIS_PASSWORD', '')

    # NOTE(review): the attribute name is spelled "redis_connecton" (sic);
    # it is kept as-is because code outside this block may read it.
    self.redis_connecton = redis.StrictRedis(
        host=redis_host,
        port=redis_port,
        db=0,
        password=redis_password,
    )
def setUp(self):
    """Build a scratch directory tree and the scheduler under test.

    Layout created under a fresh temporary path: ``eggs/mybot1``,
    ``eggs/mybot2`` and ``dbs``. The queues and the scheduler are then
    built from a Config pointing at those directories.
    """
    base_dir = self.mktemp()
    eggs_dir = os.path.join(base_dir, 'eggs')
    self.eggs_dir = eggs_dir
    dbs_dir = os.path.join(base_dir, 'dbs')

    os.mkdir(base_dir)
    directories = (
        eggs_dir,
        dbs_dir,
        os.path.join(eggs_dir, 'mybot1'),
        os.path.join(eggs_dir, 'mybot2'),
    )
    for directory in directories:
        os.makedirs(directory)

    settings = {'eggs_dir': eggs_dir, 'dbs_dir': dbs_dir}
    config = Config(values=settings)
    self.queues = get_spider_queues(config)
    self.sched = SpiderScheduler(config)
class SpiderSchedulerTest(unittest.TestCase):
    """Exercise SpiderScheduler against project directories created on disk."""

    def setUp(self):
        """Create ``eggs/mybot1``, ``eggs/mybot2`` and ``dbs``, then build the scheduler."""
        # NOTE(review): mktemp() is not provided by the stdlib
        # unittest.TestCase; presumably `unittest` here is
        # twisted.trial.unittest -- confirm.
        d = self.mktemp()
        eggs_dir = self.eggs_dir = os.path.join(d, 'eggs')
        dbs_dir = os.path.join(d, 'dbs')
        os.mkdir(d)
        os.makedirs(eggs_dir)
        os.makedirs(dbs_dir)
        os.makedirs(os.path.join(eggs_dir, 'mybot1'))
        os.makedirs(os.path.join(eggs_dir, 'mybot2'))
        config = Config(values={'eggs_dir': eggs_dir, 'dbs_dir': dbs_dir})
        self.queues = get_spider_queues(config)
        self.sched = SpiderScheduler(config)

    def test_interface(self):
        """The scheduler must provide ISpiderScheduler."""
        verifyObject(ISpiderScheduler, self.sched)

    def test_list_update_projects(self):
        """A project directory added later shows up after update_projects()."""
        self.assertEqual(sorted(self.sched.list_projects()),
                         sorted(['mybot1', 'mybot2']))
        os.makedirs(os.path.join(self.eggs_dir, 'mybot3'))
        self.sched.update_projects()
        self.assertEqual(sorted(self.sched.list_projects()),
                         sorted(['mybot1', 'mybot2', 'mybot3']))

    def test_schedule(self):
        """Scheduled jobs land in their project's queue.

        The third positional argument to schedule() is presumably a
        priority: for 'mybot2' the job scheduled with 10 is expected to be
        popped before the one scheduled with 1.
        """
        q1, q2 = self.queues['mybot1'], self.queues['mybot2']
        # assertFalse replaces the deprecated failIf alias.
        self.assertFalse(q1.count())
        self.sched.schedule('mybot1', 'myspider1', 2, a='b')
        self.sched.schedule('mybot2', 'myspider2', 1, c='d')
        self.sched.schedule('mybot2', 'myspider3', 10, e='f')
        self.assertEqual(q1.pop(), {'name': 'myspider1', 'a': 'b'})
        self.assertEqual(q2.pop(), {'name': 'myspider3', 'e': 'f'})
        self.assertEqual(q2.pop(), {'name': 'myspider2', 'c': 'd'})
# startService: on start, re-queue jobs recorded as running, reload the
# finished-job records, then start waiting for work on every slot.
#
# Steps, in source order:
#   1. Build a SpiderScheduler from self.config and fetch the per-project
#      "running" stores with get_spider_running(self.config).
#   2. For each entry of a project's running store, call schedule() with the
#      stored '_spider' value as the spider name, the entry key as _job, and
#      the stored 'domain' and 'settings' values.
#   3. Fetch the "finished" stores with get_spider_finished(self.config). Each
#      stored value is JSON-decoded, turned into a ScrapyProcessProtocol
#      (slot, project, spider, job, env, domain), given start_time/end_time
#      parsed by parser.parse(), and appended to self.finished.
#   4. Call self._wait_for_project(slot) for every slot in
#      range(self.max_proc).
#   5. Log the startup message with version, max_proc and runner.
#
# NOTE(review): indentation was lost in this copy, so it is unclear whether
# step 3 is nested inside the per-project loop of step 2 or follows it;
# `finished_jobs[project]` relies on `project` being bound -- confirm against
# the original file.
# NOTE(review): .iteritems() looks like the Python 2 dict-style API (or a
# method of the store objects themselves) -- confirm.
def startService(self): spider_scheduler = SpiderScheduler(self.config) running = get_spider_running(self.config) for project in running.keys(): runner_db = running[project] for item in runner_db.iteritems(): spider_scheduler.schedule(project, str(item[1]['_spider']), _job=str(item[0]), domain=str(item[1]['domain']), settings=item[1]['settings']) finished_jobs = get_spider_finished(self.config) finished_db = finished_jobs[project] for item in finished_db.iteritems(): item = json.loads(item[1]) pp = ScrapyProcessProtocol(item['slot'], item['project'], item['spider'], item['job'], item['env'], domain=item['domain']) pp.end_time = parser.parse(item['end_time']) pp.start_time = parser.parse(item['start_time']) self.finished.append(pp) for slot in range(self.max_proc): self._wait_for_project(slot) log.msg(format='Scrapyd %(version)s started: max_proc=%(max_proc)r, runner=%(runner)r', version=__version__, max_proc=self.max_proc, runner=self.runner, system='Launcher')
class SpiderSchedulerTest(unittest.TestCase):
    """Exercise SpiderScheduler against project directories created on disk."""

    def setUp(self):
        """Create ``eggs/mybot1``, ``eggs/mybot2`` and ``dbs``, then build the scheduler."""
        # NOTE(review): mktemp() is not provided by the stdlib
        # unittest.TestCase; presumably `unittest` here is
        # twisted.trial.unittest -- confirm.
        d = self.mktemp()
        eggs_dir = self.eggs_dir = os.path.join(d, 'eggs')
        dbs_dir = os.path.join(d, 'dbs')
        os.mkdir(d)
        os.makedirs(eggs_dir)
        os.makedirs(dbs_dir)
        os.makedirs(os.path.join(eggs_dir, 'mybot1'))
        os.makedirs(os.path.join(eggs_dir, 'mybot2'))
        config = Config(values={'eggs_dir': eggs_dir, 'dbs_dir': dbs_dir})
        self.queues = get_spider_queues(config)
        self.sched = SpiderScheduler(config)

    def test_interface(self):
        """The scheduler must provide ISpiderScheduler."""
        verifyObject(ISpiderScheduler, self.sched)

    def test_list_update_projects(self):
        """A project directory added later shows up after update_projects()."""
        self.assertEqual(sorted(self.sched.list_projects()),
                         sorted(['mybot1', 'mybot2']))
        os.makedirs(os.path.join(self.eggs_dir, 'mybot3'))
        self.sched.update_projects()
        self.assertEqual(sorted(self.sched.list_projects()),
                         sorted(['mybot1', 'mybot2', 'mybot3']))

    def test_schedule(self):
        """Each scheduled job is popped from the queue of its own project."""
        q = self.queues['mybot1']
        # assertFalse replaces the deprecated failIf alias.
        self.assertFalse(q.count())
        self.sched.schedule('mybot1', 'myspider1', a='b')
        self.sched.schedule('mybot2', 'myspider2', c='d')
        self.assertEqual(q.pop(), {'name': 'myspider1', 'a': 'b'})
        q = self.queues['mybot2']
        self.assertEqual(q.pop(), {'name': 'myspider2', 'c': 'd'})
def application(config):
    """Assemble and return the "Scrapyd" application.

    Registers the poller, scheduler, environment, job storage and egg
    storage components on the application, then attaches three services
    to it: the launcher, a timer that calls ``poller.poll`` every
    ``poll_interval`` seconds, and the TCP web server.

    Config keys read here (with defaults): ``http_port`` (6800),
    ``bind_address`` ('127.0.0.1'), ``poll_interval`` (5), plus the dotted
    paths ``jobstorage``, ``eggstorage``, ``launcher`` and ``webroot``.
    """
    app = Application("Scrapyd")

    port = config.getint('http_port', 6800)
    address = config.get('bind_address', '127.0.0.1')
    interval = config.getfloat('poll_interval', 5)

    # The poller keeps a name because the timer service needs poller.poll.
    poller = QueuePoller(config)
    core_components = (
        (IPoller, poller),
        (ISpiderScheduler, SpiderScheduler(config)),
        (IEnvironment, Environment(config)),
    )
    for interface, component in core_components:
        app.setComponent(interface, component)

    # Storage backends are resolved from dotted paths in the config.
    job_storage_cls = load_object(
        config.get('jobstorage', 'scrapyd.jobstorage.MemoryJobStorage'))
    app.setComponent(IJobStorage, job_storage_cls(config))

    egg_storage_cls = load_object(
        config.get('eggstorage', 'scrapyd.eggstorage.FilesystemEggStorage'))
    app.setComponent(IEggStorage, egg_storage_cls(config))

    launcher_cls = load_object(
        config.get('launcher', 'scrapyd.launcher.Launcher'))
    launcher = launcher_cls(config, app)

    timer = TimerService(interval, poller.poll)

    root_cls = load_object(config.get('webroot', 'scrapyd.website.Root'))
    site = server.Site(create_wrapped_resource(root_cls, config, app))
    webservice = TCPServer(port, site, interface=address)

    log.msg(
        format="Scrapyd web console available at http://%(bind_address)s:%(http_port)s/",
        bind_address=address,
        http_port=port,
    )

    for service in (launcher, timer, webservice):
        service.setServiceParent(app)

    return app
def application(config):
    """Assemble and return the "Scrapyd" application.

    When the ``PORT`` environment variable is set it overrides the
    configured ``http_port`` and the server binds to '0.0.0.0'; otherwise
    ``http_port`` (default 6800) and ``bind_address`` (default
    '127.0.0.1') come from *config*.

    Registers the poller, egg storage, scheduler and environment
    components, then attaches the launcher, the polling timer and the TCP
    web server to the application.
    """
    app = Application("Scrapyd")

    port = config.getint('http_port', 6800)
    if 'PORT' in os.environ:
        port = int(os.environ.get('PORT'))
        address = '0.0.0.0'
    else:
        address = config.get('bind_address', '127.0.0.1')
    interval = config.getfloat('poll_interval', 5)

    # The poller keeps a name because the timer service needs poller.poll.
    poller = QueuePoller(config)
    components = (
        (IPoller, poller),
        (IEggStorage, FilesystemEggStorage(config)),
        (ISpiderScheduler, SpiderScheduler(config)),
        (IEnvironment, Environment(config)),
    )
    for interface, component in components:
        app.setComponent(interface, component)

    launcher_cls = load_object(
        config.get('launcher', 'scrapyd.launcher.Launcher'))
    launcher = launcher_cls(config, app)

    root_cls = load_object(config.get('webroot', 'scrapyd.website.Root'))

    timer = TimerService(interval, poller.poll)
    site = server.Site(root_cls(config, app))
    webservice = TCPServer(port, site, interface=address)

    log.msg(
        format="Scrapyd web console available at http://%(bind_address)s:%(http_port)s/",
        bind_address=address,
        http_port=port,
    )

    for service in (launcher, timer, webservice):
        service.setServiceParent(app)

    return app