Beispiel #1
0
class QueuePollerTest(unittest.TestCase):
    """Exercise QueuePoller with two projects, 'mybot1' and 'mybot2'."""

    def setUp(self):
        """Create the eggs/dbs directory layout and build the fixtures.

        Sets ``self.queues``, ``self.poller`` and a mock launcher whose
        ``processes`` attribute is an empty dict.
        """
        d = self.mktemp()
        eggs_dir = os.path.join(d, 'eggs')
        dbs_dir = os.path.join(d, 'dbs')
        os.makedirs(eggs_dir)
        os.makedirs(dbs_dir)
        os.makedirs(os.path.join(eggs_dir, 'mybot1'))
        os.makedirs(os.path.join(eggs_dir, 'mybot2'))
        config = Config(values={'eggs_dir': eggs_dir, 'dbs_dir': dbs_dir})
        self.queues = get_spider_queues(config)
        self.poller = QueuePoller(config)
        self.launcher_mock = Mock()
        self.launcher_mock.processes = {}

    def test_interface(self):
        """The poller must satisfy the IPoller interface."""
        verifyObject(IPoller, self.poller)

    def test_poll_next(self):
        """Two pending next() requests are resolved by two successive polls."""
        self.queues['mybot1'].add('spider1')
        self.queues['mybot2'].add('spider2')
        d1 = self.poller.next()
        d2 = self.poller.next()
        self.assertIsInstance(d1, Deferred)
        # No poll has happened yet, so the deferred must not carry a result.
        self.assertFalse(hasattr(d1, 'result'))
        self.poller.poll(self.launcher_mock)
        # Pop from 'mybot1' before polling again -- presumably so the second
        # poll picks up 'mybot2'; confirm against the poller implementation.
        self.queues['mybot1'].pop()
        self.poller.poll(self.launcher_mock)
        # NOTE(review): the expectations below assume 'mybot1' is served
        # before 'mybot2'.
        self.assertEqual(d1.result, {'_project': 'mybot1', '_spider': 'spider1'})
        self.assertEqual(d2.result, {'_project': 'mybot2', '_spider': 'spider2'})
Beispiel #2
0
class QueuePollerTest(unittest.TestCase):
    """Exercise QueuePoller with two projects, "mybot1" and "mybot2"."""

    def setUp(self):
        """Create the eggs/dbs directory layout and build queues and poller."""
        d = self.mktemp()
        eggs_dir = os.path.join(d, "eggs")
        dbs_dir = os.path.join(d, "dbs")
        os.makedirs(eggs_dir)
        os.makedirs(dbs_dir)
        os.makedirs(os.path.join(eggs_dir, "mybot1"))
        os.makedirs(os.path.join(eggs_dir, "mybot2"))
        config = Config(values={"eggs_dir": eggs_dir, "dbs_dir": dbs_dir})
        self.queues = get_spider_queues(config)
        self.poller = QueuePoller(config)

    def test_interface(self):
        """The poller must satisfy the IPoller interface."""
        verifyObject(IPoller, self.poller)

    def test_poll_next(self):
        """Two pending next() requests are resolved by two successive polls."""
        self.queues["mybot1"].add("spider1")
        self.queues["mybot2"].add("spider2")
        d1 = self.poller.next()
        d2 = self.poller.next()
        self.assertIsInstance(d1, Deferred)
        # No poll has happened yet, so the deferred must not carry a result.
        self.assertFalse(hasattr(d1, "result"))
        self.poller.poll()
        # Pop from "mybot1" before polling again -- presumably so the second
        # poll picks up "mybot2"; confirm against the poller implementation.
        self.queues["mybot1"].pop()
        self.poller.poll()
        # NOTE(review): the expectations below assume "mybot1" is served
        # before "mybot2".
        self.assertEqual(d1.result, {"project": "mybot1"})
        self.assertEqual(d2.result, {"project": "mybot2"})
Beispiel #3
0
class QueuePollerTest(unittest.TestCase):
    """Exercise QueuePoller with two projects, 'mybot1' and 'mybot2'."""

    def setUp(self):
        """Create the eggs/dbs directory layout and build queues and poller."""
        d = self.mktemp()
        eggs_dir = os.path.join(d, 'eggs')
        dbs_dir = os.path.join(d, 'dbs')
        os.makedirs(eggs_dir)
        os.makedirs(dbs_dir)
        os.makedirs(os.path.join(eggs_dir, 'mybot1'))
        os.makedirs(os.path.join(eggs_dir, 'mybot2'))
        config = Config(values={'eggs_dir': eggs_dir, 'dbs_dir': dbs_dir})
        self.queues = get_spider_queues(config)
        self.poller = QueuePoller(config)

    def test_interface(self):
        """The poller must satisfy the IPoller interface."""
        verifyObject(IPoller, self.poller)

    def test_poll_next(self):
        """Two pending next() requests are resolved by two successive polls."""
        self.queues['mybot1'].add('spider1')
        self.queues['mybot2'].add('spider2')
        d1 = self.poller.next()
        d2 = self.poller.next()
        self.assertIsInstance(d1, Deferred)
        # No poll has happened yet, so the deferred must not carry a result.
        self.assertFalse(hasattr(d1, 'result'))
        self.poller.poll()
        # Pop from 'mybot1' before polling again -- presumably so the second
        # poll picks up 'mybot2'; confirm against the poller implementation.
        self.queues['mybot1'].pop()
        self.poller.poll()
        # NOTE(review): the expectations below assume 'mybot1' is served
        # before 'mybot2'.
        self.assertEqual(d1.result, {'_project': 'mybot1', '_spider': 'spider1'})
        self.assertEqual(d2.result, {'_project': 'mybot2', '_spider': 'spider2'})
Beispiel #4
0
 def setUp(self):
     """Build the eggs/dbs directory tree, then the queues and the poller."""
     base = self.mktemp()
     eggs_dir = os.path.join(base, 'eggs')
     dbs_dir = os.path.join(base, 'dbs')
     # Top-level directories first, then one eggs sub-directory per project.
     for top_level in (eggs_dir, dbs_dir):
         os.makedirs(top_level)
     for project in ('mybot1', 'mybot2'):
         os.makedirs(os.path.join(eggs_dir, project))
     settings = {'eggs_dir': eggs_dir, 'dbs_dir': dbs_dir}
     config = Config(values=settings)
     self.queues = get_spider_queues(config)
     self.poller = QueuePoller(config)
Beispiel #5
0
class QueuePollerTest(unittest.TestCase):
    """Exercise QueuePoller with two projects, 'mybot1' and 'mybot2'."""

    def setUp(self):
        """Build the eggs/dbs directory tree, then the queues and the poller."""
        base = self.mktemp()
        eggs_dir = os.path.join(base, 'eggs')
        dbs_dir = os.path.join(base, 'dbs')
        # Top-level directories first, then one eggs sub-directory per project.
        for top_level in (eggs_dir, dbs_dir):
            os.makedirs(top_level)
        for project in ('mybot1', 'mybot2'):
            os.makedirs(os.path.join(eggs_dir, project))
        settings = {'eggs_dir': eggs_dir, 'dbs_dir': dbs_dir}
        config = Config(values=settings)
        self.queues = get_spider_queues(config)
        self.poller = QueuePoller(config)

    def test_interface(self):
        """The poller must satisfy the IPoller interface."""
        verifyObject(IPoller, self.poller)

    def test_poll_next(self):
        """Two polls resolve two next() requests, whichever project goes first."""
        expected = {'mybot1': 'spider1',
                    'mybot2': 'spider2'}
        for project in expected:
            # Every spider is queued with priority 0.
            self.queues[project].add(expected[project], 0)

        first = self.poller.next()
        second = self.poller.next()
        self.assertIsInstance(first, Deferred)
        self.assertFalse(hasattr(first, 'result'))

        # First poll: the first deferred must now have fired.
        self.poller.poll()
        self.assertTrue(hasattr(first, 'result'))
        self.assertTrue(getattr(first, 'called', False))

        # Either project may have been served; read it back from the result.
        self.assertTrue(first.result.get('_project'))
        served = first.result['_project']
        self.assertEqual(first.result['_spider'], expected.pop(served))

        self.queues[served].pop()

        # Second poll: the one project left in `expected` must be served.
        self.poller.poll()
        other_project, other_spider = expected.popitem()
        self.assertEqual(
            second.result,
            {'_project': other_project, '_spider': other_spider},
        )
Beispiel #6
0
class QueuePollerTest(unittest.TestCase):
    """Exercise QueuePoller with two projects, 'mybot1' and 'mybot2'."""

    def setUp(self):
        """Create the eggs/dbs directory layout and build queues and poller."""
        d = self.mktemp()
        eggs_dir = os.path.join(d, 'eggs')
        dbs_dir = os.path.join(d, 'dbs')
        os.makedirs(eggs_dir)
        os.makedirs(dbs_dir)
        os.makedirs(os.path.join(eggs_dir, 'mybot1'))
        os.makedirs(os.path.join(eggs_dir, 'mybot2'))
        config = Config(values={'eggs_dir': eggs_dir, 'dbs_dir': dbs_dir})
        self.queues = get_spider_queues(config)
        self.poller = QueuePoller(config)

    def test_interface(self):
        """The poller must satisfy the IPoller interface."""
        verifyObject(IPoller, self.poller)

    def test_poll_next(self):
        """Two polls resolve two next() requests, whichever project goes first."""
        cfg = {'mybot1': 'spider1',
               'mybot2': 'spider2'}
        for prj, spd in cfg.items():
            self.queues[prj].add(spd)

        d1 = self.poller.next()
        d2 = self.poller.next()
        self.assertIsInstance(d1, Deferred)
        self.assertFalse(hasattr(d1, 'result'))

        # poll once
        self.poller.poll()
        # Checked separately so a failure names the condition that broke.
        self.assertTrue(hasattr(d1, 'result'))
        self.assertTrue(getattr(d1, 'called', False))

        # which project got run: project1 or project2?
        self.assertTrue(d1.result.get('_project'))
        prj = d1.result['_project']
        self.assertEqual(d1.result['_spider'], cfg.pop(prj))

        self.queues[prj].pop()

        # poll twice
        # check that the other project's spider got to run
        self.poller.poll()
        prj, spd = cfg.popitem()
        self.assertEqual(d2.result, {'_project': prj, '_spider': spd})
Beispiel #7
0
 def setUp(self):
     """Build the eggs/dbs directory tree, then the queues and the poller."""
     base = self.mktemp()
     eggs_dir = os.path.join(base, "eggs")
     dbs_dir = os.path.join(base, "dbs")
     # Top-level directories first, then one eggs sub-directory per project.
     for top_level in (eggs_dir, dbs_dir):
         os.makedirs(top_level)
     for project in ("mybot1", "mybot2"):
         os.makedirs(os.path.join(eggs_dir, project))
     settings = {"eggs_dir": eggs_dir, "dbs_dir": dbs_dir}
     config = Config(values=settings)
     self.queues = get_spider_queues(config)
     self.poller = QueuePoller(config)
Beispiel #8
0
 def setUp(self):
     """Build the eggs/dbs directory tree, then the queues and the poller."""
     base = self.mktemp()
     eggs_dir = os.path.join(base, 'eggs')
     dbs_dir = os.path.join(base, 'dbs')
     # Top-level directories first, then one eggs sub-directory per project.
     for top_level in (eggs_dir, dbs_dir):
         os.makedirs(top_level)
     for project in ('mybot1', 'mybot2'):
         os.makedirs(os.path.join(eggs_dir, project))
     settings = {'eggs_dir': eggs_dir, 'dbs_dir': dbs_dir}
     config = Config(values=settings)
     self.queues = get_spider_queues(config)
     self.poller = QueuePoller(config)
Beispiel #9
0
def application(config):
    """Assemble and return the "Scrapyd" application object.

    Reads ``http_port``, ``bind_address`` and ``poll_interval`` from
    *config*, registers the poller, scheduler, environment, job storage and
    egg storage components, then attaches the launcher, poll timer and web
    server services to the application.

    The job storage, egg storage, launcher and web root classes are resolved
    with ``load_object`` from dotted paths in *config*, each with a default.
    """
    app = Application("Scrapyd")
    port = config.getint('http_port', 6800)
    address = config.get('bind_address', '127.0.0.1')
    interval = config.getfloat('poll_interval', 5)

    poller = QueuePoller(config)
    core_components = (
        (IPoller, poller),
        (ISpiderScheduler, SpiderScheduler(config)),
        (IEnvironment, Environment(config)),
    )
    for interface, component in core_components:
        app.setComponent(interface, component)

    # Pluggable storages: class path comes from config, instance gets config.
    jobstorage_cls = load_object(
        config.get('jobstorage', 'scrapyd.jobstorage.MemoryJobStorage'))
    app.setComponent(IJobStorage, jobstorage_cls(config))

    eggstorage_cls = load_object(
        config.get('eggstorage', 'scrapyd.eggstorage.FilesystemEggStorage'))
    app.setComponent(IEggStorage, eggstorage_cls(config))

    launcher_cls = load_object(
        config.get('launcher', 'scrapyd.launcher.Launcher'))
    launcher = launcher_cls(config, app)

    # Timer service built from the poll interval and the poller's poll method.
    timer = TimerService(interval, poller.poll)

    root_cls = load_object(config.get('webroot', 'scrapyd.website.Root'))
    site = server.Site(create_wrapped_resource(root_cls, config, app))
    webservice = TCPServer(port, site, interface=address)
    log.msg(
        format="Scrapyd web console available at http://%(bind_address)s:%(http_port)s/",
        bind_address=address,
        http_port=port,
    )

    for service in (launcher, timer, webservice):
        service.setServiceParent(app)

    return app
Beispiel #10
0
def application(config):
    """Assemble and return the "Scrapyd" application object.

    Reads ``http_port``, ``bind_address`` (default ``'0.0.0.0'``) and
    ``poll_interval`` from *config*, registers the poller, egg storage,
    scheduler, environment and pub/sub components, then attaches the
    pub/sub, launcher, poll timer and web server services.

    The scheduler, pub/sub and launcher classes are resolved with
    ``load_object`` from dotted paths in *config*, each with a default.
    """
    app = Application("Scrapyd")
    port = config.getint('http_port', 6800)
    address = config.get('bind_address', '0.0.0.0')
    interval = config.getfloat('poll_interval', 5)

    poller = QueuePoller(config)
    eggstorage = FilesystemEggStorage(config)

    scheduler_cls = load_object(
        config.get('scheduler', 'scrapyd.scheduler.SpiderScheduler'))
    scheduler = scheduler_cls(config, app)

    environment = Environment(config)

    pubsub_cls = load_object(config.get('pubsub', 'scrapyd.pubsub.BasePubSub'))
    pubsub = pubsub_cls(config, app)

    # Register components only after every one of them has been constructed.
    components = (
        (IPoller, poller),
        (IEggStorage, eggstorage),
        (ISpiderScheduler, scheduler),
        (IEnvironment, environment),
        (IPubSub, pubsub),
    )
    for interface, component in components:
        app.setComponent(interface, component)

    launcher_cls = load_object(
        config.get('launcher', 'scrapyd.launcher.Launcher'))
    launcher = launcher_cls(config, app)

    # Timer service built from the poll interval and the poller's poll method.
    timer = TimerService(interval, poller.poll)
    site = server.Site(Root(config, app))
    webservice = TCPServer(port, site, interface=address)
    log.msg(
        format="Scrapyd web console available at http://%(bind_address)s:%(http_port)s/",
        bind_address=address,
        http_port=port,
    )

    for service in (pubsub, launcher, timer, webservice):
        service.setServiceParent(app)
    return app
Beispiel #11
0
def application(config):
    """Assemble and return the "Scrapyd" application object.

    When the ``PORT`` environment variable is set it overrides the
    configured ``http_port`` and the server binds to ``'0.0.0.0'``;
    otherwise ``http_port`` and ``bind_address`` come from *config*.

    Registers the poller, egg storage, scheduler and environment
    components, then attaches the launcher, poll timer and web server
    services. The launcher and web root classes are resolved with
    ``load_object`` from dotted paths in *config*, each with a default.
    """
    app = Application("Scrapyd")
    port = config.getint('http_port', 6800)

    if 'PORT' in os.environ:
        # Environment wins over config for both the port and the interface.
        port = int(os.environ.get('PORT'))
        address = '0.0.0.0'
    else:
        address = config.get('bind_address', '127.0.0.1')

    interval = config.getfloat('poll_interval', 5)

    poller = QueuePoller(config)
    components = (
        (IPoller, poller),
        (IEggStorage, FilesystemEggStorage(config)),
        (ISpiderScheduler, SpiderScheduler(config)),
        (IEnvironment, Environment(config)),
    )
    for interface, component in components:
        app.setComponent(interface, component)

    launcher_cls = load_object(
        config.get('launcher', 'scrapyd.launcher.Launcher'))
    launcher = launcher_cls(config, app)

    root_cls = load_object(config.get('webroot', 'scrapyd.website.Root'))

    # Timer service built from the poll interval and the poller's poll method.
    timer = TimerService(interval, poller.poll)
    site = server.Site(root_cls(config, app))
    webservice = TCPServer(port, site, interface=address)
    log.msg(
        format="Scrapyd web console available at http://%(bind_address)s:%(http_port)s/",
        bind_address=address,
        http_port=port,
    )

    for service in (launcher, timer, webservice):
        service.setServiceParent(app)

    return app