Exemple #1
0
 def configure(self):
     """Build this object's components from ``self.settings``, at most once.

     Does nothing when ``self.configured`` is already true. Otherwise the
     flag is set first, and then the extension manager, the spider manager,
     the execution queue and the execution engine are created and stored
     on ``self`` (``extensions``, ``spiders``, ``queue``, ``engine``).
     """
     if not self.configured:
         # The flag is raised before any component is built.
         self.configured = True
         settings = self.settings
         self.extensions = ExtensionManager.from_settings(settings)

         # The spider manager and spider queue classes are looked up through
         # load_object() from the values stored in the settings.
         spider_manager_cls = load_object(settings['SPIDER_MANAGER_CLASS'])
         self.spiders = spider_manager_cls.from_settings(settings)
         spider_queue_cls = load_object(settings['SPIDER_QUEUE_CLASS'])
         spider_queue = spider_queue_cls.from_settings(settings)

         queue_options = {
             'keep_alive': settings.getbool('KEEP_ALIVE'),
             'poll_interval': settings.getfloat('QUEUE_POLL_INTERVAL'),
         }
         self.queue = ExecutionQueue(self.spiders, spider_queue,
             **queue_options)
         self.engine = ExecutionEngine(settings, self._spider_closed)
Exemple #2
0
 def setUp(self):
     """Create the fixtures used by the tests: a queue, a spider, a request."""
     spider_manager = TestSpiderManager()
     # The second positional argument of the queue is None here.
     self.queue = ExecutionQueue(
         spider_manager,
         None,
         keep_alive=self.keep_alive,
     )
     self.spider = TestSpider()
     self.request = Request('about:none')
Exemple #3
0
class ExecutionQueueTest(unittest.TestCase):
    """Tests for the ``append_*`` entry points of ``ExecutionQueue``.

    Every test starts from a fresh queue built in ``setUp`` and inspects
    ``queue.spider_requests``, whose entries are indexed as
    ``(spider, requests)``.
    """

    # Passed to ExecutionQueue as ``keep_alive`` in setUp().
    keep_alive = False

    def setUp(self):
        """Create a fresh queue, spider and request for each test."""
        self.queue = ExecutionQueue(TestSpiderManager(), None, keep_alive=self.keep_alive)
        self.spider = TestSpider()
        self.request = Request('about:none')

    def tearDown(self):
        """Drop the per-test fixtures."""
        del self.queue, self.spider, self.request

    def test_is_finished(self):
        """The queue is finished when new, and not after a request is added."""
        self.assertTrue(self.queue.is_finished())
        self.queue.append_request(self.request, self.spider)
        self.assertFalse(self.queue.is_finished())

    def test_append_spider(self):
        """append_spider() queues the spider with the expected request URLs."""
        spider = TestSpider()
        self.queue.append_spider(spider)
        self.assertTrue(self.queue.spider_requests[0][0] is spider)
        self._assert_request_urls(self.queue.spider_requests[0][1],
            ["http://www.example.com/1", "http://www.example.com/2"])

    def test_append_request1(self):
        """append_request() with a spider queues that exact spider and request."""
        spider = TestSpider()
        request = Request('about:blank')
        self.queue.append_request(request, spider=spider)
        self.assertTrue(self.queue.spider_requests[0][0] is spider)
        self.assertTrue(self.queue.spider_requests[0][1][0] is request)

    def test_append_request2(self):
        """append_request() without a spider queues one named
        'create_for_request' that carries the extra keyword argument."""
        request = Request('about:blank')
        self.queue.append_request(request, arg='123')
        spider = self.queue.spider_requests[0][0]
        self.assertEqual(spider.name, 'create_for_request')
        self.assertEqual(spider.arg, '123')

    def test_append_url(self):
        """append_url() with a positional URL queues the expected requests."""
        spider = TestSpider()
        url = 'http://www.example.com/asd'
        self.queue.append_url(url, spider=spider)
        self.assertTrue(self.queue.spider_requests[0][0] is spider)
        self._assert_request_urls(self.queue.spider_requests[0][1],
            ['http://www.example.com/asd/make1', 'http://www.example.com/asd/make2'])

    def test_append_url_kwarg(self):
        """append_url() also accepts the URL as the ``url`` keyword."""
        spider = TestSpider()
        url = 'http://www.example.com/asd'
        self.queue.append_url(url=url, spider=spider)
        self.assertTrue(self.queue.spider_requests[0][0] is spider)
        self._assert_request_urls(self.queue.spider_requests[0][1],
            ['http://www.example.com/asd/make1', 'http://www.example.com/asd/make2'])

    def test_append_url2(self):
        """append_url() without a spider queues one named
        'create_for_request' that carries the extra keyword argument."""
        url = 'http://www.example.com/asd'
        self.queue.append_url(url, arg='123')
        self._assert_request_urls(self.queue.spider_requests[0][1],
            ['http://www.example.com/asd/make1', 'http://www.example.com/asd/make2'])
        spider = self.queue.spider_requests[0][0]
        self.assertEqual(spider.name, 'create_for_request')
        self.assertEqual(spider.arg, '123')

    def test_append_spider_name(self):
        """append_spider_name() queues a spider with that name and argument."""
        self.queue.append_spider_name('test123', arg='123')
        spider = self.queue.spider_requests[0][0]
        self.assertEqual(spider.name, 'test123')
        self.assertEqual(spider.arg, '123')

    def test_append_spider_name_kwarg(self):
        """append_spider_name() also accepts the name as the ``name`` keyword."""
        self.queue.append_spider_name(name='test123', arg='123')
        spider = self.queue.spider_requests[0][0]
        self.assertEqual(spider.name, 'test123')
        self.assertEqual(spider.arg, '123')

    def test_append_next(self):
        """_append_next() handles a popped message whose keys are unicode."""
        # the reason for this test: http://dev.scrapy.org/ticket/250
        class MockQueue(object):
            def pop(self):
                return {u'name': u'test123', u'test': u'hello'}
        self.queue._queue = MockQueue()
        self.queue._append_next()
        spider = self.queue.spider_requests[0][0]
        self.assertEqual(spider.name, 'test123')
        self.assertEqual(spider.test, 'hello')

    def _assert_request_urls(self, requests, urls):
        """Check that ``requests`` are all Request objects with URLs ``urls``.

        Uses TestCase assertions rather than a bare ``assert`` so the type
        check still runs when Python is started with ``-O``.
        """
        self.assertTrue(all(isinstance(x, Request) for x in requests))
        self.assertEqual([x.url for x in requests], urls)