Ejemplo n.º 1
0
    def test_enabled_setting(self):
        """Check which middlewares are active depending on their settings.

        With no overrides only M1 and M2 are active; with ``M1_ENABLED`` and
        ``M2_OFF`` both set to False no middleware is active.
        """
        manager = TestMiddlewareManager(
            get_engine(), mw_classes=[M1, M2, MOff])
        active_classes = [item.__class__ for item in manager.middlewares]
        self.assertListEqual(active_classes, [M1, M2])
        # Each active middleware reports the name of the setting guarding it.
        self.assertEqual(manager.middlewares[0].enabled_setting, 'M1_ENABLED')
        self.assertEqual(manager.middlewares[1].enabled_setting, 'M2_OFF')

        # Switching both settings off leaves nothing active.
        disabled_engine = get_engine(M1_ENABLED=False, M2_OFF=False)
        manager = TestMiddlewareManager(
            disabled_engine, mw_classes=[M1, M2, MOff])
        active_classes = [item.__class__ for item in manager.middlewares]
        self.assertListEqual(active_classes, [])
    def test_enabled_setting(self):
        """Verify the ``enabled_setting`` names and their effect on activation.

        M1 and M2 are active by default and expose 'M1_ENABLED' and 'M2_OFF'
        as their setting names; setting both to False deactivates them.
        """
        default_engine = get_engine()
        manager = TestMiddlewareManager(default_engine,
                                        mw_classes=[M1, M2, MOff])
        self.assertListEqual(
            [obj.__class__ for obj in manager.middlewares], [M1, M2])
        self.assertEqual(manager.middlewares[0].enabled_setting, 'M1_ENABLED')
        self.assertEqual(manager.middlewares[1].enabled_setting, 'M2_OFF')

        # Same classes, but with both settings explicitly set to False.
        overridden_engine = get_engine(M1_ENABLED=False, M2_OFF=False)
        manager = TestMiddlewareManager(overridden_engine,
                                        mw_classes=[M1, M2, MOff])
        self.assertListEqual(
            [obj.__class__ for obj in manager.middlewares], [])
Ejemplo n.º 3
0
    def test_process_request(self):
        """RandomUserAgent sets User-Agent and keeps one that is already set."""
        first_mw = RandomUserAgent(get_engine(RANDOM_USER_AGENT_LIST=['a']))
        req = Request('http://github.com/')
        req = first_mw.process_request(req)
        self.assertEqual(req.headers['User-Agent'], 'a')

        # A second middleware that only offers 'b' must not overwrite the
        # header value already present on the request.
        second_mw = RandomUserAgent(get_engine(RANDOM_USER_AGENT_LIST=['b']))
        req = second_mw.process_request(req)
        self.assertEqual(req.headers['User-Agent'], 'a')
    def setUp(self):
        """Create the DownloaderStats middleware and a request/response pair.

        Stores the engine's stats object on ``self.stats`` so tests can
        inspect it directly.
        """
        test_engine = get_engine()
        self.stats = test_engine.stats
        self.mw = DownloaderStats(test_engine)

        request = Request('http://github.com')
        self.req = request
        # The response carries status 400 and is linked to the request above.
        self.resp = Response('scrapytest.org', request=request, status=400)
    def setUp(self):
        """Prepare ``self.stats``, ``self.mw``, ``self.req`` and ``self.resp``."""
        fixture_engine = get_engine()
        # Keep a handle on the engine's stats next to the middleware.
        self.stats = fixture_engine.stats
        self.mw = DownloaderStats(fixture_engine)

        self.req = Request('http://github.com')
        response_kwargs = {'status': 400, 'request': self.req}
        self.resp = Response('scrapytest.org', **response_kwargs)
Ejemplo n.º 6
0
    def test_init(self):
        """Default construction activates M1 and M2 and logs MOff as disabled."""
        manager = TestMiddlewareManager(get_engine())
        active_classes = [item.__class__ for item in manager.middlewares]
        self.assertListEqual(active_classes, [M1, M2])

        # The first logged line must report the disabled MOff class.
        expected_line = (
            "[crawlmi] DEBUG: Disabled "
            "<class 'crawlmi.tests.test_middleware_manager.MOff'>:")
        first_line = self.lw.get_first_line()
        self.assertEqual(first_line, expected_line)
Ejemplo n.º 7
0
 def setUp(self):
     """Build the LogStats fixture: clock, engine, signals and log wrapper.

     The engine is created with ``LOG_STATS_INTERVAL=30`` and is given a
     new SignalManager before LogStats is constructed.
     """
     clock = Clock()
     engine = get_engine(LOG_STATS_INTERVAL=30)
     engine.signals = SignalManager(engine)
     self.clock = clock
     self.engine = engine
     self.ls = LogStats(engine, clock=clock)
     # Start the log wrapper last, after the fixture objects exist.
     self.lw = LogWrapper()
     self.lw.setUp()
 def setUp(self):
     """Create ``self.clock``, ``self.engine``, ``self.ls`` and ``self.lw``."""
     self.clock = Clock()

     # Engine with a 30 LOG_STATS_INTERVAL and its own SignalManager.
     test_engine = get_engine(LOG_STATS_INTERVAL=30)
     test_engine.signals = SignalManager(test_engine)
     self.engine = test_engine

     self.ls = LogStats(test_engine, clock=self.clock)

     log_wrapper = LogWrapper()
     self.lw = log_wrapper
     log_wrapper.setUp()
Ejemplo n.º 9
0
 def setUp(self):
     """Set up an engine with logging off and the test Pipeline registered.

     After ``engine.setup()`` a SignalProcessor is attached and the
     ``Pipeline.obj`` attribute is stored on ``self.pipeline``.
     """
     overrides = {
         'LOG_ENABLED': False,
         'PIPELINE_BASE': {'crawlmi.tests.test_engine.Pipeline': 10},
     }
     engine = get_engine(**overrides)
     self.engine = engine
     # The clock reference is taken before setup() is called.
     self.clock = engine.clock
     engine.setup()
     self.sp = SignalProcessor(engine)
     self.pipeline = Pipeline.obj
    def test_init(self):
        """Check the default active middlewares and the 'Disabled' log line."""
        default_engine = get_engine()
        manager = TestMiddlewareManager(default_engine)
        self.assertListEqual(
            [obj.__class__ for obj in manager.middlewares], [M1, M2])

        # MOff is expected to be reported on the first logged line.
        expected = ("[crawlmi] DEBUG: Disabled <class "
                    "'crawlmi.tests.test_middleware_manager.MOff'>:")
        self.assertEqual(self.lw.get_first_line(), expected)
Ejemplo n.º 11
0
    def test_decode_chunked_transfer(self):
        """Decode a four-chunk body and compare it with the joined payload."""
        ct = ChunkedTransfer(get_engine())

        # Each entry is "<hex length>\r\n" followed by the payload and a
        # trailing "\r\n"; the zero-length chunk terminates the body.
        chunks = [
            '25\r\n' + 'This is the data in the first chunk\r\n\r\n',
            '1C\r\n' + 'and this is the second one\r\n\r\n',
            '3\r\n' + 'con\r\n',
            '8\r\n' + 'sequence\r\n',
            '0\r\n\r\n',
        ]
        chunked_body = ''.join(chunks)

        expected = ''.join([
            'This is the data in the first chunk\r\n',
            'and this is the second one\r\n',
            'consequence',
        ])
        decoded = ct._decode_chunked_transfer(chunked_body)
        self.assertEqual(decoded, expected)
    def test_process_request(self):
        """DuplicateFilter drops a repeated URL until the filter is cleared."""
        engine = get_engine()
        dup_filter = DuplicateFilter(engine)

        urls = ('http://test.org/1', 'http://test.org/2', 'http://test.org/2')
        first, second, repeat = [Request(url) for url in urls]

        # New URLs pass through unchanged; the repeated URL yields None.
        self.assertIs(dup_filter.process_request(first), first)
        self.assertIs(dup_filter.process_request(second), second)
        self.assertIsNone(dup_filter.process_request(repeat))

        # After the clear signal the previously dropped request is accepted.
        engine.signals.send(clear_duplicate_filter)
        self.assertIs(dup_filter.process_request(repeat), repeat)
Ejemplo n.º 13
0
    def test_memory_stats(self):
        """Exercise the MemoryStats API end to end, including the log dump.

        Covers get/set, inc/max/min, weighted ``add_value`` statistics,
        ``add_sample`` collections, the RuntimeError raised when
        ``add_value`` targets a key that already holds a plain value, and
        the output logged by ``dump_stats``.
        """
        stats = MemoryStats(get_engine(STATS_DUMP=True))

        # Empty collector: no stats, missing keys fall back to the default.
        self.assertEqual(stats.get_stats(), {})
        self.assertIsNone(stats.get_value('anything'))
        self.assertEqual(stats.get_value('anything', 'default'), 'default')

        # Plain set/get.
        stats.set_value('test', 'value')
        self.assertEqual(stats.get_stats(), {'test': 'value'})
        stats.set_value('test2', 23)
        self.assertEqual(stats.get_stats(), {'test': 'value', 'test2': 23})
        self.assertEqual(stats.get_value('test2'), 23)

        # Increment by the default step and by an explicit amount.
        stats.inc_value('test2')
        self.assertEqual(stats.get_value('test2'), 24)
        stats.inc_value('test2', 6)
        self.assertEqual(stats.get_value('test2'), 30)

        # max_value only raises the stored value; it creates missing keys.
        stats.max_value('test2', 6)
        self.assertEqual(stats.get_value('test2'), 30)
        stats.max_value('test2', 40)
        self.assertEqual(stats.get_value('test2'), 40)
        stats.max_value('test3', 1)
        self.assertEqual(stats.get_value('test3'), 1)

        # min_value only lowers the stored value; it creates missing keys.
        stats.min_value('test2', 60)
        self.assertEqual(stats.get_value('test2'), 40)
        stats.min_value('test2', 35)
        self.assertEqual(stats.get_value('test2'), 35)
        stats.min_value('test4', 7)
        self.assertEqual(stats.get_value('test4'), 7)

        # Weighted average: (3 * 1 + 2 * 2.0) / (1 + 2.0) == 7 / 3.
        stats.add_value('stats', 3)
        stats.add_value('stats', 2, 2.0)
        statistics = stats.get_value('stats')
        self.assertTrue(eq(statistics.average, 7.0/3.0), statistics.average)
        # 'test4' already holds a plain number, so add_value must refuse it.
        self.assertRaises(RuntimeError, stats.add_value, 'test4', 1)

        stats.add_sample('samples', 3, 'hello')
        stats.add_sample('samples', 2, 'world')
        samples = stats.get_value('samples')
        self.assertEqual(len(samples), 2)
        self.assertListEqual(samples.samples, [(3, 'hello'), (2, 'world')])
        # NOTE(review): this repeats the add_value check in the samples
        # section; possibly add_sample was intended here - confirm.
        self.assertRaises(RuntimeError, stats.add_value, 'test4', 5, '!')

        stats.dump_stats()
        logged = self.lw.get_first_line(clear=False)
        self.assertTrue(logged.startswith('[crawlmi] INFO: Dumping crawlmi stats:'))
        logged = self.lw.get_logged()
        # Every key recorded above must show up in the dump. The original
        # asserted 'test3' twice and never checked 'test4'.
        self.assertIn('test', logged)
        self.assertIn('test2', logged)
        self.assertIn('test3', logged)
        self.assertIn('test4', logged)
        self.assertIn('stats', logged)
        self.assertIn('samples', logged)
Ejemplo n.º 14
0
 def test_dummy_stats(self):
     """DummyStats accepts every stats call but never reports any data."""
     dummy = DummyStats(get_engine())

     # Before any call: empty stats, missing keys give None or the default.
     self.assertEqual(dummy.get_stats(), {})
     self.assertEqual(dummy.get_value('anything'), None)
     self.assertEqual(dummy.get_value('anything', 'default'), 'default')

     # Call each mutating method; none of them is expected to store data.
     dummy.set_value('test', 'value')
     dummy.inc_value('v1')
     dummy.max_value('v2', 100)
     dummy.min_value('v3', 100)
     dummy.set_value('test', 'value')
     dummy.add_value('stats', 100)
     dummy.add_value('stats', 100, 12)
     dummy.add_sample('samples', 3, 'hello')

     self.assertEqual(dummy.get_stats(), {})
     # Dumping must also run without raising.
     dummy.dump_stats()
    def test_tags(self):
        """Requests with different ``df_tag`` values are filtered separately.

        Clearing the filter for tag '2' lets a tag-'2' request through again
        while the tag-'1' request is still treated as a duplicate.
        """
        engine = get_engine()
        dup_filter = DuplicateFilter(engine)

        url = 'http://test.org/'
        tagged_one = Request(url, meta={'df_tag': '1'})
        tagged_two = Request(url, meta={'df_tag': '2'})
        tagged_two_again = Request(url, meta={'df_tag': '2'})

        # Same URL under two tags passes; a repeat under one tag is dropped.
        self.assertIs(dup_filter.process_request(tagged_one), tagged_one)
        self.assertIs(dup_filter.process_request(tagged_two), tagged_two)
        self.assertIsNone(dup_filter.process_request(tagged_two_again))

        engine.signals.send(clear_duplicate_filter, df_tag='2')

        # Only tag '2' was cleared: tag '1' stays filtered, tag '2' is
        # accepted once and then filtered again.
        self.assertIsNone(dup_filter.process_request(tagged_one))
        self.assertIs(dup_filter.process_request(tagged_two), tagged_two)
        self.assertIsNone(dup_filter.process_request(tagged_two_again))
Ejemplo n.º 16
0
 def setUp(self):
     """Create the Redirect middleware under test on a new engine."""
     engine = get_engine()
     self.mw = Redirect(engine)
Ejemplo n.º 17
0
 def _get_pm(self, *mw_classes):
     """Return a PipelineManager built on a new engine for ``mw_classes``."""
     engine = get_engine()
     manager = PipelineManager(engine, mw_classes=mw_classes)
     return manager
 def setUp(self):
     """Create the HttpCompression middleware under test on a new engine."""
     engine = get_engine()
     self.mw = HttpCompression(engine)
 def setUp(self):
     """Store an HttpCompression instance on ``self.mw`` for each test."""
     compression_mw = HttpCompression(get_engine())
     self.mw = compression_mw
Ejemplo n.º 20
0
 def _get_engine(self, **kwargs):
     """Build an engine from ``kwargs`` and remember its stats object.

     The engine's ``stats`` attribute is stored on ``self.stats``; the
     engine itself is returned.
     """
     new_engine = get_engine(**kwargs)
     self.stats = new_engine.stats
     return new_engine
 def test_init2(self):
     """Passing M1, M2 and MOff explicitly leaves only M1 and M2 active."""
     manager = TestMiddlewareManager(get_engine(),
                                     mw_classes=[M1, M2, MOff])
     active_classes = [item.__class__ for item in manager.middlewares]
     self.assertListEqual(active_classes, [M1, M2])
Ejemplo n.º 22
0
 def setUp(self):
     """Create the Cookies middleware under test on a new engine."""
     engine = get_engine()
     self.mw = Cookies(engine)
 def test_config(self):
     """LogStats must raise NotConfigured when LOG_STATS_INTERVAL is 0."""
     # Build the engine first so only the LogStats call is under test.
     engine = get_engine(LOG_STATS_INTERVAL=0)
     with self.assertRaises(NotConfigured):
         LogStats(engine)
 def setUp(self):
     """Store a Redirect instance on ``self.mw`` for each test."""
     redirect_mw = Redirect(get_engine())
     self.mw = redirect_mw
Ejemplo n.º 25
0
 def test_empty_list(self):
     """An empty RANDOM_USER_AGENT_LIST still yields more than 10 agents."""
     random_ua = RandomUserAgent(get_engine(RANDOM_USER_AGENT_LIST=[]))
     # With an empty list there should be many default user agents.
     agent_count = len(random_ua.user_agents)
     self.assertGreater(agent_count, 10)
Ejemplo n.º 26
0
 def test_init2(self):
     """Explicit ``mw_classes`` of M1, M2, MOff yields active M1 and M2."""
     default_engine = get_engine()
     manager = TestMiddlewareManager(default_engine,
                                     mw_classes=[M1, M2, MOff])
     self.assertListEqual(
         [obj.__class__ for obj in manager.middlewares], [M1, M2])
 def setUp(self):
     """Create the MetaRefreshRedirect middleware under test."""
     engine = get_engine()
     self.mw = MetaRefreshRedirect(engine)
Ejemplo n.º 28
0
 def setUp(self):
     """Create a Retry middleware and set its ``max_retry_times`` to 2."""
     retry_mw = Retry(get_engine())
     # Pin the retry limit explicitly on the instance.
     retry_mw.max_retry_times = 2
     self.mw = retry_mw
Ejemplo n.º 29
0
 def _get_engine(self, **new_settings):
     """Return an engine built from ``self._get_settings(**new_settings)``."""
     settings = self._get_settings(**new_settings)
     return get_engine(settings)
Ejemplo n.º 30
0
 def setUp(self):
     """Store a Retry middleware with ``max_retry_times = 2`` on ``self.mw``."""
     self.mw = Retry(get_engine())
     # Override the retry limit on the middleware instance.
     setattr(self.mw, 'max_retry_times', 2)
 def setUp(self):
     """Create the Canonical middleware under test on a new engine."""
     engine = get_engine()
     self.mw = Canonical(engine)
Ejemplo n.º 32
0
 def test_config(self):
     """Constructing LogStats with LOG_STATS_INTERVAL=0 raises NotConfigured."""
     zero_interval_engine = get_engine(LOG_STATS_INTERVAL=0)
     # Only the constructor call is expected to raise.
     with self.assertRaises(NotConfigured):
         LogStats(zero_interval_engine)
Ejemplo n.º 33
0
 def setUp(self):
     """Store a MetaRefreshRedirect instance on ``self.mw`` for each test."""
     redirect_mw = MetaRefreshRedirect(get_engine())
     self.mw = redirect_mw
Ejemplo n.º 34
0
 def _get_pm(self, *mw_classes):
     """Build a PipelineManager for the given classes on a new engine.

     ``mw_classes`` is forwarded as the tuple collected from the
     positional arguments.
     """
     pipeline_manager = PipelineManager(get_engine(), mw_classes=mw_classes)
     return pipeline_manager
Ejemplo n.º 35
0
 def setUp(self):
     """Create the engine fixture with logging off and one test pipeline."""
     pipeline_base = {"crawlmi.tests.test_engine.Pipeline": 10}
     test_engine = get_engine(LOG_ENABLED=False,
                              PIPELINE_BASE=pipeline_base)
     self.engine = test_engine
     # Read the clock first, then run the engine's setup().
     self.clock = test_engine.clock
     test_engine.setup()
     self.sp = SignalProcessor(test_engine)
     self.pipeline = Pipeline.obj
Ejemplo n.º 36
0
 def setUp(self):
     """Store a Canonical instance on ``self.mw`` for each test."""
     canonical_mw = Canonical(get_engine())
     self.mw = canonical_mw
 def setUp(self):
     """Create DefaultHeaders and record its headers as one-item lists.

     ``self.defaults`` maps every key of ``self.dh.headers`` to a list
     holding that key's single value.
     """
     self.dh = DefaultHeaders(get_engine())
     self.defaults = dict(
         (name, [value]) for name, value in self.dh.headers.iteritems())
Ejemplo n.º 38
0
 def _get_engine(self, **kwargs):
     """Return ``get_engine(**kwargs)`` after saving its stats on ``self``."""
     created = get_engine(**kwargs)
     # Expose the engine's stats object to the tests via self.stats.
     self.stats = created.stats
     return created
Ejemplo n.º 39
0
 def test_basic(self):
     """ExtensionManager activates E1 and E2 and supports lookup by name.

     ``em['e1']`` must return the E1 instance, while looking up 'eoff'
     must raise KeyError.
     """
     manager = ExtensionManager(get_engine(), mw_classes=[E1, E2, EOff])
     active_classes = [ext.__class__ for ext in manager.middlewares]
     self.assertListEqual(active_classes, [E1, E2])
     self.assertIsInstance(manager['e1'], E1)
     # The EOff extension is not active, so its key must be missing.
     with self.assertRaises(KeyError):
         manager['eoff']