Ejemplo n.º 1
0
class MiddlewareManagerTest(unittest.TestCase):
    '''Checks which middlewares TestMiddlewareManager ends up holding.'''

    def setUp(self):
        # Each test gets its own LogWrapper, set up at DEBUG level.
        self.lw = wrapper = LogWrapper()
        wrapper.setUp(level=log.DEBUG)

    def tearDown(self):
        self.lw.tearDown()

    def _active_classes(self, manager):
        # Classes of the manager's middlewares, in the order it holds them.
        return [middleware.__class__ for middleware in manager.middlewares]

    def test_init(self):
        # Without explicit mw_classes, M1 and M2 are expected to be active.
        manager = TestMiddlewareManager(get_engine())
        self.assertListEqual(self._active_classes(manager), [M1, M2])

        # The first logged line is expected to report MOff as disabled.
        expected = "[crawlmi] DEBUG: Disabled <class 'crawlmi.tests.test_middleware_manager.MOff'>:"
        self.assertEqual(self.lw.get_first_line(), expected)

    def test_init2(self):
        # MOff is passed in explicitly but is expected to be left out.
        manager = TestMiddlewareManager(get_engine(), mw_classes=[M1, M2, MOff])
        self.assertListEqual(self._active_classes(manager), [M1, M2])

    def test_enabled_setting(self):
        # Default engine settings: M1 and M2 stay active and expose the
        # name of the setting that controls them.
        manager = TestMiddlewareManager(get_engine(), mw_classes=[M1, M2, MOff])
        self.assertListEqual(self._active_classes(manager), [M1, M2])
        self.assertEqual(manager.middlewares[0].enabled_setting, 'M1_ENABLED')
        self.assertEqual(manager.middlewares[1].enabled_setting, 'M2_OFF')

        # With both settings set to False no middleware is expected to remain.
        engine = get_engine(M1_ENABLED=False, M2_OFF=False)
        manager = TestMiddlewareManager(engine, mw_classes=[M1, M2, MOff])
        self.assertListEqual(self._active_classes(manager), [])
class MiddlewareManagerTest(unittest.TestCase):
    '''Checks which middlewares TestMiddlewareManager ends up holding.'''

    def setUp(self):
        # Each test gets its own LogWrapper, set up at DEBUG level.
        self.lw = wrapper = LogWrapper()
        wrapper.setUp(level=log.DEBUG)

    def tearDown(self):
        self.lw.tearDown()

    def _classes_of(self, manager):
        # Classes of the manager's middlewares, in the order it holds them.
        return [entry.__class__ for entry in manager.middlewares]

    def test_init(self):
        # Without explicit mw_classes, M1 and M2 are expected to be active.
        manager = TestMiddlewareManager(get_engine())
        self.assertListEqual(self._classes_of(manager), [M1, M2])

        # The first logged line is expected to report MOff as disabled.
        first_line = self.lw.get_first_line()
        self.assertEqual(
            first_line,
            "[crawlmi] DEBUG: Disabled <class 'crawlmi.tests.test_middleware_manager.MOff'>:"
        )

    def test_init2(self):
        # MOff is passed in explicitly but is expected to be left out.
        manager = TestMiddlewareManager(get_engine(), mw_classes=[M1, M2, MOff])
        self.assertListEqual(self._classes_of(manager), [M1, M2])

    def test_enabled_setting(self):
        # Default engine settings: M1 and M2 stay active and expose the
        # name of the setting that controls them.
        manager = TestMiddlewareManager(get_engine(), mw_classes=[M1, M2, MOff])
        self.assertListEqual(self._classes_of(manager), [M1, M2])
        self.assertEqual(manager.middlewares[0].enabled_setting, 'M1_ENABLED')
        self.assertEqual(manager.middlewares[1].enabled_setting, 'M2_OFF')

        # With both settings set to False no middleware is expected to remain.
        switched_off = get_engine(M1_ENABLED=False, M2_OFF=False)
        manager = TestMiddlewareManager(switched_off,
                                        mw_classes=[M1, M2, MOff])
        self.assertListEqual(self._classes_of(manager), [])
 def setUp(self):
     # Build the clock, engine, LogStats instance and log wrapper and keep
     # each one on the test instance. LogStats receives the same Clock
     # that is stored as self.clock.
     self.clock = clock = Clock()
     self.engine = engine = get_engine(LOG_STATS_INTERVAL=30)
     engine.signals = SignalManager(engine)
     self.ls = LogStats(engine, clock=clock)
     self.lw = wrapper = LogWrapper()
     wrapper.setUp()
Ejemplo n.º 4
0
class LogStatsTest(unittest.TestCase):
    '''Tests for LogStats, driven by a manually advanced Clock.'''

    def setUp(self):
        # LogStats is handed the test's own Clock, so the tests control the
        # passage of time through clock.advance().
        self.clock = clock = Clock()
        self.engine = engine = get_engine(LOG_STATS_INTERVAL=30)
        engine.signals = SignalManager(engine)
        self.ls = LogStats(engine, clock=clock)
        self.lw = wrapper = LogWrapper()
        wrapper.setUp()

    def tearDown(self):
        self.lw.tearDown()

    def test_config(self):
        # An interval of 0 is expected to make LogStats raise NotConfigured.
        unconfigured_engine = get_engine(LOG_STATS_INTERVAL=0)
        self.assertRaises(NotConfigured, LogStats, unconfigured_engine)

    def test_basic(self):
        clock = self.clock
        signal_bus = self.engine.signals
        wrapper = self.lw

        # engine is stopped: nothing is expected in the log
        clock.advance(60)
        self.assertEqual(wrapper.get_first_line(), '')

        # start the engine: the first line is expected after 30 seconds
        signal_bus.send(signals.engine_started)
        clock.advance(29)
        self.assertEqual(wrapper.get_first_line(), '')
        clock.advance(1)
        self.assertEqual(wrapper.get_first_line(), '[crawlmi] INFO: Crawled 0 pages (at 0 pages/min).')

        # download some responses: two response_downloaded signals and one
        # response_received signal are sent, and 2 pages are expected
        for signal in (signals.response_downloaded,
                       signals.response_downloaded,
                       signals.response_received):
            signal_bus.send(signal, response=Response(url=''))
        clock.advance(30)
        self.assertEqual(wrapper.get_first_line(), '[crawlmi] INFO: Crawled 2 pages (at 4 pages/min).')

        # stop the engine: nothing more is expected in the log
        signal_bus.send(signals.engine_stopped)
        clock.advance(60)
        self.assertEqual(wrapper.get_first_line(), '')
Ejemplo n.º 5
0
    def setUp(self):
        # Copy the 'test_spiders' directory into a fresh temp dir under the
        # name 'test_spiders_xxx' and put that temp dir on sys.path, so the
        # copy can be referenced by name in SPIDER_MODULES.
        source_dir = os.path.join(module_dir, 'test_spiders')
        self.tmpdir = tmpdir = self.mktemp()
        os.mkdir(tmpdir)
        self.spiders_dir = target_dir = os.path.join(tmpdir, 'test_spiders_xxx')
        shutil.copytree(source_dir, target_dir)
        sys.path.append(tmpdir)

        self.manager = SpiderManager(
            Settings({'SPIDER_MODULES': ['test_spiders_xxx']}))
        self.lw = wrapper = LogWrapper()
        wrapper.setUp()
Ejemplo n.º 6
0
 def setUp(self):
     # Build the clock, engine, LogStats instance and log wrapper and keep
     # each one on the test instance. LogStats receives the same Clock
     # that is stored as self.clock.
     self.clock = test_clock = Clock()
     self.engine = test_engine = get_engine(LOG_STATS_INTERVAL=30)
     test_engine.signals = SignalManager(test_engine)
     self.ls = LogStats(test_engine, clock=test_clock)
     self.lw = log_wrapper = LogWrapper()
     log_wrapper.setUp()
Ejemplo n.º 7
0
class SpiderManagerTest(unittest.TestCase):
    '''Tests for SpiderManager, run against a temp copy of test_spiders.'''

    def setUp(self):
        # Copy the 'test_spiders' directory into a fresh temp dir under the
        # name 'test_spiders_xxx' and put that temp dir on sys.path, so the
        # copy can be referenced by name in SPIDER_MODULES.
        source_dir = os.path.join(module_dir, 'test_spiders')
        self.tmpdir = tmpdir = self.mktemp()
        os.mkdir(tmpdir)
        self.spiders_dir = target_dir = os.path.join(tmpdir, 'test_spiders_xxx')
        shutil.copytree(source_dir, target_dir)
        sys.path.append(tmpdir)

        self.manager = SpiderManager(
            Settings({'SPIDER_MODULES': ['test_spiders_xxx']}))
        self.lw = wrapper = LogWrapper()
        wrapper.setUp()

    def tearDown(self):
        # Undo the sys.path change made in setUp before releasing the log.
        sys.path.remove(self.tmpdir)
        self.lw.tearDown()

    def test_get_spiders(self):
        found = set(self.manager.get_spiders())
        self.assertSetEqual(found, {'spider1', 'spider2', 'spider3'})

    def test_create_by_name(self):
        create = self.manager.create_spider_by_name

        plain = create('spider1')
        self.assertEqual(plain.__class__.__name__, 'Spider1')

        # The second argument is expected to show up as spider attributes.
        configured = create('spider2', {'p1': 1, 'p2': 2})
        self.assertEqual(configured.__class__.__name__, 'Spider2')
        self.assertEqual(configured.p1, 1)
        self.assertEqual(configured.p2, 2)

    def test_get_spiders_by_url(self):
        lookup = self.manager.get_spiders_by_url

        self.assertEqual(lookup('http://crawlmi1.org/test'), ['spider1'])
        self.assertEqual(lookup('http://crawlmi2.org/test'), ['spider2'])
        # Two spiders match here; compare as sets to ignore their order.
        self.assertEqual(set(lookup('http://crawlmi3.org/test')), {'spider1', 'spider2'})
        self.assertEqual(lookup('http://crawlmi999.org/test'), [])
        self.assertEqual(lookup('http://spider3.com'), ['spider3'])

    def test_create_spiders_by_url(self):
        create = self.manager.create_spider_by_url

        # Exactly one spider handles the url.
        unique = create('http://crawlmi1.org/test')
        self.assertEqual(unique.__class__.__name__, 'Spider1')

        # More than one spider handles the url: None plus an error line.
        ambiguous = create('http://crawlmi3.org/test')
        self.assertIsNone(ambiguous)
        first_line = self.lw.get_first_line()
        self.assertTrue(first_line.startswith('[crawlmi] ERROR: More than one spider can handle:'))

        # No spider handles the url: None plus an error line.
        missing = create('http://crawlmi999.org/test')
        self.assertIsNone(missing)
        first_line = self.lw.get_first_line()
        self.assertTrue(first_line.startswith('[crawlmi] ERROR: Unable to find spider that handles:'))

    def test_load_spider_module(self):
        module_path = 'crawlmi.tests.test_spider_manager.test_spiders.spider1'
        self.manager = SpiderManager(Settings({'SPIDER_MODULES': [module_path]}))
        self.assertEqual(len(self.manager._spiders), 1)

    def test_load_base_spider(self):
        # Loading the spider0 module is expected to register no spiders.
        module_path = 'crawlmi.tests.test_spider_manager.test_spiders.spider0'
        self.manager = SpiderManager(Settings({'SPIDER_MODULES': [module_path]}))
        self.assertEqual(len(self.manager._spiders), 0)
class LogStatsTest(unittest.TestCase):
    '''Tests for LogStats, driven by a manually advanced Clock.'''

    def setUp(self):
        # LogStats is handed the test's own Clock, so the tests control the
        # passage of time through clock.advance().
        self.clock = test_clock = Clock()
        self.engine = test_engine = get_engine(LOG_STATS_INTERVAL=30)
        test_engine.signals = SignalManager(test_engine)
        self.ls = LogStats(test_engine, clock=test_clock)
        self.lw = log_wrapper = LogWrapper()
        log_wrapper.setUp()

    def tearDown(self):
        self.lw.tearDown()

    def test_config(self):
        # An interval of 0 is expected to make LogStats raise NotConfigured.
        zero_interval_engine = get_engine(LOG_STATS_INTERVAL=0)
        self.assertRaises(NotConfigured, LogStats, zero_interval_engine)

    def test_basic(self):
        send = self.engine.signals.send
        advance = self.clock.advance
        first_line = self.lw.get_first_line

        # engine is stopped: nothing is expected in the log
        advance(60)
        self.assertEqual(first_line(), '')

        # start the engine: the first line is expected after 30 seconds
        send(signals.engine_started)
        advance(29)
        self.assertEqual(first_line(), '')
        advance(1)
        self.assertEqual(first_line(),
                         '[crawlmi] INFO: Crawled 0 pages (at 0 pages/min).')

        # download some responses: two response_downloaded signals and one
        # response_received signal are sent, and 2 pages are expected
        send(signals.response_downloaded, response=Response(url=''))
        send(signals.response_downloaded, response=Response(url=''))
        send(signals.response_received, response=Response(url=''))
        advance(30)
        self.assertEqual(first_line(),
                         '[crawlmi] INFO: Crawled 2 pages (at 4 pages/min).')

        # stop the engine: nothing more is expected in the log
        send(signals.engine_stopped)
        advance(60)
        self.assertEqual(first_line(), '')
Ejemplo n.º 9
0
 def setUp(self):
     # Create the test's LogWrapper and set it up at DEBUG level.
     self.lw = wrapper = LogWrapper()
     wrapper.setUp(level=log.DEBUG)
Ejemplo n.º 10
0
 def setUp(self):
     # Create the test's LogWrapper and set it up with log.INFO and 'utf-8'
     # (presumably the level and the encoding -- confirm in LogWrapper).
     self.lw = wrapper = LogWrapper()
     wrapper.setUp(log.INFO, 'utf-8')
Ejemplo n.º 11
0
class CrawlmiFileLogObserverTest(unittest.TestCase):
    '''Tests for the lines LogWrapper captures from `log` and `txlog` calls.

    Expected strings are byte strings; the non-ASCII expectation in
    test_msg_encoding is spelled out as UTF-8 bytes.
    '''

    def setUp(self):
        # Set up with log.INFO and 'utf-8' (presumably the level and the
        # encoding -- confirm in LogWrapper).
        self.lw = LogWrapper()
        self.lw.setUp(log.INFO, 'utf-8')

    def tearDown(self):
        # NOTE(review): presumably trial's flushLoggedErrors, so errors the
        # tests log on purpose do not fail them -- confirm.
        self.flushLoggedErrors()
        self.lw.tearDown()

    def test_msg_basic(self):
        log.msg('Hello')
        self.assertEqual(self.lw.get_first_line(), '[crawlmi] INFO: Hello')

    def test_format(self):
        # A message given as format= plus keyword values is interpolated.
        log.msg(format='%(hi)s', hi='Hello')
        self.assertEqual(self.lw.get_first_line(), '[crawlmi] INFO: Hello')

    def test_msg_level1(self):
        # Level passed by keyword.
        log.msg('Hello', level=log.WARNING)
        self.assertEqual(self.lw.get_first_line(), '[crawlmi] WARNING: Hello')

    def test_msg_level2(self):
        # Level passed positionally.
        log.msg('Hello', log.WARNING)
        self.assertEqual(self.lw.get_first_line(), '[crawlmi] WARNING: Hello')

    def test_msg_wrong_level(self):
        # An unknown level is expected to be rendered as NOLEVEL.
        log.msg('Hello', level=9999)
        self.assertEqual(self.lw.get_first_line(), '[crawlmi] NOLEVEL: Hello')

    def test_msg_encoding(self):
        # The unicode pound sign is expected to come out as UTF-8 bytes.
        log.msg(u'Price: \xa3100')
        self.assertEqual(self.lw.get_first_line(), '[crawlmi] INFO: Price: \xc2\xa3100')

    def test_msg_ignore_level(self):
        # The DEBUG message is below the INFO level chosen in setUp.
        log.msg('Hello', level=log.DEBUG)
        log.msg('World', level=log.INFO)
        self.assertEqual(self.lw.get_first_line(), '[crawlmi] INFO: World')

    def test_msg_ignore_system(self):
        # A plain txlog message is expected to leave the log empty.
        txlog.msg('Hello')
        self.assertEqual(self.lw.get_first_line(), '')

    def test_msg_ignore_system_err(self):
        # A txlog error, unlike a txlog message, is expected to be logged.
        txlog.err('Hello')
        self.assertEqual(self.lw.get_first_line(), '[-] ERROR: \'Hello\'')

    def test_err_noargs(self):
        # log.err() is called without arguments from inside the handler; the
        # captured output is expected to contain the traceback.
        try:
            1 / 0
        except ZeroDivisionError:
            log.err()
        logged = self.lw.get_logged()
        self.assertIn('Traceback', logged)
        self.assertIn('ZeroDivisionError', logged)

    def test_err_why(self):
        log.err(TypeError('bad type'), 'Wrong type')
        # clear=False keeps the output available for get_first_line() below.
        logged = self.lw.get_logged(clear=False)
        self.assertEqual(self.lw.get_first_line(), '[crawlmi] ERROR: Wrong type')
        self.assertIn('TypeError', logged)
        self.assertIn('bad type', logged)

    def test_error_outside_crawlmi(self):
        '''Crawlmi logger should still print outside errors'''
        txlog.err(TypeError('bad type'), 'Wrong type')
        # clear=False keeps the output available for get_first_line() below.
        logged = self.lw.get_logged(clear=False)
        self.assertEqual(self.lw.get_first_line(), '[-] ERROR: Wrong type')
        self.assertIn('TypeError', logged)
        self.assertIn('bad type', logged)

    # this test fails in twisted trial observer, not in crawlmi observer
    # def test_err_why_encoding(self):
    #     log.err(TypeError('bad type'), u'\xa3')
    #     self.assertEqual(self.lw.get_first_line(), '[crawlmi] ERROR: \xc2\xa3')

    def test_err_exc(self):
        # An exception logged without a reason is reported as unhandled.
        log.err(TypeError('bad type'))
        logged = self.lw.get_logged()
        self.assertIn('Unhandled Error', logged)
        self.assertIn('TypeError', logged)
        self.assertIn('bad type', logged)

    def test_err_failure(self):
        # Same expectation when the exception is wrapped in a Failure.
        log.err(failure.Failure(TypeError('bad type')))
        logged = self.lw.get_logged()
        self.assertIn('Unhandled Error', logged)
        self.assertIn('TypeError', logged)
        self.assertIn('bad type', logged)
 def setUp(self):
     # Create the test's LogWrapper and set it up at DEBUG level.
     self.lw = log_wrapper = LogWrapper()
     log_wrapper.setUp(level=log.DEBUG)
Ejemplo n.º 13
0
 def setUp(self):
     # Create the test's LogWrapper and set it up with its default arguments.
     self.lw = wrapper = LogWrapper()
     wrapper.setUp()
Ejemplo n.º 14
0
 def setUp(self):
     # Create the test's LogWrapper and set it up with its default arguments.
     self.lw = log_wrapper = LogWrapper()
     log_wrapper.setUp()
Ejemplo n.º 15
0
class StatsTest(unittest.TestCase):
    '''Tests for MemoryStats and DummyStats.'''

    def setUp(self):
        self.lw = LogWrapper()
        self.lw.setUp()

    def tearDown(self):
        self.lw.tearDown()

    def test_memory_stats(self):
        stats = MemoryStats(get_engine(STATS_DUMP=True))

        # A fresh instance is empty and falls back to the given default.
        self.assertEqual(stats.get_stats(), {})
        self.assertIsNone(stats.get_value('anything'))
        self.assertEqual(stats.get_value('anything', 'default'), 'default')

        # set_value / inc_value
        stats.set_value('test', 'value')
        self.assertEqual(stats.get_stats(), {'test': 'value'})
        stats.set_value('test2', 23)
        self.assertEqual(stats.get_stats(), {'test': 'value', 'test2': 23})
        self.assertEqual(stats.get_value('test2'), 23)
        stats.inc_value('test2')
        self.assertEqual(stats.get_value('test2'), 24)
        stats.inc_value('test2', 6)
        self.assertEqual(stats.get_value('test2'), 30)

        # max_value only raises the stored value and creates missing keys.
        stats.max_value('test2', 6)
        self.assertEqual(stats.get_value('test2'), 30)
        stats.max_value('test2', 40)
        self.assertEqual(stats.get_value('test2'), 40)
        stats.max_value('test3', 1)
        self.assertEqual(stats.get_value('test3'), 1)

        # min_value only lowers the stored value and creates missing keys.
        stats.min_value('test2', 60)
        self.assertEqual(stats.get_value('test2'), 40)
        stats.min_value('test2', 35)
        self.assertEqual(stats.get_value('test2'), 35)
        stats.min_value('test4', 7)
        self.assertEqual(stats.get_value('test4'), 7)

        # add_value: the expected average 7/3 matches (3*1 + 2*2) / (1 + 2),
        # i.e. the third argument presumably acts as a weight -- confirm.
        stats.add_value('stats', 3)
        stats.add_value('stats', 2, 2.0)
        statistics = stats.get_value('stats')
        self.assertTrue(eq(statistics.average, 7.0/3.0), statistics.average)
        # 'test4' already holds a plain number, so add_value must refuse it.
        self.assertRaises(RuntimeError, stats.add_value, 'test4', 1)

        # add_sample keeps (value, sample) pairs in insertion order.
        stats.add_sample('samples', 3, 'hello')
        stats.add_sample('samples', 2, 'world')
        samples = stats.get_value('samples')
        self.assertEqual(len(samples), 2)
        self.assertListEqual(samples.samples, [(3, 'hello'), (2, 'world')])
        # NOTE(review): this sits in the add_sample section but calls
        # add_value; confirm whether add_sample was intended here.
        self.assertRaises(RuntimeError, stats.add_value, 'test4', 5, '!')

        # dump_stats is expected to log a header line and every stat name.
        stats.dump_stats()
        logged = self.lw.get_first_line(clear=False)
        self.assertTrue(logged.startswith('[crawlmi] INFO: Dumping crawlmi stats:'))
        logged = self.lw.get_logged()
        self.assertIn('test', logged)
        self.assertIn('test2', logged)
        self.assertIn('test3', logged)
        # Previously 'test3' was asserted twice and 'test4' never checked.
        self.assertIn('test4', logged)
        self.assertIn('stats', logged)
        self.assertIn('samples', logged)

    def test_dummy_stats(self):
        # Every mutator is called once (set_value twice); get_stats() is
        # expected to remain empty afterwards.
        stats = DummyStats(get_engine())
        self.assertEqual(stats.get_stats(), {})
        self.assertIsNone(stats.get_value('anything'))
        self.assertEqual(stats.get_value('anything', 'default'), 'default')
        stats.set_value('test', 'value')
        stats.inc_value('v1')
        stats.max_value('v2', 100)
        stats.min_value('v3', 100)
        stats.set_value('test', 'value')
        stats.add_value('stats', 100)
        stats.add_value('stats', 100, 12)
        stats.add_sample('samples', 3, 'hello')
        self.assertEqual(stats.get_stats(), {})
        # Only checks that dumping does not raise.
        stats.dump_stats()