class MiddlewareManagerTest(unittest.TestCase):
    """Checks which middlewares ``TestMiddlewareManager`` keeps active."""

    def setUp(self):
        # Capture log output at DEBUG level for the duration of each test.
        wrapper = LogWrapper()
        wrapper.setUp(level=log.DEBUG)
        self.lw = wrapper

    def tearDown(self):
        self.lw.tearDown()

    def _active_classes(self, manager):
        """Return the classes of ``manager.middlewares``, in order."""
        return [middleware.__class__ for middleware in manager.middlewares]

    def test_init(self):
        """With default arguments only M1 and M2 are active; MOff is logged."""
        manager = TestMiddlewareManager(get_engine())
        self.assertListEqual(self._active_classes(manager), [M1, M2])

        first_line = self.lw.get_first_line()
        self.assertEqual(
            first_line,
            "[crawlmi] DEBUG: Disabled <class 'crawlmi.tests.test_middleware_manager.MOff'>:")

    def test_init2(self):
        """Passing ``mw_classes`` explicitly yields the same active list."""
        manager = TestMiddlewareManager(get_engine(),
                                        mw_classes=[M1, M2, MOff])
        self.assertListEqual(self._active_classes(manager), [M1, M2])

    def test_enabled_setting(self):
        """Middlewares expose ``enabled_setting`` and honour its value."""
        candidates = [M1, M2, MOff]

        manager = TestMiddlewareManager(get_engine(), mw_classes=candidates)
        self.assertListEqual(self._active_classes(manager), [M1, M2])
        first, second = manager.middlewares[0], manager.middlewares[1]
        self.assertEqual(first.enabled_setting, 'M1_ENABLED')
        self.assertEqual(second.enabled_setting, 'M2_OFF')

        # Switching both settings off leaves no active middleware at all.
        engine = get_engine(M1_ENABLED=False, M2_OFF=False)
        manager = TestMiddlewareManager(engine, mw_classes=candidates)
        self.assertListEqual(self._active_classes(manager), [])
class MiddlewareManagerTest(unittest.TestCase):
    """Tests the set of middlewares activated by ``TestMiddlewareManager``."""

    def setUp(self):
        # Log output is captured at DEBUG level so test_init can inspect it.
        self.lw = LogWrapper()
        self.lw.setUp(level=log.DEBUG)

    def tearDown(self):
        self.lw.tearDown()

    def _make_manager(self, **engine_settings):
        """Build a manager over M1, M2 and MOff with the given settings."""
        engine = get_engine(**engine_settings)
        return TestMiddlewareManager(engine, mw_classes=[M1, M2, MOff])

    def test_init(self):
        """Default construction activates M1 and M2 and logs MOff."""
        mw = TestMiddlewareManager(get_engine())
        enabled = [item.__class__ for item in mw.middlewares]
        self.assertListEqual(enabled, [M1, M2])

        expected_line = "[crawlmi] DEBUG: Disabled <class 'crawlmi.tests.test_middleware_manager.MOff'>:"
        self.assertEqual(self.lw.get_first_line(), expected_line)

    def test_init2(self):
        """An explicit ``mw_classes`` list gives the same active classes."""
        mw = self._make_manager()
        enabled = [item.__class__ for item in mw.middlewares]
        self.assertListEqual(enabled, [M1, M2])

    def test_enabled_setting(self):
        """``enabled_setting`` names the setting that toggles a middleware."""
        mw = self._make_manager()
        enabled = [item.__class__ for item in mw.middlewares]
        self.assertListEqual(enabled, [M1, M2])

        setting_names = [item.enabled_setting for item in mw.middlewares[:2]]
        self.assertEqual(setting_names[0], 'M1_ENABLED')
        self.assertEqual(setting_names[1], 'M2_OFF')

        # With both settings set to False nothing stays active.
        mw = self._make_manager(M1_ENABLED=False, M2_OFF=False)
        enabled = [item.__class__ for item in mw.middlewares]
        self.assertListEqual(enabled, [])
class LogStatsTest(unittest.TestCase):
    """Drives ``LogStats`` with a manually advanced ``Clock``."""

    def setUp(self):
        clock = Clock()
        engine = get_engine(LOG_STATS_INTERVAL=30)
        engine.signals = SignalManager(engine)

        self.clock = clock
        self.engine = engine
        self.ls = LogStats(engine, clock=clock)

        self.lw = LogWrapper()
        self.lw.setUp()

    def tearDown(self):
        self.lw.tearDown()

    def _send(self, signal, **kwargs):
        """Send ``signal`` through the engine's signal manager."""
        self.engine.signals.send(signal, **kwargs)

    def _advance_and_expect(self, seconds, expected_line):
        """Advance the clock, then compare the first logged line."""
        self.clock.advance(seconds)
        self.assertEqual(self.lw.get_first_line(), expected_line)

    def test_config(self):
        """An interval of 0 makes ``LogStats`` raise ``NotConfigured``."""
        unconfigured_engine = get_engine(LOG_STATS_INTERVAL=0)
        self.assertRaises(NotConfigured, LogStats, unconfigured_engine)

    def test_basic(self):
        """Lines are logged every 30 clock seconds while the engine runs."""
        # engine is stopped
        self._advance_and_expect(60, '')

        # start the engine
        self._send(signals.engine_started)
        self._advance_and_expect(29, '')
        self._advance_and_expect(
            1, '[crawlmi] INFO: Crawled 0 pages (at 0 pages/min).')

        # download some responses
        for _ in range(2):
            self._send(signals.response_downloaded, response=Response(url=''))
        self._send(signals.response_received, response=Response(url=''))
        self._advance_and_expect(
            30, '[crawlmi] INFO: Crawled 2 pages (at 4 pages/min).')

        # stop the engine
        self._send(signals.engine_stopped)
        self._advance_and_expect(60, '')
class SpiderManagerTest(unittest.TestCase):
    """Tests ``SpiderManager`` against a temporary copy of ``test_spiders``."""

    def setUp(self):
        # Copy the fixture package into a fresh directory under the name
        # ``test_spiders_xxx`` and make that directory importable.
        source_dir = os.path.join(module_dir, 'test_spiders')
        self.tmpdir = self.mktemp()
        os.mkdir(self.tmpdir)
        self.spiders_dir = os.path.join(self.tmpdir, 'test_spiders_xxx')
        shutil.copytree(source_dir, self.spiders_dir)
        sys.path.append(self.tmpdir)

        self.manager = SpiderManager(
            Settings({'SPIDER_MODULES': ['test_spiders_xxx']}))

        self.lw = LogWrapper()
        self.lw.setUp()

    def tearDown(self):
        sys.path.remove(self.tmpdir)
        self.lw.tearDown()

    def _assert_first_line_startswith(self, prefix):
        """Assert that the first captured log line begins with ``prefix``."""
        self.assertTrue(self.lw.get_first_line().startswith(prefix))

    def test_get_spiders(self):
        """All three fixture spiders are discovered."""
        found = set(self.manager.get_spiders())
        self.assertSetEqual(found, {'spider1', 'spider2', 'spider3'})

    def test_create_by_name(self):
        """Spiders are created by name; extra keys become attributes."""
        create = self.manager.create_spider_by_name

        first = create('spider1')
        self.assertEqual(first.__class__.__name__, 'Spider1')

        second = create('spider2', {'p1': 1, 'p2': 2})
        self.assertEqual(second.__class__.__name__, 'Spider2')
        self.assertEqual(second.p1, 1)
        self.assertEqual(second.p2, 2)

    def test_get_spiders_by_url(self):
        """URL lookup returns the names of the spiders handling the URL."""
        lookup = self.manager.get_spiders_by_url

        self.assertEqual(lookup('http://crawlmi1.org/test'), ['spider1'])
        self.assertEqual(lookup('http://crawlmi2.org/test'), ['spider2'])
        # Two spiders match here; the order is not asserted.
        self.assertEqual(set(lookup('http://crawlmi3.org/test')),
                         {'spider1', 'spider2'})
        self.assertEqual(lookup('http://crawlmi999.org/test'), [])
        self.assertEqual(lookup('http://spider3.com'), ['spider3'])

    def test_create_spiders_by_url(self):
        """Creation by URL needs exactly one match; otherwise logs an error."""
        create = self.manager.create_spider_by_url

        unique = create('http://crawlmi1.org/test')
        self.assertEqual(unique.__class__.__name__, 'Spider1')

        ambiguous = create('http://crawlmi3.org/test')
        self.assertIsNone(ambiguous)
        self._assert_first_line_startswith(
            '[crawlmi] ERROR: More than one spider can handle:')

        missing = create('http://crawlmi999.org/test')
        self.assertIsNone(missing)
        self._assert_first_line_startswith(
            '[crawlmi] ERROR: Unable to find spider that handles:')

    def test_load_spider_module(self):
        """Pointing SPIDER_MODULES at the spider1 module loads one spider."""
        module_path = 'crawlmi.tests.test_spider_manager.test_spiders.spider1'
        self.manager = SpiderManager(
            Settings({'SPIDER_MODULES': [module_path]}))
        self.assertEqual(len(self.manager._spiders), 1)

    def test_load_base_spider(self):
        """Pointing SPIDER_MODULES at the spider0 module loads no spiders."""
        module_path = 'crawlmi.tests.test_spider_manager.test_spiders.spider0'
        self.manager = SpiderManager(
            Settings({'SPIDER_MODULES': [module_path]}))
        self.assertEqual(len(self.manager._spiders), 0)
class CrawlmiFileLogObserverTest(unittest.TestCase):
    """Tests the log lines captured by ``LogWrapper`` for ``log`` and ``txlog``.

    Every test emits one or more messages and then compares the captured
    output with the expected ``[system] LEVEL: message`` line.
    """

    def setUp(self):
        self.lw = LogWrapper()
        # Presumably (level, encoding); test_msg_encoding expects the UTF-8
        # bytes of the pound sign -- confirm against LogWrapper.setUp.
        self.lw.setUp(log.INFO, 'utf-8')

    def tearDown(self):
        # Several tests log errors on purpose; flush them before detaching
        # the wrapper.
        self.flushLoggedErrors()
        self.lw.tearDown()

    def test_msg_basic(self):
        """A plain message is logged at INFO."""
        log.msg('Hello')
        self.assertEqual(self.lw.get_first_line(), '[crawlmi] INFO: Hello')

    def test_format(self):
        """``format`` plus keyword arguments is interpolated."""
        log.msg(format='%(hi)s', hi='Hello')
        self.assertEqual(self.lw.get_first_line(), '[crawlmi] INFO: Hello')

    def test_msg_level1(self):
        """The level may be given as a keyword argument."""
        log.msg('Hello', level=log.WARNING)
        self.assertEqual(self.lw.get_first_line(), '[crawlmi] WARNING: Hello')

    def test_msg_level2(self):
        """The level may be given as the second positional argument."""
        log.msg('Hello', log.WARNING)
        self.assertEqual(self.lw.get_first_line(), '[crawlmi] WARNING: Hello')

    def test_msg_wrong_level(self):
        """An unknown numeric level is rendered as NOLEVEL."""
        log.msg('Hello', level=9999)
        self.assertEqual(self.lw.get_first_line(), '[crawlmi] NOLEVEL: Hello')

    def test_msg_encoding(self):
        """A unicode message comes out as UTF-8 encoded bytes."""
        log.msg(u'Price: \xa3100')
        self.assertEqual(self.lw.get_first_line(), '[crawlmi] INFO: Price: \xc2\xa3100')

    def test_msg_ignore_level(self):
        """Messages below the configured INFO level are dropped."""
        log.msg('Hello', level=log.DEBUG)
        log.msg('World', level=log.INFO)
        self.assertEqual(self.lw.get_first_line(), '[crawlmi] INFO: World')

    def test_msg_ignore_system(self):
        """A plain ``txlog.msg`` call produces no captured line."""
        txlog.msg('Hello')
        self.assertEqual(self.lw.get_first_line(), '')

    def test_msg_ignore_system_err(self):
        """A ``txlog.err`` call is still captured, under the ``-`` system."""
        txlog.err('Hello')
        self.assertEqual(self.lw.get_first_line(), '[-] ERROR: \'Hello\'')

    def test_err_noargs(self):
        """``log.err()`` with no arguments logs the exception being handled."""
        try:
            1 / 0
        except ZeroDivisionError:
            # Called without arguments inside the handler; the assertions
            # below expect the traceback of the active exception.
            log.err()
        logged = self.lw.get_logged()
        self.assertIn('Traceback', logged)
        self.assertIn('ZeroDivisionError', logged)

    def test_err_why(self):
        """The ``why`` text becomes the first line; the error follows."""
        log.err(TypeError('bad type'), 'Wrong type')
        # clear=False so get_first_line() below still has output to read.
        logged = self.lw.get_logged(clear=False)
        self.assertEqual(self.lw.get_first_line(), '[crawlmi] ERROR: Wrong type')
        self.assertIn('TypeError', logged)
        self.assertIn('bad type', logged)

    def test_error_outside_crawlmi(self):
        """Crawlmi logger should still print outside errors"""
        txlog.err(TypeError('bad type'), 'Wrong type')
        # clear=False so get_first_line() below still has output to read.
        logged = self.lw.get_logged(clear=False)
        self.assertEqual(self.lw.get_first_line(), '[-] ERROR: Wrong type')
        self.assertIn('TypeError', logged)
        self.assertIn('bad type', logged)

    # this test fails in twisted trial observer, not in crawlmi observer
    # def test_err_why_encoding(self):
    #     log.err(TypeError('bad type'), u'\xa3')
    #     self.assertEqual(self.lw.get_first_line(), '[crawlmi] ERROR: \xc2\xa3')

    def test_err_exc(self):
        """An exception logged without ``why`` is reported as unhandled."""
        log.err(TypeError('bad type'))
        logged = self.lw.get_logged()
        self.assertIn('Unhandled Error', logged)
        self.assertIn('TypeError', logged)
        self.assertIn('bad type', logged)

    def test_err_failure(self):
        """A ``Failure`` logged without ``why`` is reported as unhandled."""
        log.err(failure.Failure(TypeError('bad type')))
        logged = self.lw.get_logged()
        self.assertIn('Unhandled Error', logged)
        self.assertIn('TypeError', logged)
        self.assertIn('bad type', logged)
class StatsTest(unittest.TestCase):
    """Tests for the ``MemoryStats`` and ``DummyStats`` collectors."""

    def setUp(self):
        # Capture log output so the dump_stats() result can be inspected.
        self.lw = LogWrapper()
        self.lw.setUp()

    def tearDown(self):
        self.lw.tearDown()

    def test_memory_stats(self):
        """Walk through every ``MemoryStats`` operation, then dump the stats."""
        stats = MemoryStats(get_engine(STATS_DUMP=True))

        # Empty collector and default handling.
        self.assertEqual(stats.get_stats(), {})
        self.assertIsNone(stats.get_value('anything'))
        self.assertEqual(stats.get_value('anything', 'default'), 'default')

        # set_value / get_value
        stats.set_value('test', 'value')
        self.assertEqual(stats.get_stats(), {'test': 'value'})
        stats.set_value('test2', 23)
        self.assertEqual(stats.get_stats(), {'test': 'value', 'test2': 23})
        self.assertEqual(stats.get_value('test2'), 23)

        # inc_value: default step of 1, then an explicit step.
        stats.inc_value('test2')
        self.assertEqual(stats.get_value('test2'), 24)
        stats.inc_value('test2', 6)
        self.assertEqual(stats.get_value('test2'), 30)

        # max_value keeps the larger value and creates missing keys.
        stats.max_value('test2', 6)
        self.assertEqual(stats.get_value('test2'), 30)
        stats.max_value('test2', 40)
        self.assertEqual(stats.get_value('test2'), 40)
        stats.max_value('test3', 1)
        self.assertEqual(stats.get_value('test3'), 1)

        # min_value keeps the smaller value and creates missing keys.
        stats.min_value('test2', 60)
        self.assertEqual(stats.get_value('test2'), 40)
        stats.min_value('test2', 35)
        self.assertEqual(stats.get_value('test2'), 35)
        stats.min_value('test4', 7)
        self.assertEqual(stats.get_value('test4'), 7)

        # add_value: the expected average of 7/3 is consistent with the
        # third argument acting as a weight -- TODO confirm.
        stats.add_value('stats', 3)
        stats.add_value('stats', 2, 2.0)
        statistics = stats.get_value('stats')
        self.assertTrue(eq(statistics.average, 7.0/3.0), statistics.average)
        # 'test4' already holds a plain number, so add_value must refuse it.
        self.assertRaises(RuntimeError, stats.add_value, 'test4', 1)

        # add_sample keeps the samples in insertion order.
        stats.add_sample('samples', 3, 'hello')
        stats.add_sample('samples', 2, 'world')
        samples = stats.get_value('samples')
        self.assertEqual(len(samples), 2)
        self.assertListEqual(samples.samples, [(3, 'hello'), (2, 'world')])
        # NOTE(review): this repeats the add_value check on 'test4' with a
        # third argument; ``stats.add_sample`` may have been intended here
        # -- confirm before changing.
        self.assertRaises(RuntimeError, stats.add_value, 'test4', 5, '!')

        # dump_stats logs a header line followed by every key set above.
        stats.dump_stats()
        logged = self.lw.get_first_line(clear=False)
        self.assertTrue(logged.startswith('[crawlmi] INFO: Dumping crawlmi stats:'))
        logged = self.lw.get_logged()
        self.assertIn('test', logged)
        self.assertIn('test2', logged)
        self.assertIn('test3', logged)
        # Previously 'test3' was asserted twice and 'test4' never checked.
        self.assertIn('test4', logged)
        self.assertIn('stats', logged)
        self.assertIn('samples', logged)

    def test_dummy_stats(self):
        """``DummyStats`` accepts every call but never stores anything."""
        stats = DummyStats(get_engine())
        self.assertEqual(stats.get_stats(), {})
        self.assertIsNone(stats.get_value('anything'))
        self.assertEqual(stats.get_value('anything', 'default'), 'default')

        stats.set_value('test', 'value')
        stats.inc_value('v1')
        stats.max_value('v2', 100)
        stats.min_value('v3', 100)
        stats.set_value('test', 'value')
        stats.add_value('stats', 100)
        stats.add_value('stats', 100, 12)
        stats.add_sample('samples', 3, 'hello')
        self.assertEqual(stats.get_stats(), {})

        # Only checks that dumping an empty collector does not raise.
        stats.dump_stats()