def setUp(self):
    orig_spiders_dir = os.path.join(module_dir, 'test_spiders')
    self.tmpdir = self.mktemp()
    os.mkdir(self.tmpdir)
    self.spiders_dir = os.path.join(self.tmpdir, 'test_spiders_xxx')
    shutil.copytree(orig_spiders_dir, self.spiders_dir)
    sys.path.append(self.tmpdir)
    self.spiderman = SpiderManager(['test_spiders_xxx'])
import os
import shutil
import sys

from twisted.trial import unittest
from zope.interface.verify import verifyObject

from scrapy.http import Request
from scrapy.interfaces import ISpiderManager
from scrapy.spidermanager import SpiderManager

module_dir = os.path.dirname(os.path.abspath(__file__))


class SpiderManagerTest(unittest.TestCase):

    def setUp(self):
        orig_spiders_dir = os.path.join(module_dir, 'test_spiders')
        self.tmpdir = self.mktemp()
        os.mkdir(self.tmpdir)
        self.spiders_dir = os.path.join(self.tmpdir, 'test_spiders_xxx')
        shutil.copytree(orig_spiders_dir, self.spiders_dir)
        sys.path.append(self.tmpdir)
        self.spiderman = SpiderManager(['test_spiders_xxx'])

    def tearDown(self):
        del self.spiderman
        sys.path.remove(self.tmpdir)

    def test_interface(self):
        verifyObject(ISpiderManager, self.spiderman)

    def test_list(self):
        self.assertEqual(set(self.spiderman.list()),
                         set(['spider1', 'spider2', 'spider3', 'spider4']))

    def test_create(self):
        spider1 = self.spiderman.create("spider1")
        self.assertEqual(spider1.__class__.__name__, 'Spider1')
        spider2 = self.spiderman.create("spider2", foo="bar")
        self.assertEqual(spider2.__class__.__name__, 'Spider2')
        self.assertEqual(spider2.foo, 'bar')

    def test_find_by_request(self):
        self.assertEqual(self.spiderman.find_by_request(Request('http://scrapy1.org/test')),
                         ['spider1'])
        self.assertEqual(self.spiderman.find_by_request(Request('http://scrapy2.org/test')),
                         ['spider2'])
        self.assertEqual(set(self.spiderman.find_by_request(Request('http://scrapy3.org/test'))),
                         set(['spider1', 'spider2']))
        self.assertEqual(self.spiderman.find_by_request(Request('http://scrapy999.org/test')),
                         [])
        self.assertEqual(self.spiderman.find_by_request(Request('http://spider3.com')), [])
        self.assertEqual(self.spiderman.find_by_request(Request('http://spider3.com/onlythis')),
                         ['spider3'])

    def test_load_spider_module(self):
        self.spiderman = SpiderManager(
            ['scrapy.tests.test_spidermanager.test_spiders.spider1'])
        assert len(self.spiderman._spiders) == 1

    def test_load_base_spider(self):
        self.spiderman = SpiderManager(
            ['scrapy.tests.test_spidermanager.test_spiders.spider0'])
        assert len(self.spiderman._spiders) == 0

    def test_load_from_crawler(self):
        spider = self.spiderman.create('spider4', a='OK')
        self.assertEqual(spider.a, 'OK')
class SpiderManagerTest(unittest.TestCase):

    def setUp(self):
        orig_spiders_dir = os.path.join(module_dir, 'test_spiders')
        self.tmpdir = self.mktemp()
        os.mkdir(self.tmpdir)
        self.spiders_dir = os.path.join(self.tmpdir, 'test_spiders_xxx')
        shutil.copytree(orig_spiders_dir, self.spiders_dir)
        sys.path.append(self.tmpdir)
        self.spiderman = SpiderManager(['test_spiders_xxx'])

    def tearDown(self):
        del self.spiderman
        del sys.modules['test_spiders_xxx']
        sys.path.remove(self.tmpdir)

    def test_interface(self):
        verifyObject(ISpiderManager, self.spiderman)

    def test_list(self):
        self.assertEqual(set(self.spiderman.list()),
                         set(['spider1', 'spider2', 'spider3', 'spider4']))

    def test_create(self):
        spider1 = self.spiderman.create("spider1")
        self.assertEqual(spider1.__class__.__name__, 'Spider1')
        spider2 = self.spiderman.create("spider2", foo="bar")
        self.assertEqual(spider2.__class__.__name__, 'Spider2')
        self.assertEqual(spider2.foo, 'bar')

    def test_find_by_request(self):
        self.assertEqual(self.spiderman.find_by_request(Request('http://scrapy1.org/test')),
                         ['spider1'])
        self.assertEqual(self.spiderman.find_by_request(Request('http://scrapy2.org/test')),
                         ['spider2'])
        self.assertEqual(set(self.spiderman.find_by_request(Request('http://scrapy3.org/test'))),
                         set(['spider1', 'spider2']))
        self.assertEqual(self.spiderman.find_by_request(Request('http://scrapy999.org/test')),
                         [])
        self.assertEqual(self.spiderman.find_by_request(Request('http://spider3.com')), [])
        self.assertEqual(self.spiderman.find_by_request(Request('http://spider3.com/onlythis')),
                         ['spider3'])

    def test_load_spider_module(self):
        self.spiderman = SpiderManager(
            ['scrapy.tests.test_spidermanager.test_spiders.spider1'])
        assert len(self.spiderman._spiders) == 1

    def test_load_base_spider(self):
        self.spiderman = SpiderManager(
            ['scrapy.tests.test_spidermanager.test_spiders.spider0'])
        assert len(self.spiderman._spiders) == 0

    def test_load_from_crawler(self):
        spider = self.spiderman.create('spider4', a='OK')
        self.assertEqual(spider.a, 'OK')
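For context, here is a minimal sketch (not the actual fixture file) of what a module like test_spiders/spider1.py would need to contain for the assertions above to hold: a Spider1 class named 'spider1' whose domains cover scrapy1.org and scrapy3.org. The spider0 module would define only a base spider with no name, which is why test_load_base_spider expects the manager to load zero spiders.

# Hypothetical fixture sketch, inferred from the assertions above --
# not the actual Scrapy test module.
from scrapy.spider import BaseSpider

class Spider1(BaseSpider):
    name = "spider1"
    # find_by_request() asks each spider whether it handles a request,
    # which for a plain BaseSpider typically comes down to matching the
    # request URL against these domains; that would make both
    # scrapy1.org and scrapy3.org resolve to 'spider1'.
    allowed_domains = ["scrapy1.org", "scrapy3.org"]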
def setUp(self):
    orig_spiders_dir = os.path.join(module_dir, 'test_spiders')
    self.tmpdir = self.mktemp()
    os.mkdir(self.tmpdir)
    self.spiders_dir = os.path.join(self.tmpdir, 'test_spiders_xxx')
    shutil.copytree(orig_spiders_dir, self.spiders_dir)
    sys.path.append(self.tmpdir)
    settings = Settings({'SPIDER_MODULES': ['test_spiders_xxx']})
    self.spiderman = SpiderManager.from_settings(settings)
def test_load_base_spider(self):
    self.spiderman = SpiderManager(
        ['scrapy.tests.test_spidermanager.test_spiders.spider0'])
    assert len(self.spiderman._spiders) == 0
def test_load_spider_module(self):
    self.spiderman = SpiderManager(
        ['scrapy.tests.test_spidermanager.test_spiders.spider1'])
    assert len(self.spiderman._spiders) == 1
def discover_spiders():
    for project in _discover_spider_projects():
        with spider_project(project) as settings:
            manager = SpiderManager.from_settings(settings)
            for spider in manager._spiders.itervalues():
                yield spider
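The two helpers used by discover_spiders are not shown in this snippet. A sketch of what they might look like, assuming each "project" is a directory that contains a scrapy.cfg and a spiders package; the walk root, the 'spiders' module name, and both implementations are assumptions for illustration, not part of any library API.

import os
import sys
from contextlib import contextmanager

from scrapy.settings import Settings

def _discover_spider_projects(root='.'):
    # Hypothetical: treat every directory holding a scrapy.cfg as a project.
    for dirpath, dirnames, filenames in os.walk(root):
        if 'scrapy.cfg' in filenames:
            yield dirpath

@contextmanager
def spider_project(project):
    # Hypothetical: make the project importable, hand back settings that
    # point SPIDER_MODULES at its (assumed) 'spiders' package, then undo
    # the sys.path change afterwards.
    sys.path.insert(0, project)
    try:
        yield Settings({'SPIDER_MODULES': ['spiders']})
    finally:
        sys.path.remove(project)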
def test_load_base_spider(self):
    module = 'tests.test_spidermanager.test_spiders.spider0'
    settings = Settings({'SPIDER_MODULES': [module]})
    self.spiderman = SpiderManager.from_settings(settings)
    assert len(self.spiderman._spiders) == 0
def test_load_spider_module(self):
    prefix = 'tests.test_spidermanager.test_spiders.'
    module = ','.join(prefix + s for s in ('spider1', 'spider2'))
    settings = Settings({'SPIDER_MODULES': module})
    self.spiderman = SpiderManager.from_settings(settings)
    assert len(self.spiderman._spiders) == 2
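Passing the comma-joined string works because Scrapy settings normalize list-type values: getlist() splits string values on ','. A standalone illustration; the module names below are placeholders.

from scrapy.settings import Settings

s = Settings({'SPIDER_MODULES': 'pkg.spiders_a,pkg.spiders_b'})
# getlist() splits comma-separated strings, so this behaves the same
# as passing a real list of module paths.
assert s.getlist('SPIDER_MODULES') == ['pkg.spiders_a', 'pkg.spiders_b']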
def setUp(self):
    orig_spiders_dir = os.path.join(get_projectpath(), 'tests', 'feature', 'searchspiders')
    sys.path.append(orig_spiders_dir)
    self.spiderman = SpiderManager(['feature.searchspiders'])
from inspect import isclass

from twisted.internet import reactor, defer

from scrapy import log
from scrapy.settings import CrawlerSettings
from scrapy.crawler import Crawler
from scrapy.spidermanager import SpiderManager

from oucfeed.crawler import settings, datastore, history
from oucfeed.crawler.uploader import upload

crawler_settings = CrawlerSettings(settings)
log.start_from_settings(crawler_settings)
spidermanager = SpiderManager.from_settings(crawler_settings)


def setup_output():
    crawler_settings.overrides['FEED_URI'] = 'test.js'
    crawler_settings.overrides['FEED_FORMAT'] = 'js'


def init_spider(spider):
    if isinstance(spider, basestring):
        spider = spidermanager.create(spider)
    elif isclass(spider):
        spider = spider()
    return spider
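A short usage sketch for init_spider, which accepts a spider name, a spider class, or an existing instance; the name and class below are placeholders, not part of the snippet above.

# By name: resolved through the module-level spidermanager.
spider = init_spider('somespider')  # 'somespider' is a placeholder name

# By class: instantiated with no arguments.
from scrapy.spider import BaseSpider

class SomeSpider(BaseSpider):
    name = 'somespider'

spider = init_spider(SomeSpider)

# Anything else (e.g. an already-built instance) is returned unchanged.
spider = init_spider(spider)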