def setUp(self):
    """Copy the bundled test spiders into a fresh temp dir and build a
    SpiderManager that discovers them via the 'test_spiders_xxx' package."""
    source_dir = os.path.join(module_dir, 'test_spiders')
    self.tmpdir = self.mktemp()
    os.mkdir(self.tmpdir)
    self.spiders_dir = os.path.join(self.tmpdir, 'test_spiders_xxx')
    shutil.copytree(source_dir, self.spiders_dir)
    # Make the copied package importable by its top-level name.
    sys.path.append(self.tmpdir)
    settings = Settings({'SPIDER_MODULES': ['test_spiders_xxx']})
    self.spiderman = SpiderManager.from_settings(settings)
def setUp(self):
    # Copy the packaged 'test_spiders' fixtures into a temp directory so
    # tests can operate on spider files without touching the originals.
    orig_spiders_dir = os.path.join(module_dir, 'test_spiders')
    self.tmpdir = self.mktemp()
    os.mkdir(self.tmpdir)
    # The copy is importable as the top-level package 'test_spiders_xxx'.
    self.spiders_dir = os.path.join(self.tmpdir, 'test_spiders_xxx')
    shutil.copytree(orig_spiders_dir, self.spiders_dir)
    # NOTE(review): appends to sys.path with no visible cleanup here —
    # presumably handled in tearDown; confirm elsewhere in the file.
    sys.path.append(self.tmpdir)
    settings = Settings({'SPIDER_MODULES': ['test_spiders_xxx']})
    self.spiderman = SpiderManager.from_settings(settings)
def discover_spiders():
    """Yield every spider instance registered by every discovered project."""
    for project in _discover_spider_projects():
        with spider_project(project) as settings:
            spiders = SpiderManager.from_settings(settings)._spiders
            # Python 2 codebase: itervalues() iterates lazily.
            for spider in spiders.itervalues():
                yield spider
def test_load_base_spider(self):
    """A module containing only a base spider yields no loadable spiders."""
    module = 'tests.test_spidermanager.test_spiders.spider0'
    settings = Settings({'SPIDER_MODULES': [module]})
    self.spiderman = SpiderManager.from_settings(settings)
    # assertEqual instead of a bare assert: reports the actual count on
    # failure and is not stripped when Python runs with -O.
    self.assertEqual(len(self.spiderman._spiders), 0)
def test_load_spider_module(self):
    """Two concrete spider modules yield exactly two loadable spiders."""
    prefix = 'tests.test_spidermanager.test_spiders.'
    # Pass SPIDER_MODULES as a list, consistent with the sibling tests,
    # instead of relying on Settings to split a comma-joined string.
    modules = [prefix + s for s in ('spider1', 'spider2')]
    settings = Settings({'SPIDER_MODULES': modules})
    self.spiderman = SpiderManager.from_settings(settings)
    # assertEqual instead of a bare assert: reports the actual count on
    # failure and is not stripped when Python runs with -O.
    self.assertEqual(len(self.spiderman._spiders), 2)
def test_load_base_spider(self):
    # spider0 presumably defines only a base/abstract spider class, which
    # the manager must not register as a runnable spider — TODO confirm
    # against the fixture module.
    module = 'tests.test_spidermanager.test_spiders.spider0'
    settings = Settings({'SPIDER_MODULES': [module]})
    self.spiderman = SpiderManager.from_settings(settings)
    assert len(self.spiderman._spiders) == 0
def test_load_spider_module(self):
    prefix = 'tests.test_spidermanager.test_spiders.'
    # Builds a comma-joined string rather than a list; presumably Settings
    # splits comma-separated SPIDER_MODULES values — verify, since the
    # sibling tests pass a list instead.
    module = ','.join(prefix + s for s in ('spider1', 'spider2'))
    settings = Settings({'SPIDER_MODULES': module})
    self.spiderman = SpiderManager.from_settings(settings)
    assert len(self.spiderman._spiders) == 2
from inspect import isclass

from twisted.internet import reactor, defer

from scrapy import log
from scrapy.settings import CrawlerSettings
from scrapy.crawler import Crawler
from scrapy.spidermanager import SpiderManager

from oucfeed.crawler import settings, datastore, history
from oucfeed.crawler.uploader import upload

# Module-level bootstrap: logging is configured from the project settings
# before the SpiderManager is built (import has side effects by design).
crawler_settings = CrawlerSettings(settings)
log.start_from_settings(crawler_settings)
spidermanager = SpiderManager.from_settings(crawler_settings)


def setup_output():
    # Redirect feed export to a local file. 'js' is used as the feed format
    # name — presumably a project-registered exporter; confirm against
    # FEED_EXPORTERS configuration.
    crawler_settings.overrides['FEED_URI'] = 'test.js'
    crawler_settings.overrides['FEED_FORMAT'] = 'js'


def init_spider(spider):
    # Normalize a spider given as a name (str), a class, or an instance
    # into an instance. (Python 2 codebase: basestring.)
    if isinstance(spider, basestring):
        spider = spidermanager.create(spider)
    elif isclass(spider):
        spider = spider()
    return spider
def discover_spiders():
    # Generator yielding every spider registered by every discovered project.
    for project in _discover_spider_projects():
        # spider_project yields a settings object for the project —
        # presumably it also manages sys.path setup/teardown; confirm at
        # its definition.
        with spider_project(project) as settings:
            manager = SpiderManager.from_settings(settings)
            # _spiders maps name -> spider; itervalues() → Python 2.
            for spider in manager._spiders.itervalues():
                yield spider