コード例 #1
0
 def setUp(self):
     """Stage a private copy of the sample spiders and load it.

     The ``test_spiders`` directory under ``module_dir`` is copied to
     ``<tmpdir>/test_spiders_xxx``, the temp dir is appended to
     ``sys.path`` and a ``SpiderManager`` is built for that package name.
     """
     package = 'test_spiders_xxx'
     pristine_dir = os.path.join(module_dir, 'test_spiders')
     self.tmpdir = self.mktemp()
     os.mkdir(self.tmpdir)
     self.spiders_dir = os.path.join(self.tmpdir, package)
     shutil.copytree(pristine_dir, self.spiders_dir)
     # Presumably needed so the copied package can be imported by name.
     sys.path.append(self.tmpdir)
     self.spiderman = SpiderManager([package])
コード例 #2
0
ファイル: __init__.py プロジェクト: pyarnold/scrapy
class SpiderManagerTest(unittest.TestCase):

    def setUp(self):
        orig_spiders_dir = os.path.join(module_dir, 'test_spiders')
        self.tmpdir = self.mktemp()
        os.mkdir(self.tmpdir)
        self.spiders_dir = os.path.join(self.tmpdir, 'test_spiders_xxx')
        shutil.copytree(orig_spiders_dir, self.spiders_dir)
        sys.path.append(self.tmpdir)
        self.spiderman = SpiderManager(['test_spiders_xxx'])

    def tearDown(self):
        del self.spiderman
        sys.path.remove(self.tmpdir)

    def test_interface(self):
        verifyObject(ISpiderManager, self.spiderman)

    def test_list(self):
        self.assertEqual(set(self.spiderman.list()),
                         set(['spider1', 'spider2', 'spider3', 'spider4']))

    def test_create(self):
        spider1 = self.spiderman.create("spider1")
        self.assertEqual(spider1.__class__.__name__, 'Spider1')
        spider2 = self.spiderman.create("spider2", foo="bar")
        self.assertEqual(spider2.__class__.__name__, 'Spider2')
        self.assertEqual(spider2.foo, 'bar')

    def test_find_by_request(self):
        self.assertEqual(self.spiderman.find_by_request(Request('http://scrapy1.org/test')),
                         ['spider1'])
        self.assertEqual(self.spiderman.find_by_request(Request('http://scrapy2.org/test')),
                         ['spider2'])
        self.assertEqual(set(self.spiderman.find_by_request(Request('http://scrapy3.org/test'))),
                         set(['spider1', 'spider2']))
        self.assertEqual(self.spiderman.find_by_request(Request('http://scrapy999.org/test')),
                         [])
        self.assertEqual(self.spiderman.find_by_request(Request('http://spider3.com')),
                         [])
        self.assertEqual(self.spiderman.find_by_request(Request('http://spider3.com/onlythis')),
                         ['spider3'])

    def test_load_spider_module(self):
        self.spiderman = SpiderManager(
            ['scrapy.tests.test_spidermanager.test_spiders.spider1'])
        assert len(self.spiderman._spiders) == 1

    def test_load_base_spider(self):
        self.spiderman = SpiderManager(
            ['scrapy.tests.test_spidermanager.test_spiders.spider0'])
        assert len(self.spiderman._spiders) == 0

    def test_load_from_crawler(self):
        spider = self.spiderman.create('spider4', a='OK')
        self.assertEqual(spider.a, 'OK')
コード例 #3
0
class SpiderManagerTest(unittest.TestCase):

    def setUp(self):
        orig_spiders_dir = os.path.join(module_dir, 'test_spiders')
        self.tmpdir = self.mktemp()
        os.mkdir(self.tmpdir)
        self.spiders_dir = os.path.join(self.tmpdir, 'test_spiders_xxx')
        shutil.copytree(orig_spiders_dir, self.spiders_dir)
        sys.path.append(self.tmpdir)
        self.spiderman = SpiderManager(['test_spiders_xxx'])

    def tearDown(self):
        del self.spiderman
        del sys.modules['test_spiders_xxx']
        sys.path.remove(self.tmpdir)

    def test_interface(self):
        verifyObject(ISpiderManager, self.spiderman)

    def test_list(self):
        self.assertEqual(set(self.spiderman.list()),
            set(['spider1', 'spider2', 'spider3', 'spider4']))

    def test_create(self):
        spider1 = self.spiderman.create("spider1")
        self.assertEqual(spider1.__class__.__name__, 'Spider1')
        spider2 = self.spiderman.create("spider2", foo="bar")
        self.assertEqual(spider2.__class__.__name__, 'Spider2')
        self.assertEqual(spider2.foo, 'bar')

    def test_find_by_request(self):
        self.assertEqual(self.spiderman.find_by_request(Request('http://scrapy1.org/test')),
            ['spider1'])
        self.assertEqual(self.spiderman.find_by_request(Request('http://scrapy2.org/test')),
            ['spider2'])
        self.assertEqual(set(self.spiderman.find_by_request(Request('http://scrapy3.org/test'))),
            set(['spider1', 'spider2']))
        self.assertEqual(self.spiderman.find_by_request(Request('http://scrapy999.org/test')),
            [])
        self.assertEqual(self.spiderman.find_by_request(Request('http://spider3.com')),
            [])
        self.assertEqual(self.spiderman.find_by_request(Request('http://spider3.com/onlythis')),
            ['spider3'])

    def test_load_spider_module(self):
        self.spiderman = SpiderManager(['scrapy.tests.test_spidermanager.test_spiders.spider1'])
        assert len(self.spiderman._spiders) == 1

    def test_load_base_spider(self):
        self.spiderman = SpiderManager(['scrapy.tests.test_spidermanager.test_spiders.spider0'])
        assert len(self.spiderman._spiders) == 0

    def test_load_from_crawler(self):
        spider = self.spiderman.create('spider4', a='OK')
        self.assertEqual(spider.a, 'OK')
コード例 #4
0
ファイル: __init__.py プロジェクト: 0326/scrapy
 def setUp(self):
     """Stage a private copy of the sample spiders and load it via settings.

     ``test_spiders`` (under ``module_dir``) is copied to
     ``<tmpdir>/test_spiders_xxx``, the temp dir is appended to ``sys.path``
     and the manager is built from a ``Settings`` object whose
     ``SPIDER_MODULES`` names that package.
     """
     package = 'test_spiders_xxx'
     pristine_dir = os.path.join(module_dir, 'test_spiders')
     self.tmpdir = self.mktemp()
     os.mkdir(self.tmpdir)
     self.spiders_dir = os.path.join(self.tmpdir, package)
     shutil.copytree(pristine_dir, self.spiders_dir)
     sys.path.append(self.tmpdir)
     self.spiderman = SpiderManager.from_settings(
         Settings({'SPIDER_MODULES': [package]}))
コード例 #5
0
ファイル: __init__.py プロジェクト: zxsted/scrapy
 def setUp(self):
     """Prepare a temp copy of the sample spiders and a manager bound to it.

     The copy is placed at ``<tmpdir>/test_spiders_xxx``; the temp dir is
     appended to ``sys.path`` and ``SpiderManager.from_settings`` is given
     a ``Settings`` object listing that package under ``SPIDER_MODULES``.
     """
     copy_name = 'test_spiders_xxx'
     source_dir = os.path.join(module_dir, 'test_spiders')
     self.tmpdir = self.mktemp()
     os.mkdir(self.tmpdir)
     self.spiders_dir = os.path.join(self.tmpdir, copy_name)
     shutil.copytree(source_dir, self.spiders_dir)
     sys.path.append(self.tmpdir)
     spider_settings = {'SPIDER_MODULES': [copy_name]}
     self.spiderman = SpiderManager.from_settings(Settings(spider_settings))
コード例 #6
0
 def test_load_base_spider(self):
     """Loading the ``spider0`` module must register no spiders."""
     modules = ['scrapy.tests.test_spidermanager.test_spiders.spider0']
     self.spiderman = SpiderManager(modules)
     registered = self.spiderman._spiders
     assert len(registered) == 0
コード例 #7
0
 def test_load_spider_module(self):
     """Loading only the ``spider1`` module must register exactly one spider."""
     modules = ['scrapy.tests.test_spidermanager.test_spiders.spider1']
     self.spiderman = SpiderManager(modules)
     registered = self.spiderman._spiders
     assert len(registered) == 1
コード例 #8
0
ファイル: __init__.py プロジェクト: mkmd/parking-demo
def discover_spiders():
    """Yield every spider registered in each discovered spider project.

    For each project from ``_discover_spider_projects()``, the project's
    settings are obtained via the ``spider_project`` context manager, a
    ``SpiderManager`` is built from them, and the values of its
    ``_spiders`` mapping are yielded while the context is still active.
    """
    for proj in _discover_spider_projects():
        with spider_project(proj) as project_settings:
            registry = SpiderManager.from_settings(project_settings)._spiders
            for found in registry.itervalues():
                yield found
コード例 #9
0
ファイル: __init__.py プロジェクト: 0326/scrapy
 def test_load_base_spider(self):
     """A manager built for the ``spider0`` module must hold no spiders."""
     spider_settings = Settings({
         'SPIDER_MODULES': ['tests.test_spidermanager.test_spiders.spider0'],
     })
     self.spiderman = SpiderManager.from_settings(spider_settings)
     registered = self.spiderman._spiders
     assert len(registered) == 0
コード例 #10
0
ファイル: __init__.py プロジェクト: 0326/scrapy
 def test_load_spider_module(self):
     """Two modules given as one comma-separated string must load two spiders."""
     prefix = 'tests.test_spidermanager.test_spiders.'
     dotted_paths = [prefix + leaf for leaf in ('spider1', 'spider2')]
     # SPIDER_MODULES is deliberately a single comma-joined string, not a list.
     spider_settings = Settings({'SPIDER_MODULES': ','.join(dotted_paths)})
     self.spiderman = SpiderManager.from_settings(spider_settings)
     registered = self.spiderman._spiders
     assert len(registered) == 2
コード例 #11
0
ファイル: __init__.py プロジェクト: zxsted/scrapy
 def test_load_base_spider(self):
     """Loading ``spider0`` through settings must register nothing."""
     target = 'tests.test_spidermanager.test_spiders.spider0'
     self.spiderman = SpiderManager.from_settings(
         Settings({'SPIDER_MODULES': [target]}))
     spider_count = len(self.spiderman._spiders)
     assert spider_count == 0
コード例 #12
0
ファイル: __init__.py プロジェクト: zxsted/scrapy
 def test_load_spider_module(self):
     """A comma-separated ``SPIDER_MODULES`` string naming two modules loads two spiders."""
     prefix = 'tests.test_spidermanager.test_spiders.'
     leaves = ('spider1', 'spider2')
     # Passed as one comma-joined string rather than a list.
     joined = ','.join([prefix + leaf for leaf in leaves])
     self.spiderman = SpiderManager.from_settings(
         Settings({'SPIDER_MODULES': joined}))
     spider_count = len(self.spiderman._spiders)
     assert spider_count == 2
コード例 #13
0
ファイル: __init__.py プロジェクト: amumu/apkcrawl
 def setUp(self):
     """Put the search-spiders directory on ``sys.path`` and build the manager.

     The directory is ``<project path>/tests/feature/searchspiders`` and the
     manager is created for the ``feature.searchspiders`` package.
     """
     path_parts = ('tests', 'feature', 'searchspiders')
     # NOTE(review): the appended path is the package directory itself, while
     # the manager is given 'feature.searchspiders' -- confirm that 'tests'
     # is importable some other way.
     sys.path.append(os.path.join(get_projectpath(), *path_parts))
     self.spiderman = SpiderManager(['feature.searchspiders'])
コード例 #14
0
ファイル: executor.py プロジェクト: D6C92FE5/oucfeed.crawler
from inspect import isclass

from twisted.internet import reactor, defer
from scrapy import log
from scrapy.settings import CrawlerSettings
from scrapy.crawler import Crawler
from scrapy.spidermanager import SpiderManager

from oucfeed.crawler import settings, datastore, history
from oucfeed.crawler.uploader import upload


# Module-level bootstrap: these statements run once, when the module is imported.
# Wrap the `settings` object imported from oucfeed.crawler.
crawler_settings = CrawlerSettings(settings)
# Presumably starts scrapy logging as configured by those settings -- confirm.
log.start_from_settings(crawler_settings)
# Shared manager built from the same settings; init_spider() uses it to
# create spiders from their names.
spidermanager = SpiderManager.from_settings(crawler_settings)


def setup_output():
    """Override the feed settings: ``FEED_URI`` -> 'test.js', ``FEED_FORMAT`` -> 'js'."""
    feed_overrides = (
        ('FEED_URI', 'test.js'),
        ('FEED_FORMAT', 'js'),
    )
    for option, value in feed_overrides:
        crawler_settings.overrides[option] = value


def init_spider(spider):
    """Turn a spider name, spider class, or other object into a spider.

    * a string is passed to ``spidermanager.create`` and the result returned;
    * a class is called with no arguments and the instance returned;
    * anything else is returned unchanged.
    """
    if isinstance(spider, basestring):
        return spidermanager.create(spider)
    if isclass(spider):
        return spider()
    return spider

コード例 #15
0
ファイル: __init__.py プロジェクト: mkmd/parking-demo
def discover_spiders():
    """Generate the spiders of every project found by ``_discover_spider_projects()``.

    Each project is entered through ``spider_project``, which supplies its
    settings; a ``SpiderManager`` is built from those settings and every
    value of its ``_spiders`` mapping is yielded inside that context.
    """
    for candidate in _discover_spider_projects():
        with spider_project(candidate) as active_settings:
            loader = SpiderManager.from_settings(active_settings)
            spiders_by_name = loader._spiders
            for spider_entry in spiders_by_name.itervalues():
                yield spider_entry