Beispiel #1
0
    def test_build_component_list(self):
        """Check dict merging and list pass-through of build_component_list."""
        defaults = {'one': 1, 'two': 2, 'three': 3, 'five': 5, 'six': None}
        overrides = {'two': None, 'three': 8, 'four': 4}
        # The expected result omits the names whose merged value is None and
        # lists the remaining ones in ascending order of their values.
        merged = build_component_list(defaults, overrides)
        self.assertEqual(merged, ['one', 'four', 'five', 'three'])

        # A list passed as the custom value is expected back unchanged.
        as_list = ['a', 'b', 'c']
        self.assertEqual(build_component_list(defaults, as_list), as_list)
Beispiel #2
0
 def test_duplicate_components_in_basesettings(self):
     """Names that collide after conversion are resolved by setting priority."""

     def lowercase(name):
         return name.lower()

     settings_obj = BaseSettings({'one': 1, 'two': 2}, priority=0)

     # Higher priority takes precedence: 'ONE' (priority 10) beats 'one'
     # (priority 0), so the colliding name sorts with value 4.
     settings_obj.set('ONE', 4, priority=10)
     ordered = build_component_list(settings_obj, convert=lowercase)
     self.assertEqual(ordered, ['two', 'one'])

     # Raising 'one' to priority 20 lets its own value win again.
     settings_obj.set('one', settings_obj['one'], priority=20)
     ordered = build_component_list(settings_obj, convert=lowercase)
     self.assertEqual(ordered, ['one', 'two'])

     # Both spellings at the same priority must raise ValueError.
     settings_obj.set('ONE', settings_obj['ONE'], priority=20)
     with self.assertRaises(ValueError):
         build_component_list(settings_obj, convert=lowercase)
Beispiel #3
0
    def run(self, args, opts):
        """Run, or list, the contract checks of the selected spiders.

        ``args`` holds spider names; when it is empty every name returned by
        ``self.crawler.spiders.list()`` is used. With ``opts.list`` set, the
        callback names of the contract requests are printed per spider;
        otherwise each spider is scheduled with its contract requests and the
        crawler is started.
        """
        # load contracts
        contracts = build_component_list(
            self.settings['SPIDER_CONTRACTS_BASE'],
            self.settings['SPIDER_CONTRACTS'],
        )
        self.conman = ContractsManager([load_object(c) for c in contracts])

        # contract requests
        contract_reqs = defaultdict(list)
        # replace the engine's capacity check so it always reports capacity
        self.crawler.engine.has_capacity = lambda: True

        for spider_name in args or self.crawler.spiders.list():
            spider = self.crawler.spiders.create(spider_name)
            requests = self.get_requests(spider)

            if opts.list:
                for req in requests:
                    contract_reqs[spider.name].append(req.callback.__name__)
            else:
                self.crawler.crawl(spider, requests)

        # start checks
        if opts.list:
            # items() and single-argument print(...) give the same output on
            # Python 2 and are also valid on Python 3 (unlike iteritems() and
            # the print statement).
            for spider, methods in sorted(contract_reqs.items()):
                print(spider)
                for method in sorted(methods):
                    print('  * %s' % method)
        else:
            self.crawler.start()
Beispiel #4
0
    def run(self, args, opts):
        """Run, or list, the contract checks of the selected spiders.

        ``args`` holds spider names; when it is empty every name returned by
        the spider manager's ``list()`` is used. With ``opts.list`` set, the
        callback names of the contract requests are printed per spider;
        otherwise a crawler is created for each spider that has contract
        requests, the crawler process is started and the collected errors are
        printed.
        """
        # load contracts
        contracts = build_component_list(self.settings["SPIDER_CONTRACTS_BASE"], self.settings["SPIDER_CONTRACTS"])
        self.conman = ContractsManager([load_object(c) for c in contracts])
        self.results = TextTestRunner(verbosity=opts.verbose)._makeResult()

        # contract requests
        contract_reqs = defaultdict(list)

        spman_cls = load_object(self.settings["SPIDER_MANAGER_CLASS"])
        spiders = spman_cls.from_settings(self.settings)

        for spider_name in args or spiders.list():
            spider = spiders.create(spider_name)
            requests = self.get_requests(spider)

            if opts.list:
                for req in requests:
                    contract_reqs[spider.name].append(req.callback.__name__)
            elif requests:
                # only spiders that actually produced contract requests run
                crawler = self.crawler_process.create_crawler(spider.name)
                crawler.crawl(spider, requests)

        # start checks
        if opts.list:
            # items() and single-argument print(...) give the same output on
            # Python 2 and are also valid on Python 3 (unlike iteritems() and
            # the print statement).
            for spider, methods in sorted(contract_reqs.items()):
                print(spider)
                for method in sorted(methods):
                    print("  * %s" % method)
        else:
            self.crawler_process.start()
            self.results.printErrors()
Beispiel #5
0
 def test_valid_numbers(self):
     """Numeric values and None are accepted; other values raise ValueError."""

     def identity(name):
         return name

     # None and numeric values (ints and floats) are accepted
     mixed = {'a': 10, 'b': None, 'c': 15, 'd': 5.0}
     self.assertEqual(build_component_list(mixed, convert=identity),
                      ['d', 'a', 'c'])
     # very large integers are ordered like any other number
     huge = {
         'a': 33333333333333333333,
         'b': 11111111111111111111,
         'c': 22222222222222222222,
     }
     self.assertEqual(build_component_list(huge, convert=identity),
                      ['b', 'c', 'a'])

     # strings (even numeric-looking ones), lists and dicts are rejected
     rejected_values = (
         '5',
         '1.0',
         [1, 2, 3],
         {'a': 'a', 'b': 2},
         'lorem ipsum',
     )
     for value in rejected_values:
         self.assertRaises(ValueError, build_component_list, {},
                           {'one': value}, convert=identity)
Beispiel #6
0
 def _get_mwlist_from_settings(cls, settings):
     """Return the component list built from the item pipeline settings.

     ITEM_PIPELINES given as a tuple, list, set or frozenset triggers a
     ScrapyDeprecationWarning and is converted to a dict before being
     combined with ITEM_PIPELINES_BASE.
     """
     pipelines = settings['ITEM_PIPELINES']
     legacy_types = (tuple, list, set, frozenset)
     if isinstance(pipelines, legacy_types):
         from scrapy.exceptions import ScrapyDeprecationWarning
         import warnings
         warnings.warn(
             'ITEM_PIPELINES defined as a list or a set is deprecated, switch to a dict',
             category=ScrapyDeprecationWarning,
             stacklevel=1,
         )
         # legacy entries get consecutive order values starting at 500,
         # following their iteration order
         pipelines = dict(
             (path, 500 + offset) for offset, path in enumerate(pipelines)
         )
     return build_component_list(settings['ITEM_PIPELINES_BASE'], pipelines)
Beispiel #7
0
 def __init__(self):
     """Build the web service site from the settings and schedule listening.

     Raises NotConfigured when WEBSERVICE_ENABLED is false.
     """
     if not settings.getbool('WEBSERVICE_ENABLED'):
         raise NotConfigured
     log_path = settings['WEBSERVICE_LOGFILE']
     listen_port = settings.getint('WEBSERVICE_PORT')
     root = RootResource()
     resource_paths = build_component_list(
         settings['WEBSERVICE_RESOURCES_BASE'],
         settings['WEBSERVICE_RESOURCES'],
     )
     # every loaded resource class is instantiated and mounted under the
     # name given by its ws_name attribute
     for resource_cls in map(load_object, resource_paths):
         resource = resource_cls()
         root.putChild(resource.ws_name, resource)
     server.Site.__init__(self, root, logPath=log_path)
     self.noisy = False
     # the TCP port is opened once the reactor is running
     reactor.callWhenRunning(reactor.listenTCP, listen_port, self)
Beispiel #8
0
 def __init__(self):
     """Build the web service site from the settings and hook engine signals.

     Raises NotConfigured when WEBSERVICE_ENABLED is false. Listening is
     started and stopped by the handlers connected to the engine_started
     and engine_stopped signals.
     """
     if not settings.getbool('WEBSERVICE_ENABLED'):
         raise NotConfigured
     logfile = settings['WEBSERVICE_LOGFILE']
     # A list comprehension yields a real list on every Python version;
     # map() would leave a one-shot iterator here on Python 3.
     self.portrange = [int(x) for x in settings.getlist('WEBSERVICE_PORT')]
     self.host = settings['WEBSERVICE_HOST']
     root = RootResource()
     reslist = build_component_list(settings['WEBSERVICE_RESOURCES_BASE'],
                                    settings['WEBSERVICE_RESOURCES'])
     # every loaded resource class is instantiated and mounted under the
     # name given by its ws_name attribute
     for res_cls in map(load_object, reslist):
         res = res_cls()
         root.putChild(res.ws_name, res)
     server.Site.__init__(self, root, logPath=logfile)
     self.noisy = False
     dispatcher.connect(self.start_listening, signals.engine_started)
     dispatcher.connect(self.stop_listening, signals.engine_stopped)
Beispiel #9
0
 def load(self):
     """Load the spider middlewares defined in the settings module.

     Clears ``self.enabled`` and ``self.disabled``, then instantiates each
     middleware class in the configured list. Instances are stored in
     ``self.enabled`` and registered via ``_add_middleware``; a middleware
     that raises NotConfigured has its path stored in ``self.disabled``
     (both dicts are keyed by class name).
     """
     mwlist = build_component_list(settings['SPIDER_MIDDLEWARES_BASE'],
                                   settings['SPIDER_MIDDLEWARES'])
     self.enabled.clear()
     self.disabled.clear()
     for mwpath in mwlist:
         try:
             cls = load_object(mwpath)
             mw = cls()
             self.enabled[cls.__name__] = mw
             self._add_middleware(mw)
         # "except X as e" is accepted by Python 2.6+ and Python 3; the
         # comma form is a syntax error on Python 3.
         except NotConfigured as e:
             self.disabled[cls.__name__] = mwpath
             # only log when the exception carries a message
             if e.args:
                 log.msg(e)
Beispiel #10
0
    def _get_mwlist_from_settings(cls, settings):
        """
        Return the component list built from SPIDER_MIDDLEWARES_BASE and
        SPIDER_MIDDLEWARES.

        Illustrative values of these settings (not read from this code):

        SPIDER_MIDDLEWARES = {}

        SPIDER_MIDDLEWARES_BASE = {
            # Engine side
            'scrapy.contrib.spidermiddleware.httperror.HttpErrorMiddleware': 50,
            'scrapy.contrib.spidermiddleware.offsite.OffsiteMiddleware': 500,
            'scrapy.contrib.spidermiddleware.referer.RefererMiddleware': 700,
            'scrapy.contrib.spidermiddleware.urllength.UrlLengthMiddleware': 800,
            'scrapy.contrib.spidermiddleware.depth.DepthMiddleware': 900,
            # Spider side
        }
        """
        base_middlewares = settings['SPIDER_MIDDLEWARES_BASE']
        custom_middlewares = settings['SPIDER_MIDDLEWARES']
        return build_component_list(base_middlewares, custom_middlewares)
Beispiel #11
0
 def __init__(self, crawler):
     """Build the web service site for ``crawler`` and hook engine signals.

     Raises NotConfigured when WEBSERVICE_ENABLED is false. Listening is
     started and stopped by the handlers connected to the crawler's
     engine_started and engine_stopped signals.
     """
     if not crawler.settings.getbool('WEBSERVICE_ENABLED'):
         raise NotConfigured
     self.crawler = crawler
     log_path = crawler.settings['WEBSERVICE_LOGFILE']
     self.portrange = [
         int(port) for port in crawler.settings.getlist('WEBSERVICE_PORT')
     ]
     self.host = crawler.settings['WEBSERVICE_HOST']
     root = RootResource(crawler)
     resource_paths = build_component_list(
         crawler.settings['WEBSERVICE_RESOURCES_BASE'],
         crawler.settings['WEBSERVICE_RESOURCES'],
     )
     # every loaded resource class is instantiated with the crawler and
     # mounted under the name given by its ws_name attribute
     for resource_cls in map(load_object, resource_paths):
         resource = resource_cls(crawler)
         root.putChild(resource.ws_name, resource)
     server.Site.__init__(self, root, logPath=log_path)
     self.noisy = False
     crawler.signals.connect(self.start_listening, signals.engine_started)
     crawler.signals.connect(self.stop_listening, signals.engine_stopped)
Beispiel #12
0
    def load(self):
        """
        Load the enabled extensions from the settings module.

        Resets ``self.loaded``, ``self.enabled`` and ``self.disabled``, then
        instantiates each extension class in the configured list. Instances
        are stored in ``self.enabled``; an extension that raises
        NotConfigured has its path stored in ``self.disabled`` (both dicts
        are keyed by class name).
        """

        self.loaded = False
        self.enabled.clear()
        self.disabled.clear()
        extlist = build_component_list(settings['EXTENSIONS_BASE'],
                                       settings['EXTENSIONS'])
        for extension_path in extlist:
            try:
                cls = load_object(extension_path)
                self.enabled[cls.__name__] = cls()
            # "except X as e" is accepted by Python 2.6+ and Python 3; the
            # comma form is a syntax error on Python 3.
            except NotConfigured as e:
                self.disabled[cls.__name__] = extension_path
                # only log when the exception carries a message
                if e.args:
                    log.msg(e)
Beispiel #13
0
    def run(self, args, opts):
        """Run, or list, the contract checks of the selected spiders.

        ``args`` holds spider names; when it is empty every name returned by
        the spider manager's ``list()`` is used. With ``opts.list`` set, the
        callback names of the contract requests are printed per spider
        (spiders without any are shown only when ``opts.verbose`` is set).
        Otherwise the checks are run, errors and a summary are printed, and
        ``self.exitcode`` is set to 0 on success and 1 on failure.
        """
        # load contracts
        contracts = build_component_list(
            self.settings['SPIDER_CONTRACTS_BASE'],
            self.settings['SPIDER_CONTRACTS'],
        )
        conman = ContractsManager([load_object(c) for c in contracts])
        runner = TextTestRunner(verbosity=2 if opts.verbose else 1)
        result = TextTestResult(runner.stream, runner.descriptions, runner.verbosity)

        # contract requests
        contract_reqs = defaultdict(list)

        spman_cls = load_object(self.settings['SPIDER_MANAGER_CLASS'])
        spiders = spman_cls.from_settings(self.settings)

        for spider_name in args or spiders.list():
            spider = spiders.create(spider_name)
            requests = self.get_requests(spider, conman, result)
            # register the spider even if it ends up with no tested methods,
            # so the verbose listing can still show it
            contract_reqs[spider.name] = []

            if opts.list:
                for req in requests:
                    contract_reqs[spider.name].append(req.callback.__name__)
            elif requests:
                crawler = self.crawler_process.create_crawler(spider.name)
                crawler.crawl(spider, requests)

        # start checks
        if opts.list:
            # items() works on both Python 2 and 3; iteritems() does not
            # exist on Python 3 dicts.
            for spider, methods in sorted(contract_reqs.items()):
                if not methods and not opts.verbose:
                    continue
                print(spider)
                for method in sorted(methods):
                    print('  * %s' % method)
        else:
            start = time.time()
            self.crawler_process.start()
            stop = time.time()

            result.printErrors()
            result.printSummary(start, stop)
            self.exitcode = int(not result.wasSuccessful())
Beispiel #14
0
def run_tests(spider, output_file, settings):
    """
    Run the contract checks of a spider and write the outcome to an
    XUnit file (for CI).

    The HTTP cache is enabled so that offline input can be used.
    """
    cache_overrides = {
        "HTTPCACHE_ENABLED": True,
        "HTTPCACHE_EXPIRATION_SECS": 0,
    }
    settings.overrides.update(cache_overrides)

    crawler = CrawlerProcess(settings)

    contract_paths = build_component_list(
        crawler.settings['SPIDER_CONTRACTS_BASE'],
        crawler.settings['SPIDER_CONTRACTS'],
    )

    # XUnit reporter writing to output_file; its stopTest hook is replaced
    # with a no-op
    xunit = Xunit()
    xunit.enabled = True
    xunit.configure(AttributeDict(xunit_file=output_file), Config())
    xunit.stopTest = lambda *x: None

    check = CheckCommand()
    check.set_crawler(crawler)
    check.settings = settings
    check.conman = ContractsManager(
        [load_object(path) for path in contract_paths]
    )
    check.results = xunit
    # these are specially crafted requests that run tests as callbacks
    requests = check.get_requests(spider)

    crawler.install()
    crawler.configure()
    crawler.crawl(spider, requests)
    log.start(loglevel='DEBUG')

    # report is called when the engine stops; it creates the XUnit file
    def report():
        return check.results.report(check.results.error_report_file)

    dispatcher.connect(report, signals.engine_stopped)

    crawler.start()
Beispiel #15
0
    def run(self, args, opts):
        """Run, or list, the contract checks of the selected spider classes.

        ``args`` holds spider names; when it is empty every name returned by
        ``self.crawler_process.spiders.list()`` is used. With ``opts.list``
        set, the tested method names are printed per spider. Otherwise the
        checks are run, errors and a summary are printed, and
        ``self.exitcode`` is set to 0 on success and 1 on failure.
        """
        # load contracts
        contract_paths = build_component_list(
            self.settings['SPIDER_CONTRACTS_BASE'],
            self.settings['SPIDER_CONTRACTS'],
        )
        conman = ContractsManager([load_object(path) for path in contract_paths])
        verbosity = 2 if opts.verbose else 1
        runner = TextTestRunner(verbosity=verbosity)
        result = TextTestResult(runner.stream, runner.descriptions, runner.verbosity)

        # contract requests
        contract_reqs = defaultdict(list)

        def contract_start_requests(spider):
            # stands in for start_requests: the spider yields whatever the
            # contracts manager produces for it
            return conman.from_spider(spider, result)

        loader = self.crawler_process.spiders
        for spidername in args or loader.list():
            spidercls = loader.load(spidername)
            spidercls.start_requests = contract_start_requests

            tested_methods = conman.tested_methods_from_spidercls(spidercls)
            if opts.list:
                # append one by one: a spider without tested methods must
                # not get an entry in contract_reqs
                for method in tested_methods:
                    contract_reqs[spidercls.name].append(method)
            elif tested_methods:
                self.crawler_process.crawl(spidercls)

        # start checks
        if not opts.list:
            start = time.time()
            self.crawler_process.start()
            stop = time.time()

            result.printErrors()
            result.printSummary(start, stop)
            self.exitcode = int(not result.wasSuccessful())
            return

        for name in sorted(contract_reqs):
            methods = contract_reqs[name]
            if methods or opts.verbose:
                print(name)
                for method in sorted(methods):
                    print('  * %s' % method)
Beispiel #16
0
 def _get_mwlist_from_settings(cls, settings):
     """Return the component list built from the composite ITEM_PIPELINES setting."""
     pipelines = settings._getcomposite("ITEM_PIPELINES")
     return build_component_list(pipelines)
Beispiel #17
0
 def _get_mwlist_from_settings(cls, settings):
     """Return the component list built from ``settings.getwithbase('EXTENSIONS')``."""
     extensions = settings.getwithbase('EXTENSIONS')
     return build_component_list(extensions)
Beispiel #18
0
 def _get_mwlist_from_settings(cls, settings):
     """Return the component list built from ``settings.getwithbase('ITEM_PIPELINES')``."""
     pipelines = settings.getwithbase('ITEM_PIPELINES')
     return build_component_list(pipelines)
Beispiel #19
0
 def test_backwards_compatible_build_dict(self):
     """The two-dict (base, custom) call form must still be supported."""

     def identity(name):
         return name

     defaults = {'one': 1, 'two': 2, 'three': 3, 'five': 5, 'six': None}
     overrides = {'two': None, 'three': 8, 'four': 4}
     merged = build_component_list(defaults, overrides, convert=identity)
     self.assertEqual(merged, ['one', 'four', 'five', 'three'])
Beispiel #20
0
 def _get_mwlist_from_settings(cls, settings):
     """Return the component list built from SCHEDULER_MIDDLEWARES_BASE and SCHEDULER_MIDDLEWARES."""
     base_middlewares = settings['SCHEDULER_MIDDLEWARES_BASE']
     custom_middlewares = settings['SCHEDULER_MIDDLEWARES']
     return build_component_list(base_middlewares, custom_middlewares)
Beispiel #21
0
 def _get_mwlist_from_settings(cls, settings):
     """Build the extension component list from the EXTENSIONS setting (via ``getwithbase``)."""
     with_base = settings.getwithbase("EXTENSIONS")
     return build_component_list(with_base)
Beispiel #22
0
 def _get_mwlist_from_settings(cls, settings):
     """Return the component list built from EXTENSIONS_BASE and EXTENSIONS."""
     base_extensions = settings['EXTENSIONS_BASE']
     custom_extensions = settings['EXTENSIONS']
     return build_component_list(base_extensions, custom_extensions)
Beispiel #23
0
 def _get_mwlist_from_settings(cls, settings):
     """Return the component list built from ``settings.getwithbase('SPIDER_MIDDLEWARES')``."""
     # Example entry from the original note, presumably one item of that
     # setting: 'scrapy.spidermiddlewares.httperror.HttpErrorMiddleware': 50
     middlewares = settings.getwithbase('SPIDER_MIDDLEWARES')
     return build_component_list(middlewares)
Beispiel #24
0
 def _get_mwlist_from_settings(cls, settings):
     """Return the component list built from the composite EXTENSIONS setting."""
     extensions = settings._getcomposite('EXTENSIONS')
     return build_component_list(extensions)
Beispiel #25
0
 def test_map_dict(self):
     """``convert`` is applied to every name of a dict input."""

     def uppercase(name):
         return name.upper()

     components = {'one': 1, 'two': 2, 'three': 3}
     converted = build_component_list({}, components, convert=uppercase)
     self.assertEqual(converted, ['ONE', 'TWO', 'THREE'])
Beispiel #26
0
 def _get_mwlist_from_settings(cls, settings):
     """Build the item pipeline component list from the ITEM_PIPELINES setting (via ``getwithbase``)."""
     with_base = settings.getwithbase('ITEM_PIPELINES')
     return build_component_list(with_base)
Beispiel #27
0
 def _get_mwlist_from_settings(cls, settings):
     """Return the spider middleware list taken from the settings."""
     # Get the list of spider middleware classes from the
     # SPIDER_MIDDLEWARES_BASE and SPIDER_MIDDLEWARES settings.
     middlewares = settings.getwithbase('SPIDER_MIDDLEWARES')
     return build_component_list(middlewares)
Beispiel #28
0
 def _get_mwlist_from_settings(cls, settings):
     """Return the component list built from SPIDER_MIDDLEWARES_BASE and SPIDER_MIDDLEWARES."""
     base_middlewares = settings['SPIDER_MIDDLEWARES_BASE']
     custom_middlewares = settings['SPIDER_MIDDLEWARES']
     return build_component_list(base_middlewares, custom_middlewares)
 def _get_mwlist_from_settings(cls, settings):
     """Build the spider middleware component list from ``settings.getwithbase``."""
     with_base = settings.getwithbase('SPIDER_MIDDLEWARES')
     return build_component_list(with_base)
Beispiel #30
0
 def test_build_dict(self):
     """A single dict is ordered by value, skipping names mapped to None."""

     def identity(name):
         return name

     components = {'one': 1, 'two': None, 'three': 8, 'four': 4}
     ordered = build_component_list(components, convert=identity)
     self.assertEqual(ordered, ['one', 'four', 'three'])
Beispiel #31
0
 def test_backward_compatible_build_dict(self):
     """Passing separate base and custom dicts must keep working."""

     def identity(name):
         return name

     base_components = {'one': 1, 'two': 2, 'three': 3, 'five': 5, 'six': None}
     custom_components = {'two': None, 'three': 8, 'four': 4}
     expected = ['one', 'four', 'five', 'three']
     actual = build_component_list(base_components, custom_components,
                                   convert=identity)
     self.assertEqual(actual, expected)
Beispiel #32
0
 def test_return_list(self):
     """A list passed as the custom value is expected back unchanged."""

     def identity(name):
         return name

     components = ['a', 'b', 'c']
     returned = build_component_list(None, components, convert=identity)
     self.assertEqual(returned, components)
Beispiel #33
0
 def test_return_list(self):
     """With an identity ``convert``, a custom list comes back equal to the input."""

     def identity(name):
         return name

     names = ['a', 'b', 'c']
     result = build_component_list(None, names, convert=identity)
     self.assertEqual(result, names)
Beispiel #34
0
 def test_map_list(self):
     """The converter (passed positionally) is applied to each list entry."""

     def uppercase(name):
         return name.upper()

     components = ['a', 'b', 'c']
     converted = build_component_list(None, components, uppercase)
     self.assertEqual(converted, ['A', 'B', 'C'])
Beispiel #35
0
 def test_map_dict(self):
     """Dict names are passed through ``convert`` before being returned."""

     def uppercase(name):
         return name.upper()

     by_order = {'one': 1, 'two': 2, 'three': 3}
     expected = ['ONE', 'TWO', 'THREE']
     self.assertEqual(
         build_component_list({}, by_order, convert=uppercase), expected)
Beispiel #36
0
 def test_build_dict(self):
     """Names mapped to None are left out; the rest follow ascending values."""

     def identity(name):
         return name

     by_order = {'one': 1, 'two': None, 'three': 8, 'four': 4}
     expected = ['one', 'four', 'three']
     self.assertEqual(build_component_list(by_order, convert=identity),
                      expected)
Beispiel #37
0
 def test_map_list(self):
     """List entries are converted by the positional third argument."""

     def uppercase(name):
         return name.upper()

     names = ['a', 'b', 'c']
     expected = ['A', 'B', 'C']
     self.assertEqual(build_component_list(None, names, uppercase), expected)
Beispiel #38
0
 def _get_mwlist_from_settings(cls, settings):
     """Return the component list built from the composite DOWNLOADER_MIDDLEWARES setting."""
     middlewares = settings._getcomposite("DOWNLOADER_MIDDLEWARES")
     return build_component_list(middlewares)
Beispiel #39
0
 def test_duplicate_components_in_list(self):
     """A list with repeated entries raises ValueError mentioning the list."""

     def identity(name):
         return name

     repeated = ['a', 'b', 'a']
     with self.assertRaises(ValueError) as caught:
         build_component_list(None, repeated, convert=identity)
     # the error text must contain the string form of the offending list
     message = str(caught.exception)
     self.assertIn(str(repeated), message)
Beispiel #40
0
 def _get_mwlist_from_settings(cls, settings):
     """Return the spider middleware component list derived from the settings."""
     spider_middlewares = settings.getwithbase('SPIDER_MIDDLEWARES')
     return build_component_list(spider_middlewares)
Beispiel #41
0
 def _get_mwlist_from_settings(cls, settings):
     """Return the item pipeline list taken from the settings."""
     # Load the ITEM_PIPELINES_BASE and ITEM_PIPELINES classes from the
     # configuration; empty by default.
     pipelines = settings.getwithbase('ITEM_PIPELINES')
     return build_component_list(pipelines)