def run(self, args, opts):
    """Run contract checks for the given spiders (or all spiders).

    With ``-l/--list`` only prints the contract-tested callbacks per
    spider; otherwise schedules the contract requests and crawls them.
    """
    # load contracts
    contracts = build_component_list(
        self.settings['SPIDER_CONTRACTS_BASE'],
        self.settings['SPIDER_CONTRACTS'],
    )
    self.conman = ContractsManager([load_object(c) for c in contracts])
    self.results = TextTestRunner(verbosity=opts.verbose)._makeResult()

    # contract requests
    contract_reqs = defaultdict(list)
    spman_cls = load_object(self.settings['SPIDER_MANAGER_CLASS'])
    spiders = spman_cls.from_settings(self.settings)

    for spidername in args or spiders.list():
        spider = spiders.create(spidername)
        requests = self.get_requests(spider)

        if opts.list:
            for req in requests:
                contract_reqs[spider.name].append(req.callback.__name__)
        elif requests:
            crawler = self.crawler_process.create_crawler(spider.name)
            crawler.crawl(spider, requests)

    # start checks
    if opts.list:
        # BUGFIX: was `contract_reqs.iteritems()` and Py2 print statements,
        # which are syntax/attribute errors on Python 3.
        for spider, methods in sorted(contract_reqs.items()):
            print(spider)
            for method in sorted(methods):
                print(' * %s' % method)
    else:
        self.crawler_process.start()
        self.results.printErrors()
def run(self, args, opts):
    """Run (or with ``--list`` just enumerate) spider contract checks."""
    # load contracts
    contracts = build_component_list(
        self.settings['SPIDER_CONTRACTS_BASE'],
        self.settings['SPIDER_CONTRACTS'],
    )
    self.conman = ContractsManager([load_object(c) for c in contracts])

    # contract requests
    contract_reqs = defaultdict(list)
    # let every request through regardless of concurrency limits
    self.crawler.engine.has_capacity = lambda: True

    for spidername in args or self.crawler.spiders.list():
        spider = self.crawler.spiders.create(spidername)
        requests = self.get_requests(spider)
        if opts.list:
            for req in requests:
                contract_reqs[spider.name].append(req.callback.__name__)
        else:
            self.crawler.crawl(spider, requests)

    # start checks
    if opts.list:
        # BUGFIX: replaced Py2-only `.iteritems()` and print statements
        # with Py2/Py3-compatible equivalents.
        for spider, methods in sorted(contract_reqs.items()):
            print(spider)
            for method in sorted(methods):
                print(' * %s' % method)
    else:
        self.crawler.start()
def test_scrapes(self):
    """@scrapes contract: ok when all fields are present, ContractFail otherwise."""
    conman = ContractsManager(self.contracts)
    spider = TestSpider()
    response = ResponseMock()

    # scrapes_item_ok
    request = conman.from_method(spider.scrapes_item_ok, fail=True)
    output = request.callback(response)
    # BUGFIX: `map(type, output)` is a lazy iterator on Python 3, so
    # comparing it to a list always failed; materialize the types first.
    self.assertEqual([type(x) for x in output], [TestItem])

    # scrapes_item_fail
    request = conman.from_method(spider.scrapes_item_fail, fail=True)
    self.assertRaises(ContractFail, request.callback, response)
def run(self, args, opts):
    """Run contract checks (or list contract-tested callbacks with ``-l``)."""
    # load contracts
    contracts = build_component_list(self.settings["SPIDER_CONTRACTS_BASE"], self.settings["SPIDER_CONTRACTS"])
    self.conman = ContractsManager([load_object(c) for c in contracts])
    self.results = TextTestRunner(verbosity=opts.verbose)._makeResult()

    # contract requests
    contract_reqs = defaultdict(list)
    spman_cls = load_object(self.settings["SPIDER_MANAGER_CLASS"])
    spiders = spman_cls.from_settings(self.settings)

    for spidername in args or spiders.list():
        spider = spiders.create(spidername)
        requests = self.get_requests(spider)

        if opts.list:
            for req in requests:
                contract_reqs[spider.name].append(req.callback.__name__)
        elif requests:
            crawler = self.crawler_process.create_crawler(spider.name)
            crawler.crawl(spider, requests)

    # start checks
    if opts.list:
        # BUGFIX: `.iteritems()` and the print statements are Python 2-only.
        for spider, methods in sorted(contract_reqs.items()):
            print(spider)
            for method in sorted(methods):
                print(" * %s" % method)
    else:
        self.crawler_process.start()
        self.results.printErrors()
def test_contracts(self):
    """Contract extraction and request construction from spider methods."""
    conman = ContractsManager(self.contracts)

    # extract contracts correctly
    extracted = conman.extract_contracts(TestSpider.returns_request)
    self.assertEqual(len(extracted), 2)
    self.assertEqual(
        frozenset(map(type, extracted)),
        frozenset([UrlContract, ReturnsContract]))

    # returns request for valid method
    valid_request = conman.from_method(TestSpider.returns_request)
    self.assertNotEqual(valid_request, None)

    # no request for missing url
    missing_url_request = conman.from_method(TestSpider.parse_no_url)
    self.assertEqual(missing_url_request, None)
def run(self, args, opts):
    """Entry point of the ``check`` command: run spider contract tests."""
    # Load the contract classes configured in settings (base + user).
    contracts = build_component_list(
        self.settings.getwithbase('SPIDER_CONTRACTS'))
    conman = ContractsManager(load_object(c) for c in contracts)
    # A TextTestRunner supplies the stream/description settings for the
    # TextTestResult that accumulates contract outcomes.
    runner = TextTestRunner(verbosity=2 if opts.verbose else 1)
    result = TextTestResult(runner.stream, runner.descriptions, runner.verbosity)

    # Collect the contract-tested callbacks per spider.
    contract_reqs = defaultdict(list)
    spider_loader = self.crawler_process.spider_loader

    for spidername in args or spider_loader.list():
        spidercls = spider_loader.load(spidername)
        # Replace start_requests so the crawl issues contract requests.
        spidercls.start_requests = lambda s: conman.from_spider(s, result)

        tested_methods = conman.tested_methods_from_spidercls(spidercls)
        if opts.list:
            contract_reqs[spidercls.name].extend(tested_methods)
        elif tested_methods:
            # Only crawl spiders that actually declare contracts.
            self.crawler_process.crawl(spidercls)

    # Start the checks (or just print the listing).
    if opts.list:
        for spider, methods in sorted(contract_reqs.items()):
            if not methods and not opts.verbose:
                continue
            print(spider)
            for method in sorted(methods):
                print(' * %s' % method)
    else:
        start = time.time()
        self.crawler_process.start()
        stop = time.time()

        result.printErrors()
        result.printSummary(start, stop)
        self.exitcode = int(not result.wasSuccessful())
class Command(ScrapyCommand):
    """``scrapy check``: run the contracts declared in spider docstrings."""

    requires_project = True
    default_settings = {'LOG_ENABLED': False}

    def syntax(self):
        return "[options] <spider>"

    def short_desc(self):
        return "Check spider contracts"

    def add_options(self, parser):
        ScrapyCommand.add_options(self, parser)
        parser.add_option("-l", "--list", dest="list", action="store_true",
                          help="only list contracts, without checking them")

    def run(self, args, opts):
        # load contracts
        contracts = build_component_list(
            self.settings['SPIDER_CONTRACTS_BASE'],
            self.settings['SPIDER_CONTRACTS'],
        )
        self.conman = ContractsManager([load_object(c) for c in contracts])

        # contract requests
        contract_reqs = defaultdict(list)
        # bypass the engine's concurrency check so all requests go through
        self.crawler.engine.has_capacity = lambda: True

        for spidername in args or self.crawler.spiders.list():
            spider = self.crawler.spiders.create(spidername)
            requests = self.get_requests(spider)
            if opts.list:
                for req in requests:
                    contract_reqs[spider.name].append(req.callback.__name__)
            else:
                self.crawler.crawl(spider, requests)

        # start checks
        if opts.list:
            # BUGFIX: was Py2-only `.iteritems()` and print statements.
            for spider, methods in sorted(contract_reqs.items()):
                print(spider)
                for method in sorted(methods):
                    print(' * %s' % method)
        else:
            self.crawler.start()

    def get_requests(self, spider):
        """Build one contract request per documented callable on *spider*."""
        requests = []
        for key, value in vars(type(spider)).items():
            if callable(value) and value.__doc__:
                bound_method = value.__get__(spider, type(spider))
                request = self.conman.from_method(bound_method)
                if request:
                    request.callback = _generate(request.callback)
                    requests.append(request)
        return requests
def run(self, args, opts):
    """Run (or list, with ``-l``) contract checks for the chosen spiders."""
    # load contracts
    contracts = build_component_list(
        self.settings.getwithbase('SPIDER_CONTRACTS'))
    conman = ContractsManager(load_object(c) for c in contracts)
    runner = TextTestRunner(verbosity=2 if opts.verbose else 1)
    result = TextTestResult(runner.stream, runner.descriptions,
                            runner.verbosity)

    # contract requests
    contract_reqs = defaultdict(list)
    spider_loader = self.crawler_process.spider_loader

    # SCRAPY_CHECK lets spider code detect it is being contract-checked
    with set_environ(SCRAPY_CHECK='true'):
        for spidername in args or spider_loader.list():
            spidercls = spider_loader.load(spidername)
            spidercls.start_requests = lambda s: conman.from_spider(s, result)

            tested_methods = conman.tested_methods_from_spidercls(spidercls)
            if opts.list:
                contract_reqs[spidercls.name].extend(tested_methods)
            elif tested_methods:
                self.crawler_process.crawl(spidercls)

        # start checks
        if opts.list:
            for spider, methods in sorted(contract_reqs.items()):
                if not methods and not opts.verbose:
                    continue
                print(spider)
                for method in sorted(methods):
                    print(' * %s' % method)
        else:
            start = time.time()
            self.crawler_process.start()
            stop = time.time()

            result.printErrors()
            result.printSummary(start, stop)
            self.exitcode = int(not result.wasSuccessful())
def test_returns(self):
    """@returns contract for items, requests, and a failing count."""
    conman = ContractsManager(self.contracts)
    spider = TestSpider()
    response = ResponseMock()

    # returns_item
    request = conman.from_method(spider.returns_item, fail=True)
    output = request.callback(response)
    # BUGFIX: on Python 3 `map()` returns an iterator, so comparing it to a
    # list never passes; collect the element types into a list instead.
    self.assertEqual([type(x) for x in output], [TestItem])

    # returns_request
    request = conman.from_method(spider.returns_request, fail=True)
    output = request.callback(response)
    self.assertEqual([type(x) for x in output], [Request])

    # returns_fail
    request = conman.from_method(spider.returns_fail, fail=True)
    self.assertRaises(ContractFail, request.callback, response)
def run(self, args, opts):
    """Run or list the contract checks for the selected spiders."""
    # load contracts
    contracts = build_component_list(
        self.settings['SPIDER_CONTRACTS_BASE'],
        self.settings['SPIDER_CONTRACTS'],
    )
    conman = ContractsManager([load_object(c) for c in contracts])
    runner = TextTestRunner(verbosity=2 if opts.verbose else 1)
    result = TextTestResult(runner.stream, runner.descriptions,
                            runner.verbosity)

    # contract requests
    contract_reqs = defaultdict(list)
    spiders = self.crawler_process.spiders

    for name in args or spiders.list():
        spidercls = spiders.load(name)
        # contract requests replace the spider's normal start requests
        spidercls.start_requests = lambda s: conman.from_spider(s, result)

        tested_methods = conman.tested_methods_from_spidercls(spidercls)
        if opts.list:
            contract_reqs[spidercls.name].extend(tested_methods)
        elif tested_methods:
            self.crawler_process.crawl(spidercls)

    # start checks
    if opts.list:
        for spider, methods in sorted(contract_reqs.items()):
            if not methods and not opts.verbose:
                continue
            print(spider)
            for method in sorted(methods):
                print(' * %s' % method)
    else:
        start = time.time()
        self.crawler_process.start()
        stop = time.time()

        result.printErrors()
        result.printSummary(start, stop)
        self.exitcode = int(not result.wasSuccessful())
def run(self, args, opts):
    """Run or list the contract checks for each requested spider."""
    # load contracts
    contracts = build_component_list(
        self.settings['SPIDER_CONTRACTS_BASE'],
        self.settings['SPIDER_CONTRACTS'],
    )
    conman = ContractsManager([load_object(c) for c in contracts])
    runner = TextTestRunner(verbosity=2 if opts.verbose else 1)
    result = TextTestResult(runner.stream, runner.descriptions,
                            runner.verbosity)

    # contract requests
    contract_reqs = defaultdict(list)
    spman_cls = load_object(self.settings['SPIDER_MANAGER_CLASS'])
    spiders = spman_cls.from_settings(self.settings)

    for name in args or spiders.list():
        spider = spiders.create(name)
        requests = self.get_requests(spider, conman, result)
        # register the spider even when it has no contract requests, so it
        # still shows up in the --list output
        contract_reqs[spider.name] = []

        if opts.list:
            for req in requests:
                contract_reqs[spider.name].append(req.callback.__name__)
        elif requests:
            crawler = self.crawler_process.create_crawler(spider.name)
            crawler.crawl(spider, requests)

    # start checks
    if opts.list:
        for spider, methods in sorted(contract_reqs.items()):
            if not methods and not opts.verbose:
                continue
            print(spider)
            for method in sorted(methods):
                print(' * %s' % method)
    else:
        start = time.time()
        self.crawler_process.start()
        stop = time.time()

        result.printErrors()
        result.printSummary(start, stop)
        self.exitcode = int(not result.wasSuccessful())
class ContractsManagerTest(unittest.TestCase):
    """Exercise ContractsManager extraction, execution, and custom contracts."""

    contracts = [
        UrlContract,
        CallbackKeywordArgumentsContract,
        ReturnsContract,
        ScrapesContract,
        CustomFormContract,
        CustomSuccessContract,
        CustomFailContract,
    ]

    def setUp(self):
        self.conman = ContractsManager(self.contracts)
        self.results = TextTestResult(stream=None, descriptions=False,
                                      verbosity=0)

    def should_succeed(self):
        self.assertFalse(self.results.failures)
        self.assertFalse(self.results.errors)

    def should_fail(self):
        self.assertTrue(self.results.failures)
        self.assertFalse(self.results.errors)

    def should_error(self):
        self.assertTrue(self.results.errors)

    def test_contracts(self):
        spider = TestSpider()

        # extract contracts correctly
        extracted = self.conman.extract_contracts(spider.returns_request)
        self.assertEqual(len(extracted), 2)
        self.assertEqual(
            frozenset(type(x) for x in extracted),
            frozenset([UrlContract, ReturnsContract]))

        # returns request for valid method
        request = self.conman.from_method(spider.returns_request, self.results)
        self.assertNotEqual(request, None)

        # no request for missing url
        request = self.conman.from_method(spider.parse_no_url, self.results)
        self.assertEqual(request, None)

    def test_cb_kwargs(self):
        spider = TestSpider()
        response = ResponseMock()

        # extract contracts correctly
        three_contract_methods = (
            spider.returns_request_cb_kwargs,
            spider.returns_item_cb_kwargs,
            spider.returns_item_cb_kwargs_error_unexpected_keyword,
        )
        for method in three_contract_methods:
            extracted = self.conman.extract_contracts(method)
            self.assertEqual(len(extracted), 3)
            self.assertEqual(
                frozenset(type(x) for x in extracted),
                frozenset([UrlContract, CallbackKeywordArgumentsContract,
                           ReturnsContract]))

        extracted = self.conman.extract_contracts(
            spider.returns_item_cb_kwargs_error_missing_argument)
        self.assertEqual(len(extracted), 2)
        self.assertEqual(
            frozenset(type(x) for x in extracted),
            frozenset([UrlContract, ReturnsContract]))

        # returns_request
        request = self.conman.from_method(spider.returns_request_cb_kwargs,
                                          self.results)
        request.callback(response, **request.cb_kwargs)
        self.should_succeed()

        # returns_item
        request = self.conman.from_method(spider.returns_item_cb_kwargs,
                                          self.results)
        request.callback(response, **request.cb_kwargs)
        self.should_succeed()

        # returns_item (error, callback doesn't take keyword arguments)
        request = self.conman.from_method(
            spider.returns_item_cb_kwargs_error_unexpected_keyword,
            self.results)
        request.callback(response, **request.cb_kwargs)
        self.should_error()

        # returns_item (error, contract doesn't provide keyword arguments)
        request = self.conman.from_method(
            spider.returns_item_cb_kwargs_error_missing_argument,
            self.results)
        request.callback(response, **request.cb_kwargs)
        self.should_error()

    def test_returns(self):
        spider = TestSpider()
        response = ResponseMock()

        # passing @returns contracts
        for method in (spider.returns_item, spider.returns_dict_item,
                       spider.returns_request):
            request = self.conman.from_method(method, self.results)
            request.callback(response)
            self.should_succeed()

        # failing @returns contracts
        for method in (spider.returns_fail, spider.returns_dict_fail):
            request = self.conman.from_method(method, self.results)
            request.callback(response)
            self.should_fail()

    def test_scrapes(self):
        spider = TestSpider()
        response = ResponseMock()

        # passing @scrapes contracts
        for method in (spider.scrapes_item_ok, spider.scrapes_dict_item_ok):
            request = self.conman.from_method(method, self.results)
            request.callback(response)
            self.should_succeed()

        # failing @scrapes contracts
        for method in (spider.scrapes_item_fail, spider.scrapes_dict_item_fail):
            request = self.conman.from_method(method, self.results)
            request.callback(response)
            self.should_fail()

        # scrapes_multiple_missing_fields
        request = self.conman.from_method(
            spider.scrapes_multiple_missing_fields, self.results)
        request.callback(response)
        self.should_fail()
        message = 'ContractFail: Missing fields: name, url'
        assert message in self.results.failures[-1][-1]

    def test_custom_contracts(self):
        self.conman.from_spider(CustomContractSuccessSpider(), self.results)
        self.should_succeed()

        self.conman.from_spider(CustomContractFailSpider(), self.results)
        self.should_error()

    def test_errback(self):
        spider = TestSpider()
        response = ResponseMock()

        try:
            raise HttpError(response, 'Ignoring non-200 response')
        except HttpError:
            failure_mock = failure.Failure()

        request = self.conman.from_method(spider.returns_request, self.results)
        request.errback(failure_mock)

        self.assertFalse(self.results.failures)
        self.assertTrue(self.results.errors)

    @defer.inlineCallbacks
    def test_same_url(self):

        class TestSameUrlSpider(Spider):
            name = 'test_same_url'

            def __init__(self, *args, **kwargs):
                super(TestSameUrlSpider, self).__init__(*args, **kwargs)
                self.visited = 0

            def start_requests(s):
                return self.conman.from_spider(s, self.results)

            def parse_first(self, response):
                self.visited += 1
                return TestItem()

            def parse_second(self, response):
                self.visited += 1
                return TestItem()

        with MockServer() as mockserver:
            contract_doc = '@url {}'.format(mockserver.url('/status?n=200'))

            TestSameUrlSpider.parse_first.__doc__ = contract_doc
            TestSameUrlSpider.parse_second.__doc__ = contract_doc

            crawler = CrawlerRunner().create_crawler(TestSameUrlSpider)
            yield crawler.crawl()

        # both callbacks must run even though they share one URL
        self.assertEqual(crawler.spider.visited, 2)

    def test_form_contract(self):
        spider = TestSpider()
        request = self.conman.from_method(spider.custom_form, self.results)
        self.assertEqual(request.method, 'POST')
        self.assertIsInstance(request, FormRequest)

    def test_inherited_contracts(self):
        spider = InheritsTestSpider()
        requests = self.conman.from_spider(spider, self.results)
        self.assertTrue(requests)
def setUp(self):
    """Create a fresh spider, contracts manager, and silent result object."""
    self.spider = TestBeibeiSpider()
    self.conman = ContractsManager(self.contracts)
    # stream=None keeps the unittest result from writing to stdout
    self.results = TextTestResult(stream=None, descriptions=False,
                                  verbosity=0)
class CustomContractsTest(unittest.TestCase):
    """Tests for the project-specific Meta and SpecificReturns contracts."""

    contracts = [
        UrlContract,
        MetaContract,
        ReturnsContract,
        SpecificReturnsContract,
        ScrapesContract,
    ]

    def setUp(self):
        self.spider = TestBeibeiSpider()
        self.conman = ContractsManager(self.contracts)
        self.results = TextTestResult(stream=None, descriptions=False,
                                      verbosity=0)

    def should_succeed(self):
        self.assertFalse(self.results.failures)
        self.assertFalse(self.results.errors)

    def should_fail(self):
        self.assertTrue(self.results.failures)
        self.assertFalse(self.results.errors)

    def test_contracts(self):
        # test if MetaContract and SpecificReturnsContract extracted properly
        extracted = self.conman.extract_contracts(self.spider.set_meta)
        self.assertEqual(len(extracted), 4)
        self.assertEqual(
            frozenset(type(x) for x in extracted),
            frozenset([UrlContract, MetaContract, SpecificReturnsContract]))

        # test if the meta is set properly
        request = self.conman.from_method(self.spider.set_meta, self.results)
        self.assertEqual(request.meta, {
            "metakey1": "metaval1",
            "metakey2": "metaval2"
        })

    def test_returns_details(self):
        response = ResponseMock()

        # returns_item
        ok_request = self.conman.from_method(self.spider.returns_detail_ok,
                                             self.results)
        ok_request.callback(response)
        self.should_succeed()

        fail_request = self.conman.from_method(self.spider.returns_detail_fail,
                                               self.results)
        fail_request.callback(response)
        self.should_fail()

    def test_returns_category_pages(self):
        response = ResponseMock()

        ok_request = self.conman.from_method(
            self.spider.returns_detail_category_ok, self.results)
        ok_request.callback(response)
        self.should_succeed()

        fail_request = self.conman.from_method(
            self.spider.returns_detail_category_fail, self.results)
        fail_request.callback(response)
        self.should_fail()
def setUp(self):
    """Build the manager under test and a muted unittest result object."""
    self.conman = ContractsManager(self.contracts)
    self.results = TextTestRunner()._makeResult()
    # silence output: the result would otherwise write to the runner stream
    self.results.stream = None
class ContractsManagerTest(unittest.TestCase):
    """ContractsManager behavior for @url, @returns and @scrapes contracts."""

    contracts = [UrlContract, ReturnsContract, ScrapesContract]

    def setUp(self):
        self.conman = ContractsManager(self.contracts)
        self.results = TextTestRunner()._makeResult()
        self.results.stream = None

    def should_succeed(self):
        self.assertFalse(self.results.failures)
        self.assertFalse(self.results.errors)

    def should_fail(self):
        self.assertTrue(self.results.failures)
        self.assertFalse(self.results.errors)

    def test_contracts(self):
        spider = TestSpider()

        # extract contracts correctly
        extracted = self.conman.extract_contracts(spider.returns_request)
        self.assertEqual(len(extracted), 2)
        self.assertEqual(
            frozenset(type(x) for x in extracted),
            frozenset([UrlContract, ReturnsContract]))

        # returns request for valid method
        request = self.conman.from_method(spider.returns_request, self.results)
        self.assertNotEqual(request, None)

        # no request for missing url
        request = self.conman.from_method(spider.parse_no_url, self.results)
        self.assertEqual(request, None)

    def test_returns(self):
        spider = TestSpider()
        response = ResponseMock()

        # returns_item
        request = self.conman.from_method(spider.returns_item, self.results)
        produced = request.callback(response)
        self.assertEqual([type(x) for x in produced], [TestItem])
        self.should_succeed()

        # returns_request
        request = self.conman.from_method(spider.returns_request, self.results)
        produced = request.callback(response)
        self.assertEqual([type(x) for x in produced], [Request])
        self.should_succeed()

        # returns_fail
        request = self.conman.from_method(spider.returns_fail, self.results)
        request.callback(response)
        self.should_fail()

    def test_scrapes(self):
        spider = TestSpider()
        response = ResponseMock()

        # scrapes_item_ok
        request = self.conman.from_method(spider.scrapes_item_ok, self.results)
        produced = request.callback(response)
        self.assertEqual([type(x) for x in produced], [TestItem])
        self.should_succeed()

        # scrapes_item_fail
        request = self.conman.from_method(spider.scrapes_item_fail,
                                          self.results)
        request.callback(response)
        self.should_fail()
class Command(ScrapyCommand):
    """``scrapy check``: run the contracts declared in spider docstrings."""

    requires_project = True
    default_settings = {'LOG_ENABLED': False}

    def syntax(self):
        return "[options] <spider>"

    def short_desc(self):
        return "Check spider contracts"

    def add_options(self, parser):
        ScrapyCommand.add_options(self, parser)
        parser.add_option("-l", "--list", dest="list", action="store_true",
                          help="only list contracts, without checking them")
        parser.add_option("-v", "--verbose", dest="verbose", default=1,
                          action="count", help="print all contract hooks")

    def run(self, args, opts):
        # load contracts
        contracts = build_component_list(
            self.settings['SPIDER_CONTRACTS_BASE'],
            self.settings['SPIDER_CONTRACTS'],
        )
        self.conman = ContractsManager([load_object(c) for c in contracts])
        self.results = TextTestRunner(verbosity=opts.verbose)._makeResult()

        # contract requests
        contract_reqs = defaultdict(list)
        spman_cls = load_object(self.settings['SPIDER_MANAGER_CLASS'])
        spiders = spman_cls.from_settings(self.settings)

        for spidername in args or spiders.list():
            spider = spiders.create(spidername)
            requests = self.get_requests(spider)

            if opts.list:
                for req in requests:
                    contract_reqs[spider.name].append(req.callback.__name__)
            elif requests:
                crawler = self.crawler_process.create_crawler(spider.name)
                crawler.crawl(spider, requests)

        # start checks
        if opts.list:
            # BUGFIX: was Py2-only `.iteritems()` and print statements,
            # which do not run on Python 3.
            for spider, methods in sorted(contract_reqs.items()):
                print(spider)
                for method in sorted(methods):
                    print(' * %s' % method)
        else:
            self.crawler_process.start()
            self.results.printErrors()

    def get_requests(self, spider):
        """Build one contract request per documented callable on *spider*."""
        requests = []
        for key, value in vars(type(spider)).items():
            if callable(value) and value.__doc__:
                bound_method = value.__get__(spider, type(spider))
                request = self.conman.from_method(bound_method, self.results)
                if request:
                    request.callback = _generate(request.callback)
                    requests.append(request)
        return requests
def setUp(self):
    """Prepare the contracts manager and a silent test-result collector."""
    self.conman = ContractsManager(self.contracts)
    # stream=None keeps the result object from writing anywhere
    self.results = TextTestResult(stream=None, descriptions=False,
                                  verbosity=0)
class ContractsManagerTest(unittest.TestCase):
    """ContractsManager checks covering Item and dict outputs, errbacks,
    and contract inheritance."""

    contracts = [UrlContract, ReturnsContract, ScrapesContract]

    def setUp(self):
        self.conman = ContractsManager(self.contracts)
        self.results = TextTestResult(stream=None, descriptions=False,
                                      verbosity=0)

    def should_succeed(self):
        self.assertFalse(self.results.failures)
        self.assertFalse(self.results.errors)

    def should_fail(self):
        self.assertTrue(self.results.failures)
        self.assertFalse(self.results.errors)

    def test_contracts(self):
        spider = TestSpider()

        # extract contracts correctly
        extracted = self.conman.extract_contracts(spider.returns_request)
        self.assertEqual(len(extracted), 2)
        self.assertEqual(
            frozenset(type(x) for x in extracted),
            frozenset([UrlContract, ReturnsContract]))

        # returns request for valid method
        request = self.conman.from_method(spider.returns_request, self.results)
        self.assertNotEqual(request, None)

        # no request for missing url
        request = self.conman.from_method(spider.parse_no_url, self.results)
        self.assertEqual(request, None)

    def test_returns(self):
        spider = TestSpider()
        response = ResponseMock()

        # passing @returns contracts (Item, dict, Request)
        for method in (spider.returns_item, spider.returns_dict_item,
                       spider.returns_request):
            request = self.conman.from_method(method, self.results)
            request.callback(response)
            self.should_succeed()

        # failing @returns contracts
        for method in (spider.returns_fail, spider.returns_dict_fail):
            request = self.conman.from_method(method, self.results)
            request.callback(response)
            self.should_fail()

    def test_scrapes(self):
        spider = TestSpider()
        response = ResponseMock()

        # passing @scrapes contracts
        for method in (spider.scrapes_item_ok, spider.scrapes_dict_item_ok):
            request = self.conman.from_method(method, self.results)
            request.callback(response)
            self.should_succeed()

        # failing @scrapes contracts
        for method in (spider.scrapes_item_fail,
                       spider.scrapes_dict_item_fail):
            request = self.conman.from_method(method, self.results)
            request.callback(response)
            self.should_fail()

    def test_errback(self):
        spider = TestSpider()
        response = ResponseMock()

        try:
            raise HttpError(response, 'Ignoring non-200 response')
        except HttpError:
            failure_mock = failure.Failure()

        request = self.conman.from_method(spider.returns_request, self.results)
        request.errback(failure_mock)

        self.assertFalse(self.results.failures)
        self.assertTrue(self.results.errors)

    def test_inherited_contracts(self):
        spider = InheritsTestSpider()
        requests = self.conman.from_spider(spider, self.results)
        self.assertTrue(requests)
class Command(ScrapyCommand):
    """``scrapy check``: validate the contracts declared on a spider."""

    requires_project = True
    default_settings = {'LOG_ENABLED': False}

    def syntax(self):
        return "[options] <spider>"

    def short_desc(self):
        return "Check contracts for given spider"

    def add_options(self, parser):
        ScrapyCommand.add_options(self, parser)
        parser.add_option("-l", "--list", dest="list", action="store_true",
                          help="only list contracts, without checking them")

    def run(self, args, opts):
        # load contracts
        contracts = build_component_list(
            self.settings['SPIDER_CONTRACTS_BASE'],
            self.settings['SPIDER_CONTRACTS'],
        )
        self.conman = ContractsManager([load_object(c) for c in contracts])

        # contract requests
        contract_reqs = defaultdict(list)
        # bypass the engine's concurrency check so all requests go through
        self.crawler.engine.has_capacity = lambda: True

        for spidername in args or self.crawler.spiders.list():
            spider = self.crawler.spiders.create(spidername)
            requests = self.get_requests(spider)
            if opts.list:
                for req in requests:
                    contract_reqs[spider.name].append(req.callback.__name__)
            else:
                self.crawler.crawl(spider, requests)

        # start checks
        if opts.list:
            # BUGFIX: was Py2-only `.iteritems()` and print statements.
            for spider, methods in sorted(contract_reqs.items()):
                print(spider)
                for method in sorted(methods):
                    print(' * %s' % method)
        else:
            self.crawler.start()

    def get_requests(self, spider):
        """Build one contract request per documented callable on *spider*."""
        requests = []
        for key, value in vars(type(spider)).items():
            if callable(value) and value.__doc__:
                bound_method = value.__get__(spider, type(spider))
                request = self.conman.from_method(bound_method)
                if request:
                    request.callback = _generate(request.callback)
                    requests.append(request)
        return requests
class ContractsManagerTest(unittest.TestCase):
    """ContractsManager checks covering Item and dict outputs plus errbacks."""

    contracts = [UrlContract, ReturnsContract, ScrapesContract]

    def setUp(self):
        self.conman = ContractsManager(self.contracts)
        self.results = TextTestResult(stream=None, descriptions=False,
                                      verbosity=0)

    def should_succeed(self):
        self.assertFalse(self.results.failures)
        self.assertFalse(self.results.errors)

    def should_fail(self):
        self.assertTrue(self.results.failures)
        self.assertFalse(self.results.errors)

    def test_contracts(self):
        spider = TestSpider()

        # extract contracts correctly
        extracted = self.conman.extract_contracts(spider.returns_request)
        self.assertEqual(len(extracted), 2)
        self.assertEqual(
            frozenset(type(x) for x in extracted),
            frozenset([UrlContract, ReturnsContract]))

        # returns request for valid method
        request = self.conman.from_method(spider.returns_request, self.results)
        self.assertNotEqual(request, None)

        # no request for missing url
        request = self.conman.from_method(spider.parse_no_url, self.results)
        self.assertEqual(request, None)

    def test_returns(self):
        spider = TestSpider()
        response = ResponseMock()

        # passing @returns contracts (Item, dict, Request)
        for method in (spider.returns_item, spider.returns_dict_item,
                       spider.returns_request):
            request = self.conman.from_method(method, self.results)
            request.callback(response)
            self.should_succeed()

        # failing @returns contracts
        for method in (spider.returns_fail, spider.returns_dict_fail):
            request = self.conman.from_method(method, self.results)
            request.callback(response)
            self.should_fail()

    def test_scrapes(self):
        spider = TestSpider()
        response = ResponseMock()

        # passing @scrapes contracts
        for method in (spider.scrapes_item_ok, spider.scrapes_dict_item_ok):
            request = self.conman.from_method(method, self.results)
            request.callback(response)
            self.should_succeed()

        # failing @scrapes contracts
        for method in (spider.scrapes_item_fail,
                       spider.scrapes_dict_item_fail):
            request = self.conman.from_method(method, self.results)
            request.callback(response)
            self.should_fail()

    def test_errback(self):
        spider = TestSpider()
        response = ResponseMock()

        try:
            raise HttpError(response, 'Ignoring non-200 response')
        except HttpError:
            failure_mock = failure.Failure()

        request = self.conman.from_method(spider.returns_request, self.results)
        request.errback(failure_mock)

        self.assertFalse(self.results.failures)
        self.assertTrue(self.results.errors)
class Command(ScrapyCommand):
    """``scrapy check``: run the contracts declared in spider docstrings."""

    requires_project = True
    default_settings = {"LOG_ENABLED": False}

    def syntax(self):
        return "[options] <spider>"

    def short_desc(self):
        return "Check spider contracts"

    def add_options(self, parser):
        ScrapyCommand.add_options(self, parser)
        parser.add_option(
            "-l", "--list", dest="list", action="store_true",
            help="only list contracts, without checking them"
        )
        parser.add_option("-v", "--verbose", dest="verbose", default=1,
                          action="count", help="print all contract hooks")

    def run(self, args, opts):
        # load contracts
        contracts = build_component_list(self.settings["SPIDER_CONTRACTS_BASE"], self.settings["SPIDER_CONTRACTS"])
        self.conman = ContractsManager([load_object(c) for c in contracts])
        self.results = TextTestRunner(verbosity=opts.verbose)._makeResult()

        # contract requests
        contract_reqs = defaultdict(list)
        spman_cls = load_object(self.settings["SPIDER_MANAGER_CLASS"])
        spiders = spman_cls.from_settings(self.settings)

        for spidername in args or spiders.list():
            spider = spiders.create(spidername)
            requests = self.get_requests(spider)

            if opts.list:
                for req in requests:
                    contract_reqs[spider.name].append(req.callback.__name__)
            elif requests:
                crawler = self.crawler_process.create_crawler(spider.name)
                crawler.crawl(spider, requests)

        # start checks
        if opts.list:
            # BUGFIX: was Py2-only `.iteritems()` and print statements.
            for spider, methods in sorted(contract_reqs.items()):
                print(spider)
                for method in sorted(methods):
                    print(" * %s" % method)
        else:
            self.crawler_process.start()
            self.results.printErrors()

    def get_requests(self, spider):
        """Build one contract request per documented callable on *spider*."""
        requests = []
        for key, value in vars(type(spider)).items():
            if callable(value) and value.__doc__:
                bound_method = value.__get__(spider, type(spider))
                request = self.conman.from_method(bound_method, self.results)
                if request:
                    request.callback = _generate(request.callback)
                    requests.append(request)
        return requests
class ContractsManagerTest(unittest.TestCase):
    """Tests for ContractsManager: contract extraction, request building,
    success/failure/error reporting, and errback handling."""

    # Contract classes under test: the built-in ones plus the custom
    # contracts defined by the test fixtures.
    contracts = [
        UrlContract,
        ReturnsContract,
        ScrapesContract,
        CustomFormContract,
        CustomSuccessContract,
        CustomFailContract,
    ]

    def setUp(self):
        # Fresh manager and an in-memory unittest result object per test.
        self.conman = ContractsManager(self.contracts)
        self.results = TextTestResult(stream=None, descriptions=False, verbosity=0)

    def should_succeed(self):
        # The checked callback recorded neither failures nor errors.
        self.assertFalse(self.results.failures)
        self.assertFalse(self.results.errors)

    def should_fail(self):
        # A contract violation is recorded as a failure, never as an error.
        self.assertTrue(self.results.failures)
        self.assertFalse(self.results.errors)

    def should_error(self):
        # An exception raised while checking is recorded as an error.
        self.assertTrue(self.results.errors)

    def test_contracts(self):
        spider = TestSpider()

        # extract contracts correctly
        contracts = self.conman.extract_contracts(spider.returns_request)
        self.assertEqual(len(contracts), 2)
        self.assertEqual(frozenset(type(x) for x in contracts),
                         frozenset([UrlContract, ReturnsContract]))

        # returns request for valid method
        request = self.conman.from_method(spider.returns_request, self.results)
        self.assertNotEqual(request, None)

        # no request for missing url
        request = self.conman.from_method(spider.parse_no_url, self.results)
        self.assertEqual(request, None)

    def test_returns(self):
        spider = TestSpider()
        response = ResponseMock()

        # returns_item
        request = self.conman.from_method(spider.returns_item, self.results)
        request.callback(response)
        self.should_succeed()

        # returns_dict_item
        request = self.conman.from_method(spider.returns_dict_item, self.results)
        request.callback(response)
        self.should_succeed()

        # returns_request
        request = self.conman.from_method(spider.returns_request, self.results)
        request.callback(response)
        self.should_succeed()

        # returns_fail
        request = self.conman.from_method(spider.returns_fail, self.results)
        request.callback(response)
        self.should_fail()

        # returns_dict_fail
        request = self.conman.from_method(spider.returns_dict_fail, self.results)
        request.callback(response)
        self.should_fail()

    def test_scrapes(self):
        spider = TestSpider()
        response = ResponseMock()

        # scrapes_item_ok
        request = self.conman.from_method(spider.scrapes_item_ok, self.results)
        request.callback(response)
        self.should_succeed()

        # scrapes_dict_item_ok
        request = self.conman.from_method(spider.scrapes_dict_item_ok, self.results)
        request.callback(response)
        self.should_succeed()

        # scrapes_item_fail
        request = self.conman.from_method(spider.scrapes_item_fail, self.results)
        request.callback(response)
        self.should_fail()

        # scrapes_dict_item_fail
        request = self.conman.from_method(spider.scrapes_dict_item_fail, self.results)
        request.callback(response)
        self.should_fail()

    def test_custom_contracts(self):
        # A contract whose hooks pass cleanly records a success...
        self.conman.from_spider(CustomContractSuccessSpider(), self.results)
        self.should_succeed()

        # ...while one that raises records an error.
        self.conman.from_spider(CustomContractFailSpider(), self.results)
        self.should_error()

    def test_errback(self):
        spider = TestSpider()
        response = ResponseMock()

        # Build a real twisted Failure by actually raising HttpError, so the
        # errback receives the same kind of object the downloader would pass.
        try:
            raise HttpError(response, 'Ignoring non-200 response')
        except HttpError:
            failure_mock = failure.Failure()

        request = self.conman.from_method(spider.returns_request, self.results)
        request.errback(failure_mock)

        # Download errors must be reported as errors, not contract failures.
        self.assertFalse(self.results.failures)
        self.assertTrue(self.results.errors)

    @defer.inlineCallbacks
    def test_same_url(self):
        # Two callbacks with identical @url contracts must each get their own
        # request (no dupe-filtering between contract requests).

        class TestSameUrlSpider(Spider):
            name = 'test_same_url'

            def __init__(self, *args, **kwargs):
                super(TestSameUrlSpider, self).__init__(*args, **kwargs)
                self.visited = 0

            # NOTE: 's' is the spider instance; 'self' (closed over) is the
            # test case, whose conman builds the contract requests.
            def start_requests(s):
                return self.conman.from_spider(s, self.results)

            def parse_first(self, response):
                self.visited += 1
                return TestItem()

            def parse_second(self, response):
                self.visited += 1
                return TestItem()

        with MockServer() as mockserver:
            # Inject the same @url contract docstring into both callbacks,
            # pointing at the live mock server.
            contract_doc = '@url {}'.format(mockserver.url('/status?n=200'))

            get_unbound_function(TestSameUrlSpider.parse_first).__doc__ = contract_doc
            get_unbound_function(TestSameUrlSpider.parse_second).__doc__ = contract_doc

            crawler = CrawlerRunner().create_crawler(TestSameUrlSpider)
            yield crawler.crawl()

        # Both callbacks were invoked despite sharing the same URL.
        self.assertEqual(crawler.spider.visited, 2)

    def test_form_contract(self):
        spider = TestSpider()
        request = self.conman.from_method(spider.custom_form, self.results)
        # The custom form contract must yield a POST FormRequest.
        self.assertEqual(request.method, 'POST')
        self.assertIsInstance(request, FormRequest)

    def test_inherited_contracts(self):
        # Contracts declared on a base spider class apply to subclasses too.
        spider = InheritsTestSpider()
        requests = self.conman.from_spider(spider, self.results)
        self.assertTrue(requests)