def prepare_callback_replay(fixture_path, encoding="utf-8"):
    with open(str(fixture_path), 'rb') as f:
        raw_data = f.read()

    fixture_info = unpickle_data(decompress_data(raw_data), encoding)
    if 'fixture_version' in fixture_info:
        # Versioned fixtures record the encoding used to pickle their payload.
        encoding = fixture_info['encoding']
        data = unpickle_data(fixture_info['data'], encoding)
    else:
        data = fixture_info  # legacy tests

    settings = get_project_settings()

    spider_name = data.get('spider_name')
    if not spider_name:  # legacy tests: infer the name from the fixture path
        spider_name = os.path.basename(
            os.path.dirname(os.path.dirname(fixture_path)))

    spider_cls = get_spider_class(spider_name, settings)
    spider_cls.update_settings(settings)
    # Fixture-recorded settings are set at priority 50, outranking the
    # project- and spider-level settings.
    for k, v in data.get('settings', {}).items():
        settings.set(k, v, 50)

    crawler = Crawler(spider_cls, settings)
    spider_args_in = data.get('spider_args', data.get('spider_args_in', {}))
    spider = spider_cls.from_crawler(crawler, **spider_args_in)
    crawler.spider = spider

    return data, crawler, spider, settings
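# A hedged usage sketch for prepare_callback_replay: the fixture path below is
# hypothetical, and request_from_dict / HtmlResponse / arg_to_iter are the same
# helpers generate_test uses further down. This re-runs the recorded callback
# and prints whatever it yields.
def replay_fixture_sketch():
    data, crawler, spider, settings = prepare_callback_replay(
        "autounit/fixtures/my_spider/parse/fixture1.bin")  # hypothetical path
    request = request_from_dict(data['request'], spider)
    response = HtmlResponse(request=request, **data['response'])
    for obj in arg_to_iter(request.callback(response)):
        print(obj)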
# Runs under an inlineCallbacks-aware runner (e.g. pytest-twisted), since the
# test body yields Deferreds.
@inlineCallbacks
def test_is_response_going_to_be_used():
    crawler = Crawler(MySpider)
    spider = MySpider()
    crawler.spider = spider

    def response(request):
        return HtmlResponse(request.url, request=request, body=b"<html></html>")

    # Spider settings are updated when it's initialized from a Crawler.
    # Since we're manually initializing it, let's just copy custom settings
    # and use them as our settings object.
    spider.settings = Settings(spider.custom_settings)
    injector = Injector(crawler)

    @inlineCallbacks
    def check_response_required(expected, callback):
        request = scrapy.Request("http://example.com", callback=callback)
        assert injector.is_scrapy_response_required(request) is expected
        yield injector.build_callback_dependencies(request, response(request))

    # check_response_required returns a Deferred, so yield it directly
    # rather than delegating with "yield from".
    yield check_response_required(True, None)
    yield check_response_required(True, spider.parse2)
    yield check_response_required(False, spider.parse3)
    yield check_response_required(False, spider.parse4)
    yield check_response_required(True, spider.parse5)
    yield check_response_required(True, spider.parse6)
    yield check_response_required(True, spider.parse7)
    yield check_response_required(False, spider.parse8)
    yield check_response_required(True, spider.parse9)
    yield check_response_required(False, spider.parse10)
    yield check_response_required(True, spider.parse11)
    yield check_response_required(True, spider.parse12)
def _crawler(extended_settings={}):
    settings = {
        "EXTENSIONS": {"spider_feeder.loaders.StartUrlsLoader": 500},
    }
    settings.update(extended_settings)
    crawler = Crawler(Spider, settings=settings)
    crawler.spider = Spider("dummy")
    return crawler
def _crawler(extended_settings={}):
    settings = {
        "SPIDERMON_ENABLED": True,
        "EXTENSIONS": {
            "spidermon.contrib.scrapy.extensions.Spidermon": 500
        },
    }
    settings.update(extended_settings)
    crawler = Crawler(Spider, settings=settings)
    crawler.spider = Spider("dummy")
    return crawler
def get_injector_for_testing(
        providers: Mapping,
        additional_settings: Optional[Dict] = None,
        overrides_registry: Optional[OverridesRegistryBase] = None,
) -> Injector:
    """
    Return an :class:`Injector` using a fake crawler.
    Useful for testing providers.
    """
    class MySpider(Spider):
        name = "my_spider"

    settings = Settings({
        **(additional_settings or {}),
        "SCRAPY_POET_PROVIDERS": providers,
    })
    crawler = Crawler(MySpider)
    crawler.settings = settings
    spider = MySpider()
    spider.settings = settings
    crawler.spider = spider
    return Injector(crawler, overrides_registry=overrides_registry)
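# A minimal sketch of calling get_injector_for_testing. MyProvider is a
# hypothetical scrapy-poet page-object provider; SCRAPY_POET_PROVIDERS maps
# provider classes to priorities.
def injector_sketch():
    injector = get_injector_for_testing(
        providers={MyProvider: 500},                    # hypothetical provider
        additional_settings={"CONCURRENT_REQUESTS": 1},
    )
    return injector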
def test_stop_spider_on_account_disabled(mocker: MockerFixture):
    class Engine:
        close_spider = mocker.Mock()

    class _EmptySpider(Spider):
        name = "empty_spider"

    crawler = Crawler(_EmptySpider)
    crawler.engine = Engine()
    spider = _EmptySpider.from_crawler(crawler)
    crawler.spider = spider

    # An unrelated exception must not stop the spider.
    _stop_if_account_disabled(Exception(), crawler)
    spider.crawler.engine.close_spider.assert_not_called()

    # Neither must a request error of some other type.
    re = request_error({"type": "whatever"})
    _stop_if_account_disabled(re, crawler)
    spider.crawler.engine.close_spider.assert_not_called()

    # Only the account-disabled error type closes the spider.
    re = request_error({"type": ACCOUNT_DISABLED_ERROR_TYPE})
    _stop_if_account_disabled(re, crawler)
    spider.crawler.engine.close_spider.assert_called_with(
        spider, "account_disabled")
def generate_test(fixture_path, encoding='utf-8'):
    with open(str(fixture_path), 'rb') as f:
        raw_data = f.read()

    fixture_info = unpickle_data(decompress_data(raw_data), encoding)
    if 'fixture_version' in fixture_info:
        encoding = fixture_info['encoding']
        data = unpickle_data(fixture_info['data'], encoding)
    else:
        data = fixture_info  # legacy tests

    spider_name = data.get('spider_name')
    if not spider_name:  # legacy tests: infer the name from the fixture path
        spider_name = fixture_path.parent.parent.name

    settings = get_project_settings()
    spider_cls = get_spider_class(spider_name, settings)
    spider_cls.update_settings(settings)
    for k, v in data.get('settings', {}).items():
        settings.set(k, v, 50)

    crawler = Crawler(spider_cls, settings)
    spider = spider_cls.from_crawler(crawler, **data.get('spider_args', {}))
    crawler.spider = spider

    def test(self):
        fx_result = data['result']
        fx_version = data.get('python_version')

        request = request_from_dict(data['request'], spider)
        response = HtmlResponse(request=request, **data['response'])

        # Instantiate the spider middlewares recorded in the fixture,
        # skipping any that report themselves as not configured.
        middlewares = []
        middleware_paths = data['middlewares']
        for mw_path in middleware_paths:
            try:
                mw_cls = load_object(mw_path)
                mw = create_instance(mw_cls, settings, crawler)
                middlewares.append(mw)
            except NotConfigured:
                continue

        crawler.signals.send_catch_log(
            signal=signals.spider_opened,
            spider=spider
        )

        # Run the response through the middleware input chain, the callback,
        # and then the output chain in reverse order, as Scrapy would.
        for mw in middlewares:
            if hasattr(mw, 'process_spider_input'):
                mw.process_spider_input(response, spider)

        result = arg_to_iter(request.callback(response))

        middlewares.reverse()
        for mw in middlewares:
            if hasattr(mw, 'process_spider_output'):
                result = mw.process_spider_output(response, result, spider)

        # Compare each callback output object against the recorded fixture
        # item; zip_longest surfaces any length mismatch via the marker.
        for index, (cb_obj, fx_item) in enumerate(six.moves.zip_longest(
            result, fx_result, fillvalue=NO_ITEM_MARKER
        )):
            if any(item == NO_ITEM_MARKER for item in (cb_obj, fx_item)):
                raise AssertionError(
                    "The fixture's data length doesn't match with "
                    "the current callback's output length."
                )

            cb_obj = parse_object(cb_obj, spider)

            fx_obj = fx_item['data']
            if fx_item['type'] == 'request':
                clean_request(fx_obj, settings)
                clean_request(cb_obj, settings)
            else:
                clean_item(fx_obj, settings)
                clean_item(cb_obj, settings)

            # Fixtures recorded under Python 2 need their binary/text
            # values normalized before comparing under Python 3.
            if fx_version == 2 and six.PY3:
                fx_obj = binary_check(fx_obj, cb_obj, encoding)

            try:
                datadiff.tools.assert_equal(fx_obj, cb_obj)
            except AssertionError as e:
                six.raise_from(
                    AssertionError(
                        "Callback output #{} doesn't match recorded "
                        "output:{}".format(index, e)),
                    None)

    return test
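# A hedged sketch of how generate_test is typically consumed: each recorded
# fixture becomes a method on a TestCase. The directory layout and names below
# are illustrative, not prescribed by the code above.
import unittest
from pathlib import Path

class FixtureTestsSketch(unittest.TestCase):
    pass

for i, path in enumerate(sorted(Path("autounit/fixtures").glob("**/*.bin"))):
    setattr(FixtureTestsSketch, "test_fixture_%d" % i, generate_test(path))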
def _crawler(settings={}):
    crawler = Crawler(Spider, settings=settings)
    crawler.spider = Spider("dummy")
    return crawler
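# A minimal sketch of how the _crawler helpers above are used in tests: build
# a crawler with per-test settings and inspect it. This calls the variant
# directly above; the two extended_settings variants are used the same way.
def test_crawler_helper_sketch():
    crawler = _crawler({"LOG_LEVEL": "DEBUG"})
    assert crawler.spider.name == "dummy"
    assert crawler.settings.get("LOG_LEVEL") == "DEBUG"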
def _get_spider(self, spider_class):
    crawler = Crawler(spider_class)
    # _create_spider is a private Crawler helper that delegates to
    # Spider.from_crawler, so the returned spider is bound to the crawler.
    crawler.spider = crawler._create_spider()
    return crawler.spider
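# _get_spider takes self, so it presumably lives on a test class; a minimal,
# hypothetical sketch of its use. _SketchSpider stands in for any Spider
# subclass that defines a name (Spider.__init__ requires one).
import unittest

class _SketchSpider(Spider):
    name = "sketch_spider"

class GetSpiderSketch(unittest.TestCase):
    _get_spider = _get_spider  # reuse the helper above as a method

    def test_spider_is_bound_to_its_crawler(self):
        spider = self._get_spider(_SketchSpider)
        self.assertIs(spider.crawler.spider, spider)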