def test_add_field_coverage_when_spider_closes_if_have_field_coverage_settings():
    settings = {
        "SPIDERMON_ENABLED": True,
        "EXTENSIONS": {"spidermon.contrib.scrapy.extensions.Spidermon": 100},
        "SPIDERMON_ADD_FIELD_COVERAGE": True,
    }
    crawler = get_crawler(settings_dict=settings)
    spider = Spider.from_crawler(crawler, "example.com")

    # Return item to have some stats to calculate coverage
    item = {"field1": "value1"}
    spider.crawler.signals.send_catch_log_deferred(
        signal=signals.item_scraped,
        item=item,
        response="",
        spider=spider,
    )

    crawler.signals.send_catch_log(
        signal=signals.spider_closed, spider=spider, reason=None
    )

    stats = spider.crawler.stats.get_stats()
    assert stats.get("spidermon_field_coverage/dict/field1") == 1.0

def test_item_scraped_count_do_not_ignore_none_values():
    settings = {
        "SPIDERMON_ENABLED": True,
        "EXTENSIONS": {"spidermon.contrib.scrapy.extensions.Spidermon": 100},
        "SPIDERMON_ADD_FIELD_COVERAGE": True,
        "SPIDERMON_FIELD_COVERAGE_SKIP_NONE": False,
    }
    crawler = get_crawler(settings_dict=settings)
    spider = Spider.from_crawler(crawler, "example.com")

    # With SPIDERMON_FIELD_COVERAGE_SKIP_NONE disabled, fields set to None
    # must still be counted in the scraped-item stats.
    returned_items = [
        {"field1": "value1", "field2": "value2"},
        {"field1": "value1", "field2": None},
    ]
    for item in returned_items:
        spider.crawler.signals.send_catch_log_deferred(
            signal=signals.item_scraped,
            item=item,
            response="",
            spider=spider,
        )

    stats = spider.crawler.stats.get_stats()
    assert stats.get("spidermon_item_scraped_count/dict/field1") == 2
    assert stats.get("spidermon_item_scraped_count/dict/field2") == 2

def test_spider_opened_connect_signal(mocker, spidermon_enabled_settings):
    spider_opened_method = mocker.patch.object(Spidermon, "spider_opened")
    crawler = get_crawler(settings_dict=spidermon_enabled_settings)
    spider = Spider.from_crawler(crawler, "example.com")
    crawler.signals.send_catch_log(signal=signals.spider_opened, spider=spider)
    assert spider_opened_method.called, "spider_opened not called"

def test_item_scraped_do_not_connect_signal_if_do_not_have_field_coverage_settings(
    mocker, spidermon_enabled_settings
):
    item_scraped_method = mocker.patch.object(Spidermon, "item_scraped")
    crawler = get_crawler(settings_dict=spidermon_enabled_settings)
    spider = Spider.from_crawler(crawler, "example.com")
    crawler.signals.send_catch_log(signal=signals.item_scraped, spider=spider)
    assert not item_scraped_method.called, "item_scraped_method called"

def test_engine_stopped_connect_signal(mocker, spidermon_enabled_settings):
    engine_stopped = mocker.patch.object(Spidermon, "engine_stopped")
    crawler = get_crawler(settings_dict=spidermon_enabled_settings)
    spider = Spider.from_crawler(crawler, "example.com")
    crawler.signals.send_catch_log(
        signal=signals.engine_stopped, spider=spider, reason=None
    )
    assert engine_stopped.called, "engine_stopped not called"

def test_item_scraped_connect_signal_if_field_coverage_settings_enabled(
    mocker, spidermon_enabled_settings
):
    item_scraped_method = mocker.patch.object(Spidermon, "item_scraped")
    spidermon_enabled_settings["SPIDERMON_ADD_FIELD_COVERAGE"] = True
    crawler = get_crawler(settings_dict=spidermon_enabled_settings)
    spider = Spider.from_crawler(crawler, "example.com")
    crawler.signals.send_catch_log(signal=signals.item_scraped, spider=spider)
    assert item_scraped_method.called, "item_scraped_method not called"

def spider():
    settings = {
        "SPIDERMON_ENABLED": True,
        "EXTENSIONS": {"spidermon.contrib.scrapy.extensions.Spidermon": 100},
        "SPIDERMON_ADD_FIELD_COVERAGE": True,
    }
    crawler = get_crawler(settings_dict=settings)
    spider = Spider.from_crawler(crawler, "example.com")
    return spider

def setUp(self):
    self.req = Request('http://scrapytest.org')
    self.res_succeed, self.res_block = _responses(self.req)
    crawler = get_crawler(
        Spider,
        settings_dict={
            'HTTPPROXY_PROXY_SM_BLOCK_INSPECTOR':
                'tests.test_spidermiddleware_block_inspector.inspect_block',
            'HTTPPROXY_SM_RECYCLE_REQUEST':
                'scrapy_proxy_management.utils.recycle_request',
        },
    )
    self.spider = Spider.from_crawler(crawler, name='foo')
    self.mw = BlockInspectorMiddleware.from_crawler(crawler)

def setUp(self):
    crawler = get_crawler(Spider)
    self.spider = Spider.from_crawler(crawler, name='foo')
    self.mw = HttpErrorMiddleware(Settings({}))
    self.req = Request('http://scrapytest.org')
    self.res200, self.res404 = _responses(self.req, [200, 404])
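
# A minimal sketch (not part of the original suite; the test name is
# hypothetical) of how the fixtures from the setUp above are typically
# exercised. It relies on the standard Scrapy HttpErrorMiddleware contract:
# process_spider_input() returns None for 2xx responses and raises HttpError
# (from scrapy.spidermiddlewares.httperror) for non-2xx responses that are
# not explicitly allowed.
def test_process_spider_input_sketch(self):
    from scrapy.spidermiddlewares.httperror import HttpError

    # The 200 response passes through the middleware untouched.
    self.assertIsNone(self.mw.process_spider_input(self.res200, self.spider))
    # The 404 response is rejected with HttpError.
    self.assertRaises(
        HttpError, self.mw.process_spider_input, self.res404, self.spider
    )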