# Imports shared by the snippets below; `_responses` and
# `BlockInspectorMiddleware` are helpers local to their own test modules.
from scrapy import Spider, signals
from scrapy.http import Request
from scrapy.settings import Settings
from scrapy.spidermiddlewares.httperror import HttpErrorMiddleware
from scrapy.utils.test import get_crawler

from spidermon.contrib.scrapy.extensions import Spidermon


def test_add_field_coverage_when_spider_closes_if_have_field_coverage_settings():
    settings = {
        "SPIDERMON_ENABLED": True,
        "EXTENSIONS": {
            "spidermon.contrib.scrapy.extensions.Spidermon": 100
        },
        "SPIDERMON_ADD_FIELD_COVERAGE": True,
    }
    crawler = get_crawler(settings_dict=settings)
    spider = Spider.from_crawler(crawler, "example.com")

    item = {"field1": "value1"}
    spider.crawler.signals.send_catch_log_deferred(
        signal=signals.item_scraped,
        item=item,
        response="",
        spider=spider,
    )  # Feed one item through the signal so there are stats to compute coverage from

    crawler.signals.send_catch_log(signal=signals.spider_closed,
                                   spider=spider,
                                   reason=None)

    stats = spider.crawler.stats.get_stats()

    assert stats.get("spidermon_field_coverage/dict/field1") == 1.0


def test_item_scraped_count_do_not_ignore_none_values():
    settings = {
        "SPIDERMON_ENABLED": True,
        "EXTENSIONS": {"spidermon.contrib.scrapy.extensions.Spidermon": 100},
        "SPIDERMON_ADD_FIELD_COVERAGE": True,
        "SPIDERMON_FIELD_COVERAGE_SKIP_NONE": False,
    }
    crawler = get_crawler(settings_dict=settings)
    spider = Spider.from_crawler(crawler, "example.com")

    returned_items = [
        {"field1": "value1", "field2": "value2"},
        {"field1": "value1", "field2": None},
    ]

    for item in returned_items:
        spider.crawler.signals.send_catch_log_deferred(
            signal=signals.item_scraped,
            item=item,
            response="",
            spider=spider,
        )

    stats = spider.crawler.stats.get_stats()

    assert stats.get("spidermon_item_scraped_count/dict/field1") == 2
    assert stats.get("spidermon_item_scraped_count/dict/field2") == 2


def test_spider_opened_connect_signal(mocker, spidermon_enabled_settings):
    spider_opened_method = mocker.patch.object(Spidermon, "spider_opened")

    crawler = get_crawler(settings_dict=spidermon_enabled_settings)
    spider = Spider.from_crawler(crawler, "example.com")
    crawler.signals.send_catch_log(signal=signals.spider_opened, spider=spider)

    assert spider_opened_method.called, "spider_opened not called"


def test_item_scraped_do_not_connect_signal_if_do_not_have_field_coverage_settings(
        mocker, spidermon_enabled_settings):
    item_scraped_method = mocker.patch.object(Spidermon, "item_scraped")

    crawler = get_crawler(settings_dict=spidermon_enabled_settings)

    spider = Spider.from_crawler(crawler, "example.com")
    crawler.signals.send_catch_log(signal=signals.item_scraped, spider=spider)

    assert not item_scraped_method.called, "item_scraped_method called"


def test_engine_stopped_connect_signal(mocker, spidermon_enabled_settings):
    engine_stopped = mocker.patch.object(Spidermon, "engine_stopped")

    crawler = get_crawler(settings_dict=spidermon_enabled_settings)
    spider = Spider.from_crawler(crawler, "example.com")
    crawler.signals.send_catch_log(signal=signals.engine_stopped,
                                   spider=spider,
                                   reason=None)

    assert engine_stopped.called, "engine_stopped not called"


def test_item_scraped_connect_signal_if_field_coverage_settings_enabled(
        mocker, spidermon_enabled_settings):
    item_scraped_method = mocker.patch.object(Spidermon, "item_scraped")

    spidermon_enabled_settings["SPIDERMON_ADD_FIELD_COVERAGE"] = True
    crawler = get_crawler(settings_dict=spidermon_enabled_settings)

    spider = Spider.from_crawler(crawler, "example.com")
    crawler.signals.send_catch_log(signal=signals.item_scraped, spider=spider)

    assert item_scraped_method.called, "item_scraped_method not called"


def spider():
    settings = {
        "SPIDERMON_ENABLED": True,
        "EXTENSIONS": {"spidermon.contrib.scrapy.extensions.Spidermon": 100},
        "SPIDERMON_ADD_FIELD_COVERAGE": True,
    }
    crawler = get_crawler(settings_dict=settings)

    spider = Spider.from_crawler(crawler, "example.com")

    return spider
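

# A minimal usage sketch, not from the original module, assuming the spider()
# helper above (or a pytest fixture wrapping it) is available: push one item
# through item_scraped, close the spider, and read back the field coverage
# stat that Spidermon records.
def test_field_coverage_with_spider_helper_sketch():
    crawled_spider = spider()
    crawled_spider.crawler.signals.send_catch_log_deferred(
        signal=signals.item_scraped,
        item={"field1": "value1"},
        response="",
        spider=crawled_spider,
    )
    crawled_spider.crawler.signals.send_catch_log(
        signal=signals.spider_closed, spider=crawled_spider, reason=None
    )

    stats = crawled_spider.crawler.stats.get_stats()

    assert stats.get("spidermon_field_coverage/dict/field1") == 1.0

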
    def setUp(self):
        self.req = Request('http://scrapytest.org')
        self.res_succeed, self.res_block = _responses(self.req)

        crawler = get_crawler(
            Spider,
            settings_dict={
                'HTTPPROXY_PROXY_SM_BLOCK_INSPECTOR':
                'tests.test_spidermiddleware_block_inspector.inspect_block',
                'HTTPPROXY_SM_RECYCLE_REQUEST':
                'scrapy_proxy_management.utils.recycle_request'
            })

        self.spider = Spider.from_crawler(crawler, name='foo')
        self.mw = BlockInspectorMiddleware.from_crawler(crawler)

    def setUp(self):
        crawler = get_crawler(Spider)
        self.spider = Spider.from_crawler(crawler, name='foo')
        self.mw = HttpErrorMiddleware(Settings({}))
        self.req = Request('http://scrapytest.org')
        self.res200, self.res404 = _responses(self.req, [200, 404])
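
    # A minimal sketch, not from the original suite, assuming the enclosing
    # class is a unittest.TestCase and using the fixtures built in setUp above:
    # HttpErrorMiddleware lets a 200 response through and raises HttpError for
    # a 404 that no setting or spider attribute allows.
    def test_process_spider_input_sketch(self):
        from scrapy.spidermiddlewares.httperror import HttpError

        # 2xx responses pass straight through (process_spider_input returns None).
        assert self.mw.process_spider_input(self.res200, self.spider) is None

        # Non-2xx statuses that are not explicitly allowed raise HttpError.
        with self.assertRaises(HttpError):
            self.mw.process_spider_input(self.res404, self.spider)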