def test_on_exception(self, provided_cls: AutoExtractData):
        """Check the stats recorded when the provider's request raises.

        ``do_request`` is overridden to bump the aggregated attempt counters
        and then raise a bare ``Exception``. Building the callback
        dependencies is expected to propagate that exception, and the crawler
        stats must afterwards match the per-page-type and total counters
        listed in ``expected``.

        The test is a generator: the value returned by
        ``build_callback_dependencies`` is yielded, presumably to an
        ``inlineCallbacks``-style decorator/runner that is not visible in
        this part of the file.
        """
        class Provider(AutoExtractProvider):
            async def do_request(self, *args, agg_stats: AggStats, **kwargs):
                # Record 3 attempts and 2 billable query responses before
                # failing, so the counters asserted below have known values.
                agg_stats.n_attempts += 3
                agg_stats.n_billable_query_responses += 2
                raise Exception()

        def callback(item: provided_cls):
            pass

        page_type = provided_cls.page_type
        injector = get_injector_for_testing({Provider: 500})
        response = get_response_for_testing(callback)
        # The raised exception is never inspected, so it is not bound to a
        # name; only the fact that it propagates matters here.
        with pytest.raises(Exception):
            yield injector.build_callback_dependencies(response.request,
                                                       response)
        stats = injector.crawler.stats
        expected = {
            f'autoextract/{page_type}/pages/count': 1,
            f'autoextract/{page_type}/pages/errors': 1,
            'autoextract/total/attempts/count': 3,
            'autoextract/total/attempts/billable': 2,
            'autoextract/total/pages/count': 1,
            'autoextract/total/pages/errors': 1,
            # The error is keyed by the exception class name ("Exception").
            'autoextract/total/pages/errors/rest/Exception': 1
        }
        assert_stats(stats, expected)
    def test_on_query_error(self, provided_cls: AutoExtractData):
        """Check stats and error text when the response carries a query error.

        The stubbed ``do_request`` records 3 attempts and 2 billable query
        responses, then returns a single result containing an ``"error"``
        entry. Building the callback dependencies is expected to raise
        ``QueryError`` whose text mentions both the error and the query, and
        the crawler stats must match the counters asserted below.
        """
        kind = provided_cls.page_type
        failed_result = {"query": "The query", "error": "Download error"}

        class Provider(AutoExtractProvider):
            async def do_request(self, *args, agg_stats: AggStats, **kwargs):
                agg_stats.n_attempts += 3
                agg_stats.n_billable_query_responses += 2
                return [failed_result]

        def callback(item: provided_cls):
            pass

        test_injector = get_injector_for_testing({Provider: 500})
        fake_response = get_response_for_testing(callback)
        with pytest.raises(QueryError) as raised:
            yield test_injector.build_callback_dependencies(
                fake_response.request, fake_response)

        assert_stats(test_injector.crawler.stats, {
            f'autoextract/{kind}/pages/count': 1,
            f'autoextract/{kind}/pages/errors': 1,
            'autoextract/total/attempts/count': 3,
            'autoextract/total/attempts/billable': 2,
            'autoextract/total/pages/count': 1,
            'autoextract/total/pages/errors': 1,
            'autoextract/total/pages/errors/query/Download error': 1
        })

        # Both the error text and the offending query must be reported.
        message = str(raised.value)
        assert "Download error" in message
        assert "The query" in message
    async def test_on_cancellation(self, provided_cls: AutoExtractProductData):
        """Check the stats recorded when an in-flight request is cancelled.

        The stubbed ``do_request`` waits on a lock that is already held and
        never released, so it cannot finish on its own. While it is pending,
        a helper coroutine sends SIGINT to the current process. The test then
        expects the dependency-building future to end with
        ``CancelledError`` and the stats to count one cancelled page and
        zero errors.

        NOTE(review): how SIGINT is turned into a cancellation of the
        pending request is not visible in this test; presumably the
        provider/injector machinery handles it - confirm against the
        provider implementation.
        """
        # Swap in a no-op SIGINT handler for the duration of the test; the
        # previous handler is restored in the ``finally`` clause below.
        old_handler = signal.getsignal(SIGINT)
        signal.signal(SIGINT, lambda x, y: None)
        try:
            # Hold the lock up front so that the second ``acquire`` inside
            # ``do_request`` stays pending.
            lock = asyncio.Lock()
            await lock.acquire()

            class Provider(AutoExtractProvider):
                async def do_request(self, *args, agg_stats: AggStats,
                                     **kwargs):
                    # Never completes by itself: nothing releases the lock.
                    await lock.acquire()

            def callback(item: provided_cls):
                pass

            injector = get_injector_for_testing({Provider: 500})
            stats = injector.crawler.stats
            response = get_response_for_testing(callback)
            deferred = injector.build_callback_dependencies(
                response.request, response)
            # Wrap the Deferred in an asyncio future so it can be awaited
            # together with the coroutine below.
            build_callbacks_future = Deferred.asFuture(
                deferred, asyncio.get_event_loop())

            async def cancel_after(sleep):
                """Sleep ``sleep`` seconds, then send SIGINT to this process.

                Returns (does not raise) a ``CancelledError`` instance so
                that every entry gathered below can be checked with the same
                ``isinstance`` test.
                """
                await asyncio.sleep(sleep)
                pid = os.getpid()
                try:
                    os.kill(pid, SIGINT)
                except KeyboardInterrupt:
                    # Sending SIGINT to our own process may surface here as
                    # a KeyboardInterrupt; that is expected and ignored.
                    pass
                return CancelledError()

            # ``return_exceptions=True`` makes gather hand back exceptions as
            # results instead of raising them, so the outcome of the build
            # future can be inspected.
            result = await asyncio.gather(build_callbacks_future,
                                          cancel_after(0.05),
                                          return_exceptions=True)
            assert all([isinstance(r, CancelledError) for r in result])

            page_type = provided_cls.page_type
            # A cancelled page is counted as cancelled, not as an error.
            expected_stats = {
                'autoextract/total/pages/count': 1,
                'autoextract/total/pages/cancelled': 1,
                'autoextract/total/pages/errors': 0,
                f'autoextract/{page_type}/pages/count': 1,
                f'autoextract/{page_type}/pages/cancelled': 1,
                f'autoextract/{page_type}/pages/errors': 0,
            }
            assert_stats(stats, expected_stats)

        finally:
            # Always put the original SIGINT handler back.
            signal.signal(SIGINT, old_handler)
    def test_providers(self, provided_cls: AutoExtractProductData):
        """Exercise the provider for item-only, item+HTML and HTML-only callbacks.

        A stubbed ``do_request`` checks the request kwargs and the session
        connector limit against the values configured in ``settings``, bumps
        the attempt counters and returns a canned result that includes both
        the page-type payload and the HTML. Three callbacks with different
        signatures are then resolved in turn; after each one the injected
        objects and the crawler stats are verified. The stats are cumulative
        across the three cases, so the expected counters grow each time.

        The test is a generator: the value returned by
        ``build_callback_dependencies`` is yielded, presumably to an
        ``inlineCallbacks``-style decorator/runner that is not visible in
        this part of the file.
        """
        page_type = provided_cls.page_type
        url, html = "http://example.com", "html_content"
        data_wo_html = {page_type: {"url": url}}
        data = {page_type: {"url": url}, "html": html}

        class Provider(AutoExtractProvider):
            async def do_request(self, *args, agg_stats: AggStats, **kwargs):
                # ``provider`` is a closure over the enclosing test's local,
                # which is assigned below before any request is issued.
                assert provider.aiohttp_session.connector.limit == 2020
                agg_stats.n_attempts += 3
                agg_stats.n_billable_query_responses += 2
                assert kwargs['api_key'] == "key"
                assert kwargs['endpoint'] == "url"
                assert kwargs['max_query_error_retries'] == 31415
                # Return a deep copy so the canned ``data`` stays unchanged
                # for the following cases.
                return [copy.deepcopy(data)]

        def callback(item: provided_cls):
            pass

        def callback_with_html(item: provided_cls, html: AutoExtractHtml):
            pass

        def callback_only_html(html: AutoExtractHtml):
            pass

        # These values are the ones asserted inside ``Provider.do_request``
        # and right after the injector is built. The user/key must be "key"
        # to agree with the ``api_key`` assertion (presumably the provider
        # derives ``api_key`` from AUTOEXTRACT_USER - confirm).
        settings = {
            "AUTOEXTRACT_USER": "key",
            "AUTOEXTRACT_URL": "url",
            "AUTOEXTRACT_MAX_QUERY_ERROR_RETRIES": 31415,
            "CONCURRENT_REQUESTS": 2020,
            "CONCURRENT_REQUESTS_PER_DOMAIN": 1980,
        }
        injector = get_injector_for_testing({Provider: 500}, settings)
        stats = injector.crawler.stats
        provider = injector.providers[-1]
        assert provider.per_domain_semaphore.concurrency_per_slot == 1980

        #  - No HTML requested case -

        response = get_response_for_testing(callback)
        kwargs = yield injector.build_callback_dependencies(
            response.request, response)
        # The item must not carry the HTML part of the canned result.
        assert kwargs["item"].data == data_wo_html
        assert type(kwargs["item"]) is provided_cls
        expected_stats = {
            'autoextract/total/pages/count': 1,
            'autoextract/total/pages/success': 1,
            'autoextract/total/attempts/count': 3,
            'autoextract/total/attempts/billable': 2,
            f'autoextract/{page_type}/pages/count': 1,
            f'autoextract/{page_type}/pages/success': 1
        }
        assert_stats(stats, expected_stats)

        #  - Both HTML and item requested case -

        response = get_response_for_testing(callback_with_html)
        kwargs = yield injector.build_callback_dependencies(
            response.request, response)
        item, html_response = kwargs["item"], kwargs["html"]
        assert item.data == data_wo_html
        assert type(item) is provided_cls
        assert (html_response.url, html_response.html) == (url, html)
        assert type(html_response) is AutoExtractHtml
        expected_stats = {
            'autoextract/total/pages/count': 2,
            'autoextract/total/pages/success': 2,
            'autoextract/total/pages/html': 1,
            'autoextract/total/attempts/count': 6,
            'autoextract/total/attempts/billable': 4,
            f'autoextract/{page_type}/pages/count': 2,
            f'autoextract/{page_type}/pages/success': 2,
            f'autoextract/{page_type}/pages/html': 1,
        }
        assert_stats(stats, expected_stats)

        #  - Only HTML is requested case -

        # NOTE(review): this configures ``providers[0]`` while ``provider``
        # above is ``providers[-1]``; presumably they are the same object
        # when a single provider is registered - confirm.
        injector.providers[0].page_type_class_for_html = provided_cls
        response = get_response_for_testing(callback_only_html)
        kwargs = yield injector.build_callback_dependencies(
            response.request, response)
        assert "item" not in kwargs
        html_response = kwargs["html"]
        assert (html_response.url, html_response.html) == (url, html)
        assert type(html_response) is AutoExtractHtml
        expected_stats = {
            'autoextract/total/pages/count': 3,
            'autoextract/total/pages/success': 3,
            'autoextract/total/pages/html': 2,
            'autoextract/total/attempts/count': 9,
            'autoextract/total/attempts/billable': 6,
            f'autoextract/{page_type}/pages/count': 3,
            f'autoextract/{page_type}/pages/success': 3,
            f'autoextract/{page_type}/pages/html': 2,
        }
        assert_stats(stats, expected_stats)