Example #1
 def test_missing_backend_dependency(self, mocked_registry):
     # Testing that the correct error is thrown when a user does not have
     # the Python package `redis` installed.  We mock out the registry
     # to simulate `redis` not being installed.
     mocked_registry.__getitem__.side_effect = KeyError
     with self.assertRaises(ImportError):
         CachedSession(CACHE_NAME, backend='redis')
Example #2
    async def validate(self, cache: Any) -> Tuple[bool, Graph, Graph, Graph]:
        """Validate function."""
        async with CachedSession(cache=cache) as session:

            # Do some sanity checks on preconditions:
            # If user has given an ontology graph, we check for and do imports:
            if self.ontology_graph and len(self.ontology_graph) > 0:
                await self._import_ontologies(session)

            logging.debug(f"Validating with following config: {self.config}.")
            # Add triples from remote predicates if user has asked for that:
            if self.config.expand is True:
                await self._expand_objects_triples(session)

            # Validate!
            # `inference` should be set to one of the following: {"none", "rdfs", "owlrl", "both"}
            conforms, results_graph, _ = validate(
                data_graph=self.data_graph,
                ont_graph=self.ontology_graph,
                shacl_graph=self.shapes_graph,
                inference="rdfs",
                inplace=False,
                meta_shacl=False,
                debug=False,
                do_owl_imports=False,  # owl imports in pyshacl carry a performance penalty
                advanced=False,
            )
            return (conforms, self.data_graph, self.ontology_graph,
                    results_graph)
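
The method above relies on imports that are not shown in the snippet; the following is a plausible set, offered as an assumption rather than the original module header:

# Presumed imports for the snippet above (an assumption; not shown in the original):
import logging
from typing import Any, Tuple

from aiohttp_client_cache import CachedSession
from pyshacl import validate
from rdflib import Graph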
Example #3
    def test_return_old_data_on_error(self, datetime_mock):
        datetime_mock.utcnow.return_value = datetime.utcnow()
        expire_after = 100
        url = httpbin("get")
        s = CachedSession(CACHE_NAME,
                          CACHE_BACKEND,
                          old_data_on_error=True,
                          expire_after=expire_after)
        header = "X-Tst"

        def get(n):
            return s.get(url, headers={header: n}).json()["headers"][header]

        get("expired")
        self.assertEqual(get("2"), "expired")
        datetime_mock.utcnow.return_value = datetime.utcnow() + timedelta(
            seconds=expire_after * 2)

        with mock.patch.object(s.cache, "save_response",
                               side_effect=Exception):
            self.assertEqual(get("3"), "expired")

        with mock.patch(
                "aiohttp_client_cache.core.OriginalSession.send") as send_mock:
            resp_mock = requests.Response()
            request = requests.Request("GET", url)
            resp_mock.request = request.prepare()
            resp_mock.status_code = 400
            resp_mock._content = '{"other": "content"}'
            send_mock.return_value = resp_mock
            self.assertEqual(get("3"), "expired")

            resp_mock.status_code = 200
            self.assertIs(s.get(url).content, resp_mock.content)

        # default behaviour
        datetime_mock.utcnow.return_value = datetime.utcnow() + timedelta(
            seconds=expire_after * 2)
        s = CachedSession(CACHE_NAME,
                          CACHE_BACKEND,
                          old_data_on_error=False,
                          expire_after=100)
        with mock.patch.object(s.cache, "save_response",
                               side_effect=Exception):
            with self.assertRaises(Exception):
                s.get(url)
Example #4
async def precache_page_links(parent_url):
    """Fetch and cache the content of a given web page and all links found on that page"""
    async with CachedSession(cache=SQLiteBackend()) as session:
        urls = await get_page_links(session, parent_url)

        tasks = [asyncio.create_task(cache_url(session, url)) for url in urls]
        responses = await asyncio.gather(*tasks)

    return responses
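
The example assumes two helpers, get_page_links and cache_url, that are not shown. A minimal sketch of what they might look like follows; the implementations are assumptions, not the original code:

# Hypothetical implementations of the helpers assumed above; not the original code.
from bs4 import BeautifulSoup

async def get_page_links(session, url):
    """Fetch a page and return all absolute links found in it (sketch)."""
    async with session.get(url) as response:
        html = await response.text()
    soup = BeautifulSoup(html, 'html.parser')
    return [a['href'] for a in soup.find_all('a', href=True) if a['href'].startswith('http')]

async def cache_url(session, url):
    """Request a URL so its response gets stored in the session's cache (sketch)."""
    async with session.get(url) as response:
        return await response.read()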
Example #5
    def test_passing_backend_instance_support(self):
        class MyCache(BaseCache):
            pass

        backend = MyCache()
        aiohttp_client_cache.install_cache(name=CACHE_NAME, backend=backend)
        self.assertIs(requests.Session().cache, backend)

        session = CachedSession(backend=backend)
        self.assertIs(session.cache, backend)
Example #6
async def main():
    cache = SQLiteBackend(
        cache_name='~/.cache/aiohttp-requests.db',
        expire_after=default_expire_after,
        urls_expire_after=urls_expire_after,
    )

    async with CachedSession(cache=cache) as session:
        tasks = [asyncio.create_task(session.get(url)) for url in urls]
        return await asyncio.gather(*tasks)
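
The snippet relies on module-level names that are not shown (default_expire_after, urls_expire_after, urls). The values below are placeholders to make the example self-contained, not taken from the original script:

# Placeholder definitions for the names the example above assumes; values are illustrative only.
from datetime import timedelta

default_expire_after = timedelta(hours=1)        # fallback expiration for all other URLs
urls_expire_after = {
    'httpbin.org/image/*': timedelta(days=7),    # per-URL-pattern expiration
    'httpbin.org/uuid': timedelta(seconds=30),
}
urls = ['https://httpbin.org/get', 'https://httpbin.org/html']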
Example #7
async def test_fetch_graph_that_gives_unsuccessful_response(
    mock_aioresponse: Any, ) -> None:
    """Should return None."""
    url = "https://data.brreg.no/enhetsregisteret/api/enheter/961181399"
    # Set up the mock
    mock_aioresponse.get(url, status=406)

    async with CachedSession(cache=None) as session:
        with pytest.raises(FetchError):
            _ = await fetch_graph(session, url)
Example #8
async def test_fetch_graph_that_is_not_parsable_as_rdf(
        mock_aioresponse: Any) -> None:
    """Should return None."""
    url = "https://data.brreg.no/enhetsregisteret/api/enheter/961181399"
    # Set up the mock
    mock_aioresponse.get(url, status=200, body=_mock_non_parsable_response())

    async with CachedSession(cache=None) as session:
        with pytest.raises(SyntaxError):
            _ = await fetch_graph(session, url)
Example #9
async def test_fetch_graph_that_has_rdf_content_type(
        mock_aioresponse: Any) -> None:
    """Should return a non-empyt graph."""
    url = "https://www.w3.org/ns/regorg"
    # Set up the mock
    mock_aioresponse.get(url, status=200, body=_mock_rdf_response())

    async with CachedSession(cache=None) as session:
        o = await fetch_graph(session, url)
        assert type(o) == Graph
        assert len(o) > 0
Example #10
 def test_post_parameters_normalization(self):
     params = {"a": "a", "b": ["1", "2", "3"], "c": "4"}
     url = httpbin("post")
     s = CachedSession(CACHE_NAME,
                       CACHE_BACKEND,
                       allowable_methods=('GET', 'POST'))
     self.assertFalse(s.post(url, data=params).from_cache)
     self.assertTrue(s.post(url, data=params).from_cache)
     self.assertTrue(s.post(url, data=sorted(params.items())).from_cache)
     self.assertFalse(
         s.post(url, data=sorted(params.items(), reverse=True)).from_cache)
Example #11
 async def create(
     cls: Any,
     cache: Any,
     data_graph_url: Any,
     data_graph: Any,
     shapes_graph: Any,
     shapes_graph_url: Any,
     ontology_graph_url: Any,
     ontology_graph: Any,
     config: Config = None,
 ) -> ValidatorService:
     """Initialize service instance."""
     self = ValidatorService()
     async with CachedSession(cache=cache) as session:
         all_graph_urls = dict()
         # Process data graph:
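         # Note: dict.update() returns None, so when a URL is given the graph
         # attribute is set to None here and replaced by the fetched graph below.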
         self.data_graph = (
             all_graph_urls.update({GraphType.DATA_GRAPH: data_graph_url})
             if data_graph_url
             else parse_text(data_graph)
         )
         # Process shapes graph:
         self.shapes_graph = (
             all_graph_urls.update({GraphType.SHAPES_GRAPH: shapes_graph_url})
             if shapes_graph_url
             else parse_text(shapes_graph)
         )
         # Process ontology graph if given:
         if ontology_graph_url:
             all_graph_urls.update(
                 {GraphType.ONTOLOGY_GRAPH: ontology_graph_url})
         elif ontology_graph:
             self.ontology_graph = parse_text(ontology_graph)
         else:
             self.ontology_graph = Graph()
         # Process all_graph_urls:
         logging.debug(f"all_graph_urls len: {len(all_graph_urls)}")
         results = await asyncio.gather(*[
             fetch_graph(session, url, use_cache=False)
             for url in all_graph_urls.values()
         ])
         # Store the resulting graphs:
         # The order of result values corresponds to the order of awaitables in all_graph_urls.
         # Ref: https://docs.python.org/3/library/asyncio-task.html#running-tasks-concurrently
         for key, g in zip(all_graph_urls.keys(), results):
             # Did not find any other solution than this brute force chain of ifs
             if key == GraphType.DATA_GRAPH:
                 self.data_graph = g
             elif key == GraphType.SHAPES_GRAPH:
                 self.shapes_graph = g
             elif key == GraphType.ONTOLOGY_GRAPH:
                 self.ontology_graph = g
         # Config:
         if config is None:
             self.config = Config()
         else:
             self.config = config
         return self
Example #12
    async def init_session(self,
                           clear=True,
                           **kwargs) -> AsyncIterator[CachedSession]:
        kwargs.setdefault('allowed_methods', ALL_METHODS)
        # kwargs.setdefault('serializer', 'pickle')
        cache = self.backend_class(CACHE_NAME, **self.init_kwargs, **kwargs)
        if clear:
            await cache.clear()

        async with CachedSession(cache=cache, **self.init_kwargs,
                                 **kwargs) as session:
            yield session
Example #13
 def test_expire_cache(self):
     delay = 1
     url = httpbin('delay/%s' % delay)
     s = CachedSession(CACHE_NAME, backend=CACHE_BACKEND, expire_after=0.06)
     t = time.time()
     r = s.get(url)
     delta = time.time() - t
     self.assertGreaterEqual(delta, delay)
     time.sleep(0.5)
     t = time.time()
     r = s.get(url)
     delta = time.time() - t
     self.assertGreaterEqual(delta, delay)
Example #14
async def test_fetch_graph_that_does_not_have_rdf_content_type(
    mock_aioresponse: Any, ) -> None:
    """Should return a non-empty graph."""
    url = "https://data.norge.no/vocabulary/modelldcatno/modelldcatno.ttl"
    # Set up the mock
    mock_aioresponse.get(
        url,
        status=200,
        body=_mock_rdf_with_non_standard_content_type_response())

    async with CachedSession(cache=None) as session:
        o = await fetch_graph(session, url)
        assert type(o) == Graph
        assert len(o) > 0
Example #15
    def test_post_params(self):
        # issue #2
        self.s = CachedSession(CACHE_NAME,
                               CACHE_BACKEND,
                               allowable_methods=('GET', 'POST'))

        d = {'param1': 'test1'}
        for _ in range(2):
            self.assertEqual(self.post(d)['form'], d)
            d = {'param1': 'test1', 'param3': 'test3'}
            self.assertEqual(self.post(d)['form'], d)

        self.assertTrue(self.s.post(httpbin('post'), data=d).from_cache)
        d.update({'something': 'else'})
        self.assertFalse(self.s.post(httpbin('post'), data=d).from_cache)
Example #16
    def test_headers_in_get_query(self):
        url = httpbin("get")
        s = CachedSession(CACHE_NAME, CACHE_BACKEND, include_get_headers=True)
        headers = {"Accept": "text/json"}
        self.assertFalse(s.get(url, headers=headers).from_cache)
        self.assertTrue(s.get(url, headers=headers).from_cache)

        headers["Accept"] = "text/xml"
        self.assertFalse(s.get(url, headers=headers).from_cache)
        self.assertTrue(s.get(url, headers=headers).from_cache)

        headers["X-custom-header"] = "custom"
        self.assertFalse(s.get(url, headers=headers).from_cache)
        self.assertTrue(s.get(url, headers=headers).from_cache)

        self.assertFalse(s.get(url).from_cache)
        self.assertTrue(s.get(url).from_cache)
Example #17
    def test_ignore_parameters_post_raw(self):
        url = httpbin("post")
        ignored_param = "ignored"
        raw_data = "raw test data"

        s = CachedSession(
            CACHE_NAME,
            CACHE_BACKEND,
            allowable_methods=('POST',),
            ignored_parameters=[ignored_param],
        )

        self.assertFalse(s.post(url, data=raw_data).from_cache)
        self.assertTrue(s.post(url, data=raw_data).from_cache)

        raw_data = "new raw data"
        self.assertFalse(s.post(url, data=raw_data).from_cache)
Example #18
async def log_requests():
    """Context manager that mocks and logs all non-cached requests"""

    async def mock_response(*args, **kwargs):
        return CachedResponse(method='GET', reason='OK', status=200, url='url', version='1.1')

    with patch.object(ClientSession, '_request', side_effect=mock_response) as mock_request:
        async with CachedSession(cache=SQLiteBackend('cache-test.sqlite')) as session:
            await session.cache.clear()
            yield session
            cached_responses = [v async for v in session.cache.responses.values()]

    logger.debug('All calls to ClientSession._request():')
    logger.debug(mock_request.mock_calls)

    logger.info(f'Responses cached: {len(cached_responses)}')
    logger.info(f'Requests sent: {mock_request.call_count}')
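
Because log_requests yields the session, it is presumably decorated with contextlib.asynccontextmanager in the original module; a minimal usage sketch under that assumption:

# Usage sketch, assuming log_requests is decorated with contextlib.asynccontextmanager
# (the decorator is not shown in the snippet above):
import asyncio

async def main():
    async with log_requests() as session:
        await session.get('https://example.com')  # handled by the mocked _request
        await session.get('https://example.com')

asyncio.run(main())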
Example #19
    def test_post_data(self):
        # issue #2, raw payload
        self.s = CachedSession(CACHE_NAME,
                               CACHE_BACKEND,
                               allowable_methods=('GET', 'POST'))
        d1 = json.dumps({'param1': 'test1'})
        d2 = json.dumps({'param1': 'test1', 'param2': 'test2'})
        d3 = 'some unicode data'
        bin_data = bytes('some binary data', 'utf8')

        for d in (d1, d2, d3):
            self.assertEqual(self.post(d)['data'], d)
            r = self.s.post(httpbin('post'), data=d)
            self.assertTrue(hasattr(r, 'from_cache'))

        self.assertEqual(self.post(bin_data)['data'], bin_data.decode('utf8'))
        r = self.s.post(httpbin('post'), data=bin_data)
        self.assertTrue(hasattr(r, 'from_cache'))
Example #20
    def test_remove_expired_entries(self, datetime_mock, datetime_mock2):
        expire_after = timedelta(minutes=10)
        start_time = datetime.utcnow().replace(year=2010, minute=0)
        datetime_mock.utcnow.return_value = start_time
        datetime_mock2.utcnow.return_value = start_time

        s = CachedSession(CACHE_NAME, CACHE_BACKEND, expire_after=expire_after)
        s.get(httpbin('get'))
        s.get(httpbin('relative-redirect/3'))
        datetime_mock.utcnow.return_value = start_time + expire_after * 2
        datetime_mock2.utcnow.return_value = datetime_mock.utcnow.return_value

        ok_url = 'get?x=1'
        s.get(httpbin(ok_url))
        self.assertEqual(len(s.cache.responses), 3)
        self.assertEqual(len(s.cache.keys_map), 3)
        s.delete_expired_responses()
        self.assertEqual(len(s.cache.responses), 1)
        self.assertEqual(len(s.cache.keys_map), 0)
        self.assertIn(ok_url, list(s.cache.responses.values())[0][0].url)
Example #21
    def test_ignore_parameters_get(self):
        url = httpbin("get")
        ignored_param = "ignored"
        usual_param = "some"
        params = {ignored_param: "1", usual_param: "1"}

        s = CachedSession(CACHE_NAME,
                          CACHE_BACKEND,
                          ignored_parameters=[ignored_param])

        r = s.get(url, params=params)
        self.assertIn(ignored_param, r.json()['args'].keys())
        self.assertFalse(r.from_cache)

        self.assertTrue(s.get(url, params=params).from_cache)

        params[ignored_param] = "new"
        self.assertTrue(s.get(url, params=params).from_cache)

        params[usual_param] = "new"
        self.assertFalse(s.get(url, params=params).from_cache)
Example #22
        def do_tests_for(backend):
            s = CachedSession(CACHE_NAME, backend)
            s.cache.clear()
            n_threads = 10
            url = 'http://httpbin.org/get'

            def do_requests(url, params):
                for i in range(10):  # for testing write and read from cache
                    s.get(url, params=params)

            for _ in range(20):  # stress test
                threads = [
                    Thread(target=do_requests, args=(url, {'param': i}))
                    for i in range(n_threads)
                ]
                for t in threads:
                    t.start()
                for t in threads:
                    t.join()

                for i in range(n_threads):
                    self.assertTrue(s.cache.has_url('%s?param=%s' % (url, i)))
Example #23
    def test_ignore_parameters_post_json(self):
        url = httpbin("post")
        ignored_param = "ignored"
        usual_param = "some"
        d = {ignored_param: "1", usual_param: "1"}

        s = CachedSession(
            CACHE_NAME,
            CACHE_BACKEND,
            allowable_methods=('POST',),
            ignored_parameters=[ignored_param],
        )

        r = s.post(url, json=d)
        self.assertIn(ignored_param, json.loads(r.json()['data']).keys())
        self.assertFalse(r.from_cache)

        self.assertTrue(s.post(url, json=d).from_cache)

        d[ignored_param] = "new"
        self.assertTrue(s.post(url, json=d).from_cache)

        d[usual_param] = "new"
        self.assertFalse(s.post(url, json=d).from_cache)
Example #24
 def test_unregistered_backend(self):
     with self.assertRaises(ValueError):
         CachedSession(CACHE_NAME, backend='nonexistent')
Example #25
import json
from aiohttp_client_cache import CachedSession, SQLiteBackend
import aiohttp
import asyncio
from bs4 import BeautifulSoup

headers = {
    "User-Agent":
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36 Edg/83.0.478.45",
    "Accept-Encoding": "*",
    "Connection": "keep-alive",
}

session = CachedSession(cache=SQLiteBackend(
    cache_name='torrent_cache',
    expire_after=60 * 60 * 3,
    allowed_codes=(200, 418),
))

uncached_session = aiohttp.ClientSession()


async def shorten_all(urls):
    tasks = []
    for url in urls:
        task = asyncio.create_task(shorten(url))
        tasks.append(task)
    results = await asyncio.gather(*tasks)
    return results
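
The script calls a shorten coroutine that is not included in the excerpt; the stand-in below is purely hypothetical, added so the snippet hangs together:

# Hypothetical stand-in for the missing `shorten` coroutine; the original
# implementation is not included in the excerpt above.
async def shorten(url):
    async with session.get(url, headers=headers) as response:
        return str(response.url)  # placeholder: return the final resolved URL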

Example #26
 def test_str_and_repr(self):
     s = repr(CachedSession(CACHE_NAME, CACHE_BACKEND, expire_after=10))
     self.assertIn(CACHE_NAME, s)
     self.assertIn("10", s)
Example #27
 def setUp(self):
     self.s = CachedSession(CACHE_NAME,
                            backend=CACHE_BACKEND,
                            fast_save=FAST_SAVE)
     self.s.cache.clear()
     aiohttp_client_cache.uninstall_cache()
Example #28
async def get_tempfile_session(**kwargs) -> AsyncIterator[CachedSession]:
    """Get a CachedSession using a temporary SQLite db"""
    with NamedTemporaryFile(suffix='.db') as temp:
        cache = SQLiteBackend(cache_name=temp.name, allowed_methods=ALL_METHODS, **kwargs)
        async with CachedSession(cache=cache) as session:
            yield session