def fake_webcache():
    """Return a (WebCache, MockSession) pair whose fetch() never hits the
    network: every call is routed through the MockSession instead."""
    wc = WebCache(':memory:')
    old_fetch = wc.fetch
    mock_session = MockSession()

    def new_fetch(url, force_remote=False, cache_only=False, method='GET',
                  headers={}, params={}, data={}, files={}, cookies={},
                  session=None, session_kwargs={}, **kwargs):
        # Delegate to the real fetch, but always substitute the mock
        # session (any caller-supplied session/session_kwargs are ignored).
        return old_fetch(url=url, force_remote=force_remote,
                         cache_only=cache_only, method=method,
                         headers=headers, params=params, data=data,
                         files=files, cookies=cookies,
                         session=mock_session, session_kwargs={},
                         **kwargs)

    wc.fetch = new_fetch
    return wc, mock_session
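# A minimal usage sketch (an assumption, not part of the original test
# suite): it relies only on the helper's return value shown above.
def test_fake_webcache_returns_cache_and_mock_session():
    wc, mock_session = fake_webcache()
    assert isinstance(wc, WebCache)
    assert isinstance(mock_session, MockSession)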
def test_webcache_delete_incache_succeeds():
    webcache = WebCache(':memory:')
    r = requests.Response()
    r.status_code = 200
    r._content = b'Content'
    r.encoding = 'utf-8'
    r.headers['ETag'] = 'fake-etag'
    r = webcache.cache_response('http://fake/', r, force_cache=True)
    webcache.delete_from_cache('url = "http://fake/"')
def test_webcache_get_incache_succeeds():
    session = MockSession()
    r = session.make_response(content=b'Content')
    r.headers['ETag'] = 'fake-etag'
    webcache = WebCache(':memory:')
    webcache.cache_response('http://fake/', r, force_cache=True)
    r2 = webcache.get_cached_response('http://fake/')
    assert isinstance(r2, dict)
    assert len(r2) == 4
    assert r2['url'] == 'http://fake/'
    assert r2['etag'] == r2['response'].headers['ETag']
    assert r.status_code == r2['response'].status_code
    assert r.content == r2['response'].content
    assert r.encoding == r2['response'].encoding
    assert r.headers['ETag'] == r2['response'].headers['ETag']
def make_scraper_registry(*, home_dir=Path('work'),
                          census_api_key=None,
                          registry_args={},
                          scraper_args={}):
    """Returns a Registry instance with all the per-state scrapers registered.

    Keyword arguments:
      home_dir: required, a Pathlike for the root of a working
        directory. Cached downloads will be saved in per-scraper
        directories under this. If it does not exist, it will be
        created.
      census_api_key: optional, an API key to pass to the CensusApi
        constructor.
      registry_args: optional, a dict of additional keyword arguments
        for the Registry constructor.
      scraper_args: optional, a dict of additional keyword arguments
        for all scrapers' constructors.
    """
    os.makedirs(str(home_dir), exist_ok=True)
    # We need a web cache for creating the census API.
    web_cache = WebCache(str(home_dir / 'web_cache.db'))
    with UTILS_WEB_CACHE.with_instance(web_cache):
        census_api = CensusApi(census_api_key)
        registry = Registry(web_cache=web_cache, **registry_args)
        for scraper_class in get_scraper_classes():
            registry.register_scraper(
                scraper_class(home_dir=home_dir / scraper_class.__name__,
                              census_api=census_api,
                              **scraper_args))
    return registry
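# Hedged usage sketch, grounded only in the signature above (all arguments
# are keyword-only; the working directory and environment variable name are
# placeholders, not defined by this module):
#
# registry = make_scraper_registry(
#     home_dir=Path('/tmp/covid19_work'),
#     census_api_key=os.environ.get('CENSUS_API_KEY'))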
def test_reset():
    session = MockSession()
    r = session.make_response(content=b'Content')
    r.headers['ETag'] = 'fake-etag'
    try:
        webcache = WebCache('test.db', reset=True)
        webcache.cache_response('http://fake/', r, force_cache=True)
        r2 = webcache.get_cached_response('http://fake/')
        webcache.conn.close()
        assert r2 is not None
        webcache = WebCache('test.db', reset=True)
        r3 = webcache.get_cached_response('http://fake/')
        webcache.conn.close()
        assert r3 is None
    finally:
        os.remove('test.db')
def test_webcache_get_uncached_fails():
    webcache = WebCache(':memory:')
    row = webcache.get_cached_response('http://fake/')
    assert row is None
def test_webcache_delete_uncached_fails():
    webcache = WebCache(':memory:')
    with pytest.raises(sqlite3.DatabaseError):
        webcache.delete_from_cache('url = http://fake/')
__all__ = ['UTILS_WEB_CACHE']

from covid19_scrapers.web_cache import WebCache
from covid19_scrapers.scoped_resource import ScopedResource


# Singleton WebCache for the routines in this file.
# Set it using something like:
#
#   with utils.UTILS_WEB_CACHE('my_cache.db'):
#       code_that_might_call_utils()
#
# It is initialized with an in-memory cache, which is discarded the
# first time it is set using `with`.
UTILS_WEB_CACHE = ScopedResource(WebCache).with_instance(WebCache(':memory:'))