def fake_webcache():
    """Build an in-memory WebCache whose ``fetch`` always uses a mock session.

    The cache's bound ``fetch`` is replaced by a wrapper that forwards
    every argument to the original ``fetch`` but substitutes a shared
    MockSession for whatever session the caller supplied.

    Returns:
      A ``(web_cache, mock_session)`` tuple: the patched WebCache and
      the MockSession that all of its ``fetch`` calls are routed through.
    """
    wc = WebCache(':memory:')
    old_fetch = wc.fetch
    mock_session = MockSession()

    def new_fetch(url,
                  force_remote=False,
                  cache_only=False,
                  method='GET',
                  headers=None,
                  params=None,
                  data=None,
                  files=None,
                  cookies=None,
                  session=None,
                  session_kwargs=None,
                  **kwargs):
        """Forward to the original ``fetch`` using the mock session.

        Dict-valued parameters default to ``None`` and are replaced by
        a fresh empty dict per call, so no dict object is shared
        between calls.
        """
        # NOTE(review): the caller's `session` and `session_kwargs` are
        # dropped, as in the original wrapper; presumably deliberate
        # because the mock session is always used -- confirm.
        return old_fetch(url=url,
                         force_remote=force_remote,
                         cache_only=cache_only,
                         method=method,
                         headers={} if headers is None else headers,
                         params={} if params is None else params,
                         data={} if data is None else data,
                         files={} if files is None else files,
                         cookies={} if cookies is None else cookies,
                         session=mock_session,
                         session_kwargs={},
                         **kwargs)

    wc.fetch = new_fetch
    return wc, mock_session
def test_webcache_delete_incache_succeeds():
    """Deleting an entry that was cached should complete without raising."""
    cache = WebCache(':memory:')

    # Hand-build a minimal successful response to store in the cache.
    response = requests.Response()
    response.status_code = 200
    response._content = b'Content'
    response.encoding = 'utf-8'
    response.headers['ETag'] = 'fake-etag'

    cache.cache_response('http://fake/', response, force_cache=True)
    cache.delete_from_cache('url = "http://fake/"')
def test_webcache_get_incache_succeeds():
    """A cached response should come back as a 4-entry dict that
    matches the response that was stored."""
    original = MockSession().make_response(content=b'Content')
    original.headers['ETag'] = 'fake-etag'

    cache = WebCache(':memory:')
    cache.cache_response('http://fake/', original, force_cache=True)
    entry = cache.get_cached_response('http://fake/')

    # Shape of the cache entry.
    assert isinstance(entry, dict)
    assert len(entry) == 4
    assert entry['url'] == 'http://fake/'

    # The stored response must agree with the one we cached.
    cached = entry['response']
    assert entry['etag'] == cached.headers['ETag']
    assert original.status_code == cached.status_code
    assert original.content == cached.content
    assert original.encoding == cached.encoding
    assert original.headers['ETag'] == cached.headers['ETag']
def make_scraper_registry(*,
                          home_dir=Path('work'),
                          census_api_key=None,
                          registry_args=None,
                          scraper_args=None):
    """Returns a Registry instance with all the per-state scrapers
    registered.

    Keyword arguments:

      home_dir: optional, a Pathlike or str for the root of a working
        directory (defaults to ``work``).  The web cache database
        ``web_cache.db`` is placed directly under it, and each scraper
        is given the subdirectory named after its class.  If it does
        not exist, it will be created.

      census_api_key: optional, passed as the sole argument to the
        CensusApi constructor.

      registry_args: optional, a dict of additional keyword arguments
        for the Registry constructor.

      scraper_args: optional, a dict of additional keyword arguments
        for all scrapers' constructors.
    """
    # Accept plain strings as well as Path objects; the `/` joins
    # below require a Path.
    home_dir = Path(home_dir)
    # None sentinels instead of shared mutable `{}` defaults.
    registry_args = {} if registry_args is None else registry_args
    scraper_args = {} if scraper_args is None else scraper_args

    os.makedirs(str(home_dir), exist_ok=True)
    # We need a web cache for creating the census API.
    web_cache = WebCache(str(home_dir / 'web_cache.db'))
    with UTILS_WEB_CACHE.with_instance(web_cache):
        census_api = CensusApi(census_api_key)
    registry = Registry(web_cache=web_cache, **registry_args)
    for scraper_class in get_scraper_classes():
        registry.register_scraper(
            scraper_class(home_dir=home_dir / scraper_class.__name__,
                          census_api=census_api,
                          **scraper_args))
    return registry
def test_reset():
    """Reopening an on-disk cache with ``reset=True`` should leave no
    entry from the previous open."""
    response = MockSession().make_response(content=b'Content')
    response.headers['ETag'] = 'fake-etag'

    try:
        # First open: store an entry and read it back.
        first_cache = WebCache('test.db', reset=True)
        first_cache.cache_response('http://fake/', response, force_cache=True)
        before_reset = first_cache.get_cached_response('http://fake/')
        first_cache.conn.close()
        assert before_reset is not None

        # Second open with reset: the entry must no longer be found.
        second_cache = WebCache('test.db', reset=True)
        after_reset = second_cache.get_cached_response('http://fake/')
        second_cache.conn.close()
        assert after_reset is None
    finally:
        # Always remove the on-disk database created by this test.
        os.remove('test.db')
def test_webcache_get_uncached_fails():
    """Looking up a URL that was never cached should yield None."""
    cached = WebCache(':memory:').get_cached_response('http://fake/')
    assert cached is None
def test_webcache_delete_uncached_fails():
    """delete_from_cache should raise sqlite3.DatabaseError for this
    condition string on an empty cache."""
    cache = WebCache(':memory:')
    # NOTE(review): the URL is unquoted here, which is presumably what
    # triggers the database error -- confirm against delete_from_cache.
    condition = 'url = http://fake/'
    with pytest.raises(sqlite3.DatabaseError):
        cache.delete_from_cache(condition)
Example #8
0
# Only the shared cache handle is exported from this module.
__all__ = ['UTILS_WEB_CACHE']

from covid19_scrapers.web_cache import WebCache
from covid19_scrapers.scoped_resource import ScopedResource

# Singleton WebCache for the routines in this file.
# Set it using something like:
#
#    with utils.UTILS_WEB_CACHE('my_cache.db'):
#        code_that_might_call_utils()
#
# It is initialized with an in-memory cache, which is discarded the
# first time it is set using `with`.
#
# NOTE(review): the scoping and discard behavior described above is
# provided by ScopedResource, which is not visible here -- confirm
# against covid19_scrapers.scoped_resource.
UTILS_WEB_CACHE = ScopedResource(WebCache).with_instance(WebCache(':memory:'))