def test_Cache(self):
    """Exercise ``Cache``: retrieval, dropping, defaults, RDF content and purging.

    HTTP traffic is answered by the ``clld`` HTTMock handler, and
    ``clldclient.cache.user_cache_dir`` is patched to return ``self.tmp``.
    """
    from clldclient.cache import Cache

    json_url = 'http://glottolog.org/resource/languoid/id/stan1295.json'
    rdf_url = 'http://glottolog.org/resource/languoid/id/stan1295.rdf'
    unknown_url = 'http://glottolog.org/unknown'
    host = b('glottolog.org')

    with patch('clldclient.cache.user_cache_dir', Mock(return_value=self.tmp)), \
            HTTMock(clld):
        cache = Cache()

        r1 = cache.get(json_url)
        self.assertEqual(r1.content['id'], 'stan1295')
        self.assertEqual(r1.canonical_url, 'http://glottolog.org/')

        # A repeated request must yield a response with the same creation time.
        r2 = cache.get(json_url)
        self.assertEqual(r1.created, r2.created)

        # After dropping the cache the same request yields a new creation time.
        cache.drop()
        r2 = cache.get(json_url)
        self.assertNotEqual(r1.created, r2.created)

        # URLs answered with 404 raise KeyError unless a default is supplied.
        self.assertRaises(KeyError, cache.get, unknown_url)
        self.assertEqual(cache.get(unknown_url, default=1), 1)

        res = cache.get(rdf_url)
        self.assertEqual(res.mimetype, 'application/rdf+xml')
        self.assertEqual(res.canonical_url, b(rdf_url))
        self.assertTrue(hasattr(res.content, 'triples'))
        self.assertEqual(res.links, [])

        # Purging by host must delete as many rows as stats() reports for it.
        cached = {r[0]: r[1] for r in cache.stats()}[host]
        self.assertEqual(cached, cache.purge(host=host))

        # Sleep so the next entry is created strictly after ``now``.
        now = datetime.datetime.utcnow()
        time.sleep(0.2)
        cache.get(json_url)
        self.assertEqual(0, cache.purge(before=now, host=host))
        self.assertEqual(1, cache.purge(after=now, host=host))
def get(self, url, default=NO_DEFAULT, headers=None):
    """Return a Response for ``url``, fetching and caching it on a miss.

    The cache is looked up by request URL and the value of the ``Accept``
    request header (empty string when absent).

    :param url: the URL to retrieve; it is wrapped in ``URL``.
    :param default: value returned when the URL could not be added to the \
cache; when left at ``NO_DEFAULT`` a ``KeyError`` is raised instead.
    :param headers: optional dict of request headers.
    :return: a ``Response`` built from the cached row, or ``default``.
    :raises KeyError: if ``self.add`` returns ``None`` and no default was given.
    """
    if not headers:
        headers = {}
    url = URL(url)

    columns = [
        responses.c.created,
        responses.c.host,
        responses.c.request_url,
        responses.c.accept,
        responses.c.url,
        responses.c.headers,
        responses.c.content,
    ]
    lookup = select(columns).where(and_(
        responses.c.request_url == url.as_string().encode('utf8'),
        responses.c.accept == b(headers.get('Accept', ''))))
    row = self.db.execute(lookup).fetchone()

    if row:
        log.info('cache hit %s' % url)
        return Response(*row)

    log.info('cache miss %s' % url)
    row = self.add(url, headers)
    if row is not None:
        return Response(*row)

    # Nothing could be cached for this URL: fail or fall back to the default.
    if default is NO_DEFAULT:
        raise KeyError(url)
    log.info('invalid url %s' % url)
    return default
def purge(self, host=None, before=None, after=None):
    """Delete cached responses matching all of the given filters.

    Filters with a falsy value are ignored; if none is given, the delete
    statement is executed without any WHERE clause.

    :param host: only delete rows whose host equals ``b(host)``.
    :param before: only delete rows created earlier than this value.
    :param after: only delete rows created later than this value.
    :return: the ``rowcount`` reported for the executed delete statement.
    """
    criteria = []
    if host:
        criteria.append(responses.c.host == b(host))
    if before:
        criteria.append(responses.c.created < before)
    if after:
        criteria.append(responses.c.created > after)

    statement = responses.delete()
    for criterion in criteria:
        statement = statement.where(criterion)

    result = self.db.execute(statement)
    log.info('%s rows deleted' % result.rowcount)
    return result.rowcount
def clld(url, request):
    """Answer mocked HTTP requests with canned content keyed by URL path.

    Each known path maps to a ``(link header, content type, body)`` tuple.
    Unknown paths are answered with a 404 response.
    """
    canned = {
        '/resource/languoid/id/stan1295.json': (
            '<http://glottolog.org/>; rel="canonical"; type="text/html"',
            'application/json',
            b('{"id": "stan1295", "name": "Standard German"}')),
        '/resource/languoid/id/stan1295.rdf': (
            '',
            'application/rdf+xml; charset=utf8',
            b("""\
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
         xmlns:dcterms="http://purl.org/dc/terms/">
    <rdf:Description rdf:about="http://glottolog.org/resource/languoid/id/stan1295">
        <dcterms:isReferencedBy rdf:resource="http://glottolog.org/resource/reference/id/7242"/>
    </rdf:Description>
</rdf:RDF>
""")),
    }

    try:
        link, content_type, body = canned[url.path]
    except KeyError:
        return response(404, 'not found', {}, None, 5, request)

    reply_headers = {'content-type': content_type, 'link': link}
    return response(200, body, reply_headers, None, 5, request)
def add(self, url, headers):
    """Request ``url`` and, if the response status is OK, insert it into the cache.

    :param url: URL object; its ``as_string()`` and ``host()`` methods are used.
    :param headers: dict of request headers, passed on to ``requests.get``; \
its ``Accept`` entry (default ``''``) is stored with the row.
    :return: the values of the inserted row, ordered created, host, \
request_url, accept, url, headers, content; ``None`` if the status was not OK.
    """
    resp = requests.get(url.as_string(), headers=headers)
    if resp.status_code != requests.codes.ok:
        return None

    # An ordered mapping keeps the returned values in a fixed column order.
    record = OrderedDict([
        ('created', datetime.datetime.utcnow()),
        ('host', url.host().encode('utf8')),
        ('request_url', url.as_string().encode('utf8')),
        ('accept', b(headers.get('Accept', ''))),
        ('url', resp.url.encode('utf8')),
        ('headers', json.dumps(dict(resp.headers.items()))),
        ('content', resp.content),
    ])
    self.db.execute(responses.insert().values(**record))
    return record.values()