def _decode_query_result(self, response, pagenum, pagelen):
    # attempt to decode iso-formatted datetimes
    # ("2013-02-14T14:06:00"). Note that this will incorrectly
    # decode anything that looks like an ISO date, even though it
    # might be typed as a string. We have no typing information
    # at this stage (we could look at self.schema(), though).
    jsonresp = json.loads(response.text,
                          object_hook=util.make_json_date_object_hook())
    res = Results()
    for hit in jsonresp['hits']['hits']:
        h = self._decode_query_result_hit(hit)
        if "inner_hits" in hit:
            for inner_hit_type in hit["inner_hits"].keys():
                for inner_hit in hit["inner_hits"][inner_hit_type]["hits"]["hits"]:
                    if "innerhits" not in h:
                        h["innerhits"] = []
                    h["innerhits"].append(self._decode_query_result_hit(inner_hit))
        res.append(h)
    pager = {'pagenum': pagenum,
             'pagecount': int(math.ceil(
                 jsonresp['hits']['total']['value'] / float(pagelen))),
             'firstresult': (pagenum - 1) * pagelen + 1,
             'lastresult': (pagenum - 1) * pagelen + len(jsonresp['hits']['hits']),
             'totalresults': jsonresp['hits']['total']['value']}
    setattr(res, 'pagenum', pager['pagenum'])
    setattr(res, 'pagecount', pager['pagecount'])
    setattr(res, 'lastresult', pager['lastresult'])
    setattr(res, 'totalresults', pager['totalresults'])
    if 'aggregations' in jsonresp:
        setattr(res, 'aggregations', jsonresp['aggregations'])
    return res, pager
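
# Every snippet in this section feeds json.load/json.loads an object_hook
# built by util.make_json_date_object_hook. As a reading aid, here is a
# minimal sketch of what such a hook factory could look like; the real
# implementation may differ (for instance, it also appears to accept dotted
# paths such as 'status.parse.date', which this sketch does not handle),
# and the name make_date_hook is hypothetical.
import datetime

def make_date_hook(*keys):
    """Return an object_hook that converts ISO 8601 strings to datetimes.

    With no keys given, every string value is tried, which (as the comment
    in _decode_query_result notes) can also convert strings that merely
    look like dates.
    """
    def hook(obj):
        for key, value in obj.items():
            if keys and key not in keys:
                continue
            if isinstance(value, str):
                try:
                    obj[key] = datetime.datetime.fromisoformat(value)
                except ValueError:
                    pass  # not a date-like string, leave it untouched
        return obj
    return hook

# usage: json.loads('{"published": "2013-02-14T14:06:00"}',
#                   object_hook=make_date_hook("published"))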

def test_sesame_select(self, mock_get):
    store = TripleStore.connect("SESAME", "", "")
    rf = util.readfile
    want = rf("test/files/triplestore/select-results.xml").encode()
    got = store.select("the-query")
    self.assertEqual(want, got)
    self.assertEqual(mock_get.call_count, 1)

    want = rf("test/files/triplestore/select-results.json")
    got = store.select("the-query", format="json").decode()
    self.assertEqual(json.loads(want), json.loads(got))
    self.assertEqual(mock_get.call_count, 2)

    want = json.loads(rf("test/files/triplestore/select-results-python.json"),
                      object_hook=util.make_json_date_object_hook("issued"))
    got = store.select("the-query", format="python")
    self.assertEqual(want, got)
    self.assertEqual(mock_get.call_count, 3)

    with self.assertRaises(errors.TriplestoreError):
        mockresponse = Mock()
        mockresponse.text = "This is the actual error text"
        mock_get.side_effect = requests.exceptions.HTTPError(
            "Server error", response=mockresponse)
        got = store.select("the-query", format="python")
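
# The mock_get parameter above implies the test is wrapped by a mock.patch
# decorator that is not part of the snippet; the patch target below is
# therefore an assumption, and fetch() is a hypothetical stand-in for
# store.select(). The point is the error-handling pattern: side_effect
# raises an HTTPError whose response object carries the server's error text.
from unittest.mock import Mock, patch

import requests

def fetch(url):
    resp = requests.get(url)
    resp.raise_for_status()
    return resp.text

with patch("requests.get") as mock_get:
    mockresponse = Mock()
    mockresponse.text = "This is the actual error text"
    mock_get.side_effect = requests.exceptions.HTTPError(
        "Server error", response=mockresponse)
    try:
        fetch("http://localhost/sparql")  # URL is irrelevant; the call is mocked
    except requests.exceptions.HTTPError as e:
        print(e.response.text)  # -> "This is the actual error text"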

def __init__(self, path=None):
    if path and os.path.exists(path) and os.path.getsize(path) > 0:
        with open(path) as fp:
            hook = util.make_json_date_object_hook(
                'orig_created', 'orig_updated', 'orig_checked',
                'published', 'updated', 'indexed_ts', 'indexed_dep',
                'indexed_ft', 'date')
            try:
                d = json.load(fp, object_hook=hook)
            except JSONDecodeError as e:
                if e.msg == "Extra data":
                    logging.getLogger("documententry").warning(
                        "%s exists but has extra data from pos %s" % (path, e.pos))
                    fp.seek(0)
                    jsondata = fp.read(e.pos)
                    d = json.loads(jsondata, object_hook=hook)
                else:
                    raise e
        if 'summary_type' in d and d['summary_type'] == "html":
            d['summary'] = Literal(d['summary'], datatype=RDF.XMLLiteral)
            del d['summary_type']
        self.__dict__.update(d)
        self._path = path
    else:
        if path and os.path.exists(path):
            logging.getLogger("documententry").warning("%s exists but is empty" % path)
        self.id = None
        self.basefile = None
        self.orig_updated = None
        self.orig_checked = None
        self.orig_url = None
        self.indexed_ts = None
        self.indexed_dep = None
        self.indexed_ft = None
        self.published = None
        self.updated = None
        self.title = None
        self.summary = None
        self.url = None
        if path:
            self._path = path
        # Content src="...": A link to the actual document, or the
        # content inline (Source or refined version?)
        self.content = {}
        # Link rel="alternate": The metadata for this document (and
        # included resources)
        self.link = {}
    # silently upgrade old entry JSON files with a root level
    # parse dict and/or lacking the status dict
    if self.status is None:
        self.status = {}
    if hasattr(self, 'parse'):
        self.status['parse'] = self.parse
        delattr(self, 'parse')

def __init__(self, path=None):
    if path and os.path.exists(path) and os.path.getsize(path) > 0:
        with open(path) as fp:
            hook = util.make_json_date_object_hook(
                'orig_created', 'orig_updated', 'orig_checked',
                'published', 'updated', 'indexed_ts', 'indexed_dep',
                'indexed_ft', 'status.download.date', 'status.parse.date',
                'status.relate.date', 'status.generate.date')
            try:
                d = json.load(fp, object_hook=hook)
            except JSONDecodeError as e:
                if e.msg == "Extra data":
                    logging.getLogger("documententry").warning(
                        "%s exists but has extra data from pos %s" % (path, e.pos))
                    fp.seek(0)
                    jsondata = fp.read(e.pos)
                    d = json.loads(jsondata, object_hook=hook)
                else:
                    raise e
        if 'summary_type' in d and d['summary_type'] == "html":
            d['summary'] = Literal(d['summary'], datatype=RDF.XMLLiteral)
            del d['summary_type']
        self.__dict__.update(d)
        self._path = path
    else:
        if path and os.path.exists(path):
            logging.getLogger("documententry").warning(
                "%s exists but is empty" % path)
        self.id = None
        self.basefile = None
        self.orig_updated = None
        self.orig_checked = None
        self.orig_url = None
        self.indexed_ts = None
        self.indexed_dep = None
        self.indexed_ft = None
        self.published = None
        self.updated = None
        self.title = None
        self.summary = None
        self.url = None
        if path:
            self._path = path
        # Content src="...": A link to the actual document, or the
        # content inline (Source or refined version?)
        self.content = {}
        # Link rel="alternate": The metadata for this document (and
        # included resources)
        self.link = {}
    # silently upgrade old entry JSON files with a root level
    # parse dict and/or lacking the status dict
    if self.status is None:
        self.status = {}
    if hasattr(self, 'parse'):
        self.status['parse'] = self.parse
        delattr(self, 'parse')
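
# Minimal sketch of the "Extra data" recovery used in the two __init__
# variants above, shown in isolation: when a JSON file has trailing garbage
# after a valid document, json raises JSONDecodeError with msg == "Extra data"
# and pos pointing at the first offending character, so re-parsing only the
# first pos characters recovers the valid prefix. load_json_prefix is a
# hypothetical helper name, not part of the codebase.
import json
from json import JSONDecodeError

def load_json_prefix(path, object_hook=None):
    with open(path) as fp:
        try:
            return json.load(fp, object_hook=object_hook)
        except JSONDecodeError as e:
            if e.msg != "Extra data":
                raise
            fp.seek(0)
            return json.loads(fp.read(e.pos), object_hook=object_hook)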

class Feedsets(RepoTester):
    results2 = json.load(
        open("test/files/datasets/results2-plus-entries.json"),
        object_hook=util.make_json_date_object_hook('published', 'updated'))
    results2data = rdflib.Graph().parse(
        open("test/files/datasets/results2data.ttl"), format="turtle")

    facets = [Facet(rdftype=RDF.type),
              Facet(rdftype=DCTERMS.publisher),
              Facet(rdftype=DCTERMS.issued)]

    feedsets = [
        Feedset(label="Sorted by type",
                predicate=RDF.type,
                feeds=[Feed(title="All Book documents",
                            slug="type/book",
                            binding="rdf_type",
                            value="Book")]),
        Feedset(label="Sorted by publisher",
                predicate=DCTERMS.publisher,
                feeds=[Feed(title="Documents published by Analytical Biochemistry",
                            slug="publisher/analytical",
                            binding="dcterms_publisher",
                            value="analytical"),
                       Feed(title="Documents published by Journal of Biological Chemistry",
                            slug="publisher/biochem",
                            binding="dcterms_publisher",
                            value="biochem"),
                       Feed(title="Documents published by Nature",
                            slug="publisher/nature",
                            binding="dcterms_publisher",
                            value="nature")]),
        Feedset(label="All",
                predicate=None,
                feeds=[Feed(title="All documents",  # "... in base" ?
                            slug="main",
                            binding=None,
                            value=None)])]

    def setUp(self):
        super(Feedsets, self).setUp()
        self.repo.news_facet_entries = Mock(return_value=self.results2)
        self.repo.commondata = self.results2data

    def test_feedsets(self):
        got = self.repo.news_feedsets(self.results2, self.facets)
        want = self.feedsets

        # make sure 3 feedsets were created and their labels
        self.assertEqual(3, len(got))
        self.assertEqual("Sorted by type", got[0].label)
        self.assertEqual("Sorted by publisher", got[1].label)
        self.assertEqual("All", got[2].label)

        # make sure the title of the only feed in the first feedset
        # turned out OK
        self.assertEqual("All Book documents", got[0].feeds[0].title)

        # make sure the publisher feedset has the correct things
        self.assertEqual(3, len(got[1].feeds))  # 3 different journals
        self.assertEqual("publisher/analytical", got[1].feeds[0].slug)
        self.assertEqual("Documents published by Analytical Biochemistry",
                         got[1].feeds[0].title)

        # this test incorporates all of the above
        self.assertEqual(want, got)

    def test_select_for_feeds(self):
        got = self.repo.news_select_for_feeds(self.results2, self.feedsets,
                                              self.facets)
        # last feedset (main) should have one single feed, and it
        # should contain all entries.
        self.assertEqual(len(got[-1].feeds), 1)
        self.assertEqual(len(got[-1].feeds[0].entries), 4)
        self.assertEqual("http://example.org/articles/pm14907713",
                         got[-1].feeds[0].entries[0]['uri'])
        self.assertEqual("http://example.org/articles/pm942051",
                         got[-1].feeds[0].entries[3]['uri'])