Example 1
    def _decode_query_result(self, response, pagenum, pagelen):
        # attempt to decode ISO-formatted datetimes
        # ("2013-02-14T14:06:00"). Note that this will incorrectly
        # decode anything that looks like an ISO date, even though it
        # might be typed as a string. We have no typing information at
        # this stage (we could look at self.schema(), though).
        jsonresp = json.loads(response.text,
                              object_hook=util.make_json_date_object_hook())
        res = Results()
        for hit in jsonresp['hits']['hits']:
            h = self._decode_query_result_hit(hit)
            if "inner_hits" in hit:
                for inner_hit_type in hit["inner_hits"]:
                    for inner_hit in hit["inner_hits"][inner_hit_type]["hits"]["hits"]:
                        if "innerhits" not in h:
                            h["innerhits"] = []
                        h["innerhits"].append(self._decode_query_result_hit(inner_hit))
            res.append(h)
        pager = {'pagenum': pagenum,
                 'pagecount': int(math.ceil(jsonresp['hits']['total']['value'] / float(pagelen))),
                 'firstresult': (pagenum - 1) * pagelen + 1,
                 'lastresult': (pagenum - 1) * pagelen + len(jsonresp['hits']['hits']),
                 'totalresults': jsonresp['hits']['total']['value']}
        setattr(res, 'pagenum', pager['pagenum'])
        setattr(res, 'pagecount', pager['pagecount'])
        setattr(res, 'lastresult', pager['lastresult'])
        setattr(res, 'totalresults', pager['totalresults'])
        if 'aggregations' in jsonresp:
            setattr(res, 'aggregations', jsonresp['aggregations'])
        return res, pager
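
All of these examples revolve around util.make_json_date_object_hook, whose implementation is not shown on this page. As a rough sketch of what such a factory might look like -- assuming it simply pattern-matches ISO-formatted strings, and ignoring the dotted-path field names that appear in Examples 3 and 4 -- it could be something like this (the library's real implementation may well differ):

    import datetime
    import re

    ISO_DATE = re.compile(r"^\d{4}-\d{2}-\d{2}(T\d{2}:\d{2}:\d{2})?$")

    def make_json_date_object_hook(*fields):
        # Returns an object_hook for json.load/json.loads. For every decoded
        # object, any string value that looks like an ISO date or datetime is
        # replaced with a datetime object -- in the named fields only, or in
        # any field when called without arguments (as in Example 1).
        def hook(d):
            for key, value in d.items():
                if ((not fields or key in fields)
                        and isinstance(value, str)
                        and ISO_DATE.match(value)):
                    d[key] = datetime.datetime.fromisoformat(value)
            return d
        return hook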
Example 2
    def test_sesame_select(self, mock_get):
        store = TripleStore.connect("SESAME", "", "")
        rf = util.readfile
        want = rf("test/files/triplestore/select-results.xml").encode()
        got = store.select("the-query")
        self.assertEqual(want, got)
        self.assertEqual(mock_get.call_count, 1)

        want = rf("test/files/triplestore/select-results.json")
        got = store.select("the-query", format="json").decode()
        self.assertEqual(json.loads(want), json.loads(got))
        self.assertEqual(mock_get.call_count, 2)

        want = json.loads(
            rf("test/files/triplestore/select-results-python.json"),
            object_hook=util.make_json_date_object_hook("issued"))
        got = store.select("the-query", format="python")
        self.assertEqual(want, got)
        self.assertEqual(mock_get.call_count, 3)

        with self.assertRaises(errors.TriplestoreError):
            mockresponse = Mock()
            mockresponse.text = "This is the actual error text"
            mock_get.side_effect = requests.exceptions.HTTPError(
                "Server error", response=mockresponse)
            got = store.select("the-query", format="python")
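
The excerpt omits the test's setup: the mock_get parameter implies that the HTTP layer is patched, and that each canned response's text attribute is loaded from the same fixture files the assertions read. The patch target and the wiring below are assumptions, not shown in the source, but the decorator presumably looks something like:

    from unittest.mock import Mock, patch

    @patch('requests.get')  # hypothetical target; only the mock_get parameter is visible above
    def test_sesame_select(self, mock_get):
        mock_get.return_value = Mock(
            status_code=200,
            text=util.readfile("test/files/triplestore/select-results.xml"))
        ...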
Example 3
    def __init__(self, path=None):
        if path and os.path.exists(path) and os.path.getsize(path) > 0:
            with open(path) as fp:
                hook = util.make_json_date_object_hook('orig_created',
                                                       'orig_updated',
                                                       'orig_checked',
                                                       'published',
                                                       'updated',
                                                       'indexed_ts',
                                                       'indexed_dep',
                                                       'indexed_ft',
                                                       'date')
                try:
                    d = json.load(fp, object_hook=hook)
                except JSONDecodeError as e:
                    if e.msg == "Extra data":
                        logging.getLogger("documententry").warning("%s exists but has extra data from pos %s" % (path, e.pos))
                        fp.seek(0)
                        jsondata = fp.read(e.pos)
                        d = json.loads(jsondata, object_hook=hook)
                    else:
                        raise
            if 'summary_type' in d and d['summary_type'] == "html":
                d['summary'] = Literal(d['summary'], datatype=RDF.XMLLiteral)
                del d['summary_type']
            self.__dict__.update(d)
            self._path = path
        else:
            if path and os.path.exists(path):
                logging.getLogger("documententry").warning("%s exists but is empty" % path)
            self.id = None
            self.basefile = None
            self.orig_updated = None
            self.orig_checked = None
            self.orig_url = None
            self.indexed_ts = None
            self.indexed_dep = None
            self.indexed_ft = None
            self.published = None
            self.updated = None
            self.title = None
            self.summary = None
            self.url = None
            if path:
                self._path = path
            # Content src="...": A link to the actual document, or the
            # content inline (Source or refined version?)
            self.content = {}
            # Link rel="alternate": The metadata for this document (and
            # included resources)
            self.link = {}

        # silently upgrade old entry JSON files with a root level
        # parse dict and/or lacking the status dict
        if self.status is None:
            self.status = {}
        if hasattr(self, 'parse'):
            self.status['parse'] = self.parse
            delattr(self, 'parse')
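
The "Extra data" branch relies on a documented property of JSONDecodeError: when valid JSON is followed by trailing junk, e.pos marks where the valid document ends, so re-reading exactly that many characters recovers the parsable prefix. A standalone illustration:

    import json
    from json import JSONDecodeError

    blob = '{"title": "doc"}trailing junk'   # valid JSON followed by extra data
    try:
        d = json.loads(blob)
    except JSONDecodeError as e:
        assert e.msg == "Extra data"
        d = json.loads(blob[:e.pos])         # e.pos == 16, the end of the valid part
    print(d)                                 # {'title': 'doc'}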
Example 4
    def __init__(self, path=None):
        if path and os.path.exists(path) and os.path.getsize(path) > 0:
            with open(path) as fp:
                hook = util.make_json_date_object_hook(
                    'orig_created', 'orig_updated', 'orig_checked',
                    'published', 'updated', 'indexed_ts', 'indexed_dep',
                    'indexed_ft', 'status.download.date', 'status.parse.date',
                    'status.relate.date', 'status.generate.date')
                try:
                    d = json.load(fp, object_hook=hook)
                except JSONDecodeError as e:
                    if e.msg == "Extra data":
                        logging.getLogger("documententry").warning(
                            "%s exists but has extra data from pos %s" %
                            (path, e.pos))
                        fp.seek(0)
                        jsondata = fp.read(e.pos)
                        d = json.loads(jsondata, object_hook=hook)
                    else:
                        raise
            if 'summary_type' in d and d['summary_type'] == "html":
                d['summary'] = Literal(d['summary'], datatype=RDF.XMLLiteral)
                del d['summary_type']
            self.__dict__.update(d)
            self._path = path
        else:
            if path and os.path.exists(path):
                logging.getLogger("documententry").warning(
                    "%s exists but is empty" % path)
            self.id = None
            self.basefile = None
            self.orig_updated = None
            self.orig_checked = None
            self.orig_url = None
            self.indexed_ts = None
            self.indexed_dep = None
            self.indexed_ft = None
            self.published = None
            self.updated = None
            self.title = None
            self.summary = None
            self.url = None
            if path:
                self._path = path
            # Content src="...": A link to the actual document, or the
            # content inline (Source or refined version?)
            self.content = {}
            # Link rel="alternate": The metadata for this document (and
            # included resources)
            self.link = {}

        # silently upgrade old entry JSON files with a root level
        # parse dict and/or lacking the status dict
        if self.status is None:
            self.status = {}
        if hasattr(self, 'parse'):
            self.status['parse'] = self.parse
            delattr(self, 'parse')
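
This version passes dotted names such as 'status.parse.date' to the hook factory. Note that json.load's object_hook is called on each nested object in isolation, with no knowledge of its path, so supporting dotted paths requires either matching on leaf keys or post-processing the decoded tree. One plausible post-processing approach -- an assumption, since the factory's actual strategy is not shown:

    import datetime

    def convert_date_paths(tree, *paths):
        # Walk each dotted path (e.g. "status.parse.date") down a decoded
        # JSON tree and replace an ISO-formatted string leaf with a
        # datetime object, leaving missing or malformed values alone.
        for path in paths:
            *parents, leaf = path.split(".")
            node = tree
            for key in parents:
                node = node.get(key) if isinstance(node, dict) else None
                if node is None:
                    break
            if isinstance(node, dict) and isinstance(node.get(leaf), str):
                try:
                    node[leaf] = datetime.datetime.fromisoformat(node[leaf])
                except ValueError:
                    pass
        return tree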
Example 5
class Feedsets(RepoTester):
    results2 = json.load(
        open("test/files/datasets/results2-plus-entries.json"),
        object_hook=util.make_json_date_object_hook('published', 'updated'))
    results2data = rdflib.Graph().parse(
        open("test/files/datasets/results2data.ttl"), format="turtle")

    facets = [
        Facet(rdftype=RDF.type),
        Facet(rdftype=DCTERMS.publisher),
        Facet(rdftype=DCTERMS.issued)
    ]

    feedsets = [
        Feedset(label="Sorted by type",
                predicate=RDF.type,
                feeds=[
                    Feed(title="All Book documents",
                         slug="type/book",
                         binding="rdf_type",
                         value="Book")
                ]),
        Feedset(
            label="Sorted by publisher",
            predicate=DCTERMS.publisher,
            feeds=[
                Feed(title="Documents published by Analytical Biochemistry",
                     slug="publisher/analytical",
                     binding="dcterms_publisher",
                     value="analytical"),
                Feed(title="Documents published by "
                           "Journal of Biological Chemistry",
                     slug="publisher/biochem",
                     binding="dcterms_publisher",
                     value="biochem"),
                Feed(title="Documents published by Nature",
                     slug="publisher/nature",
                     binding="dcterms_publisher",
                     value="nature")
            ]),
        Feedset(
            label="All",
            predicate=None,
            feeds=[
                Feed(
                    title="All documents",  # "... in base" ? 
                    slug="main",
                    binding=None,
                    value=None)
            ])
    ]

    def setUp(self):
        super(Feedsets, self).setUp()
        self.repo.news_facet_entries = Mock(return_value=self.results2)
        self.repo.commondata = self.results2data

    def test_feedsets(self):
        got = self.repo.news_feedsets(self.results2, self.facets)
        want = self.feedsets

        # make sure 3 feedsets were created and that their labels are correct
        self.assertEqual(3, len(got))
        self.assertEqual("Sorted by type", got[0].label)
        self.assertEqual("Sorted by publisher", got[1].label)
        self.assertEqual("All", got[2].label)

        # make sure the title of the only feed in the first feedset
        # turned out OK
        self.assertEqual("All Book documents", got[0].feeds[0].title)

        # make sure the publisher feedset has the correct things
        self.assertEqual(3, len(got[1].feeds))  # 3 different journals
        self.assertEqual("publisher/analytical", got[1].feeds[0].slug)
        self.assertEqual("Documents published by Analytical Biochemistry",
                         got[1].feeds[0].title)

        # this test incorporates all of the above
        self.assertEqual(want, got)

    def test_select_for_feeds(self):
        got = self.repo.news_select_for_feeds(self.results2, self.feedsets,
                                              self.facets)
        # last feedset (main) should have one single feed and it
        # should contain all entries.
        self.assertEqual(len(got[-1].feeds), 1)
        self.assertEqual(len(got[-1].feeds[0].entries), 4)
        self.assertEqual("http://example.org/articles/pm14907713",
                         got[-1].feeds[0].entries[0]['uri'])
        self.assertEqual("http://example.org/articles/pm942051",
                         got[-1].feeds[0].entries[3]['uri'])
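
A detail worth noting in test_feedsets: the final self.assertEqual(want, got) compares lists of Feedset and Feed instances, which only works if those classes define value-based equality. A minimal sketch of what that might look like (the real classes may implement it differently):

    class Feed(object):
        def __init__(self, title, slug, binding, value):
            self.title, self.slug = title, slug
            self.binding, self.value = binding, value

        def __eq__(self, other):
            # Value-based equality, so that unittest's assertEqual can
            # compare an expected Feed against a computed one.
            return isinstance(other, Feed) and self.__dict__ == other.__dict__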