Exemple #1
0
def search_articles(search_query):
    """
    Run a paged article search and return the results as a JSON response.

    The page number, page size and sort order are read from the current
    request's ``page``, ``pageSize`` and ``sort`` values; ``page`` defaults to 1
    and ``pageSize`` to 10 when they are not supplied.

    :param search_query: the query string handed to ``DiscoveryApi.search``
    :return: whatever ``jsonify_models`` produces for the search results
    :raises Api400Error: if ``page`` or ``pageSize`` cannot be converted to an
        integer, or if the search raises a DiscoveryException
    """
    # get the values for the other bits of search info: the page number, the page size and the sort
    page = request.values.get("page", 1)
    psize = request.values.get("pageSize", 10)
    sort = request.values.get("sort")

    # check the page is an integer; only conversion failures are turned into a 400,
    # anything else (e.g. KeyboardInterrupt) is allowed to propagate
    try:
        page = int(page)
    except (TypeError, ValueError):
        raise Api400Error("Page number was not an integer")

    # check the page size is an integer
    try:
        psize = int(psize)
    except (TypeError, ValueError):
        raise Api400Error("Page size was not an integer")

    try:
        results = DiscoveryApi.search('article', None, search_query, page, psize, sort)
    except DiscoveryException as e:
        # ``message`` is not guaranteed to exist on an exception (Python 3 removed it
        # from the built-in base class), so fall back to the string form when absent
        message = getattr(e, "message", None)
        if message is None:
            message = str(e)
        raise Api400Error(message)

    return jsonify_models(results)
Exemple #2
0
def search_articles(search_query):
    """
    Run a paged article search and return the results as a JSON response.

    The page number, page size and sort order are read from the current
    request's ``page``, ``pageSize`` and ``sort`` values; ``page`` defaults to 1
    and ``pageSize`` to 10 when they are not supplied.

    :param search_query: the query string handed to ``DiscoveryApi.search``
    :return: whatever ``jsonify_models`` produces for the search results
    :raises Api400Error: if ``page`` or ``pageSize`` cannot be converted to an
        integer, or if the search raises a DiscoveryException
    """
    # get the values for the other bits of search info: the page number, the page size and the sort
    page = request.values.get("page", 1)
    psize = request.values.get("pageSize", 10)
    sort = request.values.get("sort")

    # check the page is an integer; catch only conversion failures so that
    # unrelated exceptions (e.g. KeyboardInterrupt) are not reported as a 400
    try:
        page = int(page)
    except (TypeError, ValueError):
        raise Api400Error("Page number was not an integer")

    # check the page size is an integer
    try:
        psize = int(psize)
    except (TypeError, ValueError):
        raise Api400Error("Page size was not an integer")

    # a rejected search is reported to the client as a bad request
    try:
        results = DiscoveryApi.search('article', None, search_query, page, psize, sort)
    except DiscoveryException as e:
        raise Api400Error(str(e))

    return jsonify_models(results)
    def test_06_deep_paging_limit(self):
        # With DISCOVERY_MAX_RECORDS_SIZE set to 5 and ten journals indexed, the
        # first page of five results must still be served, while asking for the
        # second page must raise a DiscoveryException whose message contains the
        # data dump and OAI-PMH URLs.

        # populate the index with some journals, remembering (id, last_updated) for each
        saved = []
        for n in range(10):
            journal = models.Journal()
            journal.set_in_doaj(True)
            bibjson = journal.bibjson()
            bibjson.title = "Test Journal {x}".format(x=n)
            bibjson.add_identifier(bibjson.P_ISSN, "{x}000-0000".format(x=n))
            bibjson.publisher = "Test Publisher {x}".format(x=n)
            bibjson.add_url("http://homepage.com/{x}".format(x=n), "homepage")
            journal.save()
            saved.append((journal.id, journal.last_updated))

        self.app_test.config["DISCOVERY_MAX_RECORDS_SIZE"] = 5

        # block until all the records are saved
        for journal_id, last_updated in saved:
            models.Journal.block(journal_id, last_updated, sleep=0.05)

        # now run some queries
        with self.app_test.test_request_context():
            # check that the first page still works
            first_page = DiscoveryApi.search("journal", None, "*", 1, 5)
            assert first_page.data.get("total") == 10
            assert len(first_page.data.get("results")) == 5
            assert first_page.data.get("page") == 1
            assert first_page.data.get("pageSize") == 5

            # but that the second page fails
            with self.assertRaises(DiscoveryException) as caught:
                DiscoveryApi.search("journal", None, "*", 2, 5)

            # the error message is expected to point the caller at the bulk alternatives
            data_dump_url = url_for("doaj.public_data_dump")
            oai_article_url = url_for("oaipmh.oaipmh", specified="article")
            oai_journal_url = url_for("oaipmh.oaipmh")
            message = caught.exception.message
            assert data_dump_url in message
            assert oai_article_url in message
            assert oai_journal_url in message
Exemple #4
0
    def test_06_deep_paging_limit(self):
        # Ten journals are indexed and DISCOVERY_MAX_RECORDS_SIZE is set to 5: page 1
        # (size 5) must succeed, page 2 must raise a DiscoveryException whose message
        # mentions the data dump URL and both OAI-PMH URLs.

        # populate the index with some journals
        saved_records = []
        for index in range(10):
            journal = models.Journal()
            journal.set_in_doaj(True)

            bibjson = journal.bibjson()
            bibjson.title = "Test Journal {x}".format(x=index)
            bibjson.add_identifier(bibjson.P_ISSN, "{x}000-0000".format(x=index))
            bibjson.publisher = "Test Publisher {x}".format(x=index)
            bibjson.add_url("http://homepage.com/{x}".format(x=index), "homepage")

            journal.save()
            saved_records.append((journal.id, journal.last_updated))

        self.app_test.config["DISCOVERY_MAX_RECORDS_SIZE"] = 5

        # block until all the records are saved
        for record_id, last_updated in saved_records:
            models.Journal.block(record_id, last_updated, sleep=0.05)

        # now run some queries
        with self.app_test.test_request_context():
            # check that the first page still works
            page_one = DiscoveryApi.search("journal", None, "*", 1, 5)
            assert page_one.data.get("total") == 10
            assert len(page_one.data.get("results")) == 5
            assert page_one.data.get("page") == 1
            assert page_one.data.get("pageSize") == 5

            # but that the second page fails
            with self.assertRaises(DiscoveryException):
                try:
                    DiscoveryApi.search("journal", None, "*", 2, 5)
                except DiscoveryException as err:
                    expected_urls = (
                        url_for("doaj.public_data_dump"),
                        url_for("oaipmh.oaipmh", specified="article"),
                        url_for("oaipmh.oaipmh"),
                    )
                    for expected_url in expected_urls:
                        assert expected_url in err.message
                    # re-raise so the enclosing assertRaises sees the exception
                    raise
Exemple #5
0
    def run(self):
        """
        Execute the task as specified by the background_job.

        Reads the ``clean``, ``prune`` and ``types`` job parameters, then for each
        requested type ("article" and/or "journal"):

        * iterates over the records yielded by ``DiscoveryApi.scroll``
        * writes them as JSON, separated by a comma and newline, into numbered
          files, starting a new file after every DISCOVERY_RECORDS_PER_FILE records
        * hands each completed file, together with the open tarball, to ``_finish_file``
        * copies the resulting ``.tar.gz`` from the temp store into the
          "public_data_dump" store and records its URL and size

        Afterwards the main container is pruned if requested, the temp container is
        deleted and the public data dump cache is updated with the URLs and sizes.

        :return: None
        :raises BackgroundException: if copying a finished tarball to the main store fails
        """
        job = self.background_job
        params = job.params

        clean = self.get_param(params, 'clean')
        prune = self.get_param(params, 'prune')
        types = self.get_param(params, 'types')

        tmpStore = StoreFactory.tmp()
        mainStore = StoreFactory.get("public_data_dump")
        container = app.config.get("STORE_PUBLIC_DATA_DUMP_CONTAINER")

        if clean:
            mainStore.delete_container(container)

        # create dir with today's date
        day_at_start = dates.today()

        # Do the search and save it
        page_size = app.config.get("DISCOVERY_BULK_PAGE_SIZE", 1000)
        records_per_file = app.config.get('DISCOVERY_RECORDS_PER_FILE', 100000)

        if types == 'all':
            types = ['article', 'journal']
        else:
            types = [types]

        # a type that is not exported in this run keeps None for both its URL and its size
        urls = {"article" : None, "journal" : None}
        sizes = {"article" : None, "journal" : None}

        # Scroll for article and/or journal
        for typ in types:
            job.add_audit_message(dates.now() + u": Starting export of " + typ)

            out_dir = tmpStore.path(container, "doaj_" + typ + "_data_" + day_at_start, create_container=True, must_exist=False)
            out_name = os.path.basename(out_dir)
            zipped_name = out_name + ".tar.gz"
            zip_dir = os.path.dirname(out_dir)
            zipped_path = os.path.join(zip_dir, zipped_name)
            tarball = tarfile.open(zipped_path, "w:gz")

            # make sure the tarball handle is released even if scrolling or writing fails
            try:
                file_num = 1
                out_file, path, filename = self._start_new_file(tmpStore, container, typ, day_at_start, file_num)

                first_in_file = True
                count = 0
                for result in DiscoveryApi.scroll(typ, None, None, page_size, scan=True):
                    # records within one file are separated by a comma and a newline
                    if not first_in_file:
                        out_file.write(",\n")
                    else:
                        first_in_file = False
                    out_file.write(json.dumps(result))
                    count += 1

                    # roll over to the next numbered file once this one is full
                    if count >= records_per_file:
                        file_num += 1
                        self._finish_file(tmpStore, container, filename, path, out_file, tarball)
                        out_file, path, filename = self._start_new_file(tmpStore, container, typ, day_at_start, file_num)
                        first_in_file = True
                        count = 0

                # NOTE(review): a file that was started but received no records is not
                # passed to _finish_file here - confirm that is intended
                if count > 0:
                    self._finish_file(tmpStore, container, filename, path, out_file, tarball)
            finally:
                tarball.close()

            # Copy the source directory to main store
            try:
                filesize = self._copy_on_complete(mainStore, tmpStore, container, zipped_path)
            except Exception as e:
                tmpStore.delete_container(container)
                # ``message`` is not available on every exception (Python 3 removed it
                # from the built-in base class); fall back to the string form so the
                # original error is reported rather than masked by an AttributeError
                reason = getattr(e, "message", None)
                if reason is None:
                    reason = str(e)
                raise BackgroundException("Error copying {0} data on complete {1}\n".format(typ, reason))

            store_url = mainStore.url(container, zipped_name)
            urls[typ] = store_url
            sizes[typ] = filesize

        if prune:
            self._prune_container(mainStore, container, day_at_start, types)

        self.background_job.add_audit_message(u"Removing temp store container {x}".format(x=container))
        tmpStore.delete_container(container)

        # finally update the cache
        cache.Cache.cache_public_data_dump(urls["article"], sizes["article"], urls["journal"], sizes["journal"])

        job.add_audit_message(dates.now() + u": done")
Exemple #6
0
    def run(self):
        """
        Execute the task as specified by the background_job.

        Reads the ``clean``, ``prune`` and ``types`` job parameters, then for each
        requested type ("article" and/or "journal"):

        * iterates over the records yielded by ``DiscoveryApi.scroll``
        * writes them as JSON, separated by a comma and newline, into numbered
          files, starting a new file after every DISCOVERY_RECORDS_PER_FILE records
        * hands each completed file, together with the open tarball, to ``_finish_file``
        * copies the resulting ``.tar.gz`` from the temp store into the
          "public_data_dump" store and records its URL and size

        The job is saved after each significant step. Afterwards the main container
        is pruned if requested, the temp container is deleted and the public data
        dump cache is updated with the URLs and sizes.

        :return: None
        :raises BackgroundException: if copying a finished tarball to the main store fails
        """
        job = self.background_job
        params = job.params

        clean = self.get_param(params, 'clean')
        prune = self.get_param(params, 'prune')
        types = self.get_param(params, 'types')

        tmpStore = StoreFactory.tmp()
        mainStore = StoreFactory.get("public_data_dump")
        container = app.config.get("STORE_PUBLIC_DATA_DUMP_CONTAINER")

        if clean:
            mainStore.delete_container(container)
            job.add_audit_message("Deleted existing data dump files")
            job.save()

        # create dir with today's date
        day_at_start = dates.today()

        # Do the search and save it
        page_size = app.config.get("DISCOVERY_BULK_PAGE_SIZE", 1000)
        records_per_file = app.config.get('DISCOVERY_RECORDS_PER_FILE', 100000)

        if types == 'all':
            types = ['article', 'journal']
        else:
            types = [types]

        # a type that is not exported in this run keeps None for both its URL and its size
        urls = {"article": None, "journal": None}
        sizes = {"article": None, "journal": None}

        # Scroll for article and/or journal
        for typ in types:
            job.add_audit_message(dates.now() + ": Starting export of " + typ)
            job.save()

            out_dir = tmpStore.path(container,
                                    "doaj_" + typ + "_data_" + day_at_start,
                                    create_container=True,
                                    must_exist=False)
            out_name = os.path.basename(out_dir)
            zipped_name = out_name + ".tar.gz"
            zip_dir = os.path.dirname(out_dir)
            zipped_path = os.path.join(zip_dir, zipped_name)
            tarball = tarfile.open(zipped_path, "w:gz")

            # make sure the tarball handle is released even if scrolling or writing fails
            try:
                file_num = 1
                out_file, path, filename = self._start_new_file(
                    tmpStore, container, typ, day_at_start, file_num)

                first_in_file = True
                count = 0
                for result in DiscoveryApi.scroll(typ,
                                                  None,
                                                  None,
                                                  page_size,
                                                  scan=True):
                    # records within one file are separated by a comma and a newline
                    if not first_in_file:
                        out_file.write(",\n")
                    else:
                        first_in_file = False
                    out_file.write(json.dumps(result))
                    count += 1

                    # roll over to the next numbered file once this one is full
                    if count >= records_per_file:
                        file_num += 1
                        self._finish_file(tmpStore, container, filename, path,
                                          out_file, tarball)
                        job.save()
                        out_file, path, filename = self._start_new_file(
                            tmpStore, container, typ, day_at_start, file_num)
                        first_in_file = True
                        count = 0

                # NOTE(review): a file that was started but received no records is not
                # passed to _finish_file here - confirm that is intended
                if count > 0:
                    self._finish_file(tmpStore, container, filename, path,
                                      out_file, tarball)
                    job.save()
            finally:
                tarball.close()

            # Copy the source directory to main store
            try:
                filesize = self._copy_on_complete(mainStore, tmpStore,
                                                  container, zipped_path)
                job.save()
            except Exception as e:
                # clear out the temp files before reporting the failure
                tmpStore.delete_container(container)
                raise BackgroundException(
                    "Error copying {0} data on complete {1}\n".format(
                        typ, str(e)))

            store_url = mainStore.url(container, zipped_name)
            urls[typ] = store_url
            sizes[typ] = filesize

        if prune:
            self._prune_container(mainStore, container, day_at_start, types)
            job.save()

        job.add_audit_message(
            "Removing temp store container {x}".format(x=container))
        tmpStore.delete_container(container)

        # finally update the cache
        cache.Cache.cache_public_data_dump(urls["article"], sizes["article"],
                                           urls["journal"], sizes["journal"])

        job.add_audit_message(dates.now() + ": done")
Exemple #7
0
    def test_04_paging_for_link_headers(self):
        # _calc_pagination is called with (total, page_size, requested_page)
        # and is expected to return (page_count, previous_page, next_page, last_page).
        # Every entry below pairs the call arguments with the expected return value.
        expectations = [
            # request 1 of 1 pages
            ((0, 10, 1), (1, None, None, 1)),  # 0 results still means page 1
            ((1, 10, 1), (1, None, None, 1)),
            ((2, 10, 1), (1, None, None, 1)),
            ((3, 10, 1), (1, None, None, 1)),
            ((9, 10, 1), (1, None, None, 1)),
            ((10, 10, 1), (1, None, None, 1)),

            # request 1st of 2 pages
            ((11, 10, 1), (2, None, 2, 2)),
            ((12, 10, 1), (2, None, 2, 2)),
            ((19, 10, 1), (2, None, 2, 2)),
            ((20, 10, 1), (2, None, 2, 2)),

            # request 2nd of 2 pages
            ((11, 10, 2), (2, 1, None, 2)),
            ((12, 10, 2), (2, 1, None, 2)),
            ((19, 10, 2), (2, 1, None, 2)),
            ((20, 10, 2), (2, 1, None, 2)),

            # various requests for 10s of 1000s of results
            ((9900, 100, 1), (99, None, 2, 99)),
            ((9900, 100, 99), (99, 98, None, 99)),

            ((9901, 100, 1), (100, None, 2, 100)),
            ((9902, 100, 1), (100, None, 2, 100)),
            ((10000, 100, 1), (100, None, 2, 100)),
            ((10000, 100, 2), (100, 1, 3, 100)),
            ((10000, 100, 98), (100, 97, 99, 100)),
            ((10000, 100, 99), (100, 98, 100, 100)),
            ((10000, 100, 100), (100, 99, None, 100)),
        ]

        for call_args, expected in expectations:
            assert DiscoveryApi._calc_pagination(*call_args) == expected
Exemple #8
0
    def test_03_applications(self):
        # Exercise DiscoveryApi.search for the "application" type: five suggestions
        # owned by "owner" and five owned by "stranger" are saved, then searches are
        # run as "owner" (paging, page size bounds, rejected query syntax, sorting,
        # escaped URLs) and finally as an unrelated account, which must see nothing.

        # create an account that will own the suggestions
        acc = models.Account()
        acc.set_id("owner")
        acc.save()

        # populate the index with some suggestions owned by this owner
        for i in range(5):
            a = models.Suggestion()
            a.set_owner("owner")
            bj = a.bibjson()
            bj.title = "Test Suggestion {x}".format(x=i)
            bj.add_identifier(bj.P_ISSN, "{x}000-0000".format(x=i))
            bj.publisher = "Test Publisher {x}".format(x=i)
            bj.add_url("http://homepage.com/{x}".format(x=i), "homepage")
            a.save()

            # make sure the last updated dates are suitably different
            time.sleep(1)

        # populate the index with some which are not owned by this owner
        for i in range(5):
            a = models.Suggestion()
            a.set_owner("stranger")
            bj = a.bibjson()
            bj.title = "Test Suggestion {x}".format(x=i)
            bj.add_identifier(bj.P_ISSN, "{x}000-0000".format(x=i))
            bj.publisher = "Test Publisher {x}".format(x=i)
            a.save()

            # make sure the last updated dates are suitably different
            time.sleep(1)

        time.sleep(1)

        # now run some queries
        with self._make_and_push_test_context(acc=acc):
            # 1. a general query that should hit everything
            res = DiscoveryApi.search("application", acc, "Test", 1, 2)
            assert res.data.get("total") == 5
            assert len(res.data.get("results")) == 2
            assert res.data.get("page") == 1
            assert res.data.get("pageSize") == 2
            assert res.data.get("query") == "Test"

            # 2. a specific field query that should hit just one
            res = DiscoveryApi.search("application", acc, "title:\"Test Suggestion 2\"", 1, 5)
            assert res.data.get("total") == 1
            assert len(res.data.get("results")) == 1
            assert res.data.get("page") == 1
            assert res.data.get("pageSize") == 5
            assert res.data.get("query") == "title:\"Test Suggestion 2\""

            # 3. paging out of range of results
            res = DiscoveryApi.search("application", acc, "Test", 2, 10)
            assert res.data.get("total") == 5
            assert len(res.data.get("results")) == 0
            assert res.data.get("page") == 2
            assert res.data.get("pageSize") == 10
            assert res.data.get("query") == "Test"

            # 4. paging outside the allowed bounds (lower)
            res = DiscoveryApi.search("application", acc, "Test", 0, 0)
            assert res.data.get("total") == 5
            assert len(res.data.get("results")) == 5
            assert res.data.get("page") == 1
            assert res.data.get("pageSize") == 10
            assert res.data.get("query") == "Test"

            # 5. page size above upper limit
            res = DiscoveryApi.search("application", acc, "Test", 1, 100000)
            assert res.data.get("total") == 5
            assert len(res.data.get("results")) == 5
            assert res.data.get("page") == 1
            assert res.data.get("pageSize") == 100
            assert res.data.get("query") == "Test"

            # 6. Failed attempt at wildcard search
            with self.assertRaises(DiscoveryException):
                res = DiscoveryApi.search("application", acc, "Te*t", 1, 10)

            # 7. Failed attempt at fuzzy search
            with self.assertRaises(DiscoveryException):
                res = DiscoveryApi.search("application", acc, "title:Test~0.8", 1, 10)

            # 8. sort on a specific field, expect a default to "asc"
            res = DiscoveryApi.search("application", acc, "Test", 1, 10, "created_date")
            assert res.data.get("total") == 5
            assert len(res.data.get("results")) == 5
            assert res.data.get("page") == 1
            assert res.data.get("pageSize") == 10
            assert res.data.get("query") == "Test"
            assert res.data.get("results")[0].get("created_date") < res.data.get("results")[1].get("created_date")
            assert res.data.get("sort") == "created_date"

            # 9. sort on a specific field in a specified direction
            res = DiscoveryApi.search("application", acc, "Test", 1, 10, "created_date:desc")
            assert res.data.get("total") == 5
            assert len(res.data.get("results")) == 5
            assert res.data.get("page") == 1
            assert res.data.get("pageSize") == 10
            assert res.data.get("query") == "Test"
            assert res.data.get("results")[0].get("created_date") > res.data.get("results")[1].get("created_date")
            assert res.data.get("sort") == "created_date:desc"

            # 10. Malformed sort direction
            with self.assertRaises(DiscoveryException):
                res = DiscoveryApi.search("application", acc, "Test", 1, 10, "created_date:whatever")

            # 11. non-existent sort field
            with self.assertRaises(DiscoveryException):
                res = DiscoveryApi.search("application", acc, "Test", 1, 10, "some.missing.field:asc")

            # 12. with a forward slash, with and without escaping (note that we have to escape the : as it has meaning for lucene)
            # raw strings keep the backslashes literally without relying on invalid escape sequences
            res = DiscoveryApi.search("application", acc, r'"http\://homepage.com/1"', 1, 10)
            assert res.data.get("total") == 1

            res = DiscoveryApi.search("application", acc, r'"http\:\/\/homepage.com\/1"', 1, 10)
            assert res.data.get("total") == 1

        # 13. A search with an account that isn't either of the ones in the dataset
        other = models.Account()
        other.set_id("other")
        with self._make_and_push_test_context(acc=other):
            res = DiscoveryApi.search("application", other, "Test", 1, 10, "created_date:desc")
            assert res.data.get("total") == 0
Exemple #9
0
    def test_01_journals(self):
        # Exercise DiscoveryApi.search for the "journal" type: five journals marked
        # in_doaj plus one that is not are saved, then searches are run covering
        # paging, page size bounds, rejected query syntax, sorting and escaped URLs.

        # populate the index with some journals
        saved_journals = []
        for i in range(5):
            j = models.Journal()
            j.set_in_doaj(True)
            bj = j.bibjson()
            bj.title = "Test Journal {x}".format(x=i)
            bj.add_identifier(bj.P_ISSN, "{x}000-0000".format(x=i))
            bj.publisher = "Test Publisher {x}".format(x=i)
            bj.add_url("http://homepage.com/{x}".format(x=i), "homepage")
            j.save()
            saved_journals.append((j.id, j.last_updated))

            # make sure the last updated dates are suitably different
            time.sleep(1)

        # add one that's not in DOAJ, which shouldn't turn up in our results
        j = models.Journal()
        j.set_in_doaj(False)
        bj = j.bibjson()
        bj.title = "Test Journal {x}".format(x=6)
        bj.add_identifier(bj.P_ISSN, "{x}000-0000".format(x=6))
        bj.publisher = "Test Publisher {x}".format(x=6)
        bj.add_url("http://homepage.com/{x}".format(x=6), "homepage")
        j.save()
        saved_journals.append((j.id, j.last_updated))

        # wait on all the saved (id, last_updated) pairs before querying
        models.Journal.blockall(saved_journals)

        # now run some queries
        with self.app_test.test_request_context():
            # 1. a general query that should hit everything (except number 6)
            res = DiscoveryApi.search("journal", None, "Test", 1, 2)
            assert res.data.get("total") == 5
            assert len(res.data.get("results")) == 2
            assert res.data.get("page") == 1
            assert res.data.get("pageSize") == 2
            assert res.data.get("query") == "Test"

            # 2. a specific field query that should hit just one
            res = DiscoveryApi.search("journal", None, "title:\"Test Journal 2\"", 1, 5)
            assert res.data.get("total") == 1
            assert len(res.data.get("results")) == 1
            assert res.data.get("page") == 1
            assert res.data.get("pageSize") == 5
            assert res.data.get("query") == "title:\"Test Journal 2\""

            # 3. paging out of range of results
            res = DiscoveryApi.search("journal", None, "Test", 2, 10)
            assert res.data.get("total") == 5
            assert len(res.data.get("results")) == 0
            assert res.data.get("page") == 2
            assert res.data.get("pageSize") == 10
            assert res.data.get("query") == "Test"

            # 4. paging outside the allowed bounds (lower)
            res = DiscoveryApi.search("journal", None, "Test", 0, 0)
            assert res.data.get("total") == 5
            assert len(res.data.get("results")) == 5
            assert res.data.get("page") == 1
            assert res.data.get("pageSize") == 10
            assert res.data.get("query") == "Test"

            # 5. page size above upper limit
            res = DiscoveryApi.search("journal", None, "Test", 1, 100000)
            assert res.data.get("total") == 5
            assert len(res.data.get("results")) == 5
            assert res.data.get("page") == 1
            assert res.data.get("pageSize") == 100
            assert res.data.get("query") == "Test"

            # 6. Failed attempt at wildcard search
            with self.assertRaises(DiscoveryException):
                res = DiscoveryApi.search("journal", None, "Te*t", 1, 10)

            # 7. Failed attempt at fuzzy search
            with self.assertRaises(DiscoveryException):
                res = DiscoveryApi.search("journal", None, "title:Test~0.8", 1, 10)

            # 8. sort on a specific field, expect a default to "asc"
            res = DiscoveryApi.search("journal", None, "Test", 1, 10, "created_date")
            assert res.data.get("total") == 5
            assert len(res.data.get("results")) == 5
            assert res.data.get("page") == 1
            assert res.data.get("pageSize") == 10
            assert res.data.get("query") == "Test"
            assert res.data.get("results")[0].get("created_date") < res.data.get("results")[1].get("created_date")
            assert res.data.get("sort") == "created_date"

            # 9. sort on a specific field in a specified direction
            res = DiscoveryApi.search("journal", None, "Test", 1, 10, "created_date:desc")
            assert res.data.get("total") == 5
            assert len(res.data.get("results")) == 5
            assert res.data.get("page") == 1
            assert res.data.get("pageSize") == 10
            assert res.data.get("query") == "Test"
            assert res.data.get("results")[0].get("created_date") > res.data.get("results")[1].get("created_date")
            assert res.data.get("sort") == "created_date:desc"

            # 10. Malformed sort direction
            with self.assertRaises(DiscoveryException):
                res = DiscoveryApi.search("journal", None, "Test", 1, 10, "created_date:whatever")

            # 11. non-existent sort field
            with self.assertRaises(DiscoveryException):
                res = DiscoveryApi.search("journal", None, "Test", 1, 10, "some.missing.field:asc")

            # 12. with a forward slash, with and without escaping (note that we have to escape the : as it has meaning for lucene)
            # raw strings keep the backslashes literally without relying on invalid escape sequences
            res = DiscoveryApi.search("journal", None, r'"http\://homepage.com/1"', 1, 10)
            assert res.data.get("total") == 1

            res = DiscoveryApi.search("journal", None, r'"http\:\/\/homepage.com\/1"', 1, 10)
            assert res.data.get("total") == 1
Exemple #10
0

# Handle wayward paths by raising an API404Error
@blueprint.route("/<path:invalid_path>", methods=["POST", "GET", "PUT", "DELETE", "PATCH", "HEAD"])     # leaving out methods should mean all, but tests haven't shown that behaviour.
def missing_resource(invalid_path):
    """
    Reject a request for a path that matches no other route on this blueprint.

    :param invalid_path: the unmatched path captured from the URL
    :raises Api404Error: always, with a message naming the path and pointing at
        the API spec and documentation URLs
    """
    # both links are built from the configured BASE_URL (empty string when not set)
    base_url = app.config.get("BASE_URL", "")
    docs_url = base_url + url_for('.docs')
    spec_url = base_url + url_for('.api_spec')

    message = "No endpoint at {0}. See {1} for valid paths or read the documentation at {2}.".format(invalid_path, spec_url, docs_url)
    raise Api404Error(message)


@blueprint.route('/docs')
def docs():
    """Render and return the API v1 documentation template."""
    docs_template = 'api/v1/api_docs.html'
    return render_template(docs_template)


@swag(swag_summary='Search your applications <span class="red">[Authenticated, not public]</span>', swag_spec=DiscoveryApi.get_application_swag())  # must be applied after @api_key_(optional|required) decorators. They don't preserve func attributes.
@blueprint.route("/search/applications/<path:search_query>")
@api_key_required
def search_applications(search_query):
    """Handle an authenticated application search request.

    The page number, page size and sort order are read from the request
    values ("page", "pageSize" and "sort"), defaulting to 1, 10 and None.

    :param search_query: the query string captured from the URL path
    :raises Api400Error: if the supplied page number cannot be parsed as an integer
    """
    # get the values for the 2 other bits of search info: the page number and the page size
    page = request.values.get("page", 1)
    psize = request.values.get("pageSize", 10)
    sort = request.values.get("sort")

    # check the page is an integer
    try:
        page = int(page)
    except (TypeError, ValueError):
        # only conversion failures are a client error; a bare except would
        # also swallow KeyboardInterrupt / SystemExit
        raise Api400Error("Page number was not an integer")

    # check the page size is an integer
Exemple #11
0
    def test_04_paging_for_link_headers(self):
        """Check DiscoveryApi._calc_pagination against a table of known results.

        Each case pairs the arguments (total, page_size, requested_page) with
        the expected return value (page_count, previous_page, next_page, last_page).
        """
        cases = [
            # request 1 of 1 pages
            ((0, 10, 1), (1, None, None, 1)),  # 0 results still means page 1
            ((1, 10, 1), (1, None, None, 1)),
            ((2, 10, 1), (1, None, None, 1)),
            ((3, 10, 1), (1, None, None, 1)),
            ((9, 10, 1), (1, None, None, 1)),
            ((10, 10, 1), (1, None, None, 1)),

            # request 1st of 2 pages
            ((11, 10, 1), (2, None, 2, 2)),
            ((12, 10, 1), (2, None, 2, 2)),
            ((19, 10, 1), (2, None, 2, 2)),
            ((20, 10, 1), (2, None, 2, 2)),

            # request 2nd of 2 pages
            ((11, 10, 2), (2, 1, None, 2)),
            ((12, 10, 2), (2, 1, None, 2)),
            ((19, 10, 2), (2, 1, None, 2)),
            ((20, 10, 2), (2, 1, None, 2)),

            # various requests for 10s of 1000s of results
            ((9900, 100, 1), (99, None, 2, 99)),
            ((9900, 100, 99), (99, 98, None, 99)),

            ((9901, 100, 1), (100, None, 2, 100)),
            ((9902, 100, 1), (100, None, 2, 100)),
            ((10000, 100, 1), (100, None, 2, 100)),
            ((10000, 100, 2), (100, 1, 3, 100)),
            ((10000, 100, 98), (100, 97, 99, 100)),
            ((10000, 100, 99), (100, 98, 100, 100)),
            ((10000, 100, 100), (100, 99, None, 100)),
        ]

        for pagination_args, expected in cases:
            assert DiscoveryApi._calc_pagination(*pagination_args) == expected
Exemple #12
0
    def test_03_applications(self):
        """Exercise DiscoveryApi.search for applications scoped to an account.

        Saves five suggestions owned by "owner" and five owned by "stranger",
        then checks paging, field queries, rejected wildcard/fuzzy queries,
        sorting, URL queries, and that an unrelated account gets no results.
        """
        # create an account that will own the suggestions
        acc = models.Account()
        acc.set_id("owner")
        acc.save()

        # populate the index with some suggestions owned by this owner
        for i in range(5):
            a = models.Suggestion()
            a.set_owner("owner")
            bj = a.bibjson()
            bj.title = "Test Suggestion {x}".format(x=i)
            bj.add_identifier(bj.P_ISSN, "{x}000-0000".format(x=i))
            bj.publisher = "Test Publisher {x}".format(x=i)
            bj.add_url("http://homepage.com/{x}".format(x=i), "homepage")
            a.save()

            # make sure the last updated dates are suitably different
            time.sleep(1)

        # populate the index with some which are not owned by this owner
        for i in range(5):
            a = models.Suggestion()
            a.set_owner("stranger")
            bj = a.bibjson()
            bj.title = "Test Suggestion {x}".format(x=i)
            bj.add_identifier(bj.P_ISSN, "{x}000-0000".format(x=i))
            bj.publisher = "Test Publisher {x}".format(x=i)
            a.save()

            # make sure the last updated dates are suitably different
            time.sleep(1)

        time.sleep(1)

        # now run some queries
        with self._make_and_push_test_context(acc=acc):
            # 1. a general query that should hit everything
            res = DiscoveryApi.search("application", acc, "Test", 1, 2)
            assert res.data.get("total") == 5
            assert len(res.data.get("results")) == 2
            assert res.data.get("page") == 1
            assert res.data.get("pageSize") == 2
            assert res.data.get("query") == "Test"

            # 2. a specific field query that should hit just one
            res = DiscoveryApi.search("application", acc, "title:\"Test Suggestion 2\"", 1, 5)
            assert res.data.get("total") == 1
            assert len(res.data.get("results")) == 1
            assert res.data.get("page") == 1
            assert res.data.get("pageSize") == 5
            assert res.data.get("query") == "title:\"Test Suggestion 2\""

            # 3. paging out of range of results
            res = DiscoveryApi.search("application", acc, "Test", 2, 10)
            assert res.data.get("total") == 5
            assert len(res.data.get("results")) == 0
            assert res.data.get("page") == 2
            assert res.data.get("pageSize") == 10
            assert res.data.get("query") == "Test"

            # 4. paging outside the allowed bounds (lower)
            res = DiscoveryApi.search("application", acc, "Test", 0, 0)
            assert res.data.get("total") == 5
            assert len(res.data.get("results")) == 5
            assert res.data.get("page") == 1
            assert res.data.get("pageSize") == 10
            assert res.data.get("query") == "Test"

            # 5. page size above upper limit
            res = DiscoveryApi.search("application", acc, "Test", 1, 100000)
            assert res.data.get("total") == 5
            assert len(res.data.get("results")) == 5
            assert res.data.get("page") == 1
            assert res.data.get("pageSize") == 100
            assert res.data.get("query") == "Test"

            # 6. Failed attempt at wildcard search
            with self.assertRaises(DiscoveryException):
                DiscoveryApi.search("application", acc, "Te*t", 1, 10)

            # 7. Failed attempt at fuzzy search
            with self.assertRaises(DiscoveryException):
                DiscoveryApi.search("application", acc, "title:Test~0.8", 1, 10)

            # 8. sort on a specific field, expect a default to "asc"
            res = DiscoveryApi.search("application", acc, "Test", 1, 10, "created_date")
            assert res.data.get("total") == 5
            assert len(res.data.get("results")) == 5
            assert res.data.get("page") == 1
            assert res.data.get("pageSize") == 10
            assert res.data.get("query") == "Test"
            assert res.data.get("results")[0].get("created_date") < res.data.get("results")[1].get("created_date")
            assert res.data.get("sort") == "created_date"

            # 9. sort on a specific field in a specified direction
            res = DiscoveryApi.search("application", acc, "Test", 1, 10, "created_date:desc")
            assert res.data.get("total") == 5
            assert len(res.data.get("results")) == 5
            assert res.data.get("page") == 1
            assert res.data.get("pageSize") == 10
            assert res.data.get("query") == "Test"
            assert res.data.get("results")[0].get("created_date") > res.data.get("results")[1].get("created_date")
            assert res.data.get("sort") == "created_date:desc"

            # 10. Malformed sort direction
            with self.assertRaises(DiscoveryException):
                DiscoveryApi.search("application", acc, "Test", 1, 10, "created_date:whatever")

            # 11. non-existent sort field
            with self.assertRaises(DiscoveryException):
                DiscoveryApi.search("application", acc, "Test", 1, 10, "some.missing.field:asc")

            # 12. with a forward slash, with and without escaping (note that we have to escape the : as it has meaning for lucene)
            # raw strings keep the backslashes literal without relying on invalid escape sequences
            res = DiscoveryApi.search("application", acc, r'"http\://homepage.com/1"', 1, 10)
            assert res.data.get("total") == 1

            res = DiscoveryApi.search("application", acc, r'"http\:\/\/homepage.com\/1"', 1, 10)
            assert res.data.get("total") == 1

        # 13. A search with an account that isn't either of the ones in the dataset
        other = models.Account()
        other.set_id("other")
        with self._make_and_push_test_context(acc=other):
            res = DiscoveryApi.search("application", other, "Test", 1, 10, "created_date:desc")
            assert res.data.get("total") == 0
Exemple #13
0
    def test_01_journals(self):
        """Exercise DiscoveryApi.search for journals.

        Saves five journals that are in DOAJ plus one that is not, then checks
        paging, field queries, rejected wildcard/fuzzy queries, sorting, and
        URL queries with and without escaped slashes.
        """
        # populate the index with some journals
        saved_journals = []
        for i in range(5):
            j = models.Journal()
            j.set_in_doaj(True)
            bj = j.bibjson()
            bj.title = "Test Journal {x}".format(x=i)
            bj.add_identifier(bj.P_ISSN, "{x}000-0000".format(x=i))
            bj.publisher = "Test Publisher {x}".format(x=i)
            bj.add_url("http://homepage.com/{x}".format(x=i), "homepage")
            j.save()
            saved_journals.append((j.id, j.last_updated))

            # make sure the last updated dates are suitably different
            time.sleep(1)

        # add one that's not in DOAJ, which shouldn't turn up in our results
        j = models.Journal()
        j.set_in_doaj(False)
        bj = j.bibjson()
        bj.title = "Test Journal {x}".format(x=6)
        bj.add_identifier(bj.P_ISSN, "{x}000-0000".format(x=6))
        bj.publisher = "Test Publisher {x}".format(x=6)
        bj.add_url("http://homepage.com/{x}".format(x=6), "homepage")
        j.save()
        saved_journals.append((j.id, j.last_updated))

        models.Journal.blockall(saved_journals)

        # now run some queries
        with self.app_test.test_request_context():
            # 1. a general query that should hit everything (except number 6)
            res = DiscoveryApi.search("journal", None, "Test", 1, 2)
            assert res.data.get("total") == 5
            assert len(res.data.get("results")) == 2
            assert res.data.get("page") == 1
            assert res.data.get("pageSize") == 2
            assert res.data.get("query") == "Test"

            # 2. a specific field query that should hit just one
            res = DiscoveryApi.search("journal", None, "title:\"Test Journal 2\"", 1, 5)
            assert res.data.get("total") == 1
            assert len(res.data.get("results")) == 1
            assert res.data.get("page") == 1
            assert res.data.get("pageSize") == 5
            assert res.data.get("query") == "title:\"Test Journal 2\""

            # 3. paging out of range of results
            res = DiscoveryApi.search("journal", None, "Test", 2, 10)
            assert res.data.get("total") == 5
            assert len(res.data.get("results")) == 0
            assert res.data.get("page") == 2
            assert res.data.get("pageSize") == 10
            assert res.data.get("query") == "Test"

            # 4. paging outside the allowed bounds (lower)
            res = DiscoveryApi.search("journal", None, "Test", 0, 0)
            assert res.data.get("total") == 5
            assert len(res.data.get("results")) == 5
            assert res.data.get("page") == 1
            assert res.data.get("pageSize") == 10
            assert res.data.get("query") == "Test"

            # 5. page size above upper limit
            res = DiscoveryApi.search("journal", None, "Test", 1, 100000)
            assert res.data.get("total") == 5
            assert len(res.data.get("results")) == 5
            assert res.data.get("page") == 1
            assert res.data.get("pageSize") == 100
            assert res.data.get("query") == "Test"

            # 6. Failed attempt at wildcard search
            with self.assertRaises(DiscoveryException):
                DiscoveryApi.search("journal", None, "Te*t", 1, 10)

            # 7. Failed attempt at fuzzy search
            with self.assertRaises(DiscoveryException):
                DiscoveryApi.search("journal", None, "title:Test~0.8", 1, 10)

            # 8. sort on a specific field, expect a default to "asc"
            res = DiscoveryApi.search("journal", None, "Test", 1, 10, "created_date")
            assert res.data.get("total") == 5
            assert len(res.data.get("results")) == 5
            assert res.data.get("page") == 1
            assert res.data.get("pageSize") == 10
            assert res.data.get("query") == "Test"
            assert res.data.get("results")[0].get("created_date") < res.data.get("results")[1].get("created_date")
            assert res.data.get("sort") == "created_date"

            # 9. sort on a specific field in a specified direction
            res = DiscoveryApi.search("journal", None, "Test", 1, 10, "created_date:desc")
            assert res.data.get("total") == 5
            assert len(res.data.get("results")) == 5
            assert res.data.get("page") == 1
            assert res.data.get("pageSize") == 10
            assert res.data.get("query") == "Test"
            assert res.data.get("results")[0].get("created_date") > res.data.get("results")[1].get("created_date")
            assert res.data.get("sort") == "created_date:desc"

            # 10. Malformed sort direction
            with self.assertRaises(DiscoveryException):
                DiscoveryApi.search("journal", None, "Test", 1, 10, "created_date:whatever")

            # 11. non-existent sort field
            with self.assertRaises(DiscoveryException):
                DiscoveryApi.search("journal", None, "Test", 1, 10, "some.missing.field:asc")

            # 12. with a forward slash, with and without escaping (note that we have to escape the : as it has meaning for lucene)
            # raw strings keep the backslashes literal without relying on invalid escape sequences
            res = DiscoveryApi.search("journal", None, r'"http\://homepage.com/1"', 1, 10)
            assert res.data.get("total") == 1

            res = DiscoveryApi.search("journal", None, r'"http\:\/\/homepage.com\/1"', 1, 10)
            assert res.data.get("total") == 1
Exemple #14
0
    def test_02_articles(self):
        """Exercise DiscoveryApi.search for articles.

        Saves five articles (each with an ISSN, a DOI and an author), then
        checks paging, field queries, rejected wildcard/fuzzy queries, sorting,
        and DOI queries with and without an escaped forward slash.
        """
        # populate the index with some articles
        for i in range(5):
            a = models.Article()
            a.set_in_doaj(True)
            bj = a.bibjson()
            bj.title = "Test Article {x}".format(x=i)
            bj.add_identifier(bj.P_ISSN, "{x}000-0000".format(x=i))
            bj.add_identifier(bj.DOI, "10.test/{x}".format(x=i))
            bj.publisher = "Test Publisher {x}".format(x=i)
            bj.add_author("Agnieszka Domanska", "CL University", "https://orcid.org/0000-0001-1234-1234")
            a.save()

            # make sure the last updated dates are suitably different
            time.sleep(1)

        time.sleep(1)

        # now run some queries

        with self.app_test.test_request_context():
            # 1. a general query that should hit everything
            res = DiscoveryApi.search("article", None, "Test", 1, 2)
            assert res.data.get("total") == 5
            assert len(res.data.get("results")) == 2
            assert res.data.get("page") == 1
            assert res.data.get("pageSize") == 2
            assert res.data.get("query") == "Test"

            # 2. a specific field query that should hit just one
            res = DiscoveryApi.search("article", None, "title:\"Test Article 2\"", 1, 5)
            assert res.data.get("total") == 1
            assert len(res.data.get("results")) == 1
            assert res.data.get("page") == 1
            assert res.data.get("pageSize") == 5
            assert res.data.get("query") == "title:\"Test Article 2\""

            # 3. paging out of range of results
            res = DiscoveryApi.search("article", None, "Test", 2, 10)
            assert res.data.get("total") == 5
            assert len(res.data.get("results")) == 0
            assert res.data.get("page") == 2
            assert res.data.get("pageSize") == 10
            assert res.data.get("query") == "Test"

            # 4. paging outside the allowed bounds (lower)
            res = DiscoveryApi.search("article", None, "Test", 0, 0)
            assert res.data.get("total") == 5
            assert len(res.data.get("results")) == 5
            assert res.data.get("page") == 1
            assert res.data.get("pageSize") == 10
            assert res.data.get("query") == "Test"

            # 5. page size above upper limit
            res = DiscoveryApi.search("article", None, "Test", 1, 100000)
            assert res.data.get("total") == 5
            assert len(res.data.get("results")) == 5
            assert res.data.get("page") == 1
            assert res.data.get("pageSize") == 100
            assert res.data.get("query") == "Test"

            # 6. Failed attempt at wildcard search
            with self.assertRaises(DiscoveryException):
                DiscoveryApi.search("article", None, "Te*t", 1, 10)

            # 7. Failed attempt at fuzzy search
            with self.assertRaises(DiscoveryException):
                DiscoveryApi.search("article", None, "title:Test~0.8", 1, 10)

            # 8. sort on a specific field, expect a default to "asc"
            res = DiscoveryApi.search("article", None, "Test", 1, 10, "created_date")
            assert res.data.get("total") == 5
            assert len(res.data.get("results")) == 5
            assert res.data.get("page") == 1
            assert res.data.get("pageSize") == 10
            assert res.data.get("query") == "Test"
            assert res.data.get("results")[0].get("created_date") < res.data.get("results")[1].get("created_date")
            assert res.data.get("sort") == "created_date"

            # 9. sort on a specific field in a specified direction
            res = DiscoveryApi.search("article", None, "Test", 1, 10, "created_date:desc")
            assert res.data.get("total") == 5
            assert len(res.data.get("results")) == 5
            assert res.data.get("page") == 1
            assert res.data.get("pageSize") == 10
            assert res.data.get("query") == "Test"
            assert res.data.get("results")[0].get("created_date") > res.data.get("results")[1].get("created_date")
            assert res.data.get("sort") == "created_date:desc"

            # 10. Malformed sort direction
            with self.assertRaises(DiscoveryException):
                DiscoveryApi.search("article", None, "Test", 1, 10, "created_date:whatever")

            # 11. non-existent sort field
            with self.assertRaises(DiscoveryException):
                DiscoveryApi.search("article", None, "Test", 1, 10, "some.missing.field:asc")

            # 12. with a forward slash, with and without escaping
            res = DiscoveryApi.search("article", None, '"10.test/1"', 1, 10)
            assert res.data.get("total") == 1

            # raw string keeps the backslash literal without relying on an invalid escape sequence
            res = DiscoveryApi.search("article", None, r'"10.test\/1"', 1, 10)
            assert res.data.get("total") == 1
Exemple #15
0

# Handle wayward paths by raising an API404Error
@blueprint.route("/<path:invalid_path>", methods=["POST", "GET", "PUT", "DELETE", "PATCH", "HEAD"])     # leaving out methods should mean all, but tests haven't shown that behaviour.
def missing_resource(invalid_path):
    """Catch-all view for paths that match no other route on this blueprint.

    :param invalid_path: the unmatched path captured from the request URL
    :raises Api404Error: always; the message points at the API spec and docs URLs
    """
    # both links share the configured base URL (empty string when unset)
    base_url = app.config.get("BASE_URL", "")
    documentation_link = base_url + url_for('.docs')
    specification_link = base_url + url_for('.api_spec')

    message = "No endpoint at {0}. See {1} for valid paths or read the documentation at {2}.".format(
        invalid_path, specification_link, documentation_link)
    raise Api404Error(message)


@blueprint.route('/docs')
def docs():
    """Render the API v1 documentation template."""
    template_name = 'api/v1/api_docs.html'
    return render_template(template_name)


@swag(swag_summary='Search your applications <span class="red">[Authenticated, not public]</span>', swag_spec=DiscoveryApi.get_application_swag())  # must be applied after @api_key_(optional|required) decorators. They don't preserve func attributes.
@blueprint.route("/search/applications/<path:search_query>")
@api_key_required
@analytics.sends_ga_event(GA_CATEGORY, GA_ACTIONS.get('search_applications', 'Search applications'), record_value_of_which_arg='search_query')
def search_applications(search_query):
    """Handle an authenticated application search request.

    The page number, page size and sort order are read from the request
    values ("page", "pageSize" and "sort"), defaulting to 1, 10 and None.

    :param search_query: the query string captured from the URL path
    :raises Api400Error: if the supplied page number cannot be parsed as an integer
    """
    # get the values for the 2 other bits of search info: the page number and the page size
    page = request.values.get("page", 1)
    psize = request.values.get("pageSize", 10)
    sort = request.values.get("sort")

    # check the page is an integer
    try:
        page = int(page)
    except (TypeError, ValueError):
        # only conversion failures are a client error; a bare except would
        # also swallow KeyboardInterrupt / SystemExit
        raise Api400Error("Page number was not an integer")
Exemple #16
0
    def test_02_articles(self):
        """Exercise DiscoveryApi.search for articles.

        Saves five articles (each with an ISSN and a DOI), then checks paging,
        field queries, rejected wildcard/fuzzy queries, sorting, and DOI
        queries with and without an escaped forward slash.
        """
        # populate the index with some articles
        for i in range(5):
            a = models.Article()
            a.set_in_doaj(True)
            bj = a.bibjson()
            bj.title = "Test Article {x}".format(x=i)
            bj.add_identifier(bj.P_ISSN, "{x}000-0000".format(x=i))
            bj.add_identifier(bj.DOI, "10.test/{x}".format(x=i))
            bj.publisher = "Test Publisher {x}".format(x=i)
            a.save()

            # make sure the last updated dates are suitably different
            time.sleep(1)

        time.sleep(1)

        # now run some queries

        with self.app_test.test_request_context():
            # 1. a general query that should hit everything
            res = DiscoveryApi.search("article", None, "Test", 1, 2)
            assert res.data.get("total") == 5
            assert len(res.data.get("results")) == 2
            assert res.data.get("page") == 1
            assert res.data.get("pageSize") == 2
            assert res.data.get("query") == "Test"

            # 2. a specific field query that should hit just one
            res = DiscoveryApi.search("article", None, "title:\"Test Article 2\"", 1, 5)
            assert res.data.get("total") == 1
            assert len(res.data.get("results")) == 1
            assert res.data.get("page") == 1
            assert res.data.get("pageSize") == 5
            assert res.data.get("query") == "title:\"Test Article 2\""

            # 3. paging out of range of results
            res = DiscoveryApi.search("article", None, "Test", 2, 10)
            assert res.data.get("total") == 5
            assert len(res.data.get("results")) == 0
            assert res.data.get("page") == 2
            assert res.data.get("pageSize") == 10
            assert res.data.get("query") == "Test"

            # 4. paging outside the allowed bounds (lower)
            res = DiscoveryApi.search("article", None, "Test", 0, 0)
            assert res.data.get("total") == 5
            assert len(res.data.get("results")) == 5
            assert res.data.get("page") == 1
            assert res.data.get("pageSize") == 10
            assert res.data.get("query") == "Test"

            # 5. page size above upper limit
            res = DiscoveryApi.search("article", None, "Test", 1, 100000)
            assert res.data.get("total") == 5
            assert len(res.data.get("results")) == 5
            assert res.data.get("page") == 1
            assert res.data.get("pageSize") == 100
            assert res.data.get("query") == "Test"

            # 6. Failed attempt at wildcard search
            with self.assertRaises(DiscoveryException):
                DiscoveryApi.search("article", None, "Te*t", 1, 10)

            # 7. Failed attempt at fuzzy search
            with self.assertRaises(DiscoveryException):
                DiscoveryApi.search("article", None, "title:Test~0.8", 1, 10)

            # 8. sort on a specific field, expect a default to "asc"
            res = DiscoveryApi.search("article", None, "Test", 1, 10, "created_date")
            assert res.data.get("total") == 5
            assert len(res.data.get("results")) == 5
            assert res.data.get("page") == 1
            assert res.data.get("pageSize") == 10
            assert res.data.get("query") == "Test"
            assert res.data.get("results")[0].get("created_date") < res.data.get("results")[1].get("created_date")
            assert res.data.get("sort") == "created_date"

            # 9. sort on a specific field in a specified direction
            res = DiscoveryApi.search("article", None, "Test", 1, 10, "created_date:desc")
            assert res.data.get("total") == 5
            assert len(res.data.get("results")) == 5
            assert res.data.get("page") == 1
            assert res.data.get("pageSize") == 10
            assert res.data.get("query") == "Test"
            assert res.data.get("results")[0].get("created_date") > res.data.get("results")[1].get("created_date")
            assert res.data.get("sort") == "created_date:desc"

            # 10. Malformed sort direction
            with self.assertRaises(DiscoveryException):
                DiscoveryApi.search("article", None, "Test", 1, 10, "created_date:whatever")

            # 11. non-existent sort field
            with self.assertRaises(DiscoveryException):
                DiscoveryApi.search("article", None, "Test", 1, 10, "some.missing.field:asc")

            # 12. with a forward slash, with and without escaping
            res = DiscoveryApi.search("article", None, '"10.test/1"', 1, 10)
            assert res.data.get("total") == 1

            # raw string keeps the backslash literal without relying on an invalid escape sequence
            res = DiscoveryApi.search("article", None, r'"10.test\/1"', 1, 10)
            assert res.data.get("total") == 1
Exemple #17
0
    docs_url = app.config.get("BASE_URL", "") + url_for('.docs')
    spec_url = app.config.get("BASE_URL", "") + url_for('.api_spec')
    raise Api404Error(
        "No endpoint at {0}. See {1} for valid paths or read the documentation at {2}."
        .format(invalid_path, spec_url, docs_url))


@blueprint.route('/docs')
def docs():
    """Render the API v1 documentation template."""
    template_name = 'api/v1/api_docs.html'
    return render_template(template_name)


@swag(
    swag_summary=
    'Search your applications <span class="red">[Authenticated, not public]</span>',
    swag_spec=DiscoveryApi.get_application_swag()
)  # must be applied after @api_key_(optional|required) decorators. They don't preserve func attributes.
@blueprint.route("/search/applications/<path:search_query>")
@api_key_required
def search_applications(search_query):
    # get the values for the 2 other bits of search info: the page number and the page size
    page = request.values.get("page", 1)
    psize = request.values.get("pageSize", 10)
    sort = request.values.get("sort")

    # check the page is an integer
    try:
        page = int(page)
    except:
        raise Api400Error("Page number was not an integer")