예제 #1
0
    def test_pubmed_title_search(self):
        """Check PubMed title search for a known title and for a title with no match.

        Assertions only run when the source reports credentials; otherwise
        the test passes without calling the API.
        """
        schol = rc_scholapi.ScholInfraAPI(config_file="rc.cfg")
        source = schol.pubmed

        title = "Climate-change-driven accelerated sea-level rise detected in the altimeter era."
        expected = "29440401"
        doi = "10.1073/pnas.1717312115"
        journal = "Proceedings of the National Academy of Sciences of the United States of America"

        if source.has_credentials():
            response = source.title_search(title)
            source.report_perf(response.timing)
            # assertEqual/assertIsNone show the offending values on failure,
            # which assertTrue(a == b) cannot do.
            self.assertEqual(response.pmid(), expected)
            self.assertEqual(response.doi(), doi)
            self.assertEqual(response.title(), title)
            self.assertEqual(response.journal(), journal)
            self.assertIsNone(response.issn())

        # error case: a title that should match nothing yields an empty response
        title = "NOT_TO_BE_FOUND"
        if source.has_credentials():
            response = source.title_search(title)
            source.report_perf(response.timing)
            self.assertIsNone(response.meta)
            self.assertIsNone(response.pmid())
            self.assertIsNone(response.doi())
            self.assertIsNone(response.title())
            self.assertIsNone(response.journal())
            self.assertIsNone(response.issn())
예제 #2
0
    def test_core_title_search(self):
        """Check CORE title search for a known title and for a nonsense title.

        Titles are compared case-insensitively (both sides lower-cased).
        Assertions only run when the source reports credentials.
        """
        schol = rc_scholapi.ScholInfraAPI(config_file="rc.cfg")
        source = schol.core

        doi = "10.1371/journal.pone.0013969"
        title = "Caribbean corals in crisis: record thermal stress, bleaching, and mortality in 2005".lower()
        url = "https://core.ac.uk/download/pdf/51094169.pdf"
        author = "Eakin, C. Mark"
        journal = "NSUWorks"

        if source.has_credentials():
            response = source.title_search(title)
            source.report_perf(response.timing)
            self.assertEqual(response.doi(), doi)
            self.assertEqual(response.title().lower(), title)
            self.assertEqual(response.url(), url)
            self.assertIn(author, response.authors())
            self.assertEqual(response.journal(), journal)

        # error case
        title = "ajso58tt849qp3g84h38pghq3974ut8gq9j9ht789"  # Should be no matches

        if source.has_credentials():
            response = source.title_search(title)
            source.report_perf(response.timing)
            # identity check instead of `== None`, which can be fooled by a custom __eq__
            self.assertIsNone(response.meta)
            self.assertEqual(response.message, "Not found")
예제 #3
0
    def test_semantic_publication_lookup(self):
        """Check Semantic Scholar DOI lookup for one known DOI and two failing DOIs.

        Assertions only run when the source reports credentials.
        """
        schol = rc_scholapi.ScholInfraAPI(config_file="rc.cfg")
        source = schol.semantic

        doi = "10.1016/j.appet.2017.07.006"
        expected = "https://www.semanticscholar.org/paper/690195fe2ab0fa093204a050ceb2f9fd1d1b2907"

        if source.has_credentials():
            response = source.publication_lookup(doi)
            source.report_perf(response.timing)
            self.assertEqual(response.url(), expected)
            self.assertEqual(response.doi(), doi)

        # error case
        doi = "10.00000/xxx"

        if source.has_credentials():
            response = source.publication_lookup(doi)
            # identity check instead of `== None`
            self.assertIsNone(response.meta)

        # another error case
        doi = "10.1641/0006-3568(2005)055[0879:EITLSA]2.0.CO;2"

        if source.has_credentials():
            response = source.publication_lookup(doi)
            self.assertIsNone(response.meta)
예제 #4
0
    def test_unpaywall_publication_lookup(self):
        """Check Unpaywall DOI lookup for a known DOI and for a DOI with no record.

        Assertions only run when the source reports credentials.
        """
        schol = rc_scholapi.ScholInfraAPI(config_file="rc.cfg")
        source = schol.unpaywall

        doi = "10.1016/j.appet.2017.07.006"
        title = "Deal or no deal? The prevalence and nutritional quality of price promotions among U.S. food and beverage purchases"
        url = "https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5574185"
        authors_num = 4
        journal = "Appetite"

        expected = "https://doi.org/10.1016/j.appet.2017.07.006"

        if source.has_credentials():
            response = source.publication_lookup(doi)
            source.report_perf(response.timing)
            self.assertEqual(response.meta["doi_url"], expected)
            self.assertEqual(response.doi(), doi)
            self.assertEqual(response.title(), title)
            self.assertEqual(response.url(), url)
            self.assertEqual(response.journal(), journal)
            self.assertEqual(len(response.authors()), authors_num)

        # error case
        doi = "10.00000/xxx"

        if source.has_credentials():
            response = source.publication_lookup(doi)
            # identity check instead of `== None`
            self.assertIsNone(response.meta)
예제 #5
0
    def test_dissemin_publication_lookup(self):
        """Check Dissemin DOI lookup for one known DOI and two failing DOIs.

        For the failing DOIs both ``meta`` and ``message`` are expected to be
        None. Assertions only run when the source reports credentials.
        """
        schol = rc_scholapi.ScholInfraAPI(config_file="rc.cfg")
        source = schol.dissemin

        doi = "10.1016/j.appet.2017.07.006"
        title = "Deal or no deal? The prevalence and nutritional quality of price promotions among U.S. food and beverage purchases"
        journal = "Appetite"
        authors_num = 4
        expected = "2017-10-01"

        if source.has_credentials():
            response = source.publication_lookup(doi)
            source.report_perf(response.timing)
            self.assertEqual(response.meta["paper"]["date"], expected)
            self.assertEqual(response.doi(), doi)
            self.assertEqual(response.title(), title)
            self.assertEqual(response.journal(), journal)
            self.assertEqual(len(response.authors()), authors_num)
            self.assertIsNone(response.url())

        # error case
        doi = "10.00000/xxx"

        if source.has_credentials():
            response = source.publication_lookup(doi)
            # identity checks instead of `== None`
            self.assertIsNone(response.meta)
            self.assertIsNone(response.message)

        # another error case
        doi = "10.1023/A:1018882711314"

        if source.has_credentials():
            response = source.publication_lookup(doi)
            self.assertIsNone(response.meta)
            self.assertIsNone(response.message)
예제 #6
0
    def test_openaire_title_search(self):
        """Check OpenAIRE title search for a title with a DOI and one without.

        Assertions only run when the source reports credentials.
        """
        schol = rc_scholapi.ScholInfraAPI(config_file="rc.cfg")
        source = schol.openaire

        doi = "10.1016/j.appet.2017.07.006"
        title = "Deal or no deal? The prevalence and nutritional quality of price promotions among U.S. food and beverage purchases"
        url = "https://europepmc.org/articles/PMC5574185/"
        authors = [
            "Taillie, Lindsey Smith", "Ng, Shu Wen", "Xue, Ya",
            "Harding, Matthew"
        ]

        if source.has_credentials():
            response = source.title_search(title)
            source.report_perf(response.timing)
            self.assertEqual(response.doi(), doi)
            self.assertEqual(response.title(), title)
            self.assertEqual(response.url(), url)
            self.assertEqual(response.authors(), authors)
            self.assertTrue(response.meta["open"])

        # second case: the metadata for this title is expected to carry no "doi" key
        title = "Quantitative easing, portfolio rebalancing and credit growth: Micro evidence from Germany"
        if source.has_credentials():
            response = source.title_search(title)
            source.report_perf(response.timing)
            self.assertNotIn("doi", response.meta)
            self.assertEqual(response.title(), title)
            self.assertTrue(response.title() == title)
예제 #7
0
def main(args):
    """Run an abstract lookup for every staged publication and report the misses.

    ``args.partition`` is forwarded as the ``filter`` for
    ``graph.iter_publications``. For each partition, every publication is
    passed to ``lookup_abstract``; a truthy result bumps
    ``graph.publications.ab_hits``, anything else is recorded through
    ``graph.update_misses``. The publications of each partition are then
    written back to ``graph.BUCKET_STAGE``.
    """
    # federated API access plus the graph wrapper for this step
    api = rc_scholapi.ScholInfraAPI(config_file="rc.cfg", logger=None)
    graph = rc_graph.RCGraph("abstract")

    partitions = graph.iter_publications(graph.BUCKET_STAGE,
                                         filter=args.partition)

    for partition, pub_iter in partitions:
        staged = []
        progress = tqdm(pub_iter, ascii=True, desc=partition[:30])

        for pub in progress:
            staged.append(pub)

            if not lookup_abstract(api, graph, partition, pub):
                graph.update_misses(partition, pub)
                continue

            graph.publications.ab_hits += 1

        graph.write_partition(graph.BUCKET_STAGE, partition, staged)

    # summarize the hits and list what could not be resolved
    hits = graph.publications.ab_hits
    graph.report_misses(
        "{} successful abstract lookups".format(hits),
        "publications that failed every abstract lookup",
    )
예제 #8
0
    def test_datacite_title_search(self):
        """Check DataCite title search for a known title and for a nonsense title.

        Assertions only run when the source reports credentials.
        """
        schol = rc_scholapi.ScholInfraAPI(config_file="rc.cfg")
        source = schol.datacite

        title = "Empirical analysis of potential improvements for high voltage protective algorithms"
        expected = "10.5281/zenodo.3635395"
        journal = "Zenodo"
        author = "López, David"
        url = "https://zenodo.org/record/3635395"

        if source.has_credentials():
            response = source.title_search(title)
            # Split the compound check so a failure says whether meta was
            # empty or the id was wrong.
            self.assertTrue(response.meta)
            self.assertEqual(response.meta["id"], expected)
            self.assertEqual(response.doi(), expected)
            self.assertEqual(response.title(), title)
            self.assertIn(author, response.authors())
            self.assertEqual(response.url(), url)
            self.assertEqual(response.journal(), journal)

        # error case
        title = "ajso58tt849qp3g84h38pghq3974ut8gq9j9ht789"  # Should be no matches

        if source.has_credentials():
            response = source.title_search(title)
            source.report_perf(response.timing)
            # identity check instead of `== None`
            self.assertIsNone(response.meta)
예제 #9
0
    def test_datacite_publication_lookup(self):
        """Check DataCite DOI lookup for a known DOI and for a DOI with no record.

        For the failing DOI the response is expected to serialize to None,
        mention "404" in its message, and return None from every accessor.
        Assertions only run when the source reports credentials.
        """
        schol = rc_scholapi.ScholInfraAPI(config_file="rc.cfg")
        source = schol.datacite

        doi = "10.22002/d1.246"
        title = "In Situ Carbon Dioxide and Methane Mole Fractions from the Los Angeles Megacity Carbon Project"
        url = "https://data.caltech.edu/records/246"
        journal = "CaltechDATA"

        if source.has_credentials():
            response = source.publication_lookup(doi)
            source.report_perf(response.timing)
            self.assertEqual(response.doi(), doi)
            self.assertEqual(response.title(), title)
            self.assertEqual(response.authors(), ["Verhulst, Kristal"])
            self.assertEqual(response.url(), url)
            self.assertEqual(response.journal(), journal)

        # error case
        doi = "10.00000/xxx"

        if source.has_credentials():
            response = source.publication_lookup(doi)
            # identity check instead of `== None`
            self.assertIsNone(response.serialize())
            self.assertIn("404", response.message)
            self.assertIsNone(response.doi())
            self.assertIsNone(response.title())
            self.assertIsNone(response.authors())
            self.assertIsNone(response.url())
            self.assertIsNone(response.journal())
예제 #10
0
    def test_nsf_par_fulltext_search(self):
        """Check NSF PAR full-text search result counts for several ``limit`` values.

        NOTE(review): this test calls the API without checking
        ``source.has_credentials()`` first -- confirm that is intentional.
        """
        schol = rc_scholapi.ScholInfraAPI(config_file="rc.cfg")
        source = schol.nsfPar

        ## please note, these numbers may change as new publications are added
        search_term = "NASA NOAA coral"
        responses = source.full_text_search(search_term,
                                            limit=13,
                                            exact_match=True)
        source.report_perf(responses[0].timing)
        # assertEqual reports the actual count on failure
        self.assertEqual(len(responses), 13)

        responses = source.full_text_search(search_term,
                                            limit=-1,
                                            exact_match=True)
        source.report_perf(responses[0].timing)
        self.assertEqual(len(responses), 15)

        responses = source.full_text_search(search_term,
                                            limit=1000,
                                            exact_match=True)
        source.report_perf(responses[0].timing)
        self.assertEqual(len(responses), 15)

        # Won't find any: a single response with no metadata is expected
        search_term = "dlkadngeonr3q0984gqn839g"
        responses = source.full_text_search(search_term, limit=13)
        source.report_perf(responses[0].timing)
        self.assertEqual(len(responses), 1)
        self.assertIsNone(responses[0].meta)
예제 #11
0
    def test_crossref_publication_lookup(self):
        """Check Crossref DOI lookup for a known DOI and for a malformed DOI.

        Assertions only run when the source reports credentials.
        """
        schol = rc_scholapi.ScholInfraAPI(config_file="rc.cfg")
        source = schol.crossref

        doi = "10.1503/cmaj.170880"
        expected = "Relation between household food insecurity and breastfeeding in Canada"
        num_authors = 4
        url = "http://dx.doi.org/10.1503/cmaj.170880"
        journal = "Canadian Medical Association Journal"

        if source.has_credentials():
            response = source.publication_lookup(doi)
            source.report_perf(response.timing)
            self.assertEqual(response.title(), expected)
            self.assertEqual(response.doi(), doi)
            self.assertEqual(len(response.authors()), num_authors)
            self.assertEqual(response.url(), url)
            self.assertEqual(response.journal(), journal)

        # error case: a message is expected, but no metadata
        doi_error = "10.XXXX.XXXX"
        if source.has_credentials():
            response = source.publication_lookup(doi_error)
            source.report_perf(response.timing)
            self.assertIsNotNone(response.message)
            self.assertIsNone(response.meta)
예제 #12
0
    def test_dimensions_title_search(self):
        """Check that a Dimensions title search returns the expected DOI."""
        schol = rc_scholapi.ScholInfraAPI(config_file="rc.cfg")
        title = "Deal or no deal? The prevalence and nutritional quality of price promotions among U.S. food and beverage purchases."
        meta = schol.dimensions.title_search(title)

        print("\ntime: {:.3f} ms - {}".format(schol.dimensions.elapsed_time,
                                              schol.dimensions.name))
        # assertEqual shows the returned DOI on failure
        self.assertEqual(meta["doi"], "10.1016/j.appet.2017.07.006")
예제 #13
0
    def test_datacite__format_exact_quote(self):
        """Check that DataCite's ``_format_exact_quote`` turns "NOAA NASA" into '"NOAA+NASA"'."""
        schol = rc_scholapi.ScholInfraAPI(config_file="rc.cfg")
        source = schol.datacite

        search_term = "NOAA NASA"
        expected = '"NOAA+NASA"'

        exact_quote = source._format_exact_quote(search_term)
        # assertEqual shows both strings on failure
        self.assertEqual(exact_quote, expected)
예제 #14
0
파일: test.py 프로젝트: fagan2888/RCApi
    def test_pubmed_journal_lookup(self):
        """Check that a PubMed journal lookup by ISSN returns the expected ISO abbreviation."""
        schol = rc_scholapi.ScholInfraAPI(config_file="rc.cfg")
        issn = "1932-6203"

        # journal_lookup returns a (meta, message) pair; only meta is checked here
        meta, message = schol.pubmed.journal_lookup(issn)

        print("\ntime: {:.3f} ms - {}".format(schol.pubmed.elapsed_time,
                                              schol.pubmed.name))
        self.assertEqual(meta["ISOAbbreviation"], "PLoS ONE")
예제 #15
0
파일: test.py 프로젝트: fagan2888/RCApi
    def test_pubmed_fulltext_id_search(self):
        """Check that a PubMed full-text ID search for "NHANES" returns at least 6850 entries."""
        schol = rc_scholapi.ScholInfraAPI(config_file="rc.cfg")
        search_term = "NHANES"

        meta = schol.pubmed.fulltext_id_search(search_term)

        print("\ntime: {:.3f} ms - {}".format(schol.pubmed.elapsed_time,
                                              schol.pubmed.name))
        # assertGreaterEqual reports the actual count on failure
        self.assertGreaterEqual(len(meta), 6850)
예제 #16
0
파일: test.py 프로젝트: fagan2888/RCApi
    def test_pubmed_title_search(self):
        """Check that a PubMed title search returns the expected PMID."""
        schol = rc_scholapi.ScholInfraAPI(config_file="rc.cfg")
        title = "Climate-change-driven accelerated sea-level rise detected in the altimeter era"

        meta = schol.pubmed.title_search(title)

        print("\ntime: {:.3f} ms - {}".format(schol.pubmed.elapsed_time,
                                              schol.pubmed.name))
        # assertEqual shows the returned PMID on failure
        self.assertEqual(meta["MedlineCitation"]["PMID"]["#text"], "29440401")
예제 #17
0
파일: test.py 프로젝트: fagan2888/RCApi
    def test_crossref_title_search(self):
        """Check that a Crossref title search returns the expected DOI."""
        schol = rc_scholapi.ScholInfraAPI(config_file="rc.cfg")
        title = "Relation between household food insecurity and breastfeeding in Canada"

        meta = schol.crossref.title_search(title)

        print("\ntime: {:.3f} ms - {}".format(schol.crossref.elapsed_time,
                                              schol.crossref.name))
        # assertEqual shows the returned DOI on failure
        self.assertEqual(meta["DOI"], "10.1503/cmaj.170880")
예제 #18
0
파일: test.py 프로젝트: fagan2888/RCApi
    def test_crossref_fulltext_search(self):
        """Check that a Crossref full-text search for "NHANES" reports at least 877000 results."""
        schol = rc_scholapi.ScholInfraAPI(config_file="rc.cfg")
        search_term = "NHANES"

        search_results = schol.crossref.full_text_search(search_term)

        print("\ntime: {:.3f} ms - {}".format(schol.crossref.elapsed_time,
                                              schol.crossref.name))
        # assertGreaterEqual reports the actual total on failure
        self.assertGreaterEqual(search_results["total-results"], 877000)
예제 #19
0
파일: test.py 프로젝트: fagan2888/RCApi
    def test_dissemin_publication_lookup(self):
        """Check that a Dissemin DOI lookup returns the expected paper date."""
        schol = rc_scholapi.ScholInfraAPI(config_file="rc.cfg")
        doi = "10.1016/j.appet.2017.07.006"

        meta = schol.dissemin.publication_lookup(doi)

        print("\ntime: {:.3f} ms - {}".format(schol.dissemin.elapsed_time,
                                              schol.dissemin.name))
        # assertEqual shows the returned date on failure
        self.assertEqual(meta["paper"]["date"], "2017-10-01")
예제 #20
0
파일: test.py 프로젝트: fagan2888/RCApi
    def test_repec_handle_lookup(self):
        """Check that the RePEc handle lookup by title returns the expected handle."""
        schol = rc_scholapi.ScholInfraAPI(config_file="rc.cfg")
        title = "Estimating the 'True' Cost of Job Loss: Evidence Using Matched Data from California 1991-2000"

        handle = schol.repec.get_handle(title)

        print("\ntime: {:.3f} ms - {}".format(schol.repec.elapsed_time,
                                              schol.repec.name))
        # assertEqual shows the returned handle on failure
        self.assertEqual(handle, "RePEc:cen:wpaper:09-14")
예제 #21
0
파일: test.py 프로젝트: fagan2888/RCApi
    def test_unpaywall_publication_lookup(self):
        """Check that an Unpaywall DOI lookup returns the expected ``doi_url``."""
        schol = rc_scholapi.ScholInfraAPI(config_file="rc.cfg")
        doi = "10.1016/j.appet.2017.07.006"

        meta = schol.unpaywall.publication_lookup(doi)

        print("\ntime: {:.3f} ms - {}".format(schol.unpaywall.elapsed_time,
                                              schol.unpaywall.name))
        # assertEqual shows the returned URL on failure
        self.assertEqual(meta["doi_url"],
                         "https://doi.org/10.1016/j.appet.2017.07.006")
예제 #22
0
파일: test.py 프로젝트: fagan2888/RCApi
    def test_openaire_title_search(self):
        """Check that an OpenAIRE title search returns the expected ordered metadata.

        The metadata is compared structurally rather than through ``repr()``:
        the textual form of ``OrderedDict`` changed in Python 3.12, so a
        string comparison depends on the interpreter version.
        """
        from collections import OrderedDict

        schol = rc_scholapi.ScholInfraAPI(config_file="rc.cfg")
        title = "Deal or no deal? The prevalence and nutritional quality of price promotions among U.S. food and beverage purchases."
        meta = schol.openaire.title_search(title)

        print("\ntime: {:.3f} ms - {}".format(schol.openaire.elapsed_time,
                                              schol.openaire.name))
        expected = OrderedDict([
            ("url", "https://europepmc.org/articles/PMC5574185/"),
            ("authors", [
                "Taillie, Lindsey Smith", "Ng, Shu Wen", "Xue, Ya",
                "Harding, Matthew"
            ]),
            ("open", True),
        ])
        # OrderedDict-to-OrderedDict equality is order-sensitive, which keeps
        # the key-order guarantee the repr comparison used to provide.
        self.assertIsInstance(meta, OrderedDict)
        self.assertEqual(meta, expected)
예제 #23
0
    def test_openaire_fulltext_search(self):
        """Check that an OpenAIRE full-text search for "NHANES" returns at least ``limit`` responses.

        Assertions only run when the source reports credentials.
        """
        schol = rc_scholapi.ScholInfraAPI(config_file="rc.cfg")
        source = schol.openaire

        search_term = "NHANES"
        expected = 100

        if source.has_credentials():
            responses = source.full_text_search(search_term, limit=expected)
            source.report_perf(responses[0].timing)
            # assertGreaterEqual reports the actual count on failure
            self.assertGreaterEqual(len(responses), expected)
예제 #24
0
    def test_repec_handle_lookup(self):
        """Check that the RePEc handle lookup by title returns the expected handle.

        Assertions only run when the source reports credentials.
        """
        schol = rc_scholapi.ScholInfraAPI(config_file="rc.cfg")
        source = schol.repec

        title = "Estimating the 'True' Cost of Job Loss: Evidence Using Matched Data from California 1991-2000"
        expected = "RePEc:cen:wpaper:09-14"

        if source.has_credentials():
            # get_handle returns a (meta, timing, message) triple
            meta, timing, message = source.get_handle(title)
            source.report_perf(timing)
            self.assertEqual(meta, expected)
예제 #25
0
파일: test.py 프로젝트: fagan2888/RCApi
    def test_europepmc_title_search(self):
        """Check that a Europe PMC title search returns the expected ordered metadata.

        The metadata is compared structurally rather than through ``repr()``:
        the textual form of ``OrderedDict`` changed in Python 3.12, so a
        string comparison depends on the interpreter version.
        """
        from collections import OrderedDict

        schol = rc_scholapi.ScholInfraAPI(config_file="rc.cfg")
        title = "Zebrafish models: Gaining insight into purinergic signaling and neurological disorders"
        meta = schol.europepmc.title_search(title)

        print("\ntime: {:.3f} ms - {}".format(schol.europepmc.elapsed_time,
                                              schol.europepmc.name))
        expected = OrderedDict([
            ("doi", "10.1016/j.pnpbp.2019.109770"),
            ("journal", "Prog Neuropsychopharmacol Biol Psychiatry"),
            ("authors", ["Nabinger DD", "Altenhofen S", "Bonan CD."]),
        ])
        # OrderedDict-to-OrderedDict equality is order-sensitive, which keeps
        # the key-order guarantee the repr comparison used to provide.
        self.assertIsInstance(meta, OrderedDict)
        self.assertEqual(meta, expected)
예제 #26
0
    def test_ssrn_title_search(self):
        """Check that an SSRN title search returns the expected ordered metadata and title.

        The metadata is compared structurally rather than through ``repr()``:
        the textual form of ``OrderedDict`` changed in Python 3.12, so a
        string comparison depends on the interpreter version.
        Assertions only run when the source reports credentials.
        """
        from collections import OrderedDict

        schol = rc_scholapi.ScholInfraAPI(config_file="rc.cfg")
        source = schol.ssrn

        title = "Supply-Side Subsidies to Improve Food Access and Dietary Outcomes: Evidence from the New Markets Tax Credit"
        expected = OrderedDict([
            ("doi", "10.2139/ssrn.2898991"),
            ("title", "Supply-Side Subsidies to Improve Food Access and Dietary Outcomes: Evidence from the New Markets Tax Credit"),
            ("keywords", [
                "place-based policies", "retail food", "tax incentives",
                "community health", "regression discontinuity"
            ]),
            ("authors", ["Freedman, Matthew", "Kuhns, Annemarie"]),
        ])

        if source.has_credentials():
            response = source.title_search(title)
            source.report_perf(response.timing)
            # OrderedDict-to-OrderedDict equality is order-sensitive, which
            # keeps the key-order guarantee the repr comparison used to provide.
            self.assertIsInstance(response.meta, OrderedDict)
            self.assertEqual(response.meta, expected)
            self.assertEqual(response.title(), title)
            self.assertTrue(response.title() == title)
예제 #27
0
파일: test.py 프로젝트: fagan2888/RCApi
    def test_crossref_publication_lookup(self):
        """Check that a Crossref DOI lookup returns the expected first title entry."""
        schol = rc_scholapi.ScholInfraAPI(config_file="rc.cfg")
        doi = "10.1503/cmaj.170880"

        meta = schol.crossref.publication_lookup(doi)

        print("\ntime: {:.3f} ms - {}".format(schol.crossref.elapsed_time,
                                              schol.crossref.name))
        # assertEqual shows the returned title on failure
        self.assertEqual(
            meta["title"][0],
            "Relation between household food insecurity and breastfeeding in Canada"
        )
예제 #28
0
def main(args):
    """Normalize each publication title, look up its DOI, and report the misses.

    ``args.partition`` is forwarded as the ``filter`` for
    ``graph.iter_publications``. Every publication title is NFKD-normalized
    and stripped, then passed to ``gather_doi``; a truthy result bumps
    ``graph.publications.title_hits``, anything else is recorded through
    ``graph.update_misses``. The publications of each partition are written
    to ``graph.BUCKET_STAGE``.

    Calls to ``gather_doi`` are throttled to ``dimensions_requests_limits``
    per ``dimensions_time_limit`` seconds.
    """
    # initialize the federated API access
    schol = rc_scholapi.ScholInfraAPI(config_file="rc.cfg", logger=None)
    graph = rc_graph.RCGraph("step2")

    # The Dimensions Analytics API is limited to 30 requests per IP address per minute. Source https://docs.dimensions.ai/dsl/api.html - TODO: this might be better refactored into scholapi to handle all particular cases
    dimensions_requests_limits = 30
    dimensions_time_limit = 60
    t0 = time.time()
    count = 0

    # for each publication: enrich metadata, gather the DOIs, etc.
    for partition, pub_iter in graph.iter_publications(graph.PATH_PUBLICATIONS,
                                                       filter=args.partition):
        pub_list = []

        for pub in tqdm(pub_iter, ascii=True, desc=partition[:30]):
            pub["title"] = unicodedata.normalize("NFKD", pub["title"]).strip()
            pub_list.append(pub)

            time_elapsed = time.time() - t0

            if time_elapsed >= dimensions_time_limit:
                # The time window has elapsed: start a new one no matter how
                # many requests were made. Previously this reset only
                # happened while count was below the limit, so reaching the
                # limit exactly as the window expired left count stuck above
                # the limit and disabled throttling for the rest of the run.
                count = 1  # adding some extra margin
                t0 = time.time()
            elif count >= dimensions_requests_limits:
                # already used all the API requests allowed in the time window
                to_sleep = dimensions_time_limit - math.floor(
                    time_elapsed) + 1  # adding some extra margin
                print("API calls:", count, "time elapsed:", time_elapsed,
                      "- will sleep:", to_sleep)
                time.sleep(to_sleep)
                count = 0
                t0 = time.time()

            title_match = gather_doi(schol, graph, partition, pub)

            count += 1

            if title_match:
                graph.publications.title_hits += 1
            else:
                graph.update_misses(partition, pub)

        graph.write_partition(graph.BUCKET_STAGE, partition, pub_list)

    # report errors
    status = "{} found titles in API calls".format(
        graph.publications.title_hits)
    trouble = "publications that failed every API lookup"
    graph.report_misses(status, trouble)
예제 #29
0
파일: test.py 프로젝트: fagan2888/RCApi
    def test_semantic_publication_lookup(self):
        """Check that a Semantic Scholar DOI lookup returns the expected paper URL."""
        schol = rc_scholapi.ScholInfraAPI(config_file="rc.cfg")
        doi = "10.1016/j.appet.2017.07.006"

        meta = schol.semantic.publication_lookup(doi)

        print("\ntime: {:.3f} ms - {}".format(schol.semantic.elapsed_time,
                                              schol.semantic.name))
        # assertEqual shows the returned URL on failure
        self.assertEqual(
            meta["url"],
            "https://www.semanticscholar.org/paper/690195fe2ab0fa093204a050ceb2f9fd1d1b2907"
        )
예제 #30
0
    def test_ssrn_title_search(self):
        """Check that an SSRN title search returns the expected ordered metadata.

        The metadata is compared structurally rather than through ``repr()``:
        the textual form of ``OrderedDict`` changed in Python 3.12, so a
        string comparison depends on the interpreter version.
        """
        from collections import OrderedDict

        schol = rc_scholapi.ScholInfraAPI(config_file="rc.cfg")
        title = "Supply-Side Subsidies to Improve Food Access and Dietary Outcomes: Evidence from the New Markets Tax Credit"

        meta = schol.ssrn.title_search(title)

        print("\ntime: {:.3f} ms - {}".format(schol.ssrn.elapsed_time,
                                              schol.ssrn.name))
        expected = OrderedDict([
            ("doi", "10.2139/ssrn.2898991"),
            ("title", "Supply-Side Subsidies to Improve Food Access and Dietary Outcomes: Evidence from the New Markets Tax Credit"),
            ("keywords", [
                "place-based policies", "retail food", "tax incentives",
                "community health", "regression discontinuity"
            ]),
            ("authors", ["Freedman, Matthew", "Kuhns, Annemarie"]),
        ])
        # OrderedDict-to-OrderedDict equality is order-sensitive, which keeps
        # the key-order guarantee the repr comparison used to provide.
        self.assertIsInstance(meta, OrderedDict)
        self.assertEqual(meta, expected)