def test_pubmed_title_search(self):
    """Exercise PubMed title search: a known paper, then a guaranteed miss.

    Skips the live API calls when no PubMed credentials are configured.
    """
    schol = rc_scholapi.ScholInfraAPI(config_file="rc.cfg")
    source = schol.pubmed
    title = "Climate-change-driven accelerated sea-level rise detected in the altimeter era."
    expected = "29440401"
    doi = "10.1073/pnas.1717312115"
    journal = "Proceedings of the National Academy of Sciences of the United States of America"

    if source.has_credentials():
        response = source.title_search(title)
        source.report_perf(response.timing)
        # assertEqual/assertIsNone report the mismatched values on failure,
        # unlike assertTrue(x == y)
        self.assertEqual(response.pmid(), expected)
        self.assertEqual(response.doi(), doi)
        self.assertEqual(response.title(), title)
        self.assertEqual(response.journal(), journal)
        self.assertIsNone(response.issn())

    # error case: a title that cannot match any publication
    title = "NOT_TO_BE_FOUND"

    if source.has_credentials():
        response = source.title_search(title)
        source.report_perf(response.timing)
        self.assertIsNone(response.meta)
        self.assertIsNone(response.pmid())
        self.assertIsNone(response.doi())
        self.assertIsNone(response.title())
        self.assertIsNone(response.journal())
        self.assertIsNone(response.issn())
def test_core_title_search(self):
    """Exercise CORE title search: a known paper, then a guaranteed miss.

    Skips the live API calls when no CORE credentials are configured.
    """
    schol = rc_scholapi.ScholInfraAPI(config_file="rc.cfg")
    source = schol.core
    doi = "10.1371/journal.pone.0013969"
    # CORE normalizes titles; compare case-insensitively
    title = "Caribbean corals in crisis: record thermal stress, bleaching, and mortality in 2005".lower()
    url = "https://core.ac.uk/download/pdf/51094169.pdf"
    author = "Eakin, C. Mark"
    journal = "NSUWorks"

    if source.has_credentials():
        response = source.title_search(title)
        source.report_perf(response.timing)
        self.assertEqual(response.doi(), doi)
        self.assertEqual(response.title().lower(), title)
        self.assertEqual(response.url(), url)
        self.assertIn(author, response.authors())
        self.assertEqual(response.journal(), journal)

    # error case
    title = "ajso58tt849qp3g84h38pghq3974ut8gq9j9ht789"  # Should be no matches

    if source.has_credentials():
        response = source.title_search(title)
        source.report_perf(response.timing)
        self.assertIsNone(response.meta)
        self.assertEqual(response.message, "Not found")
def test_semantic_publication_lookup(self):
    """Look up a publication by DOI via Semantic Scholar, plus two error cases.

    Skips the live API calls when no credentials are configured.
    """
    schol = rc_scholapi.ScholInfraAPI(config_file="rc.cfg")
    source = schol.semantic
    doi = "10.1016/j.appet.2017.07.006"
    expected = "https://www.semanticscholar.org/paper/690195fe2ab0fa093204a050ceb2f9fd1d1b2907"

    if source.has_credentials():
        response = source.publication_lookup(doi)
        source.report_perf(response.timing)
        self.assertEqual(response.url(), expected)
        self.assertEqual(response.doi(), doi)

    # error case
    doi = "10.00000/xxx"

    if source.has_credentials():
        response = source.publication_lookup(doi)
        self.assertIsNone(response.meta)

    # another error case
    doi = "10.1641/0006-3568(2005)055[0879:EITLSA]2.0.CO;2"

    if source.has_credentials():
        response = source.publication_lookup(doi)
        self.assertIsNone(response.meta)
def test_unpaywall_publication_lookup(self):
    """Look up a publication by DOI via Unpaywall, plus an error case.

    Skips the live API calls when no credentials are configured.
    """
    schol = rc_scholapi.ScholInfraAPI(config_file="rc.cfg")
    source = schol.unpaywall
    doi = "10.1016/j.appet.2017.07.006"
    title = "Deal or no deal? The prevalence and nutritional quality of price promotions among U.S. food and beverage purchases"
    url = "https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5574185"
    authors_num = 4
    journal = "Appetite"
    expected = "https://doi.org/10.1016/j.appet.2017.07.006"

    if source.has_credentials():
        response = source.publication_lookup(doi)
        source.report_perf(response.timing)
        self.assertEqual(response.meta["doi_url"], expected)
        self.assertEqual(response.doi(), doi)
        self.assertEqual(response.title(), title)
        self.assertEqual(response.url(), url)
        self.assertEqual(response.journal(), journal)
        self.assertEqual(len(response.authors()), authors_num)

    # error case
    doi = "10.00000/xxx"

    if source.has_credentials():
        response = source.publication_lookup(doi)
        self.assertIsNone(response.meta)
def test_dissemin_publication_lookup(self):
    """Look up a publication by DOI via Dissemin, plus two error cases.

    Skips the live API calls when no credentials are configured.
    """
    schol = rc_scholapi.ScholInfraAPI(config_file="rc.cfg")
    source = schol.dissemin
    doi = "10.1016/j.appet.2017.07.006"
    title = "Deal or no deal? The prevalence and nutritional quality of price promotions among U.S. food and beverage purchases"
    journal = "Appetite"
    authors_num = 4
    expected = "2017-10-01"

    if source.has_credentials():
        response = source.publication_lookup(doi)
        source.report_perf(response.timing)
        self.assertEqual(response.meta["paper"]["date"], expected)
        self.assertEqual(response.doi(), doi)
        self.assertEqual(response.title(), title)
        self.assertEqual(response.journal(), journal)
        self.assertEqual(len(response.authors()), authors_num)
        self.assertIsNone(response.url())

    # error case
    doi = "10.00000/xxx"

    if source.has_credentials():
        response = source.publication_lookup(doi)
        self.assertIsNone(response.meta)
        self.assertIsNone(response.message)

    # another error case
    doi = "10.1023/A:1018882711314"

    if source.has_credentials():
        response = source.publication_lookup(doi)
        self.assertIsNone(response.meta)
        self.assertIsNone(response.message)
def test_openaire_title_search(self):
    """Exercise OpenAIRE title search: a paper with a DOI, then one without.

    Skips the live API calls when no credentials are configured.
    """
    schol = rc_scholapi.ScholInfraAPI(config_file="rc.cfg")
    source = schol.openaire
    doi = "10.1016/j.appet.2017.07.006"
    title = "Deal or no deal? The prevalence and nutritional quality of price promotions among U.S. food and beverage purchases"
    url = "https://europepmc.org/articles/PMC5574185/"
    authors = [
        "Taillie, Lindsey Smith",
        "Ng, Shu Wen",
        "Xue, Ya",
        "Harding, Matthew",
    ]

    if source.has_credentials():
        response = source.title_search(title)
        source.report_perf(response.timing)
        self.assertEqual(response.doi(), doi)
        self.assertEqual(response.title(), title)
        self.assertEqual(response.url(), url)
        self.assertEqual(response.authors(), authors)
        self.assertTrue(response.meta["open"])

    # a record that resolves without a DOI in its metadata
    title = "Quantitative easing, portfolio rebalancing and credit growth: Micro evidence from Germany"

    if source.has_credentials():
        response = source.title_search(title)
        source.report_perf(response.timing)
        self.assertNotIn("doi", response.meta)
        self.assertEqual(response.title(), title)
def main(args):
    """Gather abstracts for every staged publication via the federated
    scholarly-infrastructure APIs, then report the misses.

    :param args: parsed CLI arguments; only ``args.partition`` is used here,
        as a filter on which partitions to process
    """
    # initialize the federated API access
    schol = rc_scholapi.ScholInfraAPI(config_file="rc.cfg", logger=None)
    graph = rc_graph.RCGraph("abstract")

    # for each publication: enrich metadata, gather the abstracts
    for partition, pub_iter in graph.iter_publications(graph.BUCKET_STAGE, filter=args.partition):
        pub_list = []

        for pub in tqdm(pub_iter, ascii=True, desc=partition[:30]):
            # pub is appended before the lookup: every publication is written
            # back to the stage bucket whether or not an abstract was found
            pub_list.append(pub)
            abstract_match = lookup_abstract(schol, graph, partition, pub)

            if abstract_match:
                graph.publications.ab_hits += 1
            else:
                # record the failed lookup so it appears in the miss report
                graph.update_misses(partition, pub)

        graph.write_partition(graph.BUCKET_STAGE, partition, pub_list)

    # report errors
    status = "{} successful abstract lookups".format(
        graph.publications.ab_hits)
    graph.report_misses(status, "publications that failed every abstract lookup")
def test_datacite_title_search(self):
    """Exercise DataCite title search: a known record, then a guaranteed miss.

    Skips the live API calls when no credentials are configured.
    """
    schol = rc_scholapi.ScholInfraAPI(config_file="rc.cfg")
    source = schol.datacite
    title = "Empirical analysis of potential improvements for high voltage protective algorithms"
    expected = "10.5281/zenodo.3635395"
    journal = "Zenodo"
    author = "López, David"
    url = "https://zenodo.org/record/3635395"

    if source.has_credentials():
        response = source.title_search(title)
        # split the original compound assertTrue(meta and meta["id"] == ...)
        # so a failure pinpoints which condition broke
        self.assertTrue(response.meta)
        self.assertEqual(response.meta["id"], expected)
        self.assertEqual(response.doi(), expected)
        self.assertEqual(response.title(), title)
        self.assertIn(author, response.authors())
        self.assertEqual(response.url(), url)
        self.assertEqual(response.journal(), journal)

    # error case
    title = "ajso58tt849qp3g84h38pghq3974ut8gq9j9ht789"  # Should be no matches

    if source.has_credentials():
        response = source.title_search(title)
        source.report_perf(response.timing)
        self.assertIsNone(response.meta)
def test_datacite_publication_lookup(self):
    """Look up a DataCite record by DOI, plus a not-found error case.

    Skips the live API calls when no credentials are configured.
    """
    schol = rc_scholapi.ScholInfraAPI(config_file="rc.cfg")
    source = schol.datacite
    doi = "10.22002/d1.246"
    title = "In Situ Carbon Dioxide and Methane Mole Fractions from the Los Angeles Megacity Carbon Project"
    url = "https://data.caltech.edu/records/246"
    journal = "CaltechDATA"

    if source.has_credentials():
        response = source.publication_lookup(doi)
        source.report_perf(response.timing)
        self.assertEqual(response.doi(), doi)
        self.assertEqual(response.title(), title)
        self.assertEqual(response.authors(), ["Verhulst, Kristal"])
        self.assertEqual(response.url(), url)
        self.assertEqual(response.journal(), journal)

    # error case: all accessors must degrade to None on a 404
    doi = "10.00000/xxx"

    if source.has_credentials():
        response = source.publication_lookup(doi)
        self.assertIsNone(response.serialize())
        self.assertIn("404", response.message)
        self.assertIsNone(response.doi())
        self.assertIsNone(response.title())
        self.assertIsNone(response.authors())
        self.assertIsNone(response.url())
        self.assertIsNone(response.journal())
def test_nsf_par_fulltext_search(self):
    """Exercise NSF-PAR full-text search with various limits, then a miss."""
    schol = rc_scholapi.ScholInfraAPI(config_file="rc.cfg")
    source = schol.nsfPar

    ## please note, these numbers may change as new publications are added
    search_term = "NASA NOAA coral"

    responses = source.full_text_search(search_term, limit=13, exact_match=True)
    source.report_perf(responses[0].timing)
    self.assertEqual(len(responses), 13)

    # limit=-1 means no limit; all matches come back
    responses = source.full_text_search(search_term, limit=-1, exact_match=True)
    source.report_perf(responses[0].timing)
    self.assertEqual(len(responses), 15)

    # a limit above the match count returns everything
    responses = source.full_text_search(search_term, limit=1000, exact_match=True)
    source.report_perf(responses[0].timing)
    self.assertEqual(len(responses), 15)

    # Won't find any
    search_term = "dlkadngeonr3q0984gqn839g"

    responses = source.full_text_search(search_term, limit=13)
    source.report_perf(responses[0].timing)
    # a miss still yields a single empty response object
    self.assertEqual(len(responses), 1)
    self.assertIsNone(responses[0].meta)
def test_crossref_publication_lookup(self):
    """Look up a publication by DOI via Crossref, plus a malformed-DOI case.

    Skips the live API calls when no credentials are configured.
    """
    schol = rc_scholapi.ScholInfraAPI(config_file="rc.cfg")
    source = schol.crossref
    doi = "10.1503/cmaj.170880"
    expected = "Relation between household food insecurity and breastfeeding in Canada"
    num_authors = 4
    url = "http://dx.doi.org/10.1503/cmaj.170880"
    journal = "Canadian Medical Association Journal"

    if source.has_credentials():
        response = source.publication_lookup(doi)
        source.report_perf(response.timing)
        self.assertEqual(response.title(), expected)
        self.assertEqual(response.doi(), doi)
        self.assertEqual(len(response.authors()), num_authors)
        self.assertEqual(response.url(), url)
        self.assertEqual(response.journal(), journal)

    # error case: malformed DOI yields a message and no metadata
    doi_error = "10.XXXX.XXXX"

    if source.has_credentials():
        response = source.publication_lookup(doi_error)
        source.report_perf(response.timing)
        self.assertIsNotNone(response.message)
        self.assertIsNone(response.meta)
def test_dimensions_title_search(self):
    """Title search through the Dimensions API resolves to the expected DOI."""
    schol = rc_scholapi.ScholInfraAPI(config_file="rc.cfg")
    title = "Deal or no deal? The prevalence and nutritional quality of price promotions among U.S. food and beverage purchases."
    meta = schol.dimensions.title_search(title)
    print("\ntime: {:.3f} ms - {}".format(schol.dimensions.elapsed_time, schol.dimensions.name))
    # assertEqual reports both values on failure, unlike assertTrue(x == y)
    self.assertEqual(meta["doi"], "10.1016/j.appet.2017.07.006")
def test_datacite__format_exact_quote(self):
    """_format_exact_quote wraps the term in quotes and joins words with '+'."""
    schol = rc_scholapi.ScholInfraAPI(config_file="rc.cfg")
    source = schol.datacite
    search_term = "NOAA NASA"
    expected = '"NOAA+NASA"'
    exact_quote = source._format_exact_quote(search_term)
    # assertEqual reports both values on failure, unlike assertTrue(x == y)
    self.assertEqual(exact_quote, expected)
def test_pubmed_journal_lookup(self):
    """Journal lookup by ISSN returns the expected ISO abbreviation."""
    schol = rc_scholapi.ScholInfraAPI(config_file="rc.cfg")
    issn = "1932-6203"
    # message is part of the return contract but unused in this test
    meta, message = schol.pubmed.journal_lookup(issn)
    print("\ntime: {:.3f} ms - {}".format(schol.pubmed.elapsed_time, schol.pubmed.name))
    self.assertEqual(meta["ISOAbbreviation"], "PLoS ONE")
def test_pubmed_fulltext_id_search(self):
    """Full-text ID search returns at least the known lower bound of hits."""
    schol = rc_scholapi.ScholInfraAPI(config_file="rc.cfg")
    search_term = "NHANES"
    meta = schol.pubmed.fulltext_id_search(search_term)
    print("\ntime: {:.3f} ms - {}".format(schol.pubmed.elapsed_time, schol.pubmed.name))
    # assertGreaterEqual reports both values on failure
    self.assertGreaterEqual(len(meta), 6850)
def test_pubmed_title_search(self):
    """Title search through PubMed resolves to the expected PMID."""
    schol = rc_scholapi.ScholInfraAPI(config_file="rc.cfg")
    title = "Climate-change-driven accelerated sea-level rise detected in the altimeter era"
    meta = schol.pubmed.title_search(title)
    print("\ntime: {:.3f} ms - {}".format(schol.pubmed.elapsed_time, schol.pubmed.name))
    # assertEqual reports both values on failure, unlike assertTrue(x == y)
    self.assertEqual(meta["MedlineCitation"]["PMID"]["#text"], "29440401")
def test_crossref_title_search(self):
    """Title search through Crossref resolves to the expected DOI."""
    schol = rc_scholapi.ScholInfraAPI(config_file="rc.cfg")
    title = "Relation between household food insecurity and breastfeeding in Canada"
    meta = schol.crossref.title_search(title)
    print("\ntime: {:.3f} ms - {}".format(schol.crossref.elapsed_time, schol.crossref.name))
    # assertEqual reports both values on failure, unlike assertTrue(x == y)
    self.assertEqual(meta["DOI"], "10.1503/cmaj.170880")
def test_crossref_fulltext_search(self):
    """Full-text search through Crossref returns at least the known hit count."""
    schol = rc_scholapi.ScholInfraAPI(config_file="rc.cfg")
    search_term = "NHANES"
    search_results = schol.crossref.full_text_search(search_term)
    print("\ntime: {:.3f} ms - {}".format(schol.crossref.elapsed_time, schol.crossref.name))
    # assertGreaterEqual reports both values on failure
    self.assertGreaterEqual(search_results["total-results"], 877000)
def test_dissemin_publication_lookup(self):
    """DOI lookup through Dissemin returns the expected publication date."""
    schol = rc_scholapi.ScholInfraAPI(config_file="rc.cfg")
    doi = "10.1016/j.appet.2017.07.006"
    meta = schol.dissemin.publication_lookup(doi)
    print("\ntime: {:.3f} ms - {}".format(schol.dissemin.elapsed_time, schol.dissemin.name))
    # assertEqual reports both values on failure, unlike assertTrue(x == y)
    self.assertEqual(meta["paper"]["date"], "2017-10-01")
def test_repec_handle_lookup(self):
    """Title lookup through RePEc resolves to the expected handle."""
    schol = rc_scholapi.ScholInfraAPI(config_file="rc.cfg")
    title = "Estimating the 'True' Cost of Job Loss: Evidence Using Matched Data from California 1991-2000"
    handle = schol.repec.get_handle(title)
    print("\ntime: {:.3f} ms - {}".format(schol.repec.elapsed_time, schol.repec.name))
    # assertEqual reports both values on failure, unlike assertTrue(x == y)
    self.assertEqual(handle, "RePEc:cen:wpaper:09-14")
def test_unpaywall_publication_lookup(self):
    """DOI lookup through Unpaywall returns the expected doi_url."""
    schol = rc_scholapi.ScholInfraAPI(config_file="rc.cfg")
    doi = "10.1016/j.appet.2017.07.006"
    meta = schol.unpaywall.publication_lookup(doi)
    print("\ntime: {:.3f} ms - {}".format(schol.unpaywall.elapsed_time, schol.unpaywall.name))
    # assertEqual reports both values on failure, unlike assertTrue(x == y)
    self.assertEqual(meta["doi_url"], "https://doi.org/10.1016/j.appet.2017.07.006")
def test_openaire_title_search(self):
    """Title search through OpenAIRE returns the expected metadata record."""
    schol = rc_scholapi.ScholInfraAPI(config_file="rc.cfg")
    title = "Deal or no deal? The prevalence and nutritional quality of price promotions among U.S. food and beverage purchases."
    meta = schol.openaire.title_search(title)
    print("\ntime: {:.3f} ms - {}".format(schol.openaire.elapsed_time, schol.openaire.name))
    # the whole OrderedDict is pinned via its repr; assertEqual shows a diff on failure
    self.assertEqual(
        repr(meta),
        "OrderedDict([('url', 'https://europepmc.org/articles/PMC5574185/'), ('authors', ['Taillie, Lindsey Smith', 'Ng, Shu Wen', 'Xue, Ya', 'Harding, Matthew']), ('open', True)])",
    )
def test_openaire_fulltext_search(self):
    """Full-text search through OpenAIRE honors the requested limit.

    Skips the live API call when no credentials are configured.
    """
    schol = rc_scholapi.ScholInfraAPI(config_file="rc.cfg")
    source = schol.openaire
    search_term = "NHANES"
    expected = 100

    if source.has_credentials():
        responses = source.full_text_search(search_term, limit=expected)
        source.report_perf(responses[0].timing)
        # assertGreaterEqual reports both values on failure
        self.assertGreaterEqual(len(responses), expected)
def test_repec_handle_lookup(self):
    """Title lookup through RePEc resolves to the expected handle.

    Skips the live API call when no credentials are configured.
    """
    schol = rc_scholapi.ScholInfraAPI(config_file="rc.cfg")
    source = schol.repec
    title = "Estimating the 'True' Cost of Job Loss: Evidence Using Matched Data from California 1991-2000"
    expected = "RePEc:cen:wpaper:09-14"

    if source.has_credentials():
        # message is part of the return contract but unused in this test
        meta, timing, message = source.get_handle(title)
        source.report_perf(timing)
        # assertEqual reports both values on failure, unlike assertTrue(x == y)
        self.assertEqual(meta, expected)
def test_europepmc_title_search(self):
    """Title search through EuropePMC returns the expected metadata record."""
    schol = rc_scholapi.ScholInfraAPI(config_file="rc.cfg")
    title = "Zebrafish models: Gaining insight into purinergic signaling and neurological disorders"
    meta = schol.europepmc.title_search(title)
    print("\ntime: {:.3f} ms - {}".format(schol.europepmc.elapsed_time, schol.europepmc.name))
    # the whole OrderedDict is pinned via its repr; assertEqual shows a diff on failure
    self.assertEqual(
        repr(meta),
        "OrderedDict([('doi', '10.1016/j.pnpbp.2019.109770'), ('journal', 'Prog Neuropsychopharmacol Biol Psychiatry'), ('authors', ['Nabinger DD', 'Altenhofen S', 'Bonan CD.'])])",
    )
def test_ssrn_title_search(self):
    """Title search through SSRN returns the expected metadata record.

    Skips the live API call when no credentials are configured.
    """
    schol = rc_scholapi.ScholInfraAPI(config_file="rc.cfg")
    source = schol.ssrn
    title = "Supply-Side Subsidies to Improve Food Access and Dietary Outcomes: Evidence from the New Markets Tax Credit"
    expected = "OrderedDict([('doi', '10.2139/ssrn.2898991'), ('title', 'Supply-Side Subsidies to Improve Food Access and Dietary Outcomes: Evidence from the New Markets Tax Credit'), ('keywords', ['place-based policies', 'retail food', 'tax incentives', 'community health', 'regression discontinuity']), ('authors', ['Freedman, Matthew', 'Kuhns, Annemarie'])])"

    if source.has_credentials():
        response = source.title_search(title)
        source.report_perf(response.timing)
        # assertEqual reports both values on failure, unlike assertTrue(x == y)
        self.assertEqual(repr(response.meta), expected)
        self.assertEqual(response.title(), title)
def test_crossref_publication_lookup(self):
    """DOI lookup through Crossref returns the expected title."""
    schol = rc_scholapi.ScholInfraAPI(config_file="rc.cfg")
    doi = "10.1503/cmaj.170880"
    meta = schol.crossref.publication_lookup(doi)
    print("\ntime: {:.3f} ms - {}".format(schol.crossref.elapsed_time, schol.crossref.name))
    # assertEqual reports both values on failure, unlike assertTrue(x == y)
    self.assertEqual(
        meta["title"][0],
        "Relation between household food insecurity and breastfeeding in Canada",
    )
def main(args):
    """Enrich publication metadata (normalize titles, gather DOIs via the
    federated APIs), throttling to respect the Dimensions API rate limit,
    then report the misses.

    :param args: parsed CLI arguments; only ``args.partition`` is used here,
        as a filter on which partitions to process
    """
    # initialize the federated API access
    schol = rc_scholapi.ScholInfraAPI(config_file="rc.cfg", logger=None)
    graph = rc_graph.RCGraph("step2")

    # The Dimensions Analytics API is limited to 30 requests per IP address
    # per minute. Source https://docs.dimensions.ai/dsl/api.html
    # TODO: this might be better refactored into scholapi to handle all particular cases
    dimensions_requests_limits = 30
    dimensions_time_limit = 60
    t0 = time.time()  # start of the current rate-limit window
    count = 0  # API calls made within the current window

    # for each publication: enrich metadata, gather the DOIs, etc.
    for partition, pub_iter in graph.iter_publications(graph.PATH_PUBLICATIONS, filter=args.partition):
        pub_list = []

        for pub in tqdm(pub_iter, ascii=True, desc=partition[:30]):
            # NFKD-normalize titles so downstream matching is consistent
            pub["title"] = unicodedata.normalize("NFKD", pub["title"]).strip()
            pub_list.append(pub)
            time_elapsed = time.time() - t0

            # already used all the API requests allowed in the time window
            if count == dimensions_requests_limits and time_elapsed < dimensions_time_limit:
                to_sleep = dimensions_time_limit - math.floor(
                    time_elapsed) + 1  # adding some extra margin
                print("API calls:", count, "time elapsed:", time_elapsed,
                      "- will sleep:", to_sleep)
                time.sleep(to_sleep)
                count = 0
                t0 = time.time()
            # didn't get to the requests limit in the time window
            elif count < dimensions_requests_limits and time_elapsed >= dimensions_time_limit:
                count = 1  # adding some extra margin
                t0 = time.time()
                #print("API calls:", count, "time elapsed:", time_elapsed, "resetting counters...")

            title_match = gather_doi(schol, graph, partition, pub)
            count += 1

            if title_match:
                graph.publications.title_hits += 1
            else:
                # record the failed lookup so it appears in the miss report
                graph.update_misses(partition, pub)

        graph.write_partition(graph.BUCKET_STAGE, partition, pub_list)

    # report errors
    status = "{} found titles in API calls".format(
        graph.publications.title_hits)
    trouble = "publications that failed every API lookup"
    graph.report_misses(status, trouble)
def test_semantic_publication_lookup(self):
    """DOI lookup through Semantic Scholar returns the expected paper URL."""
    schol = rc_scholapi.ScholInfraAPI(config_file="rc.cfg")
    doi = "10.1016/j.appet.2017.07.006"
    meta = schol.semantic.publication_lookup(doi)
    print("\ntime: {:.3f} ms - {}".format(schol.semantic.elapsed_time, schol.semantic.name))
    # assertEqual reports both values on failure, unlike assertTrue(x == y)
    self.assertEqual(
        meta["url"],
        "https://www.semanticscholar.org/paper/690195fe2ab0fa093204a050ceb2f9fd1d1b2907",
    )
def test_ssrn_title_search(self):
    """Title search through SSRN returns the expected metadata record."""
    schol = rc_scholapi.ScholInfraAPI(config_file="rc.cfg")
    title = "Supply-Side Subsidies to Improve Food Access and Dietary Outcomes: Evidence from the New Markets Tax Credit"
    meta = schol.ssrn.title_search(title)
    print("\ntime: {:.3f} ms - {}".format(schol.ssrn.elapsed_time, schol.ssrn.name))
    # the whole OrderedDict is pinned via its repr; assertEqual shows a diff on failure
    self.assertEqual(
        repr(meta),
        "OrderedDict([('doi', '10.2139/ssrn.2898991'), ('title', 'Supply-Side Subsidies to Improve Food Access and Dietary Outcomes: Evidence from the New Markets Tax Credit'), ('keywords', ['place-based policies', 'retail food', 'tax incentives', 'community health', 'regression discontinuity']), ('authors', ['Freedman, Matthew', 'Kuhns, Annemarie'])])",
    )