def assert_scrape_result(doi, pdf_url, metadata_url, license, evidence):
    """Refresh the hybrid scrape for *doi* and assert every scrape field.

    Verifies the scraped pdf url, metadata url, license, and evidence
    against the expected values (urls compare case-insensitively), then
    confirms the first open location mirrors the same data when a url
    was expected, or that no open locations exist otherwise.
    """
    publication = pub.lookup_product_by_doi(doi)
    publication.refresh_hybrid_scrape()

    # Log expectations vs. actuals up front so failures are easy to debug.
    logger.info(u"was looking for pdf url {}, got {}".format(
        pdf_url, publication.scrape_pdf_url))
    logger.info(u"was looking for metadata url {}, got {}".format(
        metadata_url, publication.scrape_metadata_url))
    logger.info(u"was looking for license {}, got {}".format(
        license, publication.scrape_license))
    logger.info(u"was looking for evidence {}, got {}".format(
        evidence, publication.scrape_evidence))
    logger.info(u"https://api.unpaywall.org/v2/{}?email=me".format(doi))
    logger.info(u"doi: https://doi.org/{}".format(doi))

    if publication.error:
        logger.info(publication.error)
    assert_equals(publication.error, "")

    # Urls compare case-insensitively; license/evidence must match exactly.
    assert_equals_case_insensitive(publication.scrape_pdf_url, pdf_url)
    assert_equals_case_insensitive(publication.scrape_metadata_url, metadata_url)
    assert_equals(publication.scrape_license, license)
    assert_equals(publication.scrape_evidence, evidence)

    publication.ask_hybrid_scrape()

    if pdf_url or metadata_url:
        first_location = publication.open_locations[0]
        assert_equals_case_insensitive(first_location.pdf_url, pdf_url)
        assert_equals_case_insensitive(first_location.metadata_url, metadata_url)
        assert_equals(first_location.evidence, evidence)
        assert_equals(first_location.license, license)
    else:
        assert_false(publication.open_locations)
def test_chorus_dois(self, test_data):
    """Refresh a CHORUS doi and assert a fulltext url is found with no error."""
    doi = test_data

    # Cookies break the requests_cache pickling, so run this one uncached.
    requests_cache.uninstall_cache()

    publication = pub.lookup_product_by_doi(doi)
    if not publication:
        logger.info(u"doi {} not in db, skipping".format(doi))
        return
    publication.refresh()

    logger.info(u"https://api.unpaywall.org/v2/{}?email=me".format(doi))
    logger.info(u"doi: https://doi.org/{}".format(doi))
    logger.info(u"license: {}".format(publication.best_license))
    logger.info(u"evidence: {}".format(publication.best_evidence))
    logger.info(u"host: {}".format(publication.best_host))
    if publication.error:
        logger.info(publication.error)

    assert_equals(publication.error, "")
    assert_is_not_none(publication.fulltext_url)
def test_active_dois(self, test_data):
    """Refresh an active doi and assert the expected fulltext url is found.

    *test_data* is a (doi, fulltext_url, license, color) tuple; only the
    doi and fulltext_url are currently asserted.
    """
    (doi, fulltext_url, license, color) = test_data

    my_pub = pub.lookup_product_by_doi(doi)
    my_pub.refresh()

    logger.info(u"\n\nwas looking for {}, got {}".format(
        fulltext_url, my_pub.fulltext_url))
    logger.info(u"https://api.unpaywall.org/v2/{}?email=me".format(doi))
    logger.info(u"doi: https://doi.org/{}".format(doi))
    logger.info(u"license: {}".format(my_pub.license))
    logger.info(u"oa_color: {}".format(my_pub.oa_color))
    logger.info(u"evidence: {}".format(my_pub.evidence))
    if my_pub.error:
        logger.info(my_pub.error)

    # The original asserted my_pub.error == "" twice; once is enough.
    assert_equals(my_pub.error, "")
    assert_equals(my_pub.fulltext_url, fulltext_url)
    # Guard against both expected and actual being None.
    assert_not_equals(my_pub.fulltext_url, None)
def test_active_dois(self, test_data):
    """Refresh an active doi and assert the expected fulltext url is found.

    *test_data* is a (doi, fulltext_url, license, color) tuple; only the
    doi and fulltext_url are currently asserted.
    """
    (doi, fulltext_url, license, color) = test_data

    my_pub = pub.lookup_product_by_doi(doi)
    my_pub.refresh()

    logger.info(u"\n\nwas looking for {}, got {}".format(fulltext_url, my_pub.fulltext_url))
    logger.info(u"https://api.unpaywall.org/v2/{}?email=me".format(doi))
    logger.info(u"doi: https://doi.org/{}".format(doi))
    logger.info(u"license: {}".format(my_pub.license))
    logger.info(u"oa_color: {}".format(my_pub.oa_color))
    logger.info(u"evidence: {}".format(my_pub.evidence))
    if my_pub.error:
        logger.info(my_pub.error)

    # The original asserted my_pub.error == "" twice; once is enough.
    assert_equals(my_pub.error, "")
    assert_equals(my_pub.fulltext_url, fulltext_url)
    # Guard against both expected and actual being None.
    assert_not_equals(my_pub.fulltext_url, None)
def test_open_dois(self, test_data):
    """Recalculate an open doi and assert some fulltext url was found.

    *test_data* is a (doi, fulltext_url, license) tuple; only presence of
    a fulltext url is asserted here, not its exact value.
    """
    (doi, fulltext_url, license) = test_data

    my_pub = pub.lookup_product_by_doi(doi)
    my_pub.recalculate()

    logger.info(u"was looking for {}, got {}\n\n".format(
        fulltext_url, my_pub.fulltext_url))
    # Use https, consistent with the doi.org links logged elsewhere in this file.
    logger.info(u"doi: https://doi.org/{}".format(doi))
    logger.info(u"title: {}".format(my_pub.best_title))
    logger.info(u"evidence: {}\n\n".format(my_pub.evidence))
    if my_pub.error:
        logger.info(my_pub.error)

    assert_not_equals(my_pub.fulltext_url, None)
def test_open_dois(self, test_data):
    """Recalculate an open doi and assert the exact expected fulltext url."""
    (doi, fulltext_url, license) = test_data

    publication = pub.lookup_product_by_doi(doi)
    publication.recalculate()

    logger.info(u"was looking for {}, got {}\n\n".format(fulltext_url, publication.fulltext_url))
    logger.info(u"https://api.unpaywall.org/v2/{}?email=me".format(doi))
    logger.info(u"doi: https://doi.org/{}".format(doi))
    logger.info(u"title: {}".format(publication.best_title))
    logger.info(u"evidence: {}\n\n".format(publication.evidence))
    if publication.error:
        logger.info(publication.error)

    assert_not_equals(publication.fulltext_url, None)
    assert_equals(fulltext_url, publication.fulltext_url)
def test_arxiv_dois(self, test_data):
    """Recalculate an arXiv doi and assert its pdf appears among the OA locations."""
    (doi, fulltext_url, license) = test_data

    publication = pub.lookup_product_by_doi(doi)
    publication.recalculate()

    logger.info(u"was looking for {}, got {}\n\n".format(fulltext_url, publication.fulltext_url))
    logger.info(u"https://api.unpaywall.org/v2/{}?email=me".format(doi))
    logger.info(u"doi: https://doi.org/{}".format(doi))
    logger.info(u"title: {}".format(publication.best_title))
    logger.info(u"evidence: {}\n\n".format(publication.evidence))
    if publication.error:
        logger.info(publication.error)

    assert_not_equals(publication.fulltext_url, None)

    # The arxiv url may not be picked as the best location, so only
    # require that it shows up somewhere among all OA locations.
    pdf_urls = [location.pdf_url for location in publication.all_oa_locations]
    assert_true(fulltext_url in pdf_urls)
def test_sciencedirect_dois(self, test_data):
    """Refresh a ScienceDirect hybrid scrape and assert every scrape field exactly."""
    (doi, pdf_url, metadata_url, license, evidence) = test_data

    publication = pub.lookup_product_by_doi(doi)
    publication.refresh_hybrid_scrape()

    # Log expectations vs. actuals up front so failures are easy to debug.
    logger.info(u"was looking for pdf url {}, got {}".format(pdf_url, publication.scrape_pdf_url))
    logger.info(u"was looking for metadata url {}, got {}".format(metadata_url, publication.scrape_metadata_url))
    logger.info(u"was looking for license {}, got {}".format(license, publication.scrape_license))
    logger.info(u"was looking for evidence {}, got {}".format(evidence, publication.scrape_evidence))
    logger.info(u"https://api.unpaywall.org/v2/{}?email=me".format(doi))
    logger.info(u"doi: https://doi.org/{}".format(doi))
    if publication.error:
        logger.info(publication.error)

    assert_equals(publication.error, "")
    # Unlike assert_scrape_result, urls here must match exactly (case-sensitive).
    assert_equals(publication.scrape_pdf_url, pdf_url)
    assert_equals(publication.scrape_metadata_url, metadata_url)
    assert_equals(publication.scrape_license, license)
    assert_equals(publication.scrape_evidence, evidence)