Ejemplo n.º 1
0
def assert_scrape_result(doi, pdf_url, metadata_url, license, evidence):
    """Check the hybrid scrape result of one DOI against expected values.

    Looks the publication up by DOI, calls ``refresh_hybrid_scrape()`` on
    it, logs expected versus actual values, and asserts on the scrape
    fields. It then calls ``ask_hybrid_scrape()`` and checks the first
    entry of ``open_locations``, or that ``open_locations`` is empty when
    neither URL is expected.

    Args:
        doi: DOI passed to ``pub.lookup_product_by_doi``.
        pdf_url: Expected PDF URL (compared case-insensitively).
        metadata_url: Expected metadata URL (compared case-insensitively).
        license: Expected license. The name is kept for keyword callers
            even though it shadows the interactive ``license`` builtin.
        evidence: Expected evidence value.

    Raises:
        AssertionError: If the lookup returns ``None``, the publication
            reports an error, an expected value does not match, or an
            open location is expected but none is present.
    """
    my_pub = pub.lookup_product_by_doi(doi)
    # Report a missing publication as a readable test failure instead of an
    # AttributeError on None further down.
    assert my_pub is not None, u"no publication found for doi {}".format(doi)
    my_pub.refresh_hybrid_scrape()

    logger.info(u"was looking for pdf url {}, got {}".format(
        pdf_url, my_pub.scrape_pdf_url))
    logger.info(u"was looking for metadata url {}, got {}".format(
        metadata_url, my_pub.scrape_metadata_url))
    logger.info(u"was looking for license {}, got {}".format(
        license, my_pub.scrape_license))
    logger.info(u"was looking for evidence {}, got {}".format(
        evidence, my_pub.scrape_evidence))
    logger.info(u"https://api.unpaywall.org/v2/{}?email=me".format(doi))
    logger.info(u"doi: https://doi.org/{}".format(doi))

    if my_pub.error:
        logger.info(my_pub.error)

    assert_equals(my_pub.error, "")
    assert_equals_case_insensitive(my_pub.scrape_pdf_url, pdf_url)
    assert_equals_case_insensitive(my_pub.scrape_metadata_url, metadata_url)
    assert_equals(my_pub.scrape_license, license)
    assert_equals(my_pub.scrape_evidence, evidence)

    my_pub.ask_hybrid_scrape()

    if pdf_url or metadata_url:
        open_locations = my_pub.open_locations
        # Guard the [0] lookup so a missing location is a test failure with
        # context rather than a bare IndexError.
        assert open_locations, (
            u"expected an open location for doi {}, found none".format(doi))
        location = open_locations[0]
        assert_equals_case_insensitive(location.pdf_url, pdf_url)
        assert_equals_case_insensitive(location.metadata_url, metadata_url)
        assert_equals(location.evidence, evidence)
        assert_equals(location.license, license)
    else:
        assert_false(my_pub.open_locations)
Ejemplo n.º 2
0
    def test_chorus_dois(self, test_data):
        # Refresh the publication for one DOI and require that it ends up
        # with an empty error and a fulltext URL that is not None. When the
        # lookup returns a falsy value the DOI is logged and skipped.
        doi = test_data

        # Per the original note, cookies break the cache pickling, so the
        # requests cache is uninstalled for every DOI (the per-prefix filter
        # for 10.1109, 10.1161, 10.1093, 10.1007 and 10.1039 stays disabled).
        requests_cache.uninstall_cache()

        publication = pub.lookup_product_by_doi(doi)
        if not publication:
            skip_message = u"doi {} not in db, skipping".format(doi)
            logger.info(skip_message)
            return
        publication.refresh()

        # Log links and the best-* fields to help debug a failing DOI.
        api_link = u"https://api.unpaywall.org/v2/{}?email=me".format(doi)
        logger.info(api_link)
        doi_link = u"doi: https://doi.org/{}".format(doi)
        logger.info(doi_link)
        license_line = u"license: {}".format(publication.best_license)
        logger.info(license_line)
        evidence_line = u"evidence: {}".format(publication.best_evidence)
        logger.info(evidence_line)
        host_line = u"host: {}".format(publication.best_host)
        logger.info(host_line)
        if publication.error:
            logger.info(publication.error)

        assert_equals(publication.error, "")
        assert_is_not_none(publication.fulltext_url)
Ejemplo n.º 3
0
    def test_active_dois(self, test_data):
        # Refresh the publication for one (doi, fulltext_url, license, color)
        # data row and check that it reports no error and exposes the
        # expected, non-None fulltext URL.

        # license and color belong to the data row but are not asserted on;
        # the underscore names also avoid shadowing the `license` builtin.
        (doi, fulltext_url, _license, _color) = test_data

        # NOTE: the requests cache stays installed here. A disabled workaround
        # for "cookies breaks the cache pickling" used to call
        # requests_cache.uninstall_cache() at this point.

        my_pub = pub.lookup_product_by_doi(doi)
        # Report a missing publication as a readable test failure instead of
        # an AttributeError on None.
        assert my_pub is not None, (
            u"no publication found for doi {}".format(doi))
        my_pub.refresh()

        logger.info(u"\n\nwas looking for {}, got {}".format(
            fulltext_url, my_pub.fulltext_url))
        logger.info(u"https://api.unpaywall.org/v2/{}?email=me".format(doi))
        logger.info(u"doi: https://doi.org/{}".format(doi))
        logger.info(u"license: {}".format(my_pub.license))
        logger.info(u"oa_color: {}".format(my_pub.oa_color))
        logger.info(u"evidence: {}".format(my_pub.evidence))
        if my_pub.error:
            logger.info(my_pub.error)

        # The error check used to be repeated after the URL checks; once is
        # enough because nothing in between touches my_pub.
        assert_equals(my_pub.error, "")
        assert_equals(my_pub.fulltext_url, fulltext_url)
        assert_not_equals(my_pub.fulltext_url, None)
        # The license comparison remains disabled, as in the original test.
Ejemplo n.º 4
0
    def test_active_dois(self, test_data):
        # Refresh the publication for one (doi, fulltext_url, license, color)
        # data row and check that it reports no error and exposes the
        # expected, non-None fulltext URL.

        # license and color belong to the data row but are not asserted on;
        # the underscore names also avoid shadowing the `license` builtin.
        (doi, fulltext_url, _license, _color) = test_data

        # NOTE: the requests cache stays installed here. A disabled workaround
        # for "cookies breaks the cache pickling" used to call
        # requests_cache.uninstall_cache() at this point.

        my_pub = pub.lookup_product_by_doi(doi)
        # Report a missing publication as a readable test failure instead of
        # an AttributeError on None.
        assert my_pub is not None, (
            u"no publication found for doi {}".format(doi))
        my_pub.refresh()

        logger.info(u"\n\nwas looking for {}, got {}".format(fulltext_url, my_pub.fulltext_url))
        logger.info(u"https://api.unpaywall.org/v2/{}?email=me".format(doi))
        logger.info(u"doi: https://doi.org/{}".format(doi))
        logger.info(u"license: {}".format(my_pub.license))
        logger.info(u"oa_color: {}".format(my_pub.oa_color))
        logger.info(u"evidence: {}".format(my_pub.evidence))
        if my_pub.error:
            logger.info(my_pub.error)

        # The error check used to be repeated after the URL checks; once is
        # enough because nothing in between touches my_pub.
        assert_equals(my_pub.error, "")
        assert_equals(my_pub.fulltext_url, fulltext_url)
        assert_not_equals(my_pub.fulltext_url, None)
        # The license comparison remains disabled, as in the original test.
Ejemplo n.º 5
0
    def test_chorus_dois(self, test_data):
        # Refresh the publication for one DOI and require that it ends up
        # with an empty error and a fulltext URL that is not None. When the
        # lookup returns a falsy value the DOI is logged and skipped.
        doi = test_data

        # Per the original note, cookies break the cache pickling, so the
        # requests cache is uninstalled for every DOI (the per-prefix filter
        # for 10.1109, 10.1161, 10.1093, 10.1007 and 10.1039 stays disabled).
        requests_cache.uninstall_cache()

        publication = pub.lookup_product_by_doi(doi)
        if not publication:
            skip_message = u"doi {} not in db, skipping".format(doi)
            logger.info(skip_message)
            return
        publication.refresh()

        # Log links and the best-* fields to help debug a failing DOI.
        api_link = u"https://api.unpaywall.org/v2/{}?email=me".format(doi)
        logger.info(api_link)
        doi_link = u"doi: https://doi.org/{}".format(doi)
        logger.info(doi_link)
        license_line = u"license: {}".format(publication.best_license)
        logger.info(license_line)
        evidence_line = u"evidence: {}".format(publication.best_evidence)
        logger.info(evidence_line)
        host_line = u"host: {}".format(publication.best_host)
        logger.info(host_line)
        if publication.error:
            logger.info(publication.error)

        assert_equals(publication.error, "")
        assert_is_not_none(publication.fulltext_url)
    def test_open_dois(self, test_data):
        # Recalculate the publication for one (doi, fulltext_url, license)
        # data row, log what was found, and require a fulltext URL that is
        # not None. The expected URL is only logged here, not compared.
        doi, fulltext_url, expected_license = test_data
        publication = pub.lookup_product_by_doi(doi)
        publication.recalculate()

        lookup_line = u"was looking for {}, got {}\n\n".format(
            fulltext_url, publication.fulltext_url)
        logger.info(lookup_line)
        doi_line = u"doi: http://doi.org/{}".format(doi)
        logger.info(doi_line)
        title_line = u"title: {}".format(publication.best_title)
        logger.info(title_line)
        evidence_line = u"evidence: {}\n\n".format(publication.evidence)
        logger.info(evidence_line)
        if publication.error:
            logger.info(publication.error)

        assert_not_equals(publication.fulltext_url, None)
Ejemplo n.º 7
0
    def test_open_dois(self, test_data):
        # Recalculate the publication for one (doi, fulltext_url, license)
        # data row, log what was found, and require that its fulltext URL is
        # not None and equals the expected one.
        doi, fulltext_url, expected_license = test_data
        publication = pub.lookup_product_by_doi(doi)
        publication.recalculate()

        lookup_line = u"was looking for {}, got {}\n\n".format(
            fulltext_url, publication.fulltext_url)
        logger.info(lookup_line)
        api_line = u"https://api.unpaywall.org/v2/{}?email=me".format(doi)
        logger.info(api_line)
        doi_line = u"doi: https://doi.org/{}".format(doi)
        logger.info(doi_line)
        title_line = u"title: {}".format(publication.best_title)
        logger.info(title_line)
        evidence_line = u"evidence: {}\n\n".format(publication.evidence)
        logger.info(evidence_line)
        if publication.error:
            logger.info(publication.error)

        assert_not_equals(publication.fulltext_url, None)
        assert_equals(fulltext_url, publication.fulltext_url)
Ejemplo n.º 8
0
    def test_arxiv_dois(self, test_data):
        # Recalculate the publication for one (doi, fulltext_url, license)
        # data row and require that it has a fulltext URL and that the
        # expected URL is the pdf_url of at least one of its OA locations.
        (doi, fulltext_url, license) = test_data
        my_pub = pub.lookup_product_by_doi(doi)
        my_pub.recalculate()

        logger.info(u"was looking for {}, got {}\n\n".format(fulltext_url, my_pub.fulltext_url))
        logger.info(u"https://api.unpaywall.org/v2/{}?email=me".format(doi))
        logger.info(u"doi: https://doi.org/{}".format(doi))
        logger.info(u"title: {}".format(my_pub.best_title))
        logger.info(u"evidence: {}\n\n".format(my_pub.evidence))
        if my_pub.error:
            logger.info(my_pub.error)

        assert_not_equals(my_pub.fulltext_url, None)
        # not sure that the arxiv url will be the best one, but make sure it is one of them
        urls = [loc.pdf_url for loc in my_pub.all_oa_locations]
        # Pass a message so a failure shows the candidates instead of the
        # bare "False is not true" that assert_true produces on its own.
        assert_true(
            fulltext_url in urls,
            u"expected {} among OA location pdf urls {}".format(
                fulltext_url, urls))
Ejemplo n.º 9
0
    def test_arxiv_dois(self, test_data):
        # Recalculate the publication for one (doi, fulltext_url, license)
        # data row and require that it has a fulltext URL and that the
        # expected URL is the pdf_url of at least one of its OA locations.
        (doi, fulltext_url, license) = test_data
        my_pub = pub.lookup_product_by_doi(doi)
        my_pub.recalculate()

        logger.info(u"was looking for {}, got {}\n\n".format(fulltext_url, my_pub.fulltext_url))
        logger.info(u"https://api.unpaywall.org/v2/{}?email=me".format(doi))
        logger.info(u"doi: https://doi.org/{}".format(doi))
        logger.info(u"title: {}".format(my_pub.best_title))
        logger.info(u"evidence: {}\n\n".format(my_pub.evidence))
        if my_pub.error:
            logger.info(my_pub.error)

        assert_not_equals(my_pub.fulltext_url, None)
        # not sure that the arxiv url will be the best one, but make sure it is one of them
        urls = [loc.pdf_url for loc in my_pub.all_oa_locations]
        # Pass a message so a failure shows the candidates instead of the
        # bare "False is not true" that assert_true produces on its own.
        assert_true(
            fulltext_url in urls,
            u"expected {} among OA location pdf urls {}".format(
                fulltext_url, urls))
Ejemplo n.º 10
0
    def test_sciencedirect_dois(self, test_data):
        # Call refresh_hybrid_scrape() on the publication for one
        # (doi, pdf_url, metadata_url, license, evidence) data row, log
        # expected versus actual values, and require an empty error plus
        # exact matches on all four scrape fields.
        doi, pdf_url, metadata_url, license, evidence = test_data

        publication = pub.lookup_product_by_doi(doi)
        publication.refresh_hybrid_scrape()

        pdf_line = u"was looking for pdf url {}, got {}".format(
            pdf_url, publication.scrape_pdf_url)
        logger.info(pdf_line)
        metadata_line = u"was looking for metadata url {}, got {}".format(
            metadata_url, publication.scrape_metadata_url)
        logger.info(metadata_line)
        license_line = u"was looking for license {}, got {}".format(
            license, publication.scrape_license)
        logger.info(license_line)
        evidence_line = u"was looking for evidence {}, got {}".format(
            evidence, publication.scrape_evidence)
        logger.info(evidence_line)
        api_line = u"https://api.unpaywall.org/v2/{}?email=me".format(doi)
        logger.info(api_line)
        doi_line = u"doi: https://doi.org/{}".format(doi)
        logger.info(doi_line)

        if publication.error:
            logger.info(publication.error)

        assert_equals(publication.error, "")
        assert_equals(publication.scrape_pdf_url, pdf_url)
        assert_equals(publication.scrape_metadata_url, metadata_url)
        assert_equals(publication.scrape_license, license)
        assert_equals(publication.scrape_evidence, evidence)