예제 #1
0
class MaintenanceTest(PrefilledTest):
    """
    Tests for the maintenance tasks (cleanup_names, update_paper_statuses)
    and for the author names of papers created by DOI.

    NOTE(review): self.r2 and self.arxiv are not set in this class;
    presumably PrefilledTest provides them -- confirm.
    """

    @classmethod
    def setUpClass(cls):
        """Run the parent class-level setup, then store a CrossRefAPI."""
        super(MaintenanceTest, cls).setUpClass()
        cls.cr_api = CrossRefAPI()

    def test_cleanup_names(self):
        """A freshly saved name is expected to be gone after cleanup_names()."""
        n = Name.lookup_name(('Anaruic', 'Leclescuantebrste'))
        n.save()
        cleanup_names()
        try:
            Name.objects.get(first='Anaruic', last='Leclescuantebrste')
        except ObjectDoesNotExist:
            pass
        else:
            # Reaching this branch means the lookup succeeded, i.e. the
            # name survived the cleanup.
            self.fail('The name has not been cleaned up')

    def test_name_initial(self):
        """
        The first author of the paper created from this DOI must carry
        the same first and last name as the r2 fixture.
        """
        n = self.r2.name
        p = Paper.create_by_doi("10.1002/ange.19941062339")
        n1 = p.authors[0].name
        self.assertEqual((n1.first, n1.last), (n.first, n.last))

    def test_update_paper_statuses(self):
        """
        After an OaiRecord with a pdf_url is attached to a paper,
        update_paper_statuses() is expected to make the stored paper
        expose that pdf_url.
        """
        p = self.cr_api.create_paper_by_doi("10.1016/j.bmc.2005.06.035")
        p = Paper.from_bare(p)
        self.assertIsNone(p.pdf_url)
        pdf_url = 'https://www.youtube.com/watch?v=dQw4w9WgXcQ'
        OaiRecord.new(source=self.arxiv,
                      identifier='oai:arXiv.org:aunrisste',
                      about=p,
                      splash_url='http://www.perdu.com/',
                      pdf_url=pdf_url)
        update_paper_statuses()
        # Reload from the database to observe the updated value
        self.assertEqual(Paper.objects.get(pk=p.pk).pdf_url, pdf_url)
예제 #2
0
class CrossRefTest(TestCase):
    """
    Exercises CrossRefAPI: creating papers from DOIs and saving raw
    DOI metadata.
    """

    def setUp(self):
        # Every test gets its own API instance
        self.api = CrossRefAPI()

    def test_empty_pubdate(self):
        # The 'issued' date of this DOI is empty
        paper = self.api.create_paper_by_doi('10.1007/978-1-4020-7884-2_13')
        year = paper.pubdate.year
        self.assertEqual(year, 2006)

    def test_affiliations(self):
        paper = self.api.create_paper_by_doi('10.4204/eptcs.172.16')
        first_author = paper.authors[0]
        self.assertEqual(first_author.affiliation,
                         'École Normale Supérieure, Paris')

    def test_dirty_metadata(self):
        # The metadata below is sufficient to create a paper,
        # but not to create an OaiRecord.
        authors = [
            {"affiliation": [], "given": "Haowen", "family": "Chan"},
            {"affiliation": [], "given": "Adrian", "family": "Perrig"},
            {"affiliation": [], "given": "Dawn", "family": "Song"},
        ]
        created = {
            "timestamp": 1166129219000,
            "date-time": "2006-12-14T20:46:59Z",
            "date-parts": [[2006, 12, 14]],
        }
        metadata = {
            "DOI": "10.1007/978-1-4020-7884-2_13",
            "subtitle": [],
            "author": authors,
            "created": created,
            "title": ["Key Distribution Techniques for Sensor Networks"],
            "type": "book-chapter",
        }
        paper = self.api.save_doi_metadata(metadata)
        self.assertTrue(paper.is_orphan())
        self.assertFalse(paper.visible)

    def test_doctype_book(self):
        # Books are ignored
        # (technically, that's because we currently require a
        # 'container-title' in the metadata)
        book = self.api.create_paper_by_doi('10.1385/1592597998')
        self.assertTrue(book.is_orphan())

    def test_doi_open(self):
        # The first DOI must come with a PDF URL, the second one must not
        with_pdf = self.api.create_paper_by_doi('10.15200/winn.145838.88372')
        self.assertTrue(with_pdf.pdf_url)
        without_pdf = self.api.create_paper_by_doi('10.5061/dryad.b167g')
        self.assertFalse(without_pdf.pdf_url)
예제 #3
0
파일: oai.py 프로젝트: tarsbase/dissemin
class CiteprocTranslator(object):
    """
    Translator for the 'citeproc' format: the JSON-based Citeproc
    metadata served by Crossref.
    """

    def __init__(self):
        # The translation itself is delegated to this API object
        self.cr_api = CrossRefAPI()

    def format(self):
        """Return the name of the format handled by this translator."""
        return 'citeproc'

    def translate(self, header, metadata):
        """
        Save the given metadata through the CrossRef API.

        :param header: not used by this translator
        :param metadata: the metadata handed to save_doi_metadata
        :returns: the value returned by save_doi_metadata, or None
                  when it raises ValueError
        """
        try:
            saved = self.cr_api.save_doi_metadata(metadata)
        except ValueError:
            saved = None
        return saved
예제 #4
0
파일: tasks.py 프로젝트: rgrunbla/dissemin
def update_crossref():
    """
    Updates paper metadata from Crossref, by delegating to
    CrossRefAPI.fetch_and_save_new_records().
    """
    CrossRefAPI().fetch_and_save_new_records()
예제 #5
0
파일: orcid.py 프로젝트: tarsbase/dissemin
    def fetch_orcid_records(self,
                            orcid_identifier,
                            profile=None,
                            use_doi=True):
        """
        Queries ORCiD to retrieve the publications associated with a given ORCiD.
        Works carrying a DOI are first fetched by DOI (when use_doi is set);
        the remaining ones, and the ones for which the DOI fetch failed, are
        created from the metadata ORCID provides.

        This is a generator: nothing below runs (validation included) until
        it is first iterated.

        :param orcid_identifier: The ORCiD identifier, checked with validate_orcid.
        :param profile: The ORCID profile if it has already been fetched before (format: parsed JSON).
        :param use_doi: Fetch the publications by DOI when we find one (recommended, but slow)
        :returns: a generator, where all the papers found are yielded. (some of them could be in
                free form, hence not imported)
        :raises MetadataSourceException: if validate_orcid rejects the identifier.
        """
        cr_api = CrossRefAPI()

        # Cleanup iD:
        orcid_id = validate_orcid(orcid_identifier)
        if orcid_id is None:
            raise MetadataSourceException('Invalid ORCiD identifier')

        # Get ORCiD profile
        try:
            if profile is None:
                profile = OrcidProfile(orcid_id=orcid_id)
            else:
                profile = OrcidProfile(json=profile)
        except MetadataSourceException as e:
            # Best effort: report the problem and yield nothing
            print(e)
            return

        # As we have fetched the profile, let's update the Researcher
        # NOTE(review): the raw orcid_identifier is passed here, not the
        # validated orcid_id -- confirm this is intended.
        self.researcher = Researcher.get_or_create_by_orcid(orcid_identifier,
                                                            profile.json,
                                                            update=True)
        if not self.researcher:
            return

        # Reference name
        ref_name = profile.name
        # list of ignored papers due to incomplete metadata
        ignored_papers = []

        # Get summary publications and separate them in two classes:
        # - the ones with DOIs, that we will fetch with CrossRef
        dois_and_putcodes = []  # list of (DOIs,putcode) to fetch
        # - the ones without: we will fetch ORCID's metadata about them
        #   and try to create a paper with what they provide
        put_codes = []
        for summary in profile.work_summaries:
            if summary.doi and use_doi:
                dois_and_putcodes.append((summary.doi, summary.put_code))
            else:
                put_codes.append(summary.put_code)

        # 1st attempt with DOIs and CrossRef
        if use_doi:
            # Let's grab papers with DOIs found in our ORCiD profile.
            dois = [doi for doi, put_code in dois_and_putcodes]
            # idx maps a failed DOI back to its put-code; this assumes
            # fetch_metadata_from_dois yields exactly one result per DOI,
            # in the same order -- TODO confirm.
            for idx, (success, paper_or_metadata) in enumerate(
                    self.fetch_metadata_from_dois(cr_api, ref_name, orcid_id,
                                                  dois)):
                if success:
                    yield paper_or_metadata
                else:
                    # Fall back to ORCID's own metadata for this work
                    put_codes.append(dois_and_putcodes[idx][1])

        # 2nd attempt with ORCID's own crappy metadata
        works = profile.fetch_works(put_codes)
        for work in works:
            if not work:
                continue

            # If the work is skipped due to invalid metadata,
            # we report it and record it as ignored.
            if work.skipped:
                print(work.json)
                print(work.skip_reason)
                print('work skipped due to incorrect metadata (%s)' %
                      (work.skip_reason))

                ignored_papers.append(work.as_dict())
                continue

            yield self.create_paper(work)

        self.warn_user_of_ignored_papers(ignored_papers)
        if ignored_papers:
            print('Warning: Total ignored papers: %d' % (len(ignored_papers)))
예제 #6
0
 def setUpClass(self):
     """
     Class-level setup: runs the parent setUpClass, then stores a
     CrossRefAPI instance as cr_api.
     """
     # NOTE(review): 'self' is presumably the class here (setUpClass is
     # normally a classmethod); the decorator is outside this excerpt.
     super(MaintenanceTest, self).setUpClass()
     self.cr_api = CrossRefAPI()
예제 #7
0
class MaintenanceTest(TestCase):
    """
    Tests for the maintenance tasks: update_paper_statuses,
    unmerge_paper_by_dois and unmerge_orcid_nones.

    NOTE(review): self.r2 and self.arxiv are not set in this class;
    presumably the TestCase base class provides them -- confirm.
    """

    @classmethod
    def setUpClass(cls):
        """Run the parent class-level setup, then store a CrossRefAPI."""
        super(MaintenanceTest, cls).setUpClass()
        cls.cr_api = CrossRefAPI()

    def test_name_initial(self):
        """
        The first author of the paper created from this DOI must carry
        the same first and last name as the r2 fixture.
        """
        n = self.r2.name
        p = Paper.create_by_doi("10.1002/ange.19941062339")
        n1 = p.authors[0].name
        self.assertEqual((n1.first, n1.last), (n.first, n.last))

    def test_update_paper_statuses(self):
        """
        After an OaiRecord with a pdf_url is attached to a paper,
        update_paper_statuses() is expected to make the stored paper
        expose that pdf_url.
        """
        p = self.cr_api.create_paper_by_doi("10.1016/j.bmc.2005.06.035")
        p = Paper.from_bare(p)
        self.assertIsNone(p.pdf_url)
        pdf_url = 'https://www.youtube.com/watch?v=dQw4w9WgXcQ'
        OaiRecord.new(source=self.arxiv,
                      identifier='oai:arXiv.org:aunrisste',
                      about=p,
                      splash_url='http://www.perdu.com/',
                      pdf_url=pdf_url)
        update_paper_statuses()
        # Reload from the database to observe the updated value
        self.assertEqual(Paper.objects.get(pk=p.pk).pdf_url, pdf_url)

    def test_unmerge_paper(self):
        """Two merged papers are split again by unmerge_paper_by_dois()."""
        # First we merge two unrelated papers
        p1 = Paper.create_by_doi("10.1016/j.bmc.2005.06.035")
        title1 = p1.title
        p2 = Paper.create_by_doi("10.1016/j.ijar.2017.06.011")
        title2 = p2.title
        p1.merge(p2)
        # Then we unmerge them
        unmerge_paper_by_dois(p1)
        # We have two new papers!
        p3 = Paper.get_by_doi("10.1016/j.bmc.2005.06.035")
        self.assertNotEqual(p3.id, p1.id)
        self.assertEqual(p3.title, title1)
        p4 = Paper.get_by_doi("10.1016/j.ijar.2017.06.011")
        self.assertNotEqual(p4.id, p1.id)
        self.assertNotEqual(p4.id, p3.id)
        self.assertEqual(p4.title, title2)

    def test_unmerge_orcid_nones(self):
        """
        Papers merged because they share a '.../work/None' identifier
        are separated again by unmerge_orcid_nones().
        """
        # First, fetch a few DOIs
        dois = [
            "10.1075/aicr.90.09ngo",
            "10.1075/aicr.90.04wad",
        ]
        for doi in dois:
            Paper.create_by_doi(doi)

        # Then, fetch an ORCID profile with a buggy version of the ORCID
        # interface, which incorrectly merges papers together
        with patch.object(OrcidPaperSource, '_oai_id_for_doi') as mock_identifier:
            mock_identifier.return_value = "https://pub.orcid.org/v2.1/0000-0002-1909-134X/work/None"
            profile = OrcidProfileStub('0000-0002-1909-134X', instance='orcid.org')
            trung = Researcher.get_or_create_by_orcid('0000-0002-1909-134X', profile=profile)
            OrcidPaperSource().fetch_and_save(trung, profile=profile)

        # The two papers are incorrectly merged!
        papers = [Paper.get_by_doi(doi) for doi in dois]
        self.assertEqual(papers[0], papers[1])

        # We unmerge them
        unmerge_orcid_nones()

        # The two papers are now distinct
        papers = [Paper.get_by_doi(doi) for doi in dois]
        self.assertNotEqual(papers[0], papers[1])
예제 #8
0
 def setUp(self):
     """Store a new CrossRefAPI instance as self.api."""
     self.api = CrossRefAPI()
예제 #9
0
    def fetch_orcid_records(self,
                            orcid_identifier,
                            profile=None,
                            use_doi=True):
        """
        Queries ORCiD to retrieve the publications associated with a given ORCiD.
        Publications carrying DOIs are fetched by DOI when use_doi is set;
        the other ones are created from the metadata found in the profile.

        This is a generator: nothing below runs (validation included) until
        it is first iterated.

        :param orcid_identifier: The ORCiD identifier, checked with validate_orcid.
        :param profile: The ORCID profile if it has already been fetched before (format: parsed JSON).
        :param use_doi: Fetch the publications by DOI when we find one (recommended, but slow)
        :returns: a generator, where all the papers found are yielded. (some of them could be in
                free form, hence not imported)
        :raises MetadataSourceException: if validate_orcid rejects the identifier.
        """
        cr_api = CrossRefAPI()

        # Cleanup iD:
        orcid_id = validate_orcid(orcid_identifier)
        if orcid_id is None:
            raise MetadataSourceException('Invalid ORCiD identifier')

        # Get ORCiD profile
        try:
            if profile is None:
                profile = OrcidProfile(orcid_id=orcid_id)
            else:
                profile = OrcidProfile(json=profile)
        except MetadataSourceException as e:
            # Best effort: report the problem and yield nothing
            print(e)
            return

        # As we have fetched the profile, let's update the Researcher
        # NOTE(review): the raw orcid_identifier is passed here, not the
        # validated orcid_id -- confirm this is intended.
        self.researcher = Researcher.get_or_create_by_orcid(orcid_identifier,
                                                            profile.json,
                                                            update=True)
        if not self.researcher:
            return

        # Reference name
        ref_name = profile.name
        # curl -H "Accept: application/orcid+json"
        # 'http://pub.orcid.org/v1.2/0000-0002-8612-8827/orcid-works' -L -i
        dois = []  # list of DOIs to fetch
        # list of ignored papers due to incomplete metadata
        ignored_papers = []

        # Fetch publications (1st attempt with ORCiD data)
        pubs = jpath('orcid-profile/orcid-activities/orcid-works/orcid-work',
                     profile, [])
        for pub in pubs:
            data_paper = ORCIDDataPaper.from_orcid_metadata(
                ref_name, orcid_id, pub, stop_if_dois_exists=use_doi)
            if not data_paper:
                continue

            # We want to batch the DOI lookups rather than do them one by one.
            if data_paper.dois and use_doi:
                dois.extend(data_paper.dois)
                continue

            # If the paper is skipped due to invalid metadata.
            # We first try to reconcile it with local researcher author name.
            # Then, we consider it missed.
            if data_paper.skipped:
                data_paper = self.reconcile_paper(
                    ref_name,
                    orcid_id,
                    pub,
                    overrides={
                        'authors': [(self.researcher.name.first,
                                     self.researcher.name.last)]
                    })
                if data_paper.skipped:
                    print('%s is skipped due to incorrect metadata (%s)' %
                          (data_paper, data_paper.skip_reason))

                    ignored_papers.append(data_paper.as_dict())
                    continue

            yield self.create_paper(data_paper)

        # 2nd attempt with DOIs and CrossRef
        if use_doi:
            # NOTE: an incremental CrossRef search
            # (self.fetch_crossref_incrementally(cr_api, orcid_id)) used to
            # run here as well; it is currently disabled.

            # Let's grab papers with DOIs found in our ORCiD profile.
            # FIXME(RaitoBezarius): if we fail here, we should get back the pub
            # and yield it.
            for success, paper_or_metadata in self.fetch_metadata_from_dois(
                    cr_api, ref_name, orcid_id, dois):
                if success:
                    yield paper_or_metadata
                else:
                    ignored_papers.append(paper_or_metadata)
                    print('This metadata (%s) yields no paper.' %
                          (paper_or_metadata))

        self.warn_user_of_ignored_papers(ignored_papers)
        if ignored_papers:
            print('Warning: Total ignored papers: %d' % (len(ignored_papers)))
예제 #10
0
파일: tasks.py 프로젝트: Phyks/dissemin
def update_crossref():
    """
    Updates paper metadata from Crossref, by delegating to
    CrossRefAPI.fetch_and_save_new_records().
    """
    CrossRefAPI().fetch_and_save_new_records()
예제 #11
0
파일: oai.py 프로젝트: tarsbase/dissemin
 def __init__(self):
     """Create the CrossRefAPI instance kept as self.cr_api."""
     self.cr_api = CrossRefAPI()
예제 #12
0
 def setUp(self):
     """Store a new CrossRefAPI instance as self.api."""
     self.api = CrossRefAPI()
예제 #13
0
class CrossRefTest(TestCase):
    """
    Tests of CrossRefAPI: paper creation by DOI, saving of raw DOI
    metadata, record fetching and dump ingestion.
    """

    @classmethod
    def setUpClass(cls):
        """Run the parent setup and record the directory of this file."""
        super(CrossRefTest, cls).setUpClass()
        # Test data files are located relative to this directory
        cls.testdir = os.path.dirname(os.path.abspath(__file__))

    def setUp(self):
        # Every test gets its own API instance
        self.api = CrossRefAPI()

    def test_empty_pubdate(self):
        # This DOI has an empty 'issued' date
        p = self.api.create_paper_by_doi('10.1007/978-1-4020-7884-2_13')
        self.assertEqual(p.pubdate.year, 2006)

    def test_invalid_metadata(self):
        # authors with no family name
        paper = self.api.create_paper_by_doi('10.4156/aiss.vol3.issue9.31')
        self.assertIsNone(paper)

    def test_affiliations(self):
        p = self.api.create_paper_by_doi('10.4204/eptcs.172.16')
        self.assertEqual(p.authors[0].affiliation,
                         'École Normale Supérieure, Paris')

    def test_dirty_metadata(self):
        # saving a paper with enough metadata to create a paper, but not
        # an OaiRecord.
        p = self.api.save_doi_metadata({
            "DOI": "10.1007/978-1-4020-7884-2_13",
            "subtitle": [],
            "author": [
                {
                    "affiliation": [],
                    "given": "Haowen",
                    "family": "Chan"
                },
                {
                    "affiliation": [],
                    "given": "Adrian",
                    "family": "Perrig"
                },
                {
                    "affiliation": [],
                    "given": "Dawn",
                    "family": "Song"
                }
            ],
            "created": {
                "timestamp": 1166129219000,
                "date-time": "2006-12-14T20:46:59Z",
                "date-parts": [[2006, 12, 14]]
            },
            "title": [
                "Key Distribution Techniques for Sensor Networks"
            ],
            "type": "book-chapter"})
        self.assertTrue(p.is_orphan())
        self.assertFalse(p.visible)

    def test_doctype_book(self):
        # Books are ignored
        # (technically, that's because we currently require a
        # 'container-title' in the metadata)
        p = self.api.create_paper_by_doi('10.1385/1592597998')
        self.assertTrue(p.is_orphan())

    def test_doi_open(self):
        # The first DOI must come with a PDF URL, the second one must not
        self.assertTrue(self.api.create_paper_by_doi('10.15200/winn.145838.88372').pdf_url)
        self.assertFalse(self.api.create_paper_by_doi('10.5061/dryad.b167g').pdf_url)

    def test_fetch_papers(self):
        # The first 30 records returned for this ISSN must all have a
        # DOI starting with '10.1007/'
        generator = self.api.fetch_all_records(filters={'issn':'0302-9743'})
        for _ in range(30):
            metadata = next(generator)
            self.assertTrue(metadata['DOI'].startswith('10.1007/'))

    def test_import_dump(self):
        # After ingesting the sample dump, this DOI must be known
        # with the expected title
        self.api.ingest_dump(os.path.join(self.testdir, 'data/sample_crossref_dump.json.bz2'))
        p = Paper.get_by_doi('10.1016/j.jadohealth.2015.10.045')
        self.assertEqual("Sources, Type and Use of Social Support during Early Sexual Development of Black Gay and Bisexual Adolescent Males",
                         p.title)
예제 #14
0
 def setUpClass(self):
     """
     Class-level setup: runs the parent setUpClass, then stores a
     CrossRefAPI instance as cr_api.
     """
     # NOTE(review): 'self' is presumably the class here (setUpClass is
     # normally a classmethod); the decorator is outside this excerpt.
     super(MaintenanceTest, self).setUpClass()
     self.cr_api = CrossRefAPI()
예제 #15
0
class MaintenanceTest(TestCase):
    """
    Tests for the maintenance tasks: update_paper_statuses,
    unmerge_paper_by_dois and unmerge_orcid_nones.

    NOTE(review): self.r2 and self.arxiv are not set in this class;
    presumably the TestCase base class provides them -- confirm.
    """

    @classmethod
    def setUpClass(cls):
        """Run the parent class-level setup, then store a CrossRefAPI."""
        super(MaintenanceTest, cls).setUpClass()
        cls.cr_api = CrossRefAPI()

    def test_name_initial(self):
        """
        The first author of the paper created from this DOI must carry
        the same first and last name as the r2 fixture.
        """
        n = self.r2.name
        p = Paper.create_by_doi("10.1002/ange.19941062339")
        n1 = p.authors[0].name
        self.assertEqual((n1.first, n1.last), (n.first, n.last))

    def test_update_paper_statuses(self):
        """
        After an OaiRecord with a pdf_url is attached to a paper,
        update_paper_statuses() is expected to make the stored paper
        expose that pdf_url.
        """
        p = self.cr_api.create_paper_by_doi("10.1016/j.bmc.2005.06.035")
        p = Paper.from_bare(p)
        self.assertIsNone(p.pdf_url)
        pdf_url = 'https://www.youtube.com/watch?v=dQw4w9WgXcQ'
        OaiRecord.new(source=self.arxiv,
                      identifier='oai:arXiv.org:aunrisste',
                      about=p,
                      splash_url='http://www.perdu.com/',
                      pdf_url=pdf_url)
        update_paper_statuses()
        # Reload from the database to observe the updated value
        self.assertEqual(Paper.objects.get(pk=p.pk).pdf_url, pdf_url)

    def test_unmerge_paper(self):
        """Two merged papers are split again by unmerge_paper_by_dois()."""
        # First we merge two unrelated papers
        p1 = Paper.create_by_doi("10.1016/j.bmc.2005.06.035")
        title1 = p1.title
        p2 = Paper.create_by_doi("10.1016/j.ijar.2017.06.011")
        title2 = p2.title
        p1.merge(p2)
        # Then we unmerge them
        unmerge_paper_by_dois(p1)
        # We have two new papers!
        p3 = Paper.get_by_doi("10.1016/j.bmc.2005.06.035")
        self.assertNotEqual(p3.id, p1.id)
        self.assertEqual(p3.title, title1)
        p4 = Paper.get_by_doi("10.1016/j.ijar.2017.06.011")
        self.assertNotEqual(p4.id, p1.id)
        self.assertNotEqual(p4.id, p3.id)
        self.assertEqual(p4.title, title2)

    def test_unmerge_orcid_nones(self):
        """
        Papers merged because they share a '.../work/None' identifier
        are separated again by unmerge_orcid_nones().
        """
        # First, fetch a few DOIs
        dois = [
            "10.1075/aicr.90.09ngo",
            "10.1075/aicr.90.04wad",
        ]
        for doi in dois:
            Paper.create_by_doi(doi)

        # Then, fetch an ORCID profile with a buggy version of the ORCID
        # interface, which incorrectly merges papers together
        with patch.object(OrcidPaperSource,
                          '_oai_id_for_doi') as mock_identifier:
            mock_identifier.return_value = "https://pub.orcid.org/v2.1/0000-0002-1909-134X/work/None"
            profile = OrcidProfileStub('0000-0002-1909-134X',
                                       instance='orcid.org')
            trung = Researcher.get_or_create_by_orcid('0000-0002-1909-134X',
                                                      profile=profile)
            OrcidPaperSource().fetch_and_save(trung, profile=profile)

        # The two papers are incorrectly merged!
        papers = [Paper.get_by_doi(doi) for doi in dois]
        self.assertEqual(papers[0], papers[1])

        # We unmerge them
        unmerge_orcid_nones()

        # The two papers are now distinct
        papers = [Paper.get_by_doi(doi) for doi in dois]
        self.assertNotEqual(papers[0], papers[1])
예제 #16
0
class CrossRefTest(TestCase):
    """
    Tests of CrossRefAPI: paper creation by DOI, saving of raw DOI
    metadata, record fetching and dump ingestion.
    """

    @classmethod
    def setUpClass(cls):
        """Run the parent setup and record the directory of this file."""
        super(CrossRefTest, cls).setUpClass()
        # Test data files are located relative to this directory
        cls.testdir = os.path.dirname(os.path.abspath(__file__))

    def setUp(self):
        # Every test gets its own API instance
        self.api = CrossRefAPI()

    def test_empty_pubdate(self):
        # This DOI has an empty 'issued' date
        p = self.api.create_paper_by_doi('10.1007/978-1-4020-7884-2_13')
        self.assertEqual(p.pubdate.year, 2006)

    def test_invalid_metadata(self):
        # authors with no family name
        paper = self.api.create_paper_by_doi('10.4156/aiss.vol3.issue9.31')
        self.assertIsNone(paper)

    def test_affiliations(self):
        p = self.api.create_paper_by_doi('10.4204/eptcs.172.16')
        self.assertEqual(p.authors[0].affiliation,
                         'École Normale Supérieure, Paris')

    def test_dirty_metadata(self):
        # saving a paper with enough metadata to create a paper, but not
        # an OaiRecord.
        p = self.api.save_doi_metadata({
            "DOI":
            "10.1007/978-1-4020-7884-2_13",
            "subtitle": [],
            "author": [{
                "affiliation": [],
                "given": "Haowen",
                "family": "Chan"
            }, {
                "affiliation": [],
                "given": "Adrian",
                "family": "Perrig"
            }, {
                "affiliation": [],
                "given": "Dawn",
                "family": "Song"
            }],
            "created": {
                "timestamp": 1166129219000,
                "date-time": "2006-12-14T20:46:59Z",
                "date-parts": [[2006, 12, 14]]
            },
            "title": ["Key Distribution Techniques for Sensor Networks"],
            "type":
            "book-chapter"
        })
        self.assertTrue(p.is_orphan())
        self.assertFalse(p.visible)

    def test_doctype_book(self):
        # Books are ignored
        # (technically, that's because we currently require a
        # 'container-title' in the metadata)
        p = self.api.create_paper_by_doi('10.1385/1592597998')
        self.assertTrue(p.is_orphan())

    def test_doi_open(self):
        # The first DOI must come with a PDF URL, the second one must not
        self.assertTrue(
            self.api.create_paper_by_doi('10.15200/winn.145838.88372').pdf_url)
        self.assertFalse(
            self.api.create_paper_by_doi('10.5061/dryad.b167g').pdf_url)

    def test_fetch_papers(self):
        # The first 30 records returned for this ISSN must all have a
        # DOI starting with '10.1007/'
        generator = self.api.fetch_all_records(filters={'issn': '0302-9743'})
        for _ in range(30):
            metadata = next(generator)
            self.assertTrue(metadata['DOI'].startswith('10.1007/'))

    def test_import_dump(self):
        # After ingesting the sample dump, this DOI must be known
        # with the expected title
        self.api.ingest_dump(
            os.path.join(self.testdir, 'data/sample_crossref_dump.json.bz2'))
        p = Paper.get_by_doi('10.1016/j.jadohealth.2015.10.045')
        self.assertEqual(
            "Sources, Type and Use of Social Support during Early Sexual Development of Black Gay and Bisexual Adolescent Males",
            p.title)