Exemple #1
0
class OrcidProfileTest(unittest.TestCase):
    """
    Tests for OrcidProfile and OrcidWorkSummary.

    Most tests build OrcidProfile objects from hard-coded ORCID iDs and
    compare their attributes with expected values; test_wrong_id_type and
    test_multiple_ids feed inline JSON payloads to OrcidWorkSummary instead.

    TODO: duplicate all these profiles to the ORCID sandbox
    to be sure they will not be modified!
    """

    @classmethod
    def setUpClass(cls):
        """Build once the profiles that are shared by several tests."""
        # Stored on the class, so every test reaches them through self.
        cls.antonin = OrcidProfile(orcid_id='0000-0002-8612-8827')
        cls.thomas = OrcidProfile(orcid_id='0000-0003-0524-631X')
        cls.sergey = OrcidProfile(orcid_id='0000-0003-3397-9895')
        cls.marco = OrcidProfile(orcid_id='0000-0002-6561-5642')

    def test_simple_name(self):
        """`name` is a (first, last) pair for these three profiles."""
        self.assertEqual(self.antonin.name, ('Antonin', 'Delpeuch'))
        self.assertEqual(self.thomas.name, ('Thomas', 'Bourgeat'))
        self.assertEqual(self.marco.name, ('Marco', 'Diana'))

    def test_credit_name(self):
        """Check `name` on two profiles (presumably ones with a credit name)."""
        self.assertEqual(self.sergey.name, ('Sergey M.', 'Natanzon'))
        self.assertEqual(OrcidProfile(
            orcid_id='0000-0001-9547-293X').name, ('Darío', 'Álvarez'))

    def test_empty_lastname(self):
        """The last name is the empty string for this profile."""
        self.assertEqual(OrcidProfile(
            orcid_id='0000-0001-5006-3868').name, ('Qiang', ''))

    def test_other_names(self):
        """`other_names` is compared as a set: order and repeats are ignored."""
        self.assertEqual(set(self.sergey.other_names),
                         {('Sergey', 'Natanzon'), ('S.', 'Natanzon'),
                          ('S. M.', 'Natanzon')})

    def test_homepage_without_http(self):
        """`homepage` is expected to carry an explicit http:// scheme."""
        self.assertEqual(OrcidProfile(
            orcid_id='0000-0002-5710-3989').homepage, 'http://evrard.perso.enseeiht.fr')

    def test_iterable(self):
        """Iterating over a profile yields text keys."""
        # type(u'') is `unicode` on Python 2 and `str` on Python 3, so the
        # check does not depend on the Python-2-only `unicode` name.
        text_type = type(u'')
        for key in self.thomas:
            self.assertIsInstance(key, text_type)

    def test_attr(self):
        """A profile supports `in` and item access on its keys."""
        self.assertIn('orcid-identifier', self.thomas)
        self.assertIsInstance(self.thomas['orcid-identifier'], dict)

    def test_wrong_instance(self):
        """An unsupported `instance` value raises ValueError."""
        with self.assertRaises(ValueError):
            OrcidProfile('0000-0002-2963-7764', instance='dissem.in')

    def test_sandbox(self):
        """Check `name` on one more profile (a sandbox one, per the test name)."""
        self.assertEqual(OrcidProfile(
            orcid_id='0000-0002-5654-4053').name, ('Peter', 'Lieth'))

    def test_search(self):
        """Every search hit has an iD and a name part; John Doe is found."""
        # for this one we use the production database
        # because test profiles on the sandbox
        # tend to get deleted quite often
        results = list(OrcidProfile.search_by_name('John', 'Doe'))
        for result in results:
            self.assertTrue(result['orcid'])
            self.assertTrue(result['first'] or result['last'])
        names_returned = [(result['first'], result['last'])
                          for result in results]
        self.assertIn(('John', 'Doe'), names_returned)

    def test_institution(self):
        """`institution` is a dict with name, identifier and country keys."""
        self.assertEqual(OrcidProfile(
            orcid_id='0000-0002-0022-2290').institution,
            {'name': 'Ecole Normale Superieure',
             'identifier': None,
             'country': 'FR'})
        self.assertEqual(OrcidProfile(
            orcid_id='0000-0002-5654-4053').institution,
            {'country': 'FR',
             'identifier': None,
             'name': "École nationale supérieure de céramique industrielle"})

    def test_work_summaries(self):
        """Summaries expose doi, title and a put_code that is never None."""
        summaries = self.antonin.work_summaries
        dois = [summary.doi for summary in summaries]
        titles = [summary.title for summary in summaries]
        put_codes = [summary.put_code for summary in summaries]
        self.assertIn('10.4204/eptcs.172.16', dois)
        self.assertIn('Complexity of Grammar Induction for Quantum Types', titles)
        self.assertNotIn(None, put_codes)

    def test_philipp(self):
        """A known DOI is present among this profile's work summaries."""
        p = OrcidProfile(orcid_id='0000-0001-6723-6833')
        dois = [summary.doi for summary in p.work_summaries]
        self.assertIn('10.3354/meps09890', dois)

    def test_wrong_id_type(self):
        """
        I found this payload in an ORCID profile... looks like ORCID
        does not validate their ids against regexes.

        The only external id is typed "doi" but holds a handle URL, so the
        summary is expected to report no DOI.
        """
        summary_json = {
            "last-modified-date": {
                "value": 1505077812702
            },
            "external-ids": {
                "external-id": [{
                    "external-id-type": "doi",
                    "external-id-value": "http://hdl.handle.net/2080/2662",
                    "external-id-url": None,
                    "external-id-relationship": "SELF"
                }]
            },
            "work-summary": [{
                "put-code": 36669776,
                "created-date": {
                    "value": 1505077812702
                },
                "last-modified-date": {
                    "value": 1505077812702
                },
                "source": {
                    "source-orcid": {
                        "uri": "https://orcid.org/0000-0002-9658-1473",
                        "path": "0000-0002-9658-1473",
                        "host": "orcid.org"
                    },
                    "source-client-id": None,
                    "source-name": {
                        "value": "Bhojaraju Gunjal"
                    }
                },
                "title": {
                    "title": {
                        "value": "Open Source Solutions for Creation of ETD Archives/Repository: A Case Study of Central Library@NIT Rourkela"
                    },
                    "subtitle": None,
                    "translated-title": None
                },
                "external-ids": {
                    "external-id": [{
                        "external-id-type": "doi",
                        "external-id-value": "http://hdl.handle.net/2080/2662",
                        "external-id-url": None,
                        "external-id-relationship": "SELF"
                    }]
                },
                "type": "CONFERENCE_PAPER",
                "publication-date": {
                    "year": {
                        "value": "2017"
                    },
                    "month": None,
                    "day": None,
                    "media-type": None
                },
                "visibility": "PUBLIC",
                "path": "/0000-0002-9658-1473/work/36669776",
                "display-index": "1"
            }]
        }
        summary = OrcidWorkSummary(summary_json)
        self.assertIsNone(summary.doi)

    def test_multiple_ids(self):
        """The DOI is found even when another id type is listed before it."""
        summary_json = {
            "last-modified-date": {
                "value": 1506388112650
            },
            "external-ids": {
                "external-id": [{
                    "external-id-type": "eid",
                    "external-id-value": "2-s2.0-84864877237",
                    "external-id-url": None,
                    "external-id-relationship": "SELF"
                }, {
                    "external-id-type": "doi",
                    "external-id-value": "10.3354/meps09890",
                    "external-id-url": None,
                    "external-id-relationship": "SELF"
                }]
            },
            "work-summary": [{
                "put-code": 19176128,
                "created-date": {
                    "value": 1444695659490
                },
                "last-modified-date": {
                    "value": 1506388112650
                },
                "source": {
                    "source-orcid": None,
                    "source-client-id": {
                        "uri": "https://orcid.org/client/0000-0002-3054-1567",
                        "path": "0000-0002-3054-1567",
                        "host": "orcid.org"
                    },
                    "source-name": {
                        "value": "CrossRef Metadata Search"
                    }
                },
                "title": {
                    "title": {
                        "value": "Elephant seal foraging dives track prey distribution, not temperature: Comment on McIntyre et al. (2011)"
                    },
                    "subtitle": None,
                    "translated-title": None
                },
                "external-ids": {
                    "external-id": [{
                        "external-id-type": "doi",
                        "external-id-value": "10.3354/meps09890",
                        "external-id-url": None,
                        "external-id-relationship": "SELF"
                    }]
                },
                "type": "JOURNAL_ARTICLE",
                "publication-date": {
                    "year": {
                        "value": "2012"
                    },
                    "month": {
                        "value": "08"
                    },
                    "day": {
                        "value": "08"
                    },
                    "media-type": None
                },
                "visibility": "PUBLIC",
                "path": "/0000-0001-6723-6833/work/19176128",
                "display-index": "0"
            }]
        }
        summary = OrcidWorkSummary(summary_json)
        self.assertEqual(summary.doi, '10.3354/meps09890')

    def test_works(self):
        """Works fetched by put code expose the expected title and pubtype."""
        summaries = self.antonin.work_summaries
        put_codes = [s.put_code for s in summaries]
        works = list(self.antonin.fetch_works(put_codes))
        titles = [work.title for work in works]
        self.assertIn('Complexity of Grammar Induction for Quantum Types', titles)
        pubtypes = [work.pubtype for work in works]
        self.assertIn('journal-article', pubtypes)
Exemple #2
0
    def fetch_orcid_records(self,
                            orcid_identifier,
                            profile=None,
                            use_doi=True):
        """
        Queries ORCiD to retrieve the publications associated with a given ORCiD.
        It also fetches such papers from the CrossRef search interface.

        This is a generator: nothing below runs (not even the validation of
        the identifier) until the result is iterated. As a side effect,
        self.researcher is set from the fetched profile.

        :param orcid_identifier: the ORCiD to look up; it is checked with validate_orcid.
        :param profile: The ORCID profile if it has already been fetched before (format: parsed JSON).
        :param use_doi: Fetch the publications by DOI when we find one (recommended, but slow)
        :returns: a generator, where all the papers found are yielded. (some of them could be in
                free form, hence not imported)
        :raises MetadataSourceException: if validate_orcid rejects orcid_identifier.
        """
        cr_api = CrossRefAPI()

        # Cleanup iD:
        orcid_id = validate_orcid(orcid_identifier)
        if orcid_id is None:
            raise MetadataSourceException('Invalid ORCiD identifier')

        # Get ORCiD profile: fetch it by iD, or wrap the JSON we were given.
        try:
            if profile is None:
                profile = OrcidProfile(orcid_id=orcid_id)
            else:
                profile = OrcidProfile(json=profile)
        except MetadataSourceException as e:
            # Report the problem and yield nothing at all.
            print(e)
            return

        # As we have fetched the profile, let's update the Researcher
        self.researcher = Researcher.get_or_create_by_orcid(orcid_identifier,
                                                            profile.json,
                                                            update=True)
        if not self.researcher:
            return

        # Reference name
        ref_name = profile.name
        ignored_papers = []  # list of ignored papers due to incomplete metadata

        # Get summary publications and separate them in two classes:
        # - the ones with DOIs, that we will fetch with CrossRef
        dois_and_putcodes = []  # list of (DOIs,putcode) to fetch
        # - the ones without: we will fetch ORCID's metadata about them
        #   and try to create a paper with what they provide
        put_codes = []
        for summary in profile.work_summaries:
            if summary.doi and use_doi:
                dois_and_putcodes.append((summary.doi, summary.put_code))
            else:
                put_codes.append(summary.put_code)

        # 1st attempt with DOIs and CrossRef
        if use_doi:
            # Let's grab papers with DOIs found in our ORCiD profile.
            dois = [doi for doi, _ in dois_and_putcodes]
            # NOTE(review): the index lookup below assumes one result per DOI,
            # in the same order as `dois` -- confirm against
            # fetch_metadata_from_dois.
            for idx, (success, paper_or_metadata) in enumerate(
                    self.fetch_metadata_from_dois(cr_api, ref_name, orcid_id,
                                                  dois)):
                if success:
                    yield paper_or_metadata
                else:
                    # The DOI lookup failed: retry this work by put code.
                    put_codes.append(dois_and_putcodes[idx][1])

        # 2nd attempt with ORCID's own crappy metadata
        works = profile.fetch_works(put_codes)
        for work in works:
            if not work:
                continue

            # Works flagged as skipped are reported and recorded as ignored
            # instead of being turned into papers.
            if work.skipped:
                print(work.json)
                print(work.skip_reason)
                print('work skipped due to incorrect metadata (%s)' %
                      (work.skip_reason))

                ignored_papers.append(work.as_dict())
                continue

            yield self.create_paper(work)

        self.warn_user_of_ignored_papers(ignored_papers)
        if ignored_papers:
            print('Warning: Total ignored papers: %d' % (len(ignored_papers)))
Exemple #3
0
    def fetch_orcid_records(self, orcid_identifier, profile=None, use_doi=True):
        """
        Queries ORCiD to retrieve the publications associated with a given ORCiD.
        It also fetches such papers from the CrossRef search interface.

        This is a generator: nothing below runs (not even the validation of
        the identifier) until the result is iterated. As a side effect,
        self.researcher is set from the fetched profile.

        :param orcid_identifier: the ORCiD to look up; it is checked with validate_orcid.
        :param profile: The ORCID profile if it has already been fetched before. It is used
                as is, so it must be a profile object exposing `json`, `name`,
                `work_summaries` and `fetch_works`.
        :param use_doi: Fetch the publications by DOI when we find one (recommended, but slow)
        :returns: a generator, where all the papers found are yielded. (some of them could be in
                free form, hence not imported)
        :raises MetadataSourceException: if validate_orcid rejects orcid_identifier.
        """
        cr_api = CrossRefAPI()

        # Cleanup iD:
        orcid_id = validate_orcid(orcid_identifier)
        if orcid_id is None:
            raise MetadataSourceException('Invalid ORCiD identifier')

        # Get ORCiD profile (only fetched when the caller did not supply one)
        try:
            if profile is None:
                profile = OrcidProfile(orcid_id=orcid_id)
        except MetadataSourceException:
            # Log the traceback and yield nothing at all.
            logger.exception("ORCID Profile Error")
            return

        # As we have fetched the profile, let's update the Researcher
        self.researcher = Researcher.get_or_create_by_orcid(orcid_identifier,
                profile.json, update=True)
        if not self.researcher:
            return

        # Reference name
        ref_name = profile.name
        ignored_papers = []  # list of ignored papers due to incomplete metadata

        # Get summary publications and separate them in two classes:
        # - the ones with DOIs, that we will fetch with CrossRef
        dois_and_putcodes = []  # list of (DOIs,putcode) to fetch
        # - the ones without: we will fetch ORCID's metadata about them
        #   and try to create a paper with what they provide
        put_codes = []
        for summary in profile.work_summaries:
            if summary.doi and use_doi:
                dois_and_putcodes.append((summary.doi, summary.put_code))
            else:
                put_codes.append(summary.put_code)

        # 1st attempt with DOIs and CrossRef
        if use_doi:
            # Let's grab papers with DOIs found in our ORCiD profile.
            dois = [doi for doi, _ in dois_and_putcodes]
            # NOTE(review): the index lookup below assumes one result per DOI,
            # in the same order as `dois` -- confirm against
            # fetch_metadata_from_dois.
            doi_results = self.fetch_metadata_from_dois(
                cr_api, ref_name, orcid_id, dois)
            for idx, (success, paper_or_metadata) in enumerate(doi_results):
                if success:
                    yield paper_or_metadata  # We know that this is a paper
                else:
                    # The DOI lookup failed: retry this work by put code.
                    put_codes.append(dois_and_putcodes[idx][1])

        # 2nd attempt with ORCID's own crappy metadata
        works = profile.fetch_works(put_codes)
        for work in works:
            if not work:
                continue

            # Works flagged as skipped are logged and recorded as ignored
            # instead of being turned into papers.
            if work.skipped:
                # Log the work's JSON next to the reason it was skipped.
                logger.warning(
                    "Work skipped due to incorrect metadata. \n %s \n %s",
                    work.json, work.skip_reason)

                ignored_papers.append(work.as_dict())
                continue

            yield self.create_paper(work)

        self.warn_user_of_ignored_papers(ignored_papers)
        if ignored_papers:
            logger.warning("Total ignored papers: %d", len(ignored_papers))