Beispiel #1
0
    def get_or_create_by_orcid(cls, orcid, profile=None, user=None):
        """
        Returns the researcher stored under the given ORCID id, creating
        it from the ORCID profile when no such researcher exists yet.

        :param orcid: the ORCID id used for the lookup
        :param profile: parsed JSON of the ORCID profile if it is already
                        available (otherwise the profile is built from
                        the id alone)
        :param user: handed over to ``Researcher.create_by_name``
        :returns: the existing or the newly created researcher
        """
        try:
            return Researcher.objects.get(orcid=orcid)
        except Researcher.DoesNotExist:
            pass

        # No researcher with this ORCID id yet: build one from the profile.
        if profile is not None:
            orcid_profile = OrcidProfile(json=profile)
        else:
            orcid_profile = OrcidProfile(id=orcid)

        first_and_last = orcid_profile.name
        homepage = orcid_profile.homepage
        email = orcid_profile.email
        researcher = Researcher.create_by_name(
            first_and_last[0],
            first_and_last[1],
            orcid=orcid,
            user=user,
            homepage=homepage,
            email=email)

        # Ensure that extra info is added: fill in every field that is
        # still empty on the researcher we got back.
        attributes = vars(researcher)
        needs_saving = False
        extra_info = (('homepage', homepage), ('orcid', orcid),
                      ('email', email))
        for field, value in extra_info:
            if not attributes[field] and value:
                attributes[field] = value
                needs_saving = True
        if needs_saving:
            researcher.save()

        # Register the other names listed in the profile as variants.
        for variant in orcid_profile.other_names:
            variant_confidence = name_similarity(variant, variant)
            variant_name = Name.lookup_name(variant)
            researcher.add_name_variant(variant_name, variant_confidence)

        return researcher
Beispiel #2
0
 def test_institution(self):
     """
     Checks the ``institution`` value exposed by two ORCID profiles.
     """
     expected_by_orcid = [
         ('0000-0002-0022-2290',
          {'name':'Ecole Normale Superieure',
           'identifier':None,
           'country':'FR'}),
         ('0000-0002-5654-4053',
          {'country': 'FR',
           'identifier': None,
           'name': "École nationale supérieure de céramique industrielle"}),
     ]
     for orcid_id, institution in expected_by_orcid:
         profile = OrcidProfile(orcid_id=orcid_id)
         self.assertEqual(profile.institution, institution)
Beispiel #3
0
 def test_institution(self):
     """
     Compares the ``institution`` of two ORCID profiles with the
     expected name / identifier / country values.
     """
     first_profile = OrcidProfile(orcid_id='0000-0002-0022-2290')
     first_expected = {
         'name': 'Ecole Normale Superieure',
         'identifier': None,
         'country': 'FR',
     }
     self.assertEqual(first_profile.institution, first_expected)

     second_profile = OrcidProfile(orcid_id='0000-0002-5654-4053')
     second_expected = {
         'country': 'FR',
         'identifier': None,
         'name': "Polytech'Rambouillet",
     }
     self.assertEqual(second_profile.institution, second_expected)
Beispiel #4
0
    def bulk_import(self, directory, fetch_papers=True, use_doi=False):
        """
        Imports every ORCID profile found below a directory of a dump
        (warning: this still uses our DOI cache).

        Every file in the tree is read as the JSON version of one ORCID
        profile, as in the official ORCID dump. When handling a file
        raises ValueError or KeyError, a warning is logged and the
        import moves on to the next file.

        :param directory: root of the directory tree to walk
        :param fetch_papers: when true, the papers of each profile are
                             fetched and saved as well
        :param use_doi: handed over to ``fetch_orcid_records``
        """
        for folder, _, filenames in os.walk(directory):
            for filename in filenames:
                path = os.path.join(folder, filename)
                with open(path, 'r') as handle:
                    try:
                        profile = json.load(handle)
                        identifier = profile['orcid-profile']['orcid-identifier']
                        orcid = identifier['path']
                        researcher = Researcher.get_or_create_by_orcid(
                            orcid, profile, update=True)
                        if not fetch_papers:
                            continue
                        records = self.fetch_orcid_records(
                            orcid,
                            profile=OrcidProfile(json=profile),
                            use_doi=use_doi)
                        for paper in records:
                            self.save_paper(paper, researcher)
                    except (ValueError, KeyError):
                        logger.warning("Invalid profile: %s", filename)
Beispiel #5
0
    def get_or_create_by_orcid(cls, orcid, profile=None, user=None):
        """
        Creates (or returns an existing) researcher from its ORCID id.

        :param orcid: the ORCID id used for the lookup
        :param profile: parsed JSON of the ORCID profile if it has already
                        been fetched (otherwise the profile is built from
                        the id alone)
        :param user: an user to associate with the profile.
        :returns: the existing or the newly created :class:`Researcher`
        :raises MetadataSourceException: if ``orcid`` is None
        """
        if orcid is None:
            raise MetadataSourceException('Invalid ORCID id')

        try:
            return Researcher.objects.get(orcid=orcid)
        except Researcher.DoesNotExist:
            pass

        # No researcher with this ORCID id yet: build one from the profile.
        if profile is not None:
            orcid_profile = OrcidProfile(json=profile)
        else:
            orcid_profile = OrcidProfile(id=orcid)

        first_and_last = orcid_profile.name
        homepage = orcid_profile.homepage
        email = orcid_profile.email
        researcher = Researcher.create_by_name(
            first_and_last[0],
            first_and_last[1],
            orcid=orcid,
            user=user,
            homepage=homepage,
            email=email)

        # Ensure that extra info is added: fill in every field that is
        # still empty on the researcher we got back.
        attributes = vars(researcher)
        needs_saving = False
        extra_info = (('homepage', homepage), ('orcid', orcid),
                      ('email', email))
        for field, value in extra_info:
            if not attributes[field] and value:
                attributes[field] = value
                needs_saving = True
        if needs_saving:
            researcher.save()

        # Register the other names listed in the profile as variants.
        for variant in orcid_profile.other_names:
            variant_confidence = name_similarity(variant, variant)
            variant_name = Name.lookup_name(variant)
            researcher.add_name_variant(variant_name, variant_confidence)

        return researcher
Beispiel #6
0
 def test_search(self):
     """
     Searches for 'John Doe' and checks that every result has an ORCID
     and at least one name part, and that the exact name is returned.
     """
     # This one runs against the production database: test profiles
     # on the sandbox tend to get deleted quite often.
     found = list(OrcidProfile.search_by_name('John', 'Doe'))

     well_formed = [
         len(entry['orcid']) and (len(entry['first']) or len(entry['last']))
         for entry in found
     ]
     self.assertTrue(all(well_formed))

     returned_names = [(entry['first'], entry['last']) for entry in found]
     self.assertTrue(('John', 'Doe') in returned_names)
Beispiel #7
0
 def test_sandbox(self):
     """
     Checks the name reported for the profile 0000-0002-5654-4053.
     """
     profile = OrcidProfile(orcid_id='0000-0002-5654-4053')
     self.assertEqual(profile.name, ('Peter', 'Lieth'))
Beispiel #8
0
 def test_wrong_instance(self):
     """
     Building a profile with ``instance='dissem.in'`` must raise
     ValueError.
     """
     self.assertRaises(ValueError, OrcidProfile,
                       '0000-0002-2963-7764', instance='dissem.in')
Beispiel #9
0
 def test_homepage_without_http(self):
     """
     Checks that the homepage of profile 0000-0002-5710-3989 is
     reported with its ``http://`` scheme.
     """
     profile = OrcidProfile(orcid_id='0000-0002-5710-3989')
     self.assertEqual(profile.homepage, 'http://evrard.perso.enseeiht.fr')
Beispiel #10
0
 def test_empty_lastname(self):
     """
     Checks that profile 0000-0001-5006-3868 yields an empty string as
     last name.
     """
     profile = OrcidProfile(orcid_id='0000-0001-5006-3868')
     self.assertEqual(profile.name, ('Qiang', ''))
Beispiel #11
0
    def fetch_orcid_records(self, orcid_identifier, profile=None, use_doi=True):
        """
        Yields the papers listed in the ORCID profile of a researcher.

        With ``use_doi`` set, the works that carry a DOI are first looked
        up through ``fetch_metadata_from_dois``. All the other works, and
        those whose DOI lookup did not succeed, are then built from the
        metadata ORCID itself provides.

        This is a generator: nothing below runs (the exception included)
        before it is iterated.

        :param orcid_identifier: the ORCID id of the researcher
        :param profile: the profile object if it has already been fetched
                        (its ``json``, ``name``, ``work_summaries`` and
                        ``fetch_works`` members are used); otherwise an
                        :class:`OrcidProfile` is built from the id
        :param use_doi: Fetch the publications by DOI when we find one (recommended, but slow)
        :returns: a generator, where all the papers found are yielded. (some of them could be in
                free form, hence not imported)
        :raises MetadataSourceException: if the identifier does not validate
        """
        crossref = CrossRefAPI()

        # Cleanup iD:
        clean_orcid = validate_orcid(orcid_identifier)
        if clean_orcid is None:
            raise MetadataSourceException('Invalid ORCiD identifier')

        # Get ORCiD profile
        if profile is None:
            try:
                profile = OrcidProfile(orcid_id=clean_orcid)
            except MetadataSourceException:
                logger.exception("ORCID Profile Error")
                return

        # As we have fetched the profile, let's update the Researcher
        self.researcher = Researcher.get_or_create_by_orcid(
            orcid_identifier, profile.json, update=True)
        if not self.researcher:
            return

        # Reference name
        reference_name = profile.name
        # Works left out because of incomplete metadata
        skipped_works = []

        # Sort the work summaries into two groups:
        # - (DOI, put code) pairs, to be fetched with CrossRef
        doi_lookups = []
        # - put codes of the works for which only ORCID's own metadata
        #   will be used
        orcid_only = []
        for summary in profile.work_summaries:
            if summary.doi and use_doi:
                doi_lookups.append((summary.doi, summary.put_code))
            else:
                orcid_only.append(summary.put_code)

        # 1st attempt with DOIs and CrossRef
        if use_doi:
            wanted_dois = [doi for doi, _ in doi_lookups]
            lookup_results = self.fetch_metadata_from_dois(
                crossref, reference_name, clean_orcid, wanted_dois)
            for position, (success, paper_or_metadata) in enumerate(lookup_results):
                if not success:
                    # Retry this work below, through its put code.
                    orcid_only.append(doi_lookups[position][1])
                    continue
                yield paper_or_metadata  # We know that this is a paper

        # 2nd attempt with ORCID's own metadata
        for work in profile.fetch_works(orcid_only):
            if not work:
                continue

            if not work.skipped:
                yield self.create_paper(work)
                continue

            # The work was skipped due to invalid metadata: report it
            # and keep track of it.
            logger.warning("Work skipped due to incorrect metadata. \n %s \n %s" % (work.reason, work.skip_reason))
            skipped_works.append(work.as_dict())

        self.warn_user_of_ignored_papers(skipped_works)
        if skipped_works:
            logger.warning("Total ignored papers: %d" % (len(skipped_works)))
Beispiel #12
0
 def setUpClass(self):
     """
     Builds four ORCID profiles and stores each one as an attribute.
     """
     shared_profiles = (
         ('antonin', '0000-0002-8612-8827'),
         ('thomas', '0000-0003-0524-631X'),
         ('sergey', '0000-0003-3397-9895'),
         ('marco', '0000-0002-6561-5642'),
     )
     for attribute, orcid_id in shared_profiles:
         setattr(self, attribute, OrcidProfile(orcid_id=orcid_id))
Beispiel #13
0
class OrcidProfileTest(unittest.TestCase):
    """
    Tests for OrcidProfile and OrcidWorkSummary.

    TODO: duplicate all these profiles to the ORCID sandbox
    to be sure they will not be modified!
    """

    @classmethod
    def setUpClass(self):
        """Builds the profiles shared by several tests."""
        shared_profiles = (
            ('antonin', '0000-0002-8612-8827'),
            ('thomas', '0000-0003-0524-631X'),
            ('sergey', '0000-0003-3397-9895'),
            ('marco', '0000-0002-6561-5642'),
        )
        for attribute, orcid_id in shared_profiles:
            setattr(self, attribute, OrcidProfile(orcid_id=orcid_id))

    def test_simple_name(self):
        """Checks the (first, last) name of three shared profiles."""
        expectations = (
            (self.antonin, ('Antonin', 'Delpeuch')),
            (self.thomas, ('Thomas', 'Bourgeat')),
            (self.marco, ('Marco', 'Diana')),
        )
        for profile, expected_name in expectations:
            self.assertEqual(profile.name, expected_name)

    def test_credit_name(self):
        """Checks the name of two further profiles."""
        self.assertEqual(self.sergey.name, ('Sergey M.', 'Natanzon'))
        other_profile = OrcidProfile(orcid_id='0000-0001-9547-293X')
        self.assertEqual(other_profile.name, ('Darío', 'Álvarez'))

    def test_empty_lastname(self):
        """Checks a profile whose last name is the empty string."""
        profile = OrcidProfile(orcid_id='0000-0001-5006-3868')
        self.assertEqual(profile.name, ('Qiang', ''))

    def test_other_names(self):
        """Checks the set of other names of a shared profile."""
        expected_names = set([
            ('Sergey', 'Natanzon'),
            ('S.', 'Natanzon'),
            ('S. M.', 'Natanzon'),
            ('Sergey', 'Natanzon'),
        ])
        self.assertEqual(set(self.sergey.other_names), expected_names)

    def test_homepage_without_http(self):
        """Checks that the homepage comes with its ``http://`` scheme."""
        profile = OrcidProfile(orcid_id='0000-0002-5710-3989')
        self.assertEqual(profile.homepage, 'http://evrard.perso.enseeiht.fr')

    def test_iterable(self):
        """Iterating over a profile yields unicode objects."""
        for item in self.thomas:
            self.assertEqual(type(item), unicode)

    def test_attr(self):
        """Checks membership test and item access on a profile."""
        profile = self.thomas
        self.assertTrue('orcid-identifier' in profile)
        self.assertEqual(type(profile['orcid-identifier']), dict)

    def test_wrong_instance(self):
        """``instance='dissem.in'`` must be rejected with ValueError."""
        self.assertRaises(ValueError, OrcidProfile,
                          '0000-0002-2963-7764', instance='dissem.in')

    def test_sandbox(self):
        """Checks the name of profile 0000-0002-5654-4053."""
        profile = OrcidProfile(orcid_id='0000-0002-5654-4053')
        self.assertEqual(profile.name, ('Peter', 'Lieth'))

    def test_search(self):
        """Searches for 'John Doe' and checks the shape of the results."""
        # This one runs against the production database: test profiles
        # on the sandbox tend to get deleted quite often.
        found = list(OrcidProfile.search_by_name('John', 'Doe'))

        well_formed = [
            len(entry['orcid']) and (len(entry['first']) or len(entry['last']))
            for entry in found
        ]
        self.assertTrue(all(well_formed))

        returned_names = [(entry['first'], entry['last']) for entry in found]
        self.assertTrue(('John', 'Doe') in returned_names)

    def test_institution(self):
        """Checks the ``institution`` value of two profiles."""
        expected_by_orcid = [
            ('0000-0002-0022-2290',
             {'name':'Ecole Normale Superieure',
              'identifier':None,
              'country':'FR'}),
            ('0000-0002-5654-4053',
             {'country': 'FR',
              'identifier': None,
              'name': "École nationale supérieure de céramique industrielle"}),
        ]
        for orcid_id, institution in expected_by_orcid:
            profile = OrcidProfile(orcid_id=orcid_id)
            self.assertEqual(profile.institution, institution)

    def test_work_summaries(self):
        """Checks DOI, title and put codes of the work summaries."""
        work_summaries = self.antonin.work_summaries
        found_dois = [item.doi for item in work_summaries]
        found_titles = [item.title for item in work_summaries]
        self.assertTrue('10.4204/eptcs.172.16' in found_dois)
        self.assertTrue('Complexity of Grammar Induction for Quantum Types' in found_titles)
        found_put_codes = [item.put_code for item in work_summaries]
        self.assertTrue(None not in found_put_codes)

    def test_philipp(self):
        """Checks that a given DOI shows up in the work summaries."""
        profile = OrcidProfile(orcid_id='0000-0001-6723-6833')
        found_dois = [item.doi for item in profile.work_summaries]
        self.assertTrue('10.3354/meps09890' in found_dois)

    def test_wrong_id_type(self):
        """
        I found this payload in an ORCID profile... looks like ORCID
        does not validate their ids against regexes
        """
        handle_as_doi = {
            "external-id-type": "doi",
            "external-id-value": "http://hdl.handle.net/2080/2662",
            "external-id-url": None,
            "external-id-relationship": "SELF",
        }
        summary_json = {
            "last-modified-date": {"value": 1505077812702},
            "external-ids": {"external-id": [dict(handle_as_doi)]},
            "work-summary": [{
                "put-code": 36669776,
                "created-date": {"value": 1505077812702},
                "last-modified-date": {"value": 1505077812702},
                "source": {
                    "source-orcid": {
                        "uri": "https://orcid.org/0000-0002-9658-1473",
                        "path": "0000-0002-9658-1473",
                        "host": "orcid.org",
                    },
                    "source-client-id": None,
                    "source-name": {"value": "Bhojaraju Gunjal"},
                },
                "title": {
                    "title": {
                        "value": "Open Source Solutions for Creation of ETD Archives/Repository: A Case Study of Central Library@NIT Rourkela",
                    },
                    "subtitle": None,
                    "translated-title": None,
                },
                "external-ids": {"external-id": [dict(handle_as_doi)]},
                "type": "CONFERENCE_PAPER",
                "publication-date": {
                    "year": {"value": "2017"},
                    "month": None,
                    "day": None,
                    "media-type": None,
                },
                "visibility": "PUBLIC",
                "path": "/0000-0002-9658-1473/work/36669776",
                "display-index": "1",
            }],
        }
        parsed = OrcidWorkSummary(summary_json)
        self.assertEqual(parsed.doi, None)

    def test_multiple_ids(self):
        """Checks the DOI found in a summary that has several ids."""
        summary_json = {
            "last-modified-date": {"value": 1506388112650},
            "external-ids": {
                "external-id": [
                    {
                        "external-id-type": "eid",
                        "external-id-value": "2-s2.0-84864877237",
                        "external-id-url": None,
                        "external-id-relationship": "SELF",
                    },
                    {
                        "external-id-type": "doi",
                        "external-id-value": "10.3354/meps09890",
                        "external-id-url": None,
                        "external-id-relationship": "SELF",
                    },
                ],
            },
            "work-summary": [{
                "put-code": 19176128,
                "created-date": {"value": 1444695659490},
                "last-modified-date": {"value": 1506388112650},
                "source": {
                    "source-orcid": None,
                    "source-client-id": {
                        "uri": "https://orcid.org/client/0000-0002-3054-1567",
                        "path": "0000-0002-3054-1567",
                        "host": "orcid.org",
                    },
                    "source-name": {"value": "CrossRef Metadata Search"},
                },
                "title": {
                    "title": {
                        "value": "Elephant seal foraging dives track prey distribution, not temperature: Comment on McIntyre et al. (2011)",
                    },
                    "subtitle": None,
                    "translated-title": None,
                },
                "external-ids": {
                    "external-id": [{
                        "external-id-type": "doi",
                        "external-id-value": "10.3354/meps09890",
                        "external-id-url": None,
                        "external-id-relationship": "SELF",
                    }],
                },
                "type": "JOURNAL_ARTICLE",
                "publication-date": {
                    "year": {"value": "2012"},
                    "month": {"value": "08"},
                    "day": {"value": "08"},
                    "media-type": None,
                },
                "visibility": "PUBLIC",
                "path": "/0000-0001-6723-6833/work/19176128",
                "display-index": "0",
            }],
        }
        parsed = OrcidWorkSummary(summary_json)
        self.assertEqual(parsed.doi, '10.3354/meps09890')

    def test_works(self):
        """Fetches the works of a profile and checks title and type."""
        work_summaries = self.antonin.work_summaries
        wanted_put_codes = [item.put_code for item in work_summaries]
        fetched = list(self.antonin.fetch_works(wanted_put_codes))
        found_titles = [work.title for work in fetched]
        self.assertTrue('Complexity of Grammar Induction for Quantum Types' in found_titles)
        found_types = [work.pubtype for work in fetched]
        self.assertTrue('journal-article' in found_types)
Beispiel #14
0
 def test_philipp(self):
     """
     Checks that the DOI 10.3354/meps09890 shows up in the work
     summaries of profile 0000-0001-6723-6833.
     """
     profile = OrcidProfile(orcid_id='0000-0001-6723-6833')
     found_dois = [item.doi for item in profile.work_summaries]
     self.assertTrue('10.3354/meps09890' in found_dois)
Beispiel #15
0
    def fetch_orcid_records(self,
                            orcid_identifier,
                            profile=None,
                            use_doi=True):
        """
        Yields the papers listed in the ORCID profile of a researcher.

        The works of the profile are first turned into papers using the
        metadata ORCID provides. With ``use_doi`` set, the works that
        carry DOIs are set aside instead and fetched afterwards through
        ``fetch_metadata_from_dois``.

        This is a generator: nothing below runs (the exception included)
        before it is iterated.

        :param orcid_identifier: the ORCID id of the researcher
        :param profile: The ORCID profile if it has already been fetched before (format: parsed JSON).
        :param use_doi: Fetch the publications by DOI when we find one (recommended, but slow)
        :returns: a generator, where all the papers found are yielded. (some of them could be in
                free form, hence not imported)
        :raises MetadataSourceException: if the identifier does not validate
        """
        crossref = CrossRefAPI()

        # Cleanup iD:
        clean_orcid = validate_orcid(orcid_identifier)
        if clean_orcid is None:
            raise MetadataSourceException('Invalid ORCiD identifier')

        # Get ORCiD profile
        try:
            profile = (OrcidProfile(orcid_id=clean_orcid) if profile is None
                       else OrcidProfile(json=profile))
        except MetadataSourceException as error:
            print(error)
            return

        # As we have fetched the profile, let's update the Researcher
        self.researcher = Researcher.get_or_create_by_orcid(
            orcid_identifier, profile.json, update=True)
        if not self.researcher:
            return

        # Reference name
        reference_name = profile.name
        # curl -H "Accept: application/orcid+json"
        # 'http://pub.orcid.org/v1.2/0000-0002-8612-8827/orcid-works' -L -i
        pending_dois = []  # DOIs to fetch in one batch later on
        skipped_papers = []  # papers left out due to incomplete metadata

        # Fetch publications (1st attempt with ORCiD data)
        publications = jpath(
            'orcid-profile/orcid-activities/orcid-works/orcid-work',
            profile, [])
        for publication in publications:
            data_paper = ORCIDDataPaper.from_orcid_metadata(
                reference_name,
                clean_orcid,
                publication,
                stop_if_dois_exists=use_doi)
            if not data_paper:
                continue

            # We want to batch the DOIs rather than fetch them one by one.
            if data_paper.dois and use_doi:
                pending_dois.extend(data_paper.dois)
                continue

            # If the paper is skipped due to invalid metadata, we first
            # try to reconcile it with the local researcher's name, and
            # only then consider it missed.
            if data_paper.skipped:
                local_author = (self.researcher.name.first,
                                self.researcher.name.last)
                data_paper = self.reconcile_paper(
                    reference_name,
                    clean_orcid,
                    publication,
                    overrides={'authors': [local_author]})
                if data_paper.skipped:
                    print('%s is skipped due to incorrect metadata (%s)' %
                          (data_paper, data_paper.skip_reason))
                    skipped_papers.append(data_paper.as_dict())
                    continue

            yield self.create_paper(data_paper)

        # 2nd attempt with DOIs and CrossRef
        if use_doi:
            # Let's grab papers with DOIs found in our ORCiD profile.
            # FIXME(RaitoBezarius): if we fail here, we should get back the pub
            # and yield it.
            lookup_results = self.fetch_metadata_from_dois(
                crossref, reference_name, clean_orcid, pending_dois)
            for success, paper_or_metadata in lookup_results:
                if not success:
                    skipped_papers.append(paper_or_metadata)
                    print('This metadata (%s) yields no paper.' %
                          (paper_or_metadata))
                    continue
                yield paper_or_metadata

        self.warn_user_of_ignored_papers(skipped_papers)
        if skipped_papers:
            print('Warning: Total ignored papers: %d' % (len(skipped_papers)))
Beispiel #16
0
    def fetch_orcid_records(self,
                            orcid_identifier,
                            profile=None,
                            use_doi=True):
        """
        Yields the papers listed in the ORCID profile of a researcher.

        With ``use_doi`` set, the works that carry a DOI are first looked
        up through ``fetch_metadata_from_dois``. All the other works, and
        those whose DOI lookup did not succeed, are then built from the
        metadata ORCID itself provides.

        This is a generator: nothing below runs (the exception included)
        before it is iterated.

        :param orcid_identifier: the ORCID id of the researcher
        :param profile: The ORCID profile if it has already been fetched before (format: parsed JSON).
        :param use_doi: Fetch the publications by DOI when we find one (recommended, but slow)
        :returns: a generator, where all the papers found are yielded. (some of them could be in
                free form, hence not imported)
        :raises MetadataSourceException: if the identifier does not validate
        """
        crossref = CrossRefAPI()

        # Cleanup iD:
        clean_orcid = validate_orcid(orcid_identifier)
        if clean_orcid is None:
            raise MetadataSourceException('Invalid ORCiD identifier')

        # Get ORCiD profile
        try:
            profile = (OrcidProfile(orcid_id=clean_orcid) if profile is None
                       else OrcidProfile(json=profile))
        except MetadataSourceException as error:
            print(error)
            return

        # As we have fetched the profile, let's update the Researcher
        self.researcher = Researcher.get_or_create_by_orcid(
            orcid_identifier, profile.json, update=True)
        if not self.researcher:
            return

        # Reference name
        reference_name = profile.name
        # Works left out because of incomplete metadata
        skipped_works = []

        # Sort the work summaries into two groups:
        # - (DOI, put code) pairs, to be fetched with CrossRef
        doi_lookups = []
        # - put codes of the works for which only ORCID's own metadata
        #   will be used
        orcid_only = []
        for summary in profile.work_summaries:
            if summary.doi and use_doi:
                doi_lookups.append((summary.doi, summary.put_code))
            else:
                orcid_only.append(summary.put_code)

        # 1st attempt with DOIs and CrossRef
        if use_doi:
            wanted_dois = [doi for doi, _ in doi_lookups]
            lookup_results = self.fetch_metadata_from_dois(
                crossref, reference_name, clean_orcid, wanted_dois)
            for position, (success, paper_or_metadata) in enumerate(
                    lookup_results):
                if not success:
                    # Retry this work below, through its put code.
                    orcid_only.append(doi_lookups[position][1])
                    continue
                yield paper_or_metadata

        # 2nd attempt with ORCID's own metadata
        for work in profile.fetch_works(orcid_only):
            if not work:
                continue

            if not work.skipped:
                yield self.create_paper(work)
                continue

            # The work was skipped due to invalid metadata: report it
            # and keep track of it.
            print(work.json)
            print(work.skip_reason)
            print('work skipped due to incorrect metadata (%s)' %
                  (work.skip_reason))
            skipped_works.append(work.as_dict())

        self.warn_user_of_ignored_papers(skipped_works)
        if skipped_works:
            print('Warning: Total ignored papers: %d' % (len(skipped_works)))
Beispiel #17
0
 def test_credit_name(self):
     """
     Checks the name of the ``sergey`` profile and of profile
     0000-0001-9547-293X.
     """
     self.assertEqual(self.sergey.name, ('Sergey M.', 'Natanzon'))
     other_profile = OrcidProfile(orcid_id='0000-0001-9547-293X')
     self.assertEqual(other_profile.name, ('Darío', 'Álvarez'))