import unittest

# OrcidProfile and OrcidWorkSummary are assumed to be importable from this
# project's ORCID client module; the import path is not shown in this excerpt.


class OrcidProfileTest(unittest.TestCase):
    """
    TODO: duplicate all these profiles to the ORCID sandbox to be sure
    they will not be modified!
    """

    @classmethod
    def setUpClass(cls):
        cls.antonin = OrcidProfile(orcid_id='0000-0002-8612-8827')
        cls.thomas = OrcidProfile(orcid_id='0000-0003-0524-631X')
        cls.sergey = OrcidProfile(orcid_id='0000-0003-3397-9895')
        cls.marco = OrcidProfile(orcid_id='0000-0002-6561-5642')

    def test_simple_name(self):
        self.assertEqual(self.antonin.name, ('Antonin', 'Delpeuch'))
        self.assertEqual(self.thomas.name, ('Thomas', 'Bourgeat'))
        self.assertEqual(self.marco.name, ('Marco', 'Diana'))

    def test_credit_name(self):
        self.assertEqual(self.sergey.name, ('Sergey M.', 'Natanzon'))
        self.assertEqual(
            OrcidProfile(orcid_id='0000-0001-9547-293X').name,
            ('Darío', 'Álvarez'))

    def test_empty_lastname(self):
        self.assertEqual(
            OrcidProfile(orcid_id='0000-0001-5006-3868').name,
            ('Qiang', ''))

    def test_other_names(self):
        self.assertEqual(
            set(self.sergey.other_names),
            set([('Sergey', 'Natanzon'),
                 ('S.', 'Natanzon'),
                 ('S. M.', 'Natanzon')]))

    def test_homepage_without_http(self):
        self.assertEqual(
            OrcidProfile(orcid_id='0000-0002-5710-3989').homepage,
            'http://evrard.perso.enseeiht.fr')

    def test_iterable(self):
        for key in self.thomas:
            self.assertEqual(type(key), unicode)

    def test_attr(self):
        self.assertTrue('orcid-identifier' in self.thomas)
        self.assertEqual(type(self.thomas['orcid-identifier']), dict)

    def test_wrong_instance(self):
        with self.assertRaises(ValueError):
            p = OrcidProfile('0000-0002-2963-7764', instance='dissem.in')
            del p

    def test_sandbox(self):
        self.assertEqual(
            OrcidProfile(orcid_id='0000-0002-5654-4053').name,
            ('Peter', 'Lieth'))

    def test_search(self):
        # For this one we use the production database, because test profiles
        # on the sandbox tend to get deleted quite often.
        results = list(OrcidProfile.search_by_name('John', 'Doe'))
        self.assertTrue(all(map(
            lambda x: len(x['orcid']) and (len(x['first']) or len(x['last'])),
            results)))
        names_returned = map(lambda x: (x['first'], x['last']), results)
        self.assertTrue(('John', 'Doe') in names_returned)

    def test_institution(self):
        self.assertEqual(
            OrcidProfile(orcid_id='0000-0002-0022-2290').institution,
            {'name': 'Ecole Normale Superieure',
             'identifier': None,
             'country': 'FR'})
        self.assertEqual(
            OrcidProfile(orcid_id='0000-0002-5654-4053').institution,
            {'country': 'FR',
             'identifier': None,
             'name': "École nationale supérieure de céramique industrielle"})

    def test_work_summaries(self):
        summaries = self.antonin.work_summaries
        dois = [summary.doi for summary in summaries]
        titles = [summary.title for summary in summaries]
        self.assertTrue('10.4204/eptcs.172.16' in dois)
        self.assertTrue(
            'Complexity of Grammar Induction for Quantum Types' in titles)
        self.assertTrue(
            None not in [summary.put_code for summary in summaries])

    def test_philipp(self):
        p = OrcidProfile(orcid_id='0000-0001-6723-6833')
        summaries = p.work_summaries
        dois = [summary.doi for summary in summaries]
        self.assertTrue('10.3354/meps09890' in dois)

    def test_wrong_id_type(self):
        """
        I found this payload in an ORCID profile… looks like ORCID does not
        validate their ids against regexes
        """
        summary_json = {
            "last-modified-date": {"value": 1505077812702},
            "external-ids": {
                "external-id": [{
                    "external-id-type": "doi",
                    "external-id-value": "http://hdl.handle.net/2080/2662",
                    "external-id-url": None,
                    "external-id-relationship": "SELF"
                }]
            },
            "work-summary": [{
                "put-code": 36669776,
                "created-date": {"value": 1505077812702},
                "last-modified-date": {"value": 1505077812702},
                "source": {
                    "source-orcid": {
                        "uri": "https://orcid.org/0000-0002-9658-1473",
                        "path": "0000-0002-9658-1473",
                        "host": "orcid.org"
                    },
                    "source-client-id": None,
                    "source-name": {"value": "Bhojaraju Gunjal"}
                },
                "title": {
                    "title": {"value": "Open Source Solutions for Creation of ETD Archives/Repository: A Case Study of Central Library@NIT Rourkela"},
                    "subtitle": None,
                    "translated-title": None
                },
                "external-ids": {
                    "external-id": [{
                        "external-id-type": "doi",
                        "external-id-value": "http://hdl.handle.net/2080/2662",
                        "external-id-url": None,
                        "external-id-relationship": "SELF"
                    }]
                },
                "type": "CONFERENCE_PAPER",
                "publication-date": {
                    "year": {"value": "2017"},
                    "month": None,
                    "day": None,
                    "media-type": None
                },
                "visibility": "PUBLIC",
                "path": "/0000-0002-9658-1473/work/36669776",
                "display-index": "1"
            }]
        }
        summary = OrcidWorkSummary(summary_json)
        self.assertEqual(summary.doi, None)

    def test_multiple_ids(self):
        summary_json = {
            "last-modified-date": {"value": 1506388112650},
            "external-ids": {
                "external-id": [{
                    "external-id-type": "eid",
                    "external-id-value": "2-s2.0-84864877237",
                    "external-id-url": None,
                    "external-id-relationship": "SELF"
                }, {
                    "external-id-type": "doi",
                    "external-id-value": "10.3354/meps09890",
                    "external-id-url": None,
                    "external-id-relationship": "SELF"
                }]
            },
            "work-summary": [{
                "put-code": 19176128,
                "created-date": {"value": 1444695659490},
                "last-modified-date": {"value": 1506388112650},
                "source": {
                    "source-orcid": None,
                    "source-client-id": {
                        "uri": "https://orcid.org/client/0000-0002-3054-1567",
                        "path": "0000-0002-3054-1567",
                        "host": "orcid.org"
                    },
                    "source-name": {"value": "CrossRef Metadata Search"}
                },
                "title": {
                    "title": {"value": "Elephant seal foraging dives track prey distribution, not temperature: Comment on McIntyre et al. (2011)"},
                    "subtitle": None,
                    "translated-title": None
                },
                "external-ids": {
                    "external-id": [{
                        "external-id-type": "doi",
                        "external-id-value": "10.3354/meps09890",
                        "external-id-url": None,
                        "external-id-relationship": "SELF"
                    }]
                },
                "type": "JOURNAL_ARTICLE",
                "publication-date": {
                    "year": {"value": "2012"},
                    "month": {"value": "08"},
                    "day": {"value": "08"},
                    "media-type": None
                },
                "visibility": "PUBLIC",
                "path": "/0000-0001-6723-6833/work/19176128",
                "display-index": "0"
            }]
        }
        summary = OrcidWorkSummary(summary_json)
        self.assertEqual(summary.doi, '10.3354/meps09890')

    def test_works(self):
        summaries = self.antonin.work_summaries
        put_codes = [s.put_code for s in summaries]
        works = list(self.antonin.fetch_works(put_codes))
        titles = [work.title for work in works]
        self.assertTrue(
            'Complexity of Grammar Induction for Quantum Types' in titles)
        pubtypes = [work.pubtype for work in works]
        self.assertTrue('journal-article' in pubtypes)
def fetch_orcid_records(self, orcid_identifier, profile=None, use_doi=True):
    """
    Queries ORCiD to retrieve the publications associated with a given ORCiD.
    It also fetches such papers from the CrossRef search interface.

    :param profile: The ORCID profile, if it has already been fetched before
        (format: parsed JSON).
    :param use_doi: Fetch the publications by DOI when we find one
        (recommended, but slow).
    :returns: a generator, where all the papers found are yielded
        (some of them could be in free form, hence not imported).
    """
    cr_api = CrossRefAPI()

    # Clean up the id
    orcid_id = validate_orcid(orcid_identifier)
    if orcid_id is None:
        raise MetadataSourceException('Invalid ORCiD identifier')

    # Get the ORCiD profile
    try:
        if profile is None:
            profile = OrcidProfile(orcid_id=orcid_id)
        else:
            profile = OrcidProfile(json=profile)
    except MetadataSourceException as e:
        print(e)
        return

    # As we have fetched the profile, let's update the Researcher
    self.researcher = Researcher.get_or_create_by_orcid(
        orcid_identifier, profile.json, update=True)
    if not self.researcher:
        return

    # Reference name
    ref_name = profile.name
    ignored_papers = []  # list of papers ignored due to incomplete metadata

    # Get summary publications and separate them into two classes:
    # - the ones with DOIs, which we will fetch with CrossRef
    dois_and_putcodes = []  # list of (DOI, put-code) pairs to fetch
    # - the ones without: we will fetch ORCID's metadata about them
    #   and try to create a paper with what they provide
    put_codes = []
    for summary in profile.work_summaries:
        if summary.doi and use_doi:
            dois_and_putcodes.append((summary.doi, summary.put_code))
        else:
            put_codes.append(summary.put_code)

    # 1st attempt: DOIs and CrossRef
    if use_doi:
        # Let's grab the papers with DOIs found in our ORCiD profile.
        dois = [doi for doi, put_code in dois_and_putcodes]
        for idx, (success, paper_or_metadata) in enumerate(
                self.fetch_metadata_from_dois(cr_api, ref_name, orcid_id, dois)):
            if success:
                yield paper_or_metadata
            else:
                put_codes.append(dois_and_putcodes[idx][1])

    # 2nd attempt: ORCID's own metadata
    works = profile.fetch_works(put_codes)
    for work in works:
        if not work:
            continue

        # If the paper is skipped due to invalid metadata, we first try to
        # reconcile it with the local researcher's author name; otherwise,
        # we consider it missed.
        if work.skipped:
            print(work.json)
            print(work.skip_reason)
            print('work skipped due to incorrect metadata (%s)' % (work.skip_reason))
            ignored_papers.append(work.as_dict())
            continue

        yield self.create_paper(work)

    self.warn_user_of_ignored_papers(ignored_papers)
    if ignored_papers:
        print('Warning: Total ignored papers: %d' % (len(ignored_papers)))
def fetch_orcid_records(self, orcid_identifier, profile=None, use_doi=True):
    """
    Queries ORCiD to retrieve the publications associated with a given ORCiD.
    It also fetches such papers from the CrossRef search interface.

    :param profile: The ORCID profile, if it has already been fetched before
        (format: parsed JSON).
    :param use_doi: Fetch the publications by DOI when we find one
        (recommended, but slow).
    :returns: a generator, where all the papers found are yielded
        (some of them could be in free form, hence not imported).
    """
    cr_api = CrossRefAPI()

    # Clean up the id
    orcid_id = validate_orcid(orcid_identifier)
    if orcid_id is None:
        raise MetadataSourceException('Invalid ORCiD identifier')

    # Get the ORCiD profile
    try:
        if profile is None:
            profile = OrcidProfile(orcid_id=orcid_id)
    except MetadataSourceException:
        logger.exception("ORCID Profile Error")
        return

    # As we have fetched the profile, let's update the Researcher
    self.researcher = Researcher.get_or_create_by_orcid(
        orcid_identifier, profile.json, update=True)
    if not self.researcher:
        return

    # Reference name
    ref_name = profile.name
    ignored_papers = []  # list of papers ignored due to incomplete metadata

    # Get summary publications and separate them into two classes:
    # - the ones with DOIs, which we will fetch with CrossRef
    dois_and_putcodes = []  # list of (DOI, put-code) pairs to fetch
    # - the ones without: we will fetch ORCID's metadata about them
    #   and try to create a paper with what they provide
    put_codes = []
    for summary in profile.work_summaries:
        if summary.doi and use_doi:
            dois_and_putcodes.append((summary.doi, summary.put_code))
        else:
            put_codes.append(summary.put_code)

    # 1st attempt: DOIs and CrossRef
    if use_doi:
        # Let's grab the papers with DOIs found in our ORCiD profile.
        dois = [doi for doi, put_code in dois_and_putcodes]
        for idx, (success, paper_or_metadata) in enumerate(
                self.fetch_metadata_from_dois(cr_api, ref_name, orcid_id, dois)):
            if success:
                yield paper_or_metadata  # We know that this is a paper
            else:
                put_codes.append(dois_and_putcodes[idx][1])

    # 2nd attempt: ORCID's own metadata
    works = profile.fetch_works(put_codes)
    for work in works:
        if not work:
            continue

        # If the paper is skipped due to invalid metadata, we first try to
        # reconcile it with the local researcher's author name; otherwise,
        # we consider it missed.
        if work.skipped:
            logger.warning("Work skipped due to incorrect metadata. \n %s \n %s"
                           % (work.json, work.skip_reason))
            ignored_papers.append(work.as_dict())
            continue

        yield self.create_paper(work)

    self.warn_user_of_ignored_papers(ignored_papers)
    if ignored_papers:
        logger.warning("Total ignored papers: %d" % (len(ignored_papers)))
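Since fetch_orcid_records is a generator, the caller drives the harvest and decides what to do with each yielded paper; records with incomplete metadata are reported separately through warn_user_of_ignored_papers. Below is a minimal usage sketch; the `source` object and the import_profile wrapper are assumptions, because the class that owns this method is not shown in this excerpt.

# Hypothetical caller: `source` is an instance of whatever class defines
# fetch_orcid_records (that class is not part of this excerpt).
def import_profile(source, orcid_identifier):
    imported = []
    # One paper is yielded per publication that could be imported, either
    # via CrossRef (when a DOI is available) or from ORCID's own metadata.
    for paper in source.fetch_orcid_records(orcid_identifier, use_doi=True):
        imported.append(paper)
    return imported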