def pubdate(self):
    """
    Return the publication date of this ORCID publication.

    The year, month and day are read from the record through `self.j`,
    defaulting to 1970, 1 and 1 respectively. The most precise date is
    tried first, then the first day of the month, then the first day of
    the year.

    :returns: the first truthy result of `try_date` (presumably a date
        object -- confirm against `try_date`).
    :raises SkippedPaper: with code "INVALID_PUB_DATE" when the last
        attempt yields None.
    """
    # Remark(RaitoBezarius): the defaults are written 1 and not 01, as the
    # latter could be interpreted as an octal literal.
    year = parse_int(self.j('publication-date/year/value'), 1970)
    month = parse_int(self.j('publication-date/month/value'), 1)
    day = parse_int(self.j('publication-date/day/value'), 1)

    # Candidates ordered from most to least precise; stop at the first
    # one that `try_date` accepts.
    resolved = None
    for candidate_month, candidate_day in ((month, day), (month, 1), (1, 1)):
        resolved = try_date(year, candidate_month, candidate_day)
        if resolved:
            break

    if resolved is not None:
        return resolved

    print("Invalid publication date in ORCID publication, skipping")
    raise SkippedPaper("INVALID_PUB_DATE")
def test_parse_int(self):
    """
    parse_int returns an integer input unchanged and falls back to the
    default for None or a non-numeric string.
    """
    # (value, default, expected result)
    cases = (
        (90, None, 90),
        (None, 90, 90),
        ('est', 8, 8),
    )
    for value, default, expected in cases:
        self.assertEqual(parse_int(value, default), expected)
def fetch_orcid_records(self, id, profile=None, use_doi=True):
    """
    Queries ORCiD to retrieve the publications associated with a given ORCiD.
    It also fetches such papers from the CrossRef search interface.

    This is a generator: nothing is fetched, and no exception is raised,
    until the result is iterated.

    :param id: The ORCiD identifier to look up. It is cleaned up with
        `validate_orcid` before use.
    :param profile: The ORCID profile if it has already been fetched before
        (format: parsed JSON).
    :param use_doi: Fetch the publications by DOI when we find one
        (recommended, but slow)
    :returns: a generator, where all the papers found are yielded.
        (some of them could be in free form, hence not imported)
    :raises MetadataSourceException: if `id` is not a valid ORCiD identifier.
    """
    crps = CrossRefPaperSource(self.ccf)

    # Cleanup iD:
    id = validate_orcid(id)
    if id is None:
        raise MetadataSourceException('Invalid ORCiD identifier')

    # Get ORCiD profile
    try:
        if profile is None:
            profile = OrcidProfile(id=id)
        else:
            profile = OrcidProfile(json=profile)
    except MetadataSourceException as e:
        # Best effort: report the failure and yield nothing.
        print(e)
        return

    # Reference name
    ref_name = profile.name
    # curl -H "Accept: application/orcid+json" 'http://pub.orcid.org/v1.2/0000-0002-8612-8827/orcid-works' -L -i
    dois = []  # list of DOIs to fetch

    # Fetch publications
    pubs = jpath('orcid-profile/orcid-activities/orcid-works/orcid-work',
                 profile, [])
    for pub in pubs:
        def j(path, default=None):
            return jpath(path, pub, default)

        # DOI
        doi = None
        for extid in j('work-external-identifiers/work-external-identifier', []):
            if extid.get('work-external-identifier-type') == 'DOI':
                doi = to_doi(jpath('work-external-identifier-id/value', extid))
                if doi:
                    # If a DOI is available, create the paper using metadata
                    # from CrossRef. We don't do it yet, we only store the
                    # DOI, so that we can fetch them by batch later.
                    dois.append(doi)

        if doi and use_doi:
            continue

        # Extract information from ORCiD

        # Title
        title = j('work-title/title/value')
        if title is None:
            print("Warning: Skipping ORCID publication: no title")
            # Actually skip, as the message says: a paper cannot be
            # created without a title.
            continue

        # Type
        doctype = orcid_to_doctype(j('work-type', 'other'))

        # Contributors (ignored for now as they are very often not present)
        contributors = [
            {
                'orcid': jpath('contributor-orcid', contrib),
                'name': jpath('credit-name/value', contrib),
            }
            for contrib in j('work-contributors/contributor', [])
        ]
        author_names = [c['name'] for c in contributors
                        if c['name'] is not None]
        authors = [parse_comma_name(author_name)
                   for author_name in author_names]

        # ORCiD internal id
        identifier = j('put-code')
        # NOTE(review): this has one entry per contributor, whereas
        # `authors` only keeps contributors with a name -- confirm that
        # affiliate_author_with_orcid copes with differing lengths.
        affiliations = [c['orcid'] for c in contributors]

        # Pubdate
        # The defaults are written 1 and not 01: the latter is an octal
        # literal on Python 2 and a syntax error on Python 3.
        year = parse_int(j('publication-date/year/value'), 1970)
        month = parse_int(j('publication-date/month/value'), 1)
        day = parse_int(j('publication-date/day/value'), 1)
        pubdate = None
        try:
            # Each assignment refines the previous one, so when a later
            # one fails the most precise valid date is kept.
            pubdate = date(year=year, month=1, day=1)
            pubdate = date(year=year, month=month, day=1)
            pubdate = date(year=year, month=month, day=day)
        except ValueError:
            if pubdate is None:
                print("Invalid publication date in ORCID publication, skipping")
                continue

        # Citation type: metadata format
        citation_format = j('work-citation/work-citation-type')
        print(citation_format)
        bibtex = j('work-citation/citation')
        if bibtex is not None:
            try:
                entry = parse_bibtex(bibtex)
                # .get() so that an entry without an 'author' key does not
                # abort the whole generator with a KeyError.
                bibtex_authors = entry.get('author', [])
                if bibtex_authors == []:
                    print("Warning: Skipping ORCID publication: no authors.")
                    print(bibtex)
                if not authors:
                    authors = bibtex_authors
            except ValueError:
                # The citation could not be parsed: keep whatever authors
                # the contributor list provided.
                pass

        affiliations = affiliate_author_with_orcid(
            ref_name, id, authors, initial_affiliations=affiliations)

        authors = [name_lookup_cache.lookup(author) for author in authors]

        if not authors:
            print("No authors found, skipping")
            continue

        # Create paper:
        paper = BarePaper.create(title, authors, pubdate, 'VISIBLE',
                                 affiliations)

        record = BareOaiRecord(
            source=orcid_oai_source,
            identifier=identifier,
            splash_url='http://orcid.org/' + id,
            pubtype=doctype)

        paper.add_oairecord(record)
        yield paper

    if use_doi:
        for metadata in crps.search_for_dois_incrementally('', {'orcid': id}):
            try:
                paper = crps.save_doi_metadata(metadata)
                if paper:
                    yield paper
            except ValueError as e:
                print("Saving CrossRef record from ORCID failed: %s" % unicode(e))

        # Now we add the DOIs found in the ORCID profile.
        doi_metadata = fetch_dois(dois)
        for metadata in doi_metadata:
            try:
                authors = [convert_to_name_pair(author)
                           for author in metadata['author']]
                affiliations = affiliate_author_with_orcid(
                    ref_name, id, authors)
                paper = crps.save_doi_metadata(metadata, affiliations)
                if not paper:
                    continue

                record = BareOaiRecord(
                    source=orcid_oai_source,
                    identifier='orcid:' + id + ':' + metadata['DOI'],
                    splash_url='http://orcid.org/' + id,
                    pubtype=paper.doctype)
                paper.add_oairecord(record)
                yield paper
            except (KeyError, ValueError, TypeError) as e:
                # Still best effort, but report the dropped record
                # instead of discarding it silently.
                print("Skipping DOI record from ORCID profile: %r" % (e,))
def fetch_orcid_records(self, id, profile=None, use_doi=True):
    """
    Queries ORCiD to retrieve the publications associated with a given ORCiD.
    It also fetches such papers from the CrossRef search interface.

    This is a generator: nothing is fetched, and no exception is raised,
    until the result is iterated.

    :param id: The ORCiD identifier to look up. It is cleaned up with
        `validate_orcid` before use.
    :param profile: The ORCID profile if it has already been fetched before
        (format: parsed JSON).
    :param use_doi: Fetch the publications by DOI when we find one
        (recommended, but slow)
    :returns: a generator, where all the papers found are yielded.
        (some of them could be in free form, hence not imported)
    :raises MetadataSourceException: if `id` is not a valid ORCiD identifier.
    """
    crps = CrossRefPaperSource(self.ccf)

    # Cleanup iD:
    id = validate_orcid(id)
    if id is None:
        raise MetadataSourceException('Invalid ORCiD identifier')

    # Get ORCiD profile
    try:
        if profile is None:
            profile = OrcidProfile(id=id)
        else:
            profile = OrcidProfile(json=profile)
    except MetadataSourceException as e:
        # Best effort: report the failure and yield nothing.
        print(e)
        return

    # Reference name
    ref_name = profile.name
    # curl -H "Accept: application/orcid+json" 'http://pub.orcid.org/v1.2/0000-0002-8612-8827/orcid-works' -L -i
    dois = []  # list of DOIs to fetch

    # Fetch publications
    pubs = jpath('orcid-profile/orcid-activities/orcid-works/orcid-work',
                 profile, [])
    for pub in pubs:
        def j(path, default=None):
            return jpath(path, pub, default)

        # DOI
        doi = None
        for extid in j('work-external-identifiers/work-external-identifier', []):
            if extid.get('work-external-identifier-type') == 'DOI':
                doi = to_doi(jpath('work-external-identifier-id/value', extid))
                if doi:
                    # If a DOI is available, create the paper using metadata
                    # from CrossRef. We don't do it yet, we only store the
                    # DOI, so that we can fetch them by batch later.
                    dois.append(doi)

        if doi and use_doi:
            continue

        # Extract information from ORCiD

        # Title
        title = j('work-title/title/value')
        if title is None:
            print("Warning: Skipping ORCID publication: no title")
            # Actually skip, as the message says: a paper cannot be
            # created without a title.
            continue

        # Type
        doctype = orcid_to_doctype(j('work-type', 'other'))

        # Contributors (ignored for now as they are very often not present)
        contributors = [
            {
                'orcid': jpath('contributor-orcid', contrib),
                'name': jpath('credit-name/value', contrib),
            }
            for contrib in j('work-contributors/contributor', [])
        ]
        author_names = [c['name'] for c in contributors
                        if c['name'] is not None]
        authors = [parse_comma_name(author_name)
                   for author_name in author_names]

        # ORCiD internal id
        identifier = j('put-code')
        # NOTE(review): this has one entry per contributor, whereas
        # `authors` only keeps contributors with a name -- confirm that
        # affiliate_author_with_orcid copes with differing lengths.
        affiliations = [c['orcid'] for c in contributors]

        # Pubdate
        # The defaults are written 1 and not 01: the latter is an octal
        # literal on Python 2 and a syntax error on Python 3.
        year = parse_int(j('publication-date/year/value'), 1970)
        month = parse_int(j('publication-date/month/value'), 1)
        day = parse_int(j('publication-date/day/value'), 1)
        pubdate = None
        try:
            # Each assignment refines the previous one, so when a later
            # one fails the most precise valid date is kept.
            pubdate = date(year=year, month=1, day=1)
            pubdate = date(year=year, month=month, day=1)
            pubdate = date(year=year, month=month, day=day)
        except ValueError:
            if pubdate is None:
                print("Invalid publication date in ORCID publication, skipping")
                continue

        # Citation type: metadata format
        citation_format = j('work-citation/work-citation-type')
        print(citation_format)
        bibtex = j('work-citation/citation')
        if bibtex is not None:
            try:
                entry = parse_bibtex(bibtex)
                # .get() so that an entry without an 'author' key does not
                # abort the whole generator with a KeyError.
                bibtex_authors = entry.get('author', [])
                if bibtex_authors == []:
                    print("Warning: Skipping ORCID publication: no authors.")
                    print(bibtex)
                if not authors:
                    authors = bibtex_authors
            except ValueError:
                # The citation could not be parsed: keep whatever authors
                # the contributor list provided.
                pass

        affiliations = affiliate_author_with_orcid(
            ref_name, id, authors, initial_affiliations=affiliations)

        authors = [name_lookup_cache.lookup(author) for author in authors]

        if not authors:
            print("No authors found, skipping")
            continue

        # Create paper:
        paper = BarePaper.create(title, authors, pubdate, 'VISIBLE',
                                 affiliations)

        record = BareOaiRecord(
            source=orcid_oai_source,
            identifier=identifier,
            splash_url='http://orcid.org/' + id,
            pubtype=doctype)

        paper.add_oairecord(record)
        yield paper

    if use_doi:
        for metadata in crps.search_for_dois_incrementally('', {'orcid': id}):
            try:
                paper = crps.save_doi_metadata(metadata)
                if paper:
                    yield paper
            except ValueError as e:
                print("Saving CrossRef record from ORCID failed: %s" % unicode(e))

        # Now we add the DOIs found in the ORCID profile.
        doi_metadata = fetch_dois(dois)
        for metadata in doi_metadata:
            try:
                authors = [convert_to_name_pair(author)
                           for author in metadata['author']]
                affiliations = affiliate_author_with_orcid(
                    ref_name, id, authors)
                paper = crps.save_doi_metadata(metadata, affiliations)
                if not paper:
                    continue

                record = BareOaiRecord(
                    source=orcid_oai_source,
                    identifier='orcid:' + id + ':' + metadata['DOI'],
                    splash_url='http://orcid.org/' + id,
                    pubtype=paper.doctype)
                paper.add_oairecord(record)
                yield paper
            except (KeyError, ValueError, TypeError) as e:
                # Still best effort, but report the dropped record
                # instead of discarding it silently.
                print("Skipping DOI record from ORCID profile: %r" % (e,))