def test_parse_bibtex_latex_accents(self):
    """
    Check that LaTeX accent macros in author names are decoded.

    The author field is written with LaTeX escapes for the accented
    letters; the parsed entry is expected to contain the accented
    characters themselves.

    See https://github.com/dissemin/dissemin/issues/362
    """
    bibtex = r"""@misc{Nobody06, author = "Adri{\`{a}} Mart{\'{\i}}n Mor and Alessandro Beccu", title = "My Article", year = "2006" } """
    # The entry is parsed once and compared as a whole, so a malformed
    # result shows up as an assertion diff rather than a lookup error.
    self.assertEqual(
        parse_bibtex(bibtex),
        {
            'ENTRYTYPE': 'misc',
            'ID': 'Nobody06',
            'author': [('Adrià Martín', 'Mor'), ('Alessandro', 'Beccu')],
            'title': 'My Article',
            'year': '2006',
        })
def authors_from_bibtex(self):
    """
    Return the author list found in this object's BibTeX record.

    :returns: the 'author' value of the entry parsed from `self.bibtex`,
        or an empty list when `self.bibtex` is None, when parsing raises
        ValueError, or when the entry has no (or an empty) 'author' field.
    """
    if self.bibtex is None:
        return []

    try:
        entry = parse_bibtex(self.bibtex)
        has_authors = 'author' in entry and len(entry['author']) != 0
        return entry['author'] if has_authors else []
    except ValueError:
        return []
def test_parse_bibtex_latex_accents(self):
    """
    Check that LaTeX accent macros in author names are decoded.

    The author field is written with LaTeX escapes for the accented
    letters; the parsed entry is expected to contain the accented
    characters themselves.

    See https://github.com/dissemin/dissemin/issues/362
    """
    bibtex = r"""@misc{Nobody06, author = "Adri{\`{a}} Mart{\'{\i}}n Mor and Alessandro Beccu", title = "My Article", year = "2006" } """
    expected = {
        'ENTRYTYPE': 'misc',
        'ID': 'Nobody06',
        'author': [
            ('Adrià Martín', 'Mor'),
            ('Alessandro', 'Beccu'),
        ],
        'title': 'My Article',
        'year': '2006',
    }
    # Parse once and compare the whole entry, so that a malformed result
    # is reported as an assertion diff rather than a lookup error.
    self.assertEqual(parse_bibtex(bibtex), expected)
def test_parse_bibtex(self):
    """
    Parse a @misc entry whose authors are given as "Last, First".

    The expected result lists each author as a (first, last) pair, with
    the initials "J.L." spaced out as "J. L." and no pair for the
    trailing "others".
    """
    bibtex = """@misc{ Nobody06, author = "Orti, E. and Bredas, J.L. and Clarisse, C. and others", title = "My Article", year = "2006" } """
    parsed = parse_bibtex(bibtex)
    expected_authors = [
        ('E.', 'Orti'),
        ('J. L.', 'Bredas'),
        ('C.', 'Clarisse'),
    ]
    expected = {
        'ENTRYTYPE': 'misc',
        'ID': 'Nobody06',
        'author': expected_authors,
        'title': 'My Article',
        'year': '2006',
    }
    self.assertEqual(parsed, expected)
def authors_from_bibtex(self, bibtex):
    """
    Return the author list found in a BibTeX record.

    :param bibtex: the BibTeX source, or None
    :returns: the 'author' value of the parsed entry, or an empty list
        when `bibtex` is None, when parsing raises ValueError, or when
        the entry has no (or an empty) 'author' field. In that last case
        a warning and the BibTeX source are printed.
    """
    if bibtex is None:
        return []

    try:
        entry = parse_bibtex(bibtex)
        if 'author' in entry and len(entry['author']) != 0:
            return entry['author']
        # Nothing usable in the author field: report it and give up.
        print("Warning: ORCiD publication with no authors.")
        print(bibtex)
        return []
    except ValueError:
        return []
def test_parse_bibtex_unicode_accents(self):
    """
    Check that accented characters written directly in the author field
    come through parsing unchanged.

    See https://github.com/dissemin/dissemin/issues/362
    """
    bibtex = """@misc{ Nobody06, author = "Adrià Martin Mor", title = "My Article", year = "2006" } """
    parsed = parse_bibtex(bibtex)
    expected = {
        'ENTRYTYPE': 'misc',
        'ID': 'Nobody06',
        'author': [('Adrià Martin', 'Mor')],
        'title': 'My Article',
        'year': '2006',
    }
    self.assertEqual(parsed, expected)
def test_parse_bibtex_unicode_accents(self):
    """
    Check that accented characters written directly in the author field
    come through parsing unchanged.

    See https://github.com/dissemin/dissemin/issues/362
    """
    bibtex = """@misc{ Nobody06, author = "Adrià Martin Mor", title = "My Article", year = "2006" } """
    parsed = parse_bibtex(bibtex)
    expected_authors = [('Adrià Martin', 'Mor')]
    self.assertEqual(parsed, {
        'ENTRYTYPE': 'misc',
        'ID': 'Nobody06',
        'author': expected_authors,
        'title': 'My Article',
        'year': '2006',
    })
def test_parse_bibtex(self):
    """
    Parse a @misc entry whose authors are given as "Last, First".

    The expected result lists each author as a (first, last) pair, with
    the initials "J.L." spaced out as "J. L." and no pair for the
    trailing "others".
    """
    bibtex = """@misc{ Nobody06, author = "Orti, E. and Bredas, J.L. and Clarisse, C. and others", title = "My Article", year = "2006" } """
    parsed = parse_bibtex(bibtex)
    expected = {
        'ENTRYTYPE': 'misc',
        'ID': 'Nobody06',
        'author': [('E.', 'Orti'), ('J. L.', 'Bredas'), ('C.', 'Clarisse')],
        'title': 'My Article',
        'year': '2006',
    }
    self.assertEqual(parsed, expected)
def test_no_newlines(self):
    """
    Parse a BibTeX entry written on a single line and check that both
    authors are extracted as (first, last) pairs.
    """
    bibtex = "@article{DBLP:journals/corr/abs-1804-07832, author= {Antonin Delpeuch and Jamie Vicary}, title= {Normal forms for planar connected string diagrams}, journal= {CoRR}, volume= {abs/1804.07832}, year= {2018}, url= {http://arxiv.org/abs/1804.07832}, archivePrefix= {arXiv}, eprint= {1804.07832}, timestamp= {Wed, 02 May 2018 15:55:01 +0200}, biburl= {https://dblp.org/rec/bib/journals/corr/abs-1804-07832}, bibsource= {dblp computer science bibliography, https://dblp.org}}"
    record = parse_bibtex(bibtex)
    found_authors = record['author']
    expected_authors = [
        ('Antonin', 'Delpeuch'),
        ('Jamie', 'Vicary'),
    ]
    self.assertEqual(found_authors, expected_authors)
def fetch_orcid_records(self, id, profile=None, use_doi=True):
    """
    Queries ORCiD to retrieve the publications associated with a given ORCiD.
    It also fetches such papers from the CrossRef search interface.

    :param id: the ORCiD identifier, normalized with `validate_orcid`
    :param profile: The ORCID profile if it has already been fetched before
        (format: parsed JSON).
    :param use_doi: Fetch the publications by DOI when we find one
        (recommended, but slow)
    :returns: a generator, where all the papers found are yielded.
        (some of them could be in free form, hence not imported)
    :raises MetadataSourceException: if `id` is not a valid ORCiD
        identifier. As this is a generator, the exception is raised when
        it is first advanced, not when it is created.
    """
    crps = CrossRefPaperSource(self.ccf)

    # Cleanup iD:
    id = validate_orcid(id)
    if id is None:
        raise MetadataSourceException('Invalid ORCiD identifier')

    # Get ORCiD profile
    try:
        if profile is None:
            profile = OrcidProfile(id=id)
        else:
            profile = OrcidProfile(json=profile)
    except MetadataSourceException as e:
        # Without a profile there is nothing to yield.
        print(e)
        return

    # Reference name
    ref_name = profile.name
    # curl -H "Accept: application/orcid+json" 'http://pub.orcid.org/v1.2/0000-0002-8612-8827/orcid-works' -L -i
    dois = []  # list of DOIs to fetch

    # Fetch publications
    pubs = jpath('orcid-profile/orcid-activities/orcid-works/orcid-work',
                 profile, [])
    for pub in pubs:
        def j(path, default=None):
            return jpath(path, pub, default)

        # DOI
        doi = None
        for extid in j('work-external-identifiers/work-external-identifier',
                       []):
            if extid.get('work-external-identifier-type') == 'DOI':
                doi = to_doi(jpath('work-external-identifier-id/value', extid))
                if doi:
                    # If a DOI is available, create the paper using metadata from CrossRef.
                    # We don't do it yet, we only store the DOI, so that we can fetch them
                    # by batch later.
                    dois.append(doi)

        if doi and use_doi:
            continue

        # Extract information from ORCiD

        # Title
        title = j('work-title/title/value')
        if title is None:
            print("Warning: Skipping ORCID publication: no title")
            # Actually skip the record, as the message says.
            continue

        # Type
        doctype = orcid_to_doctype(j('work-type', 'other'))

        # Contributors (ignored for now as they are very often not present)
        contributors = [
            {
                'orcid': jpath('contributor-orcid', contrib),
                'name': jpath('credit-name/value', contrib),
            }
            for contrib in j('work-contributors/contributor', [])
        ]
        # Only contributors with a credit name become authors.
        authors = [
            parse_comma_name(contrib['name'])
            for contrib in contributors
            if contrib['name'] is not None
        ]

        # ORCiD internal id
        identifier = j('put-code')
        affiliations = [contrib['orcid'] for contrib in contributors]

        # Pubdate
        year = parse_int(j('publication-date/year/value'), 1970)
        month = parse_int(j('publication-date/month/value'), 1)
        day = parse_int(j('publication-date/day/value'), 1)
        pubdate = None
        try:
            # Refine the date step by step: if a more precise value is
            # rejected, the last valid, coarser one is kept.
            pubdate = date(year=year, month=1, day=1)
            pubdate = date(year=year, month=month, day=1)
            pubdate = date(year=year, month=month, day=day)
        except ValueError:
            if pubdate is None:
                print("Invalid publication date in ORCID publication, skipping")
                continue

        # Citation: fall back on the BibTeX authors when the record
        # lists no named contributors.
        bibtex = j('work-citation/citation')
        if bibtex is not None:
            try:
                entry = parse_bibtex(bibtex)
                bibtex_authors = entry.get('author', [])
                if bibtex_authors == []:
                    print("Warning: Skipping ORCID publication: no authors.")
                    print(bibtex)
                if not authors:
                    # Use the .get() result: the 'author' key may be absent.
                    authors = bibtex_authors
            except ValueError:
                # Unparseable BibTeX: keep whatever authors we already have.
                pass

        affiliations = affiliate_author_with_orcid(
            ref_name, id, authors, initial_affiliations=affiliations)

        authors = [name_lookup_cache.lookup(author) for author in authors]

        if not authors:
            print("No authors found, skipping")
            continue

        # Create paper:
        paper = BarePaper.create(title, authors, pubdate,
                                 'VISIBLE', affiliations)

        record = BareOaiRecord(
            source=orcid_oai_source,
            identifier=identifier,
            splash_url='http://orcid.org/' + id,
            pubtype=doctype)

        paper.add_oairecord(record)
        yield paper

    if use_doi:
        for metadata in crps.search_for_dois_incrementally('', {'orcid': id}):
            try:
                paper = crps.save_doi_metadata(metadata)
                if paper:
                    yield paper
            except ValueError as e:
                print("Saving CrossRef record from ORCID failed: %s" % unicode(e))

        # Now we add the DOIs found in the ORCID profile.
        doi_metadata = fetch_dois(dois)
        for metadata in doi_metadata:
            try:
                authors = [convert_to_name_pair(author)
                           for author in metadata['author']]
                affiliations = affiliate_author_with_orcid(
                    ref_name, id, authors)
                paper = crps.save_doi_metadata(metadata, affiliations)
                if not paper:
                    continue

                record = BareOaiRecord(
                    source=orcid_oai_source,
                    identifier='orcid:' + id + ':' + metadata['DOI'],
                    splash_url='http://orcid.org/' + id,
                    pubtype=paper.doctype)
                paper.add_oairecord(record)
                yield paper
            except (KeyError, ValueError, TypeError):
                # Best effort: ignore records with missing or malformed
                # metadata and carry on with the next one.
                pass
def fetch_orcid_records(self, id, profile=None, use_doi=True):
    """
    Queries ORCiD to retrieve the publications associated with a given ORCiD.
    It also fetches such papers from the CrossRef search interface.

    :param id: the ORCiD identifier, normalized with `validate_orcid`
    :param profile: The ORCID profile if it has already been fetched before
        (format: parsed JSON).
    :param use_doi: Fetch the publications by DOI when we find one
        (recommended, but slow)
    :returns: a generator, where all the papers found are yielded.
        (some of them could be in free form, hence not imported)
    :raises MetadataSourceException: if `id` is not a valid ORCiD
        identifier. As this is a generator, the exception is raised when
        it is first advanced, not when it is created.
    """
    crps = CrossRefPaperSource(self.ccf)

    # Cleanup iD:
    id = validate_orcid(id)
    if id is None:
        raise MetadataSourceException('Invalid ORCiD identifier')

    # Get ORCiD profile
    try:
        if profile is None:
            profile = OrcidProfile(id=id)
        else:
            profile = OrcidProfile(json=profile)
    except MetadataSourceException as e:
        # Without a profile there is nothing to yield.
        print(e)
        return

    # Reference name
    ref_name = profile.name
    # curl -H "Accept: application/orcid+json" 'http://pub.orcid.org/v1.2/0000-0002-8612-8827/orcid-works' -L -i
    dois = []  # list of DOIs to fetch

    # Fetch publications
    pubs = jpath('orcid-profile/orcid-activities/orcid-works/orcid-work',
                 profile, [])
    for pub in pubs:
        def j(path, default=None):
            return jpath(path, pub, default)

        # DOI
        doi = None
        for extid in j('work-external-identifiers/work-external-identifier',
                       []):
            if extid.get('work-external-identifier-type') == 'DOI':
                doi = to_doi(jpath('work-external-identifier-id/value', extid))
                if doi:
                    # If a DOI is available, create the paper using metadata from CrossRef.
                    # We don't do it yet, we only store the DOI, so that we can fetch them
                    # by batch later.
                    dois.append(doi)

        if doi and use_doi:
            continue

        # Extract information from ORCiD

        # Title
        title = j('work-title/title/value')
        if title is None:
            print("Warning: Skipping ORCID publication: no title")
            # Actually skip the record, as the message says.
            continue

        # Type
        doctype = orcid_to_doctype(j('work-type', 'other'))

        # Contributors (ignored for now as they are very often not present)
        contributors = [
            {
                'orcid': jpath('contributor-orcid', contrib),
                'name': jpath('credit-name/value', contrib),
            }
            for contrib in j('work-contributors/contributor', [])
        ]
        # Only contributors with a credit name become authors.
        authors = [
            parse_comma_name(contrib['name'])
            for contrib in contributors
            if contrib['name'] is not None
        ]

        # ORCiD internal id
        identifier = j('put-code')
        affiliations = [contrib['orcid'] for contrib in contributors]

        # Pubdate
        year = parse_int(j('publication-date/year/value'), 1970)
        month = parse_int(j('publication-date/month/value'), 1)
        day = parse_int(j('publication-date/day/value'), 1)
        pubdate = None
        try:
            # Refine the date step by step: if a more precise value is
            # rejected, the last valid, coarser one is kept.
            pubdate = date(year=year, month=1, day=1)
            pubdate = date(year=year, month=month, day=1)
            pubdate = date(year=year, month=month, day=day)
        except ValueError:
            if pubdate is None:
                print("Invalid publication date in ORCID publication, skipping")
                continue

        # Citation: fall back on the BibTeX authors when the record
        # lists no named contributors.
        bibtex = j('work-citation/citation')
        if bibtex is not None:
            try:
                entry = parse_bibtex(bibtex)
                bibtex_authors = entry.get('author', [])
                if bibtex_authors == []:
                    print("Warning: Skipping ORCID publication: no authors.")
                    print(bibtex)
                if not authors:
                    # Use the .get() result: the 'author' key may be absent.
                    authors = bibtex_authors
            except ValueError:
                # Unparseable BibTeX: keep whatever authors we already have.
                pass

        affiliations = affiliate_author_with_orcid(
            ref_name, id, authors, initial_affiliations=affiliations)

        authors = [name_lookup_cache.lookup(author) for author in authors]

        if not authors:
            print("No authors found, skipping")
            continue

        # Create paper:
        paper = BarePaper.create(title, authors, pubdate,
                                 'VISIBLE', affiliations)

        record = BareOaiRecord(
            source=orcid_oai_source,
            identifier=identifier,
            splash_url='http://orcid.org/' + id,
            pubtype=doctype)

        paper.add_oairecord(record)
        yield paper

    if use_doi:
        for metadata in crps.search_for_dois_incrementally('', {'orcid': id}):
            try:
                paper = crps.save_doi_metadata(metadata)
                if paper:
                    yield paper
            except ValueError as e:
                print("Saving CrossRef record from ORCID failed: %s" % unicode(e))

        # Now we add the DOIs found in the ORCID profile.
        doi_metadata = fetch_dois(dois)
        for metadata in doi_metadata:
            try:
                authors = [convert_to_name_pair(author)
                           for author in metadata['author']]
                affiliations = affiliate_author_with_orcid(
                    ref_name, id, authors)
                paper = crps.save_doi_metadata(metadata, affiliations)
                if not paper:
                    continue

                record = BareOaiRecord(
                    source=orcid_oai_source,
                    identifier='orcid:' + id + ':' + metadata['DOI'],
                    splash_url='http://orcid.org/' + id,
                    pubtype=paper.doctype)
                paper.add_oairecord(record)
                yield paper
            except (KeyError, ValueError, TypeError):
                # Best effort: ignore records with missing or malformed
                # metadata and carry on with the next one.
                pass
def test_no_newlines(self):
    """
    Parse a BibTeX entry written on a single line and check that both
    authors are extracted as (first, last) pairs.
    """
    bibtex = "@article{DBLP:journals/corr/abs-1804-07832, author= {Antonin Delpeuch and Jamie Vicary}, title= {Normal forms for planar connected string diagrams}, journal= {CoRR}, volume= {abs/1804.07832}, year= {2018}, url= {http://arxiv.org/abs/1804.07832}, archivePrefix= {arXiv}, eprint= {1804.07832}, timestamp= {Wed, 02 May 2018 15:55:01 +0200}, biburl= {https://dblp.org/rec/bib/journals/corr/abs-1804-07832}, bibsource= {dblp computer science bibliography, https://dblp.org}}"
    record = parse_bibtex(bibtex)
    expected_authors = [('Antonin', 'Delpeuch'), ('Jamie', 'Vicary')]
    self.assertEqual(record['author'], expected_authors)