def test_format_inbook(app):
    """Load literature record 1375491 and compare it with the expected ``inbook`` entry.

    ``app`` is requested as a fixture but not used directly in the body.
    """
    record = get_db_record('lit', 1375491)

    expected_fields = [
        ('pages', u"421--462"),
        ('title', u"Supersymmetry"),
        ('year', u"2015"),
        ('doi', u"10.1007/978-3-319-15001-7_10"),
        ('archivePrefix', u"arXiv"),
        ('eprint', u"1506.03091"),
        ('primaryClass', u"hep-ex"),
    ]
    expected_persons = {
        'editor': [],
        'author': [
            Person(u"Bechtle, Philip"),
            Person(u"Plehn, Tilman"),
            Person(u"Sander, Christian"),
        ],
    }
    expected = (
        "Bechtle:2015nta",
        Entry('inbook', expected_fields, persons=expected_persons),
    )

    result = PybtexSchema().load(record)

    assert result is not None
    assert pybtex_entries_equal(result, expected)
def create_bibliography_entry(self, record):
    """Build a ``(texkey, Entry)`` pair for ``record``.

    The record is dumped through ``self.schema_class``; ``doc_type``,
    ``texkey`` and the two author-role lists are popped from the dumped
    data, and the remaining fields are filtered through
    ``self.fields_and_doc_types``.

    Args:
        record: the record handed to ``self.schema_class.dump``.

    Returns:
        tuple: ``(texkey, Entry)``; ``texkey`` and the entry type are
        ``None`` when the dumped data lacks them.
    """
    dumped = self.schema_class.dump(record).data
    entry_type = dumped.pop("doc_type", None)
    key = dumped.pop("texkey", None)

    persons = {
        "author": [
            Person(name) for name in dumped.pop("authors_with_role_author")
        ],
        "editor": [
            Person(name) for name in dumped.pop("authors_with_role_editor")
        ],
    }

    entry_fields = []
    for field, doc_types in self.fields_and_doc_types:
        # Skip fields that are missing or empty in the dumped data.
        if not dumped.get(field):
            continue
        # ``True`` means the field applies to every document type.
        if doc_types is True or entry_type in doc_types:
            entry_fields.append((field, str(dumped[field])))

    return (key, Entry(entry_type, entry_fields, persons=persons))
def test_format_proceeding(app):
    """Load literature record 701585 and compare it with the expected ``proceedings`` entry.

    ``app`` is requested as a fixture but not used directly in the body.
    """
    record = get_db_record('lit', 701585)

    expected_fields = [
        ('address', u"Geneva"),
        ('pages', u"pp.1--326"),
        ('publisher', u"CERN"),
        ('title',
         u"HERA and the LHC: A Workshop on the implications of HERA for LHC physics: Proceedings Part A"),
        ('year', u"2005"),
        ('reportNumber', u"CERN-2005-014, DESY-PROC-2005-01"),
        ('archivePrefix', u"arXiv"),
        ('eprint', u"hep-ph/0601012"),
        ('url', u"http://weblib.cern.ch/abstract?CERN-2005-014"),
    ]
    expected_persons = {
        'editor': [Person(u"De Roeck, A."), Person(u"Jung, H.")],
        'author': [],
    }
    expected = (
        "Alekhin:2005dx",
        Entry('proceedings', expected_fields, persons=expected_persons),
    )

    result = PybtexSchema().load(record)

    assert result is not None
    assert pybtex_entries_equal(result, expected)
class MacrosTest(ParserTest, TestCase):
    """Parser fixture covering ``@String`` macros, ``#`` concatenation and an undefined macro."""

    # Two macros are defined (``etal`` is built from ``and`` plus a braced
    # literal).  The first entry references ``nobody``, which is never defined.
    input_string = u"""
        @String{and = { and }}
        @String{etal = and # { {et al.}}}
        @Article(
            unknown,
            author = nobody,
        )
        @Article(
            gsl,
            author = "Gough, Brian"#etal,
        )
    """
    # 'unknown' is expected without persons; 'gsl' is expected with two
    # authors, the second one being the braced "{et al.}" literal.
    correct_result = BibliographyData([
        ('unknown', Entry('article')),
        ('gsl', Entry('article', persons={
            u'author': [Person(u'Gough, Brian'), Person(u'{et al.}')]
        })),
    ])
    # The reported line is the ``author = nobody`` line, counting the empty
    # first line of ``input_string`` as line 1.
    errors = [
        'undefined string in line 6: nobody',
    ]
def create_bibliography_entry(self, record):
    """Build a ``(texkey, Entry)`` pair for ``record``.

    The set of fields kept is the union of ``self.COMMON_FIELDS_FOR_ENTRIES``
    and the entry in ``self.FIELDS_FOR_ENTRY_TYPE`` for the BibTeX document
    type reported by the schema.  Kept fields are stringified and sorted by
    field name.  The ``Entry`` itself is created with the ``doc_type`` value
    popped from the dumped data.

    Args:
        record: the record handed to the schema class.

    Returns:
        tuple: ``(texkey, Entry)``.
    """
    bibtex_document_type = self.schema_class.get_bibtex_document_type(record)
    dumped = self.schema_class.dump(record).data

    entry_type = dumped.pop("doc_type", None)
    key = dumped.pop("texkey", None)
    author_persons = [
        Person(name) for name in dumped.pop("authors_with_role_author")
    ]
    editor_persons = [
        Person(name) for name in dumped.pop("authors_with_role_editor")
    ]

    allowed = (self.COMMON_FIELDS_FOR_ENTRIES
               | self.FIELDS_FOR_ENTRY_TYPE[bibtex_document_type])

    # Keep only non-empty, allowed fields, then order them by field name.
    entry_fields = []
    for name, value in dumped.items():
        if value and name in allowed:
            entry_fields.append((name, str(value)))
    entry_fields.sort(key=lambda pair: pair[0])

    entry = Entry(
        entry_type,
        entry_fields,
        persons={"author": author_persons, "editor": editor_persons},
    )
    return (key, entry)
def filter_bibentry(self, entry):
    """Rewrite the persons of ``entry`` in place, role by role.

    ``entry`` is a ``pybtex.database.Entry`` object.  For every role in
    ``self.roles`` that the entry has, each person is converted to a string,
    optionally passed through ``latex2text`` (``self._names_to_utf8``),
    re-parsed, and replaced by a new ``Person`` built from its first, middle,
    prelast, last and lineage parts.
    """
    for role in self.roles:
        if role not in entry.persons:
            continue

        for idx, original in enumerate(entry.persons[role]):
            # de-latex the person first
            name = unicode(original)
            # BUG: FIXME: remove space after any macros, i.e. replace
            # "blah\macro blah" by "blah\macro{}blah"
            name = re.sub(r'(\\[a-zA-Z]+)\s+', r'\1{}', name)
            if self._names_to_utf8:
                name = latex2text.latex2text(name)
            person = Person(name)

            if self._only_single_letter_firsts:
                from pybtex.textutils import abbreviate

                def abbreviated_parts(part_name):
                    # Only parts that are exactly one character long go
                    # through ``abbreviate``; the others are kept as they are.
                    return [
                        abbreviate(part) if len(part) == 1 else part
                        for part in person.get_part(part_name, False)
                    ]

                first = " ".join(abbreviated_parts('first'))
                middle = " ".join(abbreviated_parts('middle'))
            else:
                first = " ".join(person.first(True))
                middle = " ".join(person.middle(True))

            entry.persons[role][idx] = Person(
                '',
                first,
                middle,
                " ".join(person.prelast()),
                " ".join(person.last()),
                " ".join(person.lineage()),
            )

    return
def process_person(person_entry, role):
    """Add the person(s) described by ``person_entry`` to ``e`` under ``role``.

    Three shapes are handled:

    * the element contains ``person`` children: each child is processed
      recursively;
    * the element carries non-blank text: the text is passed to ``Person``
      as a single name string;
    * otherwise: each child element becomes a keyword argument of ``Person``,
      keyed by its tag with the namespace removed.

    Args:
        person_entry: XML element describing one person or a list of persons.
        role: role name passed on to ``e.add_person``.
    """
    persons = person_entry.findall(bibtexns + 'person')
    if persons:
        for person in persons:
            process_person(person, role)
        return

    # ``Element.text`` is None (not '') when the element starts directly with
    # a child element, so guard before stripping.
    text = (person_entry.text or '').strip()
    if text:
        e.add_person(Person(text), role)
        return

    names = {}
    for name in person_entry:
        names[remove_ns(name.tag)] = name.text
    e.add_person(Person(**names), role)
def json2bib(jsonstring, key, type='article'):
    """Convert a JSON string into the string form of a Bibentry.

    Args:
        jsonstring: JSON document describing one reference.  The keys
            ``author`` (list of dicts with ``given`` and ``family``),
            ``issued`` and ``editor`` get special treatment; every other key
            is passed to ``Entry`` as a field.
        key: citation key handed to ``Bibentry``.
        type: entry type handed to ``Entry``.

    Returns:
        The result of ``Bibentry(key, entry).as_string()``, or ``None`` when
        ``jsonstring`` is empty or ``None``.
    """
    # Test the argument, not the ``json`` module: a module object is always
    # truthy, so empty input used to fall through to ``json.loads``.
    if not jsonstring:
        return None

    data = json.loads(jsonstring)

    # Authors are attached as persons below, not as a plain field.
    authors = data.pop('author', None)

    if 'issued' in data:
        data['year'] = str(data['issued']['date-parts'][0][0])
        del data['issued']

    # delete other problematic fields
    data.pop('editor', None)

    entry = Entry(type, fields=data)
    if authors:
        for author in authors:
            entry.add_person(
                Person(first=author['given'], last=author['family']),
                'author')
    return Bibentry(key, entry).as_string()
def from_string(cls, name):
    """Create an instance of ``cls`` from a single name string.

    The string is handed to ``Person``; each name part (obtained by calling
    the method of the same name) is joined with spaces, and the first and
    middle parts are merged into one "given" value.

    Returns:
        ``cls(prelast, last, given, lineage)``.
    """
    person = Person(name)

    joined = {}
    for part in ('prelast', 'last', 'first', 'middle', 'lineage'):
        accessor = getattr(person, part)
        joined[part] = ' '.join(accessor())

    # Empty first/middle parts are dropped so no stray space is produced.
    given_parts = [
        piece for piece in (joined['first'], joined['middle']) if piece
    ]
    return cls(
        joined['prelast'],
        joined['last'],
        ' '.join(given_parts),
        joined['lineage'],
    )
class InlineCommentTest(ParserTest, TestCase):
    """Parser fixture for ``% some text`` comments placed inside entries."""

    # The free text before the first entry is part of the input.  Two of the
    # entries contain a ``% some text`` comment inside their body.
    input_string = u"""
        "some text" causes an error like this
        ``You're missing a field name---line 6 of file bibs/inline_comment.bib``
        for all 3 of the % some text occurences below; in each case the parser keeps
        what it has up till that point and skips, so that it correctly gets the last
        entry.
        @article{Me2010,}
        @article{Me2011,
            author="Brett-like, Matthew",
        % some text
            title="Another article"}
        @article{Me2012, % some text
            author="Real Brett"}
        This one correctly read
        @article{Me2013,}
    """
    # Only what precedes each inline comment is expected to survive: Me2011
    # keeps its author but not its title, Me2012 keeps nothing.
    correct_result = BibliographyData([
        ('Me2010', Entry('article')),
        ('Me2011', Entry('article', persons={
            'author': [
                Person(first='Matthew', last='Brett-like'),
            ]
        })),
        ('Me2012', Entry('article')),
        ('Me2013', Entry('article')),
    ])
    # Line numbers count the empty first line of ``input_string`` as line 1,
    # which puts the two inline comments on lines 10 and 12.
    errors = [
        "syntax error in line 10: '}' expected",
        "syntax error in line 12: '}' expected",
    ]
def to_bib_data(self):
    """Convert ``self.data`` to BibTeX data.

    Returns:
        tuple: ``(bibtype, persons, fields)`` where ``persons`` and
        ``fields`` are ``OrderedDict`` instances.  Records whose ``bibstem``
        is "arxiv" (case-insensitive) get ``archiveprefix``/``eprint``; all
        others get ``volume``/``page``.  ``doi``, ``keywords`` and
        ``abstract`` are added only when present and non-empty.
    """
    record = self.data
    bibtype = record["doctype"]

    persons = OrderedDict()
    persons["author"] = [Person(name) for name in record["author"]]

    fields = OrderedDict()
    fields["title"] = record["title"][0]
    fields["adsbibcode"] = record["bibcode"]
    fields["url"] = self.ADS_URL % {"bibcode": record["bibcode"]}
    fields["journal"] = record["pub"]
    fields["year"] = record["year"]

    is_arxiv = record["bibstem"].lower() == "arxiv"
    if is_arxiv:
        extra = [
            ("archiveprefix", "arXiv"),
            ("eprint", record["identifier"][0]),
        ]
    else:
        extra = [
            ("volume", record["volume"]),
            ("page", record["page"][0]),
        ]
    for name, value in extra:
        fields[name] = value

    doi = record.get("doi")
    if doi:
        fields["doi"] = doi[0]
    keywords = record.get("keyword")
    if keywords:
        fields["keywords"] = ", ".join(keywords)
    abstract = record.get("abstract")
    if abstract:
        fields["abstract"] = abstract

    return (bibtype, persons, fields)
def __init__(self, name, replace_by, match_mode=MATCHMODE_INITIAL):
    r"""
    Set up the filter for one author name.

    Arguments:

      - name: The author name to match, specified as "Doe, John" or
        "John Doe".

      - replace_by: The string to replace the given author by. This can be
        any LaTeX content, such as "\emph{John Doe}" or "\johndoe".

      - match_mode(MatchMode): Specify how to match the first name; may be
        one of 'exact', 'initial' (the default), or 'partial'.
    """
    self.markname = name
    self.replace_by = replace_by
    self.matchmode = MatchMode(match_mode)

    # Pre-compute the normalized forms the matchers compare against.
    person = Person(self.markname)
    self.person = person
    self.last_names_normed = normnamelist(person.get_part('last'))
    first_names = normnamelist(person.get_part('first'))
    self.first_names_normed = first_names
    self.first_names_normed_joined = " ".join(first_names)
    self.first_initial = getnameinitial(first_names)

    # Select the matcher for the requested mode; an unknown mode value
    # raises KeyError here.
    matchers = {
        MATCHMODE_EXACT: self._match_name_exact,
        MATCHMODE_INITIAL: self._match_name_initial,
        MATCHMODE_PARTIAL: self._match_name_partial,
    }
    self.match_fn = matchers[self.matchmode.value]

    super(MarkAuthorFilter, self).__init__()
def load(self, record):
    """Deserialize an INSPIRE record into a ``(texkey, Entry)`` pair.

    Takes an INSPIRE record and converts it to a ``pybtex.database.Entry``.
    Authors and editors are expressed as ``pybtex.database.Person`` objects
    in the entry's ``persons`` dict; every other value is produced by the
    matching extractor in ``extractor.store`` and passed as a text field.

    Args:
        record (dict): literature record from API

    Returns:
        tuple: ``(texkey, pybtex.database.Entry)``.  When the record has no
        texkey, its ``control_number`` (as a string) is used as the key and
        an error is logged.
    """
    doc_type, fields = bibtex_type_and_fields(record)

    try:
        texkey = record['texkeys'][0]
    except (KeyError, IndexError):
        # A missing ``texkeys`` key and an empty ``texkeys`` list both mean
        # "no texkey"; fall back to the control number in either case.
        texkey = str(record['control_number'])
        LOGGER.error('No texkey for record ID {}'.format(
            record['control_number']))

    template_data = []
    for field in fields:
        if field not in extractor.store:
            continue
        field_value = extractor.store[field](record, doc_type)
        # Empty extractor results are left out of the entry.
        if field_value:
            template_data.append((field, text_type(field_value)))

    # Note: human-authors are put in `persons' dict, corporate author will be
    # passed as a field in template data.
    authors = record.get('authors', [])
    persons = {
        'author': [
            Person(x) for x in get_authors_with_role(authors, 'author')
        ],
        'editor': [
            Person(x) for x in get_authors_with_role(authors, 'editor')
        ],
    }
    return (texkey, Entry(doc_type, template_data, persons=persons))
def parse_name(name):
    """Split ``name`` into its parts and return them as a ``Person``.

    The name is rendered by ``format_name`` with the format
    ``'{ff}|{vv}|{ll}|{jj}'``; the four ``|``-separated pieces are used as
    first, prelast ("von"), last and lineage ("junior") parts.  Runs of
    whitespace and ``~`` inside each piece are collapsed to a single space.

    Raises:
        ValueError: if the formatted name does not split into exactly four
            pieces (for example when a name part itself contains ``|``).
    """
    # Raw string: ``\s`` in a plain literal is an invalid escape sequence and
    # triggers a warning on current Python versions.
    space = re.compile(r'[\s~]+')
    formatted_name = format_name(name, '{ff}|{vv}|{ll}|{jj}')
    parts = [
        space.sub(' ', part.strip()) for part in formatted_name.split('|')
    ]
    first, von, last, junior = parts
    return Person(first=first, prelast=von, last=last, lineage=junior)
def from_string(cls, name):
    """Create an instance of ``cls`` from a single name string.

    The string is handed to ``Person``; each ``*_names`` attribute is joined
    with spaces, and the first and middle names are merged into one "given"
    value.

    Returns:
        ``cls(prelast, last, given, lineage)``.
    """
    person = Person(name)
    attribute_names = (
        'prelast_names',
        'last_names',
        'first_names',
        'middle_names',
        'lineage_names',
    )
    joined = [' '.join(getattr(person, attr)) for attr in attribute_names]
    prelast, last, first, middle, lineage = joined

    # Empty first/middle values are dropped so no stray space is produced.
    given_parts = [piece for piece in (first, middle) if piece]
    return cls(prelast, last, ' '.join(given_parts), lineage)
def parse_name(name, correct_result, expected_errors=None):
    """Assert that ``Person(name)`` yields the expected parts and errors.

    Args:
        name: name string handed to ``Person``.
        correct_result: expected tuple of
            ``(bibtex_first_names, prelast_names, last_names, lineage_names)``.
        expected_errors: errors expected to be captured while constructing
            the person; defaults to an empty list.
    """
    wanted_errors = [] if expected_errors is None else expected_errors

    # Only the construction runs inside the capture context.
    with errors.capture() as captured_errors:
        person = Person(name)

    actual = (
        person.bibtex_first_names,
        person.prelast_names,
        person.last_names,
        person.lineage_names,
    )
    assert actual == correct_result
    assert captured_errors == wanted_errors
def process_entry(self, entry):
    """Convert a mapping describing one entry into an ``Entry``.

    Keys listed in ``Person.valid_roles`` hold a sequence of keyword
    dictionaries, one per person; the ``type`` key selects the entry type;
    every other key becomes a unicode field.

    Returns:
        The populated ``Entry``.
    """
    result = Entry(entry['type'])
    for name, value in entry.iteritems():
        if name in Person.valid_roles:
            for person_kwargs in value:
                result.add_person(Person(**person_kwargs), name)
            continue
        if name == 'type':
            # Already consumed when the Entry was created.
            continue
        result.fields[name] = unicode(value)
    return result
class SimpleEntryTest(ParserTest, TestCase):
    """Parser fixture: two comment lines followed by a single ``@ARTICLE`` entry."""

    # The author value spans two lines and the ``institution`` value is empty.
    input_string = u"""
        % maybe the simplest possible
        % just a comment and one reference

        @ARTICLE{Brett2002marsbar,
        author = {Matthew Brett and Jean-Luc Anton and Romain Valabregue and Jean-Baptise
            Poline},
        title = {{Region of interest analysis using an SPM toolbox}},
        journal = {Neuroimage},
        institution = {},
        year = {2002},
        volume = {16},
        pages = {1140--1141},
        number = {2}
        }
    """
    # The expected title keeps its inner pair of braces, the empty
    # institution is expected as an empty string, and the four authors are
    # expected as separate persons.
    correct_result = BibliographyData({
        'Brett2002marsbar': Entry(
            'article',
            fields=[
                ('title', '{Region of interest analysis using an SPM toolbox}'),
                ('journal', 'Neuroimage'),
                ('institution', ''),
                ('year', '2002'),
                ('volume', '16'),
                ('pages', '1140--1141'),
                ('number', '2'),
            ],
            persons={
                'author': [
                    Person(first='Matthew', last='Brett'),
                    Person(first='Jean-Luc', last='Anton'),
                    Person(first='Romain', last='Valabregue'),
                    Person(first='Jean-Baptise', last='Poline'),
                ],
            },
        )
    })
def process_entry(self, entry):
    """Convert a mapping describing one entry into an ``Entry``.

    Keys are classified case-insensitively: a key whose lowercase form is in
    ``Person.valid_roles`` holds a sequence of keyword dictionaries, one per
    person; ``type`` selects the entry type; every other key becomes a
    unicode field.  Persons and fields are stored under the key as written.

    Returns:
        The populated ``Entry``.
    """
    result = Entry(entry['type'])
    for key, value in entry.iteritems():
        folded = key.lower()
        if folded in Person.valid_roles:
            for person_kwargs in value:
                result.add_person(Person(**person_kwargs), key)
            continue
        if folded == 'type':
            # Already consumed when the Entry was created.
            continue
        result.fields[key] = unicode(value)
    return result
class EntryInStringTest(ParserTest, TestCase):
    """Parser fixture: text that looks like an entry inside a quoted field value."""

    # The title of Me2010 is a quoted string that itself contains
    # ``@article{...}`` text and spans several lines.
    input_string = u"""
        @article{Me2010, author="Brett, Matthew", title="An article
        @article{something, author={Name, Another}, title={not really an article}}
        "}
        @article{Me2009,author={Nom de Plume, My}, title="A short story"}
    """
    # Exactly two entries are expected; the embedded ``@article`` text is
    # expected to stay inside the title of Me2010, on a single line.
    correct_result = BibliographyData(
        entries=[
            (u'Me2010', Entry(u'article',
                fields=[
                    (u'title', u'An article @article{something, author={Name, Another}, title={not really an article}}'),
                ],
                persons=[(u'author', [Person(u'Brett, Matthew')])]
            )),
            (u'Me2009', Entry(u'article',
                fields=[(u'title', u'A short story')],
                persons={u'author': [Person(u'Nom de Plume, My')]}
            )),
        ]
    )
def update_entry_from_crossref(ref, entry):
    """Return a copy of ``entry`` updated with metadata from ``ref``.

    Args:
        ref (dict): one work record with Crossref-style keys
            (``container-title``, ``issue``, ``page``, ``title``, ``URL``,
            ``volume``, ``published-print``/``published-online``, ``DOI``,
            ``ISSN``, ``author``, ``score``).
        entry: bibliography entry exposing ``fields`` and ``persons``.

    Returns:
        The copied entry.  Besides the updated fields it carries
        ``relevance`` (``ref['score']``, default 100) and, when ``ref`` has
        a non-empty title, ``similarity`` between the old and new title.
        The authors from ``ref`` replace the entry's authors only when
        ``ref`` lists at least as many.
    """
    # NOTE(review): if ``copy`` is the shallow ``copy.copy``, ``fields`` is
    # still shared with the caller's entry -- confirm that this is intended.
    entry = copy(entry)

    # List-valued keys can be present but empty; index them only when they
    # hold something, the same way the title is handled below.
    if ref.get('container-title'):
        entry.fields['journal'] = ref['container-title'][0]
    if 'issue' in ref:
        entry.fields['number'] = str(ref['issue'])
    if 'page' in ref:
        entry.fields['pages'] = str(ref['page']).replace('-', '--')
    if 'title' in ref and ref['title']:
        entry.similarity = similar(entry.fields['title'], ref['title'][0])
        entry.fields['title'] = ref['title'][0]
    if 'URL' in ref:
        entry.fields['url'] = ref['URL']
    if 'volume' in ref:
        entry.fields['volume'] = str(ref['volume'])

    if 'published-print' in ref:  # prioritize print vs online
        entry.fields['year'] = str(ref['published-print']['date-parts'][0][0])
    elif 'published-online' in ref:
        entry.fields['year'] = str(ref['published-online']['date-parts'][0][0])

    if 'DOI' in ref:
        entry.fields['doi'] = ref['DOI']
    if ref.get('ISSN'):
        entry.fields['issn'] = ref['ISSN'][0]

    if 'author' in ref:
        persons = {'author': []}
        for author in ref['author']:
            if 'family' in author:
                authorname = author['family']
                if 'given' in author:
                    authorname += ', ' + author['given']
                person = Person(authorname)
            else:
                # Author is not a person but an organization or similar
                person = Person(author['name'])
            persons['author'].append(person)
        # Keep the existing author list when it is longer than the new one.
        if len(entry.persons.get('author', [])) <= len(persons['author']):
            entry.persons = persons

    entry.relevance = ref.get('score', 100)
    return entry
class DuplicatePersonFieldTest(ParserTest, TestCase):
    """Parser fixture: one entry that has the author field twice, in different case."""

    # ``author`` and ``AUTHoR`` differ only in letter case.
    input_string = u"""
        @article{Me2009,author={Nom de Plume, My}, title="A short story", AUTHoR = {Foo}}
    """
    # Only the first author value is expected in the result.
    correct_result = BibliographyData(entries=[
        (u'Me2009', Entry(u'article',
            fields=[(u'title', u'A short story')],
            persons={u'author': [Person(u'Nom de Plume, My')]})),
    ])
    # The message quotes the field name as it is written in the input.
    errors = [
        'entry with key Me2009 has a duplicate AUTHoR field',
    ]
class EntryInStringTest(ParserTest, TestCase):
    """Parser fixture: text that looks like an entry inside a quoted field value."""

    # The title of Me2010 is a quoted string that itself contains
    # ``@article{...}`` text and spans several lines.
    input_string = """
        @article{Me2010, author="Brett, Matthew", title="An article
        @article{something, author={Name, Another}, title={not really an article}}
        "}
        @article{Me2009,author={Nom de Plume, My}, title="A short story"}
    """
    # Exactly two entries are expected; the embedded ``@article`` text is
    # expected to stay inside the title of Me2010, on a single line.
    correct_result = BibliographyData(
        entries={
            'Me2010': Entry(
                'article',
                fields={
                    'title': 'An article @article{something, author={Name, Another}, title={not really an article}}'
                },
                persons={'author': [Person('Brett, Matthew')]}),
            'Me2009': Entry('article',
                            fields={'title': 'A short story'},
                            persons={'author': [Person('Nom de Plume, My')]})
        })
def process_entry(self, entry_type, key, fields):
    """Build an ``Entry`` from parsed fields and add it to ``self.data``.

    Args:
        entry_type: type handed to ``Entry``.
        key: citation key; ``None`` is replaced by a generated
            ``unnamed-<n>`` key and the counter is advanced.
        fields: iterable of ``(field_name, field_value_list)`` pairs.
    """
    new_entry = Entry(entry_type)

    if key is None:
        key = 'unnamed-%i' % self.unnamed_entry_counter
        self.unnamed_entry_counter += 1

    for field_name, field_value_list in fields:
        flattened = self.flatten_value_list(field_value_list)
        field_value = textutils.normalize_whitespace(flattened)

        if field_name not in self.person_fields:
            new_entry.fields[field_name] = field_value
            continue

        # Person fields hold a list of names; each one becomes a Person.
        for person_name in split_name_list(field_value):
            new_entry.add_person(Person(person_name), field_name)

    self.data.add_entry(key, new_entry)
class CrossFileMacrosTest(ParserTest, TestCase):
    """Parser fixture: a macro defined in one input string and used in the next one."""

    # The first string only defines ``jackie``; the second one uses it both
    # as the author value and inside a concatenated title.
    input_strings = [
        u'@string{jackie = "Jackie Chan"}',
        u""",
            @Book{
                i_am_jackie,
                author = jackie,
                title = "I Am " # jackie # ": My Life in Action",
            }
        """,
    ]
    # The macro is expected to be expanded in both places.
    correct_result = BibliographyData({
        'i_am_jackie': Entry('book',
            fields=[('title', 'I Am Jackie Chan: My Life in Action')],
            persons={'author': [Person(u'Chan, Jackie')]}),
    })
def test_format_article(app):
    """Load literature record 4328 and compare it with the expected ``article`` entry.

    ``app`` is requested as a fixture but not used directly in the body.
    """
    record = get_db_record('lit', 4328)

    expected_fields = [
        ('journal', u'Nucl.Phys.'),
        ('pages', u'579--588'),
        ('title', u'Partial Symmetries of Weak Interactions'),
        ('volume', u'22'),
        ('year', u'1961'),
        ('doi', u'10.1016/0029-5582(61)90469-2'),
    ]
    expected_persons = {
        'editor': [],
        'author': [Person(u"Glashow, S.L.")],
    }
    expected = (
        "Glashow:1961tr",
        Entry('article', expected_fields, persons=expected_persons),
    )

    result = PybtexSchema().load(record)

    assert result is not None
    assert pybtex_entries_equal(result, expected)
def process_entry(self, entry_type, key, fields):
    """Build an ``Entry`` from parsed fields and add it to ``self.data``.

    A field whose lowercased name was already seen in this entry is reported
    through ``self.handle_error`` as a ``DuplicateField`` and skipped, so the
    first occurrence wins.

    Args:
        entry_type: type handed to ``Entry``.
        key: citation key; ``None`` is replaced by a generated
            ``unnamed-<n>`` key and the counter is advanced.
        fields: iterable of ``(field_name, field_value_list)`` pairs.
    """
    new_entry = Entry(entry_type)

    if key is None:
        key = 'unnamed-%i' % self.unnamed_entry_counter
        self.unnamed_entry_counter += 1

    seen_lowercase_names = set()
    for field_name, field_value_list in fields:
        folded_name = field_name.lower()
        if folded_name in seen_lowercase_names:
            self.handle_error(DuplicateField(key, field_name))
            continue

        flattened = self.flatten_value_list(field_value_list)
        field_value = textutils.normalize_whitespace(flattened)

        if field_name in self.person_fields:
            for person_name in split_name_list(field_value):
                new_entry.add_person(Person(person_name), field_name)
        else:
            new_entry.fields[field_name] = field_value

        seen_lowercase_names.add(folded_name)

    self.data.add_entry(key, new_entry)
def test_format_book(app):
    """Load literature record 736770 and compare it with the expected ``book`` entry.

    ``app`` is requested as a fixture but not used directly in the body.
    """
    record = get_db_record('lit', 736770)

    expected_fields = [
        ('publisher', u"Cambridge University Press"),
        ('title', u"Differential geometry and Lie groups for physicists"),
        ('year', u"2011"),
        ('isbn', u"978-0-521-18796-1, 978-0-521-84507-6, 978-0-511-24296-0"),
    ]
    expected_persons = {
        'editor': [],
        'author': [Person(u"Fecko, M.")],
    }
    expected = (
        "Fecko:2006zy",
        Entry('book', expected_fields, persons=expected_persons),
    )

    result = PybtexSchema().load(record)

    assert result is not None
    assert pybtex_entries_equal(result, expected)
def split_reference(reference: str) -> dict:
    """
    Build a dictionary from the information present on a _single_ reference line.

    reference: raw text of one line from the "cited-references" field of a
               WoS .bib file.

    return: a dict with 'authors' plus whichever of year, journal, vol, page
            and doi are not None.  When the line does not match the citation
            pattern, the whole line is taken as the publication name and
            'authors' is empty.
    """
    # removes the \_ from DOIs
    cleaned = reference.replace(r"\_", "_")

    # removes the non-list {[} ]
    cleaned = re.sub(r"\{\[\}([^,\]]*?)\]", r"\1", cleaned)

    # replaces inner lists {[} X, Y] with X
    # the first part is usually the same but in chinese/etc
    cleaned = _listPattern.sub(r'\2', cleaned)

    found = _citePattern.search(cleaned)
    if not found:
        # we know this is a reference. It might be only the name of the
        # publication
        return {
            'authors': [],
            'journal': titlecase(reference),
        }

    doi = found.group('doi')
    if doi:
        # removes spaces in the DOI, yes, we have them
        doi = doi.replace(' ', '')

    author = Person(string=_properName(found.group('author')))
    article = {'authors': [author]}

    dict_update_notNone_inplace(article, 'year', found.group('year'))
    dict_update_notNone_inplace(article, 'journal',
                                titlecase(found.group('journal')))
    dict_update_notNone_inplace(article, 'vol', found.group('vol'))
    dict_update_notNone_inplace(article, 'page', found.group('page'))
    dict_update_notNone_inplace(article, 'doi', doi)

    return article
def data2bib(data, key, type='article'):
    """Convert a python dict into the string form of a Bibentry.

    Args:
        data: mapping of field names to values.  An optional ``authors``
            item is turned into persons instead of a field; it may be a
            sequence of names or a single string.  The mapping itself is
            left unmodified.
        key: citation key handed to ``Bibentry``.
        type: entry type handed to ``Entry``.

    Returns:
        The result of ``Bibentry(key, entry).as_string()``, or ``None`` when
        ``data`` is empty or ``None``.
    """
    if not data:
        return None

    # Work on a copy so that dropping 'authors' does not alter the caller's
    # dict.
    fields = dict(data)

    # Authors are attached as persons below, not as a plain field.
    authors = fields.pop('authors', None)
    if isinstance(authors, str):
        authors = split_name_list(authors)
        # NOTE(review): a single resulting name is split on commas,
        # presumably to accept comma-separated author lists -- confirm, since
        # "Last, First" would be split into two persons as well.
        if len(authors) == 1:
            authors = authors[0].split(',')

    entry = Entry(type, fields=fields)
    if authors:
        for p in authors:
            entry.add_person(Person(p), 'author')
    return Bibentry(key, entry).as_string()
def process_entry(self, entry_type, key, fields):
    """Build an ``Entry`` from parsed fields and add it to ``self.data``.

    A person field that occurs more than once (same spelling) is reported
    through ``self.handle_error`` as a ``DuplicatePersonField`` and the
    repeated occurrence is skipped.  Ordinary fields are simply overwritten.

    Args:
        entry_type: type handed to ``Entry``.
        key: citation key; ``None`` is replaced by a generated
            ``unnamed-<n>`` key and the counter is advanced.
        fields: iterable of ``(field_name, field_value_list)`` pairs.
    """
    new_entry = Entry(entry_type)

    if key is None:
        key = 'unnamed-%i' % self.unnamed_entry_counter
        self.unnamed_entry_counter += 1

    handled_person_fields = set()
    for field_name, field_value_list in fields:
        flattened = self.flatten_value_list(field_value_list)
        field_value = textutils.normalize_whitespace(flattened)

        if field_name not in self.person_fields:
            new_entry.fields[field_name] = field_value
            continue

        if field_name in handled_person_fields:
            message = 'entry with key {} has a duplicate {} field'.format(
                key, field_name)
            self.handle_error(DuplicatePersonField(message))
            continue

        for person_name in split_name_list(field_value):
            new_entry.add_person(Person(person_name), field_name)
        handled_person_fields.add(field_name)

    self.data.add_entry(key, new_entry)
def test_format_inproceeding(app):
    """Load literature record 524480 and compare it with the expected ``inproceedings`` entry.

    ``app`` is requested as a fixture but not used directly in the body.
    """
    record = get_db_record('lit', 524480)

    expected_fields = [
        ('address', u"Tokyo, Japan"),
        ('booktitle',
         u"4th RESCEU International Symposium on Birth and Evolution of the Universe"),
        ('title', u"CMB anisotropies: A Decadal survey"),
        ('archivePrefix', u'arXiv'),
        ('eprint', u"astro-ph/0002520"),
        ('url', u"http://alice.cern.ch/format/showfull?sysnb=2178340"),
    ]
    expected_persons = {
        'editor': [],
        'author': [Person(u"Hu, Wayne")],
    }
    expected = (
        "Hu:2000az",
        Entry('inproceedings', expected_fields, persons=expected_persons),
    )

    result = PybtexSchema().load(record)

    assert result is not None
    assert pybtex_entries_equal(result, expected)
class MarkAuthorFilter(BibFilter):
    """Bib filter that replaces every person matching a given name by fixed content."""

    helpauthor = HELP_AUTHOR
    helpdescription = HELP_DESC
    helptext = HELP_TEXT

    def __init__(self, name, replace_by, match_mode=MATCHMODE_INITIAL):
        r"""
        Set up the filter for one author name.

        Arguments:

          - name: The author name to match, specified as "Doe, John" or
            "John Doe".

          - replace_by: The string to replace the given author by. This can
            be any LaTeX content, such as "\emph{John Doe}" or "\johndoe".

          - match_mode(MatchMode): Specify how to match the first name; may
            be one of 'exact', 'initial' (the default), or 'partial'.
        """
        self.markname = name
        self.replace_by = replace_by
        self.matchmode = MatchMode(match_mode)

        # Pre-compute the normalized forms the matchers compare against.
        person = Person(self.markname)
        self.person = person
        self.last_names_normed = normnamelist(person.get_part('last'))
        first_names = normnamelist(person.get_part('first'))
        self.first_names_normed = first_names
        self.first_names_normed_joined = " ".join(first_names)
        self.first_initial = getnameinitial(first_names)

        # Select the matcher for the requested mode; an unknown mode value
        # raises KeyError here.
        matchers = {
            MATCHMODE_EXACT: self._match_name_exact,
            MATCHMODE_INITIAL: self._match_name_initial,
            MATCHMODE_PARTIAL: self._match_name_partial,
        }
        self.match_fn = matchers[self.matchmode.value]

        super(MarkAuthorFilter, self).__init__()

    def _match_name_exact(self, p):
        """Match when both the last names and the first names are equal."""
        if normnamelist(p.get_part('last')) != self.last_names_normed:
            return False
        return normnamelist(p.get_part('first')) == self.first_names_normed

    def _match_name_initial(self, p):
        """Match on equal last names plus equal first names or the exact initial."""
        if normnamelist(p.get_part('last')) != self.last_names_normed:
            return False
        candidate_first = normnamelist(p.get_part('first'))
        if candidate_first == self.first_names_normed:
            return True
        # exact initial given
        return "".join(candidate_first) == self.first_initial

    def _match_name_partial(self, p):
        """Match on equal last names when the first names are a prefix of ours."""
        if normnamelist(p.get_part('last')) != self.last_names_normed:
            return False
        candidate_first = " ".join(normnamelist(p.get_part('first')))
        return self.first_names_normed_joined.startswith(candidate_first)

    def _filter_person(self, p):
        """Return a replacement ``Person`` for a matching ``p``, else ``p`` itself."""
        if not self.match_fn(p):
            return p
        # note, create a different Person instance for each replacement.
        # This is in case further filters change individual entries again,
        # to make sure that there aren't any weird side effects
        return Person(self.replace_by)

    def action(self):
        """Declare that this filter processes one entry at a time."""
        return BibFilter.BIB_FILTER_SINGLE_ENTRY

    def filter_bibentry(self, entry):
        """Replace matching persons in every role of ``entry``, in place."""
        # write debug messages, which are seen in verbose mode
        logger.longdebug("markauthor filter: filtering entry %s", entry.key)

        # Iterate over a copy of the persons mapping while the entry's own
        # mapping is being assigned to.
        roles_snapshot = OrderedCaseInsensitiveDict(entry.persons)
        for role, persons in roles_snapshot.items():
            entry.persons[role] = [
                self._filter_person(person) for person in persons
            ]

        logger.longdebug("PhF filter: Done for %s", entry.key)
def parse_name_test():
    """Check every ``(name, expected_parts)`` pair in ``sample_names``.

    The expected value is the tuple
    ``(bibtex_first(), prelast(), last(), lineage())`` of ``Person(name)``.
    """
    for name, correct_result in sample_names:
        person = Person(name)
        accessors = (
            person.bibtex_first,
            person.prelast,
            person.last,
            person.lineage,
        )
        actual = tuple(accessor() for accessor in accessors)
        assert actual == correct_result