def json2bib(jsonstring, key, type='article'):
    """Convert a JSON citation string into a formatted bibliography string.

    Args:
        jsonstring: JSON text describing one work. The keys ``author``
            (a list of dicts with ``given`` and ``family``), ``issued``
            (with a nested ``date-parts`` list) and ``editor`` receive
            special treatment; every other key becomes an entry field.
        key: Citation key handed to ``Bibentry``.
        type: Entry type handed to ``Entry`` (default ``'article'``).

    Returns:
        The result of ``Bibentry(key, entry).as_string()``, or ``None``
        when ``jsonstring`` is empty or ``None``.
    """
    # The guard must look at the argument; testing the ``json`` module
    # itself is always truthy and let empty input crash in json.loads.
    if not jsonstring:
        return None

    data = json.loads(jsonstring)

    # Authors are attached as Person objects below, not as a plain field.
    authors = data.pop('author', None)

    # Collapse the structured issue date into a plain ``year`` string.
    if 'issued' in data:
        data['year'] = str(data.pop('issued')['date-parts'][0][0])

    # Editors are dropped: they are not representable as a plain field.
    data.pop('editor', None)

    entry = Entry(type, fields=data)
    if authors:
        for author in authors:
            entry.add_person(
                Person(first=author['given'], last=author['family']),
                'author')
    return Bibentry(key, entry).as_string()
class EntryTypesTest(ParserTest, TestCase):
    """Fixture checking which entry type names the parser accepts.

    The expected result keeps the five entries listed under "These are OK";
    the four entries listed under "These ones not" are expected to yield one
    syntax error each.
    """
    # NOTE(review): how ParserTest consumes these attributes is not visible
    # here; presumably it parses ``input_string`` and compares the outcome
    # with ``correct_result`` and ``errors`` -- confirm against ParserTest.

    # BibTeX source fed to the parser.
    input_string = u""" Testing what are allowed for entry types These are OK @somename{an_id,} @t2{another_id,} @t@{again_id,} @t+{aa1_id,} @_t{aa2_id,} These ones not @2thou{further_id,} @some name{id3,} @some#{id4,} @some%{id4,} """

    # Entries expected to survive parsing, in input order.
    correct_result = BibliographyData([
        ('an_id', Entry('somename')),
        ('another_id', Entry('t2')),
        ('again_id', Entry('t@')),
        ('aa1_id', Entry('t+')),
        ('aa2_id', Entry('_t')),
    ])

    # Expected error messages, one per rejected entry.
    errors = [
        "syntax error in line 12: a valid name expected",
        "syntax error in line 13: '(' or '{' expected",
        "syntax error in line 14: '(' or '{' expected",
        "syntax error in line 15: '(' or '{' expected",
    ]
class MacrosTest(ParserTest, TestCase):
    """Fixture for ``@String`` macros and ``#`` concatenation.

    The input defines the macros ``and`` and ``etal``, uses an undefined
    macro (``nobody``) in one entry and concatenates a quoted string with
    ``etal`` in another.
    """
    # NOTE(review): presumably ParserTest parses ``input_string`` and
    # compares with ``correct_result`` / ``errors`` -- confirm.

    # BibTeX source fed to the parser.
    input_string = u""" @String{and = { and }} @String{etal = and # { {et al.}}} @Article( unknown, author = nobody, ) @Article( gsl, author = "Gough, Brian"#etal, ) """

    # 'unknown' is expected without persons (its author macro is undefined);
    # 'gsl' is expected with the two authors produced by macro expansion.
    correct_result = BibliographyData([
        ('unknown', Entry('article')),
        ('gsl', Entry('article', persons={
            u'author': [Person(u'Gough, Brian'), Person(u'{et al.}')]
        })),
    ])

    # Expected error message for the undefined ``nobody`` macro.
    errors = [
        'undefined string in line 6: nobody',
    ]
def process_entry(self, entry):
    """Convert one XML entry element into an ``(id, Entry)`` pair.

    Args:
        entry: Element whose ``id`` attribute is the citation key and whose
            first child names the entry type and holds the field elements.

    Returns:
        A tuple ``(id_, bib_entry)`` where ``id_`` is ``entry.get('id')``
        and ``bib_entry`` is the populated ``Entry``.

    Raises:
        IndexError: if ``entry`` has no child element.
    """
    def process_person(person_entry, role):
        # A role element may wrap several <person> children; recurse so each
        # one is added separately.
        persons = person_entry.findall(bibtexns + 'person')
        if persons:
            for person in persons:
                process_person(person, role)
            return

        # ``text`` is None (not '') when the element carries no character
        # data, e.g. when its children follow the start tag directly, so it
        # must be normalised before stripping.
        text = (person_entry.text or '').strip()
        if text:
            # The whole name was given as one string.
            bib_entry.add_person(Person(text), role)
        else:
            # The name was given as child elements; their tag names (without
            # namespace) become keyword arguments of Person.
            names = {remove_ns(name.tag): name.text for name in person_entry}
            bib_entry.add_person(Person(**names), role)

    id_ = entry.get('id')
    item = list(entry)[0]
    bib_entry = Entry(remove_ns(item.tag))
    for field in item:
        field_name = remove_ns(field.tag)
        if field_name in Person.valid_roles:
            process_person(field, field_name)
        else:
            # Empty field elements are stored as an empty string.
            bib_entry.fields[field_name] = (
                field.text if field.text is not None else '')
    return id_, bib_entry
class KeylessEntriesTest(ParserTest, TestCase):
    """Fixture for parsing entries without a citation key.

    With the ``keyless_entries`` option enabled, the four key-less ``@BOOK``
    entries are expected under the keys ``unnamed-1`` to ``unnamed-4``.
    """
    # NOTE(review): presumably ParserTest passes ``parser_options`` to the
    # parser and compares the parse of ``input_string`` with
    # ``correct_result`` -- confirm.

    # Options handed to the parser.
    parser_options = {'keyless_entries': True}

    # BibTeX source fed to the parser.
    input_string = u""" @BOOK( title="I Am Jackie Chan: My Life in Action", year=1999 ) @BOOK() @BOOK{} @BOOK{ title = "Der deutsche Jackie Chan Filmführer", } """

    # Expected entries; the numeric year is expected as the string '1999'.
    correct_result = BibliographyData({
        'unnamed-1': Entry('book', {
            'title': 'I Am Jackie Chan: My Life in Action',
            'year': '1999'
        }),
        'unnamed-2': Entry('book'),
        'unnamed-3': Entry('book'),
        'unnamed-4': Entry('book', {'title': u'Der deutsche Jackie Chan Filmführer'}),
    })
class InlineCommentTest(ParserTest, TestCase):
    """Fixture for ``% some text`` comments placed inside entries.

    Each entry containing an inline comment is expected to keep what was
    parsed before the comment and to report one syntax error; the entries
    before and after are expected to parse normally.
    """
    # NOTE(review): presumably ParserTest parses ``input_string`` and
    # compares with ``correct_result`` / ``errors`` -- confirm.

    # BibTeX source fed to the parser.
    input_string = u""" "some text" causes an error like this ``You're missing a field name---line 6 of file bibs/inline_comment.bib`` for all 3 of the % some text occurences below; in each case the parser keeps what it has up till that point and skips, so that it correctly gets the last entry. @article{Me2010,} @article{Me2011, author="Brett-like, Matthew", % some text title="Another article"} @article{Me2012, % some text author="Real Brett"} This one correctly read @article{Me2013,} """

    # Me2011 keeps its author (parsed before the comment) but not its title;
    # Me2012 keeps nothing because the comment precedes its only field.
    correct_result = BibliographyData([
        ('Me2010', Entry('article')),
        ('Me2011', Entry('article', persons={
            'author': [
                Person(first='Matthew', last='Brett-like'),
            ]
        })),
        ('Me2012', Entry('article')),
        ('Me2013', Entry('article')),
    ])

    # Expected error messages, one per entry with an inline comment.
    errors = [
        "syntax error in line 10: '}' expected",
        "syntax error in line 12: '}' expected",
    ]
def process_entry(self, entry):
    """Convert one XML entry element into an ``(id, Entry)`` pair.

    Args:
        entry: Element whose ``id`` attribute is the citation key and whose
            first child names the entry type and holds the field elements.

    Returns:
        A tuple ``(id_, bib_entry)`` where ``id_`` is ``entry.get('id')``
        and ``bib_entry`` is the populated ``Entry`` (field text stripped).

    Raises:
        IndexError: if ``entry`` has no child element.
    """
    # Children are obtained by iterating the element: ``getchildren()`` was
    # removed from xml.etree.ElementTree in Python 3.9, while iteration works
    # on every version.
    def process_person(person_entry, role):
        # A role element may wrap several <person> children; recurse so each
        # one is added separately.
        persons = person_entry.findall(bibtexns + 'person')
        if persons:
            for person in persons:
                process_person(person, role)
            return

        # ``text`` is None when the element carries no character data, so
        # normalise it before stripping.
        text = (person_entry.text or '').strip()
        if text:
            # The whole name was given as one string.
            bib_entry.add_person(Person(text), role)
        else:
            # The name was given as child elements; their tag names (without
            # namespace) become keyword arguments of Person.
            names = {remove_ns(name.tag): name.text for name in person_entry}
            bib_entry.add_person(Person(**names), role)

    id_ = entry.get('id')
    item = list(entry)[0]
    bib_entry = Entry(remove_ns(item.tag))
    for field in item:
        field_name = remove_ns(field.tag)
        if field_name in Person.valid_roles:
            process_person(field, field_name)
        else:
            # An empty field element has ``text`` None; store '' instead of
            # failing on ``None.strip()``.
            bib_entry.fields[field_name] = (field.text or '').strip()
    return id_, bib_entry
def add(self, entry: pybtex.Entry):
    """Insert ``entry`` into the ``bib`` table.

    The entry is stored under its original key together with a generated
    custom key. When the custom key collides with an existing row, a new one
    is generated with an increasing attempt index; after 27 attempts the
    original key is used as the custom key.

    Args:
        entry: The bibliography entry to store. Its ``fields`` are modified
            in place: a missing author becomes ``"UNKNOWN"`` and
            ``original_key`` is added.

    Returns:
        ``True`` if a row was inserted; ``False`` if the entry has no key or
        a row with the same original key already exists.

    Raises:
        sqlite3.IntegrityError: for any integrity failure other than the two
            handled UNIQUE collisions, and for a custom-key collision that
            persists even with the original key as fallback.
    """
    # TODO: make this a better sanity checking and perhaps report errors
    if not entry.key:
        return False
    if not entry.fields.get("author"):
        entry.fields["author"] = "UNKNOWN"

    original_key = entry.key
    entry.fields["original_key"] = original_key

    utf_author = bibutils.field_to_unicode(entry, "author")
    utf_title = bibutils.field_to_unicode(entry, "title")
    # Prefer the journal as venue and fall back to the book title.
    utf_venue = (bibutils.field_to_unicode(entry, "journal")
                 or bibutils.field_to_unicode(entry, "booktitle"))

    custom_key_tries = 0
    added = False
    while not added:
        custom_key = None
        # NOTE(review): the limit of 27 attempts is taken from the original
        # code; presumably it matches the number of variants
        # generate_custom_key can produce -- confirm.
        using_fallback = custom_key_tries >= 27
        if not using_fallback:
            try:
                custom_key = bibutils.generate_custom_key(
                    entry, self.config.custom_key_format, custom_key_tries)
            except Exception:
                # Best effort: the row is still inserted with no custom key,
                # but the reason is recorded instead of silently dropped.
                logging.debug(
                    "Could not generate a custom key for entry %s",
                    original_key, exc_info=True)
        else:
            logging.warning(
                "Could not generate a unique custom key for entry %s",
                original_key)
            custom_key = original_key

        try:
            self.cursor.execute(
                'INSERT INTO bib(key, custom_key, author, title, venue, year, fulltext) VALUES (?,?,?,?,?,?,?)',
                (original_key,
                 custom_key,
                 utf_author,
                 utf_title,
                 utf_venue,
                 str(entry.fields.get("year")),
                 bibutils.single_entry_to_fulltext(entry, custom_key)))
            added = True
        except sqlite3.IntegrityError as e:
            error_message = str(e)
            if "UNIQUE" not in error_message:
                raise
            if "bib.custom_key" in error_message:
                if using_fallback:
                    # The fallback key never changes, so retrying would
                    # repeat the same failing INSERT forever.
                    raise
                # custom_key was already in the DB
                custom_key_tries += 1
            elif "bib.key" in error_message:
                # duplicate entry
                break
            else:
                raise
    return added
def process_entry(self, entry):
    """Build an ``Entry`` from a mapping describing one bibliography item.

    The mapping's ``'type'`` value becomes the entry type. Keys found in
    ``Person.valid_roles`` hold an iterable of keyword dicts, each turned
    into a ``Person`` for that role. Every remaining key is stored as a
    field after conversion with ``unicode``.
    """
    result = Entry(entry['type'])
    for field_name, value in entry.iteritems():
        if field_name in Person.valid_roles:
            for person_kwargs in value:
                result.add_person(Person(**person_kwargs), field_name)
            continue
        if field_name == 'type':
            # Already consumed when the Entry was created.
            continue
        result.fields[field_name] = unicode(value)
    return result
def process_entry(self, entry):
    """Translate a mapping for one bibliography item into an ``Entry``.

    ``entry["type"]`` selects the entry type. Values stored under a key
    listed in ``Person.valid_roles`` are iterated and each item is expanded
    into ``Person(**item)`` for that role; all other keys except ``"type"``
    become fields, converted with ``unicode``.
    """
    bib_entry = Entry(entry["type"])
    for name, payload in entry.iteritems():
        is_role = name in Person.valid_roles
        if not is_role:
            # "type" was consumed above; anything else is a plain field.
            if name != "type":
                bib_entry.fields[name] = unicode(payload)
            continue
        for name_parts in payload:
            bib_entry.add_person(Person(**name_parts), name)
    return bib_entry
class CrossrefTest(ParserTest, TestCase):
    """Fixture for cross-referenced entries combined with ``wanted_entries``.

    Only ``GSL`` and ``GSL2`` are requested, in upper case. The expected
    result uses the requested spelling for those keys and also contains the
    cross-referenced ``the_journal`` entry under the key spelling of its
    definition in the input.
    """
    # NOTE(review): presumably ParserTest passes ``parser_options`` to the
    # parser and compares the parse of ``input_string`` with
    # ``correct_result`` -- confirm.

    # Options handed to the parser.
    parser_options = {'wanted_entries': ['GSL', 'GSL2']}

    # BibTeX source fed to the parser; the two crossref values differ in case.
    input_string = u""" @Article(gsl, crossref="the_journal") @Article(gsl2, crossref="The_Journal") @Journal{the_journal,} """

    # Expected entries; crossref field values keep their original spelling.
    correct_result = BibliographyData(entries=[
        ('GSL', Entry('article', [('crossref', 'the_journal')])),
        ('GSL2', Entry('article', [('crossref', 'The_Journal')])),
        ('the_journal', Entry('journal')),
    ])
def process_entry(self, entry):
    """Build an ``Entry`` from a mapping, matching keys case-insensitively.

    ``entry['type']`` selects the entry type. A key whose lower-cased form
    is in ``Person.valid_roles`` holds an iterable of keyword dicts, each
    expanded into a ``Person``; the person is registered under the key's
    original spelling. A key whose lower-cased form is ``'type'`` is
    skipped. Every other key is stored as a field (original spelling) after
    conversion with ``unicode``.
    """
    result = Entry(entry['type'])
    for name, payload in entry.iteritems():
        normalized = name.lower()
        if normalized in Person.valid_roles:
            for person_kwargs in payload:
                result.add_person(Person(**person_kwargs), name)
            continue
        if normalized == 'type':
            continue
        result.fields[name] = unicode(payload)
    return result
def test_natbib_citation_transform_str_repr():
    """Check ``str()`` and ``repr()`` of a CitationTransform with one ref."""
    from natbib import CitationTransform, DEFAULT_CONF
    from pybtex.database import Entry

    # A single reference whose key should show up in both representations.
    reference = Entry(type_='misc')
    reference.key = 'somekey'

    transform_kwargs = dict(
        pre="",
        post="",
        typ="cite:p",
        global_keys={},
        config=DEFAULT_CONF.copy(),
        refs=[reference],
    )
    transform = CitationTransform(**transform_kwargs)

    assert str(transform) == "somekey"
    assert repr(transform) == "<somekey>"
class CrossrefWantedTest(ParserTest, TestCase):
    """When cross-referencing an explicitly cited entry, the key from the
    .aux file should be used.

    Here ``The_Journal`` is itself listed in ``wanted_entries``, so the
    expected result stores the journal under that spelling rather than the
    ``the_journal`` spelling of its definition in the input.
    """
    # NOTE(review): presumably ParserTest passes ``parser_options`` to the
    # parser and compares the parse of ``input_string`` with
    # ``correct_result`` -- confirm.

    # Options handed to the parser.
    parser_options = {'wanted_entries': ['GSL', 'GSL2', 'The_Journal']}

    # BibTeX source fed to the parser; the two crossref values differ in case.
    input_string = u""" @Article(gsl, crossref="the_journal") @Article(gsl2, crossref="The_Journal") @Journal{the_journal,} """

    # Expected entries; crossref field values keep their original spelling.
    correct_result = BibliographyData(entries=[
        ('GSL', Entry('article', [('crossref', 'the_journal')])),
        ('GSL2', Entry('article', [('crossref', 'The_Journal')])),
        ('The_Journal', Entry('journal')),
    ])
def process_entry(self, entry_type, key, fields):
    """Create an ``Entry`` from parsed fields and add it to ``self.data``.

    Args:
        entry_type: Type passed to ``Entry``.
        key: Citation key, or ``None`` for a key-less entry; in that case
            the key ``'unnamed-<counter>'`` is generated and
            ``self.unnamed_entry_counter`` is incremented.
        fields: Iterable of ``(field_name, value_list)`` pairs.
    """
    entry = Entry(entry_type)

    if key is None:
        key = 'unnamed-%i' % self.unnamed_entry_counter
        self.unnamed_entry_counter += 1

    for name, value_parts in fields:
        flattened = self.flatten_value_list(value_parts)
        value = textutils.normalize_whitespace(flattened)
        if name not in self.person_fields:
            entry.fields[name] = value
            continue
        # Person fields hold a name list; each name becomes one Person
        # registered under the field name as role.
        for person_name in split_name_list(value):
            entry.add_person(Person(person_name), name)

    self.data.add_entry(key, entry)
class AtTest(ParserTest, TestCase):
    """Fixture for ``@`` characters outside and inside field values.

    An ``@`` inside a braced or quoted title is expected to be kept as part
    of the value, while the ``@`` in the leading free text is expected to
    produce one syntax error.
    """
    # NOTE(review): presumably ParserTest parses ``input_string`` and
    # compares with ``correct_result`` / ``errors`` -- confirm.

    # FIXME: check warnings
    # BibTeX source fed to the parser.
    input_string = u""" The @ here parses fine in both cases @article{Me2010, title={An @tey article}} @article{Me2009, title="A @tey short story"} """

    # Both titles are expected to retain their '@'.
    correct_result = BibliographyData([
        ('Me2010', Entry('article', [('title', 'An @tey article')])),
        ('Me2009', Entry('article', [('title', 'A @tey short story')])),
    ])

    # Expected error message for the '@' in the leading free text.
    errors = [
        "syntax error in line 2: '(' or '{' expected",
    ]
def test_format_inbook(app):
    """Check that ``PybtexSchema`` turns literature record 1375491 into the
    expected ``inbook`` entry.

    ``app`` is not used in the body (presumably a fixture required for its
    side effects -- confirm).
    """
    record = get_db_record('lit', 1375491)

    expected_fields = [
        ('pages', u"421--462"),
        ('title', u"Supersymmetry"),
        ('year', u"2015"),
        ('doi', u"10.1007/978-3-319-15001-7_10"),
        ('archivePrefix', u"arXiv"),
        ('eprint', u"1506.03091"),
        ('primaryClass', u"hep-ex"),
    ]
    expected_persons = {
        'editor': [],
        'author': [
            Person(u"Bechtle, Philip"),
            Person(u"Plehn, Tilman"),
            Person(u"Sander, Christian"),
        ],
    }
    expected_entry = Entry('inbook', expected_fields, persons=expected_persons)
    expected = ("Bechtle:2015nta", expected_entry)

    loaded = PybtexSchema().load(record)

    assert loaded is not None
    assert pybtex_entries_equal(loaded, expected)
def main(bibfile, template, save_path, save_individual=False):
    """Render the entries of a BibTeX file through a Jinja2 template.

    Args:
        bibfile: Path of the BibTeX file to parse.
        template: Path of the Jinja2 template file.
        save_path: Output directory when ``save_individual`` is true,
            otherwise the output file path.
        save_individual: When true, write one ``<key>.html`` file per entry
            (template variable ``entry``); otherwise write a single file
            (template variable ``entries``).

    Prints a message and returns early when ``save_path`` does not fit the
    chosen mode.
    """
    # save_path must be a directory in per-entry mode, and a path inside an
    # existing directory otherwise.
    if save_individual:
        if not os.path.isdir(save_path):
            print(
                'save_individual is true, but save_path is not a directory. Quitting'
            )
            return
    else:
        parent_dir = os.path.abspath(os.path.dirname(save_path))
        if not os.path.isdir(parent_dir):
            print(
                'save_individual is false, but save_path is not a valid file location. Quitting'
            )
            return

    # Build the sandboxed template environment and register the filters.
    tenv = jinja2.sandbox.SandboxedEnvironment()
    template_filters = (
        ('author_fmt', _author_fmt),
        ('author_list', _author_list),
        ('title', _title),
        ('venue_type', _venue_type),
        ('venue', _venue),
        ('main_url', _main_url),
        ('extra_urls', _extra_urls),
        ('monthname', _month_name),
    )
    for filter_name, filter_func in template_filters:
        tenv.filters[filter_name] = filter_func

    with open(template) as template_file:
        tmpl = tenv.from_string(template_file.read())

    # Parse the bibliography.
    with open(bibfile) as bib_stream:
        db = bibtex.Parser().parse_stream(bib_stream)

    for entry_key, bib_entry in db.entries.items():
        # Expose the citation key to the template as a field.
        bib_entry.fields['key'] = entry_key

        # Attach the entry's own BibTeX source, leaving out ignored fields.
        kept_fields = [
            (name, value)
            for name, value in bib_entry.fields.items()
            if name not in _ignore_fields_bibtex_source
        ]
        stripped_entry = Entry(bib_entry.type,
                               fields=kept_fields,
                               persons=bib_entry.persons)
        bibtex_source = BibliographyData({
            entry_key: stripped_entry
        }).to_string('bibtex').strip()

        # Rewrite ' = "XXX"' as '={XXX}'.
        bib_entry.fields['bibtex'] = re.sub(r' = \"(.*)\"', r'={\1}',
                                            bibtex_source)

    # Render, in descending _sortkey order.
    ordered_entries = sorted(db.entries.values(), key=_sortkey, reverse=True)

    if not save_individual:
        rendered = tmpl.render(entries=ordered_entries)
        with open(save_path, 'w') as out_file:
            out_file.write(rendered)
        return

    for bib_entry in ordered_entries:
        rendered = tmpl.render(entry=bib_entry)
        target = os.path.join(save_path, '%s.html' % bib_entry.key)
        with open(target, 'w') as out_file:
            out_file.write(rendered)
class BracesAndQuotesTest(ParserTest, TestCase):
    """Fixture for double quotes nested inside braces within a quoted value.

    The title is delimited by double quotes and contains ``{"quotes"}``; the
    expected field value keeps the braces and the inner quotes.
    """
    # NOTE(review): presumably ParserTest parses ``input_string`` and
    # compares with ``correct_result`` -- confirm.

    # BibTeX source fed to the parser.
    input_string = '''@ARTICLE{ test, title="Nested braces and {"quotes"}", }'''

    # Expected single entry with the unmodified inner text as title.
    correct_result = BibliographyData(
        {'test': Entry('article', {'title': 'Nested braces and {"quotes"}'})})
def test_format_proceeding(app):
    """Check that ``PybtexSchema`` turns literature record 701585 into the
    expected ``proceedings`` entry.

    ``app`` is not used in the body (presumably a fixture required for its
    side effects -- confirm).
    """
    record = get_db_record('lit', 701585)

    expected_fields = [
        ('address', u"Geneva"),
        ('pages', u"pp.1--326"),
        ('publisher', u"CERN"),
        ('title',
         u"HERA and the LHC: A Workshop on the implications of HERA for LHC physics: Proceedings Part A"
         ),
        ('year', u"2005"),
        ('reportNumber', u"CERN-2005-014, DESY-PROC-2005-01"),
        ('archivePrefix', u"arXiv"),
        ('eprint', u"hep-ph/0601012"),
        ('url', u"http://weblib.cern.ch/abstract?CERN-2005-014"),
    ]
    expected_persons = {
        'editor': [Person(u"De Roeck, A."), Person(u"Jung, H.")],
        'author': [],
    }
    expected_entry = Entry('proceedings', expected_fields,
                           persons=expected_persons)
    expected = ("Alekhin:2005dx", expected_entry)

    loaded = PybtexSchema().load(record)

    assert loaded is not None
    assert pybtex_entries_equal(loaded, expected)
def create_bibliography_entry(self, record):
    """Build the ``(texkey, Entry)`` pair for ``record``.

    The record is dumped through ``self.schema_class``. Its ``doc_type``
    becomes the entry type, its author and editor name lists become
    ``Person`` objects, and the remaining truthy values are kept as string
    fields when their key is allowed for the record's BibTeX document type.
    Fields are sorted by name.
    """
    schema = self.schema_class
    bibtex_document_type = schema.get_bibtex_document_type(record)
    data = schema.dump(record).data

    doc_type = data.pop("doc_type", None)
    texkey = data.pop("texkey", None)
    authors = [Person(name) for name in data.pop("authors_with_role_author")]
    editors = [Person(name) for name in data.pop("authors_with_role_editor")]

    # Fields common to all entries plus those specific to this type.
    allowed_fields = (self.COMMON_FIELDS_FOR_ENTRIES
                      | self.FIELDS_FOR_ENTRY_TYPE[bibtex_document_type])

    template_data = []
    for name, value in data.items():
        if value and name in allowed_fields:
            template_data.append((name, str(value)))
    template_data.sort(key=lambda pair: pair[0])

    persons = {"author": authors, "editor": editors}
    return (texkey, Entry(doc_type, template_data, persons=persons))
def create_bibliography_entry(self, record):
    """Build the ``(texkey, Entry)`` pair for ``record``.

    The record is dumped through ``self.schema_class``. Its ``doc_type``
    becomes the entry type and its author and editor name lists become
    ``Person`` objects. Fields are emitted in the order of
    ``self.fields_and_doc_types``: a field is kept when its value is truthy
    and its ``doc_types`` is either ``True`` or contains the record's
    ``doc_type``.
    """
    data = self.schema_class.dump(record).data

    doc_type = data.pop("doc_type", None)
    texkey = data.pop("texkey", None)
    authors = [Person(name) for name in data.pop("authors_with_role_author")]
    editors = [Person(name) for name in data.pop("authors_with_role_editor")]

    template_data = []
    for field, doc_types in self.fields_and_doc_types:
        if not data.get(field):
            continue
        if doc_types is True or doc_type in doc_types:
            template_data.append((field, str(data[field])))

    persons = {"author": authors, "editor": editors}
    return (texkey, Entry(doc_type, template_data, persons=persons))
class DuplicateFieldTest(ParserTest, TestCase):
    """Fixture for a field repeated within one entry in different case.

    The input defines ``title`` three times (``title``, ``TITLE``,
    ``TiTlE``). The expected entry keeps the first definition only, and one
    error is expected for each later duplicate.
    """
    # NOTE(review): this class sets ``input_strings`` (a list) rather than
    # ``input_string``; presumably ParserTest accepts both -- confirm.

    # BibTeX sources fed to the parser.
    input_strings = [
        r""" @MASTERSTHESIS{ Mastering, year = 1364, title = "Mastering Thesis Writing", school = "Charles University in Prague", TITLE = "No One Reads Master's Theses Anyway LOL", TiTlE = "Well seriously, lol.", } """
    ]

    # Expected entry: fields in input order, first 'title' value retained.
    correct_result = BibliographyData({
        'Mastering': Entry(
            'mastersthesis',
            fields=[
                ('year', '1364'),
                ('title', 'Mastering Thesis Writing'),
                ('school', 'Charles University in Prague'),
            ],
        ),
    })

    # Expected error messages, quoting each duplicate as spelled in the input.
    errors = [
        'entry with key Mastering has a duplicate TITLE field',
        'entry with key Mastering has a duplicate TiTlE field',
    ]
def _process_one_file(key, f, info_this_key):
    """Turn the metadata of file ``f`` into a BibTeX record and append it to
    ``info_this_key``.

    Args:
        key: Category of the file; it is appended to the file's additional
            categories before the closure is computed.
        f: Path of the file; its base name without extension is the BibTeX
            id.
        info_this_key: List that receives
            ``[bib_id, (bibtex_string, key, '_' + bib_id, categories)]``.
    """
    file_name = os.path.split(f)[1]
    meta = _process_meta(f)

    # Finished work is filed as 'misc', anything else as 'unpublished'; the
    # flag itself is not exported as a field.
    finished = meta['finished']
    del meta['finished']
    entry_type = 'misc' if finished else 'unpublished'

    field_pairs = [(name, str(value))
                   for name, value in meta.items()
                   if value is not None]
    bib_entry = Entry(entry_type, field_pairs)

    bib_id, _ = os.path.splitext(file_name)
    bib_data = BibliographyData({bib_id: bib_entry})

    # Additional categories are a comma-separated list of '/'-separated
    # paths; each becomes a tuple of path components.
    raw_categories = meta['additional-categories']
    if raw_categories is None:
        categories = []
    else:
        categories = [
            tuple(part.strip().split('/'))
            for part in raw_categories.split(',')
        ]
    categories.append(key)
    categories = _additional_cats_closure(categories)

    # '_' + bib_id is the key we should use for GitHub browsing.
    record = (bib_data.to_string('bibtex'), key, '_' + bib_id, categories)
    info_this_key.append([bib_id, record])
def citations(self):
    """Return the unique citations of this object.

    Starts from a copy of ``self.BIBTEX_ENTRIES``, lets ``recurse_bibtex``
    update that copy, parses every collected BibTeX string into an ``Entry``
    and filters the result through ``unique_citations_only``.
    """
    # Work on a copy so the class-level list is never modified.
    bibtex_strings = self.BIBTEX_ENTRIES[:]
    recurse_bibtex(self, bibtex_strings)

    parsed = []
    for bibtex_string in bibtex_strings:
        parsed.append(Entry.from_string(bibtex_string, 'bibtex'))
    return unique_citations_only(parsed)
def test_from_entry():
    """Check ``Source.from_entry`` field access and id validation."""
    book = Entry('book', fields={'title': 'Title'})

    # A plain id is accepted and fields are readable by name.
    valid_source = Source.from_entry('abc', book)
    assert valid_source['title'] == 'Title'

    # An id containing a dot is rejected by default ...
    with pytest.raises(ValueError):
        Source.from_entry('a.b', book)

    # ... but accepted when the id check is switched off.
    unchecked_source = Source.from_entry('a.b', book, _check_id=False)
    assert unchecked_source.id == 'a.b'
class BracesTest(ParserTest, TestCase):
    """Fixture for nested braces inside a brace-delimited value.

    The expected title keeps the inner ``{DDT}`` group and has no trailing
    whitespace.
    """
    # NOTE(review): presumably ParserTest parses ``input_string`` and
    # compares with ``correct_result`` -- confirm.

    # BibTeX source fed to the parser.
    input_string = u"""@ARTICLE{ test, title={Polluted with {DDT}. }, }"""

    # Expected single entry.
    correct_result = BibliographyData([(u'test', Entry('article', [(u'title', 'Polluted with {DDT}.')]))])
class BracesAndQuotesTest(ParserTest, TestCase):
    """Fixture for double quotes nested inside braces within a quoted value.

    The title is delimited by double quotes and contains ``{"quotes"}``; the
    expected field value keeps the braces and the inner quotes.
    """
    # NOTE(review): presumably ParserTest parses ``input_string`` and
    # compares with ``correct_result`` -- confirm.

    # BibTeX source fed to the parser.
    input_string = u'''@ARTICLE{ test, title="Nested braces and {"quotes"}", }'''

    # Expected single entry with the unmodified inner text as title.
    correct_result = BibliographyData([
        (u'test', Entry('article', [(u'title', 'Nested braces and {"quotes"}')]))
    ])
class BracesTest(ParserTest, TestCase):
    """Fixture for nested braces inside a brace-delimited value.

    The expected title keeps the inner ``{DDT}`` group and has no trailing
    whitespace.
    """
    # NOTE(review): presumably ParserTest parses ``input_string`` and
    # compares with ``correct_result`` -- confirm.

    # BibTeX source fed to the parser.
    input_string = """@ARTICLE{ test, title={Polluted with {DDT}. }, }"""

    # Expected single entry.
    correct_result = BibliographyData(
        {'test': Entry('article', {'title': 'Polluted with {DDT}.'})})
class EntryInCommentTest(ParserTest, TestCase):
    """Fixture for entries placed inside an ``@Comment{...}`` block.

    The two articles inside the comment block are expected to be parsed as
    ordinary entries, and the article after the second comment is expected
    as well. No ``errors`` attribute is defined on this class.
    """
    # NOTE(review): presumably ParserTest parses ``input_string`` and
    # compares with ``correct_result`` -- confirm.

    # BibTeX source fed to the parser.
    input_string = u""" Both the articles register despite the comment block @Comment{ @article{Me2010, title="An article"} @article{Me2009, title="A short story"} } These all work OK without errors @Comment{and more stuff} Last article to show we can get here @article{Me2011, } """

    # Expected entries, in input order.
    correct_result = BibliographyData([
        ('Me2010', Entry('article', fields=[('title', 'An article')])),
        ('Me2009', Entry('article', fields=[('title', 'A short story')])),
        ('Me2011', Entry('article')),
    ])
def run(csvFileName, bibFileName):
    """Convert a CSV export of papers into a BibTeX file.

    The CSV's header row is skipped and its columns are read as: title,
    journal, book, volume, issue, doi, author, year, url, type. Each row
    with both a DOI and an author becomes one entry keyed by its DOI.

    Args:
        csvFileName: Path of the CSV file to read. When it does not exist a
            message is printed and nothing is written.
        bibFileName: Path of the BibTeX file to write.

    Returns:
        None.
    """
    if not os.path.isfile(csvFileName):
        print("File not found: ", csvFileName)
        return

    colnames = [
        'title', 'journal', 'book', 'volume', 'issue', 'doi', 'author',
        'year', 'url', 'type'
    ]

    # pandas reportedly failed to open files with complex paths, so the CSV
    # is read from a temporary copy. The copy lives in a temporary directory
    # that is removed again once the data has been loaded.
    with tempfile.TemporaryDirectory() as tmpDir:
        tmpFile = os.path.join(tmpDir, 'input.csv')
        copyfile(csvFileName, tmpFile)
        pn = pd.read_csv(tmpFile, names=colnames, skiprows=1)

    # Output fields in emission order, with the conversion applied to each
    # non-null value (None keeps the value unchanged). Every field is guarded
    # by its OWN column: 'issue' used to be guarded by 'volume', which wrote
    # 'nan' issues and dropped real ones.
    fieldConverters = [
        ('title', None),
        ('journal', None),
        ('volume', str),
        ('issue', str),
        ('doi', None),
        ('year', str),
        ('url', None),
        ('author', AuthorFix),
    ]

    bibData = BibliographyData()
    total = 0
    notAuthor = 0
    notDoi = 0
    for row_index, row in pn.iterrows():
        total = total + 1

        fields = []
        for column, convert in fieldConverters:
            value = row[column]
            if pd.isnull(value):
                continue
            fields.append((column, convert(value) if convert else value))

        # The DOI is the entry key; a row without one cannot be stored (and
        # previously crashed the progress message below).
        if pd.isnull(row.doi):
            notDoi = notDoi + 1
            continue

        keyPaper = row.doi
        typePaper = TypePaperSelect(row.type)

        print("Chave " + keyPaper + " \r", end="", flush=True)

        if (pd.isnull(row.author)):
            notAuthor = notAuthor + 1
        else:
            bibData.entries[keyPaper] = Entry(typePaper, fields)

    print("Processed ", total, " ")
    print("Removed without author ", notAuthor)
    if notDoi:
        print("Removed without doi ", notDoi)
    print("Total Final", len(bibData.entries))

    bibData.to_file(bibFileName)
    print("Saved file ", bibFileName)
class WantedEntriesTest(ParserTest, TestCase):
    """Fixture for the ``wanted_entries`` parser option.

    The entry is defined as ``gsl`` but requested as ``GSL``; the expected
    result stores it under the requested spelling.
    """
    # NOTE(review): presumably ParserTest passes ``parser_options`` to the
    # parser and compares the parse of ``input_string`` with
    # ``correct_result`` -- confirm.

    # Options handed to the parser.
    parser_options = {'wanted_entries': ['GSL']}

    # BibTeX source fed to the parser.
    input_string = u""" @Article( gsl, ) """

    # Expected single entry, keyed as requested.
    correct_result = BibliographyData(entries={
        'GSL': Entry('article'),
    })
# Format one hand-built 'inproceedings' entry with pybtex's 'plain' style and
# print it as HTML <dt>/<dd> pairs. ``authors`` must already be defined by
# the surrounding code.
fields = {
    u'booktitle': u'RO-MAN',
    # 'conference': u'RO-MAN',
    # 'dblp_id': 1423712,
    u'doi': u'http://dx.doi.org/10.1109/ROMAN.2012.6343878',
    # 'id': 5,
    # u'key': u'LazewatskyS12',
    u'pages': u'989-994',
    u'title': u'Context-sensitive in-the-world interfaces for mobile manipulation robots. ',
    # 'type': u'inproceedings',
    # 'venue': u'RO-MAN 2012:989-994',
    # 'venue_url': u'db/conf/ro-man/ro-man2012.html#LazewatskyS12',
    u'year': '2012'}

entry = Entry('inproceedings', persons=dict(author=authors), fields=fields)
entry.key = 'asdf'

# Look up the HTML output backend and the 'plain' formatting style.
output_backend = find_plugin('pybtex.backends', 'html')
style_cls = find_plugin('pybtex.style.formatting', 'plain')
style = style_cls()

entries = [entry]
formatted_entries = style.format_entries(entries)
formatted_bibliography = FormattedBibliography(formatted_entries, style)

stream = io.StringIO()
ob = output_backend(None)

# ``print`` is called with a single parenthesised argument so the statements
# are valid on Python 3 and produce the same output on Python 2.
for entry in formatted_bibliography:
    print(u'<dt>%s</dt>\n' % entry.label)
    print(u'<dd>%s</dd>\n' % entry.text.render(ob))
    # entry.key, entry.label, entry.text.render(ob))
def to_pybtex(self):
    """Return this object as an ``Entry`` carrying its key.

    The entry takes its type and fields from ``self.type`` and
    ``self.fields``; its ``author`` persons are the ``to_pybtex()`` results
    of every item in ``self.authors.all()``.
    """
    entry_fields = self.fields
    entry_type = self.type
    author_persons = [author.to_pybtex() for author in self.authors.all()]

    converted = Entry(entry_type,
                      fields=entry_fields,
                      persons={'author': author_persons})
    converted.key = self.key
    return converted