def format_pandoc(entries, csl_path):
    """Render each bibliography entry to citation text via pandoc.

    Args:
        entries (dict): mapping of keys to pybtex entries
        csl_path (str): path to the CSL style file used for formatting

    Returns:
        OrderedDict: citation text for every entry, keyed like ``entries``
    """
    version_parts = pypandoc.get_pandoc_version().split(".")
    pandoc_version = tuple(int(part) for part in version_parts)
    # pandoc >= 2.11 has built-in citeproc support; older releases go
    # through the legacy conversion path. The version is constant, so the
    # converter can be chosen once, outside the loop.
    if pandoc_version >= (2, 11):
        convert = _convert_pandoc_new
    else:
        convert = _convert_pandoc_legacy
    citations = OrderedDict()
    for key, entry in entries.items():
        single_entry = BibliographyData(entries={entry.key: entry})
        citations[key] = convert(single_entry.to_string("bibtex"), csl_path)
    return citations
class KeyParsingTest(ParserTest, TestCase):
    """Check how parentheses inside citation keys are parsed for both the
    ``@entry(...)`` and ``@entry{...}`` delimiter styles."""
    input_string = u"""
        # will not work as expected
        @article(test(parens1))

        # works fine
        @article(test(parens2),)

        # works fine
        @article{test(braces1)}

        # also works
        @article{test(braces2),}
    """
    # NOTE(review): the extra ')' in 'test(parens1))' is intentional — it
    # documents the parser's actual (surprising) key for the first entry.
    correct_result = BibliographyData([
        ('test(parens1))', Entry('article')),
        ('test(parens2)', Entry('article')),
        ('test(braces1)', Entry('article')),
        ('test(braces2)', Entry('article')),
    ])
    # The first entry also triggers one syntax error while the parser scans
    # for the closing parenthesis.
    errors = [
        "syntax error in line 5: ')' expected",
    ]
class FieldNamesTest(ParserTest, TestCase):
    """Check which characters are accepted at the start of BibTeX field
    names, and the error messages produced for the rejected ones."""
    input_string = u"""
        Check for characters allowed in field names
        Here the cite key is fine, but the field name is not allowed:
        ``You are missing a field name``
        @article{2010, 0author="Me"}

        Underscores allowed (no error)
        @article{2011, _author="Me"}

        Not so for spaces obviously (``expecting an '='``)
        @article{2012, author name = "Myself"}

        Or hashes (``missing a field name``)
        @article{2013, #name = "Myself"}

        But field names can start with +-.
        @article{2014, .name = "Myself"}
        @article{2015, +name = "Myself"}
        @article{2016, -name = "Myself"}
        @article{2017, @name = "Myself"}
    """
    # Entries whose field name was rejected still appear, but without the
    # offending field.
    correct_result = BibliographyData([
        ('2010', Entry('article')),
        ('2011', Entry('article', [('_author', 'Me')])),
        ('2012', Entry('article')),
        ('2013', Entry('article')),
        ('2014', Entry('article', [('.name', 'Myself')])),
        ('2015', Entry('article', [('+name', 'Myself')])),
        ('2016', Entry('article', [('-name', 'Myself')])),
        ('2017', Entry('article', [('@name', 'Myself')])),
    ])
    # One error per invalid field name above (lines 5, 11 and 14 of the
    # input string).
    errors = [
        "syntax error in line 5: '}' expected",
        "syntax error in line 11: '=' expected",
        'syntax error in line 14: \'}\' expected',
    ]
class KeylessEntriesTest(ParserTest, TestCase):
    """With the ``keyless_entries`` parser option, entries without a cite
    key are accepted and given sequential ``unnamed-N`` keys."""
    parser_options = {'keyless_entries': True}
    input_string = u"""
        @BOOK(
            title="I Am Jackie Chan: My Life in Action",
            year=1999
        )
        @BOOK()
        @BOOK{}
        @BOOK{
            title = "Der deutsche Jackie Chan Filmführer",
        }
    """
    correct_result = BibliographyData([
        ('unnamed-1', Entry('book', [('title', 'I Am Jackie Chan: My Life in Action'), ('year', '1999')])),
        ('unnamed-2', Entry('book')),
        ('unnamed-3', Entry('book')),
        ('unnamed-4', Entry('book', [('title', u'Der deutsche Jackie Chan Filmführer')])),
    ])
class MacrosTest(ParserTest, TestCase):
    """Check @String macro definition, concatenation with ``#``, and the
    error reported when an undefined macro is referenced."""
    input_string = """
        @String{and = { and }}
        @String{etal = and # { {et al.}}}
        @Article(
            unknown,
            author = nobody,
        )
        @Article(
            gsl,
            author = "Gough, Brian"#etal,
        )
    """
    # 'unknown' loses its author field (macro undefined); 'gsl' expands
    # "Gough, Brian" # etal into two persons.
    correct_result = BibliographyData({
        'unknown': Entry('article'),
        'gsl': Entry('article', persons={'author': [Person('Gough, Brian'), Person('{et al.}')]}),
    })
    errors = [
        'undefined string in line 6: nobody',
    ]
def main_cli():
    """Filter a BibTeX file by keyword and print the result to stdout.

    Reads the file given as the positional ``BIB_PATH`` argument, keeps
    only entries whose comma-separated ``keywords`` field contains at
    least one of the requested keywords, and writes the filtered
    bibliography (with quote-delimited values rewritten as braces) to
    standard output.
    """
    import argparse
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('bib_path', metavar='BIB_PATH', type=arg_is_file,
                        help=('Path to bibtex-formatted file.'))
    parser.add_argument('-k', '--keywords', nargs='+', type=str,
                        default=["OaksPeerReviewed", "OaksCVPreprint"],
                        help=('Keywords for reference filter.'))
    args = parser.parse_args()

    bib_parser = bibtex.Parser()
    bib_data = bib_parser.parse_file(args.bib_path)
    filtered_bib_data = BibliographyData()
    for key, entry in bib_data.entries.items():
        kwords = [
            x.strip() for x in entry.fields.get('keywords', '').split(',')
        ]
        # BUGFIX: add each entry at most once. The previous loop called
        # add_entry once per matching keyword, and pybtex raises an error
        # on a repeated entry key when two or more keywords match.
        if any(kw in kwords for kw in args.keywords):
            filtered_bib_data.add_entry(entry.key, entry)

    # Rewrite quote-delimited field values as brace-delimited ones.
    s = filtered_bib_data.to_string("bibtex")
    s = s.replace("= \"", "= {")
    s = s.replace("\",\n", "},\n")
    s = s.replace("\"\n", "}\n")
    sys.stdout.write(s)
def populate(self, database_file, pdf_dir):
    """
    Collect all bibdata from all files into one big-ass database (self),
    rekeying the entries. This method will also set ``publipy_biburl``,
    ``publipy_abstracturl``, and ``publipy_pdfurl`` attributes on the
    entries. Pdf files are copied to ``pdf_dir/pdf``.

    :param str database_file: File containing all bibliography data
    :param str pdf_dir: Directory filled with pdfs named by their\
        bibliography keys
    """
    self.publications = BibliographyData()
    publications = read_bibfile(database_file).entries

    # ensure the pdf directory exists
    if pdf_dir and not (self.prefix / Path('pdf')).exists():
        (self.prefix / Path('pdf')).mkdir()

    for oldkey, item in publications.items():
        # Rekey each entry; generate_key_swe presumably derives a key from
        # the entry's metadata — TODO confirm against its definition.
        key = generate_key_swe(item)
        # Only fill in URLs that are not already present, so explicit
        # values in the source file win.
        if 'publipy_biburl' not in item.fields:
            item.fields['publipy_biburl'] = str(self.prefix / Path('bib') /
                                                Path(key + '.bib'))
        if 'abstract' in item.fields:
            item.fields['publipy_abstracturl'] = str(
                self.prefix / Path('abstracts') / Path(key + '.txt'))
        if pdf_dir and 'publipy_pdfurl' not in item.fields:
            # Pdfs on disk are named by the *old* key; copy them over under
            # the new key and record the new location.
            pdf_path_old = Path(pdf_dir) / Path(oldkey + '.pdf')
            if pdf_path_old.exists() and pdf_path_old.is_file():
                pdf_path_new = self.prefix / Path('pdf') / Path(key + '.pdf')
                shutil.copy(str(pdf_path_old), str(pdf_path_new))
                item.fields['publipy_pdfurl'] = str(pdf_path_new)
        self.add_entry(key, item)
def add_publications(generator):
    """
    Populates context with a list of BibTeX publications.

    Configuration
    -------------
    generator.settings['PUBLICATIONS']:
        Dictionary that contains bibliographies:
        The key denotes the bibliographies name to use in headers
        The values describe the BibTeX files to read
        Mandatory for this plugin.
    generator.settings['PUBLICATIONS_NAVBAR']:
        Bool denoting whether a navigation bar containing links to each
        bibliography should be produced. Defaults to 'True'.
    generator.settings['PUBLICATIONS_HEADER']:
        Bool denoting whether a header (h2) should be produced for each
        bibliography. Defaults to 'True'.
    generator.settings['PUBLICATIONS_SPLIT']:
        Bool denoting whether bibliographies should be split by year (h3).
        Defaults to 'True'.
    generator.settings['PUBLICATIONS_HIGHLIGHTs']:
        String, e.g., a name, that will be entailed in a <strong> tag to
        highlight. Default: empty

    Output
    ------
    generator.context['publications']:
        Dictionary containing the name of the publication list as a key,
        bibliography entries as a value. A bibliography entry contains of
        a list of tuples (key, year, text, bibtex, pdf, slides, poster).
        See Readme.md for more details.
    """
    if 'PUBLICATIONS' not in generator.settings:
        return
    if 'PUBLICATIONS_NAVBAR' not in generator.settings:
        generator.context['PUBLICATIONS_NAVBAR'] = True

    # StringIO moved between Python 2 and 3; try the py2 location first.
    try:
        from StringIO import StringIO
    except ImportError:
        from io import StringIO

    try:
        from pybtex.database.input.bibtex import Parser
        from pybtex.database.output.bibtex import Writer
        from pybtex.database import BibliographyData, PybtexError
        from pybtex.backends import html
        from pybtex.style.formatting import plain
    except ImportError:
        logger.warn('`pelican_bibtex` failed to load dependency `pybtex`')
        return

    refs = generator.settings['PUBLICATIONS']
    generator.context['publications'] = collections.OrderedDict()

    for rid in refs:
        ref = refs[rid]
        bibfile = os.path.join(generator.settings['PATH'], ref['file'])
        try:
            bibdata_all = Parser().parse_file(bibfile)
        except PybtexError as e:
            logger.warn('`pelican_bibtex` failed to parse file %s: %s' % (
                bibfile, str(e)))
            return

        # Per-bibliography options, each falling back to a default.
        if 'title' in ref:
            title = ref['title']
        else:
            title = rid
        if 'header' in ref:
            header = ref['header']
        else:
            header = True
        if 'split' in ref:
            split = ref['split']
        else:
            split = True
        if 'split_link' in ref:
            split_link = ref['split_link']
        else:
            split_link = True
        if 'bottom_link' in ref:
            bottom_link = ref['bottom_link']
        else:
            bottom_link = True
        if 'all_bibtex' in ref:
            all_bibtex = ref['all_bibtex']
        else:
            all_bibtex = False
        if 'highlight' in ref:
            highlights = ref['highlight']
        else:
            highlights = []
        if 'group_type' in ref:
            group_type = ref['group_type']
        else:
            group_type = False

        publications = []

        # format entries
        plain_style = plain.Style()
        html_backend = html.Backend()
        formatted_entries = plain_style.format_entries(
            bibdata_all.entries.values())

        for formatted_entry in formatted_entries:
            key = formatted_entry.key
            entry = bibdata_all.entries[key]
            year = entry.fields.get('year')
            typee = entry.type

            # 'tags' is a semicolon-separated list inside a single field.
            if entry.fields.get('tags'):
                tags = [
                    tag.strip() for tag in entry.fields.get('tags').split(';')
                ]
            else:
                tags = []

            display_tags = [
                x for x in tags if x != "doi-open" and x != "url-open"
            ]

            # This shouldn't really stay in the field dict
            # but new versions of pybtex don't support pop
            pdf = entry.fields.get('pdf', None)
            slides = entry.fields.get('slides', None)
            poster = entry.fields.get('poster', None)
            doi = entry.fields.get('doi', None)
            url = entry.fields.get('url', None)

            # clean fields from appearing in bibtex and on website
            # NOTE(review): entry_tmp is an alias, not a copy — the pops
            # below mutate the original entry as well. Harmless here since
            # the originals are not reused afterwards, but worth knowing.
            entry_tmp = entry
            for to_del in ['pdf', 'slides', 'poster', 'tags']:
                entry_tmp.fields.pop(to_del, None)

            # render the bibtex string for the entry
            bib_buf = StringIO()
            bibdata_this = BibliographyData(entries={key: entry_tmp})
            Writer().write_stream(bibdata_this, bib_buf)

            # clean more fields from appearing on website
            for to_del in ['doi', 'url']:
                entry_tmp.fields.pop(to_del, None)

            entry_clean = next(
                plain_style.format_entries(bibdata_this.entries.values()),
                None)

            # apply highlight (strong)
            text = entry_clean.text.render(html_backend)
            for replace in highlights:
                text = text.replace(replace,
                                    '<strong>' + replace + '</strong>')

            publications.append(
                (key, typee, year, text, tags, display_tags,
                 bib_buf.getvalue(), pdf, slides, poster, doi, url))

        # Expose the processed bibliography and its options to templates.
        generator.context['publications'][rid] = {}
        generator.context['publications'][rid]['title'] = title
        generator.context['publications'][rid]['path'] = os.path.basename(
            bibfile)
        generator.context['publications'][rid]['header'] = header
        generator.context['publications'][rid]['split'] = split
        generator.context['publications'][rid]['bottom_link'] = bottom_link
        generator.context['publications'][rid]['split_link'] = split_link
        generator.context['publications'][rid]['all_bibtex'] = all_bibtex
        generator.context['publications'][rid][
            'data'] = collections.OrderedDict()

        # Sort newest-first; "in press" sorts before any real year by
        # being treated as 9999. group_type additionally sorts by type.
        if group_type:
            generator.context['publications'][rid]['data'] = sorted(
                publications,
                key=lambda pub:
                (-int(pub[2].replace("in press", "9999")), pub[1]))
        else:
            generator.context['publications'][rid]['data'] = sorted(
                publications,
                key=lambda pub: -int(pub[2].replace("in press", "9999")))
def __init__(self, *args, **kwargs):
    """Initialise the extension with an empty bibliography database and
    an empty set of citation keys."""
    # Forward all arguments to the CommandExtension base initialiser.
    command.CommandExtension.__init__(self, *args, **kwargs)
    # Accumulated bibliography entries (filled elsewhere in this class).
    self.__database = BibliographyData()
    # Keys of entries that have actually been cited.
    self.__citations = set()
else: if cur_year is None: html_output += "\t<ul>\n" cur_year = True pub_html = list(style.format_entries([entry]))[0].text.render_as("html") pub_html = pub_html.replace("\n", " ") if highlight_author: # highlight an author (usually oneself) pub_html = pub_html.replace(highlight_author, "<strong>{}</strong>".format(highlight_author), 1) html_output += f'\t\t<li class="publication" id="{entry.key.replace(":", "_")}">\n\t\t\t' + pub_html extra_links = [] if bibtex_dir: # write bib files to bibtex_dir for downloading sanitised_label = label.replace(":", "_") bib_link = f"{bibtex_dir}/{sanitised_label}.bib" BibliographyData({label: entry}).to_file(bib_link, "bibtex") extra_links.append(f'[<a href="{root_dir}/{bib_link}">bibtex</a>]') if "file" in entry.fields: # the link to the pdf file (a, filename, kind) = entry.fields["file"].split(":", 2) file_path = os.path.join("papers", filename) presentation_path = os.path.join("presentations", filename) extra_links.append(f'[<a href="{root_dir}/{file_path}">file</a>]') if os.path.exists(presentation_path): extra_links.append(f'[<a href="{root_dir}/{presentation_path}">presentation</a>]') if "dataset" in entry.fields: dataset_path = entry.fields["dataset"]
def write_result(output, filter_data, data):
    """Write the entries of ``data`` whose keys appear in ``filter_data``
    to the file ``output`` in BibTeX format (UTF-8)."""
    selected_entries = {entry_key: data[entry_key] for entry_key in filter_data}
    bibtex_text = BibliographyData(selected_entries).to_string('bibtex')
    with open(output, 'w', encoding='utf-8') as out_file:
        out_file.write(bibtex_text)
def run(folder_path, file_list, file_name_out, exclude_list, log_process):
    """Merge several BibTeX files into one, dropping entries without
    author/year/journal, skipping entries found in ``exclude_list`` files,
    merging duplicates, and optionally logging every decision to CSV.

    :param folder_path: directory for the output bib and CSV log files
    :param file_list: BibTeX files to merge
    :param file_name_out: name of the merged output file
    :param exclude_list: BibTeX files whose entries must be excluded
    :param log_process: when True, write removed/final CSV reports
    """
    global merged_count
    if log_process:
        # CSV logs: one for removed entries (with the removal cause) and
        # one for the final merged set.
        f_removed = open(
            os.path.join(folder_path, "BibFilesMerge_removed.csv"),
            "w",
            encoding="utf-8",
        )
        csv_removed = csv.writer(
            f_removed, quotechar='"', quoting=csv.QUOTE_ALL)
        csv_removed.writerow(
            ["cause", "source", "key", "doi", "author", "year", "title", "publish"]
        )
        f_final = open(
            os.path.join(folder_path, "BibFilesMerge_final.csv"), "w", encoding="utf-8"
        )
        csv_final = csv.writer(f_final, quotechar='"', quoting=csv.QUOTE_ALL)
        csv_final.writerow(
            ["key", "source", "doi", "author", "year", "title", "publish", "abstract"]
        )

    file_name_path_out = os.path.join(folder_path, file_name_out)
    bib_data_out = BibliographyData()

    # Counters for the summary printed at the end.
    total = 0
    merged_count = 0
    without_author = 0
    without_year = 0
    without_jornal = 0
    duplicates = 0
    excluded_from_bib = 0
    # Cache of parsed exclude files: parsed lazily, at most once each.
    bib_data_to_exclude = {}

    for bib_file_name in file_list:
        bib_data = custom_parse_file(bib_file_name)
        print(
            "-" * 3,
            bib_file_name + ":",
            len(bib_data.entries.values()),
            " " * 30,
        )
        for entry in bib_data.entries.values():
            total += 1
            doi = get_entry_DOI(entry)
            author = get_entry_author(entry)
            year = get_entry_year(entry)
            title = get_entry_title(entry)
            publish = get_entry_publish(entry)

            # Skip any entry that also appears in one of the exclude files.
            found_entry_to_exclude = False
            for bib_file_name_exclude in exclude_list:
                if bib_file_name_exclude not in bib_data_to_exclude:
                    # NOTE(review): this rebinds the outer `bib_data` name;
                    # iteration over the current file's entries already
                    # started, so it still completes, but the alias is
                    # fragile — verify before refactoring.
                    bib_data = custom_parse_file(bib_file_name_exclude)
                    bib_data_to_exclude[
                        bib_file_name_exclude
                    ] = bib_data.entries.values()
                for entry_exclude in bib_data_to_exclude[bib_file_name_exclude]:
                    if is_duplicated(entry_exclude, entry):
                        excluded_from_bib += 1
                        found_entry_to_exclude = True
                        break
                if found_entry_to_exclude:
                    break
            if found_entry_to_exclude:
                continue

            if not author:
                without_author += 1
                if log_process:
                    # cause;source;key;doi;author;year;title;publish
                    csv_removed.writerow(
                        [
                            "no author",
                            bib_file_name,
                            entry.key,
                            doi,
                            author,
                            year,
                            title,
                            publish,
                        ]
                    )
            elif not year:
                without_year = without_year + 1
                if log_process:
                    # cause;source;key;doi;author;year;title;publish
                    csv_removed.writerow(
                        [
                            "no year",
                            bib_file_name,
                            entry.key,
                            doi,
                            author,
                            year,
                            title,
                            publish,
                        ]
                    )
            elif not publish:
                without_jornal = without_jornal + 1
                if log_process:
                    # cause;source;key;doi;author;year;title;publish
                    csv_removed.writerow(
                        [
                            "no journal",
                            bib_file_name,
                            entry.key,
                            doi,
                            author,
                            year,
                            title,
                            publish,
                        ]
                    )
            else:
                key = entry.key.lower()
                print("Key " + key + " " * 30 + "\r", end="", flush=True)
                # Remember which file the entry came from for the reports.
                entry.fields["source"] = bib_file_name
                old_entry = None
                for entry_out in bib_data_out.entries.values():
                    if is_duplicated(entry_out, entry, True):
                        old_entry = entry_out
                        break
                if old_entry != None:
                    duplicates += 1
                    if log_process:
                        # cause;source;key;doi;author;year;title;publish
                        csv_removed.writerow(
                            [
                                "duplicate of next",
                                bib_file_name,
                                entry.key,
                                doi,
                                author,
                                year,
                                title,
                                publish,
                            ]
                        )
                        # Log the previously kept entry as well, so both
                        # sides of the duplicate pair appear in the CSV.
                        doi = get_entry_DOI(old_entry)
                        author = get_entry_author(old_entry)
                        year = get_entry_year(old_entry)
                        title = get_entry_title(old_entry)
                        publish = get_entry_publish(old_entry)
                        csv_removed.writerow(
                            [
                                "duplicate of prev",
                                old_entry.fields["source"],
                                old_entry.key,
                                doi,
                                author,
                                year,
                                title,
                                publish,
                            ]
                        )
                    bib_data_out.entries[old_entry.key] = merge_entry(
                        old_entry, entry)
                else:
                    # Avoid key collisions by suffixing until unique.
                    while key in bib_data_out.entries.keys():
                        key = key + "_a"
                    bib_data_out.entries[key] = entry

    print(" " * 50)
    print("Total:\t\t\t", total)
    print("No Author:\t\t", without_author)
    print("No Year:\t\t", without_year)
    print("No Publisher:\t\t", without_jornal)
    print("Duplicates:\t\t", duplicates)
    print("Merged:\t\t\t", merged_count)
    print("Excluded from bib:\t", excluded_from_bib)
    print("Final:\t\t\t", len(bib_data_out.entries))

    # Count entries without an abstract, per source file.
    without_abstract_list = {i: 0 for i in file_list}
    without_abstract = 0
    for entry in bib_data_out.entries.values():
        if log_process:
            doi = get_entry_DOI(entry)
            author = get_entry_author(entry)
            year = get_entry_year(entry)
            title = get_entry_title(entry)
            publish = get_entry_publish(entry)
            abstract = get_entry_abstract(entry)
            # key;source;doi;author;year;title;publish;abstract
            csv_final.writerow(
                [
                    entry.key,
                    entry.fields["source"],
                    doi,
                    author,
                    year,
                    title,
                    publish,
                    abstract,
                ]
            )
        if not "abstract" in entry.fields:
            without_abstract = without_abstract + 1
            without_abstract_list[entry.fields["source"]] = (
                without_abstract_list[entry.fields["source"]] + 1
            )

    print("Without Abstract:\t", without_abstract, without_abstract_list)

    bib_data_out.to_file(file_name_path_out, bib_format="bibtex")

    if log_process:
        f_removed.close()
        f_final.close()
def write_result(name, new_data):
    """Serialise ``new_data`` (a dictionary of entries) to the file
    ``name`` as UTF-8 BibTeX."""
    rendered = BibliographyData(new_data).to_string('bibtex')
    with open("%s" % name, 'w', encoding='utf-8') as handle:
        handle.write(rendered)
class WindowsNewlineTest(ParserTest, TestCase):
    """Check that line numbers in error messages are counted correctly
    when the input uses Windows-style (CRLF) line endings."""
    input_strings = [
        u"""'@Article\r\n\r\n\r\n}\r\n'""",
    ]
    # Nothing parses successfully; the '}' on (CRLF-counted) line 4 is
    # reported where an opening delimiter was expected.
    correct_result = BibliographyData()
    errors = ["syntax error in line 4: '(' or '{' expected"]
parser = bibtex.Parser() bibdata = parser.parse_file(sys.argv[1]) entries = [] for tag in bibdata.entries.keys(): d = {} entry = bibdata.entries[tag] fields = entry.fields d["tag"] = tag for k in fields.keys(): d[k.lower()] = fields[k] d["bibtex.string"] = BibliographyData({tag: entry}).to_string("bibtex") entries.append(d) df = pd.DataFrame(entries) df = df.set_index("tag") fName = sys.argv[1].split(".")[0] df.to_csv(fName + ".converted.csv") elif sys.argv[1].endswith(".csv"): outfile = open(sys.argv[1].split(".")[0] + ".converted.bib", "w+") df = pd.read_csv(sys.argv[1]) df = df.set_index("tag")
# Reference fixture: a bibliography with four entries of different types
# (article, booklet, inbook, book — the last with non-ASCII Russian
# fields) plus a preamble, used to compare parser/writer output against.
reference_data = BibliographyData(entries=[
    ('ruckenstein-diffusion', Entry(
        'article',
        fields={
            'language': 'english',
            'title': 'Predicting the Diffusion Coefficient in Supercritical Fluids',
            'journal': 'Ind. Eng. Chem. Res.',
            'volume': '36',
            'year': '1997',
            'pages': '888-895',
        },
        persons={
            'author': [Person('Liu, Hongquin'), Person('Ruckenstein, Eli')]
        },
    )),
    ('test-booklet', Entry(
        'booklet',
        fields={
            'language': 'english',
            'title': 'Just a booklet',
            'year': '2006',
            'month': 'January',
            'address': 'Moscow',
            'howpublished': 'Published by Foo',
        },
        persons={'author': [Person('de Last, Jr., First Middle')]})),
    ('test-inbook', Entry(
        'inbook',
        fields={
            'publisher': 'Some Publisher',
            'language': 'english',
            'title': 'Some Title',
            'series': 'Some series',
            'booktitle': 'Some Good Book',
            'number': '3',
            'edition': 'Second',
            'year': '1933',
            'pages': '44--59',
        },
        persons={'author': [Person('Jackson, Peter')]})),
    ('viktorov-metodoj', Entry(
        'book',
        fields={
            'publisher': 'Л.: <<Химия>>',
            'year': '1977',
            'language': 'russian',
            'title': 'Методы вычисления физико-химических величин и прикладные расчёты',
        },
        persons={'author': [Person('Викторов, Михаил Маркович')]})),
], preamble=['%%% pybtex example file'])
def run(self):
    """Render the publication list directive: build an HTML list of all
    entries of the given BibTeX file grouped by year (newest first), and
    optionally write per-entry .bib files and detail pages."""
    style = find_plugin('pybtex.style.formatting',
                        self.options.get('style', 'unsrt'))()
    bibtex_dir = self.options.get('bibtex_dir', 'bibtex')
    detail_page_dir = self.options.get('detail_page_dir', 'papers')
    # Multiple authors to highlight can be given separated by ';'.
    highlight_authors = self.options.get('highlight_author', None)
    if highlight_authors:
        highlight_authors = highlight_authors.split(';')
    # Rebuild the page whenever the .bib file changes.
    self.state.document.settings.record_dependencies.add(self.arguments[0])
    parser = Parser()

    # Sort the publication entries by year reversed
    data = sorted(parser.parse_file(self.arguments[0]).entries.items(),
                  key=lambda e: e[1].fields['year'],
                  reverse=True)

    html = '<div class="publication-list">\n'
    cur_year = None

    if bibtex_dir:  # create the bibtex dir if the option is set
        try:
            os.mkdir(os.path.sep.join((self.output_folder, bibtex_dir)))
        except OSError:  # probably because the dir already exists
            pass

    if detail_page_dir:  # create the detail page dir if the option is set
        try:
            os.mkdir(
                os.path.sep.join((self.output_folder, detail_page_dir)))
        except OSError:  # probably because the dir already exists
            pass

    for label, entry in data:
        # print a year title when year changes
        if entry.fields['year'] != cur_year:
            if cur_year is not None:  # not first year group
                html += '</ul>'
            cur_year = entry.fields['year']
            html += '<h3>{}</h3>\n<ul>'.format(cur_year)

        pub_html = list(style.format_entries(
            (entry, )))[0].text.render_as('html')
        if highlight_authors:  # highlight one of several authors (usually oneself)
            for highlight_author in highlight_authors:
                pub_html = pub_html.replace(
                    highlight_author.strip(),
                    '<strong>{}</strong>'.format(highlight_author), 1)
        html += '<li class="publication" style="padding-bottom: 1em;">' + pub_html
        extra_links = ""

        bibtex_fields = dict(entry.fields)
        # Remove some fields for the publicly available BibTeX file since they are mostly only
        # used by this plugin.
        for field_to_remove in ('abstract', 'fulltext'):
            if field_to_remove in bibtex_fields:
                del bibtex_fields[field_to_remove]
        bibtex_entry = Entry(entry.type, bibtex_fields, entry.persons)
        # detail_page_dir may need bib_data later
        bib_data = BibliographyData(dict({label: bibtex_entry}))

        if bibtex_dir:  # write bib files to bibtex_dir for downloading
            bib_link = '{}/{}.bib'.format(bibtex_dir, label)
            bib_data.to_file('/'.join([self.output_folder, bib_link]),
                             'bibtex')
            extra_links += '[<a href="{}">BibTeX</a>] '.format(
                self.site.config['BASE_URL'] + bib_link)

        if 'fulltext' in entry.fields:  # the link to the full text, usually a link to the pdf file
            extra_links += '[<a href="{}">full text</a>] '.format(
                entry.fields['fulltext'])

        if extra_links or detail_page_dir:
            html += '<br>'
        html += extra_links

        if detail_page_dir:  # render the details page of a paper
            page_url = '/'.join((detail_page_dir, label + '.html'))
            html += ' [<a href="{}">abstract and details</a>]'.format(
                self.site.config['BASE_URL'] + page_url)
            context = {
                'title': str(LaTeXParser(entry.fields['title']).parse()),
                'abstract': str(LaTeXParser(entry.fields['abstract']).parse()) if 'abstract' in entry.fields else '',
                'bibtex': bib_data.to_string('bibtex'),
                # bib_link is only bound when bibtex_dir is set; the
                # conditional below guards against that.
                'bibtex_link': '/' + bib_link if bibtex_dir else '',
                'default_lang': self.site.config['DEFAULT_LANG'],
                'label': label,
                'lang': self.site.config['DEFAULT_LANG'],
                'permalink': self.site.config['SITE_URL'] + page_url,
                'reference': pub_html,
                'extra_links': extra_links
            }
            if 'fulltext' in entry.fields and entry.fields[
                    'fulltext'].endswith('.pdf'):
                context['pdf'] = entry.fields['fulltext']
            self.site.render_template(
                'publication.tmpl',
                os.path.sep.join((self.output_folder, detail_page_dir,
                                  label + '.html')),
                context,
            )
        html += '</li>'

    if len(data) != 0:  # publication list is nonempty
        html += '</ul>'

    html += '</div>'
    return [
        nodes.raw('', html, format='html'),
    ]
def run(folderPath, fileList, fileNameOut, logProcess):
    """Merge several BibTeX files into one, dropping entries that lack an
    author, year or journal, merging duplicates (matched by DOI or by
    cleaned title plus year/author heuristics), and optionally logging
    every decision to semicolon-separated CSV files.

    :param folderPath: directory containing the input files and receiving
        the output bib and CSV log files
    :param fileList: names of the BibTeX files to merge
    :param fileNameOut: name of the merged output file
    :param logProcess: when True, write removed/final CSV reports
    """
    global mergedCont
    if logProcess:
        fRemoved = open(os.path.join(folderPath, 'BibFilesMerge_removed.csv'),
                        'w', encoding='utf-8')
        csvRemoved = csv.writer(fRemoved, delimiter=';', quotechar='"')
        csvRemoved.writerow([
            'cause', 'source', 'key', 'doi', 'author', 'year', 'title',
            'publish'
        ])
        fFinal = open(os.path.join(folderPath, 'BibFilesMerge_final.csv'),
                      'w', encoding='utf-8')
        csvFinal = csv.writer(fFinal, delimiter=';', quotechar='"')
        csvFinal.writerow([
            'key', 'source', 'doi', 'author', 'year', 'title', 'publish',
            'abstract'
        ])

    fileNamePathOut = os.path.join(folderPath, fileNameOut)
    bibDataOut = BibliographyData()

    # Counters for the summary printed at the end.
    total = 0
    mergedCont = 0
    withoutAuthor = 0
    withoutYear = 0
    withoutJornal = 0
    duplicates = 0

    print()
    print()

    for bibFileName in fileList:
        bibData = parse_file(os.path.join(folderPath, bibFileName))
        print(bibFileName + ':', len(bibData.entries.values()), " ")
        for entry in bibData.entries.values():
            total = total + 1
            doi = getEntryDOIStr(entry)
            author = getEntryAuthorStr(entry)
            year = getEntryYearStr(entry)
            title = getEntryTitleStr(entry)
            publish = getEntryPublishStr(entry)

            if author == '':
                withoutAuthor = withoutAuthor + 1
                if logProcess:
                    #cause;source;key;doi;author;year;title;publish
                    csvRemoved.writerow([
                        'no author', bibFileName, entry.key, doi, author,
                        year, title, publish
                    ])
            elif year == '':
                withoutYear = withoutYear + 1
                if logProcess:
                    #cause;source;key;doi;author;year;title;publish
                    csvRemoved.writerow([
                        'no year', bibFileName, entry.key, doi, author,
                        year, title, publish
                    ])
            elif publish == '':
                withoutJornal = withoutJornal + 1
                if logProcess:
                    #cause;source;key;doi;author;year;title;publish
                    csvRemoved.writerow([
                        'no journal', bibFileName, entry.key, doi, author,
                        year, title, publish
                    ])
            else:
                key = entry.key.lower()
                print("Key " + key + " \r", end="", flush=True)
                # Remember which file the entry came from for the reports.
                entry.fields['source'] = bibFileName
                oldEntry = None
                cleanTitle = cleanStringToCompare(title)
                # Duplicate detection: exact DOI match wins; otherwise a
                # cleaned-title match, confirmed by identical year or by a
                # 1-2 year gap plus a matching author surname.
                for entryOut in bibDataOut.entries.values():
                    if (doi != ''):
                        doiOut = getEntryDOIStr(entryOut)
                        if (doiOut != '' and doi == doiOut):
                            oldEntry = entryOut
                            break
                    cleanOutTitle = cleanStringToCompare(
                        entryOut.fields['title'])
                    if (cleanTitle == cleanOutTitle):
                        year = int(str(entry.rich_fields['year']))
                        yearOut = int(str(entryOut.rich_fields['year']))
                        diff = abs(year - yearOut)
                        if (diff == 0):
                            oldEntry = entryOut
                        elif (diff == 1 or diff == 2):
                            # Compare unaccented, lowercased first-author
                            # names; missing data degrades to "".
                            try:
                                lastname = unidecode.unidecode(
                                    entry.persons['author']
                                    [0].last_names[0]).lower()
                            except:
                                lastname = ""
                            try:
                                lastNameOut = unidecode.unidecode(
                                    entryOut.persons['author']
                                    [0].last_names[0]).lower()
                            except:
                                lastNameOut = ""
                            try:
                                firstName = unidecode.unidecode(
                                    entry.persons['author']
                                    [0].firstNames[0]).lower()
                            except:
                                firstName = ""
                            try:
                                firstNameOut = unidecode.unidecode(
                                    entryOut.persons['author']
                                    [0].firstNames[0]).lower()
                            except:
                                firstNameOut = ""
                            if (lastname == lastNameOut
                                    or lastname == firstNameOut
                                    or lastNameOut == firstName):
                                oldEntry = entryOut
                        break

                if (oldEntry != None):
                    duplicates = duplicates + 1
                    if logProcess:
                        #cause;source;key;doi;author;year;title;publish
                        csvRemoved.writerow([
                            'duplicate of next', bibFileName, entry.key,
                            doi, author, year, title, publish
                        ])
                        # Log the previously kept entry as well, so both
                        # sides of the duplicate pair appear in the CSV.
                        doi = getEntryDOIStr(oldEntry)
                        author = getEntryAuthorStr(oldEntry)
                        year = getEntryYearStr(oldEntry)
                        title = getEntryTitleStr(oldEntry)
                        publish = getEntryPublishStr(oldEntry)
                        csvRemoved.writerow([
                            'duplicate of prev', oldEntry.fields['source'],
                            oldEntry.key, doi, author, year, title, publish
                        ])
                    bibDataOut.entries[oldEntry.key] = mergeEntry(
                        oldEntry, entry)
                else:
                    # Avoid key collisions by suffixing until unique.
                    while (key in bibDataOut.entries.keys()):
                        key = key + "_a"
                    bibDataOut.entries[key] = entry

    print(" ")
    print("Total:\t\t", total)
    print("No Author:\t", withoutAuthor)
    print("No Year:\t", withoutYear)
    print("No Publisher:\t", withoutJornal)
    print("Duplicates:", duplicates, "| Merged:", mergedCont)
    print("Final:\t\t", len(bibDataOut.entries))

    # Count entries without an abstract, per source file.
    withoutAbstractList = {i: 0 for i in fileList}
    withoutAbstract = 0
    for entry in bibDataOut.entries.values():
        if logProcess:
            doi = getEntryDOIStr(entry)
            author = getEntryAuthorStr(entry)
            year = getEntryYearStr(entry)
            title = getEntryTitleStr(entry)
            publish = getEntryPublishStr(entry)
            abstract = getEntryAbstractStr(entry)
            #key;source;doi;author;year;title;publish;abstract
            csvFinal.writerow([
                entry.key, entry.fields['source'], doi, author, year,
                title, publish, abstract
            ])
        if not 'abstract' in entry.fields:
            withoutAbstract = withoutAbstract + 1
            withoutAbstractList[entry.fields['source']] = withoutAbstractList[
                entry.fields['source']] + 1

    print("without Abstract ", withoutAbstract, withoutAbstractList)

    bibDataOut.to_file(fileNamePathOut)

    if logProcess:
        fRemoved.close()
        fFinal.close()
# Pre-scan the raw file for duplicate entry keys before handing it to
# pybtex, which aborts on duplicates (a common artefact of Google Scholar
# exports). The entry type is stripped so the same key under a different
# type still counts as a duplicate.
entries = set()
dupentries = False
# Renamed from `bin`/`l` — `bin` shadowed the builtin and `l` is an
# ambiguous single-letter name (PEP 8 / E741).
with open(args.f, 'r') as bib_file:
    for line in bib_file:
        if line.startswith('@'):
            line = line.replace('@misc', '')
            line = line.replace('@article', '')
            line = line.replace('@inproceedings', '')
            if line in entries:
                sys.stderr.write("Duplicate entry " + line.replace('{', '').replace(',', ''))
                dupentries = True
            entries.add(line)

if dupentries:
    sys.stderr.write(
        "FATAL: The bibtex file has duplicate entries in it. Please remove them before trying to continue\n"
    )
    sys.stderr.write(
        "(It is an issue with Google Scholar, but pybtex breaks with duplicate entries. Sorry)\n"
    )
    sys.exit(-1)

# Print every entry from year `args.y` onwards as standalone BibTeX.
bib = parse_file(args.f, 'bibtex')
for e in bib.entries:
    if 'year' in bib.entries[e].fields:
        if int(bib.entries[e].fields['year']) >= args.y:
            bib_data = BibliographyData({e: bib.entries[e]})
            print(bib_data.to_string('bibtex'))
def run(self, lines):
    """
    Create a bibliography from cite commands.
    """
    # Join the content to enable regex searches throughout entire text
    content = '\n'.join(lines)

    # Build the database of bibtex data
    self._citations = []  # member b/c it is used in substitution function
    self._bibtex = BibliographyData()  # ""
    bibfiles = []
    match = re.search(self.RE_BIBLIOGRAPHY, content)
    if match:
        for bfile in match.group(1).split(','):
            try:
                filename, _ = self.getFilename(bfile.strip())
                bibfiles.append(filename)
                data = self.parseBibtexFile(bibfiles[-1])
                # NOTE(review): dict.iteritems() is Python-2-only; this
                # line would raise AttributeError on Python 3 — confirm
                # the targeted interpreter before porting.
                self._bibtex.add_entries(data.entries.iteritems())
            except UndefinedMacro:
                LOG.error('Undefined macro in bibtex file: %s, specify macro_files arguments ' \
                          'in configuration file (e.g. website.yml)', bfile.strip())
            except TypeError:
                LOG.error('Unable to locate bibtex file in %s',
                          self.markdown.current.filename)
    else:
        # No bibliography command in the page: nothing to do.
        return lines

    # Determine the style
    match = re.search(self.RE_STYLE, content)
    if match:
        content = content.replace(match.group(0), '')
        try:
            style = find_plugin('pybtex.style.formatting', match.group(1))
        except PluginNotFound:
            LOG.error('Unknown bibliography style "%s"', match.group(1))
            return lines
    else:
        style = find_plugin('pybtex.style.formatting', 'plain')

    # Replace citations with author date, as an anchor
    content = re.sub(self.RE_CITE, self.authors, content)

    # Create html bibliography
    if self._citations:
        # Generate formatted html using pybtex
        formatted_bibliography = style().format_bibliography(
            self._bibtex, self._citations)
        backend = find_plugin('pybtex.backends', 'html')
        stream = io.StringIO()
        backend().write_to_stream(formatted_bibliography, stream)

        # Strip the bib items from the formatted html
        html = re.findall(r'\<dd\>(.*?)\</dd\>', stream.getvalue(),
                          flags=re.MULTILINE | re.DOTALL)

        # Produces an ordered list with anchors to the citations
        output = u'<ol class="moose-bibliography" data-moose-bibfiles="{}">\n'
        output = output.format(str(bibfiles))
        for i, item in enumerate(html):
            output += u'<li name="{}">{}</li>\n'.format(
                self._citations[i], item)
        output += u'</ol>\n'
        content = re.sub(self.RE_BIBLIOGRAPHY,
                         self.markdown.htmlStash.store(output, safe=True),
                         content)

    return content.split('\n')
file = PdfFileReader(in_) last_page = start_page + file.getNumPages() - 1 bib_entry.fields['pages'] = '{}--{}'.format(start_page, last_page) start_page = last_page + 1 # Add the abstract if present if submission_id in abstracts: bib_entry.fields['abstract'] = abstracts.get(submission_id) # Add booktitle for non-proceedings entries if bib_type == 'inproceedings': bib_entry.fields['booktitle'] = metadata['booktitle'] try: bib_string = BibliographyData({ anthology_id: bib_entry }).to_string('bibtex') except TypeError as e: print('Fatal: Error in BibTeX-encoding paper', submission_id, file=sys.stderr) sys.exit(1) final_bibs.append(bib_string) with open(bib_path, 'w') as out_bib: print(bib_string, file=out_bib) print('CREATED', bib_path) # Create an index for LaTeX book proceedings if not os.path.exists('book-proceedings'): os.makedirs('book-proceedings')
def add_publications(self):
    """
    Parse all BibTeX files in PUBLICATIONS_SRC and build Publication objects.

    Each file in the source directory is expected to contain one entry;
    only the first entry of each file is kept. Every publication is
    registered in the per-year, per-author and per-type indexes on self.

    Returns:
        list: the created Publication objects, or None when the setting
        is missing or a file failed to parse.
    """
    # Check if PUBLICATIONS_SRC is set
    if 'PUBLICATIONS_SRC' not in self.settings:
        # logger.warn is a deprecated alias of warning
        logger.warning('PUBLICATIONS_SRC not set')
        return

    # Try to parse the bibtex files
    pub_dir = self.settings['PUBLICATIONS_SRC']
    try:
        bibdata_all = BibliographyData()
        for bib_file in os.listdir(pub_dir):
            with codecs.open(pub_dir + os.sep + bib_file, 'r',
                             encoding="utf8") as stream:
                bibdata = Parser().parse_stream(stream)
                # BUGFIX: dict views are not indexable on Python 3
                # (items()[0] raises TypeError); take the first pair
                # through an iterator instead.
                key, entry = next(iter(bibdata.entries.items()))
                bibdata_all.entries[key] = entry
    except PybtexError as e:
        logger.warning('`pelican_bibtex` failed to parse file %s: %s' % (
            bib_file, str(e)))
        return

    # Create Publication objects and add them to a list
    publications = []

    # format entries
    plain_style = plain.Style()

    for key, entry in bibdata_all.entries.items():
        # Raw BibTeX of the single entry, exposed as the 'bib' citation.
        raw_tex = BibliographyData(entries={key: entry}).to_string('bibtex')

        formatted_entry = list(plain_style.format_entries([entry]))[0]
        # Re-look the entry up under the key reported by the formatter.
        key = formatted_entry.key
        entry = bibdata_all.entries[key]
        year = entry.fields.get('year', 2018)

        # Authors come as "A and B and C"; turn "Last, First" into
        # "First Last" and strip TeX braces/markup.
        authors = entry.fields.get('author', '').split(' and ')
        parsed_authors = []
        for author in authors:
            if ',' in author:
                parts = author.split(',')
                parsed_authors.append(LatexNodes2Text().latex_to_text(
                    re.sub(r'[\{\}]', '',
                           (parts[1] + ' ' + parts[0]).strip())))
            else:
                parsed_authors.append(
                    LatexNodes2Text().latex_to_text(author))
        authors = parsed_authors

        title = LatexNodes2Text().latex_to_text(
            entry.fields.get('title', ''))
        pdf = entry.fields.get('pdf', None)
        slides = entry.fields.get('slides', None)
        poster = entry.fields.get('poster', None)

        # Venue: prefer the book title, fall back to the journal name.
        where = ''
        if 'booktitle' in entry.fields:
            where = LatexNodes2Text().latex_to_text(
                entry.fields.get('booktitle'))
        elif 'journal' in entry.fields:
            where = LatexNodes2Text().latex_to_text(
                entry.fields.get('journal'))

        abstract = entry.fields.get('abstract', '')

        pub = Publication(key, authors, title, year, where,
                          abstract=abstract,
                          pdf_url=pdf,
                          resource_urls=[('slides', slides),
                                         ('poster', poster)])
        pub.citations['bib'] = raw_tex.rstrip('\r\n')
        publications.append(pub)

        # Index the publication by year, author and BibTeX type.
        self.publications_per_year[pub.year].append(pub)
        for author in authors:
            if author in self.context['MEDIUS_AUTHORS']:
                self.publications_per_author[author].append(pub)
        self.publications_per_type[BIBTEX_TYPE_TO_TEXT[entry.type]].append(
            pub)
        self.publications_per_type_rev[pub] = BIBTEX_TYPE_TO_TEXT[
            entry.type]

    return publications
class BracesAndQuotesTest(ParserTest, TestCase): input_string = u'''@ARTICLE{ test, title="Nested braces and {"quotes"}", }''' correct_result = BibliographyData([(u'test', Entry('article', [(u'title', 'Nested braces and {"quotes"}')]))])
def add_publications(generator):
    """
    Populates context with a list of BibTeX publications.

    Configuration
    -------------
    generator.settings['PUBLICATIONS_SRC']:
        local path to the BibTeX file to read.

    Output
    ------
    generator.context['publications']:
        List of tuples (pub_type, key, year, text, bibtex, pdf, doi,
        url, arxiv). See Readme.md for more details.
    """
    if 'PUBLICATIONS_SRC' not in generator.settings:
        return

    # StringIO moved between Python 2 and 3.
    try:
        from StringIO import StringIO
    except ImportError:
        from io import StringIO

    try:
        from pybtex.database.input.bibtex import Parser
        from pybtex.database.output.bibtex import Writer
        from pybtex.database import BibliographyData, PybtexError
        from pybtex.backends import html
        #from pybtex.style.formatting import plain
        from rahul_style import Style as RahulStyle
    except ImportError:
        # logger.warn is a deprecated alias of warning
        logger.warning('`pelican_bibtex` failed to load dependency `pybtex`')
        return

    refs_file = generator.settings['PUBLICATIONS_SRC']
    try:
        bibdata_all = Parser().parse_file(refs_file)
    except PybtexError as e:
        logger.warning('`pelican_bibtex` failed to parse file %s: %s' % (
            refs_file, str(e)))
        return

    publications = []

    # format entries
    plain_style = RahulStyle()
    #plain_style = plain.Style()
    html_backend = html.Backend()
    html_backend.symbols['br'] = u'<BR/>'
    all_entries = bibdata_all.entries.values()

    # remove URL field if DOI is present
    for entry in all_entries:
        if "doi" in entry.fields:
            # HACK: pokes pybtex's private _dict to blank the field;
            # kept as-is because public deletion is version-dependent.
            entry.fields._dict["url"] = ""

    formatted_entries = plain_style.format_entries(all_entries)

    for formatted_entry in formatted_entries:
        key = formatted_entry.key
        entry = bibdata_all.entries[key]
        pub_type = entry.type
        year = entry.fields.get('year')
        # This shouldn't really stay in the field dict
        # but new versions of pybtex don't support pop
        pdf = entry.fields.get('pdf', None)
        #slides = entry.fields.get('slides', None)
        #poster = entry.fields.get('poster', None)
        doi = entry.fields.get('doi', None)
        url = entry.fields.get('url', None)
        arxiv = entry.fields.get('arxiv', None)

        # render the bibtex string for the entry
        bib_buf = StringIO()
        bibdata_this = BibliographyData(entries={key: entry})
        Writer().write_stream(bibdata_this, bib_buf)
        text = formatted_entry.text.render(html_backend)

        # prettify entries: strip BibTeX's braces and stray backslashes.
        # Raw strings fix the invalid '\{' escape (SyntaxWarning on
        # modern Python); the byte sequences matched are unchanged.
        text = text.replace(r"\{", "")
        text = text.replace("{", "")
        text = text.replace(r"\}", "")
        text = text.replace("}", "")
        # remove textbf used for cv
        text = text.replace("\\textbf ", "")
        # remove \ that comes after Proc.
        text = text.replace("\\", "")

        publications.append((pub_type, key, year, text, bib_buf.getvalue(),
                             pdf, doi, url, arxiv))

    generator.context['publications'] = publications
class EmptyDataTest(ParserTest, TestCase): input_string = u'' correct_result = BibliographyData()
def import_arxiv(arxiv_id=None, tags=None, path="~/Papers", path_tmp="/tmp/", **kwargs): assert arxiv_id is not None papers_path = Path(path).expanduser().absolute() new_folder = (Path(path_tmp).expanduser().absolute() / f"papers_import_arxiv_{arxiv_id}") new_file = new_folder / Path(arxiv_id + ".arxiv") if not new_file.exists(): new_file.parent.mkdir(exist_ok=True) new_file.touch() if tags != None: tags = tags.split(",") for tag in tags: path = Path(new_folder, "#" + tag.strip()) if not path.exists(): path.touch() # Get arxiv info info, xmldoc = arxiv2dict(arxiv_id) # Save xml xml = new_file.parent / (arxiv_id + ".arxiv") with open(xml, "w+") as f: f.write(xmldoc.toxml()) # Cite key citekey = make_citekey(info["first_author_surname"].lower(), info["year"], info["title"]) # PDF pdf_name = citekey + ".pdf" files_pdf = list(new_file.parent.glob("*.pdf")) if len(files_pdf) == 0: # get pdf response = requests.get(info["url_pdf"]) with open(new_file.parent / pdf_name, "wb") as f: f.write(response.content) if len(files_pdf) == 1 and files_pdf[0] != pdf_name: files_pdf[0].rename(new_file.parent / pdf_name) # Write abstract.txt abstract = new_file.parent / "abstract.txt" with open(abstract, "w+") as f: f.write(info["abstract"].strip()) # Bib entry ref_entry = { citekey: Entry( "article", [ ("author", " and ".join(info["author"])), ("title", str(info["title"])), ("year", str(info["year"])), ("eprint", str(info["id"])), ("journal", "arXiv preprint"), ], ), } # Write bib file ref = new_file.parent / f"{citekey}.bib" with open(ref, "w+") as f: f.write(BibliographyData(ref_entry).to_string("bibtex")) # Rename parent folder according to citekey new_folder_renamed = Path(new_file.parent.parent / citekey) Path(new_file.parent).rename(new_folder_renamed) # Move folder shutil.move(str(new_folder_renamed), str(papers_path / citekey))
from pybtex.database import BibliographyData, Entry bib_data = BibliographyData({ 'article-minimal': Entry('article', [ ('author', 'L[eslie] B. Lamport'), ('title', 'The Gnats and Gnus Document Preparation System'), ('journal', "G-Animal's Journal"), ('year', '1986'), ]), }) print(bib_data.to_string('bibtex'))
def add_publications(generator): """ Populates context with a list of BibTeX publications. Configuration ------------- generator.settings['PUBLICATIONS_SRC']: local path to the BibTeX file to read. Output ------ generator.context['publications']: List of tuples (key, year, text, bibtex, pdf, slides, poster). See Readme.md for more details. """ if 'PUBLICATIONS_SRC' not in generator.settings: return try: from StringIO import StringIO except ImportError: from io import StringIO try: from pybtex.database.input.bibtex import Parser from pybtex.database.output.bibtex import Writer from pybtex.database import BibliographyData, PybtexError from pybtex.backends import html from pybtex.style.formatting import plain except ImportError: logger.warn('`pelican_bibtex` failed to load dependency `pybtex`') return refs_file = generator.settings['PUBLICATIONS_SRC'] try: bibdata_all = Parser().parse_file(refs_file) except PybtexError as e: logger.warn('`pelican_bibtex` failed to parse file %s: %s' % ( refs_file, str(e))) return publications = [] # format entries plain_style = plain.Style() html_backend = html.Backend() formatted_entries = plain_style.format_entries(bibdata_all.entries.values()) for formatted_entry in formatted_entries: key = formatted_entry.key entry = bibdata_all.entries[key] year = entry.fields.get('year') # This shouldn't really stay in the field dict # but new versions of pybtex don't support pop pdf = entry.fields.get('pdf', None) slides = entry.fields.get('slides', None) poster = entry.fields.get('poster', None) #render the bibtex string for the entry bib_buf = StringIO() bibdata_this = BibliographyData(entries={key: entry}) Writer().write_stream(bibdata_this, bib_buf) text = formatted_entry.text.render(html_backend) publications.append((key, year, text, bib_buf.getvalue(), pdf, slides, poster)) generator.context['publications'] = publications
def run(self):
    """
    Render the publication-list directive.

    Reads the BibTeX file given as the directive argument, groups entries
    by year (newest first) and emits one <li> per publication, with
    optional downloadable .bib files, per-paper detail pages, and
    collapsible abstract/BibTeX sections.

    Returns:
        list: a single docutils raw-HTML node.
    """
    # Directive options; either directory may be disabled with ''.
    bibtex_dir = self.options.get('bibtex_dir', 'bibtex')
    detail_page_dir = self.options.get('detail_page_dir', 'papers')
    highlight_authors = self.options.get('highlight_author', None)
    if highlight_authors:
        highlight_authors = highlight_authors.split(';')
    # The custom Style links entry titles to their detail pages (if any).
    style = Style(self.site.config['BASE_URL'] + detail_page_dir
                  if detail_page_dir else None)
    # Rebuild this page whenever the .bib file changes.
    self.state.document.settings.record_dependencies.add(self.arguments[0])
    parser = Parser()

    # Sort the publication entries by year reversed
    data = sorted(parser.parse_file(self.arguments[0]).entries.items(),
                  key=lambda e: e[1].fields['year'], reverse=True)

    html = '<div class="publication-list">\n'
    cur_year = None

    if bibtex_dir:  # create the bibtex dir if the option is set
        try:
            os.makedirs(os.path.sep.join((self.output_folder, bibtex_dir)))
        except OSError:  # probably because the dir already exists
            pass

    if detail_page_dir:  # create the detail page dir if the option is set
        try:
            os.makedirs(
                os.path.sep.join((self.output_folder, detail_page_dir)))
        except OSError:  # probably because the dir already exists
            pass

    for label, entry in data:
        # print a year title when year changes
        if entry.fields['year'] != cur_year:
            if cur_year is not None:  # not first year group
                html += '</ul>'
            cur_year = entry.fields['year']
            html += '<h3>{}</h3>\n<ul>'.format(cur_year)

        entry.label = label  # Pass label to the style.
        pub_html = list(style.format_entries(
            (entry, )))[0].text.render_as('html')
        if highlight_authors:
            # highlight one of several authors (usually oneself)
            for highlight_author in highlight_authors:
                pub_html = pub_html.replace(
                    highlight_author.strip(),
                    '<strong>{}</strong>'.format(highlight_author), 1)
        html += '<li class="publication" style="padding-bottom: 1em;">' + pub_html

        extra_links = ""
        if 'fulltext' in entry.fields:
            # the link to the full text, usually a link to the pdf file
            extra_links += '[<a href="{}">full text</a>] '.format(
                entry.fields['fulltext'])

        bibtex_fields = dict(entry.fields)
        # Collect and remove custom links (fields starting with "customlink")
        # NOTE(review): the customlink* fields are collected below but never
        # deleted from bibtex_fields, so they still appear in the public
        # .bib file -- confirm whether that is intended.
        custom_links = dict()
        for key, value in bibtex_fields.items():
            if key.startswith('customlink'):
                custom_links[key[len('customlink'):]] = value
        # custom fields (custom links)
        for key, value in custom_links.items():
            extra_links += '[<a href="{}">{}</a>] '.format(value, key)

        # Remove some fields for the publicly available BibTeX file since they are mostly only
        # used by this plugin.
        for field_to_remove in ('abstract', 'fulltext'):
            if field_to_remove in bibtex_fields:
                del bibtex_fields[field_to_remove]
        # Prepare for the bib file. Note detail_page_dir may need bib_data later.
        bibtex_entry = Entry(entry.type, bibtex_fields, entry.persons)
        bib_data = BibliographyData(dict({label: bibtex_entry}))
        bib_string = bib_data.to_string('bibtex')
        # Inline jQuery toggle for the hidden BibTeX block added further down.
        extra_links += ''' [<a href="javascript:void(0)" onclick=" (function(target, id) {{ if ($('#' + id).css('display') == 'block') {{ $('#' + id).hide('fast'); $(target).text('BibTeX▼') }} else {{ $('#' + id).show('fast'); $(target).text('BibTeX▲') }} }})(this, '{}');">BibTeX▼</a>] '''.format('bibtex-' + label)

        if bibtex_dir:  # write bib files to bibtex_dir for downloading
            bib_link = '{}/{}.bib'.format(bibtex_dir, label)
            bib_data.to_file('/'.join([self.output_folder, bib_link]), 'bibtex')

        if extra_links or detail_page_dir or 'abstract' in entry.fields:
            html += '<br>'

        # Add the abstract link.
        if 'abstract' in entry.fields:
            html += ''' [<a href="javascript:void(0)" onclick=" (function(target, id) {{ if ($('#' + id).css('display') == 'block') {{ $('#' + id).hide('fast'); $(target).text('abstract▼') }} else {{ $('#' + id).show('fast'); $(target).text('abstract▲') }} }})(this, '{}');">abstract▼</a>] '''.format(
                'abstract-' + label)

        display_none = '<div id="{}" style="display:none"><pre>{}</pre></div>'
        bibtex_display = display_none.format('bibtex-' + label, bib_string)

        abstract_text = str(LaTeXParser(entry.fields['abstract']).parse()
                            ) if 'abstract' in entry.fields else ''
        if detail_page_dir:  # render the details page of a paper
            page_url = '/'.join((detail_page_dir, label + '.html'))
            html += '[<a href="{}">details</a>] '.format(
                self.site.config['BASE_URL'] + page_url)
            context = {
                'title': str(LaTeXParser(entry.fields['title']).parse()),
                'abstract': abstract_text,
                'bibtex': bib_data.to_string('bibtex'),
                'bibtex_link': '/' + bib_link if bibtex_dir else '',
                'default_lang': self.site.config['DEFAULT_LANG'],
                'label': label,
                'lang': self.site.config['DEFAULT_LANG'],
                'permalink': self.site.config['SITE_URL'] + page_url,
                'reference': pub_html,
                'extra_links': extra_links + bibtex_display
            }

            if 'fulltext' in entry.fields and entry.fields[
                    'fulltext'].endswith('.pdf'):
                context['pdf'] = entry.fields['fulltext']

            self.site.render_template(
                'publication.tmpl',
                os.path.sep.join((self.output_folder, detail_page_dir,
                                  label + '.html')),
                context,
            )
        html += extra_links

        # Add the hidden abstract and bibtex.
        if 'abstract' in entry.fields:
            html += ''' <div id="{}" class="publication-abstract" style="display:none"> <blockquote>{}</blockquote></div> '''.format(
                'abstract-' + label, abstract_text)
        html += bibtex_display
        html += '</li>'

    if len(data) != 0:  # publication list is nonempty
        html += '</ul>'

    html += '</div>'

    return [
        nodes.raw('', html, format='html'),
    ]
def add_publications(generator):
    """
    Populates context with a list of BibTeX publications.

    Configuration
    -------------
    generator.settings['PUBLICATIONS_SRC']:
        Local path to the BibTeX file to read.

    generator.settings['PUBLICATIONS_SPLIT_BY']:
        The name of the bibtex field used for splitting the publications.
        No splitting if title is not provided.

    generator.settings['PUBLICATIONS_UNTAGGED_TITLE']:
        The title of the header for all untagged entries.
        No such list if title is not provided.

    Output
    ------
    generator.context['publications_lists']:
        A map with keys retrieved from the field named in
        PUBLICATIONS_SPLIT_TAG. Values are lists of dicts with keys
        (key, year, text, bibtex, pdf, slides, poster).
        See Readme.md for more details.

    generator.context['publications']:
        Contains all publications as a list of dicts with keys
        (key, year, text, bibtex, pdf, slides, poster).
        See Readme.md for more details.
    """
    if 'PUBLICATIONS_SRC' not in generator.settings:
        return

    # StringIO moved between Python 2 and 3.
    try:
        from StringIO import StringIO
    except ImportError:
        from io import StringIO

    try:
        from pybtex.database.input.bibtex import Parser
        from pybtex.database.output.bibtex import Writer
        from pybtex.database import BibliographyData, PybtexError
        from pybtex.backends import html
        from pybtex.style.formatting import plain
    except ImportError:
        # logger.warn is a deprecated alias of warning
        logger.warning('`pelican_bib` failed to load dependency `pybtex`')
        return

    refs_file = generator.settings['PUBLICATIONS_SRC']
    try:
        bibdata_all = Parser().parse_file(refs_file)
    except PybtexError as e:
        logger.warning('`pelican_bib` failed to parse file %s: %s' % (
            refs_file, str(e)))
        return

    publications = []
    publications_lists = {}
    publications_untagged = []

    # Optional splitting configuration (None when unset, as before).
    split_by = generator.settings.get('PUBLICATIONS_SPLIT_BY', None)
    untagged_title = generator.settings.get('PUBLICATIONS_UNTAGGED_TITLE',
                                            None)

    # format entries
    plain_style = plain.Style()
    html_backend = html.Backend()
    formatted_entries = plain_style.format_entries(
        bibdata_all.entries.values())

    for formatted_entry in formatted_entries:
        key = formatted_entry.key
        entry = bibdata_all.entries[key]
        year = entry.fields.get('year')
        # This shouldn't really stay in the field dict
        # but new versions of pybtex don't support pop
        pdf = entry.fields.get('pdf', None)
        slides = entry.fields.get('slides', None)
        poster = entry.fields.get('poster', None)

        tags = []
        if split_by:
            tags = entry.fields.get(split_by, [])

            # parse to list, and trim each string
            if tags:
                tags = [tag.strip() for tag in tags.split(',')]

            # create keys in publications_lists if at least one
            # tag is given
            for tag in tags:
                publications_lists.setdefault(tag, [])

        # render the bibtex string for the entry
        bib_buf = StringIO()
        bibdata_this = BibliographyData(entries={key: entry})
        Writer().write_stream(bibdata_this, bib_buf)
        text = formatted_entry.text.render(html_backend)

        entry_tuple = {'key': key,
                       'year': year,
                       'text': text,
                       'bibtex': bib_buf.getvalue(),
                       'pdf': pdf,
                       'slides': slides,
                       'poster': poster}

        publications.append(entry_tuple)
        for tag in tags:
            publications_lists[tag].append(entry_tuple)
        if not tags and untagged_title:
            publications_untagged.append(entry_tuple)

    # append untagged list if title is given
    if untagged_title and publications_untagged:
        publications_lists[untagged_title] = publications_untagged

    # output
    generator.context['publications'] = publications
    generator.context['publications_lists'] = publications_lists