def _process_one_file(key, f, info_this_key):
    """Build a BibTeX entry for file *f* and append its record to *info_this_key*."""
    base_name = os.path.split(f)[1]
    meta = _process_meta(f)

    # Construct a bib entry: finished works are 'misc', unfinished 'unpublished'.
    kind = 'misc' if meta['finished'] else 'unpublished'
    del meta['finished']
    entry = Entry(kind, [(field, str(value))
                         for field, value in meta.items() if value is not None])
    bib_id, _ = os.path.splitext(base_name)
    bib_data = BibliographyData({bib_id: entry})

    # Collect category tuples from the comma-separated metadata field, if any.
    raw_cats = meta['additional-categories']
    if raw_cats is None:
        cats = []
    else:
        cats = [tuple(part.strip().split('/')) for part in raw_cats.split(',')]
    cats.append(key)
    cats = _additional_cats_closure(cats)

    # '_' + bib_id is the key we should use for GitHub browsing.
    info_this_key.append(
        [bib_id, (bib_data.to_string('bibtex'), key, '_' + bib_id, cats)])
def main():
    """Resolve the command-line identifier to a DOI and print citation data."""
    identifier = args.identifier[0]
    doi = _extract_doi(identifier)
    if doi is None:
        # BUG FIX: the original printed the undefined name `item`; report the
        # unresolvable input identifier instead.
        print(identifier)
    elif args.bibtex:
        result = cn.content_negotiation(doi, format="bibtex")
        bibtex = parse_string(result, "bibtex")
        try:
            # BUG FIX: dict views are not indexable in Python 3; take the first
            # entry/person via an iterator instead of `.values()[0]`.
            entry = next(iter(bibtex.entries.values()))
            name = "".join(next(iter(entry.persons.values()))[0].last_names)
            # Transliterate umlauts, then strip any remaining non-ASCII.
            name = name.replace("ä", "ae").replace("ö", "oe").replace("ü", "ue")
            name = unidecode(name)
            shortdoi = _short_doi(doi)[3:]
            year = entry.fields["year"]
            key = "{}_{}_{}".format(name, year, shortdoi)
            new = BibliographyData()
            new.add_entry(key, entry)
            print(new.to_string("bibtex"))
        except KeyError:
            # Fall back to the raw negotiated BibTeX when a field is missing.
            print(result)
    else:
        try:
            result = cn.content_negotiation(doi, format=args.format)
            print(result)
        except requests.exceptions.HTTPError:
            print(doi)
            print()
def createMaterialize(self, parent, token, page):
    """Render the bibliography list and attach a [BibTeX] modal to each item."""
    ol = self.createHTML(parent, token, page)
    if ol is None:
        return
    for child in ol.children:
        key = child['id']
        single = BibliographyData()
        single.add_entry(key, self.extension.database().entries[key])
        tex = single.to_string("bibtex")
        modal_id = uuid.uuid4()
        # Trigger link opens the modal containing the raw BibTeX source.
        html.Tag(child, 'a', style="padding-left:10px;",
                 class_='modal-trigger moose-bibtex-modal',
                 href="#{}".format(modal_id), string='[BibTeX]')
        modal = html.Tag(child, 'div', class_='modal', id_=modal_id)
        content = html.Tag(modal, 'div', class_='modal-content')
        pre = html.Tag(content, 'pre', style="line-height:1.25;")
        html.Tag(pre, 'code', class_='language-latex', string=tex)
    return ol
def to_bibtex(citations):
    """Serialize *citations* to a BibTeX string under random 8-character keys."""
    import uuid
    from pybtex.database import BibliographyData
    keyed = {str(uuid.uuid4())[:8]: entry for entry in citations}
    return BibliographyData(entries=keyed).to_string('bibtex')
def main():
    """Filter a BibTeX file, preserving order and keeping the last 5 years."""
    if len(sys.argv) != 3:
        prog = os.path.basename(sys.argv[0])
        print('Filters BibTeX file preserving order and limiting to last 5 years')
        print('usage: %s <original-bib> <output-bib>' % prog)
        # BUG FIX: this line was missing its % argument and printed a raw '%s'.
        print('example: %s publications.bib filtered.bib' % prog)
        sys.exit(1)

    original = sys.argv[1]
    minyear = datetime.date.today().year - 5
    output = sys.argv[2]

    from pybtex.database import parse_file, BibliographyData

    bib_data = parse_file(original)
    filtered = BibliographyData()
    for key in bib_data.entries:
        entry = bib_data.entries[key]
        year = int(entry.fields['year'])
        if year > minyear:
            print('Selecting @%s[%s] from %s' % (entry.type, key, year))
            filtered.entries[key] = entry

    print('Saving to %s...' % output)
    s = filtered.to_string('bibtex')
    for f, t in FIX_STRINGS:
        # BUG FIX: removed the redundant double assignment `s = s = s.replace(...)`.
        s = s.replace(f, t)
    with open(output, 'wt') as f:
        f.write(s)
def preExecute(self, content):
    """Build the shared BibTeX database from all '.bib' files in *content*."""
    self.__database = BibliographyData()

    bib_files = []
    for node in content:
        if node.source.endswith('.bib'):
            bib_files.append(node.source)

    for bfile in bib_files:
        try:
            db = parse_file(bfile)
        except UndefinedMacro as e:
            msg = "The BibTeX file %s has an undefined macro:\n%s"
            # BUG FIX: Python 3 exceptions have no `.message` attribute; log the
            # exception object itself.
            LOG.warning(msg, bfile, e)
            # BUG FIX: skip the unparsable file; otherwise the loop below would
            # reuse `db` from a previous iteration (or raise NameError on the
            # first failure).
            continue

        #TODO: https://bitbucket.org/pybtex-devs/pybtex/issues/93/
        #      databaseadd_entries-method-not-considering
        warn = self.get('duplicate_warning')
        for key in db.entries:
            if key in self.__database.entries:
                if warn:
                    msg = "The BibTeX entry '%s' defined in %s already exists."
                    LOG.warning(msg, key, bfile)
            else:
                self.__database.add_entry(key, db.entries[key])
def preExecute(self):
    """Load all '.bib' pages into the shared database, honoring allowed duplicates."""
    duplicates = self.get('duplicates', list())
    self.__database = BibliographyData()

    bib_files = []
    for node in self.translator.getPages():
        if node.source.endswith('.bib'):
            bib_files.append(node.source)

    for bfile in bib_files:
        try:
            db = parse_file(bfile)
        except UndefinedMacro as e:
            msg = "The BibTeX file %s has an undefined macro:\n%s"
            LOG.warning(msg, bfile, e)
            # BUG FIX: skip the unparsable file; otherwise the loop below would
            # reuse `db` from a previous iteration (or raise NameError on the
            # first failure).
            continue

        #TODO: https://bitbucket.org/pybtex-devs/pybtex/issues/93/
        #      databaseadd_entries-method-not-considering
        warn = self.get('duplicate_warning')
        for key in db.entries:
            duplicate_key = key in self.__database.entries
            duplicate_key_allowed = key in duplicates
            if duplicate_key and (not duplicate_key_allowed):
                if warn:
                    msg = "The BibTeX entry '%s' defined in %s already exists."
                    LOG.warning(msg, key, bfile)
            elif not duplicate_key:
                self.__database.add_entry(key, db.entries[key])
def download():
    """Query Google Scholar for 'eelbrain' papers and cache them as BibTeX."""
    querier = ScholarQuerier()
    settings = ScholarSettings()
    settings.set_citation_format(ScholarSettings.CITFORM_BIBTEX)
    querier.apply_settings(settings)

    query = SearchScholarQuery()
    query.set_phrase("eelbrain")
    query.set_timeframe(2012, None)
    query.set_include_patents(False)

    # Page through the results, collecting one BibTeX entry per article.
    bib = BibliographyData()
    start = 0
    while True:
        querier.send_query(query)
        if len(querier.articles) == 0:
            break
        for article in querier.articles:
            querier.get_citation_data(article)
            data = parse_bytes(article.citation_data, 'bibtex')
            assert len(data.entries) == 1
            for key, entry in data.entries.items():
                # Fall back to the scraped URL when the citation lacks one.
                if 'url' not in entry.fields:
                    url = article.attrs['url'][0]
                    if url:
                        entry.fields['url'] = url
                bib.add_entry(key, entry)
        # Advance to the next page of results.
        start += 10
        query.set_start(start)

    # Write the accumulated bibliography to the cache file.
    CACHE.write_bytes(bib.to_bytes('bibtex').replace(br'\\&', br'\&'))
def saveEntry(key, entry, fileName, appendFlag=False):
    """
    Add a found BibTeX entry into a file with the given name.

    :param key: The BibTeX key for the new entry.
    :param entry: The new BibTeX entry to be added.
    :param fileName: The name of the file into which the entry is to \
    be put.
    :param appendFlag: The entry should be appended to the file if the \
    value is ``True`` and replace all the existing entries in the file \
    otherwise.
    """
    try:
        new_data = BibliographyData(entries=OrderedCaseInsensitiveDict(),
                                    preamble=[])
        new_data.entries[key] = entry
        # IMPROVED: write directly with Python file I/O instead of shelling out
        # to `cat`/`rm` via os.system, which was fragile with special characters
        # in file names and needed a temporary file.
        with open(fileName, 'a' if appendFlag else 'w') as target:
            target.write(new_data.to_string('bibtex'))
    except Exception:
        # BUG FIX: narrowed from a bare `except:`, which also swallowed
        # KeyboardInterrupt/SystemExit.
        tkMessageBox.showerror(
            'LiteRef Error',
            'Could not create the file: ' + fileName + '\nAbandoning the key.')
def preExecute(self):
    """Re-read all '.bib' pages into a fresh database (supports live serve)."""
    set_strict_mode(False)  # allow incorrectly formatted author/editor names

    # If this is invoked during a live serve, we need to recompile the list of
    # '.bib' files and read them again, otherwise there's no way to distinguish
    # existing entries from duplicates.
    self.__bib_files = []
    for node in self.translator.findPages(lambda p: p.source.endswith('.bib')):
        self.__bib_files.append(node.source)

    self.__database = BibliographyData()
    for bfile in self.__bib_files:
        try:
            db = parse_file(bfile)
            self.__bib_file_database[bfile] = db
        except UndefinedMacro as e:
            msg = "The BibTeX file %s has an undefined macro:\n%s"
            LOG.warning(msg, bfile, e)
            # BUG FIX: skip the unparsable file; otherwise the loop below would
            # reuse `db` from a previous iteration (or raise NameError on the
            # first failure).
            continue

        #TODO: https://bitbucket.org/pybtex-devs/pybtex/issues/93/
        #      databaseadd_entries-method-not-considering
        for key in db.entries:
            if key in self.__database.entries:
                if self.get('duplicate_warning') and (
                        key not in self.get('duplicates')):
                    msg = "The BibTeX entry '%s' defined in %s already exists."
                    LOG.warning(msg, key, bfile)
            else:
                self.__database.add_entry(key, db.entries[key])
class BibtexExtension(command.CommandExtension):
    """
    Extension for BibTeX citations and bibliography.
    """
    @staticmethod
    def defaultConfig():
        config = command.CommandExtension.defaultConfig()
        config['duplicate_warning'] = (True,
                                       "Show a warning when duplicate entries detected.")
        return config

    def __init__(self, *args, **kwargs):
        command.CommandExtension.__init__(self, *args, **kwargs)
        # Populated lazily in preExecute from the content's '.bib' files.
        self.__database = None
        self.__citations = set()

    def preExecute(self, content):
        """Collect all '.bib' files from *content* into a single database."""
        self.__database = BibliographyData()

        bib_files = []
        for node in content:
            if node.source.endswith('.bib'):
                bib_files.append(node.source)

        for bfile in bib_files:
            try:
                db = parse_file(bfile)
            except UndefinedMacro as e:
                msg = "The BibTeX file %s has an undefined macro:\n%s"
                # BUG FIX: Python 3 exceptions have no `.message` attribute.
                LOG.warning(msg, bfile, e)
                # BUG FIX: skip the unparsable file; otherwise the loop below
                # would reuse `db` from a previous iteration (or raise
                # NameError on the first failure).
                continue

            #TODO: https://bitbucket.org/pybtex-devs/pybtex/issues/93/
            #      databaseadd_entries-method-not-considering
            warn = self.get('duplicate_warning')
            for key in db.entries:
                if key in self.__database.entries:
                    if warn:
                        msg = "The BibTeX entry '%s' defined in %s already exists."
                        LOG.warning(msg, key, bfile)
                else:
                    self.__database.add_entry(key, db.entries[key])

    @property
    def database(self):
        return self.__database

    def extend(self, reader, renderer):
        self.requires(core, command)
        self.addCommand(reader, BibtexCommand())
        reader.addInline(BibtexReferenceComponent(), location='>FormatInline')
        renderer.add('BibtexCite', RenderBibtexCite())
        renderer.add('BibtexBiliography', RenderBibtexBibliography())
class BibtexExtension(command.CommandExtension):
    """
    Extension for BibTeX citations and bibliography.
    """
    @staticmethod
    def defaultConfig():
        config = command.CommandExtension.defaultConfig()
        config['duplicate_warning'] = (
            True, "Show a warning when duplicate entries detected.")
        return config

    def __init__(self, *args, **kwargs):
        command.CommandExtension.__init__(self, *args, **kwargs)
        # Populated lazily in preExecute from the content's '.bib' files.
        self.__database = None
        self.__citations = set()

    def preExecute(self, content):
        """Collect all '.bib' files from *content* into a single database."""
        self.__database = BibliographyData()

        bib_files = []
        for node in content:
            if node.source.endswith('.bib'):
                bib_files.append(node.source)

        for bfile in bib_files:
            try:
                db = parse_file(bfile)
            except UndefinedMacro as e:
                msg = "The BibTeX file %s has an undefined macro:\n%s"
                # BUG FIX: Python 3 exceptions have no `.message` attribute.
                LOG.warning(msg, bfile, e)
                # BUG FIX: skip the unparsable file; otherwise the loop below
                # would reuse `db` from a previous iteration (or raise
                # NameError on the first failure).
                continue

            #TODO: https://bitbucket.org/pybtex-devs/pybtex/issues/93/
            #      databaseadd_entries-method-not-considering
            warn = self.get('duplicate_warning')
            for key in db.entries:
                if key in self.__database.entries:
                    if warn:
                        msg = "The BibTeX entry '%s' defined in %s already exists."
                        LOG.warning(msg, key, bfile)
                else:
                    self.__database.add_entry(key, db.entries[key])

    @property
    def database(self):
        return self.__database

    def extend(self, reader, renderer):
        self.requires(core, command)
        self.addCommand(reader, BibtexCommand())
        reader.addInline(BibtexReferenceComponent(), location='>FormatInline')
        renderer.add('BibtexCite', RenderBibtexCite())
        renderer.add('BibtexBiliography', RenderBibtexBibliography())
def run(self):
    """Render a publication list grouped by year from a BibTeX file."""
    style = find_plugin('pybtex.style.formatting',
                        self.options.get('style', 'unsrt'))()
    bibtex_dir = self.options.get('bibtex_dir', 'bibtex')
    highlight_author = self.options.get('highlight_author', None)
    parser = Parser()

    # Sort the publication entries by year reversed
    data = sorted(parser.parse_file(self.arguments[0]).entries.items(),
                  key=lambda e: e[1].fields['year'], reverse=True)
    # BUG FIX: removed leftover debug statement `print(type(data))`.

    html = '<div class = "publication-list">\n'
    cur_year = None

    if bibtex_dir:  # create the bibtex dir if the option is set
        try:
            os.mkdir(os.path.sep.join((self.output_folder, bibtex_dir)))
        except OSError:  # probably because the dir already exists
            pass

    for label, entry in data:
        # print a year title when year changes
        if entry.fields['year'] != cur_year:
            if cur_year is not None:  # not first year group
                html += '</ul>'
            cur_year = entry.fields['year']
            html += '<h3>{}</h3>\n<ul>'.format(cur_year)

        pub_html = list(style.format_entries((entry,)))[0].text.render_as('html')
        if highlight_author:  # highlight an author (usually oneself)
            pub_html = pub_html.replace(
                highlight_author,
                '<strong>{}</strong>'.format(highlight_author), 1)
        html += '<li class = "publication">' + pub_html

        extra_links = ""
        if bibtex_dir:  # write bib files to bibtex_dir for downloading
            bib_link = '{}/{}.bib'.format(bibtex_dir, label)
            bib_data = BibliographyData(dict({label: entry}))
            bib_data.to_file('/'.join([self.output_folder, bib_link]), 'bibtex')
            extra_links += '[<a href="{}">bibtex</a>] '.format(bib_link)

        if 'pdf' in entry.fields:  # the link to the pdf file
            extra_links += '[<a href="{}">pdf</a>] '.format(entry.fields['pdf'])

        if extra_links:
            html += '<br/>' + extra_links
        html += '</li>'

    if len(data) != 0:  # publication list is nonempty
        html += '</ul>'
    html += '</div>'
    return [nodes.raw('', html, format='html'), ]
def run(csvFileName, bibFileName):
    """Convert a CSV export of papers into a BibTeX file, dropping authorless rows."""
    if not os.path.isfile(csvFileName):
        print("File not found: ", csvFileName)
        return

    # Pandas had trouble with complex paths, so copy to a local temp file first.
    tmpFile = tempfile.mktemp()
    copyfile(csvFileName, tmpFile)

    colnames = ['title', 'journal', 'book', 'volume', 'issue', 'doi',
                'author', 'year', 'url', 'type']
    pn = pd.read_csv(tmpFile, names=colnames, skiprows=1)

    bibData = BibliographyData()
    total = 0
    notAuthor = 0
    for row_index, row in pn.iterrows():
        total = total + 1
        fields = []
        if not pd.isnull(row.title):
            fields.append(('title', row.title))
        if not pd.isnull(row.journal):
            fields.append(('journal', row.journal))
        if not pd.isnull(row.volume):
            fields.append(('volume', str(row.volume)))
        # BUG FIX: the guard previously tested row.volume before appending the
        # issue field; it must test row.issue itself.
        if not pd.isnull(row.issue):
            fields.append(('issue', str(row.issue)))
        if not pd.isnull(row.doi):
            fields.append(('doi', row.doi))
        if not pd.isnull(row.year):
            fields.append(('year', str(row.year)))
        if not pd.isnull(row.url):
            fields.append(('url', row.url))
        if not pd.isnull(row.author):
            fields.append(('author', AuthorFix(row.author)))

        keyPaper = row.doi
        typePaper = TypePaperSelect(row.type)
        print("Chave " + keyPaper + " \r", end="", flush=True)
        if pd.isnull(row.author):
            # Entries without authors are counted and skipped.
            notAuthor = notAuthor + 1
        else:
            bibData.entries[keyPaper] = Entry(typePaper, fields)

    print("Processed ", total, " ")
    print("Removed without author ", notAuthor)
    print("Total Final", len(bibData.entries))
    bibData.to_file(bibFileName)
    print("Saved file ", bibFileName)
def aggregate_snls(snls):
    """
    Aggregates a series of SNLs into the fields for a single SNL
    """
    # Choose earliest created_at
    created_at = sorted([snl["about"]["created_at"]["string"] for snl in snls])[0]

    # Choose earliest history
    history = sorted(
        snls, key=lambda snl: snl["about"]["created_at"]["string"]
    )[0]["about"]["history"]

    # Aggregate all references into one dict to remove duplicates
    refs = {}
    for snl in snls:
        try:
            entries = parse_string(snl["about"]["references"], bib_format="bibtex")
            refs.update(entries.entries)
        except Exception:
            # NOTE(review): `self` is not in scope in this plain function, so
            # this logging call will itself raise NameError if parsing fails —
            # confirm whether this was meant to be a method.
            self.logger.debug("Failed parsing bibtex: {}".format(
                snl["about"]["references"]))
    entries = BibliographyData(entries=refs)
    references = entries.to_string("bibtex")

    # Aggregate all remarks
    remarks = list(set([remark for snl in snls
                        for remark in snl["about"]["remarks"]]))
    # Aggregate all projects
    projects = list(set([projects for snl in snls
                         for projects in snl["about"]["projects"]]))

    # Aggregate all authors - converting to a single dictionary first performs
    # duplicate checking
    authors = {entry["name"].lower(): entry["email"]
               for snl in snls for entry in snl["about"]["authors"]}
    authors = [{"name": name.title(), "email": email}
               for name, email in authors.items()]

    # Aggregate all the database IDs
    db_ids = defaultdict(list)
    for snl in snls:
        if len(snl["about"]["history"]) == 1 and \
                snl["about"]["history"][0]["name"] in DB_indexes:
            db_name = snl["about"]["history"][0]["name"]
            db_id_key = DB_indexes[db_name]
            db_ids[db_id_key].append(
                snl["about"]["history"][0]["description"].get("id", None))

    # BUG FIX: remove Nones per value and drop keys whose filtered list is
    # empty. The original filtered `db_ids.items()` (tuples, always truthy),
    # which kept empty lists whenever the dict was non-empty.
    db_ids = {k: list(filter(None, v)) for k, v in db_ids.items()
              if list(filter(None, v))}

    snl_fields = {
        "created_at": created_at,
        "history": history,
        "references": references,
        "remarks": remarks,
        "projects": projects,
        "authors": authors,
        "data": {"_db_ids": db_ids}
    }
    return snl_fields
class BibtexExtension(command.CommandExtension):
    """
    Extension for BibTeX citations and bibliography.
    """
    @staticmethod
    def defaultConfig():
        config = command.CommandExtension.defaultConfig()
        return config

    def __init__(self, *args, **kwargs):
        command.CommandExtension.__init__(self, *args, **kwargs)
        # Filled in init() from every '.bib' file in the page tree.
        self.__database = BibliographyData()
        self.__citations = set()

    def init(self, translator):
        command.CommandExtension.init(self, translator)

        bib_files = []
        for node in anytree.PreOrderIter(self.translator.root):
            if node.source.endswith('.bib'):
                bib_files.append(node.source)

        for bfile in bib_files:
            try:
                db = parse_file(bfile)
            except UndefinedMacro as e:
                msg = "The BibTeX file %s has an undefined macro:\n%s"
                # BUG FIX: Python 3 exceptions have no `.message` attribute.
                LOG.warning(msg, bfile, e)
                # BUG FIX: skip the unparsable file; otherwise the loop below
                # would reuse `db` from a previous iteration (or raise
                # NameError on the first failure).
                continue

            #TODO: https://bitbucket.org/pybtex-devs/pybtex/issues/93/
            #      databaseadd_entries-method-not-considering
            for key in db.entries:
                if key in self.__database.entries:
                    msg = "The BibTeX entry '%s' defined in %s already exists."
                    LOG.warning(msg, key, bfile)
                else:
                    self.__database.add_entry(key, db.entries[key])

    @property
    def database(self):
        return self.__database

    def extend(self, reader, renderer):
        self.requires(command)
        self.addCommand(BibtexCommand())
        reader.addInline(BibtexReferenceComponent(), location='>Format')
        renderer.add(BibtexCite, RenderBibtexCite())
        renderer.add(BibtexBibliography, RenderBibtexBibliography())
def parse_bibtex(args, wanted):
    """Parse all input BibTeX files, optionally restricted to *wanted* keys."""
    # Idiom fix: compare against None with `is not`, not `!=`.
    if wanted is not None:
        bibs = BibliographyData(wanted_entries=wanted)
    else:
        # Because Ubuntu/Debian doesn't have a new enough pybtex for wanted_entries
        bibs = BibliographyData()
    parser = Parser()
    for filename in input_bibtex_filenames(args):
        filebibs = parser.parse_file(filename)
        bibs.add_entries(iter(filebibs.entries.items()))
    # Sort the entries to ensure a consistent ordering of the output so that adding
    # one new citation doesn't alter the whole file
    bibs.entries = OrderedDict(sorted(bibs.entries.items(), key=lambda x: x[0]))
    return bibs
def bib(args):
    """Harvest every dataset's '.bib' files into a single lexibank.bib."""
    gbib = BibliographyData()

    def _harvest(ds, **kw):
        # Prefix each entry key with the dataset id to keep keys globally unique.
        for bib in ds.cldf_dir.glob('*.bib'):
            bib = parse_file(str(bib))
            for id_, entry in bib.entries.items():
                id_ = '{0}:{1}'.format(ds.id, id_)
                if id_ not in gbib.entries:
                    gbib.add_entry(id_, entry)

    with_dataset(args, _harvest, default_to_all=True)

    gbib.to_file(
        str(Path(args.cfg['paths']['lexibank']).joinpath('lexibank.bib')))
def make_bibliography(table):  # pragma: nocover
    """Convert *table* rows into a BibliographyData, reporting dropped rows."""
    db = BibliographyData()
    for row in table:
        try:
            entry = row_to_bibentry(row)
        except ValueError as exc:
            # Rows that cannot be converted are reported on stderr and skipped.
            print('Reference', row.get('Reference_ID'), 'dropped:', str(exc),
                  file=sys.stderr)
        else:
            db.add_entry(entry.key, entry)
    return db
def main(bibfile, template, save_path, save_individual=False):
    """Render BibTeX entries through a Jinja template into HTML file(s)."""
    # save_path must be a directory when writing one file per entry, and a
    # valid file location otherwise.
    if save_individual and not os.path.isdir(save_path):
        print(
            'save_individual is true, but save_path is not a directory. Quitting'
        )
        return
    elif not save_individual and not os.path.isdir(
            os.path.abspath(os.path.dirname(save_path))):
        print(
            'save_individual is false, but save_path is not a valid file location. Quitting'
        )
        return

    # Load the template with the custom formatting filters installed.
    tenv = jinja2.sandbox.SandboxedEnvironment()
    tenv.filters['author_fmt'] = _author_fmt
    tenv.filters['author_list'] = _author_list
    tenv.filters['title'] = _title
    tenv.filters['venue_type'] = _venue_type
    tenv.filters['venue'] = _venue
    tenv.filters['main_url'] = _main_url
    tenv.filters['extra_urls'] = _extra_urls
    tenv.filters['monthname'] = _month_name
    with open(template) as f:
        tmpl = tenv.from_string(f.read())

    # Parse the BibTeX file.
    with open(bibfile) as f:
        db = bibtex.Parser().parse_stream(f)

    for k, v in db.entries.items():
        # Include the bibliography key in each entry.
        v.fields['key'] = k
        # Include the full BibTeX in each entry, minus the fields to ignore.
        kept_fields = filter(
            lambda item: item[0] not in _ignore_fields_bibtex_source,
            v.fields.items())
        pruned = Entry(v.type, fields=kept_fields, persons=v.persons)
        v.fields['bibtex'] = BibliographyData(
            {k: pruned}).to_string('bibtex').strip()
        # Replace ' = "XXX"' with '={XXX}'
        v.fields['bibtex'] = re.sub(r' = \"(.*)\"', r'={\1}',
                                    v.fields['bibtex'])

    # Render the template with entries sorted newest first.
    bib_sorted = sorted(db.entries.values(), key=_sortkey, reverse=True)
    if save_individual:
        for bib in bib_sorted:
            out = tmpl.render(entry=bib)
            file_path = os.path.join(save_path, '%s.html' % bib.key)
            with open(file_path, 'w') as f:
                f.write(out)
    else:
        out = tmpl.render(entries=bib_sorted)
        with open(save_path, 'w') as f:
            f.write(out)
def to_markdown_pandoc(entry, csl_path):
    """
    Converts the PyBtex entry into formatted markdown citation text
    """
    bibtex_string = BibliographyData(
        entries={entry.key: entry}).to_string("bibtex")
    # Minimal pandoc document whose YAML front matter cites every entry.
    citation_text = """
---
nocite: '@*'
---
"""
    with tempfile.TemporaryDirectory() as tmpdir:
        bib_path = os.path.join(tmpdir, "temp.bib")
        with open(bib_path, "w") as bibfile:
            bibfile.write(bibtex_string)
        # Call Pandoc.
        markdown = pypandoc.convert_text(
            source=citation_text,
            to="markdown_strict-citations",
            format="md",
            extra_args=["--csl", csl_path, "--bibliography", bib_path],
            filters=["pandoc-citeproc"],
        )

    # TODO: Perform this extraction better
    markdown = markdown.split("\n")[0][2:]
    return str(markdown)
class MacrosTest(ParserTest, TestCase):
    # Note: the layout of input_string matters — the expected error below
    # refers to "line 6", where `nobody` appears.
    input_string = u"""
@String{and = { and }}
@String{etal = and # { {et al.}}}
@Article(
    unknown,
    author = nobody,
)
@Article(
    gsl,
    author = "Gough, Brian"#etal,
)
"""
    correct_result = BibliographyData([
        ('unknown', Entry('article')),
        ('gsl', Entry('article', persons={
            u'author': [Person(u'Gough, Brian'), Person(u'{et al.}')],
        })),
    ])
    errors = [
        'undefined string in line 6: nobody',
    ]
class KeylessEntriesTest(ParserTest, TestCase):
    # With keyless_entries enabled, the parser assigns sequential
    # 'unnamed-N' keys to entries that omit a citation key.
    parser_options = {'keyless_entries': True}
    input_string = u"""
@BOOK(
    title="I Am Jackie Chan: My Life in Action",
    year=1999
)
@BOOK()
@BOOK{}
@BOOK{
    title = "Der deutsche Jackie Chan Filmführer",
}
"""
    correct_result = BibliographyData({
        'unnamed-1': Entry('book', {
            'title': 'I Am Jackie Chan: My Life in Action',
            'year': '1999',
        }),
        'unnamed-2': Entry('book'),
        'unnamed-3': Entry('book'),
        'unnamed-4': Entry('book',
                           {'title': u'Der deutsche Jackie Chan Filmführer'}),
    })
class EntryTypesTest(ParserTest, TestCase):
    # Note: the layout of input_string matters — the expected errors refer to
    # lines 12-15, where the rejected entry types appear.
    input_string = u"""
Testing what are allowed for entry types

These are OK
@somename{an_id,}
@t2{another_id,}
@t@{again_id,}
@t+{aa1_id,}
@_t{aa2_id,}

These ones not
@2thou{further_id,}
@some name{id3,}
@some#{id4,}
@some%{id4,}
"""
    correct_result = BibliographyData([
        ('an_id', Entry('somename')),
        ('another_id', Entry('t2')),
        ('again_id', Entry('t@')),
        ('aa1_id', Entry('t+')),
        ('aa2_id', Entry('_t')),
    ])
    errors = [
        "syntax error in line 12: a valid name expected",
        "syntax error in line 13: '(' or '{' expected",
        "syntax error in line 14: '(' or '{' expected",
        "syntax error in line 15: '(' or '{' expected",
    ]
class BracesAndQuotesTest(ParserTest, TestCase):
    # Braces inside a quoted value protect the embedded double quotes.
    input_string = '''@ARTICLE{ test, title="Nested braces and {"quotes"}", }'''
    correct_result = BibliographyData(
        {'test': Entry('article', {'title': 'Nested braces and {"quotes"}'})})
def preExecute(self, content):
    """Load all '.bib' files in *content*, honoring explicitly allowed duplicates."""
    duplicates = self.get('duplicates', list())
    self.__database = BibliographyData()

    bib_files = []
    for node in content:
        if node.source.endswith('.bib'):
            bib_files.append(node.source)

    for bfile in bib_files:
        try:
            db = parse_file(bfile)
        except UndefinedMacro as e:
            msg = "The BibTeX file %s has an undefined macro:\n%s"
            # BUG FIX: Python 3 exceptions have no `.message` attribute.
            LOG.warning(msg, bfile, e)
            # BUG FIX: skip the unparsable file; otherwise the loop below would
            # reuse `db` from a previous iteration (or raise NameError on the
            # first failure).
            continue

        #TODO: https://bitbucket.org/pybtex-devs/pybtex/issues/93/
        #      databaseadd_entries-method-not-considering
        warn = self.get('duplicate_warning')
        for key in db.entries:
            duplicate_key = key in self.__database.entries
            duplicate_key_allowed = key in duplicates
            if duplicate_key and (not duplicate_key_allowed):
                if warn:
                    msg = "The BibTeX entry '%s' defined in %s already exists."
                    LOG.warning(msg, key, bfile)
            elif not duplicate_key:
                self.__database.add_entry(key, db.entries[key])
class InlineCommentTest(ParserTest, TestCase):
    # Note: the layout of input_string matters — the expected errors refer to
    # lines 10 and 12, where the stray "% some text" comments appear.
    input_string = u"""
"some text" causes an error like this
``You're missing a field name---line 6 of file bibs/inline_comment.bib``
for all 3 of the % some text occurences below; in each case the parser keeps
what it has up till that point and skips, so that it correctly gets the last
entry.
@article{Me2010,}
@article{Me2011,
         author="Brett-like, Matthew",
         % some text
         title="Another article"}
@article{Me2012, % some text
         author="Real Brett"}
This one correctly read
@article{Me2013,}
"""
    correct_result = BibliographyData([
        ('Me2010', Entry('article')),
        ('Me2011', Entry('article', persons={
            'author': [Person(first='Matthew', last='Brett-like')],
        })),
        ('Me2012', Entry('article')),
        ('Me2013', Entry('article')),
    ])
    errors = [
        "syntax error in line 10: '}' expected",
        "syntax error in line 12: '}' expected",
    ]
class DuplicateFieldTest(ParserTest, TestCase):
    # Only the first occurrence of a field is kept; BibTeX field names are
    # case-insensitive, so TITLE/TiTlE collide with title.
    input_strings = [
        r"""
@MASTERSTHESIS{
    Mastering,
    year = 1364,
    title = "Mastering Thesis Writing",
    school = "Charles University in Prague",
    TITLE = "No One Reads Master's Theses Anyway LOL",
    TiTlE = "Well seriously, lol.",
}
"""
    ]
    correct_result = BibliographyData({
        'Mastering': Entry(
            'mastersthesis',
            fields=[
                ('year', '1364'),
                ('title', 'Mastering Thesis Writing'),
                ('school', 'Charles University in Prague'),
            ],
        ),
    })
    errors = [
        'entry with key Mastering has a duplicate TITLE field',
        'entry with key Mastering has a duplicate TiTlE field',
    ]
def create_bibliography(self, record):
    """Return the BibTeX string for a single *record*."""
    texkey, entries = self.create_bibliography_entry(record)
    bib_data = BibliographyData({texkey: entries})
    return BibtexWriter().to_string(bib_data)
class BracesTest(ParserTest, TestCase):
    # Trailing whitespace inside the braced value is stripped by the parser.
    input_string = u"""@ARTICLE{ test, title={Polluted with {DDT}. }, }"""
    correct_result = BibliographyData([
        (u'test', Entry('article', [(u'title', 'Polluted with {DDT}.')])),
    ])
class BracesTest(ParserTest, TestCase):
    # Trailing whitespace inside the braced value is stripped by the parser.
    input_string = """@ARTICLE{ test, title={Polluted with {DDT}. }, }"""
    correct_result = BibliographyData(
        {'test': Entry('article', {'title': 'Polluted with {DDT}.'})})
def create_bibliography(self, record_list):
    """Return a BibTeX string containing one entry per record in *record_list*."""
    # create_bibliography_entry yields (texkey, entry) pairs, so dict() builds
    # the key -> entry mapping directly.
    bib_dict = dict(
        self.create_bibliography_entry(record) for record in record_list)
    return BibtexWriter().to_string(BibliographyData(bib_dict))
class UnusedEntryTest(ParserTest, TestCase):
    # With an empty wanted_entries list, every parsed entry is discarded.
    parser_options = {'wanted_entries': []}
    input_string = u"""
@Article(
    gsl,
    author = nobody,
)
"""
    correct_result = BibliographyData()
class BracesAndQuotesTest(ParserTest, TestCase):
    # Braces inside a quoted value protect the embedded double quotes.
    input_string = u'''@ARTICLE{ test, title="Nested braces and {"quotes"}", }'''
    correct_result = BibliographyData([
        (u'test',
         Entry('article', [(u'title', 'Nested braces and {"quotes"}')])),
    ])
def on_config(self, config):
    """Load the bibliography when the MkDocs config is loaded."""
    bib_file = self.config.get("bib_file", None)
    bib_dir = self.config.get("bib_dir", None)

    # Collect bibliography sources from a single file/URL or a directory.
    bibfiles = []
    if bib_file is not None:
        if validators.url(bib_file):
            # a valid URL is cached into a temporary local file
            bibfiles.append(tempfile_from_url(bib_file, '.bib'))
        else:
            bibfiles.append(bib_file)
    elif bib_dir is not None:
        bibfiles.extend(Path(bib_dir).glob("*.bib"))
    else:
        raise Exception(
            "Must supply a bibtex file or directory for bibtex files")

    # Merge every parsed file into a single entry mapping.
    refs = {}
    for bibfile in bibfiles:
        refs.update(parse_file(bibfile).entries)
    self.bib_data = BibliographyData(entries=refs)

    # Resolve the CSL style sheet from either a URL or a local path (or empty).
    if validators.url(self.config["csl_file"]):
        self.csl_file = tempfile_from_url(self.config["csl_file"], '.csl')
    else:
        self.csl_file = self.config.get("csl_file", None)

    # Inline citation rendering requires a CSL file.
    self.cite_inline = self.config.get("cite_inline", False)
    if self.cite_inline and not self.csl_file:
        raise Exception("Must supply a CSL file in order to use cite_inline")

    return config
def load_bib(self, filename="IMSfull.bib"):
    """Parse *filename* and rebuild the publication list and lookup indexes.

    NOTE: this is Python 2 code (iteritems, StringIO) and is kept that way.
    """
    parser = bibtex.Parser()
    bib_data = parser.parse_file(filename)
    self.lastload = os.path.getmtime(filename)
    self.filename = filename

    pubs = []
    index_keys = {}
    index_bibkeys = {}
    for key, elem in bib_data.entries.iteritems():
        entry = elem.fields

        # Re-generate the original BibTeX using StringIO and the bibtex writer.
        single = BibliographyData()
        single.add_entry(key, elem)
        output = StringIO.StringIO()
        writer = Writer()
        writer.write_stream(single, output)
        entry["bibtex"] = output.getvalue()

        # SHA1 of the serialized entry yields an absolutely unique key.
        digest = hashlib.sha1(simplejson.dumps(entry))
        entry["key"] = digest.hexdigest()

        entry["authors"] = self.parse_authors(elem.persons)
        entry["bibkey"] = elem.key

        # Split the comma-separated keyword field, if present.
        entry["keywords"] = []
        if entry.get("keyword"):
            for part in entry["keyword"].split(","):
                entry["keywords"].append(part.strip())

        entry["reference"] = self.render_references(elem.type, entry)

        pubs.append(entry)
        index_keys[digest.hexdigest()] = len(pubs) - 1
        index_bibkeys[elem.key] = len(pubs) - 1
        if "year" not in entry:
            entry["year"] = ""

    # Assign at the end -> less time for threading problems.
    self.index_keys = index_keys
    self.index_bibkeys = index_bibkeys
    self.pubs = pubs
def createMaterialize(self, token, parent):
    """Append a [BibTeX] modal link to every bibliography list item."""
    ol = self.createHTML(token, parent)
    for child in ol.children:
        key = child['id']
        single = BibliographyData()
        single.add_entry(key, self.extension.database.entries[key])
        tex = single.to_string("bibtex")
        modal_id = uuid.uuid4()
        # Trigger link opens the modal containing the raw BibTeX source.
        html.Tag(child, 'a', style="padding-left:10px;",
                 class_='modal-trigger moose-bibtex-modal',
                 href="#{}".format(modal_id), string=u'[BibTeX]')
        modal = html.Tag(child, 'div', class_='modal', id_=modal_id)
        content = html.Tag(modal, 'div', class_='modal-content')
        pre = html.Tag(content, 'pre', style="line-height:1.25;")
        html.Tag(pre, 'code', class_='language-latex', string=tex)
def modifiedKeys(self, keyPattern):
    """Return a copy of the database with keys rebuilt from *keyPattern*.

    Each pattern element is looked up first among an entry's persons (joining
    last names), then among its fields; entries yielding an empty key keep
    their original one.
    """
    newBib = BibliographyData()
    for old_key in self._bibdata.entries:
        entry = self._bibdata.entries[old_key]
        persons = entry.persons
        fields = entry.fields

        parts = []
        for kp in keyPattern:
            if not kp:
                continue
            if kp in persons.keys():
                parts.append(''.join(p.last()[0] for p in persons[kp]))
            elif kp in fields.keys():
                parts.append(''.join(fields[kp]))
        new_key = ''.join(parts)

        if new_key:
            newBib.entries[self._cleanLaTeXFromKey(new_key)] = entry
        else:
            newBib.entries[old_key] = entry
    return newBib
def run(self):
    """Build the publication-list HTML for the reST directive.

    Reads the BibTeX file given as the first directive argument, groups
    entries by year (newest first) and renders one <ul> per year.
    Optionally writes per-entry .bib files (``bibtex_dir``), per-entry
    detail pages (``detail_page_dir``), and bolds one author name
    (``highlight_author``).

    Returns a single raw-HTML docutils node.

    Fix: removed a leftover debug statement ``print(type(data))`` that
    polluted stdout on every build.
    """
    style = find_plugin('pybtex.style.formatting', self.options.get('style', 'unsrt'))()
    bibtex_dir = self.options.get('bibtex_dir', 'bibtex')
    detail_page_dir = self.options.get('detail_page_dir', 'papers')
    highlight_author = self.options.get('highlight_author', None)
    # Rebuild the page whenever the .bib source changes.
    self.state.document.settings.record_dependencies.add(self.arguments[0])
    parser = Parser()

    # Sort the publication entries by year reversed
    data = sorted(parser.parse_file(self.arguments[0]).entries.items(),
                  key=lambda e: e[1].fields['year'], reverse=True)

    html = '<div class = "publication-list">\n'
    cur_year = None

    if bibtex_dir:  # create the bibtex dir if the option is set
        try:
            os.mkdir(os.path.sep.join((self.output_folder, bibtex_dir)))
        except OSError:  # probably because the dir already exists
            pass

    if detail_page_dir:  # create the detail page dir if the option is set
        try:
            os.mkdir(os.path.sep.join((self.output_folder, detail_page_dir)))
        except OSError:  # probably because the dir already exists
            pass

    for label, entry in data:
        # print a year title when year changes
        if entry.fields['year'] != cur_year:
            if cur_year is not None:  # not first year group
                html += '</ul>'
            cur_year = entry.fields['year']
            html += '<h3>{}</h3>\n<ul>'.format(cur_year)

        pub_html = list(style.format_entries((entry,)))[0].text.render_as('html')
        if highlight_author:  # highlight an author (usually oneself)
            pub_html = pub_html.replace(
                highlight_author, '<strong>{}</strong>'.format(highlight_author), 1)
        html += '<li class = "publication">' + pub_html

        extra_links = ""
        bib_data = BibliographyData(dict({label: entry}))  # detail_page_dir may need it later
        if bibtex_dir:  # write bib files to bibtex_dir for downloading
            bib_link = '{}/{}.bib'.format(bibtex_dir, label)
            bib_data.to_file('/'.join([self.output_folder, bib_link]), 'bibtex')
            extra_links += '[<a href="{}">BibTeX</a>] '.format(
                self.site.config['BASE_URL'] + bib_link)
        if 'fulltext' in entry.fields:  # the link to the full text, usually a link to the pdf file
            extra_links += '[<a href="{}">full text</a>] '.format(entry.fields['fulltext'])

        if extra_links or detail_page_dir:
            html += '<br>'
        html += extra_links

        if detail_page_dir:  # render the details page of a paper
            page_url = '/'.join((detail_page_dir, label + '.html'))
            html += ' [<a href="{}">abstract and details</a>]'.format(
                self.site.config['BASE_URL'] + page_url)
            context = {
                'title': process_bibtex_string(entry.fields['title']),
                'abstract': process_bibtex_string(entry.fields['abstract']) if 'abstract' in entry.fields else '',
                'bibtex': bib_data.to_string('bibtex'),
                'bibtex_link': '/' + bib_link if bibtex_dir else '',
                'default_lang': self.site.config['DEFAULT_LANG'],
                'label': label,
                'lang': self.site.config['DEFAULT_LANG'],
                'permalink': self.site.config['SITE_URL'] + page_url,
                'reference': pub_html,
                'extra_links': extra_links
            }
            if 'fulltext' in entry.fields and entry.fields['fulltext'].endswith('.pdf'):
                context['pdf'] = entry.fields['fulltext']
            self.site.render_template(
                'publication.tmpl',
                os.path.sep.join((self.output_folder, detail_page_dir, label + '.html')),
                context,
            )
        html += '</li>'

    if len(data) != 0:  # publication list is nonempty
        html += '</ul>'

    html += '</div>'
    return [nodes.raw('', html, format='html'), ]
def filter_bibolamazifile(self, bibolamazifile):
    """Rewrite every citation key according to the ``self.fmt`` pattern.

    ``self.fmt`` is a %-style template whose ``%(field)s`` placeholders are
    substituted per entry (author, year, journal, arxivid, ...).  Entries
    may be skipped based on ``self.if_published`` / ``self.if_type``.
    Colliding new keys are disambiguated with a ``.1``, ``.2``, ... suffix.

    Fixes: the "Invalid field" error message was raised with a literal
    ``%s`` because the ``% f`` substitution was missing; ``Jump`` now
    derives from ``Exception`` so ``raise Jump`` is also valid on Python 3.
    """
    #
    # bibdata is a pybtex.database.BibliographyData object
    #
    bibdata = bibolamazifile.bibliographyData()

    arxivaccess = arxivutil.setup_and_get_arxiv_accessor(bibolamazifile)

    # first, find required fields and apply possible "filters"
    _rx_short_journal_known = re.compile(
        r'\b(?P<word>' + r'|'.join(KNOWN_ABBREV.keys()) + r')\b',
        re.IGNORECASE)

    def abbreviate(x):
        # keep whitelisted words whole, otherwise first three letters + '.'
        if x.lower() in NO_ABBREV:
            return x
        return x[0:3]+'.'

    def short_journal(x):
        # exact known journal names win ...
        if x.strip().lower() in KNOWN_JOURNALS:
            return KNOWN_JOURNALS[x.strip().lower()]
        # ... otherwise substitute known abbreviations, drop boring words,
        # abbreviate the rest, strip non-word chars and clamp to 20 chars.
        x = _rx_short_journal_known.sub(lambda m: KNOWN_ABBREV[m.group('word').lower()], x)
        x = re.sub(r'\b(' + r'|'.join(BORING_WORDS) + r')\b(?!\s*($|[-:;\.]))', '', x,
                   flags=re.IGNORECASE)
        x = re.sub(r'\b(?P<word>\w+)\b([^\.]|$)', lambda m: abbreviate(m.group('word')), x)
        x = re.sub(r'[^\w.]+', '', x)
        if (len(x)>20):
            x = x[0:18]+'..'
        return x

    def arxivInfo(entry, field):
        inf = arxivaccess.getArXivInfo(entry.key)
        if inf is None:
            return ''
        return inf[field]

    # one formatter per supported %(field) placeholder
    fld_fn = {
        'author': lambda entry: getlast(entry.persons['author'][0], lower=False)[0],
        'authors': lambda entry: "".join([getlast(a, lower=False)[0]
                                          for a in entry.persons['author']])[0:25],
        'year': lambda entry: entry.fields.get('year', ''),
        'year2': lambda entry: '%02d' % (int(entry.fields.get('year', '')) % 100),
        'journal_abb': lambda entry: fmtjournal(entry.fields.get('journal', '')),
        'journal': lambda entry: short_journal(normstr(delatex(entry.fields.get('journal', '')),
                                                       lower=False)),
        'title_word': lambda entry: next(
            (word
             for word in re.sub(r'[^\w\s]', '', delatex(entry.fields.get('title', ''))).split()
             if word.lower() not in BORING_TITLE_WORDS),
            ''
        ),
        'doi': lambda entry: entry.fields.get('doi', ''),
        'arxivid': lambda entry: arxivInfo(entry, 'arxivid'),
        'primaryclass': lambda entry: arxivInfo(entry, 'primaryclass'),
    }

    # used fields
    fld = set([m.group('field')
               for m in re.finditer(r'(^|[^%])(%%)*%\((?P<field>\w+)\)', self.fmt)])
    # check all fields are valid
    for f in fld:
        if f not in fld_fn:
            # BUGFIX: the message previously lacked the '% f' substitution and
            # was emitted with a literal '%s' in it.
            raise BibFilterError('citekey', "Invalid field `%s' for citekey filter" % (f,))

    logger.debug('Used fields are %r', fld)

    newbibdata = BibliographyData()

    # Local control-flow exception used to skip an entry from deep inside
    # the checks below.  (Derives from Exception so the `raise` also works
    # on Python 3; raising old-style classes was Python-2-only.)
    class Jump(Exception):
        pass

    for (key, entry) in bibdata.entries.iteritems():
        keyorig = key
        try:
            ainfo = arxivaccess.getArXivInfo(key)
            if (self.if_published is not None):
                if (not self.if_published and (ainfo is None or ainfo['published'])):
                    logger.longdebug('Skipping published entry %s (filter: unpublished)', key)
                    raise Jump
                if (self.if_published and (ainfo is not None and not ainfo['published'])):
                    logger.longdebug('Skipping unpublished entry %s (filter: published)', key)
                    raise Jump
            if self.if_type is not None:
                if entry.type not in self.if_type:
                    logger.longdebug('Skipping entry %s of different type %s (filter: %r)',
                                     key, entry.type, self.if_type)
                    raise Jump
            repldic = dict(zip(fld, [fld_fn[f](entry) for f in fld]))
            try:
                key = self.fmt % repldic
            except ValueError as e:
                raise BibFilterError('citekey', "Error replacing fields: %s" % (e))
        except Jump:
            pass
        finally:
            # avoid duplicate keys: append '.1', '.2', ... until unique
            newkey = key
            count = 0
            while newkey in newbibdata.entries:
                count += 1
                newkey = key + '.%d'%(count)
            if count:
                logger.warning("`%s': Citation key `%s' already used: using `%s' instead.",
                               keyorig, key, newkey)
            # add the entry
            newbibdata.add_entry(newkey, entry)

    bibolamazifile.setBibliographyData(newbibdata)

    return
class BibtexExtension(command.CommandExtension):
    """
    Extension for BibTeX citations and bibliography.

    Fix: ``preExecute`` now skips a .bib file whose parsing fails with
    ``UndefinedMacro``; previously execution fell through and iterated an
    unbound (first file) or stale (later files) ``db`` variable.
    """

    @staticmethod
    def defaultConfig():
        config = command.CommandExtension.defaultConfig()
        config['duplicate_warning'] = (True, "Show a warning when duplicate entries detected.")
        config['duplicates'] = (list(), "A list of duplicates that are allowed.")
        return config

    def __init__(self, *args, **kwargs):
        command.CommandExtension.__init__(self, *args, **kwargs)
        self.__database = None    # merged pybtex database, built in preExecute()
        self.__citations = set()  # citation keys gathered while tokenizing

    def initMetaData(self, page, meta):
        """Create the per-page 'citations' metadata bucket."""
        meta.initData('citations', set())

    def addCitations(self, *args):
        """Record one or more citation keys used on the current page."""
        self.__citations.update(args)

    def preExecute(self, content):
        """Parse every *.bib file in the content tree into one database.

        Duplicate keys are skipped (and optionally warned about) unless
        explicitly allowed via the 'duplicates' config entry.
        """
        duplicates = self.get('duplicates', list())
        self.__database = BibliographyData()

        bib_files = []
        for node in content:
            if node.source.endswith('.bib'):
                bib_files.append(node.source)

        for bfile in bib_files:
            try:
                db = parse_file(bfile)
            except UndefinedMacro as e:
                msg = "The BibTeX file %s has an undefined macro:\n%s"
                LOG.warning(msg, bfile, e.message)
                # BUGFIX: skip this file; 'db' would otherwise be unbound or
                # left over from a previous loop iteration.
                continue

            #TODO: https://bitbucket.org/pybtex-devs/pybtex/issues/93/
            # databaseadd_entries-method-not-considering
            warn = self.get('duplicate_warning')
            for key in db.entries:
                duplicate_key = key in self.__database.entries
                duplicate_key_allowed = key in duplicates
                if duplicate_key and (not duplicate_key_allowed):
                    if warn:
                        msg = "The BibTeX entry '%s' defined in %s already exists."
                        LOG.warning(msg, key, bfile)
                elif not duplicate_key:
                    self.__database.add_entry(key, db.entries[key])

    def postTokenize(self, ast, page, meta, reader):
        """Stash page citations and make sure a bibliography node exists."""
        if self.__citations:
            meta.getData('citations').update(self.__citations)
            self.__citations.clear()

            has_bib = False
            for node in anytree.PreOrderIter(ast):
                if node.name == 'BibtexBibliography':
                    has_bib = True
                    break

            if not has_bib:
                BibtexBibliography(ast)

    @property
    def database(self):
        """The merged pybtex BibliographyData (populated in preExecute)."""
        return self.__database

    def extend(self, reader, renderer):
        self.requires(core, command)
        self.addCommand(reader, BibtexCommand())
        self.addCommand(reader, BibtexReferenceComponent())
        reader.addInline(BibtexReferenceComponentDeprecated(), location='>FormatInline')
        renderer.add('BibtexCite', RenderBibtexCite())
        renderer.add('BibtexBibliography', RenderBibtexBibliography())
        if isinstance(renderer, LatexRenderer):
            renderer.addPackage('natbib', 'round')
class MooseBibtex(MooseCommonExtension, Preprocessor):
    """
    Creates per-page bibliographies using latex syntax.
    """
    # Regexes for \bibliography{...}, \bibliographystyle{...} and the cite
    # commands; the (?<!`) lookbehind skips occurrences inside inline code.
    RE_BIBLIOGRAPHY = r'(?<!`)\\bibliography\{(.*?)\}'
    RE_STYLE = r'(?<!`)\\bibliographystyle\{(.*?)\}'
    RE_CITE = r'(?<!`)\\(?P<cmd>cite|citet|citep)\{(?P<key>.*?)\}'

    def __init__(self, markdown_instance=None, **kwargs):
        # NOTE(review): the trailing comma makes this statement a 1-tuple
        # expression; harmless at runtime but probably unintended.
        MooseCommonExtension.__init__(self, **kwargs),
        Preprocessor.__init__(self, markdown_instance)

    def run(self, lines):
        """
        Create a bibliography from cite commands.
        """
        # Join the content to enable regex searches throughout entire text
        content = '\n'.join(lines)

        # Build the database of bibtex data
        self._citations = []  # member b/c it is used in substitution function
        self._bibtex = BibliographyData()  # ""
        bibfiles = []
        match = re.search(self.RE_BIBLIOGRAPHY, content)
        if match:
            bib_string = match.group(0)  # NOTE(review): currently unused
            for bfile in match.group(1).split(','):
                try:
                    bibfiles.append(os.path.join(self._docs_dir, bfile.strip()))
                    data = parse_file(bibfiles[-1])
                except Exception as e:
                    # NOTE(review): traceback.print_exc() expects a 'limit'
                    # argument, not an exception instance -- confirm intent.
                    log.error('Failed to parse bibtex file: {}'.format(bfile.strip()))
                    traceback.print_exc(e)
                    return lines
                self._bibtex.add_entries(data.entries.iteritems())  # Python 2 API
        else:
            # No \bibliography command on this page: nothing to do.
            return lines

        # Determine the style
        match = re.search(self.RE_STYLE, content)
        if match:
            content = content.replace(match.group(0), '')
            try:
                style = find_plugin('pybtex.style.formatting', match.group(1))
            except:  # NOTE(review): bare except also swallows KeyboardInterrupt
                log.error('Unknown bibliography style "{}"'.format(match.group(1)))
                return lines
        else:
            style = find_plugin('pybtex.style.formatting', 'plain')

        # Replace citations with author date, as an anchor
        content = re.sub(self.RE_CITE, self.authors, content)

        # Create html bibliography
        if self._citations:
            # Generate formatted html using pybtex
            formatted_bibliography = style().format_bibliography(self._bibtex, self._citations)
            backend = find_plugin('pybtex.backends', 'html')
            stream = io.StringIO()
            backend().write_to_stream(formatted_bibliography, stream)

            # Strip the bib items from the formatted html
            html = re.findall(r'\<dd\>(.*?)\</dd\>', stream.getvalue(),
                              flags=re.MULTILINE|re.DOTALL)

            # Produces an ordered list with anchors to the citations
            output = u'<ol class="moose-bibliography" data-moose-bibfiles="{}">\n'.format(str(bibfiles))
            for i, item in enumerate(html):
                output += u'<li name="{}">{}</li>\n'.format(self._citations[i], item)
            output += u'</ol>\n'
            content = re.sub(self.RE_BIBLIOGRAPHY,
                             self.markdown.htmlStash.store(output, safe=True),
                             content)

        return content.split('\n')

    def authors(self, match):
        """
        Return the author(s) citation for text, linked to bibliography.

        NOTE(review): if the key is unknown this returns None, which would
        make re.sub() fail -- confirm whether that can happen in practice.
        """
        cmd = match.group('cmd')
        key = match.group('key')
        tex = '\\%s{%s}' % (cmd, key)
        if key in self._bibtex.entries:
            self._citations.append(key)
            entry = self._bibtex.entries[key]
            a = entry.persons['author']
            n = len(a)
            # "Lastname et al." for >2 authors, "A and B" for exactly two.
            if n > 2:
                author = '{} et al.'.format(' '.join(a[0].last_names))
            elif n == 2:
                a0 = ' '.join(a[0].last_names)
                a1 = ' '.join(a[1].last_names)
                author = '{} and {}'.format(a0, a1)
            else:
                author = ' '.join(a[0].last_names)

            # citep -> parenthetical "(Author, year)"; cite/citet -> "Author (year)".
            if cmd == 'citep':
                a = '<a href="#{}" data-moose-cite="{}">{}, {}</a>'.format(key, tex, author, entry.fields['year'])
                return '({})'.format(self.markdown.htmlStash.store(a, safe=True))
            else:
                a = '<a href="#{}" data-moose-cite="{}">{} ({})</a>'.format(key, tex, author, entry.fields['year'])
                return self.markdown.htmlStash.store(a, safe=True)
class BibtexPreprocessor(MooseMarkdownCommon, Preprocessor):
    """
    Creates per-page bibliographies using latex syntax.

    Fix: ``run`` now skips a .bib file whose parsing fails with
    ``UndefinedMacro``; previously execution fell through to
    ``data.entries`` with ``data`` unbound (NameError on the first file)
    or stale (re-adding the previous file's entries).
    """
    # The (?<!`) lookbehind skips commands that appear inside inline code.
    RE_BIBLIOGRAPHY = r'(?<!`)\\bibliography\{(.*?)\}'
    RE_STYLE = r'(?<!`)\\bibliographystyle\{(.*?)\}'
    RE_CITE = r'(?<!`)\\(?P<cmd>cite|citet|citep)\{(?P<keys>.*?)\}'

    @staticmethod
    def defaultSettings():
        """BibtexPreprocessor configure options."""
        return dict()  # this extension doesn't have settings

    def __init__(self, markdown_instance=None, **kwargs):
        MooseMarkdownCommon.__init__(self, **kwargs)
        Preprocessor.__init__(self, markdown_instance)
        self._macro_files = kwargs.pop('macro_files', None)  # optional @string macro files
        self._bibtex = None     # pybtex BibliographyData, built in run()
        self._citations = []    # keys cited on the current page, in order

    def parseBibtexFile(self, bibfile):
        """
        Returns parsed bibtex file.  If "macro_files" are supplied in the
        configuration file, then a temporary file will be made that contains
        the supplied macros above the original bib file.  This temporary
        combined file can then be parsed by pybtex.
        """
        if self._macro_files:
            t_bib_path = MooseDocs.abspath("tBib.bib")
            with open(t_bib_path, "wb") as t_bib:
                # Macros must come first so pybtex can resolve them in the
                # entries that follow.
                for t_file in self._macro_files:
                    with open(MooseDocs.abspath(t_file.strip()), "rb") as in_file:
                        shutil.copyfileobj(in_file, t_bib)
                with open(bibfile, "rb") as in_file:
                    shutil.copyfileobj(in_file, t_bib)
            data = parse_file(t_bib_path)
            if os.path.isfile(t_bib_path):
                os.remove(t_bib_path)
        else:
            data = parse_file(bibfile)
        return data

    def run(self, lines):
        """
        Create a bibliography from cite commands.
        """
        # Join the content to enable regex searches throughout entire text
        content = '\n'.join(lines)

        # Build the database of bibtex data
        self._citations = []  # member b/c it is used in substitution function
        self._bibtex = BibliographyData()  # ""
        bibfiles = []
        match = re.search(self.RE_BIBLIOGRAPHY, content)
        if match:
            for bfile in match.group(1).split(','):
                try:
                    bibfiles.append(MooseDocs.abspath(bfile.strip()))
                    data = self.parseBibtexFile(bibfiles[-1])
                except UndefinedMacro:
                    LOG.error('Undefined macro in bibtex file: %s, specify macro_files arguments ' \
                              'in configuration file (e.g. website.yml)', bfile.strip())
                    # BUGFIX: skip this file; 'data' would otherwise be
                    # unbound (NameError) or stale from a prior iteration.
                    continue
                self._bibtex.add_entries(data.entries.iteritems())  # Python 2 API
        else:
            # No \bibliography command on this page: nothing to do.
            return lines

        # Determine the style
        match = re.search(self.RE_STYLE, content)
        if match:
            content = content.replace(match.group(0), '')
            try:
                style = find_plugin('pybtex.style.formatting', match.group(1))
            except PluginNotFound:
                LOG.error('Unknown bibliography style "%s"', match.group(1))
                return lines
        else:
            style = find_plugin('pybtex.style.formatting', 'plain')

        # Replace citations with author date, as an anchor
        content = re.sub(self.RE_CITE, self.authors, content)

        # Create html bibliography
        if self._citations:
            # Generate formatted html using pybtex
            formatted_bibliography = style().format_bibliography(self._bibtex, self._citations)
            backend = find_plugin('pybtex.backends', 'html')
            stream = io.StringIO()
            backend().write_to_stream(formatted_bibliography, stream)

            # Strip the bib items from the formatted html
            html = re.findall(r'\<dd\>(.*?)\</dd\>', stream.getvalue(),
                              flags=re.MULTILINE|re.DOTALL)

            # Produces an ordered list with anchors to the citations
            output = u'<ol class="moose-bibliography" data-moose-bibfiles="{}">\n'
            output = output.format(str(bibfiles))
            for i, item in enumerate(html):
                output += u'<li name="{}">{}</li>\n'.format(self._citations[i], item)
            output += u'</ol>\n'
            content = re.sub(self.RE_BIBLIOGRAPHY,
                             self.markdown.htmlStash.store(output, safe=True),
                             content)

        return content.split('\n')

    def authors(self, match):
        """
        Return the author(s) citation for text, linked to bibliography.
        """
        cmd = match.group('cmd')
        keys = match.group('keys')
        tex = '\\%s{%s}' % (cmd, keys)

        cite_list = []

        # Loop over all keys in the cite command
        for key in [k.strip() for k in keys.split(',')]:

            # Error if the key is not found and move on
            if key not in self._bibtex.entries:
                LOG.error('Unknown bibtext key: %s', key)
                continue

            # Build the author list ("et al." for >2 authors)
            self._citations.append(key)
            entry = self._bibtex.entries[key]
            a = entry.persons['author']
            n = len(a)
            if n > 2:
                author = '{} et al.'.format(' '.join(a[0].last_names))
            elif n == 2:
                a0 = ' '.join(a[0].last_names)
                a1 = ' '.join(a[1].last_names)
                author = '{} and {}'.format(a0, a1)
            else:
                author = ' '.join(a[0].last_names)

            if cmd == 'citep':
                a = '<a href="#{}">{}, {}</a>'.format(key, author, entry.fields['year'])
            else:
                a = '<a href="#{}">{} ({})</a>'.format(key, author, entry.fields['year'])
            cite_list.append(a)

        # Create the correct text for list of keys in the cite command
        if len(cite_list) == 2:
            cite_list = [' and '.join(cite_list)]
        elif len(cite_list) > 2:
            cite_list[-1] = 'and ' + cite_list[-1]

        # Write the html
        if cmd == 'citep':
            html = '(<span data-moose-cite="{}">{}</span>)'.format(tex, '; '.join(cite_list))
        else:
            html = '<span data-moose-cite="{}">{}</span>'.format(tex, ', '.join(cite_list))

        # substitute Umlauts
        umlaut_re = re.compile(r"\{\\\"([aouAOU])\}")
        html = umlaut_re.sub('&\\1uml;', html)

        return self.markdown.htmlStash.store(html, safe=True)
def run(self):
    """Directive entry point: render the publication list as raw HTML.

    Parses every .bib file in ``self.arguments``, de-duplicates labels,
    groups entries by year (newest first), and emits one <ul> per year with
    expandable abstract/BibTeX sections, optional per-entry .bib downloads
    (``bibtex_dir``) and per-entry detail pages (``detail_page_dir``).
    """
    bibtex_dir = self.options.get('bibtex_dir', 'bibtex')
    detail_page_dir = self.options.get('detail_page_dir', 'papers')
    highlight_authors = self.options.get('highlight_author', None)
    if highlight_authors:
        highlight_authors = highlight_authors.split(';')
    style = Style(self.site.config['BASE_URL'] + detail_page_dir if detail_page_dir else None)
    # Rebuild the page whenever the (first) .bib source changes.
    self.state.document.settings.record_dependencies.add(self.arguments[0])

    # Collect entries from all bib files, keeping only the first occurrence
    # of each label.
    all_entries = []
    labels = set()
    for a in self.arguments:
        parser = Parser()
        for item in parser.parse_file(a).entries.items():
            if item[0] in labels:  # duplicated entries
                LOGGER.warning(
                    ("publication_list: BibTeX entries with duplicated labels are found. "
                     "Only the first occurrence will be used."))
                continue
            labels.add(item[0])
            all_entries.append(item)

    # Sort the publication entries by year reversed
    data = sorted(all_entries, key=lambda e: e[1].fields['year'], reverse=True)

    html = '<div class="publication-list">\n'
    cur_year = None

    if bibtex_dir:  # create the bibtex dir if the option is set
        try:
            os.makedirs(os.path.sep.join((self.output_folder, bibtex_dir)))
        except OSError:  # probably because the dir already exists
            pass

    if detail_page_dir:  # create the detail page dir if the option is set
        try:
            os.makedirs(os.path.sep.join((self.output_folder, detail_page_dir)))
        except OSError:  # probably because the dir already exists
            pass

    for label, entry in data:
        # print a year title when year changes
        if entry.fields['year'] != cur_year:
            if cur_year is not None:  # not first year group
                html += '</ul>'
            cur_year = entry.fields['year']
            html += '<h3>{}</h3>\n<ul>'.format(cur_year)

        entry.label = label  # Pass label to the style.
        pub_html = list(style.format_entries((entry,)))[0].text.render_as('html')
        if highlight_authors:  # highlight one of several authors (usually oneself)
            for highlight_author in highlight_authors:
                # We need to replace all occurrences of space except for the
                # last one with a non-breaking space, since pybtex does it
                # for all authors.
                count = highlight_author.count(' ') - 1
                # NOTE(review): the replacement argument below reads as a
                # plain space (a no-op); upstream versions use a non-breaking
                # space (U+00A0) here -- confirm against the original file.
                pub_html = pub_html.replace(
                    highlight_author.strip().replace(' ', ' ', count),
                    '<strong>{}</strong>'.format(highlight_author), 1)
        html += '<li class="publication" style="padding-bottom: 1em;">' + pub_html

        extra_links = ""
        if 'fulltext' in entry.fields:  # the link to the full text, usually a link to the pdf file
            extra_links += '[<a href="{}">full text</a>] '.format(entry.fields['fulltext'])

        bibtex_fields = dict(entry.fields)
        # Collect and remove custom links (fields starting with "customlink")
        # NOTE(review): the fields are collected but not actually removed
        # from bibtex_fields here -- confirm whether that is intended.
        custom_links = dict()
        for key, value in bibtex_fields.items():
            if key.startswith('customlink'):
                custom_links[key[len('customlink'):]] = value
        # custom fields (custom links)
        for key, value in custom_links.items():
            extra_links += '[<a href="{}">{}</a>] '.format(value, key)

        # Remove some fields for the publicly available BibTeX file since they are mostly only
        # used by this plugin.
        for field_to_remove in ('abstract', 'fulltext'):
            if field_to_remove in bibtex_fields:
                del bibtex_fields[field_to_remove]
        # Prepare for the bib file. Note detail_page_dir may need bib_data later.
        bibtex_entry = Entry(entry.type, bibtex_fields, entry.persons)
        bib_data = BibliographyData(dict({label: bibtex_entry}))
        bib_string = bib_data.to_string('bibtex')
        # Inline toggle that shows/hides the per-entry BibTeX <div> below.
        extra_links += '''
[<a href="javascript:void(0)" onclick="
(function(target, id) {{
    if ($('#' + id).css('display') == 'block') {{
        $('#' + id).hide('fast');
        $(target).text('BibTeX▼')
    }}
    else {{
        $('#' + id).show('fast');
        $(target).text('BibTeX▲')
    }}
}})(this, '{}');">BibTeX▼</a>] '''.format('bibtex-' + label)

        if bibtex_dir:  # write bib files to bibtex_dir for downloading
            bib_link = '{}/{}.bib'.format(bibtex_dir, label)
            bib_data.to_file('/'.join([self.output_folder, bib_link]), 'bibtex')

        if extra_links or detail_page_dir or 'abstract' in entry.fields:
            html += '<br>'

        # Add the abstract link.
        if 'abstract' in entry.fields:
            html += '''
[<a href="javascript:void(0)" onclick="
(function(target, id) {{
    if ($('#' + id).css('display') == 'block') {{
        $('#' + id).hide('fast');
        $(target).text('abstract▼')
    }}
    else {{
        $('#' + id).show('fast');
        $(target).text('abstract▲')
    }}
}})(this, '{}');">abstract▼</a>] '''.format('abstract-' + label)

        display_none = '<div id="{}" style="display:none"><pre>{}</pre></div>'
        bibtex_display = display_none.format('bibtex-' + label, bib_string)

        abstract_text = str(
            LaTeXParser(entry.fields['abstract']).parse()) if 'abstract' in entry.fields else ''

        if detail_page_dir:  # render the details page of a paper
            page_url = '/'.join((detail_page_dir, label + '.html'))
            html += '[<a href="{}">details</a>] '.format(
                self.site.config['BASE_URL'] + page_url)
            context = {
                'title': str(LaTeXParser(entry.fields['title']).parse()),
                'abstract': abstract_text,
                'bibtex': bib_data.to_string('bibtex'),
                'bibtex_link': '/' + bib_link if bibtex_dir else '',
                'default_lang': self.site.config['DEFAULT_LANG'],
                'label': label,
                'lang': self.site.config['DEFAULT_LANG'],
                'permalink': self.site.config['SITE_URL'] + page_url,
                'reference': pub_html,
                'extra_links': extra_links + bibtex_display
            }
            if 'fulltext' in entry.fields:
                context['pdf'] = entry.fields['fulltext']
            self.site.render_template(
                'publication.tmpl',
                os.path.sep.join((self.output_folder, detail_page_dir, label + '.html')),
                context,
            )
        html += extra_links

        # Add the hidden abstract and bibtex.
        if 'abstract' in entry.fields:
            html += '''
<div id="{}" class="publication-abstract" style="display:none">
<blockquote>{}</blockquote></div>
'''.format('abstract-' + label, abstract_text)
        html += bibtex_display

        html += '</li>'

    if len(data) != 0:  # publication list is nonempty
        html += '</ul>'

    html += '</div>'
    return [nodes.raw('', html, format='html'), ]
def filter_bibolamazifile(self, bibolamazifile):
    """Detect duplicate bibliography entries; merge, alias and/or warn.

    Duplicates are merged into one "original" entry; the duplicate keys are
    written as ``\\bibalias`` definitions to ``self.dupfile`` and/or
    reported via a warning (``self.warn``).  With
    ``self.keep_only_used_in_jobname`` only keys cited in the given job's
    .aux file are kept.

    NOTE(review): Python 2 code (``iteritems``).
    """
    #
    # bibdata is a pybtex.database.BibliographyData object
    #
    if (not self.dupfile and not self.warn):
        # Nothing to produce: bail out early.
        logger.warning("duplicates filter: No action is being taken because neither "
                       "-sDupfile= nor -dWarn have been requested.")
        return

    bibdata = bibolamazifile.bibliographyData();

    used_citations = None
    if self.keep_only_used_in_jobname:
        if not self.dupfile:
            logger.warning("Option -sKeepOnlyUsedInJobname has no effect without -sDupfile=... !")
        else:
            logger.debug("Getting list of used citations from %s.aux." %(self.keep_only_used_in_jobname))
            used_citations = auxfile.get_all_auxfile_citations(
                self.keep_only_used_in_jobname, bibolamazifile,
                self.name(), self.jobname_search_dirs, return_set=True
            )

    duplicates = [];

    arxivaccess = arxivutil.setup_and_get_arxiv_accessor(bibolamazifile)

    dupl_entryinfo_cache_accessor = self.cacheAccessor(DuplicatesEntryInfoCacheAccessor)

    # Pre-compute comparison info for every entry once, up front.
    for (key, entry) in bibdata.entries.iteritems():
        #cache_entries[key] = {}
        dupl_entryinfo_cache_accessor.prepare_entry_cache(key, entry, arxivaccess)

    newbibdata = BibliographyData();
    unused = BibliographyData();
    #unused_respawned = set() # because del unused.entries[key] is not implemented ... :(

    def copy_entry(entry):
        # Shallow-ish copy: fresh field dict and deep-copied persons, but the
        # collection reference is shared.
        # NOTE(review): defined but not called in this visible block --
        # possibly used by helpers elsewhere; confirm before removing.
        #return copy.deepcopy(entry) # too deep ...
        newpers = {}
        for role, plist in entry.persons.iteritems():
            newpers[role] = [copy.deepcopy(p) for p in plist]
        return Entry(type_=entry.type,
                     fields=entry.fields.items(), # will create own Fielddict
                     persons=newpers,
                     collection=entry.collection
                     )

    # Strategy: go through the list of entries, and each time keeping it if it is new,
    # or updating the original and registering the alias if it is a duplicate.
    #
    # With only_used, the situation is a little trickier as we cannot just discard the
    # entries as they are filtered: indeed, they might be duplicates of a used entry,
    # with which one should merge the bib information.
    #
    # So the full algorithm does not immediately discard the unused keys, but rather
    # keeps them in an `unused` list. If they are later required, they are respawned
    # into the actual new list.
    #
    for (key, entry) in bibdata.entries.iteritems():
        #
        # search the newbibdata object, in case this entry already exists.
        #
        #logger.longdebug('inspecting new entry %s ...', key);
        is_duplicate_of = None
        duplicate_original_is_unused = False
        for (nkey, nentry) in newbibdata.entries.iteritems():
            if self.compare_entries_same(entry, nentry,
                                         dupl_entryinfo_cache_accessor.get_entry_cache(key),
                                         dupl_entryinfo_cache_accessor.get_entry_cache(nkey)):
                logger.longdebug(' ... matches existing entry %s!', nkey);
                is_duplicate_of = nkey;
                break
        # NOTE(review): this second scan runs even when the first loop already
        # found a match, and would overwrite is_duplicate_of -- confirm intent.
        for (nkey, nentry) in unused.entries.iteritems():
            #if nkey in unused_respawned:
            #    continue
            if self.compare_entries_same(entry, nentry,
                                         dupl_entryinfo_cache_accessor.get_entry_cache(key),
                                         dupl_entryinfo_cache_accessor.get_entry_cache(nkey)):
                logger.longdebug(' ... matches existing entry %s!', nkey);
                is_duplicate_of = nkey;
                duplicate_original_is_unused = True
                break

        #
        # if it's a duplicate
        #
        if is_duplicate_of is not None:
            dup = (key, is_duplicate_of)
            if duplicate_original_is_unused:
                self.update_entry_with_duplicate(is_duplicate_of, unused.entries[is_duplicate_of],
                                                 key, entry)
            else:
                # a duplicate of a key we have used. So update the original ...
                self.update_entry_with_duplicate(is_duplicate_of, newbibdata.entries[is_duplicate_of],
                                                 key, entry)
            # ... and register the alias.
            duplicates.append(dup);

            if duplicate_original_is_unused and used_citations and key in used_citations:
                # if we had set the original in the unused list, but we need the
                # alias, then respawn the original to the newbibdata so we can refer
                # to it. Bonus: use the name with which we have referred to it, so we
                # don't need to register any duplicate.
                newbibdata.add_entry(key, unused.entries[is_duplicate_of])
                #unused_respawned.add(is_duplicate_of)
                del unused.entries[is_duplicate_of]
        else:
            if used_citations is not None and key not in used_citations:
                # new entry, but we don't want it. So add it to the unused list.
                unused.add_entry(key, entry)
            else:
                # new entry and we want it. So add it to the main newbibdata list.
                newbibdata.add_entry(key, entry)

    # output duplicates to the duplicates file
    if (self.dupfile):
        # and write definitions to the dupfile
        dupfilepath = os.path.join(bibolamazifile.fdir(), self.dupfile);
        check_overwrite_dupfile(dupfilepath);
        dupstrlist = [];
        with codecs.open(dupfilepath, 'w', 'utf-8') as dupf:
            dupf.write(BIBALIAS_HEADER.replace('####DUP_FILE_NAME####', self.dupfile));
            if not self.custom_bibalias:
                dupf.write(BIBALIAS_LATEX_DEFINITIONS)
            # Note: Sort entries in some way (e.g. alphabetically according to
            # (alias, original)), to avoid diffs in VCS's
            for (dupalias, duporiginal) in sorted(duplicates, key=lambda x: (x[0],x[1])):
                dupf.write((r'\bibalias{%s}{%s}' % (dupalias, duporiginal)) + "\n");
                dupstrlist.append("\t%s is an alias of %s" % (dupalias,duporiginal)) ;
            dupf.write('\n\n');
        # issue debug message
        logger.debug("wrote duplicates to file: \n" + "\n".join(dupstrlist));

    if (self.warn and duplicates):
        # Build a two-column (alias | original) human-readable report.
        def warnline(dupalias, duporiginal):
            def fmt(key, entry, cache_entry):
                # One-line summary: authors, title, journal, date, doi, arXiv id.
                s = ", ".join(string.capwords('%s, %s' % (x[0], "".join(x[1])))
                              for x in cache_entry['pers']);
                if 'title_clean' in cache_entry and cache_entry['title_clean']:
                    s += ', "' + (cache_entry['title_clean']).capitalize() + '"'
                if 'j_abbrev' in cache_entry and cache_entry['j_abbrev']:
                    s += ', ' + cache_entry['j_abbrev']
                f = entry.fields
                if f.get('month',None) and f.get('year',None):
                    s += ', ' + f['month'] + ' ' + f['year']
                elif f.get('month', None):
                    s += ', ' + f['month'] + ' <unknown year>'
                elif f.get('year', None):
                    s += ', ' + f['year']
                if 'doi' in entry.fields and entry.fields['doi']:
                    s += ', doi:'+entry.fields['doi']
                if 'arxivinfo' in cache_entry and cache_entry['arxivinfo']:
                    s += ', arXiv:'+cache_entry['arxivinfo']['arxivid']
                if 'note_cleaned' in cache_entry and cache_entry['note_cleaned']:
                    s += '; ' + cache_entry['note_cleaned']
                return s

            tw = textwrap.TextWrapper(width=DUPL_WARN_ENTRY_COLWIDTH)
            fmtalias = fmt(dupalias, bibdata.entries[dupalias],
                           dupl_entryinfo_cache_accessor.get_entry_cache(dupalias))
            fmtorig = fmt(duporiginal, bibdata.entries[duporiginal],
                          dupl_entryinfo_cache_accessor.get_entry_cache(duporiginal))
            linesalias = tw.wrap(fmtalias)
            linesorig = tw.wrap(fmtorig)
            maxlines = max(len(linesalias), len(linesorig))
            # Pad the shorter column with blank lines and zip them side by side.
            return (DUPL_WARN_ENTRY % {'alias': dupalias, 'orig': duporiginal}
                    + "\n".join(
                        ('%s%s%s%s' % (' '*DUPL_WARN_ENTRY_BEGCOL,
                                       linealias + ' '*(DUPL_WARN_ENTRY_COLWIDTH-len(linealias)),
                                       ' '*DUPL_WARN_ENTRY_COLSEP,
                                       lineorig)
                         for (linealias, lineorig) in zip(linesalias + ['']*(maxlines-len(linesalias)),
                                                          linesorig + ['']*(maxlines-len(linesorig))))
                    )
                    + "\n\n"
                    )

        logger.warning(DUPL_WARN_TOP
                       + "".join([warnline(dupalias, duporiginal)
                                  for (dupalias, duporiginal) in duplicates])
                       + DUPL_WARN_BOTTOM % {'num_dupl': len(duplicates)});

    # ### TODO: do this not only if we are given a dupfile?
    #if self.dupfile:
    # ### --> Bibolamazi v3: also set this if no dupfile was given. This is because we
    # ### are moving entries themselves around and modifying them anyway

    # set the new bibdata, without the duplicates
    # DON'T DO THIS, BECAUSE CACHES MAY HAVE KEPT A POINTER TO THE BIBDATA.
    #bibolamazifile.setBibliographyData(newbibdata);
    #
    # Instead, update bibolamazifile's bibliographyData() object itself.
    #
    bibolamazifile.setEntries(newbibdata.entries.iteritems())

    return
packages = yaml.load(package_file) # only tagged packages go in release with open('tags.json') as tag_file: tags = json.load(tag_file) tagged = list(tags.keys()) from pybtex.database import BibliographyData, Entry master_data = BibliographyData( { 'article-minimal': Entry('article', [ ('author', 'Leslie B. Lamport'), ('title', "blah blah blah"), ('journal', "Some outlet"), ('year', '1986'), ]), }) # handle duplicates for package in packages: subpackages = packages[package].split() for subpackage in subpackages: package_bib = "tmp/{subpackage}/doc/_static/references.bib".format(subpackage=subpackage) if os.path.isfile(package_bib): local = pybtex.database.parse_file(package_bib) for entry in local.entries: if entry not in master_data.entries: master_data.add_entry(entry, local.entries[entry]) print('adding', entry)
parser.add_argument('-y', help="Earliest year to report conflict (default={})".format(earlyyear),
                    default=earlyyear, type=int)
args = parser.parse_args()

# First pass: scan the raw file for duplicated entry header lines, because
# pybtex refuses to parse files containing duplicate keys.
seen_headers = set()
has_duplicates = False
with open(args.f, 'r') as handle:
    for line in handle:
        if not line.startswith('@'):
            continue
        for entry_type in ('@misc', '@article', '@inproceedings'):
            line = line.replace(entry_type, '')
        if line in seen_headers:
            sys.stderr.write("Duplicate entry " + line.replace('{', '').replace(',', ''))
            has_duplicates = True
        seen_headers.add(line)

if has_duplicates:
    sys.stderr.write("FATAL: The bibtex file has duplicate entries in it. Please remove them before trying to continue\n")
    sys.stderr.write("(It is an issue with Google Scholar, but pybtex breaks with duplicate entries. Sorry)\n")
    sys.exit(-1)

# Second pass: parse properly and print every entry from args.y onwards.
bib = parse_file(args.f, 'bibtex')
for bib_key in bib.entries:
    fields = bib.entries[bib_key].fields
    if 'year' in fields and int(fields['year']) >= args.y:
        bib_data = BibliographyData({bib_key: bib.entries[bib_key]})
        print(bib_data.to_string('bibtex'))
def __init__(self, *args, **kwargs):
    """Initialize the extension with an empty bibliography and citation set."""
    command.CommandExtension.__init__(self, *args, **kwargs)
    # Citation keys seen so far; a set keeps them unique.
    self.__citations = set()
    # Accumulated bibtex entries backing the rendered bibliography.
    self.__database = BibliographyData()
def __init__(self, root=None, **kwargs):
    """Initialize the preprocessor with an empty citation list and database."""
    Preprocessor.__init__(self, **kwargs)
    # Root location, defaults to None when not supplied by the caller.
    self._root = root
    # Parsed bibtex entries for the page being processed.
    self._bibtex = BibliographyData()
    # Citation keys in order of appearance.
    self._citations = []
def run(self, lines):
    """
    Create a bibliography from cite commands.

    Joins the markdown lines, parses the bibtex file(s) named by the
    bibliography command, substitutes cite commands with author-date
    anchors, and appends a formatted HTML bibliography. Returns the
    (possibly modified) content re-split into lines; on any parse or
    style error the input lines are returned unchanged.
    """
    # Join the content to enable regex searches throughout entire text
    content = '\n'.join(lines)

    # Build the database of bibtex data
    self._citations = []              # member b/c it is used in substitution function
    self._bibtex = BibliographyData() # ""
    bibfiles = []
    match = re.search(self.RE_BIBLIOGRAPHY, content)
    if match:
        for bfile in match.group(1).split(','):
            try:
                bibfiles.append(os.path.join(self._docs_dir, bfile.strip()))
                data = parse_file(bibfiles[-1])
            except Exception as e:
                log.error('Failed to parse bibtex file: {}'.format(bfile.strip()))
                # print_exc() takes a traceback limit, not an exception;
                # the active exception is printed implicitly.
                traceback.print_exc()
                return lines
            # items() (not the Python-2-only iteritems()) works on both 2 and 3.
            self._bibtex.add_entries(data.entries.items())
    else:
        return lines

    # Determine the style
    match = re.search(self.RE_STYLE, content)
    if match:
        content = content.replace(match.group(0), '')
        try:
            style = find_plugin('pybtex.style.formatting', match.group(1))
        except Exception:  # narrow from bare except: so SystemExit/KeyboardInterrupt pass through
            log.error('Unknown bibliography style "{}"'.format(match.group(1)))
            return lines
    else:
        style = find_plugin('pybtex.style.formatting', 'plain')

    # Replace citations with author date, as an anchor
    content = re.sub(self.RE_CITE, self.authors, content)

    # Create html bibliography
    if self._citations:
        # Generate formatted html using pybtex
        formatted_bibliography = style().format_bibliography(self._bibtex, self._citations)
        backend = find_plugin('pybtex.backends', 'html')
        stream = io.StringIO()
        backend().write_to_stream(formatted_bibliography, stream)

        # Strip the bib items from the formatted html
        html = re.findall(r'\<dd\>(.*?)\</dd\>', stream.getvalue(),
                          flags=re.MULTILINE|re.DOTALL)

        # Produces an ordered list with anchors to the citations
        output = u'<ol class="moose-bibliography" data-moose-bibfiles="{}">\n'.format(str(bibfiles))
        for i, item in enumerate(html):
            output += u'<li name="{}">{}</li>\n'.format(self._citations[i], item)
        output += u'</ol>\n'
        content = re.sub(self.RE_BIBLIOGRAPHY,
                         self.markdown.htmlStash.store(output, safe=True),
                         content)

    return content.split('\n')
def run(self, lines):
    """
    Create a bibliography from cite commands.

    Joins the markdown lines, loads every bibtex file listed by the
    bibliography command (logging and skipping files that fail), replaces
    cite commands with author-date anchors, and appends a formatted HTML
    bibliography. Returns the content re-split into lines; if no
    bibliography command or no recognized style is found, the input lines
    are returned unchanged.
    """
    # Join the content to enable regex searches throughout entire text
    content = '\n'.join(lines)

    # Build the database of bibtex data
    self._citations = []              # member b/c it is used in substitution function
    self._bibtex = BibliographyData() # ""
    bibfiles = []
    match = re.search(self.RE_BIBLIOGRAPHY, content)
    if match:
        for bfile in match.group(1).split(','):
            try:
                filename, _ = self.getFilename(bfile.strip())
                bibfiles.append(filename)
                data = self.parseBibtexFile(bibfiles[-1])
                # items() (not the Python-2-only iteritems()) works on both 2 and 3.
                self._bibtex.add_entries(data.entries.items())
            except UndefinedMacro:
                LOG.error('Undefined macro in bibtex file: %s, specify macro_files arguments ' \
                          'in configuration file (e.g. website.yml)', bfile.strip())
            except TypeError:
                LOG.error('Unable to locate bibtex file in %s', self.markdown.current.filename)
            except BibliographyDataError as e:
                LOG.error('%s in %s', str(e), self.markdown.current.filename)
            except Exception as e: #pylint: disable=broad-except
                # Arguments were previously swapped: the first %s is the
                # location (filename), the second the error, matching the
                # other messages above.
                LOG.error('Unknown error in %s when parsing bibtex file: %s',
                          self.markdown.current.filename, str(e))
    else:
        return lines

    # Determine the style
    match = re.search(self.RE_STYLE, content)
    if match:
        content = content.replace(match.group(0), '')
        try:
            style = find_plugin('pybtex.style.formatting', match.group(1))
        except PluginNotFound:
            LOG.error('Unknown bibliography style "%s"', match.group(1))
            return lines
    else:
        style = find_plugin('pybtex.style.formatting', 'plain')

    # Replace citations with author date, as an anchor
    content = re.sub(self.RE_CITE, self.authors, content)

    # Create html bibliography
    if self._citations:
        # Generate formatted html using pybtex
        formatted_bibliography = style().format_bibliography(self._bibtex, self._citations)
        backend = find_plugin('pybtex.backends', 'html')
        stream = io.StringIO()
        backend().write_to_stream(formatted_bibliography, stream)

        # Strip the bib items from the formatted html
        html = re.findall(r'\<dd\>(.*?)\</dd\>', stream.getvalue(),
                          flags=re.MULTILINE|re.DOTALL)

        # Produces an ordered list with anchors to the citations
        output = u'<ol class="moose-bibliography" data-moose-bibfiles="{}">\n'
        output = output.format(str(bibfiles))
        for i, item in enumerate(html):
            output += u'<li name="{}">{}</li>\n'.format(self._citations[i], item)
        output += u'</ol>\n'
        content = re.sub(self.RE_BIBLIOGRAPHY,
                         self.markdown.htmlStash.store(output, safe=True),
                         content)

    return content.split('\n')