def parse():
    """Merge new article entries from the CACHE bibliography into DST."""
    src_bib = parse_file(CACHE, 'bibtex')
    bib = parse_file(DST, 'bibtex')
    for entry in src_bib.entries.values():
        # Skip explicitly ignored keys and anything that is not an article.
        if entry.key in IGNORE:
            continue
        elif entry.type != 'article':
            continue
        journal = entry.fields['journal'].lower()
        if journal == 'biorxiv' and entry.fields['pages'] in BIORXIV_OBSOLETE:
            # Preprint that has since been superseded.
            continue
        elif entry.key in bib.entries:
            if journal == bib.entries[entry.key].fields['journal'].lower():
                # Already present with the same journal.
                continue
        # fix title: brace-protect acronyms so BibTeX styles keep their case
        for acronym in ACRONYMS:
            entry.fields['title'] = re.sub(
                acronym, '{' + acronym + '}', entry.fields['title'])
        # URL is redundant with DOI
        if 'doi' in entry.fields:
            entry.fields.pop('url', None)
        # Find a free key by appending _a, _b, ... until one is unused.
        key = entry.key
        for suffix in ascii_lowercase:
            if key in bib.entries:
                key = f'{entry.key}_{suffix}'
            else:
                break
        bib.add_entry(key, entry)
    DST.write_bytes(bib.to_bytes('bibtex').replace(br'\\&', br'\&'))
def dupcheck(fin):
    """ Check INPUT for duplicate entries. """
    db = parse_file(fin, "bibtex").lower()
    # Map normalized title -> list of (key, original title) pairs.
    tx_key_titles = defaultdict(list)
    for key, entry in db.entries.items():
        if "title" not in entry.fields:
            continue
        title = entry.fields["title"]
        # Normalize: lowercase, keep alphanumerics/spaces, collapse whitespace.
        normalized = title.lower()
        normalized = "".join(ch for ch in normalized
                             if ch.isalnum() or ch.isspace())
        normalized = " ".join(normalized.split())
        tx_key_titles[normalized].append((key, title))
    # Extra width added by click's ANSI bold wrapping, for column alignment.
    bold_wrap_len = len(click.style(" ", bold=True)) - 1
    for tx, key_titles in tx_key_titles.items():
        if len(key_titles) > 1:
            max_keylen = max(len(k) for k, _ in key_titles)
            fmt = " (%%-%ds) |- %%s" % (max_keylen + bold_wrap_len)
            code = click.style("Duplicate title", fg="red")
            tx = click.style(tx, fg="yellow")
            click.echo("%s: %s" % (code, tx))
            for key, title in key_titles:
                key = click.style(key, bold=True)
                click.echo(fmt % (key, title))
def check(disable_good_key_check, fin):
    """ Check INPUT for inconsistancies. """
    db = parse_file(fin, "bibtex").lower()
    for key, e in db.entries.items():
        # Fix: style the key once. The original re-bound `key` to the styled
        # string inside each branch, so every later check wrapped an
        # already-styled key in additional ANSI escape codes.
        styled_key = click.style(key, bold=True)
        if not is_good_key(key) and disable_good_key_check is False:
            code = click.style("Bad key", fg="red")
            click.echo("%s: %s" % (code, styled_key))
        if not is_good_pages(e):
            code = click.style("Bad pages", fg="yellow")
            click.echo("%s for %s: %s" % (code, styled_key, e.fields["pages"]))
        empty = empty_fields(e)
        if empty:
            code = click.style("Empty fields", fg="blue")
            click.echo("%s for %s: %s" % (code, styled_key, " ".join(empty)))
        missing = missing_fields(e)
        missing = missing_acceptable(missing, e)
        if missing:
            code = click.style("Missing fields", fg="magenta")
            typ = click.style(e.type, fg="green")
            click.echo("%s for %s: %s | %s"
                       % (code, styled_key, " ".join(missing), typ))
def gather_candidates(self, context):
    """Build completion candidates from the user's pandoc BibTeX library.

    Fixes two defects:
    - the '~' in the library path was never expanded, so pybtex could not
      open the file;
    - entries without a 'year' field were never appended to `kind`, so the
      final zip() silently dropped/misaligned candidates.
    """
    import os.path  # local import: this module did not import os before
    bib_filepath = os.path.expanduser('~/.pandoc/library.bib')
    bib_file = database.parse_file(bib_filepath, bib_format='bibtex')
    bib_entries = bib_file.entries
    citation_keys = bib_entries.keys()
    # Titles are stored brace-wrapped; strip the outer pair.
    titles = [bib_entries[k].fields['title'][1:-1] for k in citation_keys]
    absts = [
        bib_entries[k].fields['abstract']
        if 'abstract' in bib_entries[k].fields else ""
        for k in citation_keys
    ]
    kind = []
    for k in citation_keys:
        tmp = ""
        if 'year' in bib_entries[k].fields:
            tmp = "(" + bib_entries[k].fields['year'] + ") "
        if 'journal' in bib_entries[k].fields:
            tmp += bib_entries[k].fields['journal']
        kind.append(tmp)  # always append so all four lists stay aligned
    citation = []
    for k, t, a, j in zip(citation_keys, titles, absts, kind):
        citation.append({
            'word': k,
            'abbr': k + ": " + t,
            'info': a,
            'kind': j,
        })
    return citation
def format_database(
        from_filename, to_filename,
        bib_format=None, output_backend=None,
        input_encoding=None, output_encoding=None,
        parser_options=None, min_crossrefs=2, style=None,
        **kwargs):
    """Parse a bibliography file and render it with a pybtex style/backend."""
    options = parser_options if parser_options is not None else {}
    backend_cls = find_plugin('pybtex.backends', output_backend,
                              filename=to_filename)
    bib_data = database.parse_file(
        from_filename,
        encoding=input_encoding,
        bib_format=bib_format,
        **options
    )
    # Build the formatting style from the keyword options.
    style_cls = find_plugin('pybtex.style.formatting', style)
    formatter = style_cls(
        label_style=kwargs.get('label_style'),
        name_style=kwargs.get('name_style'),
        sorting_style=kwargs.get('sorting_style'),
        abbreviate_names=kwargs.get('abbreviate_names'),
        min_crossrefs=min_crossrefs,
    )
    formatted = formatter.format_bibliography(bib_data)
    backend_cls(output_encoding).write_to_file(formatted, to_filename)
def preExecute(self):
    """Collect every .bib page into a single bibliography database,
    honoring the 'duplicates' allow-list."""
    duplicates = self.get('duplicates', list())
    self.__database = BibliographyData()
    bib_files = []
    for node in self.translator.getPages():
        if node.source.endswith('.bib'):
            bib_files.append(node.source)

    for bfile in bib_files:
        try:
            db = parse_file(bfile)
        except UndefinedMacro as e:
            msg = "The BibTeX file %s has an undefined macro:\n%s"
            LOG.warning(msg, bfile, e)
            # Fix: skip this file; the original fell through and iterated a
            # stale (or unbound) 'db' from a previous iteration.
            continue
        #TODO: https://bitbucket.org/pybtex-devs/pybtex/issues/93/
        # databaseadd_entries-method-not-considering
        warn = self.get('duplicate_warning')
        for key in db.entries:
            duplicate_key = key in self.__database.entries
            duplicate_key_allowed = key in duplicates
            if duplicate_key and (not duplicate_key_allowed):
                if warn:
                    msg = "The BibTeX entry '%s' defined in %s already exists."
                    LOG.warning(msg, key, bfile)
            elif not duplicate_key:
                self.__database.add_entry(key, db.entries[key])
def bib_reader(filename):
    """An iterator which yields pybtex.database.Entry instances.

    Falls back to an empty list when the file cannot be parsed.
    """
    try:
        entries = parse_file(filename).entries.values()
    except Exception as e:
        print("Error reading bibtex file {}: {}".format(filename, e))
        return []
    return iter(entries)
def custom_parse_file(file_bib):
    """Parse *file_bib*, renaming repeated keys in place until pybtex
    accepts the file. A .bkp copy is made before the first rewrite."""
    print(file_bib, " " * 30)
    retry = True
    while retry:
        retry = False
        try:
            return parse_file(file_bib)
        except BibliographyDataError as ex:
            # NOTE: "bibliograhpy" matches pybtex's own (misspelled) message.
            repeated_key = ex.args[0].replace(
                "repeated bibliograhpy entry: ", "")
            # Back up the original file once, before any rewriting.
            if not os.path.isfile(file_bib + ".bkp"):
                shutil.copyfile(file_bib, file_bib + ".bkp")
            if repeated_key:
                with open(file_bib, "r") as fh:
                    file_data = fh.read()
                # Rename each occurrence with a fresh random suffix.
                while file_data.find(repeated_key + ",") > -1:
                    new_key = repeated_key + "_" + str(random.randint(1, 101))
                    file_data = file_data.replace(
                        repeated_key + ",", new_key + ",", 1)
                    print(
                        file_bib + ": repeatedKey", repeated_key,
                        "replaced by", new_key
                    )
                with open(file_bib, "w+") as fh:
                    fh.write(file_data)
                retry = True
def upload_file():
    """Handle a POST of a .bib file: validate, store it, and import it."""
    if request.method == 'POST':
        # Check for file selection and appropriate file type
        if 'file' not in request.files:
            return redirect(url_for('upload', message='Please select a file.'))
        upload = request.files['file']
        if upload.filename == '':
            return redirect(
                url_for('upload', message='Please select a file with a name.'))
        if not allowed_file(upload.filename):
            return redirect(
                url_for('upload', message='Please select a .bib file.'))
        # if everything looks good, upload the selected file
        else:
            saved_path = os.path.join(app.config['UPLOAD_FOLDER'],
                                      secure_filename(upload.filename))
            upload.save(saved_path)
            collection = request.form['collection']
            bib_data = parse_file(saved_path)  # parse the .bib file
            fill(bib_data, collection)  # populate the database
            return redirect(url_for('upload', message='Upload successful!'))
def process_bibtex(corpus_path): import pybtex from pybtex.database import parse_file from topicexplorer.lib.util import overwrite_prompt, safe_symlink print "Loading BibTeX from", corpus_path bib = parse_file(corpus_path) target_dir = os.path.basename(corpus_path).replace('.bib','') if not os.path.exists(target_dir): os.makedirs(target_dir) elif overwrite_prompt(target_dir): shutil.rmtree(target_dir) os.makedirs(target_dir) else: raise IOError("Path exits: {}".format(target_dir)) for entry in bib.entries: if bib.entries[entry].fields.get('file', None): filename = '/' + bib.entries[entry].fields['file'].replace(':pdf','')[1:] if 'C$\\backslash$:' in filename: filename = filename.replace('C$\\backslash$:', '') filename = filename[1:] filename = os.path.normpath(filename) filename = os.path.abspath(filename) if not os.path.exists(filename): print "Invalid 'file' field for BibTeX entry {}:\n\t({})".format(entry, filename) else: new_path = os.path.join(target_dir, os.path.basename(filename)) safe_symlink(filename, new_path) else: print "No 'file' field for BibTeX entry: {}".format(entry) return target_dir
def preExecute(self, content):
    """Collect every .bib file found in *content* into one database."""
    self.__database = BibliographyData()
    bib_files = []
    for node in content:
        if node.source.endswith('.bib'):
            bib_files.append(node.source)

    for bfile in bib_files:
        try:
            db = parse_file(bfile)
        except UndefinedMacro as e:
            msg = "The BibTeX file %s has an undefined macro:\n%s"
            # Fix: BaseException.message does not exist on Python 3 — log the
            # exception itself. Also skip this file instead of falling
            # through and iterating a stale/unbound 'db'.
            LOG.warning(msg, bfile, e)
            continue
        #TODO: https://bitbucket.org/pybtex-devs/pybtex/issues/93/
        # databaseadd_entries-method-not-considering
        warn = self.get('duplicate_warning')
        for key in db.entries:
            if key in self.__database.entries:
                if warn:
                    msg = "The BibTeX entry '%s' defined in %s already exists."
                    LOG.warning(msg, key, bfile)
            else:
                self.__database.add_entry(key, db.entries[key])
def parse_refs(bibtexf, verbose=False):
    """
    Parse the references and return some data structure

    :param bibtexf: the bibtex file
    :param verbose: more output
    :return: the BibliographyData object and a dictionary linking lower
             case titles to entry keys
    """
    if verbose:
        message(f"Parsing {bibtexf}", "GREEN")
    bib = parse_file(bibtexf, 'bibtex')
    titles = {}
    for key in bib.entries:
        try:
            fields = bib.entries[key].fields
            if 'title' in fields:
                # Lowercase and strip BibTeX case-protection braces.
                stripped = fields['title'].lower()
                stripped = stripped.replace('{', '').replace('}', '')
                titles[stripped.lower()] = key
        except Exception as ex:
            sys.stderr.write(f"Error parsing entry: {key}\n")
            print(ex)
    if verbose:
        message(f"Found {len(titles)} references", "BLUE")
    return bib, titles
def main():
    """Filter a BibTeX file, keeping only entries from the last 5 years."""
    if len(sys.argv) != 3:
        prog = os.path.basename(sys.argv[0])
        print('Filters BibTeX file preserving order and limiting to last 5 years')
        print('usage: %s <original-bib> <output-bib>' % prog)
        # Fix: the example line never interpolated the program name, so the
        # literal '%s' was printed to the user.
        print('example: %s publications.bib filtered.bib' % prog)
        sys.exit(1)

    original = sys.argv[1]
    minyear = datetime.date.today().year - 5
    output = sys.argv[2]

    from pybtex.database import parse_file, BibliographyData
    bib_data = parse_file(original)
    filtered = BibliographyData()
    for key in bib_data.entries:
        entry = bib_data.entries[key]
        year = int(entry.fields['year'])
        if year > minyear:
            print('Selecting @%s[%s] from %s' % (entry.type, key, year))
            filtered.entries[key] = entry

    print('Saving to %s...' % output)
    s = filtered.to_string('bibtex')
    for f, t in FIX_STRINGS:
        # Fix: was the redundant double assignment `s = s = s.replace(...)`.
        s = s.replace(f, t)
    with open(output, 'wt') as f:
        f.write(s)
def init(viewer, config, args): global metadata try: filename = args.bibtex or config.get('bibtex', 'path') except ConfigParser.Error: model_path = config.get('main','path') filename = os.path.join(model_path, 'library.bib') print "Loading Bibtex metadata from", filename bib = parse_file(filename) metadata = dict() for entry in bib.entries: key = '/' + bib.entries[entry].fields.get('file','').replace(':pdf','')[1:] if 'C$\\backslash$:' in key: key = key.replace('C$\\backslash$:', '') key = key[1:] key = os.path.normpath(key) key = os.path.basename(key) try: citation = pybtex.format_from_file( filename, style='plain', output_backend='text', citations=[entry])[3:] metadata[key] = citation except PybtexError: metadata[key] = filename
def build_bib_dict():
    """Render each bib/*.bib file's entry to an HTML snippet, keyed by the
    entry's citation key."""
    pybtex_style = pybtex.plugin.find_plugin(
        'pybtex.style.formatting', 'plain')()
    pybtex_html_backend = pybtex.plugin.find_plugin('pybtex.backends', 'html')()
    pybtex_parser = pybtex.database.input.bibtex.Parser()

    bib_dict = {}
    bib_files = [name for name in listdir('bib') if name[-4:] == '.bib']
    for bib_file in bib_files:
        full_path = join('bib', bib_file)
        print(full_path)
        data = parse_file(full_path)
        data_formatted = pybtex_style.format_entries(
            six.itervalues(data.entries))
        output = io.StringIO()
        pybtex_html_backend.write_to_stream(data_formatted, output)
        html = output.getvalue()
        output.close()
        # Keep only the body of the first <dd>...</dd> list item.
        html = html.split("<dd>")[1]
        html = html.split("</dd>")[0]
        for old_string, new_string in replace_strings:
            html = html.replace(old_string, new_string)
        key = next(iter(data.entries))
        bib_dict[key] = html
    return bib_dict
def preExecute(self, content):
    """Collect .bib files from *content* into one database, honoring the
    'duplicates' allow-list."""
    duplicates = self.get('duplicates', list())
    self.__database = BibliographyData()
    bib_files = []
    for node in content:
        if node.source.endswith('.bib'):
            bib_files.append(node.source)

    for bfile in bib_files:
        try:
            db = parse_file(bfile)
        except UndefinedMacro as e:
            msg = "The BibTeX file %s has an undefined macro:\n%s"
            # Fix: BaseException.message does not exist on Python 3 — log the
            # exception itself. Also skip this file rather than falling
            # through and iterating a stale/unbound 'db'.
            LOG.warning(msg, bfile, e)
            continue
        #TODO: https://bitbucket.org/pybtex-devs/pybtex/issues/93/
        # databaseadd_entries-method-not-considering
        warn = self.get('duplicate_warning')
        for key in db.entries:
            duplicate_key = key in self.__database.entries
            duplicate_key_allowed = key in duplicates
            if duplicate_key and (not duplicate_key_allowed):
                if warn:
                    msg = "The BibTeX entry '%s' defined in %s already exists."
                    LOG.warning(msg, key, bfile)
            elif not duplicate_key:
                self.__database.add_entry(key, db.entries[key])
def read_wmt_bib() -> List[Paper]:
    """Read the WMT 2020 anthology BibTeX file into Paper records.

    Improvement: the LatexNodes2Text converter was instantiated twice per
    entry; it is stateless across calls here, so build it once.
    """
    result = []
    latex_converter = LatexNodes2Text()
    with open("downloads/2020.wmt-1.0.bib") as f:
        bib = database.parse_file(f)
    for i, entry in enumerate(bib.entries.values()):
        if entry.type == "book":
            continue
        title = latex_converter.latex_to_text(entry.fields["title"])
        url = entry.fields["url"]
        abstract = latex_converter.latex_to_text(entry.fields["abstract"])
        # "Last, First" -> "First Last", joined with '|'.
        author = "|".join([
            " ".join(reversed(str(e).split(", ")))
            for e in entry.persons["author"]
        ])
        # Derive the PDF link from the anthology identifier.
        uid = url.replace("https://www.aclweb.org/anthology/", "")
        url = "https://www.statmt.org/wmt20/pdf/" + uid + ".pdf"
        paper = Paper(
            uid=f"WS-2.{uid}",
            ws_id="WS-2",
            title=title,
            authors=author,
            abstract=abstract,
            track="WS-2",
            kind="workshop",
            link=url,
        )
        result.append(paper)
    return result
def _add_file(fname, force_redownload, db, per_file_progress_bar):
    """ Return #added, #skipped, file_skipped """
    if fname.startswith('http'):
        # Remote file: skip if already downloaded unless forced.
        if not force_redownload and db.file_has_been_downloaded(fname):
            return 0, 0, True
        try:
            new_entries = pybtex.parse_string(
                download_file(fname), bib_format="bibtex").entries
        except urllib.error.URLError as e:
            raise AddFileError("Error downloading '%s' [%s]" % (fname, str(e)))
        except pybtex.PybtexError:
            raise AddFileError("Error parsing file %s" % fname)
        db.register_file_downloaded(fname)
    else:
        new_entries = pybtex.parse_file(fname, bib_format="bibtex").entries

    added = 0
    skipped = 0
    if per_file_progress_bar:
        entry_iter = tqdm(
            new_entries.values(),
            ncols=80,
            bar_format="{l_bar}{bar}| [Elapsed: {elapsed} ETA: {remaining}]")
    else:
        entry_iter = new_entries.values()
    for entry in entry_iter:
        if db.add(entry):
            added += 1
        else:
            skipped += 1
    return added, skipped, False
def run(self, lines):
    """ Create a bibliography from cite commands. """
    # Join the content to enable regex searches throughout entire text
    content = '\n'.join(lines)

    # Build the database of bibtex data
    bibfiles = []
    match = re.search(self.RE_BIBLIOGRAPHY, content)
    if match:
        bib_string = match.group(0)
        for bfile in match.group(1).split(','):
            try:
                bibfiles.append(os.path.join(self._root, bfile))
                data = parse_file(bibfiles[-1])
            except:
                log.error('Failed to parse bibtex file: {}'.format(bfile))
                return lines
            self._bibtex.add_entries(data.entries.iteritems())
    else:
        return lines

    # Determine the style
    match = re.search(self.RE_STYLE, content)
    if match:
        content = content.replace(match.group(0), '')
        try:
            style = find_plugin('pybtex.style.formatting', match.group(1))
        except:
            log.error('Unknown bibliography style "{}"'.format(match.group(1)))
            return lines
    else:
        style = find_plugin('pybtex.style.formatting', 'plain')

    # Replace citations with author date, as an anchor
    content = re.sub(self.RE_CITE, self.authors, content)

    # Create html bibliography
    if self._citations:
        # Generate formatted html using pybtex
        formatted_bibliography = style().format_bibliography(
            self._bibtex, self._citations)
        backend = find_plugin('pybtex.backends', 'html')
        stream = io.StringIO()
        backend().write_to_stream(formatted_bibliography, stream)

        # Strip the bib items from the formatted html
        html = re.findall(r'\<dd\>(.*?)\</dd\>', stream.getvalue(),
                          flags=re.MULTILINE | re.DOTALL)

        # Produces an ordered list with anchors to the citations
        output = u'<ol class="moose-bibliography" data-moose-bibfiles="{}">\n'.format(str(bibfiles))
        for i, item in enumerate(html):
            output += u'<li name="{}">{}</li>\n'.format(self._citations[i], item)
        output += u'</ol>\n'
        content = re.sub(self.RE_BIBLIOGRAPHY, output, content)

    return content.split('\n')
def _harvest(ds, **kw):
    """Add every entry from the dataset's CLDF .bib files to the global
    bibliography, namespacing keys as '<dataset-id>:<key>'."""
    for path in ds.cldf_dir.glob('*.bib'):
        parsed = parse_file(str(path))
        for key, entry in parsed.entries.items():
            gid = '{0}:{1}'.format(ds.id, key)
            if gid not in gbib.entries:
                gbib.add_entry(gid, entry)
def init(app, config_file):
    """Load BibTeX citation metadata, keyed by PDF basename."""
    global metadata
    config = topicexplorer.config.read(config_file)
    try:
        filename = config.get('bibtex', 'path')
    except ConfigParserError:
        # No explicit bibtex path configured; fall back to the model dir.
        model_path = config.get('main', 'path')
        filename = os.path.join(model_path, 'library.bib')

    print("Loading Bibtex metadata from", filename)
    bib = parse_file(filename)

    metadata = dict()
    for entry in bib.entries:
        # Mendeley-style 'file' fields look like ":path:pdf".
        key = '/' + bib.entries[entry].fields.get('file', '').replace(
            ':pdf', '')[1:]
        if 'C$\\backslash$:' in key:
            # Windows-mangled path exported by Mendeley.
            key = key.replace('C$\\backslash$:', '')
            key = key[1:]
        key = os.path.normpath(key)
        key = os.path.basename(key)
        try:
            citation = pybtex.format_from_file(
                filename, style='plain', output_backend='text',
                citations=[entry])[3:]
            metadata[key] = citation
        except PybtexError:
            metadata[key] = filename
def init(self, translator):
    """Initialize the extension and load every .bib page into the database."""
    command.CommandExtension.init(self, translator)
    bib_files = []
    for node in anytree.PreOrderIter(self.translator.root):
        if node.source.endswith('.bib'):
            bib_files.append(node.source)

    for bfile in bib_files:
        try:
            db = parse_file(bfile)
        except UndefinedMacro as e:
            msg = "The BibTeX file %s has an undefined macro:\n%s"
            # Fix: BaseException.message does not exist on Python 3 — log the
            # exception itself. Also skip this file rather than falling
            # through and iterating a stale/unbound 'db'.
            LOG.warning(msg, bfile, e)
            continue
        #TODO: https://bitbucket.org/pybtex-devs/pybtex/issues/93/
        # databaseadd_entries-method-not-considering
        warn = self.get('duplicate_warning')
        for key in db.entries:
            if key in self.__database.entries:
                if warn:
                    msg = "The BibTeX entry '%s' defined in %s already exists."
                    LOG.warning(msg, key, bfile)
            else:
                self.__database.add_entry(key, db.entries[key])
def preExecute(self):
    """Re-read all .bib pages into a fresh database before execution."""
    set_strict_mode(False)  # allow incorrectly formatted author/editor names

    # If this is invoked during a live serve, we need to recompile the list
    # of '.bib' files and read them again, otherwise there's no way to
    # distinguish existing entries from duplicates
    self.__bib_files = []
    for node in self.translator.findPages(
            lambda p: p.source.endswith('.bib')):
        self.__bib_files.append(node.source)

    self.__database = BibliographyData()
    for bfile in self.__bib_files:
        try:
            db = parse_file(bfile)
            self.__bib_file_database[bfile] = db
        except UndefinedMacro as e:
            msg = "The BibTeX file %s has an undefined macro:\n%s"
            LOG.warning(msg, bfile, e)
            # Fix: skip this file; the original fell through and iterated a
            # stale (or unbound) 'db' from a previous iteration.
            continue
        #TODO: https://bitbucket.org/pybtex-devs/pybtex/issues/93/
        # databaseadd_entries-method-not-considering
        for key in db.entries:
            if key in self.__database.entries:
                if self.get('duplicate_warning') and (
                        key not in self.get('duplicates')):
                    msg = "The BibTeX entry '%s' defined in %s already exists."
                    LOG.warning(msg, key, bfile)
            else:
                self.__database.add_entry(key, db.entries[key])
def main():
    """Parse the input BibTeX file, build citations, sort, and save them."""
    args = get_args()

    print("Parsing %s file..." % args.input_file)
    bib_data = parse_file(args.input_file)

    print('Generating %i citations...' % len(bib_data.entries))
    citations = generate_citations(bib_data)

    print("Saving %s file..." % args.output_file)
    save(args.output_file, sort_citations(citations))
def test_on_page_markdown(self):
    """A cited key should render as a formatted footnote in the output."""
    self.plugin.on_config(self.plugin.config)
    test_data = parse_file(os.path.join(test_files_dir, "single.bib"))
    test_markdown = "This is a citation. [@test]\n\n \\bibliography"
    rendered = self.plugin.on_page_markdown(test_markdown, None, None, None)
    self.assertIn(
        "[^1]: First Author and Second Author\. Test title\. *Testing Journal*, 2019\.",
        rendered,
    )
def fmt(fin, fout):
    """ Clean up the INPUT bibtex file and write to OUTPUT """
    writer = Writer()
    writer.write_stream(parse_file(fin, "bibtex"), fout)
def __init__(self, db_file, key):
    """Immediately read in citations and references, if files exist."""
    self.db_file = db_file
    self.key = key

    # Parse the per-key citation/reference .bib files when present;
    # otherwise leave the attribute as None.
    citations_path = os.path.join('bib_files', f'{self.key}_citations.bib')
    self.citations = (parse_file(citations_path)
                      if os.path.isfile(citations_path) else None)

    references_path = os.path.join('bib_files', f'{self.key}_references.bib')
    self.references = (parse_file(references_path)
                       if os.path.isfile(references_path) else None)

    self._citations()
    self._text_data()
def test_full_bibliography(self):
    """Full bibliography should reflect the configured CSL file."""
    test_data = parse_file(os.path.join(test_files_dir, "single.bib"))

    # Without a CSL file: default pybtex formatting.
    self.plugin.csl_file = None
    self.plugin.format_citations(test_data.entries.items())
    self.assertIn("First Author and Second Author",
                  self.plugin.full_bibliography)

    # With the nature CSL: abbreviated author names.
    self.plugin.csl_file = os.path.join(test_files_dir, "nature.csl")
    self.plugin.format_citations(test_data.entries.items())
    self.assertIn("Author, F. & Author, S", self.plugin.full_bibliography)
def readBib(fileName):
    """
    Read the BibTeX entry contained in the given file.

    :param fileName: The name of the file.
    :return: The BibTeX entry, as a BibliographyData object.
    """
    return parse_file(fileName)
def create_bib():
    """Read bibtex file and create publications page.

    Fixes: the output file is now managed by a context manager (the
    original paired open()/close() manually, leaking the handle on any
    error); an unused `year` local was removed.
    """
    bib_data = parse_file('local.bib')

    # Maps to which section add each entry, based on its type
    TYPE2SEC = {
        'article': 'Articles',
        'mastersthesis': 'Theses',
        'misc': 'Theses',
        'other': 'Talks',
        'phdthesis': 'Theses',
        'proceedings': 'Articles',
    }

    # add the headings for type of publications, automatically extracting
    # from the values above in alphabetical order
    bib_entries = {}
    headings = sorted(set(TYPE2SEC.values()))
    for heading in headings:
        bib_entries[heading] = []

    # Fill categories with entries by year
    for entry_key in bib_data.entries:
        entry_type = bib_data.entries[entry_key].type
        bib_entry = bib_data.entries[entry_key].fields
        bib_entry['Author'] = '; '.join(
            [str(p) for p in bib_data.entries[entry_key].persons['Author']])
        bib_entries[TYPE2SEC[entry_type]].append(
            bib_data.entries[entry_key].fields)

    with open('publications.md', 'w') as pub_handler:
        pub_handler.write("---\n")
        pub_handler.write("title: Publications\n")
        pub_handler.write("author: Tiago Tresoldi\n")
        pub_handler.write("---\n")
        pub_handler.write("Here is a list of my main publications. The BibTex reference (from where it is generated), can be downloaded [here](local.bib).\n")

        # Fill the output
        for heading in headings:
            pub_handler.write('\n## %s\n\n' % heading)
            # Negative Year so we can sort correctly: by year descending,
            # by title ascending
            sorted_entries = sorted(
                bib_entries[heading],
                key=lambda x: (-int(x['Year']), x['Title']))
            for entry in sorted_entries:
                pub_handler.write('- %s\n' % bibtex2md(entry))
def parseBibtexFile(self, bibfile):
    """
    Returns parsed bibtex file.  If "macro_files" are supplied in the
    configuration file, then a temporary file will be made that contains
    the supplied macros above the original bib file. This temporary
    combined file can then be parsed by pybtex.
    """
    if self._macro_files:
        t_bib_path = os.path.join(MooseDocs.ROOT_DIR, "tBib.bib")
        with open(t_bib_path, "wb") as t_bib:
            # Macros first, then the original bib content.
            for t_file in self._macro_files:
                with open(os.path.join(MooseDocs.ROOT_DIR, t_file.strip()),
                          "rb") as in_file:
                    shutil.copyfileobj(in_file, t_bib)
            with open(bibfile, "rb") as in_file:
                shutil.copyfileobj(in_file, t_bib)
        try:
            data = parse_file(t_bib_path)
        finally:
            # Fix: remove the temporary file even if parsing raises
            # (the original leaked it on a parse error).
            if os.path.isfile(t_bib_path):
                os.remove(t_bib_path)
    else:
        data = parse_file(bibfile)
    return data
def gen_refs(bibfile):
    """Convert *bibfile* with citeproc, merge pybtex data into the YAML
    references, and write publications.yml. Returns the YAML structure.

    Fix: both files are now opened via context managers; the original
    opened them inline and left closing to the garbage collector.
    """
    target = os.path.splitext(os.path.split(bibfile)[1])[0] + '.yml'
    call_citeproc(bibfile, target)
    bib = ptd.parse_file(bibfile)
    with open(target, encoding='utf-8') as fh:
        ybib = yaml.safe_load(fh)
    for yitem in ybib['references']:
        bitem = bib.entries.get(yitem['id'])
        yitem = merge(bitem, yitem)
    with open('publications.yml', 'w', encoding="utf-8") as fh:
        yaml.dump(ybib, fh)
    return ybib
def parseBibtexFile(self, bibfile):
    """
    Returns parsed bibtex file.  If "macro_files" are supplied in the
    configuration file, then a temporary file will be made that contains
    the supplied macros above the original bib file. This temporary
    combined file can then be parsed by pybtex.
    """
    if self._macro_files:
        with open("tBib.bib", "wb") as tBib:
            # Macros first, then the original bib content.
            for tFile in self._macro_files:
                with open(MooseDocs.abspath(tFile.strip()), "rb") as inFile:
                    shutil.copyfileobj(inFile, tBib)
            with open(bibfile, "rb") as inFile:
                shutil.copyfileobj(inFile, tBib)
        try:
            data = parse_file("tBib.bib")
        finally:
            # Fix: remove the temporary file even when parsing fails
            # (the original leaked it on a parse error).
            os.remove("tBib.bib")
    else:
        data = parse_file(bibfile)
    return data
def convert_bibtex_keys(input_file: str, output_file: str):
    """
    Convert keys in a bibtex file to Google Scholar format.

    @input_file: string, input file name.
    @output_file: string, output file name.
    """
    bib_data = parse_file(input_file)
    old_keys, new_keys = obtain_replace_keys(bib_data)
    # Rebuild the entry map under the new keys, preserving order.
    renamed = OrderedCaseInsensitiveDict()
    for old_key, new_key in zip(old_keys, new_keys):
        renamed[new_key] = bib_data.entries[old_key]
    bib_data.entries = renamed
    bib_data = update_arxiv_information(bib_data)
    with open(output_file, 'w', encoding='utf-8') as ofile:
        bib_data.to_file(ofile)
def gen_refs(bibfile):
    """Convert *bibfile* with citeproc, merge pybtex data into the YAML
    references, write one .bib per reference plus publications.yml, and
    return the YAML structure.
    """
    target = os.path.splitext(os.path.split(bibfile)[1])[0] + '.yml'
    call_citeproc(bibfile, target)
    bib = ptd.parse_file(bibfile)
    # Fix: yaml.load without an explicit Loader is deprecated and unsafe
    # (it can construct arbitrary Python objects); safe_load is sufficient
    # for plain CSL YAML and matches the newer revision of this function.
    ybib = yaml.safe_load(open(target, encoding='utf-8'))
    for yitem in ybib['references']:
        bitem = bib.entries.get(yitem['id'])
        yitem = merge(bitem, yitem)
        newdic = dict({yitem['id']: bitem})
        output = ptd.BibliographyData(newdic)
        output.to_file(
            os.path.join(BIBTEMPDIRECTORY, yitem['id'] + ".bib"), "bibtex")
    yaml.dump(ybib, open('publications.yml', 'w', encoding="utf-8"))
    return ybib
def __init__(self, filename='../references.bib'):
    """Load the references file and prepare formatting helpers."""
    logger.info('Reading references from {}'.format(filename))
    self.data = parse_file(filename)
    logger.info(self.data.entries.keys())

    # set up bibliography formatting
    self.style = pybtex.plugin.find_plugin(
        'pybtex.style.formatting', self.style_name)()
    self.backend = pybtex.plugin.find_plugin('pybtex.backends', 'html')()

    # set up inline reference and doi dictionaries
    entries = self.data.entries
    self.inline = ({k: self._parse_entry(v) for k, v in entries.items()})
    self.doi = ({k: self._get_doi(v) for k, v in entries.items()})

    # set up key regex.
    # Fix: escape each key — citation keys may contain regex
    # metacharacters (e.g. '+', '.'), which would distort or break the
    # pattern if interpolated raw.
    pattern = ('(' + ')|('.join(re.escape(k) for k in
                                self.data.entries.keys()) + ')')
    self.regex = re.compile(pattern)
def _parse_bib(self, b):
    """Parse bib file *b* into self.E[b], recording its mtime in self.M."""
    self.M[b] = os.path.getmtime(b)
    self.E[b] = {}
    try:
        bib = parse_file(b)
    except Exception as ERR:
        nvimr_warn('Error parsing ' + b + ': ' + str(ERR))
        return
    for key in bib.entries:
        fields = bib.entries[key].fields
        # Defaults first; overridden below when the field is present.
        item = {'citekey': key, 'title': '', 'year': '????'}
        item['author'] = self._get_authors(bib.entries[key].persons)
        if 'title' in fields:
            item['title'] = fields['title']
        if 'year' in fields:
            item['year'] = fields['year']
        if 'file' in fields:
            item['file'] = fields['file']
        self.E[b][key] = item
def convert(
        from_filename, to_filename,
        from_format=None, to_format=None,
        input_encoding=None, output_encoding=None,
        parser_options=None, preserve_case=True,
        **kwargs):
    """Convert a bibliography database file between formats."""
    if from_filename == to_filename:
        raise ConvertError('input and output file can not be the same')
    options = parser_options if parser_options is not None else {}
    bib_data = database.parse_file(
        from_filename,
        bib_format=from_format,
        encoding=input_encoding,
        **options
    )
    if not preserve_case:
        bib_data = bib_data.lower()
    bib_data.to_file(to_filename, bib_format=to_format,
                     encoding=output_encoding)
def init(self, translator):
    """Initialize the extension and load every .bib page into the database."""
    command.CommandExtension.init(self, translator)
    bib_files = []
    for node in anytree.PreOrderIter(self.translator.root):
        if node.source.endswith('.bib'):
            bib_files.append(node.source)

    for bfile in bib_files:
        try:
            db = parse_file(bfile)
        except UndefinedMacro as e:
            msg = "The BibTeX file %s has an undefined macro:\n%s"
            # Fix: BaseException.message does not exist on Python 3 — log the
            # exception itself. Also skip this file rather than falling
            # through and iterating a stale/unbound 'db'.
            LOG.warning(msg, bfile, e)
            continue
        #TODO: https://bitbucket.org/pybtex-devs/pybtex/issues/93/
        # databaseadd_entries-method-not-considering
        for key in db.entries:
            if key in self.__database.entries:
                msg = "The BibTeX entry '%s' defined in %s already exists."
                LOG.warning(msg, key, bfile)
            else:
                self.__database.add_entry(key, db.entries[key])
import os import pybtex.database as pybib_db import wordcloud as wc from PIL import Image import numpy as np import matplotlib matplotlib.use('Qt5Agg') import matplotlib.pyplot as plt bib_data = pybib_db.parse_file('weishinn-ku.bib') bib_data = bib_data.lower() titles = [] for entry in bib_data.entries.values(): titles.append(entry.fields['title']) print("Total {0} titles.".format(len(titles))) text = ' '.join(titles) text = text.upper() # "network" appears 22 times text = text.replace("NETWORK", " ", 16) text = text.replace("QUERY", "QUERIES") stopwords = set([
can include it in the webpage. """ for key,entry in db.entries.items(): for auth in entry.persons["author"]: if ("Harrison" not in auth.first_names or "Chapman" not in auth.last_names): entry.add_person(auth, "otherauthor") if __name__ == "__main__": import argparse parser = argparse.ArgumentParser( description="Convert a bibtex file into a yaml file, for " "intelligent inclusion into Jekyll pages") parser.add_argument( "bibfile", type=argparse.FileType("r"), help="BibTeX file to convert into YAML") parser.add_argument( "yamlfile", type=argparse.FileType("w"), help="YAML file to write, will be overwritten") args = parser.parse_args() bibdb = database.parse_file(args.bibfile) extra_bibparse(bibdb) writer = Writer() writer.write_file(bibdb, args.yamlfile)
parser.add_argument('-y',
                    help="Earliest year to report conflict (default={})".format(earlyyear),
                    default=earlyyear, type=int)
args = parser.parse_args()

# First pass: scan raw '@' lines for duplicate keys, because pybtex cannot
# parse a file that contains repeated entries.
entries = set()
dupentries = False
with open(args.f, 'r') as fh:
    for line in fh:
        if line.startswith('@'):
            # Strip the entry type so only the key is compared.
            line = line.replace('@misc', '')
            line = line.replace('@article', '')
            line = line.replace('@inproceedings', '')
            if line in entries:
                sys.stderr.write("Duplicate entry " +
                                 line.replace('{', '').replace(',', ''))
                dupentries = True
            entries.add(line)

if dupentries:
    sys.stderr.write("FATAL: The bibtex file has duplicate entries in it. Please remove them before trying to continue\n")
    sys.stderr.write("(It is an issue with Google Scholar, but pybtex breaks with duplicate entries. Sorry)\n")
    sys.exit(-1)

# Second pass: emit each entry at or after the cutoff year.
bib = parse_file(args.f, 'bibtex')
for e in bib.entries:
    if 'year' in bib.entries[e].fields:
        if int(bib.entries[e].fields['year']) >= args.y:
            bib_data = BibliographyData({e: bib.entries[e]})
            print(bib_data.to_string('bibtex'))
from pybtex.database import parse_file
from pybtex.plugin import find_plugin

SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
SITE_ROOT = os.path.dirname(SCRIPT_DIR)
INFILE = os.path.join(SCRIPT_DIR, 'publications.bib')
OUTFILE = os.path.join(SITE_ROOT, '_data', 'publications.json')


# Retrieves the year for a BibTeX entry
def getYear(entry):
    return int(entry.fields['year'])


# Read the BibTeX database file
db = parse_file(INFILE)

# Retrieve the Style class for the "plain" style
PlainStyle = find_plugin('pybtex.style.formatting', 'plain')
style = PlainStyle()

# Retrieve the HTML backend
HtmlBackend = find_plugin('pybtex.backends', 'html')
backend = HtmlBackend('utf-8')

# Sort the BibTeX database entries in reverse-chronological order
# (reversed(sorted(...)) is kept as-is: unlike sorted(reverse=True), it
# also reverses the relative order of same-year entries)
entries = list(db.entries.values())
entries = list(reversed(sorted(entries, key=getYear)))

# Retrieve the list of years that have publications
years = [getYear(entry) for entry in entries]
matchintemporal brodbeck2018transformation """.split() ACRONYMS = ['EEG', 'MEG', 'MRI'] querier = ScholarQuerier() settings = ScholarSettings() settings.set_citation_format(ScholarSettings.CITFORM_BIBTEX) querier.apply_settings(settings) query = SearchScholarQuery() query.set_phrase("eelbrain") query.set_timeframe(2012, None) query.set_include_patents(False) bib = parse_file(DST, 'bibtex') start = 0 while True: querier.send_query(query) if len(querier.articles) == 0: break # extract articles for article in querier.articles: querier.get_citation_data(article) # convert to pybtex entry data = parse_bytes(article.citation_data, 'bibtex') assert len(data.entries) == 1 for entry in data.entries.values(): if entry.key in IGNORE: continue elif entry.type != 'article':