def load_bib(bib, titles):
    """Returns dict {'BibTeX ID': {record}}

    ``bib`` is the path of the main BibTeX file; ``titles`` is the path of a
    BibTeX strings file whose @string definitions map journal names to their
    replacement titles.
    """
    with open(bib) as bibtex_file:
        parser = BibTexParser()
        parser.customization = convert_to_unicode
        # Find the url field of a misc entry
        # https://github.com/sciunto-org/python-bibtexparser/issues/93
        parser.homogenise_fields = False
        bib = bibtexparser.load(bibtex_file, parser=parser)
    with open(titles) as titles_file:
        parser = BibTexParser()
        parser.customization = convert_to_unicode
        titles = bibtexparser.load(titles_file, parser=parser)
    res = {}
    for entry in bib.entries:
        # Replace the journal name when a lowercase @string key matches it.
        if 'journal' in entry and entry['journal'].lower() in titles.strings:
            entry['journal'] = titles.strings[entry['journal'].lower()]
        if 'author' in entry:
            # Work around a mis-decoded combining accent left by the parser.
            entry['author'] = entry['author'].replace('{́i}', 'í')
        # NOTE(review): keys entries on lowercase 'id' — newer bibtexparser
        # versions expose 'ID' instead; confirm against the pinned version.
        res[entry['id'].strip()] = entry
    return res
def load_bib(filename=BIB_FILENAME, stringio=None):
    """Parse a BibTeX source into a database.

    Reads from *stringio* when given, otherwise from *filename* (UTF-8).
    Nonstandard entry types and fields are kept, and LaTeX escape codes
    (like ``\\'{e}``) are converted to unicode.
    """
    bparser = bibtexparser.bparser.BibTexParser()
    bparser.ignore_nonstandard_types = False
    bparser.homogenise_fields = False
    # Turn latex escape codes into UTF-8 characters during parsing.
    bparser.customization = convert_to_unicode
    bparser.encoding = "utf8"
    if stringio:
        return bibtexparser.load(stringio, parser=bparser)
    with codecs.open(filename, mode="r", encoding="utf-8") as handle:
        return bibtexparser.load(handle, parser=bparser)
def load(cls, path, package=None):
    """Parse the BibTeX file at *path* (optionally resolved relative to
    *package*) and build an instance mapping entry keys to CitationRecords.

    Raises ValueError when the file cannot be parsed or contains duplicate
    entry keys.
    """
    if package is not None:
        pkg_root = os.path.abspath(pkg_resources.resource_filename(package, '.'))
        path = os.path.join(pkg_root, path)
    parser = bp.bparser.BibTexParser()
    # Downstream tooling is much easier with unicode. For actual latex
    # users, use the modern biber backend instead of bibtex
    parser.customization = bp.customization.convert_to_unicode
    with open(path) as fh:
        try:
            db = bp.load(fh, parser=parser)
        except Exception as e:
            raise ValueError("There was a problem loading the BiBTex file:"
                             "%r" % path) from e
    entries = collections.OrderedDict()
    for record in db.entries:
        key = record.pop('ID')
        kind = record.pop('ENTRYTYPE')
        if key in entries:
            raise ValueError("Duplicate entry-key found in BibTex file: %r"
                             % key)
        entries[key] = CitationRecord(kind, record)
    return cls(entries)
def _FindBibEntriesParser(self):
    """Collect completion data for every entry of every .bib file found
    under the main directory, using per-file caching to skip unchanged
    files.

    Returns:
        list: completion data objects, one per BibTeX entry with an ID.
    """
    ret = []
    parser = BibTexParser()
    parser.customization = bib_customizations
    for filename in self._Walk(self._main_directory, ".bib"):
        skip, cache = self._CacheDataAndSkip(filename)
        if skip:
            ret.extend(cache)
            continue
        resp = []
        with open(filename) as bibtex_file:
            bib_database = bibtexparser.load(bibtex_file, parser=parser)
            for entry in bib_database.entries:
                if 'ID' not in entry:
                    continue
                # Robustness fix: entries may legitimately lack a title or
                # author; the old direct indexing raised KeyError and
                # aborted the whole scan.
                title = entry.get('title', '')
                author = entry.get('author', '')
                resp.append(responses.BuildCompletionData(
                    entry['ID'],
                    "%s (%s)" % (title, author)
                ))
        ret.extend(resp)
        self._cached_data[filename] = resp
    return ret
def get(filename, ignore_fields=None):
    """
    Get all entries from a BibTeX file.

    :param filename: The name of the BibTeX file.
    :param ignore_fields: An optional list of fields to strip from the BibTeX \
            file.

    :returns: A ``bibtexparser.BibDatabase`` object representing the fetched \
            entries.
    """
    if ignore_fields is None:
        ignore_fields = []

    # Parse the file, then drop the ignored fields from every entry.
    with open(filename, 'r') as fh:
        database = bibtexparser.load(fh)

    database.entries = [
        {field: record[field] for field in record if field not in ignore_fields}
        for record in database.entries
    ]
    return database
def parse(bibfile):
    # Print each BibTeX entry of `bibfile` to stdout as an indented
    # YAML-style "paper" record (Python 2 print statements throughout).
    with open(bibfile) as bibtex_file:
        writer = BibTexWriter()
        bib_database = bibtexparser.load(bibtex_file)
        for entry in bib_database.entries:
            print "\t-"
            print "\t\tlayout: paper"
            print "\t\tpaper-type: "+ preprocess(entry["type"])
            print "\t\tyear: " + preprocess(entry["year"])
            print "\t\tselected: no"
            print "\t\ttitle: >\n\t\t\t"+preprocess(entry["title"])
            print "\t\tauthors: "+ parseauthors(preprocess(entry["author"])).encode('UTF8')
            print "\t\timg: "
            print "\t\tvenue: "
            # Optional fields: emit only when present.
            if("pages" in entry.keys()):
                print "\t\tpages: "+preprocess(entry["pages"])
            if("booktitle" in entry.keys()):
                print "\t\tbooktitle: "+preprocess(entry["booktitle"])
            if("journal" in entry.keys()):
                print "\t\tjournal: "+preprocess(entry["journal"])
            if("url" in entry.keys()):
                print "\t\tdoc-url: "+preprocess(entry["url"])
            else:
                print "\t\tdoc-url: "
            if("abstract" in entry.keys()):
                print "\t\tabstract: >\n\t\t\t" + preprocess(entry["abstract"]).encode('UTF8')
            # NOTE(review): _entry_to_bibtex is a private BibTexWriter method
            # and may break across bibtexparser versions — confirm pinning.
            print "\t\tbibtex: >\n\t\t\t"+ writer._entry_to_bibtex(entry).replace("\n","\n\t\t\t").encode('UTF8')
def __init__(self, bibfile=''):
    """
    Initializes an empty References object which points to a
    .bib file

    Parameters
    ----------
    bibfile: str
      The path to the bibtex file from which the references will be read
    """
    # Fall back to the packaged bibliography when no path is supplied.
    if not bibfile:
        bibfile = pkg_resources.resource_filename('ExoCTK',
                                                  'data/core/bibtex.bib')

    self.bibfile = bibfile
    self.refs = []

    # Parse the bibtex file into a database.
    with open(bibfile) as handle:
        self.database = bt.load(handle)

    # All bibcodes present in the file.
    self.bibcodes = [record['ID'] for record in self.database.entries]
def convert(inpath):
    '''
    Convert from bibtex to bibjson.
    One argument expected: path to bibtex file.

    Returns a dict of the form ``{"records": [...]}`` where each record is
    the parsed entry annotated with a sequential ``_id`` and a
    ``collection`` name.
    '''
    import bibtexparser
    from bibtexparser.bparser import BibTexParser

    parser = BibTexParser()
    with open(inpath) as bibtex_file:
        parser.customization = customizations
        bib_database = bibtexparser.load(bibtex_file, parser=parser)

    # Cleanup: the old code created a fresh dict that was immediately
    # overwritten by an alias of the entry, kept a manual counter, and
    # imported json without using it.
    my_records = []
    for num, record in enumerate(bib_database.entries, start=1):
        record["_id"] = num
        record["collection"] = "test01"
        my_records.append(record)
    return {"records": my_records}
def diffFilesIndex():
    """Compute differences between Bibtex index and PDF files

    Returns a dict with bibtex entry:
        * full bibtex entry with file='' if file is not found
        * only file entry if file with missing bibtex entry
    Returns False when the index file cannot be opened.
    """
    files = tools.listDir(config.get("folder"))
    files = [i for i in files if tools.getExtension(i) in ['.pdf', '.djvu']]
    try:
        with open(config.get("folder")+'index.bib', 'r', encoding='utf-8') \
                as fh:
            index = bibtexparser.load(fh)
        index_diff = index.entries_dict
    except (TypeError, IOError):
        tools.warning("Unable to open index file.")
        return False
    # NOTE(review): assumes every indexed entry carries a 'file' field —
    # a missing one raises KeyError here; confirm the index invariant.
    for key in index_diff.keys():
        if index_diff[key]['file'] not in files:
            index_diff[key]['file'] = ''
        else:
            files.remove(index_diff[key]['file'])
    # Remaining files have no bibtex entry: record them keyed by filename.
    for filename in files:
        index_diff[filename] = {'file': filename}
    # index_diff aliases index.entries_dict, so the mutated mapping is
    # what gets returned here.
    return index.entries_dict
def move_pdfs(inputfile, outputdir):
    """Copy the PDF referenced by each entry of *inputfile* into
    *outputdir*, renamed to '<entry ID>.pdf'.

    Entries without a usable 'file' field are skipped; *outputdir* is
    created when missing.
    """
    bibdata = None
    with open(inputfile, 'r') as f:
        bibdata = bibtexparser.load(f)
    if bibdata is None:
        sys.exit("Could not load input file {}".format(inputfile))
    if not os.path.exists(outputdir):
        os.makedirs(outputdir)
    for entry in bibdata.entries:
        if 'file' not in entry:
            continue
        # NOTE(review): assumes a JabRef-style field ':<path>:<type>' — a
        # value without ':' raises IndexError here; confirm against the data.
        pdfin = entry['file'].split(':')[1]
        pdfout = os.path.join(outputdir, entry['ID']+'.pdf')
        if pdfin == '':
            continue
        # Stored paths are relative to the filesystem root.
        pdfin = '/' + pdfin
        if not os.path.exists(pdfin):
            continue
        shutil.copyfile(pdfin, pdfout)
def match_plain_by_bib(self, plainFile, bibFile):
    """ For each paper in plainFile, find its vol, no, page, and doi

    Each line of *plainFile* is a title; matching entries of *bibFile*
    are printed as 'vol,no,pages,,doi' (or ',,,,' when unmatched), and
    unmatched titles are printed at the end.
    """
    # Resource-leak fix: the plain-text file was opened without ever being
    # closed; read it inside a context manager instead.
    titles = []
    with open(plainFile) as f:
        for line in f:
            titles.append(line.strip())
    print('# of ' + plainFile + ' = ' + str(len(titles)))
    # open bib file
    with open(bibFile) as bibtex_file:
        bib_database = bibtexparser.load(bibtex_file)
    count = 0
    missTitles = []
    for title in titles:
        done = False
        for each in bib_database.entries:
            if(title == each['title']):
                vol = each['volume'] if ('volume' in each.keys()) else ''
                no = each['number'] if ('number' in each.keys()) else ''
                pages = each['pages'] if ('pages' in each.keys()) else ''
                doi = each['doi'] if ('doi' in each.keys()) else ''
                print(vol + ',' + no + ',' + pages + ',,' + doi)
                count = count + 1
                done = True
        if( not done ):
            print(',,,,')
            missTitles.append(title)
    print(missTitles)
def parse_bibtex_into_evidence(file_name):
    """Build an Evidence object from the first entry of a BibTeX file.

    The entry's ID becomes the key; doi, author, title, and year are
    attached when present and non-empty.
    """
    import bibtexparser
    ev = P.Evidence()
    with open(file_name) as fh:
        first = bibtexparser.load(fh).entries[0]
    ev.setKey(first["ID"])
    # Each optional field maps to an Evidence method of the same name.
    for field in ("doi", "author", "title", "year"):
        try:
            value = first[field]
        except KeyError:
            continue
        if value:
            getattr(ev, field)(value)
    return ev
def read_bibtex(filename):
    """Parse *filename* and return its entries, with LaTeX converted to
    unicode and author/editor fields split into "Name, Surname" lists."""
    import bibtexparser
    from bibtexparser.bparser import BibTexParser

    def editor_split(record):
        """ custom transformation - split editor field into a list of "Name, Surname"

        :record: dict -- the record
        :returns: dict -- the modified record
        """
        if "editor" in record:
            if record["editor"]:
                names = [part.strip()
                         for part in record["editor"].replace('\n', ' ').split(" and ")]
                record["editor"] = getnames(names)
            else:
                # Empty editor fields are dropped entirely.
                del record["editor"]
        return record

    def customizations(record):
        """ custom transformation applied during parsing """
        record = bibtexparser.customization.convert_to_unicode(record)
        # Split author field from separated by 'and' into a list of "Name, Surname".
        record = bibtexparser.customization.author(record)
        return editor_split(record)

    parser = BibTexParser()
    parser.customization = customizations
    with open(filename) as fh:
        return bibtexparser.load(fh, parser=parser).entries
def getBibtex(entry, file_id='both', clean=False):
    """Returns the bibtex entry corresponding to entry, as a dict

    entry is either a filename or a bibtex ident
    file_id is file or id or both to search for a file / id / both
    clean is to clean the ignored fields specified in config
    Returns False when the index cannot be opened or no entry matches.
    """
    try:
        with open(config.get("folder")+'index.bib', 'r', encoding='utf-8') \
                as fh:
            bibtex = bibtexparser.load(fh)
        bibtex = bibtex.entries_dict
    except (TypeError, IOError):
        tools.warning("Unable to open index file.")
        return False
    bibtex_entry = False
    # First try a direct lookup by bibtex identifier.
    if file_id == 'both' or file_id == 'id':
        try:
            bibtex_entry = bibtex[entry]
        except KeyError:
            pass
    # Then try to match by file path.
    if file_id == 'both' or file_id == 'file':
        if os.path.isfile(entry):
            for key in bibtex.keys():
                # NOTE(review): assumes every entry's 'file' field exists and
                # points at an existing file — samefile raises otherwise; confirm.
                if os.path.samefile(bibtex[key]['file'], entry):
                    bibtex_entry = bibtex[key]
                    break
    if clean:
        # NOTE(review): if nothing matched, bibtex_entry is False here and
        # the del raises TypeError rather than being skipped — confirm intent.
        for field in config.get("ignore_fields"):
            try:
                del(bibtex_entry[field])
            except KeyError:
                pass
    return bibtex_entry
def deleteId(ident, keep=False):
    """Delete a file based on its id in the bibtex file

    When *keep* is True only the index entry is removed; otherwise the
    associated file (and its tag directory, if left empty) is deleted too.
    Returns False when the index cannot be opened or the id is unknown.
    """
    try:
        with open(config.get("folder")+'index.bib', 'r', encoding='utf-8') \
                as fh:
            bibtex = bibtexparser.load(fh)
        bibtex = bibtex.entries_dict
    except (IOError, TypeError):
        tools.warning("Unable to open index file.")
        return False
    if ident not in bibtex.keys():
        return False
    if not keep:
        # Best-effort removal of the file on disk; warn but continue on failure.
        try:
            os.remove(bibtex[ident]['file'])
        except (KeyError, OSError):
            tools.warning("Unable to delete file associated to id " +
                          ident + " : " + bibtex[ident]['file'])
        # Remove the tag directory when the deletion left it empty.
        try:
            if not os.listdir(os.path.dirname(bibtex[ident]['file'])):
                os.rmdir(os.path.dirname(bibtex[ident]['file']))
        except (KeyError, OSError):
            tools.warning("Unable to delete empty tag dir " +
                          os.path.dirname(bibtex[ident]['file']))
    # NOTE(review): if the del succeeds, the except handler's lookup of
    # bibtex[ident] would itself raise — unreachable in practice because
    # membership was checked above, but worth confirming.
    try:
        del(bibtex[ident])
        bibtexRewrite(bibtex)
    except KeyError:
        tools.warning("No associated bibtex entry in index for file " +
                      bibtex[ident]['file'])
    return True
def load_bib(bib_file):
    """Load Bibtex database.

    Abbreviations are applied and LHCb commands removed.

    Arguments:
        bib_file (str): bib file to load.

    Returns:
        bibtexparser.bibdatabase.BibDatabase: Loaded database

    Raises:
        KeyError: If the journal is not recognized
        OSError: If the file does not exist
    """
    def customizations(record):
        """Apply customizations to loaded records."""
        if 'journal' in record:
            new_journal = ABBREVIATIONS.get(record['journal'], None)
            if not new_journal:
                # Bug fix: raise the KeyError promised by the docstring
                # instead of a bare Exception.
                raise KeyError("Unknown journal: %s" % record['journal'])
            record['journal'] = new_journal
        if 'title' in record:
            # Bug fix: the membership test must look inside the title text;
            # the old `r'\lhcb' in record` checked the dict's KEYS and was
            # always False, so the replacement never ran.
            if r'\lhcb' in record['title']:
                record['title'] = record['title'].replace(r'\lhcb', 'LHCb')
        return record

    if not os.path.exists(bib_file):
        raise OSError("Cannot find file %s" % bib_file)
    with open(bib_file) as bibtex_file:
        parser = bibtexparser.bparser.BibTexParser()
        parser.ignore_nonstandard_types = False
        parser.customization = customizations
        bib_database = bibtexparser.load(bibtex_file, parser=parser)
    return bib_database
def run(self):
    """Render the publications list: parse the BibTeX source (file argument
    or directive content), optionally filter/sort entries, and return the
    rendered template as a raw HTML node."""
    sort_type = self.options.get('sort', 'date')

    # Resolve the template: an explicit path wins over the theme default.
    if 'template' in self.options:
        template_dir, template_name = os.path.split(self.options['template'])
        env = Environment(loader=FileSystemLoader(template_dir))
        template = env.get_template(template_name)
    else:
        # Use template from the Pelican theme
        template = pelican_generator.get_template('publications')

    parser = BibTexParser(common_strings=True)
    parser.customization = customize
    if self.arguments:
        with open(self.arguments[0].strip(), 'r') as bibtex_file:
            bib = bibtexparser.load(bibtex_file, parser=parser)
    else:
        bib = bibtexparser.loads('\n'.join(self.content), parser=parser)

    # Restrict to explicitly requested entries when the option is set.
    selected = self.options.get('entries', [])
    if selected:
        lookup = bib.entries_dict
        entries = [lookup[key] for key in selected]
    else:
        entries = bib.entries

    entries = sort_entries(entries, sort_type)
    return [nodes.raw('', template.render(publications=entries), format='html')]
def main():
    """Strip a .bib file down to the entries actually cited in a .tex file
    and write the result to the output path from the CLI arguments."""
    args = _args()
    bibfile = args.input_bib
    texfile = args.input_tex
    with open(bibfile) as bibtex_file:
        parser = BibTexParser()
        bib_database = bibtexparser.load(bibtex_file, parser=parser)
    citation_keys = set()
    # Raw string: the old pattern relied on invalid escape sequences
    # (\{, \s, \}) that are deprecated in string literals.
    re_cite = re.compile(r'cite\{([0-9A-Za-z,\s]+)\}')
    with open(texfile) as tex_file:
        for line in tex_file:
            for group in re_cite.findall(line):
                # A single \cite may list several comma-separated keys.
                for key in group.split(','):
                    citation_keys.add(key.strip())
    print('Found {} citation keys'.format(len(citation_keys)))
    old_entries = bib_database.entries[:]
    bib_database.entries = [x for x in old_entries if x['ID'] in citation_keys]
    bibtex_string = bibtexparser.dumps(bib_database)
    # Bug fix: the file was opened in text mode but written encoded bytes,
    # which raises TypeError on Python 3 — open it in binary mode.
    with open(args.output_bib, 'wb') as new_bibtex_file:
        new_bibtex_file.write(bibtex_string.encode('utf8'))
    print('Cleaned file saved in {}'.format(args.output_bib))
def cli(ctx, db, fuzzy, no_abbr, input, output):
    """Filter every entry of *input* and, unless *no_abbr* is set, replace
    journal names with their abbreviations; write the result to *output*
    and exit with status 1 when any journal could not be abbreviated."""
    config = Config(db, fuzzy)
    with open(input, 'r', encoding='utf-8') as fp:
        bib = bibtexparser.load(fp)
    had_failure = False
    for entry in bib.entries:
        filter_entry(config, entry)
        if no_abbr or 'journal' not in entry:
            continue
        abbr = config.lookup(entry['journal'])
        if abbr:
            entry['journal'] = abbr
        else:
            # Remember the miss but keep processing remaining entries.
            had_failure = True
    with open(output, 'w', encoding='utf-8') as fp:
        bibtexparser.dump(bib, fp)
    if had_failure:
        ctx.exit(1)
def test_content_entries_only(self):
    # Writer with contents restricted to 'entries' must emit only the
    # entries (no comments/preamble) in sorted-key order.
    with open('bibtexparser/tests/data/multiple_entries_and_comments.bib') as bibtex_file:
        bib_database = bibtexparser.load(bibtex_file)
    writer = BibTexWriter()
    writer.contents = ['entries']
    result = bibtexparser.dumps(bib_database, writer)
    expected = \
"""@book{Toto3000,
 author = {Toto, A and Titi, B},
 title = {A title}
}

@article{Wigner1938,
 author = {Wigner, E.},
 doi = {10.1039/TF9383400029},
 issn = {0014-7672},
 journal = {Trans. Faraday Soc.},
 owner = {fr},
 pages = {29--41},
 publisher = {The Royal Society of Chemistry},
 title = {The transition state method},
 volume = {34},
 year = {1938}
}

@book{Yablon2005,
 author = {Yablon, A.D.},
 publisher = {Springer},
 title = {Optical fiber fusion slicing},
 year = {2005}
}

"""
    self.assertEqual(result, expected)
def open_bibtex_file(filename):
    """Load *filename* as a BibTeX database.

    Returns the parsed database, or None (after printing a message) when
    the file does not exist.
    """
    # Guard clause: bail out early on a missing file.
    if not os.path.exists(filename):
        print("Provided file does not exist.")
        return None
    with open(filename) as handle:
        return bibtexparser.load(handle)
def load_database(self):
    ''' load bibtex file if needed

    Starts from an empty database when the output path does not exist or
    overwriting was requested; otherwise parses the existing output file.
    '''
    if not os.path.exists(self.args.output_path) or self.args.overwrite == 'y':
        self.args.bibtex_database = bibtexparser.loads('')
    else:
        # Bug fix: the old code read the global name `args` (NameError in
        # most contexts) instead of self.args, and leaked the file handle.
        with open(self.args.output_path) as fh:
            self.args.bibtex_database = bibtexparser.load(fh)
def bibtex_parsing(fileName, parent):
    """Parse *fileName* and build one PaperData instance per entry."""
    with open(fileName) as bibtex_file:
        database = bibtexparser.load(bibtex_file)
    return [
        PaperData(find_university(record), parent, find_authors(record),
                  find_title(record), find_year(record),
                  get_cited_references(record), timesCited(record))
        for record in database.entries
    ]
def editEntry(entry, file_id='both'):
    """Interactively edit the bibtex entry matching *entry* (a filename or
    ident, per *file_id*), moving the associated file when its tag changed,
    then rewrite the index. Returns False on lookup/index errors, True
    otherwise."""
    bibtex = backend.getBibtex(entry, file_id)
    if bibtex is False:
        tools.warning("Entry "+entry+" does not exist.")
        return False
    if file_id == 'file':
        filename = entry
    else:
        filename = bibtex['file']
    new_bibtex = checkBibtex(filename, tools.parsed2Bibtex(bibtex))
    # Tag update
    if new_bibtex['tag'] != bibtex['tag']:
        print("Editing tag, moving file.")
        new_name = backend.getNewName(new_bibtex['file'],
                                      new_bibtex,
                                      new_bibtex['tag'])
        # Keep prompting until the chosen name is free.
        while os.path.exists(new_name):
            tools.warning("file "+new_name+" already exists.")
            default_rename = new_name.replace(tools.getExtension(new_name),
                                              " (2)" +
                                              tools.getExtension(new_name))
            rename = tools.rawInput("New name ["+default_rename+"]? ")
            if rename == '':
                new_name = default_rename
            else:
                new_name = rename
        new_bibtex['file'] = new_name
        try:
            shutil.move(bibtex['file'], new_bibtex['file'])
        except shutil.Error:
            tools.warning('Unable to move file '+bibtex['file']+' to ' +
                          new_bibtex['file'] + ' according to tag edit.')
        # Drop the old tag directory when the move left it empty.
        try:
            if not os.listdir(os.path.dirname(bibtex['file'])):
                os.rmdir(os.path.dirname(bibtex['file']))
        except OSError:
            tools.warning("Unable to delete empty tag dir " +
                          os.path.dirname(bibtex['file']))
    # Reload the index and store the edited entry under its (possibly new) id.
    try:
        with open(config.get("folder")+'index.bib', 'r', encoding='utf-8') \
                as fh:
            index = bibtexparser.load(fh)
        index = index.entries_dict
    except (TypeError, IOError):
        tools.warning("Unable to open index file.")
        return False
    index[new_bibtex['id']] = new_bibtex
    backend.bibtexRewrite(index)
    return True
def load_bib(filename=BIB_FILENAME):
    """Read *filename* (UTF-8) into a BibDatabase, keeping nonstandard
    entry types and fields as-is."""
    bib_parser = bibtexparser.bparser.BibTexParser()
    bib_parser.ignore_nonstandard_types = False
    bib_parser.homogenise_fields = False
    bib_parser.encoding = 'utf8'
    with codecs.open(filename, mode='r', encoding='utf-8') as handle:
        return bibtexparser.load(handle, parser=bib_parser)
def test_write_file(self):
    """Round-trip check: dumping a loaded database reproduces the expected text."""
    with open(self.input_file_path) as source:
        database = bibtexparser.load(source)
    with TemporaryFile(mode='w+') as sink:
        bibtexparser.dump(database, sink)
        # Rewind before reading back what was just written.
        sink.seek(0)
        self.assertEqual(sink.read(), self.expected)
def bib_lookup(bib, bibnick, what):
    """Return the sanitized *what* field of entry *bibnick* in the file
    *bib*, or None when the entry or field is absent."""
    with open(bib) as bibtex_file:
        database = bibtexparser.load(bibtex_file)
    record = database.entries_dict.get(bibnick)
    if record is not None and what in record:
        return sanitize(record[what])
    return None
def parse_bibtex(self):
    """Parses all *.bib files inside a directory.

    Returns:
        list: ``(year, entries)`` tuples sorted by descending year, where
        ``entries`` holds the formatted publication records of that year.
        (Docstring fix: the old text claimed a dict was returned.)

    Warning
    -------
    The names of the file should always be `YEAR.bib`.
    """
    bib = []

    # The regular expression for the "and" between the authors
    and_re = re.compile(r"\s+and\s+")
    dash_re = re.compile(r"-+")

    # Getting the BibTeX files
    for fn in glob(os.path.join(self.bibtex_dir, "*.bib")):
        year = int(os.path.basename(fn).split(".")[0])
        with open(fn, "r") as i_file:
            pubs = list(bibtexparser.load(i_file).entries)

        # Some formatting
        for pub in pubs:
            # Adding a dot to the title, if required
            if not pub["title"].endswith("."):
                pub["title"] += "."
            # Adding a dot to the authors, if required
            if not pub["author"].endswith("."):
                pub["author"] += "."
            # Replacing the in between author "and"
            authors = and_re.split(pub["author"])
            pub["author"] = ", ".join(authors[:-1]) + " and " + authors[-1]
            # Replacing '--' with '-' (robustness fix: 'pages' is optional
            # and the old direct indexing raised KeyError when absent)
            if "pages" in pub:
                pub["pages"] = dash_re.sub("-", pub["pages"])
            # Adding the pubmed identification number
            pub["pmid"] = int(pub["ID"].replace("pmid", ""))

        # Saving
        bib.append((year, pubs))

    # Sorting
    bib.sort(reverse=True, key=lambda pub: pub[0])
    return bib
def convert(source, crossrefQuery=''):
    # Fetch a BibTeX document from the URL *source* and print one
    # ' | title | | author:... description:year' line per entry
    # (Python 2 code: print statements, dict.has_key).
    r = requests.get(source)
    bibText = r.text.encode('utf-8')
    # NOTE(review): the `if True:` makes the bibtexparser branch below dead
    # code — presumably a debugging switch left in; confirm before cleanup.
    if True:
        author = ''
        title = ''
        year = ''
        for line in bibText.split('\n'):
            if line.strip() == '':
                continue
            if line.find(' author =') != -1:
                author = line.strip()
                author = author[author.find('{') + 1 : author.rfind('}')].strip().replace(' and ', ', ')
            if line.find(' year =') != -1:
                year = line.strip()
                year = year[year.find('{') + 1 : year.rfind('}')].strip()
                year = 'description:' + year
            if line.find(' title =') != -1:
                title = line.strip()
                title = title[title.find('{') + 1 : title.rfind('}')].strip()
                # The title field triggers the output; state resets after.
                print ' | ' + title + ' | | ' + ' author:'+ author + ' ' + year
                title = ''
                author = ''
                year = ''
    else:
        # Dead branch: parse via bibtexparser after a round-trip to disk.
        f = open('web_content/bib', 'w+')
        f.write(bibText)
        f.close()
        f = open('web_content/bib', 'r')
        bib_database = bibtexparser.load(f)
        for entry in bib_database.entries:
            desc = ''
            if entry.has_key('year'):
                desc = 'description:' + entry['year']
            line = ' | ' + entry['title'] + ' | | author:' + entry['author'] + ' ' + desc
            print line.encode('utf-8')
def __init__(self, pdf_file_path, bib_file_path=None):
    """Wrap a PDF with an optional BibTeX sidecar; when the sidecar exists,
    pull 'note' and 'tags' (split on ';', ',' or newlines) from its first
    entry."""
    self.__pdf_file_path = pdf_file_path
    self.__bib_file_path = bib_file_path
    if not (self.__bib_file_path and os.path.isfile(self.__bib_file_path)):
        return
    with open(self.__bib_file_path) as f:
        self.__bib = bibtex.load(f)
    first = self.__bib.entries[0]
    if 'note' in first.keys():
        self.note = first['note']
    if 'tags' in first.keys():
        raw_tags = re.split('[;,\n]+', first['tags'])
        self.tags = [t.strip() for t in raw_tags]
from pathlib import Path

import bibtexparser
import markdown

# Directory containing this file and the papers it indexes.
THIS_DIR = (Path(__file__) / "..").resolve()
PAPERS_DIR = THIS_DIR / "papers"

bibs = [p for p in PAPERS_DIR.iterdir() if str(p).lower().endswith(".bib")]
pdfs = [p for p in PAPERS_DIR.iterdir() if str(p).lower().endswith(".pdf")]

# First entry of every .bib file, keyed by the file's name.
parsed_bibs = {}
for bib_path in bibs:
    with bib_path.open() as handle:
        parsed_bibs[bib_path.name] = bibtexparser.load(handle).entries[0]


def get_global_context() -> dict:
    """Template context shared by every rendered page."""
    return dict(parsed_bibs=parsed_bibs, markdown=markdown.markdown)
def load_bibtex_file(filepath):
    """Parse *filepath* (UTF-8) and return the resulting BibDatabase."""
    with open(filepath, encoding='utf-8') as bib_source:
        return bibtexparser.load(bib_source)
def loadrefs(filename_bib):
    """Return the list of entries parsed from *filename_bib*."""
    with open(filename_bib) as handle:
        return bibtexparser.load(handle).entries
# Journal-name fragments that get a fixed replacement before abbreviation.
special = {
    'United States of America': 'U. S. A.',
    'A European Journal': 'European Journal'
}


def shorten(word):
    """Abbreviate *word*, re-attaching any trailing ':'-suffix unchanged."""
    head, tail = re.findall(r'([^:]+)(.*)', word)[0]
    return abbreviate(head) + tail


def process(title):
    """Return the abbreviated form of a journal *title*."""
    for full, abbr in special.items():
        title = title.replace(full, abbr)
    # Drop ignored words and apply spelling corrections.
    words = [corrections.get(w, w)
             for w in title.split()
             if w.lower() not in ignored]
    # Single-word titles are left unabbreviated.
    if len(words) > 1:
        words = [w if w.endswith('.') else shorten(w) for w in words]
    return ' '.join(words)


# Filter: read a bibliography on stdin, abbreviate journal names, write to stdout.
bib = bibtex.load(sys.stdin)
for item in bib.entries:
    if 'journal' in item:
        item['journal'] = process(item['journal'])
bibtex.dump(bib, sys.stdout)
def extract(self, source):
    """Parse *source* and return its list of entries."""
    database = load(source)
    return database.entries
import os import pathlib import plistlib import re import bibtexparser from bibtexparser.bwriter import BibTexWriter library = pathlib.Path(os.environ['DROPBOX']).joinpath('research', 'library.bib') survey = bibtexparser.bibdatabase.BibDatabase() survey.entries = [] with open(str(library)) as bib: db = bibtexparser.load(bib) for comment in db.comments: if comment.startswith('BibDesk Static Groups{'): #} c = comment.replace("BibDesk Static Groups{\n", '').replace('}', '') static_groups = plistlib.loads(bytes(c, 'utf-8')) for g in static_groups: if g['group name'] == 'sgvis': cites = set(g['keys'].split(',')) for article in db.entries: keys_to_drop = set() if article['ID'] in cites: if article.get('month'): keys_to_drop.add('month') for k in article.keys(): if k.startswith('opt') or k.startswith(
def main():
    """Command-line entry point: gather citation keys (from tex files, or
    from a bib file in "bib update mode"), resolve them against the main
    and secondary bib sources and ADS, rewrite the output bib file, and
    finally check PyPI for a newer release."""
    parser = ArgumentParser()
    parser.add_argument("files", metavar="TEX", nargs="+",
                        help="tex files to search citation keys")
    parser.add_argument(
        "-o",
        "--output",
        metavar="BIB",
        help=
        "main bibtex file; new entries will be added to this file, existing entries may be updated",
    )
    parser.add_argument(
        "-r",
        "--other",
        nargs="+",
        metavar="BIB",
        help="other bibtex files that contain existing references (read-only)",
    )
    parser.add_argument(
        "--no-update",
        dest="update",
        action="store_false",
        help="for existing entries, do not check ADS for updates",
    )
    parser.add_argument(
        "--force-regenerate",
        action="store_true",
        help=
        "for all existing entries, regenerate the bibtex with the latest version from ADS if found",
    )
    parser.add_argument(
        "--merge-other",
        action="store_true",
        help="merge the entries from other bibtex files",
    )
    # thanks to syrte for adding this option
    parser.add_argument(
        "--include-physics",
        action="store_true",
        help="include physics database when searching ADS",
    )
    parser.add_argument(
        "--no-backup",
        dest="backup",
        action="store_false",
        help="back up output file if being overwritten",
    )
    parser.add_argument(
        "--version",
        action="version",
        version="%(prog)s {version}".format(version=__version__),
    )
    args = parser.parse_args()

    if args.include_physics:
        global _database  # pylint: disable=global-statement
        _database = '("astronomy" OR "physics")'

    if len(args.files) == 1 and args.files[0].lower().endswith(
            ".bib"):  # bib update mode
        if args.output or args.other:
            parser.error(
                "Input file is a bib file, not tex file. This will enter bib update mode. Do not specify `--output` and `--other` together in this mode."
            )
        if not args.update:
            parser.error(
                "Input file is a bib file, not tex file. This will enter bib update mode. Must not specify --no-update"
            )
        if not os.path.isfile(args.files[0]):
            parser.error("Cannot locate input bib file {}".format(
                args.files[0]))
        keys = None
        args.output = args.files[0]

    elif args.output:  # bib output is specified
        keys, _ = search_keys(args.files, find_bib=False)

    else:  # bib output is missing, auto-identify
        keys, bib = search_keys(args.files, find_bib=True)
        if not bib:
            parser.error(
                "Cannot identify bibtex file from the tex source. Use -o to specify a bibtex file as output."
            )
        args.output = bib.pop(0)
        if args.other:
            args.other.extend(bib)
        else:
            args.other = bib
        msg = "Auto-identifying bibtex files...\n"
        msg += "Main bibtex source (output file): {}\n".format(args.output)
        if args.other:
            msg += "Additional bibtex sources: {}\n".format(", ".join(
                args.other))
        print(_headerize(msg))

    # Load (or create empty) main database plus a merged secondary database.
    if os.path.isfile(args.output):
        with open(args.output) as fp:
            bib = bibtexparser.load(fp, parser=get_bparser())
    else:
        bib = bibtexparser.loads(" ", parser=get_bparser())

    bib_other = bibtexparser.loads(" ", parser=get_bparser())
    if args.other:
        for f in args.other:
            with open(f) as fp:
                bib_other = update_bib(
                    bib_other, bibtexparser.load(fp, parser=get_bparser()))

    if keys is None:  # bib update mode
        keys = list(bib.entries_dict)

    not_found = set()
    to_retrieve = set()
    all_entries = defaultdict(list)

    # Resolve each key against the local databases and ADS.
    for key in keys:
        key_exists = key in bib.entries_dict
        key_exists_in_others = key in bib_other.entries_dict

        if args.update:
            if key_exists:
                bibcode = extract_bibcode(bib.entries_dict[key])
                bibcode_new = entry2bibcode(bib.entries_dict[key])
            elif key_exists_in_others and args.merge_other:
                bibcode = extract_bibcode(bib_other.entries_dict[key])
                bibcode_new = entry2bibcode(bib_other.entries_dict[key])
            else:
                bibcode_new = None
            if bibcode_new:
                all_entries[bibcode_new].append(key)
                if bibcode_new != bibcode or args.force_regenerate:
                    to_retrieve.add(bibcode_new)
                    print("{}:{} UPDATE => {}".format(
                        key,
                        "" if key_exists else
                        " FOUND IN SECONDARY BIB SOURCES,",
                        bibcode_new,
                    ))
                    continue

        if key_exists:
            print("{}: EXISTING".format(key))
            continue

        if key_exists_in_others and args.merge_other:
            bib.entries_dict[key] = bib_other.entries_dict[key]
            bib.entries = list(bib.entries_dict.values())
            print("{}: FOUND IN OTHER BIB SOURCE, MERGED".format(key))
            continue

        if key_exists_in_others:
            print("{}: FOUND IN OTHER BIB SOURCE, IGNORED".format(key))
            continue

        bibcode = find_bibcode(key)
        if bibcode:
            to_retrieve.add(bibcode)
            all_entries[bibcode].append(key)
            print("{}: NEW ENTRY => {}".format(key, bibcode))
        else:
            not_found.add(key)
            print("{}: NOT FOUND".format(key))

    if not_found:
        print(_headerize("Please check the following keys"))
        for key in not_found:
            print(key)

    repeated_keys = [t for t in all_entries.items() if len(t[1]) > 1]
    if repeated_keys:
        print(_headerize("The following keys refer to the same entry"))
        for b, k in repeated_keys:
            print(
                "{1} has been referred as the following keys; please keep only one:\n{0}\n"
                .format(" ".join(k), b))

    # Fetch the queued bibcodes from ADS and merge them in.
    if to_retrieve:
        print(_headerize("Building new bibtex file, please wait..."))
        bib_new = bibtexparser.loads(ads.ExportQuery(list(to_retrieve),
                                                     "bibtex").execute(),
                                     parser=get_bparser())
        for entry in bib_new.entries:
            entry["ID"] = all_entries[entry["ID"]][0]
        bib = update_bib(bib, bib_new)

    bib_dump_str = bibtexparser.dumps(bib).encode("utf8")
    if args.backup and os.path.isfile(args.output):
        copyfile(args.output, args.output + ".bak")
    with open(args.output, "wb") as fp:
        fp.write(bib_dump_str)
    print(_headerize("Done!"))

    # check version
    try:
        latest_version = StrictVersion(
            requests.get(
                "https://pypi.python.org/pypi/adstex/json",
                timeout=0.1,
            ).json()["info"]["version"])
    except (requests.RequestException, KeyError, ValueError):
        pass
    else:
        if latest_version > StrictVersion(__version__):
            msg = "A newer version of adstex (v{}) is now available!\n".format(
                latest_version)
            msg += "Please consider updating it by running:\n\n"
            msg += "pip install adstex=={}".format(latest_version)
            print(_headerize(msg))
def main():
    """Command-line entry point: scan tex files for citation keys, resolve
    them against the output/other bib files and ADS, and rewrite the
    output bib file."""
    parser = ArgumentParser()
    parser.add_argument('files', metavar='TEX', nargs='+',
                        help='tex files to search citation keys')
    parser.add_argument(
        '-o',
        '--output',
        metavar='BIB',
        required=True,
        help=
        'main bibtex file; new entries will be added to this file, existing entries may be updated'
    )
    parser.add_argument(
        '-r',
        '--other',
        nargs='+',
        metavar='BIB',
        help='other bibtex files that contain existing references (read-only)')
    parser.add_argument(
        '--no-update',
        dest='update',
        action='store_false',
        help='for existing entries, do not check ADS for updates')
    parser.add_argument(
        '--force-update',
        dest='force_update',
        action='store_true',
        help=
        'for all existing entries, overwrite with the latest version from ADS')
    parser.add_argument('--include-physics',
                        dest='include_physics',
                        action='store_true',
                        help='include physics database when searching ADS')
    parser.add_argument(
        '--version',
        action='version',
        version='%(prog)s {version}'.format(version=__version__))
    args = parser.parse_args()

    if args.include_physics:
        # Bug fix: without the `global` declaration this assignment only
        # created a function-local that was immediately discarded, so
        # --include-physics never affected the module-level _database used
        # by the ADS search helpers.
        global _database
        _database = '("astronomy" OR "physics")'

    keys = search_keys(args.files)

    # Load (or create empty) main database plus a merged secondary database.
    if os.path.isfile(args.output):
        with open(args.output) as fp:
            bib = bibtexparser.load(fp, parser=get_bparser())
    else:
        bib = bibtexparser.loads(' ', parser=get_bparser())

    bib_other = bibtexparser.loads(' ', parser=get_bparser())
    if args.other:
        for f in args.other:
            with open(f) as fp:
                bib_other = update_bib(
                    bib_other, bibtexparser.load(fp, parser=get_bparser()))

    not_found = set()
    to_retrieve = set()
    all_entries = defaultdict(list)
    try:
        for key in keys:
            if key in bib.entries_dict:
                if args.update:
                    bibcode = extract_bibcode(bib.entries_dict[key])
                    bibcode_new = entry2bibcode(bib.entries_dict[key])
                    if bibcode_new:
                        all_entries[bibcode_new].append(key)
                        if bibcode_new != bibcode or args.force_update:
                            to_retrieve.add(bibcode_new)
                            print('{}: UPDATE => {}'.format(key, bibcode_new))
                            continue
                print('{}: EXISTING'.format(key))
                continue

            if key in bib_other.entries_dict:
                print('{}: FOUND IN OTHER REFS, IGNORED'.format(key))
                continue

            bibcode = find_bibcode(key)
            if bibcode:
                to_retrieve.add(bibcode)
                all_entries[bibcode].append(key)
                print('{}: NEW ENTRY => {}'.format(key, bibcode))
            else:
                not_found.add(key)
                print('{}: NOT FOUND'.format(key))
    except KeyboardInterrupt:
        # Allow interrupting the (slow) ADS lookups; whatever was resolved
        # so far is still written out below.
        print()

    if not_found:
        print(_headerize('Please check the following keys'))
        for key in not_found:
            print(key)

    repeated_keys = [t for t in all_entries.items() if len(t[1]) > 1]
    if repeated_keys:
        print(_headerize('The following keys refer to the same entry'))
        for b, k in repeated_keys:
            print(
                '{1} has been referred as the following keys; please keep only one:\n{0}\n'
                .format(' '.join(k), b))

    if to_retrieve:
        print(_headerize('Building new bibtex file, please wait...'))
        bib_new = bibtexparser.loads(ads.ExportQuery(list(to_retrieve),
                                                     'bibtex').execute(),
                                     parser=get_bparser())
        for entry in bib_new.entries:
            entry['ID'] = all_entries[entry['ID']][0]
        bib = update_bib(bib, bib_new)

    bib_dump_str = bibtexparser.dumps(bib).encode('utf8')
    with open(args.output, 'wb') as fp:
        fp.write(bib_dump_str)
    print(_headerize('Done!'))
def main():
    """Command-line entry point (newer adstex variant).

    Supports two modes:
      * TeX mode — scan TeX files for citation keys, resolve them on ADS,
        and write new/updated entries into the output bib file.
      * Bib update mode — when the single input is a .bib file, refresh
        every entry it already contains.
    """
    parser = ArgumentParser()
    parser.add_argument('files', metavar='TEX', nargs='+',
                        help='tex files to search citation keys')
    parser.add_argument('-o', '--output', metavar='BIB',
                        help='main bibtex file; new entries will be added to this file, existing entries may be updated')
    parser.add_argument('-r', '--other', nargs='+', metavar='BIB',
                        help='other bibtex files that contain existing references (read-only)')
    parser.add_argument('--no-update', dest='update', action='store_false',
                        help='for existing entries, do not check ADS for updates')
    parser.add_argument('--force-regenerate', action='store_true',
                        help='for all existing entries, regenerate the bibtex with the latest version from ADS if found')
    parser.add_argument('--include-physics', action='store_true',
                        help='include physics database when searching ADS')
    parser.add_argument('--no-backup', dest='backup', action='store_false',
                        help='back up output file if being overwritten')
    parser.add_argument('--version', action='version',
                        version='%(prog)s {version}'.format(version=__version__))
    args = parser.parse_args()

    if args.include_physics:
        # FIX: the original bound a function-local `_database` that nothing
        # ever read, so --include-physics silently had no effect on the
        # module-level search scope.
        global _database
        _database = '("astronomy" OR "physics")'

    if len(args.files) == 1 and args.files[0].lower().endswith('.bib'):
        # bib update mode: the single input IS the output.
        if args.output or args.other:
            parser.error('Input file is a bib file, not tex file. This will enter bib update mode. Do not specify "output" and "other".')
        if not args.update:
            parser.error('Input file is a bib file, not tex file. This will enter bib update mode. Must not specify --no-update')
        if not os.path.isfile(args.files[0]):
            parser.error('Cannot locate input bib file {}'.format(args.files[0]))
        keys = None  # sentinel: fill from the bib file after loading it
        args.output = args.files[0]
    elif args.output:
        # bib output is specified explicitly
        keys, _ = search_keys(args.files, find_bib=False)
    else:
        # bib output is missing; auto-identify it from the tex sources
        keys, bib = search_keys(args.files, find_bib=True)
        if not bib:
            parser.error('Cannot identify bibtex file from the tex source. Use -o to specify a bibtex file as output.')
        args.output = bib.pop(0)
        if args.other:
            args.other.extend(bib)
        else:
            args.other = bib
        msg = 'Auto-identifying bibtex files...\n'
        msg += 'Main bibtex source (output file): {}\n'.format(args.output)
        if args.other:
            msg += 'Additional bibtex sources: {}\n'.format(', '.join(args.other))
        print(_headerize(msg))

    # Load the main (writable) bib file, or start from an empty database.
    if os.path.isfile(args.output):
        with open(args.output) as fp:
            bib = bibtexparser.load(fp, parser=get_bparser())
    else:
        bib = bibtexparser.loads(' ', parser=get_bparser())

    # Merge all read-only bib files into one lookup database.
    bib_other = bibtexparser.loads(' ', parser=get_bparser())
    if args.other:
        for f in args.other:
            with open(f) as fp:
                bib_other = update_bib(
                    bib_other, bibtexparser.load(fp, parser=get_bparser()))

    if keys is None:
        # bib update mode: refresh every key already in the file
        keys = list(bib.entries_dict)

    not_found = set()
    to_retrieve = set()
    all_entries = defaultdict(list)  # bibcode -> [citation keys]
    for key in keys:
        if key in bib.entries_dict:
            if args.update:
                bibcode = extract_bibcode(bib.entries_dict[key])
                bibcode_new = entry2bibcode(bib.entries_dict[key])
                if bibcode_new:
                    all_entries[bibcode_new].append(key)
                    if bibcode_new != bibcode or args.force_regenerate:
                        to_retrieve.add(bibcode_new)
                        print('{}: UPDATE => {}'.format(key, bibcode_new))
                        continue
            print('{}: EXISTING'.format(key))
            continue
        if key in bib_other.entries_dict:
            print('{}: FOUND IN OTHER BIB SOURCE, IGNORED'.format(key))
            continue
        bibcode = find_bibcode(key)
        if bibcode:
            to_retrieve.add(bibcode)
            all_entries[bibcode].append(key)
            print('{}: NEW ENTRY => {}'.format(key, bibcode))
        else:
            not_found.add(key)
            print('{}: NOT FOUND'.format(key))

    if not_found:
        print(_headerize('Please check the following keys'))
        for key in not_found:
            print(key)

    repeated_keys = [t for t in all_entries.items() if len(t[1]) > 1]
    if repeated_keys:
        print(_headerize('The following keys refer to the same entry'))
        for b, k in repeated_keys:
            print('{1} has been referred as the following keys; please keep only one:\n{0}\n'.format(' '.join(k), b))

    if to_retrieve:
        print(_headerize('Building new bibtex file, please wait...'))
        bib_new = bibtexparser.loads(
            ads.ExportQuery(list(to_retrieve), 'bibtex').execute(),
            parser=get_bparser())
        # Re-key each retrieved entry with the first citation key that
        # referenced its bibcode.
        for entry in bib_new.entries:
            entry['ID'] = all_entries[entry['ID']][0]
        bib = update_bib(bib, bib_new)
        bib_dump_str = bibtexparser.dumps(bib).encode('utf8')
        if args.backup and os.path.isfile(args.output):
            copyfile(args.output, args.output + '.bak')
        with open(args.output, 'wb') as fp:
            fp.write(bib_dump_str)

    print(_headerize('Done!'))

    # Best-effort check for a newer adstex release on PyPI; failures are
    # deliberately ignored so offline use still works.
    try:
        latest_version = StrictVersion(requests.get(
            'https://pypi.python.org/pypi/adstex/json').json()['info']['version'])
    except (requests.RequestException, KeyError, ValueError):
        pass
    else:
        if latest_version > StrictVersion(__version__):
            msg = 'A newer version of adstex (v{}) is now available!\n'.format(latest_version)
            msg += 'Please consider updating it by running:\n\n'
            msg += 'pip install adstex=={}'.format(latest_version)
            print(_headerize(msg))
def test_write_str(self):
    # Round-trip check: parse the fixture file, serialize it back to a
    # string, and compare against the expected dump.
    with open(self.input_file_path) as fh:
        database = bibtexparser.load(fh)
    dumped = bibtexparser.dumps(database)
    self.assertEqual(dumped, self.expected)
def test_parse_file_module(self):
    # Parsing the fixture via the module-level load() must produce the
    # expected entry list.
    with open(self.input_file_path) as fh:
        database = bibtexparser.load(fh)
    self.assertEqual(database.entries, self.entries_expected)
def peoplelisthtml():
    """Regenerate the students, collaborators and publications HTML pages
    from the hard-coded CSV/BibTeX sources under siteweb/.

    Relies on module-level helpers: i.Intern, collab.Collab,
    genhtml.makelist, bibstyle.display, bib.load, indent.
    """

    def _write_page(path, doc):
        # FIX: the original did f.write(indent(...).encode('utf-8')) on a
        # text-mode handle, which raises TypeError on Python 3 (bytes into a
        # 'w' file), and never closed the handle. Use a context manager and
        # an explicit encoding instead.
        with open(path, 'w', encoding='utf-8') as f:
            f.write(indent(doc.getvalue()))

    print('Students')
    filename = '/Users/stephane/Documents/Etudiants/Recherche/internships.csv'
    with open(filename) as csvfile:
        reader = list(csv.DictReader(csvfile, delimiter=';'))
    reader.sort(key=lambda x: x['Year'], reverse=True)
    stringlist = []
    for row in reader:
        print(row['Name'])
        student = i.Intern(row)
        stringlist.append(student.display_fancy())
    doc, tag, text = genhtml.makelist(stringlist, 'Past supervised students', order='u')
    _write_page('siteweb/students.html', doc)

    print('Collaborators')
    filename = '/Users/stephane/Documents/Etudiants/Recherche/collaborators.csv'
    with open(filename) as csvfile:
        reader = list(csv.DictReader(csvfile, delimiter=';'))
    reader.sort(key=lambda x: x['Name'])
    stringlist = []
    for row in reader:
        people = collab.Collab(row)
        stringlist.append(people.display_fancy())
    doc, tag, text = genhtml.makelist(stringlist, 'Collaborators', order='u')
    _write_page('siteweb/collaborators.html', doc)

    print('Biblio')
    filename = '/Users/stephane/Documents/Articles/Published/Liste_biblio/mesarticles.bib'
    with open(filename) as bibtex_file:
        bib_database = bib.load(bibtex_file)
    entries = list(bib_database.entries)
    entries.sort(key=lambda x: x['year'], reverse=True)
    stringlist = []
    for entry in entries:
        stringlist.append(bibstyle.display(entry, typ='html'))
    doc, tag, text = genhtml.makelist(stringlist, 'Publications', order='o', opt=' reversed')
    _write_page('siteweb/publications.html', doc)
import sys
import bibtexparser as btp
from collections import OrderedDict as odict

# Build per-group search strings and a validation-only bib file from the
# main bibliography.
bib_main = '../refs.bib'      # source bib
bib_val = 'validation.bib'    # output bib with only validation
val_bibid = '*.bibid'         # input bibid file (the * is the group name)
val_search = '*.search'       # output search file
wrap = '({}).m_titl'          # template search string
groups = ['ssa', 'extra']     # groups (+full)

dbo = btp.bibdatabase.BibDatabase()
search = []

# Parse the main bibliography once and index it by entry key.
with open(bib_main, 'r') as f:
    ddbi = btp.load(f).entries_dict


def clean(title):
    # Quote a title, dropping braces and trailing question marks.
    return '"{}"'.format(title.strip('{}?'))


for group in groups:
    # Read this group's validation entry keys.
    with open(val_bibid.replace('*', group), 'r') as f:
        keys = f.read().split(',\n')
    # Build the OR-joined title search string for the group.
    search += [' OR\n '.join([clean(ddbi[key]['title']) for key in keys])]
    # Write the group's search file.
    with open(val_search.replace('*', group), 'w') as f:
        f.write(wrap.format(' ' + search[-1]))
    # Accumulate the group's entries into the validation database.
    dbo.entries += [ddbi[key] for key in keys]

# Dump the combined validation database and the full search string.
with open(bib_val, 'w') as f:
    btp.dump(dbo, f)
with open(val_search.replace('*', 'full'), 'w') as f:
    f.write(wrap.format(' ' + ' OR\n '.join(search)))
ENTRIES_TO_EXTRACT = [ ] entriesById = {} outBib = BibDatabase() for fpBibtex in BIBTEX_FILES: with open(fpBibtex, encoding="utf8") as bibtex_file: bib = bibtexparser.load(bibtex_file) for entry in bib.entries: if entry['ID'] not in entriesById: entriesById[entry['ID']] = [entry] else: entriesById[entry['ID']].append(entry) for entryToExtract in ENTRIES_TO_EXTRACT: if entryToExtract in entriesById: for ref in entriesById[entryToExtract]: outBib.entries.append( ref ) with open(OUTPUT_FILE, 'w+') as fp:
for item in list_quoting: if item in bib.keys(): try: value = bib[item].encode('utf-8').replace("\"", "'") if item == 'link': value = value.replace("http://www.lis.ic.unicamp.br", "") bibtex_file.write("\n" + item + ": \"" + value + "\"") except: print bib['ID'] + ": " + item with open('lis.bib') as bibtex_file: parser = BibTexParser() #parser.customization = homogeneize_latex_encoding parser.customization = convert_to_unicode bibtex_database = bibtexparser.load(bibtex_file, parser=parser) i = 0 for bib in bibtex_database.entries: name = bib['date'] + '-' + bib['ID'] + '.html' with open("../_posts/publications/" + name, 'w+') as bibtex_file: authors = bib['author'].split(" and ") authors = "; ".join([ "".join(x.split(" ")[-1]) + ", " + " ".join(x.split(" ")[0:-1]) for x in authors ]) bibtex_file.write("---") bibtex_file.write("\ncategories: [\"publications\",\"" + bib['year'] + "\"]") bibtex_file.write("\ncode: \"" + bib['ID'] + bib['year'] + "\"")
def prepare(doc): file = doc.get_metadata("filename", "") doc.sourcedir = doc.get_metadata("sourcedir", "content/") bibfile = doc.get_metadata("bibfile", "") if bibfile: doc.bibfile = os.path.join(doc.sourcedir, bibfile) else: doc.bibfile = "" if doc.bibfile: with open(doc.bibfile) as bibtex_file: doc.bibentries = bibtexparser.load(bibtex_file).entries_dict else: doc.bibentries = {} logdir = doc.get_metadata("logdir", "log") if not os.path.isdir(logdir): logdir = "" logfilename = os.path.join(logdir, os.path.basename(file) + ".log") if file else "book-filter.log" # logfilename = "log_"+file+".book.log" doc.label_descriptions = {} # eventually handle different files, counters doc.searchtext = "" doc.logfile = open(logfilename, "w", encoding='utf-8') doc.logfile.write("LOG: argv:" + str(sys.argv) + "\n") doc.logfile.write("Metadata:\n" + str(doc.metadata) + "\n") doc.lastlabel = "" doc.counter = 0 doc.chapternum = doc.get_metadata("chapternum", default="99") if doc.format == "html": doc.toc = removekeys(loadyaml("toc.yaml"), doc.chapternum + ".") pdflink(doc) auxfilename = doc.get_metadata("auxfile", "bookaux.yaml") doc.labels = loadyaml(auxfilename) doc.currentlevel = 1 doc.currentplace = [doc.chapternum] doc.footnotecounter = 1 doc.footnotecontents = [] D = doc.get_metadata( "latexsectionheaders", { 1: "chapter", 2: "section", 3: "subsection", 4: "subsubsection", 5: "paragraph", 6: "subparagraph" }) doc.latex_headers = {int(k): D[k] for k in D} logstring("Latex Headers: " + str(doc.latex_headers), doc) doc.label_classes = doc.get_metadata("latexsectionheaders", { "solvedex": "Solved Exercise", "bigidea": "Big Idea" }) doc.handlers = [ h_paragraph, h_csvtable, h_add_search, h_block_header, h_link_ref, h_latex_headers, h_latex_div, h_code_block, h_code_inline_draft, h_latex_image, h_latex_cite, h_html_footnote, h_html_header, h_emph, h_math ]
if fuzz.ratio(article['title'].lower(), title.lower()) >= 90: article['abstract'] = summary article['arxiv_id'] = arxiv_id inputparser = argparse.ArgumentParser( description= 'Split input .bib file into separate markdown files with yaml front matter ' ) inputparser.add_argument('file', type=str, help='The path to the bib file') args = inputparser.parse_args() with open(args.file) as bibfile: bib_data = bibtexparser.load(bibfile) for ref in bib_data.entries: bibtexparser.customization.convert_to_unicode(ref) ref['entry'] = ref.pop('ENTRYTYPE') if 'url' in ref.keys(): ref['hyperlink'] = ref.pop('url') for key in ref: ref[key] = ref[key].replace('\n', ' ') msc = ref['mrclass'].replace('(', '') msc = msc.replace(')', '') msc = msc.split() ref['mrclass'] = {'primary': msc[0], 'secondary': msc[1:]} bibtexparser.customization.author(ref) authors = [] all_authors_last = ''
import os
import sys

import bibtexparser

# bib2pdf: extract all non-null DOIs from a bib file and hand them to
# PyPaperBot for download.

if len(sys.argv) < 3:
    print("USAGE: bib2pdf.py bibtexfile.bib path_for_files_download")
    sys.exit()  # use sys.exit, not the site builtin exit()

pathdwn = sys.argv[2]
bibfilename = str(sys.argv[1])

with open(bibfilename) as bibtex_file:
    bib_database = bibtexparser.load(bibtex_file)

# FIX: the DOI list file was opened before parsing and never wrapped in a
# context manager; write it with `with` so it is always closed/flushed
# before PyPaperBot reads it.
with open("doi_list.txt", "w") as f:
    for entry in bib_database.entries:
        doi = entry['doi']
        if doi != 'null':
            f.write(doi + '\n')

# NOTE(review): pathdwn comes straight from argv into a shell command —
# a path containing shell metacharacters will break (or be interpreted by)
# the shell. Consider subprocess.run([...], shell=False) here.
os.system("/usr/local/bin/PyPaperBot --doi-file=./doi_list.txt"
          " --dwn-dir=" + pathdwn + " --restrict=1")
def main():
    """Parse an ORCID BibTeX export and print a flat, numbered reference
    list with CrossRef citation counts and Altmetric scores appended.

    Output format per entry:
        N. Author, A., Author, B., YEAR. Title. Journal Vol, Pages.
        Impact Factor = FILLME
        Citations = <CrossRef count>
        Altmetric score = <score>
    """
    parser = argparse.ArgumentParser(
        description='Parses an ORCID bibtex export, and prints a nice flat text file with Altmetric and citations added.')
    parser.add_argument(
        '-file',
        help='Alternative filename for the ORCID export. Default: "works.bib".',
        default='works.bib')
    parser.add_argument(
        '-logname',
        help='Where the logs should be sorted. Default: "arc_friend.log"',
        default='arc_friend.log')
    args = parser.parse_args()

    logging.basicConfig(filename=args.logname, level=logging.INFO, filemode='w')

    # The common_strings option must be set when the parser object is
    # created; it has no effect if changed afterwards.
    bib_parser = BibTexParser(common_strings=False)
    bib_parser.ignore_nonstandard_types = False
    bib_parser.homogenise_fields = True

    with open(args.file) as bibtex_file:
        bib_database = bibtexparser.load(bibtex_file, bib_parser)

    now = datetime.datetime.now()
    now = now.strftime('%dth %B, %Y')
    print(f'Citations and Altmetric scores obtained on {now}.')
    # Important to mention the source: Google Scholar reports higher counts.
    print('Citations obtained from CrossRef.')

    counter = 1
    for e in bib_database.entries:
        e = bibtexparser.customization.homogenize_latex_encoding(e)
        # Get rid of escaped names like "Marie Kubal\\'akov\\'a".
        e = bibtexparser.customization.convert_to_unicode(e)

        author_string = e['author']
        author_list = author_string.replace('{', '').replace('}', '').split(' and ')

        # Shorten first names: "Farquard Banana" becomes "Banana, F.B.".
        # ORCID emits names in (at least) three layouts — handle each.
        shortened_author_list = []
        for a in author_list:
            a = a.strip()
            if not a:
                continue
            newa = ''
            if ',' in a:
                # "Bayer, Philipp E" — last name first.
                newa += a.split()[0] + ' '
                newa += '.'.join([substring[0] for substring in a.split()[1:]])
            elif a.split()[-1].isupper():
                # "Bayer PE" — initials already collapsed.
                newa += a.replace(' ', ', ')
            else:
                # "Philipp Bayer" — last name last.
                newa += a.split()[-1] + ', '
                newa += '.'.join([substring[0] for substring in a.split()[:-1]])
            # Add missing dot at end of the initials.
            if newa[-1] != '.':
                newa += '.'
            shortened_author_list.append(newa)
        shortened_author_string = ', '.join(shortened_author_list)

        # Is this a book chapter, or a paper?
        if 'booktitle' in e:
            journal = e['booktitle']
        else:
            try:
                journal = e['journal'].replace('\\', '').replace('}', '').replace('{', '')
            except KeyError:
                journal = False

        # FIX: title must be computed BEFORE the doi lookup below — the
        # original referenced `title` in the no-doi log message before it
        # was assigned, raising NameError for entries without a doi.
        title = e['title'].replace('}', '').replace('{', '').replace('\n', '').replace('\r', '')

        try:
            doi = e['doi']
        except KeyError:
            logging.info(f'{title} has no doi, skipping (for now?)')
            continue

        if journal == 'Zenodo' or 'ZENODO' in doi:
            logging.info(f'Skipping cited dataset {title}, {doi} at Zenodo (for now?)')
            continue

        try:
            year = e['year']
        except KeyError:
            year = False
        try:
            volume = e['volume']
        except KeyError:
            volume = False
        try:
            pages = e['pages']
        except KeyError:
            pages = False

        overall_string = f'{counter}. {shortened_author_string}, {year}. {title}.'
        if journal:
            overall_string += f' {journal}, '
        if volume:
            overall_string += f' {volume}, '
        if pages:
            overall_string += f' {pages}.'
        overall_string = overall_string.strip()
        # Collapse double spaces left by missing fields.
        overall_string = overall_string.replace('  ', ' ')
        if overall_string[-1] == ',':
            overall_string = overall_string.rstrip(',') + '.'

        # Citation count, e.g.
        # http://api.crossref.org/works/10.1179/1942787514y.0000000039
        crossref_url = f'http://api.crossref.org/works/{doi}'
        r = requests.get(crossref_url)
        reference_count = r.json()['message']['is-referenced-by-count']

        # Altmetric, e.g. https://api.altmetric.com/v1/doi/10.1038/news.2011.490
        altmetric_url = f'https://api.altmetric.com/v1/doi/{doi}'
        r = requests.get(altmetric_url)
        try:
            altmetric_score = r.json()['score']
        except json.decoder.JSONDecodeError:
            # Transient failure — retry once before giving up on the entry.
            r = requests.get(altmetric_url)
            try:
                altmetric_score = r.json()['score']
            except json.decoder.JSONDecodeError:
                logging.info(f'Cannot get Altmetric score for {doi}. {title}')
                continue

        overall_string += f'\nImpact Factor = FILLME\nCitations = {reference_count}\nAltmetric score = {altmetric_score}\n'
        overall_string = overall_string.replace('..', '')
        print(overall_string)
        counter += 1
def main():
    # Refresh per-article citation counts in ircre.bib by scraping Google
    # Scholar cluster pages via the scholarpy helper script, then write the
    # updated database to cited-add-ircre.bib.
    import bibtexparser
    from bibtexparser.bwriter import BibTexWriter
    with open('ircre.bib', encoding='utf8') as bibtex_file:
        bib_database = bibtexparser.load(bibtex_file)
    entries = bib_database.entries
    print("---------------------------")
    print("---------------------------")
    print("---------------------------")
    print("Total articles number: " + str(len(entries)))
    print("---------------------------")
    print("---------------------------")
    print("---------------------------")
    writer = BibTexWriter()
    writer.indent = ' '
    writer.order_entries_by = ('order', )
    # Copy of all @article entries; NOTE(review): built but never used below.
    articleentries = []
    for i in range(len(entries)):
        if entries[i]['ENTRYTYPE'] == 'article':
            articleentries.append(entries[i].copy())
    # Only entries from index 100 onward are refreshed — presumably earlier
    # entries were processed in a previous run; TODO confirm.
    for n in range(len(entries) - 100):
        i = n + 100
        print("---------------------------")
        print("Entry number: " + str(i))
        title = entries[i]['title']
        clusterid = entries[i]['clusterid']
        print("Title: " + title)
        print("Cluster ID: " + clusterid)
        if not clusterid == "unknown":
            print("hello" + str(i))
            try:
                # Shell out to scholar.py and grep the "Citations" line;
                # the last whitespace-separated token is the count.
                citations = os.popen('''./scholarpy/scholar.py -c 1 -C ''' +
                                     clusterid +
                                     ''' |grep -v list |grep Citations'''
                                     ).read().strip().split()[-1]
            except:
                # NOTE(review): bare except hides real errors (e.g. IndexError
                # when grep matches nothing) — consider narrowing.
                citations = "unknown"
        else:
            citations = "unknown"
        print("new Citations: " + citations)
        if 'cited' in entries[i]:
            oldcitednumber = int(entries[i]['cited'])
        else:
            oldcitednumber = 0
        print("Old Cited Number: " + str(oldcitednumber))
        if not citations == "unknown":
            citednumber = int(citations)
            # Only accept increases smaller than 8 — presumably a sanity
            # bound against scraping glitches; TODO confirm.
            if citednumber > oldcitednumber and (
                    (citednumber - oldcitednumber) < 8):
                entries[i]['cited'] = str(citednumber)
                # Checkpoint the whole database after every accepted update.
                with open('cited-add-ircre.bib', 'w', encoding='utf8') as newbibfile:
                    bibtexparser.dump(bib_database, newbibfile, writer=writer)
                os.popen("cp cited-add-ircre.bib tempcited-add-ircre.bib")
    # Final write of the (possibly updated) database.
    with open('cited-add-ircre.bib', 'w', encoding='utf8') as newbibfile:
        bibtexparser.dump(bib_database, newbibfile, writer=writer)
    return 0
import bibtexparser

# Path to the Web of Science export being inspected.
BIB_PATH = 'C:\\Users\\dell\\Desktop\\任务与资料\\savedrecs.bib'

# FIX: the original repeated the identical open/load/print block five times
# with five throwaway variables (bib_database .. bib_database4); a loop
# performs the same five load-and-print passes without the duplication.
for _ in range(5):
    with open(BIB_PATH, encoding='UTF-8') as bibtex_file:
        bib_database = bibtexparser.load(bibtex_file)
    print(bib_database.entries)
def main():
    # Find bib entries that still point at arXiv preprints, query ADS for a
    # published version, and (unless --dry-run) swap the updated entries
    # into the bibliography, writing to --output or stdout.
    desc = 'a simple tool to find out-of-date arXiv preprints, optionally updating and writing a new file'
    parser = argparse.ArgumentParser(description=desc)
    h = 'the input bib file to search through'
    parser.add_argument('bibfile', type=str, help=h)
    h = 'do a dry run, simply printing out all of the out-of-date references'
    parser.add_argument('--dry-run', '-n', action='store_true', help=h)
    h = 'the output bib file to write; if not provided; any new entries will be writted to stdout'
    parser.add_argument('-o', '--output', type=str, help=h)
    h = "string specifying NASA ADS API token; see https://github.com/adsabs/adsabs-dev-api#access. "
    h += "The token can also be stored in ~/.ads/dev_key for repeated use"
    parser.add_argument('-t', '--token', type=str, help=h)
    h = 'whether to use verbose output'
    parser.add_argument('-v', '--verbose', action='store_true', help=h)

    ns = parser.parse_args()

    # set the token
    if ns.token is not None:
        os.environ['ADS_DEV_KEY'] = ns.token

    # parse the bib file
    with open(ns.bibfile, 'r') as ff:
        refs = bibtexparser.load(ff)

    # the indices of pre-prints
    preprints = []
    for i, r in enumerate(refs.entries):
        adsurl = r.get('adsurl', None)  # NOTE(review): fetched but unused
        if is_preprint(r):
            preprints.append(i)
        elif ns.verbose:
            print("entry '%s' appears to be a published work" % (r['ID']))

    # sort from largest to smallest index so that pop(i) below does not
    # shift the not-yet-processed indices
    preprints = sorted(preprints, reverse=True)
    args = (len(preprints), len(refs.entries))
    print("%d out of %d references possibly out-of-date..." % args)

    # get the relevant info from ADS
    updated = []
    for i in preprints:
        r = refs.entries[i]
        print("checking publication status of the '%s' bib entry" % r['ID'])

        arxiv_id = None
        # try to match an arXiv identifier pattern in any field
        for field in r:
            for pattern in ARXIV_PATTERNS:
                matches = pattern.search(r[field])
                if matches:
                    arxiv_id = matches.group(0)
                    break

        # check ads url too
        if arxiv_id is None and 'adsurl' in r and 'abs/' in r['adsurl']:
            arxiv_id = r['adsurl'].split('abs/')[-1]

        # skip this one and warn!
        if arxiv_id is None:
            warnings.warn("cannot check entry '%s'; please add 'eprint' or proper 'adsurl' fields" % r['ID'])
            continue

        # query for the bibcode
        try:
            q = ads.SearchQuery(q="arxiv:%s" % arxiv_id, fl=['bibcode'])
        except:
            # NOTE(review): bare except re-raised as ValueError — this also
            # masks unrelated failures; consider narrowing.
            raise ValueError("syntax error in bib file; check 'eprint' and 'adsurl' fields for '%s'" % r['ID'])

        # check for token
        if q.token is None:
            raise RuntimeError("no ADS API token found; cannot query the ADS database. "
                               "See https://github.com/adsabs/adsabs-dev-api#access")

        # process each paper
        for paper in q:
            # get the bibtex
            bibquery = ads.ExportQuery(paper.bibcode)
            bibtex = bibquery.execute()

            # new ref entry
            new_ref = bibtexparser.loads(bibtex).entries[0]

            # update if published: remove the stale preprint entry and
            # append the fresh one (safe because indices descend)
            if not is_preprint(new_ref):
                updated.append(new_ref['ID'])
                print(" '%s' entry found to be out-of-date" % r['ID'])

                # remove old entry
                refs.entries.pop(i)

                # add new entry
                refs.entries.append(new_ref)

    # write output file
    if len(updated) and not ns.dry_run:
        writer = bibtexparser.bwriter.BibTexWriter()
        if ns.output is not None:
            with open(ns.output, 'w') as ff:
                ff.write(writer.write(refs))
        else:
            # only print out the new ones
            indices = [i for i, ref in enumerate(refs.entries) if ref['ID'] in updated]
            refs.entries = [refs.entries[i] for i in indices]
            print(writer.write(refs))
TODO: - add functionality that takes in a file w/ a list of species names, instead of hardcoding them here - convert sys.argv inputs to argparse ''' import bibtexparser import sys import re fname = sys.argv[-2] outname = sys.argv[-1] with open(fname) as f: try: bib_database = bibtexparser.load(f) except KeyError: print('Error: Please remove the "month" field from your BibTeX file.') print( 'This can be done using grep -v "month" file.bib > new_file.bib.') sys.exit(0) genus_names = [ 'chlamydomonas', 'drosophila', 'saccharomyces', 'arabidopsis', 'caenorhabditis', 'mus' ] species_names = [ 'reinhardtii', 'simulans', 'melanogaster', 'persimilis', 'cerevisiae', 'paradoxus', 'pombe', 'thaliana', 'elegans', 'musculus', 'reinhardi' ]
yield author.split(" ")[-1] else: yield author try: authors = entry["author"] except KeyError: authors = entry["editor"] authors = normalize(authors).split("and") return list(get_last_name(authors)) print("Reading Bibliography...") with open(sys.argv[1]) as bibtex_file: bibliography = bibtexparser.load(bibtex_file, parser) print("Looking for Dois...") before = 0 new = 0 total = len(bibliography.entries) for i, entry in enumerate(bibliography.entries): print("\r{i}/{total} entries processed, please wait...".format( i=i, total=total), flush=True, end="") try: if "doi" not in entry or entry["doi"].isspace(): title = entry["title"] authors = get_authors(entry) for author in authors:
# Save the credentials for the next run with open('token.pickle', 'wb') as token: pickle.dump(creds, token) service = build('sheets', 'v4', credentials=creds) return service if __name__ == '__main__': assert len(sys.argv) == 2, 'Please only pass the path to bib.' # Parse bibtex file bib_path = sys.argv[-1] parser = bibtexparser.bparser.BibTexParser(common_strings=True) with open(bib_path) as bib_file: bib_db = bibtexparser.load(bib_file, parser=parser) # Parse existing papers service = google_sheets_login() sheet = service.spreadsheets() result = sheet.values().get(spreadsheetId=SAMPLE_SPREADSHEET_ID, range=SAMPLE_RANGE_NAME).execute() existing_rows = result.get('values', []) all_titles = [row[1] for row in existing_rows if len(row) >= 2] new_rows = [] for entry in reversed(bib_db.entries): # Insert into DB if title is not duplicate title = entry.get('title', None) title = title.replace('{', '').replace('}', '') if title is not None and title not in all_titles:
def main(bibtex_filename, fig_filename, headless=False):
    # Build a directed citation graph from a BibTeX file (using a custom
    # 'citations' field listing cited entry IDs) and render it with
    # networkx/matplotlib, saving the figure to fig_filename.
    #
    # Parameters:
    #   bibtex_filename -- path of the BibTeX file to read
    #   fig_filename    -- path where the plot is saved
    #   headless        -- when True, skip plt.show()
    # Returns 0 on completion.

    # Settings
    cite_as_noun = True  # only draw edges to entries present in the file

    # Open bibtext
    with open(bibtex_filename) as bibtex_file:
        bib_database = bibtexparser.load(bibtex_file)
    print(bib_database.entries)
    print(bib_database.comments)
    print(bib_database.preambles)
    print(bib_database.strings)

    # Calculate year range
    min_year = int(bib_database.entries[0]['year'])
    max_year = int(bib_database.entries[0]['year'])
    for e in bib_database.entries:
        min_year = min(min_year, int(e['year']))
        max_year = max(max_year, int(e['year']))
    year_range = int(max_year) - int(min_year)

    # Calculate number of paper in each year
    papers_per_year = {}
    papers_per_year_counter = {}
    for i in range(min_year, max_year + 1):
        quantity = count_by_attribute(bib_database.entries, 'year', str(i))
        if quantity > 0:
            papers_per_year[str(i)] = quantity
            papers_per_year_counter[str(i)] = 0

    # Define graph and node position: x spreads papers within a year,
    # y encodes the year itself (with small offsets to avoid overlap)
    dict_pos = {}
    mapping = {}  # node name -> integer index (used when relabeling)
    G = nx.DiGraph()
    i = 1
    for entry in bib_database.entries:
        # blue if it cites anyone
        node_color = 'blue' if 'citations' in entry else 'red'
        # get paper ID
        node_name = entry['ID']
        # add node to the graph
        G.add_node(node_name, color=node_color)
        # year as a float (for vertex position in the graph)
        year = float(entry['year'])
        # position_x for a year is (0,1,2,...,n)/n
        positionx = (papers_per_year_counter[entry['year']]
                     ) / papers_per_year[entry['year']]
        papers_per_year_counter[entry['year']] += 1
        current_year_offset = year - int(min_year)
        position = [
            positionx + current_year_offset / 1000,
            current_year_offset / (year_range) + positionx / 5
        ]
        # papers_per_year_counter.update({entry['year']: papers_per_year_counter[entry['year']]+1 })
        # give position to this index
        dict_pos[i] = position
        # map node name to the index
        mapping[node_name] = i
        i += 1

    # Create edges from each entry's comma-separated 'citations' field
    for entry in bib_database.entries:
        if 'citations' in entry:
            print(entry['ID'])
            for citation_id in entry['citations'].replace('\n', '').split(','):
                print(' ' + citation_id)
                # if there is a citation id
                if citation_id:
                    # if we will cite as noun
                    if cite_as_noun:
                        # find entry with this id
                        citation_entry = find_by_attribute(
                            bib_database.entries, 'ID', citation_id)
                        # if found, create edge
                        if citation_entry:
                            G.add_edge(entry['ID'], citation_entry['ID'])
                    else:
                        # else, create edge
                        G.add_edge(entry['ID'], citation_id)

    # Calculate node colors as the number of citations it has
    node_colors = []
    for node in G.nodes:
        in_edges = G.in_degree(node)
        node_colors.append(in_edges)
    range_of_colors = max(node_colors) - min(node_colors)

    # Convert graph to figure
    plt.figure(figsize=(8, 6))

    # Plot graph as planar (if small) or by year (if large);
    # planar_layout raises for non-planar graphs, hence the fallback
    if G.number_of_nodes() < 10:
        try:
            dict_pos = nx.planar_layout(G)
            nx.draw_networkx(G,
                             with_labels=True,
                             node_color=node_colors,
                             cmap=plt.cm.Blues,
                             vmin=min(node_colors) - range_of_colors * 0.4,
                             vmax=max(node_colors),
                             font_weight='bold',
                             pos=nx.planar_layout(G))
        except:
            dict_pos = nx.circular_layout(G)
            nx.draw_networkx(G,
                             with_labels=True,
                             node_color=node_colors,
                             cmap=plt.cm.Blues,
                             vmin=min(node_colors) - range_of_colors * 0.4,
                             vmax=max(node_colors),
                             font_weight='bold',
                             pos=nx.circular_layout(G))
    else:
        # large graphs: relabel nodes to integers and print the legend
        G = nx.relabel_nodes(G, mapping)
        i = 1
        for e in mapping:
            print(str(i) + ': ' + e)
            i += 1
        nx.draw_networkx(G,
                         with_labels=True,
                         node_color=node_colors,
                         cmap=plt.cm.Blues,
                         vmin=min(node_colors) - range_of_colors * 0.4,
                         vmax=max(node_colors),
                         font_weight='bold',
                         pos=dict_pos)
        for i in range(int(min_year), int(max_year) + 1):
            plt.text(-0.2, (i - int(min_year)) /
                     (int(max_year) - int(min_year)) - 0.03,
                     str(i) + ':')

    # Insert text with years
    maxDP = 0.0
    for e in dict_pos:
        if maxDP < dict_pos[e][1]:
            maxDP = dict_pos[e][1]
    # annotate each node with its in-degree (citation count)
    for node in G:
        plt.text(dict_pos[node][0],
                 dict_pos[node][1] - 1.28 / (32 * maxDP),
                 str(G.in_degree(node)),
                 fontsize=4,
                 color='red')

    # Insert ticks (hide all axis decoration)
    plt.tick_params(axis='x',
                    which='both',
                    bottom=False,
                    top=False,
                    labelbottom=False)
    plt.tick_params(axis='y',
                    which='both',
                    right=False,
                    left=False,
                    labelleft=False)
    for pos in ['right', 'top', 'bottom', 'left']:
        plt.gca().spines[pos].set_visible(False)

    # Save plot
    plt.savefig(fig_filename)

    # Show plot
    if headless is False:
        plt.show()

    # End main
    return 0
def main():
    # Build a directed citation graph from 'bibtex.bib' (using a custom
    # 'citations' field of cited entry IDs) and save it to
    # citation_graph.eps. Earlier variant of the parameterized main() above.

    # Settings
    cite_as_noun = True  # only draw edges to entries present in the file

    # Open bibtext
    with open('bibtex.bib') as bibtex_file:
        bib_database = bibtexparser.load(bibtex_file)
    print(bib_database.entries)
    print(bib_database.comments)
    print(bib_database.preambles)
    print(bib_database.strings)

    # Define graph
    G = nx.DiGraph()
    dict_pos = {}
    dict_qpycurr = {}  # running per-year counter while positioning nodes
    initial_year = 2000
    current_year = 2020
    dict_qpy = {}  # papers per year (only years that actually occur)
    i = initial_year
    while i <= current_year:
        qtde = count_by_attribute(bib_database.entries, 'year', str(i))
        if qtde > 0:
            dict_qpy[str(i)] = qtde
            dict_qpycurr[str(i)] = 0
        i += 1
    # first key = earliest populated year, last key = latest populated year
    # (relies on dict insertion order, Python 3.7+)
    for e in dict_qpy:
        initial_year = e
        break
    for e in dict_qpy:
        final_year = e

    # Create nodes; x spreads papers within a year, y encodes the year
    mapping = {}  # node name -> integer index (used when relabeling)
    i = 1
    for entry in bib_database.entries:
        node_color = 'blue' if 'citations' in entry else 'red'
        node_name = entry['ID']
        G.add_node(node_name, color=node_color)
        year = float(entry['year'])
        positionx = (dict_qpycurr[entry['year']]) / dict_qpy[entry['year']]
        dict_qpycurr.update({entry['year']: dict_qpycurr[entry['year']] + 1})
        position = [
            positionx + (year - int(initial_year)) / 1000,
            (year - int(initial_year)) /
            (int(final_year) - int(initial_year)) + positionx / 5
        ]
        dict_pos[i] = position
        mapping[node_name] = i
        i += 1

    # Create edges from each entry's comma-separated 'citations' field
    for entry in bib_database.entries:
        if 'citations' in entry:
            print(entry['ID'])
            for citation_id in entry['citations'].split(','):
                print(' ' + citation_id)
                if citation_id:
                    if cite_as_noun:
                        citation_entry = find_by_attribute(
                            bib_database.entries, 'ID', citation_id)
                        if citation_entry:
                            node_name_a = entry['ID']
                            node_name_b = citation_entry['ID']
                            #print(node_name_a + ' : ' + node_name_b)
                            G.add_edge(node_name_a, node_name_b)
                    else:
                        G.add_edge(entry['ID'], citation_id)

    # Calculate node colors as the number of citations it has
    node_colors = []
    for node in G.nodes:
        in_edges = G.in_degree(node)
        node_colors.append(in_edges)
    range_of_colors = max(node_colors) - min(node_colors)

    # Convert graph to figure
    plt.figure(figsize=(8, 6))

    # Options
    #pos=nx.planar_layout(G)
    # Small graphs: try planar layout, fall back to circular (planar_layout
    # raises for non-planar graphs). Large graphs: positional-by-year layout.
    if G.number_of_nodes() < 30:
        try:
            dict_pos = nx.planar_layout(G)
            nx.draw_networkx(G,
                             with_labels=True,
                             node_color=node_colors,
                             cmap=plt.cm.Blues,
                             vmin=min(node_colors) - range_of_colors * 0.4,
                             vmax=max(node_colors),
                             font_weight='bold',
                             pos=nx.planar_layout(G))
        except:
            dict_pos = nx.circular_layout(G)
            nx.draw_networkx(G,
                             with_labels=True,
                             node_color=node_colors,
                             cmap=plt.cm.Blues,
                             vmin=min(node_colors) - range_of_colors * 0.4,
                             vmax=max(node_colors),
                             font_weight='bold',
                             pos=nx.circular_layout(G))
    else:
        G = nx.relabel_nodes(G, mapping)
        i = 1
        for e in mapping:
            print(str(i) + ': ' + e)
            i += 1
        nx.draw_networkx(G,
                         with_labels=True,
                         node_color=node_colors,
                         cmap=plt.cm.Blues,
                         vmin=min(node_colors) - range_of_colors * 0.4,
                         vmax=max(node_colors),
                         font_weight='bold',
                         pos=dict_pos)
        for i in range(int(initial_year), int(final_year) + 1):
            plt.text(-0.2, (i - int(initial_year)) /
                     (int(final_year) - int(initial_year)) - 0.03,
                     str(i) + ':')

    # Annotate each node with its in-degree (citation count)
    maxDP = 0.0
    for e in dict_pos:
        if maxDP < dict_pos[e][1]:
            maxDP = dict_pos[e][1]
    for node in G:
        plt.text(dict_pos[node][0],
                 dict_pos[node][1] - 1.28 / (32 * maxDP),
                 str(G.in_degree(node)),
                 fontsize=4,
                 color='red')

    # Hide all axis decoration
    plt.tick_params(axis='x',
                    which='both',
                    bottom=False,
                    top=False,
                    labelbottom=False)
    plt.tick_params(axis='y',
                    which='both',
                    right=False,
                    left=False,
                    labelleft=False)
    for pos in ['right', 'top', 'bottom', 'left']:
        plt.gca().spines[pos].set_visible(False)
    #plt.show()

    # Save plot
    plt.savefig("citation_graph.eps")

    # End main
    return 0
#!/usr/bin/env python3
"""Print a \\cite{...} line (with year, if present) for every entry in
references.bib."""
import bibtexparser

with open('references.bib') as input_file:
    bib_entries = bibtexparser.load(input_file)

for entry in bib_entries.entries:
    # FIX: renamed `id` (shadowed the builtin) and escaped the backslash —
    # "\c" was an invalid escape sequence relying on Python passing it
    # through (a DeprecationWarning, and an error in future versions).
    entry_id = entry["ID"]
    try:
        year = entry["year"]
    except KeyError:
        year = ""
    print("\\cite{" + entry_id + "}\t" + year)
def read_conference(conferencebib):
    """Parse a conference BibTeX file and return its list of entries."""
    with open(conferencebib) as fh:
        database = bibtexparser.load(fh)
    return database.entries
def load(bibtex_file):
    """Parse an open BibTeX file handle using the project's default parser."""
    return bibtexparser.load(bibtex_file, parser=make_default_bibtex_parser())