def ircrebibmerge():
    """Merge three BibTeX files into ``newircre.bib``.

    The sorted articles, the top-15 list and the "others" list are each
    parsed with their own ``BibTexParser`` (``common_strings=False``,
    ``ignore_nonstandard_types`` switched off).  The merged database holds
    copies of the top-15 entries first, then the sorted articles, then the
    others, and is written without re-ordering the entries.

    :return: always ``0``.
    """

    def load_entries(bib_path):
        # One fresh parser per file.
        bib_parser = BibTexParser(common_strings=False)
        bib_parser.ignore_nonstandard_types = False
        with open(bib_path, encoding='utf8') as handle:
            return bibtexparser.load(handle, bib_parser).entries.copy()

    # Files are read in the same order as before: sorted, top15, others.
    sorted_part = load_entries(
        '/home/limingtao/ircre-bibtex/ircreupdate/sorted-articles.bib')
    top15_part = load_entries(
        '/home/limingtao/ircre-bibtex/ircreupdate/top15.bib')
    others_part = load_entries(
        '/home/limingtao/ircre-bibtex/ircreupdate/others.bib')

    merged = BibDatabase()
    merged.entries = [
        record.copy()
        for group in (top15_part, sorted_part, others_part)
        for record in group
    ]

    bib_writer = BibTexWriter()
    bib_writer.indent = ' '
    bib_writer.order_entries_by = None  # keep the merge order
    with open('/home/limingtao/ircre-bibtex/ircreupdate/newircre.bib', 'w',
              encoding='utf8') as merged_file:
        bibtexparser.dump(merged, merged_file, writer=bib_writer)
    return 0
def _ingest_citations(rc):
    """Parse the BibTeX file ``rc.filename`` and upsert every entry.

    ``author`` and ``editor`` fields are split into individual names while
    parsing.  Each entry is then stored through ``rc.client.update_one``
    under ``_id`` equal to its BibTeX ID, with ``ENTRYTYPE`` renamed to
    ``entrytype``, author strings split again on ``RE_AND`` and whitespace
    in the title normalised with ``RE_SPACE``.

    :param rc: run-control object providing ``filename``, ``client``,
        ``db`` and ``coll``.
    """
    import bibtexparser
    from bibtexparser.bparser import BibTexParser
    from bibtexparser.customization import getnames

    def customizations(record):
        # Turn "A, B and C" style person lists into a list of names.
        for field in ("author", "editor"):
            if field not in record:
                continue
            one_line = record[field].replace("\n", " ")
            names = [
                piece.strip()
                for chunk in one_line.split(", ")
                for piece in chunk.split(" and ")
            ]
            record[field] = getnames(names)
        return record

    parser = BibTexParser()
    parser.ignore_nonstandard_types = False
    parser.customization = customizations

    with open(rc.filename, "r", encoding='utf-8') as f:
        bibs = bibtexparser.load(f, parser=parser)

    # The result is not used below; the lookup itself is kept as it was.
    coll = rc.client[rc.db][rc.coll]

    for record in bibs.entries:
        record_id = record.pop("ID")
        record["entrytype"] = record.pop("ENTRYTYPE")
        if "author" in record:
            split_authors = []
            for name in record["author"]:
                split_authors.extend(part.strip() for part in RE_AND.split(name))
            record["author"] = split_authors
        if "title" in record:
            record["title"] = RE_SPACE.sub(" ", record["title"])
        rc.client.update_one(rc.db, rc.coll, {"_id": record_id}, record,
                             upsert=True)
def annotes_dicts(bibfile, pdfdir, filters, include_all=False):
    """Collect annotation records for the entries of a BibTeX file.

    An entry is considered only when every ``(key, pattern)`` pair in
    ``filters`` matches it (the key exists and ``re.search`` finds the
    pattern in its value) and it has a ``file`` field, a ``review`` field,
    or ``include_all`` is set.  For an entry with a ``file`` field the PDF
    path is ``pdfdir`` joined with the field value minus its first character
    and last four characters; the path is echoed to stderr and handed to
    ``get_annotes``.

    :param bibfile: path of the BibTeX file (read as UTF-8).
    :param pdfdir: directory the ``file`` fields are resolved against.
    :param filters: re-iterable of ``(field name, regex)`` pairs.
    :param include_all: also report entries with neither annotations nor a
        ``review`` field.
    :return: list of dicts sorted by ``ID`` with the keys ``file``,
        ``author``, ``year``, ``title``, ``journal``, ``review``, ``ID``,
        ``doi`` and ``annotations``.
    """
    with open(bibfile, encoding="utf-8") as handle:
        raw_bibtex = handle.read()

    bib_parser = BibTexParser()
    bib_parser.ignore_nonstandard_types = False
    database = bibtexparser.loads(raw_bibtex, bib_parser)

    reported_fields = ('author', 'year', 'title', 'journal', 'review', 'ID', 'doi')
    collected = []
    for entry in database.entries:
        passes_filters = all(
            key in entry and re.search(pattern, entry[key])
            for key, pattern in filters
        )
        if not passes_filters:
            continue
        if not (entry.get('file') or entry.get('review') or include_all):
            continue

        if entry.get('file'):
            pdf_path = os.path.join(pdfdir, entry['file'][1:-4])
            sys.stderr.write("%s\n" % pdf_path)
            annotes = get_annotes(pdf_path)
        else:
            pdf_path = ''
            annotes = []

        if not (annotes or entry.get('review') is not None or include_all):
            continue

        record = {'file': pdf_path}
        for field in reported_fields:
            record[field] = _to_utf(entry.get(field, None))
        record['annotations'] = [
            {name: _to_utf(value) for name, value in note._asdict().items()}
            for note in annotes
        ]
        collected.append(record)

    collected.sort(key=lambda item: item['ID'])
    return collected
def get_bibtex(f):
    """Parse the BibTeX stream ``f`` and return the loaded database.

    The parser is created with ``common_strings=False``, has
    ``ignore_nonstandard_types`` switched off and ``homogenise_fields``
    switched on, and uses ``clean_tex`` as its customization hook.
    """
    tex_parser = BibTexParser(common_strings=False)
    tex_parser.customization = clean_tex
    tex_parser.homogenise_fields = True
    tex_parser.ignore_nonstandard_types = False
    database = bibtexparser.load(f, tex_parser)
    return database
def collect_author_name(fn):
    """Write every author name found in a BibTeX file to a ``.author`` file.

    The BibTeX file is read as ISO-8859-1.  Each ``author`` field is split
    on the BibTeX separator `` and `` (surrounded by whitespace) and every
    resulting name is written on its own line.

    :param fn: path of the BibTeX file.
    :return: path of the output file, i.e. ``fn`` with ``.bib`` replaced by
        ``.author``.
    """
    output = fn.replace('.bib', '.author')

    # Read and parse the input completely before the output is opened, so
    # the input cannot be truncated when both paths turn out to be the same.
    with open(fn, encoding='ISO-8859-1') as bibtex_file:
        parser = BibTexParser(common_strings=True)
        parser.ignore_nonstandard_types = True
        parser.homogenise_fields = False
        bib_database = bibtexparser.loads(bibtex_file.read(), parser)

    # The context manager guarantees the author file is flushed and closed.
    with open(output, 'w+') as output_file:
        for entry in bib_database.entries:
            for key, val in entry.items():
                if key.lower() != 'author':
                    continue
                # Collapse newlines/runs of blanks first so that the
                # separator is always exactly " and ".  Splitting on the
                # bare substring "and" would cut names such as "Anderson".
                for one_author in ' '.join(val.split()).split(' and '):
                    output_file.write(one_author.strip() + '\n')

    # NOTE(review): format_errors / parsing_errors are not defined in this
    # function; presumably module-level collections - confirm.
    print(format_errors)
    print(parsing_errors)
    return output
def convert_entry_to_ccg_style(bib_str):
    """
    Parse ``bib_str`` and convert each of its entries into ccg style.

    Note:
        1. Only the parsed entries are converted, so other items (such as
           @string or @comment) are not part of the output.
        2. (TODO) Two versions of one publication may end up with an
           identical key (because they share author and year).

    :param bib_str: a string which contains one or more entries
    :return: a list with one triple ``(old_key, new_key, new_bib)`` per
        entry --
        old_key: the entry key found in the input
        new_key: the ccg-style entry key
        new_bib: the converted ccg-style entry in string form
    """
    ccg_parser = BibTexParser()
    ccg_parser.ignore_nonstandard_types = False
    database = bibtexparser.loads(bib_str, ccg_parser)

    def convert(entry):
        # The old key is read before the conversion helper sees the entry.
        previous_key = entry["ID"]
        ccg_key, ccg_entry = _entry_to_ccg_style(entry)
        return previous_key, ccg_key, _entry_to_str(ccg_entry)

    return [convert(entry) for entry in database.entries]
def inject_labels(input_fn, output_fn, writer):
    """Wrap the field values of a BibTeX file in ``@@@key@@@`` labels.

    Every field except ``id``, ``entrytype`` and ``author`` (compared
    case-insensitively) gets its value replaced by
    ``@@@key@@@ value @@@@key@@@@``.  The labelled database is serialised
    with ``writer`` and written to ``output_fn``.

    :param input_fn: path of the BibTeX file to read (ISO-8859-1).
    :param output_fn: path of the labelled file to write.
    :param writer: object whose ``write(database)`` returns the text to save.
    """
    unlabeled = ['id', 'entrytype', 'author']

    with open(input_fn, encoding='ISO-8859-1') as source:
        raw_text = source.read()

    label_parser = BibTexParser(common_strings=True)
    label_parser.ignore_nonstandard_types = True
    label_parser.homogenise_fields = False
    bib_database = bibtexparser.loads(raw_text, label_parser)

    def label(entry):
        # Build a new dict; the parsed entry itself is left untouched.
        return {
            key: (val if key.lower() in unlabeled
                  else '@@@{}@@@ {} @@@@{}@@@@'.format(key, val, key))
            for key, val in entry.items()
        }

    bib_database.entries = [label(entry) for entry in bib_database.entries]

    with open(output_fn, 'w') as sink:
        sink.write(writer.write(bib_database))
def import_bibtex(
    bibtex,
    pub_dir="publication",
    featured=False,
    overwrite=False,
    normalize=False,
    dry_run=False,
):
    """Import publications from BibTeX file

    The file is parsed as UTF-8 with common strings enabled and
    ``convert_to_unicode`` as customization; every parsed entry is passed
    to ``parse_bibtex_entry`` together with the keyword options.

    Raises ``AcademicError`` (after logging) when ``bibtex`` is not a file.
    """
    from academic.cli import AcademicError, log

    # Guard clause: refuse to continue without an existing BibTeX file.
    if not Path(bibtex).is_file():
        err = "Please check the path to your BibTeX file and re-run"
        log.error(err)
        raise AcademicError(err)

    entry_options = dict(
        pub_dir=pub_dir,
        featured=featured,
        overwrite=overwrite,
        normalize=normalize,
        dry_run=dry_run,
    )

    with open(bibtex, "r", encoding="utf-8") as bibtex_file:
        unicode_parser = BibTexParser(common_strings=True)
        unicode_parser.customization = convert_to_unicode
        unicode_parser.ignore_nonstandard_types = False
        bib_database = bibtexparser.load(bibtex_file, parser=unicode_parser)
        for entry in bib_database.entries:
            parse_bibtex_entry(entry, **entry_options)
def getentries(filename):
    """Parse a BibTeX import file and return its entries.

    The file is first handed to ``save_import_file``.  It is then parsed
    with up to three encodings in turn: ``utf-8-sig``, ``utf-16`` and
    finally without an explicit encoding.  A later encoding is only tried
    when the failure message of the current one names that encoding.

    :param filename: path of the BibTeX file.
    :return: the ``entries`` of the parsed database.
    :raises IOError: when saving the import file fails.
    :raises ValueError: when parsing fails.
    """
    try:
        save_import_file(filename)
    except IOError as e:
        logg.error("bibtex import: save import file failed: {}".format(e))
        raise IOError("save import file failed")

    bibtex = None
    error = None
    # use utf-8-sig instead of utf-8 to get rid of BOM_UTF8, which confuses
    # the bibtex parser
    for encoding in ('utf-8-sig', 'utf-16', None):
        error = None
        try:
            # The context manager closes the file on success and on failure.
            with codecs.open(filename, "r", encoding=encoding) as fi:
                parser = BibTexParser()
                # accept also non standard records like @SCIENCEREPORT
                parser.ignore_nonstandard_types = False
                parser.customization = _bibteximport_customize
                bibtex = bibtex_load(fi, parser=parser)
            # seems to be the correct encoding, don't try other encodings
            break
        except Exception as e:
            message = str(e).lower()
            # Compare by value: identity checks on string literals only
            # work by accident of interning.
            if (encoding == 'utf-8-sig' and 'utf8' in message) or \
                    (encoding == 'utf-16' and 'utf-16' in message):
                continue
            # Keep the exception under another name: the ``as`` target is
            # unbound once the except block is left on Python 3.
            error = e
            break

    if error is not None:
        logg.error("bibtex import: bibtexparser failed: {}".format(error))
        raise ValueError("bibtexparser failed")
    return bibtex.entries
def _ingest_citations(rc):
    """Parse the BibTeX file ``rc.filename`` and upsert every entry.

    ``author`` and ``editor`` fields are split into individual names while
    parsing.  Each entry is then stored through ``rc.client.update_one``
    under ``_id`` equal to its BibTeX ID, with ``ENTRYTYPE`` renamed to
    ``entrytype``, author strings split again on ``RE_AND`` and whitespace
    in the title normalised with ``RE_SPACE``.

    :param rc: run-control object providing ``filename``, ``client``,
        ``db`` and ``coll``.
    """
    import bibtexparser
    from bibtexparser.bparser import BibTexParser
    from bibtexparser.customization import getnames

    parser = BibTexParser()
    parser.ignore_nonstandard_types = False

    def customizations(record):
        # Turn "A, B and C" style person lists into a list of names.
        for n in ['author', 'editor']:
            if n in record:
                one_line = record[n].replace('\n', ' ')
                names = [
                    piece.strip()
                    for chunk in one_line.split(', ')
                    for piece in chunk.split(" and ")
                ]
                record[n] = getnames(names)
        return record

    parser.customization = customizations

    # Read with an explicit encoding so the result does not depend on the
    # platform's default locale.
    with open(rc.filename, 'r', encoding='utf-8') as f:
        bibs = bibtexparser.load(f, parser=parser)

    # The result is not used below; the lookup itself is kept as it was.
    coll = rc.client[rc.db][rc.coll]

    for bib in bibs.entries:
        bibid = bib.pop('ID')
        bib['entrytype'] = bib.pop('ENTRYTYPE')
        if 'author' in bib:
            bib['author'] = [a.strip() for b in bib['author']
                             for a in RE_AND.split(b)]
        if 'title' in bib:
            bib['title'] = RE_SPACE.sub(' ', bib['title'])
        rc.client.update_one(rc.db, rc.coll, {'_id': bibid}, bib, upsert=True)
def getcitation():
    """Refresh the citation counts of a random sample of articles.

    Up to 20 entries of ``articles.bib`` are picked at random.  For every
    picked entry whose ``clusterid`` is not ``"unknown"`` the ``scholar.py``
    script is queried and the last whitespace-separated token of its
    filtered output is taken as the new citation count.  The ``cited`` field
    is only updated when the new count is larger than the old one by less
    than 8.  After each entry the database is written to
    ``cited-add-articles.bib`` and copied to ``tempcited-add-articles.bib``
    in the current directory.  At the end ``articles.bib`` is backed up to
    ``oldarticles.bib`` and rewritten.

    :return: always ``0``.
    """
    import random
    import shutil

    articles_path = '/home/limingtao/ircre-bibtex/ircreupdate/articles.bib'
    checkpoint_path = '/home/limingtao/ircre-bibtex/ircreupdate/cited-add-articles.bib'

    articlesparser = BibTexParser(common_strings=False)
    articlesparser.ignore_nonstandard_types = False
    with open(articles_path, encoding='utf8') as articlesfile:
        articles_database = bibtexparser.load(articlesfile, articlesparser)
    articleentries = articles_database.entries

    writer = BibTexWriter()
    writer.indent = ' '
    writer.order_entries_by = ('order',)

    # random.sample raises ValueError when asked for more items than exist.
    samplelist = random.sample(range(len(articleentries)),
                               min(20, len(articleentries)))
    print(samplelist)

    for i in samplelist:
        entry = articleentries[i]
        print("---------------------------")
        print("Entry number: " + str(i))
        title = entry['title']
        clusterid = entry['clusterid']
        print("Title: " + title)
        print("Cluster ID: " + clusterid)

        citations = "unknown"
        if not clusterid == "unknown":
            print(str(i))
            try:
                citations = os.popen(
                    '''/usr/bin/python3 /home/limingtao/ircre-bibtex/ircreupdate/scholarpy/scholar.py -c 1 -C ''' + clusterid + ''' |grep -v list |grep Citations''').read().strip().split()[
                    -1]
            except Exception:
                # Best effort: empty output (IndexError) or a failed spawn
                # leaves the count unknown.  Ctrl-C is no longer swallowed.
                citations = "unknown"
        print("new Citations: " + citations)

        oldcitednumber = int(entry['cited']) if 'cited' in entry else 0
        print("Old Cited Number: " + str(oldcitednumber))

        if not citations == "unknown":
            citednumber = int(citations)
            # Accept only modest increases (0 < delta < 8).
            if citednumber > oldcitednumber and ((citednumber - oldcitednumber) < 8):
                entry['cited'] = str(citednumber)

        # Checkpoint after every entry.
        with open(checkpoint_path, 'w', encoding='utf8') as newarticlefile:
            bibtexparser.dump(articles_database, newarticlefile, writer=writer)
        shutil.copyfile(checkpoint_path, "tempcited-add-articles.bib")

    # The backup must be complete before articles.bib is truncated below.
    # A ``cp`` started through os.popen is not waited for, so it could end
    # up copying the already truncated file.
    shutil.copyfile(articles_path,
                    '/home/limingtao/ircre-bibtex/ircreupdate/oldarticles.bib')
    with open(articles_path, 'w', encoding='utf8') as newarticlefile:
        bibtexparser.dump(articles_database, newarticlefile, writer=writer)
    return 0
def updatestatistics():
    """Recompute the publication statistics and write ``newstatistics.js``.

    Reads ``articles.bib`` and derives from its entries: the citation
    total (sum of the ``cited`` fields plus a fixed 19), the h-index and
    i10-index (via ``Hindex`` / ``I10index``), citations per paper, the
    number of distinct journals, the average ``impactfactor``, the number
    of entries with a ``hihosubject`` field, the number of articles and the
    number of publications (articles plus a fixed 28).  Missing ``cited`` /
    ``impactfactor`` fields count as zero.

    :return: always ``0``.
    """
    stats_parser = BibTexParser(common_strings=False)
    stats_parser.ignore_nonstandard_types = False
    with open('/home/limingtao/ircre-bibtex/ircreupdate/articles.bib',
              encoding='utf8') as bib_handle:
        database = bibtexparser.load(bib_handle, stats_parser)
    articles = database.entries

    article_count = len(articles)
    citation_counts = []
    journal_names = []
    impact_sum = 0.0
    highly_cited = 0
    for article in articles:
        cited = int(article['cited']) if 'cited' in article else 0
        impact = float(article['impactfactor']) if 'impactfactor' in article else 0.0
        if 'hihosubject' in article:
            highly_cited += 1
        citation_counts.append(cited)
        journal_names.append(article['journal'])
        impact_sum = impact_sum + impact

    h_value = Hindex(citation_counts)
    i10_value = I10index(citation_counts)
    citation_total = sum(citation_counts) + 19
    per_paper = citation_total / article_count
    journal_count = len(set(journal_names))
    mean_impact = impact_sum / article_count

    # (format, value) pairs in the order they appear in the output file.
    statements = (
        ('totalpublications = "%d";\n', article_count + 28),
        ('totalarticles = "%d";\n', article_count),
        ('totalcitations = "%d";\n', citation_total),
        ('hindex = "%d";\n', h_value),
        ('i10index = "%d";\n', i10_value),
        ('numberjournals = "%d";\n', journal_count),
        ('numberesihighlycited = "%d";\n', highly_cited),
        ('citationperpaper = "%.2f";\n', per_paper),
        ('averageif = "%.3f";\n', mean_impact),
    )
    with open('/home/limingtao/ircre-bibtex/ircreupdate/newstatistics.js', 'w',
              encoding='utf8') as js_file:
        for template, value in statements:
            js_file.write(template % value)
    return 0
def read_bibtex(bibtex_str):
    """Parse ``bibtex_str`` and return its post-processed entries as a list.

    Every entry is run through the ``keyword`` and ``convert_to_unicode``
    customizations; once all entries have been converted, the ``author``
    customization is applied to each of them.
    """
    customization = bibtexparser.customization

    string_parser = BibTexParser(common_strings=True)
    string_parser.ignore_nonstandard_types = False
    string_parser.homogenize_fields = True
    database = string_parser.parse(bibtex_str)

    # Stage 1: keyword + unicode conversion, entry by entry.
    converted = [
        customization.convert_to_unicode(customization.keyword(entry))
        for entry in database.entries
    ]
    # Stage 2: author splitting over the converted entries.
    return [customization.author(entry) for entry in converted]
def get_bibtex_data(filename):
    """Parse the BibTeX file ``filename`` and return its entries.

    :return: a new list holding the entry dicts of the parsed database.
    """
    source_parser = BibTexParser()
    source_parser.ignore_nonstandard_types = False
    with open(filename) as handle:
        database = bibtexparser.loads(handle.read(), source_parser)
    return list(database.entries)
def database(self) -> BibDatabase:
    """Parse the master BibTeX file (``self.master_bib``) and return the
    resulting ``BibDatabase``.

    """
    logger.info(f'parsing master bibtex file: {self.master_bib}')
    master_parser = BibTexParser()
    master_parser.ignore_nonstandard_types = False
    with open(self.master_bib) as master_file:
        parsed = bibtexparser.load(master_file, master_parser)
    return parsed
def add_entry_by_string(self,
                        bib_string,
                        file_name=None,
                        skip_if_file_exists=True,
                        skip_if_doi_exists=False,
                        parser=None):
    """
    Parse a BibTex string and append its first entry to this database.

    :param bib_string: the text of a BibTex section describing the reference.

    :param file_name: optional name of a local file to store in the entry's
     ``file`` field.

    :param skip_if_file_exists: boolean, default True. When set and
     ``file_name`` is already listed in ``self.files``, nothing is added.

    :param skip_if_doi_exists: boolean, default False. When set and the parsed
     entry has a ``doi`` that is already in ``self.dois``, nothing is added.

    :param parser: a parser object whose ``parse(bib_string)`` returns a
     database. When omitted, a ``BibTexParser`` is created with
     ``ignore_nonstandard_types = False``, ``homogenise_fields = True`` and a
     customization that calls ``self.format_entry`` on each entry.

    :return: none, the entry is appended to ``self.entries`` in place.
    """
    already_filed = (skip_if_file_exists
                     and file_name is not None
                     and file_name in self.files)
    if already_filed:
        root_logger.info(
            'Not adding {}, entry for that file already in .bib file'.format(
                file_name))
        return

    if parser is None:
        def format_with_current_state(entry):
            # Looks up format_entry on this instance when the entry is parsed.
            return self.format_entry(entry)

        parser = BibTexParser()
        parser.ignore_nonstandard_types = False
        parser.homogenise_fields = True
        parser.customization = format_with_current_state

    # Parsing into a throw-away database yields a properly formatted entry.
    new_entry = parser.parse(bib_string).entries[0]

    if skip_if_doi_exists and 'doi' in new_entry and new_entry['doi'] in self.dois:
        root_logger.info(
            'Not adding {}, entry with DOI "{}" already in bib file'.format(
                file_name, new_entry['doi']))
        return

    if file_name is not None:
        new_entry['file'] = file_name

    self.entries.append(new_entry)
def get_bibtex_dict(stream):
    """Parse the BibTeX ``stream`` and return the parser's entry dict.

    Field homogenisation is switched off and ``ignore_nonstandard_types``
    is set to False.
    """
    from bibtexparser.bparser import BibTexParser

    stream_parser = BibTexParser()
    stream_parser.ignore_nonstandard_types = False
    stream_parser.homogenise_fields = False

    # TODO: newlines appear to survive in `author` records at least; it
    # might be nice to replace them with spaces (collapsing repeated spaces).
    database = stream_parser.parse_file(stream)
    return database.get_entry_dict()
def get_bibtex_dict(stream):
    """Parse the BibTeX ``stream`` and return the parser's entry dict.

    Field homogenisation is switched off and ``ignore_nonstandard_types``
    is set to False.
    """
    from bibtexparser.bparser import BibTexParser

    stream_parser = BibTexParser()
    stream_parser.ignore_nonstandard_types = False
    stream_parser.homogenise_fields = False

    # TODO: newlines appear to survive in `author` records at least; it
    # might be nice to replace them with spaces (collapsing repeated spaces).
    database = stream_parser.parse_file(stream)
    return database.get_entry_dict()
def _open_bib_db(self, bibfile):
    """Load ``bibfile`` and return the parsed database.

    The directory of ``bibfile`` is remembered in ``self._bib_path``.
    """
    self._bib_path = os.path.dirname(bibfile)

    db_parser = BibTexParser()
    db_parser.ignore_nonstandard_types = False
    db_parser.homogenize_fields = True

    with open(bibfile) as handle:
        loaded = bibtexparser.load(handle, db_parser)
    return loaded
def __init__(self, bfile, jfile="data/journals.csv"):
    """Load a BibTeX file and a journals table and index the entries.

    :param bfile: path of the BibTeX file; a falsy value selects the
        built-in default path.
    :param jfile: path of the journals CSV, read through ``csv2list``; the
        first column is mapped to the second.

    After parsing, ``self._dict`` maps every entry id to a
    ``defaultdict(str)`` of its remaining fields, and fields missing from
    an entry are filled in from its ``crossref`` target when that target
    exists.
    """
    if not bfile:
        # Assign the default to ``bfile`` itself so that it is the file
        # actually opened below.
        bfile = '/home/mattis/DropBox/evobib/basic.bib'
    with open(bfile) as f:
        bd = f.read()

    # try open journals file
    self._journals = {a: b[0] for a, *b in csv2list(jfile)}

    # customize stuff
    parser = BTP()
    parser.ignore_nonstandard_types = False
    bdb = btp.loads(bd, parser=parser)
    self._entries = bdb.entries

    # make entries to keys
    self._dict = {}
    crossrefs = []
    for entry in self._entries:
        key = entry['id']
        self._dict[key] = defaultdict(
            str, {k: v for k, v in entry.items() if k != 'id'})
        if 'crossref' in entry:
            crossrefs.append((key, entry['crossref']))

    # resolve crossrefs: copy only the fields the source does not have
    for source, target in crossrefs:
        if source in self._dict and target in self._dict:
            for k in self._dict[target]:
                if k not in self._dict[source]:
                    self._dict[source][k] = self._dict[target][k]
        else:
            print("[!] WARNING: target for <{0}> missing!".format(target))

    self._alias = dict(
        location=['address'],
        year=['date']
    )
    self._modifiers = {
        'paperconference': 'Paper, presented at the conference',
        'paperworkshop': 'Paper, presented at the workshop',
        'talkconference': 'Talk, held at the conference',
        'talkworkshop': 'Talk, held at the workshop',
        'talkatm': 'Talk, held at the'
    }
    self._clean_keys()
    self._clean_entries()
    self._load_templates()
def __init__(self, bfile, jfile="data/journals.csv"):
    """Load a BibTeX file and a journals table and index the entries.

    :param bfile: path of the BibTeX file; a falsy value selects the
        built-in default path.
    :param jfile: path of the journals CSV, read through ``csv2list``; the
        first column is mapped to the second.

    After parsing, ``self._dict`` maps every entry id to a
    ``defaultdict(str)`` of its remaining fields, and fields missing from
    an entry are filled in from its ``crossref`` target when that target
    exists.
    """
    if not bfile:
        # Assign the default to ``bfile`` itself so that it is the file
        # actually opened below.
        bfile = '/home/mattis/DropBox/evobib/basic.bib'
    with open(bfile) as f:
        bd = f.read()

    # try open journals file
    self._journals = {a: b[0] for a, *b in csv2list(jfile)}

    # customize stuff
    parser = BTP()
    parser.ignore_nonstandard_types = False
    bdb = btp.loads(bd, parser=parser)
    self._entries = bdb.entries

    # make entries to keys
    self._dict = {}
    crossrefs = []
    for entry in self._entries:
        key = entry['id']
        self._dict[key] = defaultdict(
            str, {k: v for k, v in entry.items() if k != 'id'})
        if 'crossref' in entry:
            crossrefs.append((key, entry['crossref']))

    # resolve crossrefs: copy only the fields the source does not have
    for source, target in crossrefs:
        if source in self._dict and target in self._dict:
            for k in self._dict[target]:
                if k not in self._dict[source]:
                    self._dict[source][k] = self._dict[target][k]
        else:
            print("[!] WARNING: target for <{0}> missing!".format(target))

    self._alias = dict(
        location=['address'],
        year=['date']
    )
    self._modifiers = {
        'paperconference': 'Paper, presented at the conference',
        'paperworkshop': 'Paper, presented at the workshop',
        'talkconference': 'Talk, held at the conference',
        'talkworkshop': 'Talk, held at the workshop',
        'talkatm': 'Talk, held at the'
    }
    self._clean_keys()
    self._clean_entries()
    self._load_templates()
def collection_from_bibtex_str(bib_str, **kwargs):
    """
    Build a BibJSON collection from a Bibtex string (e.g. the text of a
    .bib-file).

    :param bib_str: input bibtex string
    :param kwargs: metadata for the BibJSON collection; the "collection"
        parameter must be set
    :return BibJSON collection dictionary
    """
    entry_parser = BibTexParser()
    entry_parser.customization = _parse_bib_entry
    # NOTE: the previous author remarked that this flag appears to be
    # flipped, apparently an error in the library.
    entry_parser.ignore_nonstandard_types = False

    parsed = bibtexparser.loads(bib_str, parser=entry_parser)
    entries_by_key = parsed.entries_dict
    return collection_from_dict(entries_by_key, **kwargs)
def load_bibtex(bibfile):
    """Load the entries of a UTF-8 BibTeX file, newest first.

    The entry list is sorted in place three times with stable sorts: by
    author (ascending), then by ``mo_co(month)`` (descending), then by year
    (descending).  The last sort is therefore the primary ordering.

    :return: the sorted list of entries.
    """
    def by_author(item):
        return item.get("author", "")

    def by_month(item):
        return mo_co(item.get("month", ""))

    def by_year(item):
        return item.get("year", "")

    utf8_parser = BibTexParser()
    utf8_parser.ignore_nonstandard_types = False
    with io.open(bibfile, 'r', encoding='utf-8') as handle:
        database = bibtexparser.load(handle, parser=utf8_parser)

    ordered = database.entries
    # Least significant key first; each later sort keeps earlier order on ties.
    for sort_key, descending in ((by_author, False),
                                 (by_month, True),
                                 (by_year, True)):
        ordered.sort(key=sort_key, reverse=descending)
    return ordered
def read_bib(filename):
    """ read bibtex file and return bibtexparser object

    Prints a notice and exits the interpreter with status 0 when
    ``filename`` does not exist.
    """
    if not os.path.exists(filename):
        print("... no bib file: {}".format(filename))
        # The ``os`` module has no ``exit`` function; raising SystemExit is
        # what ``sys.exit(0)`` does and needs no extra import.
        raise SystemExit(0)

    parser = BibTexParser(common_strings=True)
    parser.ignore_nonstandard_types = False
    parser.homogenise_fields = False

    with open(filename) as f:
        bibtex_str = f.read()

    bib_database = bibtexparser.loads(bibtex_str, parser)
    return bib_database
def bibtex_reader(self, bibtextdata):
    """
    Parse the bibtex data and return its first entry

    Arguments:
        bibtextdata {str} -- bibtexdata

    Returns:
        the first element of the parsed ``entries`` list
    """
    reader = BibTexParser()
    reader.ignore_nonstandard_types = False
    reader.homogenise_fields = False
    reader.common_strings = False
    parsed_entries = bibtexparser.loads(bibtextdata, reader).entries
    return parsed_entries[0]
def getclusterid(title, author):
    """Look up missing cluster ids for the entries of ``articles.bib``.

    For every entry whose ``clusterid`` is ``"unknown"`` the ``scholar.py``
    script is queried with the entry's title and the last
    whitespace-separated token of its filtered output is stored as the new
    cluster id (``"unknown"`` again when the lookup fails).  After each
    updated entry the database is written to ``clusterid-added-ircre.bib``
    and copied to ``tempclusterid-added-ircre.bib``; the file is written
    once more at the end.

    :param title: not used; the titles are taken from the entries.
    :param author: not used.
    :return: always ``0``.
    """
    import shutil

    output_path = '/home/limingtao/ircre-bibtex/ircreupdate/clusterid-added-ircre.bib'

    parser = BibTexParser(common_strings=False)
    parser.ignore_nonstandard_types = False
    with open('/home/limingtao/ircre-bibtex/ircreupdate/articles.bib',
              encoding='utf8') as article_file:
        article_database = bibtexparser.load(article_file, parser)

    # Work on the database that was just loaded, so the updates below are
    # what gets written out.
    entries = article_database.entries

    print("---------------------------")
    print("---------------------------")
    print("---------------------------")
    print("Total articles number: " + str(len(entries)))
    print("---------------------------")
    print("---------------------------")
    print("---------------------------")

    writer = BibTexWriter()
    writer.indent = ' '
    writer.order_entries_by = ('order',)

    for i, entry in enumerate(entries):
        if entry['clusterid'] != 'unknown':
            continue
        print("---------------------------")
        print("Entry number: " + str(i))
        entry_title = entry['title']
        print("Title: " + entry_title)
        # NOTE(review): the title is pasted into a shell command line; a
        # title containing a double quote breaks the command.
        try:
            clusterid = os.popen(
                '''/home/limingtao/ircre-bibtex/ircreupdate/scholarpy/scholar.py -c 1 -t --phrase="''' + entry_title + '''" |grep ID| grep Cluster''').read().strip().split()[
                -1]
        except Exception:
            # Best effort: empty output (IndexError) or a failed spawn
            # leaves the id unknown.  Ctrl-C is no longer swallowed.
            clusterid = "unknown"
        print("new Cluster ID: " + clusterid)
        entry['clusterid'] = clusterid

        # Checkpoint the progress.  The copy is synchronous, so it cannot
        # overlap with a later rewrite of the same file.
        with open(output_path, 'w', encoding='utf8') as newbibfile:
            bibtexparser.dump(article_database, newbibfile, writer=writer)
        shutil.copyfile(
            output_path,
            '/home/limingtao/ircre-bibtex/ircreupdate/tempclusterid-added-ircre.bib')

    with open(output_path, 'w', encoding='utf8') as newbibfile:
        bibtexparser.dump(article_database, newbibfile, writer=writer)
    return 0
def _process_bibtex(file, expected_count=1) -> "typing.List[EditableFM]":
    """
    Parse a BibTeX .bib file found in ``bibtex_dir`` and return its metadata

    :param file: The .bib file to parse
    :param expected_count: The number of entries the .bib must contain
    :return: One parsed result per entry, produced with ``dry_run=True``
    """
    unicode_parser = BibTexParser(common_strings=True)
    unicode_parser.customization = import_bibtex.convert_to_unicode
    unicode_parser.ignore_nonstandard_types = False

    bib_path = Path(bibtex_dir, file)
    with bib_path.open("r", encoding="utf-8") as bibtex_file:
        bib_database = bibtexparser.load(bibtex_file, parser=unicode_parser)
        results = [
            import_bibtex.parse_bibtex_entry(entry, dry_run=True)
            for entry in bib_database.entries
        ]
        assert len(results) == expected_count
        return results
def get_bibtex_entry(doi, bibtext_cache={}, shortdoi_cache={}):
    """
    Build a bibtexparser entry for a DOI, or return None when no BibTeX
    text is available for it.

    The entry gets a ``link`` to ``https://doi.org/<quoted doi>``, its
    ``author`` field (if any) rewritten from ``; ``-separated to
    `` and ``-separated, and its ``ID`` set to the short DOI without its
    first three characters.  The two dict defaults are passed on as caches.
    """
    bibtext = get_bibtext(doi, cache=bibtext_cache)
    if not bibtext:
        return None
    short_doi = shorten(doi, cache=shortdoi_cache)

    doi_parser = BibTexParser()
    doi_parser.ignore_nonstandard_types = False
    # Exactly one entry is expected; anything else raises ValueError.
    [entry] = bibtexparser.loads(bibtext, doi_parser).entries

    entry['link'] = 'https://doi.org/{}'.format(urllib.request.quote(doi))
    if 'author' in entry:
        author_names = entry['author'].rstrip(';').split('; ')
        entry['author'] = ' and '.join(author_names)
    entry['ID'] = short_doi[3:]
    return entry
def parse_bibfile(bibfile):
    """given a bibtex .bib file, parse it and return the papers found

    Every parsed entry is passed to ``extract_paper_info``; results that
    are ``None`` are left out.  The papers are returned sorted in
    descending order.
    """
    with open(bibfile) as bibtex_file:
        parser = BibTexParser()
        parser.customization = customizations
        parser.ignore_nonstandard_types = False
        bib_database = bibtexparser.load(bibtex_file, parser=parser)

    papers = []
    for e in bib_database.entries:
        p = extract_paper_info(e)
        # Test the extracted paper, not the entry: the entry is never None,
        # so checking it would let a None result through to the sort below.
        if p is not None:
            papers.append(p)

    papers.sort(reverse=True)
    return papers
def read(self, filename):
    """Render the BibTeX file ``filename`` as a publications page.

    The entries are converted with ``self._parse_entry``, grouped by their
    ``type`` value, stripped of that ``type`` key and rendered through the
    module-level ``template``.

    :return: tuple ``(html, parsed)`` where ``parsed`` holds the processed
        page metadata (title, category, date).
    """
    metadata = {
        'title': 'Publications',
        'category': 'Publications',
        'date': str(datetime.datetime.now())
    }
    parsed = {}
    for key, value in metadata.items():
        parsed[key] = self.process_metadata(key, value)

    with open(filename) as f:
        bib_parser = BibTexParser()
        bib_parser.ignore_nonstandard_types = False
        db = bib_parser.parse_file(f)
    entries = [self._parse_entry(e) for e in db.entries]

    def of_type(kind):
        return [e for e in entries if e['type'] == kind]

    # Template variable name -> entries of the matching type.
    sections = dict(
        thesis=of_type('thesis'),
        publications=of_type('publication'),
        arxiv=of_type('arxiv'),
        workshops=of_type('workshop'),
        non_refereed=of_type('non-refereed'),
        media=of_type('media'),
        projects=of_type('project'),
    )

    # The grouping is done, so the helper key can go before rendering.
    for e in entries:
        del e['type']

    jinja_env = Environment()
    jinja_env.filters['compile_jsx'] = compile_jsx
    html = jinja_env.from_string(template).render(**sections)
    return html, parsed
def parse_bibfile(bibfile):
    """given a bibtex .bib file, parse it and return the papers found

    Every parsed entry is passed to ``extract_paper_info``; results that
    are ``None`` are left out.  The papers are returned sorted in
    descending order.
    """
    with open(bibfile) as bibtex_file:
        parser = BibTexParser()
        parser.customization = customizations
        parser.ignore_nonstandard_types = False
        bib_database = bibtexparser.load(bibtex_file, parser=parser)

    papers = []
    for e in bib_database.entries:
        p = extract_paper_info(e)
        # Test the extracted paper, not the entry: the entry is never None,
        # so checking it would let a None result through to the sort below.
        if p is not None:
            papers.append(p)

    papers.sort(reverse=True)
    return papers
def main(self):
    """Parse the command line, load the BibTeX input and run the command.

    The command name from the arguments (with ``-`` replaced by ``_``)
    selects the method to call; its keyword arguments come from the
    matching section of ``self.config`` (empty when there is none).
    """
    self.args = self.argument_parser.parse_args()
    self.verbose = self.args.verbose

    def to_bib_item(record):
        # Wrap every parsed record in a BibItem.
        return BibItem(record, self.keywords.update, self.config)

    bib_parser = BibTexParser()
    bib_parser.homogenise_fields = False
    bib_parser.ignore_nonstandard_types = False
    bib_parser.customization = to_bib_item
    self.db = bibtexparser.load(self.args.infile, parser=bib_parser)

    # Dispatch to the method named after the chosen command.
    handler = getattr(self, self.args.command.replace('-', '_'))
    options = self.config.get(self.args.command, {})
    handler(**options)
def open(self, bibfile):
    """Load ``bibfile`` into ``self.bib_database`` and index its PDFs.

    For every entry, the second ``:``-separated part of its ``file`` field
    is stored in ``self.pdf_files`` under the entry's ID.
    """
    with open(bibfile) as handle:
        raw_bibtex = handle.read()

    tuned_parser = BibTexParser(common_strings=True)
    tuned_parser.ignore_nonstandard_types = True
    tuned_parser.homogenise_fields = True
    self.bib_database = bibtexparser.loads(raw_bibtex, tuned_parser)

    # id -> PDF path lookup table
    for record in self.bib_database.entries:
        self.pdf_files[record['ID']] = record['file'].split(':')[1]
def bib_to_dict(bib_string):
    """Convert a bibtex string to a dictionary (or a list of them).

    A non-empty ``keywords`` field is split on commas into a list.

    :return: ``None`` when nothing was parsed, the single entry dict when
        there is exactly one entry, otherwise the list of entry dicts.
    """
    unicode_parser = BibTexParser(common_strings=True)
    unicode_parser.ignore_nonstandard_types = False
    unicode_parser.homogenise_fields = False
    unicode_parser.customization = convert_to_unicode
    records = bibtexparser.loads(bib_string, unicode_parser).entries

    if not len(records) > 0:
        return None

    for record in records:
        keywords = record.get('keywords', '')
        if keywords != '':
            record['keywords'] = keywords.split(',')

    if len(records) == 1:
        return records[0]
    return records
def parse(filename):
    """
    Parse a bibtex file and build the Unite text for each of its entries.

    The formatter is looked up on ``formatter.apalike`` by the entry's
    ENTRYTYPE, falling back to ``formatter.apalike.default``.

    :returns: dictionary
        - key: the BibTeX entry key, converted with ``unicode``
        - value: the formatter's output for that entry
    """
    # 1. parse the file
    with open(filename) as bibtex_file:
        unite_parser = BibTexParser()
        unite_parser.customization = customizations
        unite_parser.ignore_nonstandard_types = False
        parsed_entries = bibtexparser.load(bibtex_file, parser=unite_parser).entries

    # 2. build the Unite text for each entry
    unite_keyvals = {}
    for record in parsed_entries:
        render = getattr(formatter.apalike, record['ENTRYTYPE'],
                         formatter.apalike.default)
        unite_keyvals[unicode(record['ID'])] = render(record)
    return unite_keyvals
#see https://docs.python.org/3/howto/argparse.html parser = argparse.ArgumentParser("") parser.add_argument('-t', '--target', default="~/Mega/library.bib") parser.add_argument('-o', '--output', default="bibtex") parser.add_argument('-l', '--library', default="~/MEGA/Mendeley") args = parser.parse_args() args.target = realpath(abspath(expanduser(args.target))) args.library = realpath(abspath(expanduser(args.library))) assert(exists(args.target)) logging.info("Targeting: {}".format(args.target)) logging.info("Output to: {}".format(args.output)) parser = BibTexParser(common_strings=False) parser.ignore_nonstandard_types = False parser.homogenise_fields = True def make_bar(k, v, left_pad_v, right_scale_v): pad = ((10 + left_pad_v) - len(k)) bar = ceil(((100 - pad) / right_scale_v) * v) full_str = "{}{}({}) : {}>\n".format(k, " " * pad, v, "=" * bar) return full_str def file_to_hash(filename): if not isfile(filename): raise Exception(filename) with open(filename, 'rb') as f: return sha256(f.read()).hexdigest() def add_slash_if_necessary(x):
def parse_urlfile(url_file):
    """
    take a file of the form

    category: ads url

    and get the bibtex from the URL and return a list of Paper objects
    with the category stored as the subject

    Lines starting with ``#`` and blank lines are skipped.  Everything the
    server sends before the first line starting with ``@`` is treated as a
    header and dropped.  The papers are returned sorted in descending order.
    """
    papers = []

    with open(url_file) as f:
        parser = BibTexParser()
        parser.customization = customizations
        parser.ignore_nonstandard_types = False

        for line in f:
            if line.startswith("#") or line.strip() == "":
                continue

            subject, url = line.split(": ")

            # for the ADS bibtex URL, lop off the paper_id
            paper_id = url.strip().split("/")[-1]
            bibtex_url = "http://adsabs.harvard.edu/cgi-bin/nph-bib_query?bibcode={}&data_type=BIBTEX".format(paper_id)

            print(bibtex_url)
            with urllib.request.urlopen(bibtex_url) as response:
                bibtex_html = response.read()

            # urlopen gives us bytes; decode line by line into text.
            text_lines = [raw.decode("utf8") for raw in bibtex_html.splitlines()]

            # strip off any header and just leave the bibtex.  This works on
            # whole lines: iterating over the joined string would walk it
            # character by character and start at the first "@" anywhere,
            # even in the middle of a header line.
            start = next(
                (idx for idx, text in enumerate(text_lines)
                 if text.startswith("@")),
                len(text_lines))
            bibtex = "".join("{}\n".format(text) for text in text_lines[start:])

            # parse the bibtex string
            bib_database = bibtexparser.loads(bibtex, parser=parser)

            for e in bib_database.entries:
                p = extract_paper_info(e)
                # Test the extracted paper, not the entry: the entry is
                # never None, and setting ``subject`` on None would fail.
                if p is not None:
                    p.subject = subject
                    papers.append(p)

    papers.sort(reverse=True)
    return papers