def load_bib(bib, titles):
    """Load a BibTeX file and expand its journal abbreviations.

    Args:
        bib: Path of the BibTeX file holding the entries.
        titles: Path of a BibTeX file whose parsed ``strings`` table is
            looked up with the lower-cased ``journal`` value of each entry.

    Returns:
        dict mapping each entry's stripped ``id`` to its record dict.
    """
    entry_parser = BibTexParser()
    entry_parser.customization = convert_to_unicode
    # Field homogenisation is switched off so the url field of a misc entry
    # can be found:
    # https://github.com/sciunto-org/python-bibtexparser/issues/93
    entry_parser.homogenise_fields = False
    with open(bib) as entries_handle:
        database = bibtexparser.load(entries_handle, parser=entry_parser)

    title_parser = BibTexParser()
    title_parser.customization = convert_to_unicode
    with open(titles) as titles_handle:
        title_db = bibtexparser.load(titles_handle, parser=title_parser)
    journal_names = title_db.strings

    records = {}
    for record in database.entries:
        if 'journal' in record:
            abbreviation = record['journal'].lower()
            if abbreviation in journal_names:
                record['journal'] = journal_names[abbreviation]
        if 'author' in record:
            # Work around one accent sequence that survives the unicode
            # conversion.
            record['author'] = record['author'].replace('{́i}', 'í')
        records[record['id'].strip()] = record
    return records
def main(argv=None):
    """Parse a BibTeX file and print its summary, journals and conferences.

    Args:
        argv: Argument vector; defaults to ``sys.argv``. ``argv[1]``, when
            present, is the BibTeX file to read, otherwise ``example.bib``.

    Returns:
        Always 0.
    """
    if argv is None:
        argv = sys.argv

    bib_parser = BibTexParser()
    bib_parser.customization = customizations

    bib_path = argv[1] if len(argv) > 1 else "example.bib"
    with open(bib_path) as handle:
        raw_text = handle.read()
    database = bibtexparser.loads(raw_text, parser=bib_parser)

    print_summary(database.entries)
    print_journals(database.entries)
    print_conferences(database.entries)
    return 0
def parseEntry(s):
    """Parse the BibTeX text *s* and return its first entry.

    The text is parsed with the ``convert_to_unicode`` customization; an
    ``IndexError`` is raised when no entry is found.
    """
    unicode_parser = BibTexParser()
    unicode_parser.customization = convert_to_unicode
    # Re-parsing the text is what normalizes the unicode.
    return bibtexparser.loads(s, parser=unicode_parser).entries[0]
def read_bib_file(filename, homogenize=False):
    """Read a BibTeX file into a database object.

    Args:
        filename (str): path of the bibtex file. A missing file is treated
            as an empty input.
        homogenize (bool): whether to parse with a parser that has
            ``common_strings`` enabled and applies
            ``nomenclature.homogenize_latex_encoding`` to each record.

    Returns:
        A BibDatabase object.
    """
    if os.path.exists(filename):
        with open(filename, 'r', encoding='utf-8') as handle:
            # A trailing blank is appended to whatever the file contains.
            contents = handle.read() + " "
    else:
        contents = " "

    if homogenize:
        bib_parser = BibTexParser(common_strings=True)
        bib_parser.customization = nomenclature.homogenize_latex_encoding
    else:
        # Let bibtexparser fall back to its default parser.
        bib_parser = None

    return bibtexparser.loads(contents, parser=bib_parser)
def _ingest_citations(rc):
    """Parse the BibTeX file ``rc.filename`` and upsert every entry.

    Each entry is stored through ``rc.client.update_one`` under
    ``rc.db`` / ``rc.coll`` with its BibTeX ID as ``_id``.
    """
    import bibtexparser
    from bibtexparser.bparser import BibTexParser
    from bibtexparser.customization import getnames

    def customizations(record):
        # Break author/editor fields on ", " and " and " before handing the
        # pieces to getnames.
        for field in ("author", "editor"):
            if field not in record:
                continue
            flattened = record[field].replace("\n", " ")
            pieces = []
            for chunk in flattened.split(", "):
                pieces.extend(part.strip() for part in chunk.split(" and "))
            record[field] = getnames(pieces)
        return record

    bib_parser = BibTexParser()
    bib_parser.ignore_nonstandard_types = False
    bib_parser.customization = customizations

    with open(rc.filename, "r", encoding='utf-8') as handle:
        database = bibtexparser.load(handle, parser=bib_parser)

    # The collection is looked up before updating; the result is not used
    # further in this function.
    _collection = rc.client[rc.db][rc.coll]

    for entry in database.entries:
        entry_id = entry.pop("ID")
        entry["entrytype"] = entry.pop("ENTRYTYPE")
        if "author" in entry:
            authors = []
            for joined in entry["author"]:
                authors.extend(name.strip() for name in RE_AND.split(joined))
            entry["author"] = authors
        if "title" in entry:
            entry["title"] = RE_SPACE.sub(" ", entry["title"])
        rc.client.update_one(rc.db, rc.coll, {"_id": entry_id}, entry,
                             upsert=True)
def run(self):
    """Render the publications of a BibTeX source as a raw HTML node.

    The BibTeX comes from the file named by the first directive argument,
    or from the directive content when no argument is given. Options read:
    ``sort`` (default ``'date'``), ``template`` (path of a Jinja template)
    and ``entries`` (keys of the entries to keep).

    Returns:
        A one-element list holding a ``nodes.raw`` HTML node.
    """
    options = self.options
    sort_type = options.get('sort', 'date')

    if 'template' not in options:
        # Use template from the Pelican theme
        template = pelican_generator.get_template('publications')
    else:
        directory, name = os.path.split(options['template'])
        template = Environment(
            loader=FileSystemLoader(directory)).get_template(name)

    bib_parser = BibTexParser(common_strings=True)
    bib_parser.customization = customize

    if not self.arguments:
        source_text = '\n'.join(self.content)
        database = bibtexparser.loads(source_text, parser=bib_parser)
    else:
        with open(self.arguments[0].strip(), 'r') as handle:
            database = bibtexparser.load(handle, parser=bib_parser)

    wanted_keys = options.get('entries', [])
    if wanted_keys:
        by_key = database.entries_dict
        selected = [by_key[key] for key in wanted_keys]
    else:
        selected = database.entries

    html = template.render(publications=sort_entries(selected, sort_type))
    return [nodes.raw('', html, format='html')]
def read_bib_file(filename, homogenize=False):
    """Read a BibTeX file into a database object.

    Args:
        filename (str): path of the bibtex file. A missing file is treated
            as an empty input.
        homogenize (bool): whether to apply
            ``nomenclature.homogenize_latex_encoding`` to each record while
            parsing.

    Returns:
        A BibDatabase object.
    """
    contents = " "
    if os.path.exists(filename):
        with open(filename, 'r', encoding='utf-8') as handle:
            contents = handle.read()

    if homogenize:
        bib_parser = BibTexParser()
        bib_parser.customization = nomenclature.homogenize_latex_encoding
    else:
        # Let bibtexparser fall back to its default parser.
        bib_parser = None

    return bibtexparser.loads(contents, parser=bib_parser)
def _processResults(self, data):
    """Get bibtex data from zbMATH website.

    Every ``bibtex/...<digits>.bib`` link found in *data* is downloaded and
    parsed. When ``self.otherID`` is set and an MRef match is found, the MSN
    record is used instead, with the ``zbl`` number copied over and the
    ``doi`` copied when the MSN record lacks one. The resulting BibTeX
    strings are joined with newlines and stored in ``self.refs``.
    """
    import bibtexparser
    from bibtexparser.bparser import BibTexParser

    # Raw string: the pattern contains regex escapes (\d, \.) that are not
    # valid Python string escapes.
    bib_links = re.findall(r"(?si)bibtex/.*?\d{3,}\.bib", data)
    records = []

    if self.otherID:
        # setup for MRef fetching
        from msn import MRef
        mr = MRef()

    for link in bib_links:
        # NOTE(review): urllib.urlopen only exists on Python 2 -- confirm
        # the interpreter this module targets.
        bibtext = urllib.urlopen("https://zbmath.org/" + link).read()

        # A parser keeps the entries of earlier parses, so a new one is
        # created per record; otherwise entries[0] below would always be
        # the first record ever parsed.
        parser = BibTexParser()
        parser.customization = customizations
        zbl = bibtexparser.loads(bibtext, parser=parser)

        if self.otherID and mr.fetch(bibtext):
            # found MRef match for zbMATH record
            msn_db = bibtexparser.loads(mr.refs)
            msn_entry = msn_db.entries[0]
            zbl_entry = zbl.entries[0]
            # use MSN bibtex entry with zbl number added
            # and doi transferred if missing
            msn_entry['zbl'] = zbl_entry['zbl']
            if 'doi' not in msn_entry and 'doi' in zbl_entry:
                msn_entry['doi'] = zbl_entry['doi']
            zbl = msn_db

        records.append(bibtexparser.dumps(zbl))

    self.refs = "\n".join(records)
def _FindBibEntriesParser(self):
    """Build completion data for every entry of the project's .bib files.

    Files are found with ``self._Walk(self._main_directory, ".bib")``.
    When ``self._CacheDataAndSkip`` says a file can be skipped, its cached
    completions are reused; otherwise the file is parsed and the result is
    stored in ``self._cached_data``.

    Returns:
        list of completion items, one per entry that has an ``ID``.
    """
    ret = []
    for filename in self._Walk(self._main_directory, ".bib"):
        skip, cache = self._CacheDataAndSkip(filename)
        if skip:
            ret.extend(cache)
            continue

        # A parser keeps the entries of earlier parses, so a new one is
        # created per file; a shared parser would add the entries of
        # previously parsed files to this file's completions.
        parser = BibTexParser()
        parser.customization = bib_customizations
        with open(filename) as bibtex_file:
            bib_database = bibtexparser.load(bibtex_file, parser=parser)

        resp = []
        for entry in bib_database.entries:
            if 'ID' not in entry:
                continue
            title = entry['title']
            author = entry['author']
            resp.append(
                responses.BuildCompletionData(
                    entry['ID'], "%s (%s)" % (title, author)))

        ret.extend(resp)
        self._cached_data[filename] = resp
    return ret
def parse_bibtex(filename):
    """Parse a BibTeX file into a list of title/year/author dicts.

    Entries without an ``author`` field are skipped. Titles and authors have
    newlines replaced by spaces and carriage returns and braces removed;
    authors are additionally reduced to their ASCII characters. The number
    of parsed entries and the resulting list are printed.

    Returns:
        list of dicts with the keys ``title``, ``year`` and ``author``.
    """
    bib_parser = BibTexParser()
    bib_parser.customization = convert_to_unicode
    with open(filename) as handle:
        database = bibtexparser.load(handle, parser=bib_parser)
    print("Parsed the bibtex file, there are {} entries\n".format(
        len(database.entries)))

    def _flatten(text):
        # Same replacement chain for titles and authors.
        for old, new in ((" ", " "), ("\n", " "), ("\r", ""),
                         ("{", ""), ("}", "")):
            text = text.replace(old, new)
        return text

    articles = []
    for record in database.entries:
        # Title and year are read before the author check, so both fields
        # are required even for entries that end up skipped.
        title = _flatten("{}".format(record['title']))
        year = "{}".format(record['year'])
        if 'author' not in record:
            continue
        ascii_author = record['author'].encode('ascii', 'ignore').decode(
            'ascii')
        articles.append({
            'title': title,
            'year': year,
            'author': _flatten(ascii_author),
        })

    print(articles)
    return articles
def rename_bib_file(citeDir, filename):
    """Register the first entry of an S3-hosted BibTeX file.

    The object *filename* is read from the bucket returned by
    ``aws_bucket_info()``. When the entry's ID is not yet in the citation
    data frame, a ``scholarly`` search on the title supplies ``url`` and
    ``abstract`` (empty strings when unavailable) and the record is stored.

    Returns:
        ``citeDir + <ID> + "_slrm.bib"`` for a newly stored entry, or ``""``
        when the ID was already known.
    """
    my_bucket, _s3_resource = aws_bucket_info()
    bibtex_text = my_bucket.Object(filename).get()['Body'].read().decode(
        'utf-8')

    parser = BibTexParser()
    parser.customization = convert_to_unicode
    entry = bibtexparser.loads(bibtex_text, parser=parser).entries[0]

    df = read_bib_df(citation_pickle_file=citation_pickle_file)
    if entry['ID'] in df['ID'].values:
        return ""

    search_query = scholarly.search_pubs_query(entry['title'])
    # An empty result set yields None instead of raising StopIteration; the
    # lookups below then fall back to empty strings.
    res = next(search_query, None)

    found = {}
    for field in ('url', 'abstract'):
        try:
            found[field] = res.bib[field]
        except Exception:
            # Missing field or no search hit: store an empty value.
            found[field] = ""

    dict_to_store = {
        'ID': entry['ID'],
        'numLikes': 0,
        'Title': entry['title'],
        'Authors': entry['author'],
        'url': found['url'],
        'Abstract': found['abstract'],
    }
    store_bib_in_df(dict_to_store, citation_pickle_file=citation_pickle_file)
    return citeDir + entry['ID'] + "_slrm.bib"
def read_bibtex(filename):
    """Parse a BibTeX file and return its list of entries.

    Each record goes through ``convert_to_unicode`` and the ``author``
    customization of bibtexparser, then has its ``editor`` field split.
    """
    import bibtexparser
    from bibtexparser.bparser import BibTexParser
    # Imported here like the other dependencies, so the function does not
    # rely on a module-level getnames.
    from bibtexparser.customization import getnames

    def editor_split(record):
        """
        custom transformation - split editor field into a list of "Name, Surname"

        :record: dict -- the record
        :returns: dict -- the modified record
        """
        if "editor" in record:
            if record["editor"]:
                editors = record["editor"].replace('\n', ' ').split(" and ")
                record["editor"] = getnames([i.strip() for i in editors])
            else:
                # An empty editor field is dropped.
                del record["editor"]
        return record

    def customizations(record):
        """
        custom transformation applied during parsing
        """
        record = bibtexparser.customization.convert_to_unicode(record)
        # Split author field from separated by 'and' into a list of "Name, Surname".
        record = bibtexparser.customization.author(record)
        # Split editor field from separated by 'and' into a list of "Name, Surname".
        record = editor_split(record)
        return record

    with open(filename) as f:
        parser = BibTexParser()
        parser.customization = customizations
        return bibtexparser.load(f, parser=parser).entries
def _FindBibEntriesParser(self):
    """Collect completion data for the entries of all .bib files.

    The files come from ``self._Walk(self._main_directory, ".bib")``. A file
    that ``self._CacheDataAndSkip`` marks as skippable contributes its
    cached completions; any other file is parsed and its completions are
    cached in ``self._cached_data``.

    Returns:
        list of completion items, one per entry that has an ``ID``.
    """
    ret = []
    for filename in self._Walk(self._main_directory, ".bib"):
        skip, cache = self._CacheDataAndSkip(filename)
        if skip:
            ret.extend(cache)
            continue

        # One parser per file: a parser retains the entries of earlier
        # parses, so sharing it would repeat earlier files' entries here.
        parser = BibTexParser()
        parser.customization = bib_customizations
        with open(filename) as bibtex_file:
            bib_database = bibtexparser.load(bibtex_file, parser=parser)

        resp = []
        for entry in bib_database.entries:
            if 'ID' not in entry:
                continue
            title = entry['title']
            author = entry['author']
            resp.append(responses.BuildCompletionData(
                entry['ID'],
                "%s (%s)" % (title, author)
            ))

        ret.extend(resp)
        self._cached_data[filename] = resp
    return ret
def import_bibtex(
    bibtex,
    pub_dir="publication",
    featured=False,
    overwrite=False,
    normalize=False,
    dry_run=False,
):
    """Import publications from BibTeX file.

    Every entry of *bibtex* is handed to ``parse_bibtex_entry`` together
    with the remaining keyword options.

    Raises:
        AcademicError: if *bibtex* is not an existing file.
    """
    from academic.cli import AcademicError, log

    # Refuse to continue without a BibTeX file.
    if not Path(bibtex).is_file():
        err = "Please check the path to your BibTeX file and re-run"
        log.error(err)
        raise AcademicError(err)

    entry_options = dict(
        pub_dir=pub_dir,
        featured=featured,
        overwrite=overwrite,
        normalize=normalize,
        dry_run=dry_run,
    )

    bib_parser = BibTexParser(common_strings=True)
    bib_parser.customization = convert_to_unicode
    bib_parser.ignore_nonstandard_types = False

    with open(bibtex, "r", encoding="utf-8") as handle:
        database = bibtexparser.load(handle, parser=bib_parser)
        for record in database.entries:
            parse_bibtex_entry(record, **entry_options)
def _bibtexQuery(self, query):
    """Turn a BibTeX query string into a list of (field, value) pairs.

    Only the first parsed record is used. Its author field is cut down by
    a regex substitution, title and journal are passed through
    ``self._citationQuery``, and ``year`` is copied to ``date``.

    Returns:
        list of ``(key, value)`` tuples restricted to a fixed set of
        fields, or an empty list when nothing was parsed.
    """
    import bibtexparser
    from bibtexparser.bparser import BibTexParser

    latex_parser = BibTexParser()
    latex_parser.customization = homogeneize_latex_encoding
    database = bibtexparser.loads(query, parser=latex_parser)
    if not database.entries:
        return []

    # only the first record
    record = database.entries[0]

    if "author" in record:
        # just last name
        record["author"] = re.sub(r',.*?(and\s*|$)', ' ', record['author'])
    for field in ("title", "journal"):
        if field in record:
            record[field] = self._citationQuery(record[field])[0][1]
    if "year" in record:
        record["date"] = record["year"]

    # only use a few fields
    # TODO add numbers
    wanted = {"author", "title", "journal", "mrnumber", "date", "arxiv",
              "zbl"}
    return [(key, value) for key, value in record.items() if key in wanted]
def __init__(self, bibfile):
    """Load the entries of *bibfile* and build the keyword map.

    The file is parsed with ``common_strings`` enabled and the
    ``convert_to_unicode`` customization; the entries are stored in
    ``self.entries`` before ``self.map_keywords()`` is called.
    """
    unicode_parser = BibTexParser(common_strings=True)
    unicode_parser.customization = convert_to_unicode
    with open(bibfile) as handle:
        database = bibtexparser.load(handle, parser=unicode_parser)
    self.entries = database.entries
    self.map_keywords()
def match_bibtex_to_work(bibtex_str):
    """Find works by bibtex entries

    Braces are removed from every entry title before the lookup. Each
    entry is then paired with the result of ``find_work_by_info``, which
    receives the info built from a copy of the entry.

    Returns a list of matches: (entry, work)

    Doctest:

    .. doctest::

        >>> reload()
        >>> bibtex = ''' @inproceedings{murta2014a,
        ...   address = {Cologne, Germany},
        ...   author = {Murta, Leonardo and Braganholo, Vanessa and Chirigati, Fernando and Koop, David and Freire, Juliana},
        ...   booktitle = {IPAW},
        ...   pages = {71--83},
        ...   publisher = {Springer},
        ...   title = {no{W}orkflow: capturing and analyzing provenance of scripts},
        ...   year = {2014}
        ... } '''
        >>> works = match_bibtex_to_work(bibtex)
        >>> murta2014a = work_by_varname("murta2014a")
        >>> works[0][1] is murta2014a
        True
    """
    unicode_parser = BibTexParser()
    unicode_parser.customization = convert_to_unicode
    database = bibtexparser.loads(bibtex_str, parser=unicode_parser)
    records = database.entries

    # First pass: strip the braces from every title.
    for record in records:
        record['title'] = record['title'].replace('{', '').replace('}', '')

    # Second pass: look up the work for each (copied) record.
    matches = []
    for record in records:
        work = find_work_by_info(bibtex_to_info(copy(record)))
        matches.append((record, work))
    return matches
def simplify(inputfile, limitn=None, verbose=False):
    """Print a reduced version of every entry of a BibTeX file.

    For each entry the type, the ``unique-id`` (braces removed), the first
    author followed by "and others", title and year are printed, plus doi,
    month, volume, journal, number and booktitle when present.

    Args:
        inputfile: Path of the BibTeX file to read.
        limitn: Unused; kept for interface compatibility.
        verbose: Unused; kept for interface compatibility.
    """
    import re

    with open(inputfile, 'r') as handle:
        bibtex_str = handle.read()

    parser = BibTexParser()
    parser.customization = convert_to_unicode
    bib_database = bibtexparser.loads(bibtex_str, parser=parser)

    def _print_field(entry, name):
        # Same "\t name = { value }," layout for every plain field.
        print("\t " + name + " = {", str(entry[name]).strip(), "},")

    for entry in bib_database.entries:
        # str.translate(None, "{}") only works on Python 2; replace() strips
        # the braces on both major versions.
        key = str(entry['unique-id']).strip().replace("{", "").replace(
            "}", "")
        print("@", str(entry['ENTRYTYPE']), "{", key + ",")

        # Split on the word "and" surrounded by whitespace, so that names
        # containing the letters "and" are left intact.
        first_author = re.split(
            r'\s+and\s+', str(entry['author']).strip(), maxsplit=1)[0]
        print("\t author = {", first_author + " and others},")

        _print_field(entry, 'title')
        for name in ('doi', 'month'):
            if name in entry:
                _print_field(entry, name)
        _print_field(entry, 'year')
        for name in ('volume', 'journal', 'number', 'booktitle'):
            if name in entry:
                _print_field(entry, name)
        print("}\n")
def import_bibtex(request):
    """Import the articles of an uploaded BibTeX file into a review.

    Reads ``review-id`` and ``source-id`` from the POST data and the upload
    from ``request.FILES['bibtex']``. The upload is accepted when it has a
    ``.bib``/``.bibtex`` extension or the ``application/x-bibtex`` content
    type; otherwise an error message is queued. In both cases the response
    redirects to the ``import_studies`` view of the review.
    """
    review_id = request.POST['review-id']
    source_id = request.POST['source-id']

    review = Review.objects.get(pk=review_id)
    source = Source.objects.get(pk=source_id)

    bibtex_file = request.FILES['bibtex']
    list_bibtex_file = fix_bibtex_file(bibtex_file.readlines())
    str_bibtex_file = '\r\n'.join(list_bibtex_file)

    ext = os.path.splitext(bibtex_file.name)[1]
    valid_extensions = ['.bib', '.bibtex']

    if ext in valid_extensions or bibtex_file.content_type == 'application/x-bibtex':
        parser = BibTexParser()
        parser.customization = convert_to_unicode
        bib_database = bibtexparser.loads(str_bibtex_file, parser=parser)
        articles = bibtex_to_article_object(bib_database, review, source)
        # The pdb.set_trace() breakpoint that used to sit here was removed:
        # it blocks the request until someone answers the debugger.
        _import_articles(request, source, articles)
    else:
        messages.error(request, u'Invalid file type. Only .bib or .bibtex files are accepted.')

    return redirect(r('import_studies', args=(review.author.username, review.name)))
def parse_bibtex(bib):
    '''Parses the BibTex returned by the DOI resolver

    Every key of ``ENTITIES`` found in the record is replaced first. After
    parsing, braces matched by the clean-up pattern are removed from each
    value, ``--`` in ``pages`` becomes ``-`` and a trailing parenthesised
    part of ``publisher`` is dropped.

    Args:
        bib (str): a BibTex record

    Returns:
        Dict containing reference data

    Raises:
        Exception: if a value still contains ``{`` after the clean-up.
    '''
    # .items() instead of .iteritems(): the latter only exists on Python 2.
    for entity, repl in ENTITIES.items():
        bib = bib.replace(entity, repl)

    # Parse BibTex using the handy dandy bibtexparser module
    import bibtexparser
    from bibtexparser.bparser import BibTexParser
    from bibtexparser.customization import convert_to_unicode

    parser = BibTexParser()
    parser.customization = convert_to_unicode
    parsed = bibtexparser.loads(bib, parser=parser).entries[0]

    # Miscellaneous clean up
    braces = re.compile(u'\{([A-Z_ \-]+|[\u0020-\uD7FF])\}', re.U)
    # Iterate over a snapshot because the values are rewritten in the loop.
    for key, val in list(parsed.items()):
        val = braces.sub(r'\1', val)
        if '{' in val:
            raise Exception('Unhandled LaTeX: {}'.format(val.encode('cp1252')))
        parsed[key] = val

    parsed['pages'] = parsed.get('pages', '').replace('--', '-')
    if parsed.get('publisher', '').endswith(')'):
        parsed['publisher'] = parsed['publisher'].rsplit('(', 1)[0].rstrip()
    return parsed
def import_bibtex(bibtex, pub_dir='publication', featured=False,
                  overwrite=False):
    """Import publications from BibTeX file.

    Besides parsing *bibtex*, the Google Scholar record of a fixed author
    ID is fetched and its ``publication`` attribute is written to
    ``data/scholar_data.json``. Each BibTeX entry is then passed to
    ``parse_bibtex_entry`` together with the scholar data.

    Returns None immediately, after printing a hint, when *bibtex* is not a
    file.
    """
    # Nothing to do without a BibTeX file.
    if not Path(bibtex).is_file():
        print('Please check the path to your BibTeX file and re-run.')
        return

    # Import Google Scholar data of Julien Vanharen = R6OO3noAAAAJ.
    scholar_data = Author("R6OO3noAAAAJ")
    with open("data/scholar_data.json", "w") as fid:
        serialized = json.dumps(scholar_data.publication, sort_keys=True,
                                indent=4, separators=(',', ': '))
        fid.write(serialized)

    unicode_parser = BibTexParser()
    unicode_parser.customization = convert_to_unicode

    with open(bibtex, 'r', encoding='utf-8') as handle:
        database = bibtexparser.load(handle, parser=unicode_parser)
        for record in database.entries:
            parse_bibtex_entry(record, scholar_data, pub_dir=pub_dir,
                               featured=featured, overwrite=overwrite)
def get_references(filepath):
    """Parse the BibTeX file at *filepath* and return the database object.

    Records are passed through the ``customizations`` callback.
    """
    custom_parser = BibTexParser()
    custom_parser.customization = customizations
    with open(filepath) as handle:
        database = bibtexparser.load(handle, parser=custom_parser)
    return database
def parse_urlfile(url_file):
    """
    take a file of the form

      category: ads url

    and get the bibtex from the URL and return a list of Paper objects
    with the category stored as the subject

    Blank lines and lines starting with ``#`` are ignored. The returned
    list is sorted in reverse order.
    """
    papers = []

    with open(url_file) as f:
        for line in f:
            if line.startswith("#") or line.strip() == "":
                continue

            # Split on the first ": " only, so the URL part stays intact.
            subject, url = line.split(": ", 1)

            # for the ADS bibtex URL, lop off the paper_id
            paper_id = url.strip().split("/")[-1]
            bibtex_url = "http://adsabs.harvard.edu/cgi-bin/nph-bib_query?bibcode={}&data_type=BIBTEX".format(paper_id)

            # urlopen gives us bytes that must be decoded into text before
            # we can work with them.
            print(bibtex_url)
            with urllib.request.urlopen(bibtex_url) as response:
                bibtex_html = response.read()

            response_lines = [raw.decode("utf8")
                              for raw in bibtex_html.splitlines()]

            # strip off any header and just leave the bibtex: keep
            # everything from the first *line* that starts with "@"
            start = next(
                (index for index, text in enumerate(response_lines)
                 if text.startswith("@")),
                None)
            if start is None:
                bibtex = ""
            else:
                bibtex = "".join(
                    "{}\n".format(text) for text in response_lines[start:])

            # A parser retains the entries of earlier parses, so a new one
            # is used per URL; otherwise earlier papers would be added again.
            parser = BibTexParser()
            parser.customization = customizations
            bib_database = bibtexparser.loads(bibtex, parser=parser)

            for e in bib_database.entries:
                p = extract_paper_info(e)
                # Check the extracted paper, not the raw entry: e is never
                # None here.
                if p is not None:
                    p.subject = subject
                    papers.append(p)

    papers.sort(reverse=True)
    return papers
def getentries(filename):
    """Return the entries of the BibTeX file *filename*.

    The file is first passed to ``save_import_file``. It is then parsed
    with the encodings ``utf-8-sig``, ``utf-16`` and the platform default,
    in that order; a decoding failure moves on to the next encoding.

    Raises:
        IOError: if saving the import file failed.
        ValueError: if the file could not be parsed.
    """
    try:
        save_import_file(filename)
    except IOError as e:
        logg.error("bibtex import: save import file failed: {}".format(e))
        raise IOError("save import file failed")

    bibtex = None
    error = None
    # use utf-8-sig instead of utf-8 to get rid of BOM_UTF8, which confuses bibtex parser
    for encoding in ('utf-8-sig', 'utf-16', None):
        try:
            # The context manager closes the handle on success and failure.
            with codecs.open(filename, "r", encoding=encoding) as fi:
                parser = BibTexParser()
                # accept also non standard records like @SCIENCEREPORT
                parser.ignore_nonstandard_types = False
                parser.customization = _bibteximport_customize
                bibtex = bibtex_load(fi, parser=parser)
            # seems to be the correct encoding, don't try other encodings
            break
        except UnicodeError as e:
            # Wrong encoding guess: try the next one. Catching the
            # exception type does not depend on the wording of the error
            # message.
            if encoding is not None:
                continue
            error = e
            break
        except Exception as e:
            error = e
            break

    if error is not None:
        # Use the stored exception: the name bound by "except ... as" is
        # gone once the except block has ended on Python 3.
        logg.error("bibtex import: bibtexparser failed: {}".format(error))
        raise ValueError("bibtexparser failed")

    return bibtex.entries
def import_bibtex(request):
    """Import the articles of an uploaded BibTeX file into a review.

    ``review-id`` and ``source-id`` come from the POST data, the upload
    from ``request.FILES['bibtex']``. Uploads without a ``.bib``/``.bibtex``
    extension and without the ``application/x-bibtex`` content type are
    rejected with an error message. The response always redirects to the
    ``import_studies`` view of the review.
    """
    review = Review.objects.get(pk=request.POST['review-id'])
    source = Source.objects.get(pk=request.POST['source-id'])
    upload = request.FILES['bibtex']

    extension = os.path.splitext(upload.name)[1]
    accepted = (extension in ['.bib', '.bibtex']
                or upload.content_type == 'application/x-bibtex')

    if not accepted:
        messages.error(
            request,
            u'Invalid file type. Only .bib or .bibtex files are accepted.')
    else:
        unicode_parser = BibTexParser()
        unicode_parser.customization = convert_to_unicode
        database = bibtexparser.load(upload, parser=unicode_parser)
        articles = bibtex_to_article_object(database, review, source)
        _import_articles(request, source, articles)

    target = r('import_studies', args=(review.author.username, review.name))
    return redirect(target)
def convert(inpath):
    '''
    Convert from bibtex to bibjson.
    One argument expected: path to bibtex file.

    Every parsed entry is extended in place with a running ``_id``
    (starting at 1) and the ``collection`` value ``"test01"``.

    Returns a dict whose ``records`` key holds the list of entries.
    '''
    import bibtexparser
    from bibtexparser.bparser import BibTexParser

    parser = BibTexParser()
    parser.customization = customizations
    with open(inpath) as bibtex_file:
        bib_database = bibtexparser.load(bibtex_file, parser=parser)

    my_records = []
    # enumerate supplies the 1-based running number.
    for num, record in enumerate(bib_database.entries, start=1):
        record["_id"] = num
        record["collection"] = "test01"
        my_records.append(record)

    return {"records": my_records}
def add_bibtex_string(bibtex_str):
    """Load input bibtex string into database.

    One ``BiblioEntry`` is created and committed per parsed entry. An entry
    that cannot be stored is reported on stdout and skipped.
    """
    # parse input biblio as unicode:
    parser = BibTexParser()
    parser.customization = convert_to_unicode
    bib_database = bibtexparser.loads(bibtex_str, parser=parser)

    for bib in bib_database.entries:
        try:
            bib_entry = BiblioEntry(ID=bib.get("ID", ""),
                                    ENTRYTYPE=bib.get("ENTRYTYPE", ""),
                                    authors=bib.get("author", ""),
                                    title=bib.get("title", ""),
                                    year=bib.get("year", ""),
                                    month=bib.get("month", ""),
                                    publisher=bib.get("publisher", ""),
                                    journal=bib.get("journal", ""),
                                    school=bib.get("school", ""),
                                    pdf=bib.get("pdf", ""),
                                    url=bib.get("url", ""),
                                    tag=bib.get("tag", "undefined"),
                                    keywords=bib.get("keywords", ""))
            db.session.add(bib_entry)
            db.session.commit()
        except Exception:
            # Exception instead of a bare except, so KeyboardInterrupt and
            # SystemExit are not swallowed.
            # NOTE(review): assumes db.session is an SQLAlchemy session,
            # which must be rolled back after a failed commit before it can
            # be used for the next entry -- confirm.
            db.session.rollback()
            print("Entry already in database: ", bib.get("title"))
def bib2jekyllcol(inputFile, outputDir):
    """This prints the bibtex file to output directory as jekyll collection folder(s)

    The visible part of the function parses *inputFile* and prepares
    *outputDir*: the directory is created when missing, otherwise the files
    directly inside it are deleted (deletion errors are printed, not
    raised).
    """
    # read and parse bib file
    with open(inputFile) as bibtex_file:
        bibtex_str = bibtex_file.read()
    parser = BibTexParser()
    parser.customization = convert_to_unicode
    bib_database = bibtexparser.loads(bibtex_str, parser=parser)

    # NOTE(review): bib_database, month_list and type_list are not used in
    # the code shown here; they are kept in case the function continues
    # elsewhere -- confirm.
    # create dictionary for transformation of month to number
    month_list = [
        "jan", "feb", "mar", "apr", "may", "june", "july", "aug", "sept",
        "oct", "nov", "dec"
    ]
    # type names:
    type_list = [
        "title", "author", "journal", "volume", "number", "year", "month",
        "doi", "pages", "publisher", "booktitle", "note"
    ]

    if not os.path.exists(outputDir):
        os.makedirs(outputDir)
    else:
        print("Deleting existing collection file...\n")
        for file in os.listdir(outputDir):
            file_path = os.path.join(outputDir, file)
            try:
                if os.path.isfile(file_path):
                    os.unlink(file_path)
            except Exception as e:
                # "except X as e" and print(e) are valid on Python 2.6+
                # and Python 3, unlike the former "except X, e: print e".
                print(e)
def load(self, model):
    """
    Loads the object info from data model content

    All attributes listed by ``self.asdict()`` are removed first; every
    field of the single parsed BibTeX entry is then set as an attribute.

    Parameters
    ----------
    model : str or DataModelDict
        Model content or file path to model content.

    Raises
    ------
    AssertionError
        If the BibTeX content does not hold exactly one entry.
    """
    # Check if model is data model
    try:
        model = DM(model)
    except Exception:
        # Not loadable as a data model: treat the input as BibTeX text.
        # Exception instead of a bare except, so KeyboardInterrupt and
        # SystemExit still propagate.
        bibtex = model
    else:
        bibtex = model.find('bibtex')

    for key in self.asdict():
        delattr(self, key)

    # Parse and extract content
    parser = BibTexParser()
    parser.customization = convert_to_unicode
    bib_database = bibtexparser.loads(bibtex, parser=parser)
    # NOTE(review): assert is skipped under "python -O"; it is kept so that
    # callers catching AssertionError keep working.
    assert len(
        bib_database.entries) == 1, 'bibtex must be for a single reference'

    bibdict = bib_database.entries[0]
    for key, value in bibdict.items():
        setattr(self, key, value)
def get_bibtex(f):
    """Parse the open BibTeX file *f* and return the database object.

    The parser is created without common strings, keeps non-standard entry
    types, homogenises field names and applies ``clean_tex`` to each
    record.
    """
    tex_parser = BibTexParser(common_strings=False)
    tex_parser.customization = clean_tex
    tex_parser.homogenise_fields = True
    tex_parser.ignore_nonstandard_types = False
    return bibtexparser.load(f, tex_parser)
def bib2jekyllcol(inputFile, outputDir):
    """This prints the bibtex file to output directory as jekyll collection folder(s)

    The visible part of the function parses *inputFile* and prepares
    *outputDir*: a missing directory is created, an existing one has the
    files directly inside it deleted (deletion errors are printed, not
    raised).
    """
    # read and parse bib file
    with open(inputFile) as bibtex_file:
        bibtex_str = bibtex_file.read()
    parser = BibTexParser()
    parser.customization = convert_to_unicode
    bib_database = bibtexparser.loads(bibtex_str, parser=parser)

    # NOTE(review): bib_database, month_list and type_list are not used in
    # the code shown here; they are kept in case the function continues
    # elsewhere -- confirm.
    # create dictionary for transformation of month to number
    month_list = ["jan", "feb", "mar", "apr", "may", "june", "july", "aug",
                  "sept", "oct", "nov", "dec"]
    # type names:
    type_list = ["title", "author", "journal", "volume", "number", "year",
                 "month", "doi", "pages", "publisher", "booktitle", "note"]

    if not os.path.exists(outputDir):
        os.makedirs(outputDir)
    else:
        print("Deleting existing collection file...\n")
        for file in os.listdir(outputDir):
            file_path = os.path.join(outputDir, file)
            try:
                if os.path.isfile(file_path):
                    os.unlink(file_path)
            except Exception as e:
                # Syntax valid on Python 2.6+ and Python 3, unlike the
                # former "except Exception, e: print e".
                print(e)
def _ingest_citations(rc):
    """Parse the BibTeX file ``rc.filename`` and upsert every entry.

    Entries are written through ``rc.client.update_one`` into
    ``rc.db`` / ``rc.coll``, keyed by their BibTeX ID.
    """
    import bibtexparser
    from bibtexparser.bparser import BibTexParser
    from bibtexparser.customization import getnames

    def customizations(record):
        # Author/editor fields are broken on ', ' and " and " before the
        # pieces go to getnames.
        for field in ('author', 'editor'):
            if field not in record:
                continue
            flattened = record[field].replace('\n', ' ')
            pieces = []
            for chunk in flattened.split(', '):
                pieces.extend(part.strip() for part in chunk.split(" and "))
            record[field] = getnames(pieces)
        return record

    bib_parser = BibTexParser()
    bib_parser.ignore_nonstandard_types = False
    bib_parser.customization = customizations

    with open(rc.filename, 'r') as handle:
        database = bibtexparser.load(handle, parser=bib_parser)

    # The collection is looked up before updating; the result is not used
    # further in this function.
    _collection = rc.client[rc.db][rc.coll]

    for entry in database.entries:
        entry_id = entry.pop('ID')
        entry['entrytype'] = entry.pop('ENTRYTYPE')
        if 'author' in entry:
            authors = []
            for joined in entry['author']:
                authors.extend(name.strip() for name in RE_AND.split(joined))
            entry['author'] = authors
        if 'title' in entry:
            entry['title'] = RE_SPACE.sub(' ', entry['title'])
        rc.client.update_one(rc.db, rc.coll, {'_id': entry_id}, entry,
                             upsert=True)
def process_dir(bibfile, md_files, out_dir, build_dir):
    """Preprocess markdown files against a BibTeX file.

    Each file of *md_files* is run through ``preprocess_markdown_file`` and
    written under the same name into *out_dir*; the accumulated references
    are written to ``<bibfile stem>.md`` in *out_dir*.

    Args:
        bibfile: Path object of the BibTeX file.
        md_files: Iterable of Path objects of the markdown inputs.
        out_dir: Path object of the output directory (created if missing).
        build_dir: Unused in this function.

    Raises:
        ValueError: if an output file would overwrite its own input.
    """
    stem = bibfile.stem
    references_md = out_dir / Path('{}.md'.format(stem))
    references_html = (Path('..') / Path(out_dir.name)
                       / Path('{}.html'.format(stem)))
    out_dir.mkdir(parents=True, exist_ok=True)

    unicode_parser = BibTexParser()
    unicode_parser.customization = convert_to_unicode
    with open(bibfile, 'r') as handle:
        bib_data = bibtexparser.load(handle, parser=unicode_parser)

    counter = 1
    refs = ''
    for md_path in md_files:
        destination = out_dir / md_path.name
        if md_path.resolve() == destination.resolve():
            raise ValueError(
                "Script would overwrite the input. Choose different out_dir.")
        with open(md_path, 'r') as source, open(destination, 'w') as target:
            lines, bib_data, refs, counter = preprocess_markdown_file(
                source, bib_data, reffile=references_html, n=counter,
                refs=refs)
            for text in lines:
                target.write(text)

    with open(references_md, 'w') as outfile:
        outfile.write('title: References\n')
        outfile.write('---')
        outfile.write(refs)
def load_model(self, model, name=None):
    """
    Loads the object info from data model content

    When the parent ``load_model`` fails, *model* itself is treated as
    BibTeX text. After parsing, ``self.name`` and ``self.model`` are filled
    in if accessing them fails.

    Parameters
    ----------
    model : str or DataModelDict
        Model content or file path to model content.
    name : str, optional
        The name to use when saving the record.

    Raises
    ------
    AssertionError
        If the BibTeX content does not hold exactly one entry.
    """
    # "except Exception" instead of bare excepts throughout, so that
    # KeyboardInterrupt and SystemExit still propagate.
    try:
        super().load_model(model, name=name)
    except Exception:
        bibtex = model
    else:
        bibtex = self.model.find('bibtex')

    # Parse and extract content
    parser = BibTexParser()
    parser.customization = convert_to_unicode
    bib_database = bibtexparser.loads(bibtex, parser=parser)
    # NOTE(review): assert is skipped under "python -O"; it is kept so that
    # callers catching AssertionError keep working.
    assert len(
        bib_database.entries) == 1, 'bibtex must be for a single reference'

    self.__bib = bib_database.entries[0]

    try:
        self.name
    except Exception:
        # No usable name yet: fall back to the doifname attribute.
        self.name = self.doifname

    try:
        self.model
    except Exception:
        # No model available yet: build it.
        self.build_model()
def biblatex(self):
    """Return entry formatted in BibLateX style as dictionary or 'None'.

    The BibTeX form of the entry is re-parsed, every ``journal`` field is
    renamed to ``journaltitle``, and the dumped result is passed to
    ``_entry_from_str``. None is returned when the entry has no BibTeX
    string.
    """
    entry_str = self.as_str("BibTeX")
    if not entry_str:
        return None

    unicode_parser = BibTexParser()
    unicode_parser.customization = convert_to_unicode
    database = bibtexparser.loads(entry_str, parser=unicode_parser)

    # convert 'journal' to 'journaltitle'
    for record in database.entries:
        if 'journal' in record:
            record['journaltitle'] = record.pop('journal')

    return _entry_from_str(bibtexparser.dumps(database))
def printable_bibtex_entry(entry):
    """Convert a dictionary BibTeX entry to LaTeX format.

    The entry is serialised with ``bibtex_entry_str`` and re-parsed with
    the ``homogeneize_latex_encoding`` customization; the first parsed
    entry is returned.
    """
    latex_parser = BibTexParser()
    latex_parser.customization = homogeneize_latex_encoding
    database = bibtexparser.loads(bibtex_entry_str(entry),
                                  parser=latex_parser)
    return database.entries[0]
def bib_parse(path):
    """Return the entries of the BibTeX file at *path*.

    Every record is passed through ``custom_callback`` while parsing.
    """
    callback_parser = BibTexParser()
    callback_parser.customization = custom_callback
    with open(path) as handle:
        database = bibtexparser.load(handle, parser=callback_parser)
    return database.entries
def load_bibtex_string(string):
    """Parse the BibTeX text *string* and return the database object.

    The parser has common strings enabled, ignores non-standard entry
    types and applies ``customizations`` to each record.
    """
    parser_options = dict(common_strings=True, ignore_nonstandard_types=True)
    configured = BibTexParser(**parser_options)
    configured.customization = customizations
    return bibtexparser.loads(string, parser=configured)
def load_bib(filename):
    """Parse the BibTeX file *filename* and return the database object.

    Doubled braces in the file text are collapsed to single braces before
    parsing; records go through ``convert_to_unicode``.
    """
    unicode_parser = BibTexParser()
    unicode_parser.customization = convert_to_unicode
    with open(filename) as handle:
        text = handle.read()
        text = text.replace("{{", "{").replace("}}", "}")
        database = bibtexparser.loads(text, parser=unicode_parser)
    return database
def load_bibtex_file(filepath):
    """Parse the BibTeX file at *filepath* and return the database object.

    The parser has common strings enabled, ignores non-standard entry
    types and applies ``customizations`` to each record.
    """
    parser_options = dict(common_strings=True, ignore_nonstandard_types=True)
    configured = BibTexParser(**parser_options)
    configured.customization = customizations
    with open(filepath, "r") as handle:
        database = bibtexparser.load(handle, parser=configured)
    return database
def _parser():
    """Return a configured bibtex parser.

    String interpolation is switched off and ``cleanup_record`` is applied
    to every record.
    """
    configured = BibTexParser()
    configured.customization = cleanup_record
    configured.interpolate_strings = False
    return configured
def deserialize_publications(self, base_path):
    """Load the publication lists stored below *base_path*.

    ``sci_publications.bib`` fills ``self.scientificPubs`` and
    ``pop_publications.bib`` fills ``self.popularPubs``; an attribute is
    left untouched when its file does not exist.
    """
    def _read_entries(bib_path):
        # Both files are parsed the same way, each with its own parser.
        latex_parser = BibTexParser()
        latex_parser.customization = homogenize_latex_encoding
        with open(bib_path, encoding='utf-8') as handle:
            return bibtexparser.load(handle, parser=latex_parser).entries

    # Scientific publications
    scientific_path = os.path.join(base_path, 'sci_publications.bib')
    if os.path.exists(scientific_path):
        self.scientificPubs = _read_entries(scientific_path)

    # Popular publications
    popular_path = os.path.join(base_path, 'pop_publications.bib')
    if os.path.exists(popular_path):
        self.popularPubs = _read_entries(popular_path)
def render_hal_biblio(keywords):
    """Send a query to HAL API and display returned bibtex entries.

    The entries are sorted in place by ``year``, newest first, formatted
    with ``format_bibdatabase`` and rendered through ``hal.html``.
    """
    response_text = requests.get(HAL_QUERY_API.format(keywords)).text

    unicode_parser = BibTexParser()
    unicode_parser.customization = convert_to_unicode
    database = bibtexparser.loads(response_text, parser=unicode_parser)

    records = database.entries
    records.sort(key=lambda record: record['year'], reverse=True)

    template_vars = format_bibdatabase(records)
    return render_template("hal.html", **template_vars)
def strip_and_write(inputfile, outputfile):
    """Parse *inputfile* with the ``customization`` callback and dump it.

    The parsed database is written to *outputfile*. The process exits with
    a message when loading yields no database.
    """
    stripping_parser = BibTexParser()
    stripping_parser.customization = customization
    with open(inputfile, 'r') as source:
        bibdata = bibtexparser.load(source, parser=stripping_parser)

    if bibdata is None:
        sys.exit("Could not load input file {}".format(inputfile))

    with open(outputfile, 'w') as target:
        bibtexparser.dump(bibdata, target)
def parseFile(bib_file):
    """Parse the bib file.

    :param bib_file: bibtex file to be parsed
    :returns: -- a bibtex file object
    """
    configured = BibTexParser()
    # NOTE(review): "homogenize" is set as a plain attribute on the parser;
    # whether bibtexparser reads an attribute of that name is not visible
    # here -- confirm.
    configured.homogenize = True
    configured.customization = customizations
    with open(bib_file) as handle:
        parsed = b.load(handle, parser=configured)
    return parsed
def parseFile(bib_file):
    """Parse the bib file.

    :param bib_file: bibtex file to be parsed
    :returns: -- a bibtex file object
    """
    # Set up the parser with the customizations defined elsewhere in the
    # module.
    configured = BibTexParser()
    # NOTE(review): "homogenize" is set as a plain attribute on the parser;
    # whether bibtexparser reads an attribute of that name is not visible
    # here -- confirm.
    configured.homogenize = True
    configured.customization = customizations
    with open(bib_file) as handle:
        # Parse the opened file.
        parsed = b.load(handle, parser=configured)
    return parsed
def ref(self, id, published=False, standardised=False):
    """
    Download bibliographic data of document.

    Args:
        id: Document identifier, resolved through ``self.number``.
        published: If true, also fetch the document page and store the
            cleaned text of its ``published_line`` element (or None when
            the element is absent) under ``'published'``.
        standardised: If true, return a dictionary with the common keys
            ``NberID``, ``Abstract``, ``Month``, ``Year``, ``Title``,
            ``Authors`` and, when available, ``Published``.

    Returns:
        The parsed BibTeX entry, or the standardised dictionary.
    """
    # Set up BibTeX parser.
    parser = BibTexParser()
    parser.customization = convert_to_unicode

    # Get correct document report number.
    number = self.number(id=id)

    # Get bibtex file and parse.
    request = requests.get('{}/papers/{}.bib'.format(self.url, number))
    text = request.content.decode('utf8').replace(u'\xa0', u' ')
    bibtex = bibtexparser.loads(text, parser=parser).entries[0]

    # If 'published' keyword is true, fetch information on where document
    # was eventually published.
    if published:
        request = requests.get('{}/papers/{}'.format(self.url, number))
        soup = BeautifulSoup(request.text, 'html.parser')
        bibtex['published'] = soup.find(attrs={'id': 'published_line'})

        # Clean up returned text (get rid of extra spaces).
        if bibtex['published']:
            bibtex['published'] = ' '.join(
                bibtex['published'].text.strip().split())

    # If 'standardised' keyword is true, return dictionary with common keywords.
    if standardised:
        standard = {
            'NberID': bibtex['ID'][4:],
            'Abstract': ' '.join(bibtex['abstract'].strip().split()),
            'Month': bibtex['month'].strip(),
            'Year': int(bibtex['year']),
            'Title': ' '.join(bibtex['title'].strip().split()),
            # All authors appear to be separated by 'and'.
            'Authors': [{'Name': author}
                        for author in bibtex['author'].split(' and ')],
        }

        # Sometimes, the 'month' keyword returns the year for documents
        # released in January. Raw string: \d is a regex escape, not a
        # Python string escape.
        if re.match(r'^\d{4}$', standard['Month']):
            standard['Month'] = 'January'

        if 'published' in bibtex:
            standard['Published'] = bibtex['published']

        bibtex = standard

    return bibtex
def openBibLib(bibName):
    """Read a bibtex file and return its parsed content.

    The file is parsed with a BibTexParser whose customization is
    convert_to_unicode.

    @param bibName: the path to the file
    @type bibName: string
    @return: the BibDatabase object with all the information
    @rtype: BibDatabase
    """
    unicode_parser = BibTexParser()
    unicode_parser.customization = convert_to_unicode
    with open(bibName) as source:
        return bibtexparser.load(source, parser=unicode_parser)
def collection_from_bibtex_str(bib_str, **kwargs):
    """
    Build a BibJSON collection out of a Bibtex string (e.g. the content of
    a .bib-file).

    :param bib_str: input bibtex string
    :param kwargs: metadata for the BibJSON collection, forwarded unchanged
                   to collection_from_dict. "collection" parameter must be set.
    :return BibJSON collection dictionary
    """
    parser = BibTexParser()
    parser.customization = _parse_bib_entry
    # this is flipped. this seems to be an error in the library
    parser.ignore_nonstandard_types = False

    entries_by_key = bibtexparser.loads(bib_str, parser=parser).entries_dict
    return collection_from_dict(entries_by_key, **kwargs)
def import_bibtex_raw_content(request):
    """Import the articles contained in BibTeX text posted with the request.

    Reads ``review-id``, ``source-id`` and ``bibtex_file`` from
    ``request.POST``, parses the BibTeX text with unicode conversion, hands
    the resulting articles to ``_import_articles`` and redirects to the
    ``import_studies`` route of the review.
    """
    post = request.POST
    review_pk = post.get('review-id')
    source_pk = post.get('source-id')
    raw_bibtex = post.get('bibtex_file')

    review = Review.objects.get(pk=review_pk)
    source = Source.objects.get(pk=source_pk)

    unicode_parser = BibTexParser()
    unicode_parser.customization = convert_to_unicode
    database = bibtexparser.loads(raw_bibtex, parser=unicode_parser)

    _import_articles(
        request, source, bibtex_to_article_object(database, review, source))

    target = r('import_studies', args=(review.author.username, review.name))
    return redirect(target)
def parse_bibtex(bibtex):
    """
    Turn a string holding a single bibtex record into a dict.

    Raises ValueError when nothing could be parsed; when several records
    are found a warning is logged and the first one is returned.
    """
    record_parser = BibTexParser()
    record_parser.customization = customizations
    entries = bibtexparser.loads(bibtex, parser=record_parser).entries

    found = len(entries)
    if found == 0:
        raise ValueError('No bibtex item was parsed.')
    if found > 1:
        logger.warning("%d bibtex items, defaulting to first one", found)
    return entries[0]
def _cleanupBibTex(self, count):
    """
    Clean up bibtex and ensure uniform look.

    Parses ``self.refs`` with the ``homogeneize_latex_encoding``
    customization, then writes the entries back into ``self.refs`` and
    stores the number of parsed entries in ``self.number``.

    ``count`` is accepted for interface compatibility but is not used here.
    """
    import bibtexparser
    from bibtexparser.bparser import BibTexParser
    from bibtexparser.bwriter import BibTexWriter

    parser = BibTexParser()
    parser.customization = homogeneize_latex_encoding
    bib = bibtexparser.loads(self.refs, parser=parser)

    # save results
    writer = BibTexWriter()
    writer.contents = ['entries']
    writer.indent = ' '
    # Must be a tuple of field names: ``('id')`` is just the string 'id',
    # which would be iterated character by character.
    # NOTE(review): confirm whether the installed bibtexparser stores the
    # entry key under 'id' or 'ID'.
    writer.order_entries_by = ('id',)
    self.number = len(bib.entries)
    self.refs = bibtexparser.dumps(bib, writer)
def get_all_entries(locations):
    """
    Retrieve all the .bib files and parse their content to a list of simple
    dictionaries

    Locations that do not end with ".bib" are skipped. A file that fails to
    parse is logged and skipped; failure to open a file still propagates.

    :param locations: locations of all the .bib files
    :return: return a list of dictionaries
    """
    result = []
    for file_location in locations:
        if not file_location.endswith(".bib"):
            continue
        # A fresh parser per file: a parser instance keeps its database
        # between parse calls, so sharing one across files risks returning
        # entries of earlier files again.
        my_parser = BibTexParser()
        my_parser.customization = _customizations
        with open(file_location) as bib_file:
            try:
                parsed = bibtexparser.load(bib_file, parser=my_parser)
                result.extend(parsed.entries)
            except Exception:
                # Best effort: report the broken file and carry on, without
                # swallowing KeyboardInterrupt/SystemExit.
                logger.error(".bib parsing error: " + file_location)
    return result
def handle(self, *args, **kwargs):
    """Render the configured BibTeX files into one HTML list.

    Reads the input paths from ``settings.BIBTEX_FILES`` and the output path
    from ``settings.BIBTEX_OUTPUT``. Entries are collected from every file,
    keeping only the first entry for each (case-insensitive) title, then
    rendered with the template ``bib2html/ieee/<ENTRYTYPE>.html``. The user
    is asked interactively whether to review the result and whether to
    write it to the output file.

    A file that cannot be processed (including one containing an entry
    without a ``title``) is reported and skipped; entries collected from it
    before the failure are kept. If rendering fails, the command stops
    without touching the output file.
    """
    # NOTE: this command relies on Python 2's ``raw_input``; ``print`` is
    # called with a single argument so the output is the same as with the
    # print statement.
    bibtex_files = settings.BIBTEX_FILES
    bibtex_output = settings.BIBTEX_OUTPUT

    db = []
    seen_titles = set()  # lower-cased titles already present in ``db``
    for bib in bibtex_files:
        print("Processing BibTeX file %s" % bib)
        try:
            parser = BibTexParser()
            parser.customization = customizations
            with open(bib, 'r') as inf:
                dbTemp = bibtexparser.load(inf, parser=parser).entries
            for entryTemp in dbTemp:
                title = entryTemp['title'].lower()
                if title not in seen_titles:
                    seen_titles.add(title)
                    db.append(deepcopy(entryTemp))
        except Exception:
            traceback.print_exc()
            print('An error occured while processing [' + bib +
                  ']. Its content will be ignored.')

    # Start rendering HTML
    try:
        for entry in db:
            print(entry)
            template_name = 'bib2html/ieee/' + entry['ENTRYTYPE'] + '.html'
            print('Using template ' + template_name)
            bibTemplate = get_template(template_name)
            entry['formatted'] = bibTemplate.render(entry)
    except Exception:
        traceback.print_exc()
        print('An error occured while processing the style files.'
              'The program will exit without completing the task.')
        # Actually stop here: some entries have no 'formatted' value, so
        # continuing would fail later and could truncate the output file.
        return

    if_review = raw_input('Do you want to review? [N] ')
    if if_review.lower().startswith('y'):
        for entry in db:
            print(entry['formatted'])

    # write to output
    if_write = raw_input('Write to %s? [Y] ' % (bibtex_output,))
    if if_write.lower().startswith('n'):
        print('Okay, won\'t write')
        return

    # dirname is '' for a bare file name; os.makedirs('') would raise.
    out_dir = os.path.dirname(bibtex_output)
    if out_dir and not os.path.exists(out_dir):
        os.makedirs(out_dir)
    with open(bibtex_output, 'w+') as out:
        out.write('<ul>' + ''.join([x['formatted'] for x in db]) + '</ul>')
def parse_bibfile(bibfile):
    """Parse a BibTeX file into a list of papers, sorted in descending order.

    Every entry of the file is passed to ``extract_paper_info``; results
    that are ``None`` are dropped, so the returned list only contains
    actual paper objects.

    :param bibfile: path of the BibTeX file to read
    :return: list of the values returned by ``extract_paper_info``, sorted
             with ``reverse=True``
    """
    with open(bibfile) as bibtex_file:
        parser = BibTexParser()
        parser.customization = customizations
        bib_database = bibtexparser.load(bibtex_file, parser=parser)

    papers = []
    for entry in bib_database.entries:
        paper = extract_paper_info(entry)
        # Filter on the extracted result, not on the entry itself (the
        # entry is never None, so testing it filtered nothing).
        if paper is not None:
            papers.append(paper)

    papers.sort(reverse=True)
    return papers
def handle_bibtex(infile, user=None, verbose=False):
    """Create an Article for every entry of a BibTeX file not stored yet.

    ``infile`` is an open file object handed to ``bibtexparser.load``.
    Entries whose title already matches an existing Article are skipped;
    the others are passed through ``clean_entry`` and saved as Articles
    owned by ``user`` (or by the default user looked up below when ``user``
    is falsy). With ``verbose`` set, each new Article is printed before it
    is saved.
    """
    owner = user if user else User.objects.get(username='******')

    # load bibtex file to in-memory db
    unicode_parser = BibTexParser()
    unicode_parser.customization = convert_to_unicode
    database = bibtexparser.load(infile, parser=unicode_parser)

    for raw_entry in database.entries:
        already_stored = Article.objects.filter(title=raw_entry['title'])
        if len(already_stored) != 0:
            continue
        fields = clean_entry(
            raw_entry,
            ['title', 'author', 'journal', 'publisher', 'year', 'volume',
             'pages'])
        article = Article(user=owner, hide=False, **fields)
        if verbose:
            print(article)
        article.save()
def parse_bibtex(bibtex):
    """
    Parse a single bibtex record represented as a string to a dict

    The text is first passed through ``insert_newlines_in_bibtex``. The
    ``author`` field of the returned entry is replaced by the result of
    ``parse_authors_list`` (called with '' when the field is missing).

    Raises ValueError when no record could be parsed. When several records
    are found a warning is printed and the first one is returned.
    """
    bibtex = insert_newlines_in_bibtex(bibtex)
    # No custom parser is passed, so bibtexparser's default parsing applies.
    db = bibtexparser.loads(bibtex)

    if not db.entries:
        raise ValueError('No bibtex item was parsed.')
    if len(db.entries) > 1:
        print("Warning: %d Bibtex items in parse_bibtex, defaulting to the first one" % len(db.entries))

    entry = db.entries[0]
    entry['author'] = parse_authors_list(entry.get('author', ''))
    return entry
def fix_bibliography(bibtex_string):
    """
    Parse a bibliography given as a string, run ``fix_entry`` on each of its
    entries and return the re-serialized bibliography encoded as UTF-8.

    ``fix_entry`` (defined elsewhere) is where the per-entry fixes happen;
    they are described as removing URLs from articles and replacing dates
    with years.
    """
    # Parser meant to ASCIIify every field.
    # See: https://bibtexparser.readthedocs.io/en/v0.6.2/tutorial.html#accents-and-weird-characters
    ascii_parser = BibTexParser()
    ascii_parser.customization = homogeneize_latex_encoding
    database = bibtexparser.loads(bibtex_string, parser=ascii_parser)

    for record in database.entries:
        fix_entry(record)

    # TODO: if py3k, do not encode.
    serialized = bibtexparser.dumps(database)
    return serialized.encode("UTF-8")