def entries_to_file(entries, fn):
    """Serialize *entries* (list of entry dicts) to a UTF-8 BibTeX file *fn*."""
    database = BibDatabase()
    database.entries = entries
    with codecs.open(fn, 'w', "utf-8") as out:
        out.write(BibTexWriter().write(database))
def make_bibtex_file(pubs, pid, person_dir='.'):
    """Make a bibtex file given the publications

    Parameters
    ----------
    pubs : list of dict
        The publications
    pid : str
        The person id
    person_dir : str, optional
        The person's directory

    Returns
    -------
    str or None
        Path of the written ``.bib`` file, or None when bibtexparser is
        not available.
    """
    if not HAVE_BIBTEX_PARSER:
        return None
    skip_keys = {'ID', 'ENTRYTYPE', 'author'}
    bibdb = BibDatabase()
    bibwriter = BibTexWriter()
    bibdb.entries = ents = []
    for pub in pubs:
        ent = dict(pub)
        ent['ID'] = ent.pop('_id')
        ent['ENTRYTYPE'] = ent.pop('entrytype')
        for n in ['author', 'editor']:
            if n in ent:
                ent[n] = ' and '.join(ent[n])
        for key in ent.keys():
            if key in skip_keys:
                continue
            # Cast to str so non-string values (ints, years, dates) don't
            # break latex_safe — consistent with the newer variant of this
            # function elsewhere in the codebase.
            ent[key] = latex_safe(str(ent[key]))
        ents.append(ent)
    fname = os.path.join(person_dir, pid) + '.bib'
    # Write UTF-8 explicitly so output doesn't depend on the locale encoding.
    with open(fname, 'w', encoding='utf-8') as f:
        f.write(bibwriter.write(bibdb))
    return fname
def save_tacl_bib(txt_file, year, volume):
    """Convert a 3-line-per-record TACL listing into '<txt_file>.bib'.

    Each record is: line 0 = 8-char id + '; '-separated author names
    (starting at column 9), line 1 = title, line 2 = ignored.

    Fixed: both input files are now opened with ``with`` so the handles
    are closed deterministically (the original leaked the read handle).
    """
    def name(n):
        # Flip "First Last" into "Last, First"; single tokens pass through.
        t = n.split()
        return t[-1] + ', ' + ' '.join(t[:-1]) if len(t) > 1 else n.strip()

    entries = []
    d = None
    with open(txt_file) as fin:
        for i, line in enumerate(fin):
            line = line.strip()
            j = i % 3
            if j == 0:
                authors = ' and '.join(
                    [name(n) for n in line[9:].split(';')]).strip()
                d = {
                    'ID': line[:8],
                    'ENTRYTYPE': 'article',
                    'author': authors,
                    'journal': 'Transactions of the Association for Computational Linguistics',
                    'year': str(year),
                    'volume': str(volume)
                }
            elif j == 1:
                d['title'] = line
                entries.append(d)
    db = BibDatabase()
    db.entries = entries
    writer = BibTexWriter()
    with open(txt_file + '.bib', 'w') as bout:
        bout.write(writer.write(db))
def export_bibtex(author_name, fol, recursive=False):
    """Collect every entry authored by *author_name* from the .bib files
    under *fol* (deduplicated by title) and export them to one
    '<author>.bib' file inside *fol*."""
    from bibtexparser.bwriter import BibTexWriter
    from bibtexparser.bibdatabase import BibDatabase

    merged = BibDatabase()
    seen_titles = set()
    for bib_fname in tqdm(get_bib_files(fol, recursive)):
        with open(bib_fname) as handle:
            database = bibtexparser.load(handle)
            for entry in database.entries:
                title = entry['title']
                if title in seen_titles:
                    continue
                seen_titles.add(title)
                if author_name in parse_authors(entry):
                    merged.entries.append(entry)
    author_name = author_name.replace(' ', '').replace(',', '_')
    bibtex_fname = op.join(fol, '{}.bib'.format(author_name))
    with open(bibtex_fname, 'w') as out:
        out.write(BibTexWriter().write(merged))
    print('The bibtex file with {} papers of {} where she cited you was exported to {}'.format(
        len(merged.entries), author_name, bibtex_fname))
def format_paper_citation_dict(citation, indent=' '):
    """
    Format a citation dict for a paper or a list of papers into a BibTeX
    record string.

    :param citation: A ``Paper`` citation dict or list of such dicts.
    :param indent: Indentation to be used in BibTeX output.
    """
    entries = [citation] if isinstance(citation, dict) else citation

    # Disambiguate entries that share the same ID by suffixing a counter.
    seen = collections.defaultdict(int)
    for entry in entries:
        key = entry['ID']
        seen[key] += 1
        if seen[key] > 1:
            entry['ID'] = '%s_%s' % (key, seen[key])

    writer = BibTexWriter()
    writer.indent = indent
    with io.StringIO('') as buffer:
        database = BibDatabase()
        database.entries = entries
        buffer.write(writer.write(database))
        return buffer.getvalue().strip()
def extract(keys_input, bibtex_input, bibtex_output, verbose):
    """Copy the entries named in *keys_input* from *bibtex_input* to
    *bibtex_output*, re-wrapping italicised species names in titles.

    All three are open file objects (click arguments); *verbose* enables
    progress messages.
    """
    lines = keys_input.readlines()
    citation_keys = (line.strip() for line in lines)
    if verbose:
        print("Read {} keys from {}".format(
            len(lines), click.format_filename(keys_input.name)))
    main_bib = load_bib(bibtex_input)
    if verbose:
        print("Read {} entries from {}".format(
            len(main_bib.entries), click.format_filename(bibtex_input.name)))
    out_bib = BibDatabase()
    # Matches '<i>...</i>' spans as escaped by some exporters:
    # {\textless}i{\textgreater} ... {\textless}/i{\textgreater}
    species_pattern = re.compile(
        r'({\\textless}i{\\textgreater}\w.*?{\\textless}/i{\\textgreater})')
    for key in citation_keys:
        # A missing key raises KeyError here — requested key must exist.
        e = main_bib.entries_dict[key]
        title = e['title']
        groups = species_pattern.findall(title)
        for grp in groups:
            # Strip the escaped <i> tags, then re-wrap the name as
            # \textit{\uppercase{X}rest} so LaTeX preserves the capital.
            s = grp.replace('{\\textless}i{\\textgreater}',
                            '').replace('{\\textless}/i{\\textgreater}', '')
            s = '\\textit{\\uppercase{' + s[0] + '}' + s[1:] + '}'
            title = title.replace(grp, s)
        e['title'] = title
        out_bib.entries.append(e)
    if verbose:
        print("Writing {} entries to {}".format(
            len(out_bib.entries), click.format_filename(bibtex_output.name)))
    writer = BibTexWriter()
    bibtex_output.write(writer.write(out_bib))
def parse(bibfile):
    """Print each entry of *bibfile* as a tab-indented YAML-like record.

    Fixed: the original used Python 2 ``print`` statements (a SyntaxError
    under Python 3) and ``.encode('UTF8')`` calls (which would print
    ``bytes`` reprs under Python 3). The emitted text is unchanged.
    """
    with open(bibfile) as bibtex_file:
        writer = BibTexWriter()
        bib_database = bibtexparser.load(bibtex_file)
        for entry in bib_database.entries:
            print("\t-")
            print("\t\tlayout: paper")
            print("\t\tpaper-type: " + preprocess(entry["type"]))
            print("\t\tyear: " + preprocess(entry["year"]))
            print("\t\tselected: no")
            print("\t\ttitle: >\n\t\t\t" + preprocess(entry["title"]))
            print("\t\tauthors: " + parseauthors(preprocess(entry["author"])))
            print("\t\timg: ")
            print("\t\tvenue: ")
            if "pages" in entry:
                print("\t\tpages: " + preprocess(entry["pages"]))
            if "booktitle" in entry:
                print("\t\tbooktitle: " + preprocess(entry["booktitle"]))
            if "journal" in entry:
                print("\t\tjournal: " + preprocess(entry["journal"]))
            if "url" in entry:
                print("\t\tdoc-url: " + preprocess(entry["url"]))
            else:
                print("\t\tdoc-url: ")
            if "abstract" in entry:
                print("\t\tabstract: >\n\t\t\t" + preprocess(entry["abstract"]))
            # Private API: renders a single entry without a BibDatabase.
            print("\t\tbibtex: >\n\t\t\t"
                  + writer._entry_to_bibtex(entry).replace("\n", "\n\t\t\t"))
def work_to_bibtex(work, name=None, acronym=False, rules=None):
    """Convert work to bibtex text

    Doctest:

    .. doctest::

        >>> reload()
        >>> murta2014a = work_by_varname("murta2014a")
        >>> print(work_to_bibtex(murta2014a))
        @inproceedings{murta2014a,
         address = {Cologne, Germany},
         author = {Murta, Leonardo and Braganholo, Vanessa and Chirigati, Fernando and Koop, David and Freire, Juliana},
         booktitle = {International Provenance and Annotation Workshop},
         pages = {71--83},
         publisher = {Springer},
         title = {no{W}orkflow: capturing and analyzing provenance of scripts},
         year = {2014}
        }
        <BLANKLINE>
        <BLANKLINE>

        Custom name:

        >>> reload()
        >>> murta2014a = work_by_varname("murta2014a")
        >>> print(work_to_bibtex(murta2014a, name="other"))
        @inproceedings{other,
         address = {Cologne, Germany},
         author = {Murta, Leonardo and Braganholo, Vanessa and Chirigati, Fernando and Koop, David and Freire, Juliana},
         booktitle = {International Provenance and Annotation Workshop},
         pages = {71--83},
         publisher = {Springer},
         title = {no{W}orkflow: capturing and analyzing provenance of scripts},
         year = {2014}
        }
        <BLANKLINE>
        <BLANKLINE>

        Use acronym for place name:

        >>> print(work_to_bibtex(murta2014a, acronym=True))
        @inproceedings{murta2014a,
         address = {Cologne, Germany},
         author = {Murta, Leonardo and Braganholo, Vanessa and Chirigati, Fernando and Koop, David and Freire, Juliana},
         booktitle = {IPAW},
         pages = {71--83},
         publisher = {Springer},
         title = {no{W}orkflow: capturing and analyzing provenance of scripts},
         year = {2014}
        }
        <BLANKLINE>
        <BLANKLINE>
    """
    result = work_to_bibtex_entry(work, name=name, acronym=acronym,
                                  rules=rules)
    db = BibDatabase()
    db.entries = [result]
    writer = BibTexWriter()
    # Single-space indent matches the doctest output above.
    writer.indent = " "
    return writer.write(db)
def rekey(self, old_key, new_key):
    '''
    Change the key of an existing document in the archive.

    old_key: key currently present in the archive.
    new_key: desired key, or None to take the key from the document's
        bibtex file.
    Returns the new key; raises LibraryException when new_key is already
    taken.
    '''
    old_paths = self.get_doc(old_key).paths

    # If a new key has not been supplied, we take the key from the bibtex
    # file.
    if new_key is None:
        new_key = _key_from_bibtex(old_paths.bib_path)

    if self.has_key(new_key):
        msg = 'Archive already contains key {}. Aborting.'.format(new_key)
        raise LibraryException(msg)

    new_paths = DocumentPaths(self.archive_path, new_key)

    # Rename PDF and bibtex file and then rename the whole directory.
    # NOTE(review): these three moves are not atomic as a group — a failure
    # partway through leaves the archive half-renamed.
    shutil.move(old_paths.bib_path,
                os.path.join(old_paths.key_path, new_key + '.bib'))
    shutil.move(old_paths.pdf_path,
                os.path.join(old_paths.key_path, new_key + '.pdf'))
    shutil.move(old_paths.key_path, new_paths.key_path)

    # Write the new_key to the bibtex file
    with open(new_paths.bib_path, 'r') as f:
        bib_info = bibtexparser.load(f)
    bib_info.entries[0]['ID'] = new_key
    bib_writer = BibTexWriter()
    with open(new_paths.bib_path, 'w') as f:
        f.write(bib_writer.write(bib_info))

    return new_key
def main():
    """Sanitize a .bib file according to a JSON config and write the result."""
    cli = argparse.ArgumentParser()
    cli.add_argument("input_file", help='The original .bib file you want to sanitize.')
    cli.add_argument("config", help='The config file in JSON format.')
    cli.add_argument("output_file", help='Name of the new sanitized file.')
    args = cli.parse_args()

    # parse original bibfile
    with open(args.input_file) as bib_handle:
        bibDB = bibtexparser.load(bib_handle)

    # parse config file
    with open(args.config) as conf_handle:
        confDB = json.load(conf_handle)

    checkDuplicates(args.input_file)
    bibDB = checkMandatoryFieldsAndKeywords(bibDB, confDB['read_config'])
    checkTags(bibDB, confDB['read_config']['tag_regex'])

    # write results
    writer = BibTexWriter()
    writer.contents = ['entries']
    # use ordering that is defined in "sort_order" in the config.json file
    writer.order_entries_by = confDB["write_config"]["sort_order"]
    with open(args.output_file, 'w') as out_handle:
        out_handle.write(bibtexparser.dumps(bibDB, writer))
def export(self):
    """Render self._papers as BibTeX into an in-memory text file.

    Returns an ``io.StringIO`` positioned at the start.
    Fixed: the original returned the buffer without rewinding, so a
    caller's ``read()`` yielded ''.
    """
    # Open StringIO to grab in-memory file contents
    file = io.StringIO()
    db = BibDatabase()
    for paper in self._papers:
        entry = {
            'abstract': paper.abstract,
            'title': paper.title,
            'year': str(paper.published_at.year),
            'ID': self.generate_id(paper),
            'doi': paper.doi,
            'author': self.generate_authors(paper)
        }
        if paper.journal:
            entry['journal'] = paper.journal.displayname
        # Preprints get the 'unpublished' entry type.
        if paper.is_preprint:
            entry['ENTRYTYPE'] = 'unpublished'
        else:
            entry['ENTRYTYPE'] = 'article'
        db.entries.append(entry)
    writer = BibTexWriter()
    file.write(writer.write(db))
    file.seek(0)  # rewind so callers can read the rendered BibTeX
    return file
def export_to_bibtex_one_file(self, path: str = "all.bib"):
    """stores publications in bibtex format in one file

    Parameters
    ----------
    path : optional
        path where the resulting file should be stored, by default "all.bib"

    Raises
    ------
    KeyError
        if the type of publication and the handle are not specified
    """
    self._create_dir(path)
    for pub in self._dep_pubs:
        meta = pub.get_bibtex_representation()
        if not meta:
            print("This pub has no meta")
        else:
            if meta["type"] and meta["handle"]:
                handle = meta.pop("handle")
                pub_type = meta.pop("type")
                db = BibDatabase()
                db.entries = [meta.copy()]
                db.entries[0].update({"ID": handle, "ENTRYTYPE": pub_type})
                writer = BibTexWriter()
                # Keep the field order of the bibtex representation
                # (idiomatic replacement for the original append loop).
                writer.display_order = list(meta)
                # Append so every publication ends up in the same file.
                with open(path, "a") as bibfile:
                    bibfile.write(writer.write(db))
            else:
                raise KeyError("the type of publication and metdata" +
                               "are required")
def metaDictToBib(jobid, metadict, omit_keys, path_prefix):
    """Export meta data to bibtex format

    Args:
        jobid (int): id of job.
        metadict (DocMeta): meta dict of a doc.
        omit_keys (list): keys to omit in the converted dict.
        path_prefix (str): folder path to prepend to attachment file paths.

    Returns:
        rec (int): 0 if successful, 1 otherwise.
        jobid (int): the input jobid as it is.
        dbtext (str): formated bibtex entry, '' if <rec>==1.
        docid (int): id of the processed document.
    """
    try:
        ordinary = toOrdinaryDict(metadict, INV_ALT_KEYS, omit_keys,
                                  path_prefix)
        database = BibDatabase()
        database.entries = [ordinary]
        writer = BibTexWriter()
        writer.indent = ' '
        writer.comma_first = False
        return 0, jobid, writer.write(database), metadict['id']
    except Exception:
        # Error code 1 with empty text signals failure to the caller.
        LOGGER.exception('Failed to write to bibtex')
        return 1, jobid, '', metadict['id']
def exif_pdf(self, filename):
    """Extract bibliographic metadata from a PDF's EXIF data, build an
    @article entry from it and show the BibTeX in the text view.

    Fixed: the bare ``except:`` (which also swallowed SystemExit and
    KeyboardInterrupt) is narrowed to ``except Exception``.
    """
    fields = ["Author", "Year", "Journal", "Title", "Publisher",
              "Page", "Address", "Annote", "Booktitle", "Chapter",
              "Crossred", "Edition", "Editor", "HowPublished",
              "Institution", "Month", "Note", "Number", "Organization",
              "Pages", "School", "Series", "Type", "Url", "Volume",
              "Doi", "File"]
    op = pexif.get_json(filename)
    try:
        # Keep any EXIF key whose name contains a known field name.
        new_op = {
            field: str(value)
            for field in fields
            for key, value in op[0].items()
            if field.lower() in key.lower()
        }
        if 'Author' not in new_op:
            new_op['Author'] = 'Unknown'
        # Build the entry ID as FirstTwoTitleWords_LastAuthorToken.
        id_auth = new_op["Author"].split()[-1]
        id_tit = (new_op["Title"].split()[:2])
        id_tit.append(id_auth)
        id_val = "_".join(id_tit)
        new_op["ID"] = str(id_val)
        new_op["ENTRYTYPE"] = "article"
        op[0] = new_op
        db = BibDatabase()
        db.entries = op
        writer = BibTexWriter()
        pdf_buff = (writer.write(db))
        self.create_textview(pdf_buff)
    except Exception:
        self.Messages.on_error_clicked("Can't extract data from this pdf file",
                                       "Try other methods")
def test_content_entries_only(self):
    """contents=['entries'] must emit only the entries, dropping the
    comments present in the input file."""
    with open('bibtexparser/tests/data/multiple_entries_and_comments.bib') as bibtex_file:
        bib_database = bibtexparser.load(bibtex_file)
    writer = BibTexWriter()
    writer.contents = ['entries']
    result = bibtexparser.dumps(bib_database, writer)
    expected = \
"""@book{Toto3000,
 author = {Toto, A and Titi, B},
 title = {A title}
}

@article{Wigner1938,
 author = {Wigner, E.},
 doi = {10.1039/TF9383400029},
 issn = {0014-7672},
 journal = {Trans. Faraday Soc.},
 owner = {fr},
 pages = {29--41},
 publisher = {The Royal Society of Chemistry},
 title = {The transition state method},
 volume = {34},
 year = {1938}
}

@book{Yablon2005,
 author = {Yablon, A.D.},
 publisher = {Springer},
 title = {Optical fiber fusion slicing},
 year = {2005}
}
"""
    self.assertEqual(result, expected)
def test_content_entries_only(self):
    """contents=['entries'] must emit only the entries.

    NOTE(review): this duplicates the identical test above — presumably
    from merging two versions of the test file; consider removing one.
    """
    with open('bibtexparser/tests/data/multiple_entries_and_comments.bib'
              ) as bibtex_file:
        bib_database = bibtexparser.load(bibtex_file)
    writer = BibTexWriter()
    writer.contents = ['entries']
    result = bibtexparser.dumps(bib_database, writer)
    expected = \
"""@book{Toto3000,
 author = {Toto, A and Titi, B},
 title = {A title}
}

@article{Wigner1938,
 author = {Wigner, E.},
 doi = {10.1039/TF9383400029},
 issn = {0014-7672},
 journal = {Trans. Faraday Soc.},
 owner = {fr},
 pages = {29--41},
 publisher = {The Royal Society of Chemistry},
 title = {The transition state method},
 volume = {34},
 year = {1938}
}

@book{Yablon2005,
 author = {Yablon, A.D.},
 publisher = {Springer},
 title = {Optical fiber fusion slicing},
 year = {2005}
}
"""
    self.assertEqual(result, expected)
def load_and_replace(bibtex_file):
    """Load 'publications/<bibtex_file>', attach each entry's rendered
    BibTeX under 'BIB_ENTRY', apply context replacements to every other
    field, and return the publications sorted by year, newest first."""
    with open(os.path.join('publications', bibtex_file), 'r',
              encoding="utf-8") as f:
        fdata = f.read()
    # Two parses: a dict keyed by ID (raw entries) and a list with the
    # author customization applied.
    pdict = BibTexParser(fdata).get_entry_dict()
    plist = BibTexParser(fdata, bc.author).get_entry_list()
    by_year = {}
    for pub in plist:
        # Render the raw (uncustomized) entry as a standalone BibTeX blob.
        pubd = pdict[pub['ID']]
        db = BibDatabase()
        db.entries = [pubd]
        writer = BibTexWriter()
        writer.indent = '\t'
        bibentry = writer.write(db)
        pub['BIB_ENTRY'] = bibentry
        for field in pub:
            if field == 'BIB_ENTRY':
                continue
            pub[field] = context.make_replacements(pub[field])
        pub['author'] = _format_author_list(pub['author'])
        # Undated entries sort as 1970.
        y = int(pub['year']) if 'year' in pub else 1970
        if y not in by_year:
            by_year[y] = []
        by_year[y].append(pub)
    ret = []
    for year, pubs in sorted(by_year.items(), reverse=True):
        for pub in pubs:
            ret.append(pub)
    return ret
def make_bibtex_file(pubs, pid, person_dir="."):
    """Make a bibtex file given the publications

    Parameters
    ----------
    pubs : list of dict
        The publications
    pid : str
        The person id
    person_dir : str, optional
        The person's directory
    """
    if not HAVE_BIBTEX_PARSER:
        return None
    protected = {"ID", "ENTRYTYPE", "author"}
    database = BibDatabase()
    writer = BibTexWriter()
    database.entries = collected = []
    for publication in pubs:
        record = dict(publication)
        record["ID"] = record.pop("_id")
        record["ENTRYTYPE"] = record.pop("entrytype")
        for field in ("author", "editor"):
            if field in record:
                record[field] = " and ".join(record[field])
        # Escape every remaining field for LaTeX output.
        for field in record:
            if field not in protected:
                record[field] = latex_safe(str(record[field]))
        collected.append(record)
    out_path = os.path.join(person_dir, pid) + ".bib"
    with open(out_path, "w", encoding="utf-8") as stream:
        stream.write(writer.write(database))
    return out_path
def convert_csv_to_bibtex(self):
    """Convert every row of self.csv (a pandas DataFrame, judging by
    .to_dict('records')) to BibTeX and write them all to self.output_path."""
    csv_dict = self.csv.to_dict('records')
    writer = BibTexWriter()
    with open(self.output_path, 'w', encoding="utf-8") as bibtex_file:
        for csv_entry in csv_dict:
            # NOTE(review): writer.write expects a BibDatabase — confirm
            # convert_csv_entry_to_bibtex_entry returns one, not a dict.
            bibtex_entry = self.convert_csv_entry_to_bibtex_entry(csv_entry)
            bibtex_file.write(writer.write(bibtex_entry))
def main_resps():
    """Parse the perturbed-tides response citations and write them to a
    .bib file, disambiguating duplicate IDs with letter suffixes."""
    in_file = Path("data/resps-perturbed-tides.txt")
    bibs = parse_citations(in_file)
    db = BibDatabase()
    db.entries = [bib.to_bibtex() for bib in bibs]
    # Count how many times each ID occurs.
    id_to_count = defaultdict(lambda: 0)
    for entry in db.entries:
        id_to_count[entry["ID"]] += 1
    for the_id, count in id_to_count.items():
        if count > 1:
            # Counting down from the total, the first occurrence ends up
            # with the highest letter's complement — i.e. entries get
            # suffixes ...,'b','a' in reverse iteration order.
            for entry in [e for e in db.entries if e["ID"] == the_id]:
                count -= 1
                entry["ID"] += ascii_lowercase[count]
    writer = BibTexWriter()
    writer.indent = " "
    # Binary mode, so the rendered text is encoded explicitly (UTF-8).
    with Path("data/resps-tides-perturbed-refs.bib").open("wb") as ref_file:
        ref_file.write(writer.write(db).encode())
def getcitation():
    """Sample 20 entries from articles.bib, refresh their Google Scholar
    citation counts via scholar.py, and rewrite articles.bib (keeping a
    copy of both the old file and the updated one)."""
    articlesparser = BibTexParser(common_strings=False)
    articlesparser.ignore_nonstandard_types = False
    with open('/home/limingtao/ircre-bibtex/ircreupdate/articles.bib',
              encoding='utf8') as articlesfile:
        articles_database = bibtexparser.load(articlesfile, articlesparser)
    articleentries = articles_database.entries
    import random
    # Refresh a random sample of 20 entries per run.
    samplelist = random.sample(range(len(articleentries)), 20)
    print(samplelist)
    for i in samplelist:
        print("---------------------------")
        print("Entry number: " + str(i))
        title = articleentries[i]['title']
        clusterid = articleentries[i]['clusterid']
        print("Title: " + title)
        print("Cluster ID: " + clusterid)
        if not clusterid == "unknown":
            print(str(i))
            try:
                # Scrape the citation count from scholar.py's stdout;
                # the last whitespace-separated token is the number.
                citations = os.popen(
                    '''/usr/bin/python3 /home/limingtao/ircre-bibtex/ircreupdate/scholarpy/scholar.py -c 1 -C ''' + clusterid +
                    ''' |grep -v list |grep Citations''').read().strip().split()[-1]
            except:
                # NOTE(review): bare except maps every failure to "unknown".
                citations = "unknown"
        else:
            citations = "unknown"
        print("new Citations: " + citations)
        if 'cited' in articleentries[i]:
            oldcitednumber = int(articleentries[i]['cited'])
        else:
            oldcitednumber = 0
        print("Old Cited Number: " + str(oldcitednumber))
        if not citations == "unknown":
            citednumber = int(citations)
            # Accept the new count only if it grew by fewer than 8 —
            # a crude guard against scraping glitches.
            if citednumber > oldcitednumber and ((citednumber - oldcitednumber) < 8):
                articleentries[i]['cited'] = str(citednumber)
    writer = BibTexWriter()
    writer.indent = ' '
    writer.order_entries_by = ('order',)
    with open('/home/limingtao/ircre-bibtex/ircreupdate/cited-add-articles.bib',
              'w', encoding='utf8') as newarticlefile:
        bibtexparser.dump(articles_database, newarticlefile, writer=writer)
    # Shell copies keep a backup of both the new and old files.
    os.popen("cp /home/limingtao/ircre-bibtex/ircreupdate/cited-add-articles.bib tempcited-add-articles.bib")
    os.popen("cp /home/limingtao/ircre-bibtex/ircreupdate/articles.bib /home/limingtao/ircre-bibtex/ircreupdate/oldarticles.bib")
    with open('/home/limingtao/ircre-bibtex/ircreupdate/articles.bib',
              'w', encoding='utf8') as newarticlefile:
        bibtexparser.dump(articles_database, newarticlefile, writer=writer)
    return 0
def bibtex_cleaner(bibtext, option):
    """Parse *bibtext*, clean its entries per *option*, and return the
    rewritten BibTeX; any failure yields a (Japanese) error message."""
    try:
        cleaned = clean_entries(bibtexparser.loads(bibtext), option)
        return BibTexWriter().write(cleaned)
    except Exception:
        return 'Error. 入力形式はbibtexですか?(または変換プログラムのバグの可能性があります)\n'
def test_bibexport():
    """Export the 'Yttri:Urban' entry to testoutbib.bib.

    Fixed: dropped the trailing ``sys.exit()`` — inside a test it raises
    SystemExit and aborts the whole test run.
    """
    a = bb.entries_dict['Yttri:Urban']
    db = BibDatabase()
    db.entries = [a]
    writer = BibTexWriter()
    with open('testoutbib.bib', 'w') as bibfile:
        bibfile.write(writer.write(db))
def persist(self):
    """Fold note/tags back into the cached bib entry and save it to disk."""
    if not self.__bib:
        return
    entry = self.__bib.entries[0]
    entry['note'] = self.note
    entry['tags'] = ';'.join(self.tags)
    rendered = BibTexWriter().write(self.__bib)
    with open(self.__bib_file_path, 'w') as bibfile:
        bibfile.write(rendered)
def getBibtexStrFromAbstractDict(abstractDict):
    """Render *abstractDict* as a BibTeX string, omitting 'url'/'journal'.

    Fixed: operate on a copy (the caller's dict is no longer mutated) and
    tolerate missing 'url'/'journal' keys instead of raising KeyError.
    """
    entry = dict(abstractDict)
    entry.pop('url', None)
    entry.pop('journal', None)
    db = BibDatabase()
    writer = BibTexWriter()
    writer.indent = ' '
    db.entries = [entry]
    return writer.write(db)
def write(self, path=""):
    """Write self.entries to *path* (default: self.path) as ASCII BibTeX.

    Fixed: the original wrote ``str.encode('ascii', 'replace')`` bytes to
    a text-mode file — a TypeError on Python 3. Encoding (with
    '?'-replacement of non-ASCII) is now done by the file object itself.
    """
    path = self.path if not path else path
    bdb = BibDatabase()
    bdb.entries = self.entries
    bw = BibTexWriter()
    with open(path, 'w', encoding='ascii', errors='replace') as f:
        f.write(bw.write(bdb))
def write_bib(bib_database, filen="dl4m.bib"):
    """Description of write_bib

    Write the items stored in bib_database into filen
    """
    writer = BibTexWriter()
    writer.indent = ' '
    # Custom sort key combination used by this project.
    writer.order_entries_by = ('noneyear', "author")
    with open(filen, "w", encoding="utf-8") as bibfile:
        bibfile.write(writer.write(bib_database))
def write_bibtex(db, filename):
    ''' Writes the database into the file named filename '''
    writer = BibTexWriter()
    writer.add_trailing_comma = True
    writer.indent = ''
    with open(filename, 'w', encoding='utf-8') as bibtex_file:
        bibtexparser.dump(db, bibtex_file, writer)
def correct_lhc_authors(bib_tex):
    """Replace the first entry's author field with
    '<collaboration> Collaboration' and return the rewritten BibTeX."""
    database = bibtexparser.loads(bib_tex)
    first = database.entries[0]
    print(first['collaboration'])
    first['author'] = first['collaboration'] + " Collaboration"
    database.entries = [first]
    return BibTexWriter().write(database)
def write_bib_file(list_of_cited_entries, output_fname):
    """Dump the given entries into *output_fname* and report the path."""
    database = BibDatabase()
    database.entries = list_of_cited_entries
    with open(output_fname, 'w') as out:
        out.write(BibTexWriter().write(database))
    print('Output written to ' + output_fname)
def _writer():
    ''' Return a configured bibtex writer. '''
    w = BibTexWriter()
    w.indent = ' '
    w.order_entries_by = ('ID',)
    w.display_order = ['title', 'author', 'editor']
    return w
def export(self, path: str = "./pubs/") -> None:
    """overrides superclass abstract method

    For each publication a folder named after the publication title is
    created under *path*, and the entry is written as "cite.bib" inside it.

    Parameters
    ----------
    path : optional
        path where files should be saved, by default "./pubs/"

    Raises
    ------
    KeyError
        if the type of publication and the handle are not specified

    Fixed: the original if/else duplicated an identical write branch;
    flattened with guard clauses and a single write.
    """
    self._create_dir(path)
    for pub in self._dep_pubs:
        meta = pub.get_bibtex_representation()
        if not meta:
            print("This pub has no meta")
            continue
        if not (meta["type"] and meta["handle"]):
            raise KeyError("the type of publication and metdata" +
                           "are required")
        handle = meta.pop("handle")
        pub_type = meta.pop("type")
        db = BibDatabase()
        db.entries = [meta.copy()]
        db.entries[0].update({"ID": handle, "ENTRYTYPE": pub_type})
        writer = BibTexWriter()
        # To keep the order of the elements inside the bibtex file
        writer.display_order = list(meta)
        try:
            # Sanitize the title into a filesystem-safe directory name.
            my_dir = meta["title"].replace("/", "_").replace(" ", "-") \
                .replace("\"", "")
            full_path = path + my_dir
            if not os.path.exists(full_path):
                os.mkdir(full_path, 0o755)
            with open(full_path + "/" + "cite.bib", "w") as bibfile:
                bibfile.write(writer.write(db))
        except OSError:
            print("Creation of the directory failed {}", my_dir)
def make_bibs(self, prefix, output):
    """Gather every bib entry under *prefix* and dump them to *output*."""
    entries = self.get_all_bibs(prefix)
    database = BibDatabase()
    database.entries = entries
    writer = BibTexWriter()
    writer.indent = '\t'
    with open(output, 'w') as handle:
        bibtexparser.dump(database, handle, writer)
    logging.info('processed %d bib entries', len(entries))
def test_trailing_comma(self):
    """add_trailing_comma must append a comma after the last field,
    matching the golden output file."""
    with io.open(_data_path('article.bib'), 'r') as bibfile:
        bib = BibTexParser(bibfile.read())
    with io.open(_data_path('article_trailing_comma_output.bib'), 'r') as bibfile:
        expected = bibfile.read()
    writer = BibTexWriter()
    writer.add_trailing_comma = True
    result = writer.write(bib)
    # Show full diffs on failure.
    self.maxDiff = None
    self.assertEqual(expected, result)
def test_comma_first(self):
    """comma_first must place field separators at line starts, matching
    the golden output file."""
    with io.open(_data_path('book.bib'), 'r') as bibfile:
        bib = BibTexParser(bibfile.read())
    with io.open(_data_path('book_comma_first.bib'), 'r') as bibfile:
        expected = bibfile.read()
    writer = BibTexWriter()
    writer.indent = ' '
    writer.comma_first = True
    result = writer.write(bib)
    # Show full diffs on failure.
    self.maxDiff = None
    self.assertEqual(expected, result)
def test_align(self):
    """align_values must pad field names to the longest key per entry."""
    bib_database = BibDatabase()
    bib_database.entries = [{'ID': 'abc123',
                             'ENTRYTYPE': 'book',
                             'author': 'test',
                             'thisisaverylongkey': 'longvalue'}]
    writer = BibTexWriter()
    writer.align_values = True
    result = bibtexparser.dumps(bib_database, writer)
    expected = \
"""@book{abc123,
 author             = {test},
 thisisaverylongkey = {longvalue}
}
"""
    self.assertEqual(result, expected)

    # Alignment width is computed per entry from its longest key.
    with open('bibtexparser/tests/data/multiple_entries_and_comments.bib') as bibtex_file:
        bib_database = bibtexparser.load(bibtex_file)
    writer = BibTexWriter()
    writer.contents = ['entries']
    writer.align_values = True
    result = bibtexparser.dumps(bib_database, writer)
    expected = \
"""@book{Toto3000,
 author = {Toto, A and Titi, B},
 title  = {A title}
}

@article{Wigner1938,
 author    = {Wigner, E.},
 doi       = {10.1039/TF9383400029},
 issn      = {0014-7672},
 journal   = {Trans. Faraday Soc.},
 owner     = {fr},
 pages     = {29--41},
 publisher = {The Royal Society of Chemistry},
 title     = {The transition state method},
 volume    = {34},
 year      = {1938}
}

@book{Yablon2005,
 author    = {Yablon, A.D.},
 publisher = {Springer},
 title     = {Optical fiber fusion slicing},
 year      = {2005}
}
"""
    self.assertEqual(result, expected)
def test_content_comment_only(self):
    """contents=['comments'] must emit only the comment blocks."""
    with open('bibtexparser/tests/data/multiple_entries_and_comments.bib') as bibtex_file:
        bib_database = bibtexparser.load(bibtex_file)
    writer = BibTexWriter()
    writer.contents = ['comments']
    result = bibtexparser.dumps(bib_database, writer)
    expected = \
"""@comment{}

@comment{A comment}
"""
    self.assertEqual(result, expected)
def parsing_write(self, filename):
    """Write every reference held by the tree view to *filename* as BibTeX,
    mapping column values onto self.entries field names and skipping Nones."""
    writer = BibTexWriter()
    writer.indent = ' '
    records = []
    for ref in self.TreeView.full_list:
        records.append(
            {field: value
             for field, value in zip(self.entries, ref)
             if value is not None})
    self.db.entries = records
    with open(filename, 'w') as bibfile:
        bibfile.write(writer.write(self.db))
def test_entry_separator(self):
    """entry_separator='' must not add blank space after an entry."""
    bib_database = BibDatabase()
    bib_database.entries = [{'ID': 'abc123',
                             'ENTRYTYPE': 'book',
                             'author': 'test'}]
    writer = BibTexWriter()
    writer.entry_separator = ''
    result = bibtexparser.dumps(bib_database, writer)
    expected = \
"""@book{abc123,
 author = {test}
}
"""
    self.assertEqual(result, expected)
def test_indent(self):
    """writer.indent must be used as the per-field line prefix."""
    bib_database = BibDatabase()
    # NOTE(review): lowercase 'id'/'type' keys differ from the
    # 'ID'/'ENTRYTYPE' used by the other tests in this file — presumably
    # this targets an older bibtexparser API; confirm before changing.
    bib_database.entries = [{'id': 'abc123',
                             'type': 'book',
                             'author': 'test'}]
    writer = BibTexWriter()
    writer.indent = ' '
    result = bibtexparser.dumps(bib_database, writer)
    expected = \
"""@book{abc123,
 author = {test}
}
"""
    self.assertEqual(result, expected)
def bibtex(self, simplified=False):
    """Return this record's BibTeX (stripped of trailing whitespace), or
    None when no bib is loaded. Internal bookkeeping fields are removed;
    with simplified=True, identifier/link fields are dropped as well."""
    if not self.__bib:
        return None
    from copy import deepcopy
    # Work on a copy so the cached database stays untouched.
    bib = deepcopy(self.__bib)
    entry = bib.entries[0]
    for field in ('note', 'tags'):
        if field in entry:
            del entry[field]
    if simplified:
        for field in ('doi', 'acmid', 'isbn', 'url', 'link'):
            if field in entry:
                del entry[field]
    return BibTexWriter().write(bib).strip()
def test_sort_missing_field(self):
    """Entries lacking the sort field must sort first (empty sort key)."""
    bib_database = BibDatabase()
    bib_database.entries = [{'ID': 'b', 'ENTRYTYPE': 'article', 'year': '2000'},
                            {'ID': 'c', 'ENTRYTYPE': 'book', 'year': '2010'},
                            {'ID': 'a', 'ENTRYTYPE': 'book'}]
    writer = BibTexWriter()
    writer.order_entries_by = ('year', )
    result = bibtexparser.dumps(bib_database, writer)
    # 'a' (no year) first, then 'b' (2000), then 'c' (2010).
    expected = "@book{a\n}\n\n@article{b,\n year = {2000}\n}\n\n@book{c,\n year = {2010}\n}\n\n"
    self.assertEqual(result, expected)
def _cleanupBibTex(self, count):
    """ Clean up bibtex and ensure uniform look.

    Reparses self.refs with latex-encoding homogenization, updates
    self.number with the entry count, and rewrites self.refs.

    Fixed: ``order_entries_by = ('id')`` was a plain string (the
    parentheses don't make a tuple), so the writer would have treated it
    as the field sequence 'i', 'd'; it is now the one-tuple ('id',).
    """
    import bibtexparser
    from bibtexparser.bparser import BibTexParser
    parser = BibTexParser()
    parser.customization = homogeneize_latex_encoding
    bib = bibtexparser.loads(self.refs, parser=parser)

    # save results
    from bibtexparser.bwriter import BibTexWriter
    writer = BibTexWriter()
    writer.contents = ['entries']
    writer.indent = ' '
    writer.order_entries_by = ('id',)
    self.number = len(bib.entries)
    self.refs = bibtexparser.dumps(bib, writer)
def report_results_to_user(self):
    ''' write database to self.args.output_path and inform user '''
    with open(self.args.output_path, 'w') as outfile:
        outfile.write(BibTexWriter().write(self.args.bibtex_database))
    summary = [
        '',
        'Finished crawling the ACL anthology!',
        '%s bibtex files were found' % self.total_found,
        '%s bibtex files were added ' % self.total_added,
        'You ran the program with the following settings:',
        '',
        'overwrite {}'.format(self.args.overwrite),
        'queries: {}'.format(self.args.queries),
        '',
        'the output file can be found at: %s' % self.args.output_path,
    ]
    print('\n'.join(summary))
def write_bibtex_dict(stream, entries):
    """bibtexparser.write converts the entire database to one big string
    and writes it out in one go. I'm sure it will always all fit in RAM
    but some things just will not stand — stream entry by entry instead.
    """
    from bibtexparser.bwriter import BibTexWriter

    writer = BibTexWriter()
    writer.indent = ' '
    writer.entry_separator = ''
    for index, rec in enumerate(entries):
        # Blank line between entries, but not before the first one.
        if index:
            stream.write('\n')
        stream.write(writer._entry_to_bibtex(rec))
def __init__(self, rc):
    """Builder constructor; *rc* is the run-control/config object."""
    self.rc = rc
    # Build output goes under <builddir>/<btype> (btype set on the class).
    self.bldir = os.path.join(rc.builddir, self.btype)
    # Templates are looked up in the CWD first, then next to this module.
    self.env = Environment(loader=FileSystemLoader([
        'templates',
        os.path.join(os.path.dirname(__file__), 'templates'),
    ]))
    self.construct_global_ctx()
    # Bib machinery is optional — only set up when bibtexparser imported.
    if HAVE_BIBTEX_PARSER:
        self.bibdb = BibDatabase()
        self.bibwriter = BibTexWriter()
def bibTexIn(filename):
    """Load *filename* and store each matching entry's rendered BibTeX on
    the corresponding BiblioEntryBibTex database object.

    Fixed: the BibTexWriter was re-created on every loop iteration;
    hoisted — one writer serves all entries.
    """
    # load database into bibtexdatabase object
    with codecs.open(filename, 'r', 'utf-8') as bibtex_file:
        parser = BibTexParser()
        parser.customization = convert_to_unicode
        bibtex_database = bibtexparser.load(bibtex_file, parser)
    # load all the keys we want to add into the dialectsDB database
    dbbibtexkeys = [x.bibTexKey for x in BiblioEntryBibTex.objects.all()]
    btdbdict = bibtex_database.entries_dict
    writer = BibTexWriter()
    for item in dbbibtexkeys:
        try:
            currentbtitem = btdbdict[item]
            currentBEobject = BiblioEntryBibTex.objects.get(bibTexKey=item)
            bibtexofentry = writer._entry_to_bibtex(currentbtitem)
            currentBEobject.fullBibtex = bibtexofentry
            print("Currently saving: {}".format(item))
            # The saving function should put the relevant info into the
            # other fields
            currentBEobject.save()
        except KeyError:
            print("No bibliography entry for: {}".format(item))
def main():
    """Dump the bibliography database named on argv[1] to the bibtex file
    named on argv[2].

    Fixes: exit after printing the usage message (the original fell
    through and crashed), and connect to the database given on the
    command line instead of the hard-coded 'app.db' (the original
    printed argv[1] but ignored it).
    """
    if len(sys.argv) < 3:
        print("Wrong number of arguments. Usage: \n")
        print("python3 dump_db.py name.db dump.bib")
        return
    print("Dump database")
    print("Database: ", sys.argv[1])
    engine = create_engine('sqlite:///' + sys.argv[1])
    Session = sessionmaker()
    Session.configure(bind=engine)
    session = Session()
    db = BibDatabase()
    db.entries = []
    dbentries = session.query(BiblioEntry)
    for e in dbentries:
        db.entries.append(
            {'journal': e.journal,
             'title': e.title,
             'year': str(e.year),
             'publisher': e.publisher,
             'school': e.school,
             'ID': e.ID,
             'url': e.url,
             'author': e.authors,
             'keyword': e.keywords,
             'ENTRYTYPE': e.ENTRYTYPE}
        )
    print("Write file on", sys.argv[2])
    writer = BibTexWriter()
    with open(sys.argv[2], 'w') as bibfile:
        bibfile.write(writer.write(db))
    session.close()
    print("Connection closed.")
def write_bib(db, order=False): """ Write bibtex string. Args: db (BibDatabase): database object to dump.. order (bool): whether to reorder entries upon writing. Returns: The dumped string. """ # Custom writer writer = BibTexWriter() writer.indent = '\t' writer.order_entries_by = None # Replace month by numeric value for entry in db.entries: if 'month' in entry and entry['month'] in MONTHS: entry['month'] = '{:02d}'.format(MONTHS.index(entry['month']) + 1) if order: # Manual sort order_entries_by = ('year', 'author', 'ID') sort_entries(db, order_entries_by) if not config.use_utf8_characters: db.entries = [nomenclature.encode_ascii_latex(entry) for entry in db.entries] if config.protect_uppercase: for entry in db.entries: entry["title"] = latex.protect_uppercase(entry["title"]) # Write bib string return writer.write(db)
record['error'] = 'tag' # record = c.type(record) # record = c.author(record) # record = c.editor(record) # record = c.journal(record) # record = c.keyword(record) # record = c.link(record) # record = c.doi(record) # record['p_authors'] = [] # if 'author' in record: # record['p_authors'] = [c.splitname(x, False) for x in record['author']] return record parser.customization = custom with open(args.target, 'r') as f: logging.info("Loading bibtex") db = b.load(f, parser) logging.info("Bibtex loaded") #Get errors and write them out: errored = [x for x in db.entries if 'error' in x] with open('{}.errors'.format(args.output), 'w') as f: f.write("\n".join(["{} : {}".format(x['ID'], x['error']) for x in errored])) writer = BibTexWriter() with open(args.output,'w') as f: f.write(writer.write(db))
def toString(e):
    """Serialize a single bibtex entry dict to its bibtex string form."""
    w = BibTexWriter()
    w.indent = ' '        # indentation used for entry fields
    w.comma_first = True  # place the comma at the beginning of the line
    # _entry_to_bibtex is the writer's per-entry serializer (write() would
    # need a whole BibDatabase).
    return w._entry_to_bibtex(e)
mon1 = dateobj.strftime("%b") dateobj = datetime.date(2000, int(m.group(2)), 1) mon2 = dateobj.strftime("%b") bib_obj["month"] = ", %s-%s" % (mon1, mon2) month = int(m.group(1)) else: m = re.match("^[0-9]+$", month) if m: dateobj = datetime.date(2000, int(month), 1) mon = dateobj.strftime("%b") bib_obj["month"] = mon month = int(month) if conf_kwd is not None: bib_id = "%s%s%s%s" % (author_kwd, conf_kwd, year, title_kwd) else: bib_id = "%s%s%s" % (author_kwd, year, title_kwd) if bib_id in id_dict: raise id_dict[bib_id] = True bib_obj["ID"] = bib_id print bib_id db.entries.append(bib_obj) writer = BibTexWriter() with open("seokhwankim.bib", "w") as bibfile: bibfile.write(writer.write(db))
# record = c.keyword(record) # record = c.link(record) # record = c.doi(record) record['tags'] = [i.strip() for i in re.split(',|;', record["tags"].replace("\n",""))] # record['p_authors'] = [] # if 'author' in record: # record['p_authors'] = [c.splitname(x, False) for x in record['author']] return record parser.customization = custom with open(args.target, 'r') as f: logging.info("Loading bibtex") db = b.load(f, parser) #go through entries, creating a new db for each tag, and year, and author db_dict = {} for entry in db.entries: for tag in entry['tags']: if tag not in db_dict: db_dict[tag] = BibDatabase() db_dict[tag].entries.append(entry) logging.info("Writing Bibtex") writer = BibTexWriter() for k,v in db_dict.items(): with open(join(args.output, "{}.bib".format(k)),'w') as f: f.write(writer.write(v))
class HtmlBuilder(object):
    """Build a static HTML site (people, projects, blog, jobs) from the
    collections reachable through ``rc.client``, rendering Jinja templates
    into ``rc.builddir/html``."""

    # Builder type tag; also the name of the output subdirectory.
    btype = 'html'

    def __init__(self, rc):
        self.rc = rc
        self.bldir = os.path.join(rc.builddir, self.btype)
        # Look up templates first in a local 'templates' dir, then in the
        # package's own templates directory.
        self.env = Environment(loader=FileSystemLoader([
            'templates',
            os.path.join(os.path.dirname(__file__), 'templates'),
        ]))
        self.construct_global_ctx()
        # Bibtex machinery is optional; both objects are reused for every
        # person's bib file (see make_bibtex_file).
        if HAVE_BIBTEX_PARSER:
            self.bibdb = BibDatabase()
            self.bibwriter = BibTexWriter()

    def construct_global_ctx(self):
        """Populate self.gtx, the global template-rendering context."""
        self.gtx = gtx = {}
        rc = self.rc
        # Expose a handful of Python builtins/constants to the templates.
        gtx['len'] = len
        gtx['True'] = True
        gtx['False'] = False
        gtx['None'] = None
        gtx['sorted'] = sorted
        gtx['groupby'] = groupby
        gtx['gets'] = gets
        gtx['date_key'] = date_key
        gtx['doc_date_key'] = doc_date_key
        gtx['level_val'] = level_val
        gtx['category_val'] = category_val
        gtx['rfc822now'] = rfc822now
        gtx['date_to_rfc822'] = date_to_rfc822
        # Materialize jobs and people up front; people sorted by position.
        gtx['jobs'] = list(all_docs_from_collection(rc.client, 'jobs'))
        gtx['people'] = sorted(all_docs_from_collection(rc.client, 'people'),
                               key=position_key, reverse=True)
        gtx['all_docs_from_collection'] = all_docs_from_collection

    def render(self, tname, fname, **kwargs):
        """Render template ``tname`` into build-dir file ``fname``.

        kwargs overlay the global context; 'static' and 'root' default to
        paths relative to the output file's directory.
        """
        template = self.env.get_template(tname)
        ctx = dict(self.gtx)
        ctx.update(kwargs)
        ctx['rc'] = ctx.get('rc', self.rc)
        ctx['static'] = ctx.get('static',
                                os.path.relpath('static', os.path.dirname(fname)))
        ctx['root'] = ctx.get('root',
                              os.path.relpath('/', os.path.dirname(fname)))
        result = template.render(ctx)
        with open(os.path.join(self.bldir, fname), 'wt') as f:
            f.write(result)

    def build(self):
        """Render every page type, then copy the static asset tree."""
        rc = self.rc
        os.makedirs(self.bldir, exist_ok=True)
        self.root_index()
        self.people()
        self.projects()
        self.blog()
        self.jobs()
        self.nojekyll()
        self.cname()
        # static: replace any previous copy wholesale.
        stsrc = os.path.join('templates', 'static')
        stdst = os.path.join(self.bldir, 'static')
        if os.path.isdir(stdst):
            shutil.rmtree(stdst)
        shutil.copytree(stsrc, stdst)

    def root_index(self):
        """Render the site's landing page."""
        rc = self.rc
        self.render('root_index.html', 'index.html', title='Home')

    def people(self):
        """Render one page per person plus the people index."""
        rc = self.rc
        peeps_dir = os.path.join(self.bldir, 'people')
        os.makedirs(peeps_dir, exist_ok=True)
        for p in self.gtx['people']:
            # All names this person is known by (aliases + canonical name).
            names = frozenset(p.get('aka', []) + [p['name']])
            pubs = self.filter_publications(names, reverse=True)
            bibfile = self.make_bibtex_file(pubs, pid=p['_id'],
                                            person_dir=peeps_dir)
            # Employment and education merged into one reverse-chronology list.
            ene = p.get('employment', []) + p.get('education', [])
            ene.sort(key=ene_date_key, reverse=True)
            projs = self.filter_projects(names)
            self.render('person.html',
                        os.path.join('people', p['_id'] + '.html'), p=p,
                        title=p.get('name', ''), pubs=pubs, names=names,
                        bibfile=bibfile, education_and_employment=ene,
                        projects=projs)
        self.render('people.html', os.path.join('people', 'index.html'),
                    title='People')

    def filter_publications(self, authors, reverse=False):
        """Return citations whose author list intersects ``authors``,
        sorted by document date."""
        rc = self.rc
        pubs = []
        for pub in all_docs_from_collection(rc.client, 'citations'):
            if len(set(pub['author']) & authors) == 0:
                continue
            pubs.append(pub)
        pubs.sort(key=doc_date_key, reverse=reverse)
        return pubs

    def make_bibtex_file(self, pubs, pid, person_dir='.'):
        """Write ``pubs`` to ``<person_dir>/<pid>.bib``; return its path,
        or None when bibtexparser is unavailable.

        NOTE(review): unlike the module-level make_bibtex_file helper, this
        method does not latex-escape field values and ignores 'editor'
        lists — confirm whether that divergence is intentional.
        """
        if not HAVE_BIBTEX_PARSER:
            return None
        # self.bibdb is shared across calls; its entry list is replaced here.
        self.bibdb.entries = ents = []
        for pub in pubs:
            ent = dict(pub)
            # bibtexparser expects 'ID'/'ENTRYTYPE' keys.
            ent['ID'] = ent.pop('_id')
            ent['ENTRYTYPE'] = ent.pop('entrytype')
            ent['author'] = ' and '.join(ent['author'])
            ents.append(ent)
        fname = os.path.join(person_dir, pid) + '.bib'
        with open(fname, 'w') as f:
            f.write(self.bibwriter.write(self.bibdb))
        return fname

    def filter_projects(self, authors, reverse=False):
        """Return projects whose team intersects ``authors``, with the team
        list narrowed to just those members, sorted by id."""
        rc = self.rc
        projs = []
        for proj in all_docs_from_collection(rc.client, 'projects'):
            team_names = set(gets(proj['team'], 'name'))
            if len(team_names & authors) == 0:
                continue
            # Copy before mutating: the collection document stays intact.
            proj = dict(proj)
            proj['team'] = [x for x in proj['team'] if x['name'] in authors]
            projs.append(proj)
        projs.sort(key=id_key, reverse=reverse)
        return projs

    def projects(self):
        """Render the projects listing page."""
        rc = self.rc
        projs = all_docs_from_collection(rc.client, 'projects')
        self.render('projects.html', 'projects.html', title='Projects',
                    projects=projs)

    def blog(self):
        """Render one page per blog post, the blog index, and the RSS feed."""
        rc = self.rc
        blog_dir = os.path.join(self.bldir, 'blog')
        os.makedirs(blog_dir, exist_ok=True)
        posts = list(all_docs_from_collection(rc.client, 'blog'))
        posts.sort(key=ene_date_key, reverse=True)
        for post in posts:
            self.render('blog_post.html',
                        os.path.join('blog', post['_id'] + '.html'),
                        post=post, title=post['title'])
        self.render('blog_index.html', os.path.join('blog', 'index.html'),
                    title='Blog', posts=posts)
        self.render('rss.xml', os.path.join('blog', 'rss.xml'), items=posts)

    def jobs(self):
        """Render one page per job posting plus the jobs index."""
        rc = self.rc
        jobs_dir = os.path.join(self.bldir, 'jobs')
        os.makedirs(jobs_dir, exist_ok=True)
        for job in self.gtx['jobs']:
            self.render('job.html',
                        os.path.join('jobs', job['_id'] + '.html'), job=job,
                        title='{0} ({1})'.format(job['title'], job['_id']))
        self.render('jobs.html', os.path.join('jobs', 'index.html'),
                    title='Jobs')

    def nojekyll(self):
        """Touches a nojekyll file in the build dir"""
        with open(os.path.join(self.bldir, '.nojekyll'), 'a+'):
            pass

    def cname(self):
        """Write a CNAME file when rc carries a cname; otherwise do nothing."""
        rc = self.rc
        if not hasattr(rc, 'cname'):
            return
        with open(os.path.join(self.bldir, 'CNAME'), 'w') as f:
            f.write(rc.cname)
def add_raw_bibtex(contents):
    """Parse raw bibtex text and print each entry re-serialized.

    Parameters
    ----------
    contents : str
        Raw bibtex source to parse.
    """
    parser = BibTexParser()
    bib = bibtexparser.loads(contents, parser=parser)
    writer = BibTexWriter()
    for i in bib.entries:
        # BUG FIX: BibTexWriter.write() expects a whole BibDatabase, not a
        # single entry dict; use the per-entry serializer instead.
        print(writer._entry_to_bibtex(i))
def formatText(self):
    """Normalize the loaded bibtex entries (booktitle, title case, page
    ranges, author line breaks) and write the result to '<title>.bib'."""
    # Read input either from a bibtex file path or from pasted content.
    if self.BibtexfilePath != '':
        self.openfile()
    else:
        self.readcontent()
    # Venue-name map; merge the long GLOBECOM key into the short one.
    m = self.getMap()
    m['IEEE Global Communications Conference'] = m['IEEE Global Communications Conference, incorporating the Global Internet Symposium']
    del m['IEEE Global Communications Conference, incorporating the Global Internet Symposium']
    print m
    length = 0
    nb = {}
    for bibtex in self.allbibtex:
        # Longest field name (excluding ENTRYTYPE), used to pad keys so
        # values line up in the output.
        for key in bibtex.keys():
            if len(key) > length and key != 'ENTRYTYPE':
                length = len(key)
        for k, v in bibtex.items():
            if k == 'ENTRYTYPE' or k == 'ID':
                nb[k] = v
                continue
            # NOTE(review): unreachable — the branch above already matched 'ID'.
            elif k == 'ID':
                nb[k] = v
                continue
            # Fields dropped from the output entirely.
            elif k == 'doi' or k == 'ISSN' or k == 'keywords':
                continue
            elif v == '':
                continue
            elif 'url' in k:
                continue
            # Space-padded key so all '=' signs align in the .bib output.
            nk = k + (length - len(k)) * ' '
            if 'booktitle' in nk:
                # A parenthesized acronym wins: "... (ICC)" -> "Proc. of ICC".
                if '(' in v:
                    v1 = v.split('(')[1].split(')')[0]
                    nb[nk] = 'Proc. of ' + v1
                    continue
                flag = 0  # booktitle not rewritten yet
                # Strip punctuation before matching against the venue map.
                to_remove = "~`!@#$%^&*(){}[];':<>|-=_+"
                table = {ord(char): None for char in to_remove}
                clean_v = v.translate(table)
                #clean_v = v.translate(string.punctuation)
                #print clean_v
                for kk, vv in m.items():
                    if kk in clean_v:
                        # vv is (short venue name, publisher) — TODO confirm.
                        nb[nk] = 'Proc. of ' + vv[0]
                        publish = 'publish' + (length - 7) * ' '
                        nb[publish] = vv[1]
                        flag = 1
                        break
                if flag == 0:
                    # No map hit: keep the original booktitle and log it.
                    nb[nk] = v
                    print v
                continue
            elif nk.strip() == 'title' and 'booktitle' not in nk:
                self.tilte = v
                nv = v.split(' ')
                for i in range(len(nv)):
                    # Capitalize each title word except prepositions and articles.
                    if nv[i] in self.prep or nv[i] in self.artie:
                        continue
                    # Uppercase a lowercase leading letter (ASCII a-z).
                    else:
                        if 97 <= ord(nv[i][0]) <= 122:
                            nv[i] = chr(ord(nv[i][0])-32)+nv[i][1:]
                v = ' '.join(nv)
                # Brace-protect the whole title so bibtex keeps the casing.
                nb[nk] = '{' + v + '}'
                continue
            elif 'pages' in nk:
                # Normalize single '-' page ranges to bibtex's '--'.
                if '--' in v:
                    nb[nk] = v
                    continue
                nb[nk] = v.replace('-', '--')
                continue
            elif 'author' in nk:
                # Join author lines broken across physical lines.
                if '\n' in v:
                    nb[nk] = v.replace('\n', ' ')
                    continue
            # Everything else is copied through unchanged.
            nb[nk] = v
    db = BibDatabase()
    db.entries = [nb]
    writer = BibTexWriter()
    writer.indent = '\t'  # indent entries with a tab
    writer.comma_first = False  # comma at the end of each line
    # NOTE(review): 'wb' plus a str from writer.write() only works on
    # Python 2 (this file uses print statements) — would need encoding on 3.
    with open(self.tilte+'.bib', 'wb') as bibfile:
        bibfile.write(writer.write(db))
def test_sort_type_id(self):
    """Entries are ordered by ENTRYTYPE first, then by ID."""
    w = BibTexWriter()
    w.order_entries_by = ('ENTRYTYPE', 'ID')
    dumped = bibtexparser.dumps(self.bib_database, w)
    self.assertEqual(dumped, "@article{b\n}\n\n@book{a\n}\n\n@book{c\n}\n\n")
def test_sort_none(self):
    """With ordering disabled, entries keep their database order."""
    w = BibTexWriter()
    w.order_entries_by = None
    dumped = bibtexparser.dumps(self.bib_database, w)
    self.assertEqual(dumped, "@article{b\n}\n\n@book{c\n}\n\n@book{a\n}\n\n")