def format_paper_citation_dict(citation, indent=' '):
    """
    Render one citation dict, or a list of them, as a BibTeX string.

    Entries that share an ``ID`` are disambiguated in place: the second and
    later occurrences get ``_<n>`` appended, ``n`` being the running count of
    that ID. The dicts passed in are modified by this renaming.

    :param citation: A ``Paper`` citation dict or list of such dicts.
    :param indent: Indentation to be used in BibTeX output.
    :returns: The BibTeX text with surrounding whitespace stripped.
    """
    records = [citation] if isinstance(citation, dict) else citation

    # Running count of how often each ID has been seen so far.
    seen = collections.defaultdict(int)
    for record in records:
        original_id = record['ID']
        seen[original_id] += 1
        occurrence = seen[original_id]
        if occurrence > 1:
            record['ID'] = '%s_%s' % (original_id, occurrence)

    bibtex_writer = BibTexWriter()
    bibtex_writer.indent = indent

    database = BibDatabase()
    database.entries = records
    return bibtex_writer.write(database).strip()
def load_and_replace(bibtex_file):
    """Load ``publications/<bibtex_file>`` and return its entries, newest year first.

    The file is parsed twice: once plainly (to re-serialise each entry as
    BibTeX) and once with the ``bc.author`` customization (the entries that
    are returned). Each returned entry gets a ``BIB_ENTRY`` field holding its
    tab-indented BibTeX source; every other field is passed through
    ``context.make_replacements`` and the author field through
    ``_format_author_list``. Entries without a ``year`` are filed under 1970.
    """
    bib_path = os.path.join('publications', bibtex_file)
    with open(bib_path, 'r', encoding="utf-8") as handle:
        raw_bibtex = handle.read()

    raw_by_id = BibTexParser(raw_bibtex).get_entry_dict()
    publications = BibTexParser(raw_bibtex, bc.author).get_entry_list()

    grouped = {}
    for publication in publications:
        # Serialise the untouched entry on its own.
        single_db = BibDatabase()
        single_db.entries = [raw_by_id[publication['ID']]]
        entry_writer = BibTexWriter()
        entry_writer.indent = '\t'
        publication['BIB_ENTRY'] = entry_writer.write(single_db)

        for key in publication:
            if key != 'BIB_ENTRY':
                publication[key] = context.make_replacements(publication[key])
        publication['author'] = _format_author_list(publication['author'])

        year = int(publication['year']) if 'year' in publication else 1970
        grouped.setdefault(year, []).append(publication)

    # Years descending; entries within a year keep file order.
    ordered = []
    for year in sorted(grouped, reverse=True):
        ordered.extend(grouped[year])
    return ordered
def work_to_bibtex(work, name=None, acronym=False, rules=None):
    """Render a work as BibTeX text.

    The work is first turned into an entry by ``work_to_bibtex_entry`` (which
    receives ``name``, ``acronym`` and ``rules`` unchanged) and then written
    out as a one-entry database.

    Doctest:

    .. doctest::

        >>> reload()
        >>> murta2014a = work_by_varname("murta2014a")
        >>> print(work_to_bibtex(murta2014a))
        @inproceedings{murta2014a,
         address = {Cologne, Germany},
         author = {Murta, Leonardo and Braganholo, Vanessa and Chirigati, Fernando and Koop, David and Freire, Juliana},
         booktitle = {International Provenance and Annotation Workshop},
         pages = {71--83},
         publisher = {Springer},
         title = {no{W}orkflow: capturing and analyzing provenance of scripts},
         year = {2014}
        }
        <BLANKLINE>
        <BLANKLINE>

        Custom name:

        >>> reload()
        >>> murta2014a = work_by_varname("murta2014a")
        >>> print(work_to_bibtex(murta2014a, name="other"))
        @inproceedings{other,
         address = {Cologne, Germany},
         author = {Murta, Leonardo and Braganholo, Vanessa and Chirigati, Fernando and Koop, David and Freire, Juliana},
         booktitle = {International Provenance and Annotation Workshop},
         pages = {71--83},
         publisher = {Springer},
         title = {no{W}orkflow: capturing and analyzing provenance of scripts},
         year = {2014}
        }
        <BLANKLINE>
        <BLANKLINE>

        Use acronym for place name:

        >>> print(work_to_bibtex(murta2014a, acronym=True))
        @inproceedings{murta2014a,
         address = {Cologne, Germany},
         author = {Murta, Leonardo and Braganholo, Vanessa and Chirigati, Fernando and Koop, David and Freire, Juliana},
         booktitle = {IPAW},
         pages = {71--83},
         publisher = {Springer},
         title = {no{W}orkflow: capturing and analyzing provenance of scripts},
         year = {2014}
        }
        <BLANKLINE>
        <BLANKLINE>
    """
    bib_writer = BibTexWriter()
    bib_writer.indent = " "

    database = BibDatabase()
    database.entries = [
        work_to_bibtex_entry(work, name=name, acronym=acronym, rules=rules),
    ]
    return bib_writer.write(database)
def main_resps():
    """Convert the citations in ``data/resps-perturbed-tides.txt`` to a .bib file.

    Citations are parsed with ``parse_citations`` and converted through their
    ``to_bibtex`` method. Entries sharing an ID get a lowercase letter
    appended; letters are handed out in descending order, so the first of two
    duplicates receives ``b`` and the second ``a``. The result is written to
    ``data/resps-tides-perturbed-refs.bib``.
    """
    source = Path("data/resps-perturbed-tides.txt")

    database = BibDatabase()
    database.entries = [citation.to_bibtex() for citation in parse_citations(source)]

    occurrences = defaultdict(int)
    for record in database.entries:
        occurrences[record["ID"]] += 1

    for duplicated_id, remaining in occurrences.items():
        if remaining <= 1:
            continue
        # Snapshot the clashing records first: their IDs change while we loop.
        clashing = [record for record in database.entries if record["ID"] == duplicated_id]
        for record in clashing:
            remaining -= 1
            record["ID"] += ascii_lowercase[remaining]

    bib_writer = BibTexWriter()
    bib_writer.indent = " "
    target = Path("data/resps-tides-perturbed-refs.bib")
    with target.open("wb") as ref_file:
        ref_file.write(bib_writer.write(database).encode())
def save(self, bibfile=-1):
    """
    Write this bibliography list to a file below ``$PYBLIO_BIB``.

    Without an argument the list's own ``self.name`` is used as file name;
    otherwise the given ``bibfile`` name is used. The target path is echoed
    on stdout before writing.
    """
    if bibfile == -1:
        bibfile = self.name

    database = BibDatabase()
    database.entries.extend(self)

    bib_writer = BibTexWriter()
    bib_writer.indent = ' '
    bib_writer.comma_first = False  # commas stay at the end of each line
    bib_writer.align_values = True  # line up the values of all fields

    target = os.path.join(os.path.expandvars('$PYBLIO_BIB'), bibfile)
    print('')
    print(target)
    print('')

    # The database text is framed by one leading and one trailing newline.
    with open(target, 'w') as bf:
        bf.write('\n')
        bf.write(bib_writer.write(database))
        bf.write('\n')
def metaDictToBib(jobid, metadict, omit_keys, path_prefix):
    """Convert the meta data of one document into a bibtex entry string.

    Args:
        jobid (int): id of job, handed back unchanged.
        metadict (DocMeta): meta dict of a doc; its 'id' item is returned.
        omit_keys (list): keys to omit in the converted dict.
        path_prefix (str): folder path to prepend to attachment file paths.

    Returns:
        rec (int): 0 if successful, 1 otherwise.
        jobid (int): the input jobid as it is.
        dbtext (str): formatted bibtex entry, '' if <rec>==1.
        docid (int): id of the processed document.

    Key renaming uses the module-level INV_ALT_KEYS mapping. Any exception is
    logged through LOGGER and reported via the return code.
    """
    try:
        plain_entry = toOrdinaryDict(metadict, INV_ALT_KEYS, omit_keys, path_prefix)

        database = BibDatabase()
        database.entries = [plain_entry]

        bib_writer = BibTexWriter()
        bib_writer.indent = ' '
        bib_writer.comma_first = False

        bibtex_text = bib_writer.write(database)
        return 0, jobid, bibtex_text, metadict['id']
    except Exception:
        LOGGER.exception('Failed to write to bibtex')
        return 1, jobid, '', metadict['id']
def format_paper_citation_dict(citation, indent=' '):
    """
    Render one citation dict, or a list of them, as a BibTeX string.

    Entries that share an ``ID`` are disambiguated in place: the second and
    later occurrences get ``_<n>`` appended, ``n`` being the running count of
    that ID. The dicts passed in are modified by this renaming.

    :param citation: A ``Paper`` citation dict or list of such dicts.
    :param indent: Indentation to be used in BibTeX output.
    :returns: The BibTeX text with surrounding whitespace stripped.
    """
    records = [citation] if isinstance(citation, dict) else citation

    # Running count of how often each ID has been seen so far.
    seen = collections.defaultdict(int)
    for record in records:
        original_id = record['ID']
        seen[original_id] += 1
        occurrence = seen[original_id]
        if occurrence > 1:
            record['ID'] = '%s_%s' % (original_id, occurrence)

    bibtex_writer = BibTexWriter()
    bibtex_writer.indent = indent

    database = BibDatabase()
    database.entries = records
    return bibtex_writer.write(database).strip()
def getcitation():
    """Refresh the citation count of a random sample of articles.

    Loads ``articles.bib``, picks up to 20 random entries and, for each one
    with a known ``clusterid``, asks ``scholar.py`` for the current citation
    count. A count is accepted only when it is higher than the stored
    ``cited`` value by fewer than 8. After every accepted update the database
    is checkpointed to ``cited-add-articles.bib`` (and copied to
    ``tempcited-add-articles.bib`` in the working directory). Finally
    ``articles.bib`` is backed up to ``oldarticles.bib`` and rewritten.

    Returns:
        int: always 0.
    """
    import random
    import shlex
    import shutil

    base_dir = '/home/limingtao/ircre-bibtex/ircreupdate/'
    articles_path = base_dir + 'articles.bib'
    checkpoint_path = base_dir + 'cited-add-articles.bib'

    articlesparser = BibTexParser(common_strings=False)
    articlesparser.ignore_nonstandard_types = False
    with open(articles_path, encoding='utf8') as articlesfile:
        articles_database = bibtexparser.load(articlesfile, articlesparser)
    articleentries = articles_database.entries

    # Created up front so the final dump always has a writer, even when no
    # sampled entry was updated.
    writer = BibTexWriter()
    writer.indent = ' '
    writer.order_entries_by = ('order',)

    # random.sample raises ValueError when asked for more items than exist.
    samplelist = random.sample(range(len(articleentries)), min(20, len(articleentries)))
    print(samplelist)

    for i in samplelist:
        entry = articleentries[i]
        print("---------------------------")
        print("Entry number: " + str(i))
        title = entry['title']
        clusterid = entry['clusterid']
        print("Title: " + title)
        print("Cluster ID: " + clusterid)

        citations = "unknown"
        if clusterid != "unknown":
            print(str(i))
            # The cluster id comes from the .bib file; quote it so it cannot
            # be interpreted by the shell.
            command = ('/usr/bin/python3 ' + base_dir + 'scholarpy/scholar.py -c 1 -C '
                       + shlex.quote(clusterid) + ' |grep -v list |grep Citations')
            try:
                with os.popen(command) as pipe:
                    citations = pipe.read().strip().split()[-1]
            except (IndexError, OSError):
                # No matching output line (or the command could not be run).
                citations = "unknown"
        print("new Citations: " + citations)

        oldcitednumber = int(entry['cited']) if 'cited' in entry else 0
        print("Old Cited Number: " + str(oldcitednumber))

        if citations == "unknown":
            continue
        citednumber = int(citations)
        # Only accept small increases; large jumps are not trusted.
        if oldcitednumber < citednumber < oldcitednumber + 8:
            entry['cited'] = str(citednumber)
            with open(checkpoint_path, 'w', encoding='utf8') as newarticlefile:
                bibtexparser.dump(articles_database, newarticlefile, writer=writer)
            # Synchronous copy: the previous fire-and-forget "cp" could still
            # be running when the file was rewritten.
            shutil.copy(checkpoint_path, 'tempcited-add-articles.bib')

    # The backup must be complete before articles.bib is truncated below.
    shutil.copy(articles_path, base_dir + 'oldarticles.bib')
    with open(articles_path, 'w', encoding='utf8') as newarticlefile:
        bibtexparser.dump(articles_database, newarticlefile, writer=writer)
    return 0
def getBibtexStrFromAbstractDict(abstractDict):
    """Return the BibTeX text for ``abstractDict`` without its url/journal fields.

    The 'url' and 'journal' keys are removed from the passed dict itself;
    a missing key raises ``KeyError``.
    """
    for unwanted in ('url', 'journal'):
        abstractDict.pop(unwanted)

    bib_writer = BibTexWriter()
    bib_writer.indent = ' '

    database = BibDatabase()
    database.entries = [abstractDict]
    return bib_writer.write(database)
def write_bib(bib_database, filen="dl4m.bib"):
    """Serialise ``bib_database`` as BibTeX into the file ``filen``.

    Entries are ordered by the fields ``noneyear`` then ``author``; the file
    is written as UTF-8.
    """
    bib_writer = BibTexWriter()
    bib_writer.order_entries_by = ('noneyear', "author")
    bib_writer.indent = ' '
    with open(filen, "w", encoding="utf-8") as out:
        out.write(bib_writer.write(bib_database))
def write_bibtex(db, filename):
    '''
    Dump the database ``db`` as UTF-8 BibTeX into the file ``filename``.

    Fields are written without indentation and with a trailing comma.
    '''
    with open(filename, 'w', encoding='utf-8') as target:
        bib_writer = BibTexWriter()
        bib_writer.indent = ''
        bib_writer.add_trailing_comma = True
        bibtexparser.dump(db, target, bib_writer)
def _writer():
    '''
    Build the bibtex writer with this module's settings.

    Entries are ordered by ID; within an entry title, author and editor are
    listed first.
    '''
    configured = BibTexWriter()
    configured.display_order = ['title', 'author', 'editor']
    configured.order_entries_by = ('ID',)
    configured.indent = ' '
    return configured
def make_bibs(self, prefix, output):
    """Write every entry returned by ``self.get_all_bibs(prefix)`` to ``output``.

    The file is tab-indented BibTeX; the number of entries is logged when done.
    """
    collected = self.get_all_bibs(prefix)

    bib_writer = BibTexWriter()
    bib_writer.indent = '\t'

    database = BibDatabase()
    database.entries = collected

    with open(output, 'w') as target:
        bibtexparser.dump(database, target, bib_writer)
    logging.info('processed %d bib entries', len(collected))
def convert_to_bib(content, save_fpath):
    """Parse an API response and store the resulting papers as BibTeX.

    ``content`` is handed to ``parse_api_response``; the entries it returns
    are written to ``save_fpath`` with commas placed at the start of lines.
    """
    database = BibDatabase()
    database.entries = parse_api_response(content)

    bib_writer = BibTexWriter()
    bib_writer.comma_first = True
    bib_writer.indent = " "

    with open(save_fpath, "w+") as target:
        target.write(bib_writer.write(database))
def dumps(bibman):
    """Return the cleaned entries of ``bibman`` as BibTeX text.

    ``bibman.cleaned`` is used as the database's entry dictionary and its
    values as the entry list. Entries are written in that order (no sorting),
    preceded by the database comments.
    """
    cleaned = bibman.cleaned
    database = bibtexparser.bparser.BibDatabase()
    database._entries_dict = cleaned
    database.entries = list(cleaned.values())

    bib_writer = BibTexWriter()
    bib_writer.indent = ' '
    bib_writer.contents = ['comments', 'entries']
    bib_writer.order_entries_by = None  # keep insertion order
    return bibtexparser.dumps(database, bib_writer)
def bibtexclassify():
    """Split ``ircre.bib`` into ``articles.bib`` and ``others.bib``.

    ``articles.bib`` receives copies of all ``article`` entries,
    ``others.bib`` copies of all ``inbook``, ``inproceedings`` and
    ``incollection`` entries. Both files are ordered by the ``order`` field.
    Always returns 0.
    """
    parser = BibTexParser(common_strings=False)
    parser.ignore_nonstandard_types = False
    with open('/home/limingtao/ircre-bibtex/ircreupdate/ircre.bib', encoding='utf8') as bibtexfile:
        ircrebib_database = bibtexparser.load(bibtexfile, parser)
    allentries = ircrebib_database.entries.copy()

    def _export(entry_types, target_path):
        # Write copies of the entries whose type is in entry_types.
        subset = BibDatabase()
        subset.entries = [
            entry.copy() for entry in allentries if entry['ENTRYTYPE'] in entry_types
        ]
        subset_writer = BibTexWriter()
        subset_writer.indent = ' '
        subset_writer.order_entries_by = ('order',)
        with open(target_path, 'w', encoding='utf8') as target:
            bibtexparser.dump(subset, target, writer=subset_writer)

    _export(('article',), '/home/limingtao/ircre-bibtex/ircreupdate/articles.bib')
    _export(('inbook', 'inproceedings', 'incollection'),
            '/home/limingtao/ircre-bibtex/ircreupdate/others.bib')
    return 0
def test_comma_first(self):
    """Writing book.bib with ``comma_first`` must reproduce book_comma_first.bib."""
    with io.open(_data_path('book.bib'), 'r') as source:
        parsed = BibTexParser(source.read())
    with io.open(_data_path('book_comma_first.bib'), 'r') as reference:
        expected = reference.read()

    comma_writer = BibTexWriter()
    comma_writer.comma_first = True
    comma_writer.indent = ' '
    actual = comma_writer.write(parsed)

    self.maxDiff = None  # show the complete diff on failure
    self.assertEqual(expected, actual)
def printCandidates(candidates: List[Dict]):
    """Print the number of candidate entries followed by their BibTeX text."""
    database = BibDatabase()
    database.entries = candidates

    bib_writer = BibTexWriter()
    bib_writer.indent = " "
    bib_writer.align_values = True

    print(f"{len(candidates)} suggestions:\n")
    print(bib_writer.write(database))
def bibtex_entries_to_string(entries: List[Dict]):
    """Return the BibTeX text for ``entries``, or ``""`` for an empty list.

    Values are aligned across the fields of each entry.
    """
    if len(entries) < 1:
        return ""

    database = BibDatabase()
    database.entries = entries

    bib_writer = BibTexWriter()
    bib_writer.indent = " "
    bib_writer.align_values = True
    return bib_writer.write(database)
def test_comma_first(self):
    """Writing book.bib with ``comma_first`` must reproduce book_comma_first.bib."""
    with io.open(_data_path('book.bib'), 'r') as source:
        parsed = BibTexParser(source.read())
    with io.open(_data_path('book_comma_first.bib'), 'r') as reference:
        expected = reference.read()

    comma_writer = BibTexWriter()
    comma_writer.comma_first = True
    comma_writer.indent = ' '
    actual = comma_writer.write(parsed)

    self.maxDiff = None  # show the complete diff on failure
    self.assertEqual(expected, actual)
def write_bibliography(file, db):
    """Write the bibliography database to a file as BibTeX.

    Parameters
    ----------
    file : str or writable file object
        Destination; opened through ``file_context`` in mode ``'w'`` with
        UTF-8 encoding.
    db :
        Database object handed to ``BibTexWriter.write``.
    """
    bib_writer = BibTexWriter()
    bib_writer.indent = ' '
    with file_context(file, 'w', encoding='utf-8') as target:
        target.write(bib_writer.write(db))
def parsing_write(self, filename):
    """Rebuild ``self.db`` from the tree view rows and write it to ``filename``.

    Each row of ``self.TreeView.full_list`` is paired with the field names in
    ``self.entries``; values that are ``None`` are left out of the entry.
    """
    bib_writer = BibTexWriter()
    bib_writer.indent = ' '

    self.db.entries = [
        {field: value for field, value in zip(self.entries, row) if value is not None}
        for row in self.TreeView.full_list
    ]

    with open(filename, 'w') as bibfile:
        bibfile.write(bib_writer.write(self.db))
def ircrebibmerge():
    """Merge ``top15.bib``, ``sorted-articles.bib`` and ``others.bib`` into ``newircre.bib``.

    The three files are read in the order sorted-articles, top15, others;
    the merged file lists copies of the top15 entries first, then the sorted
    articles, then the others, with no re-sorting. Always returns 0.
    """
    def _load_entries(path):
        # Each file gets its own parser, configured identically.
        file_parser = BibTexParser(common_strings=False)
        file_parser.ignore_nonstandard_types = False
        with open(path, encoding='utf8') as handle:
            database = bibtexparser.load(handle, file_parser)
        return database.entries.copy()

    sortedarticles = _load_entries('/home/limingtao/ircre-bibtex/ircreupdate/sorted-articles.bib')
    top15articles = _load_entries('/home/limingtao/ircre-bibtex/ircreupdate/top15.bib')
    others = _load_entries('/home/limingtao/ircre-bibtex/ircreupdate/others.bib')

    alldb = BibDatabase()
    alldb.entries = [
        entry.copy()
        for group in (top15articles, sortedarticles, others)
        for entry in group
    ]

    merge_writer = BibTexWriter()
    merge_writer.indent = ' '
    merge_writer.order_entries_by = None  # keep the concatenation order
    with open('/home/limingtao/ircre-bibtex/ircreupdate/newircre.bib', 'w', encoding='utf8') as newircrebibfile:
        bibtexparser.dump(alldb, newircrebibfile, writer=merge_writer)
    return 0
def write_res(self, passed_entries, passed_name, failed_entries, failed_name):
    """Store successful entries as BibTeX and failed ones as JSON under ``results/``.

    ``passed_entries`` go to ``results/<passed_name>`` as BibTeX,
    ``failed_entries`` to ``results/<failed_name>`` as JSON indented by 4.
    A short summary of both locations is printed afterwards.
    """
    passed_path = "results/" + passed_name
    failed_path = "results/" + failed_name

    database = BibDatabase()
    database.entries = passed_entries
    bib_writer = BibTexWriter()
    bib_writer.indent = ' '
    bib_writer.comma_first = False

    with open(passed_path, 'w') as bibfile:
        bibfile.write(bib_writer.write(database))
    with open(failed_path, 'w') as failed_file:
        json.dump(failed_entries, failed_file, indent=4)

    print("Writing data to filesystem!")
    print(" -successful results can be found in: results/" + passed_name)
    print(" -failed results can be found in: results/" + failed_name)
def test_indent(self):
    """A custom ``indent`` must be used in front of every field line."""
    database = BibDatabase()
    database.entries = [{'ID': 'abc123', 'ENTRYTYPE': 'book', 'author': 'test'}]

    indent_writer = BibTexWriter()
    indent_writer.indent = ' '

    actual = bibtexparser.dumps(database, indent_writer)
    wanted = "@book{abc123,\n author = {test}\n}\n\n"
    self.assertEqual(actual, wanted)
def reformatbib(infile,outfile):
    """Re-write the BibTeX file ``infile`` to ``outfile`` with aligned values.

    Any error while parsing or writing is reported on stdout as
    ``<infile> not parsed`` instead of being raised. A failure to open
    ``infile`` itself is not caught.
    """
    with open(infile,'r') as bibfileIn:
        try:
            bib_database = bibtexparser.load(bibfileIn)
            writer = BibTexWriter()
            writer.align_values = True
            writer.indent = ' '  # indent entries with spaces
            with open(outfile, 'w') as bibfileOut:
                bibfileOut.write(writer.write(bib_database))
            print(f'Reformated {infile} written to {outfile}')
        except Exception:
            # Deliberately best-effort, but KeyboardInterrupt and SystemExit
            # (which a bare ``except:`` would also swallow) must propagate.
            print(f'{infile} not parsed')
def export_citations(citations, destination):
    """Write the rows of the ``citations`` table to ``destination`` as BibTeX.

    Each row (as yielded by ``citations.iterrows()``) becomes one entry;
    attributes whose value is NaN are dropped from the entry.
    """
    def _is_nan(value):
        # An identity test against np.nan only catches that one singleton;
        # NaN values read back from a row are often distinct float objects.
        return isinstance(value, (float, np.floating)) and np.isnan(value)

    db = BibDatabase()
    db.entries = [
        {attribute: value for attribute, value in dict(row).items() if not _is_nan(value)}
        for _, row in citations.iterrows()
    ]

    with open(destination, "w") as bibtexfile:
        writer = BibTexWriter()
        writer.indent = " "
        bibtexparser.dump(db, bibtexfile, writer)
def _cleanupBibTex(self, count):
    """
    Clean up bibtex and ensure uniform look.

    Parses ``self.refs`` with LaTeX encoding homogenised, then stores the
    re-serialised entries (sorted by entry ID) back into ``self.refs`` and
    the number of entries in ``self.number``. ``count`` is not used.
    """
    import bibtexparser
    from bibtexparser.bparser import BibTexParser
    from bibtexparser.bwriter import BibTexWriter

    parser = BibTexParser()
    parser.customization = homogeneize_latex_encoding
    bib = bibtexparser.loads(self.refs, parser=parser)

    # save results
    writer = BibTexWriter()
    writer.contents = ['entries']
    writer.indent = ' '
    # Must be a tuple of field names. The previous value ('id') was just the
    # string 'id', so the writer sorted on the non-existent fields 'i' and
    # 'd'. The entry key field is named 'ID'.
    writer.order_entries_by = ('ID',)
    self.number = len(bib.entries)
    self.refs = bibtexparser.dumps(bib, writer)
def getclusterid(title, author):
    """Look up the Google Scholar cluster id of every article that lacks one.

    Loads ``articles.bib`` and, for each entry whose ``clusterid`` is
    ``'unknown'``, queries ``scholar.py`` by title. After each lookup the
    database is checkpointed to ``clusterid-added-ircre.bib`` and copied to
    ``tempclusterid-added-ircre.bib``; the checkpoint file is written once
    more at the end.

    Args:
        title: not used; kept for backward compatibility.
        author: not used; kept for backward compatibility.

    Returns:
        int: always 0.
    """
    import shlex
    import shutil

    base_dir = '/home/limingtao/ircre-bibtex/ircreupdate/'
    checkpoint_path = base_dir + 'clusterid-added-ircre.bib'

    parser = BibTexParser(common_strings=False)
    parser.ignore_nonstandard_types = False
    with open(base_dir + 'articles.bib', encoding='utf8') as article_file:
        article_database = bibtexparser.load(article_file, parser)
    # Work on the database that was just loaded; the previous code referred
    # to a name (bib_database) that this function never defines.
    entries = article_database.entries

    print("---------------------------")
    print("---------------------------")
    print("---------------------------")
    print("Total articles number: " + str(len(entries)))
    print("---------------------------")
    print("---------------------------")
    print("---------------------------")

    writer = BibTexWriter()
    writer.indent = ' '
    writer.order_entries_by = ('order',)

    for i, entry in enumerate(entries):
        if entry['clusterid'] != 'unknown':
            continue
        print("---------------------------")
        print("Entry number: " + str(i))
        entry_title = entry['title']
        print("Title: " + entry_title)
        # Titles may contain quotes, '$' or backticks; quote them for the shell.
        command = (base_dir + 'scholarpy/scholar.py -c 1 -t --phrase='
                   + shlex.quote(entry_title) + ' |grep ID| grep Cluster')
        try:
            with os.popen(command) as pipe:
                clusterid = pipe.read().strip().split()[-1]
        except (IndexError, OSError):
            # No cluster line in the output (or the command could not run).
            clusterid = "unknown"
        print("new Cluster ID: " + clusterid)
        entry['clusterid'] = clusterid

        with open(checkpoint_path, 'w', encoding='utf8') as newbibfile:
            bibtexparser.dump(article_database, newbibfile, writer=writer)
        # Synchronous copy: a fire-and-forget "cp" could still be reading the
        # checkpoint while the next iteration truncates it.
        shutil.copy(checkpoint_path, base_dir + 'tempclusterid-added-ircre.bib')

    with open(checkpoint_path, 'w', encoding='utf8') as newbibfile:
        bibtexparser.dump(article_database, newbibfile, writer=writer)
    return 0
def proc_bib(input_io: TextIOWrapper, output_io: TextIOWrapper, jdb: JournalDB, silent: bool = False, output_format: str = "bib", abbrev_type="iso4"):
    """Abbreviate the ``journaltitle`` of every entry read from ``input_io``.

    For each entry with a ``journaltitle``, braces are stripped and the
    journal database is queried for a name starting with that title
    (case-insensitive). On a hit, the abbreviation named by ``abbrev_type``
    either replaces the entry's title (``output_format == "bib"``) or is
    emitted through ``gen_sourcemap_map`` (``"sourcemap"``). Unless
    ``silent``, one info line is logged per processed entry. In ``"bib"``
    mode the whole database is finally written to ``output_io``.

    Raises:
        ValueError: if ``Journal`` has no attribute named ``abbrev_type``.
    """
    if not hasattr(Journal, abbrev_type):
        raise ValueError(f"Invalid abbreviation type `{abbrev_type}`")

    emit_bib = output_format == "bib"
    emit_sourcemap = output_format == "sourcemap"
    bib_db = bibtexparser.load(input_io)

    for entry in bib_db.entries:
        raw_title = entry.get("journaltitle")
        if raw_title is None:
            continue
        journaltitle = braces_regex.sub("", raw_title)
        name_pattern = re.compile(fr"^{re.escape(journaltitle)}(:?.*)$", RegexFlag.IGNORECASE)
        # TODO: query using lambdas?
        # TODO: normalize names (just in index?).
        hit = jdb.journals.query_one(Journal.names_key, name_pattern)
        if hit:
            _, journal = hit
            abbrev = getattr(journal, abbrev_type)
            if emit_bib:
                # Fall back to the full title when the abbreviation is empty.
                entry["journaltitle"] = f"{{{abbrev or journaltitle}}}"
            elif emit_sourcemap:
                gen_sourcemap_map(journal, journaltitle, abbrev, output_io)
            abbrev_msg = f"abbreviating to '{abbrev}'"
        else:
            abbrev_msg = "no abbreviation found"
        if not silent:
            info(f"found journal name '{journaltitle}'; {abbrev_msg}.")

    if emit_bib:
        bib_writer = BibTexWriter()
        bib_writer.indent = "\t"
        bib_writer.add_trailing_comma = True
        bib_writer.display_order = None
        bib_writer.order_entries_by = None
        output_io.write(bib_writer.write(bib_db))
def main():
    """Look up the Google Scholar cluster id of every entry in ``ircre.bib`` that lacks one.

    For each entry whose ``clusterid`` is ``'unknown'``, ``scholar.py`` is
    queried by title. After each lookup the database is checkpointed to
    ``clusterid-added-ircre.bib`` and copied to
    ``tempclusterid-added-ircre.bib``; the checkpoint file is written once
    more at the end. All paths are relative to the working directory.

    Returns:
        int: always 0.
    """
    import os
    import shlex
    import shutil

    import bibtexparser
    from bibtexparser.bwriter import BibTexWriter

    checkpoint_path = 'clusterid-added-ircre.bib'

    with open('ircre.bib', encoding='utf8') as bibtex_file:
        bib_database = bibtexparser.load(bibtex_file)
    entries = bib_database.entries

    print("---------------------------")
    print("---------------------------")
    print("---------------------------")
    print("Total articles number: " + str(len(entries)))
    print("---------------------------")
    print("---------------------------")
    print("---------------------------")

    writer = BibTexWriter()
    writer.indent = ' '
    writer.order_entries_by = ('order', )

    for i, entry in enumerate(entries):
        if entry['clusterid'] != 'unknown':
            continue
        print("---------------------------")
        print("Entry number: " + str(i))
        title = entry['title']
        print("Title: " + title)
        # Titles may contain quotes, '$' or backticks; quote them for the shell.
        command = ('./scholarpy/scholar.py -c 1 -t --phrase='
                   + shlex.quote(title) + ' |grep ID| grep Cluster')
        try:
            with os.popen(command) as pipe:
                clusterid = pipe.read().strip().split()[-1]
        except (IndexError, OSError):
            # No cluster line in the output (or the command could not run).
            clusterid = "unknown"
        print("new Cluster ID: " + clusterid)
        entry['clusterid'] = clusterid

        with open(checkpoint_path, 'w', encoding='utf8') as newbibfile:
            bibtexparser.dump(bib_database, newbibfile, writer=writer)
        # Synchronous copy: a fire-and-forget "cp" could still be reading the
        # checkpoint while the next iteration truncates it.
        shutil.copy(checkpoint_path, 'tempclusterid-added-ircre.bib')

    with open(checkpoint_path, 'w', encoding='utf8') as newbibfile:
        bibtexparser.dump(bib_database, newbibfile, writer=writer)
    return 0
def main():
    """Build a BibTeX file from the YAML front matter of Markdown files.

    Command line: ``OUTPUT FILE.md [FILE.md ...]``. For each input file the
    first line is skipped and the lines up to the closing ``---`` are parsed
    as YAML. The entry ID is taken from the file name (second path component
    without its last three characters). ``#`` and ``&`` are escaped in
    title/booktitle, the title is wrapped in braces, ``authors`` is joined
    into ``author``, numeric fields are stringified and bookkeeping keys are
    removed. The output starts with a "machine generated" banner.
    """
    output = sys.argv[1]
    mds = sys.argv[2:]

    es = []
    for fn in mds:
        with open(fn, "r", encoding='UTF-8') as f:
            ls = f.readlines()[1:]
        ls = itertools.takewhile(lambda x: x != "---\n", ls)
        # NOTE(review): FullLoader is acceptable for trusted local files
        # only; consider yaml.safe_load if inputs can come from elsewhere.
        e = yaml.load("".join(ls), Loader=yaml.FullLoader)
        # Assumes paths of the form "<dir>/<name>.md".
        e['ID'] = fn.split("/")[1][0:-3]

        for i in ['title', 'booktitle']:
            if i in e:
                # "\\#" is the two characters backslash + '#'; the former
                # "\#" spelling was an invalid escape sequence.
                e[i] = e[i].replace("#", "\\#").replace("&", "\\&")
        e['title'] = "{" + e['title'] + "}"

        if 'authors' in e:
            e['author'] = " and ".join(e['authors'])
            del e['authors']

        for i in ['isbn', 'pages', 'volume', 'year']:
            if i in e:
                e[i] = str(e[i])

        for i in ['added', 'layout', 'notes', 'papers', 'read', 'readings', 'topics']:
            if i in e:
                del e[i]
        es.append(e)

    db = BibDatabase()
    db.entries = es
    writer = BibTexWriter()
    writer.contents = ['entries']
    writer.indent = ' '
    bibtex_str = bibtexparser.dumps(db, writer)

    # Inputs are read as UTF-8, so write UTF-8 too instead of relying on the
    # platform's default encoding.
    with open(output, "w", encoding='UTF-8') as f:
        print(("#############################################\n"
               "# This file is machine generated, do not edit\n"
               "#############################################\n"), file=f)
        print(bibtex_str, file=f)
def write_bibtex_dict(stream, entries):
    """Stream ``entries`` to the binary ``stream`` one BibTeX record at a time.

    bibtexparser's own writer renders the whole database into one string
    before writing; this variant encodes and writes each record separately
    (UTF-8), with a single newline between consecutive records.
    """
    from bibtexparser.bwriter import BibTexWriter

    entry_writer = BibTexWriter()
    entry_writer.indent = ' '
    entry_writer.entry_separator = ''

    for position, record in enumerate(entries):
        if position:
            stream.write(b'\n')
        stream.write(entry_writer._entry_to_bibtex(record).encode('utf8'))
def print_candidates(candidates: List[Dict]):
    """Print a headline with the number of suggestions, then their BibTeX text.

    Nothing but ``No suggestions.`` is printed for an empty list.
    """
    count = len(candidates)
    if count == 0:
        print("No suggestions.")
        return

    headline = "One suggestion:\n" if count == 1 else f"{count} suggestions:\n"
    print(headline)

    database = BibDatabase()
    database.entries = candidates
    bib_writer = BibTexWriter()
    bib_writer.indent = " "
    bib_writer.align_values = True
    print(bib_writer.write(database))
def write_bibtex_dict(stream, entries):
    """Stream ``entries`` to the text ``stream`` one BibTeX record at a time.

    bibtexparser's own writer renders the whole database into one string
    before writing; this variant writes each record separately, with a
    single newline between consecutive records.
    """
    from bibtexparser.bwriter import BibTexWriter

    entry_writer = BibTexWriter()
    entry_writer.indent = ' '
    entry_writer.entry_separator = ''

    for position, record in enumerate(entries):
        if position:
            stream.write('\n')
        stream.write(entry_writer._entry_to_bibtex(record))
def write_bib(db, order=False):
    """
    Write bibtex string.

    The entries of ``db`` are normalised in place before writing: month
    names found in ``MONTHS`` become two-digit numbers, entries are
    optionally sorted by year/author/ID, non-ASCII characters are
    LaTeX-encoded unless ``config.use_utf8_characters`` is set, and
    uppercase letters in titles are protected when
    ``config.protect_uppercase`` is set.

    Args:
        db (BibDatabase): database object to dump.
        order (bool): whether to reorder entries upon writing.

    Returns:
        The dumped string.
    """
    # Custom writer: the writer itself never sorts, ordering is done below.
    writer = BibTexWriter()
    writer.indent = '\t'
    writer.order_entries_by = None

    # Replace month by numeric value
    for entry in db.entries:
        if 'month' in entry and entry['month'] in MONTHS:
            entry['month'] = '{:02d}'.format(MONTHS.index(entry['month']) + 1)

    if order:
        # Manual sort
        order_entries_by = ('year', 'author', 'ID')
        sort_entries(db, order_entries_by)

    if not config.use_utf8_characters:
        db.entries = [nomenclature.encode_ascii_latex(entry) for entry in db.entries]

    if config.protect_uppercase:
        for entry in db.entries:
            # Not every entry type carries a title; skip those instead of
            # failing with KeyError.
            if "title" in entry:
                entry["title"] = latex.protect_uppercase(entry["title"])

    # Write bib string
    return writer.write(db)
def formatText(self):
    # Normalise the loaded BibTeX data into one entry dict ``nb`` and write it
    # to "<title>.bib". Python 2 code (print statements).
    # Input comes from self.openfile() when a file path is set, otherwise from
    # self.readcontent(); both are expected to fill self.allbibtex.
    if self.BibtexfilePath != '':
        self.openfile()
    else:
        self.readcontent()
    m = self.getMap()
    # Re-key the GLOBECOM entry under its short conference name.
    m['IEEE Global Communications Conference'] = m['IEEE Global Communications Conference, incorporating the Global Internet Symposium']
    del m['IEEE Global Communications Conference, incorporating the Global Internet Symposium']
    print m
    length = 0
    nb = {}
    for bibtex in self.allbibtex:
        # Longest field name seen so far (ENTRYTYPE excluded); used to pad
        # every field name to a common width.
        for key in bibtex.keys():
            if len(key) > length and key != 'ENTRYTYPE':
                length = len(key)
        for k, v in bibtex.items():
            if k == 'ENTRYTYPE' or k == 'ID':
                # Copied verbatim, without padding.
                nb[k] = v
                continue
            elif k == 'ID':
                # Unreachable: 'ID' is already handled by the branch above.
                nb[k] = v
                continue
            elif k == 'doi' or k == 'ISSN' or k == 'keywords':
                # Dropped from the output.
                continue
            elif v == '':
                continue
            elif 'url' in k:
                continue
            # Field name padded with spaces up to ``length``.
            nk = k + (length - len(k)) * ' '
            if 'booktitle' in nk:
                if '(' in v:
                    # Use the text inside the first pair of parentheses.
                    v1 = v.split('(')[1].split(')')[0]
                    nb[nk] = 'Proc. of ' + v1
                    continue
                flag = 0  # booktitle not replaced yet
                to_remove = "~`!@#$%^&*(){}[];':<>|-=_+"
                # Strip the punctuation above before matching against the map.
                table = {ord(char): None for char in to_remove}
                clean_v = v.translate(table)
                #clean_v = v.translate(string.punctuation)
                #print clean_v
                for kk, vv in m.items():
                    if kk in clean_v:
                        # vv[0] is used as the proceedings name, vv[1] as the
                        # value of an added 'publish' field.
                        nb[nk] = 'Proc. of ' + vv[0]
                        publish = 'publish' + (length - 7) * ' '
                        nb[publish] = vv[1]
                        flag = 1
                        break
                if flag == 0:
                    # No known conference matched: keep the value and print it.
                    nb[nk] = v
                    print v
                continue
            elif nk.strip() == 'title' and 'booktitle' not in nk:
                # Remembered for the output file name ('tilte' spelling is the
                # attribute name used throughout this method).
                self.tilte = v
                nv = v.split(' ')
                for i in range(len(nv)):
                    # Title words are capitalised, except those listed in
                    # self.prep and self.artie (prepositions and articles).
                    if nv[i] in self.prep or nv[i] in self.artie:
                        continue
                    # Capitalise the first letter.
                    else:
                        # 97..122 are the ASCII codes of 'a'..'z'; subtracting
                        # 32 gives the uppercase letter.
                        if 97 <= ord(nv[i][0]) <= 122:
                            nv[i] = chr(ord(nv[i][0])-32)+nv[i][1:]
                v = ' '.join(nv)
                # Braces around the whole title.
                nb[nk] = '{' + v + '}'
                continue
            elif 'pages' in nk:
                if '--' in v:
                    nb[nk] = v
                    continue
                # Single hyphens in page ranges become double hyphens.
                nb[nk] = v.replace('-', '--')
                continue
            elif 'author' in nk:
                if '\n' in v:
                    # Author lists are flattened onto one line.
                    nb[nk] = v.replace('\n', ' ')
                    continue
            # Every other field is left unchanged.
            nb[nk] = v
    db = BibDatabase()
    db.entries = [nb]
    writer = BibTexWriter()
    writer.indent = '\t'  # indent entry fields with a tab
    writer.comma_first = False  # commas stay at the end of each line
    # The output file is named after the last title seen.
    with open(self.tilte+'.bib', 'wb') as bibfile:
        bibfile.write(writer.write(db))
# Look up a DOI for every entry of ``bibliography`` that has none, report the
# statistics and write the result to "<first command line argument>_doi.bib".
print("Looking for Dois...")
before = 0  # entries that already had a DOI
new = 0     # entries for which a DOI was found
total = len(bibliography.entries)
for i,entry in enumerate(bibliography.entries):
    print("\r{i}/{total} entries processed, please wait...".format(i=i,total=total),flush=True,end="")
    # A missing, empty or whitespace-only doi field all mean "no DOI yet".
    if entry.get("doi", "").strip():
        before += 1
        continue
    try:
        title = entry["title"]
        for author in get_authors(entry):
            doi_match = searchdoi(title,author)
            if doi_match:
                entry["doi"] = doi_match.groups()[0]
                new += 1
                # One DOI per entry: stop here so the entry is counted once
                # and no further lookups are made for it.
                break
    except Exception as err:
        # Best effort: a failing lookup must not abort the whole run, but it
        # should not pass unnoticed either.
        print("\nSkipping entry {i}: {err!r}".format(i=i,err=err),file=sys.stderr)
print("")
template="We added {new} DOIs !\nBefore: {before}/{total} entries had DOI\nNow: {after}/{total} entries have DOI"
print(template.format(new=new,before=before,after=before+new,total=total))
outfile = sys.argv[1]+"_doi.bib"
print("Writing result to ",outfile)
writer = BibTexWriter()
writer.indent = ' '
with open(outfile, 'w') as bibfile:
    bibfile.write(writer.write(bibliography))
'class':'logging.StreamHandler', }, }, 'loggers': { '': { 'handlers': ['default'], 'level': 'ERROR', 'formatter': 'standard', 'propagate': True } } }) writer = BibTexWriter() writer.contents = ['comments', 'entries'] writer.indent = ' ' writer.order_entries_by = ('ENTRYTYPE', 'author', 'year') def create_id(t, year, title): return str(t) + "_" + str(year) + "_" + str(space_to_underscore(title)) def pdf(pdf_files, shared_pdf, bibtex_folder, bibtex_files, gscholar): for pdf in pdf_files: txt = re.sub("\W", " ", gs.convert_pdf_to_txt(pdf)).lower() #Research determined that the cutting of 35 words gives the #highest accuracy words = txt.strip().split()[:35] words = " ".join(words) print words if gscholar == True: bib = load(gs.pdflookup(pdf, all, gs.FORMAT_BIBTEX)[0])
def toString(e):
    """Return the BibTeX text of the single entry ``e``.

    Commas are placed at the beginning of each field line.
    """
    entry_writer = BibTexWriter()
    entry_writer.comma_first = True
    entry_writer.indent = ' '
    return entry_writer._entry_to_bibtex(e)