def test_align(self):
    # With align_values=True the writer pads each key so the '=' signs of
    # an entry line up in a column.
    bib_database = BibDatabase()
    bib_database.entries = [{'ID': 'abc123',
                             'ENTRYTYPE': 'book',
                             'author': 'test',
                             'thisisaverylongkey': 'longvalue'}]
    writer = BibTexWriter()
    writer.align_values = True
    result = bibtexparser.dumps(bib_database, writer)
    expected = \
"""@book{abc123,
 author             = {test},
 thisisaverylongkey = {longvalue}
}
"""
    self.assertEqual(result, expected)

    # Alignment is computed per entry, not across the whole file.
    with open('bibtexparser/tests/data/multiple_entries_and_comments.bib') as bibtex_file:
        bib_database = bibtexparser.load(bibtex_file)
    writer = BibTexWriter()
    writer.contents = ['entries']
    writer.align_values = True
    result = bibtexparser.dumps(bib_database, writer)
    expected = \
"""@book{Toto3000,
 author = {Toto, A and Titi, B},
 title  = {A title}
}

@article{Wigner1938,
 author    = {Wigner, E.},
 doi       = {10.1039/TF9383400029},
 issn      = {0014-7672},
 journal   = {Trans. Faraday Soc.},
 owner     = {fr},
 pages     = {29--41},
 publisher = {The Royal Society of Chemistry},
 title     = {The transition state method},
 volume    = {34},
 year      = {1938}
}

@book{Yablon2005,
 author    = {Yablon, A.D.},
 publisher = {Springer},
 title     = {Optical fiber fusion slicing},
 year      = {2005}
}
"""
    self.assertEqual(result, expected)
def save_citation(citation_record):
    """Fetch the BibTeX entry for one Google Scholar citation record and
    append it to the file named by ``opts.citation_name``.

    Also downloads the paper PDF when ``opts.should_download`` is set.
    """
    cite_anchor = citation_record.find(
        'a', {'class': 'gs_nph', 'href': '#', "role": "button"})
    if not cite_anchor or not cite_anchor['onclick']:
        # logging.warn() is a deprecated alias; warning() is the real API.
        logging.warning("No Cite anchor for citation: %s" % citation_record)
        return
    # The citation id is the second (quoted) argument of the onclick handler.
    citation_id = cite_anchor['onclick'].split(',')[1][1:-1]
    logging.info("Getting formated cite from citation id: " + citation_id)
    params = {"q": "info:%s:scholar.google.com/" % citation_id,
              "output": "cite"}
    soup = create_soup_by_url("https://scholar.google.com/scholar", params)
    bib_anchor = soup.find('a', {"class": "gs_citi"})
    if not bib_anchor:
        logging.debug("BibTex page soup is: %s" % soup.getText())
        logging.warning("No BibTex citation provided for citation: %s" %
                        citation_id)
        return
    soup = create_soup_by_url(bib_anchor['href'])
    global citation_num
    citation_num += 1
    # Adding a tag to the bib entry about google scholar citation ID
    citation_entry = bibtexparser.loads(soup.getText()).entries[0]
    citationID = citation_entry['ID']  # e.g., melville2004review
    citation_entry["gscholar_id"] = citation_id
    db = BibDatabase()
    db.entries = [citation_entry]
    g_bib_entry = bibtexparser.dumps(db)
    bib_entry = "%% [%d]\n%s" % (citation_num, g_bib_entry)
    logging.info(bib_entry.strip())
    # BUGFIX: the payload is utf-8 encoded *bytes*, so the file must be
    # opened in binary append mode ("ab+"); writing bytes to the previous
    # text-mode "a+" handle raises TypeError on Python 3.
    with open(opts.citation_name, "ab+") as f:
        f.write(bib_entry.encode('utf-8'))
    if opts.should_download:
        pdf_div = citation_record.find('div', {"class": "gs_ggs gs_fl"})
        if pdf_div:
            download_pdf(pdf_div.a['href'], citationID)
def _processResults(self, data):
    """ Get bibtex data from zbMATH website. """
    # Harvest the per-record ``bibtex/<id>.bib`` links from the results page.
    bibs = re.findall("(?si)bibtex/.*?\d{3,}\.bib", data)
    data = []
    import bibtexparser
    from bibtexparser.bparser import BibTexParser
    parser = BibTexParser()
    parser.customization = customizations
    if self.otherID:
        # setup for MRef fetching
        from msn import MRef
        mr = MRef()
    for bib in bibs:
        # NOTE(review): urllib.urlopen exists only on Python 2 -- this code
        # presumably still targets py2; confirm before porting.
        bibtext = urllib.urlopen("https://zbmath.org/" + bib).read()
        zbl = bibtexparser.loads(bibtext, parser=parser)
        if self.otherID and mr.fetch(bibtext):
            # found MRef match for zbMATH record
            msn = bibtexparser.loads(mr.refs)
            # use MSN bibtex entry with zbl number added
            # and doi transfered if missing
            msn.entries[0]['zbl'] = zbl.entries[0]['zbl']
            if 'doi' not in msn.entries[0] and 'doi' in zbl.entries[0]:
                msn.entries[0]['doi'] = zbl.entries[0]['doi']
            zbl = msn
        data.append(bibtexparser.dumps(zbl))
    self.refs = "\n".join(data)
def test_multiple_string_write(self):
    # @string definitions must be written in insertion order.
    bib_database = BibDatabase()
    bib_database.strings['name1'] = 'value1'
    bib_database.strings['name2'] = 'value2'
    # Order is important!
    result = bibtexparser.dumps(bib_database)
    expected = '@string{name1 = "value1"}\n\n@string{name2 = "value2"}\n\n'
    self.assertEqual(result, expected)
def normalize_keyword_case():
    """Lower-case the 'keyword' field of every document in the review."""
    for d in review.documents:
        bib = bibtexparser.loads(d.bib)
        entry = bib.entries[0]
        # BUGFIX: dict.has_key() was removed in Python 3; use `in` instead.
        if 'keyword' in entry:
            entry['keyword'] = entry['keyword'].lower()
            # Only re-serialize and persist documents that actually changed.
            d.bib = bibtexparser.dumps(bib)
            d.save()
def normalize_keyword_delimitter():
    """Replace ';' keyword delimiters with ',' in every review document."""
    for d in review.documents:
        bib = bibtexparser.loads(d.bib)
        entry = bib.entries[0]
        # BUGFIX: dict.has_key() was removed in Python 3; use `in` instead.
        if 'keyword' in entry:
            entry['keyword'] = entry['keyword'].replace(';', ',')
            # Only re-serialize and persist documents that actually changed.
            d.bib = bibtexparser.dumps(bib)
            d.save()
def normalize_keyword_visualization():
    """Normalize British 'visualis...' to American 'visualiz...' in keywords."""
    for d in review.documents:
        bib = bibtexparser.loads(d.bib)
        entry = bib.entries[0]
        # BUGFIX: dict.has_key() was removed in Python 3; use `in` instead.
        if 'keyword' in entry:
            entry['keyword'] = entry['keyword'].replace('visualis', 'visualiz')
            # Only re-serialize and persist documents that actually changed.
            d.bib = bibtexparser.dumps(bib)
            d.save()
def test_content_entries_only(self):
    # With contents=['entries'] the @comment blocks in the input file are
    # dropped and only the entries are written.
    with open('bibtexparser/tests/data/multiple_entries_and_comments.bib') as bibtex_file:
        bib_database = bibtexparser.load(bibtex_file)
    writer = BibTexWriter()
    writer.contents = ['entries']
    result = bibtexparser.dumps(bib_database, writer)
    expected = \
"""@book{Toto3000,
 author = {Toto, A and Titi, B},
 title = {A title}
}

@article{Wigner1938,
 author = {Wigner, E.},
 doi = {10.1039/TF9383400029},
 issn = {0014-7672},
 journal = {Trans. Faraday Soc.},
 owner = {fr},
 pages = {29--41},
 publisher = {The Royal Society of Chemistry},
 title = {The transition state method},
 volume = {34},
 year = {1938}
}

@book{Yablon2005,
 author = {Yablon, A.D.},
 publisher = {Springer},
 title = {Optical fiber fusion slicing},
 year = {2005}
}
"""
    self.assertEqual(result, expected)
def main():
    """Filter a .bib file down to the entries actually cited in a .tex file.

    Reads ``args.input_bib`` and ``args.input_tex``, collects every key used
    in ``\\cite{...}`` commands, and writes only the matching entries to
    ``args.output_bib``.
    """
    args = _args()
    bibfile = args.input_bib
    texfile = args.input_tex
    with open(bibfile) as bibtex_file:
        parser = BibTexParser()
        bib_database = bibtexparser.load(bibtex_file, parser=parser)
    citation_keys = set()
    # A cite command may carry several comma-separated keys.
    re_cite = re.compile(r'cite\{([0-9A-Za-z,\s]+)\}')
    with open(texfile) as tex_file:
        for line in tex_file:
            for group in re_cite.findall(line):
                for key in group.split(','):
                    citation_keys.add(key.strip())
    print('Found {} citation keys'.format(len(citation_keys)))
    old_entries = bib_database.entries[:]
    bib_database.entries = [x for x in old_entries if x['ID'] in citation_keys]
    bibtex_string = bibtexparser.dumps(bib_database)
    # BUGFIX: dumps() returns str; writing str.encode() bytes to a text-mode
    # handle raises TypeError on Python 3.  Open with an explicit encoding
    # and write the string directly.
    with open(args.output_bib, 'w', encoding='utf8') as new_bibtex_file:
        new_bibtex_file.write(bibtex_string)
    print('Cleaned file saved in {}'.format(args.output_bib))
def normalize(input_file, output_file):
    """
    read a *.bib file, change every 'title' and 'booktitle' field to only
    use uppercase for the first letter and write the changes to the output
    file.

    Parameters
    ----------
    input_file : file
        the *.bib file to normalized
    output_file : file
        the *.bib output file
        (NOTE(review): must be opened in binary mode, since utf-8 encoded
        bytes are written below -- confirm with callers)
    """
    bibtex_str = input_file.read()
    bib_database = bibtexparser.loads(bibtex_str)
    for entry in bib_database.entries:
        for field in ('title', 'booktitle'):
            if field in entry:
                field_str = entry[field]
                # don't touch titles that are (partially) enclosed in brackets
                if (not FIXED_TITLE_RE.match(field_str)
                        and not BRACKETS_RE.search(field_str)):
                    if ':' in field_str:
                        # split no more than once; only the subtitle is
                        # lower-cased, the main title stays untouched
                        title, subtitle = field_str.split(':', 1)
                        entry[field] = u'{}: {}'.format(title, subtitle.lower())
                    else:
                        new_field_str = field_str.capitalize()
                        entry[field] = new_field_str
    new_bibstr = bibtexparser.dumps(bib_database)
    output_file.write(new_bibstr.encode('utf-8'))
def test_write_dependent_strings(self):
    # A @string whose value references another @string must be written as
    # a concatenation expression (name # {literal}), not expanded.
    bib_database = BibDatabase()
    bib_database.strings['title'] = 'Mr'
    expr = BibDataStringExpression([BibDataString(bib_database, 'title'), 'Smith'])
    bib_database.strings['name'] = expr
    result = bibtexparser.dumps(bib_database)
    expected = '@string{title = {Mr}}\n\n@string{name = title # {Smith}}\n\n'
    self.assertEqual(result, expected)
def test_write_common_strings(self):
    # With write_common_strings=True the predefined abbreviation strings
    # loaded by load_common_strings() are emitted to the output.
    bib_database = BibDatabase()
    bib_database.load_common_strings()
    writer = BibTexWriter(write_common_strings=True)
    result = bibtexparser.dumps(bib_database, writer=writer)
    with io.open('bibtexparser/tests/data/common_strings.bib') as f:
        expected = f.read()
    self.assertEqual(result, expected)
def bibdatabase2bibtex(data):
    """Serialize a ``bibtexparser.BibDatabase`` into BibTeX source text.

    :param data: the database to serialize.
    :return: the BibTeX source as a string.
    """
    rendered = bibtexparser.dumps(data)
    return rendered
def export_citation(paper_id):
    """Fetch the BibTeX record for *paper_id* and attach its abstract.

    Returns the serialized entry, or an empty string when the fetched page
    contains no <pre> block with BibTeX in it.
    """
    page = get(cfg.URL_BIBTEX.format(id=paper_id), execute_js=False)
    soup = BeautifulSoup(page, 'html.parser')
    if not soup.pre:
        return ''
    database = bibtexparser.loads(soup.pre.get_text())
    database.entries[0]['abstract'] = get_abstract(paper_id)
    return bibtexparser.dumps(database)
def get_citation_text(citation_id):
    """Fetch the BibTeX for *citation_id*, attach its abstract, and return
    ``{citation_id: bibtex_string}`` -- or '' when nothing was found."""
    log.info(citation_id)
    res = urlopen(cfg.URL_BIBTEX.format(id=citation_id))
    if res:
        # NOTE(review): no parser is passed to BeautifulSoup, so the result
        # depends on which parser bs4 auto-selects -- consider pinning one.
        soup = BeautifulSoup(res.read())
        if soup.pre:
            bibtex = soup.pre.get_text()
            bib = bp.loads(bibtex)
            bib.entries[0]['abstract'] = get_abstract(citation_id)
            return {citation_id: bp.dumps(bib)}
    # NOTE(review): the failure value '' differs in type from the success
    # dict -- callers must handle both shapes.
    return ''
def file_update(self, is_update):
    """
    Update the current local file with changes selected by the user

    Args:
        is_update: Flag indicates if the user made any selections to
            update the current local file
    """
    if is_update:
        bibtex_str = bibtexparser.dumps(self.model.bibdb_local)
        # BUGFIX: the extra open(...).close() truncation was redundant
        # ('w' already truncates), and the old code wrote utf-8 *bytes*
        # to a text-mode handle, which raises TypeError on Python 3 --
        # open with an explicit encoding and write the string instead.
        with open(self.view.local_file.get(), 'w', encoding='utf8') as bibtex_file:
            bibtex_file.write(bibtex_str)
def writeFile(self, fname):
    """Serialize ``self.bibdb`` to *fname* as UTF-8 encoded BibTeX.

    I/O errors are reported on stdout; other errors are reported but not
    re-raised (preserving the original best-effort behaviour).
    """
    try:
        btex = bibtexparser.dumps(self.bibdb)
        # with-statement guarantees the handle is closed on error paths;
        # the old explicit open/close leaked the handle if write() raised.
        with open(fname, 'wb') as fd:
            fd.write(btex.encode('utf8', 'replace'))
    except IOError as e:
        print("I/O error({0}): '%s': {1}".\
              format(e.errno, e.strerror) % fname)
    except Exception:
        # Narrowed from a bare `except:`, which would also swallow
        # SystemExit and KeyboardInterrupt.
        print("Unexpected error:", sys.exc_info()[0])
def write_bibtex_file(filename, db):
    """Write BiBTeX file with content from db.

    Entries are ordered by ('counter', 'year', 'ID') before writing.
    """
    writer = BibTexWriter()
    writer.order_entries_by = ('counter', 'year', 'ID')
    bibtex_str = bibtexparser.dumps(db, writer=writer)
    # BUGFIX: dumps() returns str; writing str.encode() bytes to a
    # text-mode handle raises TypeError on Python 3.
    with open(filename, 'w', encoding='utf8') as output_file:
        output_file.write(bibtex_str)
    print("Wrote %i records into filename '%s'" % (len(db.entries), filename))
def bibtex(self) -> str:
    """Returns the publication as a Bibtex entry

    :getter: Returns a Bibtex entry in text format
    :type: str
    """
    if not self._filled:
        self.fill()
    a = BibDatabase()
    # BUGFIX: copy before editing.  The old code aliased self.bib, so
    # joining 'author_id' mutated the publication's own dict -- a second
    # call would then try to ', '.join() an already-joined string,
    # producing a comma-separated character soup.
    converted_dict = dict(self.bib)
    converted_dict['author_id'] = ', '.join(converted_dict['author_id'])
    a.entries = [converted_dict]
    return bibtexparser.dumps(a)
def test_content_comment_only(self):
    # With contents=['comments'] only the @comment blocks are written.
    with open('bibtexparser/tests/data/multiple_entries_and_comments.bib') as bibtex_file:
        bib_database = bibtexparser.load(bibtex_file)
    writer = BibTexWriter()
    writer.contents = ['comments']
    result = bibtexparser.dumps(bib_database, writer)
    expected = \
"""@comment{}

@comment{A comment}

"""
    self.assertEqual(result, expected)
def bibtex_passthrough(text, set_id=None):
    """ Fix errors in a bibtex record and optional change its ID. """
    parser = bibtexparser.bparser.BibTexParser()
    parser.ignore_nonstandard_types = False
    database = bibtexparser.loads(text, parser)
    # Exactly one entry is expected; unpacking enforces it.
    (record,) = database.entries
    if 'author' in record:
        # Rewrite '; '-separated author lists into BibTeX 'and' form.
        authors = record['author'].rstrip(';').split('; ')
        record['author'] = ' and '.join(authors)
    if set_id is not None:
        record['ID'] = set_id
    return bibtexparser.dumps(database)
def handle(self, *args, **options):
    """Import every entry of a BibTeX file as a ``Paper`` model instance.

    The file is tried as UTF-8 first, then Latin-1.  Each paper stores its
    own single-entry BibTeX text plus a link (url/doi or a Google search).
    """
    inBibtexFile = options["bibtex_file"]
    if not os.path.isfile(inBibtexFile):
        self.stdout.write(
            self.style.ERROR(
                "File '{}' does not exist.".format(inBibtexFile)))
        sys.exit()
    try:
        with open(inBibtexFile, encoding="utf-8") as bibtexFile:
            bibtexData = bibtexparser.load(bibtexFile)
    except Exception:
        self.stdout.write(
            self.style.WARNING(
                "Failed reading file with UTF-8 encoding, attempting to read as Latin-1."
            ))
        try:
            with open(inBibtexFile, encoding="ISO-8859-1") as bibtexFile:
                bibtexData = bibtexparser.load(bibtexFile)
        except Exception:
            self.stdout.write(
                self.style.ERROR(
                    "Failed reading file with either UTF-8 or Latin-1 encoding."
                ))
            sys.exit()
    bibWriter = bibtexparser.bwriter.BibTexWriter()
    bibWriter.contents = ["entries"]
    bibWriter.indent = " "
    for entry in bibtexData.entries:
        # BUGFIX: instantiate BibDatabase().  The old code assigned the
        # *class* itself (missing parentheses), so `entries` became a class
        # attribute shared by every iteration.  Also fixed the "atttempting"
        # typo in the warning message above.
        singleEntryBibDatabase = bibtexparser.bibdatabase.BibDatabase()
        singleEntryBibDatabase.entries = [entry]
        bibtex = bibtexparser.dumps(singleEntryBibDatabase, bibWriter)
        # Prefer an explicit url/doi field; fall back to a Google search.
        link = None
        for linkKey in ["url", "URL", "doi", "DOI"]:
            if linkKey in entry:
                link = entry[linkKey]
                break
        if not link:
            link = 'https://www.google.com/search?q="{}"'.format(
                entry["title"])
        paper = Paper(bibtex=bibtex, link=link)
        paper.save()
        self.stdout.write(
            self.style.SUCCESS("  -- Imported: {}".format(paper)))
    self.stdout.write(
        self.style.SUCCESS("Successfully imported {} papers.".format(
            len(bibtexData.entries))))
def entry_sdiff(entries, color=True, bcolors=bcolors, best=None):
    """split diff

    Render each candidate entry separately, highlighting fields whose
    values conflict across candidates (warning colour) or that are absent
    from at least one candidate (bold).  *best*, when given, marks the
    preferred entry with a leading '*'.
    """
    if not entries:
        return ''
    assert all(entries), 'some entries are empty'
    if not color:
        bcolors = dummybcolors
    db = bibtexparser.bibdatabase.BibDatabase()
    db.entries.append(None)  # placeholder; overwritten for each entry below
    merged = merge_entries(entries)
    conflicting_fields = [
        k for k in merged if isinstance(merged[k], ConflictingField)
    ]
    somemissing = [k for k in merged if any(k not in e for e in entries)]
    entry_strings = []
    for i, entry in enumerate(entries):
        db.entries[0] = entry
        string = bibtexparser.dumps(db)
        if six.PY2:
            string = string.decode(
                'utf-8')  # decode to avoid failure in replace
        # color the conflicting fields
        lines = []
        for line in string.splitlines():
            for k in conflicting_fields + somemissing:
                fmt = lambda s: (bcolors.WARNING
                                 if k in conflicting_fields else
                                 bcolors.BOLD) + s + bcolors.ENDC
                # Upper-case keys (e.g. ID/ENTRYTYPE) appear on the '@'
                # header line; ordinary fields match their 'key = {' prefix.
                if k != k.lower() and '@' in line:
                    line = line.replace(entry[k], fmt(entry[k]))
                elif line.strip().startswith('{} = {{'.format(k)):
                    line = fmt(line)
            lines.append(line)
        string = '\n'.join(lines)
        if best is None:
            entry_strings.append(bcolors.OKBLUE + '* (' + str(i + 1) + ')' +
                                 bcolors.ENDC + '\n' + string)
        elif entry == best:
            entry_strings.append(bcolors.OKBLUE + '* (' + str(i + 1) + ')' +
                                 bcolors.ENDC + '\n' + string)
        else:
            entry_strings.append(bcolors.OKBLUE + ' (' + str(i + 1) + ')' +
                                 bcolors.ENDC + '\n' + string)
    return '\n'.join(entry_strings)
def active_bibliography(self) -> str:
    """A BibTeX string containing only active entries.

    An active entry is an entry in the database that is relevant to at
    least one used function call.
    """
    if not self.citations:
        return ""
    used_keys = set.union(*self.citations.values())
    filtered = BibDatabase()
    filtered.entries = [
        entry
        for key, entry in self.bib_database.entries_dict.items()
        if key in used_keys
    ]
    return bibtexparser.dumps(filtered)
def test_entry_separator(self):
    # With entry_separator='' the trailing blank line between entries
    # disappears.
    bib_database = BibDatabase()
    bib_database.entries = [{'ID': 'abc123',
                             'ENTRYTYPE': 'book',
                             'author': 'test'}]
    writer = BibTexWriter()
    writer.entry_separator = ''
    result = bibtexparser.dumps(bib_database, writer)
    expected = \
"""@book{abc123,
 author = {test}
}
"""
    self.assertEqual(result, expected)
def main():
    """Clean a BibTeX entry taken from a DOI or the clipboard and copy the
    cleaned record back to the clipboard (also printing it)."""
    # Get the command line arguments
    args = docopt(__doc__)
    if args['--doi']:
        bibtex_entry = bibtex_from_doi(args['--doi'])
    else:
        bibtex_entry = pyperclip.paste()
    # setup a parser customization to deal with non-english characters in Latex
    # options are needed to deal with months.
    parser = bibtexparser.bparser.BibTexParser(interpolate_strings=True,
                                               common_strings=True,
                                               customization=format)
    # create the bibfile object and parse to get the dictionary
    bib_database = bibtexparser.loads(bibtex_entry, parser=parser)
    # prepare a list of keys we want to delete from the entry
    remove_keys = [
        'month', 'keyword', 'language', 'read', 'rating', 'date-added',
        'date-modified', 'abstract', 'local-url', 'file', 'uri', 'ISSN',
        'issn', 'keywords'
    ]
    # Strip those keys from the dictionary
    for paper in bib_database.entries:
        for rkey in remove_keys:
            if rkey in paper:
                del paper[rkey]
    # Copy back to the pasteboard
    pyperclip.copy(bibtexparser.dumps(bib_database))
    # output to the terminal
    print(bibtexparser.dumps(bib_database))
def crossref_to_bibtex(r):
    """convert crossref result to bibtex

    Maps the CrossRef JSON dict *r* onto a BibTeX entry and returns the
    serialized record as a string.
    """
    bib = {}
    if 'author' in r:
        # Brace multi-word family names so BibTeX keeps them intact.
        family = lambda p: p['family'] if len(p['family'].split(
        )) == 1 else u'{' + p['family'] + u'}'
        bib['author'] = ' and '.join([
            family(p) + ', ' + p.get('given', '') for p in r.get('author', [])
            if 'family' in p
        ])
    # for k in ['issued','published-print', 'published-online']:
    k = 'issued'
    if k in r and 'date-parts' in r[k] and len(r[k]['date-parts']) > 0:
        date = r[k]['date-parts'][0]
        bib['year'] = str(date[0])
        if len(date) >= 2:
            bib['month'] = str(date[1])
        # break
    if 'DOI' in r:
        bib['doi'] = r['DOI']
    if 'URL' in r:
        bib['url'] = r['URL']
    if 'title' in r:
        bib['title'] = r['title'][0]
    if 'container-title' in r:
        bib['journal'] = r['container-title'][0]
    if 'volume' in r:
        bib['volume'] = r['volume']
    if 'issue' in r:
        bib['number'] = r['issue']
    if 'page' in r:
        bib['pages'] = r['page']
    if 'publisher' in r:
        bib['publisher'] = r['publisher']
    # entry type
    # NOTE(review): this reads 'type' from *bib*, which never sets that key,
    # so it always falls back to 'journal-article' -- was r.get('type', ...)
    # intended?  Confirm before changing.
    type = bib.get('type', 'journal-article')
    type_mapping = {'journal-article': 'article'}
    bib['ENTRYTYPE'] = type_mapping.get(type, type)
    # bibtex key: FirstAuthorFamily_Year, or just the year with no authors
    year = str(bib.get('year', '0000'))
    if 'author' in r:
        ID = r['author'][0]['family'] + u'_' + six.u(year)
    else:
        ID = year
    # if six.PY2:
    #     ID = str(''.join([c if ord(c) < 128 else '_' for c in ID]))
    # make sure the resulting string is ASCII
    bib['ID'] = ID
    db = bibtexparser.bibdatabase.BibDatabase()
    db.entries.append(bib)
    return bibtexparser.dumps(db)
def test_indent(self):
    # NOTE(review): this test uses lower-case 'id'/'type' entry keys, i.e.
    # the pre-0.6 bibtexparser API (modern versions use 'ID'/'ENTRYTYPE')
    # -- confirm the pinned library version before "fixing" the keys.
    bib_database = BibDatabase()
    bib_database.entries = [{'id': 'abc123',
                             'type': 'book',
                             'author': 'test'}]
    writer = BibTexWriter()
    writer.indent = ' '
    result = bibtexparser.dumps(bib_database, writer)
    expected = \
"""@book{abc123,
 author = {test}
}
"""
    self.assertEqual(result, expected)
def test_sort_missing_field(self):
    # Entries lacking the sort field compare as the empty string and
    # therefore sort first.
    bib_database = BibDatabase()
    bib_database.entries = [{'ID': 'b', 'ENTRYTYPE': 'article', 'year': '2000'},
                            {'ID': 'c', 'ENTRYTYPE': 'book', 'year': '2010'},
                            {'ID': 'a', 'ENTRYTYPE': 'book'}]
    writer = BibTexWriter()
    writer.order_entries_by = ('year', )
    result = bibtexparser.dumps(bib_database, writer)
    expected = "@book{a\n}\n\n@article{b,\n year = {2000}\n}\n\n@book{c,\n year = {2010}\n}\n\n"
    self.assertEqual(result, expected)
def extract_bibtex(bib_database, id):
    """Return the BibTeX string of the single entry whose ID equals *id*.

    Raises
    ------
    KeyError
        If no entry with that ID exists.  (The old code fell through with
        pos=None and crashed with TypeError on the slice arithmetic.)
    """
    pos = None
    for i, entry in enumerate(bib_database.entries):
        if entry['ID'] == id:
            pos = i
            break  # IDs are unique keys; no need to keep scanning
    if pos is None:
        raise KeyError("entry ID not found: {}".format(id))
    # Deep-copy so trimming does not mutate the caller's database.
    bib_db = copy.deepcopy(bib_database)
    bib_db.entries = [bib_db.entries[pos]]
    # (Renamed the old local `str`, which shadowed the builtin.)
    return bibtexparser.dumps(bib_db)
def do_clip(self, args):
    '''Copy bibtex entry to clipboard'''
    id_ = self._get_bibid(args)
    if id_ is None:
        print('Index out of range')
        return
    # Build a throwaway single-entry database and serialize it.
    clip_db = bibtexparser.bibdatabase.BibDatabase()
    clip_db.entries = [self.bibtex.entries[id_]]
    payload = bibtexparser.dumps(clip_db)
    # Pipe the text into xsel's clipboard buffer.
    subprocess.run(['xsel', '-b', '-i'],
                   universal_newlines=True, input=payload)
    print('Copied bibtex entry to clipboard')
def _cleanupBibTex(self, count):
    """ Clean up bibtex and ensure uniform look.

    Re-parses ``self.refs`` with LaTeX-encoding homogenization, updates
    ``self.number`` with the entry count, and stores the rewritten text
    back into ``self.refs``.
    """
    import bibtexparser
    from bibtexparser.bparser import BibTexParser
    parser = BibTexParser()
    parser.customization = homogeneize_latex_encoding
    bib = bibtexparser.loads(self.refs, parser=parser)

    # save results
    from bibtexparser.bwriter import BibTexWriter
    writer = BibTexWriter()
    writer.contents = ['entries']
    writer.indent = ' '
    # BUGFIX: ('id') is just the string 'id', so the writer iterated the
    # characters 'i' and 'd' as (nonexistent) sort fields and the sort was
    # a silent no-op; a one-element tuple is what was intended.
    writer.order_entries_by = ('id',)
    self.number = len(bib.entries)
    self.refs = bibtexparser.dumps(bib, writer)
def compile_bibtex(citations, big_bibtex_ffp):
    """
    finds the bibtex entries that correspond to a list of cite keys, from a
    large bibtex file and writes a new bibtex file with just the required
    references.
    :param citations: a list of citation keys.
    :param big_bibtex_ffp: full file path to bibtex file
    :return: the reduced BibTeX source as a string
    """
    # BUGFIX: the original list had a missing comma ("read" "abstract"),
    # which string-concatenated into the bogus key "readabstract" -- so
    # 'abstract' fields were never stripped.  Also dropped the duplicate
    # "read" item.
    remove_keys = [
        "annote", "date-added", "date-modified", "local-url", "file",
        "rating", "month", "uri", "abstract", "read"
    ]
    with open(big_bibtex_ffp) as org_bibtex_file:
        org_bibtex_database = bibtexparser.load(org_bibtex_file)
    new_bibtex_db = bibtexparser.loads(" ")  # create a new empty bibtex DB obj
    for entry in citations:
        if entry not in org_bibtex_database.entries_dict:
            print("Not found: ", entry)
        else:
            new_bibtex_db.entries.append(
                org_bibtex_database.entries_dict[entry])
    # Strip noisy reference-manager fields from every kept entry.
    for entry in new_bibtex_db.entries_dict:
        for r_key in remove_keys:
            if r_key in new_bibtex_db.entries_dict[entry]:
                del new_bibtex_db.entries_dict[entry][r_key]
    bibtex_str = bibtexparser.dumps(new_bibtex_db)
    return bibtex_str
def __str__(self):
    """Render this record as a single-entry BibTeX @article string."""
    fields = {
        'ENTRYTYPE': 'article',
        'ID': self.entry_number,
        'author': self.author,
        'journal': self.journal,
        'title': self.title,
        'year': self.year,
        'volume': self.volume,
        'number': self.number,
        'pages': self.pages,
        'abstract': self.abstract,
        'keyword': self.keyword,
        'doi': self.doi,
        'issn': self.issn
    }
    database = BibDatabase()
    database.entries = [fields]
    return bibtexparser.dumps(database)
def fix_bibliography(bibtex_string): """ Given a bibliography file, `fixes` it by removing URLs from articles, ASCIIifying all the fields and replacing dates with years. """ # Make a parser that will ASCIIify everything: # See: https://bibtexparser.readthedocs.io/en/v0.6.2/tutorial.html#accents-and-weird-characters parser = BibTexParser() parser.customization = homogeneize_latex_encoding bibtex = bibtexparser.loads(bibtex_string, parser=parser) for entry in bibtex.entries: fix_entry(entry) # TODO: if py3k, do not encode. return bibtexparser.dumps(bibtex).encode("UTF-8")
def bibtex(self, publication: Publication) -> str:
    """Returns the publication as a Bibtex entry

    :param publication: Scholar or Citation publication container object
    :type publication: Publication
    :getter: Returns a Bibtex entry in text format
    :type: str
    """
    if not publication['filled']:
        publication = self.fill(publication)
    remapped = remap_bib(publication['bib'], _BIB_REVERSE_MAPPING)
    # Bibtex fields must be strings, so stringify every value first.
    database = BibDatabase()
    database.entries = [{key: str(value) for key, value in remapped.items()}]
    return bibtexparser.dumps(database)
def main():
    """Build a BibTeX file from per-paper markdown files with YAML front
    matter.

    argv[1] is the output .bib path; the remaining arguments are the
    markdown files.  Each file's front matter becomes one entry, keyed by
    the file name (without the .md suffix).
    """
    output = sys.argv[1]
    mds = sys.argv[2:]
    es = []
    for fn in mds:
        with open(fn, "r", encoding='UTF-8') as f:
            # Skip the opening '---' and read up to the closing '---'.
            ls = f.readlines()[1:]
            ls = itertools.takewhile(lambda x: x != "---\n", ls)
            e = yaml.load("".join(ls), Loader=yaml.FullLoader)
        e['ID'] = fn.split("/")[1][0:-3]
        for i in ['title', 'booktitle']:
            if i in e:
                s = e[i]
                # BUGFIX: "\#" and "\&" are invalid escape sequences
                # (SyntaxWarning on modern Python); "\\#" spells the same
                # backslash-escaped character explicitly.
                s = s.replace("#", "\\#")
                s = s.replace("&", "\\&")
                e[i] = s
        # Brace the title to protect its capitalization.
        e['title'] = "{" + e['title'] + "}"
        if 'authors' in e:
            e['author'] = " and ".join(e['authors'])
            del e['authors']
        for i in ['isbn', 'pages', 'volume', 'year']:
            if i in e:
                e[i] = str(i) if False else str(e[i])
        # Drop site-internal front-matter keys that are not BibTeX fields.
        for i in ['added', 'layout', 'notes', 'papers', 'read', 'readings',
                  'topics']:
            if i in e:
                del e[i]
        es.append(e)
    db = BibDatabase()
    db.entries = es
    writer = BibTexWriter()
    writer.contents = ['entries']
    writer.indent = ' '
    bibtex_str = bibtexparser.dumps(db, writer)
    with open(output, "w") as f:
        print(("#############################################\n"
               "# This file is machine generated, do not edit\n"
               "#############################################\n"), file=f)
        print(bibtex_str, file=f)
def doi2bib(doi, cache):
    """
    Return a bibTeX string of metadata for a given DOI.

    Reference: https://gist.github.com/jrsmith3/5513926
    """
    # Cache hit: return the first parsed entry directly.
    if doi in cache:
        return bibtexparser.loads(cache[doi]).entries[0]
    url = "http://dx.doi.org/" + doi
    # Content negotiation: ask dx.doi.org for BibTeX ...
    bib_headers = {"accept": "application/x-bibtex"}
    r = requests.get(url, headers=bib_headers)
    new_bibtex = r.text
    # ... and for citeproc JSON (used below for subtitles).
    json_headers = {"accept": "application/citeproc+json"}
    r = requests.get(url, headers=json_headers)
    r_json = json.loads(r.text)
    new_bib_entry_local = {}
    parsed_bibtex = bibtexparser.loads(new_bibtex)
    if len(parsed_bibtex.entries) > 0:
        new_bib_entry_local = parsed_bibtex.entries[0]
        # NOTE(review): `bib_entry` (and `i`, `j` below) are not defined in
        # this function -- they must come from enclosing/global scope;
        # confirm, otherwise this raises NameError on a cache miss.
        new_bib_entry_local['ID'] = bib_entry['ID']
        if len(r_json["subtitle"]) > 0:
            new_bib_entry_local["title"] = new_bib_entry_local[
                "title"] + ": " + r_json["subtitle"][0]
        if len(r_json["subtitle"]) > 1:
            print("Multiple subtitles:", file=sys.stderr)
            print(r_json["subtitle"], file=sys.stderr)
        if "Andrew J. Ko" in new_bib_entry_local["author"]:
            new_bib_entry_local["author"] = new_bib_entry_local[
                "author"].replace("Andrew J. Ko", "Amy J. Ko")
            print("Update Amy's name!", file=sys.stderr)
        parsed_bibtex.entries[0] = new_bib_entry_local
        print('%d / %d BibTex entries fixed!' % (i, j), file=sys.stderr)
    else:
        print("parse failed")
        print(doi)
        print(new_bibtex)
    cache[doi] = bibtexparser.dumps(parsed_bibtex)
    return new_bib_entry_local
def save_citation(citation_record):
    """Extract one Google Scholar citation, append its BibTeX to the file
    named by ``opts.citation_name``, and optionally download the PDF."""
    cite_anchor = citation_record.find('a', {
        'class': 'gs_nph',
        'href': '#',
        "role": "button"
    })
    if not cite_anchor or not cite_anchor['onclick']:
        logging.warn("No Cite anchor for citation: %s" % citation_record)
        return
    # The citation id is the second (quoted) argument of the onclick handler.
    citation_id = cite_anchor['onclick'].split(',')[1][1:-1]
    logging.info("Getting formated cite from citation id: " + citation_id)
    params = {
        "q": "info:%s:scholar.google.com/" % citation_id,
        "output": "cite"
    }
    soup = create_soup_by_url("https://scholar.google.com/scholar", params)
    bib_anchor = soup.find('a', {"class": "gs_citi"})
    if not bib_anchor:
        logging.debug("BibTex page soup is: %s" % soup.getText())
        logging.warn("No BibTex citation provided for citation: %s" %
                     citation_id)
        return
    soup = create_soup_by_url(bib_anchor['href'])
    global citation_num
    citation_num += 1
    # Adding a tag to the bib entry about google scholar citation ID
    citation_entry = bibtexparser.loads(soup.getText()).entries[0]
    citationID = citation_entry['ID']  # e.g., melville2004review
    citation_entry["gscholar_id"] = citation_id
    db_entry = []
    db_entry.append(citation_entry)
    db = BibDatabase()
    db.entries = db_entry
    g_bib_entry = bibtexparser.dumps(db)
    # "%%" renders a literal '%' (BibTeX comment) with a running index.
    bib_entry = "%% [%d]\n%s" % (citation_num, g_bib_entry)
    logging.info(bib_entry.strip())
    # Binary append mode because the payload is utf-8 encoded bytes.
    with open(opts.citation_name, "ab+") as f:
        f.write(bib_entry.encode('utf-8'))
    if opts.should_download:
        pdf_div = citation_record.find('div', {"class": "gs_ggs gs_fl"})
        if pdf_div:
            download_pdf(pdf_div.a['href'], citationID)
def bibtex_passthrough(text, set_id=None):
    """ Fix errors in a bibtex record and optional change its ID. """
    parser = bibtexparser.bparser.BibTexParser()
    parser.ignore_nonstandard_types = False
    database = bibtexparser.loads(text, parser)
    # Exactly one entry is expected; unpacking enforces it.
    (record,) = database.entries
    if 'author' in record:
        record['author'] = ' and '.join(
            record['author'].rstrip(';').split('; '))
    # Set URL as url rather than link attribute
    if 'link' in record and 'url' not in record:
        record['url'] = record.pop('link')
    # Upgrade arxiv links to HTTPS
    if 'url' in record:
        arxiv_http = re.compile(r'^http://arxiv\.org/')
        record['url'] = arxiv_http.sub('https://arxiv.org/', record['url'])
    if set_id is not None:
        record['ID'] = set_id
    return bibtexparser.dumps(database)
def main():
    """Subtract the entries of the second .bib file from the first and
    write (or print) the remainder."""
    args = parse_arg()
    if not (os.path.isfile(args.bib[0]) and os.path.isfile(args.bib[1])):
        print("input file not found")
        # BUGFIX: exit with a non-zero status on the error path; exit(0)
        # signalled success to the calling shell.
        sys.exit(1)
    with open(args.bib[0]) as bibtex_file:
        database1 = bibtexparser.load(bibtex_file)
    with open(args.bib[1]) as bibtex_file:
        database2 = bibtexparser.load(bibtex_file)
    result = subtract(database1, database2)
    if args.output:
        with open(args.output, 'w') as bibtex_file:
            bibtexparser.dump(result, bibtex_file)
    else:
        print(bibtexparser.dumps(result))
def proc_bibtex(text, reverse=False):
    """Convert author/title/journal fields between unicode and LaTeX
    escapes: u2l by default, l2u when *reverse* is set.

    Fields containing LaTeX math ('$') are skipped with a warning; fields
    containing quoted math ('\\$') abort the program.
    """
    targets = ['author', 'title', 'journal']
    converter = l2u if reverse else u2l
    parser = BibTexParser()
    parser.homogenise_fields = False
    bib = bibtex.loads(text, parser)
    for item in bib.entries:
        for target in targets:
            if target not in item:
                continue
            # BUGFIX: '\$' is an unrecognised escape sequence (a
            # SyntaxWarning on modern Python); the raw string r'\$' spells
            # the same backslash-dollar explicitly.
            # NOTE(review): item['id'] assumes an older bibtexparser
            # (newer versions expose 'ID') -- confirm the pinned version.
            if r'\$' in item[target]:
                sys.stderr.write('error: quoted latex math expression in {}:{}, abort\n'
                                 .format(item['id'], target))
                sys.exit(1)
            elif '$' in item[target]:
                sys.stderr.write('warning: latex math expression in {}:{}, skipping\n'
                                 .format(item['id'], target))
                continue
            item[target] = converter(item[target])
    return bibtex.dumps(bib)
def cleanbib(in_fp):
    """Strip noisy Mendeley/annotation fields from the .bib file at *in_fp*
    and return the cleaned BibTeX as a string."""
    unwanted = [
        'file',
        'annote',
        'abstract',
        'keywords',
        'archivePrefix',
        'mendeley-tags',
        'mendeley-groups',
    ]
    with open(in_fp, 'r') as in_fh:
        database = bibtexparser.loads(in_fh.read())
    for entry in database.entries:
        # Collect first, then delete -- never mutate a dict mid-iteration.
        doomed = [field for field in entry.keys() if field in unwanted]
        for field in doomed:
            del entry[field]
    return bibtexparser.dumps(database)
def write_bibtex(bibtex_entries):
    """Serialize *bibtex_entries* into sorted BibTeX text.

    Note: the entry dicts are modified in place -- transient fields are
    dropped, list values are ' and '-joined, and text is LaTeX-escaped.
    """
    for entry in bibtex_entries:
        # pop the useless contents
        for junk in ('created_time', 'file', 'abstract'):
            entry.pop(junk, None)
        for field in entry:
            if isinstance(entry[field], list):
                entry[field] = ' and '.join(entry[field])
            entry[field] = unicode_to_latex(entry[field])
    database = bibtexparser.bibdatabase.BibDatabase()
    database.entries = bibtex_entries
    writer = BibTexWriter()
    writer.contents = ['comments', 'entries']
    writer.indent = ' '
    writer.order_entries_by = ('ENTRYTYPE', 'author', 'year')
    return bibtexparser.dumps(database, writer)
def entry_ndiff(entries, color=True):
    ' diff of many entries '
    # Merge the entries; each conflicting field is replaced by a unique
    # placeholder string so it can be located again in the dumped text.
    m = merge_entries(entries)
    SECRET_STRING = 'REPLACE_{}_FIELD'
    regex = re.compile(SECRET_STRING.format('(.*)'))  # reg exp to find
    choices = {}
    somemissing = []
    for k in m:
        if isinstance(m[k], ConflictingField):
            choices[k] = m[k].choices
            m[k] = SECRET_STRING.format(k)
        elif any(k not in e for e in entries):
            somemissing.append(k)
    db = bibtexparser.bibdatabase.BibDatabase()
    db.entries.append(m)
    s = bibtexparser.dumps(db)
    lines = []
    for line in s.splitlines():
        matches = regex.findall(line)
        if matches:
            # Conflicting field: expand to one alternative per candidate,
            # framed by chevron markers and separated by '---'.
            k = matches[0]
            template = SECRET_STRING.format(k)
            lines.append('\u2304' * 3)
            for c in choices[k]:
                newline = ' ' + line.replace(template, '{}'.format(c))
                lines.append(
                    _colordiffline(newline, '!') if color else newline)
                lines.append('---')
            lines.pop()  # remove last ---
            # lines.append('^^^')
            lines.append('\u2303' * 3)
        elif any('{} = {{'.format(k) in line for k in somemissing):
            # Field present in only some entries: mark with '*'.
            newline = ' ' + line
            lines.append(
                _colordiffline(newline, sign='*') if color else newline)
        elif not line.startswith(('@', '}')):
            lines.append(' ' + line)
        else:
            lines.append(line)
    return '\n'.join(lines)
def biblatex_entry_by_doi(doi):
    """Return a BibLaTeX entry for *doi*, or None when it cannot be
    resolved.

    The DOI's BibTeX record is re-parsed with unicode conversion and the
    BibTeX 'journal' field is renamed to BibLaTeX's 'journaltitle'.
    """
    entry = printable_bibtex_entry_by_doi(doi)
    if not entry:
        return None
    entry_str = bibtex_entry_str(entry)
    parser = BibTexParser()
    parser.customization = convert_to_unicode
    bib_database = bibtexparser.loads(entry_str, parser=parser)
    # convert 'journal' to 'journaltitle'
    for e in bib_database.entries:
        if 'journal' in e.keys():
            e['journaltitle'] = e['journal']
            del e['journal']
    bibtex_string = bibtexparser.dumps(bib_database)
    return bibtex_entry_from_str(bibtex_string)
    # BUGFIX: removed the unreachable trailing `return (bibtex_string)` --
    # both branches already returned, and bibtex_string was unbound on the
    # failure path.
def build_citation(self, bib_database):
    """
    Converts bibtex into xml description

    Parameters
    ----------
    bib_database : bibtexparser.bibdatabase.BibDatabase
        Object containing bibtex data
    """
    # Only the first entry of the database is converted.
    bib_dict = bib_database.entries[0]
    citation = DM()
    if bib_dict['ENTRYTYPE'] == 'article':
        citation['document-type'] = 'journal'
        citation['title'] = bib_dict['title']
        citation['author'] = self.parse_authors(bib_dict['author'])
        citation['publication-name'] = bib_dict['journal']
        citation['publication-date'] = DM()
        citation['publication-date']['year'] = bib_dict['year']
        citation['volume'] = bib_dict['volume']
        # 'number' and 'issue' are synonyms; 'number' wins when both exist.
        if 'number' in bib_dict:
            citation['issue'] = bib_dict['number']
        elif 'issue' in bib_dict:
            citation['issue'] = bib_dict['issue']
        if 'abstract' in bib_dict:
            citation['abstract'] = bib_dict['abstract']
        if 'pages' in bib_dict:
            # BibTeX page ranges use '--'; the XML form uses a single '-'.
            citation['pages'] = bib_dict['pages'].replace('--', '-')
        citation['DOI'] = bib_dict['doi']
    elif bib_dict['ENTRYTYPE'] == 'unpublished':
        citation['document-type'] = 'unspecified'
        citation['title'] = bib_dict['title']
        citation['author'] = self.parse_authors(bib_dict['author'])
        citation['publication-date'] = DM()
        citation['publication-date']['year'] = bib_dict['year']
    # Keep the raw bibtex alongside the structured description.
    citation['bibtex'] = bibtexparser.dumps(bib_database)
    return citation
def bibtex(self, id=None):
    """Prepare bibtex entry for this article.

    Parameters
    ==========
    id : str
        The entry ID to use.  If omitted, ``self.default_id()`` will be
        used.

    Returns
    =======
    bibtex : str
        The prepared bibtex entry.
    """
    entry = self.metadata()
    entry['ID'] = self.default_id() if id is None else id
    # Wrap the single entry in a throwaway database and serialize it.
    database = bibtexparser.bibdatabase.BibDatabase()
    database.entries = [entry]
    return bibtexparser.dumps(database)
def proc_bibtex(text, reverse=False):
    """Convert author/title/journal fields between unicode and LaTeX
    escapes: u2l by default, l2u when *reverse* is set.  Fields with LaTeX
    math are skipped; quoted math aborts the program."""
    targets = ['author', 'title', 'journal']
    converter = l2u if reverse else u2l
    parser = BibTexParser()
    parser.homogenise_fields = False
    bib = bibtex.loads(text, parser)
    for item in bib.entries:
        for target in targets:
            if target not in item:
                continue
            # NOTE(review): '\$' is an unrecognised escape (== backslash +
            # '$'); modern Python warns -- a raw string would be cleaner.
            # item['id'] assumes an older bibtexparser ('ID' in new ones).
            if '\$' in item[target]:
                sys.stderr.write(
                    'error: quoted latex math expression in {}:{}, abort\n'.
                    format(item['id'], target))
                sys.exit(1)
            elif '$' in item[target]:
                sys.stderr.write(
                    'warning: latex math expression in {}:{}, skipping\n'.
                    format(item['id'], target))
                continue
            item[target] = converter(item[target])
    return bibtex.dumps(bib)
def scholar_get(title, db):
    """Look up *title* on Google Scholar (memoised in *db*) and return a
    dict with 'title', 'authors', 'year' and the raw 'bibtex'."""
    # print(gscholar.query("linked open data", allresults=True))
    if title not in db:
        query = gscholar.query(title)
        if len(query) < 1:
            # Nothing found: return an empty-result placeholder.
            return {"title": "", "authors": [], "year": "", "bibtex": ""}
        db[title] = query[0]
        # Crude rate limiting to avoid being blocked by Scholar.
        time.sleep(random.randint(0, 10))
    meta = {}
    parser = bibtexparser.bparser.BibTexParser()
    parser.customization = customizations
    raw_entry = bibtexparser.loads(db[title], parser=parser)
    entry = raw_entry.entries[0]
    meta["title"] = entry['title'].strip()
    meta["authors"] = entry['author'].replace(", ", " ").split("and")
    meta["year"] = entry.get('year', "").strip()
    meta["bibtex"] = bibtexparser.dumps(raw_entry)
    return meta
def simplify_bibtex(args):
    """Replace journal titles in args.bibfile with IEEE @string
    abbreviations (looked up in args.bib_journal by DOI prefix) and write
    the patched BibTeX to args.output."""
    bib_db = load_bibtex(args.bibfile)
    bib_ieee = load_bibtex(args.bib_journal)
    strings = collect_ieee_titles(bib_ieee.strings)
    parser = DOIParser()
    strings_filtered = OrderedDict()
    for entry in bib_db.entries:
        if 'doi' in entry:
            # The DOI prefix identifies the publisher/journal.
            doi_prefix = parser.get_doi_key(entry['doi'])
            if doi_prefix in strings:
                key, journal = strings[doi_prefix]
                strings_filtered[key] = journal
                entry['journaltitle'] = key
                if 'shortjournal' in entry:
                    entry.pop('shortjournal')
    # Keep only the @string definitions that are actually referenced.
    bib_db.strings = strings_filtered
    text = bibtexparser.dumps(bib_db)
    text_patched = JournalPatcher().patch(text)
    text_bibtex = StringPatcher().patch(text_patched)
    with open(args.output, 'w') as output_file:
        output_file.write(text_bibtex)
def test_sort_default(self):
    # The default writer sorts entries by ID: a, b, c.
    result = bibtexparser.dumps(self.bib_database)
    expected = "@book{a\n}\n\n@article{b\n}\n\n@book{c\n}\n\n"
    self.assertEqual(result, expected)
def test_sort_none(self):
    # order_entries_by=None preserves the database's insertion order.
    writer = BibTexWriter()
    writer.order_entries_by = None
    result = bibtexparser.dumps(self.bib_database, writer)
    expected = "@article{b\n}\n\n@book{c\n}\n\n@book{a\n}\n\n"
    self.assertEqual(result, expected)
def test_sort_type_id(self):
    # Sorting by (ENTRYTYPE, ID): articles before books, then by key.
    writer = BibTexWriter()
    writer.order_entries_by = ('ENTRYTYPE', 'ID')
    result = bibtexparser.dumps(self.bib_database, writer)
    expected = "@article{b\n}\n\n@book{a\n}\n\n@book{c\n}\n\n"
    self.assertEqual(result, expected)