def _ingest_citations(rc):
    """Parse the BibTeX file ``rc.filename`` and upsert every entry.

    Each entry is written through ``rc.client.update_one`` into
    ``rc.db`` / ``rc.coll`` with the BibTeX ID as ``_id``.  ``author`` and
    ``editor`` fields are turned into name lists by ``getnames`` while
    parsing; authors are additionally split on ``RE_AND`` and titles have
    ``RE_SPACE`` matches replaced by a single space.
    """
    import bibtexparser
    from bibtexparser.bparser import BibTexParser
    from bibtexparser.customization import getnames

    parser = BibTexParser()
    parser.ignore_nonstandard_types = False

    def customizations(record):
        # Split "A, B and C" style name fields into individual names.
        for n in ['author', 'editor']:
            if n in record:
                a = [i for i in record[n].replace('\n', ' ').split(', ')]
                b = [i.split(" and ") for i in a]
                c = [item for sublist in b for item in sublist]
                d = [i.strip() for i in c]
                record[n] = getnames(d)
        return record

    parser.customization = customizations
    # Read with an explicit encoding so the result does not depend on the
    # platform's default locale.
    with open(rc.filename, 'r', encoding='utf-8') as f:
        bibs = bibtexparser.load(f, parser=parser)
    # NOTE(review): `coll` is not used below; the lookup is kept in case it
    # has side effects on the client -- confirm and drop if not.
    coll = rc.client[rc.db][rc.coll]
    for bib in bibs.entries:
        bibid = bib.pop('ID')
        bib['entrytype'] = bib.pop('ENTRYTYPE')
        if 'author' in bib:
            bib['author'] = [a.strip() for b in bib['author']
                             for a in RE_AND.split(b)]
        if 'title' in bib:
            bib['title'] = RE_SPACE.sub(' ', bib['title'])
        rc.client.update_one(rc.db, rc.coll, {'_id': bibid}, bib,
                             upsert=True)
def _bibtexQuery(self, query):
    """Turn a BibTeX query string into a list of (field, value) pairs.

    Only the first parsed record is used; an empty list is returned when
    nothing could be parsed.
    """
    import bibtexparser
    from bibtexparser.bparser import BibTexParser

    latex_parser = BibTexParser()
    latex_parser.customization = homogeneize_latex_encoding
    parsed = bibtexparser.loads(query, parser=latex_parser)
    if not parsed.entries:
        return []

    first = parsed.entries[0]
    if "author" in first:
        # Keep only the last names: drop everything from a comma up to the
        # next "and" (or the end of the field).
        first["author"] = re.sub(r',.*?(and\s*|$)', ' ', first['author'])
    for field in ("title", "journal"):
        if field in first:
            first[field] = self._citationQuery(first[field])[0][1]
    if "year" in first:
        first["date"] = first["year"]

    # TODO add numbers
    wanted = {"author", "title", "journal", "mrnumber", "date", "arxiv", "zbl"}
    return [(key, value) for key, value in first.items() if key in wanted]
def parseEntry(s):
    """Re-parse BibTeX string `s` with `convert_to_unicode` and return the first entry."""
    unicode_parser = BibTexParser()
    unicode_parser.customization = convert_to_unicode
    return bibtexparser.loads(s, parser=unicode_parser).entries[0]
def save(self):
    """Create one Reference per entry of the uploaded BibTeX file.

    Returns a ``(good, bad, results)`` tuple: the number of references
    saved, the number that failed, and one message string per entry.
    """
    upload = self.cleaned_data['file'].file
    parser = BibTexParser(upload, customization=convert_to_unicode)
    saved = 0
    failed = 0
    messages_out = []
    for record in parser.get_entry_list():
        # Only fields that exist on the model are copied over.
        model_fields = set(Reference._meta.get_all_field_names())
        shared = model_fields.intersection(record.keys())
        ref = Reference()
        for name in shared:
            # The BibTeX id is stored as the reference slug.
            target = 'slug' if name == 'id' else name
            setattr(ref, target, record[name])
        try:
            ref.save()
        except Exception as e:
            # NOTE(review): `e.message` only exists on Python 2 exceptions.
            failed += 1
            messages_out.append("![{0}]: {1}".format(ref.slug, e.message))
        else:
            saved += 1
            messages_out.append("[{0}]: {1}".format(ref.slug, ref.title))
    return saved, failed, messages_out
def annotes_dicts(bibfile, pdfdir, filters, include_all=False):
    """Collect annotation info dicts for the entries of a BibTeX file.

    `filters` is an iterable of ``(key, pattern)`` pairs; an entry is kept
    only if every key is present and its value matches the pattern with
    ``re.search``.  The result is sorted by the ``'ID'`` value.
    """
    with open(bibfile, encoding="utf-8") as handle:
        raw_bibtex = handle.read()
    bib_parser = BibTexParser()
    bib_parser.ignore_nonstandard_types = False
    database = bibtexparser.loads(raw_bibtex, bib_parser)

    collected = []
    for entry in database.entries:
        matched = all(
            key in entry and re.search(pattern, entry[key])
            for key, pattern in filters
        )
        if not matched:
            continue
        if not (entry.get('file') or entry.get('review') or include_all):
            continue

        filepath = ''
        if entry.get('file'):
            filepath = os.path.join(pdfdir, entry['file'][1:-4])
            sys.stderr.write("%s\n" % filepath)
            annotes = get_annotes(filepath)
        else:
            annotes = []

        if not (annotes or entry.get('review') is not None or include_all):
            continue
        info = {'file': filepath}
        collected.append(info)
        for field in ('author', 'year', 'title', 'journal', 'review', 'ID', 'doi'):
            info[field] = _to_utf(entry.get(field, None))
        info['annotations'] = [
            {name: _to_utf(value) for name, value in note._asdict().items()}
            for note in annotes
        ]

    collected.sort(key=lambda item: item['ID'])
    return collected
def test_article_cust_latex(self):
    """article.bib parsed with `customizations_latex` yields the expected single entry."""
    path = 'bibtexparser/tests/data/article.bib'
    with codecs.open(path, 'r', 'utf-8') as bibfile:
        parsed = BibTexParser(bibfile.read(), customization=customizations_latex)
        res = parsed.get_entry_list()

    article = {
        'ENTRYTYPE': 'article',
        'ID': 'Cesar2013',
        'author': ['C{\\\'e}sar, Jean'],
        'title': '{A}n amazing title',
        'journal': {'ID': 'NiceJournal', 'name': 'Nice Journal'},
        'year': '2013',
        'month': 'jan',
        'volume': '12',
        'pages': '12--23',
        'keyword': ['keyword1', 'keyword2'],
        'comments': 'A comment',
        'abstract': 'This is an abstract. This line should be long enough to test\nmultilines... and with a french {\\\'e}rudit word',
    }
    self.assertEqual(res, [article])
def import_bibtex(request):
    """Import the uploaded BibTeX file into the review's source.

    Reads ``review-id`` and ``source-id`` from the POST data and the
    upload from ``request.FILES['bibtex']``.  Files with a ``.bib`` /
    ``.bibtex`` extension or the ``application/x-bibtex`` content type are
    parsed and handed to ``_import_articles``; anything else produces an
    error message.  Always redirects to the ``import_studies`` view.
    """
    review_id = request.POST['review-id']
    source_id = request.POST['source-id']
    review = Review.objects.get(pk=review_id)
    source = Source.objects.get(pk=source_id)

    bibtex_file = request.FILES['bibtex']
    list_bibtex_file = fix_bibtex_file(bibtex_file.readlines())
    str_bibtex_file = '\r\n'.join(list_bibtex_file)

    ext = os.path.splitext(bibtex_file.name)[1]
    valid_extensions = ['.bib', '.bibtex']

    if ext in valid_extensions or bibtex_file.content_type == 'application/x-bibtex':
        parser = BibTexParser()
        parser.customization = convert_to_unicode
        bib_database = bibtexparser.loads(str_bibtex_file, parser=parser)
        articles = bibtex_to_article_object(bib_database, review, source)
        # The leftover pdb.set_trace() breakpoint was removed here: it
        # blocks the request until someone answers on the server console.
        _import_articles(request, source, articles)
    else:
        messages.error(request, u'Invalid file type. Only .bib or .bibtex files are accepted.')

    return redirect(r('import_studies', args=(review.author.username, review.name)))
def load_bibtex(bib_file_name):
    """Parse a BibTeX file and group its entries by entry type.

    Returns a dict mapping each ``ENTRYTYPE`` found in the file to the
    list of its entries, ordered by year and then month, both descending.
    """
    with open(bib_file_name, 'r') as handle:
        parser = BibTexParser(handle.read(), customization=convert_to_unicode)
        # Mapping of BibTeX ID -> entry dict.
        references = parser.get_entry_dict()

    # Distinct entry types present in the file.
    kinds = set()
    for reference in references.values():
        kinds.add(reference["ENTRYTYPE"])

    def month_number(reference):
        return datetime.strptime(reference["month"], '%b').month

    def year_of(reference):
        return reference["year"]

    grouped = {}
    for kind in kinds:
        of_kind = [ref for ref in references.values() if ref["ENTRYTYPE"] == kind]
        # Two stable sorts: month first, then year, so year is the primary key.
        of_kind.sort(key=month_number, reverse=True)
        of_kind.sort(key=year_of, reverse=True)
        grouped[kind] = of_kind
    return grouped
def run(self):
    """Render the selected BibTeX entries through the publications template.

    Returns a list holding one raw HTML node with the rendered template.
    """
    opts = self.options
    sort_type = opts.get('sort', 'date')

    if 'template' in opts:
        # Explicit template path given in the directive options.
        folder, name = os.path.split(opts['template'])
        environment = Environment(loader=FileSystemLoader(folder))
        template = environment.get_template(name)
    else:
        # Fall back to the template shipped with the Pelican theme.
        template = pelican_generator.get_template('publications')

    parser = BibTexParser(common_strings=True)
    parser.customization = customize

    if self.arguments:
        # First argument is the path of a BibTeX file.
        with open(self.arguments[0].strip(), 'r') as bibtex_file:
            bib = bibtexparser.load(bibtex_file, parser=parser)
    else:
        # Otherwise the directive body itself is the BibTeX source.
        bib = bibtexparser.loads('\n'.join(self.content), parser=parser)

    selected = opts.get('entries', [])
    if selected:
        by_id = bib.entries_dict
        entries = [by_id[key] for key in selected]
    else:
        entries = bib.entries

    html = template.render(publications=sort_entries(entries, sort_type))
    return [nodes.raw('', html, format='html')]
def getentries(filename):
    """Parse a BibTeX file and return its entries.

    The file is tried as ``utf-8-sig``, then ``utf-16``, then with no
    explicit encoding; the next encoding is tried only when the failure
    looks like a decoding problem.

    Raises:
        IOError: if ``save_import_file`` fails.
        ValueError: if the file cannot be parsed.
    """
    try:
        save_import_file(filename)
    except IOError as e:
        logg.error("bibtex import: save import file failed: {}".format(e))
        raise IOError("save import file failed")

    error = None
    bibtex = None
    # use utf-8-sig instead of utf-8 to get rid of BOM_UTF8, which confuses bibtex parser
    for encoding in ('utf-8-sig', 'utf-16', None):
        try:
            error = None
            # `with` guarantees the handle is closed even when parsing fails.
            with codecs.open(filename, "r", encoding=encoding) as fi:
                parser = BibTexParser(common_strings=True)
                # accept also non standard records like @SCIENCEREPORT
                parser.ignore_nonstandard_types = False
                parser.customization = _bibteximport_customize
                bibtex = bibtex_load(fi, parser=parser)
            # seems to be the correct encoding, don't try other encodings
            break
        except Exception as e:
            # A decoding failure means "wrong encoding": try the next one.
            # The message checks are kept for wrapped errors; the
            # UnicodeError check covers Python 3, whose messages say
            # 'utf-8' rather than 'utf8'.
            message = str(e).lower()
            if encoding == 'utf-8-sig' and (isinstance(e, UnicodeError) or 'utf8' in message):
                continue
            if encoding == 'utf-16' and (isinstance(e, UnicodeError) or 'utf-16' in message):
                continue
            # Keep the exception in `error`: the `except` target name is
            # unbound once the handler exits on Python 3.
            error = e
            break

    if error:
        logg.error("bibtex import: bibtexparser failed: {}".format(error))
        raise ValueError("bibtexparser failed")

    return bibtex.entries
def convert(inpath):
    '''
    Convert from bibtex to bibjson.
    One argument expected: path to bibtex file.

    Returns a dict with a single "records" key holding the parsed entries,
    each tagged with a 1-based "_id" and the collection name "test01".
    '''
    import bibtexparser
    from bibtexparser.bparser import BibTexParser
    import json

    parser = BibTexParser()
    with open(inpath) as bibtex_file:
        parser.customization = customizations
        bib_database = bibtexparser.load(bibtex_file, parser=parser)

    tagged = []
    for position, entry in enumerate(bib_database.entries, 1):
        # Entries are annotated in place, not copied.
        entry["_id"] = position
        entry["collection"] = "test01"
        tagged.append(entry)
    return {"records": tagged}
def import_bibtex(bibtex, pub_dir="publication", featured=False, overwrite=False, normalize=False, dry_run=False):
    """Import publications from BibTeX file"""
    bibtex_path = Path(bibtex)
    if not bibtex_path.is_file():
        # Fail early with a logged, user-facing message.
        err = "Please check the path to your BibTeX file and re-run"
        log.error(err)
        raise AcademicError(err)

    with open(bibtex, "r", encoding="utf-8") as bibtex_file:
        parser = BibTexParser(common_strings=True)
        parser.customization = convert_to_unicode
        parser.ignore_nonstandard_types = False
        bib_database = bibtexparser.load(bibtex_file, parser=parser)
        # Every entry gets the same set of import options.
        options = {
            "pub_dir": pub_dir,
            "featured": featured,
            "overwrite": overwrite,
            "normalize": normalize,
            "dry_run": dry_run,
        }
        for entry in bib_database.entries:
            parse_bibtex_entry(entry, **options)
def updateArXiv(entry):
    """Look for new versions of arXiv entry `entry`

    Returns False if no new versions or not an arXiv entry,
    Returns the new bibtex otherwise.
    """
    bibtex = getBibtex(entry)
    # getBibtex returns False when nothing matches; `in` on False raises.
    if not bibtex:
        return False

    # Check arXiv
    if('archiveprefix' not in bibtex or
       'arXiv' not in bibtex['archiveprefix']):
        return False

    arxiv_id = bibtex['eprint']
    arxiv_id_no_v = re.sub(r'v\d+\Z', '', arxiv_id)
    # A set holding the id itself; set(arxiv_id) would be a set of its
    # individual characters, so the membership test below never matched.
    ids = {arxiv_id}

    # NOTE(review): this loop inspects `bibtex` (the entry passed in), not
    # the entry being iterated, so it only ever re-adds `arxiv_id`.  It
    # presumably should collect the eprint of every arXiv entry in the
    # index -- confirm what getEntries() yields before changing it.
    for _other in getEntries():
        if('archiveprefix' not in bibtex or
           'arXiv' not in bibtex['archiveprefix']):
            continue
        ids.add(bibtex['eprint'])

    last_bibtex = BibTexParser(fetcher.arXiv2Bib(arxiv_id_no_v))
    last_bibtex = last_bibtex.get_entry_dict()
    if not last_bibtex:
        # Nothing could be fetched/parsed: no new version to report.
        return False
    last_bibtex = last_bibtex[list(last_bibtex.keys())[0]]

    if last_bibtex['eprint'] not in ids:
        return last_bibtex
    else:
        return False
def getBibtex(entry, file_id='both', clean=False):
    """Returns the bibtex entry corresponding to entry, as a dict

    entry is either a filename or a bibtex ident
    file_id is file or id or both to search for a file / id / both
    clean is to clean the ignored fields specified in config

    Returns False when the index cannot be read or nothing matches.
    """
    try:
        with open(config.get("folder")+'index.bib', 'r', encoding='utf-8') \
                as fh:
            bibtex = BibTexParser(fh.read())
        bibtex = bibtex.get_entry_dict()
    except (TypeError, IOError):
        tools.warning("Unable to open index file.")
        return False

    bibtex_entry = False
    if file_id == 'both' or file_id == 'id':
        try:
            bibtex_entry = bibtex[entry]
        except KeyError:
            pass
    if file_id == 'both' or file_id == 'file':
        if os.path.isfile(entry):
            for key in bibtex.keys():
                candidate = bibtex[key].get('file')
                # Skip entries with no file field or whose file is gone:
                # os.path.samefile raises on a path that does not exist.
                if not candidate or not os.path.exists(candidate):
                    continue
                if os.path.samefile(candidate, entry):
                    bibtex_entry = bibtex[key]
                    break
    # Only clean when something was found; `bibtex_entry` is False otherwise.
    if clean and bibtex_entry:
        for field in config.get("ignore_fields"):
            try:
                del(bibtex_entry[field])
            except KeyError:
                pass
    return bibtex_entry
def diffFilesIndex():
    """Compute differences between Bibtex index and PDF files

    Returns a dict with bibtex entry:
        * full bibtex entry with file='' if file is not found
        * only file entry if file with missing bibtex entry

    Returns False when the index file cannot be read.
    """
    files = tools.listDir(config.get("folder"))
    files = [i for i in files if tools.getExtension(i) in ['.pdf', '.djvu']]
    try:
        with open(config.get("folder")+'index.bib', 'r', encoding='utf-8') \
                as fh:
            index = BibTexParser(fh.read())
        index_diff = index.get_entry_dict()
    except (TypeError, IOError):
        tools.warning("Unable to open index file.")
        return False

    for key in index_diff.keys():
        # An entry without a 'file' field is treated like a missing file.
        indexed_file = index_diff[key].get('file')
        if indexed_file not in files:
            index_diff[key]['file'] = ''
        else:
            files.remove(indexed_file)

    # Whatever is left on disk has no bibtex entry.
    for filename in files:
        index_diff[filename] = {'file': filename}

    # Return the dict computed above rather than asking the parser again.
    return index_diff
def _FindBibEntriesParser(self):
    """Build completion data for every entry of the .bib files found.

    Walks ``self._main_directory`` for ``.bib`` files, reuses cached data
    when ``self._CacheDataAndSkip`` says so, and otherwise parses the file
    and stores the result in ``self._cached_data``.
    """
    ret = []
    for filename in self._Walk(self._main_directory, ".bib"):
        skip, cache = self._CacheDataAndSkip(filename)
        if skip:
            ret.extend(cache)
            continue

        # One parser per file: a BibTexParser keeps appending to its own
        # database, so a shared instance would leak earlier files' entries
        # into this file's cache.
        parser = BibTexParser()
        parser.customization = bib_customizations
        with open(filename) as bibtex_file:
            bib_database = bibtexparser.load(bibtex_file, parser=parser)

        resp = []
        for entry in bib_database.entries:
            if 'ID' not in entry:
                continue
            # Entries without a title or author still get a completion.
            title = entry.get('title', '')
            author = entry.get('author', '')
            resp.append(responses.BuildCompletionData(
                entry['ID'],
                "%s (%s)" % (title, author)
            ))

        ret.extend(resp)
        self._cached_data[filename] = resp
    return ret
def test_article_comma_first(self):
    """Both entries of article_comma_first.bib parse to the expected dicts."""
    with open('bibtexparser/tests/data/article_comma_first.bib', 'r') as bibfile:
        parsed = BibTexParser(bibfile.read())
        res = parsed.get_entry_list()

    # Fields shared by the two entries; only ID and author differ.
    shared = {
        'ENTRYTYPE': 'article',
        'journal': 'Nice Journal',
        'volume': '12',
        'year': '2013',
        'comments': 'A comment',
        'keyword': 'keyword1, keyword2',
        'title': 'An amazing title',
    }
    expected = [
        dict(shared, ID='Cesar2013', author='Jean Cesar'),
        dict(shared, ID='Baltazar2013', author='Jean Baltazar'),
    ]
    self.assertEqual(res, expected)
def load_bib(bib, titles):
    """Returns dict {'BibTeX ID': {record}}

    `bib` is the path of the bibliography file and `titles` the path of a
    BibTeX file whose @string definitions are used to replace journal names.
    """
    with open(bib) as bibtex_file:
        bib_parser = BibTexParser()
        bib_parser.customization = convert_to_unicode
        # Find the url field of a misc entry
        # https://github.com/sciunto-org/python-bibtexparser/issues/93
        bib_parser.homogenise_fields = False
        bib = bibtexparser.load(bibtex_file, parser=bib_parser)

    with open(titles) as titles_file:
        titles_parser = BibTexParser()
        titles_parser.customization = convert_to_unicode
        titles = bibtexparser.load(titles_file, parser=titles_parser)

    records = {}
    for record in bib.entries:
        if 'journal' in record:
            journal_key = record['journal'].lower()
            if journal_key in titles.strings:
                record['journal'] = titles.strings[record['journal'].lower()]
        if 'author' in record:
            # Work around a badly converted accented i.
            record['author'] = record['author'].replace('{́i}', 'í')
        records[record['id'].strip()] = record
    return records
def parse_urlfile(url_file):
    """
    take a file of the form

    category: ads url

    and get the bibtex from the URL and return a list of Paper objects
    with the category stored as the subject.  Lines starting with "#" and
    blank lines are skipped.  The result is sorted in reverse order.
    """
    papers = []

    with open(url_file) as f:
        for line in f:
            if line.startswith("#") or line.strip() == "":
                continue

            # Split on the first ": " only, so the URL part stays intact.
            subject, url = line.split(": ", 1)

            # for the ADS bibtex URL, lop off the paper_id
            paper_id = url.strip().split("/")[-1]
            bibtex_url = "http://adsabs.harvard.edu/cgi-bin/nph-bib_query?bibcode={}&data_type=BIBTEX".format(paper_id)

            # urlopen gives us bytes that must be decoded before use.
            print(bibtex_url)
            with urllib.request.urlopen(bibtex_url) as response:
                bibtex_html = response.read()
            text_lines = [raw.decode("utf8") for raw in bibtex_html.splitlines()]

            # strip off any header and just leave the bibtex: keep
            # everything from the first *line* that starts with "@".
            start = next(
                (i for i, text in enumerate(text_lines) if text.startswith("@")),
                None,
            )
            if start is None:
                # No BibTeX record in the response.
                continue
            bibtex = "".join("{}\n".format(text) for text in text_lines[start:])

            # A fresh parser per request: a BibTexParser keeps appending
            # to its own database, so a shared one would return earlier
            # papers again.
            parser = BibTexParser()
            parser.customization = customizations
            bib_database = bibtexparser.loads(bibtex, parser=parser)

            for e in bib_database.entries:
                p = extract_paper_info(e)
                # Check the extracted paper, not the raw entry.
                if p is not None:
                    p.subject = subject
                    papers.append(p)

    papers.sort(reverse=True)
    return papers
def test_article_special_characters(self):
    """LaTeX-escaped characters in the author field are left as written."""
    path = 'bibtexparser/tests/data/article_with_special_characters.bib'
    with open(path, 'r') as bibfile:
        parsed = BibTexParser(bibfile.read())
        res = parsed.get_entry_list()

    article = {
        'ENTRYTYPE': 'article',
        'ID': 'Cesar2013',
        'author': 'Jean C{\\\'e}sar{\\\"u}',
        'title': 'An amazing title',
        'journal': 'Nice Journal',
        'year': '2013',
        'month': 'jan',
        'volume': '12',
        'pages': '12-23',
        'keyword': 'keyword1, keyword2',
        'comments': 'A comment',
        'abstract': "This is an abstract. This line should be long enough to test\nmultilines... and with a french érudit word",
    }
    self.assertEqual(res, [article])
def test_article(self):
    """article.bib parses to one entry, available both as a list and keyed by ID."""
    with codecs.open('bibtexparser/tests/data/article.bib', 'r', 'utf-8') as bibfile:
        parsed = BibTexParser(bibfile.read())
        res_list = parsed.get_entry_list()
        res_dict = parsed.get_entry_dict()

    article = {
        'ENTRYTYPE': 'article',
        'ID': 'Cesar2013',
        'author': 'Jean César',
        'title': 'An amazing title',
        'journal': 'Nice Journal',
        'year': '2013',
        'month': 'jan',
        'volume': '12',
        'pages': '12-23',
        'keyword': 'keyword1, keyword2',
        'comments': 'A comment',
        'abstract': 'This is an abstract. This line should be long enough to test\nmultilines... and with a french érudit word',
    }
    self.assertEqual(res_list, [dict(article)])
    self.assertEqual(res_dict, {'Cesar2013': dict(article)})
def main(argv=None):
    """Parse a BibTeX file and print its summary, journals and conferences.

    The file name is ``argv[1]`` when given, otherwise "example.bib";
    ``argv`` defaults to ``sys.argv``.  Returns 0.
    """
    if argv is None:
        argv = sys.argv

    parser = BibTexParser()
    parser.customization = customizations

    filename = argv[1] if len(argv) > 1 else "example.bib"
    with open(filename) as bibtex_file:
        bibtex_str = bibtex_file.read()
    bib_database = bibtexparser.loads(bibtex_str, parser=parser)

    print_summary(bib_database.entries)
    print_journals(bib_database.entries)
    print_conferences(bib_database.entries)
    return 0
def parse_bibtex(bib):
    '''Parses the BibTex returned by the DOI resolver

    Args:
        bib (str): a BibTex record

    Returns:
        Dict containing reference data
    '''
    for entity, replacement in ENTITIES.iteritems():
        bib = bib.replace(entity, replacement)

    import bibtexparser
    from bibtexparser.bparser import BibTexParser
    from bibtexparser.customization import convert_to_unicode

    unicode_parser = BibTexParser()
    unicode_parser.customization = convert_to_unicode
    parsed = bibtexparser.loads(bib, parser=unicode_parser).entries[0]

    # Unwrap braces around capitalised runs or a single character; any
    # brace that survives is treated as unhandled LaTeX.
    braces = re.compile(u'\{([A-Z_ \-]+|[\u0020-\uD7FF])\}', re.U)
    for key, val in parsed.iteritems():
        unwrapped = braces.sub(r'\1', val)
        if '{' in unwrapped:
            raise Exception('Unhandled LaTeX: {}'.format(unwrapped.encode('cp1252')))
        parsed[key] = unwrapped

    parsed['pages'] = parsed.get('pages', '').replace('--', '-')
    publisher = parsed.get('publisher', '')
    if publisher.endswith(')'):
        # Drop a trailing parenthesised suffix from the publisher name.
        parsed['publisher'] = parsed['publisher'].rsplit('(', 1)[0].rstrip()
    return parsed
def deleteId(ident):
    """Delete a file based on its id in the bibtex file

    Removes the file recorded for `ident`, its directory if that becomes
    empty, and the entry itself from the index.  Returns False when the
    index cannot be read or `ident` is unknown, True otherwise (failures
    while deleting are reported as warnings only).
    """
    try:
        with open(config.get("folder")+'index.bib', 'r', encoding='utf-8') \
                as fh:
            bibtex = BibTexParser(fh.read())
        bibtex = bibtex.get_entry_dict()
    except (IOError, TypeError):
        tools.warning("Unable to open index file.")
        return False

    if ident not in bibtex.keys():
        return False

    # Read the path once.  The handlers below used to re-read
    # bibtex[ident]['file'], which raised again inside the handler when the
    # field was missing or the entry had already been deleted.
    file_path = bibtex[ident].get('file', '')

    try:
        os.remove(file_path)
    except OSError:
        tools.warning("Unable to delete file associated to id "+ident+" : " +
                      file_path)

    tag_dir = os.path.dirname(file_path)
    try:
        if not os.listdir(tag_dir):
            os.rmdir(tag_dir)
    except OSError:
        tools.warning("Unable to delete empty tag dir " + tag_dir)

    try:
        del(bibtex[ident])
        bibtexRewrite(bibtex)
    except KeyError:
        tools.warning("No associated bibtex entry in index for file " +
                      file_path)
    return True
def test_multiple_entries(self):
    """Comments of multiple_entries_and_comments.bib are collected in order."""
    path = 'bibtexparser/tests/data/multiple_entries_and_comments.bib'
    with open(path) as bibfile:
        database = BibTexParser().parse_file(bibfile)
    self.assertEqual(database.comments, ["", "A comment"])
def test_article(self):
    """The single entry of article.bib is returned by both accessors."""
    source_path = 'bibtexparser/tests/data/article.bib'
    with codecs.open(source_path, 'r', 'utf-8') as bibfile:
        bib = BibTexParser(bibfile.read())
        res_list = bib.get_entry_list()
        res_dict = bib.get_entry_dict()

    def cesar():
        # A fresh copy per expectation so the two cannot alias each other.
        return {
            'keyword': 'keyword1, keyword2',
            'ENTRYTYPE': 'article',
            'abstract': 'This is an abstract. This line should be long enough to test\nmultilines... and with a french érudit word',
            'year': '2013',
            'journal': 'Nice Journal',
            'ID': 'Cesar2013',
            'pages': '12-23',
            'title': 'An amazing title',
            'comments': 'A comment',
            'author': 'Jean César',
            'volume': '12',
            'month': 'jan',
        }

    self.assertEqual(res_list, [cesar()])
    self.assertEqual(res_dict, {'Cesar2013': cesar()})
def test_article_comma_first(self):
    """Entries written with leading commas parse like regular ones."""
    with open('bibtexparser/tests/data/article_comma_first.bib', 'r') as bibfile:
        bib = BibTexParser(bibfile.read())
        res = bib.get_entry_list()

    def article(ident, author):
        # The two entries differ only in their ID and author.
        return {
            'ENTRYTYPE': 'article',
            'journal': 'Nice Journal',
            'volume': '12',
            'ID': ident,
            'year': '2013',
            'author': author,
            'comments': 'A comment',
            'keyword': 'keyword1, keyword2',
            'title': 'An amazing title',
        }

    expected = [
        article('Cesar2013', 'Jean Cesar'),
        article('Baltazar2013', 'Jean Baltazar'),
    ]
    self.assertEqual(res, expected)
def _ingest_citations(rc):
    """Read the BibTeX file ``rc.filename`` and upsert each entry.

    Entries are written with ``rc.client.update_one`` into ``rc.db`` /
    ``rc.coll``, keyed by their BibTeX ID.
    """
    import bibtexparser
    from bibtexparser.bparser import BibTexParser
    from bibtexparser.customization import getnames

    parser = BibTexParser()
    parser.ignore_nonstandard_types = False

    def customizations(record):
        # Break author/editor fields on ", " and " and " into single names.
        for field in ("author", "editor"):
            if field not in record:
                continue
            names = []
            for chunk in record[field].replace("\n", " ").split(", "):
                for piece in chunk.split(" and "):
                    names.append(piece.strip())
            record[field] = getnames(names)
        return record

    parser.customization = customizations
    with open(rc.filename, "r", encoding='utf-8') as handle:
        database = bibtexparser.load(handle, parser=parser)
    coll = rc.client[rc.db][rc.coll]
    for entry in database.entries:
        key = entry.pop("ID")
        entry["entrytype"] = entry.pop("ENTRYTYPE")
        if "author" in entry:
            authors = []
            for raw in entry["author"]:
                authors.extend(name.strip() for name in RE_AND.split(raw))
            entry["author"] = authors
        if "title" in entry:
            entry["title"] = RE_SPACE.sub(" ", entry["title"])
        rc.client.update_one(rc.db, rc.coll, {"_id": key}, entry, upsert=True)
def import_bibtex(request):
    """Import the uploaded BibTeX file into a review source, then redirect.

    Uploads that are neither ``.bib`` / ``.bibtex`` files nor of content
    type ``application/x-bibtex`` are rejected with an error message.
    """
    review_pk = request.POST['review-id']
    source_pk = request.POST['source-id']
    review = Review.objects.get(pk=review_pk)
    source = Source.objects.get(pk=source_pk)

    upload = request.FILES['bibtex']
    extension = os.path.splitext(upload.name)[1]
    accepted_extensions = ['.bib', '.bibtex']

    if not (extension in accepted_extensions
            or upload.content_type == 'application/x-bibtex'):
        messages.error(
            request,
            u'Invalid file type. Only .bib or .bibtex files are accepted.')
    else:
        unicode_parser = BibTexParser()
        unicode_parser.customization = convert_to_unicode
        database = bibtexparser.load(upload, parser=unicode_parser)
        _import_articles(
            request, source,
            bibtex_to_article_object(database, review, source))

    target = r('import_studies', args=(review.author.username, review.name))
    return redirect(target)
def save(self):
    """Save one Reference per BibTeX entry of the uploaded file.

    Returns ``(good, bad, results)``: counts of saved and failed entries
    plus a message line for each entry.
    """
    bibfile = self.cleaned_data['file'].file
    entries = BibTexParser(bibfile, customization=convert_to_unicode).get_entry_list()

    counts = {'good': 0, 'bad': 0}
    results = []
    for item in entries:
        # Fields present both on the model and in the entry.
        common = set(Reference._meta.get_all_field_names()).intersection(item.keys())
        reference = Reference()
        for field in common:
            if field != 'id':
                setattr(reference, field, item[field])
            else:
                # The BibTeX id becomes the slug.
                setattr(reference, 'slug', item[field])
        try:
            reference.save()
        except Exception as e:
            counts['bad'] += 1
            results.append("![{0}]: {1}".format(reference.slug, e.message))
        else:
            counts['good'] += 1
            results.append("[{0}]: {1}".format(reference.slug, reference.title))
    return counts['good'], counts['bad'], results
def bib2jekyllcol(inputFile, outputDir):
    """This prints the bibtex file to output directory as jekyll collection folder(s)

    The visible part parses `inputFile` and prepares `outputDir`: the
    directory is created when missing, otherwise every regular file in it
    is deleted.
    """
    # read and parse bib file
    with open(inputFile) as bibtex_file:
        bibtex_str = bibtex_file.read()

    parser = BibTexParser()
    parser.customization = convert_to_unicode
    bib_database = bibtexparser.loads(bibtex_str, parser=parser)

    # NOTE(review): month_list, type_list and bib_database are not used in
    # the code visible here -- presumably consumed by code that follows.
    # create dictionary for transformation of month to number
    month_list = ["jan", "feb", "mar", "apr", "may", "june", "july", "aug", "sept", "oct", "nov", "dec"]

    # type names:
    type_list = ["title", "author", "journal", "volume", "number",
                 "year", "month", "doi", "pages", "publisher", "booktitle", "note"]

    if not os.path.exists(outputDir):
        os.makedirs(outputDir)
    else:
        print("Deleting existing collection file...\n")
        for file in os.listdir(outputDir):
            file_path = os.path.join(outputDir, file)
            try:
                if os.path.isfile(file_path):
                    os.unlink(file_path)
            # `except X as e` and print(e) are valid on Python 2.6+ and 3;
            # the old `except X, e` / `print e` forms fail to compile on 3.
            except Exception as e:
                print(e)
def read_bib_file(filename, homogenize=False):
    """
    Read bibtex file.

    Args:
        filename (str): path of the bibtex file.
        homogenize (bool): whether to homogenize the entries upon reading.

    Returns:
        A BibDatabase object.
    """
    # A missing file is parsed as an empty string.
    if os.path.exists(filename):
        with open(filename, 'r', encoding='utf-8') as handle:
            contents = handle.read()
    else:
        contents = ""

    # A custom parser is only needed when homogenizing.
    if homogenize:
        chosen_parser = BibTexParser()
        chosen_parser.customization = nomenclature.homogenize_latex_encoding
    else:
        chosen_parser = None

    return bibtexparser.loads(contents, parser=chosen_parser)
def parseEntry(s):
    """Return the first entry of BibTeX string `s`, parsed with `convert_to_unicode`."""
    # normalize unicode by reparsing
    reparser = BibTexParser()
    reparser.customization = convert_to_unicode
    database = bibtexparser.loads(s, parser=reparser)
    first_entry = database.entries[0]
    return first_entry
def _processResults(self, data):
    """Get bibtex data from zbMATH website.

    Finds the ``bibtex/....bib`` links in `data`, downloads each record
    and stores the dumped BibTeX, one record per block, in ``self.refs``.
    When ``self.otherID`` is set and an MRef match is found, the MSN entry
    is used instead, with the zbl number (and a missing doi) copied over.
    """
    import bibtexparser
    from bibtexparser.bparser import BibTexParser

    bibs = re.findall(r"(?si)bibtex/.*?\d{3,}\.bib", data)

    data = []

    if self.otherID:
        # setup for MRef fetching
        from msn import MRef
        mr = MRef()

    for bib in bibs:
        bibtext = urllib.urlopen("https://zbmath.org/" + bib).read()
        # A fresh parser per record: a BibTexParser keeps appending to its
        # own database, so a shared one would make every dump repeat the
        # records parsed before it.
        parser = BibTexParser()
        parser.customization = customizations
        zbl = bibtexparser.loads(bibtext, parser=parser)

        if self.otherID and mr.fetch(bibtext):
            # found MRef match for zbMATH record
            msn = bibtexparser.loads(mr.refs)
            # use MSN bibtex entry with zbl number added
            # and doi transferred if missing
            msn.entries[0]['zbl'] = zbl.entries[0]['zbl']
            if 'doi' not in msn.entries[0] and 'doi' in zbl.entries[0]:
                msn.entries[0]['doi'] = zbl.entries[0]['doi']
            zbl = msn

        data.append(bibtexparser.dumps(zbl))

    self.refs = "\n".join(data)
def read_bib_file(filename, homogenize=False):
    """
    Read bibtex file.

    Args:
        filename (str): path of the bibtex file.
        homogenize (bool): whether to homogenize the entries upon reading.

    Returns:
        A BibDatabase object.
    """
    def _contents():
        # A missing file is parsed as a single space.
        if not os.path.exists(filename):
            return " "
        with open(filename, 'r', encoding='utf-8') as handle:
            return handle.read()

    text = _contents()

    latex_parser = None
    if homogenize:
        latex_parser = BibTexParser()
        latex_parser.customization = nomenclature.homogenize_latex_encoding

    database = bibtexparser.loads(text, parser=latex_parser)
    return database
def test_homogenizes_fields(self):
    """With homogenize_fields=True the entry uses the canonical field names."""
    self.maxDiff = None
    path = 'bibtexparser/tests/data/article_homogenize.bib'
    with io.open(path, 'r', encoding='utf-8') as bibfile:
        bib = BibTexParser(bibfile.read(), homogenize_fields=True)

    cesar = {
        'ENTRYTYPE': 'article',
        'ID': 'Cesar2013',
        'author': 'Jean César',
        'editor': "Edith Or",
        'title': 'An amazing title',
        'journal': 'Nice Journal',
        'year': '2013',
        'month': 'jan',
        'volume': '12',
        'pages': '12-23',
        'keyword': 'keyword1, keyword2',
        'subject': "Some topic of interest",
        'url': "http://my.link/to-content",
        'comments': 'A comment',
        'abstract': 'This is an abstract. This line should be long enough to test\nmultilines... and with a french érudit word',
    }
    self.assertEqual(bib.get_entry_dict(), {'Cesar2013': cesar})
def read_bibtex(filename):
    """Parse the BibTeX file `filename` and return its list of entries.

    While parsing, each record is converted to unicode and its author and
    editor fields are split into lists of names; an empty editor field is
    removed.
    """
    import bibtexparser
    from bibtexparser.bparser import BibTexParser
    # Imported here like the other bibtexparser names, so editor_split does
    # not depend on a module-level `getnames` being in scope.
    from bibtexparser.customization import getnames

    def customizations(record):
        """
        custom transformation applied during parsing
        """
        record = bibtexparser.customization.convert_to_unicode(record)
        # Split author field from separated by 'and' into a list of "Name, Surname".
        record = bibtexparser.customization.author(record)
        # Split editor field from separated by 'and' into a list of "Name, Surname".
        record = editor_split(record)
        return record

    def editor_split(record):
        """
        custom transformation
        - split editor field into a list of "Name, Surname"
        :record: dict -- the record
        :returns: dict -- the modified record
        """
        if "editor" in record:
            if record["editor"]:
                record["editor"] = getnames([i.strip() for i in record["editor"].replace('\n', ' ').split(" and ")])
            else:
                del record["editor"]
        return record

    with open(filename) as f:
        parser = BibTexParser()
        parser.customization = customizations
        return bibtexparser.load(f, parser=parser).entries
def test_article_comma_first(self):
    """article_comma_first.bib yields its two articles in file order."""
    with open("bibtexparser/tests/data/article_comma_first.bib", "r") as bibfile:
        res = BibTexParser(bibfile.read()).get_entry_list()

    # (ID, author) pairs; all other fields are identical.
    variants = [("Cesar2013", "Jean Cesar"), ("Baltazar2013", "Jean Baltazar")]
    expected = []
    for ident, author in variants:
        expected.append({
            "ENTRYTYPE": "article",
            "journal": "Nice Journal",
            "volume": "12",
            "ID": ident,
            "year": "2013",
            "author": author,
            "comments": "A comment",
            "keyword": "keyword1, keyword2",
            "title": "An amazing title",
        })
    self.assertEqual(res, expected)
def test_crossref_cascading(self):
    """A chain of crossrefs r1 -> r2 -> r3 -> r4 is resolved entry by entry."""
    input_file_path = 'bibtexparser/tests/data/crossref_cascading.bib'

    # Each book points at the next one; the last has no crossref at all.
    chain = [('r1', 'r2'), ('r2', 'r3'), ('r3', 'r4'), ('r4', None)]
    entries_expected = {}
    for ident, parent in chain:
        book = {'ENTRYTYPE': 'book', 'ID': ident, 'date': '1911'}
        if parent is not None:
            book['_FROM_CROSSREF'] = []
            book['crossref'] = parent
        entries_expected[ident] = book

    parser = BibTexParser(add_missing_from_crossref=True)
    with open(input_file_path) as bibtex_file:
        bibtex_database = parser.parse_file(bibtex_file)
    self.assertDictEqual(bibtex_database.entries_dict, entries_expected)
def get_web_bib(params_obj, create_bib_snippets=False):
    """
    Return the parsed paper list, customized for web formatting.
    If create_bib_snippet is True, generate a subsetted bibtex snippet.
    """
    web_entries = []   # parsed with web_customizations
    bib_entries = []   # parsed with bib_customizations

    for bib_filetail in params_obj.BIB_FILES:
        bib_path = os.path.join(params_obj.BIB_FLDR, bib_filetail)
        with open(bib_path, "r") as bibfile:
            source = bibfile.read()

            # Same text, parsed once per customization.
            web_parser = BibTexParser(source, customization=web_customizations)
            web_entries.extend(web_parser.get_entry_list())

            bibfile.seek(0)  # rewind
            bib_parser = BibTexParser(source, customization=bib_customizations)
            bib_entries.extend(bib_parser.get_entry_list())

    if create_bib_snippets:
        for record in bib_entries:
            create_bibtex_snippet(params_obj, record)

    # Web-formatted entries, highest sort key first.
    return sorted(web_entries, key=sort_key, reverse=True)
def load_bibtex(bib_file_name):
    """Return the entries of a BibTeX file grouped by ``ENTRYTYPE``.

    Each group is a list ordered by year, then month, both descending.
    """
    with open(bib_file_name, 'r') as bib_file:
        parsed = BibTexParser(bib_file.read(), customization=convert_to_unicode)
        # {ID: {field: value, ...}, ...}
        by_id = parsed.get_entry_dict()

    all_refs = [ref for _, ref in by_id.items()]
    entry_types = set(ref["ENTRYTYPE"] for ref in all_refs)

    result = {}
    for entry_type in entry_types:
        members = []
        for ref in all_refs:
            if ref["ENTRYTYPE"] == entry_type:
                members.append(ref)
        # Sort by month first, then by year; both sorts are stable, so the
        # month order survives within each year.
        by_month = sorted(
            members,
            key=lambda ref: datetime.strptime(ref["month"], '%b').month,
            reverse=True)
        result[entry_type] = sorted(
            by_month, key=lambda ref: ref["year"], reverse=True)
    return result
def getcitation():
    """Refresh the ``cited`` field for a random sample of articles.

    Loads ``articles.bib``, picks up to 20 random entries, and for each
    entry whose ``clusterid`` is not ``"unknown"`` runs ``scholar.py`` to
    read the current citation count. The stored ``cited`` value is replaced
    only when the new count is higher than the old one by fewer than 8.

    The updated database is written to ``cited-add-articles.bib``, that file
    is copied to ``tempcited-add-articles.bib`` in the current directory,
    ``articles.bib`` is backed up to ``oldarticles.bib``, and finally
    ``articles.bib`` is overwritten.

    Returns 0.

    Raises:
        KeyError: if a sampled entry lacks ``title`` or ``clusterid``.
        ValueError: if a stored or fetched citation count is not an integer.
        OSError: if reading, copying or writing any of the files fails.
    """
    import random
    import shutil
    import subprocess

    articles_path = '/home/limingtao/ircre-bibtex/ircreupdate/articles.bib'
    cited_path = '/home/limingtao/ircre-bibtex/ircreupdate/cited-add-articles.bib'
    backup_path = '/home/limingtao/ircre-bibtex/ircreupdate/oldarticles.bib'
    scholar_cmd = [
        '/usr/bin/python3',
        '/home/limingtao/ircre-bibtex/ircreupdate/scholarpy/scholar.py',
        '-c', '1', '-C',
    ]

    articlesparser = BibTexParser(common_strings=False)
    articlesparser.ignore_nonstandard_types = False
    with open(articles_path, encoding='utf8') as articlesfile:
        articles_database = bibtexparser.load(articlesfile, articlesparser)
    articleentries = articles_database.entries

    # random.sample raises ValueError when asked for more items than exist.
    samplesize = min(20, len(articleentries))
    samplelist = random.sample(range(len(articleentries)), samplesize)
    print(samplelist)

    for i in samplelist:
        entry = articleentries[i]
        print("---------------------------")
        print("Entry number: " + str(i))
        title = entry['title']
        clusterid = entry['clusterid']
        print("Title: " + title)
        print("Cluster ID: " + clusterid)

        citations = "unknown"
        if clusterid != "unknown":
            print(str(i))
            try:
                # An argument list (no shell) keeps clusterid from being
                # interpreted as shell syntax.
                result = subprocess.run(
                    scholar_cmd + [clusterid],
                    stdout=subprocess.PIPE,
                    universal_newlines=True,
                )
                # Same filter as `grep -v list | grep Citations`; the count
                # is the last whitespace-separated token of what remains.
                matching = [
                    line for line in result.stdout.splitlines()
                    if "Citations" in line and "list" not in line
                ]
                citations = " ".join(matching).split()[-1]
            except (OSError, IndexError, ValueError,
                    subprocess.SubprocessError):
                # Lookup failed or produced no matching line.
                citations = "unknown"
        print("new Citations: " + citations)

        oldcitednumber = int(entry['cited']) if 'cited' in entry else 0
        print("Old Cited Number: " + str(oldcitednumber))

        if citations != "unknown":
            citednumber = int(citations)
            # Accept only increases smaller than 8.
            if 0 < citednumber - oldcitednumber < 8:
                entry['cited'] = str(citednumber)

    writer = BibTexWriter()
    writer.indent = ' '
    writer.order_entries_by = ('order',)

    with open(cited_path, 'w', encoding='utf8') as newarticlefile:
        bibtexparser.dump(articles_database, newarticlefile, writer=writer)

    # Copies run synchronously: the backup of articles.bib must be complete
    # before the file is reopened for writing (which truncates it).
    shutil.copy(cited_path, "tempcited-add-articles.bib")
    shutil.copy(articles_path, backup_path)

    with open(articles_path, 'w', encoding='utf8') as newarticlefile:
        bibtexparser.dump(articles_database, newarticlefile, writer=writer)
    return 0
def test_wrong(self):
    # Parsing wrong.bib must produce exactly one entry: the 'bar' article.
    expected_entries = [{'author': 'correct', 'id': 'bar', 'type': 'article'}]
    with open('bibtexparser/tests/data/wrong.bib', 'r') as handle:
        contents = handle.read()
    parsed_entries = BibTexParser(contents).get_entry_list()
    self.assertEqual(parsed_entries, expected_entries)
def load_bibtex_string(string):
    """Parse BibTeX source text and return the resulting database object.

    The parser is created with ``common_strings=True`` and
    ``ignore_nonstandard_types=True`` and applies ``customizations`` to
    each record.
    """
    configured = BibTexParser(common_strings=True,
                              ignore_nonstandard_types=True)
    configured.customization = customizations
    return bibtexparser.loads(string, parser=configured)
def load_bib(filename):
    """Read ``filename`` and return it parsed as a BibTeX database.

    Doubled braces (``{{`` and ``}}``) are collapsed to single braces
    before parsing; records are passed through ``convert_to_unicode``.
    """
    with open(filename) as bibtex_file:
        raw_text = bibtex_file.read()
    single_braced = raw_text.replace("{{", "{").replace("}}", "}")

    unicode_parser = BibTexParser()
    unicode_parser.customization = convert_to_unicode
    return bibtexparser.loads(single_braced, parser=unicode_parser)
def load_bibtex_file(filepath):
    """Parse the BibTeX file at ``filepath`` and return the database object.

    The parser is created with ``common_strings=True`` and
    ``ignore_nonstandard_types=True`` and applies ``customizations`` to
    each record.
    """
    configured = BibTexParser(common_strings=True,
                              ignore_nonstandard_types=True)
    configured.customization = customizations
    with open(filepath, "r") as handle:
        return bibtexparser.load(handle, parser=configured)
def test_nonstandard_ignored(self):
    # Parsing wrong.bib must produce exactly one entry: the 'bar' article.
    expected_entries = [{'author': 'correct', 'ID': 'bar', 'ENTRYTYPE': 'article'}]
    with open('bibtexparser/tests/data/wrong.bib', 'r') as handle:
        contents = handle.read()
    parsed_entries = BibTexParser(contents).get_entry_list()
    self.assertEqual(parsed_entries, expected_entries)
def printable_bibtex_entry(entry):
    """Convert a dictionary BibTeX entry to LaTeX format.

    The entry is serialised with ``bibtex_entry_str`` and re-parsed with
    ``homogeneize_latex_encoding`` as the record customization; the first
    (only) parsed entry is returned.
    """
    serialised = bibtex_entry_str(entry)
    latex_parser = BibTexParser()
    latex_parser.customization = homogeneize_latex_encoding
    reparsed = bibtexparser.loads(serialised, parser=latex_parser)
    return reparsed.entries[0]
def test_wrong(self):
    # wrong.bib is expected to parse to a single entry, the 'bar' article.
    with open('bibtexparser/tests/data/wrong.bib', 'r') as source:
        text = source.read()
    actual = BibTexParser(text).get_entry_list()
    wanted = [{'author': 'correct', 'id': 'bar', 'type': 'article'}]
    self.assertEqual(actual, wanted)
def _parser():
    """Build a fresh BibTeX parser configured for this module.

    String interpolation is switched off and every record is passed
    through ``cleanup_record``.
    """
    configured = BibTexParser()
    configured.interpolate_strings = False
    configured.customization = cleanup_record
    return configured
def test_nonstandard_ignored(self):
    # wrong.bib is expected to parse to a single entry, the 'bar' article.
    with open('bibtexparser/tests/data/wrong.bib', 'r') as source:
        text = source.read()
    actual = BibTexParser(text).get_entry_list()
    wanted = [{'author': 'correct', 'ID': 'bar', 'ENTRYTYPE': 'article'}]
    self.assertEqual(actual, wanted)
def bib_parse(path):
    """Return the list of entries parsed from the BibTeX file at ``path``.

    Each record is passed through ``custom_callback`` while parsing.
    """
    callback_parser = BibTexParser()
    callback_parser.customization = custom_callback
    with open(path) as handle:
        database = bibtexparser.load(handle, parser=callback_parser)
    return database.entries
def updatestatistics():
    """Recompute publication statistics and write them to ``newstatistics.js``.

    Reads ``articles.bib`` and derives: total publications, total articles,
    total citations, h-index and i10-index (via ``Hindex`` / ``I10index``),
    number of distinct journals, number of entries carrying a
    ``hihosubject`` field, citations per paper, and average impact factor.
    Entries without ``cited`` / ``impactfactor`` count as 0 / 0.0.

    When the database has no entries, the two per-article averages are
    written as 0 instead of dividing by zero.

    Returns 0.

    Raises:
        KeyError: if an entry has no ``journal`` field.
        ValueError: if ``cited`` or ``impactfactor`` is not numeric.
    """
    articlesparser = BibTexParser(common_strings=False)
    articlesparser.ignore_nonstandard_types = False
    with open('/home/limingtao/ircre-bibtex/ircreupdate/articles.bib', encoding='utf8') as articlesfile:
        articles_database = bibtexparser.load(articlesfile, articlesparser)
    articleentries = articles_database.entries

    totalarticles = len(articleentries)
    # NOTE(review): 28 and (below) 19 are fixed offsets whose meaning is not
    # visible here -- presumably publications/citations not tracked in
    # articles.bib; confirm.
    totalpublications = totalarticles + 28

    totalcitations = 0
    totalif = 0.0
    hihonumber = 0
    citationlist = []
    jourallist = []
    for entry in articleentries:
        citednumber = int(entry['cited']) if 'cited' in entry else 0
        impactfactor = float(entry['impactfactor']) if 'impactfactor' in entry else 0.0
        if 'hihosubject' in entry:
            hihonumber += 1
        citationlist.append(citednumber)
        jourallist.append(entry['journal'])
        totalcitations += citednumber
        totalif += impactfactor

    hindex = Hindex(citationlist)
    i10index = I10index(citationlist)
    totalcitations += 19
    journalnumber = len(set(jourallist))

    # Guard the averages against an empty database.
    if totalarticles:
        citationperpaper = totalcitations / totalarticles
        averageif = totalif / totalarticles
    else:
        citationperpaper = 0.0
        averageif = 0.0

    statements = [
        'totalpublications = "%d";\n' % totalpublications,
        'totalarticles = "%d";\n' % totalarticles,
        'totalcitations = "%d";\n' % totalcitations,
        'hindex = "%d";\n' % hindex,
        'i10index = "%d";\n' % i10index,
        'numberjournals = "%d";\n' % journalnumber,
        'numberesihighlycited = "%d";\n' % hihonumber,
        'citationperpaper = "%.2f";\n' % citationperpaper,
        'averageif = "%.3f";\n' % averageif,
    ]
    with open('/home/limingtao/ircre-bibtex/ircreupdate/newstatistics.js', 'w', encoding='utf8') as statisticsjsfile:
        statisticsjsfile.writelines(statements)
    return 0
def load_records(self, bibtex_filename=None):
    """Load all bibtex items as valid records.

    Parses ``bibtex_filename`` with ``td_biblio_customization`` applied to
    each entry, converts every parsed entry with ``self.to_record`` and
    stores the resulting list on ``self.records``.
    """
    biblio_parser = BibTexParser()
    biblio_parser.customization = td_biblio_customization
    with open(bibtex_filename) as handle:
        database = bibtexparser.load(handle, parser=biblio_parser)
        converted = []
        for raw_entry in database.get_entry_list():
            converted.append(self.to_record(raw_entry))
        self.records = converted
def load_bibtex_file(filepath):
    """Parse BibTeX file and return entry list.

    Every parsed entry is passed through ``_capitalize_entry_title`` and
    then ``_format_entry_authors``; the transformed entries are returned
    as a list.
    """
    # The 'U' mode flag was removed in Python 3.11 (open() raises
    # ValueError). Plain text mode already applies universal newlines on
    # Python 3.
    with open(filepath, 'r') as bibfile:
        bp = BibTexParser(bibfile)
        entries = bp.get_entry_list()
    # Two separate passes: all titles are capitalized before any author
    # formatting starts.
    entries = [_capitalize_entry_title(entry) for entry in entries]
    entries = [_format_entry_authors(entry) for entry in entries]
    return entries
def get_bibtex_data(filename):
    """Return the entries of the BibTeX file ``filename`` as a new list.

    Non-standard entry types are kept (``ignore_nonstandard_types`` is
    switched off on the parser).
    """
    permissive_parser = BibTexParser()
    permissive_parser.ignore_nonstandard_types = False
    with open(filename) as handle:
        database = bibtexparser.loads(handle.read(), permissive_parser)
    # Copy into a fresh list rather than handing out the database's own.
    return list(database.entries)
def load_bibtex(f):
    """Parse the BibTeX file at path ``f`` and return its list of entries.

    Lines whose first non-whitespace character is ``%`` are blanked out
    before parsing.
    """
    parser = BibTexParser()
    # NOTE(review): presumably disables the parser's alternative field-name
    # mapping -- confirm against the bibtexparser version in use.
    parser.alt_dict = {}
    with open(f, 'r') as fd:
        # Raw string: '\s' in a normal literal is an invalid escape
        # sequence. Because \s also matches newlines, blank lines directly
        # before a comment line are removed along with it.
        txt = re.sub(r'^\s*%.*$', '', fd.read(), flags=re.MULTILINE)
    bib = bibtexparser.loads(txt, parser=parser)
    return bib.entries
def read_bibtex(bibtex_str):
    """Parse BibTeX text and return its entries after three customizations.

    The parser keeps non-standard entry types and homogenizes field names.
    Each entry is passed through bibtexparser's ``keyword`` and
    ``convert_to_unicode`` customizations; once all entries have been
    converted, each is passed through ``author``.
    """
    customize = bibtexparser.customization

    parser = BibTexParser(common_strings=True)
    parser.ignore_nonstandard_types = False
    parser.homogenize_fields = True
    database = parser.parse(bibtex_str)

    # First pass: keyword then unicode conversion, entry by entry.
    converted = [
        customize.convert_to_unicode(customize.keyword(entry))
        for entry in database.entries
    ]
    # Second pass: author handling, only after every entry is converted.
    return [customize.author(entry) for entry in converted]
def generateFromBib(bibfile):
    """Pretty-print every entry of ``bibfile`` whose ID is in ``pubList``.

    The file is parsed with ``customizations`` applied to each record;
    matching entries are handed to ``prettyPrintEntry`` in file order.
    """
    with open(bibfile) as handle:
        source_text = handle.read()

    custom_parser = BibTexParser()
    custom_parser.customization = customizations
    database = bibtexparser.loads(source_text, parser=custom_parser)

    selected = (item for item in database.entries if item["ID"] in pubList)
    for item in selected:
        prettyPrintEntry(item)