def test_multiple_entries(self):
    # Parse the fixture that mixes several entries with comments and check
    # that the comments collected by the parser match the expected list.
    fixture = 'bibtexparser/tests/data/multiple_entries_and_comments.bib'
    with open(fixture) as handle:
        database = BibTexParser().parse_file(handle)
    self.assertEqual(database.comments, ["", "A comment"])
def test_multiple_entries(self):
    # The fixture contains several entries plus comments; only the comments
    # gathered on the resulting database are checked here.
    fixture_path = 'bibtexparser/tests/data/multiple_entries_and_comments.bib'
    expected_comments = ["", "A comment"]
    with open(fixture_path) as source:
        bib_parser = BibTexParser()
        parsed = bib_parser.parse_file(source)
    self.assertEqual(parsed.comments, expected_comments)
def test_crossref_cascading(self):
    # Entries r1 -> r2 -> r3 -> r4 reference each other in a chain; parse
    # with add_missing_from_crossref=True and compare the resulting dict.
    bib_path = 'bibtexparser/tests/data/crossref_cascading.bib'

    # r1..r3 share one shape and differ only in their ID and crossref target.
    expected = {
        key: {
            'ENTRYTYPE': 'book',
            'ID': key,
            '_FROM_CROSSREF': [],
            'crossref': target,
            'date': '1911',
        }
        for key, target in (('r1', 'r2'), ('r2', 'r3'), ('r3', 'r4'))
    }
    # r4 ends the chain: no crossref and no '_FROM_CROSSREF' key.
    expected['r4'] = {'ENTRYTYPE': 'book', 'ID': 'r4', 'date': '1911'}

    crossref_parser = BibTexParser(add_missing_from_crossref=True)
    with open(bib_path) as handle:
        database = crossref_parser.parse_file(handle)
    self.assertDictEqual(database.entries_dict, expected)
def test_crossref_cascading_cycle(self):
    # circ1 and circ2 cross-reference each other; parsing is expected to log
    # an ERROR on 'bibtexparser.bibdatabase' and still return both entries.
    bib_path = 'bibtexparser/tests/data/crossref_cascading_cycle.bib'
    expected = {
        key: {
            'ENTRYTYPE': 'book',
            'ID': key,
            '_FROM_CROSSREF': [],
            'crossref': target,
            'date': '1911',
        }
        for key, target in (('circ1', 'circ2'), ('circ2', 'circ1'))
    }
    expected_log = (
        "ERROR:bibtexparser.bibdatabase:"
        "Circular crossref dependency: circ1->circ2->circ1."
    )

    crossref_parser = BibTexParser(add_missing_from_crossref=True)
    with self.assertLogs('bibtexparser.bibdatabase', level='ERROR') as captured, \
            open(bib_path) as handle:
        database = crossref_parser.parse_file(handle)

    self.assertIn(expected_log, captured.output)
    self.assertDictEqual(database.entries_dict, expected)
def parse_bibtex(bibfile: str):
    """
    Read a .bib file of compendium entries (as exported by Zotero) and
    return one plain dict per entry, in a list.

    Each dict carries a running integer "id" (the enumeration index), the
    values produced by the `_extract_*` helpers (title, publisher text,
    year/month/day, authors, tags) and the raw "abstract" and "url" fields,
    which are None when the entry does not have them.
    """
    bib_parser = BibTexParser(common_strings=True)
    with open(bibfile, "r") as handle:
        entries_by_key = bib_parser.parse_file(handle).entries_dict

    def _to_record(index, entry):
        # Build the flat record for a single BibTeX entry.
        year, month, day = _extract_date(entry)
        return {
            "id": index,
            "title": _extract_title(entry),
            "abstract": entry.get("abstract"),
            "publisher_text": _extract_publisher(entry),
            "year": year,
            "month": month,
            "day": day,
            "url": entry.get("url"),
            "authors": _extract_authors(entry),
            "tags": _extract_tags(entry),
        }

    return [
        _to_record(index, entry)
        for index, entry in enumerate(entries_by_key.values())
    ]
def get_bibtex_dict(stream):
    """Parse BibTeX from `stream` and return the database's entry dict.

    The parser is configured to keep nonstandard entry types and to leave
    field names un-homogenised. The return value is whatever
    `get_entry_dict()` yields (presumably entries keyed by ID; confirm
    against the bibtexparser version in use).
    """
    from bibtexparser.bparser import BibTexParser

    bib_parser = BibTexParser()
    bib_parser.ignore_nonstandard_types = False
    bib_parser.homogenise_fields = False

    # TODO: one bit of homogenization that might be nice: it seems that
    # newlines get preserved, in `author` records at least. Those should be
    # replaced with spaces (and multiple spaces collapsed if needed).

    database = bib_parser.parse_file(stream)
    return database.get_entry_dict()
def get_bibtex_dict(stream):
    """Return the entry dict obtained by parsing the BibTeX in `stream`.

    Nonstandard entry types are kept and field homogenisation is switched
    off before parsing. The result comes straight from `get_entry_dict()`.
    """
    from bibtexparser.bparser import BibTexParser

    reader = BibTexParser()
    reader.ignore_nonstandard_types = False
    reader.homogenise_fields = False

    # TODO: one bit of homogenization that might be nice: it seems that
    # newlines get preserved, in `author` records at least. Those should be
    # replaced with spaces (and multiple spaces collapsed if needed).

    parsed = reader.parse_file(stream)
    return parsed.get_entry_dict()
def _read_bibtex_file(self, bibtex_file) -> dict:
    """Parse an open BibTeX file object and return the database's `entries_dict`.

    Args:
        bibtex_file: File-like object handed to `BibTexParser.parse_file`.

    Returns:
        The `entries_dict` of the parsed database.

    Raises:
        forms.ValidationError: With code "invalid_bibtex" when the parser
            raises anything. The original exception is attached as
            `__cause__` so the real parser failure is not lost.
    """
    parser = BibTexParser(common_strings=True)
    try:
        bib_db = parser.parse_file(bibtex_file)
    except Exception as ex:
        # Any parser failure becomes a form error; chain explicitly so the
        # traceback shows this as a direct cause rather than an incidental
        # "during handling of the above exception" context.
        raise forms.ValidationError(
            _("Error parsing BibTeX: %(msg)s"),
            params={"msg": str(ex)},
            code="invalid_bibtex",
        ) from ex
    return bib_db.entries_dict
def test_crossref_missing_entries(self):
    # 'mcr' points at a crossref target that is absent from the fixture;
    # parsing is expected to log an ERROR and return the entry as listed.
    bib_path = 'bibtexparser/tests/data/crossref_missing_entries.bib'
    expected = {
        'mcr': {
            'ENTRYTYPE': 'inbook',
            'ID': 'mcr',
            '_crossref': 'missing1',
            'author': 'Megan Mistrel',
            'crossref': 'missing1',
            'origdate': '1933',
            'title': 'Lumbering Lunatics',
        },
    }
    expected_log = (
        "ERROR:bibtexparser.bibdatabase:"
        "Crossref reference missing1 for mcr is missing."
    )

    crossref_parser = BibTexParser(add_missing_from_crossref=True)
    with self.assertLogs('bibtexparser.bibdatabase', level='ERROR') as captured, \
            open(bib_path) as handle:
        database = crossref_parser.parse_file(handle)

    self.assertIn(expected_log, captured.output)
    self.assertDictEqual(database.entries_dict, expected)
def test_crossref_cascading_cycle(self):
    # The fixture holds two books that cross-reference each other. The parse
    # must log the circular-dependency error and still yield both entries.
    fixture = 'bibtexparser/tests/data/crossref_cascading_cycle.bib'

    def _book(entry_id, points_to):
        # Expected shape of one entry in the cycle.
        return {
            'ENTRYTYPE': 'book',
            'ID': entry_id,
            '_FROM_CROSSREF': [],
            'crossref': points_to,
            'date': '1911',
        }

    wanted = {
        'circ1': _book('circ1', 'circ2'),
        'circ2': _book('circ2', 'circ1'),
    }

    bib_parser = BibTexParser(add_missing_from_crossref=True)
    with self.assertLogs('bibtexparser.bibdatabase', level='ERROR') as log_ctx, \
            open(fixture) as source:
        parsed = bib_parser.parse_file(source)

    self.assertIn(
        "ERROR:bibtexparser.bibdatabase:Circular crossref dependency: circ1->circ2->circ1.",
        log_ctx.output,
    )
    self.assertDictEqual(parsed.entries_dict, wanted)
def read(self, filename):
    """Render the BibTeX file at `filename` to HTML and return it with metadata.

    The file is parsed with nonstandard entry types kept, each raw entry is
    converted by `self._parse_entry`, and the results are split by their
    'type' value into the sections the module-level `template` renders.
    Entries whose 'type' matches none of the known sections are not rendered.

    Args:
        filename: Path of the BibTeX file to read.

    Returns:
        A `(html, parsed)` tuple: the rendered HTML string and the fixed
        page metadata (title, category, current timestamp) after each value
        has been passed through `self.process_metadata`.
    """
    metadata = {
        'title': 'Publications',
        'category': 'Publications',
        'date': str(datetime.datetime.now()),
    }
    parsed = {
        key: self.process_metadata(key, value)
        for key, value in metadata.items()
    }

    with open(filename) as f:
        parser = BibTexParser()
        parser.ignore_nonstandard_types = False
        db = parser.parse_file(f)

    entries = [self._parse_entry(e) for e in db.entries]

    def of_type(kind):
        # All parsed entries whose 'type' equals `kind`, in file order.
        return [e for e in entries if e['type'] == kind]

    # Template keyword -> entries of the matching 'type' value.
    sections = {
        'thesis': of_type('thesis'),
        'publications': of_type('publication'),
        'arxiv': of_type('arxiv'),
        'workshops': of_type('workshop'),
        'non_refereed': of_type('non-refereed'),
        'media': of_type('media'),
        'projects': of_type('project'),
    }

    # 'type' is only needed for the grouping above; remove it before the
    # entries reach the template.
    for e in entries:
        del e['type']

    jinja_env = Environment()
    jinja_env.filters['compile_jsx'] = compile_jsx
    html = jinja_env.from_string(template).render(**sections)

    return html, parsed
def test_crossref(self):
    # Parse the crossref fixture with add_missing_from_crossref=True (and
    # nonstandard entry types kept) and compare the full entries dict.
    self.maxDiff = None  # show the complete diff if the comparison fails
    bib_path = 'bibtexparser/tests/data/crossref_entries.bib'
    expected = {
        'cr1': {
            'ENTRYTYPE': 'inbook',
            'ID': 'cr1',
            '_FROM_CROSSREF': ['editor', 'publisher', 'year'],
            'archiveprefix': 'SomEPrFiX',
            'author': 'Graham Gullam',
            'crossref': 'cr_m',
            'editor': 'Edgar Erbriss',
            'origdate': '1955',
            'primaryclass': 'SOMECLASS',
            'publisher': 'Grimble',
            'title': 'Great and Good Graphs',
            'year': '1974',
        },
        'cr2': {
            'ENTRYTYPE': 'inbook',
            'ID': 'cr2',
            '_FROM_CROSSREF': ['editor', 'publisher', 'year'],
            'author': 'Frederick Fumble',
            'crossref': 'cr_m',
            'editor': 'Edgar Erbriss',
            'institution': 'Institution',
            'origdate': '1943',
            'publisher': 'Grimble',
            'school': 'School',
            'title': 'Fabulous Fourier Forms',
            'year': '1974',
        },
        'cr3': {
            'ENTRYTYPE': 'inbook',
            'ID': 'cr3',
            '_FROM_CROSSREF': ['editor', 'publisher', 'year'],
            'archiveprefix': 'SomEPrFiX',
            'author': 'Arthur Aptitude',
            'crossref': 'crt',
            'editor': 'Mark Monkley',
            'eprinttype': 'sometype',
            'origdate': '1934',
            'publisher': 'Rancour',
            'title': 'Arrangements of All Articles',
            'year': '1996',
        },
        'cr4': {
            'ENTRYTYPE': 'inbook',
            'ID': 'cr4',
            '_FROM_CROSSREF': ['editor', 'publisher', 'year'],
            'author': 'Morris Mumble',
            'crossref': 'crn',
            'editor': 'Jeremy Jermain',
            'origdate': '1911',
            'publisher': 'Pillsbury',
            'title': 'Enterprising Entities',
            'year': '1945',
        },
        'cr5': {
            'ENTRYTYPE': 'inbook',
            'ID': 'cr5',
            '_FROM_CROSSREF': ['editor', 'publisher', 'year'],
            'author': 'Oliver Ordinary',
            'crossref': 'crn',
            'editor': 'Jeremy Jermain',
            'origdate': '1919',
            'publisher': 'Pillsbury',
            'title': 'Questionable Quidities',
            'year': '1945',
        },
        'cr6': {
            'ENTRYTYPE': 'inproceedings',
            'ID': 'cr6',
            '_FROM_CROSSREF': ['address', 'editor', 'eventdate',
                               'eventtitle', 'publisher', 'venue'],
            'address': 'Address',
            'author': 'Author, Firstname',
            'booktitle': 'Manual booktitle',
            'crossref': 'cr6i',
            'editor': 'Editor',
            'eventdate': '2009-08-21/2009-08-24',
            'eventtitle': 'Title of the event',
            'pages': '123--',
            'publisher': 'Publisher of proceeding',
            'title': 'Title of inproceeding',
            'venue': 'Location of event',
            'year': '2009',
        },
        'cr6i': {
            'ENTRYTYPE': 'proceedings',
            'ID': 'cr6i',
            'address': 'Address',
            'author': 'Spurious Author',
            'editor': 'Editor',
            'eventdate': '2009-08-21/2009-08-24',
            'eventtitle': 'Title of the event',
            'publisher': 'Publisher of proceeding',
            'title': 'Title of proceeding',
            'venue': 'Location of event',
            'year': '2009',
        },
        'cr7': {
            'ENTRYTYPE': 'inbook',
            'ID': 'cr7',
            '_FROM_CROSSREF': ['publisher', 'subtitle', 'titleaddon', 'verba'],
            'author': 'Author, Firstname',
            'crossref': 'cr7i',
            'pages': '123--126',
            'publisher': 'Publisher of proceeding',
            'subtitle': 'Book Subtitle',
            'title': 'Title of Book bit',
            'titleaddon': 'Book Titleaddon',
            'verba': 'String',
            'year': '2010',
        },
        'cr7i': {
            'ENTRYTYPE': 'book',
            'ID': 'cr7i',
            'author': 'Brian Bookauthor',
            'publisher': 'Publisher of proceeding',
            'subtitle': 'Book Subtitle',
            'title': 'Book Title',
            'titleaddon': 'Book Titleaddon',
            'verba': 'String',
            'year': '2009',
        },
        'cr8': {
            'ENTRYTYPE': 'incollection',
            'ID': 'cr8',
            '_FROM_CROSSREF': ['editor', 'publisher', 'subtitle', 'titleaddon'],
            'author': 'Smith, Firstname',
            'crossref': 'cr8i',
            'editor': 'Brian Editor',
            'pages': '1--12',
            'publisher': 'Publisher of Collection',
            'subtitle': 'Book Subtitle',
            'title': 'Title of Collection bit',
            'titleaddon': 'Book Titleaddon',
            'year': '2010',
        },
        'cr8i': {
            'ENTRYTYPE': 'collection',
            'ID': 'cr8i',
            'editor': 'Brian Editor',
            'publisher': 'Publisher of Collection',
            'subtitle': 'Book Subtitle',
            'title': 'Book Title',
            'titleaddon': 'Book Titleaddon',
            'year': '2009',
        },
        'cr_m': {
            'ENTRYTYPE': 'book',
            'ID': 'cr_m',
            'editor': 'Edgar Erbriss',
            'publisher': 'Grimble',
            'title': 'Graphs of the Continent',
            'year': '1974',
        },
        'crn': {
            'ENTRYTYPE': 'book',
            'ID': 'crn',
            'editor': 'Jeremy Jermain',
            'publisher': 'Pillsbury',
            'title': 'Vanquished, Victor, Vandal',
            'year': '1945',
        },
        'crt': {
            'ENTRYTYPE': 'book',
            'ID': 'crt',
            'editor': 'Mark Monkley',
            'publisher': 'Rancour',
            'title': 'Beasts of the Burbling Burns',
            'year': '1996',
        },
    }

    crossref_parser = BibTexParser(
        add_missing_from_crossref=True,
        ignore_nonstandard_types=False,
    )
    with open(bib_path) as handle:
        database = crossref_parser.parse_file(handle)
    self.assertDictEqual(database.entries_dict, expected)
sys.exit(0) urlBase = "https://dblp.org/search/publ/api?" db = BibDatabase() errors = [] print("Cleaning .bibtex bibliography with dblp") print( "\033[1;34mThis tool leaves unchanged books, thesis and all items that cannot be found on https://dblp.org\033[0m" ) with open(sys.argv[1]) as bibFile: parser = BibTexParser(common_strings=True) parser.customization = convert_to_unicode bibData = parser.parse_file(bibFile) bar = progressbar.ProgressBar(len(bibData.entries)).start() n = 0 for b in bibData.entries: if b["ENTRYTYPE"] == "book" or "thesis" in b["ENTRYTYPE"].lower(): db.entries.append(b) n = n + 1 continue while b["title"][0] == '{' and b["title"][2] != '}': b["title"] = b["title"][1:-1] url = urlBase + "q=" + b["title"].replace(" ", "+") + "&format=bib" dblp = requests.get(url)
def test_parse_file(self):
    # Parse the fixture named by self.input_file_path with a default parser
    # and compare the entry list against self.entries_expected.
    bib_parser = BibTexParser()
    with open(self.input_file_path) as handle:
        parsed = bib_parser.parse_file(handle)
    self.assertEqual(parsed.entries, self.entries_expected)