def uploaded_file(filename, collectionname=None):
    """Parse the BibTeX file ``temp/<filename>`` and persist every entry.

    Each entry is flattened into ``[key, type, author, journal, keywords,
    pages, title, volume, year, collectionname]`` (missing fields become
    "Not available") and stored through the ``Bibparse`` model.

    Sets ``flag`` to "failure" on any error; exceptions are logged, not
    propagated (matching the original best-effort behavior).
    """
    flag = "success"
    try:
        parser = bibtex.Parser()
        bib_data = parser.parse_file('temp/' + filename)
        # Fields extracted from every entry, in Bibparse column order.
        fields = ['author', 'journal', 'keywords', 'pages', 'title',
                  'volume', 'year']
        for key in bib_data.entries.keys():
            print(key)
            entry = bib_data.entries[key]
            # Build one row per entry.  The original accumulated all rows in
            # a single growing list but always read data[0..9], so every
            # entry was stored with the first entry's values.
            data = [key, entry.type]
            for field in fields:
                try:
                    data.append(entry.fields[field])
                except KeyError:
                    data.append("Not available")
            data.append(collectionname)
            print("------------------------------------------")
            print(data[0])
            print("------------------------------------------")
            bibdata = Bibparse(str(data[0]), str(data[1]), str(data[2]),
                               str(data[3]), str(data[4]), str(data[5]),
                               str(data[6]), data[7], data[8], str(data[9]))
            db.session.add(bibdata)
            db.session.commit()
    except Exception as e:
        flag = "failure"
        print(e)
def parse(self):
    """Parse ``self.bibfile`` into ``self.db``, annotating each entry with
    its BibTeX key and an HTML-safe anchor fragment.

    Assumes the ``latex+utf8`` codec is registered — TODO confirm caller
    imports the latexcodec package.
    """
    with codecs.open(self.bibfile, encoding="latex+utf8") as ff:
        # remove {} around single characters: useful in TeX, not in html.
        # BUG FIX: the original passed re.UNICODE as re.sub's 4th positional
        # argument, which is *count* — silently limiting it to 32
        # replacements.  Pass it as flags.
        f = re.sub(u"{(\w)}", u"\\1", ff.read(), flags=re.UNICODE)
    buf = StringIO(f)
    db = bibtex.Parser().parse_stream(buf)
    for k, v in db.entries.items():
        v.fields['key'] = k
        # fragment is the bibtex key sanitised for use in html anchors:
        # non-word chars become '_' and a leading digit gets a '_' prefix.
        fragment = re.sub("[^\w]", "_", k)
        fragment = re.sub("^(\d+)", "_\\1", fragment)
        v.fields['fragment'] = fragment
    # NOTE: a dead nested helper `_sortkey` (never called) was removed.
    self.db = db
def main(bibfile, template):
    """Render *bibfile* through the Jinja2 HTML template file *template*
    and print the rendered page to stdout."""
    # Load the template into a sandboxed environment with the custom filters.
    tenv = jinja2.sandbox.SandboxedEnvironment()
    tenv.filters['author_fmt'] = _author_fmt
    tenv.filters['author_list'] = _author_list
    tenv.filters['title'] = _title
    tenv.filters['venue_type'] = _venue_type
    tenv.filters['venue'] = _venue
    tenv.filters['main_url'] = _main_url
    tenv.filters['extra_urls'] = _extra_urls
    tenv.filters['monthname'] = _month_name
    with open(template) as f:
        tmpl = tenv.from_string(f.read())
    # Parse the BibTeX file.
    with open(bibfile) as f:
        db = bibtex.Parser().parse_stream(f)
    # Include the bibliography key in each entry.
    for k, v in db.entries.items():
        v.fields['key'] = k
    # Render the template, newest entries first.
    bib_sorted = sorted(db.entries.values(), key=_sortkey, reverse=True)
    out = tmpl.render(entries=bib_sorted)
    print(out)  # Py3 fix: print is a function
def ADStoAuthorDate(infilename, outfilename):
    """
    Changes the citation key from whatever it is (e.g., 2012MNRAS.416..465L)
    to AuthorDATE (e.g. Longmore2011) and writes the result to *outfilename*.
    """
    parser = bibtex.Parser()
    bib_data = parser.parse_file(infilename)
    new_bib_data = pybtex.database.BibliographyData()
    for key in bib_data.entries.keys():
        entry = bib_data.entries[key]
        entry.fields['key'] = key
        new_key = entry.persons['author'][0].last()[0].strip("{}") \
            + entry.fields['year']
        entry.key = new_key
        # add lower-case letter suffix for repeated name/date combinations.
        # FIXES: the original alphabet read "...uvqxyz" (duplicate 'q',
        # missing 'w'); and itertools iterators have no .next() on Py3.
        alphabet = itertools.cycle("abcdefghijklmnopqrstuvwxyz")
        while new_key in new_bib_data.entries:
            new_key += next(alphabet)
        new_bib_data.add_entry(new_key, entry)
    writer = Writer()
    writer.write_file(new_bib_data, outfilename)
def resolve_dupes(bibfile):
    """Repeatedly parse *bibfile*, renaming duplicated entry keys in place
    until the file parses cleanly.

    Returns True once parsing succeeds, False on any non-duplicate error.
    """
    suffix = 0
    while True:
        try:
            bibtex.Parser().parse_file(bibfile)
        except Exception as exc:
            message = str(exc)
            if "repeated" not in message:
                print(message)
                return False
            print(message)
            # The parser names the offending key after the final colon.
            duplicate = message.split(":")[-1][1:]
            # Read in the file
            with open(bibfile, 'r') as fh:
                contents = fh.read()
            # Rename only the first occurrence, then re-parse from scratch.
            contents = contents.replace(duplicate, duplicate + str(suffix), 1)
            suffix += 1
            # Write the file out again
            with open(bibfile, 'w') as fh:
                fh.write(contents)
            continue
        break
    print("duplicates resolved.")
    return True
def main(args):
    """Read a .bib file, sort its entries by year, and write a JS file
    defining a tag-color map and (tag, title, width) edge data."""
    bib_data = bibtex.Parser().parse_file(args.inp)
    # Sort chronologically by the numeric 'year' field.
    by_year = sorted(bib_data.entries.items(),
                     key=lambda item: int(item[1].fields['year']))
    papers = []
    tags = {}
    for key, entry in by_year:
        print(key, entry.fields['title'])
        title = entry.fields['title']
        papers.append(title)
        # 'swy' holds a dot-separated list of tag codes.
        tags[title] = [TAGS[code] for code in entry.fields['swy'].split('.')]
    with open(args.out, 'w') as f:
        f.writelines('var color = {\n')
        for i, human in enumerate(TAGS.values()):
            f.writelines(" '{}': \"{}\",\n".format(human, to_hex(rgb[i])))
        f.writelines("};\n")
        # write edges:
        f.writelines('var data = [\n')
        for i, human in enumerate(papers):
            for val in tags[human]:
                f.writelines(" ['{}', \"{}\", {}],\n".format(val, human, width))
        f.writelines("];\n")
def YAMLprinter(inputFile, outputFile):
    """Append a YAML rendering of every entry in *inputFile* to *outputFile*.

    Authors are split into names/surnames on the comma; TeX accents and
    braces are stripped from all fields.
    """
    # Open BibTeX file
    parser = bibtex.Parser()
    # Initialize TeX Accent Converter
    converter = TeXAccents.AccentConverter()
    bibdata = parser.parse_file(inputFile)
    # FIX: open the output once — the original re-opened (and leaked) a
    # fresh append-mode file handle for every single print() call.
    with open(outputFile, "a") as out:
        for bibId in bibdata.entries:
            bibEntry = bibdata.entries[bibId].fields
            bibAuthors = bibdata.entries[bibId].persons['author']
            print(f"- id: {bibId}", file=out)
            print(f" entrytype: {bibdata.entries[bibId].type}", file=out)
            print(" authors:", file=out)
            for author in range(len(bibAuthors)):
                detexed_author = converter.decode_Tex_Accents(
                    str(bibAuthors[author]),
                    utf8_or_ascii=1).replace('{', '').replace('}', '')
                # Assumes "Surname, Names" order — TODO confirm upstream.
                print(f" - names: {detexed_author.split(',')[1].lstrip()}",
                      file=out)
                print(f" surnames: {detexed_author.split(',')[0].lstrip()}",
                      file=out)
            for field in bibEntry:
                # Remove accents and brackets from the BibTeX field,
                # substitute underscores, and turn '--' into an en dash.
                detexed_field = converter.decode_Tex_Accents(
                    bibEntry[field], utf8_or_ascii=1).replace(
                        '{\_}', '_').replace('{', '').replace(
                            '}', '').replace('--', '–')
                # Print field
                print(f' {field}: "{detexed_field}"', file=out)
def bib2csv(file):
    """Flatten a .bib file into CSV-style data.

    Builds ``header`` (field name -> column index, in first-seen order) and
    ``data`` (one row per entry, columns in header order, '' for missing
    fields), then prints both.  Py3 port of Py2-only ``iteritems`` /
    tuple-unpacking lambda / print statements.
    """
    from pybtex.database.input import bibtex
    data = []
    header = {}
    parser = bibtex.Parser()
    bibdata = parser.parse_file(file)
    for bib_id in bibdata.entries:
        b = bibdata.entries[bib_id].fields
        # Register any fields we have not seen before.
        for key in b.keys():
            if key not in header:
                header[key] = len(header)
        # Emit this entry's row, ordered by the header's column index.
        current = []
        for key, _index in sorted(header.items(), key=lambda kv: kv[1]):
            value = b[key] if key in b else ''
            current.append(value)
        data.append(current)
    print(header)
    # NOTE(review): raises IndexError for files with fewer than 3 entries,
    # as the original did.
    print(data[2])
def main(output_dir, file_name=None, header_file="header.tpl", footer_file="footer.tpl"):
    """ Entry point for this tool. """
    create_directory(output_dir)
    # Read a BibTeX file (if given) or otherwise read from stdin.
    parser = bibtex.Parser()
    if file_name:
        bibdata = parser.parse_file(file_name)
    else:
        bibdata = parser.parse_stream(sys.stdin)
    header = read_file(header_file)
    footer = read_file(footer_file)
    # One listing per (sort key, direction) combination.
    listings = [
        ("year.html", sort_by_year(bibdata, output_dir)),
        ("year_reverse.html", sort_by_year(bibdata, output_dir, sort_reverse=True)),
        ("author.html", sort_by_author(bibdata, output_dir)),
        ("author_reverse.html", sort_by_author(bibdata, output_dir, sort_reverse=True)),
    ]
    for page_name, body in listings:
        write_file(os.path.join(output_dir, page_name), header + body + footer)
    # Create HTML-formatted BibTex file.
    parts = [
        """
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
<html lang="en">
<head>
<meta http-equiv="content-type" content="text/html; charset=utf-8">
<title>BibTeX entries</title>
</head>
<body>
"""
    ]
    for bibkey in bibdata.entries:
        parts.append(dump_bibtex_entry(bibdata.entries[bibkey]))
    parts.append("</body>\n</html>\n")
    write_file(os.path.join(output_dir, "bibtex.html"), "".join(parts))
    return 0
def main(argv=None):
    """Parse, sync, DOI-adapt, abbreviate, and rewrite a BibTeX file."""
    args = _get_parser().parse_args(argv)
    # Dicts preserve insertion order (Python 3.6+), so file order survives
    # unless an explicit sort by BibTeX key was requested.
    data = bibtex.Parser().parse_file(args.infile)
    entry_pairs = data.entries.items()
    if args.sort_by_bibkey:
        entry_pairs = sorted(data.entries.items())
    entries = tools.decode(dict(entry_pairs))
    entries = sync(entries, args.source, args.long_journal_names,
                   args.num_concurrent_requests)
    entries = adapt_doi_urls(entries, args.doi_url_type)
    entries = journal_abbrev(entries, args.long_journal_names,
                             args.extra_abbrev_file)
    tools.write(entries, args.outfile, args.delimeter_type,
                tab_indent=args.tab_indent)
    return
def __init__(self, l=None, n=0, doicoll=None, matcoll=None,
             db_yaml='materials_db_dev.yaml'):
    """Build OSTI ELINK submission records for a cursor of materials.

    Parameters
    ----------
    l : optional list restricting which materials are fetched
    n : number of materials to fetch (semantics per get_materials_cursor)
    doicoll, matcoll : pre-built DOI/materials collections; when either is
        missing the adapter is loaded from *db_yaml* instead.

    FIX: the 'description' string literal was corrupted (broken across
    lines with a stray control character); reconstructed as one literal.
    """
    self.endpoint = 'https://www.osti.gov/elink/2416api'
    self.bibtex_parser = bibtex.Parser()
    self.matad = OstiMongoAdapter.from_config(db_yaml=db_yaml) \
        if doicoll is None or matcoll is None else \
        OstiMongoAdapter.from_collections(doicoll, matcoll)
    self.materials = self.matad.get_materials_cursor(l, n)
    research_org = ('Lawrence Berkeley National Laboratory (LBNL), '
                    'Berkeley, CA (United States)')
    self.records = []
    for material in self.materials:
        self.material = material
        # prepare record
        self.records.append(OrderedDict([
            ('osti_id', self.matad.get_osti_id(material)),
            ('dataset_type', 'SM'),
            ('title', self._get_title()),
            ('creators', 'Kristin Persson'),
            ('product_nos', self.material['task_id']),
            ('contract_nos', 'AC02-05CH11231; EDCBEE'),
            ('originating_research_org', research_org),
            ('publication_date', self._get_publication_date()),
            ('language', 'English'),
            ('country', 'US'),
            ('sponsor_org', 'USDOE Office of Science (SC), '
             'Basic Energy Sciences (BES) (SC-22)'),
            ('site_url', self._get_site_url(self.material['task_id'])),
            ('contact_name', 'Kristin Persson'),
            ('contact_org', 'LBNL'),
            ('contact_email', '*****@*****.**'),
            ('contact_phone', '+1(510)486-7218'),
            ('related_resource', 'https://materialsproject.org/citing'),
            # not listed in research_org
            ('contributor_organizations', 'MIT; UC Berkeley; Duke; U Louvain'),
            ('subject_categories_code', '36 MATERIALS SCIENCE'),
            ('keywords', self._get_keywords()),
            ('description', 'Computed materials data using density '
             'functional theory calculations. These calculations determine '
             'the electronic structure of bulk materials by solving '
             'approximations to the Schrodinger equation. For more '
             'information, see https://materialsproject.org/docs/calculations'),
        ]))
        # A falsy osti_id marks a new record: drop the field entirely so
        # ELINK creates it instead of updating an existing one.
        if not self.records[-1]['osti_id']:
            self.records[-1].pop('osti_id', None)
    self.records_xml = parseString(
        dicttoxml(self.records, custom_root='records', attr_type=False))
    items = self.records_xml.getElementsByTagName('item')
    for item in items:
        # dicttoxml emits generic <item> children; rename each to the
        # singular of its parent element (records -> record).
        self.records_xml.renameNode(item, '', item.parentNode.nodeName[:-1])
    logger.info(self.records_xml.toprettyxml())
def main(bibfile, template, output):  # pylint: disable=unused-argument
    """Render a BibTeX .bib file to HTML using an HTML template."""
    env = jinja2.sandbox.SandboxedEnvironment()
    # Register the custom template filters in one pass.
    env.filters.update({
        'author_fmt': _author_fmt,
        'author_list': _author_list,
        'title': _title,
        'venue_type': _venue_type,
        'venue': _venue,
        'main_url': _main_url,
        'extra_urls': _extra_urls,
        'monthname': _month_name,
    })
    tmpl = env.from_string(template.read())
    # Parse the BibTeX file and tag each entry with its own key.
    db = bibtex.Parser().parse_stream(bibfile)
    for key, entry in db.entries.items():
        entry.fields['key'] = key
    # Render newest-first and print the page.
    ordered = sorted(db.entries.values(), key=_sortkey, reverse=True)
    print(tmpl.render(entries=ordered))
def parse_bib(bibtex_str):
    """Parse *bibtex_str* as BibTeX and return its single entry.

    Asserts that the string contains exactly one entry.
    """
    parsed = bibtex.Parser().parse_string(bibtex_str)
    entries = parsed.entries
    assert len(entries) == 1
    (entry,) = entries.values()
    return entry
def bibtex_to_doi(bibfile, prev_doi):
    """Extract DOI/title pairs from *bibfile* and link each DOI to
    *prev_doi*.

    Returns a dict with the source doi/title, entry count, and the list of
    "prev_doi<DELIM>doi" reference strings.  Entries without a 'doi' or
    'title' field are reported and skipped.
    """
    title = bibfile.split("/")[-1].split(".")[0]
    # open a bibtex file
    bibdata = bibtex.Parser().parse_file(bibfile)
    # collect founded references in a dictionary {doi:title}
    item = {
        "doi": prev_doi,
        "title": title,
        "size": len(bibdata.entries),
        "references": [],
    }
    # loop through the individual references
    print("\nExtracting DOIs and titles from: \n %s...bib" % (bibfile[:40]))
    for bib_id in bibdata.entries:
        fields = bibdata.entries[bib_id].fields
        try:
            doi = str(fields["doi"])
            title = str(fields["title"])
            map_doi_title.add((doi, title))
            item["references"].append(prev_doi + DOI_DELIMITER + doi)
        except KeyError as e:
            print("\n !!! Key error for %s: %s" % (bib_id, str(e)))
            continue
    # print results
    print("\n Input bibtex db consists in:\n")
    for ref in item["references"]:
        print(" " + ref + "...")
    return item
def from_bibtex(cls, filename=None, stream=None):
    """Alternate constructor: build Citation objects from a BibTeX file
    or stream (exactly one of *filename*/*stream* should be given).

    Py3 port: ``dict.iteritems()`` and ``unicode()`` no longer exist.
    """
    from pybtex.database.input import bibtex
    parser = bibtex.Parser()
    if filename is not None:
        bib_data = parser.parse_file(filename)
    elif stream is not None:
        bib_data = parser.parse_stream(stream)
    citations = []
    for id_, entry in bib_data.entries.items():
        d = {}
        avail = entry.fields.keys()
        for prop in cls.T.properties:
            # 'authors' is assembled from entry.persons below, not fields.
            if prop.name == 'authors' or prop.name not in avail:
                continue
            d[prop.name] = entry.fields[prop.name]
        if 'author' in entry.persons:
            d['authors'] = [str(person) for person in entry.persons['author']]
        c = Citation(id=id_, type=entry.type, **d)
        citations.append(c)
    return citations
def initialize_bib(bib_file):
    """Load *bib_file* into the module-level bibliography and queue the two
    SciANN reference entries for output."""
    global _BIBLIOGRAPHY
    global _BIBLIOGRAPHY_TO_OUTPUT
    _BIBLIOGRAPHY = bibtex.Parser().parse_file(bib_file)
    for key in ('haghighat2021sciann', 'raissi2019physics'):
        _BIBLIOGRAPHY_TO_OUTPUT.append(_BIBLIOGRAPHY.entries[key])
def read_str(bibstr):
    """Parse *bibstr* as BibTeX in strict mode and return the database."""
    from pybtex.database.input import bibtex
    from pybtex import errors
    from io import StringIO
    # Strict mode turns pybtex warnings into hard errors.
    errors.enable_strict_mode()
    return bibtex.Parser().parse_stream(StringIO(bibstr))
def getEntryFromString(s):
    """Parse *s* as BibTeX and return its first entry, falling back to the
    DUMMY record when *s* cannot be parsed.

    Py3 fix: dict views are not indexable, so ``.items()[0]`` always raised
    TypeError; take the first item via an iterator instead.  The bare
    ``except`` is narrowed to ``Exception``.
    """
    parser = bibtex.Parser()
    try:
        parser.parse_stream(StringIO(s))
        key, entry = next(iter(parser.data.entries.items()))
    except Exception:
        # Bad input: parse the known-good DUMMY record instead.
        parser.parse_stream(StringIO(DUMMY))
        key, entry = next(iter(parser.data.entries.items()))
    return entry
def check_num_entries(bibfile):
    """Ask the user how many entries *bibfile* should contain and compare
    against what pybtex actually parsed.

    Returns True when the counts match, otherwise prints a warning and
    returns False.  Py3 fix: ``raw_input`` -> ``input``, print function.
    """
    parser = bibtex.Parser()
    bibdata = parser.parse_file(bibfile)
    n_bibitems = len(bibdata.entries)
    n_refs = int(input("How many entries in: %s" % (bibfile)))
    if n_bibitems == n_refs:
        return True
    print("\n*** pdf-extract has found only %d entries\n" % (n_bibitems))
    return False
def main(bibfile, template, save_path, save_individual=False):
    """Render a BibTeX file through a Jinja2 HTML template.

    When *save_individual* is true, *save_path* must be a directory and one
    ``<key>.html`` file is written per entry; otherwise *save_path* is a
    single output file for the whole bibliography.
    """
    # Make sure save_path is a directory if save_individual, and a valid file path otherwise
    if save_individual and not os.path.isdir(save_path):
        print(
            'save_individual is true, but save_path is not a directory. Quitting'
        )
        return
    elif not save_individual and not os.path.isdir(
            os.path.abspath(os.path.dirname(save_path))):
        print(
            'save_individual is false, but save_path is not a valid file location. Quitting'
        )
        return
    # Load the template.
    tenv = jinja2.sandbox.SandboxedEnvironment()
    tenv.filters['author_fmt'] = _author_fmt
    tenv.filters['author_list'] = _author_list
    tenv.filters['title'] = _title
    tenv.filters['venue_type'] = _venue_type
    tenv.filters['venue'] = _venue
    tenv.filters['main_url'] = _main_url
    tenv.filters['extra_urls'] = _extra_urls
    tenv.filters['monthname'] = _month_name
    with open(template) as f:
        tmpl = tenv.from_string(f.read())
    # Parse the BibTeX file.
    with open(bibfile) as f:
        db = bibtex.Parser().parse_stream(f)
    for k, v in db.entries.items():
        # Include the bibliography key in each entry.
        v.fields['key'] = k
        # Include the full BibTeX in each entry, minus fields to ignore
        filtered_v_field_items = filter(
            lambda x: x[0] not in _ignore_fields_bibtex_source,
            v.fields.items())
        # Rebuild the entry from the filtered fields so the serialized
        # BibTeX omits the ignored fields, then stash the source text.
        filtered_v = Entry(v.type,
                           fields=filtered_v_field_items,
                           persons=v.persons)
        v.fields['bibtex'] = BibliographyData({
            k: filtered_v
        }).to_string('bibtex').strip()
    # Render the template.  _sortkey + reverse=True -> newest entries first.
    bib_sorted = sorted(db.entries.values(), key=_sortkey, reverse=True)
    if save_individual:
        # One HTML page per entry, named after its BibTeX key.
        for bib in bib_sorted:
            out = tmpl.render(entry=bib)
            file_path = os.path.join(save_path, '%s.html' % bib.key)
            with open(file_path, 'w') as f:
                f.write(out)
    else:
        out = tmpl.render(entries=bib_sorted)
        with open(save_path, 'w') as f:
            f.write(out)
def import_bibs(filelist: list) -> list:
    """
    Takes a list of bibtex files and returns entries as a list of
    dictionaries representing the info on each work
    """
    articles = []
    references_field = "Cited-References"
    affiliation_field = "Affiliation"
    print('Reading .bibs')
    for filename in tqdm(filelist):
        try:
            # since pybtex removes the \n from this field, we do it ourselves
            # (but we are not fully replacing pybtex because of the extra
            # consistency checks it has)
            references = parse(filename,
                               keepOnly=[references_field, affiliation_field])
            # Normalize both fields to lists: absent -> [], present ->
            # stripped lines split on '\n'.
            for k in references:
                if (references_field not in references[k]):
                    references[k][references_field] = []
                else:
                    references[k][references_field] = [
                        x.strip()
                        for x in references[k][references_field].split('\n')
                    ]
                if (affiliation_field not in references[k]):
                    references[k][affiliation_field] = []
                else:
                    references[k][affiliation_field] = [
                        x.strip()
                        for x in references[k][affiliation_field].split('\n')
                    ]
            bibdata = {}
            parser = bibtex.Parser()
            # The site is ignoring bibtex format, so this kludge fixes it
            with open(filename, 'r') as fin:
                file_contents = str(fin.read()).replace(
                    '(Last-180-days)', '-Last-180-days').replace(
                        'Early Access Date', 'Early-Access-Date')
            bibdata = parser.parse_string(file_contents)
            # Merge the pybtex-parsed entry with the hand-parsed reference
            # and affiliation lists keyed by the same bib id.
            for bib_id in bibdata.entries:
                articles.append(
                    extract_article_info(bibdata.entries[bib_id].fields,
                                         bibdata.entries[bib_id].persons,
                                         references[bib_id][references_field]))
                articles[-1][affiliation_field] = references[bib_id][
                    affiliation_field]
        except:
            # NOTE(review): bare except, but it re-raises after naming the
            # offending file, so no errors are swallowed here.
            print('Error with the file ', filename)
            raise
    print('Imported {0} articles.'.format(thous(len(articles))))
    return (articles)
def getEntryFromString(s):
    """Parse *s* as BibTeX and return its first entry, logging and falling
    back to the DUMMY record on failure.

    Py3 fix: ``.items()[0]`` always raised TypeError on dict views (which
    then also broke the fallback path); take the first item via iterator.
    """
    parser = bibtex.Parser()
    try:
        parser.parse_stream(StringIO(s))
        key, entry = next(iter(parser.data.entries.items()))
    except Exception as e:
        log.warn('BibTex parsing failed: %s' % e)
        parser.parse_stream(StringIO(DUMMY))
        key, entry = next(iter(parser.data.entries.items()))
    return entry
def read_bibtex(filename, mode='r'):
    '''Parse *filename* as BibTeX and return its entries.

    Any parse failure is reported through ``bot.error`` (returning None).
    The *mode* parameter is accepted but unused, as before.
    '''
    from pybtex.database.input import bibtex
    try:
        return bibtex.Parser().parse_file(filename).entries
    except Exception as e:
        bot.error(e)
def main(argv=None):
    """Abbreviate journal names in a BibTeX file and rewrite it with
    brace delimiters."""
    args = _get_parser().parse_args(argv)
    parsed = bibtex.Parser().parse_file(args.infile)
    entries = dict(parsed.entries.items())
    entries = journal_abbrev(entries, args.long_journal_names,
                             args.extra_abbrev_file)
    tools.write(entries, args.outfile, "braces", tab_indent=False)
def get_bibtex(self):
    """Retrieve the BibTeX entry for this paper and store it in
    ``self.bibdata`` (raw bytes kept in ``self._bibtex``).

    Py3 port: ``urllib2`` and ``StringIO.StringIO`` no longer exist.
    """
    from io import StringIO
    from urllib.request import urlopen
    logging.debug("Fetching BibTex file")
    self._bibtex = urlopen(self._bibtex_url.format(self.bibcode)).read()
    parser = bibtex.Parser()
    bd = parser.parse_stream(StringIO(self._bibtex.decode('utf-8')))
    # Take the first (expected only) entry key.
    bibkey = [x for x in bd.entries][0]
    self.bibdata = bd.entries[bibkey]
def bibtex_edit(filename, tax_id, ref_doc):
    """Load the single BibTeX entry in *filename*, merge it into *ref_doc*,
    and save the reference for taxon *tax_id*.

    Py3 fix: ``dict.keys()[0]`` raised TypeError on dict views; take the
    first (only) key via an iterator.  The unused ``writer`` local was
    removed.
    """
    parser = bib_in.Parser()
    bib_data = parser.parse_file(filename)
    assert len(bib_data.entries.keys()) == 1
    firstEntry = next(iter(bib_data.entries.keys()))
    ref_doc = set_ref_from_entry(firstEntry, bib_data, ref_doc)
    ReferenceFamily(tax_id).save_reference(ref_doc)
def export_web(path="~/Papers", inline=False, regenerate_all_previews=False, **kwargs):
    """Render the papers directory into a static ``index.html``.

    Exports a consolidated ``index.bib``, generates missing (or, with
    *regenerate_all_previews*, all) preview JPEGs for PDFs, optionally
    inlines previews as base64, and writes the rendered template.
    """
    papers_path = Path(path).expanduser().absolute()
    # Get template
    tenv = jinja2.sandbox.SandboxedEnvironment()
    tenv.filters["author_fmt"] = _author_fmt
    tenv.filters["author_list"] = _author_list
    tenv.filters["keyword_list"] = _keyword_list
    tenv.filters["title"] = _title
    tenv.filters["venue_type"] = _venue_type
    tenv.filters["venue"] = _venue
    tenv.filters["main_url"] = _main_url
    tenv.filters["extra_urls"] = _extra_urls
    tenv.filters["monthname"] = _month_name
    template_path = Path(
        os.path.realpath(__file__)).parent / "templates/index.html"
    with open(template_path, "r+") as f:
        tmpl = tenv.from_string(f.read())
    # Make BibTeX file
    bibfile_path = export_bib(
        path=path,
        aux=None,
        output=papers_path / "index.bib",
    )
    # Parse the BibTeX file
    with open(bibfile_path, "r+") as f:
        db = bibtex.Parser().parse_stream(f)
    # Include the bibliography key in each entry
    for k, v in db.entries.items():
        v.fields["key"] = k
        paper_path = papers_path / f"{k}/{k}.pdf"
        preview_path = papers_path / f"{k}/preview.jpg"
        if paper_path.exists():
            if not preview_path.exists() or regenerate_all_previews:
                preview(fname_in=paper_path, fname_out=preview_path)
        if inline and preview_path.exists():
            # FIX: read via Path.read_bytes() — the original opened the
            # file inline and never closed the handle.
            v.fields["preview"] = base64.b64encode(
                preview_path.read_bytes()).decode("ascii")
    # Render the template, newest entries first
    bib_sorted = sorted(db.entries.values(), key=_sortkey, reverse=True)
    out = tmpl.render(entries=bib_sorted)
    # Write contents (overrides index.html)
    Path(papers_path / "index.html").write_text(out)
def loadBibtexFile(self):
    """Lazily parse ``self.bibtexFile`` into ``self.bibtexRefs``.

    No-op when the references were already loaded; progress is printed when
    ``self.verbose`` is positive.  Fix: ``is None`` instead of ``== None``;
    commented-out bibtexparser experiment removed.
    """
    if self.bibtexRefs is None:
        if self.verbose > 0:
            print("load Bibtex file ...")
            sys.stdout.flush()
        self.bibtexRefs = bibtex.Parser().parse_file(self.bibtexFile)
        if self.verbose > 0:
            print("Bibtex file: %i entries" % len(self.bibtexRefs.entries))
def _parse_bibtex_entries(entries):
    """Parse each BibTeX string in *entries* into a database.

    Syntax errors are reported with a ``BIBTEX_ERROR:`` prefix and the bad
    entry skipped.  Fixes: Py2 ``except (A, B), e`` / ``print`` statement
    syntax, and the result list was built but never returned.
    """
    biblio_list = []
    for entry in entries:
        stream = StringIO(entry)
        parser = bibtex.Parser()
        try:
            biblio = parser.parse_stream(stream)
            biblio_list += [biblio]
        except (PybtexSyntaxError, PybtexError) as error:
            print(format_error(error, prefix='BIBTEX_ERROR: '))
    return biblio_list
def sort_bib_data(input, output):
    """Group the entries of *input* by BibTeX type and write one
    ``<type>.bib`` file per group into the *output* directory."""
    bib_data = bibtex.Parser().parse_file(input)
    grouped = findBibType(bib_data)
    template = output + "/{}.bib"
    for bib_type in grouped:
        target = template.format(bib_type)
        selection = {k: bib_data.entries[k] for k in grouped[bib_type]}
        write_result(target, selection)
    logger.debug(pformat(grouped))