def read_bibtex(bibtex_str):
    """Parse a BibTeX string and normalise its entries.

    Each entry is passed through the bibtexparser customizations
    ``keyword``, ``convert_to_unicode`` and ``author``, in that order.

    :param bibtex_str: raw BibTeX source text
    :returns: list of customised entry dicts
    """
    bib_parser = BibTexParser(common_strings=True)
    bib_parser.ignore_nonstandard_types = False
    bib_parser.homogenize_fields = True
    database = bib_parser.parse(bibtex_str)

    cust = bibtexparser.customization
    return [
        cust.author(cust.convert_to_unicode(cust.keyword(entry)))
        for entry in database.entries
    ]
def _open_bib_db(self, bibfile):
    """Open and parse the bibtex database at *bibfile*.

    Side effect: remembers the file's directory in ``self._bib_path``.

    :param bibfile: path to the .bib file
    :returns: the parsed BibDatabase object
    """
    self._bib_path = os.path.dirname(bibfile)
    bib_parser = BibTexParser()
    bib_parser.ignore_nonstandard_types = False
    bib_parser.homogenize_fields = True
    with open(bibfile) as handle:
        return bibtexparser.load(handle, bib_parser)
def __init__(self, file):
    """Parse *file* (BibTeX source text) into literature entries.

    Populates ``self._papers`` via the database lookup,
    ``self._ignored_raw_entries`` (entries with neither DOI nor title)
    and ``self._missing_entries`` (entries the lookup did not find).
    """
    def _customize(record):
        # Unicode-normalise, then mirror every field into a braces-free
        # plain_<field> copy (e.g. plain_title).
        record = convert_to_unicode(record)
        return add_plaintext_fields(record)

    bib_parser = BibTexParser()
    bib_parser.homogenize_fields = True
    bib_parser.customization = _customize
    database = bibtexparser.loads(self.fix_missing_cite_keys(file), bib_parser)

    file_papers = []
    self._ignored_raw_entries = []
    self._missing_entries = []
    for entry in database.entries:
        has_doi = "doi" in entry
        has_title = "title" in entry
        if not (has_doi or has_title):
            # Nothing identifiable to look up -- keep the raw entry aside.
            self._ignored_raw_entries.append(entry)
            continue
        file_papers.append(LiteratureEntry(
            doi=entry["doi"] if has_doi else "",
            title=entry["plain_title"] if has_title else ""))

    self._papers = self.load_entries_from_database(file_papers)
    self._missing_entries = [e for e in file_papers if not e.result_found]
def sync():
    """Pull the bibliography git repository and reload global state.

    Refreshes the module-level ``repo``, ``bib_database`` and ``token_db``
    globals from the freshly-pulled working tree. The tokens file is
    optional: if it is missing or malformed, ``token_db`` is left as-is.

    :returns: the literal status string "Synced!"
    """
    global repo, bib_database, token_db
    parser = BibTexParser(common_strings=True)
    parser.ignore_nonstandard_types = False
    parser.homogenize_fields = True

    repo = git.Repo(repo_path)
    origin = repo.remotes.origin
    origin.pull()

    with open(repo_path + "/" + repo_name) as bibtex_file:
        bib_database = bibtexparser.load(bibtex_file, parser)

    # Only file-system and JSON-decoding failures are expected here.
    # The original bare `except:` also swallowed KeyboardInterrupt etc.,
    # and its `tokens = False` assignment was a dead local with no effect.
    try:
        with open(repo_path + "/tokens.json") as tokens:
            token_db = json.load(tokens)
    except (OSError, ValueError):
        pass
    return "Synced!"
def main():
    """CLI entry point: read a .bib file, optionally clean it, and rewrite it.

    Reads ``args.input_bib``, strips noisy fields when ``--clean`` is set,
    double-braces every title so BibTeX preserves capitalisation, and dumps
    the result to ``args.output_bib``.
    """
    args = _args()
    bibfile = args.input_bib
    with open(bibfile) as bibtex_file:
        parser = BibTexParser()
        parser.ignore_nonstandard_types = False
        parser.homogenize_fields = True
        parser.common_strings = True
        parser.customization = keep_uppercase
        bib_database = bibtexparser.load(bibtex_file, parser=parser)

    if args.clean:
        for entry in bib_database.entries:
            # BUGFIX: 'file' appeared twice in the original tuple.
            for k in ('file', 'annote', 'abstract', 'url', 'link'):
                entry.pop(k, None)

    for entry in bib_database.entries:
        # Guard against title-less entries instead of raising KeyError.
        if 'title' in entry:
            entry['title'] = '{{{}}}'.format(entry['title'])

    bibwriter = BibTexWriter()
    with open(args.output_bib, 'w') as outbib:
        bibtexparser.dump(bib_database, outbib, bibwriter)
def readBibFile(bibfile):
    """Read and parse a bibtex file.

    Args:
        bibfile (str): abspath to input bibtex file.

    Returns:
        results (list): DocMeta dicts, one per parsed entry in the
            bibtex file, or None when the file does not exist.
    """
    path = os.path.abspath(bibfile)
    if not os.path.exists(path):
        return None

    with open(path, 'r') as fin:
        parser = BibTexParser()
        parser.homogenize_fields = True
        parser.customization = customizations
        bib = bibtexparser.load(fin, parser=parser)
    LOGGER.info('Read in bib file: %s' % path)

    results = []
    for entry in bib.entries:
        entry = altKeys(splitNames(entry), ALT_KEYS)
        entry['citationkey'] = entry['ID']
        # folders_l is an internal list field; drop it if present.
        entry.pop('folders_l', None)
        meta = sqlitedb.DocMeta()
        meta.update(entry)
        results.append(meta)
    return results
# Force line-buffered stdio so progress prints appear immediately.
os.environ['PYTHONUNBUFFERED'] = '1'
buf_arg = 1  # 1 = line buffering
sys.stdout = os.fdopen(sys.stdout.fileno(), 'w', buf_arg)
sys.stderr = os.fdopen(sys.stderr.fileno(), 'w', buf_arg)

# due to my mistake in after rename, the bibtex id/keys are not synced with numpaperorder; do that here
DOTHEMOVES = False  # True # False -- dry-run switch: nothing is moved while False
# get list of immediate child subdirs SO:973473 :
subdirs = sorted(next(os.walk(dir_data))[1])  # ok
# 02_SMC Conference 2015:044/74: orig 'G. Presti and D.A. Mauro and G. Haus' -> _DATA_/02_SMC\ Conference\ 2015/smc_2015_044.pdf
nummoved = 0

# homogenize_fields: Sanitize BibTeX field names, for example change `url` to `link` etc.
tbparser = BibTexParser()
tbparser.homogenize_fields = False  # no dice
tbparser.alt_dict['url'] = 'url'  # this finally prevents change 'url' to 'link'

for subdir in subdirs:
    # Each conference directory is expected to hold <subdir>/<subdir>.bib
    # -- TODO confirm layout against dir_data on disk.
    bibsubdir = os.path.join(dir_data, subdir)
    bibfile = os.path.join(bibsubdir, "%s.bib" % (subdir))
    print((bibfile, os.path.isfile(bibfile)))
    with open(bibfile) as bibtex_file:
        bibtex_str = bibtex_file.read()
    bib_database = bibtexparser.loads(bibtex_str, tbparser)
    # pprint.pprint(bib_database.entries) # already here, replaces 'url' with 'link'
    confbiblen = len(bib_database.entries)
    havemoves = False
    for icpbe, confpaperbibentry in enumerate(bib_database.entries):
        oldid = confpaperbibentry['ID']
        year = confpaperbibentry['year']
        # NOTE(review): the loop body continues beyond this chunk
        # (havemoves/nummoved are consumed by code not visible here).
def create_bibtex_parser() -> BibTexParser:
    """Build the project's standard BibTeX parser.

    The returned parser accepts common @string macros, rejects
    nonstandard entry types, homogenizes field names and converts
    LaTeX escapes to unicode via ``homogenize_latex_encoding``.
    """
    p = BibTexParser(common_strings=True)
    p.ignore_nonstandard_types = False
    p.homogenize_fields = True
    p.customization = homogenize_latex_encoding
    return p
def parse_bib(self):
    """Walk self.directory, parse every .bib file and emit XML docs.

    For each entry the Web-of-Science fields are flattened into a list of
    (field, value) tuples and handed to ``self.generateDoc``; counters
    ``self.total_entries`` / ``self.total_saved`` are updated and failures
    are recorded in ``self.error``.

    NOTE(review): this is Python 2 code (print statement, ``iteritems``).
    """
    # docs = []
    content_saved = 0
    for root, dirs, files in os.walk(self.directory):
        for file in files:
            if file.endswith('.bib'):
                with open(self.directory + file) as bibtex_file:
                    try:
                        parser = BibTexParser()
                        bibtex_str = bibtex_file.read()
                        # Pre-clean WoS quirks: rename the category field and
                        # strip escaped braces/percent signs and backslashes.
                        st = bibtex_str.replace("Web of Science-Category",
                                                "Web-of-Science-Category").replace("\{", "[").replace(
                            "\}", "]").replace("\{", "[").replace("\%", "%").replace("\\", '')
                        st = st.replace('Early Access Date', 'month').replace('Early Access Year', 'year')  # .replace('\n', ' ')
                        parser.ignore_nonstandard_types = False
                        parser.customization = convert_to_unicode
                        parser.homogenize_fields = True
                        bib_database = bibtexparser.loads(st, parser=parser)
                    except:
                        # Remember which file failed, then re-raise.
                        self.error.append(file)
                        raise
                content_entries = len(bib_database.entries)
                self.total_entries += content_entries
                print 'O arquivo ' + file + ' tem: ' + str(content_entries) + ' registros'
                for key, item in bib_database.entries_dict.items():
                    # Strip braces from every field value in place.
                    for k, v in item.iteritems():
                        item[k] = self.remove_chaves(v)
                    doc = []
                    # article = key
                    id = ('id', str(content_saved + 1))
                    doc.append(id)
                    try:
                        unique_id = ('unique_id', (self.remove_chaves(item['unique-id'])).strip())
                        doc.append(unique_id)
                        # print(unique_id)
                    except KeyError:
                        pass
                    try:
                        title = ('title', (self.remove_chaves(item['title'])).strip())
                        doc.append(title)
                        # print(title)
                    except KeyError:
                        pass
                    # Separator pattern used for author names and keywords.
                    padrao = ',|\n|\*|;'
                    try:
                        authors = item['author']
                        for author in re.split('\sand\s', authors):
                            # "Surname, Given" -> "Given Surname"
                            name_list = re.split(padrao, author)
                            name_list.reverse()
                            name = ('Author', (' '.join(name_list)).strip())
                            doc.append(name)
                        # docs.append(doc)
                    except KeyError:
                        pass
                    try:
                        publisher = (item['publisher']).strip()
                        publisher_add = ('publisher', publisher)
                        doc.append(publisher_add)
                        journal = (item['journal']).strip()
                        try:
                            volume = item['volume']
                            pub_journal = ('publisher_journal_volume_facet',
                                           "{}|{}|{}".format(publisher.strip(), journal.strip(), volume.strip()))
                            doc.append(pub_journal)
                        except:
                            # No volume: emit a two-part facet value.
                            # NOTE(review): this literal was garbled by the
                            # extraction; reconstructed as "{}|{}| " -- confirm.
                            pub_journal = (
                                'publisher_journal_volume_facet',
                                "{}|{}| ".format(publisher.strip(), journal.strip()))
                            doc.append(pub_journal)
                    except KeyError:
                        pass
                    # print(pub_journal)
                    try:
                        year = item['year']
                        group = gYear(int(year))
                    except:
                        year = ''
                        group = ''
                    doc.append(('Year', year.strip()))
                    try:
                        month = item['month'][0:4]
                        year_month = (
                            'Year_Month_facet',
                            '{}|{}|{}'.format(group.strip(), year.strip(), month.strip()))
                        doc.append(year_month)
                    except KeyError:
                        year_month = ('Year_Month_facet',
                                      '{}|{}|'.format(group.strip(), year.strip()))
                        doc.append(year_month)
                    # print(year_month)
                    try:
                        abstract = ('abstract', self.remove_chaves(item['abstract']))
                        doc.append(abstract)
                        # print(abstract)
                    except KeyError:
                        pass
                    try:
                        # NOTE(review): comparing a list with 2 is always False
                        # in Py2 here, so the else branch always runs -- looks
                        # like a latent bug (probably meant len(...) < 2).
                        if (item['address'].split(',')[-1]).split(' ') < 2:
                            address = ('address', (item['address'].split(',')[-1]).strip())
                        else:
                            address = (
                                'address',
                                (item['address'].split(',')[-1].split(' ')[-1]).strip())
                        doc.append(address)
                        # print((item['address'].split())[-1][0:-1])
                    except:
                        pass
                    try:
                        tipe = ('type', (item['type']).strip())
                        doc.append(tipe)
                        # print(type)
                    except KeyError:
                        pass
                    try:
                        affiliations = (self.remove_chaves(item['affiliation'])).split('\n')
                        for aff in affiliations:
                            affiliation = ('affiliation', aff)
                            # Last comma-separated token's last word ~ country.
                            # c = aff.split(',')[-1].replace('.','')
                            country_aff = (
                                'affiliation_country',
                                aff.split(',')[-1].split(' ')[-1].replace('.', ''))
                            # print country_aff
                            doc.append(affiliation)
                            doc.append(country_aff)
                    except KeyError:
                        pass
                    try:
                        language = ('language', (self.remove_chaves(item['language'])).strip())
                        doc.append(language)
                        # print(language)
                    except KeyError:
                        pass
                    try:
                        DOI = ('DOI', (self.remove_chaves(item['doi'])).strip())
                        doc.append(DOI)
                    except KeyError:
                        pass
                    # print(DOI)
                    try:
                        ISSN = ('ISSN', (item['issn']).strip())
                        doc.append(ISSN)
                    except KeyError:
                        pass
                    # print(ISSN)
                    try:
                        EISSN = ('EISSN', (item['eissn']).strip())
                        doc.append(EISSN)
                        # print(EISSN)
                    except KeyError:
                        pass
                    try:
                        keywords = re.split(padrao, self.remove_chaves(item['keywords']))
                        for k in keywords:
                            if k:
                                doc.append(('keyword', k.strip()))
                    except KeyError:
                        pass
                    try:
                        keywords_plus = re.split(padrao, (item['keywords-plus']).strip())
                        for k in keywords_plus:
                            if k:
                                doc.append(('keyword_plus', k.strip()))
                    except KeyError:
                        pass
                    try:
                        research_area = (
                            'research_areas',
                            ((item['research-areas']).replace('; ', '|')).strip())
                        doc.append(research_area)
                        # print(research_area)
                    except KeyError:
                        pass
                    try:
                        web_of_science_categories = (
                            'web_of_science_categories',
                            ((item['web-of-science-categories']).replace('; ', '|')).strip())
                        doc.append(web_of_science_categories)
                        # print(web_of_science_categories)
                    except KeyError:
                        pass
                    try:
                        funding_acknowledgement = (
                            'funding_acknowledgement',
                            (item['funding-acknowledgement']).strip())
                        # print(funding_acknowledgement)
                        doc.append(funding_acknowledgement)
                    except KeyError:
                        pass
                    try:
                        nro_cited_ref = (
                            'number_of_cited_references',
                            (item['number-of-cited-references']).strip())
                        doc.append(nro_cited_ref)
                        # print(nro_cited_ref)
                    except KeyError:
                        pass
                    try:
                        tc = (item['times-cited']).strip()
                        times_cited = ('times_cited', tc)
                        doc.append(times_cited)
                        # print(times_cited)
                    except KeyError:
                        pass
                    try:
                        # NOTE(review): `tc` leaks from the previous try block;
                        # if 'times-cited' was missing this reuses a stale value.
                        times_cited_facet = ('times_cited_facet', facet_citations(tc))
                        doc.append(times_cited_facet)
                    except:
                        pass
                    try:
                        journal_iso = ('journal_iso', (item['journal-iso']).strip())
                        doc.append(journal_iso)
                        # print(journal_iso)
                    except KeyError:
                        pass
                    # docs.append(doc)
                    self.generateDoc(doc)
                    content_saved += 1
                    # print("{} arquivos salvos".format(content))
                # print(file)
    # for doc in docs:
    #     self.generateDoc(doc)
    self.save_xml()
    self.total_saved = content_saved
    self.logger()
def generate_sources(bib_path, bibids):
    """Build olca-style ``_Source`` objects from a BibTeX file.

    :param bib_path: path to the .bib file to read
    :param bibids: mapping of BibTeX citation key -> source display name
    :returns: list of ``_Source`` objects, one per key found in the file;
        keys missing from the file are reported and skipped
    """
    import bibtexparser
    from bibtexparser.bparser import BibTexParser

    def _customize(record):
        # Add braces-free plain_* copies of every field, then split/clean
        # the DOI field.
        record = bibtexparser.customization.add_plaintext_fields(record)
        return bibtexparser.customization.doi(record)

    parser = BibTexParser(common_strings=True)
    parser.ignore_nonstandard_types = False
    parser.homogenize_fields = True
    parser.customization = _customize

    with open(bib_path) as handle:
        entries = parser.parse_file(handle).entries_dict

    # How each output field is derived from a record: a list means
    # "join these record fields with ', '", a string means a direct copy.
    field_map = {
        'description': ['plain_author', 'plain_publisher', 'plain_title',
                        'plain_journal', 'year'],
        'textReference': '',
        'year': 'plain_year',
        'url': 'url',
    }

    sources = []
    for key, label in bibids.items():
        if key not in entries:
            print(f'{key} not found')
            continue
        record = entries[key]
        source = {'name': label}
        for field, spec in field_map.items():
            if isinstance(spec, list):
                source[field] = ', '.join(
                    record[part] for part in spec if part in record)
            else:
                source[field] = record.get(spec, '')
        sources.append(_Source(source))
    return sources
# Record the current bibliography hash before contacting the server.
save_bib_hash()


def show_error(obj):
    """Print a friendly message for a server error payload.

    :param obj: decoded JSON error object; only the access_denied
        reason gets a dedicated message.
    """
    if "reason" in obj:
        if obj["reason"] == "access_denied":
            print(
                "[!] Access denied! Your token is not valid for this operation. Verify whether the file '%s' contains a valid token." % args.token_file)


# `args` and `server` come from earlier in the file (argument parsing /
# configuration) -- not visible in this chunk.
action = args.action

parser = BibTexParser(common_strings=True)
parser.ignore_nonstandard_types = False
parser.homogenize_fields = True

# Start from an empty database when main.bib is absent or zero-length;
# otherwise parse it, bailing out on malformed input.
if not os.path.exists("main.bib") or os.stat("main.bib").st_size == 0:
    bib_database = bibtexparser.loads("\n")
else:
    try:
        with open('main.bib') as bibtex_file:
            bib_database = bibtexparser.load(bibtex_file, parser)
        # print(bib_database.entries)
    except Exception as e:
        print("Malformed bibliography file!\n")
        print(e)
        sys.exit(1)

# Query the server's version endpoint (consumed further down the file).
response = requests.get(server + "version")
version_info = response.json()
def convert():
    """Flask view: turn an uploaded .bib / .ris / XML file into an HTML table.

    On POST the uploaded file is dispatched by extension; on GET (or when
    no branch returns) the upload form is rendered. Relies on module-level
    helpers defined elsewhere in the file: allowed_file, file_bib,
    file_ris, remove_punch and the `html` template string.
    """
    parser = BibTexParser(common_strings=False)
    parser.ignore_nonstandard_types = False
    parser.homogenize_fields = False
    if request.method == 'POST':
        file = request.files['filer_input']
        if file.filename == '':
            flash('no selected file')
            return redirect(request.url)
        if file and allowed_file(file.filename):
            if file and file_bib(file.filename):
                # BibTeX branch: parse entries into a DataFrame.
                bibtex_file = file.stream
                bibtex_str = bibtex_file.read()
                bib_database = bibtexparser.loads(bibtex_str, parser)
                df = pd.DataFrame(bib_database.entries)
                df.index += 1  # 1-based row numbers for display
                return render_template_string(html, filenm=file.filename, table=df.to_html(index='Nomor', header='true', table_id='example', classes='table table-striped table-bordered'))
            elif file and file_ris(file.filename):
                # RIS branch: decode the byte stream and parse with readris.
                ris_file = file.stream.read()
                reader = io.BytesIO(ris_file)
                wrapper = io.TextIOWrapper(reader, encoding='utf-8')
                entries = readris(wrapper)
                df = pd.DataFrame(entries)
                df['publication_year'] = df['publication_year'].apply(remove_punch)
                df.index += 1
                return render_template_string(html, filenm=file.filename, table=df.to_html(header='true', table_id='example', classes='table table-striped table-bordered'))
            else:
                # XML (EndNote export) branch: xml -> dict -> json -> DataFrame.
                data = xmltodict.parse(file)
                json_data = json.dumps(data)
                wanda = pd.read_json(StringIO(json_data))
                vision = wanda["xml"]['records']['record']
                axel = pd.DataFrame(vision)
                # Drop the 'database' column when present.
                # NOTE(review): `axel['database'] is not None` tests a Series
                # against None and is always True -- confirm intent.
                if axel.index[0] == '@name':
                    if axel['database'] is not None:
                        del axel['database']
                        # axel.index +=1
                    else:
                        None
                else:
                    if axel['database'] is not None:
                        del axel['database']
                    else:
                        None
                # for get type value
                type_data = []
                for i, each in enumerate(axel['ref-type']):
                    if '@name' in axel['ref-type'][i].keys():
                        try:
                            data_type = axel['ref-type'][i]['@name']
                            type_data.append(data_type)
                        except Exception as e:
                            print(e)
                axel['ref-type'] = type_data
                # for get author value
                authors = []
                for i, each in enumerate(axel['contributors']):
                    if 'authors' in axel['contributors'][i].keys():
                        try:
                            data_author = axel['contributors'][i]['authors']['author']
                            authors.append(data_author)
                        except Exception as e:
                            print(e)
                axel['contributors'] = authors
                # for get title
                titles = []
                for i, each in enumerate(axel['titles']):
                    title_data = axel['titles'][i].values()
                    titles.append(title_data)
                axel['titles'] = titles
                # for get full-tittle
                periodical = []
                for i, each in enumerate(axel['periodical']):
                    hulk = axel['periodical'][i]
                    if hulk is not None:
                        # else hulk = 'NaN'
                        periodical_data = hulk.values()
                        periodical.append(periodical_data)
                axel['periodical'] = periodical
                # FOR GET keywords
                # keyw=[]
                # for i, each in enumerate(axel['keywords']):
                #     hawk_eye = axel['keywords'][i]
                #     if hawk_eye is not None:
                #         # else hulk = 'NaN'
                #         keyw_data = hawk_eye.values()
                #         keyw.append(keyw_data)
                # axel['keywords']= keyw
                axel.index += 1
                return render_template_string(html, filenm=file.filename, table=axel.to_html(header='true', table_id='example', classes='table table-striped table-bordered'))
        else:
            return render_template('output1.html')
    return render_template('index.html')