def tidy_up(bib_entries, report, verbose):
    """Normalise parsed bib entries in place and record non-standard types.

    For every entry the author field is split with ``author`` and re-joined
    with ``' and '``, entries whose ``ENTRYTYPE`` is not in
    ``STANDARD_TYPES`` are collected, and a signature derived from the
    title is stored under ``sig1``.

    :param bib_entries: iterable of entry dicts; each one is indexed by
        ``ENTRYTYPE``, ``ID`` and ``title``
    :param report: dict that receives the ``non_standard_list`` key
    :param verbose: when truthy, every non-standard entry is logged
    """
    info("tidying up the parsed bib file ...")
    non_standard_entry_list = []
    # Per-type counters; not consumed anywhere in the visible part of this
    # function.
    entry_type_dict = {}
    for bib_entry in bib_entries:
        # reformat the author names
        if 'author' in bib_entry:
            bib_entry = author(bib_entry)
            # NOTE(review): bibtexparser's ``author`` appears to delete the
            # key when the field is empty, so re-check before joining.
            if 'author' in bib_entry:
                bib_entry['author'] = ' and '.join(bib_entry['author'])

        entry_type = bib_entry['ENTRYTYPE']

        # warn about non-standard entry types
        if entry_type not in STANDARD_TYPES:
            non_standard_entry_list.append((bib_entry['ID'], entry_type))
            if verbose:
                info("entry: {} has a non-standard type: {}".format(
                    bib_entry['ID'], entry_type))

        # count the number of entries for different entry types
        entry_type_dict[entry_type] = entry_type_dict.get(entry_type, 0) + 1

        # build up signatures for bib entries
        # TODO: maybe use more signatures
        bib_entry['sig1'] = bib_entry['title'].replace(' ', '').lower()

    report['non_standard_list'] = non_standard_entry_list
def __call__(self, rec):
    """Clean up one bibtexparser record and return it.

    Every field value gets typographic substitutions, the journal name is
    looked up in ``_bib_journals``, authors are rewritten as
    ``rest surname`` joined with ``'; '``, the 1-based position of the
    author matching ``self.mylsurname`` is stored in ``wl_mypos`` and the
    result of ``_bib_cite`` is stored in ``wl_cite``.
    """
    from bibtexparser.customization import author, type, convert_to_unicode

    rec = convert_to_unicode(rec)
    rec = type(rec)

    # Typographic substitutions applied to every field value.
    for field in rec.keys():
        cleaned = rec.get(field).replace('{\\nbsp}', nbsp)
        cleaned = cleaned.replace('``', u'“')
        rec[field] = cleaned.replace("''", u'”')

    if 'journal' in rec:
        journal_name = rec['journal']
        rec['journal'] = _bib_journals.get(journal_name.lower(), journal_name)

    rec = author(rec)

    if 'author' in rec:
        formatted = []
        for position, raw in enumerate(rec['author'], 1):
            for old, new in (('{', ''), ('}', ''), ('~', ' ')):
                raw = raw.replace(old, new)
            surname, rest = raw.split(',', 1)
            if surname.lower() == self.mylsurname:
                rec['wl_mypos'] = unicode(position)
            formatted.append(rest + ' ' + surname.replace(' ', '_'))
        rec['author'] = '; '.join(formatted)

    rec['wl_cite'] = _bib_cite(rec)
    return rec
def tags(record):
    """Attach a ``tags`` set and a ``p_authors`` list to *record*.

    The record is converted to unicode and its author/editor fields are
    split first.  ``tags`` is built from the comma/semicolon separated
    ``tags`` field (empty set when absent).  ``p_authors`` holds the result
    of ``c.splitname`` for each author; when an ``editor`` field is present
    its names replace the author-derived list (original behaviour, kept).

    :param record: a bibtexparser record; ``ID`` is required
    :returns: the modified record
    """
    record = b.customization.convert_to_unicode(record)
    record = c.author(record)
    record = c.editor(record)

    tag_set = set()
    if 'tags' in record:
        raw_tags = record["tags"].replace('\n', '')
        tag_set.update(i.strip() for i in re.split(',|;', raw_tags))
    record['tags'] = tag_set

    record['p_authors'] = []
    logging.debug(f"Handling: {record['ID']}")

    if 'author' in record:
        try:
            record['p_authors'] = [
                c.splitname(x, False) for x in record['author']
            ]
        except Exception:
            # Best effort: a leftover ``breakpoint()`` used to sit here.
            # Log the failure and keep the empty author list instead of
            # dropping into a debugger.
            logging.exception("Could not split author names for %s",
                              record['ID'])

    if 'editor' in record:
        # NOTE(review): bibtexparser's ``editor`` appears to turn each
        # editor into a dict with a ``name`` key; ``splitname`` needs the
        # plain string, so unwrap it when that is the case.
        record['p_authors'] = [
            c.splitname(x['name'] if isinstance(x, dict) else x, False)
            for x in record['editor']
        ]
    return record
def __call__(self, rec):
    """Post-process a single bibtexparser record.

    Applies the unicode and type customizations, swaps TeX-style quotes and
    ``{\\nbsp}`` in every value, maps the journal through ``_bib_journals``,
    reformats the author list into a ``'; '``-separated string (noting in
    ``wl_mypos`` where ``self.mylsurname`` appears) and stores
    ``_bib_cite(rec)`` under ``wl_cite``.
    """
    from bibtexparser.customization import author, type, convert_to_unicode

    rec = type(convert_to_unicode(rec))

    substitutions = (
        ("{\\nbsp}", nbsp),
        ("``", u"“"),
        ("''", u"”"),
    )
    for key in rec.keys():
        value = rec.get(key)
        for needle, replacement in substitutions:
            value = value.replace(needle, replacement)
        rec[key] = value

    if "journal" in rec:
        rec["journal"] = _bib_journals.get(rec["journal"].lower(), rec["journal"])

    rec = author(rec)

    if "author" in rec:
        rewritten = []
        for index, name in enumerate(rec["author"]):
            name = name.replace("{", "")
            name = name.replace("}", "")
            name = name.replace("~", " ")
            surname, rest = name.split(",", 1)
            is_me = surname.lower() == self.mylsurname
            if is_me:
                rec["wl_mypos"] = unicode(index + 1)
            rewritten.append(rest + " " + surname.replace(" ", "_"))
        rec["author"] = "; ".join(rewritten)

    rec["wl_cite"] = _bib_cite(rec)
    return rec
def _customizations(self, record):
    """Rebuild the record ID as ``<surname>:<year>:<title>``.

    The author field is split only temporarily to extract the first
    surname; the original author value is put back before returning.
    Missing parts fall back to ``NONE`` (author, title) or ``0000`` (year).

    :param record: a record
    :returns: -- customized record
    """
    # The author field is optional: remember it only when present.
    orig_author_name = record.get("author")
    record = author(record)

    # ``author`` may drop the key or leave an empty list.
    if record.get("author"):
        author_name = record["author"][0].split(",")[0]
    else:
        author_name = "NONE"

    year = record.get("year", "0000")

    title = "NONE"
    if "title" in record:
        # Drop non-ASCII characters but stay in text: a bare ``encode``
        # leaves bytes, which break ``re.sub`` and the concatenation below
        # on Python 3.
        title = record["title"].encode('ascii', 'ignore').decode('ascii')
        title = re.sub(self._REGEX, '_', title)

    if "ID" in record:
        record["ID"] = author_name + ":" + year + ":" + title

    if orig_author_name is not None:
        record["author"] = orig_author_name
    return record
def _customizations_unicode(record):
    """
    Customize a record for the raw (unicode) style.

    Runs ``page_double_hyphen``, ``convert_to_unicode`` and ``author`` from
    ``customization``, in that order. See the bibtexparser library for
    details on each step.
    """
    pipeline = (
        customization.page_double_hyphen,
        customization.convert_to_unicode,
        customization.author,
    )
    for step in pipeline:
        record = step(record)
    return record
def _customizations_latex(record):
    """
    Customize a record for BibTeX (LaTeX-encoded) output.

    Runs ``page_double_hyphen``, ``homogenize_latex_encoding`` and
    ``author`` from ``customization``, in that order. See the bibtexparser
    library for details on each step.
    """
    hyphenated = customization.page_double_hyphen(record)
    latex_encoded = customization.homogenize_latex_encoding(hyphenated)
    return customization.author(latex_encoded)
def _customizations_latex(record):
    """
    Customize a record for BibTeX (LaTeX-encoded) output.

    Runs ``page_double_hyphen``, the LaTeX-encoding homogenizer and
    ``author`` from ``customization``, in that order. See the bibtexparser
    library for details on each step.
    """
    record = customization.page_double_hyphen(record)
    # NOTE(review): older bibtexparser releases spell this helper
    # ``homogeneize_latex_encoding``; newer ones appear to expose only
    # ``homogenize_latex_encoding``. Prefer the correct spelling and fall
    # back to the legacy one so both keep working.
    try:
        homogenize = customization.homogenize_latex_encoding
    except AttributeError:
        homogenize = customization.homogeneize_latex_encoding
    record = homogenize(record)
    record = customization.author(record)
    return record
def _customizations(record):
    """
    Bibtexparser customizations applied to every entry found in the .bib
    files.
    """
    pipeline = (
        convert_to_unicode,
        type,    # make the entry types lower-case
        author,  # split the authors into a list
        editor,  # split the editors into a list
    )
    for step in pipeline:
        record = step(record)
    return record
def customizations(record):
    """Run the record through the full chain of customization helpers.

    The helpers are applied in a fixed order: ``bib_type``, ``author``,
    ``editor``, ``journal``, ``keyword``, ``link``, ``page_double_hyphen``
    and ``doi``; each receives the result of the previous one.
    """
    pipeline = (
        bib_type,
        author,
        editor,
        journal,
        keyword,
        link,
        page_double_hyphen,
        doi,
    )
    for step in pipeline:
        record = step(record)
    return record
def guess_key(entry):
    """Guess a key of the form ``<surnames>-<year>`` for *entry*.

    Works on a deep copy, so *entry* itself is left untouched. With more
    than two authors only the first surname is used; otherwise all
    surnames are joined with ``-``. Surnames are lower-cased.
    """
    entry = author(deepcopy(entry))
    surnames = [name.split(",")[0].lower() for name in entry["author"]]

    if len(surnames) > 2:
        prefix = surnames[0]
    else:
        prefix = "-".join(surnames)

    # Use YYYY if the year is not present
    year = entry.get("year", "YYYY")
    return f"{prefix}-{year}"
def customizations(record):
    """Use some functions delivered by the library.

    Splits the author field, converts the record to unicode and, when an
    ``annote`` field is present, passes it through ``strip_chars``.

    :param record: a record
    :returns: the customized record
    """
    record = author(record)
    record = convert_to_unicode(record)
    # ``annote`` is an optional field: entries without it used to raise a
    # KeyError here.
    if 'annote' in record:
        record['annote'] = strip_chars(record['annote'])
    return record
def td_biblio_customization(record):
    """
    Customize BibTeX record parsing.

    The record first goes through ``to_latex`` and is then converted to
    unicode, before the ``type``, ``author``, ``editor`` and
    ``page_double_hyphen`` customizations are applied.
    """
    pipeline = (
        to_latex,                            # normalise odd input to LaTeX
        bp_customization.convert_to_unicode,  # ... and then to unicode
        bp_customization.type,
        bp_customization.author,
        bp_customization.editor,
        bp_customization.page_double_hyphen,
    )
    for step in pipeline:
        record = step(record)
    return record
def _parse_bib_entry(entry):
    """
    Customization function for bibtexparser.

    Converts the entry to unicode when ``CONVERT_TO_UNICODE`` is set, then
    applies the ``author``, ``editor``, ``keyword`` and
    ``page_double_hyphen`` customizations.

    :param entry: bibtex record to modify
    :return bibtex record
    """
    if CONVERT_TO_UNICODE:
        entry = bib_custom.convert_to_unicode(entry)

    for step in (
        bib_custom.author,
        bib_custom.editor,
        bib_custom.keyword,
        bib_custom.page_double_hyphen,
    ):
        entry = step(entry)
    return entry
def customizations(record):
    """Apply the enabled library customizations to a record.

    Enabled steps, in order: ``convert_to_unicode``, ``author``, ``editor``
    and ``page_double_hyphen``. The ``journal`` customization is
    deliberately left out (the original author marked it "Do not use!").

    :param record: a record
    :returns: -- customized record
    """
    unicode_record = convert_to_unicode(record)
    with_authors = author(unicode_record)
    with_editors = editor(with_authors)
    return page_double_hyphen(with_editors)
def customize(record):
    """
    Customise bibtexparser records.

    After unicode conversion, the ``author``, ``title`` and ``journal``
    fields (when present) are passed through ``tex_to_html``. The author
    field is then split, handed to ``split_authors``, and the record goes
    through ``pages_endash``.
    """
    record = customization.convert_to_unicode(record)

    for name in ['author', 'title', 'journal']:
        try:
            record[name] = tex_to_html(record[name])
        except KeyError:
            # Field not present in this record: nothing to convert.
            continue

    # Split the author field into a list of authors ...
    authors_split = customization.author(record)
    # ... then let split_authors turn each one into (last, first).
    authors_tupled = split_authors(authors_split)
    return pages_endash(authors_tupled)
def customizations(record):
    """Apply the full set of ``bc`` customizations to a record.

    :param record: a record
    :returns: -- customized record
    """
    pipeline = (
        bc.convert_to_unicode,
        bc.type,  # lowercase
        bc.author,
        bc.editor,
        bc.journal,
        bc.keyword,
        bc.link,
        bc.page_double_hyphen,
        bc.doi,
    )
    for step in pipeline:
        record = step(record)
    return record
def customizations(record):
    '''
    Apply the enabled bibtexparser customizations.

    The record is converted to unicode, its authors and editors are split
    and page ranges get a double hyphen. The ``journal`` customization is
    deliberately not applied (marked "Do not use!" by the original author).

    Args:
        record: A record

    Returns:
        record: Customized record
    '''
    for step in (convert_to_unicode, author, editor, page_double_hyphen):
        record = step(record)
    return record
def customizations(record):
    """Apply the author, editor and page-range customizations.

    ``customization.author`` runs first, then ``editor``, then
    ``customization.page_double_hyphen``.

    :param record: a record
    :returns: -- customized record
    """
    with_authors = customization.author(record)
    with_editors = editor(with_authors)
    return customization.page_double_hyphen(with_editors)
def id_from_authoryear(record):
    """Store an ``id`` built from surname, year and first title word.

    The surname comes from the first author, or from the first editor when
    the record has no usable ``author`` field. The title word is the first
    word that is not in ``stopwords``; if there is none, the word part is
    left empty. Both parts are lower-cased, passed through ``latex2str``
    with a ``unidecode`` callback, and cut at the first hyphen (title word)
    or stripped of hyphens (surname).

    :param record: a record with ``title`` and ``year`` and either
        ``author`` or ``editor``
    :returns: the record, with the new ``id`` key set
    :raises KeyError: if neither author nor editor can be used, or if
        ``title``/``year`` is missing
    """
    def to_plain(text):
        # Same conversion the surname and the title word both go through.
        return latex2str(
            text, lambda u: unidecode(u) if u is not None else '').lower()

    try:
        first_author = author({'author': record['author']})['author'][0]
    except KeyError:
        first_author = editor(
            {'editor': record['editor']})['editor'][0]['name']

    # Raw strings: '\s' in a plain literal is an invalid escape sequence.
    surname = re.split(r'\s|,', first_author)[0]
    surname = to_plain(surname).replace('-', '')

    # extract the first significant word from the title; skip the empty
    # tokens produced by consecutive whitespace and fall back to '' rather
    # than leaking StopIteration when every word is a stopword
    words = re.split(r'\s', record['title'])
    first = next(
        (w.lower() for w in words if w and w.lower() not in stopwords), '')
    first = to_plain(first).split('-')[0]

    record['id'] = '%s%s%s' % (surname, record['year'], first)
    return record
def customizations(record):
    """Apply library and project customizations to a record.

    Runs the ``bibcus`` ``type``, ``author``, ``keyword`` and
    ``page_double_hyphen`` helpers, splits the ``folder``, ``url`` and
    ``file`` fields with ``splitFields`` and finishes with
    ``getPublication``.

    Args:
        record (dict): record dict.

    Returns:
        record (dict): the modified record
    """
    for step in (
        bibcus.type,
        bibcus.author,
        bibcus.keyword,
        bibcus.page_double_hyphen,
    ):
        record = step(record)

    # Field name plus optional separator, handed to splitFields unchanged.
    split_specs = (
        ('folder',),
        ('url', '\n'),
        ('file', ',|;|\n'),
    )
    for spec in split_specs:
        record = splitFields(record, *spec)

    return getPublication(record)
def custom(record):
    """Customize a record and derive its ``tags`` and ``p_authors``.

    After the ``c`` customizations, the ``tags``, ``keywords`` and
    ``mendeley-tags`` fields (when present) are split on commas and
    semicolons and merged into one set stored under ``tags``.
    ``p_authors`` holds ``c.splitname`` of every author, or an empty list
    when there is no author field.
    """
    for step in (c.type, c.author, c.editor, c.journal, c.keyword, c.link,
                 c.doi):
        record = step(record)

    collected = set()
    for field in ('tags', "keywords", "mendeley-tags"):
        if field not in record:
            continue
        flattened = record[field].replace('\n', '')
        collected.update(part.strip() for part in re.split(',|;', flattened))
    record['tags'] = collected

    record['p_authors'] = []
    if 'author' in record:
        parsed = [c.splitname(name, False) for name in record['author']]
        record['p_authors'] = parsed
    return record
def clean_full(record):
    """Customize a record and derive its ``tags`` and ``p_authors``.

    After the ``c`` customizations, the ``tags``, ``keywords`` and
    ``mendeley-tags`` fields (when present) are split on commas and
    semicolons and merged into one set stored under ``tags``.
    ``p_authors`` collects each author split on ``' and '`` followed by
    ``c.splitname`` of each editor.

    :param record: a bibtexparser record
    :returns: the modified record
    """
    for step in (c.type, c.author, c.editor, c.journal, c.keyword, c.link,
                 c.doi):
        record = step(record)

    tags = set()
    for field in ('tags', 'keywords', 'mendeley-tags'):
        if field in record:
            flattened = record[field].replace('\n', '')
            tags.update(i.strip() for i in re.split(',|;', flattened))
    record['tags'] = tags

    record['p_authors'] = []
    if 'author' in record:
        record['p_authors'] += [x.split(' and ') for x in record['author']]
    if 'editor' in record:
        # NOTE(review): bibtexparser's ``editor`` appears to turn each
        # editor into a dict with a ``name`` key; ``splitname`` needs the
        # plain string, so unwrap it when that is the case.
        record['p_authors'] += [
            c.splitname(x['name'] if isinstance(x, dict) else x, False)
            for x in record['editor']
        ]
    return record
def bib_customizations(record):
    """Reduce a record to a short author string and a truncated title.

    The record is converted to unicode and its authors are split; the
    author list is then collapsed to ``''``, the single author, or the
    first author followed by ``' et al.'``, and the title (empty when
    missing) is passed through ``smart_truncate``.
    """
    def shorten_authors(rec):
        # Commas inside each name become plain spaces.
        names = [
            name.replace(', ', ' ').replace(',', ' ')
            for name in rec.get('author', [])
        ]
        if not names:
            rec['author'] = ''
        elif len(names) == 1:
            rec['author'] = names[0]
        else:
            rec['author'] = names[0] + ' et al.'
        return rec

    def shorten_title(rec):
        rec['title'] = smart_truncate(rec.get('title', ''))
        return rec

    for step in (convert_to_unicode, author, shorten_authors, shorten_title):
        record = step(record)
    return record
def note_template(entry):
    """Return a Zim note template for *entry*.

    ``note_string`` is filled with the current local time (ISO and long
    text form), the entry's year and title, and an author line: the first
    surname plus "et al." for more than two authors, both surnames joined
    with "&" for exactly two, otherwise the first surname.
    """
    entry = author(entry)

    timestamp = datetime.now(timezone.utc).astimezone().replace(microsecond=0)
    values = {}
    values["date"] = timestamp.isoformat()
    values["date_text"] = timestamp.strftime("%A %d %B %Y")
    values["year"] = entry["year"]
    values["title"] = entry["title"]

    def surname_at(position):
        full_name = entry["author"][position]
        return full_name.split(",")[0]

    author_count = len(entry["author"])
    if author_count > 2:
        byline = surname_at(0) + " et al."
    elif author_count == 2:
        byline = "{} & {}".format(surname_at(0), surname_at(1))
    else:
        byline = surname_at(0)
    values["author"] = byline

    return note_string.format(**values)
def customize(record):
    """Customize a bibtexparser record for the publications site.

    Steps, in order: newline clean-up, type and unicode conversion, the
    library's author/journal/keyword/link/doi/page customizations,
    conversion of every author into an ``Author`` object, and wrapping of
    ``booktitle``/``series`` into ``{'name', 'ID'}`` dicts. For records
    with ``paper == "yes"`` the PDF named after the record ID is looked up
    in ``LOCAL_PDF_VAULT`` and ``paper`` is reset to ``"no"`` when the
    file is missing.

    :param record: a bibtexparser record
    :returns: the customized record
    """
    def fix_newlines(record):
        for key, value in record.items():
            # Equality, not ``key in 'url'``: the latter is a substring
            # test that also matches keys such as 'u' or 'rl'.
            if key == 'url':
                record[key] = value.replace("\n", "")
            if key not in ('author', 'url', 'editor'):
                value = value.replace("\n", " ")
                record[key] = value.replace(r"\par", "\n\n")
        return record

    record = fix_newlines(record)
    record = customization.type(record)
    record = customization.convert_to_unicode(record)

    def split_author(record):
        if 'author' in record:
            authors = []
            for author in record['author']:
                # Split on the first separator only, so a first-name part
                # that itself contains ", " no longer raises ValueError.
                lastname, firstname = author.split(", ", 1)
                authors.append(Author(firstname, lastname))
            record['author'] = authors
        return record

    def parse_kind(kind, record):
        if kind in record and record[kind]:
            remove_translate_table = str.maketrans('', '', ', .')
            # record_id determines the name of the PDF
            # it's been hard-coded in the view:
            # layouts/partials/publications_icons.html
            # ----> this might want to be refactored
            record_id = record[kind].translate(remove_translate_table)
            record[kind] = {'name': record[kind], 'ID': record_id}
        return record

    record = customization.author(record)
    record = customization.journal(record)
    record = customization.keyword(record)
    record = customization.link(record)
    record = customization.doi(record)
    record = customization.page_double_hyphen(record)
    record = split_author(record)
    for kind in ('booktitle', 'series'):
        record = parse_kind(kind, record)

    def pdf_is_there(record):
        filename = record["ID"] + ".pdf"
        path_to_file = os.path.join(LOCAL_PDF_VAULT, filename)
        print(path_to_file)
        if os.path.isfile(path_to_file):
            print("\t PDF found!")
        else:
            print("\t NO PDF!!!")
            record["paper"] = "no"
        return record

    if record.get("paper") == "yes":
        return pdf_is_there(record)
    return record
def author_extract(record):
    """Apply ``c.author`` and then ``c.editor`` to *record* and return it."""
    with_authors = c.author(record)
    return c.editor(with_authors)
def customization(record):
    """
    Customization hook for bibtexparser output: passes the record through
    ``author`` and returns the result.
    """
    customized = author(record)
    return customized
def cust2(record):
    """Apply author splitting, page hyphenation and LaTeX homogenization.

    The ``customization`` helpers ``author``, ``page_double_hyphen`` and
    ``homogenize_latex_encoding`` run in that order.
    """
    for step in (
        customization.author,
        customization.page_double_hyphen,
        customization.homogenize_latex_encoding,
    ):
        record = step(record)
    return record
def _mixed_customization(record):
    """Homogenize LaTeX encoding, convert to unicode, then split authors."""
    homogenized = homogeneize_latex_encoding(record)
    as_unicode = convert_to_unicode(homogenized)
    return bc.author(as_unicode)
def format_authors(entry, abbreviate_first=True, et_al_at=1000):
    """Format the authors of *entry* as a readable one-line string.

    this is the way i like it, tweak as needed.

    :param entry: record with an unsplit ``author`` field (``ID`` is only
        read for the diagnostic message)
    :param abbreviate_first: when true, first names are reduced to
        initials; otherwise they are kept as given
    :param et_al_at: author count above which only the first author is
        kept, followed by "et al."
    :returns: the author line after passing through ``cleanup``; the input
        to ``cleanup`` is empty when no author name could be extracted
    """
    # Split author field into a list of "Name, Surname". The split seems to
    # happen in place, that's why we copy first.
    r = entry.copy()
    btxc.author(r)
    names = r["author"]

    authors = []
    for name in names:
        # e.g. {'first': ['F.', 'Paul'], 'last': ['Spitzner'],
        #       'von': [], 'jr': []}
        split = btxc.splitname(name)

        if not abbreviate_first:
            first = " ".join(split["first"])
        else:
            first = ""
            for f in split["first"]:
                if not f:
                    continue
                # Spelled-out names (3+ chars), bare initials ("F") and
                # punctuated initials ("F.") all reduce to "F.". The
                # length test also avoids indexing f[1] on a one-character
                # token, which used to raise IndexError.
                if len(f) != 2 or f[1] in ".:;":
                    first += f[0] + "."
                else:
                    print(
                        f"Adapt the `format_authors` script to your needs for entry {r['ID']}"
                    )

        last = " ".join(split["last"])
        von = " ".join(split["von"])
        jr = " ".join(split["jr"])

        # stitch the name together and fix capitalization
        temp = first.title()
        if len(von) > 0:
            temp += " " + von.lower()
        temp += " " + last  # do not title case this, breaks e.g. "de Heuvel"
        if len(jr) > 0:
            temp += " " + jr.lower()

        authors.append(temp)

    # now we have a list of nicely formatted authors, make this a readable
    # one-liner for the website
    if not authors:
        # Nothing to format; indexing authors[0] below would fail.
        res = ""
    elif len(authors) > et_al_at:
        res = authors[0] + " et al."
    elif len(authors) == 1:
        res = authors[0]
    else:
        res = ", ".join(authors[:-1]) + " and " + authors[-1]

    # cleanup bibtex brackets
    return cleanup(res)
def test_author_none(self):
    """``author`` returns an empty record when the author value is None."""
    self.assertEqual(author({'author': None}), {})
def test_author_others(self):
    """``author`` keeps a trailing ``others`` as an entry of its own."""
    parsed = author({'author': 'Foo G. Bar and Lee B. Smith and others'})
    self.assertEqual(
        parsed,
        {
            'author': [
                ['Foo G.', 'Bar'],
                ['Lee B.', 'Smith'],
                ['', 'others'],
            ]
        },
    )
def cust(rec):
    """Return *rec* after passing it through ``cst.author``."""
    return cst.author(rec)
def customize(record):
    """Convert *record* to unicode, then split its author field."""
    as_unicode = customization.convert_to_unicode(record)
    return customization.author(as_unicode)
def _bibtexparser_customizations(record):
    """Apply ``author``, ``keyword`` and ``_fix_text_grouping`` in order."""
    for step in (author, keyword, _fix_text_grouping):
        record = step(record)
    return record