def tidy_up(bib_entries, report, verbose):
    """Normalise parsed bib entries in place and record non-standard types.

    For each entry this function:

    * passes the entry through ``author`` and re-joins the resulting name
      list into a single ``' and '``-separated string,
    * collects ``(ID, ENTRYTYPE)`` for entries whose type is not in
      ``STANDARD_TYPES`` (and logs them when *verbose* is true),
    * counts entries per entry type,
    * stores a ``sig1`` signature on the entry: the title with spaces
      removed, lower-cased.

    :param bib_entries: iterable of entry dicts; each must provide
        ``'ENTRYTYPE'``, ``'ID'`` and ``'title'``.
    :param report: dict that receives the ``'non_standard_list'`` key.
    :param verbose: when true, log every non-standard entry via ``info``.
    :raises KeyError: if an entry lacks ``'ENTRYTYPE'``, ``'ID'`` or
        ``'title'``.
    """
    info("tidying up the parsed bib file ...")
    non_standard_entry_list = []
    # NOTE(review): the per-type counts are built but not stored in `report`
    # within the visible code -- confirm whether that is intended.
    entry_type_dict = {}

    for bib_entry in bib_entries:
        # reformat the author names
        if 'author' in bib_entry:
            bib_entry = author(bib_entry)
            # author() can remove the key altogether (e.g. for an empty
            # field), so check again before joining.
            if 'author' in bib_entry:
                bib_entry['author'] = ' and '.join(bib_entry['author'])

        entry_type = bib_entry['ENTRYTYPE']

        # warn about non-standard entry types
        if entry_type not in STANDARD_TYPES:
            non_standard_entry_list.append((bib_entry['ID'], entry_type))
            if verbose:
                info("entry: {} has a non-standard type: {}".format(
                    bib_entry['ID'], entry_type))

        # count the number of entries for different entry types
        entry_type_dict[entry_type] = entry_type_dict.get(entry_type, 0) + 1

        # build up signatures for bib entries
        # TODO: maybe use more signatures
        bib_entry['sig1'] = bib_entry['title'].replace(' ', '').lower()

    report['non_standard_list'] = non_standard_entry_list
Example #2
0
    def __call__ (self, rec):
        """Customize one parsed record and return it.

        Steps, in order: ``convert_to_unicode`` and ``type`` from
        bibtexparser; replacement of ``{\\nbsp}`` and TeX-style double
        quotes in every field; journal lookup in ``_bib_journals`` (keyed by
        the lower-cased journal, falling back to the original value);
        ``author`` splitting; rewriting of each author as
        ``<rest> <surname>`` joined by ``'; '``; and finally setting
        ``wl_cite`` from ``_bib_cite``. ``wl_mypos`` is set to the 1-based
        position of the author whose lower-cased surname equals
        ``self.mylsurname``.
        """
        # `type` here is bibtexparser's customization, not the builtin.
        from bibtexparser.customization import author, type, convert_to_unicode

        rec = convert_to_unicode (rec)
        rec = type (rec)

        for key in rec.keys ():
            cleaned = rec.get (key).replace ('{\\nbsp}', nbsp)
            cleaned = cleaned.replace ('``', u'“')
            rec[key] = cleaned.replace ("''", u'”')

        if 'journal' in rec:
            journal_name = rec['journal']
            rec['journal'] = _bib_journals.get (journal_name.lower (),
                                                journal_name)

        rec = author (rec)

        if 'author' in rec:
            formatted = []

            for position, raw_name in enumerate (rec['author'], 1):
                name = raw_name.replace ('{', '').replace ('}', '')
                name = name.replace ('~', ' ')
                surname, rest = name.split (',', 1)
                if surname.lower () == self.mylsurname:
                    rec['wl_mypos'] = unicode (position)
                # Spaces inside the surname become underscores.
                formatted.append (rest + ' ' + surname.replace (' ', '_'))

            rec['author'] = '; '.join (formatted)

        rec['wl_cite'] = _bib_cite (rec)
        return rec
def tags(record):
    """Customize a record: split its tags and derive ``p_authors``.

    The record is passed through ``convert_to_unicode``, ``c.author`` and
    ``c.editor``. The ``tags`` field (if any) is split on ``,`` or ``;``
    after newlines are removed, and replaced by a set of stripped strings
    (an empty set when the field is absent). ``p_authors`` is filled with
    ``c.splitname(x, False)`` for each author; if the record has an
    ``editor`` field, the editor-derived list replaces it.

    :param record: the record dict; must contain ``'ID'``.
    :returns: the customized record.
    """
    record = b.customization.convert_to_unicode(record)
    record = c.author(record)
    record = c.editor(record)
    tag_set = set()

    if 'tags' in record:
        tag_set.update(
            part.strip()
            for part in re.split(',|;', record["tags"].replace('\n', ''))
        )

    record['tags'] = tag_set
    record['p_authors'] = []
    logging.debug(f"Handling: {record['ID']}")
    if 'author' in record:
        try:
            record['p_authors'] = [
                c.splitname(x, False) for x in record['author']
            ]
        except Exception:
            # Best effort: keep the empty list, but record the failure with
            # its traceback instead of dropping into a debugger.
            logging.exception(
                "Could not split author names for %s", record['ID'])
    # NOTE(review): an editor list overwrites any author-derived list here
    # rather than extending it -- confirm this is intended.
    if 'editor' in record:
        record['p_authors'] = [c.splitname(x, False) for x in record['editor']]

    return record
Example #4
0
    def __call__(self, rec):
        """Customize one parsed record and return it.

        Applies bibtexparser's ``convert_to_unicode`` and ``type``, replaces
        ``{\\nbsp}`` and TeX-style double quotes in every field, maps the
        journal through ``_bib_journals`` (keyed by its lower-cased value,
        defaulting to the original), splits authors with ``author``,
        rewrites each as ``<rest> <surname>`` joined by ``"; "``, and sets
        ``wl_cite`` from ``_bib_cite``. ``wl_mypos`` records the 1-based
        position of the author whose lower-cased surname equals
        ``self.mylsurname``.
        """
        # `type` here is bibtexparser's customization, not the builtin.
        from bibtexparser.customization import author, type, convert_to_unicode

        rec = convert_to_unicode(rec)
        rec = type(rec)

        for key in rec.keys():
            cleaned = rec.get(key).replace("{\\nbsp}", nbsp)
            cleaned = cleaned.replace("``", u"“")
            rec[key] = cleaned.replace("''", u"”")

        if "journal" in rec:
            journal_name = rec["journal"]
            rec["journal"] = _bib_journals.get(journal_name.lower(), journal_name)

        rec = author(rec)

        if "author" in rec:
            formatted = []

            for position, raw_name in enumerate(rec["author"], 1):
                name = raw_name.replace("{", "").replace("}", "")
                name = name.replace("~", " ")
                surname, rest = name.split(",", 1)
                if surname.lower() == self.mylsurname:
                    rec["wl_mypos"] = unicode(position)
                # Spaces inside the surname become underscores.
                formatted.append(rest + " " + surname.replace(" ", "_"))

            rec["author"] = "; ".join(formatted)

        rec["wl_cite"] = _bib_cite(rec)
        return rec
Example #5
0
    def _customizations(self, record):
        """Rewrite the record ID as ``<first author>:<year>:<title>``.

        The first-author part is the text before the first comma of the
        first name produced by ``author``; ``"NONE"`` is used when the record
        has no author. The year defaults to ``"0000"`` and the title to
        ``"NONE"``. The title is reduced to ASCII and every match of
        ``self._REGEX`` is replaced by ``_``. The ``author`` field is
        restored to its original value before returning.

        :param record: a record
        :returns: -- customized record
        """
        # Remember the untouched author field; a missing author is valid and
        # handled below, so do not index the key unconditionally.
        had_author = "author" in record
        orig_author_name = record.get("author")

        record = author(record)
        if "author" in record:
            author_name = record["author"][0].split(",")[0]
        else:
            author_name = "NONE"

        year = record.get("year", "0000")

        title = "NONE"
        if "title" in record:
            # Drop non-ASCII characters but keep a text string, so the regex
            # substitution and the concatenation below work on Python 3 too.
            title = record["title"].encode('ascii', 'ignore').decode('ascii')
            title = re.sub(self._REGEX, '_', title)

        if "ID" in record:
            record["ID"] = author_name + ":" + year + ":" + title

        if had_author:
            record["author"] = orig_author_name
        return record
Example #6
0
def _customizations_unicode(record):
    """
    Customize a record for raw style.

    Applies ``page_double_hyphen``, ``convert_to_unicode`` and ``author``
    from the ``customization`` module, in that order.
    See bibtexparser lib for more info.
    """
    steps = (
        customization.page_double_hyphen,
        customization.convert_to_unicode,
        customization.author,
    )
    for step in steps:
        record = step(record)
    return record
Example #7
0
def _customizations_latex(record):
    """
    Customize a record for bibtex.

    Applies ``page_double_hyphen``, ``homogenize_latex_encoding`` and
    ``author`` from the ``customization`` module, in that order.
    See bibtexparser lib for more info.
    """
    steps = (
        customization.page_double_hyphen,
        customization.homogenize_latex_encoding,
        customization.author,
    )
    for step in steps:
        record = step(record)
    return record
Example #8
0
def _customizations_latex(record):
    """
    Customize a record for bibtex.

    Applies ``page_double_hyphen``, ``homogeneize_latex_encoding`` and
    ``author`` from the ``customization`` module, in that order.
    See bibtexparser lib for more info.
    """
    with_hyphens = customization.page_double_hyphen(record)
    with_latex = customization.homogeneize_latex_encoding(with_hyphens)
    return customization.author(with_latex)
Example #9
0
def _customizations_unicode(record):
    """
    Customize a record for raw style.

    Applies ``page_double_hyphen``, ``convert_to_unicode`` and ``author``
    from the ``customization`` module, in that order.
    See bibtexparser lib for more info.
    """
    with_hyphens = customization.page_double_hyphen(record)
    as_unicode = customization.convert_to_unicode(with_hyphens)
    return customization.author(as_unicode)
def _customizations(record):
    """
    Bibtexparser customizations applied to every entry found in the .bib
    files: unicode conversion, lower-casing of the entry type, and splitting
    of the author and editor fields into lists.
    """
    # Order matters: unicode conversion runs before the fields are split.
    # `type` is expected to be the bibtexparser customization here, not the
    # builtin (it is called on a record and its result used as a record).
    for step in (convert_to_unicode, type, author, editor):
        record = step(record)
    return record
Example #11
0
def customizations(record):
    """Run *record* through the customization steps below, in order.

    :param record: a record
    :returns: the customized record
    """
    pipeline = (
        bib_type,
        author,
        editor,
        journal,
        keyword,
        link,
        page_double_hyphen,
        doi,
    )
    for step in pipeline:
        record = step(record)
    return record
Example #12
0
def guess_key(entry):
    """Return a guessed key of the form ``<surnames>-<year>`` for *entry*.

    Works on a deep copy, so *entry* itself is not modified. With more than
    two authors only the first surname is used; otherwise all surnames are
    joined with ``-``. Surnames are lower-cased. ``YYYY`` stands in for a
    missing year.
    """
    parsed = author(deepcopy(entry))
    names = parsed["author"]

    if len(names) > 2:
        prefix = names[0].split(",")[0].lower()
    else:
        prefix = "-".join(name.split(",")[0].lower() for name in names)

    # Use YYYY if the year is not present
    return f"{prefix}-{parsed.get('year', 'YYYY')}"
Example #13
0
def customizations(record):
    """Use some functions delivered by the library.

    Applies ``author`` and ``convert_to_unicode``, then cleans the
    ``annote`` field with ``strip_chars`` when the record has one.

    :param record: a record
    :returns: the customized record
    """
    # The other library steps (type, editor, journal, keyword, link,
    # page_double_hyphen, doi) are left disabled here.
    record = author(record)
    record = convert_to_unicode(record)
    # Not every entry carries an annotation; only clean it when present.
    if 'annote' in record:
        record['annote'] = strip_chars(record['annote'])
    return record
Example #14
0
def td_biblio_customization(record):
    """
    Customize BibTex records parsing
    """
    pipeline = (
        # First convert problematic content to latex ...
        to_latex,
        # ... and then to unicode
        bp_customization.convert_to_unicode,
        bp_customization.type,
        bp_customization.author,
        bp_customization.editor,
        bp_customization.page_double_hyphen,
    )
    for step in pipeline:
        record = step(record)

    return record
Example #15
0
def _parse_bib_entry(entry):
    """
    Customization function for bibtexparser.

    Converts to unicode first when ``CONVERT_TO_UNICODE`` is truthy, then
    applies ``author``, ``editor``, ``keyword`` and ``page_double_hyphen``.

    :param entry: bibtex record to modify
    :return bibtex record
    """
    if CONVERT_TO_UNICODE:
        entry = bib_custom.convert_to_unicode(entry)

    for step in (
        bib_custom.author,
        bib_custom.editor,
        bib_custom.keyword,
        bib_custom.page_double_hyphen,
    ):
        entry = step(entry)

    return entry
Example #16
0
def customizations(record):
    """Use some functions delivered by the library

    Applies ``convert_to_unicode``, ``author``, ``editor`` and
    ``page_double_hyphen``, in that order.

    :param record: a record
    :returns: -- customized record
    """
    # Disabled steps: type, journal (do not use!), keyword, link, doi.
    for step in (convert_to_unicode, author, editor, page_double_hyphen):
        record = step(record)
    return record
def customize(record):
    """ Customise bibtexparser records

    Converts the record to unicode, runs ``tex_to_html`` over the author,
    title and journal fields that are present, then applies ``author``,
    ``split_authors`` and ``pages_endash``.
    """
    record = customization.convert_to_unicode(record)
    for field_name in ('author', 'title', 'journal'):
        try:
            record[field_name] = tex_to_html(record[field_name])
        except KeyError:
            # Field not present on this record: nothing to convert.
            continue
    # Split author into a list of authors, then turn each author into a
    # tuple of last, first name.
    record = split_authors(customization.author(record))
    return pages_endash(record)
def customize(record):
    """ Customise bibtexparser records

    Converts the record to unicode, runs ``tex_to_html`` over the author,
    title and journal fields that are present, then applies ``author``,
    ``split_authors`` and ``pages_endash``.
    """
    record = customization.convert_to_unicode(record)
    for field_name in ('author', 'title', 'journal'):
        try:
            record[field_name] = tex_to_html(record[field_name])
        except KeyError:
            # Field not present on this record: nothing to convert.
            continue
    # Split author into a list of authors, then turn each author into a
    # tuple of last, first name.
    record = split_authors(customization.author(record))
    return pages_endash(record)
Example #19
0
def customizations(record):
    """Use some functions delivered by the library

    :param record: a record
    :returns: -- customized record

    """
    pipeline = (
        bc.convert_to_unicode,
        bc.type,  # lowercase
        bc.author,
        bc.editor,
        bc.journal,
        bc.keyword,
        bc.link,
        bc.page_double_hyphen,
        bc.doi,
    )
    for step in pipeline:
        record = step(record)
    return record
def customizations(record):
    """Use some functions delivered by the library

    :param record: a record
    :returns: -- customized record

    """
    pipeline = (
        bc.convert_to_unicode,
        bc.type,  # lowercase
        bc.author,
        bc.editor,
        bc.journal,
        bc.keyword,
        bc.link,
        bc.page_double_hyphen,
        bc.doi,
    )
    for step in pipeline:
        record = step(record)
    return record
def customizations(record):
    ''' Use some customizations for bibtexparser

    Applies ``convert_to_unicode``, ``author``, ``editor`` and
    ``page_double_hyphen``, in that order.

    Args:
        record: A record

    Returns:
        record: Customized record
    '''
    # Disabled steps: type, journal (do not use!), keyword, link, doi.
    for step in (convert_to_unicode, author, editor, page_double_hyphen):
        record = step(record)
    return record
Example #22
0
def customizations(record):
    """Use some functions delivered by the library

    Applies ``customization.author``, ``editor`` and
    ``customization.page_double_hyphen``, in that order.

    :param record: a record
    :returns: -- customized record
    """
    # Disabled steps: homogenize_latex_encoding, type, journal (which turns
    # the field into a dict), keyword, link, doi.
    with_authors = customization.author(record)
    with_editors = editor(with_authors)
    return customization.page_double_hyphen(with_editors)
Example #23
0
def id_from_authoryear(record):
    """Set ``record['id']`` to ``<surname><year><first title word>``.

    The surname comes from the first author, or from the first editor when
    the record has no ``author`` key. The title word is the first
    non-empty word that is not in ``stopwords``, cut at its first hyphen;
    it is empty when every word is a stopword. Both parts are passed
    through ``latex2str``/``unidecode`` and lower-cased.

    :param record: record dict; must contain ``'title'``, ``'year'`` and
        either ``'author'`` or ``'editor'``.
    :returns: the same record with ``'id'`` set.
    :raises KeyError: if a required field is missing.
    """
    def to_plain(text):
        # presumably latex2str converts LaTeX markup and unidecode maps to
        # ASCII -- both are external helpers; verify against their docs.
        return latex2str(
            text, lambda u: unidecode(u) if u is not None else '').lower()

    try:
        first_author = author({'author': record['author']})['author'][0]
    except KeyError:
        first_author = editor({'editor': record['editor']})['editor'][0]['name']

    # Raw strings: '\s' in a plain literal is an invalid escape sequence.
    surname = to_plain(re.split(r'\s|,', first_author)[0])
    surname = surname.replace('-', '')

    # Extract the first significant word from the title. Empty tokens
    # (leading or repeated whitespace) are skipped, and a title made only of
    # stopwords yields an empty word instead of raising StopIteration.
    words = (w.lower() for w in re.split(r'\s', record['title']) if w)
    first = next((w for w in words if w not in stopwords), '')
    first = to_plain(first)
    if '-' in first:
        first = first.split('-')[0]

    new_id = '%s%s%s' % (surname, record['year'], first)
    record['id'] = new_id
    return record
Example #24
0
def customizations(record):
    """Use some functions delivered by the library

    Applies the ``bibcus`` steps type, author, keyword and
    page_double_hyphen, then splits the ``folder``, ``url`` and ``file``
    fields with ``splitFields`` and finishes with ``getPublication``.

    Args:
        record (dict): record dict.

    Returns: record (dict): the modified record
    """
    # Disabled bibcus steps: editor, journal, link, doi.
    for step in (
        bibcus.type,
        bibcus.author,
        bibcus.keyword,
        bibcus.page_double_hyphen,
    ):
        record = step(record)

    # Each tuple holds the arguments passed to splitFields after the record.
    for split_args in (('folder',), ('url', '\n'), ('file', ',|;|\n')):
        record = splitFields(record, *split_args)

    return getPublication(record)
Example #25
0
def custom(record):
    """Customize a record and derive its ``tags`` and ``p_authors`` fields.

    After the library steps, the ``tags``, ``keywords`` and
    ``mendeley-tags`` fields (those present) are split on ``,`` or ``;``
    with newlines removed, and merged into one set stored under ``tags``.
    ``p_authors`` holds ``c.splitname(x, False)`` for each author, or an
    empty list when there is no author field.
    """
    for step in (c.type, c.author, c.editor, c.journal, c.keyword, c.link, c.doi):
        record = step(record)

    collected = set()
    for field in ('tags', 'keywords', 'mendeley-tags'):
        if field in record:
            flattened = record[field].replace('\n', '')
            collected.update([part.strip() for part in re.split(',|;', flattened)])

    record['tags'] = collected
    record['p_authors'] = []
    if 'author' in record:
        record['p_authors'] = [c.splitname(name, False) for name in record['author']]
    return record
def clean_full(record):
    """Customize a record and derive its ``tags`` and ``p_authors`` fields.

    After the library steps, the ``tags``, ``keywords`` and
    ``mendeley-tags`` fields (those present) are split on ``,`` or ``;``
    with newlines removed, and merged into one set stored under ``tags``.
    ``p_authors`` collects each author string split on ``' and '``,
    followed by ``c.splitname(x, False)`` for each editor.
    """
    for step in (c.type, c.author, c.editor, c.journal, c.keyword, c.link,
                 c.doi):
        record = step(record)

    collected = set()
    for field in ('tags', 'keywords', 'mendeley-tags'):
        if field not in record:
            continue
        flattened = record[field].replace('\n', '')
        collected.update([part.strip() for part in re.split(',|;', flattened)])

    record['tags'] = collected
    record['p_authors'] = []

    if 'author' in record:
        record['p_authors'] += [
            name.split(' and ') for name in record['author']
        ]

    if 'editor' in record:
        record['p_authors'] += [
            c.splitname(name, False) for name in record['editor']
        ]

    return record
Example #27
0
def bib_customizations(record):
    """Convert to unicode, shorten the author list and truncate the title."""

    def truncate_title(record):
        # A missing title is treated as an empty string.
        record['title'] = smart_truncate(record.get('title', ''))
        return record

    def et_al(record):
        # Commas in each name are replaced by spaces before use.
        names = [
            name.replace(', ', ' ').replace(',', ' ')
            for name in record.get('author', [])
        ]
        if not names:
            record['author'] = ''
        elif len(names) == 1:
            record['author'] = names[0]
        else:
            record['author'] = names[0] + ' et al.'
        return record

    record = author(convert_to_unicode(record))
    return truncate_title(et_al(record))
def bib_customizations(record):
    """Convert to unicode, shorten the author list and truncate the title."""

    def truncate_title(record):
        # A missing title is treated as an empty string.
        record['title'] = smart_truncate(record.get('title', ''))
        return record

    def et_al(record):
        # Commas in each name are replaced by spaces before use.
        names = [
            name.replace(', ', ' ').replace(',', ' ')
            for name in record.get('author', [])
        ]
        if not names:
            record['author'] = ''
        elif len(names) == 1:
            record['author'] = names[0]
        else:
            record['author'] = names[0] + ' et al.'
        return record

    record = author(convert_to_unicode(record))
    return truncate_title(et_al(record))
Example #29
0
File: read.py Project: khaeru/bib
def note_template(entry):
    """Return a Zim note template for *entry*.

    The template ``note_string`` is filled with the current local time
    (ISO format and a long text form), the entry's year and title, and an
    author label: one surname, two surnames joined by ``&``, or the first
    surname followed by ``et al.`` for more than two authors.
    """
    entry = author(entry)

    def surname(index):
        # Surname is the text before the first comma of the name.
        return entry["author"][index].split(",")[0]

    # Local, timezone-aware timestamp truncated to whole seconds.
    now = datetime.now(timezone.utc).astimezone().replace(microsecond=0)

    values = dict(
        date=now.isoformat(),
        date_text=now.strftime("%A %d %B %Y"),
        year=entry["year"],
        title=entry["title"],
    )

    author_count = len(entry["author"])
    if author_count > 2:
        label = surname(0) + " et al."
    elif author_count == 2:
        label = "{} & {}".format(surname(0), surname(1))
    else:
        label = surname(0)
    values["author"] = label

    return note_string.format(**values)
Example #30
0
def customize(record):
    """Customize a parsed record and check for its PDF when requested.

    Newlines are normalised first, then the library customizations are
    applied, authors are turned into ``Author`` objects, and the
    ``booktitle`` and ``series`` fields become ``{'name', 'ID'}`` dicts.
    When the record has ``paper == "yes"``, the file ``<ID>.pdf`` is looked
    up in ``LOCAL_PDF_VAULT`` and ``paper`` is reset to ``"no"`` if it is
    missing.

    :param record: the record dict to customize.
    :returns: the customized record.
    """

    def fix_newlines(record):
        # Only existing keys are reassigned, so iterating items() is safe.
        for key, value in record.items():
            # Equality, not `key in 'url'`: the latter is a substring test
            # that also matches keys such as 'u' or 'rl'.
            if key == 'url':
                record[key] = value.replace("\n", "")
            if key not in ('author', 'url', 'editor'):
                value = value.replace("\n", " ")
                record[key] = value.replace(r"\par", "\n\n")
        return record

    def split_author(record):
        if 'author' in record:
            authors = []
            for author in record['author']:
                # Split on the first separator only, so a name with a
                # further ", " (e.g. a suffix) does not fail to unpack.
                lastname, firstname = author.split(", ", 1)
                authors.append(Author(firstname, lastname))
            record['author'] = authors
        return record

    def parse_kind(kind, record):
        if kind in record and record[kind]:
            remove_translate_table = str.maketrans('', '', ', .')
            # record_id determines the name of the PDF
            # it's been hard-coded in the view:
            # layouts/partials/publications_icons.html
            # ----> this might want to be refactored
            record_id = record[kind].translate(remove_translate_table)
            record[kind] = {'name': record[kind], 'ID': record_id}
        return record

    def pdf_is_there(record):
        filename = record["ID"] + ".pdf"
        path_to_file = os.path.join(LOCAL_PDF_VAULT, filename)
        print(path_to_file)
        if os.path.isfile(path_to_file):
            print("\t PDF found!")
        else:
            print("\t NO PDF!!!")
            record["paper"] = "no"
        return record

    record = fix_newlines(record)
    record = customization.type(record)
    record = customization.convert_to_unicode(record)

    record = customization.author(record)
    record = customization.journal(record)
    record = customization.keyword(record)
    record = customization.link(record)
    record = customization.doi(record)
    record = customization.page_double_hyphen(record)
    record = split_author(record)

    for kind in ('booktitle', 'series'):
        record = parse_kind(kind, record)

    if record.get("paper") == "yes":
        return pdf_is_there(record)

    return record
def author_extract(record):
    """Apply ``c.author`` and then ``c.editor`` to *record*."""
    return c.editor(c.author(record))
def customization(record):
    """
    A customization for the output of bibtexparser: applies ``author``.
    """
    customized = author(record)
    return customized
 def cust2(record):
     """Apply author, page_double_hyphen and homogenize_latex_encoding."""
     for step in (
         customization.author,
         customization.page_double_hyphen,
         customization.homogenize_latex_encoding,
     ):
         record = step(record)
     return record
 def _mixed_customization(record):
     """Apply homogeneize_latex_encoding, convert_to_unicode, then author."""
     as_latex = homogeneize_latex_encoding(record)
     as_unicode = convert_to_unicode(as_latex)
     return bc.author(as_unicode)
def format_authors(entry, abbreviate_first=True, et_al_at=1000):
    """Format the authors of *entry* as a readable one-line string.

    This is the way I like it, tweak as needed.

    :param entry: record dict; a shallow copy is made and *entry* is never
        rebound, because ``btxc.author`` appears to work in place.
    :param abbreviate_first: when true, reduce first names to initials
        (``"Paul"`` -> ``"P."``); otherwise keep them spelled out.
    :param et_al_at: with more than this many authors, only the first one
        is shown, followed by ``"et al."``.
    :returns: the formatted author string, passed through ``cleanup``;
        built from an empty string when the entry has no authors.
    """
    # Split author field into a list of "Surname, Name". seems to be inplace,
    # thats why we copy first
    r = entry.copy()
    btxc.author(r)
    # The field may be missing, or removed by author() when it is empty.
    names = r.get("author", [])
    authors = []

    for name in names:
        # {'first': ['F.', 'Paul'], 'last': ['Spitzner'], 'von': [], 'jr': []}
        split = btxc.splitname(name)
        if not abbreviate_first:
            first = " ".join(split["first"])
        else:
            initials = []
            for f in split["first"]:
                if not f:
                    continue
                # Spelled-out names, bare single letters ("F") and
                # already-abbreviated ones ("F.") become an initial. Only
                # two-character names without punctuation stay undecided.
                if len(f) != 2 or f[1] in ".:;":
                    initials.append(f[0] + ".")
                else:
                    print(
                        f"Adapt the `format_authors` script to your needs for entry {r['ID']}"
                    )
            first = "".join(initials)

        last = " ".join(split["last"])
        von = " ".join(split["von"])
        jr = " ".join(split["jr"])

        # stitch the name together and fix capitalization
        temp = first.title()
        if len(von) > 0:
            temp += " " + von.lower()
        temp += " " + last  # do not title case this, breaks e.g. "de Heuvel"
        if len(jr) > 0:
            temp += " " + jr.lower()

        authors.append(temp)

    # now we have a list of authors nicely formatted, make this a readable
    # one-liner for the website
    if not authors:
        res = ""
    elif len(authors) > et_al_at:
        res = authors[0] + " et al."
    elif len(authors) == 1:
        res = authors[0]
    else:
        res = ", ".join(authors[:-1]) + " and " + authors[-1]

    # cleanup bibtex brackets
    return cleanup(res)
 def test_author_none(self):
     """author() on a record whose author is None must return {}."""
     self.assertEqual(author({'author': None}), {})
 def test_author_others(self):
     """author() must split names joined by 'and', keeping 'others'."""
     parsed = author({'author': 'Foo G. Bar and Lee B. Smith and others'})
     wanted_names = [['Foo G.', 'Bar'], ['Lee B.', 'Smith'], ['', 'others']]
     self.assertEqual(parsed, {'author': wanted_names})
Example #38
0
def cust(rec):
    """Apply ``cst.author`` to *rec* and return the result."""
    return cst.author(rec)
Example #39
0
def customization(record):
    """
    A customization for the output of bibtexparser: applies ``author``.
    """
    customized = author(record)
    return customized
def customize(record):
    """Convert *record* to unicode, then apply ``author``."""
    return customization.author(customization.convert_to_unicode(record))
Example #41
0
 def cust2(record):
     """Apply author, page_double_hyphen and homogenize_latex_encoding."""
     for step in (
         customization.author,
         customization.page_double_hyphen,
         customization.homogenize_latex_encoding,
     ):
         record = step(record)
     return record
Example #42
0
def _bibtexparser_customizations(record):
    """Apply ``author``, ``keyword`` and ``_fix_text_grouping`` in order."""
    for step in (author, keyword, _fix_text_grouping):
        record = step(record)
    return record