Python convert_to_unicode Examples, bibtexparser.customization.convert_to_unicode Python Examples

Example #1

0

Show file

File: test_customization.py Project: starfox644/bibmngr2015py

 def test_convert_to_unicode(self):
     """Check that LaTeX accent commands are converted to Unicode.

     Each accent is tried with both brace placements: braces around the
     whole command and braces around the letter only.
     """
     # Grave accent. The backslash is spelled "\\" because backslash-backtick
     # is not a recognised escape sequence in a normal string literal.
     record = {'toto': '{\\`a} \\`{a}'}
     result = convert_to_unicode(record)
     expected = {'toto': 'à à'}
     self.assertEqual(result, expected)
     # Umlaut.
     record = {'toto': '{\\"u} \\"{u}'}
     result = convert_to_unicode(record)
     expected = {'toto': 'ü ü'}
     self.assertEqual(result, expected)

Example #2

0

Show file

 def test_convert_to_unicode(self):
     """Check grave-accent and umlaut conversion for both brace placements."""
     # Raw strings keep the LaTeX backslashes literal and avoid the invalid
     # backslash-backtick escape sequence of a normal string literal.
     record = {'toto': r'{\`a} \`{a}'}
     result = convert_to_unicode(record)
     expected = {'toto': 'à à'}
     self.assertEqual(result, expected)
     record = {'toto': r'{\"u} \"{u}'}
     result = convert_to_unicode(record)
     expected = {'toto': 'ü ü'}
     self.assertEqual(result, expected)

Example #3

0

Show file

 def extract_all_entries(bibfile, unicode_conversion=False):
     """
     Return dict: {citekey: {title, authors, editors, year}}

     The file is scanned line by line. A line matching
     ``Autobib.citekey_matcher`` starts a new entry; author, editor, title
     and year lines that follow are stored under that entry's citekey.

     :param bibfile: path of the bibtex file to read (opened as UTF-8)
     :param unicode_conversion: when true, author, editor and title values
         are passed through ``convert_to_unicode`` before further parsing
     :returns: mapping of citekey to the fields found for it; an empty dict
         when ``bibfile`` does not exist
     """
     entries = defaultdict(lambda: defaultdict(str))
     if not os.path.exists(bibfile):
         print('bibfile not found:', bibfile)
         return {}

     def to_unicode(field, value):
         # convert_to_unicode works on whole records, so wrap the single
         # value in a one-field record and unwrap the result.
         if unicode_conversion:
             return convert_to_unicode({field: value})[field]
         return value

     current_citekey = None
     with open(bibfile, mode='r', encoding='utf-8') as f:
         for line in f:
             line = line.strip()
             if line.endswith(','):
                 line = line[:-1]
             match = Autobib.citekey_matcher.findall(line)
             if match:
                 current_citekey = match[0]
                 continue
             if current_citekey is None:
                 # Field line before any entry header: there is no entry
                 # to attach it to, so skip it instead of failing.
                 continue
             match = Autobib.author_matcher.findall(line)
             if match:
                 authors = to_unicode('author', match[0])
                 authors = Autobib.parse_authors(authors)
                 entries[current_citekey]['authors'] = authors
                 continue
             match = Autobib.editor_matcher.findall(line)
             if match:
                 editors = to_unicode('editor', match[0])
                 editors = Autobib.parse_authors(editors)
                 # Store the parsed editors (not the authors seen earlier).
                 entries[current_citekey]['editors'] = editors
                 continue
             match = Autobib.title_matcher.findall(line)
             if match:
                 title = to_unicode('title', match[0])
                 title = Autobib.remove_latex_commands(title)
                 entries[current_citekey]['title'] = title
                 continue
             match = Autobib.year_matcher.findall(line)
             if match:
                 year = Autobib.remove_latex_commands(match[0])
                 entries[current_citekey]['year'] = year
                 continue
     return entries

Example #4

0

Show file

 def test_convert_to_unicode(self):
     """Check LaTeX-to-Unicode conversion on a table of records.

     Every case is an (input record, expected record) pair.
     """
     cases = [
         # Grave accent, both brace placements. The backslash is written
         # as "\\" because backslash-backtick is not a valid escape.
         ({'toto': '{\\`a} \\`{a}'}, {'toto': 'à à'}),
         # Umlaut, both brace placements.
         ({'toto': '{\\"u} \\"{u}'}, {'toto': 'ü ü'}),
         # From issue 121
         ({'title': '{Two Gedenk\\"uberlieferung der Angelsachsen}'},
          {'title': '{Two Gedenküberlieferung der Angelsachsen}'}),
     ]
     for record, expected in cases:
         result = convert_to_unicode(record)
         self.assertEqual(result, expected)

Example #5

0

Show file

File: clean.py Project: jgrey4296/bookmark_organizer

def maybe_unicode(record):
    """Convert *record* to unicode, recording a failure instead of raising.

    On ``TypeError`` a warning is logged, the message is stored under
    ``record['unicode_error']`` and ``"unicode_error"`` is appended to the
    ``record['error']`` list (created when absent).

    :param record: bibtex record (dict) with at least an ``'ID'`` key
    :returns: the converted record, or the annotated original on failure
    """
    try:
        record = c.convert_to_unicode(record)
    except TypeError as e:
        logging.warning("Unicode Error on: {}".format(record['ID']))
        record['unicode_error'] = str(e)
        # setdefault: the error list may not have been created yet.
        record.setdefault('error', []).append("unicode_error")
    # Hand the result back so the function can be used as a customization
    # step; callers that ignored the old None return are unaffected.
    return record

Example #6

0

Show file

File: worklog.py Project: pkgw/worklog-tools

    def __call__(self, rec):
        """Customize one bibtex record and return it.

        Applies the ``convert_to_unicode`` and ``type`` customizations,
        replaces TeX quote/nbsp markup in every field, maps the journal
        through ``_bib_journals``, rewrites the author list as a
        "; "-joined string, records the 1-based position of
        ``self.mylsurname`` in ``wl_mypos`` and stores the citation text
        in ``wl_cite``.
        """
        from bibtexparser.customization import author, type, convert_to_unicode

        rec = type(convert_to_unicode(rec))

        for field in rec.keys():
            cleaned = rec.get(field).replace("{\\nbsp}", nbsp)
            cleaned = cleaned.replace("``", u"“")
            rec[field] = cleaned.replace("''", u"”")

        if "journal" in rec:
            journal = rec["journal"]
            # Fall back to the original name when no mapping is known.
            rec["journal"] = _bib_journals.get(journal.lower(), journal)

        rec = author(rec)

        if "author" in rec:
            formatted = []

            for position, raw_name in enumerate(rec["author"], 1):
                name = raw_name.replace("{", "").replace("}", "")
                name = name.replace("~", " ")
                surname, rest = name.split(",", 1)
                if surname.lower() == self.mylsurname:
                    rec["wl_mypos"] = unicode(position)
                formatted.append(rest + " " + surname.replace(" ", "_"))

            rec["author"] = "; ".join(formatted)

        rec["wl_cite"] = _bib_cite(rec)
        return rec

Example #7

0

Show file

File: worklog.py Project: keflavich/worklog-tools

    def __call__ (self, rec):
        """Customize one bibtex record and return it.

        Runs the ``convert_to_unicode`` and ``type`` customizations,
        substitutes TeX quote/nbsp markup in every field, maps the journal
        through ``_bib_journals``, flattens the author list into a
        '; '-joined string (noting the 1-based position of
        ``self.mylsurname`` in ``wl_mypos``) and fills in ``wl_cite``.
        """
        from bibtexparser.customization import author, type, convert_to_unicode
        rec = type (convert_to_unicode (rec))

        for key in rec.keys ():
            val = rec.get (key)
            for old, new in (('{\\nbsp}', nbsp), ('``', u'“'), ("''", u'”')):
                val = val.replace (old, new)
            rec[key] = val

        if 'journal' in rec:
            name = rec['journal']
            # Unknown journals keep their original name.
            rec['journal'] = _bib_journals.get (name.lower (), name)

        rec = author (rec)

        if 'author' in rec:
            newauths = []

            for number, entry in enumerate (rec['author'], 1):
                entry = entry.replace ('{', '').replace ('}', '')
                entry = entry.replace ('~', ' ')
                surname, rest = entry.split (',', 1)
                if surname.lower () == self.mylsurname:
                    rec['wl_mypos'] = unicode (number)
                newauths.append (rest + ' ' + surname.replace (' ', '_'))

            rec['author'] = '; '.join (newauths)

        rec['wl_cite'] = _bib_cite (rec)
        return rec

Example #8

0

Show file

def customizations(entry):
    """Run *entry* through each customization step in order and return it."""
    steps = (clear_empty, author, page_endash, convert_to_unicode, clean_latex)
    for step in steps:
        entry = step(entry)
    return entry

Example #9

0

Show file

File: utils.py Project: bl4ck5un/bib2html

def customizations(entry):
    """Apply the customization pipeline to *entry* and return the result.

    Order: clear_empty, author, page_endash, convert_to_unicode,
    clean_latex.
    """
    entry = page_endash(author(clear_empty(entry)))
    return clean_latex(convert_to_unicode(entry))

Example #10

0

Show file

def _customizations_unicode(record):
    """
    Customize a record for the raw style.

    Applies, in order, the ``page_double_hyphen``, ``convert_to_unicode``
    and ``author`` customizations. See the bibtexparser lib for more info.
    """
    steps = (
        customization.page_double_hyphen,
        customization.convert_to_unicode,
        customization.author,
    )
    for step in steps:
        record = step(record)
    return record

Example #11

0

Show file

File: main.py Project: sumutcan/AIFB-citation-generator

def customizations(record):
    """Customize a record with the library's unicode conversion.

    :param record: a record
    :returns: -- the record returned by ``convert_to_unicode``
    """
    return convert_to_unicode(record)

Example #12

0

Show file

File: importer.py Project: sciunto-org/CiteBib

def _customizations_unicode(record):
    """
    Customize a record for the raw style.

    The record goes through ``page_double_hyphen``, then
    ``convert_to_unicode``, then ``author``.
    See the bibtexparser lib for more info.
    """
    hyphenated = customization.page_double_hyphen(record)
    converted = customization.convert_to_unicode(hyphenated)
    return customization.author(converted)

Example #13

0

Show file

File: bibparser.py Project: barbarosbecet/webir-project-1

def _customizations(record):
    """
    Bibtexparser customizations applied to every entry found in the .bib files.

    Steps, in order: unicode conversion, lower-casing of the entry type,
    splitting the authors into a list, splitting the editors into a list.
    """
    for step in (convert_to_unicode, type, author, editor):
        record = step(record)
    return record

Example #14

0

Show file

 def test_convert_to_unicode(self):
     """Check LaTeX-to-Unicode conversion, including regression cases.

     Raw strings are used for every LaTeX input so the backslashes stay
     literal; in particular backslash-backtick is not a valid escape
     sequence in a normal string literal.
     """
     # Grave accent, both brace placements.
     record = {'toto': r'{\`a} \`{a}'}
     result = convert_to_unicode(record)
     expected = {'toto': 'à à'}
     self.assertEqual(result, expected)
     # Umlaut, both brace placements.
     record = {'toto': r'{\"u} \"{u}'}
     result = convert_to_unicode(record)
     expected = {'toto': 'ü ü'}
     self.assertEqual(result, expected)
     # From issue 121
     record = {'title': r'{Two Gedenk\"uberlieferung der Angelsachsen}'}
     result = convert_to_unicode(record)
     expected = {'title': 'Two Gedenküberlieferung der Angelsachsen'}
     self.assertEqual(result, expected)
     # From issue 161
     record = {'title': r"p\^{a}t\'{e}"}
     result = convert_to_unicode(record)
     expected = {'title': "pâté"}
     self.assertEqual(result, expected)
     record = {'title': r"\^{i}le"}
     result = convert_to_unicode(record)
     expected = {'title': "île"}
     self.assertEqual(result, expected)
     # Named symbol command in three spellings.
     record = {'title': r"\texttimes{}{\texttimes}\texttimes"}
     result = convert_to_unicode(record)
     expected = {'title': "×××"}
     self.assertEqual(result, expected)

Example #15

0

Show file

File: entries.py Project: AnttiHaerkoenen/bibliografia

 def __init__(self, data_: dict):
     """Build the entry from a raw bibtex record.

     The record is converted to unicode, ``<br>`` markers and surrounding
     whitespace are removed from string values, and the author, page,
     type and doi handlers are applied. The result is kept in
     ``self.data``; every required or optional field is then set on the
     instance, with ``None`` for fields the record lacks.
     """
     data_ = bib_custom.convert_to_unicode(data_)
     for key, value in data_.items():
         if not isinstance(value, str):
             continue
         data_[key] = value.replace('<br>', '').strip()
     for transform in (handle_authors, handle_pages,
                       bib_custom.type, bib_custom.doi):
         data_ = transform(data_)
     super().__init__(self)
     self.data = data_
     known_fields = set.union(self.required_fields, self.optional_fields)
     for field in known_fields:
         self[field] = self.data.get(field, None)

Example #16

0

Show file

def customizations(record):
    """Use some functions delivered by the library.

    Only author handling and unicode conversion are enabled; afterwards
    the ``annote`` field, when present, is cleaned with ``strip_chars``.

    :param record: a record
    :returns: -- customized record
    """
    record = author(record)
    record = convert_to_unicode(record)
    # Not every entry carries an annotation; an unconditional lookup would
    # raise KeyError for those entries.
    if 'annote' in record:
        record['annote'] = strip_chars(record['annote'])
    return record

Example #17

0

Show file

File: test_customization.py Project: gpoo/python-bibtexparser

 def test_convert_to_unicode(self):
     """Check LaTeX-to-Unicode conversion on a table of records.

     Every case is an (input record, expected record) pair.
     """
     cases = [
         # Grave accent, both brace placements. The backslash is written
         # as "\\" because backslash-backtick is not a valid escape.
         ({'toto': '{\\`a} \\`{a}'}, {'toto': 'à à'}),
         # Umlaut, both brace placements.
         ({'toto': '{\\"u} \\"{u}'}, {'toto': 'ü ü'}),
         # From issue 121
         ({'title': '{Two Gedenk\\"uberlieferung der Angelsachsen}'},
          {'title': 'Two Gedenküberlieferung der Angelsachsen'}),
         # From issue 161
         ({'title': r"p\^{a}t\'{e}"}, {'title': "pâté"}),
         ({'title': r"\^{i}le"}, {'title': "île"}),
     ]
     for record, expected in cases:
         result = convert_to_unicode(record)
         self.assertEqual(result, expected)

Example #18

0

Show file

def bibtex_cleaner(entry):
    """Clean one bibtex entry and return it.

    The ``keyword`` customization runs first; a non-empty ``keyword``
    value is then joined with commas and lower-cased. After that the
    page_double_hyphen, convert_to_unicode, link and doi customizations
    are applied in that order.
    """
    entry = clean.keyword(entry)
    keywords = entry.get('keyword')
    if keywords:
        entry['keyword'] = ','.join(keywords).lower()

    # clean.add_plaintext_fields is intentionally not part of the chain.
    remaining_steps = (
        clean.page_double_hyphen,
        clean.convert_to_unicode,
        clean.link,
        clean.doi,
    )
    for step in remaining_steps:
        entry = step(entry)

    return entry

Example #19

0

Show file

File: loaders.py Project: n-elie/django-tailordev-biblio

def td_biblio_customization(record):
    """
    Customize BibTex records parsing
    """
    # Normalise problematic content to LaTeX first, then turn the LaTeX
    # into unicode.
    record = bp_customization.convert_to_unicode(to_latex(record))
    followup_steps = (
        bp_customization.type,
        bp_customization.author,
        bp_customization.editor,
        bp_customization.page_double_hyphen,
    )
    for step in followup_steps:
        record = step(record)

    return record

Example #20

0

Show file

File: bibjson.py Project: arccoder/bibtex2bibjson

def _parse_bib_entry(entry):
    """
    Customization function for bibtexparser.

    Unicode conversion runs first, and only when ``CONVERT_TO_UNICODE`` is
    truthy; the author, editor, keyword and page_double_hyphen
    customizations always follow.

    :param entry: bibtex record to modify
    :return bibtex record
    """
    steps = [
        bib_custom.author,
        bib_custom.editor,
        bib_custom.keyword,
        bib_custom.page_double_hyphen,
    ]
    if CONVERT_TO_UNICODE:
        steps.insert(0, bib_custom.convert_to_unicode)

    for step in steps:
        entry = step(entry)

    return entry

Example #21

0

Show file

def customizations(record):
    """Use some functions delivered by the library

    Enabled steps, in order: convert_to_unicode, author, editor,
    page_double_hyphen.

    :param record: a record
    :returns: -- customized record
    """
    # Deliberately left out: type, keyword, link, doi -- and journal,
    # which is marked "Do not use!".
    enabled_steps = (convert_to_unicode, author, editor, page_double_hyphen)
    for step in enabled_steps:
        record = step(record)
    return record

Example #22

0

Show file

File: publications.py Project: matael/pelican_publications

def customize(record):
    """ Customise bibtexparser records

    Converts the record to unicode, renders the author, title and journal
    fields (when present) with ``tex_to_html``, then splits and
    restructures the authors and fixes the page dashes.
    """
    record = customization.convert_to_unicode(record)
    for field_name in ('author', 'title', 'journal'):
        try:
            record[field_name] = tex_to_html(record[field_name])
        except KeyError:
            # Field absent from this record: leave it out.
            continue
    # Author string -> list of authors ...
    record = customization.author(record)
    # ... -> list of (last, first name) tuples.
    record = split_authors(record)
    return pages_endash(record)

Example #23

0

Show file

File: publications.py Project: dimazest/pelican_publications

def customize(record):
    """ Customise bibtexparser records

    Steps: unicode conversion; ``tex_to_html`` on the author, title and
    journal fields that exist; author splitting; conversion of each author
    into a (last, first name) tuple; page en-dash handling.
    """
    record = customization.convert_to_unicode(record)
    html_fields = ['author', 'title', 'journal']
    for name in html_fields:
        try:
            raw = record[name]
            rendered = tex_to_html(raw)
        except KeyError:
            pass
        else:
            record[name] = rendered
    # Splits author into a list of authors, then turns each author into
    # a tuple of last, first name.
    record = split_authors(customization.author(record))
    record = pages_endash(record)
    return record

Example #24

0

Show file

File: parser.py Project: teams-scidac/teams-scidac.github.io

def customizations(record):
    """Use some functions delivered by the library

    Applies every listed ``bc`` customization in a fixed order.

    :param record: a record
    :returns: -- customized record

    """
    pipeline = (
        bc.convert_to_unicode,
        bc.type,    # lowercase
        bc.author,
        bc.editor,
        bc.journal,
        bc.keyword,
        bc.link,
        bc.page_double_hyphen,
        bc.doi,
    )
    for step in pipeline:
        record = step(record)
    return record

Example #25

0

Show file

def customizations(record):
    """Use some functions delivered by the library

    The steps are looked up by name on ``bc`` and applied one after the
    other.

    :param record: a record
    :returns: -- customized record

    """
    step_names = (
        'convert_to_unicode',
        'type',  # lowercase
        'author',
        'editor',
        'journal',
        'keyword',
        'link',
        'page_double_hyphen',
        'doi',
    )
    for step_name in step_names:
        record = getattr(bc, step_name)(record)
    return record

Example #26

0

Show file

File: ingesters.py Project: roly97/ckanext-spc-pdh

    def process(self, record):
        """Create or update a package from one bibtex record.

        The record is converted to unicode and mapped onto a package dict.
        ``ID``, ``title``, ``abstract``, ``author`` and ``keywords`` are
        read unconditionally; doi/isbn/pmid, editor, publisher, language
        and url are optional. ``package_update`` is called when a package
        with the record's id already exists, ``package_create`` otherwise.
        """
        record = convert_to_unicode(record)
        record_id = record['ID']
        title = record['title']
        data_dict = {
            'id': record_id,
            'title': title.strip('{}'),
            'name': munge_title_to_name(record_id + title),
            'notes': record['abstract'],
            'harvest_source': 'MENDELEY',
            'creator': record['author'].replace(',', '').split(' and '),
            'tag_string': ','.join(
                map(munge_tag, record['keywords'].split(','))),
            'owner_org': tk.config.get(
                'ckanext.ingestor.config.mendeley_bib.owner_org', 'iaea'),
            'type': 'publications'
        }

        # Identifiers are stored as "<scheme>:<value>" strings.
        data_dict['identifier'] = [
            scheme + ':' + record[scheme]
            for scheme in ('doi', 'isbn', 'pmid')
            if scheme in record
        ]

        # Optional single-valued fields are wrapped in one-element lists.
        optional_fields = (
            ('contributor', 'editor'),
            ('publisher', 'publisher'),
            ('language', 'language'),
        )
        for target, source in optional_fields:
            if source in record:
                data_dict[target] = [record[source]]

        data_dict['source'] = record.get('url')
        user = tk.get_action('get_site_user')({'ignore_auth': True})
        existing = model.Package.get(data_dict['id'])
        action_name = 'package_update' if existing else 'package_create'
        save = tk.get_action(action_name)
        save({'ignore_auth': True, 'user': user['name']}, data_dict)

Example #27

0

Show file

File: mdx_bibtex.py Project: martisak/slidedeck

 def btex_custom(self, record):
     """Customize a bibtex record for rendering and return it.

     Converts the record to unicode, normalises the page range to an
     en dash, and derives ``author``, ``surnames``, ``author_year`` and
     ``unique_suffix``. Editors stand in for missing authors, and
     'Anon.' for missing editors; a missing year becomes ''.
     """
     r = convert_to_unicode(record)
     if "pages" in record and "-" in record["pages"]:  # fix -- -> –
         bounds = [part.strip().strip('-')
                   for part in record["pages"].split("-")]
         record["pages"] = bounds[0] + u'–' + bounds[-1]
     authors = r.get('author') or r.get('editor', 'Anon.')
     _authors = getnames(authors.split(" and "))
     surnames = [name.split(",")[0].strip() for name in _authors]
     _and_surnames = self.and_authors(surnames)
     r['author'] = self.and_authors(_authors)
     r['surnames'] = _and_surnames
     author_year = _and_surnames + u" " + r.get('year', '')
     r['author_year'] = author_year
     r['unique_suffix'] = suffix = self.unique_suffix(author_year)
     r['author_year'] = author_year + suffix
     # Self-assignment kept on purpose: it still requires a 'title' key.
     r['title'] = r['title']  # .replace("{", "").replace("}","")
     return r

Example #28

0

Show file

File: mdx_bibtex.py Project: anteprandium/mdx_bibtex

 def btex_custom(self, record):
     """Customize a bibtex record for rendering and return it.

     After unicode conversion the page range is rewritten with an en dash
     and the ``author``, ``surnames``, ``author_year`` and
     ``unique_suffix`` fields are derived. The editor field is used when
     there is no author, and 'Anon.' when there is neither. The record
     must contain ``year`` and ``title``.
     """
     r = convert_to_unicode(record)
     if "pages" in record:  # fix -- -> –
         pages = record["pages"]
         if "-" in pages:
             ends = [chunk.strip().strip('-') for chunk in pages.split("-")]
             record["pages"] = u'–'.join((ends[0], ends[-1]))
     authors = r.get('author')
     if not authors:
         authors = r.get('editor', 'Anon.')
     _authors = getnames(authors.split(" and "))
     last_names = []
     for full_name in _authors:
         last_names.append(full_name.split(",")[0].strip())
     _and_surnames = self.and_authors(last_names)
     r['author'] = self.and_authors(_authors)
     r['surnames'] = _and_surnames
     r['author_year'] = _and_surnames + u" " + r['year']
     r['unique_suffix'] = self.unique_suffix(r['author_year'])
     r['author_year'] = r['author_year'] + r['unique_suffix']
     # Self-assignment kept on purpose: it still requires a 'title' key.
     r['title'] = r['title']  # .replace("{", "").replace("}","")
     return r

Example #29

0

Show file

File: bibtex2dict.py Project: nitinkamra1992/bibtex2dict2csv

def customizations(record):
    ''' Use some customizations for bibtexparser

    The enabled steps are convert_to_unicode, author, editor and
    page_double_hyphen, applied in that order.

    Args:
        record: A record

    Returns:
        record: Customized record
    '''
    # Not enabled: type, keyword, link, doi -- and journal ("Do not use!").
    record = editor(author(convert_to_unicode(record)))
    return page_double_hyphen(record)

Example #30

0

Show file

def custom_callback(record):
    """Customize one bibtex record and return it.

    Calls ``get_ADS_jrnls()``, converts the record to unicode, replaces a
    journal macro (a journal value containing a backslash) with the name
    looked up in ``macro``, and removes braces / replaces ``~`` with a
    space in the author string.
    """
    get_ADS_jrnls()

    # Convert to unicode
    record = cus.convert_to_unicode(record)

    # Convert journal macro to real name
    if 'journal' in record and '\\' in record['journal']:
        macro_name = record["journal"].strip('\\')
        matching_names = macro['j_name'][macro['macro'] == macro_name]
        record["journal"] = matching_names.values[0]

    # Convert author strings
    if 'author' in record:
        raw_substitutions = {"{": "", "}": "", "~": " "}
        # Keys are regex-escaped so they can be joined into one pattern;
        # the lookup below escapes the matched text the same way.
        substitutions = {re.escape(old): new
                         for old, new in raw_substitutions.items()}
        pattern = re.compile("|".join(substitutions.keys()))

        def replacement(hit):
            return substitutions[re.escape(hit.group(0))]

        record['author'] = pattern.sub(replacement, record['author'])

    return record

Example #31

0

Show file

def bib_customizations(record):
    """Shorten a bibtex record for display.

    After unicode conversion and the ``author`` customization, the author
    field is reduced to the first author (plus ' et al.' when there are
    several) and the title is passed through ``smart_truncate``.
    """
    def truncate_title(record):
        # A missing title is treated as an empty one.
        record['title'] = smart_truncate(record.get('title', ''))
        return record

    def et_al(record):
        names = [name.replace(', ', ' ').replace(',', ' ')
                 for name in record.get('author', [])]
        if not names:
            record['author'] = ''
        elif len(names) == 1:
            record['author'] = names[0]
        else:
            record['author'] = names[0] + ' et al.'
        return record

    for step in (convert_to_unicode, author, et_al, truncate_title):
        record = step(record)
    return record

Example #32

0

Show file

File: latex_completer.py Project: Cocophotos/vim-ycm-latex-semantic-completer

def bib_customizations(record):
    """Shorten a bibtex record for display.

    The record is converted to unicode and run through the ``author``
    customization; the author field then becomes '', the single author,
    or '<first author> et al.', and the title is cut with
    ``smart_truncate``.
    """
    def truncate_title(record):
        current = ''
        if 'title' in record:
            current = record['title']
        record['title'] = smart_truncate(current)
        return record

    def et_al(record):
        people = []
        if 'author' in record:
            people = record['author']
        people = [p.replace(', ', ' ').replace(',', ' ') for p in people]
        count = len(people)
        if count == 0:
            shortened = ''
        elif count == 1:
            shortened = people[0]
        else:
            shortened = people[0] + ' et al.'
        record['author'] = shortened
        return record

    record = author(convert_to_unicode(record))
    return truncate_title(et_al(record))

Example #33

0

Show file

File: bibbrev.py Project: auxym/bibbreviate

def main():
    """Abbreviate (or unabbreviate) the journal names of a bib file.

    Command-line entry point. Reads the target bib file, replaces every
    multi-word ``journal`` value found in the abbreviation table, and
    writes the resulting bibtex to the ``--output`` file or to stdout.
    Journals missing from the table are reported through ``logger.error``
    and left unchanged.
    """
    parser = ArgumentParser()
    parser.add_argument("target", help="The bib file to abbreviate.")
    parser.add_argument(
        "-o",
        "--output",
        help="The output file name.  If missing, output will be sent to stdout.")
    parser.add_argument(
        "-r",
        "--reverse",
        help="Reverse the process and unabbreviate journal names.",
        action="store_true")
    parser.add_argument(
        "-a",
        "--abbreviations",
        help="Path to a file of abbreviations in the form (one per line): Journal of Biological Science = J. Sci. Biol.",
        default=os.path.join(determine_path(),
                             "journal_files",
                             "journal_abbreviations_general.txt",
                             )
        )
    parser.add_argument("-v", "--verbose", action="store_true")

    args = parser.parse_args()

    level = logging.WARNING if not args.verbose else logging.INFO
    logger.setLevel(level)

    # Read the whole input and close it right away. The output file is
    # opened only once the result is ready, so a failure (or using the
    # same path for input and output) cannot truncate the data first.
    with open(args.target, "r") as bib_file:
        refs_bp = BibTexParser(bib_file.read())
    refs = refs_bp.get_entry_dict()

    abbrevs = load_abbrevs(args.abbreviations, reverse=args.reverse)

    # Assume that if it has a journal key, then it needs abbreviating.  I'm doing this
    # instead of testing for type==article in case I've forgotten about a case where
    # type != article but there's a journal field.
    # Also, journal names with one word ('Nature') don't require
    # abbreviation.
    refs = {key: ref for key, ref in refs.items()
            if 'journal' in ref and len(ref['journal'].split(' ')) > 1}

    for key, ref in refs.items():
        # Convert a copy so the entry itself keeps its LaTeX encoding.
        journal = convert_to_unicode(copy(ref))['journal'].lower()

        # Handle any difficult characters.  TODO: check that this list
        # is complete.
        journal_clean = re.sub('[{}]', '', journal)

        try:
            abbreviation = abbrevs[journal_clean]
        except KeyError:
            logger.error('%s not found in abbreviations!', journal_clean)
        else:
            ref['journal'] = abbreviation
            logger.info('%s replaced with %s for key %s',
                        journal, abbreviation, key)

    output_bib = to_bibtex(refs_bp)
    if args.output:
        with open(args.output, "w") as out_file:
            out_file.write(output_bib)
    else:
        sys.stdout.write(output_bib)

Example #34

0

Show file

File: test_customization.py Project: njalerikson/bibtex_cleanup_parser

 def test_convert_to_unicode3(self):
     """Accent commands without a letter argument become combining marks."""
     converted = convert_to_unicode({'toto': "\\c \\'"})
     self.assertEqual(converted, {'toto': " \u0327\u0301"})

Example #35

0

Show file

File: bibcleaner.py Project: jgrey4296/bookmark_organizer

def custom(record):
    """Clean one bibtex record: unicode, attached files, and tags.

    Steps, each of which stores a marker in ``record['error']`` on failure
    instead of aborting:

    1. convert the record to unicode;
    2. parse the ``file`` field, store hashes of the files under
       ``hashes`` (unless already present) and rewrite ``file`` as a
       ';'-joined list;
    3. for every file outside ``args.library``, ask on the console whether
       to copy it to ``<library>/<year>/<surnames>/`` and update ``file``;
    4. merge ``keywords`` and ``mendeley-tags`` into ``tags`` unless
       ``tags`` already exists.

    :param record: bibtex record (dict) with at least an ``'ID'`` key
    :returns: the cleaned record
    """
    try:
        record = c.convert_to_unicode(record)
    except TypeError as e:
        logging.warning("Unicode Error on: {}".format(record['ID']))
        record['error'] = 'unicode'

    # Defined up front so the relocation step below still works (as a
    # no-op) when the 'file' field is missing or cannot be parsed.
    file_set = set()
    try:
        #add md5 of associated files
        files = [add_slash_if_necessary(y)
                 for x in record['file'].split(';')
                 for y in x.split(':')
                 if bool(y.strip()) and y.strip().lower() != 'pdf']
        file_set = set(files)
        if 'hashes' not in record:
            hashes = [file_to_hash(x) for x in file_set]
            record['hashes'] = ";".join(hashes)
            #regularize format of files list
            record['file'] = ";".join(file_set)
    except Exception as e:
        # Some exceptions carry no args; fall back to the exception itself.
        detail = e.args[0] if e.args else e
        logging.warning("File Error: {} : {}".format(record['ID'], detail))
        record['error'] = 'file'

    #todo: if file is not in the library common prefix, move it there
    #look for year, then first surname, then copy in, making dir if necessary
    # Iterate over a snapshot: file_set is modified inside the loop.
    for x in list(file_set):
        try:
            current_path = realpath(x)
            common = commonpath([current_path, args.library])
            if common != args.library:
                logging.info("Found file outside library: {}".format(current_path))
                logging.info("Common: {}".format(common))
                #get the author and year
                year = record['year']
                authors = c.getnames([i.strip() for i in record["author"].replace('\n', ' ').split(" and ")])
                authors_split = [c.splitname(a) for a in authors]
                author_surnames = [a['last'][0] for a in authors_split]
                new_path = join(args.library, year, ", ".join(author_surnames))
                logging.info("New Path: {}".format(new_path))
                #create directory if necessary
                #copy file
                full_new_path = join(new_path, split(current_path)[1])
                logging.info("Copying file")
                logging.info("From: {}".format(current_path))
                logging.info("To: {}".format(full_new_path))
                response = input("Enter to confirm: ")
                if response == "":
                    logging.info("Proceeding")
                    if not exists(new_path):
                        mkdir(new_path)
                    if exists(full_new_path):
                        raise Exception("File already exists")
                    copyfile(x, full_new_path)
                    file_set.remove(x)
                    file_set.add(full_new_path)
                    record['file'] = ";".join(file_set)
        except Exception as e:
            logging.info("Issue copying file for: {}".format(x))
            logging.info(e)
            record['error'] = 'file_copy'

    #regularize keywords
    try:
        keywords = set()
        if 'tags' not in record:
            if 'keywords' in record:
                keywords.update([x.strip() for x in record['keywords'].split(',')])
                del record['keywords']
            if 'mendeley-tags' in record:
                keywords.update([x.strip() for x in record['mendeley-tags'].split(',')])
                del record['mendeley-tags']

            record['tags'] = ",".join(keywords)
    # NOTE(review): this used to catch a name `Error` that is not defined
    # in the visible code; Exception matches the other handlers here.
    except Exception as e:
        logging.warning("Tag Error: {}".format(record['ID']))
        record['error'] = 'tag'

    return record

Example #36

0

Show file

File: gen_publist.py Project: tendence/zhenwendai.github.io

def customize(record):
    """Convert *record* to unicode, then apply the author customization."""
    converted = customization.convert_to_unicode(record)
    return customization.author(converted)

Example #37

0

Show file

 def get_unicode_bibdata(self):
     """Return ``self.bibdata`` with the latex in its fields converted to unicode."""
     bibdata = self.bibdata
     return convert_to_unicode(bibdata)

Example #38

0

Show file

File: parse_publications.py Project: OptimalDesignLab/optimaldesignlab.github.io

 def _mixed_customization(record):
     """Customize a record in three steps.

     Order: ``homogeneize_latex_encoding``, ``convert_to_unicode``,
     ``bc.author``.
     """
     for step in (homogeneize_latex_encoding, convert_to_unicode, bc.author):
         record = step(record)
     return record

Example #39

0

Show file

def customize(record):
    """Customize one bibtexparser record and return it.

    Line breaks are normalised first, then the library customizations are
    applied. Authors are turned into ``Author`` objects and the
    ``booktitle`` / ``series`` fields into ``{'name', 'ID'}`` dicts. For
    records with ``paper == "yes"`` the local PDF vault is checked, and
    ``paper`` is reset to ``"no"`` when the PDF is missing.
    """
    def fix_newlines(record):
        # Only existing keys are reassigned, so iterating is safe.
        for key, value in record.items():
            if key == 'url':
                # A URL must stay in one piece: drop line breaks entirely.
                record[key] = value.replace("\n", "")
            if key not in ('author', 'url', 'editor'):
                value = value.replace("\n", " ")
                record[key] = value.replace(r"\par", "\n\n")
        return record

    def split_author(record):
        if 'author' in record:
            authors = []
            for author in record['author']:
                # partition tolerates names without a ", " separator
                # (first name becomes '') and names with more than one.
                lastname, _, firstname = author.partition(", ")
                authors.append(Author(firstname, lastname))
            record['author'] = authors
        return record

    def parse_kind(kind, record):
        if kind in record and record[kind]:
            remove_translate_table = str.maketrans('', '', ', .')
            # record_id determines the name of the PDF
            # it's been hard-coded in the view:
            # layouts/partials/publications_icons.html
            # ----> this might want to be refactored
            record_id = record[kind].translate(remove_translate_table)
            record[kind] = {'name': record[kind], 'ID': record_id}
        return record

    def pdf_is_there(record):
        filename = record["ID"] + ".pdf"
        path_to_file = os.path.join(LOCAL_PDF_VAULT, filename)
        print(path_to_file)
        if os.path.isfile(path_to_file):
            print("\t PDF found!")
        else:
            print("\t NO PDF!!!")
            record["paper"] = "no"
        return record

    record = fix_newlines(record)
    record = customization.type(record)
    record = customization.convert_to_unicode(record)

    record = customization.author(record)
    record = customization.journal(record)
    record = customization.keyword(record)
    record = customization.link(record)
    record = customization.doi(record)
    record = customization.page_double_hyphen(record)
    record = split_author(record)

    for kind in ('booktitle', 'series'):
        record = parse_kind(kind, record)

    if record.get("paper") == "yes":
        return pdf_is_there(record)

    return record

Example #40

0

Show file

def customizations(record):
	"""Convert *record* to unicode, pass it to ``pdf`` and return it."""
	converted = convert_to_unicode(record)
	# pdf() is called for its effect only; its return value is ignored.
	pdf(converted)
	return converted