def filter_bibentry(self, entry):
    """Rebuild every person name attached to *entry*, role by role.

    ``entry`` is a pybtex.database.Entry object.  For each role listed in
    ``self.roles`` that is present in ``entry.persons``, every person is
    converted to text, optionally de-LaTeXed, re-parsed, and then replaced
    in place in ``entry.persons[role]`` by a newly constructed ``Person``.

    Nothing is returned; the entry is modified in place.
    """

    def single_letters_abbreviated(person, part_name):
        # Yield the requested name parts as they are, except that parts
        # consisting of exactly one character go through pybtex's
        # abbreviate().
        from pybtex.textutils import abbreviate
        for piece in person.get_part(part_name, False):
            yield abbreviate(piece) if len(piece) == 1 else piece

    for role in self.roles:
        if role not in entry.persons:
            continue

        for idx, original in enumerate(entry.persons[role]):
            # De-LaTeX the person first, working on its textual form.
            name_text = unicode(original)

            # BUG: FIXME: remove the space after any macro, i.e. replace
            # "blah\macro blah" by "blah\macro{}blah".
            name_text = re.sub(r'(\\[a-zA-Z]+)\s+', r'\1{}', name_text)

            if self._names_to_utf8:
                name_text = latex2text.latex2text(name_text)

            parsed = Person(name_text)

            if self._only_single_letter_firsts:
                first = " ".join(single_letters_abbreviated(parsed, 'first'))
                middle = " ".join(single_letters_abbreviated(parsed, 'middle'))
            else:
                # NOTE(review): the True argument presumably requests the
                # abbreviated form of each part -- confirm against pybtex.
                first = " ".join(parsed.first(True))
                middle = " ".join(parsed.middle(True))

            entry.persons[role][idx] = Person(
                '',
                first,
                middle,
                " ".join(parsed.prelast()),
                " ".join(parsed.last()),
                " ".join(parsed.lineage())
            )
def thefilter(x):
    """Apply the enabled text fixes to the string *x* and return the result.

    The steps run in this fixed order, each one only if the corresponding
    option on ``self`` (taken from the enclosing scope) is truthy:

    1. ``fix_swedish_a``: replace ``\\AA`` followed by whitespace with
       ``\\AA{}``.
    2. ``encode_utf8_to_latex``: ``latexencode.utf8tolatex`` with
       ``non_ascii_only=True``.
    3. ``encode_latex_to_utf8``: ``latex2text.latex2text``.
    """
    if self.fix_swedish_a:
        # The backslash in the replacement template must be written as "\\".
        # A bare "\A" is an unknown template escape, which Python 3.7+
        # rejects with re.error; "\\AA{}" yields the same text everywhere.
        x = re.sub(r'\\AA\s+', r'\\AA{}', x)
    if self.encode_utf8_to_latex:
        x = latexencode.utf8tolatex(x, non_ascii_only=True)
    if self.encode_latex_to_utf8:
        x = latex2text.latex2text(x)
    return x
def delatex_for_xml(s):
    """Convert LaTeX input *s* to text and pass it through unicode_to_xml.

    *s* is first coerced with ``unicode()``.  It is then run through
    ``latex2text.latex2text`` with ``tolerant_parsing=True`` and
    ``keep_comments=True``, and the resulting text is handed to
    ``unicode_to_xml``, whose return value is returned.  The first 100
    characters of the input and of the output are logged at ``longdebug``
    level.
    """
    source = unicode(s)
    logger.longdebug('delatexing `%s\' [:100] ...', source[:100])

    plain = latex2text.latex2text(
        source,
        tolerant_parsing=True,
        keep_comments=True
    )

    escaped = unicode_to_xml(plain)
    logger.longdebug(' --> xml is `%s\' [:100]', escaped[:100])
    return escaped
def thefilter(x):
    """Apply the enabled text fixes to the string *x* and return the result.

    The steps run in this fixed order, each one only if the corresponding
    option on ``self`` (taken from the enclosing scope) is truthy:

    1. ``fix_space_after_escape``: ``do_fix_space_after_escape(x)``.
    2. ``fix_swedish_a`` (OBSOLETE, but still accepted for backwards
       compatibility): replace ``\\AA`` / ``\\o`` followed by whitespace
       with ``\\AA{}`` / ``\\o{}``.
    3. ``encode_utf8_to_latex``: ``latexencode.utf8tolatex`` with
       ``non_ascii_only=True``.
    4. ``encode_latex_to_utf8``: ``latex2text.latex2text``.
    """
    if self.fix_space_after_escape:
        x = do_fix_space_after_escape(x)
    if self.fix_swedish_a:
        # OBSOLETE, but still accepted for backwards compatibility.
        #
        # The backslash in each replacement template must be written as
        # "\\".  A bare "\A" or "\o" is an unknown template escape, which
        # Python 3.7+ rejects with re.error; the doubled form yields the
        # same text on every Python version.
        x = re.sub(r'\\AA\s+', r'\\AA{}', x)
        x = re.sub(r'\\o\s+', r'\\o{}', x)
    if self.encode_utf8_to_latex:
        # Need non_ascii_only=True because we might have e.g. braces or other
        # LaTeX code we want to preserve.
        x = latexencode.utf8tolatex(x, non_ascii_only=True)
    if self.encode_latex_to_utf8:
        x = latex2text.latex2text(x)
    return x
def delatex(s):
    """Return ``latex2text.latex2text`` applied to the unicode form of *s*."""
    # Historical note: a workaround used to sit here that rewrote r'\ ' to
    # r'\~' before conversion, because "\~" sequences were turning up as
    # "\ ".  According to the original remark this was a pybtex bug that has
    # since been fixed, so the input is now passed on unchanged.
    as_text = unicode(s)
    return latex2text.latex2text(as_text)
def delatex(s):
    """Return ``latex2text.latex2text`` applied to *s* as a unicode string.

    A value that is already ``unicode`` is used as is; anything else is
    decoded as UTF-8 via its ``decode`` method first.
    """
    if isinstance(s, unicode):
        text = s
    else:
        text = unicode(s.decode('utf-8'))
    return latex2text.latex2text(text)