def titlefilter(text):
    """Run a title through the amp, caps, smartypants and widont filters.

    Empty, ``None`` or whitespace-only input yields ``""``.  Otherwise the
    stripped text is looked up in ``cache`` under the MD5 hex digest of its
    UTF-8 encoding; on a miss the filtered text is computed and stored under
    that key.  The value returned is always passed through ``mark_safe``.
    """
    stripped = text.strip() if text else ""
    if not stripped:
        return ""

    digest = hashlib.md5(stripped.encode('utf8')).hexdigest()
    hit = cache.get(digest)
    if hit:
        return mark_safe(hit)

    # Same order as the nested call widont(smartypants(caps(amp(text)))).
    rendered = stripped
    for step in (amp, caps, smartypants, widont):
        rendered = step(rendered)

    cache.set(digest, rendered)
    return mark_safe(rendered)
def adjust_typo(texte, html=True):
    """Apply a series of typographic substitutions to ``texte``.

    The input is converted with ``smart_unicode`` and stripped, ligature
    replacements from ``ligatures`` are applied, then an ordered list of
    regex substitutions handles spacing around guillemets and punctuation,
    number grouping, page abbreviations and dashes.  The result is passed
    through ``typogrify.smartypants`` and ``unescape_entities``.

    Args:
        texte: value to process.
        html: when true, HTML tags are swapped for ``]TAGn[`` placeholders
            while the substitutions run and restored afterwards, ordinal
            suffixes are wrapped in ``<sup>`` tags, and input that consists
            only of tags yields an empty string.

    Returns:
        The adjusted, stripped text, or ``u''`` when there is nothing to
        process.
    """
    texte = smart_unicode(texte).strip()
    only_tags = r'(\s*<(/?[^>]*[^>/]|br /)>\s*)+$'
    if not texte or (html and re.match(only_tags, texte, re.UNICODE | re.IGNORECASE)):
        return u''

    # TODO: add unit tests
    # TODO: in regex add code to ignore tags replacement

    if html:
        # Mask HTML tags with placeholders so the substitutions below only
        # see text; the tags are put back once the substitutions are done.
        tokens = re.findall(u'<[^>]+>', texte)
        for idx, value in enumerate(tokens):
            texte = texte.replace(value, ']TAG%s[' % idx, 1)

    # Replace OE and AE by their correct ligature, Œ and Æ.
    for old, new in ligatures:
        texte = texte.replace(old, new)

    # TODO: verify whether the em-dash substitutions (' - ' and '--' to '—')
    # are covered.

    # Typographic adjustments, mostly putting non-breaking spaces where
    # needed.  The rules are applied in order and later rules see the output
    # of earlier ones.
    regexs = [
        (u' +', u' '),  # collapse runs of normal spaces
        # NOTE(review): as it reads here this pattern is the same as the rule
        # above; presumably it is meant to contain non-breaking spaces --
        # confirm the literal characters.
        (u' +', u' '),  # collapse runs of special spaces
        (u'«(\s| )+', u'« '),  # make space non-breaking after «
        (u'(\s| )+»', u' »'),  # make space non-breaking before »
        # NOTE(review): the [^&] and [^;] guards below suggest the two rules
        # above were meant to emit an entity; as written, a space they insert
        # is not excluded by the guards -- confirm no doubled space results.
        (u'«([^&])', u'« \g<1>'),  # add non-breaking space after «
        (u'([^;])»', u'\g<1> »'),  # add non-breaking space before »
        # The dollar sign is escaped so it matches a literal "$"; unescaped
        # it is an end-of-string anchor, which can never follow whitespace
        # here because the text has already been stripped.
        (u'(\s| )+(:|;|\?|!|\$|%)', u' \g<2>'),  # make space non-breaking before :, ;, ?, !, $, %
        (u'(\d)(\s| )+(cm)', u'\g<1> \g<3>'),  # put non-breaking space between a number and "cm"
        (u'(\d)(\s| )+(\d{3})', u'\g<1> \g<3>'),  # put non-breaking space between groups in long numbers (ex.: 23 000)
        (u'(\s| )P\.(\s| )', u'\g<1>P. '),  # put non-breaking space after Page abbreviation
        (u'(\s| )p\.', u' p.'),  # put non-breaking space before page abbreviation
        (u' -- ', u' — '),  # change 2 hyphens into an EM dash
        # NOTE(review): pattern and replacement look equivalent as written,
        # which would make this rule a no-op; confirm the replacement was not
        # meant to emit an escaped ampersand.
        (u'&(l|g)t;', u'&\g<1>t;'),  # to keep < and > as entities when doing unescape_entities
    ]

    if html:
        regexs.extend([
            (u'(\d)(ème|e|es)(\s| |-)', u'\g<1><sup>\g<2></sup>\g<3>'),  # put number suffix in superscript (ex. 2e)
            (u'([IVX])e(\s| )', u'\g<1><sup>e</sup>\g<2>'),  # put roman number suffix in superscript (ex. Xe)
            (u'1er(\s| |-)', u'1<sup>er</sup>\g<1>'),  # put "er" suffix in superscript (ex. 1er)
        ])

    for old, new in regexs:
        texte = re.sub(old, new, texte)

    # Put the HTML tags back at their original location.
    if html:
        for idx, value in enumerate(tokens):
            texte = texte.replace(']TAG%s[' % idx, value, 1)

    # Do more typographic adjustments with smartypants.
    texte = typogrify.smartypants(texte)

    return unescape_entities(texte).strip()
def filter(content, block):
    """Return ``content`` processed by ``smartypants``.

    ``block`` is accepted but not used.
    """
    converted = smartypants(content)
    return converted
def adjust_typo(texte, html=True):
    """Apply a series of typographic substitutions to ``texte``.

    The input is converted with ``smart_unicode`` and stripped, ligature
    replacements from ``ligatures`` are applied, then an ordered list of
    regex substitutions handles spacing around guillemets and punctuation,
    number grouping, page abbreviations and dashes.  The result is passed
    through ``typogrify.smartypants`` and ``unescape_entities``.

    Args:
        texte: value to process.
        html: when true, HTML tags are swapped for ``]TAGn[`` placeholders
            while the substitutions run and restored afterwards, ordinal
            suffixes are wrapped in ``<sup>`` tags, and input that consists
            only of tags yields an empty string.

    Returns:
        The adjusted, stripped text, or ``u''`` when there is nothing to
        process.
    """
    texte = smart_unicode(texte).strip()
    only_tags = r'(\s*<(/?[^>]*[^>/]|br /)>\s*)+$'
    if not texte or (html and re.match(only_tags, texte, re.UNICODE | re.IGNORECASE)):
        return u''

    # TODO: add unit tests
    # TODO: in regex add code to ignore tags replacement

    if html:
        # Mask HTML tags with placeholders so the substitutions below only
        # see text; the tags are put back once the substitutions are done.
        tokens = re.findall(u'<[^>]+>', texte)
        for idx, value in enumerate(tokens):
            texte = texte.replace(value, ']TAG%s[' % idx, 1)

    # Replace OE and AE by their correct ligature, Œ and Æ.
    for old, new in ligatures:
        texte = texte.replace(old, new)

    # TODO: verify whether the em-dash substitutions (' - ' and '--' to '—')
    # are covered.

    # Typographic adjustments, mostly putting non-breaking spaces where
    # needed.  The rules are applied in order and later rules see the output
    # of earlier ones.
    regexs = [
        (u' +', u' '),  # collapse runs of normal spaces
        # NOTE(review): as it reads here this pattern is the same as the rule
        # above; presumably it is meant to contain non-breaking spaces --
        # confirm the literal characters.
        (u' +', u' '),  # collapse runs of special spaces
        (u'«(\s| )+', u'« '),  # make space non-breaking after «
        (u'(\s| )+»', u' »'),  # make space non-breaking before »
        # NOTE(review): the [^&] and [^;] guards below suggest the two rules
        # above were meant to emit an entity; as written, a space they insert
        # is not excluded by the guards -- confirm no doubled space results.
        (u'«([^&])', u'« \g<1>'),  # add non-breaking space after «
        (u'([^;])»', u'\g<1> »'),  # add non-breaking space before »
        # The dollar sign is escaped so it matches a literal "$"; unescaped
        # it is an end-of-string anchor, which can never follow whitespace
        # here because the text has already been stripped.
        (u'(\s| )+(:|;|\?|!|\$|%)', u' \g<2>'),  # make space non-breaking before :, ;, ?, !, $, %
        (u'(\d)(\s| )+(cm)', u'\g<1> \g<3>'),  # put non-breaking space between a number and "cm"
        (u'(\d)(\s| )+(\d{3})', u'\g<1> \g<3>'),  # put non-breaking space between groups in long numbers (ex.: 23 000)
        (u'(\s| )P\.(\s| )', u'\g<1>P. '),  # put non-breaking space after Page abbreviation
        (u'(\s| )p\.', u' p.'),  # put non-breaking space before page abbreviation
        (u' -- ', u' — '),  # change 2 hyphens into an EM dash
        # NOTE(review): pattern and replacement look equivalent as written,
        # which would make this rule a no-op; confirm the replacement was not
        # meant to emit an escaped ampersand.
        (u'&(l|g)t;', u'&\g<1>t;'),  # to keep < and > as entities when doing unescape_entities
    ]

    if html:
        regexs.extend([
            (u'(\d)(ème|e|es)(\s| |-)', u'\g<1><sup>\g<2></sup>\g<3>'),  # put number suffix in superscript (ex. 2e)
            (u'([IVX])e(\s| )', u'\g<1><sup>e</sup>\g<2>'),  # put roman number suffix in superscript (ex. Xe)
            (u'1er(\s| |-)', u'1<sup>er</sup>\g<1>'),  # put "er" suffix in superscript (ex. 1er)
        ])

    for old, new in regexs:
        texte = re.sub(old, new, texte)

    # Put the HTML tags back at their original location.
    if html:
        for idx, value in enumerate(tokens):
            texte = texte.replace(']TAG%s[' % idx, value, 1)

    # Do more typographic adjustments with smartypants.
    texte = typogrify.smartypants(texte)

    return unescape_entities(texte).strip()