Example #1
0
def titlefilter(text):
    """Run ``text`` through the title typography filters, with caching.

    Falsy or whitespace-only input yields ``""``. Otherwise the stripped text
    is passed through ``amp``, ``caps``, ``smartypants`` and ``widont`` (in
    that order), the result is stored in ``cache`` under the MD5 hex digest of
    the stripped text, and it is returned wrapped in ``mark_safe``. A truthy
    cached value short-circuits the filter chain.
    """
    stripped = text.strip() if text else ""
    if not stripped:
        return ""

    # Key the cache on a digest of the (UTF-8 encoded) stripped input.
    cache_key = hashlib.md5(stripped.encode('utf8')).hexdigest()
    hit = cache.get(cache_key)
    if hit:
        return mark_safe(hit)

    # Innermost filter first: amp -> caps -> smartypants -> widont.
    rendered = amp(stripped)
    rendered = caps(rendered)
    rendered = smartypants(rendered)
    rendered = widont(rendered)

    cache.set(cache_key, rendered)
    return mark_safe(rendered)
Example #2
0
def adjust_typo(texte, html=True):
    """Apply typographic adjustments (mostly non-breaking spaces) to ``texte``.

    The input is converted with ``smart_unicode`` and stripped. Processing
    then runs in this order:

    1. When ``html`` is true, every ``<...>`` tag is swapped for a
       ``]TAGn[`` placeholder so the rules below do not touch markup.
    2. Each ``(old, new)`` pair of the module-level ``ligatures`` is applied
       with ``str.replace``.
    3. An ordered list of regex substitutions is applied; when ``html`` is
       true, extra rules wrap ordinal suffixes in ``<sup>``.
    4. Placeholders are turned back into the original tags.
    5. ``typogrify.smartypants`` is applied, then ``unescape_entities``.

    :param texte: the text to adjust (passed through ``smart_unicode``).
    :param html: treat ``texte`` as HTML: protect tags, add the ``<sup>``
        rules, and return ``u''`` when the input consists only of tags.
    :returns: the adjusted, stripped text, or ``u''`` for empty input.
    """
    texte = smart_unicode(texte).strip()
    if not texte or (html and re.match(r'(\s*<(/?[^>]*[^>/]|br /)>\s*)+$', texte, re.UNICODE | re.IGNORECASE)):
        return u''

    # TODO: add unit tests
    # TODO: in regex add code to ignore tags replacement

    # Mask HTML tags before processing the text. ``tokens`` is always bound
    # (empty when ``html`` is false) so the restore loop below is a no-op in
    # plain-text mode.
    tokens = re.findall(u'<[^>]+>', texte) if html else []
    for idx, value in enumerate(tokens):
        texte = texte.replace(value, ']TAG%s[' % idx, 1)

    # replace OE and AE by their correct ligature, Œ and Æ.
    for old, new in ligatures:
        texte = texte.replace(old, new)

    # TODO: verify if these cases are covered
    #    s/—/&#151;/g;
    #    s/ - / &#151; /g;
    #    s/--/—/g;
    #    s/—/&#151;/g;
    #    s/ — / —&nbsp;/g;
    #    s/—/&#151;/g;

    # Typographic adjustments (mostly putting non-breaking spaces where
    # needed). The rules are applied in list order and later rules rely on
    # the output of earlier ones, so do not reorder them.
    # NOTE(review): the "special space" below, and the second alternative of
    # every ``(\s| )`` group, is presumably meant to be a non-breaking space
    # (U+00A0) -- confirm the literal character against the upstream source.
    regexs = [
        (u'  +', u' '),  # collapse runs of normal spaces
        (u'  +', u' '),  # collapse runs of special spaces
        (u'«(\s| )+', u'«&nbsp;'),  # make space non-breaking after «
        (u'(\s| )+»', u'&nbsp;»'),  # make space non-breaking before »
        (u'«([^&])', u'«&nbsp;\g<1>'),  # add non-breaking space after «
        (u'([^;])»', u'\g<1>&nbsp;»'),  # add non-breaking space before »
        # make space non-breaking before :, ;, ?, !, $, %
        # ``$`` must be escaped: bare, it is the end-of-string anchor and the
        # dollar sign would never be matched.
        (u'(\s| )+(:|;|\?|!|\$|%)', u'&nbsp;\g<2>'),
        (u'(\d)(\s| )+(cm)', u'\g<1>&nbsp;\g<3>'),  # non-breaking space between a number and "cm"
        (u'(\d)(\s| )+(\d{3})', u'\g<1>&nbsp;\g<3>'),  # non-breaking space between groups in long numbers (ex.: 23 000)
        (u'(\s| )P\.(\s| )', u'\g<1>P.&nbsp;'),  # non-breaking space after Page abbreviation
        (u'(\s| )p\.', u'&nbsp;p.'),  # non-breaking space before page abbreviation

        (u' -- ', u' — '),  # change 2 hyphens into an EM dash

        (u'&(l|g)t;', u'&amp;\g<1>t;'),  # keep &lt; and &gt; as entities when doing unescape_entities
    ]

    if html:
        regexs.extend([
            (u'(\d)(ème|e|es)(\s| |-)', u'\g<1><sup>\g<2></sup>\g<3>'),  # superscript number suffix (ex. 2e)
            (u'([IVX])e(\s| )', u'\g<1><sup>e</sup>\g<2>'),  # superscript roman numeral suffix (ex. Xe)
            (u'1er(\s| |-)', u'1<sup>er</sup>\g<1>'),  # superscript suffix of 1 (ex. 1er)
        ])

    for old, new in regexs:
        texte = re.sub(old, new, texte)

    # put the HTML tags back at their original location
    for idx, value in enumerate(tokens):
        texte = texte.replace(']TAG%s[' % idx, value, 1)

    # do more typographic adjustments with smartypants
    texte = typogrify.smartypants(texte)
    return unescape_entities(texte).strip()
def filter(content, block):
    """Return ``content`` processed by ``smartypants``.

    ``block`` is part of the signature but is not used by this function.
    """
    converted = smartypants(content)
    return converted
Example #4
0
def filter(content, block):
    """Return ``content`` processed by ``smartypants``.

    ``block`` is part of the signature but is not used by this function.
    """
    converted = smartypants(content)
    return converted
Example #5
0
def adjust_typo(texte, html=True):
    """Apply typographic adjustments (mostly non-breaking spaces) to ``texte``.

    The input is converted with ``smart_unicode`` and stripped. Processing
    then runs in this order:

    1. When ``html`` is true, every ``<...>`` tag is swapped for a
       ``]TAGn[`` placeholder so the rules below do not touch markup.
    2. Each ``(old, new)`` pair of the module-level ``ligatures`` is applied
       with ``str.replace``.
    3. An ordered list of regex substitutions is applied; when ``html`` is
       true, extra rules wrap ordinal suffixes in ``<sup>``.
    4. Placeholders are turned back into the original tags.
    5. ``typogrify.smartypants`` is applied, then ``unescape_entities``.

    :param texte: the text to adjust (passed through ``smart_unicode``).
    :param html: treat ``texte`` as HTML: protect tags, add the ``<sup>``
        rules, and return ``u''`` when the input consists only of tags.
    :returns: the adjusted, stripped text, or ``u''`` for empty input.
    """
    texte = smart_unicode(texte).strip()
    if not texte or (html and re.match(r'(\s*<(/?[^>]*[^>/]|br /)>\s*)+$',
                                       texte, re.UNICODE | re.IGNORECASE)):
        return u''

    # TODO: add unit tests
    # TODO: in regex add code to ignore tags replacement

    # Mask HTML tags before processing the text. ``tokens`` is always bound
    # (empty when ``html`` is false) so the restore loop below is a no-op in
    # plain-text mode.
    tokens = re.findall(u'<[^>]+>', texte) if html else []
    for idx, value in enumerate(tokens):
        texte = texte.replace(value, ']TAG%s[' % idx, 1)

    # replace OE and AE by their correct ligature, Œ and Æ.
    for old, new in ligatures:
        texte = texte.replace(old, new)

    # TODO: verify if these cases are covered
    #    s/—/&#151;/g;
    #    s/ - / &#151; /g;
    #    s/--/—/g;
    #    s/—/&#151;/g;
    #    s/ — / —&nbsp;/g;
    #    s/—/&#151;/g;

    # Typographic adjustments (mostly putting non-breaking spaces where
    # needed). The rules are applied in list order and later rules rely on
    # the output of earlier ones, so do not reorder them.
    # NOTE(review): the "special space" below, and the second alternative of
    # every ``(\s| )`` group, is presumably meant to be a non-breaking space
    # (U+00A0) -- confirm the literal character against the upstream source.
    regexs = [
        (u'  +', u' '),  # collapse runs of normal spaces
        (u'  +', u' '),  # collapse runs of special spaces
        (u'«(\s| )+', u'«&nbsp;'),  # make space non-breaking after «
        (u'(\s| )+»', u'&nbsp;»'),  # make space non-breaking before »
        (u'«([^&])', u'«&nbsp;\g<1>'),  # add non-breaking space after «
        (u'([^;])»', u'\g<1>&nbsp;»'),  # add non-breaking space before »
        # make space non-breaking before :, ;, ?, !, $, %
        # ``$`` must be escaped: bare, it is the end-of-string anchor and the
        # dollar sign would never be matched.
        (u'(\s| )+(:|;|\?|!|\$|%)', u'&nbsp;\g<2>'),
        # non-breaking space between a number and "cm"
        (u'(\d)(\s| )+(cm)', u'\g<1>&nbsp;\g<3>'),
        # non-breaking space between groups in long numbers (ex.: 23 000)
        (u'(\d)(\s| )+(\d{3})', u'\g<1>&nbsp;\g<3>'),
        # non-breaking space after Page abbreviation
        (u'(\s| )P\.(\s| )', u'\g<1>P.&nbsp;'),
        # non-breaking space before page abbreviation
        (u'(\s| )p\.', u'&nbsp;p.'),
        (u' -- ', u' — '),  # change 2 hyphens into an EM dash
        # keep &lt; and &gt; as entities when doing unescape_entities
        (u'&(l|g)t;', u'&amp;\g<1>t;'),
    ]

    if html:
        regexs.extend([
            # superscript number suffix (ex. 2e)
            (u'(\d)(ème|e|es)(\s| |-)', u'\g<1><sup>\g<2></sup>\g<3>'),
            # superscript roman numeral suffix (ex. Xe)
            (u'([IVX])e(\s| )', u'\g<1><sup>e</sup>\g<2>'),
            # superscript suffix of 1 (ex. 1er)
            (u'1er(\s| |-)', u'1<sup>er</sup>\g<1>'),
        ])

    for old, new in regexs:
        texte = re.sub(old, new, texte)

    # put the HTML tags back at their original location
    for idx, value in enumerate(tokens):
        texte = texte.replace(']TAG%s[' % idx, value, 1)

    # do more typographic adjustments with smartypants
    texte = typogrify.smartypants(texte)
    return unescape_entities(texte).strip()