def number_suffix_filter(text):
    """Wraps date suffixes in ``<span class="ord">`` so they can be styled with CSS.

    >>> number_suffix("10th")
    u'10<span class="ord">th</span>'

    Uses the smartypants tokenizer to not screw with HTML or with tags it
    shouldn't (the contents of pre/code/kbd/script/math are left alone).
    """
    suffix_finder = re.compile(r"(?P<number>\d+)(?P<ord>st|nd|rd|th)")

    def _suffix_process(match):
        # Re-emit the digits and wrap only the ordinal suffix.
        number = match.group("number")
        suffix = match.group("ord")
        return "%s<span class='ord'>%s</span>" % (number, suffix)

    tokens = smartypants._tokenize(text)
    result = []
    in_skipped_tag = False
    tags_to_skip_regex = re.compile("<(/)?(?:pre|code|kbd|script|math)[^>]*>",
                                    re.IGNORECASE)
    for token in tokens:
        if token[0] == "tag":
            # Never rewrite markup itself; just track whether we entered
            # (group(1) is None, i.e. no closing slash) or left a tag
            # whose contents must not be rewritten.
            result.append(token[1])
            close_match = tags_to_skip_regex.match(token[1])
            if close_match and close_match.group(1) is None:
                in_skipped_tag = True
            else:
                in_skipped_tag = False
        else:
            if in_skipped_tag:
                result.append(token[1])
            else:
                result.append(suffix_finder.sub(_suffix_process, token[1]))
    return mark_safe("".join(result))
def ligature(text):
    """Replaces common di- and tri-graphs with their Unicode ligatures.

    >>> Typogrify.ligature('define flargle')
    'de\ufb01ne \ufb02argle'
    >>> Typogrify.ligature('<p>define flargle</p>')
    '<p>de\ufb01ne \ufb02argle</p>'
    >>> Typogrify.ligature('<pre>define flargle</pre>')
    '<pre>define flargle</pre>'
    >>> Typogrify.ligature('<pre><span>define flargle</span></pre>')
    '<pre><span>define flargle</span></pre>'
    """
    # Map each digraph/trigraph to its ligature code point
    # (U+FB00..U+FB04, Unicode Alphabetic Presentation Forms).
    replacements = {
        'ff': u'\ufb00',
        'fi': u'\ufb01',
        'fl': u'\ufb02',
        'ffi': u'\ufb03',
        'ffl': u'\ufb04',
    }
    # Longest alternatives first: otherwise 'ff' would always match
    # before 'ffi'/'ffl' got the chance.
    ngraph_re = '|'.join(sorted(replacements, key=len, reverse=True))

    def replace(match):
        return replacements[match.group()]

    try:
        import smartypants
    except ImportError:
        raise TypogrifyError("Error in {% ligature %} filter: The Python SmartyPants library isn't installed.")

    tokens = smartypants._tokenize(text)
    result = []
    tags_to_skip_regex = re.compile("<(/)?(?:pre|code|kbd|script|math)[^>]*>",
                                    re.IGNORECASE)
    # Nesting depth inside tags whose contents must stay untouched.
    skippage = 0
    for token in tokens:
        if token[0] == "tag":
            # Don't mess with tags.
            result.append(token[1])
            close_match = tags_to_skip_regex.match(token[1])
            if close_match:
                if close_match.group(1) is None:
                    skippage += 1  # opening skip-tag
                else:
                    skippage -= 1  # closing skip-tag
        else:
            if skippage > 0:
                result.append(token[1])
            else:
                result.append(re.sub(ngraph_re, replace, token[1]))
    return "".join(result)
def smartycaps(text):
    """Wraps runs of capital letters via ``_wrap_caps`` so they can be styled.

    Uses the smartypants tokenizer so HTML tags pass through untouched,
    and skips the contents of tags matched by ``_tags_to_skip_regex()``
    (defined elsewhere in this module).
    """
    tokens = _tokenize(text)
    result = []
    in_pre = False  # True while inside a tag whose contents must be skipped
    tags_to_skip_regex = _tags_to_skip_regex()
    caps_re = re.compile(r'[0-9]*([A-Z][0-9\/\-.+&]?){1,}[A-Z][0-9]*\.?')
    for cur_token in tokens:
        if cur_token[0] == 'tag':
            # Emit tags unchanged and track skip-tag state.
            result.append(cur_token[1])
            close_match = tags_to_skip_regex.match(cur_token[1])
            # NOTE(review): assumes group(1) of the skip regex captures the
            # closing slash, as the sibling filters' regexes do -- confirm
            # against _tags_to_skip_regex().
            if close_match and close_match.group(1) is None:
                in_pre = True
            else:
                in_pre = False
        else:
            t = cur_token[1]
            if not in_pre:
                t = re.sub(caps_re, _wrap_caps, t)
            result.append(t)
    return ''.join(result)
def number_suffix_filter(text):
    """Wraps date suffixes (st/nd/rd/th) in ``<span class="ord">`` for CSS styling.

    >>> number_suffix("10th")
    u'10<span class="ord">th</span>'

    Uses the smartypants tokenizer so markup, and the contents of tags that
    should be skipped (pre/code/kbd/script/math), are never rewritten.
    """
    suffix_finder = re.compile(r'(?P<number>\d+)(?P<ord>st|nd|rd|th)')
    skip_tag_re = re.compile('<(/)?(?:pre|code|kbd|script|math)[^>]*>',
                             re.IGNORECASE)

    def _suffix_process(match):
        # Keep the digits; wrap only the ordinal letters.
        return "%s<span class='ord'>%s</span>" % (
            match.group('number'), match.group('ord'))

    pieces = []
    skipping = False
    for kind, chunk in smartypants._tokenize(text):
        if kind == 'tag':
            pieces.append(chunk)  # markup itself is never rewritten
            opened = skip_tag_re.match(chunk)
            # An opening skip-tag (no leading slash) suspends rewriting
            # until the matching close tag goes past.
            skipping = bool(opened) and opened.group(1) is None
        elif skipping:
            pieces.append(chunk)
        else:
            pieces.append(suffix_finder.sub(_suffix_process, chunk))
    return mark_safe(''.join(pieces))
def caps(text):
    """Wraps multiple capital letters in ``<span class="caps">``
    so they can be styled with CSS.

    >>> caps("A message from KU")
    u'A message from <span class="caps">KU</span>'

    Uses the smartypants tokenizer to not screw with HTML or with tags it
    shouldn't.

    >>> caps("<PRE>CAPS</pre> more CAPS")
    u'<PRE>CAPS</pre> more <span class="caps">CAPS</span>'

    >>> caps("A message from 2KU2 with digits")
    u'A message from <span class="caps">2KU2</span> with digits'

    >>> caps("Dotted caps followed by spaces should never include them in the wrap D.O.T. like so.")
    u'Dotted caps followed by spaces should never include them in the wrap <span class="caps">D.O.T.</span> like so.'

    All caps with apostrophes in them shouldn't break. Only handles dumb
    apostrophes though.

    >>> caps("JIMMY'S")
    u'<span class="caps">JIMMY\\'S</span>'

    >>> caps("<i>D.O.T.</i>HE34T<b>RFID</b>")
    u'<i><span class="caps">D.O.T.</span></i><span class="caps">HE34T</span><b><span class="caps">RFID</span></b>'
    """
    tokens = _smartypants._tokenize(text)
    result = []
    in_skipped_tag = False

    cap_finder = re.compile(r"""(
        (\b[A-Z\d]*        # Group 2: Any amount of caps and digits
        [A-Z]\d*[A-Z]      # A cap string must at least include two caps (but they can have digits between them)
        [A-Z\d']*\b)       # Any amount of caps and digits or dumb apostrophes
        |
        (\b[A-Z]+\.\s?     # OR: Group 3: Some caps, followed by a '.' and an optional space
        (?:[A-Z]+\.\s?)+)  # Followed by the same thing at least once more
        (?:\s|\b|$))
        """, re.VERBOSE)

    def _cap_wrapper(matchobj):
        """This is necessary to keep dotted cap strings to pick up extra spaces"""
        if matchobj.group(2):
            return """<span class="caps">%s</span>""" % matchobj.group(2)
        else:
            # Group 3 may have swallowed a trailing space; push it back
            # outside the span.
            if matchobj.group(3)[-1] == " ":
                caps = matchobj.group(3)[:-1]
                tail = ' '
            else:
                caps = matchobj.group(3)
                tail = ''
            return """<span class="caps">%s</span>%s""" % (caps, tail)

    tags_to_skip_regex = re.compile(
        "<(/)?(?:pre|code|kbd|script|math)[^>]*>", re.IGNORECASE)

    for token in tokens:
        if token[0] == "tag":
            # Don't mess with tags.
            result.append(token[1])
            close_match = tags_to_skip_regex.match(token[1])
            if close_match and close_match.group(1) is None:
                in_skipped_tag = True
            else:
                in_skipped_tag = False
        else:
            if in_skipped_tag:
                result.append(token[1])
            else:
                result.append(cap_finder.sub(_cap_wrapper, token[1]))
    return "".join(result)
def caps(text):
    """Wraps multiple capital letters in ``<span class="caps">``
    so they can be styled with CSS.

    >>> caps("A message from KU")
    u'A message from <span class="caps">KU</span>'

    Uses the smartypants tokenizer to not screw with HTML or with tags it
    shouldn't.

    >>> caps("<PRE>CAPS</pre> more CAPS")
    u'<PRE>CAPS</pre> more <span class="caps">CAPS</span>'

    >>> caps("A message from 2KU2 with digits")
    u'A message from <span class="caps">2KU2</span> with digits'

    >>> caps("Dotted caps followed by spaces should never include them in the wrap D.O.T. like so.")
    u'Dotted caps followed by spaces should never include them in the wrap <span class="caps">D.O.T.</span> like so.'

    All caps with apostrophes in them shouldn't break. Only handles dumb
    apostrophes though.

    >>> caps("JIMMY'S")
    u'<span class="caps">JIMMY\\'S</span>'

    >>> caps("<i>D.O.T.</i>HE34T<b>RFID</b>")
    u'<i><span class="caps">D.O.T.</span></i><span class="caps">HE34T</span><b><span class="caps">RFID</span></b>'
    """
    tokens = _smartypants._tokenize(text)
    result = []
    in_skipped_tag = False

    cap_finder = re.compile(r"""(
        (\b[A-Z\d]*        # Group 2: Any amount of caps and digits
        [A-Z]\d*[A-Z]      # A cap string must at least include two caps (but they can have digits between them)
        [A-Z\d']*\b)       # Any amount of caps and digits or dumb apostrophes
        |
        (\b[A-Z]+\.\s?     # OR: Group 3: Some caps, followed by a '.' and an optional space
        (?:[A-Z]+\.\s?)+)  # Followed by the same thing at least once more
        (?:\s|\b|$))
        """, re.VERBOSE)

    def _cap_wrapper(matchobj):
        """This is necessary to keep dotted cap strings to pick up extra spaces"""
        if matchobj.group(2):
            return """<span class="caps">%s</span>""" % matchobj.group(2)
        else:
            # Group 3 may have swallowed a trailing space; re-emit it
            # outside the span.
            if matchobj.group(3)[-1] == " ":
                caps = matchobj.group(3)[:-1]
                tail = ' '
            else:
                caps = matchobj.group(3)
                tail = ''
            return """<span class="caps">%s</span>%s""" % (caps, tail)

    tags_to_skip_regex = re.compile("<(/)?(?:pre|code|kbd|script|math)[^>]*>",
                                    re.IGNORECASE)
    for token in tokens:
        if token[0] == "tag":
            # Don't mess with tags.
            result.append(token[1])
            close_match = tags_to_skip_regex.match(token[1])
            if close_match and close_match.group(1) is None:
                in_skipped_tag = True
            else:
                in_skipped_tag = False
        else:
            if in_skipped_tag:
                result.append(token[1])
            else:
                result.append(cap_finder.sub(_cap_wrapper, token[1]))
    return "".join(result)
def caps(text):
    """Wraps multiple capital letters in ``<span class="caps">``
    so they can be styled with CSS.

    >>> caps("A message from KU")
    'A message from <span class="caps">KU</span>'

    Uses the smartypants tokenizer to not screw with HTML or with tags it
    shouldn't.

    >>> caps("<SCRIPT>CAPS</script> more CAPS")
    '<SCRIPT>CAPS</script> more <span class="caps">CAPS</span>'

    >>> caps("A message from 2KU2 with digits")
    'A message from <span class="caps">2KU2</span> with digits'

    >>> caps("Dotted caps followed by spaces should never include them in the wrap D.O.T. like so.")
    'Dotted caps followed by spaces should never include them in the wrap <span class="caps">D.O.T.</span> like so.'

    All caps with apostrophes in them shouldn't break. Only handles dumb
    apostrophes though.

    >>> caps("JIMMY'S")
    '<span class="caps">JIMMY\\'S</span>'

    >>> caps("<i>D.O.T.</i>HE34T<b>RFID</b>")
    '<i><span class="caps">D.O.T.</span></i><span class="caps">HE34T</span><b><span class="caps">RFID</span></b>'
    """
    try:
        import smartypants
    except ImportError:
        raise TypogrifyError("Error in {% caps %} filter: The Python SmartyPants library isn't installed.")

    tokens = smartypants._tokenize(text)
    result = []
    in_skipped_tag = False

    cap_finder = re.compile(r"""(
        (\b[A-Z\d]*        # Group 2: Any amount of caps and digits
        [A-Z]\d*[A-Z]      # A cap string must at least include two caps (but they can have digits between them)
        [A-Z\d']*\b)       # Any amount of caps and digits or dumb apostrophes
        |
        (\b[A-Z]+\.\s?     # OR: Group 3: Some caps, followed by a '.' and an optional space
        (?:[A-Z]+\.\s?)+)  # Followed by the same thing at least once more
        (?:\s|\b|$))
        """, re.VERBOSE)

    def _cap_wrapper(matchobj):
        """This is necessary to keep dotted cap strings to pick up extra spaces"""
        if matchobj.group(2):
            return """<span class="caps">%s</span>""" % matchobj.group(2)
        else:
            # Group 3 may have swallowed a trailing space; re-emit it
            # outside the span.
            if matchobj.group(3)[-1] == " ":
                caps = matchobj.group(3)[:-1]
                tail = ' '
            else:
                caps = matchobj.group(3)
                tail = ''
            return """<span class="caps">%s</span>%s""" % (caps, tail)

    # Add additional tags whose content should be
    # ignored here. Note - <pre> and <code> tag are
    # ignored by default and therefore are not here
    tags_to_skip_regex = re.compile("<(/)?(?:kbd|script)[^>]*>", re.IGNORECASE)

    for token in tokens:
        if token[0] == "tag":
            # Don't mess with tags.
            result.append(token[1])
            close_match = tags_to_skip_regex.match(token[1])
            if close_match and close_match.group(1) is None:
                in_skipped_tag = True
            else:
                in_skipped_tag = False
        else:
            if in_skipped_tag:
                result.append(token[1])
            else:
                result.append(cap_finder.sub(_cap_wrapper, token[1]))
    output = "".join(result)
    return output
def caps(text):
    """Wraps multiple capital letters in ``<span class="caps">``  # noqa: E501
    so they can be styled with CSS.

    >>> caps("A message from KU")
    u'A message from <span class="caps">KU</span>'

    Uses the smartypants tokenizer to not screw with HTML or with tags it
    shouldn't.

    >>> caps("<PRE>CAPS</pre> more CAPS")
    u'<PRE>CAPS</pre> more <span class="caps">CAPS</span>'

    >>> caps("A message from 2KU2 with digits")
    u'A message from <span class="caps">2KU2</span> with digits'

    >>> caps("Dotted caps followed by spaces should never include them in the wrap D.O.T. like so.")
    u'Dotted caps followed by spaces should never include them in the wrap <span class="caps">D.O.T.</span> like so.'

    All caps with apostrophes in them shouldn't break. Only handles dumb
    apostrophes though.

    >>> caps("JIMMY'S")
    u'<span class="caps">JIMMY\\'S</span>'

    >>> caps("<i>D.O.T.</i>HE34T<b>RFID</b>")
    u'<i><span class="caps">D.O.T.</span></i><span class="caps">HE34T</span><b><span class="caps">RFID</span></b>'

    >>> caps('"I.W.W." "IWW"')
    u'"<span class="caps">I.W.W.</span>" "<span class="caps">IWW</span>"'

    >>> caps("'I.W.W.' 'IWW'")
    u'\\'<span class="caps">I.W.W.</span>\\' \\'<span class="caps">IWW</span>\\''
    """
    text = force_text(text)
    try:
        import smartypants
    except ImportError:
        if settings.DEBUG:
            raise template.TemplateSyntaxError(
                "Error in {% caps %} filter:"
                + " The Python SmartyPants library isn't installed."
            )
        # Degrade gracefully in production: return the input untouched.
        return text

    tokens = smartypants._tokenize(text)
    result = []
    in_skipped_tag = False

    cap_finder = re.compile(
        r"""(              # noqa: E501
        (\b[A-Z\d]*        # Group 2: Any amount of caps and digits
        [A-Z]\d*[A-Z]      # A cap string must at least include two caps (but they can have digits between them)
        [A-Z\d']*\b)       # Any amount of caps and digits or dumb apostrophes
        |
        (\b[A-Z]+\.\s?     # OR: Group 3: Some caps, followed by a '.' and an optional space
        (?:[A-Z]+\.\s?)+)  # Followed by the same thing at least once more
        (?:'|"|\s|\b|$))
        """, re.VERBOSE)

    def _cap_wrapper(matchobj):
        """This is necessary to keep dotted cap strings to pick up extra spaces"""  # noqa: E501
        if matchobj.group(2):
            return """<span class="caps">%s</span>""" % matchobj.group(2)
        else:
            if matchobj.group(3)[-1] == " ":
                # A trailing space was consumed; re-emit it after the span.
                caps = matchobj.group(3)[:-1]
                tail = ' '
            elif matchobj.group(1)[-1] in ("'", '"'):
                # A trailing quote was consumed; re-emit it after the span.
                caps = matchobj.group(3)
                tail = matchobj.group(1)[-1]
            else:
                caps = matchobj.group(3)
                tail = ''
            return """<span class="caps">%s</span>%s""" % (caps, tail)

    tags_to_skip_regex = re.compile(
        "<(/)?(?:pre|code|kbd|script|math)[^>]*>", re.IGNORECASE)
    for token in tokens:
        if token[0] == "tag":
            # Don't mess with tags.
            result.append(token[1])
            close_match = tags_to_skip_regex.match(token[1])
            if close_match and close_match.group(1) is None:
                in_skipped_tag = True
            else:
                in_skipped_tag = False
        else:
            if in_skipped_tag:
                result.append(token[1])
            else:
                result.append(cap_finder.sub(_cap_wrapper, token[1]))
    output = "".join(result)
    return mark_safe(output)
def caps(text):
    """Wraps multiple capital letters in ``<span class="caps">``
    so they can be styled with CSS.

    >>> caps("A message from KU")
    u'A message from <span class="caps">KU</span>'

    Uses the smartypants tokenizer to not screw with HTML or with tags it
    shouldn't.

    >>> caps("<PRE>CAPS</pre> more CAPS")
    u'<PRE>CAPS</pre> more <span class="caps">CAPS</span>'

    >>> caps("A message from 2KU2 with digits")
    u'A message from <span class="caps">2KU2</span> with digits'

    >>> caps("Dotted caps followed by spaces should never include them in the wrap D.O.T. like so.")
    u'Dotted caps followed by spaces should never include them in the wrap <span class="caps">D.O.T.</span> like so.'

    All caps with apostrophes in them shouldn't break. Only handles dumb
    apostrophes though.

    >>> caps("JIMMY'S")
    u'<span class="caps">JIMMY\\'S</span>'

    >>> caps("<i>D.O.T.</i>HE34T<b>RFID</b>")
    u'<i><span class="caps">D.O.T.</span></i><span class="caps">HE34T</span><b><span class="caps">RFID</span></b>'

    >>> caps('"I.W.W." "IWW"')
    u'"<span class="caps">I.W.W.</span>" "<span class="caps">IWW</span>"'

    >>> caps("'I.W.W.' 'IWW'")
    u'\\'<span class="caps">I.W.W.</span>\\' \\'<span class="caps">IWW</span>\\''
    """
    text = force_unicode(text)
    try:
        import smartypants
    except ImportError:
        if settings.DEBUG:
            raise template.TemplateSyntaxError("Error in {% caps %} filter: The Python SmartyPants library isn't installed.")
        # Degrade gracefully in production: return the input untouched.
        return text

    tokens = smartypants._tokenize(text)
    result = []
    in_skipped_tag = False

    cap_finder = re.compile(r"""(
        (\b[A-Z\d]*        # Group 2: Any amount of caps and digits
        [A-Z]\d*[A-Z]      # A cap string must at least include two caps (but they can have digits between them)
        [A-Z\d']*\b)       # Any amount of caps and digits or dumb apostrophes
        |
        (\b[A-Z]+\.\s?     # OR: Group 3: Some caps, followed by a '.' and an optional space
        (?:[A-Z]+\.\s?)+)  # Followed by the same thing at least once more
        (?:'|"|\s|\b|$))
        """, re.VERBOSE)

    def _cap_wrapper(matchobj):
        """This is necessary to keep dotted cap strings to pick up extra spaces"""
        if matchobj.group(2):
            return """<span class="caps">%s</span>""" % matchobj.group(2)
        else:
            if matchobj.group(3)[-1] == " ":
                # A trailing space was consumed; re-emit it after the span.
                caps = matchobj.group(3)[:-1]
                tail = ' '
            elif matchobj.group(1)[-1] in ("'", '"'):
                # A trailing quote was consumed; re-emit it after the span.
                caps = matchobj.group(3)
                tail = matchobj.group(1)[-1]
            else:
                caps = matchobj.group(3)
                tail = ''
            return """<span class="caps">%s</span>%s""" % (caps, tail)

    tags_to_skip_regex = re.compile("<(/)?(?:pre|code|kbd|script|math)[^>]*>",
                                    re.IGNORECASE)
    for token in tokens:
        if token[0] == "tag":
            # Don't mess with tags.
            result.append(token[1])
            close_match = tags_to_skip_regex.match(token[1])
            if close_match and close_match.group(1) is None:
                in_skipped_tag = True
            else:
                in_skipped_tag = False
        else:
            if in_skipped_tag:
                result.append(token[1])
            else:
                result.append(cap_finder.sub(_cap_wrapper, token[1]))
    output = "".join(result)
    return mark_safe(output)
def caps(text):
    '''Wraps multiple capital letters in ``<span class="caps">``
    so they can be styled with CSS.

    >>> caps('A message from KU')
    'A message from <span class="caps">KU</span>'

    Uses the smartypants tokenizer to not screw with HTML or with tags it
    shouldn't.

    >>> caps('<PRE>CAPS</pre> more CAPS')
    '<PRE>CAPS</pre> more <span class="caps">CAPS</span>'

    >>> caps('A message from 2KU2 with digits')
    'A message from <span class="caps">2KU2</span> with digits'

    >>> caps('Dotted caps followed by spaces should never include them in the wrap D.O.T. like so.')
    'Dotted caps followed by spaces should never include them in the wrap <span class="caps">D.O.T.</span> like so.'

    All caps with apostrophes in them shouldn't break. Only handles dumb
    apostrophes though.

    >>> caps("JIMMY'S")
    '<span class="caps">JIMMY\\'S</span>'

    >>> caps('<i>D.O.T.</i>HE34T<b>RFID</b>')
    '<i><span class="caps">D.O.T.</span></i><span class="caps">HE34T</span><b><span class="caps">RFID</span></b>'
    '''
    try:
        import smartypants
    except ImportError:
        # Hard failure: this variant has no graceful fallback.
        raise Exception("The Python SmartyPants library isn't installed.")

    tokens = smartypants._tokenize(text)
    result = []
    in_skipped_tag = False

    cap_finder = re.compile(r'''(
        (\b[A-Z\d]*        # Group 2: Any amount of caps and digits
        [A-Z]\d*[A-Z]      # A cap string must at least include two caps (but they can have digits between them)
        [A-Z\d']*\b)       # Any amount of caps and digits or dumb apostrophes
        |
        (\b[A-Z]+\.\s?     # OR: Group 3: Some caps, followed by a '.' and an optional space
        (?:[A-Z]+\.\s?)+)  # Followed by the same thing at least once more
        (?:\s|\b|$))
        ''', re.VERBOSE)

    def _cap_wrapper(matchobj):
        '''This is necessary to keep dotted cap strings to pick up extra spaces'''
        if matchobj.group(2):
            return '''<span class="caps">%s</span>''' % matchobj.group(2)
        else:
            # Group 3 may have swallowed a trailing space; re-emit it
            # outside the span.
            if matchobj.group(3)[-1] == ' ':
                caps = matchobj.group(3)[:-1]
                tail = ' '
            else:
                caps = matchobj.group(3)
                tail = ''
            return '''<span class="caps">%s</span>%s''' % (caps, tail)

    tags_to_skip_regex = re.compile('<(/)?(?:pre|code|kbd|script|math)[^>]*>',
                                    re.IGNORECASE)
    for token in tokens:
        if token[0] == 'tag':
            # Don't mess with tags.
            result.append(token[1])
            close_match = tags_to_skip_regex.match(token[1])
            if close_match and close_match.group(1) is None:
                in_skipped_tag = True
            else:
                in_skipped_tag = False
        else:
            if in_skipped_tag:
                result.append(token[1])
            else:
                result.append(cap_finder.sub(_cap_wrapper, token[1]))
    output = ''.join(result)
    return output
def caps(text):
    """Wraps multiple capital letters in ``<span class="caps">``
    so they can be styled with CSS.

    >>> caps("A message from KU")
    'A message from <span class="caps">KU</span>'

    Uses the smartypants tokenizer to not screw with HTML or with tags it
    shouldn't.

    >>> caps("<SCRIPT>CAPS</script> more CAPS")
    '<SCRIPT>CAPS</script> more <span class="caps">CAPS</span>'

    >>> caps("A message from 2KU2 with digits")
    'A message from <span class="caps">2KU2</span> with digits'

    >>> caps("Dotted caps followed by spaces should never include them in the wrap D.O.T. like so.")
    'Dotted caps followed by spaces should never include them in the wrap <span class="caps">D.O.T.</span> like so.'

    All caps with apostrophes in them shouldn't break. Only handles dumb
    apostrophes though.

    >>> caps("JIMMY'S")
    '<span class="caps">JIMMY\\'S</span>'

    >>> caps("<i>D.O.T.</i>HE34T<b>RFID</b>")
    '<i><span class="caps">D.O.T.</span></i><span class="caps">HE34T</span><b><span class="caps">RFID</span></b>'
    """
    try:
        import smartypants
    except ImportError:
        raise TypogrifyError(
            "Error in {% caps %} filter: The Python SmartyPants library isn't installed."
        )

    tokens = smartypants._tokenize(text)
    result = []
    in_skipped_tag = False

    cap_finder = re.compile(
        r"""(
        (\b[A-Z\d]*        # Group 2: Any amount of caps and digits
        [A-Z]\d*[A-Z]      # A cap string must at least include two caps (but they can have digits between them)
        [A-Z\d']*\b)       # Any amount of caps and digits or dumb apostrophes
        |
        (\b[A-Z]+\.\s?     # OR: Group 3: Some caps, followed by a '.' and an optional space
        (?:[A-Z]+\.\s?)+)  # Followed by the same thing at least once more
        (?:\s|\b|$))
        """, re.VERBOSE)

    def _cap_wrapper(matchobj):
        """This is necessary to keep dotted cap strings to pick up extra spaces"""
        if matchobj.group(2):
            return """<span class="caps">%s</span>""" % matchobj.group(2)
        else:
            # Group 3 may have swallowed a trailing space; re-emit it
            # outside the span.
            if matchobj.group(3)[-1] == " ":
                caps = matchobj.group(3)[:-1]
                tail = ' '
            else:
                caps = matchobj.group(3)
                tail = ''
            return """<span class="caps">%s</span>%s""" % (caps, tail)

    # Add additional tags whose content should be
    # ignored here. Note - <pre> and <code> tag are
    # ignored by default and therefore are not here
    tags_to_skip_regex = re.compile("<(/)?(?:kbd|script)[^>]*>", re.IGNORECASE)

    for token in tokens:
        if token[0] == "tag":
            # Don't mess with tags.
            result.append(token[1])
            close_match = tags_to_skip_regex.match(token[1])
            if close_match and close_match.group(1) is None:
                in_skipped_tag = True
            else:
                in_skipped_tag = False
        else:
            if in_skipped_tag:
                result.append(token[1])
            else:
                result.append(cap_finder.sub(_cap_wrapper, token[1]))
    output = "".join(result)
    return output