Ejemplo n.º 1
0
def number_suffix_filter(text):
    """Wrap ordinal suffixes (st, nd, rd, th) in ``<span class='ord'>``.

    The suffix that follows a run of digits is wrapped so it can be styled
    with CSS: ``10th`` becomes ``10<span class='ord'>th</span>``.

    The input is split with the smartypants tokenizer and only text tokens
    are rewritten. Tags are copied through verbatim (so attribute values
    such as ``href="/1st"`` are not corrupted), and text inside ``<pre>``,
    ``<code>``, ``<kbd>``, ``<script>`` and ``<math>`` elements is left
    alone, including text that follows nested tags inside those elements.

    :param text: HTML or plain text to process.
    :returns: the processed markup, passed through ``mark_safe``.
    """
    suffix_finder = re.compile(r"(?P<number>[\d]+)(?P<ord>st|nd|rd|th)")
    tags_to_skip_regex = re.compile(
        "<(/)?(?:pre|code|kbd|script|math)[^>]*>", re.IGNORECASE)

    def _suffix_process(groups):
        """Build the replacement markup for one number + suffix match."""
        number = groups.group("number")
        suffix = groups.group("ord")

        return "%s<span class='ord'>%s</span>" % (number, suffix)

    result = []
    # Nesting depth of currently open skip-tags; text is only rewritten
    # while this is zero.
    skip_depth = 0

    for token in smartypants._tokenize(text):
        if token[0] == "tag":
            # Don't mess with tags.
            result.append(token[1])
            skip_match = tags_to_skip_regex.match(token[1])
            if skip_match:
                if skip_match.group(1) is None:
                    # Opening skip-tag (no leading slash captured).
                    skip_depth += 1
                elif skip_depth > 0:
                    # Closing skip-tag; never drop below zero on stray closers.
                    skip_depth -= 1
        elif skip_depth > 0:
            result.append(token[1])
        else:
            result.append(suffix_finder.sub(_suffix_process, token[1]))

    return mark_safe("".join(result))
Ejemplo n.º 2
0
    def ligature(text):
        """Replaces common di- and tri-graphs with ligatures.

        >>> Typogrify.ligature('define flargle')
        'de&#xfb01;ne &#xfb02;argle'

        Tri-graphs take precedence over the di-graphs they start with.

        >>> Typogrify.ligature('office')
        'o&#xfb03;ce'

        >>> Typogrify.ligature('<p>define flargle</p>')
        '<p>de&#xfb01;ne &#xfb02;argle</p>'

        >>> Typogrify.ligature('<pre>define flargle</pre>')
        '<pre>define flargle</pre>'

        >>> Typogrify.ligature('<pre><span>define flargle</span></pre>')
        '<pre><span>define flargle</span></pre>'

        :param text: HTML or plain text to process.
        :returns: the text with ligature entities substituted in text
            tokens that are outside ``<pre>``, ``<code>``, ``<kbd>``,
            ``<script>`` and ``<math>`` elements.
        :raises TypogrifyError: if the smartypants library cannot be imported.
        """
        replacements = {
            'ff': '&#xfb00;',
            'fi': '&#xfb01;',
            'fl': '&#xfb02;',
            'ffi': '&#xfb03;',
            'ffl': '&#xfb04;'
        }
        # Regex alternation takes the first alternative that matches, not the
        # longest one, so the tri-graphs must be listed before the di-graphs
        # or 'ffi' / 'ffl' would always be consumed as 'ff'.
        ngraph_finder = re.compile(
            '|'.join(sorted(replacements, key=len, reverse=True)))

        def replace(match):
            """Look up the ligature entity for the matched n-graph."""
            return replacements[match.group()]

        try:
            import smartypants
        except ImportError:
            raise TypogrifyError("Error in {% ligature %} filter: The Python SmartyPants library isn't installed.")

        tokens = smartypants._tokenize(text)
        result = []

        tags_to_skip_regex = re.compile("<(/)?(?:pre|code|kbd|script|math)[^>]*>", re.IGNORECASE)
        # Nesting depth of currently open skip-tags.
        skippage = 0

        for token in tokens:
            if token[0] == "tag":
                # Don't mess with tags.
                result.append(token[1])
                close_match = tags_to_skip_regex.match(token[1])
                if close_match:
                    if close_match.group(1) is None:
                        skippage += 1
                    elif skippage > 0:
                        # Ignore stray closing tags so the depth never goes
                        # negative and masks a later opening tag.
                        skippage -= 1
            elif skippage > 0:
                result.append(token[1])
            else:
                result.append(ngraph_finder.sub(replace, token[1]))
        output = "".join(result)
        return output
Ejemplo n.º 3
0
def smartycaps(text):
    """Apply ``_wrap_caps`` to runs of capitals found in the text tokens.

    The input is split with ``_tokenize``. Tag tokens are copied to the
    output unchanged, and text tokens have every match of the caps pattern
    replaced via ``_wrap_caps``, except while inside an element matched by
    ``_tags_to_skip_regex()``.

    :param text: HTML or plain text to process.
    :returns: the processed text, with all tags preserved.
    """
    tokens = _tokenize(text)
    result = []
    tags_to_skip_regex = _tags_to_skip_regex()
    caps_re = re.compile(r'[0-9]*([A-Z][0-9\/\-.+&]?){1,}[A-Z][0-9]*\.?')
    # Nesting depth of currently open skip-tags; text is only rewritten
    # while this is zero.
    skip_depth = 0
    for cur_token in tokens:
        if cur_token[0] == 'tag':
            # Tags are emitted verbatim; dropping them would strip the markup.
            result.append(cur_token[1])
            skip_match = tags_to_skip_regex.match(cur_token[1])
            if skip_match is not None:
                # NOTE(review): assumes group(1) of the helper's pattern
                # captures the "/" of a closing tag, as in the other
                # tags_to_skip regexes of this code base -- confirm.
                if not skip_match.group(1):
                    skip_depth += 1
                elif skip_depth > 0:
                    skip_depth -= 1
        else:
            t = cur_token[1]
            if skip_depth == 0:
                t = caps_re.sub(_wrap_caps, t)
            result.append(t)
    return ''.join(result)
Ejemplo n.º 4
0
def number_suffix_filter(text):
    """Wrap ordinal suffixes (st, nd, rd, th) in ``<span class='ord'>``.

    The suffix that follows a run of digits is wrapped so it can be styled
    with CSS: ``10th`` becomes ``10<span class='ord'>th</span>``.

    The input is split with the smartypants tokenizer and only text tokens
    are rewritten. Tags are copied through verbatim (so attribute values
    such as ``href="/1st"`` are not corrupted), and text inside ``<pre>``,
    ``<code>``, ``<kbd>``, ``<script>`` and ``<math>`` elements is left
    alone, including text that follows nested tags inside those elements.

    :param text: HTML or plain text to process.
    :returns: the processed markup, passed through ``mark_safe``.
    """
    suffix_finder = re.compile(r'(?P<number>[\d]+)(?P<ord>st|nd|rd|th)')
    tags_to_skip_regex = re.compile(
        '<(/)?(?:pre|code|kbd|script|math)[^>]*>', re.IGNORECASE)

    def _suffix_process(groups):
        """Build the replacement markup for one number + suffix match."""
        number = groups.group('number')
        suffix = groups.group('ord')

        return "%s<span class='ord'>%s</span>" % (number, suffix)

    result = []
    # Nesting depth of currently open skip-tags; text is only rewritten
    # while this is zero.
    skip_depth = 0

    for token in smartypants._tokenize(text):
        if token[0] == 'tag':
            # Don't mess with tags.
            result.append(token[1])
            skip_match = tags_to_skip_regex.match(token[1])
            if skip_match:
                if skip_match.group(1) is None:
                    # Opening skip-tag (no leading slash captured).
                    skip_depth += 1
                elif skip_depth > 0:
                    # Closing skip-tag; never drop below zero on stray closers.
                    skip_depth -= 1
        elif skip_depth > 0:
            result.append(token[1])
        else:
            result.append(suffix_finder.sub(_suffix_process, token[1]))

    return mark_safe(''.join(result))
def caps(text):
    """Wraps multiple capital letters in ``<span class="caps">``
    so they can be styled with CSS.

    >>> caps("A message from KU")
    u'A message from <span class="caps">KU</span>'

    Uses the smartypants tokenizer to not screw with HTML or with tags it shouldn't.

    >>> caps("<PRE>CAPS</pre> more CAPS")
    u'<PRE>CAPS</pre> more <span class="caps">CAPS</span>'

    Text stays untouched for the whole extent of a skipped element, even
    when other tags are nested inside it.

    >>> caps("<pre><b>CAPS</b> CAPS</pre>")
    u'<pre><b>CAPS</b> CAPS</pre>'

    >>> caps("A message from 2KU2 with digits")
    u'A message from <span class="caps">2KU2</span> with digits'

    >>> caps("Dotted caps followed by spaces should never include them in the wrap D.O.T.   like so.")
    u'Dotted caps followed by spaces should never include them in the wrap <span class="caps">D.O.T.</span>  like so.'

    All caps with apostrophes in them shouldn't break. Only handles dumb apostrophes though.
    >>> caps("JIMMY'S")
    u'<span class="caps">JIMMY\\'S</span>'

    >>> caps("<i>D.O.T.</i>HE34T<b>RFID</b>")
    u'<i><span class="caps">D.O.T.</span></i><span class="caps">HE34T</span><b><span class="caps">RFID</span></b>'

    :param text: HTML or plain text to process.
    :returns: the text with runs of capitals wrapped in caps spans.
    """

    tokens = _smartypants._tokenize(text)
    result = []
    # Nesting depth of currently open skip-tags; text is only rewritten
    # while this is zero.
    skip_depth = 0

    cap_finder = re.compile(r"""(
                            (\b[A-Z\d]*        # Group 2: Any amount of caps and digits
                            [A-Z]\d*[A-Z]      # A cap string must at least include two caps (but they can have digits between them)
                            [A-Z\d']*\b)       # Any amount of caps and digits or dumb apostsrophes
                            | (\b[A-Z]+\.\s?   # OR: Group 3: Some caps, followed by a '.' and an optional space
                            (?:[A-Z]+\.\s?)+)  # Followed by the same thing at least once more
                            (?:\s|\b|$))
                            """, re.VERBOSE)

    def _cap_wrapper(matchobj):
        """Keep dotted cap strings from pulling a trailing space into the span."""
        if matchobj.group(2):
            return """<span class="caps">%s</span>""" % matchobj.group(2)
        dotted = matchobj.group(3)
        if dotted[-1] == " ":
            # Move the trailing space outside of the span.
            return """<span class="caps">%s</span>%s""" % (dotted[:-1], ' ')
        return """<span class="caps">%s</span>%s""" % (dotted, '')

    tags_to_skip_regex = re.compile(
        "<(/)?(?:pre|code|kbd|script|math)[^>]*>", re.IGNORECASE)

    for token in tokens:
        if token[0] == "tag":
            # Don't mess with tags.
            result.append(token[1])
            skip_match = tags_to_skip_regex.match(token[1])
            if skip_match:
                if skip_match.group(1) is None:
                    # Opening skip-tag (no leading slash captured).
                    skip_depth += 1
                elif skip_depth > 0:
                    # Closing skip-tag; ignore stray closers.
                    skip_depth -= 1
            # Any other tag leaves the skip state alone, so tags nested in
            # a skipped element do not re-enable processing.
        elif skip_depth > 0:
            result.append(token[1])
        else:
            result.append(cap_finder.sub(_cap_wrapper, token[1]))
    return "".join(result)
Ejemplo n.º 6
0
def caps(text):
    """Wraps multiple capital letters in ``<span class="caps">``
    so they can be styled with CSS.

    >>> caps("A message from KU")
    u'A message from <span class="caps">KU</span>'

    Uses the smartypants tokenizer to not screw with HTML or with tags it shouldn't.

    >>> caps("<PRE>CAPS</pre> more CAPS")
    u'<PRE>CAPS</pre> more <span class="caps">CAPS</span>'

    Tags nested inside a skipped element do not switch processing back on.

    >>> caps("<code><em>CAPS</em> CAPS</code>")
    u'<code><em>CAPS</em> CAPS</code>'

    >>> caps("A message from 2KU2 with digits")
    u'A message from <span class="caps">2KU2</span> with digits'

    >>> caps("Dotted caps followed by spaces should never include them in the wrap D.O.T.   like so.")
    u'Dotted caps followed by spaces should never include them in the wrap <span class="caps">D.O.T.</span>  like so.'

    All caps with apostrophes in them shouldn't break. Only handles dumb apostrophes though.
    >>> caps("JIMMY'S")
    u'<span class="caps">JIMMY\\'S</span>'

    >>> caps("<i>D.O.T.</i>HE34T<b>RFID</b>")
    u'<i><span class="caps">D.O.T.</span></i><span class="caps">HE34T</span><b><span class="caps">RFID</span></b>'

    :param text: HTML or plain text to process.
    :returns: the text with runs of capitals wrapped in caps spans.
    """

    tokens = _smartypants._tokenize(text)
    result = []
    # Count of skip-tags currently open; zero means text may be rewritten.
    open_skip_tags = 0

    cap_finder = re.compile(r"""(
                            (\b[A-Z\d]*        # Group 2: Any amount of caps and digits
                            [A-Z]\d*[A-Z]      # A cap string must at least include two caps (but they can have digits between them)
                            [A-Z\d']*\b)       # Any amount of caps and digits or dumb apostsrophes
                            | (\b[A-Z]+\.\s?   # OR: Group 3: Some caps, followed by a '.' and an optional space
                            (?:[A-Z]+\.\s?)+)  # Followed by the same thing at least once more
                            (?:\s|\b|$))
                            """, re.VERBOSE)

    def _cap_wrapper(matchobj):
        """Keep dotted cap strings from pulling a trailing space into the span."""
        if matchobj.group(2):
            return """<span class="caps">%s</span>""" % matchobj.group(2)
        else:
            if matchobj.group(3)[-1] == " ":
                # The space goes after the span instead of inside it.
                wrapped = matchobj.group(3)[:-1]
                tail = ' '
            else:
                wrapped = matchobj.group(3)
                tail = ''
            return """<span class="caps">%s</span>%s""" % (wrapped, tail)

    tags_to_skip_regex = re.compile("<(/)?(?:pre|code|kbd|script|math)[^>]*>", re.IGNORECASE)

    for token in tokens:
        if token[0] == "tag":
            # Don't mess with tags.
            result.append(token[1])
            skip_match = tags_to_skip_regex.match(token[1])
            if skip_match is None:
                # Not a skip-tag: the current skip state is kept as is.
                continue
            if skip_match.group(1) is None:
                open_skip_tags += 1
            elif open_skip_tags > 0:
                # Closing skip-tag; stray closers are ignored.
                open_skip_tags -= 1
        elif open_skip_tags > 0:
            result.append(token[1])
        else:
            result.append(cap_finder.sub(_cap_wrapper, token[1]))
    return "".join(result)
Ejemplo n.º 7
0
def caps(text):
    """Wraps multiple capital letters in ``<span class="caps">``
    so they can be styled with CSS.

    >>> caps("A message from KU")
    'A message from <span class="caps">KU</span>'

    Uses the smartypants tokenizer to not screw with HTML or with tags it shouldn't.

    >>> caps("<SCRIPT>CAPS</script> more CAPS")
    '<SCRIPT>CAPS</script> more <span class="caps">CAPS</span>'

    Tags nested inside a skipped element do not switch processing back on.

    >>> caps("<kbd><b>CTRL</b> ALT</kbd>")
    '<kbd><b>CTRL</b> ALT</kbd>'

    >>> caps("A message from 2KU2 with digits")
    'A message from <span class="caps">2KU2</span> with digits'

    >>> caps("Dotted caps followed by spaces should never include them in the wrap D.O.T.   like so.")
    'Dotted caps followed by spaces should never include them in the wrap <span class="caps">D.O.T.</span>  like so.'

    All caps with apostrophes in them shouldn't break. Only handles dumb apostrophes though.
    >>> caps("JIMMY'S")
    '<span class="caps">JIMMY\\'S</span>'

    >>> caps("<i>D.O.T.</i>HE34T<b>RFID</b>")
    '<i><span class="caps">D.O.T.</span></i><span class="caps">HE34T</span><b><span class="caps">RFID</span></b>'

    :param text: HTML or plain text to process.
    :returns: the text with runs of capitals wrapped in caps spans.
    :raises TypogrifyError: if the smartypants library cannot be imported.
    """
    try:
        import smartypants
    except ImportError:
        raise TypogrifyError("Error in {% caps %} filter: The Python SmartyPants library isn't installed.")

    tokens = smartypants._tokenize(text)
    result = []
    # Nesting depth of currently open skip-tags; text is only rewritten
    # while this is zero.
    skip_depth = 0

    cap_finder = re.compile(r"""(
                            (\b[A-Z\d]*        # Group 2: Any amount of caps and digits
                            [A-Z]\d*[A-Z]      # A cap string much at least include two caps (but they can have digits between them)
                            [A-Z\d']*\b)       # Any amount of caps and digits or dumb apostsrophes
                            | (\b[A-Z]+\.\s?   # OR: Group 3: Some caps, followed by a '.' and an optional space
                            (?:[A-Z]+\.\s?)+)  # Followed by the same thing at least once more
                            (?:\s|\b|$))
                            """, re.VERBOSE)

    def _cap_wrapper(matchobj):
        """Keep dotted cap strings from pulling a trailing space into the span."""
        if matchobj.group(2):
            return """<span class="caps">%s</span>""" % matchobj.group(2)
        dotted = matchobj.group(3)
        if dotted[-1] == " ":
            # Move the trailing space outside of the span.
            return """<span class="caps">%s</span>%s""" % (dotted[:-1], ' ')
        return """<span class="caps">%s</span>%s""" % (dotted, '')

    # Add additional tags whose content should be
    # ignored here. Note - <pre> and <code> tag are
    # ignored by default and therefore are not here
    tags_to_skip_regex = re.compile("<(/)?(?:kbd|script)[^>]*>", re.IGNORECASE)

    for token in tokens:
        if token[0] == "tag":
            # Don't mess with tags.
            result.append(token[1])
            skip_match = tags_to_skip_regex.match(token[1])
            if skip_match:
                if skip_match.group(1) is None:
                    # Opening skip-tag (no leading slash captured).
                    skip_depth += 1
                elif skip_depth > 0:
                    # Closing skip-tag; ignore stray closers.
                    skip_depth -= 1
            # Any other tag leaves the skip state alone.
        elif skip_depth > 0:
            result.append(token[1])
        else:
            result.append(cap_finder.sub(_cap_wrapper, token[1]))
    output = "".join(result)
    return output
Ejemplo n.º 8
0
def caps(text):
    """Wraps multiple capital letters in ``<span class="caps">``  # noqa: E501
    so they can be styled with CSS.

    >>> caps("A message from KU")
    u'A message from <span class="caps">KU</span>'

    Uses the smartypants tokenizer to not screw with HTML or with tags it shouldn't.

    >>> caps("<PRE>CAPS</pre> more CAPS")
    u'<PRE>CAPS</pre> more <span class="caps">CAPS</span>'

    Tags nested inside a skipped element do not switch processing back on.

    >>> caps("<pre><b>CAPS</b> CAPS</pre>")
    u'<pre><b>CAPS</b> CAPS</pre>'

    >>> caps("A message from 2KU2 with digits")
    u'A message from <span class="caps">2KU2</span> with digits'

    >>> caps("Dotted caps followed by spaces should never include them in the wrap D.O.T.   like so.")
    u'Dotted caps followed by spaces should never include them in the wrap <span class="caps">D.O.T.</span>  like so.'

    All caps with apostrophes in them shouldn't break. Only handles dumb apostrophes though.
    >>> caps("JIMMY'S")
    u'<span class="caps">JIMMY\\'S</span>'

    >>> caps("<i>D.O.T.</i>HE34T<b>RFID</b>")
    u'<i><span class="caps">D.O.T.</span></i><span class="caps">HE34T</span><b><span class="caps">RFID</span></b>'

    >>> caps('"I.W.W." "IWW"')
    u'"<span class="caps">I.W.W.</span>" "<span class="caps">IWW</span>"'

    >>> caps("'I.W.W.' 'IWW'")
    u'\\'<span class="caps">I.W.W.</span>\\' \\'<span class="caps">IWW</span>\\''

    :param text: value to process; it is converted with ``force_text`` first.
    :returns: the processed markup passed through ``mark_safe``, or the
        converted text unchanged when smartypants is missing and
        ``settings.DEBUG`` is false.
    :raises template.TemplateSyntaxError: when smartypants is missing and
        ``settings.DEBUG`` is true.
    """
    text = force_text(text)
    try:
        import smartypants
    except ImportError:
        if settings.DEBUG:
            raise template.TemplateSyntaxError(
                "Error in {% caps %} filter:"
                + " The Python SmartyPants library isn't installed."
            )
        return text

    tokens = smartypants._tokenize(text)
    result = []
    # Nesting depth of currently open skip-tags; text is only rewritten
    # while this is zero.
    skip_depth = 0

    cap_finder = re.compile(
        r"""(  # noqa: E501
        (\b[A-Z\d]*        # Group 2: Any amount of caps and digits
        [A-Z]\d*[A-Z]      # A cap string much at least include two caps (but they can have digits between them)
        [A-Z\d']*\b)       # Any amount of caps and digits or dumb apostsrophes
        | (\b[A-Z]+\.\s?   # OR: Group 3: Some caps, followed by a '.' and an optional space
        (?:[A-Z]+\.\s?)+)  # Followed by the same thing at least once more
        (?:'|"|\s|\b|$))
        """, re.VERBOSE)

    def _cap_wrapper(matchobj):
        """Keep dotted cap strings from pulling trailing characters into the span."""  # noqa: E501
        if matchobj.group(2):
            return """<span class="caps">%s</span>""" % matchobj.group(2)
        dotted = matchobj.group(3)
        last_matched = matchobj.group(1)[-1]
        if dotted[-1] == " ":
            # Move the trailing space outside of the span.
            wrapped = dotted[:-1]
            tail = ' '
        elif last_matched in ("'", '"'):
            # The pattern consumed a closing quote; put it back after the span.
            wrapped = dotted
            tail = last_matched
        else:
            wrapped = dotted
            tail = ''
        return """<span class="caps">%s</span>%s""" % (wrapped, tail)

    tags_to_skip_regex = re.compile(
        "<(/)?(?:pre|code|kbd|script|math)[^>]*>", re.IGNORECASE)

    for token in tokens:
        if token[0] == "tag":
            # Don't mess with tags.
            result.append(token[1])
            skip_match = tags_to_skip_regex.match(token[1])
            if skip_match:
                if skip_match.group(1) is None:
                    # Opening skip-tag (no leading slash captured).
                    skip_depth += 1
                elif skip_depth > 0:
                    # Closing skip-tag; ignore stray closers.
                    skip_depth -= 1
            # Any other tag leaves the skip state alone.
        elif skip_depth > 0:
            result.append(token[1])
        else:
            result.append(cap_finder.sub(_cap_wrapper, token[1]))
    output = "".join(result)
    return mark_safe(output)
Ejemplo n.º 9
0
def caps(text):
    """Wraps multiple capital letters in ``<span class="caps">``
    so they can be styled with CSS.

    >>> caps("A message from KU")
    u'A message from <span class="caps">KU</span>'

    Uses the smartypants tokenizer to not screw with HTML or with tags it shouldn't.

    >>> caps("<PRE>CAPS</pre> more CAPS")
    u'<PRE>CAPS</pre> more <span class="caps">CAPS</span>'

    Tags nested inside a skipped element do not switch processing back on.

    >>> caps("<pre><b>CAPS</b> CAPS</pre>")
    u'<pre><b>CAPS</b> CAPS</pre>'

    >>> caps("A message from 2KU2 with digits")
    u'A message from <span class="caps">2KU2</span> with digits'

    >>> caps("Dotted caps followed by spaces should never include them in the wrap D.O.T.   like so.")
    u'Dotted caps followed by spaces should never include them in the wrap <span class="caps">D.O.T.</span>  like so.'

    All caps with apostrophes in them shouldn't break. Only handles dumb apostrophes though.
    >>> caps("JIMMY'S")
    u'<span class="caps">JIMMY\\'S</span>'

    >>> caps("<i>D.O.T.</i>HE34T<b>RFID</b>")
    u'<i><span class="caps">D.O.T.</span></i><span class="caps">HE34T</span><b><span class="caps">RFID</span></b>'

    >>> caps('"I.W.W." "IWW"')
    u'"<span class="caps">I.W.W.</span>" "<span class="caps">IWW</span>"'

    >>> caps("'I.W.W.' 'IWW'")
    u'\\'<span class="caps">I.W.W.</span>\\' \\'<span class="caps">IWW</span>\\''

    :param text: value to process; it is converted with ``force_unicode`` first.
    :returns: the processed markup passed through ``mark_safe``, or the
        converted text unchanged when smartypants is missing and
        ``settings.DEBUG`` is false.
    :raises template.TemplateSyntaxError: when smartypants is missing and
        ``settings.DEBUG`` is true.
    """
    text = force_unicode(text)
    try:
        import smartypants
    except ImportError:
        if settings.DEBUG:
            # Call form of raise: valid on both Python 2 and Python 3.
            raise template.TemplateSyntaxError(
                "Error in {% caps %} filter: The Python SmartyPants library isn't installed.")
        return text

    tokens = smartypants._tokenize(text)
    result = []
    # Nesting depth of currently open skip-tags; text is only rewritten
    # while this is zero.
    skip_depth = 0

    cap_finder = re.compile(r"""(
                            (\b[A-Z\d]*        # Group 2: Any amount of caps and digits
                            [A-Z]\d*[A-Z]      # A cap string much at least include two caps (but they can have digits between them)
                            [A-Z\d']*\b)       # Any amount of caps and digits or dumb apostsrophes
                            | (\b[A-Z]+\.\s?   # OR: Group 3: Some caps, followed by a '.' and an optional space
                            (?:[A-Z]+\.\s?)+)  # Followed by the same thing at least once more
                            (?:'|"|\s|\b|$))
                            """, re.VERBOSE)

    def _cap_wrapper(matchobj):
        """Keep dotted cap strings from pulling trailing characters into the span."""
        if matchobj.group(2):
            return """<span class="caps">%s</span>""" % matchobj.group(2)
        dotted = matchobj.group(3)
        last_matched = matchobj.group(1)[-1]
        if dotted[-1] == " ":
            # Move the trailing space outside of the span.
            wrapped = dotted[:-1]
            tail = ' '
        elif last_matched in ("'", '"'):
            # The pattern consumed a closing quote; put it back after the span.
            wrapped = dotted
            tail = last_matched
        else:
            wrapped = dotted
            tail = ''
        return """<span class="caps">%s</span>%s""" % (wrapped, tail)

    tags_to_skip_regex = re.compile("<(/)?(?:pre|code|kbd|script|math)[^>]*>", re.IGNORECASE)

    for token in tokens:
        if token[0] == "tag":
            # Don't mess with tags.
            result.append(token[1])
            skip_match = tags_to_skip_regex.match(token[1])
            if skip_match:
                if skip_match.group(1) is None:
                    # Opening skip-tag (no leading slash captured).
                    skip_depth += 1
                elif skip_depth > 0:
                    # Closing skip-tag; ignore stray closers.
                    skip_depth -= 1
            # Any other tag leaves the skip state alone.
        elif skip_depth > 0:
            result.append(token[1])
        else:
            result.append(cap_finder.sub(_cap_wrapper, token[1]))
    output = "".join(result)
    return mark_safe(output)
Ejemplo n.º 10
0
def caps(text):
    '''Wraps multiple capital letters in ``<span class="caps">``
    so they can be styled with CSS.

    >>> caps('A message from KU')
    'A message from <span class="caps">KU</span>'

    Uses the smartypants tokenizer to not screw with HTML or with tags it shouldn't.

    >>> caps('<PRE>CAPS</pre> more CAPS')
    '<PRE>CAPS</pre> more <span class="caps">CAPS</span>'

    Tags nested inside a skipped element do not switch processing back on.

    >>> caps('<pre><b>CAPS</b> CAPS</pre>')
    '<pre><b>CAPS</b> CAPS</pre>'

    >>> caps('A message from 2KU2 with digits')
    'A message from <span class="caps">2KU2</span> with digits'

    >>> caps('Dotted caps followed by spaces should never include them in the wrap D.O.T.   like so.')
    'Dotted caps followed by spaces should never include them in the wrap <span class="caps">D.O.T.</span>  like so.'

    All caps with apostrophes in them shouldn't break. Only handles dumb apostrophes though.

    >>> caps("JIMMY'S")
    '<span class="caps">JIMMY\\'S</span>'

    >>> caps('<i>D.O.T.</i>HE34T<b>RFID</b>')
    '<i><span class="caps">D.O.T.</span></i><span class="caps">HE34T</span><b><span class="caps">RFID</span></b>'

    :param text: HTML or plain text to process.
    :returns: the text with runs of capitals wrapped in caps spans.
    :raises Exception: if the smartypants library cannot be imported.
    '''
    try:
        import smartypants
    except ImportError:
        # Call form of raise: valid on both Python 2 and Python 3.
        raise Exception("The Python SmartyPants library isn't installed.")

    tokens = smartypants._tokenize(text)
    result = []
    # Nesting depth of currently open skip-tags; text is only rewritten
    # while this is zero.
    skip_depth = 0

    cap_finder = re.compile(r'''(
                            (\b[A-Z\d]*        # Group 2: Any amount of caps and digits
                            [A-Z]\d*[A-Z]      # A cap string much at least include two caps (but they can have digits between them)
                            [A-Z\d']*\b)       # Any amount of caps and digits or dumb apostsrophes
                            | (\b[A-Z]+\.\s?   # OR: Group 3: Some caps, followed by a '.' and an optional space
                            (?:[A-Z]+\.\s?)+)  # Followed by the same thing at least once more
                            (?:\s|\b|$))
                            ''', re.VERBOSE)

    def _cap_wrapper(matchobj):
        '''Keep dotted cap strings from pulling a trailing space into the span.'''
        if matchobj.group(2):
            return '''<span class="caps">%s</span>''' % matchobj.group(2)
        dotted = matchobj.group(3)
        if dotted[-1] == ' ':
            # Move the trailing space outside of the span.
            return '''<span class="caps">%s</span>%s''' % (dotted[:-1], ' ')
        return '''<span class="caps">%s</span>%s''' % (dotted, '')

    tags_to_skip_regex = re.compile('<(/)?(?:pre|code|kbd|script|math)[^>]*>', re.IGNORECASE)

    for token in tokens:
        if token[0] == 'tag':
            # Don't mess with tags.
            result.append(token[1])
            skip_match = tags_to_skip_regex.match(token[1])
            if skip_match:
                if skip_match.group(1) is None:
                    # Opening skip-tag (no leading slash captured).
                    skip_depth += 1
                elif skip_depth > 0:
                    # Closing skip-tag; ignore stray closers.
                    skip_depth -= 1
            # Any other tag leaves the skip state alone.
        elif skip_depth > 0:
            result.append(token[1])
        else:
            result.append(cap_finder.sub(_cap_wrapper, token[1]))
    output = ''.join(result)
    return output
Ejemplo n.º 11
0
def caps(text):
    """Wraps multiple capital letters in ``<span class="caps">``
    so they can be styled with CSS.

    >>> caps("A message from KU")
    'A message from <span class="caps">KU</span>'

    Uses the smartypants tokenizer to not screw with HTML or with tags it shouldn't.

    >>> caps("<SCRIPT>CAPS</script> more CAPS")
    '<SCRIPT>CAPS</script> more <span class="caps">CAPS</span>'

    Tags nested inside a skipped element do not switch processing back on.

    >>> caps("<kbd><b>CTRL</b> ALT</kbd>")
    '<kbd><b>CTRL</b> ALT</kbd>'

    >>> caps("A message from 2KU2 with digits")
    'A message from <span class="caps">2KU2</span> with digits'

    >>> caps("Dotted caps followed by spaces should never include them in the wrap D.O.T.   like so.")
    'Dotted caps followed by spaces should never include them in the wrap <span class="caps">D.O.T.</span>  like so.'

    All caps with apostrophes in them shouldn't break. Only handles dumb apostrophes though.
    >>> caps("JIMMY'S")
    '<span class="caps">JIMMY\\'S</span>'

    >>> caps("<i>D.O.T.</i>HE34T<b>RFID</b>")
    '<i><span class="caps">D.O.T.</span></i><span class="caps">HE34T</span><b><span class="caps">RFID</span></b>'

    :param text: HTML or plain text to process.
    :returns: the text with runs of capitals wrapped in caps spans.
    :raises TypogrifyError: if the smartypants library cannot be imported.
    """
    try:
        import smartypants
    except ImportError:
        raise TypogrifyError(
            "Error in {% caps %} filter: The Python SmartyPants library isn't installed."
        )

    tokens = smartypants._tokenize(text)
    result = []
    # Count of skip-tags currently open; zero means text may be rewritten.
    open_skip_tags = 0

    cap_finder = re.compile(
        r"""(
                            (\b[A-Z\d]*        # Group 2: Any amount of caps and digits
                            [A-Z]\d*[A-Z]      # A cap string much at least include two caps (but they can have digits between them)
                            [A-Z\d']*\b)       # Any amount of caps and digits or dumb apostsrophes
                            | (\b[A-Z]+\.\s?   # OR: Group 3: Some caps, followed by a '.' and an optional space
                            (?:[A-Z]+\.\s?)+)  # Followed by the same thing at least once more
                            (?:\s|\b|$))
                            """, re.VERBOSE)

    def _cap_wrapper(matchobj):
        """Keep dotted cap strings from pulling a trailing space into the span."""
        if matchobj.group(2):
            return """<span class="caps">%s</span>""" % matchobj.group(2)
        else:
            if matchobj.group(3)[-1] == " ":
                # The space goes after the span instead of inside it.
                wrapped = matchobj.group(3)[:-1]
                tail = ' '
            else:
                wrapped = matchobj.group(3)
                tail = ''
            return """<span class="caps">%s</span>%s""" % (wrapped, tail)

    # Add additional tags whose content should be
    # ignored here. Note - <pre> and <code> tag are
    # ignored by default and therefore are not here
    tags_to_skip_regex = re.compile("<(/)?(?:kbd|script)[^>]*>", re.IGNORECASE)

    for token in tokens:
        if token[0] == "tag":
            # Don't mess with tags.
            result.append(token[1])
            skip_match = tags_to_skip_regex.match(token[1])
            if skip_match is None:
                # Not a skip-tag: the current skip state is kept as is.
                continue
            if skip_match.group(1) is None:
                open_skip_tags += 1
            elif open_skip_tags > 0:
                # Closing skip-tag; stray closers are ignored.
                open_skip_tags -= 1
        elif open_skip_tags > 0:
            result.append(token[1])
        else:
            result.append(cap_finder.sub(_cap_wrapper, token[1]))
    output = "".join(result)
    return output