Exemple #1
0
def educate_tokens(text_tokens: Iterable[Tuple[str, str]],
                   attr: str = smartquotes.default_smartypants_attr,
                   language: str = 'en') -> Generator[str, None, None]:
    """Return iterator that "educates" the items of `text_tokens`.

    This is modified to intercept the ``attr='2'`` as it was used by the
    Docutils 0.13.1 SmartQuotes transform in a hard coded way. Docutils 0.14
    uses ``'qDe'`` and is configurable, and its choice is backported here
    for use by Sphinx with earlier Docutils releases. Similarly ``'1'`` is
    replaced by ``'qde'``.

    Use ``attr='qDbe'``, resp. ``'qdbe'`` to recover Docutils effect of ``'2'``,
    resp. ``'1'``.

    refs: https://sourceforge.net/p/docutils/mailman/message/35869025/

    :param text_tokens: iterable of ``(ttype, text)`` pairs.  Tokens whose
        type is ``'tag'`` and tokens with empty text are yielded untouched
        and do not change the quote context; ``'literal'`` tokens are
        yielded untouched but their last character becomes the context for
        the next token; every other token is converted.
    :param attr: either one of the presets ``'1'``, ``'2'``, ``'3'``,
        ``'-1'`` or a string of option letters (see the table below).
    :param language: language code passed on to the quote, backtick and
        stupefy helpers.
    :return: generator yielding one string per input token, in input order.
    """

    # Parse attributes:
    #  0 : do nothing
    #  1 : set all (but backticks)
    #  2 : set all (but backticks), using old school en- and em- dash shortcuts
    #  3 : set all, using inverted old school en and em- dash shortcuts
    # -1 : "stupefy" mode
    #
    # q : quotes
    # b : backtick quotes (``double'' only)
    # B : backtick quotes (``double'' and `single')
    # d : dashes
    # D : old school dashes
    # i : inverted old school dashes
    # e : ellipses
    # w : convert &quot; entities to " for Dreamweaver users

    convert_quot = False  # translate &quot; entities into normal quotes?
    do_dashes = 0         # 0: off, 1: normal, 2: old school, 3: inverted
    do_backticks = 0      # 0: off, 1: double only, 2: double and single
    do_quotes = False
    do_ellipses = False
    do_stupefy = False

    if attr == "1":
        # Everything except backticks (see docstring: use 'qdbe' to get
        # backticks as well).
        do_quotes = True
        do_dashes = 1
        do_ellipses = True
    elif attr == "2":
        # Everything except backticks, with old school dash shorthand
        # (see docstring: use 'qDbe' to get backticks as well).
        do_quotes = True
        do_dashes = 2
        do_ellipses = True
    elif attr == "3":
        # Do everything, use inverted old school dash shorthand.
        do_quotes = True
        do_backticks = 1
        do_dashes = 3
        do_ellipses = True
    elif attr == "-1":  # Special "stupefy" mode.
        do_stupefy = True
    else:
        # Individual option letters.  Checks are ordered so that when
        # several letters of one family are given the later one wins
        # ('B' over 'b'; 'i' over 'D' over 'd').
        if "q" in attr:
            do_quotes = True
        if "b" in attr:
            do_backticks = 1
        if "B" in attr:
            do_backticks = 2
        if "d" in attr:
            do_dashes = 1
        if "D" in attr:
            do_dashes = 2
        if "i" in attr:
            do_dashes = 3
        if "e" in attr:
            do_ellipses = True
        if "w" in attr:
            convert_quot = True

    # Last character of the previous text token. Used as
    # context to curl leading quote characters correctly.
    prev_token_last_char = " "

    for ttype, text in text_tokens:

        # skip HTML and/or XML tags as well as empty text tokens
        # without updating the last character
        if ttype == 'tag' or not text:
            yield text
            continue

        # skip literal text (math, literal, raw, ...)
        if ttype == 'literal':
            prev_token_last_char = text[-1:]
            yield text
            continue

        last_char = text[-1:]  # Remember last char before processing.

        # Undone by the matching ``restore=True`` call at the end.
        text = smartquotes.processEscapes(text)

        if convert_quot:
            # Turn the HTML entity into a plain quote so that the quote
            # handling below can see it.
            text = text.replace('&quot;', '"')

        if do_dashes == 1:
            text = smartquotes.educateDashes(text)
        elif do_dashes == 2:
            text = smartquotes.educateDashesOldSchool(text)
        elif do_dashes == 3:
            text = smartquotes.educateDashesOldSchoolInverted(text)

        if do_ellipses:
            text = smartquotes.educateEllipses(text)

        # Note: backticks need to be processed before quotes.
        if do_backticks:
            text = smartquotes.educateBackticks(text, language)

        if do_backticks == 2:
            text = smartquotes.educateSingleBackticks(text, language)

        if do_quotes:
            # Replace plain quotes to prevent conversion to
            # 2-character sequence in French.
            context = prev_token_last_char.replace('"', ';').replace("'", ';')
            # The context character is prepended for the conversion only
            # and stripped from the result again.
            text = educateQuotes(context + text, language)[1:]

        if do_stupefy:
            text = smartquotes.stupefyEntities(text, language)

        # Remember last char as context for the next token
        prev_token_last_char = last_char

        text = smartquotes.processEscapes(text, restore=True)

        yield text
def educate_tokens(text_tokens, attr=smartquotes.default_smartypants_attr, language='en'):
    # type: (Iterable[Tuple[str, str]], str, str) -> Generator[str, None, None]
    """Return iterator that "educates" the items of `text_tokens`.

    This is modified to intercept the ``attr='2'`` as it was used by the
    Docutils 0.13.1 SmartQuotes transform in a hard coded way. Docutils 0.14
    uses ``'qDe'`` and is configurable, and its choice is backported here
    for use by Sphinx with earlier Docutils releases. Similarly ``'1'`` is
    replaced by ``'qde'``.

    Use ``attr='qDbe'``, resp. ``'qdbe'`` to recover Docutils effect of ``'2'``,
    resp. ``'1'``.

    refs: https://sourceforge.net/p/docutils/mailman/message/35869025/

    :param text_tokens: iterable of ``(ttype, text)`` pairs.  ``'tag'``
        tokens and tokens with empty text pass through unchanged and leave
        the quote context alone; ``'literal'`` tokens pass through
        unchanged but their last character becomes the context for the
        following token; all other tokens are converted.
    :param attr: a preset (``'1'``, ``'2'``, ``'3'``, ``'-1'``) or a string
        of option letters (see the table below).
    :param language: language code forwarded to the quote, backtick and
        stupefy helpers.
    :return: generator yielding one string per input token, in input order.
    """

    # Parse attributes:
    #  0 : do nothing
    #  1 : set all (but backticks)
    #  2 : set all (but backticks), using old school en- and em- dash shortcuts
    #  3 : set all, using inverted old school en and em- dash shortcuts
    # -1 : "stupefy" mode
    #
    # q : quotes
    # b : backtick quotes (``double'' only)
    # B : backtick quotes (``double'' and `single')
    # d : dashes
    # D : old school dashes
    # i : inverted old school dashes
    # e : ellipses
    # w : convert &quot; entities to " for Dreamweaver users

    convert_quot = False  # translate &quot; entities into normal quotes?
    do_dashes = 0         # 0: off, 1: normal, 2: old school, 3: inverted
    do_backticks = 0      # 0: off, 1: double only, 2: double and single
    do_quotes = False
    do_ellipses = False
    do_stupefy = False

    if attr == "1":
        # Everything except backticks (see docstring: 'qdbe' adds them).
        do_quotes = True
        do_dashes = 1
        do_ellipses = True
    elif attr == "2":
        # Everything except backticks, old school dash shorthand
        # (see docstring: 'qDbe' adds backticks).
        do_quotes = True
        do_dashes = 2
        do_ellipses = True
    elif attr == "3":
        # Do everything, use inverted old school dash shorthand.
        do_quotes = True
        do_backticks = 1
        do_dashes = 3
        do_ellipses = True
    elif attr == "-1":  # Special "stupefy" mode.
        do_stupefy = True
    else:
        # Individual option letters; within one family the later check
        # overrides the earlier one ('B' over 'b'; 'i' over 'D' over 'd').
        if "q" in attr:
            do_quotes = True
        if "b" in attr:
            do_backticks = 1
        if "B" in attr:
            do_backticks = 2
        if "d" in attr:
            do_dashes = 1
        if "D" in attr:
            do_dashes = 2
        if "i" in attr:
            do_dashes = 3
        if "e" in attr:
            do_ellipses = True
        if "w" in attr:
            convert_quot = True

    # Last character of the previous text token. Used as
    # context to curl leading quote characters correctly.
    prev_token_last_char = " "

    for ttype, text in text_tokens:

        # skip HTML and/or XML tags as well as empty text tokens
        # without updating the last character
        if ttype == 'tag' or not text:
            yield text
            continue

        # skip literal text (math, literal, raw, ...)
        if ttype == 'literal':
            prev_token_last_char = text[-1:]
            yield text
            continue

        last_char = text[-1:]  # Remember last char before processing.

        # Undone by the matching ``restore=True`` call at the end.
        text = smartquotes.processEscapes(text)

        if convert_quot:
            # Turn the HTML entity into a plain quote so that the quote
            # handling below can see it.
            text = text.replace('&quot;', '"')

        if do_dashes == 1:
            text = smartquotes.educateDashes(text)
        elif do_dashes == 2:
            text = smartquotes.educateDashesOldSchool(text)
        elif do_dashes == 3:
            text = smartquotes.educateDashesOldSchoolInverted(text)

        if do_ellipses:
            text = smartquotes.educateEllipses(text)

        # Note: backticks need to be processed before quotes.
        if do_backticks:
            text = smartquotes.educateBackticks(text, language)

        if do_backticks == 2:
            text = smartquotes.educateSingleBackticks(text, language)

        if do_quotes:
            # Replace plain quotes to prevent conversion to
            # 2-character sequence in French.
            context = prev_token_last_char.replace('"', ';').replace("'", ';')
            # The context character is prepended for the conversion only
            # and stripped from the result again.
            text = educateQuotes(context + text, language)[1:]

        if do_stupefy:
            text = smartquotes.stupefyEntities(text, language)

        # Remember last char as context for the next token
        prev_token_last_char = last_char

        text = smartquotes.processEscapes(text, restore=True)

        yield text
Exemple #3
0
def educate_tokens(text_tokens, attr='1', language='en'):
    # type: (Iterable[Tuple[str, unicode]], unicode, unicode) -> Iterator
    """Return iterator that "educates" the items of `text_tokens`.

    :param text_tokens: iterable of ``(ttype, text)`` pairs.  ``'tag'``
        tokens and tokens with empty text pass through unchanged and leave
        the quote context alone; ``'literal'`` tokens pass through
        unchanged but their last character becomes the context for the
        following token; all other tokens are converted.
    :param attr: a preset (``'0'``, ``'1'``, ``'2'``, ``'3'``, ``'-1'``) or
        a string of option letters (see the table below).
    :param language: language code forwarded to the quote, backtick and
        stupefy helpers.
    :return: iterator yielding one string per input token, in input order.
    """

    # Parse attributes:
    #  0 : do nothing
    #  1 : set all
    #  2 : set all, using old school en- and em- dash shortcuts
    #  3 : set all, using inverted old school en and em- dash shortcuts
    # -1 : "stupefy" mode
    #
    # q : quotes
    # b : backtick quotes (``double'' only)
    # B : backtick quotes (``double'' and `single')
    # d : dashes
    # D : old school dashes
    # i : inverted old school dashes
    # e : ellipses
    # w : convert &quot; entities to " for Dreamweaver users

    convert_quot = False  # translate &quot; entities into normal quotes?
    do_dashes = 0         # 0: off, 1: normal, 2: old school, 3: inverted
    do_backticks = 0      # 0: off, 1: double only, 2: double and single
    do_quotes = False
    do_ellipses = False
    do_stupefy = False

    if attr == "0":  # Do nothing.
        pass
    elif attr == "1":  # Do everything, turn all options on.
        do_quotes = True
        do_backticks = 1
        do_dashes = 1
        do_ellipses = True
    elif attr == "2":
        # Do everything, turn all options on, use old school dash shorthand.
        do_quotes = True
        do_backticks = 1
        do_dashes = 2
        do_ellipses = True
    elif attr == "3":
        # Do everything, use inverted old school dash shorthand.
        do_quotes = True
        do_backticks = 1
        do_dashes = 3
        do_ellipses = True
    elif attr == "-1":  # Special "stupefy" mode.
        do_stupefy = True
    else:
        # Individual option letters; within one family the later check
        # overrides the earlier one ('B' over 'b'; 'i' over 'D' over 'd').
        if "q" in attr:
            do_quotes = True
        if "b" in attr:
            do_backticks = 1
        if "B" in attr:
            do_backticks = 2
        if "d" in attr:
            do_dashes = 1
        if "D" in attr:
            do_dashes = 2
        if "i" in attr:
            do_dashes = 3
        if "e" in attr:
            do_ellipses = True
        if "w" in attr:
            convert_quot = True

    # Last character of the previous text token. Used as
    # context to curl leading quote characters correctly.
    prev_token_last_char = " "

    for ttype, text in text_tokens:

        # skip HTML and/or XML tags as well as empty text tokens
        # without updating the last character
        if ttype == 'tag' or not text:
            yield text
            continue

        # skip literal text (math, literal, raw, ...)
        if ttype == 'literal':
            prev_token_last_char = text[-1:]
            yield text
            continue

        last_char = text[-1:]  # Remember last char before processing.

        # Undone by the matching ``restore=True`` call at the end.
        text = smartquotes.processEscapes(text)

        if convert_quot:
            # Turn the HTML entity into a plain quote so that the quote
            # handling below can see it.
            text = text.replace('&quot;', '"')

        if do_dashes == 1:
            text = smartquotes.educateDashes(text)
        elif do_dashes == 2:
            text = smartquotes.educateDashesOldSchool(text)
        elif do_dashes == 3:
            text = smartquotes.educateDashesOldSchoolInverted(text)

        if do_ellipses:
            text = smartquotes.educateEllipses(text)

        # Note: backticks need to be processed before quotes.
        if do_backticks:
            text = smartquotes.educateBackticks(text, language)

        if do_backticks == 2:
            text = smartquotes.educateSingleBackticks(text, language)

        if do_quotes:
            # Replace plain quotes to prevent conversion to
            # 2-character sequence in French.
            context = prev_token_last_char.replace('"', ';').replace("'", ';')
            # The context character is prepended for the conversion only
            # and stripped from the result again.
            text = educateQuotes(context + text, language)[1:]

        if do_stupefy:
            text = smartquotes.stupefyEntities(text, language)

        # Remember last char as context for the next token
        prev_token_last_char = last_char

        text = smartquotes.processEscapes(text, restore=True)

        yield text