예제 #1
0
    def apply(self):
        smart_quotes = self.document.settings.smart_quotes
        if not smart_quotes:
            return
        try:
            alternative = smart_quotes.startswith('alt')
        except AttributeError:
            alternative = False
        # print repr(alternative)

        document_language = self.document.settings.language_code

        # "Educate" quotes in normal text. Handle each block of text
        # (TextElement node) as a unit to keep context around inline nodes:
        for node in self.document.traverse(nodes.TextElement):
            # skip preformatted text blocks and special elements:
            if isinstance(node, (nodes.FixedTextElement, nodes.Special)):
                continue
            # nested TextElements are not "block-level" elements:
            if isinstance(node.parent, nodes.TextElement):
                continue

            # list of text nodes in the "text block":
            txtnodes = [
                txtnode for txtnode in node.traverse(nodes.Text)
                if not isinstance(txtnode.parent, nodes.option_string)
            ]

            # language: use smart-quotes for language "lang"
            lang = node.get_language_code(document_language)
            # use alternative form if `smart-quotes` setting starts with "alt":
            if alternative:
                if '-x-altquot' in lang:
                    lang = lang.replace('-x-altquot', '')
                else:
                    lang += '-x-altquot'
            # drop subtags missing in quotes:
            for tag in utils.normalize_language_tag(lang):
                if tag in smartquotes.smartchars.quotes:
                    lang = tag
                    break
            else:  # language not supported: (keep ASCII quotes)
                if lang not in self.unsupported_languages:
                    self.document.reporter.warning(
                        'No smart quotes '
                        'defined for language "%s".' % lang,
                        base_node=node)
                self.unsupported_languages.add(lang)
                lang = ''

            # Iterator educating quotes in plain text:
            # '2': set all, using old school en- and em- dash shortcuts
            teacher = smartquotes.educate_tokens(self.get_tokens(txtnodes),
                                                 attr='2',
                                                 language=lang)

            for txtnode, newtext in zip(txtnodes, teacher):
                txtnode.parent.replace(txtnode, nodes.Text(newtext))

            self.unsupported_languages = set()  # reset
예제 #2
0
    def apply(self):
        smart_quotes = self.document.settings.smart_quotes
        if not smart_quotes:
            return
        try:
            alternative = smart_quotes.startswith('alt')
        except AttributeError:
            alternative = False
        # print repr(alternative)

        document_language = self.document.settings.language_code
        lc_smartquotes = self.document.settings.smartquotes_locales
        if lc_smartquotes:
            smartquotes.smartchars.quotes.update(dict(lc_smartquotes))

        # "Educate" quotes in normal text. Handle each block of text
        # (TextElement node) as a unit to keep context around inline nodes:
        for node in self.document.traverse(nodes.TextElement):
            # skip preformatted text blocks and special elements:
            if isinstance(node, self.nodes_to_skip):
                continue
            # nested TextElements are not "block-level" elements:
            if isinstance(node.parent, nodes.TextElement):
                continue

            # list of text nodes in the "text block":
            txtnodes = [txtnode for txtnode in node.traverse(nodes.Text)
                        if not isinstance(txtnode.parent,
                                          nodes.option_string)]

            # language: use typographical quotes for language "lang"
            lang = node.get_language_code(document_language)
            # use alternative form if `smart-quotes` setting starts with "alt":
            if alternative:
                if '-x-altquot' in lang:
                    lang = lang.replace('-x-altquot', '')
                else:
                    lang += '-x-altquot'
            # drop unsupported subtags:
            for tag in utils.normalize_language_tag(lang):
                if tag in smartquotes.smartchars.quotes:
                    lang = tag
                    break
            else: # language not supported: (keep ASCII quotes)
                if lang not in self.unsupported_languages:
                    self.document.reporter.warning('No smart quotes '
                        'defined for language "%s".'%lang, base_node=node)
                self.unsupported_languages.add(lang)
                lang = ''

            # Iterator educating quotes in plain text:
            # (see "utils/smartquotes.py" for the attribute setting)
            teacher = smartquotes.educate_tokens(self.get_tokens(txtnodes),
                                attr=self.smartquotes_action, language=lang)

            for txtnode, newtext in zip(txtnodes, teacher):
                txtnode.parent.replace(txtnode, nodes.Text(newtext,
                                       rawsource=txtnode.rawsource))

        self.unsupported_languages = set() # reset
예제 #3
0
    def apply(self):
        # We are using our own config variable instead of
        # self.document.settings.smart_quotes in order to avoid the builtin
        # SmartQuotes to be executed as well
        if not settings['M_HTMLSANITY_SMART_QUOTES']:
            return
        try:
            alternative = settings['M_HTMLSANITY_SMART_QUOTES'].startswith('alt')
        except AttributeError:
            alternative = False
        # print repr(alternative)

        document_language = extract_document_language(self.document)

        # "Educate" quotes in normal text. Handle each block of text
        # (TextElement node) as a unit to keep context around inline nodes:
        for node in self.document.traverse(nodes.TextElement):
            # skip preformatted text blocks and special elements:
            if isinstance(node, (nodes.FixedTextElement, nodes.Special)):
                continue
            # nested TextElements are not "block-level" elements:
            if isinstance(node.parent, nodes.TextElement):
                continue

            # list of text nodes in the "text block":
            # Patched here to exclude more stuff.
            txtnodes = []
            for txtnode in node.traverse(nodes.Text):
                if not can_apply_typography(txtnode): continue
                # Don't convert -- in option strings
                if isinstance(txtnode.parent, nodes.option_string): continue

                txtnodes += [txtnode]

            # language: use typographical quotes for language "lang"
            lang = node.get_language_code(document_language)
            # use alternative form if `smart-quotes` setting starts with "alt":
            if alternative:
                if '-x-altquot' in lang:
                    lang = lang.replace('-x-altquot', '')
                else:
                    lang += '-x-altquot'
            # drop subtags missing in quotes:
            for tag in utils.normalize_language_tag(lang):
                if tag in smartquotes.smartchars.quotes:
                    lang = tag
                    break
            else: # language not supported: (keep ASCII quotes)
                lang = ''

            # Iterator educating quotes in plain text:
            # '2': set all, using old school en- and em- dash shortcuts
            teacher = smartquotes.educate_tokens(self.get_tokens(txtnodes),
                                                 attr='2', language=lang)

            for txtnode, newtext in zip(txtnodes, teacher):
                txtnode.parent.replace(txtnode, nodes.Text(newtext))

            self.unsupported_languages = set() # reset
예제 #4
0
    def apply(self):
        smart_quotes = self.document.settings.smart_quotes
        if not smart_quotes:
            return
        try:
            alternative = smart_quotes.startswith("alt")
        except AttributeError:
            alternative = False
        # print repr(alternative)

        document_language = self.document.settings.language_code

        # "Educate" quotes in normal text. Handle each block of text
        # (TextElement node) as a unit to keep context around inline nodes:
        for node in self.document.traverse(nodes.TextElement):
            # skip preformatted text blocks and special elements:
            if isinstance(node, (nodes.FixedTextElement, nodes.Special)):
                continue
            # nested TextElements are not "block-level" elements:
            if isinstance(node.parent, nodes.TextElement):
                continue

            # list of text nodes in the "text block":
            txtnodes = [
                txtnode for txtnode in node.traverse(nodes.Text) if not isinstance(txtnode.parent, nodes.option_string)
            ]

            # language: use smart-quotes for language "lang"
            lang = node.get_language_code(document_language)
            # use alternative form if `smart-quotes` setting starts with "alt":
            if alternative:
                if "-x-altquot" in lang:
                    lang = lang.replace("-x-altquot", "")
                else:
                    lang += "-x-altquot"
            # drop subtags missing in quotes:
            for tag in utils.normalize_language_tag(lang):
                if tag in smartquotes.smartchars.quotes:
                    lang = tag
                    break
            else:  # language not supported: (keep ASCII quotes)
                if lang not in self.unsupported_languages:
                    self.document.reporter.warning(
                        "No smart quotes " 'defined for language "%s".' % lang, base_node=node
                    )
                self.unsupported_languages.add(lang)
                lang = ""

            # Iterator educating quotes in plain text:
            # '2': set all, using old school en- and em- dash shortcuts
            teacher = smartquotes.educate_tokens(self.get_tokens(txtnodes), attr="2", language=lang)

            for txtnode, newtext in zip(txtnodes, teacher):
                txtnode.parent.replace(txtnode, nodes.Text(newtext))

            self.unsupported_languages = set()  # reset
예제 #5
0
    def apply(self):
        smart_quotes = self.document.settings.smart_quotes
        if not smart_quotes:
            return
        try:
            alternative = smart_quotes.startswith('alt')
        except AttributeError:
            alternative = False

        document_language = self.document.settings.language_code
        lc_smartquotes = self.document.settings.smartquotes_locales
        if lc_smartquotes:
            smartquotes.smartchars.quotes.update(dict(lc_smartquotes))

        # "Educate" quotes in normal text. Handle each block of text
        # (TextElement node) as a unit to keep context around inline nodes:
        for node in self.document.traverse(nodes.TextElement):
            # skip preformatted text blocks and special elements:
            if isinstance(node, self.nodes_to_skip):
                continue
            # nested TextElements are not "block-level" elements:
            if isinstance(node.parent, nodes.TextElement):
                continue

            # list of text nodes in the "text block":
            txtnodes = [
                txtnode for txtnode in node.traverse(nodes.Text)
                if not isinstance(txtnode.parent, nodes.option_string)
            ]

            # language: use typographical quotes for language "lang"
            lang = node.get_language_code(document_language)
            # use alternative form if `smart-quotes` setting starts with "alt":
            if alternative:
                if '-x-altquot' in lang:
                    lang = lang.replace('-x-altquot', '')
                else:
                    lang += '-x-altquot'
            # drop unsupported subtags:
            for tag in utils.normalize_language_tag(lang):
                if tag in smartquotes.smartchars.quotes:
                    lang = tag
                    break
            else:  # language not supported: (keep ASCII quotes)
                if lang not in self.unsupported_languages:
                    self.document.reporter.warning(
                        'No smart quotes '
                        'defined for language "%s".' % lang,
                        base_node=node)
                self.unsupported_languages.add(lang)
                lang = ''

            # Iterator educating quotes in plain text:
            # (see "utils/smartquotes.py" for the attribute setting)
            teacher = smartquotes.educate_tokens(self.get_tokens(txtnodes),
                                                 attr=self.smartquotes_action,
                                                 language=lang)

            for txtnode, newtext in zip(txtnodes, teacher):
                txtnode.parent.replace(
                    txtnode, nodes.Text(newtext, rawsource=txtnode.rawsource))

        self.unsupported_languages = set()  # reset