Beispiel #1
0
    def _postAnalyzeImpl(self):
        """Run the post-analysis pipeline that turns self._raw into the final self._result.

        The stages run strictly in order and each one rewrites self._result:
            1. Clean whitespace around HTML already present in the raw source.
            2. Create the tag objects (self._createTags) used by the later stages.
            3. Blank out unwanted whitespace / line breaks with same-length runs of spaces so
               that every character offset stays valid.
            4. Blank out comment blocks together with adjacent \\r, \\n and \\t characters.
            5. Execute the multi-stage tag actions (redefinition, connectivity, render, cleanup).
            6. Clean HTML whitespace again, strip leftover markup, convert remaining newlines
               outside of tags/preserved blocks into '<br />' and drop br/div combinations.
            7. Replace the escaped brackets '[*' and '*]', store the tags on self._tags, then
               apply self._modifyResult and DomUtils.minifyDom.

        Nothing is returned; the outcome is stored in self._result and self._tags.
        """
        self.trace('Analysis complete. Beginning processing...')

        #-------------------------------------------------------------------------------------------
        # CLEANUP HTML WHITESPACE
        #       For html already in the page, either explicit or created by the preprocessor remove
        #       the newlines between tags to prevent them from being viewed as line breaks during
        #       Markup rendering.
        self._result = self._removeHTMLTagWhitespace(self._raw.replace(u'\r', u' '))
        self.trace('CLEANED -> preliminary HTML whitespace', showResult=True)

        tags = self._createTags()

        #-------------------------------------------------------------------------------------------
        # REMOVE UNWANTED LINE BREAKS
        # Newlines that occur between close and open Markup tags should be removed.
        #
        # Every replacement in this section has exactly the length of the text it replaces. That
        # is what makes it safe to keep consuming match offsets from finditer (which scans the
        # string as it was when the iterator was created) while self._result is being rebound.
        # finditer always returns an iterator, so no truthiness guard is needed around the loops.
        for r in MarkupProcessor._STRIP_WHITESPACE_PATTERN.finditer(self._result):
            start        = r.start()
            end          = r.end()
            self._result = self._result[:start] + u' '*(end - start) + self._result[end:]

        for r in MarkupProcessor._CONTINUE_LINE_PATTERN.finditer(self._result):
            start        = r.start()
            end          = r.end()
            self._result = self._result[:start] + u' '*(end - start) + self._result[end:]

        for r in MarkupProcessor._UNWANTED_NEWLINE_PATTERN.finditer(self._result):
            start   = r.start()
            end     = r.end()

            # Skip line breaks inside of leaf tags. While scanning the tags, remember the first
            # block found to contain the match start and the first found to contain the match end.
            skip     = False
            startTag = None
            endTag   = None
            for t in tags:
                if not startTag:
                    c = t.findContainingBlock(start)
                    if c and c.isLeafTag:
                        skip = True
                        break
                    startTag = c

                if not endTag:
                    endTag = t.findContainingBlock(end)

                if startTag and endTag:
                    break

            if skip:
                continue

            # Skip tags that are not display:block or voids
            skip = (startTag and not (startTag.isBlockDisplay or startTag.isVoidTag)) or \
                   (endTag and not (endTag.isBlockDisplay or endTag.isVoidTag))
            if skip:
                continue

            # Only the newlines inside the 'space' group are turned into spaces.
            replace = r.group('close') + r.group('space').replace(u'\n', u' ') + r.group('open')
            self._result = self._result[:start] + replace + self._result[end:]

        self.trace('Line breaks cleaned up; Starting Tag rendering...')

        #-------------------------------------------------------------------------------------------
        # BLANK OUT COMMENT BLOCKS
        #       Each comment block, extended over directly adjacent \r, \n and \t characters on
        #       both sides, is overwritten with spaces of the same total length.
        adjacentWhitespace = (u'\r', u'\n', u'\t')
        for b in self._blocks:
            if b.blockType == BlockSyntaxEnum.COMMENT:
                # Walk left over adjacent whitespace. The guard is `start > 0` so that the
                # character at index 0 is examined as well; the previous `start - 1 > 0` test
                # stopped one position early and never looked at the first character.
                start = b.start
                while start > 0 and self._result[start-1] in adjacentWhitespace:
                    start -= 1

                end = b.end
                while end < len(self._result) and self._result[end] in adjacentWhitespace:
                    end += 1

                length = end - start
                if length > 0:
                    self._result = self._result[:start] + u' '*length + self._result[end:]

        #-------------------------------------------------------------------------------------------
        # EXECUTE THE MULTI-STAGE RENDERING PROCESS
        self._executeTagActions(tags, 'redefinitionTraversal', 'Redefinition Traversal Failure')
        self._executeTagActions(tags, 'redefinitionReversal', 'Redefinition Reversal Failure')

        self._executeTagActions(tags, 'connectivityTraversal', 'Connectivity Traversal Failure')
        self._executeTagActions(tags, 'connectivityReversal', 'Connectivity Reversal Failure')

        self.trace(self.traceHierarchy('Global pre-render complete:', tags))
        self._executeTagActions(tags, 'render', 'Render Failure')

        self._executeTagActions(tags, 'cleanupTraversal', 'Cleanup Traversal Failure')
        # The failure label names the stage that actually ran (it previously said "Render").
        self._executeTagActions(tags, 'cleanupReversal', 'Cleanup Reversal Failure')

        self.trace('RENDERED RESULT -> Markup converted to HTML', showResult=True)

        #-------------------------------------------------------------------------------------------
        # POST-PROCESSING HTML WHITESPACE CLEANUP
        self._result = self._removeHTMLTagWhitespace(self._result.replace(u'\r', u' '))
        self.trace('CLEANED -> post HTML whitespace', showResult=True)

        #-------------------------------------------------------------------------------------------
        # STRIP MARKUP
        #       Match offsets refer to the string as it was before any removal, so they are
        #       shifted by the accumulated value returned from insertCharacters.
        #       NOTE(review): this presumes insertCharacters returns the resulting change in
        #       length -- confirm against its implementation.
        res    = MarkupProcessor._STRIP_PATTERN.finditer(self._result)
        offset = 0
        for r in res:
            s       = r.start() + offset
            e       = r.end() + offset
            offset += self.insertCharacters(s, e, u'')
        self.trace('STRIPPED -> markup', showResult=True)

        #-------------------------------------------------------------------------------------------
        # HANDLE NEWLINES NOT IN TAGS
        #       NOTE(review): preserveBlocks is computed once, before any '<br />' is inserted;
        #       whether its offsets remain valid afterwards depends on insertCharacters -- confirm.
        offset         = 0
        tagIndex       = 0
        preserveBlocks = DomUtils.getPreserveBlocks(self._result)
        while True:
            index = self._result.find('\n', offset)

            if index == -1:
                break

            # Ignore line breaks inside preserved tags
            skip = False
            for b in preserveBlocks:
                if b.start <= index <= b.end:
                    offset = b.end + 1
                    skip   = True
                    break
            if skip:
                continue

            # Advance past every tag that ends before this newline.
            while tagIndex < len(tags):
                if index > tags[tagIndex].end():
                    tagIndex += 1
                    continue
                break

            # A newline inside a tag is left alone; resume searching from the end of that tag.
            if tagIndex < len(tags) and tags[tagIndex].contains(index):
                offset    = tags[tagIndex].end()
                tagIndex += 1
                continue

            offset = index + self.insertCharacters(index, index+1, u'<br />')
        self.trace('ADDED breaks to newlines not in tags', showResult=True)

        #-------------------------------------------------------------------------------------------
        # REMOVE BR DIV COMBOS
        res    = MarkupProcessor._DIV_LINE_BREAK_PATTERN.finditer(self._result)
        offset = 0
        for r in res:
            s       = r.start() + offset
            e       = r.end() + offset
            offset += self.insertCharacters(s, e, u'')

        # Replace the starred bracket sequences with plain brackets.
        self._result = self._result.replace(u'[*', u'[').replace(u'*]', u']')
        self._tags   = tags

        #-------------------------------------------------------------------------------------------
        # FINAL MODIFICATIONS TO THE RENDERED DOM
        self._result = self._modifyResult(self._result)

        #-------------------------------------------------------------------------------------------
        # CLEANUP WHITE SPACE
        self._result = DomUtils.minifyDom(self._result)

        self.trace('Post processing complete.')
Beispiel #2
0
    def _removeHTMLTagWhitespace(self, source):
        """Return source with whitespace in and around HTML tags flattened to plain spaces.

        Three passes are made over the text, each driven by a MarkupProcessor pattern:
            1. Newlines inside the 'tag' group of _HTML_INSIDE_TAG_PATTERN become spaces.
            2. The 'whitespace' group matched ahead of a tag by
               _HTML_PRE_TAG_WHITESPACE_PATTERN is replaced by the same number of spaces.
            3. The 'whitespace' group matched after a tag by
               _HTML_POST_TAG_WHITESPACE_PATTERN is replaced by the same number of spaces.

        Passes 2 and 3 leave a match alone when it touches one of the blocks reported by
        DomUtils.getPreserveBlocks, or when it sits next to a span/anchor tag that borders
        another span/anchor or non-tag text.

        Replacements are written to have the same length as the text they replace
        (presumably each match is exactly its whitespace and tag groups -- TODO confirm),
        which keeps the match offsets and the preserve-block offsets usable throughout.
        """
        protectedBlocks = DomUtils.getPreserveBlocks(source)

        def touchesProtected(first, last):
            # True when either end of the match lies within a preserved block.
            return any(
                blk.start <= first <= blk.end or blk.start <= last <= blk.end
                for blk in protectedBlocks)

        # Pass 1: newlines inside tags.
        for match in MarkupProcessor._HTML_INSIDE_TAG_PATTERN.finditer(source):
            flattened = match.group('tag').replace(u'\n', u' ')
            source    = source[:match.start()] + flattened + source[match.end():]

        # Pass 2: whitespace ahead of a tag.
        for match in MarkupProcessor._HTML_PRE_TAG_WHITESPACE_PATTERN.finditer(source):
            begin, finish = match.span()
            tag           = match.group('tag')
            head          = source[:begin]

            if StringUtils.begins(tag, (u'<span', u'<a')):
                trimmedHead = head.strip()
                # Keep the whitespace when the opening span/anchor follows a closing
                # span/anchor, or follows something that is not a tag at all (e.g. text).
                if StringUtils.ends(trimmedHead, (u'</span>', u'</a>')) \
                        or not trimmedHead.endswith(u'>'):
                    continue

            if touchesProtected(begin, finish):
                continue

            padding = u' '*len(match.group('whitespace'))
            source  = head + padding + tag + source[finish:]

        # Pass 3: whitespace after a tag.
        for match in MarkupProcessor._HTML_POST_TAG_WHITESPACE_PATTERN.finditer(source):
            begin, finish = match.span()
            tag           = match.group('tag')
            tail          = source[finish:]

            if tag in (u'</span>', u'</a>'):
                trimmedTail = tail.strip()
                # Keep the whitespace when the closing span/anchor precedes an opening
                # span/anchor, or precedes something that is not a tag at all (e.g. text).
                if StringUtils.begins(trimmedTail, (u'<span', u'<a')) \
                        or not trimmedTail.startswith(u'<'):
                    continue

            if touchesProtected(begin, finish):
                continue

            padding = u' '*len(match.group('whitespace'))
            source  = source[:begin] + tag + padding + tail

        return source