def _prettifyETree(self, elem):
        """Recursively insert newlines around block-level elements.

        A block-level element other than ``code``/``pre`` gets a newline as
        its text when that text is blank and its first child is block-level;
        its block-level children are then processed the same way. Whatever
        the element is, a blank tail is replaced by a newline.
        """
        newline = '\n'

        def is_blank(value):
            # None, '' and whitespace-only strings all count as blank.
            return not value or not value.strip()

        if util.isBlockLevel(elem.tag) and elem.tag not in ('code', 'pre'):
            if is_blank(elem.text) and len(elem) and util.isBlockLevel(elem[0].tag):
                elem.text = newline
            for child in elem:
                if util.isBlockLevel(child.tag):
                    self._prettifyETree(child)
        # One tail check covers both block-level and other elements.
        if is_blank(elem.tail):
            elem.tail = newline
Exemple #2
0
    def _prettifyETree(self, elem):
        """Add line breaks to ``elem`` and, recursively, its block children.

        Only block-level elements that are not ``code``/``pre`` are descended
        into. The tail of ``elem`` becomes a newline whenever it is empty or
        whitespace-only.
        """
        br = '\n'
        tag = elem.tag
        if util.isBlockLevel(tag) and tag not in ('code', 'pre'):
            text_is_blank = not (elem.text and elem.text.strip())
            if text_is_blank and len(elem) and util.isBlockLevel(elem[0].tag):
                # Put the first block-level child on its own line.
                elem.text = br
            for sub in elem:
                if not util.isBlockLevel(sub.tag):
                    continue
                self._prettifyETree(sub)
        if not (elem.tail and elem.tail.strip()):
            elem.tail = br
    def _prettifyETree(self, elem):
        """ Recursively add linebreaks to ElementTree children.

        Elements that are block-level (and not ``code``/``pre``) have their
        blank leading text replaced by a newline when the first child is
        block-level too, and each block-level child is visited in turn.
        A blank tail on ``elem`` is always replaced by a newline.
        """
        linebreak = '\n'
        if util.isBlockLevel(elem.tag) and elem.tag not in ('code', 'pre'):
            leading = (elem.text or '').strip()
            if not leading and len(elem) and util.isBlockLevel(elem[0].tag):
                elem.text = linebreak
            for node in elem:
                if util.isBlockLevel(node.tag):
                    self._prettifyETree(node)
        trailing = (elem.tail or '').strip()
        if not trailing:
            elem.tail = linebreak
Exemple #4
0
 def isblocklevel(self, html):
     """Tell whether ``html`` starts with a block-level tag.

     The tag name is read from a leading ``<name`` or ``</name``. Names
     starting with ``!``, ``?``, ``@`` or ``%`` always count as block-level;
     any other name is decided by ``util.isBlockLevel``. Text that does not
     start with a tag yields ``False``.
     """
     tag_match = re.match('^\\<\\/?([^ >]+)', html)
     if not tag_match:
         return False
     tag_name = tag_match.group(1)
     if tag_name[0] in ('!', '?', '@', '%'):
         return True
     return util.isBlockLevel(tag_name)
 def isblocklevel(self, html):
     """Return whether the tag opening ``html`` is block-level.

     ``False`` when ``html`` does not begin with ``<name`` / ``</name``.
     Names whose first character is one of ``! ? @ %`` are treated as
     block-level without consulting ``util.isBlockLevel``.
     """
     found = re.match('^\\<\\/?([^ >]+)', html)
     if found:
         name = found.group(1)
         return name[0] in ('!', '?', '@', '%') or util.isBlockLevel(name)
     return False
 def run(self, doc):
     """Move attribute markers found in element text onto the elements.

     Every element of ``doc`` is visited. For block-level elements the
     marker is searched with ``BLOCK_RE`` (``HEADER_RE`` for headers) in the
     tail of the last child, or in the element text when there is no such
     tail. For other elements ``INLINE_RE`` must match at the start of the
     tail. The captured group is handed to ``assign_attrs`` and the matched
     text is cut out.
     """
     for node in doc.getiterator():
         if not isBlockLevel(node.tag):
             # Inline: attrs are expected at the very start of the tail.
             if node.tail:
                 found = self.INLINE_RE.match(node.tail)
                 if found:
                     self.assign_attrs(node, found.group(1))
                     node.tail = node.tail[found.end():]
             continue

         # Block level: headers carry their attrs at the end of the line.
         pattern = self.BLOCK_RE
         if isheader(node):
             pattern = self.HEADER_RE

         if len(node) and node[-1].tail:
             # Has children: look in the text that follows the last child.
             found = pattern.search(node[-1].tail)
             if found:
                 self.assign_attrs(node, found.group(1))
                 node[-1].tail = node[-1].tail[:found.start()]
                 if isheader(node):
                     # Drop the closing #s of the header.
                     node[-1].tail = node[-1].tail.rstrip('#').rstrip()
         elif node.text:
             # No usable child tail: look in the element's own text.
             found = pattern.search(node.text)
             if found:
                 self.assign_attrs(node, found.group(1))
                 node.text = node.text[:found.start()]
                 if isheader(node):
                     # Drop the closing #s of the header.
                     node.text = node.text.rstrip('#').rstrip()
 def run(self, doc):
     """Apply attribute markers in ``doc`` to the elements they belong to.

     Block-level elements are searched with ``BLOCK_RE`` (or ``HEADER_RE``
     for headers) in the last child's tail, falling back to the element
     text; the text is truncated at the match. Other elements are matched
     with ``INLINE_RE`` at the start of their tail, and the match is removed
     from the front. Captured attributes go to ``assign_attrs``.
     """
     for elem in doc.getiterator():
         if isBlockLevel(elem.tag):
             regex = self.HEADER_RE if isheader(elem) else self.BLOCK_RE
             # Pick the string that may end with the marker.
             if len(elem) and elem[-1].tail:
                 owner, slot = elem[-1], 'tail'
             elif elem.text:
                 owner, slot = elem, 'text'
             else:
                 continue
             hit = regex.search(getattr(owner, slot))
             if hit:
                 self.assign_attrs(elem, hit.group(1))
                 setattr(owner, slot, getattr(owner, slot)[:hit.start()])
                 if isheader(elem):
                     # Headers may still end in closing #s; strip them.
                     setattr(owner, slot,
                             getattr(owner, slot).rstrip('#').rstrip())
         else:
             tail = elem.tail
             if tail:
                 hit = self.INLINE_RE.match(tail)
                 if hit:
                     self.assign_attrs(elem, hit.group(1))
                     elem.tail = elem.tail[hit.end():]
Exemple #8
0
 def run(self, doc):
     """Transfer attribute markers from text/tail strings to elements.

     For a block-level element the marker is looked up in the last child's
     tail, else in the element text, using ``HEADER_RE`` for headers and
     ``BLOCK_RE`` otherwise. For any other element ``INLINE_RE`` is matched
     against the start of its tail. The captured group is passed to
     ``assign_attrs`` and the matched text is removed.
     """
     for el in doc.getiterator():
         if not isBlockLevel(el.tag):
             inline = self.INLINE_RE.match(el.tail) if el.tail else None
             if inline:
                 self.assign_attrs(el, inline.group(1))
                 el.tail = el.tail[inline.end():]
             continue

         pattern = self.BLOCK_RE
         if isheader(el):
             pattern = self.HEADER_RE

         if len(el) and el[-1].tail:
             match = pattern.search(el[-1].tail)
             if not match:
                 continue
             self.assign_attrs(el, match.group(1))
             el[-1].tail = el[-1].tail[:match.start()]
             if isheader(el):
                 # Remove the header's trailing #s and the space before them.
                 el[-1].tail = el[-1].tail.rstrip('#').rstrip()
         elif el.text:
             match = pattern.search(el.text)
             if not match:
                 continue
             self.assign_attrs(el, match.group(1))
             el.text = el.text[:match.start()]
             if isheader(el):
                 # Remove the header's trailing #s and the space before them.
                 el.text = el.text.rstrip('#').rstrip()
    def _parseID(self, element):
        ''' Recursively parse all {#idname}s at eol into ids.

        For a block-level element other than ``code``/``pre`` whose text is
        not blank, the first ``ID_RE`` match in the text is consumed: the
        ``id`` group becomes the ``id`` attribute when the ``type`` group is
        ``#`` and the ``class`` attribute otherwise, and the text is cut at
        the start of the match. Children are processed the same way.

        Returns ``element`` (modified in place).
        '''
        if isBlockLevel(element.tag) and element.tag not in ['code', 'pre']:
            if element.text and element.text.strip():
                m = ID_RE.search(element.text)
                if m:
                    if m.group('type') == '#':
                        element.set('id', m.group('id'))
                    else:
                        element.set('class', m.group('id'))
                    element.text = element.text[:m.start()]
                    # TODO: should this be restricted to <h1>..<h4> only?
                    if element.tag in SECTIONLINK_PERMITTED_TAGS:
                        # Attributes are set directly rather than by looping
                        # over dict.iteritems(), which only exists on
                        # Python 2.
                        child = etree.Element("a")
                        child.set('class', 'elementid-sectionlink')
                        child.set('href', '#' + m.group('id'))
                        child.set('title', 'Link to this section')
                        child.text = "&para;"
                        # NOTE: the section link is built but not attached
                        # to ``element``; appending it was disabled in the
                        # original code and stays disabled here.
            for e in element:
                self._parseID(e)
        return element
Exemple #10
0
    def _parseID(self, element):
        ''' Recursively parse all {#idname}s at eol into ids.

        Only block-level elements other than ``code``/``pre`` are handled.
        When the element text is not blank and ``ID_RE`` matches it, the
        ``id`` group is stored as the ``id`` attribute (``type`` group equal
        to ``#``) or as the ``class`` attribute (any other type), and the
        text is truncated at the match start. The same is then done for
        every child.

        Returns ``element`` (modified in place).
        '''
        if isBlockLevel(element.tag) and element.tag not in ['code', 'pre']:
            if element.text and element.text.strip():
                m = ID_RE.search(element.text)
                if m:
                    attr_name = 'id' if m.group('type') == '#' else 'class'
                    element.set(attr_name, m.group('id'))
                    element.text = element.text[:m.start()]
                    # TODO: should this be restricted to <h1>..<h4> only?
                    if element.tag in SECTIONLINK_PERMITTED_TAGS:
                        # dict.iteritems() is Python 2 only, so the three
                        # attributes are set one by one instead.
                        child = etree.Element("a")
                        child.set('class', 'elementid-sectionlink')
                        child.set('href', '#' + m.group('id'))
                        child.set('title', 'Link to this section')
                        child.text = "&para;"
                        # NOTE: the anchor is deliberately left detached;
                        # the code that appended it to ``element`` was
                        # disabled in the original.
            for e in element:
                self._parseID(e)
        return element
Exemple #11
0
 def run(self, root):
     """Add ``PRE_CLASS`` to the class of the ``pre`` elements under ``root``.

     Each ``pre`` element that ``isBlockLevel`` accepts keeps its existing
     ``class`` value with ``PRE_CLASS`` appended after a space, or gets
     ``PRE_CLASS`` alone when it had no class. Returns ``root``.
     """
     for pre in root.getiterator('pre'):
         if not isBlockLevel(pre.tag):
             continue
         current = pre.get('class')
         if current:
             merged = '%s %s' % (current, PRE_CLASS)
         else:
             merged = PRE_CLASS
         pre.set('class', merged)

     return root
Exemple #12
0
 def run(self, root):
     """Insert a ``<style>`` element with ``SECTIONLINK_CSS`` at the top of ``root``.

     Nothing happens unless ``root`` is block-level and is not ``code`` or
     ``pre``. The text that used to open ``root`` is moved to the tail of
     the new element so it still follows the style sheet. Returns ``None``;
     ``root`` is modified in place.
     """
     if isBlockLevel(root.tag) and root.tag not in ['code', 'pre']:
         child = etree.Element("style")
         # Set directly: the previous dict.iteritems() loop is Python 2 only.
         child.set('type', 'text/css')
         # Note upstream doc bug: it's not called markdown.AtomicString().
         child.text = markdown.util.AtomicString(SECTIONLINK_CSS)
         root.insert(0, child)
         child.tail = root.text
         root.text = None
 def run(self, root):
     '''
     Find and remove all id specs references from the text,
     and add them as the id attribute of the element.

     ROOT is div#section_content.

     The work is delegated to ``_parseID`` and only happens when ``root``
     is block-level and not ``code``/``pre``. Returns ``root``.
     '''
     if isBlockLevel(root.tag) and root.tag not in ['code', 'pre']:
         self._parseID(root)
         child = etree.Element("style")
         # Set directly: the previous dict.iteritems() loop is Python 2 only.
         child.set('type', 'text/css')
         # Note upstream doc bug: it's not called markdown.AtomicString().
         child.text = markdown.util.AtomicString(SECTIONLINK_CSS)
         # NOTE: the style element is built but not inserted into ``root``;
         # the insertion was disabled in the original code.
     return root
Exemple #14
0
 def run(self, root):
     '''
     Find and remove all id specs references from the text,
     and add them as the id attribute of the element.

     ROOT is div#section_content.

     ``_parseID`` does the parsing; it is only invoked for a block-level
     ``root`` that is neither ``code`` nor ``pre``. Returns ``root``.
     '''
     if isBlockLevel(root.tag) and root.tag not in ['code', 'pre']:
         self._parseID(root)
         child = etree.Element("style")
         # A plain set() call replaces the loop over dict.iteritems(),
         # which raises AttributeError on Python 3.
         child.set('type', 'text/css')
         # Note upstream doc bug: it's not called markdown.AtomicString().
         child.text = markdown.util.AtomicString(SECTIONLINK_CSS)
         # NOTE: inserting ``child`` into ``root`` was disabled in the
         # original code, so the element is currently discarded.
     return root
Exemple #15
0
    def _parseID(self, element):
        ''' Recursively parse all {#idname}s at eol into ids.

        For a block-level element other than ``code``/``pre``, ``ID_RE`` is
        applied to the element text repeatedly until the text is blank or
        no longer matches. Each match truncates the text at the match start
        and sets the ``id`` attribute (``type`` group equal to ``#``) or the
        ``class`` attribute (any other type) from the ``id`` group.

        When an id is set on an element whose tag is not in ``HEADER_TAGS``,
        a permalink ``<a>`` is appended to it, and the first time a given
        tag is seen a hover selector for it is added to ``self.css``.

        Returns ``element`` (modified in place).
        '''
        if isBlockLevel(element.tag) and element.tag not in ['code', 'pre']:
            while element.text and element.text.strip():
                m = ID_RE.search(element.text)
                if not m:
                    break
                # Both kinds of spec are removed from the text the same way.
                element.text = element.text[:m.start()]
                if m.group('type') != '#':
                    element.set('class', m.group('id'))
                    continue
                element.set('id', m.group('id'))

                # TODO: should this be restricted to <h1>..<h4> only?
                if element.tag not in HEADER_TAGS:
                    # Attributes are set directly; the former loop over
                    # dict.iteritems() only works on Python 2.
                    child = etree.Element("a")
                    child.set('class', 'elementid-permalink')
                    child.set('href', '#' + m.group('id'))
                    child.set('title', 'Permanent link')
                    child.text = "&para;"
                    element.append(child)

                    if element.tag not in self.seen_block_tag:
                        self.css += '%s:hover > .elementid-permalink, ' % element.tag
                        self.seen_block_tag[element.tag] = True
            for e in element:
                self._parseID(e)
        return element
Exemple #16
0
 def run(self, root):
     '''
     Find and remove all id specs references from the text,
     and add them as the id attribute of the element.

     ROOT is div#section_content.

     After ``_parseID`` has run, the CSS accumulated in ``self.css`` is
     completed with the ``dt`` hover rule and inserted as a ``<style>``
     element at the start of ``root``. ``self.css`` and
     ``self.seen_block_tag`` are then reset for the next document.
     Nothing is done for a ``root`` that is not block-level or is
     ``code``/``pre``. Returns ``root``.
     '''
     if isBlockLevel(root.tag) and root.tag not in ['code', 'pre']:
         self._parseID(root)
         child = etree.Element("style")
         # Set directly: the previous dict.iteritems() loop is Python 2 only.
         child.set('type', 'text/css')
         # Closes the selector list that _parseID leaves open-ended.
         self.css += 'dt:hover > .elementid-permalink { visibility: visible }'
         # Note upstream doc bug: it's not called markdown.AtomicString().
         child.text = markdown.util.AtomicString(self.css)
         root.insert(0, child)
         self.css = CSS
         self.seen_block_tag = {}
     return root
Exemple #17
0
 def run(self, doc):
     """Move attribute markers found in element text onto the elements.

     Every element of ``doc`` is visited.

     * Block-level elements are searched with ``BLOCK_RE``, or with
       ``HEADER_RE`` for headers and ``dt``. The searched string is the
       tail of the last child, or the element text when there is no such
       tail. The text is truncated at the match, and headers additionally
       lose their trailing ``#`` characters.
     * ``li`` elements with children are special-cased because a nested
       ``ul``/``ol`` may follow the item's own text: the string searched is
       the one just before the first nested list.
     * ``td`` elements without a usable child tail fall back to
       ``BASE_RE`` when the block pattern does not match.
     * Any other element is matched with ``INLINE_RE`` at the start of its
       tail, and the match is removed from the front.

     The captured group is passed to ``assign_attrs``. Returns ``None``.
     """
     for elem in doc.getiterator():
         if isBlockLevel(elem.tag):
             # Block level: check for attrs on last line of text
             RE = self.BLOCK_RE
             if isheader(elem) or elem.tag == 'dt':
                 # header or def-term: check for attrs at end of line
                 RE = self.HEADER_RE
             if len(elem) and elem.tag == 'li':
                 # special case list items. children may include a ul or ol.
                 pos = None
                 # find the ul or ol position
                 for i, child in enumerate(elem):
                     if child.tag in ['ul', 'ol']:
                         pos = i
                         break
                 if pos is None and elem[-1].tail:
                     # use tail of last child. no ul or ol.
                     m = RE.search(elem[-1].tail)
                     if m:
                         self.assign_attrs(elem, m.group(1))
                         elem[-1].tail = elem[-1].tail[:m.start()]
                 elif pos is not None and pos > 0 and elem[pos-1].tail:
                     # use tail of last child before ul or ol
                     m = RE.search(elem[pos-1].tail)
                     if m:
                         self.assign_attrs(elem, m.group(1))
                         elem[pos-1].tail = elem[pos-1].tail[:m.start()]
                 elif elem.text:
                     # use text. ul is first child.
                     m = RE.search(elem.text)
                     if m:
                         self.assign_attrs(elem, m.group(1))
                         elem.text = elem.text[:m.start()]
             elif len(elem) and elem[-1].tail:
                 # has children. Get from tail of last child
                 m = RE.search(elem[-1].tail)
                 if m:
                     self.assign_attrs(elem, m.group(1))
                     elem[-1].tail = elem[-1].tail[:m.start()]
                     if isheader(elem):
                         # clean up trailing #s
                         elem[-1].tail = elem[-1].tail.rstrip('#').rstrip()
             elif elem.text:
                 # no children. Get from text.
                 m = RE.search(elem.text)
                 if not m and elem.tag == 'td':
                     # Table cells: retry with the base pattern. The debug
                     # print that used to follow this match was removed.
                     m = re.search(self.BASE_RE, elem.text)
                 if m:
                     self.assign_attrs(elem, m.group(1))
                     elem.text = elem.text[:m.start()]
                     if isheader(elem):
                         # clean up trailing #s
                         elem.text = elem.text.rstrip('#').rstrip()
         else:
             # inline: check for attrs at start of tail
             if elem.tail:
                 m = self.INLINE_RE.match(elem.tail)
                 if m:
                     self.assign_attrs(elem, m.group(1))
                     elem.tail = elem.tail[m.end():]
    def run(self, lines):
        """Replace raw HTML blocks in ``lines`` with html-stash placeholders.

        The lines are joined and re-split on blank lines into blocks. A block
        that starts with ``<`` is examined: its opening tag comes from
        ``_get_left_tag`` (``<!`` is treated as the pseudo tag ``--``) and
        its closing tag from ``_get_right_tag``. Blocks whose tag is not
        block-level pass through unchanged, one-liner tags pass through
        stripped, and complete block-level elements are stored through
        ``self.markdown.htmlStash.store`` with the returned value put in
        their place. An element whose closing tag is not in the same block
        is accumulated in ``items`` over the following blocks until the
        closing tag shows up.

        When ``self.markdown_in_raw`` is set and the opening tag carries a
        ``markdown`` attribute, only the opening and closing parts are
        stashed and the content between them stays in the output.

        Returns the processed text as a list of lines.
        """
        text = '\n'.join(lines)
        new_blocks = []
        text = text.split('\n\n')
        # Blocks of an element that is still open (only used while in_tag).
        items = []
        left_tag = ''
        right_tag = ''
        in_tag = False
        while text:
            block = text[0]
            # The leading-newline strip runs twice (here and after the pop),
            # so up to two leading newlines are removed from the block.
            if block.startswith('\n'):
                block = block[1:]
            text = text[1:]
            if block.startswith('\n'):
                block = block[1:]
            if not in_tag:
                if block.startswith('<') and len(block.strip()) > 1:
                    if block[1] == '!':
                        # '<!' opener: use '--' as the tag, no attributes.
                        left_tag, left_index, attrs = '--', 2, {}
                    else:
                        left_tag, left_index, attrs = self._get_left_tag(block)
                    right_tag, data_index = self._get_right_tag(left_tag, left_index, block)
                    if data_index < len(block) and (util.isBlockLevel(left_tag) or left_tag == '--'):
                        # Whatever follows data_index is pushed back onto the
                        # queue to be handled as a block of its own.
                        text.insert(0, block[data_index:])
                        block = block[:data_index]
                    if not (util.isBlockLevel(left_tag) or block[1] in ('!', '?', '@', '%')):
                        # Not a block-level tag: leave the block untouched.
                        new_blocks.append(block)
                        continue
                    if self._is_oneliner(left_tag):
                        new_blocks.append(block.strip())
                        continue
                    if block.rstrip().endswith('>') and self._equal_tags(left_tag, right_tag):
                        # The element opens and closes within this block.
                        if self.markdown_in_raw and 'markdown' in attrs.keys():
                            # Opening part with the markdown attribute removed.
                            start = re.sub('\\smarkdown(=[\\\'"]?[^> ]*[\\\'"]?)?', '', block[:left_index])
                            # Last len(right_tag) + 2 characters; presumably
                            # the closing tag, which depends on what
                            # _get_right_tag returns.
                            end = block[-len(right_tag) - 2:]
                            block = block[left_index:-len(right_tag) - 2]
                            new_blocks.append(self.markdown.htmlStash.store(start))
                            new_blocks.append(block)
                            new_blocks.append(self.markdown.htmlStash.store(end))
                        else:
                            new_blocks.append(self.markdown.htmlStash.store(block.strip()))
                        continue
                    else:
                        # 'and' binds tighter than 'or': the endswith test
                        # only applies to the '--' case.
                        if util.isBlockLevel(left_tag) or left_tag == '--' and not block.rstrip().endswith('>'):
                            # Element still open: start collecting its blocks.
                            items.append(block.strip())
                            in_tag = True
                        else:
                            new_blocks.append(self.markdown.htmlStash.store(block.strip()))
                        continue
                new_blocks.append(block)
            else:
                # Inside an open element: collect until its closing tag.
                items.append(block)
                right_tag, data_index = self._get_right_tag(left_tag, 0, block)
                if self._equal_tags(left_tag, right_tag):
                    if data_index < len(block):
                        # Text after data_index goes back to the queue.
                        items[-1] = block[:data_index]
                        text.insert(0, block[data_index:])
                    in_tag = False
                    # attrs and left_index still hold the values set when
                    # this element was opened.
                    if self.markdown_in_raw and 'markdown' in attrs.keys():
                        start = re.sub('\\smarkdown(=[\\\'"]?[^> ]*[\\\'"]?)?', '', items[0][:left_index])
                        items[0] = items[0][left_index:]
                        end = items[-1][-len(right_tag) - 2:]
                        items[-1] = items[-1][:-len(right_tag) - 2]
                        new_blocks.append(self.markdown.htmlStash.store(start))
                        new_blocks.extend(items)
                        new_blocks.append(self.markdown.htmlStash.store(end))
                    else:
                        new_blocks.append(self.markdown.htmlStash.store('\n\n'.join(items)))
                    items = []

        # Input ended while an element was still open: flush what was collected.
        if items:
            if self.markdown_in_raw and 'markdown' in attrs.keys():
                start = re.sub('\\smarkdown(=[\\\'"]?[^> ]*[\\\'"]?)?', '', items[0][:left_index])
                items[0] = items[0][left_index:]
                end = items[-1][-len(right_tag) - 2:]
                items[-1] = items[-1][:-len(right_tag) - 2]
                new_blocks.append(self.markdown.htmlStash.store(start))
                new_blocks.extend(items)
                # Unlike the cases above, the end part is only stashed when
                # it is not blank.
                if end.strip():
                    new_blocks.append(self.markdown.htmlStash.store(end))
            else:
                new_blocks.append(self.markdown.htmlStash.store('\n\n'.join(items)))
            new_blocks.append('\n')
        new_text = '\n\n'.join(new_blocks)
        return new_text.split('\n')
    def run(self, lines):
        """Replace raw HTML blocks in ``lines`` with html-stash placeholders.

        The joined text is split on blank lines into blocks. For a block
        starting with ``<``, the opening tag is taken from ``_get_left_tag``
        (``<!`` maps to the pseudo tag ``--``) and the closing tag from
        ``_get_right_tag``. Non-block-level tags pass through unchanged and
        one-liner tags pass through stripped. Complete block-level elements
        are stored via ``self.markdown.htmlStash.store`` and replaced by the
        value it returns. An element left open at the end of a block is
        collected in ``items`` across the following blocks until its closing
        tag is found.

        With ``self.markdown_in_raw`` set and a ``markdown`` attribute on the
        opening tag, only the opening and closing parts are stashed; the
        content in between stays in the output.

        Returns the processed text as a list of lines.
        """
        text = '\n'.join(lines)
        new_blocks = []
        text = text.split('\n\n')
        # Blocks of an element that is still open (only used while in_tag).
        items = []
        left_tag = ''
        right_tag = ''
        in_tag = False
        while text:
            block = text[0]
            # The leading-newline strip runs twice (here and after the pop),
            # so up to two leading newlines are removed from the block.
            if block.startswith('\n'):
                block = block[1:]
            text = text[1:]
            if block.startswith('\n'):
                block = block[1:]
            if not in_tag:
                if block.startswith('<') and len(block.strip()) > 1:
                    if block[1] == '!':
                        # '<!' opener: use '--' as the tag, no attributes.
                        left_tag, left_index, attrs = '--', 2, {}
                    else:
                        left_tag, left_index, attrs = self._get_left_tag(block)
                    right_tag, data_index = self._get_right_tag(
                        left_tag, left_index, block)
                    if data_index < len(block) and (util.isBlockLevel(left_tag)
                                                    or left_tag == '--'):
                        # Whatever follows data_index is pushed back onto the
                        # queue to be handled as a block of its own.
                        text.insert(0, block[data_index:])
                        block = block[:data_index]
                    if not (util.isBlockLevel(left_tag)
                            or block[1] in ('!', '?', '@', '%')):
                        # Not a block-level tag: leave the block untouched.
                        new_blocks.append(block)
                        continue
                    if self._is_oneliner(left_tag):
                        new_blocks.append(block.strip())
                        continue
                    if block.rstrip().endswith('>') and self._equal_tags(
                            left_tag, right_tag):
                        # The element opens and closes within this block.
                        if self.markdown_in_raw and 'markdown' in attrs.keys():
                            # Opening part with the markdown attribute removed.
                            start = re.sub(
                                '\\smarkdown(=[\\\'"]?[^> ]*[\\\'"]?)?', '',
                                block[:left_index])
                            # Last len(right_tag) + 2 characters; presumably
                            # the closing tag, which depends on what
                            # _get_right_tag returns.
                            end = block[-len(right_tag) - 2:]
                            block = block[left_index:-len(right_tag) - 2]
                            new_blocks.append(
                                self.markdown.htmlStash.store(start))
                            new_blocks.append(block)
                            new_blocks.append(
                                self.markdown.htmlStash.store(end))
                        else:
                            new_blocks.append(
                                self.markdown.htmlStash.store(block.strip()))
                        continue
                    else:
                        # 'and' binds tighter than 'or': the endswith test
                        # only applies to the '--' case.
                        if util.isBlockLevel(
                                left_tag
                        ) or left_tag == '--' and not block.rstrip().endswith(
                                '>'):
                            # Element still open: start collecting its blocks.
                            items.append(block.strip())
                            in_tag = True
                        else:
                            new_blocks.append(
                                self.markdown.htmlStash.store(block.strip()))
                        continue
                new_blocks.append(block)
            else:
                # Inside an open element: collect until its closing tag.
                items.append(block)
                right_tag, data_index = self._get_right_tag(left_tag, 0, block)
                if self._equal_tags(left_tag, right_tag):
                    if data_index < len(block):
                        # Text after data_index goes back to the queue.
                        items[-1] = block[:data_index]
                        text.insert(0, block[data_index:])
                    in_tag = False
                    # attrs and left_index still hold the values set when
                    # this element was opened.
                    if self.markdown_in_raw and 'markdown' in attrs.keys():
                        start = re.sub('\\smarkdown(=[\\\'"]?[^> ]*[\\\'"]?)?',
                                       '', items[0][:left_index])
                        items[0] = items[0][left_index:]
                        end = items[-1][-len(right_tag) - 2:]
                        items[-1] = items[-1][:-len(right_tag) - 2]
                        new_blocks.append(self.markdown.htmlStash.store(start))
                        new_blocks.extend(items)
                        new_blocks.append(self.markdown.htmlStash.store(end))
                    else:
                        new_blocks.append(
                            self.markdown.htmlStash.store('\n\n'.join(items)))
                    items = []

        # Input ended while an element was still open: flush what was collected.
        if items:
            if self.markdown_in_raw and 'markdown' in attrs.keys():
                start = re.sub('\\smarkdown(=[\\\'"]?[^> ]*[\\\'"]?)?', '',
                               items[0][:left_index])
                items[0] = items[0][left_index:]
                end = items[-1][-len(right_tag) - 2:]
                items[-1] = items[-1][:-len(right_tag) - 2]
                new_blocks.append(self.markdown.htmlStash.store(start))
                new_blocks.extend(items)
                # Unlike the cases above, the end part is only stashed when
                # it is not blank.
                if end.strip():
                    new_blocks.append(self.markdown.htmlStash.store(end))
            else:
                new_blocks.append(
                    self.markdown.htmlStash.store('\n\n'.join(items)))
            new_blocks.append('\n')
        new_text = '\n\n'.join(new_blocks)
        return new_text.split('\n')