def _prettifyETree(self, elem):
    """Recursively add line breaks around block-level ElementTree nodes.

    For a block-level element other than ``code``/``pre``, a newline is put
    in ``elem.text`` when that text is blank and the first child is block
    level, and every block-level child is processed the same way.  In all
    cases a blank ``elem.tail`` is replaced by a newline.
    """
    newline = '\n'

    def is_blank(value):
        # None, the empty string and whitespace-only strings are all blank.
        return not value or not value.strip()

    if util.isBlockLevel(elem.tag) and elem.tag not in ('code', 'pre'):
        if is_blank(elem.text) and len(elem) and util.isBlockLevel(elem[0].tag):
            elem.text = newline
        for child in elem:
            if util.isBlockLevel(child.tag):
                self._prettifyETree(child)
    # Recursing into children never touches this element's own tail, so a
    # single check here covers both the container and non-container case.
    if is_blank(elem.tail):
        elem.tail = newline
def _prettifyETree(self, elem):
    """Insert newlines so block-level elements serialize on separate lines.

    Only block-level elements that are not ``code`` or ``pre`` have their
    text and children touched; the tail of ``elem`` is normalised to a
    newline whenever it is empty or whitespace-only.
    """
    if util.isBlockLevel(elem.tag) and elem.tag not in ('code', 'pre'):
        has_text = bool(elem.text and elem.text.strip())
        if not has_text and len(elem) and util.isBlockLevel(elem[0].tag):
            # Blank text directly before a block-level first child.
            elem.text = '\n'
        for child in elem:
            if util.isBlockLevel(child.tag):
                self._prettifyETree(child)
        if not (elem.tail and elem.tail.strip()):
            elem.tail = '\n'
    if not (elem.tail and elem.tail.strip()):
        elem.tail = '\n'
def _prettifyETree(self, elem):
    """ Recursively add linebreaks to ElementTree children. """
    linebreak = '\n'
    # Non-block elements, and code/pre, keep their text and children as is.
    untouched = not util.isBlockLevel(elem.tag) or elem.tag in ('code', 'pre')
    if not untouched:
        text_blank = not elem.text or not elem.text.strip()
        if text_blank and len(elem) and util.isBlockLevel(elem[0].tag):
            elem.text = linebreak
        for sub in elem:
            if util.isBlockLevel(sub.tag):
                self._prettifyETree(sub)
        tail_blank = not elem.tail or not elem.tail.strip()
        if tail_blank:
            elem.tail = linebreak
    # Applies to every element, block level or not.
    if not elem.tail or not elem.tail.strip():
        elem.tail = linebreak
def isblocklevel(self, html):
    """Report whether *html* begins with a block-level tag.

    The tag name is taken from a leading ``<name`` or ``</name``.  Names
    whose first character is ``!``, ``?``, ``@`` or ``%`` count as block
    level without further checks; any other name is passed to
    ``util.isBlockLevel``.  Returns False when *html* does not start with
    a tag.
    """
    found = re.match('^\\<\\/?([^ >]+)', html)
    if not found:
        return False
    tag_name = found.group(1)
    if tag_name[0] in ('!', '?', '@', '%'):
        return True
    return util.isBlockLevel(tag_name)
def run(self, doc):
    """Move attribute markers out of element text/tails onto the elements.

    Block-level elements are searched with ``BLOCK_RE`` (``HEADER_RE`` for
    headers) in the tail of their last child, or in their own text when
    that tail is empty or there are no children.  Other elements are
    matched with ``INLINE_RE`` at the start of their tail.  Matched text
    is handed to ``assign_attrs`` and removed from the string.
    """
    for node in doc.getiterator():
        if not isBlockLevel(node.tag):
            # Inline: the marker must sit at the very start of the tail.
            if node.tail:
                found = self.INLINE_RE.match(node.tail)
                if found:
                    self.assign_attrs(node, found.group(1))
                    node.tail = node.tail[found.end():]
            continue

        heading = isheader(node)
        pattern = self.HEADER_RE if heading else self.BLOCK_RE
        if len(node) and node[-1].tail:
            # Has children: the marker lives in the last child's tail.
            last = node[-1]
            found = pattern.search(last.tail)
            if found:
                self.assign_attrs(node, found.group(1))
                last.tail = last.tail[:found.start()]
                if heading:
                    # Clean up trailing #s.
                    last.tail = last.tail.rstrip('#').rstrip()
        elif node.text:
            # No usable child tail: the marker lives in the element text.
            found = pattern.search(node.text)
            if found:
                self.assign_attrs(node, found.group(1))
                node.text = node.text[:found.start()]
                if heading:
                    # Clean up trailing #s.
                    node.text = node.text.rstrip('#').rstrip()
def run(self, doc):
    """Apply attribute markers found in the tree to their elements.

    For each element, the string that may carry a marker is chosen, the
    matching pattern is applied, the captured group is passed to
    ``assign_attrs`` and the marker is cut from that string.
    """
    for elem in doc.getiterator():
        if isBlockLevel(elem.tag):
            pattern = self.BLOCK_RE
            if isheader(elem):
                # Headers are matched with their own pattern.
                pattern = self.HEADER_RE
            # Pick the object/attribute pair holding the candidate string:
            # the last child's tail when present, else the element's text.
            if len(elem) and elem[-1].tail:
                holder, field = elem[-1], 'tail'
            elif elem.text:
                holder, field = elem, 'text'
            else:
                continue
            found = pattern.search(getattr(holder, field))
            if not found:
                continue
            self.assign_attrs(elem, found.group(1))
            setattr(holder, field, getattr(holder, field)[:found.start()])
            if isheader(elem):
                # Clean up trailing #s left behind on headers.
                setattr(holder, field,
                        getattr(holder, field).rstrip('#').rstrip())
        elif elem.tail:
            # Inline: the marker must be at the start of the tail.
            found = self.INLINE_RE.match(elem.tail)
            if found:
                self.assign_attrs(elem, found.group(1))
                elem.tail = elem.tail[found.end():]
def run(self, doc):
    """Walk *doc* and turn attribute markers into element attributes.

    Block-level elements: search the last child's tail (or the element's
    text if there is no such tail) with ``HEADER_RE`` for headers and
    ``BLOCK_RE`` otherwise.  Everything else: match ``INLINE_RE`` at the
    start of the tail.  The matched marker is removed from the string.
    """
    for item in doc.getiterator():
        if isBlockLevel(item.tag):
            header = isheader(item)
            regex = self.HEADER_RE if header else self.BLOCK_RE
            if len(item) and item[-1].tail:
                hit = regex.search(item[-1].tail)
                if hit:
                    self.assign_attrs(item, hit.group(1))
                    trimmed = item[-1].tail[:hit.start()]
                    if header:
                        # Headers may keep closing #s before the marker.
                        trimmed = trimmed.rstrip('#').rstrip()
                    item[-1].tail = trimmed
            elif item.text:
                hit = regex.search(item.text)
                if hit:
                    self.assign_attrs(item, hit.group(1))
                    trimmed = item.text[:hit.start()]
                    if header:
                        trimmed = trimmed.rstrip('#').rstrip()
                    item.text = trimmed
            continue
        if item.tail:
            hit = self.INLINE_RE.match(item.tail)
            if hit:
                self.assign_attrs(item, hit.group(1))
                item.tail = item.tail[hit.end():]
def _parseID(self, element):
    ''' Recursively parse all {#idname}s at eol into ids.

    For a block-level element other than code/pre whose text is not blank,
    the first ID_RE match in the text sets the element's ``id`` (when the
    'type' group is '#') or ``class`` (any other type), and the text is
    cut at the start of the match.  Children are then processed the same
    way.  Returns ELEMENT.
    '''
    if isBlockLevel(element.tag) and element.tag not in ['code', 'pre']:
        if element.text and element.text.strip():
            m = ID_RE.search(element.text)
            if m:
                if m.group('type') == '#':
                    element.set('id', m.group('id'))
                else:
                    element.set('class', m.group('id'))
                # Drop the spec, and anything after it, from the text.
                element.text = element.text[:m.start()]
                # NOTE: no section-link anchor is emitted.  Earlier code
                # built an <a class="elementid-sectionlink"> for tags in
                # SECTIONLINK_PERMITTED_TAGS but never attached it to the
                # tree, so that dead construction was removed.
                # TODO: should this be restricted to <h1>..<h4> only?
        # NOTE(review): recursion is assumed to be limited to children of
        # block-level, non-code/pre elements -- confirm against history.
        for e in element:
            self._parseID(e)
    return element
def _parseID(self, element):
    ''' Recursively parse all {#idname}s at eol into ids.

    Only block-level elements other than code/pre are examined.  When the
    element text is not blank and ID_RE matches it, the 'id' group is
    stored as the ``id`` attribute if the 'type' group is '#', otherwise
    as the ``class`` attribute, and the text is truncated at the match.
    Returns ELEMENT after processing its children.
    '''
    if isBlockLevel(element.tag) and element.tag not in ['code', 'pre']:
        text = element.text
        if text and text.strip():
            m = ID_RE.search(text)
            if m:
                attribute = 'id' if m.group('type') == '#' else 'class'
                element.set(attribute, m.group('id'))
                # Remove the spec, and whatever follows it, from the text.
                element.text = element.text[:m.start()]
                # NOTE: the section-link <a> that used to be built here for
                # SECTIONLINK_PERMITTED_TAGS was never appended to the
                # element, so it had no effect and has been dropped.
                # TODO: should this be restricted to <h1>..<h4> only?
        # NOTE(review): assumes the recursion belongs inside the
        # block-level check -- confirm against history.
        for e in element:
            self._parseID(e)
    return element
def run(self, root):
    """Add PRE_CLASS to the ``class`` attribute of block-level ``pre`` nodes.

    An existing class value is kept and PRE_CLASS is appended after a
    space; otherwise the attribute is set to PRE_CLASS.  Returns *root*.
    """
    for pre in root.getiterator('pre'):
        if not isBlockLevel(pre.tag):
            continue
        existing = pre.get('class')
        combined = '%s %s' % (existing, PRE_CLASS) if existing else PRE_CLASS
        pre.set('class', combined)
    return root
def run(self, root):
    ''' Prepend a <style type="text/css"> element holding SECTIONLINK_CSS.

    Nothing is done unless ROOT is block level and not code/pre.  ROOT is
    modified in place and nothing is returned.
    '''
    if isBlockLevel(root.tag) and root.tag not in ['code', 'pre']:
        child = etree.Element("style")
        # Set directly: dict.iteritems() does not exist on Python 3.
        child.set('type', 'text/css')
        # Note upstream doc bug: it's not called markdown.AtomicString().
        child.text = markdown.util.AtomicString(SECTIONLINK_CSS)
        root.insert(0, child)
        # The new first child takes over ROOT's leading text as its tail,
        # so that text now follows the <style> element.
        child.tail = root.text
        root.text = None
def run(self, root):
    ''' Find and remove all id specs references from the text, and add them
    as the id attribute of the element.

    ROOT is div#section_content.  Returns ROOT.
    '''
    if isBlockLevel(root.tag) and root.tag not in ['code', 'pre']:
        self._parseID(root)
        # NOTE: no stylesheet is injected.  Earlier code built a <style>
        # element from SECTIONLINK_CSS here but the insert into ROOT was
        # commented out, so the element was discarded; that dead
        # construction (and its Python-2-only dict.iteritems()) is gone.
    return root
def run(self, root):
    ''' Find and remove all id specs references from the text, and add them
    as the id attribute of the element.

    ROOT is div#section_content.  Returns ROOT.
    '''
    eligible = isBlockLevel(root.tag) and root.tag not in ['code', 'pre']
    if eligible:
        self._parseID(root)
        # NOTE: the <style> element formerly built here from
        # SECTIONLINK_CSS was never inserted into ROOT (the insert was
        # commented out), so building it had no effect and was removed
        # together with its Python-2-only dict.iteritems() call.
    return root
def _parseID(self, element):
    ''' Recursively parse all {#idname}s at eol into ids.

    For a block-level element other than code/pre, ID_RE is applied to the
    text repeatedly until it no longer matches or the text is blank.  A
    match whose 'type' group is '#' sets the ``id`` attribute; any other
    type sets ``class``.  Each match truncates the text at its start.
    For '#' matches on tags not in HEADER_TAGS a permalink <a> is appended
    and a hover selector for the tag is added to ``self.css`` once per
    tag.  Returns ELEMENT after processing its children.
    '''
    if isBlockLevel(element.tag) and element.tag not in ['code', 'pre']:
        while element.text and element.text.strip():
            m = ID_RE.search(element.text)
            if not m:
                break
            if m.group('type') == '#':
                element.set('id', m.group('id'))
                element.text = element.text[:m.start()]
                # TODO: should this be restricted to <h1>..<h4> only?
                if element.tag not in HEADER_TAGS:
                    child = etree.Element("a")
                    # Set directly: dict.iteritems() is Python-2-only.
                    child.set('class', 'elementid-permalink')
                    child.set('href', '#' + m.group('id'))
                    child.set('title', 'Permanent link')
                    child.text = "¶"  # U+00B6 PILCROW SIGN
                    element.append(child)
                    # NOTE(review): the once-per-tag CSS bookkeeping is
                    # assumed to belong with the permalink -- confirm.
                    if element.tag not in self.seen_block_tag:
                        self.css += ('%s:hover > .elementid-permalink, '
                                     % element.tag)
                        self.seen_block_tag[element.tag] = True
            else:
                element.set('class', m.group('id'))
                element.text = element.text[:m.start()]
        for e in element:
            self._parseID(e)
    return element
def run(self, root):
    ''' Find and remove all id specs references from the text, and add them
    as the id attribute of the element.

    ROOT is div#section_content.  After the ids are parsed, a
    <style type="text/css"> element holding the accumulated ``self.css``
    is inserted as ROOT's first child, and ``self.css`` and
    ``self.seen_block_tag`` are reset.  Returns ROOT.
    '''
    if isBlockLevel(root.tag) and root.tag not in ['code', 'pre']:
        self._parseID(root)
        child = etree.Element("style")
        # Set directly: dict.iteritems() does not exist on Python 3.
        child.set('type', 'text/css')
        # Appended after _parseID so it follows whatever was added to
        # self.css while parsing.
        self.css += 'dt:hover > .elementid-permalink { visibility: visible }'
        # Note upstream doc bug: it's not called markdown.AtomicString().
        child.text = markdown.util.AtomicString(self.css)
        root.insert(0, child)
        # Reset the accumulated state.
        # NOTE(review): presumably so a later run starts clean -- confirm.
        self.css = CSS
        self.seen_block_tag = {}
    return root
def run(self, doc):
    """Move attribute markers out of element text/tails onto the elements.

    Block-level elements are searched with ``BLOCK_RE``; headers and
    ``dt`` elements use ``HEADER_RE``.  List items with children get
    special handling because a nested ``ul``/``ol`` may follow the item's
    own text.  ``td`` elements fall back to ``BASE_RE`` when the regular
    pattern finds nothing.  Other elements are matched with ``INLINE_RE``
    at the start of their tail.  Every match is passed to
    ``assign_attrs`` and cut from the string it was found in.
    """
    for elem in doc.getiterator():
        if isBlockLevel(elem.tag):
            # Block level: check for attrs on last line of text
            RE = self.BLOCK_RE
            if isheader(elem) or elem.tag == 'dt':
                # header or def-term: check for attrs at end of line
                RE = self.HEADER_RE
            if len(elem) and elem.tag == 'li':
                # special case list items. children may include a ul or ol.
                pos = None
                # find the ul or ol position
                for i, child in enumerate(elem):
                    if child.tag in ['ul', 'ol']:
                        pos = i
                        break
                if pos is None and elem[-1].tail:
                    # use tail of last child. no ul or ol.
                    m = RE.search(elem[-1].tail)
                    if m:
                        self.assign_attrs(elem, m.group(1))
                        elem[-1].tail = elem[-1].tail[:m.start()]
                elif pos is not None and pos > 0 and elem[pos-1].tail:
                    # use tail of last child before ul or ol
                    m = RE.search(elem[pos-1].tail)
                    if m:
                        self.assign_attrs(elem, m.group(1))
                        elem[pos-1].tail = elem[pos-1].tail[:m.start()]
                elif elem.text:
                    # use text. ul is first child.
                    m = RE.search(elem.text)
                    if m:
                        self.assign_attrs(elem, m.group(1))
                        elem.text = elem.text[:m.start()]
            elif len(elem) and elem[-1].tail:
                # has children. Get from tail of last child
                m = RE.search(elem[-1].tail)
                if m:
                    self.assign_attrs(elem, m.group(1))
                    elem[-1].tail = elem[-1].tail[:m.start()]
                    if isheader(elem):
                        # clean up trailing #s
                        elem[-1].tail = elem[-1].tail.rstrip('#').rstrip()
            elif elem.text:
                # no children. Get from text.
                m = RE.search(elem.text)
                if not m and elem.tag == 'td':
                    # Table cells: retry with the base pattern.  (A debug
                    # print of the match used to live here; removed.)
                    m = re.search(self.BASE_RE, elem.text)
                if m:
                    self.assign_attrs(elem, m.group(1))
                    elem.text = elem.text[:m.start()]
                    if isheader(elem):
                        # clean up trailing #s
                        elem.text = elem.text.rstrip('#').rstrip()
        else:
            # inline: check for attrs at start of tail
            if elem.tail:
                m = self.INLINE_RE.match(elem.tail)
                if m:
                    self.assign_attrs(elem, m.group(1))
                    elem.tail = elem.tail[m.end():]
def run(self, lines):
    # Split the joined input into blank-line-separated blocks, hand blocks
    # that look like raw block-level HTML to ``self.markdown.htmlStash.store``
    # and collect whatever it returns (presumably a placeholder) in their
    # place; other blocks pass through unchanged.  Returns the re-joined
    # text as a list of lines.
    text = '\n'.join(lines)
    new_blocks = []
    # From here on ``text`` is a list of blocks, consumed from the front.
    text = text.split('\n\n')
    # Blocks of an HTML element that spans several blocks, collected until
    # its closing tag is found.
    items = []
    left_tag = ''
    right_tag = ''
    # True while inside an element whose closing tag has not been seen yet.
    in_tag = False
    while text:
        block = text[0]
        if block.startswith('\n'):
            block = block[1:]
        text = text[1:]
        if block.startswith('\n'):
            block = block[1:]
        if not in_tag:
            if block.startswith('<') and len(block.strip()) > 1:
                if block[1] == '!':
                    # Blocks starting with "<!" get the pseudo tag '--'.
                    left_tag, left_index, attrs = '--', 2, {}
                else:
                    left_tag, left_index, attrs = self._get_left_tag(block)
                right_tag, data_index = self._get_right_tag(left_tag, left_index, block)
                if data_index < len(block) and (util.isBlockLevel(left_tag) or left_tag == '--'):
                    # Text beyond data_index is pushed back to be handled
                    # as a block of its own.
                    text.insert(0, block[data_index:])
                    block = block[:data_index]
                if not (util.isBlockLevel(left_tag) or block[1] in ('!', '?', '@', '%')):
                    # Neither block level nor a special "<!", "<?", "<@",
                    # "<%" opener: leave the block untouched.
                    new_blocks.append(block)
                    continue
                if self._is_oneliner(left_tag):
                    new_blocks.append(block.strip())
                    continue
                if block.rstrip().endswith('>') and self._equal_tags(left_tag, right_tag):
                    # The whole element is contained in this one block.
                    if self.markdown_in_raw and 'markdown' in attrs.keys():
                        # Stash the opening tag (minus its markdown
                        # attribute) and the closing part separately and
                        # keep the content between them as a normal block.
                        start = re.sub('\\smarkdown(=[\\\'"]?[^> ]*[\\\'"]?)?', '', block[:left_index])
                        # len(right_tag) + 2 presumably covers the closing
                        # tag's extra characters -- TODO confirm.
                        end = block[-len(right_tag) - 2:]
                        block = block[left_index:-len(right_tag) - 2]
                        new_blocks.append(self.markdown.htmlStash.store(start))
                        new_blocks.append(block)
                        new_blocks.append(self.markdown.htmlStash.store(end))
                    else:
                        new_blocks.append(self.markdown.htmlStash.store(block.strip()))
                    continue
                else:
                    # NOTE: ``and`` binds tighter than ``or``, so this reads
                    # as isBlockLevel(left_tag) or (left_tag == '--' and
                    # the block does not end with '>').
                    if util.isBlockLevel(left_tag) or left_tag == '--' and not block.rstrip().endswith('>'):
                        # Incomplete element: start collecting its blocks.
                        items.append(block.strip())
                        in_tag = True
                    else:
                        new_blocks.append(self.markdown.htmlStash.store(block.strip()))
                    continue
            # Reached only when the block does not start with '<' (or is a
            # lone character); every HTML path above ends in ``continue``.
            new_blocks.append(block)
        else:
            items.append(block)
            right_tag, data_index = self._get_right_tag(left_tag, 0, block)
            if self._equal_tags(left_tag, right_tag):
                # Closing tag found.
                if data_index < len(block):
                    # Text after the closing tag goes back on the queue.
                    items[-1] = block[:data_index]
                    text.insert(0, block[data_index:])
                in_tag = False
                if self.markdown_in_raw and 'markdown' in attrs.keys():
                    start = re.sub('\\smarkdown(=[\\\'"]?[^> ]*[\\\'"]?)?', '', items[0][:left_index])
                    items[0] = items[0][left_index:]
                    end = items[-1][-len(right_tag) - 2:]
                    items[-1] = items[-1][:-len(right_tag) - 2]
                    new_blocks.append(self.markdown.htmlStash.store(start))
                    new_blocks.extend(items)
                    new_blocks.append(self.markdown.htmlStash.store(end))
                else:
                    new_blocks.append(self.markdown.htmlStash.store('\n\n'.join(items)))
                items = []
    if items:
        # Input ended while an element was still open: flush what was
        # collected.
        if self.markdown_in_raw and 'markdown' in attrs.keys():
            start = re.sub('\\smarkdown(=[\\\'"]?[^> ]*[\\\'"]?)?', '', items[0][:left_index])
            items[0] = items[0][left_index:]
            end = items[-1][-len(right_tag) - 2:]
            items[-1] = items[-1][:-len(right_tag) - 2]
            new_blocks.append(self.markdown.htmlStash.store(start))
            new_blocks.extend(items)
            # Unlike the in-loop case, a blank ``end`` is not stashed.
            if end.strip():
                new_blocks.append(self.markdown.htmlStash.store(end))
        else:
            new_blocks.append(self.markdown.htmlStash.store('\n\n'.join(items)))
        new_blocks.append('\n')
    new_text = '\n\n'.join(new_blocks)
    return new_text.split('\n')
def run(self, lines):
    # Partition the input into blocks separated by blank lines.  Blocks
    # recognised as raw block-level HTML are passed to
    # ``self.markdown.htmlStash.store`` and replaced by its return value
    # (presumably a placeholder); everything else is kept as is.  The
    # result is returned as a list of lines.
    text = '\n'.join(lines)
    new_blocks = []
    # ``text`` becomes a work queue of blocks, taken from the front.
    text = text.split('\n\n')
    # Accumulates the blocks of an element that is still open.
    items = []
    left_tag = ''
    right_tag = ''
    # Set while waiting for the closing tag of a multi-block element.
    in_tag = False
    while text:
        block = text[0]
        if block.startswith('\n'):
            block = block[1:]
        text = text[1:]
        if block.startswith('\n'):
            block = block[1:]
        if not in_tag:
            if block.startswith('<') and len(block.strip()) > 1:
                if block[1] == '!':
                    # "<!" openers are tracked under the pseudo tag '--'.
                    left_tag, left_index, attrs = '--', 2, {}
                else:
                    left_tag, left_index, attrs = self._get_left_tag(block)
                right_tag, data_index = self._get_right_tag(
                    left_tag, left_index, block)
                if data_index < len(block) and (util.isBlockLevel(left_tag)
                                                or left_tag == '--'):
                    # Re-queue whatever follows data_index as its own block.
                    text.insert(0, block[data_index:])
                    block = block[:data_index]
                if not (util.isBlockLevel(left_tag)
                        or block[1] in ('!', '?', '@', '%')):
                    # Not block level and not a special opener: pass through.
                    new_blocks.append(block)
                    continue
                if self._is_oneliner(left_tag):
                    new_blocks.append(block.strip())
                    continue
                if block.rstrip().endswith('>') and self._equal_tags(
                        left_tag, right_tag):
                    # Opening and closing tag are both in this block.
                    if self.markdown_in_raw and 'markdown' in attrs.keys():
                        # Stash the opening tag without its markdown
                        # attribute, keep the inner content as a normal
                        # block, then stash the closing part.
                        start = re.sub(
                            '\\smarkdown(=[\\\'"]?[^> ]*[\\\'"]?)?', '',
                            block[:left_index])
                        # The extra 2 presumably accounts for characters
                        # of the closing tag beyond its name -- TODO confirm.
                        end = block[-len(right_tag) - 2:]
                        block = block[left_index:-len(right_tag) - 2]
                        new_blocks.append(
                            self.markdown.htmlStash.store(start))
                        new_blocks.append(block)
                        new_blocks.append(
                            self.markdown.htmlStash.store(end))
                    else:
                        new_blocks.append(
                            self.markdown.htmlStash.store(block.strip()))
                    continue
                else:
                    # NOTE: evaluated as isBlockLevel(left_tag) or
                    # (left_tag == '--' and not ...endswith('>')) because
                    # ``and`` has higher precedence than ``or``.
                    if util.isBlockLevel(
                            left_tag
                    ) or left_tag == '--' and not block.rstrip().endswith(
                            '>'):
                        # Element not closed yet: start accumulating.
                        items.append(block.strip())
                        in_tag = True
                    else:
                        new_blocks.append(
                            self.markdown.htmlStash.store(block.strip()))
                    continue
            # Only non-HTML blocks get here; the HTML paths all ``continue``.
            new_blocks.append(block)
        else:
            items.append(block)
            right_tag, data_index = self._get_right_tag(left_tag, 0, block)
            if self._equal_tags(left_tag, right_tag):
                # The open element is closed in this block.
                if data_index < len(block):
                    # Trailing text after the closing tag is re-queued.
                    items[-1] = block[:data_index]
                    text.insert(0, block[data_index:])
                in_tag = False
                if self.markdown_in_raw and 'markdown' in attrs.keys():
                    start = re.sub('\\smarkdown(=[\\\'"]?[^> ]*[\\\'"]?)?', '',
                                   items[0][:left_index])
                    items[0] = items[0][left_index:]
                    end = items[-1][-len(right_tag) - 2:]
                    items[-1] = items[-1][:-len(right_tag) - 2]
                    new_blocks.append(self.markdown.htmlStash.store(start))
                    new_blocks.extend(items)
                    new_blocks.append(self.markdown.htmlStash.store(end))
                else:
                    new_blocks.append(
                        self.markdown.htmlStash.store('\n\n'.join(items)))
                items = []
    if items:
        # The input ran out with an element still open: emit what we have.
        if self.markdown_in_raw and 'markdown' in attrs.keys():
            start = re.sub('\\smarkdown(=[\\\'"]?[^> ]*[\\\'"]?)?', '',
                           items[0][:left_index])
            items[0] = items[0][left_index:]
            end = items[-1][-len(right_tag) - 2:]
            items[-1] = items[-1][:-len(right_tag) - 2]
            new_blocks.append(self.markdown.htmlStash.store(start))
            new_blocks.extend(items)
            # Here, unlike inside the loop, a blank ``end`` is skipped.
            if end.strip():
                new_blocks.append(self.markdown.htmlStash.store(end))
        else:
            new_blocks.append(
                self.markdown.htmlStash.store('\n\n'.join(items)))
        new_blocks.append('\n')
    new_text = '\n\n'.join(new_blocks)
    return new_text.split('\n')