def handleMatch(self, m):
    """Build a ``<div class="figure">`` element for a matched figure.

    The match object is expected to expose the alt text in group 2, the
    figure number in group 3 and the "source [title]" part in group 4.

    Returns the new ``div`` element, which contains an ``img`` followed
    by a ``<div class="caption">`` holding the figure label and alt text.
    """
    alt_text = m.group(2)
    pieces = m.group(4).split()

    figure = etree.Element("div")
    figure.set("class", "figure")
    image = etree.SubElement(figure, "img")

    if not pieces:
        image.set('src', "")
    else:
        url = pieces[0]
        # Strip the optional <angle brackets> wrapped around the URL.
        if url.startswith("<") and url.endswith(">"):
            url = url[1:-1]
        image.set('src', self.sanitize_url(url))

    # Everything after the URL is treated as the (quoted) title.
    if len(pieces) > 1:
        image.set('title', dequote(" ".join(pieces[1:])))

    image.set('alt', alt_text)

    caption = etree.SubElement(figure, "div")
    caption.set("class", "caption")
    caption.text = "__Figure " + m.group(3) + ".__ " + alt_text
    return figure
def run(self, parent, blocks):
    """Parse a table block and build the table under ``parent``.

    The first entry of ``blocks`` is consumed. Its first line is the
    header row, its second line is the separator row that defines the
    column alignment, and all remaining lines are body rows.

    Arguments:

    * parent: element that receives the new ``<table>``
    * blocks: list of text blocks; the first one is popped

    Returns: None
    """
    block = blocks.pop(0).split('\n')
    header = block[:2]
    rows = block[2:]
    # Get format type (bordered by pipes or not)
    border = header[0].startswith('|')
    # Get alignment of columns from the separator row
    align = []
    for c in self._split_row(header[1], border):
        if c.startswith(':') and c.endswith(':'):
            align.append('center')
        elif c.startswith(':'):
            align.append('left')
        elif c.endswith(':'):
            align.append('right')
        else:
            align.append(None)
    # Build table. The class is set as a real attribute: putting it in
    # the tag name would also leak into the closing tag
    # (``</table class="table">``) and hide the element from any code
    # that looks for ``tag == 'table'``.
    table = etree.SubElement(parent, 'table')
    table.set('class', 'table')
    thead = etree.SubElement(table, 'thead')
    self._build_row(header[0], thead, align, border)
    tbody = etree.SubElement(table, 'tbody')
    for row in rows:
        self._build_row(row, tbody, align, border)
def makeFootnotesDiv(self, root):
    """Return the footnotes as a ``<div class="footnote">`` element.

    Returns None when no footnotes are registered. Otherwise the div
    holds an ``<hr>`` followed by an ``<ol>`` with one ``<li>`` per
    footnote. Each footnote body is parsed into its ``<li>`` and a
    backlink to the reference is attached to the last child.

    ``root`` is accepted for interface compatibility and is not used.
    """
    if not self.footnotes.keys():
        return None

    div = etree.Element("div")
    div.set('class', 'footnote')
    etree.SubElement(div, "hr")
    ol = etree.SubElement(div, "ol")

    for key in self.footnotes.keys():
        li = etree.SubElement(ol, "li")
        li.set("id", self.makeFootnoteId(key))
        self.parser.parseChunk(li, self.footnotes[key])

        backlink = etree.Element("a")
        backlink.set("href", "#" + self.makeFootnoteRefId(key))
        backlink.set("rev", "footnote")
        backlink.set("title", "Jump back to footnote %d in the text" %
                     (self.footnotes.index(key) + 1))
        backlink.text = FN_BACKLINK_TEXT

        # len() replaces Element.getchildren(), which was removed in
        # Python 3.9; it gives the same "has children" answer.
        if len(li):
            node = li[-1]
            if node.tag == "p":
                # The paragraph text may be None (e.g. it starts with a
                # child element); treat that as empty instead of raising.
                node.text = (node.text or "") + NBSP_PLACEHOLDER
                node.append(backlink)
            else:
                p = etree.SubElement(li, "p")
                p.append(backlink)
        # NOTE(review): a footnote whose <li> ends up with no children
        # gets no backlink at all - kept as in the original; confirm
        # that this is intended.
    return div
def __processParagraph(self, parentElem, paragraph, inList, looseList):
    """Add the lines of one paragraph to ``parentElem``.

    Keyword arguments:

    * parentElem: ElementTree element to which the content will be added
    * paragraph: a list of lines
    * inList: a level (not used by this method)
    * looseList: true when the enclosing list is "loose"

    Returns: None
    """
    # len() replaces Element.getchildren(), which was removed in
    # Python 3.9; both answer "does the element have children yet?".
    if parentElem.tag == 'li' and not (looseList or len(parentElem)):
        # If this is the first paragraph inside "li", don't
        # put <p> around it - append the paragraph bits directly
        # onto parentElem
        el = parentElem
    else:
        # Otherwise make a "p" element
        el = etree.SubElement(parentElem, "p")

    dump = []

    # Searching for hr or header
    # NOTE(review): text collected after an hr/header line replaces the
    # el.text that was stored before it - this looks lossy; confirm.
    for line in paragraph:
        if CORE_RE["isline3"].match(line):
            # it's hr
            el.text = "\n".join(dump)
            self.__processHR(el)
            dump = []
        elif line.startswith("#"):
            # it's header
            el.text = "\n".join(dump)
            self.__processHeader(parentElem, [line])
            dump = []
        else:
            dump.append(line)

    if dump:
        el.text = "\n".join(dump)
def _build_row(self, row, parent, align, border):
    """Append one ``<tr>`` built from a row of text to ``parent``.

    Cells are ``<th>`` when ``parent`` is a ``thead`` element and
    ``<td>`` otherwise. One cell is created per entry of ``align`` so
    that every row has the same number of columns: missing cells become
    empty strings and surplus cells are ignored.
    """
    cell_tag = 'th' if parent.tag == 'thead' else 'td'
    tr = etree.SubElement(parent, 'tr')
    cells = self._split_row(row, border)
    available = len(cells)

    for index, alignment in enumerate(align):
        cell = etree.SubElement(tr, cell_tag)
        if index < available:
            cell.text = cells[index].strip()
        else:
            # The row is shorter than the header: pad with an empty cell.
            cell.text = ""
        if alignment:
            cell.set('align', alignment)
def __processHeader(self, parentElem, paragraph):
    """Turn the first line of ``paragraph`` into a heading element.

    The heading level is the length of group 1 of the ``header``
    pattern and its text is the stripped group 2. When the line does
    not match, a CRITICAL message is emitted and nothing is added.
    """
    match = CORE_RE['header'].match(paragraph[0])
    if not match:
        message(CRITICAL, "We've got a problem header!")
        return

    depth = len(match.group(1))
    heading = etree.SubElement(parentElem, "h%d" % depth)
    heading.text = match.group(2).strip()
def handleMatch(self, m):
    """Build the ``<sup><a>`` element for a footnote reference.

    Group 2 of the match is the footnote key. The ``sup`` carries the
    reference id, the link points at the footnote itself and shows the
    1-based position of the key among the registered footnotes.
    """
    key = m.group(2)
    registry = self.footnotes

    sup = etree.Element("sup")
    sup.set('id', registry.makeFootnoteRefId(key))

    link = etree.SubElement(sup, "a")
    link.set('href', '#' + registry.makeFootnoteId(key))
    link.set('rel', 'footnote')
    link.text = str(registry.footnotes.index(key) + 1)
    return sup
def run(self, parent, blocks):
    """Parse a definition-list block and add it under ``parent``.

    The first entry of ``blocks`` is consumed. Text before the match of
    ``self.RE`` supplies the terms (one per non-blank line); group 2 of
    the match plus any detabbed continuation is the definition. When
    the block holds no terms and the previous sibling is a paragraph,
    that paragraph is removed and its lines become the terms. The item
    is appended to a directly preceding ``<dl>`` if there is one,
    otherwise a new ``<dl>`` is created. Unconsumed text is pushed back
    onto the front of ``blocks``.

    Returns: None
    """
    block = blocks.pop(0)
    m = self.RE.search(block)
    terms = [l.strip() for l in block[:m.start()].split('\n') if l.strip()]
    d, theRest = self.detab(block[m.end():])
    if d:
        d = '%s\n%s' % (m.group(2), d)
    else:
        d = m.group(2)

    sibling = self.lastChild(parent)
    # lastChild() gives None for an empty parent; compare against None
    # explicitly so that case neither raises nor depends on Element
    # truthiness (which is False for any element without children).
    if not terms and sibling is not None and sibling.tag == 'p':
        # The previous paragraph contains the terms
        state = 'looselist'
        terms = sibling.text.split('\n')
        parent.remove(sibling)
        # Acquire new sibling
        sibling = self.lastChild(parent)
    else:
        state = 'list'

    if sibling is not None and sibling.tag == 'dl':
        # This is another item on an existing list
        dl = sibling
        if len(dl) and dl[-1].tag == 'dd' and len(dl[-1]):
            state = 'looselist'
    else:
        # This is a new list
        dl = etree.SubElement(parent, 'dl')

    # Add terms
    for term in terms:
        dt = etree.SubElement(dl, 'dt')
        dt.text = term

    # Add definition, parsed in the state chosen above
    self.parser.state.set(state)
    dd = etree.SubElement(dl, 'dd')
    self.parser.parseBlocks(dd, [d])
    self.parser.state.reset()

    if theRest:
        blocks.insert(0, theRest)
def __processCodeBlock(self, parentElem, lines, inList):
    """
    Given a list of document lines starting with a code block,
    find the end of the block, put it into the ElementTree verbatim
    (wrapped in "<pre><code>") and recursively process the remainder
    of the text.

    Keyword arguments:

    * parentElem: ElementTree element to which the content will be added
    * lines: a list of lines
    * inList: a level

    Returns: None
    """
    code_lines, remainder = self.detectTabbed(lines)

    # Trailing whitespace is dropped and exactly one newline is kept.
    body = "\n".join(code_lines).rstrip() + "\n"

    pre = etree.SubElement(parentElem, "pre")
    etree.SubElement(pre, "code").text = markdown.AtomicString(body)

    self.parseChunk(parentElem, remainder, inList)
def run(self, root):
    """Build an RSS 2.0 tree from the top-level elements of ``root``.

    The channel gets ``title`` and ``link`` from the extension config
    (``TITLE`` and ``URL``) and an empty ``description``. Every h1-h5
    child of ``root`` starts a new ``item``; every ``p`` child adds a
    ``description`` to the most recent item, with its content wrapped
    in CDATA and stored in the HTML stash. Paragraphs that appear
    before the first heading are skipped.

    Returns the ``rss`` element.
    """
    rss = etree.Element("rss")
    rss.set("version", "2.0")

    channel = etree.SubElement(rss, "channel")

    for tag, text in (("title", self.ext.getConfig("TITLE")),
                      ("link", self.ext.getConfig("URL")),
                      ("description", None)):
        element = etree.SubElement(channel, tag)
        element.text = text

    # Explicit "no item yet" marker instead of relying on catching
    # UnboundLocalError for control flow.
    item = None

    for child in root:
        if child.tag in ["h1", "h2", "h3", "h4", "h5"]:
            # A heading that starts with a child element has text None.
            heading = (child.text or "").strip()
            item = etree.SubElement(channel, "item")
            link = etree.SubElement(item, "link")
            link.text = self.ext.getConfig("URL")
            title = etree.SubElement(item, "title")
            title.text = heading

            guid = ''.join([x for x in heading if x.isalnum()])
            guidElem = etree.SubElement(item, "guid")
            guidElem.text = guid
            guidElem.set("isPermaLink", "false")

        elif child.tag in ["p"]:
            if item is None:
                # Item not defined - moving on
                continue
            description = etree.SubElement(item, "description")
            if len(child):
                # NOTE(review): only the child elements are serialized;
                # any leading text of the paragraph is not included.
                parts = []
                for node in child:
                    serialized = etree.tostring(node)
                    # tostring() may return bytes (it does on Python 3
                    # by default); decode so the join below works.
                    if not isinstance(serialized, str):
                        serialized = serialized.decode("utf-8")
                    parts.append(serialized)
                content = "\n".join(parts)
            else:
                content = child.text
            pholder = self.markdown.htmlStash.store(
                "<![CDATA[ %s]]>" % content)
            description.text = pholder

    return rss
def __processQuote(self, parentElem, lines, inList):
    """
    Given a list of document lines starting with a quote, find the end
    of the quote, unindent it and recursively process both the body of
    the quote and the remainder of the text.

    Keyword arguments:

    * parentElem: ElementTree element to which the content will be added
    * lines: a list of lines
    * inList: a level

    Returns: None
    """
    body = []
    # One blank line is allowed between paragraphs of the quote; an
    # unquoted line that follows a blank line ends it.
    after_blank = False

    for line in lines:
        match = CORE_RE['quoted'].match(line)
        if match:
            body.append(match.group(1))
            after_blank = False
        elif after_blank:
            break
        else:
            body.append(line)
            after_blank = line.strip() == ''

    # Exactly one entry was collected per consumed line.
    consumed = len(body)

    blockquote = etree.SubElement(parentElem, "blockquote")
    self.parseChunk(blockquote, body, inList)
    self.parseChunk(parentElem, lines[consumed:], inList)
def run(self, doc):
    """Build a table of contents for ``doc`` and insert it in place.

    A ``<div class="alert alert-info">`` holding nested ``<ul>`` lists
    is built from the h1-h6 elements of ``doc``. Every heading without
    an ``id`` receives one from the configured ``slugify`` callable
    (suffixed with ``_N`` when the slug is already in use). Any
    non-heading element whose text contains the configured marker is
    replaced by the div. With ``anchorlink`` enabled, the heading text
    is moved into a self-link inside the heading.

    ``doc`` is modified in place. Returns: None
    """
    div = etree.Element("div")
    div.attrib["class"] = "alert alert-info"
    last_li = None

    # Add title to the div
    if self.config["title"][0]:
        header = etree.SubElement(div, "span")
        header.attrib["class"] = "toctitle"
        header.text = self.config["title"][0]

    level = 0
    list_stack = [div]
    header_rgx = re.compile("[Hh][123456]")

    # Get a list of id attributes. Element.getiterator() was removed in
    # Python 3.9, so use iter() whenever the element provides it.
    walk = doc.iter if hasattr(doc, "iter") else doc.getiterator
    used_ids = [el.attrib["id"] for el in walk() if "id" in el.attrib]

    for (p, c) in self.iterparent(doc):
        if not c.text:
            continue

        # To keep the output from screwing up the
        # validation by putting a <div> inside of a <p>
        # we actually replace the <p> in its entirety.
        # We do not allow the marker inside a header as that
        # would cause an endless loop of placing a new TOC
        # inside previously generated TOC.
        if (self.config["marker"][0] in c.text
                and not header_rgx.match(c.tag)):
            for i in range(len(p)):
                if p[i] == c:
                    p[i] = div
                    break

        if header_rgx.match(c.tag):
            tag_level = int(c.tag[-1])

            # Climb back out of lists deeper than this heading.
            while tag_level < level:
                list_stack.pop()
                level -= 1

            if tag_level > level:
                newlist = etree.Element("ul")
                # Explicit None test: Element truthiness reflects the
                # number of children and is deprecated.
                if last_li is not None:
                    last_li.append(newlist)
                else:
                    list_stack[-1].append(newlist)
                list_stack.append(newlist)
                level += 1

            # Do not override pre-existing ids
            if "id" not in c.attrib:
                anchor_id = self.config["slugify"][0](c.text)
                if anchor_id in used_ids:
                    ctr = 1
                    while "%s_%d" % (anchor_id, ctr) in used_ids:
                        ctr += 1
                    anchor_id = "%s_%d" % (anchor_id, ctr)
                used_ids.append(anchor_id)
                c.attrib["id"] = anchor_id
            else:
                anchor_id = c.attrib["id"]

            # List item link, to be inserted into the toc div
            last_li = etree.Element("li")
            link = etree.SubElement(last_li, "a")
            link.text = c.text
            link.attrib["href"] = '#' + anchor_id

            if int(self.config["anchorlink"][0]):
                anchor = etree.SubElement(c, "a")
                anchor.text = c.text
                anchor.attrib["href"] = "#" + anchor_id
                anchor.attrib["class"] = "toclink"
                c.text = ""

            list_stack[-1].append(last_li)
def __processList(self, parentElem, lines, inList, listexpr, tag):
    """
    Given a list of document lines starting with a list item, find the
    end of the list, break it up, and recursively process each list
    item and the remainder of the text.

    Keyword arguments:

    * parentElem: A ElementTree element to which the content will be added
    * lines: a list of lines
    * inList: a level
    * listexpr: key into CORE_RE for the pattern of this list's items
    * tag: tag name of the list element to create ('ul' or 'ol')

    Returns: None
    """
    list_elem = etree.SubElement(parentElem, tag)
    loose_flag = 0      # becomes 1 once a blank line separates list content

    entries = []        # one list of lines per list item
    current = -1        # index of the item being filled
    consumed = 0        # how many input lines belong to the list

    for line in lines:
        if line.strip():
            # Detect list items (at the current level), detabbing
            # child content where necessary.
            for kind in ['ul', 'ol', 'tabbed']:
                hit = CORE_RE[kind].match(line)
                if not hit:
                    continue
                if kind == 'tabbed':
                    # This line needs to be detabbed: keep what follows
                    # the 'tab'.
                    entries[current].append(hit.group(4))
                else:
                    # We are looking at a new item; an empty first line
                    # is allowed.
                    entries.append([hit.group(1)])
                    current += 1
                consumed += 1
                break
            else:
                # Just regular continuation of the current item
                entries[current].append(line)
                consumed += 1
            continue

        # A blank line _might_ be the end of the list.
        consumed += 1

        # Find the next non-blank line
        upcoming = None
        for candidate in lines[consumed:]:
            if candidate.strip():
                upcoming = candidate
                break
        if upcoming is None:
            # There is no more text => end of the list
            break

        # Check if the next non-blank line is still a part of the list
        if not (CORE_RE[listexpr].match(upcoming)
                or CORE_RE['tabbed'].match(upcoming)):
            break  # found end of the list

        # Keep the blank line (whitespace removed) inside the item.
        entries[current].append(line.strip())
        loose_flag = 1
    else:
        consumed += 1

    # Add the ElementTree elements
    for entry in entries:
        li = etree.SubElement(list_elem, "li")
        self.parseChunk(li, entry, inList + 1, looseList=loose_flag)

    # Process the remaining part of the section
    self.parseChunk(parentElem, lines[consumed:], inList)
def __processHR(self, parentElem):
    """Append an empty ``<hr>`` element to ``parentElem``."""
    etree.SubElement(parentElem, "hr")