def guessParagraphs(s, dont_wrap=["h1", "h2", "h3", "pre", "center", "table"]):
    """
    Guess paragraphs in HTML string `s` and return the "beautified" HTML.

    Children of the first parsed element are split into runs delimited by
    opening tags listed in `dont_wrap`. Each run is handed to
    `__processBuffer` (which, per the original comments, turns it into
    <p>aragraphs). Blank <p> elements are then blanked out, whitespace around
    <p>/<h*> tags is normalized and lines starting with a bullet are
    rewritten to <li> items.

    Args:
        s: HTML source to process.
        dont_wrap: Lowercase tag names which delimit the runs and are not
            put into any run themselves. The default list is only read,
            never mutated.

    Returns:
        Serialized DOM (`str(node)`) after all replacements.
    """
    # parse string and make it double-linked tree
    node = d.parseString(s)
    d.makeDoubleLinked(node)

    # get all elements between <hx> (headers) - they will be converted to
    # <p>aragraphs
    tmp = []
    buffs = []
    for el in node.childs[0].childs:
        if el.getTagName().lower() in dont_wrap and not el.isEndTag():
            buffs.append(tmp)
            tmp = []
        else:
            tmp.append(el)
    buffs.append(tmp)

    # process paragraphs
    for buff in buffs:
        __processBuffer(buff)

    # Remove blank <p>aragraphs. This used to be map(..., filter(...)) called
    # only for its side effects; map() is lazy on Python 3, so nothing was
    # ever replaced there. The blank elements are collected first and
    # replaced afterwards, which matches the old eager evaluation order.
    blank_paragraphs = [
        p for p in node.find("p") if p.getContent().strip() == ""
    ]
    for p in blank_paragraphs:
        p.replaceWith(d.HTMLElement(""))

    # plain string replacements - applied one after another, order matters
    replacements = [
        ("<p>", "\n<p>"),
        ("</p>", "</p>\n\n"),
        ("<p>\n", "<p>"),
        ("<h", "\n<h"),
        ("\t", ""),
        ("<p><br />\n", "<p>"),
        ("<p></p>\n", ""),
    ]

    # regexp replacements - convert bullet lines to <li> items
    regular_replacements = [
        (r"• (.*)</p>\n", r"<li>\1</li>\n</p>\n"),
        (r"• (.*)\n", r"<li>\1</li>\n"),
    ]

    str_node = str(node)

    for old, new in replacements:
        str_node = str_node.replace(old, new)

    for pattern, substitution in regular_replacements:
        str_node = re.sub(pattern, substitution, str_node)

    return str_node
def getNodePath(dom, nodeid):
    """
    Return file path of the node with given |nodeid|.

    The path is built from the `name` parameters of the <node> element and
    its <node> ancestors, joined with '/'. A node which contains other nodes
    gets an `index` file inside its directory. Everything ends with `.html`,
    unless a `filename:` tag of the node overrides the file name.

    Args:
        dom: Parsed DOM tree; it is made double-linked if it isn't already.
        nodeid: Value matched (as string) against the `unique_id` parameter.

    Returns:
        Result of `utfToFilename()` applied to the composed path.
    """
    # make sure that .parent references are available
    first_child = dom.childs[0]
    if not hasattr(first_child, 'parent') or first_child.parent != dom:
        d.makeDoubleLinked(dom)

    # get reference to node
    node = dom.find("node", {"unique_id": str(nodeid)})[0]

    # first tag starting with 'filename:' overrides the name of the file
    tags = node.params["tags"].split() if "tags" in node.params else []
    filename_tag = next(
        (tag for tag in tags if tag.startswith("filename:")),
        None
    )
    new_filename = None
    if filename_tag is not None:
        new_filename = filename_tag.split(":")[1]

    # node which contains another nodes is treated as a directory
    is_directory = len(node.find("node")) > 1

    # collect names from the node up to the root (leaf first)
    segments = []
    while node.parent is not None and node.getTagName().lower() == "node":
        segments.append(node.params["name"])
        node = node.parent
    segments.reverse()

    if is_directory:
        segments.append("index")  # index file for directory

    path = "/".join(segments) + ".html"

    # apply new_filename from tags parameter of node
    if new_filename is not None:
        directory = os.path.dirname(path)
        if directory.strip() != "":
            directory += "/"
        path = directory + new_filename

    return utfToFilename(path)
def guessParagraphs(s, dont_wrap=["h1", "h2", "h3", "pre", "center", "table"]):
    """
    Guess paragraphs in HTML string `s` and return the "beautified" HTML.

    Children of the first parsed element are split into runs delimited by
    opening tags listed in `dont_wrap`. Each run is handed to
    `__processBuffer` (which, per the original comments, turns it into
    <p>aragraphs). Blank <p> elements are then blanked out and whitespace
    around <p>/<h*> tags is normalized.

    Args:
        s: HTML source to process.
        dont_wrap: Lowercase tag names which delimit the runs and are not
            put into any run themselves. The default list is only read,
            never mutated.

    Returns:
        Serialized DOM (`str(node)`) after all replacements.
    """
    # parse string and make it double-linked tree
    node = d.parseString(s)
    d.makeDoubleLinked(node)

    # get all elements between <hx> (headers) - they will be converted to
    # <p>aragraphs
    tmp = []
    buffs = []
    for el in node.childs[0].childs:
        if el.getTagName().lower() in dont_wrap and not el.isEndTag():
            buffs.append(tmp)
            tmp = []
        else:
            tmp.append(el)
    buffs.append(tmp)

    # process paragraphs
    for buff in buffs:
        __processBuffer(buff)

    # Remove blank <p>aragraphs. This used to be map(..., filter(...)) called
    # only for its side effects; map() is lazy on Python 3, so nothing was
    # ever replaced there. The blank elements are collected first and
    # replaced afterwards, which matches the old eager evaluation order.
    blank_paragraphs = [
        p for p in node.find("p") if p.getContent().strip() == ""
    ]
    for p in blank_paragraphs:
        p.replaceWith(d.HTMLElement(""))

    # return "beautified" string - replacements are applied in this order
    replacements = (
        ("<p>", "\n<p>"),
        ("</p>", "</p>\n\n"),
        ("<p>\n", "<p>"),
        ("<h", "\n<h"),
        ("<p><br />\n", "<p>"),  # don't ask..
    )

    output = str(node)
    for old, new in replacements:
        output = output.replace(old, new)

    return output