def parseReplay(osrStream):
    """Read one replay record from ``osrStream`` and return it as a dict.

    The fields are consumed from the stream strictly in the order of
    ``header_layout`` below, followed by a length-prefixed LZMA blob and one
    trailing long.  (The field names suggest the osu! ``.osr`` replay
    layout -- confirm against the format specification.)

    Args:
        osrStream: Binary stream positioned at the start of the record.  It
            is handed to the ``parser.parse*`` helpers and must also provide
            ``read(n)``.

    Returns:
        dict: One entry per header field, plus ``'mouse_data'`` (the result
        of ``parseMouseData`` on the decompressed, UTF-8 decoded blob) and
        ``'unk'`` (the trailing long).
    """
    # (key, reader) pairs; the order is the on-disk order and must not change.
    header_layout = (
        ('mode', parser.parseByte),
        ('version', parser.parseInt),
        ('beatmap_md5', parser.parseString),
        ('player_name', parser.parseString),
        ('replay_md5', parser.parseString),
        ('300s', parser.parseShort),
        ('100s', parser.parseShort),
        ('50s', parser.parseShort),
        ('geki', parser.parseShort),
        ('katu', parser.parseShort),
        ('misses', parser.parseShort),
        ('score', parser.parseInt),
        ('combo', parser.parseShort),
        ('fc', parser.parseByte),
        ('mods', parser.parseInt),
        ('lifebar', parser.parseString),
        ('timestamp', parser.parseLong),
    )

    data = {}
    for key, read_field in header_layout:
        data[key] = read_field(osrStream)

    # The compressed payload is prefixed with its own byte length.
    payload_size = parser.parseInt(osrStream)
    compressed = osrStream.read(payload_size)
    decoded = lzma.decompress(compressed).decode('utf-8')
    data['mouse_data'] = parseMouseData(decoded)

    data['unk'] = parser.parseLong(osrStream)
    return data
def guessParagraphs(s, dont_wrap=("h1", "h2", "h3", "pre", "center", "table")):
    """
    Wrap loose content of the HTML string |s| into <p>aragraphs.

    The top-level children of the first parsed element are split into
    buffers at every opening tag listed in |dont_wrap|; each buffer is then
    handed to `__processBuffer`.  Blank <p> elements are removed afterwards
    and the serialized result is cleaned up with a series of plain and
    regular-expression replacements (the latter turn "• item" lines into
    <li> elements).

    Args:
        s (str): HTML source.
        dont_wrap (sequence of str): Lower-case tag names which delimit the
            buffers.

    Returns:
        str: The post-processed HTML.
    """
    # parse string and make it double-linked tree
    node = d.parseString(s)
    d.makeDoubleLinked(node)

    # get all elements between <hx> (headers) - they will be converted to
    # <p>aragraphs
    tmp = []
    buffs = []
    for el in node.childs[0].childs:
        if el.getTagName().lower() in dont_wrap and not el.isEndTag():
            buffs.append(tmp)
            tmp = []
        else:
            tmp.append(el)
    buffs.append(tmp)

    # process paragraphs
    for buff in buffs:
        __processBuffer(buff)

    # remove blank <p>aragraphs
    # An explicit loop is used instead of map(): map() is for building
    # values, and where it is lazy the replacements would never be executed.
    # The blank elements are collected first, so the DOM is not modified
    # while it is being searched.
    blank_paragraphs = [
        p for p in node.find("p") if p.getContent().strip() == ""
    ]
    for p in blank_paragraphs:
        p.replaceWith(d.HTMLElement(""))

    replacements = [
        ("<p>", "\n<p>"),
        ("</p>", "</p>\n\n"),
        ("<p>\n", "<p>"),
        ("<h", "\n<h"),
        ("\t", ""),
        ("<p><br />\n", "<p>"),
        ("<p></p>\n", ""),
    ]
    regular_replacements = [
        (r"• (.*)</p>\n", r"<li>\1</li>\n</p>\n"),
        (r"• (.*)\n", r"<li>\1</li>\n"),
    ]

    # The replacements are order dependent - each one sees the output of the
    # previous one.
    str_node = str(node)
    for old, new in replacements:
        str_node = str_node.replace(old, new)
    for pattern, substitution in regular_replacements:
        str_node = re.sub(pattern, substitution, str_node)

    return str_node
def __processBuffer(buff):
    """
    Convert array of elements in |buff| to paragraphs.

    Elements are grouped into a stack of paragraph candidates: content
    without a blank line ("\\n\\n") stays in the current group, text
    containing blank lines is split and every part after the first opens a
    new group.  Tags whose content contains a blank line are processed
    recursively through their `childs`.  Finally each group is passed to
    `elementsToP`.

    Args:
        buff (list): DOM elements (dhtmlparser elements are assumed - the
            code relies on `isTag`, `getContent`, `replaceWith`, `childs`
            and `parent`).
    """
    p_stack = [[]]

    for el in buff:
        content = el.getContent() if el.isTag() else str(el)

        # content without \n\n is just regular part of <p>
        if "\n\n" not in content:
            if "\n" in content:
                nel = d.parseString(str(el))
                nel.parent = el.parent
                el.replaceWith(nel)

            p_stack[-1].append(el)
            continue

        if el.isTag():
            __processBuffer(el.childs)
            continue

        # split by \n\n and convert it to tags
        # A real list is required here (not a lazy map object), because the
        # result is indexed, sliced and iterated more than once.
        parts = [d.HTMLElement(chunk) for chunk in content.split("\n\n")]

        # new tags are moved into blank container
        # original element is then replaced by this blank container
        repl = d.HTMLElement("")
        repl.childs = parts
        el.replaceWith(repl)

        # elements must have parents
        for part in parts:
            part.parent = el

        # first element is part of previous <p>
        # (str.split() with a separator always yields at least one item, so
        # parts[0] exists)
        p_stack[-1].append(parts[0])

        # other elements are new <p>s by itself
        # `parts` is sliced, not modified in place, because the very same
        # list object is referenced as `repl.childs`.
        for part in parts[1:]:
            p_stack.append([part])

    # convert stack of elements to <p>
    for p in p_stack:
        elementsToP(p)
def _processTable(table):
    """
    Convert cherrytree table to HTML table.

    The "char_offset" parameter is deleted from |table| (the passed element
    is modified; KeyError is raised when the parameter is missing), then the
    serialized element has its <cell>/<row> tags renamed to <td>/<tr> and is
    parsed again.

    Returns:
        Result of `d.parseString` on the converted markup.
    """
    del table.params["char_offset"]

    # (cherrytree tag, html tag) pairs, applied in this order
    tag_map = (
        ("<cell>", "<td>"),
        ("</cell>", "</td>"),
        ("<row>", "<tr>"),
        ("</row>", "</tr>\n"),
    )

    markup = str(table)
    for cherry_tag, html_tag in tag_map:
        markup = markup.replace(cherry_tag, html_tag)

    return d.parseString(markup)
def saveUserCSS(html_template, css, out_dir):
    """
    Save |css| to a file in |out_dir|.

    The filename is the basename of the `href` parameter of the first
    <link rel='stylesheet'> tag found in |html_template|.  "style.css" is
    used when there is no such tag, when the tag has no `href`, or when the
    `href` has no filename part (for example when it ends with "/").

    Args:
        html_template (str): HTML source which is searched for the <link>.
        css (str): Content of the stylesheet.
        out_dir (str): Directory into which the file is written.
    """
    default_name = "style.css"

    dom = d.parseString(html_template)
    links = dom.find("link", {"rel": "stylesheet"})

    css_name = default_name
    if links:
        href = links[0].params.get("href", default_name)

        # basename is empty for hrefs like "css/" - opening such a path
        # would target a directory, so fall back to the default
        css_name = os.path.basename(href) or default_name

    with open(os.path.join(out_dir, css_name), "wt") as fh:
        fh.write(css)
def saveUserCSS(html_template, css, out_dir):
    """
    Save |css| to a file in |out_dir|.

    The filename is the basename of the `href` parameter of the first
    <link rel='stylesheet'> tag found in |html_template|.  "style.css" is
    used when there is no such tag, when the tag has no `href`, or when the
    `href` has no filename part (for example when it ends with "/").

    Args:
        html_template (str): HTML source which is searched for the <link>.
        css (str): Content of the stylesheet.
        out_dir (str): Directory into which the file is written.
    """
    default_name = "style.css"

    dom = d.parseString(html_template)
    links = dom.find("link", {"rel": "stylesheet"})

    css_name = default_name
    if links:
        link_params = links[0].params
        if "href" in link_params:
            css_name = link_params["href"]

        # basename is empty for hrefs like "css/" - opening such a path
        # would target a directory, so fall back to the default
        css_name = os.path.basename(css_name) or default_name

    # context manager closes the file even if write() fails
    with open(os.path.join(out_dir, css_name), "wt") as fh:
        fh.write(css)
def guessParagraphs(s, dont_wrap=("h1", "h2", "h3", "pre", "center", "table")):
    """
    Wrap loose content of the HTML string |s| into <p>aragraphs.

    The top-level children of the first parsed element are split into
    buffers at every opening tag listed in |dont_wrap|; each buffer is then
    handed to `__processBuffer`.  Blank <p> elements are removed afterwards
    and the serialized result is "beautified" with a few string
    replacements.

    Args:
        s (str): HTML source.
        dont_wrap (sequence of str): Lower-case tag names which delimit the
            buffers.

    Returns:
        str: The post-processed HTML.
    """
    # parse string and make it double-linked tree
    node = d.parseString(s)
    d.makeDoubleLinked(node)

    # get all elements between <hx> (headers) - they will be converted to
    # <p>aragraphs
    tmp = []
    buffs = []
    for el in node.childs[0].childs:
        if el.getTagName().lower() in dont_wrap and not el.isEndTag():
            buffs.append(tmp)
            tmp = []
        else:
            tmp.append(el)
    buffs.append(tmp)

    # process paragraphs
    for buff in buffs:
        __processBuffer(buff)

    # remove blank <p>aragraphs
    # An explicit loop is used instead of map(): map() is for building
    # values, and where it is lazy the replacements would never be executed.
    # The blank elements are collected first, so the DOM is not modified
    # while it is being searched.
    blank_paragraphs = [
        p for p in node.find("p") if p.getContent().strip() == ""
    ]
    for p in blank_paragraphs:
        p.replaceWith(d.HTMLElement(""))

    # return "beautified" string
    # (the replacements are order dependent)
    return str(node) \
        .replace("<p>", "\n<p>") \
        .replace("</p>", "</p>\n\n") \
        .replace("<p>\n", "<p>") \
        .replace("<h", "\n<h") \
        .replace("<p><br />\n", "<p>")  # don't ask..
def __getUserTemplate(dom, name):
    """
    Return users template identified by name (case insensitive).

    The node is looked up with `__getFirstNodeByCIName`, converted with
    `convertToHtml` and the resulting HTML is parsed with `d.parseString`.

    Returns:
        (template_node, html_content) - the found node and the parsed HTML,
        or (None, None) when there is no node with given name.
    """
    template_node = __getFirstNodeByCIName(dom, name)

    # nothing to convert when the template node is missing
    if template_node is None:
        return (None, None)

    template_id = template_node.params["unique_id"]
    html_content = d.parseString(convertToHtml(dom, template_id))

    # preprocess content
    # NOTE(review): the entity-decoded `content` string built here is never
    # used - the parsed `html_content` is what gets returned.  Either this
    # preprocessing is dead code, or the function was meant to return
    # `content`; confirm against the callers before changing it.
    content = html_content.getContent()
    content = content.replace("<p></p>", "").strip()
    for key, val in HTML_ENTITIES.iteritems():
        content = content.replace(val, key)

    return (template_node, html_content)
def generateAtomFeed(dom, out_dir):
    """
    Generate Atom feed from the "__rss" node of |dom| and save it to
    |out_dir|.

    Every <node> found under the "__rss" node, except the first match (the
    main node with information about the feed), is one feed entry.  The
    first link of each entry carries the entry's update time (link text) and
    URL (`href`).  The feed template is taken from the first <codebox> of
    the "__rss" node and the output filename from the `href` of the first
    <link> in the rendered feed ("atom.xml" when that has no "." in it).

    Processed entries and the "__rss" node itself are blanked in |dom|.

    Args:
        dom: Parsed CherryTree document.
        out_dir (str): Directory into which the feed is written.

    Returns:
        None.  Nothing is done when there is no "__rss" node.

    Raises:
        ValueError: When an entry has no link, the codebox with the Atom
            template is missing, the feed has no entries, or the template
            has no <link> with `href`.
    """
    rss_node = __getFirstNodeByCIName(dom, "__rss")

    # don't continue, if there is no rss node
    if rss_node is None:
        return None

    # iterate thru feed records
    entries = []
    update_times = []
    for index, node in enumerate(rss_node.find("node")):
        # skip first iteration (main node containing information about feed)
        if index == 0:
            continue

        # convert node from rich_text to html
        html_node = d.parseString(convertToHtml(dom, node.params["unique_id"]))

        links = html_node.find("a")
        if not links:
            raise ValueError(
                "Item '" + node.params["name"] + "' doesn't have date and/or URL!"
            )
        first_link = links[0]

        updated = first_link.getContent()

        # get url from first link, or set it to default
        url = first_link.params["href"] if "href" in first_link.params else ""
        if url.startswith("./../") and len(url) > 5:
            url = "./" + url[5:]

        # remove first link (and it's content) from html code
        first_link.replaceWith(d.HTMLElement(""))

        # preprocess content
        content = html_node.getContent().replace("<p></p>", "").strip()
        for key, val in HTML_ENTITIES.items():
            content = content.replace(val, key)

        # NOTE(review): md5() is fed a native string here; on Python 3 it
        # would have to be encoded to bytes first.
        uid = hashlib.md5(
            node.params["name"] + str(url) + str(updated)
        ).hexdigest()

        entries.append(
            Template(ATOM_ENTRY_TEMPLATE).substitute(
                title=node.params["name"],
                url=url,
                uid=uid,
                updated=updated,
                content=content
            )
        )
        update_times.append(updated)

        # remove node from DOM
        node.replaceWith(d.HTMLElement(""))

    # extract Atom template from .ctd
    codeboxes = rss_node.find("codebox")
    if not codeboxes:
        raise ValueError("There is no codebox with Atom template!")

    atom_template = __removeHTMLEntities(codeboxes[0].getContent())

    # the feed's update time is taken from the first entry, so at least one
    # entry is required (fail with a clear message instead of IndexError)
    if not update_times:
        raise ValueError("There are no entries in your feed!")

    atom_feed = Template(atom_template).substitute(
        updated=update_times[0],
        entries="".join(entries)
    )

    # get feed's filename - it is specified in atom template
    feed_links = d.parseString(atom_feed).find("link")
    if not feed_links:
        raise ValueError("There has to be link in your Atom template!")

    feed_link = feed_links[0]
    if "href" not in feed_link.params:
        raise ValueError(
            "Link in your Atom template has to have 'href' parameter!"
        )

    filename = feed_link.params["href"].split("/")[-1]
    if "." not in filename:
        filename = "atom.xml"
        writeln(
            "You didn't specified filename of your feed, so I choosed " +
            "'%s'" % (filename)
        )

    # context manager closes the file even if write() fails
    with open(out_dir + "/" + filename, "wt") as fh:
        fh.write(atom_feed)

    # get rid of RSS node
    rss_node.replaceWith(d.HTMLElement(""))
def getVal(this):
    """Return the 'adc_ch1' entry of the next parsed reading.

    One string is fetched with ``serial_nucleo.getString()`` and parsed with
    ``parser.parseString``; ``this`` is not used.
    """
    reading = parser.parseString(serial_nucleo.getString())
    return reading['adc_ch1']
def convertToHtml(dom, node_id, do_anchors=True, out_dir=None, root_path=None):
    """
    Convert the CherryTree node with given |node_id| to HTML.

    A copy of the node is made, its subnodes are dropped, the empty
    <rich_text justification="left"> placeholders are substituted with the
    elements from `_createReplacements`, all <rich_text> tags are
    transformed with `_transformRichText` / `_transformLink` and the result
    goes through `guessParagraphs`.

    Args:
        dom: Parsed CherryTree document.
        node_id: "unique_id" of the converted node (compared as string).
        do_anchors (bool): Give every <h1>/<h2>/<h3> an `id` and make the
            heading link to itself.
        out_dir: Passed to `_createReplacements` and `_transformLink`.
        root_path: Passed to `_createReplacements` and `_transformLink`.

    Returns:
        str: Content of the converted node.
    """
    wanted_id = str(node_id)

    # get node element; serializing and parsing it again gives a deep copy
    original = dom.find("node", {"unique_id": wanted_id})[0]
    node = d.parseString(str(original)).find("node")[0]

    # remove subnodes
    for subnode in node.find("node"):
        if subnode.params["unique_id"] != wanted_id:
            subnode.replaceWith(d.HTMLElement(""))

    replacements = _createReplacements(node, out_dir, root_path)

    def wrap_in_link(link, payload):
        "Put |payload| into <rich_text> with |link| (pictures as links)."
        wrapper = d.HTMLElement("<rich_text>")
        wrapper.params["link"] = link
        wrapper.childs = [payload]
        wrapper.endtag = d.HTMLElement("</rich_text>")
        return wrapper

    # replace <rich_text justification="left"></rich_text> with tags from
    # `replacements` - the n-th placeholder gets the n-th replacement
    placeholders = node.find(
        "rich_text",
        {"justification": "left"},
        fn=lambda x: x.getContent() == ""
    )
    for position, placeholder in enumerate(placeholders):
        substitute = replacements[position]

        if "link" not in placeholder.params:
            placeholder.replaceWith(substitute)
            continue

        placeholder.replaceWith(
            wrap_in_link(placeholder.params["link"], substitute)
        )

    # transform all <rich_text> tags to something usefull
    for rich_text in node.find("rich_text"):
        # transform <rich_text some="crap"> to html tags
        _transformRichText(rich_text)

        # transform links
        _transformLink(rich_text, dom, node_id, out_dir, root_path)

        # <rich_text> without any parameters is only a container for text
        # (cherrytree emits flat sequences of them instead of nesting), so
        # it is replaced with a blank element holding the same childs
        if len(rich_text.params) == 0:
            container = d.HTMLElement()
            container.childs = rich_text.childs
            rich_text.replaceWith(container)

    # convert text to paragraphs
    serialized = str(node)
    serialized = serialized.replace('<rich_text justification="left">', "")
    node = d.parseString(guessParagraphs(serialized, DONT_WRAP))

    if do_anchors:
        # apply anchors
        headings = node.find("h1") + node.find("h2") + node.find("h3")
        for head in headings:
            anchor = "anchor_%s_%s" % (
                head.getTagName(),
                utfToFilename(head.getContent())
            )
            head.params["id"] = anchor

            # make head link to itself
            self_link = "<a href='#" + anchor + "'>" + head.getContent() + "</a>"
            head.childs = [d.parseString(self_link)]

    return str(node.find("node")[0].getContent())
def convertToHtml(dom, node_id, do_anchors=True, out_dir=None, root_path=None):
    """
    Convert the CherryTree node with given |node_id| to HTML.

    A copy of the node is made and its subnodes are dropped.  <codebox>,
    <table> and <encoded_png> elements are converted to <pre>, HTML table
    and <img> and moved to the positions of the
    <rich_text justification="left"> placeholders.  Then all <rich_text>
    tags are transformed and the result goes through `guessParagraphs`.

    Args:
        dom: Parsed CherryTree document.
        node_id: "unique_id" of the converted node (compared as string).
        do_anchors (bool): Give every <h1>/<h2>/<h3> an `id` and make the
            heading link to itself.
        out_dir (str): When set, pictures are saved to |out_dir|/pictures
            and referenced by path; when None, they are embedded as
            "data:" URIs.
        root_path (str): Prefix of the picture paths in `src`; used only
            when |out_dir| is set.

    Returns:
        str: Content of the converted node.
    """
    # get node element
    node = dom.find("node", {"unique_id": str(node_id)})[0]
    node = d.parseString(str(node)).find("node")[0]  # get deep copy

    # remove subnodes
    for n in node.find("node"):
        if n.params["unique_id"] != str(node_id):
            n.replaceWith(d.HTMLElement(""))

    #===========================================================================
    # transform <codebox>es to <pre> tags.
    # CherryTree saves <codebox>es at the end of the <node>. Thats right - they
    # are not in the source as all other tags, but at the end. Instead of
    # <codebox> in the text, there is
    # <rich_text justification="left"></rich_text>, which needs to be replaced
    # with <pre>
    def processTable(table):
        "Convert cherrytree table to HTML table."
        del table.params["char_offset"]

        html_table = str(table)
        html_table = html_table.replace("<cell>", "<td>")
        html_table = html_table.replace("</cell>", "</td>")
        html_table = html_table.replace("<row>", "<tr>")
        html_table = html_table.replace("</row>", "</tr>\n")

        return d.parseString(html_table)

    def processPicture(picture, out_dir, root_path):
        "Convert <encoded_png> to <img>, saved to disk or inlined."
        encoded = picture.getContent()
        img = d.HTMLElement("<img />")

        if out_dir is None:
            # inline the picture; the whitespace-free payload is what has to
            # go into the URI (previously it was computed, but the raw
            # content was used instead)
            img.params["src"] = (
                "data:image/png;base64," + "".join(encoded.split())
            )
            return img

        # save the picture under a content-derived name
        content = base64.b64decode(encoded)
        filename = hashlib.md5(content).hexdigest() + ".png"

        directory = out_dir + "/pictures"
        if not os.path.exists(directory):
            os.makedirs(directory)

        with open(directory + "/" + filename, "wb") as f:
            f.write(content)

        img.params["src"] = root_path + "/pictures/" + filename
        return img

    # create html versions of |replacements_tagnames| tags and put them into
    # |replacements[]| variable
    # remove |replacements_tagnames| from DOM
    replacements = []
    replacements_tagnames = ["codebox", "table", "encoded_png"]
    found = node.find("", fn=lambda x: x.getTagName() in replacements_tagnames)
    for replacement in found:
        tag_name = replacement.getTagName()

        if tag_name == "codebox":
            el = d.HTMLElement("<pre>")
            el.childs = replacement.childs[:]
            el.params["syntax"] = replacement.params["syntax_highlighting"]
            el.endtag = d.HTMLElement("</pre>")
        elif tag_name == "table":
            el = processTable(replacement)
        elif tag_name == "encoded_png":
            el = processPicture(replacement, out_dir, root_path)
        else:
            raise ValueError(
                "This shouldn't happend." +
                "If does, there is new unknown <element>."
            )

        replacements.append(el)

        # remove original element (codebox/table) from DOM
        replacement.replaceWith(d.HTMLElement(""))

    # replace <rich_text justification="left"></rich_text> with tags from
    # |replacements| - the n-th placeholder gets the n-th replacement
    placeholders = node.find("rich_text", {"justification": "left"})
    for cnt, rt in enumerate(placeholders):
        if "link" in rt.params:  # support for pictures as links
            el = d.HTMLElement("<rich_text>")
            el.params["link"] = rt.params["link"]
            el.childs = [replacements[cnt]]
            el.endtag = d.HTMLElement("</rich_text>")
            rt.replaceWith(el)
        else:
            rt.replaceWith(replacements[cnt])

    #===========================================================================
    # transform all <rich_text> tags to something usefull
    for t in node.find("rich_text"):
        # transform <rich_text some="crap"> to html tags
        __transformRichText(t)

        # transform links
        __transformLink(t, dom, node_id, out_dir, root_path)

        # <rich_text> without any parameters is only a container for text
        # (cherrytree emits flat sequences of them instead of nesting), so
        # it is replaced with a blank element holding the same childs
        if len(t.params) == 0:
            el = d.HTMLElement()
            el.childs = t.childs
            t.replaceWith(el)

    # convert text to paragraphs
    node = str(node).replace('<rich_text justification="left">', "")  # dont ask
    node = d.parseString(guessParagraphs(node, DONT_WRAP))

    if do_anchors:
        # apply anchors
        for head in node.find("h1") + node.find("h2") + node.find("h3"):
            anchor = "anchor_%s_%s" % (
                head.getTagName(),
                utfToFilename(head.getContent())
            )
            head.params["id"] = anchor

            # make head link to itself
            head.childs = [
                d.parseString(
                    "<a href='#" + anchor + "'>" + head.getContent() + "</a>"
                )
            ]

    # TODO transform • to ul/li tags

    return str(node.find("node")[0].getContent())