Python parseString Examples, parser.parseString Python Examples

Example #1

0

Show file

File: replay_reader.py Project: henerididdles/osu-Replay

def parseReplay(osrStream):
    """Read one replay record from ``osrStream`` and return it as a dict.

    The leading fields are consumed strictly in the order listed in
    ``leading_fields``; each is decoded by the paired ``parser.parse*``
    helper.  They are followed by a length-prefixed LZMA blob, which is
    decompressed, decoded as UTF-8 and handed to ``parseMouseData``, and by
    one trailing long stored under ``'unk'``.
    """
    # (dict key, reader) pairs in stream order -- the order is significant.
    leading_fields = (
        ('mode', parser.parseByte),
        ('version', parser.parseInt),
        ('beatmap_md5', parser.parseString),
        ('player_name', parser.parseString),
        ('replay_md5', parser.parseString),
        ('300s', parser.parseShort),
        ('100s', parser.parseShort),
        ('50s', parser.parseShort),
        ('geki', parser.parseShort),
        ('katu', parser.parseShort),
        ('misses', parser.parseShort),
        ('score', parser.parseInt),
        ('combo', parser.parseShort),
        ('fc', parser.parseByte),
        ('mods', parser.parseInt),
        ('lifebar', parser.parseString),
        ('timestamp', parser.parseLong),
    )

    data = {}
    for key, read_field in leading_fields:
        data[key] = read_field(osrStream)

    # Length-prefixed, LZMA-compressed payload.
    compressed_size = parser.parseInt(osrStream)
    compressed = osrStream.read(compressed_size)
    data['mouse_data'] = parseMouseData(
        str(lzma.decompress(compressed), 'utf-8')
    )

    data['unk'] = parser.parseLong(osrStream)
    return data

Example #2

0

Show file

File: guessparagraphs.py Project: Bystroushaak/cherrytree2html.py

def guessParagraphs(s, dont_wrap=["h1", "h2", "h3", "pre", "center", "table"]):
    """
    Wrap loose content of the markup in |s| into <p>aragraphs and return the
    result as a "beautified" string.

    Args:
        s: markup which is parsed with `d.parseString`.
        dont_wrap: tag names (lowercase) whose opening tag ends the run of
            elements collected so far. The list is only read, never changed.

    Returns:
        String form of the processed tree after the literal and regular
        expression replacements defined below were applied.
    """
    # parse string and make it double-linked tree
    node = d.parseString(s)
    d.makeDoubleLinked(node)

    # get all elements between <hx> (headers) - they will be converted to
    # <p>aragraphs
    tmp = []
    buffs = []
    for el in node.childs[0].childs:
        if el.getTagName().lower() in dont_wrap and not el.isEndTag():
            buffs.append(tmp)
            tmp = []
        else:
            tmp.append(el)
    buffs.append(tmp)

    # process paragraphs
    for buff in buffs:
        __processBuffer(buff)

    # remove blank <p>aragraphs
    # The blank ones are collected first and replaced afterwards. An explicit
    # loop is used because map() called only for its side effects does
    # nothing when map() is lazy.
    blank_paragraphs = [
        p for p in node.find("p") if p.getContent().strip() == ""
    ]
    for p in blank_paragraphs:
        p.replaceWith(d.HTMLElement(""))

    # literal replacements, applied in this order
    replacements = [
        ("<p>",         "\n<p>"),
        ("</p>",        "</p>\n\n"),
        ("<p>\n",       "<p>"),
        ("<h",          "\n<h"),
        ("\t",          ""),
        ("<p><br />\n", "<p>"),
        ("<p></p>\n",   ""),
    ]

    # bullet lines are turned into <li> items
    regular_replacements = [
        (r"• (.*)</p>\n", r"<li>\1</li>\n</p>\n"),
        (r"• (.*)\n", r"<li>\1</li>\n"),
    ]

    str_node = str(node)

    for old, new in replacements:
        str_node = str_node.replace(old, new)

    for pattern, repl in regular_replacements:
        str_node = re.sub(pattern, repl, str_node)

    return str_node

Example #3

0

Show file

File: guessparagraphs.py Project: Bystroushaak/cherrytree2html.py

def __processBuffer(buff):
    """
    Convert array of elements in buff to paragraphs.

    Elements are gathered into groups. Text containing a blank line (two
    consecutive newlines) is split there: the first piece continues the
    current group and every following piece starts a new group. A tag whose
    content contains a blank line is not added to any group; its children are
    processed recursively instead. Every group is finally passed to
    `elementsToP`.
    """
    p_stack = [[]]
    for el in buff:
        content = el.getContent() if el.isTag() else str(el)

        # content without blank line is just regular part of <p>
        if "\n\n" not in content:
            if "\n" in content:
                nel = d.parseString(str(el))
                nel.parent = el.parent
                el.replaceWith(nel)
            p_stack[-1].append(el)
            continue

        if el.isTag():
            __processBuffer(el.childs)
            continue

        # split by blank lines and convert the pieces to elements; a real
        # list is required, because it is stored as .childs, indexed and
        # sliced below
        pieces = [d.HTMLElement(part) for part in content.split("\n\n")]

        # new elements are moved into blank container
        # original element is then replaced by this blank container
        repl = d.HTMLElement("")
        repl.childs = pieces
        el.replaceWith(repl)

        # elements must have parents
        for piece in pieces:
            piece.parent = el

        # str.split() always yields at least one piece, so pieces[0] exists
        # first element is part of previous <p>
        p_stack[-1].append(pieces[0])

        # other elements are new <p>s by itself
        for piece in pieces[1:]:
            p_stack.append([piece])

    # convert stack of elements to <p>
    for p in p_stack:
        elementsToP(p)

Example #4

0

Show file

File: converttohtml.py Project: Bystroushaak/cherrytree2html.py

def _processTable(table):
    """
    Convert cherrytree table to HTML table.

    The "char_offset" parameter is deleted from |table| (the passed element
    is modified), then the element is serialized, its <cell>/<row> tags are
    renamed to <td>/<tr> and the result is parsed again.
    """
    del table.params["char_offset"]

    # (cherrytree tag, html tag) pairs, applied in this order
    tag_pairs = (
        ("<cell>", "<td>"),
        ("</cell>", "</td>"),
        ("<row>", "<tr>"),
        ("</row>", "</tr>\n"),
    )

    markup = str(table)
    for cherry_tag, html_tag in tag_pairs:
        markup = markup.replace(cherry_tag, html_tag)

    return d.parseString(markup)

Example #5

0

Show file

File: usertemplates.py Project: Bystroushaak/cherrytree2html.py

def saveUserCSS(html_template, css, out_dir):
    """
    Save |css| into |out_dir|.

    Try parse filename from |html_template|, if there is proper
    <link rel='stylesheet'> tag with `href` parameter. Only the last path
    component of the href is used.

    Default "style.css" - used when there is no such link, when the link has
    no href, or when the href has no filename part (it is empty or ends with
    a slash).
    """
    dom = d.parseString(html_template)
    links = dom.find("link", {"rel": "stylesheet"})

    css_name = links[0].params.get("href", "style.css") if links else "style.css"

    # basename is "" for hrefs like "" or "css/" - writing to out_dir + "/"
    # would fail, so use the default name instead
    css_name = os.path.basename(css_name) or "style.css"

    with open(out_dir + "/" + css_name, "wt") as fh:
        fh.write(css)

Example #6

0

Show file

def saveUserCSS(html_template, css, out_dir):
    """
    Save |css| into |out_dir|.

    Try parse filename from |html_template|, if there is proper
    <link rel='stylesheet'> tag with `href` parameter. Only the last path
    component of the href is used.

    Default "style.css".
    """
    dom = d.parseString(html_template)
    links = dom.find("link", {"rel": "stylesheet"})

    css_name = "style.css"
    if len(links) > 0 and "href" in links[0].params:
        css_name = links[0].params["href"]

    css_name = os.path.basename(css_name)

    # context manager closes the file even if write() fails
    with open(out_dir + "/" + css_name, "wt") as fh:
        fh.write(css)

Example #7

0

Show file

def guessParagraphs(s, dont_wrap=["h1", "h2", "h3", "pre", "center", "table"]):
    """
    Wrap loose content of the markup in |s| into <p>aragraphs and return the
    result as a "beautified" string.

    Args:
        s: markup which is parsed with `d.parseString`.
        dont_wrap: tag names (lowercase) whose opening tag ends the run of
            elements collected so far. The list is only read, never changed.

    Returns:
        String form of the processed tree with newlines added around <p> and
        <h*> tags.
    """
    # parse string and make it double-linked tree
    node = d.parseString(s)
    d.makeDoubleLinked(node)

    # get all elements between <hx> (headers) - they will be converted to
    # <p>aragraphs
    tmp = []
    buffs = []
    for el in node.childs[0].childs:
        if el.getTagName().lower() in dont_wrap and not el.isEndTag():
            buffs.append(tmp)
            tmp = []
        else:
            tmp.append(el)
    buffs.append(tmp)

    # process paragraphs
    for buff in buffs:
        __processBuffer(buff)

    # remove blank <p>aragraphs
    # The blank ones are collected first and replaced afterwards. An explicit
    # loop is used because map() called only for its side effects does
    # nothing when map() is lazy.
    blank_paragraphs = [
        p for p in node.find("p") if p.getContent().strip() == ""
    ]
    for p in blank_paragraphs:
        p.replaceWith(d.HTMLElement(""))

    # return "beautified" string
    return str(node)                               \
                    .replace("<p>", "\n<p>")       \
                    .replace("</p>", "</p>\n\n")   \
                    .replace("<p>\n", "<p>")       \
                    .replace("<h", "\n<h")         \
                    .replace("<p><br />\n", "<p>")  # don't ask..

Example #8

0

Show file

def __getUserTemplate(dom, name):
    """
    Return users template identified by name (case insensitive).

    Template is then converted to html and parsed.

    Returns:
        (template_node, html_content) - node found in |dom| and parsed html
        created from it, or (None, None) if there is no such node.
    """
    template_node = __getFirstNodeByCIName(dom, name)

    # don't continue, if there is no template node
    if template_node is None:
        return (None, None)

    html_content = d.parseString(
        convertToHtml(dom, template_node.params["unique_id"])
    )

    # NOTE(review): a cleaned-up string (empty <p></p> removed, HTML_ENTITIES
    # translated back) used to be computed here and thrown away. It was never
    # part of the returned value, so it was removed; callers needing such
    # string have to derive it from html_content.getContent().
    return (template_node, html_content)

Example #9

0

Show file

def generateAtomFeed(dom, out_dir):
    """
    Generate Atom feed from the `__rss` node in |dom| and save it to
    |out_dir|.

    Each subnode of the `__rss` node is one feed entry. The first <a> link in
    the entry supplies the `updated` value (link's content) and the entry URL
    (link's href); the link is then removed and the rest of the entry is used
    as its content. Template of the whole feed is read from the first
    <codebox> in the `__rss` node, and the name of the output file is taken
    from the href of the first <link> in the rendered feed.

    Processed entry nodes and the `__rss` node itself are replaced with blank
    elements in |dom|.

    Returns:
        None. Nothing is done when there is no `__rss` node.

    Raises:
        ValueError: if an entry has no link, if there are no entries, if the
            codebox with template is missing, or if the rendered feed has no
            <link> with `href`.
    """
    rss_node = __getFirstNodeByCIName(dom, "__rss")

    # don't continue, if there is no rss node
    if rss_node is None:
        return None

    entry_template = Template(ATOM_ENTRY_TEMPLATE)

    # iterate thru feed records; the first found node is the main node
    # containing information about feed, so it is skipped
    entries = []
    update_times = []
    for node in rss_node.find("node")[1:]:
        # convert node from rich_text to html
        html_node = d.parseString(convertToHtml(dom, node.params["unique_id"]))

        links = html_node.find("a")
        if not links:
            raise ValueError(
                "Item '" +
                node.params["name"] +
                "' doesn't have date and/or URL!"
            )
        first_link = links[0]

        updated = first_link.getContent()

        # get url from first link, or set it to default
        url = first_link.params["href"] if "href" in first_link.params else ""
        url = "./" + url[5:] if url.startswith("./../") and len(url) > 5 else url

        # remove first link (and it's content) from html code
        first_link.replaceWith(d.HTMLElement(""))

        # preprocess content
        content = html_node.getContent().replace("<p></p>", "").strip()
        for key, val in HTML_ENTITIES.iteritems():
            content = content.replace(val, key)

        entries.append(
            entry_template.substitute(
                title=node.params["name"],
                url=url,
                uid=hashlib.md5(
                    node.params["name"] +
                    str(url) +
                    str(updated)
                ).hexdigest(),
                updated=updated,
                content=content
            )
        )

        update_times.append(updated)

        # remove node from DOM
        node.replaceWith(d.HTMLElement(""))

    # extract Atom template from .ctd
    atom_template = rss_node.find("codebox")
    if len(atom_template) <= 0:
        raise ValueError("There is no codebox with Atom template!")
    atom_template = atom_template[0].getContent()

    atom_template = __removeHTMLEntities(atom_template)

    # feed's `updated` is taken from the first entry, so there has to be one
    if not update_times:
        raise ValueError("There are no feed entries in the __rss node!")

    atom_feed = Template(atom_template).substitute(
        updated=update_times[0],
        entries="".join(entries)
    )

    # get feed's filename - it is specified in atom template
    filename = d.parseString(atom_feed).find("link")
    if len(filename) <= 0:
        raise ValueError("There has to be link in your Atom template!")
    filename = filename[0]

    if "href" not in filename.params:
        raise ValueError(
            "Link in your Atom template has to have 'href' parameter!"
        )
    filename = filename.params["href"].split("/")[-1]

    if "." not in filename:
        filename = "atom.xml"
        writeln(
            "You didn't specified filename of your feed, so I choosed " +
            "'%s'" % (filename)
        )

    # context manager closes the file even if write() fails
    with open(out_dir + "/" + filename, "wt") as fh:
        fh.write(atom_feed)

    # get rid of RSS node
    rss_node.replaceWith(d.HTMLElement(""))

Example #10

0

Show file

File: main.py Project: upiitacode/MA_05_ADC_Plot

 def getVal(this):
     """Return the 'adc_ch1' entry parsed from the next serial string.

     A string is fetched with ``serial_nucleo.getString()``, parsed with
     ``parser.parseString`` and the value under 'adc_ch1' is returned.
     """
     return parser.parseString(serial_nucleo.getString())['adc_ch1']

Example #11

0

Show file

File: converttohtml.py Project: Bystroushaak/cherrytree2html.py

def convertToHtml(dom, node_id, do_anchors=True, out_dir=None, root_path=None):
    """
    Convert the <node> with given |node_id| from |dom| to HTML string.

    The node is looked up by its `unique_id`, copied, stripped of its
    subnodes, its <rich_text> elements are transformed and the text is
    split into paragraphs by `guessParagraphs`.

    Args:
        dom: parsed document in which the node is searched. It is also
            passed to `_transformLink`.
        node_id: value compared (as string) with `unique_id` of the nodes.
        do_anchors: if true, <h1>, <h2> and <h3> elements get `id` parameter
            and their content is wrapped into link pointing to that id.
        out_dir: passed to `_createReplacements` and `_transformLink`.
            Presumably the output directory - TODO confirm in those helpers.
        root_path: passed to `_createReplacements` and `_transformLink`.
            Presumably path prefix used in generated links - TODO confirm.

    Returns:
        Content of the converted node as string.

    Raises:
        IndexError: if there is no node with such `unique_id`, or if there
            are more placeholders than items in `replacements`.
    """
    # get node element
    node = dom.find("node", {"unique_id": str(node_id)})[0]
    node = d.parseString(str(node)).find("node")[0]  # get deep copy

    # remove subnodes
    for n in node.find("node"):
        if n.params["unique_id"] != str(node_id):
            n.replaceWith(d.HTMLElement(""))

    # elements which will be put in place of the placeholders - they are
    # picked by position (index) in the loop below
    replacements = _createReplacements(node, out_dir, root_path)

    def find_replacements_placeholder(node):
        "Return <rich_text justification='left'> elements with no content."
        return node.find(
            "rich_text",
            {"justification": "left"},
            fn=lambda x: x.getContent() == ""
        )

    # replace <rich_text justification="left"></rich_text> with tags from
    # `replacements` - n-th placeholder gets n-th replacement
    for cnt, rt in enumerate(find_replacements_placeholder(node)):
        if "link" in rt.params:  # support for pictures as links
            # keep the link in new <rich_text> wrapped around the replacement
            el = d.HTMLElement("<rich_text>")
            el.params["link"] = rt.params["link"]
            el.childs = [replacements[cnt]]
            el.endtag = d.HTMLElement("</rich_text>")
            rt.replaceWith(el)
        else:
            rt.replaceWith(replacements[cnt])
    #===========================================================================

    # transform all <rich_text> tags to something useful
    for t in node.find("rich_text"):
        # transform <rich_text some="param"> to html tags
        _transformRichText(t)

        # transform links
        _transformLink(t, dom, node_id, out_dir, root_path)

        # there are _arrays_ of rich_text with no params - this is not same as
        # <p>, because <p> allows nested parameters -> <p>Xex <b>bold</b></p>,
        # but cherrytree produces sequences like
        # <rich_text>Xex </rich_text><rich_text weight="heavy">bold</rich_text>
        # <rich_text></rich_text>
        if len(t.params) == 0:
            # element without params is replaced by blank container with the
            # same children
            el = d.HTMLElement()
            el.childs = t.childs
            t.replaceWith(el)

    # convert text to paragraphs
    # opening tags of remaining left-justified <rich_text> are removed from
    # the serialized node first (reason is not documented in the original)
    node = str(node).replace('<rich_text justification="left">', "")  # dont ask
    node = d.parseString(guessParagraphs(node, DONT_WRAP))

    if do_anchors:
        # apply anchors
        for head in node.find("h1") + node.find("h2") + node.find("h3"):
            # anchor name is made from tag name and content of the header
            anchor = "anchor_%s_%s" % (
                head.getTagName(), utfToFilename(head.getContent())
            )

            head.params["id"] = anchor

            # make head link to itself
            head.childs = [
                d.parseString(
                    "<a href='#" + anchor + "'>" + head.getContent() + "</a>"
                )
            ]

    return str(node.find("node")[0].getContent())

Example #12

0

Show file

def convertToHtml(dom, node_id, do_anchors=True, out_dir=None, root_path=None):
    """
    Convert the <node> with given |node_id| from |dom| to HTML string.

    The node is looked up by its `unique_id`, copied and stripped of its
    subnodes. <codebox>, <table> and <encoded_png> elements are converted to
    <pre>, HTML table and <img> and put in place of the
    <rich_text justification="left"> placeholders. Remaining <rich_text>
    elements are transformed and the text is split into paragraphs by
    `guessParagraphs`.

    Args:
        dom: parsed document in which the node is searched. It is also
            passed to `__transformLink`.
        node_id: value compared (as string) with `unique_id` of the nodes.
        do_anchors: if true, <h1>, <h2> and <h3> elements get `id` parameter
            and their content is wrapped into link pointing to that id.
        out_dir: if not None, pictures are saved as .png files into
            |out_dir|/pictures; if None, pictures are embedded into the html
            as base64 data URIs. Also passed to `__transformLink`.
        root_path: prefix of the `src` of saved pictures - it is joined with
            string, so it has to be set when |out_dir| is set. Also passed to
            `__transformLink`.

    Returns:
        Content of the converted node as string.

    Raises:
        IndexError: if there is no node with such `unique_id`, or if there
            are more placeholders than converted elements.
        ValueError: if element of unexpected type is found among the
            elements to convert.
    """
    # get node element
    node = dom.find("node", {"unique_id": str(node_id)})[0]
    node = d.parseString(str(node)).find("node")[0]  # get deep copy

    # remove subnodes
    for n in node.find("node"):
        if n.params["unique_id"] != str(node_id):
            n.replaceWith(d.HTMLElement(""))

    #===========================================================================
    # transform <codebox>es to <pre> tags.
    # CherryTree saves <codebox>es at the end of the <node>. Thats right - they
    # are not in the source as all other tags, but at the end. Instead of
    # <codebox> in the text, there is
    # <rich_text justification="left"></rich_text>, which needs to be replaced
    # with <pre>
    def processTable(table):
        "Convert cherrytree table to HTML table."

        del table.params["char_offset"]

        html_table = str(table)

        html_table = html_table.replace("<cell>", "<td>")
        html_table = html_table.replace("</cell>", "</td>")
        html_table = html_table.replace("<row>", "<tr>")
        html_table = html_table.replace("</row>", "</tr>\n")

        return d.parseString(html_table)

    def processPicture(picture, out_dir, root_path):
        """
        Convert <encoded_png> element to <img />.

        Picture is saved to file named by md5 of its data when |out_dir| is
        set, otherwise it is embedded as data URI.
        """
        content = base64.b64decode(picture.getContent())

        if out_dir is not None:
            filename = hashlib.md5(content).hexdigest() + ".png"

            directory = out_dir + "/pictures"
            if not os.path.exists(directory):
                os.makedirs(directory)

            with open(directory + "/" + filename, "wb") as f:
                f.write(content)

        img = d.HTMLElement("<img />")

        if out_dir is not None:
            img.params["src"] = root_path + "/pictures/" + filename
        else:
            # base64 data with all whitespace removed; this cleaned string
            # (not the raw element content) belongs into the data URI
            content = "".join(picture.getContent().split())
            img.params["src"] = "data:image/png;base64," + content

        return img

    # create html versions of |replacements_tagnames| tags and put them into
    # |replacements[]| variable
    # remove |replacements_tagnames| from DOM
    replacements = []
    replacements_tagnames = ["codebox", "table", "encoded_png"]
    for replacement in node.find("", fn=lambda x:
                                     x.getTagName() in replacements_tagnames):
        el = None

        tag_name = replacement.getTagName()
        if tag_name == "codebox":
            el = d.HTMLElement("<pre>")
            el.childs = replacement.childs[:]
            el.params["syntax"] = replacement.params["syntax_highlighting"]
            el.endtag = d.HTMLElement("</pre>")
        elif tag_name == "table":
            el = processTable(replacement)
        elif tag_name == "encoded_png":
            el = processPicture(replacement, out_dir, root_path)
        else:
            raise ValueError(
                "This shouldn't happend." +
                "If does, there is new unknown <element>."
            )

        replacements.append(el)

        # remove original element (codebox/table) from DOM
        replacement.replaceWith(d.HTMLElement(""))

    # replace <rich_text justification="left"></rich_text> with tags from
    # |replacements| - n-th placeholder gets n-th replacement
    for cnt, rt in enumerate(node.find("rich_text", {"justification": "left"})):
        if "link" in rt.params:  # support for pictures as links
            el = d.HTMLElement("<rich_text>")
            el.params["link"] = rt.params["link"]
            el.childs = [replacements[cnt]]
            el.endtag = d.HTMLElement("</rich_text>")
            rt.replaceWith(el)
        else:
            rt.replaceWith(replacements[cnt])
    #===========================================================================

    # transform all <rich_text> tags to something useful
    for t in node.find("rich_text"):
        # transform <rich_text some="param"> to html tags
        __transformRichText(t)

        # transform links
        __transformLink(t, dom, node_id, out_dir, root_path)

        # there are _arrays_ of rich_text with no params - this is not same as
        # <p>, because <p> allows nested parameters -> <p>Xex <b>bold</b></p>,
        # but cherrytree produces sequences like
        # <rich_text>Xex </rich_text><rich_text weight="heavy">bold</rich_text>
        # <rich_text></rich_text>
        if len(t.params) == 0:
            el = d.HTMLElement()
            el.childs = t.childs
            t.replaceWith(el)

    # convert text to paragraphs
    node = str(node).replace('<rich_text justification="left">', "")  # dont ask
    node = d.parseString(guessParagraphs(node, DONT_WRAP))

    if do_anchors:
        # apply anchors
        for head in node.find("h1") + node.find("h2") + node.find("h3"):
            anchor = "anchor_%s_%s" % (
                head.getTagName(), utfToFilename(head.getContent())
            )

            head.params["id"] = anchor

            # make head link to itself
            head.childs = [
                d.parseString(
                    "<a href='#" + anchor + "'>" + head.getContent() + "</a>"
                )
            ]

    # TODO transform • to ul/li tags

    return str(node.find("node")[0].getContent())