Esempio n. 1
0
def naive_xml_to_json(xml_string):
    """Convert an XML document into a JSON object string by string building.

    The conversion is deliberately naive:

    * an element with child elements becomes ``"name" : {...}`` and its own
      text is ignored;
    * an element without children becomes ``"name" : <json-encoded text>``,
      or ``"name" : ""`` when it has no text;
    * any ``{namespace}`` prefix is stripped from tag names;
    * attributes are ignored, and repeated sibling names yield repeated keys.

    If the document root is named ``data`` or ``root`` it is treated as a
    wrapper: only its first child element is converted. A wrapper without
    child elements yields ``'{"success":""}'``.

    Args:
        xml_string: The XML document, as ``str`` or ``bytes``.

    Returns:
        A ``str`` containing a JSON object.
    """
    empty_response = '{"success":""}'

    # Normalise to bytes *before* the fast-path comparison below, so that
    # "<data/>" and b"<data/>" are treated alike. bytes(<bytes>, "utf-8")
    # raises TypeError, which leaves input that is already bytes untouched.
    try:
        xml_string = bytes(xml_string, "utf-8")
    except TypeError:
        pass

    # Fast path: the canonical empty documents need no parsing at all.
    if xml_string in (b"<data/>", b"<root/>"):
        return empty_response

    def walk(xml_node):
        """Return the ``"name" : value`` JSON member for *xml_node*."""
        # list(element) is the supported replacement for the deprecated
        # Element.getchildren().
        children = list(xml_node)
        name = xml_node.tag.split("}")[-1]

        if children:
            members = ",".join(walk(child) for child in children)
            return '"%s" : {%s}' % (name, members)

        body = xml_node.text
        if body is None:
            return '"%s" : ""' % name
        return '"%s" : %s' % (name, tornado.escape.json_encode(body))

    root_xml_node = lxml.etree.fromstring(xml_string)

    # get_xml_element_name is defined elsewhere; presumably it returns the
    # tag name without its namespace -- confirm against the helper.
    if get_xml_element_name(root_xml_node) in ("data", "root"):
        # An empty wrapper written any other way (e.g. "<data></data>")
        # used to fail with IndexError on root_xml_node[0].
        if len(root_xml_node) == 0:
            return empty_response
        root_xml_node = root_xml_node[0]

    return "{%s}" % walk(root_xml_node)
Esempio n. 2
0
def naive_xml_to_json(xml_string):
    """Render an XML document as a JSON object string, without a JSON model.

    Each element is emitted as one ``"name" : value`` member:

    * with child elements, ``value`` is ``{...}`` holding the children's
      members separated by commas (the element's own text is dropped);
    * without children, ``value`` is the element text JSON-encoded through
      ``tornado.escape.json_encode``, or ``""`` when the text is ``None``.

    Tag names have any ``{namespace}`` prefix removed. Attributes are not
    emitted, and sibling elements sharing a name produce duplicate keys.

    A document root named ``data`` or ``root`` is unwrapped: only its first
    child element is rendered. If such a root has no child elements the
    result is ``'{"success":""}'``.

    Args:
        xml_string: XML source as ``str`` or ``bytes``.

    Returns:
        The JSON text as ``str``.
    """
    # Encode text input first so the empty-document shortcut below also
    # matches str arguments; it compares against bytes literals.
    if isinstance(xml_string, str):
        xml_string = xml_string.encode("utf-8")

    if xml_string in (b"<data/>", b"<root/>"):
        return '{"success":""}'

    def walk(xml_node):
        """Build the JSON member text for *xml_node* and its descendants."""
        name = xml_node.tag.split("}")[-1]
        # Iterating an element yields its children; getchildren() is
        # deprecated in lxml.
        rendered_children = [walk(child) for child in xml_node]

        if rendered_children:
            return '"%s" : {%s}' % (name, ",".join(rendered_children))
        if xml_node.text is None:
            return '"%s" : ""' % name
        return '"%s" : %s' % (name, tornado.escape.json_encode(xml_node.text))

    root_xml_node = lxml.etree.fromstring(xml_string)
    # NOTE(review): get_xml_element_name lives outside this block; it is
    # assumed to return the root's tag name -- confirm.
    root_name = get_xml_element_name(root_xml_node)

    if root_name in ("data", "root"):
        # Guard against wrappers with no child elements, such as
        # "<root></root>"; indexing [0] on them raises IndexError.
        if len(root_xml_node) == 0:
            return '{"success":""}'
        root_xml_node = root_xml_node[0]

    return "{%s}" % walk(root_xml_node)
Esempio n. 3
0
def prune_non_schema_xml(root_schema_node, xml_string):
    """Strip from an XML document every element the schema does not name.

    The document root is matched by name against the children of
    *root_schema_node*. When no child matches, the literal ``"<data/>"`` is
    returned. Otherwise the tree is walked recursively: at each level,
    child elements whose name is not among the schema node's child names
    are removed, and the remaining ones are descended into with their
    matching schema child.

    Args:
        root_schema_node: Schema node whose children are the candidate
            schema roots (project type; each child exposes ``get_name()``).
        xml_string: XML document accepted by ``et.fromstring``.

    Returns:
        The pruned document, pretty-printed and decoded as UTF-8 ``str``,
        or ``"<data/>"`` when the root element is not in the schema.
    """

    def _prune(schema, element):
        # collect_siblings is used here as a mapping of child name to the
        # child elements carrying that name.
        by_name = collect_siblings(element)
        unexpected = set(by_name.keys()) - collect_child_names(schema)

        for tag, group in by_name.items():
            if tag in unexpected:
                for node in group:
                    element.remove(node)
                continue

            sub_schema = find_child_by_name(schema, tag)
            for node in group:
                _prune(sub_schema, node)

    candidates = collect_children(root_schema_node)

    document = et.fromstring(xml_string)
    document_name = get_xml_element_name(document)

    # First schema child whose name equals the document root's name.
    matched = next(
        (node for node in candidates if node.get_name() == document_name),
        None,
    )
    if matched is None:
        # The root element is not described by the schema.
        return "<data/>"

    _prune(matched, document)
    return et.tostring(document, pretty_print=True).decode("utf-8")
Esempio n. 4
0
def prune_non_schema_xml(root_schema_node, xml_string):
    """Return *xml_string* with all elements unknown to the schema removed.

    Processing steps:

    1. Collect the children of *root_schema_node* and parse *xml_string*.
    2. Pick the first schema child whose ``get_name()`` equals the name of
       the XML root; if there is none, return ``"<data/>"``.
    3. Recursively remove child elements whose names are absent from the
       corresponding schema node's child names.
    4. Serialise the remaining tree with ``pretty_print=True`` and decode
       it as UTF-8.

    Args:
        root_schema_node: Schema node whose children are compared with the
            XML root element.
        xml_string: XML document to prune.

    Returns:
        The pruned XML as ``str``, or ``"<data/>"`` if the root element has
        no schema counterpart.
    """

    def _select_root_schema(schema_nodes, wanted_name):
        """Return the first schema node named *wanted_name*, else None."""
        for schema_node in schema_nodes:
            if schema_node.get_name() == wanted_name:
                return schema_node
        return None

    def _strip_unknown(schema_node, xml_node):
        """Drop unknown children of *xml_node*, then recurse into the rest."""
        grouped = collect_siblings(xml_node)
        present = set(grouped.keys())
        allowed = collect_child_names(schema_node)
        to_drop = present - allowed

        for child_name, same_named in grouped.items():
            if child_name not in to_drop:
                nested_schema = find_child_by_name(schema_node, child_name)
                for xml_child in same_named:
                    _strip_unknown(nested_schema, xml_child)
            else:
                for xml_child in same_named:
                    xml_node.remove(xml_child)

    top_level_schemas = collect_children(root_schema_node)

    xml_root = et.fromstring(xml_string)
    xml_root_name = get_xml_element_name(xml_root)

    selected = _select_root_schema(top_level_schemas, xml_root_name)
    if selected is None:
        # Nothing in the schema describes this root element.
        return "<data/>"

    _strip_unknown(selected, xml_root)
    serialised = et.tostring(xml_root, pretty_print=True)
    return serialised.decode("utf-8")