Example #1
0
    def parse(xml):
        """Parse XML input into an XMLDocument.

        The input is tokenized, the first StartTag token is taken as the root
        element, and everything between it and the position reported by
        findEnd() is parsed recursively with parse_node().

        Args:
            xml: the input handed to tokenize() (presumably the XML text --
                confirm against tokenize()).

        Returns:
            An XMLDocument built around the root Node. Its ``prolog``
            attribute is set to the first token when that token is a Prolog.

        Raises:
            ValueError: if the token stream contains no StartTag, i.e. there
                is no root element to build a document from.
        """
        tokens = tokenize(xml)

        # Skip anything before the root node: the root is the first StartTag.
        root_i = next(
            (i for i, token in enumerate(tokens) if isinstance(token, StartTag)),
            None,
        )
        if root_i is None:
            # Without this guard the code below would index past the end of
            # the token list and fail with an uninformative IndexError.
            raise ValueError("no root element found")

        # findEnd is expected to return the index of the token closing the
        # root tag; the children live strictly between the two.
        end_i = findEnd(tokens, root_i)

        root = Node()
        root.tagname = tokens[root_i].tagname
        root.attributes = tokens[root_i].attributes
        root.childNodes = parse_node(tokens, root_i + 1, end_i - 1)

        doc = XMLDocument(root)
        # tokens is guaranteed non-empty here because a StartTag was found.
        if isinstance(tokens[0], Prolog):
            doc.prolog = tokens[0]
        return doc
Example #2
0
def parse_node(elements, start_i, end_i):
    """Convert the tokens in elements[start_i..end_i] (inclusive) into nodes.

    The range is the content between an opening tag and its closing tag:

             start_i    end_i
        <tag>................</tag>

    TextNode tokens are kept as-is. A StartTag becomes a Node whose
    childNodes are parsed recursively from the tokens up to the index
    returned by findEnd() (expected to be the matching end tag). A
    SelfClosingTag becomes a Node constructed with ``True`` (presumably the
    self-closing flag -- confirm against Node). EndTag tokens met directly
    in the range are skipped.

    Returns the list of nodes in document order; an empty range
    (start_i > end_i) yields an empty list.

    Raises ValueError for a token that is none of the types above.
    """
    result = []
    pos = start_i

    while pos <= end_i:
        token = elements[pos]

        if isinstance(token, TextNode):
            result.append(token)
            pos += 1
            continue

        if isinstance(token, StartTag):
            close_pos = findEnd(elements, pos)
            element = Node()
            element.tagname = token.tagname
            element.attributes = token.attributes
            element.childNodes = parse_node(elements, pos + 1, close_pos - 1)
            result.append(element)
            # Resume after the closing tag of the element just consumed.
            pos = close_pos + 1
            continue

        if isinstance(token, SelfClosingTag):
            element = Node(True)
            element.tagname = token.tagname
            element.attributes = token.attributes
            result.append(element)
            pos += 1
            continue

        if isinstance(token, EndTag):
            # End tags inside the range carry no content of their own.
            pos += 1
            continue

        raise ValueError("unknown element", token)

    return result