if __name__ == '__main__':
    # Manual smoke test: parse a small HTML document, load it into the
    # project's Node tree and print the simpleTreeMatching result for two
    # sibling subtrees.
    import libxml2dom

    html = """ <html> <body> <div> 1 <p> 2 </p> </div> <div> 3 <p> <div></div> </p> </div> </body> </html> """

    document = libxml2dom.parseString(html)
    tree = Node(document).loadNodeTree(document)

    # Both subtrees hang off the same second-level node of the loaded tree
    # (presumably <body>, making a and b the two <div> elements -- confirm
    # against Node.loadNodeTree).
    container = tree.childNodes[0].childNodes[0]
    a = container.childNodes[0]
    b = container.childNodes[1]
    #print a.tag, b.tag

    # A single parenthesised argument prints identically under the Python 2
    # print statement.
    print(simpleTreeMatching(a, b))
i += r return maxmatch if __name__ == "__main__": print "Test utils.match functions" import libxml2dom from eri.extractors.distancebypair2.node import Node import test_trees if True: #__debug = True html = test_trees.tree3 dom = libxml2dom.parseString(html) root = Node(dom) root = root.loadNodeTree(dom, 0, True) div1 = root.childNodes[0].childNodes[0].childNodes[0] div2 = root.childNodes[0].childNodes[0].childNodes[1] body = root.childNodes[0].childNodes[0] # print 'div1', div1.tags # print 'div2', div2.tags # print 'body', body.tags r = treematch(body) if not r == [1, 1, 1, 1, 1, False, False]: print 'False', r #tree2