def testCompCombination(self):
    """Smoke-test compCombination on the first three XML fixtures.

    Each fixture is parsed, annotated via extractBasicStats, and the
    result of compCombination is printed for a second argument of 2 and
    then 1. Nothing is asserted; the output is for manual inspection.
    """
    for fixture_index in (0, 1, 2):
        root = etree.fromstring(self.xmlcode[fixture_index])
        BasicFeatureExtraction.extractBasicStats(root)
        for second_arg in (2, 1):
            print(PartialTreeAlignment.compCombination(root, second_arg))
def testMatchTree(self):
    """Check the first value returned by matchTree for fixture pairs.

    Each case is ((left index, right index), expected value). Both
    fixtures are parsed and passed through extractBasicStats before
    being matched against each other.
    """
    expectations = [((0, 1), 3), ((1, 2), 1)]
    for (left_index, right_index), expected in expectations:
        left = etree.fromstring(self.xmlcode[left_index])
        BasicFeatureExtraction.extractBasicStats(left)

        right = etree.fromstring(self.xmlcode[right_index])
        BasicFeatureExtraction.extractBasicStats(right)

        # matchTree returns a pair; only the first element is checked here.
        dist, _opers = PartialTreeAlignment.matchTree(left, right)
        self.assertEqual(dist, expected)
def testDfsTree(self):
    """Compare the 'Pre'/'Post' attributes left by dfsTree with a reference.

    Each case is (fixture index, reference XML). After dfsTree runs with
    the printPreorder and printPostorder callbacks, every element of the
    fixture is located in the reference tree through its XPath and its
    'Pre' and 'Post' attribute values must match the reference element's.
    """
    # NOTE(review): this test reads self.xmllist, whereas the other tests
    # in this class read self.xmlcode -- confirm that is intentional.
    expectations = [
        (0, '<a Pre="0" Post="7"><b Pre="3" Post="6"><c Pre="4" Post="5"/></b><d Pre="1" Post="2"/></a>'),
    ]
    for cid, reference_xml in expectations:
        root = etree.fromstring(self.xmllist[cid])
        BasicFeatureExtraction.dfsTree(root, printPreorder, printPostorder)

        reference_root = etree.fromstring(reference_xml)
        document = root.getroottree()
        for node in root.iter():
            node_path = document.getpath(node)
            # cid is passed as the assertion message to identify the case.
            for attr_name in ('Pre', 'Post'):
                self.assertEqual(
                    node.attrib[attr_name],
                    reference_root.xpath(node_path)[0].attrib[attr_name],
                    cid,
                )
        print(etree.tostring(root))
def testFindDataRegions(self):
    """Check the tags and region values returned by findDataRegions.

    Each case is (fixture index, expected), where expected is a list of
    (tag, region) pairs. findDataRegions is called with 2 and 0.3 as its
    second and third arguments; the tag of each result's first item and
    each result's second item are compared with the expected lists.
    """
    expectations = [
        (0, []),
        (3, []),
        (4, [('sky', [2, 0, 4])]),
        (5, []),
    ]

    from IntelligentCrawl.config import Configuration
    # NOTE(review): this assignment is never undone, so the value stays in
    # effect for anything that runs after this test. The attribute name is
    # spelled 'MinDataRegiontHeight' -- confirm it matches Configuration.
    Configuration.MinDataRegiontHeight = 3

    for tid, expected in expectations:
        root = etree.fromstring(self.xmlcode[tid])
        BasicFeatureExtraction.extractBasicStats(root)
        print(etree.tostring(root))

        found = PartialTreeAlignment.findDataRegions(root, 2, 0.3)
        print(found)

        found_tags = [entry[0].tag for entry in found]
        found_regions = [entry[1] for entry in found]
        expected_tags = [entry[0] for entry in expected]
        expected_regions = [entry[1] for entry in expected]

        # tid is passed as the assertion message to identify the case.
        self.assertListEqual(found_tags, expected_tags, tid)
        self.assertListEqual(found_regions, expected_regions, tid)
def testPartialAlign(self):
    """Smoke-test partialAlign on the first three XML fixtures.

    The fixtures are parsed and annotated via extractBasicStats, then
    passed together as a list to partialAlign. The returned pattern
    (serialised as text) and data table are printed; nothing is asserted.
    """
    forest = []
    for fixture_index in (0, 1, 2):
        root = etree.fromstring(self.xmlcode[fixture_index])
        BasicFeatureExtraction.extractBasicStats(root)
        forest.append(root)

    pattern, dataTable = PartialTreeAlignment.partialAlign(forest)
    print(etree.tostring(pattern, encoding='unicode'))
    print(dataTable)