def testCompCombination(self):
    """Print ``compCombination`` results for the first three XML fixtures.

    Each of ``self.xmlcode[0..2]`` is parsed, passed through
    ``BasicFeatureExtraction.extractBasicStats``, and then handed to
    ``PartialTreeAlignment.compCombination`` with second argument 2 and
    then 1.  Nothing is asserted; the printed output is meant for manual
    inspection.
    """
    for fixture_index in range(3):
        root = etree.fromstring(self.xmlcode[fixture_index])
        BasicFeatureExtraction.extractBasicStats(root)
        # Same call order as before: argument 2 first, then 1.
        for second_arg in (2, 1):
            print(PartialTreeAlignment.compCombination(root, second_arg))
def testMatchTree(self):
    """Check the first value returned by ``matchTree`` for fixture pairs.

    For every ``((a, b), expected)`` entry, fixtures ``a`` and ``b`` from
    ``self.xmlcode`` are parsed and passed through
    ``BasicFeatureExtraction.extractBasicStats``; the first element of
    the pair returned by ``PartialTreeAlignment.matchTree`` must equal
    ``expected``.
    """

    def prepared(index):
        # Parse one fixture and run the stats extraction on it.
        node = etree.fromstring(self.xmlcode[index])
        BasicFeatureExtraction.extractBasicStats(node)
        return node

    expectations = (((0, 1), 3), ((1, 2), 1))
    for (left_index, right_index), expected in expectations:
        left = prepared(left_index)
        right = prepared(right_index)
        # The second returned value is not checked by this test.
        distance, _operations = PartialTreeAlignment.matchTree(left, right)
        self.assertEqual(distance, expected)
def testPartialAlign(self):
    """Run ``partialAlign`` over the first three fixtures and print the result.

    The fixtures ``self.xmlcode[0..2]`` are parsed, passed through
    ``BasicFeatureExtraction.extractBasicStats`` and collected, in index
    order, into a list that is handed to
    ``PartialTreeAlignment.partialAlign``.  The returned pair is printed
    (the first element serialised with ``etree.tostring``); nothing is
    asserted.
    """
    grove = []
    for fixture_index in range(3):
        parsed = etree.fromstring(self.xmlcode[fixture_index])
        BasicFeatureExtraction.extractBasicStats(parsed)
        grove.append(parsed)

    pattern, data_table = PartialTreeAlignment.partialAlign(grove)
    serialised = etree.tostring(pattern, encoding='unicode')
    print(serialised)
    print(data_table)
def testFindDataRegions(self):
    """Check ``findDataRegions`` against expected regions for several fixtures.

    Each case is ``(fixture_index, expected)`` where ``expected`` is a
    list of ``(tag, region)`` pairs.  For every result entry ``e``
    returned by ``PartialTreeAlignment.findDataRegions(tree, 2, 0.3)``,
    ``e[0].tag`` is compared with the expected tag and ``e[1]`` with the
    expected region.  The fixture index is passed as the assertion
    message so a failure identifies the case.

    The test overrides ``Configuration.MinDataRegiontHeight`` with 3 and
    restores the previous state afterwards, so the override does not
    leak into other tests.
    """
    from IntelligentCrawl.config import Configuration

    cases = [(0, []), (3, []), (4, [('sky', [2, 0, 4])]), (5, [])]

    # Configuration is shared state: register the restore *before*
    # overriding so it runs even if the test fails part-way through.
    missing = object()
    previous = getattr(Configuration, 'MinDataRegiontHeight', missing)
    if previous is missing:
        self.addCleanup(delattr, Configuration, 'MinDataRegiontHeight')
    else:
        self.addCleanup(
            setattr, Configuration, 'MinDataRegiontHeight', previous)
    Configuration.MinDataRegiontHeight = 3

    for tid, truevalue in cases:
        apple = etree.fromstring(self.xmlcode[tid])
        BasicFeatureExtraction.extractBasicStats(apple)
        print(etree.tostring(apple))

        res = PartialTreeAlignment.findDataRegions(apple, 2, 0.3)
        print(res)

        tags = [e[0].tag for e in res]
        drs = [e[1] for e in res]
        truetags = [e[0] for e in truevalue]
        truedrs = [e[1] for e in truevalue]
        self.assertListEqual(tags, truetags, tid)
        self.assertListEqual(drs, truedrs, tid)