def testCompCombination(self):
     """Smoke-run compCombination on fixtures 0, 1 and 2.

     Each fixture is parsed and passed through extractBasicStats, then
     compCombination is called with 2 and then with 1 as its second
     argument. The results are only printed; nothing is asserted.
     """
     for fixture_id in range(3):
         root = etree.fromstring(self.xmlcode[fixture_id])
         BasicFeatureExtraction.extractBasicStats(root)
         # Same call order as before: second argument 2 first, then 1.
         for second_arg in (2, 1):
             print(PartialTreeAlignment.compCombination(root, second_arg))
    def testMatchTree(self):
        """Compare matchTree's first return value against expected numbers.

        Fixture pair (0, 1) is expected to yield 3 and pair (1, 2) to
        yield 1. The second value returned by matchTree is unpacked but
        not checked.
        """
        expectations = (
            ((0, 1), 3),
            ((1, 2), 1),
        )
        for fixture_ids, expected in expectations:
            first_id, second_id = fixture_ids
            prepared = []
            # Parse and annotate the first tree fully before touching the second.
            for fixture_id in (first_id, second_id):
                root = etree.fromstring(self.xmlcode[fixture_id])
                BasicFeatureExtraction.extractBasicStats(root)
                prepared.append(root)
            dist, _opers = PartialTreeAlignment.matchTree(prepared[0], prepared[1])
            self.assertEqual(dist, expected)
 def testDfsTree(self):
     """Check the 'Pre' and 'Post' attributes present after dfsTree runs.

     dfsTree is called with the printPreorder and printPostorder callbacks;
     afterwards every element of the tree must carry the same 'Pre' and
     'Post' attribute values as the element at the same path in the
     expected XML. The resulting tree is printed at the end of each case.
     """
     # NOTE(review): this test reads self.xmllist while the sibling tests
     # read self.xmlcode -- confirm that is intentional.
     expectations = [
         (0, '<a Pre="0" Post="7"><b Pre="3" Post="6"><c Pre="4" Post="5"/></b><d Pre="1" Post="2"/></a>'),
     ]
     for cid, expected_xml in expectations:
         root = etree.fromstring(self.xmllist[cid])
         BasicFeatureExtraction.dfsTree(root, printPreorder, printPostorder)
         expected_root = etree.fromstring(expected_xml)
         document = root.getroottree()
         for node in root.iter():
             # Look up the matching element in the expected tree by path.
             path = document.getpath(node)
             for attr_name in ('Pre', 'Post'):
                 actual = node.attrib[attr_name]
                 wanted = expected_root.xpath(path)[0].attrib[attr_name]
                 self.assertEqual(actual, wanted, cid)
         print(etree.tostring(root))
 def testFindDataRegions(self):
     """Check findDataRegions output for fixtures 0, 3, 4 and 5.

     For every fixture the tree is parsed, passed through
     extractBasicStats and handed to findDataRegions(tree, 2, 0.3). The
     tags of the first items and the raw second items of the result
     entries are compared, in order, with the expected pairs. Only
     fixture 4 expects a non-empty result.
     """
     from IntelligentCrawl.config import Configuration
     # NOTE(review): this assignment is never undone, so the value stays on
     # Configuration for whatever runs afterwards -- confirm that is intended.
     Configuration.MinDataRegiontHeight = 3
     expectations = (
         (0, []),
         (3, []),
         (4, [('sky', [2, 0, 4])]),
         (5, []),
     )
     for tid, truevalue in expectations:
         root = etree.fromstring(self.xmlcode[tid])
         BasicFeatureExtraction.extractBasicStats(root)
         print(etree.tostring(root))
         regions = PartialTreeAlignment.findDataRegions(root, 2, 0.3)
         print(regions)
         found_tags = [region[0].tag for region in regions]
         found_drs = [region[1] for region in regions]
         wanted_tags = [want[0] for want in truevalue]
         wanted_drs = [want[1] for want in truevalue]
         self.assertListEqual(found_tags, wanted_tags, tid)
         self.assertListEqual(found_drs, wanted_drs, tid)
 def testPartialAlign(self):
     """Smoke-run partialAlign on fixtures 0, 1 and 2.

     The three fixtures are parsed, passed through extractBasicStats and
     collected into one list that is handed to partialAlign. The returned
     pattern (serialised as unicode) and data table are only printed;
     nothing is asserted.
     """
     grove = []
     for fixture_id in (0, 1, 2):
         tree = etree.fromstring(self.xmlcode[fixture_id])
         BasicFeatureExtraction.extractBasicStats(tree)
         grove.append(tree)
     pattern, dataTable = PartialTreeAlignment.partialAlign(grove)
     print(etree.tostring(pattern, encoding='unicode'))
     print(dataTable)