Esempio n. 1
0
 def __init__(self, fatherElement, centralDic):
     '''
     Set up the node's dot helper, type tag, name, dictionaries and
     parent link.

     fatherElement -- stored unchanged as self.fatherElement
     centralDic    -- stored as self.dic and also passed to Datas() to
                      build self.datadic
     '''
     # The doter is created and handed this instance before any other
     # attribute exists; keep these two lines first.
     self.doter = DataNodeDoter()
     self.doter.init(self)
     # Type tag and name of this node.
     self.type = 'datanode'
     self.setName('datas')
     # Keep the dictionary that was passed in, plus a Datas object built on it.
     self.dic = centralDic
     self.datadic = Datas(centralDic)
     # Data container for each page; starts empty.
     self.pagedatas = []
     # NOTE(review): presumably a cached importance value, 0 meaning "not
     # computed yet" -- confirm against the methods that read _imp.
     self._imp = 0
     self.fatherElement = fatherElement
Esempio n. 2
0
class DataNode:
    '''
    DataNode is a special StyleNode: a container for the content of
    nodes such as b, p, img, a and plain text.

    Every call to addFeatures() records the features twice: in the
    node-wide dictionary (self.datadic) and in a fresh per-page
    dictionary appended to self.pagedatas.  getCompImp() compares the
    two to score how evenly the node's data is spread over the pages.
    '''
    def __init__(self, fatherElement, centralDic):
        '''
        fatherElement -- stored unchanged as self.fatherElement
        centralDic    -- dictionary shared by every Datas object this
                         node creates
        '''
        # The doter receives the instance before any other attribute is
        # set; keep these two lines first.
        self.doter = DataNodeDoter()
        self.doter.init(self)
        self.type = 'datanode'
        self.setName('datas')
        # Shared dictionary and the node-wide data collection built on it.
        self.dic = centralDic
        self.datadic = Datas(centralDic)
        # One Datas object per page, in the order pages were added.
        self.pagedatas = []
        # Cached result of getCompImp(); 0 means "not computed yet".
        self._imp = 0
        self.fatherElement = fatherElement

    def addFeatures(self, features):
        '''
        Record the features of one page.

        The iterable is materialised once because it is consumed twice:
        by the node-wide dictionary and by the new per-page dictionary.
        '''
        features = list(features)
        self.datadic.addFeatures(features)
        page_dic = Datas(self.dic)
        page_dic.addFeatures(features)
        self.pagedatas.append(page_dic)

    def setName(self, data):
        '''Store str(data) as the node name.'''
        self._name = str(data)

    def hasData(self):
        '''Return whatever the node-wide dictionary reports for hasData().'''
        return self.datadic.hasData()

    def getName(self):
        '''Return the name stored by setName().'''
        return self._name

    def getP(self):
        '''
        get frequency

        Always 1 for a data node.
        '''
        return 1

    def getCompImp(self):
        '''
        Return the composite importance of this node.

        The value is 1 - (sum of per-entry entropies) / (number of
        entries in self.datadic), where each entropy uses the number of
        pages as logarithm base.

        Special cases:
          * a previously computed non-zero value is returned from cache
          * 0 when the node-wide dictionary is empty
          * 1 when there is exactly one page (not cached, as before)
        '''
        if self._imp:
            return self._imp

        m = len(self.pagedatas)
        l = self.datadic.size()
        if not l:
            return 0
        if m == 1:
            return 1

        total = sum(self._entropy(i, m) for i in range(l))
        # float() keeps this a true division on Python 2 as well.
        res = 1 - total / float(l)
        self._imp = res
        print('H(i):  %s' % (self._imp,))
        return res

    def _presenceDistribution(self, i):
        '''
        Return one probability per page for the i-th node-wide entry:
        1/n for each page that contains the entry and 0 for the others,
        n being the number of pages that contain it.
        '''
        data_index = self.datadic[i]
        print('data_index:  %s' % (data_index,))

        # 1 when the entry occurs anywhere in the page's data, else 0.
        hits = [
            1 if np.any(page.list.datas == data_index) else 0
            for page in self.pagedatas
        ]
        # Guard against division by zero when no page holds the entry.
        n = sum(hits) or 1
        print('n, m : %d, %d' % (n, len(self.pagedatas)))
        # float() is the fix: integer division truncated every
        # probability to 0 as soon as n > 1, so the entropy was always 0.
        return [hit / float(n) for hit in hits]

    def _entropy(self, i, m):
        '''
        Return the base-m entropy of the i-th entry's page distribution.
        '''
        # log base 1 is undefined; a single page carries no entropy.
        if m == 1:
            return 0
        res = 0
        for p in self._presenceDistribution(i):
            if not p:
                continue
            res -= p * math.log(p, m)
        return res

    # NOTE(review): the two helpers below use self.datas and self.nums,
    # which this class never assigns -- calling them raises
    # AttributeError unless something else sets those attributes.
    # They look copied from a list-like data class; confirm and remove
    # or initialise the attributes.
    def _addData(self, data):
        '''Append data to self.datas with an initial count of 1.'''
        self.datas.append(data)
        self.nums.append(1)

    def _incNum(self, pos):
        '''Increment the count stored at position pos.'''
        self.nums[pos] += 1

    def __str__(self):
        '''
        Return the doter's dot node text followed by a newline, then
        advance the doter's index.
        '''
        res = self.doter.initDotNode() + '\n'
        self.doter.incIndex()
        return res