def __init__(self, fatherElement, centralDic):
    '''
    Initialise a data node that hangs below ``fatherElement``.

    fatherElement -- the element this node is attached to; stored as-is.
    centralDic    -- the shared dictionary; stored as ``self.dic`` and also
                     passed to ``Datas`` to build the node-level container.
    '''
    # Dot-output helper; it receives this node through init(self).
    self.doter = DataNodeDoter()
    self.doter.init(self)

    # Fixed type tag and default name of this node.
    self.type = 'datanode'
    self.setName('datas')

    # Shared dictionary plus the container that accumulates the data of
    # every page seen by this node.
    self.dic = centralDic
    self.datadic = Datas(centralDic)

    # One Datas container per page, in insertion order.
    self.pagedatas = []

    # Cached importance value; 0 means "not computed yet".
    self._imp = 0

    self.fatherElement = fatherElement
class DataNode:
    '''
    DataNode is a special StyleNode: a container for nodes like
    b, p, img, a and plain text.

    Every call to addFeatures() records one page. The features are merged
    into the node-level container (``datadic``) and also stored in a
    per-page container appended to ``pagedatas``.
    '''

    def __init__(self, fatherElement, centralDic):
        '''
        fatherElement -- the element this node is attached to.
        centralDic    -- shared dictionary handed to every Datas container.
        '''
        # Dot-output helper; it receives this node through init(self).
        self.doter = DataNodeDoter()
        self.doter.init(self)
        self.type = 'datanode'
        self.setName('datas')
        # Shared dictionary and the node-level data container.
        self.dic = centralDic
        self.datadic = Datas(centralDic)
        # One Datas container per page.
        self.pagedatas = []
        # Cached result of getCompImp(); 0 means "not computed yet".
        self._imp = 0
        self.fatherElement = fatherElement

    def addFeatures(self, features):
        '''
        Record the features of one page.

        The features are added to the node-level container and to a fresh
        per-page container, which is appended to ``pagedatas``.
        '''
        # Materialise once so the same items feed both containers, even
        # when ``features`` is a one-shot iterator.
        features = list(features)
        self.datadic.addFeatures(features)
        pageDic = Datas(self.dic)
        pageDic.addFeatures(features)
        self.pagedatas.append(pageDic)

    def setName(self, data):
        '''Store ``data`` (converted with str) as the node name.'''
        self._name = str(data)

    def hasData(self):
        '''Return whatever the node-level container reports for hasData().'''
        return self.datadic.hasData()

    def getName(self):
        '''Return the name set by setName().'''
        return self._name

    def getP(self):
        '''
        Get the frequency of this node.

        Currently always 1; delegating to fatherElement.getP() is disabled.
        '''
        return 1

    def getCompImp(self):
        '''
        Return the importance of this node, computed from the entropy of
        its data items over the recorded pages.

        For every item of ``datadic`` the pages containing it are found.
        Each such page gets probability 1/n (n = number of pages with the
        item), and the entropy H of that distribution is taken with
        logarithm base m (m = number of pages). The result is
        ``1 - mean(H)``.

        Returns 0 when the node holds no data and 1 when only one page was
        recorded. A non-zero result is cached in ``self._imp``.

        NOTE: addFeatures() does not reset the cache, so pages added after
        the first non-zero result are not reflected.
        '''
        if self._imp:
            return self._imp

        page_count = len(self.pagedatas)
        item_count = self.datadic.size()
        if not item_count:
            return 0
        # With a single page the logarithm base would be 1 (undefined), so
        # the importance is fixed at 1.
        if page_count == 1:
            return 1

        def distribution(pos):
            '''Per-page probabilities for the item at position ``pos``.'''
            data_index = self.datadic[pos]
            print('data_index:  %s' % (data_index,))
            flags = []
            for page in self.pagedatas:
                # np.where yields the positions where the page data equals
                # the item; an empty index array means "not on this page".
                hits = np.where(page.list.datas == data_index)
                flags.append(1 if len(hits[0]) else 0)
            # Avoid a zero denominator when no page holds the item.
            n = sum(flags) or 1
            print('n, m : %d, %d' % (n, page_count))
            # Explicit float division: integer division would truncate
            # every probability below 1 to 0.
            return [flag / float(n) for flag in flags]

        def entropy(pos):
            '''Entropy (base ``page_count``) of the item at ``pos``.'''
            res = 0
            for p in distribution(pos):
                if not p:
                    continue
                res -= p * math.log(p, page_count)
            return res

        total = sum([entropy(pos) for pos in range(item_count)])
        self._imp = 1 - total / float(item_count)
        print('H(i):  %s' % (self._imp,))
        return self._imp

    def _addData(self, data):
        '''Append ``data`` to self.datas with an initial count of 1.'''
        # NOTE(review): __init__ of this class never creates self.datas or
        # self.nums; confirm they are set elsewhere before this is called.
        self.datas.append(data)
        self.nums.append(1)

    def _incNum(self, pos):
        '''Increase the count stored at position ``pos`` of self.nums.'''
        # NOTE(review): relies on self.nums, which __init__ does not create.
        self.nums[pos] += 1

    def __str__(self):
        '''
        Return the dot description of this node followed by a newline.

        Side effect: advances the doter index via incIndex().
        '''
        res = self.doter.initDotNode() + '\n'
        self.doter.incIndex()
        return res