def pvalue(self):
    """Compute goodness-of-fit statistics for the current corpus.

    The degree histogram of ``self.frontend.data`` is fitted with
    ``gofit`` and the resulting measures are written into the row
    ``self.corpus_pos`` of a table stored on ``self.gramexp`` (created
    on first use). When the last experiment is reached
    (``self._it == self.expe_size - 1``) the table is printed in LaTeX
    format.

    If ``gofit`` yields a falsy result (no fit), the measure cells of
    the row are set to ``nan`` instead of raising.
    """
    data = self.frontend.data

    d, dc = degree_hist(adj_to_degree(data), filter_zeros=True)
    gof = gofit(d, dc)

    gramexp = self.gramexp
    if not hasattr(gramexp, 'Table'):
        # First call: build the shared table. Column 0 holds the corpus
        # names, the remaining columns hold one measure each.
        corpuses = self.specname(gramexp.get_set('corpus'))
        Meas = ['pvalue', 'alpha', 'x_min', 'n_tail']
        Table = np.empty((len(corpuses), len(Meas)))
        Table = np.column_stack((corpuses, Table))
        gramexp.Table = Table
        gramexp.Meas = Meas
    else:
        Table = gramexp.Table
        Meas = gramexp.Meas

    if gof:
        for i, v in enumerate(Meas):
            # +1 skips the corpus-name column.
            Table[self.corpus_pos, i + 1] = gof[v]
    else:
        # The other analyses in this file treat a falsy `gofit` result as
        # "no fit"; mark the row as missing rather than crash on gof[v]
        # or leave the uninitialised np.empty() content in place.
        Table[self.corpus_pos, 1:] = np.nan

    if self._it == self.expe_size - 1:
        # Last experiment: every row has been visited, print the table.
        tablefmt = 'latex'
        print(colored('\nPvalue Table:', 'green'))
        print(
            self.tabulate(Table,
                          headers=Meas,
                          tablefmt=tablefmt,
                          floatfmt='.3f'))
def burstiness(self, clusters_org='source', _type='local'):
    """Zipf analysis: global, local (per-cluster) and feature burstiness.

    Three figures are produced:

    1. the global degree distribution with the fitted power law drawn
       as a dashed line,
    2. the inner degree distribution of every cluster,
    3. a bar chart of the cluster sizes.

    The goodness-of-fit measures of each cluster are stored in the table
    returned by ``self.init_fit_tables`` and, on the last experiment,
    the mean/std tables found in ``self.gramexp.tables`` are printed.

    Parameters
    ==========
    clusters_org: str in [source, model]
        Where the cluster assignment comes from: ``'source'`` uses
        ``frontend.get_clusters()``, ``'model'`` runs a clustering
        algorithm. Any other value leaves no clusters and the method
        returns after logging an error.
    _type: str
        Forwarded to ``self.init_fit_tables``.

    Returns
    =======
    None. The method returns early if the global fit fails or if no
    clusters are available.
    """
    expe = self.expe
    frontend = self.frontend
    data = frontend.data
    figs = []

    # ------------------------------------------------------------------
    # Global burstiness
    # ------------------------------------------------------------------
    d, dc = degree_hist(adj_to_degree(data), filter_zeros=True)
    fig = plt.figure()
    plot_degree(data, spec=True, title=self.specname(expe.corpus))

    gof = gofit(d, dc)
    if not gof:
        return

    alpha = gof['alpha']
    x_min = gof['x_min']
    y_max = gof['y_max']

    # Plot the linear law from the power-law estimation. The line starts
    # slightly before x_min (10% of the support), when that is possible.
    idx = d.searchsorted(x_min)
    i = int(idx - 0.1 * len(d))
    idx = i if i >= 0 else idx
    x = d[idx:]
    ylin = np.exp(-alpha * np.log(x / float(x_min)) + np.log(y_max))

    # Hack xticks: insert an extra 'x_min' tick while keeping the current
    # axis limits. The canvas is drawn first so the ticks are available.
    fig.canvas.draw()
    lim = plt.gca().get_xlim()
    locs, labels = plt.xticks()

    idx_xmin = locs.searchsorted(x_min)
    locs = np.insert(locs, idx_xmin, x_min)
    labels.insert(idx_xmin, plt.Text(text='x_min'))
    plt.xticks(locs, labels)
    plt.gca().set_xlim(lim)

    # Shift the power-law line towards the degree-1 polynomial fit of the
    # log-log histogram (75% of the gap measured at the last point).
    fit = np.polyfit(np.log(d), np.log(dc), deg=1)
    poly_fit = fit[0] * np.log(d) + fit[1]
    diff = np.abs(poly_fit[-1] - np.log(ylin[-1]))
    ylin = np.exp(np.log(ylin) + diff * 0.75)

    plt.plot(x, ylin, 'g--', label='power %.2f' % alpha)
    figs.append(plt.gcf())

    # ------------------------------------------------------------------
    # Local burstiness
    #
    # Get the class/cluster assignment used to split the adjacency
    # matrix into blocks.
    # ------------------------------------------------------------------
    clusters = None
    if clusters_org == 'source':
        clusters = frontend.get_clusters()
    elif clusters_org == 'model':
        # The model is loaded but not used by the clustering calls below.
        model = ModelManager.from_expe(expe, load=True)
        clusters = Louvain.get_clusters(frontend.to_directed(),
                                        resolution=10)
        # NOTE(review): `or True` makes this condition always true, so the
        # Louvain result above is always replaced — confirm this is still
        # wanted.
        if len(np.unique(clusters)) > 20 or True:
            clusters = Annealing(frontend.data,
                                 iterations=200,
                                 C_init=5,
                                 grow_rate=0).search()

    if clusters is None:
        lgg.error('No clusters here...passing')
        return

    block_hist = np.bincount(clusters)
    K = (block_hist != 0).sum()

    lgg.info('%d Clusters from `%s\':' % (K, clusters_org))

    expe.K = K
    assert 'model' not in expe
    expe.model = 'no_model'

    Table, Meas = self.init_fit_tables(_type=_type)

    # Just inner degree: every cluster is drawn on the same axes.
    f = plt.figure()
    ax = f.gca()

    # NOTE(review): `data` is frontend.data itself (no copy is taken), so
    # this zeroes the diagonal of the frontend matrix in place — confirm
    # that side effect is intended.
    np.fill_diagonal(data, 0)

    # Only the diagonal blocks (k == l) were ever processed by the
    # original nested loop, so iterate over them directly.
    it_k = 0
    for k in np.arange(K):
        members = clusters == k
        ixgrid = np.ix_(members, members)

        # Keep only the edges inside cluster k, zeros elsewhere.
        y = np.zeros(data.shape)
        y[ixgrid] = data[ixgrid]

        d, dc = degree_hist(adj_to_degree(y))
        if len(d) == 0:
            continue
        plot_degree_2((d, dc, None),
                      logscale=True,
                      colors=True,
                      line=True,
                      ax=ax,
                      title='Inner degree')

        gof = gofit(d, dc)
        if not gof:
            continue

        for i, v in enumerate(Meas):
            Table[self.corpus_pos, i, it_k] = gof[v]
        # Only successful fits consume a slot of the table.
        it_k += 1

    plt.suptitle(self.specname(expe.corpus))
    figs.append(plt.gcf())

    # ------------------------------------------------------------------
    # Features burstiness
    # ------------------------------------------------------------------
    plt.figure()
    hist, label = sorted_perm(block_hist, reverse=True)
    bins = len(hist)
    plt.bar(range(bins), hist)
    plt.xticks(np.arange(bins) + 0.5, label)
    plt.xlabel('Class labels')
    plt.title('Blocks Size (max assignement)')
    figs.append(plt.gcf())

    if expe._write:
        self.write_frames(figs)

    if self._it == self.expe_size - 1:
        for _model, table in self.gramexp.tables.items():

            # Mean and standard deviation over the last axis.
            table_mean = np.char.array(
                np.around(table.mean(2), decimals=3)).astype("|S20")
            table_std = np.char.array(
                np.around(table.std(2), decimals=3)).astype("|S20")
            # Raw bytes literal: same value as before, without the
            # invalid `\p` escape sequence.
            table = table_mean + rb' $\pm$ ' + table_std

            # Table formatting
            corpuses = self.specname(self.gramexp.get_set('corpus'))
            table = np.column_stack((self.specname(corpuses), table))
            tablefmt = 'simple'
            table = self.tabulate(
                table,
                headers=['__' + _model.upper() + '__'] + Meas,
                tablefmt=tablefmt,
                floatfmt='.3f')
            print()
            print(table)
def pvalue(self, _type='global'):
    """Fit degree distributions and print the goodness-of-fit tables.

    Similar to the zipf analysis, but only the fit measures are stored;
    no figure is drawn here.

    Parameters
    ==========
    _type: str in [global, local, feature]
        'global' fits the degree histogram of every matrix in
        ``self._Y``; 'local' fits the degrees restricted to each class
        (only the k == l couples are processed); 'feature' is not
        implemented.

    Returns
    =======
    None. Returns immediately when ``self.model`` is None.

    Raises
    ======
    NotImplementedError
        If `_type` is 'feature'.
    """
    if self.model is None:
        return
    expe = self.expe

    Y = self._Y
    N = Y[0].shape[0]  # overwritten from the model parameters in 'local'
    model = self.model

    Table, Meas = self.init_fit_tables(_type, Y)

    self.log.info('using `%s\' burstiness' % _type)

    if _type == 'global':
        ### Global degree
        for it_dat, data in enumerate(Y):
            d, dc = degree_hist(adj_to_degree(data), filter_zeros=True)
            gof = gofit(d, dc)
            if not gof:
                continue

            for i, v in enumerate(Meas):
                Table[self.corpus_pos, i, it_dat] = gof[v]

    elif _type == 'local':
        ### Z assignement method
        a, _b = model.get_params()
        N, K = a.shape
        print('theta shape: %s' % (str((N, K))))
        now = Now()
        if 'mmsb' in expe.model:
            ZZ = []
            for _i, _ in enumerate(Y):
                theta = self._Theta[_i]
                Z = np.empty((2, N, N))

                # Pairs (i, j) to sample: upper triangle only when the
                # experiment is symmetric, every cell otherwise.
                order = np.arange(N**2).reshape((N, N))
                if expe.symmetric:
                    triu = np.triu_indices(N)
                    order = order[triu]
                else:
                    order = order.flatten()
                order = zip(*np.unravel_index(order, (N, N)))

                for i, j in order:
                    Z[0, i, j] = categorical(theta[i])
                    Z[1, i, j] = categorical(theta[j])
                # Mirror the upper triangle onto the lower one.
                Z[0] = np.triu(Z[0]) + np.triu(Z[0], 1).T
                Z[1] = np.triu(Z[1]) + np.triu(Z[1], 1).T
                ZZ.append(Z)
            self.log.info('Z formation %s second', nowDiff(now))

        clustering = 'modularity'
        comm = model.communities_analysis(data=Y[0], clustering=clustering)
        # Count the non-empty blocks. `len(mask)` would return the number
        # of blocks whatever their size, not the number of active ones.
        n_active = np.count_nonzero(np.asarray(comm['block_hist']) > 0)
        print('clustering method: %s, active clusters ratio: %f' %
              (clustering, n_active / K))

        ### Iterate over all classes couple
        if expe.symmetric:
            # NOTE(review): for K > 1 this nested list mixes 1- and
            # 2-element lists (ragged); recent NumPy releases reject
            # ragged input when building the array — confirm against the
            # NumPy version in use.
            k_perm = np.unique(
                list(
                    map(
                        list,
                        map(
                            list,
                            map(set, itertools.product(range(K),
                                                       repeat=2))))))
        else:
            k_perm = itertools.product(range(K), repeat=2)

        for it_k, c in enumerate(k_perm):
            if isinstance(c, (np.integer, np.floating)):
                # Scalar entry (any NumPy integer/float width).
                k = l = c
            elif len(c) == 2:
                # Stochastic Equivalence (extra class bind)
                k, l = c
            else:
                # Communities (intra class bind)
                k = l = c.pop()

            # Only the intra-class couples are fitted.
            if k != l:
                continue

            YY = []
            if 'mmsb' in expe.model:
                for y, z in zip(Y, ZZ):  # take the len of ZZ if < Y
                    y_c = y.copy()
                    phi_c = np.zeros(y.shape)
                    # UNDIRECTED !
                    phi_c[(z[0] == k) & (z[1] == l)] = 1
                    # Keep only the edges assigned to the couple (k, l).
                    y_c[phi_c != 1] = 0
                    YY.append(y_c)
            elif 'ilfm' in expe.model:
                for _i, y in enumerate(Y):
                    theta = self._Theta[_i]
                    YY.append(
                        (y *
                         np.outer(theta[:, k], theta[:, l])).astype(int))

            d, dc, _yerr = random_degree(YY)
            if len(d) == 0:
                continue
            gof = gofit(d, dc)
            if not gof:
                continue

            for i, v in enumerate(Meas):
                Table[self.corpus_pos, i, it_k] = gof[v]

    elif _type == 'feature':
        raise NotImplementedError

    if self._it == self.expe_size - 1:
        for _model, table in self.gramexp.tables.items():

            # Mean and standard deviation over the last axis.
            table_mean = np.char.array(
                np.around(table.mean(2), decimals=3)).astype("|S20")
            table_std = np.char.array(
                np.around(table.std(2), decimals=3)).astype("|S20")
            # Raw bytes literal: same value as before, without the
            # invalid `\p` escape sequence.
            table = table_mean + rb' $\pm$ ' + table_std

            # Table formatting
            corpuses = self.specname(self.gramexp.get_set('corpus'))
            table = np.column_stack((self.specname(corpuses), table))
            tablefmt = 'simple'
            table = tabulate(table,
                             headers=['__' + _model.upper() + '__'] + Meas,
                             tablefmt=tablefmt,
                             floatfmt='.3f')
            print()
            print(table)
            if expe._write:
                if expe._mode == 'predictive':
                    base = '%s_%s_%s' % (self.specname(
                        expe.corpus), self.specname(_model), _type)
                else:
                    base = '%s_%s_%s' % ('MG', self.specname(_model),
                                         _type)
                self.write_frames(table, base=base, ext='md')