def homo_mustach(self, frame):
    """ Homophily 'mustache' plot: box plots of model similarity for linked vs non-linked pairs. """
    if self.model is None:
        return

    expe = self.expe
    figs = []

    Y = self._Y
    N = Y[0].shape[0]
    model = self.model

    if not hasattr(self.gramexp, 'tables'):
        corpuses = self.specname(self.gramexp.get_set('corpus'))
        models = self.gramexp.get_set('model')
        tables = {}
        for m in models:
            sim = ['natural', 'latent']
            Meas = ['links', 'non-links']
            table = {'natural': {'links': [], 'non-links': []},
                     'latent': {'links': [], 'non-links': []}}
            tables[m] = table

        self.gramexp.Meas = Meas
        self.gramexp.tables = tables
        table = tables[expe.model]
    else:
        table = self.gramexp.tables[expe.model]
        Meas = self.gramexp.Meas

    ### Global degree
    d, dc, yerr = random_degree(Y)

    sim_nat = model.similarity_matrix(sim='natural')
    sim_lat = model.similarity_matrix(sim='latent')
    step_tab = len(self.specname(self.gramexp.get_set('corpus')))

    if not hasattr(self.gramexp._figs[expe.model], 'damax'):
        damax = -np.inf
    else:
        damax = self.gramexp._figs[expe.model].damax
    self.gramexp._figs[expe.model].damax = max(sim_nat.max(), sim_lat.max(), damax)

    for it_dat, data in enumerate(Y):
        #homo_object = data
        #homo_object = model.likelihood()

        table['natural']['links'].extend(sim_nat[data == 1].tolist())
        table['natural']['non-links'].extend(sim_nat[data == 0].tolist())
        table['latent']['links'].extend(sim_lat[data == 1].tolist())
        table['latent']['non-links'].extend(sim_lat[data == 0].tolist())

    if self._it == self.expe_size - 1:
        for _model, table in self.gramexp.tables.items():
            ax = self.gramexp._figs[_model].fig.gca()
            # whiskers span the full data range
            bp = ax.boxplot([table['natural']['links']], widths=0.5, positions=[1], whis=(0, 100))
            bp = ax.boxplot([table['natural']['non-links']], widths=0.5, positions=[2], whis=(0, 100))
            bp = ax.boxplot([table['latent']['links']], widths=0.5, positions=[4], whis=(0, 100))
            bp = ax.boxplot([table['latent']['non-links']], widths=0.5, positions=[5], whis=(0, 100))

            ax.set_ylabel('Similarity')
            ax.set_xticks([1.5, 4.5])
            ax.set_xticklabels(('natural', 'latent'), rotation=0)
            ax.set_xlim(0, 6)

            nbox = 4
            top = self.gramexp._figs[_model].damax
            pos = [1, 2, 4, 5]
            upperLabels = ['linked', 'non-linked'] * 2
            #weights = ['light', 'ultralight']
            weights = ['normal', 'normal']
            for tick in range(nbox):
                ax.text(pos[tick], top + top * 0.015, upperLabels[tick],
                        horizontalalignment='center', weight=weights[tick % 2])

            print(_model)
            # Two-sample t-test: are linked pairs significantly more similar than non-linked ones?
            t1 = sp.stats.ttest_ind(table['natural']['links'], table['natural']['non-links'])
            t2 = sp.stats.ttest_ind(table['latent']['links'], table['latent']['non-links'])
            print(t1)
            print(t2)
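# Illustrative sketch (toy data, not part of the pipeline): the boxplot/t-test above asks
# whether linked pairs are more similar than non-linked ones. A minimal standalone check,
# assuming only numpy and scipy, looks like this:
#
#   import numpy as np
#   import scipy as sp
#   import scipy.stats
#
#   rng = np.random.RandomState(0)
#   sim_links = rng.normal(0.6, 0.1, 500)       # toy similarities of linked pairs
#   sim_nonlinks = rng.normal(0.4, 0.1, 5000)   # toy similarities of non-linked pairs
#   t = sp.stats.ttest_ind(sim_links, sim_nonlinks)
#   print(t)  # a large statistic / small pvalue is the homophily signature printed above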
def pvalue(self, _type='global'):
    """ Similar to the zipf/burstiness analysis, but computes goodness-of-fit pvalues and prints a table.

        Parameters
        ==========
        _type: str in [global, local, feature]
    """
    if self.model is None:
        return

    expe = self.expe
    figs = []

    Y = self._Y
    N = Y[0].shape[0]
    model = self.model

    Table, Meas = self.init_fit_tables(_type, Y)

    self.log.info('using `%s\' burstiness' % _type)

    if _type == 'global':
        ### Global degree
        for it_dat, data in enumerate(Y):
            d, dc = degree_hist(adj_to_degree(data), filter_zeros=True)
            gof = gofit(d, dc)
            if not gof:
                continue

            for i, v in enumerate(Meas):
                Table[self.corpus_pos, i, it_dat] = gof[v]

    elif _type == 'local':
        ### Z assignment method
        a, b = model.get_params()
        N, K = a.shape
        print('theta shape: %s' % (str((N, K))))

        now = Now()
        if 'mmsb' in expe.model:
            ZZ = []
            for _i, _ in enumerate(Y):
                #for _ in Y: # Does not reflect the real local degree !
                theta = self._Theta[_i]
                phi = self._Phi[_i]
                Z = np.empty((2, N, N))
                order = np.arange(N**2).reshape((N, N))
                if expe.symmetric:
                    triu = np.triu_indices(N)
                    order = order[triu]
                else:
                    order = order.flatten()
                order = zip(*np.unravel_index(order, (N, N)))

                for i, j in order:
                    Z[0, i, j] = categorical(theta[i])
                    Z[1, i, j] = categorical(theta[j])
                Z[0] = np.triu(Z[0]) + np.triu(Z[0], 1).T
                Z[1] = np.triu(Z[1]) + np.triu(Z[1], 1).T
                ZZ.append(Z)
            self.log.info('Z formation %s seconds', nowDiff(now))

        clustering = 'modularity'
        comm = model.communities_analysis(data=Y[0], clustering=clustering)
        print('clustering method: %s, active clusters ratio: %f' % (
            clustering, (comm['block_hist'] > 0).sum() / K))

        local_degree_c = {}

        ### Iterate over all couples of classes
        if expe.symmetric:
            #k_perm = np.unique(map(list, map(set, itertools.product(np.unique(clusters), repeat=2))))
            k_perm = np.unique(list(map(list, map(list, map(set, itertools.product(range(K), repeat=2))))))
        else:
            #k_perm = itertools.product(np.unique(clusters), repeat=2)
            k_perm = itertools.product(range(K), repeat=2)

        for it_k, c in enumerate(k_perm):
            if isinstance(c, (np.int64, np.float64)):
                k = l = c
            elif len(c) == 2:
                # Stochastic Equivalence (extra-class bind)
                k, l = c
                #continue
            else:
                # Communities (intra-class bind)
                k = l = c.pop()
            #if i > expe.limit_class:
            #    break
            if k != l:
                continue

            degree_c = []
            YY = []
            if 'mmsb' in expe.model:
                for y, z in zip(Y, ZZ): # takes the len of ZZ if < Y
                    y_c = y.copy()
                    phi_c = np.zeros(y.shape)
                    # UNDIRECTED !
                    phi_c[(z[0] == k) & (z[1] == l)] = 1 #; phi_c[(z[0] == l) & (z[1] == k)] = 1
                    y_c[phi_c != 1] = 0
                    #degree_c += adj_to_degree(y_c).values()
                    #yerr = None
                    YY.append(y_c)
            elif 'ilfm' in expe.model:
                for _i, y in enumerate(Y):
                    theta = self._Theta[_i]
                    YY.append((y * np.outer(theta[:, k], theta[:, l])).astype(int))

            d, dc, yerr = random_degree(YY)
            if len(d) == 0:
                continue
            gof = gofit(d, dc)
            if not gof:
                continue

            for i, v in enumerate(Meas):
                Table[self.corpus_pos, i, it_k] = gof[v]

    elif _type == 'feature':
        raise NotImplementedError

    if self._it == self.expe_size - 1:
        for _model, table in self.gramexp.tables.items():
            # Mean and standard deviation
            table_mean = np.char.array(np.around(table.mean(2), decimals=3)).astype("|S20")
            table_std = np.char.array(np.around(table.std(2), decimals=3)).astype("|S20")
            table = table_mean + b' $\pm$ ' + table_std

            # Table formatting
            corpuses = self.specname(self.gramexp.get_set('corpus'))
            table = np.column_stack((corpuses, table))
            tablefmt = 'simple'
            table = tabulate(table, headers=['__' + _model.upper() + '__'] + Meas,
                             tablefmt=tablefmt, floatfmt='.3f')
            print()
            print(table)
            if expe._write:
                if expe._mode == 'predictive':
                    base = '%s_%s_%s' % (self.specname(expe.corpus), self.specname(_model), _type)
                else:
                    base = '%s_%s_%s' % ('MG', self.specname(_model), _type)
                self.write_frames(table, base=base, ext='md')
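# Illustrative sketch (assumption, not the project's actual helper): the `categorical(theta[i])`
# calls above are assumed to draw a single class index from the membership vector theta[i], e.g.:
#
#   import numpy as np
#
#   def categorical_draw(p, rng=np.random):
#       """Sample one class index from the probability vector p."""
#       return rng.multinomial(1, p).argmax()
#
#   theta_i = np.array([0.7, 0.2, 0.1])
#   z_i = categorical_draw(theta_i)   # returns 0 with probability 0.7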
def homo(self, _type='pearson', _sim='latent'):
    """ Homophily test -- table output

        Parameters
        ==========
        _type: similarity type in (contingency, pearson)
        _sim: similarity metric in (natural, latent)
    """
    if self.model is None:
        return

    expe = self.expe
    figs = []

    Y = self._Y
    N = Y[0].shape[0]
    model = self.model

    self.log.info('using `%s\' type' % _type)

    if not hasattr(self.gramexp, 'tables'):
        corpuses = self.specname(self.gramexp.get_set('corpus'))
        models = self.gramexp.get_set('model')
        tables = {}
        for m in models:
            if _type == 'pearson':
                Meas = ['pearson coeff', '2-tailed pvalue']
                table = np.empty((len(corpuses), len(Meas), len(Y)))
            elif _type == 'contingency':
                Meas = ['natural', 'latent', 'natural', 'latent']
                table = np.empty((2 * len(corpuses), len(Meas), len(Y)))
            tables[m] = table

        self.gramexp.Meas = Meas
        self.gramexp.tables = tables
        table = tables[expe.model]
    else:
        table = self.gramexp.tables[expe.model]
        Meas = self.gramexp.Meas

    if _type == 'pearson':
        self.log.info('using `%s\' similarity' % _sim)

        # No variance for the link expectation !!!
        Y = [Y[0]]

        ### Global degree
        d, dc, yerr = random_degree(Y)
        sim = model.similarity_matrix(sim=_sim)
        #plot(sim, title='Similarity', sort=True)
        #plot_degree(sim)
        for it_dat, data in enumerate(Y):
            #homo_object = data
            homo_object = model.likelihood()
            table[self.corpus_pos, :, it_dat] = sp.stats.pearsonr(homo_object.flatten(), sim.flatten())

    elif _type == 'contingency':
        ### Global degree
        d, dc, yerr = random_degree(Y)
        sim_nat = model.similarity_matrix(sim='natural')
        sim_lat = model.similarity_matrix(sim='latent')
        step_tab = len(self.specname(self.gramexp.get_set('corpus')))
        for it_dat, data in enumerate(Y):
            #homo_object = data
            homo_object = model.likelihood()

            table[self.corpus_pos, 0, it_dat] = sim_nat[data == 1].mean()
            table[self.corpus_pos, 1, it_dat] = sim_lat[data == 1].mean()
            table[self.corpus_pos, 2, it_dat] = sim_nat[data == 1].var()
            table[self.corpus_pos, 3, it_dat] = sim_lat[data == 1].var()
            table[self.corpus_pos + step_tab, 0, it_dat] = sim_nat[data == 0].mean()
            table[self.corpus_pos + step_tab, 1, it_dat] = sim_lat[data == 0].mean()
            table[self.corpus_pos + step_tab, 2, it_dat] = sim_nat[data == 0].var()
            table[self.corpus_pos + step_tab, 3, it_dat] = sim_lat[data == 0].var()

    if self._it == self.expe_size - 1:
        for _model, table in self.gramexp.tables.items():
            # Function in (utils. ?)
            # Mean and standard deviation
            table_mean = np.char.array(np.around(table.mean(2), decimals=3)).astype("|S20")
            table_std = np.char.array(np.around(table.std(2), decimals=3)).astype("|S20")
            table = table_mean + b' $\pm$ ' + table_std

            # Table formatting
            corpuses = self.specname(self.gramexp.get_set('corpus'))
            try:
                table = np.column_stack((corpuses, table))
            except ValueError:
                # contingency tables have 2 * len(corpuses) rows (links / non-links)
                table = np.column_stack((corpuses * 2, table))

            tablefmt = 'simple' # 'latex'
            table = tabulate(table, headers=['__' + _model.upper() + '__'] + Meas,
                             tablefmt=tablefmt, floatfmt='.3f')
            print()
            print(table)
            if expe._write:
                base = '%s_homo_%s' % (self.specname(_model), _type)
                self.write_frames(table, base=base, ext='md')
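# Illustrative sketch (toy data, not the experiment pipeline): the pearson branch above
# correlates the model's link expectation with a pairwise similarity matrix. In isolation:
#
#   import numpy as np
#   import scipy as sp
#   import scipy.stats
#
#   rng = np.random.RandomState(1)
#   sim = rng.rand(50, 50)                            # stand-in for model.similarity_matrix(...)
#   likelihood = 0.8 * sim + 0.2 * rng.rand(50, 50)   # stand-in for model.likelihood()
#   coeff, pval = sp.stats.pearsonr(likelihood.flatten(), sim.flatten())
#   print(coeff, pval)  # coeff close to 1 -> strong homophily under this metric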
def burstiness(self, _type='all'):
    '''Zipf analysis: global burstiness + local burstiness + feature burstiness.

        Parameters
        ----------
        _type : str
            type of burstiness to compute in ('global', 'local', 'feature', 'all')
    '''
    if self.model is None:
        return

    expe = self.expe
    figs = []

    Y = self._Y
    N = Y[0].shape[0]
    model = self.model

    if _type in ('global', 'all'):
        # Global burstiness
        d, dc, yerr = random_degree(Y)
        fig = plt.figure()
        title = 'global | %s, %s' % (self.specname(expe.get('corpus')), self.specname(expe.model))
        plot_degree_2((d, dc, yerr), logscale=True, title=title)

        figs.append(plt.gcf())

    if _type in ('local', 'all'):
        # Local burstiness
        print('Computing local preferential attachment')
        a, b = model.get_params()
        N, K = a.shape
        print('theta shape: %s' % (str((N, K))))

        now = Now()
        if 'mmsb' in expe.model:
            ### Z assignment method
            ZZ = []
            for _i, _ in enumerate(Y):
                #for _ in Y: # Does not reflect the real local degree !
                theta = self._Theta[_i]
                phi = self._Phi[_i]
                Z = np.empty((2, N, N))
                order = np.arange(N**2).reshape((N, N))
                if expe.symmetric:
                    triu = np.triu_indices(N)
                    order = order[triu]
                else:
                    order = order.flatten()
                order = zip(*np.unravel_index(order, (N, N)))

                for i, j in order:
                    Z[0, i, j] = categorical(theta[i])
                    Z[1, i, j] = categorical(theta[j])
                Z[0] = np.triu(Z[0]) + np.triu(Z[0], 1).T
                Z[1] = np.triu(Z[1]) + np.triu(Z[1], 1).T
                ZZ.append(Z)
            self.log.info('Z formation %s seconds' % nowDiff(now))

        clustering = 'modularity'
        comm = model.communities_analysis(data=Y[0], clustering=clustering)
        print('clustering method: %s, active clusters ratio: %f' % (
            clustering, (comm['block_hist'] > 0).sum() / K))

        local_degree_c = {}

        ### Iterate over all couples of classes
        if expe.symmetric:
            #k_perm = np.unique(map(list, map(set, itertools.product(np.unique(clusters), repeat=2))))
            k_perm = np.unique(list(map(list, map(list, map(set, itertools.product(range(K), repeat=2))))))
        else:
            #k_perm = itertools.product(np.unique(clusters), repeat=2)
            k_perm = itertools.product(range(K), repeat=2)

        fig = plt.figure()
        for i, c in enumerate(k_perm):
            if isinstance(c, (np.int64, np.float64)):
                k = l = c
            elif len(c) == 2:
                # Stochastic Equivalence (outer class)
                k, l = c
            else:
                # Communities (inner class)
                k = l = c.pop()
            #if i > expe.limit_class:
            #    break
            if k != l:
                continue

            degree_c = []
            YY = []
            if 'mmsb' in expe.model:
                for y, z in zip(Y, ZZ): # takes the len of ZZ if < Y
                    y_c = np.zeros(y.shape)
                    phi_c = np.zeros(y.shape)
                    # UNDIRECTED !
                    phi_c[(z[0] == k) & (z[1] == l)] = 1
                    y_c = y * phi_c
                    #degree_c += adj_to_degree(y_c).values()
                    #yerr = None
                    YY.append(y_c)
            elif 'ilfm' in expe.model: # or Corpus !
                for _i, y in enumerate(Y):
                    theta = self._Theta[_i]
                    if theta.shape[1] <= max(k, l):
                        print('warning: not all blocks converted.')
                        continue
                    YY.append((y * np.outer(theta[:, k], theta[:, l])).astype(int))

            d, dc, yerr = random_degree(YY)
            if len(d) == 0:
                continue

            title = 'local | %s, %s' % (self.specname(expe.get('corpus')), self.specname(expe.model))
            plot_degree_2((d, dc, yerr), logscale=True, colors=True, line=True, title=title)

        figs.append(plt.gcf())

    # Blockmodel Analysis
    #if _type in ('feature', 'all'):
    #    plt.figure()
    #    if 'mmsb' in expe.model:
    #        # Feature burstiness
    #        hist, label = clusters_hist(comm['clusters'])
    #        bins = len(hist)
    #        plt.bar(range(bins), hist)
    #        plt.xticks(np.arange(bins) + 0.5, label)
    #        plt.xlabel('Class labels')
    #        plt.title('Blocks Size (max assignment)')
    #    elif 'ilfm' in expe.model:
    #        # Feature burstiness
    #        hist, label = sorted_perm(comm['block_hist'], reverse=True)
    #        bins = len(hist)
    #        plt.bar(range(bins), hist)
    #        plt.xticks(np.arange(bins) + 0.5, label)
    #        plt.xlabel('Class labels')
    #        plt.title('Blocks Size (max assignment)')
    #
    #    figs.append(plt.gcf())

    if expe._write:
        if expe._mode == 'predictive':
            base = '%s_%s' % (self.specname(expe.corpus), self.specname(expe.model))
        else:
            base = '%s_%s' % ('MG', self.specname(expe.model))
        self.write_frames(figs, base=base)
        return
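# Illustrative sketch (assumption about the helpers, not their actual code): `adj_to_degree`,
# `degree_hist` and `random_degree` above are assumed to reduce adjacency matrices to a
# (degree, count) histogram before the log-log degree plot. A bare numpy equivalent:
#
#   import numpy as np
#
#   def degree_counts(adj):
#       """Return sorted unique degrees and their counts for a binary adjacency matrix."""
#       degrees = np.asarray(adj).sum(axis=1)
#       d, dc = np.unique(degrees[degrees > 0], return_counts=True)
#       return d, dc
#
#   adj = (np.random.RandomState(2).rand(100, 100) < 0.05).astype(int)
#   d, dc = degree_counts(adj)
#   # A heavy-tailed dc vs d on a log-log scale is the "burstiness" signature plotted above.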