def homo(self, _type='pearson', _sim='latent'):
    """ Homophily test -- table output.

    Fills, for the current experiment, the row of the result table shared
    through ``self.gramexp.tables`` (created on the first call together
    with ``self.gramexp.Meas``). On the last experiment
    (``self._it == self.expe_size - 1``) one formatted table per model is
    printed and, if ``expe._write`` is set, handed to ``self.write_frames``.

    Parameters
    ==========
    _type: similarity type in (contingency, pearson)
    _sim: similarity metric in (natural, latent); only read when
        `_type` is 'pearson'

    Raises
    ======
    ValueError: when the tables must be initialised and `_type` is
        neither 'pearson' nor 'contingency'.
    """
    if self.model is None:
        return

    expe = self.expe
    Y = self._Y
    model = self.model

    self.log.info('using `%s\' type' % _type)

    if not hasattr(self.gramexp, 'tables'):
        # First call: allocate one table per model and share them (and the
        # column names) through gramexp so later experiments reuse them.
        corpuses = self.specname(self.gramexp.get_set('corpus'))
        models = self.gramexp.get_set('model')

        if _type == 'pearson':
            Meas = ['pearson coeff', '2-tailed pvalue']
            n_rows = len(corpuses)
        elif _type == 'contingency':
            # Columns 0-1 receive means and columns 2-3 variances (see the
            # contingency branch below); two rows are kept per corpus.
            Meas = ['natural', 'latent', 'natural', 'latent']
            n_rows = 2 * len(corpuses)
        else:
            raise ValueError("unknown homophily type `%s'; expected "
                             "'pearson' or 'contingency'" % _type)

        shape = (n_rows, len(Meas), len(Y))
        tables = {m: np.empty(shape) for m in models}

        self.gramexp.Meas = Meas
        self.gramexp.tables = tables
        table = tables[expe.model]
    else:
        table = self.gramexp.tables[expe.model]
        Meas = self.gramexp.Meas

    if _type == 'pearson':
        self.log.info('using `%s\' similarity' % _sim)
        # No variance for link expectation: only the first sample is used.
        Y = [Y[0]]

        ### Global degree
        # NOTE(review): the result is unused; the call is kept because it
        # cannot be confirmed from this file that random_degree() has no
        # side effects.
        random_degree(Y)
        sim = model.similarity_matrix(sim=_sim)
        for it_dat, data in enumerate(Y):
            homo_object = model.likelihood()
            table[self.corpus_pos, :, it_dat] = sp.stats.pearsonr(
                homo_object.flatten(), sim.flatten())

    elif _type == 'contingency':
        ### Global degree
        # NOTE(review): result unused, call kept for possible side effects.
        random_degree(Y)
        sim_nat = model.similarity_matrix(sim='natural')
        sim_lat = model.similarity_matrix(sim='latent')
        # Row offset between the `data == 1` and `data == 0` halves.
        step_tab = len(self.specname(self.gramexp.get_set('corpus')))
        for it_dat, data in enumerate(Y):
            # NOTE(review): result unused in this branch; call kept because
            # side effects of likelihood() cannot be ruled out from here.
            model.likelihood()
            for offset, mask in ((0, data == 1), (step_tab, data == 0)):
                row = self.corpus_pos + offset
                table[row, 0, it_dat] = sim_nat[mask].mean()
                table[row, 1, it_dat] = sim_lat[mask].mean()
                table[row, 2, it_dat] = sim_nat[mask].var()
                table[row, 3, it_dat] = sim_lat[mask].var()

    if self._it == self.expe_size - 1:
        # Last experiment: format and emit one table per model.
        for _model, table in self.gramexp.tables.items():
            # Mean and standard deviation over the last axis.
            table_mean = np.char.array(
                np.around(table.mean(2), decimals=3)).astype("|S20")
            table_std = np.char.array(
                np.around(table.std(2), decimals=3)).astype("|S20")
            table = table_mean + b' $\\pm$ ' + table_std

            # Table formatting
            corpuses = self.specname(self.gramexp.get_set('corpus'))
            try:
                table = np.column_stack((corpuses, table))
            except ValueError:
                # Row count mismatch: contingency tables hold two rows
                # per corpus, so the label column is doubled.
                table = np.column_stack((corpuses * 2, table))
            tablefmt = 'simple'  # 'latex'
            table = tabulate(table,
                             headers=['__' + _model.upper() + '__'] + Meas,
                             tablefmt=tablefmt,
                             floatfmt='.3f')
            print()
            print(table)
            if expe._write:
                base = '%s_homo_%s' % (self.specname(_model), _type)
                self.write_frames(table, base=base, ext='md')
def roc_evolution(self, _type='testset', _type2='max', _ratio=20, _type3='errorbar'):
    ''' AUC difference between two models against testset_ratio.

        * _type : learnset/testset
        * _type2 : max/min/mean (name of the numpy reduction applied over
          the repetitions)
        * _ratio : ratio, in percent, of the training set to predict.
          If >= 100 (or negative) _predictall will be true
        * _type3 : errorbar/boxplot

        Stores the AUC of the current experiment in the table returned by
        ``self.init_roc_tables()``; on the last experiment
        (``self._it == self.expe_size - 1``) plots and prints the
        difference between the two models and, if ``expe._write`` is set,
        passes figure and table to ``self.write_frames``.

        Raises ValueError if `_type` is not 'testset' or 'learnset'.
    '''
    expe = self.expe
    model = self.model
    data = self.frontend.data

    _ratio = int(_ratio)
    _predictall = (_ratio >= 100) or (_ratio < 0)
    if not hasattr(expe, 'testset_ratio'):
        expe.testset_ratio = 20
        self.testset_ratio_pos = 0
    else:
        self.testset_ratio_pos = self.pt['testset_ratio']

    table, Meas = self.init_roc_tables()

    if _type == 'testset':
        y_true, probas = model.mask_probas(data)
        if not _predictall:
            # Keep a number of test points equal to `_ratio` percent of
            # the size of the training set.
            n_d = int(_ratio / 100 * data.size
                      * (1 - expe.testset_ratio / 100)
                      / (1 - _ratio / 100))
            y_true = y_true[:n_d]
            probas = probas[:n_d]
    elif _type == 'learnset':
        # `_ratio` is a percentage, as in the test-set branch; it was
        # previously used as a raw multiplier, which always selected the
        # whole data set.
        if _predictall:
            n = data.size
        else:
            n = int(data.size * _ratio / 100)
        mask_index = np.unravel_index(
            np.random.permutation(data.size)[:n], data.shape)
        y_true = data[mask_index]
        probas = model.likelihood()[mask_index]
    else:
        raise ValueError("unknown _type `%s'; expected 'testset' or "
                         "'learnset'" % _type)

    fpr, tpr, _thresholds = roc_curve(y_true, probas)
    roc_auc = auc(fpr, tpr)

    # Table axes: corpus, testset ratio, repetition, model.
    table[self.corpus_pos, self.testset_ratio_pos,
          self.pt['_repeat'], self.model_pos] = roc_auc

    if self._it == self.expe_size - 1:
        # Reduce over the repetitions, separately for each of the two models.
        take_type = getattr(np, _type2)
        t = ma.array(np.empty(table[:, :, 0, :].shape), mask=True)
        t[:, :, 0] = take_type(table[:, :, :, 0], -1)
        t[:, :, 1] = take_type(table[:, :, :, 1], -1)
        table_mean = t.copy()
        t[:, :, 0] = table[:, :, :, 0].std(-1)
        t[:, :, 1] = table[:, :, :, 1].std(-1)
        table_std = t

        # The measure is the comparison of two AUC: the model whose name
        # ends with 'mmsb_cgs' minus the other one.
        id_mmsb = [
            i for i, s in enumerate(self.gramexp.exp_tensor['model'])
            if s.endswith('mmsb_cgs')
        ][0]
        id_ibp = 1 if id_mmsb == 0 else 0
        table_mean = table_mean[:, :, id_mmsb] - table_mean[:, :, id_ibp]
        table_std = table_std[:, :, id_mmsb] + table_std[:, :, id_ibp]

        if _type2 != 'mean':
            # Error bars are only drawn around the mean.
            table_std = [None] * len(table_std)

        fig = plt.figure()
        corpuses = self.specname(self.gramexp.get_set('corpus'))
        for i in range(len(corpuses)):
            if _type3 == 'errorbar':
                # NOTE(review): `_markers` is defined outside this block;
                # `.next()` is kept as-is — confirm it is not a plain
                # Python 3 iterator, which would need `next(_markers)`.
                plt.errorbar(list(map(int, Meas)), table_mean[i],
                             yerr=table_std[i],
                             fmt=_markers.next(),
                             label=corpuses[i])
            elif _type3 == 'boxplot':
                bplot = table[i, :, :, 0] - table[i, :, :, 1]
                plt.boxplot(bplot.T, labels=corpuses[i])
                fig.gca().set_xticklabels(Meas)

        # Dashed zero line: reference for "no difference" between models.
        plt.errorbar(Meas, [0] * len(Meas), linestyle='--', color='k')
        plt.legend(loc='lower left', prop={'size': 7})

        # Table formatting
        table = table_mean
        table = np.column_stack((self.specname(corpuses), table))
        tablefmt = 'simple'
        headers = [''] + Meas
        table = tabulate(table,
                         headers=headers,
                         tablefmt=tablefmt,
                         floatfmt='.3f')
        print()
        print(table)
        if expe._write:
            base = '%s_%s_%s' % (_type, _type2, _ratio)
            figs = {
                'roc_evolution': ExpSpace({
                    'fig': fig,
                    'table': table,
                    'base': base
                })
            }
            self.write_frames(figs)
def pvalue(self, _type='global'):
    """ similar to zipf but compute pvalue and print table

    Fills the table returned by ``self.init_fit_tables(_type, Y)`` with
    the goodness-of-fit measures returned by ``gofit`` and, on the last
    experiment (``self._it == self.expe_size - 1``), prints one formatted
    table per entry of ``self.gramexp.tables`` and optionally writes it
    through ``self.write_frames``.

    Parameters
    ==========
    _type: str in [global, local, feature]

    Raises
    ======
    NotImplementedError: when `_type` is 'feature'.
    """
    if self.model is None:
        return

    expe = self.expe
    Y = self._Y
    model = self.model

    Table, Meas = self.init_fit_tables(_type, Y)

    self.log.info('using `%s\' burstiness' % _type)

    if _type == 'global':
        ### Global degree
        for it_dat, data in enumerate(Y):
            d, dc = degree_hist(adj_to_degree(data), filter_zeros=True)
            gof = gofit(d, dc)
            if not gof:
                continue

            for i, v in enumerate(Meas):
                Table[self.corpus_pos, i, it_dat] = gof[v]

    elif _type == 'local':
        ### Z assignment method
        a, b = model.get_params()
        N, K = a.shape
        print('theta shape: %s' % (str((N, K))))
        now = Now()
        if 'mmsb' in expe.model:
            ZZ = []
            for _i, _ in enumerate(Y):
                # Do not reflect real local degree !
                theta = self._Theta[_i]
                Z = np.empty((2, N, N))
                order = np.arange(N**2).reshape((N, N))
                if expe.symmetric:
                    triu = np.triu_indices(N)
                    order = order[triu]
                else:
                    order = order.flatten()
                order = zip(*np.unravel_index(order, (N, N)))

                for i, j in order:
                    Z[0, i, j] = categorical(theta[i])
                    Z[1, i, j] = categorical(theta[j])
                # Mirror the upper triangle onto the lower one.
                Z[0] = np.triu(Z[0]) + np.triu(Z[0], 1).T
                Z[1] = np.triu(Z[1]) + np.triu(Z[1], 1).T
                ZZ.append(Z)
            self.log.info('Z formation %s second', nowDiff(now))

        clustering = 'modularity'
        comm = model.communities_analysis(data=Y[0], clustering=clustering)
        # Count the clusters with a strictly positive histogram entry;
        # len() of the boolean mask would always give the total number.
        n_active = np.count_nonzero(np.asarray(comm['block_hist']) > 0)
        print('clustering method: %s, active clusters ratio: %f' %
              (clustering, n_active / K))

        ### Iterate over all classes couple
        if expe.symmetric:
            # NOTE(review): for K > 1 np.unique receives lists of unequal
            # length; recent NumPy releases reject such ragged input unless
            # dtype=object is given — confirm against the NumPy in use.
            k_perm = np.unique(
                list(
                    map(
                        list,
                        map(
                            list,
                            map(set,
                                itertools.product(range(K), repeat=2))))))
        else:
            k_perm = itertools.product(range(K), repeat=2)

        for it_k, c in enumerate(k_perm):
            if isinstance(c, (np.integer, np.floating)):
                # np.unique flattened the input to scalars.
                k = l = c
            elif len(c) == 2:
                # Stochastic Equivalence (extra class bind)
                k, l = c
            else:
                # Communities (intra class bind)
                k = l = c.pop()

            # Only intra-class couples are evaluated.
            if k != l:
                continue

            YY = []
            if 'mmsb' in expe.model:
                for y, z in zip(Y, ZZ):  # take the len of ZZ if < Y
                    y_c = y.copy()
                    phi_c = np.zeros(y.shape)
                    # UNDIRECTED !
                    phi_c[(z[0] == k) & (z[1] == l)] = 1
                    y_c[phi_c != 1] = 0
                    YY.append(y_c)
            elif 'ilfm' in expe.model:
                for _i, y in enumerate(Y):
                    theta = self._Theta[_i]
                    YY.append(
                        (y * np.outer(theta[:, k], theta[:, l])).astype(int))

            d, dc, yerr = random_degree(YY)
            if len(d) == 0:
                continue
            gof = gofit(d, dc)
            if not gof:
                continue

            for i, v in enumerate(Meas):
                Table[self.corpus_pos, i, it_k] = gof[v]

    elif _type == 'feature':
        raise NotImplementedError

    if self._it == self.expe_size - 1:
        # Last experiment: format and emit one table per model.
        for _model, table in self.gramexp.tables.items():
            # Mean and standard deviation over the last axis.
            table_mean = np.char.array(
                np.around(table.mean(2), decimals=3)).astype("|S20")
            table_std = np.char.array(
                np.around(table.std(2), decimals=3)).astype("|S20")
            table = table_mean + b' $\\pm$ ' + table_std

            # Table formatting
            corpuses = self.specname(self.gramexp.get_set('corpus'))
            table = np.column_stack((self.specname(corpuses), table))
            tablefmt = 'simple'
            table = tabulate(table,
                             headers=['__' + _model.upper() + '__'] + Meas,
                             tablefmt=tablefmt,
                             floatfmt='.3f')
            print()
            print(table)
            if expe._write:
                if expe._mode == 'predictive':
                    base = '%s_%s_%s' % (self.specname(expe.corpus),
                                         self.specname(_model), _type)
                else:
                    base = '%s_%s_%s' % ('MG', self.specname(_model), _type)
                self.write_frames(table, base=base, ext='md')