import numpy as np
import networkx as nx
from sklearn.metrics import roc_auc_score, average_precision_score

import utils


def print_evaluate(A_matrix, sampled_graph):
    # Standard graph statistics plus two global comparisons against the truth:
    # the earth mover's distance between shortest-path distributions and the
    # squared error of the spectral gap.
    stats = utils.compute_graph_statistics(sampled_graph)
    sps_truth = utils.sp(A_matrix)
    sps_gen = utils.sp(sampled_graph)
    stats['sp_emd'] = utils.emd(sps_truth, sps_gen)
    s = utils.specGap(A_matrix)
    stats['spec_gap'] = (s - utils.specGap(sampled_graph)) ** 2
    print(stats)
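# `gnp` is used throughout this file but not defined in it. From the call
# sites it is assumed to sample a Bernoulli ("G(n, p)-style") graph from an
# expected adjacency matrix X, keeping each edge independently with
# probability X[i, j]. A minimal sketch under that assumption; this is
# hypothetical, not the repo's confirmed implementation, so drop it if the
# real definition is imported elsewhere:
def gnp(X):
    """Sample a symmetric 0/1 adjacency matrix with P(A[i, j] = 1) = X[i, j]."""
    n = X.shape[0]
    upper = np.triu(np.random.rand(n, n) < X, k=1)  # sample the upper triangle
    return (upper + upper.T).astype(float)          # mirror for symmetry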
def gen_graphs(X_from_walks, target):
    # Build the expected adjacency matrix from walk counts, then sample 20
    # graphs from it and score each one against the target.
    X = genExpected_fromWalks(X_from_walks, target.sum())
    print(X)
    sps_truth = utils.sp(target)
    spec_truth = utils.specGap(target)
    sp_emds = []
    sgl2s = []
    for i in range(20):
        sampled_graph = gnp(X)
        sps_gen = utils.sp(sampled_graph)
        sp_emds.append(utils.emd(sps_truth, sps_gen))
        sgl2s.append((spec_truth - utils.specGap(sampled_graph)) ** 2)
    return sp_emds, sgl2s
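# `genExpected_fromWalks` is also assumed to live elsewhere in the repo. In
# NetGAN-style pipelines the walk counts are symmetrised and rescaled so that
# the expected number of edges matches the target's edge count. A sketch
# under that assumption (hypothetical, not the confirmed implementation):
def genExpected_fromWalks(walk_counts, target_edge_sum):
    """Turn random-walk co-occurrence counts into edge probabilities."""
    scores = walk_counts + walk_counts.T           # symmetrise
    np.fill_diagonal(scores, 0)                    # no self-loops
    X = scores * (target_edge_sum / scores.sum())  # match the edge budget in expectation
    return np.clip(X, 0.0, 1.0)                    # valid Bernoulli probabilities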
def global_comp(A, X, sps_truth, spec):
    # Sample 100 graphs from X and compare shortest-path distributions,
    # spectral gaps, and global clustering coefficients against the truth.
    sp_emds = []
    specs = []
    ccs = []
    for i in range(100):
        sampled_graph = gnp(X)
        sps_gen = utils.sp(sampled_graph)
        sp_emds.append(utils.emd(sps_truth, sps_gen))
        specs.append((spec - utils.specGap(sampled_graph)) ** 2)
        ccs.append(3 * utils.statistics_triangle_count(sampled_graph)
                   / utils.statistics_claw_count(sampled_graph))
    stats = {}
    stats['shortest path emds'] = sp_emds
    stats['spec gaps'] = specs
    stats['clustering coefficients'] = ccs
    print('Shortest path')
    print(np.mean(sp_emds))
    print(np.std(sp_emds))
    print('Specs')
    print(np.mean(specs))
    print(np.std(specs))
    print('Clustering Coefficient')
    print(np.mean(ccs))
    print(np.std(ccs))
    print('True CC')
    tc = (3 * utils.statistics_triangle_count(np.array(A))
          / utils.statistics_claw_count(np.array(A)))
    print(tc)
    print('ABS diff')
    print(np.abs(tc - np.mean(ccs)))  # absolute difference, as the label says
    return stats
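# The global clustering coefficient above is 3 * triangles / claws, where a
# "claw" here is taken to mean a path of length two (a wedge). Assuming that
# is what utils.statistics_triangle_count and utils.statistics_claw_count
# compute, the same quantity follows directly from the adjacency matrix; a
# sketch for reference:
def global_clustering(A):
    """Transitivity: 3 * (#triangles) / (#paths of length 2)."""
    A = np.asarray(A, dtype=float)
    triangles = np.trace(A @ A @ A) / 6.0   # each triangle closes 6 length-3 walks
    deg = A.sum(axis=1)
    wedges = (deg * (deg - 1) / 2.0).sum()  # choose 2 neighbours per node
    return 3.0 * triangles / wedges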
def graphEval(X, truth_spec, true_sp, true_cc, true_dd, true_bc):
    # Sample 20 graphs from X and average seven discrepancy measures against
    # the ground truth: shortest-path, clustering-coefficient, degree and
    # betweenness-centrality EMDs, degree assortativity, and two weighted
    # spectral L2 distances.
    sp_emd_cur = []
    cc_emd_cur = []
    dd_emd_cur = []
    assorts_cur = []
    spec_l2_cur = []
    spec_l2_lin_cur = []
    bc_emd_cur = []
    for j in range(20):
        A = gnp(X)
        G = nx.from_numpy_matrix(A)
        print(nx.is_connected(G))
        if not nx.is_connected(G):
            # Report the size of the largest connected component.
            Gc = G.subgraph(max(nx.connected_components(G), key=len))
            print(len(Gc.nodes()))
        sp = utils.sp(A)
        cc = utils.cc(A)
        dd = utils.degree_sequence(A)
        spec_weight_l2 = l2_exp_weight(truth_spec, utils.spectrum(A))
        spec_weight_l2_lin = l2_lin_weight(truth_spec, utils.spectrum(A))
        bc = sorted(nx.betweenness_centrality(G).values())
        sp_emd_cur.append(utils.emd(sp, true_sp))
        cc_emd_cur.append(utils.emd(cc, true_cc))
        dd_emd_cur.append(utils.emd(dd, true_dd))
        assorts_cur.append(nx.degree_assortativity_coefficient(G))
        spec_l2_cur.append(spec_weight_l2)
        spec_l2_lin_cur.append(spec_weight_l2_lin)
        bc_emd_cur.append(utils.emd(bc, true_bc))
    return (np.mean(sp_emd_cur), np.mean(cc_emd_cur), np.mean(dd_emd_cur),
            np.mean(assorts_cur), np.mean(spec_l2_cur), np.mean(bc_emd_cur),
            np.mean(spec_l2_lin_cur))
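# `l2_exp_weight` and `l2_lin_weight` are assumed to be L2 distances between
# sorted spectra in which discrepancies are down-weighted exponentially or
# linearly by eigenvalue index, emphasising the extremal eigenvalues. The
# exact weighting scheme is a guess; hypothetical sketches consistent with
# that reading:
def l2_exp_weight(spec_a, spec_b):
    diff = np.asarray(spec_a) - np.asarray(spec_b)
    weights = np.exp(-np.arange(len(diff)))           # exponential decay over indices
    return np.sqrt(np.sum(weights * diff ** 2))

def l2_lin_weight(spec_a, spec_b):
    diff = np.asarray(spec_a) - np.asarray(spec_b)
    weights = np.arange(len(diff), 0, -1) / len(diff) # linear decay from 1 to 1/n
    return np.sqrt(np.sum(weights * diff ** 2))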
def global_comp(A, X, sps_truth, spec):
    # Lighter-weight variant of the global_comp defined above (this
    # redefinition shadows it): same sampling loop, but it returns summary
    # statistics instead of the raw lists and skips the clustering coefficient.
    sp_emds = []
    specs = []
    for i in range(100):
        sampled_graph = gnp(X)
        sps_gen = utils.sp(sampled_graph)
        sp_emds.append(utils.emd(sps_truth, sps_gen))
        specs.append((spec - utils.specGap(sampled_graph)) ** 2)
    return np.mean(sp_emds), np.std(sp_emds), np.mean(specs), np.std(specs)
def return_node(self, phrase):
    # Normalise the phrase by dropping stop words (utils.sp is assumed to be a
    # spaCy pipeline here, not the shortest-path helper above), then look the
    # phrase up in the nearest-neighbour index; add it as a new node unless a
    # sufficiently close existing phrase is found.
    non_stop_phrase = ' '.join(
        [token.text for token in utils.sp(phrase) if not token.is_stop])
    if len(non_stop_phrase) > 1:
        phrase = non_stop_phrase
    if self.node_index is None:
        self.add_node(phrase)
    nearest_neighbor = self.node_index.knn_query(utils.model([phrase]))
    if nearest_neighbor != []:
        closest_neighbor, closest_distance = nearest_neighbor
        if closest_neighbor[0] == []:
            self.add_node(phrase)
            return phrase
        if closest_distance[0][0] > self.distance_threshold:
            self.add_node(phrase)
            return phrase
        return self.phrase_corpus[closest_neighbor[0][0]]
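# The node index is assumed to be an hnswlib index over phrase embeddings
# (knn_query returning a (labels, distances) pair matches that API), with
# self.phrase_corpus as the parallel list of phrases. A minimal hypothetical
# add_node consistent with the lookup above:
import hnswlib

def add_node(self, phrase):
    vector = np.asarray(utils.model([phrase]))  # assumed shape (1, dim)
    if self.node_index is None:
        self.node_index = hnswlib.Index(space='cosine', dim=vector.shape[1])
        self.node_index.init_index(max_elements=10000)
    self.node_index.add_items(vector, [len(self.phrase_corpus)])
    self.phrase_corpus.append(phrase)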
def auc_p(X, A, valid):
    # Link-prediction quality of the expected graph X on held-out pairs:
    # A supplies the 0/1 labels, X the predicted scores.
    trueScores = []
    predScores = []
    for [u, v] in valid:
        trueScores.append(A[int(u)][int(v)])
        predScores.append(X[int(u)][int(v)])
    auc = roc_auc_score(trueScores, predScores)
    ap = average_precision_score(trueScores, predScores)
    return auc, ap


G = nx.read_gml('../data/football.gml')
_A_obs = nx.adjacency_matrix(G)
A_matrix = _A_obs.todense()
valid_edges = np.loadtxt('plots/football_val_edges.txt').tolist()
valid_nonEdges = np.loadtxt('plots/football_val_non_edges.txt').tolist()
valid = valid_edges + valid_nonEdges
sps_truth = utils.sp(A_matrix)
spec_truth = utils.specGap(A_matrix)

X = np.loadtxt(
    'plots/football_walk_em/trainingIteration_1200_expectedGraph.txt')
print('Random Walks Size 16')
global_comp(A_matrix, X, sps_truth, spec_truth)
print(auc_p(X, np.asarray(A_matrix), valid))

X = np.loadtxt(
    'plots/football_edge_em_nbs/trainingIteration_3200_expectedGraph.txt')
print('Random Walks Size 2 No BS adj')
global_comp(A_matrix, X, sps_truth, spec_truth)
print(auc_p(X, np.asarray(A_matrix), valid))

X = np.loadtxt(
    'plots/football_edge_em_adj_bs/trainingIteration_1200_expectedGraph.txt')
print('Random Walks Size 2 BS adj')
A_full = np.loadtxt(target_path)
N = A_full.shape[0]
truth_spec = utils.spectrum(A_full)
step = 400
k = maxIter // step  # integer division so range() below gets an int
sp_emds = []
cc_emds = []
dd_emds = []
assorts = []
spectrum_weighted_distances = []
bc_emds = []
true_sp = utils.sp(A_full)
true_cc = utils.cc(A_full)
true_dd = utils.degree_sequence(A_full)
G_true = nx.from_numpy_matrix(A_full)
true_assort = nx.degree_assortativity_coefficient(G_true)
# Sort the centrality scores, not the node ids (sorted(dict) sorts keys),
# to match graphEval above.
true_bc = sorted(nx.betweenness_centrality(G_true).values())
true_assorts = []

# initialize all params
for i in range(start, k + 1):
    iterNum = i * step
    X = np.loadtxt(path + '/samples_{}.txt'.format(iterNum))
def lemmatize(self, phrase):
    # Join the per-token lemmas produced by the (assumed) spaCy pipeline.
    return " ".join([word.lemma_ for word in utils.sp(phrase)])
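# For the two NLP helpers above, utils.sp is assumed to be a loaded spaCy
# model and utils.model a sentence-embedding function; a hypothetical setup:
#
#   import spacy
#   sp = spacy.load('en_core_web_sm')  # tokens expose .text, .lemma_, .is_stop
#
# e.g. lemmatize(self, 'running dogs barked') -> 'run dog bark'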