def t_selection_pipeline_undirected_village(G, ts, fraction_t_to_keep=0.25):
    """Run the SpecGWL village pipeline over several heat-kernel scales and
    shortlist the most promising scales by partition coverage.

    For every ``t`` in ``ts`` the heat kernel of ``G`` is used as the cost
    matrix for ``process_sgwl_village``.  Each resulting coupling is turned
    into a partition with ``get_partition`` and scored with ``coverage``.

    Relies on the module-level names ``database``, ``num_nodes`` and
    ``num_partitions`` (they are not parameters of this function).

    Parameters
    ----------
    G : graph
        Graph passed to ``sgw.undirected_normalized_heat_kernel`` and
        ``coverage``.
    ts : sequence of scale values
        Heat-kernel scale parameters to try.  A NumPy array or any plain
        sequence (e.g. a list) is accepted.
    fraction_t_to_keep : float, default 0.25
        Fraction of ``ts`` to keep in each shortlist; the count is
        ``round(fraction_t_to_keep * len(ts))`` clamped to ``[0, len(ts)]``.

    Returns
    -------
    mis, coups, d_gws : list
        Per-scale outputs of ``process_sgwl_village`` (first, third and
        second element of its return value, respectively).
    good_t_max : numpy.ndarray
        The kept scales with the highest coverage, in ascending coverage
        order.
    good_t_grad : numpy.ndarray
        The kept scales where the absolute gradient of coverage with respect
        to position in ``ts`` is smallest.
    rt : list of float
        Wall-clock seconds spent on each scale.
    """
    # Fancy indexing below needs an ndarray; a plain list would raise.
    ts_arr = np.asarray(ts)

    mis = []
    coups = []
    d_gws = []
    rt = []
    for t in ts:
        start = time.time()
        cost = sgw.undirected_normalized_heat_kernel(G, t)
        mutual_info, d_gw, coup = process_sgwl_village(
            cost, database, num_nodes, num_partitions)
        mis.append(mutual_info)
        coups.append(coup)
        d_gws.append(d_gw)
        end = time.time()
        rt.append(end - start)

    print('Couplings Computed')

    coverages = [coverage(G, get_partition(coup)) for coup in coups]

    num_to_keep = int(np.round(fraction_t_to_keep * len(ts)))
    num_to_keep = min(max(num_to_keep, 0), len(ts))

    # Slice from an explicit start index: ``[-num_to_keep:]`` would return
    # *every* scale when num_to_keep == 0, because -0 == 0.
    by_coverage = ts_arr[np.argsort(coverages)]
    good_t_max = by_coverage[len(by_coverage) - num_to_keep:]

    # np.gradient needs at least two samples; with fewer there is no slope
    # to rank by, so treat it as flat.
    if len(coverages) > 1:
        coverage_slope = np.abs(np.gradient(coverages))
    else:
        coverage_slope = np.zeros(len(coverages))
    good_t_grad = ts_arr[np.argsort(coverage_slope)][:num_to_keep]

    return mis, coups, d_gws, good_t_max, good_t_grad, rt
def get_gw_ami(G, t, gt):
    """Cluster ``G`` by Gromov-Wasserstein matching of its heat kernel and
    score the result.

    The heat kernel of ``G`` at scale ``t`` is matched against a diagonal
    target matrix whose size equals the number of distinct labels in ``gt``.
    Each node is assigned to the target index that receives most of its mass
    in the coupling.

    Parameters
    ----------
    G : graph
        Graph to cluster.
    t : float
        Heat kernel scale parameter.
    gt : array-like
        Ground-truth label per node; also used to fix the number of clusters.

    Returns
    -------
    ami : float
        Adjusted mutual information between the estimated labels and ``gt``
        (``average_method='max'``).
    mod : float
        Value of ``modularity(G, comms)`` for the estimated communities.
    """
    distribution_exponent_hk = 0.001
    distribution_offset_hk = 0

    C1 = sgw.undirected_normalized_heat_kernel(G, t)
    p1 = sgw.node_distribution(G, distribution_offset_hk, distribution_exponent_hk)
    # Note that we are inserting prior information about the number of clusters
    p2 = np.ravel(
        GwGt.estimate_target_distribution(
            {0: p1.reshape(-1, 1)}, dim_t=len(np.unique(gt))))
    C2 = np.diag(p2)

    coup, _ = ot.gromov.gromov_wasserstein(
        C1, C2, p1, p2, loss_fun='square_loss', log=True)
    est_idx = np.argmax(coup, axis=1)

    ami = metrics.adjusted_mutual_info_score(est_idx, gt, average_method='max')

    # Group node indices by estimated label.  Keyed by label rather than
    # indexed by it: when some target cluster receives no node the labels are
    # not contiguous (e.g. {0, 2}) and positional indexing would overflow.
    # NOTE(review): communities hold row indices, which assumes G's nodes are
    # labelled 0..n-1 in the heat kernel's row order -- confirm.
    members_by_label = {}
    for idx, val in enumerate(est_idx):
        members_by_label.setdefault(val, set()).add(idx)
    comms = [members_by_label[label] for label in sorted(members_by_label)]

    mod = modularity(G, comms)

    return ami, mod
# Close out the timing of the preceding 'gwl-noisy' run; `start` and
# `mutual_info` are set above this excerpt.
end = time.time()
scores['gwl-noisy'] = mutual_info
runtimes['gwl-noisy'] = end-start

###########################################################
###########################################################
# Proposed method: SpecGWL
###########################################################

# Raw
# For each heat-kernel scale t, use the heat kernel of G as the cost matrix,
# run the village pipeline on it, and record the score and elapsed time.
mis = []
rt = []
ts = [8.4]#np.linspace(7,9,20)
for t in ts:
    start = time.time()
    cost = sgw.undirected_normalized_heat_kernel(G,t)
    # NOTE(review): the result is stored as one score and later passed to
    # max(); t_selection_pipeline_undirected_village unpacks three values
    # from a function of the same name -- confirm which version is in scope.
    mutual_info = process_sgwl_village(cost,database,num_nodes,num_partitions,beta=5e-6);
    mis.append(mutual_info)
    end = time.time()
    rt.append(end-start)

# print('--- Raw data | SpecGWL | Best mutual information score: {:3.3f} | @t = {:3.3f} | average runtime per iteration = {:3.3f}'.format(max(mis), ts[np.argmax(mis)], np.mean(rt)))
# Report the best score over all scales and the total time over all scales.
scores['specgwl-raw'] = max(mis)
runtimes['specgwl-raw'] = sum(rt)
# avetimes['specgwl-raw'] = np.mean(rt)

# Noisy
# Reset the accumulators for the noisy-data run; the loop body lies beyond
# this excerpt.
mis = []
rt = []
ts = [8.4]#np.linspace(7,9,20)
for t in ts:
# --- Adjacency-based matching (timer `start` and `cost_s` are set above
# this excerpt) ---
cost_t = G_adj_perm
p = sgw.node_distribution(G, distribution_offset_adj, distribution_exponent_adj)
# Node weights of the permuted graph: p reordered by the same permutation.
q = np.matmul(p, perm.T)
p_s = p.reshape(len(p), 1)
p_t = q.reshape(len(q), 1)
# Note: p_s is overwritten by the third return value.
coup_adj, d_gw, p_s = gromov_wasserstein_discrepancy(
    cost_s, cost_t, p_s, p_t, ot_hyperpara_adj)
end = time.time()
times_adj.append(end - start)

# --- Heat-kernel-based matching ---
start = time.time()
G_hk = sgw.undirected_normalized_heat_kernel(G, t)
# Heat kernel of the permuted graph: rows and columns reordered by `perm`.
G_hk_perm = np.matmul(np.matmul(perm, G_hk), perm.T)
p = sgw.node_distribution(G, distribution_offset_hk, distribution_exponent_hk)
# The target distribution must follow the same reordering as G_hk_perm,
# exactly as in the adjacency branch above.  Using the unpermuted `p` here
# would attach each weight to the wrong node whenever `p` is not uniform
# (for a uniform `p` the two are identical).
q = np.matmul(p, perm.T)
coup_hk, log_hk = ot.gromov.gromov_wasserstein(
    G_hk, G_hk_perm, p, q, loss_fun='square_loss', log=True)
end = time.time()
times_hk.append(end - start)
# Close out the timing of the preceding 'gwl-noisy' run; `start` and
# `mutual_info` are set above this excerpt.
end = time.time()
scores['gwl-noisy'] = mutual_info
runtimes['gwl-noisy'] = end-start

###########################################################
###########################################################
# Proposed method: SpecGWL
###########################################################

# Raw
# For each heat-kernel scale t, use the heat kernel of G (cast to float64)
# as the cost matrix, run the amazon pipeline on it, and record the score
# and elapsed time.
mis = []
rt = []
ts = [81]#np.linspace(0,90,11)
for t in ts:
    start = time.time()
    cost = sgw.undirected_normalized_heat_kernel(G,t).astype(np.float64)
    # The result is treated as a single score (it is passed to max() below).
    mutual_info = process_sgwl_amazon(cost,database,num_nodes,num_partitions,beta=1.5e-6);
    mis.append(mutual_info)
    end = time.time()
    rt.append(end-start)

# print('--- Raw data | SpecGWL | Best mutual information score: {:3.3f} | @t = {:3.3f} | average runtime per iteration = {:3.3f}'.format(max(mis), ts[np.argmax(mis)], np.mean(rt)))
# Report the best score over all scales and the total time over all scales.
scores['specgwl-raw'] = max(mis)
runtimes['specgwl-raw'] = sum(rt)
# avetimes['specgwl-raw'] = np.mean(rt)

# Noisy
# Reset the accumulators for the noisy-data run (note the different scale);
# the loop body lies beyond this excerpt.
mis = []
rt = []
ts = [54]#np.linspace(0,90,11)
for t in ts:
# Plot couplings ts = np.linspace(0, 50, 100) coups = [] dists = [] distribution_exponent_hk = 0 distribution_offset_hk = 0 p1 = sgw.node_distribution(graph1, distribution_offset_hk, distribution_exponent_hk) p2 = sgw.node_distribution(graph2, distribution_offset_hk, distribution_exponent_hk) for t in ts: graph1_hk = sgw.undirected_normalized_heat_kernel(graph1, t) graph2_hk = sgw.undirected_normalized_heat_kernel(graph2, t) coup_hk, log_hk = ot.gromov.gromov_wasserstein(graph1_hk, graph2_hk, p1, p2, loss_fun='square_loss', log=True) coups.append(coup_hk) dists.append(log_hk['gw_dist']) fig, axs = plt.subplots(10, 10, figsize=(10, 10)) axs = axs.flatten() for i in range(len(coups)): ax = axs[i] ax.imshow(coups[i], cmap='Blues')
pList = []
lambdas = []

# Draw `num_bootstrap` random node samples from `top`; for each, build the
# subgraph of G induced on the sample and precompute its uniform node
# weights, adjacency matrix and heat kernels at t = 3, 7 and 11.
# (`samples`, `AList` and the `HKList*` lists are created above this excerpt.)
for i in range(0, num_bootstrap):
    select = np.random.choice(top, size_bootstrap, replace=False)
    # Set copy of the sample so each edge-endpoint test below is O(1);
    # `x in select` on the NumPy array scans the whole sample per test.
    selected = set(select.tolist())

    sG = nx.Graph()
    # Add every sampled node first so that isolated nodes are kept and the
    # node order follows the sampling order.
    for n in select:
        sG.add_node(n)
    for e in G.edges():
        if e[0] in selected and e[1] in selected:
            sG.add_edge(e[0], e[1])

    samples.append(sG)
    pList.append(ot.unif(nx.number_of_nodes(sG)))
    AList.append(nx.adjacency_matrix(sG).toarray())
    HKList3.append(sgw.undirected_normalized_heat_kernel(sG, 3))
    HKList7.append(sgw.undirected_normalized_heat_kernel(sG, 7))
    HKList11.append(sgw.undirected_normalized_heat_kernel(sG, 11))
    # Equal weight for every bootstrap sample.
    lambdas.append(1 / num_bootstrap)

print('---Bootstrap completed. Computing GW averages')

# GW barycenter computation
N = size_bootstrap  # size of targeted barycenter
p = ot.unif(N)  # weights of targeted barycenter
num_runs = 10  # each call to gromov_barycenters gives random initialization,
# we will iterate this several times


# The remainder of this function lies beyond this excerpt.
def run_frechet(CList):
    runtimes = []