def t_selection_pipeline_undirected_village(G, ts, fraction_t_to_keep=0.25):
    # Sweep the heat-kernel scales in ts, compute a GW coupling at each one,
    # and return the scales whose partitions score best under coverage.

    mis = []     # mutual information scores, one per t
    coups = []   # GW couplings, one per t
    d_gws = []   # GW distances, one per t
    rt = []      # per-iteration runtimes (seconds)

    for t in ts:
        start = time.time()
        cost = sgw.undirected_normalized_heat_kernel(G, t)
        mutual_info, d_gw, coup = process_sgwl_village(cost, database,
                                                       num_nodes,
                                                       num_partitions)
        mis.append(mutual_info)
        coups.append(coup)
        d_gws.append(d_gw)
        end = time.time()
        rt.append(end - start)

    print('Couplings Computed')

    coverages = []

    for coup in coups:
        partition = get_partition(coup)
        coverages.append(coverage(G, partition))

    num_to_keep = int(np.round(fraction_t_to_keep * len(ts)))

    ts = np.asarray(ts)  # allow the fancy indexing below even when ts is a list
    good_t_max = ts[np.argsort(coverages)][-num_to_keep:]  # highest-coverage scales
    good_t_grad = ts[np.argsort(np.abs(np.gradient(coverages)))][:num_to_keep]  # flattest-coverage scales

    return mis, coups, d_gws, good_t_max, good_t_grad, rt
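A brief usage sketch (not part of the original listing): `G` and the helpers above are assumed to come from the village benchmark, and the grid of scales is illustrative.

ts = np.linspace(1, 20, 40)  # illustrative grid of heat-kernel scales
mis, coups, d_gws, good_t_max, good_t_grad, rt = \
    t_selection_pipeline_undirected_village(G, ts)
# good_t_max : scales whose partitions achieve the highest coverage
# good_t_grad: scales where the coverage curve is flattest (most stable)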
Example #2
def get_gw_ami(G, t, gt):
    # G  -- graph
    # t  -- heat kernel scale parameter
    # gt -- ground truth

    distribution_exponent_hk = 0.001
    distribution_offset_hk = 0

    C1 = sgw.undirected_normalized_heat_kernel(G, t)
    p1 = sgw.node_distribution(G, distribution_offset_hk,
                               distribution_exponent_hk)
    p2 = np.ravel(
        GwGt.estimate_target_distribution({0: p1.reshape(-1, 1)},
                                          dim_t=len(np.unique(gt))))
    # Note that we are inserting prior information about the number of clusters
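    # (With dim_t equal to the number of ground-truth clusters, the target
    # space has one point per cluster; the row-wise argmax over the coupling
    # below then reads off a cluster assignment for every source node.)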

    C2 = np.diag(p2)
    coup, log = ot.gromov.gromov_wasserstein(C1,
                                             C2,
                                             p1,
                                             p2,
                                             loss_fun='square_loss',
                                             log=True)
    est_idx = np.argmax(coup, axis=1)

    ami = metrics.adjusted_mutual_info_score(est_idx, gt, average_method='max')
    # Map distinct labels to contiguous community indices; indexing comms by
    # the raw label would fail if some target clusters receive no nodes.
    labels = np.unique(est_idx)
    remap = {v: i for i, v in enumerate(labels)}
    comms = [set() for _ in labels]
    for idx, val in enumerate(est_idx):
        comms[remap[val]].add(idx)

    mod = modularity(G, comms)

    return ami, mod
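A hypothetical usage sketch on the karate club graph; t = 10 is an arbitrary example scale, and `sgw`, `GwGt`, `ot`, `metrics`, and `modularity` must be importable as in the snippet above.

import networkx as nx
import numpy as np

G = nx.karate_club_graph()
gt = np.array([0 if G.nodes[v]['club'] == 'Mr. Hi' else 1 for v in G])
ami, mod = get_gw_ami(G, 10, gt)
print('AMI: {:.3f} | modularity: {:.3f}'.format(ami, mod))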
Example #3
end = time.time()
scores['gwl-noisy'] = mutual_info
runtimes['gwl-noisy'] = end - start


###########################################################
###########################################################
# Proposed method: SpecGWL
########################################################### 
# Raw
mis = []
rt = []
ts = [8.4]  # np.linspace(7, 9, 20)
for t in ts:
    start = time.time()
    cost = sgw.undirected_normalized_heat_kernel(G, t)
    mutual_info = process_sgwl_village(cost, database, num_nodes,
                                       num_partitions, beta=5e-6)
    mis.append(mutual_info)
    end = time.time()
    rt.append(end - start)

# print('--- Raw data | SpecGWL | Best mutual information score: {:3.3f} | @t = {:3.3f} | average runtime per iteration = {:3.3f}'.format(max(mis), ts[np.argmax(mis)], np.mean(rt)))
scores['specgwl-raw'] = max(mis)
runtimes['specgwl-raw'] = sum(rt)
# avetimes['specgwl-raw'] = np.mean(rt)

# Noisy
mis = []
rt = []
ts = [8.4]  # np.linspace(7, 9, 20)
for t in ts:
    # The loop body is truncated in the source listing; the lines below mirror
    # the raw-data loop above, applied to an assumed noisy graph nG.
    start = time.time()
    cost = sgw.undirected_normalized_heat_kernel(nG, t)
    mutual_info = process_sgwl_village(cost, database, num_nodes,
                                       num_partitions, beta=5e-6)
    mis.append(mutual_info)
    end = time.time()
    rt.append(end - start)
Example #4
    # (Snippet begins mid-loop: cost_s, perm, start, ot_hyperpara_adj,
    # times_adj, and the distribution_* constants are defined earlier in the
    # source file.)
    cost_t = G_adj_perm
    p = sgw.node_distribution(G, distribution_offset_adj,
                              distribution_exponent_adj)
    q = np.matmul(p, perm.T)
    p_s = p.reshape(len(p), 1)
    p_t = q.reshape(len(q), 1)

    coup_adj, d_gw, p_s = gromov_wasserstein_discrepancy(
        cost_s, cost_t, p_s, p_t, ot_hyperpara_adj)

    end = time.time()

    times_adj.append(end - start)

    start = time.time()
    G_hk = sgw.undirected_normalized_heat_kernel(G, t)
    G_hk_perm = np.matmul(np.matmul(perm, G_hk), perm.T)

    p = sgw.node_distribution(G, distribution_offset_hk,
                              distribution_exponent_hk)
    q = p

    coup_hk, log_hk = ot.gromov.gromov_wasserstein(G_hk,
                                                   G_hk_perm,
                                                   p,
                                                   q,
                                                   loss_fun='square_loss',
                                                   log=True)
    end = time.time()

    times_hk.append(end - start)
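# For reference, a random permutation matrix like the perm used above can be
# built as follows (an illustrative sketch, not part of the source):
#   idx = np.random.permutation(num_nodes)
#   perm = np.eye(num_nodes)[idx]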
Example #5
end = time.time()
scores['gwl-noisy'] = mutual_info
runtimes['gwl-noisy'] = end - start


###########################################################
###########################################################
# Proposed method: SpecGWL
########################################################### 
# Raw
mis = []
rt = []
ts = [81]  # np.linspace(0, 90, 11)
for t in ts:
    start = time.time()
    cost = sgw.undirected_normalized_heat_kernel(G, t).astype(np.float64)
    mutual_info = process_sgwl_amazon(cost, database, num_nodes,
                                      num_partitions, beta=1.5e-6)
    mis.append(mutual_info)
    end = time.time()
    rt.append(end - start)

# print('--- Raw data | SpecGWL | Best mutual information score: {:3.3f} | @t = {:3.3f} | average runtime per iteration = {:3.3f}'.format(max(mis), ts[np.argmax(mis)], np.mean(rt)))
scores['specgwl-raw'] = max(mis)
runtimes['specgwl-raw'] = sum(rt)
# avetimes['specgwl-raw'] = np.mean(rt)

# Noisy
mis = []
rt = []
ts = [54]  # np.linspace(0, 90, 11)
for t in ts:
    # The loop body is truncated in the source listing; the lines below mirror
    # the raw-data loop above, applied to an assumed noisy graph nG.
    start = time.time()
    cost = sgw.undirected_normalized_heat_kernel(nG, t).astype(np.float64)
    mutual_info = process_sgwl_amazon(cost, database, num_nodes,
                                      num_partitions, beta=1.5e-6)
    mis.append(mutual_info)
    end = time.time()
    rt.append(end - start)
Example #6
# Plot couplings
ts = np.linspace(0, 50, 100)
coups = []
dists = []

distribution_exponent_hk = 0
distribution_offset_hk = 0

p1 = sgw.node_distribution(graph1, distribution_offset_hk,
                           distribution_exponent_hk)
p2 = sgw.node_distribution(graph2, distribution_offset_hk,
                           distribution_exponent_hk)

for t in ts:
    graph1_hk = sgw.undirected_normalized_heat_kernel(graph1, t)
    graph2_hk = sgw.undirected_normalized_heat_kernel(graph2, t)
    coup_hk, log_hk = ot.gromov.gromov_wasserstein(graph1_hk,
                                                   graph2_hk,
                                                   p1,
                                                   p2,
                                                   loss_fun='square_loss',
                                                   log=True)
    coups.append(coup_hk)
    dists.append(log_hk['gw_dist'])

fig, axs = plt.subplots(10, 10, figsize=(10, 10))
axs = axs.flatten()
for i in range(len(coups)):
    ax = axs[i]
    ax.imshow(coups[i], cmap='Blues')
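# Companion plot (an added sketch, not in the original snippet): GW distance
# as a function of the scale t, useful for spotting stable coupling regimes.
fig2, ax2 = plt.subplots(figsize=(4, 3))
ax2.plot(ts, dists)
ax2.set_xlabel('heat kernel scale t')
ax2.set_ylabel('GW distance')
plt.show()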
Example #7
# Containers for the bootstrap samples and their representations (the
# snippet appends to all of these, so they are all initialized here)
samples = []
AList = []       # adjacency matrices
HKList3 = []     # heat kernels at t = 3
HKList7 = []     # heat kernels at t = 7
HKList11 = []    # heat kernels at t = 11
pList = []       # node distributions
lambdas = []     # barycenter weights

for i in range(0, num_bootstrap):
    select = np.random.choice(top, size_bootstrap, replace=False)
    sG = nx.Graph()
    for n in select:
        sG.add_node(n)
    for e in G.edges():
        if e[0] in select and e[1] in select:
            sG.add_edge(e[0], e[1])
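    # (Equivalent one-liner: sG = G.subgraph(select).copy(); the explicit
    # loops above are kept for readability.)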
    samples.append(sG)

    pList.append(ot.unif(nx.number_of_nodes(sG)))
    AList.append(nx.adjacency_matrix(sG).toarray())
    HKList3.append(sgw.undirected_normalized_heat_kernel(sG, 3))
    HKList7.append(sgw.undirected_normalized_heat_kernel(sG, 7))
    HKList11.append(sgw.undirected_normalized_heat_kernel(sG, 11))
    lambdas.append(1 / num_bootstrap)

print('---Bootstrap completed. Computing GW averages')

# GW barycenter computation
N = size_bootstrap  # size of the target barycenter
p = ot.unif(N)      # weights of the target barycenter
num_runs = 10       # each call to gromov_barycenters uses a random
                    # initialization, so we repeat it several times
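
# A minimal sketch of a single barycenter call (hypothetical max_iter/tol
# values; the positional arguments follow POT's gromov_barycenters API):
C_demo = ot.gromov.gromov_barycenters(N, HKList3, pList, p, lambdas,
                                      'square_loss', max_iter=100, tol=1e-5)
# C_demo is an N x N cost matrix: one Frechet-mean estimate of the
# bootstrapped heat kernels at scale t = 3.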


def run_frechet(CList):
    runtimes = []