def find_example_with_4_stable_partitions():
    """Search for a hierarchical SBM with more than three stable partitions having K <= 9, then plot it.

    Each trial generates a hierarchical SBM, runs Louvain and CHAMP on it and
    counts the stable partitions with at most nine communities. On the first
    trial where that count exceeds three, the function

    * saves the CHAMP gamma estimates plot to "hierarchical_sbm_gamma_estimates.pdf",
    * draws every stable partition to "hierarchical_sbm_<K>-community.png" using
      one shared Fruchterman-Reingold layout,

    and returns. Unsuccessful trials print a progress message and retry with a
    freshly generated graph, so this loops until an example is found.
    """
    while True:
        G, _gt3, _gt9 = generate_hierarchical_SBM()
        all_parts = run_louvain(G)
        gamma_estimates = run_CHAMP(G, all_parts)
        stable_parts = gamma_estimates_to_stable_partitions(gamma_estimates)
        num_stable_partitions_below_nine = sum(
            1 for p in stable_parts if num_communities(p) <= 9)

        if num_stable_partitions_below_nine <= 3:
            print(f"Trial completed with {num_stable_partitions_below_nine} partitions with K <= 9. Continuing...")
            continue

        # Plot the gamma estimates that produced ``stable_parts``. Re-running
        # Louvain/CHAMP here would yield estimates from a different run that no
        # longer correspond to the partitions drawn below.
        plot_CHAMP_gamma_estimates(gamma_estimates)
        plt.savefig("hierarchical_sbm_gamma_estimates.pdf")
        plt.close()

        # One shared layout keeps the per-partition drawings visually comparable
        layout = G.layout_fruchterman_reingold(niter=10 ** 3)
        for p in stable_parts:
            ig.plot(louvain.RBConfigurationVertexPartition(G, p),
                    bbox=(1000, 1000),
                    layout=layout,
                    target=f"hierarchical_sbm_{num_communities(p)}-community.png")
        return
def generate_domains_with_estimates(graph_filename, louvain_filename, restrict_communities=None):
    """Load a multilayer graph and its partitions, prune them with CHAMP and estimate parameters.

    :param graph_filename: path to a pickle holding the 4-tuple
        (G_intralayer, G_interlayer, layer_vec, ground_truth)
    :param louvain_filename: path to a pickle holding an iterable of partitions
    :param restrict_communities: if truthy, only partitions with exactly this
        many communities are kept before running CHAMP
    :return: the result of ``domains_to_gamma_omega_estimates`` on the CHAMP domains
    """
    # Import graph and partitions (context managers so the files are closed promptly)
    with open(graph_filename, "rb") as graph_file:
        G_intralayer, G_interlayer, layer_vec, _ground_truth = pickle.load(graph_file)
    with open(louvain_filename, "rb") as louvain_file:
        all_parts = pickle.load(louvain_file)

    if restrict_communities:
        all_parts = {p for p in all_parts if num_communities(p) == restrict_communities}
    else:
        all_parts = set(all_parts)

    # Prune partitions with CHAMP
    print("Starting CHAMP...")
    start = time()
    domains = CHAMP_3D(G_intralayer, G_interlayer, layer_vec, all_parts,
                       CHAMP_GAMMA_START, CHAMP_GAMMA_END,
                       CHAMP_OMEGA_START, CHAMP_OMEGA_END)
    print(f"Took {time() - start:.2f} s")

    # Get parameter estimates
    print("Starting parameter estimation...")
    start = time()
    domains_with_estimates = domains_to_gamma_omega_estimates(
        G_intralayer, G_interlayer, layer_vec, domains)
    print(f"Took {time() - start:.2f} s")

    return domains_with_estimates
def plot_figure3():
    """Generates figure 5.10

    For K = 2, 3, 4 this loads the CHAMP domains from "lazega_CHAMP<K>.p",
    estimates (gamma, omega) for each domain under the multiplex model and
    collects the memberships of the resulting stable partitions. Every stable
    partition is then drawn with ``plot_multiplex_community`` and saved to
    "lazega_stable_community<i>.pdf", where <i> is its index in collection order.
    """
    G_intralayer, G_interlayer, layer_vec = generate_lazega_igraph()
    layer_vec = np.array(layer_vec)
    N = 71  # nodes per layer
    T = 3  # number of layers

    all_stable_parts = []
    for K in range(2, 5):
        with open(f"lazega_CHAMP{K}.p", "rb") as domains_file:
            domains = pickle.load(domains_file)

        domains_with_estimates = domains_to_gamma_omega_estimates(
            G_intralayer, G_interlayer, layer_vec, domains, model='multiplex')

        # Truncate infinite omega solutions to our maximum omega and drop
        # domains for which no gamma estimate exists
        domains_with_estimates = [
            (polyverts, membership, g_est, min(o_est, CHAMP_OMEGA_END - 1e-3))
            for polyverts, membership, g_est, o_est in domains_with_estimates
            if g_est is not None
        ]

        stable_parts = gamma_omega_estimates_to_stable_partitions(domains_with_estimates)
        all_stable_parts.extend(membership for _, membership, _, _ in stable_parts)

    # this sorting seems to keep the number of plot breaks low between all the common stable partitions
    sort = np.array([
        46, 21, 64, 55, 48, 54, 68, 40, 70, 56, 65, 66, 53, 51, 67, 38, 42, 39,
        37, 26, 20, 10, 12, 22, 25, 0, 23, 19, 7, 35, 61, 69, 63, 41, 28, 16,
        15, 8, 11, 9, 14, 1, 3, 36, 18, 52, 43, 47, 33, 44, 60, 59, 45, 31, 34,
        27, 62, 5, 49, 30, 58, 50, 17, 57, 4, 32, 6, 2, 24, 13, 29
    ])

    for i, membership in enumerate(all_stable_parts):
        plt.close()

        K = num_communities(membership)
        membership = np.array(membership)
        # Split the flat membership vector into its T per-layer slices
        m1, m2, m3 = (membership[t * N:(t + 1) * N] for t in range(T))
        if K in (2, 3, 4):
            m1, m2, m3 = m1[sort], m2[sort], m3[sort]
        membership = np.concatenate((m1, m3, m2))  # Concatenate in order advice, work, friend

        plt.rc('text', usetex=True)
        plt.rc('font', family='serif')
        ax = plot_multiplex_community(membership, layer_vec)
        ax.set_xticks(np.linspace(0, T, 2 * T + 1))
        # 2 * T + 1 tick locations need as many labels; recent Matplotlib raises
        # on a mismatch, hence the trailing blank label
        ax.set_xticklabels(["", "Advice", "", "Coworker", "", "Friend", ""], fontsize=14)
        plt.title(f"Multiplex Communities in Stable Partition {i + 1}", fontsize=14)
        plt.ylabel("Node ID", fontsize=14)
        plt.savefig(f"lazega_stable_community{i}.pdf")
def one_run(gamma):
    """Perform one gamma estimation step starting at :gamma:.

    Uses the module-level graph ``G`` and a single estimation iteration.

    :return: (gamma_estimate, num_communities) of the resulting partition, or
        (None, None) when a ValueError is raised along the way
    """
    try:
        estimate, partition = iterative_monolayer_resolution_parameter_estimation(
            G, gamma=gamma, max_iter=1)
        outcome = (estimate, num_communities(partition.membership))
    except ValueError:
        outcome = (None, None)
    return outcome
def find_stability_probabilities(num_trials=500):
    """Estimate how often a stable partition with each community count appears.

    Runs ``num_trials`` independent trials. Each trial generates a hierarchical
    SBM, runs Louvain and CHAMP and, for every stable partition found,
    increments the counter indexed by its number of communities. The counters
    divided by ``num_trials`` are pickled to
    "hierarchical_stability_probabilities.p".

    :param num_trials: number of graphs to generate (must be positive, since
        the counts are divided by it)
    """
    progress = Progress(num_trials)
    num_stable = [0] * 20  # index = number of communities

    for _ in range(num_trials):
        G, _gt3, _gt9 = generate_hierarchical_SBM()
        all_parts = run_louvain(G)
        gamma_estimates = run_CHAMP(G, all_parts)
        stable_parts = gamma_estimates_to_stable_partitions(gamma_estimates)

        for p in stable_parts:
            K = num_communities(p)
            assert K < len(num_stable)
            num_stable[K] += 1

        progress.increment()
    progress.done()

    stability_probabilities = [count / num_trials for count in num_stable]
    # Context manager guarantees the pickle is flushed and closed
    with open("hierarchical_stability_probabilities.p", "wb") as out_file:
        pickle.dump(stability_probabilities, out_file)
def run_champ_on_lazega_partitions_restricted_K(K):
    """Run CHAMP on the Lazega Louvain partitions that have exactly K communities.

    Partitions are read from "lazega_1M_louvain.p"; the CHAMP domains over
    gamma in [0, CHAMP_GAMMA_END] and omega in [0, CHAMP_OMEGA_END] are pickled
    to "lazega_CHAMP<K>.p".

    :param K: number of communities a partition must have to be considered
    """
    G_intralayer, G_interlayer, layer_vec = generate_lazega_igraph()
    layer_vec = np.array(layer_vec)

    with open("lazega_1M_louvain.p", "rb") as parts_file:
        all_parts = pickle.load(parts_file)
    all_parts = {p for p in all_parts if num_communities(p) == K}

    domains = CHAMP_3D(G_intralayer, G_interlayer, layer_vec, all_parts,
                       0.0, CHAMP_GAMMA_END, 0.0, CHAMP_OMEGA_END)

    with open(f"lazega_CHAMP{K}.p", "wb") as domains_file:
        pickle.dump(domains, domains_file)
def run_method(G, ground_truth_communities, num_louvain_runs, method,
               gamma_sweep_min=0.5, gamma_sweep_max=2.0):
    """
    Run one trial of comparing our benchmark to typical Louvain strategies

    :param G: graph of interest
    :param ground_truth_communities: ground truth community vector
    :param num_louvain_runs: number of gamma values (one Louvain run each) to use
    :param method: "modularity pruning", "modularity pruning ground truth K", "gamma sweep" or "ground truth gamma"
    :param gamma_sweep_min: smallest gamma of the sweep (ignored for "ground truth gamma")
    :param gamma_sweep_max: largest gamma of the sweep (ignored for "ground truth gamma")
    :return: list of NMIs compared to the ground truth communities
    :raises ValueError: if the ground truth communities have no gamma estimate,
        or if :method: is not one of the options above
    """
    ground_truth_gamma = gamma_estimate(G, ground_truth_communities)

    # The None check must come first: ordering a None against a float raises
    # TypeError and would mask the intended error below.
    if ground_truth_gamma is None:
        raise ValueError("Cannot use a graph with degenerate ground truth communities")
    if ground_truth_gamma > gamma_sweep_max:
        print(f"Ground truth gamma {ground_truth_gamma:.2f} is large")

    pruning_methods = ("modularity pruning", "modularity pruning ground truth K")

    if method in pruning_methods or method == "gamma sweep":
        gammas = np.linspace(gamma_sweep_min, gamma_sweep_max, num_louvain_runs)
    elif method == "ground truth gamma":
        # every run uses the ground truth gamma
        gammas = np.linspace(ground_truth_gamma, ground_truth_gamma, num_louvain_runs)
    else:
        raise ValueError(f"Option {method} is not valid")

    parts = repeated_louvain_from_gammas(G, gammas)

    if method in pruning_methods:
        prune_options = {}
        if method == "modularity pruning ground truth K":
            prune_options["restrict_num_communities"] = num_communities(ground_truth_communities)
        candidates = prune_to_stable_partitions(G, parts,
                                                gamma_start=gammas[0],
                                                gamma_end=gammas[-1],
                                                single_threaded=True,
                                                **prune_options)
    else:
        # method == "gamma sweep" or method == "ground truth gamma"
        candidates = parts

    return [nmi(ground_truth_communities, p) for p in candidates]
def generate_boxplot_results():
    """Collect (K, gamma_estimate) pairs over all graphs in ``Gs`` and pickle them.

    For the i-th graph the partitions are read from "parts<i>.p". A partition
    contributes a pair when it has a gamma estimate and 2 <= K < K_MAX. The
    list of pairs is written to "boxplot_results.p".
    """
    results = []

    for i, G in enumerate(Gs):
        with open(f"parts{i}.p", "rb") as parts_file:
            parts = pickle.load(parts_file)

        for p in parts:
            K = num_communities(p)
            g_est = gamma_estimate(G, p)

            if g_est is not None and 2 <= K < K_MAX:
                # sanity check on the magnitude of the estimate
                assert g_est < 15
                results.append((K, g_est))

    with open("boxplot_results.p", "wb") as results_file:
        pickle.dump(results, results_file)
def plot_stable_partitions(all_parts):
    """Draw the stable partitions of the Zachary karate club for K = 2, 3 and 4.

    For each K, the partitions in :all_parts: with exactly K communities are
    pruned with CHAMP over [GAMMA_START, GAMMA_END]; every resulting stable
    partition is rendered to "karate_club_<K>_stable<i>.png".

    :param all_parts: collection of partitions of the karate club graph
    """
    karate = ig.Graph.Famous("Zachary")

    # A single force-directed layout is reused so that all images line up
    shared_layout = karate.layout_fruchterman_reingold(niter=1000)

    for K in (2, 3, 4):
        candidates = {part for part in all_parts if num_communities(part) == K}
        if not candidates:
            # nothing to plot for this community count
            continue

        gamma_ranges = CHAMP_2D(karate, candidates, GAMMA_START, GAMMA_END)
        estimates = ranges_to_gamma_estimates(karate, gamma_ranges)

        for idx, membership in enumerate(gamma_estimates_to_stable_partitions(estimates)):
            partition = louvain.RBConfigurationVertexPartition(karate, initial_membership=membership)
            ig.plot(partition,
                    f"karate_club_{K}_stable{idx}.png",
                    bbox=(1000, 1000),
                    layout=shared_layout)
def assert_multiplex_SBM_correct_convergence(self, first_layer_membership, copying_probability=0.75,
                                             num_layers=10, p_in=0.25, p_out=0.05):
    """Check that multiplex parameter estimation lands near the ground truth gamma and omega.

    A multiplex SBM is generated from the arguments, the ground truth
    (gamma, omega) are derived from the model parameters, and the iterative
    estimator is started from gamma=1.0, omega=0.1. The test passes when the
    estimates are within 0.05 (gamma) and 0.15 (omega) of the ground truth.
    """
    if not check_multilayer_louvain_capabilities(fatal=False):
        # this version of louvain cannot do multilayer parameter estimation, so there is nothing to check
        return

    K = num_communities(first_layer_membership)
    G_intralayer, G_interlayer, layer_membership = self.generate_multiplex_SBM(
        copying_probability, p_in, p_out, first_layer_membership, num_layers)

    # ground truth gamma
    mean_degree = mean(all_degrees(G_intralayer))
    twice_edges = 2 * G_intralayer.ecount()
    theta_in = p_in * twice_edges / (mean_degree * mean_degree) / num_layers
    theta_out = p_out * twice_edges / (mean_degree * mean_degree) / num_layers
    log_theta_gap = log(theta_in) - log(theta_out)
    expected_gamma = (theta_in - theta_out) / log_theta_gap

    # ground truth omega
    expected_omega = log(1 + copying_probability * K / (1 - copying_probability))
    expected_omega /= (num_layers * log_theta_gap)

    estimated_gamma, estimated_omega, _ = iterative_multilayer_resolution_parameter_estimation(
        G_intralayer, G_interlayer, layer_membership, gamma=1.0, omega=0.1, model='multiplex')

    # The estimates should be close to the ground truth "correct" values.
    # Multiplex omega estimation seems less accurate than in other models (perhaps due to
    # the copying probability approximation), hence the looser omega tolerance.
    self.assertLess(abs(expected_gamma - estimated_gamma), 0.05)
    self.assertLess(abs(expected_omega - estimated_omega), 0.15)
def run_louvain(graphnum):
    """Sweep Louvain over 1000 gammas in [0, 10] on graph ``Gs[graphnum]``.

    The sweep stops at the first partition with more than 100 communities;
    that partition is not kept. A timing summary is printed afterwards.

    :param graphnum: index into the module-level list ``Gs``
    :return: (graphnum, set of the kept partitions, each passed through
        ``sorted_tuple``)
    """
    G = Gs[graphnum]
    parts = []
    start = time()

    for gamma_louvain in np.linspace(0, 10, 1000):
        part = louvain.find_partition(
            G,
            louvain.RBConfigurationVertexPartition,
            resolution_parameter=gamma_louvain).membership

        if num_communities(part) > 100:
            break
        parts.append(part)

    elapsed = time() - start
    # Avoid dividing by zero if the very first partition already broke the limit
    seconds_per_partition = elapsed / len(parts) if parts else float("nan")
    print(
        f"Running on Graph {graphnum}, n={G.vcount()}, m={G.ecount()}: "
        f"In {elapsed:.2f} s, found {len(parts)} partitions at {seconds_per_partition:.2f} "
        "seconds per partition")

    return graphnum, {sorted_tuple(tuple(p)) for p in parts}
def generate_multilayer_intralayer_SBM(copying_probability, p_in, p_out, first_layer_membership, num_layers):
    """Generate the intralayer graph of a multilayer SBM whose communities drift between layers.

    Layer 0 uses :first_layer_membership:. In each later layer a node copies
    its community from the previous layer with probability
    :copying_probability: and otherwise draws one via ``randint(0, K - 1)``.
    Within each layer, node pairs in the same community are joined with
    probability :p_in: and pairs in different communities with probability
    :p_out:. No edges are created between layers.

    Nodes are indexed layer by layer: node ``v`` of layer ``l`` has index
    ``l * len(first_layer_membership) + v``.

    :param copying_probability: probability of keeping the previous layer's community
    :param p_in: edge probability inside a community
    :param p_out: edge probability between communities
    :param first_layer_membership: community labels of the first layer
    :param num_layers: number of layers
    :return: (G_intralayer, layer_membership), where layer_membership[i] is the
        layer of node i
    """
    num_nodes_per_layer = len(first_layer_membership)
    community_labels_per_layer = [[0] * num_nodes_per_layer for _ in range(num_layers)]
    community_labels_per_layer[0] = list(first_layer_membership)
    K = num_communities(first_layer_membership)

    # assign community labels in the higher layers
    for layer in range(1, num_layers):
        for v in range(num_nodes_per_layer):
            if random() < copying_probability:  # copy community from last layer
                community_labels_per_layer[layer][v] = community_labels_per_layer[layer - 1][v]
            else:  # assign random community
                community_labels_per_layer[layer][v] = randint(0, K - 1)

    combined_community_labels = [label
                                 for layer_labels in community_labels_per_layer
                                 for label in layer_labels]
    layer_membership = [i for i in range(num_layers) for _ in range(num_nodes_per_layer)]
    num_nodes = len(combined_community_labels)

    # create intralayer edges according to an SBM
    # Only same-layer pairs can be connected, so u stops at the end of v's
    # layer. Pairs are visited (and random() is drawn) in the same order as a
    # scan over all pairs that skips the cross-layer ones.
    intralayer_edges = []
    for v in range(num_nodes):
        layer_end = (v // num_nodes_per_layer + 1) * num_nodes_per_layer
        for u in range(v + 1, layer_end):
            same_community = combined_community_labels[v] == combined_community_labels[u]
            if random() < (p_in if same_community else p_out):
                intralayer_edges.append((u, v))

    # Pass the vertex count explicitly: inferring it from the edge list drops
    # trailing isolated nodes and desynchronises the graph from layer_membership.
    G_intralayer = ig.Graph(n=num_nodes, edges=intralayer_edges, directed=False)

    return G_intralayer, layer_membership
def assert_temporal_SBM_correct_convergence(self, first_layer_membership, copying_probability=0.75,
                                            num_layers=25, p_in=0.25, p_out=0.05):
    """Check that temporal (and multilevel) parameter estimation lands near the ground truth.

    A temporal SBM is generated from the arguments, the ground truth
    (gamma, omega) are derived from the model parameters, and the iterative
    estimator is started from gamma=1.0, omega=1.0 for each model. The test
    passes when the estimates are within 0.05 (gamma) and 0.1 (omega) of the
    ground truth.
    """
    if not check_multilayer_louvain_capabilities(fatal=False):
        # this version of louvain cannot do multilayer parameter estimation, so there is nothing to check
        return

    K = num_communities(first_layer_membership)
    G_intralayer, G_interlayer, layer_membership = self.generate_temporal_SBM(
        copying_probability, p_in, p_out, first_layer_membership, num_layers)

    # ground truth gamma
    mean_degree = mean(all_degrees(G_intralayer))
    twice_edges = 2 * G_intralayer.ecount()
    theta_in = p_in * twice_edges / (mean_degree * mean_degree) / num_layers
    theta_out = p_out * twice_edges / (mean_degree * mean_degree) / num_layers
    log_theta_gap = log(theta_in) - log(theta_out)
    expected_gamma = (theta_in - theta_out) / log_theta_gap

    # ground truth omega. For some reason, Pamfil et al. scale this by 1/2 (perhaps due to the
    # directedness of the interlayer edges), so we do the same here
    expected_omega = log(1 + copying_probability * K / (1 - copying_probability))
    expected_omega /= (2 * log_theta_gap)

    # 'temporal' is the model of interest; 'multilevel' is never used elsewhere, but it is a
    # slight generalization of the temporal one, so it is checked with the same tolerances
    for model_name in ('temporal', 'multilevel'):
        estimated_gamma, estimated_omega, _ = iterative_multilayer_resolution_parameter_estimation(
            G_intralayer, G_interlayer, layer_membership, gamma=1.0, omega=1.0, model=model_name)

        # the estimates should be close to the ground truth "correct" values
        self.assertLess(abs(expected_gamma - estimated_gamma), 0.05)
        self.assertLess(abs(expected_omega - estimated_omega), 0.1)