def find_example_with_4_stable_partitions():
    """Search for a hierarchical SBM realization with at least four stable partitions with K <= 9 and plot it.

    Graphs are drawn from ``generate_hierarchical_SBM`` until one of them yields more than three
    stable partitions with at most nine communities. For that graph, the CHAMP gamma estimates are
    saved to ``hierarchical_sbm_gamma_estimates.pdf`` and every stable partition is drawn to
    ``hierarchical_sbm_<K>-community.png`` using one shared force-directed layout.

    The loop only terminates once such a graph is found; each failed trial prints a progress line.
    """
    while True:
        G, _gt3, _gt9 = generate_hierarchical_SBM()

        all_parts = run_louvain(G)
        gamma_estimates = run_CHAMP(G, all_parts)
        stable_parts = gamma_estimates_to_stable_partitions(gamma_estimates)

        num_stable_partitions_below_nine = sum(1 for p in stable_parts if num_communities(p) <= 9)

        if num_stable_partitions_below_nine <= 3:
            print(f"Trial completed with {num_stable_partitions_below_nine} partitions with K <= 9. Continuing...")
            continue

        # Plot the very gamma estimates that produced ``stable_parts``. Re-running Louvain/CHAMP here
        # (as was done previously) could yield estimates that no longer match the partitions plotted below.
        plot_CHAMP_gamma_estimates(gamma_estimates)
        plt.savefig("hierarchical_sbm_gamma_estimates.pdf")
        plt.close()

        # Shared layout so the per-partition drawings are directly comparable
        layout = G.layout_fruchterman_reingold(niter=10 ** 3)
        for p in stable_parts:
            ig.plot(louvain.RBConfigurationVertexPartition(G, p), bbox=(1000, 1000), layout=layout,
                    target=f"hierarchical_sbm_{num_communities(p)}-community.png")
        return
def generate_domains_with_estimates(graph_filename,
                                    louvain_filename,
                                    restrict_communities=None):
    """Load a pickled multilayer graph and its partitions, prune with CHAMP and estimate (gamma, omega).

    :param graph_filename: path to a pickle holding ``(G_intralayer, G_interlayer, layer_vec, ground_truth)``
    :param louvain_filename: path to a pickle holding an iterable of partitions
    :param restrict_communities: if truthy, only partitions with exactly this many communities are kept;
                                 ``None`` (or ``0``) keeps every partition
    :return: the result of ``domains_to_gamma_omega_estimates`` on the CHAMP domains
    """
    # NOTE: pickle must only be used on trusted files; unpickling can execute arbitrary code.
    with open(graph_filename, "rb") as graph_file:
        G_intralayer, G_interlayer, layer_vec, _ground_truth = pickle.load(graph_file)
    with open(louvain_filename, "rb") as louvain_file:
        all_parts = pickle.load(louvain_file)

    if restrict_communities:
        all_parts = {
            p
            for p in all_parts if num_communities(p) == restrict_communities
        }
    else:
        # Deduplicate the partitions
        all_parts = set(all_parts)

    # Prune partitions with CHAMP
    print("Starting CHAMP...")
    start = time()
    domains = CHAMP_3D(G_intralayer, G_interlayer, layer_vec, all_parts,
                       CHAMP_GAMMA_START, CHAMP_GAMMA_END, CHAMP_OMEGA_START,
                       CHAMP_OMEGA_END)
    print(f"Took {time() - start:.2f} s")

    # Get parameter estimates
    print("Starting parameter estimation...")
    start = time()
    domains_with_estimates = domains_to_gamma_omega_estimates(
        G_intralayer, G_interlayer, layer_vec, domains)
    print(f"Took {time() - start:.2f} s")

    return domains_with_estimates
Ejemplo n.º 3
0
def plot_figure3():
    """Generates figure 5.10: one multiplex community plot per stable Lazega partition.

    For K = 2, 3, 4 the CHAMP domains stored in ``lazega_CHAMP{K}.p`` are loaded, converted to
    (gamma, omega) estimates with the multiplex model, and reduced to stable partitions. Each
    stable partition is then drawn with ``plot_multiplex_community`` and saved to
    ``lazega_stable_community{i}.pdf``, where ``i`` is its index in the combined list.
    """
    G_intralayer, G_interlayer, layer_vec = generate_lazega_igraph()
    N = 71  # number of membership entries per layer
    T = 3  # number of layers

    all_stable_parts = []
    for K in range(2, 5):
        # NOTE: pickle must only be used on trusted files
        with open(f"lazega_CHAMP{K}.p", "rb") as domains_file:
            domains = pickle.load(domains_file)

        # Truncate infinite omega solutions to our maximum omega
        domains_with_estimates = domains_to_gamma_omega_estimates(
            G_intralayer, G_interlayer, layer_vec, domains, model='multiplex')
        domains_with_estimates = [
            (polyverts, membership, g_est, min(o_est, CHAMP_OMEGA_END - 1e-3))
            for polyverts, membership, g_est, o_est in domains_with_estimates
            if g_est is not None
        ]

        stable_parts = gamma_omega_estimates_to_stable_partitions(
            domains_with_estimates)

        all_stable_parts.extend(
            [membership for _, membership, _, _ in stable_parts])

    # this sorting seems to keep the number of plot breaks low between all the common stable partitions
    node_order = np.array([
        46, 21, 64, 55, 48, 54, 68, 40, 70, 56, 65, 66, 53, 51, 67, 38, 42, 39,
        37, 26, 20, 10, 12, 22, 25, 0, 23, 19, 7, 35, 61, 69, 63, 41, 28, 16,
        15, 8, 11, 9, 14, 1, 3, 36, 18, 52, 43, 47, 33, 44, 60, 59, 45, 31, 34,
        27, 62, 5, 49, 30, 58, 50, 17, 57, 4, 32, 6, 2, 24, 13, 29
    ])

    for i, membership in enumerate(all_stable_parts):
        # Discard whatever figure is still open before drawing the next one
        plt.close()
        K = num_communities(membership)

        membership = np.array(membership)
        m1, m2, m3 = (membership[layer * N:(layer + 1) * N]
                      for layer in range(T))
        # The same node ordering is used for every K in 2..4; other K are left unsorted
        if K in (2, 3, 4):
            m1, m2, m3 = m1[node_order], m2[node_order], m3[node_order]
        membership = np.concatenate(
            (m1, m3, m2))  # Concatenate in order advice, work, friend

        plt.rc('text', usetex=True)
        plt.rc('font', family='serif')
        ax = plot_multiplex_community(membership, np.array(layer_vec))
        # 2 * T + 1 ticks at 0, 0.5, ..., T: layer names sit on the half-integer ticks.
        # One label per tick is required (recent matplotlib raises ValueError otherwise),
        # hence the trailing empty label for the tick at T.
        ax.set_xticks(np.linspace(0, T, 2 * T + 1))
        ax.set_xticklabels(["", "Advice", "", "Coworker", "", "Friend", ""],
                           fontsize=14)
        plt.title(f"Multiplex Communities in Stable Partition {i + 1}",
                  fontsize=14)
        plt.ylabel("Node ID", fontsize=14)
        plt.savefig(f"lazega_stable_community{i}.pdf")
 def one_run(gamma):
     """Run a single estimation step from :gamma: on the enclosing ``G``.

     :return: ``(gamma_estimate, num_communities)`` of the resulting partition,
              or ``(None, None)`` if the step raises ``ValueError``
     """
     try:
         estimate, partition = iterative_monolayer_resolution_parameter_estimation(
             G, gamma=gamma, max_iter=1)
         community_count = num_communities(partition.membership)
     except ValueError:
         # signal the failed run to the caller instead of propagating
         return None, None
     else:
         return estimate, community_count
def find_stability_probabilities(num_trials=500):
    """Estimate how often a stable partition with K communities occurs, for each K < 20.

    Runs ``num_trials`` independent trials. Each trial generates a hierarchical SBM, runs Louvain
    and CHAMP on it, and counts the number of communities of every stable partition found.
    The per-K counts divided by ``num_trials`` are pickled (as a list indexed by K) to
    ``hierarchical_stability_probabilities.p``.

    :param num_trials: number of graphs to generate; must be nonzero
    """
    progress = Progress(num_trials)
    num_stable = [0] * 20
    for _ in range(num_trials):
        G, _gt3, _gt9 = generate_hierarchical_SBM()

        all_parts = run_louvain(G)
        gamma_estimates = run_CHAMP(G, all_parts)
        stable_parts = gamma_estimates_to_stable_partitions(gamma_estimates)

        for p in stable_parts:
            K = num_communities(p)
            # sanity check: the tally only has room for K < 20
            assert K < len(num_stable)
            num_stable[K] += 1
        progress.increment()
    progress.done()

    stability_probabilities = [count / num_trials for count in num_stable]
    # Close the file deterministically so the pickle is fully flushed to disk
    with open("hierarchical_stability_probabilities.p", "wb") as output_file:
        pickle.dump(stability_probabilities, output_file)
Ejemplo n.º 6
0
def run_champ_on_lazega_partitions_restricted_K(K):
    """Run CHAMP on the stored Lazega Louvain partitions that have exactly K communities.

    Partitions are read from ``lazega_1M_louvain.p`` and the resulting CHAMP domains are pickled to
    ``lazega_CHAMP{K}.p``.

    :param K: number of communities a partition must have to be passed to CHAMP
    """
    G_intralayer, G_interlayer, layer_vec = generate_lazega_igraph()
    layer_vec = np.array(layer_vec)

    # NOTE: pickle must only be used on trusted files
    with open("lazega_1M_louvain.p", "rb") as louvain_file:
        all_parts = pickle.load(louvain_file)
    all_parts = {p for p in all_parts if num_communities(p) == K}
    domains = CHAMP_3D(G_intralayer, G_interlayer, layer_vec, all_parts, 0.0, CHAMP_GAMMA_END, 0.0, CHAMP_OMEGA_END)

    # Close the file deterministically so the pickle is fully flushed to disk
    with open(f"lazega_CHAMP{K}.p", "wb") as domains_file:
        pickle.dump(domains, domains_file)
Ejemplo n.º 7
0
def run_method(G,
               ground_truth_communities,
               num_louvain_runs,
               method,
               gamma_sweep_min=0.5,
               gamma_sweep_max=2.0):
    """
    Run one trial of comparing our benchmark to typical Louvain strategies

    :param G: graph of interest
    :param ground_truth_communities: ground truth community vector
    :param num_louvain_runs: number of resolution parameter values passed to Louvain
    :param method: "modularity pruning", "modularity pruning ground truth K", "gamma sweep" or "ground truth gamma"
    :param gamma_sweep_min: smallest resolution parameter of the sweep (unused for "ground truth gamma")
    :param gamma_sweep_max: largest resolution parameter of the sweep (unused for "ground truth gamma")
    :return: list of NMIs compared to the ground truth communities
    :raises ValueError: if the ground truth gamma estimate is None or :method: is not a valid option
    """
    ground_truth_gamma = gamma_estimate(G, ground_truth_communities)

    # Check for a missing estimate first: comparing None against a float raises TypeError
    if ground_truth_gamma is None:
        raise ValueError(
            "Cannot use a graph with degenerate ground truth communities")

    if ground_truth_gamma > gamma_sweep_max:
        print(f"Ground truth gamma {ground_truth_gamma:.2f} is large")

    if method in ("modularity pruning", "modularity pruning ground truth K", "gamma sweep"):
        gammas = np.linspace(gamma_sweep_min, gamma_sweep_max,
                             num_louvain_runs)
    elif method == "ground truth gamma":
        # num_louvain_runs copies of the ground truth gamma
        gammas = np.linspace(ground_truth_gamma, ground_truth_gamma,
                             num_louvain_runs)
    else:
        raise ValueError(f"Option {method} is not valid")

    parts = repeated_louvain_from_gammas(G, gammas)

    if method == "modularity pruning":
        stable_parts = prune_to_stable_partitions(G,
                                                  parts,
                                                  gamma_start=gammas[0],
                                                  gamma_end=gammas[-1],
                                                  single_threaded=True)
        nmis = [nmi(ground_truth_communities, p) for p in stable_parts]
    elif method == "modularity pruning ground truth K":
        ground_truth_K = num_communities(ground_truth_communities)
        stable_parts = prune_to_stable_partitions(
            G,
            parts,
            gamma_start=gammas[0],
            gamma_end=gammas[-1],
            restrict_num_communities=ground_truth_K,
            single_threaded=True)
        nmis = [nmi(ground_truth_communities, p) for p in stable_parts]
    else:  # method == "gamma sweep" or method == "ground truth gamma":
        nmis = [nmi(ground_truth_communities, p) for p in parts]

    return nmis
Ejemplo n.º 8
0
def generate_boxplot_results():
    """Collect (K, gamma estimate) pairs for the stored partitions of every graph in ``Gs``.

    For graph index ``i`` the partitions are read from ``parts{i}.p``. A pair is recorded when the
    gamma estimate is not None and ``2 <= K < K_MAX``. The list of pairs is pickled to
    ``boxplot_results.p``.
    """
    results = []
    for i, G in enumerate(Gs):
        # NOTE: pickle must only be used on trusted files
        with open(f"parts{i}.p", "rb") as parts_file:
            parts = pickle.load(parts_file)

        for p in parts:
            K = num_communities(p)
            g_est = gamma_estimate(G, p)

            if g_est is not None and 2 <= K < K_MAX:
                # sanity check on the magnitude of the estimate
                assert g_est < 15
                results.append((K, g_est))

    # Close the file deterministically so the pickle is fully flushed to disk
    with open("boxplot_results.p", "wb") as results_file:
        pickle.dump(results, results_file)
Ejemplo n.º 9
0
def plot_stable_partitions(all_parts):
    """Draw the stable partitions of the Zachary graph for K = 2, 3 and 4 communities.

    :param all_parts: iterable of partitions of the Zachary graph; it is scanned once per K
    """
    G = ig.Graph.Famous("Zachary")

    # One force-directed layout reused by every drawing so the pictures stay comparable
    layout = G.layout_fruchterman_reingold(niter=1000)

    for K in (2, 3, 4):
        # Only partitions with exactly K communities take part in this round
        restricted_parts = {part for part in all_parts if num_communities(part) == K}
        if not restricted_parts:
            continue

        ranges = CHAMP_2D(G, restricted_parts, GAMMA_START, GAMMA_END)
        gamma_estimates = ranges_to_gamma_estimates(G, ranges)
        stable_parts = gamma_estimates_to_stable_partitions(gamma_estimates)

        for i, membership in enumerate(stable_parts):
            partition = louvain.RBConfigurationVertexPartition(G, initial_membership=membership)
            ig.plot(partition, f"karate_club_{K}_stable{i}.png", bbox=(1000, 1000), layout=layout)
Ejemplo n.º 10
0
    def assert_multiplex_SBM_correct_convergence(self,
                                                 first_layer_membership,
                                                 copying_probability=0.75,
                                                 num_layers=10,
                                                 p_in=0.25,
                                                 p_out=0.05):
        """Check that multiplex parameter estimation lands near the ground truth gamma and omega.

        A multiplex SBM is generated from the given first-layer membership, the ground truth
        gamma and omega are computed from ``p_in``, ``p_out`` and ``copying_probability``, and
        the iterative estimation (started at gamma=1.0, omega=0.1) must end within 0.05 of the
        true gamma and within 0.15 of the true omega.
        """
        if not check_multilayer_louvain_capabilities(fatal=False):
            # this version of louvain cannot do multilayer parameter estimation, so there is nothing to test
            return

        num_groups = num_communities(first_layer_membership)
        G_intralayer, G_interlayer, layer_membership = self.generate_multiplex_SBM(
            copying_probability, p_in, p_out, first_layer_membership,
            num_layers)

        # ground truth gamma
        mean_degree = mean(all_degrees(G_intralayer))
        twice_edge_count = 2 * G_intralayer.ecount()
        degree_sq = mean_degree * mean_degree
        true_theta_in = p_in * twice_edge_count / degree_sq / num_layers
        true_theta_out = p_out * twice_edge_count / degree_sq / num_layers
        true_gamma = (true_theta_in - true_theta_out) / (
            log(true_theta_in) - log(true_theta_out))

        # ground truth omega
        copy_term = log(1 + copying_probability * num_groups /
                        (1 - copying_probability))
        true_omega = copy_term / (
            num_layers * (log(true_theta_in) - log(true_theta_out)))

        gamma, omega, _ = iterative_multilayer_resolution_parameter_estimation(
            G_intralayer,
            G_interlayer,
            layer_membership,
            gamma=1.0,
            omega=0.1,
            model='multiplex')

        # The estimates must be close to the ground truth values. The omega tolerance is looser
        # because multiplex omega estimation seems less accurate than in other models, perhaps
        # due to the copying probability approximation.
        self.assertLess(abs(true_gamma - gamma), 0.05)
        self.assertLess(abs(true_omega - omega), 0.15)
Ejemplo n.º 11
0
def run_louvain(graphnum):
    """Sweep the Louvain resolution parameter over ``Gs[graphnum]`` and collect the partitions.

    1000 evenly spaced resolution values in [0, 10] are tried in increasing order; the sweep
    stops at the first partition with more than 100 communities (that partition is discarded).

    :param graphnum: index into ``Gs``
    :return: ``(graphnum, set of partitions)``, each partition passed through ``sorted_tuple``
    """
    G = Gs[graphnum]
    start = time()
    parts = []

    for resolution in np.linspace(0, 10, 1000):
        partition = louvain.find_partition(
            G,
            louvain.RBConfigurationVertexPartition,
            resolution_parameter=resolution)
        membership = partition.membership

        if num_communities(membership) > 100:
            break
        parts.append(membership)

    summary = (
        f"Running on Graph {graphnum}, n={G.vcount()}, m={G.ecount()}: "
        f"In {time() - start:.2f} s, found {len(parts)} partitions at {(time() - start) / len(parts):.2f} "
        "seconds per partition")
    print(summary)

    unique_parts = set()
    for p in parts:
        unique_parts.add(sorted_tuple(tuple(p)))
    return graphnum, unique_parts
def generate_multilayer_intralayer_SBM(copying_probability, p_in, p_out,
                                       first_layer_membership, num_layers):
    """Generate the intralayer graph of a multilayer SBM whose labels are copied from layer to layer.

    Layer 0 uses ``first_layer_membership``. In every later layer each node keeps its label from
    the previous layer with probability ``copying_probability`` and otherwise receives a random
    label ``randint(0, K - 1)``, K being the number of communities of the first layer. Within each
    layer, every node pair is connected with probability ``p_in`` (same label) or ``p_out``
    (different labels). Node ``v`` of layer ``l`` has index ``l * num_nodes_per_layer + v``.

    :param copying_probability: probability of copying a node's label from the previous layer
    :param p_in: edge probability between same-label nodes of one layer
    :param p_out: edge probability between different-label nodes of one layer
    :param first_layer_membership: community labels of the first layer
    :param num_layers: number of layers
    :return: ``(G_intralayer, layer_membership)``; the undirected graph and the layer of each node
    """
    num_nodes_per_layer = len(first_layer_membership)
    community_labels_per_layer = [[0] * num_nodes_per_layer
                                  for _ in range(num_layers)]
    community_labels_per_layer[0] = list(first_layer_membership)
    K = num_communities(first_layer_membership)

    # assign community labels in the higher layers
    for layer in range(1, num_layers):
        previous_labels = community_labels_per_layer[layer - 1]
        current_labels = community_labels_per_layer[layer]
        for v in range(num_nodes_per_layer):
            if random() < copying_probability:  # copy community from last layer
                current_labels[v] = previous_labels[v]
            else:  # assign random community
                current_labels[v] = randint(0, K - 1)

    # flatten in linear time (sum(lists, []) is quadratic in the number of layers)
    combined_community_labels = [
        label for layer_labels in community_labels_per_layer
        for label in layer_labels
    ]
    layer_membership = [
        i for i in range(num_layers) for _ in range(num_nodes_per_layer)
    ]

    # create intralayer edges according to an SBM
    # Nodes of a layer are contiguous, so only pairs inside one layer are visited. The pairs are
    # visited in the same order as a scan over all pairs, which keeps the random draws identical.
    intralayer_edges = []
    for layer in range(num_layers):
        layer_start = layer * num_nodes_per_layer
        layer_end = layer_start + num_nodes_per_layer
        for v in range(layer_start, layer_end):
            for u in range(v + 1, layer_end):
                same_community = (combined_community_labels[v] ==
                                  combined_community_labels[u])
                if random() < (p_in if same_community else p_out):
                    intralayer_edges.append((u, v))

    # Pass the vertex count explicitly: inferring it from the edge list would drop trailing
    # nodes that happen to have no edges, leaving the graph out of step with layer_membership.
    G_intralayer = ig.Graph(n=len(layer_membership),
                            edges=intralayer_edges,
                            directed=False)

    return G_intralayer, layer_membership
Ejemplo n.º 13
0
    def assert_temporal_SBM_correct_convergence(self, first_layer_membership, copying_probability=0.75, num_layers=25,
                                                p_in=0.25, p_out=0.05):
        """Check that temporal (and multilevel) parameter estimation lands near the ground truth values.

        A temporal SBM is generated from the given first-layer membership, the ground truth gamma
        and omega are computed from ``p_in``, ``p_out`` and ``copying_probability``, and the
        iterative estimation (started at gamma=1.0, omega=1.0) must end within 0.05 of the true
        gamma and within 0.1 of the true omega for both the 'temporal' and 'multilevel' models.
        """
        if not check_multilayer_louvain_capabilities(fatal=False):
            # this version of louvain cannot do multilayer parameter estimation, so there is nothing to test
            return

        num_groups = num_communities(first_layer_membership)
        G_intralayer, G_interlayer, layer_membership = self.generate_temporal_SBM(
            copying_probability, p_in, p_out, first_layer_membership, num_layers)

        # ground truth gamma
        mean_degree = mean(all_degrees(G_intralayer))
        twice_edge_count = 2 * G_intralayer.ecount()
        degree_sq = mean_degree * mean_degree
        true_theta_in = p_in * twice_edge_count / degree_sq / num_layers
        true_theta_out = p_out * twice_edge_count / degree_sq / num_layers
        true_gamma = (true_theta_in - true_theta_out) / (log(true_theta_in) - log(true_theta_out))

        # ground truth omega. For some reason, Pamfil et al. scale this by 1/2 (perhaps due to the
        # directedness of the interlayer edges), so the same scaling is applied here
        copy_term = log(1 + copying_probability * num_groups / (1 - copying_probability))
        true_omega = copy_term / (2 * (log(true_theta_in) - log(true_theta_out)))

        # 'temporal' is the model of interest; 'multilevel' is never used elsewhere but is a slight
        # generalization of it, so it is held to the same tolerances
        for model in ('temporal', 'multilevel'):
            gamma, omega, _ = iterative_multilayer_resolution_parameter_estimation(
                G_intralayer, G_interlayer, layer_membership, gamma=1.0, omega=1.0, model=model)

            # the estimates must be close to the ground truth values
            self.assertLess(abs(true_gamma - gamma), 0.05)
            self.assertLess(abs(true_omega - omega), 0.1)