def t_selection_pipeline_undirected_village(G, ts, fraction_t_to_keep=0.25):
    """Score every heat-kernel scale in ``ts`` and pick the most promising ones.

    For each ``t`` the undirected normalized heat kernel of ``G`` is used as
    the cost passed to ``process_sgwl_village``; the resulting coupling is
    turned into a partition whose coverage on ``G`` is measured.

    ``database``, ``num_nodes`` and ``num_partitions`` are not parameters:
    they are read from the enclosing (module) scope.

    :param G: graph handed to the heat kernel and to ``coverage``
    :param ts: 1-D sequence of scale parameters (list or NumPy array)
    :param fraction_t_to_keep: fraction of ``ts`` to return in each of the
        two selections; the resulting count is clamped to ``[0, len(ts)]``
    :return: ``(mis, coups, d_gws, good_t_max, good_t_grad, rt)`` where the
        first three are per-``t`` lists of mutual information, couplings and
        GW distances, ``good_t_max`` holds the scales with the highest
        coverage (ascending by coverage), ``good_t_grad`` the scales where
        coverage changes least, and ``rt`` the per-``t`` runtimes in seconds
    """
    # Index arrays from argsort only work on an ndarray, so accept lists too.
    ts = np.asarray(ts)

    mis = []
    coups = []
    d_gws = []
    rt = []

    for t in ts:
        start = time.time()
        cost = sgw.undirected_normalized_heat_kernel(G, t)
        mutual_info, d_gw, coup = process_sgwl_village(cost, database,
                                                       num_nodes,
                                                       num_partitions)
        mis.append(mutual_info)
        coups.append(coup)
        d_gws.append(d_gw)
        rt.append(time.time() - start)

    print('Couplings Computed')

    coverages = [coverage(G, get_partition(coup)) for coup in coups]

    num_to_keep = int(np.round(fraction_t_to_keep * len(ts)))
    num_to_keep = min(max(num_to_keep, 0), len(ts))

    # Slice from an explicit start index: ``x[-0:]`` would be the WHOLE array,
    # so keeping zero scales must not be written as ``x[-num_to_keep:]``.
    ts_by_coverage = ts[np.argsort(coverages)]
    good_t_max = ts_by_coverage[len(ts) - num_to_keep:]

    # np.gradient needs at least two samples; with fewer there is no change
    # to rank, so every scale gets a zero gradient.
    if len(coverages) > 1:
        coverage_change = np.abs(np.gradient(coverages))
    else:
        coverage_change = np.zeros(len(coverages))
    good_t_grad = ts[np.argsort(coverage_change)][:num_to_keep]

    return mis, coups, d_gws, good_t_max, good_t_grad, rt
Ejemplo n.º 2
0
def best_split(wordPairs):
    """
    Build a graph from word pairs and return its best Girvan-Newman partition.

    Every level produced by ``girvan_newman`` is scored with the harmonic
    mean of its ``performance`` and ``coverage``; the first level with the
    strictly highest score is kept.

    :param wordPairs: iterable of ``((word_a, word_b), similarity)`` items;
        only the word pair is used, each becoming an unweighted edge
    :return: (level of partition that gives the best score, best score,
        best partition); ``(None, 0, None)`` when there are no edges or no
        level scores above zero
    """
    # NOTE(review): `performance` and `coverage` appear to have been removed
    # from networkx 3.x in favour of `partition_quality` - confirm against the
    # pinned networkx version.
    from networkx.algorithms import community
    from networkx.algorithms.community.quality import performance, coverage
    import networkx as nx

    Graph = nx.Graph()
    Graph.add_edges_from((pair[0][0], pair[0][1]) for pair in wordPairs)

    max_pc = 0
    max_index = None
    best_communities = None

    # Nothing to split: the quality measures are undefined on an edgeless graph.
    if Graph.number_of_edges() == 0:
        return (max_index, max_pc, best_communities)

    for i, communities in enumerate(community.girvan_newman(Graph)):
        p = performance(Graph, communities)
        c = coverage(Graph, communities)
        # The harmonic mean is undefined when both scores are zero (e.g. a
        # single edge split into two singletons); score that level as 0.
        score = 2 * p * c / (p + c) if p + c > 0 else 0
        if score > max_pc:
            max_index = i
            max_pc = score
            best_communities = communities
    return (max_index, max_pc, best_communities)
Ejemplo n.º 3
0
    def clustering_statistics(self, community_partition, feat_name, feat_desc,
                              feat_interpret):
        """Register one feature per quality measure of a community partition.

        Six features are added through ``self.add_feature``, named
        ``feat_name + "_" + <measure>``. Each one evaluates the matching
        function of ``quality`` on the graph it is given together with
        ``community_partition``.
        """
        description_tail = " of the partition of " + feat_desc

        def make_scorer(measure):
            # The measure is looked up on ``quality`` only when the feature
            # is evaluated, not at registration time.
            return lambda graph: getattr(quality, measure)(
                graph, community_partition)

        measures = (
            ("modularity", "Modularity"),
            ("coverage", "Coverage"),
            ("performance", "Performance"),
            ("inter_community_edges", "Inter community edges"),
            ("inter_community_non_edges", "Inter community non edges"),
            ("intra_community_edges", "Intra community edges"),
        )

        for measure, label in measures:
            self.add_feature(
                feat_name + "_" + measure,
                make_scorer(measure),
                label + description_tail,
                feat_interpret,
            )
Ejemplo n.º 4
0
        # Keep the largest U[j][i] seen so far (the enclosing loops start
        # outside this excerpt).
        mx = max(mx, U[j][i])
    # Collect every anchor in anchorList whose U[j][i] equals that maximum.
    probableCommunities = []
    for j in anchorList:
        if (U[j][i] == mx):
            probableCommunities.append(j)
    # Several anchors can tie for the maximum; pick one of them at random.
    selectedAnchorIndex = random.randint(0, len(probableCommunities) - 1)
    selectedAnchor = probableCommunities[selectedAnchorIndex]
    # File i under the chosen anchor, creating its member list on first use.
    if selectedAnchor not in finalCommunities:
        finalCommunities[selectedAnchor] = [i]
    else:
        finalCommunities[selectedAnchor].append(i)
# Dump the partitions found by the reference algorithms.
for tag, found in (("gmc", gmc), ("alc", alc), ("lpac", lpac),
                   ("async_fluidc", asfl)):
    print(tag, found)

# One line per FuzAg community: members separated (and followed) by a space.
print("FuzAg Communties")
for members in finalCommunities.values():
    print("".join(f"{member} " for member in members))

# Drop the anchor keys; coverage only needs the member lists.
listOfFinalCommunties = list(finalCommunities.values())

coverageOfFuzAg = coverage(g, listOfFinalCommunties)
print("coverage Of FuzAg", coverageOfFuzAg)

# Coverage of each reference partition on the same graph, for comparison.
for caption, reference in (
        ("coverage of greedy_modularity_communities", gmc),
        ("coverage of async_lpa_communities", alc),
        ("coverage of label_propagation_communities", lpac),
        ("coverage of Async Fluid Communties", asfl)):
    print(caption, coverage(g, reference))
Ejemplo n.º 5
0
 def _compute_coverage(self, partition, weight=None):
     """Return the coverage of ``partition`` measured on ``self.graph``.

     ``partition`` is first converted with ``_get_community_sets``.
     ``weight`` is accepted but not used here.
     """
     community_sets = _get_community_sets(partition)
     return coverage(self.graph, community_sets)
Ejemplo n.º 6
0
    ##--------------------- print for label propagation result
    # label_prop is called with max_iter=100; the graph it returns carries a
    # "label" attribute on every node, which is read below.
    G_treated = label_prop(G, max_iter=100)
    labels = [G_treated.nodes[node]["label"] for node in G_treated.nodes]
    # print(labels)
    # Reduce to the distinct labels: one community per label.
    labels = list(set(labels))
    # Group the nodes into one set per distinct label.
    partitions = []
    for label in labels:
        partitions.append(
            set([
                node for node in G_treated.nodes
                if G_treated.nodes[node]["label"] == label
            ]))
    # start = time.time()
    # Report the three quality scores of the label-propagation partition.
    print('modularity, coverage, performance : ',
          modularity(G_treated, partitions), coverage(G_treated, partitions),
          performance(G_treated, partitions))
    # end = time.time()
    # print(end-start)

    ##--------------------- print for louvain result
    # start = time.time()
    # best_partition's result is indexed by node below to get each node's
    # community label.
    partition = community_louvain.best_partition(G)
    # print(partition)
    labels = [partition[node] for node in G.nodes]
    labels = list(set(labels))
    # Same grouping as above: one node set per distinct Louvain label.
    partitions = []
    for label in labels:
        partitions.append(
            set([node for node in G.nodes if partition[node] == label]))
    # print(modularity(partitions, G))
# Candidate cluster counts to try: 5 through 44.
num_clusts = list(range(5, 45))
# Fixed heat-kernel scale used while scanning the cluster counts.
t = 20

# The cost depends only on G and t, so it is computed once for the whole scan.
cost = sgw.undirected_normalized_heat_kernel(G, t)

d_gws = []
mis = []
coverages = []
modularities = []

# Partition the graph once per candidate cluster count and score the result.
for j in num_clusts:
    mutual_info, d_gw, coup = process_sgwl_eu(cost, database, num_nodes, j)
    partition = get_partition(coup)
    mis.append(mutual_info)
    d_gws.append(d_gw)
    coverages.append(coverage(G, partition))
    modularities.append(modularity(G, partition))

# Estimate number of clusters
# The estimate is the candidate count whose partition has the highest modularity.
estimated_clusters_raw_sym = num_clusts[np.argmax(modularities)]
print('Number of Clusters:', estimated_clusters_raw_sym)

# Now perform modularity/coverage maximizing pipeline
# Scan 40 evenly spaced scales between 3 and 10 with the estimated count.
# NOTE: `mis` and `d_gws` from the scan above are rebound here.
ts = np.linspace(3, 10, 40)
mis, coups, d_gws, good_t_max, good_t_grad, rt = t_selection_pipeline_undirected_eu(
    G, ts, estimated_clusters_raw_sym)

# `coverages` from the cluster-count scan is discarded and restarted.
coverages = []

# NOTE(review): this loop only binds `coup`; the rest of its body appears to
# be missing from this excerpt.
for j in range(len(ts)):
    coup = coups[j]
def main():
    """Run the small-graph and LFR benchmarks and save their metrics as JSON.

    Flags are parsed from ``sys.argv``. With the help flag only the usage
    text is printed. Otherwise the module-level ``VERBOSE`` switch is set,
    ``non_lfr_runs`` is executed and ``RESULTS_S`` is written to
    ``results/small_c1_test.json``. Every algorithm is then run on one LFR
    graph per size, its coverage, performance, NMI and runtime are stored in
    ``RESULTS_LFR``, and that is written to ``results/lfr_c1_test.json``.
    """
    global VERBOSE
    show_help, verbose, skip_genetic, mu, mu_val = parse(' '.join(sys.argv[1:]))

    if show_help:
        for usage_line in (
            "- Use -v to activate Verbose",
            "- Use -w to exclude the genetic algorithm from the run",
            "- Use -mu value to set the value of mu in the graph generators; value should be in the range (0, 1)",
        ):
            print(usage_line)
        return

    VERBOSE = True if verbose else False

    # The genetic algorithm (gcm) is part of the run unless -w was given.
    algorithms = [clauset_newman_moore, louvain, reneel]
    if not skip_genetic:
        algorithms.append(gcm)

    if VERBOSE:
        print("Start process")

    rule = "========================================================"

    # small graphs
    non_lfr_runs(algorithms)

    with open('results/small_c1_test.json', 'w') as fs:
        json.dump(RESULTS_S, fs)

    # lfr benchmark graphs
    for n in (250, 500, 1000, 1500, 2000, 2500, 3000):
        G, target_partition, target_communities = genrate_lfr_graph(size=n, mu=mu_val)
        edges_no = G.number_of_edges()
        avg_degree = sum(G.degree[node] for node in range(n)) / n
        print(rule)
        print(n, edges_no, avg_degree)
        print(rule)

        pos = nx.spring_layout(G)

        # Each algorithm result is indexed as [0] partition, [1] communities,
        # [2] runtime.
        results = [alg(G) for alg in algorithms]
        partitions = [res[0] for res in results]

        metrics = [
            (
                coverage(G, res[1]),
                performance(G, res[1]),
                normalized_mutual_info_score(
                    convert_to_array(target_partition),
                    convert_to_array(res[0]),
                ),
            )
            for res in results
        ]

        runtimes = [res[2] for res in results]

        for idx, (cov, perf, nmi) in enumerate(metrics):
            RESULTS_LFR[NAMES[idx]][n] = {
                "coverage": cov,
                "performance": perf,
                "nmi": nmi,
                "runtime": runtimes[idx],
            }
            if not VERBOSE:
                continue
            alg_name = algorithms[idx].__name__
            print(f"The coverage obtained by {alg_name} was {cov:.4f}")
            print(f"The performance obtained by {alg_name} was {perf:.4f}")
            print(f"The NMI score obtained by {alg_name} was {nmi:.4f}")
            print(rule)

        parallel_display(algorithms, partitions, G, pos)

    with open('results/lfr_c1_test.json', 'w') as fb:
        json.dump(RESULTS_LFR, fb)
def non_lfr_runs(algorithms):
    """Benchmark every algorithm on the three small reference graphs.

    The graphs are the Karate Club graph and two caveman graphs (4 cliques
    of size 6, 7 cliques of size 3). Results are stored in ``RESULTS_S``
    under the keys ``"Karate"``, ``"Caveman46"`` and ``"Caveman73"``.

    :param algorithms: callables taking a graph and returning a sequence
        indexed as ``[0]`` partition, ``[1]`` communities, ``[2]`` runtime
    """
    # Karate Club graph
    _run_small_benchmark(generate_karate_club_graph(), "Karate",
                         "Karate Club", algorithms)

    # simple Caveman graph 4 6
    _run_small_benchmark(generate_caveman_graph(cliques=4, size=6),
                         "Caveman46", "Caveman46", algorithms)

    # simple Caveman graph 7 3
    _run_small_benchmark(generate_caveman_graph(cliques=7, size=3),
                         "Caveman73", "Caveman73", algorithms)


def _run_small_benchmark(graph, result_key, display_name, algorithms):
    """Run the algorithms on one graph, record their metrics and plot them.

    :param graph: graph to benchmark on
    :param result_key: key under ``RESULTS_S[<algorithm name>]`` to store into
    :param display_name: graph name used in the verbose messages
    :param algorithms: same callables as accepted by ``non_lfr_runs``
    """
    pos = nx.spring_layout(graph)
    results = [alg(graph) for alg in algorithms]
    partitions = [res[0] for res in results]

    metrics = [(coverage(graph, res[1]),
                performance(graph, res[1]),
                modularity(graph, res[1]))
               for res in results]

    runtimes = [res[2] for res in results]

    # NAMES is indexed in parallel with `algorithms`.
    for idx, (cov, perf, mod) in enumerate(metrics):
        RESULTS_S[NAMES[idx]][result_key] = {
            "coverage": cov,
            "performance": perf,
            "modularity": mod,
            "runtime": runtimes[idx],
        }

        if VERBOSE:
            alg_name = algorithms[idx].__name__
            print(
                f"The coverage obtained by {alg_name} on the "
                f"{display_name} graph was {cov:.4f}")
            print(
                f"The performance obtained by {alg_name} on the "
                f"{display_name} graph was {perf:.4f}")
            print(
                f"The final modularity obtained by {alg_name} on the "
                f"{display_name} graph was {mod:.4f}")
            print("========================================================")

    parallel_display(algorithms, partitions, graph, pos)
Ejemplo n.º 10
0
    # Print the modularity computed just above this excerpt.
    print(mod)

    # TODO Conductance of Louvain
    # TODO Coverage of Louvain
    # TODO Adjusted Rand Index of Louvain
    # TODO Normalized Mutual Information of Louvain
    # TODO Normalized Mutual Information Variant of Louvain
    """
        Label Propagation Algorithm and Evaluation
    """
    # NOTE(review): if this is networkx's asyn_lpa_communities, it returns a
    # one-shot iterator; the modularity call below would then consume `cs`
    # and the coverage call would receive no communities. Materialising it
    # with list(...) would avoid that - TODO confirm.
    cs = asyn_lpa.asyn_lpa_communities(graph)

    # Modularity
    # NOTE(review): the arguments are passed as (communities, graph), whereas
    # networkx's quality functions take (G, partition) - verify which
    # `modularity` / `coverage` implementations are imported here.
    mod = modularity(cs, graph)
    print(mod)

    # Coverage
    cov = coverage(cs, graph)
    print(cov)

    # TODO Conductance of Label Propagation
    # TODO Adjusted Rand Index of Label Propagation
    # TODO Normalized Mutual Information of Label Propagation
    # TODO Normalized Mutual Information Variant of Label Propagation

    # TODO implement Smart local moving method
    # TODO Evaluation of Smart local moving

    # TODO implement Infomap method
    # TODO Evaluation of Infomap
Ejemplo n.º 11
0
 def test_good_partition(self):
     """Coverage is high (6/7) when nodes 0-2 and 3-5 form the communities."""
     graph = barbell_graph(3, 0)
     communities = [{0, 1, 2}, {3, 4, 5}]
     expected = 6 / 7
     assert_almost_equal(expected, coverage(graph, communities))
Ejemplo n.º 12
0
 def test_bad_partition(self):
     """Coverage is low (3/7) when the communities mix nodes of both halves."""
     graph = barbell_graph(3, 0)
     communities = [{0, 1, 4}, {2, 3, 5}]
     expected = 3 / 7
     assert_almost_equal(expected, coverage(graph, communities))
Ejemplo n.º 13
0
 def test_good_partition(self):
     """A partition into nodes 0-2 and 3-5 of the barbell scores 6/7."""
     barbell = barbell_graph(3, 0)
     halves = [{0, 1, 2}, {3, 4, 5}]
     measured = coverage(barbell, halves)
     assert_almost_equal(6 / 7, measured)
Ejemplo n.º 14
0
 def test_bad_partition(self):
     """A partition that mixes nodes of both barbell halves scores only 3/7."""
     barbell = barbell_graph(3, 0)
     mixed = [{0, 1, 4}, {2, 3, 5}]
     measured = coverage(barbell, mixed)
     assert_almost_equal(3 / 7, measured)