Example #1
    def test_good_partition(self):
        """Tests that a good partition has a high performance measure.

        """
        G = barbell_graph(3, 0)
        partition = [set([0, 1, 2]), set([3, 4, 5])]
        assert_almost_equal(14 / 15, performance(G, partition))
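The 14/15 figure follows directly from the definition: performance is the fraction of node pairs that are "correct", i.e. intra-community pairs that are edges plus inter-community pairs that are non-edges, over all C(6, 2) = 15 pairs. For barbell_graph(3, 0) and this split that is (6 + 8) / 15. A minimal self-contained check, assuming a NetworkX release that provides partition_quality (in current releases performance has been removed and partition_quality(G, partition)[1] is its replacement, as Example #3 shows):

import itertools
import networkx as nx
from networkx.algorithms.community import partition_quality

G = nx.barbell_graph(3, 0)        # two triangles {0, 1, 2} and {3, 4, 5} joined by the edge (2, 3)
partition = [{0, 1, 2}, {3, 4, 5}]

def same_community(u, v):
    return any(u in c and v in c for c in partition)

intra_edges = sum(1 for u, v in G.edges() if same_community(u, v))                  # 6
inter_non_edges = sum(1 for u, v in itertools.combinations(G, 2)
                      if not G.has_edge(u, v) and not same_community(u, v))         # 8
print((intra_edges + inter_non_edges) / 15)   # 0.9333... == 14 / 15
print(partition_quality(G, partition)[1])     # same value computed by NetworkX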
Example #2
    def test_good_partition(self):
        """TestData that a good partition has a high performance measure.

        """
        G = barbell_graph(3, 0)
        partition = [{0, 1, 2}, {3, 4, 5}]
        assert almost_equal(14 / 15, performance(G, partition))
Example #3
 def test_good_partition(self):
     """Tests that a good partition has a high performance measure."""
     G = barbell_graph(3, 0)
     partition = [{0, 1, 2}, {3, 4, 5}]
     assert 14 / 15 == pytest.approx(performance(G, partition), abs=1e-7)
     assert 14 / 15 == pytest.approx(partition_quality(G, partition)[1],
                                     abs=1e-7)
Example #4
    def test_good_partition(self):
        """Tests that a good partition has a high performance measure.

        """
        G = barbell_graph(3, 0)
        partition = [{0, 1, 2}, {3, 4, 5}]
        assert_almost_equal(14 / 15, performance(G, partition))
Example #5
 def test_bad_partition(self):
     """Tests that a poor partition has a low performance measure."""
     G = barbell_graph(3, 0)
     partition = [{0, 1, 4}, {2, 3, 5}]
     assert 8 / 15 == pytest.approx(performance(G, partition), abs=1e-7)
     assert 8 / 15 == pytest.approx(partition_quality(G, partition)[1],
                                    abs=1e-7)
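The 8/15 value can also be counted by hand on the same graph: this poor split keeps only 3 intra-community edges (0-1, 2-3, 3-5), and only 5 of the 9 cross-community pairs are non-edges, giving (3 + 5) / 15. A quick check, again assuming partition_quality is available:

from networkx import barbell_graph
from networkx.algorithms.community import partition_quality

print(partition_quality(barbell_graph(3, 0), [{0, 1, 4}, {2, 3, 5}])[1])   # 0.5333... == 8 / 15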
Example #6
def opt_async_fluid(G, kmin, kmax, verbose=False, rep=2):
    """Searches for the best k within the given range.

    rep is the number of repetitions to try (a run may have had a bad
    initialization).
    """
    bestcom = 0
    bestp = 0
    bestk = 0
    for i in range(rep):
        print("rep = " + str(i + 1))
        for k in range(kmin, kmax + 1):
            com = async_fluid(G, k)
            print("k = " + str(k))
            # collect the communities found for this k into a list
            partition = list(com)
            if verbose:
                print(partition)

            p = community.performance(G, partition)
            if p > bestp:
                bestp = p
                bestcom = partition
                bestk = k
        print("Current best K = " + str(bestk))

    print("Best K: " + str(bestk))
    print("Best P: " + str(bestp))

    return bestcom
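The helper above depends on an async_fluid wrapper and a community alias from its own project. A self-contained equivalent of the inner search, written against NetworkX's asyn_fluidc and partition_quality (the function name and the switch to partition_quality here are illustrative, not from the original), could look like this:

import networkx as nx
from networkx.algorithms.community import asyn_fluidc, partition_quality

def best_k_by_performance(G, kmin, kmax, seed=None):
    # Try every k in [kmin, kmax] and keep the partition with the highest performance.
    best_partition, best_perf, best_k = None, -1.0, None
    for k in range(kmin, kmax + 1):
        partition = [set(c) for c in asyn_fluidc(G, k, seed=seed)]
        perf = partition_quality(G, partition)[1]
        if perf > best_perf:
            best_partition, best_perf, best_k = partition, perf, k
    return best_partition, best_perf, best_k

# Example: best_k_by_performance(nx.karate_club_graph(), 2, 5, seed=42)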
Example #7
def communitiesRandomModule(graphForCommunities):
    # helper for the random-network analysis: returns the modularity, community count,
    # coverage, and performance of the Clauset-Newman-Moore partition
    # variables
    com = 0
    communitiesListCNM = []
    listCommunities = []
    communitiesDict = dict()
    community = None
    vertex = None
    networkModularity = 0
    networkCoverage = 0
    networkPerformance = 0
    counter = 0
    # get Clauset-Newman-Moore communities
    communitiesListCNM = list(
        greedy_modularity_communities(graphForCommunities))
    # evaluate modularity
    for community in communitiesListCNM:
        for vertex in set(community):
            communitiesDict[vertex] = com
        listCommunities.append(set(community))
        com = com + 1
    networkModularity = louv.modularity(communitiesDict, graphForCommunities)
    networkCoverage = coverage(graphForCommunities, listCommunities)
    networkPerformance = performance(graphForCommunities, listCommunities)
    # end of function
    return (networkModularity, len(listCommunities), networkCoverage,
            networkPerformance)
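communitiesRandomModule leans on names imported elsewhere in its module; a plausible set of imports and a call, assuming louv is the python-louvain package (imported as community, with modularity(partition_dict, G)) and a NetworkX version that still exposes coverage and performance:

import networkx as nx
import community as louv  # python-louvain; assumed binding for `louv`
from networkx.algorithms.community import greedy_modularity_communities
from networkx.algorithms.community import coverage, performance  # removed in NetworkX 3.0

G = nx.karate_club_graph()
modularity, n_communities, cov, perf = communitiesRandomModule(G)
print(modularity, n_communities, cov, perf)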
Example #8
def AnalizeCommunitiesAndMakeDrawings(name, fileName, someNetwork):
    # function message
    print("\t- Obtaining communities (for undir network version)...")
    # variables
    communitiesFinalResult = ""
    communitiesResult = "Communities (Undirected):\n"
    graphForCommunities = nx.Graph()
    read = "ok"
    com = 0
    communitiesListCNM = []
    listCommunities = []
    communitiesDict = dict()
    community = None
    vertex = None
    networkModularity = 0
    networkCoverage = 0
    networkPerformance = 0
    counter = 0
    # get undir graph integer-weighted
    (graphForCommunities, read) = ParseFileToNetwork(fileName, "-u",
                                                     "community")
    # get Clauset-Newman-Moore communities
    communitiesListCNM = list(
        greedy_modularity_communities(graphForCommunities))
    # evaluate modularity
    for community in communitiesListCNM:
        for vertex in set(community):
            communitiesDict[vertex] = com
        listCommunities.append(set(community))
        com = com + 1
    networkModularity = louv.modularity(communitiesDict, graphForCommunities)
    networkCoverage = coverage(graphForCommunities, listCommunities)
    networkPerformance = performance(graphForCommunities, listCommunities)
    communitiesResult = communitiesResult + "- Modularity: " + str(
        networkModularity) + "\n"
    communitiesResult = communitiesResult + "- Coverage: " + str(
        networkCoverage) + "\n"
    communitiesResult = communitiesResult + "- Performance: " + str(
        networkPerformance) + "\n"
    communitiesResult = communitiesResult + "- Number of Communities: " + str(
        len(listCommunities)) + "\n"
    for counter in range(len(listCommunities)):
        communitiesResult = communitiesResult + "- Number of nodes in community " + str(
            counter + 1) + ": " + str(len(listCommunities[counter])) + "\n"
        communitiesResult = communitiesResult + "- Nodes in C_" + str(
            counter + 1) + ": " + ",".join(listCommunities[counter]) + "\n"
    # plot graph with communities
    drawCommunities(someNetwork, listCommunities, name)
    # end of function
    communitiesFinalResult = "\n\n" + communitiesResult + "\n\n"
    return (communitiesFinalResult)
Example #9
def girvan_newman(G, verbose=False):
    """Runs the Girvan-Newman community detection algorithm.

    G = Graph to look at
    verbose = whether or not to show steps
    """
    communities_generator = community.girvan_newman(G)
    bestcom = 0
    bestq = 0

    for next_level_communities in communities_generator:  # iterate over dendrogram levels
        quality = community.performance(G, next_level_communities)
        if quality > bestq:
            bestq = quality
            bestcom = next_level_communities
        if verbose:
            print(sorted(map(sorted, next_level_communities)))
            print("quality = {}".format(quality))

    if verbose:
        print("Best Communities:")
        print(sorted(map(sorted, bestcom)))

    return bestcom
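A usage sketch for the wrapper above, assuming community in that module is networkx.algorithms.community (whose girvan_newman yields one tuple of node sets per dendrogram level) and a NetworkX release that still ships performance:

import networkx as nx

G = nx.karate_club_graph()
best = girvan_newman(G, verbose=False)        # the wrapper defined above, not nx's generator
print(len(best), "communities in the best-scoring level")
print(sorted(map(sorted, best)))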
Example #10
def evaluate_performance(G, commu_list):
    """Return a dict mapping each algorithm's name to the performance of its partition."""

    # initiate a dictionary to store performances
    performances = {}
    
    # loop over all community structure
    for c in commu_list:
        communities = c['communities']
        # prepare element for performance calculation
        commu = []
        # loop over communities in an algorithm
        for i in sorted(pd.Series(communities.values()).unique()):
            store_list = []
            # loop over all nodes
            for node in G.nodes:
                if communities[node] == i:
                    store_list.append(node)
            commu.append(store_list)
        # calculate performance
        performance = nx_comm.performance(G, commu)
        # store value in `performances`
        performances[c['algo']] = performance
    
    return performances
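The commu_list argument is not documented in the original; reading the loop, it appears to be a list of dicts with an 'algo' name and a 'communities' mapping from node to community id. An illustrative call built on that assumption (and on a NetworkX version where nx_comm.performance still exists):

import networkx as nx

G = nx.barbell_graph(3, 0)
commu_list = [
    {"algo": "good_split", "communities": {0: 0, 1: 0, 2: 0, 3: 1, 4: 1, 5: 1}},
    {"algo": "bad_split",  "communities": {0: 0, 1: 0, 4: 0, 2: 1, 3: 1, 5: 1}},
]
print(evaluate_performance(G, commu_list))    # {'good_split': 0.933..., 'bad_split': 0.533...}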
Example #11
 def test_bad_partition(self):
     """Tests that a poor partition has a low performance measure."""
     G = barbell_graph(3, 0)
     partition = [set([0, 1, 4]), set([2, 3, 5])]
     assert_almost_equal(8 / 15, performance(G, partition))
Example #12
 def test_bad_partition(self):
     """TestData that a poor partition has a low performance measure."""
     G = barbell_graph(3, 0)
     partition = [{0, 1, 4}, {2, 3, 5}]
     assert almost_equal(8 / 15, performance(G, partition))
Example #13
def performance_wrapper(partition, G):
    community_map = extract_community_map(partition)
    return algorithms.performance(G, community_map)
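extract_community_map is a helper from the snippet's own project, and algorithms is the module alias it uses for the community-quality functions. A hypothetical reconstruction of the helper, assuming partition is a node-to-community-id mapping:

from collections import defaultdict

def extract_community_map(partition):
    # Hypothetical sketch: group a node -> community-id mapping into a list of node sets.
    groups = defaultdict(set)
    for node, com in partition.items():
        groups[com].add(node)
    return list(groups.values())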
Example #14
    def get_quality_metrics(self, station_df, lst_graphs):

        import dunn as di
        from sklearn import metrics
        import networkx as nx
        import networkx.algorithms.community as nx_comm
        import numpy as np
        import pandas as pd
        import traceback

        quality_metric_df = pd.DataFrame([])

        try:
            #d            _n_num_clust = len(station_df['label'].unique())     # Generated Cluster Count
            _n_num_clust = len([
                x for x in station_df['label'].unique() if x > -1
            ])  # Generated Cluster Count
            if _n_num_clust <= 1:
                raise ValueError(
                    'Cannot compute quality metric for %d clusters' %
                    (_n_num_clust))
            ''' returns the simple graph of the clusters and the set dictionary of cluster nodes '''
            G_simple_, l_G_clusters_ = self.__get_graph_n_labels(station_df)

            _s_st_types = str(station_df['st_type'].unique())  # Station Types
            _n_tot_num_st = station_df.shape[0]  # Station Quantity
            _f_min_dist = self._max_distance  # Maximum Distance
            _n_min_pts = self._minimum_samples  # Minimum Points
            _s_clust = str(self._name)  # Clustering Name
            _s_algo = str(self._algorithm)  # Algorithm
            _s_metric = str(self._metric)  # Metric
            _s_method = str(self._cluster_method)  # Method
            _s_seed = str(self._seed)  # Seed
            __lst_valid_cloud_clust = [
                frozenset(clust) for clust in l_G_clusters_
                if len(clust) >= self._minimum_samples
            ]
            _n_valid_clust = len(
                __lst_valid_cloud_clust)  # Valid Cluster Count

            # Clustered Station Count
            _n_sts_in_clusters = 0
            for x in __lst_valid_cloud_clust:
                _n_sts_in_clusters += len(x)

            _n_noise = station_df.shape[0] - _n_sts_in_clusters  # Unclustered Noise Count
            _n_avg_deg = sum([
                d for n, d in G_simple_.degree()
                if G_simple_.nodes[n]["label"] > -1
            ]) / _n_sts_in_clusters  # Average Node Degree
            ''' Compute the accuracy of r-regularity constraint on the individual clusters by considering the
                systematic error that is a reproducible inaccuracy consistent for the same clustering strategy.
                For such we apply the weighted mean absolute error to estimate the deviation from the expected degree.
            '''
            sum_deg_abs_err = 0
            _deg_wmae = 0
            _deg_err_st_count = 0
            #p            print("\nclusters:",len(lst_graphs))
            for H in lst_graphs:
                H = nx.Graph(H)
                H.remove_nodes_from(list(nx.isolates(H)))
                H.remove_nodes_from(
                    [n for n, v in H.nodes(data=True) if v["label"] == -1])
                H_deg_abs_err = 0
                _l_deg_diff = []
                if H.number_of_nodes() > 0:
                    _l_deg_diff = [
                        _n_min_pts - 1 - d for n, d in H.degree()
                        if (int(d) < int(_n_min_pts -
                                         1) and H.nodes[n]["label"] > -1)
                    ]
                if len(_l_deg_diff) > 0:
                    #p                    print("\ndegree mean absolute error")
                    #p                    print("minPts:",_n_min_pts)
                    #p                    print("list deg diff:",_l_deg_diff)
                    #p                    print("graph nodes:",sorted([d for n,d in H.degree()]))
                    sum_deg_abs_err += sum(_l_deg_diff)
                    _deg_err_st_count += len(_l_deg_diff)
            if _deg_err_st_count > 0:
                _deg_wmae = sum_deg_abs_err / (_deg_err_st_count *
                                               (_n_min_pts - 1))
#p                print("_deg_wmae", _deg_wmae,_deg_err_st_count)
            ''' prepare valid stations for measuring the quality'''
            lst_st = list(nx.get_node_attributes(G_simple_, 'pos').values())
            lst_lbl = list(nx.get_node_attributes(G_simple_, 'label').values())

            _f_silhouette = metrics.silhouette_score(
                lst_st, lst_lbl, metric='haversine')  # Silhouette Coefficient
            _f_cal_har = metrics.calinski_harabasz_score(
                lst_st, lst_lbl)  # Calinski Harabaz score
            _f_dav_bould = metrics.davies_bouldin_score(
                lst_st, lst_lbl)  # Davies Bouldin score
            _f_dunn = di.dunn_fast(lst_st, lst_lbl)  # Dunn Index
            _f_modul = nx_comm.modularity(G_simple_,
                                          l_G_clusters_)  # Modularity

            try:
                l_conductance = list(
                    nx.conductance(G_simple_, cluster_i, weight='distance')
                    for cluster_i in __lst_valid_cloud_clust)
                _f_conduct = sum(l_conductance) / len(
                    l_conductance)  # Conductance Average
            except Exception:
                _f_conduct = 0
            _f_cover = nx_comm.coverage(G_simple_,
                                        l_G_clusters_)  # Coverage Score
            _f_perform = nx_comm.performance(
                G_simple_, l_G_clusters_)  # Performance Score

            dict_quality_mesrs = {
                'Station Types': _s_st_types,
                'Station Quantity': _n_tot_num_st,
                'Maximum Distance': _f_min_dist,
                'Minimum Points': _n_min_pts,
                'Name': _s_clust,
                'Algorithm': _s_algo,
                'Metric': _s_metric,
                'Method': _s_method,
                'Seed': _s_seed,
                'Generated Cluster Count': _n_num_clust,
                'Valid Cluster Count': _n_valid_clust,
                'Clustered Station Count': _n_sts_in_clusters,
                'Unclustered Noise Count': _n_noise,
                'Average Station Degree': _n_avg_deg,
                'Degree Weighted Mean Absolute Error': _deg_wmae,
                'Degree Error Station Count': _deg_err_st_count,
                'Silhouette Coefficient': _f_silhouette,
                'Calinski Harabaz score': _f_cal_har,
                'Davies Bouldin score': _f_dav_bould,
                'Dunn Index': _f_dunn,
                'Modularity': _f_modul,
                'Conductance Average': _f_conduct,
                'Coverage Score': _f_cover,
                'Performance Score': _f_perform,
            }
            #            print('Dict qual',dict_quality_mesrs('Seed'))
            quality_metric_df = pd.DataFrame(dict_quality_mesrs,
                                             index=[_s_clust])
            quality_metric_df.reset_index(drop=True, inplace=True)

        except Exception as err:
            print(
                "Class cluster_quality_metric [get_quality_metrics] Error message:",
                err)
            #            print(G_simple_.edges('distance'))
            print(traceback.format_exc())

        return quality_metric_df
Example #15
 def test_bad_partition(self):
     """Tests that a poor partition has a low performance measure."""
     G = barbell_graph(3, 0)
     partition = [{0, 1, 4}, {2, 3, 5}]
     assert_almost_equal(8 / 15, performance(G, partition))
Example #16
        c[j] = k
        k += 1
    d[i] += 1
    d[j] += 1
    v[c[i]] += 1
    v[c[j]] += 1
    if v[c[i]] <= v_max and v[c[j]] <= v_max:
        if v[c[i]] <= v[c[j]]:
            v[c[j]] += d[i]
            v[c[i]] -= d[i]
            c[i] = c[j]
        else:
            v[c[i]] += d[j]
            v[c[j]] -= d[j]
            c[j] = c[i]

com_dict = defaultdict(set)
for i in c:
    com_dict[c[i]].add(i)
communities = list()
for com in com_dict.values():
    if len(com) > 1:
        communities.append(com)

plot_nx_clusters(nx_graph, communities, pos)

print(nx_comm.performance(nx_graph,communities))