Example #1
import itertools
import networkx as nx

def conductance(g, s=(0, 0), t=(10, 10), max_cut=5):
    """Return the minimum conductance over all cuts of at most max_cut
    nodes that separate s from t."""
    nodes = list(g.nodes)
    # Initialize with the conductance of a singleton cut.
    best = nx.conductance(g, [nodes[0]])
    for n in range(1, max_cut + 1):
        for c in itertools.combinations(nodes, n):
            # Only consider cuts that contain exactly one of s and t.
            if (s in c) != (t in c):
                cond = nx.conductance(g, c)
                best = min(best, cond)
    return best
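A minimal usage sketch for the brute-force search above, assuming a small 2D grid whose (row, col) node labels match the default s and t:

g = nx.grid_2d_graph(3, 3)
print(conductance(g, s=(0, 0), t=(2, 2), max_cut=2))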
Example #2
def test_graph(self):
    G = nx.barbell_graph(5, 0)
    # Consider the singleton sets containing the "bridge" nodes.
    # There is only one cut edge, and each set has volume five.
    S = {4}
    T = {5}
    conductance = nx.conductance(G, S, T)
    expected = 1 / 5
    assert expected == conductance
    # Test with no input T
    G2 = nx.barbell_graph(3, 0)
    # There is only one cut edge, and each set has volume seven.
    S2 = {0, 1, 2}
    assert nx.conductance(G2, S2) == 1 / 7
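A quick check of the definition these tests rely on: with both sets given, nx.conductance(G, S, T) is cut_size(S, T) divided by the smaller of the two volumes.

import networkx as nx

G = nx.barbell_graph(5, 0)
S, T = {4}, {5}
cut = nx.cut_size(G, S, T)                    # the single bridge edge
vol = min(nx.volume(G, S), nx.volume(G, T))   # each bridge node has degree 5
assert nx.conductance(G, S, T) == cut / vol == 1 / 5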
Example #3
import math
import operator
import networkx as nx

def extend_labeled_data(input_path, output_path):
    """
    Extends the graph under input_path based on the conductance metric and
    saves the extended graph under output_path.
    :param input_path: Location of the original graph (GraphML)
    :param output_path: Location of the extended output graph (GraphML)
    """
    total_graph = nx.read_graphml(input_path)
    A_pos = create_labeled_subgraph(total_graph)
    sub_a_pos = list(A_pos.nodes(data=True))
    B_neg = create_unlabeled_subgraph(total_graph)
    sub_b_neg = list(B_neg.nodes(data=True))
    i = 0
    # cond_on_iteration is assumed to be a module-level dict recording the
    # conductance of the labeled node set after each iteration.
    cond_on_iteration[i] = nx.conductance(total_graph, A_pos)
    while not stop_criterion_reached(i):
        i += 1
        argmax = {}
        nodes_to_iter = set()
        for node in sub_a_pos:
            if node[1]['leaning'] == 'R':
                neighbors = nx.neighbors(total_graph, node[0])
                strong_edge_neighbors = [
                    n_2
                    for (n_1, n_2, w) in total_graph.edges(node[0], data=True)
                    if w['weight'] > 1
                ]
                clean_neighbors = []
                for neighbor in neighbors:
                    if neighbor in [b[0] for b in sub_b_neg]:
                        if neighbor in strong_edge_neighbors:
                            clean_neighbors.append(neighbor)
                nodes_to_iter.update(clean_neighbors)

        counter = 0
        for node in nodes_to_iter:
            counter += 1
            c = math.floor(counter / len(nodes_to_iter) * 100)
            temp = [x[0] for x in sub_a_pos]
            temp.append(node)
            argmax[node] = nx.conductance(total_graph, temp)
            print(str(c) + '%')
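        # Note: this picks the candidate whose addition yields the HIGHEST
        # conductance; swap max for min if the goal is to minimize it.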
        b = max(argmax.items(), key=operator.itemgetter(1))[0]
        cond_on_iteration[i] = argmax[b]
        b_data = [(x, y) for (x, y) in sub_b_neg if x == b][0]
        sub_a_pos.append(b_data)
        sub_b_neg.remove(b_data)
        print('Finished with ' + str(len(sub_a_pos)) + ' and ' +
              str(len(sub_b_neg)) + ' nodes')
    extended_graph = nx.subgraph(total_graph, [x for (x, y) in sub_a_pos])
    nx.write_graphml(extended_graph, output_path)
Example #4
import networkx as nx

def calculate_component_wise_measures(G, comm_n_dict):
    # connected_component_subgraphs was removed in NetworkX 2.4, so build
    # the per-component subgraphs explicitly.
    comp_graphs = [G.subgraph(c).copy() for c in nx.connected_components(G)]
    comp_wise_conductance = {}

    for comp_id, graph in enumerate(comp_graphs):
        if graph.number_of_nodes() > 1:
            for comm in comm_n_dict:
                nodes = comm_n_dict[comm]
                # Score only communities that lie strictly inside this
                # component and contain more than one node.
                if (set(nodes).issubset(set(graph.nodes()))
                        and graph.number_of_nodes() > len(nodes)
                        and len(nodes) > 1):
                    comp_wise_conductance.setdefault(comp_id, {})[comm] = \
                        nx.conductance(graph, nodes)

    comp_min_conductance = {}
    comp_num_communities = {}
    for comp_id in comp_wise_conductance:
        if comp_wise_conductance[comp_id]:
            comp_num_communities[comp_id] = len(comp_wise_conductance[comp_id])
            comp_min_conductance[comp_id] = min(
                comp_wise_conductance[comp_id].values())

    # Smallest community conductance found across all components.
    return min(comp_min_conductance.values())
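A hypothetical usage sketch for the helper above: two components, each a triangle with a pendant node, and one candidate community per component.

import networkx as nx

G = nx.Graph()
G.add_edges_from([(0, 1), (1, 2), (2, 0), (0, 3)])          # component A
G.add_edges_from([(10, 11), (11, 12), (12, 10), (10, 13)])  # component B
comm_n_dict = {'c1': [0, 1, 2], 'c2': [10, 11, 12]}
print(calculate_component_wise_measures(G, comm_n_dict))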
Example #5
def test_graph(self):
    G = nx.barbell_graph(5, 0)
    # Consider the singleton sets containing the "bridge" nodes.
    # There is only one cut edge, and each set has volume five.
    S = {4}
    T = {5}
    conductance = nx.conductance(G, S, T)
    expected = 1 / 5
    assert_equal(expected, conductance)
Example #6
import networkx as nx

def getConductance(G, communities):
    """Return the average conductance over a list of communities."""
    sum_cond = 0
    conductances = {}
    for L in communities:
        if len(L) == len(G.nodes):
            # Conductance is undefined when a community spans the whole
            # graph (the complement has zero volume), so score it as 1.
            cond = 1
        else:
            cond = nx.conductance(G, L)
        conductances[str(L)] = cond
        sum_cond += cond
    avg_cond = sum_cond / len(communities)
    return avg_cond
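A usage sketch, assuming the built-in karate club graph with its two known factions as the community list:

import networkx as nx

G = nx.karate_club_graph()
clubs = {n: d['club'] for n, d in G.nodes(data=True)}
communities = [
    [n for n in G if clubs[n] == 'Mr. Hi'],
    [n for n in G if clubs[n] == 'Officer'],
]
print(getConductance(G, communities))  # average conductance of the two factions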
Example #7
import networkx as nx
import torch

def sweep(myp, G, data):
    """Sweep cut: scan prefixes of the support of myp, sorted by
    myp/degree, and return the prefix with minimum conductance."""
    supp = torch.nonzero(myp).squeeze().tolist()
    degs = data.adj[supp, :].sum(-1)
    sortedsupp = torch.argsort(myp[supp] / degs, descending=True).squeeze().tolist()
    support = [supp[i] for i in sortedsupp]
    sweepset = []
    bestconductance = float('inf')
    bestvolume = 0
    bestset = []

    for i in support:
        sweepset.append(i)
        volume = nx.volume(G, sweepset)
        conductance = nx.conductance(G, sweepset)
        if conductance < bestconductance:
            bestconductance = conductance
            bestvolume = volume
            # Copy the list: sweepset keeps growing, so aliasing it here
            # would return the full support set instead of the best prefix.
            bestset = list(sweepset)

    return bestset, bestconductance, bestvolume
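A hypothetical usage sketch, assuming data.adj is a dense torch adjacency matrix of G and myp is a diffusion or PageRank-style score vector:

import networkx as nx
import torch
from types import SimpleNamespace

G = nx.karate_club_graph()
data = SimpleNamespace(adj=torch.tensor(nx.to_numpy_array(G)))
myp = torch.zeros(G.number_of_nodes())
myp[[0, 1, 2, 3]] = torch.tensor([0.4, 0.3, 0.2, 0.1])  # toy scores on four nodes
best_set, best_cond, best_vol = sweep(myp, G, data)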
Example #8
    def get_quality_metrics(self, station_df, lst_graphs):

        import dunn as di
        from sklearn import metrics
        import networkx as nx
        import networkx.algorithms.community as nx_comm
        import numpy as np
        import pandas as pd
        import traceback

        quality_metric_df = pd.DataFrame([])

        try:
            _n_num_clust = len([
                x for x in station_df['label'].unique() if x > -1
            ])  # Generated Cluster Count
            if _n_num_clust <= 1:
                raise ValueError(
                    'Cannot compute quality metric for %d clusters' %
                    (_n_num_clust))
            ''' returns the simple graph of the clusters and the set dictionary of cluster nodes '''
            G_simple_, l_G_clusters_ = self.__get_graph_n_labels(station_df)

            _s_st_types = str(station_df['st_type'].unique())  # Station Types
            _n_tot_num_st = station_df.shape[0]  # Station Quantity
            _f_min_dist = self._max_distance  # Maximum Distance
            _n_min_pts = self._minimum_samples  # Minimum Points
            _s_clust = str(self._name)  # Clustering Name
            _s_algo = str(self._algorithm)  # Algorithm
            _s_metric = str(self._metric)  # Metric
            _s_method = str(self._cluster_method)  # Method
            _s_seed = str(self._seed)  # Seed
            __lst_valid_cloud_clust = [
                frozenset(clust) for clust in l_G_clusters_
                if len(clust) >= self._minimum_samples
            ]
            _n_valid_clust = len(
                __lst_valid_cloud_clust)  # Valid Cluster Count

            # Clustered Station Count
            _n_sts_in_clusters = 0
            for x in __lst_valid_cloud_clust:
                _n_sts_in_clusters += len(x)

            _n_noise = station_df.shape[
                0] - _n_sts_in_clusters  # Unclustered Noise Count
            _n_avg_deg = sum([
                d for n, d in G_simple_.degree()
                if G_simple_.nodes[n]["label"] > -1
            ]) / _n_sts_in_clusters  # Average Node Degree
            ''' Compute the accuracy of r-regularity constraint on the individual clusters by considering the
                systematic error that is a reproducible inaccuracy consistent for the same clustering strategy.
                For such we apply the weighted mean absolute error to estimate the deviation from the expected degree.
            '''
            sum_deg_abs_err = 0
            _deg_wmae = 0
            _deg_err_st_count = 0
            for H in lst_graphs:
                H = nx.Graph(H)
                H.remove_nodes_from(list(nx.isolates(H)))
                H.remove_nodes_from(
                    [n for n, v in H.nodes(data=True) if v["label"] == -1])
                H_deg_abs_err = 0
                _l_deg_diff = []
                if H.number_of_nodes() > 0:
                    _l_deg_diff = [
                        _n_min_pts - 1 - d for n, d in H.degree()
                        if (int(d) < int(_n_min_pts -
                                         1) and H.nodes[n]["label"] > -1)
                    ]
                if len(_l_deg_diff) > 0:
                    sum_deg_abs_err += sum(_l_deg_diff)
                    _deg_err_st_count += len(_l_deg_diff)
            if _deg_err_st_count > 0:
                _deg_wmae = sum_deg_abs_err / (_deg_err_st_count *
                                               (_n_min_pts - 1))
            ''' prepare valid stations for measuring the quality'''
            lst_st = list(nx.get_node_attributes(G_simple_, 'pos').values())
            lst_lbl = list(nx.get_node_attributes(G_simple_, 'label').values())

            _f_silhouette = metrics.silhouette_score(
                lst_st, lst_lbl, metric='haversine')  # Silhouette Coefficient
            _f_cal_har = metrics.calinski_harabasz_score(
                lst_st, lst_lbl)  # Calinski Harabaz score
            _f_dav_bould = metrics.davies_bouldin_score(
                lst_st, lst_lbl)  # Davies Bouldin score
            _f_dunn = di.dunn_fast(lst_st, lst_lbl)  # Dunn Index
            _f_modul = nx_comm.modularity(G_simple_,
                                          l_G_clusters_)  # Modularity

            try:
                l_conductance = list(
                    nx.conductance(G_simple_, cluster_i, weight='distance')
                    for cluster_i in __lst_valid_cloud_clust)
                _f_conduct = sum(l_conductance) / len(
                    l_conductance)  # Conductance Average
            except Exception:
                _f_conduct = 0
            # nx_comm.coverage and nx_comm.performance were removed in
            # NetworkX 3.0; partition_quality returns both in one call.
            _f_cover, _f_perform = nx_comm.partition_quality(
                G_simple_, l_G_clusters_)  # Coverage and Performance Scores

            dict_quality_mesrs = {
                'Station Types': _s_st_types,
                'Station Quantity': _n_tot_num_st,
                'Maximum Distance': _f_min_dist,
                'Minimum Points': _n_min_pts,
                'Name': _s_clust,
                'Algorithm': _s_algo,
                'Metric': _s_metric,
                'Method': _s_method,
                'Seed': _s_seed,
                'Generated Cluster Count': _n_num_clust,
                'Valid Cluster Count': _n_valid_clust,
                'Clustered Station Count': _n_sts_in_clusters,
                'Unclustered Noise Count': _n_noise,
                'Average Station Degree': _n_avg_deg,
                'Degree Weighted Mean Absolute Error': _deg_wmae,
                'Degree Error Station Count': _deg_err_st_count,
                'Silhouette Coefficient': _f_silhouette,
                'Calinski Harabaz score': _f_cal_har,
                'Davies Bouldin score': _f_dav_bould,
                'Dunn Index': _f_dunn,
                'Modularity': _f_modul,
                'Conductance Average': _f_conduct,
                'Coverage Score': _f_cover,
                'Performance Score': _f_perform,
            }
            quality_metric_df = pd.DataFrame(dict_quality_mesrs,
                                             index=[_s_clust])
            quality_metric_df.reset_index(drop=True, inplace=True)

        except Exception as err:
            print(
                "Class cluster_quality_metric [get_quality_metrics] Error message:",
                err)
            print(traceback.format_exc())

        return quality_metric_df
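A minimal sketch of the "Conductance Average" metric computed above, assuming an unweighted graph and one node set per cluster:

import networkx as nx

G = nx.barbell_graph(4, 0)
clusters = [set(range(4)), set(range(4, 8))]
l_conductance = [nx.conductance(G, c) for c in clusters]
print(sum(l_conductance) / len(l_conductance))  # 1/13 for both halves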
Example #9
def cut_conductance(self, set_A, set_B):
    return nx.conductance(self.G, set_A, set_B, weight='weight')
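The wrapper above passes weight='weight', so both the cut and the volumes become edge-weight sums; a small check of that behavior:

import networkx as nx

G = nx.Graph()
G.add_edge(0, 1, weight=3.0)
G.add_edge(1, 2, weight=1.0)
# cut({0}, {1, 2}) = 3, volumes are 3 and 5, so conductance = 3 / 3 = 1.0
print(nx.conductance(G, {0}, {1, 2}, weight='weight'))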
Example #10
    print("Precision@{}: {}".format(k, len(right_set)/k))
    print("Recall@{}: {}".format(k, len(right_set)/len(labelList)))    
    if outPath:
        f_r = open('degree_top{}'.format(k), 'w')
        for t in top_k_dict.items():
            f_r.write(t[0]+'\t'+str(t[1])+'\n')
        f_r.close()




if __name__ == "__main__":
    args = parse_args()
    g = Graph()
    start_time = time.time()
    if args.graph_format == 'adjlist':
        g.read_adjlist(filename=args.input)
    elif args.graph_format == 'edgelist':
        g.read_edgelist(filename=args.input, weighted=args.weighted,
                        directed=args.directed)
    cluster = read_cluster(args.cluster)
    print("File read done, elapsed time {}s".format(time.time()-start_time))
    print("Node Size: {}".format(g.G.number_of_nodes()))
    print("Edge Size: {}".format(g.G.number_of_edges()))
    
    start_time = time.time()
    print("S size {}, volume {}".format(len(cluster[0]), nx.volume(g.G, cluster[0], weight='weight')))
    print("T size {}, volume {}".format(len(cluster[1]), nx.volume(g.G, cluster[1], weight='weight')))    
    cond = nx.conductance(g.G, cluster[0], cluster[1], weight='weight')
    print("Conductance {}, elapsed time {}s".format(cond, time.time()-start_time))
Example #11
# Build the initial factor matrix: a 1 marks each (node, community) pair
# in the 20 percent seed communities.
for a in range(len(percent20_seeds_communities_list)):
    # Iterate through each community list of 20 percent seeds.
    for b in percent20_seeds_communities_list[a]:
        # Update the factor matrix with '1' for the corresponding
        # node and community combination.
        fact_matrix_seed20[b][a] = 1
fact_matrix_seed20  # Displays the initial factor matrix for 20 percent seeds data

# In this below step we compute the conductance for the remaining values
# in fact_matrix_seed20.
for a in range(len(fact_matrix_seed20)):
    # Iterate through the rows (nodes) of the initial factor matrix.
    value_minimum = np.inf  # Initialized minimum value
    for b in range(len(fact_matrix_seed20[0])):
        # Conductance of community b extended with node a and its neighbors.
        conduct = nx.conductance(
            G, percent20_seeds_communities_list[b] + list(G.neighbors(a)) + [a])
        if value_minimum > conduct:
            # Keep the community index with the minimum conductance so far.
            value_minimum = conduct
            conduct_minimum = b
        # If 'a' is locally minimal, mark node a and its neighbors with '1' in
        # the minimum-conductance community's column (otherwise they stay 0).
        for c in list(G.neighbors(a)) + [a]:
            fact_matrix_seed20[c][conduct_minimum] = 1
fact_matrix_seed20  # Displays the final factor matrix for 20 percent seed communities
"""***Method-2: Factor Matrix Initialization for Neighborhood Seed Communities***

***I) Loading the Neighborhood Seed Communities Data File***

***II) Computing the Factorization Matrix associated to Neighborhood Seed Communities and associated conductance calculation***
"""