Example #1
# Assumed imports: the snippet references pandas as `pd` plus NetworkX's
# `centrality` submodule and `clustering` function.
import pandas as pd
from networkx.algorithms import centrality, clustering


def get_centrality_labels(knn_graph_obj, perc_labeled, type='degree'):
    import random

    if type == 'degree':
        # Rank nodes by the chosen score and keep the top perc_labeled fraction.
        degree_centrality_knn = pd.DataFrame.from_dict(
            centrality.degree_centrality(knn_graph_obj),
            orient='index', columns=['value'])
        node_toget_labels = degree_centrality_knn.sort_values(
            by='value', ascending=False
        ).index[0:int(perc_labeled * len(degree_centrality_knn.index))].tolist()
    elif type == 'closeness':
        closeness_centrality_knn = pd.DataFrame.from_dict(
            centrality.closeness_centrality(knn_graph_obj),
            orient='index', columns=['value'])
        node_toget_labels = closeness_centrality_knn.sort_values(
            by='value', ascending=False
        ).index[0:int(perc_labeled * len(closeness_centrality_knn.index))].tolist()
    elif type == 'betweenness':
        betweenness_centrality_knn = pd.DataFrame.from_dict(
            centrality.betweenness_centrality(knn_graph_obj),
            orient='index', columns=['value'])
        node_toget_labels = betweenness_centrality_knn.sort_values(
            by='value', ascending=False
        ).index[0:int(perc_labeled * len(betweenness_centrality_knn.index))].tolist()
    elif type == 'katz':
        katz_centrality_knn = pd.DataFrame.from_dict(
            centrality.katz_centrality(knn_graph_obj),
            orient='index', columns=['value'])
        node_toget_labels = katz_centrality_knn.sort_values(
            by='value', ascending=False
        ).index[0:int(perc_labeled * len(katz_centrality_knn.index))].tolist()
    elif type == 'clustering':
        clustering_knn = pd.DataFrame.from_dict(
            clustering(knn_graph_obj), orient='index', columns=['value'])
        node_toget_labels = clustering_knn.sort_values(
            by='value', ascending=False
        ).index[0:int(perc_labeled * len(clustering_knn.index))].tolist()
    else:
        # Fallback: draw a uniform random sample of the requested size.
        indexes = list(knn_graph_obj.nodes)
        node_toget_labels = random.sample(indexes, int(perc_labeled * len(indexes)))

    return node_toget_labels
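A minimal smoke test, assuming the imports above; NetworkX's bundled karate-club graph stands in here for a real kNN graph:

import networkx as nx

G = nx.karate_club_graph()
# Label the top ~10% of nodes by degree centrality (3 of 34 nodes).
labeled = get_centrality_labels(G, 0.1, type='degree')
print(labeled)  # expected: [33, 0, 32], the three highest-degree nodes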
Example #2
# Assumed imports, as above: pandas plus NetworkX's centrality submodule and
# clustering function.
import pandas as pd
from networkx.algorithms import centrality, clustering


def get_centrality_labels(knn_graph_obj, type='degree'):
    # Note: the centrality branches return a one-column DataFrame of scores,
    # while the fallback branch returns a plain list of nodes.
    if type == 'degree':
        node_toget_labels = pd.DataFrame.from_dict(
            centrality.degree_centrality(knn_graph_obj),
            orient='index',
            columns=['value'])
    elif type == 'closeness':
        node_toget_labels = pd.DataFrame.from_dict(
            centrality.closeness_centrality(knn_graph_obj),
            orient='index',
            columns=['value'])
    elif type == 'betweenness':
        node_toget_labels = pd.DataFrame.from_dict(
            centrality.betweenness_centrality(knn_graph_obj),
            orient='index',
            columns=['value'])
    elif type == 'clustering':
        node_toget_labels = pd.DataFrame.from_dict(clustering(knn_graph_obj),
                                                   orient='index',
                                                   columns=['value'])
    else:
        node_toget_labels = list(knn_graph_obj.nodes)

    return node_toget_labels
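Unlike Example #1, this variant returns the full score table rather than a node list; a quick check, assuming the imports above:

import networkx as nx

G = nx.karate_club_graph()
scores = get_centrality_labels(G, type='degree')  # one-column DataFrame
print(scores.sort_values(by='value', ascending=False).head(3))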
Example #3
# Assumed imports and aliases, following the source project's conventions:
# `nxf` for networkx.classes.function and `nxa` for networkx.algorithms.
# `nntm` is assumed to point at a consensus-modularity implementation
# (e.g. netneurotools), and `connection_length` is a helper defined
# elsewhere in the source project.
import numpy as np
import pandas as pd
import networkx as nx
import networkx.classes.function as nxf
import networkx.algorithms as nxa


def compute_stats(df_fname, dist_mat=None):
    df = pd.read_hdf(df_fname)
    # Stats organized as follows:
    #   name : lambda/function to be called on (g)
    dists = np.loadtxt(dist_mat)

    stats = {
        "edgecount":
        nxf.number_of_edges,
        "globaleffic":
        nxa.global_efficiency,
        "degree":
        lambda g: dict(nxf.degree(g)).values(),
        "modularity":
        lambda g: nntm.consensus_modularity(nx.adjacency_matrix(g).toarray(),
                                            seed=42)[1].mean(),
        "assort":
        nxa.assortativity.degree_assortativity_coefficient,
        "avplength":
        lambda g: np.mean(connection_length(g, dists=dists)),
        "weight":
        lambda g: list(nxf.get_edge_attributes(g, 'weight').values()),
        "ccoeff":
        lambda g: list(nxa.clustering(g, weight=None).values()),
        "betweenness":
        lambda g: list(
            nxa.betweenness_centrality(g, weight='weight').values()),
        "plength":
        lambda g: connection_length(g, dists=dists)
    }

    # create dict (and eventual DF) column per stat with the following struct:
    stat_results = {'index': []}
    stat_results.update({stat_name: [] for stat_name in stats.keys()})

    for idx, row in df.iterrows():
        tmpg = nx.Graph(row.graph)
        stat_results['index'] += [row['index']]
        for stat_name, stat_fn in stats.items():
            tmps = stat_fn(tmpg)
            # Nodal results have a length and are stored as arrays; global
            # scalars raise TypeError on len() and are stored as-is.
            try:
                len(tmps)
                stat_results[stat_name] += [np.array(list(tmps))]
            except TypeError:
                stat_results[stat_name] += [tmps]

    stat_df = pd.DataFrame.from_dict(stat_results)
    stat_df.to_hdf(df_fname.replace('.h5', '_stats.h5'), "stats", mode="w")
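The scalar-vs-array dispatch inside the loop is the crux; isolated, the same pattern looks like this (a sketch with a hypothetical `pack` helper):

import numpy as np

def pack(result):
    # Nodal results (dicts/views/lists) have a length and become arrays;
    # global scalars raise TypeError on len() and pass through unchanged.
    try:
        len(result)
        return np.array(list(result))
    except TypeError:
        return result

print(pack(0.37))                       # 0.37
print(pack({0: 1.0, 1: 2.0}.values()))  # array([1., 2.])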
Example #4
def clustering(G, nodes=None, weight=None):
    # GraphScope-style wrapper (assumes the graphscope package and its
    # `context_to_dict` / `project_to_simple` decorators are in scope):
    # run the analytics engine when possible, else fall back to NetworkX.
    @context_to_dict
    @project_to_simple
    def _clustering(G):
        return graphscope.clustering(G)

    if weight or not isinstance(G, (nx.Graph, nx.DiGraph, graphscope.Graph)):
        # weighted queries and foreign graph types forward to networkx.clustering
        return nxa.clustering(G, nodes, weight)
    clusterc = _clustering(G)
    if nodes is not None:
        if not isinstance(nodes, list) and nodes in clusterc:
            return clusterc[nodes]
        else:
            return {n: clusterc[n] for n in nodes}
    return clusterc
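For reference, the fallback branch is plain NetworkX; a sketch of the weighted path on a small triangle (assuming only networkx is installed):

import networkx as nx
import networkx.algorithms as nxa

G = nx.Graph()
G.add_weighted_edges_from([(0, 1, 2.0), (1, 2, 0.5), (0, 2, 1.0)])
# weight is not None, so the wrapper above would return this call directly:
print(nxa.clustering(G, None, 'weight'))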
Example #5
G.nodes(data=True)


# In[15]:


attribute_assortativity_coefficient(G, 'club')


# ##### Task 2: Add Local Clustering Coefficient

# In[16]:


attr_cluster = clustering(G)


# In[17]:


nx.set_node_attributes(G, attr_cluster, 'cluster')


# In[18]:


G.nodes(data=True)


# ##### Task 3: Pair of nodes with the largest Cosine/Jaccard similarity
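The notebook breaks off at Task 3; a minimal sketch using NetworkX's built-in Jaccard link-prediction scorer (assuming `G` and `import networkx as nx` from the earlier cells):

# In[19]:


# Hypothetical continuation: rank non-adjacent node pairs by Jaccard score.
best_pair = max(nx.jaccard_coefficient(G), key=lambda t: t[2])
print(best_pair)  # (u, v, score)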
Example #6
# Assumed aliases: `net` for networkx, `algo` for networkx.algorithms.
import networkx as net
import networkx.algorithms as algo


def clustering_coefficiency_from_graph(p):
    # Watts-Strogatz small-world graph: 300 nodes, each wired to 4 neighbors,
    # rewired with probability p.
    g = net.watts_strogatz_graph(300, 4, p)
    c_list = algo.clustering(g).values()
    return (sum(c_list) / len(c_list), net.diameter(g))
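A short sweep, assuming the aliases above; small rewiring probabilities keep clustering high while collapsing the diameter:

for p in (0.0, 0.01, 0.1, 1.0):
    avg_c, dia = clustering_coefficiency_from_graph(p)
    # note: for large p the rewired graph can occasionally disconnect, in
    # which case net.diameter raises an exception
    print("p=%.2f: avg clustering=%.3f, diameter=%d" % (p, avg_c, dia))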
Example #7
    print "The number of nodes: %d" % num_nodes
    print "The number of edges in the nodes' complete graph: %d" % (num_nodes * (num_nodes - 1) / 2)

    brandes_ego_elapse_time = []
    proposed_ego_elapse_time = []

    brandes_xEgo_elapse_time = []
    proposed_xEgo_elapse_time = []

    for x in [8, 4, 2, 1, 0.5, 0.25, 0.125, 0.0625]:
        print "Threshold: %5.4f" % x
        adj_map = get_adjacency_matrix(num_nodes, weight_map, x)
        # print adj_map
        g = net.Graph(adj_map)
        density = net.density(g)
        clustering = algo.clustering(g)
        scc_g = algo.connected_component_subgraphs(g)[0]
        diameter = algo.diameter(scc_g)

        clustering_coefficient = mean(clustering.values())

        bet = get_CentralityList(g).values()
        myfile = open(data_type + "_" + str(x) + "_global_bet.csv", "wb")
        wr = csv.writer(myfile)
        wr.writerow(bet)
        myfile.close()

        ego_bet_map = get_Ego_CentralityList(g)
        ego_bet = ego_bet_map.values()
        cor_1 = 1 - Bio.Cluster.distancematrix((bet, ego_bet), dist="s")[1][0]
        print "Brandes, ego-global correlation: %5.4f - elapsed_time: %5.4f" % (cor_1, elapsed_time1)
Example #8
def clustering_sum(self, h1, h2):
    # Sum of two nodes' local clustering coefficients; nxa.clustering
    # returns a plain float when called with a single node.
    G = self._graph
    c_h1 = nxa.clustering(G, h1)
    c_h2 = nxa.clustering(G, h2)
    return c_h1 + c_h2
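The equivalent call pattern outside the class, assuming `nxa` aliases networkx.algorithms:

import networkx as nx
import networkx.algorithms as nxa

G = nx.karate_club_graph()
print(nxa.clustering(G, 0) + nxa.clustering(G, 33))  # clustering_sum(0, 33)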
Example #9
def clustering_sum(self, h1, h2):
    G = self._graph
    c_h1 = nxa.clustering(G, h1)
    c_h2 = nxa.clustering(G, h2)
    return c_h1 + c_h2
Example #10
def clustering_coefficiency_from_graph(p):
    g = net.watts_strogatz_graph(300, 4, p)
    c_list = algo.clustering(g).values()
    return (sum(c_list) / len(c_list), net.diameter(g))
Example #11
def extractnetstats(ID,
                    network,
                    thr,
                    conn_model,
                    est_path,
                    roi,
                    prune,
                    node_size,
                    norm,
                    binary,
                    custom_weight=None):
    """
    Function interface for performing fully-automated graph analysis.

    Parameters
    ----------
    ID : str
        A subject id or other unique identifier.
    network : str
        Resting-state network based on Yeo-7 and Yeo-17 naming (e.g. 'Default') used to filter nodes in the study of
        brain subgraphs.
    thr : float
        The value, between 0 and 1, used to threshold the graph using any variety of methods
        triggered through other options.
    conn_model : str
       Connectivity estimation model (e.g. corr for correlation, cov for covariance, sps for precision covariance,
       partcorr for partial correlation). sps type is used by default.
    est_path : str
        File path to the thresholded graph, conn_matrix_thr, saved as a numpy array in .npy format.
    roi : str
        File path to binarized/boolean region-of-interest Nifti1Image file.
    prune : bool
        Indicates whether to prune final graph of disconnected nodes/isolates.
    node_size : int
        Spherical centroid node size in the case that coordinate-based centroids
        are used as ROIs.
    norm : int
        Indicates method of normalizing resulting graph.
    binary : bool
        Indicates whether to binarize resulting graph edges to form an
        unweighted graph.
    custom_weight : float
        The edge attribute that holds the numerical value used as a weight.
        If None, then each edge has weight 1. Default is None.

    Returns
    -------
    out_path : str
        Path to .csv file where graph analysis results are saved.
    """
    import pandas as pd
    import yaml
    try:
        import cPickle as pickle
    except ImportError:
        import _pickle as pickle
    from pathlib import Path
    from pynets import thresholding, utils

    # Advanced options
    fmt = 'edgelist_ssv'
    est_path_fmt = "%s%s" % ('.', est_path.split('.')[-1])

    # Load and threshold matrix
    if est_path_fmt == '.txt':
        in_mat_raw = np.array(np.genfromtxt(est_path))
    else:
        in_mat_raw = np.array(np.load(est_path))

    # Zero out the diagonal and auto-fix other matrix pathologies
    in_mat = np.array(thresholding.autofix(in_mat_raw))

    # Normalize connectivity matrix
    # Force edges to values between 0-1
    if norm == 1:
        in_mat = thresholding.normalize(in_mat)
    # Apply log10
    elif norm == 2:
        in_mat = np.log10(in_mat)
    else:
        pass

    # Correct nan's and inf's
    in_mat[np.isnan(in_mat)] = 0
    in_mat[np.isinf(in_mat)] = 1

    # Apply the inverse hyperbolic tangent (i.e. Fisher r-to-z transform) of the matrix if non-covariance
    if (conn_model == 'corr') or (conn_model == 'partcorr'):
        in_mat = np.arctanh(in_mat)

    # Binarize graph
    if binary:
        in_mat = thresholding.binarize(in_mat)

    # Get dir_path
    dir_path = os.path.dirname(os.path.realpath(est_path))

    # Load numpy matrix as networkx graph
    G_pre = nx.from_numpy_matrix(in_mat)

    # Prune irrelevant nodes (i.e. nodes who are fully disconnected from the graph and/or those whose betweenness
    # centrality are > 3 standard deviations below the mean)
    if prune == 1:
        [G, _] = prune_disconnected(G_pre)
    elif prune == 2:
        [G, _] = most_important(G_pre)
    else:
        G = G_pre

    # Get corresponding matrix
    in_mat = np.array(nx.to_numpy_matrix(G))

    # Saved pruned
    if (prune != 0) and (prune is not None):
        final_mat_path = "%s%s%s" % (est_path.split(est_path_fmt)[0],
                                     '_pruned_mat', est_path_fmt)
        utils.save_mat(in_mat, final_mat_path, fmt)

    # Print graph summary
    print("%s%.2f%s" % ('\n\nThreshold: ', 100 * float(thr), '%'))
    print("%s%s" % ('Source File: ', est_path))
    info_list = list(nx.info(G).split('\n'))[2:]
    for i in info_list:
        print(i)

    if nx.is_connected(G):
        frag = False
        print('Graph is connected...')
    else:
        frag = True
        print('Warning: Graph is fragmented...\n')

    # Create Length matrix
    mat_len = thresholding.weight_conversion(in_mat, 'lengths')

    # Load numpy matrix as networkx graph
    G_len = nx.from_numpy_matrix(mat_len)

    # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
    # # # # Calculate global and local metrics from graph G # # # #
    # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
    import community
    from networkx.algorithms import (degree_assortativity_coefficient,
                                     average_clustering,
                                     average_shortest_path_length,
                                     degree_pearson_correlation_coefficient,
                                     graph_number_of_cliques, transitivity,
                                     betweenness_centrality,
                                     eigenvector_centrality,
                                     communicability_betweenness_centrality,
                                     clustering, degree_centrality,
                                     rich_club_coefficient, sigma)
    from pynets.stats.netstats import (average_local_efficiency,
                                       global_efficiency, local_efficiency,
                                       participation_coef,
                                       participation_coef_sign,
                                       diversity_coef_sign)
    # For non-nodal scalar metrics from custom functions, add the name of the function to metric_list and add the
    # function (with a G-only input) to the netstats module.
    metric_list_glob = [
        global_efficiency, average_local_efficiency,
        degree_assortativity_coefficient, average_clustering,
        average_shortest_path_length, degree_pearson_correlation_coefficient,
        graph_number_of_cliques, transitivity, sigma
    ]
    metric_list_comm = ['louvain_modularity']
    # with open("%s%s" % (str(Path(__file__).parent), '/global_graph_measures.yaml'), 'r') as stream:
    #     try:
    #         metric_dict_global = yaml.load(stream)
    #         metric_list_global = metric_dict_global['metric_list_global']
    #         print("%s%s%s" % ('\n\nCalculating global measures:\n', metric_list_global, '\n\n'))
    #     except FileNotFoundError:
    #         print('Failed to parse global_graph_measures.yaml')

    with open("%s%s" % (str(Path(__file__).parent),
                        '/nodal_graph_measures.yaml'), 'r') as stream:
        try:
            metric_dict_nodal = yaml.load(stream, Loader=yaml.FullLoader)
            metric_list_nodal = metric_dict_nodal['metric_list_nodal']
            print("%s%s%s" % ('\n\nCalculating nodal measures:\n',
                              metric_list_nodal, '\n\n'))
        except FileNotFoundError:
            print('Failed to parse nodal_graph_measures.yaml')

    # Note the use of bare excepts in the blocks that follow. Typically, this is considered bad practice in
    # python. Here, it is exploited intentionally to facilitate uninterrupted, automated graph analysis even
    # when algorithms are undefined. In those instances, solutions are assigned NaN's.

    # Iteratively run functions from above metric list that generate single scalar output
    num_mets = len(metric_list_glob)
    net_met_arr = np.zeros([num_mets, 2], dtype='object')
    j = 0
    for i in metric_list_glob:
        met_name = i.__name__
        net_met = met_name
        try:
            try:
                net_met_val = raw_mets(G, i, custom_weight)
            except:
                print("%s%s%s" %
                      ('WARNING: ', net_met, ' failed for graph G.'))
                net_met_val = np.nan
        except:
            print("%s%s%s" %
                  ('WARNING: ', str(i), ' is undefined for graph G'))
            net_met_val = np.nan
        net_met_arr[j, 0] = net_met
        net_met_arr[j, 1] = net_met_val
        print(net_met)
        print(str(net_met_val))
        print('\n')
        j = j + 1
    net_met_val_list = list(net_met_arr[:, 1])

    # Create a list of metric names for scalar metrics
    metric_list_names = []
    net_met_val_list_final = net_met_val_list
    for i in net_met_arr[:, 0]:
        metric_list_names.append(i)

    # Run miscellaneous functions that generate multiple outputs
    # Calculate modularity using the Louvain algorithm
    ci = None  # community affiliation vector, set by the Louvain step below
    if 'louvain_modularity' in metric_list_comm:
        try:
            ci = community.best_partition(G)
            modularity = community.community_louvain.modularity(ci, G)
            metric_list_names.append('modularity')
            net_met_val_list_final.append(modularity)
        except:
            print('Louvain modularity calculation is undefined for graph G')
            pass

    # Participation Coefficient by louvain community
    if 'participation_coefficient' in metric_list_nodal:
        try:
            if ci is None:
                raise KeyError(
                    'Participation coefficient cannot be calculated for graph G in the absence of a '
                    'community affiliation vector')
            if len(in_mat[in_mat < 0.0]) > 0:
                pc_vector = participation_coef_sign(in_mat, ci)
            else:
                pc_vector = participation_coef(in_mat, ci)
            print(
                '\nExtracting Participation Coefficient vector for all network nodes...'
            )
            pc_vals = list(pc_vector)
            pc_edges = list(range(len(pc_vector)))
            num_edges = len(pc_edges)
            pc_arr = np.zeros([num_edges + 1, 2], dtype='object')
            j = 0
            for i in range(num_edges):
                pc_arr[j, 0] = "%s%s" % (str(pc_edges[j]), '_partic_coef')
                try:
                    pc_arr[j, 1] = pc_vals[j]
                except:
                    print("%s%s%s" %
                          ('Participation coefficient is undefined for node ',
                           str(j), ' of graph G'))
                    pc_arr[j, 1] = np.nan
                j = j + 1
            # Add mean
            pc_arr[num_edges, 0] = 'average_participation_coefficient'
            nonzero_arr_partic_coef = np.delete(pc_arr[:, 1], [0])
            pc_arr[num_edges, 1] = np.mean(nonzero_arr_partic_coef)
            print("%s%s" % ('Mean Participation Coefficient across edges: ',
                            str(pc_arr[num_edges, 1])))
            for i in pc_arr[:, 0]:
                metric_list_names.append(i)
            net_met_val_list_final = net_met_val_list_final + list(pc_arr[:,
                                                                          1])
        except:
            print('Participation coefficient cannot be calculated for graph G')
            pass

    # Diversity Coefficient by louvain community
    if 'diversity_coefficient' in metric_list_nodal:
        try:
            if ci is None:
                raise KeyError(
                    'Diversity coefficient cannot be calculated for graph G in the absence of a community '
                    'affiliation vector')
            [dc_vector, _] = diversity_coef_sign(in_mat, ci)
            print(
                '\nExtracting Diversity Coefficient vector for all network nodes...'
            )
            dc_vals = list(dc_vector)
            dc_edges = list(range(len(dc_vector)))
            num_edges = len(dc_edges)
            dc_arr = np.zeros([num_edges + 1, 2], dtype='object')
            j = 0
            for i in range(num_edges):
                dc_arr[j, 0] = "%s%s" % (str(dc_edges[j]), '_diversity_coef')
                try:
                    dc_arr[j, 1] = dc_vals[j]
                except:
                    print("%s%s%s" %
                          ('Diversity coefficient is undefined for node ',
                           str(j), ' of graph G'))
                    dc_arr[j, 1] = np.nan
                j = j + 1
            # Add mean
            dc_arr[num_edges, 0] = 'average_diversity_coefficient'
            nonzero_arr_diversity_coef = np.delete(dc_arr[:, 1], [0])
            dc_arr[num_edges, 1] = np.mean(nonzero_arr_diversity_coef)
            print("%s%s" % ('Mean Diversity Coefficient across edges: ',
                            str(dc_arr[num_edges, 1])))
            for i in dc_arr[:, 0]:
                metric_list_names.append(i)
            net_met_val_list_final = net_met_val_list_final + list(dc_arr[:,
                                                                          1])
        except:
            print('Diversity coefficient cannot be calculated for graph G')
            pass

    # Local Efficiency
    if 'local_efficiency' in metric_list_nodal:
        try:
            le_vector = local_efficiency(G)
            print(
                '\nExtracting Local Efficiency vector for all network nodes...'
            )
            le_vals = list(le_vector.values())
            le_nodes = list(le_vector.keys())
            num_nodes = len(le_nodes)
            le_arr = np.zeros([num_nodes + 1, 2], dtype='object')
            j = 0
            for i in range(num_nodes):
                le_arr[j, 0] = "%s%s" % (str(le_nodes[j]), '_local_efficiency')
                try:
                    le_arr[j, 1] = le_vals[j]
                except:
                    print(
                        "%s%s%s" % ('Local efficiency is undefined for node ',
                                    str(j), ' of graph G'))
                    le_arr[j, 1] = np.nan
                j = j + 1
            le_arr[num_nodes, 0] = 'average_local_efficiency_nodewise'
            nonzero_arr_le = np.delete(le_arr[:, 1], [0])
            le_arr[num_nodes, 1] = np.mean(nonzero_arr_le)
            print("%s%s" % ('Mean Local Efficiency across nodes: ',
                            str(le_arr[num_nodes, 1])))
            for i in le_arr[:, 0]:
                metric_list_names.append(i)
            net_met_val_list_final = net_met_val_list_final + list(le_arr[:,
                                                                          1])
        except:
            print('Local efficiency cannot be calculated for graph G')
            pass

    # Local Clustering
    if 'local_clustering' in metric_list_nodal:
        try:
            cl_vector = clustering(G)
            print(
                '\nExtracting Local Clustering vector for all network nodes...'
            )
            cl_vals = list(cl_vector.values())
            cl_nodes = list(cl_vector.keys())
            num_nodes = len(cl_nodes)
            cl_arr = np.zeros([num_nodes + 1, 2], dtype='object')
            j = 0
            for i in range(num_nodes):
                cl_arr[j, 0] = "%s%s" % (str(cl_nodes[j]), '_local_clustering')
                try:
                    cl_arr[j, 1] = cl_vals[j]
                except:
                    print(
                        "%s%s%s" % ('Local clustering is undefined for node ',
                                    str(j), ' of graph G'))
                    cl_arr[j, 1] = np.nan
                j = j + 1
            cl_arr[num_nodes, 0] = 'average_local_clustering_nodewise'
            nonzero_arr_cl = np.delete(cl_arr[:, 1], [0])
            cl_arr[num_nodes, 1] = np.mean(nonzero_arr_cl)
            print("%s%s" % ('Mean Local Clustering across nodes: ',
                            str(cl_arr[num_nodes, 1])))
            for i in cl_arr[:, 0]:
                metric_list_names.append(i)
            net_met_val_list_final = net_met_val_list_final + list(cl_arr[:,
                                                                          1])
        except:
            print('Local clustering cannot be calculated for graph G')
            pass

    # Degree centrality
    if 'degree_centrality' in metric_list_nodal:
        try:
            dc_vector = degree_centrality(G)
            print(
                '\nExtracting Degree Centrality vector for all network nodes...'
            )
            dc_vals = list(dc_vector.values())
            dc_nodes = list(dc_vector.keys())
            num_nodes = len(dc_nodes)
            dc_arr = np.zeros([num_nodes + 1, 2], dtype='object')
            j = 0
            for i in range(num_nodes):
                dc_arr[j,
                       0] = "%s%s" % (str(dc_nodes[j]), '_degree_centrality')
                try:
                    dc_arr[j, 1] = dc_vals[j]
                except:
                    print(
                        "%s%s%s" % ('Degree centrality is undefined for node ',
                                    str(j), ' of graph G'))
                    dc_arr[j, 1] = np.nan
                j = j + 1
            dc_arr[num_nodes, 0] = 'average_degree_cent'
            nonzero_arr_dc = np.delete(dc_arr[:, 1], [0])
            dc_arr[num_nodes, 1] = np.mean(nonzero_arr_dc)
            print("%s%s" % ('Mean Degree Centrality across nodes: ',
                            str(dc_arr[num_nodes, 1])))
            for i in dc_arr[:, 0]:
                metric_list_names.append(i)
            net_met_val_list_final = net_met_val_list_final + list(dc_arr[:,
                                                                          1])
        except:
            print('Degree centrality cannot be calculated for graph G')
            pass

    # Betweenness Centrality
    if 'betweenness_centrality' in metric_list_nodal:
        try:
            bc_vector = betweenness_centrality(G_len, normalized=True)
            print(
                '\nExtracting Betweenness Centrality vector for all network nodes...'
            )
            bc_vals = list(bc_vector.values())
            bc_nodes = list(bc_vector.keys())
            num_nodes = len(bc_nodes)
            bc_arr = np.zeros([num_nodes + 1, 2], dtype='object')
            j = 0
            for i in range(num_nodes):
                bc_arr[j, 0] = "%s%s" % (str(
                    bc_nodes[j]), '_betweenness_centrality')
                try:
                    bc_arr[j, 1] = bc_vals[j]
                except:
                    print("%s%s%s" %
                          ('Betweeness centrality is undefined for node ',
                           str(j), ' of graph G'))
                    bc_arr[j, 1] = np.nan
                j = j + 1
            bc_arr[num_nodes, 0] = 'average_betweenness_centrality'
            nonzero_arr_betw_cent = np.delete(bc_arr[:, 1], [0])
            bc_arr[num_nodes, 1] = np.mean(nonzero_arr_betw_cent)
            print("%s%s" % ('Mean Betweenness Centrality across nodes: ',
                            str(bc_arr[num_nodes, 1])))
            for i in bc_arr[:, 0]:
                metric_list_names.append(i)
            net_met_val_list_final = net_met_val_list_final + list(bc_arr[:,
                                                                          1])
        except:
            print('Betweenness centrality cannot be calculated for graph G')
            pass

    # Eigenvector Centrality
    if 'eigenvector_centrality' in metric_list_nodal:
        try:
            ec_vector = eigenvector_centrality(G, max_iter=1000)
            print(
                '\nExtracting Eigenvector Centrality vector for all network nodes...'
            )
            ec_vals = list(ec_vector.values())
            ec_nodes = list(ec_vector.keys())
            num_nodes = len(ec_nodes)
            ec_arr = np.zeros([num_nodes + 1, 2], dtype='object')
            j = 0
            for i in range(num_nodes):
                ec_arr[j, 0] = "%s%s" % (str(
                    ec_nodes[j]), '_eigenvector_centrality')
                try:
                    ec_arr[j, 1] = ec_vals[j]
                except:
                    print("%s%s%s" %
                          ('Eigenvector centrality is undefined for node ',
                           str(j), ' of graph G'))
                    ec_arr[j, 1] = np.nan
                j = j + 1
            ec_arr[num_nodes, 0] = 'average_eigenvector_centrality'
            nonzero_arr_eig_cent = np.delete(ec_arr[:, 1], [0])
            ec_arr[num_nodes, 1] = np.mean(nonzero_arr_eig_cent)
            print("%s%s" % ('Mean Eigenvector Centrality across nodes: ',
                            str(ec_arr[num_nodes, 1])))
            for i in ec_arr[:, 0]:
                metric_list_names.append(i)
            net_met_val_list_final = net_met_val_list_final + list(ec_arr[:,
                                                                          1])
        except:
            print('Eigenvector centrality cannot be calculated for graph G')
            pass

    # Communicability Centrality
    if 'communicability_centrality' in metric_list_nodal:
        try:
            cc_vector = communicability_betweenness_centrality(G,
                                                               normalized=True)
            print(
                '\nExtracting Communicability Centrality vector for all network nodes...'
            )
            cc_vals = list(cc_vector.values())
            cc_nodes = list(cc_vector.keys())
            num_nodes = len(cc_nodes)
            cc_arr = np.zeros([num_nodes + 1, 2], dtype='object')
            j = 0
            for i in range(num_nodes):
                cc_arr[j, 0] = "%s%s" % (str(
                    cc_nodes[j]), '_communicability_centrality')
                try:
                    cc_arr[j, 1] = cc_vals[j]
                except:
                    print("%s%s%s" %
                          ('Communicability centrality is undefined for node ',
                           str(j), ' of graph G'))
                    cc_arr[j, 1] = np.nan
                j = j + 1
            cc_arr[num_nodes, 0] = 'average_communicability_centrality'
            nonzero_arr_comm_cent = np.delete(cc_arr[:, 1], [0])
            cc_arr[num_nodes, 1] = np.mean(nonzero_arr_comm_cent)
            print("%s%s" % ('Mean Communicability Centrality across nodes: ',
                            str(cc_arr[num_nodes, 1])))
            for i in cc_arr[:, 0]:
                metric_list_names.append(i)
            net_met_val_list_final = net_met_val_list_final + list(cc_arr[:,
                                                                          1])
        except:
            print(
                'Communicability centrality cannot be calculated for graph G')
            pass

    # Rich club coefficient
    if 'rich_club_coefficient' in metric_list_nodal:
        try:
            rc_vector = rich_club_coefficient(G, normalized=True)
            print(
                '\nExtracting Rich Club Coefficient vector for all network nodes...'
            )
            rc_vals = list(rc_vector.values())
            rc_edges = list(rc_vector.keys())
            num_edges = len(rc_edges)
            rc_arr = np.zeros([num_edges + 1, 2], dtype='object')
            j = 0
            for i in range(num_edges):
                rc_arr[j, 0] = "%s%s" % (str(rc_edges[j]), '_rich_club')
                try:
                    rc_arr[j, 1] = rc_vals[j]
                except:
                    print("%s%s%s" %
                          ('Rich club coefficient is undefined for node ',
                           str(j), ' of graph G'))
                    rc_arr[j, 1] = np.nan
                j = j + 1
            # Add mean
            rc_arr[num_edges, 0] = 'average_rich_club_coefficient'
            nonzero_arr_rich_club = np.delete(rc_arr[:, 1], [0])
            rc_arr[num_edges, 1] = np.mean(nonzero_arr_rich_club)
            print("%s%s" % ('Mean Rich Club Coefficient across edges: ',
                            str(rc_arr[num_edges, 1])))
            for i in rc_arr[:, 0]:
                metric_list_names.append(i)
            net_met_val_list_final = net_met_val_list_final + list(rc_arr[:,
                                                                          1])
        except:
            print('Rich club coefficient cannot be calculated for graph G')
            pass

    if roi:
        met_list_pickle_path = "%s%s%s%s" % (
            os.path.dirname(os.path.abspath(est_path)), '/net_met_list', "%s" %
            ("%s%s%s" % ('_', network, '_') if network else "_"),
            os.path.basename(roi).split('.')[0])
    else:
        if network:
            met_list_pickle_path = "%s%s%s" % (os.path.dirname(
                os.path.abspath(est_path)), '/net_met_list_', network)
        else:
            met_list_pickle_path = "%s%s" % (os.path.dirname(
                os.path.abspath(est_path)), '/net_met_list')
    pickle.dump(metric_list_names, open(met_list_pickle_path, 'wb'), protocol=2)

    # And save results to csv
    out_path = utils.create_csv_path(ID, network, conn_model, thr, roi,
                                     dir_path, node_size)
    np.savetxt(out_path, net_met_val_list_final, delimiter='\t')

    if frag is True:
        out_path_neat = "%s%s" % (out_path.split('.csv')[0], '_frag_neat.csv')
    else:
        out_path_neat = "%s%s" % (out_path.split('.csv')[0], '_neat.csv')
    df = pd.DataFrame.from_dict(dict(
        zip(metric_list_names, net_met_val_list_final)),
                                orient='index').transpose()
    df.to_csv(out_path_neat, index=False)

    return out_path
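The global-metric loop above boils down to a call-or-NaN pattern; a self-contained sketch of it with two stock NetworkX metrics:

import numpy as np
import networkx as nx
from networkx.algorithms import transitivity, degree_assortativity_coefficient

G = nx.path_graph(5)
for fn in (transitivity, degree_assortativity_coefficient):
    try:
        val = float(fn(G))
    except Exception:
        # undefined metrics are recorded as NaN rather than aborting the run
        val = np.nan
    print(fn.__name__, val)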
Example #12
def run_GT_calcs(G, just_data, Do_kdist, Do_dia, Do_BCdist, Do_CCdist,
                 Do_ECdist, Do_GD, Do_Eff, Do_clust, Do_ANC, Do_Ast, Do_WI,
                 multigraph):

    # getting nodes and edges and defining variables for later use
    klist = [0]
    Tlist = [0]
    BCdist = [0]
    CCdist = [0]
    ECdist = [0]
    if multigraph:
        Do_BCdist = 0
        Do_ECdist = 0
        Do_clust = 0

    data_dict = {"x": [], "y": []}

    nnum = int(nx.number_of_nodes(G))
    enum = int(nx.number_of_edges(G))

    if Do_ANC or Do_dia:
        connected_graph = nx.is_connected(G)

    # making a dictionary for the parameters and results
    just_data.append(nnum)
    data_dict["x"].append("Number of nodes")
    data_dict["y"].append(nnum)
    just_data.append(enum)
    data_dict["x"].append("Number of edges")
    data_dict["y"].append(enum)
    multi_image_settings.progress(35)

    # calculating parameters as requested

    # creating degree histogram
    if (Do_kdist == 1):
        klist1 = nx.degree(G)
        ksum = 0
        klist = np.zeros(len(klist1))
        for j in range(len(klist1)):
            ksum = ksum + klist1[j]
            klist[j] = klist1[j]
        k = ksum / len(klist1)
        k = round(k, 5)
        just_data.append(k)
        data_dict["x"].append("Average degree")
        data_dict["y"].append(k)

    multi_image_settings.progress(40)

    # calculating network diameter
    if (Do_dia == 1):
        if connected_graph:
            dia = int(diameter(G))
        else:
            dia = 'NaN'
        just_data.append(dia)
        data_dict["x"].append("Network Diameter")
        data_dict["y"].append(dia)

    multi_image_settings.progress(45)

    # calculating graph density
    if (Do_GD == 1):
        GD = nx.density(G)
        GD = round(GD, 5)
        just_data.append(GD)
        data_dict["x"].append("Graph density")
        data_dict["y"].append(GD)

    multi_image_settings.progress(50)

    # calculating global efficiency
    if (Do_Eff == 1):
        Eff = global_efficiency(G)
        Eff = round(Eff, 5)
        just_data.append(Eff)
        data_dict["x"].append("Global Efficiency")
        data_dict["y"].append(Eff)

    multi_image_settings.progress(55)

    if (Do_WI == 1):
        WI = wiener_index(G)
        WI = round(WI, 1)
        just_data.append(WI)
        data_dict["x"].append("Wiener Index")
        data_dict["y"].append(WI)

    multi_image_settings.progress(60)

    # calculating clustering coefficients
    if (Do_clust == 1):
        Tlist1 = clustering(G)
        Tlist = np.zeros(len(Tlist1))
        for j in range(len(Tlist1)):
            Tlist[j] = Tlist1[j]
        clust = average_clustering(G)
        clust = round(clust, 5)
        just_data.append(clust)
        data_dict["x"].append("Average clustering coefficient")
        data_dict["y"].append(clust)

    # calculating average nodal connectivity
    if (Do_ANC == 1):
        if connected_graph:
            ANC = average_node_connectivity(G)
            ANC = round(ANC, 5)
        else:
            ANC = 'NaN'
        just_data.append(ANC)
        data_dict["x"].append("Average nodal connectivity")
        data_dict["y"].append(ANC)

    multi_image_settings.progress(65)

    # calculating assortativity coefficient
    if (Do_Ast == 1):
        Ast = degree_assortativity_coefficient(G)
        Ast = round(Ast, 5)
        just_data.append(Ast)
        data_dict["x"].append("Assortativity Coefficient")
        data_dict["y"].append(Ast)

    multi_image_settings.progress(70)

    # calculating betweenness centrality histogram
    if (Do_BCdist == 1):
        BCdist1 = betweenness_centrality(G)
        Bsum = 0
        BCdist = np.zeros(len(BCdist1))
        for j in range(len(BCdist1)):
            Bsum += BCdist1[j]
            BCdist[j] = BCdist1[j]
        Bcent = Bsum / len(BCdist1)
        Bcent = round(Bcent, 5)
        just_data.append(Bcent)
        data_dict["x"].append("Average betweenness centrality")
        data_dict["y"].append(Bcent)
    multi_image_settings.progress(75)

    # calculating closeness centrality
    if (Do_CCdist == 1):
        CCdist1 = closeness_centrality(G)
        Csum = 0
        CCdist = np.zeros(len(CCdist1))
        for j in range(len(CCdist1)):
            Csum += CCdist1[j]
            CCdist[j] = CCdist1[j]
        Ccent = Csum / len(CCdist1)
        Ccent = round(Ccent, 5)
        just_data.append(Ccent)
        data_dict["x"].append("Average closeness centrality")
        data_dict["y"].append(Ccent)

        multi_image_settings.progress(80)

        # calculating eigenvector centrality
        if (Do_ECdist == 1):
            try:
                ECdist1 = eigenvector_centrality(G, max_iter=100)
            except:
                ECdist1 = eigenvector_centrality(G, max_iter=10000)
            Esum = 0
            ECdist = np.zeros(len(ECdist1))
            for j in range(len(ECdist1)):
                Esum += ECdist1[j]
                ECdist[j] = ECdist1[j]
            Ecent = Esum / len(ECdist1)
            Ecent = round(Ecent, 5)
            just_data.append(Ecent)
            data_dict["x"].append("Average eigenvector centrality")
            data_dict["y"].append(Ecent)

    data = pd.DataFrame(data_dict)

    return data, just_data, klist, Tlist, BCdist, CCdist, ECdist
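As an aside, the manual accumulation loops above can be collapsed with NumPy; a sketch of the degree branch, assuming any NetworkX graph G:

import numpy as np
import networkx as nx

G = nx.karate_club_graph()
klist = np.fromiter((d for _, d in nx.degree(G)), dtype=float)
k = round(float(klist.mean()), 5)  # same "Average degree" value as the loop above
print(k)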
Example #13
def d_cluster(self):
    """Return the local clustering coefficient of every node in the graph."""
    from networkx.algorithms import clustering
    return clustering(self.__graph)
Example #14
    print "The number of edges in the nodes' complete graph: %d" % (
        num_nodes * (num_nodes - 1) / 2)

    brandes_ego_elapse_time = []
    proposed_ego_elapse_time = []

    brandes_xEgo_elapse_time = []
    proposed_xEgo_elapse_time = []

    for x in [8, 4, 2, 1, 0.5, 0.25, 0.125, 0.0625]:
        print "Threshold: %5.4f" % x
        adj_map = get_adjacency_matrix(num_nodes, weight_map, x)
        #print adj_map
        g = net.Graph(adj_map)
        density = net.density(g)
        clustering = algo.clustering(g)
        scc_g = algo.connected_component_subgraphs(g)[0]
        diameter = algo.diameter(scc_g)

        clustering_coefficient = mean(clustering.values())

        bet = get_CentralityList(g).values()
        myfile = open(data_type + "_" + str(x) + '_global_bet.csv', 'wb')
        wr = csv.writer(myfile)
        wr.writerow(bet)
        myfile.close()

        ego_bet_map = get_Ego_CentralityList(g)
        ego_bet = ego_bet_map.values()
        cor_1 = 1 - Bio.Cluster.distancematrix((bet, ego_bet), dist="s")[1][0]
        print "Brandes, ego-global correlation: %5.4f - elapsed_time: %5.4f" % (
Example #15
def clustering_rate(self, h1, h2):
    # Product of two nodes' clustering coefficients, scaled by the smaller
    # of their degrees (floored at 1); `+ 0.0` forces float arithmetic.
    G = self._graph
    c_h1 = nxa.clustering(G, h1)
    c_h2 = nxa.clustering(G, h2)
    min_degree = min(max(1, G.degree(h1)), max(1, G.degree(h2))) + 0.0
    return c_h1 * c_h2 / min_degree
Example #16
# diameter(G, e=None)
# degree_centrality(G)
# For average degree: #_edges * 2 / #_nodes;
# comparing average_clustering of both graphs
# print(average_clustering(internet_graph),(average_clustering(Erdős)))

# Assumed context: `internet_graph` and `Erdős` are NetworkX graphs built
# earlier in the script.
import networkx as nx
import matplotlib.pyplot as plt
from networkx.algorithms import (average_clustering, clustering,
                                 degree_centrality, diameter)

# compute average clustering for the graphs
print("Average clustering of Internet graph is: ", average_clustering(internet_graph),
      "\nAverage clustering of Erdos graph is:  ", average_clustering(Erdős))

print("Transitivity of Internet graph is: ", nx.transitivity(internet_graph), "\nTransitivity of Erdos graph is: ",
      nx.transitivity(Erdős))

# compute clustering of the graphs
print("clustering of Internet graph is ", clustering(internet_graph), "clustering of Erdos graph is  ", clustering(Erdős))

# compute Degree_centrality for nodes
print("Degree_centrality of Internet graph is: ", degree_centrality(internet_graph), "\nDegree_centrality of Erdos graph is: ", degree_centrality(Erdős))


# compute Diameter of the Graphs
print("Diameter of Erdos graph is: ", diameter(Erdős), "\nDiameter of Internet Graph is: ", diameter(internet_graph))
# print ("diameter of Erdos is ",nx.diameter(Erdős))

# Drawing Erdős graph according to the users input
nx.draw(Erdős, with_labels=True)
plt.show()

# Drawing Internet graph with 10981 nodes and 30855 edges
nx.draw(internet_graph, with_labels=True)
Example #17
def clustering_difference(self, h1, h2):
    G = self._graph
    c_h1 = nxa.clustering(G, h1)
    c_h2 = nxa.clustering(G, h2)
    return abs(c_h1 - c_h2)
Example #18
def extractnetstats(ID,
                    network,
                    thr,
                    conn_model,
                    est_path,
                    mask,
                    out_file=None):
    from pynets import thresholding, utils

    pruning = True

    ##Load and threshold matrix
    in_mat = np.array(np.genfromtxt(est_path))
    in_mat = thresholding.autofix(in_mat)

    ##Normalize connectivity matrix (weights between 0-1)
    in_mat = thresholding.normalize(in_mat)

    ##Apply the inverse hyperbolic tangent (i.e. Fisher r-to-z transform) if the matrix is non-sparse
    if conn_model == 'corr':
        in_mat = np.arctanh(in_mat)
        in_mat[np.isnan(in_mat)] = 0
        in_mat[np.isinf(in_mat)] = 1

    ##Get dir_path
    dir_path = os.path.dirname(os.path.realpath(est_path))

    ##Load numpy matrix as networkx graph
    G_pre = nx.from_numpy_matrix(in_mat)

    ##Prune irrelevant nodes (i.e. nodes who are fully disconnected from the graph and/or those whose betweenness centrality are > 3 standard deviations below the mean)
    if pruning:
        [G_pruned, _, _] = most_important(G_pre)
    else:
        G_pruned = G_pre

    ##Make directed if sparse
    if conn_model not in ('corr', 'cov', 'tangent'):
        G_di = nx.DiGraph(G_pruned)
        G_dir = G_di.to_directed()
        G = G_pruned
    else:
        G = G_pruned

    ##Get corresponding matrix
    in_mat = nx.to_numpy_array(G)

    ##Print graph summary
    print('\n\nThreshold: ' + str(thr))
    print('Source File: ' + str(est_path))
    info_list = list(nx.info(G).split('\n'))[2:]
    for i in info_list:
        print(i)

    try:
        G_dir
        print('Analyzing DIRECTED graph when applicable...')
    except:
        print('Graph is UNDIRECTED')

    if conn_model in ('corr', 'cov', 'tangent'):
        if nx.is_connected(G):
            num_conn_comp = nx.number_connected_components(G)
            print('Graph is CONNECTED with ' + str(num_conn_comp) +
                  ' connected component(s)')
        else:
            print('Graph is DISCONNECTED')
    print('\n')

    ##Create Length matrix
    mat_len = thresholding.weight_conversion(in_mat, 'lengths')
    ##Load numpy matrix as networkx graph
    G_len = nx.from_numpy_matrix(mat_len)

    ##Save G as gephi file
    if mask:
        if network:
            nx.write_graphml(
                G, dir_path + '/' + ID + '_' + network + '_' +
                str(os.path.basename(mask).split('.')[0]) + '.graphml')
        else:
            nx.write_graphml(
                G, dir_path + '/' + ID + '_' +
                str(os.path.basename(mask).split('.')[0]) + '.graphml')
    else:
        if network:
            nx.write_graphml(G,
                             dir_path + '/' + ID + '_' + network + '.graphml')
        else:
            nx.write_graphml(G, dir_path + '/' + ID + '.graphml')

    ###############################################################
    ########### Calculate graph metrics from graph G ##############
    ###############################################################
    from networkx.algorithms import (degree_assortativity_coefficient,
                                     average_clustering,
                                     average_shortest_path_length,
                                     degree_pearson_correlation_coefficient,
                                     graph_number_of_cliques, transitivity,
                                     betweenness_centrality,
                                     eigenvector_centrality,
                                     communicability_betweenness_centrality,
                                     clustering, degree_centrality,
                                     rich_club_coefficient)
    from pynets.netstats import (average_local_efficiency, global_efficiency,
                                 local_efficiency, modularity_louvain_dir,
                                 core_periphery_dir, smallworldness)
    ##For non-nodal scalar metrics from custom functions, add the name of the function to metric_list and add the function  (with a G-only input) to the netstats module.
    metric_list = [
        global_efficiency, average_local_efficiency, smallworldness,
        degree_assortativity_coefficient, average_clustering,
        average_shortest_path_length, degree_pearson_correlation_coefficient,
        graph_number_of_cliques, transitivity
    ]

    ##Custom Weight Parameter
    #custom_weight = 0.25
    custom_weight = None

    ##Iteratively run functions from above metric list that generate single scalar output
    num_mets = len(metric_list)
    net_met_arr = np.zeros([num_mets, 2], dtype='object')
    j = 0
    for i in metric_list:
        met_name = i.__name__
        net_met = met_name
        try:
            if met_name == 'average_shortest_path_length':
                try:
                    try:
                        net_met_val = float(i(G_dir))
                        print('Calculating from directed graph...')
                    except:
                        net_met_val = float(i(G))
                except:
                    ##case where G is not fully connected
                    net_met_val = float(
                        average_shortest_path_length_for_all(G))
            elif custom_weight is not None and met_name in (
                    'degree_assortativity_coefficient', 'global_efficiency',
                    'average_local_efficiency', 'average_clustering'):
                ##forward the custom weight as a keyword argument
                try:
                    net_met_val = float(i(G_dir, weight=custom_weight))
                    print('Calculating from directed graph...')
                except:
                    net_met_val = float(i(G, weight=custom_weight))
            else:
                try:
                    net_met_val = float(i(G_dir))
                    print('Calculating from directed graph...')
                except:
                    net_met_val = float(i(G))
        except:
            net_met_val = np.nan
        net_met_arr[j, 0] = net_met
        net_met_arr[j, 1] = net_met_val
        print(net_met)
        print(str(net_met_val))
        print('\n')
        j = j + 1
    net_met_val_list = list(net_met_arr[:, 1])

    ##Run miscellaneous functions that generate multiple outputs
    ##Calculate modularity using the Louvain algorithm
    [community_aff, modularity] = modularity_louvain_dir(in_mat)

    ##Calculate core-periphery subdivision
    [Coreness_vec, Coreness_q] = core_periphery_dir(in_mat)

    ##Local Efficiency
    try:
        try:
            le_vector = local_efficiency(G_dir)
        except:
            le_vector = local_efficiency(G)
        print('\nExtracting Local Efficiency vector for all network nodes...')
        le_vals = list(le_vector.values())
        le_nodes = list(le_vector.keys())
        num_nodes = len(le_nodes)
        le_arr = np.zeros([num_nodes + 1, 2], dtype='object')
        j = 0
        for i in range(num_nodes):
            le_arr[j, 0] = str(le_nodes[j]) + '_local_efficiency'
            #print('\n' + str(le_nodes[j]) + '_local_efficiency')
            try:
                le_arr[j, 1] = le_vals[j]
            except:
                le_arr[j, 1] = np.nan
            #print(str(le_vals[j]))
            j = j + 1
        le_arr[num_nodes, 0] = 'MEAN_local_efficiency'
        nonzero_arr_le = np.delete(le_arr[:, 1], [0])
        le_arr[num_nodes, 1] = np.mean(nonzero_arr_le)
        print('Mean Local Efficiency across nodes: ' +
              str(le_arr[num_nodes, 1]))
        print('\n')
    except:
        pass

    ##Local Clustering
    try:
        cl_vector = clustering(G)
        print('\nExtracting Local Clustering vector for all network nodes...')
        cl_vals = list(cl_vector.values())
        cl_nodes = list(cl_vector.keys())
        num_nodes = len(cl_nodes)
        cl_arr = np.zeros([num_nodes + 1, 2], dtype='object')
        j = 0
        for i in range(num_nodes):
            cl_arr[j, 0] = str(cl_nodes[j]) + '_local_clustering'
            #print('\n' + str(cl_nodes[j]) + '_local_clustering')
            try:
                cl_arr[j, 1] = cl_vals[j]
            except:
                cl_arr[j, 1] = np.nan
            #print(str(cl_vals[j]))
            j = j + 1
        cl_arr[num_nodes, 0] = 'MEAN_local_clustering'
        nonzero_arr_cl = np.delete(cl_arr[:, 1], [0])
        cl_arr[num_nodes, 1] = np.mean(nonzero_arr_cl)
        print('Mean Local Clustering across nodes: ' +
              str(cl_arr[num_nodes, 1]))
        print('\n')
    except:
        pass

    ##Degree centrality
    try:
        try:
            dc_vector = degree_centrality(G_dir)
        except:
            dc_vector = degree_centrality(G)
        print('\nExtracting Degree Centrality vector for all network nodes...')
        dc_vals = list(dc_vector.values())
        dc_nodes = list(dc_vector.keys())
        num_nodes = len(dc_nodes)
        dc_arr = np.zeros([num_nodes + 1, 2], dtype='object')
        j = 0
        for i in range(num_nodes):
            dc_arr[j, 0] = str(dc_nodes[j]) + '_degree_centrality'
            #print('\n' + str(dc_nodes[j]) + '_degree_centrality')
            try:
                dc_arr[j, 1] = dc_vals[j]
            except:
                dc_arr[j, 1] = np.nan
            #print(str(cl_vals[j]))
            j = j + 1
        dc_arr[num_nodes, 0] = 'MEAN_degree_centrality'
        nonzero_arr_dc = np.delete(dc_arr[:, 1], [0])
        dc_arr[num_nodes, 1] = np.mean(nonzero_arr_dc)
        print('Mean Degree Centrality across nodes: ' +
              str(dc_arr[num_nodes, 1]))
        print('\n')
    except:
        pass

    ##Betweenness Centrality
    try:
        bc_vector = betweenness_centrality(G_len, normalized=True)
        print(
            '\nExtracting Betweenness Centrality vector for all network nodes...'
        )
        bc_vals = list(bc_vector.values())
        bc_nodes = list(bc_vector.keys())
        num_nodes = len(bc_nodes)
        bc_arr = np.zeros([num_nodes + 1, 2], dtype='object')
        j = 0
        for i in range(num_nodes):
            bc_arr[j, 0] = str(bc_nodes[j]) + '_betweenness_centrality'
            #print('\n' + str(bc_nodes[j]) + '_betw_cent')
            try:
                bc_arr[j, 1] = bc_vals[j]
            except:
                bc_arr[j, 1] = np.nan
            #print(str(bc_vals[j]))
            j = j + 1
        bc_arr[num_nodes, 0] = 'MEAN_betw_cent'
        nonzero_arr_betw_cent = np.delete(bc_arr[:, 1], [0])
        bc_arr[num_nodes, 1] = np.mean(nonzero_arr_betw_cent)
        print('Mean Betweenness Centrality across nodes: ' +
              str(bc_arr[num_nodes, 1]))
        print('\n')
    except:
        pass

    ##Eigenvector Centrality
    try:
        try:
            ec_vector = eigenvector_centrality(G_dir, max_iter=1000)
        except:
            ec_vector = eigenvector_centrality(G, max_iter=1000)
        print(
            '\nExtracting Eigenvector Centrality vector for all network nodes...'
        )
        ec_vals = list(ec_vector.values())
        ec_nodes = list(ec_vector.keys())
        num_nodes = len(ec_nodes)
        ec_arr = np.zeros([num_nodes + 1, 2], dtype='object')
        j = 0
        for i in range(num_nodes):
            ec_arr[j, 0] = str(ec_nodes[j]) + '_eigenvector_centrality'
            #print('\n' + str(ec_nodes[j]) + '_eig_cent')
            try:
                ec_arr[j, 1] = ec_vals[j]
            except:
                ec_arr[j, 1] = np.nan
            #print(str(ec_vals[j]))
            j = j + 1
        ec_arr[num_nodes, 0] = 'MEAN_eig_cent'
        nonzero_arr_eig_cent = np.delete(ec_arr[:, 1], [0])
        ec_arr[num_nodes, 1] = np.mean(nonzero_arr_eig_cent)
        print('Mean Eigenvector Centrality across nodes: ' +
              str(ec_arr[num_nodes, 1]))
        print('\n')
    except:
        pass

    ##Communicability Centrality
    try:
        cc_vector = communicability_betweenness_centrality(G, normalized=True)
        print(
            '\nExtracting Communicability Centrality vector for all network nodes...'
        )
        cc_vals = list(cc_vector.values())
        cc_nodes = list(cc_vector.keys())
        num_nodes = len(cc_nodes)
        cc_arr = np.zeros([num_nodes + 1, 2], dtype='object')
        j = 0
        for i in range(num_nodes):
            cc_arr[j, 0] = str(cc_nodes[j]) + '_communicability_centrality'
            #print('\n' + str(cc_nodes[j]) + '_comm_cent')
            try:
                cc_arr[j, 1] = cc_vals[j]
            except:
                cc_arr[j, 1] = np.nan
            #print(str(cc_vals[j]))
            j = j + 1
        cc_arr[num_nodes, 0] = 'MEAN_comm_cent'
        nonzero_arr_comm_cent = np.delete(cc_arr[:, 1], [0])
        cc_arr[num_nodes, 1] = np.mean(nonzero_arr_comm_cent)
        print('Mean Communicability Centrality across nodes: ' +
              str(cc_arr[num_nodes, 1]))
        print('\n')
    except:
        pass

    ##Rich club coefficient
    try:
        rc_vector = rich_club_coefficient(G, normalized=True)
        print(
            '\nExtracting Rich Club Coefficient vector for all network nodes...'
        )
        rc_vals = list(rc_vector.values())
        rc_edges = list(rc_vector.keys())
        num_edges = len(rc_edges)
        rc_arr = np.zeros([num_edges + 1, 2], dtype='object')
        j = 0
        for i in range(num_edges):
            rc_arr[j, 0] = str(rc_edges[j]) + '_rich_club'
            #print('\n' + str(rc_edges[j]) + '_rich_club')
            try:
                rc_arr[j, 1] = rc_vals[j]
            except:
                rc_arr[j, 1] = np.nan
            #print(str(rc_vals[j]))
            j = j + 1
        ##Add mean
        rc_arr[num_edges, 0] = 'MEAN_rich_club'
        nonzero_arr_rich_club = np.delete(rc_arr[:, 1], [0])
        rc_arr[num_edges, 1] = np.mean(nonzero_arr_rich_club)
        print('Mean Rich Club Coefficient across edges: ' +
              str(rc_arr[num_edges, 1]))
        print('\n')
    except:
        pass

    ##Create a list of metric names for scalar metrics
    metric_list_names = []
    net_met_val_list_final = net_met_val_list
    for i in net_met_arr[:, 0]:
        metric_list_names.append(i)

    ##Add modularity measure
    try:
        metric_list_names.append('Modularity')
        net_met_val_list_final.append(modularity)
    except:
        pass

    ##Add Core/Periphery measure
    try:
        metric_list_names.append('Coreness')
        net_met_val_list_final.append(Coreness_q)
    except:
        pass

    ##Add local efficiency measures
    try:
        for i in le_arr[:, 0]:
            metric_list_names.append(i)
        net_met_val_list_final = net_met_val_list_final + list(le_arr[:, 1])
    except:
        pass

    ##Add local clustering measures
    try:
        for i in cl_arr[:, 0]:
            metric_list_names.append(i)
        net_met_val_list_final = net_met_val_list_final + list(cl_arr[:, 1])
    except:
        pass

    ##Add centrality measures
    try:
        for i in dc_arr[:, 0]:
            metric_list_names.append(i)
        net_met_val_list_final = net_met_val_list_final + list(dc_arr[:, 1])
    except:
        pass
    try:
        for i in bc_arr[:, 0]:
            metric_list_names.append(i)
        net_met_val_list_final = net_met_val_list_final + list(bc_arr[:, 1])
    except:
        pass
    try:
        for i in ec_arr[:, 0]:
            metric_list_names.append(i)
        net_met_val_list_final = net_met_val_list_final + list(ec_arr[:, 1])
    except:
        pass
    try:
        for i in cc_arr[:, 0]:
            metric_list_names.append(i)
        net_met_val_list_final = net_met_val_list_final + list(cc_arr[:, 1])
    except:
        pass

    ##Add rich club measure
    try:
        for i in rc_arr[:, 0]:
            metric_list_names.append(i)
        net_met_val_list_final = net_met_val_list_final + list(rc_arr[:, 1])
    except:
        pass

    ##Save metric names as pickle
    try:
        import cPickle
    except ImportError:
        import _pickle as cPickle

    if mask is not None:
        if network is not None:
            met_list_pickle_path = os.path.dirname(os.path.abspath(
                est_path)) + '/net_metric_list_' + network + '_' + str(
                    os.path.basename(mask).split('.')[0])
        else:
            met_list_pickle_path = os.path.dirname(
                os.path.abspath(est_path)) + '/net_metric_list_' + str(
                    os.path.basename(mask).split('.')[0])
    else:
        if network is not None:
            met_list_pickle_path = os.path.dirname(
                os.path.abspath(est_path)) + '/net_metric_list_' + network
        else:
            met_list_pickle_path = os.path.dirname(
                os.path.abspath(est_path)) + '/net_metric_list'
    cPickle.dump(metric_list_names, open(met_list_pickle_path, 'wb'))

    ##And save results to csv
    out_path = utils.create_csv_path(ID, network, conn_model, thr, mask,
                                     dir_path)
    np.savetxt(out_path, net_met_val_list_final)

    return out_path
Example #19
def clustering_rate(self, h1, h2):
    G = self._graph
    c_h1 = nxa.clustering(G, h1)
    c_h2 = nxa.clustering(G, h2)
    min_degree = min(max(1, G.degree(h1)), max(1, G.degree(h2))) + 0.0
    return c_h1 * c_h2 / min_degree
Example #20
def clustering_difference(self, h1, h2):
    G = self._graph
    c_h1 = nxa.clustering(G, h1)
    c_h2 = nxa.clustering(G, h2)
    return abs(c_h1 - c_h2)