def extractnetstats(ID, NETWORK, thr, conn_model, est_path1, out_file=None):
    """Compute graph metrics for one connectivity matrix and save them to disk.

    The matrix stored at ``est_path1`` is loaded, passed through
    ``thresholding.autofix`` and, for ``conn_model == 'corr'``, through the
    inverse hyperbolic tangent (Fisher r-to-z transform). It is then turned
    into a networkx graph from which scalar and per-node metrics are derived.

    Three files are written next to ``est_path1``: a ``.graphml`` export of
    the weighted graph, a pickle holding the ordered list of metric names,
    and a text file (``.csv`` extension) holding the metric values in the
    same order.

    Parameters
    ----------
    ID : str
        Identifier used as the prefix of the ``.graphml`` and ``.csv`` names.
    NETWORK : str or None
        Label prepended to metric names and embedded in the output file
        names. When None, nodal metrics and modularity are labelled
        ``'WholeBrain'`` and scalar metric names carry no prefix.
    thr : object
        Threshold; only used, via ``str(thr)``, in the ``.csv`` file name.
    conn_model : str
        ``'corr'`` triggers the arctanh transform; other values do not.
    est_path1 : str
        Path to a text matrix readable by ``numpy.genfromtxt``. If the path
        contains ``'inv'`` the ``.csv`` is tagged ``sps_cov``, else ``corr``.
    out_file : object, optional
        Unused; kept so that existing callers keep working.

    Returns
    -------
    out_path : str
        Path of the file the metric values were saved to.
    """
    from pynets import thresholding
    from networkx.algorithms import (
        degree_assortativity_coefficient, average_clustering,
        average_shortest_path_length, degree_pearson_correlation_coefficient,
        graph_number_of_cliques, transitivity, betweenness_centrality,
        rich_club_coefficient, eigenvector_centrality,
        communicability_centrality)
    from pynets.netstats import global_efficiency, local_efficiency
    from pynets.netstats import modularity as louvain_modularity

    # Load and clean the matrix.
    in_mat = np.array(np.genfromtxt(est_path1))
    in_mat = thresholding.autofix(in_mat)

    # Fisher r-to-z transform for correlation matrices.
    if conn_model == 'corr':
        in_mat = np.arctanh(in_mat)

    dir_path = os.path.dirname(os.path.realpath(est_path1))

    # Weighted graph and its 'lengths' counterpart.
    mat_wei = in_mat
    G = nx.from_numpy_matrix(mat_wei)
    mat_len = thresholding.weight_conversion(in_mat, 'lengths')
    G_len = nx.from_numpy_matrix(mat_len)

    # Save the weighted graph for Gephi.
    file_stem = ID if NETWORK is None else ID + '_' + NETWORK
    nx.write_graphml(G, dir_path + '/' + file_stem + '.graphml')

    # Nodal metrics and modularity are labelled 'WholeBrain' when no network
    # label was supplied.
    label_prefix = NETWORK if NETWORK is not None else 'WholeBrain'

    def _nodal_rows(compute, extract_title, mean_title, fail_title, suffix,
                    unit):
        """Return (names, values) for one keyed metric plus a mean row.

        On any failure a message is printed and two empty lists are
        returned, so the metric is left out of the results entirely instead
        of contributing partially filled rows.
        """
        try:
            vector = compute()
            print('Extracting ' + extract_title +
                  ' vector for all network nodes...')
            names = []
            values = []
            for key, val in vector.items():
                label = label_prefix + '_' + str(key) + suffix
                print('\n' + label)
                print(str(val))
                names.append(label)
                values.append(val)
            # Average over the real entries only; no placeholder is included
            # and no entry is dropped.
            mean_val = float(np.mean(values)) if values else np.nan
            print('\n' + 'Mean ' + mean_title + ' across all ' + unit +
                  ': ' + str(mean_val) + '\n')
            names.append(label_prefix + '_MEAN' + suffix)
            values.append(mean_val)
            return names, values
        except Exception as err:
            # Best effort: a failing metric must not abort the whole run.
            print(fail_title + ' calculation failed. Skipping...')
            print('Reason: ' + repr(err))
            return [], []

    # Scalar metrics: every function takes the graph as its only argument.
    metric_list = [
        global_efficiency, local_efficiency,
        degree_assortativity_coefficient, average_clustering,
        average_shortest_path_length, degree_pearson_correlation_coefficient,
        graph_number_of_cliques, transitivity,
    ]
    metric_list_names = []
    net_met_val_list_final = []
    for metric_func in metric_list:
        met_name = getattr(metric_func, '__name__', str(metric_func))
        net_met = met_name if NETWORK is None else NETWORK + '_' + met_name
        try:
            net_met_val = float(metric_func(G))
        except Exception:
            # Metric is undefined for this graph; record NaN and carry on.
            net_met_val = np.nan
        print(net_met)
        print(str(net_met_val))
        print('\n')
        metric_list_names.append(net_met)
        net_met_val_list_final.append(net_met_val)

    # Modularity (second element of the pair returned by netstats.modularity).
    _, modularity_value = louvain_modularity(mat_wei)
    metric_list_names.append(label_prefix + '_Modularity')
    net_met_val_list_final.append(modularity_value)

    # Keyed metrics. The centralities are computed on the 'lengths' graph,
    # the rich club coefficient on the weighted graph.
    # NOTE(review): networkx documents rich_club_coefficient as returning a
    # dict keyed by degree, not by edge; labels and messages are left as-is.
    nodal_specs = [
        (lambda: betweenness_centrality(G_len),
         'Betweeness Centrality', 'Betweenness Centrality',
         'Betweeness Centrality', '_betw_cent', 'nodes'),
        (lambda: eigenvector_centrality(G_len),
         'Eigenvector Centrality', 'Eigenvector Centrality',
         'Eigenvector Centrality', '_eig_cent', 'nodes'),
        (lambda: communicability_centrality(G_len),
         'Communicability Centrality', 'Communicability Centrality',
         'Communicability Centrality', '_comm_cent', 'nodes'),
        (lambda: rich_club_coefficient(G, normalized=True),
         'Rich Club Coefficient', 'Rich Club Coefficient',
         'Rich Club', '_rich_club', 'edges'),
    ]
    for spec in nodal_specs:
        names, values = _nodal_rows(*spec)
        metric_list_names.extend(names)
        net_met_val_list_final.extend(values)

    # Save metric names as pickle.
    try:
        import cPickle
    except ImportError:
        import _pickle as cPickle
    pickle_dir = os.path.dirname(os.path.abspath(est_path1))
    pickle_tag = NETWORK if NETWORK is not None else 'WB'
    met_list_picke_path = pickle_dir + '/met_list_pickle_' + pickle_tag
    with open(met_list_picke_path, 'wb') as pickle_file:
        cPickle.dump(metric_list_names, pickle_file)

    # Save metric values.
    model_tag = 'sps_cov' if 'inv' in est_path1 else 'corr'
    out_path = (dir_path + '/' + file_stem + '_net_mets_' + model_tag + '_' +
                str(thr) + '.csv')
    np.savetxt(out_path, net_met_val_list_final)
    return out_path
def extractnetstats(ID, network, thr, conn_model, est_path, mask, out_file=None):
    """Compute graph metrics for one connectivity matrix and save them to disk.

    The matrix stored at ``est_path`` is loaded, passed through
    ``thresholding.autofix`` and ``thresholding.normalize`` and, for
    ``conn_model == 'corr'``, through the inverse hyperbolic tangent (Fisher
    r-to-z transform) with NaN replaced by 0 and inf by 1. The resulting
    graph is pruned with ``most_important`` before scalar and per-node
    metrics are derived from it.

    Three files are written next to ``est_path``: a ``.graphml`` export of
    the pruned graph, a pickle holding the ordered list of metric names, and
    a text file with the metric values in the same order at the path given
    by ``utils.create_csv_path``.

    Parameters
    ----------
    ID : str
        Identifier used as the prefix of the ``.graphml`` file name and
        passed to ``utils.create_csv_path``.
    network : str or None
        Optional label embedded in the output file names.
    thr : object
        Threshold; printed and passed to ``utils.create_csv_path``.
    conn_model : str
        ``'corr'`` triggers the arctanh transform. For any model other than
        ``'corr'``, ``'cov'`` or ``'tangent'`` a directed copy of the graph
        is built and tried first for the metrics that support it.
    est_path : str
        Path to a text matrix readable by ``numpy.genfromtxt``.
    mask : str or None
        Optional path whose basename (up to the first ``'.'``) is embedded in
        the output file names.
    out_file : object, optional
        Unused; kept so that existing callers keep working.

    Returns
    -------
    out_path : str
        Path of the file the metric values were saved to.
    """
    from pynets import thresholding, utils
    from networkx.algorithms import (
        degree_assortativity_coefficient, average_clustering,
        average_shortest_path_length, degree_pearson_correlation_coefficient,
        graph_number_of_cliques, transitivity, betweenness_centrality,
        eigenvector_centrality, communicability_betweenness_centrality,
        clustering, degree_centrality, rich_club_coefficient)
    from pynets.netstats import (
        average_local_efficiency, global_efficiency, local_efficiency,
        modularity_louvain_dir, smallworldness)

    pruning = True

    # Load, clean and normalize the matrix.
    in_mat = np.array(np.genfromtxt(est_path))
    in_mat = thresholding.autofix(in_mat)
    in_mat = thresholding.normalize(in_mat)

    # Fisher r-to-z transform for correlation matrices.
    if conn_model == 'corr':
        in_mat = np.arctanh(in_mat)
        in_mat[np.isnan(in_mat)] = 0
        in_mat[np.isinf(in_mat)] = 1

    dir_path = os.path.dirname(os.path.realpath(est_path))

    # Build the graph and prune it.
    G_pre = nx.from_numpy_matrix(in_mat)
    if pruning:
        G_pruned, _, _ = most_important(G_pre)
    else:
        G_pruned = G_pre
    G = G_pruned

    # A directed copy is only built for models other than corr/cov/tangent;
    # None marks the undirected-only case.
    if conn_model in ('corr', 'cov', 'tangent'):
        G_dir = None
    else:
        G_dir = nx.DiGraph(G_pruned).to_directed()

    # Matrix corresponding to the pruned graph.
    in_mat = nx.to_numpy_array(G)

    # Print graph summary.
    print('\n\nThreshold: ' + str(thr))
    print('Source File: ' + str(est_path))
    for info_line in nx.info(G).split('\n')[2:]:
        print(info_line)
    if G_dir is not None:
        print('Analyzing DIRECTED graph when applicable...')
    else:
        print('Graph is UNDIRECTED')
        if nx.is_connected(G):
            num_conn_comp = nx.number_connected_components(G)
            print('Graph is CONNECTED with ' + str(num_conn_comp) +
                  ' connected component(s)')
        else:
            print('Graph is DISCONNECTED')
    print('\n')

    # 'lengths' counterpart of the graph, used for betweenness centrality.
    mat_len = thresholding.weight_conversion(in_mat, 'lengths')
    G_len = nx.from_numpy_matrix(mat_len)

    # Save G for Gephi.
    graphml_parts = [ID]
    if network:
        graphml_parts.append(network)
    if mask:
        graphml_parts.append(str(os.path.basename(mask).split('.')[0]))
    nx.write_graphml(G, dir_path + '/' + '_'.join(graphml_parts) + '.graphml')

    def _on_best_graph(func, convert=None, **kwargs):
        """Call ``func`` on the directed graph if possible, else on ``G``.

        Returns ``(result, used_directed)``. Errors raised for the directed
        graph trigger the undirected attempt; errors raised for ``G``
        propagate to the caller.
        """
        def _run(graph):
            result = func(graph, **kwargs)
            return result if convert is None else convert(result)

        if G_dir is not None:
            try:
                return _run(G_dir), True
            except Exception:
                pass
        return _run(G), False

    def _nodal_rows(compute, extract_title, mean_title, node_suffix,
                    mean_label, unit='nodes'):
        """Return (names, values) for one keyed metric plus a mean row.

        On any failure a warning is printed and two empty lists are
        returned, so the metric is simply left out of the results.
        """
        try:
            vector = compute()
            print('\nExtracting ' + extract_title +
                  ' vector for all network nodes...')
            names = [str(key) + node_suffix for key in vector]
            values = list(vector.values())
            # Average over the real entries only; no placeholder is included
            # and no entry is dropped.
            mean_val = float(np.mean(values)) if values else np.nan
            print('Mean ' + mean_title + ' across ' + unit + ': ' +
                  str(mean_val))
            print('\n')
            return names + [mean_label], values + [mean_val]
        except Exception as err:
            # Best effort: a failing metric must not abort the whole run.
            print('WARNING: ' + extract_title + ' could not be computed: ' +
                  repr(err))
            return [], []

    # Scalar metrics: every function takes the graph as its only argument.
    metric_list = [
        global_efficiency, average_local_efficiency, smallworldness,
        degree_assortativity_coefficient, average_clustering,
        average_shortest_path_length, degree_pearson_correlation_coefficient,
        graph_number_of_cliques, transitivity,
    ]
    metric_list_names = []
    net_met_val_list_final = []
    for metric_func in metric_list:
        net_met = getattr(metric_func, '__name__', str(metric_func))
        try:
            try:
                net_met_val, used_directed = _on_best_graph(
                    metric_func, convert=float)
            except Exception:
                # Functions are compared by identity, not by name string.
                if metric_func is not average_shortest_path_length:
                    raise
                # Case where G is not fully connected.
                net_met_val = float(average_shortest_path_length_for_all(G))
                used_directed = False
            if used_directed:
                print('Calculating from directed graph...')
        except Exception:
            # Metric is undefined for this graph; record NaN and carry on.
            net_met_val = np.nan
        print(net_met)
        print(str(net_met_val))
        print('\n')
        metric_list_names.append(net_met)
        net_met_val_list_final.append(net_met_val)

    # Modularity via the Louvain algorithm and core-periphery subdivision;
    # only the second element of each returned pair is reported.
    _, modularity_value = modularity_louvain_dir(in_mat)
    _, Coreness_q = core_periphery_dir(in_mat)
    metric_list_names.append('Modularity')
    net_met_val_list_final.append(modularity_value)
    metric_list_names.append('Coreness')
    net_met_val_list_final.append(Coreness_q)

    # Keyed metrics, in output order.
    # NOTE(review): networkx documents rich_club_coefficient as returning a
    # dict keyed by degree, not by edge; labels and messages are left as-is.
    nodal_specs = [
        (lambda: _on_best_graph(local_efficiency)[0],
         'Local Efficiency', 'Local Efficiency',
         '_local_efficiency', 'MEAN_local_efficiency', 'nodes'),
        # The clustering mean gets its own label so that it no longer
        # collides with 'MEAN_local_efficiency'.
        (lambda: clustering(G),
         'Local Clustering', 'Local Clustering',
         '_local_clustering', 'MEAN_local_clustering', 'nodes'),
        (lambda: _on_best_graph(degree_centrality)[0],
         'Degree Centrality', 'Degree Centrality',
         '_degree_centrality', 'MEAN_degree_centrality', 'nodes'),
        (lambda: betweenness_centrality(G_len, normalized=True),
         'Betweeness Centrality', 'Betweenness Centrality',
         '_betweenness_centrality', 'MEAN_betw_cent', 'nodes'),
        (lambda: _on_best_graph(eigenvector_centrality, max_iter=1000)[0],
         'Eigenvector Centrality', 'Eigenvector Centrality',
         '_eigenvector_centrality', 'MEAN_eig_cent', 'nodes'),
        (lambda: communicability_betweenness_centrality(G, normalized=True),
         'Communicability Centrality', 'Communicability Centrality',
         '_communicability_centrality', 'MEAN_comm_cent', 'nodes'),
        (lambda: rich_club_coefficient(G, normalized=True),
         'Rich Club Coefficient', 'Rich Club Coefficient',
         '_rich_club', 'MEAN_rich_club', 'edges'),
    ]
    for spec in nodal_specs:
        names, values = _nodal_rows(*spec)
        metric_list_names.extend(names)
        net_met_val_list_final.extend(values)

    # Save metric names as pickle.
    try:
        import cPickle
    except ImportError:
        import _pickle as cPickle
    pickle_parts = []
    if network is not None:
        pickle_parts.append(network)
    if mask is not None:
        pickle_parts.append(str(os.path.basename(mask).split('.')[0]))
    met_list_picke_path = (os.path.dirname(os.path.abspath(est_path)) +
                           '/net_metric_list' +
                           ''.join('_' + part for part in pickle_parts))
    with open(met_list_picke_path, 'wb') as pickle_file:
        cPickle.dump(metric_list_names, pickle_file)

    # And save results to csv.
    out_path = utils.create_csv_path(ID, network, conn_model, thr, mask,
                                     dir_path)
    np.savetxt(out_path, net_met_val_list_final)
    return out_path
def extractnetstats(ID, network, thr, conn_model, est_path, roi, prune, node_size, norm, binary, custom_weight=None): """ Function interface for performing fully-automated graph analysis. Parameters ---------- ID : str A subject id or other unique identifier. network : str Resting-state network based on Yeo-7 and Yeo-17 naming (e.g. 'Default') used to filter nodes in the study of brain subgraphs. thr : float The value, between 0 and 1, used to threshold the graph using any variety of methods triggered through other options. conn_model : str Connectivity estimation model (e.g. corr for correlation, cov for covariance, sps for precision covariance, partcorr for partial correlation). sps type is used by default. est_path : str File path to the thresholded graph, conn_matrix_thr, saved as a numpy array in .npy format. roi : str File path to binarized/boolean region-of-interest Nifti1Image file. prune : bool Indicates whether to prune final graph of disconnected nodes/isolates. node_size : int Spherical centroid node size in the case that coordinate-based centroids are used as ROI's. norm : int Indicates method of normalizing resulting graph. binary : bool Indicates whether to binarize resulting graph edges to form an unweighted graph. custom_weight : float The edge attribute that holds the numerical value used as a weight. If None, then each edge has weight 1. Default is None. Returns ------- out_path : str Path to .csv file where graph analysis results are saved. 
""" import pandas as pd import yaml try: import cPickle as pickle except ImportError: import _pickle as pickle from pathlib import Path from pynets import thresholding, utils # Advanced options fmt = 'edgelist_ssv' est_path_fmt = "%s%s" % ('.', est_path.split('.')[-1]) # Load and threshold matrix if est_path_fmt == '.txt': in_mat_raw = np.array(np.genfromtxt(est_path)) else: in_mat_raw = np.array(np.load(est_path)) # De-diagnal in_mat = np.array(np.array(thresholding.autofix(in_mat_raw))) # Normalize connectivity matrix # Force edges to values between 0-1 if norm == 1: in_mat = thresholding.normalize(in_mat) # Apply log10 elif norm == 2: in_mat = np.log10(in_mat) else: pass # Correct nan's and inf's in_mat[np.isnan(in_mat)] = 0 in_mat[np.isinf(in_mat)] = 1 # Get hyperbolic tangent (i.e. fischer r-to-z transform) of matrix if non-covariance if (conn_model == 'corr') or (conn_model == 'partcorr'): in_mat = np.arctanh(in_mat) # Binarize graph if binary is True: in_mat = thresholding.binarize(in_mat) # Get dir_path dir_path = os.path.dirname(os.path.realpath(est_path)) # Load numpy matrix as networkx graph G_pre = nx.from_numpy_matrix(in_mat) # Prune irrelevant nodes (i.e. 
nodes who are fully disconnected from the graph and/or those whose betweenness # centrality are > 3 standard deviations below the mean) if prune == 1: [G, _] = prune_disconnected(G_pre) elif prune == 2: [G, _] = most_important(G_pre) else: G = G_pre # Get corresponding matrix in_mat = np.array(nx.to_numpy_matrix(G)) # Saved pruned if (prune != 0) and (prune is not None): final_mat_path = "%s%s%s" % (est_path.split(est_path_fmt)[0], '_pruned_mat', est_path_fmt) utils.save_mat(in_mat, final_mat_path, fmt) # Print graph summary print("%s%.2f%s" % ('\n\nThreshold: ', 100 * float(thr), '%')) print("%s%s" % ('Source File: ', est_path)) info_list = list(nx.info(G).split('\n'))[2:] for i in info_list: print(i) if nx.is_connected(G) is True: frag = False print('Graph is connected...') else: frag = True print('Warning: Graph is fragmented...\n') # Create Length matrix mat_len = thresholding.weight_conversion(in_mat, 'lengths') # Load numpy matrix as networkx graph G_len = nx.from_numpy_matrix(mat_len) # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # Calculate global and local metrics from graph G # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # import community from networkx.algorithms import degree_assortativity_coefficient, average_clustering, average_shortest_path_length, degree_pearson_correlation_coefficient, graph_number_of_cliques, transitivity, betweenness_centrality, eigenvector_centrality, communicability_betweenness_centrality, clustering, degree_centrality, rich_club_coefficient, sigma from pynets.stats.netstats import average_local_efficiency, global_efficiency, participation_coef, participation_coef_sign, diversity_coef_sign # For non-nodal scalar metrics from custom functions, add the name of the function to metric_list and add the # function (with a G-only input) to the netstats module. 
metric_list_glob = [ global_efficiency, average_local_efficiency, degree_assortativity_coefficient, average_clustering, average_shortest_path_length, degree_pearson_correlation_coefficient, graph_number_of_cliques, transitivity, sigma ] metric_list_comm = ['louvain_modularity'] # with open("%s%s" % (str(Path(__file__).parent), '/global_graph_measures.yaml'), 'r') as stream: # try: # metric_dict_global = yaml.load(stream) # metric_list_global = metric_dict_global['metric_list_global'] # print("%s%s%s" % ('\n\nCalculating global measures:\n', metric_list_global, '\n\n')) # except FileNotFoundError: # print('Failed to parse global_graph_measures.yaml') with open( "%s%s" % (str(Path(__file__).parent), '/nodal_graph_measures.yaml'), 'r') as stream: try: metric_dict_nodal = yaml.load(stream) metric_list_nodal = metric_dict_nodal['metric_list_nodal'] print("%s%s%s" % ('\n\nCalculating nodal measures:\n', metric_list_nodal, '\n\n')) except FileNotFoundError: print('Failed to parse nodal_graph_measures.yaml') # Note the use of bare excepts in preceding blocks. Typically, this is considered bad practice in python. Here, # we are exploiting it intentionally to facilitate uninterrupted, automated graph analysis even when algorithms are # undefined. In those instances, solutions are assigned NaN's. 
# Iteratively run functions from above metric list that generate single scalar output num_mets = len(metric_list_glob) net_met_arr = np.zeros([num_mets, 2], dtype='object') j = 0 for i in metric_list_glob: met_name = str(i).split('<function ')[1].split(' at')[0] net_met = met_name try: try: net_met_val = raw_mets(G, i, custom_weight) except: print("%s%s%s" % ('WARNING: ', net_met, ' failed for graph G.')) net_met_val = np.nan except: print("%s%s%s" % ('WARNING: ', str(i), ' is undefined for graph G')) net_met_val = np.nan net_met_arr[j, 0] = net_met net_met_arr[j, 1] = net_met_val print(net_met) print(str(net_met_val)) print('\n') j = j + 1 net_met_val_list = list(net_met_arr[:, 1]) # Create a list of metric names for scalar metrics metric_list_names = [] net_met_val_list_final = net_met_val_list for i in net_met_arr[:, 0]: metric_list_names.append(i) # Run miscellaneous functions that generate multiple outputs # Calculate modularity using the Louvain algorithm if 'louvain_modularity' in metric_list_comm: try: ci = community.best_partition(G) modularity = community.community_louvain.modularity(ci, G) metric_list_names.append('modularity') net_met_val_list_final.append(modularity) except: print('Louvain modularity calculation is undefined for graph G') pass # Participation Coefficient by louvain community if 'participation_coefficient' in metric_list_nodal: try: if ci is None: raise KeyError( 'Participation coefficient cannot be calculated for graph G in the absence of a ' 'community affiliation vector') if len(in_mat[in_mat < 0.0]) > 0: pc_vector = participation_coef_sign(in_mat, ci) else: pc_vector = participation_coef(in_mat, ci) print( '\nExtracting Participation Coefficient vector for all network nodes...' 
) pc_vals = list(pc_vector) pc_edges = list(range(len(pc_vector))) num_edges = len(pc_edges) pc_arr = np.zeros([num_edges + 1, 2], dtype='object') j = 0 for i in range(num_edges): pc_arr[j, 0] = "%s%s" % (str(pc_edges[j]), '_partic_coef') try: pc_arr[j, 1] = pc_vals[j] except: print("%s%s%s" % ('Participation coefficient is undefined for node ', str(j), ' of graph G')) pc_arr[j, 1] = np.nan j = j + 1 # Add mean pc_arr[num_edges, 0] = 'average_participation_coefficient' nonzero_arr_partic_coef = np.delete(pc_arr[:, 1], [0]) pc_arr[num_edges, 1] = np.mean(nonzero_arr_partic_coef) print("%s%s" % ('Mean Participation Coefficient across edges: ', str(pc_arr[num_edges, 1]))) for i in pc_arr[:, 0]: metric_list_names.append(i) net_met_val_list_final = net_met_val_list_final + list(pc_arr[:, 1]) except: print('Participation coefficient cannot be calculated for graph G') pass # Diversity Coefficient by louvain community if 'diversity_coefficient' in metric_list_nodal: try: if ci is None: raise KeyError( 'Diversity coefficient cannot be calculated for graph G in the absence of a community ' 'affiliation vector') [dc_vector, _] = diversity_coef_sign(in_mat, ci) print( '\nExtracting Diversity Coefficient vector for all network nodes...' 
) dc_vals = list(dc_vector) dc_edges = list(range(len(dc_vector))) num_edges = len(dc_edges) dc_arr = np.zeros([num_edges + 1, 2], dtype='object') j = 0 for i in range(num_edges): dc_arr[j, 0] = "%s%s" % (str(dc_edges[j]), '_diversity_coef') try: dc_arr[j, 1] = dc_vals[j] except: print("%s%s%s" % ('Diversity coefficient is undefined for node ', str(j), ' of graph G')) dc_arr[j, 1] = np.nan j = j + 1 # Add mean dc_arr[num_edges, 0] = 'average_diversity_coefficient' nonzero_arr_diversity_coef = np.delete(dc_arr[:, 1], [0]) dc_arr[num_edges, 1] = np.mean(nonzero_arr_diversity_coef) print("%s%s" % ('Mean Diversity Coefficient across edges: ', str(dc_arr[num_edges, 1]))) for i in dc_arr[:, 0]: metric_list_names.append(i) net_met_val_list_final = net_met_val_list_final + list(dc_arr[:, 1]) except: print('Diversity coefficient cannot be calculated for graph G') pass # Local Efficiency if 'local_efficiency' in metric_list_nodal: try: le_vector = local_efficiency(G) print( '\nExtracting Local Efficiency vector for all network nodes...' 
) le_vals = list(le_vector.values()) le_nodes = list(le_vector.keys()) num_nodes = len(le_nodes) le_arr = np.zeros([num_nodes + 1, 2], dtype='object') j = 0 for i in range(num_nodes): le_arr[j, 0] = "%s%s" % (str(le_nodes[j]), '_local_efficiency') try: le_arr[j, 1] = le_vals[j] except: print( "%s%s%s" % ('Local efficiency is undefined for node ', str(j), ' of graph G')) le_arr[j, 1] = np.nan j = j + 1 le_arr[num_nodes, 0] = 'average_local_efficiency_nodewise' nonzero_arr_le = np.delete(le_arr[:, 1], [0]) le_arr[num_nodes, 1] = np.mean(nonzero_arr_le) print("%s%s" % ('Mean Local Efficiency across nodes: ', str(le_arr[num_nodes, 1]))) for i in le_arr[:, 0]: metric_list_names.append(i) net_met_val_list_final = net_met_val_list_final + list(le_arr[:, 1]) except: print('Local efficiency cannot be calculated for graph G') pass # Local Clustering if 'local_clustering' in metric_list_nodal: try: cl_vector = clustering(G) print( '\nExtracting Local Clustering vector for all network nodes...' ) cl_vals = list(cl_vector.values()) cl_nodes = list(cl_vector.keys()) num_nodes = len(cl_nodes) cl_arr = np.zeros([num_nodes + 1, 2], dtype='object') j = 0 for i in range(num_nodes): cl_arr[j, 0] = "%s%s" % (str(cl_nodes[j]), '_local_clustering') try: cl_arr[j, 1] = cl_vals[j] except: print( "%s%s%s" % ('Local clustering is undefined for node ', str(j), ' of graph G')) cl_arr[j, 1] = np.nan j = j + 1 cl_arr[num_nodes, 0] = 'average_local_efficiency_nodewise' nonzero_arr_cl = np.delete(cl_arr[:, 1], [0]) cl_arr[num_nodes, 1] = np.mean(nonzero_arr_cl) print("%s%s" % ('Mean Local Clustering across nodes: ', str(cl_arr[num_nodes, 1]))) for i in cl_arr[:, 0]: metric_list_names.append(i) net_met_val_list_final = net_met_val_list_final + list(cl_arr[:, 1]) except: print('Local clustering cannot be calculated for graph G') pass # Degree centrality if 'degree_centrality' in metric_list_nodal: try: dc_vector = degree_centrality(G) print( '\nExtracting Degree Centrality vector for all network 
nodes...' ) dc_vals = list(dc_vector.values()) dc_nodes = list(dc_vector.keys()) num_nodes = len(dc_nodes) dc_arr = np.zeros([num_nodes + 1, 2], dtype='object') j = 0 for i in range(num_nodes): dc_arr[j, 0] = "%s%s" % (str(dc_nodes[j]), '_degree_centrality') try: dc_arr[j, 1] = dc_vals[j] except: print( "%s%s%s" % ('Degree centrality is undefined for node ', str(j), ' of graph G')) dc_arr[j, 1] = np.nan j = j + 1 dc_arr[num_nodes, 0] = 'average_degree_cent' nonzero_arr_dc = np.delete(dc_arr[:, 1], [0]) dc_arr[num_nodes, 1] = np.mean(nonzero_arr_dc) print("%s%s" % ('Mean Degree Centrality across nodes: ', str(dc_arr[num_nodes, 1]))) for i in dc_arr[:, 0]: metric_list_names.append(i) net_met_val_list_final = net_met_val_list_final + list(dc_arr[:, 1]) except: print('Degree centrality cannot be calculated for graph G') pass # Betweenness Centrality if 'betweenness_centrality' in metric_list_nodal: try: bc_vector = betweenness_centrality(G_len, normalized=True) print( '\nExtracting Betweeness Centrality vector for all network nodes...' 
) bc_vals = list(bc_vector.values()) bc_nodes = list(bc_vector.keys()) num_nodes = len(bc_nodes) bc_arr = np.zeros([num_nodes + 1, 2], dtype='object') j = 0 for i in range(num_nodes): bc_arr[j, 0] = "%s%s" % (str( bc_nodes[j]), '_betweenness_centrality') try: bc_arr[j, 1] = bc_vals[j] except: print("%s%s%s" % ('Betweeness centrality is undefined for node ', str(j), ' of graph G')) bc_arr[j, 1] = np.nan j = j + 1 bc_arr[num_nodes, 0] = 'average_betweenness_centrality' nonzero_arr_betw_cent = np.delete(bc_arr[:, 1], [0]) bc_arr[num_nodes, 1] = np.mean(nonzero_arr_betw_cent) print("%s%s" % ('Mean Betweenness Centrality across nodes: ', str(bc_arr[num_nodes, 1]))) for i in bc_arr[:, 0]: metric_list_names.append(i) net_met_val_list_final = net_met_val_list_final + list(bc_arr[:, 1]) except: print('Betweenness centrality cannot be calculated for graph G') pass # Eigenvector Centrality if 'eigenvector_centrality' in metric_list_nodal: try: ec_vector = eigenvector_centrality(G, max_iter=1000) print( '\nExtracting Eigenvector Centrality vector for all network nodes...' 
) ec_vals = list(ec_vector.values()) ec_nodes = list(ec_vector.keys()) num_nodes = len(ec_nodes) ec_arr = np.zeros([num_nodes + 1, 2], dtype='object') j = 0 for i in range(num_nodes): ec_arr[j, 0] = "%s%s" % (str( ec_nodes[j]), '_eigenvector_centrality') try: ec_arr[j, 1] = ec_vals[j] except: print("%s%s%s" % ('Eigenvector centrality is undefined for node ', str(j), ' of graph G')) ec_arr[j, 1] = np.nan j = j + 1 ec_arr[num_nodes, 0] = 'average_eigenvector_centrality' nonzero_arr_eig_cent = np.delete(ec_arr[:, 1], [0]) ec_arr[num_nodes, 1] = np.mean(nonzero_arr_eig_cent) print("%s%s" % ('Mean Eigenvector Centrality across nodes: ', str(ec_arr[num_nodes, 1]))) for i in ec_arr[:, 0]: metric_list_names.append(i) net_met_val_list_final = net_met_val_list_final + list(ec_arr[:, 1]) except: print('Eigenvector centrality cannot be calculated for graph G') pass # Communicability Centrality if 'communicability_centrality' in metric_list_nodal: try: cc_vector = communicability_betweenness_centrality(G, normalized=True) print( '\nExtracting Communicability Centrality vector for all network nodes...' 
) cc_vals = list(cc_vector.values()) cc_nodes = list(cc_vector.keys()) num_nodes = len(cc_nodes) cc_arr = np.zeros([num_nodes + 1, 2], dtype='object') j = 0 for i in range(num_nodes): cc_arr[j, 0] = "%s%s" % (str( cc_nodes[j]), '_communicability_centrality') try: cc_arr[j, 1] = cc_vals[j] except: print("%s%s%s" % ('Communicability centrality is undefined for node ', str(j), ' of graph G')) cc_arr[j, 1] = np.nan j = j + 1 cc_arr[num_nodes, 0] = 'average_communicability_centrality' nonzero_arr_comm_cent = np.delete(cc_arr[:, 1], [0]) cc_arr[num_nodes, 1] = np.mean(nonzero_arr_comm_cent) print("%s%s" % ('Mean Communicability Centrality across nodes: ', str(cc_arr[num_nodes, 1]))) for i in cc_arr[:, 0]: metric_list_names.append(i) net_met_val_list_final = net_met_val_list_final + list(cc_arr[:, 1]) except: print( 'Communicability centrality cannot be calculated for graph G') pass # Rich club coefficient if 'rich_club_coefficient' in metric_list_nodal: try: rc_vector = rich_club_coefficient(G, normalized=True) print( '\nExtracting Rich Club Coefficient vector for all network nodes...' 
) rc_vals = list(rc_vector.values()) rc_edges = list(rc_vector.keys()) num_edges = len(rc_edges) rc_arr = np.zeros([num_edges + 1, 2], dtype='object') j = 0 for i in range(num_edges): rc_arr[j, 0] = "%s%s" % (str(rc_edges[j]), '_rich_club') try: rc_arr[j, 1] = rc_vals[j] except: print("%s%s%s" % ('Rich club coefficient is undefined for node ', str(j), ' of graph G')) rc_arr[j, 1] = np.nan j = j + 1 # Add mean rc_arr[num_edges, 0] = 'average_rich_club_coefficient' nonzero_arr_rich_club = np.delete(rc_arr[:, 1], [0]) rc_arr[num_edges, 1] = np.mean(nonzero_arr_rich_club) print("%s%s" % ('Mean Rich Club Coefficient across edges: ', str(rc_arr[num_edges, 1]))) for i in rc_arr[:, 0]: metric_list_names.append(i) net_met_val_list_final = net_met_val_list_final + list(rc_arr[:, 1]) except: print('Rich club coefficient cannot be calculated for graph G') pass if roi: met_list_picke_path = "%s%s%s%s" % ( os.path.dirname(os.path.abspath(est_path)), '/net_met_list', "%s" % ("%s%s%s" % ('_', network, '_') if network else "_"), os.path.basename(roi).split('.')[0]) else: if network: met_list_picke_path = "%s%s%s" % (os.path.dirname( os.path.abspath(est_path)), '/net_met_list_', network) else: met_list_picke_path = "%s%s" % (os.path.dirname( os.path.abspath(est_path)), '/net_met_list') pickle.dump(metric_list_names, open(met_list_picke_path, 'wb'), protocol=2) # And save results to csv out_path = utils.create_csv_path(ID, network, conn_model, thr, roi, dir_path, node_size) np.savetxt(out_path, net_met_val_list_final, delimiter='\t') if frag is True: out_path_neat = "%s%s" % (out_path.split('.csv')[0], '_frag_neat.csv') else: out_path_neat = "%s%s" % (out_path.split('.csv')[0], '_neat.csv') df = pd.DataFrame.from_dict(dict( zip(metric_list_names, net_met_val_list_final)), orient='index').transpose() df.to_csv(out_path_neat, index=False) return out_path