def plot_connectogram(conn_matrix, conn_model, atlas_select, dir_path, ID, network, label_names):
    import json
    import numpy as np
    import networkx as nx
    from networkx.readwrite import json_graph
    from pathlib import Path
    from pynets.thresholding import normalize
    from pynets.netstats import most_important
    from scipy.cluster.hierarchy import linkage, fcluster
    from nipype.utils.filemanip import save_json

    ##Advanced Settings
    comm = 'nodes'
    pruned = False
    #color_scheme = 'interpolateCool'
    #color_scheme = 'interpolateGnBu'
    #color_scheme = 'interpolateOrRd'
    #color_scheme = 'interpolatePuRd'
    #color_scheme = 'interpolateYlOrRd'
    #color_scheme = 'interpolateReds'
    #color_scheme = 'interpolateGreens'
    color_scheme = 'interpolateBlues'
    ##Advanced Settings

    conn_matrix = normalize(conn_matrix)
    G = nx.from_numpy_matrix(conn_matrix)
    if pruned:
        [G, pruned_nodes, pruned_edges] = most_important(G)
        conn_matrix = nx.to_numpy_array(G)

        pruned_nodes.sort(reverse=True)
        for j in pruned_nodes:
            del label_names[j]

        pruned_edges.sort(reverse=True)
        for j in pruned_edges:
            del label_names[j]

    def doClust(X, clust_levels):
        ##Get the linkage diagram
        Z = linkage(X, 'ward')
        ##Choose # cluster levels
        cluster_levels = range(1, int(clust_levels))
        ##Init array to store labels for each level
        clust_levels_tmp = int(clust_levels) - 1
        label_arr = np.zeros((int(clust_levels_tmp), int(X.shape[0])))
        ##Iterate through levels
        for c in cluster_levels:
            fl = fcluster(Z, c, criterion='maxclust')
            #print(fl)
            label_arr[c-1, :] = fl
        return label_arr, clust_levels_tmp

    if comm == 'nodes' and len(conn_matrix) > 40:
        from pynets.netstats import modularity_louvain_dir
        if len(conn_matrix) < 50:
            gamma = 0.00001
        elif len(conn_matrix) < 100:
            gamma = 0.0001
        elif len(conn_matrix) < 200:
            gamma = 0.001
        elif len(conn_matrix) < 500:
            gamma = 0.01
        elif len(conn_matrix) < 1000:
            gamma = 0.5
        else:
            gamma = 1
        [node_comm_aff_mat, q] = modularity_louvain_dir(conn_matrix, hierarchy=True, gamma=gamma)
        print('Found ' + str(len(np.unique(node_comm_aff_mat))) + ' communities with gamma=' + str(gamma) + '...')
        clust_levels = len(node_comm_aff_mat)
        clust_levels_tmp = int(clust_levels) - 1
        mask_mat = np.squeeze(np.array([node_comm_aff_mat == 0]).astype('int'))
        label_arr = node_comm_aff_mat * np.expand_dims(np.arange(1, clust_levels+1), axis=1) + mask_mat
    elif comm == 'links' and len(conn_matrix) > 40:
        from pynets.netstats import link_communities
        ##Plot link communities
        link_comm_aff_mat = link_communities(conn_matrix, type_clustering='single')
        print('Found ' + str(len(link_comm_aff_mat)) + ' communities...')
        clust_levels = len(link_comm_aff_mat)
        clust_levels_tmp = int(clust_levels) - 1
        mask_mat = np.squeeze(np.array([link_comm_aff_mat == 0]).astype('int'))
        label_arr = link_comm_aff_mat * np.expand_dims(np.arange(1, clust_levels+1), axis=1) + mask_mat
    elif len(conn_matrix) > 20:
        print('Graph too small for reliable plotting of communities. Plotting by fcluster instead...')
        if len(conn_matrix) >= 250:
            clust_levels = 7
        elif len(conn_matrix) >= 200:
            clust_levels = 6
        elif len(conn_matrix) >= 150:
            clust_levels = 5
        elif len(conn_matrix) >= 100:
            clust_levels = 4
        elif len(conn_matrix) >= 50:
            clust_levels = 3
        else:
            clust_levels = 2
        [label_arr, clust_levels_tmp] = doClust(conn_matrix, clust_levels)
    else:
        ##Fallback for very small graphs so that label_arr is always defined
        clust_levels = 2
        [label_arr, clust_levels_tmp] = doClust(conn_matrix, clust_levels)

    def get_node_label(node_idx, labels, clust_levels_tmp):
        from collections import OrderedDict

        def write_roman(num):
            roman = OrderedDict()
            roman[1000] = "M"
            roman[900] = "CM"
            roman[500] = "D"
            roman[400] = "CD"
            roman[100] = "C"
            roman[90] = "XC"
            roman[50] = "L"
            roman[40] = "XL"
            roman[10] = "X"
            roman[9] = "IX"
            roman[5] = "V"
            roman[4] = "IV"
            roman[1] = "I"

            def roman_num(num):
                ##The loop alone walks every divisor, so no recursion is needed
                for r in roman.keys():
                    x, y = divmod(num, r)
                    yield roman[r] * x
                    num -= (r * x)
                    if num <= 0:
                        break

            return "".join([a for a in roman_num(num)])

        rn_list = []
        node_idx = node_idx - 1
        node_labels = labels[:, node_idx]
        for l in node_labels:
            rn_list.append(json.dumps(write_roman(int(l))))
        abet = rn_list
        return ".".join(["{}{}".format(abet[i], int(l)) for i, l in enumerate(node_labels)]) + ".{}".format(label_names[node_idx])

    output = []
    adj_dict = {}
    for node, neighbours in G.adjacency():
        adj_dict[node] = list(neighbours)
    for node_idx, connections in adj_dict.items():
        weight_vec = []
        for i in connections:
            wei = G.get_edge_data(node_idx, int(i))['weight']
            weight_vec.append(wei)
        entry = {}
        nodes_label = get_node_label(node_idx, label_arr, clust_levels_tmp)
        entry["name"] = nodes_label
        entry["size"] = len(connections)
        entry["imports"] = [get_node_label(int(d)-1, label_arr, clust_levels_tmp) for d in connections]
        entry["weights"] = weight_vec
        output.append(entry)

    if network != 'None':
        json_file_name = str(ID) + '_' + network + '_connectogram_' + conn_model + '_network.json'
        json_fdg_file_name = str(ID) + '_' + network + '_fdg_' + conn_model + '_network.json'
        connectogram_plot = dir_path + '/' + json_file_name
        fdg_js_sub = dir_path + '/' + str(ID) + '_' + network + '_fdg_' + conn_model + '_network.js'
        fdg_js_sub_name = str(ID) + '_' + network + '_fdg_' + conn_model + '_network.js'
        connectogram_js_sub = dir_path + '/' + str(ID) + '_' + network + '_connectogram_' + conn_model + '_network.js'
        connectogram_js_name = str(ID) + '_' + network + '_connectogram_' + conn_model + '_network.js'
    else:
        json_file_name = str(ID) + '_connectogram_' + conn_model + '.json'
        json_fdg_file_name = str(ID) + '_fdg_' + conn_model + '.json'
        connectogram_plot = dir_path + '/' + json_file_name
        connectogram_js_sub = dir_path + '/' + str(ID) + '_connectogram_' + conn_model + '.js'
        fdg_js_sub = dir_path + '/' + str(ID) + '_fdg_' + conn_model + '.js'
        fdg_js_sub_name = str(ID) + '_fdg_' + conn_model + '.js'
        connectogram_js_name = str(ID) + '_connectogram_' + conn_model + '.js'
    save_json(connectogram_plot, output)

    ##Force-directed graphing
    G = nx.from_numpy_matrix(np.round(conn_matrix.astype('float64'), 6))
    data = json_graph.node_link_data(G)
    data.pop('directed', None)
    data.pop('graph', None)
    data.pop('multigraph', None)
    for k in range(len(data['links'])):
        data['links'][k]['value'] = data['links'][k].pop('weight')
    for k in range(len(data['nodes'])):
        data['nodes'][k]['id'] = str(data['nodes'][k]['id'])
    for k in range(len(data['links'])):
        data['links'][k]['source'] = str(data['links'][k]['source'])
        data['links'][k]['target'] = str(data['links'][k]['target'])
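    ##For reference, after the renaming above, the node-link dict has the
    ##shape below (values illustrative only):
    ##  {'nodes': [{'id': '0'}, {'id': '1'}, ...],
    ##   'links': [{'source': '0', 'target': '1', 'value': 0.42}, ...]}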
    ##Add community structure
    for k in range(len(data['nodes'])):
        data['nodes'][k]['group'] = str(label_arr[0][k])

    ##Add node labels
    for k in range(len(data['nodes'])):
        data['nodes'][k]['name'] = str(label_names[k])

    out_file = str(dir_path + '/' + json_fdg_file_name)
    save_json(out_file, data)

    ##Copy index.html and json to dir_path
    conn_js_path = str(Path(__file__).parent/"connectogram.js")
    index_html_path = str(Path(__file__).parent/"index.html")
    fdg_replacements_js = {"FD_graph.json": str(json_fdg_file_name)}
    replacements_html = {'connectogram.js': str(connectogram_js_name), 'fdg.js': str(fdg_js_sub_name)}
    fdg_js_path = str(Path(__file__).parent/"fdg.js")
    with open(index_html_path) as infile, open(str(dir_path + '/index.html'), 'w') as outfile:
        for line in infile:
            for src, target in replacements_html.items():
                line = line.replace(src, target)
            outfile.write(line)

    replacements_js = {'template.json': str(json_file_name), 'interpolateCool': str(color_scheme)}
    with open(conn_js_path) as infile, open(connectogram_js_sub, 'w') as outfile:
        for line in infile:
            for src, target in replacements_js.items():
                line = line.replace(src, target)
            outfile.write(line)

    with open(fdg_js_path) as infile, open(fdg_js_sub, 'w') as outfile:
        for line in infile:
            for src, target in fdg_replacements_js.items():
                line = line.replace(src, target)
            outfile.write(line)
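
##Minimal usage sketch for plot_connectogram (hypothetical values; assumes a
##square connectivity matrix and one label per node):
#import numpy as np
#conn_matrix = np.random.rand(60, 60)
#conn_matrix = (conn_matrix + conn_matrix.T) / 2  ##symmetrize
#label_names = ['node_' + str(i) for i in range(60)]
#plot_connectogram(conn_matrix, 'corr', 'atlas_hypothetical', '/tmp/pynets_out',
#                  '997', 'None', label_names)
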
def extractnetstats(ID, network, thr, conn_model, est_path, mask, out_file=None):
    import os
    import numpy as np
    import networkx as nx
    from pynets import thresholding, utils
    ##Helper functions assumed to live in pynets.netstats
    from pynets.netstats import most_important, core_periphery_dir, average_shortest_path_length_for_all

    pruning = True

    ##Load and threshold matrix
    in_mat = np.array(np.genfromtxt(est_path))
    in_mat = thresholding.autofix(in_mat)

    ##Normalize connectivity matrix (weights between 0-1)
    in_mat = thresholding.normalize(in_mat)

    ##Apply the inverse hyperbolic tangent (i.e. Fisher r-to-z transform) if the matrix is a correlation matrix
    if conn_model == 'corr':
        in_mat = np.arctanh(in_mat)
        in_mat[np.isnan(in_mat)] = 0
        in_mat[np.isinf(in_mat)] = 1

    ##Get dir_path
    dir_path = os.path.dirname(os.path.realpath(est_path))

    ##Load numpy matrix as networkx graph
    G_pre = nx.from_numpy_matrix(in_mat)

    ##Prune irrelevant nodes (i.e. nodes that are fully disconnected from the graph and/or whose betweenness centrality is > 3 standard deviations below the mean)
    if pruning:
        [G_pruned, _, _] = most_important(G_pre)
    else:
        G_pruned = G_pre

    ##Make directed if sparse
    if conn_model != 'corr' and conn_model != 'cov' and conn_model != 'tangent':
        G_dir = nx.DiGraph(G_pruned)
        G = G_pruned
    else:
        G = G_pruned

    ##Get corresponding matrix
    in_mat = nx.to_numpy_array(G)

    ##Print graph summary
    print('\n\nThreshold: ' + str(thr))
    print('Source File: ' + str(est_path))
    info_list = list(nx.info(G).split('\n'))[2:]
    for i in info_list:
        print(i)

    try:
        G_dir
        print('Analyzing DIRECTED graph when applicable...')
    except NameError:
        print('Graph is UNDIRECTED')

    if conn_model == 'corr' or conn_model == 'cov' or conn_model == 'tangent':
        if nx.is_connected(G):
            print('Graph is CONNECTED')
        else:
            num_conn_comp = nx.number_connected_components(G)
            print('Graph is DISCONNECTED with ' + str(num_conn_comp) + ' connected component(s)')
    print('\n')

    ##Create length matrix
    mat_len = thresholding.weight_conversion(in_mat, 'lengths')

    ##Load numpy matrix as networkx graph
    G_len = nx.from_numpy_matrix(mat_len)

    ##Save G as a graphml file (e.g. for loading into Gephi)
    if mask:
        if network:
            nx.write_graphml(G, dir_path + '/' + ID + '_' + network + '_' + str(os.path.basename(mask).split('.')[0]) + '.graphml')
        else:
            nx.write_graphml(G, dir_path + '/' + ID + '_' + str(os.path.basename(mask).split('.')[0]) + '.graphml')
    else:
        if network:
            nx.write_graphml(G, dir_path + '/' + ID + '_' + network + '.graphml')
        else:
            nx.write_graphml(G, dir_path + '/' + ID + '.graphml')

    ###############################################################
    ########### Calculate graph metrics from graph G ##############
    ###############################################################
    from networkx.algorithms import degree_assortativity_coefficient, average_clustering, average_shortest_path_length, degree_pearson_correlation_coefficient, graph_number_of_cliques, transitivity, betweenness_centrality, eigenvector_centrality, communicability_betweenness_centrality, clustering, degree_centrality, rich_club_coefficient
    from pynets.netstats import average_local_efficiency, global_efficiency, local_efficiency, modularity_louvain_dir, smallworldness
    ##For non-nodal scalar metrics from custom functions, add the name of the function to metric_list and add the function (with a G-only input) to the netstats module.
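    ##A hypothetical example of such a custom addition: define, e.g.,
    #def graph_density(G):
    #    return nx.density(G)
    ##in pynets.netstats, import it above, and append graph_density to
    ##metric_list below.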
    metric_list = [
        global_efficiency,
        average_local_efficiency,
        smallworldness,
        degree_assortativity_coefficient,
        average_clustering,
        average_shortest_path_length,
        degree_pearson_correlation_coefficient,
        graph_number_of_cliques,
        transitivity
    ]

    ##Custom weight parameter
    #custom_weight = 0.25
    custom_weight = None

    ##Iteratively run functions from above metric list that generate single scalar output
    num_mets = len(metric_list)
    net_met_arr = np.zeros([num_mets, 2], dtype='object')
    j = 0
    for i in metric_list:
        met_name = i.__name__
        net_met = met_name
        try:
            if met_name == 'average_shortest_path_length':
                try:
                    try:
                        net_met_val = float(i(G_dir))
                        print('Calculating from directed graph...')
                    except:
                        net_met_val = float(i(G))
                except:
                    ##Case where G is not fully connected
                    net_met_val = float(average_shortest_path_length_for_all(G))
            elif custom_weight is not None and met_name in ('degree_assortativity_coefficient', 'global_efficiency', 'average_local_efficiency', 'average_clustering'):
                ##Pass the custom weight as a keyword argument where supported
                try:
                    net_met_val = float(i(G_dir, weight=custom_weight))
                    print('Calculating from directed graph...')
                except:
                    net_met_val = float(i(G, weight=custom_weight))
            else:
                try:
                    net_met_val = float(i(G_dir))
                    print('Calculating from directed graph...')
                except:
                    net_met_val = float(i(G))
        except:
            net_met_val = np.nan
        net_met_arr[j, 0] = net_met
        net_met_arr[j, 1] = net_met_val
        print(net_met)
        print(str(net_met_val))
        print('\n')
        j = j + 1
    net_met_val_list = list(net_met_arr[:, 1])

    ##Run miscellaneous functions that generate multiple outputs
    ##Calculate modularity using the Louvain algorithm
    [community_aff, modularity] = modularity_louvain_dir(in_mat)

    ##Calculate core-periphery subdivision
    [Coreness_vec, Coreness_q] = core_periphery_dir(in_mat)

    ##Local efficiency
    try:
        try:
            le_vector = local_efficiency(G_dir)
        except:
            le_vector = local_efficiency(G)
        print('\nExtracting Local Efficiency vector for all network nodes...')
        le_vals = list(le_vector.values())
        le_nodes = list(le_vector.keys())
        num_nodes = len(le_nodes)
        le_arr = np.zeros([num_nodes + 1, 2], dtype='object')
        j = 0
        for i in range(num_nodes):
            le_arr[j, 0] = str(le_nodes[j]) + '_local_efficiency'
            #print('\n' + str(le_nodes[j]) + '_local_efficiency')
            try:
                le_arr[j, 1] = le_vals[j]
            except:
                le_arr[j, 1] = np.nan
            #print(str(le_vals[j]))
            j = j + 1
        le_arr[num_nodes, 0] = 'MEAN_local_efficiency'
        ##Mean over node values only (exclude the summary row itself)
        le_arr[num_nodes, 1] = np.mean(le_arr[0:num_nodes, 1])
        print('Mean Local Efficiency across nodes: ' + str(le_arr[num_nodes, 1]))
        print('\n')
    except:
        pass

    ##Local clustering
    try:
        cl_vector = clustering(G)
        print('\nExtracting Local Clustering vector for all network nodes...')
        cl_vals = list(cl_vector.values())
        cl_nodes = list(cl_vector.keys())
        num_nodes = len(cl_nodes)
        cl_arr = np.zeros([num_nodes + 1, 2], dtype='object')
        j = 0
        for i in range(num_nodes):
            cl_arr[j, 0] = str(cl_nodes[j]) + '_local_clustering'
            #print('\n' + str(cl_nodes[j]) + '_local_clustering')
            try:
                cl_arr[j, 1] = cl_vals[j]
            except:
                cl_arr[j, 1] = np.nan
            #print(str(cl_vals[j]))
            j = j + 1
        cl_arr[num_nodes, 0] = 'MEAN_local_clustering'
        cl_arr[num_nodes, 1] = np.mean(cl_arr[0:num_nodes, 1])
        print('Mean Local Clustering across nodes: ' + str(cl_arr[num_nodes, 1]))
        print('\n')
    except:
        pass
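    ##Each nodal metric below follows the same pattern: compute a per-node
    ##dict, write '<node>_<metric>' / value rows into an object array, and
    ##append the mean across nodes as a final summary row.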
    ##Degree centrality
    try:
        try:
            dc_vector = degree_centrality(G_dir)
        except:
            dc_vector = degree_centrality(G)
        print('\nExtracting Degree Centrality vector for all network nodes...')
        dc_vals = list(dc_vector.values())
        dc_nodes = list(dc_vector.keys())
        num_nodes = len(dc_nodes)
        dc_arr = np.zeros([num_nodes + 1, 2], dtype='object')
        j = 0
        for i in range(num_nodes):
            dc_arr[j, 0] = str(dc_nodes[j]) + '_degree_centrality'
            #print('\n' + str(dc_nodes[j]) + '_degree_centrality')
            try:
                dc_arr[j, 1] = dc_vals[j]
            except:
                dc_arr[j, 1] = np.nan
            #print(str(dc_vals[j]))
            j = j + 1
        dc_arr[num_nodes, 0] = 'MEAN_degree_centrality'
        dc_arr[num_nodes, 1] = np.mean(dc_arr[0:num_nodes, 1])
        print('Mean Degree Centrality across nodes: ' + str(dc_arr[num_nodes, 1]))
        print('\n')
    except:
        pass

    ##Betweenness centrality
    try:
        bc_vector = betweenness_centrality(G_len, normalized=True)
        print('\nExtracting Betweenness Centrality vector for all network nodes...')
        bc_vals = list(bc_vector.values())
        bc_nodes = list(bc_vector.keys())
        num_nodes = len(bc_nodes)
        bc_arr = np.zeros([num_nodes + 1, 2], dtype='object')
        j = 0
        for i in range(num_nodes):
            bc_arr[j, 0] = str(bc_nodes[j]) + '_betweenness_centrality'
            #print('\n' + str(bc_nodes[j]) + '_betw_cent')
            try:
                bc_arr[j, 1] = bc_vals[j]
            except:
                bc_arr[j, 1] = np.nan
            #print(str(bc_vals[j]))
            j = j + 1
        bc_arr[num_nodes, 0] = 'MEAN_betw_cent'
        bc_arr[num_nodes, 1] = np.mean(bc_arr[0:num_nodes, 1])
        print('Mean Betweenness Centrality across nodes: ' + str(bc_arr[num_nodes, 1]))
        print('\n')
    except:
        pass

    ##Eigenvector centrality
    try:
        try:
            ec_vector = eigenvector_centrality(G_dir, max_iter=1000)
        except:
            ec_vector = eigenvector_centrality(G, max_iter=1000)
        print('\nExtracting Eigenvector Centrality vector for all network nodes...')
        ec_vals = list(ec_vector.values())
        ec_nodes = list(ec_vector.keys())
        num_nodes = len(ec_nodes)
        ec_arr = np.zeros([num_nodes + 1, 2], dtype='object')
        j = 0
        for i in range(num_nodes):
            ec_arr[j, 0] = str(ec_nodes[j]) + '_eigenvector_centrality'
            #print('\n' + str(ec_nodes[j]) + '_eig_cent')
            try:
                ec_arr[j, 1] = ec_vals[j]
            except:
                ec_arr[j, 1] = np.nan
            #print(str(ec_vals[j]))
            j = j + 1
        ec_arr[num_nodes, 0] = 'MEAN_eig_cent'
        ec_arr[num_nodes, 1] = np.mean(ec_arr[0:num_nodes, 1])
        print('Mean Eigenvector Centrality across nodes: ' + str(ec_arr[num_nodes, 1]))
        print('\n')
    except:
        pass

    ##Communicability centrality
    try:
        cc_vector = communicability_betweenness_centrality(G, normalized=True)
        print('\nExtracting Communicability Centrality vector for all network nodes...')
        cc_vals = list(cc_vector.values())
        cc_nodes = list(cc_vector.keys())
        num_nodes = len(cc_nodes)
        cc_arr = np.zeros([num_nodes + 1, 2], dtype='object')
        j = 0
        for i in range(num_nodes):
            cc_arr[j, 0] = str(cc_nodes[j]) + '_communicability_centrality'
            #print('\n' + str(cc_nodes[j]) + '_comm_cent')
            try:
                cc_arr[j, 1] = cc_vals[j]
            except:
                cc_arr[j, 1] = np.nan
            #print(str(cc_vals[j]))
            j = j + 1
        cc_arr[num_nodes, 0] = 'MEAN_comm_cent'
        cc_arr[num_nodes, 1] = np.mean(cc_arr[0:num_nodes, 1])
        print('Mean Communicability Centrality across nodes: ' + str(cc_arr[num_nodes, 1]))
        print('\n')
    except:
        pass
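    ##Note: networkx's rich_club_coefficient returns one coefficient per
    ##degree threshold k (a dict keyed by degree), so the vector below is
    ##indexed by degree value rather than by node.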
    ##Rich club coefficient
    try:
        rc_vector = rich_club_coefficient(G, normalized=True)
        print('\nExtracting Rich Club Coefficient vector across degree thresholds...')
        rc_vals = list(rc_vector.values())
        rc_edges = list(rc_vector.keys())
        num_edges = len(rc_edges)
        rc_arr = np.zeros([num_edges + 1, 2], dtype='object')
        j = 0
        for i in range(num_edges):
            rc_arr[j, 0] = str(rc_edges[j]) + '_rich_club'
            #print('\n' + str(rc_edges[j]) + '_rich_club')
            try:
                rc_arr[j, 1] = rc_vals[j]
            except:
                rc_arr[j, 1] = np.nan
            #print(str(rc_vals[j]))
            j = j + 1
        ##Add mean
        rc_arr[num_edges, 0] = 'MEAN_rich_club'
        rc_arr[num_edges, 1] = np.mean(rc_arr[0:num_edges, 1])
        print('Mean Rich Club Coefficient across degree thresholds: ' + str(rc_arr[num_edges, 1]))
        print('\n')
    except:
        pass

    ##Create a list of metric names for scalar metrics
    metric_list_names = []
    net_met_val_list_final = net_met_val_list
    for i in net_met_arr[:, 0]:
        metric_list_names.append(i)

    ##Add modularity measure
    try:
        metric_list_names.append('Modularity')
        net_met_val_list_final.append(modularity)
    except:
        pass

    ##Add core-periphery measure
    try:
        metric_list_names.append('Coreness')
        net_met_val_list_final.append(Coreness_q)
    except:
        pass

    ##Add local efficiency measures
    try:
        for i in le_arr[:, 0]:
            metric_list_names.append(i)
        net_met_val_list_final = net_met_val_list_final + list(le_arr[:, 1])
    except:
        pass

    ##Add local clustering measures
    try:
        for i in cl_arr[:, 0]:
            metric_list_names.append(i)
        net_met_val_list_final = net_met_val_list_final + list(cl_arr[:, 1])
    except:
        pass

    ##Add centrality measures
    try:
        for i in dc_arr[:, 0]:
            metric_list_names.append(i)
        net_met_val_list_final = net_met_val_list_final + list(dc_arr[:, 1])
    except:
        pass
    try:
        for i in bc_arr[:, 0]:
            metric_list_names.append(i)
        net_met_val_list_final = net_met_val_list_final + list(bc_arr[:, 1])
    except:
        pass
    try:
        for i in ec_arr[:, 0]:
            metric_list_names.append(i)
        net_met_val_list_final = net_met_val_list_final + list(ec_arr[:, 1])
    except:
        pass
    try:
        for i in cc_arr[:, 0]:
            metric_list_names.append(i)
        net_met_val_list_final = net_met_val_list_final + list(cc_arr[:, 1])
    except:
        pass

    ##Add rich club measure
    try:
        for i in rc_arr[:, 0]:
            metric_list_names.append(i)
        net_met_val_list_final = net_met_val_list_final + list(rc_arr[:, 1])
    except:
        pass

    ##Save metric names as pickle
    try:
        import cPickle
    except ImportError:
        import _pickle as cPickle

    if mask is not None:
        if network is not None:
            met_list_pickle_path = os.path.dirname(os.path.abspath(est_path)) + '/net_metric_list_' + network + '_' + str(os.path.basename(mask).split('.')[0])
        else:
            met_list_pickle_path = os.path.dirname(os.path.abspath(est_path)) + '/net_metric_list_' + str(os.path.basename(mask).split('.')[0])
    else:
        if network is not None:
            met_list_pickle_path = os.path.dirname(os.path.abspath(est_path)) + '/net_metric_list_' + network
        else:
            met_list_pickle_path = os.path.dirname(os.path.abspath(est_path)) + '/net_metric_list'
    cPickle.dump(metric_list_names, open(met_list_pickle_path, 'wb'))

    ##And save results to csv
    out_path = utils.create_csv_path(ID, network, conn_model, thr, mask, dir_path)
    np.savetxt(out_path, net_met_val_list_final)

    return out_path
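
##Minimal usage sketch for extractnetstats (hypothetical paths/IDs; assumes
##est_path points to an adjacency matrix saved with np.savetxt):
#out_csv = extractnetstats(ID='997', network=None, thr='0.95',
#                          conn_model='corr',
#                          est_path='/tmp/pynets_out/997_est_corr_0.95.txt',
#                          mask=None)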