def test_number_of_cliques(self):
    # Checks nx.graph_number_of_cliques / nx.number_of_cliques against the
    # fixture graph self.G; self.cl is a precomputed list of its maximal
    # cliques (built elsewhere in the test class — not visible here).
    # Uses nose-style assert_equal (legacy API).
    G=self.G
    # total number of maximal cliques, with and without the cached clique list
    assert_equal(nx.graph_number_of_cliques(G),5)
    assert_equal(nx.graph_number_of_cliques(G,cliques=self.cl),5)
    # per-node clique membership counts: single node and node-list forms
    assert_equal(nx.number_of_cliques(G,1),1)
    assert_equal(list(nx.number_of_cliques(G,[1]).values()),[1])
    assert_equal(list(nx.number_of_cliques(G,[1,2]).values()),[1, 2])
    assert_equal(nx.number_of_cliques(G,[1,2]),{1: 1, 2: 2})
    assert_equal(nx.number_of_cliques(G,2),2)
    # whole-graph dict form, recomputed and with the cached clique list
    assert_equal(nx.number_of_cliques(G),
                 {1: 1, 2: 2, 3: 1, 4: 2, 5: 1, 6: 2, 7: 1, 8: 1, 9: 1, 10: 1, 11: 1})
    assert_equal(nx.number_of_cliques(G,cliques=self.cl),
                 {1: 1, 2: 2, 3: 1, 4: 2, 5: 1, 6: 2, 7: 1, 8: 1, 9: 1, 10: 1, 11: 1})
def draw_graph(G):
    """Draw the weighted graph ``G`` with matplotlib.

    Edges with weight > 0.5 are drawn solid, the rest dashed.  Prints the
    number of maximal cliques and the elapsed time, then blocks in
    ``plt.show()``.

    NOTE(review): relies on module-level ``nx``, ``time`` and a global
    ``start`` timestamp set elsewhere in the file — confirm ``start``
    exists before calling.
    """
    # Python 3 fix: the original body used Python 2 print statements, which
    # are a SyntaxError on Python 3; converted to print() calls (identical
    # output).  The original `try: import ... except: raise` was a no-op
    # wrapper and was removed.
    import matplotlib.pyplot as plt

    # split edges by weight threshold
    elarge = [(u, v) for (u, v, d) in G.edges(data=True) if d['weight'] > 0.5]
    esmall = [(u, v) for (u, v, d) in G.edges(data=True) if d['weight'] <= 0.5]

    pos = nx.spring_layout(G)  # positions for all nodes

    # nodes
    nx.draw_networkx_nodes(G, pos, node_size=200)
    # edges
    nx.draw_networkx_edges(G, pos, edgelist=elarge, width=0.4)
    nx.draw_networkx_edges(G, pos, edgelist=esmall, width=0.4, alpha=0.6,
                           style='dashed')
    # labels
    nx.draw_networkx_labels(G, pos, font_size=6, font_family='sans-serif')

    print('number of cliques/clusters:', nx.graph_number_of_cliques(G))
    print('time:', time.time() - start)
    plt.show()
def draw_graph(G):
    """Draw the weighted graph ``G`` with matplotlib.

    Duplicate of the other ``draw_graph`` definition in this file (the later
    definition wins at import time).  Edges with weight > 0.5 are drawn
    solid, the rest dashed; prints the maximal-clique count and elapsed time.

    NOTE(review): relies on module-level ``nx``, ``time`` and a global
    ``start`` timestamp set elsewhere in the file — confirm ``start``
    exists before calling.
    """
    # Python 3 fix: converted Python 2 print statements (SyntaxError on
    # Python 3) to print() calls; removed the no-op `try/except: raise`
    # around the matplotlib import.
    import matplotlib.pyplot as plt

    # split edges by weight threshold
    elarge = [(u, v) for (u, v, d) in G.edges(data=True) if d['weight'] > 0.5]
    esmall = [(u, v) for (u, v, d) in G.edges(data=True) if d['weight'] <= 0.5]

    pos = nx.spring_layout(G)  # positions for all nodes

    # nodes
    nx.draw_networkx_nodes(G, pos, node_size=200)
    # edges
    nx.draw_networkx_edges(G, pos, edgelist=elarge, width=0.4)
    nx.draw_networkx_edges(G, pos, edgelist=esmall, width=0.4, alpha=0.6,
                           style='dashed')
    # labels
    nx.draw_networkx_labels(G, pos, font_size=6, font_family='sans-serif')

    print('number of cliques/clusters:', nx.graph_number_of_cliques(G))
    print('time:', time.time() - start)
    plt.show()
def netstats_simple(graph):
    """Compute a dict of single-value network statistics for ``graph``.

    Keys: nn/ne (node/edge counts), d/r (diameter/radius, or an 'NA' string
    when the graph is disconnected), conn (number of connected components),
    asp (average shortest path length), cn (largest clique size), mcn
    (number of maximal cliques), tr (transitivity), avgcc (average
    clustering coefficient).  Prints the dict and returns it.
    """
    G = graph
    if nx.is_connected(G):
        d = nx.diameter(G)
        r = nx.radius(G)
    else:
        # diameter/radius are undefined on disconnected graphs
        # (could be computed per-component instead — see original TODO)
        d = 'NA - graph is not connected'
        r = 'NA - graph is not connected'
    # NOTE(review): average_shortest_path_length raises on disconnected
    # graphs, so this dict construction still fails in the 'NA' branch —
    # kept as-is to preserve behaviour; flagging for a follow-up.
    result = {
        'nn': G.number_of_nodes(),
        'ne': G.number_of_edges(),
        'd': d,
        'r': r,
        'conn': nx.number_connected_components(G),
        'asp': nx.average_shortest_path_length(G),
        # size of the largest clique
        'cn': nx.graph_clique_number(G),
        # number of maximal cliques
        'mcn': nx.graph_number_of_cliques(G),
        'tr': nx.transitivity(G),
        # average clustering coefficient
        'avgcc': nx.average_clustering(G)
    }
    # Python 3 fix: `print result` (Python 2 statement) -> print(result)
    print(result)
    return result
def Cliques(P, tipo, ruta):
    """Compute per-graph clique statistics for every graph in ``P`` and dump
    them as tab-separated text files under ``<ruta>/NetWX/files/``.

    Parameters
    ----------
    P : sequence of (networkx) graphs — one output row per graph.
    tipo : str — label used as the output file-name prefix.
    ruta : str — base output directory.

    Writes three files (largest-clique size, maximal-clique count, maximal
    clique lists).  Returns None.
    """
    RUTA = ruta + '/NetWX/files/'
    # make sure the output directory exists
    Path(RUTA).mkdir(parents=True, exist_ok=True)

    # Idiom fix: iterate the graphs directly instead of indexing with
    # range(len(P)); results are unchanged.
    graph_clique_num = DataFrame([graph_clique_number(g) for g in P])
    graph_number_of_cliqs = DataFrame([graph_number_of_cliques(g) for g in P])
    graph_find_cliques = DataFrame([find_cliques(g) for g in P])

    graph_clique_num.to_csv(RUTA + tipo + " - clique num.txt",
                            sep='\t', header=None, index=False)
    graph_number_of_cliqs.to_csv(RUTA + tipo + " - number of maxcliques.txt",
                                 sep='\t', header=None, index=False)
    graph_find_cliques.to_csv(RUTA + tipo + " - find cliques.txt",
                              sep='\t', header=None, index=False)
def calculatecliques(network):
    '''
    Returns the number of maximal cliques in G.

    Falls back to 0 when the count cannot be computed (e.g. the input is
    not a valid undirected graph).
    '''
    try:
        n = nx.graph_number_of_cliques(network)
    except Exception:
        # Bug fix: the original bare `except:` also swallowed SystemExit
        # and KeyboardInterrupt; `except Exception` keeps the best-effort
        # fallback without masking interpreter-level exits.
        n = 0
    return n
def computeOneFile(file, aln_net):
    # Process one coevolution .graphml file: record its maximal-clique count
    # in the global max_clique_dict and, for each clique fully covered by a
    # protein's neighbours in the alignment network `aln_net`, bump that
    # protein's score.  Results land in the global full_score_dict.
    # NOTE(review): node names in aln_net look like "<prot>-<res>" (split on
    # '-'), and `file` like "path/<root>.ext" — confirm against the callers.
    global max_clique_dict
    global full_score_dict
    # file root = basename without extension; used as the key everywhere
    file_root = file.split('/')[-1].split('.')[0]
    score_dict = prepareScoreDict(file_root)  # helper defined elsewhere
    coev_net = nx.read_graphml(file)
    cliques = list(nx.find_cliques(coev_net))
    max_clique_dict[file_root] = nx.graph_number_of_cliques(coev_net)
    # print file_root
    # print max_clique_dict[file_root]
    for clique in cliques:
        flag = False
        # per-protein count of clique residues adjacent to that protein
        res_count_dict = {}
        for res in clique:
            if res in list(nx.nodes(aln_net)):
                for neighbor in list(nx.all_neighbors(aln_net, res)):
                    prot = neighbor.split('-')[0]
                    if prot not in res_count_dict.keys():
                        res_count_dict[prot] = 1
                    else:
                        res_count_dict[prot] += 1
            else:
                # clique contains a residue missing from the alignment net
                flag = True
                break
        if flag:
            # discount cliques that cannot be mapped onto the alignment net
            max_clique_dict[file_root] -= 1
            continue
        for key, value in res_count_dict.items():
            # protein is adjacent to every residue of the clique -> score it
            if value == len(clique):
                if np.isnan(score_dict[key]):
                    score_dict[key] = 1
                else:
                    score_dict[key] += 1
    full_score_dict[file_root] = score_dict
def getGraphVector(gGraph):
    """Return a numpy feature vector summarising ``gGraph``.

    Order: edge count, node count, mean degree centrality, number of
    maximal cliques, number of connected components, average node
    connectivity, average shortest path.
    """
    print("Extracting graph feature vector...")
    features = [
        len(gGraph.edges()),
        len(gGraph.nodes()),
        getMeanDegreeCentrality(gGraph),
        nx.graph_number_of_cliques(gGraph),
        nx.number_connected_components(gGraph),
        nx.average_node_connectivity(gGraph),
        getAvgShortestPath(gGraph),
    ]
    vector = np.asarray(features)
    print("Extracting graph feature vector... Done.")
    return vector
def test_number_of_cliques(self):
    """Exercise graph_number_of_cliques / number_of_cliques on the fixture
    graph self.G, reusing the precomputed clique list self.cl where the API
    accepts one."""
    graph = self.G
    # per-node maximal-clique counts for the full fixture graph
    expected = {1: 1, 2: 2, 3: 1, 4: 2, 5: 1, 6: 2,
                7: 1, 8: 1, 9: 1, 10: 1, 11: 1}
    assert nx.graph_number_of_cliques(graph) == 5
    assert nx.graph_number_of_cliques(graph, cliques=self.cl) == 5
    assert nx.number_of_cliques(graph, 1) == 1
    assert list(nx.number_of_cliques(graph, [1]).values()) == [1]
    assert list(nx.number_of_cliques(graph, [1, 2]).values()) == [1, 2]
    assert nx.number_of_cliques(graph, [1, 2]) == {1: 1, 2: 2}
    assert nx.number_of_cliques(graph, 2) == 2
    assert nx.number_of_cliques(graph) == expected
    assert nx.number_of_cliques(graph, nodes=list(graph)) == expected
    assert nx.number_of_cliques(graph, nodes=[2, 3, 4]) == {2: 2, 3: 1, 4: 2}
    assert nx.number_of_cliques(graph, cliques=self.cl) == expected
    assert nx.number_of_cliques(graph, list(graph), cliques=self.cl) == expected
def obtenerValores(dirigido, noDirigido):
    """Collect network metrics as strings for a directed graph ``dirigido``
    and its undirected counterpart ``noDirigido``.

    Returns a list of 14 stringified values in a fixed order (see inline
    numbering); the last three come from the project-level MODC() helper.
    """
    nodos = dirigido.order()
    datos = [
        str(nodos),                                    # 1: order (both)
        str(dirigido.size()),                          # 2: size (directed)
        str(nx.density(dirigido)),                     # 3: density (directed)
        str(dirigido.size() / nodos),                  # 4: average degree (directed)
        str(nx.diameter(noDirigido)),                  # 5: diameter (undirected)
        str(nx.radius(noDirigido)),                    # 6: radius (undirected)
        str(nx.graph_clique_number(noDirigido)),       # 7: largest clique size (undirected)
        str(nx.graph_number_of_cliques(noDirigido)),   # 8: number of maximal cliques (undirected)
        str(nx.global_reaching_centrality(dirigido)),  # 9: global reaching centrality (directed)
        str(nx.average_clustering(dirigido)),          # 10: clustering coefficient (directed)
        str(nx.transitivity(dirigido)),                # 11: transitivity (directed)
    ]
    # 12-14: MODC stats — modularity, minimum dependency, community count
    # (undirected); MODC is defined elsewhere in the project.
    modularidad, dependencia, comunidades = MODC(noDirigido, True)
    datos.append(str(modularidad))
    datos.append(str(dependencia))
    datos.append(str(comunidades))
    return datos
def _graph(self, graph):
    """Generate graph-based attributes.

    Builds a single-row DataFrame whose columns are whole-graph metrics
    of ``graph`` (counts, clique stats, density, clustering, radius,
    tree-ness, Wiener index).
    """
    # compute every metric once, keyed by its output column name
    metrics = {
        'number_of_nodes': nx.number_of_nodes(graph),
        'number_of_edges': nx.number_of_edges(graph),
        'number_of_selfloops': nx.number_of_selfloops(graph),
        'graph_number_of_cliques': nx.graph_number_of_cliques(graph),
        'graph_clique_number': nx.graph_clique_number(graph),
        'density': nx.density(graph),
        'transitivity': nx.transitivity(graph),
        'average_clustering': nx.average_clustering(graph),
        'radius': nx.radius(graph),
        'is_tree': 1 if nx.is_tree(graph) else 0,
        'wiener_index': nx.wiener_index(graph),
    }
    graph_attr = pd.DataFrame()
    for column, value in metrics.items():
        graph_attr[column] = [value]
    return graph_attr
def clique(self):
    """Write a clique report for the configured network to clique.txt and
    open it in Notepad (Windows-only)."""
    import config
    # the network under analysis is shared through the config module
    graphs = config.graph
    file = open('clique.txt', 'w')
    # introductory description of what a clique is
    clique_string = [
        "This txt file will show you the finding of cliques in your network.\n\n"
        + "Description : In complex network, a clique is a maximal subset of the vertices or nodes in an undirected network such that every member\n"
        + "of the set is connected by an edge or link to every other node."
        + "The meaning of 'maximal' here means there is no other vertex or node in the network that can be added to the subset while keeping or preserving\n"
        + "the property that every vertex or node is connected to every other.\n"
    ]
    file.write(clique_string[0])
    # clique graph: one node per maximal clique, connected when cliques
    # share members in the original graph
    max_clique = list(nx.make_max_clique_graph(graphs))
    max_clique_str = str(max_clique)
    max_clique_string = [
        "Maximal Cliques:\n-The maximal cliques and treats these cliques as nodes.\n -These nodes in a [] are connected if they have common members in the original graph.\n"
        + "-" + max_clique_str + '\n'
    ]
    file.write(max_clique_string[0])
    # every maximal clique of the original graph
    all_maximal_cliques = str(list(nx.find_cliques(graphs)))
    all_maximal_cliques_string = [
        "Cliques:\n-The possible cliques in the network.\n"
        + "-" + all_maximal_cliques + '\n'
    ]
    file.write(all_maximal_cliques_string[0])
    # summary stats: maximal-clique count and largest clique size
    number_of_maximum_clique = str(nx.graph_number_of_cliques(graphs))
    number_of_node_in_largest_clique = str(nx.graph_clique_number(graphs))
    clique_number_string = [
        "Basic statistic of cliques in network:\n-The (largest) number of cliques in the network:"
        + number_of_maximum_clique + "\n"
        + "-The number of nodes in the largest clique in the network:"
        + number_of_node_in_largest_clique
    ]
    file.write(clique_number_string[0])
    file.close()  # this must add or only display a empty txt
    import os
    # Windows-only: pop the report up in Notepad
    os.system("notepad.exe clique.txt")
def _graph(self):
    """Generate graph-based attributes.

    Fills ``self.graph_attr`` with one single-element column per
    whole-graph metric of ``self.graph`` and returns it.
    """
    g = self.graph
    # (column name, metric value) pairs, in output-column order
    computed = (
        ('number_of_nodes', nx.number_of_nodes(g)),
        ('number_of_edges', nx.number_of_edges(g)),
        ('number_of_selfloops', nx.number_of_selfloops(g)),
        ('graph_number_of_cliques', nx.graph_number_of_cliques(g)),
        ('graph_clique_number', nx.graph_clique_number(g)),
        ('density', nx.density(g)),
        ('transitivity', nx.transitivity(g)),
        ('average_clustering', nx.average_clustering(g)),
        ('radius', nx.radius(g)),
        ('is_tree', 1 if nx.is_tree(g) else 0),
        ('wiener_index', nx.wiener_index(g)),
    )
    for column, value in computed:
        self.graph_attr[column] = [value]
    return self.graph_attr
def main():
    # Demo driver: build a PSNG model over a three-party agent list
    # (project helpers — semantics of the arguments 7 and "normal" not
    # visible here; TODO confirm) with threshold 0.25, run the dynamic
    # network formation, print summary metrics, and draw the result
    # coloured by each node's first 'attr' value.
    test = PSNG(generate_three_party_list(7, "normal"), 0.25)
    test.dynamic_network_formation()
    # summary metrics of the formed network
    print(nx.degree_centrality(test.social_network))
    print(nx.average_clustering(test.social_network))
    print(nx.graph_number_of_cliques(test.social_network))
    # colour nodes by attr[0]: <0.5 blue, >0.5 green, ==0.5 orange
    colour_map = []
    for i in test.social_network:
        if (test.social_network.node[i]['attr'][0] < 0.5):
            colour_map.append('blue')
        elif (test.social_network.node[i]['attr'][0] > 0.5):
            colour_map.append('green')
        else:
            colour_map.append('orange')
    # NOTE(review): G.node[...] is the pre-NetworkX-2.4 API; newer versions
    # need G.nodes[...]
    nx.draw(test.social_network, node_color=colour_map, alpha=0.9,
            node_size=500, with_labels=True)
    plt.show()
graphs_len[gr] = gtem graphs_lists[gr] = ttem graphh = G.subgraph(graphs[gr]) if len(graphh.nodes()) > 1 and len(graphh.edges()) > 0: cliques_edges.append(graphh.edges()) cliques_nodes.append(graphh.nodes()) if len(colors_to_select) == 0: colors_to_select = list(colors_list) color = random.choice(colors_to_select) colors_to_select.remove(color) colors_of_edges.append((color)) nodes_color_alpha.append(0.4) edges_color_alpha.append(0.6) edge_width_l.append(4.0) lvl2 = [] for i in range(nx.graph_number_of_cliques(G)): lvl2.append(graphs_len[i]) print str(" ") print 'ΚΛΙΚΕΣ ΣΕ ΜΗ ΚΑΤΕΥΘΥΝΟΜΕΝΟΥΣ ΓΡΑΦΟΥΣ' # print 'CLIQUES IN UNDIRECTED GRAPHS' print str(" ") print 'Ο γράφος είναι:' # print 'The graph is:' graph_name = str(G.name) + str(lvl2) print graph_name print str(" ") print 'Το σύνολο όλων των κλικών του γράφου G:' # print 'The set of all maximal cliques in graph G is:'
# NOTE(review): the body of main() below was flattened by a formatting
# accident — the original line breaks are gone — so the code is kept
# byte-for-byte rather than risk a faulty reconstruction.  It is Python 2
# (print statements, `print >> file`) and uses long-removed NetworkX APIs
# (nx.connected_component_subgraphs, G.node[...], nx.cliques_containing_node).
# Pipeline, as far as it can be read from the code:
#   1. load a GML graph, drop self-loops, keep the giant component G;
#   2. collect nodes with role "R6" and write averages/histograms/KS and
#      mood tests of 'percentage_weight_change' for users (not) connected
#      to them;
#   3. annotate each node with the size of the largest clique containing
#      it, tally clique-size counts, and fit + bootstrap weight change vs
#      clique size;
#   4. fit + bootstrap weight change vs number of R6 neighbours ("dose"),
#      per-R6 ego-network averages, weight change vs role and vs degree;
#   5. write the annotated graph back out as
#      *_adherent_num_R6s_largest_clique.gml.
# Helpers histograma() and bootstrap() are defined elsewhere in the file.
def main(graph_name): H = nx.read_gml(graph_name) for node in H.nodes(): # i remove self loops if node in H.neighbors(node): if len(H.neighbors(node)) > 1: H.remove_edge(node, node) else: H.remove_node(node) # for node in H.nodes(): # if H.node[node]['weigh_ins'] <5: #Adherent filter # H.remove_node(node) # print node, "is going down" G = nx.connected_component_subgraphs(H)[0] # Giant component print "size of the GC:", len( G.nodes()) #, "after filtering for adherence!!" #dir=graph_name.split("full_")[0] #dir=graph_name.split("master")[0] #dir=graph_name.split("method3_")[0] #dir=graph_name.split("method3_adh")[0] dir = graph_name.split("friends")[0] dir = dir + "roles/" time_in_system = 50 #minimum amount of time in the sytem for a user to be included in the statistics #name=graph_name.split('data/')[1] #name=graph_name.split('method3_50/interim/')[1] #name=graph_name.split('network_all_users/')[1] name = graph_name.split('5_points_network_2010/data/')[1] name = name.split('.gml')[0] name0 = dir + name + "_overlap_R6s_averages_" + str( time_in_system) + "days_exclude_R6s.dat" file0 = open(name0, 'wt') file0.close() contador = 0 name12 = dir + name + "_slopes_for_the_fits_average_weight_change.dat" file = open(name12, 'wt') file.close() ####for the Isolated Clusters: list_GC_nodes = [] for n in G.nodes(): list_GC_nodes.append(n) # print G.node[n]['percentage_weight_change'] # print "# users GC:",len(list_GC_nodes),"total:",len(H.nodes()) list_weight_changes_not_GC = [] for n in H.nodes(): if n not in list_GC_nodes: #print n,"not in GC" list_weight_changes_not_GC.append( float(H.node[n]['percentage_weight_change'])) #print "# users not in GC:",len(list_weight_changes_not_GC) # who="not_GC" #Nbins=18 #histograma(list_weight_changes_not_GC,Nbins,dir,name,who) ########################### list_R6s = [] # collect the R6 of the system list_R6s_label = [] list_R6s_percent_weight_change = [] for node in G.nodes(): if str(G.node[node]['role']) == "R6": list_R6s.append(node)
list_R6s_label.append(G.node[node]['label']) list_R6s_percent_weight_change.append( float(G.node[node]['percentage_weight_change'])) name00 = dir + name + "R6s_and_top_tens_averages_" + str( time_in_system) + "days_exclude_R6s.dat" file0 = open(name00, 'at') print >> file0, "R6s", numpy.mean( list_R6s_percent_weight_change), numpy.std( list_R6s_percent_weight_change) file0.close() # print "\n\n R6s:\n" # for i in list_R6s_label: # print i # studying the possible cumulative effect of more than one R6 on the population: for node in G.nodes(): cont = 0 for n in G.neighbors(node): if str(G.node[n]['role']) == "R6": cont += 1 G.node[node]["R6_overlap"] = int(cont) ##### weight change for people not connected to any R6s:#### list_weight_changes_no_neighbors = [] for node in G.nodes(): interseccion = list(set(G.neighbors(node)) & set(list_R6s)) # print node, "intersection:",intersection,len(intersection) # print "because", list_R6s, "and ",G.neighbors(node) # raw_input() if len(interseccion) == 0: list_weight_changes_no_neighbors.append( G.node[node]['percentage_weight_change']) # print len(list_weight_changes_no_neighbors),"no_neighbors" who = "no_neigbors_R6s" Nbins = 18 histograma(list_weight_changes_no_neighbors, Nbins, dir, name, who) # mood test mood = stats.mood(list_weight_changes_no_neighbors, list_weight_changes_not_GC) print "mood test for", who, "against not_GC:", mood ######## # K-S test: ks = stats.ks_2samp(list_weight_changes_no_neighbors, list_weight_changes_not_GC) print "KS test for", who, "against not_GC:", ks name00 = "ks_results.dat" file0 = open(dir + name00, 'at') print >> file0, "KS test for", who, "of", graph_name, "against not_GC:", ks file0.close() ############################################# #average percentage weight change as a function of the size of the largest CLIQUE the node belongs to: absolute_max = 1 for i in G.nodes(): maximo = 1 list2 = nx.cliques_containing_node(G, i) # print i, list2 for elem in list2: # print elem,len(elem,) if
len(elem) > maximo: maximo = len(elem) # print "\n",maximo G.node[i]['max_clique_size'] = maximo if absolute_max < maximo: absolute_max = maximo #print absolute_max lista = list( nx.find_cliques(G)) # crea una lista de cliques (lista de listas) max_clique = nx.graph_clique_number(G) #finds out max size clique num_tot_clique = nx.graph_number_of_cliques( G) #finds out total number of cliques # count number of 2, 3, 4, 5, 6 and 7cliques: num_2cliques = 0 num_3cliques = 0 num_4cliques = 0 num_5cliques = 0 num_6cliques = 0 num_7cliques = 0 num_8cliques = 0 num_9cliques = 0 for element in lista: if len(element) == 2: num_2cliques = num_2cliques + 1 elif len(element) == 3: num_3cliques = num_3cliques + 1 elif len(element) == 4: num_4cliques = num_4cliques + 1 elif len(element) == 5: num_5cliques = num_5cliques + 1 elif len(element) == 6: num_6cliques = num_6cliques + 1 elif len(element) == 7: num_7cliques = num_7cliques + 1 elif len(element) == 8: num_8cliques = num_8cliques + 1 elif len(element) == 9: num_9cliques = num_9cliques + 1 # print " 2: ",num_2cliques, " 3: ",num_3cliques, " 4: ",num_4cliques, " 5: ",num_5cliques, " 6: ",num_6cliques, " 7: ",num_7cliques, " 8: ",num_8cliques, " 9: ",num_9cliques, " max_clique_size:",max_clique, " num_tot_cliques:", num_tot_clique name33 = dir + name + "_percent_weight_change_vs_largest_clique_size.dat" file11 = open(name33, 'wt') file11.close() list_of_lists_for_bootstrap = [] x_positions_fit = [] y_positions_fit = [] cum_size_set = float(len(G.nodes())) tot_nodes = [] for clique_size in range(1, max_clique): clique_size = clique_size + 1 print clique_size num_users_set = cum_size_set percent_weight_change_that_clique_size = [] for n in G.nodes(): if G.node[n]['max_clique_size'] == clique_size: percent_weight_change_that_clique_size.append( float(G.node[n]['percentage_weight_change'])) tot_nodes.append(float(G.node[n]['percentage_weight_change'])) cum_size_set -= 1.0 file11 = open(name33, 'at') print >> file11, clique_size,
len( percent_weight_change_that_clique_size), num_users_set / float( len(G.nodes())), numpy.mean( percent_weight_change_that_clique_size), numpy.std( percent_weight_change_that_clique_size) file11.close() if len(x_positions_fit) <= 7: x_positions_fit.append(clique_size) y_positions_fit.append( numpy.mean(percent_weight_change_that_clique_size)) list_of_lists_for_bootstrap.append( percent_weight_change_that_clique_size) slope, intercept, Corr_coef, p_value, std_err = stats.linregress( x_positions_fit, y_positions_fit) # least squeares polinomial fit print "result linear. fit for clique size dependency:" print "slope:", slope, "intercept:", intercept, "Corr_coef:", Corr_coef, "p_value:", p_value, "std_err:", std_err name11 = dir + name + "_fits_clique_size.dat" file11 = open(name11, 'wt') for i in range(len(x_positions_fit)): print >> file11, x_positions_fit[ i], intercept + x_positions_fit[i] * slope print >> file11, "\n\n", "y=", intercept, "+", slope, "*x", print "Bootstrap for clique size:\n" mean_slope, standard_dev = bootstrap(x_positions_fit[0], x_positions_fit[-1], list_of_lists_for_bootstrap) zscore = (slope - mean_slope) / standard_dev print >> file11, "bootstrap:\n", "actual slope:", slope, "mean_slope:", mean_slope, "standard_dev:", standard_dev, "\n zscore:", zscore print x_positions_fit[0], x_positions_fit[ -1], "actual slope:", slope, "mean_slope:", mean_slope, "standard_dev:", standard_dev, "\n zscore:", zscore file11.close() contador += 1 file = open(name12, 'at') print >> file, contador, mean_slope, standard_dev, "largest_clique_size" file.close() ####################################### #####dose effect of the R6s independently######## name11 = dir + name + "_dose_eff_indepently_only_one_R6_" + str( time_in_system) + "days_exclude_R6s.dat" file11 = open(name11, 'at') print >> file11, 0, "average_no_neighbors", "average_no_neighbors", "average_no_neighbors", len( list_weight_changes_no_neighbors ), numpy.mean(list_weight_changes_no_neighbors),
numpy.std( list_weight_changes_no_neighbors ) # the first line of the file is actually for no_neighbors, the rest, for one_and_only_one file11.close() file11 = open(name11, 'wt') file11.close() cont = 1 list_all = [] list_all_nodes = [] for R6 in list_R6s: list_weight_changes = [] for n in G.neighbors(R6): if (G.node[n]['role'] != "R6") and (G.node[n]["R6_overlap"] == 1): list_weight_changes.append( float(G.node[n]['percentage_weight_change'])) if n not in list_all_nodes: list_all_nodes.append(n) list_all.append( float(G.node[n]['percentage_weight_change'])) if len(list_weight_changes) > 0: file11 = open(name11, 'at') print >> file11, cont, G.node[R6]['role'], G.node[R6][ 'label'], len( G.neighbors(R6)), len(list_weight_changes), numpy.mean( list_weight_changes), numpy.std(list_weight_changes) file11.close() # print cont,G.node[R6]['role'],G.node[R6]['label'], len(G.neighbors(R6)),len(list_weight_changes),numpy.mean(list_weight_changes),numpy.std(list_weight_changes) cont = cont + 1 else: # file11=open(name11, 'at') #print >> file11,cont,G.node[R6]['role'],G.node[R6]['label'],len(G.neighbors(R6)),len(list_weight_changes) #file11.close() # print cont,G.node[R6]['role'],G.node[R6]['label'],len(G.neighbors(R6)),len(list_weight_changes) cont = cont + 1 who = "one_and_only_one_R6s" Nbins = 18 histograma(list_all, Nbins, dir, name, who) #################################### print "\n\n" list_of_lists_for_bootstrap = [] x_positions_fit = [] y_positions_fit = [] averages_larger5_x = [] averages_larger5_y = [] norm = 0.0 cum_size_set = float(len(G.nodes())) - float(len(list_R6s)) for r in range(len(list_R6s) + 1): # list_BMI_changes=[] list_weight_changes = [] list_percentage_weight_changes = [] list_activities = [] num_users_set = cum_size_set for node in G.nodes(): if int(G.node[node]["R6_overlap"]) == r: if G.node[node]["role"] == "R6": # i exclude the R6s pass else: if int(G.node[node]['time_in_system']) > time_in_system: #
list_BMI_changes.append(float(G.node[node]['final_BMI'])-float(G.node[node]['initial_BMI'])) list_weight_changes.append( float(G.node[node]['weight_change'])) list_percentage_weight_changes.append( float(G.node[node]['percentage_weight_change'])) list_activities.append( float(G.node[node]['activity']) / float(G.node[node]['time_in_system'])) cum_size_set -= 1.0 if len(list_percentage_weight_changes) > 0: # average_BMI_change=numpy.mean(list_BMI_changes) average_weight_change = numpy.mean(list_weight_changes) average_percentage_weight_change = numpy.mean( list_percentage_weight_changes) average_activity = numpy.mean(list_activities) #deviation_BMI=numpy.std(list_BMI_changes) deviation_weight = numpy.std(list_weight_changes) deviation_percentage_weight = numpy.std( list_percentage_weight_changes) deviation_activity = numpy.std(list_activities) #print out file0 = open(name0, 'at') print >> file0, r, len( list_percentage_weight_changes ), num_users_set / float( len(G.nodes()) ), average_percentage_weight_change, deviation_percentage_weight, average_weight_change, deviation_weight, average_activity, deviation_activity file0.close() if r <= 5: x_positions_fit.append(r) y_positions_fit.append(average_percentage_weight_change) list_of_lists_for_bootstrap.append( list_percentage_weight_changes) # else: # aux_x=r*len(list_percentage_weight_changes) # averages_larger5_x.append(aux_x) # aux_y=average_percentage_weight_change*len(list_percentage_weight_changes) # averages_larger5_y.append(aux_y) #norm+=float(len(list_percentage_weight_changes)) # x_positions_fit.append(numpy.mean(averages_larger5_x)/norm) # y_positions_fit.append(numpy.mean(averages_larger5_y)/norm) slope, intercept, Corr_coef, p_value, std_err = stats.linregress( x_positions_fit, y_positions_fit) # least squeares polinomial fit print "result linear.
fit for dose eff.:" print "slope:", slope, "intercept:", intercept, "Corr_coef:", Corr_coef, "p_value:", p_value, "std_err:", std_err name11 = dir + name + "_fits_dose_eff_R6.dat" file11 = open(name11, 'wt') for i in range(len(x_positions_fit)): print >> file11, x_positions_fit[ i], intercept + x_positions_fit[i] * slope print >> file11, "\n\n", "y=", intercept, "+", slope, "*x", print "Bootstrap for dose eff. R6s:\n" mean_slope, standard_dev = bootstrap(x_positions_fit[0], x_positions_fit[-1], list_of_lists_for_bootstrap) zscore = (slope - mean_slope) / standard_dev print >> file11, "bootstrap:\n", "actual slope:", slope, "mean_slope:", mean_slope, "standard_dev:", standard_dev, "\n zscore:", zscore print x_positions_fit[0], x_positions_fit[ -1], "actual slope:", slope, "mean_slope:", mean_slope, "standard_dev:", standard_dev, "\n zscore:", zscore file11.close() contador += 1 file = open(name12, 'at') print >> file, contador, mean_slope, standard_dev, "dose_eff" file.close() #### averages for every R6's egonetwork:######### cont = 1 list_all_ = [] list_all_nodes_ = [] for node in list_R6s: neighbors = G.neighbors(node) #a list of nodes average_BMI_change = 0.0 list_BMI_changes = [] average_weight_change = 0.0 list_weight_changes = [] average_percentage_weight_change = 0.0 list_percentage_weight_changes = [] average_activity = 0.0 # ojo! sera dividida por el numero de dias!!!!!
list_activities = [] for n in G.neighbors(node): if int(G.node[n]['time_in_system']) > time_in_system: # list_BMI_changes.append(float(G.node[n]['final_BMI'])-float(G.node[n]['initial_BMI'])) list_weight_changes.append(float(G.node[n]['weight_change'])) list_percentage_weight_changes.append( float(G.node[n]['percentage_weight_change'])) list_activities.append( float(G.node[n]['activity']) / float(G.node[n]['time_in_system'])) if n not in list_all_nodes_: list_all_nodes_.append(n) list_all_.append( float(G.node[n]['percentage_weight_change'])) #averages average_weight_change = numpy.mean(list_weight_changes) # average_BMI_change=numpy.mean(list_BMI_changes) average_activity = numpy.mean(list_activities) average_percentage_weight_change = numpy.mean( list_percentage_weight_changes) #standard deviation #deviation_BMI=numpy.std(list_BMI_changes) deviation_weight = numpy.std(list_weight_changes) deviation_percentage_weight = numpy.std(list_percentage_weight_changes) deviation_activity = numpy.std(list_activities) #print out name2 = dir + name + "_ego_R6s_average_weight_change_" + str( time_in_system) + "days.dat" file2 = open(name2, 'at') print >> file2, cont, G.node[node]['role'], G.node[node]['label'], len( G.neighbors(node)), average_weight_change, deviation_weight file2.close() name22 = dir + name + "_ego_R6s_average_percentage_weight_change_" + str( time_in_system) + "days.dat" file22 = open(name22, 'at') print >> file22, cont, G.node[node]['role'], G.node[node][ 'label'], len( G.neighbors(node) ), average_percentage_weight_change, deviation_percentage_weight file22.close() name3 = dir + name + "_ego_R6s_average_activity_" + str( time_in_system) + "days.dat" file3 = open(name3, 'at') print >> file3, cont, G.node[node]['role'], G.node[node]['label'], len( G.neighbors(node)), average_activity, deviation_activity file3.close() cont = cont + 1 who = "R6s_egonetworks_all" Nbins = 18 histograma(list_all_, Nbins, dir, name, who) # print
"intersection:",len(set(list_all_)&set(list_all)),len(list_all_),len(list_all) #############just checking what happens if we remove the 40155 guy ##### percent weight change vs. role: list_roles = ["R1", "R2", "R3", "R4", "R5", "R6", "R7"] file = open(dir + name + "_percentage_weight_change_vs_role", 'wt') cont = 1 for role in list_roles: list_weight_changes_role = [] for n in G.nodes(): if G.node[n]['role'] == role: list_weight_changes_role.append( G.node[n]['percentage_weight_change']) print >> file, cont, role, len(list_weight_changes_role), numpy.mean( list_weight_changes_role), numpy.std(list_weight_changes_role) cont += 1 file.close() ############################# ############## percentage weight change vs k x_positions_fit = [] y_positions_fit = [] cum_size_set = float(len(G.nodes())) list_of_lists_for_bootstrap = [] list_k = [] for n in G.nodes(): list_k.append(len(G.neighbors(n))) max_k = max(list_k) file = open(dir + name + "_percentage_weight_change_vs_k.dat", 'wt') max_k = max_k + 1 for k in range(1, max_k): num_users_set = cum_size_set list_percent_weight_change_k = [] for n in G.nodes(): if len(G.neighbors(n)) == k: list_percent_weight_change_k.append( G.node[n]['percentage_weight_change']) cum_size_set -= 1.0 if len(list_percent_weight_change_k) > 0: print >> file, k, len( list_percent_weight_change_k), num_users_set / float( len(G.nodes())), numpy.mean( list_percent_weight_change_k), numpy.std( list_percent_weight_change_k) if len(x_positions_fit) <= 7: x_positions_fit.append(k) y_positions_fit.append( numpy.mean(list_percent_weight_change_k)) list_of_lists_for_bootstrap.append( list_percent_weight_change_k) slope, intercept, Corr_coef, p_value, std_err = stats.linregress( x_positions_fit, y_positions_fit) # least squeares polinomial fit print "result linear.
fit for degree dependency:" print "slope:", slope, "intercept:", intercept, "Corr_coef:", Corr_coef, "p_value:", p_value, "std_err:", std_err file.close() name11 = dir + name + "_fits_degree.dat" file11 = open(name11, 'wt') for i in range(len(x_positions_fit)): print >> file11, x_positions_fit[ i], intercept + x_positions_fit[i] * slope print >> file11, "\n\n", "y=", intercept, "+", slope, "*x", print "Bootstrap for degree:\n" mean_slope, standard_dev = bootstrap(x_positions_fit[0], x_positions_fit[-1], list_of_lists_for_bootstrap) zscore = (slope - mean_slope) / standard_dev print >> file11, "bootstrap:\n", "actual slope:", slope, "mean_slope:", mean_slope, "standard_dev:", standard_dev, "\n zscore:", zscore print x_positions_fit[0], x_positions_fit[ -1], "actual slope:", slope, "mean_slope:", mean_slope, "standard_dev:", standard_dev, "\n zscore:", zscore file11.close() contador += 1 file = open(name12, 'at') print >> file, contador, mean_slope, standard_dev, "degree" file.close() ######################################## new_name = graph_name.split(".gml")[0] new_name = new_name + "_adherent_num_R6s_largest_clique.gml" nx.write_gml(G, new_name)
def calculate(network):
    """Return the number of maximal cliques in ``network``, or 0 when the
    count cannot be computed (e.g. invalid input graph)."""
    try:
        n = nx.graph_number_of_cliques(network)
    except Exception:
        # Bug fix: narrowed the bare `except:` so SystemExit and
        # KeyboardInterrupt are no longer swallowed by the 0 fallback.
        n = 0
    return n
def compute_features(self):
    """Register every feature computed on the Jaccard similarity graph."""
    g = rbc(self.graph)

    # (name, evaluator, description, interpretability) specs, registered
    # below in exactly the original order.
    feature_specs = [
        (
            "number_of_edges",
            lambda graph: graph.number_of_edges(),
            "Number of edges in Jaccard similarity graph",
            5,
        ),
        (
            "number_of_edges_no_selfloops",
            lambda graph: remove_selfloops(graph).number_of_edges(),
            "Number of edges, not including selfloops, in Jaccard similarity graph",
            5,
        ),
        (
            "connectance",
            lambda graph: nx.density(graph),
            "Connectance of Jaccard similarity graph",
            5,
        ),
        (
            "diameter",
            lambda graph: nx.diameter(ensure_connected(graph)),
            "Diameter of Jaccard similarity graph",
            5,
        ),
        (
            "radius",
            lambda graph: nx.radius(ensure_connected(graph)),
            "Radius of Jaccard similarity graph",
            5,
        ),
        # Assortativity
        (
            "degree_assortativity_coeff",
            lambda graph: nx.degree_assortativity_coefficient(graph),
            "Similarity of connections in Jaccard similarity graph with respect to the node degree",
            4,
        ),
        # Cliques
        (
            "graph_clique_number",
            lambda graph: nx.graph_clique_number(graph),
            "The size of the largest clique in the Jaccard similarity graph",
            3,
        ),
        (
            "num_max_cliques",
            lambda graph: nx.graph_number_of_cliques(graph),
            "The number of maximal cliques in the Jaccard similarity graph",
            3,
        ),
        # Clustering
        (
            "transitivity",
            lambda graph: nx.transitivity(graph),
            "Transitivity of the graph",
            4,
        ),
        # Components
        (
            "is_connected",
            lambda graph: nx.is_connected(graph) * 1,
            "Whether the Jaccard similarity graph is connected or not",
            5,
        ),
        (
            "num_connected_components",
            lambda graph: nx.number_connected_components(graph),
            "The number of connected components",
            5,
        ),
        (
            "largest_connected_component",
            lambda graph: ensure_connected(graph).number_of_nodes(),
            "The size of the largest connected component",
            4,
        ),
        # Efficiency
        (
            "global_efficiency",
            lambda graph: nx.global_efficiency(graph),
            "The global efficiency",
            4,
        ),
        # Connectivity
        (
            "node_connectivity",
            lambda graph: nx.node_connectivity(graph),
            "Node connectivity",
            4,
        ),
        (
            "edge_connectivity",
            lambda graph: nx.edge_connectivity(graph),
            "Edge connectivity",
            4,
        ),
    ]

    for feature_name, evaluator, description, interpretability in feature_specs:
        self.add_feature(
            feature_name,
            evaluator,
            description,
            InterpretabilityScore(interpretability),
            function_args=g,
        )
# Demo graph: two triangles (1-2-3 and 4-5-6) bridged through node 6,
# plus the isolated edge 7-8.
G = nx.Graph()
G.add_nodes_from(range(1, 9))
G.add_edges_from([
    (1, 2), (1, 3), (2, 3),
    (4, 5), (4, 6), (5, 6),
    (1, 6), (2, 6), (3, 6),
    (7, 8),
])

nx.draw(G)
plt.show()

# Enumerate the maximal cliques and report how many there are.
cliques = list(nx.find_cliques(G))
print("Number", nx.graph_number_of_cliques(G))
for clique in cliques:
    print("Hello, world!")
print(cliques)
# %%
# Filtering thresholds for commenter samples (troll accounts are expected
# to score higher on per-author count and percentage than ordinary users).
count_fliter = 3  # drops samples where a commenter replies to one author too rarely
sum_fliter = 5  # drops commenters whose total comment count is too low
percentage_fliter = 0.3  # drops commenters whose probability of commenting on the author is too low

# Build the commenter graph from the filtered node/edge lists.
G = nx.Graph()
G.add_nodes_from(
    get_nodelist(data, count_fliter, sum_fliter, percentage_fliter))
G.add_edges_from(
    get_edgelist(data, count_fliter, sum_fliter, percentage_fliter))
nx.graph_number_of_cliques(G)

# Community detection, then draw nodes coloured by community.
partition = community.best_partition(G)
pos = nx.spring_layout(G)
plt.figure(figsize=(8, 8), dpi=300)
plt.axis('off')
nx.draw_networkx_nodes(
    G,
    pos,
    node_size=20,
    cmap=plt.cm.RdYlBu,
    node_color=list(partition.values()),
    label=get_nodelist(data, count_fliter, sum_fliter, percentage_fliter),
)
nx.draw_networkx_edges(G, pos, alpha=0.3)
nx.write_gexf(G, r'E:\\research\\data\\圖庫\\test.gexf')
def main(graph_name):
    """Analyse one weight-management social network stored as a GML file.

    Loads the graph, keeps the giant component of users that pass an
    adherence filter, and writes a series of ``.dat`` files relating each
    user's ``percentage_weight_change`` node attribute to network
    properties (R6-neighbour "dose", maximal-clique size, degree, role).
    All output lands in the ``roles/`` directory derived from *graph_name*.

    NOTE(review): depends on module-level names defined elsewhere in this
    file: ``nx``, ``numpy``, ``stats`` and the ``histograma`` helper.
    """
    H = nx.read_gml(graph_name)

    # Remove self loops; a node whose ONLY edge is the self loop is dropped.
    for node in H.nodes():
        if node in H.neighbors(node):
            if len(H.neighbors(node)) > 1:
                H.remove_edge(node, node)
            else:
                H.remove_node(node)

    # Adherence filter: keep users with at least 5 weigh-ins
    # (semantics of 'weigh_ins' presumed from the attribute name -- confirm).
    for node in H.nodes():
        if H.node[node]['weigh_ins'] < 5:
            H.remove_node(node)
            # print node, "is going down"

    G = nx.connected_component_subgraphs(H)[0]  # Giant component
    print "final size of the GC:", len(G.nodes())

    # Output directory: everything before "engaged_" plus "roles/".
    #dir=graph_name.split("fr")[0]
    #dir=graph_name.split("master")[0]
    #dir=graph_name.split("method3_")[0]
    dir = graph_name.split("engaged_")[0]
    dir = dir + "roles/"
    print dir

    # Minimum amount of time in the system for a user to be included in the
    # statistics (same unit as the 'time_in_system' node attribute).
    time_in_system = 100

    #name=graph_name.split('data/')[1]
    name = graph_name.split('method3/')[1]
    print name
    name = name.split('.gml')[0]
    print name
    print dir + name

    # Truncate the overlap-averages output file; rows are appended later.
    name0 = dir + name + "_overlap_R6s_averages_" + str(time_in_system) + "days_exclude_R6s_clinically_signif.dat"
    file0 = open(name0, 'wt')
    file0.close()

    #### for the Isolated Clusters:
    # Users outside the giant component form the "not_GC" reference sample.
    list_GC_nodes = []
    for n in G.nodes():
        list_GC_nodes.append(n)
        # print G.node[n]['percentage_weight_change']
    # print "# users GC:",len(list_GC_nodes),"total:",len(H.nodes())

    list_weight_changes_not_GC = []
    for n in H.nodes():
        if n not in list_GC_nodes:
            #print n,"not in GC"
            list_weight_changes_not_GC.append(float(H.node[n]['percentage_weight_change']))
    #print "# users not in GC:",len(list_weight_changes_not_GC)

    who = "not_GC"
    Nbins = 18
    histograma(list_weight_changes_not_GC, Nbins, dir, name, who)

    ###########################
    # Collect the R6 users of the system (nodes whose 'role' is "R6").
    list_R6s = []
    list_R6s_label = []
    list_R6s_percent_weight_change = []
    for node in G.nodes():
        if str(G.node[node]['role']) == "R6":
            list_R6s.append(node)
            list_R6s_label.append(G.node[node]['label'])
            list_R6s_percent_weight_change.append(float(G.node[node]['percentage_weight_change']))

    name00 = dir + name + "R6s_and_top_tens_averages_" + str(time_in_system) + "days_exclude_R6s_clinically_signif.dat"
    file0 = open(name00, 'at')
    print >> file0, "R6s", numpy.mean(list_R6s_percent_weight_change), numpy.std(list_R6s_percent_weight_change)
    file0.close()

    # print "\n\n R6s:\n"
    # for i in list_R6s_label:
    #     print i

    # Studying the possible cumulative effect of more than one R6 on the
    # population: annotate every node with its number of R6 neighbours.
    for node in G.nodes():
        cont = 0
        for n in G.neighbors(node):
            if str(G.node[n]['role']) == "R6":
                cont += 1
        G.node[node]["R6_overlap"] = int(cont)

    ##### weight change for people not connected to any R6s: ####
    list_weight_changes_no_neighbors = []
    for node in G.nodes():
        interseccion = list(set(G.neighbors(node)) & set(list_R6s))
        # print node, "intersection:",intersection,len(intersection)
        # print "because", list_R6s, "and ",G.neighbors(node)
        # raw_input()
        if len(interseccion) == 0:
            list_weight_changes_no_neighbors.append(G.node[node]['percentage_weight_change'])
    # print len(list_weight_changes_no_neighbors),"no_neighbors"

    who = "no_neigbors_R6s"
    Nbins = 18
    histograma(list_weight_changes_no_neighbors, Nbins, dir, name, who)

    # Mood test: no-R6-neighbour sample against the not-in-GC sample.
    mood = stats.mood(list_weight_changes_no_neighbors, list_weight_changes_not_GC)
    print "mood test for", who, "against not_GC:", mood

    ########
    # K-S test:
    ks = stats.ks_2samp(list_weight_changes_no_neighbors, list_weight_changes_not_GC)
    print "KS test for", who, "against not_GC:", ks

    name00 = "ks_results_clinically_signif.dat"
    file0 = open(dir + name00, 'at')
    print >> file0, "KS test for", who, "of", graph_name, "against not_GC:", ks
    file0.close()

    #############################################
    # Average percentage weight change as a function of the size of the
    # largest CLIQUE the node belongs to: tag each node with the size of
    # the biggest maximal clique containing it.
    absolute_max = 1
    for i in G.nodes():
        maximo = 1
        list2 = nx.cliques_containing_node(G, i)
        # print i, list2
        for elem in list2:
            # print elem,len(elem,)
            if len(elem) > maximo:
                maximo = len(elem)
        # print "\n",maximo
        G.node[i]['max_clique_size'] = maximo
        if absolute_max < maximo:
            absolute_max = maximo
    print absolute_max

    lista = list(nx.find_cliques(G))  # builds a list of cliques (list of lists)
    max_clique = nx.graph_clique_number(G)  # finds out max size clique
    num_tot_clique = nx.graph_number_of_cliques(G)  # finds out total number of cliques

    # count number of 2, 3, 4, 5, 6 and 7cliques:
    num_2cliques = 0
    num_3cliques = 0
    num_4cliques = 0
    num_5cliques = 0
    num_6cliques = 0
    num_7cliques = 0
    num_8cliques = 0
    num_9cliques = 0
    for element in lista:
        if len(element) == 2:
            num_2cliques = num_2cliques + 1
        elif len(element) == 3:
            num_3cliques = num_3cliques + 1
        elif len(element) == 4:
            num_4cliques = num_4cliques + 1
        elif len(element) == 5:
            num_5cliques = num_5cliques + 1
        elif len(element) == 6:
            num_6cliques = num_6cliques + 1
        elif len(element) == 7:
            num_7cliques = num_7cliques + 1
        elif len(element) == 8:
            num_8cliques = num_8cliques + 1
        elif len(element) == 9:
            num_9cliques = num_9cliques + 1

    print " 2: ", num_2cliques, " 3: ", num_3cliques, " 4: ", num_4cliques, " 5: ", num_5cliques, " 6: ", num_6cliques, " 7: ", num_7cliques, " 8: ", num_8cliques, " 9: ", num_9cliques, " max_clique_size:", max_clique, " num_tot_cliques:", num_tot_clique

    name33 = dir + name + "_percent_weight_change_vs_largest_clique_size_clinically_signif.dat"
    file11 = open(name33, 'wt')
    file11.close()

    # One row per clique size: sample size, remaining-set fraction,
    # fraction of clinically significant losers (<= -5%), mean and std.
    cum_size_set = float(len(G.nodes()))
    tot_nodes = []
    for clique_size in range(max_clique):
        clique_size = clique_size + 1  # sizes run 1..max_clique
        num_users_clinically_signif = 0.0
        num_users_set = cum_size_set
        percent_weight_change_that_clique_size = []
        for n in G.nodes():
            if G.node[n]['max_clique_size'] == clique_size:
                percent_weight_change_that_clique_size.append(float(G.node[n]['percentage_weight_change']))
                tot_nodes.append(float(G.node[n]['percentage_weight_change']))
                cum_size_set -= 1.0
                if G.node[n]['percentage_weight_change'] <= -5.0:
                    num_users_clinically_signif += 1.0
        try:
            file11 = open(name33, 'at')
            print >> file11, clique_size, len(percent_weight_change_that_clique_size), num_users_set / float(len(G.nodes())), num_users_clinically_signif / len(percent_weight_change_that_clique_size), numpy.mean(percent_weight_change_that_clique_size), numpy.std(percent_weight_change_that_clique_size)
            file11.close()
        except ZeroDivisionError:
            # No node has this max-clique size: write 0.0 for the ratio.
            file11 = open(name33, 'at')
            print >> file11, clique_size, len(percent_weight_change_that_clique_size), num_users_set / float(len(G.nodes())), 0.0, numpy.mean(percent_weight_change_that_clique_size), numpy.std(percent_weight_change_that_clique_size)
            file11.close()

    #######################################
    ##### dose effect of the R6s independently ########
    name11 = dir + name + "_dose_eff_indepently_only_one_R6_" + str(time_in_system) + "days_exclude_R6s.dat"
    # The first line of the file is actually for no_neighbors; the rest,
    # for one_and_only_one.
    file11 = open(name11, 'at')
    print >> file11, 0, "average_no_neighbors", "average_no_neighbors", "average_no_neighbors", len(list_weight_changes_no_neighbors), numpy.mean(list_weight_changes_no_neighbors), numpy.std(list_weight_changes_no_neighbors)
    file11.close()
    # NOTE(review): reopening in 'wt' mode truncates the header row just
    # written above -- looks unintentional; confirm.
    file11 = open(name11, 'wt')
    file11.close()

    cont = 1
    list_all = []
    list_all_nodes = []
    for R6 in list_R6s:
        # Neighbours whose ONLY R6 neighbour is this particular R6.
        list_weight_changes = []
        for n in G.neighbors(R6):
            if (G.node[n]['role'] != "R6") and (G.node[n]["R6_overlap"] == 1):
                list_weight_changes.append(float(G.node[n]['percentage_weight_change']))
                if n not in list_all_nodes:
                    list_all_nodes.append(n)
                    list_all.append(float(G.node[n]['percentage_weight_change']))
        if len(list_weight_changes) > 0:
            file11 = open(name11, 'at')
            print >> file11, cont, G.node[R6]['role'], G.node[R6]['label'], len(G.neighbors(R6)), len(list_weight_changes), numpy.mean(list_weight_changes), numpy.std(list_weight_changes)
            file11.close()
            # print cont,G.node[R6]['role'],G.node[R6]['label'], len(G.neighbors(R6)),len(list_weight_changes),numpy.mean(list_weight_changes),numpy.std(list_weight_changes)
            cont = cont + 1
        else:
            # file11=open(name11, 'at')
            #print >> file11,cont,G.node[R6]['role'],G.node[R6]['label'],len(G.neighbors(R6)),len(list_weight_changes)
            #file11.close()
            # print cont,G.node[R6]['role'],G.node[R6]['label'],len(G.neighbors(R6)),len(list_weight_changes)
            cont = cont + 1

    who = "one_and_only_one_R6s"
    Nbins = 18
    histograma(list_all, Nbins, dir, name, who)

    ####################################
    # Percentage weight change as a function of the number of R6
    # neighbours ("dose" r), excluding the R6s themselves.
    cum_size_set = float(len(G.nodes())) - float(len(list_R6s))
    for r in range(len(list_R6s) + 1):
        # list_BMI_changes=[]
        list_weight_changes = []
        list_percentage_weight_changes = []
        list_activities = []
        num_users_clinically_signif = 0.0
        num_users_set = cum_size_set
        for node in G.nodes():
            if int(G.node[node]["R6_overlap"]) == r:
                if G.node[node]["role"] == "R6":  # i exclude the R6s
                    pass
                else:
                    if int(G.node[node]['time_in_system']) > time_in_system:
                        # list_BMI_changes.append(float(G.node[node]['final_BMI'])-float(G.node[node]['initial_BMI']))
                        list_weight_changes.append(float(G.node[node]['weight_change']))
                        list_percentage_weight_changes.append(float(G.node[node]['percentage_weight_change']))
                        list_activities.append(float(G.node[node]['activity']) / float(G.node[node]['time_in_system']))
                        cum_size_set -= 1.0
                        if G.node[node]['percentage_weight_change'] <= -5.0:
                            num_users_clinically_signif += 1.0
        if len(list_percentage_weight_changes) > 0:
            # average_BMI_change=numpy.mean(list_BMI_changes)
            average_weight_change = numpy.mean(list_weight_changes)
            average_percentage_weight_change = numpy.mean(list_percentage_weight_changes)
            average_activity = numpy.mean(list_activities)
            #deviation_BMI=numpy.std(list_BMI_changes)
            deviation_weight = numpy.std(list_weight_changes)
            deviation_percentage_weight = numpy.std(list_percentage_weight_changes)
            deviation_activity = numpy.std(list_activities)
            # print out
            try:
                file0 = open(name0, 'at')
                print >> file0, r, len(list_percentage_weight_changes), num_users_set / float(len(G.nodes())), num_users_clinically_signif / len(list_percentage_weight_changes), average_percentage_weight_change, deviation_percentage_weight, average_weight_change, deviation_weight, average_activity, deviation_activity
                file0.close()
            except ZeroDivisionError:
                # NOTE(review): this handler reuses clique_size and
                # percent_weight_change_that_clique_size left over from the
                # clique loop and writes to name33 -- looks like a
                # copy-paste remnant; confirm intent.
                file11 = open(name33, 'at')
                print >> file11, clique_size, len(percent_weight_change_that_clique_size), num_users_set / float(len(G.nodes())), 0.0, numpy.mean(percent_weight_change_that_clique_size), numpy.std(percent_weight_change_that_clique_size)
                file11.close()

    #### averages for every R6's egonetwork: #########
    cont = 1
    list_all_ = []
    list_all_nodes_ = []
    for node in list_R6s:
        neighbors = G.neighbors(node)  # a list of nodes
        average_BMI_change = 0.0
        list_BMI_changes = []
        average_weight_change = 0.0
        list_weight_changes = []
        average_percentage_weight_change = 0.0
        list_percentage_weight_changes = []
        average_activity = 0.0  # NOTE: will be divided by the number of days!
        list_activities = []
        for n in G.neighbors(node):
            if int(G.node[n]['time_in_system']) > time_in_system:
                # list_BMI_changes.append(float(G.node[n]['final_BMI'])-float(G.node[n]['initial_BMI']))
                list_weight_changes.append(float(G.node[n]['weight_change']))
                list_percentage_weight_changes.append(float(G.node[n]['percentage_weight_change']))
                list_activities.append(float(G.node[n]['activity']) / float(G.node[n]['time_in_system']))
                if n not in list_all_nodes_:
                    list_all_nodes_.append(n)
                    list_all_.append(float(G.node[n]['percentage_weight_change']))
        # averages
        average_weight_change = numpy.mean(list_weight_changes)
        # average_BMI_change=numpy.mean(list_BMI_changes)
        average_activity = numpy.mean(list_activities)
        average_percentage_weight_change = numpy.mean(list_percentage_weight_changes)
        # standard deviation
        #deviation_BMI=numpy.std(list_BMI_changes)
        deviation_weight = numpy.std(list_weight_changes)
        deviation_percentage_weight = numpy.std(list_percentage_weight_changes)
        deviation_activity = numpy.std(list_activities)
        # print out: one row per R6 ego-network, three metric files.
        name2 = dir + name + "_ego_R6s_average_weight_change_" + str(time_in_system) + "days.dat"
        file2 = open(name2, 'at')
        print >> file2, cont, G.node[node]['role'], G.node[node]['label'], len(G.neighbors(node)), average_weight_change, deviation_weight
        file2.close()
        name22 = dir + name + "_ego_R6s_average_percentage_weight_change_" + str(time_in_system) + "days.dat"
        file22 = open(name22, 'at')
        print >> file22, cont, G.node[node]['role'], G.node[node]['label'], len(G.neighbors(node)), average_percentage_weight_change, deviation_percentage_weight
        file22.close()
        name3 = dir + name + "_ego_R6s_average_activity_" + str(time_in_system) + "days.dat"
        file3 = open(name3, 'at')
        print >> file3, cont, G.node[node]['role'], G.node[node]['label'], len(G.neighbors(node)), average_activity, deviation_activity
        file3.close()
        cont = cont + 1

    who = "R6s_egonetworks_all"
    Nbins = 18
    histograma(list_all_, Nbins, dir, name, who)
    # print "intersection:",len(set(list_all_)&set(list_all)),len(list_all_),len(list_all)

    ############# just checking what happens if we remove the 40155 guy
    ##### percent weight change vs. role:
    list_roles = ["R1", "R2", "R3", "R4", "R5", "R6", "R7"]
    file = open(dir + name + "_percentage_weight_change_vs_role", 'wt')
    cont = 1
    for role in list_roles:
        list_weight_changes_role = []
        for n in G.nodes():
            if G.node[n]['role'] == role:
                list_weight_changes_role.append(G.node[n]['percentage_weight_change'])
        print >> file, cont, role, len(list_weight_changes_role), numpy.mean(list_weight_changes_role), numpy.std(list_weight_changes_role)
        cont += 1
    file.close()

    #############################
    ############## percentage weight change vs k (node degree)
    cum_size_set = float(len(G.nodes()))
    list_k = []
    for n in G.nodes():
        list_k.append(len(G.neighbors(n)))
    max_k = max(list_k)
    file = open(dir + name + "_percentage_weight_change_vs_k_clinically_signif.dat", 'wt')
    max_k = max_k + 1
    for k in range(1, max_k):
        num_users_clinically_signif = 0.0
        num_users_set = cum_size_set
        list_percent_weight_change_k = []
        for n in G.nodes():
            if len(G.neighbors(n)) == k:
                list_percent_weight_change_k.append(G.node[n]['percentage_weight_change'])
                cum_size_set -= 1.0
                if G.node[n]['percentage_weight_change'] <= -5.0:
                    num_users_clinically_signif += 1.0
        if len(list_percent_weight_change_k) > 0:
            try:
                print >> file, k, len(list_percent_weight_change_k), num_users_set / float(len(G.nodes())), num_users_clinically_signif / len(list_percent_weight_change_k), numpy.mean(list_percent_weight_change_k), numpy.std(list_percent_weight_change_k)
            except ZeroDivisionError:
                # NOTE(review): same copy-paste remnant as above -- writes
                # clique-loop variables to name33; confirm intent.
                file11 = open(name33, 'at')
                print >> file11, clique_size, len(percent_weight_change_that_clique_size), num_users_set / float(len(G.nodes())), 0.0, numpy.mean(percent_weight_change_that_clique_size), numpy.std(percent_weight_change_that_clique_size)
                file11.close()
    file.close()
print "-------------------------------------" print "Find cliques of the graph" print "-------------------------------------" cliques = list(nx.find_cliques(G)) print cliques print "-------------------------------------" print "Compute clique number - size of the largest clique" print "-------------------------------------" graphCliqueNumber = nx.graph_clique_number(G, cliques) print graphCliqueNumber print "-------------------------------------" print "Compute number of maximal ciiques" print "-------------------------------------" graphNumberOfCliques = nx.graph_number_of_cliques(G, cliques) print graphNumberOfCliques print "-------------------------------------" print "Compute size of largest maximal clique containing a given node" print "-------------------------------------" maximalCliqueSizePerNode = nx.node_clique_number(G) print maximalCliqueSizePerNode print "-------------------------------------" print "Compute number of maximal cliques for each node" print "-------------------------------------" noOfMaximalCliquesPerNode = nx.number_of_cliques(G) print noOfMaximalCliquesPerNode
# diameter(b): defined only for connected graphs, so fall back to -1
# when the graph has more than one connected component.
diameter = nx.diameter(b) if numberConnectedComponents == 1 else -1
#print(diameter, sizeMaxClique)

# max_clique(b): the (approximate) maximum clique as a set of nodes.
maxClique = naa.max_clique(b)
sizeMaxClique = len(maxClique)
print(diameter, sizeMaxClique)

# min_weighted_dominating_set(b): the dominating set as a set of nodes.
minDominatingSet = naa.min_weighted_dominating_set(b)
sizeMinDominatingSet = len(minDominatingSet)

# graph_number_of_cliques(b): the number of maximal cliques in the graph.
numberOfCliques = nx.graph_number_of_cliques(b)

print(numberConnectedComponents, diameter, sizeMaxClique,
      sizeMinDominatingSet, numberOfCliques)
print("Katz centrality for Hollywood")
nbest_centrality(H, nx.katz_centrality_numpy, 10, "katz")

# Ego graphs around the "Twitter" and "Earth" entities.
T = nx.ego_graph(G, "Twitter")
E = nx.ego_graph(G, "Earth")

# Examine degree distributions with histograms, one figure per graph.
for current in (G, H, T, E):
    sns.distplot([current.degree(v) for v in current.nodes()], norm_hist=True)
    plt.show()

# Summary statistics for the entity graph and the Hollywood ego graph.
for title, current in (("Baleen Entity Graph", G), ("Hollywood Ego Graph", H)):
    print(title)
    print("Transitivity: {}".format(nx.transitivity(current)))
    print("Average clustering coefficient: {}".format(
        nx.average_clustering(current)))
    print("Number of cliques: {}".format(nx.graph_number_of_cliques(current)))
def output_graphmetrics(pathadd, paths, file_name, data_dir):
    '''
    output_graphmetrics()
    Calculates graph theory metrics from package NetworkX, stores in file
    and outputs in .csv file.

    pathadd   -- 2D grid of cell values; cells equal to -9999. are "no data"
                 and are excluded from the graph.
    paths     -- sequence of path records; paths[i][2] holds the path length.
    file_name -- output CSV file name, written under data_dir.
    data_dir  -- output directory (concatenated directly with file_name).
    '''
    try:
        import networkx as nx
    except ImportError:
        raise ImportError("NetworkX required.")

    pathG = nx.Graph()

    # Get nrows and columns
    nrows = len(pathadd)
    ncols = len(pathadd[0])

    # The 8 neighbour offsets: spots 0-7 counted counter-clockwise around
    # the centre cell starting in the lower-left corner.  Bounds-checking
    # each offset replaces the original nine corner/side/interior cases and
    # fixes the spot-0 copy-paste bug where the edge number was computed
    # with (irow-1) although the guard tested pathadd[irow+1][icol-1].
    offsets = ((1, -1), (1, 0), (1, 1), (0, 1),
               (-1, 1), (-1, 0), (-1, -1), (0, -1))

    for irow in xrange(nrows):
        for icol in xrange(ncols):
            # Skip -9999. "no data" cells.
            if pathadd[irow][icol] == -9999.0:
                continue
            # Node number is the cell's row-major grid index.
            nodenumber = ncols * irow + icol
            pathG.add_node(nodenumber)
            # Add an edge to every in-bounds neighbour holding data.
            for drow, dcol in offsets:
                nbr_row = irow + drow
                nbr_col = icol + dcol
                if (0 <= nbr_row < nrows and 0 <= nbr_col < ncols
                        and pathadd[nbr_row][nbr_col] != -9999.0):
                    pathG.add_edge(nodenumber, ncols * nbr_row + nbr_col)

    # Calculate properties from path lengths: min, max, average
    pathlen = [paths[i][2] for i in xrange(len(paths))]

    # Create file to write info to
    try:
        fout = open(data_dir + file_name, 'w')
    except (IOError, OSError) as e:
        # Fixed: exception names were misspelled (IOerror/OSerror), the
        # format string lacked the '%' operator, and it referenced an
        # undefined name 'filename'.
        print("UNICOROutputs %s, error %s" % (file_name, e))
        sys.exit(-1)

    def _write_metric(label, value):
        # One CSV row: "<label>,<value>".
        fout.write(label + ',')
        fout.write(str(value) + '\n')

    # Write header information
    _write_metric('Minimum Path Length', min(pathlen))
    _write_metric('Maximum Path Length', max(pathlen))
    _write_metric('Average Path Length', sum(pathlen) / len(paths))
    _write_metric('Density of Graph', nx.density(pathG))
    _write_metric('Number of nodes', nx.number_of_nodes(pathG))
    _write_metric('Number of edges', nx.number_of_edges(pathG))
    _write_metric('Is the graph a bipartite', nx.is_bipartite(pathG))
    _write_metric('Size of the largest clique', nx.graph_clique_number(pathG))
    _write_metric('Number of maximal cliques', nx.graph_number_of_cliques(pathG))
    _write_metric('Transitivity', nx.transitivity(pathG))
    _write_metric('Average clustering coefficient', nx.average_clustering(pathG))
    _write_metric('Test graph connectivity', nx.is_connected(pathG))
    _write_metric('Number of connected components',
                  nx.number_connected_components(pathG))
    _write_metric('Consists of a single attracting component',
                  nx.is_attracting_component(pathG))
    if nx.is_attracting_component(pathG) == True:
        _write_metric('Number of attracting components',
                      nx.number_attracting_components(pathG))
    if nx.is_connected(pathG):
        _write_metric('Center', nx.center(pathG))
        _write_metric('Diameter', nx.diameter(pathG))
        #_write_metric('Eccentricity', nx.eccentricity(pathG))
        _write_metric('Periphery', nx.periphery(pathG))
        _write_metric('Radius', nx.radius(pathG))
    _write_metric('Degree assortativity', nx.degree_assortativity(pathG))
    _write_metric('Degree assortativity Pearsons r', nx.degree_pearsonr(pathG))

    # Close file (fixed: original wrote 'fout.close' without calling it)
    fout.close()
    del pathG
    # End::output_graphmetrics()
## Order parameter S2 for the current linking length (this pane).
G1 = 0
S1 = G1 / G.number_of_nodes()
s1_alpha.append(S1)
plt.ylabel('S2')  # = ' + str(S1))
plt.xlabel('linking length [Mpc]')
plt.plot(ll, s1_alpha)
plt.ylim([0, 0.05])

## Bottom panes: max clique size (left) and maximal-clique count (right).
for pane, series, newest, axis_label in (
        (3, max_clq_sz, nx.graph_clique_number(G), 'Max Clique Size'),
        (4, max_clq_num, nx.graph_number_of_cliques(G),
         'Number of Maximal Cliques'),
):
    plt.subplot(2, 2, pane)
    series.append(newest)
    plt.ylabel(axis_label)  # = ' + str(S1))
    plt.xlabel('linking length [Mpc]')
    plt.plot(ll, series)

## Persist every series for later analysis, then save the figure.
for stem, series in (('z_radii', ll), ('z_alpha', alpha), ('z_S1', s_alpha),
                     ('z_S2', s1_alpha), ('z_mcs', max_clq_sz),
                     ('z_mnc', max_clq_num)):
    np.save('rgg-data/' + stem, series)

plt.savefig('rgg-data/rgg.png')
# NOTE(review): this fragment appears to continue a loop over component
# index ``gr`` whose header precedes this excerpt -- the first statements
# reference ``gr``/``gtem``/``ttem`` defined there; confirm against the
# full file.
graphs_len[gr] = gtem
graphs_lists[gr] = ttem
# Sub-graph induced by the gr-th clique/component of G.
graphh = G.subgraph(graphs[gr])
if len(graphh.nodes()) > 1 and len(graphh.edges()) > 0:
    cliques_edges.append(graphh.edges())
    cliques_nodes.append(graphh.nodes())
    # Refill the palette once every colour has been used.
    if len(colors_to_select) == 0:
        colors_to_select = list(colors_list)
    # Pick a colour and retire it until the next palette refill.
    color = random.choice(colors_to_select)
    colors_to_select.remove(color)
    colors_of_edges.append((color))
    nodes_color_alpha.append(0.4)
    edges_color_alpha.append(0.6)
    edge_width_l.append(4.0)

# One length entry per maximal clique of G.
lvl2 = []
for i in range(nx.graph_number_of_cliques(G)):
    lvl2.append(graphs_len[i])

# Report section (user-facing strings are Greek; the commented lines below
# each print are the original English translations).
print str(" ")
print "ΚΛΙΚΕΣ ΣΕ ΜΗ ΚΑΤΕΥΘΥΝΟΜΕΝΟΥΣ ΓΡΑΦΟΥΣ"
# print 'CLIQUES IN UNDIRECTED GRAPHS'
print str(" ")
print "Ο γράφος είναι:"
# print 'The graph is:'
graph_name = str(G.name) + str(lvl2)
print graph_name
print str(" ")
print "Το σύνολο όλων των κλικών του γράφου G:"
# print 'The set of all maximal cliques in graph G is:'
def num_max_cliques(graph):
    """Count the maximal cliques of the Jaccard-similarity graph of *graph*.

    Builds the similarity graph via ``jaccard_similarity`` and returns the
    number of maximal cliques it contains.
    """
    similarity_graph = jaccard_similarity(graph)
    return nx.graph_number_of_cliques(similarity_graph)
def clique_v0(filename, threshold):  # bottom-up construction
    """Build a clique "ontology" bottom-up from an edge-score table (v0).

    Rows of *filename* are named ``"<node1>_<node2>"`` and carry a score.
    Edges are added to the undirected window graph in descending score
    order; every time the score drops below the current level ``t`` the
    maximal cliques of the window graph are enumerated and each previously
    unseen clique becomes a new "term" node in the directed ontology graph,
    linked to the existing terms/genes it contains.

    NOTE(review): the *threshold* parameter is never used in this body —
    confirm whether it was meant to bound the sweep.
    """
    windowGraph = {}
    cliqueGraph = {}
    windowGraph[49] = nx.Graph()    # undirected co-occurrence ("window") graph
    cliqueGraph[49] = nx.DiGraph()  # directed term -> child ontology graph
    data = pd.read_csv(filename, index_col = 0, sep = '\t' )
    #df = data[data.columns[0]].sort_values(ascending = False)  # default sort is ascending; ascending=False gives descending order
    df = data[data.columns[0]].sort_values(ascending = False)
    t=1            # current score level being processed
    term = 183     # next term id to assign (NOTE(review): magic start value — confirm)
    dic_term = {}  # term id -> sorted list of member genes
    #dic_cliques = {}
    for i in range(0, df.shape[0]):
        if df[i] == t :
            # Same score level: keep growing the window graph.
            node_1, node_2 = df.index[i].split('_')
            windowGraph[49].add_edge(node_1, node_2)
        else : # find cliques when threshold = t
            print i
            print 'number_of_cliques(windowGraph):',nx.graph_number_of_cliques(windowGraph[49])
            # Process maximal cliques from smallest to largest so small
            # cliques become terms before the larger cliques containing them.
            for cliques in sorted(list(nx.find_cliques(windowGraph[49])), key=lambda x:len(x)) : #cliques sorted by size of each clique
                print 'cliques size:' , len(cliques)
                gene_set = set()   # genes already covered by existing child terms
                term_set = set()   # candidate child terms for the new term
                if sorted(cliques) not in dic_term.values() : #[1,2,3]=[1,2,3] [2,1,3]!=[1,2,3]
                    #this clique is new then a term is generated for this clique
                    cliqueGraph[49].add_node(term, annotation = cliques, windowsize = [49])# generate a term
                    # find child: scan existing terms, newest first.
                    for key,value in sorted(dic_term.items(), key=lambda d:d[0], reverse = True): # sorted by term id 854,853,852,851...
                        if set(value).issubset(cliques) :
                            old_size = len(gene_set) #old size
                            gene_set |= set(value) #add term genes
                            if len(gene_set) > old_size : #new size > old size
                                term_set.add(key) # add useful term
                            if len(set(cliques).intersection(gene_set)) ==len(cliques) : #gene_set == cliques
                                # Child terms fully cover the clique: link the
                                # new term to each contributing child term.
                                print term, 'all link to terms',gene_set.difference(cliques)
                                for child in term_set :
                                    cliqueGraph[49].add_edge(term, child)
                                    print term, child
                                break
                        else:
                            continue
                    if gene_set.issubset(cliques) and len(gene_set)<len(cliques):
                        # Partial coverage: link to the covering terms and
                        # directly to the remaining uncovered genes.
                        #print len(gene_set), len(cliques)
                        #link to term
                        for child_term in term_set :
                            print 'some', term, child_term
                            cliqueGraph[49].add_edge(term, child_term)
                        # link to gene
                        #print term,'some link to genes'
                        for child_gene in set(cliques)-gene_set:
                            #print term, child_gene
                            cliqueGraph[49].add_edge(term, child_gene)
                    dic_term[term] = sorted(cliques)
                    term = term +1
                else :
                    continue
            # Advance to the next score level and add the edge that triggered it.
            t = df[i]
            if not t==-1:
                node_1, node_2 = df.index[i].split('_')
                windowGraph[49].add_edge(node_1, node_2)
    print 'dic_term',len(dic_term)
    print 'windowGraph[49].size()',windowGraph[49].size(), windowGraph[49].number_of_nodes()
    print 'cliqueGraph[49].size()',cliqueGraph[49].size(), cliqueGraph[49].number_of_nodes()
    '''
    #output files
    fw = open ('49ontology_edges_v2.txt', 'w')
    fw2 = open('49ontology_term_annotation_v2.txt', 'w')
    fw3 = open('49ontology_term_annotation_v2a.txt', 'w')
    fw.write('parent' +'\t' +'child' +'\n')
    fw2.write('TermId' +'\t' + ' GeneSize'+ '\t' +'Gene'+'\n')
    fw3.write('TermId' +'\t' + ' GeneSize'+ '\t' +'Gene'+'\n')
    for i in sorted(cliqueGraph[49].edges(), key=lambda d:d[0]) :
        fw.write(str(i[0]) + '\t' + str(i[1]) +'\n')
    for i in dic_term :
        fw2.write(str(i) +'\t'+ str(len(dic_term[i])) + '\t' + ','.join(dic_term[i])+ '\n')
    # input the annotation of each term by nodes attribute---annotation
    for i in sorted(cliqueGraph[49].nodes()):
        try :
            fw3.write(str(i) + '\t'+ str(len(cliqueGraph[49].node[i]['annotation'])) + '\t'+ ' '.join(cliqueGraph[49].node[i]['annotation']) +'\n')
        except :
            continue
    fw.close()
    fw2.close()
    fw3.close()
    '''
    print 'windowGraph[49].size()',windowGraph[49].size(), windowGraph[49].number_of_nodes()
    print 'cliqueGraph[49].size()',cliqueGraph[49].size(), cliqueGraph[49].number_of_nodes()
    print 'nx.graph_clique_number(windowGraph[49]):',nx.graph_clique_number(windowGraph[49])
    print 'number_of_cliques(windowGraph):',nx.graph_number_of_cliques(windowGraph[49])
def clique2(filename, threshold):  # bottom-up construction
    # Same bottom-up clique-ontology build as clique_v0, but with progress
    # prints silenced and an extra repair pass: orphan terms (in-degree 0,
    # i.e. no parent) are attached to the nearest later superset term before
    # the ontology is written to the *_v3 output files.
    # NOTE(review): *threshold* is only used by the commented-out draft in
    # the string below, not by the live code — confirm.
    '''
    data = pd.read_csv(filename, index_col = 0, sep = '\t' )
    windowGraph = {} # generate window graph
    cliqueGraph = {} # generate term
    for i in range(0, data.shape[1]): # Declare windowGraph and cliqueGraph for each column(window)
        windowGraph[i] = nx.Graph()
        cliqueGraph[i] = nx.Graph()
        term = 1
        lastIndex = []
        while threshold >=0:
            df = data[data[data.columns[i]]>=threshold][data.columns[i]]
            newIndex = df.index.difference(lastIndex)
            #Update windowGraph
            for edge in range(0, len(newIndex)):
                node_1, node_2 = newIndex[edge].split('_')
                windowGraph[i].add_edge(node_1, node_2)
            #Update cliqueGraph
            for cliques in sorted(list(nx.find_cliques(windowGraph[i]))) :
                for node in cliques :
                    cliqueGraph[i].add_edge(term, node)
                    term = term +1
            lastIndex = df.index
            threshold = threshold-0.2
        print 'window', i, windowGraph[i].size(), windowGraph[i].number_of_nodes()
    '''
    windowGraph = {}
    cliqueGraph = {}
    windowGraph[49] = nx.Graph()    # undirected co-occurrence ("window") graph
    cliqueGraph[49] = nx.DiGraph()  # directed term -> child ontology graph
    data = pd.read_csv(filename, index_col=0, sep='\t')
    #df = data[data.columns[0]].sort_values(ascending = False)  # default sort is ascending; ascending=False gives descending order
    df = data[data.columns[0]].sort_values(ascending=False)
    t = 1          # current score level being processed
    term = 183     # next term id to assign (NOTE(review): magic start value — confirm)
    dic_term = {}  # term id -> sorted list of member genes
    #dic_cliques = {}
    for i in range(0, df.shape[0]):
        if df[i] == t:
            # Same score level: keep growing the window graph.
            node_1, node_2 = df.index[i].split('_')
            windowGraph[49].add_edge(node_1, node_2)
        else:  # find cliques when threshold = t
            #print i
            #print 'number_of_cliques(windowGraph):',nx.graph_number_of_cliques(windowGraph[49])
            # Process maximal cliques from smallest to largest so small
            # cliques become terms before the larger cliques containing them.
            for cliques in sorted(list(nx.find_cliques(windowGraph[49])), key=lambda x: len(x)):  #cliques sorted by size of each clique
                gene_set = set()   # genes already covered by existing child terms
                term_set = set()   # candidate child terms for the new term
                if sorted(cliques) not in dic_term.values():  #[1,2,3]=[1,2,3] [2,1,3]!=[1,2,3]
                    #this clique is new then a term is generated for this clique
                    cliqueGraph[49].add_node(term, annotation=cliques, windowsize=[49])  # generate a term
                    # find child: scan existing terms, newest first.
                    #print 'cliques size:' , len(cliques)
                    for key, value in sorted(dic_term.items(), key=lambda d: d[0], reverse=True):  # sorted by term id 854,853,852,851...
                        if set(value).issubset(cliques):
                            old_size = len(gene_set)  #old size
                            gene_set |= set(value)  #add term genes
                            if len(gene_set) > old_size:  #new size > old size
                                term_set.add(key)  # add useful term
                            if len(set(cliques).intersection(gene_set)) == len(cliques):  #gene_set == cliques
                                # Child terms fully cover the clique: link the
                                # new term to each contributing child term.
                                #print term, 'all link to terms',gene_set.difference(cliques)
                                for child in term_set:
                                    cliqueGraph[49].add_edge(term, child)
                                    #print term, child
                                break
                        else:
                            continue
                    if gene_set.issubset(cliques) and len(gene_set) < len(cliques):
                        # Partial coverage: link to the covering terms and
                        # directly to the remaining uncovered genes.
                        #print len(gene_set), len(cliques)
                        #link to term
                        for child_term in term_set:
                            #print 'some', term, child_term
                            cliqueGraph[49].add_edge(term, child_term)
                        # link to gene
                        #print term,'some link to genes'
                        for child_gene in set(cliques) - gene_set:
                            #print term, child_gene
                            cliqueGraph[49].add_edge(term, child_gene)
                    dic_term[term] = sorted(cliques)
                    term = term + 1
                else:
                    continue
            # Advance to the next score level and add the edge that triggered it.
            t = df[i]
            if not t == -1:
                node_1, node_2 = df.index[i].split('_')
                windowGraph[49].add_edge(node_1, node_2)
    print 'dic_term', len(dic_term)
    print 'windowGraph[49].size()', windowGraph[49].size(), windowGraph[49].number_of_nodes()
    print 'cliqueGraph[49].size()', cliqueGraph[49].size(), cliqueGraph[49].number_of_nodes()
    print 'term:', term
    print 'Before '
    print 'windowGraph[49].size()', windowGraph[49].size(), windowGraph[49].number_of_nodes()
    print 'cliqueGraph[49].size()', cliqueGraph[49].size(), cliqueGraph[49].number_of_nodes()
    #find terms not root but do not have a parent
    print 'find terms which is not root but do not have a parent:'
    degree_0_terms = []
    for i in cliqueGraph[49].nodes():
        if cliqueGraph[49].in_degree(i) == 0:
            degree_0_terms.append(i)
    print 'degree 0 terms: ', degree_0_terms
    # link these terms to a sutible parent
    # (ascending term id; search forward from the current term for the
    # nearest later term whose gene set is a superset)
    for term_id in sorted(degree_0_terms):
        for fp in range(term_id + 1, term):
            if set(dic_term[term_id]).issubset(dic_term[fp]):
                cliqueGraph[49].add_edge(fp, term_id)
                break
    #output files
    fw = open('49ontology_edges_v3.txt', 'w')
    fw2 = open('49ontology_term_annotation_v3.txt', 'w')
    fw3 = open('49ontology_term_annotation_v3a.txt', 'w')
    fw.write('parent' + '\t' + 'child' + '\n')
    fw2.write('TermId' + '\t' + ' GeneSize' + '\t' + 'Gene' + '\n')
    fw3.write('TermId' + '\t' + ' GeneSize' + '\t' + 'Gene' + '\n')
    for i in sorted(cliqueGraph[49].edges(), key=lambda d: d[0]):
        fw.write(str(i[0]) + '\t' + str(i[1]) + '\n')
    for i in dic_term:
        fw2.write(str(i) + '\t' + str(len(dic_term[i])) + '\t' + ','.join(dic_term[i]) + '\n')
    # input the annotation of each term by nodes attribute---annotation
    for i in sorted(cliqueGraph[49].nodes()):
        try:
            fw3.write(str(i) + '\t' + str(len(cliqueGraph[49].node[i]['annotation'])) + '\t' + ' '.join(cliqueGraph[49].node[i]['annotation']) + '\n')
        except:
            # Gene nodes have no 'annotation' attribute; skip them.
            continue
    fw.close()
    fw2.close()
    fw3.close()
    print 'After:'
    print 'windowGraph[49].size()', windowGraph[49].size(), windowGraph[49].number_of_nodes()
    print 'cliqueGraph[49].size()', cliqueGraph[49].size(), cliqueGraph[49].number_of_nodes()
    print 'nx.graph_clique_number(windowGraph[49]):', nx.graph_clique_number(windowGraph[49])
    print 'number_of_cliques(windowGraph):', nx.graph_number_of_cliques(windowGraph[49])
# -*- coding: utf-8 -*- """ AFRS - Trabalho 4 Author: Gonçalo Peres Date: 2019/02/02 """ import networkx as nx g = nx.read_gml('dolphins.gml') clique = nx.graph_number_of_cliques(g) print(clique)
# Build a correlation graph: one node per item in corr_df's index, and an
# edge between every pair whose correlation weight is >= 0.7.
# NOTE(review): assumes corr_df is a square correlation DataFrame defined
# earlier; squareform() condenses it to one weight per unordered pair, in
# the same order as combinations(corr_df.index, 2) — confirm.
graph = nx.Graph()
graph.add_nodes_from(corr_df.index)
weights = squareform(corr_df.fillna(0).values)
edges_between = list(combinations(corr_df.index, 2))
for n in range(len(weights)):
    edge_weight = weights[n]
    if edge_weight >= 0.7:
        node1 = edges_between[n][0]
        node2 = edges_between[n][1]
        graph.add_edge(node1, node2, weight = edge_weight)
# NOTE(review): result is discarded — this call has no side effects, so the
# line looks like a leftover.
nx.graph_number_of_cliques(graph)
print nx.info(graph)
# Dump the weighted graph in a DIMACS-like node/edge format.
# NOTE(review): `out` is never closed in this fragment — confirm it is
# closed later in the file.
out = open('graph.dimacs', 'wb')
for line in graph.degree(weight='weight').iteritems():
    out.write('n %s %f\n' %line)
for line in graph.edges():
    out.write('e %s %s\n' %line)
cliques = nx.find_cliques(graph)
cliques = list(cliques)  # materialize the maximal-clique generator
a = smaller_df.as_matrix()  # NOTE(review): as_matrix() was removed in pandas 1.0; .values is the modern spelling
color_intensity = []
node_sizes = []
graph_degree = graph.degree(weight='weight')
#from networkx.algorithms import bipartite #print bipartite.is_bipartite(goarn_network) from networkx import find_cliques cliques = list(find_cliques(goarn_network)) bigg= max(cliques, key=lambda l: len(l)) print bigg print nx.graph_number_of_cliques(goarn_network, cliques) # #print nx.eigenvector_centrality(goarn_network) H=nx.connected_component_subgraphs(goarn_network)[0] print(nx.average_shortest_path_length(H)) # #def trim_nodes(goarn_network,d): # """ returns a copy of G without # the nodes with a degree less than d """ # Gt = goarn_network.copy() # dn = nx.degree(Gt) # for n in Gt.nodes(): # if dn[n] <= d:
import networkx as nx

# Signed Bitcoin-OTC trust network: each edge carries an integer trust
# rating ('weight') and a float 'timestamp'.
G = nx.read_edgelist('soc-sign-bitcoinotc.csv', delimiter=',', nodetype=int,
                     data=(('weight', int), ('timestamp', float)),
                     encoding="utf-8")

core_number = nx.core_number(G)  # node -> core number
k_core = nx.k_core(G)            # subgraph induced by the main core

# BUG FIX: enumerate_all_cliques/find_cliques return generators, so the
# original prints below showed '<generator object ...>' instead of the
# cliques.  Materialize them first.
# CAUTION: enumerate_all_cliques yields *every* clique (not only maximal
# ones) and can be extremely large on dense graphs.
enumerate_cliques = list(nx.enumerate_all_cliques(G))
find_cliques = list(nx.find_cliques(G))  # all maximal cliques
graph_clique_number = nx.graph_clique_number(G)  # size of the largest clique
graph_number_of_cliques = nx.graph_number_of_cliques(G)  # count of maximal cliques
edge_betweenness_centrality = nx.edge_betweenness_centrality(G)

print("Core number for each node: ", core_number)
print("K-core: ", k_core)
print("Cliques: ", enumerate_cliques)
print("All maximal cliques: ", find_cliques)
print("Graph clique number - size of the largest clique for the graph: ", graph_clique_number)
print("Number of maximal cliques in the graph: ", graph_number_of_cliques)
print("Edge betweenness: ", edge_betweenness_centrality)
import networkx as nx
import mfa
import numpy as np

# Build the 4-node test graph (a 4-cycle: 0-2, 0-3, 1-2, 1-3) with unit
# edge weights.
G = nx.Graph()
G.add_edges_from([(0, 2), (0, 3), (1, 2), (1, 3)], weight=1)

# Basic graph-level summary statistics.
nodes = G.number_of_nodes()
edges = G.number_of_edges()
deg_assort = nx.degree_pearson_correlation_coefficient(G)
apl = nx.average_shortest_path_length(G)
num_cliq = nx.graph_number_of_cliques(G)
diameter = nx.diameter(G)

# Moment orders q = -10 .. 10 for the multifractal analysis.
Q = list(range(-10, 11))

lst = [nodes, edges, deg_assort, apl, num_cliq, diameter]

# Multifractal spectrum and generalized dimensions via the mfa module.
ntau = mfa.NFD(G, Q)
al_list, fal_list = mfa.nspectrum(ntau, Q)
q_list, dim_list = mfa.ndimension(ntau, Q)
lst += al_list
lst += fal_list
lst += q_list
lst += dim_list
print(lst)
def main():
    """Compute monthly supply-network metrics and append one row per period.

    Loads the pickled monthly network snapshots from ../Results, computes
    degree, weight, k-shell, connected-component, path-length, clique and
    betweenness statistics for each period, writes per-period histogram
    files, and appends one summary row per period to *filename3*.
    NOTE(review): relies on module-level imports (pickle, nx, numpy,
    histograma_gral, histograma_bines_gral) from the enclosing file.
    """
    initial_period = 1
    final_period = 250
    filename3 = "../Results/Time_evol_network_metrics_monthly___.dat"
    # Truncate the output file so each run starts fresh; rows are appended below.
    file3 = open(filename3, 'wt')
    file3.close()
    #header: period N L GC avg_degree std_degree max_k avg_pos_w std_pos_w avg_neg_w std_neg_w max_pos_w min_pos_w max_neg_w min_neg_w
    ##       1      2 3 4  5          6          7     8         9         10        11        12        13        14        15
    # max_shell avg_shortest_path max_clique avg_betweenness std_betweenness
    # 16        17                18         19              20
    # One pickle per monthly period, plus the full 1985-2005 network last.
    list_network_month_files = []
    period = initial_period
    while period <= final_period:
        list_network_month_files.append("../Results/Supply_network_slicing_monthly_period_" + str(period) + "_no_network_metrics.pickle")
        period += 1
    list_network_month_files.append("../Results/Supply_network_1985_2005_no_network_metrics.pickle")
    ########## i read input pickle network
    for filename in list_network_month_files:
        G = pickle.load(open(filename, 'rb'))
        # Skip empty/trivial snapshots.
        if len(G.nodes()) > 1:
            print "\n\nloaded pickle file for the network:", filename
            # Recover the period label from the file name (the aggregate
            # 1985-2005 file has no "period_" part, hence the fallback).
            try:
                period = filename.split("period_")[1].split(".pickle")[0].split("_no_network_metrics")[0]
            except IndexError:
                period = filename.split("Supply_network_")[1].split("_no_network_metrics.pickle")[0]
            # print G.nodes(data=True)
            #raw_input()
            N = len(G.nodes())
            L = len(G.edges())
            # Giant component (old NetworkX returns components largest-first).
            GC = nx.connected_component_subgraphs(G)[0]
            print "period", period
            print " N:", N, "L:", L, "GC:", len(GC.nodes())
            ####### degree
            print "degrees:"
            list_k = []
            for node in G.nodes():
                #list_k.append(len(G.neighbors(node)))
                list_k.append(G.degree(node))
            avg_degree = numpy.mean(list_k)
            std_degree = numpy.std(list_k)
            print " <k>:", avg_degree, "+/-", std_degree
            path_name_h = "../Results/degree_distribution_period" + str(period) + ".dat"
            histograma_gral.histogram(list_k, path_name_h)
            max_k = max(list_k)
            print " max_k:", max_k
            ######### weights
            print "weights:"
            # Positive weights as-is; negative weights sign-flipped so both
            # lists hold positive magnitudes.
            list_pos_w = []
            list_neg_w = []
            for edge in G.edges():
                list_pos_w.append(G.edge[edge[0]][edge[1]]["pos_weight"])
                list_neg_w.append(-1. * (G.edge[edge[0]][edge[1]]["neg_weight"]))
            avg_pos_w = numpy.mean(list_pos_w)
            std_pos_w = numpy.std(list_pos_w)
            print " pos. weight:", avg_pos_w, "+/-", std_pos_w
            # print >> file3, numpy.mean(list_pos_w), numpy.std(list_pos_w),
            avg_neg_w = numpy.mean(list_neg_w)
            std_neg_w = numpy.std(list_neg_w)
            print " neg. weight:", numpy.mean(list_neg_w), "+/-", numpy.std(list_neg_w)
            path_name_h = "../Results/weight_pos_trans_distribution_period" + str(period) + ".dat"
            histograma_gral.histogram(list_pos_w, path_name_h)
            path_name_h = "../Results/weight_neg_trans_distribution_period" + str(period) + ".dat"
            histograma_gral.histogram(list_neg_w, path_name_h)
            max_pos_w = max(list_pos_w)
            min_pos_w = min(list_pos_w)
            max_neg_w = max(list_neg_w)
            min_neg_w = min(list_neg_w)
            print " max_pos_w:", max_pos_w, " min_pos_w:", min_pos_w
            print " max_neg_w:", -1. * max_neg_w, " min_neg_w:", -1. * min_neg_w
            ######### k-shell decomposition
            print "k-shell structure:"
            # i need to make a copy and remove the self-loops from that before i can proceed
            G_for_kshell = nx.Graph(G.subgraph(G.nodes()))
            list_edges_to_remove = []
            for edge in G_for_kshell.edges():
                if edge[0] == edge[1]:
                    list_edges_to_remove.append(edge)
            for edge in list_edges_to_remove:
                G_for_kshell.remove_edge(edge[0], edge[1])
            # Scan shells upward; stop after 10 consecutive empty shells.
            max_shell = 0
            cont_zeros = 0
            for i in range(max_k):
                size_shell = len(nx.k_shell(G_for_kshell, k=i, core_number=None))
                print " ", i, size_shell
                if size_shell == 0:
                    cont_zeros += 1
                else:
                    max_shell = i
                if cont_zeros >= 10:
                    break
            print "max shell:", max_shell
            ######### connected components
            print "connected components:"
            max_con_comp = 0
            list_sizes = []
            for item in sorted(nx.connected_components(G), key=len, reverse=True):
                size = len(item)
                list_sizes.append(size)
                if size > max_con_comp:
                    max_con_comp = size
            # print "list sizes of connected components:",list_sizes
            path_name_h = "../Results/connected_components_distribution_period" + str(period) + ".dat"
            histograma_gral.histogram(list_sizes, path_name_h)
            ########## avg. path length (on the giant component only)
            avg_shortest_path = nx.average_shortest_path_length(GC)
            print "average shortest path within GC:", avg_shortest_path
            ######## max. clique size
            # Per-node largest-clique size, stored as a node attribute.
            absolute_max = 1
            for i in G.nodes():
                maximo = 1
                list2 = nx.cliques_containing_node(G, i)
                # print i, list2
                for elem in list2:
                    # print elem,len(elem,)
                    if len(elem) > maximo:
                        maximo = len(elem)
                # print "\n",maximo
                G.node[i]['max_clique_size'] = maximo
                if absolute_max < maximo:
                    absolute_max = maximo
            lista = list(nx.find_cliques(G))  # builds a list of cliques (list of lists)
            max_clique = nx.graph_clique_number(G)  #finds out max size clique
            num_tot_clique = nx.graph_number_of_cliques(G)  #finds out total number of cliques
            print "max. clique size:", max_clique
            print "calculating betweenness centrality..."
            #for item in nx.betweenness_centrality(G, k=None, normalized=True, weight=None, endpoints=False, seed=None):
            dict_betweenness = nx.betweenness_centrality(G, k=None, normalized=True, weight=None, endpoints=False, seed=None)
            list_betweenness = []
            for node in G.nodes():
                betw = dict_betweenness[node]
                list_betweenness.append(betw)
            avg_betweenness = numpy.mean(list_betweenness)
            std_betweenness = numpy.std(list_betweenness)
            print "avg centrality:", avg_betweenness, std_betweenness
            path_name_h = "../Results/betweenness_distribution_period" + str(period) + ".dat"
            histograma_bines_gral.histograma_bins_norm(list_betweenness, 10, path_name_h)
            print
            print
            # Append this period's summary row (see header comment above).
            file3 = open(filename3, 'at')
            print >> file3, period, N, L, len(GC.nodes()), avg_degree, std_degree, max_k, avg_pos_w, std_pos_w, -1. * avg_neg_w, std_neg_w, max_pos_w, min_pos_w, -1. * max_neg_w, -1. * min_neg_w, max_shell, avg_shortest_path, max_clique, avg_betweenness, std_betweenness
            file3.close()
            print "written:", filename3
def calculate_cliques(self, G):
    """Return clique statistics for *G*.

    Returns a ``(clique_number, num_maximal_cliques)`` tuple, where
    ``clique_number`` is the size of the largest clique in *G* and
    ``num_maximal_cliques`` is the count of its maximal cliques.
    """
    largest_clique_size = nx.graph_clique_number(G)
    maximal_clique_count = nx.graph_number_of_cliques(G)
    return largest_clique_size, maximal_clique_count
#%% net.degree_assortativity_coefficient(EliteNet_giant) #%% net.attribute_assortativity_coefficient(EliteNet_giant, 'multi') #%% # coloring the nodes by attribute: color_map = plt.get_cmap("cool") valuesForColors = [n[1]['multi'] for n in EliteNet_giant.nodes(data=True)] net.draw(EliteNet_giant, node_color=valuesForColors, cmap=color_map, with_labels=True) #%% len([a for a in net.enumerate_all_cliques(EliteNet_giant)]) #%% net.graph_number_of_cliques(EliteNet_giant) #%% for a in net.find_cliques(EliteNet_giant): print(a) #%% net.graph_clique_number(EliteNet_giant) #%% [ c for c in net.find_cliques(EliteNet_giant) if len(c) == net.graph_clique_number(EliteNet_giant) ] #%% import community parts = community.best_partition(EliteNet_giant) parts #%%