def plot_co_x(cox, start, end, size=(20, 20), title='', weighted=False, weight_threshold=10):
    """ Plotting function for keyword graphs

    Parameters
    --------------------
    cox: the coword networkx graph; assumes that nodes have attribute 'topic'
    start: start year
    end: end year
    """
    plt.figure(figsize=size)
    plt.title(title + ' %s - %s' % (start, end), fontsize=18)
    if weighted:
        elarge = [(u, v) for (u, v, d) in cox.edges(data=True) if d['weight'] > weight_threshold]
        esmall = [(u, v) for (u, v, d) in cox.edges(data=True) if d['weight'] <= weight_threshold]
        pos = nx.graphviz_layout(cox)  # positions for all nodes
        nx.draw_networkx_nodes(cox, pos,
                               node_color=[s * 4500 for s in nx.eigenvector_centrality(cox).values()],
                               node_size=[s * 6 + 20 for s in nx.degree(cox).values()],
                               alpha=0.7)
        # edges
        nx.draw_networkx_edges(cox, pos, edgelist=elarge, width=1, alpha=0.5, edge_color='black')  # edge_cmap=plt.cm.Blues
        nx.draw_networkx_edges(cox, pos, edgelist=esmall, width=0.3, alpha=0.5, edge_color='yellow', style='dotted')
        # labels
        nx.draw_networkx_labels(cox, pos, font_size=10, font_family='sans-serif')
        plt.axis('off')
    else:
        nx.draw_graphviz(cox, with_labels=True, alpha=0.8, width=0.1, fontsize=9,
                         node_color=[s * 4 for s in nx.eigenvector_centrality(cox).values()],
                         node_size=[s * 6 + 20 for s in nx.degree(cox).values()])
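# A minimal usage sketch for plot_co_x (an assumption, not part of the original
# module): it presumes the module-level imports networkx as nx and
# matplotlib.pyplot as plt, plus pygraphviz for nx.graphviz_layout (exposed as
# nx.nx_agraph.graphviz_layout in newer NetworkX).
demo_cox = nx.karate_club_graph()
for u, v in demo_cox.edges():
    demo_cox[u][v]['weight'] = 1  # weighted=True reads the 'weight' edge attribute
plot_co_x(demo_cox, 2000, 2010, title='demo', weighted=True, weight_threshold=0)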
def print_degree_distributions(dataset, context):
    """
    Extracts degree distribution values from networks, and prints them to a
    csv file. **warning** overwrites the file if it exists.
    """
    print '> Reading data..', dataset
    corpus_path = '../data/' + dataset + '_text'
    (documents, labels) = data.read_files(corpus_path)
    degsfile = open('output/properties/cooccurrence/degrees_docs_' + dataset.replace('/', '.'), 'w')
    giant = nx.DiGraph()
    print '> Building networks..'
    for i, text in enumerate(documents):
        if i % 10 == 0:
            print '   ', str(i) + '/' + str(len(documents))
        g = graph_representation.construct_cooccurrence_network(text, context=context)
        giant.add_edges_from(g.edges())
        degs = nx.degree(g).values()
        degs = [str(d) for d in degs]
        degsfile.write(','.join(degs) + '\n')
    degsfile.close()
    print '> Writing giant\'s distribution'
    with open('output/properties/cooccurrence/degrees_giant_' + dataset.replace('/', '.'), 'w') as f:
        ds = nx.degree(giant).values()
        ds = [str(d) for d in ds]
        f.write(','.join(ds))
def draw_degree_rank_plot(orig_g, mG):
    ori_degree_seq = sorted(nx.degree(orig_g).values(), reverse=True)  # degree sequence
    deg_seqs = []
    for newg in mG:
        deg_seqs.append(sorted(nx.degree(newg).values(), reverse=True))  # degree sequence
    df = pd.DataFrame(deg_seqs)

    plt.xscale("log")
    plt.yscale("log")
    plt.fill_between(df.columns, df.mean() - df.sem(), df.mean() + df.sem(),
                     color="blue", alpha=0.2, label="se")
    h, = plt.plot(df.mean(), color="blue", aa=True, linewidth=4, ls="--", label="H*")
    orig, = plt.plot(ori_degree_seq, color="black", linewidth=4, ls="-", label="H")

    plt.title("Degree Distribution")
    plt.ylabel("Degree")
    plt.xlabel("Ordered Vertices")
    plt.tick_params(
        axis="x",           # changes apply to the x-axis
        which="both",       # both major and minor ticks are affected
        bottom="off",       # ticks along the bottom edge are off
        top="off",          # ticks along the top edge are off
        labelbottom="off")  # labels along the bottom edge are off

    plt.legend([orig, h], ["$H$", "HRG $H^*$"], loc=3)
    # fig = plt.gcf()
    # fig.set_size_inches(5, 4, forward=True)
    plt.show()
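# A hedged usage sketch: in place of the HRG samples, mG could be a handful of
# degree-preserving rewires of the original graph (assumes networkx as nx,
# pandas as pd and matplotlib.pyplot as plt are imported, under the same
# NetworkX 1.x dict-returning degree API the function uses).
demo_orig = nx.barabasi_albert_graph(200, 3)
demo_mG = [nx.double_edge_swap(demo_orig.copy(), nswap=100, max_tries=10000)
           for _ in range(5)]
draw_degree_rank_plot(demo_orig, demo_mG)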
def PartitionGraph(graph, kc_nodes, anchor=EMPTY_SET, k=2):
    G = graph.copy()
    R_cand = set()
    S_cand = set()
    G_ccs = nx.connected_component_subgraphs(G)
    for g_cc in G_ccs:
        cc_nodes = set(g_cc.nodes())
        kc_overlap = cc_nodes.intersection(kc_nodes)
        if len(kc_overlap) > 0:
            root = kc_overlap.pop()
            R_nodes = set()
            for n in cc_nodes:
                d = nx.degree(G, n)
                if n not in anchor and n not in kc_nodes and d > 0 and d < k:
                    R_nodes.add(n)
            R_cand = R_cand.union(set((u, root) for u in R_nodes))
        else:
            S_nodes = set()
            for n in cc_nodes:
                d = nx.degree(G, n)
                if n not in anchor and d > 0 and d < k:
                    S_nodes.add(n)
            S_cand = S_cand.union(set((u, v) for u, v in combinations(S_nodes, 2)))
    return R_cand, S_cand
def RW_Size(G, r=1000, m=100):
    sampled = []
    now_node = random.choice(G.nodes())
    sampled.append(now_node)
    while True:
        next_node = random.choice(nx.neighbors(G, now_node))
        now_node = next_node
        sampled.append(now_node)
        if len(sampled) >= r:
            break
    print(1)
    lst = []
    for i in range(0, r - m):
        if i + m <= r - 1:
            for j in range(i + m, r):
                # l1 = set(nx.neighbors(G, sampled[i]))
                # l2 = set(nx.neighbors(G, sampled[j]))
                # if len(list(l1 & l2)) >= 1:
                lst.append((sampled[i], sampled[j]))
                lst.append((sampled[j], sampled[i]))
    sumA = 0.0
    sumB = 0.0
    print(len(lst))
    for nodes in lst:
        sumA += float(nx.degree(G, nodes[0])) / nx.degree(G, nodes[1])
        l1 = set(nx.neighbors(G, nodes[0]))
        l2 = set(nx.neighbors(G, nodes[1]))
        count = len(list(l1 & l2))
        sumB += count / (float(nx.degree(G, nodes[0])) * nx.degree(G, nodes[1]))
    return sumA / sumB
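# Sketch only: RW_Size implements a random-walk estimator of the node count,
# so on a connected graph the return value should be close to |V| for large r
# (assumes the NetworkX 1.x API used above, where G.nodes() and nx.neighbors()
# return lists, plus import random).
demo_G = nx.connected_watts_strogatz_graph(1000, 10, 0.1)
print(RW_Size(demo_G, r=2000, m=100))  # expected to be near 1000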
def compute_eps(G2, k, u, S1, S2):
    max_deg_G2 = max(nx.degree(G2).itervalues())
    print "max_deg_G2 =", max_deg_G2
    deg_list = nx.degree(G2)

    # start = time.clock()
    X = compute_X(G2, max_deg_G2)
    # print "compute_X: done"
    # print "Elapsed ", (time.clock() - start)
    Y = compute_Y(X, G2.number_of_nodes(), max_deg_G2)
    # print "compute_Y: done"
    ent = compute_entropy(Y, G2.number_of_nodes(), max_deg_G2)
    # print "len(ent) =", len(ent)

    num_violated = 0
    LOG2K = math.log(k, 2)
    print "LOG2K =", LOG2K
    for (v, deg) in deg_list.iteritems():  # check the original graph
        if (v == u or v in S1) and ent[deg] < LOG2K:
            num_violated += 1  # check and update eps_min (if ok)
    eps2 = float(num_violated) / G2.number_of_nodes()
    #
    return eps2
def compute_eps_deterministic_multi(G, G2, k_arr):
    max_deg_G2 = max(nx.degree(G2).itervalues())
    print "max_deg =", max_deg_G2
    deg_list = nx.degree(G)
    deg_list_G2 = nx.degree(G2)

    deg_count_G2 = [0 for j in range(max_deg_G2 + 1)]
    for deg in deg_list_G2.itervalues():
        deg_count_G2[deg] += 1
    ent = [0.0 for j in range(max_deg_G2 + 1)]
    for j in range(max_deg_G2 + 1):
        if deg_count_G2[j] > 0:
            ent[j] = math.log(deg_count_G2[j], 2)  # simple
    print "len(ent) =", len(ent)
    # print "entropy =", ent

    eps_arr = []
    for k in k_arr:
        num_violated = 0
        LOG2K = math.log(k, 2)
        for (v, deg) in deg_list.iteritems():  # check the original graph
            if deg <= max_deg_G2:  # in the case of max_deg_G2 < max_deg_G
                if ent[deg] > 0.0 and ent[deg] < LOG2K:  # do not check zero-column of ent
                    num_violated += 1  # check and update eps_min (if ok)
        eps2 = float(num_violated) / G2.number_of_nodes()
        eps_arr.append(eps2)
    #
    return eps_arr
def RW_Size_col(G, r=30000):
    sampled = []
    now_node = random.choice(G.nodes())
    sampled.append(now_node)
    sumA = 0.0
    sumB = 0.0
    sumA += nx.degree(G, now_node)
    sumB += 1.0 / nx.degree(G, now_node)
    count = 0
    while True:
        next_node = random.choice(nx.neighbors(G, now_node))
        now_node = next_node
        sumA += nx.degree(G, now_node)
        sumB += 1.0 / nx.degree(G, now_node)
        sampled.append(now_node)
        count += 1
        if count >= r:
            break
    count2 = 0
    for i in range(0, len(sampled) - 1):
        for j in range(i + 1, len(sampled)):
            if sampled[i] == sampled[j]:
                count2 += 1
    return sumA * sumB / (2 * count2)
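# Sketch only: this collision-based variant divides by the number of repeated
# node pairs in the walk, so count2 must be nonzero -- r has to be large
# enough relative to the graph for the walk to revisit nodes (same NetworkX
# 1.x list-returning API as above assumed).
demo_G = nx.connected_watts_strogatz_graph(500, 20, 0.2)
print(RW_Size_col(demo_G, r=20000))  # expected to be near 500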
def compute_eps_multi(G, G2, k_arr):
    max_deg_G2 = max(nx.degree(G2).itervalues())
    print "max_deg_G2 =", max_deg_G2
    deg_list = nx.degree(G)  # G not G2
    deg_list_G2 = nx.degree(G2)

    X = compute_X(G2, G2.number_of_nodes(), max_deg_G2, deg_list_G2)
    Y = compute_Y(X, G2.number_of_nodes(), max_deg_G2, deg_list_G2)
    # check X, Y
    check_X_and_Y(X, Y, G2.number_of_nodes(), max_deg_G2, deg_list_G2)
    ent = compute_entropy(Y, G2.number_of_nodes(), max_deg_G2, deg_list_G2)
    print "len(ent) =", len(ent)
    # print "entropy =", ent

    eps_arr = []
    for k in k_arr:
        num_violated = 0
        LOG2K = math.log(k, 2)
        # print "LOG2K =", LOG2K
        for (v, deg) in deg_list.iteritems():  # check the original graph
            if deg <= max_deg_G2:  # in the case of max_deg_G2 < max_deg_G
                if ent[deg] > 0.0 and ent[deg] < LOG2K:  # do not check zero-column of ent
                    num_violated += 1  # check and update eps_min (if ok)
        eps2 = float(num_violated) / G2.number_of_nodes()
        eps_arr.append(eps2)
    #
    return eps_arr
def cand_gen(G, k, a, c, cores=None):
    '''
    Generate all the branches given a k-plex a.
    a: current k-plex
    c: the block
    '''
    b = neighbors(G, a)
    b.difference_update(c)
    # the strict nodes
    subg = G.subgraph(a)
    strict_nodes = {node for node in a if nx.degree(subg, node) == len(a) - k}
    for node in strict_nodes:
        b.intersection_update(G.neighbors(node))
    # always reshape by optimal
    if cores is None:
        b = {node for node in list(b) if nx.degree(G, node) >= len(optimal) - k}
    else:
        b = {node for node in list(b) if cores[node] >= len(optimal) - k}
    # calculate the valid candidates
    b = {node for node in b if len(set(G.neighbors(node)).intersection(a)) >= len(a) + 1 - k}
    # sort the candidate list
    b = list(b)
    # b.sort(key=lambda x: len(set(G.neighbors(x)).intersection(a)), reverse=True)
    return b
def leaf_removal(g, verbose=False):
    G = g.copy()
    stop = 0
    potential_mis = []
    isolated = [x for x in g.nodes() if nx.degree(g, x) == 0]
    potential_mis.extend(isolated)
    G.remove_nodes_from(isolated)
    while stop == 0:
        deg = G.degree()
        if 1 in deg.values():
            for n in G.nodes_iter():
                if deg[n] == 1:
                    L = n
                    break
            nn = nx.neighbors(G, L)[0]
            G.remove_node(L)
            G.remove_node(nn)
            potential_mis.append(L)
            isolated = [x for x in G.nodes() if nx.degree(G, x) == 0]
            potential_mis.extend(isolated)
            G.remove_nodes_from(isolated)
        else:
            stop = 1
    core_mis = []
    if G.number_of_nodes() >= 1:
        core_mis = nx.maximal_independent_set(G)
        if verbose == True:
            print len(potential_mis), len(core_mis), g.number_of_nodes()
        potential_mis.extend(core_mis)
    else:
        if verbose == True:
            print len(potential_mis), len(core_mis), g.number_of_nodes()
    return potential_mis, core_mis
def __generate_paths(self):
    self.__paths = []
    graph = copy.deepcopy(self.__graph)
    paths = []
    if not graph.nodes():
        return
    start_nodes = [n for n in graph.nodes()
                   if (n.style() != "curve" or (nx.degree(graph, n) == 1))]
    start_nodes = sorted(start_nodes, key=lambda n: nx.degree(graph, n))
    if start_nodes:
        path = [start_nodes[0]]
    else:
        path = [graph.nodes()[0]]
    while path:
        neighbors = nx.neighbors(graph, path[-1])
        if neighbors:
            node = neighbors[0]
            graph.remove_edge(path[-1], node)
            path.append(node)
        else:
            paths.append(copy.copy(path))
            while path and not graph.neighbors(path[-1]):
                path.pop()
    for path in paths:
        self.__paths.append(reduce_path(path))
    return
def nodeMaxDegree(G):
    degree = 0
    for n in G.nodes():
        if nx.degree(G, n) > degree:
            degree = nx.degree(G, n)
            node = n
    return node
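# Quick sanity check (assumes import networkx as nx): on a star graph the hub
# is the unique maximum-degree node.
assert nodeMaxDegree(nx.star_graph(5)) == 0  # node 0 is the hub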
def _tester():
    # think of this like a networkx scratchpad
    G = nx.Graph()  # this is an undirected graph
    G.add_edge(1, 2)
    G.add_edge(2, 3)
    G.add_node(4)
    print nx.degree(G)
    print nx.info(G)
def mindeg_GSK(BG, variables_index=0, verbose=False):
    Vprime1 = []
    Vprime2 = []
    layer = nx.get_node_attributes(BG, 'bipartite')
    var = [x for x in BG.nodes() if layer[x] == variables_index]
    fac = [x for x in BG.nodes() if layer[x] != variables_index]
    if verbose == True:
        print 'Initial variable nodes:', var
        print 'Initial factor nodes:', fac
    isolated_variables = [x for x in BG.nodes() if nx.degree(BG, x) == 0 and layer[x] == variables_index]
    [var.remove(x) for x in isolated_variables]
    G = BG.copy()
    Vprime1.extend(isolated_variables)
    G.remove_nodes_from(isolated_variables)
    isolated_factors = [x for x in G.nodes() if nx.degree(BG, x) == 0 and layer[x] != variables_index]
    [fac.remove(x) for x in isolated_factors]
    G.remove_nodes_from(isolated_factors)
    while len(var) > 0:
        if verbose == True:
            print '#var:', len(var), '#fac:', len(fac), '#nodes in depleted graph:', G.number_of_nodes(), '#original BG:', BG.number_of_nodes()
        pendant = return_mindeg_pendant(G, layer, variables_index)
        if len(pendant) == 0:
            # if not, choose randomly and do the game.
            if verbose == True:
                print var
            m = G.number_of_nodes() * 2
            degs = G.degree()
            for e in G.edges():
                if degs[e[0]] + degs[e[1]] < m:
                    m = degs[e[0]] + degs[e[1]]
                    v = e
                    if e[0] in var:
                        v = e[0]
                    else:
                        v = e[1]
            pendant = []
            pendant.append(v)
            pendant.extend(nx.neighbors(G, v))
            Vprime2.append(pendant[0])
        else:
            Vprime1.append(pendant[0])
        augmented_pendant = []
        augmented_pendant.extend(pendant)
        for n in pendant[1:]:
            augmented_pendant.extend(nx.neighbors(G, n))
        augmented_pendant = list(set(augmented_pendant))
        G.remove_nodes_from(augmented_pendant)
        [var.remove(x) for x in augmented_pendant if x in var]
        [fac.remove(x) for x in augmented_pendant if x in fac]
    return Vprime1, Vprime2
def k_dependency_feats(self):
    wordindex = self.index + 1
    headindex = dep_head_of(self.deptree, wordindex)
    D = {}
    D["k_dist_to_root"] = len(dep_pathtoroot(self.deptree, wordindex))
    D["k_deprel"] = self.deptree[headindex][wordindex]["deprel"]
    D["k_headdist"] = abs(headindex - wordindex)  # maybe do 0 for root?
    D["k_head_degree"] = nx.degree(self.deptree, headindex)
    D["k_child_degree"] = nx.degree(self.deptree, wordindex)
    return D
def sub_by_degree(min_degree, net):
    # return a subset of net with degree bigger than min_degree
    remove_node_list = [item for item in nx.degree(net)
                        if nx.degree(net)[item] < min_degree]
    new_net = nx.Graph()
    new_net.add_nodes_from(net.nodes())
    new_net.add_edges_from(net.edges())
    for name in remove_node_list:
        new_net.remove_node(name)
    return new_net
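# Usage sketch (assumes the NetworkX 1.x dict-returning nx.degree used above):
# on a path graph, pruning below degree 2 drops the two endpoints.
demo_net = nx.path_graph(5)
print(sub_by_degree(2, demo_net).nodes())  # interior nodes 1, 2, 3 remain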
def main():
    LOG = True
    # if (len(sys.argv) != 3):
    #     print "ERROR: genRandomGeorml <nodes> <raio>"
    #     sys.exit(1)
    NMAX = int(sys.argv[1])
    RAIO = float(sys.argv[2])
    # NMAX = 40
    # RAIO = 0.1
    ALCANCE = 250
    G = nx.random_geometric_graph(NMAX, RAIO, 2)
    while not nx.is_connected(G):
        RAIO = RAIO + .005
        G = nx.random_geometric_graph(NMAX, RAIO, 2)
        if LOG:
            print "Graph is not fully connected"
    pos = nx.get_node_attributes(G, 'pos')
    network(G, pos, 1)
    # Remove neighbors that are too close
    while nodeNear(G) < 1000:
        G.remove_node(nodeNear(G))
    if nx.is_connected(G):
        pos = nx.get_node_attributes(G, 'pos')
        network(G, pos, 2)
        # Remove the node that has the most neighbors
        T = G
        if not nodeSolo(T, nodeMaxDegree(T)):
            T.remove_node(nodeMaxDegree(T))
        if nx.is_connected(T):
            G = T
            pos = nx.get_node_attributes(G, 'pos')
            network(G, pos, 3)
            for n in G.neighbors(nodeMaxDegree(G)):
                if nx.degree(G, n) == 2:
                    degree = nx.degree(G, n)
                    node = n
                    print "node=", n
                    if not nodeSolo(G, n):
                        G.remove_node(n)
                        break
            pos = nx.get_node_attributes(G, 'pos')
            network(G, pos, 4)
        else:
            if LOG:
                print "SubGraph is not fully connected"
def remove_by_degree(G, d):
    '''
    remove nodes by degree
    '''
    nodes_to_be_delete = [i for i in G.nodes() if nx.degree(G, i) < d]
    while len(nodes_to_be_delete) > 0:
        print(nodes_to_be_delete)
        if 164373 in nodes_to_be_delete:
            a = input()
        G.remove_nodes_from(nodes_to_be_delete)
        nodes_to_be_delete = [i for i in G.nodes() if nx.degree(G, i) < d]
    return
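# Usage sketch: the pruning loop recomputes low-degree nodes after every
# removal round, so the surviving subgraph is the d-core of the input
# (mutated in place; assumes import networkx as nx).
demo_G = nx.barabasi_albert_graph(100, 2)
remove_by_degree(demo_G, 3)
print(demo_G.number_of_nodes())  # only nodes of degree >= 3 remain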
def compute_measures(bigDict):
    """ Computes the measures for each network

    Measures to compute:
        nr_of_nodes
        nr_of_edges
        max_edge_value
        min_edge_value
        is_connected
        number_connected_components
        average_unweighted_node_degree
        average_weighted_node_degree
        average_clustering_coefficient
        average_weighted_shortest_path_length
        average_unweighted_shortest_path_length

    To be added: single node values, e.g. node degree of brainstem etc.

    Non-scalar return values: (not used yet)
        degree_distribution
        edge_weight_distribution
    """
    returnMeasures = {}
    for key, netw in bigDict.items():
        outm = {}
        outm['nr_of_nodes'] = netw.number_of_nodes()
        outm['nr_of_edges'] = netw.number_of_edges()
        outm['max_edge_value'] = np.max([d['weight'] for f, t, d in netw.edges(data=True)])
        outm['min_edge_value'] = np.min([d['weight'] for f, t, d in netw.edges(data=True)])
        outm['is_connected'] = nx.is_connected(netw)
        outm['number_connected_components'] = nx.number_connected_components(netw)
        outm['average_unweighted_node_degree'] = np.mean(nx.degree(netw, weighted=False).values())
        outm['average_weighted_node_degree'] = np.mean(nx.degree(netw, weighted=True).values())
        outm['average_clustering_coefficient'] = nx.average_clustering(netw)
        outm['average_weighted_shortest_path_length'] = nx.average_shortest_path_length(netw, weighted=True)
        outm['average_unweighted_shortest_path_length'] = nx.average_shortest_path_length(netw, weighted=False)
        returnMeasures[key] = outm
    return returnMeasures
def apply_sifi_surcharge(self):
    degree_sum = 0
    for bank in self.network.contracts:
        degree_sum += float(nx.degree(self.network.contracts)[bank])
    average_degree = float(degree_sum / len(self.network.contracts.nodes()))
    for bank in self.network.contracts:
        # the sifi surcharge is the product of the sifiSurchargeFactor and the
        # connectedness as measured by degree/average_degree
        # the maximum ensures that no bank has to hold less than 1.0 times their banking capital
        sifiSurcharge = max(self.get_state(0).sifiSurchargeFactor *
                            (float(nx.degree(self.network.contracts)[bank]) / average_degree), 1.0)
        bank.apply_sifi_surcharge(sifiSurcharge)
def construct_graph_list_und(graphs_to_const):
    """Construct and return a list of graphs so graph construction is easily
    repeatable.

    Can handle: Random, Small-world, Scale-free, SGPA, SGPA-random"""

    graph_list = []

    # Always construct and add Allen Institute mouse brain to list
    G_brain = brain_graph.binary_undirected()[0]
    graph_list.append(G_brain)

    # Calculate degree & clustering coefficient distribution
    n_nodes = G_brain.order()
    brain_degree = nx.degree(G_brain).values()
    brain_degree_mean = np.mean(brain_degree)

    # Construct degree controlled random
    if 'Random' in graphs_to_const:
        G_RAND = und_graphs.random_simple_deg_seq(
            sequence=brain_degree, brain_size=BRAIN_SIZE, tries=1000)[0]
        graph_list.append(G_RAND)

    # Construct small-world graph
    if 'Small-world' in graphs_to_const:
        graph_list.append(nx.watts_strogatz_graph(
            n_nodes, int(round(brain_degree_mean)), SW_REWIRE_PROB))

    # Construct scale-free graph
    if 'Scale-free' in graphs_to_const:
        graph_list.append(nx.barabasi_albert_graph(
            n_nodes, int(round(brain_degree_mean / 2.))))

    # Construct SGPA graph
    if 'SGPA' in graphs_to_const:
        G_SGPA = source_growth(bc.num_brain_nodes,
                               bc.num_brain_edges_directed, L=LENGTH_SCALE)[0]
        graph_list.append(G_SGPA.to_undirected())

    # Construct degree-controlled SGPA graph
    if 'SGPA-random' in graphs_to_const:
        SGPA_degree = nx.degree(G_SGPA).values()
        G_SGPA_RAND = und_graphs.random_simple_deg_seq(
            sequence=SGPA_degree, brain_size=BRAIN_SIZE, tries=1000)[0]
        graph_list.append(G_SGPA_RAND)

    # Error check that we created correct number of graphs
    if len(graph_list) != len(graphs_to_const):
        raise RuntimeError('Graph list/names don\'t match')

    return graph_list
def make_hist(gnm, sw, rw):
    degree_sequence1 = sorted(nx.degree(gnm).values(), reverse=True)
    degree_sequence2 = sorted(nx.degree(sw).values(), reverse=True)
    degree_sequence3 = sorted(nx.degree(rw).values(), reverse=True)
    counts1 = {}
    counts2 = {}
    counts3 = {}
    # count how many nodes have each degree value
    for d in degree_sequence1:
        if d in counts1:
            counts1[d] += 1
        else:
            counts1[d] = 1
    for d in degree_sequence2:
        if d in counts2:
            counts2[d] += 1
        else:
            counts2[d] = 1
    for d in degree_sequence3:
        if d in counts3:
            counts3[d] += 1
        else:
            counts3[d] = 1
    counts = [float(counts1.get(d, 0)) for d in range(max(counts1) + 1)]
    p1, = plt.loglog(counts, 'b-', marker='o')
    counts = [float(counts2.get(d, 0)) for d in range(max(counts2) + 1)]
    p2, = plt.loglog(counts, 'r-', marker='o')
    counts = [float(counts3.get(d, 0)) for d in range(max(counts3) + 1)]
    p3, = plt.loglog(counts, 'y-', marker='o')
    # p1, = plt.loglog(counts1, 'b-', marker='o')
    # p2, = plt.loglog(counts2, 'r-', marker='o')
    # p3, = plt.loglog(counts3, 'y-', marker='o')
    plt.title("Degree rank plot")
    plt.ylabel("degree")
    plt.xlabel("rank")
    plt.legend([p1, p2, p3], ["Gnm", "Small-World", "Real-World"])
    plt.savefig("test3.png")
    plt.close()
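# Usage sketch (assumes networkx as nx and matplotlib.pyplot as plt, under
# the NetworkX 1.x dict API used above): comparing a G(n, m) graph and a
# small-world graph against a scale-free stand-in for the real-world network.
make_hist(nx.gnm_random_graph(1000, 5000),
          nx.watts_strogatz_graph(1000, 10, 0.1),
          nx.barabasi_albert_graph(1000, 5))  # writes test3.png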
def findEndpointsBifurcations(self, verbose=False):
    """For the current graph, identify all points that are either endpoints
    (1 neighbor) or bifurcation points (3 or more neighbors)"""
    endpoints = []
    bifurcations = []
    for n in self.cg.nodes_iter():
        if nx.degree(self.cg, n) == 1:
            endpoints.append(n)
        elif nx.degree(self.cg, n) >= 3:
            bifurcations.append(n)
    self.endpoints[self.currentGraphKey] = endpoints
    self.bifurcations[self.currentGraphKey] = bifurcations
def gp(G, edge_pro_dict, node_degree_dict):
    Ggp = nx.Graph()
    for each_node in G.nodes():
        Ggp.add_node(each_node)
    edge_pro_order = sorted(edge_pro_dict.iteritems(), key=lambda d: d[1], reverse=True)
    for each_edge_order in edge_pro_order:
        u = each_edge_order[0][0]
        v = each_edge_order[0][1]
        discrepency_u = nx.degree(Ggp)[u] - node_degree_dict[u]
        discrepency_v = nx.degree(Ggp)[v] - node_degree_dict[v]
        if abs(discrepency_u + 1) + abs(discrepency_v + 1) < abs(discrepency_u) + abs(discrepency_v):
            Ggp.add_edge(u, v)
    return Ggp
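# Usage sketch (Python 2, matching the iteritems() call above): rebuild a
# graph greedily from per-edge probabilities while tracking each node's
# target degree.
demo_G = nx.karate_club_graph()
demo_edge_pro = dict(((u, v), 1.0) for u, v in demo_G.edges())
demo_target_deg = nx.degree(demo_G)  # NetworkX 1.x returns a dict here
demo_Ggp = gp(demo_G, demo_edge_pro, demo_target_deg)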
def RemoveCore(graph, k=2, anchor=EMPTY_SET):
    G = graph.copy()
    Gn = FindKCore(G, k, anchor)
    for u, v in Gn.edges_iter():
        if u != v:
            G.remove_edge(u, v)
    KC_nodes = set(filter(lambda n: nx.degree(G, n) > 0, Gn.nodes()))
    G_small = G.copy()
    for n in G.nodes_iter():
        if nx.degree(G, n) == 0:
            G_small.remove_node(n)
    return G, G_small, KC_nodes
def participation_coefficient(graph, partition):
    '''
    Computes the participation coefficient for each node.

    ------
    Inputs
    ------
    graph = networkx graph
    partition = modularity partition of graph

    ------
    Output
    ------
    Dictionary mapping each node to its participation coefficient.
    '''
    pc_dict = {}
    all_nodes = set(graph.nodes())
    paths = nx.shortest_path_length(G=graph)
    for m in partition.keys():
        mod_list = set(partition[m])
        between_mod_list = list(set.difference(all_nodes, mod_list))
        for source in mod_list:
            degree = float(nx.degree(G=graph, nbunch=source))
            count = 0
            for target in between_mod_list:
                if paths[source][target] == 1:
                    count += 1
            bm_degree = count
            pc = 1 - (bm_degree / degree) ** 2
            pc_dict[source] = pc
    return pc_dict
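# Usage sketch (assumes the NetworkX 1.x dict-of-dicts shortest_path_length
# used above): a partition maps module ids to node lists; here two cliques
# joined by a single bridge edge.
demo_graph = nx.barbell_graph(5, 0)
demo_partition = {0: list(range(5)), 1: list(range(5, 10))}
print(participation_coefficient(demo_graph, demo_partition))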
def give_output_list(self, game):
    """ This returns a list of the selected nodes.

    The twin attack player finds the highest degree nodes, and for each, it
    selects two neighbors of that node and adds them to the seed set until
    enough seeds have been chosen.
    """
    nodes = nx.nodes(game.network)
    nodes.sort(key=lambda x: nx.degree(game.network, x), reverse=True)
    selections = set()
    for node in nodes:
        adjacents = list(nx.all_neighbors(game.network, node))
        for adj_node in adjacents[:2]:
            selections.add(adj_node)
            if len(selections) == game.num_seeds:
                break
        if len(selections) == game.num_seeds:
            break
    assert len(selections) == game.num_seeds
    return list(selections)
def draw_graph(graph2):
    plt.clf()
    nodes = set([n1 for n1, n2 in graph2] + [n2 for n1, n2 in graph2])  # Extract nodes from graph
    G = nx.Graph()  # Graph - No Edges
    for node in nodes:  # Nodes
        G.add_node(node)
    for edge in graph2:  # Edges
        G.add_edge(edge[0], edge[1])
    pos = nx.spring_layout(G)  # Layout settings
    nx.draw_networkx_nodes(G, pos, node_size=1500, node_color='w', font_size=6)
    nx.draw_networkx_edges(G, pos, alpha=0.75, width=3)
    nx.draw_networkx_labels(G, pos, font_color='b')
    plt.title('Twitter Hashtag Graph')
    plt.axis('off')
    # Show graph
    plt.savefig(".\\images\\graph.png")
    # Calculate average degree
    average_degree = np.mean(nx.degree(G).values())
    ft2 = open(sys.argv[2], 'a')  # Write to ft2.txt
    if np.isnan(average_degree):  # NaN for no hashtags
        ft2.write('0.00' + '\n')
    else:
        aver_deg = format(average_degree, '.2f')
        ft2.write(str(aver_deg) + '\n')
    ft2.close()
    return
def draw_graph(username, password, filename='graph.txt', label_flag=True,
               remove_isolated=True, different_size=True, iso_level=10, node_size=40):
    """Read data from file and draw the graph. If the file does not exist,
    create it and re-scrape the data from the net"""
    print "Generating graph..."
    try:
        with open(filename, 'r') as f:
            G = p.load(f)
    except:
        G = getgraph(username, password)
        with open(filename, 'w') as f:
            p.dump(G, f)
    # nx.draw(G)
    # Judge whether to remove the isolated points from the graph
    if remove_isolated is True:
        H = nx.empty_graph()
        for SG in nx.connected_component_subgraphs(G):
            if SG.number_of_nodes() > iso_level:
                H = nx.union(SG, H)
        G = H
    # Adjust graph for better presentation
    if different_size is True:
        L = nx.degree(G)
        G.dot_size = {}
        for k, v in L.items():
            G.dot_size[k] = v
        node_size = [G.dot_size[v] * 10 for v in G]
    pos = nx.spring_layout(G, iterations=50)
    nx.draw_networkx_edges(G, pos, alpha=0.2)
    nx.draw_networkx_nodes(G, pos, node_size=node_size, node_color='r', alpha=0.3)
    # Judge whether to show labels
    if label_flag is True:
        nx.draw_networkx_labels(G, pos, alpha=0.5)
    # nx.draw_graphviz(G)
    plt.show()
    return G
import networkx as nx
import network_attack as na

graph = nx.erdos_renyi_graph(n=300, p=0.1)

print("the total number of edges:")
n_edges = nx.number_of_edges(graph)
print(n_edges)

print("the total number of nodes:")
n_nodes = nx.number_of_nodes(graph)
print(n_nodes)

n_cc = nx.number_connected_components(graph)
print("the total number of connected components:")
print(n_cc)

print("the diameter of the graph:")
print(nx.diameter(graph))

avg_deg = sum([d for (n, d) in nx.degree(graph)]) / float(graph.number_of_nodes())
print("the average degree is " + str(avg_deg))

closeness_centrality = nx.closeness_centrality
pagerank_centrality = nx.pagerank
betweenness_centrality = nx.betweenness_centrality

# GCC ATTACK
clo_gcc_att = na.gcc_attack(graph, closeness_centrality)
pgr_gcc_att = na.gcc_attack(graph, pagerank_centrality)
bet_gcc_att = na.gcc_attack(graph, betweenness_centrality)
rnd_gcc = na.rnd_gcc_attack(graph, 1)
na.attack_measures_plot("The Giant Connected Component Size", clo_gcc_att,
                        pgr_gcc_att, bet_gcc_att, rnd_gcc)

# Diameter ATTACK
clo_dia_att = na.diameter_attack(graph, closeness_centrality)
# graph.py
import networkx as nx
import matplotlib.pyplot as plt
from random import random

g = nx.read_edgelist('GraphData.txt', create_using=nx.DiGraph(), nodetype=int)
print nx.info(g)
d = nx.degree(g)  # for node sizes
colors = [(random(), random(), random()) for _i in range(10)]  # for different node colors
nx.draw(g, nx.random_layout(g), with_labels=True,
        node_size=[v * 300 for v in d.values()],
        node_color=colors, alpha=0.7)
plt.show()
nx.draw_networkx_nodes(G, pos, alpha=0.6, node_size=350)
# edges
nx.draw_networkx_edges(G, pos, edgelist=elarge, width=2, alpha=0.9, edge_color='g')
nx.draw_networkx_edges(G, pos, edgelist=emidle, width=1.5, alpha=0.6, edge_color='y')
nx.draw_networkx_edges(G, pos, edgelist=esmall, width=1, alpha=0.3, edge_color='b', style='dashed')
# labels
nx.draw_networkx_labels(G, pos, font_size=10)
plt.axis('off')
plt.title("《红楼梦》社交网络")
plt.show()

# compute the degree of each node
Gdegree = nx.degree(G)
Gdegree = dict(Gdegree)
Gdegree = pd.DataFrame({"name": list(Gdegree.keys()),
                        "degree": list(Gdegree.values())})
Gdegree.sort_values(by="degree", ascending=False).plot(
    x="name", y="degree", kind="bar", figsize=(12, 6), legend=False)
plt.xticks(FontProperties=font, size=5)
plt.ylabel("degree")
plt.show()

plt.figure(figsize=(12, 12))
# build the social network graph
G = nx.Graph()
def Find_CNP(G, mixed_label=False):
    # Find all components of G
    G_components = list(nx.connected_components(G))
    G_temp = G.copy()
    edge_cut = []
    while len(G_components) != 0:
        min_ratioN1 = float('inf')
        min_ratioN2 = float('inf')
        componentOfG = G.subgraph(G_components[0]).copy()  # we get the first component and find cnp
        if len(componentOfG.nodes()) <= 3:
            del G_components[0]
            continue
        for v in G_components[0]:
            neighbor1 = list(componentOfG.neighbors(v))
            neighbor1.extend([v])
            induced_N1 = componentOfG.subgraph(neighbor1)
            # ----------- Clustering Coefficient
            length_n1 = len(neighbor1)
            length_e1 = len(list(induced_N1.edges()))
            if length_n1 == 1 or length_n1 == 0:
                cc = 0
            else:
                cc = 2 * length_e1 / (length_n1 * (length_n1 - 1))
            # ----- Calculating clustering coefficient is finished here! -------
            cutRatioN1_V4 = coherentCutRatio_V4(nx.degree(induced_N1), nx.degree(componentOfG, induced_N1.nodes()), cc)
            cutRatioN1_V1 = coherentCutRatio_V1(nx.degree(induced_N1), nx.degree(componentOfG, induced_N1.nodes()))
            cutRatioN1 = stat.mean([cutRatioN1_V4, cutRatioN1_V1])
            if cutRatioN1 < min_ratioN1:
                min_ratioN1 = cutRatioN1
                # calculate edge cut for this minimum cut ratio
                cnp_nodes1 = induced_N1.nodes()
                edge_cutN1 = edgeCutSet_V2(induced_N1, componentOfG)
                minN1_v4 = cutRatioN1_V4
                minN1_v1 = cutRatioN1_V1
            # Get neighbor2 for node v
            neighbor2 = neighbor1[:]
            [neighbor2.extend(list(componentOfG.neighbors(n))) for n in neighbor1]
            neighbor2 = list(set(neighbor2))
            induced_N2 = componentOfG.subgraph(neighbor2).copy()
            induced_N2 = induced_N2.copy()
            Complement_indN2 = nx.complement(induced_N2)
            if not nx.is_connected(Complement_indN2):  # we find the CNP
                # ----------- Clustering Coefficient
                length_n2 = len(neighbor2)
                length_e2 = len(list(induced_N2.edges()))
                if length_n2 == 1 or length_n2 == 0:
                    cc = 0
                else:
                    cc = 2 * length_e2 / (length_n2 * (length_n2 - 1))
                # ----- Calculating clustering coefficient is finished here! -------
                cutRatioN2_V4 = coherentCutRatio_V4(nx.degree(induced_N2), nx.degree(componentOfG, induced_N2.nodes()), cc)
                cutRatioN2_V1 = coherentCutRatio_V1(nx.degree(induced_N2), nx.degree(componentOfG, induced_N2.nodes()))
                cutRatioN2 = stat.mean([cutRatioN2_V4, cutRatioN2_V1])
                if cutRatioN2 < min_ratioN2:
                    min_ratioN2 = cutRatioN2
                    # calculate edge cut for this minimum cut ratio
                    cnp_nodes2 = induced_N2.nodes()
                    edge_cutN2 = edgeCutSet_V2(induced_N2, componentOfG)
                    minN2_v4 = cutRatioN2_V4
                    minN2_v1 = cutRatioN2_V1
            else:
                cut_node = my_cut_nodes(Complement_indN2, mixed_label)
                if len(cut_node) == 0:
                    print("for node ", v, "removing ", cut_node, " is not enough and we need to remove more than K nodes")
                    return
                # we may find different cut_node sets; if we have more than one,
                # choose a set with minimum weight
                if len(cut_node) > 1:
                    minweight = float('inf')
                    for n in cut_node:
                        # we calculate the score for each set separately and
                        # then choose the one with the minimum score
                        temp_N2 = induced_N2.copy()
                        temp_N2.remove_nodes_from(n)
                        # ----------- Clustering Coefficient
                        length_n2 = len(temp_N2.nodes())
                        length_e2 = len(list(temp_N2.edges()))
                        if length_n2 == 1 or length_n2 == 0:
                            cc = 0
                        else:
                            cc = 2 * length_e2 / (length_n2 * (length_n2 - 1))
                        # ----- Calculating clustering coefficient is finished here! -------
                        cutRatioN2_V4 = coherentCutRatio_V4(nx.degree(temp_N2), nx.degree(componentOfG, temp_N2.nodes()), cc)
                        cutRatioN2_V1 = coherentCutRatio_V1(nx.degree(temp_N2), nx.degree(componentOfG, temp_N2.nodes()))
                        temp_score = stat.mean([cutRatioN2_V4, cutRatioN2_V1])
                        if temp_score < minweight:
                            minweight = temp_score
                            minWeightNode = n
                    cut_node = minWeightNode
                else:
                    # flatten the cut node
                    cut_node = [node for sublist in cut_node for node in sublist]
                induced_N2.remove_nodes_from(cut_node)
                # ----------- Clustering Coefficient
                length_n2 = len(induced_N2.nodes())
                length_e2 = len(list(induced_N2.edges()))
                if length_n2 == 1 or length_n2 == 0:
                    cc = 0
                else:
                    cc = 2 * length_e2 / (length_n2 * (length_n2 - 1))
                # ----- Calculating clustering coefficient is finished here! -------
                cutRatioN2_V4 = coherentCutRatio_V4(nx.degree(induced_N2), nx.degree(componentOfG, induced_N2.nodes()), cc)
                cutRatioN2_V1 = coherentCutRatio_V1(nx.degree(induced_N2), nx.degree(componentOfG, induced_N2.nodes()))
                cutRatioN2 = stat.mean([cutRatioN2_V4, cutRatioN2_V1])
                if cutRatioN2 < min_ratioN2:
                    min_ratioN2 = cutRatioN2
                    # calculate edge cut for this minimum cut ratio
                    cnp_nodes2 = induced_N2.nodes()
                    edge_cutN2 = edgeCutSet_V2(induced_N2, componentOfG)
                    minN2_v4 = cutRatioN2_V4
                    minN2_v1 = cutRatioN2_V1
        if min_ratioN1 < min_ratioN2:
            edge_cut.append(edge_cutN1)
            cnp_nodes = cnp_nodes1
        else:
            edge_cut.append(edge_cutN2)
            cnp_nodes = cnp_nodes2
        G_temp.remove_nodes_from(cnp_nodes)
        G_components = list(nx.connected_components(G_temp))
    edge_cut = [edge for sublist in edge_cut for edge in sublist]
    if not mixed_label:
        edge_cut = list(set(tuple(sorted(x)) for x in edge_cut))
    else:
        sorted_x = []
        for i in range(len(edge_cut)):
            intList = sorted([i for i in edge_cut[i] if type(i) is int])
            strList = sorted([i for i in edge_cut[i] if type(i) is str])
            sorted_x.append(intList + strList)
        edge_cut = list(set(tuple(i) for i in sorted_x))
    return edge_cut
for asin, metadata in amazonBooks.items():
    copurchaseGraph.add_node(asin)
    for a in metadata['Copurchased'].split():
        copurchaseGraph.add_node(a.strip())
        similarity = 0
        n1 = set((amazonBooks[asin]['Categories']).split())
        n2 = set((amazonBooks[a]['Categories']).split())
        n1In2 = n1 & n2
        n1Un2 = n1 | n2
        if len(n1Un2) > 0:
            similarity = round(len(n1In2) / len(n1Un2), 2)
        copurchaseGraph.add_edge(asin, a.strip(), weight=similarity)

# get degree centrality and clustering coefficients
# of each ASIN and add it to amazonBooks metadata
dc = networkx.degree(copurchaseGraph)
for asin in networkx.nodes(copurchaseGraph):
    metadata = amazonBooks[asin]
    metadata['DegreeCentrality'] = int(dc[asin])
    ego = networkx.ego_graph(copurchaseGraph, asin, radius=1)
    metadata['ClusteringCoeff'] = round(networkx.average_clustering(ego), 2)
    amazonBooks[asin] = metadata

# write amazonBooks data to file
# (all except copurchase data - because that data is now in the graph)
fhw = open('./amazon-books.txt', 'w', encoding='utf-8', errors='ignore')
fhw.write("Id\t" + "ASIN\t" + "Title\t" + "Categories\t" + "Group\t"
          # + "Copurchased\t"
          + "SalesRank\t" + "TotalReviews\t" + "AvgRating\t"
          + "DegreeCentrality\t" + "ClusteringCoeff\n")
for asin, metadata in amazonBooks.items():
def cal_nei_sim(disease, edges, save_nei_sim=False):
    print("begin to calculate similarity based on neighbours...")
    G = nx.Graph()
    G.add_edges_from(edges)  # build a heterogeneous network from multiple kinds of biological information
    print("step 1: epsilon -> 2, calculate first degree sequence and second degree sequence...")
    DegreeSequence1 = []
    DegreeSequence2 = []
    for di in disease:
        neighboursOne = G.neighbors(di)  # get the node's first-layer neighbours
        degreeOfOne = []
        neghboursTwo = []
        for indexOfNeighbours in neighboursOne:
            degreeOfOne.append(nx.degree(G, indexOfNeighbours))  # save the degrees of the first-layer neighbours
            neghboursTwo.extend(G.neighbors(indexOfNeighbours))  # collect the neighbours of the first-layer neighbours
        sortedDegreeOfOne = sorted(degreeOfOne)  # sort the degrees of the first-layer neighbours
        DegreeSequence1.append(sortedDegreeOfOne)
        neghboursTwo = set(neghboursTwo)
        neghboursTwo.remove(di)  # remove the node itself from the second-layer neighbours
        degreeOfTwo = []
        for indexOfNeighbours in neghboursTwo:
            degreeOfTwo.append(nx.degree(G, indexOfNeighbours))  # save the degrees of the second-layer neighbours
        sortedDegreeOfTwo = sorted(degreeOfTwo)  # sort the degrees of the second-layer neighbours
        DegreeSequence2.append(sortedDegreeOfTwo)

    cores = multiprocessing.cpu_count()  # get the number of CPU cores
    pool = multiprocessing.Pool(cores)   # create a worker pool
    print("step 2: compute neighbour_sim in parallel with {} cpus...".format(cores))
    # dispatch the pairwise first-layer comparisons to the pool
    resultsOne = [
        pool.apply_async(dtw_distance_fast, (DegreeSequence1[i], DegreeSequence1[j]))
        for i in range(0, len(DegreeSequence1))
        for j in range(i + 1, len(DegreeSequence1))
    ]
    # store the pairwise first-layer degree-sequence results in an array
    arrOne = np.zeros((len(DegreeSequence1), len(DegreeSequence1)))
    i = 0
    j = 1
    for r in resultsOne:
        if j == len(DegreeSequence1):
            i += 1
            j = i + 1
        arrOne[i][j] = float(r.get())
        j += 1
    # dispatch the pairwise second-layer comparisons to the pool
    resultsTwo = [
        pool.apply_async(dtw_distance_fast, (DegreeSequence2[i], DegreeSequence2[j]))
        for i in range(0, len(DegreeSequence2))
        for j in range(i + 1, len(DegreeSequence2))
    ]
    # store the pairwise second-layer degree-sequence results in an array
    arrTwo = np.zeros((len(DegreeSequence2), len(DegreeSequence2)))
    i = 0
    j = 1
    for r in resultsTwo:
        if j == len(DegreeSequence2):
            i += 1
            j = i + 1
        arrTwo[i][j] = float(r.get())
        j += 1
    # ----------------------------------------------------------------------------
    print("step 3: construct similarity matrix...")
    alpha = 0.5  # a decaying weight factor α in the range between 0 and 1
    NeiSim = {}
    sim_matrix = np.zeros((len(disease), len(disease)))
    for i in range(0, len(disease)):
        for j in range(i + 1, len(disease)):
            distance = math.pow(alpha, 1) * arrOne[i][j] + math.pow(alpha, 2) * arrTwo[i][j]
            NeiSim["{}\t{}".format(disease[i], disease[j])] = math.exp(-distance)
            sim_matrix[i][j] = math.exp(-distance)
            sim_matrix[j][i] = math.exp(-distance)
    if save_nei_sim:
        print("sort the path similarity and save...")
        res = sorted(NeiSim.items(), key=lambda x: x[1], reverse=True)
        FileUtil.writeSortedDic2File(res, "./nei_Sim.txt")
    return sim_matrix
jet_grad = np.linspace(0, 1, 256)  # Jet gradient for Old->New
cbar_ax.imshow(np.vstack((jet_grad, jet_grad)), aspect='auto', cmap=cm.jet)

fig.savefig(SAVE_FILE_NAME_IN_OUT, dpi=300)
fig.savefig(SAVE_FILE_NAME_IN_OUT_PNG, dpi=300)

# plot clustering vs degree and nodal efficiency
fig, axs = plt.subplots(1, 2, figsize=(8, 3.75), tight_layout=True, facecolor='white')
cc_full = nx.clustering(G.to_undirected())
deg_full = nx.degree(G.to_undirected())
cc = [cc_full[node] for node in nodes]
deg = [deg_full[node] for node in nodes]

# calculate nodal efficiency
G.efficiency_matrix = metrics_bd.efficiency_matrix(G)
nodal_efficiency = np.sum(G.efficiency_matrix, axis=1) / (len(G.nodes()) - 1)

labels = ('a', 'b')
axs[0].scatter(deg, cc, c=node_ages, cmap=cm.jet, lw=0)
axs[0].set_xlim(0, 150)
axs[0].set_ylim(-0.025, 1.025)
axs[0].set_xlabel('Degree')
axs[0].set_ylabel('Clustering coefficient')
axs[0].locator_params(axis='x', nbins=6)
print(random_node)
print("Executing MHRW...")
sample = MHRW()
sample.mhrw(G, random_node, size)
print("Writing sample network...")
nx.write_edgelist(sample.G1, "data/JS_sample_network_75.csv", delimiter=",", data=False)
G.clear()
G = sample.G1
DG = nx.degree(G)
num_nodes = 0
sum_degree = 0
for i in DG:
    num_nodes += 1
    sum_degree += i[1]
print("Total degree of the network:", sum_degree)
print("Average degree:", (sum_degree / num_nodes))
print("Nodes:", nx.number_of_nodes(G))
print("Edges:", nx.number_of_edges(G))
print("Density:", nx.density(G))
AC = nx.average_clustering(G)
def _dissolve_adjacent(_target_graph: nx.Graph,
                       _parent_node_name: str,
                       _node_group: Union[set, list, tuple],
                       highest_degree=False) -> nx.Graph:
    # set the new centroid from the centroid of the node group's Multipoint:
    node_geoms = []
    if not highest_degree:
        for n_uid in _node_group:
            x = _target_graph.nodes[n_uid]['x']
            y = _target_graph.nodes[n_uid]['y']
            node_geoms.append(geometry.Point(x, y))
    # if by highest_degree, then find the centroid of the highest degree nodes
    else:
        highest_degree = 0
        for n_uid in _node_group:
            if n_uid in _target_graph:
                if nx.degree(_target_graph, n_uid) > highest_degree:
                    highest_degree = nx.degree(_target_graph, n_uid)
        # aggregate the highest degree nodes
        node_geoms = []
        for n_uid in _node_group:
            if n_uid not in _target_graph:
                continue
            if nx.degree(_target_graph, n_uid) != highest_degree:
                continue
            x = _target_graph.nodes[n_uid]['x']
            y = _target_graph.nodes[n_uid]['y']
            # append geom
            node_geoms.append(geometry.Point(x, y))
    # find the new centroid
    c = geometry.MultiPoint(node_geoms).centroid
    _target_graph.add_node(_parent_node_name, x=c.x, y=c.y)
    # remove old nodes and reassign to new parent node
    # first determine new edges
    new_edges = []
    for uid in _node_group:
        for nb_uid in nx.neighbors(_target_graph, uid):
            # drop geoms between merged nodes
            # watch for self-loop edge cases
            if uid in _node_group and nb_uid in _node_group and uid != nb_uid:
                continue
            else:
                if 'geom' not in _target_graph[uid][nb_uid]:
                    raise KeyError(f'Missing "geom" attribute for edge {uid}-{nb_uid}')
                line_geom = _target_graph[uid][nb_uid]['geom']
                if line_geom.type != 'LineString':
                    raise TypeError(
                        f'Expecting LineString geometry but found {line_geom.type} geometry for edge {uid}-{nb_uid}.')
                # first orient geom in correct direction
                s_x = _target_graph.nodes[uid]['x']
                s_y = _target_graph.nodes[uid]['y']
                # check geom coordinates directionality - flip if facing backwards direction
                if not np.allclose((s_x, s_y), line_geom.coords[0][:2], atol=0.001, rtol=0):
                    line_geom = geometry.LineString(line_geom.coords[::-1])
                # double check that coordinates now face the forwards direction
                if not np.allclose((s_x, s_y), line_geom.coords[0][:2], atol=0.001, rtol=0):
                    raise ValueError(
                        f'Edge geometry endpoint coordinate mismatch for edge {uid}-{nb_uid}')
                # update geom starting point to new parent node's coordinates
                coords = list(line_geom.coords)
                coords[0] = (c.x, c.y)
                # if self-loop, then the end also needs updating
                if uid == nb_uid:
                    coords[-1] = (c.x, c.y)
                    target_uid = _parent_node_name
                else:
                    target_uid = nb_uid
                new_line_geom = geometry.LineString(coords)
                new_edges.append((_parent_node_name, target_uid, new_line_geom))
    # remove the nodes from the target graph, this will also implicitly drop related edges
    _target_graph.remove_nodes_from(_node_group)
    # add the edges
    for s, e, geom in new_edges:
        # when dealing with a collapsed linestring, this should be a rare occurrence
        if geom.length == 0:
            logger.warning(f'Encountered a geom of length 0m: check edge {s}-{e}.')
            continue
        # don't add edge duplicates from respectively merged nodes
        if (s, e) not in _target_graph.edges():
            _target_graph.add_edge(s, e, geom=geom)
        # however, do add if substantially different geom...
        else:
            diff = _target_graph[s][e]['geom'].length / geom.length
            if abs(diff) > 1.25:
                _target_graph.add_edge(s, e, geom=geom)
    return _target_graph
def de_clip(filename, n_nodes, hinge_list, gt_file):
    n_iter = 5
    f = open(filename)
    line1 = f.readline()
    print line1
    f.close()
    extension = filename.split('.')[-1]
    if extension == 'graphml':
        g = input3(filename)
    elif len(line1.split()) != 2:
        g = input1(filename)
    else:
        g = input2(filename)
    print nx.info(g)
    degree_sequence = sorted(g.degree().values(), reverse=True)
    print Counter(degree_sequence)
    degree_sequence = sorted(nx.degree(g).values(), reverse=True)
    print Counter(degree_sequence)
    try:
        import ujson
        mapping = ujson.load(open(gt_file))
        print 'getting mapping'
        mapped_nodes = 0
        print str(len(mapping))
        print str(len(g.nodes()))
        for node in g.nodes():
            # print node
            node_base = node.split("_")[0]
            # print node_base
            if mapping.has_key(node_base):
                g.node[node]['aln_start'] = min(mapping[node_base][0][0], mapping[node_base][0][1])
                g.node[node]['aln_end'] = max(mapping[node_base][0][1], mapping[node_base][0][0])
                g.node[node]['chr'] = mapping[node_base][0][2]
                mapped_nodes += 1
            else:
                # pass
                g.node[node]['aln_start'] = 0
                g.node[node]['aln_end'] = 0
                g.node[node]['aln_strand'] = 0
        for edge in g.edges_iter():
            in_node = edge[0]
            out_node = edge[1]
            if ((g.node[in_node]['aln_start'] < g.node[out_node]['aln_start'] and
                 g.node[out_node]['aln_start'] < g.node[in_node]['aln_end']) or
                (g.node[in_node]['aln_start'] < g.node[out_node]['aln_end'] and
                 g.node[out_node]['aln_end'] < g.node[in_node]['aln_end'])):
                g.edge[in_node][out_node]['false_positive'] = 0
            else:
                g.edge[in_node][out_node]['false_positive'] = 1
    except:
        raise
        # print "json " + filename.split('.')[0] + '.mapping.json' + " not found. exiting."
    print hinge_list
    print str(mapped_nodes) + " out of " + str(len(g.nodes())) + " nodes mapped."
    # for i in range(5):
    #     merge_simple_path(g)
    #     degree_sequence = sorted(nx.degree(g).values(), reverse=True)
    #     print Counter(degree_sequence)
    in_hinges = set()
    out_hinges = set()
    num_iter = 10000
    iter_done = 0
    if hinge_list != None:
        print "Found hinge list."
        with open(hinge_list, 'r') as f:
            for lines in f:
                lines1 = lines.split()
                if lines1[2] == '1':
                    in_hinges.add(lines1[0] + '_0')
                    out_hinges.add(lines1[0] + '_1')
                elif lines1[2] == '-1':
                    in_hinges.add(lines1[0] + '_1')
                    out_hinges.add(lines1[0] + '_0')
        print str(len(in_hinges)) + ' hinges found.'
        for node in g.nodes():
            if node in in_hinges and node in out_hinges:
                g.node[node]['hinge'] = 100
            elif node in in_hinges:
                g.node[node]['hinge'] = 10
            elif node in out_hinges:
                g.node[node]['hinge'] = -10
            else:
                g.node[node]['hinge'] = 0
        while len(g.nodes()) > n_nodes and iter_done < num_iter:
            node = g.nodes()[random.randrange(len(g.nodes()))]
            iter_done += 1
            # print iter_done
            if g.in_degree(node) == 1 and g.out_degree(node) == 1:
                base_node = node.split("_")[0]
                orintation = node.split("_")[1]
                # if orintation == '1':
                #     node2 = base_node + '_0'
                # else:
                #     node2 = base_node + '_1'
                # print node, node2
                in_node = g.in_edges(node)[0][0]
                out_node = g.out_edges(node)[0][1]
                if g.node[node]['hinge'] == 0 and g.node[in_node]['hinge'] == 0 and g.node[out_node]['hinge'] == 0:
                    if g.out_degree(in_node) == 1 and g.in_degree(out_node) == 1:
                        if in_node != node and out_node != node and in_node != out_node:
                            bad_node = False
                            # print g.in_edges(node)
                            # print g.edge[g.in_edges(node)[0][0]][g.in_edges(node)[0][1]]
                            # print g.out_edges(node)
                            for in_edge in g.in_edges(node):
                                if g.edge[in_edge[0]][in_edge[1]]['false_positive'] == 1:
                                    bad_node = True
                            for out_edge in g.out_edges(node):
                                if g.edge[out_edge[0]][out_edge[1]]['false_positive'] == 1:
                                    bad_node = True
                            if not bad_node:
                                # print in_node, node, out_node
                                merge_path(g, in_node, node, out_node)
                                # print g.edge[edge1[0]][edge1[1]]['hinge_edge']
        for nd in g.nodes():
            if len(nd.split("_")) == 1:
                print nd + " in trouble"
        # in_node = g.in_edges(node2)[0][0]
        # out_node = g.out_edges(node2)[0][1]
        # if g.node[node2]['hinge'] == 0 and g.node[in_node]['hinge'] == 0 and g.node[out_node]['hinge'] == 0:
        #     if g.out_degree(in_node) == 1 and g.in_degree(out_node) == 1:
        #         if in_node != node2 and out_node != node2 and in_node != out_node:
        #             bad_node = False
        #             for in_edge in g.in_edges(node2):
        #                 if g.edge[in_edge] == 1:
        #                     bad_node = True
        #             for out_edge in g.out_edges(node2):
        #                 if g.edge[out_edge] == 1:
        #                     bad_node = True
        #             if not bad_node:
        #                 # print in_node, node, out_node
        #                 merge_path(g, in_node, node2, out_node)
        # for nd in g.nodes():
        #     print nd
    else:
        while len(g.nodes()) > n_nodes:
            node = g.nodes()[random.randrange(len(g.nodes()))]
            if g.in_degree(node) == 1 and g.out_degree(node) == 1:
                # assert g.in_degree(node2) == 1 and g.out_degree(node2) == 1
                # edge_1 = g.out_edges(node)[0]
                # edge_2 = g.in_edges(node)[0]
                edge1 = g.out_edges(node)[0]
                edge2 = g.in_edges(node)[0]
                # print g.edge[edge1[0]][edge1[1]]['hinge_edge']
                if (g.edge[edge1[0]][edge1[1]]['hinge_edge'] == -1 and
                        g.edge[edge2[0]][edge2[1]]['hinge_edge'] == -1):
                    in_node = g.in_edges(node)[0][0]
                    out_node = g.out_edges(node)[0][1]
                    if g.out_degree(in_node) == 1 and g.in_degree(out_node) == 1:
                        if in_node != node and out_node != node and in_node != out_node:
                            # print in_node, node, out_node
                            merge_path(g, in_node, node, out_node)
    degree_sequence = sorted(nx.degree(g).values(), reverse=True)
    print Counter(degree_sequence)
    nx.write_graphml(g, filename.split('.')[0] + '.sparse3.graphml')
    print nx.number_weakly_connected_components(g)
    print nx.number_strongly_connected_components(g)
def calculate_preferential_bias_on_node(network, node):
    return (float(nx.degree(network, node) + 1) /
            float(sum(nx.degree(network).values()) + nx.number_of_nodes(network)))
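# Sanity-check sketch (assumes the NetworkX 1.x dict-returning nx.degree used
# above): the biases (deg(v) + 1) / (sum_of_degrees + N) form a proper
# probability distribution over the nodes.
demo_G = nx.path_graph(4)  # degrees 1, 2, 2, 1
demo_probs = [calculate_preferential_bias_on_node(demo_G, v) for v in demo_G.nodes()]
assert abs(sum(demo_probs) - 1.0) < 1e-9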
def main():
    event_data = np.genfromtxt('data/events_US_air_traffic_GMT.txt', names=True, dtype=int)
    event_data.sort(order=['StartTime'])
    network = nx.read_weighted_edgelist('data/aggregated_US_air_traffic_network_undir.edg')
    n_nodes = network.number_of_nodes()

    # creation of bins for the plots
    min_timestemp = min(event_data, key=lambda item: item["StartTime"])[2]
    max_timestemp = max(event_data, key=lambda item: item["EndTime"])[3]
    n_bins = 50
    bins = create_bins(min_timestemp, max_timestemp, n_bins)

    ######################################
    #               task 1               #
    ######################################
    print("-------------- TASK 1 --------------")
    infection_times, infection_list = infection_time(event_data, 1, 0)
    print("Node 41 infection time: " + str(infection_times['41']) +
          " (" + str(datetime.fromtimestamp(infection_times['41'])) + ")")
    # animation of the infection
    # visualize_si(np.array(infection_list), save_fname="./simulations/infection_simulation_prob1_seed0.mp4")

    ######################################
    #               task 2               #
    ######################################
    print("-------------- TASK 2 --------------")
    seed_node = 0
    infection_prob = [0.01, 0.05, 0.1, 0.5, 1.0]
    infection_times_list_avg = []
    infection_times_list_probs = []
    for prob in infection_prob:
        for i in range(10):
            _, infection_list = infection_time(event_data, prob, seed_node)
            infection_times_list_avg.append(infection_list)
        infection_times_list_probs.append(infection_times_list_avg)
        infection_times_list_avg = []
    plot_avg_prevalence_probs(infection_times_list_probs, infection_prob, n_nodes, bins)

    ######################################
    #               task 3               #
    ######################################
    print("-------------- TASK 3 --------------")
    infection_prob = 0.1
    seed_nodes = [0, 4, 41, 100, 200]
    seed_nodes_labels = ['ABE', 'ATL', 'ACN', 'HSV', 'DBQ']
    infection_times_list_avg = []
    infection_times_list_nodes = []
    for seed_node in seed_nodes:
        for i in range(10):
            _, infection_list = infection_time(event_data, infection_prob, seed_node)
            infection_times_list_avg.append(infection_list)
        infection_times_list_nodes.append(infection_times_list_avg)
        infection_times_list_avg = []
    plot_avg_prevalence_nodes(infection_times_list_nodes, seed_nodes_labels, n_nodes, bins)

    ######################################
    #               task 4               #
    ######################################
    print("-------------- TASK 4 --------------")
    # ----- task 4 and 5 ----- #
    clustering_coefficient_net = nx.clustering(network)
    degree_net = nx.degree(network)
    strength_net = nx.degree(network, weight="weight")
    betweenness_centrality_net = nx.betweenness_centrality(network)
    # ------------------------ #
    infection_prob = 0.5
    infection_times_list = []
    for i in range(50):
        seed_node = random.randint(0, n_nodes)
        infection_times, _ = infection_time(event_data, infection_prob, seed_node)
        infection_times_list.append(infection_times)
    infection_times_df = pd.DataFrame(infection_times_list)
    infection_times_median = dict(infection_times_df.median())
    plot_and_spearman_task4(infection_times_median, clustering_coefficient_net,
                            degree_net, strength_net, betweenness_centrality_net, n_nodes)

    ######################################
    #               task 5               #
    ######################################
    print("-------------- TASK 5 --------------")
    # nodes immunized
    imm_neighbour = []
    range_nodes = set(range(0, n_nodes))
    while len(imm_neighbour) < 10:
        rand_node = random.choice(list(range_nodes))
        rand_neighbour = random.choice(list(network.neighbors(str(rand_node))))
        if int(rand_neighbour) not in imm_neighbour:
            imm_neighbour.append(int(rand_neighbour))

    imm_random_node = []
    range_nodes = set(range(0, n_nodes))
    for i in range(10):
        rand_node = random.choice(list(range_nodes))
        imm_random_node.append(rand_node)
        range_nodes.remove(rand_node)

    imm_clustering_coefficient = []
    d = Counter(clustering_coefficient_net)
    for k, _ in d.most_common(10):
        imm_clustering_coefficient.append(int(k))

    imm_degree = []
    highest_degree = sorted(degree_net, key=lambda x: x[1], reverse=True)[:10]
    for k, _ in highest_degree:
        imm_degree.append(int(k))

    imm_strength = []
    highest_strength = sorted(strength_net, key=lambda x: x[1], reverse=True)[:10]
    for k, _ in highest_strength:
        imm_strength.append(int(k))

    imm_betweenness_centrality = []
    d = Counter(betweenness_centrality_net)
    for k, _ in d.most_common(10):
        imm_betweenness_centrality.append(int(k))

    # create a set of all the immunized nodes
    imm_nodes = set(imm_neighbour) | set(imm_random_node) | set(imm_clustering_coefficient) | \
        set(imm_degree) | set(imm_strength) | set(imm_betweenness_centrality)
    range_seed = set(range(0, n_nodes)) - imm_nodes
    # extract the seed nodes from a set of nodes not part of the immunized ones
    seed_nodes = []
    for i in range(20):
        rand_seed = random.choice(list(range_seed))
        seed_nodes.append(rand_seed)
        range_seed.remove(rand_seed)

    immunized_nodes_list = []
    immunized_nodes_list.append(imm_neighbour)
    immunized_nodes_list.append(imm_random_node)
    immunized_nodes_list.append(imm_clustering_coefficient)
    immunized_nodes_list.append(imm_degree)
    immunized_nodes_list.append(imm_strength)
    immunized_nodes_list.append(imm_betweenness_centrality)
    immunization_strategy_labels = [
        'random neighbour', 'random node', 'clustering coefficient',
        'degree', 'strength', 'betweenness centrality'
    ]

    infection_prob = 0.5
    infection_times_list_avg = []
    infection_times_list_immunization = []
    for immunized_nodes, imm_strategy in zip(immunized_nodes_list, immunization_strategy_labels):
        print("Calculating " + imm_strategy)
        for seed_node in seed_nodes:
            _, infection_list = infection_time(event_data, infection_prob, seed_node, immunized_nodes)
            infection_times_list_avg.append(infection_list)
        infection_times_list_immunization.append(infection_times_list_avg)
        infection_times_list_avg = []
    plot_avg_prevalence_immunization(infection_times_list_immunization,
                                     immunization_strategy_labels, n_nodes, bins)

    ######################################
    #               task 6               #
    ######################################
    print("-------------- TASK 6 --------------")
    id_data = np.genfromtxt('data/US_airport_id_info.csv', delimiter=',',
                            dtype=None, names=True, encoding=None)
    xycoords = {}
    for row in id_data:
        xycoords[str(row['id'])] = (row['xcoordviz'], row['ycoordviz'])
    edge_list = []
    for edge in network.edges():
        if int(edge[0]) > int(edge[1]):
            edge = (edge[1], edge[0])
        edge_list.append(edge)  # edge_list created to maintain the right order

    infection_prob = 0.5
    infecting_edges_fraction = []
    for i in range(20):
        seed_node = random.randint(0, n_nodes)
        infecting_edges = infection_edges(event_data, infection_prob, seed_node, edge_list)
        infecting_edges_fraction.append(infecting_edges)
    # calculation of the fraction of times that each link is used for
    # infecting the disease from the results of 20 runs
    infecting_edges_fraction = (np.sum(np.array(infecting_edges_fraction), 0) / 20).tolist()

    # print Transmission links - fraction
    fig, ax = plot_network_usa(network, xycoords, edges=edge_list,
                               linewidths=infecting_edges_fraction)
    plt.suptitle(r'Transmission links ($f_{ij}$)')
    fig.savefig("./plots/t6_map_fraction.pdf")

    # print Transmission links - mst
    maximum_spanning_tree = nx.maximum_spanning_tree(network)
    fig, ax = plot_network_usa(maximum_spanning_tree, xycoords,
                               edges=list(maximum_spanning_tree.edges))
    plt.suptitle(r'Transmission links (maximal spanning tree)')
    fig.savefig("./plots/t6_map_mst.pdf")

    link_weights = nx.get_edge_attributes(network, 'weight')
    link_betweenness_centrality = nx.edge_betweenness_centrality(network)
    # ordered lists (following the order of edge_list)
    link_weights_list = []
    link_betweenness_centrality_list = []
    for edge in edge_list:
        if edge in link_weights:
            link_weights_list.append(link_weights[edge])
        else:
            link_weights_list.append(link_weights[(edge[1], edge[0])])
        if edge in link_betweenness_centrality:
            link_betweenness_centrality_list.append(link_betweenness_centrality[edge])
        else:
            link_betweenness_centrality_list.append(link_betweenness_centrality[(edge[1], edge[0])])

    # scatter plot of the transmission fraction as a function of the link weight
    fig, ax = plot_scatterplot(link_weights_list, infecting_edges_fraction)
    plt.suptitle(r'Transmission fraction as a function of the link weight')
    ax.set_xlabel(r'link weight $w_{ij}$')
    ax.set_ylabel(r'transmission fraction $f_{ij}$')
    fig.savefig("./plots/t6_scatter_weight.pdf")

    # scatter plot of the transmission fraction as a function of the link betweenness centrality
    fig, ax = plot_scatterplot(link_betweenness_centrality_list, infecting_edges_fraction)
    plt.suptitle(r'Transmission fraction as a function of the link betweenness centrality')
    ax.set_xlabel(r'unweighted link betweenness centrality $eb_{ij}$')
    ax.set_ylabel(r'transmission fraction $f_{ij}$')
    fig.savefig("./plots/t6_scatter_bet_centr.pdf")

    # Spearman rank-correlation coefficient
    print("Spearman rank-correlation coefficient between transmission fraction and: ")
    print("- link weight: " + str(spearmanr(link_weights_list, infecting_edges_fraction).correlation))
    print("- betweenness centrality: " + str(spearmanr(link_betweenness_centrality_list, infecting_edges_fraction).correlation))
        # print(most_corelated_taxon[i][0], most_corelated_taxon[j][0])
        if not G.has_edge(i + 1, j + 1):
            G.add_edge(i + 1, j + 1)
        # print(scipy.stats.spearmanr(predicted_data.loc[:, most_corelated_taxon[i][0]], predicted_data.loc[:, most_corelated_taxon[j][0]])[1],
        #       scipy.stats.spearmanr(predicted_data.loc[:, most_corelated_taxon[i][0]],
        #                             predicted_data.loc[:, most_corelated_taxon[j][0]])[0])

# nx.draw(G, with_labels=True)
# print(nx.connected_components(G))
# print(nx.degree(G))
# print(sorted(i[1] for i in nx.degree(G)))
# print(nx.clustering(G))
# plt.show()
# print(G)
rel_dict = []
for i in nx.degree(G):
    # if i[1] != 0:
    print(i)
    print(labeldict[i[0]])
    rel_dict.append(labeldict[i[0]])
new_data = preproccessed_data[rel_dict]
# visualize_pca(new_data)
otu_after_pca, _ = apply_pca(new_data, n_components=2)
merged_data = otu_after_pca.join(OtuMf.mapping_file['DiagnosisGroup'])
merged_data.fillna(0)
mapping_disease_for_labels = {
    'Control': 0,
    'Cirrhosis/HCC': 1,
for node in nx.nodes(G):
    for tup in G.node[node]['conferences']:
        try:
            if tup[0] != conf:
                G2a.remove_node(node)
        except:
            continue
print("Done!")

# In[28]:

nx.info(G2a)

# In[41]:

'''Compute the degree for nodes which is the number of edges each node has.'''
dg_values = [d for _, d in nx.degree(G2a)]

# In[55]:

print(min(dg_values))
print(max(dg_values))

# In[44]:

# Degree histogram
sns.set_style("darkgrid")
sns.set_context({"figure.figsize": (6, 4)})
fig, ax = plt.subplots()
sns.distplot(dg_values, color="dodgerblue", bins=8, hist=True, kde=False)
plt.xlabel("Degree", fontsize=12)
plt.ylabel("Node frequencies", fontsize=12)
def process(graph):
    if not isinstance(graph, networkx.Graph):
        raise ValueError("invalid object; must be a NetworkX Graph class or subclass")

    if isinstance(graph, networkx.DiGraph):
        node_to_neighbors = lambda node: graph.successors(node) + graph.predecessors(node)
    else:
        node_to_neighbors = lambda node: graph.neighbors(node)

    node_to_degree = networkx.degree(graph)

    # phase 1: node assignment
    node_to_row, row_to_node = {}, []

    def node_to_row_(node):
        if node in node_to_row:
            return node_to_row[node]
        row_idx = len(node_to_row)
        node_to_row[node] = row_idx
        row_to_node.append(node)
        return row_idx

    while True:
        # find the node with highest degree that has not been assigned a row
        hub, hub_degree = _key_with_max_value(node_to_degree, node_to_row)
        if hub == None:
            break
        # add it as a new row
        node_to_row_(hub)
        # list its direct neighbors by decreasing degree
        hub_neighbors = reversed(sorted(node_to_neighbors(hub),
                                        key=lambda node: node_to_degree[node]))
        # add them as new rows, if not assigned already
        for node in hub_neighbors:
            node_to_row_(node)

    assert len(node_to_row) == graph.number_of_nodes()
    ###

    # phase 2: edge assignment
    edge_to_col, col_to_edge = {}, []

    def edge_to_col_(edge):
        if edge in edge_to_col:
            return edge_to_col[edge]
        if _order(*edge) in edge_to_col:
            return edge_to_col[_order(*edge)]
        edge_idx = len(edge_to_col)
        edge_to_col[edge] = edge_idx
        col_to_edge.append(edge)
        return edge_idx

    min_col, max_col = {}, {}
    for i, node_a in enumerate(row_to_node):
        for node_b in row_to_node[i + 1:]:
            if graph.has_edge(node_a, node_b):
                edge_idx = edge_to_col_((node_a, node_b))
                for node in (node_a, node_b):
                    min_col[node] = min(min_col.get(node, POS_INF), edge_idx)
                    max_col[node] = max(max_col.get(node, NEG_INF), edge_idx)

    assert len(edge_to_col) == graph.number_of_edges()
    ###

    return (row_to_node, node_to_row), (col_to_edge, edge_to_col), min_col, max_col
def select_observers(network, strategy, proportion=0.1, trade_off=0.1):
    """
    :param network: a networkx graph
    :param strategy: one of "max_degree", "min_degree", "max_k_shell",
        "min_k_shell", "max_betweenness", "min_betweenness",
        "max_closeness", "min_closeness", "random"
    :param proportion: fraction of nodes to select as observers
    :param trade_off: reserved for mixing the max_degree and min_degree
        strategies (currently unused)
    :return: list of observer nodes
    """
    observer_nodes_size = int(nx.number_of_nodes(G=network) * proportion)
    observers = []
    if strategy == "max_degree":
        degree_dict = dict(nx.degree(network))
        degree_sorted_by_value = sorted(degree_dict.items(),
                                        key=lambda x: x[1],
                                        reverse=True)
        observers = [x[0] for x in degree_sorted_by_value][:observer_nodes_size]
    elif strategy == "min_degree":
        degree_dict = dict(nx.degree(network))
        degree_sorted_by_value = sorted(degree_dict.items(), key=lambda x: x[1])
        observers = [x[0] for x in degree_sorted_by_value][:observer_nodes_size]
    elif strategy == "max_k_shell":
        k_core_dict = dict(nx.core_number(G=network))
        k_core_sorted = sorted(k_core_dict.items(),
                               key=lambda x: x[1],
                               reverse=True)
        observers = [x[0] for x in k_core_sorted][:observer_nodes_size]
    elif strategy == "min_k_shell":
        k_core_dict = dict(nx.core_number(G=network))
        k_core_sorted = sorted(k_core_dict.items(), key=lambda x: x[1])
        observers = [x[0] for x in k_core_sorted][:observer_nodes_size]
    elif strategy == "max_betweenness":
        between_centrality = dict(nx.betweenness_centrality(G=network))
        between_centrality_sorted = sorted(between_centrality.items(),
                                           key=lambda x: x[1],
                                           reverse=True)
        observers = [x[0] for x in between_centrality_sorted][:observer_nodes_size]
    elif strategy == "min_betweenness":
        between_centrality = dict(nx.betweenness_centrality(G=network))
        between_centrality_sorted = sorted(between_centrality.items(),
                                           key=lambda x: x[1])
        observers = [x[0] for x in between_centrality_sorted][:observer_nodes_size]
    elif strategy == "max_closeness":
        closeness_centrality = dict(nx.closeness_centrality(G=network))
        closeness_centrality_sorted = sorted(closeness_centrality.items(),
                                             key=lambda x: x[1],
                                             reverse=True)
        observers = [x[0] for x in closeness_centrality_sorted][:observer_nodes_size]
    elif strategy == "min_closeness":
        closeness_centrality = dict(nx.closeness_centrality(G=network))
        closeness_centrality_sorted = sorted(closeness_centrality.items(),
                                             key=lambda x: x[1])
        observers = [x[0] for x in closeness_centrality_sorted][:observer_nodes_size]
    elif strategy == "random":
        # sample actual node labels without replacement instead of assuming
        # integer labels 0..n-1 (np.random.randint may also repeat values)
        observers = list(np.random.choice(list(network.nodes()),
                                          observer_nodes_size,
                                          replace=False))
    return observers
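# A quick usage sketch for select_observers (illustrative; the graph model and
# proportion are arbitrary choices, not from the original script):
if __name__ == "__main__":
    import networkx as nx
    g = nx.barabasi_albert_graph(100, 3, seed=42)
    obs = select_observers(g, strategy="max_degree", proportion=0.1)
    print(len(obs), "observers:", obs)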
def fullSizeGraph(request):
    import json
    import django.http
    import pandas as pd
    import networkx

    # read_csv takes the uploaded file object directly
    df_enron = filterDataByTime(pd.read_csv(request.FILES['csv_data']))

    #from bokeh.io import output_notebook, show, save
    from bokeh.models import Range1d, Circle, ColumnDataSource, MultiLine
    from bokeh.plotting import figure
    from bokeh.models.graphs import from_networkx
    from bokeh.palettes import Category10
    from bokeh.transform import linear_cmap
    from bokeh.embed import json_item
    #output_notebook()  # remove this when not using notebook

    G = networkx.from_pandas_edgelist(df_enron, 'fromId', 'toId',
                                      edge_attr=True)
    di = {
        'CEO': 1,
        'Director': 2,
        'Employee': 3,
        'In House Lawyer': 4,
        'Manager': 5,
        'Managing Director': 6,
        'President': 7,
        'Trader': 8,
        'Unknown': 9,
        'Vice President': 10
    }
    df_rejob = df_enron.replace({"fromJobtitle": di})
    df_attributes = df_enron[['fromId', 'fromJobtitle']].drop_duplicates()
    df_attributes.columns = ['fromId', 'job']
    df_attributesx = df_rejob[['fromId', 'fromJobtitle']].drop_duplicates()
    job = df_attributes.set_index('fromId').to_dict('index')
    jobx = df_attributesx.set_index('fromId').to_dict('index')
    networkx.set_node_attributes(G, job)
    networkx.set_node_attributes(G, jobx)
    #jobs = ['Employee','Vice President','Unknown','Manager','CEO','Trader','Director','President','Managing Director','In House Lawyer']

    degrees = dict(networkx.degree(G))
    networkx.set_node_attributes(G, name='degree', values=degrees)
    # shrink node sizes to 70% of (degree + 5)
    adjusted_node_size = dict([(node, (degree + 5) - ((degree + 5) * 0.3))
                               for node, degree in networkx.degree(G)])
    networkx.set_node_attributes(G,
                                 name='adjusted_node_size',
                                 values=adjusted_node_size)
    size_by_this_attribute = 'adjusted_node_size'
    color_by_this_attribute = 'fromJobtitle'
    color_palette = Category10[10]
    TOOLTIPS = [
        ("Person ID", "@index"),
        ("people communicated with", "@degree"),
        ("Jobtitle", "@job"),
    ]
    plot = figure(tooltips=TOOLTIPS,
                  tools="pan,zoom_in,wheel_zoom,save,reset,box_select,undo",
                  active_scroll='wheel_zoom',
                  x_range=Range1d(-20, 20),
                  y_range=Range1d(-20, 20),
                  title='Enron Emails',
                  plot_width=950,
                  plot_height=950)
    plot.axis.visible = False
    N_graph = from_networkx(G, networkx.spring_layout, scale=100)
    N_graph.node_renderer.glyph = Circle(size=size_by_this_attribute,
                                         fill_color=linear_cmap(
                                             color_by_this_attribute,
                                             color_palette, 1, 10))
    # line_alpha must be in [0, 1]
    N_graph.edge_renderer.glyph = MultiLine(line_alpha=1, line_width=1)
    plot.renderers.append(N_graph)
    item_text = json.dumps(json_item(plot))
    return django.http.JsonResponse(item_text, safe=False)
def make_interactive_network(G,
                             labels=False,
                             title='My Network',
                             color_palette=Blues8,
                             node_size='degree',
                             node_color='modularity_class'):
    from networkx.algorithms import community

    # Get network info
    degrees = dict(networkx.degree(G))
    networkx.set_node_attributes(G, name='degree', values=degrees)
    betweenness_centrality = networkx.betweenness_centrality(G)
    networkx.set_node_attributes(G,
                                 name='betweenness',
                                 values=betweenness_centrality)
    communities = community.greedy_modularity_communities(G)

    # Create empty dictionaries
    modularity_color = {}
    modularity_class = {}
    # Loop through each community in the network (the loop variable is named
    # comm so it does not shadow the community module imported above)
    for community_number, comm in enumerate(communities):
        # For each member of the community, add their community number and a
        # distinct color (Spectral8 supports at most 8 communities)
        for name in comm:
            modularity_color[name] = Spectral8[community_number]
            modularity_class[name] = community_number
    networkx.set_node_attributes(G, modularity_color, 'modularity_color')
    networkx.set_node_attributes(G, modularity_class, 'modularity_class')

    # Choose colors for node and edge highlighting
    node_highlight_color = node_color
    edge_highlight_color = 'black'

    # Choose attributes from G network to size and color by — setting manual
    # size (e.g. 10) or color (e.g. 'skyblue') also allowed
    # Pick a color palette — Blues8, Reds8, Purples8, Oranges8, Viridis8

    # Establish which categories will appear when hovering over each node
    HOVER_TOOLTIPS = [
        ("Character", "@index"),
        ("Degree", "@degree"),
        ("Modularity Class", "@modularity_class"),
        ("Modularity Color", "$color[swatch]:modularity_color"),
    ]

    # Create a plot — set dimensions, toolbar, and title
    plot = figure(tooltips=HOVER_TOOLTIPS,
                  tools="pan,wheel_zoom,save,reset",
                  active_scroll='wheel_zoom',
                  x_range=Range1d(-10.1, 10.1),
                  y_range=Range1d(-10.1, 10.1),
                  title=title)

    # Create a network graph object
    # https://networkx.github.io/documentation/networkx-1.9/reference/generated/networkx.drawing.layout.spring_layout.html
    network_graph = from_networkx(G,
                                  networkx.spring_layout,
                                  scale=10,
                                  center=(0, 0))

    if node_color == 'degree':
        # Set node sizes and colors according to node degree
        # (color as spectrum of color palette)
        minimum_value_color = min(
            network_graph.node_renderer.data_source.data[node_color])
        maximum_value_color = max(
            network_graph.node_renderer.data_source.data[node_color])
        network_graph.node_renderer.glyph = Circle(
            size=node_size,
            fill_color=linear_cmap(node_color, color_palette,
                                   minimum_value_color, maximum_value_color))
    elif node_color == 'modularity_color':
        # Set node colors as a category from the modularity_color attribute
        network_graph.node_renderer.glyph = Circle(size=node_size,
                                                   fill_color=node_color)

    # Set node highlight colors
    network_graph.node_renderer.hover_glyph = Circle(
        size=node_size, fill_color=node_highlight_color, line_width=2)
    network_graph.node_renderer.selection_glyph = Circle(
        size=node_size, fill_color=node_highlight_color, line_width=2)

    # Set edge opacity and width
    network_graph.edge_renderer.glyph = MultiLine(line_alpha=0.3, line_width=1)
    # Set edge highlight colors
    network_graph.edge_renderer.selection_glyph = MultiLine(
        line_color=edge_highlight_color, line_width=2)
    network_graph.edge_renderer.hover_glyph = MultiLine(
        line_color=edge_highlight_color, line_width=2)

    # Highlight nodes and edges
    network_graph.selection_policy = NodesAndLinkedEdges()
    network_graph.inspection_policy = NodesAndLinkedEdges()

    plot.renderers.append(network_graph)

    if labels == True:
        # Add Labels
        x, y = zip(*network_graph.layout_provider.graph_layout.values())
        node_labels = list(G.nodes())
        source = ColumnDataSource({
            'x': x,
            'y': y,
            'name': [node_labels[i] for i in range(len(x))]
        })
        labels = LabelSet(x='x',
                          y='y',
                          text='name',
                          source=source,
                          background_fill_color='white',
                          text_font_size='10px',
                          background_fill_alpha=.7)
        plot.renderers.append(labels)

    show(plot)
    #save(plot, filename=f"{title}.html")
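# Usage sketch for make_interactive_network (assumes the Bokeh names used in
# the function body — figure, Range1d, Circle, MultiLine, from_networkx,
# linear_cmap, NodesAndLinkedEdges, LabelSet, ColumnDataSource, show — and the
# Blues8/Spectral8 palettes are imported at module level, as the code implies):
# import networkx
# g = networkx.karate_club_graph()
# make_interactive_network(g, labels=True, title='Karate Club')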
def build(Time, dis, window, in_case):
    location = '/Users/Abduljaleel/Desktop/project/USA/' + Time + '/' + dis + '/'
    location_Time = '/Users/Abduljaleel/Desktop/project/USA/' + Time + '/'
    st = open(location_Time + 'log_where.txt', 'a')
    st.write(str(dis + '_' + Time) + '\n')
    db = '/Users/Abduljaleel/Desktop/project/USA/SQLite/USA_' + dis
    ddb = sqlite3.connect(db)
    cc = ddb.cursor()
    cc.execute("SELECT * FROM NODES order by date")
    ddb.commit()
    results = cc.fetchall()
    total = len(results)  # renamed from `all`, which shadows the builtin
    limit_1 = firstdate(ddb, cc)
    window_time = add_time(limit_1, window)
    limit_2 = add_time(limit_1, in_case)
    last_tweet_date = lastdate(ddb, cc)
    start = time.time()
    w = 1
    while True:
        cc.execute("select * from nodes where date between " + str(limit_1) +
                   " and " + str(limit_2) + " order by date")
        ddb.commit()
        rs = cc.fetchall()
        aa = []
        for i in range(0, len(rs)):
            aa.append(rs[i][0])
        # w = loop(Net1,aa,w)
        w = fib3.loop(Net1, aa, w)
        limit_2 = add_time(limit_2, in_case)
        if limit_2 > window_time:
            break
    limit_1 = next_event(limit_1, ddb, cc)
    w = 1
    while True:
        cc.execute("select * from nodes where date between " + str(limit_1) +
                   " and " + str(limit_2) + " order by date")
        ddb.commit()
        rs = cc.fetchall()
        aa = []
        try:
            d = int(rs[0][0])
            e = float(d * 100) / total
            print int(e)
        except Exception:
            pass
        for i in range(0, len(rs)):
            aa.append(rs[i][0])
        # w = loop(Net1,aa,w)
        w = fib3.loop(Net1, aa, w)
        limit_1 = add_time(limit_1, in_case)
        limit_2 = add_time(limit_1, window)
        if limit_1 >= last_tweet_date:
            break
    print 'finish creating'
    threshold = w * 0.79
    H = nx.Graph([(u, v, d) for (u, v, d) in Net1.edges_iter(data=True)
                  if d['weight'] > threshold])
    print '------degree start------'
    deg = open(location + 'degree.txt', 'w')
    DD = open(location + 'DD.txt', 'w')
    data = []
    DDD = H.degree()
    for s in DDD:
        deg.write(str(nx.degree(H, s)) + "\n")
        data.append(nx.degree(H, s))
    for i in range(0, max(data)):
        j = i + 1
        count = 0
        for k in range(0, len(data)):
            if data[k] == j:
                count += 1
        DD.write(str(j) + '\t' + str(count) + '\n')
    print '------degree finished------'
    # st = open(location+'statistics.txt', 'a')
    # print 'cluster start'
    # cluster = nx.average_clustering(Net2)
    # print 'pl start'
    # pl = Net22.average_path_length()
    # n=0
    # summ=0
    # for g in nx.connected_component_subgraphs(Net2):
    #     summ+=float(nx.average_shortest_path_length(g))
    #     n+=1
    # summ = float(summ)/n
    # st.write('cluster :'+str(cluster)+'\n')
    # st.write('path_Len :'+str(summ)+'\n')
    print '------Writing Graphml start------'
    nx.write_graphml(H, location + dis + "_graph.graphml")
    sts = open(location_Time + 'log_sec.txt', 'a')
    end = time.time() - start
    sts.write(str(dis + '_' + Time) + '\t' + str(end) + '\n')
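# Hedged note: the script above targets Python 2 and networkx 1.x
# (edges_iter, dict-style degree). Under networkx 2.x the thresholding and
# degree-extraction steps would read (Net1 and threshold as above):
# H = nx.Graph((u, v, d) for u, v, d in Net1.edges(data=True)
#              if d['weight'] > threshold)
# degree_values = [d for _, d in H.degree()]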
def read_a_graph(path):
    print path
    graph_type = str(path[-4:])  # renamed from `type`, which shadows the builtin
    onlyfiles = [f for f in listdir(path) if isfile(join(path, f))]
    # print onlyfiles
    all_graphs = {}
    for f in onlyfiles:
        G = nx.read_edgelist(path + '/' + f, delimiter=" ")
        print "---------" + f + "----------"
        print "BEFORE"
        print "nodes:", len(G)
        print "edges:", G.number_of_edges()
        if G.has_node('-1'):
            G.remove_node('-1')
        print "AFTER"
        print "nodes:", len(G)
        print "edges:", G.number_of_edges()
        if G.number_of_edges() >= (len(G) / 4):
            print "TAKE THIS ONE: ", "-", graph_type, "-", f
            ########## CHECK HOW MANY ACCOUNTS OF OUR LIST ARE IN THE TWEET CHAIN ##########
            involved = []
            if graph_type == "fake":
                accounts = fake_accounts
            else:
                accounts = real_accounts
            for node in G.nodes():
                if int(node) in accounts:
                    involved.append(int(node))
            print "INVOLVED USERS:", len(involved)
            print involved
            #######################################################
            density = nx.density(G)
            degree = nx.degree(G)
            bc = nx.betweenness_centrality(G)
            cc = nx.closeness_centrality(G)
            graph = {"density": density, "degree": degree, "bc": bc, "cc": cc}
            all_graphs[f] = graph
            print "--------------------------"
            # plt.title(f)
            # nx.draw(G)
            # plt.show()
    # print all_graphs
    for g in all_graphs:
        print g
        cc_low = []
        cc_mid = []
        cc_high = []
        bc_low = []
        bc_mid = []
        bc_high = []
        degree_low = []
        degree_mid = []
        degree_high = []
        list_of_nodes = []
        for n in all_graphs[g]['cc']:
            list_of_nodes.append(n.encode("utf-8"))
        # for every characteristic split in 4 (plain equality instead of `is`,
        # whose behaviour on string literals is implementation-defined)
        for char in all_graphs[g]:
            if char != 'density':
                low, mid, high = get_four_split(char, all_graphs[g][char])
                if char == 'cc':
                    cc_low = low
                    cc_mid = mid
                    cc_high = high
                elif char == 'bc':
                    bc_low = low
                    bc_mid = mid
                    bc_high = high
                elif char == 'degree':
                    degree_low = low
                    degree_mid = mid
                    degree_high = high
        classified = []
        for n in list_of_nodes:
            if n in get_a_list_of_the_first_of_a_tuple(
                    cc_low) and n in get_a_list_of_the_first_of_a_tuple(
                        degree_high):
                classified.append((n, "A"))
            elif n in get_a_list_of_the_first_of_a_tuple(
                    degree_high) and n in get_a_list_of_the_first_of_a_tuple(
                        bc_low):
                classified.append((n, "B"))
            elif n in get_a_list_of_the_first_of_a_tuple(
                    cc_high) and n in get_a_list_of_the_first_of_a_tuple(
                        degree_low):
                classified.append((n, "C"))
            elif n in get_a_list_of_the_first_of_a_tuple(
                    cc_high) and n in get_a_list_of_the_first_of_a_tuple(
                        bc_low):
                classified.append((n, "D"))
            elif n in get_a_list_of_the_first_of_a_tuple(
                    bc_high) and n in get_a_list_of_the_first_of_a_tuple(
                        degree_low):
                classified.append((n, "E"))
            elif n in get_a_list_of_the_first_of_a_tuple(
                    bc_high) and n in get_a_list_of_the_first_of_a_tuple(
                        cc_low):
                classified.append((n, "F"))
            elif n in get_a_list_of_the_first_of_a_tuple(bc_high):
                classified.append((n, "G"))
            else:
                classified.append((n, "-"))
        with open(g + ".txt", "w") as text_file:
            for n in classified:
                print n
                text_file.write(str(n) + '\n')
        print "______________________________"
    counter = 0
    total = 0
    for g in all_graphs:
        # accumulate (the original assignment overwrote the running total)
        total += all_graphs[g]['density']
        counter = counter + 1
    avg = total / counter
    print "average density is", avg
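# get_four_split is an external helper; a hypothetical quartile-style split
# consistent with how its low/mid/high outputs are consumed above could look
# like this (name and behaviour are assumptions, not the original code):
def get_four_split_sketch(char_name, value_dict):
    """Split (node, value) pairs into bottom quartile, middle, and top quartile."""
    import numpy as np
    items = sorted(value_dict.items(), key=lambda kv: kv[1])
    values = [v for _, v in items]
    q1 = np.percentile(values, 25)
    q3 = np.percentile(values, 75)
    low = [(n, v) for n, v in items if v <= q1]
    high = [(n, v) for n, v in items if v >= q3]
    mid = [(n, v) for n, v in items if q1 < v < q3]
    return low, mid, high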
def get_link_measures(net):
    """ Compute weights and edge betweenness centralities
    :param net: network (every edge must carry a 'weight' attribute)
    :return: w: list of weights
             eb: list of unweighted edge betweenness centralities
             eb_w: list of weighted edge betweenness centralities
             eb_w2, eb_pr, eb_cl, eb_ev, eb_s: weighted betweenness normalized
                 by k-shell, pagerank, closeness, eigenvector and strength
    """
    w, eb, eb_w, eb_w2, eb_pr, eb_cl, eb_ev, eb_s = [], [], [], [], [], [], [], []

    # Edge weight and unweighted betweenness centrality
    edges = net.edges(data=True)
    betweenness_centr = nx.edge_betweenness_centrality(net, normalized=True)
    for e in edges:
        w.append(e[2]['weight'])
        eb.append(betweenness_centr[(e[0], e[1])])

    # Create a copy of the graph with inverse weights; the cube root is used
    # to reduce the impact of high weights
    net1 = net.copy()
    edges1 = net1.edges(data=True)
    for e in edges1:
        w_e = e[2]['weight']
        net1[e[0]][e[1]]['weight'] = 1 / (w_e**(1 / 3))

    # Weighted betweenness centrality on net1
    betweenness_centr_w = nx.edge_betweenness_centrality(net1,
                                                         normalized=True,
                                                         weight='weight')
    for e in edges1:
        eb_w.append(betweenness_centr_w[(e[0], e[1])])

    # Node dictionary for k-shells
    dict_k_shell = {}
    max_degree = max([net.degree(n) for n in net.nodes])
    for k in reversed(range(max_degree + 1)):
        k_shell = nx.k_shell(net1, k=k)
        k_shell_nodes = k_shell.nodes()
        for i in k_shell_nodes:
            if i not in dict_k_shell:
                dict_k_shell[i] = k

    # node dict for pagerank
    dict_page_rank = nx.pagerank(net1, weight='weight')

    # closeness centrality, ordered by the graph's node order
    nodes = list(net.nodes())
    closeness_centr = nx.closeness_centrality(net, distance='weight')
    closeness_centr = dict(
        sorted(closeness_centr.items(),
               key=lambda pair: nodes.index(pair[0])))

    # eigenvector centrality
    eigenvector_centr = nx.eigenvector_centrality(net, tol=10**-1,
                                                  weight='weight')
    eigenvector_centr = dict(
        sorted(eigenvector_centr.items(),
               key=lambda pair: nodes.index(pair[0])))

    # strengths of nodes
    strengths = dict(nx.degree(net1, weight='weight'))

    # For each edge, take the lower value of the centrality measure of the two
    # nodes and use it to normalize the previously computed weighted betweenness
    j = 0
    for e in edges:
        eb_w2.append(eb_w[j] / min(dict_k_shell[e[0]], dict_k_shell[e[1]]))
        eb_pr.append(eb_w[j] / min(dict_page_rank[e[0]], dict_page_rank[e[1]]))
        eb_cl.append(eb_w[j] / min(closeness_centr[e[0]], closeness_centr[e[1]]))
        eb_ev.append(eb_w[j] / min(eigenvector_centr[e[0]], eigenvector_centr[e[1]]))
        eb_s.append(eb_w[j] / min(strengths[e[0]], strengths[e[1]]))
        j = j + 1
    return w, eb, eb_w, eb_w2, eb_pr, eb_cl, eb_ev, eb_s
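# Usage sketch (any weighted graph works; every edge needs a 'weight'
# attribute, as get_link_measures assumes):
# import networkx as nx
# g = nx.les_miserables_graph()  # weighted co-occurrence graph bundled with networkx
# w, eb, eb_w, eb_w2, eb_pr, eb_cl, eb_ev, eb_s = get_link_measures(g)
# print(len(w), "edges measured")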
import networkx as nx
import matplotlib.pyplot as plt

n = 10  # 10 nodes
m = 20  # 20 edges
G = nx.gnm_random_graph(n, m)

# some properties
print("node degree clustering")
for v in nx.nodes(G):
    print('%s %d %f' % (v, nx.degree(G, v), nx.clustering(G, v)))

# print the adjacency list
print("print the adjacency list")
for line in nx.generate_adjlist(G):
    print(line)

nx.draw(G, node_size=250, with_labels=True)
plt.show()
# %%
with open('direct_conections_df.pkl', 'rb') as file:
    data = pickle.load(file)
Conexiones = pd.DataFrame(data)
Conex_1 = Conexiones.iloc[0:200, 0:6]

# %%
G = nx.from_pandas_edgelist(Conex_1,
                            source='p_cty_code',
                            target='d_cty_code',
                            create_using=nx.DiGraph())
from matplotlib.pyplot import figure
figure(figsize=(12, 9))
#nx.draw_shell(G,with_labels=True)
#nx.draw_circular(G)
print(nx.degree(G))

# to build another df showing the nodes and their number of connections:
conexion = {}
for x in G.nodes:
    conexion[x] = len(G[x])
s = pd.Series(conexion, name='Conexiones')
df2 = s.to_frame().sort_values('Conexiones', ascending=False)

#%%
# Density
print(nx.density(G))
# Clustering
print(nx.clustering(G))
# Similar to the previous command, item by item
for i in nx.clustering(G).items():
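# For a DiGraph it can also be useful to separate in- and out-degree; a short
# follow-up sketch building on G and pd as defined above:
# in_deg = pd.Series(dict(G.in_degree()), name='in')
# out_deg = pd.Series(dict(G.out_degree()), name='out')
# df_deg = pd.concat([in_deg, out_deg], axis=1).sort_values('out', ascending=False)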
def most_similar_result_with_newwork(self, data_size, topn):
    print(' -> creating most similar job matrix with network')
    df = pd.DataFrame()
    # note: seeding with the karate club graph keeps its 34 original nodes in
    # the final network alongside the job nodes added below
    G = nx.karate_club_graph()
    i = 0
    keys_list = list(self.doc2idx.keys())
    nodes = []  ## node list
    edges = []  ## edge list (stored as tuples)
    for job_id in keys_list:
        node_id = str(job_id).split('_')[2]
        job_id = 'Job_ID_' + str(job_id).split('_')[2]
        title = self.get_job_title(job_id)[0]
        title = f'{title}({str(job_id)})'
        similar_jobs = self.model.docvecs.most_similar(job_id,
                                                       topn=len(keys_list))
        sim_list = []
        for sim_job_id, score in similar_jobs:
            if score >= 0.8:
                nodes.append(node_id)  ## node list
                sim_job_titles = self.get_job_title(sim_job_id)[0]
                sim_job_id = sim_job_id.split('_')[2]
                # renamed from `input`, which shadows the builtin
                entry = f'{sim_job_titles}({sim_job_id})'
                sim_list.append(entry)
                temp_tuple = (node_id, sim_job_id, score)
                edges.append(temp_tuple)
            else:
                sim_list.append('')
        i = i + 1
        df.loc[:, title] = pd.Series(sim_list)
    df.to_csv(self.model_path + self.data_name + '_sim_title_result.csv',
              mode='w',
              encoding='utf-8')
    nodes = list(set(nodes))  # deduplicate
    print(len(nodes))
    print(nodes[:])
    print(edges[:])
    G.add_nodes_from(nodes)
    G.add_weighted_edges_from(edges)
    degree = nx.degree(G)
    print(degree)
    plt.figure(figsize=(20, 10))
    graph_pos = nx.spring_layout(G, k=0.42, iterations=17)
    nx.draw_networkx_labels(G, graph_pos,
                            font_size=10,
                            font_family='sans-serif')
    # nx.draw_networkx_nodes(G, graph_pos, node_size=[var * 50 for var in degree], cmap='jet')
    nx.draw_networkx_edges(G, graph_pos, edge_color='gray')
    nx.draw(G,
            node_size=[100 + v[1] * 100 for v in degree],
            with_labels=True)
    plt.show()
    return df
@author: samic """ import networkx as nx import numpy as np import pandas as pd import matplotlib.pyplot as plt #block = pd.read_csv('block1.csv') #block = pd.read_csv('block2.csv') #block = pd.read_csv('block3.csv') block = pd.read_csv('block4.csv') block.drop(block.columns[[1]], axis=1, inplace=True) toGraph = block.stack().reset_index() del block toGraph.columns = ['var1', 'var2', 'value'] toGraph_filtered = toGraph.loc[(toGraph['value'] > .8) & (toGraph['var1'] != toGraph['var2'])] G = nx.from_pandas_edgelist(toGraph_filtered, 'var1', 'var2') #g = nx.draw(G, node_size=10) # centrality dictNodes = nx.eigenvector_centrality(G, max_iter=1000) deg = nx.degree(G)
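# A short follow-up sketch: rank nodes by eigenvector centrality and by degree
# side by side (dictNodes and deg as computed above; top_n is arbitrary):
# top_n = 10
# top_by_centrality = sorted(dictNodes.items(), key=lambda kv: kv[1], reverse=True)[:top_n]
# top_by_degree = sorted(dict(deg).items(), key=lambda kv: kv[1], reverse=True)[:top_n]
# print(top_by_centrality)
# print(top_by_degree)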
        #sys.exit(1)
        # basic graph checks
        small_graph = Gp.networkx_graph(data_vertices, data_adjacency)
        big_graph = Gp.networkx_graph(struct_vertices, struct_adjacency)
        Gp.check_graphs_size(small_graph, big_graph)
        Gp.check_graphs_labels(small_graph, big_graph)
        print "Basic NOE graph analysis:\n"
        print "N (nodes, NOE graph) = ", len(small_graph.nodes())
        print "N (edges, NOE graph) = ", len(small_graph.edges())
        print "N (nodes, PDB graph) = ", len(big_graph.nodes())
        print "N (edges, PDB graph) = ", len(big_graph.edges())
        print "NOE sparsity >> ", len(small_graph.edges()) / float(len(big_graph.edges()))
        print "average degree >> ", np.mean(nx.degree(small_graph).values())
        print "median degree >>", np.median(nx.degree(small_graph).values())
        return data_vertices, data_adjacency, struct_vertices, struct_adjacency

    def subgraph_isomorphism(self, noe_vertices, noe_adjacency,
                             structure_vertices, structure_adjacency, Gp, tag):
        graph_noe, graph_noe_indexing = Gp.igraph_graph(
            noe_vertices, noe_adjacency)
        graph_structure, graph_structure_indexing = Gp.igraph_graph(
            structure_vertices, structure_adjacency)
        # instance of a class for subgraph isomorphism check and extraction
        EP = IgraphSubIso()
        # measure how long the subgraph isomorphism evaluation takes
        start_vf2 = time.time()
def bethe_hessian_matrix(G, r=None, nodelist=None):
    r"""Returns the Bethe Hessian matrix of G.

    The Bethe Hessian is a family of matrices parametrized by r, defined as
    H(r) = (r^2 - 1) I - r A + D where A is the adjacency matrix, D is the
    diagonal matrix of node degrees, and I is the identity matrix. It is equal
    to the graph laplacian when the regularizer r = 1.

    The default choice of regularizer should be the ratio [2]

    .. math::
      r_m = \left(\sum k_i \right)^{-1}\left(\sum k_i^2 \right) - 1

    Parameters
    ----------
    G : Graph
       A NetworkX graph
    r : float
       Regularizer parameter
    nodelist : list, optional
       The rows and columns are ordered according to the nodes in nodelist.
       If nodelist is None, then the ordering is produced by G.nodes().

    Returns
    -------
    H : scipy.sparse.csr_matrix
      The Bethe Hessian matrix of G, with parameter r.

    Examples
    --------
    >>> k = [3, 2, 2, 1, 0]
    >>> G = nx.havel_hakimi_graph(k)
    >>> H = nx.bethe_hessian_matrix(G)

    See Also
    --------
    bethe_hessian_spectrum
    adjacency_matrix
    laplacian_matrix

    References
    ----------
    .. [1] A. Saade, F. Krzakala and L. Zdeborová
       "Spectral clustering of graphs with the bethe hessian",
       Advances in Neural Information Processing Systems. 2014.
    .. [2] C. M. Lee, E. Levina
       "Estimating the number of communities in networks by spectral methods"
       arXiv:1507.00827, 2015.
    """
    import scipy as sp
    import scipy.sparse  # call as sp.sparse

    if nodelist is None:
        nodelist = list(G)
    if r is None:
        r = sum(d**2 for v, d in nx.degree(G)) / sum(d for v, d in nx.degree(G)) - 1
    A = nx.to_scipy_sparse_array(G, nodelist=nodelist, format="csr")
    n, m = A.shape
    # TODO: Rm csr_array wrapper when spdiags array creation becomes available
    D = sp.sparse.csr_array(
        sp.sparse.spdiags(A.sum(axis=1), 0, m, n, format="csr"))
    # TODO: Rm csr_array wrapper when eye array creation becomes available
    I = sp.sparse.csr_array(sp.sparse.eye(m, n, format="csr"))
    import warnings

    warnings.warn(
        "bethe_hessian_matrix will return a scipy.sparse array instead of a matrix in Networkx 3.0",
        FutureWarning,
        stacklevel=2,
    )
    # TODO: Remove the csr_matrix wrapper in NetworkX 3.0
    return sp.sparse.csr_matrix((r**2 - 1) * I - r * A + D)
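# Usage sketch (doctest style, matching the docstring above; requires scipy):
# >>> G = nx.karate_club_graph()
# >>> H = bethe_hessian_matrix(G)  # r defaults to the degree-ratio estimate
# >>> H.shape
# (34, 34)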
# ------ Network mining ------
# Often the data we analyse is stored as a network: nodes and edges describe
# the relationships between entities.
# This chapter introduces the basic steps for analysing such data with graph
# theory, using a library that helps us create, manipulate and study networks.
# In particular, we will see how to build meaningful visualizations and how to
# find densely connected groups of nodes.
# Graph theory makes it easy to import the structures most commonly used to
# describe this kind of data.
import networkx as nx
G = nx.read_gml("/Users/liding/E/Bdata/ptemp/liding/lesmiserables.gml")
# networkx 1.9.1 is required here (this API returns degrees as a dict)

# The code above imports the co-occurrence network of characters in
# Les Miserables, freely downloadable from
# https://gephi.org/datasets/lesmiserables.gml.zip in GML format.
# We can import and visualize the network with:
nx.draw(G, node_size=0, edge_color="b", alpha=.2, font_size=7)

# degree
deg = nx.degree(G)
from numpy import percentile, mean, median
print min(deg.values())
print percentile(deg.values(), 25)  # computes the 1st quartile
print median(deg.values())
print percentile(deg.values(), 75)  # computes the 3rd quartile
print max(deg.values())

# keep only the nodes with degree greater than 10
Gt = G.copy()
dn = nx.degree(Gt)
for n in Gt.nodes():
    if dn[n] <= 10:
        Gt.remove_node(n)
nx.draw(Gt, node_size=0, edge_color='b', alpha=.2, font_size=12)
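# Hedged note: the snippet above pins networkx 1.9.1, where nx.degree returns
# a dict. Under networkx 2.x the same analysis would read:
# deg = dict(nx.degree(G))
# vals = list(deg.values())
# print(min(vals), percentile(vals, 25), median(vals), percentile(vals, 75), max(vals))
# Gt = G.copy()
# Gt.remove_nodes_from([n for n, d in deg.items() if d <= 10])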
def nX_remove_filler_nodes(networkX_graph: nx.Graph) -> nx.Graph: if not isinstance(networkX_graph, nx.Graph): raise TypeError('This method requires an undirected networkX graph.') logger.info(f'Removing filler nodes.') g_copy = networkX_graph.copy() removed_nodes = set() def manual_weld(_G, _start_node, _geom_a, _geom_b): s_x = _G.nodes[_start_node]['x'] s_y = _G.nodes[_start_node]['y'] # check geom coordinates directionality - flip to wind in same direction # i.e. _geom_a should start at _start_node whereas _geom_b should end at _start_node if not np.allclose( (s_x, s_y), _geom_a.coords[0][:2], atol=0.001, rtol=0): _geom_a = geometry.LineString(_geom_a.coords[::-1]) if not np.allclose( (s_x, s_y), _geom_b.coords[-1][:2], atol=0.001, rtol=0): _geom_b = geometry.LineString(_geom_b.coords[::-1]) # now concatenate _new_agg_geom = geometry.LineString( list(_geom_a.coords) + list(_geom_b.coords)) # check assert np.allclose(_new_agg_geom.coords[0], (s_x, s_y), atol=0.001, rtol=0) assert np.allclose(_new_agg_geom.coords[-1], (s_x, s_y), atol=0.001, rtol=0) return _new_agg_geom def recursive_weld(_G, start_node, agg_geom, agg_del_nodes, curr_node, next_node): # if the next node has a degree of 2, then follow the chain # for disconnected components, check that the next node is not back at the start node... if nx.degree(_G, next_node) == 2 and next_node != start_node: # next node becomes new current _new_curr = next_node # add next node to delete list agg_del_nodes.append(next_node) # get its neighbours _a, _b = list(nx.neighbors(networkX_graph, next_node)) # proceed to the new_next node if _a == curr_node: _new_next = _b else: _new_next = _a # get the geom and weld if 'geom' not in _G[_new_curr][_new_next]: raise KeyError( f'Missing "geom" attribute for edge {_new_curr}-{_new_next}' ) new_geom = _G[_new_curr][_new_next]['geom'] if new_geom.type != 'LineString': raise TypeError( f'Expecting LineString geometry but found {new_geom.type} geometry.' ) # when welding an isolated circular component, the ops linemerge will potentially weld onto the wrong end # i.e. start-side instead of end-side... so orient and merge manually if _new_next == start_node: _new_agg_geom = manual_weld(_G, start_node, new_geom, agg_geom) else: _new_agg_geom = ops.linemerge([agg_geom, new_geom]) if _new_agg_geom.type != 'LineString': raise TypeError( f'Found {_new_agg_geom.type} geometry instead of "LineString" for new geom {_new_agg_geom.wkt}.' f'Check that the adjacent LineStrings in the vicinity of {curr_node}-{next_node} are not corrupted.' ) return recursive_weld(_G, start_node, _new_agg_geom, agg_del_nodes, _new_curr, _new_next) else: end_node = next_node return agg_geom, agg_del_nodes, end_node # iterate the nodes and weld edges where encountering simple intersections # use the original graph so as to write changes to new graph for n in tqdm(networkX_graph.nodes(), disable=checks.quiet_mode): # some nodes will already have been removed via recursive function if n in removed_nodes: continue if nx.degree(networkX_graph, n) == 2: # get neighbours and geoms either side nb_a, nb_b = list(nx.neighbors(networkX_graph, n)) # geom A if 'geom' not in networkX_graph[n][nb_a]: raise KeyError(f'Missing "geom" attribute for edge {n}-{nb_a}') geom_a = networkX_graph[n][nb_a]['geom'] if geom_a.type != 'LineString': raise TypeError( f'Expecting LineString geometry but found {geom_a.type} geometry.' 
) # start the A direction recursive weld agg_geom_a, agg_del_nodes_a, end_node_a = recursive_weld( networkX_graph, n, geom_a, [], n, nb_a) # only follow geom B if geom A doesn't return an isolated (disconnected) looping component # e.g. circular disconnected walkway if end_node_a == n: logger.warning( f'Disconnected looping component encountered around {n}') # in this case, do not remove the starting node because it suspends the loop g_copy.remove_nodes_from(agg_del_nodes_a) removed_nodes.update(agg_del_nodes_a) g_copy.add_edge(n, n, geom=agg_geom_a) continue # geom B if 'geom' not in networkX_graph[n][nb_b]: raise KeyError(f'Missing "geom" attribute for edge {n}-{nb_b}') geom_b = networkX_graph[n][nb_b]['geom'] if geom_b.type != 'LineString': raise TypeError( f'Expecting LineString geometry but found {geom_b.type} geometry.' ) # start the B direction recursive weld agg_geom_b, agg_del_nodes_b, end_node_b = recursive_weld( networkX_graph, n, geom_b, [], n, nb_b) # remove old nodes - edges are removed implicitly agg_del_nodes = agg_del_nodes_a + agg_del_nodes_b # also remove origin node n agg_del_nodes.append(n) g_copy.remove_nodes_from(agg_del_nodes) removed_nodes.update(agg_del_nodes) # merge the lines # disconnected self-loops are caught above per geom a, i.e. where the whole loop is degree == 2 # however, lollipop scenarios are not, so weld manually # lollipop scenarios are where a looping component (all degrees == 2) suspends off a node with degree > 2 if end_node_a == end_node_b: merged_line = manual_weld(networkX_graph, end_node_a, agg_geom_a, agg_geom_b) else: merged_line = ops.linemerge([agg_geom_a, agg_geom_b]) # run checks if merged_line.type != 'LineString': raise TypeError( f'Found {merged_line.type} geometry instead of "LineString" for new geom {merged_line.wkt}. ' f'Check that the adjacent LineStrings for {nb_a}-{n} and {n}-{nb_b} actually touch.' ) # add new edge g_copy.add_edge(end_node_a, end_node_b, geom=merged_line) return g_copy
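# Usage sketch for nX_remove_filler_nodes: a three-node chain whose middle
# node has degree 2 and whose edges carry LineString geoms (shapely and the
# module-level logger/checks/tqdm used above are assumed importable):
# import networkx as nx
# from shapely import geometry
# g = nx.Graph()
# g.add_node('a', x=0, y=0)
# g.add_node('b', x=1, y=0)
# g.add_node('c', x=2, y=0)
# g.add_edge('a', 'b', geom=geometry.LineString([(0, 0), (1, 0)]))
# g.add_edge('b', 'c', geom=geometry.LineString([(1, 0), (2, 0)]))
# g2 = nX_remove_filler_nodes(g)  # 'b' is welded away; a-c carries the merged geom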
try:
    E[rA, rB] += 1
    E[rB, rA] += 1
except KeyError:
    E[rA, rB] = 1
    E[rB, rA] = 1
if rA and rB:
    try:
        N.edge[rA][rB]['fweight'] += data['occurrence']
        N.edge[rA][rB]['weight'] += 1
    except KeyError:
        N.add_edge(rA, rB, fweight=data['occurrence'], weight=1)

write_gml(N, 'networks/rimes.gml')

deg = nx.degree(N, weight='weight')
with open('stats/rime_degree.tsv', 'w') as f:
    f.write('Rime\tDegree\n')
    for n, w in sorted(deg.items(), key=lambda x: x[1], reverse=True):
        f.write(n + '\t' + str(w) + '\n')

with open('stats/edges_rimes.tsv', 'w') as f:
    f.write('RimeA\tRimeB\tOccurrence\n')
    for (a, b), c in sorted(E.items(), key=lambda x: x[1], reverse=True):
        f.write(a + '\t' + b + '\t' + str(c) + '\n')

if 'triples' in argv:
    triples = []
    visited = []
    # make subgraph consisting only of nrj-cases
    for nA, dA in G.nodes(data=True):