def normalize_and_save_graph(G, new_file, node_map_file = None): newG = nx.Graph() min_id = min(G.nodes_iter()) max_id = max(G.nodes_iter()) print "min_id =", min_id print "max_id =", max_id i = 0 id_dict = {} # dict for node ids for u in G.nodes_iter(): id_dict[u] = i i += 1 # write to node_map_file f = open(node_map_file, 'w') for (u,i) in id_dict.iteritems(): f.write("%d %d\n"%(u,i)) f.close(); print "write .nodemap file - DONE" # for (u,v) in G.edges_iter(): if u != v: newG.add_edge(id_dict[u], id_dict[v]) else: print "self-loop at ", u # write to file nx.write_edgelist(newG, new_file, '#', '\t', False, 'utf-8') # data = False
def filterNet(DG,mindegree=None,indegree=100,outdegree=50,outdegreemax=9999999,indegreemax=999999):
    """Select nodes of DG by degree and export the resulting subgraph.

    DG           -- directed graph; in_degree/out_degree/degree are queried
                    per node.
    mindegree    -- when not None, a node is kept if its total degree is at
                    least this value; indegree and outdegree are then ignored.
    indegree     -- used only when mindegree is None: keep nodes whose
                    in-degree is at least this value (None disables the test).
    outdegree    -- used only when mindegree is None: keep nodes whose
                    out-degree is at least this value (None disables the test).
    outdegreemax -- nodes with a larger out-degree are never kept
                    (None disables the cap).
    indegreemax  -- not read anywhere in this function.

    The kept nodes induce a subgraph, nodes without any edge in that subgraph
    are dropped, the result is passed through labelGraph() and written to
    projname/followersCommonFriends.graphml and .txt.  Returns None.
    """
    print 'In filterNet'
    filter=[]
    for n in DG:
        if outdegreemax==None or DG.out_degree(n)<=outdegreemax:
            if mindegree!=None:
                if DG.degree(n)>=mindegree:
                    filter.append(n)
            else:
                # Either test is enough to keep the node; a node passing both
                # is appended twice and de-duplicated by set() below.
                if indegree!=None:
                    if DG.in_degree(n)>=indegree:
                        filter.append(n)
                if outdegree!=None:
                    if DG.out_degree(n)>=outdegree:
                        filter.append(n)
    # NOTE(review): the original comment called this the "intersect" of the
    # degree sets, but a node passing either test is appended, i.e. a union.
    # indegree and outdegree values are ignored if mindegree is set
    filter=set(filter)
    H=DG.subgraph(filter)
    # Drop nodes that have no edge inside the filtered subgraph.
    filter= [n for n in H if H.degree(n)>0]
    L=H.subgraph(filter)
    print "Filter set:",filter
    print L.order(),L.size()
    # labelGraph and projname are defined outside this function.
    L=labelGraph(L,filter)
    nx.write_graphml(L, projname+"/followersCommonFriends.graphml")
    nx.write_edgelist(L, projname+"/followersCommonFriends.txt",data=False)
def route_remaining_edges_simple(G, T, n2c):
    """The original routing function --- not used now.

    For every edge (u, v) of G, choose the shortest path in the routing
    graph between any side of the cell holding u and any side of the cell
    holding v.

    G   -- graph whose edges are to be routed.
    T   -- tiling object providing hex_sides() and side_name(cell, side).
    n2c -- mapping from a node of G to its cell.

    Returns a list with one path per edge, in G.edges_iter() order, each as
    produced by nx.all_pairs_dijkstra_path; returns [] when G has no edges.
    Side effect: the routing graph is written to "hex.graph".
    """
    if G.number_of_edges() == 0:
        return []

    H = construct_routing_graph(T, set(n2c.values()))
    SP = nx.all_pairs_dijkstra_path(H)
    SP_len = nx.all_pairs_dijkstra_path_length(H)
    nx.write_edgelist(H, "hex.graph")

    # for every remaining edge
    Routes = []
    for u, v in G.edges_iter():
        c = n2c[u]
        d = n2c[v]

        # find the combination of sides that gives the shortest path
        best = bestp = None
        for s1, s2 in itertools.product(T.hex_sides(), T.hex_sides()):
            source = T.side_name(c, s1)
            target = T.side_name(d, s2)
            length = SP_len[source][target]
            # Test for None first so a number is never ordered against None.
            if best is None or length < best:
                best = length
                bestp = SP[source][target]
        Routes.append(bestp)
    return Routes
def save_celltype_graph(self, filename="celltype_conn.gml", format="gml"): """ Save the celltype-to-celltype connectivity information in a file. filename -- path of the file to be saved. format -- format to save in. Using GML as GraphML support is not complete in NetworkX. """ start = datetime.now() if format == "gml": nx.write_gml(self.__celltype_graph, filename) elif format == "yaml": nx.write_yaml(self.__celltype_graph, filename) elif format == "graphml": nx.write_graphml(self.__celltype_graph, filename) elif format == "edgelist": nx.write_edgelist(self.__celltype_graph, filename) elif format == "pickle": nx.write_gpickle(self.__celltype_graph, filename) else: raise Exception("Supported formats: gml, graphml, yaml. Received: %s" % (format)) end = datetime.now() delta = end - start config.BENCHMARK_LOGGER.info( "Saved celltype_graph in file %s of format %s in %g s" % (filename, format, delta.seconds + delta.microseconds * 1e-6) ) print "Saved celltype connectivity graph in", filename
def main():
    """Generate a random tree and write it to the file given by --output.

    The tree is written as GraphML unless --edge-list is given, in which
    case an edge list is written using --delim as delimiter; --no-data
    leaves the edge data out of that edge list.  Returns 0.
    """
    arg_parser = ArgumentParser(description='generate random tree')
    arg_parser.add_argument('--output', required=True,
                            help='output file name')
    arg_parser.add_argument('--branching', dest='max_branch', type=int,
                            default=3, help='maximum node branching')
    arg_parser.add_argument('--height', dest='max_height', type=int,
                            default=4, help='maximum tree height')
    arg_parser.add_argument('--seed', type=int, default=None,
                            help='seed for random number generator')
    arg_parser.add_argument('--delim', dest='delimiter', default=' ',
                            help='delimiter for edge list')
    # The flag suppresses edge data; the old help text claimed the opposite.
    arg_parser.add_argument('--no-data', action='store_true', dest='no_data',
                            help='omit edge data from edge list output')
    arg_parser.add_argument('--edge-list', action='store_true',
                            dest='edge_list',
                            help='generate edge list output')
    options = arg_parser.parse_args()
    random.seed(options.seed)
    tree = random_tree(options.max_branch, options.max_height)
    if options.edge_list:
        nx.write_edgelist(tree, options.output, delimiter=options.delimiter,
                          data=not options.no_data)
    else:
        nx.write_graphml(tree, options.output)
    return 0
def splitGraphs(self,labels): """ split the graph into several subgraphs by labels """ id_label = [] ## load labels ## Node id start from 0 fid = open('labels','r') for line in fid: field = line.strip() id_label.append(int(field)) fid.close() ## calculate the number of different labels nodup_labels = set(id_label) K = len(nodup_labels) for i in range(0,K): f = open('subgraph_' + str(i) +'.sub','w') subG = [] for j in range(0,len(id_label)): if id_label[j] == i: subG.append(str(j)) G = self.G.subgraph(subG) print nx.info(G) nx.write_edgelist(G,f)
def test_write_edgelist_2(self):
    """With data=True an edge without attributes is written with '{}'."""
    graph = nx.OrderedGraph()
    graph.add_edges_from([(1, 2), (2, 3)])
    buf = io.BytesIO()
    nx.write_edgelist(graph, buf, data=True)
    written = buf.getvalue()
    assert_equal(written, b"1 2 {}\n2 3 {}\n")
def save_object(obj, folder, obj_name, extension):
    """Saves object to a file with naming convention folder/obj_name.extension.

    File format depends on the extension:
      'csv'      -- pd.DataFrame.to_csv without the index
      'pickle'   -- pickle.dump
      'ig.edges' -- ig.Graph.write_edgelist
      'nx.edges' -- nx.write_edgelist without edge data
      'coo'      -- text file: a header line with shape[0] (followed by
                    shape[1] when the two differ), then one
                    "row col repr(value)" line per stored entry

    Raises IOError when the extension is not one of the above or when
    writing fails; the message names the underlying cause.
    """
    filename = folder + '/' + obj_name + '.' + extension
    did_save = False
    failure = None
    try:
        print("\nSaving %s to '%s'..." % (obj_name, filename))
        if extension == 'csv':
            pd.DataFrame.to_csv(obj, filename, index=False)
            did_save = True
        elif extension == 'pickle':
            # Close the file handle instead of leaving it to the collector.
            with open(filename, 'wb') as f:
                pickle.dump(obj, f)
            did_save = True
        elif extension == 'ig.edges':
            ig.Graph.write_edgelist(obj, filename)
            did_save = True
        elif extension == 'nx.edges':
            nx.write_edgelist(obj, filename, data=False)
            did_save = True
        elif extension == 'coo':
            with open(filename, 'w') as f:
                f.write("%d " % obj.shape[0])
                if obj.shape[1] != obj.shape[0]:
                    f.write("%d " % obj.shape[1])
                f.write('\n')
                for (row, col, val) in zip(obj.row, obj.col, obj.data):
                    f.write("%d %d %s\n" % (row, col, repr(val)))
            did_save = True
        else:
            raise ValueError("unsupported extension %r" % (extension,))
        print("Successfully saved %s." % obj_name)
    except Exception as err:
        # Keep the reason so the IOError below is diagnosable; unlike a bare
        # except this lets KeyboardInterrupt/SystemExit propagate.
        failure = err
    if not did_save:
        message = "Failed to save %s to file." % obj_name
        if failure is not None:
            message += " Cause: %r" % (failure,)
        raise IOError(message)
def find_shortest_paths(graph, out_filename, sources, targets, k_paths):
    """ Use pathlinker to find shortest paths

    Args:
        graph: a networkx graph
        out_filename: file to print paths to (is a temporary file); it is
            used as a prefix for every file this function writes or reads
        sources: a list of source nodes
        targets: a list of target nodes
        k_paths: number of shortest paths to find

    Returns:
        List of networkx graphs, which should be thought of as paths.
        If sources are not connect to targets, then returns empty list.
        An empty list is also returned when running PathLinker or reading
        its output raises an exception (the exception is printed).
    """
    import subprocess

    assert(k_paths > 0)
    edgelist_filename = out_filename + "edgelist.temp"
    srctgt_filename = out_filename + "srctgt.temp"
    nx.write_edgelist(graph, edgelist_filename)

    # Sets give O(1) membership tests; a node listed in both is a source.
    source_set = set(sources)
    target_set = set(targets)
    with open(srctgt_filename, 'w') as f:
        for node in graph.nodes():
            if node in source_set:
                f.write(str(node) + '\tsource\n')
            elif node in target_set:
                f.write(str(node) + '\ttarget\n')

    # Argument list instead of a shell string: file names containing spaces
    # or shell metacharacters are passed through unchanged.
    command = ["python", "PathLinker/PathLinker.py",
               edgelist_filename, srctgt_filename,
               "-o", out_filename,
               "--write-paths",
               "--k-param={}".format(k_paths)]
    try:
        subprocess.call(command)
        # NOTE(review): PathLinker appears to name its output after the k
        # that was requested; the previous hard-coded "k_100" only matched
        # k_paths == 100 -- confirm against the PathLinker version in use.
        return read_paths("{}k_{}-paths.txt".format(out_filename, k_paths))
    except Exception as e:
        print(e)
        return []
def aggregate_max(G, sigma, k, eps, c, q, filename): edge_dict = {} count = 0 for u in G.nodes_iter(): if G.degree(u) < 5: continue print "u =", u count += 1 if count % 10 == 0: print "count =", count sG, S1, S2 = get_subgraph(G, u) (eps_min, sG_min) = generate_obfuscation(sG, u, S1, S2, sigma, k, eps, c, q) for e in sG_min.edges_iter(): v = e[0] w = e[1] if v > w: # swap to normalize v < w v = e[1] w = e[0] if (v,w) not in edge_dict: edge_dict[(v,w)] = sG_min[v][w]['p'] else: if edge_dict[(v,w)] < sG_min[v][w]['p']: edge_dict[(v,w)] = sG_min[v][w]['p'] # max # aG = nx.Graph() for ((v,w),weight) in edge_dict.iteritems(): aG.add_edge(v, w, {'p':weight}) # nx.write_edgelist(aG, filename, '#', '\t', data=['p'])
def test_write_edgelist_1(self):
    """With data=False only the two endpoints of each edge are written."""
    graph = nx.Graph()
    graph.add_edges_from([(1, 2), (2, 3)])
    buf = io.BytesIO()
    nx.write_edgelist(graph, buf, data=False)
    written = buf.getvalue()
    assert_equal(written, b"1 2\n2 3\n")
def MCLAlgorithm(self, inflation=3.3):
    """
    Cluster the graph with the MCL algorithm.

    The graph is written to /tmp/mcl-input (as a weighted edge list, or as
    a plain edge list if that fails), the external `mcl` command is run on
    it and /tmp/mcl-output is parsed, one group per line.

    @param inflation: value of the MCL inflation coefficient
    @requires: the MCL program on the executable path
    @rtype: list
    @return: list of lists with the (integer) members of each group
    """
    import os

    try:
        nx.write_weighted_edgelist(self.graph, "/tmp/mcl-input", delimiter="\t")
    except Exception:
        # Fall back to an unweighted edge list.
        nx.write_edgelist(self.graph, "/tmp/mcl-input", delimiter="\t")

    logger.debug("Invoking mcl command ...")
    os.system("mcl /tmp/mcl-input --abc -te 2 -I %f -o /tmp/mcl-output" % inflation)
    logger.debug("MCL clustering done")

    # str.split and a list comprehension behave the same on Python 2 and 3,
    # unlike string.split / map; the file is closed on exit.
    with open("/tmp/mcl-output", 'r') as out_file:
        partition = [[int(member) for member in line.split()]
                     for line in out_file]
    return partition
def run(output_path, graph_type, force, seed, num_nodes, edge_prob, solution_path):
    """Generate a random directed graph with random edge weights.

    Exits with status -1 when either output file exists and `force` is not
    set, or when `graph_type` is not 'erdos'.  Every edge gets an
    'act_prob' attribute drawn from R.random(); the graph is written to
    `output_path` as an edge list and the dense weight matrix to
    `solution_path` as comma-separated text.
    """
    if not force and (P.exists(output_path) or P.exists(solution_path)):
        print('Cannot overwrite without --force', file=sys.stderr)
        sys.exit(-1)

    if graph_type != 'erdos':
        print('Unknown graph type: ', graph_type, file=sys.stderr)
        sys.exit(-1)

    g = nx.erdos_renyi_graph(num_nodes, edge_prob, seed=seed, directed=True)

    # All edges are given uniformly random weights.
    A = np.zeros((num_nodes, num_nodes), dtype='float')
    for src, dst, attrs in g.edges(data=True):
        prob = R.random()
        attrs['act_prob'] = prob
        A[src, dst] = prob

    nx.write_edgelist(g, output_path)
    np.savetxt(solution_path, A, delimiter=',')
def permute_network( G, Q, numEdges, outputFile ):
    """Permute a copy of G with Q * numEdges connected double-edge swaps.

    The permuted copy is written to `outputFile` as an edge list; G itself
    is left untouched.  Returns the value returned by
    nx.connected_double_edge_swap.
    """
    permuted = G.copy()
    swaps = nx.connected_double_edge_swap(permuted, nswap=Q * numEdges)
    nx.write_edgelist(permuted, outputFile)
    return swaps
def start(self):
    """Crawl two levels of connections from every root id and export the graph.

    For each id in self.oidRootNamePairs, Utils.getoidNames is called with
    Def.typ and the returned ids are linked to the root by
    addDirectedEdges (flipped when Def.typ == 'fr').  The same lookup is
    then made for each returned id with Def.typ2.  Afterwards root ids
    missing from self.oidNamePairs are copied in, nodes are labelled, and
    self.DG is written under reports/ as GraphML and as an edge list
    without data, both named after UserID._name, Def.typ and a timestamp.
    """
    for id in self.oidRootNamePairs:
        # First level: ids directly connected to the root.
        self.oidNamePairs,currIDs=Utils.getoidNames(self.oidNamePairs,id,Def.typ)
        Utils.report('Processing current IDs: '+str(currIDs))
        flip=(Def.typ=='fr')
        self.addDirectedEdges(id, currIDs,flip=flip)
        n=len(currIDs)
        Utils.report('Total amount of IDs: '+str(n))
        c=1  # 1-based progress counter used only in the report message
        for cid in currIDs:
            # Second level: connections of each first-level id.
            Utils.report('\tSub-level run: getting '+Def.typ2,str(c)+'of'+str(n)+Def.typ+cid)
            self.oidNamePairs,ccurrIDs=Utils.getoidNames(self.oidNamePairs,cid,Def.typ2)
            self.addDirectedEdges( cid, ccurrIDs)
            c=c+1
    # Make sure every root id has a name entry before labelling.
    for id in self.oidRootNamePairs:
        if id not in self.oidNamePairs:
            self.oidNamePairs[id]=self.oidRootNamePairs[id]
    self.labelNodes(self.oidNamePairs)
    Utils.report(nx.info(self.DG))
    now = datetime.datetime.now()
    timestamp = now.strftime("_%Y-%m-%d-%H-%M-%S")
    fname=UserID._name.replace(' ','_')
    nx.write_graphml(self.DG, '/'.join(['reports',fname+'_google'+Def.typ+'Friends_'+timestamp+".graphml"]))
    nx.write_edgelist(self.DG, '/'.join(['reports',fname+'_google'+Def.typ+'Friends_'+timestamp+".txt"]),data=False)
def post_processing_attack(G_min, out_file):
    """Write the edges of G_min whose 'p' attribute exceeds 0.5.

    The surviving edges are written to `out_file` as a tab-separated edge
    list without edge data.
    """
    likely_edges = [(u, v)
                    for (u, v, attrs) in G_min.edges_iter(data=True)
                    if attrs['p'] > 0.5]
    aG = nx.Graph()
    aG.add_edges_from(likely_edges)
    nx.write_edgelist(aG, out_file, comments='#', delimiter='\t',
                      data=False, encoding='utf-8')
def get_community_biconnections(commid, df, graph): print "Find biconnections in the community :", commid print nx.info(graph) biconnected_nodes = [] for e in graph.edges(): a, b = e if graph.has_edge(b,a) and a != b: # check if already there in the list if (a,b) in biconnected_nodes or (b,a) in biconnected_nodes: pass else: biconnected_nodes.append((a,b)) print "number of biconnected edges:", len(biconnected_nodes) source_nodes, target_nodes = zip(*biconnected_nodes) all_subgraph_nodes = set(source_nodes).union(set(target_nodes)) print "Unique nodes in the biconnections", len(all_subgraph_nodes) # get the subgraph of all biconnected edges # plot dfname = biconnbase+ str(commid) + '_biz_info.csv' bicon_df = df.loc[all_subgraph_nodes] print bicon_df.shape bicon_df.to_csv(dfname) # subgraph generated from the coordinates sgname = biconnbase+ str(commid) + '_sg_edgelist.ntx' sg = graph.subgraph(list(all_subgraph_nodes)) print nx.info(sg) nx.write_edgelist(sg, sgname, data=False)
def motifOrder(data,key,orderSize,motifSize,degree): graphs = data[key] pattern = {} for G in graphs: #calculate threshold sortedWeights = np.sort(G,axis=None) threshold = sortedWeights[-len(G)*degree-1] #Output graph to txt file graph = nx.DiGraph(G>threshold) graph = nx.convert_node_labels_to_integers(graph,1) with open('result2/OUTPUT.txt','wb') as f: nx.write_edgelist(graph,f,data=False) #Jenky way to use c++ motif finder in python os.system("./fanmod_command_line_linux " +str(motifSize) + " 100000 1 result2/OUTPUT.txt 1 0 0 2 0 0 0 1 3 3 result2/MotifCount.txt 0 1") data = parseOutput("result2/MotifCount.txt") order = [] for iD,total,percent in data: order.append((iD,total)) keys = sorted(order,key=lambda x:-x[1]) keys = [int(k[0]) for k in keys] pat = tuple(keys[:orderSize]) pattern[pat] = pattern.setdefault(pat,0) + 1/float(len(graphs)) total = sorted(pattern.items(), key = lambda x: -x[1]) for key,value in total: print str(key)+": " + str(value)
def __init__(self, config, logger):
    """Build the network described by config['graph'].

    Three entries of config['graph'] are read: 'type' and 'generator',
    both Python expressions that are evaluated with eval(), and
    'nx_edge_list_filename', the path the generated network is written to
    as an edge list.

    NOTE(review): eval() executes whatever the configuration contains, so
    the configuration must come from a trusted source.  The evaluated
    expressions can see this method's local names and the module globals;
    presumably the generator expression relies on some of them (e.g. nx or
    self.graph) -- confirm before renaming anything here.
    """
    self.config = config
    self.logger = logger
    self.logger.info('Creating Network')
    graph_type = self.config['graph']['type']
    self.logger.info('Creating {}'.format(graph_type))
    eval_str = '{}'.format(graph_type)
    self.logger.debug('eval: {}'. format(eval_str))
    # Evaluates the configured 'type' expression (untrusted input is unsafe).
    self.graph = eval(eval_str)
    self.logger.debug('Type of self.graph: {}'.format(type(self.graph)))
    self.logger.info('Creating network from NetworkX Graph Generator')
    graph_generator = self.config['graph']['generator']
    self.logger.info('Graph Generator: {}'.format(graph_generator))
    eval_str = '{}'.format(graph_generator)
    self.logger.debug('eval: {}'.format(eval_str))
    # Evaluates the configured 'generator' expression (same caveat).
    network = eval(eval_str)
    self.logger.debug('Type of network: {}'.format(type(network)))
    self.nx_graph = network  # used to generate network using agent as nodes
    nx_edge_list_filename = self.config['graph']['nx_edge_list_filename']
    self.logger.info(
        'Writing networkx edge list: {}'.format(nx_edge_list_filename))
    nx.write_edgelist(network, nx_edge_list_filename)
def filterNet(DG,mindegree): if addUserFriendships==1: DG=addFocus(DG,user,typ) mindegree=int(mindegree) filter=[] filter= [n for n in DG if DG.degree(n)>=mindegree] H=DG.subgraph(filter) print "Filter set:",filter print H.order(),H.size() LH=labelGraph(H,filter) now = datetime.datetime.now() ts = now.strftime("_%Y-%m-%d-%H-%M-%S") nx.write_graphml(H, '/'.join([path,agent,typ,tt+"degree"+str(mindegree)+ts+".graphml"])) nx.write_edgelist(H, '/'.join([path,agent,typ,tt+"degree"+str(mindegree)+ts+".txt"]),data=False) #delimiter='' #indegree=sorted(nx.indegree(DG).values(),reverse=True) indegree=H.in_degree() outdegree=H.out_degree() inout = [indegree, outdegree] inoutpair = {} for k in indegree.iterkeys(): inoutpair[k] = tuple(inoutpair[k] for inoutpair in inout) fig = plt.figure() ax = fig.add_subplot(111) #ax.plot(indegree,outdegree, 'o') #ax.set_title('Indegree vs outdegree') degree_sequence=sorted(indegree.values(),reverse=True) plt.loglog(degree_sequence) plt.savefig( '/'.join([path,agent,typ,tt+"degree"+str(mindegree)+"outdegree_histogram.png"]))
def main(n_start, n_count=1, n_inc=1, c_in_start=10, c_in_count=1, c_in_inc=1, c_out_start=5, c_out_count=1, c_out_inc=1, comm_count = 2, DC=False, i=0):
    """Sweep n, c_in and c_out; fuzz a network and run belief propagation.

    For every parameter combination a network is made, written to data/,
    fuzzed, written as a weighted edge list, and handed to the external
    'src/bp' program.  One line per combination is appended to
    out/results.txt with the parameters, the evaluate() score, the run time
    of the external program, a timestamp and `i`.
    """
    bp_uncertain = 'src/bp'
    edge_frac = 1.
    nonedge_mult = 5.
    b = 2
    trials = 2

    for directory in ('out', 'data'):
        os.makedirs(directory, exist_ok=True)

    for n in custom_range(n_start, n_count, n_inc):
        for c_in in custom_range(c_in_start, c_in_count, c_in_inc):
            for c_out in custom_range(c_out_start, c_out_count, c_out_inc):
                # The three file names share the same parameter suffix.
                params = (n, c_in, c_out, b, edge_frac, nonedge_mult, i)
                original_net = 'data/original_net-%d-%f-%f-%f-%f-%f-%d.edges' % params
                uncertain_net = 'data/noisy_net-%d-%f-%f-%f-%f-%f-%d.edges' % params
                uncertain_comms = 'out/uncertain_comms-%d-%f-%f-%f-%f-%f-%d.out' % params

                print("making and fuzzing network")
                G_orig = make_net(c_in, c_out, n)
                write_edgelist(G_orig, original_net)
                G, _ = fuzz_network(G_orig, 1, b, edge_frac, nonedge_mult)
                write_weighted_edgelist(G, uncertain_net)

                start1 = time()
                print("running belief propagation")
                command = '%s -i %s -o %s -c %d -l %d -n %d' % (
                    bp_uncertain, uncertain_net, uncertain_comms,
                    comm_count, 3, trials)
                os.system(command)
                end1 = time()

                with open('out/results.txt', 'a+') as out_file:
                    score = evaluate(uncertain_comms, n)
                    elapsed = end1 - start1
                    stamp = str(datetime.now())
                    out_file.write("%d %f %f\t%f %f %f\t %f %f\t %s %d\n" % (
                        n, c_in, c_out, b, edge_frac, nonedge_mult,
                        score, elapsed, stamp, i))
def main():
    """Read a GraphML tree, add edge weights and write it to --output.

    The result is written as GraphML unless --edge-list is given, in which
    case an edge list is written using --delim as delimiter; --no-data
    leaves the edge data out of that edge list.  Returns 0.
    """
    arg_parser = ArgumentParser(description='add edge weights to tree')
    # Help texts: typos fixed ("inpput"/"outpput").
    arg_parser.add_argument('--input', required=True,
                            help='input file')
    arg_parser.add_argument('--output', required=True,
                            help='output file')
    arg_parser.add_argument('--seed', type=int, default=None,
                            help='seed for random number generator')
    arg_parser.add_argument('--delim', dest='delimiter', default=' ',
                            help='delimiter for edge list')
    # The flag suppresses edge data; the old help text claimed the opposite.
    arg_parser.add_argument('--no-data', action='store_true', dest='no_data',
                            help='omit edge data from edge list output')
    arg_parser.add_argument('--edge-list', action='store_true',
                            help='generate edge list output')
    options = arg_parser.parse_args()
    random.seed(options.seed)
    tree = nx.read_graphml(options.input)
    add_edge_weights(tree)
    if options.edge_list:
        nx.write_edgelist(tree, options.output, delimiter=options.delimiter,
                          data=not options.no_data)
    else:
        nx.write_graphml(tree, options.output)
    return 0
def run(args): """Permutes the given PPI network the specified number of times.""" import sys, os # Load network G = load_network(args.network_edgelist) if args.verbose: print 'Input network has', len( G.edges() ), 'edges among', len(G.nodes()), print 'nodes.\nPerforming', len( G.edges() ) * args.Q, 'edge swaps.' # Make sure output directory exists os.system('mkdir -p ' + args.output_dir) # Permute network and output files for i in range(args.num_networks): if args.verbose: sys.stdout.write('+') sys.stdout.flush() # Permute graph and output as an edge list H = permute_network(G, args.Q) filename = args.output_dir + "/" + str(i + args.start_index) + ".txt" nx.write_edgelist(H, filename) if args.verbose: print
def findLowScoreEdges(self,K,order): """ Find the #K edges that has LESS contribution to the leading edgevalue by removing them. According to paper: Gelling and Melting, Large Graphs by Edge Manipulation Hanghang Tong, 2012 """ ## calcualte the eigenvalues and left and right eigen vectors ## as our input is a symmetry undirected graph, ## the left and right vectors are just transpose to each other. print nx.info(self.G) ## calculate the eigenvalues and eigenvectors mat = nx.linalg.adjacency_matrix(self.G) evals,evecs = LA.eigh(mat) idx = evals.argsort() evals = evals[idx] evecs = evecs[:,idx] ## because left vector and right vector are same but with transpose ## so we don't have to change the sign of each coordinate of the vector rvec = evecs[:,len(evals)-1] # right vector of leading eigenvalues if min(rvec) < 0.0 : for v in rvec: v = -v ## calculte the score for each edge edge_scores = [] for e in self.G.edges_iter(): idx_l = self.G.nodes().index(e[0]) idx_r = self.G.nodes().index(e[1]) score = rvec[idx_l]*rvec[idx_r] edge_scores.append((e,score)) """ sort""" if order == 'high': edge_scores.sort(key=lambda edge_scores:edge_scores[1],reverse=True) ## high to low else: edge_scores.sort(key=lambda edge_scores:edge_scores[1]) ## low to high print edge_scores[1:10] """ test: save the graph by remove edges """ marker_start = 0 marker_end = 0 for i in range(0,10): marker_end = marker_start + 5 for j in range(marker_start,marker_end): e = edge_scores[j][0] self.G.remove_edge(*e) marker_start = marker_end # adjust the beginning index fid = open(str(marker_end)+'.rg','w') nx.write_edgelist(self.G,fid)
def generate_ER_graph_with_trust(N, p, trust_list, filename):
    """Write a G(N, p) random graph whose edges carry a random trust value.

    Every edge gets a 't' attribute picked from `trust_list` at a random
    index; the graph is written to `filename` as a tab-separated edge list
    including the edge data.
    """
    last_index = len(trust_list) - 1
    G = nx.fast_gnp_random_graph(N, p)
    for (u, v) in G.edges_iter():
        pick = random.randint(0, last_index)
        G.edge[u][v]['t'] = trust_list[pick]
    nx.write_edgelist(G, filename, comments='#', delimiter='\t',
                      data=True, encoding='utf-8')
def generate_SM_graph_with_trust(N, p, k, trust_list, filename):
    """Write a connected Watts-Strogatz graph with random trust values.

    The graph is built with nx.connected_watts_strogatz_graph(N, k, p).
    Every edge gets a 't' attribute picked from `trust_list` at a random
    index; the graph is written to `filename` as a tab-separated edge list
    including the edge data.
    """
    last_index = len(trust_list) - 1
    G = nx.connected_watts_strogatz_graph(N, k, p)
    for (u, v) in G.edges_iter():
        pick = random.randint(0, last_index)
        G.edge[u][v]['t'] = trust_list[pick]
    nx.write_edgelist(G, filename, comments='#', delimiter='\t',
                      data=True, encoding='utf-8')
def writeEdgelist(g, filename):
    """Write g as a tab-separated edge list with its edge data.

    The "date" attribute of every edge is first replaced, in g itself, by
    its '%Y-%m-%d' string form.
    """
    # Convert date to string
    date_by_edge = nx.get_edge_attributes(g, "date")
    text_by_edge = dict((edge, when.strftime('%Y-%m-%d'))
                        for (edge, when) in date_by_edge.items())
    nx.set_edge_attributes(g, "date", text_by_edge)

    # Write to file
    nx.write_edgelist(g, filename, delimiter="\t", data=True)
def write_subgraphs(graph_list, in_file): # write subgraphs to files i = 0 for aG in graph_list: filename = "../part/" + in_file[0:len(in_file)-3] + "." + str(i) nx.write_edgelist(aG, filename, '#', '\t', False, 'utf-8') i += 1 print "write graph_list to files: DONE"
def test_write_edgelist_3(self):
    """With data=True the whole attribute dict of each edge is written."""
    graph = nx.OrderedGraph()
    graph.add_edge(1, 2, weight=2.0)
    graph.add_edge(2, 3, weight=3.0)
    buf = io.BytesIO()
    nx.write_edgelist(graph, buf, data=True)
    written = buf.getvalue()
    assert_equal(written, b"1 2 {'weight': 2.0}\n2 3 {'weight': 3.0}\n")
def test_write_edgelist_4(self):
    """With a list of keys as data only those attribute values are written."""
    graph = nx.OrderedGraph()
    graph.add_edge(1, 2, weight=2.0)
    graph.add_edge(2, 3, weight=3.0)
    buf = io.BytesIO()
    wanted_keys = [('weight')]
    nx.write_edgelist(graph, buf, data=wanted_keys)
    written = buf.getvalue()
    assert_equal(written, b"1 2 2.0\n2 3 3.0\n")
delimiter=' ') for edge in G.edges(): G[edge[0]][edge[1]]['weight'] = 1 if not directed: G = G.to_undirected() return G if __name__ == '__main__': filePath = 'input/PB_undirected_1.edgelist' # PB_directed_1.edgelist blogcatalog_directed_1.edgelist # 读入数据集 print("----Reading graph......") G = read_graph(weighted=0, input=filePath, directed=0) nx.write_edgelist(G, 'output/Graph.txt', data=False) print(len(G)) print(len(G.edges())) # 划分数据集 train_E, test_E = split_train_test(G, train_frac=0.9) G.remove_edges_from(test_E) print("G_giantCom :" + str(nx.is_connected(G))) nx.write_edgelist(G, 'output/Graph_train.txt', data=False) print(len(G)) print(len(G.edges())) # 验证最大联通子图 G_simple = max(nx.connected_component_subgraphs(G), key=len) nx.write_edgelist(G_simple, 'output/Graph_train_simple.txt', data=False) print(len(G_simple))
import numpy as np
import networkx as nx
import itertools
import argh

# Builds a cycle of `cycle_nodes` nodes with a small tree hanging off every
# cycle node, and writes the result to "cycle-tree.edges" without edge data.
cycle_nodes = 10

# Balanced binary tree of height 2, shifted to labels 1..7 so that a new
# node 0 can be attached above the old root.
tree = nx.balanced_tree(2, 2)
nx.relabel_nodes(tree, {n: n + 1 for n in tree.nodes}, copy=False)
tree.add_edge(0, 1)
tree_nodes = len(tree.nodes())


def _shifted_copy(graph, offset, stride):
    """Return a copy of graph with every node n relabelled stride * n + offset."""
    return nx.relabel_nodes(
        graph.copy(), {n: stride * n + offset for n in graph.nodes})


# In copy number i node 0 becomes i, which is also a node of the cycle graph.
copies = [_shifted_copy(tree, i, cycle_nodes) for i in range(cycle_nodes)]

G = nx.compose_all(copies + [nx.cycle_graph(cycle_nodes)])
# G = nx.compose_all(copies)
nx.write_edgelist(G, "cycle-tree.edges", data=False)
def load_karate_club():
    """Print a summary of the karate club graph and write it to 'karate.csv'.

    The graph is named 'karate' and written with the default
    nx.write_edgelist settings.  Nothing is returned.
    """
    karate = nx.karate_club_graph()
    karate.name = 'karate'
    summary = nx.info(karate)
    print(summary)
    nx.write_edgelist(karate, 'karate.csv')
G = nx.read_edgelist('data/JS_topological_network.csv', delimiter=',') #G = nx.read_edgelist('data/RB_sample_network.csv', delimiter=',') nodes = nx.number_of_nodes(G) size = int(nodes * 0.9) #random_node = list(G.nodes())[0] random_node = random.choice(list(G.nodes)) print(random_node) print("Executing MHRW...") sample = MHRW() sample.mhrw(G, random_node, size) print("Writing sample network...") nx.write_edgelist(sample.G1, "data/JS_sample_network_90.csv", delimiter=",", data=False) G.clear() G = sample.G1 DG = nx.degree(G) num_nodes = 0 sum_degree = 0 for i in DG: num_nodes += 1 sum_degree += i[1] print("Grau da rede:", sum_degree)
# Connect every ordered pair of distinct dates in `train` whose tf-idf
# intersection is non-empty.
# NOTE(review): 'weight' is the length of intersects_tfid[date1][date1],
# not [date1][date2] as used for 'common' and in the filter -- confirm this
# is intended.
tfid_graph.add_edges_from([
    (date1, date2, {
        'common': intersects_tfid[date1][date2],
        'weight': len(intersects_tfid[date1][date1])
    })
    for date1 in train for date2 in train
    if date1 != date2 and len(intersects_tfid[date1][date2])
])
# Same construction for the token intersections (same [date1][date1]
# weight indexing as above).
tok_graph.add_edges_from([
    (date1, date2, {
        'common': intersects_tok[date1][date2],
        'weight': len(intersects_tok[date1][date1])
    })
    for date1 in train for date2 in train
    if date1 != date2 and len(intersects_tok[date1][date2])
])
# Both graphs are written as edge lists to binary file handles.
with open("tfidWeight", 'wb') as tf, open("tokenWeightg", 'wb') as tk:
    nx.write_edgelist(tfid_graph, tf)
    nx.write_edgelist(tok_graph, tk)
nx.draw(tfid_graph, with_labels=True)
# savefig("tdif_graph.png")
labels = nx.get_edge_attributes(tfid_graph, 'weight')
# NOTE(review): no node positions are passed here; check that the installed
# NetworkX accepts draw_networkx_edge_labels without `pos`.
nx.draw_networkx_edge_labels(tfid_graph, edge_labels=labels)
show()
nx.draw(tok_graph, with_labels=True)
# savefig("tok_graph.png")
show()
# tokens = {k:tuple(x[1] for x in v) for k, v in groupby(sorted(tokens), key=lambda x: x[0])}
# print(tokens)
# for i in range(7,3,-1):
# date = start + str(i)
# bro = ms.StatefulBrowser()
def graph_generation_basedOn_tweet(stop):
    """Build, export and draw a follower/friend graph per stored tweet.

    All "all_the_users_list" documents of the users_info collection are
    merged into one lookup of user records.  For every
    "specific_tweet_id" document a graph is created; for each tweet in it
    the involved users become nodes, and an edge is added between two
    involved users when one is listed in the other's "followers" or
    "friends".  If at least one involved user had a record, the graph is
    written to graphs/TweetGraph_<screen name>_<tweet id>.csv and drawn
    with matplotlib.

    stop -- not used in this function.
    """
    # for each tweet add the user id as a node
    collection = db["users_info"]
    all_the_users = {}
    cursor = collection.find({"all_the_users_list": {"$exists": True}})
    # Merge every stored user record into one dictionary.
    for doc in cursor:
        all_the_users.update(doc["all_the_users_list"])
    cursor = collection.find({"specific_tweet_id": {"$exists": True}})
    for doc in cursor:
        # One graph per document; it is not reset between the tweets of the
        # same document.
        G = nx.Graph()
        dic = doc["specific_tweet_id"]
        for tweet in dic:
            flag = False      # set once any involved user has a stored record
            nodes = []        # every involved user (drawn in red)
            emptyNodes = []   # involved users without a stored record (black)
            for tweetKey in tweet.keys():
                # The tweet author is fetched through the API.
                status = api.get_status(id=tweetKey)
                source = status.user.id
                sourceName = status.user.screen_name
                usersInvolvedList = tweet[tweetKey]
                for user in usersInvolvedList:
                    G.add_node(user)
                    nodes.append(user)
                # Compare every ordered pair of involved users.
                for user in usersInvolvedList:
                    for otherUser in usersInvolvedList:
                        if user is not otherUser:
                            # User records are keyed by the id as a string.
                            oU = all_the_users.get(str(otherUser))
                            if oU is not None:
                                flag = True
                                if user in oU["followers"]:
                                    print "foll", user, otherUser
                                    G.add_edge(user, otherUser)
                                if user in oU["friends"]:
                                    print "frie", user, otherUser
                                    G.add_edge(user, otherUser)
                            else:
                                emptyNodes.append(otherUser)
            if flag:
                # generate positions for the nodes
                print "drawing"
                print sourceName, len(G.nodes)
                nx.write_edgelist(G, "graphs/TweetGraph_{}.csv".format(
                    str(sourceName) + "_" + str(tweetKey)), data=False)
                pos = nx.random_layout(G)  # positions for all nodes
                nx.draw_networkx_nodes(G, pos, nodelist=nodes, node_color='r',
                                       node_size=10, alpha=0.8)
                # The tweet author is drawn larger, in blue.
                nx.draw_networkx_nodes(G, pos, nodelist=[source],
                                       node_color='b', node_size=50, alpha=0.8)
                nx.draw_networkx_nodes(G, pos, nodelist=emptyNodes,
                                       node_color='black', node_size=5,
                                       alpha=0.8)
                nx.draw_networkx_edges(G, pos, width=1, alpha=0.5,
                                       edge_color='g')
                plt.title(tweetKey + " from " + sourceName)
                plt.show()
def __init__(self, data_path, residual_ratio, seed, precomputed, save_path, reduce_dataset=None):
    """Build or reload a link-prediction dataset from an edge-list file.

    data_path      -- edge list of the full network (read when not
                      precomputed).
    residual_ratio -- fraction of the edges to keep in the residual
                      (training) network.
    seed           -- seed for numpy's global RNG; also part of the save
                      directory name.
    precomputed    -- when true, the residual network and the train/test
                      sets are loaded from the save directory instead of
                      being generated.
    save_path      -- parent directory; files live in
                      save_path/seed_<seed>[_reduced_<reduce_dataset>].
    reduce_dataset -- when not None, only the first `reduce_dataset` nodes
                      of the network are used.

    Sets self.residual_network, self.x_train, self.y_train, self.x_test and
    self.y_test.  Existing edges are labelled 1 and randomly drawn
    non-edges 0.
    """
    save_path = os.path.join(save_path, 'seed_{}'.format(seed))
    if not reduce_dataset is None:
        save_path += '_reduced_{}'.format(reduce_dataset)
    # Best effort: any failure to create the directory (including "already
    # exists") is ignored.
    try :
        os.makedirs(save_path)
    except:
        pass
    save_graph_path = os.path.join(
        save_path, 'residual_network.txt')
    save_train_path = os.path.join(
        save_path, 'train.json')
    save_test_path = os.path.join(
        save_path, 'test.json')
    if precomputed:
        # Reload what an earlier, non-precomputed run saved.
        self.residual_network = networkx.read_edgelist(save_graph_path)
        with open(save_train_path, 'r') as f:
            train_dict = json.load(f)
        self.x_train = np.asarray(train_dict['x_train'])
        self.y_train = np.asarray(train_dict['y_train'])
        with open(save_test_path, 'r') as f:
            test_dict = json.load(f)
        self.x_test = np.asarray(test_dict['x_test'])
        self.y_test = np.asarray(test_dict['y_test'])
    else:
        network = networkx.read_edgelist(data_path)
        if not reduce_dataset is None:
            network = network.subgraph(list(network.nodes())[:reduce_dataset]).copy()
        # Relabel nodes to consecutive integers, then to their string form.
        network = networkx.relabel.convert_node_labels_to_integers(network)
        mapping = dict(zip(list(network.nodes()), [str(node) for node in list(network.nodes())]))
        network = networkx.relabel.relabel_nodes(network, mapping)
        print(networkx.info(network))
        removed_edges = set()
        kept_edges = set()
        # get the number of the edges to remove
        n_edges_to_keep = int(
            (residual_ratio) * network.number_of_edges())
        n_edges_to_remove = network.number_of_edges() - n_edges_to_keep
        # set the seed
        np.random.seed(seed)
        print('removing edges randomly')
        start = time.time()
        # taking the minimal spanning tree and adding edges is a way to enforce the connectivity of the residual graph
        print('searching the minimal spanning tree')
        residual_network = networkx.algorithms.tree.minimum_spanning_edges(
            network, data=False, keys=False)
        residual_network = list(residual_network)
        n_edges = len(residual_network)
        is_acceptable = n_edges < network.number_of_edges() * residual_ratio
        # Only a warning: processing continues even when the spanning tree
        # alone exceeds the requested number of edges.
        if not is_acceptable:
            print(
                'minimum spanning tree has more edge than required by the residual ratio')
        print('removing unessential edges from the network')
        # we remove the edges that have already been added to the residual network
        network.remove_edges_from(residual_network)
        # From here on `network` is a shuffled array of the non-tree edges.
        network = list(network.edges())
        network = np.random.permutation(network)
        removed_edges = network[:n_edges_to_remove]
        residual_network = networkx.Graph(residual_network)
        # we add to the residual network the edges left in network (ie not in the spanning tree not in remove)
        residual_network.add_edges_from(network[n_edges_to_remove:])
        kept_edges = set(residual_network.edges())
        # `network` is rebound once more: now the graph of removed edges.
        network = networkx.Graph(removed_edges.tolist())
        print('the network is connected : {}'.format(
            networkx.is_connected(residual_network)))
        print('time taken {} to generate the residual network'.format(
            time.time()-start))
        # Each set holds as many fictive (label 0) as real (label 1) edges.
        n_train = 2 * len(kept_edges)
        n_test = 2 * len(removed_edges)
        fictive_edges = []
        print('generating random fictive edges')
        nodes = list(residual_network.nodes())
        for i in tqdm.tqdm(range(n_train//2 + n_test//2)):
            Id_src = nodes[np.random.randint(len(nodes))]
            Id_dst = nodes[np.random.randint(len(nodes))]
            # Reject self-pairs and pairs that are real edges (kept or
            # removed); redraw until the pair is acceptable.
            not_acceptable = Id_dst == Id_src
            not_acceptable = not_acceptable or residual_network.has_edge(
                Id_src, Id_dst)
            not_acceptable = not_acceptable or network.has_edge(
                Id_src, Id_dst)
            while not_acceptable:
                Id_src = nodes[np.random.randint(len(nodes))]
                Id_dst = nodes[np.random.randint(len(nodes))]
                not_acceptable = Id_dst == Id_src
                not_acceptable = not_acceptable or residual_network.has_edge(
                    Id_src, Id_dst)
                not_acceptable = not_acceptable or network.has_edge(
                    Id_src, Id_dst)
            fictive_edges.append((Id_src, Id_dst))
        self.x_train, self.y_train = [], []
        self.x_test, self.y_test = [], []
        # Test set: removed edges (1) followed by the first fictive edges (0).
        self.x_test += list(removed_edges)
        self.y_test += len(self.x_test) * [1]
        self.x_test += fictive_edges[:n_test//2]
        self.y_test += n_test//2 * [0]
        self.x_test = np.asarray(self.x_test)
        self.y_test = np.asarray(self.y_test)
        # Training set: kept edges (1) followed by the remaining fictive
        # edges (0), then shuffled.
        self.x_train += list(kept_edges)
        self.y_train += len(self.x_train) * [1]
        self.x_train += fictive_edges[n_test//2:]
        self.y_train += n_train//2 * [0]
        self.x_train, self.y_train = np.asarray(
            self.x_train), np.asarray(self.y_train)
        shuffled_indexes = np.random.permutation(n_train)
        self.x_train = self.x_train[shuffled_indexes]
        self.y_train = self.y_train[shuffled_indexes]
        self.residual_network = residual_network
        # we save the graph and the train/test set for other runs
        networkx.write_edgelist(
            self.residual_network, save_graph_path)
        with open(save_train_path, 'w') as f:
            json.dump({'x_train': self.x_train.tolist(),
                       'y_train': self.y_train.tolist()}, f)
        with open(save_test_path, 'w') as f:
            json.dump({'x_test': self.x_test.tolist(),
                       'y_test': self.y_test.tolist()}, f)
def graph_generation_basedOn_tweet_with_hops(stop, type):
    """Build and export a hop-by-hop user graph for every stored tweet.

    All user records found under ``all_the_users_list`` in the
    ``users_info`` collection are merged into one lookup table.  Then, for
    every tweet listed under ``specific_tweet_id``, the graph is grown
    outwards from the tweet's author: an involved user is attached to a
    user of the current level when the latter appears in the former's
    ``followers`` or ``friends`` list.  Users that are never reached are
    added as isolated nodes, and every isolated node is finally linked to
    the artificial node ``-1``.

    The edge list is written to
    ``graphs/<fake|real>/TweetGraph_<screen_name>_<tweet_id>.csv``, but only
    when at least one involved user had a record in the lookup table.

    NOTE(review): one graph object is created per document and shared by all
    tweets of that document, so later tweets are exported together with the
    nodes/edges of earlier ones -- confirm this is intended.

    Args:
        stop: Unused; kept for interface compatibility.
        type: ``"Fake"`` selects the ``fake/`` output folder, any other
            value selects ``real/``.
    """
    collection = db["users_info"]

    # Merge every stored chunk of user records into a single lookup table.
    all_the_users = {}
    for doc in collection.find({"all_the_users_list": {"$exists": True}}):
        all_the_users.update(doc["all_the_users_list"])

    # Hard-coded user ids: when the tweet could not be fetched, the first
    # one found among the involved users is dropped from the set.
    # NOTE(review): presumably the known authors of the deleted tweets --
    # confirm.
    fallback_source_ids = (
        183036873,
        2302467404,
        4730093353,
        375721095,
        707278892801765377,
    )

    # Compare by value: ``is`` against a string literal tests object
    # identity and is not guaranteed to match an equal string.
    typepath = "fake/" if type == "Fake" else "real/"

    for doc in collection.find({"specific_tweet_id": {"$exists": True}}):
        G = nx.Graph()
        for tweet in doc["specific_tweet_id"]:
            # Becomes True once any involved user has a record.
            flag = False
            for tweetKey in tweet.keys():
                try:
                    status = api.get_status(id=tweetKey)
                    source = status.user.id
                    sourceName = status.user.screen_name
                except Exception:
                    # Best effort: a tweet that cannot be fetched is
                    # treated as deleted.  Only ``Exception`` is caught so
                    # that Ctrl-C / SystemExit still stop the run.
                    source = 0
                    sourceName = "deleted"

                userSet = set(tweet[tweetKey])

                level = [source]
                G.add_node(source)
                if source != 0:
                    # discard(): the author is not guaranteed to be part
                    # of the involved-users list.
                    userSet.discard(source)
                else:
                    for known_id in fallback_source_ids:
                        if known_id in userSet:
                            userSet.remove(known_id)
                            break

                # Breadth-first expansion, one hop per iteration.
                while level:
                    newLevel = []
                    for user in level:
                        reached = []
                        for anotherUser in userSet:
                            aU = all_the_users.get(str(anotherUser))
                            if aU is None:
                                continue
                            flag = True
                            if (user in aU["followers"]
                                    or user in aU["friends"]):
                                reached.append(anotherUser)
                                G.add_node(anotherUser)
                                G.add_edge(user, anotherUser)
                        newLevel.extend(reached)
                        # Removed only after the scan so the set is never
                        # mutated while it is being iterated.
                        userSet.difference_update(reached)
                    level = newLevel

                # Users that were never reached stay as rogue nodes.
                for user in userSet:
                    G.add_node(user)

            if flag:
                # Attach every isolated node to the imaginary node -1 so
                # that it can be deleted in Gephi later on.
                for node in list(G.nodes):
                    if not list(G.neighbors(node)):
                        G.add_edge(node, -1)

                print("exporting")
                print("%s %d" % (sourceName, len(G.nodes)))
                nx.write_edgelist(
                    G,
                    "graphs/" + typepath + "TweetGraph_{}.csv".format(
                        str(sourceName) + "_" + str(tweetKey)),
                    data=False)
def graph_generation_basedOn_tweet_with_hops1(stop):
    """Build, export and draw the follower/friend graph of every stored tweet.

    All user records found under ``all_the_users_list`` in the
    ``users_info`` collection are merged into one lookup table.  For every
    tweet listed under ``specific_tweet_id`` each involved user becomes a
    node, and two distinct involved users are connected when one of them
    appears in the other's ``followers`` or ``friends`` list.  If at least
    one involved user had a record, the edge list is written to
    ``TweetGraph_<screen_name>.csv`` and the graph is drawn: involved users
    in red, the tweet's author in blue, users without a record in black.

    NOTE(review): one graph object is created per document and shared by all
    tweets of that document -- confirm this is intended.

    Args:
        stop: Unused; kept for interface compatibility.
    """
    collection = db["users_info"]

    # Merge every stored chunk of user records into a single lookup table.
    all_the_users = {}
    for doc in collection.find({"all_the_users_list": {"$exists": True}}):
        all_the_users.update(doc["all_the_users_list"])

    for doc in collection.find({"specific_tweet_id": {"$exists": True}}):
        G = nx.Graph()
        for tweet in doc["specific_tweet_id"]:
            # Becomes True once any involved user has a record.
            flag = False
            nodes = []
            emptyNodes = []
            seen_empty = set()
            for tweetKey in tweet.keys():
                status = api.get_status(id=tweetKey)
                source = status.user.id
                sourceName = status.user.screen_name

                usersInvolvedList = tweet[tweetKey]
                for user in usersInvolvedList:
                    G.add_node(user)
                    nodes.append(user)

                for user in usersInvolvedList:
                    for otherUser in usersInvolvedList:
                        # Value comparison: ``is not`` on integers tests
                        # object identity, which does not reliably tell
                        # equal ids apart from different ones.
                        if user == otherUser:
                            continue
                        oU = all_the_users.get(str(otherUser))
                        if oU is None:
                            # Record each user without data only once; it
                            # is met again for every ``user`` of the outer
                            # loop.
                            if otherUser not in seen_empty:
                                seen_empty.add(otherUser)
                                emptyNodes.append(otherUser)
                            continue
                        flag = True
                        if user in oU["followers"]:
                            print("foll %s %s" % (user, otherUser))
                            G.add_edge(user, otherUser)
                        if user in oU["friends"]:
                            print("frie %s %s" % (user, otherUser))
                            G.add_edge(user, otherUser)

            # TODO: an unfinished experiment used to live here (commented
            # out): rebuild the graph by ordering the nodes by their
            # shortest-path distance to the source and adding only the
            # missing nodes/edges along those paths.

            if flag:
                print("drawing")
                nx.write_edgelist(G,
                                  "TweetGraph_{}.csv".format(str(sourceName)),
                                  data=False)
                pos = nx.random_layout(G)  # positions for all nodes
                nx.draw_networkx_nodes(G, pos, nodelist=nodes,
                                       node_color='r', node_size=10,
                                       alpha=0.8)
                nx.draw_networkx_nodes(G, pos, nodelist=[source],
                                       node_color='b', node_size=50,
                                       alpha=0.8)
                nx.draw_networkx_nodes(G, pos, nodelist=emptyNodes,
                                       node_color='black', node_size=5,
                                       alpha=0.8)
                nx.draw_networkx_edges(G, pos, width=1, alpha=0.5,
                                       edge_color='g')
                plt.title(tweetKey + " from " + sourceName)
                plt.show()
p.circle(circles_x, circles_y, size=12, color='red') # Set to output the plot in the notebook output_notebook() # Show the plot show(p) import sys import matplotlib.pyplot as plt import networkx as nx G = nx.grid_2d_graph(5, 5) # 5x5 grid # print the adjacency list for line in nx.generate_adjlist(G): print(line) # write edgelist to grid.edgelist nx.write_edgelist(G, path="grid.edgelist", delimiter=":") # read edgelist from grid.edgelist H = nx.read_edgelist(path="grid.edgelist", delimiter=":") nx.draw(H) plt.show() from ipywidgets import interact ### matplotlib inline import matplotlib.pyplot as plt import networkx as nx # wrap a few graph generation functions so they have the same signature def random_lobster(n, m, k, p): return nx.random_lobster(n, p, p / m)
# Load the validation and test queries (the training queries, `train_data`,
# are loaded before this excerpt).
valid_data = get_query(config['valid_query_path'])
test_data = get_query(config['test_query_path'])

# Stack the pieces of every split into one array per split.
train = np.concatenate(train_data)
valid = np.concatenate(valid_data)
test = np.concatenate(test_data)

# The last column of the training rows is dropped before all splits are
# stacked together.
total = np.concatenate([train[:, :-1], valid, test])

# The first two columns of every row are the endpoints of a directed edge.
edges = [(int(row[0]), int(row[1])) for row in total]

G = nx.DiGraph()
G.add_edges_from(edges)
nx.write_edgelist(G, 'wholeGraph.csv', data=False)

# Bare expressions: only displayed when run interactively.
G.number_of_nodes()
G.number_of_edges()

# Per-node statistics, keyed by node.
in_degree = {node: G.in_degree[node] for node in G.nodes}
out_degree = {node: G.out_degree[node] for node in G.nodes}
clustering_coefficient = {node: nx.clustering(G, node) for node in G.nodes}

# Plotting
# in degree
in_degree_values = np.array(list(in_degree.values()))
plt.hist(in_degree_values, bins=40)
import operator
import pandas as pd
from collections import Counter
import networkx as nx

# Value looked for in the last whitespace-separated column of the check-in
# file.  NOTE(review): presumably a Brightkite location id -- confirm.
BUSY_LOCATION_ID = 'ecbfba0ca22411ddb71dfb65ad521832'

# Single streaming pass over the check-in log: matching lines are copied to
# busy.txt and the integer in their first column is collected.  The file is
# never loaded into memory as a whole, and blank lines are skipped instead
# of raising IndexError.
list1 = []
with open("Brightkite_totalCheckins.txt", "r") as checkins, \
        open("busy.txt", "w") as f:
    for line in checkins:
        fields = line.split()
        if fields and fields[-1] == BUSY_LOCATION_ID:
            f.write(line)
            list1.append(int(fields[0]))

# Unique ids taken from the first column of the matching lines.
list_set = set(list1)
listu = list(list_set)

# Load the edge list with integer node labels and keep only the subgraph
# induced by the collected ids.
G = nx.read_edgelist('Brightkite_edges.txt', nodetype=int)
H = G.subgraph(listu)
nx.write_edgelist(H, "subgraph10.edgelist")
import networkx as nx
import matplotlib.pyplot as plt

# Read the tab-separated edge list.  Node labels are kept as read; the
# nodetype=int and weight-parsing options stay disabled.
g = nx.read_edgelist("concatenated_sa.csv", delimiter="\t")

# Relabel the nodes with consecutive integers starting at 1; the original
# label of each node is stored in its "old" attribute.
h = nx.convert_node_labels_to_integers(
    g,
    first_label=1,
    label_attribute="old",
)

# Save the relabelled graph without edge attributes.
nx.write_edgelist(h, "edges.csv", data=False)

# NOTE(review): nx.info is not available in NetworkX 3.x -- confirm the
# targeted NetworkX version.
summary = nx.info(g)
print(summary)

# Show the relabelled graph with its integer labels.
nx.draw(h, with_labels=True)
plt.show()
# Minimum Pearson correlation coefficient for two genes to share an edge.
threshold = 0.7512

# Convert every gene's values to a float array once, instead of once per
# pair inside the nested loop.  The builtin ``float`` replaces ``np.float``,
# an alias that no longer exists in current NumPy releases.
profiles = {key: np.array(value).astype(float)
            for key, value in everything.items()}

# Every ordered pair of distinct genes is visited (so each unordered pair is
# evaluated twice, exactly as before).
for key, first_value in profiles.items():
    for key1, second_value in profiles.items():
        if key != key1:
            # Correlation coefficient between the two genes, 4 decimals.
            values = round(np.corrcoef(first_value, second_value)[1, 0], 4)
            print(values)
            # If pcc is greater than the threshold both genes share an edge
            if values > threshold:
                G.add_edge(key, key1)

# Now write to the edgelist
nx.write_edgelist(G, 'C:\\Users\\hp\\Desktop\\edgelist060.txt', data=False)

# Clustering coefficient averaged over all nodes
f = nx.average_clustering(G)
print(f)

# Shortest path length (the object returned by networkx is printed as is)
sh = nx.shortest_path_length(G)
print(sh)

# Calculate number of nodes
print(nx.number_of_nodes(G))

# Calculate number of edges
print(nx.number_of_edges(G))

# Calculate average degree
# Loads a previously saved edge list as a directed graph.
# NOTE(review): this reads edgelist075.txt while the graph above was written
# to edgelist060.txt -- confirm that is the intended input.
cam = nx.read_edgelist('C:\\Users\\hp\\Desktop\\edgelist075.txt',
                       create_using=nx.DiGraph())