def main(): """ Pre-processing: load data, compute centrality measures, write files with node data """ print(nx.__version__) # Load network data, create storage dict, and extract main component depends=nx.read_edgelist("data/depends.csv",delimiter=",",create_using=nx.DiGraph(),nodetype=str,data=(("weight",time_from_today),)) depends.name="depends" suggests=nx.read_edgelist("data/suggests.csv",delimiter=",",create_using=nx.DiGraph(),nodetype=str,data=(("weight",time_from_today),)) suggests.name="suggests" imports=nx.read_edgelist("data/imports.csv",delimiter=",",create_using=nx.DiGraph(),nodetype=str,data=(("weight",time_from_today),)) imports.name="imports" nets_dict={"depends":depends,"suggests":suggests,"imports":imports} for k in nets_dict.keys(): main_component=nx.connected_component_subgraphs(nets_dict[k].to_undirected())[0].nodes() nets_dict[k]=nx.subgraph(nets_dict[k],main_component) # Run multiple measures on graphs and normalize weights measure_list=[nx.in_degree_centrality,nx.betweenness_centrality,nx.pagerank] for g in nets_dict.values(): multiple_measures(g,measure_list) normalize_weights(g) # Output networks in GraphML format (to store node attributes) for i in nets_dict.items(): # print(i[1].edges(data=True)) nx.write_graphml(i[1],"data/"+i[0]+"_data.graphml") print("") print("All files written with data") """Visualization:
def filterNet(DG, mindegree=None, indegree=100, outdegree=50, outdegreemax=9999999, indegreemax=999999):
    print('In filterNet')
    filter = []
    for n in DG:
        if outdegreemax is None or DG.out_degree(n) <= outdegreemax:
            if mindegree is not None:
                if DG.degree(n) >= mindegree:
                    filter.append(n)
            else:
                if indegree is not None:
                    if DG.in_degree(n) >= indegree:
                        filter.append(n)
                if outdegree is not None:
                    if DG.out_degree(n) >= outdegree:
                        filter.append(n)
    # the filter represents the intersection of the *degree sets;
    # indegree and outdegree values are ignored if mindegree is set
    filter = set(filter)
    H = DG.subgraph(filter)
    # Superstitiously, perhaps, make sure we only grab nodes that project edges...
    filter = [n for n in H if H.degree(n) > 0]
    L = H.subgraph(filter)
    print("Filter set:", filter)
    print(L.order(), L.size())
    L = labelGraph(L, filter)
    nx.write_graphml(L, projname + "/followersCommonFriends.graphml")
    nx.write_edgelist(L, projname + "/followersCommonFriends.txt", data=False)
def main():
    files = []
    for i in range(1, 26):
        files.append("db/Minna_no_nihongo_1.%02d.txt" % i)
    for i in range(26, 51):
        files.append("db/Minna_no_nihongo_2.%02d.txt" % i)
    words = get_words_from_files(files)
    G = nx.Graph()
    for w in words:
        G.add_node(w)
        G.nodes[w]['chapter'] = words[w]['chapter']
        G.nodes[w]['kana'] = words[w]['kana']
        G.nodes[w]['meaning'] = words[w]['meaning'][:-1]
    # Link two words if they share a kanji (CJK unified ideograph)
    for word1, word2 in itertools.combinations(words, 2):
        for w1 in word1[:-1]:
            if "CJK UNIFIED" in ud.name(w1) and w1 in word2:
                G.add_edge(word1, word2)
                break
    # nx.connected_component_subgraphs() was removed in NetworkX 2.4;
    # sort the connected components by size and keep the largest.
    components = sorted(nx.connected_components(G), key=len, reverse=True)
    giant = G.subgraph(components[0])
    nx.write_graphml(giant, "kanjis.graphml", encoding='utf-8', prettyprint=True)
def save_graph(self, DirectoryPath_str, FileName_str):
    '''This function saves the graph to a directory in .graphml format.'''
    nx.write_graphml(self.GrapherObject_gr.get_graph(),
                     DirectoryPath_str + FileName_str + '.graphml')
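# [Editor's note] A hedged round-trip sketch to go with save_graph() above:
# os.path.join avoids missing-separator bugs when the directory argument lacks
# a trailing slash, and read_graphml returns node ids as strings by default.
# All names below are hypothetical.
import os
import networkx as nx

G = nx.path_graph(3)
out_path = os.path.join("output", "example.graphml")
os.makedirs(os.path.dirname(out_path), exist_ok=True)
nx.write_graphml(G, out_path)
H = nx.read_graphml(out_path)
print(sorted(H.nodes()))  # ['0', '1', '2'] -- GraphML ids come back as str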
def create_joined_multigraph():
    G = nx.DiGraph()
    upp = nx.read_graphml('upperlevel_hashtags.graphml')
    for ed in upp.edges(data=True):
        # NetworkX 2.x dropped the attr_dict keyword; unpack the data dict instead
        G.add_edge(ed[0], ed[1], **ed[2])
        G.add_edge(ed[1], ed[0], **ed[2])
    mid = nx.read_graphml('friendship_graph.graphml')
    for ed in mid.edges(data=True):
        G.add_edge(ed[0], ed[1], **ed[2])
    inter = nx.read_graphml('interlevel_hashtags.graphml')
    for ed in inter.edges(data=True):
        G.add_edge(ed[0], ed[1], **ed[2])
        G.add_edge(ed[1], ed[0], **ed[2])
    down = nx.read_graphml('retweet.graphml')
    mapping_f = {}
    for i, v in enumerate(down.nodes()):
        mapping_f[v] = '%iretweet_net' % i
    for ed in down.edges(data=True):
        G.add_edge(mapping_f[ed[0]], mapping_f[ed[1]], **ed[2])
    for nd in mid.nodes():
        if nd in mapping_f:
            G.add_edge(nd, mapping_f[nd])
            G.add_edge(mapping_f[nd], nd)
    nx.write_graphml(G, 'joined_3layerdigraph.graphml')  # fixed: was 'joined_3layerdigraph.graphm'
    return G, upp.nodes(), mid.nodes(), mapping_f.values()
def ministro_ministro(G):
    """
    Creates a graph of ministers connected according to the overlap
    in their use of legislation. Built from the ministro_lei graph.
    """
    GM = nx.Graph()
    for m in G:
        try:
            int(m)
        except ValueError:  # Add only if node is a minister
            if m != "None":
                GM.add_node(m)  # Python 3: node labels are already str, no decode needed
    # Add edges
    for n in GM:
        for m in GM:
            if n == m:
                continue
            if GM.has_edge(n, m) or GM.has_edge(m, n):
                continue
            # Edge weight is the cardinality of the intersection of each node's neighbor set.
            w = len(set(nx.neighbors(G, n)) & set(nx.neighbors(G, m)))
            if w > 5:
                GM.add_edge(n, m, weight=w)  # NetworkX 2.x: pass attributes as keywords
    # Abbreviate node names
    GMA = nx.Graph()
    GMA.add_weighted_edges_from([(o.replace('MIN.', '').strip(),
                                  d.replace('MIN.', '').strip(),
                                  di['weight'])
                                 for o, d, di in GM.edges(data=True)])
    P.figure()
    nx.draw_spectral(GMA)
    nx.write_graphml(GMA, 'ministro_ministro.graphml')
    nx.write_gml(GMA, 'ministro_ministro.gml')
    nx.write_pajek(GMA, 'ministro_ministro.pajek')
    nx.nx_pydot.write_dot(GMA, 'ministro_ministro.dot')  # write_dot now lives in nx.nx_pydot
    return GMA
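# [Editor's note] The weighting above is a one-mode projection of a bipartite
# minister-law graph: two ministers are linked by the number of laws they both
# cite. A minimal sketch of the same idea on a toy bipartite graph (names
# hypothetical; the original applies a stricter w > 5 cutoff):
import itertools
import networkx as nx

B = nx.Graph([("min_a", "law1"), ("min_a", "law2"),
              ("min_b", "law1"), ("min_b", "law2"),
              ("min_c", "law2")])
ministers = ["min_a", "min_b", "min_c"]
GM = nx.Graph()
for n, m in itertools.combinations(ministers, 2):
    shared = len(set(B.neighbors(n)) & set(B.neighbors(m)))
    if shared >= 1:
        GM.add_edge(n, m, weight=shared)
print(GM.edges(data=True))  # min_a-min_b carries weight 2, the others weight 1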
def export_graph(G, write_filename):
    write_dir = "./output/" + write_filename + "/"
    if not os.path.isdir(write_dir):
        os.mkdir(write_dir)
    # Remove pho edge weights (NetworkX 2.x: clear each edge's attribute dict)
    for n1, n2 in G.edges():
        G[n1][n2].clear()
    print("\twriting gml")
    for node in G.nodes():
        for key, val in list(G.nodes[node].items()):
            G.nodes[node][key] = int(val)
    nx.write_gml(G, write_dir + write_filename + ".gml")
    print("\twriting graphml")
    nx.write_graphml(G, write_dir + write_filename + ".graphml")
    print("\twriting edgelist")
    with open(write_dir + write_filename + ".edgelist", "w") as f:
        for edge in G.edges():
            f.write("\t".join([str(end) for end in list(edge)[:2]]) + "\n")
    print("\twriting nodelist")
    with open(write_dir + write_filename + ".nodelist", "w") as f:
        f.write("\t".join(["node_id"] + node_attributes) + "\n")
        for node in G.nodes():
            f.write("\t".join([str(node)] + [str(G.nodes[node][attribute])
                                             for attribute in node_attributes]) + "\n")
def createMergedGraph(groupSampleDict, processedDataDir, rawModelDir):
    print('Merging genomes from specified taxonomic group')
    # Loop over the keys of the dictionary, one for each group
    for group in groupSampleDict:
        # Create an empty graph object
        mergedGraph = nx.DiGraph()
        # Read in the graph of each sample and merge with the graph from the
        # previous iteration
        for sample in groupSampleDict[group]:
            # Read in adjacency list and convert to digraph object
            myDiGraph = nx.read_adjlist(rawModelDir + '/' + sample + '/' + sample + 'AdjList.txt',
                                        create_using=nx.DiGraph())
            # Append to the previous graph
            mergedGraph = nx.compose(mergedGraph, myDiGraph)
        # Check that the proper output directory exists. If not, create it.
        if not os.path.exists(processedDataDir + '/' + group):
            os.makedirs(processedDataDir + '/' + group)
        nx.write_adjlist(mergedGraph, processedDataDir + '/' + group + '/' + group + 'AdjList.txt')
        nx.write_graphml(mergedGraph, processedDataDir + '/' + group + '/' + group + 'Graph.xml')
    return
def lei_vs_lei(nedges=None):
    """
    All-against-all graph (laws)
    """
    # Flávio's original version, commented out:
    # curgrafo.execute('select lei_id_1,esfera_1,lei_1,lei_id_2,esfera_2, lei_2, peso from vw_gr_lei_lei where peso >300 and lei_id_2>2')
    # curgrafo.execute('select lei_id_1,lei_tipo_1,lei_nome_1,lei_id_2,lei_tipo_2, lei_nome_2, peso from vw_gr_lei_lei where lei_count <= 20 and lei_id_1 = 1 and lei_id_2 <= 20 limit 0,1000')
    # curgrafo.execute('select lei_id_1,lei_tipo_1,lei_nome_1,lei_id_2,lei_tipo_2, lei_nome_2, peso from vw_gr_lei_lei where lei_count <= 8 and lei_id_1 <= 20 and lei_id_2 <= 20 limit 0,1000')
    curgrafo.execute('select lei_id_1,esfera_1,lei_1,lei_id_2,esfera_2, lei_2, peso from vw_gr_lei_lei where lei_count <= 10 and lei_id_1 <= 50 and lei_id_2 <= 200 limit 0,10000')
    if not nedges:
        res = curgrafo.fetchall()
        nedges = len(res)
    else:
        res = curgrafo.fetchmany(nedges)
    eds = [(i[0], i[3], i[6]) for i in res]
    G = nx.Graph()
    # eds = [i[:3] for i in res]
    G.add_weighted_edges_from(eds)
    print("== Grafo Lei_Lei ==")
    print("==> Order: ", G.order())
    print("==> # Edges: ", len(G.edges()))
    # Adding attributes to nodes
    for i in res:
        G.nodes[i[0]]['esfera'] = i[1]
        G.nodes[i[0]]['lei'] = i[2]
        G.nodes[i[3]]['esfera'] = i[4]
        G.nodes[i[3]]['lei'] = i[5]
    nx.write_graphml(G, 'lei_lei.graphml')
    nx.write_gml(G, 'lei_lei.gml')
    nx.write_pajek(G, 'lei_lei.pajek')
    nx.nx_pydot.write_dot(G, 'lei_lei.dot')  # write_dot moved to nx.nx_pydot in NetworkX 2.x
    return G, res
def produce_graph(cutoff, transform):
    G = nx.Graph()
    allusers = set()
    for c in user_relations:
        allusers.add(c[0])
        allusers.add(c[1])
    for u in allusers:
        G.add_node(u, weight=user_influence[u])
    user_top = {}
    for c in user_relations:
        user1 = c[0]
        user2 = c[1]
        score = user_relations[c]
        if user1 not in user_top:
            user_top[user1] = {}
        if user2 not in user_top:
            user_top[user2] = {}
        user_top[user1][user2] = score
    # Keep only each user's five strongest relations
    for u in user_top:
        top5 = dict(sorted(user_top[u].items(), key=lambda x: x[1], reverse=True)[:5])
        for j in top5:
            G.add_edge(u, j, weight=user_top[u][j])
    ## Alternative: threshold on raw relation strength instead of top-5
    ##    if user_relations[c] > cutoff:
    ##        G.add_edge(c[0], c[1], weight=transform(user_relations[c]))
    nx.write_graphml(G, 'graph' + time.strftime("%Y-%m-%d-%H%M%S", time.gmtime()) + '.graphml')
def generate_graph(self, result_only=False):
    media_id = self.media_id
    print('running', media_id)
    self.comments = []
    self.current_media = me.MediaHelper.get_media(media_id)
    self.comments = co.CommentHelper.get_comment(media_id)
    self.total_comments = len(self.comments)
    self.G = nx.DiGraph()
    self.G.add_node(self.current_media.user_id(),
                    username=self.current_media.username(),
                    link=self.current_media.link(),
                    tags=self.current_media.tags())
    self.add_comment(self.current_media.user_id(), 0)
    # code.interact(local=locals())
    self.calculate_influence(self.current_media.user_id())
    self.calculate_total_normalised_influence()
    self.calculate_no_of_normalised_influence()
    if not result_only:
        self.output_script_file()
        self.output_csv_file()
        nx.draw(self.G)
        plt.show(block=False)
        plt.savefig(self.IMAGE_FILENAME.format(media_id), format="PNG")
        filename = self.FILENAME.format(media_id)
        nx.write_graphml(self.G, filename)
        self.log_result()
def save_celltype_graph(self, filename="celltype_conn.gml", format="gml"):
    """
    Save the celltype-to-celltype connectivity information in a file.

    filename -- path of the file to be saved.

    format -- format to save in. Using GML as GraphML support is not
    complete in NetworkX.
    """
    start = datetime.now()
    if format == "gml":
        nx.write_gml(self.__celltype_graph, filename)
    elif format == "yaml":
        nx.write_yaml(self.__celltype_graph, filename)
    elif format == "graphml":
        nx.write_graphml(self.__celltype_graph, filename)
    elif format == "edgelist":
        nx.write_edgelist(self.__celltype_graph, filename)
    elif format == "pickle":
        nx.write_gpickle(self.__celltype_graph, filename)
    else:
        raise Exception("Supported formats: gml, yaml, graphml, edgelist, pickle. Received: %s" % (format,))
    end = datetime.now()
    delta = end - start
    config.BENCHMARK_LOGGER.info(
        "Saved celltype_graph in file %s of format %s in %g s"
        % (filename, format, delta.seconds + delta.microseconds * 1e-6))
    print("Saved celltype connectivity graph in", filename)
def test_write_read_attribute_numeric_type_graphml(self):
    from xml.etree.ElementTree import parse

    G = self.attribute_numeric_type_graph
    fh = io.BytesIO()
    nx.write_graphml(G, fh, infer_numeric_types=True)
    fh.seek(0)
    H = nx.read_graphml(fh)
    fh.seek(0)

    assert_equal(sorted(G.nodes()), sorted(H.nodes()))
    assert_equal(sorted(G.edges()), sorted(H.edges()))
    assert_equal(sorted(G.edges(data=True)), sorted(H.edges(data=True)))
    self.attribute_numeric_type_fh.seek(0)

    xml = parse(fh)
    # Children are the key elements, and the graph element
    children = list(xml.getroot())  # Element.getchildren() was removed in Python 3.9
    assert_equal(len(children), 3)

    keys = [child.items() for child in children[:2]]

    assert_equal(len(keys), 2)
    assert_in(('attr.type', 'double'), keys[0])
    assert_in(('attr.type', 'double'), keys[1])
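# [Editor's note] What infer_numeric_types does, in isolation: when one
# attribute mixes ints and floats, the writer promotes the declared GraphML
# attr.type to a single "double" key instead of emitting one key per Python
# type ("long" and "double"). A minimal sketch:
import io
import networkx as nx

G = nx.Graph()
G.add_edge("a", "b", weight=1)    # int value
G.add_edge("b", "c", weight=2.5)  # float value
buf = io.BytesIO()
nx.write_graphml(G, buf, infer_numeric_types=True)
print(b'attr.type="double"' in buf.getvalue())  # True: one unified key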
def save_graph(self, graphname, fmt='edgelist'):
    """
    Saves the graph to disk

    **Positional Arguments:**

        graphname:
            - Filename for the graph

    **Optional Arguments:**

        fmt:
            - Output graph format
    """
    self.g.graph['ecount'] = nx.number_of_edges(self.g)
    g = nx.convert_node_labels_to_integers(self.g, first_label=1)
    if fmt == 'edgelist':
        nx.write_weighted_edgelist(g, graphname, encoding='utf-8')
    elif fmt == 'gpickle':
        nx.write_gpickle(g, graphname)
    elif fmt == 'graphml':
        nx.write_graphml(g, graphname)
    else:
        raise ValueError('edgelist, gpickle, and graphml currently supported')
def graph_constructed(path, name):
    list_line = load_file(path, name)
    list_source_nodes = []
    list_target_nodes = []
    DG = nx.DiGraph()
    for line in list_line:
        split_line = line.strip().split('\t')
        target = split_line[0]
        source = split_line[1]
        type = split_line[2]
        weight = int(split_line[3])
        if source not in list_source_nodes:
            list_source_nodes.append(source)
            DG.add_node(source, type_=type)
        if target not in list_target_nodes:
            list_target_nodes.append(target)
            DG.add_node(target, type_='target')  # fixed: originally re-added `source` here
        DG.add_weighted_edges_from([(source, target, weight)])
    nx.write_graphml(DG, 'd:/' + name.replace('.csv', '') + '.graphml')
def output_graph(mps, mp_data, edges):
    G = nx.Graph()
    # Define the nodes
    for mp in mps:
        G.add_node(mp, label=mp_data[mp]["name"],
                   party=mp_data[mp]["party"],
                   constituency=mp_data[mp]["constituency"])
    # Process all known edges
    for (mp_tuple, agr_data) in edges.items():
        agreements = agr_data[0]
        agreement_rate = agr_data[2]
        # Depending on the selection criteria, filter out relationships
        if agreement_rate < 85:
            continue
        # Determine a (normalized) weight, again depending on the desired graph
        # edge_wt = agreements
        range_min = 85
        range_max = 100
        weight_base = agreement_rate - range_min
        edge_wt = float(weight_base) / float(range_max - range_min)
        G.add_edge(mp_tuple[0], mp_tuple[1], agreement=agreement_rate, weight=edge_wt)
    nx.write_graphml(G, "mp_agreement.graphml")
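# [Editor's note] The edge weight above is a min-max rescaling of the
# agreement rate from the retained [85, 100] band onto [0, 1]; e.g. a rate of
# 91 maps to (91 - 85) / (100 - 85) = 0.4. The same mapping as a standalone
# helper (hypothetical name):
def rescale(value, range_min=85.0, range_max=100.0):
    """Min-max rescale `value` from [range_min, range_max] onto [0, 1]."""
    return (value - range_min) / (range_max - range_min)

assert rescale(85) == 0.0
assert rescale(91) == 0.4
assert rescale(100) == 1.0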
def nC_network_to_graphml(project, graphml_file_path='test.graphml'):
    # extract cell position records
    cell_positions = [(pos_record.x_pos, pos_record.y_pos, pos_record.z_pos)
                      for pos_record in project.generatedCellPositions.getAllPositionRecords()]
    # create graph object
    graph = nx.Graph()
    graph.add_nodes_from(range(len(cell_positions)))
    # add node properties for positions. Permute x,y,z to get a nicer
    # default visualisation in Gephi when opening the resulting
    # graphml file.
    for k, position in enumerate(cell_positions):
        graph.nodes[k]['x'] = position[2]
        graph.nodes[k]['y'] = position[0]
        graph.nodes[k]['z'] = position[1]
    # add edges
    for conn_name in project.generatedNetworkConnections.getNamesNonEmptyNetConns():
        conns = project.generatedNetworkConnections.getSynapticConnections(conn_name)
        assert len(conns) == 1
        conn = conns[0]
        source = conn.sourceEndPoint.cellNumber
        target = conn.targetEndPoint.cellNumber
        graph.add_edge(source, target, weight=conn.props[0].weight)
    # save to disk
    nx.write_graphml(graph, graphml_file_path)
    return graph
def test_preserve_multi_edge_data(self):
    """
    Test that data and keys of edges are preserved on consequent
    write and reads
    """
    G = nx.MultiGraph()
    G.add_node(1)
    G.add_node(2)
    G.add_edges_from([
        # edges with no data, no keys:
        (1, 2),
        # edges with only data:
        (1, 2, dict(key='data_key1')),
        (1, 2, dict(id='data_id2')),
        (1, 2, dict(key='data_key3', id='data_id3')),
        # edges with both data and keys:
        (1, 2, 103, dict(key='data_key4')),
        (1, 2, 104, dict(id='data_id5')),
        (1, 2, 105, dict(key='data_key6', id='data_id7')),
    ])
    fh = io.BytesIO()
    nx.write_graphml(G, fh)
    fh.seek(0)
    H = nx.read_graphml(fh, node_type=int)
    assert_edges_equal(
        G.edges(data=True, keys=True), H.edges(data=True, keys=True)
    )
    assert_equal(G._adj, H._adj)
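# [Editor's note] GraphML has no native notion of multigraph edge keys, so
# NetworkX round-trips them through the edge id; the test above guards against
# collisions with user data named 'key' or 'id'. A minimal sketch of the
# round-trip the test exercises:
import io
import networkx as nx

G = nx.MultiGraph()
G.add_edge(1, 2, weight=1.0)          # auto-assigned key 0
G.add_edge(1, 2, key=7, color="red")  # explicit key
buf = io.BytesIO()
nx.write_graphml(G, buf)
buf.seek(0)
H = nx.read_graphml(buf, node_type=int)
print(sorted(H.edges(keys=True)))  # keys survive: [(1, 2, 0), (1, 2, 7)]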
def filterNet(DG, mindegree):
    if addUserFriendships == 1:
        DG = addFocus(DG, user, typ)
    mindegree = int(mindegree)
    filter = [n for n in DG if DG.degree(n) >= mindegree]
    H = DG.subgraph(filter)
    print("Filter set:", filter)
    print(H.order(), H.size())
    LH = labelGraph(H, filter)
    now = datetime.datetime.now()
    ts = now.strftime("_%Y-%m-%d-%H-%M-%S")
    nx.write_graphml(H, '/'.join([path, agent, typ, tt + "degree" + str(mindegree) + ts + ".graphml"]))
    nx.write_edgelist(H, '/'.join([path, agent, typ, tt + "degree" + str(mindegree) + ts + ".txt"]), data=False)
    # indegree = sorted(nx.indegree(DG).values(), reverse=True)
    # NetworkX 2.x returns degree views; materialize them as dicts
    indegree = dict(H.in_degree())
    outdegree = dict(H.out_degree())
    inout = [indegree, outdegree]
    inoutpair = {}
    for k in indegree.keys():
        inoutpair[k] = tuple(d[k] for d in inout)
    fig = plt.figure()
    ax = fig.add_subplot(111)
    # ax.plot(indegree, outdegree, 'o')
    # ax.set_title('Indegree vs outdegree')
    degree_sequence = sorted(indegree.values(), reverse=True)
    plt.loglog(degree_sequence)
    plt.savefig('/'.join([path, agent, typ, tt + "degree" + str(mindegree) + "outdegree_histogram.png"]))
def main():
    arg_parser = ArgumentParser(description='add edge weights to tree')
    arg_parser.add_argument('--input', required=True, help='input file')
    arg_parser.add_argument('--output', required=True, help='output file')
    arg_parser.add_argument('--seed', type=int, default=None,
                            help='seed for random number generator')
    arg_parser.add_argument('--delim', dest='delimiter', default=' ',
                            help='delimiter for edge list')
    arg_parser.add_argument('--no-data', action='store_true', dest='no_data',
                            help='omit edge data')
    arg_parser.add_argument('--edge-list', action='store_true',
                            help='generate edge list output')
    options = arg_parser.parse_args()
    random.seed(options.seed)
    tree = nx.read_graphml(options.input)
    add_edge_weights(tree)
    if options.edge_list:
        nx.write_edgelist(tree, options.output, delimiter=options.delimiter,
                          data=not options.no_data)
    else:
        nx.write_graphml(tree, options.output)
    return 0
def graphMLfromCSV(self, csvfile, filter):
    # `float(x) is False` can never be true; treat a falsy filter as 0.0 instead
    filter_score = float(filter) if filter else 0.0
    csv = self.csv.csvaslist(csvfile)
    graphfile = csvfile.split('.', 1)[0] + "-graph.xml"
    nodes = []
    for line in csv:
        # we should have a correctly formatted CSV to work with
        if 'score' in line and 'source' in line and 'target' in line:
            source = line[u'source']
            target = line[u'target']
            score = float(line[u'score']) / 100
            if score >= filter_score:
                if source not in nodes:
                    self.G.add_node(source)
                    nodes.append(source)
                if target not in nodes:
                    self.G.add_node(target)
                    nodes.append(target)
                self.G.add_edge(source, target, weight=score)
                print(score)
    nx.write_graphml(self.G, graphfile)
def main():
    universe = nx.read_graphml(sys.argv[1])
    beings = [x for x in universe.nodes(data=True) if x[1]["type"] == "Being"]
    clients = [x for x in universe.nodes(data=True) if x[1]["type"] == "client"]
    firm = [x for x in universe.nodes(data=True) if x[1]["type"] == "firm"]
    print(len(beings))
    print(len(clients))
    print(len(firm))
    for b in beings:
        ns = list(universe.neighbors(b[0]))  # materialize: neighbors() is an iterator in NetworkX 2.x
        rep = ns[0]
        for n in ns[1:]:
            for nn in list(universe.neighbors(n)):
                universe.add_edge(rep, nn)  # doesn't preserve directions or properties, yolo
            universe.remove_node(n)
        universe.remove_node(b[0])
    beings = [x for x in universe.nodes(data=True) if x[1]["type"] == "Being"]
    clients = [x for x in universe.nodes(data=True) if x[1]["type"] == "client"]
    firm = [x for x in universe.nodes(data=True) if x[1]["type"] == "firm"]
    print(len(beings))
    print(len(clients))
    print(len(firm))
    nx.write_graphml(universe, "simplified-{}.graphml".format(int(time.time())))
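# [Editor's note] The merge loop above collapses each Being's neighbors into a
# single representative node; NetworkX ships nx.contracted_nodes() for exactly
# this kind of merge (it likewise does not preserve parallel-edge properties
# on a plain Graph). A sketch on a toy graph:
import networkx as nx

G = nx.Graph([("being", "rep"), ("being", "dup"), ("rep", "x"), ("dup", "y")])
# Merge 'dup' into 'rep'; self_loops=False drops any resulting self-loops
H = nx.contracted_nodes(G, "rep", "dup", self_loops=False)
print(sorted(H.nodes()))  # ['being', 'rep', 'x', 'y']
print(sorted(H.edges()))  # dup's edge to 'y' now hangs off 'rep'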
def finish_graph():
    nx.draw(g)
    file_name = 'host_hops1.png'
    file_nameg = 'host_hops1.graphml'
    plt.savefig(file_name)
    nx.write_graphml(g, file_nameg)
    print("Graph Drawn")
def build_graph(self):
    self.node_list = [self.like_minded_friend, self.books_reco_from,
                      self.movies_reco_from, self.music_reco_from,
                      self.sport_reco_from, 'Read a book? Ask me',
                      'Watch a movie? Ask me', 'Sing along with',
                      'Sports arena?', 'Similar tastes?']
    # self.node_list.append(self.friends_likes[self.like_minded_friend][:10])
    self.node_list.append(self.user_name)
    self.G.add_nodes_from(self.node_list)
    self.G.add_edge(self.user_name, 'Similar tastes?', color='purple')
    self.G.add_edge('Similar tastes?', self.like_minded_friend, color='purple')
    self.G.add_edge(self.user_name, 'Read a book? Ask me', color='blue')
    self.G.add_edge('Read a book? Ask me', self.books_reco_from, color='blue')
    self.G.add_edge(self.user_name, 'Watch a movie? Ask me', color='green')
    self.G.add_edge('Watch a movie? Ask me', self.movies_reco_from, color='green')
    self.G.add_edge(self.user_name, 'Sing along with', color='yellow')
    self.G.add_edge('Sing along with', self.music_reco_from, color='yellow')
    self.G.add_edge(self.user_name, 'Sports arena?', color='orange')
    self.G.add_edge('Sports arena?', self.sport_reco_from, color='orange')
    # Use one consistent label; the original mixed 'Pages you might like!' and
    # 'Pages you might like', which silently created two separate nodes.
    self.G.add_edge(self.user_name, 'Pages you might like', color='red')
    for node in self.friends_likes[self.like_minded_friend][:10]:
        self.G.add_edge('Pages you might like', node, color='red')
    nx.write_graphml(self.G, 'FBViz.graphml')
def start(self):
    for id in self.oidRootNamePairs:
        self.oidNamePairs, currIDs = Utils.getoidNames(self.oidNamePairs, id, Def.typ)
        Utils.report('Processing current IDs: ' + str(currIDs))
        flip = (Def.typ == 'fr')
        self.addDirectedEdges(id, currIDs, flip=flip)
        n = len(currIDs)
        Utils.report('Total amount of IDs: ' + str(n))
        c = 1
        for cid in currIDs:
            # joined into a single message; the original passed two arguments to report()
            Utils.report('\tSub-level run: getting ' + Def.typ2 + ' ' +
                         str(c) + ' of ' + str(n) + ' ' + Def.typ + ' ' + cid)
            self.oidNamePairs, ccurrIDs = Utils.getoidNames(self.oidNamePairs, cid, Def.typ2)
            self.addDirectedEdges(cid, ccurrIDs)
            c = c + 1
    for id in self.oidRootNamePairs:
        if id not in self.oidNamePairs:
            self.oidNamePairs[id] = self.oidRootNamePairs[id]
    self.labelNodes(self.oidNamePairs)
    Utils.report(nx.info(self.DG))
    now = datetime.datetime.now()
    timestamp = now.strftime("_%Y-%m-%d-%H-%M-%S")
    fname = UserID._name.replace(' ', '_')
    nx.write_graphml(self.DG, '/'.join(['reports', fname + '_google' + Def.typ + 'Friends_' + timestamp + ".graphml"]))
    nx.write_edgelist(self.DG, '/'.join(['reports', fname + '_google' + Def.typ + 'Friends_' + timestamp + ".txt"]), data=False)
def draw_base_graph(self):
    print('writing base graphml ...')
    G = nx.Graph()
    G.add_nodes_from(range(self.num_nodes))
    G.add_edges_from(self.E_base)
    nx.write_graphml(G, 'exodus.graphml')
    print('done ... (load in Gephi)')
def main(): print "time_evol module is the main code." ## to import a network of 3-node example EDGE_FILE = 'C:\Boolean_Delay_in_Economics\Manny\EDGE_FILE.dat' NODE_FILE = 'C:\Boolean_Delay_in_Economics\Manny\NODE_FILE.dat' net = inet.read_network_from_file(EDGE_FILE, NODE_FILE) nodes_list = inet.build_nodes_list(NODE_FILE) ''' ## to obtain time series data for all possible initial conditions for 3-node example network timeSeriesData = ensemble_time_series(net, nodes_list, 2, 10)#, Nbr_States=2, MAX_TimeStep=20) initState = 1 biStates = decimal_to_binary(nodes_list, initState) print 'initial state', biStates ## to print time series data for each node: a, b, c starting particualr decimal inital condition 1 print 'a', timeSeriesData['a'][1] print 'b', timeSeriesData['b'][1] print 'c', timeSeriesData['c'][1] ''' ## to obtain and visulaize transition map in the network state space decStateTransMap = net_state_transition(net, nodes_list) nx.write_graphml(decStateTransMap,'C:\Boolean_Delay_in_Economics\Manny\Results\BDE.graphml') '''
def create_graph():
    G = nx.Graph()
    data = []
    reposts = []
    client = MongoClient()
    db = client["vk_db"]
    collection = db["valid_communities_info"]
    result = collection.find()
    for res in result:
        data.append(res)
    db = client["reposts"]
    collection = db["general"]
    answer = collection.find()
    for res in answer:
        reposts.append(res)
    for each in data:
        G.add_node(each["screen_name"])
        G.nodes[each["screen_name"]]['weight'] = each["weight"]
    for each in reposts:
        G.add_edge(get_name_by_id(each["owner"]),
                   get_name_by_link(each["link"]),
                   weight=each["times"])
    nx.write_graphml(G, 'vk.graphml')
def to_file(self, name_graphml='AST2NX.graphml'):
    """
    Write to a .graphml file, which can be read by many professional
    visualization tools such as Cytoscape.
    """
    if name_graphml.endswith('.graphml'):
        nx.write_graphml(self.NetworkX, name_graphml)
    else:
        nx.write_graphml(self.NetworkX, name_graphml + '.graphml')
def get_topology():
    G = nx.Graph()
    G.add_node("poi-1", packetloss=0.0, ip="0.0.0.0", geocode="US",
               bandwidthdown=17038, bandwidthup=2251, type="net", asn=0)
    G.add_edge("poi-1", "poi-1", latency=50.0, jitter=0.0, packetloss=0.05)
    s = StringIO()
    nx.write_graphml(G, s)
    return s.getvalue()
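# [Editor's note] Writing into a StringIO as above only works on Python 2; on
# Python 3 write_graphml emits encoded bytes, so an in-memory buffer must be a
# BytesIO that is decoded afterwards. A minimal sketch:
import io
import networkx as nx

def graphml_string(G):
    """Serialize a graph to a GraphML string in memory (Python 3)."""
    buf = io.BytesIO()
    nx.write_graphml(G, buf)  # the writer produces bytes, not str
    return buf.getvalue().decode("utf-8")

print(graphml_string(nx.path_graph(2)).splitlines()[0])  # the <?xml ...?> header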
def saveGraph(self):
    networkx.write_graphml(self.Graph, self.get_abs_file_path(self.GraphFile))
def model_calcs(networks, args):
    """
    Function for generating null models and carrying out calculations.

    :param networks: Dictionary with folder name as key and values as tuples (name, network object).
    :param args: Settings for running anuran
    :return:
    """
    if args['core'] < 1:
        args['core'] = 1
        logger.info("Setting cores for multiprocessing to 1.")
    # export intersections
    for size in args['size']:
        for group in networks:
            shared_edges = _intersection(networks[group], float(size), sign=args['sign'], edgelist=True)
            g = _construct_intersection(networks[group], shared_edges)
            nx.write_graphml(g, args['fp'] + '_' + group + '_' + str(size) + '_intersection.graphml')
    # first generate null models
    try:
        random, degree = generate_null(networks, n=args['perm'], npos=args['gperm'],
                                       core=args['core'], fraction=args['cs'], prev=args['prev'])
    except Exception:
        logger.error('Could not generate null models!', exc_info=True)
        sys.exit()
    set_sizes = None
    try:
        set_sizes = generate_sizes(networks, random, degree, core=args['core'], sign=args['sign'],
                                   fractions=args['cs'], prev=args['prev'],
                                   perm=args['nperm'], sizes=args['size'])
        set_sizes.to_csv(args['fp'] + '_sets.csv')
        set_differences = generate_size_differences(set_sizes, sizes=args['size'])
        set_differences.to_csv(args['fp'] + '_set_differences.csv')
        logger.info('Set sizes exported to: ' + args['fp'] + '_sets.csv')
    except Exception:
        logger.error('Failed to calculate set sizes!', exc_info=True)
        sys.exit()
    centralities = None
    if args['centrality']:
        try:
            centralities = generate_ci_frame(networks, random, degree,
                                             fractions=args['cs'], prev=args['prev'],
                                             perm=args['nperm'], core=args['core'])
            centralities.to_csv(args['fp'] + '_centralities.csv')
            logger.info('Centralities exported to: ' + args['fp'] + '_centralities.csv')
        except Exception:
            logger.error('Could not rank centralities!', exc_info=True)
            sys.exit()
    if args['network']:
        try:
            # fixed: the original passed core=args['prev'], which looks like a
            # garbled keyword mix-up; core and prev are passed separately here
            graph_properties = generate_graph_frame(networks, random, degree,
                                                    fractions=args['cs'], prev=args['prev'],
                                                    perm=args['nperm'], core=args['core'])
            graph_properties.to_csv(args['fp'] + '_graph_properties.csv')
            logger.info('Graph properties exported to: ' + args['fp'] + '_graph_properties.csv')
        except Exception:
            logger.error('Could not estimate graph properties!', exc_info=True)
            sys.exit()
    samples = None
    if args['sample']:
        try:
            samples = generate_sample_sizes(networks, random, degree, sign=args['sign'],
                                            core=args['core'], fractions=args['cs'],
                                            perm=args['nperm'], prev=args['prev'],
                                            sizes=args['size'], limit=args['sample'],
                                            number=args['number'])
            samples.to_csv(args['fp'] + '_subsampled_sets.csv')
            logger.info('Subsampled set sizes exported to: ' + args['fp'] + '_subsampled_sets.csv')
        except Exception:
            logger.error('Failed to subsample networks!', exc_info=True)
            sys.exit()
    central_stats = None
    if args['stats']:
        if args['stats'] == 'True':
            args['stats'] = True
        # add code for pvalue estimation
        set_stats = compare_set_sizes(set_sizes)
        set_stats.to_csv(args['fp'] + '_set_stats.csv')
        difference_stats = compare_set_sizes(set_differences)
        difference_stats.to_csv(args['fp'] + '_difference_stats.csv')
        if args['centrality'] and centralities is not None:
            central_stats = compare_centralities(centralities, mc=args['stats'])
            central_stats.to_csv(args['fp'] + '_centrality_stats.csv')
        if args['network']:
            graph_stats = compare_graph_properties(graph_properties)
            graph_stats.to_csv(args['fp'] + '_graph_stats.csv')
    # check if there is an order in the filenames
    for group in networks:
        prefixes = [x[0].split('_')[0] for x in networks[group]]
        try:
            prefixes = [int(x) for x in prefixes]
        except ValueError:
            pass
        if all(isinstance(x, int) for x in prefixes):
            centrality_correlation = correlate_centralities(group, centralities, mc=args['stats'])
            centrality_correlation.to_csv(args['fp'] + '_centrality_correlation.csv')
            graph_correlation = correlate_graph_properties(group, graph_properties)
            # fixed: originally wrote to '_centrality_correlation.csv' again,
            # silently overwriting the centrality correlations
            graph_correlation.to_csv(args['fp'] + '_graph_correlation.csv')
    if args['draw']:
        try:
            for x in networks:
                subset_sizes = set_sizes[set_sizes['Group'] == x]
                draw_sets(subset_sizes, args['fp'] + '_' + x)
                subset_differences = set_differences[set_differences['Group'] == x]
                draw_set_differences(subset_differences, args['fp'] + '_' + x)
                if args['centrality']:
                    subset_centralities = centralities[centralities['Group'] == x]
                    draw_centralities(subset_centralities, args['fp'] + '_' + x)
                if args['sample']:
                    subset_samples = samples[samples['Group'] == x]
                    draw_samples(subset_samples, args['fp'] + '_' + x)
                if args['network']:
                    subset_graphs = graph_properties[graph_properties['Group'] == x]
                    draw_graphs(subset_graphs, args['fp'] + '_' + x)
        except Exception:
            logger.error('Could not draw data!', exc_info=True)
            sys.exit()
    if central_stats is not None:
        return central_stats
if __name__ == "__main__": # File containing all tweets data fname = 'data/stream_.jsonl' if len(sys.argv) == 2 and len(sys.argv[1]) > 0: term = sys.argv[1] export_fname = 'data/' + term + '_co_occurrences.csv' tuples = analyze_co_occurrence(fname=fname) filtered_tuples = filter_tuples_containing_term(tuples=tuples, term=term) export_co_occurrence(term=term, tuples=filtered_tuples, export_fname=export_fname) tuples = filtered_tuples else: print("No term provided for analysis ...") print("Going to analyze all the terms and their co-occurrences.") print( "Export file will contain all the tuples with their co-occurrence count." ) export_fname = 'data/all_co_occurrences.csv' tuples = analyze_co_occurrence(fname=fname) export_co_occurrence(term='', tuples=tuples, export_fname=export_fname) # Directed Graph export_fname = 'data/co_occurrence.graphml' digraph = co_occurrence_network(tuples) nx.write_graphml(digraph, export_fname) print("Co-occurrence Graph is exported at [%s]" % export_fname)
G = nx.Graph()
# manually adding the start nodes
G.add_node('#newyork')
G.add_node('#dog')
# will crawl the 2 main hashtags
for url, main_tag in urls:
    # making a variable of the function return
    queue = hasher(url)
    # keeping visited updated, to not visit it twice
    visited.append(url)
    # loop through the list of hashtags that will be crawled
    for link, tag in queue:
        page = main_url + link
        if page not in visited and len(visited) <= 20:
            # add node for all 20 hashtags found in newyork
            G.add_node(tag)
            # add edge from all found hashtags to newyork
            G.add_edge(main_tag, tag)
            visited.append(page)
            # will go into the already found hashtags to find their own hashtags
            new_queue = hasher(page)
            # adding nodes from a list of hashtags
            G.add_nodes_from([tag for link, tag in new_queue])
            # same with edges
            for link, taggy in new_queue:
                G.add_edge(tag, taggy)
        else:
            pass
nx.write_graphml(G, "newyorkdog.graphml")
# Edge attributes (NetworkX 2.x: g.edges[u, v] replaces g.edge[u][v])
g.edges["Alan", "Bob"]["relationship"] = "Friends"
g.edges["Carol", "Denise"]["relationship"] = "Friends"
g.edges["Carol", "Bob"]["relationship"] = "Married"

# New edge with an attribute
g.add_edges_from([["Carol", "Alan", {"relationship": "Friends"}]])

for e in g.edges():
    n1 = e[0]
    n2 = e[1]
    print("{0} and {1} are {2}".format(n1, n2, g.edges[n1, n2]["relationship"]))

# Save g to the file my_graph.graphml in graphml format
# prettyprint will make it nice for a human to read
nx.write_graphml(g, "my_graph.graphml", prettyprint=True)

# Layout g with the Fruchterman-Reingold force-directed
# algorithm and save the result to networkx_graph.png
# with_labels will label each node with its id
nx.draw_spring(g, with_labels=True)
plt.savefig("networkx_graph.png")
plt.clf()  # Clear plot

print('Information about the graph')
print(nx.info(g))
# print(g.number_of_nodes())
print(g.nodes(data=True))
# print(g.number_of_edges())
print(g.edges(data=True))
print('Average degree of nodes')
print("Getting map from service: ", map_service_name) rospy.wait_for_service(map_service_name) graph_file = rospy.get_param("~graph_file", None) map_msg = rospy.ServiceProxy(map_service_name, GetMap)().map map_info = map_msg.info spaceDimension = 3 if spaceDimension == 3: bases = [2,3,5] lower = numpy.array([map_info.origin.position.x, map_info.origin.position.y,0.0]) upper = numpy.array([map_info.origin.position.x+map_info.resolution*map_info.width, map_info.origin.position.y+map_info.resolution*map_info.height, 2*numpy.pi]) # Settings halton_points = 2 # TODO: Set this appropriately disc_radius = 1 # TODO: Set this appropriately print(disc_radius) for i in range(1): print i numpy.random.seed() offset = numpy.random.random_sample(spaceDimension,) riskmapFile = 'haltonGraph.graphml' # Generate the graph print 'Generating the graph' G = euclidean_halton_graph(halton_points, disc_radius, bases, lower, upper, None, None, map_msg) nx.write_graphml(G, riskmapFile)
    dists.append(dist)

# Sort the list by ascending distance
dists.sort(key=lambda _tuple: _tuple[-1])
# Get the top connections
top_conns = dists[:num_top_conns]

# Make a network
g = nx.Graph()
for word1, word2, dist in top_conns:
    weight = 1 - dist  # cosine similarity for weight
    g.add_edge(word1, word2, weight=float(weight))

# Write the network
nx.write_graphml(g, "./semanticNetwork/semanticNetwork.graphml")  # Readable by Gephi
A = nx.adjacency_matrix(g)
adjmat = A.todense()
numpy.savetxt("./semanticNetwork/semanticNetworkAdjmat.txt", adjmat, delimiter=' ')

###########################
# reload and clean text without lemmatization and without
# spell-checking to leave words as original as possible

for ID in IDs:  # loop through papers
    print(ID)
    with open(ID) as paper:
# (fragment: this block appears to be the body of a `while queue:` loop,
# with the trailing `else:` belonging to that loop)
    # Retrieve new node to process
    node = queue.pop()  # BFS execution

    # Don't process an already visited node
    if node in visited:
        continue
    else:
        visited.add(node)
        comments_df = apiParser(node, min_utc, max_utc, comments_df)
        # Process df to find links to other subreddits
        comments_df = matchFinder(comments_df, node)
        # Verify there are edges to be added to graph
        if len(comments_df) == 0:
            continue
        else:
            network, queue = graphAdder(comments_df, network, node, queue)
    time.sleep(0.1)
else:
    print('queue empty')

# Write network to disk
nx.write_graphml(network, 'network_{one}_{two}.graphml'.format(one=origin_node, two=min_utc))
def findCommunites(threshold=0.5, sector=None, k=5, force=False):
    th = re.sub(r'([0-9]*)\.([0-9]*)', r'\1\2', str(threshold))
    if sector is not None:
        graphInFilename = PROCESSED_FILE_LOC + PREFIX + CRITERIA + "stock_graph_nx_" + sector + "_th" + th + ".xml"
        graphOutFilename = PROCESSED_FILE_LOC + PREFIX + CRITERIA + "stock_communities_nx_" + sector + "_th" + th + "_k" + str(k) + ".xml"
        outFilename = PROCESSED_FILE_LOC + PREFIX + CRITERIA + "stock_communities_nx_" + sector + "_th" + th + "_k" + str(k) + ".csv"
    else:
        graphInFilename = PROCESSED_FILE_LOC + PREFIX + CRITERIA + "stock_graph_nx_th" + th + ".xml"
        graphOutFilename = PROCESSED_FILE_LOC + PREFIX + CRITERIA + "stock_communities_nx" + "_th" + th + "_k" + str(k) + ".xml"
        outFilename = PROCESSED_FILE_LOC + PREFIX + CRITERIA + "stock_communities_nx" + "_th" + th + "_k" + str(k) + ".csv"
    print("reading graph from file: ", graphInFilename)
    print("writing graph with community info to file: ", graphOutFilename)  # fixed: printed outFilename twice
    print("writing community details in csv format to file: ", outFilename)
    if force or not isfile(graphOutFilename):
        g = nx.read_graphml(graphInFilename)
        # freq = findFreqOfCliquesInGraph(g)
        # plotHistFromDict(freq)
        # k_clique_communities lives in nx.algorithms.community since NetworkX 2.x
        comm = nx.algorithms.community.k_clique_communities(g, k)
        communities = []
        for c in comm:
            communities.append(c)
        numCommunities = len(communities)
        print("number of communities found: ", numCommunities)
        colors = range(numCommunities)
        i = 0
        for c in communities:
            for v in c:
                g.nodes[v]['cluster'] = colors[i] + 1
            i += 1
        nx.write_graphml(g, graphOutFilename)
        import csv
        with open(outFilename, "w", newline='') as f:
            writer = csv.writer(f, delimiter='|', quotechar="'", quoting=csv.QUOTE_MINIMAL)
            writer.writerow(["sector", "symbol", "name", "cluster"])
            for v in g:
                writer.writerow([g.nodes[v]['sector'], g.nodes[v]['symbol'],
                                 g.nodes[v]['name'], g.nodes[v]['cluster']])
        results = PROCESSED_FILE_LOC + "results.csv"
        with open(results, "a") as f1:
            f1.write(str(dt.datetime.today()) + "," + outFilename + "," + str(numCommunities) +
                     "," + str(calculateModularity(graphOutFilename)) + "\n")
    drawGraph(graphOutFilename, "gt")
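# [Editor's note] A self-contained sketch of clique percolation with the
# current import path, on a toy graph (four 4-cliques joined in a ring, k=3):
import networkx as nx
from networkx.algorithms.community import k_clique_communities

g = nx.ring_of_cliques(4, 4)
communities = list(k_clique_communities(g, 3))
for i, c in enumerate(communities, start=1):
    for v in c:
        g.nodes[v]['cluster'] = i  # tag nodes so the labels survive GraphML export
nx.write_graphml(g, "communities.graphml")
print(len(communities))  # 4: the single bridge edges do not percolate 3-cliques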
    final_G.nodes[node]['voltages'] = str(final_G.nodes[node]['voltages'])

# throw away isolated components (this step is optional)
removed_nodes = list()
components = sorted(nx.connected_components(final_G), key=len, reverse=True)
for component_idx in range(1, len(components)):
    isolated_component = components[component_idx]
    print('isolated component {} = {}'.format(component_idx, isolated_component))
    removed_nodes.extend(isolated_component)
    final_G.remove_nodes_from(isolated_component)
print('node count without isolated components = {}'.format(final_G.number_of_nodes()))

# export graph in GraphML format
nx.write_graphml(final_G, parsed_graph_fpath)

# draw the final graph
pos = dict()
first_it = True
for node in final_G.nodes():
    x = final_G.nodes[node]['x']
    y = final_G.nodes[node]['y']
    pos[node] = [x, y]
    if first_it is True:
        x_min = x
        y_min = y
        x_max = x
        y_max = y
thres = np.percentile(weights, 99.9)
fTopicGraph = prune(topicGraph, thres)

F = nx.DiGraph(A)
for (u, v) in A.edges():  # edges_iter() was removed in NetworkX 2.x
    has_edge = False
    if bool(term2top[u].intersection(term2top[v])):
        F.remove_edge(u, v)
        continue
    for i in term2top[u]:
        for j in term2top[v]:
            if fTopicGraph.has_edge(i, j):
                has_edge = True
    if not has_edge:
        F.remove_edge(u, v)
nx.write_graphml(F, '/scratch/balash/pruned-graph.graphml')

inv_top = word2topic()
G = utils.read_pickle('/scratch/balash/final-output/lasso/lasso_alpha_5_6_network')
H = topic_subgraphs(G, inv_top)
utils.write_pickle(H, '/scratch/balash/final-output/topic_subgraphs')

# Interesting subgraphs
for i in range(100):
    if W[i].number_of_nodes() < 40 and W[i].number_of_edges() > 5:
        print(i, W[i].number_of_nodes(), W[i].number_of_edges())

import networkx as nx
from bokeh.io import show, output_file
nx.relabel_nodes(nut_network, labels, copy=False)
nx.relabel_nodes(nut_network, nutr_def.to_dict()['NutrDesc'], copy=False)

# Finds modularity of best partitions, describes clusters.
partition = community.best_partition(nut_network)
print("Modularity:", community.modularity(partition, nut_network))

HOW_MANY = 10

def describe_cluster(x):
    # x is a frame; select the matching rows from "domain"
    rows = nut_data.loc[x.index]  # .ix was removed from pandas; use .loc
    # Calculate row sums, sort them, get the last HOW_MANY
    top_N = rows.sum(axis=1).sort_values(ascending=False)[:HOW_MANY]
    # What labels do they have?
    return top_N.index.values

word_clusters = pd.DataFrame({"part_id": pd.Series(partition)})
results = word_clusters.groupby("part_id").apply(describe_cluster)
_ = [print("--", "; ".join(r.tolist())) for r in results]

# Saves to file.
if not os.path.isdir("results"):
    os.mkdir("results")
with open("results/nut_data.graphml", "wb") as ofile:
    nx.write_graphml(nut_network, ofile)
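# [Editor's note] community.best_partition above comes from the python-louvain
# package (imported as `community`), not from networkx.algorithms.community;
# mixing the two is a common source of AttributeError. A minimal sketch,
# assuming python-louvain is installed:
import community  # pip install python-louvain
import networkx as nx

G = nx.karate_club_graph()
partition = community.best_partition(G)  # {node: community id}
print("Modularity:", community.modularity(partition, G))
nx.set_node_attributes(G, partition, "part_id")  # keep ids for GraphML export
nx.write_graphml(G, "karate_partition.graphml")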
def _run_interface(self, runtime): print("================================================") print(" > Creation of rs-fMRI connectome maps") print(" .. BOLD file :" + self.inputs.func_file) print(" .. parcellation : %s" % self.inputs.parcellation_scheme) print("================================================") fdata = nib.load(self.inputs.func_file).get_data() tp = fdata.shape[3] if self.inputs.parcellation_scheme != "Custom": if self.inputs.parcellation_scheme == "NativeFreesurfer": resolutions = get_parcellation(self.inputs.parcellation_scheme) else: # Lausanne2018 resolutions = get_parcellation(self.inputs.parcellation_scheme) for parkey, parval in list(resolutions.items()): for vol, graphml in zip(self.inputs.roi_volumes, self.inputs.roi_graphmls): if parkey in vol: roi_fname = vol if parkey in graphml: roi_graphml_fname = graphml roi = nib.load(roi_fname) roiData = roi.get_data() resolutions[parkey]["number_of_regions"] = roiData.max() resolutions[parkey]["node_information_graphml"] = os.path.abspath(roi_graphml_fname) del roi, roiData else: resolutions = self.inputs.atlas_info # loop throughout all the resolutions ('scale33', ..., 'scale500') for parkey, parval in list(resolutions.items()): print("------------------------------------------------") print("Resolution = " + parkey) print("------------------------------------------------") # Open the corresponding ROI for vol in self.inputs.roi_volumes: if (parkey in vol) or (len(self.inputs.roi_volumes) == 1): roi_fname = vol roi = nib.load(roi_fname) mask = roi.get_data() # Compute average time-series print(" ************************************************") print(" >> Compute average rs-fMRI signal for each cortical ROI ") nROIs = parval["number_of_regions"] # number of ROIs for current resolution # matrix number of rois vs timepoints ts = np.zeros((nROIs, tp), dtype=np.float32) # loop throughout all the ROIs (current resolution) for i in range(1, nROIs + 1): ts[i - 1, :] = fdata[mask == i].mean(axis=0) # Save average roi time-series np.save(os.path.abspath("averageTimeseries_%s.npy" % parkey), ts) sio.savemat(os.path.abspath("averageTimeseries_%s.mat" % parkey), {"ts": ts}) # Create graph, add node information from parcellation and recover ROI indexes print(" ************************************************") print(" >> Load %s to initialize graph " % parval["node_information_graphml"]) G = nx.Graph() gp = nx.read_graphml(parval["node_information_graphml"]) ROI_idx = [] for u, d in gp.nodes(data=True): G.add_node(int(u)) for key in d: G.nodes[int(u)][key] = d[key] # Compute a position for the node based on the mean position of the # ROI in voxel coordinates (segmentation volume ) G.nodes[int(u)]["dn_position"] = tuple( np.mean(np.where(mask == int(d["dn_multiscaleID"])), axis=1) ) ROI_idx.append(int(d["dn_multiscaleID"])) # Apply scrubbing (if enabled) if self.inputs.apply_scrubbing: print(" ************************************************") print(" >> Apply scrubbing") # Load scrubbing FD and DVARS series FD = np.load(self.inputs.FD) DVARS = np.load(self.inputs.DVARS) # Evaluate scrubbing mask FD_th = self.inputs.FD_th DVARS_th = self.inputs.DVARS_th FD_mask = np.array(np.nonzero(FD < FD_th))[0, :] DVARS_mask = np.array(np.nonzero(DVARS < DVARS_th))[0, :] index = np.sort(np.unique(np.concatenate((FD_mask, DVARS_mask)))) + 1 index = np.concatenate(([0], index)) log_scrubbing = ( " .. 
INFO: DISCARDED time points after scrubbing: " + str(FD.shape[0] - index.shape[0] + 1) + " over " + str(FD.shape[0] + 1) ) print(log_scrubbing) np.save(os.path.abspath("tp_after_scrubbing.npy"), index) sio.savemat(os.path.abspath("tp_after_scrubbing.mat"), {"index": index}) ts_after_scrubbing = ts[:, index] np.save( os.path.abspath( "averageTimeseries_%s_after_scrubbing.npy" % parkey ), ts_after_scrubbing, ) sio.savemat( os.path.abspath( "averageTimeseries_%s_after_scrubbing.mat" % parkey ), {"ts": ts_after_scrubbing}, ) ts = ts_after_scrubbing # Compute pairwise ROI time-series correlation print(" ************************************************") print(" >> Compute pairwise ROI time-series correlation") nnodes = ts.shape[0] i = -1 for i_signal in ts: i += 1 for j in range(i, nnodes): j_signal = ts[j, :] value = np.corrcoef(i_signal, j_signal)[0, 1] G.add_edge(ROI_idx[i], ROI_idx[j]) G[ROI_idx[i]][ROI_idx[j]]["corr"] = value # Get the edge attributes/keys/weights from the first edge and then break. # Change w.r.t networkx2 edge_keys = [] for _, _, d in G.edges(data=True): edge_keys = list(d.keys()) break # Save the computed connectivity matrix print(" ************************************************") print(" >> Save functional connectome map as:") print(" - connectome_%s.tsv" % parkey) with open("connectome_%s.tsv" % parkey, "w") as out_file: tsv_writer = csv.writer(out_file, delimiter="\t") header = ["source", "target"] header = header + [key for key in edge_keys] tsv_writer.writerow(header) with open("connectome_%s.tsv" % parkey, "ab") as out_file: nx.write_edgelist( G, out_file, comments="#", delimiter="\t", data=edge_keys, encoding="utf-8", ) # storing network if "gPickle" in self.inputs.output_types: print(" - connectome_%s.gpickle" % parkey) nx.write_gpickle(G, "connectome_%s.gpickle" % parkey) if "mat" in self.inputs.output_types: print(" - connectome_%s.mat" % parkey) edge_struct = {} for edge_key in edge_keys: edge_struct[edge_key] = nx.to_numpy_matrix(G, weight=edge_key) # Number of ROIs (nodes) size_nodes = len(list(G.nodes())) # Get the node attributes/keys from the first node and then break. # Change w.r.t networkx2 for u, d in G.nodes(data=True): node_keys = list(d.keys()) break node_struct = {} for node_key in node_keys: if node_key == "dn_position": node_arr = np.zeros([size_nodes, 3], dtype=np.float) else: node_arr = np.zeros(size_nodes, dtype=np.object_) node_n = 0 for _, node_data in G.nodes(data=True): node_arr[node_n] = node_data[node_key] node_n += 1 node_struct[node_key] = node_arr sio.savemat("connectome_%s.mat" % parkey, mdict={"sc": edge_struct, "nodes": node_struct}) if "graphml" in self.inputs.output_types: print(" - connectome_%s.graphml" % parkey) g2 = nx.Graph() # Create graph nodes for u_gml, d_gml in G.nodes(data=True): g2.add_node(u_gml) g2.nodes[u_gml]["dn_multiscaleID"] = d_gml["dn_multiscaleID"] g2.nodes[u_gml]["dn_fsname"] = d_gml["dn_fsname"] g2.nodes[u_gml]["dn_hemisphere"] = d_gml["dn_hemisphere"] g2.nodes[u_gml]["dn_name"] = d_gml["dn_name"] g2.nodes[u_gml]["dn_position_x"] = d_gml["dn_position"][0] g2.nodes[u_gml]["dn_position_y"] = d_gml["dn_position"][1] g2.nodes[u_gml]["dn_position_z"] = d_gml["dn_position"][2] g2.nodes[u_gml]["dn_region"] = d_gml["dn_region"] # Create graph edges for u_gml, v_gml, d_gml in G.edges(data=True): g2.add_edge(u_gml, v_gml) g2[u_gml][v_gml]["corr"] = float(d_gml["corr"]) # Save the graph nx.write_graphml(g2, "connectome_%s.graphml" % parkey) print("[ DONE ]") return runtime
def createGraph(threshold=0.5, sector=None, lib="nx", force=False):
    # sectors = pd.read_json("sector_industry_company.json")
    sectors = pd.read_csv(SECTOR_INFO_FILE)
    th = re.sub(r'([0-9]*)\.([0-9]*)', r'\1\2', str(threshold))
    if sector is not None:
        filename = PROCESSED_FILE_LOC + PREFIX + CRITERIA + "corr_matrix_" + sector + ".json"
        outFilename = PROCESSED_FILE_LOC + PREFIX + CRITERIA + "stock_graph_" + lib + "_" + sector + "_th" + th + ".xml"
        # industry = sectors[sectors['sector_name'] == sector]
        industry = sectors[sectors['Sector'] == sector]
    else:
        filename = PROCESSED_FILE_LOC + PREFIX + CRITERIA + "corr_matrix.json"
        outFilename = PROCESSED_FILE_LOC + PREFIX + CRITERIA + "stock_graph_" + lib + "_th" + th + ".xml"
        industry = sectors
    print("reading correlation matrix from file: ", filename)
    print("writing graph to file: ", outFilename)
    if force or not isfile(outFilename):
        # company = dict(zip(industry['company_symbol'], zip(industry['company_name'], industry['sector_name'])))
        company = dict(zip(industry['Symbol'], zip(industry['Name'], industry['Sector'])))
        corrMat = pd.read_json(filename)
        symbols = corrMat.columns
        numStocks = len(symbols)
        if lib == "nx":
            g = nx.Graph()
            for i, sym in enumerate(symbols):
                cluster = 0  # randint(1, 5)
                if sym in company:
                    companyName, sectorName = company.get(sym)
                else:
                    companyName, sectorName = None, None
                if companyName is None or len(companyName) == 0:
                    companyName = "Unavailable"
                if sectorName is None or len(sectorName) == 0:
                    sectorName = "Unavailable"
                g.add_node(i, symbol=sym, name=companyName, sector=sectorName, cluster=cluster)
            for i in range(numStocks):
                for j in range(i + 1, numStocks):
                    w = corrMat[symbols[i]][symbols[j]]
                    if abs(w) >= threshold:
                        # print "adding edge: (", symbols[i], ",", symbols[j], ",", w, ")"
                        g.add_edge(i, j, weight=float(w))
            print(g.number_of_nodes(), g.number_of_edges())
            nx.write_graphml(g, outFilename)
        elif lib == "gt":
            g = Graph(directed=False)
            g.add_vertex(numStocks)
            v_symbol = g.new_vertex_property("string")
            g.vp.symbol = v_symbol
            v_name = g.new_vertex_property("string")
            g.vp.name = v_name
            v_cluster = g.new_vertex_property("int")
            g.vp.cluster = v_cluster
            for i in range(numStocks):
                v = g.vertex(i)
                g.vp.symbol[v] = symbols[i]
                g.vp.name[v] = company.get(symbols[i])
                g.vp.cluster[v] = 0
            e_weight = g.new_edge_property("double")
            g.ep.weight = e_weight
            for i in range(numStocks - 1):
                for j in range(i + 1, numStocks):
                    w = corrMat[symbols[i]][symbols[j]]
                    if abs(w) >= threshold:
                        g.add_edge(i, j)
                        g.ep.weight[g.edge(i, j)] = w
            print(g.num_vertices(), g.num_edges())
            g.save(outFilename, fmt="graphml")
            drawGraph(outFilename)
    else:
        g = nx.read_graphml(outFilename)
    getGraphStats(threshold, sector, lib)
    return g
def write(self, target):
    for k in self.MU.keys():
        nx.write_graphml(self.MU[k], '{0}_topic_{1}.graphml'.format(target, k))
elif output_file_type == 'pickle':
    try:
        # writers return None, so don't assign the result back to G
        nx.write_gpickle(G, output_file_path)  # if the file format is Pickle, write graph G
        break
    except IOError:
        print("The output type " + output_file_type + " failed; please select another output file path\n")
elif output_file_type == 'graphML':
    try:
        nx.write_graphml(G, output_file_path)  # if the file format is GraphML, write graph G
        break
    except IOError:
        print("The output type " + output_file_type + " failed; please select another output file path\n")
elif output_file_type == 'YAML':
    try:
        nx.write_yaml(G, output_file_path)  # if the file format is YAML, write graph G
        break
    except IOError:
        print("The output type " + output_file_type + " failed; please select another output file path\n")
def main(family, quantile_ass=.99): data_folder = os.path.join(Path(os.getcwd()).parents[1], 'data') #load a pickle generated from "associate_env.py script" store = pickle.load(open(data_folder + '/pickles/' + family + '.pkl', 'rb')) used_environment = store['used_env'].copy() full_freq_m = store['full_freq_m'].copy() reactome = store['reactome'].copy() model_sample = store['model_sample'].copy() transporter = store['transporter'].copy() #replace nan values by the average av_used_env = np.nanmean(used_environment, 0) inds = np.where(np.isnan(used_environment)) used_environment[inds] = np.take(av_used_env, inds[1]) #for reaction frequency av_freq_m = np.mean(full_freq_m, axis=0) diff_freq_m = full_freq_m - av_freq_m #filter out noise and find reactions that are driven by the environment env_d_score1 = np.round(np.max(diff_freq_m, axis=0), 4) env_d_score1 = env_d_score1 / max(np.abs(env_d_score1)) env_d_score2 = np.round(np.min(diff_freq_m, axis=0), 4) env_d_score2 = env_d_score2 / max(np.abs(env_d_score2)) env_d_score = np.zeros(len(env_d_score1)) for i in range(len(env_d_score1)): if abs(env_d_score2[i]) > abs(env_d_score1[i]): env_d_score[i] = env_d_score2[i] else: env_d_score[i] = env_d_score1[i] m_diff_freq_m = np.abs(env_d_score) env_driven_reactome = reactome #[m_diff_freq_m>.005] diff_freq_m_envd = diff_freq_m.T #[m_diff_freq_m>.005].T reaction_frequency = full_freq_m.T #[m_diff_freq_m>.005].T clss_freq_m = np.zeros(diff_freq_m_envd.shape) for i, v in enumerate(diff_freq_m_envd): clss_freq_m[i] = v #assign_to_rank(v, fpc,fnc) #for the environment av_used_env = np.mean(used_environment, axis=0) diff_used_env = used_environment - av_used_env #filter out noise and find metabolites that are driven by the environment m_diff_used_env = np.max(np.abs(diff_used_env), axis=0) driving_mets = transporter #[m_diff_used_env>0.005] diff_used_env_envd = diff_used_env.T #[m_diff_used_env>0.005].T used_env = used_environment.T #[m_diff_used_env>0.005].T clss_used_env = np.zeros(diff_used_env_envd.shape) for i, v in enumerate(diff_used_env_envd): clss_used_env[i] = v #assign_to_rank(v, epc, enc) s_clss_fm = np.sum(np.abs(clss_freq_m), axis=0) s_clss_ue = np.sum(np.abs(clss_used_env), axis=0) #env_driven_reactome envd_reactions = env_driven_reactome[s_clss_fm != 0] #driving_metabolites dm = driving_mets.copy() dm = dm[s_clss_ue != 0] #profiles envd_prof = clss_freq_m.T[s_clss_fm != 0].T dm_prof = clss_used_env.T[s_clss_ue != 0].T #regression terms x = reaction_frequency.T[s_clss_fm != 0].T y = used_env.T[s_clss_ue != 0].T cosine_dict = {} for i, reac in enumerate(envd_prof.T): cosine_dict[envd_reactions[i]] = np.array( [cosine(reac.flatten(), metab.flatten()) for metab in dm_prof.T]) cosine_pool = np.array(list(cosine_dict.values())).flatten() pc = np.quantile(cosine_pool[cosine_pool > 0], quantile_ass) nc = np.quantile(cosine_pool[cosine_pool < 0], 1 - quantile_ass) association_d = {} for i, reac in enumerate(envd_prof.T): v = cosine_dict[envd_reactions[i]] association_d[envd_reactions[i]] = assign_to_rank(v, pc, nc) g = build_association_network(association_d, envd_reactions, dm) nx.write_graphml( g, os.path.join( Path(os.getcwd()).parents[0], 'files', 'networks', family) + '.graphml') #find metabolite concentrations for models from sklearn.linear_model import MultiTaskElasticNetCV as EN enet = EN(cv=3, verbose=1, n_jobs=7, max_iter=10000) print(x.shape, y.shape) mod = enet.fit(x, y) evolved_env = np.zeros((len(model_sample), len(dm))) for i, mod_prof in enumerate(model_sample): print(family, i) v = 
mod_prof[m_diff_freq_m > .005] p = mod.predict(v[s_clss_fm != 0].reshape(1, -1)) p = p.flatten() p = p + abs(min(p)) p = p / max(p) evolved_env[i] = p.copy() #av_mod_diff = np.arctanh(av_mod_diff) met_prof = get_evolved_met_prof(evolved_env, dm, transporter) return transporter, met_prof
def cmat(
    intrk,
    roi_volumes=None,
    roi_graphmls=None,
    parcellation_scheme=None,
    compute_curvature=True,
    additional_maps=None,
    output_types=None,
    atlas_info=None,
):
    """Create the connection matrix for each resolution using fibers and ROIs.

    Parameters
    ----------
    intrk : TRK file
        Reconstructed tractogram

    roi_volumes : list
        List of parcellation files for a given parcellation scheme

    roi_graphmls : list
        List of GraphML files that describe the parcellation nodes

    parcellation_scheme : ['NativeFreesurfer', 'Lausanne2018', 'Custom']

    compute_curvature : Boolean

    additional_maps : dict
        A dictionary of key/value for each additional map where the value
        is the path to the map

    output_types : ['gPickle', 'mat', 'graphml']

    atlas_info : dict
        Dictionary storing information such as path to files related to a
        parcellation atlas / scheme.
    """
    if additional_maps is None:
        additional_maps = {}
    if atlas_info is None:
        atlas_info = {}
    if output_types is None:
        output_types = ["gPickle"]

    print("================================================")
    print(" > Creation of connectome maps")
    print(" .. tractogram : " + intrk)
    print(" .. parcellation : %s" % parcellation_scheme)
    print("================================================")

    # Create the endpoints for each fiber
    en_fname = "endpoints.npy"
    en_fnamemm = "endpointsmm.npy"
    curv_fname = "meancurvature.npy"

    fib, hdr = nib.trackvis.read(intrk, False)
    n = len(fib)  # number of fibers

    if parcellation_scheme != "Custom":
        # 'Lausanne2018' and 'NativeFreesurfer' take the same path here
        resolutions = get_parcellation(parcellation_scheme)
        for parkey, parval in list(resolutions.items()):
            for vol, graphml in zip(roi_volumes, roi_graphmls):
                if parkey in vol:
                    roi_fname = vol
                if parkey in graphml:
                    roi_graphml_fname = graphml
            roi = nib.load(roi_fname)
            roiData = roi.get_data()
            resolutions[parkey]["number_of_regions"] = roiData.max()
            resolutions[parkey]["node_information_graphml"] = op.abspath(roi_graphml_fname)
            del roi, roiData
    else:
        resolutions = atlas_info

    # Previously, load_endpoints_from_trk() used the voxel size stored
    # in the track hdr to transform the endpoints to ROI voxel space.
    # This only works if the ROI voxel size is the same as the DSI/DTI
    # voxel size. In the case of DTI, it is not.
    # We do, however, assume that all of the ROI images have the same
    # voxel size, so this code just loads the first one to determine
    # what it should be
    firstROIFile = roi_volumes[0]
    firstROI = nib.load(firstROIFile)
    roiVoxelSize = firstROI.get_header().get_zooms()

    (endpoints, endpointsmm) = create_endpoints_array(fib, roiVoxelSize, True)
    np.save(en_fname, endpoints)
    np.save(en_fnamemm, endpointsmm)

    # Only compute curvature if required
    if compute_curvature:
        meancurv = compute_curvature_array(fib)
        np.save(curv_fname, meancurv)

    streamline_wrote = False
    for parkey, parval in list(resolutions.items()):
        print("------------------------------------------------")
        print("Resolution = " + parkey)
        print("------------------------------------------------")

        # Create empty fiber label array
        fiberlabels = np.zeros((n, 2))
        final_fiberlabels = []
        final_fibers_idx = []

        # Open the corresponding ROI:
        #   scale1 for lausanne2008/18
        #   first volume for nativefreesurfer
        for vol in roi_volumes:
            if (parkey in vol) or (len(roi_volumes) == 1):
                roi_fname = vol
        roi = nib.load(roi_fname)
        roiData = roi.get_data()

        # Create the matrix
        print(" >> Create the connection matrix (%s rois)" % parval["number_of_regions"])
        nROIs = parval["number_of_regions"]
        G = nx.Graph()

        # Add node information from parcellation
        gp = nx.read_graphml(parval["node_information_graphml"])
        n_nodes = len(gp)

        pc = -1
        cnt = -1
        for u, d in gp.nodes(data=True):
            # Percent counter
            cnt += 1
            pcN = int(round(float(100 * cnt) / n_nodes))
            if pcN > pc and pcN % 10 == 0:
                pc = pcN
                print("%4.0f%%" % pc)

            G.add_node(int(u))
            for key in d:
                G.nodes[int(u)][key] = d[key]
            # Compute a position for the node based on the mean position of the
            # ROI in voxel coordinates (segmentation volume)
            G.nodes[int(u)]["dn_position"] = tuple(
                np.mean(np.where(roiData == int(d["dn_multiscaleID"])), axis=1)
            )
            G.nodes[int(u)]["roi_volume"] = np.sum(roiData == int(d["dn_multiscaleID"]))

        dis = 0

        # Prepare: compute the measures
        t = [c[0] for c in fib]
        h = np.array(t, dtype=object)

        mmap = additional_maps
        mmapdata = {}
        print(" >> Maps to be processed :")
        for k, v in list(mmap.items()):
            print(" - %s map" % k)
            da = nib.load(v)
            mdata = da.get_data()
            print(mdata.max())
            mdata = np.nan_to_num(mdata)
            print(mdata.max())
            mmapdata[k] = (mdata, da.get_header().get_zooms())

        print(" ************************")
        print(" >> Processing fibers and computing metrics (%s fibers)" % n)
        pc = -1
        for i in range(n):  # n: number of fibers
            # Percent counter
            pcN = int(round(float(100 * i) / n))
            if pcN > pc and pcN % 10 == 0:
                pc = pcN
                print("%4.0f%%" % pc)

            # ROI start => ROI end
            try:
                startvox = np.zeros((3, 1)).astype(int)
                startvox[0] = int(endpoints[i, 0, 0])
                startvox[1] = int(endpoints[i, 0, 1])
                startvox[2] = int(endpoints[i, 0, 2])

                endvox = np.zeros((3, 1)).astype(int)
                endvox[0] = int(endpoints[i, 1, 0])
                endvox[1] = int(endpoints[i, 1, 1])
                endvox[2] = int(endpoints[i, 1, 2])

                # Endpoints from create_endpoints_array
                startROI = int(roiData[startvox[0], startvox[1], startvox[2]])
                endROI = int(roiData[endvox[0], endvox[1], endvox[2]])
            except IndexError:
                print(" .. ERROR: An index error occurred for fiber %s. " % i)
                print("    This means that the fiber start or endpoint is outside the volume.")
                print("    Continue.")
                continue

            # Filter
            if startROI == 0 or endROI == 0:
                dis += 1
                fiberlabels[i, 0] = -1
                continue

            if startROI > nROIs or endROI > nROIs:
                print(" .. ERROR: Start or endpoint of fiber terminates in a voxel which is labeled higher")
                print("    than is expected by the parcellation node information.")
                print("    Start ROI: %i, End ROI: %i" % (startROI, endROI))
                print("    This needs bugfixing!")
                print("    Continue.")
                continue

            # Switch the ROIs in order to enforce startROI < endROI
            if endROI < startROI:
                tmp = startROI
                startROI = endROI
                endROI = tmp

            # TODO: Refine fibers ending in thalamus
            # if (startROI in thalamic_labels) or (endROI in thalamic_labels):
            #     Extract all thalamic nuclei the fiber is passing through
            #     Refine start/endROI connecting to the most probable nucleus

            # Update fiber label
            fiberlabels[i, 0] = startROI
            fiberlabels[i, 1] = endROI
            final_fiberlabels.append([startROI, endROI])
            final_fibers_idx.append(i)

            # Add edge to graph
            if G.has_edge(startROI, endROI):
                G[startROI][endROI]["fiblist"].append(i)
            else:
                G.add_edge(startROI, endROI, fiblist=[i])

        print(
            " ... INFO - Found %i (%f percent out of %i fibers) fibers " % (dis, dis * 100.0 / n, n)
            + "that start or terminate in a voxel which is not labeled. (orphans)"
        )
        print(" ... INFO - Valid fibers: %i (%f percent)" % (n - dis, 100 - dis * 100.0 / n))

        # Create a final fiber length array
        finalfiberlength = []
        for idx in final_fibers_idx:
            # Compute length of fiber
            finalfiberlength.append(length(fib[idx][0]))

        # Convert to array
        final_fiberlength_array = np.array(finalfiberlength)

        # Make final fiber labels as array
        final_fiberlabels_array = np.array(final_fiberlabels, dtype=np.int32)

        total_fibers = 0
        total_volume = 0
        u_old = -1
        for u, v, d in G.edges(data=True):
            total_fibers += len(d["fiblist"])
            if u != u_old:
                total_volume += G.nodes[int(u)]["roi_volume"]
            u_old = u

        G_out = copy.deepcopy(G)

        # Update edges
        # New connectivity measures can be added here
        # FIXME treat case of self-connection that gives di['fiber_length_mean'] = 0.0
        for u, v, d in G.edges(data=True):
            # Check for diagonal elements that raise an error when the edge is visited a second time
            G_out.remove_edge(u, v)
            if len(list(G[u][v].keys())) == 1:
                di = {"number_of_fibers": len(G[u][v]["fiblist"])}

                # Additional measures: compute mean/std of fiber measure
                if u <= v:
                    idx = np.where(
                        (final_fiberlabels_array[:, 0] == int(u))
                        & (final_fiberlabels_array[:, 1] == int(v))
                    )[0]
                else:
                    idx = np.where(
                        (final_fiberlabels_array[:, 0] == int(v))
                        & (final_fiberlabels_array[:, 1] == int(u))
                    )[0]

                di["fiber_length_mean"] = float(np.nanmean(final_fiberlength_array[idx]))
                di["fiber_length_median"] = float(np.nanmedian(final_fiberlength_array[idx]))
                di["fiber_length_std"] = float(np.nanstd(final_fiberlength_array[idx]))
                di["fiber_proportion"] = float(100.0 * (di["number_of_fibers"] / float(total_fibers)))

                # Compute density
                # Formula: density = (#fibers / mean_fiber_length) * (2 / (area_roi_u + area_roi_v))
                if di["fiber_length_mean"] > 0.0:
                    di["fiber_density"] = float(
                        (float(di["number_of_fibers"]) / float(di["fiber_length_mean"]))
                        * float(2.0 / (G.nodes[int(u)]["roi_volume"] + G.nodes[int(v)]["roi_volume"]))
                    )
                    di["normalized_fiber_density"] = float(
                        ((float(di["number_of_fibers"]) / float(total_fibers))
                         / float(di["fiber_length_mean"]))
                        * ((2.0 * float(total_volume))
                           / (G.nodes[int(u)]["roi_volume"] + G.nodes[int(v)]["roi_volume"]))
                    )
                else:
                    di["fiber_density"] = 0.0
                    di["normalized_fiber_density"] = 0.0

                # This is indexed into the fibers that are valid in the sense of touching start
                # and end ROI and not going out of the volume
                if u <= v:
                    idx_valid = np.where(
                        (fiberlabels[:, 0] == int(u)) & (fiberlabels[:, 1] == int(v))
                    )[0]
                else:
                    idx_valid = np.where(
                        (fiberlabels[:, 0] == int(v)) & (fiberlabels[:, 1] == int(u))
                    )[0]

                for k, vv in list(mmapdata.items()):
                    val = []
                    for i in idx_valid:
                        # Retrieve indices
                        try:
                            idx2 = (h[i] / vv[1]).astype(np.uint32)
                            val.append(vv[0][idx2[:, 0], idx2[:, 1], idx2[:, 2]])
                        except IndexError as e:
                            print(" ... ERROR - Index error occurred when trying to extract scalar values for measure", k)
                            print(" ... ERROR - Discard fiber with index ", i, "Exception: ", e)

                    if len(val) > 0:
                        da = np.concatenate(val)
                        if k == "shore_rtop":
                            di[k + "_mean"] = da.astype(np.float64).mean()
                            di[k + "_std"] = da.astype(np.float64).std()
                            di[k + "_median"] = np.median(da.astype(np.float64))
                        else:
                            di[k + "_mean"] = da.mean().astype(float)
                            di[k + "_std"] = da.std().astype(float)
                            di[k + "_median"] = np.median(da).astype(float)
                        del da
                    del val

            G_out.add_edge(u, v)
            for key in di:
                G_out[u][v][key] = di[key]

        del G

        print(" ************************************************")
        print(" >> Save structural connectome maps as :")

        # Get the edge attributes/keys/weights from the first edge and then break.
        # Change w.r.t networkx2
        edge_keys = []
        for u, v, d in G_out.edges(data=True):
            edge_keys = list(d.keys())
            break

        # Storing network/graph in TSV format (by default, to be BIDS compliant)
        print(" - connectome_%s.tsv" % parkey)
        # Write header fields
        with open("connectome_%s.tsv" % parkey, "w") as out_file:
            tsv_writer = csv.writer(out_file, delimiter="\t")
            header = ["source", "target"]
            header = header + [key for key in edge_keys]
            tsv_writer.writerow(header)
        # Write list of graph edges with all connectivity metrics (edge_keys)
        with open("connectome_%s.tsv" % parkey, "ab") as out_file:
            nx.write_edgelist(
                G_out,
                out_file,
                comments="#",
                delimiter="\t",
                data=edge_keys,
                encoding="utf-8",
            )

        # Storing network/graph in other formats that might be preferred by the user
        if "gPickle" in output_types:
            print(" - connectome_%s.gpickle" % parkey)
            nx.write_gpickle(G_out, "connectome_%s.gpickle" % parkey)

        if "mat" in output_types:
            edge_struct = {}
            for edge_key in edge_keys:
                if edge_key != "fiblist":
                    edge_struct[edge_key] = nx.to_numpy_matrix(G_out, weight=edge_key)

            # Nodes
            size_nodes = len(list(G_out.nodes(data=True)))

            # Get the node attributes/keys from the first node and then break.
            # Change w.r.t networkx2
            for u, d in G_out.nodes(data=True):
                node_keys = list(d.keys())
                break

            node_struct = {}
            for node_key in node_keys:
                if node_key == "dn_position":
                    node_arr = np.zeros([size_nodes, 3], dtype=float)
                else:
                    node_arr = np.zeros(size_nodes, dtype=np.object_)
                node_n = 0
                for _, node_data in G_out.nodes(data=True):
                    node_arr[node_n] = node_data[node_key]
                    node_n += 1
                node_struct[node_key] = node_arr

            print(" - connectome_%s.mat" % parkey)
            sio.savemat(
                "connectome_%s.mat" % parkey,
                long_field_names=True,
                mdict={"sc": edge_struct, "nodes": node_struct},
            )

        if "graphml" in output_types:
            g2 = nx.Graph()
            for u_gml, v_gml, d_gml in G_out.edges(data=True):
                g2.add_edge(u_gml, v_gml)
                for key in d_gml:
                    g2[u_gml][v_gml][key] = d_gml[key]
            for u_gml, d_gml in G_out.nodes(data=True):
                g2.add_node(u_gml)
                g2.nodes[u_gml]["dn_multiscaleID"] = d_gml["dn_multiscaleID"]
                g2.nodes[u_gml]["dn_fsname"] = d_gml["dn_fsname"]
                g2.nodes[u_gml]["dn_hemisphere"] = d_gml["dn_hemisphere"]
                g2.nodes[u_gml]["dn_name"] = d_gml["dn_name"]
                g2.nodes[u_gml]["dn_position_x"] = d_gml["dn_position"][0]
                g2.nodes[u_gml]["dn_position_y"] = d_gml["dn_position"][1]
                g2.nodes[u_gml]["dn_position_z"] = d_gml["dn_position"][2]
                g2.nodes[u_gml]["dn_region"] = d_gml["dn_region"]
            print(" - connectome_%s.graphml" % parkey)
            nx.write_graphml(g2, "connectome_%s.graphml" % parkey)

        # Storing final fiber length array
        fiberlabels_fname = "final_fiberslength_%s.npy" % str(parkey)
        np.save(fiberlabels_fname, final_fiberlength_array)

        # Storing all fiber labels (with orphans)
        fiberlabels_fname = "filtered_fiberslabel_%s.npy" % str(parkey)
        np.save(fiberlabels_fname, np.array(fiberlabels, dtype=np.int32))

        # Storing final fiber labels (no orphans)
        fiberlabels_noorphans_fname = "final_fiberlabels_%s.npy" % str(parkey)
        np.save(fiberlabels_noorphans_fname, final_fiberlabels_array)

        if not streamline_wrote:
            print(" > Filtering tractography - keeping only non-orphan fibers")
            finalfibers_fname = "streamline_final.trk"
            save_fibers(hdr, fib, finalfibers_fname, final_fibers_idx)

    print("Done.")
    print("========================")
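# A hedged usage sketch for cmat() above; every path, file name and map key
# below is an illustrative assumption, not a value taken from the pipeline.
cmat(
    intrk="streamline.trk",                 # assumed tractogram path
    roi_volumes=["ROIv_scale1.nii.gz"],     # assumed parcellation volume
    roi_graphmls=["scale1.graphml"],        # assumed node-description GraphML
    parcellation_scheme="Lausanne2018",
    compute_curvature=False,
    additional_maps={"gFA": "gfa.nii.gz"},  # assumed scalar map
    output_types=["gPickle", "mat", "graphml"],
)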
def save_graphml(G, filename='graph.graphml', folder=None, gephi=False):
    """
    Save graph as GraphML file to disk.

    Parameters
    ----------
    G : networkx multidigraph
    filename : string
        the name of the graphml file (including file extension)
    folder : string
        the folder to contain the file, if None, use default data folder
    gephi : bool
        if True, give each edge a unique key to work around Gephi's
        restrictive interpretation of the GraphML specification

    Returns
    -------
    None
    """
    start_time = time.time()
    if folder is None:
        folder = settings.data_folder

    # create a copy to convert all the node/edge attribute values to string
    G_save = G.copy()

    if gephi:
        gdf_nodes, gdf_edges = graph_to_gdfs(G_save, nodes=True, edges=True,
                                             node_geometry=True, fill_edge_geometry=True)

        # turn each edge's key into a unique ID for Gephi compatibility
        gdf_edges['key'] = range(len(gdf_edges))

        # gephi doesn't handle node attrs named x and y well, so rename
        gdf_nodes['xcoord'] = gdf_nodes['x']
        gdf_nodes['ycoord'] = gdf_nodes['y']
        G_save = gdfs_to_graph(gdf_nodes, gdf_edges)

        # remove graph attributes as Gephi only accepts node and edge attrs
        G_save.graph = {}
    else:
        # if not gephi, keep graph attrs and stringify them
        for dict_key in G_save.graph:
            # convert all the graph attribute values to strings
            G_save.graph[dict_key] = make_str(G_save.graph[dict_key])

    # stringify node and edge attributes
    for _, data in G_save.nodes(data=True):
        for dict_key in data:
            if gephi and dict_key in ['xcoord', 'ycoord']:
                # don't convert x y values to string if saving for gephi
                continue
            else:
                # convert all the node attribute values to strings
                data[dict_key] = make_str(data[dict_key])
    for _, _, data in G_save.edges(keys=False, data=True):
        for dict_key in data:
            # convert all the edge attribute values to strings
            data[dict_key] = make_str(data[dict_key])

    if not os.path.exists(folder):
        os.makedirs(folder)

    nx.write_graphml(G_save, os.path.join(folder, filename))
    log('Saved graph "{}" to disk as GraphML at "{}" in {:,.2f} seconds'.format(
        G_save.name, os.path.join(folder, filename), time.time() - start_time))
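# Hypothetical usage of save_graphml() above; assumes G is a graph produced by
# the surrounding library (with stringifiable node/edge attributes). The folder
# is created if it does not exist, so any writable path works.
save_graphml(G, filename='network.graphml', folder='data', gephi=False)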
plt.ylim([-0.3, 1])
plt.xlabel("Spearman Correlation Coefficient")
plt.ylabel("Kendall $\\tau$ Correlation Coefficient")
plt.tight_layout()
plt.savefig("spearman_vs_tau.png")

# Create some example MSTs for us to draw
G_pearson = Graphs_pearson[0]
G_spearman = Graphs_spearman[0]
G_tau = Graphs_tau[0]

mst_pearson = nx.minimum_spanning_tree(correlation_to_distance(G_pearson))
mst_spearman = nx.minimum_spanning_tree(correlation_to_distance(G_spearman))
mst_tau = nx.minimum_spanning_tree(correlation_to_distance(G_tau))

nx.write_graphml(mst_pearson, "mst_pearson_0.graphml")
nx.write_graphml(mst_spearman, "mst_spearman_0.graphml")
nx.write_graphml(mst_tau, "mst_tau_0.graphml")

max_eig_df = pd.DataFrame()
max_eig_df['Pearson'] = pearson_largest_eig
max_eig_df['Spearman'] = spearman_largest_eig
max_eig_df['$\\tau$'] = tau_largest_eig
max_eig_df.index = dt
max_eig_df.plot()
plt.ylabel(r"$\lambda_{\max}$")
plt.tight_layout()
plt.savefig("max_eig.png")

edge_life_df = pd.DataFrame()
edge_life_df['Pearson'] = edges_life_pearson
def convert_outputs(prefix, temporal_context):
    Path("flavors.json").write_text(
        json.dumps([*json.loads(Path("flavors.json").read_text()), prefix], indent=4))
    label_key = 'name' if temporal_context else 'generic_name'
    if not Path(f'{prefix}.json').exists():
        generate_graph(grapfn=f'{prefix}.json', keep_temporal_context=temporal_context)
    graph = json.loads(Path(f'{prefix}.json').read_text())
    if not Path(f'{prefix}_metrics.json').exists():
        Path(f'{prefix}_metrics.json').write_text(
            json.dumps(embed_metrics(graph), indent=2))
    metrics = json.loads(Path(f'{prefix}_metrics.json').read_text())
    if not Path(f'{prefix}_metrics_distances.json').exists():
        Path(f'{prefix}_metrics_distances.json').write_text(
            json.dumps(embed_metrics_distance(graph, metrics)))

    # to_networkx
    g = networkx.DiGraph()
    g.add_nodes_from([node[label_key] for node in graph.values()])
    g.add_edges_from([(node_source[label_key], graph[target][label_key])
                      for node_source in graph.values()
                      for target in node_source['mention_freq'].keys()])
    networkx.write_graphml(g, f'{prefix}_unweighted.graphml')
    for src in graph.values():
        srcnm = src[label_key]
        for tgt, w in src['mention_freq'].items():
            tgtnm = graph[tgt][label_key]
            g[srcnm][tgtnm]['weight'] = w
    networkx.write_graphml(g, f'{prefix}_weighted.graphml')
    # read both files back (results are discarded; this only checks they load)
    g = networkx.DiGraph(networkx.read_graphml(f'{prefix}_unweighted.graphml'))
    g = networkx.DiGraph(networkx.read_graphml(f'{prefix}_weighted.graphml'))

    # to_sqlite
    if Path(f'{prefix}.db').exists():
        Path(f'{prefix}.db').unlink()
    sqldb = sqlite3.connect(f'{prefix}.db')
    cur = sqldb.cursor()
    cur.execute('''CREATE TABLE node (
        name VARCHAR(255),
        generic_name VARCHAR(255),
        type VARCHAR(255),
        doc_id VARCHAR(255),
        monitored bool,
        pub_date VARCHAR(255),
        in_force bool)''')
    cur.execute('''CREATE TABLE edge (
        node_src INTEGER,
        node_dst INTEGER,
        mentions INTEGER,
        FOREIGN KEY(node_src) REFERENCES node(rowid)
            ON UPDATE CASCADE ON DELETE CASCADE,
        FOREIGN KEY(node_dst) REFERENCES node(rowid)
            ON UPDATE CASCADE ON DELETE CASCADE)''')
    cur.execute(f'''CREATE VIEW nodes AS
        SELECT rowid as id, {label_key} as label FROM node''')
    cur.execute('''CREATE VIEW edges AS
        SELECT rowid as id, node_src as source, node_dst as target,
               mentions as weight FROM edge''')
    node_name_to_id = dict()
    for node in graph.values():
        cur.execute(
            '''INSERT INTO node(
                name, generic_name, type, doc_id, monitored, pub_date, in_force
            ) VALUES(?,?,?,?,?,?,?)''',
            (node['name'], node['generic_name'], node['type'], node['doc_id'],
             node['monitored'], node['pub_date'], node['in_force']))
        node_name_to_id[node['name']] = cur.lastrowid
    for node in graph.values():
        node_src_nm = node['name']
        node_src = node_name_to_id[node_src_nm]
        for node_dst_nm, frequency in node['mention_freq'].items():
            node_dst = node_name_to_id[node_dst_nm]
            cur.execute(
                '''INSERT INTO edge(node_src,node_dst,mentions) VALUES(?,?,?)''',
                (node_src, node_dst, frequency))
    cur.close()
    sqldb.commit()
    Path(f'{prefix}.sql').write_text('\n'.join(sqldb.iterdump()))
    sqldb.close()

    # to_csv
    with open(f'{prefix}.csv', 'w') as file:
        file.write('%s,%s,%s\n' % ("source", "target", "weight"))
        for node in graph.values():
            node_src_nm = node['name']
            for node_dst_nm, frequency in node['mention_freq'].items():
                file.write('%s,%s,%d\n' % (graph[node_src_nm][label_key],
                                           graph[node_dst_nm][label_key],
                                           frequency))

    # to_graphviz
    gv = graphviz.Digraph()
    for node in graph.values():
        gv.node(str(node_name_to_id[node['name']]),
                label='\n'.join(
                    list(map(str,
                             filter(lambda a: a is not None,
                                    [node['type'], node['doc_id'], node['pub_date']])))))
    for node in graph.values():
        node_src_nm = node['name']
        node_src = node_name_to_id[node_src_nm]
        for node_dst_nm, frequency in node['mention_freq'].items():
            node_dst = node_name_to_id[node_dst_nm]
            gv.edge(str(node_src), str(node_dst), str(frequency))
    gv.save(f'{prefix}.gv')  # takes "forever" to render, "never" finishes

    # connectivity
    g = networkx.DiGraph(networkx.read_graphml(f'{prefix}_unweighted.graphml'))
    if not Path(f'{prefix}_metrics_connectivity.json').exists():
        Path(f'{prefix}_metrics_connectivity.json').write_text(
            json.dumps(embed_metrics_connectivity(graph, metrics, g, label_key),
                       indent=2))

    # matplotlib rendering
    if not Path(f'{prefix}_unweighted.pdf').exists() or not Path(
            f'{prefix}_unweighted.png').exists():
        g = networkx.DiGraph(networkx.read_graphml(f'{prefix}_unweighted.graphml'))
        networkx.draw(g)
        plt.savefig(f'{prefix}_unweighted.pdf')
        plt.savefig(f'{prefix}_unweighted.png')
        plt.close()
    if not Path(f'{prefix}_weighted.pdf').exists() or not Path(
            f'{prefix}_weighted.png').exists():
        g = networkx.DiGraph(networkx.read_graphml(f'{prefix}_weighted.graphml'))
        networkx.draw(g)
        plt.savefig(f'{prefix}_weighted.pdf')
        plt.savefig(f'{prefix}_weighted.png')
        plt.close()

    # Leave root document explicit
    if not Path(f'{prefix}_root.json').exists():
        Path(f'{prefix}_root.json').write_text(
            json.dumps(graph[find_rootdoc()['name']]))

    # Plot quadrants
    for weight in [True, False]:
        desc = ('un' * int(not weight)) + 'weighted'
        if not Path(f'{prefix}_quads_{desc}.pdf').exists() or not Path(
                f'{prefix}_quads_{desc}.png').exists():
            key = 'weight' if weight else 'degree'
            dimen_cutoff = draw_degree_quadrants(graph, metrics['degree'], key)
            plt.savefig(f'{prefix}_quads_{desc}.pdf', bbox_inches='tight')
            plt.savefig(f'{prefix}_quads_{desc}.png', bbox_inches='tight')
            Path(f'{prefix}_quads_{desc}.json').write_text(
                json.dumps(dimen_cutoff, indent=4))
    for weight in [True, False]:
        desc = ('un' * int(not weight)) + 'weighted'
        if True or not Path(f'{prefix}_quads_{desc}.csv').exists():
            key = 'weight' if weight else 'degree'
            dimen_cutoff = json.loads(
                Path(f'{prefix}_quads_{desc}.json').read_text())
            with open(f'{prefix}_quads_{desc}.csv', 'w') as file:
                fmt = ','.join(['%s'] * (4 + int(weight))) + '\n'
                hr = (dimen_cutoff['halfrange']['x'],
                      dimen_cutoff['halfrange']['y'])
                file.write(fmt % ("source", "target",
                                  *(["weight"] * int(weight)),
                                  "source_color", "target_color"))
                for node in graph.values():
                    node_src_nm = node['name']
                    src_metric = metrics['degree'][node_src_nm]
                    for node_dst_nm, frequency in node['mention_freq'].items():
                        dst_metric = metrics['degree'][node_dst_nm]
                        file.write(fmt % (
                            graph[node_src_nm][label_key],
                            graph[node_dst_nm][label_key],
                            *([frequency] * int(weight)),
                            QUADRANT_COLOR[get_quadrant(
                                src_metric[f'{key}_in'],
                                src_metric[f'{key}_out'], *hr) - 1],
                            QUADRANT_COLOR[get_quadrant(
                                dst_metric[f'{key}_in'],
                                dst_metric[f'{key}_out'], *hr) - 1],
                        ))
            with open(f'{prefix}_quads_{desc}_nodst3rdquad.csv', 'w') as file:
                fmt = ','.join(['%s'] * (4 + int(weight))) + '\n'
                hr = (dimen_cutoff['halfrange']['x'],
                      dimen_cutoff['halfrange']['y'])
                file.write(fmt % ("source", "target",
                                  *(["weight"] * int(weight)),
                                  "source_color", "target_color"))
                for node in graph.values():
                    node_src_nm = node['name']
                    src_metric = metrics['degree'][node_src_nm]
                    for node_dst_nm, frequency in node['mention_freq'].items():
                        dst_metric = metrics['degree'][node_dst_nm]
                        if get_quadrant(dst_metric[f'{key}_in'],
                                        dst_metric[f'{key}_out'], *hr) == 3:
                            continue
                        file.write(fmt % (
                            graph[node_src_nm][label_key],
                            graph[node_dst_nm][label_key],
                            *([frequency] * int(weight)),
                            QUADRANT_COLOR[get_quadrant(
                                src_metric[f'{key}_in'],
                                src_metric[f'{key}_out'], *hr) - 1],
                            QUADRANT_COLOR[get_quadrant(
                                dst_metric[f'{key}_in'],
                                dst_metric[f'{key}_out'], *hr) - 1],
                        ))
            with open(f'{prefix}_quads_{desc}_nosrc3rdquad.csv', 'w') as file:
                fmt = ','.join(['%s'] * (4 + int(weight))) + '\n'
                hr = (dimen_cutoff['halfrange']['x'],
                      dimen_cutoff['halfrange']['y'])
                file.write(fmt % ("source", "target",
                                  *(["weight"] * int(weight)),
                                  "source_color", "target_color"))
                for node in graph.values():
                    node_src_nm = node['name']
                    src_metric = metrics['degree'][node_src_nm]
                    if get_quadrant(src_metric[f'{key}_in'],
                                    src_metric[f'{key}_out'], *hr) == 3:
                        continue
                    for node_dst_nm, frequency in node['mention_freq'].items():
                        dst_metric = metrics['degree'][node_dst_nm]
                        file.write(fmt % (
                            graph[node_src_nm][label_key],
                            graph[node_dst_nm][label_key],
                            *([frequency] * int(weight)),
                            QUADRANT_COLOR[get_quadrant(
                                src_metric[f'{key}_in'],
                                src_metric[f'{key}_out'], *hr) - 1],
                            QUADRANT_COLOR[get_quadrant(
                                dst_metric[f'{key}_in'],
                                dst_metric[f'{key}_out'], *hr) - 1],
                        ))
            with open(f'{prefix}_quads_{desc}_no3rdquad.csv', 'w') as file:
                fmt = ','.join(['%s'] * (4 + int(weight))) + '\n'
                hr = (dimen_cutoff['halfrange']['x'],
                      dimen_cutoff['halfrange']['y'])
                file.write(fmt % ("source", "target",
                                  *(["weight"] * int(weight)),
                                  "source_color", "target_color"))
                for node in graph.values():
                    node_src_nm = node['name']
                    src_metric = metrics['degree'][node_src_nm]
                    if get_quadrant(src_metric[f'{key}_in'],
                                    src_metric[f'{key}_out'], *hr) == 3:
                        continue
                    for node_dst_nm, frequency in node['mention_freq'].items():
                        dst_metric = metrics['degree'][node_dst_nm]
                        if get_quadrant(dst_metric[f'{key}_in'],
                                        dst_metric[f'{key}_out'], *hr) == 3:
                            continue
                        file.write(fmt % (
                            graph[node_src_nm][label_key],
                            graph[node_dst_nm][label_key],
                            *([frequency] * int(weight)),
                            QUADRANT_COLOR[get_quadrant(
                                src_metric[f'{key}_in'],
                                src_metric[f'{key}_out'], *hr) - 1],
                            QUADRANT_COLOR[get_quadrant(
                                dst_metric[f'{key}_in'],
                                dst_metric[f'{key}_out'], *hr) - 1],
                        ))
    if True:
        folder_out = Path(f'{prefix}_quads_unweighted_no2nd3rdquad')
        folder_out.mkdir(parents=True, exist_ok=True)
        for node in graph.values():
            node_src_nm = node['name']
            src_metric = metrics['degree'][node_src_nm]
            if get_quadrant(src_metric[f'{key}_in'],
                            src_metric[f'{key}_out'], *hr) in [2, 3]:
                continue
            with folder_out.joinpath(f'{node["generic_name"]}.csv').open('w') as file:
                fmt = ','.join(['%s'] * 5) + '\n'
                hr = (dimen_cutoff['halfrange']['x'],
                      dimen_cutoff['halfrange']['y'])
                file.write(fmt % ("source", "target", "source_color",
                                  "target_color", "similarity"))
                srcWC = None
                srcCacheKey = graph[node_src_nm]['filepath'][6:]
                if len(srcCacheKey) > 0:
                    srcDoc = PlainCachedDocument(srcCacheKey, None).parse(' ')
                    srcWC = WordCounter(srcDoc)
                for node_dst_nm, frequency in node['mention_freq'].items():
                    dst_metric = metrics['degree'][node_dst_nm]
                    # if get_quadrant(dst_metric[f'{key}_in'],
                    #                 dst_metric[f'{key}_out'], *hr) == 3:
                    #     continue
                    similarity = '?'
                    dstCacheKey = graph[node_dst_nm]['filepath'][6:]
                    if len(dstCacheKey) > 0:
                        dstDoc = PlainCachedDocument(dstCacheKey, None).parse(' ')
                        dstWC = WordCounter(dstDoc)
                        if srcWC is not None:
                            similarity = srcWC.vectorSimilarity(dstWC)
                            similarity = str(similarity[0][0])
                    file.write(fmt % (
                        graph[node_src_nm][label_key],
                        graph[node_dst_nm][label_key],
                        QUADRANT_COLOR[get_quadrant(src_metric[f'{key}_in'],
                                                    src_metric[f'{key}_out'],
                                                    *hr) - 1],
                        QUADRANT_COLOR[get_quadrant(dst_metric[f'{key}_in'],
                                                    dst_metric[f'{key}_out'],
                                                    *hr) - 1],
                        similarity,
                    ))

    if True or not Path(f'{prefix}_pagerank.json').exists():
        g = networkx.DiGraph(networkx.read_graphml(f'{prefix}_unweighted.graphml'))
        pr = networkx.pagerank(g)
        Path(f'{prefix}_pagerank.json').write_text(json.dumps(pr, indent=2))
        spr = sorted([(k, v) for k, v in pr.items()], key=lambda a: (-a[1], a[0]))
        Path(f'{prefix}_pagerank_ranked.json').write_text(
            json.dumps(spr, indent=2))
        # dirLink = {k: set(v['mention_freq'].keys()) for k, v in graph.items()}
        revLink = {graph[k][label_key]: set() for k in graph.keys()}
        for ks, v in graph.items():
            ks = graph[ks][label_key]
            for kd in v['mention_freq'].keys():
                kd = graph[kd][label_key]
                revLink[kd].add(ks)
        sptr = {spr[0][0]: spr[0][0]}
        for node, rank in spr[1:]:
            maxNode = sorted([x for x in revLink[node] if x != node],
                             key=lambda a: -pr[a])[0]
            sptr[node] = maxNode
        Path(f'{prefix}_pagerank_ranked_spannedtree.json').write_text(
            json.dumps(sptr, indent=2))
        table = ["source,target,source_weight,target_weight"]
        for ns, nd in sptr.items():
            ws = "%.32f" % pr[ns]
            wd = "%.32f" % pr[nd]
            table.append(f"{ns},{nd},{ws},{wd}")
        Path(f'{prefix}_pagerank_ranked_spannedtree.csv').write_text(
            '\n'.join(table) + '\n')
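# Hypothetical driver for convert_outputs() above: the prefixes are made-up
# names, and temporal_context toggles node labelling between 'name' and
# 'generic_name' as the function's first lines show.
convert_outputs('corpus_temporal', temporal_context=True)
convert_outputs('corpus_generic', temporal_context=False)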
def get_subgraph(V, E, label_filepath, dataset_name, level=1, subgraph_count=5,
                 ignore_deg=None, root_node=None):
    """
    # total_points: total number of data points
    # feature_dm: number of features per datapoint
    # number_of_labels: total number of labels
    # X: feature matrix of dimension total_points * feature_dm
    # Y: list of size total_points; each element contains the labels
    #    corresponding to one datapoint
    # V: list of all labels (nodes)
    # E: dict of edge tuple -> weight, e.g. {(1, 4): 1, (2, 7): 3}
    """
    # Get a dict of label -> textual_label
    label_dict = get_label_dict(label_filepath)

    def mapping(v):
        """A utility function to relabel nodes of the upcoming graph with
        textual label names.

        :param v: label id (int)
        :return: the textual label of the node id [v]
        """
        v = int(v)
        if v in label_dict:
            return label_dict[v]
        return str(v)

    # Build an unweighted graph of all edges
    g = nx.Graph()
    g.add_edges_from(E.keys())

    # The section below tries to build a smaller subgraph from the actual
    # graph for visualization
    subgraph_lists = []
    for sg in range(subgraph_count):
        if root_node is None:
            # Select a random vertex to be the root
            np.random.shuffle(V)
            v = V[0]
        else:
            v = root_node

        # Two files to write the graph and label information.
        # Remove characters like \, /, <, >, :, *, |, ", ? from file names;
        # Windows can not have file names with these characters
        label_info_filepath = 'samples/' + str(dataset_name) + '_Info[{}].txt'.format(
            str(int(v)) + '-' + remove_special_chars(mapping(v)))
        label_graph_filepath = 'samples/' + str(dataset_name) + '_G[{}].graphml'.format(
            str(int(v)) + '-' + remove_special_chars(mapping(v)))
        # label_graph_el = 'samples/' + str(dataset_name) + '_E[{}].el'.format(
        #     str(int(v)) + '-' + mapping(v)).replace(' ', '_')

        print('Label:[' + mapping(v) + ']')
        label_info_file = open(label_info_filepath, 'w')
        label_info_file.write('Label:[' + mapping(v) + ']' + "\n")

        # Build the subgraph using BFS
        bfs_q = Queue()
        bfs_q.put(v)
        bfs_q.put(0)  # sentinel marking the end of a BFS level
        node_check = {}
        ignored = []

        sub_g = nx.Graph()
        lvl = 0
        while not bfs_q.empty() and lvl <= level:
            v = bfs_q.get()
            if v == 0:
                lvl += 1
                bfs_q.put(0)
                continue
            elif node_check.get(v, True):
                node_check[v] = False
                edges = list(g.edges(v))
                # label_info_file.write('\nNumber of edges: ' + str(len(edges)) +
                #                       ' for node: ' + mapping(v) + '[' + str(v) + ']' + '\n')
                if ignore_deg is not None and len(edges) > ignore_deg:
                    # label_info_file.write('Ignoring: [' + mapping(v) +
                    #                       '] \t\t\t degree: [' + str(len(edges)) + ']\n')
                    ignored.append("Ignoring: deg [" + mapping(v) + "] = [" + str(len(edges)) + "]\n")
                    continue
                for uv_tuple in edges:
                    edge = tuple(sorted(uv_tuple))
                    sub_g.add_edge(edge[0], edge[1], weight=E[edge])
                    bfs_q.put(uv_tuple[1])
            else:
                continue

        # Relabel the nodes to reflect the textual label
        nx.relabel_nodes(sub_g, mapping, copy=False)
        print('sub_g:', sub_g)

        label_info_file.write(str('\n'))
        # Write some statistics about the subgraph
        label_info_file.write(str(nx.info(sub_g)) + '\n')
        label_info_file.write('density: ' + str(nx.density(sub_g)) + '\n')
        label_info_file.write('list of the frequency of each degree value [degree_histogram]: ' +
                              str(nx.degree_histogram(sub_g)) + '\n')
        for nodes in ignored:
            label_info_file.write(str(nodes) + '\n')
        # TODO: Add other statistics for better understanding of the subgraph.

        # subg_edgelist = nx.generate_edgelist(sub_g, label_graph_el)
        label_info_file.close()
        nx.write_graphml(sub_g, label_graph_filepath)

        subgraph_lists.append(sub_g)
        print('Graph generated at: ' + label_graph_filepath)

        if root_node:
            print("Root node provided, will generate only one graph file.")
            break

    return subgraph_lists
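# Hypothetical call to get_subgraph() above; V and E follow the shapes in the
# docstring, and the file path and dataset name are illustrative only.
subgraphs = get_subgraph(V, E, 'data/label_map.txt', 'sample_dataset',
                         level=2, subgraph_count=3, ignore_deg=500)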
import networkx as nx

servers = "server1:30080"

G = nx.DiGraph()
G.add_node("start", serverport="30080", peers=servers)
G.add_node("transfer1", type="get", protocol="tcp", size="5 KiB")
G.add_node("transfer2", type="get", protocol="tcp", size="6 KiB")
G.add_node("transfer3", type="get", protocol="tcp", size="7 KiB")
G.add_node("transfer4", type="get", protocol="tcp", size="5 KiB")
G.add_node("transfer5", type="get", protocol="tcp", size="6 KiB")
G.add_node("transfer6", type="get", protocol="tcp", size="7 KiB")
G.add_node("transfer7", type="get", protocol="tcp", size="5 KiB")
G.add_node("transfer8", type="get", protocol="tcp", size="8 KiB")  # random noise
G.add_node("transfer9", type="get", protocol="tcp", size="7 KiB")
# etc... for the entire stream, potentially hundreds of these

G.add_edge("start", "transfer1")
G.add_edge("transfer1", "transfer2")
G.add_edge("transfer2", "transfer3")
G.add_edge("transfer3", "transfer4")
G.add_edge("transfer4", "transfer5")
G.add_edge("transfer5", "transfer6")
G.add_edge("transfer6", "transfer7")
G.add_edge("transfer7", "transfer8")
G.add_edge("transfer8", "transfer9")
G.add_edge("transfer9", "start")

nx.write_graphml(G, "tgen.client.graphml.xml")
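# A sketch of the same client stream built in a loop instead of by hand, so the
# "potentially hundreds" of transfer nodes need not be written out one by one.
# The node/edge attributes mirror the manual version above; the repeating size
# cycle is an illustrative assumption.
def build_stream(n_transfers, servers="server1:30080"):
    G = nx.DiGraph()
    G.add_node("start", serverport="30080", peers=servers)
    sizes = ["5 KiB", "6 KiB", "7 KiB"]
    prev = "start"
    for i in range(1, n_transfers + 1):
        name = "transfer%d" % i
        G.add_node(name, type="get", protocol="tcp", size=sizes[(i - 1) % len(sizes)])
        G.add_edge(prev, name)
        prev = name
    G.add_edge(prev, "start")  # close the loop back to start, as above
    return G

nx.write_graphml(build_stream(9), "tgen.client.graphml.xml")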
from pathlib import Path
import networkx as nx
from termcolor import colored


def addEntity(graph, s, type, stock):
    typeEntity = f'{type}'
    stockEntity = stock
    graph.add_node(s, type=typeEntity, stock=stockEntity)


def addRoad(graph, s, e, cap, gas, tax):
    start = f'{s}'
    end = f'{e}'
    RoadCap = cap
    RoadGas = gas
    RoadTax = tax
    graph.add_edge(start, end, capacity=RoadCap, Gas=RoadGas, Tax=RoadTax)


############################# MAIN RUNNING TEST READER #############################

path = Path(__file__)
newpath = path.parent.parent.resolve()
dataDir = newpath / 'data'
InstancePath = dataDir / 'truck_instance_less_customers.data'
file_path = InstancePath

graph, entete = extract_graph(file_path)
nx.draw(graph)
# Uncomment the line below to display the graph with matplotlib
# plt.show()

# Forward slashes keep the path literal valid on every OS (backslashes in a
# plain string are fragile)
nx.write_graphml(graph, 'projet_RO_LAGNIAUX_JEAN_DENES_THEO/output_files/graphDenesLagniaux.graphml')

print()
print(colored('The graph has been created and can be found in the folder => output_files', 'red'))
print()
import networkx as nx

f = open('raw/uscn_co_filtered.txt')
# edges = list()
graph = nx.Graph()
for i, line in enumerate(f.readlines()):
    line = line.strip()
    line = line.split(',')
    # CN author ids are offset by 1,000,000 so the two node sets never collide
    graph.add_node(i, labelV='US', author_id=line[1])
    graph.add_node(i + 1000000, labelV='CN', author_id=line[2])
    graph.add_edge(i, i + 1000000, labelE='Cooperates')
    # edges.append((line[1], line[2]))
# graph.add_edges_from(edges)

print(graph.number_of_nodes(), graph.number_of_edges())
nx.write_graphml(graph, "planb.xml")
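# Optional round-trip check (an assumption, not part of the original script):
# GraphML should preserve the node/edge counts written above. Note that
# nx.read_graphml returns node ids as strings, so only counts are compared.
g2 = nx.read_graphml("planb.xml")
assert g2.number_of_nodes() == graph.number_of_nodes()
assert g2.number_of_edges() == graph.number_of_edges()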
#%% Save
out_graphs = []
for g in nx_graphs_raw.values():
    out_graphs.append(g)
for k in nx_graphs_raw.keys():
    print(k)
save_names = ["Gaa", "Gad", "Gda", "Gdd"]

for g in nx_graphs_norm.values():
    out_graphs.append(g)
for k in nx_graphs_norm.keys():
    print(k)
save_names += ["Gaan", "Gdan", "Gadn", "Gddn"]

out_graphs.append(nx_all_raw)
save_names.append("G")
out_graphs.append(nx_all_norm)
save_names.append("Gn")

for name, graph in zip(save_names, out_graphs):
    nx.write_graphml(graph, output_path / (name + ".graphml"))

meta_data_df.to_csv(output_path / "meta_data.csv")

#%% Verify things are right
print("\n\n\n\nChecking graphs are the same when saved")
for name, graph_wrote in zip(save_names, out_graphs):
    print(name)
    graph_read = nx.read_graphml(output_path / (name + ".graphml"))
    adj_read = nx.to_numpy_array(graph_read)
    adj_wrote = nx.to_numpy_array(graph_wrote)
    print(np.array_equal(adj_read, adj_wrote))
    graph_loader = load_networkx(name, version=data_date_graphs)
    adj_loader = nx.to_numpy_array(graph_loader)
    print(np.array_equal(adj_wrote, adj_loader))
    print()
fc3 = prep.get_fc(path, 3)
df3 = prep.get_dataframe(fc3)
df3 = prep.calcIds(df3, CONFIDENCE)

df0 = prep.get_dataframe(fc0)
df0 = prep.calcIds(df0, CONFIDENCE)

df2 = prep.get_dataframe(fc2)
df2 = prep.calcIds(df2, CONFIDENCE)

df1 = prep.get_dataframe(fc1)
df1 = prep.calcIds(df1, CONFIDENCE)

# Offset xpos so the two detection sets of each side share one coordinate range
df0.xpos = df0.xpos + xmax
df1.xpos = df1.xpos + xmax

side0 = pd.concat([df3, df0])
side1 = pd.concat([df2, df1])

close1 = prep.get_close_bees(side0, DISTANCE)
close2 = prep.get_close_bees(side1, DISTANCE)
close = pd.concat([close1, close2])

p = prep.bee_pairs_to_timeseries(close)
i = prep.extract_interactions(p, LENGTH)

G = prep.create_graph2(i)
nx.write_graphml(G, filename + ".graphml")
def saveToGraphml(graph, filename, **kwargs):
    nx.write_graphml(graph, filename, **kwargs)
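# Minimal usage sketch for the thin wrapper above; kwargs pass straight through
# to nx.write_graphml (prettyprint is one of its documented options).
saveToGraphml(nx.karate_club_graph(), 'karate.graphml', prettyprint=True)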
def gera_graphml(self, path):
    nx.write_graphml(self.G, path)
heap = []
for i in range(0, points.shape[0]):
    for j in range(0, points.shape[0]):
        if i != j:
            # Calculate ratio = d/SP; the heap keeps the ratios in
            # nondecreasing order (smallest on top)
            ratio = direct_matrix[i, j] / SP_matrix[i][j]
            if ratio < 1.0:
                hq.heappush(heap, (ratio, i, j))

# Repeatedly pick the node pair with the smallest ratio and connect it with an
# edge (the fragment's break statement implies this loop in the original code)
while heap:
    min_ratio, x, y = hq.heappop(heap)
    print('min_ratio', min_ratio)

    # Stop when min_ratio >= threshold
    if min_ratio >= threshold:
        break

    # Update the graph
    path_graph.add_edge(x, y, weight=float(direct_matrix[x, y]))
    threshold_graph.add_edge(x, y, weight=direct_matrix[x, y])

# plt.clf()
plt.plot(points[:, 0], points[:, 1], 'o')
# Change the first point to another shape
plt.plot(points[0, 0], points[0, 1], 'D')
nx.draw_networkx_edges(threshold_graph, pos=pos_dict, width=3, edge_color='b')
plt.savefig("threshold_" + name + "_" + str(threshold) + ".png")
nx.write_graphml(path_graph, "threshold_" + name + "_" + str(threshold) + ".graphml")

# networkx >= 2 returns an iterator here, so materialize it as a dict before
# indexing SP_matrix[i][j]
SP_matrix = dict(nx.shortest_path_length(path_graph, weight="weight"))