def load_train_test_graphs(dataset, recache_input):
    raw_mat_path = 'data/{}.npz'.format(dataset)
    train_graph_path = 'data/{}/train_graph.pkl'.format(dataset)
    test_graph_path = 'data/{}/test_graph.pkl'.format(dataset)

    if recache_input:
        print('loading sparse matrix from {}'.format(raw_mat_path))
        m = load_sparse_csr(raw_mat_path)

        print('splitting train and test...')
        train_m, test_m = split_train_test(m, weights=[0.9, 0.1])

        print('converting to nx.DiGraph')
        train_g = nx.from_scipy_sparse_matrix(train_m, create_using=nx.DiGraph(), edge_attribute='sign')
        test_g = nx.from_scipy_sparse_matrix(test_m, create_using=nx.DiGraph(), edge_attribute='sign')

        print('saving train and test graphs...')
        nx.write_gpickle(train_g, train_graph_path)
        nx.write_gpickle(test_g, test_graph_path)
    else:
        print('loading train and test graphs...')
        train_g = nx.read_gpickle(train_graph_path)
        test_g = nx.read_gpickle(test_graph_path)
    return train_g, test_g
def main():
    rev_forward_g = nx.read_gpickle("rev_forward.gpickle")
    rev_backward_g = nx.read_gpickle("rev_backward.gpickle")

    with open("ids.pickle", "rb") as f:
        ids = pickle.load(f)

    cutoff = 12
    dists = []

    for start, forward_rounds, backward_rounds, end in ids:
        s = count(start)
        if not 4 <= s <= 6:
            continue

        # backward extension
        backward_extension_rounds = 3
        rounds = forward_rounds + backward_rounds + backward_extension_rounds
        for p, w in propagate(rev_backward_g, end, backward_extension_rounds - 1, cutoff):
            p = add_last_round(p)
            dists.append((start, p, w, rounds))
            print("{} ... X ... {} with probability {}, {} rounds".format(
                start, " <- ".join(str(v) for v in p), math.exp(-w), rounds
            ))

    with open("dists.pickle", "wb") as f:
        pickle.dump(dists, f)
def ntwks_to_matrices(in_files, edge_key):
    first = nx.read_gpickle(in_files[0])
    files = len(in_files)
    nodes = len(first.nodes())
    matrix = np.zeros((nodes, nodes, files))
    for idx, name in enumerate(in_files):
        graph = nx.read_gpickle(name)
        for u, v, d in graph.edges(data=True):
            graph[u][v]['weight'] = d[edge_key]  # Setting the requested edge value as weight value
        matrix[:, :, idx] = nx.to_numpy_matrix(graph)  # Retrieve the matrix
    return matrix
def _build_and_store_new_graph(data_file, name=""):
    """
    Reads the nodes and edges files stored in the 1.1 version and builds a new Graph compatible with 2.0
    :param data_file: path to temporary directory
    :param name: name of the network
    :return: new Graph compatible with version 2.0
    """
    data_file += name
    edges = networkx.read_gpickle(data_file + "_edges" + ".gpickle")
    nodes = networkx.read_gpickle(data_file + "_nodes" + ".gpickle")
    net = networkx.Graph()
    net.add_nodes_from(nodes)
    net.add_edges_from(edges)
    return net
def diff_history(directory, length=1):
    glob_dir = os.path.join(directory, "*.pickle.tar.gz")
    pickle_files = glob.glob(glob_dir)
    pickle_files = sorted(pickle_files)
    pairs = [(a, b) for (a, b) in zip(pickle_files, pickle_files[1:])]
    pairs = pairs[-1 * length:]
    diffs = []
    for fileA, fileB in pairs:
        graphA = nx.read_gpickle(fileA)
        graphB = nx.read_gpickle(fileB)
        diff = compare(graphA, graphB)  # remove render folder which is timestamps
        diffs.append(diff)
    return diffs
def __init__(self, steps=False):
    while not(rospy.has_param('/planner/robots')):
        continue

    number = rospy.get_param('~robot_nr', 0)
    centralized = rospy.get_param('/planner/centralized', False)
    super(RobotNode, self).__init__(number, centralized)

    self.mapname = rospy.get_param('/world/name', 'hospital')
    self.file_path = rospy.get_param('/world/path', '/home/argen/catkin_ws/src/narko/mrpp')

    if self.centralized:
        rospy.loginfo("Waiting for planner...")
        rospy.wait_for_service('/planner/roadmap_plan')
    else:
        rospy.wait_for_service("/goal_manager/goal")

    self.robots = rospy.get_param('/planner/robots', 2)
    self.topological_map = nx.read_gpickle(self.file_path + '/maps/graphs/test/topological/' + self.mapname + '_topological.gpickle')
    self.planner = ADPP(self.topological_map, self.robots, self.number)

    rospy.Subscriber("/planner/coordination/broadcast", Inform, self.handle_message, queue_size=100)
    self.pub = rospy.Publisher("/planner/coordination/broadcast", Inform, queue_size=1)

    self.width = rospy.get_param('/world/width', 3117)
    self.height = rospy.get_param('/world/height', 1189)
    self.resolution = rospy.get_param('/world/resolution', 0.04626)

    rospy.loginfo("Initialized %s node.", rospy.get_name())
def simpleDisplay(ipaddress="localhost", port="9999"):
    '''
    Restore and display the graph saved after each processing step.
    :return:
    '''
    # client,repo,stargazers,user = getRespond()
    # g = addTOGraph(repo,stargazers,user)
    # addEdge(stargazers,client,g)
    # getPopular(g)
    # savaGraph1(g)
    # top10(g)
    g = nx.read_gpickle("data/github.1")
    print nx.info(g)
    print

    mtsw_users = [n for n in g if g.node[n]['type'] == 'user']
    h = g.subgraph(mtsw_users)
    print nx.info(h)
    print

    d = json_graph.node_link_data(h)
    json.dump(d, open('data/githubRec.json', 'w'))
    cmdstr = "python3 -m http.server %s" % port
    webbrowser.open_new_tab("http://%s:%s/%s.html" % (ipaddress, port, "display_githubRec"))
    os.system(cmdstr)
def fastInitializeBipartite():
    G = nx.read_gpickle("graph.p")
    groundTruth = pickle.load(open("groundTruth.p", "rb"))
    sites = pickle.load(open("sites.p", "rb"))
    users = pickle.load(open("users.p", "rb"))
    print "Loaded graph data from pickle"
    return (G, groundTruth, sites, users)
def __init__(self, handle):
    super(SourcePairCombiner, self).__init__()
    self.handle = handle
    self.dirhandle = 'reassortant_edges'
    self.G = nx.read_gpickle('{0} Full Complement Graph.pkl'.format(self.handle))
    self.current_sourcepair = None  # current sourcepair graph
    self.current_noi = None  # current node of interest
def main():
    if not (len(sys.argv) == 3 and sys.argv[1] in ["forward", "backward"]):
        print("usage: ./find_ext.py [forward/backward] [differentials file]", file=sys.stderr)
        sys.exit(1)

    direction = sys.argv[1]
    if direction == "forward":
        g = nx.read_gpickle("rev_forward.gpickle")
    else:
        g = nx.read_gpickle("rev_backward.gpickle")

    with open(sys.argv[2]) as f:
        for start, _ in map(literal_eval, f):
            rounds = 1
            for p, w in propagate(g, start, rounds):
                print((start, p, w))
def __init__(self, filename):
    # self.graph = nx.barabasi_albert_graph(100, 5)
    self.graph = nx.Graph()
    self.graph = nx.read_gpickle(filename)
    self.nodes = self.graph.nodes()
    self.graph = self.mapper()
    self.edges = self.graph.edges()
def _read_cell_graph(self, filename, format):
    """Load the cell-to-cell connectivity graph from a file.

    Returns None if any error happens.
    """
    cell_graph = None
    if filename:
        try:
            start = datetime.now()
            if format == "gml":
                cell_graph = nx.read_gml(filename)
            elif format == "pickle":
                cell_graph = nx.read_gpickle(filename)
            elif format == "edgelist":
                cell_graph = nx.read_edgelist(filename)
            elif format == "yaml":
                cell_graph = nx.read_yaml(filename)
            elif format == "graphml":
                cell_graph = nx.read_graphml(filename)
            else:
                print "Unrecognized format:", format
            end = datetime.now()
            delta = end - start
            config.BENCHMARK_LOGGER.info(
                "Read cell_graph from file %s of format %s in %g s"
                % (filename, format, delta.seconds + 1e-6 * delta.microseconds))
        except Exception, e:
            print e
def main(): seed(0) #set seed #get graph info G = nx.read_gpickle("input/graphMTC_CentroidsLength6.gpickle") #noCentroidsLength15.gpickle") #does not have centroidal links. There is also the choice of a proper multidigraph: nx.read_gpickle("input/graphMTC_CentroidsLength5.gpickle") G = nx.freeze(G) #prevents edges or nodes to be added or deleted #get od info. This is in format of a dict keyed by od, like demand[sd1][sd2] = 200000. demand = bd.build_demand('input/BATS2000_34SuperD_TripTableData.csv', 'input/superdistricts_centroids.csv') #get earthquake info q = QuakeMaps('input/20130210_mtc_total_lnsas3.pkl', 'input/20130210_mtc_magnitudes3.pkl', 'input/20130210_mtc_faults3.pkl', 'input/20130210_mtc_weights3.pkl', 'input/20130210_mtc_scenarios3.pkl') #input/20130107_mtc_total_lnsas1.pkl', 'input/20130107_mtc_magnitudes1.pkl','input/20130107_mtc_faults1.pkl', 'input/20130107_mtc_weights1.pkl', 'input/20130107_mtc_scenarios1.pkl') #'input/20130210_mtc_total_lnsas3.pkl', 'input/20130210_mtc_magnitudes3.pkl', 'input/20130210_mtc_faults3.pkl', 'input/20130210_mtc_weights3.pkl', 'input/20130210_mtc_scenarios3.pkl') #('input/20130107_mtc_total_lnsas1.pkl', 'input/20130107_mtc_magnitudes1.pkl', #totalfilename=None, magfilename=None, faultfilename=None, weightsfilename=None, scenariofilename=None): print 'weights: ', q.weights q.num_sites = len(q.lnsas[0]) #determine which scenarios you want to run good_indices = pick_scenarios(q.lnsas, q.weights) travel_index_times = [] index = 0 #loop over scenarios print 'size of lnsas: ', len(q.lnsas) for scenario in q.lnsas: #each 'scenario' has 1557 values of lnsa, i.e. one per site if index in good_indices: print 'index: ', index (bridges, flow, path, path2) = run_simple_iteration(G, scenario, demand, False) travel_index_times.append((index, bridges, flow, path, path2)) # print 'new travel times: ', travel_index_times if index%1000 ==0: util.write_2dlist(time.strftime("%Y%m%d")+'_bridges_flow_paths4.txt',travel_index_times) index += 1 #IMPORTANT util.write_2dlist(time.strftime("%Y%m%d")+'_bridges_flow_paths4.txt',travel_index_times) print 'the number of scenarios I considered doing: ', index print 'the number of scenarios I actually did: ', len(travel_index_times)
def extract_all((networks, net_type, setup, args)):
    """
    Open a pickled network and extract data from it.

    Parameters
    ----------
    networks: iterable
        Iterable of filenames
    net_type: str
        Specific string labelling this data
    setup: str
        String describing the parameters
    args: tuple
        Attributes to be collected (e.g., "robustness")
    """
    getter = attrgetter(*args)
    z_getter = attrgetter("zscores")
    res = list()
    for filename in networks:
        try:
            net = nx.read_gpickle(filename)
        except (IOError, EOFError):
            print "failed to load network file '%s'" % filename
            os.rename(filename, filename + ".failed")
            continue
        # update(net, filename)
        results = list(z_getter(net))
        results.extend(list(getter(net)))
        results.append(numpy.mean(net.shortest_paths))
        # stripping .pkl file extension
        results.append(os.path.basename(filename)[:-4])
        results.append(net_type)
        results.append(setup)
        res.append(results)
    return res
def _read_celltype_graph(self, celltypes_file, format="gml"):
    """
    Read celltype-celltype connectivity graph from file.

    celltypes_file -- the path of the file containing the graph.

    format -- format of the file. allowed values: gml, graphml, edgelist, pickle, yaml.
    """
    start = datetime.now()
    celltype_graph = None
    try:
        if format == "gml":
            celltype_graph = nx.read_gml(celltypes_file)
        elif format == "edgelist":
            celltype_graph = nx.read_edgelist(celltypes_file)
        elif format == "graphml":
            celltype_graph = nx.read_graphml(celltypes_file)
        elif format == "pickle":
            celltype_graph = nx.read_gpickle(celltypes_file)
        elif format == "yaml":
            celltype_graph = nx.read_yaml(celltypes_file)
        else:
            print "Unrecognized format %s" % (format)
    except Exception, e:
        print e
def __init__(self, fname, interactive=True):
    self.fname = fname
    self.graph = nx.read_gpickle(fname)

    #apply_workaround(self.graph, thr=1e-3)
    #remove_intersecting_edges(self.graph)

    print "Number of connected components:", \
        nx.number_connected_components(self.graph)

    self.selected_path_verts = []

    if interactive:
        self.fig = plt.figure()
        self.path_patch = None

        G_p = nx.connected_component_subgraphs(self.graph)[0]
        #G_p = nx.connected_component_subgraphs(prune_graph(self.graph))[0]

        plot.draw_leaf(G_p, fixed_width=True)

        plt.ion()
        plt.show()

        self.edit_loop()
def load_data():
    start = time.time()
    try:
        print("Loading data from /data pickles and hdf5 adj matrices")
        f = h5py.File('data/cosponsorship_data.hdf5', 'r')
        for chamber in ['house', 'senate']:
            for congress in SUPPORTED_CONGRESSES:
                adj_matrix_lookup[(chamber, congress)] = np.asarray(f[chamber + str(congress)])

                igraph_graph = igraph.load("data/" + chamber + str(congress) + "_igraph.pickle", format="pickle")
                igraph_graph_lookup[(chamber, congress, False)] = igraph_graph

                nx_graph = nx.read_gpickle("data/" + chamber + str(congress) + "_nx.pickle")
                nx_graph_lookup[(chamber, congress, False)] = nx_graph
    except IOError as e:
        print("Loading data from cosponsorship files")
        f = h5py.File("data/cosponsorship_data.hdf5", "w")
        for chamber in ['house', 'senate']:
            for congress in SUPPORTED_CONGRESSES:
                print("Starting %s %s" % (str(congress), chamber))
                adj_matrix = load_adjacency_matrices(congress, chamber)
                data = f.create_dataset(chamber + str(congress), adj_matrix.shape, dtype='f')
                data[0: len(data)] = adj_matrix

                # igraph
                get_cosponsorship_graph(congress, chamber, False).save(
                    "data/" + chamber + str(congress) + "_igraph.pickle", "pickle")
                # networkx
                nx.write_gpickle(get_cosponsorship_graph_nx(congress, chamber, False),
                                 "data/" + chamber + str(congress) + "_nx.pickle")

                print("Done with %s %s" % (str(congress), chamber))
    print("Data loaded in %d seconds" % (time.time() - start))
def check(self, test_name, tree_gen_func, **more_args):
    kws = self.some_kws_of_run.copy()
    kws.update(directed_params)
    if more_args:
        kws.update(more_args)
    paths = run(
        tree_gen_func,
        calculate_graph=False,
        print_summary=False,
        # result_pkl_path_prefix=result_pickle_prefix,
        **kws)
    trees = pkl.load(open(paths['result']))
    trees = filter(lambda t: t.number_of_edges() > 0, trees)  # remove empty trees
    assert_true(len(trees) > 0)
    for t in trees:
        assert_true(len(t.edges()) > 0)
    return trees, nx.read_gpickle(paths['meta_graph'])
def graph_preprocessing_with_counts(G_input=None, save_file=None):
    if not G_input:
        graph_file = os.path.join(work_dir, "adj_graph.p")
        G = nx.read_gpickle(graph_file)
    else:
        G = G_input.copy()

    print "Raw graph size:", G.size()
    print "Raw graph nodes", G.number_of_nodes()

    profile2prob = {l.split()[0]: float(l.split()[1]) for l in open(os.path.join(work_dir, 'profile_weight.txt'))}

    for edge in G.edges(data=True):
        nodes = edge[:2]
        _weight = edge[2]['weight']
        _count = edge[2]['count']

        if _count < 3:
            G.remove_edge(*nodes)

    print "Pre-processed graph size", G.size()
    print "Pre-processed graph nodes", G.number_of_nodes()

    G.remove_nodes_from(nx.isolates(G))

    print "Pre-processed graph size", G.size()
    print "Pre-processed graph nodes", G.number_of_nodes()

    if save_file:
        print "Saving to", save_file
        nx.write_gpickle(G, save_file)

    return G
def main():
    options, args = parseCommandLine()

    pickleFileName = args[0]
    outputFileName = None
    if (len(args) == 2):
        outputFileName = args[1]

    print ("Loading %s...\n") % (pickleFileName)
    g = nx.read_gpickle(pickleFileName)

    # Convert graph to matrix form
    for u, v, d in g.edges_iter(data=True):
        g.edge[u][v]['weight'] = g.edge[u][v]['number_of_fibers']
    cmat = nx.to_numpy_matrix(g)

    mean = np.mean(cmat)
    std = np.std(cmat)
    print('Total number of connections: %d') % (np.sum(cmat))
    print('Connection Matrix Mean: %f Std: %f') % (mean, std)

    # Compute binarized stats
    binarized_cmat = np.zeros(cmat.shape)
    binarized_cmat[cmat > 0] = 1
    print('Binarized connection matrix Mean: %f Std: %f') % (np.mean(binarized_cmat), np.std(binarized_cmat))

    if outputFileName != None:
        f = open(outputFileName, 'at')
        f.write(('%f,%f\n') % (mean, std))
        f.close()
def __init__(self, synonymFile, graphFile, color):
    self.color = color
    f = open(synonymFile)
    self.synonymDict = cPickle.load(f)
    f.close()
    self.G = nx.read_gpickle(graphFile)
    self.synonyms = self.synonymDict.keys()
def summarize_precoth(dwi_network_file, fdg_stats_file, subject_id): import os.path as op import scipy.io as sio import networkx as nx fdg = sio.loadmat(fdg_stats_file) dwi_ntwk = nx.read_gpickle(dwi_network_file) # Thal L-1 R-2 # Cortex 3 and 4 # Prec L-5 R-6 titles = ["subjid"] fdg_avg = ["LTh_CMR_avg","RTh_CMR_avg","LCo_CMR_avg","RCo_CMR_avg","LPre_CMR_avg","RPre_CMR_avg"] f_avg = [fdg["func_mean"][0][0],fdg["func_mean"][1][0],fdg["func_mean"][2][0], fdg["func_mean"][3][0],fdg["func_mean"][4][0],fdg["func_mean"][5][0]] fdg_max = ["LTh_CMR_max","RTh_CMR_max","LCo_CMR_max","RCo_CMR_max","LPre_CMR_max","RPre_CMR_max"] f_max = [fdg["func_max"][0][0],fdg["func_max"][1][0],fdg["func_max"][2][0], fdg["func_max"][3][0],fdg["func_max"][4][0],fdg["func_max"][5][0]] fdg_min = ["LTh_CMR_min","RTh_CMR_min","LCo_CMR_min","RCo_CMR_min","LPre_CMR_min","RPre_CMR_min"] f_min = [fdg["func_min"][0][0],fdg["func_min"][1][0],fdg["func_min"][2][0], fdg["func_min"][3][0],fdg["func_min"][4][0],fdg["func_min"][5][0]] fdg_std = ["LTh_CMR_std","RTh_CMR_std","LCo_CMR_std","RCo_CMR_std","LPre_CMR_std","RPre_CMR_std"] f_std = [fdg["func_stdev"][0][0],fdg["func_stdev"][1][0],fdg["func_stdev"][2][0], fdg["func_stdev"][3][0],fdg["func_stdev"][4][0],fdg["func_stdev"][5][0]] fdg_titles = fdg_avg + fdg_max + fdg_min + fdg_std dwi = nx.to_numpy_matrix(dwi_ntwk, weight="weight") l_thal = ["LTh_RTh","LTh_LCo","LTh_RCo","LTh_LPre","LTh_RPre"] l_th = [dwi[0,1], dwi[0,2], dwi[0,3], dwi[0,4], dwi[0,5]] r_thal = ["RTh_LCo","RTh_RCo","RTh_LPre","RTh_RPre"] r_th = [dwi[1,2], dwi[1,3], dwi[1,4], dwi[1,5]] l_co = ["LCo_RCo","LCo_LPre","LCo_RPre"] l_cor = [dwi[2,3], dwi[2,4], dwi[2,5]] r_co = ["RCo_LPre","RCo_RPre"] r_cor = [dwi[3,4], dwi[3,5]] l_pre = ["LPre_RPre"] l_prec = [dwi[4,5]] conn_titles = l_thal + r_thal + l_co + r_co + l_pre all_titles = titles + fdg_titles + conn_titles volume_titles = ["VoxLTh","VoxRTh","VoxLCo", "VoxRCo", "VoxLPre", "VoxRPre"] all_titles = all_titles + volume_titles volumes = fdg["number_of_voxels"] all_data = f_avg + f_max + f_min + f_std + l_th + r_th + l_cor + r_cor + l_prec + volumes[:,0].tolist() out_file = op.abspath(subject_id + "_precoth.csv") f = open(out_file, "w") title_str = ",".join(all_titles) + "\n" f.write(title_str) all_data = map(float, all_data) data_str = subject_id + "," + ",".join(format(x, "10.5f") for x in all_data) + "\n" f.write(data_str) f.close() return out_file
def reduceGraph(read_g, write_g, minEdgeWeight, minNodeDegree, Lp, Sp):
    """
    Simplify the undirected graph and then update the 3 undirected weight properties.
    :param read_g: is the graph pickle to read
    :param write_g: is the updated graph pickle to write
    :param minEdgeWeight: the original weight of each edge should be >= minEdgeWeight
    :param minNodeDegree: the degree of each node should be >= minNodeDegree. the degree here is G.degree(node), NOT G.degree(node, weight='weight')
    :return: None
    """
    G = nx.read_gpickle(read_g)
    print 'number of original nodes: ', nx.number_of_nodes(G)
    print 'number of original edges: ', nx.number_of_edges(G)

    for (u, v, w) in G.edges(data='weight'):
        if w < minEdgeWeight:
            G.remove_edge(u, v)

    for n in G.nodes():
        if G.degree(n) < minNodeDegree:
            G.remove_node(n)

    print 'number of new nodes: ', nx.number_of_nodes(G)
    print 'number of new edges: ', nx.number_of_edges(G)

    for (a, b, w) in G.edges_iter(data='weight'):
        unweight_allocation(G, a, b, w, Lp, Sp)

    print 'update weight ok'
    nx.write_gpickle(G, write_g)
    return
def pullnodeIDs(in_network, name_key='dn_name'):
    """ This function will return the values contained, for each node in
    a network, given an input key. By default it will return the node names
    """
    import networkx as nx
    import numpy as np
    from nipype.interfaces.base import isdefined
    if not isdefined(in_network):
        raise ValueError
        return None
    try:
        ntwk = nx.read_graphml(in_network)
    except:
        ntwk = nx.read_gpickle(in_network)
    nodedata = ntwk.node
    ids = []
    integer_nodelist = []
    for node in nodedata.keys():
        integer_nodelist.append(int(node))
    for node in np.sort(integer_nodelist):
        try:
            nodeid = nodedata[node][name_key]
        except KeyError:
            nodeid = nodedata[str(node)][name_key]
        ids.append(nodeid)
    return ids
def experiment_4():
    G = nx.Graph()
    G.add_edge(0, 11, weight=91)
    G.add_edge(1, 11, weight=72)
    G.add_edge(1, 13, weight=96)
    G.add_edge(2, 13, weight=49)
    G.add_edge(2, 6, weight=63)
    G.add_edge(2, 3, weight=31)
    G.add_edge(3, 9, weight=98)
    G.add_edge(3, 7, weight=1)
    G.add_edge(3, 12, weight=59)
    G.add_edge(4, 7, weight=6)
    G.add_edge(4, 9, weight=6)
    G.add_edge(4, 8, weight=95)
    G.add_edge(5, 11, weight=44)
    G.add_edge(6, 11, weight=53)
    G.add_edge(8, 10, weight=2)
    G.add_edge(8, 12, weight=48)
    G.add_edge(9, 12, weight=32)
    G.add_edge(10, 14, weight=16)
    G.add_edge(11, 13, weight=86)
    G = nx.read_gpickle('G.gpickle')

    path_nx = nx.dijkstra_path(G, 0, 14)
    path = dijkstra(G, 0, 14, True)

    if path_cost(G, path) > path_cost(G, path_nx):
        print 'Error'
    else:
        print 'Correct'

    return locals()
def main(): seed(0) #set seed #get graph info G = nx.read_gpickle("input/graphMTC_CentroidsLength5.gpickle") #noCentroidsLength15.gpickle") #does not have centroidal links print '|V| = ', len(G.nodes()) print '|E| = ', len(G.edges()) G = nx.freeze(G) #prevents edges or nodes to be added or deleted #get od info. This is in format of a dict keyed by od, like demand[sd1][sd2] = 200000. demand = bd.build_demand('input/BATS2000_34SuperD_TripTableData.csv', 'input/superdistricts_centroids.csv') #bd.build_demand('input/BATS2000_34SuperD_TripTableData.csv', 'input/superdistricts_centroids.csv') #get earthquake info q = QuakeMaps('input/20130210_mtc_total_lnsas3.pkl', 'input/20130210_mtc_magnitudes3.pkl', 'input/20130210_mtc_faults3.pkl', 'input/20130210_mtc_weights3.pkl', 'input/20130210_mtc_scenarios3.pkl') #(input/20130107_mtc_total_lnsas1.pkl', 'input/20130107_mtc_magnitudes1.pkl', 'input/20130107_mtc_faults1.pkl', 'input/20130107_mtc_weights1.pkl', 'input/20130107_mtc_scenarios1.pkl') #totalfilename=None, magfilename=None, faultfilename=None, weightsfilename=None, scenariofilename=None): 'input/20130210_mtc_total_lnsas3.pkl', 'input/20130210_mtc_magnitudes3.pkl', 'input/20130210_mtc_faults3.pkl', 'input/20130210_mtc_weights3.pkl', 'input/20130210_mtc_scenarios3.pkl') #( q.num_sites = len(q.lnsas[0]) #determine which scenarios you want to run good_indices = pick_scenarios(q.lnsas, q.weights) travel_index_times = [] index = 0 #loop over scenarios for scenario in q.lnsas: #each 'scenario' has 1557 values of lnsa, i.e. one per site if index in good_indices: print 'index: ', index (travel_time, vmt) = run_iteration(G, scenario, demand) travel_index_times.append((index, travel_time, vmt)) # print 'new travel times: ', travel_index_times if index%100 ==0: util.write_2dlist(time.strftime("%Y%m%d")+'_travel_time.txt',travel_index_times) index += 1 #IMPORTANT util.write_2dlist(time.strftime("%Y%m%d")+'_travel_time.txt',travel_index_times)
def topology(data, ell):
    """
    Computation of topological characteristics.

    Parameters
    ------------
    data : array of paths to the graphs
    ell : list of length scales
    """
    for i in data:
        G = nx.read_gpickle(i)
        B = nx.number_of_edges(G)
        V = nx.number_of_nodes(G)
        Euler = V - B
        C = (B - V) / float(V)
        eu.append(Euler)
        c_t.append(C)
        vert.append(V)
        bran.append(B)

    plt.plot(ell, c_t, '.', label='v23')
    #
    #np.save('/backup/yuliya/v23/graphs_largedom/Euler.npy', eu)
    #np.save('/backup/yuliya/v23/graphs_largedom/C_t.npy', c_t)
    #np.save('/backup/yuliya/v23/graphs_largedom/V.npy', vert)
    #np.save('/backup/yuliya/v23/graphs_largedom/B.npy', bran)
    #np.save('/backup/yuliya/vsi01/graphs_largdom/time.npv23/graphs_largedom/y', t)
    plt.yscale('log')
def remove_unconnected_graphs_and_threshold(in_file):
    import nipype.interfaces.cmtk as cmtk
    import nipype.pipeline.engine as pe
    import os
    import os.path as op
    import networkx as nx
    from nipype.utils.filemanip import split_filename
    connected = []
    if in_file == None or in_file == [None]:
        return None
    elif len(in_file) == 0:
        return None
    graph = nx.read_gpickle(in_file)
    if not graph.number_of_edges() == 0:
        connected.append(in_file)
        _, name, ext = split_filename(in_file)
        filtered_network_file = op.abspath(name + '_filt' + ext)
    if connected == []:
        return None

    #threshold_graphs = pe.Node(interface=cmtk.ThresholdGraph(), name="threshold_graphs")
    threshold_graphs = cmtk.ThresholdGraph()
    from nipype.interfaces.cmtk.functional import tinv
    weight_threshold = 1  # tinv(0.95, 198-30-1)
    threshold_graphs.inputs.network_file = in_file
    threshold_graphs.inputs.weight_threshold = weight_threshold
    threshold_graphs.inputs.above_threshold = True
    threshold_graphs.inputs.edge_key = "weight"
    threshold_graphs.inputs.out_filtered_network_file = op.abspath(filtered_network_file)
    threshold_graphs.run()
    return op.abspath(filtered_network_file)
def readNetworks(fileNames):
    networks = []
    for filename in fileNames:
        networks.append(nx.read_gpickle(filename))
    return networks
def short_branches():
    """
    Visualization of short branches of the skeleton.
    """
    data1_sk = glob.glob('/backup/yuliya/vsi05/skeletons_largdom/*.h5')
    data1_sk.sort()

    for i, j, k in zip(d[1][37:47], data1_sk[46:56], ell[1][37:47]):
        g = nx.read_gpickle(i)
        dat = tb.openFile(j)
        skel = np.copy(dat.root.skel)
        bra = np.copy(dat.root.branches)
        mask = np.zeros_like(skel)
        dat.close()

        length = nx.get_edge_attributes(g, 'length')
        number = nx.get_edge_attributes(g, 'number')
        num_dict = {}
        for m in number:
            for v in number[m]:
                num_dict.setdefault(v, []).append(m)
        find_br = ndimage.find_objects(bra)
        for l in list(length.keys()):
            if length[l] < 0.5 * k:  # Criteria
                for b in number[l]:
                    mask[find_br[b-1]] = bra[find_br[b-1]] == b

        mlab.figure(bgcolor=(1, 1, 1), size=(1200, 1200))
        mlab.contour3d(skel, colormap='hot')
        mlab.contour3d(mask)
        mlab.savefig('/backup/yuliya/vsi05/skeletons/short_bran/' + i[42:-10] + '.png')
        mlab.close()
# ## Project 5 - Company Emails
#
# For this project we will be working with a company's email network where each node corresponds to a person at the company,
# and each edge indicates that at least one email has been sent between two people.
#
# The network also contains the node attributes `Department` and `ManagementSalary`.
#
# `Department` indicates the department in the company which the person belongs to, and `ManagementSalary` indicates whether that person is receiving a management position salary.

import networkx as nx
import pandas as pd
import numpy as np
import pickle

G = nx.read_gpickle('email_prediction.txt')
print(nx.info(G))

# Salary Prediction
#
# Using network `G`, identify the people in the network with missing values for the node attribute `ManagementSalary` and predict whether or not these individuals are receiving a management position salary.
# Predictions will need to be given as the probability that the corresponding employee is receiving a management position salary.

from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import MinMaxScaler


def is_management(node):
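# A minimal, hypothetical sketch (not the notebook's own solution) of how the
# ManagementSalary probabilities described above could be produced: build a few
# structural node features, fit an MLPClassifier on the nodes whose
# ManagementSalary is known, and return predicted probabilities for the rest.
# The function name and the feature choices (Department, degree, clustering)
# are illustrative assumptions.
def salary_prediction_sketch(G):
    df = pd.DataFrame(index=G.nodes())
    df['department'] = pd.Series(nx.get_node_attributes(G, 'Department'))
    df['salary'] = pd.Series(nx.get_node_attributes(G, 'ManagementSalary'))
    df['degree'] = pd.Series(dict(G.degree()))
    df['clustering'] = pd.Series(nx.clustering(G))

    features = ['department', 'degree', 'clustering']
    known = df[df['salary'].notnull()]
    unknown = df[df['salary'].isnull()]

    scaler = MinMaxScaler()
    X_train = scaler.fit_transform(known[features])
    X_test = scaler.transform(unknown[features])

    clf = MLPClassifier(hidden_layer_sizes=(10, 5), max_iter=1000, random_state=0)
    clf.fit(X_train, known['salary'].astype(int))

    # Probability of the positive class: receiving a management position salary
    return pd.Series(clf.predict_proba(X_test)[:, 1], index=unknown.index)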
def load(self):
    with open(self._out_path, 'rb') as F:
        nx_graph = nx.read_gpickle(F)
    self.root_nodes = nxgraph_to_nodes(nx_graph)
def connect_bc_funct(cut_nonelected=False, network_objects=None, data_path=None, out_path=None, border_file4326=None): # import crossing_onlypoints with crossing points of ch1234567 containing coordinates # import ch123 and ch1234567 # for each border crossing, if it is not in a 123 way, find closest point to 123 on both sides of border # route between the border crossing and closest points in both sides of the border # keep the ways used in the routing # create a final graph with ch123 and the connected border crossings # out_path = r'C:/Users/Ion/IVT/OSM_python/networks/' # data_path = r'C:/Users/Ion/IVT/OSM_data' # border_file = str(data_path) + '/borderOSM_polygon_2056.shp' # network_objects = None # g_ch123_connected = nx.read_gpickle(str(ch1234567_path) + '/network_files/ch_connected_graph_bytime.gpickle') # if out_path is None: # g_ch1234567 = network_objects[0] # gdf = network_objects[1] # splitted_ways_dict = network_objects[2] # nodes_europe = network_objects[3] # ch_border = network_objects[4] # border has to be in 4326, loaded one is in 2056 # crossing_onlypoints = network_objects[5] # else: ch1234567_path = str(out_path) + '/ch1234567' ch123_path = str(out_path) + '/ch123' eu123_path = str(out_path) + '/eu123' eu123ch_path = str(out_path) + '/eu123ch4567' if os.path.isfile(str(ch1234567_path) + "/network_files/ch_connected_graph_bytime.gpickle") is False or out_path is None: print(datetime.datetime.now(), 'Starting process of connecting border points and swiss nuts with unclassified ways.') print('------------------------------------------------------------------------') print(datetime.datetime.now(), 'Loading files.') # files from ch1234567 and eu123 # g_ch1234567 = nx.read_gpickle(str(ch1234567_path) + '/network_files/eu_network_graph_bytime.gpickle') # i think this should also be without islands, to avoid finding a closest node g_ch123 = nx.read_gpickle(str(ch123_path) + '/network_files/eu_network_largest_graph_bytime.gpickle') g_ch1234567 = nx.read_gpickle(str(ch1234567_path) + '/network_files/eu_network_largest_graph_bytime.gpickle') # here is preferable to avoid islands as they may be far from switzerland g_eu123 = nx.read_gpickle(str(eu123_path) + '/network_files/eu_network_largest_graph_bytime.gpickle') crossing_onlypoints = gpd.read_file(str(ch1234567_path) + '/bc_official/crossing_onlypoints.shp') file = open(str(ch1234567_path) + "/network_files/europe_nodes_dict4326.pkl", 'rb') nodes_europe = pickle.load(file) file.close() border_file4326 # border_file = str(data_path) + '/Switzerland_OSM_polygon_4326.shp' ch_border = gpd.read_file(border_file4326) # border has to be in 4326, loaded one is in 2056 new_nodes = {} new_ways = {} # ch_border.crs = "epsg:2056" # ch_border = ch_border.to_crs("epsg:4326") nuts_path = str(data_path) + '/nuts_borders' print(datetime.datetime.now(), 'Files loaded.') print_islands(g_ch123, 'g_ch123') print_islands(g_ch1234567, 'g_ch1234567') print_islands(g_eu123, 'g_eu123') print('------------------------------------------------------------------------') # Creates graph of ch123 from graph ch1234567 and split into IN and OUT graphs # g_ch123 = copy.deepcopy(g_ch1234567) print(datetime.datetime.now(), 'Nodes/ways in g_ch1234567: ' + str(len(g_ch1234567.nodes)) + '/' + str(len(g_ch1234567.edges))) # for (u, v, c) in g_ch1234567.edges.data('way_type'): # for way_type in ['secondary', 'tertiary', 'residential', 'unclassified']: # if way_type in c: # g_ch123.remove_edge(u, v) # g_ch123.remove_nodes_from(list(nx.isolates(g_ch123))) 
print(datetime.datetime.now(), 'Nodes/ways in g_ch123: ' + str(len(g_ch123.nodes)) + '/ ' + str(len(g_ch123.edges))) print('------------------------------------------------------------------------') if os.path.isfile(str(ch1234567_path) + '/network_files/g_ch1234567_out.gpickle') is False or out_path is None: # This splits both network graphs between in and out of the swiss border g_ch123_in, g_ch123_out = split_graphs(g_ch123, ch_border, nodes_europe) g_ch1234567_in, g_ch1234567_out = split_graphs(g_ch1234567, ch_border, nodes_europe) nx.write_gpickle(g_ch123_in, str(ch1234567_path) + '/network_files/g_ch123_in.gpickle') nx.write_gpickle(g_ch123_out, str(ch1234567_path) + '/network_files/g_ch123_out.gpickle') nx.write_gpickle(g_ch1234567_in, str(ch1234567_path) + '/network_files/g_ch1234567_in.gpickle') nx.write_gpickle(g_ch1234567_out, str(ch1234567_path) + '/network_files/g_ch1234567_out.gpickle') elif os.path.isfile(str(ch1234567_path) + "/network_files/ch_connected_graph_bytime.gpickle") is False or out_path is None: g_ch123_in = nx.read_gpickle(str(ch1234567_path) + '/network_files/g_ch123_in.gpickle') g_ch123_out = nx.read_gpickle(str(ch1234567_path) + '/network_files/g_ch123_out.gpickle') g_ch1234567_in = nx.read_gpickle(str(ch1234567_path) + '/network_files/g_ch1234567_in.gpickle') g_ch1234567_out = nx.read_gpickle(str(ch1234567_path) + '/network_files/g_ch1234567_out.gpickle') print_islands(g_ch123_in, 'g_ch123_in') print_islands(g_ch123_out, 'g_ch123_out') print_islands(g_ch1234567_in, 'g_ch1234567_in') print_islands(g_ch1234567_out, 'g_ch1234567_out') if os.path.isfile(str(ch1234567_path) + "/network_files/ch_connected_graph_bytime.gpickle") is False or out_path is None: # This creates the tree of the ch123 graphs to find the closest nodes g_in_tree, g_in_lonlat = closest_node(g_ch123_in, nodes_europe) g_out_tree, g_out_lonlat = closest_node(g_ch123_out, nodes_europe) # g_infull_lonlat, g_infull_tree = closest_node(g_ch1234567_in, nodes_europe) g_full_tree, g_full_lonlat = closest_node(g_ch1234567, nodes_europe) print('------------------------------------------------------------------------') # ----------------------------------------------------------------------------- # CONNECT BORDER CROSSINGS WITH CH123 # ----------------------------------------------------------------------------- if os.path.isfile(str(ch1234567_path) + "/network_files/ch_connected_graph_bytime.gpickle") is False or out_path is None: print(datetime.datetime.now(), 'Number of edges in graphs (in/out graphs) BEFORE connecting border crossings: ' + str(len(g_ch123_in.edges)) + '/' + str(len(g_ch123_out.edges))) found_count = [0, 0] for index, row in crossing_onlypoints.iterrows(): way_id = row['new_id'] start_node_id = row['start_node'] end_node_id = row['end_node_i'] # Check if bc is from a principal highway # for (u, v, c) in g_ch123.edges.data('new_id'): # if way_id == c: # continue in_ch = ch_border.contains(Point(nodes_europe[start_node_id])) if in_ch[0] == True: in_node = start_node_id out_node = end_node_id else: in_node = end_node_id out_node = start_node_id # route in and out point in respective graphs to closest point in ch123 # it may be that it is not connected, then 'continue' g_ch123_in, new_nodes, new_ways, found_count = route_bc(in_node, None, g_ch123_in, g_ch1234567_in, nodes_europe, new_nodes, new_ways, g_in_tree, g_in_lonlat, found_count) g_ch123_out, new_nodes, new_ways, found_count = route_bc(out_node, None, g_ch123_out, g_ch1234567_out, nodes_europe, new_nodes, new_ways, g_out_tree, 
g_out_lonlat, found_count) print(datetime.datetime.now(), 'Found path: ' + str(found_count[0])) print(datetime.datetime.now(), 'Not found path: ' + str(found_count[1])) print(datetime.datetime.now(), 'Number of edges in graphs (in/out graphs) AFTER connecting border crossings: ' + str( len(g_ch123_in.edges)) + '/' + str(len(g_ch123_out.edges))) print_islands(g_ch123_in, 'g_ch123_in') print_islands(g_ch123_out, 'g_ch123_out') print('------------------------------------------------------------------------') # ----------------------------------------------------------------------------- # CONNECT NUTS CENTROIDS WITH CH123 # ----------------------------------------------------------------------------- unique_nuts_gdf = nuts_merging(nuts_path) found_count = [0, 0] for index, row in unique_nuts_gdf.iterrows(): nutid = row['NUTS_ID'] if 'CH' in nutid: nut_poly = row['geometry'] centroid = nut_poly.centroid centroid_coords = (centroid.x, centroid.y) g_ch123_in, new_nodes, new_ways, found_count = route_bc(None, None, g_ch123_in, g_ch1234567_in, nodes_europe, new_nodes, new_ways, g_in_tree, g_in_lonlat, found_count, centroid_coords, g_full_tree, g_full_lonlat) print(datetime.datetime.now(), 'Found path: ' + str(found_count[0])) print(datetime.datetime.now(), 'Not found path: ' + str(found_count[1])) print(datetime.datetime.now(), 'Number of edges in graphs (in/out graphs) AFTER connecting nuts centroids: ' + str( len(g_ch123_in.edges)) + '/' + str(len(g_ch123_out.edges))) print_islands(g_ch123_in, 'g_ch123_in') print('------------------------------------------------------------------------') # ----------------------------------------------------------------------------- # ADD EDGES WHICH CROSS BORDER (DELETED WHEN SPLITTING GRAPH TO IN/OUT) # ----------------------------------------------------------------------------- # at the end, after merging both out and in graphs the edges which cross the border will have to be added again, # as this process does not count with them g_ch123_connected = nx.compose(g_ch123_in, g_ch123_out) print(datetime.datetime.now(), 'Number of edges in connected graph after merging IN and OUT graphs WITHOUT border crossings: ' + str( len(g_ch123_connected.edges))) print_islands(g_ch123_connected, 'g_ch123_connected') for index, row in crossing_onlypoints.iterrows(): try: start_node_id = row['start_node'] end_node_id = row['end_node_i'] new_id = row['new_id'] # length = row['length'] time = g_ch1234567[start_node_id][end_node_id]['time'] way_type = row['way_type'] # and save the ways, implement them into ch_123 # g_ch123_connected.add_edge(start_node_id, end_node_id, time=time, length=length, new_id=new_id, way_type=way_type) g_ch123_connected.add_edge(start_node_id, end_node_id, time=time, new_id=new_id, way_type=way_type) except: continue print(datetime.datetime.now(), 'Number of edges in connected graph after merging IN and OUT graphs WITH border crossings: ' + str( len(g_ch123_connected.edges))) print_islands(g_ch123_connected, 'g_ch123_connected') # ----------------------------------------------------------------------------- # CONNECT REMAINING ISLANDS OF FINAL CONNECTED GRAPH # ----------------------------------------------------------------------------- # Last, as there are some islands in the last connected graph due to splitting of graph, connected islands: g_ch123_connected_largest = copy.deepcopy(g_ch123_connected) print_islands(g_ch123_connected_largest, 'g_ch123_connected_largest') components = list(nx.connected_components(g_ch123_connected_largest)) # list 
because it returns a generator components.sort(key=len, reverse=True) largest = components.pop(0) isolated = set(g for cc in components for g in cc) g_ch123_connected_largest.remove_nodes_from(isolated) print_islands(g_ch123_connected_largest, 'g_ch123_connected_largest') g_tree_large, g_lonlat_large = closest_node(g_ch123_connected_largest, nodes_europe) found_count = [0, 0] for i in range(len(components)): net = components[i] node = random.choice(list(net)) g_ch123_connected, new_nodes, new_ways, found_count = route_bc(node, None, g_ch123_connected, g_ch1234567, nodes_europe, new_nodes, new_ways, g_tree_large, g_lonlat_large, found_count) print(datetime.datetime.now(), 'Number of edges in connected SWISS graph after connecting islands in connected graph: ' + str( len(g_ch123_connected.edges))) print_islands(g_ch123_connected, 'g_ch123_connected') # In case the none elected border crossings want to be deleted, activate this if cut_nonelected: g_ch123_connected = cut_nonelected_bc(g_ch123_connected, network_objects=network_objects, out_path=out_path) # Join final connected graph with eu123 graph, to complete the full network g_eu123_connected = nx.compose(g_ch123_connected, g_eu123) print(datetime.datetime.now(), 'Number of edges in connected EUROPE graph: ' + str( len(g_eu123_connected.edges))) print_islands(g_eu123_connected, 'g_eu123_connected') # ----------------------------------------------------------------------------- # ADD TO EUROPE NETWORK FILES THE ADDED NODES AND WAYS TO THE CONNECTED GRAPH # ----------------------------------------------------------------------------- # file = open(str(eu123_path) + "/network_files/europe_nodes_dict4326.pkl", 'rb') # all_europe_nodes = pickle.load(file) # file.close() # europe_nodes_merged = {**all_europe_nodes, **new_nodes} # # with open(str(out_path) + '/europe_nodes_dict4326.pkl', 'wb') as f: # # pickle.dump(europe_nodes_merged, f, pickle.HIGHEST_PROTOCOL) # print(len(all_europe_nodes), len(all_europe_nodes)) # # file = open(str(eu123_path) + "/network_files/europe_ways_splitted_dict.pkl", 'rb') # all_europe_sw = pickle.load(file) # file.close() # europe_sw_merged = {**all_europe_sw, **new_ways} # # # eu_gdf = pd.read_csv(str(eu123_path) + "/network_files/gdf_MTP_europe.csv", low_memory=False) # # new_ways_df = pd.DataFrame.from_dict(new_ways, orient='index', columns=['start_node_id', 'end_node_id', 'nodes_list']) # new_ways_df = new_ways_df.reset_index() # new_ways_df = new_ways_df.rename(columns={"index": "new_id"}) # # eu_gdf = pd.concat([new_ways_df, eu_gdf]) # export graph and shp file of final network if os.path.isfile(str(ch1234567_path) + "/network_files/ch_connected_graph_bytime.gpickle") is False and out_path: nx.write_gpickle(g_eu123_connected, str(eu123_path) + "/network_files/eu_connected_graph_bytime.gpickle") nx.write_gpickle(g_ch123_connected, str(ch1234567_path) + "/network_files/ch_connected_graph_bytime.gpickle") if os.path.isfile(str(out_path) + "/network_files/eu_connected_graph_bytime.shp") is False: create_shp_largest(g_ch123_connected, None, None, None, str(ch1234567_path) + "/network_files", 'ch_connected_graph_bytime', list_nodes=None) # create_shp_largest(g_eu123_connected, europe_nodes_merged, europe_sw_merged, eu_gdf, # str(eu123_path) + "/network_files", 'eu_connected_graph_bytime', list_nodes=None) create_shp_largest(g_eu123_connected, None, None, None, str(eu123ch_path) + "/network_files", 'eu_connected_graph_bytime', list_nodes=None) else: print(datetime.datetime.now(), 'Connected files already exist.') 
print('------------------------------------------------------------------------') print('------------------------------------------------------------------------')
import networkx as nx
import matplotlib.pyplot as plt
import numpy as np
import random

G = nx.read_gpickle('spotify_data.pickle')

genres_ = [
    'jazz', 'step', 'classical', 'elec', 'metal', 'rock', 'edm', 'tech',
    'indie', 'house', 'rap', 'hip hop', 'pop'
]

temp_set = set()
remove_nodes = set()
for node in G.nodes(data=True):
    try:
        if len(node[1]['genres']) > 0 and G.degree(node[0]) > 1:
            if get_genre(node[1]['genres']) is not None:
                temp_set.add(get_genre(node[1]['genres']))
            else:
                remove_nodes.add(node[0])
            if len(temp_set) == 0:
                remove_nodes.add(node[0])
            node[1]['genres'] = list(temp_set)
            temp_set = set()
    except:
        remove_nodes.add(node[0])


def get_genre(l):
def load(self, model_data_dir=None):
    model_data_dir = self.model_data_dir if model_data_dir is None else model_data_dir
    if not os.path.exists(self.graph_save_path):
        logger.warning("File '{}' not found".format(model_data_dir))
    else:
        self.graph = nx.read_gpickle(self.graph_save_path)
        new = []
        for date in init:
            if date >= start and date <= end:
                new.append(date)
        if len(new) > 0:
            ndates[tran] = new
            nG.add_node(tran[0])
            nG.add_node(tran[1])
            nG.add_edge(tran[0], tran[1], weight=len(new))
            npos[tran[0]] = pos[tran[0]]
            npos[tran[1]] = pos[tran[1]]
    return npos, nG, ndates


#decryption
aG = nx.read_gpickle("agraph.gpickle")
pickle_in = open("adates.pickle", "rb")
a_dates = pickle.load(pickle_in)
pickle_in = open("apos.pickle", "rb")
apos = pickle.load(pickle_in)

mG = nx.read_gpickle("mgraph.gpickle")
pickle_in = open("mdates.pickle", "rb")
m_dates = pickle.load(pickle_in)
pickle_in = open("mpos.pickle", "rb")
mpos = pickle.load(pickle_in)

dates = [
    datetime.datetime(2020, 1, 20, 0, 0),
    datetime.datetime(2020, 3, 1, 0, 0)
]
def get_time_point(self, time):
    """
    Loads the specific network from the time point and returns it

    returns - network file
    """
    return nx.read_gpickle(self._networks[time])
def cmat(track_file, roi_file, resolution_network_file, matrix_name, matrix_mat_name, endpoint_name, intersections=False): """ Create the connection matrix for each resolution using fibers and ROIs. """ stats = {} iflogger.info('Running cmat function') # Identify the endpoints of each fiber en_fname = op.abspath(endpoint_name + '_endpoints.npy') en_fnamemm = op.abspath(endpoint_name + '_endpointsmm.npy') iflogger.info('Reading Trackvis file %s', track_file) fib, hdr = nb.trackvis.read(track_file, False) stats['orig_n_fib'] = len(fib) roi = nb.load(roi_file, mmap=NUMPY_MMAP) roiData = roi.get_data() roiVoxelSize = roi.header.get_zooms() (endpoints, endpointsmm) = create_endpoints_array(fib, roiVoxelSize) # Output endpoint arrays iflogger.info('Saving endpoint array: %s', en_fname) np.save(en_fname, endpoints) iflogger.info('Saving endpoint array in mm: %s', en_fnamemm) np.save(en_fnamemm, endpointsmm) n = len(fib) iflogger.info('Number of fibers: %i', n) # Create empty fiber label array fiberlabels = np.zeros((n, 2)) final_fiberlabels = [] final_fibers_idx = [] # Add node information from specified parcellation scheme path, name, ext = split_filename(resolution_network_file) if ext == '.pck': gp = nx.read_gpickle(resolution_network_file) elif ext == '.graphml': gp = nx.read_graphml(resolution_network_file) else: raise TypeError("Unable to read file:", resolution_network_file) nROIs = len(gp.nodes()) # add node information from parcellation if 'dn_position' in gp.nodes[list(gp.nodes())[0]]: G = gp.copy() else: G = nx.Graph() for u, d in gp.nodes(data=True): G.add_node(int(u), **d) # compute a position for the node based on the mean position of the # ROI in voxel coordinates (segmentation volume ) xyz = tuple( np.mean(np.where( np.flipud(roiData) == int(d["dn_correspondence_id"])), axis=1)) G.nodes[int(u)]['dn_position'] = tuple([xyz[0], xyz[2], -xyz[1]]) if intersections: iflogger.info("Filtering tractography from intersections") intersection_matrix, final_fiber_ids = create_allpoints_cmat( fib, roiData, roiVoxelSize, nROIs) finalfibers_fname = op.abspath(endpoint_name + '_intersections_streamline_final.trk') stats['intersections_n_fib'] = save_fibers(hdr, fib, finalfibers_fname, final_fiber_ids) intersection_matrix = np.matrix(intersection_matrix) I = G.copy() H = nx.from_numpy_matrix(np.matrix(intersection_matrix)) H = nx.relabel_nodes( H, lambda x: x + 1) # relabel nodes so they start at 1 I.add_weighted_edges_from( ((u, v, d['weight']) for u, v, d in H.edges(data=True))) dis = 0 for i in range(endpoints.shape[0]): # ROI start => ROI end try: startROI = int(roiData[endpoints[i, 0, 0], endpoints[i, 0, 1], endpoints[i, 0, 2]]) endROI = int(roiData[endpoints[i, 1, 0], endpoints[i, 1, 1], endpoints[i, 1, 2]]) except IndexError: iflogger.error( 'AN INDEXERROR EXCEPTION OCCURED FOR FIBER %s. 
' 'PLEASE CHECK ENDPOINT GENERATION', i) break # Filter if startROI == 0 or endROI == 0: dis += 1 fiberlabels[i, 0] = -1 continue if startROI > nROIs or endROI > nROIs: iflogger.error( "Start or endpoint of fiber terminate in a voxel which is labeled higher" ) iflogger.error( "than is expected by the parcellation node information.") iflogger.error("Start ROI: %i, End ROI: %i", startROI, endROI) iflogger.error("This needs bugfixing!") continue # Update fiber label # switch the rois in order to enforce startROI < endROI if endROI < startROI: tmp = startROI startROI = endROI endROI = tmp fiberlabels[i, 0] = startROI fiberlabels[i, 1] = endROI final_fiberlabels.append([startROI, endROI]) final_fibers_idx.append(i) # Add edge to graph if G.has_edge(startROI, endROI) and 'fiblist' in G.edge[startROI][endROI]: G.edge[startROI][endROI]['fiblist'].append(i) else: G.add_edge(startROI, endROI, fiblist=[i]) # create a final fiber length array finalfiberlength = [] if intersections: final_fibers_indices = final_fiber_ids else: final_fibers_indices = final_fibers_idx for idx in final_fibers_indices: # compute length of fiber finalfiberlength.append(length(fib[idx][0])) # convert to array final_fiberlength_array = np.array(finalfiberlength) # make final fiber labels as array final_fiberlabels_array = np.array(final_fiberlabels, dtype=int) iflogger.info( 'Found %i (%f percent out of %i fibers) fibers that start or ' 'terminate in a voxel which is not labeled. (orphans)', dis, dis * 100.0 / n, n) iflogger.info('Valid fibers: %i (%f%%)', n - dis, 100 - dis * 100.0 / n) numfib = nx.Graph() numfib.add_nodes_from(G) fibmean = numfib.copy() fibmedian = numfib.copy() fibdev = numfib.copy() for u, v, d in G.edges(data=True): G.remove_edge(u, v) di = {} if 'fiblist' in d: di['number_of_fibers'] = len(d['fiblist']) idx = np.where((final_fiberlabels_array[:, 0] == int(u)) & (final_fiberlabels_array[:, 1] == int(v)))[0] di['fiber_length_mean'] = float( np.mean(final_fiberlength_array[idx])) di['fiber_length_median'] = float( np.median(final_fiberlength_array[idx])) di['fiber_length_std'] = float(np.std( final_fiberlength_array[idx])) else: di['number_of_fibers'] = 0 di['fiber_length_mean'] = 0 di['fiber_length_median'] = 0 di['fiber_length_std'] = 0 if not u == v: # Fix for self loop problem G.add_edge(u, v, **di) if 'fiblist' in d: numfib.add_edge(u, v, weight=di['number_of_fibers']) fibmean.add_edge(u, v, weight=di['fiber_length_mean']) fibmedian.add_edge(u, v, weight=di['fiber_length_median']) fibdev.add_edge(u, v, weight=di['fiber_length_std']) iflogger.info('Writing network as %s', matrix_name) nx.write_gpickle(G, op.abspath(matrix_name)) numfib_mlab = nx.to_numpy_matrix(numfib, dtype=int) numfib_dict = {'number_of_fibers': numfib_mlab} fibmean_mlab = nx.to_numpy_matrix(fibmean, dtype=np.float64) fibmean_dict = {'mean_fiber_length': fibmean_mlab} fibmedian_mlab = nx.to_numpy_matrix(fibmedian, dtype=np.float64) fibmedian_dict = {'median_fiber_length': fibmedian_mlab} fibdev_mlab = nx.to_numpy_matrix(fibdev, dtype=np.float64) fibdev_dict = {'fiber_length_std': fibdev_mlab} if intersections: path, name, ext = split_filename(matrix_name) intersection_matrix_name = op.abspath(name + '_intersections') + ext iflogger.info('Writing intersection network as %s', intersection_matrix_name) nx.write_gpickle(I, intersection_matrix_name) path, name, ext = split_filename(matrix_mat_name) if not ext == '.mat': ext = '.mat' matrix_mat_name = matrix_mat_name + ext iflogger.info('Writing matlab matrix as %s', matrix_mat_name) 
sio.savemat(matrix_mat_name, numfib_dict) if intersections: intersect_dict = {'intersections': intersection_matrix} intersection_matrix_mat_name = op.abspath(name + '_intersections') + ext iflogger.info('Writing intersection matrix as %s', intersection_matrix_mat_name) sio.savemat(intersection_matrix_mat_name, intersect_dict) mean_fiber_length_matrix_name = op.abspath(name + '_mean_fiber_length') + ext iflogger.info('Writing matlab mean fiber length matrix as %s', mean_fiber_length_matrix_name) sio.savemat(mean_fiber_length_matrix_name, fibmean_dict) median_fiber_length_matrix_name = op.abspath(name + '_median_fiber_length') + ext iflogger.info('Writing matlab median fiber length matrix as %s', median_fiber_length_matrix_name) sio.savemat(median_fiber_length_matrix_name, fibmedian_dict) fiber_length_std_matrix_name = op.abspath(name + '_fiber_length_std') + ext iflogger.info('Writing matlab fiber length deviation matrix as %s', fiber_length_std_matrix_name) sio.savemat(fiber_length_std_matrix_name, fibdev_dict) fiberlengths_fname = op.abspath(endpoint_name + '_final_fiberslength.npy') iflogger.info('Storing final fiber length array as %s', fiberlengths_fname) np.save(fiberlengths_fname, final_fiberlength_array) fiberlabels_fname = op.abspath(endpoint_name + '_filtered_fiberslabel.npy') iflogger.info('Storing all fiber labels (with orphans) as %s', fiberlabels_fname) np.save( fiberlabels_fname, np.array(fiberlabels, dtype=np.int32), ) fiberlabels_noorphans_fname = op.abspath(endpoint_name + '_final_fiberslabels.npy') iflogger.info('Storing final fiber labels (no orphans) as %s', fiberlabels_noorphans_fname) np.save(fiberlabels_noorphans_fname, final_fiberlabels_array) iflogger.info("Filtering tractography - keeping only no orphan fibers") finalfibers_fname = op.abspath(endpoint_name + '_streamline_final.trk') stats['endpoint_n_fib'] = save_fibers(hdr, fib, finalfibers_fname, final_fibers_idx) stats['endpoints_percent'] = float(stats['endpoint_n_fib']) / float( stats['orig_n_fib']) * 100 stats['intersections_percent'] = float( stats['intersections_n_fib']) / float(stats['orig_n_fib']) * 100 out_stats_file = op.abspath(endpoint_name + '_statistics.mat') iflogger.info('Saving matrix creation statistics as %s', out_stats_file) sio.savemat(out_stats_file, stats)
if __name__ == "__main__":
    start = time.time()

    dataset = "gnu09"
    model = "MultiValency"
    print dataset, model

    if model == "MultiValency":
        ep_model = "range"
    elif model == "Random":
        ep_model = "random"
    elif model == "Categories":
        ep_model = "degree"

    G = nx.read_gpickle("../../graphs/U%s.gpickle" % dataset)
    print 'Read graph G'
    print time.time() - start

    Ep = dict()
    with open("Ep_%s_%s1.txt" % (dataset, ep_model)) as f:
        for line in f:
            data = line.split()
            Ep[(int(data[0]), int(data[1]))] = float(data[2])

    R = 500
    I = 1000
    ALGO_NAME = "CCWP"
    FOLDER = "Data4InfMax/"
def read_graph(name):
    dirname = os.path.dirname(__file__)
    path = os.path.join(dirname, name + '.gpickle.bz2')
    return nx.read_gpickle(path)
import networkx as nx

G = nx.read_gpickle("HPRD-Biogrid.pkl")
nx.double_edge_swap(G, G.number_of_nodes(), G.number_of_nodes() * 10)
print nx.number_of_edges(G)
nx.write_gpickle(G, "edge_swapped_graph.pkl")
import operator
import networkx as nx
import pickle

item = 'german'
g = nx.read_gpickle('../y_data/trees/' + item)
print len(g.nodes())

#in degree
ind = {}
for node in g.nodes():
    ind[node] = g.in_degree(node)
top_10_in = dict(
    sorted(ind.iteritems(), key=operator.itemgetter(1), reverse=True)[:50])
#print top_10_in
# file = open('../y_data/basics/' + item + '/in', 'w')
# print>>file, ind
# file.close()
print item, 'in ok'

#out degree
outd = {}
for node in g.nodes():
    outd[node] = g.out_degree(node)
top_10_out = dict(
    sorted(outd.iteritems(), key=operator.itemgetter(1), reverse=True)[:50])
#print top_10_out
# file = open('../y_data/basics/' + item + '/out', 'w')
graph_path = CONFIG.input
result_path = CONFIG.output
enable_word = CONFIG.word

# paths = ['uiu', 'iui', 'uiciu']
if enable_word:
    # user_paths = ['uiwiu', 'uiu']
    user_paths = ['uiwiu']
else:
    user_paths = ['uiu']
item_paths = ['iui']
# item_paths = ['iui', 'iwi']
# word_paths = ['wiuiw']

print("Reading graph file...")
graph = nx.read_gpickle(graph_path)
result_path = result_path

nodes = graph.nodes()
user_nodes = [node for node in nodes if node.startswith('u_')]
item_nodes = [node for node in nodes if node.startswith('i_')]
word_nodes = [node for node in nodes if node.startswith('w_')]
category_nodes = [node for node in nodes if node.startswith('c_')]
print("{} users, {} items, {} words, {} category".format(
    len(user_nodes), len(item_nodes), len(word_nodes), len(category_nodes)))

user_sentences = []
item_sentences = []
word_sentences = []

print("Walking user sentences...")
    for edge in joint_g.edges(data=True):
        (p1, p2, data) = edge
        # if p1 in all_profiles and p2 in all_profiles:
        edge_outf.write("%s\t%s\t%f\n" % (p1, p2, data['weight']))
        nodes_to_write.update([p1, p2])

    with open(os.path.join(graph_save_dir, "nodes.txt"), "w") as node_outf:
        for p in nodes_to_write:
            if p in cas4_profiles:
                _type = 1
            elif p in uvrd_profiles:
                _type = 2
            else:
                _type = 0
            node_outf.write("%s\t%s\t%d\n" % (p, profile2gene[p], _type))

    return joint_g


if __name__ == "__main__":
    preprocessed_file = os.path.join(work_dir, "adj_graph.p")
    # G = graph_preprocessing(preprocessed_file)
    G = nx.read_gpickle(preprocessed_file)
    # gt.degree_distributions(G)
    # gt.clustering_coefficients(G)
def __init__(self, handle):
    super(GraphCleaner, self).__init__()
    self.handle = handle
    self.G = nx.read_gpickle('{0} Graph with PWIs.pkl'.format(self.handle))
                    os.path.join(output_path, 'indices.info'))
        shutil.copy(os.path.join(path_to_files, 'rootNode.info'),
                    os.path.join(output_path, 'rootNode.info'))
        shutil.copy(os.path.join(path_to_files, 'startNodeId.info'),
                    os.path.join(output_path, 'startNodeId.info'))
        print path_to_files
        if os.path.isfile(os.path.join(path_to_files, 'equivalence.info')):
            shutil.copy(os.path.join(path_to_files, 'equivalence.info'),
                        os.path.join(output_path, 'equivalence.info'))
        counter += 1


if __name__ == '__main__':
    data = '/home/irma/work/DATA/INFERENCE_DATA/WEBKB/folds/fold1-train.gpickle'
    pattern_path = '/home/irma/work/DATA/INFERENCE_DATA/WEBKB/experiments_inference/page_class/General_patterns/pattern3/'
    output = '/home/irma/work/DATA/INFERENCE_DATA/WEBKB/experiments_inference/page_class/PATTERNS/'
    general_path_file = '/home/irma/work/DATA/INFERENCE_DATA/WEBKB/experiments_inference/page_class/patterns.info'
    data_graph = nx.read_gpickle(data)
    pattern = nx.read_gml(os.path.join(pattern_path, 'pattern.gml'))
    groundings = an.get_all_possible_values(data_graph, 'word')
    ground_patterns = ground_pattern(pattern, groundings, [4, 6])
    write_patterns(ground_patterns, pattern_path, 52, output)
    #write file with all the patterns
    dirs = os.listdir(output)
    dirs.sort(key=lambda f: int(filter(str.isdigit, f)))
    with open(general_path_file, 'w') as fajl:
        for d in dirs:
            if "pattern" in d:
                fajl.write(os.path.join(output, d) + "\n")
import pickle
import networkx as nx
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import datasets
from nxviz import CircosPlot, ArcPlot

# plt.style.use('ggplot')

# filename = 'c:\\Users\\okigboo\\Desktop\\PythonDataScience\\NetworkAnalysis\\ego-twitter.p'
filename = 'C:\\Users\\Jose\\Desktop\\PythonDataScience\\NetworkAnalysis\\ego-twitter.p'

infile = open(filename, 'rb')
myfile = pickle.load(infile)
infile.close()

# G = nx.path_graph(50)
# nx.write_gpickle(G, filename)

G = nx.read_gpickle(filename)
# G = nx.read_gpickle(filename)
print(type(G))

# Create the CircosPlot object: c
c = CircosPlot(G)

# Draw c to the screen
c.draw()
plt.show()

# Create the un-customized ArcPlot object: a
a = ArcPlot(G)

# Draw a to the screen
    G_ini.remove_nodes_from(
        [node for node in G.nodes if G_ini.nodes[node]['immunization']])
    length_of_vulnerable_region = list(
        map(len, list(nx.connected_components(G_ini))))
    if len(length_of_vulnerable_region) > 0:
        size_T = max(length_of_vulnerable_region)
    else:
        size_T = 0
    G_ini = G.to_undirected()
    G_ini, max_T, R_t = paintTarget(G_ini, size_T)
    return utility_s(G_ini, v, R_t, max_T) - len(list(
        G.out_edges(v))) * alpha - G.nodes[v]['immunization'] * beta


if __name__ == '__main__':
    G = nx.read_gpickle("../test/Global test/forest.pickle")
    G.add_node(16)
    G.add_edge(17, 18)
    G.add_node(19)
    G.add_edge(17, 16)
    G.add_edge(16, 19)
    G.nodes[16]['immunization'] = True
    G.nodes[18]['immunization'] = False
    G.nodes[17]['immunization'] = False
    G.nodes[19]['immunization'] = False
    alpha = G.nodes[0]['alpha']
def _run_interface(self, runtime): global gpickled, nodentwks, edgentwks, kntwks, matlab gpickled = list() nodentwks = list() edgentwks = list() kntwks = list() matlab = list() ntwk = nx.read_gpickle(self.inputs.in_file) # Each block computes, writes, and saves a measure # The names are then added to the output .pck file list # In the case of the degeneracy networks, they are given specified output names calculate_cliques = self.inputs.compute_clique_related_measures weighted = self.inputs.treat_as_weighted_graph global_measures = compute_singlevalued_measures( ntwk, weighted, calculate_cliques) if isdefined(self.inputs.out_global_metrics_matlab): global_out_file = op.abspath(self.inputs.out_global_metrics_matlab) else: global_out_file = op.abspath( self._gen_outfilename('globalmetrics', 'mat')) sio.savemat(global_out_file, global_measures, oned_as='column') matlab.append(global_out_file) node_measures = compute_node_measures(ntwk, calculate_cliques) for key in node_measures.keys(): newntwk = add_node_data(node_measures[key], ntwk) out_file = op.abspath(self._gen_outfilename(key, 'pck')) nx.write_gpickle(newntwk, out_file) nodentwks.append(out_file) if isdefined(self.inputs.out_node_metrics_matlab): node_out_file = op.abspath(self.inputs.out_node_metrics_matlab) else: node_out_file = op.abspath( self._gen_outfilename('nodemetrics', 'mat')) sio.savemat(node_out_file, node_measures, oned_as='column') matlab.append(node_out_file) gpickled.extend(nodentwks) edge_measures = compute_edge_measures(ntwk) for key in edge_measures.keys(): newntwk = add_edge_data(edge_measures[key], ntwk) out_file = op.abspath(self._gen_outfilename(key, 'pck')) nx.write_gpickle(newntwk, out_file) edgentwks.append(out_file) if isdefined(self.inputs.out_edge_metrics_matlab): edge_out_file = op.abspath(self.inputs.out_edge_metrics_matlab) else: edge_out_file = op.abspath( self._gen_outfilename('edgemetrics', 'mat')) sio.savemat(edge_out_file, edge_measures, oned_as='column') matlab.append(edge_out_file) gpickled.extend(edgentwks) ntwk_measures = compute_network_measures(ntwk) for key in ntwk_measures.keys(): if key == 'k_core': out_file = op.abspath( self._gen_outfilename(self.inputs.out_k_core, 'pck')) if key == 'k_shell': out_file = op.abspath( self._gen_outfilename(self.inputs.out_k_shell, 'pck')) if key == 'k_crust': out_file = op.abspath( self._gen_outfilename(self.inputs.out_k_crust, 'pck')) nx.write_gpickle(ntwk_measures[key], out_file) kntwks.append(out_file) gpickled.extend(kntwks) out_pickled_extra_measures = op.abspath( self._gen_outfilename(self.inputs.out_pickled_extra_measures, 'pck')) dict_measures = compute_dict_measures(ntwk) iflogger.info( 'Saving extra measure file to {path} in Pickle format'.format( path=op.abspath(out_pickled_extra_measures))) file = open(out_pickled_extra_measures, 'w') pickle.dump(dict_measures, file) file.close() iflogger.info('Saving MATLAB measures as {m}'.format(m=matlab)) # Loops through the measures which return a dictionary, # converts the keys and values to a Numpy array, # stacks them together, and saves them in a MATLAB .mat file via Scipy global dicts dicts = list() for idx, key in enumerate(dict_measures.keys()): for idxd, keyd in enumerate(dict_measures[key].keys()): if idxd == 0: nparraykeys = np.array(keyd) nparrayvalues = np.array(dict_measures[key][keyd]) else: nparraykeys = np.append(nparraykeys, np.array(keyd)) values = np.array(dict_measures[key][keyd]) nparrayvalues = np.append(nparrayvalues, values) nparray = np.vstack((nparraykeys, nparrayvalues)) out_file = 
op.abspath(self._gen_outfilename(key, 'mat')) npdict = {} npdict[key] = nparray sio.savemat(out_file, npdict, oned_as='column') dicts.append(out_file) return runtime
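# The extra-measures dump above opens its output file in text mode ('w');
# on Python 3, pickle needs a binary file handle. A minimal self-contained
# sketch of the same dump/load round trip (the dict and path are made up):
import pickle

dict_measures = {'example_measure': {0: 1.0, 1: 2.0}}  # hypothetical stand-in
out_path = 'extra_measures.pck'                        # hypothetical path

with open(out_path, 'wb') as f:
    pickle.dump(dict_measures, f, protocol=pickle.HIGHEST_PROTOCOL)

with open(out_path, 'rb') as f:
    restored = pickle.load(f)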
import re
from os.path import expanduser

import networkx as nx


def load_network_from_gpickle(filename, verbose=True):
    # Expand '~' to the user's home directory, then load the pickled graph
    filename = re.sub('~', expanduser('~'), filename)
    G = nx.read_gpickle(filename)
    return G
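# nx.read_gpickle/nx.write_gpickle were deprecated in NetworkX 2.6 and removed
# in 3.0; the plain pickle module is the documented replacement. A minimal
# sketch of equivalent helpers for newer NetworkX versions (function names are
# illustrative, not from the original code):
import pickle
from os.path import expanduser


def load_network_from_pickle(filename):
    with open(expanduser(filename), 'rb') as f:
        return pickle.load(f)


def save_network_to_pickle(G, filename):
    with open(expanduser(filename), 'wb') as f:
        pickle.dump(G, f, protocol=pickle.HIGHEST_PROTOCOL)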
# Listlabel.append(ut.get_estgraphlabel(gest, "egr", weightflag=0))
# remove specific columns from label
# Labelarray = Listlabel[0]
# Labelarraynew = np.delete(Labelarray, [0])
# Labelarraynew = np.delete(Labelarraynew, tempt)

fileext = "\\plc_5000_egr_estsupbayesian"
nx.write_gpickle(gest, config.datapath + 'Bayesian' + fileext + ".gpickle")
# with open(config.datapath + 'Bayesian' + fileext + "_label.pickle", 'wb') as b:
#     pickle.dump(Listlabel, b)

## ================ load estimated graph and gobs label ==============
fileext = "\\plc_5000_egr_estsupbayesian"
gest = nx.read_gpickle(config.datapath + 'Bayesian' + fileext + ".gpickle")

fileext = "\\plc_5000_egr_bayesian"
with open(config.datapath + 'Bayesian' + fileext + "_label.pickle", 'rb') as b:
    Listlabel = pickle.load(b)

# remove entries for which no node is present
Labelarray = Listlabel[0]
Labelarray = np.delete(Labelarray, [0])

## ================ load noisy graph and gobs label ==============
fileext = "\\plc_5000_gobsnoiseadd_bayesian_6195"
gobsnoise = nx.read_gpickle(config.datapath + 'Bayesian' + fileext + ".gpickle")
def main(args): if args.graph_path: G = nx.read_gpickle(args.graph_path) dict_path = args.graph_path[0:args.graph_path.rfind('/') + 1] if (not args.no_dictionary): node_to_id_dict = pickle.load(open(dict_path+'node_to_id_dict.pickle', 'rb')) id_to_node_dict = None if args.print_node_names_in_top_k: id_to_node_dict = pickle.load(open(dict_path+'id_to_node_dict.pickle', 'rb')) else: # We need to create the graph from a csv G = utils.graph.get_graph_from_csv(file=args.file_path, source=args.source, target=args.target, edge_attr=args.edge_attr) # Convert all node names to integer IDs (starting with ID=0) ids = range(G.number_of_nodes()) nodes = list(G.nodes()) id_to_node_dict = {ids[i]: nodes[i] for i in range(len(ids))} node_to_id_dict = {nodes[i]: ids[i] for i in range(G.number_of_nodes())} G = nx.relabel_nodes(G, node_to_id_dict) # Save the graph and the dictionaries nx.write_gpickle(G, path=args.graph_output_dir + 'graph.gpickle') with open(args.graph_output_dir + 'id_to_node_dict.pickle', 'wb') as handle: pickle.dump(id_to_node_dict, handle) with open(args.graph_output_dir + 'node_to_id_dict.pickle', 'wb') as handle: pickle.dump(node_to_id_dict, handle) print('Input graph has', G.number_of_nodes(), 'nodes and', G.number_of_edges(), 'edges') utils.auxiliary_functions.set_json_attr_val('graph_info', {'num_nodes': G.number_of_nodes(), 'num_edges': G.number_of_edges()}, file_path=args.output_dir+'args.json') if args.user_specified_query_nodes: # Use the user specified query nodes Q = [] for q_name in args.user_specified_query_nodes: Q.append(node_to_id_dict[q_name]) else: # Select 'k' query nodes randomly. The nodes selected must have an out-degree of at least 1. Q = utils.auxiliary_functions.get_query_nodes(G, k=args.num_q_nodes) # Save the chosen nodes Q into the json file utils.auxiliary_functions.set_json_attr_val('query_nodes', Q, file_path=args.output_dir+'args.json') # Get the PPR scores for every node in G given a set of query nodes Q using particle filtering start = timer() print('Calculating PPR using particle filtering...') ppr_np_array, num_iterations = utils.ppr.get_ppr(G, Q, return_type='array') elapsed_time = timer()-start print('Finished calculating PPR using particle filtering. Took', num_iterations, 'iterations for convergence. 
Elapsed time is:', elapsed_time, 'seconds.\n') with open(args.output_dir + 'particle_filtering_ppr_scores.npy', 'wb') as f: np.save(f, ppr_np_array) utils.auxiliary_functions.set_json_attr_val('ppr_using_pf', {'runtime': elapsed_time, 'num_iterations': num_iterations }, file_path=args.output_dir+'info.json') # Check if we want to also run PPR from each query node seperately if args.run_ppr_from_each_query_node: single_source_output_dir = args.output_dir + 'single_source_ppr_scores/' print('Calculating PPR from each source in the query set...') if args.distributed_single_source_ppr: # Single source ppr multi-core implementation start_timer = timer() aggregate_ppr_single_source_node_np_array, stats_dict = utils.ppr.get_ppr_from_single_source_nodes_parallel(G, Q) print('Total Elapsed time distribute implementation:', timer()-start_timer) else: # Single source ppr single-core implementation aggregate_ppr_single_source_node_np_array = np.zeros(G.number_of_nodes()) stats_dict = {} start_timer = timer() for query_node in tqdm(Q): start = timer() ppr_single_source_node_np_array, num_iterations = utils.ppr.get_ppr(G, [query_node], return_type='array') elapsed_time = timer()-start stats_dict[query_node] = {'runtime': elapsed_time, 'num_iterations': num_iterations} aggregate_ppr_single_source_node_np_array += ppr_single_source_node_np_array print('Total Elapsed time with single cpu:', timer()-start_timer) # Calculate a combined ppr vector for all sources in the query ppr_single_sources = aggregate_ppr_single_source_node_np_array / len(Q) utils.auxiliary_functions.set_json_attr_val('ppr_single_source_using_pf', stats_dict, file_path=args.output_dir+'info.json') print('Finished calculating PPR from each source in the query set.\n') with open(args.output_dir + 'ppr_single_source_scores.npy', 'wb') as f: np.save(f, ppr_single_sources) # Evaluation of the results # Top-10 nodes using particle filtering top_k_ppr = utils.auxiliary_functions.get_top_k_vals_numpy(ppr_np_array, k=10) print('TOP-10 nodes using particle filtering') utils.auxiliary_functions.print_top_k_nodes(top_k_ppr, id_to_node_dict, args.print_node_names_in_top_k) if args.run_ppr_from_each_query_node: # Get top-k values from numpy array top_k_ppr_single_sources = utils.auxiliary_functions.get_top_k_vals_numpy(ppr_single_sources, 10) print('\nTOP-10 nodes using multiple sources particle filtering') utils.auxiliary_functions.print_top_k_nodes(top_k_ppr_single_sources, id_to_node_dict, args.print_node_names_in_top_k) # Calculate the normalized discounted cumulative gain (NDCG) between the ppr vs the ppr_single_source rankings k_vals = [1, 5, 10, 50, 100, 200, 500, 1000] ndcg_dict = {} print('\n\nNormalized discounted cumulative gain (NDCG) scores at various k values') for k in k_vals: ndcg_dict[str(k)] = ndcg_score(np.array([ppr_np_array]), np.array([ppr_single_sources]), k=k) print('NDCG score at k=' + str(k) + ':', ndcg_dict[str(k)]) # Calculate NDCG scores for all rankings (k=total_number_of_nodes) ndcg_dict['full'] = ndcg_score(np.array([ppr_np_array]), np.array([ppr_single_sources])) utils.auxiliary_functions.set_json_attr_val('ndcg_scores', ndcg_dict, file_path=args.output_dir+'info.json') if args.distributed_pf: #Evaluation of Distributed Particle Filtering ppr_dist, num_iterations_dist = utils.ppr.get_ppr_distributed(G, Q, return_type='array') top_k_ppr_dist = utils.auxiliary_functions.get_top_k_vals_numpy(ppr_dist, 10) utils.auxiliary_functions.print_top_k_nodes(top_k_ppr_dist, id_to_node_dict, args.print_node_names_in_top_k) k_vals = 
[1, 5, 10, 50, 100, 200, 500, 1000] ndcg_dist_dict = {} print('The number of iterations for distributed PPR', num_iterations_dist) print('\n\nNormalized discounted cumulative gain (NDCG) scores at various k values for Dist PPR') for k in k_vals: ndcg_dict[k] = ndcg_score(np.array([ppr_np_array]), np.array([ppr_dist]), k=k) print('NDCG score at k=' + str(k) + ':', ndcg_dict[k]) if args.run_networkx_ppr: # Top-10 nodes using networkx implementation of PPR personalization_dict = {} for q in Q: personalization_dict[q] = 1 start = timer() print('\n\nCalculating PPR using NetworkX implementation of PPR') ppr_dict_nx = nx.pagerank(G, alpha=0.85, personalization=personalization_dict) print('Finished calculating PPR using NetworkX implementation of PPR. Elapsed time is:', timer()-start, 'seconds.') with open(args.output_dir + 'networkx_ppr_scores.pickle', 'wb') as handle: pickle.dump(ppr_dict_nx, handle) # Convert 'ppr_dict_nx' into an array for easy NDCG scores comparison ppr_array_nx = [] for id in ppr_dict_nx: ppr_array_nx.append(ppr_dict_nx[id]) print('\n\nTOP-10 nodes using Networkx implementation of PPR') top_k_ppr_nx = utils.auxiliary_functions.get_top_k_vals_numpy(np.array(ppr_array_nx), 10) utils.auxiliary_functions.print_top_k_nodes(top_k_ppr_nx, id_to_node_dict, args.print_node_names_in_top_k) # Calculate the NDCG scores using networkx vs ppr_np_array. The networkx scores are used as the ground truth k_vals = [1, 5, 10, 50, 100, 200, 500, 1000] ndcg_dict = {} print('\n\nNormalized discounted cumulative gain (NDCG) scores at various k values for networkx PPR vs PPR using PF') for k in k_vals: ndcg_dict[str(k)] = ndcg_score(np.array([ppr_array_nx]), np.array([ppr_np_array]), k=k) print('NDCG score at k=' + str(k) + ':', ndcg_dict[str(k)]) ndcg_dict['full'] = ndcg_score(np.array([ppr_array_nx]), np.array([ppr_np_array])) utils.auxiliary_functions.set_json_attr_val('ndcg_scores_nx', ndcg_dict, file_path=args.output_dir+'info.json')
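# The evaluation above leans on sklearn's ndcg_score to compare two rankings.
# A tiny self-contained illustration with made-up score vectors (not real PPR
# output); each call returns a value in [0, 1], higher meaning closer rankings:
import numpy as np
from sklearn.metrics import ndcg_score

reference_scores   = np.array([[0.40, 0.25, 0.20, 0.10, 0.05]])
approximate_scores = np.array([[0.35, 0.30, 0.15, 0.15, 0.05]])

print(ndcg_score(reference_scores, approximate_scores, k=3))  # NDCG@3
print(ndcg_score(reference_scores, approximate_scores))       # full ranking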
def create_structure(self, aggregate_by, data_directory, file_name, **feature_function_pairs): """ Creates structure of graph from provided relational datasets using python networkx. :aggregate_by: str, geographic entity by which to aggregate relational data. This will become the nodes of the graph and will be a column in the geo-tagged relational dataset. :data_directory: str, path to the relevant data directory/storage bucket for the dataset indicated by file_name :file_name: str, path to the name of the geo-tagged dataset we want to use to populate the graph node attributes :**feature_function_pairs: kwargs, feature to be aggregated from relational dataset as the key and the aggregation function to be applied as its value Returns: Writes resulting graph structre as pkl to disk in graph bucket. """ assert (isinstance(aggregate_by, str)), "\ argument aggrebate_by must be of type str" assert (isinstance(data_directory, str)), "\ argument data_directory must be of type str" assert (isinstance(file_name, str)), "\ argument file_name must be of type str" # load in geographic entity shape files if self.gcp: blob = self.geo_bucket.blob( 'chicago_{}_reformatted.json'.format(aggregate_by)) geo_entities = json.loads(blob.download_as_string(client=None)) else: with open( '{}/chicago_{}_reformatted.json'.format( self.geo_directory, aggregate_by), 'r') as f: geo_entities = json.load(f) # load the requested specified dataframe df = pd.read_csv('{}/{}'.format(data_directory, file_name)) # aggregate features from dataframe to populate graph node attributes df_aggregated = self.aggregate_features(df, geo_entities, aggregate_by, **feature_function_pairs) # determine whether a serialized version of this graph_model already exists exists = "" if self.gcp: graph_list = list(self.graph_bucket.list_blobs()) result = [ 1 if self.graph_model_name + '.pkl' in str(name) else 0 for name in graph_list ] if sum(result) >= 1: exists = True ix = result.index(1) else: exists = False else: graph_list = os.listdir(self.graph_directory) result = [ 1 if self.graph_model_name + '.pkl' in name else 0 for name in graph_list ] if sum(result) >= 1: exists = True ix = result.index(1) else: exists = False # if there isn't already a serialized version of this graph_model # we create nodes named after 'aggregate_by' with attributes corresponding # created by aggregating the dataframe features if not exists: # first create nodes, return resulting graph structure G = self.create_pynx_nodes(df_aggregated, node_category=aggregate_by, attribute_columns=list( df_aggregated.columns)) # then create edge relationships between nodes # current the only option is "NEXT_TO" G = self.add_edges_to_pynx(G, "NEXT_TO", utilities.intersection, ["polygon_name_1", "polygon_name_2"], \ aggregate_by, bidirectional=True, polygon_dict_1=geo, \ polygon_dict_2=geo) # if the serialized version of the graph_model already exists, # we load it into memory and using networkx to add nodes, attributes, edge relations # that dont already exist if exists: print("adding to existing graph") if self.gcp: blob = self.graph_bucket.blob('{}.pkl'.format( self.graph_model_name)) os.system("mkdir {}/create_graph_model/temp".format( self.home_directory)) blob.download_to_filename( "{}/create_graph_model/temp/temp.pkl".format( self.home_directory), client=None) G = nx.read_gpickle( "{}/create_graph_model/temp/temp.pkl".format( self.home_directory)) os.remove("{}/create_graph_model/temp/temp.pkl".format( self.home_directory)) os.system("rmdir {}/create_graph_model/temp".format( 
self.home_directory)) else: G = nx.read_gpickle("{}/{}.pkl".format(self.graph_directory, self.graph_model_name)) # we note the 'existing_graph' kwarg is specified here G = self.create_pynx_nodes(df_aggregated, node_category=aggregate_by, attribute_columns=list( df_aggregated.columns), existing_graph=G) G = self.add_edges_to_pynx(G, "NEXT_TO", utilities.intersection, ["polygon_name_1", "polygon_name_2"], \ aggregate_by, bidirectional=True, polygon_dict_1=geo, \ polygon_dict_2=geo) """ RETURN TO THIS: SEPARATE SCRIPT FOR MULTIPLE GEO FILES # create unidirectional edges between census tract and neighborhood G = pynx_to_neo4j.add_edges_to_pynx(G, "CONTAINS", utilities.intersection, ["polygon_name_1", "polygon_name_2"], \ "neighborhood", "tract", bidirectional=False, polygon_dict_1=neighborhoods, \ polygon_dict_2=tracts) G = pynx_to_neo4j.add_edges_to_pynx(G, "IS_WITHIN", utilities.intersection, ["polygon_name_1", "polygon_name_2"], \ "tract", "neighborhood", bidirectional=False, polygon_dict_1=tracts, \ polygon_dict_2=neighborhoods) """ # save as pkl file if self.gcp: # write to disk first os.system("sudo mkdir {}/create_graph_model/temp".format( self.home_directory)) nx.write_gpickle( G, "{}/create_graph_model/temp/{}.pkl".format( self.home_directory, self.graph_model_name)) # then upload to bucket blob = self.graph_bucket.blob('{}.pkl'.format( self.graph_model_name)) blob.upload_from_filename( "{}/create_graph_model/temp/{}.pkl".format( self.home_directory, self.graph_model_name)) os.remove("{}/create_graph_model/temp/{}.pkl".format( self.home_directory, self.graph_model_name)) os.system("rmdir {}/create_graph_model/temp".format( self.home_directory)) else: nx.write_gpickle( G, "{}/{}.pkl".format(self.graph_directory, self.graph_model_name))
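# A hedged sketch of the save-to-bucket step above using tempfile instead of
# shelling out to mkdir/rmdir; 'graph_bucket' and 'graph_model_name' mirror
# the names used above, everything else is illustrative:
import os
import tempfile

import networkx as nx


def upload_graph_to_bucket(G, graph_bucket, graph_model_name):
    with tempfile.TemporaryDirectory() as tmpdir:
        local_path = os.path.join(tmpdir, '{}.pkl'.format(graph_model_name))
        nx.write_gpickle(G, local_path)
        blob = graph_bucket.blob('{}.pkl'.format(graph_model_name))
        blob.upload_from_filename(local_path)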
def average_networks(in_files, ntwk_res_file, group_id): """ Sums the edges of input networks and divides by the number of networks Writes the average network as .pck and .gexf and returns the name of the written networks """ import networkx as nx import os.path as op iflogger.info( "Creating average network for group: {grp}".format(grp=group_id)) matlab_network_list = [] if len(in_files) == 1: avg_ntwk = read_unknown_ntwk(in_files[0]) else: count_to_keep_edge = np.round(float(len(in_files)) / 2) iflogger.info( "Number of networks: {L}, an edge must occur in at least {c} to remain in the average network" .format(L=len(in_files), c=count_to_keep_edge)) ntwk_res_file = read_unknown_ntwk(ntwk_res_file) iflogger.info("{n} Nodes found in network resolution file".format( n=ntwk_res_file.number_of_nodes())) ntwk = remove_all_edges(ntwk_res_file) counting_ntwk = ntwk.copy() # Sums all the relevant variables for index, subject in enumerate(in_files): tmp = nx.read_gpickle(subject) iflogger.info('File {s} has {n} edges'.format( s=subject, n=tmp.number_of_edges())) edges = tmp.edges_iter() for edge in edges: data = {} data = tmp.edge[edge[0]][edge[1]] data['count'] = 1 if ntwk.has_edge(edge[0], edge[1]): current = {} current = ntwk.edge[edge[0]][edge[1]] #current['count'] = current['count'] + 1 data = add_dicts_by_key(current, data) ntwk.add_edge(edge[0], edge[1], data) nodes = tmp.nodes_iter() for node in nodes: data = {} data = ntwk.node[node] if tmp.node[node].has_key('value'): data['value'] = data['value'] + tmp.node[node]['value'] ntwk.add_node(node, data) # Divides each value by the number of files nodes = ntwk.nodes_iter() edges = ntwk.edges_iter() iflogger.info( 'Total network has {n} edges'.format(n=ntwk.number_of_edges())) avg_ntwk = nx.Graph() newdata = {} for node in nodes: data = ntwk.node[node] newdata = data if data.has_key('value'): newdata['value'] = data['value'] / len(in_files) ntwk.node[node]['value'] = newdata avg_ntwk.add_node(node, newdata) edge_dict = {} edge_dict['count'] = np.zeros( (avg_ntwk.number_of_nodes(), avg_ntwk.number_of_nodes())) for edge in edges: data = ntwk.edge[edge[0]][edge[1]] if ntwk.edge[edge[0]][edge[1]]['count'] >= count_to_keep_edge: iflogger.info( 'Count: {c} is greater than or equal to the minimum, {n}, for edge {e1}-{e2}' .format(c=ntwk.edge[edge[0]][edge[1]]['count'], n=count_to_keep_edge, e1=edge[0], e2=edge[1])) for key in data.keys(): if not key == 'count': data[key] = data[key] / len(in_files) ntwk.edge[edge[0]][edge[1]] = data avg_ntwk.add_edge(edge[0], edge[1], data) edge_dict['count'][edge[0] - 1][edge[1] - 1] = ntwk.edge[edge[0]][edge[1]]['count'] iflogger.info( 'After thresholding, the average network has has {n} edges'.format( n=avg_ntwk.number_of_edges())) avg_edges = avg_ntwk.edges_iter() for edge in avg_edges: data = avg_ntwk.edge[edge[0]][edge[1]] for key in data.keys(): if not key == 'count': edge_dict[key] = np.zeros((avg_ntwk.number_of_nodes(), avg_ntwk.number_of_nodes())) edge_dict[key][edge[0] - 1][edge[1] - 1] = data[key] for key in edge_dict.keys(): tmp = {} network_name = group_id + '_' + key + '_average.mat' matlab_network_list.append(op.abspath(network_name)) tmp[key] = edge_dict[key] sio.savemat(op.abspath(network_name), tmp) iflogger.info( 'Saving average network for key: {k} as {out}'.format( k=key, out=op.abspath(network_name))) # Writes the networks and returns the name network_name = group_id + '_average.pck' nx.write_gpickle(avg_ntwk, op.abspath(network_name)) iflogger.info( 'Saving average network as 
{out}'.format(out=op.abspath(network_name))) avg_ntwk = fix_keys_for_gexf(avg_ntwk) network_name = group_id + '_average.gexf' nx.write_gexf(avg_ntwk, op.abspath(network_name)) iflogger.info( 'Saving average network as {out}'.format(out=op.abspath(network_name))) return network_name, matlab_network_list
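# average_networks above uses NetworkX 1.x calls (edges_iter, G.edge[u][v],
# nodes_iter) removed in NetworkX 2.0, plus Python 2-only dict.has_key. A
# hedged sketch of just the edge-accumulation step in 2.x terms; the dict
# merge stands in for the add_dicts_by_key helper and this is not a drop-in
# patch for the whole function:
def accumulate_edges(ntwk, tmp):
    """Add tmp's edge data into ntwk, counting how often each edge occurs."""
    for u, v, data in tmp.edges(data=True):
        data = dict(data)
        data['count'] = 1
        if ntwk.has_edge(u, v):
            current = ntwk.edges[u, v]
            data = {k: current.get(k, 0) + data.get(k, 0)
                    for k in set(current) | set(data)}
        ntwk.add_edge(u, v, **data)
    return ntwk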
updateType = 'async'
targetDirectory = f'{os.getcwd()}/{args.dir}'
os.makedirs(targetDirectory, exist_ok=True)

settings = dict(
    nSamples=nSamples,
    burninSteps=burninSteps,
    updateMethod=updateType
)
IO.saveSettings(targetDirectory, settings)

for i, g in enumerate(ensemble):
    graph = nx.read_gpickle(g)
    # str.strip('.gpickle') would drop matching characters from both ends of
    # the name rather than the extension, so take the stem of the basename
    filename = os.path.splitext(os.path.split(g)[-1])[0]

    modelSettings = dict(
        graph=graph,
        updateType=updateType,
        magSide=magSide
    )
    model = fastIsing.Ising(**modelSettings)

    Tc = Tc_idx = -1
    while Tc < 0:
        mags, sus, binder, abs_mags = simulation.magnetizationParallel(
            model,
            temps=temps,
            n=nSamples,
            burninSteps=burninSteps)
def __read_cache(self):
    # Load the cached NetworkX graph from the configured pickle path
    return nx.read_gpickle(self.CACHE_PATH)
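# A hedged sketch of the read-or-build pattern a cache reader like this is
# usually paired with; 'cache_path' and 'build_fn' are placeholders, not
# names from the original class:
import os

import networkx as nx


def load_or_build(cache_path, build_fn):
    if os.path.exists(cache_path):
        return nx.read_gpickle(cache_path)
    G = build_fn()
    nx.write_gpickle(G, cache_path)
    return G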
        currentPath = paths.popleft()
        currentWord = currentPath[-1]
        if currentWord == goal:
            return currentPath
        elif currentWord in extended:
            continue
        extended.add(currentWord)
        transforms = graph[currentWord]
        for word in transforms:
            if word not in currentPath:
                paths.append(currentPath[:] + [word])
    # no transformation found
    return []


G = nx.read_gpickle('test3.gpickle')


def generateStartEnd():
    lower_limit = 3
    upper_limit = 8
    flag = True
    while flag:
        node1 = random.choice(G.nodes())
        node2 = random.choice(G.nodes())
        try:
            # Using networkx function bidirectional_dijkstra
            isConnected = nx.bidirectional_dijkstra(G, node1, node2)
        except:
            node1 = random.choice(G.nodes())
            node2 = random.choice(G.nodes())
        words = transformWord(G, node1, node2)
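# In NetworkX 2.x, G.nodes() returns a NodeView that random.choice cannot
# index, so the view has to be materialised into a list first. A minimal
# self-contained sketch of picking two connected endpoints (the path graph
# is only a stand-in for the pickled word graph):
import random

import networkx as nx

G = nx.path_graph(10)
node1, node2 = random.sample(list(G.nodes()), 2)
length, path = nx.bidirectional_dijkstra(G, node1, node2)
print(length, path)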
    # Otherwise, save direction
    else:
        undir_G.dir_dict[(n1, n2, relation)] = '-->'
        undir_G.dir_dict[(n2, n1, relation)] = '<--'

TAG = ''

# Save ConceptNet graph
nx.write_gpickle(undir_G, "data/ConceptNet/conceptnet_full_di_rel" + TAG + ".gpickle")

# Save direction dict
with open('data/ConceptNet/dir_dict.pickle', 'wb') as handle:
    pickle.dump(undir_G.dir_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)

# Load graph, get all nodes
cn_graph_full = nx.read_gpickle('data/ConceptNet/conceptnet_full_di_rel' + TAG + '.gpickle')
cn_nodes = list(cn_graph_full.nodes)
print('# nodes:', len(cn_nodes))

# Discard nodes that have only one neighbor, create new subgraph and save
cn_nodes = [c for c in cn_nodes if len(list(cn_graph_full.neighbors(c))) > 1]
reduced_graph = nx.subgraph(cn_graph_full, cn_nodes).copy()
nx.write_gpickle(
    reduced_graph, "data/ConceptNet/conceptnet_full_di_rel_red" + TAG + ".gpickle")
print('# nodes (after removing nodes without neighbors):', len(cn_nodes))
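# The neighbor-count filter above can also be written with G.degree(), which
# gives the same result on an undirected simple graph without self-loops or
# parallel edges. A small self-contained sketch with a toy graph:
import networkx as nx

G = nx.Graph([('a', 'b'), ('b', 'c'), ('c', 'd'), ('d', 'e')])
kept = [n for n, d in G.degree() if d > 1]
reduced = G.subgraph(kept).copy()
print(sorted(reduced.nodes()))  # ['b', 'c', 'd']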
def LoadGraph(filename):
    return nx.read_gpickle(filename)
def loadRealGraphSeries(file_prefix, startId, endId):
    graphs = []
    for file_id in range(startId, endId + 1):
        graph_file = file_prefix + str(file_id) + '_graph.gpickle'
        graphs.append(nx.read_gpickle(graph_file))
    return graphs
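# Hypothetical usage sketch for the loader above: load a series of snapshots
# saved as 'snapshots/day_1_graph.gpickle' .. 'snapshots/day_5_graph.gpickle'
# (the prefix and range are made up for illustration):
graphs = loadRealGraphSeries('snapshots/day_', 1, 5)
print(len(graphs), 'snapshots loaded')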