def community():
    """Detect communities in the graph stored at ``filename`` and save it back.

    Uses the module-level names ``filename``, ``iterations`` and ``blocks``.
    The label of every vertex ends up in the internal vertex property
    ``'blocks'``.  If the file already carries a ``'blocks'`` property it is
    handed to ``gt.community_structure`` as ``spins``.
    """
    g = gt.load_graph(filename)
    print('Graph loaded, now finding community')

    # Alternative approach kept for reference (block model with MCMC sweeps):
    # state = gt.BlockState(g, B=blocks)
    # for i in xrange(iterations):
    #     if i < iterations / 2:
    #         gt.mcmc_sweep(state)
    #     else:
    #         gt.mcmc_sweep(state, beta=float('inf'))
    # g.vp['blocks'] = state.get_blocks()

    extra = {'spins': g.vp['blocks']} if 'blocks' in g.vp else {}
    g.vp['blocks'] = gt.community_structure(g, n_iter=iterations,
                                            n_spins=blocks, **extra)

    if 'pos' in g.vp:
        # The returned layout is not stored; the call is kept as in the source.
        gt.sfdp_layout(g, groups=g.vp['blocks'], pos=g.vp['pos'])

    for label in xrange(blocks):
        members = gt.find_vertex(g, g.vp['blocks'], label)
        print('%d nodes in block %d' % (len(members), label))

    g.save(filename)
def get_stats(dataset, dataset_id2rating_count, dataset_id2title, rec_type):
    """Gather per-bow-tie-label statistics for every graph size in ``Ns``.

    For each ``N`` the graph ``data/<dataset>/graphs/<rec_type>_<N>.gt`` is
    loaded and its vertices are grouped by their ``'bowtie'`` property.

    Args:
        dataset: dataset directory name below ``data``.
        dataset_id2rating_count: mapping vertex ``'name'`` -> rating count.
        dataset_id2title: mapping vertex ``'name'`` -> title.
        rec_type: file name prefix of the graphs.

    Returns:
        Tuple ``(res, nodes, stats, links_to_scc)`` of dicts keyed by ``N``:
        ``res[N][label]`` is ``[mean, median]`` of the rating counts (labels
        whose mean is NaN are dropped), ``nodes[N][label]`` is the list of
        titles, ``stats[N]`` is ``graph_stats(graph)`` and ``links_to_scc[N]``
        is ``get_links_to_scc(graph)``.
    """
    graph_dir = os.path.join('data', dataset, 'graphs')
    res, nodes, stats, links_to_scc = {}, {}, {}, {}

    for N in Ns:
        print(' ', N)
        graph = gt.load_graph(
            os.path.join(graph_dir, '%s_%d.gt' % (rec_type, N)), fmt='gt')

        # Going through a dict first means a repeated vertex name counts once.
        label_by_id = {}
        for vertex in graph.vertices():
            label_by_id[graph.vp['name'][vertex]] = graph.vp['bowtie'][vertex]

        ratings_by_label = {label: [] for label in bt_labels}
        titles_by_label = {label: [] for label in bt_labels}
        for did, label in label_by_id.items():
            ratings_by_label[label].append(dataset_id2rating_count[did])
            titles_by_label[label].append(dataset_id2title[did])

        summary = {}
        for label, ratings in ratings_by_label.items():
            mean, median = np.mean(ratings), np.median(ratings)
            if not np.isnan(mean):
                summary[label] = [mean, median]
        res[N] = summary

        stats[N] = graph_stats(graph)
        links_to_scc[N] = get_links_to_scc(graph)
        nodes[N] = titles_by_label

    return res, nodes, stats, links_to_scc
def compute_shortest_paths(self):
    """Compute all-pairs shortest distances and pickle them row by row.

    Loads ``<keyword>_entitylinks_core.graphml``, runs
    ``gt.shortest_distance`` on it and stores the distances in a dense
    ``n x n`` matrix indexed through ``self.fileindex`` (looked up by the
    vertices' ``_graphml_vertex_id``).  Pairs whose distance exceeds 100 are
    left out, so they show up as 0 in the dense matrix, just like the
    diagonal.  Row ``i`` is pickled to ``<keyword>_shortest_paths/<i>.pickle``.
    """
    import graph_tool.all as gt

    base_dir = home + '/data/text-analysis/vichakshana/page_graphs/'
    graph_file = base_dir + self.keyword + '_entitylinks_core.graphml'
    out_dir = base_dir + self.keyword + '_shortest_paths/'

    g = gt.load_graph(graph_file, fmt='xml')
    distance_data = gt.shortest_distance(g)
    graphml_ids = g.vertex_properties['_graphml_vertex_id']

    vertices = list(g.vertices())
    # Resolve every vertex to its fileindex slot once instead of once per
    # pair.  Each vertex is at distance 0 from itself, so the original loop
    # performed every one of these lookups as well.
    slots = [self.fileindex[unicode(graphml_ids[v], encoding='utf-8')]
             for v in vertices]

    rows = []
    cols = []
    distances = []
    for src_slot, src_v in zip(slots, vertices):
        src_distances = distance_data[src_v]
        for i, dst_slot in enumerate(slots):
            distance = src_distances[i]
            if distance > 100:
                continue
            rows.append(src_slot)
            cols.append(dst_slot)
            distances.append(distance)

    n = max(self.fileindex.values()) + 1  # since the indexing starts with 0
    shortest_paths = sparse.coo_matrix((distances, (rows, cols)), shape=(n, n))
    shortest_paths = sparse.csr_matrix(shortest_paths).todense()

    if not exists(out_dir):
        mkdir(out_dir)
    for i in xrange(shortest_paths.shape[0]):
        # The context manager closes every handle; the original leaked one
        # open file per matrix row.  Mode 'w' is kept so the files are
        # written exactly as before.
        with open(out_dir + str(i) + '.pickle', 'w') as handle:
            pickle.dump(shortest_paths[i], handle)
def Initial_Networks(filename='2Dlattice.xml.gz'):
    """Load the lattice and build the initial state of the game.

    Each node stands for a player who either always cooperates with its
    immediate neighbours or always defects in every round.  The game is
    synchronous.  Each edge stands for a game: the source always plays a game
    with the target.

    State names used in the module: old cooperator is ``Cooperator``, new
    cooperator is ``New_Coop``, old defector is ``Defector``, new defector is
    ``New_Defc``.

    Every player starts as ``Cooperator`` with utility 0, except the vertex
    with index ``num_vertices / 2``, which starts as ``Defector``.

    Returns:
        ``(graph, pos)``: the graph with the internal vertex properties
        ``'was_cha'``, ``'Utility'`` and ``'is_cha'`` attached, and its
        ``'pos'`` vertex property.
    """
    graph = gt.load_graph(filename)
    pos = graph.vertex_properties['pos']

    strategy = graph.new_vertex_property('vector<double>')
    payoff = graph.new_vertex_property('long double')
    for player in graph.vertices():
        strategy[player] = Cooperator
    payoff.a = 0.0

    cooperated_before = graph.new_vertex_property('bool')
    cooperated_before.a = True

    # The single initial defector sits at the middle vertex index.
    middle = graph.vertex(int(graph.num_vertices() / 2))
    strategy[middle] = Defector
    cooperated_before[middle] = False

    graph.vertex_properties['was_cha'] = cooperated_before
    graph.vertex_properties['Utility'] = payoff
    graph.vertex_properties['is_cha'] = strategy
    return graph, pos
def read_graphml(self,citation_file,citation_meta):
    """Load the citation graph and attach publication years from a CSV file.

    Args:
        citation_file: graph file readable by ``gt.load_graph``; its vertices
            must carry the ``_graphml_vertex_id`` property.
        citation_meta: delimited text file with one header line, then rows
            whose first column is the paper id and whose third column is the
            year.  The delimiter is sniffed from the header line.

    Side effects:
        Sets ``self.graph``, ``self._citation_graphml_vertex_id_to_gt_id``,
        the ``'year'`` vertex property, ``self.min_year`` and
        ``self.max_year``.  Papers missing from the graph are created through
        ``self.add_paper(paper_id, year)``.

    Raises:
        csv.Error: if the delimiter cannot be determined.
        ValueError: if a year is not an integer or the graph has no vertices.
    """
    self.graph = gt.load_graph(citation_file)
    self.graph.vertex_properties['year'] = self.graph.new_vertex_property('int')

    self._citation_graphml_vertex_id_to_gt_id = {}
    for v in self.graph.vertices():
        graphml_id = self.graph.vertex_properties['_graphml_vertex_id'][v]
        self._citation_graphml_vertex_id_to_gt_id[graphml_id] = int(self.graph.vertex_index[v])

    # One pass with a context manager: the file is closed even when sniffing
    # or parsing raises.  Previously it was opened twice and the first handle
    # leaked on a sniff error.
    with open(citation_meta, 'r') as f:
        header = f.readline()
        csv_delimiter = csv.Sniffer().sniff(header).delimiter
        for line in f:
            fields = line.split(csv_delimiter)
            paper_id = fields[0]
            year = int(fields[2].rstrip())
            gt_id = self._citation_graphml_vertex_id_to_gt_id.get(paper_id)
            if gt_id is None:
                paper_obj = self.add_paper(paper_id, year)
            else:
                paper_obj = self.graph.vertex(gt_id)
            self.graph.vertex_properties['year'][paper_obj] = year

    years = self.graph.vertex_properties['year'].get_array()
    self.min_year = min(years)
    self.max_year = max(years)
def __init__(self, graphml_soubor):
    """Load the graph stored in ``graphml_soubor`` into ``self.graf``.

    File names ending in ``'graphml'`` are read with format ``'xml'``; for
    everything else the format is ``'auto'``.
    """
    from graph_tool.all import load_graph

    fmt = 'xml' if graphml_soubor.endswith('graphml') else 'auto'
    self.graf = load_graph(graphml_soubor, fmt)
def get_most_central_twids(N=100):
    """Return the Twitter ids of the ``N`` vertices with the highest Katz centrality.

    The graph is loaded from ``GT_GRAPH_PATH``.  The result is ordered from
    most to less central.

    Args:
        N: maximum number of ids to return.
    """
    g = gt.load_graph(GT_GRAPH_PATH)
    katz_scores = gt.katz(g).get_array()
    # Highest score first.  The previous ascending sort followed by [:N]
    # returned the N *least* central vertices.
    ranked = sorted(enumerate(katz_scores), key=lambda pair: pair[1],
                    reverse=True)
    return [get_twitter_id(g, index) for index, _score in ranked[:N]]
def get_central_and_followed():
    """Return the Twitter id of the top Katz vertex and of its out-neighbours.

    The graph is loaded from ``GT_GRAPH_PATH``.

    Returns:
        ``(central_twid, followed_twids)``: the id of the vertex with the
        highest Katz centrality and the list of ids of its out-neighbours.
    """
    g = gt.load_graph(GT_GRAPH_PATH)
    scores = gt.katz(g).get_array()
    top_index = scores.argmax()
    hub = g.vertex(top_index)

    central_twid = get_twitter_id(g, top_index)
    followed_twids = []
    for neighbour in hub.out_neighbours():
        followed_twids.append(get_twitter_id(g, neighbour))
    return central_twid, followed_twids
def top():
    """Print the top nodes of the largest block of the graph in ``filename``.

    Uses the module-level names ``filename`` and ``blocks`` and the graph's
    ``'blocks'`` vertex property.  The rows come from ``top_ids``.
    """
    g = gt.load_graph(filename)
    print('Graph loaded, now calculating top nodes')
    vblocks = g.vp['blocks']

    def block_size(label):
        # Number of vertices whose 'blocks' value equals ``label``.
        return len(gt.find_vertex(g, vblocks, label))

    largest_block = max(range(blocks), key=block_size)
    print('Largest block is %d with %d nodes'
          % (largest_block, block_size(largest_block)))
    for row in top_ids(g, largest_block):
        print(row)
def show():
    """Draw the graph stored in ``filename`` to ``imagename``.

    The vertex shape always comes from the ``'rc'`` vertex property.  Fill
    colour (``'blocks'``), size (``'rank'``, scaled to 5..15) and position
    (``'pos'``) are only passed to ``gt.graph_draw`` when the matching
    property exists.
    """
    g = gt.load_graph(filename)
    vprops = g.vp
    print('Graph loaded, now drawing to %s' % imagename)

    optional = {}
    if 'blocks' in vprops:
        optional['vertex_fill_color'] = vprops['blocks']
    if 'rank' in vprops:
        optional['vertex_size'] = gt.prop_to_size(vprops['rank'], mi=5, ma=15)
    if 'pos' in vprops:
        optional['pos'] = vprops['pos']

    gt.graph_draw(g, vertex_shape=vprops['rc'], output=imagename, **optional)
def load_graph(self, filename="graph.gt.gz"):
    """
    Load a word-document network generated by make_graph() and saved with
    save_graph().

    Sets ``self.g`` to the loaded graph, ``self.words`` to the names of the
    vertices whose ``"kind"`` is 1 and ``self.documents`` to the names of
    those whose ``"kind"`` is 0, both in vertex order.
    """
    self.g = gt.load_graph(filename)

    words, documents = [], []
    for v in self.g.vertices():
        kind = self.g.vp["kind"][v]
        if kind == 1:
            words.append(self.g.vp["name"][v])
        elif kind == 0:
            documents.append(self.g.vp["name"][v])

    self.words = words
    self.documents = documents
def add_pagerank():
    """Append a ``pagerank`` column to ``all_APS_with_fellows.csv``.

    PageRank (damping 0.5) is computed on the APS graph.  The first column of
    every CSV row is used as the index into the PageRank array.  The result
    is written to ``all_APS_with_fellows_and_pagerank.csv``.
    """
    g = gt.load_graph(
        "/home/mrunelov/KTH/exjobb/SICS-cite/APS/data/APS.graphml")
    pr = gt.pagerank(g, damping=0.5)

    with open("all_APS_with_fellows.csv", "r") as old, \
            open("all_APS_with_fellows_and_pagerank.csv", "w+") as new:
        header = next(old)
        new.write(header.strip() + ",pagerank\n")
        for line in old:
            row = line.strip()
            gt_index = int(row.split(",")[0])
            new.write(row + "," + str(pr.a[gt_index]) + "\n")
def test3(self,datasetnum):
    """Run the containment check on data sets ``1..datasetnum``.

    For every existing directory ``<cwd>/ci6/data<index>`` the query graph
    and the five views are loaded.  When ``self.containCheck()`` succeeds,
    the result of ``self.minContain()`` is printed.  Missing directories are
    skipped.
    """
    curdir = os.getcwd()
    index = 1
    while index <= datasetnum:
        # print "data "+str(index)+" test---------------------->:1"
        path = curdir + "/ci6/data" + str(index)
        if os.path.exists(path):
            Qgraph = gt.load_graph(path + "/Qgraph.xml.gz")
            self.P = gt.Graph(Qgraph)
            # Dgraph = gt.load_graph(path+"/Dgraph.xml.gz")
            for view_number in xrange(1, 6):
                self.Vset.append(
                    gt.load_graph(path + "/view" + str(view_number) + ".xml.gz"))
            if self.containCheck():
                mincontain = self.minContain()
                print(mincontain)
            # Reset the per-data-set state before moving on.
            self.clear_all_paramater()
        index += 1
def load_graph(self, filename='graph.gt.gz'):
    '''
    Load a word-document network generated by make_graph() and saved with
    save_graph().

    Stores the graph in ``self.g``, the names of all vertices of kind 1 in
    ``self.words`` and the names of all vertices of kind 0 in
    ``self.documents``.
    '''
    self.g = gt.load_graph(filename)

    def names_of_kind(kind):
        # Names of the vertices whose 'kind' property equals ``kind``.
        return [
            self.g.vp['name'][v]
            for v in self.g.vertices()
            if self.g.vp['kind'][v] == kind
        ]

    self.words = names_of_kind(1)
    self.documents = names_of_kind(0)
def top(): g = gt.load_graph(filename) print 'Graph loaded, now calculating top nodes' vblocks = g.vp['blocks'] blocks = sorted(range(nblocks), key=lambda b: len(gt.find_vertex(g, vblocks, b)), reverse=True) for block in blocks: print 'Block %d with %d nodes' % (block, len(gt.find_vertex(g, vblocks, block))) tups = top_ids(g, block, n) ids = [t[0] for t in tups] names = get_names(ids) for tup, name in zip(tups, names): print name, tup[0], tup[1]
def read_citation_graphml(self,citation_file):
    '''Load a citation graphml file into the citation layer.

    Sets ``self.citation``, gives it an integer ``'year'`` vertex property,
    stores an empty dict in ``self._multiplex_citation`` for every vertex
    (``self._multiplex_citation`` must exist already) and rebuilds
    ``self._citation_graphml_vertex_id_to_gt_id``.
    '''
    self.citation = gt.load_graph(citation_file)
    self.citation.vertex_properties['year'] = self.citation.new_vertex_property('int')

    for paper in self.citation.vertices():
        self._multiplex_citation[paper] = {}

    # graph_tool vertices are looked up by index, so keep a dictionary from
    # the graphml vertex id to that index.
    self._citation_graphml_vertex_id_to_gt_id = dict(
        (self.citation.vertex_properties['_graphml_vertex_id'][paper],
         int(self.citation.vertex_index[paper]))
        for paper in self.citation.vertices()
    )
def main(active_method, n_rounds, gtype, size_param, p, q, epsilon,
         sampling_method, mwu_reward_method='dist', dog_fraction=0.0,
         debug=False):
    """Run one query-count experiment and store or print its statistics.

    Args:
        active_method: ``MAX_MU``, ``RANDOM``, ``'dog'`` or
            ``NOISY_BINARY_SEARCH``.
        n_rounds: number of rounds passed to the experiment function.
        gtype, size_param: select the graph ``data/<gtype>/<size_param>/graph.gt``.
        p, q, epsilon, sampling_method: forwarded to the experiment functions.
        mwu_reward_method: reward method for the ``MAX_MU`` / ``RANDOM`` runs.
        dog_fraction: ``query_fraction`` for the ``'dog'`` run.
        debug: when true the statistics are printed instead of pickled to
            ``outputs/query_count/<method_name>/<gtype>/<size_param>.pkl``.

    Raises:
        ValueError: if ``active_method`` is not one of the known methods.
    """
    g = load_graph('data/{}/{}/graph.gt'.format(gtype, size_param))
    n_nodes = g.num_vertices()
    print('|V| = {}'.format(n_nodes))

    if active_method in (MAX_MU, RANDOM):
        # Both MWU variants take the same arguments; only the active method
        # differs.
        counts = experiment_mwu_n_rounds(
            n_rounds, g, p, q, epsilon, sampling_method,
            active_method, mwu_reward_method, seed=None)
        method_name = '{}-{}'.format(active_method, mwu_reward_method)
    elif active_method == 'dog':
        counts = experiment_dog_n_rounds(
            n_rounds, g, q, sampling_method, query_fraction=dog_fraction)
        method_name = 'dog-{:.1f}'.format(dog_fraction)
    elif active_method == NOISY_BINARY_SEARCH:
        # NOTE(review): sp_len is not defined in this function; it has to be
        # a module-level name or this branch raises NameError - confirm.
        counts = experiment_noisy_bs_n_rounds(
            g, sp_len, n_rounds, consistency_multiplier=0.9)
        method_name = NOISY_BINARY_SEARCH
    else:
        raise ValueError('unknown method: {!r}'.format(active_method))

    stat = counts_to_stat(counts)

    dirname = 'outputs/query_count/{}/{}'.format(method_name, gtype)
    if not os.path.exists(dirname):
        os.makedirs(dirname)

    if not debug:
        stat.to_pickle('{}/{}.pkl'.format(dirname, size_param))
    else:
        print(stat)
def načtu_graph_tool_graf(xml_soubor):
    '''
    Load a graph-tool graph from ``xml_soubor``, print it and return it.

    File names ending in 'graphml' are read with format 'xml', all others
    with format 'auto'.
    '''
    from graph_tool.all import Graph, load_graph

    fmt = 'xml' if xml_soubor.endswith('graphml') else 'auto'
    graf = load_graph(xml_soubor, fmt)
    print(graf)
    return graf
def load_sentence_graph_from_file(input_file_path, sentence, file_format="gt"):
    """Load a cached sentence graph, returning ``None`` when that fails.

    Args:
        input_file_path: path of the stored graph.
        sentence: sentence handed to ``SentenceGraph`` with the graph.
        file_format: format passed to ``load_graph`` as ``fmt``.

    Returns:
        A ``SentenceGraph`` wrapping the loaded graph, or ``None`` when the
        graph is missing, empty or cannot be loaded.  Failures are reported
        on stdout and never raised, because a missing cache file is expected.
    """
    try:
        loaded_graph = load_graph(input_file_path, fmt=file_format)
        print("Loaded graph: |%s|" % loaded_graph)
        if loaded_graph is not None and loaded_graph.num_vertices() != 0:
            return SentenceGraph(sentence=sentence, graph=loaded_graph)
        # Report the loaded object; the old message formatted the load_graph
        # function itself.
        print(
            "ERROR: Sentence graph loaded is: %s and is either None or has 0 vertices!"
            % loaded_graph)
    except Exception as e:
        # Deliberately broad: any failure here is treated as a cache miss.
        print(
            "ERROR: Cannot load sentence graph in file format: %s from file path: %s,"
            " due to exception:\n%s\nThis is normal if caching is being relied on."
            % (file_format, input_file_path, e))
    return None
def read_graphml(self, citation_file, citation_meta):
    """Load the citation graph and attach publication years from a CSV file.

    Args:
        citation_file: graph file readable by ``gt.load_graph``; its vertices
            must carry the ``_graphml_vertex_id`` property.
        citation_meta: delimited text file with one header line, then rows
            whose first column is the paper id and whose third column is the
            year.  The delimiter is sniffed from the header line.

    Side effects:
        Sets ``self.graph``, ``self._citation_graphml_vertex_id_to_gt_id``,
        the ``'year'`` vertex property, ``self.min_year`` and
        ``self.max_year``.  A paper missing from the graph gets one new
        vertex.

    Raises:
        csv.Error: if the delimiter cannot be determined.
        ValueError: if a year is not an integer or the graph has no vertices.
    """
    self.graph = gt.load_graph(citation_file)
    self.graph.vertex_properties['year'] = self.graph.new_vertex_property(
        'int')

    self._citation_graphml_vertex_id_to_gt_id = {}
    for v in self.graph.vertices():
        graphml_id = self.graph.vertex_properties['_graphml_vertex_id'][v]
        self._citation_graphml_vertex_id_to_gt_id[graphml_id] = int(
            self.graph.vertex_index[v])

    # One pass with a context manager: the file is closed even when sniffing
    # or parsing raises.  Previously it was opened twice and the first handle
    # leaked on a sniff error.
    with open(citation_meta, 'r') as f:
        header = f.readline()
        csv_delimiter = csv.Sniffer().sniff(header).delimiter
        for line in f:
            fields = line.split(csv_delimiter)
            paper_id = fields[0]
            year = int(fields[2].rstrip())
            gt_id = self._citation_graphml_vertex_id_to_gt_id.get(paper_id)
            if gt_id is None:
                paper_obj = self.graph.add_vertex()
                self.graph.vertex_properties['_graphml_vertex_id'][
                    paper_obj] = paper_id
                # Register the new vertex.  Otherwise a paper that appears on
                # several rows would get a fresh duplicate vertex every time.
                self._citation_graphml_vertex_id_to_gt_id[paper_id] = int(
                    self.graph.vertex_index[paper_obj])
            else:
                paper_obj = self.graph.vertex(gt_id)
            self.graph.vertex_properties['year'][paper_obj] = year

    years = self.graph.vertex_properties['year'].get_array()
    self.min_year = min(years)
    self.max_year = max(years)


##########################################################
##
#Calculate statistics of long-range-correlation motif
def long_range_motif(self):
    """Placeholder for the long-range-correlation motif statistics; does nothing yet."""
    pass
def cascades_on_grid():
    """Generate ``K`` cascades on the 2-6 grid for every model in ``MODELS`` and q in ``QS``.

    Returns:
        A list of tuples ``(g, *cascade, model, q, i)`` where ``cascade`` is
        whatever ``gen_nontrivial_cascade`` returns with ``return_tree=True``
        (per the original note: infection_times, source, obs_nodes,
        true_tree).
    """
    g = load_graph('data/grid/2-6/graph.gt')
    cascades = []
    for model in MODELS:
        for q in QS:
            for run in range(K):
                outcome = gen_nontrivial_cascade(
                    g, P, q,
                    model=model,
                    return_tree=True,
                    source_includable=True)
                cascades.append((g, ) + outcome + (model, q, run))
    return cascades
def load_graph(path, algorithms, format='graphml', component=False):
    """Load a network, optionally simplify it, and report the elapsed time.

    Args:
        path: file to load.
        algorithms: container of algorithm names; if it contains ``'kores'``
            parallel edges and self loops are removed.
        format: file format passed to ``gt.load_graph`` as ``fmt``.
        component: when true, only the largest component (computed with
            ``directed=False``) is kept and the other vertices are purged.

    Returns:
        The loaded graph.
    """
    sys.stdout.write('Loading network ...')
    sys.stdout.flush()
    started = time.time()

    g = gt.load_graph(path, fmt=format)
    if 'kores' in algorithms:
        gt.remove_parallel_edges(g)
        gt.remove_self_loops(g)
    if component:
        g.set_vertex_filter(gt.label_largest_component(g, directed=False))
        g.purge_vertices()

    elapsed = time.time() - started
    sys.stdout.write('Ok! ({0} s.)\n'.format(elapsed))
    return g
def load_graph(file):
    """Load a graph, choosing the reader from the file extension.

    ``.mtx``, ``.csv``, ``.graph`` and ``.vna`` go to ``load_mm``,
    ``load_csv``, ``load_chaco`` and ``load_vna``; anything else is handed to
    ``gt.load_graph``.  The result is made undirected and stripped of
    parallel edges before it is returned.
    """
    extension = os.path.splitext(file)[1]
    if extension == '.mtx':
        g = load_mm(file)
    elif extension == '.csv':
        g = load_csv(file)
    elif extension == '.graph':
        g = load_chaco(file)
    elif extension == '.vna':
        g = load_vna(file)
    else:
        # Give the file to graph_tool and hope for the best.
        g = gt.load_graph(file)

    g.set_directed(False)
    gt.remove_parallel_edges(g)
    return g
def maybe_load_graph(data_path, withIgraph):
    """
    Try to load the stored graph; return None if the file does not exist.

    :param data_path: path for data storage
    :type data_path: str
    :param withIgraph: set to True to use igraph library else graph-tool is used
    :type withIgraph: bool
    :return: graph object, or None when the file is missing
    """
    # NOTE(review): the graph-tool branch ignores data_path and reads
    # "my_graph.xml.gz" from the current directory - confirm this matches
    # where the graph is saved.
    try:
        if withIgraph:
            return ig.Graph.Read_Picklez(path.join(data_path, "graphi"))
        return gt.load_graph("my_graph.xml.gz")
    except FileNotFoundError:
        return None
def rysuj_wejscie(input_file, output=None, size=(600, 600)):
    """Draw the input graph with every vertex labelled ``'<vertex>: <liczba_kolorow>'``.

    Args:
        input_file: graph file readable by ``load_graph``; its vertices must
            carry the ``liczba_kolorow`` property.
        output: forwarded to ``graph_draw`` as ``output``.
        size: forwarded to ``graph_draw`` as ``output_size``.

    The labels are stored in the internal vertex property
    ``'wyswietlany_tekst'``.
    """
    g2 = load_graph(input_file)
    labels = g2.new_vertex_property('string')
    g2.vertex_properties['wyswietlany_tekst'] = labels
    colour_counts = g2.vertex_properties['liczba_kolorow']

    # The old code branched on the file extension, but the second test
    # (``== 'xml' or 'graphml'``) was always true and ``split('.')[1]`` broke
    # on names without a dot.  Applying str() to every value covers both
    # cases, since it leaves values that are already strings unchanged.
    for v in g2.vertices():
        labels[v] = str(v) + ': ' + str(colour_counts[v])

    graph_draw(g2,
               vertex_text=labels,
               bg_color=[255., 255., 255., 1],
               output_size=size,
               output=output)
def __init__(self, graphml=None, graph=None, seed=2):
    """Build the simulation state from a graphml file or an existing graph.

    Args:
        graphml: path of a graph file; takes precedence over ``graph``.
        graph: an existing graph.  It is modified in place: every vertex
            outside the largest component is removed and the graph is made
            undirected.
        seed: stored as ``self.diversity``; one set of property maps is
            created per unit of it.

    Raises:
        ValueError: if neither ``graphml`` nor ``graph`` is given.
    """
    if graphml is not None:
        self.g = gt.load_graph(graphml)
        #self.v_name = self.g.new_edge_property("str")
        # list_properties() prints the listing itself; wrapping it in print()
        # only added a stray "None" line.
        self.g.list_properties()
        print("Create graph from graphml {}".format(graphml))
    elif graph is not None:
        self.g = graph
        giant = gt.label_largest_component(self.g)
        origin_size = self.g.num_vertices()
        # Walk the indices from the highest down while removing vertices.
        for index in reversed(range(origin_size)):
            if not giant[index]:
                self.g.remove_vertex(index, fast=True)
        self.g.set_directed(False)
        print("Create graph from graph.")
    else:
        # Fail here with a clear message instead of on the missing self.g
        # a few lines below.
        raise ValueError("No graphml or graph are provided!")

    print("Number of vertices: {}\nNumber of edges: {}"\
          .format(self.g.num_vertices(), self.g.num_edges()))
    print("\n-----------------------------------------")

    self.diversity = seed
    self.v_infected = [
        self.g.new_vertex_property("bool") for _ in range(self.diversity)
    ]
    self.v_reinfected = [
        self.g.new_vertex_property("float") for _ in range(self.diversity)
    ]
    self.e_reinfected = [
        self.g.new_edge_property("float") for _ in range(self.diversity)
    ]
    self.e_spread_beta = [
        self.g.new_edge_property("double") for _ in range(self.diversity)
    ]
    self.e_extinct_beta = [
        self.g.new_edge_property("double") for _ in range(self.diversity)
    ]

    self.threshold = 2
    self.activate = [self.threshold for _ in self.g.vertices()]
def load_graphs_gt(path, year='2020'):
    """Load every relevant graph file in ``path``, keyed by its bias label.

    Files whose name contains ``'pro_'``, ``'combined'`` or ``'complete'``
    are skipped.  For ``year == '2020'`` the label is the file name without
    its last three ``_``-separated parts, joined by spaces.  For ``'2016'``
    it is looked up in ``PATH_TO_BIAS_2016`` by full path.

    Returns:
        dict mapping bias label -> loaded graph.

    Raises:
        ValueError: for any other ``year``.
    """
    skipped_markers = ('pro_', 'combined', 'complete')
    graphs = {}
    for fname in os.listdir(path):
        full_path = os.path.join(path, fname)
        if not os.path.isfile(full_path):
            continue
        if any(marker in fname for marker in skipped_markers):
            continue

        if year == '2020':
            #bias = ' '.join(fname.split('_')[:-2])
            # temporarily, since we have both complete and simple in one dir,
            # we will name the bias the full path so that we can get them all
            # in one table
            bias = ' '.join(fname.split('_')[:-3])
        elif year == '2016':
            bias = PATH_TO_BIAS_2016[os.path.join(path, fname)]
        else:
            raise ValueError
        print(bias)

        graphs[bias] = gt.load_graph(os.path.join(path, fname))
    return graphs
def load_taxonomy_graph(filename):
    '''Load the NCBI taxonomy graph saved as ``filename`` below ``basepath``.

    Returns:
        ``(G, tid2v, name2v)``: the graph, a dict from the ``'taxid'`` vertex
        property to the vertex, and a dict from the ``'name'`` vertex
        property to the vertex.  For repeated values the last vertex wins.
    '''
    print("loading graph")
    G = gt.load_graph(join(basepath, filename))
    # num_vertices is a method.  Without the call the message below showed
    # the bound-method repr instead of the count.
    n = G.num_vertices()
    print("generating tid2v and name2v dictionaries")
    print(str(n) + " entries")

    v2tid = G.vertex_properties['taxid']
    v2name = G.vertex_properties['name']
    tid2v = {}
    name2v = {}
    for v in G.vertices():
        tid2v[v2tid[v]] = v
        name2v[v2name[v]] = v
    return G, tid2v, name2v
def rysuj_wejscie(input_file, output=None, size=(600, 600)):
    """Draw the input graph with every vertex labelled ``'<vertex>: <liczba_kolorow>'``.

    Args:
        input_file: graph file readable by ``load_graph``; its vertices must
            carry the ``liczba_kolorow`` property.
        output: forwarded to ``graph_draw`` as ``output``.
        size: forwarded to ``graph_draw`` as ``output_size``.

    The labels are stored in the internal vertex property
    ``'wyswietlany_tekst'``.
    """
    g2 = load_graph(input_file)
    labels = g2.new_vertex_property('string')
    g2.vertex_properties['wyswietlany_tekst'] = labels
    colour_counts = g2.vertex_properties['liczba_kolorow']

    # The extension-based branching is gone: its second test
    # (``== 'xml' or 'graphml'``) was always true, and ``split('.')[1]``
    # failed on names without a dot.  str() leaves string values untouched,
    # so one code path serves every format.
    for v in g2.vertices():
        labels[v] = str(v) + ': ' + str(colour_counts[v])

    graph_draw(g2,
               vertex_text=labels,
               bg_color=[255., 255., 255., 1],
               output_size=size,
               output=output)
def __init__(self, graph_file, match_origin=None, match_pc_start=None,
             match_pc_end=None, match_mem_start=None, match_mem_end=None,
             match_deref_start=None, match_deref_end=None,
             match_syscall=None, match_perms=None, match_otype=None,
             match_alloc_start=None, match_alloc_end=None,
             match_len_start=None, match_len_end=None, match_any=None,
             show_predecessors=False):
    """Load the graph to dump and remember the node-matching criteria.

    ``graph_file`` is loaded with ``load_graph``.  Every ``match_*`` argument
    is stored under the attribute of the same name, except ``match_origin``,
    which is handed to ``self._check_origin_arg``.  ``show_predecessors`` is
    stored as ``self.dump_predecessors``.
    """
    self.graph = load_graph(graph_file)
    """The graph to dump."""

    self.match_origin = None
    """Search for nodes with this origin"""
    # presumably validates the value and fills self.match_origin - verify
    # against _check_origin_arg
    self._check_origin_arg(match_origin)

    # Range criteria, stored as (start, end) pairs.
    self.match_pc_start, self.match_pc_end = match_pc_start, match_pc_end
    self.match_mem_start, self.match_mem_end = match_mem_start, match_mem_end
    self.match_deref_start, self.match_deref_end = (
        match_deref_start, match_deref_end)
    self.match_alloc_start, self.match_alloc_end = (
        match_alloc_start, match_alloc_end)
    self.match_len_start, self.match_len_end = match_len_start, match_len_end

    # Single-valued criteria.
    self.match_syscall = match_syscall
    self.match_perms = match_perms
    self.match_otype = match_otype
    self.match_any = match_any

    self.dump_predecessors = show_predecessors
def load_taxonomy_graph(source):
    """Load the taxonomy graph from ``source`` and attach lookup helpers to it.

    Property maps are exposed as attributes of the graph (``vertex_name``,
    ``vertex_taxid``, ``edge_in_taxonomy``, ...).  In addition:

    * ``g.taxid_vertex`` maps taxid -> vertex for the vertices selected by
      the ``'istaxon'`` filter,
    * ``g.stem_cdef_vertex`` maps ``tuple(stem_cdef)`` -> vertex for the
      remaining vertices that have a non-empty ``stem_cdef``; a missing key
      adds a new vertex to the graph,
    * ``g.root`` is vertex 0.

    All filters are cleared before the graph is returned.
    """
    g = gt.load_graph(source)

    vprops, eprops = g.vp, g.ep
    g.vertex_name = vprops['name']
    g.vertex_taxid = vprops['taxid']
    g.edge_in_taxonomy = eprops['istaxon']
    g.vertex_in_taxonomy = vprops['istaxon']
    ## g.dubious = g.vp['dubious']
    g.incertae_sedis = vprops['incertae_sedis']
    g.hindex = vprops['hindex']

    # taxid -> vertex, restricted to the taxonomy vertices
    g.set_vertex_filter(g.vertex_in_taxonomy)
    g.taxid_vertex = dict((g.vertex_taxid[v], v) for v in g.vertices())
    g.set_vertex_filter(None)

    g.edge_strees = get_or_create_ep(g, 'stree', 'vector<int>')
    g.vertex_snode = get_or_create_vp(g, 'snode', 'int')
    g.vertex_strees = get_or_create_vp(g, 'stree', 'vector<int>')
    g.vertex_stem_cdef = get_or_create_vp(g, 'stem_cdef', 'vector<int>')

    # stem cdef -> vertex, restricted to the non-taxonomy vertices
    g.stem_cdef_vertex = defaultdict(lambda: g.add_vertex())
    g.set_vertex_filter(g.vertex_in_taxonomy, inverted=True)
    for v in g.vertices():
        cdef = g.vertex_stem_cdef[v]
        if not cdef:
            continue
        g.stem_cdef_vertex[tuple(cdef)] = v
    g.clear_filters()

    ## # collapsed nodes are now purged on graph creation
    ## if g.vp.get('collapsed'):
    ##     g.collapsed = g.vp['collapsed']
    ##     g.set_vertex_filter(g.collapsed, inverted=True)

    _attach_funcs(g)

    g.root = g.vertex(0)
    ## r = [ x for x in g.vertices() if x.in_degree()==0 ]
    ## assert len(r)==1
    ## g.root = r[0]

    ## p = g.vp.get('collapsed')
    ## if p and sum(p.a):
    ##     g = graph_view(g, vfilt=p)

    return g
def create_graph(self, graph_file=None, save_file='csx_graph.gt'):
    """Load the graph from ``graph_file`` or build it from ``self.nodes`` / ``self.edges``.

    Args:
        graph_file: optional path of a stored graph.  When it loads, the
            graph is returned as is (an integer ``'id'`` property holding the
            vertex indices is added if the file has none).
        save_file: where a freshly built graph is saved.

    Returns:
        The graph, also stored in ``self.g``.  ``self.vertex_idx_map`` maps
        ``'id'`` values to vertex indices for a loaded graph, and node names
        to vertex descriptors for a built one.
    """
    self.vertex_idx_map = {}

    if graph_file:
        try:
            g = gt.load_graph(graph_file)
        except IOError as e:
            # Fall through and build the graph from scratch.
            print('Loading graph from file: Failed - {}'.format(e))
        else:
            # list_properties() prints the listing itself; printing its
            # return value only added a stray "None".
            g.list_properties()
            if 'id' not in g.vp:
                g.vp['id'] = g.new_vp('int', vals=g.get_vertices())
            for v in g.vertices():
                self.vertex_idx_map[g.vp['id'][v]] = g.vertex_index[v]
            self.g = g
            print('Loading graph from file: Succeeded')
            return self.g

    g = gt.Graph(directed=False)

    # add vertices
    g.vp['id'] = g.new_vp("string")
    for idx, node in enumerate(self.nodes):
        v = g.add_vertex()
        g.vp['id'][v] = node
        self.vertex_idx_map[node] = v
        if idx % 100e3 == 0:
            print('Adding vertex {}'.format(idx))
    print('Vertices added')

    for idx, (src, tar) in enumerate(self.edges):
        g.add_edge(self.vertex_idx_map[src], self.vertex_idx_map[tar])
        if idx % 1e6 == 0:
            print('Adding edge {}'.format(idx))
    print('Edges added')

    self.g = g
    self.g.save(save_file)
    print('Graph created and saved')
    return self.g
def test_mospp_large():
    """Check ``mospp`` on the Trafalgar graph against the stored solution.

    The two objectives are the edge length and ``NO2_mean * length``.  The
    expected routes are the entries of ``data["solution"]``, each reversed.
    """
    G = load_graph("./tests/test_graphs/Trafalgar.gt")
    G.list_properties()

    vcolor = G.new_vertex_property("string")
    pos = G.new_vertex_property("vector<double>")
    float_length = G.new_edge_property("double")
    float_x = G.new_vertex_property("double")
    float_y = G.new_vertex_property("double")
    G.edge_properties["float_length"] = float_length
    pollution = G.new_edge_property("double")
    number = G.new_vertex_property("string")

    # Attributes as stored in the file; they are converted with float() below.
    mean = G.edge_properties["NO2_mean"]
    length = G.edge_properties["length"]
    x = G.vertex_properties["x"]
    y = G.vertex_properties["y"]

    for v in G.get_vertices():
        x_value, y_value = float(x[v]), float(y[v])
        vcolor[v] = "red"
        pos[v] = [x_value, y_value]
        float_x[v] = x_value
        float_y[v] = y_value
        number[v] = str(G.vertex_index[v])

    for e in G.edges():
        edge_length = float(length[e])
        float_length[e] = edge_length
        pollution[e] = float(mean[e]) * edge_length

    source, target = 253, 3043
    vcolor[source] = "blue"
    vcolor[target] = "blue"

    solution = [list(reversed(route)) for route in data["solution"]]
    found = [
        [G.vertex_index[r] for r in route]
        for route in mospp(G.vertex(source), G.vertex(target),
                           float_length, pollution)
    ]
    assert found == solution
def tree_and_cascade():
    """Return the 2-6 balanced tree together with one cascade generated on it.

    Returns:
        ``(g, gvs, c, s, o)`` where ``c, s, o`` are the three values returned
        by ``gen_nontrivial_cascade(g, 0.8, 0.5)`` and ``gvs`` is
        ``get_gvs(g, 0.5, 100)``.
    """
    g = load_graph('data/balanced-tree/2-6/graph.gt')
    # The cascade is generated before get_gvs is called, as in the original.
    c, s, o = gen_nontrivial_cascade(g, 0.8, 0.5)
    gvs = get_gvs(g, 0.5, 100)
    return (g, gvs, c, s, o)
def load(graph_name):
    """Load ``<graph_name>.gt`` with graph-tool and return the graph."""
    gt_path = "{}.gt".format(graph_name)
    return gt.load_graph(gt_path)
followed_ids = [get_tw_index(ug, f) for f in followed] subgraph.add_edges_from([(u_id, f_id) for f_id in followed_ids]) new_unvisited.update(followed) visited.add(u) new_unvisited = new_unvisited - visited unvisited = new_unvisited n_nodes = subgraph.number_of_nodes() n_edges = subgraph.number_of_edges() print "%d nodes, %d edges" % (n_nodes, n_edges) return subgraph # TODO: restrict universe to layer0 + layer1 if __name__ == '__main__': print "Loading universe graph..." ug = gt.load_graph('huge_graph.gt') print "Loading seeds..." seeds = [v for v in ug.vertices() if v.out_degree() == 50] print "%d seeds loaded" % len(seeds) print "Computing subgraph..." subgraph = sample_subgraph(ug, seeds, K=50) # subgraph.save('subgraph.gt') # nx.write_gpickle(subgraph, 'subgraph.gpickle') nx.write_gpickle(subgraph, 'subgraph_gt_nx.gpickle')
# Convert an i, j square index to long form (see sketchlib for info)
def square_to_condensed(i, j, n):
    """Return the condensed (long form) index of entry ``(i, j)`` of an ``n x n`` matrix.

    Requires ``j > i``.
    """
    assert (j > i)
    # n*i - i*(i+1)/2 is the condensed position where row i starts,
    # before the column offset is applied.
    row_start = n * i - ((i * (i + 1)) >> 1)
    return row_start + j - 1 - i


# main code
if __name__ == "__main__":

    # Get command line options
    args = get_options()

    # Load network
    import graph_tool.all as gt
    G = gt.load_graph(args.graph)
    if "weight" in G.edge_properties:
        quit_msg("Graph already contains weights")

    # Load dists
    with open(args.distances + ".pkl", 'rb') as pickle_file:
        rlist, qlist, self = pickle.load(pickle_file)
    if not self:
        quit_msg("Distances are from query mode")
    dist_mat = np.load(args.distances + ".npy")

    # Check network and dists are compatible
    network_labels = G.vertex_properties["id"]
    if set(network_labels) != set(rlist):
        quit_msg("Names in distances do not match those in graph")
    n = G.num_vertices()
import graph_tool.all as gt import itertools import config as conf dataset = conf.settings['dataset'] G = gt.load_graph("../datasets/" + dataset + "/data/" + dataset + ".graphml")) print "Loaded a graph with " + str(G.num_vertices()) + " vertices and " + str(G.num_edges()) + " edges." del G.properties[("v","date")] del G.properties[("v","label")] G2 = gt.GraphView(G, vfilt=lambda v: v.in_degree() > 0, efilt=lambda e: e.source().out_degree >= 2) CC = gt.Graph(G2) #gt.load_graph("tmp/co-citation-APS-10000.graphml") CC.purge_edges() CC.set_directed(False) # CC = gt.load_graph("tmp/co-citation-APS-80000.graphml") # load partly processed graph weight = CC.new_edge_property("int") CC.edge_properties["weight"] = weight def build_co_citation(CC): N = str(CC.num_vertices()) idx = -1 for n in G2.vertices(): idx += 1 if idx <= 80000: continue if idx%10000 == 0:
import graph_tool.all as gt
import graph_tool
import time
import math
import os

# Script: load a graph, compute PageRank on its undirected version and save
# the graph with the scores attached.

# Wall-clock start; `diff` below covers load + PageRank + save.
start = time.time()
filename = 'graph14'
path = '/scratch/jmj418/Sloth'
loadpath = os.path.join(path,filename+'.gt')
savepath = os.path.join(path,filename+'_Upagerank.gt')
g = gt.load_graph(loadpath)
print(g.num_vertices(), g.num_edges())
# Edge direction is dropped before PageRank is computed.
g.set_directed(False)
pagerank = graph_tool.centrality.pagerank(g)
# Internal property, so the scores are written out together with the graph.
g.vertex_properties['pagerank'] = pagerank
g.save(savepath)
diff = time.time() - start
nodes = g.num_vertices()
edges = g.num_edges()
# From here on `filename` is a label built from the graph name and its size
# in millions of nodes (MN) and millions of edges (ME).
filename = 'pagerank_%s_%dMN_%dME' % (filename,nodes/1000000,edges/1000000)
from haversine import haversine
from cleanair.loggers import get_logger
from routex import astar, mospp
from urbanroute.geospatial import (
    ellipse_bounding_box,
    coord_match,
    remove_leaves,
    remove_paths,
)

APP = FastAPI()
logger = get_logger("Shortest path entrypoint")
logger.setLevel(logging.DEBUG)

# The graph is loaded once, at import time.
logger.info("Loading graph of London...")
start = time.time()
G = load_graph("../graphs/Trafalgar.gt")
logger.info("Graph loaded in %s seconds.", time.time() - start)
# num_vertices / num_edges are methods.  They must be called, otherwise the
# log line shows bound-method reprs instead of the counts.
logger.info("%s nodes and %s edges in the graph.", G.num_vertices(), G.num_edges())

# add position property, and add float versions of string edge attributes
pos = G.new_vertex_property("vector<double>")
float_length = G.new_edge_property("double")
float_x = G.new_vertex_property("double")
float_y = G.new_vertex_property("double")
G.edge_properties["float_length"] = float_length
pollution = G.new_edge_property("double")
mean = G.edge_properties["NO2_mean"]
length = G.edge_properties["length"]
# used in linear scalarisation
scalarisation = G.new_edge_property("double")
x = G.vertex_properties["x"]
def useGraphTool(pd, space):
    """Analyse and draw planner data with graph-tool.

    The planner data is exported as GraphML to ``graph.xml`` and loaded back
    with graph-tool.  Degree, edge weight and component statistics are
    printed, and the graph is drawn to ``graph.png``.  Start, goal and other
    vertices get different colours and sizes.  If both a start and a goal
    vertex exist, the edges of the A* path between them are drawn in red.

    Args:
        pd: planner data object providing ``printGraphML``,
            ``isStartVertex`` and ``isGoalVertex``.
        space: not used by this function.
    """
    # Extract the graphml representation of the planner data
    graphml = pd.printGraphML()
    with open("graph.xml", 'w') as f:
        f.write(graphml)

    # Load the graphml data using graph-tool
    graph = gt.load_graph("graph.xml")
    edgeweights = graph.edge_properties["weight"]

    # Write some interesting statistics
    avgdeg, stddevdeg = gt.vertex_average(graph, "total")
    avgwt, stddevwt = gt.edge_average(graph, edgeweights)

    print("---- PLANNER DATA STATISTICS ----")
    print(str(graph.num_vertices()) + " vertices and " + str(graph.num_edges()) + " edges")
    print("Average vertex degree (in+out) = " + str(avgdeg) + " St. Dev = " + str(stddevdeg))
    print("Average edge weight = " + str(avgwt) + " St. Dev = " + str(stddevwt))

    comps, hist = gt.label_components(graph)
    print("Strongly connected components: " + str(len(hist)))

    # Make the graph undirected (for weak components, and a simpler drawing)
    graph.set_directed(False)
    comps, hist = gt.label_components(graph)
    print("Weakly connected components: " + str(len(hist)))

    # Plotting the graph
    gt.remove_parallel_edges(graph)  # Removing any superfluous edges

    edgeweights = graph.edge_properties["weight"]
    colorprops = graph.new_vertex_property("string")
    vertexsize = graph.new_vertex_property("double")

    start = -1
    goal = -1

    # Color and size vertices by type: start, goal, other
    for v in range(graph.num_vertices()):
        vertex = graph.vertex(v)
        if pd.isStartVertex(v):
            start = v
            colorprops[vertex] = "cyan"
            vertexsize[vertex] = 10
        elif pd.isGoalVertex(v):
            goal = v
            colorprops[vertex] = "green"
            vertexsize[vertex] = 10
        else:
            colorprops[vertex] = "yellow"
            vertexsize[vertex] = 5

    # default edge color is black with size 0.5:
    edgecolor = graph.new_edge_property("string")
    edgesize = graph.new_edge_property("double")
    for e in graph.edges():
        edgecolor[e] = "black"
        edgesize[e] = 0.5

    # using A* to find shortest path in planner data
    if start != -1 and goal != -1:
        start_vertex = graph.vertex(start)
        dist, pred = gt.astar_search(graph, start_vertex, edgeweights)

        # Color edges along shortest path red with size 2.0
        v = graph.vertex(goal)
        while v != start_vertex:
            p = graph.vertex(pred[v])
            if p == v:
                # The predecessor chain stopped before reaching the start, so
                # no path to the goal was found.  Without this check the loop
                # would never end.
                break
            for e in p.out_edges():
                if e.target() == v:
                    edgecolor[e] = "red"
                    edgesize[e] = 2.0
            v = p

    # Writing graph to file (no positions are passed, so graph_draw chooses
    # the layout itself)
    gt.graph_draw(graph, vertex_size=vertexsize, vertex_fill_color=colorprops,
                  edge_pen_width=edgesize, edge_color=edgecolor,
                  output="graph.png")
    print('')
    print('Graph written to graph.png')
def read_graphml(self,collab_file,citation_file,mult_file):
    '''Read multiplex from files specifying the collaboration network, the citation network and multiplex meta data

    :param collab_file: graph file of the collaboration network (one vertex per author).
    :param citation_file: graph file of the citation network (one vertex per paper).
    :param mult_file: delimited text file with one header line followed by
        ``paper, author, year`` rows linking the two networks.

    Populates ``self.collab``, ``self.citation``, the two multiplex property
    maps and the two graphml-id -> vertex-index lookup dictionaries.
    '''
    ##################################
    # Determine the csv delimiter from the header line.  The context manager
    # closes the file even when the sniffer raises.
    with open(mult_file,'r') as f:
        csv_delimiter = csv.Sniffer().sniff(f.readline()).delimiter

    # read data
    self.collab = gt.load_graph(collab_file)
    self.citation = gt.load_graph(citation_file)
    self.citation.vertex_properties['year'] = self.citation.new_vertex_property('int')

    # create the multiplex structure, implemented with property maps:
    # every vertex carries a dict whose keys are its partners in the other layer
    self._multiplex_collab = self.collab.new_vertex_property('object')
    self._multiplex_citation = self.citation.new_vertex_property('object')
    for v in self.collab.vertices():
        self._multiplex_collab[v] = {}
    for v in self.citation.vertices():
        self._multiplex_citation[v] = {}

    # graph_tool vertices are looked up by index, so keep a dictionary from
    # the graphml vertex id to the vertex index for each layer
    self._collab_graphml_vertex_id_to_gt_id = {}
    self._citation_graphml_vertex_id_to_gt_id = {}
    for v in self.collab.vertices():
        self._collab_graphml_vertex_id_to_gt_id[self.collab.vertex_properties['_graphml_vertex_id'][v]] = int(self.collab.vertex_index[v])
    for v in self.citation.vertices():
        self._citation_graphml_vertex_id_to_gt_id[self.citation.vertex_properties['_graphml_vertex_id'][v]] = int(self.citation.vertex_index[v])

    # fill the multiplex
    with open(mult_file,'r') as f:
        # Skip the header; its third field only names the multiplex edge
        # property (year) and is not needed below.
        f.readline()

        # write multiplex edges with multiplex edge property (year)
        for line in f:
            if not line.strip():
                # tolerate blank lines (e.g. a trailing newline at end of file)
                continue
            tmp = line.split(csv_delimiter)
            paper_tmp = tmp[0]
            author_tmp = tmp[1]
            year = int(tmp[2].rstrip())

            try:
                paper_obj = self.citation.vertex(self._citation_graphml_vertex_id_to_gt_id[paper_tmp])
            except KeyError:
                # NOTE(review): a vertex is created here and add_paper() is
                # called right after; confirm add_paper() does not create a
                # second vertex and that it registers the paper id itself.
                v = self.citation.add_vertex()
                self.citation.vertex_properties['_graphml_vertex_id'][v] = paper_tmp
                self._multiplex_citation[v] = {}
                paper_obj = self.add_paper(paper_tmp,year,author_tmp,update_collaborations=False)

            try:
                author_obj = self.collab.vertex(self._collab_graphml_vertex_id_to_gt_id[author_tmp])
            except KeyError:
                v = self.collab.add_vertex()
                self.collab.vertex_properties['_graphml_vertex_id'][v] = author_tmp
                self._multiplex_collab[v] = {}
                # Register the new author so later rows naming the same author
                # reuse this vertex instead of adding a duplicate.
                self._collab_graphml_vertex_id_to_gt_id[author_tmp] = int(self.collab.vertex_index[v])
                author_obj = v

            self.citation.vertex_properties['year'][paper_obj] = year
            self._multiplex_collab[author_obj][paper_obj] = True
            self._multiplex_citation[paper_obj][author_obj] = True
#! /usr/bin/env python
# -*- coding: utf-8 -*-
"""
@author: lockheed
Information and Electronics Engineering
Huazhong University of science and technology
E-mail:[email protected]
Created on: 4/30/14 9:58 AM

Copyright (C) lockheedphoenix

"""
import graph_tool.all as gt


def _render(graph, layout, target):
    """Draw *graph* at the stored *layout* into the image file *target*."""
    gt.graph_draw(graph, pos=layout, output_size=[1024, 800], output=target)


# Load the stored graph together with its saved vertex coordinates.
g = gt.load_graph('RG.xml.gz')
pos = g.vertex_properties['pos']

# Picture of the graph as loaded.
_render(g, pos, 'RG.png')

# Rewire the edges in place with the 'erdos' model, then redraw using the
# same vertex coordinates so the two pictures can be compared side by side.
gt.random_rewire(g, model='erdos')
_render(g, pos, 'ER.png')
#!/usr/bin/python3
import os
import statistics as stats
import graph_tool.all as gt

# All graph paths below are relative to this directory.
os.chdir("/home/jen/Documents/School/GradSchool/Thesis/Images/")

g_link = gt.load_graph("Examples/ToyLinked.xml.gz")
g_bran = gt.load_graph("Examples/ToyBranching.xml.gz")

# --- Misc stats: average and deviation of the total vertex degree ---
link_deg_avg, link_deg_std = gt.vertex_average(g_link, deg="total")
bran_deg_avg, bran_deg_std = gt.vertex_average(g_bran, deg="total")

# --- Centrality: per-vertex betweenness flattened into plain lists ---
vp_btwn_link, ep_btwn_link = gt.betweenness(g_link)
link_btwn = []
for v in g_link.vertices():
    link_btwn.append(vp_btwn_link[v])

vp_btwn_bran, ep_btwn_bran = gt.betweenness(g_bran)
bran_btwn = []
for v in g_bran.vertices():
    bran_btwn.append(vp_btwn_bran[v])

link_btwn_avg, link_btwn_std = stats.mean(link_btwn), stats.stdev(link_btwn)
bran_btwn_avg, bran_btwn_std = stats.mean(bran_btwn), stats.stdev(bran_btwn)

# --- Cost and efficiency ---
link_mst = gt.min_spanning_tree(g_link)
bran_mst = gt.min_spanning_tree(g_bran)

# Flatten the per-vertex distance vectors into one list of distances.
link_shortest = []
for vector in gt.shortest_distance(g_link):
    link_shortest.extend(vector)
## Split off the personally identifiable information (PII).
## The rotated SID ('sidr') is written alongside it so the PII can be joined
## back to the publicizable data later.
pii_columns = ['given', 'surname', 'sid', 'sidr']
pii_data = data[pii_columns]
pii_data.to_csv(pii_outfile)

## Strip the PII columns from the publicizable table ('sidr' stays).
for pii_column in ('given', 'surname', 'sid'):
    del data[pii_column]
data.to_csv(metadata_file)

## ----------
## Sanitize the graph files: attach 'sidr' as a vertex property and drop the
## identifying vertex properties.
gt_net = gt.load_graph(net_gt_file)
gt_net.vp['sidr'] = gt_net.new_vp('string', vals = data['sidr'])
for pii_property in ('surname', 'given', 'sid'):
    del gt_net.vp[pii_property]
gt_net.save(net_gt_file)

gml_net = gt.load_graph(net_graphml_file)
gml_net.vp['sidr'] = gml_net.new_vp('string', vals = data['sidr'])
for pii_property in ('surname', 'given'):
    del gml_net.vp[pii_property]
def load_graph(path: Union[pathlib.Path, str]) -> GRAPH_T:
    """Load a graph from *path* with graph-tool.

    :param path: location of the graph file, as a string or a ``pathlib`` path.
    :return: whatever ``gtall.load_graph`` returns for that file.
    """
    # graph-tool's loader appears to special-case ``str`` file names only
    # (other objects are presumably handled as open file objects - confirm
    # against the installed version), so hand path objects over as strings.
    if isinstance(path, pathlib.PurePath):
        path = str(path)
    return gtall.load_graph(path)
def grid_and_cascade():
    """Load the 2-6 grid graph and build a cascade plus ``get_gvs`` output for it.

    :return: the 5-tuple ``(g, gvs, c, s, o)`` where ``c, s, o`` are the three
        values produced by ``gen_nontrivial_cascade(g, 0.8, 0.5)`` and ``gvs``
        is ``get_gvs(g, 0.5, 100)``.
    """
    grid = load_graph('data/grid/2-6/graph.gt')
    # The cascade is generated before get_gvs is called, as in the original
    # evaluation order.
    part_c, part_s, part_o = gen_nontrivial_cascade(grid, 0.8, 0.5)
    gvs = get_gvs(grid, 0.5, 100)
    return grid, gvs, part_c, part_s, part_o
print("\tTotal: ", assortativity(g, "total")[0]) print("\tIncidência: ", assortativity(g, "in")[0]) print("\tSaída: ", assortativity(g, "out")[0]) def centralidadeStats(g, callback, title): pr = None if callback == betweenness: pr = callback(g)[0].a else: if callback == closeness: pr = callback(g, harmonic=True).a else: pr = callback(g).a print(title) stats(pr) histogram(np.histogram(pr), title, "Frequência", "Quantidade", sys.argv[1][:-8] + ".{}".format(title.lower())) g = load_graph(sys.argv[1]) #g = collection.data["cond-mat-2003"] degreeStats(g) distanceStats(g) clusteringStats(g) assort(g) centralidadeStats(g, pagerank, "PageRank") centralidadeStats(g, betweenness, "Betweenness") centralidadeStats(g, closeness, "Closeness") centralidadeStats(g, katz, "Katz")
def load_net(infile, core = False, filter = False):
    '''
    Load a graph file, optionally extracting its core set and filtering it.

    If ``output/<prefix>.out.gt`` exists it is loaded instead of `infile`.

    :param infile: The `graphml` file to load.
    :param core: Does the net contain a `core` vertex property map?
    :param filter: Apply a filter?  Only honoured together with `core`;
        with `core` false a notice is printed and nothing is filtered.
    :return: ``(net, outfile_pre)`` when `core` is false, otherwise
        ``(net, outfile_pre, core_pmap, core_vertices)`` where `outfile_pre`
        is `infile` without its extension, `core_pmap` is the `core` vertex
        property map and `core_vertices` the list of vertices flagged in it.
    '''
    # Function-scope import so this block does not depend on how the file
    # imports `os`.
    import os

    # Output filename prefix: `infile` without its extension.  `splitext`
    # only strips a suffix of the last path component, so names without an
    # extension and directories containing periods keep their full prefix.
    outfile_pre = os.path.splitext(infile)[0]

    if path.exists('output/' + outfile_pre + '.out.gt'):
        print('Found pre-procesed graph')
        infile = 'output/' + outfile_pre + '.out.gt'

    print('Loading ' + infile)
    net = gt.load_graph(infile)

    # If `core` is true, extract the core set
    if core:
        core_pmap = net.vertex_properties['core']
        core_vertices = [vertex for vertex in net.vertices() if core_pmap[vertex]]

    # Print basic network statistics
    print('Loaded ' + infile)
    print('Vertices: ' + str(net.num_vertices()))
    print('Edges: ' + str(net.num_edges()))
    if core:
        print('Core vertices: ' + str(len(core_vertices)))

    if core and filter:
        # Add a filter
        print('Adding filter')
        if 'citenet0' in infile:
            # Recent papers filter for the citation net: keep year > 2005
            year = net.vp['year']
            recent_list = [year[vertex] > 2005 for vertex in net.vertices()]
            recent_pmap = net.new_vertex_property('boolean')
            recent_pmap.a = np.array(recent_list)
            net.set_vertex_filter(recent_pmap)
        else:
            # Distance from core set for the author nets: spread the core
            # flag outwards twice over the undirected graph
            net.set_directed(False)
            extended_set_pmap = core_pmap.copy()
            gt.infect_vertex_property(net, extended_set_pmap, vals=[True])
            gt.infect_vertex_property(net, extended_set_pmap, vals=[True])
            net.set_vertex_filter(extended_set_pmap)

        # Remove everything caught in the filter
        net.purge_vertices()
        # Extract the largest component
        net.set_vertex_filter(gt.label_largest_component(net, directed=False))
        net.purge_vertices()

        # Rebuild core: purging changed the vertex set, so re-read the map
        core_pmap = net.vertex_properties['core']
        core_vertices = [vertex for vertex in net.vertices() if core_pmap[vertex]]

        print('Filtered vertices: ' + str(net.num_vertices()))
        print('Filtered edges: ' + str(net.num_edges()))
        print('Filtered core: ' + str(len(core_vertices)))
    elif filter and not core:
        print('Filter = true with core = false')

    if core:
        return net, outfile_pre, core_pmap, core_vertices
    else:
        return net, outfile_pre
rootdir = Path('../25%Data')

# Regular files only (directories are skipped), as path strings.
file_list = [str(entry) for entry in rootdir.glob('*') if entry.is_file()]
# The same selection against the resolved (absolute) directory, as Path objects.
file_list_abs = [entry for entry in rootdir.resolve().glob('*') if entry.is_file()]

# accum: sum of all position-list lengths.
# Bucket counters: hundreds -> length < 100, thousands -> < 1000,
# tenthousands -> < 10000, hundthousands -> everything else.
accum = hundreds = thousands = tenthousands = hundthousands = 0

for f in file_list:
    g2 = gt.load_graph(f)
    length = len(g2.graph_properties["position_list"].getList())
    accum += length
    # Same buckets as an ascending "<" chain, tested from the top down.
    if length >= 10000:
        hundthousands += 1
    elif length >= 1000:
        tenthousands += 1
    elif length >= 100:
        thousands += 1
    else:
        hundreds += 1
save_name = ('_').join(posplitowane[:-1]) if posplitowane[-1] == 'dot': save_name = save_name + '.dot' else: save_name = save_name + '.xml' g.save(save_name) if zparsowane.PNG: rysuj_graf_wejsciowy(g, zparsowane.Output_file + '.png', size=(int(zparsowane.PNG[0]), int(zparsowane.PNG[1]))) if zparsowane.interactive: rysuj_graf_wejsciowy(g) elif 'rysuj_we' in argumenty: g = load_graph(zparsowane.Input) file_name = zparsowane.Input.split('.')[0] if zparsowane.PNG: rysuj_graf_wejsciowy(g, file_name + '.png' , size=(int(zparsowane.PNG[0]), int(zparsowane.PNG[1])) , bez_napisow=zparsowane.bez_napisow) if zparsowane.interactive: rysuj_graf_wejsciowy(g, bez_napisow=zparsowane.bez_napisow) elif 'rysuj_wy' in argumenty: file_name = zparsowane.Input.split('.')[0] if zparsowane.PNG: rysuj_wynik(zparsowane.Input , file_name + '.png'
def deserialize_graph(bytestring):
    """Rebuild a graph from its serialized bytes.

    :param bytestring: bytes holding a serialized graph.
    :return: the result of ``gt.load_graph`` reading those bytes from an
        in-memory stream positioned at its start.
    """
    # A BytesIO built from initial bytes starts at offset 0, which matches
    # writing the bytes into an empty buffer and rewinding it.
    stream = BytesIO(bytestring)
    return gt.load_graph(stream)
import graph_tool.all as gt
from sys import argv
from re import findall

if __name__ == '__main__':
    # Every command-line argument is a graph file to simplify and export.
    for f in argv[1:]:
        loaded = gt.load_graph(f)
        # Undirected view of the loaded graph, without its property maps.
        g = gt.GraphView(loaded, directed=False, skip_properties=True)
        gt.remove_parallel_edges(g)
        gt.remove_self_loops(g)

        # Output name: the second-to-last run of characters free of '/' and
        # '.' in the path, cut at the first '--'.
        pieces = findall('[^/.]+', f)
        name = pieces[-2].split('--')[0]

        xml_target = 'output/{}.xml'.format(name)
        png_target = 'output/{}.png'.format(name)
        g.save(xml_target)
        gt.graph_draw(g, output=png_target)
def useGraphTool(pd):
    """Print statistics about planner data, draw it and save an annotated copy.

    The planner data is exported as GraphML to ``graph.graphml``, loaded with
    graph-tool and summarised on stdout.  Vertices are drawn to ``graph.pdf``
    at positions taken from the first two components of each vertex state.
    Vertex/edge styling (start in cyan, goal in green, others yellow, A*
    shortest-path edges in red) is stored as properties in ``mgraph.graphml``.

    :param pd: planner data object; this function calls ``printGraphML()``,
        ``isStartVertex(i)``, ``isGoalVertex(i)`` and
        ``getVertex(i).getState()`` on it.
    """
    # Extract the graphml representation of the planner data.  The context
    # manager guarantees the file is flushed and closed before it is re-read.
    graphml = pd.printGraphML()
    with open("graph.graphml", 'w') as f:
        f.write(graphml)

    # Load the graphml data using graph-tool
    graph = gt.load_graph("graph.graphml", fmt="xml")
    edgeweights = graph.edge_properties["weight"]

    # Write some interesting statistics
    avgdeg, stddevdeg = gt.vertex_average(graph, "total")
    avgwt, stddevwt = gt.edge_average(graph, edgeweights)

    print("---- PLANNER DATA STATISTICS ----")
    print(str(graph.num_vertices()) + " vertices and " + str(graph.num_edges()) + " edges")
    print("Average vertex degree (in+out) = " + str(avgdeg) + " St. Dev = " + str(stddevdeg))
    print("Average edge weight = " + str(avgwt) + " St. Dev = " + str(stddevwt))

    _, hist = gt.label_components(graph)
    print("Strongly connected components: " + str(len(hist)))

    # Make the graph undirected (for weak components, and a simpler drawing)
    graph.set_directed(False)
    _, hist = gt.label_components(graph)
    print("Weakly connected components: " + str(len(hist)))

    # Plotting the graph
    gt.remove_parallel_edges(graph)  # Removing any superfluous edges

    edgeweights = graph.edge_properties["weight"]
    colorprops = graph.new_vertex_property("string")
    vertexsize = graph.new_vertex_property("double")

    start = -1
    goal = -1

    for v in range(graph.num_vertices()):
        vertex = graph.vertex(v)
        # Color and size vertices by type: start, goal, other
        if pd.isStartVertex(v):
            start = v
            colorprops[vertex] = "cyan"
            vertexsize[vertex] = 10
        elif pd.isGoalVertex(v):
            goal = v
            colorprops[vertex] = "green"
            vertexsize[vertex] = 10
        else:
            colorprops[vertex] = "yellow"
            vertexsize[vertex] = 5

    # default edge color is black with size 0.5:
    edgecolor = graph.new_edge_property("string")
    edgesize = graph.new_edge_property("double")
    for e in graph.edges():
        edgecolor[e] = "black"
        edgesize[e] = 0.5

    # using A* to find shortest path in planner data
    if start != -1 and goal != -1:
        start_vertex = graph.vertex(start)
        _, pred = gt.astar_search(graph, start_vertex, edgeweights)

        # Color edges along shortest path red with size 2.0
        v = graph.vertex(goal)
        while v != start_vertex:
            p = graph.vertex(pred[v])
            if p == v:
                # A vertex that is its own predecessor was never reached by
                # the search, i.e. there is no path from start to goal.
                # Without this guard the walk would spin forever.
                break
            for e in p.out_edges():
                if e.target() == v:
                    edgecolor[e] = "red"
                    edgesize[e] = 2.0
            v = p

    # Vertex positions: the first two components of each vertex's state
    pos = graph.new_vertex_property("vector<double>")
    for v in range(graph.num_vertices()):
        vtx = pd.getVertex(v)
        st = vtx.getState()
        pos[graph.vertex(v)] = [st[0], st[1]]

    # Writing graph to file: pos gives the desired vertex positions.  The
    # styling maps are intentionally not passed to the drawing; the styled
    # variant of the call is kept here for reference:
    # gt.graph_draw(graph, pos=pos, vertex_size=vertexsize, vertex_fill_color=colorprops,
    #               edge_pen_width=edgesize, edge_color=edgecolor,
    #               output="graph.pdf")
    gt.graph_draw(graph, pos=pos, output="graph.pdf")
    print('\nGraph written to graph.pdf')

    # Persist positions and styling as named properties with the graph
    graph.vertex_properties["pos"] = pos
    graph.vertex_properties["vsize"] = vertexsize
    graph.vertex_properties["vcolor"] = colorprops
    graph.edge_properties["esize"] = edgesize
    graph.edge_properties["ecolor"] = edgecolor

    graph.save("mgraph.graphml")
    print('\nGraph saved to mgraph.graphml')
import sys
from graph_tool.all import load_graph

# The first command-line argument is the directory that holds graph.graphml;
# the converted graph is written next to it as graph.gt.
path = sys.argv[1]
source_file = '{}/graph.graphml'.format(path)
target_file = '{}/graph.gt'.format(path)

g = load_graph(source_file)
g.save(target_file)
__author__ = 'coxious'

from config import *
import graph_tool.all as gt
import random
import threading
import pandas as pd
import entities
import multiprocessing

# base_path and graph_tool_file are not defined here; presumably they come
# from the star import of config - verify.
G = gt.load_graph(base_path + graph_tool_file)

taxi_data = pd.DataFrame()

# NOTE(review): this binds a second name to the same graph object, not a copy.
G_no_moving = G
inactive_vertex = []

count = 0

# Single lock shared by every function wrapped with core_atomic_routine.
core_lock = threading.Lock()

#for road in G.edges():
#    print G.edge_properties['distance'][road]


def core_atomic_routine(f):
    """Decorator: run *f* while holding ``core_lock``.

    :param f: callable to wrap.
    :return: a wrapper that acquires ``core_lock``, calls *f* with the given
        arguments and returns its result.  The lock is released even when *f*
        raises, so one failing call cannot block every later call.

    ``core_lock`` is a plain ``threading.Lock``, so a wrapped function must
    not call another wrapped function from the same thread.
    """
    # Function-scope import keeps this block self-contained.
    import functools

    @functools.wraps(f)  # keep the wrapped function's name and docstring
    def func(*args, **kwargs):
        with core_lock:
            return f(*args, **kwargs)

    return func
def __init__(self, file_input):
    """Load the graph from *file_input* and build the helpers that use it.

    :param file_input: passed as-is to ``load_graph``.

    Sets ``self.graph`` to the loaded graph, ``self.spr`` to a
    ``Sprawdzenie`` and ``self.stat`` to a ``StatInfo``, both constructed
    from that graph.
    """
    loaded = load_graph(file_input)
    self.graph = loaded
    self.spr = Sprawdzenie(loaded)
    self.stat = StatInfo(loaded)