def session_draw_bis_melty(sessions_id, weblog, weblog_columns_dict):
    """
    Draw the graph of the session whose session id is given as input
    """
    import pandas as pd
    from graph_tool.all import Graph
    from graph_tool.all import graph_draw
    session = weblog[weblog.session_id == sessions_id]
    session = session.rename(index=str,
                             columns={weblog_columns_dict['requested_page_column']: 'requested_page',
                                      weblog_columns_dict['referrer_page_column']: 'referrer_page'})
    s_pages = session[['requested_page', 'requested_external']]
    s_pages_ref = session[['referrer_page', 'referrer_external']]
    s_pages_ref = s_pages_ref.rename(index=str,
                                     columns={'referrer_page': 'requested_page',
                                              'referrer_external': 'requested_external'})
    # DataFrame.append was removed in pandas 2.0; concat is the supported spelling
    s_pages = pd.concat([s_pages, s_pages_ref])
    s_pages.drop_duplicates(subset='requested_page', inplace=True)
    g = Graph()
    v = {}
    halo = g.new_vertex_property("bool")
    for row in s_pages.itertuples():
        v[row.requested_page] = g.add_vertex()
        halo[v[row.requested_page]] = bool(row.requested_external)
    session.apply(lambda x: g.add_edge(v[x.referrer_page], v[x.requested_page]), axis=1)
    graph_draw(g, vertex_halo=halo, output="./_session" + str(sessions_id) + ".png")
    return
def gen_graph_from_mongo(self): ''' load graph structure from storage. note that add_edge_list will not match vertex ids (str ids) in subsequent calls of the function ''' self.from_storage(find={'projection': {'material_id': 1, 'edges': 1}}) sources = self.memory['material_id'] destinations = self.memory['edges'] self.memory = None # cleanup memory attribute print('loaded data structures') edge_list = [(sources[i], destinations[i][j]) for i in range(len(sources)) for j in range(len(destinations[i]))] print('generated edge list') sources = None # cleanup temporary data variables destinations = None graph = Graph(directed=False) graph.add_edge_list(edge_list, hashed=True, string_vals=True) return graph
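# Addition, not in the original: a minimal sketch of how the string ids fed to
# add_edge_list(hashed=True) can be recovered. In recent graph-tool versions
# the call returns a vertex property map holding the original ids (older
# versions used the string_vals flag seen above).
from graph_tool.all import Graph

edge_list = [('mp-1', 'mp-2'), ('mp-2', 'mp-3')]
g = Graph(directed=False)
ids = g.add_edge_list(edge_list, hashed=True)  # ids[v] -> original string id
for v in g.vertices():
    print(int(v), ids[v])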
def make_article_graph(self, layout="arf"):
    """Make an article graph"""
    self.graph = Graph(directed=False)
    # add vertices
    self.graph.add_vertex(len(self.db))
    # add properties
    cb = self.graph.new_vertex_property("int", self.db['Cited by'].values)
    self.graph.vertex_properties['nmb_citation'] = cb
    # add links
    auths = list(self.author_betweeness.keys())
    auth2ind = {auths[i]: i for i in range(len(auths))}
    auth2pub = self._get_author_publication()
    for _, pubs in auth2pub.items():
        if len(pubs) < 2:
            continue
        combis = itertools.combinations(pubs, 2)
        self.graph.add_edge_list(list(combis))
    # layout
    if layout == "arf":
        self.layout_pos = arf_layout(self.graph)
    elif layout == "sfdp":  # was misspelled "sfpd"; the layout is sfdp
        self.layout_pos = sfdp_layout(self.graph)
    elif layout == "fr":
        self.layout_pos = fruchterman_reingold_layout(self.graph)
    elif layout == "radial":
        self.layout_pos = radial_tree_layout(self.graph,
                                             auth2ind['Logan, B.E.'])
    else:
        raise ValueError()
def simulate_cascade(g, p, source=None, return_tree=False): """ graph_tool version of simulating cascade return np.ndarray on vertices as the infection time in cascade uninfected node has dist -1 """ if source is None: source = random.choice(np.arange(g.num_vertices(), dtype=int)) gv = sample_graph_by_p(g, p) times = get_infection_time(gv, source) if return_tree: all_edges = set() for target in np.nonzero(times != -1)[0]: path = shortest_path(gv, source=source, target=gv.vertex(target))[0] edges = set(zip(path[:-1], path[1:])) all_edges |= edges tree = Graph(directed=True) for _ in range(g.num_vertices()): tree.add_vertex() for u, v in all_edges: tree.add_edge(int(u), int(v)) return source, times, tree else: return source, times
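# sample_graph_by_p() is defined elsewhere in this codebase and not shown; a
# plausible minimal sketch (an assumption, not the original implementation)
# keeps each edge independently with probability p via an edge filter:
import numpy as np
from graph_tool.all import GraphView

def sample_graph_by_p(g, p):
    """Return a view of g in which each edge survives with probability p."""
    efilt = g.new_edge_property('bool')
    efilt.a = np.random.random(g.num_edges()) <= p
    return GraphView(g, efilt=efilt)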
def test_feasibility(g, weights): internal_g = from_gt(g, weights) edges = minimum_branching(internal_g, [0]) tree = Graph(directed=True) tree.add_edge_list(edges) assert is_arborescence(tree)
def construct_motif_graph(graph_container, motif, vertex_maps=None):
    """Construct and return an undirected gt graph containing motif
    relationships.

    Note that graph_tool generates empty nodes to fill in the missing
    indices. For example, if we add edge (1,2) to an empty graph, the
    graph will have 3 nodes: 0, 1, 2 and 1 edge (1,2). For this reason,
    the returned `m_graph` usually has a large number of disconnected nodes.

    Parameters:
    graph_container - GraphContainer - Store the original network
    motif - Motif - Motif in study

    Returns:
    m_graph - gt.Graph - Undirected graph for motif co-occurrence
    """
    if motif.anchors is None:
        print("Warning: Turning motif groups into cliques.")
    graph = graph_container.get_gt_graph()
    graph.set_directed(motif.gt_motif.is_directed())  # graph_tool.Graph
    m_graph = Graph(directed=False)
    if vertex_maps is None:
        m, c, vertex_maps = count_motif(graph, motif)
    for prop_list in vertex_maps:
        for prop in prop_list:
            edges = [i for i in motif.anchored_edges(graph, prop.get_array())]
            m_graph.add_edge_list(edges)
    return m_graph
def alignment_graph(lengths=[], pairings=[], alignments=[]):
    #print('making graph')
    g = Graph(directed=False)
    seq_index = g.new_vertex_property("int")
    time = g.new_vertex_property("int")
    #add vertices
    g.add_vertex(sum(lengths))
    seq_index.a = np.concatenate([np.repeat(i, l) for i, l in enumerate(lengths)])
    time.a = np.concatenate([np.arange(l) for l in lengths])
    #add edges (alignments)
    alignment_index = g.new_edge_property("int")
    segment_index = g.new_edge_property("int")
    for i, a in enumerate(alignments):
        if len(a) > 0:
            j, k = pairings[i]
            pairs = np.concatenate(a, axis=0)
            indicesJ = (np.arange(lengths[j]) + sum(lengths[:j]))[pairs.T[0]]
            indicesK = (np.arange(lengths[k]) + sum(lengths[:k]))[pairs.T[1]]
            # distinct names here to avoid shadowing the outer loop variables
            seg_indices = np.concatenate([np.repeat(s, len(seg))
                                          for s, seg in enumerate(a)])
            g.add_edge_list(np.vstack([indicesJ, indicesK,
                                       np.repeat(i, len(pairs)), seg_indices]).T,
                            eprops=[alignment_index, segment_index])
    #g.add_edge_list([(b, a) for (a, b) in g.edges()])
    #print('created alignment graph', g)
    #g = prune_isolated_vertices(g)
    #print('pruned alignment graph', g)
    #g = transitive_closure(g)
    #graph_draw(g, output_size=(1000, 1000), output="results/casey_jones_bars.pdf")
    return g, seq_index, time, alignment_index, segment_index
def graph_measures(graph: gt.Graph) -> pd.DataFrame: def get_attrs(attrs): return (attrs[1][0], attrs[1][1][1], attrs[0]) def append_val(key, prop, v): measures[key][0].append(prop[v]) _, vp_authority, vp_hub = gt.hits(graph) measures = { key: ([], prop) for key, prop in { 'tp_group': graph.vp.group_name, 'tp_author': graph.vp.username, 'tn_degree_in': graph.degree_property_map('in'), 'tn_degree_out': graph.degree_property_map('out'), 'tn_degree_total': graph.degree_property_map('total'), 'tn_pagerank': gt.pagerank(graph), 'tn_betweenness': gt.betweenness(graph)[0], 'tn_closeness': gt.closeness(graph), 'tn_eigenvector': gt.eigenvector(graph)[1], 'tn_authority': vp_authority, 'tn_hub': vp_hub, 'tn_lcc': gt.local_clustering(graph) }.items() } for attrs in product(graph.vertices(), measures.items()): append_val(*get_attrs(attrs)) return pd.DataFrame( dict(map(lambda item: (item[0], item[1][0]), measures.items()))).fillna(0)
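# Hypothetical usage of graph_measures() on a toy graph carrying the two
# vertex properties it expects ('group_name' and 'username'); the property
# values here are made up for illustration.
import graph_tool.all as gt

g = gt.Graph(directed=True)
g.add_vertex(3)
g.add_edge_list([(0, 1), (1, 2), (2, 0)])
g.vp['group_name'] = g.new_vertex_property('string', vals=['a', 'a', 'b'])
g.vp['username'] = g.new_vertex_property('string', vals=['u0', 'u1', 'u2'])
print(graph_measures(g).head())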
def __init__(self, edges, weights): self.edges = edges self.graph = Graph() self.size = len(edges['target']) self.graph.add_vertex(self.size) self.weights = weights # init weights part self.graph.vp.weights = self.graph.new_vertex_property('int16_t') for index in range(0, self.size): self.graph.vp.weights[index] = weights[index] for source in self.edges['source'].keys(): for target in self.edges['source'][source]: self._add_edge(source, target) self.depth_per_node = {x: 0 for x in range(0, self.size)} self.accum_weights = {x: 0 for x in range(0, self.size)} self.find_depth() self.find_accum_weights(self.size - 1) self.depth = {x: [] for x in set(self.depth_per_node.values())} for node, depth in self.depth_per_node.items(): self.depth[depth].append(node) self.routes_t = {} self.find_routes(self.size - 1, 0, self.routes_t) self.routes = [] self.transpose_routes(self.size - 1, self.routes_t[self.size - 1])
def gen_sub_graph_from_mongo(self, center, snn=1): ''' load graph structure from storage. note that add_edge_list will not match vertex ids (str ids) in subsequent calls of the function Args: center (str) mp-id of the center of the graph snn (int) the number of second nearest neighbors to expand to ''' edge_list = [] self.from_storage( find={ 'filter': { 'material_id': center }, 'projection': { 'material_id': 1, 'edges': 1 } }) sources = self.memory['material_id'][0] destinations = self.memory['edges'][0] edge_list.extend([(sources, destinations[j]) for j in range(len(destinations))]) for i in range(snn): self.from_storage( find={ 'filter': { 'material_id': { '$in': destinations } }, 'projection': { 'material_id': 1, 'edges': 1 } }) sources = self.memory['material_id'] destinations = self.memory['edges'] edge_list.extend([(sources[i], destinations[i][j]) for i in range(len(sources)) for j in range(len(destinations[i]))]) destinations = [ destinations[i][j] for i in range(len(sources)) for j in range(len(destinations[i])) ] print('generated edge list') graph = Graph(directed=False) graph.add_edge_list(edge_list, hashed=True, string_vals=True) return graph
def test_feasibility(g, weights): edges = [(e[0], e[1], w) for e, w in zip(g.get_edges(), weights)] min_edges = find_minimum_branching(g.num_vertices(), edges, roots=[0]) tree = Graph(directed=True) tree.add_edge_list(min_edges) assert is_arborescence(tree)
def init_graph(graphml_path):
    global g
    g = Graph(directed=True)
    t0 = time()
    g.load(graphml_path)
    t1 = time()
    print("Loaded from GraphML in", t1 - t0)
    print("Loaded", g.num_vertices(), "nodes")
    print("Loaded", g.num_edges(), "edges")
def __init__(self): self.graph = Graph() self.v_names = self.graph.new_vertex_property("string") self.v_colors = self.graph.new_vertex_property("string") self.e_names = self.graph.new_edge_property("string") self.vertices = {} self.edges = {}
def get_graph(fname: str) -> Graph:
    fdir = os.path.join('..', 'data')
    fpath = os.path.join(fdir, fname)  # was built from an undefined `name`
    if os.path.exists(fpath):
        g = Graph()
        g.load(file_name=fpath, fmt='gt')
        return g
    else:
        raise FileNotFoundError('Invalid Graph, options are:\n' +
                                '\n'.join(os.listdir(fdir)))
def build_word_graph(model_fname, limiar=0.2):
    """
    Build a word graph weighted by the similarity between words
    according to the model.
    :param model_fname: file name of the saved word2vec model
    :param limiar: similarity threshold below which no edge is created
    :return: graph object
    """
    m = Word2Vec.load(model_fname)
    g = Graph()
    freq = g.new_vertex_property("int")
    weight = g.new_edge_property("float")
    i = 0
    vdict = {}
    for w1, w2 in combinations(m.vocab.keys(), 2):
        if w1 == '' or w2 == '':
            continue
        # print(w1, w2)
        v1 = g.add_vertex() if w1 not in vdict else vdict[w1]
        vdict[w1] = v1
        freq[v1] = m.vocab[w1].count
        v2 = g.add_vertex() if w2 not in vdict else vdict[w2]
        vdict[w2] = v2
        freq[v2] = m.vocab[w2].count
        sim = m.similarity(w1, w2)
        if sim > limiar:  # was a hard-coded 0.1, leaving the parameter unused
            e = g.add_edge(v1, v2)
            weight[e] = sim
        if i > 10000:
            break
        i += 1
    g.vertex_properties['freq'] = freq
    g.edge_properties['sim'] = weight
    return g
def simulate_cascade(g, p, source=None, return_tree=False):
    """
    graph_tool version of simulating cascade
    return np.ndarray on vertices as the infection time in cascade
    uninfected node has dist -1
    """
    gv = sample_graph_by_p(g, p)

    if source is None:
        # consider the largest cc
        infected_nodes = np.nonzero(label_largest_component(gv).a)[0]
        source = np.random.choice(infected_nodes)

    times = get_infection_time(gv, source)

    if return_tree:
        # get the tree edges; recompute the infected set from `times`, since
        # infected_nodes was undefined when a source was passed in
        _, pred_map = shortest_distance(gv, source=source, pred_map=True)
        infected_nodes = np.nonzero(times != -1)[0]
        edges = [(pred_map[i], i) for i in infected_nodes if i != source]

        # create tree
        tree = Graph(directed=True)
        tree.add_vertex(g.num_vertices())
        for u, v in edges:
            tree.add_edge(int(u), int(v))
        vfilt = tree.new_vertex_property('bool')
        vfilt.a = False
        for v in set(itertools.chain(*edges)):
            vfilt[v] = True
        tree.set_vertex_filter(vfilt)
        return source, times, tree
    else:
        return source, times
def get_incompatible_segments(g, seg_index, out_edges):
    incomp_graph = Graph(directed=False)
    num_segs = np.max(seg_index.a) + 1
    incomp_graph.add_vertex(num_segs)
    for v in g.get_vertices():
        for vs in group_adjacent(sorted(g.get_out_neighbors(v))):
            edges = out_edges[v][np.where(np.isin(out_edges[v][:, 1], vs))][:, 2]
            segments = list(np.unique(seg_index.a[edges]))
            # connect every pair of co-occurring segments
            for i, s in enumerate(segments):
                for t in segments[i + 1:]:
                    incomp_graph.add_edge(s, t)
    return label_components(incomp_graph)[0].a
def __init__(self, size: Tuple[int, int] = (10, 10),
             field_size: Tuple[int, int] = (100, 100)):
    self.g = Graph(directed=True)
    self.n_zones = size[0] * size[1]
    self.fwidth = field_size[0]
    self.fheight = field_size[1]
    self.n_rows = size[0]
    self.n_cols = size[1]
    self.row_size: float = self.fheight / self.n_rows
    self.col_size: float = self.fwidth / self.n_cols
    self.g.add_vertex(self.n_zones)
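# Hypothetical companion helper, not in the original class: map a field
# coordinate to the vertex index of its zone, using the row/col sizes
# computed in __init__ above.
def zone_index(self, x: float, y: float) -> int:
    col = min(int(x // self.col_size), self.n_cols - 1)
    row = min(int(y // self.row_size), self.n_rows - 1)
    return row * self.n_cols + col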
def reset_variables(self): """Resets all variables.""" self.__graph = Graph() self.__vertices_by_school_id = {} self.__vertices_by_student_id = {} self.__students_by_id = {} self.__schools_by_id = {} self.__entity_id = self.__graph.new_vertex_property("int") self.__graph.vertex_properties["entity_id"] = self.__entity_id self.__entity_type = self.__graph.new_vertex_property("string") self.__graph.vertex_properties["entity_type"] = self.__entity_type
def __init__(self): self.g = Graph(directed=True) self.player_id_to_vertex = {} self.pairs = {} # player pair: edge # property maps for additional information self.g.vertex_properties['player_id'] = self.g.new_vertex_property( "string") self.g.vertex_properties['player_coords'] = self.g.new_vertex_property( "vector<float>") self.g.vertex_properties[ 'average_player_coords'] = self.g.new_vertex_property( "vector<float>") self.g.vertex_properties[ 'player_n_coords'] = self.g.new_vertex_property("int") self.g.edge_properties['weight'] = self.g.new_edge_property("float")
def getDegreeValuesOf(g: gt.Graph):
    # start min_deg at the maximum possible degree (that of a clique vertex)
    min_deg = len(list(g.vertices())) - 1
    max_deg = 0
    avg_deg = 0
    for v in g.vertices():
        v: gt.Vertex = v
        # in_degree is 0 for undirected graphs
        deg = v.out_degree() + v.in_degree()
        if deg > max_deg:
            max_deg = deg
        if deg < min_deg:
            min_deg = deg
        avg_deg += deg
    avg_deg = avg_deg / len(list(g.vertices()))
    return {"min_deg": min_deg, "avg_deg": avg_deg, "max_deg": max_deg}
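# Quick check of getDegreeValuesOf() on a 3-vertex path (degrees 1, 2, 1).
# Note graph-tool's built-in gt.vertex_average(g, 'total') could replace the
# manual average computation.
import graph_tool.all as gt

g = gt.Graph(directed=False)
g.add_vertex(3)
g.add_edge_list([(0, 1), (1, 2)])
print(getDegreeValuesOf(g))  # {'min_deg': 1, 'avg_deg': 1.33..., 'max_deg': 2}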
def __init__(self, directed=True, verbose=1): self.graphtool = GRAPH_TOOL # Initialize graph if self.graphtool: self.graph = Graph(directed=directed) self.weight = self.graph.new_edge_property("float") else: if directed: print("directed graph") self.graph = nx.DiGraph() else: self.graph = nx.Graph() # set metaparameter self.time_logs = {} self.verbose = verbose
def __init__(self):
    logger.info("starting UNIS Network Runtime Environment...")
    fconf = get_file_config(nre_settings.CONFIGFILE)
    self.conf = deepcopy(nre_settings.STANDALONE_DEFAULTS)
    merge_dicts(self.conf, fconf)

    self.unis_url = str(self.conf['properties']['configurations']['unis_url'])
    self.ms_url = str(self.conf['properties']['configurations']['ms_url'])
    self._unis = unis_client.UNISInstance(self.conf)
    self.time_origin = int(time())

    self._schemas = SchemaCache()
    self._resources = self.conf['resources']
    self._subunisclient = {}

    for resource in self._resources:
        setattr(self, resource, {'new': {}, 'existing': {}})

    # construct the hierarchical representation of the network
    for resource in self._resources:
        # only pullRuntime once at the beginning, as pubsub will only update
        # them later when resources are modified on the server
        self.pullRuntime(self, self._unis, self._unis.get(resource), resource, False)

    # construct the graph representation of the network, of which this NRE is in charge
    self.g = Graph()
    self.nodebook = {}
    for key in self.nodes['existing'].keys():
        self.nodebook[key] = self.g.add_vertex()

    for key, link in self.links['existing'].items():  # iteritems() is Python 2 only
        if hasattr(link, 'src') and hasattr(link, 'dst'):
            self.g.add_edge(self.nodebook[link.src.node.selfRef],
                            self.nodebook[link.dst.node.selfRef],
                            add_missing=False)
def make_author_graph(self, layout="arf"):
    """Make an author graph"""
    self.graph = Graph(directed=False)
    # add vertices
    auths = self.author_list
    self.graph.add_vertex(len(auths))
    # add links
    auth2ind = {auths[i]: i for i in range(len(auths))}
    abet = []
    authbet = copy.deepcopy(self.author_betweeness)
    for auth in auths:
        for col, weight in authbet[auth].items():
            if col == auth:
                continue
            self.graph.add_edge(auth2ind[auth], auth2ind[col])
            del authbet[col][auth]  # ensure that edges are not doubled
            abet.append(weight)
    # add properties
    cb = self.graph.new_edge_property("int", abet)
    self.graph.edge_properties['weight'] = cb
    # layout
    if layout == "arf":
        self.layout_pos = arf_layout(self.graph,
                                     weight=self.graph.ep.weight,
                                     pos=self.layout_pos,
                                     max_iter=10000)
    elif layout == "sfdp":  # was misspelled "sfpd"; the layout is sfdp
        self.layout_pos = sfdp_layout(self.graph,
                                      eweight=self.graph.ep.weight,
                                      pos=self.layout_pos)
    elif layout == "fr":
        self.layout_pos = fruchterman_reingold_layout(self.graph,
                                                      weight=self.graph.ep.weight,
                                                      circular=True,
                                                      pos=self.layout_pos)
    elif layout == "radial":
        nc = self.get_total_citation()
        main_auth_ind = np.argmax(list(nc.values()))
        main_auth = list(nc.keys())[main_auth_ind]
        self.layout_pos = radial_tree_layout(self.graph,
                                             auth2ind[main_auth])
    elif layout == "planar":
        self.layout_pos = planar_layout(self.graph)
    else:
        raise ValueError()
def test_graphtool(): g = Graph(directed=True) g.add_vertex(4) g.add_edge_list([(0, 1), (1, 2), (2, 3), (3, 0)]) weight = g.new_edge_property('float') weight[g.edge(0, 1)] = 1 weight[g.edge(1, 2)] = 2 weight[g.edge(2, 3)] = 3 weight[g.edge(3, 0)] = 4 assert set(gt2edges_and_weights(g, weight)) == { (0, 1, 1), (1, 2, 2), (2, 3, 3), (3, 0, 4) }
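# gt2edges_and_weights() is not shown in this file; a minimal sketch that
# satisfies the assertion above (an assumption about its contract, not the
# original implementation):
def gt2edges_and_weights(g, weight):
    return [(int(e.source()), int(e.target()), weight[e]) for e in g.edges()]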
class StackGraph(object):
    def __init__(self):
        self.g = None

    def load(self, filename):
        # Initialize the graph
        self.g = Graph()

        # Each node will store a FunctionWrapper() class instance.
        self.g.vertex_properties["functions"] = self.g.new_vertex_property("object")
        self.g.vertex_properties["display"] = self.g.new_vertex_property("string")

        # Each edge will store a [ ..tbd.. ] .
        self.g.edge_properties["calls"] = self.g.new_edge_property("object")

        # Load the log file and build the graph
        i = 0
        f = open(filename, "r")  # text mode, so the "*" test below works in Python 3
        for line in f:
            i += 1
            try:
                # Skip any informational lines
                if "*" in line:
                    continue

                # Extract a call stack snapshot
                words = line.split()
                time = words[0][2:]
                depth = words[1][2:]
                stack = [FunctionWrapper(instring=item) for item in words[2].split("->")]

                # Add the top 2 functions to the graph, if necessary. Format: f1()->f2()
                f1, f2 = stack[-2], stack[-1]
                v1, v2 = None, None

                # Search for the vertices
                for v in self.g.vertices():
                    if self.g.vp.functions[v] == f1:
                        v1 = v
                    if self.g.vp.functions[v] == f2:
                        v2 = v
                    if v1 is not None and v2 is not None:
                        break

                # Add new vertices if needed
                if v1 is None:
                    v1 = self.g.add_vertex()
                    self.g.vp.functions[v1] = f1
                    self.g.vp.display[v1] = f1.graphDisplayString()
                if v2 is None:
                    v2 = self.g.add_vertex()
                    self.g.vp.functions[v2] = f2
                    self.g.vp.display[v2] = f2.graphDisplayString()

                # Add the edge if necessary, and then add data to it
                # (look the edge up first, so `e` is defined on both paths)
                e = self.g.edge(v1, v2)
                if e is None:
                    e = self.g.add_edge(v1, v2)
                    self.g.ep.calls[e] = CallList(v1, v2)
                self.g.ep.calls[e].addCall(time, depth)
            except Exception as e:
                print("Exception on line", i, ":", e)
                print([str(x) for x in stack])
                exit()
def get_pagerank_values(self): start = time.time() logger.info('Started call to get_pagerank') g = Graph() vp = g.add_edge_list(self.__v.get_graph_edges(), hashed=True, hash_type='int') logger.info('Delta time to build graph: {}s'.format( timedelta(seconds=(time.time() - start)))) start = time.time() ranks = pagerank(g) logger.info('Delta time to compute pagerank: {}s'.format( timedelta(seconds=(time.time() - start)))) for vertex in g.vertices(): qid = vp[vertex] r = ranks[vertex] yield qid, r
def load_train(name): ''' Training file is numbered from 0 to n. Not all nodes in the training file have their own row. ''' g = Graph() node_ids = set() n = -1 for n, (node_id, neighbor_ids) in enumerate(iter_adj_list(name)): node_ids.add(node_id) node_ids.update(neighbor_ids) n += 1 g.add_vertex(len(node_ids)) for i, (node_id, neighbor_ids) in enumerate(iter_adj_list(name)): print('adding edge for vertex {}/{}'.format(i + 1, n)) for neighbor_id in neighbor_ids: g.add_edge(node_id, neighbor_id) return g
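# iter_adj_list() is assumed (it is not shown here) to yield
# (node_id, neighbor_ids) pairs from whitespace-separated adjacency rows;
# a minimal sketch under that assumption:
def iter_adj_list(name):
    with open(name) as f:
        for line in f:
            fields = [int(x) for x in line.split()]
            if fields:
                yield fields[0], fields[1:]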
def mwgm_graph_tool(pairs, sim_mat): from graph_tool.all import Graph, max_cardinality_matching if not isinstance(pairs, list): pairs = list(pairs) g = Graph() weight_map = g.new_edge_property("float") nodes_dict1 = dict() nodes_dict2 = dict() edges = list() for x, y in pairs: if x not in nodes_dict1.keys(): n1 = g.add_vertex() nodes_dict1[x] = n1 if y not in nodes_dict2.keys(): n2 = g.add_vertex() nodes_dict2[y] = n2 n1 = nodes_dict1.get(x) n2 = nodes_dict2.get(y) e = g.add_edge(n1, n2) edges.append(e) weight_map[g.edge(n1, n2)] = sim_mat[x, y] print("graph via graph_tool", g) res = max_cardinality_matching(g, heuristic=True, weight=weight_map, minimize=False) edge_index = np.where(res.get_array() == 1)[0].tolist() matched_pairs = set() for index in edge_index: matched_pairs.add(pairs[index]) return matched_pairs
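# Hypothetical toy input for mwgm_graph_tool(): three candidate pairs scored
# by a similarity matrix. The two disjoint, high-similarity pairs should
# survive the weighted matching.
import numpy as np

sim = np.array([[0.9, 0.1],
                [0.2, 0.8]])
pairs = [(0, 0), (0, 1), (1, 1)]
print(mwgm_graph_tool(pairs, sim))  # likely {(0, 0), (1, 1)}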
def analysis(name: str):
    t = clock()
    g = Graph()
    g.load(file_name=f'../data/{name}', fmt='gt')
    print(f'Data loaded. Time: {clock() - t:.2f}s', end='\n')
    v = g.get_vertices()
    e = g.get_edges()

    # Degree
    deg = g.get_total_degrees(v)
    deg_rpt = report(deg)

    # Connected Components
    com = components(g)
    com_rpt = report(com)

    td = clock()
    # Distances
    dis = distances(g)
    dis_rpt = report(dis)
    print(f'td = {clock() - td}')

    print(f'Vertices: {len(v)}; Edges: {len(e)}; Connected components: {len(com)};',
          end='\n--\n')
    print(f'Vertex degrees:\n{deg_rpt}', end='\n--\n')
    print(f'Connected component sizes:\n{com_rpt}', end='\n--\n')
    print(f'Distances:\n{dis_rpt}', end='\n--\n')
    print(f"Total time: {clock() - t:.2f}s")
def init_graph(self): self.graph = Graph(directed=False) self.vertex_index = dict() self.graph.graph_properties["id"] = self.graph.new_graph_property( "long") self.graph.graph_properties["id"] = 0 self.graph.vertex_properties["id"] = self.graph.new_vertex_property( "long") self.graph.vertex_properties["x"] = self.graph.new_vertex_property( "double") self.graph.vertex_properties["y"] = self.graph.new_vertex_property( "double") self.graph.vertex_properties["t"] = self.graph.new_vertex_property( "long") self.graph.vertex_properties["f"] = self.graph.new_vertex_property( "vector<double>") self.graph.edge_properties["d"] = self.graph.new_edge_property( "double")
def calc_pagerank(g: gt.Graph) -> List[Tuple[int, str, float]]: """ Return: sorted list of tuples, [(vertex_idx, wk_title, pagerank_value), ....] """ vp_label = g.vp['_graphml_vertex_id'] # same as wktitle pr = gt.pagerank(g) ranks = [(g.vertex_index[v], vp_label[v], pr[v]) for v in g.vertices()] ranks = sorted(ranks, key=lambda e: -e[-1]) return ranks
def main(): args = parser.parse_args() print("Reading data...") set_dataloc(args.dloc) metadata = get_metadata() graph = GraphContainer(find_meta(args.dataset), args.dloc) print("Creating gt.Graph...") gt_graph = graph.get_gt_graph() assert args.motif_size == 4 or args.motif_size == 3 # Only motif 3 and 4 all_motif = None if args.motif_size == 3: if gt_graph.is_directed(): all_motif = all_3 else: all_motif = all_u3 else: if gt_graph.is_directed(): all_motif = all_4 else: all_motif = all_u4 motif_func = None if args.num_shuffles <= 0: # Motif count motif_func = motifs else: motif_func = motif_significance output = args.output + str(args.num_shuffles) print("Writing scores to file...") with open(output, "w") as ofile: info = "Dataset: {d} - Motif size: {m} - Directed: {di}\n".format( d=args.dataset, m=args.motif_size, di=str(gt_graph.is_directed())) ofile.write(info) for i, mc in enumerate(all_motif): idx = gt_graph.vertex_index.copy("int") shuffle(idx.a) g = Graph(gt_graph, vorder=idx) if args.num_shuffles <= 0: score = motifs(g, k=args.motif_size, motif_list=[mc.gt_motif])[1][0] else: score = motif_significance(g, k=args.motif_size, n_shuffles=args.num_shuffles, motif_list=[mc.gt_motif])[1][0] r = "Motif index {}: {}\n".format(i, score) print(r) ofile.write(r) print("Motif analysis for {} is completed.".format(args.dataset))
def __init__(self, sentence, directed=False, graph=None): # Create a SentenceGraph from an existing graph tool graph if graph is not None: self.sentence_graph = graph return # Create a new SentenceGraph from scratch self.sentence_graph = Graph(directed=directed) # Graph properties sentence_property = self.sentence_graph.new_graph_property("string", sentence) self.sentence_graph.graph_properties[SENTENCE_KEY] = sentence_property # Vertex properties word_property = self.sentence_graph.new_vertex_property("string") part_of_speech_property = self.sentence_graph.new_vertex_property("string") vertex_color_property = self.sentence_graph.new_vertex_property("vector<double>") self.sentence_graph.vertex_properties[WORD_KEY] = word_property self.sentence_graph.vertex_properties[PART_OF_SPEECH_KEY] = part_of_speech_property self.sentence_graph.vertex_properties[VERTEX_COLOR_KEY] = vertex_color_property # Edge properties sentence_edge_property = self.sentence_graph.new_edge_property("string") definition_edge_property = self.sentence_graph.new_edge_property("string") parsed_dependencies_edge_property = self.sentence_graph.new_edge_property("string") inter_sentence_edge_property = self.sentence_graph.new_edge_property("string") edge_color_property = self.sentence_graph.new_edge_property("vector<double>") dependency_edge_property = self.sentence_graph.new_edge_property("string") self.sentence_graph.edge_properties[SENTENCE_EDGE_KEY] = sentence_edge_property self.sentence_graph.edge_properties[DEFINITION_EDGE_KEY] = definition_edge_property self.sentence_graph.edge_properties[PARSED_DEPENDENCIES_EDGE_KEY] = parsed_dependencies_edge_property self.sentence_graph.edge_properties[INTER_SENTENCE_EDGE_KEY] = inter_sentence_edge_property self.sentence_graph.edge_properties[EDGE_COLOR_KEY] = edge_color_property self.sentence_graph.edge_properties[PARSE_TREE_DEPENDENCY_VALUE_KEY] = dependency_edge_property # Edge filter properties definition_edge_filter_property = self.sentence_graph.new_edge_property("bool") inter_sentence_edge_filter_property = self.sentence_graph.new_edge_property("bool") parsed_dependencies_edge_filter_property = self.sentence_graph.new_edge_property("bool") sentence_edge_filter_property = self.sentence_graph.new_edge_property("bool") self.sentence_graph.edge_properties[FILTER_DEFINITION_EDGE_KEY] = definition_edge_filter_property self.sentence_graph.edge_properties[FILTER_INTER_SENTENCE_EDGE_KEY] = inter_sentence_edge_filter_property self.sentence_graph.edge_properties[FILTER_PARSED_DEPENDENCIES_EDGE_KEY] = parsed_dependencies_edge_filter_property self.sentence_graph.edge_properties[FILTER_SENTENCE_EDGE_KEY] = sentence_edge_filter_property
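# Hypothetical usage of the filter properties set up above: restrict a view
# of the graph to sentence edges only (`sg` stands for a SentenceGraph
# instance; FILTER_SENTENCE_EDGE_KEY is the module constant used above).
from graph_tool.all import GraphView

sentence_view = GraphView(
    sg.sentence_graph,
    efilt=sg.sentence_graph.edge_properties[FILTER_SENTENCE_EDGE_KEY])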
class __Graph__:
    def __init__(self):
        self.graph = GT_Graph()
        self.cookies = dict()
        self.cookierecvr = CookieRecvr(self)
        self.cookierecvr.start()

    def new_cookie(self, cookie):
        self.cookies[cookie['cid']] = self.graph.add_vertex()
        logging.info('added cookie {} to graph'.format(cookie['cid']))
        for parent in cookie['parents']:
            try:
                self.graph.add_edge(self.cookies[parent],
                                    self.cookies[cookie['cid']])
                logging.info('added edge from cookie {} to graph'.format(parent))
            except KeyError:
                logging.info('parent not known in graph')
def graph_from_matrix(matrix, directed=False): g = Graph(directed=directed) g.add_vertex(len(matrix)) weights = g.new_ep("float") edges = np.nonzero(matrix) edges = np.append(edges, [matrix[edges]], axis=0) g.add_edge_list(list(zip(*edges)), eprops=[weights]) #graph_draw(g, output_size=(1000, 1000), output="results/structure.pdf") return g, weights
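# Quick check of graph_from_matrix() with a symmetric weighted adjacency
# matrix. Note that both (i, j) and (j, i) are nonzero, so each undirected
# link is added twice, as a parallel edge.
import numpy as np

m = np.array([[0.0, 1.0, 2.0],
              [1.0, 0.0, 0.5],
              [2.0, 0.5, 0.0]])
g, weights = graph_from_matrix(m)
print(g.num_vertices(), g.num_edges())  # 3 6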
def to_gt(db): """Convert db to graph-tool representation""" from graph_tool.all import Graph graph = Graph(directed=True) mapping = dict() for native in db.query(vertices, get)(): vertex = graph.add_vertex() mapping[native.uid] = graph.vertex_index[vertex] for native in db.query(edges, get)(): start = native.start().uid start = mapping[start] end = native.end().uid end = mapping[end] graph.add_edge(start, end) return graph
def rysuj_graf_wejsciowy(g, output=None, size=(600, 600), bez_napisow=False):
    gx = Graph(g)
    gx.vertex_properties['wyswietlany_tekst'] = gx.new_vertex_property('string')
    for v in gx.vertices():
        gx.vertex_properties['wyswietlany_tekst'][v] = \
            str(v) + ': ' + str(gx.vertex_properties['liczba_kolorow'][v])
    # the two branches differed only in vertex_text, so build kwargs once
    kwargs = dict(bg_color=[255., 255., 255., 1],
                  output_size=size,
                  output=output)
    if not bez_napisow:
        kwargs['vertex_text'] = gx.vertex_properties['wyswietlany_tekst']
    graph_draw(gx, **kwargs)
def build_closure(g, cand_source, terminals, infection_times, k=-1,
                  strictly_smaller=True,
                  debug=False,
                  verbose=False):
    """
    build a closure graph in which cand_source + terminals are all connected
    to each other. the number of neighbors of each node is determined by k:
    the larger the k, the denser the graph.
    """
    r2pred = {}
    edges = {}
    terminals = list(terminals)

    # from cand_source to terminals
    vis = init_visitor(g, cand_source)
    cpbfs_search(g, source=cand_source, visitor=vis, terminals=terminals,
                 forbidden_nodes=terminals,
                 count_threshold=k)
    r2pred[cand_source] = vis.pred
    for u, v, c in get_edges(vis.dist, cand_source, terminals):
        edges[(u, v)] = c

    if debug:
        print('cand_source: {}'.format(cand_source))
        print('#terminals: {}'.format(len(terminals)))
        print('edges from cand_source: {}'.format(edges))

    if verbose:
        terminals_iter = tqdm(terminals)
        print('building closure graph')
    else:
        terminals_iter = terminals

    # from terminal to other terminals
    for root in terminals_iter:
        if strictly_smaller:
            late_terminals = [t for t in terminals
                              if infection_times[t] > infection_times[root]]
        else:
            # respect what the paper presents
            late_terminals = [t for t in terminals
                              if infection_times[t] >= infection_times[root]]
        late_terminals = set(late_terminals) - {cand_source}  # no one can connect to cand_source
        if debug:
            print('root: {}'.format(root))
            print('late_terminals: {}'.format(late_terminals))
        vis = init_visitor(g, root)
        cpbfs_search(g, source=root, visitor=vis,
                     terminals=list(late_terminals),
                     forbidden_nodes=list(set(terminals) - set(late_terminals)),
                     count_threshold=k)
        r2pred[root] = vis.pred
        for u, v, c in get_edges(vis.dist, root, late_terminals):
            if debug:
                print('edge ({}, {})'.format(u, v))
            edges[(u, v)] = c

    if verbose:
        print('returning closure graph')

    gc = Graph(directed=True)
    for _ in range(g.num_vertices()):
        gc.add_vertex()

    for (u, v) in edges:
        gc.add_edge(u, v)

    eweight = gc.new_edge_property('int')
    eweight.set_2d_array(np.array(list(edges.values())))
    # for e, c in edges.items():
    #     eweight[e] = c
    return gc, eweight, r2pred
def steiner_tree_mst(g, root, infection_times, source, terminals, closure_builder=build_closure, strictly_smaller=True, return_closure=False, k=-1, debug=False, verbose=True): gc, eweight, r2pred = closure_builder(g, root, terminals, infection_times, strictly_smaller=strictly_smaller, k=k, debug=debug, verbose=verbose) # get the minimum spanning arborescence # graph_tool does not provide minimum_spanning_arborescence if verbose: print('getting mst') gx = gt2nx(gc, root, terminals, edge_attrs={'weight': eweight}) try: nx_tree = nx.minimum_spanning_arborescence(gx, 'weight') except nx.exception.NetworkXException: if debug: print('fail to find mst') if return_closure: return None, gc, None else: return None if verbose: print('returning tree') mst_tree = Graph(directed=True) for _ in range(g.num_vertices()): mst_tree.add_vertex() for u, v in nx_tree.edges(): mst_tree.add_edge(u, v) if verbose: print('extract edges from original graph') # extract the edges from the original graph # sort observations by time # and also topological order topological_index = {} for i, e in enumerate(bfs_iterator(mst_tree, source=root)): topological_index[int(e.target())] = i sorted_obs = sorted( set(terminals) - {root}, key=lambda o: (infection_times[o], topological_index[o])) tree_nodes = {root} tree_edges = set() # print('root', root) for u in sorted_obs: if u in tree_nodes: if debug: print('{} covered already'.format(u)) continue # print(u) v, u = map(int, next(mst_tree.vertex(u).in_edges())) # v is ancestor tree_nodes.add(v) late_nodes = [n for n in terminals if infection_times[n] > infection_times[u]] vis = init_visitor(g, u) # from child to any tree node, including v cpbfs_search(g, source=u, terminals=list(tree_nodes), forbidden_nodes=late_nodes, visitor=vis, count_threshold=1) # dist, pred = shortest_distance(g, source=u, pred_map=True) node_set = {v for v, d in vis.dist.items() if d > 0} reachable_tree_nodes = node_set.intersection(tree_nodes) ancestor = min(reachable_tree_nodes, key=vis.dist.__getitem__) edges = extract_edges_from_pred(g, u, ancestor, vis.pred) edges = {(j, i) for i, j in edges} # need to reverse it if debug: print('tree_nodes', tree_nodes) print('connecting {} to {}'.format(v, u)) print('using ancestor {}'.format(ancestor)) print('adding edges {}'.format(edges)) tree_nodes |= {u for e in edges for u in e} tree_edges |= edges t = Graph(directed=True) for _ in range(g.num_vertices()): t.add_vertex() for u, v in tree_edges: t.add_edge(t.vertex(u), t.vertex(v)) tree_nodes = {u for e in tree_edges for u in e} vfilt = t.new_vertex_property('bool') vfilt.a = False for v in tree_nodes: vfilt[t.vertex(v)] = True t.set_vertex_filter(vfilt) if return_closure: return t, gc, mst_tree else: return t
class UNISrt(object):
    '''
    This class represents UNIS in the local runtime environment (local to
    the apps). All UNIS models defined in periscope/settings.py will be
    represented as a corresponding item of the 'resources' list in this
    class. At the initialization phase, UNISrt will create a cache of the
    UNIS db (and will maintain it consistent in a best-effort manner).
    '''

    # should move this method to utils
    def validate_add_defaults(self, data):
        if "$schema" not in data:
            return None
        schema = self._schemas.get(data["$schema"])
        validictory.validate(data, schema)
        add_defaults(data, schema)

    def __init__(self):
        logger.info("starting UNIS Network Runtime Environment...")
        fconf = get_file_config(nre_settings.CONFIGFILE)
        self.conf = deepcopy(nre_settings.STANDALONE_DEFAULTS)
        merge_dicts(self.conf, fconf)

        self.unis_url = str(self.conf['properties']['configurations']['unis_url'])
        self.ms_url = str(self.conf['properties']['configurations']['ms_url'])
        self._unis = unis_client.UNISInstance(self.conf)
        self.time_origin = int(time())

        self._schemas = SchemaCache()
        self._resources = self.conf['resources']
        self._subunisclient = {}

        for resource in self._resources:
            setattr(self, resource, {'new': {}, 'existing': {}})

        # construct the hierarchical representation of the network
        for resource in self._resources:
            # only pullRuntime once at the beginning, as pubsub will only update
            # them later when resources are modified on the server
            self.pullRuntime(self, self._unis, self._unis.get(resource), resource, False)

        # construct the graph representation of the network, of which this NRE is in charge
        self.g = Graph()
        self.nodebook = {}
        for key in self.nodes['existing'].keys():
            self.nodebook[key] = self.g.add_vertex()

        for key, link in self.links['existing'].items():  # iteritems() is Python 2 only
            if hasattr(link, 'src') and hasattr(link, 'dst'):
                self.g.add_edge(self.nodebook[link.src.node.selfRef],
                                self.nodebook[link.dst.node.selfRef],
                                add_missing=False)

    def pullRuntime(self, mainrt, currentclient, data, resource_name, localnew):
        '''
        this function should convert the input data into Python runtime objects
        '''
        model = resources_classes[resource_name]
        print(resource_name)
        if data and 'redirect' in data and 'instances' in data:
            if len(data['instances']) == 0:
                return
            for instance_url in data['instances']:
                # TODO: needs SSL, not figured out yet, pretend it does not exist for now
                if instance_url in ('https://dlt.crest.iu.edu:9000',
                                    'http://iu-ps01.crest.osris.org:8888',
                                    'http://dev.crest.iu.edu:8888',
                                    'http://unis.crest.iu.edu:8890',
                                    'http://monitor.crest.iu.edu:9000',
                                    'http://sc-ps01.osris.org:8888'):
                    continue

                if instance_url not in self._subunisclient:
                    conf_tmp = deepcopy(self.conf)
                    conf_tmp['properties']['configurations']['unis_url'] = instance_url
                    conf_tmp['properties']['configurations']['ms_url'] = instance_url  # assume ms is the same as unis
                    self._subunisclient[instance_url] = unis_client.UNISInstance(conf_tmp)

                unis_tmp = self._subunisclient[instance_url]
                self.pullRuntime(mainrt, unis_tmp, unis_tmp.get(resource_name),
                                 resource_name, False)

        elif data and isinstance(data, list):
            # sorting: in unisrt res dictionaries, a newer record of same index will be saved
            data.sort(key=lambda x: x.get('ts', 0), reverse=False)
            for v in data:
                model(v, mainrt, currentclient, localnew)

        threading.Thread(name=resource_name + '@' + currentclient.config['unis_url'],
                         target=self.subscribeRuntime,
                         args=(resource_name, self._unis,)).start()

    def pushRuntime(self, resource_name):
        '''
        this function uploads the specified resource to UNIS
        '''
        def pushEntry(k, entry):
            data = entry.prep_schema()
            groups = data['selfRef'].split('/')
            unis_str = '/'.join(groups[:3])
            if unis_str in self._subunisclient:
                uc = self._subunisclient[unis_str]
            else:
                uc = self._unis

            # use attribute "ts" to indicate an object downloaded from UNIS, and
            # only UPDATE the values of this kind of objects.
            if hasattr(entry, 'ts'):
                url = '/' + resource_name + '/' + getattr(entry, 'id')
                uc.put(url, data)
            else:
                url = '/' + resource_name
                uc.post(url, data)

        while True:
            try:
                key, value = getattr(self, resource_name)['new'].popitem()
                if not isinstance(value, list):
                    pushEntry(key, value)
                else:
                    for item in value:
                        pushEntry(key, item)
            except KeyError:
                return

    def subscribeRuntime(self, resource_name, currentclient):
        '''
        subscribe a channel(resource) to UNIS, and listen for any new updates
        on that channel
        '''
        #name = resources_subscription[resource_name]
        name = resource_name
        model = resources_classes[resource_name]
        #url = self.unis_url.replace('http', 'ws', 1)
        unis_url = currentclient.config['unis_url']
        url = unis_url.replace('http', 'ws', 1)
        url = url + '/subscribe/' + name
        ws = create_connection(url)
        data = ws.recv()
        while data:
            model(json.loads(data), self, currentclient, False)
            data = ws.recv()
        ws.close()

    def poke_data(self, query):
        '''
        try to address this issue:
        - ms stores lots of data, and may be separated from unis
        - this data is accessible via the /data url; it shouldn't be kept in the
          runtime environment (too much), but sometimes it is needed. e.g. HELM
          schedules traceroute measurements, and needs the results to schedule
          the following iperf tests
        '''
        return self._unis.get('/data/' + query)

    def post_data(self, data):
        '''
        same as poke_data, the other way around
        '''
        #headers = self._def_headers("data")
        print(data)
        return self._unis.pc.do_req('post', '/data', data)  # , headers)
from graph_tool.all import Vertex, Graph


class MyVertex:
    def __init__(self, g):
        self.g = g
        self.v = g.add_vertex()
        self.halted = False

    def vote_for_halt(self):
        self.halted = True

    def __getattr__(self, attr):
        return getattr(self.v, attr)


if __name__ == "__main__":
    g = Graph()
    v1 = MyVertex(g)
    v2 = MyVertex(g)
    # pass the underlying graph-tool vertices; add_edge() cannot convert
    # the wrapper objects themselves
    g.add_edge(v1.v, v2.v)
    v1.vote_for_halt()
    print(v1.out_degree())  # will print 1
    print(v1.halted)  # will print True
    print(v1.foo)  # will raise AttributeError: 'Vertex' object has no attribute 'foo'
from graph_tool.all import Graph  # Graph was used below but never imported
from graph_tool.flow import min_cut

filename = '../pairparser/results/p_pairs7.txt'
print(filename)

coefficient = 3
word_dict = {}
add_dict = {}

f = open('bad.txt', 'r', encoding="utf-8")
for s in f:
    # print(s.split(' ')[0])
    add_dict[s.split(' ')[0]] = 1

f = open(filename, 'r', encoding="utf-8")

pairs_graph = Graph(directed=False)
edge_weights = pairs_graph.new_edge_property("int")
ver_names = pairs_graph.new_vertex_property("string")

for line in f:
    spl_line = line.split(' ')
    if len(spl_line) == 1:
        continue
    pos = int(spl_line[0])
    neg = int(spl_line[1])
    cur_weight = pos + coefficient * neg
    w1 = spl_line[2].strip(' \n\uefef')
    w2 = spl_line[3].strip(' \n\uefef')
def __init__(self): self.graph = GT_Graph() self.cookies = dict() self.cookierecvr = CookieRecvr(self) self.cookierecvr.start()
from graph_tool.all import Graph  # Graph was used below but never imported
from graph_tool.flow import min_cut

filename = 'buf.txt'
print(filename)

coefficient = 3
word_dict = {}
add_dict = {}

f = open('bad.txt', 'r', encoding="utf-8")
for s in f:
    # print(s.split(' ')[0])
    add_dict[s.split(' ')[0]] = 1

f = open(filename, 'r', encoding="utf-8")

pairs_graph = Graph(directed=False)
edge_weights = pairs_graph.new_edge_property("int")
ver_names = pairs_graph.new_vertex_property("string")

for line in f:
    spl_line = line.split(' ')
    if len(spl_line) == 1:
        continue
    pos = int(spl_line[0])
    neg = int(spl_line[1])
    cur_weight = pos + coefficient * neg
    w1 = spl_line[2].strip(' \n\uefef')
    w2 = spl_line[3].strip(' \n\uefef')
def gen_graph(repo_events):
    # Python 3 dropped tuple parameters in function signatures, so unpack here
    repo, events = repo_events
    graph = Graph()

    repo_on_graph = graph.new_graph_property('string')
    repo_on_graph[graph] = repo
    graph.graph_properties['repo_on_graph'] = repo_on_graph

    language_on_graph = graph.new_graph_property('string')
    language_on_graph[graph] = events[0]['language']
    graph.graph_properties['language_on_graph'] = language_on_graph

    events_on_vertices = graph.new_vertex_property('object')
    graph.vertex_properties['events_on_vertices'] = events_on_vertices

    actors_on_vertices = graph.new_vertex_property('string')
    graph.vertex_properties['actors_on_vertices'] = actors_on_vertices

    weights_on_edges = graph.new_edge_property('long double')
    graph.edge_properties['weights_on_edges'] = weights_on_edges

    # pre_vertices = []
    pre_events_map = {}
    pre_vertices_map = {}

    # owner_vertex = graph.add_vertex()
    # owner = repo.split('/')[0]
    # actors_on_vertices[owner_vertex] = owner
    # pre_vertices_map[owner] = owner_vertex

    events = sorted(events, key=lambda x: x['created_at'])

    for event in events:
        actor = event['actor']
        if actor in pre_events_map:
            continue
        created_at = event['created_at']
        vertex = graph.add_vertex()
        events_on_vertices[vertex] = event
        actors_on_vertices[vertex] = actor
        if 'actor-following' not in event:
            continue
        following = set(event['actor-following'])
        commons = following.intersection(pre_vertices_map.keys())
        # pre_vertices.append(vertex)
        # if len(commons) == 0:
        #     edge = graph.add_edge(vertex, owner_vertex)
        #     weights_on_edges[edge] = 1.0
        for pre_actor in commons:
            edge = graph.add_edge(vertex, pre_vertices_map[pre_actor])
            interval = \
                (created_at - pre_events_map[pre_actor]['created_at']).days
            weight = 1.0 / fib(interval + 2)
            weights_on_edges[edge] = weight
        pre_events_map[actor] = event
        pre_vertices_map[actor] = vertex

    return graph
def build_region_closure(g, root, regions, infection_times, obs_nodes,
                         debug=False):
    """return a closure graph on the components"""
    regions = copy(regions)
    root_region = {'nodes': {root},
                   'head': root,
                   'head_time': -float('inf')}
    regions[len(regions)] = root_region

    gc = Graph(directed=True)
    for _ in range(len(regions)):
        gc.add_vertex()

    # connect each region
    gc_edges = []
    original_edge_info = {}
    for i, j in combinations(regions, 2):
        # make group i the one with *later* head
        if regions[i]['head_time'] < regions[j]['head_time']:
            i, j = j, i

        if debug:
            print('i, j={}, {}'.format(i, j))

        # only need to connect head i to one of the nodes in group j
        # where nodes in j have time stamp < head i
        # then an edge from region j to region i (because j is earlier)

        head_i = regions[i]['head']

        def get_pseudo_time(n):
            if n == root:
                return -float('inf')
            else:
                return infection_times[n]

        targets = [n for n in regions[j]['nodes']
                   if get_pseudo_time(n) < regions[i]['head_time']]

        if debug:
            print('head_i: {}'.format(head_i))
            print('targets: {}'.format(targets))
            print('regions[j]["nodes"]: {}'.format(regions[j]['nodes']))

        if len(targets) == 0:
            continue

        visitor = init_visitor(g, head_i)
        forbidden_nodes = list(set(regions[i]['nodes']) |
                               (set(regions[j]['nodes']) - set(targets)))

        if debug:
            print('forbidden_nodes: {}'.format(forbidden_nodes))

        # NOTE: count_threshold = 1
        cpbfs_search(g, source=head_i,
                     terminals=targets,
                     forbidden_nodes=forbidden_nodes,
                     visitor=visitor,
                     count_threshold=1)

        reachable_targets = [t for t in targets if visitor.dist[t] > 0]

        if debug:
            print('reachable_targets: {}'.format(reachable_targets))

        if len(reachable_targets) == 0:
            # cannot reach there
            continue

        source = min(reachable_targets, key=visitor.dist.__getitem__)
        dist = visitor.dist[source]
        assert dist > 0

        gc_edges.append((j, i, dist))
        original_edge_info[(j, i)] = {
            'dist': dist,
            'pred': visitor.pred,
            'original_edge': (source, head_i)
        }
    for u, v, _ in gc_edges:
        gc.add_edge(u, v)

    eweight = gc.new_edge_property('int')
    for u, v, c in gc_edges:
        eweight[gc.edge(gc.vertex(u), gc.vertex(v))] = c

    return gc, eweight, original_edge_info
def vytvořím_graph_tool_graf():
    from graph_tool.all import Graph

    graf = Graph()
    u1 = graf.add_vertex()
    u2 = graf.add_vertex()
    graf.add_edge(u1, u2)

    vprop_double = graf.new_vertex_property("double")  # Double-precision floating point
    vprop_double[graf.vertex(1)] = 3.1416

    vprop_vint = graf.new_vertex_property("vector<int>")  # Vector of ints
    vprop_vint[graf.vertex(0)] = [1, 3, 42, 54]

    eprop_dict = graf.new_edge_property("object")  # Arbitrary python object. In this case, a dictionary.
    eprop_dict[next(graf.edges())] = {"foo": "bar", "gnu": 42}  # generators have no .next() in Python 3

    gprop_bool = graf.new_graph_property("bool")  # Boolean
    gprop_bool[graf] = True

    graf.save('./data/graph_tool.graphml', fmt='xml')
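# Round-trip check (an addition, not in the original): reload the saved file.
# Only *internalized* property maps survive Graph.save(); the maps above are
# never assigned to graf.vertex_properties etc., so only the structure is kept.
from graph_tool.all import load_graph

graf2 = load_graph('./data/graph_tool.graphml', fmt='xml')
print(graf2.num_vertices(), graf2.num_edges())  # 2 1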
[a, b] = get_indexes(ver_attr, low, hig) plt.plot(range(0, a + 1), ver_attr[:(a + 1)], 'go') plt.plot(range(a + 1, b + 1), ver_attr[(a + 1):(b + 1)], 'bo') plt.plot(range(b + 1, len(ver_attr)), ver_attr[(b + 1):], 'ro') print("-------------------------------------------------") filename = '../pairparser/results/en_pairs(7).txt' ftag = '7_3imp' coefficient = 3 word_dict = {} # dict with indexes of nodes by word f = open(filename, 'r', encoding="utf-8") pairs_graph = Graph(directed=False) edge_weights = pairs_graph.new_edge_property("double") ver_names = pairs_graph.new_vertex_property("string") ver_id = pairs_graph.new_vertex_property("int") for line in f: spl_line = line.split(' ') if len(spl_line) == 1: continue pos = int(spl_line[0]) neg = int(spl_line[1]) cur_weight = pos + coefficient * neg w1 = spl_line[2].strip(' \n\uefef') w2 = spl_line[3].strip(' \n\uefef')