def __init__(self, state: SampleState, graph: Graph, old_true_block_assignment: np.ndarray) -> None:
    """Build a Sample from a sampling state and the graph it was drawn from.

    Stores the sampled subgraph, the mapping from original vertex ids to
    sample vertex ids, and the (re-numbered) true block membership of the
    sampled vertices.

    Parameters
    ----------
    state : SampleState
        holds the sampled vertex ids (``sample_idx``) and ``sample_size``
    graph : Graph
        the graph the sample is taken from; it is filtered temporarily and
        its filters are cleared again before returning
    old_true_block_assignment : np.ndarray[int]
        vertex-to-community assignment of the full graph. Communities are
        assumed to be non-overlapping.
    """
    self.state = state

    # Only the trailing `sample_size` entries of sample_idx are used.
    chosen = sorted(state.sample_idx[-state.sample_size:])
    self.vertex_mapping = {original: new for new, original in enumerate(chosen)}

    # Mark the chosen vertices, then copy the filtered view into a pruned graph.
    mask = np.zeros(graph.num_vertices())
    mask[chosen] = 1
    vertex_filter = graph.new_vertex_property("bool", mask)
    graph.set_vertex_filter(vertex_filter)
    # If the vertex ordering of the copy turns out wrong, `vorder` may need tuning.
    self.graph = Graph(graph, prune=True)
    graph.clear_filters()

    # The sample may miss some blocks, so the block ids found in it need not be
    # consecutive; true_blocks_mapping renumbers them to 0..B-1.
    sampled_blocks = old_true_block_assignment[chosen]
    distinct_blocks = list(set(sampled_blocks))
    self.true_blocks_mapping = {block: new for new, block in enumerate(distinct_blocks)}
    self.true_block_assignment = np.asarray(
        [self.true_blocks_mapping[block] for block in sampled_blocks])
    self.sample_num = len(self.vertex_mapping)
def build_closure(g, terminals, debug=False, verbose=False):
    """Build the closure graph over `terminals`.

    A search (`pbfs_search`) is started from every terminal; the weighted
    edges reported by `get_edges` for that search are added to an undirected
    graph that has as many vertices as `g` and is filtered down to the
    terminals.

    Returns a tuple ``(gc, eweight, r2pred)``: the closure graph, its integer
    edge-weight property and a dict mapping each terminal to the predecessor
    structure produced by its search. `verbose` is accepted but unused.
    """
    terminals = list(terminals)

    gc = Graph(directed=False)
    gc.add_vertex(g.num_vertices())

    weighted_edges = set()
    r2pred = {}
    for root in terminals:
        if debug:
            print('root {}'.format(root))
        visitor = init_visitor(g, root)
        pbfs_search(g, source=root, terminals=terminals, visitor=visitor)
        found = set(get_edges(visitor.dist, root, terminals))
        if debug:
            print('new edges {}'.format(found))
        weighted_edges.update(found)
        r2pred[root] = visitor.pred

    # Add the edges and remember their costs in the same order, so that the
    # i-th cost belongs to the i-th inserted edge.
    costs = []
    for src, dst, cost in weighted_edges:
        gc.add_edge(src, dst)
        costs.append(cost)
    eweight = gc.new_edge_property('int')
    eweight.set_2d_array(np.array(costs))

    # Hide every vertex that is not a terminal.
    vfilt = gc.new_vertex_property('bool')
    vfilt.a = False
    for terminal in terminals:
        vfilt[terminal] = True
    gc.set_vertex_filter(vfilt)
    return gc, eweight, r2pred
def compose_graph(uid_pid_pairs):
    """Build a transition graph from an iterable of ``(uid, pid)`` pairs.

    Every distinct ``pid`` becomes one vertex. For each ``uid`` an edge is
    added from the vertex of the previous pair seen for that uid to the vertex
    of the current pair (this includes self-loops when the pid repeats).

    Properties stored on the returned graph:

    * ``vp['pid']``       -- the pid of the vertex
    * ``vp['count']``     -- how many pairs mentioned the pid
    * ``ep['count']``     -- how many times the transition was seen
    * ``vp['closeness']`` -- closeness computed with weight
      ``max(edge count) - edge count``; NaN values are replaced via
      ``nan_to_num``
    * ``vp['picked']``    -- True where the vertex count is above the mean

    The ``picked`` filter is applied and immediately cleared again, so the
    returned graph is unfiltered.
    """
    # set up graph
    g = Graph()
    g.vp['pid'] = v_pid_p = g.new_vertex_property('string')
    g.vp['count'] = v_count_p = g.new_vertex_property('int')
    g.ep['count'] = e_count_p = g.new_edge_property('int')

    pid_v_map = {}
    uid_last_v_map = {}
    vv_e_map = {}

    for uid, pid in uid_pid_pairs:
        # vertex
        v = pid_v_map.get(pid)
        if v is None:
            v = g.add_vertex()
            v_pid_p[v] = pid
            v_count_p[v] = 0
            pid_v_map[pid] = v
        v_count_p[v] += 1

        # edge
        last_v = uid_last_v_map.get(uid)
        uid_last_v_map[uid] = v
        if last_v is None:
            continue
        vv = (last_v, v)
        e = vv_e_map.get(vv)
        if e is None:
            e = g.add_edge(*vv)
            e_count_p[e] = 0
            vv_e_map[vv] = e
        e_count_p[e] += 1

    # calculate closeness
    g.vp['closeness'] = v_closeness_p = g.new_vertex_property('float')
    e_inverse_count_p = g.new_edge_property('int')
    # max() of an empty array raises ValueError; with no edges (empty input or
    # every uid seen only once) there is nothing to invert.
    if e_count_p.a.size:
        e_inverse_count_p.a = e_count_p.a.max() - e_count_p.a
    debug('e_inverse_count_p.a: {}', e_inverse_count_p.a)
    closeness(g, weight=e_inverse_count_p, vprop=v_closeness_p)
    debug('v_closeness_p.a    : {}', v_closeness_p.a)
    v_closeness_p.a = nan_to_num(v_closeness_p.a)
    debug('v_closeness_p.a    : {}', v_closeness_p.a)

    # filter
    g.vp['picked'] = v_picked_p = g.new_vertex_property('bool')
    debug('v_count_p.a.mean() : {}', v_count_p.a.mean())
    v_picked_p.a = v_count_p.a > v_count_p.a.mean()
    debug('v_picked_p.a       : {}', v_picked_p.a)
    # NOTE(review): the filter is set and cleared right away, so it has no
    # lasting effect on the returned graph -- confirm this is intended.
    g.set_vertex_filter(v_picked_p)
    g.set_vertex_filter(None)
    return g
def steiner_tree_greedy(
        g, root, infection_times, source, obs_nodes, debug=False,
        verbose=True):
    """Greedily grow a tree from `root` that covers the observed nodes.

    Observed nodes are visited by increasing infection time; the earliest one
    is skipped (presumably it is `root` -- confirm with callers). Each node is
    attached to the nearest tree node found by `cpbfs_search`, where observed
    nodes that are not yet part of the tree are forbidden.

    Parameters
    ----------
    g : graph to search in
    root : int, initial tree node
    infection_times : indexable by node, used as sort key
    source : unused here
    obs_nodes : iterable of observed node ids
    debug : print progress information
    verbose : unused here

    Returns
    -------
    A directed graph with ``g.num_vertices()`` vertices containing the tree
    edges, vertex-filtered to the tree nodes.

    Raises
    ------
    AssertionError if a node cannot reach any tree node.
    """
    # root = min(obs_nodes, key=infection_times.__getitem__)
    sorted_obs = sorted(obs_nodes, key=infection_times.__getitem__)[1:]
    tree_nodes = {root}
    tree_edges = set()
    for u in sorted_obs:
        # connect u to the tree
        vis = init_visitor(g, u)
        if debug:
            print('connect {} to tree'.format(u))
            print('nodes connectable: {}'.format(tree_nodes))
        forbidden_nodes = list(set(obs_nodes) - tree_nodes)
        cpbfs_search(g, u, visitor=vis,
                     terminals=list(tree_nodes),
                     forbidden_nodes=forbidden_nodes,
                     count_threshold=1)

        # add edge
        reachable_nodes = set(np.nonzero(vis.dist > 0)[0]).intersection(tree_nodes)
        if debug:
            print('reachable_nodes: {}'.format(reachable_nodes))
        assert len(reachable_nodes) > 0

        sorted_ancestors = sorted(reachable_nodes, key=vis.dist.__getitem__)
        ancestor = sorted_ancestors[0]
        if debug:
            print('ancestor: {}'.format(ancestor))
            print('dist to reachable: {}'.format(vis.dist[sorted_ancestors]))

        new_edges = extract_edges_from_pred(g, u, ancestor, vis.pred)
        # the path is extracted child -> ancestor; flip it to ancestor -> child
        new_edges = {(b, a) for a, b in new_edges}
        if debug:
            print('new_edges: {}'.format(new_edges))

        tree_edges |= new_edges
        tree_nodes |= {node for edge in new_edges for node in edge}

    t = Graph(directed=True)
    # one call instead of a Python-level loop of single insertions
    t.add_vertex(g.num_vertices())

    vfilt = t.new_vertex_property('bool')
    vfilt.a = False
    for v in tree_nodes:
        vfilt[t.vertex(v)] = True

    for u, v in tree_edges:
        t.add_edge(t.vertex(u), t.vertex(v))

    t.set_vertex_filter(vfilt)
    return t
def build_closure(g, terminals, p=None, debug=False, verbose=False):
    """build the transitive closure on terminals

    For every terminal the shortest distances to all other terminals are
    computed with `shortest_distance` (edge weights `p`, unweighted when
    None). Each reachable pair becomes an edge of an undirected graph that has
    as many vertices as `g` and is vertex-filtered to the terminals.

    Returns ``(gc, eweight, r2pred)``: the closure graph, its edge-weight
    property (type 'int') and a dict mapping each terminal to the predecessor
    map returned for it. `verbose` is accepted but unused.

    NOTE(review): the weights are stored in an 'int' property; if `p` is a
    float-valued map the distances are presumably truncated -- confirm.
    """
    def is_reachable(d):
        """tell whether a distance value denotes an existing path"""
        # -1 is the marker used by the BFS-visitor based variants; kept for
        # backward compatibility.
        if d == -1:
            return False
        # shortest_distance reports unreachable targets as inf (float maps)
        # or as the largest value of the integer type (int maps).
        if isinstance(d, (float, np.floating)):
            return bool(np.isfinite(d))
        if isinstance(d, np.integer):
            return d != np.iinfo(d.dtype).max
        return True

    def get_edges(dist, root, terminals):
        """get adjacent edges to root with weight"""
        return {(root, t, dist[t])
                for t in terminals
                if is_reachable(dist[t]) and t != root}

    terminals = list(terminals)
    gc = Graph(directed=False)
    gc.add_vertex(g.num_vertices())

    edges_with_weight = set()
    r2pred = {}  # root to predecessor map

    # shortest path to all other nodes
    for r in terminals:
        if debug:
            print('root {}'.format(r))
        targets = list(set(terminals) - {r})
        dist_map, pred_map = shortest_distance(g, source=r, target=targets,
                                               weights=p, pred_map=True)
        # with a list of targets the distances come back in target order
        dist_map = dict(zip(targets, dist_map))
        new_edges = get_edges(dist_map, r, targets)
        if debug:
            print('new edges {}'.format(new_edges))
        edges_with_weight |= new_edges
        r2pred[r] = pred_map

    for u, v, c in edges_with_weight:
        gc.add_edge(u, v)

    # edge weights, in the same iteration order the edges were inserted
    eweight = gc.new_edge_property('int')
    weights = np.array([c for _, _, c in edges_with_weight])
    eweight.set_2d_array(weights)

    vfilt = gc.new_vertex_property('bool')
    vfilt.a = False
    for v in terminals:
        vfilt[v] = True
    gc.set_vertex_filter(vfilt)
    return gc, eweight, r2pred
def create_sample(graph: Graph, old_true_block_assignment: np.ndarray, args: argparse.Namespace, prev_state: SampleState) -> 'Sample':
    """Draw a sample of `graph` using the strategy named by ``args.sample_type``.

    Low-degree vertices are dropped first, the chosen ``Sample.<type>_sample``
    routine runs on the pruned copy, and the resulting vertex ids are mapped
    back to ids of the original graph before the Sample is built.

    Raises NotImplementedError for an unknown sample type.

    TODO: either re-write how this method is used, or get rid of it - it seems
    to be a code smell.
    """
    known_sample_types = (
        "degree_weighted",
        "expansion_snowball",
        "forest_fire",
        "max_degree",
        "random_jump",
        "random_node_neighbor",
        "random_walk",
        "uniform_random",
    )

    # NOTE(review): this was described as removing 1-degree vertices, but the
    # threshold keeps only vertices with total degree > 2 -- confirm intent.
    degrees = graph.get_total_degrees(np.arange(graph.num_vertices()))
    keep = degrees > 2
    # position i of the pruned graph corresponds to original vertex mapping[i]
    mapping = np.where(degrees > 2)[0]
    graph.set_vertex_filter(graph.new_vertex_property("bool", keep))
    filtered_graph = Graph(graph, prune=True)
    print(filtered_graph.num_vertices())
    graph.clear_filters()

    if args.sample_type not in known_sample_types:
        raise NotImplementedError(
            "Sample type: {} is not implemented!".format(args.sample_type))

    # Every supported type <t> is implemented by Sample.<t>_sample.
    sampler = getattr(Sample, args.sample_type + "_sample")
    state = sampler(filtered_graph, graph.num_vertices(), prev_state, args)

    # translate ids of the pruned graph back to ids of the original graph
    state.sample_idx = mapping[state.sample_idx]
    return Sample(state, graph, old_true_block_assignment)
def tree1():
    """Return a small directed test tree.

    The graph has five vertices and the edges 0->1, 1->2, 1->3. Vertex 4 has
    no edges and is hidden by the vertex filter, so code under test can be
    checked for not including it.
    """
    tree_edges = [(0, 1), (1, 2), (1, 3)]
    singleton = 4

    g = Graph(directed=True)
    g.add_vertex(5)  # four tree vertices plus one singleton
    g.add_edge_list(tree_edges)

    visible = g.new_vertex_property('bool')
    visible.set_value(True)
    visible[singleton] = False  # the singleton must stay filtered out
    g.set_vertex_filter(visible)
    return g
def build_closure(g, terminals, debug=False, verbose=False):
    """build the transitive closure on terminals

    Runs `bfs_search` from every terminal and links the terminal to each
    other terminal whose recorded distance is not -1. The result is an
    undirected graph with as many vertices as `g`, vertex-filtered to the
    terminals.

    Returns ``(gc, eweight, r2pred)``: closure graph, integer edge weights and
    a dict from each terminal to the predecessor structure of its BFS.
    `verbose` is accepted but unused.
    """
    terminals = list(terminals)

    gc = Graph(directed=False)
    gc.add_vertex(g.num_vertices())

    weighted_edges = set()
    r2pred = {}  # root to predecessor map (from bfs)

    # bfs from each terminal to all other nodes
    for root in terminals:
        if debug:
            print('root {}'.format(root))
        visitor = init_visitor(g, root)
        bfs_search(g, source=root, visitor=visitor)
        dist = visitor.dist
        # edges adjacent to root, carrying the BFS distance as weight
        found = {(root, t, dist[t])
                 for t in terminals
                 if dist[t] != -1 and t != root}
        if debug:
            print('new edges {}'.format(found))
        weighted_edges.update(found)
        r2pred[root] = visitor.pred

    # insert edges and collect their weights in the same order
    costs = []
    for src, dst, cost in weighted_edges:
        gc.add_edge(src, dst)
        costs.append(cost)
    eweight = gc.new_edge_property('int')
    eweight.set_2d_array(np.array(costs))

    # only terminals stay visible
    vfilt = gc.new_vertex_property('bool')
    vfilt.a = False
    for terminal in terminals:
        vfilt[terminal] = True
    gc.set_vertex_filter(vfilt)
    return gc, eweight, r2pred
def build_closure(g, terminals, debug=False, verbose=False):
    """Build the closure graph over `terminals`.

    For every terminal a `pbfs_search` is run on `g`; the weighted edges that
    `get_edges` derives from the resulting distances are collected into an
    undirected graph with ``g.num_vertices()`` vertices, vertex-filtered to
    the terminals.

    Returns ``(gc, eweight, r2pred)``: the closure graph, its integer
    edge-weight property, and a dict mapping each terminal to the predecessor
    structure of its search. `verbose` is accepted but unused.
    """
    terminals = list(terminals)

    # build closure
    gc = Graph(directed=False)
    # add all vertices in one call rather than one Python call per vertex
    gc.add_vertex(g.num_vertices())

    edges_with_weight = set()
    r2pred = {}

    for r in terminals:
        if debug:
            print('root {}'.format(r))
        vis = init_visitor(g, r)
        pbfs_search(g, source=r, terminals=terminals, visitor=vis)
        new_edges = set(get_edges(vis.dist, r, terminals))
        if debug:
            print('new edges {}'.format(new_edges))
        edges_with_weight |= new_edges
        r2pred[r] = vis.pred

    for u, v, c in edges_with_weight:
        gc.add_edge(u, v)

    # the set is iterated in the same order as above, so the i-th weight
    # belongs to the i-th inserted edge
    eweight = gc.new_edge_property('int')
    weights = np.array([c for _, _, c in edges_with_weight])
    eweight.set_2d_array(weights)

    vfilt = gc.new_vertex_property('bool')
    vfilt.a = False
    for v in terminals:
        vfilt[v] = True
    gc.set_vertex_filter(vfilt)
    return gc, eweight, r2pred
def si(g, p, source=None, stop_fraction=0.5):
    """Simulate an SI cascade on `g`.

    g: the graph
    p: edge-wise infection probability, either an edge PropertyMap or a
       single float in (0, 1] used for every edge
    source: start node; drawn uniformly at random when None
    stop_fraction: stop as soon as at least N x stop_fraction nodes are
       infected

    In every time step each node that was infected before the step tries to
    infect the target of each of its edges. The simulation also ends when no
    infected node has an edge with non-zero probability towards an uninfected
    target, because nothing can change after that point (previously this
    situation looped forever).

    Returns ``(source, infection_times, tree)``: the start node, an array
    holding the infection time of every node (-1 = never infected), and a
    directed graph with the infection edges, vertex-filtered to the nodes
    that appear in an edge.
    """
    weighted = isinstance(p, PropertyMap)
    if not weighted:
        # is float and uniform
        assert 0 < p and p <= 1

    if source is None:
        source = random.choice(np.arange(g.num_vertices()))

    infected = {source}
    infection_times = np.ones(g.num_vertices()) * -1
    infection_times[source] = 0

    time = 0
    edges = []
    stop = False

    while not stop:
        # nodes infected during this step only start spreading in the next one
        infected_nodes_until_t = copy(infected)
        time += 1
        can_spread = False
        for i in infected_nodes_until_t:
            vi = g.vertex(i)
            for e in vi.all_edges():
                inf_proba = p[e] if weighted else p
                j = int(e.target())
                # drawn for every edge so the random stream does not depend
                # on the infection state
                rand = random.random()
                if j in infected:
                    continue
                if inf_proba > 0:
                    can_spread = True
                if rand <= inf_proba:
                    infected.add(j)
                    infection_times[j] = time
                    edges.append((i, j))

                    # stop when enough nodes have been infected
                    if (len(infected) / g.num_vertices()) >= stop_fraction:
                        stop = True
                        break
            if stop:
                break
        if not can_spread:
            # no susceptible target left: the cascade cannot grow any more
            break

    tree = Graph(directed=True)
    tree.add_vertex(g.num_vertices())

    vertex_nodes = set()
    for u, v in edges:
        tree.add_edge(u, v)
        vertex_nodes.add(u)
        vertex_nodes.add(v)

    vfilt = tree.new_vertex_property('bool')
    vfilt.set_value(False)
    vfilt.a[list(vertex_nodes)] = True
    tree.set_vertex_filter(vfilt)
    return source, infection_times, tree
def find_tree_greedy(g, root, infection_times, source, obs_nodes, debug=False, verbose=True):
    """Greedily grow a tree from `root` that covers the observed nodes.

    Observed nodes are processed by increasing infection time; the earliest
    one is skipped (presumably it is `root` -- confirm with callers) and nodes
    already in the tree are ignored. Each remaining node is connected to the
    closest tree node found by `cpbfs_search`, with observed nodes outside the
    tree forbidden as intermediate nodes.

    Parameters
    ----------
    g : graph to search in
    root : int, initial tree node
    infection_times : indexable by node, used as sort key
    source : unused here
    obs_nodes : iterable of observed node ids
    debug : print progress information
    verbose : unused here

    Returns
    -------
    A directed graph with ``g.num_vertices()`` vertices that holds the tree
    edges and is vertex-filtered to the tree nodes.

    Raises
    ------
    AssertionError if a node cannot reach any tree node.
    """
    # root = min(obs_nodes, key=infection_times.__getitem__)
    sorted_obs = sorted(obs_nodes, key=infection_times.__getitem__)[1:]
    tree_nodes = {root}
    tree_edges = set()
    for u in sorted_obs:
        if u in tree_nodes:
            continue
        # connect u to the tree
        vis = init_visitor(g, u)
        if debug:
            print('connect {} to tree'.format(u))
            print('nodes connectable: {}'.format(tree_nodes))
        forbidden_nodes = list(set(obs_nodes) - tree_nodes)
        cpbfs_search(g, u, visitor=vis,
                     terminals=list(tree_nodes),
                     forbidden_nodes=forbidden_nodes,
                     count_threshold=1)

        # add edge; vis.dist is iterated by key here, i.e. used as a mapping
        reachable_nodes = set(
            filter(lambda k: vis.dist[k] > 0, vis.dist)
        ).intersection(tree_nodes)
        if debug:
            print('reachable_nodes: {}'.format(reachable_nodes))
        assert len(reachable_nodes) > 0

        sorted_ancestors = sorted(reachable_nodes, key=vis.dist.__getitem__)
        ancestor = sorted_ancestors[0]
        if debug:
            print('ancestor: {}'.format(ancestor))
            # look the distances up one by one: indexing a mapping with a
            # list raises TypeError
            print('dist to reachable: {}'.format(
                [vis.dist[a] for a in sorted_ancestors]))

        new_edges = extract_edges_from_pred(g, u, ancestor, vis.pred)
        # the path is extracted child -> ancestor; flip it to ancestor -> child
        new_edges = {(b, a) for a, b in new_edges}
        if debug:
            print('new_edges: {}'.format(new_edges))

        tree_edges |= new_edges
        tree_nodes |= {node for edge in new_edges for node in edge}

    t = Graph(directed=True)
    t.add_vertex(g.num_vertices())

    vfilt = t.new_vertex_property('bool')
    vfilt.a = False
    for v in tree_nodes:
        vfilt[t.vertex(v)] = True

    for u, v in tree_edges:
        t.add_edge(t.vertex(u), t.vertex(v))

    t.set_vertex_filter(vfilt)
    return t
class graphtool():
    """Builds a graph-tool graph from ``Device`` documents and answers path
    queries between devices."""

    def get_edges(self):
        """Fill ``self.edges`` with ``[port['dev'], devid]`` pairs.

        One pair is produced for every port whose ``acc`` flag is falsy
        (presumably "not an access port" -- confirm against the schema).
        A pair is dropped when its reverse is also present, so a link that is
        reported from both ends is kept once.
        """
        self.edges = []
        for dev in Device.objects:
            for port in dev['ports']:
                if not port['acc']:
                    self.edges.append([int(port['dev']), int(dev['devid'])])
        # Iterate over a snapshot: removing from the list that is being
        # iterated skips the element following each removal.
        for edge in list(self.edges):
            if edge[::-1] in self.edges:
                self.edges.remove(edge)

    def create_graph(self):
        """Build the undirected graph ``self.g`` from the current devices."""
        self.get_edges()
        self.g = Graph(directed=False)
        self.g.add_edge_list(self.edges)

    def load_graph(self):
        """Load ``self.g`` from the pickled graph stored on the first System."""
        # NOTE(review): pickle.loads executes arbitrary code from its input;
        # this is only safe while the stored blob is fully trusted.
        self.g = pickle.loads(System.objects.first().graph.read())

    def shortestpath(self, source, dest):
        """Collect candidate paths between two devices given by uri.

        The result combines the shortest path, the shortest path that avoids
        the first edge of that path, and all shortest paths. When the paths
        touch devices of STP domains more than once, all paths between the
        first and the last collected domain device (within the domain
        devices only) are added as well.

        Returns a list of paths (lists of vertex indices), also stored in
        ``self.all_paths``; a message string when both uris are equal; None
        when either device is unknown.
        """
        if source == dest:
            return ('нужны разные пипишники')
        # ip to id
        source = Device.objects(uri=source)
        dest = Device.objects(uri=dest)
        if not (len(source) > 0 and len(dest) > 0):
            return None

        source = self.g.vertex(source[0].devid)
        dest = self.g.vertex(dest[0].devid)
        result = graph_tool.topology.shortest_path(self.g, source, dest)
        path = [self.g.vertex_index[x] for x in result[0]]

        # Second candidate: hide the first edge of the shortest path and
        # search again. Without any path there is no edge to hide.
        second_path = []
        if len(result[1]) > 0:
            filteredge = self.g.new_edge_property('bool')
            filteredge[result[1][0]] = True
            self.g.set_edge_filter(filteredge, inverted=True)
            try:
                result = graph_tool.topology.shortest_path(
                    self.g, source, dest)
                second_path = [self.g.vertex_index[x] for x in result[0]]
            finally:
                # never leave the shared graph filtered
                self.g.clear_filters()

        all_shortest = graph_tool.topology.all_shortest_paths(
            self.g, source, dest)
        another_paths = [[self.g.vertex_index[j] for j in i]
                         for i in all_shortest]

        # drop empty paths and duplicates
        candidates = [path] + [second_path] + another_paths
        unique = set(tuple(t) for t in candidates if len(t) > 0)
        self.all_paths = [list(t) for t in unique]

        # gather the devices of every STP domain touched by a path
        dev_from_stp = []
        count = 0
        for path in self.all_paths:
            for dev in path:
                dev = Device.objects(devid=dev).first().uri
                if Stpdomins.objects(devices__=dev):
                    count += 1
                    domain = Stpdomins.objects(devices__=dev).first()
                    for x in domain.devices:
                        if x not in dev_from_stp:
                            dev_from_stp.append(x)

        if len(dev_from_stp) > 0 and count > 1:
            print('stp domains')
            filtevertex = self.g.new_vertex_property('bool')
            for x in dev_from_stp:
                filtevertex[self.g.vertex(
                    Device.objects(uri=x).first().devid)] = True
            self.g.set_vertex_filter(filtevertex)
            try:
                source = self.g.vertex(
                    Device.objects(uri=dev_from_stp[0]).first().devid)
                dest = self.g.vertex(
                    Device.objects(uri=dev_from_stp[-1]).first().devid)
                result = graph_tool.topology.all_paths(self.g, source, dest)
                # the iterator must be consumed while the filter is active
                for x in result:
                    self.all_paths.append([int(self.g.vertex(i)) for i in x])
            finally:
                self.g.clear_filters()
            # sort so that equal paths are adjacent, then keep one of each
            self.all_paths.sort()
            self.all_paths = [
                key for key, _ in itertools.groupby(self.all_paths)
            ]
            self.all_paths = [
                path for path in self.all_paths if len(path) > 0
            ]
        return self.all_paths

    def fancy_shortest(self):
        """Describe every path in ``self.all_paths`` as a list of
        ``[uri, addr, type name]`` entries, skipping devices whose type is in
        ``passive``. The result is stored in ``self.fancy_paths``."""
        self.fancy_paths = []
        for path in self.all_paths:
            fancy = []
            for i in path:
                d = Device.objects(devid=i).first()
                if d.devtype not in passive:
                    fancy.append([d.uri, d.addr, dev_type_dict[d.devtype]])
            self.fancy_paths.append(fancy)
        return self.fancy_paths

    def _port_entry(self, devid, peer):
        """Return ``[uri, devtype, port]`` for the port of `devid` facing
        `peer` (port 0 when none is found), or None for unsupported types."""
        dev = Device.objects(devid=devid).first()
        if dev.devtype not in supported:
            return None
        ports = [x['num'] for x in dev.ports if x['dev'] == peer]
        return [dev.uri, dev.devtype, ports[0] if ports else 0]

    def paths_ports(self):
        """Collect, per device uri, the ports used along ``self.all_paths``.

        Returns ``(g_fancy_output, g_output)``: two dicts keyed by uri with
        ``{'type': ..., 'ports': [...]}`` values; in the first one the type is
        translated through ``dev_type_dict``.
        """
        output = []
        for path in self.all_paths:
            for i, j in zip(path, path[1:]):
                # both ends of the hop, in path order
                for devid, peer in ((i, j), (j, i)):
                    entry = self._port_entry(devid, peer)
                    if entry is not None:
                        output.append(entry)

        g_output = dict()
        for key, group in groupby(output, lambda x: x[0]):
            items = list(group)
            ports = [item[2] for item in items]
            if key in g_output:
                g_output[key]['ports'] = g_output[key]['ports'] + ports
            else:
                # the type is taken from the last entry of the run
                g_output[key] = {'type': items[-1][1], 'ports': ports}
        for key in g_output:
            g_output[key]['ports'] = list(set(g_output[key]['ports']))

        g_fancy_output = copy.deepcopy(g_output)
        for i in g_fancy_output:
            g_fancy_output[i]['type'] = dev_type_dict[g_fancy_output[i]
                                                      ['type']]
        return g_fancy_output, g_output
def import_e_coli_ppi(save=False, export=False):
    '''
    Imports the dataset E_Coli and saves it as a graph (Snap, GTools and
    Greach format).

    The edge list is read into an undirected graph (one vertex per distinct
    protein name), the essentiality labels are attached as the vertex
    property "essentiality" (N=0, E=1, ?=2, X=3; vertices without a label
    keep 0), every vertex whose value is neither 0 nor 1 is purged, and the
    boolean vertex property "lethality" marks the vertices with value 1.
    The graph is finally passed through graph_analysis.IO.make_simple_graph.

    save   -- store the graph under ../Data/Graphs/E_Coli/E_Coli.GT.graph
    export -- additionally call exportToSnapAndGreach
    Returns the resulting graph.
    '''
    import csv

    saveLoadFolder = "E_Coli"
    graphName = "E_Coli"
    graphFile = "../Data/Graphs/" + saveLoadFolder + "/E_Coli_Edge_List.txt"
    labelsFile = "../Data/Graphs/" + saveLoadFolder + "/E_Coli_Labels.csv"

    g = Graph(directed=False)
    proteinNameToNode = dict()
    with open(graphFile, "r") as inF:
        for line in inF:
            # split() already removes surrounding whitespace from both names
            fromNode, toNode = line.strip().split()
            for name in (fromNode, toNode):
                if name not in proteinNameToNode:
                    proteinNameToNode[name] = int(g.add_vertex())
            source = proteinNameToNode[fromNode]
            target = proteinNameToNode[toNode]
            g.add_edge(g.vertex(source), g.vertex(target))

    essentiality = g.new_vertex_property("short")
    essentiality.a = 0
    # all codes are integers; '?' used to be mapped to the string '2'
    symbolToInt = {'N': 0, 'E': 1, '?': 2, 'X': 3}
    print(g)

    with open(labelsFile, "r") as inFile:
        count = 0
        data = [row for row in csv.reader(inFile.read().splitlines())]
        for pair in data:
            proteinName, attribute = pair
            try:
                essentiality.a[proteinNameToNode[
                    proteinName.lower()]] = symbolToInt[attribute]
            except KeyError:
                # protein missing from the network, or unknown label symbol
                count += 1
        print(count)

    g.vp["essentiality"] = essentiality

    # keep only the vertices labelled 0 or 1
    lethalOrNot = (essentiality.a == 0) | (essentiality.a == 1)
    lethality = g.new_vertex_property("boolean")
    lethality.a = lethalOrNot
    g.set_vertex_filter(lethality)
    g.purge_vertices()
    print(g)
    # NOTE(review): `p` is not defined in this function; presumably a
    # module-level debugging helper -- confirm it is still wanted.
    p()

    lethality.a = 0
    lethality.a[essentiality.a == 1] = 1
    g.vp["lethality"] = lethality

    g = graph_analysis.IO.make_simple_graph(g, undirected=True, gcc=True)

    if save:
        graph_analysis.IO.save_data(
            "../Data/Graphs/" + saveLoadFolder + "/" + graphName +
            ".GT.graph", g)

    if export:
        exportToSnapAndGreach(graphName, saveLoadFolder)

    return g
class graph:
    """Graph addon of a mol object, based on graph_tool."""

    def __init__(self, mol):
        """
        instantiate a graph object which will be attached to the parent mol

        :Parameter:

             - mol : a mol type object (can be a derived type like bb or topo as well)
        """
        self._mol = mol
        logger.debug("generated the graph addon")

    def make_graph(self, idx=None, hashes=True):
        """
        generate a graph for the mol object (atoms should be typed)
        the vertex label is the atomtype name truncated at the first "_"

        Atoms whose element is "x" get no vertex. The result is stored in
        self.molg, self.vert2atom (vertex index -> atom index) and
        self.nvertices.

        :Parameter:

            - idx    : atom indices to include [default: all atoms]
            - hashes : if True, a label ending in "1" is replaced by "#"
        """
        if idx is None:
            idx = range(self._mol.natoms)
        self.molg = Graph(directed=False)
        # now add vertices
        self.molg.vp.type = self.molg.new_vertex_property("string")
        # maps vertex indices to the real atoms, because some atoms are
        # omitted from the graph
        self.vert2atom = []
        ig = 0
        for i in idx:
            if self._mol.elems[i] != "x":
                self.molg.add_vertex()
                self.vert2atom.append(i)
                vtype = self._mol.atypes[i]
                # keep only the part in front of the first "_"
                if "_" in vtype:
                    vtype = vtype.split("_")[0]
                # if the label ends in "1" replace it by a "#"
                if hashes:
                    if vtype[-1] == "1":
                        vtype = "#"
                self.molg.vp.type[ig] = vtype
                ig += 1
        self.nvertices = len(self.vert2atom)
        logger.info("generated a graph for a mol object with %d vertices" % self.nvertices)
        # atom index -> vertex index; the first vertex wins for repeated
        # atoms, which is what list.index() returned
        atom2vert = {}
        for vertex, atom in enumerate(self.vert2atom):
            atom2vert.setdefault(atom, vertex)
        # now add edges ... only bonds between vertices
        for i in range(self.nvertices):
            ia = self.vert2atom[i]
            for ja in self._mol.conn[ia]:
                # ">=" (not ">") is needed for atoms/vertices connected to
                # themselves twice in different boxes
                if ja >= ia and ja in atom2vert:
                    self.molg.add_edge(self.molg.vertex(i),
                                       self.molg.vertex(atom2vert[ja]))

    def plot_graph(self, fname, g=None, size=1000, fsize=16, vsize=8, ptype="pdf", method='arf'):
        """
        plot the graph (needs more tuning options)

        :Parameter:
            - fname  : filename (will write fname.<ptype>)
            - g      : graph to draw [default: self.molg]
            - size   : outputsize will be (size, size) in px [default 1000]
            - fsize  : font size [default 16]
            - vsize  : vertex size [default 8]
            - ptype  : output file type / extension [default pdf]
            - method : placement method to draw graph, can be one of
                       arf
                       frucht
                       radtree
                       sfdp
                       random
                       (any other value lets graph_draw choose the layout)
        """
        import graph_tool.draw as gt

        draw_g = g if g else self.molg
        if method == 'arf':
            pos = gt.arf_layout(draw_g, max_iter=0)
        elif method == 'frucht':
            pos = gt.fruchterman_reingold_layout(draw_g, n_iter=1000)
        elif method == 'radtree':
            pos = gt.radial_tree_layout(draw_g, draw_g.vertex(0))
        elif method == 'sfdp':
            pos = gt.sfdp_layout(draw_g)
        elif method == 'random':
            # this branch used to repeat the 'sfdp' test and was unreachable
            pos = gt.random_layout(draw_g)
        else:
            pos = None
        gt.graph_draw(draw_g, pos=pos, vertex_text=draw_g.vp.type,
                      vertex_font_size=fsize, vertex_size=vsize,
                      output_size=(size, size), output=fname + "." + ptype,
                      bg_color=[1, 1, 1, 1])

    def find_subgraph(self, graph, subg):
        """
        use graph_tools subgraph_isomorphism tool to find substructures

        :Parameter:

            - graph : parent graph to be searched
            - subg  : graph to be found

        :Returns:

            a list of lists with the vertex indices of the substructure, in
            the order delivered by subgraph_isomorphism; matches that cover
            the same set of vertices are reported only once
        """
        maps = subgraph_isomorphism(subg, graph, vertex_label=(subg.vp.type, graph.vp.type))
        subs = []
        seen = set()
        for m in maps:
            sl = list(m)
            # the sorted indices identify a match regardless of vertex order
            key = tuple(sorted(sl))
            if key not in seen:
                seen.add(key)
                subs.append(sl)
        return subs

    def find_sub(self, subg):
        """
        use graph_tools subgraph_isomorphism tool to find substructures

        :Parameter:

            - subg : graph object (from another molsys) to be searched

        :Returns:

            a list of lists with the vertex indices of the substructure
        """
        return self.find_subgraph(self.molg, subg.molg)

    def find_fragment(self, frag, add_hydrogen=False):
        """
        find a complete fragment (including the hydrogen atoms not included in the graph)
        Note that the fragment found can be different from the fragment by the number of hydrogen atoms!!

        :Parameter:

            - frag         : mol object with graph addon to be found
            - add_hydrogen : also add the atoms with element "h" connected
                             to each matched atom

        :Returns:

            a list of lists with the atom indices of the fragment in the full system
        """
        subs = self.find_sub(frag.graph)
        frags = []
        for s in subs:
            # loop over all vertices
            f = []
            for v in s:
                a = self.vert2atom[v]
                f.append(a)
                # check all atoms connected to this atom if they are hydrogen
                if add_hydrogen:
                    for ca in self._mol.conn[a]:
                        if self._mol.elems[ca] == "h":
                            f.append(ca)
            frags.append(f)
        return frags

    def util_graph(self, vertices, conn):
        """
        generate a graph with vertices and connectivity in conn

        :Parameter:

            - vertices : vertex labels, one per vertex
            - conn     : conn[i] lists the vertices connected to vertex i

        :Returns:

            the new undirected graph with the labels in vp.type
        """
        g = Graph(directed=False)
        # now add vertices
        g.vp.type = g.new_vertex_property("string")
        for i, v in enumerate(vertices):
            g.add_vertex()
            g.vp.type[i] = v
        # now add edges ... each pair once, self-connections included
        for i, v in enumerate(vertices):
            for j in conn[i]:
                if j >= i:
                    g.add_edge(g.vertex(i), g.vertex(j))
        return g

    def filter_graph(self, idx):
        """
        filters all vertices besides the given out of the graph

        :Parameters:

            - idx (list): indices to keep; they are used as vertex indices
        """
        # TODO use vert2atom
        assert isinstance(idx, list)
        self.molg.clear_filters()
        vfilt = self.molg.new_vertex_property("bool")
        vfilt.set_value(False)
        for i in idx:
            vfilt[self.molg.vertex(i)] = True
        self.molg.set_vertex_filter(vfilt)
def find_tree_by_closure(g, root, infection_times, terminals,
                         closure_builder=build_closure_with_order,
                         strictly_smaller=True,
                         return_closure=False,
                         k=-1,
                         debug=False,
                         verbose=True):
    """find the steiner tree by transitive closure

    Steps: build the closure graph with `closure_builder`, take a minimum
    branching rooted at `root` on it, then replace every closure edge by a
    path of the original graph `g`.

    Returns the tree as a directed graph with ``g.num_vertices()`` vertices,
    vertex-filtered to the nodes that appear in a tree edge. With
    `return_closure` the tuple ``(tree, closure graph, branching view)`` is
    returned instead.

    Raises TreeNotFound when a terminal is not visited by the traversal of
    the branching.
    """
    gc, eweight = closure_builder(g, root, terminals,
                                  infection_times,
                                  strictly_smaller=strictly_smaller,
                                  k=k,
                                  return_r2pred=False,
                                  debug=debug,
                                  verbose=verbose)

    # graph_tool does not provide minimum_spanning_arborescence, hence the
    # separate minimum-branching routine
    if verbose:
        print('getting mst')
    branching_edges = find_minimum_branching(gc, [root], weights=eweight)

    efilt = gc.new_edge_property('bool')
    efilt.a = False
    for src, dst in branching_edges:
        efilt[gc.edge(src, dst)] = True
    mst_tree = GraphView(gc, efilt=efilt)

    if verbose:
        print('extract edges from original graph')

    # Terminals are processed by infection time and, for ties, by the
    # position at which the BFS over the branching reaches them.
    topological_index = {
        int(edge.target()): position
        for position, edge in enumerate(bfs_iterator(mst_tree, source=root))
    }

    def visit_order(obs):
        return (infection_times[obs], topological_index[obs])

    try:
        sorted_obs = sorted(set(terminals) - {root}, key=visit_order)
    except KeyError:
        raise TreeNotFound(
            "it's likely that the input cannot produce a feasible solution, " +
            "because the topological sort on terminals does not visit all terminals"
        )

    # next, we start reconstructing the minimum steiner arborescence
    tree_nodes = {root}
    tree_edges = set()
    for obs in sorted_obs:
        if obs in tree_nodes:
            if debug:
                print('{} covered already'.format(obs))
            continue
        # parent is the ancestor of child in the branching
        parent, child = map(int, next(mst_tree.vertex(obs).in_edges()))
        tree_nodes.add(parent)

        late_nodes = [
            n for n in terminals
            if infection_times[n] > infection_times[child]
        ]
        vis = init_visitor(g, child)
        # search from the child towards any tree node, including parent
        cpbfs_search(g, source=child, terminals=list(tree_nodes),
                     forbidden_nodes=late_nodes,
                     visitor=vis,
                     count_threshold=1)

        visited = {node for node, dist in vis.dist.items() if dist > 0}
        reachable_tree_nodes = visited.intersection(tree_nodes)
        ancestor = min(reachable_tree_nodes, key=vis.dist.__getitem__)

        path = extract_edges_from_pred(g, child, ancestor, vis.pred)
        # the path runs child -> ancestor; flip each edge
        edges = {(b, a) for a, b in path}

        if debug:
            print('tree_nodes', tree_nodes)
            print('connecting {} to {}'.format(parent, child))
            print('using ancestor {}'.format(ancestor))
            print('adding edges {}'.format(edges))

        tree_nodes |= {node for edge in edges for node in edge}
        tree_edges |= edges

    t = Graph(directed=True)
    t.add_vertex(g.num_vertices())
    for src, dst in tree_edges:
        t.add_edge(t.vertex(src), t.vertex(dst))

    # only nodes that are an endpoint of a tree edge stay visible
    tree_nodes = {node for edge in tree_edges for node in edge}
    vfilt = t.new_vertex_property('bool')
    vfilt.a = False
    for node in tree_nodes:
        vfilt[t.vertex(node)] = True
    t.set_vertex_filter(vfilt)

    if return_closure:
        return t, gc, mst_tree
    return t
def build_closure_with_order(g, cand_source, terminals, infection_times,
                             k=-1,
                             strictly_smaller=True,
                             return_r2pred=False,
                             debug=False,
                             verbose=False):
    """
    build transitive closure with infection order constraint

    g: gt.Graph(directed=False)
    cand_source: int
    terminals: list of int
    infection_times: dict int -> float
    k: passed to cpbfs_search as count_threshold; the larger k, the denser
       the closure graph
    strictly_smaller: a terminal may only connect to terminals infected
       strictly later (True) or at the same time or later (False)
    return_r2pred: also return the predecessor structure of every search

    Edges go from cand_source to terminals and from each terminal to its
    later terminals; no edge ever points to cand_source. Vertex ids are
    preserved (no re-mapping to consecutive integers).

    return:
    (gc, eweight) or (gc, eweight, r2pred), where gc is a
    gt.Graph(directed=True) vertex-filtered to the endpoints of its edges
    """
    r2pred = {} if return_r2pred else None
    edges = {}
    terminals = list(terminals)

    def search_from(start, targets, forbidden):
        """run one constrained search and return its visitor"""
        visitor = init_visitor(g, start)
        cpbfs_search(g, source=start, visitor=visitor,
                     terminals=targets,
                     forbidden_nodes=forbidden,
                     count_threshold=k)
        if return_r2pred:
            r2pred[start] = visitor.pred
        return visitor

    # from cand_source to terminals
    vis = search_from(cand_source, terminals, terminals)
    for u, v, c in get_edges(vis.dist, cand_source, terminals):
        edges[(u, v)] = c

    if debug:
        print('cand_source: {}'.format(cand_source))
        print('#terminals: {}'.format(len(terminals)))
        print('edges from cand_source: {}'.format(edges))

    if verbose:
        terminals_iter = tqdm(terminals)
        print('building closure graph')
    else:
        terminals_iter = terminals

    # from terminal to other terminals
    for root in terminals_iter:
        root_time = infection_times[root]
        if strictly_smaller:
            later = [t for t in terminals if infection_times[t] > root_time]
        else:
            # respect what the paper presents
            later = [t for t in terminals if infection_times[t] >= root_time]
        # no one can connect to cand_source
        late_terminals = set(later) - {cand_source}

        if debug:
            print('root: {}'.format(root))
            print('late_terminals: {}'.format(late_terminals))

        vis = search_from(root,
                          list(late_terminals),
                          list(set(terminals) - set(late_terminals)))
        for u, v, c in get_edges(vis.dist, root, late_terminals):
            if debug:
                print('edge ({}, {})'.format(u, v))
            edges[(u, v)] = c

    if verbose:
        print('returning closure graph')

    gc = Graph(directed=True)
    gc.add_vertex(g.num_vertices())

    # only endpoints of closure edges stay visible
    vfilt = gc.new_vertex_property('bool')
    vfilt.a = False
    for u, v in edges:
        gc.add_edge(u, v)
        vfilt[u] = vfilt[v] = True

    # the dict is iterated in the same order as above, so weights line up
    # with the inserted edges
    eweight = gc.new_edge_property('int')
    eweight.set_2d_array(np.array(list(edges.values())))
    gc.set_vertex_filter(vfilt)

    if return_r2pred:
        return (gc, eweight, r2pred)
    return (gc, eweight)
class BaseGraph(object):
    """
    Class representing a graph. We do not use pure graph_tool.Graph because
    we want to be able to easily change this library. Neither do we use
    inheritance, as graph_tool has an inconvenient licence.
    """

    def __init__(self):
        self._g = None
        self._node_dict = {}
        self._syn_to_vertex_map = None
        self._lemma_to_nodes_dict = None
        self._lu_on_vertex_dict = None

    def use_graph_tool(self):
        """
        Returns underlying graph_tool.Graph. It should be avoided at all
        costs.
        """
        return self._g

    def get_node_for_synset_id(self, syn_id):
        """
        Return the node holding the synset with identifier `syn_id`, or
        None when there is no such node.

        The map of synset identifiers to nodes is built lazily on the first
        call and reused by every later call; it is not refreshed when nodes
        are added afterwards.
        """
        # `is None` (rather than a truthiness test) so that a legitimately
        # empty map is not rebuilt on every single lookup.
        if self._syn_to_vertex_map is None:
            syn_map = {}
            for node in self.all_nodes():
                if node.synset:
                    syn_map[node.synset.synset_id] = node
            # Publish only a completely built map.
            self._syn_to_vertex_map = syn_map
        return self._syn_to_vertex_map.get(syn_id, None)

    def pickle(self, filename):
        """ Save the underlying graph to `filename`. """
        self._g.save(filename)

    def unpickle(self, filename):
        """ Load the underlying graph from `filename`. """
        self._g = load_graph(filename)

    def init_graph(self, drctd=False):
        """ Create a new, empty underlying graph. """
        self._g = Graph(directed=drctd)

    def copy_graph_from(self, g):
        """ Replace the underlying graph with a copy of the one in `g`. """
        self._g = g._g.copy()

    def set_directed(self, drctd):
        self._g.set_directed(drctd)

    def is_directed(self):
        return self._g.is_directed()

    def merge_graphs(self, g1, g2):
        """ Set the underlying graph to the union of `g1` and `g2`. """
        self._g = graph_union(g1._g, g2._g, internal_props=True)

    # Node operations:

    def all_nodes(self):
        """ Yield a BaseNode for every vertex of the underlying graph. """
        for node in self._g.vertices():
            yield BaseNode(self._g, node)

    def create_node_attribute(self, name, kind, value=None):
        """ Create node attribute `name` of type `kind` unless it exists. """
        if not self.has_node_attribute(name):
            node_attr = self._g.new_vertex_property(kind, value)
            self._g.vertex_properties[name] = node_attr

    def create_node_attributes(self, node_attributes_list):
        """
        Create every missing node attribute from a list of (name, kind)
        pairs.
        """
        for attr in node_attributes_list:
            if not self.has_node_attribute(attr[0]):
                node_attr = self._g.new_vertex_property(attr[1])
                self._g.vertex_properties[attr[0]] = node_attr

    def has_node_attribute(self, name):
        """ Checks if a node attribute already exists """
        return name in self._g.vertex_properties

    def delete_node_attribute(self, name):
        """ Delete node attribute """
        del self._g.vertex_properties[name]

    def add_node(self, name, node_attributes_list=None):
        """
        Return the node registered under `name`, creating it first if
        needed. The (attribute name, value) pairs of `node_attributes_list`
        are applied only when the node is created.
        """
        if node_attributes_list is None:
            node_attributes_list = []

        if name not in self._node_dict:
            new_node = self._g.add_vertex()
            self._node_dict[name] = BaseNode(self._g, new_node)
            for attr in node_attributes_list:
                self._g.vertex_properties[attr[0]][new_node] = attr[1]
        return self._node_dict[name]

    def get_node(self, name):
        return self._node_dict[name]

    def remove_node(self, name):
        self._g.remove_vertex(self._node_dict[name]._node)
        del self._node_dict[name]

    def nodes_filter(self, nodes_to_filter_set, inverted=False,
                     replace=False, soft=False):
        """
        Filters out nodes from set

        Args:
          nodes_to_filter_set (Iterable): Nodes which will be filtered out.
          inverted (bool): If True, nodes NOT in set will be filtered out.
            Defaults to False.
          replace (bool): Replace current filter instead of combining the
            two. Defaults to False.
          soft (bool): Hide nodes without removing them so they can be
            restored with reset_nodes_filter. Defaults to False.
        """
        predicate = lambda node: node not in nodes_to_filter_set
        self.nodes_filter_conditional(predicate, inverted, replace, soft)

    def nodes_filter_conditional(self, predicate, inverted=False,
                                 replace=False, soft=False):
        """
        Filters node based on a predicate

        Args:
          predicate (Callable): Predicate returning a falsy value for nodes
            that should be filtered out.
          inverted (bool): Invert condition. Defaults to False.
          replace (bool): Replace current filter instead of combining the
            two. Defaults to False.
          soft (bool): Hide nodes without removing them so they can be
            restored with reset_nodes_filter. Defaults to False.
        """
        (old_filter, old_inverted) = self._g.get_vertex_filter()
        combine = not replace and old_filter is not None
        new_filter = self._g.new_vertex_property("bool")

        for node in self.all_nodes():
            # Coerce to bool first: comparing a non-bool result (None, an
            # object, ...) with `!=` would treat it as "keep" regardless
            # of its truthiness.
            kept = bool(predicate(node)) != inverted
            if combine:
                old_kept = bool(old_filter[node._node]) != old_inverted
                kept = kept and old_kept
            new_filter[node._node] = kept

        self._g.set_vertex_filter(new_filter, False)
        if not soft:
            self.apply_nodes_filter()

    def apply_nodes_filter(self):
        """ Removes nodes that are currently filtered out """
        self._g.purge_vertices()

    def reset_nodes_filter(self):
        """ Clears node filter """
        self._g.set_vertex_filter(None)

    # Edge operations:

    def num_edges(self):
        return self._g.num_edges()

    def all_edges(self):
        """ Yield a BaseEdge for every edge of the underlying graph. """
        for e in self._g.edges():
            yield BaseEdge(self._g, e)

    def get_edges_between(self, source, target):
        """
        Return all edges between source and target. Source and target can
        be either BaseNode or integer.
        """
        if isinstance(source, BaseNode):
            source = source._node
        if isinstance(target, BaseNode):
            target = target._node
        for e in self._g.edge(source, target, all_edges=True):
            yield BaseEdge(self._g, e)

    def get_edge(self, source, target, add_missing=False):
        """
        Return some edge between source and target, or None when the
        underlying graph reports no edge. Source and target can be either
        BaseNode or integer.
        """
        if isinstance(source, BaseNode):
            source = source._node
        if isinstance(target, BaseNode):
            target = target._node
        e = self._g.edge(source, target, add_missing)
        if e is None:
            return None
        return BaseEdge(self._g, e)

    def create_edge_attribute(self, name, kind, value=None):
        """ Create edge attribute `name` of type `kind` unless it exists. """
        if not self.has_edge_attribute(name):
            edge_attr = self._g.new_edge_property(kind, value)
            self._g.edge_properties[name] = edge_attr

    def alias_edge_attribute(self, name, alias):
        """ Make edge attribute `name` available under `alias` as well. """
        self._g.edge_properties[alias] = self._g.edge_properties[name]

    def create_edge_attributes(self, edge_attributes_list):
        """
        Create every missing edge attribute from a list of (name, kind)
        pairs.
        """
        for attr in edge_attributes_list:
            if not self.has_edge_attribute(attr[0]):
                edge_attr = self._g.new_edge_property(attr[1])
                self._g.edge_properties[attr[0]] = edge_attr

    def has_edge_attribute(self, name):
        """ Checks if an edge attribute already exists """
        return name in self._g.edge_properties

    def delete_edge_attribute(self, name):
        """ Delete edge attribute """
        del self._g.edge_properties[name]

    def add_edge(self, parent, child, edge_attributes_list=None):
        """
        Add an edge from `parent` to `child` (both BaseNode), set the given
        (attribute name, value) pairs on it and return it as a BaseEdge.
        """
        if edge_attributes_list is None:
            edge_attributes_list = []

        new_edge = self._g.add_edge(parent._node, child._node)
        for attr in edge_attributes_list:
            self._g.edge_properties[attr[0]][new_edge] = attr[1]
        return BaseEdge(self._g, new_edge)

    def edges_filter(self, edges_to_filter_set):
        """ Remove every edge contained in `edges_to_filter_set`. """
        edge_filter = self._g.new_edge_property("bool")
        for e in self.all_edges():
            edge_filter[e._edge] = e not in edges_to_filter_set
        self._g.set_edge_filter(edge_filter)
        self._g.purge_edges()

    def ungraph_tool(self, thingy, lemma_on_only_synset_node_dict):
        """
        Converts given data structure so that it no longer has any
        graph_tool dependencies.

        * a dict is converted recursively (keys and values),
        * a vertex property map becomes a dict keyed by the nodes found in
          the values of `lemma_on_only_synset_node_dict`,
        * an edge property map becomes a dict keyed by all edges,
        * anything else is returned unchanged.

        Raises NotImplementedError for a property map of any other key
        type.
        """
        if isinstance(thingy, dict):
            return {
                self.ungraph_tool(k, lemma_on_only_synset_node_dict):
                self.ungraph_tool(thingy[k], lemma_on_only_synset_node_dict)
                for k in thingy
            }

        # isinstance (not an exact type match) so that subclasses of
        # PropertyMap are converted as well.
        if not isinstance(thingy, gt.PropertyMap):
            return thingy

        key_type = thingy.key_type()
        if key_type == 'v':
            nodes_to_translate = set()
            for vset in lemma_on_only_synset_node_dict.values():
                nodes_to_translate.update(vset)
            return {node: thingy[node.use_graph_tool()]
                    for node in nodes_to_translate}
        if key_type == 'e':
            return {edge: thingy[edge.use_graph_tool()]
                    for edge in self.all_edges()}

        logger = logging.getLogger(__name__)
        logger.error('Unknown property type %s', key_type)
        # NotImplemented is a comparison sentinel, not an exception;
        # raising it would produce a confusing TypeError.
        raise NotImplementedError(
            'Unknown property type {}'.format(key_type))

    def generate_lemma_to_nodes_dict_synsets(self):
        """
        This method generates a utility dictionary, which maps lemmas to
        corresponding node objects. It is expensive in terms of the time
        needed to generate the dictionary. It should therefore be executed
        at the beginning of the runtime and later its results should be
        reused as many times as needed without re-executing the function.
        """
        lemma_to_nodes_dict = defaultdict(set)
        for node in self.all_nodes():
            try:
                lu_set = node.synset.lu_set
            except KeyError:
                continue

            for lu in lu_set:
                lemma = lu.lemma.lower()
                lemma_to_nodes_dict[lemma].add(node)

        self._lemma_to_nodes_dict = lemma_to_nodes_dict

    def generate_lemma_to_nodes_dict_lexical_units(self):
        """
        This method generates a utility dictionary, which maps lemmas to
        corresponding node objects. It is expensive in terms of the time
        needed to generate the dictionary. It should therefore be executed
        at the beginning of the runtime and later its results should be
        reused as many times as needed without re-executing the function.
        """
        lemma_to_nodes_dict = defaultdict(set)
        for node in self.all_nodes():
            # Best effort: nodes without a usable lexical unit are skipped.
            # `Exception` (not a bare except) so that KeyboardInterrupt and
            # SystemExit still propagate.
            try:
                lemma = node.lu.lemma.lower()
                lemma_to_nodes_dict[lemma].add(node)
            except Exception:
                continue

        self._lemma_to_nodes_dict = lemma_to_nodes_dict

    @property
    def lemma_to_nodes_dict(self):
        return self._lemma_to_nodes_dict

    def _make_lu_on_v_dict(self):
        """
        Makes dictionary lu on vertex
        """
        lu_on_vertex_dict = defaultdict(set)
        for node in self.all_nodes():
            try:
                nl = node.lu
            except Exception:
                continue

            if nl:
                lu_on_vertex_dict[nl.lu_id] = node

        self._lu_on_vertex_dict = lu_on_vertex_dict
# question adjacency matrix via unipartite projection
qm = m * m.T

# one edge for every nonzero entry of the adjacency matrix
g = Graph()
edges = zip(*qm.nonzero())
g.add_edge_list(edges)

# share of vertices that belong to the largest connected component
vfilt = label_largest_component(g)
f = np.sum(vfilt.a) / len(vfilt.a)
print('fraciton of nodes in largest cc: {}'.format(f))

# attach the question id to every vertex of the full graph
prop_question_id = g.new_vertex_property('int')
prop_question_id.a = np.array(list(id2q_map.values()))

# focus on largest CC
g.set_vertex_filter(vfilt)

# Re-index the graph so that the remaining vertices get consecutive ids.
# SO question: https://stackoverflow.com/questions/46264296/graph-tool-re-index-vertex-ids-to-be-consecutive-integers
n2i = dict((n, i) for i, n in enumerate(g.vertices()))
i2n = {i: n for n, i in n2i.items()}

new_g = Graph()
new_g.add_edge_list(
    [(n2i[e.source()], n2i[e.target()]) for e in g.edges()])

# carry the question ids over to the re-indexed graph
new_prop_question_id = new_g.new_vertex_property('int')
new_prop_question_id.a = [
    prop_question_id[i2n[i]] for i in range(new_g.num_vertices())
]
new_g.vertex_properties['question_id'] = new_prop_question_id