def rng_next_goal_rejection_sampling(start_node_ids, batch_size, gtG, rng,
                                     max_dist, min_dist, max_dist_to_compute,
                                     sampling_d, target_d, nodes, n_ori,
                                     step_size, bins, M):
    sample_start_nodes = start_node_ids is None
    dists = []
    pred_maps = []
    end_node_ids = []
    start_node_ids_ = []
    hardnesss = []
    gt_dists = []
    num_nodes = gtG.num_vertices()
    for i in range(batch_size):
        done = False
        while not done:
            if sample_start_nodes:
                start_node_id = rng.choice(num_nodes)
            else:
                start_node_id = start_node_ids[i]

            gt_dist = gt.topology.shortest_distance(
                gt.GraphView(gtG, reversed=False), source=start_node_id,
                target=None, max_dist=max_dist)
            gt_dist = np.array(gt_dist.get_array())
            ind = np.where(np.logical_and(gt_dist <= max_dist,
                                          gt_dist >= min_dist))[0]
            ind = rng.permutation(ind)
            gt_dist = gt_dist[ind] * 1.
            h_dist = heuristic_fn_vec(nodes[ind, :], nodes[[start_node_id], :],
                                      n_ori, step_size)[:, 0]
            hardness = 1. - h_dist / gt_dist
            sampled_ind = _rejection_sampling(rng, sampling_d, target_d, bins,
                                              hardness, M)
            if sampled_ind < ind.size:
                end_node_id = ind[sampled_ind]
                hardness = hardness[sampled_ind]
                gt_dist = gt_dist[sampled_ind]
                done = True

        # Compute distance from end node to all nodes, to return.
        dist, pred_map = gt.topology.shortest_distance(
            gt.GraphView(gtG, reversed=True), source=end_node_id, target=None,
            max_dist=max_dist_to_compute, pred_map=True)
        dist = np.array(dist.get_array())
        pred_map = np.array(pred_map.get_array())

        hardnesss.append(hardness)
        dists.append(dist)
        pred_maps.append(pred_map)
        start_node_ids_.append(start_node_id)
        end_node_ids.append(end_node_id)
        gt_dists.append(gt_dist)
    paths = None
    return (start_node_ids_, end_node_ids, dists, pred_maps, paths,
            hardnesss, gt_dists)
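# The snippet above leans on one graph-tool idiom used throughout this
# collection: build a reversed GraphView and run a truncated shortest-distance
# query from the goal, keeping the predecessor map. A minimal standalone
# sketch of that idiom (the toy graph and import aliases are illustrative
# assumptions, not part of the original code):
import graph_tool as gt
import graph_tool.topology
import numpy as np

g = gt.Graph(directed=True)
g.add_edge_list([(0, 1), (1, 2), (2, 3)])
dist, pred = gt.topology.shortest_distance(
    gt.GraphView(g, reversed=True), source=g.vertex(3), target=None,
    max_dist=2, pred_map=True)
# Distances *to* node 3; nodes beyond max_dist keep the int32 "infinity".
print(np.array(dist.get_array()))  # -> [2147483647, 2, 1, 0]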
def _get_gt_graph(g, directed, weights, combine_weights=None,
                  return_all=False):
    '''Returns the correct graph (view) given the options.'''
    import graph_tool as gt
    from graph_tool.stats import label_parallel_edges

    directed = g.is_directed() if directed is None else directed
    weights = "weight" if weights is True else weights
    weights = None if weights is False else weights

    if not directed and g.is_directed():
        if weights is not None:
            u, weights = _to_undirected(g, weights, combine_weights)
            if return_all:
                return u, u.graph, u.edge_attributes[weights]
            return u.graph
        graph = gt.GraphView(g.graph, directed=False)
        graph = gt.GraphView(graph, efilt=label_parallel_edges(graph).fa == 0)
        if return_all:
            return g, graph, None
        return graph

    if return_all:
        return g, g.graph, weights
    return g.graph
def get_distance_node_list(gtG, source_nodes, direction, weights=None):
    gtG_ = gt.Graph(gtG)
    v = gtG_.add_vertex()

    if weights is not None:
        weights = gtG_.edge_properties[weights]

    for s in source_nodes:
        e = gtG_.add_edge(s, int(v))
        if weights is not None:
            weights[e] = 0.

    if direction == 'to':
        dist = gt.topology.shortest_distance(
            gt.GraphView(gtG_, reversed=True), source=gtG_.vertex(int(v)),
            target=None, weights=weights)
    elif direction == 'from':
        dist = gt.topology.shortest_distance(
            gt.GraphView(gtG_, reversed=False), source=gtG_.vertex(int(v)),
            target=None, weights=weights)

    dist = np.array(dist.get_array())
    dist = dist[:-1]
    if weights is None:
        dist = dist - 1
    return dist
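# A usage sketch for get_distance_node_list (the toy graph is an illustrative
# assumption; assumes graph_tool as gt with gt.topology and numpy as np in
# scope, as in the snippet above). The auxiliary-vertex trick answers a
# multi-source query with a single shortest-distance call; the -1 correction
# removes the unit edge through the auxiliary vertex.
import graph_tool as gt
import graph_tool.topology
import numpy as np

g = gt.Graph(directed=True)
g.add_edge_list([(0, 1), (1, 2), (2, 3), (3, 4)])
# Unweighted distance from every node *to* its nearest source in {0, 4}.
print(get_distance_node_list(g, source_nodes=[0, 4], direction='to'))
# -> [0 3 2 1 0]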
def graph_edges_split(G, p):
    """
    Split graph edges into disjoint training and test sets.

    Parameters
    ----------
    G : graph_tool.Graph
        Graph object from the graph-tool module.
    p : float, (0, 1]
        Test proportion.

    Returns
    -------
    train_graph, test_graph : graph_tool.Graph
    """
    N = G.num_edges()
    K = int(N * p)
    train_mask = np.array([0] * K + [1] * (N - K), dtype=bool)
    np.random.shuffle(train_mask)
    test_mask = ~train_mask

    train_graph = gt.GraphView(G, directed=False)
    test_graph = gt.GraphView(G, directed=False)

    prop_train = train_graph.new_edge_property("bool")
    prop_train.a = train_mask
    prop_test = test_graph.new_edge_property("bool")
    prop_test.a = test_mask

    train_graph.set_edge_filter(prop_train)
    test_graph.set_edge_filter(prop_test)
    return train_graph, test_graph
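# A quick sketch exercising graph_edges_split on a small lattice (the lattice
# generator is just an illustrative choice; assumes the function above with
# its gt/np imports in scope). The two edge-filtered views share the
# underlying graph but partition its edge set.
import graph_tool as gt
import graph_tool.generation

g = gt.generation.lattice([5, 5])  # 25 vertices, 40 edges
train_g, test_g = graph_edges_split(g, p=0.2)
assert train_g.num_edges() + test_g.num_edges() == g.num_edges()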
def subgraph(self, nodes=None, vertex_filter=None, edge_filter=None,
             copy_positions=True):
    if nodes is not None:
        view = graph_tool.GraphView(
            self.network, vfilt=lambda x: self.id_to_label[x] in nodes).copy()
    elif vertex_filter is not None or edge_filter is not None:
        view = graph_tool.GraphView(self.network, vfilt=vertex_filter,
                                    efilt=edge_filter).copy()
    else:
        raise ValueError("at least one filter must be specified")

    old_vertex_ids = set(map(int, view.vertices()))

    if copy_positions and self.node_layout is not None:
        vertex_positions = [
            self.node_layout.get_position(v_id) for v_id in view.vertices()
        ]
    else:
        vertex_positions = None

    node_map_keys = valfilter(lambda x: x in old_vertex_ids,
                              self.node_map).keys()

    # TODO: vertex ids also change
    # TODO: vertex weights
    if self.edge_weight is not None:
        edge_weight = [self.edge_weight[e_id] for e_id in view.edges()]
    else:
        edge_weight = None

    view.purge_vertices()
    view.purge_edges()

    if edge_weight is not None:
        weight_prop = view.new_edge_property("double")
        weight_prop.a = edge_weight
    else:
        weight_prop = None

    result = Network(view, edge_weight=weight_prop)
    result.node_map = dict(zip(node_map_keys, map(int, view.vertices())))
    result.id_to_label = itemmap(reversed, result.node_map)

    if vertex_positions:
        result.layout_nodes(method="precomputed", positions=vertex_positions)

    return result
def rng_next_goal(start_node_ids, batch_size, gtG, rng, max_dist,
                  max_dist_to_compute, node_room_ids, nodes=None,
                  compute_path=False, dists_from_start_node=None):
    # Compute the distance field from the starting location, and then pick a
    # destination in another room if possible, otherwise anywhere outside
    # this room.
    dists = []
    pred_maps = []
    paths = []
    end_node_ids = []
    for i in range(batch_size):
        room_id = node_room_ids[start_node_ids[i]]
        # Compute distances.
        if dists_from_start_node is None:
            dist, pred_map = gt.topology.shortest_distance(
                gt.GraphView(gtG, reversed=False),
                source=gtG.vertex(start_node_ids[i]), target=None,
                max_dist=max_dist_to_compute, pred_map=True)
            dist = np.array(dist.get_array())
        else:
            dist = dists_from_start_node[i]

        # Randomly sample nodes which are within max_dist.
        near_ids = dist <= max_dist
        near_ids = near_ids[:, np.newaxis]

        # Check to see if there is a non-negative node which is close enough.
        non_same_room_ids = node_room_ids != room_id
        non_hallway_ids = node_room_ids != -1
        good1_ids = np.logical_and(
            near_ids, np.logical_and(non_same_room_ids, non_hallway_ids))
        good2_ids = np.logical_and(near_ids, non_hallway_ids)
        good3_ids = near_ids
        if np.any(good1_ids):
            end_node_id = rng.choice(np.where(good1_ids)[0])
        elif np.any(good2_ids):
            end_node_id = rng.choice(np.where(good2_ids)[0])
        elif np.any(good3_ids):
            end_node_id = rng.choice(np.where(good3_ids)[0])
        else:
            logging.error('Did not find any good nodes.')

        # Compute distance to this new goal for doing distance queries.
        dist, pred_map = gt.topology.shortest_distance(
            gt.GraphView(gtG, reversed=True), source=gtG.vertex(end_node_id),
            target=None, max_dist=max_dist_to_compute, pred_map=True)
        dist = np.array(dist.get_array())
        pred_map = np.array(pred_map.get_array())

        dists.append(dist)
        pred_maps.append(pred_map)
        end_node_ids.append(end_node_id)

        path = None
        if compute_path:
            path = get_path_ids(start_node_ids[i], end_node_ids[i], pred_map)
        paths.append(path)
    return start_node_ids, end_node_ids, dists, pred_maps, paths
def get_hardness_distribution(gtG, max_dist, min_dist, rng, trials, bins,
                              nodes, n_ori, step_size):
    heuristic_fn = lambda node_ids, node_id: \
        heuristic_fn_vec(nodes[node_ids, :], nodes[[node_id], :], n_ori,
                         step_size)
    num_nodes = gtG.num_vertices()
    gt_dists = []
    h_dists = []
    for i in range(trials):
        end_node_id = rng.choice(num_nodes)
        gt_dist = gt.topology.shortest_distance(
            gt.GraphView(gtG, reversed=True),
            source=gtG.vertex(end_node_id), target=None, max_dist=max_dist)
        gt_dist = np.array(gt_dist.get_array())
        ind = np.where(np.logical_and(gt_dist <= max_dist,
                                      gt_dist >= min_dist))[0]
        gt_dist = gt_dist[ind]
        h_dist = heuristic_fn(ind, end_node_id)[:, 0]
        gt_dists.append(gt_dist)
        h_dists.append(h_dist)
    gt_dists = np.concatenate(gt_dists)
    h_dists = np.concatenate(h_dists)
    hardness = 1. - h_dists * 1. / gt_dists
    hist, _ = np.histogram(hardness, bins)
    hist = hist.astype(np.float64)
    hist = hist / np.sum(hist)
    return hist
def connect_task(graph, task, task_vtx):
    target = task.get('slot_id', False)
    if target:
        for v in graph.vertices():
            if graph.vp.slot_id[v] == target:
                add_edge_type(graph, task_vtx, v, 'slot')
                break
        else:
            raise RuntimeError("no matching slot-id found")
    else:
        # Derive the task slot from the executable leaves.
        executable = gt.GraphView(graph, vfilt=graph.vp.executable)
        e_leaves = gt.GraphView(
            executable,
            vfilt=lambda v: v.in_degree() == 1 and v.out_degree() == 0)
        for v in e_leaves.vertices():
            add_edge_type(graph, task_vtx, v, 'slot')
def __init__(self, graph: Graph, vfilt_name: str = None,
             efilt_name: str = None):
    super(SubGraph, self).__init__()
    # Add vertex and/or edge filters that define the sub-graph. By default,
    # they are treated as bool.
    assert vfilt_name is not None or efilt_name is not None, \
        "At least one of vfilt/efilt must be given."
    self._vfilt_name = vfilt_name
    self._efilt_name = efilt_name

    gt_vfilt = gt_efilt = None
    # Add (internalize) vertex and edge filters to the graph.
    if vfilt_name is not None:
        graph.add_vertex_property(name=vfilt_name, of_type="bool",
                                  default=False)
        gt_vfilt = graph._graph.vertex_properties[vfilt_name]
    if efilt_name is not None:
        graph.add_edge_property(name=efilt_name, of_type="bool", default=True)
        gt_efilt = graph._graph.edge_properties[efilt_name]

    # Update the internal graph representation.
    self._graph = gt.GraphView(g=graph._graph, vfilt=gt_vfilt, efilt=gt_efilt)
def local_eff(g):
    """Returns the local efficiency of a graph g."""
    n = g.num_vertices()
    eff_sum = 0
    for node in range(n):
        # Extract the neighbors of node_i.
        vfilt = g.new_vertex_property('bool')
        neighbor = g.vertex(node).all_neighbors()
        # Create a sub graph containing those neighbors
        # and calculate their shortest distances.
        for n_node in neighbor:
            vfilt[n_node] = True
        sub = gt.GraphView(g, vfilt)
        sub = Graph(sub, prune=True)  # prune for a true copy
        sub_dist = gt.topology.shortest_distance(sub)

        # Calculate the local efficiency of node_i. Property maps are not
        # directly iterable, so walk the vertices and their distance vectors.
        eff_sum_i = 0
        for v in sub.vertices():
            for dist in sub_dist[v]:
                if dist != 0:
                    eff_sum_i += 1 / dist
        deg = g.vertex(node).out_degree()
        if deg > 1:
            eff_sum_i = eff_sum_i / (deg * (deg - 1))
        eff_sum += eff_sum_i
    return eff_sum / n
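# Sanity-check sketch for local_eff (assumes the imports the snippet relies
# on: graph_tool as gt with gt.topology, and Graph from graph_tool). On a
# 4-clique every neighborhood is itself a triangle, so every pairwise
# neighbor distance is 1 and the local efficiency is exactly 1.
import graph_tool as gt
import graph_tool.topology
from graph_tool import Graph

g = Graph(directed=False)
g.add_vertex(4)
g.add_edge_list([(0, 1), (0, 2), (0, 3), (1, 2), (1, 3), (2, 3)])
print(local_eff(g))  # -> 1.0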
def find_subgraphs(self, Q_synset, syn_graph):
    nodes = dict()
    for syn_id, activation_value in Q_synset.items():
        if activation_value > self.tau_3:
            try:
                node = syn_graph.get_node_for_synset_id(syn_id)
                nodes[node.use_graph_tool()] = node
            except Exception:
                continue

    filt = syn_graph.use_graph_tool().new_vertex_property('boolean')
    for vertex, node in nodes.items():
        filt[vertex] = True

    g = gt.GraphView(syn_graph.use_graph_tool(), filt)

    lead_nodes = []
    for graph in self.subgraphs(g):
        lead = None
        for vertex in graph.vertices():
            if not lead:
                lead = nodes[vertex]
            else:
                lead_id = lead.synset.synset_id
                new_id = nodes[vertex].synset.synset_id
                if Q_synset[lead_id] < Q_synset[new_id]:
                    lead = nodes[vertex]
        lead_nodes.append(lead)
    return lead_nodes
def gt_subgraph(g: gt.Graph, vertices):
    filter_option = g.new_vertex_property('bool')
    for v in vertices:
        filter_option[v] = True
    sub = gt.GraphView(g, filter_option).copy()
    return sub
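# Minimal sketch of gt_subgraph (the toy graph is illustrative). The .copy()
# in the function materializes the filtered view, so the result reports only
# the kept vertices and the edges among them.
import graph_tool as gt

g = gt.Graph(directed=False)
g.add_vertex(5)
g.add_edge_list([(0, 1), (1, 2), (2, 3), (3, 4)])
sub = gt_subgraph(g, [0, 1, 2])
print(sub.num_vertices(), sub.num_edges())  # -> 3 2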
def prepare_cora(directed=False):
    print("directed", directed)
    edges = np.genfromtxt('res/cora/cora.edges', dtype=int,
                          delimiter=',')[:, :2] - 1
    labels = np.genfromtxt('res/cora/cora.node_labels', dtype=int,
                           delimiter=',')[:, 1]

    g = graph_tool.Graph(directed=directed)
    g.add_edge_list(edges)

    vfilt = graph_tool.topology.label_largest_component(g, directed=False)
    labels = labels[vfilt.a.astype(bool)]
    g = graph_tool.GraphView(g, vfilt=vfilt)
    g.purge_vertices()

    weight_prop = g.new_edge_property("int", val=1)

    # spc = shortest_path_cover_logn_apx(g, weight_prop)
    spc = pickle.load(
        open("res/cora/largest_comp_new_spc_" + str(directed) + ".p", "rb"))
    print("spc", len(spc))
    pickle.dump(
        spc,
        open("res/cora/largest_comp_new_spc_" + str(directed) + ".p", "wb"))
    return g, labels, spc
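# The largest-component extraction used above (and in prepare_coil and
# prepare_citeseer below), as a standalone sketch with an illustrative toy
# graph. GraphView only masks the filtered-out vertices; purge_vertices()
# then drops them for real.
import graph_tool
import graph_tool.topology

g = graph_tool.Graph(directed=False)
g.add_edge_list([(0, 1), (1, 2), (3, 4)])
vfilt = graph_tool.topology.label_largest_component(g, directed=False)
g = graph_tool.GraphView(g, vfilt=vfilt)
g.purge_vertices()
print(g.num_vertices())  # -> 3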
def rng_target_dist_field(batch_size, gtG, rng, max_dist, max_dist_to_compute,
                          nodes=None, compute_path=False):
    # Sample a single node, compute distance to all nodes less than max_dist,
    # and sample nodes which are a particular distance away.
    dists = []
    pred_maps = []
    paths = []
    start_node_ids = []
    end_node_ids = rng.choice(gtG.num_vertices(), size=(batch_size,),
                              replace=False).tolist()
    for i in range(batch_size):
        dist, pred_map = gt.topology.shortest_distance(
            gt.GraphView(gtG, reversed=True),
            source=gtG.vertex(end_node_ids[i]), target=None,
            max_dist=max_dist_to_compute, pred_map=True)
        dist = np.array(dist.get_array())
        pred_map = np.array(pred_map.get_array())
        dists.append(dist)
        pred_maps.append(pred_map)

        # Randomly sample nodes which are within max_dist.
        near_ids = np.where(dist <= max_dist)[0]
        start_node_id = rng.choice(near_ids, size=(1,), replace=False)[0]
        start_node_ids.append(start_node_id)

        path = None
        if compute_path:
            path = get_path_ids(start_node_ids[i], end_node_ids[i], pred_map)
        paths.append(path)
    return start_node_ids, end_node_ids, dists, pred_maps, paths
def subgraph(self, vertices: List[int]) -> 'DWGraph':
    filter_option = self.new_vertex_property('bool')
    for v in vertices:
        filter_option[v] = True
    sub = gt.GraphView(self, filter_option)
    new_g = DWGraph.from_graph(sub, self.is_directed_prop,
                               self.is_weighted_prop)
    return new_g
def subgraphs(self, g):
    if g.num_vertices() == 0:
        return  # PEP 479: do not raise StopIteration inside a generator

    prop = label_largest_component(g, False)
    filt = g.new_vertex_property('boolean')
    for v in g.vertices():
        if prop[v] > 0:
            filt[v] = True
    yield gt.GraphView(g, filt)

    filt = g.new_vertex_property('boolean')
    for v in g.vertices():
        if prop[v] <= 0:
            filt[v] = True
    gv = gt.GraphView(g, filt)
    for sgv in self.subgraphs(gv):
        yield sgv
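# A small driver for the recursive component-splitting generator above. It is
# written as a method, so this sketch binds it to a throwaway class; the
# binding and the toy graph are illustrative assumptions.
import graph_tool as gt
from graph_tool.topology import label_largest_component

class _Splitter:
    pass

_Splitter.subgraphs = subgraphs  # assumes the def above is module-level

g = gt.Graph(directed=False)
g.add_vertex(7)
g.add_edge_list([(0, 1), (1, 2), (2, 3), (4, 5)])  # plus isolated vertex 6
for comp in _Splitter().subgraphs(g):
    print(sorted(int(v) for v in comp.vertices()))
# -> [0, 1, 2, 3], then [4, 5], then [6]: components, largest first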
def make(self, target):
    # Flow from receptive_field_list to receptive_field_graph.
    receptive_field_list = self.make_receptive_field(target)
    receptive_field_graph = graph_tool.GraphView(
        self.graph, vfilt=lambda x: x in receptive_field_list)
    receptive_field_graph = self.register_distance(target,
                                                   receptive_field_graph)
    canonized_receptive_field = self.canonize_receptive_field(
        receptive_field_graph)
    return (canonized_receptive_field[:self.receptive_field_size],
            receptive_field_graph)
def save_root_children(g: gt.Graph, root_id) -> gt.Graph:
    zero_id = vp_map(g, 'zero_id', 'int')
    roots = set()
    for v in g.vertices():
        if zero_id[v] == root_id:
            roots.add(v)

    leave_prop = g.new_vertex_property('bool')
    for v in roots:
        gt_top.label_out_component(g, v, leave_prop)

    sub = gt.GraphView(g, leave_prop)
    new_g = create_q_graph(sub, add_back_reference=False)
    return new_g
def subsample_archive_from_matching(a: gt.Graph, mg: gt.Graph,
                                    t_graph: gt.Graph, e_idx):
    leave_prop = a.new_vertex_property('bool')
    one_prop = vp_map(mg, 'one_id', 'int')
    to_save = set([one_prop[n] for n in mg.vertices()])
    for v in a.vertices():
        leave_prop[v] = a.vertex_index[v] in to_save

    # Get rid of all the nodes in A that aren't there.
    sub = gt.GraphView(a, leave_prop)
    new_g = create_q_graph(sub, add_back_reference=True)
    return new_g
def prepare_coil(weighted=False):
    print("weighted", weighted)
    dataset = 3
    X = np.genfromtxt('res/benchmark/SSL,set=' + str(dataset) + ',X.tab')
    # X = (X - np.min(X, axis=0)) / (np.max(X, axis=0) - np.min(X, axis=0))
    y = np.genfromtxt('res/benchmark/SSL,set=' + str(dataset) + ',y.tab')

    n = X.shape[0]
    dists = scipy.spatial.distance.cdist(X, X)
    y = y[:n]
    W = dists[:n, :n]  # np.exp(-(dists) ** 2 / (2 * sigma ** 2))

    k = 10
    for i in range(n):
        # Keep the k nearest neighbours; the node itself has zero weight.
        W[i, np.argsort(W[i])[(k + 1):]] = np.inf
    np.fill_diagonal(W, 0)

    weights = W[(W < np.inf) & (W > 0)].flatten()
    edges = np.array(np.where((W < np.inf) & (W > 0))).T
    edges = edges[edges[:, 0] < edges[:, 1]]

    # Construct the actual graph.
    g = graph_tool.Graph(directed=False)
    g.add_vertex(n)
    g.add_edge_list(edges)

    vfilt = graph_tool.topology.label_largest_component(g, directed=False)
    y = y[vfilt.a.astype(bool)]
    g = graph_tool.GraphView(g, vfilt=vfilt)
    g.purge_vertices()

    if weighted:
        weight_prop = g.new_edge_property("double", vals=weights)
    else:
        weight_prop = g.new_edge_property("int", val=1)

    # spc = shortest_path_cover_logn_apx(g, weight_prop)
    spc = pickle.load(
        open("res/coil/largest_comp_10knn_spc_" + "_" + str(weighted) + ".p",
             "rb"))
    # pickle.dump(spc, open("res/coil/largest_comp_10knn_spc_" + "_" +
    #                       str(weighted) + ".p", "wb"))
    print(len(spc))
    return g, y, spc
def build_community_graph(self):
    from aves.visualization.networks import NodeLink

    (
        tree,
        membership,
        order,
    ) = graph_tool.inference.nested_blockmodel.get_hierarchy_tree(
        self.state, empty_branches=False)

    self.nested_graph = tree
    self.nested_graph.set_directed(False)

    self.radial_positions = np.array(
        list(
            graph_tool.draw.radial_tree_layout(
                self.nested_graph, self.nested_graph.num_vertices() - 1)))
    self.node_angles = np.degrees(
        np.arctan2(self.radial_positions[:, 1], self.radial_positions[:, 0]))
    self.node_angles_dict = dict(
        zip(map(int, self.nested_graph.vertices()), self.node_angles))
    self.node_ratio = np.sqrt(
        np.dot(self.radial_positions[0], self.radial_positions[0]))

    self.network.layout_nodes(
        method="precomputed",
        positions=self.radial_positions[:self.network.num_vertices()],
        angles=self.node_angles,
        ratios=np.sqrt(
            np.sum(self.radial_positions * self.radial_positions, axis=1)),
    )

    self.community_graph = Network(
        graph_tool.GraphView(
            self.nested_graph,
            directed=True,
            vfilt=lambda x: x >= self.network.num_vertices(),
        ))

    self.community_nodelink = NodeLink(self.community_graph)
    self.community_nodelink.layout_nodes(
        method="precomputed",
        positions=self.radial_positions[self.network.num_vertices():],
        angles=self.node_angles,
        ratios=self.node_ratio,
    )
    self.community_nodelink.set_node_drawing(method="plain")
    self.community_nodelink.set_edge_drawing(method="plain")
def dense_activations_to_graph(model, x_in, thresh=1.0E-5):
    G = dense_model_to_graph(model)
    input_layer = model.layers[0]
    dense_layers = [l for l in model.layers if isinstance(l, Dense)]
    dense_outputs = [layer.output for layer in dense_layers]
    # Initialize a Keras op that includes all layer outputs.
    dense_func = K.function(inputs=model.inputs, outputs=dense_outputs)
    # Apply the function to the input.
    layer_outputs = dense_func(x_in)
    # Add an axis to all outputs (including the input layer).
    outputs = [
        np.expand_dims(output, axis=-1) for output in [x_in] + layer_outputs
    ]
    # Compute output masks.
    masks = [output > thresh for output in outputs]
    # For each layer, starting with the input, broadcast the output of shape
    # (batch, d, 1) to (batch, d, k), where k is the dimension of the next
    # layer; i.e. repeat outputs for each node.
    Ws = [np.broadcast_to(output, (output.shape[0], output.shape[1],
                                   layer.get_weights()[0].shape[-1]))
          for layer, output in zip(dense_layers, outputs)]

    # 1. apply masks to outputs
    # 2. reshape/flatten masks from (batch, d, k) to (batch, d*k)
    def apply_mask(W, mask):
        return (W * mask).reshape((-1, np.prod(W.shape[1:])))

    # 3. concatenate all masked outputs together to get the full edge list
    Ws_masked = np.concatenate(
        [apply_mask(W, mask) for W, mask in zip(Ws, masks)], axis=1)
    # Create a mask over final outputs for the graph-tool edge filter.
    edge_masks = ~np.isclose(Ws_masked, 0.0, atol=thresh)
    # Initialize graph views for all outputs in the batch.
    Gs = [
        gt.GraphView(G, efilt=G.new_ep('bool', vals=mask))
        for mask in edge_masks
    ]

    layer_sizes = [x_in.shape[-1]] + [layer.units for layer in dense_layers]
    labels = reduce(lambda cum, n: cum + n,
                    [[i] * size for i, size in enumerate(layer_sizes)])
    activations = np.concatenate(outputs, axis=1).squeeze()
    # Add degree and layer properties to each graph view.
    for i, (g, acts) in enumerate(zip(Gs, Ws_masked)):
        g.ep['activation'] = g.new_ep('float', vals=acts)
        g.vp['activation'] = g.new_vp('float', vals=activations[i])
        g.vp['degree'] = g.degree_property_map('total',
                                               weight=g.ep['activation'])
        g.vp['layer'] = g.new_vp('int', labels)
    return G, Gs
def rng_room_to_room(batch_size, gtG, rng, max_dist, max_dist_to_compute,
                     node_room_ids, nodes=None, compute_path=False):
    # Sample one of the rooms and compute the distance field. Pick a
    # destination in another room if possible, otherwise anywhere outside
    # this room.
    dists = []
    pred_maps = []
    paths = []
    start_node_ids = []
    end_node_ids = []
    room_ids = np.unique(node_room_ids[node_room_ids[:, 0] >= 0, 0])
    for i in range(batch_size):
        room_id = rng.choice(room_ids)
        end_node_id = rng.choice(np.where(node_room_ids[:, 0] == room_id)[0])
        end_node_ids.append(end_node_id)

        # Compute distances.
        dist, pred_map = gt.topology.shortest_distance(
            gt.GraphView(gtG, reversed=True), source=gtG.vertex(end_node_id),
            target=None, max_dist=max_dist_to_compute, pred_map=True)
        dist = np.array(dist.get_array())
        pred_map = np.array(pred_map.get_array())
        dists.append(dist)
        pred_maps.append(pred_map)

        # Randomly sample nodes which are within max_dist.
        near_ids = dist <= max_dist
        near_ids = near_ids[:, np.newaxis]

        # Check to see if there is a non-negative node which is close enough.
        non_same_room_ids = node_room_ids != room_id
        non_hallway_ids = node_room_ids != -1
        good1_ids = np.logical_and(
            near_ids, np.logical_and(non_same_room_ids, non_hallway_ids))
        good2_ids = np.logical_and(near_ids, non_hallway_ids)
        good3_ids = near_ids
        if np.any(good1_ids):
            start_node_id = rng.choice(np.where(good1_ids)[0])
        elif np.any(good2_ids):
            start_node_id = rng.choice(np.where(good2_ids)[0])
        elif np.any(good3_ids):
            start_node_id = rng.choice(np.where(good3_ids)[0])
        else:
            logging.error('Did not find any good nodes.')
        start_node_ids.append(start_node_id)

        path = None
        if compute_path:
            path = get_path_ids(start_node_ids[i], end_node_ids[i], pred_map)
        paths.append(path)
    return start_node_ids, end_node_ids, dists, pred_maps, paths
def ext_local_eff(g, d):
    """Returns the extended local efficiency of a network g up to a neighbor
    depth d.

    The measure is experimental, not an established concept. Motivated by
    the concept of extended clustering.
    """
    n = g.num_vertices()
    eff_sum = 0
    # Extract the neighbors of node_i up to depth d and
    # create a sub graph containing those neighbors.
    for node in range(n):
        vfilt = g.new_vertex_property('bool')
        neighbors = []
        node_n1 = node
        j = -1
        # Code is a bit convoluted to allow a variable depth.
        for _ in range(d):
            k = len(neighbors)
            while j < k:
                for node_n2 in g.vertex(node_n1).all_neighbors():
                    neighbors.append(int(node_n2))
                    vfilt[node_n2] = True
                j += 1
                node_n1 = neighbors[j]

        # Calculate the neighbor distances up to depth d.
        deg = g.vertex(node).out_degree()
        sub = gt.GraphView(g, vfilt)
        sub_dist = []
        for i in range(deg):
            sub_dist = sub_dist + list(
                gt.topology.shortest_distance(
                    sub, source=g.vertex(neighbors[i]),
                    target=[g.vertex(m) for m in neighbors[:deg]]))

        # Calculate the local efficiency of node_i.
        eff_sum_i = 0
        for dist in sub_dist:
            if dist != 0:
                eff_sum_i += 1 / dist
        if deg > 1:  # guard against division by zero for degree <= 1
            eff_sum += eff_sum_i / (deg * (deg - 1))
    return eff_sum / n
def prepare_citeseer(directed=False, weighted=False):
    print("directed", directed)
    attributes_df = pd.read_csv('res/citeseer/citeseer.content', sep="\t",
                                header=None, dtype=str)
    features = attributes_df.iloc[:, 1:-1].to_numpy(dtype=int)
    labels, _ = pd.factorize(attributes_df.iloc[:, -1])
    new_ids, old_ids = pd.factorize(attributes_df.iloc[:, 0])

    edges_df = pd.read_csv('res/citeseer/citeseer.cites', sep="\t",
                           header=None, dtype=str)
    edges_df = edges_df[edges_df.iloc[:, 0].apply(lambda x: x in old_ids)]
    edges_df = edges_df[edges_df.iloc[:, 1].apply(lambda x: x in old_ids)]

    renamed = edges_df.replace(old_ids, new_ids)
    edges = renamed.to_numpy(dtype=int)
    edges = np.fliplr(edges)

    g = graph_tool.Graph(directed=directed)
    g.add_edge_list(edges)

    vfilt = graph_tool.topology.label_largest_component(g, directed=False)
    labels = labels[vfilt.a.astype(bool)]
    g = graph_tool.GraphView(g, vfilt=vfilt)
    g.purge_vertices()

    weight = np.sum(np.abs(features[edges[:, 0]] - features[edges[:, 1]]),
                    axis=1)
    if weighted:
        weight_prop = g.new_edge_property("int", vals=weight)
    else:
        weight_prop = g.new_edge_property("int", val=1)

    # spc = shortest_path_cover_logn_apx(g, weight_prop)
    spc = pickle.load(
        open(
            "res/citeseer/largest_comp_new_spc_directed_" + str(directed) +
            "_weighted_" + str(weighted) + ".p", "rb"))
    print("spc", len(spc))
    # pickle.dump(spc, open("res/citeseer/largest_comp_new_spc_directed_" +
    #                       str(directed) + "_weighted_" + str(weighted) +
    #                       ".p", "wb"))
    return g, labels, spc
def clear_unconnected(g: gt.Graph, key_node) -> gt.Graph:
    # Get rid of any component that doesn't contain the key node.
    label_map, hist = gt_top.label_components(g, directed=False)
    zero_id = vp_map(g, 'zero_id', 'int')
    key_comps = set()
    for v in g.vertices():
        if zero_id[v] == key_node:
            key_comps.add(label_map[v])

    leave_prop = g.new_vertex_property('bool')
    for v in g.vertices():
        leave_prop[v] = label_map[v] in key_comps

    sub = gt.GraphView(g, leave_prop)
    new_g = create_q_graph(sub, add_back_reference=False)
    return new_g
def gt_net(model):
    # Extract nodes.
    mets = [m.id for m in model.metabolites]
    rxns = [r.id for r in model.reactions]
    nodes = mets + rxns
    # Make a dict: {node_id: index}.
    dd = {nodes[n]: n for n in range(len(nodes))}

    # Extract edges.
    substrates = []
    products = []
    for r in model.reactions:
        for s in r.reactants:
            substrates.append((s.id, r.id))
        for p in r.products:
            products.append((r.id, p.id))
    edges = substrates + products
    # Translate edge ids -> edge indices.
    edges_index = [(dd[e[0]], dd[e[1]]) for e in edges]

    # Initialize and populate the graph.
    G = gt.Graph()
    G.add_edge_list(edges_index)

    # Extract the largest network component.
    l = gt.topology.label_largest_component(G)
    GG = gt.GraphView(G, vfilt=l)
    return GG
def sgm_match(t_graph: gt.Graph, g_graph: gt.Graph, delta, tau, n_idx,
              e_idx) -> gt.Graph:
    # t_graph is a query tree and g_graph is a query graph. delta is the
    # score slack we accept relative to a perfect match; tau bounds how far
    # this tree may be from the graph, at most. n_idx and e_idx are indices
    # of node and edge attributes.
    root_match = [v for v in t_graph.vertices() if v.in_degree() == 0]
    root = root_match[0]

    n_keys = list(n_idx.keys())[0]
    e_keys = list(e_idx.keys())[0]

    print('Printing MDST Graph')
    print(root)
    print_graph(t_graph)

    # Step 1: Get all the matches for the nodes.
    node_matches = dict()
    for v in t_graph.vertices():
        if t_graph.vp[n_keys][v] in list(n_idx[n_keys].keys()):
            node_matches[v] = n_idx[n_keys][t_graph.vp[n_keys][v]]
        else:
            node_matches[v] = set()

    # Step 2: Get all the edge matches, counting only those whose endpoints
    # also have matching nodes.
    edge_matches = dict()
    for e in t_graph.edges():
        if t_graph.ep[e_keys][e] in list(e_idx[e_keys].keys()):
            edge_matches[e] = e_idx[e_keys][t_graph.ep[e_keys][e]]
        else:
            edge_matches[e] = set()
        edge_matches[e] = set([
            em for em in edge_matches[e]
            if em[0] in node_matches[e.source()]
            and em[1] in node_matches[e.target()]
        ])

    # Scoring, initially, is super-simple: 1 for a match, 0 otherwise,
    # everything created equal. Score everything and put it in a graph.
    match_graph = gt.Graph(directed=True)

    mg_edges = set()
    mg_vertices = set()
    mg_vertices_to_index = {}
    for eT in t_graph.edges():
        for eG in edge_matches[eT]:
            v1 = (eT.source(), eG[0])
            v2 = (eT.target(), eG[1])
            mg_vertices.add(v1)
            mg_vertices.add(v2)
            mg_edges.add((v1, v2))

    zero_id = vp_map(match_graph, 'zero_id')
    one_id = vp_map(match_graph, 'one_id')
    for tup in mg_vertices:
        v = match_graph.add_vertex()
        zero_id[v], one_id[v] = tup
        mg_vertices_to_index[tup] = v
    for t1, t2 in mg_edges:
        match_graph.add_edge(mg_vertices_to_index[t1],
                             mg_vertices_to_index[t2])

    solo_score_vp = vp_map(match_graph, 'solo_score', 'int')
    score_vp = vp_map(match_graph, 'score_v', 'int')
    score_ep = ep_map(match_graph, 'score_e', 'int')
    path_vp = vp_map(match_graph, 'path', 'object')
    g_graph_original = original_vp(g_graph)
    t_graph_original = original_vp(t_graph)
    for v in match_graph.vertices():
        solo_score_vp[v] = 1
        score_vp[v] = 1
        # Seed each match vertex with its pair of original nodes.
        d = coll.deque()
        d.append((t_graph_original[zero_id[v]], g_graph_original[one_id[v]]))
        path_vp[v] = d
    for e in match_graph.edges():
        score_ep[e] = 1

    # Get rid of anybody flying solo.
    match_graph = clear_unconnected(match_graph, root)

    # Now acquire/organize all hypotheses with scores above
    # Max_Score - tau - delta. Figure out how much score you could possibly
    # get at every node in the query.
    max_score_v = vp_map(t_graph, 'max_score_v', 'int')
    max_score_e = ep_map(t_graph, 'max_score_e', 'int')
    score_vp = vp_map(match_graph, 'score_v', 'int')
    score_ep = ep_map(match_graph, 'score_e', 'int')
    path_vp = vp_map(match_graph, 'path', 'object')
    zero_id = vp_map(match_graph, 'zero_id')

    for n in t_graph.vertices():
        max_score_v[n] = 1
    for e in t_graph.edges():
        max_score_e[e] = 1

    bfs_edges = list(gt_s.bfs_iterator(t_graph, source=root))
    reversed_bfs_edges = list(reversed(bfs_edges))
    t_index = t_graph.vertex_index

    for e in reversed_bfs_edges:
        # Reverse BFS order handles leaf nodes first.
        # What's the best score we could get at this node?
        v1, v2 = e
        max_score_v[v1] += max_score_v[v2] + max_score_e[e]

        # Find all the edges equivalent to this one in the match graph.
        edge_matches = [
            (eG1, eG2) for eG1, eG2 in match_graph.edges()
            if zero_id[eG1] == t_index[v1] and zero_id[eG2] == t_index[v2]
        ]
        parent_nodes = set([eM1 for eM1, eM2 in edge_matches])
        for p in parent_nodes:
            child_nodes = [eM2 for eM1, eM2 in edge_matches if eM1 == p]
            # Pick the best-scoring child, then absorb its score and path.
            best_score = 0
            best_path = None
            for c in child_nodes:
                c_edge = match_graph.edge(p, c)
                c_score = score_vp[c] + score_ep[c_edge]
                if c_score > best_score:
                    best_score = c_score
                    best_path = path_vp[c]
            score_vp[p] += best_score
            if best_path is not None:
                for pathNode in best_path:
                    path_vp[p].appendleft(pathNode)

    # Clean it up: keep only nodes within delta of the best possible score.
    leave_prop = match_graph.new_vertex_property('bool')
    for n in match_graph.vertices():
        leave_prop[n] = score_vp[n] >= max_score_v[t_graph.vertex(
            zero_id[n])] - delta

    sub = gt.GraphView(match_graph, leave_prop)
    new_match_graph = create_q_graph(sub, add_back_reference=False)

    # Get rid of anybody flying solo.
    match_graph = save_root_children(new_match_graph, root)

    zero_id = vp_map(match_graph, 'zero_id')
    one_id = vp_map(match_graph, 'one_id')
    path_list_vp = vp_map(match_graph, 'path_list', 'object')
    for n in match_graph.vertices():
        d = coll.deque()
        d.append((t_graph_original[zero_id[n]], g_graph_original[one_id[n]]))
        path_list_vp[n] = [d]

    # Get a list of solutions alive in the graph.
    for e in reversed_bfs_edges:
        v1, v2 = e
        edge_matches = [
            (eG1, eG2) for eG1, eG2 in match_graph.edges()
            if zero_id[eG1] == t_index[v1] and zero_id[eG2] == t_index[v2]
        ]
        parent_nodes = set([eM1 for eM1, eM2 in edge_matches])
        for p in parent_nodes:
            child_nodes = [eM2 for eM1, eM2 in edge_matches if eM1 == p]
            # Combine every parent path with every child path.
            tmpList = []
            for c in child_nodes:
                for _p in path_list_vp[p]:
                    for _c in path_list_vp[c]:
                        tmpList.append(_p + _c)
            path_list_vp[p] = tmpList

    # Score the root solutions.
    return match_graph
def calculate_mdst_v2(g: gt.Graph, n_idx, e_idx, used_stuff=set()):
    # Step 1: Figure out the weights.
    n_att_name = list(n_idx.keys())[0]
    e_att_name = list(e_idx.keys())[0]

    # Create MDST weight vectors on the nodes and edges.
    v_weight = vp_map(g, 'MDST_v_weight', 'float')
    e_weight = ep_map(g, 'MDST_e_weight', 'float')
    v_attribute_list = list(n_idx[n_att_name].keys())
    e_attribute_list = list(e_idx[e_att_name].keys())
    v_a_map = vp_map(g, n_att_name)
    e_a_map = ep_map(g, e_att_name)

    for n in g.vertices():
        if v_a_map[n] in v_attribute_list:
            v_weight[n] = len(
                n_idx[n_att_name][v_a_map[n]]) / n_idx[n_att_name]['size']
        else:
            v_weight[n] = 0

    for e in g.edges():
        if e in used_stuff:
            e_weight[e] = 1
        else:
            if e_a_map[e] in e_attribute_list:
                e_weight[e] = len(
                    e_idx[e_att_name][e_a_map[e]]) / e_idx[e_att_name]['size']
            else:
                e_weight[e] = 0

    # Step 2: Calculate the MST.
    t_map = gt_top.min_spanning_tree(g, e_weight, g.vertex(0))

    # Step 3: Figure out which root results in us doing the least work.
    t = gt.GraphView(g, efilt=t_map, directed=False)

    best_t = None
    best_score = np.inf
    for root in t.vertices():
        # Generate a new tree rooted at this vertex.
        it = gt_s.bfs_iterator(t, root)
        nodes = []
        edges = []
        for e in it:
            edges.append(e)
            nodes.extend([e.source(), e.target()])
        nodes = np.unique(nodes)
        new_t = create_q_graph(t, q_nodes=nodes, q_edges=edges, directed=True)
        new_t_score = mdst_score_v2(t, root)
        if new_t_score < best_score:
            best_t = new_t
            best_score = new_t_score
    return best_t, best_score
gt.draw.graph_draw(g, pos=pos, vertex_fill_color=bv, edge_pen_width=be,
                   output="filtered-bt.png")
g.set_edge_filter(None)

bv, be = gt.centrality.betweenness(g)
be.a /= (be.a.max() / 5)
gt.draw.graph_draw(g, pos=pos, vertex_fill_color=bv, edge_pen_width=be,
                   output="nonfiltered-bt.png")

assert not g.is_directed(), ""
ug = gt.GraphView(g, directed=True)
assert ug.is_directed(), ""

tv = gt.GraphView(g, efilt=tree)
bv, be = gt.centrality.betweenness(tv)
be.a /= (be.a.max() / 5)
gt.draw.graph_draw(tv, pos=pos, vertex_fill_color=bv, edge_pen_width=be,
                   output="mst-view.png")

bv, be = gt.centrality.betweenness(g)
u = gt.GraphView(g, efilt=lambda e: be[e] > be.a.max() / 2)
be.a /= (be.a.max() / 5)
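# Self-contained illustration of the two GraphView idioms in the fragment
# above: toggling directedness and filtering edges with a lambda (the lattice
# graph and the threshold are illustrative choices).
import graph_tool as gt
import graph_tool.centrality
import graph_tool.generation

g = gt.generation.lattice([4, 4])
bv, be = gt.centrality.betweenness(g)

ug = gt.GraphView(g, directed=True)  # same edges, viewed as directed
assert not g.is_directed() and ug.is_directed()

# Keep only edges with above-half-max betweenness.
u = gt.GraphView(g, efilt=lambda e: be[e] > be.a.max() / 2)
print(u.num_edges(), "of", g.num_edges(), "edges kept")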