def compute_distance_map(self, g):
    """Return a weighted and an unweighted shortest-distance map of ``g``.

    Both calls are made without a source vertex and with ``directed=True``.
    The first uses the ``'weight'`` edge property as edge length; the second
    passes no weights.

    Returns:
        tuple: ``(dist_map, gap_map)`` in that order.
    """
    edge_weights = g.edge_properties['weight']
    weighted = gt.shortest_distance(
        g, directed=True, return_reached=False, weights=edge_weights)
    unweighted = gt.shortest_distance(g, directed=True)
    return weighted, unweighted
def test_paths_length_tree(tree_and_cascade):
    """Check ``all_simple_paths_of_length`` against ``shortest_path``.

    First half: with nothing forbidden, every enumerated path of shortest
    length must equal the path returned by ``shortest_path``.
    Second half: forbidding one interior vertex of that path must leave the
    generator empty.
    """
    g = tree_and_cascade[0]
    n_trials = 10

    for _ in range(n_trials):
        s, t = np.random.permutation(g.num_vertices())[:2]
        length = shortest_distance(g, s, t)
        forbidden_nodes = {}
        found = all_simple_paths_of_length(
            g, s, t, length, forbidden_nodes=forbidden_nodes, debug=True)
        for p in found:
            reference = shortest_path(g, g.vertex(s), g.vertex(t))[0]
            correct_path = [int(v) for v in reference]
            assert correct_path == p

    for _ in range(n_trials):
        s, t = np.random.permutation(g.num_vertices())[:2]
        length = shortest_distance(g, s, t)
        if length > 2:
            # pick a random vertex strictly between the two endpoints
            interior = shortest_path(g, g.vertex(s), g.vertex(t))[0][1:-1]
            forbidden_nodes = {int(random.choice(interior))}
            with pytest.raises(StopIteration):
                next(all_simple_paths_of_length(
                    g, s, t, length,
                    forbidden_nodes=forbidden_nodes, debug=True))
def run_graph_tool(n, niter):
    """Time ``niter`` calls of ``gt.shortest_distance`` on a Price network.

    A network is generated with ``gt.price_network(n, 2, directed=False)``,
    a reverse edge is added for every existing edge, and the graph is then
    switched to directed before timing starts.

    Args:
        n: passed to ``gt.price_network`` as its first argument.
        niter: number of timed ``shortest_distance`` calls.

    Returns:
        tuple: ``(start, end)`` values of ``time.time()`` taken around the
        timed loop (the progress-bar updates are inside the timed region).
    """
    pb = progressbar.ProgressBar(maxval=niter).start()
    g = gt.price_network(n, 2, directed=False)

    # Snapshot the endpoints before inserting anything: adding edges while
    # iterating g.edges() mutates the very container being walked.
    endpoints = [(e.source(), e.target()) for e in g.edges()]
    for src, tgt in endpoints:
        g.add_edge(tgt, src)
    g.set_directed(True)

    start = time.time()
    for i in range(niter):
        gt.shortest_distance(g)
        pb.update(i)
    pb.finish()
    end = time.time()
    return (start, end)
def extract_max_pg(ball_view, qgraph, w, S_w, d_Q):
    '''
    Extract the maximum perfect graph of qgraph in the ball.

    ball_view -- graph (view) the match is searched in
    qgraph    -- query graph
    w         -- source vertex for the distance computation
    S_w       -- indexable by query vertex; each entry is an iterable of
                 matched vertices. Entries are overwritten in place with the
                 matches that survive the distance filter.
    d_Q       -- maximum allowed distance from w

    Returns the filtered view, or None when valid_sim_w() rejects S_w or when
    any query vertex ends up with no match.
    '''
    if valid_sim_w(S_w, w, qgraph) == False:
        return None

    vertex_matchset = set(v for u in qgraph.vertices() for v in S_w[u])

    # Collect every edge of the ball that connects matches of the two
    # endpoints of some query edge.
    edge_matchset = set()
    for e in qgraph.edges():
        source = e.source()
        target = e.target()
        for sim_v1 in S_w[source]:
            for sim_v2 in S_w[target]:
                eg = ball_view.edge(sim_v1, sim_v2)
                if eg:
                    edge_matchset.add(eg)

    pg_view = gt.GraphView(ball_view,
                           vfilt=lambda v: v in vertex_matchset,
                           efilt=lambda e: e in edge_matchset)
    # Seventh positional argument is False; presumably `directed` -- confirm
    # against the installed graph-tool signature.
    dist = gt.shortest_distance(pg_view, w, None, None, None, None, False)
    maxPGC = gt.GraphView(pg_view, vfilt=lambda v: dist.a[int(v)] <= d_Q)

    # The view does not change inside the loop, so list its vertices once.
    # The original reset maxPGC to None inside the loop, which made the next
    # iteration call .vertices() on None; remember the failure instead and
    # finish filtering every entry of S_w.
    surviving = list(maxPGC.vertices())
    has_empty_match = False
    for u in qgraph.vertices():
        S_w[u] = set(v for v in surviving if v in S_w[u])
        if len(S_w[u]) == 0:
            has_empty_match = True

    if has_empty_match:
        return None
    return maxPGC
def gen_cascade(g, scale=1.0, source=None, stop_fraction=0.5,
                return_tree=True):
    """Simulate a cascade using exponentially distributed edge delays.

    Edge delays are drawn from ``np.random.exponential(scale)``; infection
    times are the weighted shortest distances from ``source``. Vertices whose
    distance exceeds the ``stop_fraction * 100`` percentile are marked
    uninfected with time ``-1``.

    Args:
        g: graph to simulate on.
        scale: scale of the exponential delay distribution.
        source: source vertex index; drawn at random when ``None``.
        stop_fraction: fraction used for the percentile cut-off.
        return_tree: also build the tree via ``edges2graph``.

    Returns:
        tuple: ``(source, infection_times)`` or, when ``return_tree`` is
        true, ``(source, infection_times, tree)``.
    """
    rands = np.random.exponential(scale, g.num_edges())
    delays = g.new_edge_property('float')
    delays.set_2d_array(rands)

    if source is None:
        source = random.choice(np.arange(g.num_vertices()))

    dist, pred = shortest_distance(g, source=source, weights=delays,
                                   pred_map=True)

    q = stop_fraction * 100
    percentile = np.percentile(dist.a, q)
    infected_nodes = np.nonzero(dist.a <= percentile)[0]
    uninfected_nodes = np.nonzero(dist.a > percentile)[0]

    infection_times = np.array(dist.a)
    infection_times[uninfected_nodes] = -1

    rets = (source, infection_times)
    if return_tree:
        # The original walked "up" with a while-loop that never advanced its
        # cursor, so it only ever recorded the edge (pred[n], n) and then
        # stopped on the duplicate. State that directly: one predecessor
        # edge per infected vertex that is not its own predecessor.
        tree_edges = {(pred[n], n) for n in infected_nodes if pred[n] != n}
        tree = edges2graph(g, tree_edges)
        rets += (tree, )
    return rets
def compute_and_save_distance_info(g, filename):
    """Attach per-vertex distance statistics to ``g`` and save it.

    For every vertex the distances returned by ``gt.shortest_distance`` are
    cast to int32, entries equal to ``MAX_INT`` or ``0`` are dropped, and the
    max / min / mean / standard deviation (each truncated with ``int``) are
    recorded. A histogram of all remaining distances is accumulated in a
    dict. The five results are stored through ``add_property`` and the graph
    is written to ``filename``.
    """
    max_dist, min_dist, mean_dist, stddev_dist = [], [], [], []
    overall_dist = {}

    for v in g.get_vertices():
        # progress output every 2000 vertices
        if g.vertex_index[v] % 2000 == 0:
            print(g.vertex_index[v])

        raw = gt.shortest_distance(g, v).get_array().astype("int32")
        kept = raw[(raw != MAX_INT) & (raw != 0)]

        if not kept.size:
            # nothing left after filtering: record zeros for this vertex
            for series in (max_dist, min_dist, mean_dist, stddev_dist):
                series.append(0)
            continue

        max_dist.append(int(np.max(kept)))
        min_dist.append(int(np.min(kept)))
        mean_dist.append(int(np.mean(kept)))
        stddev_dist.append(int(np.std(kept)))

        unique, counts = np.unique(kept, return_counts=True)
        for distance, count in zip(unique, counts):
            overall_dist[distance] = overall_dist.get(distance, 0) + count

    add_property(g, "max_distance_dist", "vector<int32_t>", max_dist)
    add_property(g, "min_distance_dist", "vector<int32_t>", min_dist)
    add_property(g, "mean_distance_dist", "vector<int32_t>", mean_dist)
    add_property(g, "stddev_distance_dist", "vector<int32_t>", stddev_dist)
    add_property(g, "overall_dist", "python::object", overall_dist)
    g.save(filename)
def simulate_cascade(g, p, source=None, return_tree=False):
    """
    graph_tool version of simulating cascade
    return np.ndarray on vertices as the infection time in cascade
    uninfected node has dist -1

    Args:
        g: graph to simulate on.
        p: passed to ``sample_graph_by_p`` to sample the working graph.
        source: source vertex; when ``None`` it is drawn from the nodes
            flagged by ``label_largest_component`` on the sampled graph.
        return_tree: also return the predecessor tree as a filtered Graph.

    Returns:
        ``(source, times)`` or ``(source, times, tree)``.
    """
    gv = sample_graph_by_p(g, p)

    infected_nodes = None
    if source is None:
        # consider the largest cc
        infected_nodes = np.nonzero(label_largest_component(gv).a)[0]
        source = np.random.choice(infected_nodes)

    times = get_infection_time(gv, source)

    if not return_tree:
        return source, times

    if infected_nodes is None:
        # A caller-supplied source skipped the component lookup above, which
        # previously left `infected_nodes` undefined here. Fall back to the
        # documented convention that uninfected nodes carry time -1.
        infected_nodes = np.nonzero(np.asarray(times) != -1)[0]

    # get the tree edges
    _, pred_map = shortest_distance(gv, source=source, pred_map=True)
    edges = [(pred_map[i], i) for i in infected_nodes if i != source]

    # create tree
    tree = Graph(directed=True)
    tree.add_vertex(g.num_vertices())
    for u, v in edges:
        tree.add_edge(int(u), int(v))

    # hide every vertex that is not an endpoint of a tree edge
    vfilt = tree.new_vertex_property('bool')
    vfilt.a = False
    for v in set(itertools.chain(*edges)):
        vfilt[v] = True
    tree.set_vertex_filter(vfilt)

    return source, times, tree
def getDiameterAndAverageDistance(g): numVertex = g.num_vertices() numEdges = g.num_edges() print "Número de vértices de G:", numVertex print "Número de arestas de G:", numEdges print " - Executando método shortest_distance..." sD = gt.shortest_distance(g, directed=False) print " - Executando laço de cálculo da distância média e diâmetro..." x = 0 diameter = sD[0][0] sumSD = 0.0 n_vertices = 0 while x < numVertex: for d in sD[x]: if (d != 2147483647): sumSD = sumSD + d if (d > diameter): diameter = d x += 1 averageDistance = sumSD / (numVertex * (numVertex - 1)) print "Diâmetro:", diameter print "Distância média:", averageDistance return diameter, averageDistance
def get_shortest_path_distance_matrix(g, k=10, weights=None):
    """Return the shortest-distance array of ``g`` with gaps filled in.

    Entries that graph_tool reports for unconnected vertex pairs are
    replaced by ``k`` times the largest remaining distance.

    Args:
        g: graph to analyse.
        k: multiplier applied to the largest finite distance.
        weights: optional edge property map passed to ``shortest_distance``.

    Returns:
        The array from ``get_2d_array(range(g.num_vertices()))`` after the
        replacement.
    """
    # Used to find which vertices are not connected. This has to be this
    # weird, since graph_tool uses a type-dependent sentinel (maxint for the
    # unweighted case) for the distance between unconnected vertices.
    def get_unconnected_distance():
        g_mock = gt.Graph()
        g_mock.add_vertex(2)
        mock_weights = None
        if weights is not None:
            # Probe with weights of the same value type as the real ones;
            # otherwise the sentinel of the unweighted search would be
            # compared against distances of a different type.
            mock_weights = g_mock.new_edge_property(weights.value_type())
        shortest_distances_mock = gt.shortest_distance(
            g_mock, weights=mock_weights)
        return shortest_distances_mock[0][1]

    unconnected_dist = get_unconnected_distance()

    # Get shortest distances for all pairs of vertices in a NumPy array.
    X = gt.shortest_distance(g, weights=weights).get_2d_array(
        range(g.num_vertices()))

    unconnected = X == unconnected_dist
    if unconnected.any():
        print('[distance_matrix] There were disconnected components!')

    # Get maximum shortest-path distance (ignoring the sentinel).
    X_max = X[~unconnected].max()

    # Set the unconnected distances to k times the maximum of the other
    # distances.
    X[unconnected] = k * X_max
    return X
def efficiency(graph):
    """Return the sum of inverse pairwise distances over ``N * (N - 1)``.

    Distances come from ``gt.shortest_distance(graph)``. Zero distances are
    ignored; every other entry contributes ``1 / distance``.

    Args:
        graph: graph to analyse.

    Returns:
        float: the efficiency value, or ``0.0`` for fewer than two vertices.
    """
    # The original queried a global `g_link` here instead of the argument.
    all_shortest_paths = gt.shortest_distance(graph)
    N = graph.num_vertices()
    if N < 2:
        # no vertex pairs; avoids dividing by zero below
        return 0.0

    # 1.0 forces true division even where `/` on ints would truncate.
    inverse_sum = sum(1.0 / x
                      for vector in all_shortest_paths
                      for x in vector
                      if x > 0)
    return inverse_sum / (N * (N - 1))
def compute_shortest_paths(self):
    """Compute pairwise distances of the keyword graph and pickle them.

    Loads ``<keyword>_entitylinks_core.graphml``, computes distances with
    ``gt.shortest_distance``, drops pairs farther apart than 100, re-indexes
    vertices through ``self.fileindex`` and writes one pickle per matrix row
    into ``<keyword>_shortest_paths/``.
    """
    import graph_tool.all as gt

    graph_dir = home+'/data/text-analysis/vichakshana/page_graphs/'
    graph_file = graph_dir + self.keyword + '_entitylinks_core.graphml'
    g = gt.load_graph(graph_file, fmt='xml')
    distance_data = gt.shortest_distance(g)

    vertices = list(g.vertices())
    vertex_ids = g.vertex_properties['_graphml_vertex_id']
    # Resolve every vertex's file index once instead of once per pair.
    file_indices = [
        self.fileindex[unicode(vertex_ids[v], encoding='utf-8')]
        for v in vertices
    ]

    rows = []
    cols = []
    distances = []
    for src_pos, src_v in enumerate(vertices):
        src_distances = distance_data[src_v]
        for i in xrange(len(vertices)):
            d = src_distances[i]
            if d > 100:
                continue
            rows.append(file_indices[src_pos])
            cols.append(file_indices[i])
            distances.append(d)

    n = max(self.fileindex.values())+1  # since the indexing starts with 0
    shortest_paths = sparse.coo_matrix((distances, (rows, cols)), shape=(n, n))
    shortest_paths = sparse.csr_matrix(shortest_paths).todense()

    out_dir = graph_dir + self.keyword + '_shortest_paths/'
    if not exists(out_dir):
        mkdir(out_dir)

    for i in xrange(shortest_paths.shape[0]):
        # `with` closes each handle; the original leaked one per row.
        with open(out_dir + str(i) + '.pickle', 'w') as out:
            pickle.dump(shortest_paths[i], out)
def _count_root_depth(self):
    """Store in ``self.depth`` and return the mean of the distance array
    computed from ``self.v_root`` over ``self.g``."""
    root_distances = graph_tool.shortest_distance(
        self.g, source=self.v_root).a
    self.depth = np.mean(root_distances)
    return self.depth
def connectivity_prune(ball, w, sim, d_Q, Qgraph):
    '''
    Use the matching relation sim to prune the ball.

    The result is the ball centred at w with radius d_Q, restricted to the
    vertices and edges that take part in the match relation.

    NOTE (from the original author): building the view from a vertex filter
    alone, i.e. GraphView(ball, vfilt=...) over the matched vertices without
    the edge filter, gives a wrong result.
    '''
    # matched vertices that are present in the ball
    vertex_matchset = set()
    for u in Qgraph.vertices():
        for v in sim[u]:
            if v in ball.vertices():
                vertex_matchset.add(v)

    # ball edges joining matches of the endpoints of some query edge
    edge_matchset = set()
    for q_edge in Qgraph.edges():
        q_src = q_edge.source()
        q_tgt = q_edge.target()
        for match_src in sim[q_src]:
            for match_tgt in sim[q_tgt]:
                ball_edge = ball.edge(match_src, match_tgt)
                if ball_edge:
                    edge_matchset.add(ball_edge)

    matched_view = gt.GraphView(ball,
                                vfilt=lambda v: v in vertex_matchset,
                                efilt=lambda e: e in edge_matchset)
    dist = gt.shortest_distance(matched_view, w, None, None, None, None, False)
    return gt.GraphView(matched_view, vfilt=lambda v: dist.a[int(v)] <= d_Q)
def test_simulate_cascade(grid_and_cascade):
    """Infection times must equal hop distances in the returned tree.

    Distances equal to ``MAXINT`` are mapped to ``-1`` before comparing.
    """
    g = grid_and_cascade[0]
    for p in np.arange(0.2, 1.0, 0.1):
        outcome = simulate_cascade(g, p, source=None, return_tree=True)
        source, times, tree = outcome
        tree_dist = shortest_distance(tree, source=source).a
        unreached = tree_dist == MAXINT
        tree_dist[unreached] = -1
        aae(tree_dist, times)
def get_distances_to_node(self, target, max_dist):
    """Call ``gt.shortest_distance`` on ``self._g`` with no source vertex,
    the given target and ``max_dist``, and return its result.

    A plain ``int`` target is first converted to a vertex of ``self._g``.
    """
    graph = self._g
    if type(target) is int:
        target = graph.vertex(target)
    target_vertex = graph.vertex(target)
    return gt.shortest_distance(graph, None, target_vertex,
                                max_dist=max_dist)
def is_border_node(v, ball, w, d_Q):
    '''
    Judge whether v is a border node of ball[w], i.e. whether its distance
    from w inside the ball is exactly d_Q.
    '''
    dist = gt.shortest_distance(ball, w, None, None, None, None, False)
    distance_to_v = dist.a[int(v)]
    return bool(distance_to_v == d_Q)
def distanceStats(g):
    """Print distance statistics of ``g`` and plot its distance histogram.

    The output name of the plot is ``sys.argv[1]`` without its last eight
    characters, followed by ``.distancias``.
    """
    all_pairs = shortest_distance(g)
    matrix = all_pairs.get_2d_array(g.get_vertices())
    np_dist = np.array(matrix)

    print("Distâncias")
    stats(np_dist)

    distribution = distance_histogram(g)
    output_name = sys.argv[1][:-8] + ".distancias"
    histogram(distribution, "Distribuição de distâncias", "$d$",
              "$f_{D}(d)$", output_name)
def graph_k_adj(graph, hop):
    '''
    Purpose: build, for every hop count in ``hop``, the index pairs of
             vertices whose distance matches that hop count
    Input:   the fused graph, the hop counts
    Output:  a dict keyed by hop count; each value is a tensor built from
             the two index lists [source indices, target indices]
    '''
    g = graph.copy()
    ver_num = g.num_vertices()
    # guard against hop counts that cannot be indexed
    assert max(hop) <= ver_num - 1

    edge_indexs = {h: [[], []] for h in hop}

    # fallback hop counts in descending order
    descending_hops = [i for i in range(max(hop), min(hop) - 1, -1)]

    for ver in g.vertices():
        # .a is the array of actual distances from this vertex
        dist = gt.shortest_distance(g, source=g.vertex(ver)).a
        idx1 = g.vertex_index[ver]

        for h in hop:
            if h in dist:
                # some vertex lies exactly h hops away
                target = h
            else:
                # otherwise take the largest listed hop count that occurs
                present = np.isin(descending_hops, dist).tolist()
                if True in present:
                    target = descending_hops[present.index(True)]
                else:
                    # isolated vertex: distance 0 to itself only
                    target = 0

            # np.where on a (size,) array: element 0 holds the indices
            idx2_group = np.where(dist == target)[0]
            for idx2 in idx2_group:
                edge_indexs[h][0].append(idx1)
                edge_indexs[h][1].append(idx2)

    for h in hop:
        edge_indexs[h] = torch.from_numpy(np.array(edge_indexs[h]))
    return edge_indexs
def test_simulate_cascade(grid_and_cascade):
    """Infection times must equal hop distances in the returned tree.

    Distances equal to ``MAXINT`` are mapped to ``-1`` before comparing.
    """
    g = grid_and_cascade[0]
    probabilities = np.arange(0.2, 1.0, 0.1)
    for p in probabilities:
        source, times, tree = simulate_cascade(
            g, p, source=None, return_tree=True)
        hops = shortest_distance(tree, source=source).a
        hops[hops == MAXINT] = -1
        aae(hops, times)
def get_duality_gap(self, link_flow, link_cost, demand):
    """Return ``primal - dual`` for the given flows, costs and demand.

    ``primal`` is the sum of ``link_flow.a * link_cost.a``. ``dual`` adds,
    for every origin key of ``self.ori_index`` and every demand entry whose
    key starts with that origin, the demand value times the cost-weighted
    shortest distance to ``key[1]``.
    """
    network = self.network
    primal = float(np.sum(link_flow.a * link_cost.a))

    dual = 0.0
    for ori in self.ori_index.keys():
        d_map = gt.shortest_distance(network,
                                     source=network.vertex(ori),
                                     weights=link_cost,
                                     max_dist=100000)
        for key, value in demand.items():
            if key[0] == ori:
                dual += value * d_map[key[1]]

    return primal - dual
def max_infection_time(g, infection_times, obs_nodes, cand_source, debug):
    """Return the earliest observed infection time minus the distance from
    ``cand_source`` to the earliest observed node.

    This is the maximum infection time of the source assuming
    ``cand_source`` is the source; only the earliest observation is used.
    """
    observed_times = infection_times[obs_nodes]
    t_min = min(observed_times)
    earliest_node = min(obs_nodes, key=infection_times.__getitem__)

    if debug:
        print('candidate {}'.format(cand_source))
        print('earliest node: {} (t={})'.format(earliest_node, t_min))

    hops = shortest_distance(g, source=cand_source, target=earliest_node)
    return t_min - hops
def max_infection_time(g, infection_times, obs_nodes, cand_source, debug):
    """Upper bound on the infection time of ``cand_source``.

    Computed as the minimum observed infection time minus the shortest
    distance between ``cand_source`` and the earliest observed node.
    """
    t_min = min(infection_times[obs_nodes])
    earliest_node = min(obs_nodes, key=infection_times.__getitem__)

    if debug:
        print('candidate {}'.format(cand_source))
        print('earliest node: {} (t={})'.format(earliest_node, t_min))

    # only the earliest observation constrains the bound here
    distance_to_earliest = shortest_distance(
        g, source=cand_source, target=earliest_node)
    return t_min - distance_to_earliest
def pathExists(pathFinder):
    """Check whether the graph of ``pathFinder`` has a short path.

    Returns the result of ``distance < 100`` for the shortest distance
    between ``pathFinder.source`` and ``pathFinder.target``. If the distance
    computation raises, the error is logged and ``False`` is returned.
    """
    G = pathFinder.getGraph()
    target = pathFinder.target
    source = pathFinder.source
    logger.debug('Checking if path exists between %s and %s' % (source,target))
    try:
        dist = gt.shortest_distance(G,source=source,target=target)
    except Exception:
        # Previously a bare `except` fell through to the lines below with
        # `dist` unbound; report "no path" instead.
        logger.error (sys.exc_info())
        return False
    logger.debug('Found distance %s' % dist)
    return dist < 100
def source_likelihood_stat(g, gvs, p, q, N1, estimation_method,
                           precond_method, eps, debug=True):
    """Run ``N1`` cascade experiments and summarise source-inference quality.

    Each round generates a cascade, scores every node as a source candidate
    and records the distance between the true source and the best-scoring
    node.

    Returns:
        dict: ``describe()`` summaries under the keys ``'dist'``,
        ``'mu[s]'`` and ``'rank'``.
    """
    sll_array, sources, dist_array = [], [], []

    iters = tqdm(range(N1)) if debug else range(N1)
    for _ in iters:
        infection_times, source, obs_nodes = gen_nontrivial_cascade(g, p, q)
        sources.append(source)

        if estimation_method == 'steiner-tree-exact':
            if debug:
                print('using steiner tree exact')
            sll = best_tree_sizes(g, obs_nodes, infection_times)
        else:
            if debug:
                print(
                    'using steiner tree order ({})'.format(estimation_method))
            sll = tree_sizes_by_roots(g, obs_nodes, infection_times, source,
                                      method=estimation_method)

        winner = np.argmax(sll)
        dist_array.append(shortest_distance(g, source=source, target=winner))
        sll_array.append(sll)

    source_likelihood_array = np.array(sll_array, dtype=np.float64)
    source_llh = np.array([
        source_likelihood_array[i, src] for i, src in enumerate(sources)
    ])
    ranks = np.array([
        get_rank_index(source_likelihood_array[i, :], src)
        for i, src in enumerate(sources)
    ])

    summary = {}
    summary['dist'] = pd.Series(dist_array).describe()
    summary['mu[s]'] = pd.Series(source_llh).describe()
    summary['rank'] = pd.Series(ranks).describe()
    return summary
def create_ball_view(w, d_Q, Dgraph):
    '''
    Create a ball [w, d_Q] view on top of data graph: the view keeps the
    vertices whose distance from w is at most d_Q.
    '''
    dist = gt.shortest_distance(Dgraph, w, None, None, None, None, False)

    def within_radius(v):
        return dist.a[int(v)] <= d_Q

    return gt.GraphView(Dgraph, vfilt=within_radius)
def pathExists(pathFinder):
    """Check whether the graph of ``pathFinder`` has a short path.

    Returns the result of ``distance < 100`` for the shortest distance
    between ``pathFinder.source`` and ``pathFinder.target``. If the distance
    computation raises, the error is logged and ``False`` is returned.
    """
    G = pathFinder.getGraph()
    target = pathFinder.target
    source = pathFinder.source
    logger.debug('Checking if path exists between %s and %s' %
                 (source, target))
    try:
        dist = gt.shortest_distance(G, source=source, target=target)
    except Exception:
        # Previously a bare `except` fell through to the lines below with
        # `dist` unbound; report "no path" instead.
        logger.error(sys.exc_info())
        return False
    logger.debug('Found distance %s' % dist)
    return dist < 100
def tree_sizes_by_roots(g, obs_nodes, infection_times, source,
                        method='sync_tbfs', return_trees=False):
    """
    use temporal BFS to get the scores for each node
    in terms of the negative size of the inferred tree
    thus, the larger the better

    Args:
        g: graph.
        obs_nodes: observed nodes; they are excluded from the candidates.
        infection_times: indexable by node, infection time of each node.
        source: passed through to the tree builders.
        method: one of 'sync_tbfs', 'tbfs', 'closure'.
        return_trees: also return the dict of trees keyed by root.

    Returns:
        ``-tree_sizes`` (``-inf`` where no tree was found), optionally
        followed by the dict of trees.
    """
    assert method in {'sync_tbfs', 'tbfs', 'closure'}
    cand_sources = set(np.arange(g.num_vertices())) - set(obs_nodes)
    tree_sizes = np.ones(g.num_vertices()) * float('inf')
    trees = {}
    for r in cand_sources:
        # Reset per root so a tree from the previous root is never reused.
        tree = None
        try:
            if method == 'tbfs':
                from tbfs import temporal_bfs
                early_node = min(obs_nodes, key=infection_times.__getitem__)
                t_min = infection_times[early_node]
                D = t_min - shortest_distance(
                    g, source=g.vertex(r), target=g.vertex(early_node))
                tree = temporal_bfs(g, r, D, infection_times, source,
                                    obs_nodes, debug=False)
            elif method == 'sync_tbfs':
                # The default method is accepted by the assert above but had
                # no branch, leaving `tree` unbound. Dispatch to the
                # synchronous temporal BFS defined in this file.
                tree = temporal_bfs_sync(g, r, infection_times, source,
                                         obs_nodes, debug=False)
            elif method == 'closure':
                from core import find_tree_by_closure
                tree = find_tree_by_closure(g, r, infection_times,
                                            terminals=list(obs_nodes),
                                            debug=False)
        except TreeNotFound:
            tree = None

        if tree:
            tree_sizes[r] = tree.num_edges()
            if return_trees:
                trees[r] = tree

    if return_trees:
        return -tree_sizes, trees
    else:
        return -tree_sizes
def induce_contractility(eptm, a_cell, max_ci, rate_ci, span=1):
    """Raise the contractility of the cells around ``a_cell``.

    Each local cell's contractility is multiplied by
    ``1 + (rate_ci - 1) * exp((1 - dist) / span)``, where ``dist`` is half
    the graph distance from ``a_cell``, and capped at
    ``max_ci * eptm.params['contractility']``.

    The graph is made undirected for the distance computation and set back
    to directed afterwards, even if the update loop raises.
    """
    focus_on_cell(eptm, a_cell, radius=3*span)
    c0 = eptm.params['contractility']
    eptm.graph.set_directed(False)
    try:
        for cell in eptm.cells.local_cells():
            dist = gt.shortest_distance(eptm.graph, source=a_cell,
                                        target=cell) / 2.
            increase = 1 + (rate_ci - 1) * np.exp((1 - dist) / span)
            new_c = eptm.cells.contractilities[cell] * increase
            eptm.cells.contractilities[cell] = min(new_c, max_ci*c0)
    finally:
        # never leave the graph undirected behind a failure
        eptm.graph.set_directed(True)
def cal_diameter_qgraph(qgraph):
    '''
    Calculate the diameter of qgraph: the largest distance found from any
    vertex, ignoring distances above num_vertices - 1.
    '''
    temp_dia = 0
    max_dia = qgraph.num_vertices() - 1
    for u in qgraph.vertices():
        dist = gt.shortest_distance(qgraph, u, None, None, None, None, False)
        for d in dist.a:
            # keep d only if it is a new maximum within the allowed range
            if temp_dia < d <= max_dia:
                temp_dia = d
    return temp_dia
def test_paths_length_tree(tree_and_cascade):
    """Check ``all_simple_paths_of_length`` against ``shortest_path``.

    With nothing forbidden, each enumerated path of shortest length must be
    the path from ``shortest_path``; with an interior vertex of that path
    forbidden, the generator must be exhausted immediately.
    """
    g = tree_and_cascade[0]

    def random_pair():
        # two distinct vertex indices
        return np.random.permutation(g.num_vertices())[:2]

    for _ in range(10):
        s, t = random_pair()
        length = shortest_distance(g, s, t)
        forbidden_nodes = {}
        for p in all_simple_paths_of_length(g, s, t, length,
                                            forbidden_nodes=forbidden_nodes,
                                            debug=True):
            vertex_path = shortest_path(g, g.vertex(s), g.vertex(t))[0]
            correct_path = [int(v) for v in vertex_path]
            assert correct_path == p

    for _ in range(10):
        s, t = random_pair()
        length = shortest_distance(g, s, t)
        if length > 2:
            inner_vertices = shortest_path(
                g, g.vertex(s), g.vertex(t))[0][1:-1]
            forbidden_nodes = {int(random.choice(inner_vertices))}
            with pytest.raises(StopIteration):
                next(all_simple_paths_of_length(
                    g, s, t, length,
                    forbidden_nodes=forbidden_nodes, debug=True))
def induce_contractility(eptm, a_cell, max_ci, rate_ci, span=1):
    """Raise the contractility of the cells around ``a_cell``.

    Each local cell's contractility is multiplied by
    ``1 + (rate_ci - 1) * exp((1 - dist) / span)``, where ``dist`` is half
    the graph distance from ``a_cell``, and capped at
    ``max_ci * eptm.params['contractility']``.

    The graph is made undirected for the distance computation and set back
    to directed afterwards, even if the update loop raises.
    """
    focus_on_cell(eptm, a_cell, radius=3 * span)
    c0 = eptm.params['contractility']
    eptm.graph.set_directed(False)
    try:
        for cell in eptm.cells.local_cells():
            dist = gt.shortest_distance(eptm.graph, source=a_cell,
                                        target=cell) / 2.
            increase = 1 + (rate_ci - 1) * np.exp((1 - dist) / span)
            new_c = eptm.cells.contractilities[cell] * increase
            eptm.cells.contractilities[cell] = min(new_c, max_ci * c0)
    finally:
        # never leave the graph undirected behind a failure
        eptm.graph.set_directed(True)
def induce_tension(eptm, a_cell, max_ti, rate_ti, span=1):
    """Raise the line tension of junctions around ``a_cell``.

    For each local cell, every junction in ``eptm.cells.junctions[cell]``
    has its line tension multiplied by
    ``1 + (rate_ti - 1) * exp((1 - dist) / span)``, where ``dist`` is half
    the graph distance from ``a_cell``, and capped at
    ``max_ti * eptm.params['line_tension']``.

    The graph is made undirected for the distance computation and set back
    to directed afterwards, even if the update loop raises.
    """
    focus_on_cell(eptm, a_cell, radius=3 * span)
    t0 = eptm.params['line_tension']
    eptm.graph.set_directed(False)
    try:
        for cell in eptm.cells.local_cells():
            dist = gt.shortest_distance(eptm.graph, source=a_cell,
                                        target=cell) / 2.
            increase = 1 + (rate_ti - 1) * np.exp((1 - dist) / span)
            for je in eptm.cells.junctions[cell]:
                new_t = eptm.junctions.line_tensions[je] * increase
                eptm.junctions.line_tensions[je] = min(new_t, max_ti * t0)
    finally:
        # never leave the graph undirected behind a failure
        eptm.graph.set_directed(True)
def graphtool_sp_tree(self, source, target):
    """
    Interface to graph tool for computing both shortest path trees

    Stores the distance and predecessor maps from ``source`` in
    ``self.dist_map_ab`` / ``self.pred_map_ab``, and those from ``target``
    on the edge-reversed graph in ``self.dist_map_ba`` /
    ``self.pred_map_ba``. The reversal is undone even if the second search
    raises.
    """
    self.dist_map_ab, self.pred_map_ab = shortest_distance(
        self.graph,
        source,
        weights=self.weight,
        negative_weights=True,
        directed=self.graph.is_directed(),
        pred_map=True)

    # turn around edge directions
    # Attention: is_reversed must be True when it has NOT been reversed
    # before
    self.graph.set_reversed(is_reversed=True)
    try:
        self.dist_map_ba, self.pred_map_ba = shortest_distance(
            self.graph,
            target,
            weights=self.weight,
            negative_weights=True,
            pred_map=True,
            directed=self.graph.is_directed())
    finally:
        # again turn around to recover graph
        self.graph.set_reversed(is_reversed=False)
def induce_tension(eptm, a_cell, max_ti, rate_ti, span=1):
    """Raise the line tension of junctions around ``a_cell``.

    For each local cell, every junction in ``eptm.cells.junctions[cell]``
    has its line tension multiplied by
    ``1 + (rate_ti - 1) * exp((1 - dist) / span)``, where ``dist`` is half
    the graph distance from ``a_cell``, and capped at
    ``max_ti * eptm.params['line_tension']``.

    The graph is made undirected for the distance computation and set back
    to directed afterwards, even if the update loop raises.
    """
    focus_on_cell(eptm, a_cell, radius=3*span)
    t0 = eptm.params['line_tension']
    eptm.graph.set_directed(False)
    try:
        for cell in eptm.cells.local_cells():
            dist = gt.shortest_distance(eptm.graph, source=a_cell,
                                        target=cell) / 2.
            increase = 1 + (rate_ti - 1) * np.exp((1 - dist) / span)
            for je in eptm.cells.junctions[cell]:
                new_t = eptm.junctions.line_tensions[je] * increase
                eptm.junctions.line_tensions[je] = min(new_t, max_ti*t0)
    finally:
        # never leave the graph undirected behind a failure
        eptm.graph.set_directed(True)
def test_paths_length_grid(grid_and_cascade):
    """Every path from ``all_simple_paths_of_length`` must have the requested
    length, follow existing edges, avoid forbidden nodes and join s to t."""
    g = grid_and_cascade[0]
    for _ in range(10):
        s, t = np.random.permutation(g.num_vertices())[:2]
        length = shortest_distance(g, s, t)
        forbidden_nodes = {}
        paths = all_simple_paths_of_length(g, s, t, length,
                                           forbidden_nodes=forbidden_nodes,
                                           debug=True)
        for p in paths:
            n_hops = len(p) - 1
            assert n_hops == length, '{} != {}'.format(len(p)-1, length)
            # consecutive vertices must be joined by an edge
            for u, v in zip(p[:-1], p[1:]):
                assert g.edge(u, v) is not None
            for u in p:
                assert u not in forbidden_nodes
            assert p[0] == s
            assert p[-1] == t
def synonyms(self, word, top=10, dist_type='LeacockChodorow',
             threshold=100):
    """
    Finds closest top synonyms.
    :param word: search synonyms for that
    :param top: top nearest synonyms
    :param dist_type: distance measure
    :param threshold: we filter candidates for synonyms with shortest path
        shorter that threshold
    :return: list of words
    """
    # finding shortest paths
    vertex = self.g.vertex(self.lemma_to_vertex_id[word])
    shortest_path = graph_tool.shortest_distance(self.g, source=vertex)
    distances = np.array(shortest_path.a)

    # vertex ids sorted by distance
    sorted_vertices = distances.argsort()

    # Reverse map built once, instead of a list scan per candidate.
    # setdefault keeps the first lemma per vertex id, matching what
    # list(values).index(v_id) returned.
    lemma_by_vertex = {}
    for lemma, v_id in self.lemma_to_vertex_id.items():
        lemma_by_vertex.setdefault(v_id, lemma)

    # find words which are candidates for nearest neighbours
    # we choose only vertices with shortest path shorter than threshold and
    # lexical (not synsets) vertices
    candidates = []
    # first one with distance equal to 0 is word vertex
    for v_id in sorted_vertices[1:]:
        if v_id in lemma_by_vertex and distances[v_id] < threshold:
            candidates.append(lemma_by_vertex[v_id])
        if len(candidates) == top:
            break

    # sorting with distance measure
    # this part is only a synthetic sugar since we use shortest as the only
    # variable in searching for synonyms
    dist_fun = self.get_distance_function(dist_type)
    measured = {}
    for neighbour in candidates:
        measured[neighbour] = dist_fun(word, neighbour)
    sorted_dist = sorted(measured.items(), key=lambda kv: kv[1])
    closest = [sd[0] for sd in sorted_dist[:top]]
    return closest
def shortest_distance(self, g=None, source=None, target=None, weights=None,
                      negative_weights=False, max_dist=None, directed=None,
                      dense=False, dist_map=None, pred_map=False):
    """Forward to ``gt.shortest_distance`` with instance defaults.

    ``g`` falls back to ``self._g`` when ``None``. Any truthy ``weights``
    value is replaced by ``self._edge_dist``; a falsy one is passed through
    unchanged. All other arguments are forwarded positionally as given.
    """
    graph = self._g if g is None else g
    edge_weights = self._edge_dist if weights else weights
    return gt.shortest_distance(graph, source, target, edge_weights,
                                negative_weights, max_dist, directed, dense,
                                dist_map, pred_map)
def source_likelihood_stat(g, gvs, p, q, N1, estimation_method,
                           precond_method, eps, debug=True):
    """Run ``N1`` cascade experiments and summarise source-inference quality.

    Each round generates a cascade, scores every node as a source candidate
    and records the distance between the true source and the best-scoring
    node.

    Returns:
        dict: ``describe()`` summaries under the keys ``'dist'``,
        ``'mu[s]'`` and ``'rank'``.
    """
    sll_array = []
    sources = []
    dist_array = []

    rounds = range(N1)
    iters = tqdm(rounds) if debug else rounds

    for _ in iters:
        infection_times, source, obs_nodes = gen_nontrivial_cascade(g, p, q)
        sources.append(source)

        if estimation_method == 'steiner-tree-exact':
            if debug:
                print('using steiner tree exact')
            sll = best_tree_sizes(g, obs_nodes, infection_times)
        else:
            if debug:
                print('using steiner tree order ({})'.format(estimation_method))
            sll = tree_sizes_by_roots(g, obs_nodes, infection_times, source,
                                      method=estimation_method)

        winner = np.argmax(sll)
        dist_to_max_n = shortest_distance(g, source=source, target=winner)
        dist_array.append(dist_to_max_n)
        sll_array.append(sll)

    source_likelihood_array = np.array(sll_array, dtype=np.float64)

    source_llh = []
    ranks = []
    for i, src in enumerate(sources):
        source_llh.append(source_likelihood_array[i, src])
    for i, src in enumerate(sources):
        ranks.append(get_rank_index(source_likelihood_array[i, :], src))
    source_llh = np.array(source_llh)
    ranks = np.array(ranks)

    return {
        'dist': pd.Series(dist_array).describe(),
        'mu[s]': pd.Series(source_llh).describe(),
        'rank': pd.Series(ranks).describe(),
    }
def is_convex(): print("citeseer") print("weighted") np.random.seed(0) attributes_df = pd.read_csv('res/citeseer/citeseer.content', sep="\t", header=None, dtype=np.str) features = attributes_df.iloc[:, 1:-1].to_numpy(dtype=np.int) labels, _ = pd.factorize(attributes_df.iloc[:, -1]) new_ids, old_ids = pd.factorize(attributes_df.iloc[:, 0]) edges_df = pd.read_csv('res/citeseer/citeseer.cites', sep="\t", header=None, dtype=np.str) edges_df = edges_df[edges_df.iloc[:, 0].apply(lambda x: x in old_ids)] edges_df = edges_df[edges_df.iloc[:, 1].apply(lambda x: x in old_ids)] renamed = edges_df.replace(old_ids, new_ids) edges = renamed.to_numpy(dtype=np.int) edges = np.fliplr(edges) g = gt.Graph(directed=True) g.add_edge_list(edges) weight = np.sum(np.abs(features[edges[:, 0]] - features[edges[:, 1]]), axis=1) weight_prop = g.new_edge_property("int", val=1) #weight = g.new_edge_property("double", vals=weight) comps, hist = gt.label_components(g) print(hist) dist_map = gt.shortest_distance(g, weights=weight_prop) #, weights=weight) simple = simplicial_vertices.simplicial_vertices(g) print("n=", g.num_vertices(), "s=", len(simple)) spc = shortest_path_cover_logn_apx(g, weight_prop) pickle.dump(spc, open("res/citeseer/spc_directed_unweighted.p", "wb")) '''intersection_0 = []
def test_paths_length_grid(grid_and_cascade):
    """Every path from ``all_simple_paths_of_length`` must have the requested
    length, follow existing edges, avoid forbidden nodes and join s to t."""
    g = grid_and_cascade[0]
    n_trials = 10
    for _ in range(n_trials):
        s, t = np.random.permutation(g.num_vertices())[:2]
        length = shortest_distance(g, s, t)
        forbidden_nodes = {}
        for p in all_simple_paths_of_length(g, s, t, length,
                                            forbidden_nodes=forbidden_nodes,
                                            debug=True):
            assert len(p) - 1 == length, '{} != {}'.format(len(p) - 1, length)
            heads, tails = p[:-1], p[1:]
            for u, v in zip(heads, tails):
                # each hop must be an edge of the graph
                assert g.edge(u, v) is not None
            for u in p:
                assert u not in forbidden_nodes
            assert p[0] == s
            assert p[-1] == t
def get_sp_trees(self, start_points):
    """Compute a shortest-path tree per graph and sum distances per cell.

    For each of the two graphs in ``self.two_graphs``, a weighted shortest
    distance search is run from the node at ``start_points[i]``. The
    distances to the nodes listed in ``self.critical_zones[i]`` are added
    into one array; negative node entries add ``np.inf``.

    Results are stored in ``self.summed`` and ``self.pred_maps``.
    """
    summed = np.zeros(self.critical_zones[0].shape)
    pred_maps = []
    n_cols = summed.shape[1]

    for i in range(2):
        graph_i = self.two_graphs[i]
        row, col = start_points[i][0], start_points[i][1]
        start_node_ind = graph_i.pos2node[row, col]
        dist_map, pred_map = shortest_distance(
            graph_i.graph,
            start_node_ind,
            weights=graph_i.weight,
            negative_weights=True,
            pred_map=True)
        pred_maps.append(pred_map)

        for j in range(self.margin):
            for k in range(n_cols):
                node = self.critical_zones[i][j, k]
                # negative entries mark cells without a node
                summed[j, k] += dist_map[node] if node >= 0 else np.inf

    self.summed = summed
    self.pred_maps = pred_maps
def tree_sizes_by_roots(g, obs_nodes, infection_times, source,
                        method='sync_tbfs', return_trees=False):
    """
    use temporal BFS to get the scores for each node
    in terms of the negative size of the inferred tree
    thus, the larger the better

    ``method`` selects the tree builder: 'tbfs', 'sync_tbfs', 'mst' or
    'region_mst'. Returns ``-tree_sizes`` (``-inf`` where no tree was
    found), plus the dict of trees keyed by root when ``return_trees``.
    """
    assert method in {'sync_tbfs', 'tbfs', 'mst', 'region_mst'}

    n_nodes = g.num_vertices()
    cand_sources = set(np.arange(n_nodes)) - set(obs_nodes)
    tree_sizes = np.ones(n_nodes) * float('inf')
    trees = {}

    for r in cand_sources:
        if method == 'tbfs':
            early_node = min(obs_nodes, key=infection_times.__getitem__)
            t_min = infection_times[early_node]
            hops = shortest_distance(g, source=g.vertex(r),
                                     target=g.vertex(early_node))
            D = t_min - hops
            tree = temporal_bfs(g, r, D, infection_times, source, obs_nodes,
                                debug=False)
        elif method == 'sync_tbfs':
            tree = temporal_bfs_sync(g, r, infection_times, source,
                                     obs_nodes, debug=False)
        elif method == 'mst':
            from steiner_tree_mst import steiner_tree_mst
            tree = steiner_tree_mst(g, r, infection_times, source,
                                    terminals=list(obs_nodes), debug=False)
        elif method == 'region_mst':
            from steiner_tree_region_mst import steiner_tree_region_mst
            tree = steiner_tree_region_mst(g, r, infection_times, source,
                                           terminals=list(obs_nodes),
                                           debug=False)

        if not tree:
            continue
        tree_sizes[r] = tree.num_edges()
        if return_trees:
            trees[r] = tree

    scores = -tree_sizes
    if return_trees:
        return scores, trees
    return scores
def srednia_dlugosc_sciezki(self):
    """Return ``numpy.average`` over all entries of the distance map of
    ``self.graph``."""
    # shortest_distance uses Johnson's algorithm, O(V E log V).
    srednie = [len_array for len_array in shortest_distance(self.graph)]
    return numpy.average(srednie)
def temporal_bfs_sync(g, r, infection_times, source, obs_nodes, debug=False):
    """Grow a tree from root ``r`` that covers the observed nodes level by
    level in order of infection time.

    For each distinct observed time (ascending) a BFS is continued from the
    current queue until every node observed at that time is visited; nodes
    observed at other times may not be entered. After each level the edge
    list is pruned with ``remove_redundant_edges_from_tree`` and the queue
    is rebuilt from the nodes of the pruned tree.

    Args:
        g: graph.
        r: root vertex index.
        infection_times: array indexable by node (and by ``obs_nodes``).
        source: not read in this function.
        obs_nodes: observed node indices.
        debug: print progress information.

    Returns:
        The result of ``remove_redundant_edges_from_tree`` over all observed
        nodes, or ``None`` if some level could not be covered.
    """
    # NOTE(review): t_lower is filled in but not read again in this function.
    t_lower = np.ones(g.num_vertices(), dtype=np.int32) * -1  # hidden nodes has lower bound -1
    t_lower[obs_nodes] = infection_times[obs_nodes]
    t_lower[r] = infection_times[obs_nodes].min() - 1

    visited = np.zeros(g.num_vertices(), dtype=bool)
    tree = []  # list of (parent, child) edges collected so far

    # NOTE(review): obs_by_time is built but not read again in this function.
    obs_by_time = defaultdict(list)
    for o in obs_nodes:
        obs_by_time[infection_times[o]].append(o)
    obs_times = list(sorted(set(infection_times[obs_nodes])))

    success = True
    queue = [r]
    for cur_t in obs_times:
        # observed nodes of any other time level must not be entered now
        banned_nodes = {v for v in obs_nodes if infection_times[v] != cur_t}
        target_nodes = [v for v in obs_nodes if infection_times[v] == cur_t]
        if debug:
            print('---- current time = {}'.format(cur_t))
            print('targets {}'.format(target_nodes))

        # cover nodes of level t
        while len(queue) > 0:
            if np.all(visited[target_nodes] == 1):
                if debug:
                    print('covered all targets')
                break
            v = queue.pop(0)
            for u in g.vertex(v).all_neighbours():
                u = int(u)
                if u not in banned_nodes and visited[u] == 0:
                    if debug and u in target_nodes:
                        print('cover target {}'.format(u))
                    if debug:
                        print('add edge {}'.format((v, u)))
                    if u in target_nodes:
                        # a reached target is not expanded at this level
                        if debug:
                            print('adding {} to baned list'.format(u))
                        banned_nodes.add(u)
                    else:
                        queue.append(u)
                    tree.append((v, u))
                    visited[u] = 1

        if np.all(visited[target_nodes] == 1):
            # all targets covered
            if True:
                # remove redundant edges
                # construct the tree from used edges
                terminals = [o for o in obs_nodes
                             if infection_times[o] <= cur_t]
                if debug:
                    print('terminals to cover: {}'.format(terminals))
                min_tree = remove_redundant_edges_from_tree(g, tree, r, terminals)
                if debug:
                    print('size of min tree: {}'.format(min_tree.num_edges()))
                tree = extract_edges(min_tree)
                if debug:
                    print('current tree edges {}'.format(tree))

                # update visited table
                visited.fill(0)
                covered_nodes = {u for nodes in tree for u in nodes}
                # restart the BFS from the pruned tree's nodes, nearest to
                # the root first
                sorted_by_time = list(sorted(
                    covered_nodes,
                    key=lambda v: shortest_distance(min_tree, source=r, target=v),
                    reverse=False))
                if debug:
                    print('covered nodes: {}'.format(sorted_by_time))
                queue = []
                for v in sorted_by_time:
                    visited[v] = 1
                    queue.append(v)
                if debug:
                    print('current queue: {}'.format(queue))
            continue
        else:
            if debug:
                print('failed to cover targets')
            success = False
            break

    if success:
        return remove_redundant_edges_from_tree(g, tree, r, obs_nodes)
    else:
        return None
def sample_consistent_cascade(g, obs_nodes, cand_source, infection_times, debug=False):
    """Connect every observed node to the candidate source by simple paths.

    Observed nodes are handled in ascending order of infection time.  Each
    one is attached to an already-placed node `op` with a strictly earlier
    predicted time, through a simple path whose length equals the time
    difference.  Candidate attachment points are tried from latest to
    earliest.  The path may not pass through other observed nodes with an
    equal or later infection time, nor through nodes already on earlier
    paths.

    Returns a `GraphView` of `g` restricted to the edges of the chosen
    paths, or ``None`` when some observed node cannot be attached.
    """
    ts_max = max_infection_time(g, infection_times, obs_nodes, cand_source, debug)
    if debug:
        print('observed infection times {}'.format({o: infection_times[o] for o in obs_nodes}))
        print('max(t_s) = {}'.format(ts_max))

    tree_paths = []
    pred_infected_nodes = {cand_source}
    pred_infection_time = {cand_source: ts_max}
    for obs in obs_nodes:
        pred_infection_time[obs] = infection_times[obs]

    # ranked by infection time in ascending order
    for o in sorted(obs_nodes, key=lambda n: infection_times[n]):
        if debug:
            print('o={}'.format(o))

        # try node from late to early in order to maximize path re-use
        candidates = sorted(pred_infected_nodes,
                            key=lambda n: pred_infection_time[n],
                            reverse=True)
        for op in candidates:
            if pred_infection_time[op] >= infection_times[o]:
                if debug:
                    print('t(op) >= t(o): {} >= {}\ntry next...'.format(
                        pred_infection_time[op], infection_times[o]))
                continue

            start_time = ts_max if op == cand_source else pred_infection_time[op]
            length = infection_times[o] - start_time
            if debug:
                print('try connecting {} and {} with length {}'.format(op, o, length))

            d = shortest_distance(g, source=op, target=o)
            if d > length:
                if debug:
                    print('however d({}, {})={} > {}: impossible'.format(o, op, d, length))
                continue

            # cannot visit later nodes (other than o itself) ...
            later_obs = {u for u in obs_nodes
                         if infection_times[u] >= infection_times[o] and u != o}
            # ... nor nodes on accumulated paths
            on_paths = {u for p in tree_paths for u in p
                        if u != op and u != o}
            forbidden_nodes = later_obs | on_paths

            paths = all_simple_paths_of_length(g, op, o,
                                               length=length,
                                               forbidden_nodes=forbidden_nodes,
                                               debug=False)
            path = next(paths, None)
            if path is None:
                # continue trying
                if debug:
                    print('unable to find such path')
                continue

            if debug:
                print('connect {} and {} via {}'.format(op, o, path))
            break
        else:
            # failed to find a path
            return None

        tree_paths.append(path)
        # update predicted infection time
        for l, u in enumerate(path):
            if u in pred_infection_time:
                assert pred_infection_time[u] == pred_infection_time[op] + l, \
                    'update t({}): {} != {} + {}'.format(
                        u, pred_infection_time[u], pred_infection_time[op], l)
            pred_infection_time[u] = pred_infection_time[op] + l
        pred_infected_nodes.update(path)

    edges = {(a, b) for p in tree_paths for a, b in zip(p[:-1], p[1:])}

    def on_tree(a, b):
        # path edges are stored in one direction only; accept either one
        return (a, b) in edges or (b, a) in edges

    efilt = np.array([on_tree(int(u), int(v)) for u, v in g.edges()], dtype=bool)
    gv = GraphView(g, efilt=efilt)
    if debug:
        print(obs_nodes)
    return gv
# Build a Delaunay triangulation of `ppoints`, weight every edge by the
# Euclidean distance between its end points, then report the weighted
# shortest distance/path between vertices 0 and 5 and draw the result.
print('Loaded')
tri, pos = gt.triangulation(ppoints, type="delaunay")
print('Done Triangulation')

weight = tri.new_edge_property("double")
for e in tri.edges():
    delta = np.array(pos[e.source()]) - np.array(pos[e.target()])
    weight[e] = np.sqrt(sum(delta**2))
print('Done weighting')

b = gt.betweenness(tri, weight=weight)
# b[1] is passed as the edge pen width below; scale it for drawing
b[1].a *= 120

dist = gt.shortest_distance(tri, tri.vertex(0), tri.vertex(5), weights=weight)
# use the same weights as for `dist`, so the printed path is the one
# that distance refers to
path, elist = gt.shortest_path(tri, tri.vertex(0), tri.vertex(5), weights=weight)
print('Done shortest distance and path')

print('dist')
print(dist)
print('path')
for i in path:
    print(i)

gt.graph_draw(tri, vertex_text=tri.vertex_index, edge_text=tri.edge_index,
              edge_pen_width=b[1], output_size=(1000, 1000),
              output="triang.pdf")
#weights = pdist(ppoints)
def get_infection_time(g, source):
    """Return the distances from `source` to every vertex of `g`.

    The array backing the property map returned by `shortest_distance` is
    modified in place: entries equal to `MAXINT` are replaced by -1.
    """
    dist = shortest_distance(g, source=source).a
    unreached = dist == MAXINT
    dist[unreached] = -1
    return dist
def mwu(g, gvs,
        source, obs_nodes, infection_times, o2src_time=None,
        active_method=MAX_MU,
        reward_method='exact',
        eps=0.2,
        max_iter=float('inf'),
        use_uninfected=True,
        debug=False,
        save_log=False):
    """Query nodes one by one, re-weighting source likelihoods until a source is found.

    `sll` holds one likelihood per vertex.  After each query it is
    multiplied by ``eps + (1 - eps) * probas`` and renormalised (reset to
    uniform if its sum is close to 0).  After every query the neighbours of
    the current arg-max nodes ("winners") are scheduled for querying; a
    winner whose infection time is earlier than that of all its infected
    neighbours is declared the source.

    Parameters
    ----------
    g : the graph.
    gvs : iterable of graphs; each is passed to `get_infection_time` and
        all are passed to `get_reward_for_uninfected_query`.
    source : true source, used for debug output and a final assertion.
    obs_nodes : initially observed nodes; they seed the reference set.
    infection_times : indexable by node; -1 marks an uninfected node.
    o2src_time : mapping node -> array built from `get_infection_time` over
        `gvs`; computed with `get_o2src_time` when ``None``.  Updated in
        place for queried nodes.
    active_method : `MAX_MU` (query the unqueried node with largest `sll`)
        or `RANDOM`.
    reward_method : forwarded to `sll_using_pairs`; for infected queries one
        of 'time-exact', 'time-order', 'time-diff' is expected.
        NOTE(review): the default 'exact' matches none of these, so an
        infected query raises ValueError -- confirm the intended default.
    eps : smoothing term of the multiplicative update.
    max_iter : maximum number of loop iterations.
    use_uninfected : update `sll` with `get_reward_for_uninfected_query`
        when the queried node is uninfected.
    debug : print progress information.
    save_log : also return the query log, a snapshot of `sll` per iteration
        and a per-query flag telling whether it came from a neighbour scan.

    Returns
    -------
    ``query_count``, or ``(query_count, query_log, sll_log, is_nbr_log)``
    when `save_log` is true.

    Raises
    ------
    ValueError : unknown `active_method`, or unknown `reward_method` on an
        infected query.
    """
    if save_log:
        query_log = []
        sll_log = []
        is_nbr_log = []

    if o2src_time is None:
        o2src_time = get_o2src_time(obs_nodes, gvs, debug=debug)

    if reward_method == 'time-diff':
        sp_len_dict = {o: shortest_distance(g, source=o).a for o in obs_nodes}
    else:
        sp_len_dict = None

    # init
    sll = sll_using_pairs(
        g,
        obs_nodes, infection_times,
        o2src_time,
        sp_len_dict=sp_len_dict,
        source=source,
        method=reward_method,
        eps=eps,
        precond_method='and',
        return_cascade=False,
        debug=debug)

    iter_i = 0

    all_nodes = set(np.arange(g.num_vertices()))
    unqueried_nodes = all_nodes - set(obs_nodes)

    obs_nodes = copy(obs_nodes)

    queried_nodes = set()

    # reference nodes to use for MWU,
    # required to be **infected**
    ref_nodes = set(obs_nodes)

    nodes_to_use = []  # nodes coming from querying the neighbors

    while iter_i < max_iter:
        iter_i += 1
        if len(unqueried_nodes) == 0:
            print('no more nodes to query')
            break

        if len(nodes_to_use) == 0:
            if active_method == MAX_MU:
                q = max(unqueried_nodes, key=lambda n: sll[n])
            elif active_method == RANDOM:
                q = random.choice(list(unqueried_nodes))
            else:
                raise ValueError('available query methods are {}'.format(
                    (MAX_MU, RANDOM)))

            if debug:
                print('query {}'.format(q))
            queried_nodes.add(q)
            unqueried_nodes.remove(q)
            if save_log:
                query_log.append(q)
                is_nbr_log.append(False)
        else:
            if debug:
                print('using node from nodes_to_use')
            q = nodes_to_use.pop()

        q = int(q)
        if infection_times[q] == -1 and use_uninfected:
            # the query is uninfected
            if debug:
                print('{} is uninfected'.format(q))
            probas = get_reward_for_uninfected_query(q, gvs)
            sll *= (eps + (1-eps) * probas)
            if np.isclose(sll.sum(), 0):
                print('warning: sll.sum() close to 0')
                sll = np.ones(g.num_vertices()) / g.num_vertices()
            else:
                sll /= sll.sum()
        else:
            if debug:
                print('using pairs to update sll')
            # the query is infected
            if reward_method == 'time-diff':
                sp_len_dict[q] = shortest_distance(g, source=q).a

            o2src_time[q] = np.array([get_infection_time(gv, q) for gv in gvs])

            for o in ref_nodes:
                probas = None
                tq, to = infection_times[q], infection_times[o]
                dists_q, dists_o = o2src_time[q], o2src_time[o]
                mask = np.logical_and(dists_q != -1, dists_o != -1)

                if reward_method == 'time-exact':
                    probas = exact_rewards(tq, to, dists_q, dists_o, mask)
                elif reward_method == 'time-order':
                    probas = order_rewards(tq, to, dists_q, dists_o, mask)
                elif reward_method == 'time-diff':
                    try:
                        probas = dist_rewards(
                            tq, to,
                            dists_q, dists_o,
                            sp_len_dict[q], sp_len_dict[o],
                            mask)
                    except ValueError:
                        # zero-size array to reduction operation maximum which has no identity
                        # or max_penalty = 0
                        # ignore this iteration
                        continue
                else:
                    raise ValueError(
                        'unknown reward method: {}'.format(reward_method))

                probas[np.isnan(probas)] = 0

                if debug and probas is not None:
                    print('source reward (without smoothing): {:.2f}'.format(probas[source]))
                    print('max reward: {}'.format(np.max(probas)))

                sll *= (eps + (1-eps) * probas)
                if np.isclose(sll.sum(), 0):
                    print('warning: sll.sum() close to 0')
                    sll = np.ones(g.num_vertices()) / g.num_vertices()
                else:
                    sll /= sll.sum()

                if debug:
                    print('new sll[source] = {}'.format(sll[source]))

            if debug:
                if np.isclose(sll[source], 0):
                    print('warning: source sll is 0!!')

            # NOTE(review): the nesting of the rest of this branch was
            # reconstructed from a whitespace-collapsed source; confirm it
            # belongs to the pair-update branch rather than the loop body.

            # if the query node infection time is larger than
            # the current known earliest infection,
            # it cannot be the source
            min_inf_t = min(infection_times[n] for n in ref_nodes)
            if (infection_times[q] == -1 or
                    infection_times[q] > min_inf_t):
                sll[q] = 0

            # when q is used for updating sll, add it to reference list
            ref_nodes.add(q)
            if debug:
                print('add q to ref_nodes (#nodes={})'.format(len(ref_nodes)))

        if save_log:
            # `sll` is updated in place on later iterations, so store a
            # snapshot rather than a reference to the live array
            sll_log.append(sll.copy())

        if debug:
            print('source current rank = {}, {:.5f}'.format(get_rank_index(sll, source), sll[source]))

        # if some node has very large mu
        # query its neighbors
        winners = np.nonzero(sll == sll.max())[0]
        for w in winners:
            nbrs = set(map(int, g.vertex(w).all_neighbours()))
            unqueried_neighbors = nbrs - queried_nodes
            nodes_to_use += list(unqueried_neighbors)
            queried_nodes |= unqueried_neighbors
            if save_log:
                query_log += list(unqueried_neighbors)
                is_nbr_log += [True] * len(unqueried_neighbors)

            if infection_times[w] != -1:
                # w is the source iff it is infected strictly earlier
                # than every infected neighbour
                is_source = np.all([(infection_times[w] < infection_times[int(u)])
                                    for u in nbrs
                                    if infection_times[int(u)] != -1])
            else:
                is_source = False
                continue

            if debug:
                print('checking source {} with winner {}'.format(source, w))
                print('winner\'s time {}'.format(infection_times[w]))
                print('winner\'s nbr infection time {}'.format(
                    [infection_times[int(u)] for u in nbrs]))

            if is_source:
                query_count = len(queried_nodes)
                if debug:
                    print('**Found source and used {} queries'.format(query_count))
                assert source == w
                if save_log:
                    return query_count, query_log, sll_log, is_nbr_log
                else:
                    return query_count
            else:
                sll[w] = 0

    query_count = len(queried_nodes)
    if save_log:
        return query_count, query_log, sll_log, is_nbr_log
    else:
        return query_count
# Centrality: vertex betweenness of both graphs and its mean / std. dev.
vp_btwn_link, ep_btwn_link = gt.betweenness(g_link)
link_btwn = [vp_btwn_link[v] for v in g_link.vertices()]
vp_btwn_bran, ep_btwn_bran = gt.betweenness(g_bran)
bran_btwn = [vp_btwn_bran[v] for v in g_bran.vertices()]

link_btwn_avg = stats.mean(link_btwn)
link_btwn_std = stats.stdev(link_btwn)
bran_btwn_avg = stats.mean(bran_btwn)
bran_btwn_std = stats.stdev(bran_btwn)

# Cost and efficiency: all-pairs distances on the full graphs ...
link_mst = gt.min_spanning_tree(g_link)
bran_mst = gt.min_spanning_tree(g_bran)

link_shortest = [x for vector in gt.shortest_distance(g_link) for x in vector]
bran_shortest = [x for vector in gt.shortest_distance(g_bran) for x in vector]

# ... and again once the graphs are restricted to their spanning trees.
# From here on g_link / g_bran only expose the spanning-tree edges.
g_link.set_edge_filter(link_mst)
g_bran.set_edge_filter(bran_mst)

link_mst_shortest = [x for vector in gt.shortest_distance(g_link) for x in vector]
bran_mst_shortest = [x for vector in gt.shortest_distance(g_bran) for x in vector]


def efficiency(graph):
    """Return the mean inverse shortest-path distance over ordered vertex pairs.

    Computes ``sum(1 / d(u, v)) / (N * (N - 1))`` over all pairs with
    ``d(u, v) > 0``, where the distances come from
    ``gt.shortest_distance(graph)`` and ``N`` is ``graph.num_vertices()``.
    Returns 0.0 for graphs with fewer than two vertices.
    """
    n_vertices = graph.num_vertices()
    if n_vertices < 2:
        # no vertex pairs; avoids dividing by zero below
        return 0.0
    # measure the graph that was passed in, not the global g_link
    all_shortest_paths = gt.shortest_distance(graph)
    # float arithmetic so the result is not truncated by integer division
    inverse_sum = sum(1.0 / x
                      for vector in all_shortest_paths
                      for x in vector
                      if x > 0)
    return inverse_sum / (n_vertices * (n_vertices - 1))