def get_approx_boundary(G, query_nodes, target_nodes, n_edges, start_dist):
    r"""Compute an approximation guarantee (steepness bound) for the greedy
    algorithm on absorbing random-walk centrality.

    For every candidate source node, compares the marginal change in absorbing
    centrality of adding its eligible (query -> target) edges to the empty set
    against removing them from the full set.  The smallest ratio ``s`` of these
    marginals yields the bound ``1 - s``.

    Parameters
    ----------
    G : networkx graph
        The graph under consideration (left unmodified; a copy is used).
    query_nodes : list
        Nodes from which the random walker starts.
    target_nodes : list
        Absorbing nodes where the random walker ends.
    n_edges : integer
        Unused here; kept for interface parity with the greedy routines.
    start_dist : matrix
        Starting distribution over the query set.

    Returns
    -------
    tuple
        ``(1 - s, idx)`` where ``s`` is the minimum steepness and ``idx``
        identifies the entry attaining it.
    """
    H = G.copy()  # work on a copy so the caller's graph is untouched
    query_set_size = len(query_nodes)
    map_query_to_org = dict(zip(query_nodes, range(query_set_size)))
    # All (query, target) pairs that are not already edges of G.
    candidates = list(product(query_nodes, target_nodes))
    eligible = [(u, v) for u, v in candidates if not H.has_edge(u, v)]

    # F(emptyset): absorbing centrality of the unmodified graph.
    P = csc_matrix(nx.google_matrix(H, alpha=1))
    P_abs = P[list(query_nodes), :][:, list(query_nodes)]
    F = compute_fundamental(P_abs)
    row_sums_empty = start_dist.dot(F.sum(axis=1))[0, 0]

    # Marginal gains w.r.t. the empty set, one evaluation per source node.
    ac_marginal_empty = []
    ac_marginal_full = []
    source_idx_empty = []
    node_processed = -1
    for out_edge in eligible:
        source_node = out_edge[0]
        if node_processed == source_node:
            # Every eligible edge sharing a source updates F identically,
            # so only the first occurrence of each source is evaluated.
            continue
        node_processed = source_node
        F_updated = update_fundamental_mat(F, H, map_query_to_org, source_node)
        abs_cen = start_dist.dot(F_updated.sum(axis=1))[0, 0]
        ac_marginal_empty.append(abs_cen)
        source_idx_empty.append(source_node)
    # Reorder the marginals by source node so both passes line up.
    sorted_indexes_empty = [i[0] for i in
                            sorted(enumerate(source_idx_empty), key=lambda x: x[1])]
    ac_marginal_empty = [ac_marginal_empty[i] for i in sorted_indexes_empty]

    # F(full): absorbing centrality with every eligible edge added.
    H.add_edges_from(eligible)
    P_all = csc_matrix(nx.google_matrix(H, alpha=1))
    P_abs_all = P_all[list(query_nodes), :][:, list(query_nodes)]
    F_all = compute_fundamental(P_abs_all)
    row_sums_all = start_dist.dot(F_all.sum(axis=1))[0, 0]

    # Marginal losses w.r.t. the full set, same one-per-source skip rule.
    node_processed = -1
    source_idx = []
    for out_edge in eligible:
        source_node = out_edge[0]
        if node_processed == source_node:
            # Skip: removing any edge with this source updates F the same way.
            continue
        node_processed = source_node
        F_all_updated = update_rev_fundamental_mat(F_all, H, map_query_to_org, source_node)
        abs_cen = start_dist.dot(F_all_updated.sum(axis=1))[0, 0]
        ac_marginal_full.append(abs_cen)
        source_idx.append(source_node)
    sorted_indexes = [i[0] for i in
                      sorted(enumerate(source_idx), key=lambda x: x[1])]
    ac_marginal_full = [ac_marginal_full[i] for i in sorted_indexes]
    assert sorted_indexes == sorted_indexes_empty, \
        "Something is wrong with the way scores are appended"

    # Steepness of the set function; the smallest ratio gives the bound.
    all_steepness = (asarray(ac_marginal_full) - row_sums_all) / \
                    (row_sums_empty - asarray(ac_marginal_empty))
    s = min(all_steepness)
    node_max = argmin(all_steepness)
    # NOTE(review): sorted_indexes[node_max] is a position index into the
    # per-source lists, not a node id — confirm whether a node id (e.g.
    # sorted(source_idx)[node_max]) was intended.
    return 1 - s, sorted_indexes[node_max]
def reverse_greedy(G, query_nodes, target_nodes, n_edges, start_dist):
    r"""Select a set of links with a reverse greedy descent algorithm that
    reduces the absorbing RW centrality between a set of query nodes Q and a
    set of absorbing target nodes C such that Q \cap C = \emptyset.

    Starts from the graph with *all* eligible (query -> target) links added,
    then repeatedly removes the link whose removal increases centrality the
    least; the last ``n_edges`` links removed form the answer.  The query and
    target set must be a 'viable' partition of the graph.

    Parameters
    ----------
    G : networkx graph
        The graph from which the links will be selected.
    query_nodes : list
        The set of nodes from which the random walker starts.
    target_nodes : list
        The set of nodes where the random walker ends.
    n_edges : integer
        The number of links to be added.
    start_dist : matrix
        The starting distribution over the query set.

    Returns
    -------
    tuple of lists
        ``(links, scores)``: the selected links that reduce the absorbing RW
        centrality, and the centrality score after each selection, both in
        the order the links were (conceptually) added.
    """
    H = G.copy()  # work on a copy so the caller's graph is untouched
    query_set_size = len(query_nodes)
    map_query_to_org = dict(zip(query_nodes, range(query_set_size)))
    # All (query, target) pairs not already present as edges.
    candidates = list(product(query_nodes, target_nodes))
    eligible = [(u, v) for u, v in candidates if not H.has_edge(u, v)]
    # Reverse greedy: begin from the full candidate set and peel links off.
    H.add_edges_from(eligible)
    P = csc_matrix(nx.google_matrix(H, alpha=1))
    P_abs = P[list(query_nodes), :][:, list(query_nodes)]
    F = compute_fundamental(P_abs)

    optimal_set = []
    ac_scores = []
    while len(eligible) > 0:
        round_min = -1  # sentinel: real centrality values are positive
        worst_link = (-1, -1)
        node_processed = -1
        for out_edge in eligible:
            source_node = out_edge[0]
            if node_processed == source_node:
                # Removing any edge with this source updates F identically,
                # so only the first edge per source needs evaluating.
                continue
            node_processed = source_node
            F_updated = update_rev_fundamental_mat(F, H, map_query_to_org, source_node)
            abs_cen = start_dist.dot(F_updated.sum(axis=1))[0, 0]
            if abs_cen < round_min or round_min == -1:
                worst_link = out_edge
                round_min = abs_cen
                worst_F = F_updated
        # Permanently drop the link whose removal hurts centrality the least.
        F = worst_F
        H.remove_edge(*worst_link)
        eligible.remove(worst_link)
        if len(eligible) <= n_edges:
            # The final n_edges links removed are the ones we keep.
            ac_scores.append(round_min)
            optimal_set.append(worst_link)
    # Reverse so links/scores appear in "addition" order.
    return list(reversed(optimal_set)), list(reversed(ac_scores))