def test_reverse_edge_weights(g):
    g_cp = g.copy()
    g_rev = reverse_edge_weights(g)
    p = get_edge_weights(g_cp)
    p_rev = get_edge_weights(g_rev)
    # after reversal, the weight on (v, u) equals the original weight on (u, v)
    for e in g_cp.edges():
        u, v = int(e.source()), int(e.target())
        if u < v:
            assert p[g_cp.edge(u, v)] == p_rev[g_rev.edge(v, u)]
def test_preprocess(g):
    norm_g = preprocess(g)
    max_w = 1.5
    # expected weights are the globally normalized weights with each
    # antiparallel pair swapped (cf. test_normalize_globally)
    expected_edges_and_weights = {
        (0, 1): 0.7 / max_w,
        (1, 0): 0.9 / max_w,
        (0, 2): 0.4 / max_w,
        (2, 0): 0.1 / max_w,
        (2, 3): 0.3 / max_w,
        (3, 2): 0.4 / max_w,
        (1, 3): 0.2 / max_w,
        (3, 1): 0.8 / max_w,
        # self-loops
        (0, 0): 0.5 / max_w,
        (1, 1): 0,
        (2, 2): 0.7 / max_w,
        (3, 3): 1.0 / max_w
    }
    new_edge_weights = get_edge_weights(norm_g)
    for (u, v), w in expected_edges_and_weights.items():
        assert pytest.approx(w) == new_edge_weights[norm_g.edge(u, v)]
def test_normalize_globally(g):
    norm_g = normalize_globally(g)
    assert norm_g.is_directed()
    max_w = 1.5  # maximum weighted out-degree of the fixture graph
    expected_edges_and_weights = {
        (0, 1): 0.9 / max_w,
        (1, 0): 0.7 / max_w,
        (0, 2): 0.1 / max_w,
        (2, 0): 0.4 / max_w,
        (2, 3): 0.4 / max_w,
        (3, 2): 0.3 / max_w,
        (1, 3): 0.8 / max_w,
        (3, 1): 0.2 / max_w,
        # self-loops
        (0, 0): 0.5 / max_w,
        (1, 1): 0,
        (2, 2): 0.7 / max_w,
        (3, 3): 1.0 / max_w
    }
    assert norm_g.num_edges() == (3 * g.num_vertices())
    assert set(extract_edges(norm_g)) == set(expected_edges_and_weights.keys())
    assert set(extract_nodes(norm_g)) == set(extract_nodes(g))

    new_edge_weights = get_edge_weights(norm_g)
    for (u, v), w in expected_edges_and_weights.items():
        assert pytest.approx(w) == new_edge_weights[norm_g.edge(u, v)]

    # after normalization, every node's weighted out-degree is exactly 1
    deg = norm_g.degree_property_map("out", new_edge_weights)
    for v in norm_g.vertices():
        assert pytest.approx(1.0) == deg[v]
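# NOTE: the tests above assume a pytest fixture `g` providing a directed
# graph_tool graph on 4 nodes with a 'weights' edge property. The fixture is
# defined elsewhere in the repo; a minimal sketch consistent with the expected
# values above would look like this (an illustrative reconstruction, not the
# repo's actual fixture):
@pytest.fixture
def g():
    from graph_tool import Graph
    g = Graph(directed=True)
    g.add_vertex(4)
    weights = g.new_edge_property('double')
    for (u, v), w in {(0, 1): 0.9, (1, 0): 0.7,
                      (0, 2): 0.1, (2, 0): 0.4,
                      (2, 3): 0.4, (3, 2): 0.3,
                      (1, 3): 0.8, (3, 1): 0.2}.items():
        weights[g.add_edge(u, v)] = w
    g.edge_properties['weights'] = weights
    return g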
def normalize_globally(g):
    """Scale all edge weights by the maximum weighted out-degree, then add a
    self-loop to each node carrying the residual mass, so that every node's
    weighted out-degree becomes exactly 1."""
    weights = get_edge_weights(g)
    deg = g.degree_property_map("out", weights)
    w_max = deg.a.max()

    new_g = g.copy()
    new_weights = get_edge_weights(new_g)
    new_weights.a /= w_max
    new_deg = new_g.degree_property_map("out", new_weights)

    # add self-loops
    self_loops = [(v, v) for v in new_g.vertices()]
    new_g.add_edge_list(self_loops)

    # each self-loop absorbs the mass missing from its node's out-degree
    new_weights = get_edge_weights(new_g)
    for v, _ in self_loops:
        new_weights[new_g.edge(v, v)] = 1 - new_deg[v]
    new_g.edge_properties['weights'] = new_weights
    return new_g
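# The construction above makes the weights row-stochastic: all weights are
# divided by the maximum weighted out-degree, and a self-loop on each node
# absorbs the residual so that every row sums to 1. A standalone numpy
# illustration of the same idea (W mirrors the test fixture above; this is an
# explanatory sketch, not part of the repo's API):
import numpy as np

W = np.array([[0.0, 0.9, 0.1, 0.0],
              [0.7, 0.0, 0.0, 0.8],
              [0.4, 0.0, 0.0, 0.4],
              [0.0, 0.2, 0.3, 0.0]])

w_max = W.sum(axis=1).max()                          # = 1.5, as in the tests
W_norm = W / w_max                                   # scale uniformly
np.fill_diagonal(W_norm, 1.0 - W_norm.sum(axis=1))   # self-loops take the rest

assert np.allclose(W_norm.sum(axis=1), 1.0)          # row-stochastic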
def reverse_edge_weights(g):
    """Swap the weights of every antiparallel edge pair (u, v) / (v, u)
    in place. Assumes every edge has an antiparallel counterpart; self-loops
    are left untouched."""
    weights = get_edge_weights(g)
    for e in g.edges():
        u, v = int(e.source()), int(e.target())
        if u < v:  # visit each pair only once
            er = g.edge(e.target(), e.source())
            weights[e], weights[er] = weights[er], weights[e]
    g.edge_properties['weights'] = weights
    return g
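# In matrix terms, swapping every antiparallel pair is a transpose of the
# weight matrix: the weight on (u, v) moves to (v, u) and vice versa. A quick
# standalone check (illustrative only):
import numpy as np

W = np.array([[0.0, 0.9, 0.1],
              [0.7, 0.0, 0.8],
              [0.4, 0.2, 0.0]])
W_rev = W.T
assert W_rev[1, 0] == W[0, 1]  # the old (0, 1) weight now sits on (1, 0)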
def add_incremental_edges(self, tree_nodes):
    if isinstance(tree_nodes, GraphView):
        raise TypeError('add_incremental_edges does not support GraphView yet. '
                        'Please pass in a set of nodes')
    # encode the tree as a cascade vector: -1 = uninfected, 1 = infected
    fake_c = np.ones(self.num_nodes) * (-1)
    fake_c[list(tree_nodes)] = 1

    edge_weights = get_edge_weights(self.g)
    assert edge_weights is not None, \
        'for incremental edge addition, edge weights must be given'
    new_c = incremental_simulation(self.g, fake_c, edge_weights,
                                   self.num_nodes,
                                   return_new_edges=False)
    return set(infected_nodes(new_c))
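# The -1/1 vector above follows the cascade encoding used throughout: -1
# marks an uninfected node, any non-negative entry marks an infected one
# (infected_nodes is assumed to return the indices of non-negative entries).
# A tiny standalone illustration of the encoding:
import numpy as np

num_nodes = 6
tree_nodes = {0, 2, 3}

fake_c = -np.ones(num_nodes)   # everyone starts uninfected
fake_c[list(tree_nodes)] = 1   # mark the tree's nodes as infected

assert set(np.nonzero(fake_c != -1)[0]) == tree_nodes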
def run_with_or_without_resampling(g, cid, c, X, n_samples, sampling_method):
    gi = from_gt(g, get_edge_weights(g))
    infected = infected_nodes(c)

    y_true = np.zeros((len(c), ))
    y_true[infected] = 1

    # score only the unobserved nodes
    X_set = set(X)
    mask = np.array([(i not in X_set) for i in range(len(c))])

    root_sampler = build_true_root_sampler(c)

    options = {
        'P': {
            'with_resampling': True,
            'true_casacde_proba_func': cascade_probability_gt
        },
        'P_new': {
            'with_resampling': True,
            'true_casacde_proba_func': ic_cascade_probability_gt
        },
        'no resampling': {
            'with_resampling': False
        }
    }

    ap_ans, p_ans = {}, {}
    for name, opt in options.items():
        sampler = TreeSamplePool(g, n_samples, sampling_method,
                                 gi=gi,
                                 return_type='nodes',
                                 **opt)
        sampler.fill(X, root_sampler=root_sampler)
        estimator = TreeBasedStatistics(g, sampler.samples)
        probas = infection_probability(g, X, sampler, estimator)

        ap_score = average_precision_score(y_true[mask], probas[mask])
        p_score = precision_at_cascade_size(y_true[mask], probas[mask])
        ap_ans[name] = ap_score
        p_ans[name] = p_score
    ap_ans['cid'] = cid
    p_ans['cid'] = cid
    return ap_ans, p_ans
def resample_trees(self, trees):
    possible_trees = list(set(trees))

    self.p = get_edge_weights(self.g)

    # build an nx.DiGraph copy: graph_tool's out_neighbours is slow, so
    # neighbour queries go through networkx for speed
    self.g_nx = nx.DiGraph()
    for e in self.g.edges():
        self.g_nx.add_edge(int(e.source()), int(e.target()))

    self.p_dict = {tuple(map(int, [e.source(), e.target()])): self.p[e]
                   for e in self.g.edges()}
    out_degree = self.g.degree_property_map('out', weight=self.p)
    out_degree_dict = {int(v): out_degree[v] for v in self.g.vertices()}

    # cache the probabilities per distinct tree, working in the log domain
    log_p_tbl = {t: self.true_casacde_proba_func(self.g, self.p_dict, t,
                                                 self.g_nx, using_log=True)
                 for t in possible_trees}
    log_pi_tbl = {t: tree_probability_gt(out_degree_dict, self.p_dict, t,
                                         using_log=True)
                  for t in possible_trees}
    log_p_T = np.array([log_p_tbl[t] for t in trees])
    log_pi_T = np.array([log_pi_tbl[t] for t in trees])

    sampling_weights = np.exp(log_p_T - log_pi_T)  # back to probability

    weight_sum = sampling_weights.sum()
    if weight_sum > 0:
        sampling_weights /= weight_sum  # normalization
    else:
        # all weights vanished: fall back to uniform resampling
        sampling_weights = np.ones(len(sampling_weights))
        sampling_weights /= sampling_weights.sum()

    # re-sample trees with replacement according to the importance weights
    resampled_tree_idx = np.random.choice(self.n_samples,
                                          p=sampling_weights,
                                          replace=True,
                                          size=self.n_samples)
    resampled_trees = [trees[i] for i in resampled_tree_idx]
    return resampled_trees
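# resample_trees implements self-normalized importance resampling: trees are
# drawn from a tractable proposal pi, re-weighted by p(t) / pi(t), and then
# resampled with replacement so the pool approximates the true cascade
# distribution p. A minimal standalone sketch of the scheme (the function
# name and the max-subtraction for numerical stability are illustrative
# additions, not the repo's API):
import numpy as np

def importance_resample(samples, log_p, log_pi, rng=None):
    """Resample `samples` drawn from proposal pi towards target p.

    log_p, log_pi: per-sample log target / log proposal probabilities.
    """
    rng = rng or np.random.default_rng()
    log_w = np.asarray(log_p) - np.asarray(log_pi)
    log_w -= log_w.max()            # stabilize before exponentiating
    w = np.exp(log_w)
    w /= w.sum()                    # self-normalize
    idx = rng.choice(len(samples), size=len(samples), replace=True, p=w)
    return [samples[i] for i in idx]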
def one_run(g, norm_g, q, eps, root_sampler_name, min_size, max_size,
            observation_method="uniform", with_inc=False):
    print("observation_method", observation_method)
    n_samples = 100
    p = g.edge_properties['weights']
    obs, c = gen_input(
        g,
        source=None,
        p=p,
        q=q,
        model='ic',
        observation_method=observation_method,
        min_size=min_size,
        max_size=max_size)
    print('cascade size', len(infected_nodes(c)))

    source = np.nonzero(c == 0)[0][0]

    if root_sampler_name == 'pagerank':
        root_sampler = build_root_sampler_by_pagerank_score(g, obs, c, eps=eps)
    elif root_sampler_name == 'true':
        root_sampler = (lambda: source)
    else:
        root_sampler = (lambda: None)

    # method 2: vanilla steiner tree sampling
    gi = from_gt(norm_g, weights=get_edge_weights(norm_g))
    st_tree_nodes = sample_steiner_trees(
        g, obs,
        root=root_sampler(),
        method='cut',
        n_samples=n_samples,
        gi=gi,
        return_tree_nodes=True)
    node_stat = TreeBasedStatistics(g, st_tree_nodes)
    st_naive_probas = node_stat.unconditional_proba()

    if with_inc:
        # method 3: with incremental cascade simulation
        st_tree_nodes = sample_steiner_trees(
            g, obs,
            root=root_sampler(),
            method='cut',
            n_samples=n_samples,
            gi=gi,
            return_tree_nodes=True)
        new_tree_nodes = []
        for nodes in st_tree_nodes:
            fake_c = np.ones(g.num_vertices()) * (-1)
            fake_c[list(nodes)] = 1
            new_c = incremental_simulation(g, fake_c, p, return_new_edges=False)
            new_tree_nodes.append(infected_nodes(new_c))
        node_stat = TreeBasedStatistics(g, new_tree_nodes)
        st_tree_inc_probas = node_stat.unconditional_proba()

    row = {'c': c, 'obs': obs, 'st_naive_probas': st_naive_probas}
    if with_inc:
        row['st_tree_inc_probas'] = st_tree_inc_probas
    return row
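# Hypothetical driver for one_run, assuming `g` carries a 'weights' edge
# property and `norm_g` is its preprocessed copy (globally normalized, then
# edge weights reversed, as the tests above suggest). Parameter values here
# are illustrative:
#
#     norm_g = preprocess(g)
#     row = one_run(g, norm_g, q=0.1, eps=0.2, root_sampler_name='true',
#                   min_size=10, max_size=100, with_inc=True)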
def gi(g):
    return util.from_gt(g, get_edge_weights(g))