def test_reverse_edge_weights(g):
    """Check that ``reverse_edge_weights`` swaps weights across edge pairs.

    A copy of ``g`` is taken before the call so that the pre-reversal
    weights can be compared with the weights of the returned graph.  For
    every edge ``(u, v)`` of the copy with ``u < v``, the weight it had
    before must equal the weight of ``(v, u)`` afterwards.
    """
    original = g.copy()
    reversed_g = reverse_edge_weights(g)

    weights_before = get_edge_weights(original)
    weights_after = get_edge_weights(reversed_g)

    for edge in original.edges():
        src, tgt = int(edge.source()), int(edge.target())
        if src >= tgt:
            # only one orientation of each pair is inspected
            continue
        forward = original.edge(src, tgt)
        backward = reversed_g.edge(tgt, src)
        assert weights_before[forward] == weights_after[backward]
def test_preprocess(g):
    """Check the edge weights of the graph returned by ``preprocess``.

    Each expected weight is a raw value divided by ``max_w`` (1.5); the
    weights read from the preprocessed graph must match approximately.
    """
    norm_g = preprocess(g)

    max_w = 1.5
    # raw (un-normalised) weight expected on every directed edge
    raw_weights = {
        (0, 1): 0.7,
        (1, 0): 0.9,
        (0, 2): 0.4,
        (2, 0): 0.1,
        (2, 3): 0.3,
        (3, 2): 0.4,
        (1, 3): 0.2,
        (3, 1): 0.8,

        # self-loops
        (0, 0): 0.5,
        (1, 1): 0,
        (2, 2): 0.7,
        (3, 3): 1.0,
    }
    expected = {edge: raw / max_w for edge, raw in raw_weights.items()}

    actual = get_edge_weights(norm_g)
    for edge, expected_w in expected.items():
        u, v = edge
        print(u, v)
        assert pytest.approx(expected_w) == actual[norm_g.edge(u, v)]
def test_normalize_globally(g):
    """Check the structure and weights produced by ``normalize_globally``.

    The returned graph must be directed, contain three edges per vertex of
    ``g``, have exactly the expected edge set and the same node set as
    ``g``, carry the expected weights (raw value / 1.5), and have a
    weighted out-degree of approximately 1.0 at every vertex.
    """
    norm_g = normalize_globally(g)

    assert norm_g.is_directed()

    max_w = 1.5
    # raw (un-normalised) weight expected on every directed edge
    raw_weights = {
        (0, 1): 0.9,
        (1, 0): 0.7,
        (0, 2): 0.1,
        (2, 0): 0.4,
        (2, 3): 0.4,
        (3, 2): 0.3,
        (1, 3): 0.8,
        (3, 1): 0.2,

        # self-loops
        (0, 0): 0.5,
        (1, 1): 0,
        (2, 2): 0.7,
        (3, 3): 1.0,
    }
    expected = {edge: raw / max_w for edge, raw in raw_weights.items()}

    # structural checks
    assert norm_g.num_edges() == (3 * g.num_vertices())
    assert set(extract_edges(norm_g)) == set(expected)
    assert set(extract_nodes(norm_g)) == set(extract_nodes(g))

    # per-edge weight checks
    actual = get_edge_weights(norm_g)
    for edge, expected_w in expected.items():
        u, v = edge
        assert pytest.approx(expected_w) == actual[norm_g.edge(u, v)]

    # every vertex's weighted out-degree must come out as one
    out_deg = norm_g.degree_property_map("out", actual)
    for vertex in norm_g.vertices():
        assert pytest.approx(1.0) == out_deg[vertex]
def normalize_globally(g):
    """Return a copy of ``g`` with weights scaled by the max weighted out-degree.

    The largest weighted out-degree of ``g`` is computed first.  All edge
    weights of a copy of ``g`` are divided by that value, then a self-loop
    is added at every vertex of the copy with weight
    ``1 - (scaled weighted out-degree of that vertex)``.  The resulting
    weights are stored under the ``'weights'`` edge property of the copy.
    """
    print('global normlization')
    max_out_degree = g.degree_property_map("out", get_edge_weights(g)).a.max()

    scaled = g.copy()
    scaled_weights = get_edge_weights(scaled)
    scaled_weights.a /= max_out_degree
    # degrees are taken before the self-loops exist, so they exclude them
    scaled_deg = scaled.degree_property_map("out", scaled_weights)

    # one self-loop per vertex
    loops = [(vertex, vertex) for vertex in scaled.vertices()]
    scaled.add_edge_list(loops)

    # re-fetch the weights now that the graph has the extra edges
    scaled_weights = get_edge_weights(scaled)
    for vertex, _ in loops:
        loop_edge = scaled.edge(vertex, vertex)
        scaled_weights[loop_edge] = 1 - scaled_deg[vertex]

    scaled.edge_properties['weights'] = scaled_weights
    return scaled
def reverse_edge_weights(g):
    """Swap the weight of every edge with that of its reverse edge, in place.

    Only edges ``(u, v)`` with ``u < v`` trigger a swap, so each pair is
    exchanged once.  The weights are written back to the ``'weights'``
    edge property of ``g`` and the same graph object is returned.
    """
    print('reversing')
    weights = get_edge_weights(g)
    for edge in g.edges():
        src, tgt = edge.source(), edge.target()
        if int(src) >= int(tgt):
            continue
        back_edge = g.edge(tgt, src)
        forward_weight = weights[edge]
        weights[edge] = weights[back_edge]
        weights[back_edge] = forward_weight
    g.edge_properties['weights'] = weights
    return g
Exemple #6
0
    def add_incremental_edges(self, tree_nodes):
        """Run ``incremental_simulation`` seeded with ``tree_nodes``.

        A state vector of length ``self.num_nodes`` is filled with -1 and
        set to 1 at the positions listed in ``tree_nodes``; it is passed to
        ``incremental_simulation`` together with the edge weights of
        ``self.g``.

        Args:
            tree_nodes: iterable of node indices; a ``GraphView`` is rejected.

        Returns:
            set: ``infected_nodes`` of the simulated state.

        Raises:
            TypeError: if ``tree_nodes`` is a ``GraphView``.
        """
        if isinstance(tree_nodes, GraphView):
            message = ('add_incremental_edges does not support GraphView yet. '
                       'Please pass in a set of nodes')
            raise TypeError(message)

        # NOTE(review): -1 presumably marks "not infected" and 1 "infected";
        # confirm against incremental_simulation.
        seed_state = np.full(self.num_nodes, -1.0)
        seed_state[list(tree_nodes)] = 1

        weights = get_edge_weights(self.g)
        assert weights is not None, 'for incremental edge addition, edge weight should be given'

        simulated = incremental_simulation(
            self.g,
            seed_state,
            weights,
            self.num_nodes,
            return_new_edges=False,
        )
        return set(infected_nodes(simulated))
def run_with_or_without_resampling(g, cid, c, X, n_samples, sampling_method):
    """Score infection-probability estimates for three sampler configurations.

    For each configuration ('P', 'P_new', 'no resampling') a
    ``TreeSamplePool`` is built and filled, infection probabilities are
    estimated, and two scores are computed on the nodes that are not in
    ``X``: ``average_precision_score`` and ``precision_at_cascade_size``.

    Returns:
        tuple: ``(ap_ans, p_ans)``, two dicts mapping configuration name to
        score, each with an extra ``'cid'`` entry holding ``cid``.
    """
    gi = from_gt(g, get_edge_weights(g))

    # ground truth: 1 for the nodes reported by infected_nodes, 0 elsewhere
    truth = np.zeros((len(c), ))
    truth[infected_nodes(c)] = 1

    # evaluate only on nodes outside X
    observed = set(X)
    mask = np.array([node not in observed for node in range(len(c))])

    root_sampler = build_true_root_sampler(c)

    configurations = [
        ('P', {
            'with_resampling': True,
            'true_casacde_proba_func': cascade_probability_gt,
        }),
        ('P_new', {
            'with_resampling': True,
            'true_casacde_proba_func': ic_cascade_probability_gt,
        }),
        ('no resampling', {
            'with_resampling': False,
        }),
    ]

    ap_ans = {}
    p_ans = {}
    for name, pool_kwargs in configurations:
        pool = TreeSamplePool(g,
                              n_samples,
                              sampling_method,
                              gi=gi,
                              return_type='nodes',
                              **pool_kwargs)
        pool.fill(X, root_sampler=root_sampler)

        estimator = TreeBasedStatistics(g, pool.samples)
        probas = infection_probability(g, X, pool, estimator)

        masked_truth, masked_probas = truth[mask], probas[mask]
        ap_ans[name] = average_precision_score(masked_truth, masked_probas)
        p_ans[name] = precision_at_cascade_size(masked_truth, masked_probas)

    ap_ans['cid'] = cid
    p_ans['cid'] = cid
    return ap_ans, p_ans
Exemple #8
0
    def resample_trees(self, trees):
        """Resample ``trees`` with replacement according to importance weights.

        The weight of a tree ``t`` is ``exp(log_p(t) - log_pi(t))`` where
        ``log_p`` is returned by ``self.true_casacde_proba_func`` and
        ``log_pi`` by ``tree_probability_gt``, both called with
        ``using_log=True``.  The weights are normalised to sum to one; when
        their sum is not positive (which includes NaN) uniform weights are
        used instead.

        Side effects: ``self.p``, ``self.g_nx`` and ``self.p_dict`` are
        rebuilt from ``self.g`` on every call.

        Args:
            trees: sequence of hashable trees (they serve as dict keys).

        Returns:
            list: ``self.n_samples`` trees drawn from ``trees``.
        """
        possible_trees = list(set(trees))

        self.p = get_edge_weights(self.g)

        # this is required for speed
        # graph_tool's out_neighbours is slow
        self.g_nx = nx.DiGraph()
        self.p_dict = {}
        for e in self.g.edges():
            u, v = int(e.source()), int(e.target())
            self.g_nx.add_edge(u, v)
            self.p_dict[(u, v)] = self.p[e]

        out_degree = self.g.degree_property_map('out', weight=self.p)
        out_degree_dict = {int(v): out_degree[v] for v in self.g.vertices()}

        # caching table: each distinct tree is evaluated once,
        # and we work in the log domain
        log_p_tbl = {t: self.true_casacde_proba_func(self.g, self.p_dict, t, self.g_nx, using_log=True)
                     for t in possible_trees}
        log_pi_tbl = {t: tree_probability_gt(out_degree_dict, self.p_dict, t, using_log=True)
                      for t in possible_trees}

        log_p_T = np.array([log_p_tbl[t] for t in trees])
        log_pi_T = np.array([log_pi_tbl[t] for t in trees])

        sampling_weights = np.exp(log_p_T - log_pi_T)  # back to probability

        weight_sum = sampling_weights.sum()
        if weight_sum > 0:
            sampling_weights /= weight_sum  # normalization
        else:
            # uniform sampling
            sampling_weights = np.ones(len(sampling_weights))
            sampling_weights /= sampling_weights.sum()

        # re-sampling trees by weights; the index population must have one
        # entry per weight, i.e. len(trees), otherwise np.random.choice
        # rejects the call whenever the pool size differs from n_samples
        resampled_tree_idx = np.random.choice(len(trees),
                                              p=sampling_weights,
                                              replace=True,
                                              size=self.n_samples)

        resampled_trees = [trees[i] for i in resampled_tree_idx]
        return resampled_trees
def one_run(g,
            norm_g,
            q,
            eps,
            root_sampler_name,
            min_size,
            max_size,
            observation_method="uniform",
            with_inc=False):
    """Generate one cascade and estimate node probabilities from Steiner trees.

    A cascade and its observations are produced by ``gen_input`` on ``g``
    (model ``'ic'``, weights taken from ``g.edge_properties['weights']``).
    100 Steiner trees are then sampled with the ``'cut'`` method using a
    graph built from ``norm_g``, and ``unconditional_proba`` is computed
    from them.  When ``with_inc`` is true, a second batch of trees is
    sampled, each is extended through ``incremental_simulation``, and a
    second probability vector is computed.

    Args:
        root_sampler_name: ``'pagerank'`` uses
            ``build_root_sampler_by_pagerank_score``; ``'true'`` always
            yields the first node whose entry in ``c`` equals 0; anything
            else yields ``None``.

    Returns:
        dict: keys ``'c'``, ``'obs'``, ``'st_naive_probas'`` and, when
        ``with_inc`` is true, ``'st_tree_inc_probas'``.
    """
    print("observation_method", observation_method)

    n_samples = 100
    p = g.edge_properties['weights']

    obs, c = gen_input(g,
                       source=None,
                       p=p,
                       q=q,
                       model='ic',
                       observation_method=observation_method,
                       min_size=min_size,
                       max_size=max_size)

    print('cascade size', len(infected_nodes(c)))
    source = np.nonzero(c == 0)[0][0]

    if root_sampler_name == 'pagerank':
        root_sampler = build_root_sampler_by_pagerank_score(g, obs, c, eps=eps)
    elif root_sampler_name == 'true':
        def root_sampler():
            return source
    else:
        def root_sampler():
            return None

    gi = from_gt(norm_g, weights=get_edge_weights(norm_g))

    def draw_tree_nodes():
        # one batch of Steiner-tree node sets; the root is drawn once per batch
        return sample_steiner_trees(g,
                                    obs,
                                    root=root_sampler(),
                                    method='cut',
                                    n_samples=n_samples,
                                    gi=gi,
                                    return_tree_nodes=True)

    # method 2: vanilla steiner tree sampling
    naive_stat = TreeBasedStatistics(g, draw_tree_nodes())
    row = {
        'c': c,
        'obs': obs,
        'st_naive_probas': naive_stat.unconditional_proba(),
    }

    if with_inc:
        # method 3: with incremental cascade simulation
        grown_tree_nodes = []
        for nodes in draw_tree_nodes():
            seed_state = np.full(g.num_vertices(), -1.0)
            seed_state[list(nodes)] = 1
            simulated = incremental_simulation(g,
                                               seed_state,
                                               p,
                                               return_new_edges=False)
            grown_tree_nodes.append(infected_nodes(simulated))
        inc_stat = TreeBasedStatistics(g, grown_tree_nodes)
        row['st_tree_inc_probas'] = inc_stat.unconditional_proba()

    return row
Exemple #10
0
def gi(g):
    """Return ``util.from_gt`` applied to ``g`` and its edge weights."""
    weights = get_edge_weights(g)
    return util.from_gt(g, weights)