Ejemplo n.º 1
0
    def ptb_target(self, graph, ptb_rate, gpu):
        """Perturb a graph by running direct, structure-only Nettack attacks.

        Target nodes are drawn at random (NumPy seed fixed to 0) from the
        unlabeled split until ``ptb_rate`` distinct nodes have been attacked.
        Before each attack a linear surrogate GCN is retrained on the current
        (already perturbed) graph; a successful attack replaces the working
        adjacency matrix with the attacked one.

        Parameters
        ----------
        graph:
            Object that ``copy(graph)`` unpacks into ``(adjacency, features,
            labels)``. The labels are indexed as a 2-D matrix with one column
            per class; the adjacency is presumably a SciPy sparse *matrix*
            (``.sum(0).A1`` is used) -- TODO confirm against callers.
        ptb_rate: int
            Number of nodes to attack. Despite the name it is used as an
            absolute count, not a fraction.
        gpu:
            GPU id forwarded to the surrogate GCN as ``gpu_id``.

        Returns
        -------
        tuple
            ``(_An, attacked)`` where ``_An`` is
            ``ne_utils.preprocess_graph`` applied to the final adjacency and
            ``attacked`` is the list of attacked node ids. If every unlabeled
            node has been attacked or blacklisted before ``ptb_rate`` is
            reached, a ``RuntimeWarning`` is issued and the partial result is
            returned.
        """
        import warnings

        from nettack import utils as ne_utils
        from nettack import GCN as ne_GCN
        from nettack import nettack as ntk

        gpu_id = gpu
        nb_node = ptb_rate
        _A_obs, _X_obs, _z_obs = copy(graph)

        _X_obs = sp.csr_matrix(_X_obs).astype('float32')

        _N = _A_obs.shape[0]
        _K = _z_obs.shape[1]
        _Z_obs = _z_obs
        _z_obs = np.argmax(_Z_obs, 1)
        _An = ne_utils.preprocess_graph(_A_obs)
        sizes = [16, _K]
        # Degrees of the *unperturbed* graph; they set the per-node budget.
        degrees = _A_obs.sum(0).A1

        seed = 0
        unlabeled_share = 0.8
        val_share = 0.1
        train_share = 1 - unlabeled_share - val_share
        np.random.seed(seed)

        split_train, split_val, split_unlabeled = ne_utils.train_val_test_split_tabular(
            np.arange(_N),
            train_size=train_share,
            val_size=val_share,
            test_size=unlabeled_share,
            stratify=_z_obs)

        # Attack configuration is the same for every target node.
        direct_attack = True
        n_influencers = 1 if direct_attack else 5
        perturb_features = False
        perturb_structure = True

        # `attacked` and `blacklist` are disjoint subsets of the unlabeled
        # split, so their combined size tells us when no candidate is left.
        n_candidates = len(np.unique(split_unlabeled))

        attacked = set()
        blacklist = set()
        while len(attacked) < nb_node:
            if len(attacked) + len(blacklist) >= n_candidates:
                # Without this guard the rejection-sampling loop below would
                # never terminate.
                warnings.warn(
                    "ptb_target: only {} of {} requested nodes could be "
                    "attacked; no unlabeled candidates remain".format(
                        len(attacked), nb_node),
                    RuntimeWarning)
                break

            # Rejection sampling keeps the seeded draw sequence reproducible.
            u = np.random.choice(split_unlabeled)
            while u in attacked or u in blacklist:
                u = np.random.choice(split_unlabeled)

            surrogate_model = None
            try:
                surrogate_model = ne_GCN.GCN(sizes, _An, _X_obs, with_relu=False,
                                             name="surrogate", gpu_id=gpu_id)
                surrogate_model.train(split_train, split_val, _Z_obs)
                W1 = surrogate_model.W1.eval(session=surrogate_model.session)
                W2 = surrogate_model.W2.eval(session=surrogate_model.session)
                attack = ntk.Nettack(_A_obs, _X_obs, _z_obs, W1, W2, u, verbose=False)
                # How many perturbations to perform. Default: degree of the node.
                n_perturbations = int(degrees[u])
                attack.attack_surrogate(n_perturbations,
                                        perturb_structure=perturb_structure,
                                        perturb_features=perturb_features,
                                        direct=direct_attack,
                                        n_influencers=n_influencers)
            except Exception:
                # Best effort: a node whose attack fails (e.g. a zero
                # perturbation budget) is skipped and never retried.
                # KeyboardInterrupt/SystemExit are deliberately not caught.
                blacklist.add(u)
                continue
            finally:
                # Release TensorFlow resources on success *and* on failure.
                if surrogate_model is not None:
                    surrogate_model.session.close()
                tf.reset_default_graph()

            attacked.add(u)
            _A_obs = attack.adj.tocsr()
            _An = ne_utils.preprocess_graph(_A_obs)

        return _An, list(attacked)
Ejemplo n.º 2
0
    def pre_run(self):
        """Train the surrogate and unperturbed GCN models for both graphs.

        For each of the two observed adjacency matrices (``self._A_obs_hat``
        and ``self._A_obs_hat_2``) this:

        * trains a GCN built with ``with_relu=False`` (the surrogate needed
          for nettack) and stores its evaluated weights as
          ``self.W1_<i>`` / ``self.W2_<i>``;
        * builds and trains a GCN named ``"gcn_orig"`` on the unperturbed
          graph and keeps it as ``self.gcn_before_<i>``.

        Also sets ``self._Z_obs_hat`` (one-hot rows selected from an identity
        matrix by ``self._z_obs_hat``) and ``self.sizes``.
        """
        self._Z_obs_hat = np.eye(self._K)[self._z_obs_hat]
        self.sizes = [16, self._K]

        # Both normalised adjacencies are computed before any model is built.
        norm_adj_first = utils.preprocess_graph(self._A_obs_hat)
        norm_adj_second = utils.preprocess_graph(self._A_obs_hat_2)

        def fit(model):
            # Shared training call: same splits and labels for every model.
            model.train(self.split_train,
                        self.split_val,
                        self._Z_obs_hat,
                        print_info=False)
            return model

        def trained_surrogate(norm_adj):
            # Surrogate GCN (with_relu=False) for the given graph.
            return fit(
                GCN.GCN(self.sizes,
                        norm_adj,
                        self._X_obs_hat,
                        with_relu=False,
                        name="surrogate",
                        gpu_id=self.gpu_id))

        def untrained_reference(norm_adj):
            # GCN for the graph without perturbations; trained by the caller
            # after it has been stored on ``self``.
            return GCN.GCN(self.sizes,
                           norm_adj,
                           self._X_obs_hat,
                           "gcn_orig",
                           gpu_id=self.gpu_id)

        # ----- first graph -----
        surrogate_first = trained_surrogate(norm_adj_first)
        self.W1_1 = surrogate_first.W1.eval(session=surrogate_first.session)
        self.W2_1 = surrogate_first.W2.eval(session=surrogate_first.session)

        self.gcn_before_1 = untrained_reference(norm_adj_first)
        fit(self.gcn_before_1)

        # ----- second graph (surrogate of SBM 2 - needed for nettack) -----
        surrogate_second = trained_surrogate(norm_adj_second)
        self.W1_2 = surrogate_second.W1.eval(session=surrogate_second.session)
        self.W2_2 = surrogate_second.W2.eval(session=surrogate_second.session)

        self.gcn_before_2 = untrained_reference(norm_adj_second)
        fit(self.gcn_before_2)
Ejemplo n.º 3
0
    def __init__(self, adj, X_obs, z_obs, W1, W2, u, verbose=False):
        """Initialise the attack state for target node ``u``.

        Parameters
        ----------
        adj:
            Adjacency matrix; copied and converted with ``tolil()``.
        X_obs:
            Node attribute matrix; copied and converted with ``tolil()``.
        z_obs:
            Node labels; copied. ``z_obs[u]`` is stored as ``label_u`` and
            ``max(z_obs) + 1`` as the number of classes ``K``.
        W1, W2:
            GCN weight matrices; their product ``W1.dot(W2)`` is stored as a
            CSR matrix in ``self.W``.
        u:
            The node being attacked.
        verbose: bool, default: False
            Stored as ``self.verbose``.
        """
        # --- Adjacency matrix and derived copies ---
        working_adj = adj.copy().tolil()
        self.adj = working_adj

        without_loops = working_adj.copy()
        without_loops.setdiag(0)
        self.adj_no_selfloops = without_loops

        # Snapshot taken before any perturbation is applied.
        self.adj_orig = working_adj.copy().tolil()

        # The node being attacked.
        self.u = u
        self.adj_preprocessed = utils.preprocess_graph(working_adj).tolil()

        # Number of nodes.
        self.N = adj.shape[0]

        # --- Node attributes ---
        features = X_obs.copy().tolil()
        self.X_obs = features
        self.X_obs_orig = features.copy().tolil()

        # --- Node labels ---
        labels = z_obs.copy()
        self.z_obs = labels
        self.label_u = labels[u]
        self.K = np.max(labels) + 1

        # --- GCN weight matrices ---
        self.W1, self.W2 = W1, W2
        self.W = sp.csr_matrix(W1.dot(W2))

        # Feature-by-feature product X^T X of the attribute matrix.
        self.cooc_matrix = features.T.dot(features).tolil()
        self.cooc_constraint = None

        # --- Attack bookkeeping, filled in while attacking ---
        self.structure_perturbations, self.feature_perturbations = [], []
        self.influencer_nodes, self.potential_edges = [], []

        self.verbose = verbose
Ejemplo n.º 4
0
    def attack_surrogate(self,
                         n_perturbations,
                         perturb_structure=True,
                         perturb_features=True,
                         direct=True,
                         n_influencers=0,
                         delta_cutoff=0.004):
        """
        Perform an attack on the surrogate model.

        Runs ``n_perturbations`` greedy steps. In each step the best-scoring
        (lowest score) admissible edge flip and/or feature flip is determined
        and exactly one of them is applied in place to ``self.adj`` (together
        with ``self.adj_preprocessed``) or ``self.X_obs``. Every step appends
        one entry to both ``self.structure_perturbations`` and
        ``self.feature_perturbations``; the list that was not used in that
        step receives an empty tuple, so the two lists stay index-aligned.

        Parameters
        ----------
        n_perturbations: int
            The number of perturbations (structure or feature) to perform.

        perturb_structure: bool, default: True
            Indicates whether the structure can be changed.

        perturb_features: bool, default: True
            Indicates whether the features can be changed.

        direct: bool, default: True
            indicates whether to directly modify edges/features of the node attacked or only those of influencers.

        n_influencers: int, default: 0
            Number of influencing nodes -- will be ignored if direct is True

        delta_cutoff: float
            The critical value for the likelihood ratio test of the power law distributions.
             See the Chi square distribution with one degree of freedom. Default value 0.004
             corresponds to a p-value of roughly 0.95.

        Returns
        -------
        None.

        Raises
        ------
        AssertionError
            If an indirect attack is requested without influencers, if
            ``n_perturbations`` is not positive, or if neither structure nor
            feature perturbations are enabled.

        """

        # The check uses truthiness so that it agrees with the
        # ``if not direct`` branch further down.
        assert direct or n_influencers != 0, \
            "indirect mode requires at least one influencer node"
        assert n_perturbations > 0, "need at least one perturbation"
        assert perturb_features or perturb_structure, "either perturb_features or perturb_structure must be true"

        logits_start = self.compute_logits()
        best_wrong_class = self.strongest_wrong_class(logits_start)
        # Margin between the true class and the strongest wrong class,
        # followed by the score of each applied perturbation. The list is
        # local to this method; it is neither stored nor returned here.
        surrogate_losses = [
            logits_start[self.label_u] - logits_start[best_wrong_class]
        ]

        if self.verbose:
            print("##### Starting attack #####")
            if perturb_structure and perturb_features:
                print(
                    "##### Attack node with ID {} using structure and feature perturbations #####"
                    .format(self.u))
            elif perturb_features:
                print("##### Attack only using feature perturbations #####")
            elif perturb_structure:
                print("##### Attack only using structure perturbations #####")
            if direct:
                print("##### Attacking the node directly #####")
            else:
                print(
                    "##### Attacking the node indirectly via {} influencer nodes #####"
                    .format(n_influencers))
            print("##### Performing {} perturbations #####".format(
                n_perturbations))

        if perturb_structure:

            # Setup starting values of the likelihood ratio test.
            degree_sequence_start = self.adj_orig.sum(0).A1
            current_degree_sequence = self.adj.sum(0).A1
            # Only degrees >= d_min enter the power-law statistics below.
            d_min = 2
            S_d_start = np.sum(
                np.log(degree_sequence_start[degree_sequence_start >= d_min]))
            current_S_d = np.sum(
                np.log(
                    current_degree_sequence[current_degree_sequence >= d_min]))
            n_start = np.sum(degree_sequence_start >= d_min)
            current_n = np.sum(current_degree_sequence >= d_min)
            alpha_start = compute_alpha(n_start, S_d_start, d_min)
            log_likelihood_orig = compute_log_likelihood(
                n_start, alpha_start, S_d_start, d_min)

        # Influencers and candidate edges are only chosen once; a repeated
        # call on the same object reuses the ones already stored.
        if len(self.influencer_nodes) == 0:
            if not direct:
                # Choose influencer nodes
                infls, add_infls = self.get_attacker_nodes(
                    n_influencers, add_additional_nodes=True)
                self.influencer_nodes = np.concatenate(
                    (infls, add_infls)).astype("int")
                # Potential edges are all edges from any attacker to any other node, except the respective
                # attacker itself or the node being attacked.
                # np.vstack replaces np.row_stack, a deprecated alias of it.
                self.potential_edges = np.vstack([
                    np.column_stack((np.tile(infl, self.N - 2),
                                     np.setdiff1d(np.arange(self.N),
                                                  np.array([self.u, infl]))))
                    for infl in self.influencer_nodes
                ])
                if self.verbose:
                    print("Influencer nodes: {}".format(self.influencer_nodes))
            else:
                # direct attack: candidate edges connect the target node to
                # every other node.
                self.potential_edges = np.column_stack(
                    (np.tile(self.u, self.N - 1),
                     np.setdiff1d(np.arange(self.N), self.u)))
                self.influencer_nodes = np.array([self.u])
        self.potential_edges = self.potential_edges.astype("int32")
        for step in range(n_perturbations):
            if self.verbose:
                print("##### ...{}/{} perturbations ... #####".format(
                    step + 1, n_perturbations))
            if perturb_structure:

                # Do not consider edges that, if removed, result in singleton
                # nodes in the graph (as decided by filter_singletons).
                singleton_filter = filter_singletons(self.potential_edges,
                                                     self.adj)
                filtered_edges = self.potential_edges[singleton_filter]

                # Update the values for the power law likelihood ratio test.
                # For 0/1 adjacency entries, deltas is +1 where the edge is
                # currently absent (it would be added) and -1 where it is
                # present (it would be removed).
                deltas = 2 * (
                    1 - self.adj[tuple(filtered_edges.T)].toarray()[0]) - 1
                d_edges_old = current_degree_sequence[filtered_edges]
                d_edges_new = current_degree_sequence[
                    filtered_edges] + deltas[:, None]
                new_S_d, new_n = update_Sx(current_S_d, current_n, d_edges_old,
                                           d_edges_new, d_min)
                new_alphas = compute_alpha(new_n, new_S_d, d_min)
                new_ll = compute_log_likelihood(new_n, new_alphas, new_S_d,
                                                d_min)
                alphas_combined = compute_alpha(new_n + n_start,
                                                new_S_d + S_d_start, d_min)
                new_ll_combined = compute_log_likelihood(
                    new_n + n_start, alphas_combined, new_S_d + S_d_start,
                    d_min)
                new_ratios = -2 * new_ll_combined + 2 * (new_ll +
                                                         log_likelihood_orig)

                # Do not consider edges that, if added/removed, would lead to a violation of the
                # likelihood ratio Chi_square cutoff value.
                powerlaw_filter = filter_chisquare(new_ratios, delta_cutoff)
                filtered_edges_final = filtered_edges[powerlaw_filter]

                # Compute new entries in A_hat_square_uv
                a_hat_uv_new = self.compute_new_a_hat_uv(filtered_edges_final)
                # Compute the struct scores for each potential edge
                struct_scores = self.struct_score(a_hat_uv_new,
                                                  self.compute_XW())
                # The lowest score wins. NOTE(review): argmin raises
                # ValueError if both filters left no candidate edge.
                best_edge_ix = struct_scores.argmin()
                best_edge_score = struct_scores.min()
                best_edge = filtered_edges_final[best_edge_ix]

            if perturb_features:
                # Compute the feature scores for each potential feature perturbation
                # NOTE(review): the first entry is taken as the best one, so
                # feature_scores() presumably returns them sorted -- confirm.
                feature_ixs, feature_scores = self.feature_scores()
                best_feature_ix = feature_ixs[0]
                best_feature_score = feature_scores[0]

            if perturb_structure and perturb_features:
                # decide whether to choose an edge or feature to change
                if best_edge_score < best_feature_score:
                    if self.verbose:
                        print("Edge perturbation: {}".format(best_edge))
                    change_structure = True
                else:
                    if self.verbose:
                        print(
                            "Feature perturbation: {}".format(best_feature_ix))
                    change_structure = False
            elif perturb_structure:
                change_structure = True
            elif perturb_features:
                change_structure = False

            if change_structure:
                # perform edge perturbation: flip the entry symmetrically

                self.adj[tuple(best_edge)] = self.adj[tuple(
                    best_edge[::-1])] = 1 - self.adj[tuple(best_edge)]
                self.adj_preprocessed = utils.preprocess_graph(self.adj)

                self.structure_perturbations.append(tuple(best_edge))
                self.feature_perturbations.append(())
                surrogate_losses.append(best_edge_score)

                # Update likelihood ratio test values
                current_S_d = new_S_d[powerlaw_filter][best_edge_ix]
                current_n = new_n[powerlaw_filter][best_edge_ix]
                current_degree_sequence[best_edge] += deltas[powerlaw_filter][
                    best_edge_ix]

            else:
                # perform feature perturbation: flip the selected entry
                self.X_obs[tuple(
                    best_feature_ix)] = 1 - self.X_obs[tuple(best_feature_ix)]

                self.feature_perturbations.append(tuple(best_feature_ix))
                self.structure_perturbations.append(())
                surrogate_losses.append(best_feature_score)