def ptb_target(self, graph, ptb_rate, gpu):
    """Perturb the graph structure around randomly chosen unlabeled nodes.

    Target nodes are drawn at random (NumPy global RNG, seeded with 0) from
    the unlabeled split. For every target a surrogate GCN
    (``with_relu=False``) is trained on the *current* adjacency matrix and
    Nettack runs a direct, structure-only attack whose budget is the
    target's degree in the input graph. Successful perturbations
    accumulate: later targets are attacked on the already modified graph.

    A target whose attack raises an exception is blacklisted and another
    one is drawn. If every unlabeled node ends up attacked or blacklisted
    before ``ptb_rate`` targets succeeded, the method stops early and
    returns what it has instead of looping forever.

    Parameters
    ----------
    graph :
        Object that ``copy(graph)`` unpacks into
        ``(adjacency, features, labels)``. The label entry is used as a
        2-D matrix (``shape[1]`` classes, ``argmax`` over axis 1).
    ptb_rate : int
        Number of distinct target nodes to attack.
    gpu :
        Forwarded to the surrogate GCN as ``gpu_id``.

    Returns
    -------
    tuple
        ``(_An, attacked)`` -- the result of ``preprocess_graph`` on the
        perturbed adjacency matrix and the list of attacked node ids.
    """
    from nettack import utils as ne_utils
    from nettack import GCN as ne_GCN
    from nettack import nettack as ntk

    _A_obs, _X_obs, _Z_obs = copy(graph)
    _X_obs = sp.csr_matrix(_X_obs).astype('float32')
    _N = _A_obs.shape[0]
    _K = _Z_obs.shape[1]
    _z_obs = np.argmax(_Z_obs, 1)
    _An = ne_utils.preprocess_graph(_A_obs)
    sizes = [16, _K]
    # Attack budgets are taken from the degrees of the *input* graph; they
    # are intentionally not refreshed after earlier attacks.
    degrees = _A_obs.sum(0).A1

    seed = 0
    unlabeled_share = 0.8
    val_share = 0.1
    train_share = 1 - unlabeled_share - val_share
    np.random.seed(seed)

    split_train, split_val, split_unlabeled = ne_utils.train_val_test_split_tabular(
        np.arange(_N),
        train_size=train_share,
        val_size=val_share,
        test_size=unlabeled_share,
        stratify=_z_obs)

    # Fixed attack configuration: direct, structure-only.
    direct_attack = True
    n_influencers = 1 if direct_attack else 5
    perturb_features = False
    perturb_structure = True

    n_candidates = len(np.unique(split_unlabeled))
    attacked = set()
    blacklist = set()
    while len(attacked) < ptb_rate:
        # `attacked` and `blacklist` are disjoint, so once their sizes add
        # up to the pool size the rejection sampling below could never
        # find a fresh node.
        if len(attacked) + len(blacklist) >= n_candidates:
            break

        u = np.random.choice(split_unlabeled)
        while u in attacked or u in blacklist:
            u = np.random.choice(split_unlabeled)

        surrogate_model = None
        try:
            surrogate_model = ne_GCN.GCN(sizes, _An, _X_obs, with_relu=False,
                                         name="surrogate", gpu_id=gpu)
            surrogate_model.train(split_train, split_val, _Z_obs)
            W1 = surrogate_model.W1.eval(session=surrogate_model.session)
            W2 = surrogate_model.W2.eval(session=surrogate_model.session)

            attacker = ntk.Nettack(_A_obs, _X_obs, _z_obs, W1, W2, u,
                                   verbose=False)
            # How many perturbations to perform. Default: degree of the node.
            n_perturbations = int(degrees[u])
            attacker.attack_surrogate(n_perturbations,
                                      perturb_structure=perturb_structure,
                                      perturb_features=perturb_features,
                                      direct=direct_attack,
                                      n_influencers=n_influencers)
        except Exception:
            # Best effort: skip targets that cannot be attacked, but let
            # KeyboardInterrupt / SystemExit propagate.
            blacklist.add(u)
            continue
        finally:
            # Release TensorFlow resources on failure as well, so the next
            # surrogate model starts from a clean default graph.
            if surrogate_model is not None:
                surrogate_model.session.close()
            tf.reset_default_graph()

        attacked.add(u)
        _A_obs = attacker.adj.tocsr()
        _An = ne_utils.preprocess_graph(_A_obs)

    return _An, list(attacked)
def pre_run(self):
    """Train the surrogate and reference GCNs for both observed graphs.

    Sets ``self._Z_obs_hat`` (rows of the ``K x K`` identity matrix
    selected by ``self._z_obs_hat``) and ``self.sizes``, then, for each of
    ``self._A_obs_hat`` and ``self._A_obs_hat_2``:

    * trains a surrogate GCN (``with_relu=False``) and stores its evaluated
      weight matrices as ``self.W1_1`` / ``self.W2_1`` and
      ``self.W1_2`` / ``self.W2_2``;
    * trains a GCN named ``"gcn_orig"`` on the unperturbed graph and keeps
      it as ``self.gcn_before_1`` / ``self.gcn_before_2``.
    """
    self._Z_obs_hat = np.eye(self._K)[self._z_obs_hat]
    self.sizes = [16, self._K]

    adj_norm_first = utils.preprocess_graph(self._A_obs_hat)
    adj_norm_second = utils.preprocess_graph(self._A_obs_hat_2)

    # Arguments shared by every model built below.
    surrogate_options = dict(with_relu=False,
                             name="surrogate",
                             gpu_id=self.gpu_id)
    fit_args = (self.split_train, self.split_val, self._Z_obs_hat)

    # --- first graph: surrogate weights (needed for nettack) ---
    first_surrogate = GCN.GCN(self.sizes, adj_norm_first, self._X_obs_hat,
                              **surrogate_options)
    first_surrogate.train(*fit_args, print_info=False)
    first_session = first_surrogate.session
    self.W1_1 = first_surrogate.W1.eval(session=first_session)
    self.W2_1 = first_surrogate.W2.eval(session=first_session)

    # --- first graph: GCN trained without perturbations ---
    self.gcn_before_1 = GCN.GCN(self.sizes, adj_norm_first,
                                self._X_obs_hat, "gcn_orig",
                                gpu_id=self.gpu_id)
    self.gcn_before_1.train(*fit_args, print_info=False)

    # --- second graph: surrogate weights (needed for nettack) ---
    second_surrogate = GCN.GCN(self.sizes, adj_norm_second,
                               self._X_obs_hat, **surrogate_options)
    second_surrogate.train(*fit_args, print_info=False)
    second_session = second_surrogate.session
    self.W1_2 = second_surrogate.W1.eval(session=second_session)
    self.W2_2 = second_surrogate.W2.eval(session=second_session)

    # --- second graph: GCN trained without perturbations ---
    self.gcn_before_2 = GCN.GCN(self.sizes, adj_norm_second,
                                self._X_obs_hat, "gcn_orig",
                                gpu_id=self.gpu_id)
    self.gcn_before_2.train(*fit_args, print_info=False)
def __init__(self, adj, X_obs, z_obs, W1, W2, u, verbose=False):
    """Set up the attack state for target node ``u``.

    The inputs are copied, so later perturbations do not modify the
    caller's ``adj``, ``X_obs`` or ``z_obs``.

    Parameters
    ----------
    adj :
        Adjacency matrix; must support ``copy().tolil()``.
    X_obs :
        Node attribute matrix; must support ``copy().tolil()``.
    z_obs :
        Node labels, indexable by node id.
    W1, W2 :
        Surrogate GCN weight matrices; their product is stored as ``self.W``.
    u :
        The node being attacked.
    verbose : bool, default: False
        Stored as ``self.verbose``.
    """
    self.u = u
    self.verbose = verbose

    # Adjacency matrix: working copy, pristine copy, and a copy with a
    # zeroed diagonal.
    working_adj = adj.copy().tolil()
    without_loops = working_adj.copy()
    without_loops.setdiag(0)
    self.adj = working_adj
    self.adj_no_selfloops = without_loops
    self.adj_orig = working_adj.copy().tolil()
    self.adj_preprocessed = utils.preprocess_graph(working_adj).tolil()

    # Number of nodes
    self.N = adj.shape[0]

    # Node attributes: working copy plus pristine copy
    attributes = X_obs.copy().tolil()
    self.X_obs = attributes
    self.X_obs_orig = attributes.copy().tolil()

    # Node labels
    labels = z_obs.copy()
    self.z_obs = labels
    self.label_u = labels[u]
    self.K = np.max(labels) + 1

    # GCN weight matrices and their product
    self.W1 = W1
    self.W2 = W2
    self.W = sp.csr_matrix(W1.dot(W2))

    # Feature co-occurrence matrix
    self.cooc_matrix = attributes.T.dot(attributes).tolil()
    self.cooc_constraint = None

    # Bookkeeping filled in while attacking
    self.structure_perturbations = []
    self.feature_perturbations = []
    self.influencer_nodes = []
    self.potential_edges = []
def attack_surrogate(self,
                     n_perturbations,
                     perturb_structure=True,
                     perturb_features=True,
                     direct=True,
                     n_influencers=0,
                     delta_cutoff=0.004):
    """
    Perform an attack on the surrogate model.

    Runs ``n_perturbations`` greedy steps. In every step the best admissible
    edge flip and/or the best feature flip is determined (lower score wins)
    and applied in place to ``self.adj`` or ``self.X_obs``. Each step appends
    exactly one entry to both ``self.structure_perturbations`` and
    ``self.feature_perturbations``; the list that was not used in that step
    receives an empty tuple, so both lists stay aligned by step index.

    Parameters
    ----------
    n_perturbations: int
        The number of perturbations (structure or feature) to perform.

    perturb_structure: bool, default: True
        Indicates whether the structure can be changed.

    perturb_features: bool, default: True
        Indicates whether the features can be changed.

    direct: bool, default: True
        indicates whether to directly modify edges/features of the node attacked or only those of influencers.

    n_influencers: int, default: 0
        Number of influencing nodes -- will be ignored if direct is True

    delta_cutoff: float
        The critical value for the likelihood ratio test of the power law distributions.
         See the Chi square distribution with one degree of freedom. Default value 0.004
         corresponds to a p-value of roughly 0.95.

    Returns
    -------
    None.

    Raises
    ------
    AssertionError
        If ``direct`` is False while ``n_influencers`` is 0, if
        ``n_perturbations`` is not positive, or if both ``perturb_structure``
        and ``perturb_features`` are False. (These checks are ``assert``
        statements and therefore vanish under ``python -O``.)
    """

    assert not (direct == False and n_influencers == 0
                ), "indirect mode requires at least one influencer node"
    assert n_perturbations > 0, "need at least one perturbation"
    assert perturb_features or perturb_structure, "either perturb_features or perturb_structure must be true"

    # Margin between the true class and the strongest wrong class before the
    # attack. `surrogate_losses` is only accumulated locally; this method
    # does not return or store it.
    logits_start = self.compute_logits()
    best_wrong_class = self.strongest_wrong_class(logits_start)
    surrogate_losses = [
        logits_start[self.label_u] - logits_start[best_wrong_class]
    ]

    if self.verbose:
        print("##### Starting attack #####")
        if perturb_structure and perturb_features:
            print(
                "##### Attack node with ID {} using structure and feature perturbations #####"
                .format(self.u))
        elif perturb_features:
            print("##### Attack only using feature perturbations #####")
        elif perturb_structure:
            print("##### Attack only using structure perturbations #####")
        if direct:
            print("##### Attacking the node directly #####")
        else:
            print(
                "##### Attacking the node indirectly via {} influencer nodes #####"
                .format(n_influencers))
        print("##### Performing {} perturbations #####".format(
            n_perturbations))

    if perturb_structure:

        # Setup starting values of the likelihood ratio test.
        # "start" values come from the original adjacency (self.adj_orig),
        # "current" values from the working adjacency (self.adj), which may
        # already contain perturbations from an earlier call.
        degree_sequence_start = self.adj_orig.sum(0).A1
        current_degree_sequence = self.adj.sum(0).A1
        # Only degrees >= d_min enter the log-degree sums and counts below.
        d_min = 2
        S_d_start = np.sum(
            np.log(degree_sequence_start[degree_sequence_start >= d_min]))
        current_S_d = np.sum(
            np.log(
                current_degree_sequence[current_degree_sequence >= d_min]))
        n_start = np.sum(degree_sequence_start >= d_min)
        current_n = np.sum(current_degree_sequence >= d_min)
        alpha_start = compute_alpha(n_start, S_d_start, d_min)

        log_likelihood_orig = compute_log_likelihood(
            n_start, alpha_start, S_d_start, d_min)

    # Influencers and candidate edges are only initialised when no
    # influencer nodes are stored yet; otherwise the stored
    # self.influencer_nodes / self.potential_edges are reused.
    if len(self.influencer_nodes) == 0:
        if not direct:
            # Choose influencer nodes
            infls, add_infls = self.get_attacker_nodes(
                n_influencers, add_additional_nodes=True)
            self.influencer_nodes = np.concatenate(
                (infls, add_infls)).astype("int")
            # Potential edges are all edges from any attacker to any other node, except the respective
            # attacker itself or the node being attacked.
            self.potential_edges = np.row_stack([
                np.column_stack((np.tile(infl, self.N - 2),
                                 np.setdiff1d(np.arange(self.N),
                                              np.array([self.u, infl]))))
                for infl in self.influencer_nodes
            ])
            if self.verbose:
                print("Influencer nodes: {}".format(self.influencer_nodes))
        else:
            # direct attack
            # Candidate edges: (u, v) for every node v other than u.
            influencers = [self.u]
            self.potential_edges = np.column_stack(
                (np.tile(self.u, self.N - 1),
                 np.setdiff1d(np.arange(self.N), self.u)))
            self.influencer_nodes = np.array(influencers)
    self.potential_edges = self.potential_edges.astype("int32")
    # NOTE: `_` is a real loop counter here; it is used in the verbose
    # progress message below.
    for _ in range(n_perturbations):
        if self.verbose:
            print("##### ...{}/{} perturbations ... #####".format(
                _ + 1, n_perturbations))
        if perturb_structure:

            # Do not consider edges that, if removed, result in singleton edges in the graph.
            singleton_filter = filter_singletons(self.potential_edges,
                                                 self.adj)
            filtered_edges = self.potential_edges[singleton_filter]

            # Update the values for the power law likelihood ratio test.
            # For 0/1 adjacency entries, deltas is +1 where the candidate
            # edge is currently absent (would be added) and -1 where it is
            # present (would be removed).
            deltas = 2 * (
                1 - self.adj[tuple(filtered_edges.T)].toarray()[0]) - 1
            d_edges_old = current_degree_sequence[filtered_edges]
            d_edges_new = current_degree_sequence[
                filtered_edges] + deltas[:, None]
            new_S_d, new_n = update_Sx(current_S_d, current_n, d_edges_old,
                                       d_edges_new, d_min)
            new_alphas = compute_alpha(new_n, new_S_d, d_min)
            new_ll = compute_log_likelihood(new_n, new_alphas, new_S_d,
                                            d_min)
            alphas_combined = compute_alpha(new_n + n_start,
                                            new_S_d + S_d_start, d_min)
            new_ll_combined = compute_log_likelihood(
                new_n + n_start, alphas_combined, new_S_d + S_d_start,
                d_min)
            new_ratios = -2 * new_ll_combined + 2 * (new_ll +
                                                     log_likelihood_orig)

            # Do not consider edges that, if added/removed, would lead to a violation of the
            # likelihood ratio Chi_square cutoff value.
            powerlaw_filter = filter_chisquare(new_ratios, delta_cutoff)
            filtered_edges_final = filtered_edges[powerlaw_filter]

            # Compute new entries in A_hat_square_uv
            a_hat_uv_new = self.compute_new_a_hat_uv(filtered_edges_final)
            # Compute the struct scores for each potential edge
            struct_scores = self.struct_score(a_hat_uv_new,
                                              self.compute_XW())
            # The edge with the smallest score is the best candidate.
            # NOTE(review): argmin()/min() raise if no edge survives both
            # filters -- confirm callers can handle that.
            best_edge_ix = struct_scores.argmin()
            best_edge_score = struct_scores.min()
            best_edge = filtered_edges_final[best_edge_ix]

        if perturb_features:
            # Compute the feature scores for each potential feature perturbation
            # Only the first returned entry is used; presumably the results
            # are sorted best-first -- verify against feature_scores().
            feature_ixs, feature_scores = self.feature_scores()
            best_feature_ix = feature_ixs[0]
            best_feature_score = feature_scores[0]

        if perturb_structure and perturb_features:
            # decide whether to choose an edge or feature to change
            if best_edge_score < best_feature_score:
                if self.verbose:
                    print("Edge perturbation: {}".format(best_edge))
                change_structure = True
            else:
                if self.verbose:
                    print(
                        "Feature perturbation: {}".format(best_feature_ix))
                change_structure = False
        elif perturb_structure:
            change_structure = True
        elif perturb_features:
            change_structure = False

        if change_structure:
            # perform edge perturbation
            # Flip the entry symmetrically: both (i, j) and (j, i) receive
            # 1 - current value (the right-hand side is evaluated once).
            self.adj[tuple(best_edge)] = self.adj[tuple(
                best_edge[::-1])] = 1 - self.adj[tuple(best_edge)]
            self.adj_preprocessed = utils.preprocess_graph(self.adj)

            self.structure_perturbations.append(tuple(best_edge))
            self.feature_perturbations.append(())
            surrogate_losses.append(best_edge_score)

            # Update likelihood ratio test values
            # Carry the chosen edge's statistics over as the new "current"
            # state for the next iteration.
            current_S_d = new_S_d[powerlaw_filter][best_edge_ix]
            current_n = new_n[powerlaw_filter][best_edge_ix]
            current_degree_sequence[best_edge] += deltas[powerlaw_filter][
                best_edge_ix]

        else:
            # perform feature perturbation: flip the selected entry of X_obs
            self.X_obs[tuple(
                best_feature_ix)] = 1 - self.X_obs[tuple(best_feature_ix)]

            self.feature_perturbations.append(tuple(best_feature_ix))
            self.structure_perturbations.append(())
            surrogate_losses.append(best_feature_score)