def checkNodeClassification(attack, dataset: torch_geometric.data.Data, attacked_node: torch.Tensor,
                            y_target: torch.Tensor, print_answer: Print, attack_num):
    """
    checks if the node is correctly classified to y_target

    Parameters
    ----------
    attack: oneGNNAttack
    dataset: torch_geometric.data.Data
    attacked_node: torch.Tensor - the victim node
    y_target: torch.Tensor - the target labels of the attack
    print_answer: Print - the type of print
    attack_num: int - the index of the node (out of the train/val/test-set)

    Returns
    -------
    classified_to_target: bool - whether the node is still classified to y_target (the model's defence)
    """
    results = test(dataset.data, attack.model_wrapper.model, attack.targeted, attacked_node, y_target)
    classified_to_target = not results[3]

    if not classified_to_target and print_answer is Print.YES:
        attack_log = 'Attack: {:03d}, Node: {}, Misclassified already!\n' \
            .format(attack_num, attacked_node.item())
        if attack.mode.isAdversarial():
            attack_log = 'Adv Epoch: {:03d}, '.format(attack.idx) + attack_log
        print(attack_log, flush=True)
    return classified_to_target

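# A minimal usage sketch (not part of the original pipeline): skipping victim nodes that are already
# misclassified before spending an attack budget on them. The helper name, the candidate_nodes/y_targets
# tensors and the loop itself are illustrative assumptions; checkNodeClassification is used as defined above.
def _sketch_filter_already_misclassified(attack, candidate_nodes, y_targets):
    viable = []
    dataset = attack.getDataset()
    for i, node in enumerate(candidate_nodes):
        # keep only nodes the model still classifies to the (target) label
        if checkNodeClassification(attack=attack, dataset=dataset, attacked_node=node.unsqueeze(0),
                                   y_target=y_targets[i].unsqueeze(0), print_answer=Print.NO, attack_num=i):
            viable.append(node)
    return viable
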
def adversarialTrainer(attack):
    """
    trains the model adversarially (the model learns to classify harmful feature matrices correctly)

    Parameters
    ----------
    attack: oneGNNAttack

    Returns
    -------
    model: Model
    model_log: str
    test_accuracy: torch.Tensor
    """
    model = attack.model_wrapper.model  # important note: this is a fresh, untrained model!
    data = attack.getDataset().data

    patience_counter, best_val_accuracy = 0, 0
    adversarial_model_train_epochs = 200
    log_template = 'Adversarial Model - Epoch: {:03d}, Train: {:.4f}, Val: {:.4f}, Test: {:.4f}, Attack: {:.4f}'

    model.attack = True
    # train in an adversarial way
    for epoch in range(0, adversarial_model_train_epochs):
        tmp_attack = copy.deepcopy(attack)
        tmp_attack.setIdx(epoch + 1)
        attacked_x, attacked_nodes, y_targets = \
            getTheMostHarmfulInput(attack=tmp_attack, approach=NodeApproach.TOPOLOGY)

        train(model=attack.model_wrapper.model, optimizer=attack.model_wrapper.optimizer, data=data,
              attacked_nodes=attacked_nodes, attacked_x=attacked_x)
        train_results = test(data=data, model=attack.model_wrapper.model, targeted=attack.targeted,
                             attacked_nodes=attacked_nodes, y_targets=y_targets)
        print(log_template.format(epoch + 1, *train_results))

        # patience
        val_acc = train_results[1]
        if val_acc > best_val_accuracy:
            best_val_accuracy = val_acc
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= attack.patience:
                break

    attack.model_wrapper.model.attack = False
    print()
    model_log = 'Adversarial Model - Train: {:.4f}, Val: {:.4f}, Test: {:.4f}, Attack: {:.4f}' \
        .format(*train_results)
    return attack.model_wrapper.model, model_log, train_results[2]

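# The loop above stops early with a simple patience counter on validation accuracy. A self-contained
# sketch of that early-stopping pattern in isolation (names here are illustrative, not part of the codebase):
def _sketch_patience_loop(step_fn, max_epochs=200, patience=20):
    best_val, counter = 0.0, 0
    for epoch in range(max_epochs):
        val_acc = step_fn(epoch)  # one train step + evaluation, returns validation accuracy
        if val_acc > best_val:
            best_val, counter = val_acc, 0
        else:
            counter += 1
            if counter >= patience:
                break  # no validation improvement for `patience` consecutive epochs
    return best_val
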
def findMinimalEdges(sorted_edges: torch.Tensor, data, model, targeted: bool, attacked_node: torch.Tensor,
                     y_target: torch.Tensor, node_num: int, print_flag: bool, log_template, end_log_template):
    """
    flips each edge with a non-zero gradient
    this function is only available for multi approaches

    Parameters
    ----------
    sorted_edges: torch.Tensor - non-zero gradient edges, sorted by decreasing gradient
    data: torch_geometric.data.Data.data
    model: Model
    targeted: bool
    attacked_node: torch.Tensor - the victim node
    y_target: torch.Tensor - the target label of the attack
    node_num: int - the index of the attacked/victim node (out of the train/val/test-set)
    print_flag: bool - whether to print every iteration or not
    log_template: str - prefix of the log format
    end_log_template: str - suffix of the log format

    Returns
    -------
    attack_results: torch.Tensor
    """
    for edge_num, malicious_edge in enumerate(sorted_edges):
        model.edge_weight.data[malicious_edge] = not model.edge_weight.data[malicious_edge]
        attack_results = test(data=data, model=model, targeted=targeted, attacked_nodes=attacked_node,
                              y_targets=y_target)
        if print_flag:
            print(log_template.format(node_num, edge_num + 2, *attack_results[:-1]), flush=True, end='')
        if attack_results[3]:
            break
    if print_flag:
        print(end_log_template.format(attack_results[-1]) + '\n', flush=True)
    return attack_results

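# The flip above relies on the edge weights being strictly binary: assigning `not w[i]` to a float
# tensor element toggles it between 0.0 and 1.0. A tiny self-contained illustration on a toy tensor
# (not the model's real edge_weight):
def _sketch_toggle_binary_edge_weight():
    import torch
    w = torch.tensor([1.0, 0.0, 1.0])
    w[0] = not w[0]  # 1.0 -> 0.0: removes an existing edge
    w[1] = not w[1]  # 0.0 -> 1.0: adds a previously absent edge
    assert w.tolist() == [0.0, 1.0, 1.0]
    return w
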
def attackTrainerDiscrete(attack, attacked_nodes: torch.Tensor, y_targets: torch.Tensor,
                          malicious_nodes: torch.Tensor, node_num: int,
                          discrete_stop_after_1iter: bool) -> torch.Tensor:
    """
    a trainer function that attacks our model by changing the input attributes,
    for a limited number of attributes
    1. attack the model with i attributes
    2. backprop
    3. add the attribute with the largest gradient as the i+1 attribute

    Parameters
    ----------
    attack: oneGNNAttack
    attacked_nodes: torch.Tensor - the victim nodes
    y_targets: torch.Tensor - the target labels of the attack
    malicious_nodes: torch.Tensor - the attacker/malicious nodes
    node_num: int - the index of the attacked/victim node (out of the train/val/test-set)
    discrete_stop_after_1iter: bool - whether or not to stop the discrete attack after one iteration
                                      (a specific flag for the GRAD_CHOICE approach)

    Returns
    -------
    attack_results: torch.Tensor - 2d-tensor that includes
                    1st-col - the defence
                    2nd-col - the number of attributes used
    if the number of attributes is 0 the node is misclassified to begin with
    """
    # initialize
    model = attack.model_wrapper.model
    lr = attack.lr
    print_answer = attack.print_answer
    dataset = attack.getDataset()
    data = dataset.data

    num_attributes = data.x.shape[1]
    max_attributes_per_malicious = int(num_attributes * attack.l_0)
    max_attributes = max_attributes_per_malicious * malicious_nodes.shape[0]
    changed_attributes_all_malicious, epoch = 0, 0
    log_template = createLogTemplate(attack=attack, dataset=dataset)

    # changing the parameters which require grads and setting the adversarial optimizer
    optimizer_params = setRequiresGrad(model=model, malicious_nodes=malicious_nodes)
    optimizer = torch.optim.Adam(params=optimizer_params, lr=lr)
    optimizer.zero_grad()

    # zero attributes
    with torch.no_grad():
        changed_attributes = 0
        for malicious_node in malicious_nodes:
            changed_attributes += model.node_attribute_list[malicious_node][0].sum().item()
            model.setNodesAttributes(idx_node=malicious_node, values=torch.zeros(num_attributes))

    # flip the attribute with the largest gradient
    model0 = copy.deepcopy(model)
    changed_attributes, prev_changed_attributes = 0, 0
    num_attributes_left = max_attributes_per_malicious * torch.ones_like(malicious_nodes).to(attack.device)
    while True:
        epoch += 1
        prev_model = copy.deepcopy(model)

        # train
        train(model=model, targeted=attack.targeted, attacked_nodes=attacked_nodes, y_targets=y_targets,
              optimizer=optimizer)
        num_attributes_left = flipUpBestNewAttributes(model=model, model0=prev_model,
                                                      malicious_nodes=malicious_nodes,
                                                      num_attributes_left=num_attributes_left)
        changed_attributes = max_attributes - num_attributes_left.sum().item()

        # test correctness
        test_discrete(model=model, model0=model0, malicious_nodes=malicious_nodes,
                      changed_attributes=changed_attributes, max_attributes=max_attributes)

        # test
        results = test(data=data, model=model, targeted=attack.targeted, attacked_nodes=attacked_nodes,
                       y_targets=y_targets)

        # prints
        if print_answer is not Print.NO and epoch != 1:
            print()
        if print_answer is Print.YES:
            print(log_template.format(node_num, epoch, changed_attributes, *results[:-1]), flush=True, end='')

        # breaks
        if results[3] or changed_attributes == max_attributes or changed_attributes == prev_changed_attributes:
            break
        prev_changed_attributes = changed_attributes
        if discrete_stop_after_1iter:
            break

    if print_answer is Print.YES:
        print(', Attack Success: {}\n'.format(results[-1]), flush=True)

    if changed_attributes > max_attributes:
        return torch.tensor([[results[3], max_attributes]]).type(torch.long)
    else:
        return torch.tensor([[results[3], changed_attributes]]).type(torch.long)

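# flipUpBestNewAttributes is defined elsewhere. A self-contained sketch of the greedy step it is used for
# here: among attributes that are still 0 on one malicious row, commit a discrete flip of the attribute
# the optimizer pushed up the most, and spend one unit of the per-attacker budget. This is an illustration
# of the idea under assumptions, not the repository's implementation; it relies on this module's torch import.
def _sketch_flip_best_new_attribute(x_row: torch.Tensor, x_row_prev: torch.Tensor, budget_left: int) -> int:
    if budget_left <= 0:
        return budget_left
    delta = x_row - x_row_prev      # how much each attribute moved in the last train step
    delta[x_row_prev != 0] = 0      # only attributes that were still zero are candidates
    best = torch.argmax(delta)
    if delta[best] > 0:
        x_row[:] = x_row_prev       # discard the continuous update...
        x_row[best] = 1.0           # ...and commit a single discrete flip instead
        budget_left -= 1
    return budget_left
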
def attackTrainerContinuous(attack, attacked_nodes: torch.Tensor, y_targets: torch.Tensor,
                            malicious_nodes: torch.Tensor, node_num: int) -> torch.Tensor:
    """
    a trainer function that attacks our model by changing the input attributes
    a successful attack is when we attack successfully AND embed the attributes

    Parameters
    ----------
    attack: oneGNNAttack
    attacked_nodes: torch.Tensor - the victim nodes
    y_targets: torch.Tensor - the target labels of the attack
    malicious_nodes: torch.Tensor - the attacker/malicious nodes
    node_num: int - the index of the attacked/victim node (out of the train/val/test-set)

    Returns
    -------
    attack_results: torch.Tensor - 2d-tensor that includes
                    1st-col - the defence
                    2nd-col - the number of attributes used
    if the number of attributes is 0 the node is misclassified to begin with
    """
    # initialize
    model = attack.model_wrapper.model
    attack_epochs = attack.attack_epochs
    lr = attack.lr
    print_answer = attack.print_answer
    dataset = attack.getDataset()
    data = dataset.data

    num_attributes = data.x.shape[1]
    max_attributes = num_attributes * malicious_nodes.shape[0]
    log_template = createLogTemplate(attack=attack, dataset=dataset)

    # changing the parameters which require grads and setting the adversarial optimizer
    optimizer_params = setRequiresGrad(model=model, malicious_nodes=malicious_nodes)
    optimizer = torch.optim.Adam(params=optimizer_params, lr=lr)

    # find best_attributes
    model0 = copy.deepcopy(model)
    prev_changed_attributes = 0
    for epoch in range(0, attack_epochs):
        # train
        train(model=model, targeted=attack.targeted, attacked_nodes=attacked_nodes, y_targets=y_targets,
              optimizer=optimizer)

        # test correctness
        changed_attributes = (model.getInput() != model0.getInput())[malicious_nodes].sum().item()
        test_discrete(model=model, model0=model0, malicious_nodes=malicious_nodes,
                      changed_attributes=changed_attributes, max_attributes=max_attributes)

        # test
        results = test(data=data, model=model, targeted=attack.targeted, attacked_nodes=attacked_nodes,
                       y_targets=y_targets)

        # breaks
        if results[3]:
            # embed
            embeded_model = copy.deepcopy(model)
            for malicious_idx, malicious_node in enumerate(malicious_nodes):
                embedRowContinuous(model=embeded_model, malicious_node=malicious_node, model0=model0,
                                   l_inf=attack.l_inf)

            # test correctness
            changed_attributes = (embeded_model.getInput() != model0.getInput())[malicious_nodes].sum().item()
            test_continuous(model=embeded_model, model0=model0, malicious_nodes=malicious_nodes,
                            changed_attributes=changed_attributes, max_attributes=max_attributes,
                            l_inf=attack.l_inf)

            # test
            results = test(data=data, model=embeded_model, targeted=attack.targeted,
                           attacked_nodes=attacked_nodes, y_targets=y_targets)
            if results[3]:
                if print_answer is Print.YES:
                    print(log_template.format(node_num, epoch + 1, *results[:-1]), flush=True, end='')
                break

        # prints
        if print_answer is Print.YES:
            print(log_template.format(node_num, epoch + 1, *results[:-1]), flush=True, end='')
        if changed_attributes == prev_changed_attributes:
            break
        prev_changed_attributes = changed_attributes
        if epoch != attack_epochs - 1 and print_answer is not Print.NO:
            print()

    if print_answer is Print.YES:
        print(', Attack Success: {}\n'.format(results[-1]), flush=True)
    if not results[3]:
        changed_attributes = max_attributes
    return torch.tensor([[results[3], changed_attributes]]).type(torch.long)

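# embedRowContinuous is defined elsewhere; the continuous attack only counts as successful once the
# perturbed rows are "embedded", i.e. kept within an l_inf ball around the original attributes. A
# self-contained sketch of that projection step (an illustration under assumptions, not the repository's
# exact implementation):
def _sketch_project_to_l_inf_ball(x_row: torch.Tensor, x_row_orig: torch.Tensor, l_inf: float) -> torch.Tensor:
    # clamp each attribute to within +/- l_inf of its original value
    return torch.max(torch.min(x_row, x_row_orig + l_inf), x_row_orig - l_inf)
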
def edgeAttackVictim(attack, approach: Approach, print_flag: bool, attacked_node: torch.Tensor,
                     y_target: torch.Tensor, node_num: int) -> torch.Tensor:
    """
    chooses the edge we attack with from our pool of possible edges
    the pool of possible edges changes per approach
    the BFS environment is also calculated according to our selected approach
    lastly, we attack using edgeTrainer

    important note: the victim node is already known (attacked node)

    Parameters
    ----------
    attack: oneGNNAttack
    approach: Approach
    print_flag: bool - whether to print every iteration or not
    attacked_node: torch.Tensor - the victim node
    y_target: torch.Tensor - the target label of the attack
    node_num: int - the index of the attacked/victim node (out of the train/val/test-set)

    Returns
    -------
    attack_results: torch.Tensor
    """
    device = attack.device
    dataset = attack.getDataset()
    data = dataset.data
    model = attack.model_wrapper.model
    targeted = attack.targeted
    end_log_template = ', Attack Success: {}'

    neighbours_and_dist = kBFS(root=attacked_node, device=device, reversed_arr_list=dataset.reversed_arr_list,
                               K=model.num_layers - 1)
    if not neighbours_and_dist.nelement():
        if print_flag:
            print('Attack: {:03d}, Node: {} is a solo node'.format(node_num, attacked_node.item()), flush=True)
        return None
    malicious_indices = neighbours_and_dist[:, 0]
    if print_flag:
        print('Attack: {:03d}, Node: {}'.format(node_num, attacked_node.item()), flush=True, end='')

    # according to our approach choose the edge we wish to flip
    if approach is EdgeApproach.RANDOM:
        # select a random node on the graph - malicious index
        # select a random node from our BFS of distance K-1 - attacked node
        # use flipEdge
        malicious_index = np.random.choice(data.num_nodes, 1).item()
        new_attacked_node_index = np.random.choice(malicious_indices.shape[0] + 1, 1).item()
        if new_attacked_node_index == malicious_indices.shape[0]:
            new_attacked_node = attacked_node
        else:
            new_attacked_node = torch.tensor([malicious_indices[new_attacked_node_index].item()]).to(device)

        flipEdge(model=model, attacked_node=new_attacked_node, malicious_index=malicious_index, device=device)
        attack_results = test(data=data, model=model, targeted=targeted, attacked_nodes=new_attacked_node,
                              y_targets=y_target)
        if print_flag:
            print(end_log_template.format(attack_results[3]), flush=True)
    else:
        # EdgeApproach.SINGLE
        # select a random node on the graph - malicious index
        # add all possible edges between the malicious index and the BFS of distance K-1
        # calculate the edge with the largest gradient and flip it, using edgeTrainer
        #
        # EdgeApproach.GRAD_CHOICE
        # add all possible edges between all possible nodes and the BFS of distance K-1
        # calculate the edge with the largest gradient and flip it, using edgeTrainer
        malicious_index = model.expandEdgesByMalicious(dataset=dataset, approach=approach,
                                                       attacked_node=attacked_node,
                                                       neighbours=malicious_indices, device=device)
        attack_results = edgeTrainer(data=data, approach=approach, targeted=targeted, model=model,
                                     attacked_node=attacked_node, y_target=y_target, node_num=node_num,
                                     malicious_index=malicious_index, device=device, print_flag=print_flag,
                                     end_log_template=end_log_template)
    if attack_results is None:
        print("Edge approach doesn't exist", flush=True)
        quit()
    return attack_results[3]

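# A hypothetical driver sketch (not part of the original code): running the edge attack over a set of
# victim nodes for one EdgeApproach and collecting per-node success flags. The victim_nodes/y_targets
# tensors and the helper name are assumptions; edgeAttackVictim is used as defined above.
def _sketch_run_edge_attack(attack, approach, victim_nodes, y_targets):
    successes = []
    for i, node in enumerate(victim_nodes):
        result = edgeAttackVictim(attack=attack, approach=approach, print_flag=False,
                                  attacked_node=node.unsqueeze(0), y_target=y_targets[i].unsqueeze(0),
                                  node_num=i + 1)
        if result is not None:  # None means the victim is a solo node and cannot be attacked via edges
            successes.append(bool(result))
    return successes
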
def edgeTrainer(data, approach: Approach, targeted: bool, model, attacked_node: torch.Tensor,
                y_target: torch.Tensor, malicious_index: torch.Tensor, node_num: int, device, print_flag,
                end_log_template):
    """
    a forward pass function which chooses the edge with the largest gradient in edge_weight and flips it
    for multi approaches this process is repeated for each edge with a non-zero gradient

    Parameters
    ----------
    data: torch_geometric.data.Data.data
    approach: Approach
    targeted: bool
    model: Model
    attacked_node: torch.Tensor - the victim node
    y_target: torch.Tensor - the target label of the attack
    malicious_index: torch.Tensor - the attacker/malicious index
    node_num: int - the index of the attacked/victim node (out of the train/val/test-set)
    device: torch.cuda
    print_flag: bool - whether to print every iteration or not
    end_log_template: str - suffix of the log format

    Returns
    -------
    attack_results: torch.Tensor
    """
    log_template = '\nAttack: {:03d}, #Edge: {:03d}, Train: {:.4f}, Val: {:.4f}, Test: {:.4f}'
    edge_weight0 = model.edge_weight.clone().detach()

    optimizer_params = setRequiresGrad(model)
    optimizer = torch.optim.SGD(optimizer_params, lr=0.01)
    train(model=model, targeted=targeted, attacked_nodes=attacked_node, y_targets=y_target, optimizer=optimizer)

    with torch.no_grad():
        diff = model.edge_weight - edge_weight0
        mask1 = torch.logical_and(edge_weight0 == 1, diff > 0).to(device)
        mask2 = torch.logical_and(edge_weight0 == 0, diff < 0).to(device)
        mask = torch.logical_or(mask1, mask2).to(device)
        diff[mask] = 0
        abs_diff = torch.abs(diff)

        # when the approach is grad the attacker is already chosen
        if not approach.isGlobal():
            malicious_mask = model.edge_index[0] != torch.tensor(malicious_index).to(device)
            abs_diff[malicious_mask] = 0

        # use the best edge
        max_malicious_edge = torch.argmax(abs_diff).to(device)

        # when the approach is globalGrad we can also choose the attacker
        if approach.isGlobal():
            malicious_index = model.edge_index[0][max_malicious_edge]
            malicious_node_mask = model.edge_index[0] != malicious_index
            abs_diff[malicious_node_mask] = 0

        # return the edge weights back to their original values and flip the chosen edge
        model.edge_weight.data = edge_weight0
        model.edge_weight.data[max_malicious_edge] = not model.edge_weight.data[max_malicious_edge]
        attack_results = test(data=data, model=model, targeted=targeted, attacked_nodes=attacked_node,
                              y_targets=y_target)

        if not approach.isMulti():
            if print_flag:
                print(end_log_template.format(attack_results[-1]), flush=True)
        else:
            malicious_node_abs_diff = abs_diff[abs_diff != 0]
            # sort edges by absolute diff
            _, sorted_malicious_edge = torch.sort(malicious_node_abs_diff, descending=True)

            if print_flag:
                print(', #Edges: {}'.format(sorted_malicious_edge.shape[0]), flush=True, end='')
                print(log_template.format(node_num, 1, *attack_results[:-1]), flush=True, end='')
            if not attack_results[3] and sorted_malicious_edge.shape[0] > 1:
                attack_results = \
                    findMinimalEdges(sorted_edges=sorted_malicious_edge[1:], data=data, model=model,
                                     targeted=targeted, attacked_node=attacked_node, y_target=y_target,
                                     node_num=node_num, print_flag=print_flag, log_template=log_template,
                                     end_log_template=end_log_template)
            elif print_flag:
                print(end_log_template.format(attack_results[-1]) + '\n', flush=True)
    return attack_results

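# The masking inside edgeTrainer rules out gradient directions that cannot be realized by a flip: an
# existing edge (weight 1) can only go down, an absent edge (weight 0) can only go up. A toy,
# self-contained illustration of that feasibility mask on made-up numbers:
def _sketch_feasible_flip_mask():
    edge_weight0 = torch.tensor([1.0, 1.0, 0.0, 0.0])
    diff = torch.tensor([0.3, -0.2, 0.5, -0.4])  # change in edge_weight after the adversarial train step
    infeasible = torch.logical_or(torch.logical_and(edge_weight0 == 1, diff > 0),
                                  torch.logical_and(edge_weight0 == 0, diff < 0))
    diff = diff.clone()
    diff[infeasible] = 0
    # only edges 1 (present, pushed down) and 2 (absent, pushed up) remain candidates
    return torch.argmax(torch.abs(diff))  # -> tensor(2), the strongest feasible flip
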
def attackVictim(attack, approach: Approach, attacked_node: torch.Tensor, y_target: torch.Tensor,
                 node_num: int) -> torch.Tensor:
    """
    chooses the node we attack with (the malicious node) from our BFS environment
    the BFS environment is also calculated according to our selected approach
    lastly, we attack using attackTrainer

    important note: the victim node is already known (attacked node)

    Parameters
    ----------
    attack: oneGNNAttack
    approach: Approach
    attacked_node: torch.Tensor - the victim node
    y_target: torch.Tensor - the target label of the attack
    node_num: int - the index of the attacked/victim node (out of the train/val/test-set)

    Returns
    -------
    attack_results: torch.Tensor - 2d-tensor that includes
                    1st-col - the defence
                    2nd-col - the number of attributes used
    if the number of attributes is 0 the node is misclassified to begin with
    """
    device = attack.device
    dataset = attack.getDataset()
    print_answer = attack.print_answer

    neighbours_and_dist = kBFS(root=attacked_node, device=device, reversed_arr_list=dataset.reversed_arr_list,
                               K=attack.num_layers)
    if neighbours_and_dist.nelement():
        neighbours_and_dist = manipulateNeighborhood(attack=attack, approach=approach,
                                                     attacked_node=attacked_node,
                                                     neighbours_and_dist=neighbours_and_dist, device=device)
        attack_log = 'Attack: {:03d}, Node: {}, BFS clique: {}' \
            .format(node_num, attacked_node.item(), neighbours_and_dist.shape[0] + 1)
    else:
        attack_log = 'Attack: {:03d}, Node: {} is a solo node'.format(node_num, attacked_node.item())

    # in adversarial mode add #Epoch
    if attack.mode.isAdversarial():
        attack_log = 'Adv Epoch: {:03d}, '.format(attack.idx) + attack_log

    # special cases of a solo node and a duo node for double
    BFS_size = neighbours_and_dist.shape[0]
    if not neighbours_and_dist.nelement():
        if print_answer is Print.YES:
            print(attack_log, flush=True)
        return None
    if print_answer is Print.YES:
        print(attack_log, end='', flush=True)
        if approach is not NodeApproach.MULTIPLE_ATTACKERS:
            print()

    malicious_node, attack = approach.getMaliciousNode(attack=attack, attacked_node=attacked_node,
                                                       y_target=y_target, node_num=node_num,
                                                       neighbours_and_dist=neighbours_and_dist,
                                                       BFS_size=BFS_size)

    # calculates the malicious node for the irregular approaches
    if approach is NodeApproach.AGREE:
        print()
        malicious_node_heuristic = heuristicApproach(reversed_arr_list=dataset.reversed_arr_list,
                                                     neighbours_and_dist=neighbours_and_dist,
                                                     device=attack.device)
        malicious_node_gradient = gradientApproach(attack=attack, attacked_node=attacked_node,
                                                   y_target=y_target, node_num=node_num,
                                                   neighbours_and_dist=neighbours_and_dist)
        attack_results = torch.zeros(1, 2)
        attack_results[0][0] = malicious_node_heuristic == malicious_node_gradient  # in attackSet we change to equal
        return attack_results

    if approach is NodeApproach.ZERO_FEATURES:
        model = attack.model_wrapper.model
        data = dataset.data
        zero_model = copy.deepcopy(model)

        # zero the attributes of the malicious node
        zero_model.node_attribute_list[malicious_node][:] = 0

        # test correctness
        changed_attributes = (zero_model.getInput() != model.getInput())[malicious_node].sum().item()

        # test
        results = test(data=data, model=zero_model, targeted=attack.targeted, attacked_nodes=attacked_node,
                       y_targets=y_target)

        log_template = createLogTemplate(attack=attack, dataset=dataset) + ', Attack Success: {}\n'
        if dataset.type is DatasetType.DISCRETE:
            print(log_template.format(node_num, 1, changed_attributes, *results), flush=True)
        if dataset.type is DatasetType.CONTINUOUS:
            print(log_template.format(node_num, 1, *results), flush=True)
        attack_results = torch.tensor([[results[3], changed_attributes]])
        return attack_results

    if approach is NodeApproach.MULTIPLE_ATTACKERS:
        if malicious_node is None:
            if print_answer is Print.YES:
                print(f': Too small for {attack.num_of_attackers} attackers\n', flush=True)
            return None
        else:
            print()

    if approach is NodeApproach.INJECTION:
        dataset = attack.getDataset()
        classified_to_target = checkNodeClassification(attack=attack, dataset=dataset,
                                                       attacked_node=attacked_node, y_target=y_target,
                                                       print_answer=Print.NO, attack_num=node_num + 1)
        if not classified_to_target:
            print("misclassified right after injection!\n", flush=True)
            attack.model_wrapper.model.removeInjectedNode(attack=attack)
            return torch.tensor([[1, 0]])

    attack_results = attackTrainer(attack=attack, attacked_nodes=attacked_node, y_targets=y_target,
                                   malicious_nodes=malicious_node, node_num=node_num)

    if approach is NodeApproach.INJECTION:
        attack.model_wrapper.model.removeInjectedNode(attack=attack)
    return attack_results

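# A hypothetical sketch (not part of the original code) of how the 1x2 results returned above could be
# aggregated over a set of victim nodes: column 0 is the defence flag, column 1 the number of attributes
# the attacker changed. The helper name and the victim_nodes/y_targets tensors are assumptions.
def _sketch_aggregate_node_attack_results(attack, approach, victim_nodes, y_targets):
    rows = []
    for i, node in enumerate(victim_nodes):
        res = attackVictim(attack=attack, approach=approach, attacked_node=node.unsqueeze(0),
                           y_target=y_targets[i].unsqueeze(0), node_num=i + 1)
        if res is not None:  # None is returned for solo nodes / too-small neighbourhoods
            rows.append(res)
    if not rows:
        return None
    results = torch.cat(rows, dim=0).float()
    return results[:, 0].mean().item(), results[:, 1].mean().item()  # mean defence, mean #attributes
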