def attackTrainerContinuous(attack, attacked_nodes: torch.Tensor, y_targets: torch.Tensor,
                            malicious_nodes: torch.Tensor, node_num: int) -> torch.Tensor:
    """
    Attack the model by optimizing the input attributes of the malicious nodes.

    Every epoch runs one optimization step on the malicious nodes' attributes.
    Whenever the un-embedded attack is flagged as successful, a copy of the model
    is embedded (via embedRowContinuous with attack.l_inf) and re-tested; the
    attack only stops as successful when the embedded copy is also flagged.

    Parameters
    ----------
    attack: oneGNNAttack
    attacked_nodes: torch.Tensor - the victim nodes
    y_targets: torch.Tensor - the target labels of the attack
    malicious_nodes: torch.Tensor - the attacker/malicious nodes
    node_num: int - the index of the attacked/victim node (only used for logging here)

    Returns
    -------
    attack_results: torch.Tensor - 1x2 long tensor
                                   1st-col - results[3] of the last test call, the flag
                                             this loop treats as "attack succeeded"
                                   2nd-col - the number of changed attribute entries,
                                             or max_attributes when the flag is falsy
    """
    # unpack the attack configuration
    net = attack.model_wrapper.model
    total_epochs = attack.attack_epochs
    step_size = attack.lr
    verbosity = attack.print_answer
    dataset = attack.getDataset()
    data = dataset.data

    # upper bound on the reported count: every attribute of every malicious node
    max_attributes = data.x.shape[1] * malicious_nodes.shape[0]

    log_template = createLogTemplate(attack=attack, dataset=dataset)

    # only the parameters returned by setRequiresGrad are optimized
    trainable = setRequiresGrad(model=net, malicious_nodes=malicious_nodes)
    optimizer = torch.optim.Adam(params=trainable, lr=step_size)

    # untouched snapshot used as the reference for counting changes
    pristine = copy.deepcopy(net)

    def count_changed(candidate):
        # number of input entries of the malicious nodes that differ from the snapshot
        differs = candidate.getInput() != pristine.getInput()
        return differs[malicious_nodes].sum().item()

    last_changed = 0
    for epoch_num in range(1, total_epochs + 1):
        # one optimization step
        train(model=net, targeted=attack.targeted, attacked_nodes=attacked_nodes, y_targets=y_targets,
              optimizer=optimizer)

        # sanity check of the un-embedded model
        changed_attributes = count_changed(net)
        test_discrete(model=net, model0=pristine, malicious_nodes=malicious_nodes,
                      changed_attributes=changed_attributes, max_attributes=max_attributes)

        results = test(data=data, model=net, targeted=attack.targeted, attacked_nodes=attacked_nodes,
                       y_targets=y_targets)

        if results[3]:
            # the raw attack is flagged as successful: embed a copy and re-evaluate it
            embedded = copy.deepcopy(net)
            for malicious_node in malicious_nodes:
                embedRowContinuous(model=embedded, malicious_node=malicious_node, model0=pristine,
                                   l_inf=attack.l_inf)

            # sanity check of the embedded model
            changed_attributes = count_changed(embedded)
            test_continuous(model=embedded, model0=pristine, malicious_nodes=malicious_nodes,
                            changed_attributes=changed_attributes, max_attributes=max_attributes,
                            l_inf=attack.l_inf)

            results = test(data=data, model=embedded, targeted=attack.targeted,
                           attacked_nodes=attacked_nodes, y_targets=y_targets)

        # the same log line is emitted whether or not the epoch ends the attack
        if verbosity is Print.YES:
            print(log_template.format(node_num, epoch_num, *results[:-1]), flush=True, end='')

        # stop on success, or when the changed-attribute count did not move since last epoch
        if results[3] or changed_attributes == last_changed:
            break
        last_changed = changed_attributes

        # line break between epochs, skipped after the final one
        if epoch_num != total_epochs and verbosity is not Print.NO:
            print()

    if verbosity is Print.YES:
        print(', Attack Success: {}\n'.format(results[-1]), flush=True)

    # a failed attack is reported with the maximal attribute budget
    reported_attributes = changed_attributes if results[3] else max_attributes
    return torch.tensor([[results[3], reported_attributes]]).type(torch.long)
def attackTrainerDiscrete(attack, attacked_nodes: torch.Tensor, y_targets: torch.Tensor,
                          malicious_nodes: torch.Tensor, node_num: int,
                          discrete_stop_after_1iter: bool) -> torch.Tensor:
    """
    a trainer function that attacks our model by changing the input attribute for a limited number of attributes
    1.attack the model with i attributes
    2.backprop
    3.flip up new attributes (flipUpBestNewAttributes) within the remaining per-node budget

    Parameters
    ----------
    attack: oneGNNAttack
    attacked_nodes: torch.Tensor - the victim nodes
    y_targets: torch.Tensor - the target labels of the attack
    malicious_nodes: torch.Tensor - the attacker/malicious node
    node_num: int - the index of the attacked/victim node (out of the train/val/test-set)
    discrete_stop_after_1iter: bool - whether or not to stop the discrete after 1 iteration
                                      this is a specific flag for the GRAD_CHOICE Approach

    Returns
    -------
    attack_results: torch.Tensor - 1x2 long tensor
                                   1st-col - results[3] of the last test call, the flag
                                             this loop treats as "attack succeeded"
                                   2nd-col - the number of attributes used, clamped to
                                             max_attributes
    """
    # initialize
    model = attack.model_wrapper.model
    lr = attack.lr
    print_answer = attack.print_answer
    dataset = attack.getDataset()
    data = dataset.data

    # per-node budget is the l_0 fraction of the attribute count (truncated to int)
    num_attributes = data.x.shape[1]
    max_attributes_per_malicious = int(num_attributes * attack.l_0)
    max_attributes = max_attributes_per_malicious * malicious_nodes.shape[0]

    log_template = createLogTemplate(attack=attack, dataset=dataset)

    # changing the parameters which require grads and setting adversarial optimizer
    optimizer_params = setRequiresGrad(model=model, malicious_nodes=malicious_nodes)
    optimizer = torch.optim.Adam(params=optimizer_params, lr=lr)
    optimizer.zero_grad()

    # zero attributes: every malicious node starts from an all-zero attribute row
    # (a fresh zeros tensor per node, since setNodesAttributes may keep a reference)
    with torch.no_grad():
        for malicious_node in malicious_nodes:
            model.setNodesAttributes(idx_node=malicious_node, values=torch.zeros(num_attributes))

    # flip the attribute with the largest gradient
    model0 = copy.deepcopy(model)  # reference snapshot taken after zeroing
    epoch = 0
    prev_changed_attributes = 0
    num_attributes_left = max_attributes_per_malicious * torch.ones_like(malicious_nodes).to(attack.device)
    while True:
        epoch += 1
        prev_model = copy.deepcopy(model)  # state before this step, handed to the flip helper

        # train
        train(model=model, targeted=attack.targeted, attacked_nodes=attacked_nodes, y_targets=y_targets,
              optimizer=optimizer)
        num_attributes_left = flipUpBestNewAttributes(model=model, model0=prev_model,
                                                      malicious_nodes=malicious_nodes,
                                                      num_attributes_left=num_attributes_left)
        # attributes used so far = total budget minus what is left over all malicious nodes
        changed_attributes = max_attributes - num_attributes_left.sum().item()

        # test correctness
        test_discrete(model=model, model0=model0, malicious_nodes=malicious_nodes,
                      changed_attributes=changed_attributes, max_attributes=max_attributes)

        # test
        results = test(data=data, model=model, targeted=attack.targeted, attacked_nodes=attacked_nodes,
                       y_targets=y_targets)

        # prints (line break between iterations, not before the first one)
        if print_answer is not Print.NO and epoch != 1:
            print()
        if print_answer is Print.YES:
            print(log_template.format(node_num, epoch, changed_attributes, *results[:-1]), flush=True, end='')

        # breaks: success, budget exhausted, or no new attribute was added this iteration
        if results[3] or changed_attributes == max_attributes or changed_attributes == prev_changed_attributes:
            break
        prev_changed_attributes = changed_attributes
        if discrete_stop_after_1iter:
            break

    if print_answer is Print.YES:
        print(', Attack Success: {}\n'.format(results[-1]), flush=True)

    # never report more attributes than the budget allows
    used_attributes = min(changed_attributes, max_attributes)
    return torch.tensor([[results[3], used_attributes]]).type(torch.long)
def edgeTrainer(data, approach: Approach, targeted: bool, model, attacked_node: torch.Tensor, y_target: torch.Tensor,
                malicious_index: torch.Tensor, node_num: int, device, print_flag, end_log_template):
    """
    a forward pass function which chooses the edge with the largest gradient in edge_weight and flips it
    for multi approaches this process is repeated for each edge with a non-zero gradient

    Parameters
    ----------
    data: torch_geometric.data.Data.data
    approach: Approach
    targeted: bool
    model: Model
    attacked_node: torch.Tensor - the victim node
    y_target: torch.Tensor - the target label of the attack
    malicious_index: torch.Tensor - the attacker/malicious index
    node_num: int - the index of the attacked/victim node (out of the train/val/test-set)
    device: torch.cuda
    print_flag: bool - whether to print every iteration or not
    end_log_template: str - suffix of the log format

    Returns
    -------
    attack_result: the value returned by test(...) after the single flip, or by
                   findMinimalEdges(...) when a multi approach keeps flipping edges
    """
    log_template = '\nAttack: {:03d}, #Edge: {:03d}, Train: {:.4f}, Val: {:.4f}, Test: {:.4f}'

    # one SGD step on the edge weights; the resulting change ranks the edges
    edge_weight0 = model.edge_weight.clone().detach()
    optimizer_params = setRequiresGrad(model)
    optimizer = torch.optim.SGD(optimizer_params, lr=0.01)
    train(model=model, targeted=targeted, attacked_nodes=attacked_node, y_targets=y_target, optimizer=optimizer)

    with torch.no_grad():
        diff = model.edge_weight - edge_weight0
        # ignore edges pushed in a direction a flip cannot follow:
        # weight 1 that increased, or weight 0 that decreased
        mask1 = torch.logical_and(edge_weight0 == 1, diff > 0).to(device)
        mask2 = torch.logical_and(edge_weight0 == 0, diff < 0).to(device)
        mask = torch.logical_or(mask1, mask2).to(device)
        diff[mask] = 0
        abs_diff = torch.abs(diff)

        # when approach is grad you have the attacker chosen
        if not approach.isGlobal():
            # as_tensor avoids re-wrapping (and the copy-construct warning) when a tensor is passed
            chosen_attacker = torch.as_tensor(malicious_index).to(device)
            malicious_mask = model.edge_index[0] != chosen_attacker
            abs_diff[malicious_mask] = 0

        # use of the best edge
        max_malicious_edge = torch.argmax(abs_diff).to(device)

        # when approach is globalGrad you can choose the attacker
        if approach.isGlobal():
            malicious_index = model.edge_index[0][max_malicious_edge]
            malicious_node_mask = model.edge_index[0] != malicious_index
            abs_diff[malicious_node_mask] = 0

        # return edge weights to back to original values and flip
        # (`not` on the single selected element yields a Python bool, stored as 0/1)
        model.edge_weight.data = edge_weight0
        model.edge_weight.data[max_malicious_edge] = not model.edge_weight.data[max_malicious_edge]

    attack_results = test(data=data, model=model, targeted=targeted, attacked_nodes=attacked_node, y_targets=y_target)

    if not approach.isMulti():
        if print_flag:
            print(end_log_template.format(attack_results[-1]), flush=True)
        return attack_results

    # Candidate edges are the positions in edge_weight with a non-zero change.
    # Sorting the filtered values alone would yield positions inside the filtered
    # tensor, so the order is mapped back to real edge positions here.
    # assumes edge_weight is 1-D (one weight per edge_index column) - TODO confirm
    candidate_edges = torch.nonzero(abs_diff, as_tuple=True)[0]
    # sort edges by absolute diff
    _, order = torch.sort(abs_diff[candidate_edges], descending=True)
    sorted_malicious_edge = candidate_edges[order]

    # the best edge was already flipped above; exclude it explicitly so that ties
    # between argmax and sort cannot cause it to be flipped a second time
    remaining_edges = sorted_malicious_edge[sorted_malicious_edge != max_malicious_edge]

    if print_flag:
        print(', #Edges: {}'.format(sorted_malicious_edge.shape[0]), flush=True, end='')
        print(log_template.format(node_num, 1, *attack_results[:-1]), flush=True, end='')

    if not attack_results[3] and remaining_edges.shape[0] > 0:
        # presumably findMinimalEdges flips the given edges one by one in this order
        # until the attack succeeds - verify against its definition
        attack_results = \
            findMinimalEdges(sorted_edges=remaining_edges, data=data, model=model, targeted=targeted,
                             attacked_node=attacked_node, y_target=y_target, node_num=node_num,
                             print_flag=print_flag, log_template=log_template,
                             end_log_template=end_log_template)
    elif print_flag:
        print(end_log_template.format(attack_results[-1]) + '\n', flush=True)
    return attack_results