def generate_ae(model, data, labels, attack_configs, save=False, output_dir="../../task1_data/"):
    """
    Generate adversarial examples for each configured attack and report error rates.

    :param model: WeakDefense. The targeted model (must expose ``predict``).
    :param data: array. The benign samples, shape (num_samples, img_rows, img_cols, ...).
    :param labels: array. The true labels; may be one-hot (2-D) or class indices (1-D).
    :param attack_configs: dictionary. Must contain ``num_attacks`` and one
        ``configs{i}`` entry per attack (each with a ``description`` key).
    :param save: boolean. True, if save the adversarial examples.
    :param output_dir: str or path. Location to save the adversarial examples.
        It cannot be None when save is True.
    :return: None. Results are printed/plotted and optionally saved as ``.npy`` files.
    """
    img_rows, img_cols = data.shape[1], data.shape[2]
    num_attacks = attack_configs.get("num_attacks")
    # NOTE: the loader keeps the labels in their original (possibly one-hot) form;
    # only the local `labels` used for accuracy reporting is flattened below.
    data_loader = (data, labels)

    if len(labels.shape) > 1:
        # one-hot -> class indices, vectorized instead of a per-sample loop
        labels = np.argmax(labels, axis=1)

    # generate attacks one by one
    for attack_id in range(num_attacks):  # renamed from `id`, which shadows the builtin
        key = "configs{}".format(attack_id)
        data_adv = generate(model=model,
                            data_loader=data_loader,
                            attack_args=attack_configs.get(key))
        # predict the adversarial examples
        predictions = np.argmax(model.predict(data_adv), axis=1)

        err = error_rate(y_pred=predictions, y_true=labels)
        print(">>> error rate:", err)

        # plotting some examples
        num_plotting = min(data.shape[0], 5)
        for i in range(num_plotting):
            img = data_adv[i].reshape((img_rows, img_cols))
            plt.imshow(img, cmap='gray')
            title = '{}: {}->{}'.format(
                attack_configs.get(key).get("description"),
                labels[i],
                predictions[i]
            )
            plt.title(title)
            plt.show()
            plt.close()

        # save the adversarial example
        if save:
            if output_dir is None:
                raise ValueError("Cannot save images to a none path.")
            # ensure the target directory exists before writing
            os.makedirs(output_dir, exist_ok=True)
            # save with a timestamp-derived name; time.monotonic() is unique within
            # a process run but NOT across runs, so collisions between runs are possible
            file = os.path.join(output_dir, "{}.npy".format(time.monotonic()))
            print("Save the adversarial examples to file [{}].".format(file))
            np.save(file, data_adv)
def adv_train(model, attack, device, train_loader, optimizer, epoch):
    """
    Run one epoch of adversarial training.

    For every batch, the loss is the sum of the attack's loss function
    evaluated on the clean outputs and on the outputs for adversarial
    examples produced by ``attack.generate``.

    :param model: the network being trained (must be callable on a batch).
    :param attack: object exposing ``generate(model, data, target)`` and
        a loss function ``lf(output, target)``.
    :param device: torch device to move batches onto.
    :param train_loader: iterable of (data, target) batches.
    :param optimizer: optimizer stepping the model parameters.
    :param epoch: epoch index, used only for logging.
    """
    model.train()
    running_loss = 0.0
    for batch, labels in train_loader:
        batch = batch.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()

        clean_output = model(batch)
        adv_examples = attack.generate(model, batch, labels)
        adv_output = model(adv_examples)

        # combined clean + adversarial objective
        batch_loss = attack.lf(clean_output, labels) + attack.lf(adv_output, labels)
        batch_loss.backward()
        optimizer.step()
        running_loss += batch_loss.item()

    mean_loss = running_loss / len(train_loader)
    print('Train Epoch: {} \tLoss: {:.6f}'.format(epoch, mean_loss))
def adv_guide_pgd_train(model, attack, device, train_loader, guide_sets, optimizer,
                        epoch, beta, gamma, epsilon, weight_decay, gradient_decay):
    """
    Run one epoch of guided adversarial training.

    For each batch, nine auxiliary (wrong) target labels are drawn per sample.
    For each auxiliary target a one-step targeted gradient perturbation is
    computed, min-max normalized per (sample, channel) and scaled by
    ``epsilon``; the perturbed input is pulled (MSE) toward a "guide" sample
    drawn from ``guide_sets`` for that target class. The total loss mixes the
    clean NLL loss, the guided loss and a PGD adversarial loss with weights
    ``(1 - beta - gamma)``, ``beta`` and ``gamma``.

    :param model: network being trained; assumed to output log-probabilities
        (``F.nll_loss`` is applied directly to its output).
    :param attack: object exposing ``generate(model, data, target)`` and ``lf``.
    :param device: torch device for batches and auxiliary targets.
    :param train_loader: iterable of (data, target) batches; targets are
        assumed to be class indices in 0..9.
    :param guide_sets: indexable per-class datasets; ``guide_sets[c][i]``
        yields a (data, target) pair of class ``c``.
    :param optimizer: optimizer stepping the model parameters.
    :param epoch: epoch index, used only for logging.
    :param beta: weight of the guided (MSE) loss term.
    :param gamma: weight of the adversarial loss term.
    :param epsilon: scale of the normalized perturbation.
    :param weight_decay: unused here — TODO confirm whether it should feed the optimizer.
    :param gradient_decay: unused here — TODO confirm intended use.
    """
    model.train()

    def aux_targets_gen(target):
        # For each sample, build a random permutation of the 9 labels that are
        # NOT its true label. Result has one column per sample, one row per
        # auxiliary target, so iterating it yields 9 batch-sized label vectors.
        aux_target = torch.empty((9, 0), dtype=torch.int)
        for i in range(len(target)):
            target_list = list(range(10))
            target_list.remove(target[i])
            random.shuffle(target_list)
            target_tensor = torch.tensor(target_list).unsqueeze(dim=1)
            aux_target = torch.cat((aux_target, target_tensor), dim=-1)
        return aux_target

    def guide_sample(datasets, adv_pred):
        # Draw one random guide example of class adv_pred[i] for each i and
        # stack them into a batch; also return their labels.
        def sample(dataset):
            # uses the module-level `random` (the previous nested
            # `import random` shadowed it needlessly)
            idx = random.randint(0, len(dataset) - 1)
            return dataset[idx]

        data, target = sample(datasets[adv_pred[0]])
        databatch = data.unsqueeze(dim=0)
        labels = [target]
        for i in range(1, len(adv_pred)):
            data, target = sample(datasets[adv_pred[i]])
            databatch = torch.cat([databatch, data.unsqueeze(dim=0)], dim=0)
            labels.append(target)
        labels = torch.tensor(labels)
        return databatch, labels

    loss_sum = 0.0
    train_loss_sum = 0.0
    guided_loss_sum = 0.0
    adv_loss_sum = 0.0
    for _, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)

        # generate adversarial perturbations toward each of the 9 wrong targets
        aux_targets = aux_targets_gen(target)
        guided_loss = 0
        adv_loss = 0
        for target_label in aux_targets:
            target_label = target_label.to(device)
            data_copy = data.clone().detach().requires_grad_(True)
            output_copy = model(data_copy)
            L1 = F.nll_loss(output_copy, target_label)
            # negative gradient = step toward the auxiliary target;
            # create_graph=True so the guided loss can backprop through it
            adv_pertur = -torch.autograd.grad(L1, data_copy, create_graph=True)[0]

            # Min-max normalize per (sample, channel) to [-1, 1], then scale by
            # epsilon. Assumes adv_pertur is 4-D (B, C, H, W) — TODO confirm.
            adv_max = torch.max(torch.max(adv_pertur, dim=-1)[0], dim=-1)[0]
            adv_min = torch.min(torch.min(adv_pertur, dim=-1)[0], dim=-1)[0]
            adv_mid = ((adv_max + adv_min) / 2).detach()[..., None, None]
            adv_half = ((adv_max - adv_min) / 2).detach()[..., None, None]
            # Detached broadcast replaces the original per-element Python loop
            # with .item() calls: gradients still flow through adv_pertur only.
            # NOTE(review): a constant-gradient channel makes adv_half zero and
            # this division inf/nan, same as the original code.
            adv_norm = epsilon * (adv_pertur - adv_mid) / adv_half

            adv_data = data.clone().detach() + adv_norm
            guide_data, _ = guide_sample(guide_sets, target_label)
            guide_data = guide_data.to(device)
            # NOTE(review): this attack does not depend on target_label, so the
            # same attack is regenerated 9 times per batch; left as-is because
            # attack.generate may be stochastic (e.g. PGD random starts).
            adv_output = model(attack.generate(model, data, target))
            guided_loss = F.mse_loss(adv_data, guide_data) + guided_loss
            adv_loss = attack.lf(adv_output, target) + adv_loss

        train_loss = F.nll_loss(output, target)
        loss = (1 - beta - gamma) * train_loss + beta * guided_loss + gamma * adv_loss
        loss.backward()
        optimizer.step()
        loss_sum += loss.item()
        train_loss_sum += train_loss.item()
        guided_loss_sum += guided_loss.item()
        adv_loss_sum += adv_loss.item()

    loss_sum /= len(train_loader)
    train_loss_sum /= len(train_loader)
    guided_loss_sum /= len(train_loader)
    adv_loss_sum /= len(train_loader)
    print(
        'Train Epoch: {} \tLoss: {:.6f}, Training Loss: {:.6f}, Guided Loss: {:.6f}, Adv Loss: {:.6f}'
        .format(epoch, loss_sum, train_loss_sum, guided_loss_sum, adv_loss_sum))