def test(opt, net, loader):
    correct = 0
    if opt.adversarial:
        correct_fgm, correct_pgd = 0, 0
    total = 0
    net.eval()
    logger.info('Starting testing...')
    # Adversarial attacks need gradient computations even at test time.
    with torch.set_grad_enabled(opt.adversarial):
        n_batches = len(loader)
        for i, (input, target) in enumerate(loader):
            logger.info('batch: {}/{}'.format(i + 1, n_batches))
            input = input.to(device, non_blocking=True)
            target = target.to(device, non_blocking=True)
            output = net(input, update_centers=False)
            _, predicted = torch.max(output.data, 1)
            total += target.size(0)
            correct += (predicted == target).sum().item()
            if opt.adversarial:
                def net_fn(input):
                    return net(input, update_centers=False)

                input_fgm = fast_gradient_method(net_fn, input, opt.adversary_eps,
                                                 opt.adversary_norm)
                input_pgd = projected_gradient_descent(net_fn, input,
                                                       opt.adversary_eps,
                                                       opt.pgd_step_eps,
                                                       opt.pgd_n_steps,
                                                       opt.adversary_norm)
                output_fgm = net(input_fgm, update_centers=False)
                output_pgd = net(input_pgd, update_centers=False)
                _, predicted_fgm = torch.max(output_fgm.data, 1)
                _, predicted_pgd = torch.max(output_pgd.data, 1)
                correct_fgm += (predicted_fgm == target).sum().item()
                correct_pgd += (predicted_pgd == target).sum().item()
    acc = 100 * correct / total
    results = {'accuracy (%)': acc}
    logger.info('Accuracy (%): {:.3f}'.format(acc))
    if opt.adversarial:
        acc_fgm = 100 * correct_fgm / total
        logger.info('Accuracy under FGM (%): {:.3f}'.format(acc_fgm))
        acc_pgd = 100 * correct_pgd / total
        logger.info('Accuracy under PGD (%): {:.3f}'.format(acc_pgd))
        results['accuracy under FGM (%)'] = acc_fgm
        results['accuracy under PGD (%)'] = acc_pgd
    with open(os.path.join(opt.save_dir, 'test.json'), 'w') as out:
        json.dump(results, out, indent=2)
    logger.info('Testing finished!')
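# A minimal usage sketch for test() above. The `opt` fields mirror exactly the
# attributes the function reads; the epsilon values and save_dir are assumed
# placeholders, and `net`/`test_loader` would come from the surrounding project.
from types import SimpleNamespace

import numpy as np

opt_sketch = SimpleNamespace(
    adversarial=True,        # also evaluate under FGM and PGD
    adversary_eps=8 / 255,   # attack budget (assumed value)
    adversary_norm=np.inf,   # L-inf threat model
    pgd_step_eps=2 / 255,    # PGD step size (assumed value)
    pgd_n_steps=40,          # PGD iterations (assumed value)
    save_dir='./results',    # directory receiving test.json
)
# test(opt_sketch, net, test_loader)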
def main(_):
    # Load training and test data
    data = ld_cifar10()

    # Instantiate model, loss, and optimizer for training
    net = CNN(in_channels=3)
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    if device == 'cuda':
        net = net.cuda()
    loss_fn = torch.nn.CrossEntropyLoss(reduction='mean')
    optimizer = torch.optim.Adam(net.parameters(), lr=1e-3)

    # Train vanilla model
    net.train()
    for epoch in range(1, FLAGS.nb_epochs + 1):
        train_loss = 0.
        for x, y in data.train:
            x, y = x.to(device), y.to(device)
            if FLAGS.adv_train:
                # Replace clean example with adversarial example for adversarial training
                x = projected_gradient_descent(net, x, FLAGS.eps, 0.01, 40, np.inf)
                # Stop backward from entering the graph that created the adv example
                x = x.clone().detach()
            optimizer.zero_grad()
            loss = loss_fn(net(x), y)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
        print('epoch: {}/{}, train loss: {:.3f}'.format(
            epoch, FLAGS.nb_epochs, train_loss))

    # Evaluate on clean and adversarial data
    net.eval()
    report = EasyDict(nb_test=0, correct=0, correct_fgm=0, correct_pgd=0)
    for x, y in data.test:
        x, y = x.to(device), y.to(device)
        x_fgm = fast_gradient_method(net, x, FLAGS.eps, np.inf)
        x_pgd = projected_gradient_descent(net, x, FLAGS.eps, 0.01, 40, np.inf)
        _, y_pred = net(x).max(1)  # model prediction on clean examples
        _, y_pred_fgm = net(x_fgm).max(1)  # model prediction on FGM adversarial examples
        _, y_pred_pgd = net(x_pgd).max(1)  # model prediction on PGD adversarial examples
        report.nb_test += y.size(0)
        report.correct += y_pred.eq(y).sum().item()
        report.correct_fgm += y_pred_fgm.eq(y).sum().item()
        report.correct_pgd += y_pred_pgd.eq(y).sum().item()
    print('test acc on clean examples (%): {:.3f}'.format(
        report.correct / report.nb_test * 100.))
    print('test acc on FGM adversarial examples (%): {:.3f}'.format(
        report.correct_fgm / report.nb_test * 100.))
    print('test acc on PGD adversarial examples (%): {:.3f}'.format(
        report.correct_pgd / report.nb_test * 100.))
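# For orientation, a minimal sketch of what the L-inf fast gradient method
# computes: one step of size eps in the direction of the loss gradient's sign.
# The real cleverhans fast_gradient_method additionally supports L1/L2 norms,
# clipping, and targeted attacks; this is an illustration, not its implementation.
import torch
import torch.nn.functional as F

def fgm_linf_sketch(model, x, y, eps):
    """x_adv = x + eps * sign(grad_x cross_entropy(model(x), y))."""
    x = x.clone().detach().requires_grad_(True)
    loss = F.cross_entropy(model(x), y)
    loss.backward()
    return (x + eps * x.grad.sign()).detach()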
def test(opt, net, loader):
    correct = 0
    if opt.adversarial:
        correct_fgm, correct_pgd = 0, 0
    total = 0
    net.eval()
    logger.info('Starting testing...')
    # Adversarial attacks need gradient computations even at test time.
    with torch.set_grad_enabled(opt.adversarial):
        for i, (input, target) in enumerate(loader):
            logger.info('batch: {}/{}'.format(i + 1, len(loader)))
            input = input.to(device, non_blocking=True)
            target = target.to(device, non_blocking=True)
            output = net(input, update_centers=False)

            # Save tensors for visualization
            raw_data = input if i == 0 else torch.cat((raw_data, input))
            labels = target if i == 0 else torch.cat((labels, target))
            activations = net_head(input)
            all_activations = (activations if i == 0
                               else torch.cat((all_activations, activations)))

            _, predicted = torch.max(output.data, 1)
            total += target.size(0)
            correct += (predicted == target).sum().item()
            if opt.adversarial:
                def net_fn(input):
                    return net(input, update_centers=False)

                input_fgm = fast_gradient_method(net_fn, input, opt.adversary_eps,
                                                 opt.adversary_norm)
                input_pgd = projected_gradient_descent(net_fn, input,
                                                       opt.adversary_eps,
                                                       opt.pgd_step_eps,
                                                       opt.pgd_n_steps,
                                                       opt.adversary_norm)
                output_fgm = net(input_fgm, update_centers=False)
                output_pgd = net(input_pgd, update_centers=False)
                _, predicted_fgm = torch.max(output_fgm.data, 1)
                _, predicted_pgd = torch.max(output_pgd.data, 1)
                correct_fgm += (predicted_fgm == target).sum().item()
                correct_pgd += (predicted_pgd == target).sum().item()
    logger.info('Accuracy (%): {:.3f}'.format(100 * correct / total))
    if opt.adversarial:
        logger.info('Accuracy under FGM (%): {:.3f}'.format(100 * correct_fgm / total))
        logger.info('Accuracy under PGD (%): {:.3f}'.format(100 * correct_pgd / total))
    logger.info('Testing finished!')
    return (raw_data.cpu().numpy(), labels.cpu().numpy(),
            all_activations.cpu().numpy())
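# Sketch of consuming the arrays returned by this test() variant for a quick
# 2-D look at the saved activations. PCA is an illustrative choice here, not
# necessarily the visualization the project uses.
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA

def plot_activations_sketch(all_activations, labels):
    xy = PCA(n_components=2).fit_transform(all_activations)
    plt.scatter(xy[:, 0], xy[:, 1], c=labels, s=4, cmap='tab10')
    plt.colorbar()
    plt.show()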
def run_attacks_cleverhans(res_path, ncl=False):
    if ncl:
        MODEL_DIR = '/mnt/md0/orville/Miriam/modular-loss-experiments-morph/results_ncl/CIFAR-10/densenet-82-8-8'
        rel_dirs = ['alpha_0.0_gamma_0.02_n_models_2_1583114412120',
                    'alpha_0.0_gamma_0.05_n_models_2_1583114439810']
        alpha = ['0.02', '0.05']
        res_path = res_path + '_ncl'
    else:
        MODEL_DIR = '/mnt/md0/orville/Miriam/modular-loss-experiments-morph/results/CIFAR-10/densenet-82-8-8'
        rel_dirs = ['alpha_0.0_gamma_0.0_n_models_3_1585505819121',
                    'alpha_0.1_gamma_0.0_n_models_3_1589795142450',
                    'alpha_0.2_gamma_0.0_n_models_3_1589794987034',
                    'alpha_0.3_gamma_0.0_n_models_3_1589795486214',
                    'alpha_0.4_gamma_0.0_n_models_3_1589796192038',
                    'alpha_0.5_gamma_0.0_n_models_3_1589796200262',
                    'alpha_0.6_gamma_0.0_n_models_3_1589796218204',
                    'alpha_0.7_gamma_0.0_n_models_3_1589796234665']
        alpha = [format(x, '2.1f') for x in np.arange(0.0, 0.8, 0.1)]

    batch_size = 256
    n_workers = 20
    dataset = 'CIFAR-10'
    network = 'densenet-82-8-8'
    loaders, _ = get_dataloaders_(batch_size, 0, dataset, False,
                                  early_stop=False, n_workers=n_workers)

    n_models = 2 if ncl else 3
    params = {network: {'num_modules': n_models, 'bottleneck': True,
                        'reduction': 0.5, 'depth': 82, 'growth_rate': 8,
                        'input_shape': (3, 32, 32), 'output_dim': 10}}
    model = DenseNet(input_shape=params[network]['input_shape'],
                     output_dim=params[network]['output_dim'],
                     growth_rate=params[network]['growth_rate'],
                     depth=params[network]['depth'],
                     reduction=params[network]['reduction'],
                     bottleneck=params[network]['bottleneck'],
                     num_modules=n_models)
    device = torch.device("cuda")

    reports = dict.fromkeys(alpha)
    for model_path, curr_alpha in tqdm(zip(rel_dirs, alpha), total=len(alpha)):
        if ncl:
            weight_path = path.join(MODEL_DIR, model_path,
                                    'trial_0/' + curr_alpha + '/weights/final_weights.pt')
        else:
            weight_path = path.join(MODEL_DIR, model_path,
                                    'trial_0/0.0/weights/final_weights.pt')
        model.reset_parameters()
        model.load_state_dict(torch.load(weight_path))
        model.eval()
        net = ModelMeanEP(model).to(device)

        report = dict()
        for x, y in tqdm(loaders['test'], total=len(loaders['test'])):
            x, y = x.to(device), y.to(device)
            report['nb_test'] = report.get('nb_test', 0) + y.size(0)

            # model prediction on clean examples
            _, y_pred = net(x).max(1)
            report['acc'] = report.get('acc', 0) + y_pred.eq(y).sum().item()

            # model prediction on FGM adversarial examples
            x_adv = fast_gradient_method(net, x, 0.02, np.inf)
            _, y_pred = net(x_adv).max(1)
            report['FGM_0.02'] = report.get('FGM_0.02', 0) + y_pred.eq(y).sum().item()
            x_adv = fast_gradient_method(net, x, 0.04, np.inf)
            _, y_pred = net(x_adv).max(1)
            report['FGM_0.04'] = report.get('FGM_0.04', 0) + y_pred.eq(y).sum().item()

            # model prediction on BIM adversarial examples (PGD without random init)
            x_adv = projected_gradient_descent(net, x, eps=0.01, eps_iter=0.01 / 10,
                                               nb_iter=10, norm=np.inf, rand_init=0)
            _, y_pred = net(x_adv).max(1)
            report['BIM_0.01'] = report.get('BIM_0.01', 0) + y_pred.eq(y).sum().item()
            x_adv = projected_gradient_descent(net, x, eps=0.02, eps_iter=0.02 / 10,
                                               nb_iter=10, norm=np.inf, rand_init=0)
            _, y_pred = net(x_adv).max(1)
            report['BIM_0.02'] = report.get('BIM_0.02', 0) + y_pred.eq(y).sum().item()

            # model prediction on PGD adversarial examples
            x_adv = projected_gradient_descent(net, x, eps=0.01, eps_iter=0.01 / 10,
                                               nb_iter=10, norm=np.inf)
            _, y_pred = net(x_adv).max(1)
            report['PGD_0.01'] = report.get('PGD_0.01', 0) + y_pred.eq(y).sum().item()
            x_adv = projected_gradient_descent(net, x, eps=0.02, eps_iter=0.02 / 10,
                                               nb_iter=10, norm=np.inf)
            _, y_pred = net(x_adv).max(1)
            report['PGD_0.02'] = report.get('PGD_0.02', 0) + y_pred.eq(y).sum().item()

        # Convert raw counts to percentages
        for key in ['acc', 'FGM_0.02', 'FGM_0.04', 'BIM_0.01', 'BIM_0.02',
                    'PGD_0.01', 'PGD_0.02']:
            report[key] = (report[key] / report['nb_test']) * 100.
        reports[curr_alpha] = report

        # Save intermediate results after each model
        with open(res_path, 'wb') as f:
            pickle.dump(reports, f)
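# ModelMeanEP is not defined in this file. Given the multi-module DenseNet it
# wraps and the net(x).max(1) calls above, a plausible reading is a wrapper
# that averages the per-module softmax outputs; this is an assumption about
# the project's class, sketched for clarity.
import torch
import torch.nn as nn
import torch.nn.functional as F

class ModelMeanEPSketch(nn.Module):
    def __init__(self, model):
        super().__init__()
        self.model = model

    def forward(self, x):
        # Assumed: the wrapped model returns one logit tensor per module.
        outputs = self.model(x)
        probs = [F.softmax(out, dim=1) for out in outputs]
        return torch.stack(probs, dim=0).mean(dim=0)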
def evaluate_attack_mnist(model, device, attack, eps=0, norm=2, num_SD=1.8,
                          num_summed_SD=0.75, num_false=1, num_imgs=10000,
                          print_every=100, stop_idx=[], CheckAll=False,
                          use_printouts=False, get_psi=False):
    '''
    :param model (nn.Module): The model to be used for testing. It is expected
        to have the following attributes:
        .name (str): Name of the model
        .encoder (nn.Module): Neural network with a callable, implemented
            forward function -- must expect inputs of size 784
        .classifier (nn.Module): Neural network with a callable, implemented
            forward function -- must expect inputs of size 784
        .z_prior (torch.nn.Parameter): length-2 tuple that holds the z_prior
            mean(s) in [0] and z_prior variances in [1]
    :param device (str): either 'cuda' or 'cpu'
    :param attack (str): choice of 'noise', 'fgm', or 'pixel'
    :param eps (int or float): strength of the attack; note that pixel attacks
        only take integer values
    :param norm (int or float): either 2 or np.inf; used only with fgm attacks
    :param num_SD (float): sigma_detect threshold value
    :param num_summed_SD (float): Sigma_detect threshold value
    :param num_false (int): number of delta values for a given image that must
        exceed sigma_detect for the image to count as detected
    :param num_imgs (int): number of images to iterate over in the test dataset
    :param print_every (int): how often to print a progress report
    :param stop_idx (list of ints): indexes within the test dataset to pause at
        with detailed printouts
    :param CheckAll (bool): if True, pause at every image in the test dataset
    :param use_printouts (bool): if True, pause at every anomalous / successful
        adversarial image in the dataset
    :param get_psi (bool): if True (and eps == 0), evaluate and save psi values
        across the dataset as 'psis_mnist.npy'
    '''
    # Load MNIST test dataset
    testload = torch.utils.data.DataLoader(
        datasets.MNIST("data/mnist", train=False, download=True,
                       transform=transforms.Compose([
                           transforms.Resize(28),
                           transforms.ToTensor(),
                           transforms.Normalize([0.5], [0.5])
                       ])))
    x_test = testload.dataset.test_data.to(device).reshape(-1, 784).float()[:num_imgs] / 255
    y_test = testload.dataset.test_labels.to(device)[:num_imgs]
    print("y_test shape: {}".format(y_test.shape))
    total_num = len(x_test)
    print("Total length of x_test dataset: {}".format(total_num))

    # Put model in eval mode
    model.eval()

    # Load KL data
    KL_Classes_Stats = np.zeros((10, 10, 2))
    if os.path.exists('deltas_mnist.npy'):
        KL_Classes_Stats = np.load('deltas_mnist.npy')
    else:
        print("Warning: No deltas_mnist file to load. "
              "Make sure you run determine_deltas_mnist first!")
    KL_Summed_SD_Stats = np.zeros((10, 2))
    if os.path.exists('psis_mnist.npy'):
        KL_Summed_SD_Stats = np.load('psis_mnist.npy')
    else:
        print("Warning: No psis_mnist file to load. "
              "Make sure you run with get_psi=True first!")

    # Create vectors to hold values
    Max_Delta_KL_z_adv = []
    Summed_KL_z_adv = []
    PredictClean = []
    ProbClean = []
    ProbAdv = []
    PredictAdv = []
    IsCorrect = []
    AdvImages = []
    SuccessfulAdvAtkDetected = []
    UnsuccessfulAdvAtkDetected = []
    FalsePositive = []  # only used for d = 0 pixels changed
    AnomalyDetected = []
    KL_Summed_SD = [[] for _ in range(10)]

    # If running the single-pixel attack, load the attacker class
    attacker = None
    if attack == 'pixel':
        attacker = OnePixelAttack(model, device)

    for x, y, j in zip(x_test, y_test, range(len(x_test))):
        # Load single image
        orig = x.view(28, 28).cpu().numpy()
        img = orig.copy()
        shape = img.shape
        inp = Variable(x.type(Tensor)).to(device)
        prob_orig = ut.softmax(model.classifier(inp).data.cpu().numpy()[0])
        pred_orig = np.argmax(prob_orig)
        PredictClean.append(pred_orig)
        ProbClean.append(prob_orig)

        # Run the specified attack
        if eps > 0:
            if attack == 'fgm':
                adv_img = fast_gradient_method(model.classifier, x, eps=eps, norm=norm,
                                               clip_min=0, clip_max=1).view(1, -1)
            elif attack == 'noise':
                adv_img = noise(x, eps=eps, clip_min=0, clip_max=1).view(1, -1)
            elif attack == 'pixel':
                _, _, _, adv_img = attacker.pixel_attack(eps, shape, pred_orig, img)
            else:
                raise AssertionError("Attack must either be 'fgm', 'pixel', or 'noise'")
        else:
            adv_img = x.view(1, -1)

        adv_out = model.classifier(adv_img)
        prob = ut.softmax(adv_out.data.cpu().numpy())
        adv_y = F.softmax(adv_out, dim=-1).float()
        pred_adv = torch.topk(adv_y, 1, dim=-1)[1].item()
        prob_adv = prob[0][pred_adv]
        PredictAdv.append(pred_adv)
        ProbAdv.append(prob_adv)
        AdvImages.append(adv_img.view(1, 28, 28).data)
        IsCorrect.append(int(pred_adv == y))

        #### Test KL z divergence for all images ####
        # Detailed printouts and pauses happen only when certain conditions are met.
        pause_here = ((((pred_orig != pred_adv) or (pred_orig != y) or CheckAll)
                       and use_printouts) or j in stop_idx)
        if pause_here:
            fig1 = plt.imshow(adv_img.view(28, 28).cpu().data)
            fig1.axes.get_xaxis().set_visible(False)
            fig1.axes.get_yaxis().set_visible(False)
            plt.title('{} Attack, eps = {}, Adv Prediction: {}'.format(attack, eps, pred_adv))
            plt.show()
            fig2 = plt.imshow(x.view(28, 28).cpu().data)
            fig2.axes.get_xaxis().set_visible(False)
            fig2.axes.get_yaxis().set_visible(False)
            plt.title('Clean Image Prediction: {}'.format(pred_orig))
            plt.show()
            print("Test Image i = {}: Original prediction: {}, "
                  "Adversarially-induced prediction: {}, True Label = {}"
                  .format(j, pred_orig, pred_adv, y))

        KL_local = []
        # Calculate KL div for the "expected" (clean or adversarially-induced) label
        y_prob = torch.zeros(1, 10).float().to(device)
        y_prob[0][pred_adv] = 1
        qm, qv = model.encoder(adv_img.view(1, -1), y_prob)
        # kl_z_all shape = [batch_size * y_dim]
        kl_z_all = y_prob * ut.kl_normal(qm, qv, model.z_prior[0], model.z_prior[1])
        expected_kl_z = torch.sum(kl_z_all)

        TotalFalse = 0
        Num_SD_Away_Total = 0
        Max_Adv_KL = 0
        for i in range(10):
            y_prob = torch.zeros(1, 10).float().to(device)
            y_prob[0][i] = 1
            qm, qv = model.encoder(adv_img.view(1, -1), y_prob)
            # kl_z_all shape = [batch_size * y_dim]
            kl_z_all = y_prob * ut.kl_normal(qm, qv, model.z_prior[0], model.z_prior[1])
            kl_z = torch.sum(kl_z_all)
            KL_local.append(kl_z.item())
            if KL_Classes_Stats[pred_adv][i][1] > 0:
                Num_SD_Away = (abs(kl_z - expected_kl_z - KL_Classes_Stats[pred_adv][i][0])
                               / KL_Classes_Stats[pred_adv][i][1]).item()
            else:
                Num_SD_Away = 0
            if Num_SD_Away > Max_Adv_KL:
                Max_Adv_KL = Num_SD_Away
            Num_SD_Away_Total = Num_SD_Away_Total + Num_SD_Away
            reasonable = Num_SD_Away <= num_SD
            if not reasonable:
                TotalFalse = TotalFalse + 1
            if pause_here:
                print("delta KL_div for y = {}: {:.2f}, Expected delta KL_div: {:.2f}, "
                      "SD: {:.2f}, Num SD Away: {:.2f}, Reasonable (within {} SD): {}"
                      .format(i, kl_z - expected_kl_z, KL_Classes_Stats[pred_adv][i][0],
                              KL_Classes_Stats[pred_adv][i][1], Num_SD_Away, num_SD,
                              reasonable))

        PositiveDetected = 1 if ((Num_SD_Away_Total - KL_Summed_SD_Stats[pred_adv][0])
                                 / KL_Summed_SD_Stats[pred_adv][1]) > num_summed_SD else 0
        if (pred_orig != pred_adv) or (eps == 0 and pred_orig != y):
            Max_Delta_KL_z_adv.append(Max_Adv_KL)
            Summed_KL_z_adv.append(Num_SD_Away_Total)
        if eps == 0 and get_psi:
            # Num_SD_Away_Total is already a Python float, so no .item() call here
            KL_Summed_SD[y].append(Num_SD_Away_Total)
        if pause_here:
            print("Summed SDs across classes: {:.2f}".format(Num_SD_Away_Total))
            print("Mean, SD for Summed SDs: {}".format(KL_Summed_SD_Stats[pred_adv]))
            print("Detected: {}, PositiveDetected: {}, Detected as anomaly: {}"
                  .format(TotalFalse >= num_false, PositiveDetected,
                          bool(TotalFalse >= num_false or PositiveDetected)))

        # Append the detected value to the appropriate vector
        if eps == 0 and pred_orig == y:
            # Then this is a false positive
            FalsePositive.append(int(TotalFalse >= num_false or PositiveDetected))
        if pred_orig == pred_adv and TotalFalse >= num_false:
            # Then this is a detection of an unsuccessful adv attack
            UnsuccessfulAdvAtkDetected.append(PositiveDetected)
        if pred_orig != pred_adv and pred_orig == y:
            # Then this is a detection of a successful adv attack
            SuccessfulAdvAtkDetected.append(int(TotalFalse >= num_false or PositiveDetected))
        if eps == 0 and pred_orig != y:
            # Then this is a detection of an anomaly
            AnomalyDetected.append(int(TotalFalse >= num_false or PositiveDetected))

        # Wait for the user to press a keystroke before continuing
        if pause_here:
            input("Press Enter to continue...")

        # Progress print
        if j and j % print_every == 0:
            Accuracy = statistics.mean(IsCorrect) * 100
            Avg_Max_Delta_KL_z_adv = statistics.mean(Max_Delta_KL_z_adv)
            SD_Max_Delta_KL_z_adv = statistics.stdev(Max_Delta_KL_z_adv)
            Avg_Summed_KL_z_adv = statistics.mean(Summed_KL_z_adv)
            SD_Summed_KL_z_adv = statistics.stdev(Summed_KL_z_adv)
            print("Completed {} of {} Total Examples in MNIST Test Dataset. "
                  "Accuracy = {:.2f}, "
                  "Avg Max Delta Adversarial KL_z = {:.2f}, SD = {:.2f}, "
                  "Avg Summed Delta Adversarial KL_z = {:.2f}, SD = {:.2f}"
                  .format(j, total_num, Accuracy,
                          Avg_Max_Delta_KL_z_adv, SD_Max_Delta_KL_z_adv,
                          Avg_Summed_KL_z_adv, SD_Summed_KL_z_adv))

    # Afterwards, determine the final stats
    Accuracy = statistics.mean(IsCorrect) * 100
    Avg_Max_Delta_KL_z_adv = statistics.mean(Max_Delta_KL_z_adv)
    SD_Max_Delta_KL_z_adv = statistics.stdev(Max_Delta_KL_z_adv)
    Avg_Summed_KL_z_adv = statistics.mean(Summed_KL_z_adv)
    SD_Summed_KL_z_adv = statistics.stdev(Summed_KL_z_adv)
    if eps == 0 and get_psi:
        KL_Summed_SD_Stats = np.zeros([10, 2])
        for i in range(10):
            KL_Summed_SD_Stats[i][0] = statistics.mean(KL_Summed_SD[i])
            KL_Summed_SD_Stats[i][1] = statistics.stdev(KL_Summed_SD[i])
        # Save file
        np.save('psis_mnist.npy', KL_Summed_SD_Stats)

    FalsePositivePercentage = None
    SuccessfulAdvAtkDetectedPercentage = None
    AnomalyDetectedPercentage = None
    if eps == 0 and len(FalsePositive) > 0:
        FalsePositivePercentage = sum(FalsePositive) / len(x_test) * 100
    if len(SuccessfulAdvAtkDetected) > 0:
        SuccessfulAdvAtkDetectedPercentage = statistics.mean(SuccessfulAdvAtkDetected) * 100
    if len(AnomalyDetected) > 0:
        AnomalyDetectedPercentage = statistics.mean(AnomalyDetected) * 100

    # Print out results
    print("Accuracy with eps = {} {} Disturbance: {:.2f}%".format(eps, attack, Accuracy))
    print("Percentage of Successful Adversarial Attacks: {:.2f}%".format(
        100 * len(SuccessfulAdvAtkDetected) / len(x_test)))
    print("Average Max Delta Adversarial KL_z = {:.2f}, SD = {:.2f}".format(
        Avg_Max_Delta_KL_z_adv, SD_Max_Delta_KL_z_adv))
    print("Average Summed Delta Adversarial KL_z = {:.2f}, SD = {:.2f}".format(
        Avg_Summed_KL_z_adv, SD_Summed_KL_z_adv))
    if eps == 0:
        print("False Positive Percentage for Clean (eps = {}) data with "
              "KL threshold of {}: {}%".format(eps, num_SD, FalsePositivePercentage))
        print("Anomaly (incorrectly classified from clean img) Detected "
              "Percentage: {:.2f}%".format(AnomalyDetectedPercentage))
    else:
        print("Successful Adversarial Attack Detected Percentage: {:.2f}%".format(
            SuccessfulAdvAtkDetectedPercentage))

    # Plot the histogram of the max KL divergences
    plt.figure(0)
    plt.hist(x=Max_Delta_KL_z_adv, bins='auto', color='#0504aa')
    plt.grid(axis='y')
    plt.xlabel('Max KL z Divergence')
    plt.ylabel('Frequency')
    plt.xlim(0, 5)
    if eps == 0:
        plt.title("Max Clean Delta using {} Model on MNIST".format(model.name))
    else:
        plt.title('Max Adv. Delta using {} Model on MNIST, {} Attack, eps = {}'.format(
            model.name, attack, eps))
    plt.show()

    # Plot the histogram of the summed KL divergences (psi)
    plt.figure(1)
    plt.hist(x=Summed_KL_z_adv, bins='auto', color='#607c8e')
    plt.grid(axis='y')
    plt.xlabel('Summed KL z Divergence')
    plt.ylabel('Frequency')
    plt.xlim(0, 35)
    if eps == 0:
        plt.title("Clean Psi using {} on MNIST".format(model.name))
    else:
        plt.title('Adv. Psi using {} on MNIST, {} Attack, eps = {}'.format(
            model.name, attack, eps))
    plt.show()

    # Save some examples of the generated adversarial images
    save_image(AdvImages[:25], "images/{}_attack-eps={}.png".format(attack, eps),
               nrow=5, normalize=True)
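# The noise() attack called above is a random baseline. Whether the imported
# implementation matches exactly is an assumption; a sketch consistent with
# the call signature noise(x, eps=eps, clip_min=0, clip_max=1) is:
import torch

def noise_sketch(x, eps, clip_min=0.0, clip_max=1.0):
    """Random L-inf perturbation: x + Uniform(-eps, eps), clipped to range."""
    eta = torch.empty_like(x).uniform_(-eps, eps)
    return (x + eta).clamp(clip_min, clip_max)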
def main(_):
    # Load training and test data
    data = ld_cifar10()

    # Instantiate model, loss, and optimizer for training
    # net = CNN(in_channels=3)
    net = resnet.ResNet18()
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    logging.info('Using GPU' if device == 'cuda' else 'Using CPU')
    if device == 'cuda':
        net = net.cuda()
    loss_fn = torch.nn.CrossEntropyLoss(reduction='mean')
    optimizer = torch.optim.Adam(net.parameters(), lr=1e-3)

    # Load checkpoint if one exists
    if os.path.exists(FLAGS.checkpoint):
        ckpt = torch.load(FLAGS.checkpoint)
        net.load_state_dict(ckpt['net'])
        logging.info('Loaded model %s with accuracy %.3f', FLAGS.checkpoint, ckpt['acc'])
    else:
        # Train vanilla model
        if input('No checkpoint found, continue? y/[n] ') != 'y':
            return -1
        net.train()
        with trange(1, FLAGS.nb_epochs + 1, desc='Training', unit='Epoch') as t:
            for epoch in t:
                train_loss = 0.
                for x, y in data.train:
                    x, y = x.to(device), y.to(device)
                    if FLAGS.adv_train:
                        # Replace clean example with adversarial example for adversarial training
                        x = projected_gradient_descent(net, x, FLAGS.eps, 0.01, 40, np.inf)
                        # Stop backward from entering the graph that created the adv example
                        x = x.clone().detach()
                    optimizer.zero_grad()
                    loss = loss_fn(net(x), y)
                    loss.backward()
                    optimizer.step()
                    train_loss += loss.item()
                t.set_description('Train Loss=%.3f' % train_loss)

    # Evaluate on clean and adversarial data
    net.eval()
    report = EasyDict(nb_test=0, correct=0, correct_fgm=0, correct_pgd=0)
    for x, y in tqdm(data.test, unit='Samples', desc='Testing'):
        x, y = x.to(device), y.to(device)
        x_fgm = fast_gradient_method(net, x, FLAGS.eps, np.inf)
        x_pgd = projected_gradient_descent(net, x, FLAGS.eps, 0.01, 40, np.inf)
        _, y_pred = net(x).max(1)  # model prediction on clean examples
        _, y_pred_fgm = net(x_fgm).max(1)  # model prediction on FGM adversarial examples
        _, y_pred_pgd = net(x_pgd).max(1)  # model prediction on PGD adversarial examples
        report.nb_test += y.size(0)
        report.correct += y_pred.eq(y).sum().item()
        report.correct_fgm += y_pred_fgm.eq(y).sum().item()
        report.correct_pgd += y_pred_pgd.eq(y).sum().item()
    print('test acc on clean examples (%): {:.3f}'.format(
        report.correct / report.nb_test * 100.))
    print('test acc on FGM adversarial examples (%): {:.3f}'.format(
        report.correct_fgm / report.nb_test * 100.))
    print('test acc on PGD adversarial examples (%): {:.3f}'.format(
        report.correct_pgd / report.nb_test * 100.))
    return 0
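# The checkpoint branch above expects a dict with 'net' and 'acc' keys, but no
# save step appears in this file. A sketch that would produce a compatible
# checkpoint after evaluation (the function name is an assumption):
import torch

def save_checkpoint_sketch(net, acc, path):
    """Write a checkpoint in the format the loader above expects."""
    torch.save({'net': net.state_dict(), 'acc': acc}, path)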