def get_theoretic_lowerbound(model, eps_range, config, pretrained_config, output_root):
    """Evaluate the certified (theoretical) adversarial accuracy over eps values.

    Runs `theoretical_adversary` on ``config['num_examples']`` validation
    samples and writes the per-eps average accuracy to
    ``output_root/results.json``.

    Args:
        model: classifier under evaluation (already on the right device).
        eps_range: 1-D np.ndarray of perturbation radii to certify against.
        config: evaluation config; must provide ``num_examples`` and ``cuda``.
        pretrained_config: config of the pretrained model (supplies the batch
            size, the Lipschitz constant ``model.l_constant``, and data setup).
        output_root: directory where ``results.json`` is written.
    """
    n_examples = config['num_examples']
    n_batches = int(
        math.ceil((n_examples * 1.0) / pretrained_config.optim.batch_size))
    # BUG FIX: the original line was a no-op self-assignment
    # (`pretrained_config.cuda = pretrained_config.cuda`).  Propagate the
    # evaluation-time device choice instead, as the foolbox-based
    # generate_examples in this file does.
    pretrained_config.cuda = config.cuda
    data = load_data(pretrained_config)

    # Evaluate the theoretical adversary batch by batch.
    eps_acc = []
    for sample in tqdm(islice(data['validation'], n_batches), total=n_batches):
        x = to_cuda(sample[0], cuda=pretrained_config.cuda)
        y = to_cuda(sample[1].type(torch.LongTensor),
                    cuda=pretrained_config.cuda)
        # Radii are scaled by the network's Lipschitz constant.
        eps_acc.append(
            theoretical_adversary(
                model, x, y, pretrained_config.model.l_constant * eps_range))

    # Average the per-example accuracies across all evaluated batches.
    avg_eps_acc = np.concatenate(eps_acc, 0).mean(0)
    results = {'eps': eps_range.tolist(), 'acc': avg_eps_acc.tolist()}
    print(results)
    with open(os.path.join(output_root, 'results.json'), 'w') as f:
        json.dump(results, f, sort_keys=True, indent=4)
def check_grad_norm(model, data, cuda, epochs=3):
    """Estimate the spectral norm of the model's input-output Jacobian.

    Performs `epochs` full passes over `data`, carrying the power-iteration
    vector across batches via `jac_spectral_norm`, and returns the final
    singular-value estimate.  Assumes `data` yields at least one batch.
    """
    power_vec = to_cuda(torch.randn(10), cuda)
    for _ in range(epochs):
        for batch_x, _ in data:
            model.zero_grad()
            inputs = to_cuda(batch_x, cuda)
            inputs.requires_grad = True
            # One power-iteration step; the carried vector warm-starts
            # the next batch.
            sigma, power_vec = jac_spectral_norm(model(inputs), inputs,
                                                 power_vec)
    return sigma
def eval_on_examples(model, output_root, cuda=True):
    """Report the model's accuracy on previously saved adversarial examples.

    Loads ``examples.npy`` / ``targets.npy`` from `output_root`, dumps a debug
    image grid to 'test.png', and prints the model's accuracy on the loaded
    adversarial examples.

    Args:
        model: classifier to evaluate.
        output_root: directory containing the saved .npy arrays.
        cuda: move tensors to the GPU when True.
    """
    adv_examples = np.load(os.path.join(output_root, 'examples.npy'))
    adv_targets = np.load(os.path.join(output_root, 'targets.npy'))
    print(adv_examples.shape)
    adv_ex_t = torch.Tensor(adv_examples)
    save_image(adv_ex_t, 'test.png')
    # FIX: reuse the tensor built above instead of converting the numpy
    # array to a torch.Tensor a second time, as the original did.
    adv_examples = to_cuda(adv_ex_t, cuda)
    adv_targets = to_cuda(torch.LongTensor(adv_targets), cuda)
    print("Adv Accuracy: {}".format(
        accuracy(model, adv_examples, adv_targets).item()))
def main(config):
    """Load a pretrained model and generate adversarial examples for it."""
    # Make sure the output directory exists.
    output_root = config.output_root
    if not os.path.isdir(output_root):
        os.makedirs(output_root)

    # Restore the pretrained model and its training-time config.
    model, pretrained_config = load_model_from_config(config.pretrained_path)

    # Move the model to the GPU when requested, then freeze it for eval.
    if config.cuda:
        print('Using cuda: {}'.format("Yes"))
        to_cuda(model, cuda=config.cuda)
    model.eval()

    generate_examples(model, config, pretrained_config, output_root)
def main(config):
    """Sweep FGS and PGD attacks over a grid of epsilon values.

    For every eps in a fixed linspace, runs `generate_examples` once with the
    FGS flag set and once with the PGD flag set, writing each run's outputs
    to ``<output_root>/{fgs,pgd}/<eps>/``.
    """
    print(config)

    # Make sure the output directory exists.
    output_root = config.output_root
    if not os.path.isdir(output_root):
        os.makedirs(output_root)

    # Restore the pretrained model and its training-time config.
    model, pretrained_config = load_model_from_config(config.model.exp_path)

    # Move the model to the GPU when requested, then freeze it for eval.
    if config.cuda:
        print('Using cuda: {}'.format("Yes"))
        to_cuda(model, cuda=config.cuda)
    model.eval()

    eps_range = np.linspace(0.01, 0.5, 20)

    # FGS sweep: one sub-directory per epsilon.
    config.fgs = True
    for eps in eps_range:
        config.eps = eps
        attack_root = os.path.join(output_root, 'fgs', str(eps))
        if not os.path.isdir(attack_root):
            os.makedirs(attack_root)
        generate_examples(model, config, pretrained_config, attack_root)
    config.fgs = False

    # PGD sweep: one sub-directory per epsilon.
    config.pgd = True
    for eps in eps_range:
        config.eps = eps
        attack_root = os.path.join(output_root, 'pgd', str(eps))
        if not os.path.isdir(attack_root):
            os.makedirs(attack_root)
        generate_examples(model, config, pretrained_config, attack_root)
def slow_check_grad_norm(model, data, cuda, input_dim=784):
    """Exactly compute per-example Jacobian spectral norms via full SVD.

    Slow-but-exact counterpart of `check_grad_norm`: for every input it
    materializes the full Jacobian and takes its largest singular value.

    Args:
        model: differentiable classifier taking flattened (N, input_dim) input.
        data: iterable of (x, y) batches.
        cuda: move tensors to the GPU when True.
        input_dim: flattened input dimensionality.  Defaults to 784
            (28x28 images), matching the previously hard-coded value, so
            existing callers are unaffected.

    Returns:
        (mean, max) of the per-example spectral norms over all of `data`.
    """
    spectral_rads = []
    for x, _ in data:
        model.zero_grad()
        # Generalized: input size was hard-coded to 784 before.
        x = to_cuda(x, cuda).view(-1, input_dim)
        x.requires_grad = True
        logits = model(x)
        jac = compute_jacobian(logits, x)
        for j in jac:
            # Largest singular value == spectral norm of this Jacobian.
            _, S, _ = torch.svd(j)
            spectral_rads.append(torch.max(S).cpu().detach().item())
    return np.mean(spectral_rads), np.max(spectral_rads)
def get_safe_bjorck_scaling(weight, cuda=True):
    """Return sqrt(rows * cols) of `weight` as a one-element float tensor.

    The 'safe' name suggests this scaling keeps the Bjorck orthonormalization
    iteration stable for arbitrary weight matrices — confirm against the
    caller.  The result lives on the GPU when `cuda` is True.
    """
    rows, cols = weight.shape[0], weight.shape[1]
    scaling = torch.tensor([np.sqrt(rows * cols)]).float()
    return to_cuda(scaling, cuda=cuda)
def generate_examples(model, config, pretrained_config, output_root):
    """Attack `model` with a foolbox attack and save examples + statistics.

    Runs the attack named by ``config.name`` on ``config['num_examples']``
    validation samples, then writes under `output_root`:
      * ``examples.npy`` — the adversarial inputs,
      * ``targets.npy``  — the adversarial predictions,
      * ``results.json`` — success rate and distortion statistics.
    """
    adv_example_filepath = os.path.join(output_root, 'examples')
    adv_targets_filepath = os.path.join(output_root, 'targets')

    # Set up adversarial attack.
    adv_model = PyTorchModel(model, (0, 1),
                             pretrained_config.data.class_count,
                             cuda=config.cuda)
    criterion = Misclassification()
    attack = getattr(foolbox.attacks, config.name)(adv_model, criterion)

    # Get data, overriding the training-time device/batch-size settings.
    pretrained_config.cuda = config.cuda
    pretrained_config.optim.batch_size = config.data.batch_size
    data = load_data(pretrained_config)

    n_examples = config['num_examples']
    n_batches = int(
        math.ceil((n_examples * 1.0) / pretrained_config.optim.batch_size))

    # Accumulators for the attack results.
    adv_ex = torch.Tensor()
    adv_targets = torch.LongTensor()
    adv_mse = torch.Tensor()
    adv_inf = torch.Tensor()
    success = torch.Tensor()

    # Set up the distance measure for the adversarial attack; default to MSE.
    distance_name = config.get('distance')
    distance = getattr(foolbox.distances, distance_name) if distance_name is not None \
        else foolbox.distances.MeanSquaredDistance

    # Perform the attack batch by batch.
    for sample in tqdm(islice(data['validation'], n_batches), total=n_batches):
        x = sample[0]
        y = sample[1].type(torch.LongTensor)
        x = to_cuda(x, cuda=config.cuda)
        adv, adv_t, batch_success, batch_adv_mse, batch_adv_inf = batch_attack(
            attack, adv_model, criterion, x, y.cpu().numpy(),
            config['attack_kwargs'], distance)
        adv_ex = torch.cat([adv_ex, adv], 0)
        adv_targets = torch.cat([adv_targets, adv_t], 0)
        success = torch.cat([success, batch_success], 0)
        adv_mse = torch.cat([adv_mse, batch_adv_mse], 0)
        adv_inf = torch.cat([adv_inf, batch_adv_inf], 0)

    # Summarize the results.  FIX: guard against zero successful attacks,
    # which previously produced NaN means (0/0) and crashed np.percentile
    # on an empty array.
    n_success = success.sum()
    any_success = n_success.item() > 0
    results = {
        "success_rate": success.mean().item(),
        "defense_rate": 1 - success.mean().item(),
        "mean_mse": ((adv_mse * success).sum() / n_success).item()
                    if any_success else 0.0,
        "mean_inf": ((adv_inf * success).sum() / n_success).item()
                    if any_success else 0.0,
        "mse_quartiles": list(
            np.percentile(adv_mse[success == 1.0].numpy(),
                          [0, 25, 50, 75, 100]))
                    if any_success else [0, 0, 0, 0, 0]
    }
    results["median_mse"] = results["mse_quartiles"][2]

    print("success rate: {}".format(results["success_rate"]))
    print("defense rate: {}".format(results["defense_rate"]))
    print("mean MSE for successful attacks: {}".format(results["mean_mse"]))
    print("mean L_inf for successful attacks: {}".format(results["mean_inf"]))
    print("MSE quartiles for successful attacks: {}".format(
        results["mse_quartiles"]))

    # FIX: results.json previously went to config['output_root'] while the
    # example/target arrays went to the `output_root` argument; keep all
    # outputs of one run together under `output_root`.
    with open(os.path.join(output_root, 'results.json'), 'w') as f:
        json.dump(results, f, sort_keys=True, indent=4)

    np.save(adv_example_filepath, adv_ex)
    np.save(adv_targets_filepath, adv_targets)

    print(
        accuracy(model, to_cuda(adv_ex, cuda=config.cuda),
                 to_cuda(adv_targets, cuda=config.cuda)))
def visualize_saliency(config):
    """Save saliency-map visualizations for a pretrained model.

    Writes, under ``config.output_root``:
      * the first test batch image ('x0.png') plus one per-class saliency
        map for it ('x_0_grad_<j>.png'), and
      * a joint grid image ('x_joint.png') built from one example of each
        class with each class's saliency maps stacked below.
    """
    # Create the output directory.
    output_root = config.output_root
    if not os.path.isdir(output_root):
        os.makedirs(output_root)

    # Load a pretrained model.
    pretrained_path = config.pretrained_path
    model, pretrained_config = load_model_from_config(pretrained_path)

    # Push model to GPU if available.
    if config.cuda:
        print('Using cuda: Yes')
        model.cuda()
    model.eval()

    # Get data, overriding device and batch-size settings for this run.
    pretrained_config.data.cuda = config.cuda
    pretrained_config.data.batch_size = config.data.batch_size
    data = load_data(pretrained_config)

    # Compute adversarial gradients and save their visualizations.
    # NOTE: the trailing `break` limits this phase to the first batch only.
    for i, (x, y) in enumerate(data['test']):
        x = to_cuda(x, cuda=config.cuda)
        # Save the input image.
        save_path = os.path.join(output_root, 'x{}.png'.format(i))
        save_image(x, save_path)
        # Save the adversarial gradients, one image per target class.
        for j in range(pretrained_config.data.class_count):
            # Compute and save the adversarial gradients.
            x_grad = get_saliency_map(model, x, j)
            save_image(x_grad,
                       os.path.join(output_root,
                                    'x_{}_grad_{}.png'.format(i, j)),
                       normalize=True,
                       scale_each=True)
        break

    # Produce joint image.  `x` here is the batch left over from the loop
    # above; only its per-example shape is used.
    nrow = config.visualization.num_rows
    x_sub = to_cuda(
        torch.zeros(nrow, *x.size()[1:]).copy_(x[:nrow]).detach(),
        config.cuda)
    print("Size of visualization: ", x_sub.size(), "Maximum pixel value: ",
          x_sub.max())

    # Scan the test set for one example of each class, in class order
    # (c counts up only when an example of the currently sought class
    # appears); both breaks fire once every class is represented.
    tensors = []
    c = 0
    for i, (x, y) in enumerate(data['test']):
        for (k, t) in enumerate(y):
            if t == c:
                c += 1
                tensors.append(x[k])
            if len(tensors) == pretrained_config.data.class_count:
                break
        if len(tensors) == pretrained_config.data.class_count:
            break
    # Collect tensors from each class
    x_sub = to_cuda(torch.stack(tensors, 0), cuda=config.cuda)

    # First row is the clean per-class examples; each later row is the
    # saliency maps of those examples w.r.t. one target class j.
    tensors = [x_sub]
    for j in range(pretrained_config.data.class_count):
        # Compute and visualize the adversarial gradients.
        model.zero_grad()
        x_grad = get_saliency_map(model, x_sub, j).clone().detach()
        tensors.append(x_grad)

    # Concatenate and visualize.
    joint_tensor = torch.cat(tensors, dim=0)
    save_image(joint_tensor,
               os.path.join(output_root, 'x_joint.png'),
               nrow=pretrained_config.data.class_count,
               normalize=True,
               colormap='seismic')
def generate_examples(model, config, pretrained_config, output_root):
    """Attack `model` with manual FGS or PGD and save examples + statistics.

    The attack is selected by ``config.fgs`` / ``config.pgd`` and uses radius
    ``config.eps``.  Writes under `output_root`:
      * ``examples.npy``    — the adversarial inputs,
      * ``adv_targets.npy`` — predictions on the adversarial inputs,
      * ``targets.npy``     — the true labels,
      * ``results.json``    — success/margin/distortion statistics.

    Returns:
        torch.Tensor of the generated adversarial examples (on CPU).

    Raises:
        ValueError: if neither ``config.fgs`` nor ``config.pgd`` is set
            (previously this crashed with an UnboundLocalError on `adv`).
    """
    adv_example_filepath = os.path.join(output_root, 'examples')
    adv_targets_filepath = os.path.join(output_root, 'adv_targets')
    targets_filepath = os.path.join(output_root, 'targets')

    n_examples = config['num_examples']
    n_batches = int(
        math.ceil((n_examples * 1.0) / pretrained_config.optim.batch_size))
    # BUG FIX: the original line was a no-op self-assignment
    # (`pretrained_config.cuda = pretrained_config.cuda`).  Propagate the
    # evaluation-time device choice, consistent with the later use of
    # `config.cuda` in the accuracy computation below.
    pretrained_config.cuda = config.cuda
    data = load_data(pretrained_config)

    # Accumulators for the attack results.
    adv_ex = torch.Tensor()
    adv_targets = torch.LongTensor()
    true_targets = torch.LongTensor()
    adv_mse = torch.Tensor()
    adv_inf = torch.Tensor()
    success = torch.Tensor()
    margins = torch.Tensor()

    # Perform the attack batch by batch.  (An unused batch counter was
    # removed from the original.)
    for sample in tqdm(islice(data['validation'], n_batches), total=n_batches):
        model.zero_grad()
        x = to_cuda(sample[0], cuda=pretrained_config.cuda)
        y = to_cuda(sample[1].type(torch.LongTensor),
                    cuda=pretrained_config.cuda)
        true_targets = torch.cat([true_targets, y.detach().cpu()], 0)

        if config.fgs:
            adv, adv_t, original_pred = manual_fgs(model, x, y, config.eps,
                                                   clamp=False)
        elif config.pgd:
            adv, adv_t, original_pred = manual_pgd(model, x, y, config.eps,
                                                   config.eps,
                                                   rand_start=False,
                                                   clamp=False)
        else:
            # FIX: previously fell through and raised UnboundLocalError.
            raise ValueError(
                'generate_examples requires config.fgs or config.pgd')

        adv_ex = torch.cat([adv_ex, adv.cpu().detach()], 0)
        adv_targets = torch.cat([adv_targets, adv_t.cpu().detach()], 0)

        # Margin between the top-2 logits on the clean input.
        original_top_2 = model(x).topk(2, 1)[0]
        original_margin = original_top_2[:, 0] - original_top_2[:, 1]
        margins = torch.cat([margins, original_margin.cpu().detach()], 0)

        # An attack counts as successful only when the clean prediction was
        # correct and the adversarial prediction differs from the label.
        batch_success = ((original_pred == y) & (adv_t != y)).float()
        success = torch.cat([success, batch_success.cpu().detach()], 0)

        # Per-example MSE and L_inf distortion of the perturbation.
        adv_mse = torch.cat([
            adv_mse,
            ((adv.view(adv.size(0), -1) - x.view(adv.size(0), -1))**
             2).mean(-1).cpu().detach()
        ], 0)
        adv_inf = torch.cat([
            adv_inf,
            (adv.view(adv.size(0), -1) -
             x.view(adv.size(0), -1)).abs().max(-1)[0].cpu().detach()
        ], 0)

    total_accuracy = accuracy(model, to_cuda(adv_ex, cuda=config.cuda),
                              to_cuda(true_targets, cuda=config.cuda)).item()

    # Summarize the results.  FIX: guard all success-conditioned statistics;
    # the original guarded only the quartiles, so the means were NaN (0/0)
    # whenever no attack succeeded.
    n_success = success.sum()
    any_success = n_success.item() > 0
    results = {
        "eps": config.eps,
        "success_rate": success.mean().item(),
        "defense_rate": 1 - success.mean().item(),
        "total_acc": total_accuracy,
        "all_margins_mean": margins.mean().item(),
        "successful_margins": ((margins * success).sum() / n_success).item()
                              if any_success else 0.0,
        "mean_mse": ((adv_mse * success).sum() / n_success).item()
                    if any_success else 0.0,
        "mean_inf": ((adv_inf * success).sum() / n_success).item()
                    if any_success else 0.0,
        "mse_quartiles": list(
            np.percentile(adv_mse[success == 1.0].numpy(),
                          [0, 25, 50, 75, 100]))
                    if any_success else [0, 0, 0, 0, 0]
    }
    results["median_mse"] = results["mse_quartiles"][2]

    print("success rate: {}".format(results["success_rate"]))
    print("defense rate: {}".format(results["defense_rate"]))
    print("total accuracy: {}".format(results["total_acc"]))
    print("Avg Margin: {}".format(results['all_margins_mean']))
    print("Avg Success Margin: {}".format(results['successful_margins']))
    print("mean MSE for successful attacks: {}".format(results["mean_mse"]))
    print("mean L_inf for successful attacks: {}".format(results["mean_inf"]))
    print("MSE quartiles for successful attacks: {}".format(
        results["mse_quartiles"]))

    with open(os.path.join(output_root, 'results.json'), 'w') as f:
        json.dump(results, f, sort_keys=True, indent=4)

    np.save(adv_example_filepath, adv_ex)
    np.save(adv_targets_filepath, adv_targets)
    np.save(targets_filepath, true_targets)
    return adv_ex