def craft_one_type(args, model, X, Y, attack, test_loader=None):
    Y = Y.squeeze()
    if attack == 'fgsm':
        # FGSM attack
        print('Crafting fgsm adversarial samples...')
        eps = ATTACK_PARAMS[args.dataset]['eps']
        #eps = 0.03
        X_adv = fast_gradient_sign_method(model, X, Y, eps, test_loader)
    elif attack in ['bim-a', 'bim-b', 'bim']:
        # BIM attack
        print('Crafting %s adversarial samples...' % attack)
        eps = ATTACK_PARAMS[args.dataset]['eps']
        eps_iter = ATTACK_PARAMS[args.dataset]['eps_iter']
        X_adv = basic_iterative_method(model, X, Y, eps, eps_iter, test_loader)
    #elif attack == 'jsma':
    #    # JSMA attack
    #    print('Crafting jsma adversarial samples. This may take a while...')
    #    X_adv = saliency_map_method(
    #        model, X, Y, theta=1, gamma=0.1, clip_min=0., clip_max=1.
    #    )
    #else:
    #    # TODO: CW attack
    #    raise NotImplementedError('CW attack not yet implemented.')

    print(X_adv.shape)
    adv_data = [(x_now, y_now) for x_now, y_now in zip(X_adv, Y)]
    adv_loader = DataLoader(
        dataset=adv_data,
        batch_size=args.batch_size,
    )
    acc = evaluate(model, adv_loader)
    print("Model accuracy on the adversarial test set: %0.2f%%" % (100 * acc))
    np.save('../data/Adv_%s_%s_train.npy' % (args.dataset, args.attack), X_adv)
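# For reference: a minimal sketch of what the imported `fast_gradient_sign_method`
# helper likely computes, assuming the standard single-step FGSM formulation
# (sign of the input gradient, scaled by eps, clipped to [0, 1]). The project's
# actual helper takes a `test_loader` argument and may batch, normalize, and clip
# differently; this sketch is illustrative, not the repo's implementation.
def _fgsm_sketch(model, x, y, eps):
    x = x.clone().detach().requires_grad_(True)
    loss = torch.nn.functional.cross_entropy(model(x), y)
    loss.backward()
    # Step each pixel by eps in the direction that increases the loss.
    return (x + eps * x.grad.sign()).clamp(0., 1.).detach()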
def train(args, model, device, train_loader, test_loader=None):
    running_loss = 0
    model.train()
    model.to(device)
    # optimizer = Adadelta
    optimizer = optim.Adadelta(model.parameters(), lr=args.lr)
    loss_criterion = torch.nn.CrossEntropyLoss()
    for i_epoch in range(args.epochs):
        print('training epoch %d' % i_epoch)
        pbar = tqdm(train_loader)
        ## train
        for batch_data in pbar:
            optimizer.zero_grad()
            x = batch_data[0].to(device)
            y = batch_data[1].to(device)
            pred = model(x)
            loss = loss_criterion(pred, y)
            # L2-regularize the linear layers on CIFAR.
            if args.dataset == "cifar":
                for m in model.modules():
                    if m.__class__.__name__.startswith("Linear"):
                        loss += 0.01 * torch.norm(m.weight, p=2)
            loss.backward()
            running_loss += loss.item()
            optimizer.step()
            pbar.set_description('loss: %f' % loss.item())
        if test_loader is not None:
            acc = evaluate(model, test_loader)
            print('DEV accuracy: ', acc)
    model_dir = '../model/'
    path = model_dir + args.dataset + '_' + str(args.epochs) + '.pth'
    torch.save(model.state_dict(), path)
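# For reference: `evaluate` is imported from the project's utilities. A minimal
# version, assuming it returns top-1 accuracy as a float in [0, 1], might be:
def _evaluate_sketch(model, loader, device='cuda'):
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for x, y in loader:
            x, y = x.to(device), y.to(device)
            correct += (model(x).argmax(dim=1) == y).sum().item()
            total += y.size(0)
    return correct / total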
def main(args):
    #### assertions
    assert args.dataset in ['mnist', 'cifar', 'svhn'], \
        "Dataset parameter must be either 'mnist', 'cifar' or 'svhn'"
    assert args.attack in ['fgsm', 'bim-a', 'bim-b', 'bim', 'jsma', 'cw', 'all'], \
        "Attack parameter must be one of 'fgsm', 'bim-a', 'bim-b', 'bim', " \
        "'jsma', 'cw' or 'all'"
    assert os.path.isfile(args.model), \
        'model file not found... must first train model using train_model.py.'
    print('Dataset: %s. Attack: %s' % (args.dataset, args.attack))
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    ## load model
    model = get_model(args.dataset)
    model.load_state_dict(torch.load(args.model))
    model.to(device)

    # NOTE: adversarial samples are crafted from the *training* split
    # (the saved file is suffixed `_train.npy`).
    test_dataset = get_data(args.dataset, train=True)

    ## evaluate on the clean data
    test_loader = DataLoader(dataset=test_dataset, batch_size=args.batch_size)
    acc = evaluate(model, test_loader)
    print("Accuracy on the test set: %0.2f%%" % (100 * acc))

    X_test = [tmp[0] for tmp in test_dataset]
    X_test = torch.stack(X_test, dim=0)
    print('X: ', X_test.shape)
    Y_test = [torch.tensor([tmp[1]], dtype=torch.long) for tmp in test_dataset]
    Y_test = torch.stack(Y_test, dim=0)
    print('Y:', Y_test.shape)

    ## craft
    craft_one_type(args, model, X_test, Y_test, args.attack, test_loader)
    print('Adversarial samples crafted and saved to data/ subfolder.')
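# For reference: a minimal sketch of the iterative attack that the imported
# `basic_iterative_method` (used in craft_one_type above) likely performs,
# assuming the standard BIM loop: repeated FGSM steps of size eps_iter,
# projected back into the eps-ball around the clean input. The bim-a / bim-b
# stopping criteria and the repo's exact signature are omitted; this is an
# assumption-laden illustration, not the real helper.
def _bim_sketch(model, x, y, eps, eps_iter, n_iter=10):
    x_adv = x.clone().detach()
    for _ in range(n_iter):
        x_adv.requires_grad_(True)
        loss = torch.nn.functional.cross_entropy(model(x_adv), y)
        loss.backward()
        with torch.no_grad():
            x_adv = x_adv + eps_iter * x_adv.grad.sign()
            # Project back into the eps-ball around the clean input.
            x_adv = torch.max(torch.min(x_adv, x + eps), x - eps).clamp(0., 1.)
        x_adv = x_adv.detach()
    return x_adv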
# Earlier single-feature (density + uncertainty) version of the detector,
# kept commented out; the active main() follows below.
'''
def main(args):
    ## assertions
    assert args.dataset in ['mnist', 'cifar', 'svhn'], \
        "Dataset parameter must be either 'mnist', 'cifar' or 'svhn'"
    assert args.attack in ['fgsm', 'bim-a', 'bim-b', 'bim', 'jsma', 'cw', 'all'], \
        "Attack parameter must be one of 'fgsm', 'bim-a', 'bim-b', 'bim', " \
        "'jsma', 'cw' or 'all'"
    assert os.path.isfile('../data/Adv_%s_%s.npy' % (args.dataset, args.attack)), \
        'adversarial sample file not found... must first craft adversarial ' \
        'samples using craft_adv_samples.py'
    print('Loading the data and model...')

    # Load the model
    model = get_model(args.dataset)
    model.load_state_dict(torch.load(args.model))
    model.to('cuda')
    model.eval()

    # Load the dataset
    train_data, test_data = get_data(args.dataset)
    train_loader = DataLoader(
        dataset=train_data,
        batch_size=args.batch_size,
    )

    ##### Load adversarial samples (created by craft_adv_samples.py)
    print('Loading noisy and adversarial samples...')
    X_test_adv = np.load('../data/Adv_%s_%s.npy' % (args.dataset, args.attack))
    X_test_adv = torch.from_numpy(X_test_adv)
    test_adv = [(x_tmp, y_tmp[1]) for x_tmp, y_tmp in zip(X_test_adv, test_data)]

    ##### create noisy data
    noise_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
        AddGaussianNoise(0., 0.1)
    ])
    train_noisy, test_noisy = get_data(args.dataset, noise_transform)

    # Check model accuracies on each sample type
    for s_type, dataset in zip(['normal', 'noisy', 'adversarial'],
                               [test_data, test_noisy, test_adv]):
        data_loader = DataLoader(
            dataset=dataset,
            batch_size=1,
        )
        acc = evaluate(model, data_loader)
        print("Model accuracy on the %s test set: %0.2f%%" % (s_type, 100 * acc))
        # TODO: compute and display average perturbation sizes
        #if not s_type == 'normal':
        #    l2_diff = np.linalg.norm(
        #        dataset.reshape((len(X_test), -1)) -
        #        X_test.reshape((len(X_test), -1)),
        #        axis=1
        #    ).mean()
        #    print("Average L-2 perturbation size of the %s test set: %0.2f" %
        #          (s_type, l2_diff))

    ### Refine the normal, noisy and adversarial sets to only include samples
    ### for which the original version was correctly classified by the model
    test_loader = DataLoader(dataset=test_data, batch_size=args.batch_size)
    y_test_list = []
    pred_list = []
    with torch.no_grad():
        for batch in test_loader:
            x = batch[0].to('cuda')
            y_test_list.append(batch[1])
            pred_list.append(model(x))
    pred_test = torch.cat(pred_list)
    Y_test = torch.cat(y_test_list).to('cuda')
    inds_correct = torch.where(Y_test == pred_test.argmax(axis=1),
                               torch.full_like(Y_test, 1),
                               torch.full_like(Y_test, 0)).to('cuda')
    picked_test_data = []
    picked_test_data_noisy = []
    picked_test_data_adv = []
    for i, (b, y_tmp) in enumerate(zip(inds_correct, Y_test)):
        if b == 1:
            picked_test_data.append((test_data[i][0], y_tmp))
            picked_test_data_noisy.append((test_noisy[i][0], y_tmp))
            picked_test_data_adv.append((X_test_adv[i], y_tmp))
    picked_test_loader = DataLoader(dataset=picked_test_data,
                                    batch_size=args.batch_size)
    picked_test_noisy_loader = DataLoader(dataset=picked_test_data_noisy,
                                          batch_size=args.batch_size)
    picked_test_adv_loader = DataLoader(dataset=picked_test_data_adv,
                                        batch_size=args.batch_size)

    #######################################
    ## Get Bayesian uncertainty scores
    nb_size = 50
    print('Getting Monte Carlo dropout variance predictions...')
    uncerts_normal = get_mc_predictions(model, picked_test_loader,
                                        nb_iter=nb_size)
    uncerts_noisy = get_mc_predictions(model, picked_test_noisy_loader,
                                       nb_iter=nb_size)
    uncerts_adv = get_mc_predictions(model, picked_test_adv_loader,
                                     nb_iter=nb_size)
    print(uncerts_normal.shape)
    print(uncerts_noisy.shape)
    print(uncerts_adv.shape)

    ## Get KDE scores
    # Get deep feature representations
    print('Getting deep feature representations...')
    x_train_features = get_deep_representations(model, train_loader,
                                                args.dataset)
    x_test_normal_features = get_deep_representations(model, picked_test_loader,
                                                      args.dataset)
    x_test_noisy_features = get_deep_representations(model,
                                                     picked_test_noisy_loader,
                                                     args.dataset)
    x_test_adv_features = get_deep_representations(model,
                                                   picked_test_adv_loader,
                                                   args.dataset)
    print(x_train_features.shape)
    print(x_test_normal_features.shape)
    print(x_test_noisy_features.shape)
    print(x_test_adv_features.shape)

    class_num = 10
    Y_train_label = np.array([tmp[1] for tmp in train_data])
    # One-hot encode the training labels.
    Y_train = np.zeros((len(Y_train_label), class_num))
    Y_train[np.arange(Y_train_label.size), Y_train_label] = 1

    # Train one KDE per class
    print('Training KDEs...')
    class_inds = {}
    for i in range(class_num):
        class_inds[i] = np.where(Y_train_label == i)[0]
        print('class_inds[', i, ']: ', class_inds[i].size)
    kdes = {}
    warnings.warn("Using pre-set kernel bandwidths that were determined "
                  "optimal for the specific CNN models of the paper. If you've "
                  "changed your model, you'll need to re-optimize the bandwidth.")
    for i in range(class_num):
        kdes[i] = KernelDensity(
            kernel='gaussian',
            bandwidth=BANDWIDTHS[args.dataset]
        ).fit(x_train_features.cpu().numpy()[class_inds[i]])

    # Get model predictions
    print('Computing model predictions...')
    data_loaders = [picked_test_loader, picked_test_noisy_loader,
                    picked_test_adv_loader]
    preds = []
    for now_loader in data_loaders:
        with torch.no_grad():
            tmp_result = []
            for batch in now_loader:
                x = batch[0].to('cuda')
                tmp_result.append(model(x))
            preds.append(torch.cat(tmp_result))
    preds_test_normal = torch.argmax(preds[0], dim=1)
    preds_test_noisy = torch.argmax(preds[1], dim=1)
    preds_test_adv = torch.argmax(preds[2], dim=1)

    # Get density estimates
    print('Computing densities...')
    densities_normal = score_samples(kdes, x_test_normal_features.cpu(),
                                     preds_test_normal.cpu())
    densities_noisy = score_samples(kdes, x_test_noisy_features.cpu(),
                                    preds_test_noisy.cpu())
    densities_adv = score_samples(kdes, x_test_adv_features.cpu(),
                                  preds_test_adv.cpu())

    ## Z-score the uncertainty and density values
    uncerts_normal_z, uncerts_adv_z, uncerts_noisy_z = normalize(
        uncerts_normal.cpu().numpy(),
        uncerts_adv.cpu().numpy(),
        uncerts_noisy.cpu().numpy()
    )
    densities_normal_z, densities_adv_z, densities_noisy_z = normalize(
        densities_normal,
        densities_adv,
        densities_noisy
    )
    print('.......mean.......')
    print(densities_normal_z.mean())
    print(densities_adv_z.mean())
    print(densities_noisy_z.mean())

    ## Build detector
    ### combined features
    values_combine, labels_combine, lr_combine = train_lr(
        densities_pos=densities_adv_z,
        densities_neg=np.concatenate((densities_normal_z, densities_noisy_z)),
        uncerts_pos=uncerts_adv_z,
        uncerts_neg=np.concatenate((uncerts_normal_z, uncerts_noisy_z)),
        flag='combine'
    )
    ### density only
    values_dense, labels_dense, lr_dense = train_lr(
        densities_pos=densities_adv_z,
        densities_neg=np.concatenate((densities_normal_z, densities_noisy_z)),
        uncerts_pos=uncerts_adv_z,
        uncerts_neg=np.concatenate((uncerts_normal_z, uncerts_noisy_z)),
        flag='dense'
    )
    ### uncertainty only
    values_uncert, labels_uncert, lr_uncert = train_lr(
        densities_pos=densities_adv_z,
        densities_neg=np.concatenate((densities_normal_z, densities_noisy_z)),
        uncerts_pos=uncerts_adv_z,
        uncerts_neg=np.concatenate((uncerts_normal_z, uncerts_noisy_z)),
        flag='uncert'
    )

    ## Evaluate detector
    # Compute logistic regression model predictions
    probs_combine = lr_combine.predict_proba(values_combine)[:, 1]
    probs_dense = lr_dense.predict_proba(values_dense)[:, 1]
    probs_uncert = lr_uncert.predict_proba(values_uncert)[:, 1]

    # Compute AUC
    n_samples = len(picked_test_data)
    # The first 2/3 of 'probs' is the negative class (normal and noisy samples),
    # and the last 1/3 is the positive class (adversarial samples).
    prob_datas = [
        (probs_combine[:2 * n_samples], probs_combine[2 * n_samples:], 'combine'),
        (probs_dense[:2 * n_samples], probs_dense[2 * n_samples:], 'dense'),
        (probs_uncert[:2 * n_samples], probs_uncert[2 * n_samples:], 'uncert')
    ]
    _, _, auc_score = compute_roc(prob_datas, plot=True)
'''
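# For reference: `AddGaussianNoise`, used in the transform pipelines above and
# below, is a project-local class. A typical implementation of such a transform
# (hypothetical, not necessarily the repo's) adds i.i.d. Gaussian noise to an
# already-normalized tensor:
class _AddGaussianNoiseSketch:
    def __init__(self, mean=0., std=1.):
        self.mean = mean
        self.std = std

    def __call__(self, tensor):
        return tensor + torch.randn_like(tensor) * self.std + self.mean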
def main(args):
    print(args)
    datatypes = ['normal', 'noisy', 'adversarial']

    ## assertions
    assert args.dataset in ['mnist', 'cifar', 'svhn'], \
        "Dataset parameter must be either 'mnist', 'cifar' or 'svhn'"
    assert args.attack in ['fgsm', 'bim-a', 'bim-b', 'bim', 'jsma', 'cw', 'all'], \
        "Attack parameter must be one of 'fgsm', 'bim-a', 'bim-b', 'bim', " \
        "'jsma', 'cw' or 'all'"
    #assert os.path.isfile('../data/Adv_%s_%s.npy' % (args.dataset, args.attack)), \
    #    'adversarial sample file not found... must first craft adversarial ' \
    #    'samples using craft_adv_samples.py'
    print('Loading the data and model...')
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Load the model
    model = get_model(args.dataset)
    model.load_state_dict(torch.load(args.model))
    model.to(device)
    model.eval()

    # Load the dataset
    train_data = get_data(args.dataset, train=True)
    train_loader = DataLoader(dataset=train_data, batch_size=args.batch_size,
                              shuffle=False)

    ##### Load adversarial samples (created by craft_adv_samples.py)
    print('Loading noisy and adversarial samples...')
    ### train_adv
    X_train_adv = np.load('../data/Adv_%s_%s_train.npy' %
                          (args.dataset, args.attack))
    X_train_adv = torch.from_numpy(X_train_adv)
    train_adv = [(x_tmp, y_tmp[1]) for x_tmp, y_tmp in zip(X_train_adv, train_data)]

    ##### create noisy data
    if args.dataset == 'mnist':
        noise_transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
            AddGaussianNoise(0., 0.1)
        ])
    elif args.dataset == 'cifar':
        noise_transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(mean=(0.4914, 0.4822, 0.4465),
                                 std=(0.247, 0.243, 0.261)),
            AddGaussianNoise(0., 0.1)
        ])
    train_noisy = get_data(args.dataset, train=True, transform=noise_transform)

    X_train, Y_train = getXY(train_data)

    # Check model accuracies on each sample type
    for s_type, dataset in zip(datatypes, [train_data, train_noisy, train_adv]):
        data_loader = DataLoader(
            dataset=dataset,
            batch_size=args.batch_size,
        )
        acc = evaluate(model, data_loader)
        print("Model accuracy on the %s train set: %0.2f%%" % (s_type, 100 * acc))
        # Compute and display average perturbation sizes
        X_now, Y_now = getXY(dataset)
        if not s_type == 'normal':
            l2_diff = np.linalg.norm(
                X_now.reshape((len(X_train), -1)) -
                X_train.reshape((len(X_train), -1)),
                axis=1
            ).mean()
            print("Average L-2 perturbation size of the %s train set: %0.2f" %
                  (s_type, l2_diff))

    ### Refine the normal, noisy and adversarial sets to only include samples
    ### for which the original version was correctly classified by the model
    y_train_list = []
    pred_train_list = []
    with torch.no_grad():
        for batch in train_loader:
            x = batch[0].to(device)
            y_train_list.append(batch[1])
            pred_train_list.append(model(x))
    Y_train = torch.cat(y_train_list).detach().cpu()
    pred_train = torch.cat(pred_train_list).detach().cpu()
    inds_correct = torch.where(Y_train == pred_train.argmax(axis=1),
                               torch.full_like(Y_train, 1),
                               torch.full_like(Y_train, 0))

    picked_train_data = {}
    for datatype in datatypes:
        picked_train_data[datatype] = []
    for i, (b, y_tmp) in enumerate(zip(inds_correct, Y_train)):
        if b == 1:
            picked_train_data['normal'].append((train_data[i][0], y_tmp))
            picked_train_data['noisy'].append((train_noisy[i][0], y_tmp))
            picked_train_data['adversarial'].append((X_train_adv[i], y_tmp))

    picked_train_loader = {}
    for datatype in datatypes:
        picked_train_loader[datatype] = DataLoader(
            dataset=picked_train_data[datatype],
            batch_size=args.batch_size)

    ################# Get Bayesian uncertainty scores
    nb_size = 20

    #### MC-dropout variance
    print('Getting Monte Carlo dropout variance...')
    mc_variance = {}
    for datatype in datatypes:
        mc_variance[datatype] = get_mc_predictions(
            model, picked_train_loader[datatype],
            nb_iter=nb_size, method='default')

    #### MC-dropout entropy
    print('Getting Monte Carlo dropout entropy...')
    mc_entropy = {}
    for datatype in datatypes:
        mc_entropy[datatype] = get_mc_predictions(
            model, picked_train_loader[datatype],
            nb_iter=nb_size, method='entropy')
        # Replace NaNs (e.g. from log(0)) with zero.
        where_are_NaNs = isnan(mc_entropy[datatype])
        mc_entropy[datatype][where_are_NaNs] = 0

    ### predictive entropy
    print('Getting entropy...')
    entropy = {}
    for datatype in datatypes:
        entropy[datatype] = get_entropy(model, picked_train_loader[datatype])
        where_are_NaNs = isnan(entropy[datatype])
        entropy[datatype][where_are_NaNs] = 0

    ################# Get KDE scores
    # Get deep feature representations
    print('Getting deep feature representations...')
    x_train_features = get_deep_representations(model, train_loader,
                                                args.dataset)
    picked_train_features = {}
    for datatype in datatypes:
        picked_train_features[datatype] = get_deep_representations(
            model, picked_train_loader[datatype], args.dataset)
    print('Shape')
    print(x_train_features.shape)
    for datatype in datatypes:
        print(picked_train_features[datatype].shape)

    ####### CLASS NUM ########
    class_num = 10
    Y_train_label = np.array([tmp[1] for tmp in train_data])
    # One-hot encode the training labels.
    Y_train = np.zeros((len(Y_train_label), class_num))
    Y_train[np.arange(Y_train_label.size), Y_train_label] = 1

    # Train one KDE per class
    print('Training KDEs...')
    class_inds = {}
    for i in range(class_num):
        class_inds[i] = np.where(Y_train_label == i)[0]
        print('class_inds[', i, ']: ', class_inds[i].size)
    kdes = {}
    warnings.warn("Using pre-set kernel bandwidths that were determined "
                  "optimal for the specific CNN models of the paper. If you've "
                  "changed your model, you'll need to re-optimize the bandwidth.")
    ### Use train features to fit the per-class kernel density estimators
    for i in range(class_num):
        kdes[i] = KernelDensity(
            kernel='gaussian',
            bandwidth=BANDWIDTHS[args.dataset]
        ).fit(x_train_features.cpu().numpy()[class_inds[i]])

    # Get model predictions
    print('Computing model predictions...')
    data_loaders = [picked_train_loader[datatype] for datatype in datatypes]
    preds = []
    preds_train = {}
    for now_loader in data_loaders:
        with torch.no_grad():
            tmp_result = []
            for batch in now_loader:
                x = batch[0].to(device)
                pred = model(x)
                tmp_result.append(pred.detach().cpu())
            preds.append(torch.cat(tmp_result))
    preds_train['normal'] = torch.argmax(preds[0], dim=1)
    preds_train['noisy'] = torch.argmax(preds[1], dim=1)
    preds_train['adversarial'] = torch.argmax(preds[2], dim=1)

    # Get density estimates
    print('Computing densities...')
    train_densities = {}
    for datatype in datatypes:
        train_densities[datatype] = score_samples(
            kdes, picked_train_features[datatype].cpu(),
            preds_train[datatype].cpu())

    ###### Z-score the uncertainty and density values
    mc_entropy_z = {}
    mc_entropy_z['normal'], mc_entropy_z['noisy'], mc_entropy_z['adversarial'] = \
        normalize(
            mc_entropy['normal'].cpu().numpy(),
            mc_entropy['noisy'].cpu().numpy(),
            mc_entropy['adversarial'].cpu().numpy(),
        )
    mc_variance_z = {}
    mc_variance_z['normal'], mc_variance_z['noisy'], mc_variance_z['adversarial'] = \
        normalize(
            mc_variance['normal'].cpu().numpy(),
            mc_variance['noisy'].cpu().numpy(),
            mc_variance['adversarial'].cpu().numpy(),
        )
    entropy_z = {}
    entropy_z['normal'], entropy_z['noisy'], entropy_z['adversarial'] = \
        normalize(
            entropy['normal'].cpu().numpy(),
            entropy['noisy'].cpu().numpy(),
            entropy['adversarial'].cpu().numpy(),
        )
    densities_z = {}
    densities_z['normal'], densities_z['noisy'], densities_z['adversarial'] = \
        normalize(
            train_densities['normal'],
            train_densities['noisy'],
            train_densities['adversarial'],
        )

    print('.......Densities............')
    for datatype in datatypes:
        print(datatype, ' Mean: ', densities_z[datatype].mean())

    ## Build detector
    ### one logistic-regression detector per feature set, plus a combined one
    flags = ['dense', 'entropy', 'mc_entropy', 'mc_variance', 'combine']
    values = {}
    labels = {}
    lrs = {}
    for now_flag in flags:
        print('processing %s ...' % now_flag)
        tmp_values, tmp_labels, tmp_lr = train_lr(
            densities=(densities_z['adversarial'],
                       np.concatenate((densities_z['normal'],
                                       densities_z['noisy']))),
            entropy=(entropy_z['adversarial'],
                     np.concatenate((entropy_z['normal'],
                                     entropy_z['noisy']))),
            mc_entropy=(mc_entropy_z['adversarial'],
                        np.concatenate((mc_entropy_z['normal'],
                                        mc_entropy_z['noisy']))),
            mc_variance=(mc_variance_z['adversarial'],
                         np.concatenate((mc_variance_z['normal'],
                                         mc_variance_z['noisy']))),
            flag=now_flag)
        values[now_flag] = tmp_values
        labels[now_flag] = tmp_labels
        lrs[now_flag] = tmp_lr

    if args.do_test:
        test_values, test_labels, test_num = evaluate_test(
            args, model, kdes, datatypes, nb_size, flags)

    ## Evaluate detector
    ### evaluate on the test split if requested, else on the train split
    probs = {}
    for flag in flags:
        if args.do_test:
            probs[flag] = lrs[flag].predict_proba(test_values[flag])[:, 1]
        else:
            probs[flag] = lrs[flag].predict_proba(values[flag])[:, 1]

    # Compute AUC
    if args.do_test:
        n_samples = test_num
    else:
        n_samples = len(train_data)
    # The first 2/3 of 'probs' is the negative class (normal and noisy samples),
    # and the last 1/3 is the positive class (adversarial samples).
    prob_datas = []
    for flag in flags:
        prob_datas.append(
            (probs[flag][:2 * n_samples], probs[flag][2 * n_samples:], flag))
    _, _, auc_score = compute_roc(prob_datas, plot=True, pic_name=args.pic_name)
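# For reference: `normalize` z-scores each feature array so the logistic-
# regression detectors see comparably scaled inputs. A minimal sketch, assuming
# the statistics are pooled across all three splits and returned in argument
# order (the repo's version may fit the statistics differently):
def _normalize_sketch(normal, adv, noisy):
    pooled = np.concatenate((normal, adv, noisy))
    mu, sigma = pooled.mean(), pooled.std()
    return (normal - mu) / sigma, (adv - mu) / sigma, (noisy - mu) / sigma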