def eval_kernel_density(model, X, batch_size=256, reset_kde=False, **kwargs):
    # Fit the per-class KDEs on first use (or when explicitly reset), then
    # score X under the KDE of each sample's predicted class. The globals()
    # guard avoids a NameError before the first init_kernel_densities call.
    if reset_kde or 'kernel_densities' not in globals() \
            or len(kernel_densities) == 0:
        init_kernel_densities(model, batch_size=batch_size, **kwargs)
    features = get_deep_representations(model, X, batch_size=batch_size)
    preds_class = np.argmax(
        model.predict(X, verbose=0, batch_size=batch_size), axis=1)
    densities = score_samples(kernel_densities, features, preds_class)
    return densities
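# Example use (a sketch; keyword names follow the init_kernel_densities
# signature below): the first call fits the per-class KDEs and caches them in
# the module-level `kernel_densities`; pass reset_kde=True to refit after the
# model or training data changes.
#
#   densities = eval_kernel_density(model, X_test, X_train=X_train,
#                                   Y_train=Y_train, dataset='cifar')
#   densities = eval_kernel_density(model, X_test)  # reuses the cached KDEs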
def init_kernel_densities(model, X_train, Y_train, batch_size=256,
                          dataset="cifar"):
    # Fit one Gaussian KDE per class on the deep features of the training
    # samples belonging to that class, and cache them in a module-level dict.
    global kernel_densities
    kernel_densities = {}
    class_inds = {}
    for i in range(Y_train.shape[1]):
        class_inds[i] = np.where(Y_train.argmax(axis=1) == i)[0]
    X_train_features = get_deep_representations(model, X_train,
                                                batch_size=batch_size)
    for i in range(Y_train.shape[1]):
        kernel_densities[i] = KernelDensity(
            kernel='gaussian',
            bandwidth=BANDWIDTHS[dataset]).fit(X_train_features[class_inds[i]])
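# Hedged sketch of the `score_samples` helper used above (an assumption; the
# repo's own implementation may differ): each sample's score is the KDE
# log-density under the estimator of its predicted class, via sklearn's
# KernelDensity.score_samples.
import numpy as np

def score_samples(kdes, features, preds):
    features = np.asarray(features)
    return np.asarray([
        kdes[int(label)].score_samples(feat.reshape(1, -1))[0]
        for feat, label in zip(features, preds)
    ])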
def main(args):
    assert args.dataset in ['mnist', 'cifar', 'svhn'], \
        "Dataset parameter must be either 'mnist', 'cifar' or 'svhn'"
    assert args.attack in ['fgsm', 'bim-a', 'bim-b', 'jsma', 'cw', 'all'], \
        "Attack parameter must be either 'fgsm', 'bim-a', 'bim-b', " \
        "'jsma', 'cw' or 'all'"
    assert os.path.isfile('../data/model_%s.h5' % args.dataset), \
        'model file not found... must first train model using train_model.py.'
    assert os.path.isfile('../data/Adv_%s_%s.npy' % (args.dataset, args.attack)), \
        'adversarial sample file not found... must first craft adversarial ' \
        'samples using craft_adv_samples.py'

    print('Loading the data and model...')
    # Load the model
    model = load_model('../data/model_%s.h5' % args.dataset)
    # Load the dataset
    X_train, Y_train, X_test, Y_test = get_data(args.dataset)

    # Check attack type, select adversarial and noisy samples accordingly
    print('Loading noisy and adversarial samples...')
    if args.attack == 'all':
        # TODO: implement 'all' option
        # X_test_adv = ...
        # X_test_noisy = ...
        raise NotImplementedError("'All' types detector not yet implemented.")
    else:
        # Load adversarial samples
        X_test_adv = np.load('../data/Adv_%s_%s.npy' %
                             (args.dataset, args.attack))
        # Craft an equal number of noisy samples
        X_test_noisy = get_noisy_samples(X_test, X_test_adv, args.dataset,
                                         args.attack)

    # Check model accuracies on each sample type
    for s_type, dataset in zip(['normal', 'noisy', 'adversarial'],
                               [X_test, X_test_noisy, X_test_adv]):
        _, acc = model.evaluate(dataset, Y_test, batch_size=args.batch_size,
                                verbose=0)
        print("Model accuracy on the %s test set: %0.2f%%" %
              (s_type, 100 * acc))
        # Compute and display average perturbation sizes
        if not s_type == 'normal':
            l2_diff = np.linalg.norm(
                dataset.reshape((len(X_test), -1)) -
                X_test.reshape((len(X_test), -1)),
                axis=1).mean()
            print("Average L-2 perturbation size of the %s test set: %0.2f" %
                  (s_type, l2_diff))

    # Refine the normal, noisy and adversarial sets to only include samples
    # for which the original version was correctly classified by the model
    preds_test = model.predict_classes(X_test, verbose=0,
                                       batch_size=args.batch_size)
    inds_correct = np.where(preds_test == Y_test.argmax(axis=1))[0]
    X_test = X_test[inds_correct]
    X_test_noisy = X_test_noisy[inds_correct]
    X_test_adv = X_test_adv[inds_correct]

    ## Get Bayesian uncertainty scores
    print('Getting Monte Carlo dropout variance predictions...')
    uncerts_normal = get_mc_predictions(model, X_test,
                                        batch_size=args.batch_size) \
        .var(axis=0).mean(axis=1)
    uncerts_noisy = get_mc_predictions(model, X_test_noisy,
                                       batch_size=args.batch_size) \
        .var(axis=0).mean(axis=1)
    uncerts_adv = get_mc_predictions(model, X_test_adv,
                                     batch_size=args.batch_size) \
        .var(axis=0).mean(axis=1)

    ## Get KDE scores
    # Get deep feature representations
    print('Getting deep feature representations...')
    X_train_features = get_deep_representations(model, X_train,
                                                batch_size=args.batch_size)
    X_test_normal_features = get_deep_representations(
        model, X_test, batch_size=args.batch_size)
    X_test_noisy_features = get_deep_representations(
        model, X_test_noisy, batch_size=args.batch_size)
    X_test_adv_features = get_deep_representations(
        model, X_test_adv, batch_size=args.batch_size)

    # Train one KDE per class
    print('Training KDEs...')
    class_inds = {}
    for i in range(Y_train.shape[1]):
        class_inds[i] = np.where(Y_train.argmax(axis=1) == i)[0]
    kdes = {}
    warnings.warn("Using pre-set kernel bandwidths that were determined "
                  "optimal for the specific CNN models of the paper. If "
                  "you've changed your model, you'll need to re-optimize "
                  "the bandwidth.")
    for i in range(Y_train.shape[1]):
        kdes[i] = KernelDensity(kernel='gaussian',
                                bandwidth=BANDWIDTHS[args.dataset]) \
            .fit(X_train_features[class_inds[i]])

    # Get model predictions
    print('Computing model predictions...')
    preds_test_normal = model.predict_classes(X_test, verbose=0,
                                              batch_size=args.batch_size)
    preds_test_noisy = model.predict_classes(X_test_noisy, verbose=0,
                                             batch_size=args.batch_size)
    preds_test_adv = model.predict_classes(X_test_adv, verbose=0,
                                           batch_size=args.batch_size)

    # Get density estimates
    print('Computing densities...')
    densities_normal = score_samples(kdes, X_test_normal_features,
                                     preds_test_normal)
    densities_noisy = score_samples(kdes, X_test_noisy_features,
                                    preds_test_noisy)
    densities_adv = score_samples(kdes, X_test_adv_features, preds_test_adv)

    ## Z-score the uncertainty and density values
    uncerts_normal_z, uncerts_adv_z, uncerts_noisy_z = normalize(
        uncerts_normal, uncerts_adv, uncerts_noisy)
    densities_normal_z, densities_adv_z, densities_noisy_z = normalize(
        densities_normal, densities_adv, densities_noisy)

    ## Build detector
    values, labels, lr = train_lr(
        densities_pos=densities_adv_z,
        densities_neg=np.concatenate((densities_normal_z, densities_noisy_z)),
        uncerts_pos=uncerts_adv_z,
        uncerts_neg=np.concatenate((uncerts_normal_z, uncerts_noisy_z)))

    ## Evaluate detector
    # Compute logistic regression model predictions
    probs = lr.predict_proba(values)[:, 1]
    # Compute AUC
    n_samples = len(X_test)
    # The first 2/3 of 'probs' is the negative class (normal and noisy
    # samples), and the last 1/3 is the positive class (adversarial samples).
    _, _, auc_score = compute_roc(probs_neg=probs[:2 * n_samples],
                                  probs_pos=probs[2 * n_samples:])
    print('Detector ROC-AUC score: %0.4f' % auc_score)
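# Hedged sketch of the `normalize` helper used above (an assumption about the
# repo's implementation): z-score all three splits with statistics of the
# pooled values, so normal, adversarial and noisy scores share one scale.
import numpy as np
from sklearn.preprocessing import scale

def normalize(normal, adv, noisy):
    n = len(normal)
    pooled = scale(np.concatenate((normal, adv, noisy)))
    return pooled[:n], pooled[n:n + len(adv)], pooled[n + len(adv):]

# Hedged sketch of `train_lr` with the signature used above (again an
# assumption): stack density and uncertainty into a 2-D feature per sample,
# label adversarial samples 1 and normal/noisy samples 0, and fit a logistic
# regression detector.
from sklearn.linear_model import LogisticRegressionCV

def train_lr(densities_pos, densities_neg, uncerts_pos, uncerts_neg):
    values_neg = np.stack((densities_neg, uncerts_neg), axis=1)
    values_pos = np.stack((densities_pos, uncerts_pos), axis=1)
    values = np.concatenate((values_neg, values_pos))
    labels = np.concatenate((np.zeros(len(values_neg)),
                             np.ones(len(values_pos))))
    lr = LogisticRegressionCV(n_jobs=-1).fit(values, labels)
    return values, labels, lr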
def main(args):
    ## Assertions
    assert args.dataset in ['mnist', 'cifar', 'svhn'], \
        "Dataset parameter must be either 'mnist', 'cifar' or 'svhn'"
    assert args.attack in ['fgsm', 'bim-a', 'bim-b', 'bim', 'jsma', 'cw',
                           'all'], \
        "Attack parameter must be either 'fgsm', 'bim-a', 'bim-b', 'bim', " \
        "'jsma', 'cw' or 'all'"
    assert os.path.isfile('../data/Adv_%s_%s.npy' % (args.dataset, args.attack)), \
        'adversarial sample file not found... must first craft adversarial ' \
        'samples using craft_adv_samples.py'

    print('Loading the data and model...')
    # Load the model
    model = get_model(args.dataset)
    model.load_state_dict(torch.load(args.model))
    model.to('cuda')
    model.eval()
    # Load the dataset
    train_data, test_data = get_data(args.dataset)
    train_loader = DataLoader(dataset=train_data, batch_size=args.batch_size)

    ##### Load adversarial samples (created by craft_adv_samples.py)
    print('Loading noisy and adversarial samples...')
    X_test_adv = np.load('../data/Adv_%s_%s.npy' % (args.dataset, args.attack))
    X_test_adv = torch.from_numpy(X_test_adv)
    # train_adv = [(x_tmp, y_tmp[1]) for x_tmp, y_tmp in zip(X_train_adv, train_data)]
    test_adv = [(x_tmp, y_tmp[1]) for x_tmp, y_tmp in zip(X_test_adv, test_data)]

    ##### Create noisy data (MNIST normalization stats; std fixed from the
    ##### apparent 0.3781 typo to the usual 0.3081)
    noise_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
        AddGaussianNoise(0., 0.1)
    ])
    train_noisy, test_noisy = get_data(args.dataset, noise_transform)

    # Check model accuracies on each sample type
    for s_type, dataset in zip(['normal', 'noisy', 'adversarial'],
                               [test_data, test_noisy, test_adv]):
        data_loader = DataLoader(dataset=dataset, batch_size=1)
        acc = evaluate(model, data_loader)
        print("Model accuracy on the %s test set: %0.2f%%" %
              (s_type, 100 * acc))
        # Compute and display average perturbation sizes
        ### TODO
        # if not s_type == 'normal':
        #     l2_diff = np.linalg.norm(
        #         dataset.reshape((len(X_test), -1)) -
        #         X_test.reshape((len(X_test), -1)),
        #         axis=1).mean()
        #     print("Average L-2 perturbation size of the %s test set: %0.2f" %
        #           (s_type, l2_diff))

    ### Refine the normal, noisy and adversarial sets to only include samples
    ### for which the original version was correctly classified by the model
    test_loader = DataLoader(dataset=test_data, batch_size=args.batch_size)
    y_test_list = []
    pred_list = []
    with torch.no_grad():
        for batch in test_loader:
            x = batch[0].to('cuda')
            y_test_list.append(batch[1])
            pred_list.append(model(x))
    pred_test = torch.cat(pred_list)
    Y_test = torch.cat(y_test_list).to('cuda')
    inds_correct = torch.where(Y_test == pred_test.argmax(axis=1),
                               torch.full_like(Y_test, 1),
                               torch.full_like(Y_test, 0)).to('cuda')

    picked_test_data = []
    picked_test_data_noisy = []
    picked_test_data_adv = []
    for i, (b, y_tmp) in enumerate(zip(inds_correct, Y_test)):
        if b == 1:
            picked_test_data.append((test_data[i][0], y_tmp))
            picked_test_data_noisy.append((test_noisy[i][0], y_tmp))
            picked_test_data_adv.append((X_test_adv[i], y_tmp))

    picked_test_loader = DataLoader(dataset=picked_test_data,
                                    batch_size=args.batch_size)
    picked_test_noisy_loader = DataLoader(dataset=picked_test_data_noisy,
                                          batch_size=args.batch_size)
    picked_test_adv_loader = DataLoader(dataset=picked_test_data_adv,
                                        batch_size=args.batch_size)

    ## Get Bayesian uncertainty scores
    nb_size = 50
    print('Getting Monte Carlo dropout variance predictions...')
    uncerts_normal = get_mc_predictions(model, picked_test_loader,
                                        nb_iter=nb_size)
    uncerts_noisy = get_mc_predictions(model, picked_test_noisy_loader,
                                       nb_iter=nb_size)
    uncerts_adv = get_mc_predictions(model, picked_test_adv_loader,
                                     nb_iter=nb_size)
    print(uncerts_normal.shape)
    print(uncerts_noisy.shape)
    print(uncerts_adv.shape)

    ## Get KDE scores
    # Get deep feature representations
    print('Getting deep feature representations...')
    x_train_features = get_deep_representations(model, train_loader,
                                                args.dataset)
    x_test_normal_features = get_deep_representations(
        model, picked_test_loader, args.dataset)
    x_test_noisy_features = get_deep_representations(
        model, picked_test_noisy_loader, args.dataset)
    x_test_adv_features = get_deep_representations(
        model, picked_test_adv_loader, args.dataset)
    print(x_train_features.shape)
    print(x_test_normal_features.shape)
    print(x_test_noisy_features.shape)
    print(x_test_adv_features.shape)

    # One-hot encode the training labels
    class_num = 10
    Y_train_label = np.array([tmp[1] for tmp in train_data])
    Y_train = np.zeros((len(Y_train_label), class_num))
    Y_train[np.arange(Y_train_label.size), Y_train_label] = 1

    # Train one KDE per class
    print('Training KDEs...')
    class_inds = {}
    for i in range(class_num):
        class_inds[i] = np.where(Y_train_label == i)[0]
        print('class_inds[', i, ']: ', class_inds[i].size)
    kdes = {}
    warnings.warn("Using pre-set kernel bandwidths that were determined "
                  "optimal for the specific CNN models of the paper. If "
                  "you've changed your model, you'll need to re-optimize "
                  "the bandwidth.")
    for i in range(class_num):
        kdes[i] = KernelDensity(
            kernel='gaussian', bandwidth=BANDWIDTHS[args.dataset]).fit(
                x_train_features.cpu().numpy()[class_inds[i]])

    # Get model predictions
    print('Computing model predictions...')
    data_loaders = [picked_test_loader, picked_test_noisy_loader,
                    picked_test_adv_loader]
    preds = []
    for now_loader in data_loaders:
        with torch.no_grad():
            tmp_result = []
            for batch in now_loader:
                x = batch[0].to('cuda')
                tmp_result.append(model(x))
            preds.append(torch.cat(tmp_result))
    preds_test_normal = torch.argmax(preds[0], dim=1)
    preds_test_noisy = torch.argmax(preds[1], dim=1)
    preds_test_adv = torch.argmax(preds[2], dim=1)
    print(preds_test_normal)

    # Get density estimates
    print('Computing densities...')
    densities_normal = score_samples(kdes, x_test_normal_features.cpu(),
                                     preds_test_normal.cpu())
    densities_noisy = score_samples(kdes, x_test_noisy_features.cpu(),
                                    preds_test_noisy.cpu())
    densities_adv = score_samples(kdes, x_test_adv_features.cpu(),
                                  preds_test_adv.cpu())

    ## Z-score the uncertainty and density values
    uncerts_normal_z, uncerts_adv_z, uncerts_noisy_z = normalize(
        uncerts_normal.cpu().numpy(),
        uncerts_adv.cpu().numpy(),
        uncerts_noisy.cpu().numpy())
    densities_normal_z, densities_adv_z, densities_noisy_z = normalize(
        densities_normal, densities_adv, densities_noisy)
    print('.......mean.......')
    print(densities_normal_z.mean())
    print(densities_adv_z.mean())
    print(densities_noisy_z.mean())

    ## Build detector
    ### combine
    values_combine, labels_combine, lr_combine = train_lr(
        densities_pos=densities_adv_z,
        densities_neg=np.concatenate((densities_normal_z, densities_noisy_z)),
        uncerts_pos=uncerts_adv_z,
        uncerts_neg=np.concatenate((uncerts_normal_z, uncerts_noisy_z)),
        flag='combine')
    ### dense
    values_dense, labels_dense, lr_dense = train_lr(
        densities_pos=densities_adv_z,
        densities_neg=np.concatenate((densities_normal_z, densities_noisy_z)),
        uncerts_pos=uncerts_adv_z,
        uncerts_neg=np.concatenate((uncerts_normal_z, uncerts_noisy_z)),
        flag='dense')
    ### uncert
    values_uncert, labels_uncert, lr_uncert = train_lr(
        densities_pos=densities_adv_z,
        densities_neg=np.concatenate((densities_normal_z, densities_noisy_z)),
        uncerts_pos=uncerts_adv_z,
        uncerts_neg=np.concatenate((uncerts_normal_z, uncerts_noisy_z)),
        flag='uncert')

    ## Evaluate detector
    # Compute logistic regression model predictions
    probs_combine = lr_combine.predict_proba(values_combine)[:, 1]
    probs_dense = lr_dense.predict_proba(values_dense)[:, 1]
    probs_uncert = lr_uncert.predict_proba(values_uncert)[:, 1]
    # Compute AUC
    n_samples = len(picked_test_data)
    # The first 2/3 of 'probs' is the negative class (normal and noisy
    # samples), and the last 1/3 is the positive class (adversarial samples).
    prob_datas = [
        (probs_combine[:2 * n_samples], probs_combine[2 * n_samples:],
         'combine'),
        (probs_dense[:2 * n_samples], probs_dense[2 * n_samples:], 'dense'),
        (probs_uncert[:2 * n_samples], probs_uncert[2 * n_samples:], 'uncert')
    ]
    _, _, auc_score = compute_roc(prob_datas, plot=True)
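# Hedged sketch of the `AddGaussianNoise` transform used in the noisy data
# pipelines above (an assumption; the repo's version may differ): additive
# i.i.d. Gaussian noise applied after normalization.
import torch

class AddGaussianNoise(object):
    def __init__(self, mean=0., std=1.):
        self.mean = mean
        self.std = std

    def __call__(self, tensor):
        return tensor + torch.randn_like(tensor) * self.std + self.mean

    def __repr__(self):
        return self.__class__.__name__ + '(mean={0}, std={1})'.format(
            self.mean, self.std)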
def evaluate_test(args, model, kdes, datatypes, nb_size, flags):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    test_dataset = {}
    transform = None
    test_dataset['normal'] = get_data(args.dataset, train=False,
                                      transform=transform)
    test_dataset['noisy'] = get_data(args.dataset, train=False,
                                     transform=transform)

    ##### Load adversarial samples (created by craft_adv_samples.py)
    ### Adversarial examples crafted on the test set
    print('[Test] Loading noisy and adversarial samples...')
    X_test_adv = np.load('../data/Adv_%s_%s.npy' % (args.dataset, args.attack))
    X_test_adv = torch.from_numpy(X_test_adv)
    test_adv = [(x_tmp, y_tmp[1])
                for x_tmp, y_tmp in zip(X_test_adv, test_dataset['normal'])]
    test_dataset['adversarial'] = test_adv

    num = len(test_dataset['normal'])
    test_loader = {}
    for datatype in datatypes:
        test_loader[datatype] = DataLoader(dataset=test_dataset[datatype],
                                           batch_size=args.batch_size,
                                           shuffle=False)

    ### TODO: keep only the samples the model predicts correctly on the
    ### normal set, as the training-side main() does

    ################# Get Bayesian uncertainty scores
    #### mc_variance
    print('[Test] Getting Monte Carlo dropout variance...')
    mc_variance = {}
    for datatype in datatypes:
        mc_variance[datatype] = get_mc_predictions(model,
                                                   test_loader[datatype],
                                                   nb_iter=nb_size,
                                                   method='default')
    #### mc_entropy
    print('[Test] Getting Monte Carlo dropout entropy...')
    mc_entropy = {}
    for datatype in datatypes:
        mc_entropy[datatype] = get_mc_predictions(model,
                                                  test_loader[datatype],
                                                  nb_iter=nb_size,
                                                  method='entropy')
        where_are_NaNs = isnan(mc_entropy[datatype])
        mc_entropy[datatype][where_are_NaNs] = 0

    ### entropy
    print('[Test] Getting entropy...')
    entropy = {}
    for datatype in datatypes:
        entropy[datatype] = get_entropy(model, test_loader[datatype])
        where_are_NaNs = isnan(entropy[datatype])
        entropy[datatype][where_are_NaNs] = 0

    ################# Get KDE scores
    # Get deep feature representations
    print('[Test] Getting deep feature representations...')
    features = {}
    for datatype in datatypes:
        features[datatype] = get_deep_representations(model,
                                                      test_loader[datatype],
                                                      args.dataset)

    # Get model predictions
    print('[Test] Computing model predictions...')
    preds = {}
    for datatype in datatypes:
        with torch.no_grad():
            tmp_result = []
            for batch in test_loader[datatype]:
                x = batch[0].to(device)
                tmp_result.append(model(x).detach().cpu())
            preds[datatype] = torch.argmax(torch.cat(tmp_result), dim=1)

    # Get density estimates on the test set
    print('[Test] Computing densities...')
    densities = {}
    for datatype in datatypes:
        densities[datatype] = score_samples(kdes, features[datatype].cpu(),
                                            preds[datatype].cpu())

    ###### Z-score (normalize) the uncertainty and density values
    mc_entropy_z = {}
    mc_entropy_z['normal'], mc_entropy_z['noisy'], mc_entropy_z['adversarial'] = \
        normalize(
            mc_entropy['normal'].cpu().numpy(),
            mc_entropy['noisy'].cpu().numpy(),
            mc_entropy['adversarial'].cpu().numpy(),
        )
    mc_variance_z = {}
    mc_variance_z['normal'], mc_variance_z['noisy'], mc_variance_z['adversarial'] = \
        normalize(
            mc_variance['normal'].cpu().numpy(),
            mc_variance['noisy'].cpu().numpy(),
            mc_variance['adversarial'].cpu().numpy(),
        )
    entropy_z = {}
    entropy_z['normal'], entropy_z['noisy'], entropy_z['adversarial'] = \
        normalize(
            entropy['normal'].cpu().numpy(),
            entropy['noisy'].cpu().numpy(),
            entropy['adversarial'].cpu().numpy(),
        )
    densities_z = {}
    densities_z['normal'], densities_z['noisy'], densities_z['adversarial'] = \
        normalize(
            densities['normal'],
            densities['noisy'],
            densities['adversarial'],
        )

    print('.......Densities.......')
    for datatype in datatypes:
        print(datatype, ' Mean: ', densities_z[datatype].mean())

    ### dense, uncert, combine
    values = {}
    labels = {}
    for now_flag in flags:
        tmp_values, tmp_labels = get_value(
            densities=(densities_z['adversarial'],
                       np.concatenate(
                           (densities_z['normal'], densities_z['noisy']))),
            entropy=(entropy_z['adversarial'],
                     np.concatenate(
                         (entropy_z['normal'], entropy_z['noisy']))),
            mc_entropy=(mc_entropy_z['adversarial'],
                        np.concatenate(
                            (mc_entropy_z['normal'], mc_entropy_z['noisy']))),
            mc_variance=(mc_variance_z['adversarial'],
                         np.concatenate(
                             (mc_variance_z['normal'], mc_variance_z['noisy']))),
            flag=now_flag)
        values[now_flag] = tmp_values
        labels[now_flag] = tmp_labels

    return values, labels, num
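# Hedged sketch of the `get_entropy` helper used above (an assumption; the
# repo's version may differ): the predictive entropy -sum_c p_c log p_c of
# the softmax output, per sample over the whole loader. log(0) terms yield
# NaNs, which the callers above zero out.
import torch
import torch.nn.functional as F

@torch.no_grad()
def get_entropy(model, loader, device='cuda'):
    out = []
    for batch in loader:
        probs = F.softmax(model(batch[0].to(device)), dim=1)
        out.append(-(probs * probs.log()).sum(dim=1).cpu())
    return torch.cat(out)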
def main(args):
    print(args)
    datatypes = ['normal', 'noisy', 'adversarial']

    ## Assertions
    assert args.dataset in ['mnist', 'cifar', 'svhn'], \
        "Dataset parameter must be either 'mnist', 'cifar' or 'svhn'"
    assert args.attack in ['fgsm', 'bim-a', 'bim-b', 'bim', 'jsma', 'cw',
                           'all'], \
        "Attack parameter must be either 'fgsm', 'bim-a', 'bim-b', 'bim', " \
        "'jsma', 'cw' or 'all'"
    # assert os.path.isfile('../data/Adv_%s_%s.npy' % (args.dataset, args.attack)), \
    #     'adversarial sample file not found... must first craft adversarial ' \
    #     'samples using craft_adv_samples.py'

    print('Loading the data and model...')
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # Load the model
    model = get_model(args.dataset)
    model.load_state_dict(torch.load(args.model))
    model.to(device)
    model.eval()
    # Load the dataset
    train_data = get_data(args.dataset, train=True)
    train_loader = DataLoader(dataset=train_data, batch_size=args.batch_size,
                              shuffle=False)

    ##### Load adversarial samples (created by craft_adv_samples.py)
    print('Loading noisy and adversarial samples...')
    ### train_adv
    X_train_adv = np.load('../data/Adv_%s_%s_train.npy' %
                          (args.dataset, args.attack))
    X_train_adv = torch.from_numpy(X_train_adv)
    train_adv = [(x_tmp, y_tmp[1])
                 for x_tmp, y_tmp in zip(X_train_adv, train_data)]

    ##### Create noisy data
    # NOTE: 'svhn' would need its own normalization stats here.
    if args.dataset == 'mnist':
        noise_transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
            AddGaussianNoise(0., 0.1)
        ])
    elif args.dataset == 'cifar':
        noise_transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(mean=(0.4914, 0.4822, 0.4465),
                                 std=(0.247, 0.243, 0.261)),
            AddGaussianNoise(0., 0.1)
        ])
    train_noisy = get_data(args.dataset, train=True, transform=noise_transform)

    X_train, Y_train = getXY(train_data)

    # Check model accuracies on each sample type
    for s_type, dataset in zip(['normal', 'noisy', 'adversarial'],
                               [train_data, train_noisy, train_adv]):
        data_loader = DataLoader(dataset=dataset, batch_size=args.batch_size)
        acc = evaluate(model, data_loader)
        print("Model accuracy on the %s train set: %0.2f%%" %
              (s_type, 100 * acc))
        # Compute and display average perturbation sizes
        X_now, Y_now = getXY(dataset)
        if not s_type == 'normal':
            l2_diff = np.linalg.norm(
                X_now.reshape((len(X_train), -1)) -
                X_train.reshape((len(X_train), -1)),
                axis=1).mean()
            print("Average L-2 perturbation size of the %s train set: %0.2f" %
                  (s_type, l2_diff))

    ### Refine the normal, noisy and adversarial sets to only include samples
    ### for which the original version was correctly classified by the model
    y_train_list = []
    pred_train_list = []
    with torch.no_grad():
        for batch in train_loader:
            x = batch[0].to(device)
            y_train_list.append(batch[1])
            pred_train_list.append(model(x))
    Y_train = torch.cat(y_train_list).detach().cpu()
    pred_train = torch.cat(pred_train_list).detach().cpu()
    inds_correct = torch.where(Y_train == pred_train.argmax(axis=1),
                               torch.full_like(Y_train, 1),
                               torch.full_like(Y_train, 0)).to(device)

    picked_train_data = {}
    for datatype in datatypes:
        picked_train_data[datatype] = []
    for i, (b, y_tmp) in enumerate(zip(inds_correct, Y_train)):
        if b == 1:
            picked_train_data['normal'].append((train_data[i][0], y_tmp))
            picked_train_data['noisy'].append((train_noisy[i][0], y_tmp))
            picked_train_data['adversarial'].append((X_train_adv[i], y_tmp))

    picked_train_loader = {}
    for datatype in datatypes:
        picked_train_loader[datatype] = DataLoader(
            dataset=picked_train_data[datatype], batch_size=args.batch_size)

    ################# Get Bayesian uncertainty scores
    nb_size = 20
    #### mc_variance
    print('Getting Monte Carlo dropout variance...')
    mc_variance = {}
    for datatype in datatypes:
        mc_variance[datatype] = get_mc_predictions(
            model, picked_train_loader[datatype],
            nb_iter=nb_size, method='default')
    #### mc_entropy
    print('Getting Monte Carlo dropout entropy...')
    mc_entropy = {}
    for datatype in datatypes:
        mc_entropy[datatype] = get_mc_predictions(
            model, picked_train_loader[datatype],
            nb_iter=nb_size, method='entropy')
        where_are_NaNs = isnan(mc_entropy[datatype])
        mc_entropy[datatype][where_are_NaNs] = 0
    ### entropy
    print('Getting entropy...')
    entropy = {}
    for datatype in datatypes:
        entropy[datatype] = get_entropy(model, picked_train_loader[datatype])
        where_are_NaNs = isnan(entropy[datatype])
        entropy[datatype][where_are_NaNs] = 0

    ################# Get KDE scores
    # Get deep feature representations
    print('Getting deep feature representations...')
    x_train_features = get_deep_representations(model, train_loader,
                                                args.dataset)
    picked_train_features = {}
    for datatype in datatypes:
        picked_train_features[datatype] = get_deep_representations(
            model, picked_train_loader[datatype], args.dataset)
    print('Shape')
    print(x_train_features.shape)
    for datatype in datatypes:
        print(picked_train_features[datatype].shape)

    ####### CLASS NUM ########
    class_num = 10
    Y_train_label = np.array([tmp[1] for tmp in train_data])
    Y_train = np.zeros((len(Y_train_label), class_num))
    Y_train[np.arange(Y_train_label.size), Y_train_label] = 1

    # Train one KDE per class
    print('Training KDEs...')
    class_inds = {}
    for i in range(class_num):
        class_inds[i] = np.where(Y_train_label == i)[0]
        print('class_inds[', i, ']: ', class_inds[i].size)
    kdes = {}
    warnings.warn("Using pre-set kernel bandwidths that were determined "
                  "optimal for the specific CNN models of the paper. If "
                  "you've changed your model, you'll need to re-optimize "
                  "the bandwidth.")
    ### Use train features to fit the kernel density estimators
    for i in range(class_num):
        kdes[i] = KernelDensity(
            kernel='gaussian', bandwidth=BANDWIDTHS[args.dataset]).fit(
                x_train_features.cpu().numpy()[class_inds[i]])

    # Get model predictions
    print('Computing model predictions...')
    preds = []
    for datatype in datatypes:
        with torch.no_grad():
            tmp_result = []
            for batch in picked_train_loader[datatype]:
                x = batch[0].to(device)
                tmp_result.append(model(x).detach().cpu())
            preds.append(torch.cat(tmp_result))
    preds_train = {}
    preds_train['normal'] = torch.argmax(preds[0], dim=1)
    preds_train['noisy'] = torch.argmax(preds[1], dim=1)
    preds_train['adversarial'] = torch.argmax(preds[2], dim=1)

    # Get density estimates
    print('Computing densities...')
    train_densities = {}
    for datatype in datatypes:
        train_densities[datatype] = score_samples(
            kdes, picked_train_features[datatype].cpu(),
            preds_train[datatype].cpu())

    ###### Z-score (normalize) the uncertainty and density values
    mc_entropy_z = {}
    mc_entropy_z['normal'], mc_entropy_z['noisy'], mc_entropy_z['adversarial'] = \
        normalize(
            mc_entropy['normal'].cpu().numpy(),
            mc_entropy['noisy'].cpu().numpy(),
            mc_entropy['adversarial'].cpu().numpy(),
        )
    mc_variance_z = {}
    mc_variance_z['normal'], mc_variance_z['noisy'], mc_variance_z['adversarial'] = \
        normalize(
            mc_variance['normal'].cpu().numpy(),
            mc_variance['noisy'].cpu().numpy(),
            mc_variance['adversarial'].cpu().numpy(),
        )
    entropy_z = {}
    entropy_z['normal'], entropy_z['noisy'], entropy_z['adversarial'] = \
        normalize(
            entropy['normal'].cpu().numpy(),
            entropy['noisy'].cpu().numpy(),
            entropy['adversarial'].cpu().numpy(),
        )
    densities_z = {}
    densities_z['normal'], densities_z['noisy'], densities_z['adversarial'] = \
        normalize(
            train_densities['normal'],
            train_densities['noisy'],
            train_densities['adversarial'],
        )

    print('.......Densities.......')
    for datatype in datatypes:
        print(datatype, ' Mean: ', densities_z[datatype].mean())

    ## Build detector
    ### dense, entropy, mc_entropy, mc_variance, combine
    flags = ['dense', 'entropy', 'mc_entropy', 'mc_variance', 'combine']
    values = {}
    labels = {}
    lrs = {}
    for now_flag in flags:
        print('processing %s ...' % now_flag)
        tmp_values, tmp_labels, tmp_lr = train_lr(
            densities=(densities_z['adversarial'],
                       np.concatenate(
                           (densities_z['normal'], densities_z['noisy']))),
            entropy=(entropy_z['adversarial'],
                     np.concatenate(
                         (entropy_z['normal'], entropy_z['noisy']))),
            mc_entropy=(mc_entropy_z['adversarial'],
                        np.concatenate(
                            (mc_entropy_z['normal'], mc_entropy_z['noisy']))),
            mc_variance=(mc_variance_z['adversarial'],
                         np.concatenate(
                             (mc_variance_z['normal'], mc_variance_z['noisy']))),
            flag=now_flag)
        values[now_flag] = tmp_values
        labels[now_flag] = tmp_labels
        lrs[now_flag] = tmp_lr

    if args.do_test:
        test_values, test_labels, test_num = evaluate_test(
            args, model, kdes, datatypes, nb_size, flags)

    ## Evaluate detector (on the test set if --do_test, else on the train set)
    probs = {}
    for flag in flags:
        if args.do_test:
            probs[flag] = lrs[flag].predict_proba(test_values[flag])[:, 1]
        else:
            probs[flag] = lrs[flag].predict_proba(values[flag])[:, 1]
    # Compute AUC
    if args.do_test:
        n_samples = test_num
    else:
        n_samples = len(train_data)
    # The first 2/3 of 'probs' is the negative class (normal and noisy
    # samples), and the last 1/3 is the positive class (adversarial samples).
    prob_datas = []
    for flag in flags:
        prob_datas.append((probs[flag][:2 * n_samples],
                           probs[flag][2 * n_samples:], flag))
    _, _, auc_score = compute_roc(prob_datas, plot=True,
                                  pic_name=args.pic_name)
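# Hedged sketch of `get_mc_predictions` as used above (an assumption; the
# repo's version may differ): run nb_iter stochastic forward passes with
# dropout kept active, then reduce either to the MC-dropout variance
# (method='default') or to the entropy of the mean predictive distribution
# (method='entropy').
import torch
import torch.nn.functional as F

def get_mc_predictions(model, loader, nb_iter=20, method='default',
                       device='cuda'):
    model.eval()
    for m in model.modules():          # re-enable only the dropout layers
        if isinstance(m, torch.nn.Dropout):
            m.train()
    runs = []
    with torch.no_grad():
        for _ in range(nb_iter):
            probs = [F.softmax(model(batch[0].to(device)), dim=1).cpu()
                     for batch in loader]
            runs.append(torch.cat(probs))
    stacked = torch.stack(runs)        # (nb_iter, n_samples, n_classes)
    if method == 'entropy':
        mean_p = stacked.mean(dim=0)
        return -(mean_p * mean_p.log()).sum(dim=1)
    return stacked.var(dim=0).mean(dim=1)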