def main():
    with open('data.json') as data_json:
        data_params = json.load(data_json)

    parser = argparse.ArgumentParser()
    parser.add_argument('--data', type=str, required=True)
    parser.add_argument('--model', type=str, required=True)
    parser.add_argument('--pretrained', type=str, required=True)
    parser.add_argument('--data_path', type=str, default='data')
    parser.add_argument('--output_path', type=str, default='results')
    parser.add_argument('--random_state', type=int, default=1234)
    args = parser.parse_args()
    print(args)

    set_seeds(args.random_state)

    if not os.path.exists(args.output_path):
        print('Output folder does not exist. Create:', args.output_path)
        os.mkdir(args.output_path)

    print('data:', args.data)
    print('model:', args.model)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print('Device: {}'.format(device))

    # Prepare data
    transforms = tv.transforms.Compose([tv.transforms.ToTensor()])
    if args.data == 'mnist':
        dataset_train = datasets.MNIST(args.data_path, train=True, download=True, transform=transforms)
        dataset_test = datasets.MNIST(args.data_path, train=False, download=True, transform=transforms)
    elif args.data == 'cifar10':
        dataset_train = datasets.CIFAR10(args.data_path, train=True, download=True, transform=transforms)
        dataset_test = datasets.CIFAR10(args.data_path, train=False, download=True, transform=transforms)
    else:
        data_path = os.path.join(args.data_path, data_params['data'][args.data]['file_name'])
        print('Read file:', data_path)
        X, y = load_csv(data_path)
        X_train, X_test, y_train, y_test = train_test_split(
            X, y,
            test_size=data_params['data'][args.data]['n_test'],
            random_state=args.random_state)
        scaler = MinMaxScaler().fit(X_train)
        X_train = scaler.transform(X_train)
        X_test = scaler.transform(X_test)
        dataset_train = TensorDataset(
            torch.from_numpy(X_train).type(torch.float32),
            torch.from_numpy(y_train).type(torch.long))
        dataset_test = TensorDataset(
            torch.from_numpy(X_test).type(torch.float32),
            torch.from_numpy(y_test).type(torch.long))

    loader_train = DataLoader(dataset_train, batch_size=512, shuffle=False)
    loader_test = DataLoader(dataset_test, batch_size=512, shuffle=False)

    shape_train = get_shape(loader_train.dataset)
    shape_test = get_shape(loader_test.dataset)
    print('Train set:', shape_train)
    print('Test set:', shape_test)

    use_prob = True
    print('Using softmax layer:', use_prob)

    # Load model
    if args.data == 'mnist':
        model = BaseModel(use_prob=use_prob).to(device)
        model_name = 'basic'
    elif args.data == 'cifar10':
        model_name = args.pretrained.split('_')[1]
        if model_name == 'resnet':
            model = Resnet(use_prob=use_prob).to(device)
        elif model_name == 'vgg':
            model = Vgg(use_prob=use_prob).to(device)
        else:
            raise NotImplementedError
    else:
        n_features = data_params['data'][args.data]['n_features']
        n_classes = data_params['data'][args.data]['n_classes']
        model = NumericModel(
            n_features, n_hidden=n_features * 4, n_classes=n_classes,
            use_prob=use_prob).to(device)
        model_name = 'basic' + str(n_features * 4)

    loss = nn.CrossEntropyLoss()
    pretrained_path = os.path.join(args.output_path, args.pretrained)
    model.load_state_dict(torch.load(pretrained_path, map_location=device))

    _, acc_train = validate(model, loader_train, loss, device)
    _, acc_test = validate(model, loader_test, loss, device)
    print('Accuracy on train set: {:.4f}%'.format(acc_train * 100))
    print('Accuracy on test set: {:.4f}%'.format(acc_test * 100))

    # Create a subset which only contains recognisable samples.
    # The original train and test sets are no longer needed.
    tensor_train_X, tensor_train_y = get_correct_examples(
        model, dataset_train, device=device, return_tensor=True)
    dataset_train = TensorDataset(tensor_train_X, tensor_train_y)
    loader_train = DataLoader(dataset_train, batch_size=512, shuffle=False)
    _, acc_perfect = validate(model, loader_train, loss, device)
    print('Accuracy on {} filtered train set: {:.4f}%'.format(
        len(dataset_train), acc_perfect * 100))

    tensor_test_X, tensor_test_y = get_correct_examples(
        model, dataset_test, device=device, return_tensor=True)
    dataset_test = TensorDataset(tensor_test_X, tensor_test_y)
    loader_test = DataLoader(dataset_test, batch_size=512, shuffle=False)
    _, acc_perfect = validate(model, loader_test, loss, device)
    print('Accuracy on {} filtered test set: {:.4f}%'.format(
        len(dataset_test), acc_perfect * 100))

    X_train = tensor_train_X.cpu().detach().numpy()
    y_train = tensor_train_y.cpu().detach().numpy()
    X_baard = baard_preprocess(args.data, tensor_train_X).cpu().detach().numpy()
    obj = {'X_train': X_baard, 'y_train': y_train}
    # NOTE: The format string originally received a duplicate args.model argument.
    path_output = os.path.join(
        args.output_path, '{}_{}_baard_train.pt'.format(args.data, args.model))
    torch.save(obj, path_output)
    print('Save to:', path_output)
    print()
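
# A hypothetical invocation of the preprocessing script above (the script file
# name is an assumption; the flags match the argparse definitions in main()):
#
#   python preprocess_baard.py --data mnist --model basic --pretrained mnist_200.pt
#
# The saved '<data>_<model>_baard_train.pt' object is consumed later when
# fitting BAARD's ApplicabilityStage.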

def main():
    with open('data.json') as data_json:
        data_params = json.load(data_json)

    parser = argparse.ArgumentParser()
    parser.add_argument('--data', type=str)
    parser.add_argument('--data_path', type=str, default='data')
    parser.add_argument('--output_path', type=str, default='results')
    parser.add_argument('--pretrained', type=str, required=True)
    parser.add_argument('--batch_size', type=int, default=128)
    parser.add_argument('--attack', type=str, required=True, choices=data_params['attacks'])
    # NOTE: In the CW_L2 attack, eps is the upper bound of c.
    parser.add_argument('--eps', type=float, default=0.3)
    parser.add_argument('--n_samples', type=int, default=2000)
    parser.add_argument('--random_state', type=int, default=1234)
    args = parser.parse_args()
    print(args)

    set_seeds(args.random_state)

    if not os.path.exists(args.output_path):
        print('Output folder does not exist. Create:', args.output_path)
        os.mkdir(args.output_path)

    print('Dataset:', args.data)
    print('Pretrained model:', args.pretrained)
    print('Running attack: {}'.format(args.attack))

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print('Device: {}'.format(device))

    # Prepare data
    transforms = tv.transforms.Compose([tv.transforms.ToTensor()])
    if args.data == 'mnist':
        dataset_train = datasets.MNIST(args.data_path, train=True, download=True, transform=transforms)
        dataset_test = datasets.MNIST(args.data_path, train=False, download=True, transform=transforms)
    elif args.data == 'cifar10':
        dataset_train = datasets.CIFAR10(args.data_path, train=True, download=True, transform=transforms)
        dataset_test = datasets.CIFAR10(args.data_path, train=False, download=True, transform=transforms)
    else:
        data_path = os.path.join(args.data_path, data_params['data'][args.data]['file_name'])
        print('Read file:', data_path)
        X, y = load_csv(data_path)
        X_train, X_test, y_train, y_test = train_test_split(
            X, y,
            test_size=data_params['data'][args.data]['n_test'],
            random_state=args.random_state)
        scaler = MinMaxScaler().fit(X_train)
        X_train = scaler.transform(X_train)
        X_test = scaler.transform(X_test)
        dataset_train = TensorDataset(
            torch.from_numpy(X_train).type(torch.float32),
            torch.from_numpy(y_train).type(torch.long))
        dataset_test = TensorDataset(
            torch.from_numpy(X_test).type(torch.float32),
            torch.from_numpy(y_test).type(torch.long))

    dataloader_train = DataLoader(dataset_train, 256, shuffle=False)
    dataloader_test = DataLoader(dataset_test, 256, shuffle=False)

    shape_train = get_shape(dataloader_train.dataset)
    shape_test = get_shape(dataloader_test.dataset)
    print('Train set:', shape_train)
    print('Test set:', shape_test)

    # Load model
    use_prob = args.attack not in ['apgd', 'apgd1', 'apgd2', 'cw2', 'cwinf']
    print('Attack:', args.attack)
    print('Using softmax layer:', use_prob)
    if args.data == 'mnist':
        model = BaseModel(use_prob=use_prob).to(device)
        model_name = 'basic'
    elif args.data == 'cifar10':
        model_name = args.pretrained.split('_')[1]
        if model_name == 'resnet':
            model = Resnet(use_prob=use_prob).to(device)
        elif model_name == 'vgg':
            model = Vgg(use_prob=use_prob).to(device)
        else:
            raise ValueError('Unknown model: {}'.format(model_name))
    else:
        n_features = data_params['data'][args.data]['n_features']
        n_classes = data_params['data'][args.data]['n_classes']
        model = NumericModel(
            n_features, n_hidden=n_features * 4, n_classes=n_classes,
            use_prob=use_prob).to(device)
        model_name = 'basic' + str(n_features * 4)

    optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=5e-4)
    loss = nn.CrossEntropyLoss()
    pretrained_path = os.path.join(args.output_path, args.pretrained)
    model.load_state_dict(torch.load(pretrained_path, map_location=device))

    _, acc_train = validate(model, dataloader_train, loss, device)
    _, acc_test = validate(model, dataloader_test, loss, device)
    print('Accuracy on train set: {:.4f}%'.format(acc_train * 100))
    print('Accuracy on test set: {:.4f}%'.format(acc_test * 100))

    # Create a subset which only contains recognisable samples.
    tensor_test_X, tensor_test_y = get_correct_examples(
        model, dataset_test, device=device, return_tensor=True)
    dataset_perfect = TensorDataset(tensor_test_X, tensor_test_y)
    loader_perfect = DataLoader(dataset_perfect, batch_size=512, shuffle=False)
    _, acc_perfect = validate(model, loader_perfect, loss, device)
    print('Accuracy on {} filtered test examples: {:.4f}%'.format(
        len(dataset_perfect), acc_perfect * 100))

    # Generate adversarial examples
    n_features = data_params['data'][args.data]['n_features']
    n_classes = data_params['data'][args.data]['n_classes']
    if isinstance(n_features, int):
        n_features = (n_features,)

    classifier = PyTorchClassifier(
        model=model, loss=loss, input_shape=n_features, optimizer=optimizer,
        nb_classes=n_classes, clip_values=(0.0, 1.0), device_type='gpu')

    if args.attack == 'apgd':
        eps_step = args.eps / 10.0 if args.eps <= 0.1 else 0.1
        attack = AutoProjectedGradientDescent(
            estimator=classifier, eps=args.eps, eps_step=eps_step,
            max_iter=1000, batch_size=args.batch_size, targeted=False)
    elif args.attack == 'apgd1':
        attack = AutoProjectedGradientDescent(
            estimator=classifier, norm=1, eps=args.eps, eps_step=0.1,
            max_iter=1000, batch_size=args.batch_size, targeted=False)
    elif args.attack == 'apgd2':
        attack = AutoProjectedGradientDescent(
            estimator=classifier, norm=2, eps=args.eps, eps_step=0.1,
            max_iter=1000, batch_size=args.batch_size, targeted=False)
    elif args.attack == 'bim':
        eps_step = args.eps / 10.0
        attack = BasicIterativeMethod(
            estimator=classifier, eps=args.eps, eps_step=eps_step,
            max_iter=1000, batch_size=args.batch_size, targeted=False)
    elif args.attack == 'boundary':
        attack = BoundaryAttack(
            estimator=classifier, max_iter=1000,
            sample_size=args.batch_size, targeted=False)
    elif args.attack == 'cw2':
        # NOTE: Do NOT increase the batch size!
        attack = CarliniWagnerAttackL2(
            model=model, n_classes=n_classes, confidence=args.eps,
            verbose=True, check_prob=False, batch_size=args.batch_size,
            targeted=False)
    elif args.attack == 'cwinf':
        attack = CarliniLInfMethod(
            classifier=classifier, confidence=args.eps, max_iter=1000,
            batch_size=args.batch_size, targeted=False)
    elif args.attack == 'deepfool':
        attack = DeepFool(
            classifier=classifier, epsilon=args.eps, batch_size=args.batch_size)
    elif args.attack == 'fgsm':
        attack = FastGradientMethod(
            estimator=classifier, eps=args.eps, batch_size=args.batch_size)
    elif args.attack == 'jsma':
        attack = SaliencyMapMethod(
            classifier=classifier, gamma=args.eps, batch_size=args.batch_size)
    elif args.attack == 'line':
        if args.data == 'mnist':
            color = args.eps
        elif args.data == 'cifar10':
            color = (args.eps, args.eps, args.eps)
        else:
            raise NotImplementedError
        attack = LineAttack(color=color, thickness=1)
    elif args.attack == 'shadow':
        attack = ShadowAttack(
            estimator=classifier, batch_size=args.batch_size,
            targeted=False, verbose=False)
    elif args.attack == 'watermark':
        attack = WaterMarkAttack(
            eps=args.eps,
            n_classes=data_params['data'][args.data]['n_classes'],
            x_min=0.0, x_max=1.0, targeted=False)
        X_train, y_train = get_correct_examples(
            model, dataset_train, device=device, return_tensor=True)
        X_train = X_train.cpu().detach().numpy()
        y_train = y_train.cpu().detach().numpy()
        attack.fit(X_train, y_train)
    else:
        raise NotImplementedError

    if len(dataset_perfect) > args.n_samples:
        n = args.n_samples
    else:
        n = len(dataset_perfect)

    X_benign = tensor_test_X[:n].cpu().detach().numpy()
    y = tensor_test_y[:n].cpu().detach().numpy()
    print('Creating {} adversarial examples with eps={} (Not all attacks use eps)'.format(n, args.eps))
    time_start = time.time()
    # The shadow attack only takes a single sample!
    if args.attack == 'shadow':
        adv = np.zeros_like(X_benign)
        for i in trange(len(X_benign)):
            adv[i] = attack.generate(x=np.expand_dims(X_benign[i], axis=0))
    elif args.attack == 'watermark':
        # This is untargeted.
        adv = attack.generate(X_benign, y)
    else:
        adv = attack.generate(x=X_benign)
    time_elapsed = time.time() - time_start
    print('Total time spent: {}'.format(str(datetime.timedelta(seconds=time_elapsed))))

    pred_benign = np.argmax(classifier.predict(X_benign), axis=1)
    acc_benign = np.sum(pred_benign == y) / n
    pred_adv = np.argmax(classifier.predict(adv), axis=1)
    acc_adv = np.sum(pred_adv == y) / n
    print('Accuracy on benign samples: {:.4f}%'.format(acc_benign * 100))
    print('Accuracy on adversarial examples: {:.4f}%'.format(acc_adv * 100))

    # Save results
    if args.n_samples < 2000:
        output_file = '{}_{}_{}_{}_size{}'.format(
            args.data, model_name, args.attack, str(args.eps), args.n_samples)
    else:
        output_file = '{}_{}_{}_{}'.format(
            args.data, model_name, args.attack, str(args.eps))

    path_x = os.path.join(args.output_path, '{}_x.npy'.format(output_file))
    path_y = os.path.join(args.output_path, '{}_y.npy'.format(output_file))
    path_adv = os.path.join(args.output_path, '{}_adv.npy'.format(output_file))
    np.save(path_x, X_benign)
    np.save(path_y, y)
    np.save(path_adv, adv)
    print('Saved to:', '{}_adv.npy'.format(output_file))
    print()
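
# A hypothetical invocation of the attack script above (script name assumed;
# 'apgd2' must appear in data.json's 'attacks' list to pass the choices check):
#
#   python attack.py --data mnist --pretrained mnist_200.pt --attack apgd2 --eps 2.0
#
# This writes '<output_file>_x.npy', '<output_file>_y.npy' and
# '<output_file>_adv.npy' into the results folder.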

def train_adv(data='mnist', model_name='basic', n_samples=2000, eps=2.,
              path_output='results', path_data='data', is_test=False,
              batch_size=128, device='cpu'):
    # Prepare data
    transforms = tv.transforms.Compose([tv.transforms.ToTensor()])
    if data == 'mnist':
        dataset_test = datasets.MNIST(path_data, train=False, download=True, transform=transforms)
    elif data == 'cifar10':
        dataset_test = datasets.CIFAR10(path_data, train=False, download=True, transform=transforms)
    else:
        raise NotImplementedError
    loader_test = DataLoader(dataset_test, batch_size=batch_size, shuffle=False)

    # Load model
    if data == 'mnist':
        model = BaseModel(use_prob=False).to(device)
        n_features = (1, 28, 28)
        pretrained = 'mnist_200.pt'
    elif data == 'cifar10':
        n_features = (3, 32, 32)
        if model_name == 'resnet':
            model = Resnet(use_prob=False).to(device)
            pretrained = 'cifar10_resnet_200.pt'
        elif model_name == 'vgg':
            model = Vgg(use_prob=False).to(device)
            pretrained = 'cifar10_vgg_200.pt'
        else:
            raise NotImplementedError
    else:
        raise NotImplementedError

    pretrained_path = os.path.join(path_output, pretrained)
    model.load_state_dict(torch.load(pretrained_path, map_location=device))
    optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=5e-4)
    loss = nn.CrossEntropyLoss()

    _, acc_test = validate(model, loader_test, loss, device)
    print('Accuracy on test set: {:.4f}%'.format(acc_test * 100))

    tensor_test_X, tensor_test_y = get_correct_examples(
        model, dataset_test, device=device, return_tensor=True)
    # Get samples from the tail
    if not is_test:
        # This is for training the surrogate model.
        tensor_test_X = tensor_test_X[-n_samples:]
        tensor_test_y = tensor_test_y[-n_samples:]
    else:
        # This is for testing the surrogate model.
        tensor_test_X = tensor_test_X[-n_samples - 2000:-2000]
        tensor_test_y = tensor_test_y[-n_samples - 2000:-2000]

    dataset_test = TensorDataset(tensor_test_X, tensor_test_y)
    loader_test = DataLoader(dataset_test, batch_size=batch_size, shuffle=False)
    _, acc_perfect = validate(model, loader_test, loss, device)
    print('Accuracy on {} filtered test set: {:.4f}%'.format(
        len(dataset_test), acc_perfect * 100))

    classifier = PyTorchClassifier(
        model=model, loss=loss, input_shape=n_features, optimizer=optimizer,
        nb_classes=10, clip_values=(0.0, 1.0), device_type='gpu')
    attack = AutoProjectedGradientDescent(
        estimator=classifier, eps=eps, eps_step=0.1, max_iter=1000,
        batch_size=batch_size, targeted=False)

    X_benign = tensor_test_X.cpu().detach().numpy()
    y_true = tensor_test_y.cpu().detach().numpy()
    adv = attack.generate(x=X_benign)
    pred_adv = np.argmax(classifier.predict(adv), axis=1)
    acc_adv = np.mean(pred_adv == y_true)
    print('Accuracy on adversarial examples: {:.4f}%'.format(acc_adv * 100))

    if not is_test:
        output_file = '{}_{}_baard_surro_train_eps{}_size{}.pt'.format(
            data, model_name, eps, n_samples)
    else:
        output_file = '{}_{}_baard_surro_test_eps{}_size{}.pt'.format(
            data, model_name, eps, n_samples)
    file_path = os.path.join(path_output, output_file)
    output = {'X': X_benign, 'adv': adv, 'y': y_true}
    torch.save(output, file_path)
    print('Save to:', file_path)
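
# Example call, assuming a CUDA device and the pretrained weights saved under
# results/ (values mirror the defaults; eps=2. matches the APGD setting used
# for BAARD's validation set elsewhere in this file):
#
#   train_adv(data='mnist', model_name='basic', n_samples=2000, eps=2.,
#             path_output='results', path_data='data', is_test=False,
#             batch_size=128, device='cuda')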

def get_baard_output(data, model_name, data_path, output_path, file_name,
                     param, batch_size, device):
    """Read a saved dataset object, run BAARD, apply clipping, and add
    label_as_adv to the object.
    """
    file_path = os.path.join(output_path, file_name)
    print('file_path:', file_path)
    obj = torch.load(file_path)
    X = obj['X']
    adv = obj['adv']
    y = obj['y']

    # Load model
    transforms = tv.transforms.Compose([tv.transforms.ToTensor()])
    if data == 'mnist':
        dataset_train = datasets.MNIST(data_path, train=True, download=True, transform=transforms)
        model = BaseModel(use_prob=False).to(device)
        pretrained = 'mnist_200.pt'
    elif data == 'cifar10':
        dataset_train = datasets.CIFAR10(data_path, train=True, download=True, transform=transforms)
        if model_name == 'resnet':
            model = Resnet(use_prob=False).to(device)
            pretrained = 'cifar10_resnet_200.pt'
        elif model_name == 'vgg':
            model = Vgg(use_prob=False).to(device)
            pretrained = 'cifar10_vgg_200.pt'
        else:
            raise NotImplementedError
    else:
        raise NotImplementedError

    pretrained_path = os.path.join(output_path, pretrained)
    model.load_state_dict(torch.load(pretrained_path, map_location=device))

    pred = predict_numpy(model, X, device)
    acc = np.mean(pred == y)
    print('Accuracy on {} clean samples: {}'.format(X.shape[0], acc))

    tensor_train_X, tensor_train_y = get_correct_examples(
        model, dataset_train, device=device, return_tensor=True)
    X_train = tensor_train_X.cpu().detach().numpy()
    y_train = tensor_train_y.cpu().detach().numpy()

    # Load the preprocessed training set
    baard_train_path = os.path.join(
        output_path, '{}_{}_baard_train.pt'.format(data, model_name))
    obj = torch.load(baard_train_path)
    X_baard = obj['X_train']

    # Load the original validation set for BAARD,
    # e.g. ./results/mnist_basic_apgd2_2.0_adv.npy
    file_root = '{}_{}_apgd2_2.0'.format(data, model_name)
    path_benign = os.path.join(output_path, file_root + '_x.npy')
    path_y = os.path.join(output_path, file_root + '_y.npy')
    X_val = np.load(path_benign)
    y_val = np.load(path_y)
    n = X_val.shape[0] // 2
    X_val = X_val[n:]
    y_val = y_val[n:]

    stages = []
    stages.append(ApplicabilityStage(n_classes=N_CLASSES, quantile=param['q1']))
    stages.append(ReliabilityStage(n_classes=N_CLASSES, k=param['k_re'], quantile=param['q2']))
    stages.append(DecidabilityStage(n_classes=N_CLASSES, k=param['k_de'], quantile=param['q3']))
    print('BAARD: # of stages:', len(stages))
    detector = BAARDOperator(stages=stages)
    detector.stages[0].fit(X_baard, y_train)
    detector.stages[1].fit(X_train, y_train)
    detector.stages[2].fit(X_train, y_train)
    detector.search_thresholds(X_val, y_val, np.zeros_like(y_val))

    pred_adv = predict_numpy(model, adv, device)
    print('Acc on adv without clip:', np.mean(pred_adv == y))
    # count_class(pred_adv)

    # TODO: After clipping, the 1st stage still blocks some samples. Unclear why.
    # To bypass the 1st stage, clip all adversarial examples into the
    # per-class bounding boxes.
    applicability = detector.stages[0]
    thresholds = applicability.thresholds_
    adv_clipped = adv.copy()
    for c in range(N_CLASSES):
        idx = np.where(pred_adv == c)[0]
        # Adversarial examples do NOT have the same distribution as the true classes.
        if len(idx) == 0:
            continue
        bounding_boxes = thresholds[c]
        low = bounding_boxes[0]
        high = bounding_boxes[1]
        shape = adv_clipped[idx].shape
        subset = flatten(adv[idx])
        # clipped_subset = np.clip(subset, low, high)
        subset = np.minimum(subset, high)
        subset = np.maximum(subset, low)
        adv_clipped[idx] = subset.reshape(shape)

    pred_adv_clip = predict_numpy(model, adv_clipped, device)
    print('Acc on adv with clip:', np.mean(pred_adv_clip == y))
    print('Class changed after clipping:', np.sum(pred_adv != pred_adv_clip))

    pred_X = predict_numpy(model, X, device)
    # NOTE: The original assertion (`assert not np.all([pred_X, y])`) did not
    # check anything meaningful; the clean samples were filtered to be
    # correctly classified, so the intended check is presumably equality.
    assert np.all(pred_X == y)

    baard_label_adv = detector.detect(adv_clipped, pred_adv_clip)
    s1_blocked = detector.stages[0].predict(adv_clipped, pred_adv_clip)
    print('Blocked by Stage1:', np.sum(s1_blocked))
    acc = acc_on_adv(pred_adv_clip, y, baard_label_adv)
    print('Acc_on_adv:', acc)

    baard_label_x = detector.detect(X, y)
    print('FPR:', np.mean(baard_label_x))

    output = {
        'X': X,
        'adv': adv_clipped,
        'y': y,
        'baard_label_x': baard_label_x,
        'baard_label_adv': baard_label_adv
    }
    torch.save(output, file_path)
    print('Save to:', file_path)
    print()
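
# A minimal, self-contained sketch of the bounding-box clipping performed in
# get_baard_output() above, on synthetic data. Here `low`/`high` stand in for
# the per-class quantile thresholds learned by ApplicabilityStage; the helper
# name is ours, not the repository's.
def _clip_to_box_sketch():
    import numpy as np
    rng = np.random.default_rng(0)
    x = rng.random((8, 784))            # flattened samples
    low = np.quantile(x, 0.01, axis=0)  # lower bound per feature
    high = np.quantile(x, 0.99, axis=0) # upper bound per feature
    # Feature-wise clip into [low, high], same as the min/max pair above.
    clipped = np.maximum(np.minimum(x, high), low)
    return clipped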

def main():
    with open('data.json') as data_json:
        data_params = json.load(data_json)

    parser = argparse.ArgumentParser()
    parser.add_argument('--data', type=str, required=True)
    parser.add_argument('--data_path', type=str, default='data')
    parser.add_argument('--output_path', type=str, default='results')
    parser.add_argument('--pretrained', type=str, required=True)
    parser.add_argument('--adv', type=str, required=True, help="Example: 'mnist_basic_apgd_0.3'")
    parser.add_argument('--defence', type=str, required=True, choices=data_params['defences'])
    parser.add_argument('--param', type=str, required=True)
    parser.add_argument('--suffix', type=str)
    parser.add_argument('--random_state', type=int, default=1234)
    parser.add_argument('--save', type=int, default=1, choices=[0, 1])
    args = parser.parse_args()
    print(args)

    set_seeds(args.random_state)

    if not os.path.exists(args.output_path):
        print('Output folder does not exist. Create:', args.output_path)
        os.mkdir(args.output_path)

    print('Dataset:', args.data)
    print('Pretrained model:', args.pretrained)
    print('Pretrained samples:', args.adv + '_adv.npy')
    print('Defence:', args.defence)

    with open(args.param) as param_json:
        param = json.load(param_json)
    param['n_classes'] = data_params['data'][args.data]['n_classes']
    print('Param:', param)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print('Device: {}'.format(device))

    # Prepare data
    transforms = tv.transforms.Compose([tv.transforms.ToTensor()])
    if args.data == 'mnist':
        dataset_train = datasets.MNIST(args.data_path, train=True, download=True, transform=transforms)
        dataset_test = datasets.MNIST(args.data_path, train=False, download=True, transform=transforms)
    elif args.data == 'cifar10':
        dataset_train = datasets.CIFAR10(args.data_path, train=True, download=True, transform=transforms)
        dataset_test = datasets.CIFAR10(args.data_path, train=False, download=True, transform=transforms)
    else:
        data_path = os.path.join(args.data_path, data_params['data'][args.data]['file_name'])
        print('Read file:', data_path)
        X, y = load_csv(data_path)
        X_train, X_test, y_train, y_test = train_test_split(
            X, y,
            test_size=data_params['data'][args.data]['n_test'],
            random_state=args.random_state)
        scaler = MinMaxScaler().fit(X_train)
        X_train = scaler.transform(X_train)
        X_test = scaler.transform(X_test)
        dataset_train = TensorDataset(
            torch.from_numpy(X_train).type(torch.float32),
            torch.from_numpy(y_train).type(torch.long))
        dataset_test = TensorDataset(
            torch.from_numpy(X_test).type(torch.float32),
            torch.from_numpy(y_test).type(torch.long))

    loader_train = DataLoader(dataset_train, batch_size=512, shuffle=False)
    loader_test = DataLoader(dataset_test, batch_size=512, shuffle=False)

    shape_train = get_shape(loader_train.dataset)
    shape_test = get_shape(loader_test.dataset)
    print('Train set:', shape_train)
    print('Test set:', shape_test)

    use_prob = True
    print('Using softmax layer:', use_prob)

    # Load model
    if args.data == 'mnist':
        model = BaseModel(use_prob=use_prob).to(device)
        model_name = 'basic'
    elif args.data == 'cifar10':
        model_name = args.pretrained.split('_')[1]
        if model_name == 'resnet':
            model = Resnet(use_prob=use_prob).to(device)
        elif model_name == 'vgg':
            model = Vgg(use_prob=use_prob).to(device)
        else:
            raise ValueError('Unknown model: {}'.format(model_name))
    else:
        n_features = data_params['data'][args.data]['n_features']
        n_classes = data_params['data'][args.data]['n_classes']
        model = NumericModel(
            n_features, n_hidden=n_features * 4, n_classes=n_classes,
            use_prob=use_prob).to(device)
        model_name = 'basic' + str(n_features * 4)

    loss = nn.CrossEntropyLoss()
    pretrained_path = os.path.join(args.output_path, args.pretrained)
    model.load_state_dict(torch.load(pretrained_path, map_location=device))

    _, acc_train = validate(model, loader_train, loss, device)
    _, acc_test = validate(model, loader_test, loss, device)
    print('Accuracy on train set: {:.4f}%'.format(acc_train * 100))
    print('Accuracy on test set: {:.4f}%'.format(acc_test * 100))

    # Create a subset which only contains recognisable samples.
    # The original train and test sets are no longer needed.
    tensor_train_X, tensor_train_y = get_correct_examples(
        model, dataset_train, device=device, return_tensor=True)
    dataset_train = TensorDataset(tensor_train_X, tensor_train_y)
    loader_train = DataLoader(dataset_train, batch_size=512, shuffle=True)
    _, acc_perfect = validate(model, loader_train, loss, device)
    print('Accuracy on {} filtered train set: {:.4f}%'.format(
        len(dataset_train), acc_perfect * 100))

    tensor_test_X, tensor_test_y = get_correct_examples(
        model, dataset_test, device=device, return_tensor=True)
    dataset_test = TensorDataset(tensor_test_X, tensor_test_y)
    loader_test = DataLoader(dataset_test, batch_size=512, shuffle=False)
    _, acc_perfect = validate(model, loader_test, loss, device)
    print('Accuracy on {} filtered test set: {:.4f}%'.format(
        len(dataset_test), acc_perfect * 100))

    # Load pre-trained adversarial examples
    path_benign = os.path.join(args.output_path, args.adv + '_x.npy')
    path_adv = os.path.join(args.output_path, args.adv + '_adv.npy')
    path_y = os.path.join(args.output_path, args.adv + '_y.npy')
    X_benign = np.load(path_benign)
    adv = np.load(path_adv)
    y_true = np.load(path_y)

    dataset = TensorDataset(torch.from_numpy(X_benign), torch.from_numpy(y_true))
    loader = DataLoader(dataset, batch_size=512, shuffle=False)
    _, acc = validate(model, loader, loss, device)
    print('Accuracy on {} benign samples: {:.4f}%'.format(len(dataset), acc * 100))

    dataset = TensorDataset(torch.from_numpy(adv), torch.from_numpy(y_true))
    loader = DataLoader(dataset, batch_size=512, shuffle=False)
    _, acc = validate(model, loader, loss, device)
    print('Accuracy on {} adversarial examples: {:.4f}%'.format(len(dataset), acc * 100))

    # Do NOT shuffle the indices, so different defences can use the same test set.
    dataset = TensorDataset(torch.from_numpy(adv))
    loader = DataLoader(dataset, batch_size=512, shuffle=False)
    pred_adv = predict(model, loader, device).cpu().detach().numpy()

    # Find the thresholds using the 2nd half.
    n = len(X_benign) // 2
    # Merge benign samples and adversarial examples into one set.
    # These labels indicate whether a sample is an adversarial example.
    X_val, labels_val = merge_and_generate_labels(adv[n:], X_benign[n:], flatten=False)
    # The predictions for benign samples are exactly the same as the true labels.
    pred_val = np.concatenate((pred_adv[n:], y_true[n:]))

    X_train = tensor_train_X.cpu().detach().numpy()
    y_train = tensor_train_y.cpu().detach().numpy()

    # Train defence
    time_start = time.time()
    if args.defence == 'baard':
        sequence = param['sequence']
        stages = []
        if sequence[0]:
            stages.append(ApplicabilityStage(n_classes=param['n_classes'], quantile=param['q1']))
        if sequence[1]:
            stages.append(ReliabilityStage(n_classes=param['n_classes'], k=param['k_re'], quantile=param['q2']))
        if sequence[2]:
            stages.append(DecidabilityStage(n_classes=param['n_classes'], k=param['k_de'], quantile=param['q3']))
        print('BAARD: # of stages:', len(stages))
        detector = BAARDOperator(stages=stages)

        # Run preprocessing
        baard_train_path = os.path.join(
            args.output_path, '{}_{}_baard_train.pt'.format(args.data, model_name))
        obj = torch.load(baard_train_path)
        X_baard = obj['X_train']
        y_train = obj['y_train']

        # Fit the detector with the filtered train set.
        detector.stages[0].fit(X_baard, y_train)
        detector.stages[1].fit(X_train, y_train)
        if len(detector.stages) == 3:
            detector.stages[2].fit(X_train, y_train)
        detector.search_thresholds(X_val, pred_val, labels_val)
        path_baard = os.path.join(
            args.output_path, 'baard_{}_{}_param.pt'.format(args.data, model_name))
        detector.save(path_baard)
    elif args.defence == 'fs':
        squeezers = []
        if args.data == 'mnist':
            squeezers.append(DepthSqueezer(x_min=0.0, x_max=1.0, bit_depth=1))
            squeezers.append(MedianSqueezer(x_min=0.0, x_max=1.0, kernel_size=2))
        elif args.data == 'cifar10':
            squeezers.append(DepthSqueezer(x_min=0.0, x_max=1.0, bit_depth=4))
            squeezers.append(MedianSqueezer(x_min=0.0, x_max=1.0, kernel_size=2))
            squeezers.append(NLMeansColourSqueezer(
                x_min=0.0, x_max=1.0, h=2, templateWindowsSize=3, searchWindowSize=13))
        else:
            raise NotImplementedError
        print('FS: # of squeezers:', len(squeezers))
        detector = FeatureSqueezingTorch(
            classifier=model, lr=0.001, momentum=0.9, weight_decay=5e-4,
            loss=loss, batch_size=128, x_min=0.0, x_max=1.0,
            squeezers=squeezers, n_classes=param['n_classes'], device=device)
        path_fs = os.path.join(
            args.output_path, '{}_fs.pt'.format(args.pretrained.split('.')[0]))
        detector.load(path_fs)
        detector.search_thresholds(X_val, pred_val, labels_val)
    elif args.defence == 'lid':
        # NOTE: This batch_size is not the same as the mini-batch size of the
        # neural network.
        before_softmax = args.data == 'cifar10'
        detector = LidDetector(
            model, k=param['k'], batch_size=param['batch_size'],
            x_min=0.0, x_max=1.0, device=device, before_softmax=before_softmax)
        # LID uses its own training set.
        X_train, y_train = detector.get_train_set(
            X_benign[n:], adv[n:], std_dominator=param['std_dominator'])
        detector.fit(X_train, y_train, verbose=1)
    elif args.defence == 'magnet':
        magnet_detectors = []
        # Different datasets require different autoencoders.
        if args.data == 'mnist':
            # autoencoder1 and autoencoder2
            magnet_detectors.append(MagNetDetector(
                encoder=Autoencoder1(n_channel=1), classifier=model,
                lr=param['lr'], batch_size=param['batch_size'],
                weight_decay=param['weight_decay'], x_min=0.0, x_max=1.0,
                noise_strength=param['noise_strength'], algorithm='error',
                p=1, device=device))
            magnet_detectors.append(MagNetDetector(
                encoder=Autoencoder2(n_channel=1), classifier=model,
                lr=param['lr'], batch_size=param['batch_size'],
                weight_decay=param['weight_decay'], x_min=0.0, x_max=1.0,
                noise_strength=param['noise_strength'], algorithm='error',
                p=2, device=device))
        elif args.data == 'cifar10':
            autoencoder = Autoencoder2(
                n_channel=data_params['data'][args.data]['n_features'][0])
            # There are 3 autoencoder-based detectors, but they share the same
            # architecture.
            magnet_detectors.append(MagNetDetector(
                encoder=autoencoder, classifier=model,
                lr=param['lr'], batch_size=param['batch_size'],
                weight_decay=param['weight_decay'], x_min=0.0, x_max=1.0,
                noise_strength=param['noise_strength'], algorithm='error',
                p=2, device=device))
            magnet_detectors.append(MagNetDetector(
                encoder=autoencoder, classifier=model,
                lr=param['lr'], batch_size=param['batch_size'],
                weight_decay=param['weight_decay'], x_min=0.0, x_max=1.0,
                noise_strength=param['noise_strength'], algorithm='prob',
                temperature=10, device=device))
            magnet_detectors.append(MagNetDetector(
                encoder=autoencoder, classifier=model,
                lr=param['lr'], batch_size=param['batch_size'],
                weight_decay=param['weight_decay'], x_min=0.0, x_max=1.0,
                noise_strength=param['noise_strength'], algorithm='prob',
                temperature=40, device=device))
        else:
            raise ValueError('MagNet requires an autoencoder.')

        for i, ae in enumerate(magnet_detectors, start=1):
            ae_path = os.path.join(
                args.output_path, 'autoencoder_{}_{}_{}.pt'.format(args.data, model_name, i))
            ae.load(ae_path)
            tensor_X_test, _ = dataset2tensor(dataset_test)
            X_test = tensor_X_test.cpu().detach().numpy()
            print('Autoencoder {} MSE training set: {:.6f}, test set: {:.6f}'.format(
                i, ae.score(X_train), ae.score(X_test)))
            print('Autoencoder {} threshold: {}'.format(i, ae.threshold))

        reformer = MagNetAutoencoderReformer(
            encoder=magnet_detectors[0].encoder,
            batch_size=param['batch_size'],
            device=device)
        detector = MagNetOperator(
            classifier=model, detectors=magnet_detectors, reformer=reformer,
            batch_size=param['batch_size'], device=device)
    elif args.defence == 'rc':
        detector = RegionBasedClassifier(
            model=model, r=param['r'], sample_size=param['sample_size'],
            n_classes=param['n_classes'], x_min=0.0, x_max=1.0,
            batch_size=param['batch_size'], r0=param['r0'],
            step_size=param['step_size'], stop_value=param['stop_value'],
            device=device)
        # The region-based classifier only uses benign samples to search the
        # threshold. The r value is already set to the optimal, so there is no
        # need to search it.
        # detector.search_thresholds(X_val, pred_val, labels_val, verbose=0)
    else:
        raise ValueError('{} is not supported!'.format(args.defence))
    time_elapsed = time.time() - time_start
    print('Total training time:', str(datetime.timedelta(seconds=time_elapsed)))

    # Test defence
    time_start = time.time()
    X_test, labels_test = merge_and_generate_labels(adv[:n], X_benign[:n], flatten=False)
    pred_test = np.concatenate((pred_adv[:n], y_true[:n]))
    y_test = np.concatenate((y_true[:n], y_true[:n]))

    # Only MagNet uses a reformer.
    X_reformed = None
    if args.defence == 'magnet':
        X_reformed, res_test = detector.detect(X_test, pred_test)
        y_pred = predict_numpy(model, X_reformed, device)
    elif args.defence == 'rc':
        y_pred = detector.detect(X_test, pred_test)
        res_test = np.zeros_like(y_pred)
    else:
        res_test = detector.detect(X_test, pred_test)
        y_pred = pred_test

    acc = acc_on_adv(y_pred[:n], y_test[:n], res_test[:n])
    if args.defence == 'rc':
        fpr = np.mean(y_pred[n:] != y_test[n:])
    else:
        fpr = np.mean(res_test[n:])
    print('Acc_on_adv:', acc)
    print('FPR:', fpr)
    time_elapsed = time.time() - time_start
    print('Total test time:', str(datetime.timedelta(seconds=time_elapsed)))

    # Save results
    suffix = '_' + args.suffix if args.suffix is not None else ''
    if args.save:
        path_result = os.path.join(
            args.output_path, '{}_{}{}.pt'.format(args.adv, args.defence, suffix))
        torch.save({
            'X_val': X_val,
            'y_val': np.concatenate((y_true[n:], y_true[n:])),
            'labels_val': labels_val,
            'X_test': X_test,
            'y_test': y_test,
            'labels_test': labels_test,
            'res_test': y_pred if args.defence == 'rc' else res_test,
            'X_reformed': X_reformed,
            'param': param}, path_result)
        print('Saved to:', path_result)
    else:
        print('No file is saved!')
    print()
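
# A hypothetical invocation of the defence script above (the script name and
# the JSON path are assumptions; '--adv' follows the format given in its help
# string):
#
#   python defence.py --data mnist --pretrained mnist_200.pt \
#       --adv mnist_basic_apgd_0.3 --defence baard --param params/baard_param.json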

def run_full_pipeline_baard(data, model_name, path, seed, json_param, att_name, eps):
    set_seeds(seed)

    # The line attack takes no hyperparameter.
    if att_name == 'line':
        eps = 1
    print('args:', data, model_name, path, seed, json_param, att_name, eps)

    if not os.path.exists(path):
        print('Output folder does not exist. Create:', path)
        os.mkdir(path)

    # Get data
    n_classes = 10
    transform = tv.transforms.Compose([tv.transforms.ToTensor()])
    if data == 'mnist':
        dataset_train = datasets.MNIST(PATH_DATA, train=True, download=True, transform=transform)
        dataset_test = datasets.MNIST(PATH_DATA, train=False, download=True, transform=transform)
    elif data == 'cifar10':
        transform_train = tv.transforms.Compose([
            tv.transforms.RandomHorizontalFlip(),
            tv.transforms.RandomCrop(32, padding=4),
            tv.transforms.ToTensor()])
        dataset_train = datasets.CIFAR10(PATH_DATA, train=True, download=True, transform=transform_train)
        dataset_test = datasets.CIFAR10(PATH_DATA, train=False, download=True, transform=transform)
    else:
        raise ValueError('Unknown dataset: {}'.format(data))

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print('Device: {}'.format(device))

    file_model = os.path.join(path, '{}_{}_model.pt'.format(data, model_name))
    print('Start training {} model on {}...'.format(model_name, data))
    model = train_model(data, model_name, dataset_train, dataset_test, EPOCHS, device, file_model)

    # Split data
    tensor_X, tensor_y = get_correct_examples(model, dataset_test, device=device, return_tensor=True)
    dataset = TensorDataset(tensor_X, tensor_y)
    loader = DataLoader(dataset, batch_size=512, shuffle=False)
    _, acc_perfect = validate(model, loader, nn.CrossEntropyLoss(), device)
    print('Accuracy on {} filtered test set: {:.2f}%'.format(tensor_y.size(0), acc_perfect * 100))

    # Split rules:
    # 1. Benchmark_defence_test: 1000 (def_test)
    # 2. Benchmark_defence_val:  1000 (def_val)
    # 3. Test white-box attack:  2000 (att_test)
    # 4. Train surrogate model:  2000 (sur_train)
    # ---------------------Total: 6000
    idx_shuffle = np.random.permutation(tensor_X.size(0))[:6000]
    X = tensor_X[idx_shuffle].cpu().detach().numpy()
    y = tensor_y[idx_shuffle].cpu().detach().numpy()

    print('-------------------------------------------------------------------')
    print('Start generating {} adversarial examples...'.format(len(idx_shuffle)))
    adv, X, y = run_attack_untargeted(file_model, X, y, att_name=att_name, eps=eps, device=device)

    print('-------------------------------------------------------------------')
    print('Start testing adversarial examples...')
    pred = predict_numpy(model, adv, device)
    print('Acc on adv:', np.mean(pred == y))

    X_def_test = X[:1000]
    y_def_test = y[:1000]
    adv_def_test = adv[:1000]
    pred_adv_def_test = pred[:1000]

    X_def_val = X[1000:2000]
    y_def_val = y[1000:2000]
    adv_def_val = adv[1000:2000]        # Unused by BAARD
    pred_adv_def_val = pred[1000:2000]  # Unused by BAARD

    X_att_test = X[2000:4000]
    y_att_test = y[2000:4000]
    adv_att_test = adv[2000:4000]
    pred_adv_att_test = pred[2000:4000]

    X_surro_train = X[4000:]
    y_surro_train = y[4000:]
    adv_surro_train = adv[4000:]

    # Concatenate the adversarial examples computed for different epsilons.
    if data == 'mnist':
        eps_1 = 1
        eps_2 = 5
        eps_3 = 8
        eps_4 = 3
    elif data == 'cifar10':
        eps_1 = 0.05
        eps_2 = 0.1
        eps_3 = 0.5
        eps_4 = 1
    else:
        raise ValueError('Unknown dataset: {}'.format(data))

    print('-------------------------------------------------------------------')
    print('Start training BAARD...')
    # Run preprocessing
    file_baard_train = os.path.join(path, '{}_{}_baard_s1_train_data.pt'.format(data, model_name))
    if os.path.exists(file_baard_train):
        print('Found existing BAARD preprocess data:', file_baard_train)
        obj = torch.load(file_baard_train)
        X_baard_train_s1 = obj['X_s1']
        X_baard_train = obj['X']
        y_baard_train = obj['y']
    else:
        tensor_X, tensor_y = get_correct_examples(model, dataset_train, device=device, return_tensor=True)
        X_baard_train = tensor_X.cpu().detach().numpy()
        y_baard_train = tensor_y.cpu().detach().numpy()
        # FIXME: This gives an error because it expects a PIL image.
        X_baard_train_s1 = preprocess_baard(data, X_baard_train).cpu().detach().numpy()
        obj = {
            'X_s1': X_baard_train_s1,
            'X': X_baard_train,
            'y': y_baard_train
        }
        torch.save(obj, file_baard_train)
        print('Save BAARD training data to:', file_baard_train)
    print('X_baard_train_s1', X_baard_train_s1.shape)

    with open(json_param) as j:
        baard_param = json.load(j)
    print('Param:', baard_param)
    sequence = baard_param['sequence']
    stages = []
    if sequence[0]:
        stages.append(ApplicabilityStage(n_classes=n_classes, quantile=baard_param['q1']))
    if sequence[1]:
        stages.append(ReliabilityStage(n_classes=n_classes, k=baard_param['k_re'], quantile=baard_param['q2']))
    if sequence[2]:
        stages.append(DecidabilityStage(n_classes=n_classes, k=baard_param['k_de'], quantile=baard_param['q3']))
    print('BAARD stages:', len(stages))
    detector = BAARDOperator(stages=stages)

    assert X_baard_train.shape == X_baard_train_s1.shape, \
        'Unmatched size: {}, {}'.format(X_baard_train.shape, X_baard_train_s1.shape)
    assert X_baard_train_s1.shape[0] == y_baard_train.shape[0]
    detector.stages[0].fit(X_baard_train_s1, y_baard_train)
    for stage in detector.stages[1:]:
        stage.fit(X_baard_train, y_baard_train)

    file_baard_threshold = os.path.join(path, '{}_{}_baard_threshold.pt'.format(data, model_name))
    if os.path.exists(file_baard_threshold):
        print('Found existing BAARD thresholds:', file_baard_threshold)
        detector.load(file_baard_threshold)
    else:
        # Search thresholds
        detector.search_thresholds(X_def_val, y_def_val, np.zeros_like(y_def_val))
        detector.save(file_baard_threshold)

    print('-------------------------------------------------------------------')
    print('Start testing BAARD...')
    time_start = time.time()
    label_adv = detector.detect(adv_def_test, pred_adv_def_test)
    label_clean = detector.detect(X_def_test, y_def_test)
    time_elapsed = time.time() - time_start
    print('Total run time:', str(datetime.timedelta(seconds=time_elapsed)))

    acc = acc_on_adv(pred_adv_def_test, y_def_test, label_adv)
    fpr = np.mean(label_clean)
    print('Acc_on_adv:', acc)
    print('FPR:', fpr)

    obj = {
        'X': X_def_test,
        'y': y_def_test,
        'adv': adv_def_test,
        'label_adv': label_adv,
        'label_clean': label_clean,
        'pred_adv': pred_adv_def_test
    }
    file_baard_output = os.path.join(path, '{}_{}_{}_{}_baard_output.pt'.format(
        data, model_name, att_name, round(eps * 1000)))
    torch.save(obj, file_baard_output)
    print('Save to:', file_baard_output)

    print('-------------------------------------------------------------------')
    print('Start training surrogate model...')
    file_surro = os.path.join(path, '{}_{}_baard_surrogate.pt'.format(data, model_name))
    # if os.path.exists(file_surro):
    #     print('Found existing surrogate model:', file_surro)
    #     surrogate = get_pretrained_surrogate(file_surro, device)
    # else:
    #     # Prepare data for surrogate model
    #     file_surro_data = os.path.join(path, '{}_{}_surrogate_data.pt'.format(data, model_name))
    #     if os.path.exists(file_surro_data):
    #         print('Found existing surrogate dataset:', file_surro_data)
    #         obj = torch.load(file_surro_data)
    #         X_train = obj['X_train']
    #         label_train = obj['label_train']
    #         X_test = obj['X_test']
    #         label_test = obj['label_test']
    #         print(X_train.shape, label_train.shape, X_test.shape, label_test.shape)
    #         print('Labelled as adv:', np.mean(label_train == 1), np.mean(label_test == 1))
    #     else:
    file_surro_data = os.path.join(path, '{}_{}_surrogate_data.pt'.format(data, model_name))
    adv_surro_train_2 = run_attack_untargeted(
        file_model, X_surro_train, y_surro_train, att_name=att_name, eps=eps_1, device=device)[0]
    adv_surro_train_3 = run_attack_untargeted(
        file_model, X_surro_train, y_surro_train, att_name=att_name, eps=eps_2, device=device)[0]
    adv_surro_train_4 = run_attack_untargeted(
        file_model, X_surro_train, y_surro_train, att_name=att_name, eps=eps_3, device=device)[0]
    adv_surro_train_5 = run_attack_untargeted(
        file_model, X_surro_train, y_surro_train, att_name=att_name, eps=eps_4, device=device)[0]
    adv_surro_train = np.append(adv_surro_train, adv_surro_train_2, axis=0)
    adv_surro_train = np.append(adv_surro_train, adv_surro_train_3, axis=0)
    adv_surro_train = np.append(adv_surro_train, adv_surro_train_4, axis=0)
    adv_surro_train = np.append(adv_surro_train, adv_surro_train_5, axis=0)
    # Also replicate the benign samples to avoid an unbalanced dataset.
    X_surro_train_replicated = np.append(X_surro_train, X_surro_train, axis=0)
    X_surro_train_replicated = np.append(X_surro_train_replicated, X_surro_train, axis=0)
    X_surro_train_replicated = np.append(X_surro_train_replicated, X_surro_train, axis=0)
    X_surro_train_replicated = np.append(X_surro_train_replicated, X_surro_train, axis=0)
    y_surro_train_replicated = np.append(y_surro_train, y_surro_train)
    y_surro_train_replicated = np.append(y_surro_train_replicated, y_surro_train)
    y_surro_train_replicated = np.append(y_surro_train_replicated, y_surro_train)
    y_surro_train_replicated = np.append(y_surro_train_replicated, y_surro_train)

    # Classify the surrogate set.
    pred_adv_surro_train = predict_numpy(model, adv_surro_train, device)
    label_adv_train = detector.detect(adv_surro_train, pred_adv_surro_train)
    label_X_train = detector.detect(X_surro_train_replicated, y_surro_train_replicated)
    # Concatenate the clean and the adversarial samples.
    X_train = np.concatenate((X_surro_train_replicated, adv_surro_train))
    label_train = np.concatenate((label_X_train, label_adv_train))

    label_adv_test = detector.detect(adv_att_test[:1000], pred_adv_att_test[:1000])
    label_X_test = detector.detect(X_att_test[:1000], y_att_test[:1000])
    X_test = np.concatenate((X_att_test[:1000], adv_att_test[:1000]))
    label_test = np.concatenate((label_X_test, label_adv_test))
    print(X_train.shape, label_train.shape, X_test.shape, label_test.shape)
    print('Labelled as adv:', np.mean(label_train == 1), np.mean(label_test == 1))

    obj = {
        'X_train': X_train,
        'y_train': np.concatenate((y_surro_train, y_surro_train)),
        'pred_train': np.concatenate((y_surro_train, pred_adv_surro_train)),
        'label_train': label_train,
        'X_test': X_test,
        'y_test': np.concatenate((y_att_test[:1000], y_att_test[:1000])),
        'pred_test': np.concatenate((y_att_test[:1000], pred_adv_att_test[:1000])),
        'label_test': label_test
    }
    torch.save(obj, file_surro_data)
    print('Save surrogate training data to:', file_surro_data)

    surrogate = train_surrogate(X_train, X_test, label_train, label_test, epochs=EPOCHS, device=device)
    torch.save(surrogate.state_dict(), file_surro)
    print('Save surrogate model to:', file_surro)

    print('-------------------------------------------------------------------')
    print('Start testing surrogate model...')
    X_test = np.concatenate((X_att_test[1000:], adv_att_test[1000:]))
    pred_test = predict_numpy(model, X_test, device)
    label_test = detector.detect(X_test, pred_test)
    acc = acc_on_adv(pred_test[1000:], y_att_test[1000:], label_test[1000:])
    fpr = np.mean(label_test[:1000])
    print('BAARD Acc_on_adv:', acc)
    print('BAARD FPR:', fpr)
    label_surro = predict_numpy(surrogate, X_test, device)
    acc = np.mean(label_surro == label_test)
    print('Acc on surrogate:', acc)
    print('DONE!')
    print('-------------------------------------------------------------------\n')
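
# Example call under assumed values (the JSON parameter path is hypothetical;
# 'apgd2' with eps=2.0 matches the validation attack used elsewhere in this
# file; PATH_DATA and EPOCHS are module-level constants referenced above):
#
#   run_full_pipeline_baard('mnist', 'basic', 'results', 1234,
#                           'params/baard_mnist.json', 'apgd2', 2.0)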

def run_full_pipeline_magnet(data, model_name, path, seed, json_param, att_name, eps):
    set_seeds(seed)
    print('args:', data, model_name, path, seed, json_param, att_name, eps)

    if not os.path.exists(path):
        print('Output folder does not exist. Create:', path)
        os.mkdir(path)

    # Get data
    transform = tv.transforms.Compose([tv.transforms.ToTensor()])
    if data == 'mnist':
        dataset_train = datasets.MNIST(PATH_DATA, train=True, download=True, transform=transform)
        dataset_test = datasets.MNIST(PATH_DATA, train=False, download=True, transform=transform)
    elif data == 'cifar10':
        transform_train = tv.transforms.Compose([
            tv.transforms.RandomHorizontalFlip(),
            tv.transforms.RandomCrop(32, padding=4),
            tv.transforms.ToTensor()])
        dataset_train = datasets.CIFAR10(PATH_DATA, train=True, download=True, transform=transform_train)
        dataset_test = datasets.CIFAR10(PATH_DATA, train=False, download=True, transform=transform)
    else:
        raise ValueError('Unknown dataset: {}'.format(data))

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print('Device: {}'.format(device))

    file_model = os.path.join(path, '{}_{}_model.pt'.format(data, model_name))
    print('Start training {} model on {}...'.format(model_name, data))
    model = train_model(data, model_name, dataset_train, dataset_test, EPOCHS, device, file_model)

    # Split data
    tensor_X, tensor_y = get_correct_examples(model, dataset_test, device=device, return_tensor=True)
    dataset = TensorDataset(tensor_X, tensor_y)
    loader = DataLoader(dataset, batch_size=512, shuffle=False)
    _, acc_perfect = validate(model, loader, nn.CrossEntropyLoss(), device)
    print('Accuracy on {} filtered test set: {:.2f}%'.format(tensor_y.size(0), acc_perfect * 100))

    # Split rules:
    # 1. Benchmark_defence_test: 1000 (def_test)
    # 2. Benchmark_defence_val:  1000 (def_val)
    # 3. Test white-box attack:  2000 (att_test)
    # 4. Train surrogate model:  2000 (sur_train)
    # ---------------------Total: 6000
    idx_shuffle = np.random.permutation(tensor_X.size(0))[:6000]
    X = tensor_X[idx_shuffle].cpu().detach().numpy()
    y = tensor_y[idx_shuffle].cpu().detach().numpy()

    print('-------------------------------------------------------------------')
    print('Start generating {} adversarial examples...'.format(len(idx_shuffle)))
    adv, X, y = run_attack_untargeted(file_model, X, y, att_name=att_name, eps=eps, device=device)

    print('-------------------------------------------------------------------')
    print('Start testing adversarial examples...')
    pred = predict_numpy(model, adv, device)
    print('Acc on adv:', np.mean(pred == y))

    X_def_test = X[:1000]
    y_def_test = y[:1000]
    adv_def_test = adv[:1000]
    pred_adv_def_test = pred[:1000]
    X_def_val = X[1000:2000]
    # y_def_val = y[1000:2000]
    # adv_def_val = adv[1000:2000]
    # pred_adv_def_val = pred[1000:2000]
    # X_att_test = X[2000:4000]
    # y_att_test = y[2000:4000]
    # adv_att_test = adv[2000:4000]
    # pred_adv_att_test = pred[2000:4000]
    # X_surro_train = X[4000:]
    # y_surro_train = y[4000:]
    # adv_surro_train = adv[4000:]
    # pred_adv_surro_train = pred[4000:]

    print('-------------------------------------------------------------------')
    print('Start training MagNet...')
    # Run preprocessing
    tensor_X, tensor_y = get_correct_examples(model, dataset_train, device=device, return_tensor=True)
    X_train = tensor_X.cpu().detach().numpy()
    y_train = tensor_y.cpu().detach().numpy()
    with open(json_param) as j:
        param = json.load(j)
    time_start = time.time()
    detector = train_magnet(data, model_name, X_train, y_train, X_def_val, param,
                            device, path, EPOCHS, model=model)
    time_elapsed = time.time() - time_start
    print('Total run time:', str(datetime.timedelta(seconds=time_elapsed)))

    print('-------------------------------------------------------------------')
    print('Start testing MagNet...')
    time_start = time.time()
    adv_reformed_test, label_adv = detector.detect(adv_def_test, pred_adv_def_test)
    X_reformed_test, label_clean = detector.detect(X_def_test, y_def_test)
    time_elapsed = time.time() - time_start
    print('Total run time:', str(datetime.timedelta(seconds=time_elapsed)))

    pred_adv_reformed = predict_numpy(model, adv_reformed_test, device)
    acc = acc_on_adv(pred_adv_reformed, y_def_test, label_adv)
    fpr = np.mean(label_clean)
    print('Acc_on_adv:', acc)
    print('FPR:', fpr)

    obj = {
        'X': X_def_test,
        'y': y_def_test,
        'adv': adv_def_test,
        'label_adv': label_adv,
        'label_clean': label_clean,
        'pred_adv': pred_adv_def_test,
        'X_reformed': X_reformed_test,
        'adv_reformed': adv_reformed_test,
        'pred_adv_reformed': pred_adv_reformed
    }
    file_detector_output = os.path.join(path, '{}_{}_{}_{}_magnet_output.pt'.format(
        data, model_name, att_name, round(eps * 1000)))
    torch.save(obj, file_detector_output)
    print('Save to:', file_detector_output)
    print('DONE!')
    print('-------------------------------------------------------------------\n')
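
# Example call under the same assumptions as the BAARD pipeline above (the
# JSON parameter path is hypothetical):
#
#   run_full_pipeline_magnet('mnist', 'basic', 'results', 1234,
#                            'params/magnet_mnist.json', 'apgd2', 2.0)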

def main():
    with open('data.json') as data_json:
        data_params = json.load(data_json)

    parser = argparse.ArgumentParser()
    parser.add_argument('--data', type=str, required=True)
    parser.add_argument('--data_path', type=str, default='data')
    parser.add_argument('--output_path', type=str, default='results')
    parser.add_argument('--pretrained', type=str, required=True)
    parser.add_argument('--adv', type=str, required=True, help="Example: 'mnist_basic_apgd_0.3'")
    parser.add_argument('--random_state', type=int, default=1234)
    args = parser.parse_args()
    print(args)

    set_seeds(args.random_state)

    print('Dataset:', args.data)
    print('Pretrained model:', args.pretrained)
    print('Pretrained samples:', args.adv + '_adv.npy')

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print('Device: {}'.format(device))

    # Prepare data
    transforms = tv.transforms.Compose([tv.transforms.ToTensor()])
    if args.data == 'mnist':
        dataset_train = datasets.MNIST(args.data_path, train=True, download=True, transform=transforms)
        dataset_test = datasets.MNIST(args.data_path, train=False, download=True, transform=transforms)
    elif args.data == 'cifar10':
        dataset_train = datasets.CIFAR10(args.data_path, train=True, download=True, transform=transforms)
        dataset_test = datasets.CIFAR10(args.data_path, train=False, download=True, transform=transforms)
    else:
        data_path = os.path.join(args.data_path, data_params['data'][args.data]['file_name'])
        print('Read file:', data_path)
        X, y = load_csv(data_path)
        X_train, X_test, y_train, y_test = train_test_split(
            X, y,
            test_size=data_params['data'][args.data]['n_test'],
            random_state=args.random_state)
        scaler = MinMaxScaler().fit(X_train)
        X_train = scaler.transform(X_train)
        X_test = scaler.transform(X_test)
        dataset_train = TensorDataset(
            torch.from_numpy(X_train).type(torch.float32),
            torch.from_numpy(y_train).type(torch.long))
        dataset_test = TensorDataset(
            torch.from_numpy(X_test).type(torch.float32),
            torch.from_numpy(y_test).type(torch.long))

    # NOTE: The train set is always shuffled!
    loader_train = DataLoader(dataset_train, batch_size=512, shuffle=True)
    loader_test = DataLoader(dataset_test, batch_size=512, shuffle=False)

    shape_train = get_shape(loader_train.dataset)
    shape_test = get_shape(loader_test.dataset)
    print('Train set:', shape_train)
    print('Test set:', shape_test)

    use_prob = True
    print('Using softmax layer:', use_prob)
    n_classes = data_params['data'][args.data]['n_classes']

    # Load model
    if args.data == 'mnist':
        model = BaseModel(use_prob=use_prob).to(device)
        model_name = 'basic'
    elif args.data == 'cifar10':
        model_name = args.pretrained.split('_')[1]
        if model_name == 'resnet':
            model = Resnet(use_prob=use_prob).to(device)
        elif model_name == 'vgg':
            model = Vgg(use_prob=use_prob).to(device)
        else:
            raise ValueError('Unknown model: {}'.format(model_name))
    else:
        n_features = data_params['data'][args.data]['n_features']
        model = NumericModel(
            n_features, n_hidden=n_features * 4, n_classes=n_classes,
            use_prob=use_prob).to(device)
        model_name = 'basic' + str(n_features * 4)

    loss = nn.CrossEntropyLoss()
    pretrained_path = os.path.join(args.output_path, args.pretrained)
    model.load_state_dict(torch.load(pretrained_path, map_location=device))

    _, acc_train = validate(model, loader_train, loss, device)
    _, acc_test = validate(model, loader_test, loss, device)
    print('Accuracy on train set: {:.4f}%'.format(acc_train * 100))
    print('Accuracy on test set: {:.4f}%'.format(acc_test * 100))

    # Create a subset which only contains recognisable samples.
    # The original train and test sets are no longer needed.
    tensor_train_X, tensor_train_y = get_correct_examples(
        model, dataset_train, device=device, return_tensor=True)
    dataset_train = TensorDataset(tensor_train_X, tensor_train_y)
    loader_train = DataLoader(dataset_train, batch_size=512, shuffle=True)
    _, acc_perfect = validate(model, loader_train, loss, device)
    print('Accuracy on {} filtered train set: {:.4f}%'.format(
        len(dataset_train), acc_perfect * 100))

    tensor_test_X, tensor_test_y = get_correct_examples(
        model, dataset_test, device=device, return_tensor=True)
    dataset_test = TensorDataset(tensor_test_X, tensor_test_y)
    loader_test = DataLoader(dataset_test, batch_size=512, shuffle=False)
    _, acc_perfect = validate(model, loader_test, loss, device)
    print('Accuracy on {} filtered test set: {:.4f}%'.format(
        len(dataset_test), acc_perfect * 100))

    # Load pre-trained adversarial examples
    path_benign = os.path.join(args.output_path, args.adv + '_x.npy')
    path_adv = os.path.join(args.output_path, args.adv + '_adv.npy')
    path_y = os.path.join(args.output_path, args.adv + '_y.npy')
    X_benign = np.load(path_benign)
    adv = np.load(path_adv)
    y_true = np.load(path_y)

    dataset = TensorDataset(torch.from_numpy(X_benign), torch.from_numpy(y_true))
    loader = DataLoader(dataset, batch_size=512, shuffle=False)
    _, acc = validate(model, loader, loss, device)
    print('Accuracy on {} benign samples: {:.4f}%'.format(len(dataset), acc * 100))

    dataset = TensorDataset(torch.from_numpy(adv), torch.from_numpy(y_true))
    loader = DataLoader(dataset, batch_size=512, shuffle=False)
    _, acc = validate(model, loader, loss, device)
    print('Accuracy on {} adversarial examples: {:.4f}%'.format(len(dataset), acc * 100))

    # Do NOT shuffle the indices, so different defences can use the same test set.
    dataset = TensorDataset(torch.from_numpy(adv))
    loader = DataLoader(dataset, batch_size=512, shuffle=False)
    pred_adv = predict(model, loader, device).cpu().detach().numpy()

    # Find the thresholds using the 2nd half.
    n = len(X_benign) // 2
    # Merge benign samples and adversarial examples into one set.
    # These labels indicate whether a sample is an adversarial example.
    X_val, labels_val = merge_and_generate_labels(adv[n:], X_benign[n:], flatten=False)
    # The predictions for benign samples are exactly the same as the true labels.
    pred_val = np.concatenate((pred_adv[n:], y_true[n:]))

    X_train = tensor_train_X.cpu().detach().numpy()
    y_train = tensor_train_y.cpu().detach().numpy()

    # Train defence
    time_start = time.time()
    detector = RegionBasedClassifier(
        model=model, r=0.2, sample_size=1000, n_classes=n_classes,
        x_min=0.0, x_max=1.0, batch_size=512, r0=0.0,
        step_size=0.02, stop_value=0.4, device=device)
    r_best = detector.search_thresholds(X_val, pred_val, labels_val, verbose=0)
    time_elapsed = time.time() - time_start
    print('Total training time:', str(datetime.timedelta(seconds=time_elapsed)))

    param = {
        'r': r_best,
        'sample_size': 1000,
        'batch_size': 512,
        'r0': 0,
        'step_size': 0.02,
        'stop_value': 0.40
    }
    # NOTE: This parser defines no '--model' flag, so the original
    # `args.model` would crash; use the model name derived from '--pretrained'.
    path_json = os.path.join('params', 'rc_param_{}_{}.json'.format(args.data, model_name))
    with open(path_json, 'w') as f:
        json.dump(param, f)
    print('Save to:', path_json)
    print()
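
# A hypothetical invocation of the region-based-classifier search script above
# (script name assumed; flags match the argparse definitions in main()):
#
#   python search_rc_param.py --data mnist --pretrained mnist_200.pt \
#       --adv mnist_basic_apgd_0.3
#
# The best radius found is written to params/rc_param_<data>_<model>.json.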
def run_evaluate_baard(data, model_name, path, seed, json_param, att_name, eps):
    set_seeds(seed)

    # The Line attack takes no hyperparameter.
    if att_name == 'line':
        eps = [1]

    print('args:', data, model_name, path, seed, json_param, att_name, eps)

    if not os.path.exists(path):
        print('Output folder does not exist. Create:', path)
        os.mkdir(path)

    # Get data
    n_classes = 10
    transform = tv.transforms.Compose([tv.transforms.ToTensor()])
    if data == 'mnist':
        dataset_train = datasets.MNIST(PATH_DATA, train=True, download=True, transform=transform)
        dataset_test = datasets.MNIST(PATH_DATA, train=False, download=True, transform=transform)
    elif data == 'cifar10':
        transform_train = tv.transforms.Compose([
            tv.transforms.RandomHorizontalFlip(),
            tv.transforms.RandomCrop(32, padding=4),
            tv.transforms.ToTensor()])
        dataset_train = datasets.CIFAR10(PATH_DATA, train=True, download=True, transform=transform_train)
        dataset_test = datasets.CIFAR10(PATH_DATA, train=False, download=True, transform=transform)
    else:
        raise ValueError('Unknown dataset: {}'.format(data))

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print('Device: {}'.format(device))

    file_model = os.path.join(path, '{}_{}_model.pt'.format(data, model_name))
    print('Start training {} model on {}...'.format(model_name, data))
    model = train_model(data, model_name, dataset_train, dataset_test, EPOCHS, device, file_model)

    # Split data
    tensor_X, tensor_y = get_correct_examples(model, dataset_test, device=device, return_tensor=True)
    dataset = TensorDataset(tensor_X, tensor_y)
    loader = DataLoader(dataset, batch_size=512, shuffle=False)
    _, acc_perfect = validate(model, loader, nn.CrossEntropyLoss(), device)
    print('Accuracy on {} filtered test set: {:.2f}%'.format(tensor_y.size(0), acc_perfect * 100))

    print('-------------------------------------------------------------------')
    print('Start training BAARD...')
    file_baard_train = os.path.join(path, '{}_{}_baard_s1_train_data.pt'.format(data, model_name))
    if os.path.exists(file_baard_train):
        print('Found existing BAARD preprocess data:', file_baard_train)
        obj = torch.load(file_baard_train)
        X_baard_train_s1 = obj['X_s1']
        X_baard_train = obj['X']
        y_baard_train = obj['y']
    else:
        raise FileNotFoundError('Cannot find BAARD preprocess data: {}'.format(file_baard_train))
    print('BAARD train set:', X_baard_train_s1.shape)

    with open(json_param) as j:
        baard_param = json.load(j)
    print('Param:', baard_param)
    sequence = baard_param['sequence']
    stages = []
    if sequence[0]:
        stages.append(ApplicabilityStage(n_classes=n_classes, quantile=baard_param['q1']))
    if sequence[1]:
        stages.append(ReliabilityStage(n_classes=n_classes, k=baard_param['k_re'], quantile=baard_param['q2']))
    if sequence[2]:
        stages.append(DecidabilityStage(n_classes=n_classes, k=baard_param['k_de'], quantile=baard_param['q3']))
    print('BAARD stages:', len(stages))

    detector = BAARDOperator(stages=stages)
    assert X_baard_train.shape == X_baard_train_s1.shape, 'Unmatched size: {}, {}'.format(X_baard_train.shape, X_baard_train_s1.shape)
    assert X_baard_train_s1.shape[0] == y_baard_train.shape[0]
    # Stage 1 (Applicability) is fitted on the preprocessed data; the
    # remaining stages are fitted on the unprocessed training data.
    detector.stages[0].fit(X_baard_train_s1, y_baard_train)
    for stage in detector.stages[1:]:
        stage.fit(X_baard_train, y_baard_train)

    file_baard_threshold = os.path.join(path, '{}_{}_baard_threshold.pt'.format(data, model_name))
    if os.path.exists(file_baard_threshold):
        print('Found existing BAARD thresholds:', file_baard_threshold)
        detector.load(file_baard_threshold)
    else:
        raise FileNotFoundError('Cannot find pre-trained BAARD: {}'.format(file_baard_threshold))
    # print('-------------------------------------------------------------------')
    # print('Load surrogate model...')
    # file_surro = os.path.join(path, '{}_{}_baard_surrogate.pt'.format(data, model_name))
    # if os.path.exists(file_surro):
    #     print('Found existing surrogate model:', file_surro)
    #     surrogate = get_pretrained_surrogate(file_surro, device)
    # else:
    #     raise FileNotFoundError('Cannot find pre-trained surrogate model: {}'.format(file_surro))

    print('-------------------------------------------------------------------')
    print('Start evaluating the robustness of the classifier...')
    # np.float was removed in NumPy 1.24; the builtin float is equivalent here.
    eps = np.array(eps, dtype=float)
    n_att = eps.shape[0]
    accs_classifier = np.zeros(n_att, dtype=float)
    accs_on_adv = np.zeros_like(accs_classifier)
    fprs = np.zeros_like(accs_on_adv)

    # The clean samples are shared by all attack files, so load them once
    # from the file of the first epsilon. Use round() consistently for the
    # file-name suffix; int() truncates and can be off by one for some floats.
    file_data = os.path.join(path, '{}_{}_{}_{}.pt'.format(data, model_name, att_name, round(eps[0] * 1000)))
    obj = torch.load(file_data)
    X = obj['X']
    y = obj['y']
    pred_clean = predict_numpy(model, X, device)
    print('Acc on clean samples:', np.mean(pred_clean == y))

    for i in range(n_att):
        print('Evaluating {} eps={}'.format(att_name, eps[i]))
        file_data = os.path.join(path, '{}_{}_{}_{}.pt'.format(data, model_name, att_name, round(eps[i] * 1000)))
        obj = torch.load(file_data)
        adv = obj['adv']
        X_def_test = X[:1000]
        y_def_test = y[:1000]
        adv_def_test = adv[:1000]
        # Predict on the current adversarial examples. (The original reused a
        # stale `pred` array here, which mixed clean and previous-iteration
        # adversarial predictions.)
        pred_adv_def_test = predict_numpy(model, adv_def_test, device)
        acc_base = np.mean(pred_adv_def_test == y_def_test)
        labelled_as_adv = detector.detect(adv_def_test, pred_adv_def_test)
        acc_def = acc_on_adv(pred_adv_def_test, y_def_test, labelled_as_adv)
        labelled_false = detector.detect(X_def_test, y_def_test)
        fpr = np.mean(labelled_false)
        print('acc_model: {:.4f}, acc_on_adv: {:.4f}, fpr: {:.4f}'.format(acc_base, acc_def, fpr))
        accs_classifier[i] = acc_base
        accs_on_adv[i] = acc_def
        fprs[i] = fpr

    results = np.array([eps, accs_classifier, accs_on_adv, fprs]).transpose()
    df = pd.DataFrame(data=results, columns=['eps', 'acc_base', 'acc_on_adv', 'fpr'])
    file_output = os.path.join(path, '{}_{}_{}_{}.csv'.format(data, model_name, DEFENCE, att_name))
    df.to_csv(file_output, index=False)
    print('Saved results to:', file_output)
    print('DONE!')
    print('-------------------------------------------------------------------\n')
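
# The file passed as `json_param` above is expected to carry a boolean
# `sequence` switching the three BAARD stages on or off, plus the per-stage
# hyperparameters read in run_evaluate_baard. The values below are purely
# illustrative, not tuned defaults:
#
#   {
#       "sequence": [true, true, true],
#       "q1": 1.0,
#       "k_re": 10,
#       "q2": 0.98,
#       "k_de": 100,
#       "q3": 0.98
#   }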
def run_evaluate_magnet(data, model_name, path, seed, json_param, att_name, eps):
    set_seeds(seed)

    # The Line attack takes no hyperparameter.
    if att_name == 'line':
        eps = [1]

    print('args:', data, model_name, path, seed, json_param, att_name, eps)

    if not os.path.exists(path):
        print('Output folder does not exist. Create:', path)
        os.mkdir(path)

    # Get data
    n_classes = 10
    transform = tv.transforms.Compose([tv.transforms.ToTensor()])
    if data == 'mnist':
        dataset_train = datasets.MNIST(PATH_DATA, train=True, download=True, transform=transform)
        dataset_test = datasets.MNIST(PATH_DATA, train=False, download=True, transform=transform)
    elif data == 'cifar10':
        transform_train = tv.transforms.Compose([
            tv.transforms.RandomHorizontalFlip(),
            tv.transforms.RandomCrop(32, padding=4),
            tv.transforms.ToTensor()])
        dataset_train = datasets.CIFAR10(PATH_DATA, train=True, download=True, transform=transform_train)
        dataset_test = datasets.CIFAR10(PATH_DATA, train=False, download=True, transform=transform)
    else:
        raise ValueError('Unknown dataset: {}'.format(data))

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print('Device: {}'.format(device))

    file_model = os.path.join(path, '{}_{}_model.pt'.format(data, model_name))
    print('Start training {} model on {}...'.format(model_name, data))
    model = train_model(data, model_name, dataset_train, dataset_test, EPOCHS, device, file_model)

    # Split data
    tensor_X, tensor_y = get_correct_examples(model, dataset_test, device=device, return_tensor=True)
    dataset = TensorDataset(tensor_X, tensor_y)
    loader = DataLoader(dataset, batch_size=512, shuffle=False)
    _, acc_perfect = validate(model, loader, nn.CrossEntropyLoss(), device)
    print('Accuracy on {} filtered test set: {:.2f}%'.format(tensor_y.size(0), acc_perfect * 100))

    print('-------------------------------------------------------------------')
    print('Start training MagNet...')
    # Run preprocessing
    tensor_X, tensor_y = get_correct_examples(model, dataset_train, device=device, return_tensor=True)
    X_train = tensor_X.cpu().detach().numpy()
    y_train = tensor_y.cpu().detach().numpy()

    # We need to load the evaluation set first. The clean samples are
    # identical across attacks, so it does not matter which one is selected.
    file_data = os.path.join(path, '{}_{}_{}_{}.pt'.format(data, model_name, att_name, round(eps[0] * 1000)))
    obj = torch.load(file_data)
    X = obj['X']
    y = obj['y']
    X_def_val = X[1000:2000]

    with open(json_param) as j:
        param = json.load(j)

    time_start = time.time()
    detector = train_magnet(data, model_name, X_train, y_train, X_def_val, param, device, path, EPOCHS, model=model)
    time_elapsed = time.time() - time_start
    print('Total run time:', str(datetime.timedelta(seconds=time_elapsed)))

    print('-------------------------------------------------------------------')
    print('Start evaluating the robustness of the classifier...')
    eps = np.array(eps, dtype=float)
    n_att = eps.shape[0]
    accs_classifier = np.zeros(n_att, dtype=float)
    accs_on_adv = np.zeros_like(accs_classifier)
    fprs = np.zeros_like(accs_on_adv)

    pred_clean = predict_numpy(model, X, device)
    print('Acc on clean samples:', np.mean(pred_clean == y))

    for i in range(n_att):
        print('Evaluating {} eps={}'.format(att_name, eps[i]))
        file_data = os.path.join(path, '{}_{}_{}_{}.pt'.format(data, model_name, att_name, round(eps[i] * 1000)))
        obj = torch.load(file_data)
        adv = obj['adv']
        X_def_test = X[:1000]
        y_def_test = y[:1000]
        adv_def_test = adv[:1000]
        # Predict on the current adversarial examples. (The original reused a
        # stale `pred` array here.)
        pred_adv_def_test = predict_numpy(model, adv_def_test, device)
        acc_base = np.mean(pred_adv_def_test == y_def_test)
        # MagNet returns the reformed inputs together with the detection flags.
        X_reformed, labelled_as_adv = detector.detect(adv_def_test, pred_adv_def_test)
        pred_reformed = predict_numpy(model, X_reformed, device)
        acc_def = acc_on_adv(pred_reformed, y_def_test, labelled_as_adv)
        _, labelled_false = detector.detect(X_def_test, y_def_test)
        fpr = np.mean(labelled_false)
        print('acc_model: {:.4f}, acc_on_adv: {:.4f}, fpr: {:.4f}'.format(acc_base, acc_def, fpr))
        accs_classifier[i] = acc_base
        accs_on_adv[i] = acc_def
        fprs[i] = fpr

    results = np.array([eps, accs_classifier, accs_on_adv, fprs]).transpose()
    df = pd.DataFrame(data=results, columns=['eps', 'acc_base', 'acc_on_adv', 'fpr'])
    file_output = os.path.join(path, '{}_{}_{}_{}.csv'.format(data, model_name, DEFENCE, att_name))
    df.to_csv(file_output, index=False)
    print('Saved results to:', file_output)
    print('DONE!')
    print('-------------------------------------------------------------------\n')
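
# For reference, a minimal sketch of the combined robustness metric used in
# both evaluation functions above. This is an assumption about the imported
# `acc_on_adv` helper, not its verbatim implementation, and it is kept under
# a separate name so it does not shadow the real function: an adversarial
# example counts as correctly handled if the detector flags it, or the
# classifier still predicts the true label.
def _acc_on_adv_sketch(pred_adv, y_true, labelled_as_adv):
    # Relies on the module-level `import numpy as np` used throughout.
    flagged = np.asarray(labelled_as_adv).astype(bool)
    return float(np.mean(np.logical_or(flagged, pred_adv == y_true)))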
def run_generate_adv(data, model_name, path, seed, att_name, eps):
    set_seeds(seed)

    # The Line attack takes no hyperparameter.
    if att_name == 'line':
        eps = [1]

    print('args:', data, model_name, path, seed, att_name, eps)

    if not os.path.exists(path):
        print('Output folder does not exist. Create:', path)
        os.mkdir(path)

    # Get data
    n_classes = 10
    transform = tv.transforms.Compose([tv.transforms.ToTensor()])
    if data == 'mnist':
        dataset_train = datasets.MNIST(PATH_DATA, train=True, download=True, transform=transform)
        dataset_test = datasets.MNIST(PATH_DATA, train=False, download=True, transform=transform)
    elif data == 'cifar10':
        transform_train = tv.transforms.Compose([
            tv.transforms.RandomHorizontalFlip(),
            tv.transforms.RandomCrop(32, padding=4),
            tv.transforms.ToTensor()])
        dataset_train = datasets.CIFAR10(PATH_DATA, train=True, download=True, transform=transform_train)
        dataset_test = datasets.CIFAR10(PATH_DATA, train=False, download=True, transform=transform)
    else:
        raise ValueError('Unknown dataset: {}'.format(data))

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print('Device: {}'.format(device))

    file_model = os.path.join(path, '{}_{}_model.pt'.format(data, model_name))
    print('Start training {} model on {}...'.format(model_name, data))
    model = train_model(data, model_name, dataset_train, dataset_test, EPOCHS, device, file_model)

    # Split data
    tensor_X, tensor_y = get_correct_examples(model, dataset_test, device=device, return_tensor=True)
    dataset = TensorDataset(tensor_X, tensor_y)
    loader = DataLoader(dataset, batch_size=512, shuffle=False)
    _, acc_perfect = validate(model, loader, nn.CrossEntropyLoss(), device)
    print('Accuracy on {} filtered test set: {:.2f}%'.format(tensor_y.size(0), acc_perfect * 100))

    # Split rules:
    # 1. Benchmark_defence_test: 1000 (def_test)
    # 2. Benchmark_defence_val:  1000 (def_val)
    # 3. Test white-box attack:  2000 (att_test)
    # 4. Train surrogate model:  2000 (sur_train)
    # --------------------Total: 6000
    idx_shuffle = np.random.permutation(tensor_X.size(0))[:6000]
    X = tensor_X[idx_shuffle].cpu().detach().numpy()
    y = tensor_y[idx_shuffle].cpu().detach().numpy()

    print('-------------------------------------------------------------------')
    print('Start generating {} adversarial examples...'.format(len(idx_shuffle)))
    advs = []
    for e in eps:
        adv, X, y = run_attack_untargeted(file_model, X, y, att_name=att_name, eps=e, device=device)
        advs.append(adv)
    advs = np.array(advs, dtype=float)

    print('-------------------------------------------------------------------')
    print('Start testing adversarial examples...')
    for i, e in enumerate(eps):
        adv = advs[i]
        pred = predict_numpy(model, adv, device)
        print('Attack: {} Eps={} Acc on adv: {:.4f}'.format(att_name, e, np.mean(pred == y)))
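
# A hypothetical driver call (the model name and epsilon values below are
# illustrative): generate untargeted APGD examples on MNIST at a few epsilon
# values, producing the per-epsilon `.pt` files (presumably written by
# run_attack_untargeted) that run_evaluate_baard and run_evaluate_magnet
# load above.
#
#   run_generate_adv('mnist', 'dnn', 'results', 1234, 'apgd', [0.063, 0.3, 1.0])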