# Imports assumed by the test snippets below, following ART's test-suite conventions.
import logging

import numpy as np
import pytest

from art.attacks.evasion import AutoProjectedGradientDescent
from tests.utils import ARTTestException


def test_generate(art_warning, fix_get_mnist_subset, image_dl_estimator_for_attack):
    try:
        classifier = image_dl_estimator_for_attack(AutoProjectedGradientDescent)

        attack = AutoProjectedGradientDescent(
            estimator=classifier,
            norm=np.inf,
            eps=0.3,
            eps_step=0.1,
            max_iter=5,
            targeted=False,
            nb_random_init=1,
            batch_size=32,
            loss_type="cross_entropy",
            verbose=False,
        )

        (x_train_mnist, y_train_mnist, x_test_mnist, y_test_mnist) = fix_get_mnist_subset

        x_train_mnist_adv = attack.generate(x=x_train_mnist, y=y_train_mnist)

        # The mean perturbation is a regression value; the max must sit at the eps bound.
        assert np.mean(np.abs(x_train_mnist_adv - x_train_mnist)) == pytest.approx(0.0329, abs=0.005)
        assert np.max(np.abs(x_train_mnist_adv - x_train_mnist)) == pytest.approx(0.3, abs=0.01)
    except ARTTestException as e:
        art_warning(e)
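# --- Added example (not from the original test suite) ---
# A minimal, self-contained sketch of the same attack configuration on a toy
# PyTorch classifier. TinyNet and the random data are illustrative assumptions;
# only the ART calls mirror the test above.
import numpy as np
import torch
import torch.nn as nn

from art.attacks.evasion import AutoProjectedGradientDescent
from art.estimators.classification import PyTorchClassifier


class TinyNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc = nn.Linear(28 * 28, 10)

    def forward(self, x):
        return self.fc(x.view(x.size(0), -1))  # logits, no softmax


model = TinyNet()
classifier = PyTorchClassifier(
    model=model,
    loss=nn.CrossEntropyLoss(),
    optimizer=torch.optim.SGD(model.parameters(), lr=0.01),
    input_shape=(1, 28, 28),
    nb_classes=10,
    clip_values=(0.0, 1.0),
)
attack = AutoProjectedGradientDescent(
    estimator=classifier, norm=np.inf, eps=0.3, eps_step=0.1, max_iter=5,
    targeted=False, nb_random_init=1, batch_size=32,
    loss_type="cross_entropy", verbose=False,
)
x = np.random.rand(8, 1, 28, 28).astype(np.float32)
y = np.eye(10, dtype=np.float32)[np.random.randint(0, 10, size=8)]
x_adv = attack.generate(x=x, y=y)
assert np.max(np.abs(x_adv - x)) <= 0.3 + 1e-6  # perturbation stays inside the eps-ball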
def test_generate(is_tf_version_2, fix_get_mnist_subset, get_image_classifier_list_for_attack):
    if is_tf_version_2:
        classifier_list = get_image_classifier_list_for_attack(AutoProjectedGradientDescent)
        if classifier_list is None:
            logging.warning("Couldn't perform this test because no classifier is defined")
            return

        for classifier in classifier_list:
            attack = AutoProjectedGradientDescent(
                estimator=classifier,
                norm=np.inf,
                eps=0.3,
                eps_step=0.1,
                max_iter=5,
                targeted=False,
                nb_random_init=1,
                batch_size=32,
                loss_type="cross_entropy",
            )

            (x_train_mnist, y_train_mnist, x_test_mnist, y_test_mnist) = fix_get_mnist_subset

            x_train_mnist_adv = attack.generate(x=x_train_mnist, y=y_train_mnist)

            assert np.mean(np.abs(x_train_mnist_adv - x_train_mnist)) == pytest.approx(0.0329, abs=0.005)
            assert np.max(np.abs(x_train_mnist_adv - x_train_mnist)) == pytest.approx(0.3, abs=0.01)
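# --- Added note (assumption) ---
# The is_tf_version_2 fixture above presumably wraps a version check along
# these lines; the helper name here is illustrative.
import tensorflow as tf


def _is_tf_version_2() -> bool:
    return tf.__version__.startswith("2")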
def test_generate(art_warning, fix_get_mnist_subset, image_dl_estimator_for_attack, framework, loss_type, norm):
    try:
        classifier = image_dl_estimator_for_attack(AutoProjectedGradientDescent, from_logits=True)

        if framework in ["tensorflow1", "tensorflow2v1"] and loss_type == "difference_logits_ratio":
            # The difference-logits-ratio loss is not supported on TensorFlow v1
            # graphs, so constructing the attack should raise.
            with pytest.raises(ValueError):
                _ = AutoProjectedGradientDescent(
                    estimator=classifier,
                    norm=norm,
                    eps=0.3,
                    eps_step=0.1,
                    max_iter=5,
                    targeted=False,
                    nb_random_init=1,
                    batch_size=32,
                    loss_type=loss_type,
                    verbose=False,
                )
        else:
            attack = AutoProjectedGradientDescent(
                estimator=classifier,
                norm=norm,
                eps=0.3,
                eps_step=0.1,
                max_iter=5,
                targeted=False,
                nb_random_init=1,
                batch_size=32,
                loss_type=loss_type,
                verbose=False,
            )

            (x_train_mnist, y_train_mnist, x_test_mnist, y_test_mnist) = fix_get_mnist_subset

            x_train_mnist_adv = attack.generate(x=x_train_mnist, y=y_train_mnist)

            assert np.max(np.abs(x_train_mnist_adv - x_train_mnist)) > 0.0
    except ARTTestException as e:
        art_warning(e)
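# --- Added example (assumption) ---
# The framework/loss_type/norm arguments above are supplied by the test
# harness; a plain-pytest equivalent would parametrize the grid explicitly,
# as in this stand-alone sketch.
import numpy as np
import pytest


@pytest.mark.parametrize("norm", [np.inf, 1, 2])
@pytest.mark.parametrize("loss_type", ["cross_entropy", "difference_logits_ratio", None])
def test_grid_sketch(norm, loss_type):
    # Stand-in body: the real test builds AutoProjectedGradientDescent with
    # each (norm, loss_type) combination.
    assert norm in (np.inf, 1, 2)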
# Assumed imports for this attack-generation script; the project-local helpers
# (set_seeds, load_csv, get_shape, validate, get_correct_examples, BaseModel,
# Resnet, Vgg, NumericModel, CarliniWagnerAttackL2, LineAttack, WaterMarkAttack)
# come from the repository's own modules.
import argparse
import datetime
import json
import os
import time

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision as tv
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from torch.utils.data import DataLoader, TensorDataset
from torchvision import datasets
from tqdm import trange

from art.attacks.evasion import (
    AutoProjectedGradientDescent,
    BasicIterativeMethod,
    BoundaryAttack,
    CarliniLInfMethod,
    DeepFool,
    FastGradientMethod,
    SaliencyMapMethod,
    ShadowAttack,
)
from art.estimators.classification import PyTorchClassifier


def main():
    with open('data.json') as data_json:
        data_params = json.load(data_json)

    parser = argparse.ArgumentParser()
    parser.add_argument('--data', type=str)
    parser.add_argument('--data_path', type=str, default='data')
    parser.add_argument('--output_path', type=str, default='results')
    parser.add_argument('--pretrained', type=str, required=True)
    parser.add_argument('--batch_size', type=int, default=128)
    parser.add_argument('--attack', type=str, required=True, choices=data_params['attacks'])
    # NOTE: In the CW_L2 attack, eps is the upper bound of c.
    parser.add_argument('--eps', type=float, default=0.3)
    parser.add_argument('--n_samples', type=int, default=2000)
    parser.add_argument('--random_state', type=int, default=1234)
    args = parser.parse_args()
    print(args)

    set_seeds(args.random_state)

    if not os.path.exists(args.output_path):
        print('Output folder does not exist. Create:', args.output_path)
        os.mkdir(args.output_path)

    print('Dataset:', args.data)
    print('Pretrained model:', args.pretrained)
    print('Running attack: {}'.format(args.attack))

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print('Device: {}'.format(device))

    # Prepare data
    transforms = tv.transforms.Compose([tv.transforms.ToTensor()])
    if args.data == 'mnist':
        dataset_train = datasets.MNIST(args.data_path, train=True, download=True, transform=transforms)
        dataset_test = datasets.MNIST(args.data_path, train=False, download=True, transform=transforms)
    elif args.data == 'cifar10':
        dataset_train = datasets.CIFAR10(args.data_path, train=True, download=True, transform=transforms)
        dataset_test = datasets.CIFAR10(args.data_path, train=False, download=True, transform=transforms)
    else:
        data_path = os.path.join(args.data_path, data_params['data'][args.data]['file_name'])
        print('Read file:', data_path)
        X, y = load_csv(data_path)
        X_train, X_test, y_train, y_test = train_test_split(
            X, y,
            test_size=data_params['data'][args.data]['n_test'],
            random_state=args.random_state)
        scaler = MinMaxScaler().fit(X_train)
        X_train = scaler.transform(X_train)
        X_test = scaler.transform(X_test)
        dataset_train = TensorDataset(
            torch.from_numpy(X_train).type(torch.float32),
            torch.from_numpy(y_train).type(torch.long))
        dataset_test = TensorDataset(
            torch.from_numpy(X_test).type(torch.float32),
            torch.from_numpy(y_test).type(torch.long))

    dataloader_train = DataLoader(dataset_train, batch_size=256, shuffle=False)
    dataloader_test = DataLoader(dataset_test, batch_size=256, shuffle=False)
    shape_train = get_shape(dataloader_train.dataset)
    shape_test = get_shape(dataloader_test.dataset)
    print('Train set:', shape_train)
    print('Test set:', shape_test)

    # Load model. Attacks that work on logits or decision boundaries bypass the
    # softmax layer.
    use_prob = args.attack not in ['apgd', 'apgd1', 'apgd2', 'cw2', 'cwinf']
    print('Attack:', args.attack)
    print('Using softmax layer:', use_prob)
    if args.data == 'mnist':
        model = BaseModel(use_prob=use_prob).to(device)
        model_name = 'basic'
    elif args.data == 'cifar10':
        model_name = args.pretrained.split('_')[1]
        if model_name == 'resnet':
            model = Resnet(use_prob=use_prob).to(device)
        elif model_name == 'vgg':
            model = Vgg(use_prob=use_prob).to(device)
        else:
            raise ValueError('Unknown model: {}'.format(model_name))
    else:
        n_features = data_params['data'][args.data]['n_features']
        n_classes = data_params['data'][args.data]['n_classes']
        model = NumericModel(
            n_features,
            n_hidden=n_features * 4,
            n_classes=n_classes,
            use_prob=use_prob).to(device)
        model_name = 'basic' + str(n_features * 4)

    optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=5e-4)
    loss = nn.CrossEntropyLoss()
    pretrained_path = os.path.join(args.output_path, args.pretrained)
    model.load_state_dict(torch.load(pretrained_path, map_location=device))

    _, acc_train = validate(model, dataloader_train, loss, device)
    _, acc_test = validate(model, dataloader_test, loss, device)
    print('Accuracy on train set: {:.4f}%'.format(acc_train * 100))
    print('Accuracy on test set: {:.4f}%'.format(acc_test * 100))

    # Create a subset which only contains correctly classified samples.
    tensor_test_X, tensor_test_y = get_correct_examples(
        model, dataset_test, device=device, return_tensor=True)
    dataset_perfect = TensorDataset(tensor_test_X, tensor_test_y)
    loader_perfect = DataLoader(dataset_perfect, batch_size=512, shuffle=False)
    _, acc_perfect = validate(model, loader_perfect, loss, device)
    print('Accuracy on {} filtered test examples: {:.4f}%'.format(
        len(dataset_perfect), acc_perfect * 100))

    # Generate adversarial examples
    n_features = data_params['data'][args.data]['n_features']
    n_classes = data_params['data'][args.data]['n_classes']
    if isinstance(n_features, int):
        n_features = (n_features,)

    classifier = PyTorchClassifier(
        model=model,
        loss=loss,
        input_shape=n_features,
        optimizer=optimizer,
        nb_classes=n_classes,
        clip_values=(0.0, 1.0),
        device_type='gpu')

    if args.attack == 'apgd':
        eps_step = args.eps / 10.0 if args.eps <= 0.1 else 0.1
        attack = AutoProjectedGradientDescent(
            estimator=classifier,
            eps=args.eps,
            eps_step=eps_step,
            max_iter=1000,
            batch_size=args.batch_size,
            targeted=False)
    elif args.attack == 'apgd1':
        attack = AutoProjectedGradientDescent(
            estimator=classifier,
            norm=1,
            eps=args.eps,
            eps_step=0.1,
            max_iter=1000,
            batch_size=args.batch_size,
            targeted=False)
    elif args.attack == 'apgd2':
        attack = AutoProjectedGradientDescent(
            estimator=classifier,
            norm=2,
            eps=args.eps,
            eps_step=0.1,
            max_iter=1000,
            batch_size=args.batch_size,
            targeted=False)
    elif args.attack == 'bim':
        eps_step = args.eps / 10.0
        attack = BasicIterativeMethod(
            estimator=classifier,
            eps=args.eps,
            eps_step=eps_step,
            max_iter=1000,
            batch_size=args.batch_size,
            targeted=False)
    elif args.attack == 'boundary':
        attack = BoundaryAttack(
            estimator=classifier,
            max_iter=1000,
            sample_size=args.batch_size,
            targeted=False)
    elif args.attack == 'cw2':
        # NOTE: Do NOT increase the batch size!
        attack = CarliniWagnerAttackL2(
            model=model,
            n_classes=n_classes,
            confidence=args.eps,
            verbose=True,
            check_prob=False,
            batch_size=args.batch_size,
            targeted=False)
    elif args.attack == 'cwinf':
        attack = CarliniLInfMethod(
            classifier=classifier,
            confidence=args.eps,
            max_iter=1000,
            batch_size=args.batch_size,
            targeted=False)
    elif args.attack == 'deepfool':
        attack = DeepFool(
            classifier=classifier,
            epsilon=args.eps,
            batch_size=args.batch_size)
    elif args.attack == 'fgsm':
        attack = FastGradientMethod(
            estimator=classifier,
            eps=args.eps,
            batch_size=args.batch_size)
    elif args.attack == 'jsma':
        attack = SaliencyMapMethod(
            classifier=classifier,
            gamma=args.eps,
            batch_size=args.batch_size)
    elif args.attack == 'line':
        if args.data == 'mnist':
            color = args.eps
        elif args.data == 'cifar10':
            color = (args.eps, args.eps, args.eps)
        else:
            raise NotImplementedError
        attack = LineAttack(color=color, thickness=1)
    elif args.attack == 'shadow':
        attack = ShadowAttack(
            estimator=classifier,
            batch_size=args.batch_size,
            targeted=False,
            verbose=False)
    elif args.attack == 'watermark':
        attack = WaterMarkAttack(
            eps=args.eps,
            n_classes=data_params['data'][args.data]['n_classes'],
            x_min=0.0,
            x_max=1.0,
            targeted=False)
        X_train, y_train = get_correct_examples(model, dataset_train, device=device, return_tensor=True)
        X_train = X_train.cpu().detach().numpy()
        y_train = y_train.cpu().detach().numpy()
        attack.fit(X_train, y_train)
    else:
        raise NotImplementedError

    n = min(len(dataset_perfect), args.n_samples)
    X_benign = tensor_test_X[:n].cpu().detach().numpy()
    y = tensor_test_y[:n].cpu().detach().numpy()

    print('Creating {} adversarial examples with eps={} (not all attacks use eps)'.format(n, args.eps))
    time_start = time.time()
    if args.attack == 'shadow':
        # The Shadow attack only accepts a single sample at a time.
        adv = np.zeros_like(X_benign)
        for i in trange(len(X_benign)):
            adv[i] = attack.generate(x=np.expand_dims(X_benign[i], axis=0))
    elif args.attack == 'watermark':
        # This is untargeted.
        adv = attack.generate(X_benign, y)
    else:
        adv = attack.generate(x=X_benign)
    time_elapsed = time.time() - time_start
    print('Total time spent: {}'.format(str(datetime.timedelta(seconds=time_elapsed))))

    pred_benign = np.argmax(classifier.predict(X_benign), axis=1)
    acc_benign = np.sum(pred_benign == y) / n
    pred_adv = np.argmax(classifier.predict(adv), axis=1)
    acc_adv = np.sum(pred_adv == y) / n
    print("Accuracy on benign samples: {:.4f}%".format(acc_benign * 100))
    print("Accuracy on adversarial examples: {:.4f}%".format(acc_adv * 100))

    # Save results
    if args.n_samples < 2000:
        output_file = '{}_{}_{}_{}_size{}'.format(args.data, model_name, args.attack, str(args.eps), args.n_samples)
    else:
        output_file = '{}_{}_{}_{}'.format(args.data, model_name, args.attack, str(args.eps))

    path_x = os.path.join(args.output_path, '{}_x.npy'.format(output_file))
    path_y = os.path.join(args.output_path, '{}_y.npy'.format(output_file))
    path_adv = os.path.join(args.output_path, '{}_adv.npy'.format(output_file))
    np.save(path_x, X_benign)
    np.save(path_y, y)
    np.save(path_adv, adv)

    print('Saved to:', '{}_adv.npy'.format(output_file))
    print()
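# --- Added helper (illustrative, not in the original script) ---
# The step-size rule used for the 'apgd' branch above, pulled out for clarity:
# one tenth of the budget for small eps, capped at 0.1 otherwise.
def apgd_eps_step(eps: float) -> float:
    return eps / 10.0 if eps <= 0.1 else 0.1


assert apgd_eps_step(0.05) == 0.005
assert apgd_eps_step(0.3) == 0.1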
def train_adv(data='mnist', model_name='basic', n_samples=2000, eps=2.,
              path_output='results', path_data='data', is_test=False,
              batch_size=128, device='cpu'):
    # Prepare data
    transforms = tv.transforms.Compose([tv.transforms.ToTensor()])
    if data == 'mnist':
        dataset_test = datasets.MNIST(path_data, train=False, download=True, transform=transforms)
    elif data == 'cifar10':
        dataset_test = datasets.CIFAR10(path_data, train=False, download=True, transform=transforms)
    else:
        raise NotImplementedError
    loader_test = DataLoader(dataset_test, batch_size=batch_size, shuffle=False)

    # Load model
    if data == 'mnist':
        model = BaseModel(use_prob=False).to(device)
        n_features = (1, 28, 28)
        pretrained = 'mnist_200.pt'
    elif data == 'cifar10':
        n_features = (3, 32, 32)
        if model_name == 'resnet':
            model = Resnet(use_prob=False).to(device)
            pretrained = 'cifar10_resnet_200.pt'
        elif model_name == 'vgg':
            model = Vgg(use_prob=False).to(device)
            pretrained = 'cifar10_vgg_200.pt'
        else:
            raise NotImplementedError
    else:
        raise NotImplementedError
    pretrained_path = os.path.join(path_output, pretrained)
    model.load_state_dict(torch.load(pretrained_path, map_location=device))

    optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=5e-4)
    loss = nn.CrossEntropyLoss()
    _, acc_test = validate(model, loader_test, loss, device)
    print('Accuracy on test set: {:.4f}%'.format(acc_test * 100))

    tensor_test_X, tensor_test_y = get_correct_examples(model, dataset_test, device=device, return_tensor=True)

    # Take samples from the tail of the filtered test set.
    if not is_test:
        # This is for training the surrogate model.
        tensor_test_X = tensor_test_X[-n_samples:]
        tensor_test_y = tensor_test_y[-n_samples:]
    else:
        # This is for testing the surrogate model.
        tensor_test_X = tensor_test_X[-n_samples - 2000:-2000]
        tensor_test_y = tensor_test_y[-n_samples - 2000:-2000]
    dataset_test = TensorDataset(tensor_test_X, tensor_test_y)
    loader_test = DataLoader(dataset_test, batch_size=batch_size, shuffle=False)
    _, acc_perfect = validate(model, loader_test, loss, device)
    print('Accuracy on {} filtered test set: {:.4f}%'.format(
        len(dataset_test), acc_perfect * 100))

    classifier = PyTorchClassifier(
        model=model,
        loss=loss,
        input_shape=n_features,
        optimizer=optimizer,
        nb_classes=10,
        clip_values=(0.0, 1.0),
        device_type='gpu')
    attack = AutoProjectedGradientDescent(
        estimator=classifier,
        eps=eps,
        eps_step=0.1,
        max_iter=1000,
        batch_size=batch_size,
        targeted=False)

    X_benign = tensor_test_X.cpu().detach().numpy()
    y_true = tensor_test_y.cpu().detach().numpy()
    adv = attack.generate(x=X_benign)
    pred_adv = np.argmax(classifier.predict(adv), axis=1)
    acc_adv = np.mean(pred_adv == y_true)
    print("Accuracy on adversarial examples: {:.4f}%".format(acc_adv * 100))

    if not is_test:
        output_file = '{}_{}_baard_surro_train_eps{}_size{}.pt'.format(data, model_name, eps, n_samples)
    else:
        output_file = '{}_{}_baard_surro_test_eps{}_size{}.pt'.format(data, model_name, eps, n_samples)
    file_path = os.path.join(path_output, output_file)
    output = {'X': X_benign, 'adv': adv, 'y': y_true}
    torch.save(output, file_path)
    print('Saved to:', file_path)
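# --- Added usage sketch (assumption) ---
# Example invocation of train_adv; it expects the pretrained checkpoint it
# loads internally (e.g. results/mnist_200.pt) to exist.
if __name__ == '__main__':
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    train_adv(data='mnist', model_name='basic', n_samples=2000, eps=0.3,
              path_output='results', path_data='data', is_test=False,
              batch_size=128, device=device)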
def run_attack_untargeted(file_model, X, y, att_name, eps, device):
    path = file_model.split('/')[0]
    file_str = file_model.split('/')[-1]
    name_arr = file_str.split('_')
    data = name_arr[0]
    model_name = name_arr[1]
    file_data = os.path.join(
        path, '{}_{}_{}_{}.pt'.format(data, model_name, att_name, round(eps * 1000)))

    # Reuse cached adversarial examples when available.
    if os.path.exists(file_data):
        print('Found existing file:', file_data)
        obj = torch.load(file_data)
        return obj['adv'], obj['X'], obj['y']

    if data == 'mnist':
        n_features = (1, 28, 28)
        n_classes = 10
        model = BaseModel(use_prob=False).to(device)
    elif data == 'cifar10':
        n_features = (3, 32, 32)
        n_classes = 10
        if model_name == 'resnet':
            model = Resnet(use_prob=False).to(device)
        elif model_name == 'vgg':
            model = Vgg(use_prob=False).to(device)
        else:
            raise NotImplementedError
    else:
        raise NotImplementedError
    model.load_state_dict(torch.load(file_model, map_location=device))

    loss = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=5e-4)
    classifier = PyTorchClassifier(
        model=model,
        loss=loss,
        input_shape=n_features,
        optimizer=optimizer,
        nb_classes=n_classes,
        clip_values=(0.0, 1.0),
        device_type='gpu')

    if att_name == 'apgd':
        eps_step = eps / 4. if eps <= 0.2 else 0.1
        attack = AutoProjectedGradientDescent(
            estimator=classifier,
            eps=eps,
            eps_step=eps_step,
            max_iter=1000,
            batch_size=BATCH_SIZE,
            targeted=False)
    elif att_name == 'apgd2':
        attack = AutoProjectedGradientDescent(
            estimator=classifier,
            norm=2,
            eps=eps,
            eps_step=0.1,
            max_iter=1000,
            batch_size=BATCH_SIZE,
            targeted=False)
    elif att_name == 'cw2':
        # Do not increase the batch size.
        attack = CarliniWagnerAttackL2(
            model=model,
            n_classes=n_classes,
            confidence=eps,
            verbose=True,
            check_prob=False,
            batch_size=32,
            targeted=False)
    elif att_name == 'deepfool':
        # Do not adjust epsilon.
        attack = DeepFool(classifier=classifier, batch_size=BATCH_SIZE)
    elif att_name == 'fgsm':
        attack = FastGradientMethod(estimator=classifier, eps=eps, batch_size=BATCH_SIZE)
    elif att_name == 'line':
        attack = LineAttack(color=1, thickness=2)
    else:
        raise NotImplementedError

    time_start = time.time()
    adv = attack.generate(x=X)
    time_elapsed = time.time() - time_start
    print('Total run time:', str(datetime.timedelta(seconds=time_elapsed)))

    obj = {'X': X, 'y': y, 'adv': adv}
    torch.save(obj, file_data)
    print('Saved data to:', file_data)

    return adv, X, y
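# --- Added usage sketch (assumption) ---
# Illustrative call with placeholder data; the checkpoint path follows the
# '<path>/<data>_<model>_*.pt' naming scheme the function parses, and the
# result is cached as e.g. results/mnist_basic_apgd_300.pt.
if __name__ == '__main__':
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    X_demo = np.random.rand(16, 1, 28, 28).astype(np.float32)
    y_demo = np.random.randint(0, 10, size=16)
    adv_demo, _, _ = run_attack_untargeted(
        'results/mnist_basic_200.pt', X_demo, y_demo,
        att_name='apgd', eps=0.3, device=device)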
# Assumed imports for the Lipschitz-constraint experiment; AD_MAT_FILE,
# RAW_RESULT_FILE, load_raw_result_csv and reconstruct_test_data are defined
# elsewhere in the project.
import csv

import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Reshape
from tensorflow.keras.models import Model

from art.attacks.evasion import AutoProjectedGradientDescent
from art.estimators.classification import KerasClassifier


def main():
    Ad = np.load(AD_MAT_FILE)  # Load adjacency matrix
    NUM_TEST = 50
    NUM_GRAPH = 200
    array_std, array_mean_values, array_overlap_ratio = load_raw_result_csv(RAW_RESULT_FILE)
    NUM_CLASS = array_mean_values.shape[1]
    print(array_mean_values.shape)

    with open("result_PGD.csv", "w", newline='') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow([
            "overlap ratio", "acc_test_L", "acc_test_WL",
            "acc_adv_with_Lip", "acc_adv_without_Lip"
        ])

    for i in range(NUM_TEST):
        x_test, y_test = reconstruct_test_data(array_std[i], array_mean_values[i], Ad, NUM_GRAPH)

        model_with_Lip_constr = tf.keras.models.load_model(
            "saved_model/fit{}_model_with_Lip_constr.h5".format(i))
        model_with_Lip_constr.summary()
        model_without_Lip_constr = tf.keras.models.load_model(
            "saved_model/fit{}_model_without_Lip_constr.h5".format(i))

        print("Evaluation of model WITH Lipschitz constant constraint on TEST data")
        loss_test_L, acc_test_L = model_with_Lip_constr.evaluate(
            x_test, y_test, batch_size=x_test.shape[0], verbose=0)
        print("Loss: {:.4f}, accuracy: {:.4f}".format(loss_test_L, acc_test_L))

        print("Evaluation of model WITHOUT Lipschitz constant constraint on TEST data")
        loss_test_WL, acc_test_WL = model_without_Lip_constr.evaluate(
            x_test, y_test, batch_size=x_test.shape[0], verbose=0)
        print("Loss: {:.4f}, accuracy: {:.4f}".format(loss_test_WL, acc_test_WL))

        # Flatten the (nodes, classes) model output so ART sees a 2-D prediction.
        reshape_with_Lip = Reshape(
            (x_test.shape[1] * NUM_CLASS,),
            name="added_reshape_layer_L")(model_with_Lip_constr.output)
        new_model_with_Lip = Model(inputs=model_with_Lip_constr.input, outputs=reshape_with_Lip)
        reshape_without_Lip = Reshape(
            (x_test.shape[1] * NUM_CLASS,),
            name="added_reshape_layer_WL")(model_without_Lip_constr.output)
        new_model_without_Lip = Model(inputs=model_without_Lip_constr.input, outputs=reshape_without_Lip)
        new_model_with_Lip.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
        new_model_without_Lip.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

        min_value = np.min(array_mean_values[i]) - 100 * array_std[i]
        max_value = np.max(array_mean_values[i]) + 100 * array_std[i]
        classifier_with_Lip = KerasClassifier(
            model=new_model_with_Lip, clip_values=(min_value, max_value), use_logits=False)
        classifier_without_Lip = KerasClassifier(
            model=new_model_without_Lip, clip_values=(min_value, max_value), use_logits=False)

        attack1 = AutoProjectedGradientDescent(
            estimator=classifier_with_Lip,
            norm="inf",
            eps=0.6,
            eps_step=1,
            batch_size=200,
            nb_random_init=5,
            verbose=True,
            targeted=False)
        attack2 = AutoProjectedGradientDescent(
            estimator=classifier_without_Lip,
            norm="inf",
            eps=0.6,
            eps_step=1,
            batch_size=200,
            nb_random_init=5,
            verbose=True,
            targeted=False)
        x_test_adv1 = attack1.generate(x=x_test, mask=np.ones((1, x_test.shape[1], x_test.shape[2])))
        x_test_adv2 = attack2.generate(x=x_test, mask=np.ones((1, x_test.shape[1], x_test.shape[2])))

        y_predict_adv_with_Lip = classifier_with_Lip.predict(x_test_adv1).reshape(y_test.shape)
        y_predict_adv_without_Lip = classifier_without_Lip.predict(x_test_adv2).reshape(y_test.shape)

        acc_adv_with_Lip = np.sum(
            np.argmax(y_predict_adv_with_Lip, axis=2) == np.argmax(y_test, axis=2)
        ) / (y_test.shape[0] * y_test.shape[1])
        print("Accuracy on adversarial test examples with Lipschitz constraint: {:.2f}%".format(
            acc_adv_with_Lip * 100))
        acc_adv_without_Lip = np.sum(
            np.argmax(y_predict_adv_without_Lip, axis=2) == np.argmax(y_test, axis=2)
        ) / (y_test.shape[0] * y_test.shape[1])
        print("Accuracy on adversarial test examples without Lipschitz constraint: {:.2f}%".format(
            acc_adv_without_Lip * 100))

        with open("result_PGD.csv", "a", newline='') as csvfile:
            writer = csv.writer(csvfile)
            writer.writerow([
                array_overlap_ratio[i], acc_test_L, acc_test_WL,
                acc_adv_with_Lip, acc_adv_without_Lip
            ])
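# --- Added sketch (illustrative shapes) ---
# The Reshape-wrapping trick used above, in isolation: ART's KerasClassifier
# expects a 2-D (batch, outputs) prediction, so the per-node (nodes, classes)
# output is flattened before wrapping. All sizes here are made up.
from tensorflow.keras.layers import Dense, Input, Reshape
from tensorflow.keras.models import Model

num_nodes, num_classes, num_features = 20, 3, 4
inp = Input(shape=(num_nodes, num_features))
per_node = Dense(num_classes, activation='softmax')(inp)  # (batch, nodes, classes)
flat = Reshape((num_nodes * num_classes,))(per_node)      # (batch, nodes * classes)
wrapped = Model(inputs=inp, outputs=flat)
wrapped.compile(loss='categorical_crossentropy', optimizer='adam')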
def test_check_params(art_warning, image_dl_estimator_for_attack):
    try:
        classifier = image_dl_estimator_for_attack(AutoProjectedGradientDescent, from_logits=True)

        # Each invalid hyperparameter must be rejected at construction time.
        with pytest.raises(ValueError):
            _ = AutoProjectedGradientDescent(classifier, norm=0)

        with pytest.raises(ValueError):
            _ = AutoProjectedGradientDescent(classifier, eps="1")
        with pytest.raises(ValueError):
            _ = AutoProjectedGradientDescent(classifier, eps=-1.0)

        with pytest.raises(ValueError):
            _ = AutoProjectedGradientDescent(classifier, eps_step="1")
        with pytest.raises(ValueError):
            _ = AutoProjectedGradientDescent(classifier, eps_step=-1.0)

        with pytest.raises(ValueError):
            _ = AutoProjectedGradientDescent(classifier, max_iter=1.0)
        with pytest.raises(ValueError):
            _ = AutoProjectedGradientDescent(classifier, max_iter=-1)

        with pytest.raises(ValueError):
            _ = AutoProjectedGradientDescent(classifier, targeted="true")

        with pytest.raises(ValueError):
            _ = AutoProjectedGradientDescent(classifier, nb_random_init=1.0)
        with pytest.raises(ValueError):
            _ = AutoProjectedGradientDescent(classifier, nb_random_init=-1)

        with pytest.raises(ValueError):
            _ = AutoProjectedGradientDescent(classifier, batch_size=1.0)
        with pytest.raises(ValueError):
            _ = AutoProjectedGradientDescent(classifier, batch_size=-1)

        with pytest.raises(ValueError):
            _ = AutoProjectedGradientDescent(classifier, loss_type="test")

        with pytest.raises(ValueError):
            _ = AutoProjectedGradientDescent(classifier, verbose="true")
    except ARTTestException as e:
        art_warning(e)