def test_binary_keras_instantiation_and_attack_pgd(art_warning):
    tf.compat.v1.disable_eager_execution()
    try:
        x, y = sklearn.datasets.make_classification(
            n_samples=10000, n_features=20, n_informative=5, n_redundant=2, n_repeated=0, n_classes=2
        )
        train_x, test_x, train_y, test_y = sklearn.model_selection.train_test_split(x, y, test_size=0.2)
        train_x = train_x.astype(np.float32)
        test_x = test_x.astype(np.float32)
        model = tf.keras.models.Sequential(
            [
                tf.keras.layers.Dense(128, activation=tf.nn.relu, input_shape=(20,)),
                tf.keras.layers.Dense(1, activation=tf.nn.sigmoid),
            ]
        )
        model.summary()
        model.compile(optimizer=tf.optimizers.Adam(), loss="binary_crossentropy", metrics=["accuracy"])
        classifier = KerasClassifier(model=model)
        classifier.fit(train_x, train_y, nb_epochs=5)
        pred = classifier.predict(test_x)
        attack = ProjectedGradientDescent(estimator=classifier, eps=0.5)
        x_test_adv = attack.generate(x=test_x)
        adv_predictions = classifier.predict(x_test_adv)
        assert (adv_predictions != pred).any()
    except ARTTestException as e:
        art_warning(e)
def test_update_image_classification_sw(art_warning, fix_get_mnist_subset, image_dl_estimator):
    try:
        from art.attacks.evasion import ProjectedGradientDescent

        classifier, _ = image_dl_estimator(from_logits=False)

        swd = SummaryWriterDefault(summary_writer=True, ind_1=True, ind_2=True, ind_3=True, ind_4=True)
        attack = ProjectedGradientDescent(
            estimator=classifier, max_iter=10, eps=0.3, eps_step=0.03, batch_size=5, verbose=False, summary_writer=swd
        )

        (x_train_mnist, y_train_mnist, x_test_mnist, y_test_mnist) = fix_get_mnist_subset

        attack.generate(x=x_train_mnist, y=y_train_mnist)

        assert all(attack.summary_writer.i_1 == [False, False, False, False, False])
        if np.ndim(attack.summary_writer.i_2) != 0:
            assert len(attack.summary_writer.i_2) == 5
        np.testing.assert_almost_equal(attack.summary_writer.i_3["0"], np.array([0.0, 0.0, 0.0, 0.0, 0.0]))
        np.testing.assert_almost_equal(attack.summary_writer.i_4["0"], np.array([0.0, 0.0, 0.0, 0.0, 0.0]))
    except ARTTestException as e:
        art_warning(e)
def test_pgd(self):
    from art.attacks.evasion import ProjectedGradientDescent

    attack = ProjectedGradientDescent(estimator=self.obj_detect, max_iter=2)
    x_test_adv = attack.generate(x=self.x_test, y=self.y_test)
    np.testing.assert_raises(AssertionError, np.testing.assert_array_equal, x_test_adv, self.x_test)
def gen_adv_attack(model):
    attack = ProjectedGradientDescent(model, eps=0.01, eps_step=0.01, max_iter=2, verbose=True)
    return attack
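# Hedged usage sketch for gen_adv_attack above: `classifier` and `x_test` are assumptions,
# standing in for an ART-wrapped model and a batch of benign inputs created elsewhere.
attack = gen_adv_attack(classifier)
x_test_adv = attack.generate(x=x_test)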
def get_attack(attack, target):
    if attack == Attacks.elasticnet:
        # lr=pert
        attack = ElasticNet(
            classifier,
            targeted=False,
            decision_rule=target,
            batch_size=1,
            learning_rate=lr,
            max_iter=100,  # 1000 recommended by Iveta and Stefan
            binary_search_steps=25,  # 50 recommended by Iveta and Stefan
            # layer=7,
            # delta=35/255,
            # optimizer=None,
            # step_size=1/255,
            # max_iter=100,
        )
    elif attack == Attacks.projected_gradient_descent:
        if target == TargetAttack.notarget.value:
            attack = ProjectedGradientDescent(classifier, eps=pert, eps_step=0.05)
            target = "no_target"
        else:
            raise Exception("set no target if you use Projected Attack")
    return attack
def test_update_image_classification_bool_str(art_warning, fix_get_mnist_subset, image_dl_estimator, summary_writer):
    try:
        from art.attacks.evasion import ProjectedGradientDescent

        classifier, _ = image_dl_estimator(from_logits=False)

        attack = ProjectedGradientDescent(
            estimator=classifier,
            max_iter=10,
            eps=0.3,
            eps_step=0.03,
            batch_size=5,
            verbose=False,
            summary_writer=summary_writer,
        )

        (x_train_mnist, y_train_mnist, x_test_mnist, y_test_mnist) = fix_get_mnist_subset

        attack.generate(x=x_train_mnist, y=y_train_mnist)
    except ARTTestException as e:
        art_warning(e)
def test_update_image_object_detection_sw(art_warning, fix_get_mnist_subset, fix_get_rcnn):
    try:
        from art.attacks.evasion import ProjectedGradientDescent

        frcnn = fix_get_rcnn

        swd = SummaryWriterDefault(summary_writer=True, ind_1=False, ind_2=True, ind_3=True, ind_4=True)
        attack = ProjectedGradientDescent(
            estimator=frcnn, max_iter=10, eps=0.3, eps_step=0.03, batch_size=5, verbose=False, summary_writer=swd
        )

        (x_train_mnist, y_train_mnist, x_test_mnist, y_test_mnist) = fix_get_mnist_subset

        attack.generate(x=x_train_mnist, y=y_train_mnist)

        if np.ndim(attack.summary_writer.i_2) != 0:
            assert len(attack.summary_writer.i_2) == 5
        np.testing.assert_almost_equal(attack.summary_writer.i_3["0"], np.array([0.2265982]))
        np.testing.assert_almost_equal(attack.summary_writer.i_4["0"], np.array([0.0, 0.0, 0.0, 0.0, 0.0]))
    except ARTTestException as e:
        art_warning(e)
def create_attack(attack_type, classifier):
    if attack_type == 'fgsm':
        # Fast Gradient Sign Method: eps is the attack step size.
        attacker = FastGradientMethod(classifier, eps=epsilon)
    elif attack_type == 'pgd':
        # Projected Gradient Descent: eps is the maximum perturbation the attacker may introduce,
        # eps_step is the step size / input variation at each iteration, max_iter is the maximum
        # number of iterations, and num_random_init is the number of random initialisations.
        attacker = ProjectedGradientDescent(classifier, eps=epsilon, eps_step=eps_step,
                                            max_iter=max_iter, num_random_init=num_random_init)
    elif attack_type == 'bim':
        # Basic Iterative Method: eps is the maximum perturbation, eps_step the attack step size,
        # and max_iter the maximum number of iterations.
        attacker = BasicIterativeMethod(classifier, eps=epsilon, eps_step=epsilon / max_iter, max_iter=max_iter)
    else:
        print('No supported attack specified')
        exit(0)
    return attacker
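# Hedged usage sketch for create_attack above. The module-level hyperparameters it reads
# (epsilon, eps_step, max_iter, num_random_init) and the `classifier` / `x_test` objects are
# illustrative assumptions, not values fixed by the original script.
epsilon, eps_step, max_iter, num_random_init = 8 / 255, 2 / 255, 40, 1
attacker = create_attack('pgd', classifier)
x_test_adv = attacker.generate(x=x_test)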
# Load the best saved model:
tiny_vgg = tf.keras.models.load_model(SAVED_MODEL_LOCATION, compile=False)

# Then compile with an optimizer
loss_object = tf.keras.losses.CategoricalCrossentropy()
optimizer = tf.keras.optimizers.SGD(learning_rate=LR)
tiny_vgg.compile(optimizer=optimizer, loss=loss_object)

classifier = KerasClassifier(model=tiny_vgg, clip_values=(0, 1), use_logits=False)

attack = ProjectedGradientDescent(estimator=classifier, eps=16 / 255, eps_step=1 / 255, norm="inf", max_iter=200)
# attack = CarliniLInfMethod(classifier, confidence=0.8, targeted=False, learning_rate=0.001)

x_test_adv = attack.generate(x=x_test)

outputs = classifier.predict(x_test_adv)
preds = np.argmax(outputs, axis=1)
trues = np.argmax(y_test, axis=1)
accuracy = np.sum(preds == trues) / len(y_test)
print("Accuracy on adversarial test examples: {}%".format(accuracy * 100))
print("Ixs that worked: ")
def main(args):
    assert args.dataset in ['mnist', 'cifar', 'svhn', 'tiny', 'tiny_gray'], \
        "dataset parameter must be either 'mnist', 'cifar', 'svhn', or 'tiny'"
    print('Dataset: %s' % args.dataset)
    adv_path = '/home/aaldahdo/detectors/adv_data/'

    if args.dataset == 'mnist':
        from baselineCNN.cnn.cnn_mnist import MNISTCNN as model
        model_mnist = model(mode='load', filename='cnn_{}.h5'.format(args.dataset))
        classifier = model_mnist.model
        sgd = optimizers.SGD(lr=0.05, decay=1e-6, momentum=0.9, nesterov=True)
        classifier.compile(loss=categorical_crossentropy, optimizer=sgd, metrics=['accuracy'])
        kclassifier = KerasClassifier(model=classifier, clip_values=(0, 1))
        epsilons = [8/256, 16/256, 32/256, 64/256, 80/256, 128/256]
        epsilons1 = [5, 10, 15, 20, 25, 30, 40]
        epsilons2 = [0.125, 0.25, 0.3125, 0.5, 1, 1.5, 2]
        eps_sa = 0.3
        pa_th = 78
        # random_restart = 20
        # x_train = model_mnist.x_train
        x_test = model_mnist.x_test
        # y_train = model_mnist.y_train
        y_test = model_mnist.y_test
        y_test_labels = model_mnist.y_test_labels
        translation = 10
        rotation = 60

    elif args.dataset == 'mnist_gray':
        from baselineCNN.cnn.cnn_mnist_gray import MNISTCNN as model
        model_mnist = model(mode='load', filename='cnn_{}.h5'.format(args.dataset))
        classifier = model_mnist.model
        sgd = optimizers.SGD(lr=0.05, decay=1e-6, momentum=0.9, nesterov=True)
        classifier.compile(loss=categorical_crossentropy, optimizer=sgd, metrics=['accuracy'])
        kclassifier = KerasClassifier(model=classifier, clip_values=(0, 1))
        epsilons = [8/256, 16/256, 32/256, 64/256, 80/256, 128/256]
        epsilons1 = [5, 10, 15, 20, 25, 30, 40]
        epsilons2 = [0.125, 0.25, 0.3125, 0.5, 1, 1.5, 2]
        eps_sa = 0.3
        pa_th = 78
        # random_restart = 20
        # x_train = model_mnist.x_train
        x_test = model_mnist.x_test
        # y_train = model_mnist.y_train
        y_test = model_mnist.y_test
        y_test_labels = model_mnist.y_test_labels
        translation = 10
        rotation = 60

    elif args.dataset == 'cifar':
        from baselineCNN.cnn.cnn_cifar10 import CIFAR10CNN as model
        model_cifar = model(mode='load', filename='cnn_{}.h5'.format(args.dataset))
        classifier = model_cifar.model
        sgd = optimizers.SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
        classifier.compile(loss=categorical_crossentropy, optimizer=sgd, metrics=['accuracy'])
        kclassifier = KerasClassifier(model=classifier, clip_values=(0, 1))
        epsilons = [8/256, 16/256, 32/256, 64/256, 80/256, 128/256]
        epsilons1 = [5, 10, 15, 20, 25, 30, 40]
        epsilons2 = [0.125, 0.25, 0.3125, 0.5, 1, 1.5, 2]
        eps_sa = 0.125
        pa_th = 100
        # x_train = model_cifar.x_train
        x_test = model_cifar.x_test
        # y_train = model_cifar.y_train
        y_test = model_cifar.y_test
        y_test_labels = model_cifar.y_test_labels
        translation = 8
        rotation = 30

    elif args.dataset == 'cifar_gray':
        from baselineCNN.cnn.cnn_cifar10_gray import CIFAR10CNN as model
        model_cifar = model(mode='load', filename='cnn_{}.h5'.format(args.dataset))
        classifier = model_cifar.model
        sgd = optimizers.SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
        classifier.compile(loss=categorical_crossentropy, optimizer=sgd, metrics=['accuracy'])
        kclassifier = KerasClassifier(model=classifier, clip_values=(0, 1))
        epsilons = [8/256, 16/256, 32/256, 64/256, 80/256, 128/256]
        epsilons1 = [5, 10, 15, 20, 25, 30, 40]
        epsilons2 = [0.125, 0.25, 0.3125, 0.5, 1, 1.5, 2]
        eps_sa = 0.125
        pa_th = 100
        # x_train = model_cifar.x_train
        x_test = model_cifar.x_test
        # y_train = model_cifar.y_train
        y_test = model_cifar.y_test
        y_test_labels = model_cifar.y_test_labels
        translation = 8
        rotation = 30

    elif args.dataset == 'svhn':
        from baselineCNN.cnn.cnn_svhn import SVHNCNN as model
        model_svhn = model(mode='load', filename='cnn_{}.h5'.format(args.dataset))
        classifier = model_svhn.model
        sgd = optimizers.SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
        classifier.compile(loss=categorical_crossentropy, optimizer=sgd, metrics=['accuracy'])
        kclassifier = KerasClassifier(model=classifier, clip_values=(0, 1))
        epsilons = [8/256, 16/256, 32/256, 64/256, 80/256, 128/256]
        epsilons1 = [5, 10, 15, 20, 25, 30, 40]
        epsilons2 = [0.125, 0.25, 0.3125, 0.5, 1, 1.5, 2]
        eps_sa = 0.125
        pa_th = 100
        # x_train = model_svhn.x_train
        x_test = model_svhn.x_test
        # y_train = model_svhn.y_train
        y_test = model_svhn.y_test
        y_test_labels = model_svhn.y_test_labels
        translation = 10
        rotation = 60

    elif args.dataset == 'svhn_gray':
        from baselineCNN.cnn.cnn_svhn_gray import SVHNCNN as model
        model_svhn = model(mode='load', filename='cnn_{}.h5'.format(args.dataset))
        classifier = model_svhn.model
        sgd = optimizers.SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
        classifier.compile(loss=categorical_crossentropy, optimizer=sgd, metrics=['accuracy'])
        kclassifier = KerasClassifier(model=classifier, clip_values=(0, 1))
        epsilons = [8/256, 16/256, 32/256, 64/256, 80/256, 128/256]
        epsilons1 = [5, 10, 15, 20, 25, 30, 40]
        epsilons2 = [0.125, 0.25, 0.3125, 0.5, 1, 1.5, 2]
        eps_sa = 0.125
        pa_th = 100
        # x_train = model_svhn.x_train
        x_test = model_svhn.x_test
        # y_train = model_svhn.y_train
        y_test = model_svhn.y_test
        y_test_labels = model_svhn.y_test_labels
        translation = 10
        rotation = 60

    elif args.dataset == 'tiny':
        from baselineCNN.cnn.cnn_tiny import TINYCNN as model
        model_tiny = model(mode='load', filename='cnn_{}.h5'.format(args.dataset))
        classifier = model_tiny.model
        sgd = optimizers.SGD(lr=0.001, decay=1e-6, momentum=0.9, nesterov=True)
        classifier.compile(loss=categorical_crossentropy, optimizer=sgd, metrics=['accuracy'])
        kclassifier = KerasClassifier(model=classifier, clip_values=(0, 1))
        epsilons = [8/256, 16/256, 32/256, 64/256, 80/256, 128/256]
        epsilons1 = [5, 10, 15, 20, 25, 30, 40]
        epsilons2 = [0.125, 0.25, 0.3125, 0.5, 1, 1.5, 2]
        eps_sa = 0.125
        pa_th = 100
        # x_train = model_tiny.x_train
        x_test = model_tiny.x_test
        # y_train = model_tiny.y_train
        y_test = model_tiny.y_test
        y_test_labels = model_tiny.y_test_labels
        translation = 8
        rotation = 30
        del model_tiny

    elif args.dataset == 'tiny_gray':
        from baselineCNN.cnn.cnn_tiny_gray import TINYCNN as model
        model_tiny = model(mode='load', filename='cnn_{}.h5'.format(args.dataset))
        classifier = model_tiny.model
        sgd = optimizers.SGD(lr=0.001, decay=1e-6, momentum=0.9, nesterov=True)
        classifier.compile(loss=categorical_crossentropy, optimizer=sgd, metrics=['accuracy'])
        kclassifier = KerasClassifier(model=classifier, clip_values=(0, 1))
        epsilons = [8/256, 16/256, 32/256, 64/256, 80/256, 128/256]
        epsilons1 = [5, 10, 15, 20, 25, 30, 40]
        epsilons2 = [0.125, 0.25, 0.3125, 0.5, 1, 1.5, 2]
        eps_sa = 0.125
        # x_train = model_tiny.x_train
        x_test = model_tiny.x_test
        # y_train = model_tiny.y_train
        y_test = model_tiny.y_test
        y_test_labels = model_tiny.y_test_labels
        translation = 8
        rotation = 30
        del model_tiny

    # batch_count_start = args.batch_indx
    # bsize = args.batch_size
    # batch_count_end = batch_count_start + 1

    # FGSM
    for e in epsilons:
        attack = FastGradientMethod(estimator=kclassifier, eps=e, eps_step=0.01, batch_size=256)
        adv_data = attack.generate(x=x_test)
        adv_file_path = adv_path + args.dataset + '_fgsm_' + str(e) + '.npy'
        np.save(adv_file_path, adv_data)
        print('Done - {}'.format(adv_file_path))

    # BIM
    for e in epsilons:
        attack = BasicIterativeMethod(estimator=kclassifier, eps=e, eps_step=0.01, batch_size=32,
                                      max_iter=int(e * 256 * 1.25))
        adv_data = attack.generate(x=x_test)
        adv_file_path = adv_path + args.dataset + '_bim_' + str(e) + '.npy'
        np.save(adv_file_path, adv_data)
        print('Done - {}'.format(adv_file_path))

    # PGD1
    for e in epsilons1:
        attack = ProjectedGradientDescent(estimator=kclassifier, norm=1, eps=e, eps_step=4, batch_size=32)
        adv_data = attack.generate(x=x_test)
        adv_file_path = adv_path + args.dataset + '_pgd1_' + str(e) + '.npy'
        np.save(adv_file_path, adv_data)
        print('Done - {}'.format(adv_file_path))

    # PGD2
    for e in epsilons2:
        attack = ProjectedGradientDescent(estimator=kclassifier, norm=2, eps=e, eps_step=0.1, batch_size=32)
        adv_data = attack.generate(x=x_test)
        adv_file_path = adv_path + args.dataset + '_pgd2_' + str(e) + '.npy'
        np.save(adv_file_path, adv_data)
        print('Done - {}'.format(adv_file_path))

    # PGDInf
    for e in epsilons:
        attack = ProjectedGradientDescent(estimator=kclassifier, norm=np.inf, eps=e, eps_step=0.01, batch_size=32)
        adv_data = attack.generate(x=x_test)
        adv_file_path = adv_path + args.dataset + '_pgdi_' + str(e) + '.npy'
        np.save(adv_file_path, adv_data)
        print('Done - {}'.format(adv_file_path))

    # CWi
    attack = CarliniLInfMethod(classifier=kclassifier, max_iter=200)
    adv_data = attack.generate(x=x_test)
    adv_file_path = adv_path + args.dataset + '_cwi.npy'
    np.save(adv_file_path, adv_data)
    print('Done - {}'.format(adv_file_path))

    # # CWi
    # if args.dataset == 'tiny':
    #     for n, x, y in batch(x_test, y_test, batch_size=bsize):
    #         if n >= batch_count_start * bsize and n < batch_count_end * bsize:
    #             adv_file_path = adv_path + args.dataset + '_cwi_' + str(batch_count_start) + '.npy'
    #             if not os.path.isfile(adv_file_path):
    #                 attack = CarliniLInfMethod(classifier=kclassifier, max_iter=100, batch_size=bsize)
    #                 adv_data = attack.generate(x=x)
    #                 np.save(adv_file_path, adv_data)
    #                 print('Done - {}'.format(adv_file_path))

    # CW2 - SLOW
    attack = CarliniL2Method(classifier=kclassifier, max_iter=100, batch_size=1, confidence=10)
    adv_data = attack.generate(x=x_test)
    adv_file_path = adv_path + args.dataset + '_cw2.npy'
    np.save(adv_file_path, adv_data)
    print('Done - {}'.format(adv_file_path))

    # DF
    attack = DeepFool(classifier=kclassifier)
    adv_data = attack.generate(x=x_test)
    adv_file_path = adv_path + args.dataset + '_df.npy'
    np.save(adv_file_path, adv_data)
    print('Done - {}'.format(adv_file_path))

    # # DF
    # if args.dataset == 'tiny':
    #     for n, x, y in batch(x_test, y_test, batch_size=bsize):
    #         if n >= batch_count_start * bsize and n < batch_count_end * bsize:
    #             attack = DeepFool(classifier=kclassifier, epsilon=9, max_iter=100)
    #             adv_data = attack.generate(x=x)
    #             adv_file_path = adv_path + args.dataset + '_df_' + str(batch_count_start) + '.npy'
    #             np.save(adv_file_path, adv_data)
    #             print('Done - {}'.format(adv_file_path))

    # Spatial transformation attack
    attack = SpatialTransformation(classifier=kclassifier, max_translation=translation, max_rotation=rotation)
    adv_data = attack.generate(x=x_test)
    adv_file_path = adv_path + args.dataset + '_sta.npy'
    np.save(adv_file_path, adv_data)
    print('Done - {}'.format(adv_file_path))

    # Square Attack
    attack = SquareAttack(estimator=kclassifier, max_iter=200, eps=eps_sa)
    adv_data = attack.generate(x=x_test, y=y_test)
    adv_file_path = adv_path + args.dataset + '_sa.npy'
    np.save(adv_file_path, adv_data)
    print('Done - {}'.format(adv_file_path))

    # HopSkipJump Attack
    y_test_next = get_next_class(y_test)
    attack = HopSkipJump(classifier=kclassifier, targeted=False, max_iter=0, max_eval=100, init_eval=10)
    iter_step = 10
    adv_data = np.zeros(x_test.shape)
    # adv_data = adv_data[0:25]
    for i in range(4):
        adv_data = attack.generate(x=x_test, x_adv_init=adv_data, resume=True)
        attack.max_iter = iter_step
        # _, acc_normal = classifier.evaluate(x_test[0:25], y_test[0:25])
        # _, acc_adv = classifier.evaluate(adv_data, y_test[0:25])
        # print('Normal accuracy - {}\nAttack accuracy - {}'.format(acc_normal, acc_adv))
        # subcount = 1
        # for i in range(0, 25):
        #     plt.subplot(5, 5, subcount)
        #     if args.dataset == 'mnist':
        #         plt.imshow(adv_data[i][:, :, 0])
        #     else:
        #         plt.imshow(adv_data[i][:, :, :])
        #     plt.suptitle(args.dataset + " sb")
        #     subcount = subcount + 1
        # plt.show()
    adv_file_path = adv_path + args.dataset + '_hop.npy'
    np.save(adv_file_path, adv_data)
    print('Done - {}'.format(adv_file_path))

    # ZOO attack
    attack = ZooAttack(classifier=kclassifier, batch_size=32)
    adv_data = attack.generate(x=x_test, y=y_test)
    adv_file_path = adv_path + args.dataset + '_zoo.npy'
    np.save(adv_file_path, adv_data)
    print('Done - {}'.format(adv_file_path))
# torch.save(classifier.model.state_dict(), 'pth/{}.pth.tar'.format(exp_time))

# Step 5: Evaluate the ART classifier on benign test examples
predictions = classifier.predict(x_test)
accuracy = np.sum(np.argmax(predictions, axis=1) == np.argmax(y_test, axis=1)) / len(y_test)
print("Accuracy on benign test examples: {}%".format(accuracy * 100))

# Step 6: Generate adversarial test examples
attack = FastGradientMethod(estimator=classifier, eps=0.2)
attack_pgd = ProjectedGradientDescent(
    classifier,
    norm=np.inf,
    eps=8.0 / 255.0,
    eps_step=2.0 / 255.0,
    max_iter=40,
    targeted=False,
    num_random_init=5,
    batch_size=32,
)
x_test_adv = attack.generate(x=x_test)
# x_test_adv = attack_pgd.generate(x_test)
# np.save('./adv.npy', x_test_adv)
# x_test_adv = np.load('./adv.npy')

# Step 7: Evaluate the ART classifier on adversarial test examples
# x_save = x_test[0:100]
# x_adv_save = x_test_adv[0:100]
# x_sprite = create_sprite(x_save)
# x_adv_sprite = create_sprite(x_adv_save)
datagen.fit(x_train)
art_datagen = KerasDataGenerator(
    datagen.flow(x=x_train, y=y_train, batch_size=batch_size, shuffle=True),
    size=x_train.shape[0],
    batch_size=batch_size,
)

# Create a toy Keras CNN architecture & wrap it under ART interface
classifier = KerasClassifier(build_model(), clip_values=(0, 1), use_logits=False)

# Create attack for adversarial trainer; the adversarial examples are crafted directly on the target model
pgd = ProjectedGradientDescent(classifier, eps=8, eps_step=2, max_iter=10, num_random_init=20)

# Create some adversarial samples for evaluation
x_test_pgd = pgd.generate(x_test)

# Create adversarial trainer and perform adversarial training
adv_trainer = AdversarialTrainer(classifier, attacks=pgd, ratio=1.0)
adv_trainer.fit_generator(art_datagen, nb_epochs=83)

# Evaluate the adversarially trained model on the clean test set
labels_true = np.argmax(y_test, axis=1)
labels_test = np.argmax(classifier.predict(x_test), axis=1)
print("Accuracy test set: %.2f%%" % (np.sum(labels_test == labels_true) / x_test.shape[0] * 100))
adv_classifier = TensorFlowV2Classifier(
    model=new_model,
    loss_object=loss_object,
    train_step=train_step,
    nb_classes=5,
    input_shape=(1, 25),
    clip_values=(0, 1),
)

print("Creating adversarial attack object...\n")
pgd = ProjectedGradientDescent(adv_classifier, norm=np.inf, eps=eps, eps_step=0.001,
                               targeted=False, batch_size=2048, num_random_init=27)

print("Generating adversarial samples...\n")
logger.info("Craft attack on training examples")
x_train_adv = pgd.generate(train_data)
save_samples(x_train_adv, 'pgd_train', exp)
logger.info("=" * 50)

logger.info("Craft attack on test examples")
x_test_adv = pgd.generate(test_data)
save_samples(x_test_adv, 'pgd_test', exp)
logger.info("=" * 50)
def natual(eps):
    # Step 1: Load the MNIST dataset
    (x_train, y_train), (x_test, y_test), min_pixel_value, max_pixel_value = load_mnist()

    # Step 2: Create the model
    import tensorflow as tf
    from tensorflow.keras import Model
    from tensorflow.keras.layers import Dense, Flatten, Conv2D, MaxPool2D

    class TensorFlowModel(Model):
        """
        Standard TensorFlow model for unit testing.
        """

        def __init__(self):
            super(TensorFlowModel, self).__init__()
            self.conv1 = Conv2D(filters=4, kernel_size=5, activation="relu")
            self.conv2 = Conv2D(filters=10, kernel_size=5, activation="relu")
            self.maxpool = MaxPool2D(pool_size=(2, 2), strides=(2, 2), padding="valid", data_format=None)
            self.flatten = Flatten()
            self.dense1 = Dense(100, activation="relu")
            self.logits = Dense(10, activation="linear")

        def call(self, x):
            """
            Call function to evaluate the model.

            :param x: Input to the model
            :return: Prediction of the model
            """
            x = self.conv1(x)
            x = self.maxpool(x)
            x = self.conv2(x)
            x = self.maxpool(x)
            x = self.flatten(x)
            x = self.dense1(x)
            x = self.logits(x)
            return x

    optimizer = tf.keras.optimizers.Adam(learning_rate=0.01)

    def train_step(model, images, labels):
        with tf.GradientTape() as tape:
            predictions = model(images, training=True)
            loss = loss_object(labels, predictions)
        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))

    model = TensorFlowModel()
    loss_object = tf.keras.losses.CategoricalCrossentropy(from_logits=True)

    # Step 3: Create the ART classifier
    classifier = TensorFlowV2Classifier(
        model=model,
        loss_object=loss_object,
        train_step=train_step,
        nb_classes=10,
        input_shape=(28, 28, 1),
        clip_values=(0, 1),
    )

    # Step 4: Train the ART classifier
    classifier.fit(x_train, y_train, batch_size=64, nb_epochs=10)

    # Step 5: Evaluate the ART classifier on benign test examples
    predictions = classifier.predict(x_test)
    accuracy = np.sum(np.argmax(predictions, axis=1) == np.argmax(y_test, axis=1)) / len(y_test)
    print("Accuracy on benign test examples: {}%".format(accuracy * 100))

    # Step 6: Generate adversarial test examples
    attack = ProjectedGradientDescent(estimator=classifier, eps=eps, eps_step=eps / 3, max_iter=20)
    x_test_adv = attack.generate(x=x_test)

    # Step 7: Evaluate the ART classifier on adversarial test examples
    predictions = classifier.predict(x_test_adv)
    accuracy = np.sum(np.argmax(predictions, axis=1) == np.argmax(y_test, axis=1)) / len(y_test)
    print("Accuracy on adversarial test examples: {}%".format(accuracy * 100))
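# Hedged usage sketch: run the end-to-end MNIST pipeline above for a single PGD budget.
# The eps value is an illustrative assumption; the caller elsewhere in this collection
# passes its own budget (e.g. natual(args.eps)).
natual(eps=0.2)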
def main():
    # Create ART object detector
    frcnn = PyTorchFasterRCNN(
        clip_values=(0, 255),
        attack_losses=["loss_classifier", "loss_box_reg", "loss_objectness", "loss_rpn_box_reg"],
    )

    # Load image 1
    image_0 = cv2.imread("./10best-cars-group-cropped-1542126037.jpg")
    image_0 = cv2.cvtColor(image_0, cv2.COLOR_BGR2RGB)  # Convert to RGB
    print("image_0.shape:", image_0.shape)

    # Load image 2
    image_1 = cv2.imread("./banner-diverse-group-of-people-2.jpg")
    image_1 = cv2.cvtColor(image_1, cv2.COLOR_BGR2RGB)  # Convert to RGB
    image_1 = cv2.resize(image_1, dsize=(image_0.shape[1], image_0.shape[0]), interpolation=cv2.INTER_CUBIC)
    print("image_1.shape:", image_1.shape)

    # Stack images
    image = np.stack([image_0, image_1], axis=0).astype(np.float32)
    print("image.shape:", image.shape)

    for i in range(image.shape[0]):
        plt.axis("off")
        plt.title("image {}".format(i))
        plt.imshow(image[i].astype(np.uint8), interpolation="nearest")
        plt.show()

    # Make prediction on benign samples
    predictions = frcnn.predict(x=image)

    for i in range(image.shape[0]):
        print("\nPredictions image {}:".format(i))

        # Process predictions
        predictions_class, predictions_boxes, predictions_class = extract_predictions(predictions[i])

        # Plot predictions
        plot_image_with_boxes(img=image[i].copy(), boxes=predictions_boxes, pred_cls=predictions_class)

    # Create and run attack
    eps = 32
    attack = ProjectedGradientDescent(estimator=frcnn, eps=eps, eps_step=2, max_iter=10)
    image_adv = attack.generate(x=image, y=None)

    print("\nThe attack budget eps is {}".format(eps))
    print("The resulting maximal difference in pixel values is {}.".format(np.amax(np.abs(image - image_adv))))

    for i in range(image_adv.shape[0]):
        plt.axis("off")
        plt.title("image_adv {}".format(i))
        plt.imshow(image_adv[i].astype(np.uint8), interpolation="nearest")
        plt.show()

    predictions_adv = frcnn.predict(x=image_adv)

    for i in range(image.shape[0]):
        print("\nPredictions adversarial image {}:".format(i))

        # Process predictions
        predictions_adv_class, predictions_adv_boxes, predictions_adv_class = extract_predictions(predictions_adv[i])

        # Plot predictions
        plot_image_with_boxes(img=image_adv[i].copy(), boxes=predictions_adv_boxes, pred_cls=predictions_adv_class)
def main(args):
    batch_status_message = {'status': 'Ready', 'modelurl': args.model}
    batch_count = 0
    model_filename = 'base_model.h5'
    logging.info('model={}'.format(args.model))
    location = os.path.join(ART_DATA_PATH, model_filename)
    try:
        os.remove(location)
    except OSError:
        pass
    path = get_file(model_filename, extract=False, path=ART_DATA_PATH, url=args.model)
    kmodel = load_model(path)
    model = KerasClassifier(kmodel, use_logits=False, clip_values=[float(args.min), float(args.max)])
    logging.info('finished acquiring model')

    logging.info('creating attack {}'.format(args.attack))
    if args.attack == 'FGM':
        attack = FastGradientMethod(model, eps=0.3, eps_step=0.01, targeted=False)
        logging.info('created FGM attack')
    elif args.attack == 'PGD':
        attack = ProjectedGradientDescent(model, eps=8, eps_step=2, max_iter=13, targeted=False, num_random_init=True)
        logging.info('created PGD attack')
    else:
        logging.error('Invalid attack provided {} must be one of {{FGM, PGD}}'.format(args.attack))
        exit(0)
    logging.info('finished creating attack')

    logging.info('brokers={}'.format(args.brokers))
    logging.info('readtopic={}'.format(args.readtopic))
    logging.info('creating kafka consumer')
    consumer = KafkaConsumer(
        args.readtopic,
        bootstrap_servers=args.brokers,
        value_deserializer=lambda val: loads(val.decode('utf-8')))
    logging.info("finished creating kafka consumer")

    if args.dbxtoken != '':
        dbx = dropbox.Dropbox(args.dbxtoken)
        logging.info('creating kafka producer')
        producer = KafkaProducer(bootstrap_servers=args.brokers,
                                 value_serializer=lambda x: dumps(x).encode('utf-8'))
        logging.info('finished creating kafka producer')
    else:
        dbx = None

    while True:
        for message in consumer:
            if message.value['url']:
                conn = psycopg2.connect(
                    host=args.dbhost,
                    port=5432,
                    dbname=args.dbname,
                    user=args.dbusername,
                    password=args.dbpassword)
                cur = conn.cursor()
                image_url = message.value['url']
                query = 'UPDATE images SET STATUS=%s where URL=%s'
                cur.execute(query, ('Processed', image_url))
                logging.info('updated database for {}'.format(image_url))
                cur.close()
                conn.close()
                batch_count = batch_count + 1
                response = requests.get(image_url)
                img = Image.open(BytesIO(response.content))
                label = message.value['label']
                infilename = message.value['filename'].rpartition('.')[0]
                logging.info('received URL {}'.format(image_url))
                logging.info('received label {}'.format(label))
                logging.info('received filename {}'.format(infilename))
                logging.info('downloading image')
                image = np.array(img.getdata()).reshape(1, img.size[0], img.size[1], 3).astype('float32')
                logging.info('downloaded image {} and {}'.format(image.shape, image.dtype))
                images = np.ndarray(shape=(2, 32, 32, 3))
                logging.info('created images storage')
                images[0] = image
                logging.info('assigned image to images')
                adversarial = attack.generate(image)
                logging.info('adversarial image generated')
                images[1] = adversarial
                logging.info('adversarial image assigned')
                preds = model.predict(images)
                orig_inf = np.argmax(preds[0])
                adv_inf = np.argmax(preds[1])
                logging.info('original inference: {} adversarial inference: {}'.format(orig_inf, adv_inf))
                if (orig_inf != adv_inf) and (dbx is not None):
                    fs = BytesIO()
                    imout = Image.fromarray(np.uint8(adversarial[0]))
                    imout.save(fs, format='jpeg')
                    outfilename = '/images/{}_{}_adv.jpg'.format(infilename, adv_inf)
                    logging.info('Uploading file')
                    dbx.files_upload(f=fs.getvalue(), path=outfilename,
                                     mode=dropbox.files.WriteMode('overwrite', None))
                if (batch_count == int(args.batchsize)) and (dbx is not None):
                    logging.info('Sending message {} to topic {}'.format(batch_status_message, args.writetopic))
                    producer.send(args.writetopic, batch_status_message)
                    batch_count = 0
def test_fgsm(adv_model, dataset, loss_fn, optimizer, batch_size=32, num_workers=20,
              device='cuda:0', attack='fgsm', **kwargs):
    """
    Evaluate the model on adversarial examples crafted over a range of epsilons.

    :param adv_model: model under attack
    :param dataset: dict of datasets; the 'eval' split is attacked
    :param attack: attack type, 'fgsm' or 'bim' (the latter uses PGD)
    """
    epsilons = [0.00001, 0.0001, 0.004, 0.01, 0.1, 1, 10, 100]
    label_dict = pkl.load(open('external/speaker2int_7323.pkl', 'rb'))
    extractor = mfcc_extractor(collate=False)
    adv_classifier = PyTorchClassifier(model=AdvModel(adv_model.cpu(), extractor.cpu()),
                                       loss=loss_fn,
                                       optimizer=optimizer,
                                       input_shape=[1, 32000],
                                       nb_classes=250)

    # Create Dataloader
    dataloader = DataLoader(dataset=dataset['eval'], batch_size=batch_size, shuffle=False,
                            num_workers=num_workers, collate_fn=PadBatch())
    n_iterations = len(dataloader)

    f_log_all, f_name_all = createLogFiles('all')
    with open(f_name_all, 'a+') as f_log_all:
        f_log_all.write("\n\n #################################### Begin #####################################")
        f_log_all.write("\n New Log: {}".format(datetime.now()))

    # Loop over all the evaluation data
    n_files = 0
    accuracy = 0
    adv_acc_eps = {e: 0.0 for e in epsilons}
    success_eps = {e: 0.0 for e in epsilons}
    for i, (X, y, f) in enumerate(dataloader):
        if label_dict:
            y = torch.LongTensor([label_dict[y_] for y_ in y])

        # send data to the GPU
        y = y.to(device)
        x_mfccs, labels = extractor((X.to(device).transpose(1, 2))), y
        clean_logits = adv_model.forward(x_mfccs)
        clean_class = clean_logits.argmax(dim=-1)
        n_files += len(X)
        tmp_accuracy = torch.sum(clean_class == y).detach().cpu()
        accuracy += tmp_accuracy

        # Epsilon loop (the attack object is built in a separate variable so the `attack`
        # type selector is not overwritten after the first iteration)
        for e in epsilons:
            if attack == 'fgsm':
                attacker = FastGradientMethod(estimator=adv_classifier, eps=e)
            elif attack == 'bim':
                attacker = ProjectedGradientDescent(estimator=adv_classifier, eps=e, eps_step=e / 5, max_iter=100)
            X_fgsm = torch.Tensor(attacker.generate(x=X)).to(device)
            assert (len(X_fgsm) == len(X))
            pred_mfccs, labels_preds = extractor(X_fgsm.transpose(1, 2)), y
            adv_logits = adv_model.forward(pred_mfccs)
            adv_class = adv_logits.argmax(dim=-1)
            tmp_success = torch.sum(clean_class != adv_class).detach().cpu()
            tmp_adv_acc = torch.sum(y == adv_class).detach().cpu()
            success_eps[e] += tmp_success
            adv_acc_eps[e] += tmp_adv_acc

            # Update total loss and acc
            with open(f_name_all, 'a+') as f_log_all:
                f_log_all.write('File {}\tBatch {}\tEps {}\tTarg {}\tClean {}\tAdv {}\n'.format(
                    f[0][-1], i + 1, e, y.cpu().detach().numpy(),
                    clean_class.cpu().detach().numpy(), adv_class.cpu().detach().numpy()))

            for wav, fi in zip(X_fgsm, f):
                adv_path = "samples/fgsm/{}".format(fi[-2])
                if not os.path.exists(adv_path):
                    os.makedirs(adv_path)
                torchaudio.save("{}/{}_{}.wav".format(adv_path, fi[-1], e), wav.squeeze().detach().cpu(), 8000)

            print("Epsilon: {}".format(e),
                  "Tmp Acc: {:.3f}".format((tmp_accuracy + 0.0) / len(X)),
                  "Tmp Adv: {:.3f}".format((tmp_adv_acc + 0.0) / len(X)),
                  "Tmp Suc: {:.3f}".format((tmp_success + 0.0) / len(X)))

    accuracy = (accuracy + 0.0) / n_files
    adv_acc_eps = {k: v / n_files for k, v in adv_acc_eps.items()}
    success_eps = {k: v / n_files for k, v in success_eps.items()}
    with open(f_name_all, 'a+') as f_log_all:
        f_log_all.write('Epsilons: {} - Accuracy: {}%\tAdv Accuracy: {}%\tSuccess rate: {}%\n'.format(
            e, accuracy, adv_acc_eps, success_eps))
    return
            cuda.empty_cache()
            models_to_ensemble = []
        # load next model and continue only if ensemble is done
        models_to_ensemble.append(guess_and_load_model(path_model, data=data, force_cpu=False))
        if len(models_to_ensemble) < args.ensemble_inner:
            continue
        classifier = load_classifier_ensemble(models_to_ensemble, data=data)
    else:
        raise ValueError('incorrect ensemble_inner arg')

    # create attack
    if args.attack_name == 'FGM':
        attack = FastGradientMethod(estimator=classifier, targeted=False, norm=args.norm,
                                    eps=args.norm_inner,
                                    num_random_init=args.n_random_init_inner,
                                    batch_size=args.batch_size)
    elif args.attack_name == 'PGD':
        attack = ProjectedGradientDescent(estimator=classifier, targeted=False,
                                          max_iter=args.n_iter_attack, norm=args.norm,
                                          eps=args.norm_inner,
                                          eps_step=args.norm_inner / 4,  # TODO: tune?
                                          num_random_init=args.n_random_init_inner,
                                          batch_size=args.batch_size)
    else:
        raise NotImplementedError('attack-name not supported')

    X_adv_tmp = attack.generate(x=X_adv_tmp, y=y)
    # project on ball of max_norm size, and clip
    X_adv_tmp = X + projection(X_adv_tmp - X, eps=args.max_norm, norm_p=args.norm)  # project on the ball
    X_adv_tmp = np.clip(X_adv_tmp, data.min_pixel_value, data.max_pixel_value)

    # print and save stats
    acc_ens_prob, acc_ens_logit = compute_accuracy_ensemble(models_dir=args.dir_models, X=X_adv_tmp, y=y, data=data)
    lpnorm = compute_norm(X_adv=X_adv_tmp, X=X, norm=args.norm)
    if USE_CUDA:
        torch.cuda.synchronize()
    end_time = time.perf_counter()
    print(
x_test = torch.cat(x_data).numpy()
# pp(predictions.shape)

# test accuracy on benign examples
accuracy = np.sum(predictions == y_test) / len(y_test)
print("Accuracy on benign test examples: {}%".format(accuracy * 100))

# Step 5: Generate adversarial test examples
# attack = FastGradientMethod(estimator=classifier, eps=0.1)
# x_test_adv = attack.generate(x=x_test)
# adv_crafter = DeepFool(classifier, nb_grads=args.nb_grads)

# pgd
adv_crafter_untargeted = ProjectedGradientDescent(classifier, eps=args.eps, eps_step=args.eps_step,
                                                  max_iter=args.max_iter)
print("PGD: Craft attack on untargeted training examples")
x_test_adv = adv_crafter_untargeted.generate(x_test)

adv_crafter_targeted = ProjectedGradientDescent(classifier, targeted=True, eps=args.eps_step,
                                                eps_step=args.eps_step, max_iter=args.max_iter)
print("PGD: Craft attack on targeted training examples")
targets = random_targets(y_test, nb_classes=10)
x_test_adv_targeted = adv_crafter_targeted.generate(x_test, **{"y": targets})

# auto pgd
auto_adv_crafter_untargeted = AutoProjectedGradientDescent(
def plot_attacks_acc(classifier, x, y, path_fig, dataset, title):
    '''
    Description: This function takes a classifier model and a set of images with labels, and creates a plot
    showing how the model's accuracy on the dataset decreases as the attack strength (perturbation size)
    increases, for 3 different attacks (FGSM, PGD, BIM).
    :param classifier: model to be evaluated
    :param x: list of images to be predicted on
    :param y: labels of images
    :param path_fig: path to save the plot figure
    :param dataset: name of dataset (e.g. mnist, cifar, ddsm, brain_mri, lidc)
    :param title: title to define plot figure
    :return: Figure will be saved with title
    '''
    if dataset == 'ddsm':
        eps_range = [0.00001, 0.00005, 0.0001, 0.00025, 0.0005, 0.00075, 0.001, 0.00125, 0.0015, 0.00175,
                     0.002, 0.0025, 0.003, 0.0035, 0.004, 0.0045, 0.005, 0.0055, 0.006, 0.007, 0.008]
        step_size = 0.001
    elif dataset == 'brain_mri':
        eps_range = [0.0001, 0.0005, 0.001, 0.0013, 0.0016, 0.002, 0.00225, 0.0025, 0.00275, 0.003, 0.00325,
                     0.0035, 0.00375, 0.004, 0.0045, 0.005, 0.006, 0.007, 0.008, 0.009, 0.01, 0.012]
        step_size = 0.001
    elif dataset == 'mnist':
        eps_range = [0.0001, 0.01, 0.02, 0.05, 0.075, 0.1, 0.125, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.5]
        step_size = 0.1
    elif dataset == 'cifar':
        eps_range = [0.0001, 0.001, 0.002, 0.003, 0.004, 0.005, 0.007, 0.009, 0.01, 0.015, 0.02, 0.03, 0.04, 0.05]
        step_size = 0.01
    elif dataset == 'lidc':
        eps_range = [0.0001, 0.0003, 0.0006, 0.0008, 0.001, 0.00125, 0.0015, 0.00175, 0.002, 0.0023, 0.0026,
                     0.0028, 0.003, 0.004, 0.005, 0.006, 0.007, 0.008, 0.009, 0.01, 0.011, 0.012, 0.013,
                     0.014, 0.015, 0.016, 0.017, 0.018, 0.019, 0.02]
        step_size = 0.001

    nb_correct_fgsm = []
    nb_correct_pgd = []
    nb_correct_bim = []
    for eps in eps_range:
        attacker_fgsm = FastGradientMethod(classifier, eps=eps)
        attacker_pgd = ProjectedGradientDescent(classifier, eps=eps, eps_step=eps / 4, max_iter=10, num_random_init=5)
        attacker_bim = BasicIterativeMethod(classifier, eps=eps, eps_step=eps / 10, max_iter=10)
        x_fgsm = attacker_fgsm.generate(x)
        x_pgd = attacker_pgd.generate(x)
        x_bim = attacker_bim.generate(x)
        x_pred_fgsm = np.argmax(classifier.predict(x_fgsm), axis=1)
        nb_correct_fgsm += [np.sum(x_pred_fgsm == np.argmax(y, axis=1))]
        x_pred_pgd = np.argmax(classifier.predict(x_pgd), axis=1)
        nb_correct_pgd += [np.sum(x_pred_pgd == np.argmax(y, axis=1))]
        x_pred_bim = np.argmax(classifier.predict(x_bim), axis=1)
        nb_correct_bim += [np.sum(x_pred_bim == np.argmax(y, axis=1))]

    fig, ax = plt.subplots()
    ax.plot(np.array(eps_range) / step_size, 100 * np.array(nb_correct_fgsm) / y.shape[0], 'b--', label='FGSM')
    ax.plot(np.array(eps_range) / step_size, 100 * np.array(nb_correct_pgd) / y.shape[0], 'r--', label='PGD')
    ax.plot(np.array(eps_range) / step_size, 100 * np.array(nb_correct_bim) / y.shape[0], 'g--', label='BIM')
    legend = ax.legend(loc='upper right', shadow=True, fontsize='large')
    legend.get_frame().set_facecolor('#FFFFFF')
    if dataset == 'mnist':
        plt.xlabel('Perturbation (x ' + '$10^{-1}$' + ')')
    elif dataset == 'cifar':
        plt.xlabel('Perturbation (x ' + '$10^{-2}$' + ')')
    else:
        plt.xlabel('Perturbation (x ' + '$10^{-3}$' + ')')
    plt.ylabel('Accuracy (%)')
    plt.savefig(path_fig + dataset + '/' + title + '.png')
    plt.clf()

    data = [np.array(eps_range),
            np.array(nb_correct_fgsm) / y.shape[0],
            np.array(nb_correct_pgd) / y.shape[0],
            np.array(nb_correct_bim) / y.shape[0]]
    out = csv.writer(open(path_csv + dataset + '/' + title + '.csv', "w"), delimiter=',', quoting=csv.QUOTE_ALL)
    out.writerows(zip(*data))
    return 0
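# Hedged usage sketch for plot_attacks_acc above: `classifier` is assumed to be an ART classifier
# built elsewhere, and x_test / y_test a one-hot-labelled test set; the output path and title are
# illustrative values and the target directories are assumed to exist.
plot_attacks_acc(classifier, x_test, y_test,
                 path_fig='figures/', dataset='mnist', title='attack_accuracy_mnist')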
def main():
    args = parse_option()
    print(args)

    # check args
    if args.loss not in LOSS_NAMES:
        raise ValueError('Unsupported loss function type {}'.format(args.loss))

    if args.optimizer == 'adam':
        optimizer1 = tf.keras.optimizers.Adam(lr=args.lr_1)
    elif args.optimizer == 'lars':
        from lars_optimizer import LARSOptimizer  # not compatible with tf2
        optimizer1 = LARSOptimizer(args.lr_1, exclude_from_weight_decay=['batch_normalization', 'bias'])
    elif args.optimizer == 'sgd':
        optimizer1 = tfa.optimizers.SGDW(learning_rate=args.lr_1, momentum=0.9, weight_decay=1e-4)
    optimizer2 = tf.keras.optimizers.Adam(lr=args.lr_2)

    model_name = '{}_model-bs_{}-lr_{}'.format(args.loss, args.batch_size_1, args.lr_1)

    # 0. Load data
    if args.data == 'mnist':
        mnist = tf.keras.datasets.mnist
    elif args.data == 'fashion_mnist':
        mnist = tf.keras.datasets.fashion_mnist
    print('Loading {} data...'.format(args.data))
    (_, y_train), (_, y_test) = mnist.load_data()
    # x_train, x_test = x_train / 255.0, x_test / 255.0
    # x_train = x_train.reshape(-1, 28*28).astype(np.float32)
    # x_test = x_test.reshape(-1, 28*28).astype(np.float32)
    (x_train, _), (x_test, _), _, _ = load_mnist()
    # print(x_train[0][0])
    print(x_train.shape, x_test.shape)

    # simulate low data regime for training
    # n_train = x_train.shape[0]
    # shuffle_idx = np.arange(n_train)
    # np.random.shuffle(shuffle_idx)
    # x_train = x_train[shuffle_idx][:args.n_data_train]
    # y_train = y_train[shuffle_idx][:args.n_data_train]
    # print('Training dataset shapes after slicing:')
    print(x_train.shape, y_train.shape)

    train_ds = tf.data.Dataset.from_tensor_slices(
        (x_train, y_train)).shuffle(5000).batch(args.batch_size_1)
    train_ds2 = tf.data.Dataset.from_tensor_slices(
        (x_train, y_train)).shuffle(5000).batch(args.batch_size_2)
    test_ds = tf.data.Dataset.from_tensor_slices(
        (x_test, y_test)).batch(args.batch_size_1)

    # 1. Stage 1: train encoder with multiclass N-pair loss
    encoder = Encoder(normalize=True, activation=args.activation)
    projector = Projector(args.projection_dim, normalize=True, activation=args.activation)

    if args.loss == 'max_margin':
        def loss_func(z, y):
            return losses.max_margin_contrastive_loss(z, y, margin=args.margin, metric=args.metric)
    elif args.loss == 'npairs':
        loss_func = losses.multiclass_npairs_loss
    elif args.loss == 'sup_nt_xent':
        def loss_func(z, y):
            return losses.supervised_nt_xent_loss(
                z, y, temperature=args.temperature, base_temperature=args.base_temperature)
    elif args.loss.startswith('triplet'):
        triplet_kind = args.loss.split('-')[1]

        def loss_func(z, y):
            return losses.triplet_loss(z, y, kind=triplet_kind, margin=args.margin)

    train_loss = tf.keras.metrics.Mean(name='train_loss')
    test_loss = tf.keras.metrics.Mean(name='test_loss')

    # tf.config.experimental_run_functions_eagerly(True)
    @tf.function
    # train step for the contrastive loss
    def train_step_stage1(x, y):
        '''
        x: data tensor, shape: (batch_size, data_dim)
        y: data labels, shape: (batch_size, )
        '''
        with tf.GradientTape() as tape:
            r = encoder(x, training=True)
            z = projector(r, training=True)
            # print("z", z, "y", y)
            loss = loss_func(z, y)
        gradients = tape.gradient(
            loss, encoder.trainable_variables + projector.trainable_variables)
        optimizer1.apply_gradients(
            zip(gradients, encoder.trainable_variables + projector.trainable_variables))
        train_loss(loss)

    @tf.function
    def test_step_stage1(x, y):
        r = encoder(x, training=False)
        z = projector(r, training=False)
        t_loss = loss_func(z, y)
        test_loss(t_loss)

    print('Stage 1 training ...')
    for epoch in range(args.epoch):
        # Reset the metrics at the start of the next epoch
        train_loss.reset_states()
        test_loss.reset_states()
        for x, y in train_ds:
            train_step_stage1(x, y)
        for x_te, y_te in test_ds:
            test_step_stage1(x_te, y_te)
        template = 'Epoch {}, Loss: {}, Test Loss: {}'
        # print(template.format(epoch + 1,
        #                       train_loss.result(),
        #                       test_loss.result()))

    if args.draw_figures:
        # projecting data with the trained encoder, projector
        x_tr_proj = projector(encoder(x_train))
        x_te_proj = projector(encoder(x_test))
        # convert tensor to np.array
        x_tr_proj = x_tr_proj.numpy()
        x_te_proj = x_te_proj.numpy()
        print(x_tr_proj.shape, x_te_proj.shape)

        # check learned embedding using PCA
        pca = PCA(n_components=2)
        pca.fit(x_tr_proj)
        x_te_proj_pca = pca.transform(x_te_proj)
        x_te_proj_pca_df = pd.DataFrame(x_te_proj_pca, columns=['PC1', 'PC2'])
        x_te_proj_pca_df['label'] = y_test

        # PCA scatter plot
        fig, ax = plt.subplots()
        ax = sns.scatterplot('PC1', 'PC2', data=x_te_proj_pca_df,
                             palette='tab10', hue='label', linewidth=0, alpha=0.6, ax=ax)
        box = ax.get_position()
        ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])
        ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
        title = 'Data: {}\nEmbedding: {}\nbatch size: {}; LR: {}'.format(
            args.data, LOSS_NAMES[args.loss], args.batch_size_1, args.lr_1)
        ax.set_title(title)
        fig.savefig('figs/PCA_plot_{}_{}_embed.png'.format(args.data, model_name))

        # density plot for PCA
        g = sns.jointplot('PC1', 'PC2', data=x_te_proj_pca_df, kind="hex")
        plt.subplots_adjust(top=0.95)
        g.fig.suptitle(title)
        g.savefig('figs/Joint_PCA_plot_{}_{}_embed.png'.format(args.data, model_name))

    # Stage 2: freeze the learned representations and then learn a classifier
    # on a linear layer using a softmax loss
    softmax = SoftmaxPred()
    train_loss = tf.keras.metrics.Mean(name='train_loss')
    train_acc = tf.keras.metrics.SparseCategoricalAccuracy(name='train_ACC')
    test_loss = tf.keras.metrics.Mean(name='test_loss')
    test_acc = tf.keras.metrics.SparseCategoricalAccuracy(name='test_ACC')
    cce_loss_obj = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

    @tf.function
    # train step for the 2nd stage
    def train_step(model, x, y):
        '''
        x: data tensor, shape: (batch_size, data_dim)
        y: data labels, shape: (batch_size, )
        '''
        with tf.GradientTape() as tape:
            r = model.layers[0](x, training=False)
            y_preds = model.layers[1](r, training=True)
            loss = cce_loss_obj(y, y_preds)
        # freeze the encoder, only train the softmax layer
        gradients = tape.gradient(loss, model.layers[1].trainable_variables)
        optimizer2.apply_gradients(zip(gradients, model.layers[1].trainable_variables))
        train_loss(loss)
        train_acc(y, y_preds)

    @tf.function
    def test_step(x, y):
        r = encoder(x, training=False)
        y_preds = softmax(r, training=False)
        t_loss = cce_loss_obj(y, y_preds)
        test_loss(t_loss)
        test_acc(y, y_preds)

    if args.write_summary:
        current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
        train_log_dir = 'logs/{}/{}/{}/train'.format(model_name, args.data, current_time)
        test_log_dir = 'logs/{}/{}/{}/test'.format(model_name, args.data, current_time)
        train_summary_writer = tf.summary.create_file_writer(train_log_dir)
        test_summary_writer = tf.summary.create_file_writer(test_log_dir)

    print('Stage 2 training ...')
    model = tf.keras.Sequential([encoder, softmax])
    loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
    classifier = TensorFlowV2Classifier(
        model=model,
        loss_object=loss_object,
        train_step=train_step,
        nb_classes=10,
        input_shape=(28, 28, 1),
        clip_values=(0, 1),
    )
    # classifier.fit(x_train, y_train, batch_size=256, nb_epochs=20)

    for epoch in range(args.epoch):
        # Reset the metrics at the start of the next epoch
        train_loss.reset_states()
        train_acc.reset_states()
        test_loss.reset_states()
        test_acc.reset_states()

        for x, y in train_ds2:
            train_step(model, x, y)
        if args.write_summary:
            with train_summary_writer.as_default():
                tf.summary.scalar('loss', train_loss.result(), step=epoch)
                tf.summary.scalar('accuracy', train_acc.result(), step=epoch)

        for x_te, y_te in test_ds:
            test_step(x_te, y_te)
        if args.write_summary:
            with test_summary_writer.as_default():
                tf.summary.scalar('loss', test_loss.result(), step=epoch)
                tf.summary.scalar('accuracy', test_acc.result(), step=epoch)

        template = 'Epoch {}, Loss: {}, Acc: {}, Test Loss: {}, Test Acc: {}'
        print(template.format(epoch + 1,
                              train_loss.result(),
                              train_acc.result() * 100,
                              test_loss.result(),
                              test_acc.result() * 100))

    predictions = classifier.predict(x_test)
    print(predictions.shape, y_test.shape)
    accuracy = np.sum(np.argmax(predictions, axis=1) == y_test) / len(y_test)
    print("Accuracy on benign test examples: {}%".format(accuracy * 100))

    print('Stage 3 attacking ...')
    attack = ProjectedGradientDescent(estimator=classifier, eps=args.eps, eps_step=args.eps / 3, max_iter=20)
    x_test_adv = attack.generate(x=x_test)

    print('Stage 4 attacking ...')
    predictions = classifier.predict(x_test_adv)
    accuracy = np.sum(np.argmax(predictions, axis=1) == y_test) / len(y_test)
    print("Accuracy on adversarial test examples: {}%".format(accuracy * 100))
    natual(args.eps)
def adversarial_generation(
    model: Architecture,
    x,
    y,
    epsilon=0.25,
    attack_type=AttackType.FGSM,
    num_iter=10,
    attack_backend: str = AttackBackend.FOOLBOX,
):
    """
    Create an adversarial example (FGSM only for now)
    """
    x.requires_grad = True

    logger.info(f"Generating for x (shape={x.shape}) and y (shape={y.shape})")

    if attack_backend == AttackBackend.ART:
        from art.attacks.evasion import (
            FastGradientMethod,
            ProjectedGradientDescent,
            DeepFool as DeepFoolArt,
            CarliniL2Method,
            HopSkipJump,
        )

        if attack_type == AttackType.FGSM:
            attacker = FastGradientMethod(estimator=model.art_classifier, eps=epsilon)
        elif attack_type == AttackType.PGD:
            attacker = ProjectedGradientDescent(
                estimator=model.art_classifier,
                max_iter=num_iter,
                eps=epsilon,
                eps_step=2 * epsilon / num_iter,
            )
        elif attack_type == AttackType.DeepFool:
            attacker = DeepFoolArt(classifier=model.art_classifier, max_iter=num_iter)
        elif attack_type == "CW":
            attacker = CarliniL2Method(
                classifier=model.art_classifier,
                max_iter=num_iter,
                binary_search_steps=15,
            )
        elif attack_type == AttackType.SQUARE:
            # attacker = SquareAttack(estimator=model.get_art_classifier())
            raise NotImplementedError("Work in progress")
        elif attack_type == AttackType.HOPSKIPJUMP:
            attacker = HopSkipJump(
                classifier=model.art_classifier,
                targeted=False,
                max_eval=100,
                max_iter=10,
                init_eval=10,
            )
        else:
            raise NotImplementedError(f"{attack_type} is not available in ART")

        attacked = attacker.generate(x=x.detach().cpu())
        attacked = torch.from_numpy(attacked).to(device)

    elif attack_backend == AttackBackend.FOOLBOX:
        import foolbox as fb

        if model.name in ["efficientnet", "resnet32", "resnet44", "resnet56"]:
            model.set_default_forward_mode(None)
        else:
            model.set_default_forward_mode("presoft")

        if attack_type == AttackType.FGSM:
            attacker = fb.attacks.LinfFastGradientAttack()
        elif attack_type == AttackType.PGD:
            attacker = fb.attacks.LinfProjectedGradientDescentAttack(
                steps=num_iter, random_start=False, rel_stepsize=2 / num_iter)
        elif attack_type == AttackType.DeepFool:
            attacker = fb.attacks.LinfDeepFoolAttack(loss="crossentropy")
        elif attack_type == AttackType.CW:
            attacker = fb.attacks.L2CarliniWagnerAttack(steps=num_iter)
        elif attack_type == AttackType.BOUNDARY:
            attacker = fb.attacks.BoundaryAttack(steps=7000, spherical_step=0.01, source_step=0.01)
            x = x.float()
        else:
            raise NotImplementedError(f"{attack_type} is not available in Foolbox")

        attacked, _, _ = attacker(
            model.foolbox_classifier,
            x.detach(),
            torch.from_numpy(y).to(device),
            epsilons=epsilon,
        )
        model.set_default_forward_mode(None)

    elif attack_backend == AttackBackend.CUSTOM:
        from tda.dataset.custom_attacks import FGSM, BIM, DeepFool, CW

        if attack_type == AttackType.FGSM:
            attacker = FGSM(model, ce_loss)
            attacked = attacker.run(data=x.detach(), target=torch.from_numpy(y).to(device), epsilon=epsilon)
        elif attack_type == AttackType.PGD:
            attacker = BIM(model, ce_loss, lims=(0, 1), num_iter=num_iter)
            attacked = attacker.run(data=x.detach(), target=torch.from_numpy(y).to(device), epsilon=epsilon)
        elif attack_type == AttackType.DeepFool:
            attacker = DeepFool(model, num_classes=10, num_iter=num_iter)
            attacked = [attacker(x[i].detach(), torch.tensor(y[i]).to(device)) for i in range(len(x))]
            attacked = torch.cat([torch.unsqueeze(a, 0) for a in attacked], 0)
        elif attack_type == AttackType.CW:
            attacker = CW(model, lims=(0, 1), num_iter=num_iter)
            attacked = attacker.run(data=x.detach(), target=torch.from_numpy(y).to(device))
            attacked = torch.cat([torch.unsqueeze(a, 0) for a in attacked], 0)
        else:
            raise NotImplementedError(f"{attack_type} is not available as custom implementation")
    else:
        raise NotImplementedError(f"Unknown backend {attack_backend}")

    return attacked.detach().double()
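# Hedged usage sketch for adversarial_generation above: `model` is assumed to be an Architecture
# instance with an attached art_classifier, `x_batch` a torch tensor batch and `y_batch` the
# matching numpy labels; the epsilon and iteration count are illustrative values only.
x_adv = adversarial_generation(
    model,
    x_batch,
    y_batch,
    epsilon=0.03,
    attack_type=AttackType.PGD,
    num_iter=20,
    attack_backend=AttackBackend.ART,
)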
x_list_new = list()
for x_i in x_list:
    x_i_new = x_i[0:num_frames_min, :, :, :]
    x_list_new.append(x_i_new)

x = np.asarray(x_list_new, dtype=float)

y_pred = pgt.predict(x=x, y_init=y_init)

##################
# evasion attack #
##################

from art.attacks.evasion import ProjectedGradientDescent

attack = ProjectedGradientDescent(estimator=pgt, eps=eps, eps_step=eps_step, batch_size=1, max_iter=20)

x_adv = attack.generate(x=x, y=y_pred)

y_pred_adv = pgt.predict(x=x_adv, y_init=y_init)

if x.dtype == object:
    for i in range(x.shape[0]):
        print("L_inf:", np.max(np.abs(x_adv[i] - x[i])))
else:
    print("L_inf:", np.max(np.abs(x_adv - x)))

################################
# visualise adversarial images #
################################
    ]
elif dataset == 'lidc':
    eps_range = [
        0.0001, 0.0003, 0.0006, 0.0007, 0.0008, 0.0009, 0.001, 0.00125, 0.0015, 0.00175, 0.002,
        0.0023, 0.0026, 0.0028, 0.003, 0.004, 0.005, 0.006, 0.007, 0.008, 0.009, 0.01, 0.011,
        0.012, 0.013, 0.014, 0.015, 0.016, 0.017, 0.018, 0.019, 0.02
    ]

# evaluate sensitivity scores of each image
test_eps_scores = [1] * x_test.shape[0]
for eps in eps_range:
    attacker = ProjectedGradientDescent(classifier, eps=eps, eps_step=eps / 4,
                                        max_iter=max_iter, num_random_init=num_random_init)
    x_test_adv = attacker.generate(x_test)
    for i in range(x_test.shape[0]):
        img = np.expand_dims(x_test[i], axis=0)
        adv_img = np.expand_dims(x_test_adv[i], axis=0)
        pred = np.argmax(classifier.predict(img))
        pred_adv = np.argmax(classifier.predict(adv_img))
        if test_eps_scores[i] == 1:
            if pred != pred_adv:
                test_eps_scores[i] = eps

np.save(path + dataset + '/test_eps_scores.npy', test_eps_scores)
test_eps_scores = np.load(path + dataset + '/test_eps_scores.npy')