def main(_):
    # Load training and test data
    data = ld_cifar10()
    model = CNN()
    loss_object = tf.losses.SparseCategoricalCrossentropy(from_logits=True)
    optimizer = tf.optimizers.Adam(learning_rate=0.001)

    # Metrics to track the different accuracies.
    train_loss = tf.metrics.Mean(name='train_loss')
    test_acc_clean = tf.metrics.SparseCategoricalAccuracy()
    test_acc_fgsm = tf.metrics.SparseCategoricalAccuracy()
    test_acc_pgd = tf.metrics.SparseCategoricalAccuracy()

    @tf.function
    def train_step(x, y):
        with tf.GradientTape() as tape:
            predictions = model(x)
            loss = loss_object(y, predictions)
        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))
        train_loss(loss)

    # Train model with adversarial training
    for epoch in range(FLAGS.nb_epochs):
        # Keras-like display of progress
        progress_bar_train = tf.keras.utils.Progbar(50000)
        for (x, y) in data.train:
            if FLAGS.adv_train:
                # Replace clean example with adversarial example for adversarial training
                x = projected_gradient_descent(model, x, FLAGS.eps, 0.01, 40, np.inf)
            train_step(x, y)
            progress_bar_train.add(x.shape[0], values=[('loss', train_loss.result())])

    # Evaluate on clean and adversarial data
    progress_bar_test = tf.keras.utils.Progbar(10000)
    for x, y in data.test:
        y_pred = model(x)
        test_acc_clean(y, y_pred)

        x_fgm = fast_gradient_method(model, x, FLAGS.eps, np.inf)
        y_pred_fgm = model(x_fgm)
        test_acc_fgsm(y, y_pred_fgm)

        x_pgd = projected_gradient_descent(model, x, FLAGS.eps, 0.01, 40, np.inf)
        y_pred_pgd = model(x_pgd)
        test_acc_pgd(y, y_pred_pgd)

        progress_bar_test.add(x.shape[0])

    print('test acc on clean examples (%): {:.3f}'.format(test_acc_clean.result() * 100))
    print('test acc on FGM adversarial examples (%): {:.3f}'.format(test_acc_fgsm.result() * 100))
    print('test acc on PGD adversarial examples (%): {:.3f}'.format(test_acc_pgd.result() * 100))
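# main() above relies on absl FLAGS (nb_epochs, eps, adv_train) and on the
# ld_cifar10()/CNN() helpers that are defined elsewhere in the tutorial. A
# minimal, assumed entry point is sketched below; the flag names match the
# FLAGS.* references, but the default values here are illustrative only.
from absl import app, flags

FLAGS = flags.FLAGS
flags.DEFINE_integer('nb_epochs', 8, 'Number of training epochs.')
flags.DEFINE_float('eps', 0.05, 'Total epsilon for FGM and PGD attacks.')
flags.DEFINE_bool('adv_train', False, 'Train on PGD adversarial examples instead of clean data.')

if __name__ == '__main__':
    app.run(main)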
def advStep(self, model_fn, image, epsilon, clip_min=-1, clip_max=1, norm=2):
    # Single FGM step; the argument order forwarded here matches
    # fast_gradient_method(model_fn, x, eps, norm, clip_min, clip_max).
    return fast_gradient_method(model_fn, image, epsilon, norm, clip_min, clip_max)
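# Hypothetical usage of the advStep wrapper above (`attacker` stands for an
# instance of the surrounding class, and `model`/`x_batch` are assumed):
x_adv_l2 = attacker.advStep(model, x_batch, epsilon=0.5)          # default L2 norm, inputs clipped to [-1, 1]
x_adv_linf = attacker.advStep(model, x_batch, 0.03, norm=np.inf)  # same wrapper with an L-inf budget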
def generate_adversarial_examples(model, data, data_labels, non_encoded_data_labels, eps_range):
    losses = []
    accs = []
    epsilons = []
    for epsilon in eps_range:
        epsilons.append(epsilon)
        adv_samples = fast_gradient_method(model_fn=model, x=data, eps=epsilon, norm=np.inf)
        loss, acc = model.evaluate(verbose=1, x=adv_samples, y=data_labels, batch_size=10)
        losses.append(loss)
        accs.append(acc)
    return epsilons, losses, accs
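# A sketch of how the sweep above might be consumed (assumes a compiled Keras
# `model` plus `x_test`/`y_test` in the format model.evaluate expects; note
# that non_encoded_data_labels is accepted but unused, so None is passed):
import numpy as np
import matplotlib.pyplot as plt

eps_range = np.linspace(0.0, 0.3, 7)
epsilons, losses, accs = generate_adversarial_examples(model, x_test, y_test, None, eps_range)

# Accuracy typically degrades as the FGM budget grows.
plt.plot(epsilons, accs, marker='o')
plt.xlabel('epsilon (L-inf FGM budget)')
plt.ylabel('test accuracy')
plt.show()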
y_test = np.array(y_test)
X_test = np.array(X_test)
X_test = X_test.astype('float32') / 255

# Accuracy on the test set
pred = np.argmax(model.predict(X_test), axis=1)  # predict_classes is deprecated; take the argmax of the class probabilities
acc = accuracy_score(y_test, pred)
print("Accuracy on the test set: {:.4f} %".format(acc * 100))

# Testing accuracy on adversarial examples
# Source: https://github.com/tensorflow/cleverhans/blob/master/tutorials/future/tf2/mnist_tutorial.py (accessed 2020-11-26)

# generate adversarial examples with the FGSM method
start = time.time()
x_fgm = fast_gradient_method(model, X_test, 0.2, np.inf)
end = time.time()
print("Time to generate adversarial examples with FGSM: {:.4f} s".format(end - start))
test_acc_fgsm = tf.metrics.SparseCategoricalAccuracy()
y_pred_fgm = model(x_fgm)
test_acc_fgsm(y_test, y_pred_fgm)
print('Accuracy on FGSM-generated images: {:.4f} %'.format(test_acc_fgsm.result() * 100))

# generate adversarial examples with the PGD method
start = time.time()
x_pgd = projected_gradient_descent(model, X_test, 0.2, 0.01, 40, np.inf)
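# The snippet above breaks off right after generating the PGD examples. A
# continuation mirroring its FGSM evaluation block would presumably look like
# this (an assumed reconstruction, not the original code):
end = time.time()
print("Time to generate adversarial examples with PGD: {:.4f} s".format(end - start))
test_acc_pgd = tf.metrics.SparseCategoricalAccuracy()
y_pred_pgd = model(x_pgd)
test_acc_pgd(y_test, y_pred_pgd)
print('Accuracy on PGD-generated images: {:.4f} %'.format(test_acc_pgd.result() * 100))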
def test_robustness(model, model_name, checkpoint_dict, dataset_path, eps=0.3):
    '''Evaluate the performance of a model on clean data (from the test set of
    the dataset found at dataset_path) and under different adversarial attacks.

    model: tf.keras.Model
        the model to test
    model_name: str
        name of the model
    checkpoint_dict: dict
        dictionary containing {'checkpoint_path': str, 'checkpoint': model checkpoint,
        'saving_manager': manager} (returned by the make_base_model() and
        make_finetuning_model() functions, see above)
    dataset_path: str
        path to the dataset (e.g. RFD)
    eps: float
        Total epsilon for FGM and PGD attacks.
    '''
    from cleverhans.future.tf2.attacks import projected_gradient_descent, fast_gradient_method

    print('\n\nComputing performance on adversarial examples for model {}'.format(model_name))
    checkpoint_path = checkpoint_dict['checkpoint_path']
    checkpoint = checkpoint_dict['checkpoint']
    saving_manager = checkpoint_dict['saving_manager']
    checkpoint.restore(saving_manager.latest_checkpoint)
    print('Checkpoint found in {}'.format(checkpoint_path))

    with h5py.File(dataset_path, 'r') as f:
        n_imgs = f['test']['n_imgs'][0]
        test_acc_clean = tf.metrics.CategoricalAccuracy()
        test_acc_fgsm = tf.metrics.CategoricalAccuracy()
        test_acc_pgd = tf.metrics.CategoricalAccuracy()
        progress_bar_test = tf.keras.utils.Progbar(n_imgs)
        counter = 0
        fig, ax = plt.subplots(5, 3)
        for x, y in zip(f['test']['data'], f['test']['ID_labels']):
            x = tf.expand_dims(x, 0)  # need to add batch dimension
            y_pred = model(x)
            test_acc_clean(y, y_pred)

            x_fgm = fast_gradient_method(model, x, eps, np.inf)
            y_pred_fgm = model(x_fgm)
            test_acc_fgsm(y, y_pred_fgm)

            x_pgd = projected_gradient_descent(model, x, eps, 0.01, 40, np.inf)
            y_pred_pgd = model(x_pgd)
            test_acc_pgd(y, y_pred_pgd)

            if counter < 5:
                ax[counter, 0].imshow(np.squeeze(x), cmap='gray')
                ax[counter, 1].imshow(np.squeeze(x_fgm), cmap='gray')
                ax[counter, 2].imshow(np.squeeze(x_pgd), cmap='gray')
                if counter == 0:
                    ax[counter, 0].title.set_text('Originals')
                    ax[counter, 1].title.set_text('FGM adversaries')
                    ax[counter, 2].title.set_text('PGD adversaries')
                counter += 1
            sys.stdout.write('\r')
            progress_bar_test.add(x.shape[0])

    clean_acc = test_acc_clean.result() * 100
    fgm_acc = test_acc_fgsm.result() * 100
    pgd_acc = test_acc_pgd.result() * 100
    print('Model {}: test acc on clean examples (%): {:.3f}'.format(model_name, clean_acc))
    print('Model {}: test acc on FGM adversarial examples (%): {:.3f}'.format(model_name, fgm_acc))
    print('Model {}: test acc on PGD adversarial examples (%): {:.3f}'.format(model_name, pgd_acc))
    plt.suptitle('test acc: originals: {:.3f}%, FGM advs: {:.3f}%, PGD advs: {:.3f}%'.format(
        clean_acc, fgm_acc, pgd_acc))
    plt.savefig(checkpoint_path[:-4] + 'adversarial_images.png')
    plt.close()
    return clean_acc, fgm_acc, pgd_acc
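# Hypothetical driver for test_robustness: checkpoint_dict comes from the
# make_base_model()/make_finetuning_model() helpers mentioned in the docstring,
# and the dataset path below is a placeholder.
clean_acc, fgm_acc, pgd_acc = test_robustness(
    model, 'base_model', checkpoint_dict, 'path/to/RFD_test.h5', eps=0.3)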
def generate_attack(model, data, eps, norm):
    return fast_gradient_method(model_fn=model, x=data, eps=eps, norm=norm)
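# Minimal usage sketch for generate_attack (`model` and `x_test` are assumed):
# an untargeted FGM batch under an L-inf budget. CleverHans accepts np.inf,
# 1, or 2 for the norm.
import numpy as np

x_adv = generate_attack(model, x_test, eps=0.1, norm=np.inf)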
def main(_):
    # Build a toy 2D dataset instead of loading MNIST
    # data = ld_mnist()
    X, Y = make_classification(n_samples=10000, n_features=2, n_redundant=0,
                               n_informative=2, n_classes=2,
                               n_clusters_per_class=1, random_state=1,
                               class_sep=2)
    plt.figure()
    plt.scatter(X[:, 0], X[:, 1], marker='o', c=Y, s=25, edgecolor='k')
    plt.show()

    scaler = MinMaxScaler()
    scaler.fit(X)
    X = scaler.transform(X).reshape(len(X), 2, 1)
    # Y = np_utils.to_categorical(Y, 2)  # one-hot labels conflict with the sparse loss/metrics below
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, stratify=Y, test_size=0.25)
    # Batch the datasets so each step sees a (batch, 2, 1) tensor
    train_dataset = tf.data.Dataset.from_tensor_slices((X_train, Y_train)).batch(128)
    test_dataset = tf.data.Dataset.from_tensor_slices((X_test, Y_test)).batch(128)

    model = Net()
    # loss_object = tf.losses.BinaryCrossentropy(from_logits=True)
    loss_object = tf.losses.SparseCategoricalCrossentropy(from_logits=True)
    optimizer = tf.optimizers.Adam(learning_rate=0.03)

    # Metrics to track the different accuracies.
    train_loss = tf.metrics.Mean(name='train_loss')
    test_acc_clean = tf.metrics.SparseCategoricalAccuracy()
    test_acc_fgsm = tf.metrics.SparseCategoricalAccuracy()
    test_acc_pgd = tf.metrics.SparseCategoricalAccuracy()
    # test_acc_clean = tf.metrics.BinaryAccuracy()
    # test_acc_fgsm = tf.metrics.BinaryAccuracy()
    # test_acc_pgd = tf.metrics.BinaryAccuracy()

    @tf.function
    def train_step(x, y):
        with tf.GradientTape() as tape:
            predictions = model(x)
            loss = loss_object(y, predictions)
        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))
        train_loss(loss)

    # Train model with adversarial training
    for epoch in range(FLAGS.nb_epochs):
        # Keras-like display of progress (7500 = 75% train split of 10000 samples)
        progress_bar_train = tf.keras.utils.Progbar(7500)
        for (x, y) in train_dataset:
            if FLAGS.adv_train:
                # Replace clean example with adversarial example for adversarial training
                x = projected_gradient_descent(model, x, FLAGS.eps, 0.01, 40, np.inf)
            train_step(x, y)
            progress_bar_train.add(x.shape[0], values=[('loss', train_loss.result())])

    # Evaluate on clean and adversarial data (2500 = 25% test split)
    progress_bar_test = tf.keras.utils.Progbar(2500)
    for x, y in test_dataset:
        y_pred = model(x)
        test_acc_clean(y, y_pred)

        x_fgm = fast_gradient_method(model, x, FLAGS.eps, np.inf)
        y_pred_fgm = model(x_fgm)
        test_acc_fgsm(y, y_pred_fgm)

        x_pgd = projected_gradient_descent(model, x, FLAGS.eps, 0.01, 40, np.inf)
        y_pred_pgd = model(x_pgd)
        test_acc_pgd(y, y_pred_pgd)

        progress_bar_test.add(x.shape[0])

    print('test acc on clean examples (%): {:.3f}'.format(test_acc_clean.result() * 100))
    print('test acc on FGM adversarial examples (%): {:.3f}'.format(test_acc_fgsm.result() * 100))
    print('test acc on PGD adversarial examples (%): {:.3f}'.format(test_acc_pgd.result() * 100))
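# Net() is defined elsewhere; a minimal stand-in consistent with the (2, 1)
# inputs and the two-logit sparse loss/metrics above might be:
class Net(tf.keras.Model):

    def __init__(self):
        super(Net, self).__init__()
        self.flatten = tf.keras.layers.Flatten()
        self.hidden = tf.keras.layers.Dense(16, activation='relu')
        self.logits = tf.keras.layers.Dense(2)  # raw logits, matching from_logits=True

    def call(self, x):
        return self.logits(self.hidden(self.flatten(x)))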
def test_for_sub_error(name, datatype, feats=10, n_train=10**4, n_val=10**4,
                       epochs=10, epsilon=0.3, verbose=0, save_model=True):
    # prepare the training data
    num_hidden = 200
    datatype_dict = {
        "XOR": 2,
        "orange_skin": 4,
        "nonlinear_additive": 4,
        "switch": 5
    }
    x_train, y_train, x_val, y_val, _ = create_data(datatype, n=n_train,
                                                    nval=n_val, feats=feats)

    # initialize and train the various models
    soft_mod, soft_logits_mod, soft_path, soft_dir, soft_cp = build_model(
        feats, num_hidden, name, "soft")
    train_model(soft_mod, x_train, y_train, x_val, y_val, save_model, soft_dir,
                [soft_cp], SOFT_MESSAGE, epochs=epochs, verbose=verbose)

    sig_mod, sig_logits_mod, sig_path, sig_dir, sig_cp = build_model(
        feats, num_hidden, name, "sig")
    train_model(sig_mod, x_train, y_train, x_val, y_val, save_model, sig_dir,
                [sig_cp], SIG_MESSAGE, epochs=epochs, verbose=verbose)

    l2x_mod, l2x_logit_mod, l2x_pred_mod, _, _, _ = L2X_flex(
        x_train, y_train, x_val, y_val, activation='relu',
        filedir=str(date.today()) + "l2x" + name,
        num_selected_features=datatype_dict[datatype],
        out_activation='sigmoid', loss='binary_crossentropy',
        optimizer='adam', num_hidden=num_hidden, num_layers=2, train=True,
        epochs=epochs, verbose=verbose)

    # create the adversarial examples
    x_adv = fast_gradient_method(soft_logits_mod, x_val, epsilon, np.inf,
                                 targeted=False)
    x_adv = x_adv.numpy()  # convert the tf tensor to an np.array

    # create correct labels for y_adv
    if datatype == "XOR":
        y_adv = generate_XOR_labels(x_adv)
    elif datatype == "orange_skin":
        y_adv = generate_orange_labels(x_adv)
    elif datatype == "nonlinear_additive":
        y_adv = generate_additive_labels(x_adv)
    if datatype != "switch":
        y_adv = (y_adv[:, 0] > 0.5) * 1
    else:
        y_adv = generate_switch_labels(x_adv)

    print_error_breakdown(datatype, x_val, y_val, soft_mod, "soft", "soft val " + name + datatype)
    print_error_breakdown(datatype, x_adv, y_adv, soft_mod, "soft", "soft adv " + name + datatype)
    print_error_breakdown(datatype, x_val, y_val, sig_mod, "sig", "sig val " + name + datatype)
    print_error_breakdown(datatype, x_adv, y_adv, sig_mod, "sig", "sig adv " + name + datatype)
    print_error_breakdown(datatype, x_val, y_val, l2x_mod, "sig", "l2x val " + name + datatype)
    print_error_breakdown(datatype, x_adv, y_adv, l2x_mod, "sig", "l2x adv " + name + datatype)

    # return the information needed to see the effect of the adversarial examples on the predictions
    return x_val, y_val, x_adv, y_adv, l2x_mod, l2x_logit_mod, l2x_pred_mod
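# Example invocation (the run name is a placeholder; "XOR" selects the
# synthetic datatype with 2 informative features out of `feats`):
(x_val, y_val, x_adv, y_adv,
 l2x_mod, l2x_logit_mod, l2x_pred_mod) = test_for_sub_error(
    'run1', 'XOR', feats=10, epochs=10, epsilon=0.3)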
# :param y: (optional) Tensor with true labels. If targeted is true, then provide
#     the target label. Otherwise, only provide this parameter if you'd like to use
#     true labels when crafting adversarial samples. Otherwise, model predictions
#     are used as labels to avoid the "label leaking" effect (explained in this
#     paper: https://arxiv.org/abs/1611.01236). Default is None.
# :param targeted: (optional) bool. Is the attack targeted or untargeted?
#     Untargeted, the default, will try to make the label incorrect.
#     Targeted will instead try to move in the direction of being more like y.
# :param sanity_checks: bool, if True, include asserts (turn them off to use less
#     runtime / memory, or for unit tests that intentionally pass strange input)

limit = 20000
adv_input = x_train[:limit]
eps = 0.1
Z = fast_gradient_method(obj.network, adv_input, eps, np.inf, clip_min=0., clip_max=1.)
print("####################################\nAdversarial examples:\n", Z)

# Saving adversarial examples
with open("adv_data.pkl", 'wb') as file:
    pickle.dump(Z, file)

# Printing accuracy
pred = obj.network(Z)
accu = obj.accuracy(pred, y_train[:limit])
pred2 = obj.network(x_test)
accu2 = obj.accuracy(pred2, y_test)
print(f"Adversarial data accuracy : {accu:<.3f}")
print(f"Original data accuracy : {accu2:<.3f}")
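# The docstring fragment above describes the y/targeted parameters; a sketch of
# a targeted FGM call against the same (assumed) obj.network, pushing every
# example toward class 0:
target = tf.zeros([adv_input.shape[0]], dtype=tf.int64)  # desired label for every example
Z_targeted = fast_gradient_method(obj.network, adv_input, eps, np.inf,
                                  clip_min=0., clip_max=1.,
                                  y=target, targeted=True)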