def _test_mnist_targeted(self, classifier):
    # Get MNIST
    (_, _), (x_test, y_test) = self.mnist
    x_test, y_test = x_test[:NB_TEST], y_test[:NB_TEST]

    # Generate random target classes, resampling until no target equals the true label
    nb_classes = np.unique(np.argmax(y_test, axis=1)).shape[0]
    targets = np.random.randint(nb_classes, size=NB_TEST)
    while (targets == np.argmax(y_test, axis=1)).any():
        targets = np.random.randint(nb_classes, size=NB_TEST)

    # Perform attack
    df = SaliencyMapMethod(classifier, theta=1)
    x_test_adv = df.generate(x_test, y=to_categorical(targets, nb_classes))
    self.assertFalse((x_test == x_test_adv).all())
    self.assertFalse((0. == x_test_adv).all())

    y_pred = get_labels_np_array(classifier.predict(x_test_adv))
    self.assertFalse((y_test == y_pred).all())

    acc = np.sum(np.argmax(y_pred, axis=1) == np.argmax(y_test, axis=1)) / y_test.shape[0]
    logger.info('Accuracy on adversarial examples: %.2f%%', acc * 100)
def test_iris_pt(self):
    (_, _), (x_test, y_test) = self.iris
    classifier = get_iris_classifier_pt()

    attack = SaliencyMapMethod(classifier, theta=1)
    x_test_adv = attack.generate(x_test)
    self.assertFalse((x_test == x_test_adv).all())
    self.assertTrue((x_test_adv <= 1).all())
    self.assertTrue((x_test_adv >= 0).all())

    preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
    self.assertFalse((np.argmax(y_test, axis=1) == preds_adv).all())
    acc = np.sum(preds_adv == np.argmax(y_test, axis=1)) / y_test.shape[0]
    logger.info('Accuracy on Iris with JSMA adversarial examples: %.2f%%', acc * 100)
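# A minimal sketch of what a helper like get_iris_classifier_pt() could return, so the
# test above is self-contained. This assumes a recent ART release where
# art.estimators.classification.PyTorchClassifier takes keyword arguments; the suite's
# actual helper (and its network architecture) may differ.
import torch
import torch.nn as nn
from art.estimators.classification import PyTorchClassifier


def get_iris_classifier_pt():
    # Tiny fully-connected network for the 4-feature, 3-class Iris task.
    model = nn.Sequential(nn.Linear(4, 10), nn.ReLU(), nn.Linear(10, 3))
    return PyTorchClassifier(model=model,
                             loss=nn.CrossEntropyLoss(),
                             optimizer=torch.optim.Adam(model.parameters(), lr=0.01),
                             input_shape=(4,),
                             nb_classes=3,
                             clip_values=(0.0, 1.0))  # matches the [0, 1] bound checks above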
def test_iris_k_unbounded(self):
    (_, _), (x_test, y_test) = self.iris
    classifier, _ = get_iris_classifier_kr()

    # Recreate a classifier without clip values
    classifier = KerasClassifier(model=classifier._model, use_logits=False, channel_index=1)
    attack = SaliencyMapMethod(classifier, theta=1)
    x_test_adv = attack.generate(x_test)
    self.assertFalse((x_test == x_test_adv).all())

    preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
    self.assertFalse((np.argmax(y_test, axis=1) == preds_adv).all())
    acc = np.sum(preds_adv == np.argmax(y_test, axis=1)) / y_test.shape[0]
    logger.info('Accuracy on Iris with JSMA adversarial examples: %.2f%%', acc * 100)
def test_mnist_targeted(self):
    session = tf.Session()
    k.set_session(session)

    comp_params = {"loss": 'categorical_crossentropy',
                   "optimizer": 'adam',
                   "metrics": ['accuracy']}

    # Get MNIST
    batch_size, nb_train, nb_test = 100, 1000, 10
    (x_train, y_train), (x_test, y_test), _, _ = load_mnist()
    x_train, y_train = x_train[:nb_train], y_train[:nb_train]
    x_test, y_test = x_test[:nb_test], y_test[:nb_test]
    im_shape = x_train[0].shape

    # Get classifier
    classifier = CNN(im_shape, act="relu")
    classifier.compile(comp_params)
    classifier.fit(x_train, y_train, epochs=1, batch_size=batch_size, verbose=0)
    scores = classifier.evaluate(x_test, y_test)
    print("\nAccuracy on test set: %.2f%%" % (scores[1] * 100))

    # Generate random target classes, resampling until no target equals the true label
    nb_classes = np.unique(np.argmax(y_test, axis=1)).shape[0]
    targets = np.random.randint(nb_classes, size=nb_test)
    while (targets == np.argmax(y_test, axis=1)).any():
        targets = np.random.randint(nb_classes, size=nb_test)

    # Perform attack
    df = SaliencyMapMethod(classifier, sess=session, clip_min=0, clip_max=1, theta=1)
    x_test_adv = df.generate(x_test, y_val=targets)
    self.assertFalse((x_test == x_test_adv).all())

    y_pred = get_labels_np_array(classifier.predict(x_test_adv))
    self.assertFalse((y_test == y_pred).all())

    scores = classifier.evaluate(x_test_adv, y_test)
    print('\nAccuracy on adversarial examples: %.2f%%' % (scores[1] * 100))
def _test_mnist_untargeted(self, classifier):
    # Get MNIST
    (_, _), (x_test, y_test) = self.mnist
    x_test, y_test = x_test[:NB_TEST], y_test[:NB_TEST]

    df = SaliencyMapMethod(classifier, theta=1)
    x_test_adv = df.generate(x_test)
    self.assertFalse((x_test == x_test_adv).all())
    self.assertFalse((0. == x_test_adv).all())

    y_pred = get_labels_np_array(classifier.predict(x_test_adv))
    self.assertFalse((y_test == y_pred).all())

    acc = np.sum(np.argmax(y_pred, axis=1) == np.argmax(y_test, axis=1)) / y_test.shape[0]
    logger.info('Accuracy on adversarial examples: %.2f%%', acc * 100)
def _test_mnist_targeted(self, classifier):
    # Get MNIST
    (_, _), (x_test, y_test) = self.mnist
    x_test, y_test = x_test[:NB_TEST], y_test[:NB_TEST]

    # Generate random target classes, resampling until no target equals the true label
    nb_classes = np.unique(np.argmax(y_test, axis=1)).shape[0]
    targets = np.random.randint(nb_classes, size=NB_TEST)
    while (targets == np.argmax(y_test, axis=1)).any():
        targets = np.random.randint(nb_classes, size=NB_TEST)

    # Perform attack
    df = SaliencyMapMethod(classifier, theta=1)
    x_test_adv = df.generate(x_test, y=to_categorical(targets, nb_classes), batch_size=100)
    self.assertFalse((x_test == x_test_adv).all())
    self.assertFalse((0. == x_test_adv).all())

    y_pred = get_labels_np_array(classifier.predict(x_test_adv))
    self.assertFalse((y_test == y_pred).all())

    acc = np.sum(np.argmax(y_pred, axis=1) == np.argmax(y_test, axis=1)) / y_test.shape[0]
    logger.info('Accuracy on adversarial examples: %.2f%%', acc * 100)
def test_mnist_untargeted(self):
    session = tf.Session()
    k.set_session(session)

    comp_params = {"loss": 'categorical_crossentropy',
                   "optimizer": 'adam',
                   "metrics": ['accuracy']}

    # Get MNIST
    batch_size, nb_train, nb_test = 100, 1000, 10
    (X_train, Y_train), (X_test, Y_test), _, _ = load_mnist()
    X_train, Y_train = X_train[:nb_train], Y_train[:nb_train]
    X_test, Y_test = X_test[:nb_test], Y_test[:nb_test]
    im_shape = X_train[0].shape

    # Get classifier
    classifier = CNN(im_shape, act="relu")
    classifier.compile(comp_params)
    classifier.fit(X_train, Y_train, epochs=1, batch_size=batch_size, verbose=0)
    scores = classifier.evaluate(X_test, Y_test)
    print("\nAccuracy on test set: %.2f%%" % (scores[1] * 100))

    # Perform attack
    df = SaliencyMapMethod(classifier, sess=session)
    df.set_params(clip_min=0, clip_max=1, theta=1)
    x_test_adv = df.generate(X_test)
    self.assertFalse((X_test == x_test_adv).all())

    y_pred = get_labels_np_array(classifier.predict(x_test_adv))
    self.assertFalse((Y_test == y_pred).all())

    scores = classifier.evaluate(x_test_adv, Y_test)
    print('\nAccuracy on adversarial examples: %.2f%%' % (scores[1] * 100))
def _test_mnist_untargeted(self, classifier):
    # Get MNIST
    (_, _), (x_test, y_test) = self.mnist
    x_test, y_test = x_test[:NB_TEST], y_test[:NB_TEST]

    df = SaliencyMapMethod(classifier, theta=1)
    x_test_adv = df.generate(x_test, batch_size=100)
    self.assertFalse((x_test == x_test_adv).all())
    self.assertFalse((0. == x_test_adv).all())

    y_pred = get_labels_np_array(classifier.predict(x_test_adv))
    self.assertFalse((y_test == y_pred).all())

    acc = np.sum(np.argmax(y_pred, axis=1) == np.argmax(y_test, axis=1)) / y_test.shape[0]
    logger.info('Accuracy on adversarial examples: %.2f%%', acc * 100)
if args.save:
    np.save(os.path.join(SAVE_ADV, "eps%.2f_train.npy" % eps), X_train_adv)
    np.save(os.path.join(SAVE_ADV, "eps%.2f_test.npy" % eps), X_test_adv)
else:
    if args.adv_method == 'deepfool':
        adv_crafter = DeepFool(classifier, session, clip_min=min_, clip_max=max_)
    elif args.adv_method == 'jsma':
        adv_crafter = SaliencyMapMethod(classifier, sess=session, clip_min=min_, clip_max=max_,
                                        gamma=1, theta=max_)
    elif args.adv_method == 'carlini':
        adv_crafter = CarliniL2Method(classifier, sess=session, targeted=False, confidence=10)
    else:
        adv_crafter = UniversalPerturbation(classifier, session, p=np.inf,
                                            attacker_params={'clip_min': min_, 'clip_max': max_})
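# The fragment above only selects and configures the crafter; a minimal sketch of the
# generation step that would produce the X_train_adv / X_test_adv arrays saved in the
# `args.save` branch (X_train and X_test are assumed from the surrounding script):
X_train_adv = adv_crafter.generate(X_train)
X_test_adv = adv_crafter.generate(X_test)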
def test_robust(opt, model, classifier, attack_method, c, norm=None):
    if opt.attack == 'FGSM':
        adv_crafter = FastGradientMethod(classifier, norm=norm, eps=c, targeted=False,
                                         num_random_init=0, batch_size=opt.bs)
    elif opt.attack == 'PGD':
        adv_crafter = ProjectedGradientDescent(classifier, norm=norm, eps=c, eps_step=c / 10.,
                                               max_iter=10, targeted=False, num_random_init=1,
                                               batch_size=opt.bs)
    elif opt.attack == 'BIM':
        adv_crafter = ProjectedGradientDescent(classifier, norm=norm, eps=c, eps_step=c / 10.,
                                               max_iter=10, targeted=False, num_random_init=0,
                                               batch_size=opt.bs)  # fixed: was `batch_size=bs`
    elif opt.attack == 'JSMA':
        adv_crafter = SaliencyMapMethod(classifier, theta=0.1, gamma=c, batch_size=opt.bs)
    elif opt.attack == 'CW':
        adv_crafter = cw.L2Adversary(targeted=False, confidence=0.01, c_range=(c, 1e10),
                                     max_steps=1000, abort_early=False, search_steps=5,
                                     box=(0., 1.0), optimizer_lr=0.01)

    correct = 0
    total = 0
    total_sum = 0
    common_id = []
    for batch_idx, (inputs, targets) in enumerate(testloader):
        inputs, targets = inputs.cuda(), targets.cuda()
        output = classifier.predict(inputs.cpu().numpy(), batch_size=opt.bs)
        output = torch.tensor(output).cuda()
        init_pred = output.max(1, keepdim=False)[1]
        # Only count samples the classifier got right before the attack
        common_id = np.where(init_pred.cpu().numpy() == targets.cpu().numpy())[0]

        if opt.attack == 'CW':
            x_test_adv = adv_crafter(model, inputs, targets, to_numpy=True)
        else:
            x_test_adv = adv_crafter.generate(x=inputs.cpu().numpy())

        perturbed_output = classifier.predict(x_test_adv)
        perturbed_output = torch.tensor(perturbed_output).cuda()
        final_pred = perturbed_output.max(1, keepdim=False)[1]

        total_sum += targets.size(0)
        total += len(common_id)
        correct += final_pred[common_id].eq(targets[common_id].data).cpu().sum()
        attack_acc = 100. * float(correct) / total

        progress.progress_bar(batch_idx, len(testloader),
                              'Attack Strength:%.3f, robust accuracy: %.3f%% (%d/%d)'
                              % (c, attack_acc, correct, total))
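# The 'CW' branch above bypasses ART and calls an external PyTorch Carlini-Wagner
# implementation (the `cw` module). A hedged alternative keeping everything on ART's
# common `.generate()` interface; the parameter mapping from cw.L2Adversary is
# approximate, not an exact equivalent:
from art.attacks.evasion import CarliniL2Method  # import path assumes a recent ART release

adv_crafter = CarliniL2Method(classifier, confidence=0.01, targeted=False, max_iter=1000,
                              binary_search_steps=5, initial_const=c, batch_size=opt.bs)
x_test_adv = adv_crafter.generate(x=inputs.cpu().numpy())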
def evaluation(x_test, y_test, classify_idx_lst, model, test_acc, ws, current_line,
               attack_name, flag, column_i):
    classifier = KerasClassifier((0., 1.), model=model)

    if attack_name == "FGM":
        # ---------------- Parameter settings ---------------- #
        # Each entry is [epsilon, norm]: the maximum perturbation and the order of the norm.
        parameter_lst = [[10, 1], [20, 1], [30, 1], [40, 1], [50, 1],
                         [60, 1], [70, 1], [80, 1], [90, 1], [100, 1],
                         [1, 2], [2, 2], [3, 2], [4, 2], [5, 2],
                         [6, 2], [7, 2], [8, 2], [9, 2], [10, 2],
                         [0.05, np.inf], [0.10, np.inf], [0.15, np.inf], [0.20, np.inf],
                         [0.25, np.inf], [0.30, np.inf], [0.35, np.inf], [0.40, np.inf],
                         [0.45, np.inf], [0.50, np.inf]]

        # ---------------- Run the attack ---------------- #
        for [epsilon, norm_type] in parameter_lst:
            adv_crafter = FastGradientMethod(classifier)
            x_test_adv = adv_crafter.generate(x=x_test[classify_idx_lst], eps=epsilon,
                                              norm=norm_type)
            score = model.evaluate(x_test_adv, y_test[classify_idx_lst], verbose=0)
            acc = score[1]

            ws.write(current_line, 0, attack_name)
            ws.write(current_line, 1, "(" + str(round(epsilon, 4)) + ", " + str(norm_type) + ")")
            if flag in ("ori", "adv"):
                ws.write(current_line, 3, test_acc)
                ws.write(current_line, 4, acc)
            else:
                ws.write(current_line, 0 + 3 * column_i, test_acc)
                ws.write(current_line, 1 + 3 * column_i, acc)
            current_line += 1

    elif attack_name == "BIM":
        # ---------------- Parameter settings ---------------- #
        # Each entry is [norm, epsilon, epsilon_step, max_iteration]: the order of the norm,
        # the maximum perturbation the attacker can introduce, the attack step size
        # (input variation) at each iteration, and the maximum number of iterations.
        parameter_lst = [[1, 20.0, 2.0, 10], [1, 20.0, 4.0, 10], [1, 20.0, 6.0, 10],
                         [1, 20.0, 8.0, 10], [1, 20.0, 10.0, 10],
                         [1, 20.0, 2.0, 50], [1, 20.0, 4.0, 50], [1, 20.0, 6.0, 50],
                         [1, 20.0, 8.0, 50], [1, 20.0, 10.0, 50],
                         [2, 2.0, 0.2, 10], [2, 2.0, 0.4, 10], [2, 2.0, 0.6, 10],
                         [2, 2.0, 0.8, 10], [2, 2.0, 1.0, 10],
                         [2, 2.0, 0.2, 50], [2, 2.0, 0.4, 50], [2, 2.0, 0.6, 50],
                         [2, 2.0, 0.8, 50], [2, 2.0, 1.0, 50],
                         [np.inf, 0.1, 0.002, 10], [np.inf, 0.1, 0.004, 10],
                         [np.inf, 0.1, 0.006, 10], [np.inf, 0.1, 0.008, 10],
                         [np.inf, 0.1, 0.010, 10],
                         [np.inf, 0.1, 0.002, 50], [np.inf, 0.1, 0.004, 50],
                         [np.inf, 0.1, 0.006, 50], [np.inf, 0.1, 0.008, 50],
                         [np.inf, 0.1, 0.010, 50]]

        # ---------------- Run the attack ---------------- #
        for [norm_type, epsilon, epsilon_step, max_iteration] in parameter_lst:
            adv_crafter = BasicIterativeMethod(classifier)
            x_test_adv = adv_crafter.generate(x=x_test[classify_idx_lst], norm=norm_type,
                                              eps=epsilon, eps_step=epsilon_step,
                                              max_iter=max_iteration)
            score = model.evaluate(x_test_adv, y_test[classify_idx_lst], verbose=0)
            acc = score[1]

            ws.write(current_line, 0, attack_name)
            ws.write(current_line, 1, "(" + str(norm_type) + ", " + str(round(epsilon, 4)) + ", "
                     + str(round(epsilon_step, 4)) + ", " + str(max_iteration) + ")")
            if flag in ("ori", "adv"):
                ws.write(current_line, 3, test_acc)
                ws.write(current_line, 4, acc)
            else:
                ws.write(current_line, 0 + 3 * column_i, test_acc)
                ws.write(current_line, 1 + 3 * column_i, acc)
            current_line += 1

    elif attack_name == "JSMA":
        # ---------------- Parameter settings ---------------- #
        # Each entry is [theta, gamma]: the perturbation introduced to each modified feature
        # per step (can be positive or negative) and the maximum percentage of perturbed
        # features (between 0 and 1).
        parameter_lst = [[0.5, 0.5], [0.4, 0.5], [0.3, 0.5], [0.2, 0.5], [0.1, 0.5],
                         [-0.1, 0.5], [-0.2, 0.5], [-0.3, 0.5], [-0.4, 0.5], [-0.5, 0.5]]

        # ---------------- Run the attack ---------------- #
        for [theta, gamma] in parameter_lst:
            adv_crafter = SaliencyMapMethod(classifier)
            x_test_adv = adv_crafter.generate(x=x_test[classify_idx_lst], theta=theta, gamma=gamma)
            score = model.evaluate(x_test_adv, y_test[classify_idx_lst], verbose=0)
            acc = score[1]

            ws.write(current_line, 0, attack_name)
            ws.write(current_line, 1, "(" + str(round(theta, 4)) + ", " + str(round(gamma, 4)) + ")")
            if flag in ("ori", "adv"):
                ws.write(current_line, 3, test_acc)
                ws.write(current_line, 4, acc)
            else:
                ws.write(current_line, 0 + 3 * column_i, test_acc)
                ws.write(current_line, 1 + 3 * column_i, acc)
            current_line += 1

    elif attack_name == "DeepFool":
        # ---------------- Parameter settings ---------------- #
        # Each entry is [max_iteration, epsilon]: the maximum number of iterations
        # and the overshoot parameter.
        parameter_lst = [[2, 0.10], [4, 0.10], [6, 0.10], [8, 0.10], [10, 0.10],
                         [12, 0.10], [14, 0.10], [16, 0.10], [18, 0.10], [20, 0.10]]

        # ---------------- Run the attack ---------------- #
        for [max_iteration, epsilon] in parameter_lst:
            adv_crafter = DeepFool(classifier)
            x_test_adv = adv_crafter.generate(x=x_test[classify_idx_lst], max_iter=max_iteration,
                                              epsilon=epsilon)
            score = model.evaluate(x_test_adv, y_test[classify_idx_lst], verbose=0)
            acc = score[1]

            ws.write(current_line, 0, attack_name)
            ws.write(current_line, 1, "(" + str(max_iteration) + ", " + str(round(epsilon, 4)) + ")")
            if flag in ("ori", "adv"):
                ws.write(current_line, 3, test_acc)
                ws.write(current_line, 4, acc)
            else:
                ws.write(current_line, 0 + 3 * column_i, test_acc)
                ws.write(current_line, 1 + 3 * column_i, acc)
            current_line += 1

    elif attack_name == "CW-L2":
        # ---------------- Parameter settings ---------------- #
        # Each entry is [confidence, max_iteration]. A higher confidence produces examples
        # that are farther away from the original input but classified with higher
        # confidence as the target class.
        parameter_lst = [[0, 1], [0, 2], [0, 3], [0, 4], [0, 5]]

        # ---------------- Run the attack ---------------- #
        for [confidence_value, max_iter_value] in parameter_lst:
            adv_crafter = CarliniL2Method(classifier)
            # Average the adversarial accuracy over all ten target classes.
            sum_adv_acc = 0
            for adv_label in range(0, 10):
                one_hot_label = [0] * 10
                one_hot_label[adv_label] = 1
                x_test_adv = adv_crafter.generate(
                    x=x_test[classify_idx_lst], confidence=confidence_value, targeted=True,
                    max_iter=max_iter_value,
                    y=np.array([one_hot_label] * x_test[classify_idx_lst].shape[0]))
                score = model.evaluate(x_test_adv, y_test[classify_idx_lst], verbose=0)
                acc = score[1]
                sum_adv_acc += acc

            ws.write(current_line, 0, attack_name)
            ws.write(current_line, 1, "(" + str(round(confidence_value, 4)) + ", "
                     + str(max_iter_value) + ")")
            if flag in ("ori", "adv"):
                ws.write(current_line, 3, test_acc)
                ws.write(current_line, 4, sum_adv_acc / 10)
            else:
                ws.write(current_line, 0 + 3 * column_i, test_acc)
                ws.write(current_line, 1 + 3 * column_i, sum_adv_acc / 10)
            current_line += 1

    elif attack_name == "CW-Linf":
        # ---------------- Parameter settings ---------------- #
        # Same parameters as CW-L2: [confidence, max_iteration].
        parameter_lst = [[0, 1], [0, 2], [0, 3], [0, 4], [0, 5]]

        # ---------------- Run the attack ---------------- #
        for [confidence_value, max_iter_value] in parameter_lst:
            adv_crafter = CarliniLInfMethod(classifier)
            # Average the adversarial accuracy over all ten target classes.
            sum_adv_acc = 0
            for adv_label in range(0, 10):
                one_hot_label = [0] * 10
                one_hot_label[adv_label] = 1
                x_test_adv = adv_crafter.generate(
                    x=x_test[classify_idx_lst], confidence=confidence_value, targeted=True,
                    max_iter=max_iter_value,
                    y=np.array([one_hot_label] * x_test[classify_idx_lst].shape[0]))
                score = model.evaluate(x_test_adv, y_test[classify_idx_lst], verbose=0)
                acc = score[1]
                sum_adv_acc += acc

            ws.write(current_line, 0, attack_name)
            ws.write(current_line, 1, "(" + str(round(confidence_value, 4)) + ", "
                     + str(max_iter_value) + ")")
            if flag in ("ori", "adv"):
                ws.write(current_line, 3, test_acc)
                ws.write(current_line, 4, sum_adv_acc / 10)
            else:
                ws.write(current_line, 0 + 3 * column_i, test_acc)
                ws.write(current_line, 1 + 3 * column_i, sum_adv_acc / 10)
            current_line += 1

    current_line += 1
    return ws, current_line
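# A hypothetical driver for evaluation(), assuming `ws` is an xlwt worksheet (consistent
# with the ws.write(row, col, value) calls above); the file name, the `test_acc` value and
# the construction of classify_idx_lst are illustrative, not from the original script.
import xlwt

wb = xlwt.Workbook()
ws = wb.add_sheet("robustness")
current_line = 0

# Evaluate only on samples the model classifies correctly to begin with.
preds = np.argmax(model.predict(x_test), axis=1)
classify_idx_lst = np.where(preds == np.argmax(y_test, axis=1))[0]
test_acc = model.evaluate(x_test, y_test, verbose=0)[1]

for attack_name in ["FGM", "BIM", "JSMA", "DeepFool", "CW-L2", "CW-Linf"]:
    ws, current_line = evaluation(x_test, y_test, classify_idx_lst, model, test_acc,
                                  ws, current_line, attack_name, "ori", 0)
wb.save("robustness_results.xls")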
# ================== 2-1. Prepare the attack method for adversarial training ================== #
"""
The `ratio` determines how many of the clean samples in each batch are replaced
with their adversarial counterpart.

Warning: Both successful and unsuccessful adversarial samples are used for training.
In the case of unbounded attacks (e.g., DeepFool), this can result in invalid
(very noisy) samples being included.
"""
if adv_train_attack == "FGM":
    attacks = FastGradientMethod(robust_classifier, eps=attack_par["epsilon"],
                                 norm=attack_par["norm_type"])
elif adv_train_attack == "BIM":
    attacks = BasicIterativeMethod(robust_classifier, norm=attack_par["norm_type"],
                                   eps=attack_par["epsilon"],
                                   eps_step=attack_par["epsilon_step"],
                                   max_iter=attack_par["max_iteration"])
elif adv_train_attack == "PGD":
    attacks = ProjectedGradientDescent(robust_classifier, norm=attack_par["norm_type"],
                                       eps=attack_par["epsilon"],
                                       eps_step=attack_par["epsilon_step"],
                                       max_iter=attack_par["max_iteration"])
elif adv_train_attack == "JSMA":
    attacks = SaliencyMapMethod(robust_classifier, theta=attack_par["theta"],
                                gamma=attack_par["gamma"])
elif adv_train_attack == "DeepFool":
    attacks = DeepFool(robust_classifier, max_iter=attack_par["max_iteration"],
                       epsilon=attack_par["epsilon"])

# ================== 2-2. Start adversarial training ================== #
trainer = AdversarialTrainer(robust_classifier, attacks, ratio=ratio_value)
trainer.fit(x_train, y_train, nb_epochs=adv_train_num, batch_size=128, verbose=2)
robust_classifier_model.save("./model/adv_model")
end_time = time.time()

model = load_model("./model/adv_model")
scores = model.evaluate(x_test, y_test, verbose=0)
print('Test loss:', scores[0])
print('Test accuracy:', scores[1])
print("adv_model generation's time cost: " + str(end_time - begin_time))
print("adversarial_training completed!")
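# A quick sanity check on the hardened model: re-attack it and compare clean vs.
# adversarial accuracy. The FGM attack and eps=0.1 are illustrative choices, not part of
# the original training script; KerasClassifier((0., 1.), model=...) follows the same
# ART interface used elsewhere in this file.
hardened = KerasClassifier((0., 1.), model=model)
attack = FastGradientMethod(hardened, eps=0.1)
x_test_adv = attack.generate(x=x_test)

clean_acc = model.evaluate(x_test, y_test, verbose=0)[1]
adv_acc = model.evaluate(x_test_adv, y_test, verbose=0)[1]
print("clean accuracy: %.2f%%, adversarial accuracy: %.2f%%" % (clean_acc * 100, adv_acc * 100))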