def adversarialAccuracy(model):
    """Attack every test sample once and print how often the attack succeeds.

    ``model`` is wrapped in a foolbox ``KerasModel`` with inputs bounded to
    [0, 1]. Each ``(x_test[i], y_test_original[i])`` pair (module-level
    arrays) is attacked with a single-step ``L2BasicIterativeAttack``. A
    non-``None`` attack result is counted as a success. Progress is printed
    every 50 samples, and the overall rate is printed at the end.

    Parameters
    ----------
    model
        The Keras model to evaluate.

    Returns
    -------
    None
        Results are only printed.
    """
    keras_model = KerasModel(model, bounds=(0, 1), channel_axis=channel_axis)
    criterion = Misclassification()
    # The attack object does not depend on the sample, so build it once
    # instead of once per iteration.
    # Alternative attack kept from the original:
    #   attack = foolbox.attacks.FGSM(keras_model, criterion)
    #   image_adv = attack(image, label, epsilons=5, max_epsilon=1.0)
    pgd2 = foolbox.attacks.L2BasicIterativeAttack(keras_model, criterion)

    length = x_test.shape[0]
    wrong = 0
    period = 50
    for i in range(length):
        image, label = x_test[i], y_test_original[i]
        image_adv = pgd2(image, label, epsilon=1.0, stepsize=1.0,
                         iterations=1, binary_search=False)
        if image_adv is not None:
            # Only the predictions are needed; no reason to compute a gradient.
            prediction = np.argmax(keras_model.predictions(image_adv))
            # Sanity check: a returned adversarial must be misclassified.
            assert prediction != label
            wrong += 1
        if i % period == period - 1:
            print("Adversarial attack success rate: {} / {} = {}".format(
                wrong, i + 1, wrong / (i + 1)))
            if image_adv is not None:
                displayImage(image_adv, label)
                print("Size of perturbation: {}".format(
                    LA.norm(image_adv - image, None)))

    # An empty test set has no defined rate; report NaN rather than crash
    # with ZeroDivisionError.
    error_rate = wrong / length if length else float("nan")
    print("Adversarial error rate: {} / {} = {}".format(
        wrong, length, error_rate))
def test_keras_model_preprocess():
    """Check KerasModel preprocessing: inputs stay untouched, outputs keep shape.

    Four wrappers are built around the same global-average-pooling network:
    two without preprocessing, one with array-valued (shift, scale)
    preprocessing, and one with a scalar shift and array scale. The test
    asserts that predicting does not modify the input batch, that all
    predictions have shape ``(2, num_classes)``, that the two plain wrappers
    agree, and that the scalar-shift wrapper can predict without error.
    """
    n_classes = 1000
    n_channels = n_classes
    pixel_bounds = (0, 255)

    # DeprecationWarnings raised while building the layers and wrappers are
    # silenced; nothing else runs inside this context.
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", category=DeprecationWarning)
        net_in = Input(shape=(5, 5, n_channels))
        net_out = GlobalAveragePooling2D(data_format='channels_last')(net_in)

        def wrap(**extra):
            # Fresh Keras Model per wrapper, exactly one KerasModel each.
            return KerasModel(
                Model(inputs=net_in, outputs=net_out),
                bounds=pixel_bounds,
                predicts='logits',
                **extra)

        array_prep = (np.arange(n_classes)[None, None],
                      np.random.uniform(size=(5, 5, n_channels)) + 1)
        plain_first = wrap()
        with_array_shift = wrap(preprocessing=array_prep)
        plain_second = wrap()
        scalar_prep = (0, np.random.uniform(size=(5, 5, n_channels)) + 1)
        with_scalar_shift = wrap(preprocessing=scalar_prep)

    np.random.seed(22)
    batch = np.random.rand(2, 5, 5, n_channels).astype(np.float32)
    pristine = batch.copy()

    out_plain_first = plain_first.batch_predictions(batch)
    out_array_shift = with_array_shift.batch_predictions(batch)

    # the in-place preprocessing must not leak into the caller's array
    assert np.all(batch == pristine)

    out_plain_second = plain_second.batch_predictions(batch)

    expected_shape = (2, n_classes)
    assert (out_plain_first.shape == out_array_shift.shape
            == out_plain_second.shape == expected_shape)

    np.testing.assert_array_almost_equal(
        out_plain_first - out_plain_first.max(),
        out_plain_second - out_plain_second.max(),
        decimal=5)

    with_scalar_shift.batch_predictions(batch)
def sample_attack(keras_model, image, attack_method, input_name, labeled, target=0):
    """Generate an adversarial image for a model and input with the chosen attack.

    The image is prepared for the model's input shape and classified. If
    ``labeled`` is given and differs from the predicted class, no attack is
    run. Otherwise a targeted attack (``'lbfgs'`` or ``'singlepixelattack'``,
    case-insensitive) is run with a ``TargetClassProbability`` criterion
    (p = 0.5). The result is scaled by 255 and saved under
    ``Config.UPLOAD_IMAGE_FOLDER``.

    :param keras_model: Keras model to attack.
    :param image: input image, passed through ``nm.prepare_image``.
    :param attack_method: name of the attack to use.
    :param input_name: name of the input, used as suffix of the saved file name.
    :param labeled: expected class of the image, or ``None`` to skip the check.
    :param target: class the attack should produce; when it equals the
        predicted class, the next class index is used instead.
    :return: ``(adversarial, adv_image_name)`` on success, otherwise
        ``(None, message)`` describing why no sample was produced.
    """
    input_shape = nm.extract_input_shape(keras_model)[1:]
    image = nm.prepare_image(image, input_shape)
    label = np.argmax(keras_model.predict(np.asarray([image])))
    if labeled is not None and not label == labeled:
        return None, "This image cannot be correctly classified, no adversarial sample will be generated. expected: " + str(
            labeled) + " actual: " + str(label)

    attack_classes = {
        'lbfgs': LBFGSAttack,
        'singlepixelattack': SinglePixelAttack,
    }
    attack_cls = attack_classes.get(str(attack_method).lower())
    if attack_cls is None:
        # Same (None, message) shape as the other failure path, so callers
        # can always unpack two values.
        return None, "Attack method not supported at the moment"

    network_model = KerasModel(keras_model, bounds=(0, 1))
    print(label)

    # Choose the effective target BEFORE building the criterion; otherwise
    # the attack would still aim at the class the image already has.
    if label == target:
        target = (target + 1) % network_model.num_classes()

    # run the attack
    attack = attack_cls(model=network_model,
                        criterion=TargetClassProbability(target, p=.5))
    # NOTE(review): the last axis is reversed for the attack but not for the
    # prediction above -- presumably an RGB/BGR swap; confirm this is intended.
    adversarial = attack(image[:, :, ::-1], label)
    if adversarial is None:
        return None, "No adversarial sample could be generated with " + str(attack_method)

    output = network_model.predictions(adversarial)
    print(np.argmax(output))

    adversarial = adversarial.reshape(input_shape)
    adversarial = adversarial * 255
    adv_image_name = 'adv_{}_origin_{}_{}_{}'.format(target, label, attack_method, input_name)
    print(adversarial.shape)

    im = None
    if len(adversarial.shape) == 2:
        im = Image.fromarray(np.uint8(adversarial), mode="1")
    if len(adversarial.shape) == 3 and adversarial.shape[2] == 1:
        im = Image.fromarray(np.uint8(
            adversarial.reshape(adversarial.shape[0], adversarial.shape[1])), mode="L")
    if len(adversarial.shape) == 3 and adversarial.shape[2] == 3:
        im = Image.fromarray(np.uint8(adversarial), mode="RGB")
    if im is None:
        # None of the supported layouts matched; previously this crashed
        # with AttributeError on ``None.save``.
        return None, "Unsupported adversarial image shape: " + str(adversarial.shape)

    im.save(os.path.join(basedir, Config.UPLOAD_IMAGE_FOLDER, adv_image_name))
    print('adv', adv_image_name)
    return adversarial, adv_image_name
def test_keras_backward(num_classes):
    """Check ``KerasModel.backward`` against the analytic pooling gradient.

    The network is a single global average pooling over a 5x5 grid, so the
    gradient of each logit w.r.t. every pixel of its channel is 1/25. The
    back-propagated gradient must therefore equal the incoming gradient
    divided by 25, replicated over all 25 spatial positions.
    """
    n_channels = num_classes
    net = Sequential()
    # DeprecationWarnings raised while building the network and the wrapper
    # are silenced.
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", category=DeprecationWarning)
        pooling = GlobalAveragePooling2D(
            data_format='channels_last', input_shape=(5, 5, n_channels))
        net.add(pooling)
        fmodel = KerasModel(net, bounds=(0, 255), predicts='logits')

    image = np.random.rand(5, 5, n_channels).astype(np.float32)
    upstream = np.random.rand(num_classes).astype(np.float32)

    received = fmodel.backward(upstream, image)
    assert received.shape == image.shape

    # One (1, 1, C) cell tiled over the 5x5 grid.
    per_pixel = (upstream / 25.).reshape((1, 1, -1))
    expected = np.tile(per_pixel, (5, 5, 1))

    np.testing.assert_almost_equal(received, expected)
def test_keras_model(num_classes):
    """Smoke-test the forward/gradient API of ``KerasModel``.

    Verifies output shapes of ``forward``, ``forward_one`` and
    ``gradient_one``, that ``forward_and_gradient_one`` agrees with the two
    separate calls, and that ``num_classes()`` reports the channel count.
    """
    net = Sequential()
    # DeprecationWarnings raised while building the network and the wrapper
    # are silenced.
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", category=DeprecationWarning)
        net.add(
            GlobalAveragePooling2D(data_format="channels_last",
                                   input_shape=(5, 5, num_classes)))
        fmodel = KerasModel(net, bounds=(0, 255), predicts="logits")

    batch = np.random.rand(2, 5, 5, num_classes).astype(np.float32)
    sample = batch[0]
    label = 7

    assert fmodel.forward(batch).shape == (2, num_classes)

    logits = fmodel.forward_one(sample)
    assert logits.shape == (num_classes, )

    grad = fmodel.gradient_one(sample, label)
    assert grad.shape == sample.shape

    # The combined call is evaluated twice, once per compared component.
    np.testing.assert_almost_equal(
        fmodel.forward_and_gradient_one(sample, label)[0], logits)
    np.testing.assert_almost_equal(
        fmodel.forward_and_gradient_one(sample, label)[1], grad)

    assert fmodel.num_classes() == num_classes
def test_keras_model(num_classes):
    """Smoke-test the predictions/gradient API of ``KerasModel``.

    Verifies output shapes of ``batch_predictions``, ``predictions`` and
    ``gradient``, that ``predictions_and_gradient`` agrees with the two
    separate calls, and that ``num_classes()`` reports the channel count.
    """
    net = Sequential()
    net.add(
        GlobalAveragePooling2D(data_format='channels_last',
                               input_shape=(5, 5, num_classes)))
    fmodel = KerasModel(net, bounds=(0, 255), predicts='logits')

    batch = np.random.rand(2, 5, 5, num_classes).astype(np.float32)
    sample = batch[0]
    label = 7

    batch_shape = fmodel.batch_predictions(batch).shape
    assert batch_shape == (2, num_classes)

    logits = fmodel.predictions(sample)
    assert logits.shape == (num_classes, )

    grad = fmodel.gradient(sample, label)
    assert grad.shape == sample.shape

    # The combined call is evaluated twice, once per compared component.
    np.testing.assert_almost_equal(
        fmodel.predictions_and_gradient(sample, label)[0], logits)
    np.testing.assert_almost_equal(
        fmodel.predictions_and_gradient(sample, label)[1], grad)

    assert fmodel.num_classes() == num_classes
def __init__(self,
             model,
             bounds,
             channel_axis=3,
             preprocessing=(0, 1),
             predicts='probabilities',
             entropy_mask=True,
             cache_grad_mask=False):
    """Wrap a Keras model and record the gradient-masking options.

    ``bounds``, ``channel_axis`` and ``preprocessing`` are forwarded both to
    the base-class initializer and to the inner ``KerasModel``; ``predicts``
    goes to the inner ``KerasModel`` only. ``entropy_mask`` and
    ``cache_grad_mask`` are stored as given, and the cached mask starts out
    as ``None``.
    """
    super(FoolboxKerasModelEntropy, self).__init__(
        bounds=bounds,
        channel_axis=channel_axis,
        preprocessing=preprocessing)

    # The wrapped foolbox model that does the real work.
    self.keras_model = KerasModel(model, bounds, channel_axis, preprocessing,
                                  predicts)

    # Masking configuration; no mask has been computed yet.
    self.entropy_mask = entropy_mask
    self.cache_grad_mask = cache_grad_mask
    self.grad_mask = None
def setup_local_model():
    """Build a local ImageNet ResNet50 wrapped for foolbox, for local testing.

    The Keras learning phase is set to 0 before the network is created. The
    wrapper uses pixel bounds (0, 255), subtracts the per-channel values
    (104, 116, 123) with a divisor of 1, and treats the outputs as logits.

    Returns the resulting ``KerasModel``.
    """
    keras.backend.set_learning_phase(0)
    return KerasModel(
        keras.applications.resnet50.ResNet50(weights='imagenet'),
        bounds=(0, 255),
        preprocessing=(np.array([104, 116, 123]), 1),
        predicts='logits',
    )
def test_keras_model_forward_gradients():
    """Numerically verify the batched gradient of a preprocessed ``KerasModel``.

    The loss is evaluated half a step on either side of the inputs along the
    gradient direction; the finite difference must be positive and match
    ``eps * ||g||^2`` to one decimal after scaling by 1e5.
    """
    n_classes = 1000
    n_channels = n_classes

    # DeprecationWarnings raised while building the layers and the wrapper
    # are silenced.
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", category=DeprecationWarning)
        net_in = Input(shape=(5, 5, n_channels))
        net_out = GlobalAveragePooling2D(data_format="channels_last")(net_in)
        shift = np.arange(n_classes)[None, None]
        scale = np.random.uniform(size=(5, 5, n_channels)) + 1
        fmodel = KerasModel(
            Model(inputs=net_in, outputs=net_out),
            bounds=(0, 255),
            predicts="logits",
            preprocessing=(shift, scale),
        )

    step = 1e-3
    np.random.seed(22)
    batch = np.random.rand(5, 5, 5, n_channels).astype(np.float32)
    labels = [7] * 5

    _, grad = fmodel.forward_and_gradient(batch, labels)

    label_array = np.array([labels])
    half_step = step / 2 * grad
    loss_below = fmodel._loss_fn([batch - half_step, label_array])[0]
    loss_above = fmodel._loss_fn([batch + half_step, label_array])[0]
    scaled_delta = 1e5 * (loss_above - loss_below)

    assert scaled_delta > 1

    # make sure that gradient is numerically correct
    np.testing.assert_array_almost_equal(
        scaled_delta, 1e5 * step * np.linalg.norm(grad)**2, decimal=1)
def test_keras_model_gradients():
    """Numerically verify the single-image gradient of a preprocessed ``KerasModel``.

    The loss is evaluated half a step on either side of the image along the
    gradient direction; the finite difference must be positive and match
    ``eps * ||g||^2`` to one decimal after scaling by 1e5.
    """
    n_classes = 1000
    n_channels = n_classes

    # DeprecationWarnings raised while building the layers and the wrapper
    # are silenced.
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", category=DeprecationWarning)
        net_in = Input(shape=(5, 5, n_channels))
        net_out = GlobalAveragePooling2D(data_format='channels_last')(net_in)
        shift = np.arange(n_classes)[None, None]
        scale = np.random.uniform(size=(5, 5, n_channels)) + 1
        fmodel = KerasModel(
            Model(inputs=net_in, outputs=net_out),
            bounds=(0, 255),
            predicts='logits',
            preprocessing=(shift, scale))

    step = 1e-3
    np.random.seed(22)
    image = np.random.rand(5, 5, n_channels).astype(np.float32)
    label = 7

    _, grad = fmodel.predictions_and_gradient(image, label)

    label_array = np.array([label])
    as_batch = image[None]  # the loss function takes a leading batch axis
    half_step = step / 2 * grad
    loss_below = fmodel._loss_fn([as_batch - half_step, label_array])[0]
    loss_above = fmodel._loss_fn([as_batch + half_step, label_array])[0]
    scaled_delta = 1e5 * (loss_above - loss_below)

    assert scaled_delta > 1

    # make sure that gradient is numerically correct
    np.testing.assert_array_almost_equal(
        scaled_delta, 1e5 * step * np.linalg.norm(grad)**2, decimal=1)
def test_keras_model_probs(num_classes):
    """Check that logit and probability outputs of ``KerasModel`` are consistent.

    One wrapper sees the raw pooling output declared as logits; two others
    see a softmax on top, declared as ``'probabilities'`` and ``'probs'``.
    After subtracting each result's maximum, logits vs. probabilities must
    agree to one decimal and the two probability spellings to five.
    """
    n_channels = num_classes
    pixel_bounds = (0, 255)

    # DeprecationWarnings raised while building the layers and wrappers are
    # silenced.
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", category=DeprecationWarning)
        net_in = Input(shape=(5, 5, n_channels))
        raw = GlobalAveragePooling2D(data_format='channels_last')(net_in)
        normalized = Activation(softmax)(raw)

        from_logits = KerasModel(
            Model(inputs=net_in, outputs=raw),
            bounds=pixel_bounds,
            predicts='logits')
        from_probabilities = KerasModel(
            Model(inputs=net_in, outputs=normalized),
            bounds=pixel_bounds,
            predicts='probabilities')
        from_probs = KerasModel(
            Model(inputs=net_in, outputs=normalized),
            bounds=pixel_bounds,
            predicts='probs')

    np.random.seed(22)
    batch = np.random.rand(2, 5, 5, n_channels).astype(np.float32)

    out_logits = from_logits.batch_predictions(batch)
    out_probabilities = from_probabilities.batch_predictions(batch)
    out_probs = from_probs.batch_predictions(batch)

    expected_shape = (2, num_classes)
    assert (out_logits.shape == out_probabilities.shape
            == out_probs.shape == expected_shape)

    np.testing.assert_array_almost_equal(
        out_logits - out_logits.max(),
        out_probabilities - out_probabilities.max(),
        decimal=1)

    np.testing.assert_array_almost_equal(
        out_probabilities - out_probabilities.max(),
        out_probs - out_probs.max(),
        decimal=5)
def test_keras_model_preprocess():
    """Check that tuple preprocessing in ``KerasModel`` leaves inputs untouched.

    Three wrappers share one global-average-pooling network; only the second
    uses (shift, scale) preprocessing. Predicting must not modify the input
    batch, all outputs must have shape ``(2, num_classes)``, and the two
    wrappers without preprocessing must agree.
    """
    n_classes = 1000
    n_channels = n_classes
    common = dict(bounds=(0, 255), predicts='logits')

    # DeprecationWarnings raised while building the layers and wrappers are
    # silenced.
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", category=DeprecationWarning)
        net_in = Input(shape=(5, 5, n_channels))
        net_out = GlobalAveragePooling2D(data_format='channels_last')(net_in)

        shift_and_scale = (np.arange(n_classes)[None, None],
                           np.random.uniform(size=(5, 5, n_channels)) + 1)

        plain_first = KerasModel(
            Model(inputs=net_in, outputs=net_out), **common)
        preprocessed = KerasModel(
            Model(inputs=net_in, outputs=net_out),
            preprocessing=shift_and_scale,
            **common)
        plain_second = KerasModel(
            Model(inputs=net_in, outputs=net_out), **common)

    np.random.seed(22)
    batch = np.random.rand(2, 5, 5, n_channels).astype(np.float32)
    pristine = batch.copy()

    out_first = plain_first.batch_predictions(batch)
    out_preprocessed = preprocessed.batch_predictions(batch)

    # the in-place preprocessing must not leak into the caller's array
    assert np.all(batch == pristine)

    out_second = plain_second.batch_predictions(batch)

    assert (out_first.shape == out_preprocessed.shape
            == out_second.shape == (2, n_classes))

    np.testing.assert_array_almost_equal(
        out_first - out_first.max(),
        out_second - out_second.max(),
        decimal=5)
def test_keras_model_preprocess():
    """Check that a mutating ``preprocess_fn`` does not alter the caller's data.

    Three wrappers share one global-average-pooling network; only the second
    is given a ``preprocess_fn`` that halves its argument in place.
    Predicting must leave the input batch unchanged, all outputs must have
    shape ``(2, num_classes)``, and the two plain wrappers must agree.
    """
    n_classes = 1000
    n_channels = n_classes
    pixel_bounds = (0, 255)

    net_in = Input(shape=(5, 5, n_channels))
    net_out = GlobalAveragePooling2D(data_format='channels_last')(net_in)

    def halve_in_place(x):
        # deliberately mutates its argument
        x /= 2
        return x

    def fresh_network():
        return Model(inputs=net_in, outputs=net_out)

    plain_first = KerasModel(
        fresh_network(), bounds=pixel_bounds, predicts='logits')
    preprocessed = KerasModel(
        fresh_network(),
        bounds=pixel_bounds,
        predicts='logits',
        preprocess_fn=halve_in_place)
    plain_second = KerasModel(
        fresh_network(), bounds=pixel_bounds, predicts='logits')

    np.random.seed(22)
    batch = np.random.rand(2, 5, 5, n_channels).astype(np.float32)
    pristine = batch.copy()

    out_first = plain_first.batch_predictions(batch)
    out_preprocessed = preprocessed.batch_predictions(batch)

    # the in-place preprocessing must not leak into the caller's array
    assert np.all(batch == pristine)

    out_second = plain_second.batch_predictions(batch)

    assert (out_first.shape == out_preprocessed.shape
            == out_second.shape == (2, n_classes))

    np.testing.assert_array_almost_equal(
        out_first - out_first.max(),
        out_second - out_second.max(),
        decimal=5)
def main():
    """Load the CNN and its training data, then generate adversarial examples.

    The Keras model is wrapped in a foolbox ``KerasModel`` with bounds
    (-1, 1) and attacked with ``LocalSearchAttack`` through
    ``generate_adversarial_examples``.
    """
    # trained Keras network
    keras_net = load_model('./models/cnn_model_10bar_ohlc.h5')

    # pickled dataset: inputs and one-hot labels of the training split
    dataset = pro.load_pkl('./data/label8_eurusd_10bar_1500_500_val200_gaf.pkl')
    inputs = dataset['train_ohlc_gaf']
    onehot_labels = dataset['train_label_onehot']

    # foolbox view of the same network, plus the attack bound to it
    foolbox_net = KerasModel(keras_net, bounds=(-1, 1))
    local_search = foolbox.attacks.LocalSearchAttack(model=foolbox_net)

    # generate fake data
    generate_adversarial_examples(
        kmodel=keras_net,
        fmodel=foolbox_net,
        attacker=local_search,
        x_data=inputs,
        y_label=onehot_labels,
    )
def main(params):
    """Attack every sample of a pickled dataset with ``LocalSearchAttack``.

    ``params`` must provide ``'pkl_name'`` (the dataset to load) and
    ``'model_name'`` (the Keras model to load). The model is wrapped in a
    foolbox ``KerasModel`` with bounds (-1, 1) before the attack is run.
    """
    dataset = pro.load_pkl(params['pkl_name'])
    keras_net = load_model(params['model_name'])

    # foolbox wrapper and the attack bound to it
    foolbox_net = KerasModel(keras_net, bounds=(-1, 1))
    local_search = foolbox.attacks.LocalSearchAttack(model=foolbox_net)

    # NOTE(review): the outcome is bound but neither used nor returned in
    # the visible body.
    results = attack_all_samples(dataset, keras_net, foolbox_net, local_search)
def main():
    """Build the MNIST model, attack one sample with FGSM, and show the result.

    Prints the predicted class of the clean sample, displays the adversarial
    image, and prints the predicted class of the adversarial image.
    """
    # the model does not work with eager execution enabled
    tf.compat.v1.disable_eager_execution()

    mnist = mnistmodel.MnistModel(128, 10, 12)  # batch size, classes, epochs
    white_box = mnist.build_model()
    x_sample, y_sample = mnist.get_samples()

    def predicted_class(picture):
        # class index for a single 28x28 picture fed as a batch of one
        scores = white_box.predict(np.reshape(picture, (1, 28, 28, 1)))
        return np.argmax(scores, axis=-1)[0]

    # no attack
    print(np.asarray(predicted_class(x_sample)))

    # attack
    fmodel = KerasModel(white_box, bounds=(0, 255),
                        preprocessing=dict(flip_axis=-1))
    fgsm = attacks.FGSM(fmodel)
    adversarial = fgsm(x_sample, y_sample)

    plt.imshow(np.reshape(adversarial, (28, 28)))
    plt.show()

    print(predicted_class(adversarial))
def perturb_lbfgs(sample, model, data):
    """Perturb one image with the L-BFGS attack (Szegedy et al.) via foolbox.

    Based on https://foolbox.readthedocs.io/en/latest/user/tutorial.html.
    The attack targets the class following the predicted one (modulo 10)
    with a required target-class probability of 0.90.

    Returns ``(1, adversarial)`` when the model's prediction for the attack
    result differs from the original prediction, otherwise ``(0, sample)``.
    ``data`` is not used.
    """
    foolbox_model = KerasModel(model, (0.0, 1.0), channel_axis=1)

    original_class = model.predict_classes(sample)
    # next higher class, wrapping 9 back to 0
    wanted_class = (original_class + 1) % 10
    goal = TargetClassProbability(wanted_class, p=0.90)

    lbfgs = LBFGSAttack()

    # The sample is reshaped to (28, 28, 1) for foolbox; the original author
    # noted that this layout leads to problems with the model.
    attack_input = Adversarial(
        foolbox_model, goal, sample.reshape(28, 28, 1), original_class)
    adversarial = lbfgs(attack_input)

    new_class = model.predict_classes(adversarial)
    return (1, adversarial) if new_class != original_class else (0, sample)
from foolbox.criteria import OriginalClassProbability, Misclassification, ConfidentMisclassification
from skimage.measure import compare_ssim as ssim

# Fashion-MNIST, with a trailing channel axis added and pixel values scaled
# from [0, 255] to [0, 1].
fashion_mnist = keras.datasets.fashion_mnist
(train_images, train_labels), (test_images, test_labels) = fashion_mnist.load_data()
train_images = np.expand_dims(train_images, axis=3)
test_images = np.expand_dims(test_images, axis=3)
train_images = train_images / 255.0
test_images = test_images / 255.0

# Two pre-trained networks loaded from disk; only the first one is wrapped
# for foolbox here.
trained_model = keras.models.load_model('fashionMNIST.h5')
cnn5 = keras.models.load_model('cnn5.h5')
# NOTE(review): channel_axis=1 although the channel axis was appended at
# position 3 above -- confirm this is what foolbox expects here.
sub_model = KerasModel(trained_model, bounds=(0, 1), channel_axis=1)

# 100 distinct indices drawn from range(10000), without replacement.
testNum = 100
randArray = np.random.choice(10000, testNum, replace=False)


def diff_ssim(img1, img2):
    """Return the SSIM of two images after scaling by 255 and reshaping to 28x28."""
    img1 = (img1 * 255).reshape([28, 28])
    img2 = (img2 * 255).reshape([28, 28])
    return ssim(img1, img2)


# Accumulators, initialised to zero; they are not updated in this part of
# the script.
attackRate_u2 = 0.0
attackSSIM_u2 = 0.0
count = 0
class FoolboxKerasModelEntropy(DifferentiableModel):
    """Foolbox model that delegates to a ``KerasModel`` and masks its gradients.

    Predictions come straight from the wrapped ``KerasModel``. Gradients
    returned by ``predictions_and_gradient`` and ``backward`` are multiplied
    element-wise by a per-pixel mask when ``entropy_mask`` is ``True``. The
    mask is either the one stored by ``compute_gradient_mask`` (when
    ``cache_grad_mask`` is ``True``) or derived from the current image with
    ``utils.image2mask``.
    """

    def __init__(self,
                 model,
                 bounds,
                 channel_axis=3,
                 preprocessing=(0, 1),
                 predicts='probabilities',
                 entropy_mask=True,
                 cache_grad_mask=False):
        """Wrap ``model`` and store the masking options; no mask is cached yet."""
        super(FoolboxKerasModelEntropy, self).__init__(
            bounds=bounds,
            channel_axis=channel_axis,
            preprocessing=preprocessing)
        self.keras_model = KerasModel(model, bounds, channel_axis,
                                      preprocessing, predicts)
        self.entropy_mask = entropy_mask
        self.cache_grad_mask = cache_grad_mask
        self.grad_mask = None

    @staticmethod
    def _spread_over_channels(mask, image):
        """Repeat a 2-D ``mask`` along the last axis so it has ``image.shape``."""
        pixel_count = mask.shape[0] * mask.shape[1]
        as_column = mask.reshape(pixel_count, 1)
        repeated = np.broadcast_to(as_column, (pixel_count, image.shape[2]))
        return repeated.reshape(image.shape)

    def compute_gradient_mask(self, image):
        """Compute and store the entropy-based mask for ``image``.

        The local entropy of the grayscale image (disk of radius 3) is
        thresholded at 4.2: values below become 0.0, values at or above
        become 1.0. The result is stored in ``self.grad_mask`` with the
        shape of ``image``.
        """
        grayscale = skimage.color.rgb2gray(image)
        entropy = skimage.filters.rank.entropy(grayscale,
                                               skimage.morphology.disk(3))
        # Both selections are taken before any value is overwritten.
        below = entropy < 4.2
        at_or_above = entropy >= 4.2
        entropy[below] = 0.0
        entropy[at_or_above] = 1.0
        self.grad_mask = self._spread_over_channels(entropy, image)

    def __mask_gradient(self, grad, image):
        """Return ``grad`` multiplied by the active mask, or unchanged if masking is off."""
        if self.entropy_mask is not True:
            return grad
        if self.cache_grad_mask is True:
            return grad * self.grad_mask
        fresh_mask = utils.image2mask(image)
        return grad * self._spread_over_channels(fresh_mask, image)

    def predictions_and_gradient(self, image, label):
        """Return the model's predictions and the masked gradient for one image.

        Parameters
        ----------
        image : `numpy.ndarray`
            Single input, without the batch dimension.
        label : int
            Reference label passed to the wrapped model for the gradient.

        Returns
        -------
        predictions : `numpy.ndarray`
            Predictions as returned by the wrapped ``KerasModel``.
        gradient : `numpy.ndarray`
            Gradient returned by the wrapped ``KerasModel``, after masking.

        See Also
        --------
        :meth:`gradient`

        """
        predictions, raw_gradient = self.keras_model.predictions_and_gradient(
            image, label)
        return predictions, self.__mask_gradient(raw_gradient, image)

    def num_classes(self):
        """Return the number of classes reported by the wrapped model."""
        return self.keras_model.num_classes()

    def batch_predictions(self, images):
        """Return the wrapped model's predictions for a batch of images."""
        return self.keras_model.batch_predictions(images)

    def backward(self, gradient, image):
        """Backpropagate ``gradient`` through the wrapped model and mask the result.

        Parameters
        ----------
        gradient : `numpy.ndarray`
            Gradient of some loss w.r.t. the model output.
        image : `numpy.ndarray`
            Single input, without the batch dimension.

        Returns
        -------
        gradient : `numpy.ndarray`
            Gradient w.r.t. the image from the wrapped ``KerasModel``, after
            masking.

        See Also
        --------
        :meth:`gradient`

        """
        raw_gradient = self.keras_model.backward(gradient, image)
        return self.__mask_gradient(raw_gradient, image)
model = deep.five_layer_dnn_model_wide(input_shape, output_shape, 0, l1, 0) elif args.experiment_type == "six_layer": model = deep.six_layer_dnn_model_wide(input_shape, output_shape, 0, l1, 0) elif args.experiment_type == "VGG": model = convolutional.vgg_model_wide(args.dataset, 0, l1, 0) elif args.experiment_type == "leNet": model = convolutional.leNet_model_wide(0, l1, 0) else: raise Exception("Invalid model!") model.fit(x_train, y_train, epochs=50, batch_size=128) preds = np.argmax(model.predict(x_test), axis=1) kmodel = KerasModel(model=model, bounds=(min_, max_)) attack = None if args.attack_type == 'l2': attack = CarliniWagnerL2Attack(kmodel, TargetClass(7)) elif args.attack_type == 'linf': attack = RandomPGD(kmodel, TargetClass(7)) x_sample = np.take(x_test, ones, axis=0) # We exclude by default those examples which are not predicted by the classifier as 1s. true_ones = np.where(preds == 1)[0] x_sample = np.take(x_sample, true_ones, axis=0) y_sample = np.array([to_one_hot(1) for _ in x_sample])
# Load two images. The cat image is original image # and the dog image is used to initialize a targeted # attack. dog_img = image.load_img('dog.jpg', target_size=(224, 224)) cat_img = image.load_img('cat.jpg', target_size=(224, 224)) dog_img = image.img_to_array(dog_img) cat_img = image.img_to_array(cat_img) cat_img = 2.0 * cat_img / 255.0 - 1 dog_img = 2.0 * dog_img / 255.0 - 1 dog_x = np.expand_dims(dog_img, axis=0) cat_x = np.expand_dims(cat_img, axis=0) # Build a foolbox model fmodel = KerasModel(kmodel, bounds=(-1, 1)) # label of the target class preds = kmodel.predict(dog_x) dog_label = np.argmax(preds) # label of the original class preds = kmodel.predict(cat_x) cat_label = np.argmax(preds) criterion_1 = TopKMisclassification(k=5) criterion_2 = TargetClass(dog_label) criterion_3 = TargetClassProbability(dog_label, p=0.5) criterion = criterion_1 & criterion_2 & criterion_3 attack = BoundaryAttack(model=fmodel, criterion=criterion)
print("adversarial not found") return adverse = np.round(adversarial_image) original_category = categories[original_class] target_category = categories[target_class] original_image_name = file_name[:-4] save_path = "adversarial_examples/" + original_category + "/" saved_image_name = original_image_name + "_" + target_category + ".png" matplotlib.image.imsave(save_path+saved_image_name, adverse/255, format = 'png') #----------------creating the adversarial Model------------------------# keras.backend.set_learning_phase(0) network = load_model("classifier(final).h5") adversarial_model = KerasModel(network, bounds = (0, 255), preprocessing = (0, 1)) attack = ProjectedGradientDescentAttack() #----------------------------Creating the directories------------------# base_directory = "all_years_140x140" categories = os.listdir(base_directory) while len(categories) != 13: del categories[0] for category in categories: adversary_path = "adversarial_examples/" + category if not os.path.exists(adversary_path): os.makedirs(adversary_path) if os.path.isdir(base_directory + "/" + category): images = os.listdir(base_directory + "/" + category)
import foolbox
from foolbox.models import KerasModel
from foolbox.attacks import LBFGSAttack
from foolbox.criteria import TargetClassProbability, Misclassification
import numpy as np
import keras
from keras.models import load_model
import matplotlib.pyplot as plt

# Load the trained network and wrap it for foolbox with pixel bounds (0, 255).
kmodel = load_model('./LeNet.h5')
# NOTE(review): `preprocessing` is defined but never passed to KerasModel
# below -- confirm whether it should be applied or removed.
preprocessing = (np.array([104, 116, 123]), 1)
fmodel = KerasModel(kmodel, bounds=(0, 255))
# Untargeted attack: any misclassification satisfies the criterion.
attack = LBFGSAttack(model=fmodel, criterion=Misclassification())

# Collected attack results; `adversarial_labels` is not filled in this part
# of the script.
adversarial_imgs = []
adversarial_labels =[]
# adversarial_imgs = np.asarray(adversarial_imgs)
# adversarial_labels = np.asarray(adversarial_labels)
# print(type(adversarial_imgs))

# Training images and labels, both converted to float32.
img_temp = np.load('./mnist_pure/x_train.npy')
# print(img_temp.shape)
img_temp = np.asarray(img_temp, dtype=np.float32)
# print(img_temp[0].shape)
label_temp = np.load('./mnist_pure/y_train.npy')
# NOTE(review): labels are cast to float32 before being handed to the
# attack -- verify the attack accepts non-integer labels.
label_temp= np.asarray(label_temp, dtype=np.float32)

# Attack the first 60000 samples one by one and keep every result.
# NOTE(review): the count is hard-coded; presumably the arrays hold at least
# 60000 entries -- confirm against the .npy files.
for i in range(0,60000):
    adversarial = attack(img_temp[i], label_temp[i])
    adversarial_imgs.append(adversarial)
def main():
    """Attack correctly classified test images and report distortion statistics.

    Steps performed:

    1. Load a Keras model and the first 100 PNG images of the test folder,
       all labelled with the same class, and evaluate the clean accuracy.
    2. Wrap the model in a foolbox ``KerasModel`` (bounds (0, 1)) and run
       ``SaliencyMapAttack`` on every correctly classified image.
    3. Per image, print the class/score change, max and L1 distortion,
       fraction of modified pixels and PSNR (all on a 0..255 scale).
    4. Print averages over the successful attacks and the model accuracy on
       the adversarial images.

    Averages are reported as ``nan`` when no attack succeeded. Previously
    that case raised ``ZeroDivisionError``.
    """
    # Load Keras model
    model = load_model(
        r'.................................................h5')
    Ptype = 'probabilities'  # (default) with the softmax
    # To attack the logits instead, switch the softmax for a linear activation:
    #model = force_linear_activation(model=model, savemodel=None)
    #Ptype = 'logits'

    compressJPEG = 0  # truthy: JPEG-compress inputs before use
    jpeg = 0          # truthy: also report the JPEG robustness statistic
    jpeg_quality = 85

    # size (no color images)
    img_rows, img_cols, img_chans = 64, 64, 1
    num_classes = 2

    # ---------------------------------------------------------
    # Load test data, define labels, test the model
    # ---------------------------------------------------------
    images = glob(r'F:..................................\*.png')
    label = 1  # label = 0 for Manipulated, 1 for Original ------ for StammNets, it is the reverse ! (0 for Original)

    # number of images for testing the model
    #numImg = len(images)  # <= len(images)
    numImg = 100
    #np.random.seed(1234)
    #index = np.random.randint(len(images), size=numImg)
    index = np.arange(numImg)

    x_test = np.zeros((numImg, img_rows, img_cols))
    for i in np.arange(numImg):
        img = imread(images[index[i]])
        if compressJPEG:
            img1 = Image.fromarray(img)
            img1.save('temp.jpeg', "JPEG", quality=jpeg_quality)
            img = Image.open('temp.jpeg')
        x_test[i] = img

    # Labels
    y_test_c = np.tile(label, numImg)
    # Convert labels to one-hot with Keras
    y_test = keras.utils.to_categorical(y_test_c, num_classes)

    # Reshape test data, divide by 255 because net was trained this way
    x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, img_chans)
    x_test = x_test.astype('float32')
    x_test /= 255

    # Test legitimate examples
    score = model.evaluate(x_test, y_test, verbose=0)
    predicted_legitimate_labels = np.argmax(model.predict(x_test), axis=1)
    print('Accuracy on legitimate images (all): {:3.4f}'.format(score[1]))

    # ------------------------------------------------------------------
    # Attack the [correctly classified] images in the test set
    # ------------------------------------------------------------------
    # Wrap model
    fmodel = KerasModel(model, bounds=(0, 1), predicts=Ptype)

    # Prepare attack (alternatives tried by the original author are kept)
    #attack = foolbox.attacks.IterativeGradientSignAttack(fmodel)
    #attack = foolbox.attacks.DeepFoolAttack(fmodel)
    attack = foolbox.attacks.SaliencyMapAttack(fmodel)
    #attack = foolbox.attacks.BIM(fmodel)
    #attack = foolbox.attacks.LBFGSAttack(fmodel)

    # Data, labels and categorical labels, only for correctly classified
    # examples. argwhere yields an (l, 1) index array, hence the reshapes.
    correct_idx = np.argwhere(predicted_legitimate_labels == y_test_c)
    num_correct = correct_idx.shape[0]
    x_test_ok = np.reshape(x_test[correct_idx, :, :, :],
                           (num_correct, img_rows, img_cols, img_chans))
    y_test_ok = np.reshape(y_test[correct_idx, :],
                           (num_correct, num_classes))
    y_test_c_ok = np.argmax(y_test_ok, axis=1)

    # Number of correctly classified images to attack (at most num_correct).
    n_test = num_correct

    S = 0      # attacks that did NOT change the predicted class
    S_jpg = 0  # adversarials whose class survived JPEG (check disabled below)
    t = 0      # attacks whose result contained NaN
    avg_Max_dist = 0
    avg_L1_dist = 0
    avg_No_Mod_Pixels = 0
    avg_psnr = 0
    psnr_org = 0
    pixels_per_image = img_rows * img_cols

    adv_images = np.zeros((n_test, img_rows, img_cols, img_chans))
    true_labels_cat = []
    for idx in np.arange(n_test):
        image = x_test_ok[idx]
        true_labels_cat.append(y_test_ok[idx, :])
        image = image.astype('float32')

        if compressJPEG:
            img1 = Image.fromarray(np.uint8(255 * image[:, :, 0]))
            img1.save('temp.jpeg', "JPEG", quality=jpeg_quality)
            img_reread = Image.open('temp.jpeg')
            # NOTE(review): the re-read image is not divided by 255 again,
            # unlike the initial loading path -- confirm before enabling.
            image = np.array(img_reread)
            image = np.reshape(image, (img_rows, img_cols, img_chans))

        # Generate adversarial images
        adv_images[idx] = attack(image, y_test_c_ok[idx])
        adversarial_image = 255 * adv_images[idx].reshape(
            (img_rows, img_cols))

        # Scores of legitimate and adversarial images for each idx
        true_score = foolbox.utils.softmax(fmodel.predictions(image))
        true_class = np.argmax(true_score)
        adv_score = foolbox.utils.softmax(
            fmodel.predictions(adv_images[idx]))
        adv_class = np.argmax(adv_score)

        print(
            'Image {}. Class changed from {} to {}. The score passes from {} to {}'
            .format(idx, true_class, adv_class, true_score, adv_score))

        # A result full of NaN means no adversarial was found; count it as
        # a failed attack.
        if np.any(np.isnan(adv_images[idx])):
            adv_class = true_class  # attack not successful
            t = t + 1
            print('An adversarial image cannot be found!!')

        if true_class == adv_class:
            S = S + 1

        # Distortion between the original and the attacked image (0..255)
        image_before = 255 * image.reshape((img_rows, img_cols))
        diff = np.double(image_before) - np.double(adversarial_image)
        max_dist = abs(diff).max()
        l1_dist = abs(diff).sum() / pixels_per_image
        modified_fraction = np.count_nonzero(diff) / pixels_per_image

        print(
            'Max distortion adversarial [After Rounding] = {:3.4f}; L1 distortion = {:3.4f}'
            .format(max_dist, l1_dist))
        print('Percentage of modified pixels [After Rounding] = {:3.4f}'.
              format(modified_fraction))
        psnr_org = psnr(image_before, adversarial_image)
        print('PSNR = {:3.4f}'.format(abs(psnr_org)))

        # update average distortion (successful attacks only)
        if true_class != adv_class:
            avg_Max_dist = avg_Max_dist + max_dist
            avg_L1_dist = avg_L1_dist + l1_dist
            avg_No_Mod_Pixels = avg_No_Mod_Pixels + modified_fraction
            avg_psnr = avg_psnr + psnr(image_before, adversarial_image)

        # NOTE: the original contained a disabled block here that
        # JPEG-compressed a rounded adversarial image, re-classified it and
        # incremented S_jpg when the class was unchanged. It relied on a
        # rounded image `Z` that is no longer computed, so S_jpg stays 0.

    # Number of successful attacks; the averages below are taken over these.
    n = n_test - S

    def _average(total):
        # Mean over successful attacks, or NaN when there were none.
        return total / n if n else float('nan')

    print('Adversarial failures: {} over {}'.format(S, n_test))
    print('Average distortion: max dist {}, L1 dist {}'.format(
        _average(avg_Max_dist), _average(avg_L1_dist)))
    print('Average no of modified pixels: {}'.format(
        _average(avg_No_Mod_Pixels)))
    print('The adversarial image cannot be found {} times over {}'.format(
        t, n_test))
    if jpeg:
        print(
            'Percentage of adversarial JPEG unchanged with QF {} (the attack is not successful): {}'
            .format(jpeg_quality,
                    S_jpg / n_test if n_test else float('nan')))

    # Evaluate accuracy
    true_labels_cat = np.array(true_labels_cat)
    adv_score = model.evaluate(adv_images, true_labels_cat, verbose=0)
    score_perfect = model.evaluate(x_test_ok, y_test_ok, verbose=0)

    print('Accuracy on legitimate images (all): {:3.4f}'.format(score[1]))
    print(
        'Accuracy on legitimate images (only correctly classified, obviously 1): {:3.4f}'
        .format(score_perfect[1]))
    print('Accuracy on adversarial images: {:3.4f}'.format(adv_score[1]))
    print('Attack success rate on adversarial images N1: {:3.4f}'.format(
        1 - adv_score[1]))
    print('Average PSNR =: {:3.4f}'.format(_average(avg_psnr)))

    # SECOND PART
    # Load the second model and test the adversarial images
    # NOTE(review): only the label preparation for the second model is
    # present here; the values below are not used in the visible body.
    label3 = 1  # it may be different from label because of the differences in the model.
    # Labels
    y_test_c = np.tile(label3, n_test)
    # Convert labels to one-hot with Keras
    y_test2 = keras.utils.to_categorical(y_test_c, num_classes)
#!/usr/bin/env python3 import keras from keras.applications.resnet50 import ResNet50 from foolbox.models import KerasModel from robust_vision_benchmark import imagenet_model_server keras.backend.set_learning_phase(0) kmodel = ResNet50(weights='imagenet') preprocessing = ([104, 116, 123], 1) fmodel = KerasModel(kmodel, bounds=(0, 255), preprocessing=preprocessing) imagenet_model_server(fmodel, channel_order='BGR', image_size=224)
from keras.applications.resnet50 import preprocess_input
from keras.applications.resnet50 import decode_predictions
from scipy.misc import imsave, imshow, imread
import matplotlib.pyplot as plt
import glob

# NOTE: this file contained unresolved git merge-conflict markers, which made
# it unparseable. The conflict is resolved in favour of the HEAD side
# (NIPS_attack/test.py): absolute ImageNet directory, image-count report and
# only the first 100 images. The incoming side used image_dir = './imgs' and
# iterated over every path -- switch back if that variant is the wanted one.
image_dir = '/data0/images/imagenet12/imagenet224'

# Inference only: set the learning phase before the network is built.
keras.backend.set_learning_phase(0)
kmodel = ResNet50(weights='imagenet')

# foolbox preprocessing pair: subtract the first element, divide by the second.
preprocessing = (np.array([104, 116, 123]), 1)
fmodel = KerasModel(kmodel, bounds=(0, 255), preprocessing=preprocessing)

success = 0.
paths = glob.glob(image_dir+'/*.png')
# print() call form works on both Python 2 and 3 for a single argument.
print("{} images found".format(len(paths)))
for path in paths[:100]:
    image = imread(path).astype(np.float32)
    # Work on a copy -- presumably so `image` stays untouched if
    # preprocess_input modifies its argument in place.
    test = image.copy()
    preds = kmodel.predict(preprocess_input(np.expand_dims(test, 0)))
    label = np.argmax(preds)
    #print("Top 3 predictions (regular: ", decode_predictions(preds, top=3))
def _noise_to_uint8(diff):
    """Stretch a difference image linearly onto 0..255 and return it as uint8.

    A difference image without dynamic range (constant, or containing NaN)
    is rendered as all zeros instead of dividing by zero.
    """
    diff = np.asarray(diff, dtype=np.float64)
    lowest = np.min(diff)
    span = np.max(diff) - lowest
    if not np.isfinite(span) or span == 0:
        return np.zeros(diff.shape, dtype=np.uint8)
    # Scale BEFORE casting: casting the 0..1 ratio first truncates almost
    # every pixel to 0.
    return np.uint8(np.round(255 * (diff - lowest) / span))


def _safe_mean(total, count):
    """Return total / count, or NaN when count is zero."""
    return total / count if count else float('nan')


def main():
    """Attack one Keras model with L-BFGS and test transfer to a second model.

    Steps, as performed below:
      1. Load two models (N1 and the "mismatched" N2) and a folder of PNG
         images, all labelled `label` for N1 and `label2` for N2.
      2. Keep only the images that N1 and then N2 classify as expected.
      3. Generate one adversarial image per kept image with foolbox's
         LBFGSAttack on N1, store it rounded to 8 bit together with
         normalised noise maps, and accumulate distortion / PSNR statistics
         for the float and the rounded ("made integer") versions.
      4. Print per-image and summary statistics, then the accuracy of N1 and
         N2 on the adversarial images.

    Side effects: writes PNG files under the hard-coded output folders and,
    when a JPEG option is enabled, a scratch file 'temp.jpeg' in the current
    directory. Returns None.
    """
    # ------------------------------------------------------------------
    # Configuration
    # ------------------------------------------------------------------
    model = load_model(r'...........................h5')  # first model (N1)

    # Output type the foolbox wrapper is told to expect ('logits' or
    # 'probabilities'); chosen to avoid the softmax.
    Ptype = 'probabilities'

    img_rows, img_cols, img_chans = 128, 128, 1
    n_pixels = img_rows * img_cols
    num_classes = 2

    jpeg_quality = 85
    jpeg = 0          # truthy: re-test every adversarial image after JPEG
    compressJPEG = 0  # truthy: JPEG-compress the inputs before attacking

    # Output folders (runtime values unchanged; backslashes are now escaped
    # explicitly instead of relying on invalid escape sequences).
    path1 = 'E:/......................./'  # rounded adversarial images
    path2 = 'E:\\..................\\'  # float noise maps
    path3 = 'E:\\Benedetta_for_ICASSP\\IMAGE_Diff_Int\\'  # integer noise maps

    # ------------------------------------------------------------------
    # Load test data and define labels
    # ------------------------------------------------------------------
    images = glob(r'...................\*.png')  # images of the attacked class
    label = 0  # label = 1 for Original and label = 0 for Manipulated class

    model2 = load_model(r'...................h5')  # second, mismatched model (N2)
    label2 = 1

    numImg = len(images)
    np.random.seed(1234)
    # Indices are drawn with replacement, so an image may be picked twice.
    index = np.random.randint(len(images), size=numImg)
    x_test = np.zeros((numImg, img_rows, img_cols))
    for i, source in enumerate(index):
        img = imread(images[source], flatten=False)  # flatten=True would convert to gray
        if compressJPEG:
            img1 = Image.fromarray(img)
            img1.save('temp.jpeg', "JPEG", quality=jpeg_quality)
            img = Image.open('temp.jpeg')
        x_test[i] = img

    # One label per image, then one-hot encoded with Keras.
    y_test_c = np.tile(label, numImg)
    y_test = keras.utils.to_categorical(y_test_c, num_classes)

    # Add the channel axis and divide by 255 because the net was trained this way.
    x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, img_chans)
    x_test = x_test.astype('float32')
    x_test /= 255

    # ------------------------------------------------------------------
    # Test legitimate examples on both models
    # ------------------------------------------------------------------
    score = model.evaluate(x_test, y_test, verbose=0)  # loss value & metrics
    predicted_legitimate_labels = np.argmax(model.predict(x_test), axis=1)
    print('Accuracy on legitimate images (all): {:3.4f}'.format(score[1]))

    y_test_c2 = np.tile(label2, numImg)
    y_test2 = keras.utils.to_categorical(y_test_c2, num_classes)
    score2 = model2.evaluate(x_test, y_test2, verbose=0)
    print('Accuracy on legitimate images (all) by mismatched model: {:3.4f}'.format(score2[1]))

    # ------------------------------------------------------------------
    # Wrap the first model and prepare the attack
    # ------------------------------------------------------------------
    fmodel = KerasModel(model, bounds=(0, 1), predicts=Ptype)
    # L-BFGS adversarial attack with a PSNR limit.
    attack = foolbox.attacks.LBFGSAttack(fmodel, threshold=PSNR2MSE(55))

    # ------------------------------------------------------------------
    # Keep only the images both models classify as expected: misclassified
    # images are not attacked. Boolean masks keep every array aligned and
    # also work when a single image survives (np.squeeze used to collapse
    # the index array to 0-d in that case).
    # ------------------------------------------------------------------
    keep_n1 = predicted_legitimate_labels == y_test_c
    x_test_ok = x_test[keep_n1]
    y_test_ok = y_test[keep_n1]
    test_ok_index = index[keep_n1]
    if len(x_test_ok) == 0:
        print('No image is correctly classified by the first model: nothing to attack.')
        return

    y_test_c_ok_2 = np.tile(label2, len(x_test_ok))
    predicted_legitimate_labels2 = np.argmax(model2.predict(x_test_ok), axis=1)
    keep_n2 = predicted_legitimate_labels2 == y_test_c_ok_2
    x_test_ok = x_test_ok[keep_n2]
    y_test_ok = y_test_ok[keep_n2]
    y_test_c_ok = np.argmax(y_test_ok, axis=1)
    test_ok_index = test_ok_index[keep_n2]

    n_test = len(x_test_ok)
    if n_test == 0:
        print('No image is correctly classified by both models: nothing to attack.')
        return

    # ------------------------------------------------------------------
    # Elaborate n_test adversarial examples
    # ------------------------------------------------------------------
    S = 0      # attacks that left the class unchanged
    S_int = 0  # same, after rounding the adversarial image to 8 bit
    S_jpg = 0  # same, after JPEG compression of the adversarial image
    t = 0      # images for which no adversarial image was found
    avg_Max_dist = 0
    avg_L1_dist = 0
    avg_Max_dist_made_integer = 0
    avg_L1_dist_made_integer = 0
    avg_No_Mod_Pixels = 0
    avg_No_Mod_Pixels_integer_rounding_adv_img = 0
    avg_No_Mod_Pixels_integer_NO_rounding = 0
    PSNR = 0
    avg_psnr = 0
    avg_psnr_int = 0

    adv_images = np.zeros((n_test, img_rows, img_cols, img_chans))
    adv_images_integer = np.zeros((n_test, img_rows, img_cols, img_chans))
    true_labels_cat = []

    for idx in range(n_test):
        image = x_test_ok[idx]
        true_labels_cat.append(y_test_ok[idx, :])
        image = image.astype('float32')
        image_original = 255 * image.reshape((img_rows, img_cols))

        if compressJPEG:
            img1 = Image.fromarray(np.uint8(255 * image[:, :, 0]))
            img1.save('temp.jpeg', "JPEG", quality=jpeg_quality)
            img_reread = Image.open('temp.jpeg')
            # Bring the re-read 8-bit image back to the model's (0, 1) range;
            # it used to be passed to the attack in 0..255.
            image = np.array(img_reread).astype('float32') / 255
            image = np.reshape(image, (img_rows, img_cols, img_chans))

        # Generate the adversarial image. A None result ends up as NaN in the
        # float array and is detected further down.
        adv_images[idx] = attack(image, y_test_c_ok[idx])
        adversarial_image = 255 * adv_images[idx].reshape((img_rows, img_cols))
        Z = np.uint8(np.round(adversarial_image))

        # Store the adversarial integer image under the source file name.
        cv2.imwrite(os.path.join(path1, os.path.basename(images[test_ok_index[idx]])), Z)

        # Store the normalised noise of the float adversarial image.
        diff_noise = adversarial_image - image_original
        cv2.imwrite(path2 + 'adv_Nosie_%d.png' % idx, _noise_to_uint8(diff_noise))

        adv_images_integer[idx] = np.reshape(Z / 255., (img_rows, img_cols, 1))

        # Scores of legitimate and adversarial images for this idx.
        scoreTemp = fmodel.predictions(image)
        true_score = foolbox.utils.softmax(scoreTemp)
        true_class = np.argmax(true_score)  # ground truth according to network 1
        adv_score = foolbox.utils.softmax(fmodel.predictions(adv_images[idx]))
        adv_class = np.argmax(adv_score)
        adv_integer_score = foolbox.utils.softmax(fmodel.predictions(adv_images_integer[idx]))
        adv_integer_class = np.argmax(adv_integer_score)

        print('Image {}. Class changed from {} to {}. The score passes from {} to {}'.format(idx, true_class, adv_class, true_score, adv_score))
        print('Image Made Integer {}. Class changed from {} to {}. The score passes from {} to {}'.format(idx, true_class, adv_integer_class, true_score, adv_integer_score))

        # A NaN image means no adversarial was found: count it as a failed
        # attack instead of trusting the prediction on NaN values.
        if np.any(np.isnan(adv_images[idx])):
            adv_class = true_class
            adv_integer_class = true_class
            t = t + 1
            print('An adversarial image cannot be found!!')

        if true_class == adv_class:
            S = S + 1
        if true_class == adv_integer_class:
            S_int = S_int + 1

        # Distortion between the attacked image and its adversarial version.
        image_before = 255 * image.reshape((img_rows, img_cols))
        # Original note: uint8 has no truncation effect here (presumably
        # because the pixel values are already integers -- confirm).
        X = np.uint8(image_before)
        diff = np.double(image_before) - np.double(adversarial_image)
        abs_diff = abs(diff)
        max_dist = abs_diff.max()
        l1_dist = abs_diff.sum() / n_pixels
        modified_fraction = np.count_nonzero(diff) / n_pixels
        print('Max distortion adversarial = {:3.4f}; L1 distortion = {:3.4f}'.format(max_dist, l1_dist))
        print('Percentage of modified pixels on integers = {:3.4f}. Percentage of negative modifications = {:3.4f}'.format(modified_fraction, np.count_nonzero(abs_diff - diff) / n_pixels))

        diff_integer = np.double(X) - np.double(Z)
        abs_diff_integer = abs(diff_integer)
        cv2.imwrite(path3 + 'adv_Nosie_%d.png' % idx, _noise_to_uint8(diff_integer))
        print('Max distortion adversarial integer = {:3.4f}; L1 distortion = {:3.4f}'.format(abs_diff_integer.max(), abs_diff_integer.sum() / n_pixels))

        # PSNR of the float and of the integer adversarial image.
        psnr_org = psnr(image_before, adversarial_image)
        print('PSNR = {:3.4f}'.format(abs(psnr_org)))
        psnr_Int = psnr(X, Z)
        print('PSNR (Integer) = {:3.4f}'.format(abs(psnr_Int)))

        # Update the running sums, successful attacks only.
        if true_class != adv_class:
            avg_Max_dist = avg_Max_dist + max_dist
            avg_L1_dist = avg_L1_dist + l1_dist
            avg_No_Mod_Pixels = avg_No_Mod_Pixels + modified_fraction
            avg_psnr = avg_psnr + psnr_org

        if true_class != adv_integer_class:
            avg_Max_dist_made_integer = avg_Max_dist_made_integer + abs_diff_integer.max()
            avg_L1_dist_made_integer = avg_L1_dist_made_integer + abs_diff_integer.sum() / n_pixels
            # Modified pixels after rounding the adversarial image to integers.
            avg_No_Mod_Pixels_integer_rounding_adv_img = avg_No_Mod_Pixels_integer_rounding_adv_img + np.count_nonzero(diff_integer) / n_pixels
            # Modified pixels of the unrounded image, counted when the
            # rounded image changes the class.
            avg_No_Mod_Pixels_integer_NO_rounding = avg_No_Mod_Pixels_integer_NO_rounding + modified_fraction
            avg_psnr_int = avg_psnr_int + psnr_Int

        # JPEG-compress the adversarial image and test again.
        if jpeg:
            img1 = Image.fromarray(Z)
            img1.save('temp.jpeg', "JPEG", quality=jpeg_quality)
            adv_reread = Image.open('temp.jpeg')
            x_test_comp = np.array(adv_reread)
            x_test_comp = x_test_comp.reshape(img_rows, img_cols, img_chans)
            x_test_comp = x_test_comp.astype('float32')
            x_test_comp /= 255
            adv_reread_score = foolbox.utils.softmax(fmodel.predictions(x_test_comp))
            adv_reread_class = np.argmax(adv_reread_score)
            if true_class == adv_reread_class:
                S_jpg = S_jpg + 1
            print('Class after JPEG compression {}, with score {}.'.format(adv_reread_class,adv_reread_score))
            x_test_comp = 255 * x_test_comp.reshape((img_rows, img_cols))
            psnr_jpeg = psnr(image_before, x_test_comp)
            print('PSNR = {}'.format(psnr_jpeg))
            PSNR = psnr_jpeg + PSNR

    # ------------------------------------------------------------------
    # Summary. n / n_int count the successful attacks and may be zero, so
    # the averages go through _safe_mean (NaN instead of ZeroDivisionError).
    # ------------------------------------------------------------------
    n = n_test - S
    n_int = n_test - S_int
    print('Class for the adversarial unchanged: {} over {}'.format(S,n_test))
    print('Class for the adversarial integer unchanged: {} over {}'.format(S_int,n_test))
    print('Average distortion: max dist {}, L1 dist {}'.format(_safe_mean(avg_Max_dist, n), _safe_mean(avg_L1_dist, n)))
    print('Average distortion (made integer): max dist {}, L1 dist {}'.format(_safe_mean(avg_Max_dist_made_integer, n_int), _safe_mean(avg_L1_dist_made_integer, n_int)))
    print('Average no of modified pixels: {}'.format(_safe_mean(avg_No_Mod_Pixels, n)))
    print('Average no of modified pixels on integers NO ROUNDING: {}'.format(_safe_mean(avg_No_Mod_Pixels_integer_NO_rounding, n_int)))
    print('Average no of modified pixels on integers rounding adv_img to int: {}'.format(_safe_mean(avg_No_Mod_Pixels_integer_rounding_adv_img, n_int)))
    print('The adversarial image cannot be found {} times over {}'.format(t,n_test))
    if jpeg:
        print('Percentage of adversarial JPEG unchanged with QF {} (the attack is not successful): {}'.format(jpeg_quality, S_jpg/n_test))
        print('Average PSNR distortion for JPEG adversarial images : {}'.format(PSNR/n_test))

    # Evaluate accuracy of the first model.
    true_labels_cat = np.array(true_labels_cat)
    adv_score = model.evaluate(adv_images, true_labels_cat, verbose=0)
    adv_score_integer = model.evaluate(adv_images_integer, true_labels_cat, verbose=0)
    score_perfect = model.evaluate(x_test_ok, y_test_ok, verbose=0)

    print('Accuracy on legitimate images (all) by N1: {:3.4f}'.format(score[1]))
    print('Accuracy on legitimate images (all) by mismatched model N2: {:3.4f}'.format(score2[1]))
    print('Accuracy on legitimate images (only correctly classified, obviously 1) N1: {:3.4f}'.format(score_perfect[1]))
    print('Accuracy on adversarial images N1: {:3.4f}'.format(adv_score[1]))
    print('Attack success rate on adversarial images N1: {:3.4f}'.format(1-adv_score[1]))
    print('Accuracy on adversarial images (made integer) N1: {:3.4f}'.format(adv_score_integer[1]))
    print('Attack success on adversarial images (made integer) N1: {:3.4f}'.format(1-adv_score_integer[1]))
    print('Average PSNR =: {:3.4f}'.format(_safe_mean(avg_psnr, n)))
    print('Average PSNR (Integer) =: {:3.4f}'.format(_safe_mean(avg_psnr_int, n_int)))

    # ------------------------------------------------------------------
    # SECOND PART: test the adversarial images (float and integer) on the
    # second model, using its own label.
    # ------------------------------------------------------------------
    y_test_c = np.tile(label2, n_test)
    y_test2 = keras.utils.to_categorical(y_test_c, num_classes)

    adv_score_mismatch = model2.evaluate(adv_images, y_test2, verbose=0)
    adv_score_mismatch_on_integer = model2.evaluate(adv_images_integer, y_test2, verbose=0)

    print('Accuracy on adversarial images with the mismatched model N2: {:3.4f}'.format(adv_score_mismatch[1]))
    print('Attack success rate on adversarial images with the mismatched model N2: {:3.4f}'.format(1-adv_score_mismatch[1]))
    print('Accuracy on adversarial images with the mismatched model (Integer) N2: {:3.4f}'.format(adv_score_mismatch_on_integer[1]))
    print('Attack success rate on adversarial images with the mismatched model (Integer) N2: {:3.4f}'.format(1-adv_score_mismatch_on_integer[1]))
plot_sample = True
plot_samples = 100

# Output directory for plotted samples; only prepared when plotting is enabled.
if plot_sample:
    path = './adv_samples/' + dataset
    already_present = os.path.exists(path)
    if not already_present:
        os.makedirs(path)

# -------------------------------------------------
# ADVERSARIAL SAMPLE GENERATION
# -------------------------------------------------

# Wrap the Keras classifier for Foolbox (inputs bounded to (0, 1)) and build
# the FGSM attack on top of it.
foolbox_model = KerasModel(model, (0, 1))
attack = FGSM(foolbox_model)

# Length of one flattened sample, as stored in the rows of X_te.
flat_size = np.prod(orig_dims)

# Replace every test sample for which the attack succeeds, in place.
for i in tqdm(range(len(X_te))):
    unflattened = np.reshape(X_te[i], orig_dims)
    adv_sample = attack(unflattened, label=y_te[i], max_epsilon=max_epsilon)

    # The attack yields None when it fails; the original sample is then kept.
    if adv_sample is None:
        continue

    X_te[i] = np.reshape(adv_sample, flat_size)
from foolbox.criteria import Misclassification
import numpy as np
import keras
from keras.datasets import mnist
from keras.applications.resnet50 import ResNet50
from keras.applications.resnet50 import preprocess_input
from keras.applications.resnet50 import decode_predictions
from scipy.misc import imsave, imshow, imread
import matplotlib.pyplot as plt
import glob
from train_mnist import madry_mnist_model

# Inference only: set the learning phase before the model is created.
keras.backend.set_learning_phase(0)

# Build the MNIST network and restore its weights from disk.
model = madry_mnist_model()
model.load_weights('mnist_madry.h5')

# Foolbox view of the same network; inputs live in (0, 1).
fmodel = KerasModel(model, bounds=(0, 1))

success = 0.

# Only the first split returned by the loader is used.
(images, labels), _ = mnist.load_data()

for raw_digit in images[:100]:
    # Convert to float32, add a leading and a trailing axis, and divide by
    # 255 in a single expression.
    image = raw_digit.astype(np.float32)[np.newaxis, :, :, np.newaxis] / 255.

    # Predict on a copy so `image` itself is left untouched.
    test = image.copy()
    preds = model.predict(test)
    label = np.argmax(preds)
    print("Label: ", label)
    #imshow(image[0, :, :, 0])
from foolbox.criteria import ConfidentMisclassification import numpy as np import keras from keras.applications.resnet50 import ResNet50 from keras.applications.resnet50 import preprocess_input from keras.applications.resnet50 import decode_predictions import cv2 import scipy.io as sio import matplotlib.pyplot as plt # instantiate model keras.backend.set_learning_phase(0) kmodel = ResNet50(weights='imagenet') preprocessing = (np.array([103.0626,115.9029,123.1516]), 1) fmodel = KerasModel(kmodel, bounds=(0, 255), preprocessing=preprocessing) attacks=['GradientAttack', 'GradientSignAttack', 'IterativeGradientAttack', 'IterativeGradientSignAttack', 'LBFGSAttack', 'ApproximateLBFGSAttack', 'DeepFoolAttack', 'DeepFoolL2Attack', 'DeepFoolLinfinityAttack', 'SaliencyMapAttack', 'GaussianBlurAttack', 'ContrastReductionAttack', 'SinglePixelAttack',