def sample_attack(keras_model, image, attack_method, input_name, labeled, target=0):
    """
    For the given model and input, generate an adversarial image using the specified attack method.
    :return: (adversarial image array, saved file name), or (None, error message) on failure
    """
    input_shape = nm.extract_input_shape(keras_model)[1:]
    image = nm.prepare_image(image, input_shape)
    layer_input = [image]

    label = keras_model.predict(np.asarray(layer_input))
    label = np.argmax(label)
    if labeled is not None and not label == labeled:
        return None, ("This image cannot be correctly classified, no adversarial sample will be generated. "
                      "expected: " + str(labeled) + " actual: " + str(label))

    network_model = KerasModel(keras_model, bounds=(0, 1))

    # make sure the target class differs from the predicted class before building the attack
    print(label)
    if label == target:
        target = (target + 1) % 10

    # run the attack
    if str(attack_method).lower() == 'lbfgs':
        attack = LBFGSAttack(model=network_model, criterion=TargetClassProbability(target, p=.5))
    elif str(attack_method).lower() == 'singlepixelattack':
        attack = SinglePixelAttack(model=network_model, criterion=TargetClassProbability(target, p=.5))
    else:
        return None, "Attack method not supported at the moment"

    adversarial = attack(image[:, :, ::-1], label)
    output = network_model.predictions(adversarial)
    print(np.argmax(output))

    adversarial = adversarial.reshape(input_shape)
    adversarial = adversarial * 255
    adv_image_name = 'adv_{}_origin_{}_{}_{}'.format(target, label, attack_method, input_name)
    print(adversarial.shape)

    im = None
    if len(adversarial.shape) == 2:
        im = Image.fromarray(np.uint8(adversarial), mode="1")
    if len(adversarial.shape) == 3 and adversarial.shape[2] == 1:
        im = Image.fromarray(np.uint8(adversarial.reshape(adversarial.shape[0], adversarial.shape[1])), mode="L")
    if len(adversarial.shape) == 3 and adversarial.shape[2] == 3:
        im = Image.fromarray(np.uint8(adversarial), mode="RGB")
    im.save(os.path.join(basedir, Config.UPLOAD_IMAGE_FOLDER, adv_image_name))
    # cv2.imwrite(os.path.join(basedir, Config.UPLOAD_IMAGE_FOLDER, adv_image_name), adversarial)
    print('adv', adv_image_name)
    return adversarial, adv_image_name
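# A hypothetical usage sketch for sample_attack (the model path, image variable and expected label
# are assumptions; `uploaded_image` stands for whatever nm.prepare_image accepts):
from keras.models import load_model

keras_model = load_model('mnist_cnn.h5')   # assumed pre-trained classifier
adv, name_or_msg = sample_attack(keras_model, uploaded_image, 'lbfgs', 'digit.png', labeled=7, target=0)
if adv is None:
    print(name_or_msg)                      # misclassified input or unsupported attack method
else:
    print('adversarial example saved as', name_or_msg)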
preprocessing = (np.array([104, 116, 123]), 1)
fmodel = KerasModel(kmodel, bounds=(0, 255))
attack = LBFGSAttack(model=fmodel, criterion=Misclassification())

adversarial_imgs = []
adversarial_labels = []
# adversarial_imgs = np.asarray(adversarial_imgs)
# adversarial_labels = np.asarray(adversarial_labels)
# print(type(adversarial_imgs))

img_temp = np.load('./mnist_pure/x_train.npy')
# print(img_temp.shape)
img_temp = np.asarray(img_temp, dtype=np.float32)
# print(img_temp[0].shape)
label_temp = np.load('./mnist_pure/y_train.npy')
label_temp = np.asarray(label_temp, dtype=np.float32)

for i in range(0, 60000):
    adversarial = attack(img_temp[i], label_temp[i])
    adversarial_imgs.append(adversarial)
    adv_labels = np.argmax(fmodel.predictions(adversarial))
    adversarial_labels.append(adv_labels)
    print(np.array(adversarial_imgs).shape, np.array(adversarial_labels).shape,
          'Actual Label: {}, Adversarial Label: {}'.format(label_temp[i], adv_labels))

adversarial_imgs = np.asarray(adversarial_imgs)
adversarial_labels = np.asarray(adversarial_labels)
np.save('./adv_imgs_train.npy', adversarial_imgs)
np.save('./adv_labels.npy', adversarial_labels)
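# A minimal setup sketch for the snippet above (assumptions, not the original code): `kmodel` is
# taken to be a pre-trained Keras MNIST classifier and the Foolbox 1.x/2.x API is assumed.
import numpy as np
from keras.models import load_model
from foolbox.models import KerasModel
from foolbox.attacks import LBFGSAttack
from foolbox.criteria import Misclassification

kmodel = load_model('mnist_model.h5')   # hypothetical path to the trained model
# Note: the `preprocessing` tuple above is an ImageNet-style mean subtraction and is never passed
# to KerasModel in the snippet, so as written it has no effect.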
iteration_size = 1000
global_iterations = 0

# Run the boundary attack to generate an adversarial example
adversarial = attack(cat_img, label=cat_label, unpack=False,
                     iterations=iteration_size, starting_point=dog_img,
                     log_every_n_steps=10, verbose=True)
global_iterations += iteration_size
np.save('adversarial_image_{0}'.format(global_iterations), adversarial.image)

for i in range(10):
    adversarial = attack(adversarial, unpack=False, iterations=iteration_size, verbose=True)
    global_iterations += iteration_size
    np.save('adversarial_image_{0}'.format(global_iterations), adversarial.image)

# show results
print(np.argmax(fmodel.predictions(adversarial.image)))
print(foolbox.utils.softmax(fmodel.predictions(adversarial.image))[dog_label])
preds = kmodel.predict(np.expand_dims(adversarial.image.copy(), 0))
print("Top 5 predictions (adversarial): ", decode_predictions(preds, top=5))
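# A setup sketch for the boundary-attack loop above (assumed, not part of the original snippet):
# `fmodel`, `attack`, `cat_img`/`cat_label` and `dog_img`/`dog_label` are presumed to come from
# something like the standard Foolbox + Keras ResNet50 example; the dog image path and label are hypothetical.
import numpy as np
import foolbox
from keras.applications.resnet50 import ResNet50, decode_predictions

kmodel = ResNet50(weights='imagenet')
preprocessing = (np.array([104, 116, 123]), 1)   # BGR mean subtraction, as in the Foolbox examples
fmodel = foolbox.models.KerasModel(kmodel, bounds=(0, 255), preprocessing=preprocessing)

cat_img, cat_label = foolbox.utils.imagenet_example()     # placeholder source image and label
dog_img, dog_label = np.load('dog.npy'), 248              # hypothetical target image and target class
attack = foolbox.attacks.BoundaryAttack(fmodel, criterion=foolbox.criteria.TargetClass(dog_label))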
# This fragment runs inside a loop over images (`continue` skips to the next one).
test = image.copy()
preds = kmodel.predict(preprocess_input(np.expand_dims(test, 0)))
label = np.argmax(preds)
# print("Top 3 predictions (regular): ", decode_predictions(preds, top=3))

# run the attack
print("running the attack")
attack = MIM(model=fmodel, criterion=Misclassification())
adversarial = attack(image[:, :, ::-1], label)
if adversarial is None:
    print("Did not find an adversarial")
    continue

# show results
print(foolbox.utils.softmax(fmodel.predictions(adversarial))[781])
adversarial_rgb = adversarial[np.newaxis, :, :, ::-1]
preds = kmodel.predict(preprocess_input(adversarial_rgb.copy()))
adv_label = np.argmax(preds)
if adv_label != label:
    success += 1
# print("Top 5 predictions (adversarial): ", decode_predictions(preds, top=5))

diff = (adversarial_rgb[0] - image)
# normalize to 0-1 for viewing with matplotlib
a = adversarial_rgb[0].copy()
nx = (a - np.min(a)) / (np.max(a) - np.min(a))
nd = (diff - np.min(diff)) / (np.max(diff) - np.min(diff))
max_norm = np.max(np.abs(diff))
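# Assumed imports for the fragment above (a sketch): `MIM` is presumably an alias for Foolbox's
# MomentumIterativeAttack, with `fmodel`, `kmodel`, `preprocess_input` and the `success` counter
# set up as in the other ResNet50 snippets.
from foolbox.attacks import MomentumIterativeAttack as MIM
from foolbox.criteria import Misclassification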
def main():
    # Load Keras model
    model = load_model(r'...........................h5')  # first model
    # Switch softmax with linear activations -- to avoid the softmax
    Ptype = 'probabilities'  # 'logits' # 'probabilities'

    # 64x64, 2 digits
    img_rows, img_cols, img_chans = 128, 128, 1
    input_shape = (img_rows, img_cols, img_chans)
    num_classes = 2
    jpeg_quality = 85
    jpeg = 0  # 'true'
    compressJPEG = 0  # 'true'

    # ---------------------------------------------------------
    # Load test data and define labels (numImg, 64, 64)
    # ---------------------------------------------------------
    images = glob(r'...................\*.png')  # images for the first model (Manipulated class)
    label = 0  # label = 1 for Original and label = 0 for Manipulated class

    # mismatched model: load the second Keras model
    model2 = load_model(r'...................h5')  # load second model
    label2 = 1

    # We compute accuracy based on the number of images
    numImg = len(images)
    np.random.seed(1234)
    index = np.random.randint(len(images), size=numImg)

    x_test = np.zeros((numImg, img_rows, img_cols))
    for i in np.arange(numImg):
        img = imread(images[index[i]], flatten=False)  # flatten=True means convert to gray on the fly
        if compressJPEG:
            img1 = Image.fromarray(img)
            img1.save('temp.jpeg', "JPEG", quality=jpeg_quality)
            img = Image.open('temp.jpeg')
        x_test[i] = img

    # Labels of authentic images = 1 (non-authentic = 0).
    y_test_c = np.tile(label, numImg)
    # Convert labels to one-hot with Keras
    y_test = keras.utils.to_categorical(y_test_c, num_classes)

    # Reshape test data, divide by 255 because the net was trained this way
    x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, img_chans)
    x_test = x_test.astype('float32')
    x_test /= 255

    # Test legitimate examples
    score = model.evaluate(x_test, y_test, verbose=0)  # returns the loss and metrics (accuracy, ...) in test mode
    predicted_legitimate_labels = np.argmax(model.predict(x_test), axis=1)
    print('Accuracy on legitimate images (all): {:3.4f}'.format(score[1]))

    y_test_c2 = np.tile(label2, numImg)
    y_test2 = keras.utils.to_categorical(y_test_c2, num_classes)  # one-hot representation
    score2 = model2.evaluate(x_test, y_test2, verbose=0)  # returns the loss and metrics (accuracy, ...) in test mode
    # predicted_legitimate_labels2 = np.argmax(model2.predict(x_test), axis=1)
    print('Accuracy on legitimate images (all) by mismatched model: {:3.4f}'.format(score2[1]))

    # ------------------------------------------------------------------------------------------------------------------
    # Attack the first images of the test set
    # ------------------------------------------------------------------------------------------------------------------
    # Wrap model
    fmodel = KerasModel(model, bounds=(0, 1), predicts=Ptype)

    # Prepare attack
    # attack = foolbox.attacks.FGSM(fmodel)
    # attack = foolbox.attacks.DeepFoolAttack(fmodel)
    # attack = foolbox.attacks.SaliencyMapAttack(fmodel, threshold=PSNR2MSE(55))
    # attack = foolbox.attacks.LBFGSAttack(fmodel)
    attack = foolbox.attacks.LBFGSAttack(fmodel, threshold=PSNR2MSE(55))  # LBFGS attack with a PSNR limit

    # ------ Get data, labels and categorical labels ***only for correctly classified examples***
    l = np.argwhere(predicted_legitimate_labels == y_test_c).shape[0]
    x_test_ok = np.reshape(x_test[np.array(np.argwhere(predicted_legitimate_labels == y_test_c)), :, :, :],
                           (l, img_rows, img_cols, img_chans))
    test_ok_index = index[np.array(np.argwhere(predicted_legitimate_labels == y_test_c))]
    # x_test_ok holds the images correctly classified by the first model, since we do not want to attack
    # misclassified images
    y_test_ok = np.reshape(y_test[np.argwhere(predicted_legitimate_labels == y_test_c), :], (l, num_classes))
    y_test_c_ok = np.argmax(y_test_ok, axis=1)

    y_test_c_ok_2 = np.tile(label2, l)
    y_test_ok_2 = keras.utils.to_categorical(y_test_c_ok_2, num_classes)
    score3 = model2.evaluate(x_test_ok, y_test_ok_2, verbose=0)
    predicted_legitimate_labels2 = np.argmax(model2.predict(x_test_ok), axis=1)

    l = np.argwhere(predicted_legitimate_labels2 == y_test_c_ok_2).shape[0]
    x_test_ok = np.reshape(x_test_ok[np.array(np.argwhere(predicted_legitimate_labels2 == y_test_c_ok_2)), :, :, :],
                           (l, img_rows, img_cols, img_chans))
    y_test_ok = np.reshape(y_test_ok[np.argwhere(predicted_legitimate_labels2 == y_test_c_ok_2), :], (l, num_classes))
    y_test_c_ok = np.argmax(y_test_ok, axis=1)
    test_ok_index = np.squeeze(test_ok_index[np.array(np.argwhere(predicted_legitimate_labels2 == y_test_c_ok_2))])

    # ------------------
    # Elaborate n_test adversarial examples ***only for correctly classified examples***
    n_test = l
    S = 0
    S_int = 0
    S_jpg = 0
    avg_Max_dist = 0
    avg_L1_dist = 0
    avg_Max_dist_made_integer = 0
    avg_L1_dist_made_integer = 0
    avg_No_Mod_Pixels = 0
    avg_No_Mod_Pixels_integer_rounding_adv_img = 0
    avg_No_Mod_Pixels_integer_NO_rounding = 0
    PSNR = 0
    t = 0
    avg_psnr = 0
    avg_psnr_int = 0
    psnr_org = 0  # per image
    psnr_Int = 0  # per image
    max_diff_integer = 0
    max_diff = 0
    adv_images = np.zeros((n_test, img_rows, img_cols, img_chans))
    adv_images_integer = np.zeros((n_test, img_rows, img_cols, img_chans))
    true_labels_cat = []

    for idx in np.arange(n_test):  # n_test should be at most the length of x_test_ok
        image = x_test_ok[idx]
        true_labels_cat.append(y_test_ok[idx, :])
        image = image.astype('float32')
        image_original = 255 * image.reshape((img_rows, img_cols))
        if compressJPEG:
            img1 = Image.fromarray(np.uint8(255 * image[:, :, 0]))
            img1.save('temp.jpeg', "JPEG", quality=jpeg_quality)
            img_reread = Image.open('temp.jpeg')
            image = np.array(img_reread)
            image = np.reshape(image, (img_rows, img_cols, img_chans))

        # Generate adversarial images
        adv_images[idx] = attack(image, y_test_c_ok[idx])
        adversarial_image = 255 * adv_images[idx].reshape((img_rows, img_cols))
        Z = np.uint8(np.round(adversarial_image))

        # Store adversarial integer images
        path1 = 'E:/......................./'  # output folder
        cv2.imwrite(os.path.join(path1, os.path.basename(images[test_ok_index[idx]])), Z)

        # Store the (normalized) adversarial noise
        path2 = 'E:\\..................\\'
        diff_noise = adversarial_image - image_original
        Noise = np.uint8((diff_noise - np.min(diff_noise)) / (np.max(diff_noise) - np.min(diff_noise)))
        cv2.imwrite(path2 + 'adv_Noise_%d.png' % idx, 255 * Noise)

        adv_images_integer[idx] = np.reshape(Z / 255., (img_rows, img_cols, 1))

        # Scores of legitimate and adversarial images for each idx
        scoreTemp = fmodel.predictions(image)
        true_score = foolbox.utils.softmax(scoreTemp)
        true_class = np.argmax(true_score)  # the ground-truth class according to network 1
        adv_score = foolbox.utils.softmax(fmodel.predictions(adv_images[idx]))
        adv_class = np.argmax(adv_score)
        adv_integer_score = foolbox.utils.softmax(fmodel.predictions(adv_images_integer[idx]))
        adv_integer_class = np.argmax(adv_integer_score)

        print('Image {}. Class changed from {} to {}. The score passes from {} to {}'.format(
            idx, true_class, adv_class, true_score, adv_score))
        print('Image made integer {}. Class changed from {} to {}. The score passes from {} to {}'.format(
            idx, true_class, adv_integer_class, true_score, adv_integer_score))

        # the if below works around the case where the attack fails and returns a matrix of NaN values
        if np.any(np.isnan(adv_images[idx])):
            adv_class = true_class
            adv_integer_class = true_class
            t = t + 1
            print('An adversarial image cannot be found!!')
        if true_class == adv_class:
            S = S + 1
        if true_class == adv_integer_class:
            S_int = S_int + 1

        # plot image, adv_image and difference
        image_before = 255 * image.reshape((img_rows, img_cols))
        X = np.uint8(image_before)  # uint8 has no truncation effect here
        diff = np.double(image_before) - np.double(adversarial_image)
        print('Max distortion adversarial = {:3.4f}; L1 distortion = {:3.4f}'.format(
            abs(diff).max(), abs(diff).sum() / (img_rows * img_cols)))
        print('Percentage of modified pixels on integers = {:3.4f}. '
              'Percentage of negative modifications = {:3.4f}'.format(
                  np.count_nonzero(diff) / (img_rows * img_cols),
                  np.count_nonzero(np.double(abs(diff)) - np.double(diff)) / (img_rows * img_cols)))

        diff_integer = np.double(X) - np.double(Z)
        max_diff_integer = diff_integer.max()
        max_diff = diff.max()
        path3 = 'E:\\Benedetta_for_ICASSP\\IMAGE_Diff_Int\\'
        Noise2 = np.uint8((diff_integer - np.min(diff_integer)) / (np.max(diff_integer) - np.min(diff_integer)))
        cv2.imwrite(path3 + 'adv_Noise_%d.png' % idx, 255 * Noise2)
        print('Max distortion adversarial integer = {:3.4f}; L1 distortion = {:3.4f}'.format(
            abs(diff_integer).max(), abs(diff_integer).sum() / (img_rows * img_cols)))
        # show_figures(X, Z, true_score, adv_score)

        # Compute PSNR between the original and the adversarial / adversarial integer image
        psnr_org = psnr(image_before, adversarial_image)
        print('PSNR = {:3.4f}'.format(abs(psnr_org)))
        psnr_Int = psnr(X, Z)
        print('PSNR (Integer) = {:3.4f}'.format(abs(psnr_Int)))

        # update average distortion
        if true_class != adv_class:
            avg_Max_dist = avg_Max_dist + abs(diff).max()
            avg_L1_dist = avg_L1_dist + abs(diff).sum() / (img_rows * img_cols)
            avg_No_Mod_Pixels = avg_No_Mod_Pixels + np.count_nonzero(diff) / (img_rows * img_cols)
            avg_psnr = avg_psnr + psnr(image_before, adversarial_image)
        if true_class != adv_integer_class:
            avg_Max_dist_made_integer = avg_Max_dist_made_integer + abs(diff_integer).max()
            avg_L1_dist_made_integer = avg_L1_dist_made_integer + abs(diff_integer).sum() / (img_rows * img_cols)
            # after rounding the adversarial image to integers
            avg_No_Mod_Pixels_integer_rounding_adv_img = (avg_No_Mod_Pixels_integer_rounding_adv_img
                                                          + np.count_nonzero(diff_integer) / (img_rows * img_cols))
            # without rounding, counting the difference only when the true class and the modified class differ
            avg_No_Mod_Pixels_integer_NO_rounding = (avg_No_Mod_Pixels_integer_NO_rounding
                                                     + np.count_nonzero(diff) / (img_rows * img_cols))
            avg_psnr_int = avg_psnr_int + psnr(X, Z)

        # -------------------------------
        # Compress the image with JPEG and test again
        # -------------------------------
        if jpeg:
            img1 = Image.fromarray(Z)
            img1.save('temp.jpeg', "JPEG", quality=jpeg_quality)
            adv_reread = Image.open('temp.jpeg')
            x_test_comp = np.array(adv_reread)
            x_test_comp = x_test_comp.reshape(img_rows, img_cols, img_chans)
            x_test_comp = x_test_comp.astype('float32')
            x_test_comp /= 255
            adv_reread_score = foolbox.utils.softmax(fmodel.predictions(x_test_comp))
            adv_reread_class = np.argmax(adv_reread_score)
            if true_class == adv_reread_class:
                S_jpg = S_jpg + 1
            print('Class after JPEG compression {}, with score {}.'.format(adv_reread_class, adv_reread_score))
            x_test_comp = 255 * x_test_comp.reshape((img_rows, img_cols))
            print('PSNR = {}'.format(psnr(image_before, x_test_comp)))
            PSNR = psnr(image_before, x_test_comp) + PSNR

    n = n_test - S
    n_int = n_test - S_int
    print('Class for the adversarial unchanged: {} over {}'.format(S, n_test))  # images on which the attack did not work
    print('Class for the adversarial integer unchanged: {} over {}'.format(S_int, n_test))  # same, for the integer images
    print('Average distortion: max dist {}, L1 dist {}'.format(avg_Max_dist / n, avg_L1_dist / n))
    print('Average distortion (made integer): max dist {}, L1 dist {}'.format(
        avg_Max_dist_made_integer / n_int, avg_L1_dist_made_integer / n_int))
    print('Average no of modified pixels: {}'.format(avg_No_Mod_Pixels / n))
    print('Average no of modified pixels on integers NO ROUNDING: {}'.format(
        avg_No_Mod_Pixels_integer_NO_rounding / n_int))
    print('Average no of modified pixels on integers rounding adv_img to int: {}'.format(
        avg_No_Mod_Pixels_integer_rounding_adv_img / n_int))
    print('The adversarial image cannot be found {} times over {}'.format(t, n_test))
    if jpeg:
        print('Percentage of adversarial JPEG unchanged with QF {} (the attack is not successful): {}'.format(
            jpeg_quality, S_jpg / n_test))
        print('Average PSNR distortion for JPEG adversarial images: {}'.format(PSNR / n_test))

    # Evaluate accuracy
    true_labels_cat = np.array(true_labels_cat)
    adv_score = model.evaluate(adv_images, true_labels_cat, verbose=0)
    adv_score_integer = model.evaluate(adv_images_integer, true_labels_cat, verbose=0)
    score_perfect = model.evaluate(x_test_ok, y_test_ok, verbose=0)
    print('Accuracy on legitimate images (all) by N1: {:3.4f}'.format(score[1]))
    print('Accuracy on legitimate images (all) by mismatched model N2: {:3.4f}'.format(score2[1]))
    print('Accuracy on legitimate images (only correctly classified, obviously 1) N1: {:3.4f}'.format(score_perfect[1]))
    print('Accuracy on adversarial images N1: {:3.4f}'.format(adv_score[1]))
    print('Attack success rate on adversarial images N1: {:3.4f}'.format(1 - adv_score[1]))
    print('Accuracy on adversarial images (made integer) N1: {:3.4f}'.format(adv_score_integer[1]))
    print('Attack success on adversarial images (made integer) N1: {:3.4f}'.format(1 - adv_score_integer[1]))
    print('Average PSNR =: {:3.4f}'.format(avg_psnr / n))
    print('Average PSNR (Integer) =: {:3.4f}'.format(avg_psnr_int / n_int))

    # SECOND PART
    # Test the adversarial images against the second (mismatched) model
    # Label
    # label3 = np.abs(1 - label2)  # it may differ from label because of the differences between the models
    # Labels
    y_test_c = np.tile(label2, n_test)
    # Convert labels to one-hot with Keras
    y_test2 = keras.utils.to_categorical(y_test_c, num_classes)
    # Test
    adv_score_mismatch = model2.evaluate(adv_images, y_test2, verbose=0)
    # evaluate model 2 in the same way, but on adv_images_integer instead of adv_images
    adv_score_mismatch_on_integer = model2.evaluate(adv_images_integer, y_test2, verbose=0)
    print('Accuracy on adversarial images with the mismatched model N2: {:3.4f}'.format(adv_score_mismatch[1]))
    print('Attack success rate on adversarial images with the mismatched model N2: {:3.4f}'.format(
        1 - adv_score_mismatch[1]))
    print('Accuracy on adversarial images with the mismatched model (Integer) N2: {:3.4f}'.format(
        adv_score_mismatch_on_integer[1]))
    print('Attack success rate on adversarial images with the mismatched model (Integer) N2: {:3.4f}'.format(
        1 - adv_score_mismatch_on_integer[1]))
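# The script above calls psnr() and PSNR2MSE() without defining them. A minimal sketch of what they
# might look like (an assumption, not the original helpers); note that the right peak value depends on
# the image scale: 255 for the 8-bit images passed to psnr(), 1.0 if the threshold is meant for a
# Foolbox model with bounds=(0, 1).
import numpy as np

def PSNR2MSE(psnr_db, max_val=255.0):
    # Invert PSNR = 10*log10(MAX^2 / MSE) to obtain the MSE corresponding to a PSNR budget in dB.
    return (max_val ** 2) / (10.0 ** (psnr_db / 10.0))

def psnr(a, b, max_val=255.0):
    # Peak signal-to-noise ratio between two images assumed to lie in [0, max_val].
    mse = np.mean((np.double(a) - np.double(b)) ** 2)
    return np.inf if mse == 0 else 10.0 * np.log10((max_val ** 2) / mse)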
def save_adv_image(adversarial, global_iterations):
    save_image_name = 'adversarial_image_{0}_steps_{1}_calls.jpg'.format(
        global_iterations, adversarial._total_prediction_calls)
    adv_image = (adversarial.image + 1) * 255 / 2   # rescale from [-1, 1] to [0, 255]
    adv_image = adv_image.astype(np.uint8)
    adv_image_pil = Image.fromarray(adv_image)
    adv_image_pil.save(save_image_name)
    return


save_adv_image(adversarial, global_iterations)
# np.save('adversarial_image_{0}'.format(global_iterations), adversarial.image)

for i in range(5):
    adversarial = attack(adversarial, unpack=False, iterations=iteration_size,
                         log_every_n_steps=10, verbose=True)
    global_iterations += iteration_size
    save_adv_image(adversarial, global_iterations)
    # np.save('adversarial_image_{0}'.format(global_iterations), adversarial.image)

# show results
print(np.argmax(fmodel.predictions(adversarial.image)))
preds = kmodel.predict(np.expand_dims(adversarial.image.copy(), 0))
print("Top 5 predictions (adversarial): ", decode_predictions(preds, top=5))
print("made {0} model calls so far".format(adversarial._total_prediction_calls))
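# Assumed context for the snippet above (a sketch, not the original setup): the (x + 1) * 255 / 2
# rescaling suggests the wrapped model expects inputs in [-1, 1], e.g. something like:
import foolbox
from keras.applications.inception_v3 import InceptionV3, decode_predictions

kmodel = InceptionV3(weights='imagenet')                    # hypothetical choice of base model
fmodel = foolbox.models.KerasModel(kmodel, bounds=(-1, 1))  # inputs scaled to [-1, 1]
attack = foolbox.attacks.BoundaryAttack(fmodel)             # as in the earlier boundary-attack snippet
iteration_size = 1000
global_iterations = 0
# `adversarial` would come from an initial attack(...) call with unpack=False, as shown earlier.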
def main():
    # Load Keras model
    model = load_model(r'.................................................h5')
    Ptype = 'probabilities'  # (default) with the softmax
    # Switch softmax with linear activations -- to avoid the softmax
    # model = force_linear_activation(model=model, savemodel=None)
    # Ptype = 'logits'

    compressJPEG = 0  # 'true'
    jpeg = 0
    jpeg_quality = 85

    # size (grayscale images)
    img_rows, img_cols, img_chans = 64, 64, 1
    num_classes = 2

    # ---------------------------------------------------------
    # Load test data, define labels, test the model
    # ---------------------------------------------------------
    images = glob(r'F:..................................\*.png')
    label = 1  # label = 0 for Manipulated, 1 for Original ------ for StammNets it is the reverse (0 for Original)

    # number of images for testing the model
    # numImg = len(images)  # <= len(images)
    numImg = 100
    # np.random.seed(1234)
    # index = np.random.randint(len(images), size=numImg)
    index = np.arange(numImg)

    x_test = np.zeros((numImg, img_rows, img_cols))
    for i in np.arange(numImg):
        img = imread(images[index[i]])  # flatten=True would convert to gray on the fly
        if compressJPEG:
            img1 = Image.fromarray(img)
            img1.save('temp.jpeg', "JPEG", quality=jpeg_quality)
            img = Image.open('temp.jpeg')
        x_test[i] = img

    # Labels
    y_test_c = np.tile(label, numImg)
    # Convert labels to one-hot with Keras
    y_test = keras.utils.to_categorical(y_test_c, num_classes)

    # Reshape test data, divide by 255 because the net was trained this way
    x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, img_chans)
    x_test = x_test.astype('float32')
    x_test /= 255

    # Test legitimate examples
    score = model.evaluate(x_test, y_test, verbose=0)
    predicted_legitimate_labels = np.argmax(model.predict(x_test), axis=1)
    print('Accuracy on legitimate images (all): {:3.4f}'.format(score[1]))

    # ------------------------------------------------------------------------------------------------------------------
    # Attack the [correctly classified] images in the test set
    # ------------------------------------------------------------------------------------------------------------------
    # Wrap model
    fmodel = KerasModel(model, bounds=(0, 1), predicts=Ptype)  # KerasModel creates a Foolbox model from a Keras model

    # Prepare attack
    # attack = foolbox.attacks.IterativeGradientSignAttack(fmodel)
    # attack = foolbox.attacks.DeepFoolAttack(fmodel)
    attack = foolbox.attacks.SaliencyMapAttack(fmodel)
    # attack = foolbox.attacks.BIM(fmodel)
    # attack = foolbox.attacks.LBFGSAttack(fmodel)

    # ------ Get data, labels and categorical labels ***only for correctly classified examples***
    l = np.argwhere(predicted_legitimate_labels == y_test_c).shape[0]  # number of correctly classified legitimate images
    x_test_ok = np.reshape(
        x_test[np.array(np.argwhere(predicted_legitimate_labels == y_test_c)), :, :, :],
        (l, img_rows, img_cols, img_chans))  # the correctly classified images, as a NumPy array
    y_test_ok = np.reshape(
        y_test[np.argwhere(predicted_legitimate_labels == y_test_c), :],
        (l, num_classes))
    y_test_c_ok = np.argmax(y_test_ok, axis=1)

    # ------------------
    # Elaborate n_test adversarial examples ***only for correctly classified examples*** (at most l)
    n_test = l  # 150  # must be at most l: how many of the correctly classified images to attack

    S = 0
    S_jpg = 0
    avg_Max_dist = 0
    avg_L1_dist = 0
    avg_No_Mod_Pixels = 0
    t = 0
    avg_psnr = 0
    PSNR = 0
    psnr_org = 0
    adv_images = np.zeros((n_test, img_rows, img_cols, img_chans))
    true_labels_cat = []

    for idx in np.arange(n_test):
        image = x_test_ok[idx]
        true_labels_cat.append(y_test_ok[idx, :])
        image = image.astype('float32')
        if compressJPEG:
            img1 = Image.fromarray(np.uint8(255 * image[:, :, 0]))
            img1.save('temp.jpeg', "JPEG", quality=jpeg_quality)
            img_reread = Image.open('temp.jpeg')
            image = np.array(img_reread)
            image = np.reshape(image, (img_rows, img_cols, img_chans))

        # Generate adversarial images
        adv_images[idx] = attack(image, y_test_c_ok[idx])
        adversarial_image = 255 * adv_images[idx].reshape((img_rows, img_cols))
        # np.save('.................................' % idx, adversarial_image)
        # path_adv_Image = '..................................'
        # adversarial = adversarial_image
        # cv2.imwrite(path_adv_Image + 'adv_%d.png' % idx, adversarial)

        # Scores of legitimate and adversarial images for each idx
        scoreTemp = fmodel.predictions(image)
        true_score = foolbox.utils.softmax(scoreTemp)
        true_class = np.argmax(true_score)
        adv_score = foolbox.utils.softmax(fmodel.predictions(adv_images[idx]))
        adv_class = np.argmax(adv_score)
        print('Image {}. Class changed from {} to {}. The score passes from {} to {}'.format(
            idx, true_class, adv_class, true_score, adv_score))
        # print('After rounding. Class changed from {} to {}. The score passes from {} to {}'.format(
        #     idx, true_class, Z_class, true_score, Z_score))

        # the if below works around the case where the attack fails and returns a matrix of NaN values
        if np.any(np.isnan(adv_images[idx])):
            adv_class = true_class  # attack not successful
            t = t + 1
            print('An adversarial image cannot be found!!')
        if true_class == adv_class:
            S = S + 1

        # plot image, adv_image and difference
        # Measure the distortion between the original image and the attacked image
        image_before = 255 * image.reshape((img_rows, img_cols))
        diff = np.double(image_before) - np.double(adversarial_image)
        # diff = np.double(image_before) - np.double(Z)
        print('Max distortion adversarial [after rounding] = {:3.4f}; L1 distortion = {:3.4f}'.format(
            abs(diff).max(), abs(diff).sum() / (img_rows * img_cols)))
        print('Percentage of modified pixels [after rounding] = {:3.4f}'.format(
            np.count_nonzero(diff) / (img_rows * img_cols)))
        psnr_org = psnr(image_before, adversarial_image)
        print('PSNR = {:3.4f}'.format(abs(psnr_org)))
        X = np.uint8(image_before)
        # Z = np.uint8(np.round(adversarial_image))
        # show_figures(X, Z, true_score, Z_score)
        # to save the result of the attack, save the Z matrix: Z.save(...)

        # update average distortion
        if true_class != adv_class:
            avg_Max_dist = avg_Max_dist + abs(diff).max()
            avg_L1_dist = avg_L1_dist + abs(diff).sum() / (img_rows * img_cols)
            avg_No_Mod_Pixels = avg_No_Mod_Pixels + np.count_nonzero(diff) / (img_rows * img_cols)
            avg_psnr = avg_psnr + psnr(image_before, adversarial_image)

        # -------------------------------
        # Compress the image with JPEG and test again
        # -------------------------------
        '''if jpeg:
            # cv2.imwrite('tmp.jpg', Z[::-1], [int(cv2.IMWRITE_JPEG_QUALITY), jpeg_quality])
            # adv_reread = imread('tmp.jpg')
            img1 = Image.fromarray(Z)
            img1.save('temp.jpeg', "JPEG", quality=jpeg_quality)
            adv_reread = Image.open('temp.jpeg')
            x_test_comp = np.array(adv_reread)
            x_test_comp = x_test_comp.reshape(img_rows, img_cols, img_chans)
            x_test_comp = x_test_comp.astype('float32')
            x_test_comp /= 255
            adv_reread_score = foolbox.utils.softmax(fmodel.predictions(x_test_comp))
            adv_reread_class = np.argmax(adv_reread_score)
            if true_class == adv_reread_class:
                S_jpg = S_jpg + 1
            print('Class after JPEG compression {}, with score {}.'.format(adv_reread_class, adv_reread_score))
            x_test_comp = 255 * x_test_comp.reshape((img_rows, img_cols))
        '''

    n = n_test - S
    print('Adversarial failures: {} over {}'.format(S, n_test))
    print('Average distortion: max dist {}, L1 dist {}'.format(avg_Max_dist / n, avg_L1_dist / n))
    print('Average no of modified pixels: {}'.format(avg_No_Mod_Pixels / n))
    print('The adversarial image cannot be found {} times over {}'.format(t, n_test))
    if jpeg:
        print('Percentage of adversarial JPEG unchanged with QF {} (the attack is not successful): {}'.format(
            jpeg_quality, S_jpg / n_test))

    # Evaluate accuracy
    true_labels_cat = np.array(true_labels_cat)
    adv_score = model.evaluate(adv_images, true_labels_cat, verbose=0)
    # Z_score = model.evaluate(Z, true_labels_cat, verbose=0)
    score_perfect = model.evaluate(x_test_ok, y_test_ok, verbose=0)
    print('Accuracy on legitimate images (all): {:3.4f}'.format(score[1]))
    print('Accuracy on legitimate images (only correctly classified, obviously 1): {:3.4f}'.format(score_perfect[1]))
    print('Accuracy on adversarial images: {:3.4f}'.format(adv_score[1]))
    print('Attack success rate on adversarial images N1: {:3.4f}'.format(1 - adv_score[1]))
    print('Average PSNR =: {:3.4f}'.format(avg_psnr / n))
    # print('Accuracy on legitimate images (all) by mismatched model: {:3.4f}'.format(score2[1]))

    # SECOND PART
    # Load the second model and test the adversarial images
    # Label
    label3 = 1  # it may differ from label because of the differences between the models
    # Labels
    y_test_c = np.tile(label3, n_test)
    # Convert labels to one-hot with Keras
    y_test2 = keras.utils.to_categorical(y_test_c, num_classes)
# (attack-name list truncated in the original; earlier entries are missing)
attacks = ['SpatialAttack', 'CarliniWagnerL2Attack', 'LinfinityBasicIterativeAttack',
           'BasicIterativeMethod', 'L1BasicIterativeAttack', 'L2BasicIterativeAttack',
           'ProjectedGradientDescentAttack', 'ProjectedGradientDescent',
           'RandomStartProjectedGradientDescentAttack', 'RandomProjectedGradientDescent',
           'MomentumIterativeAttack', 'MomentumIterativeMethod']

image = cv2.imread('myTest7.JPEG')
imageS = cv2.resize(image, dsize=(224, 224)).astype(float)  # np.float is deprecated; use the builtin float
print('predicted class', np.argmax(fmodel.predictions(imageS[:, :, :])))
label = np.argmax(fmodel.predictions(imageS[:, :, :]))

for atk in attacks:
    # try a targeted criterion first, then fall back to misclassification-based criteria
    attack = eval('foolbox.attacks.' + atk + '(fmodel, criterion=TargetClassProbability(388, p=.8))')
    adversarial = attack(imageS[:, :, :], label)
    if type(adversarial) != np.ndarray:
        print(atk + ": targeted class not supported")
        attack = eval('foolbox.attacks.' + atk + '(fmodel, criterion=TopKMisclassification(2))')
        adversarial = attack(imageS[:, :, :], label)
    if type(adversarial) != np.ndarray:
        print(atk + ": top-k misclassification not supported")
        attack = eval('foolbox.attacks.' + atk + '(fmodel, criterion=ConfidentMisclassification(0.5))')
        adversarial = attack(imageS[:, :, :], label)
    if type(adversarial) == np.ndarray:
        sum(sum(adversarial[:, :, :] - imageS))  # total pixel difference (computed but not stored)
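# A sketch of a less fragile variant of the dispatch above (an alternative, not the original code):
# getattr avoids building strings for eval, and `fmodel`, `imageS`, `label` and `attacks` are assumed
# to be defined as in the snippet above.
import numpy as np
import foolbox
from foolbox.criteria import TargetClassProbability, TopKMisclassification, ConfidentMisclassification

criteria = [TargetClassProbability(388, p=.8), TopKMisclassification(2), ConfidentMisclassification(0.5)]
for atk in attacks:
    attack_cls = getattr(foolbox.attacks, atk)
    adversarial = None
    for criterion in criteria:
        adversarial = attack_cls(fmodel, criterion=criterion)(imageS, label)
        if isinstance(adversarial, np.ndarray):
            break
        print('{}: criterion {} not supported or attack failed'.format(atk, type(criterion).__name__))
    if isinstance(adversarial, np.ndarray):
        print(atk, 'total perturbation:', np.sum(adversarial - imageS))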