def adversarialAccuracy(model):
    """Attack every test sample and print the adversarial error rate.

    Wraps ``model`` in a foolbox ``KerasModel`` with bounds ``(0, 1)`` and
    runs a single-iteration ``L2BasicIterativeAttack`` (epsilon 1.0, step
    size 1.0, no binary search) against each entry of the module-level
    ``x_test`` / ``y_test_original`` arrays. An attack counts as successful
    when it returns an adversarial image instead of ``None``.

    After every 50th sample the running success rate is printed and, if the
    attack on that sample succeeded, the adversarial image is passed to
    ``displayImage`` and the norm of its perturbation is printed.

    Parameters
    ----------
    model
        Model object handed to ``KerasModel`` (presumably a Keras model).

    Returns
    -------
    None
        All results are reported on stdout.
    """
    keras_model = KerasModel(model, bounds=(0, 1), channel_axis=channel_axis)
    criterion = Misclassification()
    # The attack depends only on the model and the criterion, so it is built
    # once here rather than once per sample.
    attack = foolbox.attacks.L2BasicIterativeAttack(keras_model, criterion)

    length = x_test.shape[0]
    wrong = 0
    period = 50
    for i in range(length):
        image, label = x_test[i], y_test_original[i]
        image_adv = attack(image, label, epsilon=1.0, stepsize=1.0,
                           iterations=1, binary_search=False)
        succeeded = image_adv is not None
        if succeeded:
            prediction = np.argmax(
                keras_model.predictions_and_gradient(image_adv, label)[0])
            # Internal sanity check: whatever the attack returns must
            # actually be misclassified by the wrapped model.
            assert prediction != label
            wrong += 1
        if i % period == period - 1:
            print("Adversarial attack success rate: {} / {} = {}".format(
                wrong, i + 1, wrong / (i + 1)))
            if succeeded:
                displayImage(image_adv, label)
                print("Size of perturbation: {}".format(
                    LA.norm(image_adv - image, None)))

    # An empty test set has no defined error rate; report NaN instead of
    # raising ZeroDivisionError.
    error_rate = wrong / length if length else float("nan")
    print("Adversarial error rate: {} / {} = {}".format(
        wrong, length, error_rate))
def test_keras_model_gradients():
    """Check ``KerasModel`` input gradients against a finite difference.

    Builds a model consisting of a single global-average-pooling layer over a
    ``(5, 5, 1000)`` input, wraps it in ``KerasModel`` with non-trivial
    preprocessing, and verifies that stepping ``eps`` along the gradient
    returned by ``predictions_and_gradient`` changes the loss by approximately
    ``eps * ||g||^2``.
    """
    num_classes = 1000
    bounds = (0, 255)
    channels = num_classes

    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", category=DeprecationWarning)
        inputs = Input(shape=(5, 5, channels))
        logits = GlobalAveragePooling2D(
            data_format='channels_last')(inputs)

        # Draw the preprocessing divisor from a dedicated, seeded generator.
        # Using the global RNG here (before it is seeded below) made the
        # fixture differ on every run. A seed different from the one used
        # for test_image keeps the two arrays uncorrelated, and leaving the
        # global stream untouched keeps test_image unchanged.
        preprocessing_rng = np.random.RandomState(0)
        preprocessing = (
            np.arange(num_classes)[None, None],
            preprocessing_rng.uniform(size=(5, 5, channels)) + 1)

        model = KerasModel(
            Model(inputs=inputs, outputs=logits),
            bounds=bounds,
            predicts='logits',
            preprocessing=preprocessing)

    eps = 1e-3

    np.random.seed(22)
    test_image = np.random.rand(5, 5, channels).astype(np.float32)
    test_label = 7

    _, g1 = model.predictions_and_gradient(test_image, test_label)

    # Evaluate the loss half a step on either side of the image, along the
    # gradient direction (central difference).
    test_label_array = np.array([test_label])
    l1 = model._loss_fn(
        [test_image[None] - eps / 2 * g1, test_label_array])[0]
    l2 = model._loss_fn(
        [test_image[None] + eps / 2 * g1, test_label_array])[0]

    # The loss must increase measurably along the gradient.
    assert 1e5 * (l2 - l1) > 1

    # make sure that gradient is numerically correct
    np.testing.assert_array_almost_equal(
        1e5 * (l2 - l1),
        1e5 * eps * np.linalg.norm(g1)**2,
        decimal=1)
class FoolboxKerasModelEntropy(DifferentiableModel):
    """Foolbox model wrapper that masks input gradients with an entropy mask.

    Predictions and gradients are delegated to an internal ``KerasModel``.
    When ``entropy_mask`` is ``True`` every returned gradient is multiplied
    element-wise by a per-pixel mask that is replicated across the channel
    axis. The mask is either the cached ``grad_mask`` (when
    ``cache_grad_mask`` is ``True``) or one derived from the current image
    with ``utils.image2mask``.

    Parameters
    ----------
    model
        Model passed on to ``KerasModel``.
    bounds : tuple
        Passed on to the base class and to ``KerasModel``.
    channel_axis : int
        Passed on to the base class and to ``KerasModel``.
    preprocessing : tuple
        Passed on to the base class and to ``KerasModel``.
    predicts : str
        Passed on to ``KerasModel``.
    entropy_mask : bool
        Gradients are masked only when this is exactly ``True``.
    cache_grad_mask : bool
        When exactly ``True``, ``grad_mask`` is reused for every gradient
        instead of deriving a fresh mask from each image.
    """

    def __init__(self, model, bounds, channel_axis=3, preprocessing=(0, 1),
                 predicts='probabilities', entropy_mask=True,
                 cache_grad_mask=False):
        super(FoolboxKerasModelEntropy, self).__init__(
            bounds=bounds,
            channel_axis=channel_axis,
            preprocessing=preprocessing)

        self.entropy_mask = entropy_mask
        # Filled by compute_gradient_mask(); only read when cache_grad_mask
        # is enabled.
        self.grad_mask = None
        self.cache_grad_mask = cache_grad_mask
        self.keras_model = KerasModel(model, bounds, channel_axis,
                                      preprocessing, predicts)

    @staticmethod
    def _broadcast_mask(mask, image):
        """Replicate a 2-D ``mask`` across the channels of ``image``.

        The mask is flattened to a column, broadcast to ``image.shape[2]``
        columns and reshaped to ``image.shape``.
        """
        num_pixels = mask.shape[0] * mask.shape[1]
        column = mask.reshape(num_pixels, 1)
        return np.broadcast_to(
            column, (num_pixels, image.shape[2])).reshape(image.shape)

    def compute_gradient_mask(self, image, threshold=4.2, radius=3):
        """Compute the entropy mask for ``image`` and store it in ``grad_mask``.

        The image is converted to grayscale, its local entropy is computed
        over a disk-shaped neighbourhood, and the result is binarised: pixels
        with entropy below ``threshold`` become 0.0, all others 1.0. The
        binary mask is then replicated across the channels of ``image``.

        Parameters
        ----------
        image : `numpy.ndarray`
            Input indexed as ``(rows, columns, channels)``.
        threshold : float
            Entropy value at or above which a pixel is kept.
        radius : int
            Radius of the disk used as the entropy neighbourhood.
        """
        gray = skimage.color.rgb2gray(image)
        entropy = skimage.filters.rank.entropy(
            gray, skimage.morphology.disk(radius))
        binary = (entropy >= threshold).astype(entropy.dtype)
        self.grad_mask = self._broadcast_mask(binary, image)

    def __mask_gradient(self, grad, image):
        """Return ``grad`` multiplied by the active mask, if masking is on."""
        if self.entropy_mask is not True:
            return grad

        if self.cache_grad_mask is True:
            if self.grad_mask is None:
                # Caching was requested but no mask has been computed yet.
                # Build it from this image instead of failing on
                # ``grad * None``.
                self.compute_gradient_mask(image)
            return grad * self.grad_mask

        return grad * self._broadcast_mask(utils.image2mask(image), image)

    def predictions_and_gradient(self, image, label):
        """Calculates predictions for an image and the gradient of
        the cross-entropy loss w.r.t. the image.

        The gradient is masked before it is returned when masking is
        enabled.

        Parameters
        ----------
        image : `numpy.ndarray`
            Single input with shape as expected by the model
            (without the batch dimension).
        label : int
            Reference label used to calculate the gradient.

        Returns
        -------
        predictions : `numpy.ndarray`
            Vector of predictions (logits, i.e. before the softmax) with
            shape (number of classes,).
        gradient : `numpy.ndarray`
            The gradient of the cross-entropy loss w.r.t. the image. Will
            have the same shape as the image.

        See Also
        --------
        :meth:`gradient`

        """
        pred, grad = self.keras_model.predictions_and_gradient(image, label)
        return pred, self.__mask_gradient(grad, image)

    def num_classes(self):
        """Return the number of classes reported by the wrapped model."""
        return self.keras_model.num_classes()

    def batch_predictions(self, images):
        """Return the wrapped model's predictions for a batch of images."""
        return self.keras_model.batch_predictions(images)

    def backward(self, gradient, image):
        """Backpropagates the gradient of some loss w.r.t. the logits
        through the network and returns the gradient of that loss w.r.t
        to the input image.

        The gradient is masked before it is returned when masking is
        enabled.

        Parameters
        ----------
        gradient : `numpy.ndarray`
            Gradient of some loss w.r.t. the logits.
        image : `numpy.ndarray`
            Single input with shape as expected by the model
            (without the batch dimension).

        Returns
        -------
        gradient : `numpy.ndarray`
            The gradient w.r.t the image.

        See Also
        --------
        :meth:`gradient`

        """
        grad = self.keras_model.backward(gradient, image)
        return self.__mask_gradient(grad, image)