def test_fgsm_defences(fix_get_mnist_subset, image_dl_estimator, is_tf_version_2):
    """
    Attack a defended TensorFlow v2 classifier with the Fast Gradient Method.

    The classifier wraps the model returned by `image_dl_estimator` and is configured with three spatial smoothing
    defences (window sizes 3, 5 and 7). The attack and the MNIST subset are handed to
    `backend_test_defended_images` for the actual checks.

    Nothing is executed unless `is_tf_version_2` is truthy.
    """
    if not is_tf_version_2:
        return

    # Smoothing defences listed in order of increasing window size.
    smoothing_defences = [
        SpatialSmoothingTensorFlowV2(window_size=size, channels_first=False) for size in (3, 5, 7)
    ]

    base_classifier, _ = image_dl_estimator(one_classifier=True)
    defended_classifier = TensorFlowV2Classifier(
        clip_values=(0, 1),
        model=base_classifier.model,
        preprocessing_defences=smoothing_defences,
        loss_object=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
        input_shape=(28, 28, 1),
        nb_classes=10,
    )
    # All three defences must have been registered on the classifier.
    assert len(defended_classifier.preprocessing_defences) == 3

    fgsm = FastGradientMethod(defended_classifier, eps=1, batch_size=128)
    backend_test_defended_images(fgsm, fix_get_mnist_subset)
def _test_preprocessing_defences_forward(
    get_default_mnist_subset, image_dl_estimator, device_type, preprocessing_defences
):
    """
    Check that a classifier with preprocessing defences predicts the same values as applying the defences by hand.

    :param get_default_mnist_subset: Pair of (train, test) tuples; only the test images and labels are used.
    :param image_dl_estimator: Factory returning a classifier (and a second value that is ignored).
    :param device_type: Not used by this helper.
    :param preprocessing_defences: Defences to attach to the classifier and to apply manually, in order.
    """
    (_, _), (x_test_mnist, y_test_mnist) = get_default_mnist_subset

    classifier_, _ = image_dl_estimator(one_classifier=True)

    clip_values = (0, 1)
    loss_object = tf.keras.losses.CategoricalCrossentropy(from_logits=True)
    classifier = TensorFlowV2Classifier(
        clip_values=clip_values,
        model=classifier_.model,
        preprocessing_defences=preprocessing_defences,
        loss_object=loss_object,
        input_shape=(28, 28, 1),
        nb_classes=10,
    )

    # Predictions through the estimator, which applies the defences internally.
    predictions_classifier = classifier.predict(x_test_mnist)

    # Apply the same defences by hand
    x_test_defense = x_test_mnist
    for defence in preprocessing_defences:
        x_test_defense, _ = defence(x_test_defense, y_test_mnist)

    x_test_defense = tf.convert_to_tensor(x_test_defense)
    predictions_check = classifier_.model(x_test_defense)
    # `numpy()` already yields a host-side array; the PyTorch-style `.cpu()` hop is deprecated on TensorFlow
    # eager tensors and is not needed here.
    predictions_check = predictions_check.numpy()

    # Check that the prediction results match
    np.testing.assert_array_almost_equal(predictions_classifier, predictions_check, decimal=4)
def _predict_classifier(self, x: np.ndarray, batch_size: int, training_mode: bool, **kwargs) -> np.ndarray:
    """
    Run prediction through `TensorFlowV2Classifier.predict` on this instance.

    :param x: Input samples.
    :param batch_size: Batch size forwarded to the classifier's `predict`.
    :param training_mode: Forwarded unchanged to the classifier's `predict`.
    :param kwargs: Additional keyword arguments forwarded unchanged.
    :return: Whatever `TensorFlowV2Classifier.predict` returns.
    """
    predictions = TensorFlowV2Classifier.predict(
        self,
        x=x,
        batch_size=batch_size,
        training_mode=training_mode,
        **kwargs,
    )
    return predictions
def _test_preprocessing_defences_backward(
    get_default_mnist_subset, image_dl_estimator, device_type, preprocessing_defences
):
    """
    Check that the classifier's chained preprocessing gradient equals a manual, step-by-step backward pass.

    :param get_default_mnist_subset: Pair of (train, test) tuples; only the test images are used.
    :param image_dl_estimator: Factory returning a classifier (and a second value that is ignored).
    :param device_type: Not used by this helper.
    :param preprocessing_defences: Defences attached to the classifier under test.
    """
    (_, _), (x_test_mnist, y_test_mnist) = get_default_mnist_subset
    base_classifier, _ = image_dl_estimator()

    defended_classifier = TensorFlowV2Classifier(
        clip_values=(0, 1),
        model=base_classifier.model,
        preprocessing_defences=preprocessing_defences,
        loss_object=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
        input_shape=(28, 28, 1),
        nb_classes=10,
    )

    # Reference result: the classifier's own chained backward pass on random upstream gradients.
    upstream_gradients = np.random.randn(*x_test_mnist.shape)
    chained_gradients = defended_classifier._apply_preprocessing_gradient(x_test_mnist, upstream_gradients)

    # Forward pass: record the input seen by every operation. The last operation's output is never needed,
    # so it is not executed.
    operations = defended_classifier.preprocessing_operations
    stage_inputs = [x_test_mnist]
    for operation in operations[:-1]:
        stage_inputs.append(operation(stage_inputs[-1])[0])

    # Backward pass: walk the operations last-to-first, pairing each with the input it received.
    stepwise_gradients = upstream_gradients
    for operation, stage_input in zip(reversed(operations), reversed(stage_inputs)):
        stepwise_gradients = operation.estimate_gradient(stage_input, stepwise_gradients)

    np.testing.assert_array_almost_equal(chained_gradients, stepwise_gradients, decimal=4)
def _fit_classifier(self, x: np.ndarray, y: np.ndarray, batch_size: int, nb_epochs: int, **kwargs) -> None:
    """
    Train this instance through `TensorFlowV2Classifier.fit`.

    :param x: Training samples.
    :param y: Training labels.
    :param batch_size: Batch size forwarded to the classifier's `fit`.
    :param nb_epochs: Number of epochs forwarded to the classifier's `fit`.
    :param kwargs: Additional keyword arguments forwarded unchanged.
    :return: Whatever `TensorFlowV2Classifier.fit` returns.
    """
    fit_result = TensorFlowV2Classifier.fit(
        self,
        x,
        y,
        batch_size=batch_size,
        nb_epochs=nb_epochs,
        **kwargs,
    )
    return fit_result
def _weight_grad(classifier: TensorFlowV2Classifier, x: tf.Tensor, target: tf.Tensor) -> tf.Tensor:
    """
    Return the normalised gradient of the model's compiled loss with respect to its weights.

    The per-weight gradients are flattened and concatenated into one vector, which is then divided by its
    Euclidean norm. Weights for which the tape reports no gradient (`None`) are left out.

    :param classifier: Classifier whose `model` is evaluated with `training=False`.
    :param x: Model input.
    :param target: Target passed to the model's `compiled_loss` together with the model output.
    :return: Flattened, norm-scaled gradient vector.
    """
    import tensorflow as tf

    model = classifier.model
    with tf.GradientTape() as tape:
        tape.watch(model.weights)
        predictions = model(x, training=False)
        loss = model.compiled_loss(target, predictions)

    weight_gradients = tape.gradient(loss, model.weights)
    # Skip weights without a gradient and flatten the rest into a single vector.
    flat_gradient = tf.concat(
        [tf.reshape(grad, [-1]) for grad in weight_gradients if grad is not None],
        0,
    )
    l2_norm = tf.sqrt(tf.reduce_sum(tf.square(flat_gradient)))
    return flat_gradient / l2_norm
def loss_gradient(self, x: np.ndarray, y: np.ndarray, training_mode: bool = False, **kwargs) -> np.ndarray:
    """
    Compute the gradient of the loss function w.r.t. `x`.

    Without `sampling`, the call is delegated to `TensorFlowV2Classifier.loss_gradient`. With `sampling`, every
    preprocessed input is repeated `self.sample_size` times, perturbed with Gaussian noise of standard deviation
    `self.scale` (and clipped to `self.clip_values` if set), the softmax outputs are averaged per input and the
    mean categorical cross-entropy against `y` is differentiated w.r.t. the preprocessed input.

    :param x: Sample input with shape as expected by the model.
    :param y: Correct labels, one-vs-rest encoding.
    :param training_mode: `True` for model set to training mode and `False` for model set to evaluation mode.
    :param sampling: True if loss gradients should be determined with Monte Carlo sampling.
    :type sampling: `bool`
    :return: Array of gradients of the same shape as `x`.
    :raises ValueError: If sampling is requested while TensorFlow is not executing eagerly.
    """
    import tensorflow as tf  # lgtm [py/repeated-import]

    if not kwargs.get("sampling"):
        # No Monte Carlo sampling requested: use the parent implementation as is.
        return TensorFlowV2Classifier.loss_gradient(self, x=x, y=y, training_mode=training_mode, **kwargs)

    # Apply preprocessing
    x_preprocessed, _ = self._apply_preprocessing(x, y, fit=False)

    if not tf.executing_eagerly():
        raise ValueError("Expecting eager execution.")

    with tf.GradientTape() as tape:
        inputs_t = tf.convert_to_tensor(x_preprocessed)
        tape.watch(inputs_t)

        # Draw `sample_size` noisy copies of every input.
        repeated_inputs = tf.repeat(inputs_t, repeats=self.sample_size, axis=0)
        gaussian_noise = tf.random.normal(
            shape=repeated_inputs.shape,
            mean=0.0,
            stddev=self.scale,
            dtype=repeated_inputs.dtype,
            seed=None,
            name=None,
        )
        noisy_inputs = repeated_inputs + gaussian_noise
        if self.clip_values is not None:
            noisy_inputs = tf.clip_by_value(
                noisy_inputs,
                clip_value_min=self.clip_values[0],
                clip_value_max=self.clip_values[1],
                name=None,
            )

        model_outputs = self._model(noisy_inputs, training=training_mode)
        sample_probabilities = tf.nn.softmax(model_outputs, axis=1, name=None)

        # Regroup the samples per original input and average their class probabilities.
        grouped_probabilities = tf.reshape(
            sample_probabilities, shape=(-1, self.sample_size, model_outputs.shape[-1])
        )
        average_softmax = tf.reduce_mean(grouped_probabilities, axis=1)

        per_sample_loss = tf.keras.losses.categorical_crossentropy(
            y_true=y, y_pred=average_softmax, from_logits=False, label_smoothing=0
        )
        loss = tf.reduce_mean(per_sample_loss)

    input_gradients = tape.gradient(loss, inputs_t).numpy()

    # Apply preprocessing gradients
    return self._apply_preprocessing_gradient(x, input_gradients)
return tot_loss # Define Generator loss def generator_loss(fake_output): return cross_entropy(tf.ones_like(fake_output), fake_output) noise_dim = 100 capacity = 64 generator = TensorFlow2Generator(encoding_length=noise_dim, model=make_generator_model( capacity, noise_dim)) discriminator_classifier = TensorFlowV2Classifier( model=make_discriminator_model(capacity), nb_classes=2, input_shape=(28, 28, 1)) # Build GAN gan = TensorFlow2GAN( generator=generator, discriminator=discriminator_classifier, generator_loss=generator_loss, generator_optimizer_fct=tf.keras.optimizers.Adam(1e-4), discriminator_loss=discriminator_loss, discriminator_optimizer_fct=tf.keras.optimizers.Adam(1e-4), ) # Create BackDoorAttacks Class gan_attack = BackdoorAttackDGMTrail(gan=gan)
def _predict_classifier(self, x: np.ndarray, batch_size: int) -> np.ndarray:
    """
    Run prediction through `TensorFlowV2Classifier.predict` on this instance.

    :param x: Input samples.
    :param batch_size: Batch size forwarded to the classifier's `predict`.
    :return: Whatever `TensorFlowV2Classifier.predict` returns.
    """
    predictions = TensorFlowV2Classifier.predict(
        self,
        x=x,
        batch_size=batch_size,
    )
    return predictions