def test_krclassifier(self):
    """
    Second test with the KerasClassifier.
    :return:
    """
    # Build KerasClassifier
    krc, sess = get_classifier_kr()

    # Get MNIST
    (x_train, y_train), (x_test, y_test) = self.mnist

    # Attack
    up = UniversalPerturbation(krc, max_iter=1, attacker="ead",
                               attacker_params={"max_iter": 5, "targeted": False})
    x_train_adv = up.generate(x_train)
    self.assertTrue((up.fooling_rate >= 0.2) or not up.converged)

    x_test_adv = x_test + up.noise
    self.assertFalse((x_test == x_test_adv).all())

    train_y_pred = np.argmax(krc.predict(x_train_adv), axis=1)
    test_y_pred = np.argmax(krc.predict(x_test_adv), axis=1)
    self.assertFalse((np.argmax(y_test, axis=1) == test_y_pred).all())
    self.assertFalse((np.argmax(y_train, axis=1) == train_y_pred).all())

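# `get_classifier_kr` and `self.mnist` are fixtures from the surrounding test
# module and are not shown here. A minimal sketch of what such a helper might
# do, mirroring the small Keras CNN built explicitly in the second
# `test_krclassifier` below (layer sizes and the helper name are assumptions):
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
import keras.backend as k
from art.classifiers import KerasClassifier

def get_classifier_kr_sketch():
    model = Sequential()
    model.add(Conv2D(4, kernel_size=(5, 5), activation='relu', input_shape=(28, 28, 1)))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Flatten())
    model.add(Dense(10, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return KerasClassifier((0, 1), model, use_logits=False), k.get_session()
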
def test_tfclassifier(self):
    """
    First test with the TFClassifier.
    :return:
    """
    # Build a TFClassifier
    # Define input and output placeholders
    self._input_ph = tf.placeholder(tf.float32, shape=[None, 28, 28, 1])
    self._output_ph = tf.placeholder(tf.int32, shape=[None, 10])

    # Define the TensorFlow graph
    conv = tf.layers.conv2d(self._input_ph, 4, 5, activation=tf.nn.relu)
    conv = tf.layers.max_pooling2d(conv, 2, 2)
    fc = tf.contrib.layers.flatten(conv)

    # Logits layer
    self._logits = tf.layers.dense(fc, 10)

    # Train operator
    self._loss = tf.reduce_mean(
        tf.losses.softmax_cross_entropy(logits=self._logits, onehot_labels=self._output_ph))
    optimizer = tf.train.AdamOptimizer(learning_rate=0.01)
    self._train = optimizer.minimize(self._loss)

    # TensorFlow session and initialization
    self._sess = tf.Session()
    self._sess.run(tf.global_variables_initializer())

    # Get MNIST
    batch_size, nb_train, nb_test = 10, 10, 10
    (x_train, y_train), (x_test, y_test), _, _ = load_mnist()
    x_train, y_train = x_train[:nb_train], y_train[:nb_train]
    x_test, y_test = x_test[:nb_test], y_test[:nb_test]

    # Train the classifier
    tfc = TFClassifier((0, 1), self._input_ph, self._logits, self._output_ph,
                       self._train, self._loss, None, self._sess)
    tfc.fit(x_train, y_train, batch_size=batch_size, nb_epochs=2)

    # Attack
    # TODO Launch with all possible attacks
    attack_params = {"attacker": "newtonfool", "attacker_params": {"max_iter": 20}}
    up = UniversalPerturbation(tfc)
    x_train_adv = up.generate(x_train, **attack_params)
    self.assertTrue((up.fooling_rate >= 0.2) or not up.converged)

    x_test_adv = x_test + up.v
    self.assertFalse((x_test == x_test_adv).all())

    train_y_pred = np.argmax(tfc.predict(x_train_adv), axis=1)
    test_y_pred = np.argmax(tfc.predict(x_test_adv), axis=1)
    self.assertFalse((np.argmax(y_test, axis=1) == test_y_pred).all())
    self.assertFalse((np.argmax(y_train, axis=1) == train_y_pred).all())

def test_tfclassifier(self):
    """
    First test with the TFClassifier.
    :return:
    """
    # Build TFClassifier
    tfc, sess = get_classifier_tf()

    # Get MNIST
    (x_train, y_train), (x_test, y_test) = self.mnist

    # Attack
    up = UniversalPerturbation(tfc, max_iter=1, attacker="newtonfool",
                               attacker_params={"max_iter": 5})
    x_train_adv = up.generate(x_train)
    self.assertTrue((up.fooling_rate >= 0.2) or not up.converged)

    x_test_adv = x_test + up.noise
    self.assertFalse((x_test == x_test_adv).all())

    train_y_pred = np.argmax(tfc.predict(x_train_adv), axis=1)
    test_y_pred = np.argmax(tfc.predict(x_test_adv), axis=1)
    self.assertFalse((np.argmax(y_test, axis=1) == test_y_pred).all())
    self.assertFalse((np.argmax(y_train, axis=1) == train_y_pred).all())

def test_iris_pt(self):
    (_, _), (x_test, y_test) = self.iris
    classifier = get_iris_classifier_pt()

    attack_params = {"max_iter": 1, "attacker": "ead",
                     "attacker_params": {"max_iter": 5, "targeted": False}}
    attack = UniversalPerturbation(classifier)
    attack.set_params(**attack_params)
    x_test_adv = attack.generate(x_test)
    self.assertFalse((x_test == x_test_adv).all())
    self.assertTrue((x_test_adv <= 1).all())
    self.assertTrue((x_test_adv >= 0).all())

    preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
    self.assertFalse((np.argmax(y_test, axis=1) == preds_adv).all())
    acc = np.sum(preds_adv == np.argmax(y_test, axis=1)) / y_test.shape[0]
    logger.info('Accuracy on Iris with universal adversarial examples: %.2f%%', (acc * 100))

def test_iris_k_unbounded(self):
    (_, _), (x_test, y_test) = self.iris
    classifier, _ = get_iris_classifier_kr()

    # Recreate a classifier without clip values
    classifier = KerasClassifier(model=classifier._model, use_logits=False, channel_index=1)
    attack_params = {"max_iter": 1, "attacker": "newtonfool",
                     "attacker_params": {"max_iter": 5}}
    attack = UniversalPerturbation(classifier)
    attack.set_params(**attack_params)
    x_test_adv = attack.generate(x_test)
    self.assertFalse((x_test == x_test_adv).all())

    preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
    self.assertFalse((np.argmax(y_test, axis=1) == preds_adv).all())
    acc = np.sum(preds_adv == np.argmax(y_test, axis=1)) / y_test.shape[0]
    logger.info('Accuracy on Iris with universal adversarial examples: %.2f%%', (acc * 100))

def test_ptclassifier(self):
    """
    Third test with the PyTorchClassifier.
    :return:
    """
    # Build PyTorchClassifier
    ptc = get_classifier_pt()

    # Get MNIST
    (x_train, y_train), (x_test, y_test) = self.mnist
    # PyTorch expects channels-first (NCHW) inputs
    x_train = np.swapaxes(x_train, 1, 3)
    x_test = np.swapaxes(x_test, 1, 3)

    # Attack
    up = UniversalPerturbation(ptc, max_iter=1, attacker="newtonfool",
                               attacker_params={"max_iter": 5})
    x_train_adv = up.generate(x_train)
    self.assertTrue((up.fooling_rate >= 0.2) or not up.converged)

    x_test_adv = x_test + up.noise
    self.assertFalse((x_test == x_test_adv).all())

    train_y_pred = np.argmax(ptc.predict(x_train_adv), axis=1)
    test_y_pred = np.argmax(ptc.predict(x_test_adv), axis=1)
    self.assertFalse((np.argmax(y_test, axis=1) == test_y_pred).all())
    self.assertFalse((np.argmax(y_train, axis=1) == train_y_pred).all())

def atk_UniPerturb(x_train, x_test, y_train, y_test, classifier):
    attack_params = {"attacker": "newtonfool", "attacker_params": {"max_iter": 20}}
    up = UniversalPerturbation(classifier)
    x_train_adv = up.generate(x_train, **attack_params)
    x_test_adv = up.generate(x_test, **attack_params)

    print("After Universal Perturbation (NewtonFool) attack\n")
    evaluate(x_train, x_test, y_train, y_test, x_train_adv, x_test_adv, classifier)
    return x_test_adv, x_train_adv

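# The `evaluate` helper called above is not defined in this snippet. A minimal
# sketch of what it might look like, assuming the classifier exposes ART's
# `predict` API and the labels are one-hot encoded (both assumptions):
import numpy as np

def evaluate(x_train, x_test, y_train, y_test, x_train_adv, x_test_adv, classifier):
    for name, x, y in [("train (clean)", x_train, y_train),
                       ("test (clean)", x_test, y_test),
                       ("train (adv)", x_train_adv, y_train),
                       ("test (adv)", x_test_adv, y_test)]:
        preds = np.argmax(classifier.predict(x), axis=1)
        acc = np.sum(preds == np.argmax(y, axis=1)) / y.shape[0]
        print("Accuracy on %s: %.2f%%" % (name, acc * 100))
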
def test_krclassifier(self):
    """
    Second test with the KerasClassifier.
    :return:
    """
    # Initialize a tf session
    session = tf.Session()
    k.set_session(session)

    # Get MNIST
    batch_size, nb_train, nb_test = 10, 10, 10
    (x_train, y_train), (x_test, y_test), _, _ = load_mnist()
    x_train, y_train = x_train[:nb_train], y_train[:nb_train]
    x_test, y_test = x_test[:nb_test], y_test[:nb_test]

    # Create simple CNN
    model = Sequential()
    model.add(Conv2D(4, kernel_size=(5, 5), activation='relu', input_shape=(28, 28, 1)))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Flatten())
    model.add(Dense(10, activation='softmax'))

    model.compile(loss=keras.losses.categorical_crossentropy,
                  optimizer=keras.optimizers.Adam(lr=0.01),
                  metrics=['accuracy'])

    # Get classifier
    krc = KerasClassifier((0, 1), model, use_logits=False)
    krc.fit(x_train, y_train, batch_size=batch_size, nb_epochs=2)

    # Attack
    # TODO Launch with all possible attacks
    attack_params = {"attacker": "newtonfool", "attacker_params": {"max_iter": 20}}
    up = UniversalPerturbation(krc)
    x_train_adv = up.generate(x_train, **attack_params)
    self.assertTrue((up.fooling_rate >= 0.2) or not up.converged)

    x_test_adv = x_test + up.v
    self.assertFalse((x_test == x_test_adv).all())

    train_y_pred = np.argmax(krc.predict(x_train_adv), axis=1)
    test_y_pred = np.argmax(krc.predict(x_test_adv), axis=1)
    self.assertFalse((np.argmax(y_test, axis=1) == test_y_pred).all())
    self.assertFalse((np.argmax(y_train, axis=1) == train_y_pred).all())

def test_ptclassifier(self):
    """
    Third test with the PyTorchClassifier.
    :return:
    """
    # Get MNIST
    batch_size, nb_train, nb_test = 100, 1000, 10
    (x_train, y_train), (x_test, y_test), _, _ = load_mnist()
    # PyTorch's cross-entropy loss expects class indices, not one-hot labels
    x_train, y_train = x_train[:nb_train], np.argmax(y_train[:nb_train], axis=1)
    x_test, y_test = x_test[:nb_test], np.argmax(y_test[:nb_test], axis=1)
    # PyTorch expects channels-first (NCHW) inputs
    x_train = np.swapaxes(x_train, 1, 3)
    x_test = np.swapaxes(x_test, 1, 3)

    # Create simple CNN
    # Define the network
    model = Model()

    # Define a loss function and optimizer
    loss_fn = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.01)

    # Get classifier
    ptc = PyTorchClassifier((0, 1), model, loss_fn, optimizer, (1, 28, 28), (10,))
    ptc.fit(x_train, y_train, batch_size=batch_size, nb_epochs=1)

    # Attack
    # TODO Launch with all possible attacks
    attack_params = {"attacker": "newtonfool", "attacker_params": {"max_iter": 20}}
    up = UniversalPerturbation(ptc)
    x_train_adv = up.generate(x_train, **attack_params)
    self.assertTrue((up.fooling_rate >= 0.2) or not up.converged)

    x_test_adv = x_test + up.v
    self.assertFalse((x_test == x_test_adv).all())

    train_y_pred = np.argmax(ptc.predict(x_train_adv), axis=1)
    test_y_pred = np.argmax(ptc.predict(x_test_adv), axis=1)
    self.assertFalse((y_test == test_y_pred).all())
    self.assertFalse((y_train == train_y_pred).all())

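# `Model` is assumed to be defined elsewhere in the test module. A minimal
# sketch matching the (1, 28, 28) input shape and 10 classes used above
# (the layer sizes here are assumptions):
import torch
import torch.nn as nn

class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.conv = nn.Conv2d(1, 4, kernel_size=5)   # (1, 28, 28) -> (4, 24, 24)
        self.pool = nn.MaxPool2d(2, 2)               # (4, 24, 24) -> (4, 12, 12)
        self.fc = nn.Linear(4 * 12 * 12, 10)

    def forward(self, x):
        x = self.pool(torch.relu(self.conv(x)))
        x = x.view(x.size(0), -1)
        return self.fc(x)
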
                                     session, clip_min=min_, clip_max=max_)
elif args.adv_method == 'jsma':
    adv_crafter = SaliencyMapMethod(classifier, sess=session, clip_min=min_,
                                    clip_max=max_, gamma=1, theta=max_)
elif args.adv_method == 'carlini':
    adv_crafter = CarliniL2Method(classifier, sess=session, targeted=False, confidence=10)
else:
    adv_crafter = UniversalPerturbation(classifier, session, p=np.inf,
                                        attacker_params={'clip_min': min_, 'clip_max': max_})

X_train_adv = adv_crafter.generate(x_val=X_train)
X_test_adv = adv_crafter.generate(x_val=X_test)

if args.save:
    np.save(os.path.join(SAVE_ADV, "train.npy"), X_train_adv)
    np.save(os.path.join(SAVE_ADV, "test.npy"), X_test_adv)
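
# The saved arrays can later be reloaded for evaluation; this sketch assumes
# the same SAVE_ADV directory as above and omits the evaluation step itself:
X_train_adv = np.load(os.path.join(SAVE_ADV, "train.npy"))
X_test_adv = np.load(os.path.join(SAVE_ADV, "test.npy"))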