def _test_mnist_targeted(self, classifier, x_test):
    """Run a targeted BIM attack on MNIST, steering each sample to its runner-up class.

    :param classifier: ART classifier under test (must implement predict()).
    :param x_test: MNIST test images; asserted to be left unmodified by the attack.
    """
    x_test_original = x_test.copy()

    # Test BIM with np.inf norm
    attack = BasicIterativeMethod(classifier, eps=1.0, eps_step=0.01, targeted=True, batch_size=128)

    # Target the second-most-likely class of the clean prediction, one-hot
    # encoded. Vectorized fancy indexing replaces the former per-row loop.
    pred_sort = classifier.predict(x_test).argsort(axis=1)
    y_test_adv = np.zeros((x_test.shape[0], 10))
    y_test_adv[np.arange(x_test.shape[0]), pred_sort[:, -2]] = 1.0

    x_test_adv = attack.generate(x_test, y=y_test_adv)
    self.assertFalse((x_test == x_test_adv).all())

    test_y_pred = get_labels_np_array(classifier.predict(x_test_adv))
    self.assertEqual(y_test_adv.shape, test_y_pred.shape)
    # This doesn't work all the time, especially with small networks
    self.assertGreaterEqual((y_test_adv == test_y_pred).sum(), x_test.shape[0] // 2)

    # Check that x_test has not been modified by attack and classifier
    self.assertAlmostEqual(float(np.max(np.abs(x_test_original - x_test))), 0.0, delta=0.00001)
def _test_mnist_targeted(self, classifier):
    """Run a targeted BIM attack on MNIST, steering each sample to its runner-up class.

    :param classifier: ART classifier under test (must implement predict()).
    """
    # Get MNIST
    (_, _), (x_test, _) = self.mnist

    # Test BIM with np.inf norm
    attack = BasicIterativeMethod(classifier, eps=1.0, eps_step=0.01, targeted=True, batch_size=128)

    # Target the second-most-likely class of the clean prediction, one-hot
    # encoded. Vectorized fancy indexing replaces the former per-row loop.
    pred_sort = classifier.predict(x_test).argsort(axis=1)
    y_test_adv = np.zeros((x_test.shape[0], 10))
    y_test_adv[np.arange(x_test.shape[0]), pred_sort[:, -2]] = 1.0

    x_test_adv = attack.generate(x_test, y=y_test_adv)
    self.assertFalse((x_test == x_test_adv).all())

    test_y_pred = get_labels_np_array(classifier.predict(x_test_adv))
    self.assertEqual(y_test_adv.shape, test_y_pred.shape)
    # This doesn't work all the time, especially with small networks
    self.assertGreaterEqual((y_test_adv == test_y_pred).sum(), x_test.shape[0] // 2)
def _test_backend_mnist(self, classifier):
    """Untargeted BIM on both MNIST splits; checks perturbation and logs adversarial accuracy."""
    # Get MNIST
    (x_train, y_train), (x_test, y_test) = self.mnist

    # Test BIM with np.inf norm
    bim = BasicIterativeMethod(classifier, eps=1, eps_step=0.1, batch_size=128)
    adv_train = bim.generate(x_train)
    adv_test = bim.generate(x_test)

    # The attack must actually perturb the inputs.
    self.assertFalse((x_train == adv_train).all())
    self.assertFalse((x_test == adv_test).all())

    pred_train = get_labels_np_array(classifier.predict(adv_train))
    pred_test = get_labels_np_array(classifier.predict(adv_test))

    # At least some predictions must differ from the true labels.
    self.assertFalse((y_train == pred_train).all())
    self.assertFalse((y_test == pred_test).all())

    acc = np.sum(np.argmax(pred_train, axis=1) == np.argmax(y_train, axis=1)) / y_train.shape[0]
    logger.info('Accuracy on adversarial train examples: %.2f%%', (acc * 100))

    acc = np.sum(np.argmax(pred_test, axis=1) == np.argmax(y_test, axis=1)) / y_test.shape[0]
    logger.info('Accuracy on adversarial test examples: %.2f%%', (acc * 100))
def _test_backend_mnist(self, classifier, x_train, y_train, x_test, y_test):
    """Untargeted BIM on MNIST; also verifies the original test data is left untouched."""
    x_test_original = x_test.copy()

    # Test BIM with np.inf norm
    bim = BasicIterativeMethod(classifier, eps=1, eps_step=0.1, batch_size=128)
    adv_train = bim.generate(x_train)
    adv_test = bim.generate(x_test)

    # The attack must actually perturb the inputs.
    self.assertFalse((x_train == adv_train).all())
    self.assertFalse((x_test == adv_test).all())

    pred_train = get_labels_np_array(classifier.predict(adv_train))
    pred_test = get_labels_np_array(classifier.predict(adv_test))

    # At least some predictions must differ from the true labels.
    self.assertFalse((y_train == pred_train).all())
    self.assertFalse((y_test == pred_test).all())

    acc = np.sum(np.argmax(pred_train, axis=1) == np.argmax(y_train, axis=1)) / y_train.shape[0]
    logger.info("Accuracy on adversarial train examples: %.2f%%", (acc * 100))

    acc = np.sum(np.argmax(pred_test, axis=1) == np.argmax(y_test, axis=1)) / y_test.shape[0]
    logger.info("Accuracy on adversarial test examples: %.2f%%", (acc * 100))

    # Check that x_test has not been modified by attack and classifier
    self.assertAlmostEqual(float(np.max(np.abs(x_test_original - x_test))), 0.0, delta=0.00001)
def atk_BasicIterative(x_train, x_test, y_train, y_test, classifier):
    """Craft BIM adversarial examples for both splits, evaluate, and return them.

    :param x_train: clean training samples.
    :param x_test: clean test samples.
    :param y_train: training labels (forwarded to `evaluate`).
    :param y_test: test labels (forwarded to `evaluate`).
    :param classifier: ART classifier the attack is run against.
    :return: tuple (x_test_adv, x_train_adv) — note test split comes first.
    """
    adv_crafter = BasicIterativeMethod(classifier, eps=1, eps_step=0.1)
    x_train_adv = adv_crafter.generate(x_train)
    x_test_adv = adv_crafter.generate(x_test)

    print("After BasicIterative Attack \n")
    evaluate(x_train, x_test, y_train, y_test, x_train_adv, x_test_adv, classifier)
    return x_test_adv, x_train_adv
def basic_iter(x_test, model, eps, eps_step, max_iter, targeted, batch_size):
    """Run BIM against a Keras model and return the adversarial result reshaped to (32, 32, 3).

    Note: the fixed reshape assumes `x_test` holds a single 32x32 RGB image.
    """
    art_classifier = KerasClassifier(model=model, clip_values=(0, 1))
    bim = BasicIterativeMethod(classifier=art_classifier, eps=eps, eps_step=eps_step,
                               max_iter=max_iter, targeted=targeted, batch_size=batch_size)
    adv = bim.generate(x_test)
    return np.reshape(adv, (32, 32, 3))
def test_keras_iris_unbounded(self):
    """BIM on an unclipped Keras Iris classifier: perturbations may escape [0, 1]."""
    classifier = get_tabular_classifier_kr()

    # Recreate a classifier without clip values
    classifier = KerasClassifier(model=classifier._model, use_logits=False, channel_index=1)
    bim = BasicIterativeMethod(classifier, eps=1, eps_step=0.2, batch_size=128)
    adv = bim.generate(self.x_test_iris)

    self.assertFalse((self.x_test_iris == adv).all())
    # With no clip values the adversarial samples are free to leave the data range.
    self.assertTrue((adv > 1).any())
    self.assertTrue((adv < 0).any())

    labels_adv = np.argmax(classifier.predict(adv), axis=1)
    true_labels = np.argmax(self.y_test_iris, axis=1)
    self.assertFalse((true_labels == labels_adv).all())
    accuracy = np.sum(labels_adv == true_labels) / self.y_test_iris.shape[0]
    logger.info("Accuracy on Iris with BIM adversarial examples: %.2f%%", (accuracy * 100))
class BIMAttack(AdversarialAttack):
    """Thin wrapper around ART's Basic Iterative Method (BIM) attack."""

    def __init__(self, model, step_size_iter=0.1, max_perturbation=0.3, max_iterations=100,
                 targeted=False, batch_size=16):
        """Configure the attack; names map onto ART kwargs (eps, eps_step, max_iter)."""
        super().__init__(model=model)
        # Keep the configuration on the instance for later inspection.
        self._targeted = targeted
        self._step_size_iter = step_size_iter
        self._max_perturbation = max_perturbation
        self._max_iterations = max_iterations
        bim_kwargs = {
            "classifier": self.model,
            "targeted": self._targeted,
            "eps": self._max_perturbation,
            "eps_step": self._step_size_iter,
            "max_iter": self._max_iterations,
            "batch_size": batch_size,
        }
        self._method = BasicIterativeMethod(**bim_kwargs)

    def attack_method(self, x, y=None):
        """Generate adversarial samples for `x`; `y` optionally passes labels/targets."""
        kwargs = {"y": y} if y is not None else {}
        return self._method.generate(x=x, **kwargs)
def general_test(model, optimizer, input_shape, nb_classes, test_loader, method, btrain=False,
                 model_file='last_model_92_sgd.pkl'):
    """Evaluate `model` on adversarial examples crafted from `test_loader`.

    :param model: PyTorch model; weights are loaded from `model_file` unless `btrain`.
    :param optimizer: optimizer handed to the ART PyTorchClassifier wrapper.
    :param input_shape: input shape for the ART wrapper.
    :param nb_classes: number of output classes for the ART wrapper.
    :param test_loader: DataLoader yielding (images, labels) batches.
    :param method: one of 'Deepfool', 'BIM', 'JSMA', 'CW2', 'CWI'.
    :param btrain: if True, skip loading weights from `model_file`.
    :param model_file: checkpoint path with a raw state dict.
    :return: overall adversarial accuracy (correct / total).
    :raises ValueError: for an unrecognized `method`.
    """
    global _classes
    if not btrain:
        model.load_state_dict(torch.load(model_file))
    model.eval()

    loss = nn.CrossEntropyLoss()
    warped_model = PyTorchClassifier(model, loss, optimizer, input_shape, nb_classes,
                                     clip_values=(.0, 1.))
    if method == 'Deepfool':
        adv_crafter = DeepFool(warped_model)
    elif method == 'BIM':
        adv_crafter = BasicIterativeMethod(warped_model, batch_size=20)
    elif method == 'JSMA':
        adv_crafter = SaliencyMapMethod(warped_model, batch_size=20)
    elif method == 'CW2':
        adv_crafter = CarliniL2Method(warped_model, batch_size=20)
    elif method == 'CWI':
        adv_crafter = CarliniLInfMethod(warped_model, batch_size=20)
    else:
        # Previously an unknown method left `adv_crafter` unbound and crashed
        # later with a NameError; fail fast with a clear message instead.
        raise ValueError('Unsupported attack method: {}'.format(method))

    correct, total = 0, 0
    class_correct = list(0. for _ in range(10))
    class_total = list(0. for _ in range(10))
    for images, labels in test_loader:
        images = adv_crafter.generate(images.numpy())
        images = Variable(torch.from_numpy(images).cuda())
        labels = Variable(labels.cuda())
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels.data).sum()
        c = (predicted == labels.data).squeeze()
        # Iterate over the actual batch size instead of a hard-coded 20 so a
        # final, smaller batch no longer raises an IndexError.
        # NOTE(review): with batch size 1, `c` squeezes to 0-dim and `c[i]`
        # would fail — pre-existing assumption that batches have >1 sample.
        for i in range(labels.size(0)):
            label = labels.data[i]
            class_correct[label] += c[i]
            class_total[label] += 1

    print('Accuracy of the model on the test images: %d %%' % (100 * float(correct) / total))
    print('Accuracy of the model on the test images:', float(correct) / total)
    for i in range(10):
        print('Accuracy of %5s : %2d %%' % (_classes[i], 100 * class_correct[i] / class_total[i]))
    return correct / total
def general_test_v2(model, optimizer, input_shape, nb_classes, test_loader, method, conf,
                    btrain=False, model_file='last_model_92_sgd.pkl'):
    """Evaluate `model` accuracy on an adversarial version of `test_loader`.

    :param model: PyTorch model; weights loaded from `model_file` unless `btrain`.
    :param optimizer: optimizer handed to the ART PyTorchClassifier wrapper.
    :param input_shape: input shape for the ART wrapper.
    :param nb_classes: number of output classes for the ART wrapper.
    :param test_loader: DataLoader yielding (images, labels) batches.
    :param method: one of 'Deepfool', 'BIM', 'JSMA', 'CW2', 'CWI', 'FGSM'.
    :param conf: configuration forwarded to `adv_generalization`.
    :param btrain: if True, skip loading weights from `model_file`.
    :param model_file: checkpoint path containing a 'state_dict' entry.
    :return: overall adversarial accuracy (correct / total).
    :raises ValueError: for an unrecognized `method`.
    """
    global _classes
    if not btrain:
        checked_state = torch.load(model_file)['state_dict']
        model.load_state_dict(checked_state)
    model.eval()

    loss = nn.CrossEntropyLoss()
    warped_model = PyTorchClassifier(model, loss, optimizer, input_shape, nb_classes,
                                     clip_values=(.0, 1.))
    if method == 'Deepfool':
        adv_crafter = DeepFool(warped_model)
    elif method == 'BIM':
        adv_crafter = BasicIterativeMethod(warped_model, batch_size=32)
    elif method == 'JSMA':
        adv_crafter = SaliencyMapMethod(warped_model, batch_size=32)
    elif method == 'CW2':
        adv_crafter = CarliniL2Method(warped_model, batch_size=32)
    elif method == 'CWI':
        adv_crafter = CarliniLInfMethod(warped_model, batch_size=32)
    elif method == 'FGSM':
        adv_crafter = FastGradientMethod(warped_model, batch_size=32)
    else:
        # Previously an unknown method left `adv_crafter` unbound and crashed
        # later with a NameError; fail fast with a clear message instead.
        raise ValueError('Unsupported attack method: {}'.format(method))

    correct, total = 0, 0
    adv_dataset = adv_generalization(test_loader, adv_crafter, conf)
    temp_loader = DataLoader(dataset=adv_dataset, batch_size=32, shuffle=False, drop_last=True)
    for images, labels in temp_loader:
        images = Variable(images.cuda())
        labels = Variable(labels.cuda())
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels.data).sum()

    print('Accuracy of the model on the test images: %d %%' % (100 * float(correct) / total))
    print('Accuracy of the model on the test images:', float(correct) / total)
    return correct / total
def __init__(self, model, step_size_iter=0.1, max_perturbation=0.3, max_iterations=100,
             targeted=False, batch_size=16):
    """Configure a Basic Iterative Method attack around `model`.

    Argument names map onto ART's BasicIterativeMethod kwargs:
    `max_perturbation` -> eps, `step_size_iter` -> eps_step,
    `max_iterations` -> max_iter.
    """
    super().__init__(model=model)
    # Keep the configuration on the instance for later inspection.
    self._targeted = targeted
    self._step_size_iter = step_size_iter
    self._max_perturbation = max_perturbation
    self._max_iterations = max_iterations
    bim_kwargs = {
        "classifier": self.model,
        "targeted": self._targeted,
        "eps": self._max_perturbation,
        "eps_step": self._step_size_iter,
        "max_iter": self._max_iterations,
        "batch_size": batch_size,
    }
    self._method = BasicIterativeMethod(**bim_kwargs)
def GetAttackers(classifier, x_test, attacker_name):
    """
    Function: Load classifier and generate adversarial samples
    """
    t_start = time.time()

    # Lazily-constructed attack factories: only the requested attacker is built.
    factories = {
        "FGSM": lambda: FastGradientMethod(classifier=classifier, eps=0.3),
        "Elastic": lambda: ElasticNet(classifier=classifier, confidence=0.5),
        "BasicIterativeMethod": lambda: BasicIterativeMethod(classifier=classifier, eps=0.3),
        "NewtonFool": lambda: NewtonFool(classifier=classifier, max_iter=20),
        "HopSkipJump": lambda: HopSkipJump(classifier=classifier, max_iter=20),
        "ZooAttack": lambda: ZooAttack(classifier=classifier, max_iter=20),
        "VirtualAdversarialMethod": lambda: VirtualAdversarialMethod(classifier=classifier, max_iter=20),
        "UniversalPerturbation": lambda: UniversalPerturbation(classifier=classifier, max_iter=20),
        "AdversarialPatch": lambda: AdversarialPatch(classifier=classifier, max_iter=20),
        "Attack": lambda: Attack(classifier=classifier),
        "BoundaryAttack": lambda: BoundaryAttack(classifier=classifier, targeted=False, epsilon=0.05, max_iter=20),
        "CarliniL2": lambda: CarliniL2Method(classifier=classifier, confidence=0.5, learning_rate=0.001, max_iter=15),
        "CarliniLinf": lambda: CarliniLInfMethod(classifier=classifier, confidence=0.5, learning_rate=0.001, max_iter=15),
        "DeepFool": lambda: DeepFool(classifier),
        "SMM": lambda: SaliencyMapMethod(classifier=classifier, theta=2),
        "PGD": lambda: ProjectedGradientDescent(classifier=classifier, norm=2, eps=1, eps_step=0.5),
    }
    if attacker_name not in factories:
        raise ValueError("Please get the right attacker's name for the input.")

    attacker = factories[attacker_name]()
    test_adv = attacker.generate(x_test)
    dt = time.time() - t_start
    return test_adv, dt
def test_tensorflow_iris(self):
    """Untargeted and targeted BIM against the TensorFlow Iris classifier."""
    (_, _), (x_test, y_test) = self.iris
    classifier, _ = get_iris_classifier_tf()

    # Test untargeted attack
    attack = BasicIterativeMethod(classifier, eps=1, eps_step=0.1)
    adv = attack.generate(x_test)
    self.assertFalse((x_test == adv).all())
    # Clip values keep the samples inside the data range.
    self.assertTrue((adv <= 1).all())
    self.assertTrue((adv >= 0).all())

    labels_adv = np.argmax(classifier.predict(adv), axis=1)
    true_labels = np.argmax(y_test, axis=1)
    self.assertFalse((true_labels == labels_adv).all())
    accuracy = np.sum(labels_adv == true_labels) / y_test.shape[0]
    logger.info('Accuracy on Iris with BIM adversarial examples: %.2f%%', (accuracy * 100))

    # Test targeted attack
    targets = random_targets(y_test, nb_classes=3)
    attack = BasicIterativeMethod(classifier, targeted=True, eps=1, eps_step=0.1)
    adv = attack.generate(x_test, **{'y': targets})
    self.assertFalse((x_test == adv).all())
    self.assertTrue((adv <= 1).all())
    self.assertTrue((adv >= 0).all())

    labels_adv = np.argmax(classifier.predict(adv), axis=1)
    self.assertTrue((np.argmax(targets, axis=1) == labels_adv).any())
    success = np.sum(labels_adv == np.argmax(targets, axis=1)) / y_test.shape[0]
    logger.info('Success rate of targeted BIM on Iris: %.2f%%', (success * 100))
def test_pytorch_iris(self):
    """Untargeted and targeted BIM against the PyTorch Iris classifier."""
    classifier = get_tabular_classifier_pt()

    # Test untargeted attack
    attack = BasicIterativeMethod(classifier, eps=1, eps_step=0.1)
    adv = attack.generate(self.x_test_iris)
    self.assertFalse((self.x_test_iris == adv).all())
    # Clip values keep the samples inside the data range.
    self.assertTrue((adv <= 1).all())
    self.assertTrue((adv >= 0).all())

    labels_adv = np.argmax(classifier.predict(adv), axis=1)
    true_labels = np.argmax(self.y_test_iris, axis=1)
    self.assertFalse((true_labels == labels_adv).all())
    accuracy = np.sum(labels_adv == true_labels) / self.y_test_iris.shape[0]
    logger.info("Accuracy on Iris with BIM adversarial examples: %.2f%%", (accuracy * 100))

    # Test targeted attack
    targets = random_targets(self.y_test_iris, nb_classes=3)
    attack = BasicIterativeMethod(classifier, targeted=True, eps=1, eps_step=0.1, batch_size=128)
    adv = attack.generate(self.x_test_iris, **{"y": targets})
    self.assertFalse((self.x_test_iris == adv).all())
    self.assertTrue((adv <= 1).all())
    self.assertTrue((adv >= 0).all())

    labels_adv = np.argmax(classifier.predict(adv), axis=1)
    self.assertTrue((np.argmax(targets, axis=1) == labels_adv).any())
    success = np.sum(labels_adv == np.argmax(targets, axis=1)) / self.y_test_iris.shape[0]
    logger.info("Success rate of targeted BIM on Iris: %.2f%%", (success * 100))
def test_classifier_type_check_fail_classifier(self):
    """BIM must reject objects that do not implement the ART Classifier API."""

    # A bare class with no Classifier interface at all.
    class ClassifierNoAPI:
        pass

    classifier = ClassifierNoAPI
    with self.assertRaises(TypeError) as ctx:
        _ = BasicIterativeMethod(classifier=classifier)

    expected = ('For `BasicIterativeMethod` classifier must be an instance of '
                '`art.classifiers.classifier.Classifier`, the provided classifier is instance of '
                '(<class \'object\'>,).')
    self.assertIn(expected, str(ctx.exception))
def test_classifier_type_check_fail_gradients(self):
    """BIM is a white-box attack: classifiers without gradients must be rejected."""
    # A decision tree yields predictions but provides no loss gradients.
    from art.classifiers.scikitlearn import ScikitlearnDecisionTreeClassifier
    from sklearn.tree import DecisionTreeClassifier

    classifier = ScikitlearnDecisionTreeClassifier(model=DecisionTreeClassifier())
    with self.assertRaises(TypeError) as ctx:
        _ = BasicIterativeMethod(classifier=classifier)

    expected = ('For `BasicIterativeMethod` classifier must be an instance of '
                '`art.classifiers.classifier.ClassifierGradients`, the provided classifier is instance of '
                '(<class \'art.classifiers.scikitlearn.ScikitlearnClassifier\'>,).')
    self.assertIn(expected, str(ctx.exception))
def test_scikitlearn(self):
    """Untargeted and targeted BIM against scikit-learn classifiers on Iris."""
    from sklearn.linear_model import LogisticRegression
    from sklearn.svm import SVC, LinearSVC

    from art.classifiers.scikitlearn import ScikitlearnLogisticRegression, ScikitlearnSVC

    # Map sklearn estimator -> matching ART wrapper.
    scikitlearn_test_cases = {
        LogisticRegression: ScikitlearnLogisticRegression,
        SVC: ScikitlearnSVC,
        LinearSVC: ScikitlearnSVC,
    }

    (_, _), (x_test, y_test) = self.iris

    for model_class, classifier_class in scikitlearn_test_cases.items():
        classifier = classifier_class(model=model_class(), clip_values=(0, 1))
        classifier.fit(x=x_test, y=y_test)

        # Test untargeted attack
        attack = BasicIterativeMethod(classifier, eps=1, eps_step=0.1)
        adv = attack.generate(x_test)
        self.assertFalse((x_test == adv).all())
        self.assertTrue((adv <= 1).all())
        self.assertTrue((adv >= 0).all())

        labels_adv = np.argmax(classifier.predict(adv), axis=1)
        true_labels = np.argmax(y_test, axis=1)
        self.assertFalse((true_labels == labels_adv).all())
        accuracy = np.sum(labels_adv == true_labels) / y_test.shape[0]
        logger.info('Accuracy of ' + classifier.__class__.__name__ + ' on Iris with BIM adversarial examples: '
                    '%.2f%%', (accuracy * 100))

        # Test targeted attack
        targets = random_targets(y_test, nb_classes=3)
        attack = BasicIterativeMethod(classifier, targeted=True, eps=1, eps_step=0.1, batch_size=128)
        adv = attack.generate(x_test, **{'y': targets})
        self.assertFalse((x_test == adv).all())
        self.assertTrue((adv <= 1).all())
        self.assertTrue((adv >= 0).all())

        labels_adv = np.argmax(classifier.predict(adv), axis=1)
        self.assertTrue((np.argmax(targets, axis=1) == labels_adv).any())
        accuracy = np.sum(labels_adv == np.argmax(targets, axis=1)) / y_test.shape[0]
        logger.info('Success rate of ' + classifier.__class__.__name__ + ' on targeted BIM on Iris: %.2f%%',
                    (accuracy * 100))
def test_scikitlearn(self):
    """Untargeted and targeted BIM against sklearn models wrapped in SklearnClassifier."""
    from sklearn.linear_model import LogisticRegression
    from sklearn.svm import SVC, LinearSVC

    from art.classifiers.scikitlearn import SklearnClassifier

    models = [
        LogisticRegression(solver="lbfgs", multi_class="auto"),
        SVC(gamma="auto"),
        LinearSVC(),
    ]

    x_test_original = self.x_test_iris.copy()

    for model in models:
        classifier = SklearnClassifier(model=model, clip_values=(0, 1))
        classifier.fit(x=self.x_test_iris, y=self.y_test_iris)

        # Test untargeted attack
        attack = BasicIterativeMethod(classifier, eps=1, eps_step=0.1, max_iter=5)
        adv = attack.generate(self.x_test_iris)
        self.assertFalse((self.x_test_iris == adv).all())
        self.assertTrue((adv <= 1).all())
        self.assertTrue((adv >= 0).all())

        labels_adv = np.argmax(classifier.predict(adv), axis=1)
        true_labels = np.argmax(self.y_test_iris, axis=1)
        self.assertFalse((true_labels == labels_adv).all())
        accuracy = np.sum(labels_adv == true_labels) / self.y_test_iris.shape[0]
        logger.info(
            "Accuracy of " + classifier.__class__.__name__ + " on Iris with BIM adversarial examples: "
            "%.2f%%",
            (accuracy * 100),
        )

        # Test targeted attack
        targets = random_targets(self.y_test_iris, nb_classes=3)
        attack = BasicIterativeMethod(classifier, targeted=True, eps=1, eps_step=0.1, batch_size=128, max_iter=5)
        adv = attack.generate(self.x_test_iris, **{"y": targets})
        self.assertFalse((self.x_test_iris == adv).all())
        self.assertTrue((adv <= 1).all())
        self.assertTrue((adv >= 0).all())

        labels_adv = np.argmax(classifier.predict(adv), axis=1)
        self.assertTrue((np.argmax(targets, axis=1) == labels_adv).any())
        accuracy = np.sum(labels_adv == np.argmax(targets, axis=1)) / self.y_test_iris.shape[0]
        logger.info(
            "Success rate of " + classifier.__class__.__name__ + " on targeted BIM on Iris: %.2f%%", (accuracy * 100)
        )

    # Check that x_test has not been modified by attack and classifier
    self.assertAlmostEqual(float(np.max(np.abs(x_test_original - self.x_test_iris))), 0.0, delta=0.00001)
def build_adversarial(model, optimizer, loss, input_shape, nb_class, method, batch_size=32, pgd_eps=0.3):
    """Wrap `model` in an ART PyTorchClassifier and return the requested attack crafter.

    Raises NotImplementedError for an unrecognized `method` name.
    """
    model.eval()
    wmodel = PyTorchClassifier(model, loss, optimizer, input_shape, nb_class)

    # Lazy constructors: only the selected attack object is instantiated.
    factories = {
        'deepfool': lambda: DeepFool(wmodel),
        'bim': lambda: BasicIterativeMethod(wmodel, batch_size=batch_size),
        'jsma': lambda: SaliencyMapMethod(wmodel, batch_size=batch_size),
        'cw2': lambda: CarliniL2Method(wmodel, batch_size=batch_size),
        'cwi': lambda: CarliniLInfMethod(wmodel, batch_size=batch_size),
        'fgsm': lambda: FastGradientMethod(wmodel, batch_size=batch_size),
        'pgd': lambda: ProjectedGradientDescent(wmodel, batch_size=batch_size, eps=pgd_eps),
    }
    if method not in factories:
        raise NotImplementedError('Unsupported Attack Method: {}'.format(method))
    return factories[method]()
clip_values=(-0.5, 0.5), use_logits=False) elif args.model == 'densenet201': model = DenseNet201(weights='imagenet') classifier = KerasClassifier(model=model, use_logits=False) if args.attack == 'fgsm': attack = FastGradientMethod(classifier=classifier, eps=0.6, eps_step=0.6, batch_size=64) if args.attack == 'bim': if args.d == 'imagenet': attack = BasicIterativeMethod(classifier=classifier, eps=0.6, batch_size=64, max_iter=25) else: attack = BasicIterativeMethod(classifier=classifier, eps=0.6, batch_size=64) if args.attack == 'jsma': attack = SaliencyMapMethod(classifier=classifier, batch_size=64) if args.attack == 'c+w': attack = CarliniL2Method(classifier=classifier, batch_size=64) # generating adversarial of the testing dataset and save it to the folder './adv' if args.d == 'mnist' or args.d == 'cifar': x_adv = attack.generate(x=x_test)
) # only use without modifying batch size (default: 1) classifier = KerasClassifier(model=model, use_logits=False) for i in range(args.val_start, args.val_end): x_test, y_test = pickle.load( open( './dataset_imagenet/%s_%s_val_%i.p' % (args.d, args.model, int(i)), 'rb')) if args.attack == 'fgsm': attack = FastGradientMethod(classifier=classifier, eps=0.6, eps_step=0.6) if args.attack == 'bim': attack = BasicIterativeMethod(classifier=classifier, eps=0.6, max_iter=5) if args.attack == 'jsma': # attack = ProjectedGradientDescent(classifier=classifier, eps=0.6, max_iter=5) attack = SaliencyMapMethod(classifier=classifier) if args.attack == 'c+w': attack = CarliniL2Method(classifier=classifier) # attack = CarliniLInfMethod(classifier=classifier, batch_size=1, max_iter=2) # attack = FastGradientMethod(classifier=classifier) from datetime import datetime now = datetime.now() current_time_after = now.strftime("%H:%M:%S") print("Current Time After=", current_time_after) print('Generating adversarial examples----------------') print(i, x_test.shape, y_test.shape)
def run():
    """Evaluate saved ESKD student models against adversarial attacks and Gaussian noise.

    Parses model metadata from checkpoint filenames, builds noised copies of
    CIFAR-100, runs FGM/BIM attacks per epsilon in cfg.EPS_VALS4, and records
    per-model robustness accuracies to a results.csv in a fresh log directory.
    """
    log_dir = cfg.log_dir
    now = datetime.now()
    now_datetime = now.strftime("%d-%m-%y_%H:%M:%S")
    log_dir = os.path.join(
        log_dir,
        f"ESKD_baseline_noise_and_adv_evaluation_{cfg.dataset}_{cfg.student_model_size}_{now_datetime}")
    os.mkdir(log_dir)
    RESULTS_FILE = os.path.join(log_dir, "results.csv")
    DIR_QUERY = os.path.join(cfg.MODEL_DIR4, "*.h5")
    STUDENT_MODEL_WEIGHT_PATHS = glob.glob(DIR_QUERY)
    # generate a list of parsed student model information
    rm_path = cfg.MODEL_DIR4 + "/"
    STUDENT_MODEL_NAMES = [x[len(rm_path):] for x in STUDENT_MODEL_WEIGHT_PATHS]
    # parse values out of model names
    sizes = []
    intervals = []
    test_accs = []
    train_accs = []
    for name in STUDENT_MODEL_NAMES:
        size, interval, test_acc, train_acc = re.findall(
            rf"model_(\d+)_(\d+)\|\d+_(\d+.\d+)_(\d+.\d+).", name)[0]
        sizes.append(int(size))
        intervals.append(int(interval))
        test_accs.append(float(test_acc))
        train_accs.append(float(train_acc))
    # create dataframe with the parsed data
    df = pd.DataFrame(list(zip(sizes, intervals, test_accs, train_accs)),
                      columns=['size', 'interval', 'test_acc', 'train_acc'])
    # loading dataset and "centering" data samples
    X_train, Y_train, X_test, Y_test = load_dataset.load_cifar_100(None)
    X_train, X_test = load_dataset.z_standardization(X_train, X_test)
    # creating gaussian noised set of images for evaluation
    X_test_gauss_noised_sets = []
    X_train_gauss_noised_sets = []
    for i in range(len(cfg.SIGMA_VALS4)):
        X_test_gauss_noised = np.zeros_like(X_test)
        X_train_gauss_noised = np.zeros_like(X_train)
        for j in range(len(X_test)):
            X_test_gauss_noised[j] = X_test[j] + np.random.normal(
                cfg.MEAN4, cfg.SIGMA_VALS4[i],
                (X_test[j].shape[0], X_test[j].shape[1], X_test[j].shape[2]))
        for j in range(len(X_train)):
            X_train_gauss_noised[j] = X_train[j] + np.random.normal(
                cfg.MEAN4, cfg.SIGMA_VALS4[i],
                (X_train[j].shape[0], X_train[j].shape[1], X_train[j].shape[2]))
        X_test_gauss_noised_sets.append(X_test_gauss_noised)
        X_train_gauss_noised_sets.append(X_train_gauss_noised)
    # min and max values of the test set for adversarial example generation
    dataset_min = np.min(X_test)
    dataset_max = np.max(X_test)
    # create column in dataframe for each adversarial accuracy value
    zeros = [0 for name in STUDENT_MODEL_NAMES]
    for eps in cfg.EPS_VALS4:
        df[("eps_" + str(format(eps, '.3f')))] = zeros
    for sig in cfg.SIGMA_VALS4:
        df[("sig_" + str(format(sig, '.3f')))] = zeros
    print("[INFO] Loading student model...")
    # NOTE(review): `size` here is the leftover from the parsing loop above, i.e.
    # the last parsed model's size — verify all checkpoints share one size.
    curr_student_model = knowledge_distillation_models.get_model(
        cfg.dataset, 100, X_train, int(size), cfg.model_type)
    optimizer = SGD(lr=0.01, momentum=0.9, nesterov=True)
    curr_student_model.compile(optimizer=optimizer, loss="categorical_crossentropy",
                               metrics=["accuracy"])
    for j in range(len(STUDENT_MODEL_WEIGHT_PATHS)):
        print("\n--------------------------Starting new AR step--------------------------")
        # load weights for the student model
        print("[INFO] Loading student model weights...")
        curr_student_model.load_weights(STUDENT_MODEL_WEIGHT_PATHS[j])
        for i in range(max(len(cfg.EPS_VALS4), len(cfg.SIGMA_VALS4))):
            if cfg.USE_ADV_ATTACK:
                if i < len(cfg.EPS_VALS4):
                    # evaluating adversarial attack robustness
                    curr_eps = cfg.EPS_VALS4[i]
                    print(
                        f"[INFO] Evaluating {STUDENT_MODEL_NAMES[j]} with attack at epsilon {format(curr_eps, '.3f')}..."
                    )
                    student_art_model = KerasClassifier(
                        model=curr_student_model,
                        clip_values=(dataset_min, dataset_max),
                        use_logits=False)
                    print("[INFO] Generating adversarial examples for the current model...")
                    # Bug fix: string comparison must use `==`, not identity
                    # (`is`), which depends on CPython interning and raises a
                    # SyntaxWarning on modern interpreters.
                    if cfg.attack_type4 == "fgm":
                        attack_student_model = FastGradientMethod(
                            classifier=student_art_model, eps=curr_eps)
                    elif cfg.attack_type4 == "bim":
                        attack_student_model = BasicIterativeMethod(
                            classifier=student_art_model, eps_step=0.025, eps=curr_eps,
                            max_iter=4, targeted=False, batch_size=1)
                    else:
                        print("[WARNING] attack type not supported!")
                        break
                    X_test_adv = attack_student_model.generate(x=X_test)
                    print("[INFO] Evaluating student model's adversarial accuracy...")
                    predictions = student_art_model.predict(X_test_adv)
                    adv_acc = np.sum(
                        np.argmax(predictions, axis=1) == np.argmax(Y_test, axis=1)) / len(Y_test)
                    df.iloc[j, df.columns.get_loc("eps_" + str(format(curr_eps, '.3f')))] = adv_acc
                    print(f"Adversarial accuracy: {adv_acc}")
            if cfg.USE_GAUSS_NOISE:
                if i < len(cfg.SIGMA_VALS4):
                    # evaluating gaussian noise robustness
                    curr_sig = cfg.SIGMA_VALS4[i]
                    print(
                        f"[INFO] Evaluating {STUDENT_MODEL_NAMES[j]} with Gaussian Noise at sigma {format(curr_sig, '.3f')}..."
                    )
                    predictions2 = curr_student_model.predict(X_test_gauss_noised_sets[i])
                    gauss_acc = np.sum(
                        np.argmax(predictions2, axis=1) == np.argmax(Y_test, axis=1)) / len(Y_test)
                    df.iloc[j, df.columns.get_loc("sig_" + str(format(curr_sig, '.3f')))] = gauss_acc
                    print("[INFO] Completed adversarial evaluation...")
                    print(f"Gaussian noise accuracy: {gauss_acc}")
    print(f"[INFO] Recording adversarial robustness results to {RESULTS_FILE}...")
    df.to_csv(RESULTS_FILE, sep=',')