def attack(predictWrapper, x_train, x_test, y_train, y_test, input_shape, datapoint):
    """Run an untargeted HopSkipJump (decision-based, black-box) attack.

    Wraps ``predictWrapper.predict_one_hot`` in an ART ``BlackBoxClassifier``
    and generates adversarial versions of ``datapoint``.

    ``x_train`` is used only to derive the clip range; ``x_test``, ``y_train``
    and ``y_test`` are only printed for diagnostics.
    Returns the adversarial examples as a 2D array of shape (-1, input_shape).
    """
    # Valid pixel/feature range for the attack is taken from the training data.
    min_pixel_value = x_train.min()
    max_pixel_value = x_train.max()
    print('min_pixel_value ', min_pixel_value)
    print('max_pixel_value ', max_pixel_value)
    print('xtrain shape: ', x_train.shape)
    print('xtest shape: ', x_test.shape)
    print('y_train shape: ', y_train.shape)
    print('ytest shape: ', y_test.shape)
    # Create classifier
    # NOTE(review): nb_classes=2 is hard-coded — assumes a binary task; confirm.
    classifier = BlackBoxClassifier(predict=predictWrapper.predict_one_hot,
                                    input_shape=(input_shape, ),
                                    nb_classes=2,
                                    clip_values=(min_pixel_value, max_pixel_value))
    print('----- generate adv data by HopSkipJump attack -----')
    # Generate adversarial test examples
    s = time.time()
    attacker = HopSkipJump(classifier=classifier, targeted=False, norm=2,
                           max_iter=100, max_eval=10000, init_eval=100,
                           init_size=100)
    # attacker = HopSkipJump(classifier=classifier, targeted=False, norm=2, max_iter=2, max_eval=10000, init_eval=100, init_size=100)
    # Input data shape should be 2D
    datapoint = datapoint.reshape((-1, input_shape))
    adv_data = attacker.generate(x=datapoint)
    # distortion() is a helper defined elsewhere in the project (reports
    # perturbation size as a side effect — its return value is unused here).
    distortion(datapoint, adv_data)
    print('Generate test adv cost time: ', time.time() - s)
    return adv_data
def init_hopskipjump(config, data, limit=50):
    """Build an untargeted HopSkipJump attack and run it on ``data``.

    Arxiv Paper: https://arxiv.org/abs/1904.02144
    """
    hsj = HopSkipJump(config, False, max_iter=limit, max_eval=100, init_eval=10)
    return hsj.generate(data)
def hopskipjump(
    data,
    query,
    query_limit,
    art_model,
    victim_input_shape,
    substitute_input_shape,
    victim_output_targets,
):
    """Runs the HopSkipJump evasion attack

    Arxiv Paper: https://arxiv.org/abs/1904.02144

    Half of the query budget is spent labelling ``data`` via ``copycat``;
    the other half drives the HopSkipJump attack itself.  Returns the
    attacked inputs (reshaped for the substitute model) and the victim's
    labels for them.
    """
    internal_limit = int(query_limit * 0.5)
    X, y = copycat(
        data,
        query,
        internal_limit,
        art_model,
        victim_input_shape,
        substitute_input_shape,
        victim_output_targets,
        reshape=False,
    )
    X_np = X.detach().clone().numpy()

    evasion_limit = int(query_limit * 0.5)

    # The initial evaluation number must be lower than the maximum
    lower_bound = 0.01 * evasion_limit
    init_eval = int(lower_bound if lower_bound > 1 else 1)

    # Run attack and process results
    attack = HopSkipJump(
        art_model,
        False,
        norm="inf",
        max_iter=evasion_limit,
        max_eval=evasion_limit,
        init_eval=init_eval,
    )
    # BUG FIX: generate() was previously called twice and the first result
    # discarded, doubling the attack's (query-budgeted) cost.  Run it once
    # and reuse the output.
    adv_np = attack.generate(X_np)
    result = torch.from_numpy(adv_np).clone().detach().float()
    y = query(result)
    result = reshape_input(result, substitute_input_shape)
    return result, y
def get(self):
    """HTTP GET handler: decode a comma-separated 28x28 image from the
    request, craft an adversarial version with HopSkipJump, and return the
    post-attack prediction plus the attacked image as base64 JPEG.
    """
    data = self.parser.parse_args()
    img = data.get('img')
    # Image arrives as a comma-separated string of 784 pixel values.
    img_data = img.split(',')
    img = np.array(img_data, np.float32).reshape(28, 28)
    img = img * 255.0  # assumes input pixels are normalized to [0, 1] — TODO confirm
    # Zero-pad 28x28 -> 32x32 (classifier input size), add batch+channel dims.
    img_new = np.zeros((1, 32, 32, 1))
    img_new[0] = np.pad(img.reshape(28, 28), [(2, ), (2, )],
                        mode='constant').reshape(32, 32, 1)
    global sess
    global graph
    with graph.as_default():
        set_session(sess)
        # max_iter=0 + resume=True: each loop pass refines the same
        # adversarial example; the budget is raised to iter_step after the
        # first generate() call.
        attack = HopSkipJump(classifier=classifier, targeted=False,
                             max_iter=0, max_eval=1000, init_eval=10)
        iter_step = 3
        x_adv = None
        for i in range(iter_step):
            x_adv = attack.generate(x=img_new, x_adv_init=x_adv, resume=True)
            #clear_output()
            # print("Adversarial image at step %d." % (i * iter_step),
            # "and class label %d." % np.argmax(classifier.predict(x_adv)[0]))
            attack.max_iter = iter_step
        sav_img = Image.fromarray(x_adv.reshape(32, 32))
        sav_img = sav_img.convert("L")
        sav_img.save("test.jpg")  # debug artifact written on every request
        buffer = BytesIO()
        sav_img.save(buffer, format="JPEG")
        myimage = buffer.getvalue()
        res = str(predict(x_adv))
        print("After Attack: ", res)
        return jsonify({
            'res': res,
            'dat': bytes.decode(base64.b64encode(myimage))
        })
def robust_score(y_true, y_pred, eps=0.1, X=None, y=None, model=None,
                 feature_selector=None, scorer=None):
    """Score the drop in model performance under a HopSkipJump attack.

    Splits ``X``/``y`` into train/test using the index of ``y_true`` as the
    test ids, optionally applies ``feature_selector``, fits a deep copy of
    ``model``, attacks the test set with a minimally-budgeted HopSkipJump,
    and returns ``scorer(clean) - scorer(adversarial)`` (higher = less robust).

    ``y_pred`` and ``eps`` are unused but kept for scorer-interface
    compatibility.
    """
    all_ids = range(X.shape[0])
    test_ids = y_true.index.values
    train_ids = list(set(all_ids) - set(test_ids))
    y_train = y[train_ids]
    y_test = y[test_ids]
    X_train = X[train_ids, :]
    X_test = X[test_ids, :]
    # FIX: idiomatic None check (was `type(feature_selector) != type(None)`).
    if feature_selector is not None:
        X_train = feature_selector.fit_transform(X_train)
        X_test = feature_selector.transform(X_test)
    # Work on a copy so the caller's model is never mutated.
    best_model = copy.deepcopy(model)
    best_model.fit(X_train, y_train)
    classifier = SklearnClassifier(model=best_model)
    # Tiny attack budget: fast but weak perturbations.
    attack = HopSkipJump(classifier=classifier, max_iter=1, max_eval=10,
                         init_eval=5, init_size=1)
    X_test_adv = attack.generate(X_test)
    diff = scorer(best_model, X_test, y_test) - scorer(best_model, X_test_adv, y_test)
    return diff
def robust_score_test(eps=0.1, X_test=None, y_test=None, model=None,
                      feature_selector=None, scorer=None):
    """Return the score drop of ``model`` on ``X_test`` under a
    minimally-budgeted HopSkipJump attack.

    The feature selector is applied (transform only), the model is deep-copied
    before being wrapped for ART, and the result is
    ``scorer(clean) - scorer(adversarial)``.  ``eps`` is unused.
    """
    X_test_filtered = feature_selector.transform(X_test)
    best_model = copy.deepcopy(model)
    classifier = SklearnClassifier(model=best_model)
    attack = HopSkipJump(classifier=classifier, max_iter=1, max_eval=10,
                         init_eval=5, init_size=1)
    X_test_adv = attack.generate(X_test_filtered)
    score_original_test = scorer(best_model, X_test_filtered, y_test)
    score_corrupted_test = scorer(best_model, X_test_adv, y_test)
    return score_original_test - score_corrupted_test
# Create a query function for a PyTorch Lightning model model = train_mnist_victim() def query_mnist(input_data): input_data = torch.from_numpy(input_data) return get_target(model, input_data) emnist_train, emnist_test = get_emnist_data() test = BlackBoxClassifier( predict=query_mnist, input_shape=(1, 28, 28, 1), nb_classes=10, clip_values=(0, 255), preprocessing_defences=None, postprocessing_defences=None, preprocessing=None, ) attack = HopSkipJump(test, False, max_iter=50, max_eval=100, init_eval=10) X, y = emnist_train.data, emnist_train.targets X = X.to(torch.float32) X = X.unsqueeze(3) attack.generate(X)
x_train = target.astype(float) classifier = BlackBoxClassifier(predict, x_train[0].shape, 3, clip_values=(0, 255)) res = predict(x_train[:1]) print(res) assert (res[0, 0] == 1) # Select target image and show prediction target_image = x_train[0] # Generate HopSkipJump attack against black box classifier attack = HopSkipJump(classifier=classifier, targeted=True, max_iter=0, max_eval=1000, init_eval=10) iter_step = 10 stop = Image.open(curr_path + "../danny-machine/machine.jpg") stop = np.array([np.array(stop)]).astype(float) x_adv = stop errors = [] for i in range(100): x_adv = attack.generate(x=np.array([target_image]), y=[1], x_adv_init=x_adv) l2_err = np.linalg.norm(np.reshape(x_adv[0] - target_image, [-1])) print("Adversarial image at step %d." % (i * iter_step), "L2 error", np.linalg.norm(np.reshape(x_adv[0] - target_image, [-1])),
print('max_pixel_value ', max_pixel_value) # Create classifier classifier = BlackBoxClassifier(predict=predictWrapper.predict_one_hot, input_shape=input_shape, nb_classes=args.n_classes, clip_values=(min_pixel_value, max_pixel_value)) print('----- generate adv data by HopSkipJump attack -----') # Generate adversarial test examples attacker = HopSkipJump(classifier=classifier, targeted=False, norm=2, max_iter=40, max_eval=10000, init_eval=100, init_size=100) # attacker = HopSkipJump(classifier=classifier, targeted=False, norm=2, max_iter=2, max_eval=10000, init_eval=100, init_size=100) # Input data shape should be 2D datapoint = test[correct_index[:1]] s = time.time() adv_data = attacker.generate(x=datapoint) # distortion(datapoint, adv_data) print('Generate test adv cost time: ', time.time() - s) # return adv_data
def main(args):
    """Generate and save adversarial example sets (.npy files) for the chosen
    dataset using a battery of ART attacks: FGSM, BIM, PGD (L1/L2/Linf),
    Carlini-Wagner (Linf and L2), DeepFool, spatial transformation,
    SquareAttack, HopSkipJump, and ZOO.

    Each per-dataset branch loads the baseline CNN, recompiles it with SGD,
    wraps it in an ART ``KerasClassifier``, and sets attack hyper-parameters
    (epsilon grids, spatial limits).  Outputs go to ``adv_path``.
    """
    # NOTE(review): the assert excludes the '*_gray' variants that the
    # branches below handle ('mnist_gray', 'cifar_gray', 'svhn_gray') —
    # those branches are unreachable through this check; confirm intent.
    assert args.dataset in ['mnist', 'cifar', 'svhn', 'tiny', 'tiny_gray'], \
        "dataset parameter must be either 'mnist', 'cifar', 'svhn', or 'tiny'"
    print('Dataset: %s' % args.dataset)
    adv_path = '/home/aaldahdo/detectors/adv_data/'  # hard-coded output dir

    if args.dataset == 'mnist':
        from baselineCNN.cnn.cnn_mnist import MNISTCNN as model
        model_mnist = model(mode='load', filename='cnn_{}.h5'.format(args.dataset))
        classifier=model_mnist.model
        sgd = optimizers.SGD(lr=0.05, decay=1e-6, momentum=0.9, nesterov=True)
        classifier.compile(loss=categorical_crossentropy, optimizer=sgd, metrics=['accuracy'])
        kclassifier = KerasClassifier(model=classifier, clip_values=(0, 1))
        # Epsilon grids: Linf fractions, L1 budgets, L2 budgets.
        epsilons=[8/256, 16/256, 32/256, 64/256, 80/256, 128/256]
        epsilons1=[5, 10, 15, 20, 25, 30, 40]
        epsilons2=[0.125, 0.25, 0.3125, 0.5, 1, 1.5, 2]
        eps_sa=0.3
        pa_th=78  # NOTE(review): unused in this function — presumably for a pixel attack elsewhere
        # random_restart = 20
        # x_train = model_mnist.x_train
        x_test = model_mnist.x_test
        # y_train = model_mnist.y_train
        y_test = model_mnist.y_test
        y_test_labels = model_mnist.y_test_labels
        translation = 10
        rotation = 60
    elif args.dataset == 'mnist_gray':
        from baselineCNN.cnn.cnn_mnist_gray import MNISTCNN as model
        model_mnist = model(mode='load', filename='cnn_{}.h5'.format(args.dataset))
        classifier=model_mnist.model
        sgd = optimizers.SGD(lr=0.05, decay=1e-6, momentum=0.9, nesterov=True)
        classifier.compile(loss=categorical_crossentropy, optimizer=sgd, metrics=['accuracy'])
        kclassifier = KerasClassifier(model=classifier, clip_values=(0, 1))
        epsilons=[8/256, 16/256, 32/256, 64/256, 80/256, 128/256]
        epsilons1=[5, 10, 15, 20, 25, 30, 40]
        epsilons2=[0.125, 0.25, 0.3125, 0.5, 1, 1.5, 2]
        eps_sa=0.3
        pa_th=78
        # random_restart = 20
        # x_train = model_mnist.x_train
        x_test = model_mnist.x_test
        # y_train = model_mnist.y_train
        y_test = model_mnist.y_test
        y_test_labels = model_mnist.y_test_labels
        translation = 10
        rotation = 60
    elif args.dataset == 'cifar':
        from baselineCNN.cnn.cnn_cifar10 import CIFAR10CNN as model
        model_cifar = model(mode='load', filename='cnn_{}.h5'.format(args.dataset))
        classifier=model_cifar.model
        sgd = optimizers.SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
        classifier.compile(loss=categorical_crossentropy, optimizer=sgd, metrics=['accuracy'])
        kclassifier = KerasClassifier(model=classifier, clip_values=(0, 1))
        epsilons=[8/256, 16/256, 32/256, 64/256, 80/256, 128/256]
        epsilons1=[5, 10, 15, 20, 25, 30, 40]
        epsilons2=[0.125, 0.25, 0.3125, 0.5, 1, 1.5, 2]
        eps_sa=0.125
        pa_th=100
        # x_train = model_cifar.x_train
        x_test = model_cifar.x_test
        # y_train = model_cifar.y_train
        y_test = model_cifar.y_test
        y_test_labels = model_cifar.y_test_labels
        translation = 8
        rotation = 30
    elif args.dataset == 'cifar_gray':
        from baselineCNN.cnn.cnn_cifar10_gray import CIFAR10CNN as model
        model_cifar = model(mode='load', filename='cnn_{}.h5'.format(args.dataset))
        classifier=model_cifar.model
        sgd = optimizers.SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
        classifier.compile(loss=categorical_crossentropy, optimizer=sgd, metrics=['accuracy'])
        kclassifier = KerasClassifier(model=classifier, clip_values=(0, 1))
        epsilons=[8/256, 16/256, 32/256, 64/256, 80/256, 128/256]
        epsilons1=[5, 10, 15, 20, 25, 30, 40]
        epsilons2=[0.125, 0.25, 0.3125, 0.5, 1, 1.5, 2]
        eps_sa=0.125
        pa_th=100
        # x_train = model_cifar.x_train
        x_test = model_cifar.x_test
        # y_train = model_cifar.y_train
        y_test = model_cifar.y_test
        y_test_labels = model_cifar.y_test_labels
        translation = 8
        rotation = 30
    elif args.dataset == 'svhn':
        from baselineCNN.cnn.cnn_svhn import SVHNCNN as model
        model_svhn = model(mode='load', filename='cnn_{}.h5'.format(args.dataset))
        classifier=model_svhn.model
        sgd = optimizers.SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
        classifier.compile(loss=categorical_crossentropy, optimizer=sgd, metrics=['accuracy'])
        kclassifier = KerasClassifier(model=classifier, clip_values=(0, 1))
        epsilons=[8/256, 16/256, 32/256, 64/256, 80/256, 128/256]
        epsilons1=[5, 10, 15, 20, 25, 30, 40]
        epsilons2=[0.125, 0.25, 0.3125, 0.5, 1, 1.5, 2]
        eps_sa=0.125
        pa_th=100
        # x_train = model_svhn.x_train
        x_test = model_svhn.x_test
        # y_train = model_svhn.y_train
        y_test = model_svhn.y_test
        y_test_labels = model_svhn.y_test_labels
        translation = 10
        rotation = 60
    elif args.dataset == 'svhn_gray':
        from baselineCNN.cnn.cnn_svhn_gray import SVHNCNN as model
        model_svhn = model(mode='load', filename='cnn_{}.h5'.format(args.dataset))
        classifier=model_svhn.model
        sgd = optimizers.SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
        classifier.compile(loss=categorical_crossentropy, optimizer=sgd, metrics=['accuracy'])
        kclassifier = KerasClassifier(model=classifier, clip_values=(0, 1))
        epsilons=[8/256, 16/256, 32/256, 64/256, 80/256, 128/256]
        epsilons1=[5, 10, 15, 20, 25, 30, 40]
        epsilons2=[0.125, 0.25, 0.3125, 0.5, 1, 1.5, 2]
        eps_sa=0.125
        pa_th=100
        # x_train = model_svhn.x_train
        x_test = model_svhn.x_test
        # y_train = model_svhn.y_train
        y_test = model_svhn.y_test
        y_test_labels = model_svhn.y_test_labels
        translation = 10
        rotation = 60
    elif args.dataset == 'tiny':
        from baselineCNN.cnn.cnn_tiny import TINYCNN as model
        model_tiny = model(mode='load', filename='cnn_{}.h5'.format(args.dataset))
        classifier=model_tiny.model
        sgd = optimizers.SGD(lr=0.001, decay=1e-6, momentum=0.9, nesterov=True)
        classifier.compile(loss=categorical_crossentropy, optimizer=sgd, metrics=['accuracy'])
        kclassifier = KerasClassifier(model=classifier, clip_values=(0, 1))
        epsilons=[8/256, 16/256, 32/256, 64/256, 80/256, 128/256]
        epsilons1=[5, 10, 15, 20, 25, 30, 40]
        epsilons2=[0.125, 0.25, 0.3125, 0.5, 1, 1.5, 2]
        eps_sa=0.125
        pa_th=100
        # x_train = model_tiny.x_train
        x_test = model_tiny.x_test
        # y_train = model_tiny.y_train
        y_test = model_tiny.y_test
        y_test_labels = model_tiny.y_test_labels
        translation = 8
        rotation = 30
        del model_tiny  # free the wrapper; kclassifier keeps the Keras model alive
    elif args.dataset == 'tiny_gray':
        from baselineCNN.cnn.cnn_tiny_gray import TINYCNN as model
        model_tiny = model(mode='load', filename='cnn_{}.h5'.format(args.dataset))
        classifier=model_tiny.model
        sgd = optimizers.SGD(lr=0.001, decay=1e-6, momentum=0.9, nesterov=True)
        classifier.compile(loss=categorical_crossentropy, optimizer=sgd, metrics=['accuracy'])
        kclassifier = KerasClassifier(model=classifier, clip_values=(0, 1))
        epsilons=[8/256, 16/256, 32/256, 64/256, 80/256, 128/256]
        epsilons1=[5, 10, 15, 20, 25, 30, 40]
        epsilons2=[0.125, 0.25, 0.3125, 0.5, 1, 1.5, 2]
        eps_sa=0.125
        # x_train = model_tiny.x_train
        x_test = model_tiny.x_test
        # y_train = model_tiny.y_train
        y_test = model_tiny.y_test
        y_test_labels = model_tiny.y_test_labels
        translation = 8
        rotation = 30
        del model_tiny

    # batch_count_start = args.batch_indx
    # bsize = args.batch_size
    # batch_count_end = batch_count_start + 1

    #FGSM
    for e in epsilons:
        attack = FastGradientMethod(estimator=kclassifier, eps=e, eps_step=0.01, batch_size=256)
        adv_data = attack.generate(x=x_test)
        adv_file_path = adv_path + args.dataset + '_fgsm_' + str(e) + '.npy'
        np.save(adv_file_path, adv_data)
        print('Done - {}'.format(adv_file_path))

    #BIM
    for e in epsilons:
        # max_iter scales with epsilon so larger budgets get more steps.
        attack = BasicIterativeMethod(estimator=kclassifier, eps=e, eps_step=0.01, batch_size=32, max_iter=int(e*256*1.25))
        adv_data = attack.generate(x=x_test)
        adv_file_path = adv_path + args.dataset + '_bim_' + str(e) + '.npy'
        np.save(adv_file_path, adv_data)
        print('Done - {}'.format(adv_file_path))

    #PGD1
    for e in epsilons1:
        attack = ProjectedGradientDescent(estimator=kclassifier, norm=1, eps=e, eps_step=4, batch_size=32)
        adv_data = attack.generate(x=x_test)
        adv_file_path = adv_path + args.dataset + '_pgd1_' + str(e) + '.npy'
        np.save(adv_file_path, adv_data)
        print('Done - {}'.format(adv_file_path))

    #PGD2
    for e in epsilons2:
        attack = ProjectedGradientDescent(estimator=kclassifier, norm=2, eps=e, eps_step=0.1, batch_size=32)
        adv_data = attack.generate(x=x_test)
        adv_file_path = adv_path + args.dataset + '_pgd2_' + str(e) + '.npy'
        np.save(adv_file_path, adv_data)
        print('Done - {}'.format(adv_file_path))

    #PGDInf
    for e in epsilons:
        attack = ProjectedGradientDescent(estimator=kclassifier, norm=np.inf, eps=e, eps_step=0.01, batch_size=32)
        adv_data = attack.generate(x=x_test)
        adv_file_path = adv_path + args.dataset + '_pgdi_' + str(e) + '.npy'
        np.save(adv_file_path, adv_data)
        print('Done - {}'.format(adv_file_path))

    #CWi
    attack = CarliniLInfMethod(classifier=kclassifier, max_iter=200)
    adv_data = attack.generate(x=x_test)
    adv_file_path = adv_path + args.dataset + '_cwi.npy'
    np.save(adv_file_path, adv_data)
    print('Done - {}'.format(adv_file_path))

    # #CWi
    # if args.dataset=='tiny':
    #     for n, x, y in batch(x_test, y_test, batch_size=bsize):
    #         if n>=batch_count_start*bsize and n<batch_count_end*bsize:
    #             adv_file_path = adv_path + args.dataset + '_cwi_' + str(batch_count_start) + '.npy'
    #             if not os.path.isfile(adv_file_path):
    #                 attack = CarliniLInfMethod(classifier=kclassifier, max_iter=100, batch_size=bsize)
    #                 adv_data = attack.generate(x=x)
    #                 np.save(adv_file_path, adv_data)
    #                 print('Done - {}'.format(adv_file_path))

    #CW2 - SLOW
    attack = CarliniL2Method(classifier=kclassifier, max_iter=100, batch_size=1, confidence=10)
    adv_data = attack.generate(x=x_test)
    adv_file_path = adv_path + args.dataset + '_cw2.npy'
    np.save(adv_file_path, adv_data)
    print('Done - {}'.format(adv_file_path))

    #DF
    attack = DeepFool(classifier=kclassifier)
    adv_data = attack.generate(x=x_test)
    adv_file_path = adv_path + args.dataset + '_df.npy'
    np.save(adv_file_path, adv_data)
    print('Done - {}'.format(adv_file_path))

    # #DF
    # if args.dataset=='tiny':
    #     for n, x, y in batch(x_test, y_test, batch_size=bsize):
    #         if n>=batch_count_start*bsize and n<batch_count_end*bsize:
    #             attack = DeepFool(classifier=kclassifier, epsilon=9, max_iter=100)
    #             adv_data = attack.generate(x=x)
    #             adv_file_path = adv_path + args.dataset + '_df_'+ str(batch_count_start) + '.npy'
    #             np.save(adv_file_path, adv_data)
    #             print('Done - {}'.format(adv_file_path))

    #Spatial transofrmation attack
    attack = SpatialTransformation(classifier=kclassifier, max_translation=translation, max_rotation=rotation)
    adv_data = attack.generate(x=x_test)
    adv_file_path = adv_path + args.dataset + '_sta.npy'
    np.save(adv_file_path, adv_data)
    print('Done - {}'.format(adv_file_path))

    #Square Attack
    attack = SquareAttack(estimator=kclassifier, max_iter=200, eps=eps_sa)
    adv_data = attack.generate(x=x_test, y=y_test)
    adv_file_path = adv_path + args.dataset + '_sa.npy'
    np.save(adv_file_path, adv_data)
    print('Done - {}'.format(adv_file_path))

    #HopSkipJump Attack
    # NOTE(review): y_test_next is computed but never used — confirm intent.
    y_test_next= get_next_class(y_test)
    # max_iter=0 + resume=True: each loop pass refines the previous result;
    # the per-pass budget becomes iter_step after the first generate().
    attack = HopSkipJump(classifier=kclassifier, targeted=False, max_iter=0, max_eval=100, init_eval=10)
    iter_step = 10
    adv_data = np.zeros(x_test.shape)
    # adv_data = adv_data[0:25]
    for i in range(4):
        adv_data = attack.generate(x=x_test, x_adv_init=adv_data, resume=True)
        attack.max_iter = iter_step
    # _, acc_normal = classifier.evaluate(x_test[0:25], y_test[0:25])
    # _, acc_adv = classifier.evaluate(adv_data, y_test[0:25])
    # print('Normal accuracy - {}\nAttack accuracy - {}'.format(acc_normal, acc_adv))
    # subcount=1
    # for i in range(0, 25):
    #     plt.subplot(5,5,subcount)
    #     if args.dataset=='mnist':
    #         plt.imshow(adv_data[i][:,:,0])
    #     else:
    #         plt.imshow(adv_data[i][:,:,:])
    #     plt.suptitle(args.dataset+ " sb")
    #     subcount = subcount + 1
    # plt.show()
    adv_file_path = adv_path + args.dataset + '_hop.npy'
    np.save(adv_file_path, adv_data)
    print('Done - {}'.format(adv_file_path))

    #ZOO attack
    attack = ZooAttack(classifier=kclassifier, batch_size=32)
    adv_data = attack.generate(x=x_test, y=y_test)
    adv_file_path = adv_path + args.dataset + '_zoo.npy'
    np.save(adv_file_path, adv_data)
    print('Done - {}'.format(adv_file_path))
def load_hopskip(classifier):
    """Factory for an untargeted HopSkipJump attack on ``classifier``."""
    return HopSkipJump(
        classifier=classifier,
        targeted=False,
        max_iter=10,
        max_eval=1000,
        init_eval=10,
    )
def adversarial_generation(
    model: Architecture,
    x,
    y,
    epsilon=0.25,
    attack_type=AttackType.FGSM,
    num_iter=10,
    attack_backend: str = AttackBackend.FOOLBOX,
):
    """Create adversarial examples for the batch ``x``/``y``.

    Dispatches on ``attack_backend`` (ART, Foolbox, or the project's custom
    implementations) and ``attack_type``.  Returns the attacked batch as a
    detached double tensor on ``device``.

    Raises NotImplementedError for unsupported backend/attack combinations.
    """
    x.requires_grad = True
    logger.info(f"Generating for x (shape={x.shape}) and y (shape={y.shape})")

    if attack_backend == AttackBackend.ART:
        from art.attacks.evasion import (
            FastGradientMethod,
            ProjectedGradientDescent,
            DeepFool as DeepFoolArt,
            CarliniL2Method,
            HopSkipJump,
        )

        if attack_type == AttackType.FGSM:
            attacker = FastGradientMethod(estimator=model.art_classifier, eps=epsilon)
        elif attack_type == AttackType.PGD:
            attacker = ProjectedGradientDescent(
                estimator=model.art_classifier,
                max_iter=num_iter,
                eps=epsilon,
                eps_step=2 * epsilon / num_iter,
            )
        elif attack_type == AttackType.DeepFool:
            attacker = DeepFoolArt(classifier=model.art_classifier, max_iter=num_iter)
        # BUG FIX: was `attack_type == "CW"` — comparing the AttackType enum
        # to a raw string never matched (the Foolbox branch below uses
        # AttackType.CW), so ART CW requests fell through to the
        # NotImplementedError branch.
        elif attack_type == AttackType.CW:
            attacker = CarliniL2Method(
                classifier=model.art_classifier,
                max_iter=num_iter,
                binary_search_steps=15,
            )
        elif attack_type == AttackType.SQUARE:
            # attacker = SquareAttack(estimator=model.get_art_classifier())
            raise NotImplementedError("Work in progress")
        elif attack_type == AttackType.HOPSKIPJUMP:
            attacker = HopSkipJump(
                classifier=model.art_classifier,
                targeted=False,
                max_eval=100,
                max_iter=10,
                init_eval=10,
            )
        else:
            raise NotImplementedError(f"{attack_type} is not available in ART")
        attacked = attacker.generate(x=x.detach().cpu())
        attacked = torch.from_numpy(attacked).to(device)

    elif attack_backend == AttackBackend.FOOLBOX:
        import foolbox as fb

        # These architectures are attacked on raw logits; everything else
        # through the pre-softmax forward mode.
        if model.name in ["efficientnet", "resnet32", "resnet44", "resnet56"]:
            model.set_default_forward_mode(None)
        else:
            model.set_default_forward_mode("presoft")

        if attack_type == AttackType.FGSM:
            attacker = fb.attacks.LinfFastGradientAttack()
        elif attack_type == AttackType.PGD:
            attacker = fb.attacks.LinfProjectedGradientDescentAttack(
                steps=num_iter, random_start=False, rel_stepsize=2 / num_iter)
        elif attack_type == AttackType.DeepFool:
            attacker = fb.attacks.LinfDeepFoolAttack(loss="crossentropy")
        elif attack_type == AttackType.CW:
            attacker = fb.attacks.L2CarliniWagnerAttack(steps=num_iter)
        elif attack_type == AttackType.BOUNDARY:
            attacker = fb.attacks.BoundaryAttack(steps=7000,
                                                 spherical_step=0.01,
                                                 source_step=0.01)
            x = x.float()
        else:
            raise NotImplementedError(
                f"{attack_type} is not available in Foolbox")

        attacked, _, _ = attacker(
            model.foolbox_classifier,
            x.detach(),
            torch.from_numpy(y).to(device),
            epsilons=epsilon,
        )
        model.set_default_forward_mode(None)  # restore default forward mode

    elif attack_backend == AttackBackend.CUSTOM:
        from tda.dataset.custom_attacks import FGSM, BIM, DeepFool, CW

        if attack_type == AttackType.FGSM:
            attacker = FGSM(model, ce_loss)
            attacked = attacker.run(data=x.detach(),
                                    target=torch.from_numpy(y).to(device),
                                    epsilon=epsilon)
        elif attack_type == AttackType.PGD:
            attacker = BIM(model, ce_loss, lims=(0, 1), num_iter=num_iter)
            attacked = attacker.run(data=x.detach(),
                                    target=torch.from_numpy(y).to(device),
                                    epsilon=epsilon)
        elif attack_type == AttackType.DeepFool:
            # Custom DeepFool works per-sample; stack results back into a batch.
            attacker = DeepFool(model, num_classes=10, num_iter=num_iter)
            attacked = [
                attacker(x[i].detach(), torch.tensor(y[i]).to(device))
                for i in range(len(x))
            ]
            attacked = torch.cat([torch.unsqueeze(a, 0) for a in attacked], 0)
        elif attack_type == AttackType.CW:
            attacker = CW(model, lims=(0, 1), num_iter=num_iter)
            attacked = attacker.run(data=x.detach(),
                                    target=torch.from_numpy(y).to(device))
            attacked = torch.cat([torch.unsqueeze(a, 0) for a in attacked], 0)
        else:
            raise NotImplementedError(
                f"{attack_type} is not available as custom implementation")
    else:
        raise NotImplementedError(f"Unknown backend {attack_backend}")

    return attacked.detach().double()
# A toy example of how to call the class if __name__ == '__main__': from sklearn.datasets import load_breast_cancer from sklearn.metrics import f1_score diabetes = load_breast_cancer() X = diabetes.data y = diabetes.target model = PrivateRandomForest(n_estimators=100, epsilon=0.1) model.fit(X, y) print(f1_score(y, model.predict(X))) #print(model.predict(X)) import numpy as np from art.classifiers import SklearnClassifier import copy from art.attacks.evasion import HopSkipJump classifier = SklearnClassifier(model=model) attack = HopSkipJump(classifier=classifier, max_iter=1, max_eval=100) X_test_adv = attack.generate(X) print(model.predict(X_test_adv))
def experiment(dataset_id, folder, n_estimators=500, reps=5, n_attack=50):
    """Compare adversarial robustness of a KDF model vs. its internal random
    forest on one OpenML dataset using HopSkipJump, saving results to CSV.

    Skips (returns early) on categorical features, NaNs, or datasets that are
    too large.  For each repetition: fit, measure clean error, attack a
    random subset of samples both models classify correctly, and record the
    L2/Linf perturbation norms plus adversarial error rates.
    """
    dataset = openml.datasets.get_dataset(dataset_id)
    X, y, is_categorical, _ = dataset.get_data(
        dataset_format="array", target=dataset.default_target_attribute)

    # Only purely numeric, NaN-free datasets are supported.
    if np.mean(is_categorical) > 0:
        return
    if np.isnan(np.sum(y)):
        return
    if np.isnan(np.sum(X)):
        return

    total_sample = X.shape[0]  # NOTE(review): unused below — confirm.
    unique_classes, counts = np.unique(y, return_counts=True)
    test_sample = min(counts) // 3

    # Per-class sample indices (shuffled each repetition below).
    indx = []
    for label in unique_classes:
        indx.append(np.where(y == label)[0])

    max_sample = min(counts) - test_sample
    train_samples = np.logspace(np.log10(2),
                                np.log10(max_sample),
                                num=10,
                                endpoint=True,
                                dtype=int)
    # Keep only the largest train size from the log grid.
    train_samples = [train_samples[-1]]

    # Only use small data for now
    if train_samples[-1] > 1000:
        return

    l2_kdf_list = []
    l2_rf_list = []
    linf_kdf_list = []
    linf_rf_list = []
    err_adv_rf_list = []
    err_adv_kdf_list = []
    err_rf = []
    err_kdf = []
    mc_rep = []
    samples_attack = []
    samples = []

    for train_sample in train_samples:
        for rep in range(reps):
            # Stratified train/test index selection per class.
            indx_to_take_train = []
            indx_to_take_test = []
            for ii, _ in enumerate(unique_classes):
                np.random.shuffle(indx[ii])
                indx_to_take_train.extend(list(indx[ii][:train_sample]))
                indx_to_take_test.extend(
                    list(indx[ii][-test_sample:counts[ii]]))

            # Fit the estimators
            model_kdf = kdf(
                kwargs={
                    "n_estimators": n_estimators,
                    "min_samples_leaf":
                    int(np.ceil(X.shape[1] * 10 / np.log(train_sample))),
                })
            model_kdf.fit(X[indx_to_take_train], y[indx_to_take_train])
            proba_kdf = model_kdf.predict_proba(X[indx_to_take_test])
            proba_rf = model_kdf.rf_model.predict_proba(X[indx_to_take_test])
            predicted_label_kdf = np.argmax(proba_kdf, axis=1)
            predicted_label_rf = np.argmax(proba_rf, axis=1)

            # Initial classification error
            err_rf.append(1 -
                          np.mean(predicted_label_rf == y[indx_to_take_test]))
            err_kdf.append(1 - np.mean(
                predicted_label_kdf == y[indx_to_take_test]))

            ## Adversarial attack ###
            def _predict_kdf(x):
                """Wrapper to query black box"""
                proba_kdf = model_kdf.predict_proba(x)
                predicted_label_kdf = np.argmax(proba_kdf, axis=1)
                return to_categorical(
                    predicted_label_kdf,
                    nb_classes=len(np.unique(y[indx_to_take_train])),
                )

            def _predict_rf(x):
                """Wrapper to query blackbox for rf"""
                proba_rf = model_kdf.rf_model.predict_proba(x)
                predicted_label_rf = np.argmax(proba_rf, axis=1)
                return to_categorical(predicted_label_rf,
                                      nb_classes=len(
                                          np.unique(y[indx_to_take_train])))

            art_classifier_kdf = BlackBoxClassifier(
                _predict_kdf,
                X[indx_to_take_train][0].shape,
                len(np.unique(y[indx_to_take_train])),
            )
            art_classifier_rf = BlackBoxClassifier(
                _predict_rf,
                X[indx_to_take_train][0].shape,
                len(np.unique(y[indx_to_take_train])),
            )
            attack_rf = HopSkipJump(
                classifier=art_classifier_rf,
                targeted=False,
                max_iter=50,
                max_eval=1000,
                init_eval=10,
            )
            attack_kdf = HopSkipJump(
                classifier=art_classifier_kdf,
                targeted=False,
                max_iter=50,
                max_eval=1000,
                init_eval=10,
            )

            ### For computational reasons, attack a random subset that is identified correctly
            # Get indices of correctly classified samples common to both
            selection_idx = indx_to_take_train
            proba_kdf = model_kdf.predict_proba(X[selection_idx])
            proba_rf = model_kdf.rf_model.predict_proba(X[selection_idx])
            predicted_label_kdf = np.argmax(proba_kdf, axis=1)
            predicted_label_rf = np.argmax(proba_rf, axis=1)
            idx_kdf = np.where(predicted_label_kdf == y[selection_idx])[0]
            idx_rf = np.where(predicted_label_rf == y[selection_idx])[0]
            idx_common = list(np.intersect1d(idx_kdf, idx_rf))

            # Randomly sample from the common indices
            # NOTE(review): this rebinds the `n_attack` parameter, so the cap
            # persists across repetitions — confirm intended.
            if n_attack > len(idx_common):
                n_attack = len(idx_common)
            idx = random.sample(idx_common, n_attack)
            if n_attack == 0:
                return

            ### Generate samples
            x_adv_kdf = attack_kdf.generate(X[selection_idx][idx])
            x_adv_rf = attack_rf.generate(X[selection_idx][idx])

            # Compute norms
            l2_kdf = np.mean(
                np.linalg.norm(X[selection_idx][idx] - x_adv_kdf, ord=2,
                               axis=1))
            l2_rf = np.mean(
                np.linalg.norm(X[selection_idx][idx] - x_adv_rf, ord=2,
                               axis=1))
            linf_rf = np.mean(
                np.linalg.norm(X[selection_idx][idx] - x_adv_rf,
                               ord=np.inf,
                               axis=1))
            linf_kdf = np.mean(
                np.linalg.norm(X[selection_idx][idx] - x_adv_kdf,
                               ord=np.inf,
                               axis=1))

            ### Classification
            # Make adversarial prediction
            proba_rf = model_kdf.rf_model.predict_proba(x_adv_rf)
            predicted_label_rf_adv = np.argmax(proba_rf, axis=1)
            err_adv_rf = 1 - np.mean(
                predicted_label_rf_adv == y[selection_idx][idx])
            proba_kdf = model_kdf.predict_proba(x_adv_kdf)
            predicted_label_kdf_adv = np.argmax(proba_kdf, axis=1)
            err_adv_kdf = 1 - np.mean(
                predicted_label_kdf_adv == y[selection_idx][idx])

            print("l2_rf = {:.4f}, linf_rf = {:.4f}, err_rf = {:.4f}".format(
                l2_rf, linf_rf, err_adv_rf))
            print(
                "l2_kdf = {:.4f}, linf_kdf = {:.4f}, err_kdf = {:.4f}".format(
                    l2_kdf, linf_kdf, err_adv_kdf))

            l2_kdf_list.append(l2_kdf)
            l2_rf_list.append(l2_rf)
            linf_kdf_list.append(linf_kdf)
            linf_rf_list.append(linf_rf)
            err_adv_kdf_list.append(err_adv_kdf)
            err_adv_rf_list.append(err_adv_rf)
            mc_rep.append(rep)
            samples_attack.append(n_attack)
            samples.append(train_sample)

    df = pd.DataFrame()
    df["l2_kdf"] = l2_kdf_list
    df["l2_rf"] = l2_rf_list
    df["linf_kdf"] = linf_kdf_list
    df["linf_rf"] = linf_rf_list
    df["err_kdf"] = err_kdf
    df["err_rf"] = err_rf
    df["err_adv_kdf"] = err_adv_kdf_list
    df["err_adv_rf"] = err_adv_rf_list
    df["rep"] = mc_rep
    df["samples_attack"] = samples_attack
    df["samples"] = samples
    df.to_csv(folder + "/" + "openML_cc18_" + str(dataset_id) + ".csv")
# Train a random forest and measure its empirical adversarial robustness
# with a minimally-budgeted HopSkipJump attack.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)
model = RandomForestClassifier()
model.fit(X_train, y_train)
print(X_test.shape)
print("trained")
# Wrap the fitted sklearn model for ART.
classifier = SklearnClassifier(model=model)
# Tiny budget (max_iter=1, max_eval=10): fast but weak attack.
attack = HopSkipJump(classifier=classifier, max_iter=1, max_eval=10, init_eval=10, init_size=1)
# NOTE(review): X_test_attacked is never used afterwards — confirm intended.
X_test_attacked = attack.generate(X_test, y_test)
# 'hsj' selects HopSkipJump inside empirical_robustness; same tiny budget.
robustness = empirical_robustness(classifier, X_test, 'hsj',
                                  attack_params={
                                      'max_iter': 1,
                                      'max_eval': 10,
                                      'init_eval': 10,
                                      'init_size': 1
                                  })
print('Robustness: ' + str(robustness))