def hopskipjump(
    data,
    query,
    query_limit,
    art_model,
    victim_input_shape,
    substitute_input_shape,
    victim_output_targets,
):
    """Runs the HopSkipJump evasion attack

    Arxiv Paper: https://arxiv.org/abs/1904.02144

    The query budget is split in half: the first half is handed to
    ``copycat`` to obtain the seed inputs, the second half bounds both
    ``max_iter`` and ``max_eval`` of the HopSkipJump attack.

    Args:
        data: Forwarded unchanged to ``copycat``.
        query: Callable used to label the adversarial examples (also
            forwarded to ``copycat``).
        query_limit: Total query budget shared by both stages.
        art_model: ART classifier handed to ``HopSkipJump``.
        victim_input_shape: Forwarded unchanged to ``copycat``.
        substitute_input_shape: Shape passed to ``reshape_input`` for the
            returned adversarial examples.
        victim_output_targets: Forwarded unchanged to ``copycat``.

    Returns:
        Tuple ``(result, y)``: the adversarial examples reshaped with
        ``reshape_input`` and the output of ``query`` on them (queried before
        reshaping).
    """
    internal_limit = int(query_limit * 0.5)
    # Only the inputs are needed here; the labels are recomputed below by
    # querying on the adversarial examples.
    X, _ = copycat(
        data,
        query,
        internal_limit,
        art_model,
        victim_input_shape,
        substitute_input_shape,
        victim_output_targets,
        reshape=False,
    )
    X_np = X.detach().clone().numpy()

    evasion_limit = int(query_limit * 0.5)

    # The initial evaluation number must be lower than the maximum, and at
    # least 1: use 1% of the evasion budget.
    init_eval = max(1, int(0.01 * evasion_limit))

    # `targeted` is passed by keyword: newer ART releases insert `batch_size`
    # as the second positional parameter, so a positional False would be
    # misinterpreted there.
    attack = HopSkipJump(
        art_model,
        targeted=False,
        norm="inf",
        max_iter=evasion_limit,
        max_eval=evasion_limit,
        init_eval=init_eval,
    )

    # Run the attack exactly once. The previous code called generate() twice
    # and discarded the first result, doubling the number of victim queries.
    adversarial_np = attack.generate(X_np)
    result = torch.from_numpy(adversarial_np).clone().detach().float()

    y = query(result)
    result = reshape_input(result, substitute_input_shape)
    return result, y
def attack(predictWrapper, x_train, x_test, y_train, y_test, input_shape, datapoint):
    """Generate an adversarial version of ``datapoint`` with HopSkipJump.

    The black-box classifier queries ``predictWrapper.predict_one_hot``, is
    declared with two classes, and clips to the min/max of ``x_train``.

    Args:
        predictWrapper: Object exposing ``predict_one_hot``.
        x_train: Training inputs; only its min, max and shape are used.
        x_test: Test inputs; only its shape is printed.
        y_train: Training labels; only its shape is printed.
        y_test: Test labels; only its shape is printed.
        input_shape: Number of features; ``datapoint`` is reshaped to
            ``(-1, input_shape)``.
        datapoint: Sample(s) to attack.

    Returns:
        The array returned by ``HopSkipJump.generate`` for the reshaped
        datapoint.
    """
    min_pixel_value = x_train.min()
    max_pixel_value = x_train.max()
    print('min_pixel_value ', min_pixel_value)
    print('max_pixel_value ', max_pixel_value)
    for label, array in (
        ('xtrain shape: ', x_train),
        ('xtest shape: ', x_test),
        ('y_train shape: ', y_train),
        ('ytest shape: ', y_test),
    ):
        print(label, array.shape)

    # Black-box wrapper around the one-hot prediction function.
    black_box = BlackBoxClassifier(
        predict=predictWrapper.predict_one_hot,
        input_shape=(input_shape, ),
        nb_classes=2,
        clip_values=(min_pixel_value, max_pixel_value),
    )
    print('----- generate adv data by HopSkipJump attack -----')

    # The timer starts before the attack object is built, so the reported
    # time covers construction, generation and the distortion() call.
    started = time.time()
    hsj = HopSkipJump(
        classifier=black_box,
        targeted=False,
        norm=2,
        max_iter=100,
        max_eval=10000,
        init_eval=100,
        init_size=100,
    )

    # Input data shape should be 2D
    flat_point = datapoint.reshape((-1, input_shape))
    adv_data = hsj.generate(x=flat_point)
    # distortion() is defined elsewhere; presumably it reports the size of
    # the perturbation - TODO confirm.
    distortion(flat_point, adv_data)
    print('Generate test adv cost time: ', time.time() - started)
    return adv_data
def init_hopskipjump(config, data, limit=50):
    """Runs the HopSkipJump evasion attack

    Arxiv Paper: https://arxiv.org/abs/1904.02144

    Args:
        config: ART classifier handed to ``HopSkipJump``.
        data: Inputs passed to ``HopSkipJump.generate``.
        limit: Value used for ``max_iter``.

    Returns:
        Whatever ``HopSkipJump.generate`` returns for ``data``.
    """
    # `targeted` is passed by keyword: newer ART releases insert `batch_size`
    # as the second positional parameter, so a positional False would be
    # misinterpreted there.
    attack = HopSkipJump(
        config,
        targeted=False,
        max_iter=limit,
        max_eval=100,
        init_eval=10,
    )
    return attack.generate(data)
def get(self):
    """Attack the submitted image with HopSkipJump and return the result.

    Reads the ``img`` request argument (comma-separated values reshaped to
    28x28), multiplies it by 255, zero-pads it to 32x32 and runs the attack
    against the module-level ``classifier``.

    Returns:
        A ``jsonify`` response with ``res`` (string form of
        ``predict(x_adv)``) and ``dat`` (base64 text of the adversarial image
        encoded as JPEG).

    Side effects:
        Writes the adversarial image to ``test.jpg`` and prints the
        prediction.
    """
    args = self.parser.parse_args()
    pixels = args.get('img').split(',')
    digit = np.array(pixels, np.float32).reshape(28, 28) * 255.0

    # Two pixels of zero padding on every side: 28x28 -> 32x32, then add the
    # leading batch axis and trailing channel axis.
    img_new = np.zeros((1, 32, 32, 1))
    img_new[0] = np.pad(digit, [(2, 2), (2, 2)], mode='constant').reshape(32, 32, 1)

    # NOTE(review): the original indentation was lost; everything up to the
    # return is kept inside the graph context - confirm this scope.
    with graph.as_default():
        set_session(sess)
        attack = HopSkipJump(classifier=classifier, targeted=False, max_iter=0, max_eval=1000, init_eval=10)
        iter_step = 3
        x_adv = None
        # The first call runs with max_iter=0; later calls continue from the
        # previous adversarial example with max_iter=iter_step.
        for _ in range(iter_step):
            x_adv = attack.generate(x=img_new, x_adv_init=x_adv, resume=True)
            attack.max_iter = iter_step

        sav_img = Image.fromarray(x_adv.reshape(32, 32)).convert("L")
        sav_img.save("test.jpg")

        # Encode the same image in memory for the JSON payload.
        buffer = BytesIO()
        sav_img.save(buffer, format="JPEG")
        encoded = base64.b64encode(buffer.getvalue()).decode()

        res = str(predict(x_adv))
        print("After Attack: ", res)
        return jsonify({
            'res': res,
            'dat': encoded
        })
def robust_score(y_true, y_pred, eps=0.1, X=None, y=None, model=None, feature_selector=None, scorer=None):
    """Return the score drop of a freshly fitted model under a HopSkipJump attack.

    Rows whose positions appear in ``y_true.index`` form the test split; all
    remaining rows of ``X`` form the training split. A deep copy of ``model``
    is fitted on the training split, the test split is attacked, and the
    difference between the clean and adversarial scores is returned.

    Args:
        y_true: Object whose ``index.values`` holds the test row positions.
        y_pred: Unused; kept for interface compatibility.
        eps: Unused; kept for interface compatibility.
        X: Feature matrix indexed as ``X[ids, :]``.
        y: Label array indexed as ``y[ids]``.
        model: Estimator with ``fit``; it is deep-copied, never modified.
        feature_selector: Optional transformer, fitted on the training split
            and applied to both splits. Skipped when ``None``.
        scorer: Callable ``scorer(model, X, y)`` returning a number.

    Returns:
        ``scorer`` on the clean test data minus ``scorer`` on the adversarial
        test data.
    """
    test_ids = y_true.index.values
    held_out = set(test_ids)
    # Ascending order keeps the training split deterministic; the previous
    # set difference depended on set iteration order.
    train_ids = [row for row in range(X.shape[0]) if row not in held_out]

    y_train = y[train_ids]
    y_test = y[test_ids]
    X_train = X[train_ids, :]
    X_test = X[test_ids, :]

    if feature_selector is not None:
        X_train = feature_selector.fit_transform(X_train)
        X_test = feature_selector.transform(X_test)

    # Fit a copy so the caller's estimator is left untouched.
    best_model = copy.deepcopy(model)
    best_model.fit(X_train, y_train)

    classifier = SklearnClassifier(model=best_model)
    attack = HopSkipJump(classifier=classifier, max_iter=1, max_eval=10, init_eval=5, init_size=1)
    X_test_adv = attack.generate(X_test)

    return scorer(best_model, X_test, y_test) - scorer(best_model, X_test_adv, y_test)
def robust_score_test(eps=0.1, X_test=None, y_test=None, model=None, feature_selector=None, scorer=None):
    """Return the score drop of ``model`` on test data under a HopSkipJump attack.

    Unlike ``robust_score`` this does not fit anything: a deep copy of
    ``model`` is used as is.

    Args:
        eps: Unused; kept for interface compatibility.
        X_test: Test feature matrix.
        y_test: Test labels passed to ``scorer``.
        model: Estimator wrapped by ``SklearnClassifier``; it is deep-copied.
        feature_selector: Optional transformer applied to ``X_test`` with
            ``transform``. Skipped when ``None``.
        scorer: Callable ``scorer(model, X, y)`` returning a number.

    Returns:
        ``scorer`` on the clean test data minus ``scorer`` on the adversarial
        test data.
    """
    # The parameter defaults to None, so guard the transform the same way
    # robust_score does instead of failing with AttributeError.
    if feature_selector is not None:
        X_test_filtered = feature_selector.transform(X_test)
    else:
        X_test_filtered = X_test

    best_model = copy.deepcopy(model)
    classifier = SklearnClassifier(model=best_model)
    attack = HopSkipJump(classifier=classifier, max_iter=1, max_eval=10, init_eval=5, init_size=1)
    X_test_adv = attack.generate(X_test_filtered)

    score_original_test = scorer(best_model, X_test_filtered, y_test)
    score_corrupted_test = scorer(best_model, X_test_adv, y_test)
    return score_original_test - score_corrupted_test
# Create a query function for a PyTorch Lightning model
model = train_mnist_victim()


def query_mnist(input_data):
    """Query the victim model with a NumPy batch.

    Args:
        input_data: NumPy array; it is converted with ``torch.from_numpy``.

    Returns:
        Whatever ``get_target`` returns for the model and the tensor.
    """
    input_data = torch.from_numpy(input_data)
    return get_target(model, input_data)


emnist_train, emnist_test = get_emnist_data()

# NOTE(review): input_shape includes a leading 1 while the data passed to the
# attack below has four axes in total - confirm whether the per-sample shape
# was intended here.
test = BlackBoxClassifier(
    predict=query_mnist,
    input_shape=(1, 28, 28, 1),
    nb_classes=10,
    clip_values=(0, 255),
    preprocessing_defences=None,
    postprocessing_defences=None,
    preprocessing=None,
)

# `targeted` is passed by keyword: newer ART releases insert `batch_size` as
# the second positional parameter, so a positional False would be
# misinterpreted there.
attack = HopSkipJump(test, targeted=False, max_iter=50, max_eval=100, init_eval=10)

X, y = emnist_train.data, emnist_train.targets
X = X.to(torch.float32)
X = X.unsqueeze(3)

# The attack and the query callback (torch.from_numpy) work on NumPy arrays,
# so hand over the array form of the tensor rather than the tensor itself.
attack.generate(X.numpy())
target_image = x_train[0]

# Generate HopSkipJump attack against black box classifier.
# max_iter starts at 0 and is raised to iter_step after the first pass.
attack = HopSkipJump(
    classifier=classifier,
    targeted=True,
    max_iter=0,
    max_eval=1000,
    init_eval=10,
)
iter_step = 10

# The starting adversarial image is loaded from disk and given a leading
# batch axis.
stop = Image.open(curr_path + "../danny-machine/machine.jpg")
stop = np.expand_dims(np.array(stop), axis=0).astype(float)

x_adv = stop
errors = []
for i in range(100):
    # Each pass continues from the previous adversarial image.
    x_adv = attack.generate(x=np.array([target_image]), y=[1], x_adv_init=x_adv)

    # L2 distance between the current adversarial image and the target.
    l2_err = np.linalg.norm((x_adv[0] - target_image).reshape(-1))
    print(
        "Adversarial image at step %d." % (i * iter_step),
        "L2 error",
        l2_err,
        "and class label %d." % np.argmax(classifier.predict(x_adv)[0]),
    )
    errors.append((i * iter_step, l2_err))

    # Save a snapshot of this step.
    im = Image.fromarray(x_adv[0].astype(np.uint8).reshape(SHAPE))
    im.save(curr_path + f"../danny-machine/step{i}.png")

    attack.max_iter = iter_step

print(errors)
print('max_pixel_value ', max_pixel_value) # Create classifier classifier = BlackBoxClassifier(predict=predictWrapper.predict_one_hot, input_shape=input_shape, nb_classes=args.n_classes, clip_values=(min_pixel_value, max_pixel_value)) print('----- generate adv data by HopSkipJump attack -----') # Generate adversarial test examples attacker = HopSkipJump(classifier=classifier, targeted=False, norm=2, max_iter=40, max_eval=10000, init_eval=100, init_size=100) # attacker = HopSkipJump(classifier=classifier, targeted=False, norm=2, max_iter=2, max_eval=10000, init_eval=100, init_size=100) # Input data shape should be 2D datapoint = test[correct_index[:1]] s = time.time() adv_data = attacker.generate(x=datapoint) # distortion(datapoint, adv_data) print('Generate test adv cost time: ', time.time() - s) # return adv_data
# A toy example of how to call the class
if __name__ == '__main__':
    # NOTE(review): copy and np are not used in this block; the imports are
    # kept in case other module code relies on them - confirm.
    import copy

    import numpy as np
    from sklearn.datasets import load_breast_cancer
    from sklearn.metrics import f1_score
    from art.attacks.evasion import HopSkipJump

    # Newer ART releases expose the wrapper under art.estimators; fall back
    # to the legacy art.classifiers path for older installations.
    try:
        from art.estimators.classification import SklearnClassifier
    except ImportError:
        from art.classifiers import SklearnClassifier

    # Fit the private forest on the breast-cancer data and report the
    # training-set F1 score.
    dataset = load_breast_cancer()
    X = dataset.data
    y = dataset.target
    model = PrivateRandomForest(n_estimators=100, epsilon=0.1)
    model.fit(X, y)
    print(f1_score(y, model.predict(X)))

    # Attack the fitted model on the same data and print its predictions for
    # the adversarial samples.
    classifier = SklearnClassifier(model=model)
    attack = HopSkipJump(classifier=classifier, max_iter=1, max_eval=100)
    X_test_adv = attack.generate(X)
    print(model.predict(X_test_adv))
def _hard_label_oracle(predict_proba, n_classes):
    """Wrap ``predict_proba`` as a black-box query returning one-hot labels."""

    def _predict(x):
        predicted_label = np.argmax(predict_proba(x), axis=1)
        return to_categorical(predicted_label, nb_classes=n_classes)

    return _predict


def experiment(dataset_id, folder, n_estimators=500, reps=5, n_attack=50):
    """Compare HopSkipJump robustness of a kdf model and its ``rf_model``.

    For each repetition a class-balanced train/test split is drawn, a ``kdf``
    model is fitted, and training samples that both the kdf model and its
    ``rf_model`` classify correctly are attacked with HopSkipJump through
    hard-label black-box wrappers. Mean L2 / L-infinity perturbation sizes
    and clean / adversarial error rates are collected per repetition and
    written to ``<folder>/openML_cc18_<dataset_id>.csv``.

    The function returns without writing anything when the dataset has
    categorical features, contains NaN in ``X`` or ``y``, has a training size
    above 1000, or when a repetition has no sample that both models classify
    correctly.

    Args:
        dataset_id: OpenML dataset id.
        folder: Output directory for the CSV file.
        n_estimators: ``n_estimators`` forwarded to ``kdf``.
        reps: Number of repetitions.
        n_attack: Maximum number of samples attacked per repetition.

    Returns:
        None.
    """
    dataset = openml.datasets.get_dataset(dataset_id)
    X, y, is_categorical, _ = dataset.get_data(
        dataset_format="array", target=dataset.default_target_attribute)

    # Skip datasets with categorical features or missing values.
    if np.mean(is_categorical) > 0:
        return
    if np.isnan(np.sum(y)):
        return
    if np.isnan(np.sum(X)):
        return

    unique_classes, counts = np.unique(y, return_counts=True)

    # A third of the smallest class is reserved for testing (per class).
    test_sample = min(counts) // 3
    indx = [np.where(y == label)[0] for label in unique_classes]

    max_sample = min(counts) - test_sample
    train_samples = np.logspace(
        np.log10(2), np.log10(max_sample), num=10, endpoint=True, dtype=int)
    # Only the largest training size of the grid is used.
    train_samples = [train_samples[-1]]

    # Only use small data for now
    if train_samples[-1] > 1000:
        return

    l2_kdf_list = []
    l2_rf_list = []
    linf_kdf_list = []
    linf_rf_list = []
    err_adv_rf_list = []
    err_adv_kdf_list = []
    err_rf = []
    err_kdf = []
    mc_rep = []
    samples_attack = []
    samples = []

    for train_sample in train_samples:
        for rep in range(reps):
            # Per class: shuffle, take the first entries for training and the
            # last `test_sample` entries for testing.
            indx_to_take_train = []
            indx_to_take_test = []
            for ii, _ in enumerate(unique_classes):
                np.random.shuffle(indx[ii])
                indx_to_take_train.extend(list(indx[ii][:train_sample]))
                indx_to_take_test.extend(
                    list(indx[ii][-test_sample:counts[ii]]))

            X_train, y_train = X[indx_to_take_train], y[indx_to_take_train]
            X_test, y_test = X[indx_to_take_test], y[indx_to_take_test]
            n_classes = len(np.unique(y_train))

            # Fit the estimators
            model_kdf = kdf(
                kwargs={
                    "n_estimators": n_estimators,
                    "min_samples_leaf": int(
                        np.ceil(X.shape[1] * 10 / np.log(train_sample))),
                })
            model_kdf.fit(X_train, y_train)

            # Initial classification error
            proba_kdf = model_kdf.predict_proba(X_test)
            proba_rf = model_kdf.rf_model.predict_proba(X_test)
            err_rf.append(1 - np.mean(np.argmax(proba_rf, axis=1) == y_test))
            err_kdf.append(1 - np.mean(np.argmax(proba_kdf, axis=1) == y_test))

            ## Adversarial attack ###
            # Both models are exposed to the attack as hard-label oracles.
            art_classifier_kdf = BlackBoxClassifier(
                _hard_label_oracle(model_kdf.predict_proba, n_classes),
                X.shape[1:],
                n_classes,
            )
            art_classifier_rf = BlackBoxClassifier(
                _hard_label_oracle(model_kdf.rf_model.predict_proba, n_classes),
                X.shape[1:],
                n_classes,
            )
            attack_rf = HopSkipJump(
                classifier=art_classifier_rf,
                targeted=False,
                max_iter=50,
                max_eval=1000,
                init_eval=10,
            )
            attack_kdf = HopSkipJump(
                classifier=art_classifier_kdf,
                targeted=False,
                max_iter=50,
                max_eval=1000,
                init_eval=10,
            )

            ### For computational reasons, attack a random subset that is identified correctly
            # Get indices of correctly classified samples common to both
            predicted_label_kdf = np.argmax(
                model_kdf.predict_proba(X_train), axis=1)
            predicted_label_rf = np.argmax(
                model_kdf.rf_model.predict_proba(X_train), axis=1)
            idx_kdf = np.where(predicted_label_kdf == y_train)[0]
            idx_rf = np.where(predicted_label_rf == y_train)[0]
            idx_common = list(np.intersect1d(idx_kdf, idx_rf))

            # Clamp per repetition. The previous code overwrote `n_attack`
            # itself, so one small repetition permanently reduced the attack
            # size of every later repetition.
            n_attack_rep = min(n_attack, len(idx_common))
            if n_attack_rep == 0:
                return

            # Randomly sample from the common indices
            idx = random.sample(idx_common, n_attack_rep)
            x_attack = X_train[idx]
            y_attack = y_train[idx]

            ### Generate samples
            x_adv_kdf = attack_kdf.generate(x_attack)
            x_adv_rf = attack_rf.generate(x_attack)

            # Compute norms
            l2_kdf = np.mean(
                np.linalg.norm(x_attack - x_adv_kdf, ord=2, axis=1))
            l2_rf = np.mean(
                np.linalg.norm(x_attack - x_adv_rf, ord=2, axis=1))
            linf_rf = np.mean(
                np.linalg.norm(x_attack - x_adv_rf, ord=np.inf, axis=1))
            linf_kdf = np.mean(
                np.linalg.norm(x_attack - x_adv_kdf, ord=np.inf, axis=1))

            ### Classification
            # Make adversarial prediction
            proba_rf = model_kdf.rf_model.predict_proba(x_adv_rf)
            predicted_label_rf_adv = np.argmax(proba_rf, axis=1)
            err_adv_rf = 1 - np.mean(predicted_label_rf_adv == y_attack)

            proba_kdf = model_kdf.predict_proba(x_adv_kdf)
            predicted_label_kdf_adv = np.argmax(proba_kdf, axis=1)
            err_adv_kdf = 1 - np.mean(predicted_label_kdf_adv == y_attack)

            print("l2_rf = {:.4f}, linf_rf = {:.4f}, err_rf = {:.4f}".format(
                l2_rf, linf_rf, err_adv_rf))
            print(
                "l2_kdf = {:.4f}, linf_kdf = {:.4f}, err_kdf = {:.4f}".format(
                    l2_kdf, linf_kdf, err_adv_kdf))

            l2_kdf_list.append(l2_kdf)
            l2_rf_list.append(l2_rf)
            linf_kdf_list.append(linf_kdf)
            linf_rf_list.append(linf_rf)
            err_adv_kdf_list.append(err_adv_kdf)
            err_adv_rf_list.append(err_adv_rf)
            mc_rep.append(rep)
            samples_attack.append(n_attack_rep)
            samples.append(train_sample)

    # One row per repetition; column order matches the previous output.
    df = pd.DataFrame({
        "l2_kdf": l2_kdf_list,
        "l2_rf": l2_rf_list,
        "linf_kdf": linf_kdf_list,
        "linf_rf": linf_rf_list,
        "err_kdf": err_kdf,
        "err_rf": err_rf,
        "err_adv_kdf": err_adv_kdf_list,
        "err_adv_rf": err_adv_rf_list,
        "rep": mc_rep,
        "samples_attack": samples_attack,
        "samples": samples,
    })
    df.to_csv(folder + "/" + "openML_cc18_" + str(dataset_id) + ".csv")
random_state=42)
# NOTE(review): the line above is the tail of a statement that starts outside
# this fragment; it is left untouched.

# Fit a random forest with default settings on the training split.
model = RandomForestClassifier()
model.fit(X_train, y_train)
print(X_test.shape)
print("trained")

# Wrap the fitted model for ART and attack the test split.
classifier = SklearnClassifier(model=model)
attack = HopSkipJump(classifier=classifier, max_iter=1, max_eval=10, init_eval=10, init_size=1)
X_test_attacked = attack.generate(X_test, y_test)

# empirical_robustness is given the attack name 'hsj' and the same parameter
# values as the attack object built above.
robustness = empirical_robustness(classifier, X_test, 'hsj', attack_params={ 'max_iter': 1, 'max_eval': 10, 'init_eval': 10, 'init_size': 1 })
print('Robustness: ' + str(robustness))
print("generated")

# Predictions of the original model on the adversarial test samples.
y_test_attacked = model.predict(X_test_attacked)