def infer(self, attack_model_type: str = "nn", *args, **kwargs) -> Tuple[np.ndarray, np.ndarray]:
    """Alias method for attack().

    :param attack_model_type: Type of the attack model. One of "rf", "gb", "nn".
    :param args: Arguments of the attack.
    :param kwargs: Keyword arguments of the attack.
    :return: Two arrays holding the inferred membership status. The first array
        includes the results for the inferred membership status of the train data
        and the second includes the results for the test data, where 1 indicates a
        member and 0 indicates non-member. The optimal attack would return only
        ones for the first array and only zeros for the second.
    :raises ValueError: If ``attack_model_type`` is not one of "rf", "gb", "nn".
    """
    # Validate with an explicit exception instead of `assert`, which is
    # silently stripped when Python runs with -O.
    if attack_model_type not in ("rf", "gb", "nn"):
        raise ValueError(
            f"Unknown attack_model_type: {attack_model_type!r}; expected one of 'rf', 'gb', 'nn'."
        )

    attack = MembershipInferenceBlackBox(
        self.target_model.art_classifier, attack_model_type=attack_model_type
    )
    # Fit the attack model on the first attack_train_size / attack_test_size
    # samples of the train and test splits ...
    attack.fit(
        self.x_train[: self.attack_train_size],
        self.y_train[: self.attack_train_size],
        self.x_test[: self.attack_test_size],
        self.y_test[: self.attack_test_size],
    )
    # ... and infer membership on the held-out remainder of each split.
    inferred_train_data = attack.infer(
        self.x_train[self.attack_train_size :],
        self.y_train[self.attack_train_size :],
    )
    inferred_test_data = attack.infer(
        self.x_test[self.attack_test_size :],
        self.y_test[self.attack_test_size :],
    )
    return inferred_train_data, inferred_test_data
def test_meminf_black_box_slice(art_warning, decision_tree_estimator, get_iris_dataset):
    try:
        attack_feature = 2  # petal length

        # Bucketize the attacked feature into three categorical marker values.
        def transform_feature(x):
            # Compute every mask on the original values before mutating in place.
            high = x > 0.5
            mid = (x > 0.2) & (x <= 0.5)
            low = x <= 0.2
            x[high] = 0.6
            x[mid] = 0.35
            x[low] = 0.1

        values = [0.1, 0.35, 0.6]

        (x_train_iris, y_train_iris), (x_test_iris, y_test_iris) = get_iris_dataset

        # Training data: drop the attacked column, bucketize a copy of it,
        # then splice the bucketized column back into its original position.
        x_train_for_attack = np.delete(x_train_iris, attack_feature, 1)
        x_train_feature = x_train_iris[:, attack_feature].copy().reshape(-1, 1)
        transform_feature(x_train_feature)
        x_train = np.concatenate(
            (
                x_train_for_attack[:, :attack_feature],
                x_train_feature,
                x_train_for_attack[:, attack_feature:],
            ),
            axis=1,
        )

        # Same preparation for the test split.
        x_test_for_attack = np.delete(x_test_iris, attack_feature, 1)
        x_test_feature = x_test_iris[:, attack_feature].copy().reshape(-1, 1)
        transform_feature(x_test_feature)
        x_test = np.concatenate(
            (
                x_test_for_attack[:, :attack_feature],
                x_test_feature,
                x_test_for_attack[:, attack_feature:],
            ),
            axis=1,
        )

        classifier = decision_tree_estimator()

        # Membership-inference attack fitted on the first half of each split.
        meminf_attack = MembershipInferenceBlackBox(classifier, attack_model_type="nn")
        split_ratio = 0.5
        n_fit_train = int(len(x_train) * split_ratio)
        n_fit_test = int(len(x_test) * split_ratio)
        meminf_attack.fit(
            x_train[:n_fit_train],
            y_train_iris[:n_fit_train],
            x_test[:n_fit_test],
            y_test_iris[:n_fit_test],
        )

        # Attribute inference driven by the membership attack; the attacked
        # feature is addressed via a one-column slice.
        attack = AttributeInferenceMembership(
            classifier, meminf_attack, attack_feature=slice(attack_feature, attack_feature + 1)
        )

        # Infer the attacked feature for both splits.
        inferred_train = attack.infer(x_train_for_attack, y_train_iris, values=values)
        inferred_test = attack.infer(x_test_for_attack, y_test_iris, values=values)

        # Sanity-check the attack accuracy against the true bucketized values.
        train_acc = np.sum(inferred_train == x_train_feature.reshape(1, -1)) / len(inferred_train)
        test_acc = np.sum(inferred_test == x_test_feature.reshape(1, -1)) / len(inferred_test)
        assert 0.1 <= train_acc
        assert 0.1 <= test_acc
    except ARTTestException as e:
        art_warning(e)
def test_black_box_one_hot_float(art_warning, get_iris_dataset):
    try:
        attack_feature = 2  # petal length

        # Bucketize the attacked feature into the categories 0, 1, 2.
        def transform_feature(x):
            # Compute every mask on the original values before mutating in place.
            high = x > 0.5
            mid = (x > 0.2) & (x <= 0.5)
            low = x <= 0.2
            x[high] = 2
            x[mid] = 1
            x[low] = 0

        # One-hot encode a single categorical column (width = max category + 1).
        def one_hot(column):
            width = int(column.max()) + 1
            encoded = np.zeros((column.size, width))
            encoded[np.arange(column.size), column.astype(int).ravel()] = 1
            return encoded

        (x_train_iris, y_train_iris), (x_test_iris, y_test_iris) = get_iris_dataset

        # Training data: drop the attacked column, bucketize and one-hot encode
        # a copy of it, then splice the encoding back into its original position.
        x_train_for_attack = np.delete(x_train_iris, attack_feature, 1)
        x_train_feature = x_train_iris[:, attack_feature].copy().reshape(-1, 1)
        transform_feature(x_train_feature)
        train_one_hot = one_hot(x_train_feature)
        x_train = np.concatenate(
            (
                x_train_for_attack[:, :attack_feature],
                train_one_hot,
                x_train_for_attack[:, attack_feature:],
            ),
            axis=1,
        )
        y_train = np.argmax(y_train_iris, axis=1).reshape(-1, 1)

        # Same preparation for the test split.
        x_test_for_attack = np.delete(x_test_iris, attack_feature, 1)
        x_test_feature = x_test_iris[:, attack_feature].copy().reshape(-1, 1)
        transform_feature(x_test_feature)
        test_one_hot = one_hot(x_test_feature)
        x_test = np.concatenate(
            (
                x_test_for_attack[:, :attack_feature],
                test_one_hot,
                x_test_for_attack[:, attack_feature:],
            ),
            axis=1,
        )

        # Scale before training.
        scaler = StandardScaler().fit(x_train)
        x_test = scaler.transform(x_test).astype(np.float32)
        x_train = scaler.transform(x_train).astype(np.float32)

        # Derive the dataset for the attack (after scaling); the attacked
        # "feature" is now the three one-hot columns.
        attack_feature = slice(attack_feature, attack_feature + 3)
        x_train_for_attack = np.delete(x_train, attack_feature, 1)
        x_test_for_attack = np.delete(x_test, attack_feature, 1)
        train_one_hot = x_train[:, attack_feature]
        test_one_hot = x_test[:, attack_feature]

        tree = DecisionTreeClassifier()
        tree.fit(x_train, y_train)
        classifier = ScikitlearnDecisionTreeClassifier(tree)

        # Membership-inference attack fitted on the first half of each split.
        meminf_attack = MembershipInferenceBlackBox(classifier, attack_model_type="nn")
        split_ratio = 0.5
        n_fit_train = int(len(x_train) * split_ratio)
        n_fit_test = int(len(x_test) * split_ratio)
        meminf_attack.fit(
            x_train[:n_fit_train],
            y_train_iris[:n_fit_train],
            x_test[:n_fit_test],
            y_test_iris[:n_fit_test],
        )
        attack = AttributeInferenceMembership(classifier, meminf_attack, attack_feature=attack_feature)

        # Infer the attacked feature; `values` are the scaled one-hot encodings.
        values = [[-0.559017, 1.7888544], [-0.47003216, 2.127514], [-1.1774395, 0.84930056]]
        inferred_train = attack.infer(x_train_for_attack, y_train_iris, values=values)
        inferred_test = attack.infer(x_test_for_attack, y_test_iris, values=values)

        # Check accuracy: a row counts as correct only if all one-hot columns
        # match (to 3 decimal places, since the values are scaled floats).
        train_acc = np.sum(
            np.all(np.around(inferred_train, decimals=3) == np.around(train_one_hot, decimals=3), axis=1)
        ) / len(inferred_train)
        test_acc = np.sum(
            np.all(np.around(inferred_test, decimals=3) == np.around(test_one_hot, decimals=3), axis=1)
        ) / len(inferred_test)
        assert 0.1 <= train_acc
        assert 0.1 <= test_acc
    except ARTTestException as e:
        art_warning(e)
def test_meminf_black_box_regressor(art_warning, get_diabetes_dataset):
    try:
        attack_feature = 0  # age
        bins = [
            -0.96838121,
            -0.18102872,
            0.21264752,
            1.0,
        ]

        # Bucketize the attacked feature into bin indices 0..len(bins)-2.
        def transform_feature(x):
            # BUG FIX: compute every bin mask on the ORIGINAL values before
            # mutating. The previous in-place loop assigned label 0 to bin 0,
            # but 0 itself lies inside bin 1's numeric range
            # [-0.18102872, 0.21264752), so the next iteration silently
            # re-labelled every bin-0 sample as bin 1.
            masks = [(x >= bins[i]) & (x < bins[i + 1]) for i in range(len(bins) - 1)]
            for i, mask in enumerate(masks):
                x[mask] = i

        values = list(range(len(bins) - 1))

        (x_train_diabetes, y_train_diabetes), (x_test_diabetes, y_test_diabetes) = get_diabetes_dataset

        # training data without attacked feature
        x_train_for_attack = np.delete(x_train_diabetes, attack_feature, 1)
        # only attacked feature (bucketized copy)
        x_train_feature = x_train_diabetes[:, attack_feature].copy().reshape(-1, 1)
        transform_feature(x_train_feature)
        # training data with attacked feature (after transformation)
        x_train = np.concatenate((x_train_for_attack[:, :attack_feature], x_train_feature), axis=1)
        x_train = np.concatenate((x_train, x_train_for_attack[:, attack_feature:]), axis=1)

        # test data without attacked feature
        x_test_for_attack = np.delete(x_test_diabetes, attack_feature, 1)
        # only attacked feature (bucketized copy)
        x_test_feature = x_test_diabetes[:, attack_feature].copy().reshape(-1, 1)
        transform_feature(x_test_feature)
        # test data with attacked feature (after transformation)
        x_test = np.concatenate((x_test_for_attack[:, :attack_feature], x_test_feature), axis=1)
        x_test = np.concatenate((x_test, x_test_for_attack[:, attack_feature:]), axis=1)

        from sklearn import linear_model

        # Target model: a plain linear regression on the raw diabetes data.
        regr_model = linear_model.LinearRegression()
        regr_model.fit(x_train_diabetes, y_train_diabetes)
        regressor = ScikitlearnRegressor(regr_model)

        # Loss-based membership inference attack, fitted on the first half
        # of each split.
        meminf_attack = MembershipInferenceBlackBox(regressor, attack_model_type="rf", input_type="loss")
        attack_train_ratio = 0.5
        attack_train_size = int(len(x_train) * attack_train_ratio)
        attack_test_size = int(len(x_test) * attack_train_ratio)
        meminf_attack.fit(
            x_train[:attack_train_size],
            y_train_diabetes[:attack_train_size],
            x_test[:attack_test_size],
            y_test_diabetes[:attack_test_size],
        )
        attack = AttributeInferenceMembership(regressor, meminf_attack, attack_feature=attack_feature)

        # infer attacked feature
        inferred_train = attack.infer(x_train_for_attack, y_train_diabetes, values=values)
        inferred_test = attack.infer(x_test_for_attack, y_test_diabetes, values=values)

        # check accuracy against the true bin indices
        train_acc = np.sum(inferred_train == x_train_feature.reshape(1, -1)) / len(inferred_train)
        test_acc = np.sum(inferred_test == x_test_feature.reshape(1, -1)) / len(inferred_test)
        assert 0.1 <= train_acc
        assert 0.1 <= test_acc
    except ARTTestException as e:
        art_warning(e)
# Membership-inference evaluation: fit the attack on most of the train/valid
# data, then score it on 100 held-out samples from each split.
attack = Attack(classifier)

x_train, y_train = dataset_to_list(dataset.get_dataset('train'))
x_train, y_train = to_numpy(torch.stack(x_train)), to_numpy(y_train)
x_valid, y_valid = dataset_to_list(dataset.get_dataset('valid'))
x_valid, y_valid = to_numpy(torch.stack(x_valid)), to_numpy(y_valid)

# Cap both splits at 1000 samples to keep the evaluation fast.
x_train, y_train = x_train[:1000], y_train[:1000]
x_valid, y_valid = x_valid[:1000], y_valid[:1000]

# Fit on everything except the first 100 samples of each split ...
attack.fit(x_train[100:], y_train[100:], x_valid[100:], y_valid[100:])

# ... then infer membership for the 100 + 100 held-out samples.
result = np.concatenate((attack.infer(x_train[:100], y_train[:100]),
                         attack.infer(x_valid[:100], y_valid[:100])))
# Ground truth: train samples are members (1), validation samples are not (0).
y_truth = np.concatenate(([1] * len(x_train[:100]), [0] * len(x_valid[:100])))

print('result:')
# BUG FIX: scikit-learn metrics take (y_true, y_pred); the predictions were
# previously passed as y_true, which swapped the reported precision and recall
# (F1 and accuracy happen to be symmetric, so only those two were correct).
print('F1 score: ', metrics.f1_score(y_truth, result))
print('Accuracy score: ', metrics.accuracy_score(y_truth, result))
print('Recall score: ', metrics.recall_score(y_truth, result))
print('Precision score: ', metrics.precision_score(y_truth, result))