Example #1
0
    def infer(
        self, attack_model_type: str = "nn", *args, **kwargs
    ) -> Tuple[np.ndarray, np.ndarray]:
        """Alias method for attack().

        Trains a black-box membership inference attack on the first
        ``attack_train_size`` / ``attack_test_size`` samples of the train/test
        splits, then infers membership for the remaining samples.

        :param attack_model_type: Type of the attack model. One of "rf", "gb", "nn".
        :param args: Arguments of the attack.
        :param kwargs: Keyword arguments of the attack.
        :return: Two arrays holding the inferred membership status. The first array includes the results for the
        inferred membership status of the train data and the second includes the results for the test data, where 1
        indicates a member and 0 indicates non-member. The optimal attack would return only ones for the first array and
        only zeros for the second.
        :raises ValueError: If ``attack_model_type`` is not one of "rf", "gb", "nn".
        """
        # Validate explicitly instead of with `assert`, which is stripped
        # when Python runs with optimizations enabled (-O).
        if attack_model_type not in ("rf", "gb", "nn"):
            raise ValueError(
                f"attack_model_type must be one of 'rf', 'gb', 'nn'; "
                f"got {attack_model_type!r}"
            )

        attack = MembershipInferenceBlackBox(
            self.target_model.art_classifier, attack_model_type=attack_model_type
        )

        # Fit the attack model on the leading slice of each split.
        attack.fit(
            self.x_train[: self.attack_train_size],
            self.y_train[: self.attack_train_size],
            self.x_test[: self.attack_test_size],
            self.y_test[: self.attack_test_size],
        )

        # Infer membership on the remaining samples, which the attack model
        # has not seen during fitting.
        inferred_train_data = attack.infer(
            self.x_train[self.attack_train_size :],
            self.y_train[self.attack_train_size :],
        )
        inferred_test_data = attack.infer(
            self.x_test[self.attack_test_size :], self.y_test[self.attack_test_size :]
        )

        return inferred_train_data, inferred_test_data
Example #2
0
    # x_valid, y_valid = dataset_to_list(dataset.get_dataset('valid'))
    # x_valid, y_valid = to_numpy(torch.stack(x_valid)), to_numpy(y_valid)

    # attack.calibrate_distance_threshold(x_train[100:300], y_train[100:300], x_valid[100:300], y_valid[100:300])
    # result = np.concatenate((attack.infer(x_train[:100], y_train[:100]), attack.infer(x_valid[:100], y_valid[:100])))
    # y_truth = np.concatenate(([1] * len(x_train[:100]), [0] * len(x_valid[:100])))
    # print('result:')
    # print('F1 score: ', metrics.f1_score(result, y_truth))
    # print('Accuracy score: ', metrics.accuracy_score(result, y_truth))
    # print('Recall score: ', metrics.recall_score(result, y_truth))
    # print('Precision score: ', metrics.precision_score(result, y_truth))
    # Membership-inference evaluation: fit the attack on samples 100..999 of
    # each split, then test it on the first 100 train samples (members,
    # ground-truth label 1) and the first 100 validation samples
    # (non-members, ground-truth label 0).
    attack = Attack(classifier)
    x_train, y_train = dataset_to_list(dataset.get_dataset('train'))
    x_train, y_train = to_numpy(torch.stack(x_train)), to_numpy(y_train)
    x_valid, y_valid = dataset_to_list(dataset.get_dataset('valid'))
    x_valid, y_valid = to_numpy(torch.stack(x_valid)), to_numpy(y_valid)

    # Cap both splits at 1000 samples to bound the attack's runtime.
    x_train, y_train = x_train[:1000], y_train[:1000]
    x_valid, y_valid = x_valid[:1000], y_valid[:1000]

    attack.fit(x_train[100:], y_train[100:], x_valid[100:], y_valid[100:])
    result = np.concatenate((attack.infer(x_train[:100], y_train[:100]),
                             attack.infer(x_valid[:100], y_valid[:100])))
    y_truth = np.concatenate(
        ([1] * len(x_train[:100]), [0] * len(x_valid[:100])))
    print('result:')
    # Bug fix: sklearn-style metric functions take (y_true, y_pred); the
    # original passed the predictions first, which swaps the reported
    # precision and recall (F1 and accuracy are unaffected by the swap).
    # NOTE(review): assumes `metrics` is sklearn.metrics — confirm.
    print('F1 score: ', metrics.f1_score(y_truth, result))
    print('Accuracy score: ', metrics.accuracy_score(y_truth, result))
    print('Recall score: ', metrics.recall_score(y_truth, result))
    print('Precision score: ', metrics.precision_score(y_truth, result))