Example #1
0
def test_black_box_no_values(art_warning, decision_tree_estimator,
                             get_iris_dataset, model_type):
    """Run the black-box attribute inference attack without passing ``values``.

    The petal-length column of the iris data is bucketed into three
    categories, the attack is fitted on the training data that contains the
    bucketed column, and the inferred categories are compared with the true
    ones for both the train and the test split.
    """
    try:
        target_col = 2  # petal length

        def bucketize(column):
            """Overwrite, in place, every value with its category 0.0, 1.0 or 2.0."""
            # The masks are taken up front; none of the written labels falls
            # into another range, so this equals assigning range by range.
            upper = column > 0.5
            middle = (column > 0.2) & (column <= 0.5)
            lower = column <= 0.2
            column[upper] = 2.0
            column[middle] = 1.0
            column[lower] = 0.0

        def split_out_target(data):
            """Return ``data`` without the target column and the bucketed target column."""
            remaining = np.delete(data, target_col, 1)
            target = data[:, target_col].copy().reshape(-1, 1)
            bucketize(target)
            return remaining, target

        (train_x, _train_y), (test_x, _test_y) = get_iris_dataset

        train_rest, train_target = split_out_target(train_x)
        test_rest, test_target = split_out_target(test_x)

        # training data with the bucketed column back at its original position
        attack_train = np.concatenate(
            (train_rest[:, :target_col], train_target,
             train_rest[:, target_col:]),
            axis=1)

        classifier = decision_tree_estimator()
        attack = AttributeInferenceBlackBox(classifier,
                                            attack_feature=target_col,
                                            attack_model_type=model_type)

        def predicted_labels(data):
            """Return the original model's arg-max predictions as a column vector."""
            scores = classifier.predict(data)
            return np.array([np.argmax(row) for row in scores]).reshape(-1, 1)

        def accuracy(inferred, truth):
            """Return the number of matches divided by the number of inferred values."""
            return np.sum(inferred == truth.reshape(1, -1)) / len(inferred)

        train_pred = predicted_labels(train_x)
        test_pred = predicted_labels(test_x)

        # train the attack model, then infer the attacked feature
        attack.fit(attack_train)
        inferred_train = attack.infer(train_rest, pred=train_pred)
        inferred_test = attack.infer(test_rest, pred=test_pred)

        train_acc = accuracy(inferred_train, train_target)
        test_acc = accuracy(inferred_test, test_target)
        assert pytest.approx(0.8285, abs=0.12) == train_acc
        assert pytest.approx(0.8888, abs=0.18) == test_acc

    except ARTTestException as e:
        art_warning(e)
Example #2
0
def test_black_box_one_hot(art_warning, get_iris_dataset):
    """Run the black-box attack on a one-hot encoded attacked feature.

    Petal length is bucketed into categories, one-hot encoded and put back
    into the iris data in place of the original column. A scikit-learn
    decision tree is trained on that data and wrapped for the attack, which
    targets the slice of one-hot columns. Accuracy counts a sample as correct
    only when its whole inferred one-hot row equals the true row.
    """
    try:
        attack_feature = 2  # petal length

        # need to transform attacked feature into categorical
        def transform_feature(x):
            x[x > 0.5] = 2
            x[(x > 0.2) & (x <= 0.5)] = 1
            x[x <= 0.2] = 0

        def one_hot(feature, width):
            """Return a ``(feature.size, width)`` one-hot matrix for the integer-valued ``feature``."""
            encoded = np.zeros((feature.size, width))
            encoded[np.arange(feature.size), feature.reshape(1, -1).astype(int)] = 1
            return encoded

        def with_one_hot(rest, encoded):
            """Insert the one-hot columns into ``rest`` at the attacked feature's position."""
            return np.concatenate((rest[:, :attack_feature], encoded, rest[:, attack_feature:]), axis=1)

        (x_train_iris, y_train_iris), (x_test_iris, y_test_iris) = get_iris_dataset
        # training data without attacked feature
        x_train_for_attack = np.delete(x_train_iris, attack_feature, 1)
        # only attacked feature
        x_train_feature = x_train_iris[:, attack_feature].copy().reshape(-1, 1)
        transform_feature(x_train_feature)
        # The number of one-hot columns is fixed by the training data and reused
        # for the test encoding and the attacked slice, so that train and test
        # always have the same width even if the test split lacks a category.
        num_columns = int(x_train_feature.max()) + 1
        train_one_hot = one_hot(x_train_feature, num_columns)
        # training data with attacked feature (after transformation)
        x_train = with_one_hot(x_train_for_attack, train_one_hot)

        y_train = np.array([np.argmax(y) for y in y_train_iris]).reshape(-1, 1)

        # test data without attacked feature
        x_test_for_attack = np.delete(x_test_iris, attack_feature, 1)
        # only attacked feature
        x_test_feature = x_test_iris[:, attack_feature].copy().reshape(-1, 1)
        transform_feature(x_test_feature)
        test_one_hot = one_hot(x_test_feature, num_columns)
        # test data with attacked feature (after transformation)
        x_test = with_one_hot(x_test_for_attack, test_one_hot)

        tree = DecisionTreeClassifier()
        tree.fit(x_train, y_train)
        classifier = ScikitlearnDecisionTreeClassifier(tree)

        attack = AttributeInferenceBlackBox(
            classifier, attack_feature=slice(attack_feature, attack_feature + num_columns)
        )
        # get original model's predictions
        x_train_predictions = np.array([np.argmax(arr) for arr in classifier.predict(x_train)]).reshape(-1, 1)
        x_test_predictions = np.array([np.argmax(arr) for arr in classifier.predict(x_test)]).reshape(-1, 1)
        # train attack model
        attack.fit(x_train)
        # infer attacked feature
        inferred_train = attack.infer(x_train_for_attack, x_train_predictions)
        inferred_test = attack.infer(x_test_for_attack, x_test_predictions)
        # check accuracy: a row counts only if every one-hot column matches
        train_acc = np.sum(np.all(inferred_train == train_one_hot, axis=1)) / len(inferred_train)
        test_acc = np.sum(np.all(inferred_test == test_one_hot, axis=1)) / len(inferred_test)
        assert pytest.approx(0.9145, abs=0.03) == train_acc
        assert pytest.approx(0.9333, abs=0.03) == test_acc

    except ARTTestException as e:
        art_warning(e)
Example #3
0
def test_black_box_with_model(art_warning, decision_tree_estimator, get_iris_dataset):
    """Run the black-box attack with a caller-supplied PyTorch attack model.

    Petal length is bucketed into the categories 0.0, 1.0 and 2.0. A linear
    PyTorch model wrapped in ``PyTorchClassifier`` is handed to the attack as
    its attack model. The accuracy assertions are currently disabled, so the
    accuracies are computed but not checked.
    """
    try:
        target_col = 2  # petal length
        categories = [0.0, 1.0, 2.0]

        def bucketize(column):
            """Overwrite, in place, every value with its category 0.0, 1.0 or 2.0."""
            # The masks are taken up front; none of the written labels falls
            # into another range, so this equals assigning range by range.
            upper = column > 0.5
            middle = (column > 0.2) & (column <= 0.5)
            lower = column <= 0.2
            column[upper] = 2.0
            column[middle] = 1.0
            column[lower] = 0.0

        def split_out_target(data):
            """Return ``data`` without the target column and the bucketed target column."""
            remaining = np.delete(data, target_col, 1)
            target = data[:, target_col].copy().reshape(-1, 1)
            bucketize(target)
            return remaining, target

        (train_x, _train_y), (test_x, _test_y) = get_iris_dataset

        train_rest, train_target = split_out_target(train_x)
        test_rest, test_target = split_out_target(test_x)

        # training data with the bucketed column back at its original position
        attack_train = np.concatenate(
            (train_rest[:, :target_col], train_target, train_rest[:, target_col:]), axis=1
        )

        # attack model: one linear layer with cross-entropy loss and Adam
        linear = nn.Linear(4, 3)
        attack_model = PyTorchClassifier(
            model=linear,
            clip_values=(0, 1),
            loss=nn.CrossEntropyLoss(),
            optimizer=optim.Adam(linear.parameters(), lr=0.01),
            input_shape=(4,),
            nb_classes=3,
        )

        classifier = decision_tree_estimator()
        attack = AttributeInferenceBlackBox(classifier, attack_model=attack_model, attack_feature=target_col)

        def predicted_labels(data):
            """Return the original model's arg-max predictions as a column vector."""
            scores = classifier.predict(data)
            return np.array([np.argmax(row) for row in scores]).reshape(-1, 1)

        train_pred = predicted_labels(train_x)
        test_pred = predicted_labels(test_x)

        # train the attack model, then infer the attacked feature
        attack.fit(attack_train)
        inferred_train = attack.infer(train_rest, train_pred, values=categories)
        inferred_test = attack.infer(test_rest, test_pred, values=categories)

        # train / test accuracy are evaluated but deliberately discarded
        _ = np.sum(inferred_train == train_target.reshape(1, -1)) / len(inferred_train)
        _ = np.sum(inferred_test == test_target.reshape(1, -1)) / len(inferred_test)
        # assert train_acc == pytest.approx(0.5523, abs=0.03)
        # assert test_acc == pytest.approx(0.5777, abs=0.03)
    except ARTTestException as e:
        art_warning(e)
def test_black_box_with_model(get_tabular_classifier_list, get_iris_dataset):
    """Run the black-box attack with a PyTorch attack model on listed classifiers.

    The test logs a warning and returns when no classifier is available.
    Only classifiers whose class name is ``ScikitlearnDecisionTreeClassifier``
    are attacked. The accuracies are computed but not asserted.
    """
    candidates = get_tabular_classifier_list(AttributeInferenceBlackBox)
    if not candidates:
        logging.warning("Couldn't perform  this test because no classifier is defined")
        return

    target_col = 2  # petal length
    categories = [0.0, 1.0, 2.0]

    def bucketize(column):
        """Overwrite, in place, every value with its category 0.0, 1.0 or 2.0."""
        # The masks are taken up front; none of the written labels falls
        # into another range, so this equals assigning range by range.
        upper = column > 0.5
        middle = (column > 0.2) & (column <= 0.5)
        lower = column <= 0.2
        column[upper] = 2.0
        column[middle] = 1.0
        column[lower] = 0.0

    def split_out_target(data):
        """Return ``data`` without the target column and the bucketed target column."""
        remaining = np.delete(data, target_col, 1)
        target = data[:, target_col].copy().reshape(-1, 1)
        bucketize(target)
        return remaining, target

    (train_x, _train_y), (test_x, _test_y) = get_iris_dataset

    train_rest, train_target = split_out_target(train_x)
    test_rest, test_target = split_out_target(test_x)

    # training data with the bucketed column back at its original position
    attack_train = np.concatenate(
        (train_rest[:, :target_col], train_target, train_rest[:, target_col:]), axis=1
    )

    # attack model: one linear layer with cross-entropy loss and Adam;
    # the same instance is handed to every attack built in the loop below
    linear = nn.Linear(4, 3)
    attack_model = PyTorchClassifier(
        model=linear,
        clip_values=(0, 1),
        loss=nn.CrossEntropyLoss(),
        optimizer=optim.Adam(linear.parameters(), lr=0.01),
        input_shape=(4,),
        nb_classes=3,
    )

    for classifier in candidates:
        if type(classifier).__name__ != "ScikitlearnDecisionTreeClassifier":
            continue

        attack = AttributeInferenceBlackBox(classifier, attack_model=attack_model, attack_feature=target_col)

        # original model's arg-max predictions as column vectors
        train_pred = np.array([np.argmax(row) for row in classifier.predict(train_x)]).reshape(-1, 1)
        test_pred = np.array([np.argmax(row) for row in classifier.predict(test_x)]).reshape(-1, 1)

        # train the attack model, then infer the attacked feature
        attack.fit(attack_train)
        inferred_train = attack.infer(train_rest, train_pred, values=categories)
        inferred_test = attack.infer(test_rest, test_pred, values=categories)

        # accuracies are evaluated only; nothing is asserted on them
        train_acc = np.sum(inferred_train == train_target.reshape(1, -1)) / len(inferred_train)
        test_acc = np.sum(inferred_test == test_target.reshape(1, -1)) / len(inferred_test)
def test_black_box(get_tabular_classifier_list, get_iris_dataset):
    """Run the black-box attack with its default attack model on listed classifiers.

    The test logs a warning and returns when no classifier is available.
    Only classifiers whose class name is ``ScikitlearnDecisionTreeClassifier``
    are attacked; for those, train and test accuracy of the inferred
    petal-length category must be close to the expected values.
    """
    candidates = get_tabular_classifier_list(AttributeInferenceBlackBox)
    if not candidates:
        logging.warning("Couldn't perform  this test because no classifier is defined")
        return

    target_col = 2  # petal length
    categories = [0.0, 1.0, 2.0]

    def bucketize(column):
        """Overwrite, in place, every value with its category 0.0, 1.0 or 2.0."""
        # The masks are taken up front; none of the written labels falls
        # into another range, so this equals assigning range by range.
        upper = column > 0.5
        middle = (column > 0.2) & (column <= 0.5)
        lower = column <= 0.2
        column[upper] = 2.0
        column[middle] = 1.0
        column[lower] = 0.0

    def split_out_target(data):
        """Return ``data`` without the target column and the bucketed target column."""
        remaining = np.delete(data, target_col, 1)
        target = data[:, target_col].copy().reshape(-1, 1)
        bucketize(target)
        return remaining, target

    def accuracy(inferred, truth):
        """Return the number of matches divided by the number of inferred values."""
        return np.sum(inferred == truth.reshape(1, -1)) / len(inferred)

    (train_x, _train_y), (test_x, _test_y) = get_iris_dataset

    train_rest, train_target = split_out_target(train_x)
    test_rest, test_target = split_out_target(test_x)

    # training data with the bucketed column back at its original position
    attack_train = np.concatenate(
        (train_rest[:, :target_col], train_target, train_rest[:, target_col:]), axis=1
    )

    for classifier in candidates:
        if type(classifier).__name__ != "ScikitlearnDecisionTreeClassifier":
            continue

        attack = AttributeInferenceBlackBox(classifier, attack_feature=target_col)

        # original model's arg-max predictions as column vectors
        train_pred = np.array([np.argmax(row) for row in classifier.predict(train_x)]).reshape(-1, 1)
        test_pred = np.array([np.argmax(row) for row in classifier.predict(test_x)]).reshape(-1, 1)

        # train the attack model, then infer the attacked feature
        attack.fit(attack_train)
        inferred_train = attack.infer(train_rest, train_pred, values=categories)
        inferred_test = attack.infer(test_rest, test_pred, values=categories)

        train_acc = accuracy(inferred_train, train_target)
        test_acc = accuracy(inferred_test, test_target)
        assert train_acc == pytest.approx(0.8285, abs=0.03)
        assert test_acc == pytest.approx(0.8888, abs=0.03)
Example #6
0
def test_errors(art_warning, tabular_dl_estimator_for_attack, get_iris_dataset):
    """Check that invalid arguments to the black-box attack raise ``ValueError``.

    Covered: invalid ``attack_feature`` values at construction, an
    ``attack_feature`` of 8 rejected by ``fit``, ``fit`` on data with one
    column removed, and two ``infer`` calls on the unmodified training data.
    """
    try:
        estimator = tabular_dl_estimator_for_attack(AttributeInferenceBlackBox)
        (train_x, train_y), (_test_x, test_y) = get_iris_dataset

        # the constructor itself must reject these attack_feature values
        for bad_feature in ("a", -3):
            with pytest.raises(ValueError):
                AttributeInferenceBlackBox(estimator, attack_feature=bad_feature)

        # attack_feature=8 is accepted by the constructor; fit() must reject it
        attack = AttributeInferenceBlackBox(estimator, attack_feature=8)
        with pytest.raises(ValueError):
            attack.fit(train_x)

        # default attack_feature: fit() on data missing a column must fail
        attack = AttributeInferenceBlackBox(estimator)
        with pytest.raises(ValueError):
            attack.fit(np.delete(train_x, 1, 1))

        # infer() on the full training data must fail with either label set
        for labels in (test_y, train_y):
            with pytest.raises(ValueError):
                attack.infer(train_x, labels)
    except ARTTestException as e:
        art_warning(e)
Example #7
0
def test_black_box_baseline(art_warning, decision_tree_estimator,
                            get_iris_dataset):
    """Compare the black-box attack with the baseline attack on test data.

    Both attacks are fitted on the same training data (iris with petal length
    bucketed into 0.0, 1.0 and 2.0). The test passes when the black-box
    attack's test accuracy is at least the baseline attack's test accuracy.
    Train-side inferences are executed but their results are discarded.
    """
    try:
        target_col = 2  # petal length
        categories = [0.0, 1.0, 2.0]

        def bucketize(column):
            """Overwrite, in place, every value with its category 0.0, 1.0 or 2.0."""
            # The masks are taken up front; none of the written labels falls
            # into another range, so this equals assigning range by range.
            upper = column > 0.5
            middle = (column > 0.2) & (column <= 0.5)
            lower = column <= 0.2
            column[upper] = 2.0
            column[middle] = 1.0
            column[lower] = 0.0

        def split_out_target(data):
            """Return ``data`` without the target column and the bucketed target column."""
            remaining = np.delete(data, target_col, 1)
            target = data[:, target_col].copy().reshape(-1, 1)
            bucketize(target)
            return remaining, target

        def accuracy(inferred, truth):
            """Return the number of matches divided by the number of inferred values."""
            return np.sum(inferred == truth.reshape(1, -1)) / len(inferred)

        (train_x, _train_y), (test_x, _test_y) = get_iris_dataset

        train_rest, train_target = split_out_target(train_x)
        test_rest, test_target = split_out_target(test_x)

        # training data with the bucketed column back at its original position
        attack_train = np.concatenate(
            (train_rest[:, :target_col], train_target,
             train_rest[:, target_col:]),
            axis=1)

        classifier = decision_tree_estimator()
        attack = AttributeInferenceBlackBox(classifier,
                                            attack_feature=target_col)

        # original model's arg-max predictions as column vectors
        train_pred = np.array(
            [np.argmax(row) for row in classifier.predict(train_x)]
        ).reshape(-1, 1)
        test_pred = np.array(
            [np.argmax(row) for row in classifier.predict(test_x)]
        ).reshape(-1, 1)

        # black-box attack: fit, then infer on train (discarded) and test
        attack.fit(attack_train)
        _ = attack.infer(train_rest, train_pred, values=categories)
        inferred_test = attack.infer(test_rest, test_pred, values=categories)
        test_acc = accuracy(inferred_test, test_target)

        # baseline attack: same training data, no access to model predictions
        baseline_attack = AttributeInferenceBaseline(
            attack_feature=target_col)
        baseline_attack.fit(attack_train)
        _ = baseline_attack.infer(train_rest, values=categories)
        baseline_inferred_test = baseline_attack.infer(test_rest,
                                                       values=categories)
        baseline_test_acc = accuracy(baseline_inferred_test, test_target)

        assert test_acc >= baseline_test_acc

    except ARTTestException as e:
        art_warning(e)
Example #8
0
def test_black_box_one_hot_float(art_warning, get_iris_dataset):
    """Run the black-box attack on a one-hot feature that was standard-scaled.

    Petal length is bucketed, one-hot encoded and put back into the iris data.
    All columns are then scaled with a ``StandardScaler`` fitted on the
    training data, so the one-hot columns hold floats instead of 0/1. The
    attack targets the slice of scaled one-hot columns and receives the
    possible scaled values per column through ``values``. Accuracy counts a
    sample as correct only when its whole row matches after rounding to three
    decimals.
    """
    try:
        attack_feature = 2  # petal length

        # need to transform attacked feature into categorical
        def transform_feature(x):
            x[x > 0.5] = 2
            x[(x > 0.2) & (x <= 0.5)] = 1
            x[x <= 0.2] = 0

        def one_hot(feature, width):
            """Return a ``(feature.size, width)`` one-hot matrix for the integer-valued ``feature``."""
            encoded = np.zeros((feature.size, width))
            encoded[np.arange(feature.size), feature.reshape(1, -1).astype(int)] = 1
            return encoded

        def with_one_hot(rest, encoded):
            """Insert the one-hot columns into ``rest`` at the attacked feature's position."""
            return np.concatenate((rest[:, :attack_feature], encoded, rest[:, attack_feature:]), axis=1)

        (x_train_iris, y_train_iris), (x_test_iris, y_test_iris) = get_iris_dataset
        # training data without attacked feature
        x_train_for_attack = np.delete(x_train_iris, attack_feature, 1)
        # only attacked feature
        x_train_feature = x_train_iris[:, attack_feature].copy().reshape(-1, 1)
        transform_feature(x_train_feature)
        # The number of one-hot columns is fixed by the training data and reused
        # for the test encoding and the attacked slice, so that train and test
        # always have the same width even if the test split lacks a category.
        num_columns = int(x_train_feature.max()) + 1
        train_one_hot = one_hot(x_train_feature, num_columns)
        # training data with attacked feature (after transformation)
        x_train = with_one_hot(x_train_for_attack, train_one_hot)

        y_train = np.array([np.argmax(y) for y in y_train_iris]).reshape(-1, 1)

        # test data without attacked feature
        x_test_for_attack = np.delete(x_test_iris, attack_feature, 1)
        # only attacked feature
        x_test_feature = x_test_iris[:, attack_feature].copy().reshape(-1, 1)
        transform_feature(x_test_feature)
        test_one_hot = one_hot(x_test_feature, num_columns)
        # test data with attacked feature (after transformation)
        x_test = with_one_hot(x_test_for_attack, test_one_hot)

        # scale before training; the scaler is fitted on the training data only
        scaler = StandardScaler().fit(x_train)
        x_test = scaler.transform(x_test).astype(np.float32)
        x_train = scaler.transform(x_train).astype(np.float32)
        # derive dataset for attack (after scaling)
        attack_feature = slice(attack_feature, attack_feature + num_columns)
        x_train_for_attack = np.delete(x_train, attack_feature, 1)
        x_test_for_attack = np.delete(x_test, attack_feature, 1)
        train_one_hot = x_train[:, attack_feature]
        test_one_hot = x_test[:, attack_feature]

        tree = DecisionTreeClassifier()
        tree.fit(x_train, y_train)
        classifier = ScikitlearnDecisionTreeClassifier(tree)

        attack = AttributeInferenceBlackBox(classifier, attack_feature=attack_feature)
        # get original model's predictions
        x_train_predictions = np.array([np.argmax(arr) for arr in classifier.predict(x_train)]).reshape(-1, 1)
        x_test_predictions = np.array([np.argmax(arr) for arr in classifier.predict(x_test)]).reshape(-1, 1)
        # train attack model
        attack.fit(x_train)
        # infer attacked feature; one pair of candidate values per one-hot column
        # NOTE(review): these literals presumably are the scaled 0/1 values of each
        # column for the iris fixture - confirm if the fixture or scaler changes.
        values = [[-0.6324555, 1.5811388], [-0.4395245, 2.2751858], [-1.1108746, 0.9001915]]
        inferred_train = attack.infer(x_train_for_attack, x_train_predictions, values=values)
        inferred_test = attack.infer(x_test_for_attack, x_test_predictions, values=values)
        # check accuracy; rounding absorbs float32 noise before comparing rows
        train_acc = np.sum(
            np.all(np.around(inferred_train, decimals=3) == np.around(train_one_hot, decimals=3), axis=1)
        ) / len(inferred_train)
        test_acc = np.sum(
            np.all(np.around(inferred_test, decimals=3) == np.around(test_one_hot, decimals=3), axis=1)
        ) / len(inferred_test)
        assert pytest.approx(0.9145, abs=0.03) == train_acc
        assert pytest.approx(0.9333, abs=0.03) == test_acc

    except ARTTestException as e:
        art_warning(e)
Example #9
0
def test_black_box_regressor(art_warning, get_diabetes_dataset, model_type):
    """Run the black-box attribute inference attack against a regression model.

    Column 0 (age) of the diabetes data is discretised with ``bins``, a
    scikit-learn linear regression is fitted on the original data and wrapped
    in ``ScikitlearnRegressor``, and the attack is asked to infer the
    discretised column from the remaining columns plus the regressor's
    predictions. The expected accuracies are close to zero and are checked
    with an absolute tolerance of 0.12.
    """
    try:
        attack_feature = 0  # age

        # 11 edges delimiting the 10 intervals used to discretise the attacked feature
        bins = [
            -0.96838121,
            -0.77154309,
            -0.57470497,
            -0.37786684,
            -0.18102872,
            0.0158094,
            0.21264752,
            0.40948564,
            0.60632376,
            0.80316188,
            1.0,
        ]

        # need to transform attacked feature into categorical
        def transform_feature(x):
            # Rewrites x in place, one interval at a time, with the interval index.
            # NOTE(review): each iteration tests the already-rewritten array, so
            # labels written earlier can match a later interval: 0.0 lies inside
            # interval 4 and 1.0 inside interval 9, which appears to relabel
            # intervals 0 and 1. The accuracies asserted below were presumably
            # obtained with this behaviour - confirm before changing it.
            for i in range(len(bins) - 1):
                x[(x >= bins[i]) & (x <= bins[i + 1])] = i

        # candidate categories handed to the attack: one per interval
        values = list(range(len(bins) - 1))

        (x_train_diabetes,
         y_train_diabetes), (x_test_diabetes,
                             y_test_diabetes) = get_diabetes_dataset
        # training data without attacked feature
        x_train_for_attack = np.delete(x_train_diabetes, attack_feature, 1)
        # only attacked feature
        x_train_feature = x_train_diabetes[:, attack_feature].copy().reshape(
            -1, 1)
        transform_feature(x_train_feature)
        # training data with attacked feature (after transformation)
        x_train = np.concatenate(
            (x_train_for_attack[:, :attack_feature], x_train_feature), axis=1)
        x_train = np.concatenate(
            (x_train, x_train_for_attack[:, attack_feature:]), axis=1)

        # test data without attacked feature
        x_test_for_attack = np.delete(x_test_diabetes, attack_feature, 1)
        # only attacked feature
        x_test_feature = x_test_diabetes[:,
                                         attack_feature].copy().reshape(-1, 1)
        transform_feature(x_test_feature)

        # imported locally, inside the test body
        from sklearn import linear_model

        # the target model is trained on the original (non-discretised) data
        regr_model = linear_model.LinearRegression()
        regr_model.fit(x_train_diabetes, y_train_diabetes)
        regressor = ScikitlearnRegressor(regr_model)

        # NOTE(review): prediction_normal_factor presumably rescales the
        # regressor's outputs for the attack model - confirm against the
        # AttributeInferenceBlackBox documentation.
        attack = AttributeInferenceBlackBox(regressor,
                                            attack_feature=attack_feature,
                                            prediction_normal_factor=1 / 250,
                                            attack_model_type=model_type)
        # get original model's predictions
        x_train_predictions = regressor.predict(x_train_diabetes).reshape(
            -1, 1)
        x_test_predictions = regressor.predict(x_test_diabetes).reshape(-1, 1)
        # train attack model
        attack.fit(x_train)
        # infer attacked feature
        inferred_train = attack.infer(x_train_for_attack,
                                      pred=x_train_predictions,
                                      values=values)
        inferred_test = attack.infer(x_test_for_attack,
                                     pred=x_test_predictions,
                                     values=values)
        # check accuracy: number of matches divided by number of inferred values
        train_acc = np.sum(inferred_train == x_train_feature.reshape(
            1, -1)) / len(inferred_train)
        test_acc = np.sum(inferred_test == x_test_feature.reshape(
            1, -1)) / len(inferred_test)

        assert pytest.approx(0.0258, abs=0.12) == train_acc
        assert pytest.approx(0.0375, abs=0.12) == test_acc

    except ARTTestException as e:
        art_warning(e)