def test_warm_start(self):
        """Fit a ``warm_start=True`` model twice and check the second fit returns a value."""
        features = np.array([0.50, 0.75, 1.00]).reshape(-1, 1)
        labels = np.array([0, 1, 2])

        model = LogisticRegression(data_norm=1.0, warm_start=True)

        # Fit once, then fit again on the same data; only the return value
        # of the second call is checked.
        model.fit(features, labels)
        refit_result = model.fit(features, labels)

        self.assertIsNotNone(refit_result)
    def test_one_class(self):
        """Fitting on a single sample with a single label must raise ``ValueError``."""
        single_sample = [[1]]
        single_label = [0]
        model = LogisticRegression(data_norm=1)

        self.assertRaises(ValueError, model.fit, single_sample, single_label)
    def test_large_norm(self):
        """Fitting values up to 5.5 with ``data_norm=1.0`` must emit ``PrivacyLeakWarning``."""
        values = [
            0.50, 0.75, 1.00, 1.25, 1.50, 1.75, 1.75, 2.00, 2.25, 2.50,
            2.75, 3.00, 3.25, 3.50, 4.00, 4.25, 4.50, 4.75, 5.00, 5.50,
        ]
        outcomes = [0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1]

        # One feature per sample: turn the flat vector into a column.
        features = np.array(values).reshape(-1, 1)
        labels = np.array(outcomes)

        model = LogisticRegression(data_norm=1.0)

        self.assertWarns(PrivacyLeakWarning, model.fit, features, labels)
    def test_no_params(self):
        """A model built with no arguments must emit ``PrivacyLeakWarning`` when fitted."""
        model = LogisticRegression()

        values = [
            0.50, 0.75, 1.00, 1.25, 1.50, 1.75, 1.75, 2.00, 2.25, 2.50,
            2.75, 3.00, 3.25, 3.50, 4.00, 4.25, 4.50, 4.75, 5.00, 5.50,
        ]
        outcomes = [0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1]

        # One feature per sample: turn the flat vector into a column.
        features = np.array(values).reshape(-1, 1)
        labels = np.array(outcomes)

        self.assertWarns(PrivacyLeakWarning, model.fit, features, labels)
    def test_sample_weight_warning(self):
        """Passing ``sample_weight`` to ``fit`` must emit ``DiffprivlibCompatibilityWarning``."""
        values = [
            0.50, 0.75, 1.00, 1.25, 1.50, 1.75, 1.75, 2.00, 2.25, 2.50,
            2.75, 3.00, 3.25, 3.50, 4.00, 4.25, 4.50, 4.75, 5.00, 5.50,
        ]
        outcomes = [0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1]

        features = np.array(values).reshape(-1, 1)
        labels = np.array(outcomes)
        # All-ones weights with the same shape and dtype as the labels.
        weights = np.ones_like(labels)

        model = LogisticRegression(data_norm=5.5)

        with self.assertWarns(DiffprivlibCompatibilityWarning):
            model.fit(features, labels, sample_weight=weights)
    def test_trinomial(self):
        """Fitting three samples with three distinct labels returns a non-None value."""
        features = np.array([0.50, 0.75, 1.00]).reshape(-1, 1)
        labels = np.array([0, 1, 2])

        model = LogisticRegression(data_norm=1.0)
        fit_result = model.fit(features, labels)

        self.assertIsNotNone(fit_result)
    def test_simple(self):
        """Fit on shifted/scaled 1-D data and check predictions at both extremes.

        The smallest input must be predicted as a falsy label and the
        largest input as a truthy label.
        """
        raw_values = np.array([
            0.50, 0.75, 1.00, 1.25, 1.50, 1.75, 1.75, 2.00, 2.25, 2.50,
            2.75, 3.00, 3.25, 3.50, 4.00, 4.25, 4.50, 4.75, 5.00, 5.50,
        ])
        labels = np.array(
            [0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1])

        # Shift by 3.0 and divide by 2.5 so every value lies in [-1, 1],
        # then arrange the result as a single feature column.
        features = ((raw_values - 3.0) / 2.5).reshape(-1, 1)

        model = LogisticRegression(epsilon=2, data_norm=1.0)
        model.fit(features, labels)

        # Query points use the same shift/scale as the training data.
        lowest_point = np.array([[(0.5 - 3) / 2.5]])
        highest_point = np.array([[(5.5 - 3) / 2.5]])

        self.assertIsNotNone(model)
        self.assertFalse(model.predict(lowest_point))
        self.assertTrue(model.predict(highest_point))
    def test_same_results(self):
        """With ``epsilon=inf`` the predictions on iris must equal scikit-learn's.

        Both models are trained on the same random 80/20 split of the iris
        dataset and compared element-wise on the held-out part.
        """
        from sklearn import datasets, linear_model
        from sklearn.model_selection import train_test_split

        iris = datasets.load_iris()
        X_train, X_test, y_train, _ = train_test_split(
            iris.data, iris.target, test_size=0.2)

        # Model under test, with an infinite epsilon.
        dp_model = LogisticRegression(data_norm=12, epsilon=float("inf"))
        dp_model.fit(X_train, y_train)
        dp_predictions = dp_model.predict(X_test)

        # Reference scikit-learn model.
        reference = linear_model.LogisticRegression(
            solver="lbfgs", multi_class="ovr")
        reference.fit(X_train, y_train)
        reference_predictions = reference.predict(X_test)

        self.assertTrue(np.all(dp_predictions == reference_predictions))
    def test_large_norm(self):
        """Fitting values up to 5.5 with ``data_norm=1.0`` still returns a non-None value."""
        values = [
            0.50, 0.75, 1.00, 1.25, 1.50, 1.75, 1.75, 2.00, 2.25, 2.50,
            2.75, 3.00, 3.25, 3.50, 4.00, 4.25, 4.50, 4.75, 5.00, 5.50,
        ]
        outcomes = [0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1]

        # One feature per sample: turn the flat vector into a column.
        features = np.array(values).reshape(-1, 1)
        labels = np.array(outcomes)

        model = LogisticRegression(data_norm=1.0)
        fit_result = model.fit(features, labels)

        self.assertIsNotNone(fit_result)
    def test_accountant(self):
        """Check that fitting is recorded by a ``BudgetAccountant``.

        Two scenarios are exercised: an accountant passed explicitly to the
        model, and an accountant used as a context manager around a model
        that is given no accountant argument.
        """
        from diffprivlib.accountant import BudgetAccountant

        explicit_accountant = BudgetAccountant()

        raw_values = np.array([
            0.50, 0.75, 1.00, 1.25, 1.50, 1.75, 1.75, 2.00, 2.25, 2.50,
            2.75, 3.00, 3.25, 3.50, 4.00, 4.25, 4.50, 4.75, 5.00, 5.50,
        ])
        labels = np.array(
            [0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1])
        # Shift by 3.0 and divide by 2.5, then arrange as one feature column.
        features = ((raw_values - 3.0) / 2.5).reshape(-1, 1)

        # Scenario 1: accountant handed to the model directly.
        model = LogisticRegression(
            epsilon=2, data_norm=1.0, accountant=explicit_accountant)
        model.fit(features, labels)
        self.assertEqual((2, 0), explicit_accountant.total())

        # Scenario 2: accountant created with arguments (3, 0) and used as a
        # context manager. NOTE(review): presumably this makes it the default
        # accountant with an epsilon budget of 3 -- confirm against diffprivlib.
        with BudgetAccountant(3, 0) as scoped_accountant:
            model = LogisticRegression(epsilon=2, data_norm=1.0)
            model.fit(features, labels)
            self.assertEqual((2, 0), scoped_accountant.total())

            # A second fit inside the same block must be rejected.
            self.assertRaises(BudgetError, model.fit, features, labels)
    def test_different_results(self):
        """Two default-epsilon fits on iris must not agree with each other.

        Additionally, the two fits must not both match the predictions of a
        plain scikit-learn logistic regression trained on the same split.
        """
        from sklearn import datasets, linear_model
        from sklearn.model_selection import train_test_split

        iris = datasets.load_iris()
        X_train, X_test, y_train, _ = train_test_split(
            iris.data, iris.target, test_size=0.2)

        def fit_and_predict_dp():
            # Train a fresh model under test and predict the held-out split.
            model = LogisticRegression(data_norm=12)
            model.fit(X_train, y_train)
            return model.predict(X_test)

        first_run = fit_and_predict_dp()
        second_run = fit_and_predict_dp()

        # Reference scikit-learn model trained on the same split.
        reference = linear_model.LogisticRegression(
            solver="lbfgs", multi_class="ovr")
        reference.fit(X_train, y_train)
        reference_run = reference.predict(X_test)

        self.assertFalse(np.all(first_run == second_run))

        both_match_reference = (np.all(reference_run == first_run)
                                and np.all(reference_run == second_run))
        self.assertFalse(both_match_reference)
# Convert the train/test splits to NumPy arrays with fixed dtypes:
# float32 for the features, int32 for the labels.
X_train, X_test = (np.array(features, dtype=np.float32)
                   for features in (X_train, X_test))
y_train, y_test = (np.array(targets, dtype=np.int32)
                   for targets in (y_train, y_test))

# Epsilon values to evaluate. A single value is used here; a sweep such as
# np.logspace(-2, 2, 50) was previously considered.
epsilons = [1]

# Placeholder for per-epsilon scores (nothing is appended to it below).
acc_w_dp = []

for epsilon in epsilons:
    # NOTE(review): an earlier note derived data_norm = 8 for 64 columns
    # (L2 norm = sqrt(number of columns)), yet 500 is passed -- confirm.
    logreg_w_dp = LogisticRegression(
        epsilon=epsilon,
        data_norm=500,
        max_iter=1000,
    )
    logreg_w_dp.fit(X_train, y_train.ravel())

    # Score the fitted model on the held-out split and print the metrics.
    accuracy, recall, precision, auc = utils.predict_score(
        logreg_w_dp, X_test, y_test)
    print(accuracy, recall, precision, auc)

# Elapsed time since start_time, which is set elsewhere in the script.
print('Total time = ', time.perf_counter() - start_time, ' seconds')
# Writing results to a text file and plotting accuracy against epsilon were
# both disabled in this script.
Beispiel #13
0
    df_X_train_col_filtered = df_X_train[keep_cols]
    df_X_tests_col_filtered = [df[keep_cols] for df in df_X_tests]

    # Modify sample values without modifying the shape of the dataset
    X_train_processed, X_tests_processed = sample_preprocessing(
        df_X_train_col_filtered, df_X_tests_col_filtered)

    return X_train_processed, X_tests_processed, list(keep_cols)


# Run the preprocessing step on the training frame and on the test and
# evaluation frames; it also returns the list of retained columns.
X_train, (X_test, X_eval), keep_cols = preprocessing(
    df_X_train, [df_X_test, df_X_eval])
print(X_train.shape, X_test.shape)

# Epsilon values to evaluate. Alternatives previously considered were
# [float('inf')] and the sweep np.logspace(-2, 2, 50).
epsilons = [5]

for epsilon in epsilons:
    # Author's note: a custom scoring function cannot be supplied to the
    # IBM diffprivlib estimator.
    logreg_w_dp = LogisticRegression(
        epsilon=epsilon,
        data_norm=500,
        max_iter=20,
    )
    logreg_w_dp.fit(X_train, y_train)

    # Score the fitted model on the test split and print the metrics.
    accuracy, recall, precision, auc = utils.predict_score(
        logreg_w_dp, X_test, y_test)
    print(accuracy, recall, precision, auc)

# Elapsed time since start_time, which is set elsewhere in the script.
print('Total time = ', time.perf_counter() - start_time, ' seconds')
# Writing per-epsilon results to a text file was disabled in this script.