def test_accountant(self):
    """Check that fitting is charged to a budget accountant.

    Two situations are exercised: an accountant handed to the classifier
    through its ``accountant`` argument, and an accountant entered as a
    context manager while the classifier is built without that argument.
    """
    from diffprivlib.accountant import BudgetAccountant

    explicit_acc = BudgetAccountant()

    raw = np.array([
        0.50, 0.75, 1.00, 1.25, 1.50, 1.75, 1.75, 2.00, 2.25, 2.50,
        2.75, 3.00, 3.25, 3.50, 4.00, 4.25, 4.50, 4.75, 5.00, 5.50,
    ])
    y = np.array(
        [0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1])
    # Column vector, shifted by 3.0 and scaled by 2.5, which maps the
    # smallest value (0.5) to -1 and the largest (5.5) to +1.
    X = (raw[:, np.newaxis] - 3.0) / 2.5

    model = LogisticRegression(
        epsilon=2, data_norm=1.0, accountant=explicit_acc)
    model.fit(X, y)
    self.assertEqual((2, 0), explicit_acc.total())

    # NOTE(review): presumably BudgetAccountant(3, 0) caps the budget so
    # that a second epsilon=2 fit cannot be afforded, and the classifier
    # picks this accountant up as the default -- confirm against the
    # diffprivlib documentation.
    with BudgetAccountant(3, 0) as scoped_acc:
        model = LogisticRegression(epsilon=2, data_norm=1.0)
        model.fit(X, y)
        self.assertEqual((2, 0), scoped_acc.total())

        # A second fit inside the same context is expected to fail.
        with self.assertRaises(BudgetError):
            model.fit(X, y)
def test_one_class(self):
    """Fitting on a single sample with a single label must raise ValueError."""
    samples, labels = [[1]], [0]
    model = LogisticRegression(data_norm=1)

    # Only the fit call is inside the context, so an error raised by the
    # constructor would not satisfy the assertion.
    with self.assertRaises(ValueError):
        model.fit(samples, labels)
def test_trinomial(self):
    """Fit three samples carrying three distinct labels.

    The only expectation is that ``fit`` returns something other than
    ``None``.
    """
    features = np.array([0.50, 0.75, 1.00])[:, np.newaxis]
    labels = np.array([0, 1, 2])

    model = LogisticRegression(data_norm=1.0)
    fitted = model.fit(features, labels)

    self.assertIsNotNone(fitted)
def test_warm_start(self):
    """Fit twice on the same data with ``warm_start=True``.

    The first fit only prepares the model; the assertion is on the return
    value of the second fit, which must not be ``None``.
    """
    features = np.array([0.50, 0.75, 1.00])[:, np.newaxis]
    labels = np.array([0, 1, 2])

    model = LogisticRegression(data_norm=1.0, warm_start=True)
    model.fit(features, labels)
    refitted = model.fit(features, labels)

    self.assertIsNotNone(refitted)
def test_large_norm(self):
    """Expect a PrivacyLeakWarning when the data exceeds ``data_norm``.

    The samples are left unscaled (values up to 5.5) while the classifier
    is configured with ``data_norm=1.0``.

    NOTE(review): a second method named ``test_large_norm`` appears in this
    source. If both live in the same class, the later definition replaces
    this one and this test never runs -- confirm and rename one of them.
    """
    features = np.array([
        0.50, 0.75, 1.00, 1.25, 1.50, 1.75, 1.75, 2.00, 2.25, 2.50,
        2.75, 3.00, 3.25, 3.50, 4.00, 4.25, 4.50, 4.75, 5.00, 5.50,
    ])[:, np.newaxis]
    labels = np.array(
        [0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1])

    model = LogisticRegression(data_norm=1.0)

    with self.assertWarns(PrivacyLeakWarning):
        model.fit(features, labels)
def test_no_params(self):
    """Expect a PrivacyLeakWarning when fitting a default-built classifier.

    The classifier is constructed with no arguments at all and then fitted
    on the unscaled one-feature data set.
    """
    model = LogisticRegression()

    features = np.array([
        0.50, 0.75, 1.00, 1.25, 1.50, 1.75, 1.75, 2.00, 2.25, 2.50,
        2.75, 3.00, 3.25, 3.50, 4.00, 4.25, 4.50, 4.75, 5.00, 5.50,
    ])[:, np.newaxis]
    labels = np.array(
        [0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1])

    with self.assertWarns(PrivacyLeakWarning):
        model.fit(features, labels)
def test_large_norm(self):
    """Fit unscaled data with ``data_norm=1.0`` and expect a non-None result.

    The samples reach 5.5 while ``data_norm`` is 1.0; the test only checks
    that ``fit`` still returns something other than ``None``.

    NOTE(review): an earlier method in this source is also named
    ``test_large_norm``. If both live in the same class, this definition
    replaces the earlier one -- confirm and rename one of them.
    """
    features = np.array([
        0.50, 0.75, 1.00, 1.25, 1.50, 1.75, 1.75, 2.00, 2.25, 2.50,
        2.75, 3.00, 3.25, 3.50, 4.00, 4.25, 4.50, 4.75, 5.00, 5.50,
    ])[:, np.newaxis]
    labels = np.array(
        [0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1])

    model = LogisticRegression(data_norm=1.0)
    fitted = model.fit(features, labels)

    self.assertIsNotNone(fitted)
def test_sample_weight_warning(self):
    """Expect a DiffprivlibCompatibilityWarning when ``sample_weight`` is given.

    ``data_norm`` is set to 5.5, the largest sample value, and the weights
    passed to ``fit`` are all ones with the same shape and dtype as ``y``.
    """
    features = np.array([
        0.50, 0.75, 1.00, 1.25, 1.50, 1.75, 1.75, 2.00, 2.25, 2.50,
        2.75, 3.00, 3.25, 3.50, 4.00, 4.25, 4.50, 4.75, 5.00, 5.50,
    ])[:, np.newaxis]
    labels = np.array(
        [0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1])
    unit_weights = np.ones_like(labels)

    model = LogisticRegression(data_norm=5.5)

    with self.assertWarns(DiffprivlibCompatibilityWarning):
        model.fit(features, labels, sample_weight=unit_weights)
def test_multi_dim_y(self):
    """A two-dimensional target array must make ``fit`` raise ValueError.

    ``X`` has four rows of one feature; ``y`` is a 2x4 array.
    """
    features = np.array([0.25, 0.50, 0.75, 1.00])[:, np.newaxis]
    targets = np.array([[0, 1, 2, 3], [4, 5, 6, 7]])

    model = LogisticRegression(data_norm=1.0)

    with self.assertRaises(ValueError):
        model.fit(features, targets)
def test_same_results(self):
    """Compare predictions with scikit-learn when ``epsilon`` is infinite.

    Both classifiers are trained on the same iris split; the predictions on
    the held-out 20% must match element for element.

    NOTE(review): ``train_test_split`` is called without ``random_state``,
    so the split presumably differs from run to run -- confirm that this is
    intended.
    """
    from sklearn import linear_model
    from sklearn import datasets
    from sklearn.model_selection import train_test_split

    iris = datasets.load_iris()
    X_train, X_test, y_train, y_test = train_test_split(
        iris.data, iris.target, test_size=0.2)

    private_model = LogisticRegression(data_norm=12, epsilon=float("inf"))
    private_model.fit(X_train, y_train)
    private_pred = private_model.predict(X_test)

    reference_model = linear_model.LogisticRegression(
        solver="lbfgs", multi_class="ovr")
    reference_model.fit(X_train, y_train)
    reference_pred = reference_model.predict(X_test)

    predictions_match = np.all(private_pred == reference_pred)
    self.assertTrue(predictions_match)
def test_simple(self):
    """Fit rescaled one-feature data and check the two extreme predictions.

    After fitting with ``epsilon=2``, the smallest training value must be
    predicted as falsy (class 0) and the largest as truthy (class 1).
    """
    raw = np.array([
        0.50, 0.75, 1.00, 1.25, 1.50, 1.75, 1.75, 2.00, 2.25, 2.50,
        2.75, 3.00, 3.25, 3.50, 4.00, 4.25, 4.50, 4.75, 5.00, 5.50,
    ])
    y = np.array(
        [0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1])
    # Column vector, shifted by 3.0 and scaled by 2.5, which maps the
    # smallest value (0.5) to -1 and the largest (5.5) to +1.
    X = (raw[:, np.newaxis] - 3.0) / 2.5

    clf = LogisticRegression(epsilon=2, data_norm=1.0)
    clf.fit(X, y)
    self.assertIsNotNone(clf)

    # Query points use the same shift and scale as the training data.
    lowest = np.array([(0.5 - 3) / 2.5]).reshape(-1, 1)
    highest = np.array([(5.5 - 3) / 2.5]).reshape(-1, 1)

    self.assertFalse(clf.predict(lowest))
    self.assertTrue(clf.predict(highest))
def test_bad_params(self):
    """Each listed parameter override must make ``fit`` raise ValueError.

    NOTE(review): ``test_one_class`` in this source expects ``fit`` on the
    same ``X``/``y`` to raise ValueError even with valid parameters, so
    these checks may pass regardless of the parameter under test -- verify.
    """
    X, y = [[1]], [0]

    # Checked in this order; the first override that does not raise fails
    # the test and the remaining ones are not tried.
    invalid_overrides = (
        {"C": -1},
        {"C": 1.2},
        {"max_iter": -1},
        {"max_iter": "100"},
        {"tol": -1},
        {"tol": "1"},
    )

    for overrides in invalid_overrides:
        with self.assertRaises(ValueError):
            LogisticRegression(data_norm=1, **overrides).fit(X, y)
# Convert the train/test inputs to numpy arrays with fixed dtypes.
# X_train, X_test, y_train, y_test, utils and start_time are expected to
# be defined earlier, outside this fragment.
X_train = np.array(X_train, dtype=np.float32)
X_test = np.array(X_test, dtype=np.float32)
y_train = np.array(y_train, dtype=np.int32)
y_test = np.array(y_test, dtype=np.int32)

# Epsilon values to evaluate; currently a single value.
epsilons = [1]
acc_w_dp = []

for epsilon in epsilons:
    # NOTE(review): an earlier remark beside this call read "l2 norm =
    # sqrt(all_columns), there are 64 columns -> data_norm = 8", yet the
    # value passed is 500 -- confirm which one is intended.
    logreg_w_dp = LogisticRegression(
        epsilon=epsilon, data_norm=500, max_iter=1000)
    logreg_w_dp.fit(X_train, y_train.ravel())

    accuracy, recall, precision, auc = utils.predict_score(
        logreg_w_dp, X_test, y_test)
    print(accuracy, recall, precision, auc)

# Time elapsed since start_time was taken.
elapsed = time.perf_counter() - start_time
print('Total time = ', elapsed, ' seconds')
def test_multi_class_warning(self):
    """Constructing with ``multi_class="multinomial"`` must emit a warning.

    The expected category is ``DiffprivlibCompatibilityWarning``; the
    classifier is only constructed, never fitted.
    """
    self.assertWarns(
        DiffprivlibCompatibilityWarning,
        LogisticRegression,
        multi_class="multinomial",
    )
# Sweep 50 log-spaced epsilon values from 1e-2 to 1e2, recording the test
# score of a differentially private logistic regression for each.
# X_train, X_test, y_train, y_test, plt and start_time are expected to be
# defined earlier, outside this fragment.
epsilons = np.logspace(-2, 2, 50)
acc_w_dp = []

# NOTE(review): this handle is not closed anywhere in the visible code --
# confirm that it is closed later, or switch to a ``with`` block.
output = open("ibm_mnist_dp_data_norm_18_70k_images.txt", "w+")

for epsilon in epsilons:
    # NOTE(review): an earlier remark beside this call read "l2 norm =
    # sqrt(all_columns), there are 64 columns -> data_norm = 8", yet the
    # value passed is 18 -- confirm which one is intended.
    logreg_w_dp = LogisticRegression(
        epsilon=epsilon, data_norm=18, max_iter=20)
    logreg_w_dp.fit(X_train, y_train)

    score_dp = logreg_w_dp.score(X_test, y_test)
    acc_w_dp.append(score_dp)

    # One tab-separated "epsilon, score" row per epsilon.
    output.write("%.3f \t %.3f\n" % (epsilon, score_dp))

# Time elapsed since start_time was taken.
elapsed = time.perf_counter() - start_time
print('Total time = ', elapsed, ' seconds')

plt.plot(epsilons, acc_w_dp)
def test_solver_warning(self):
    """Constructing with ``solver="newton-cg"`` must emit a warning.

    The expected category is ``DiffprivlibCompatibilityWarning``; the
    classifier is only constructed, never fitted.
    """
    self.assertWarns(
        DiffprivlibCompatibilityWarning,
        LogisticRegression,
        solver="newton-cg",
    )
def test_different_results(self):
    """Check that default-epsilon fits differ from each other and sklearn.

    Two classifiers with identical settings are trained on the same iris
    split, followed by a scikit-learn reference. The two sets of private
    predictions must not be identical, and the reference must not equal
    both of them.

    NOTE(review): the first assertion needs the two fits to disagree on at
    least one held-out sample, which presumably relies on the randomness of
    the private fit -- confirm that this cannot fail spuriously.
    """
    from sklearn import datasets
    from sklearn import linear_model
    from sklearn.model_selection import train_test_split

    iris = datasets.load_iris()
    X_train, X_test, y_train, y_test = train_test_split(
        iris.data, iris.target, test_size=0.2)

    first_private = LogisticRegression(data_norm=12)
    first_private.fit(X_train, y_train)
    first_pred = first_private.predict(X_test)

    second_private = LogisticRegression(data_norm=12)
    second_private.fit(X_train, y_train)
    second_pred = second_private.predict(X_test)

    reference = linear_model.LogisticRegression(
        solver="lbfgs", multi_class="ovr")
    reference.fit(X_train, y_train)
    reference_pred = reference.predict(X_test)

    private_runs_agree = np.all(first_pred == second_pred)
    self.assertFalse(private_runs_agree)

    reference_equals_both = (
        np.all(reference_pred == first_pred)
        and np.all(reference_pred == second_pred)
    )
    self.assertFalse(reference_equals_both)