def test_private_gaussian_mean(example_private_table: PrivateTable):
    """Check the private Gaussian mean of the 'Age' column.

    The query is issued 100 times, each with its own
    ``PrivacyBudget(0.99, 0.5)``, and the answers are averaged
    (presumably to damp the per-query noise) before the average is
    handed to ``check_absolute_error`` with reference value 33.2 and
    bound 10.
    """
    samples = []
    for _ in range(100):
        budget = PrivacyBudget(0.99, 0.5)
        samples.append(example_private_table.gaussian_mean('Age', budget))

    noisy_mean = np.mean(samples)
    check_absolute_error(noisy_mean, 33.2, 10.)
def example_private_table():
    """Build a PrivateTable over 'Age' and 'Education' from the adult data file.

    Reads ``dataset/adult_data.txt`` with explicit column names, keeps only
    the 'Age' and 'Education' columns, and wraps them in a ``PrivateTable``
    with a real-valued domain for 'Age' and a categorical domain for
    'Education'.
    """
    column_names = [
        "Age", "Workclass", "fnlwgt", "Education", "Education-Num",
        "Martial Status", "Occupation", "Relationship", "Race", "Sex",
        "Capital Gain", "Capital Loss", "Hours per week", "Country",
        "Target",
    ]
    csv_path = os.path.join("dataset", "adult_data.txt")
    adult_data = pd.read_csv(csv_path, names=column_names)

    # Only these two columns are exposed through the private table.
    df = pd.DataFrame(
        {column: adult_data[column].tolist() for column in ('Age', 'Education')}
    )

    # NOTE: the leading space in every label is part of the literal value.
    education_levels = [
        ' Bachelors', ' HS-grad', ' 11th', ' Masters', ' 9th',
        ' Some-college', ' Assoc-acdm', ' Assoc-voc', ' 7th-8th',
        ' Doctorate', ' Prof-school', ' 5th-6th', ' 10th', ' 1st-4th',
        ' Preschool', ' 12th',
    ]
    domains = {
        'Age': RealDataDomain(17., 90.),
        'Education': CategoricalDataDomain(education_levels),
    }
    return PrivateTable(df, domains, PrivacyBudget(100000.0, 1.))
def test_private_categorical_hist(example_private_table: PrivateTable):
    """Check the private histogram of the categorical 'Name' column.

    The noisy counts are sorted in place and compared element-wise with
    ``[1, 1, 2]``; every count must differ from its reference by less
    than 1.
    """
    expected_counts = [1, 1, 2]
    err = [1, 1, 1]

    noisy_hist = example_private_table.cat_hist('Name', PrivacyBudget(10000.))
    noisy_hist.sort()

    deviation = np.abs(noisy_hist - expected_counts)
    assert all(deviation < err)
    del noisy_hist
def test_column_names(example_table: DataFrame):
    """Check that the table's ``_columns`` contains 'Age' and 'Name'."""
    known_names = ['Tom', 'Jack', 'Steve', 'Eve', 'Adam', 'Lucifer']
    domains = {
        'Name': CategoricalDataDomain(known_names),
        'Age': RealDataDomain(0., 130.),
    }
    t = PrivateTable(example_table, domains, PrivacyBudget(1.0, 0.))

    for column in ('Age', 'Name'):
        assert column in t._columns
def test_private_categorical_hist(example_private_table: PrivateTable):
    """Check the private histogram of the categorical 'Class' column (iris).

    Bins: Iris-setosa, Iris-versicolor, Iris-virginica.

    The noisy counts are sorted in place and compared element-wise with
    ``[50, 50, 50]``; every count must differ from its reference by less
    than 1.
    """
    expected_counts = [50, 50, 50]
    err = [1, 1, 1]

    noisy_hist = example_private_table.cat_hist('Class', PrivacyBudget(10000.))
    noisy_hist.sort()

    deviation = np.abs(noisy_hist - expected_counts)
    assert all(deviation < err)
    del noisy_hist
def example_private_table():
    """Build a four-row PrivateTable with a 'Name' and an 'Age' column.

    'Name' uses a categorical domain of six names (two of which, 'Jack',
    repeat in the data) and 'Age' uses the real domain [0, 130].
    """
    names = ['Tom', 'Jack', 'Steve', 'Jack']
    ages = [28, 34, 29, 42]
    df = pd.DataFrame({'Name': names, 'Age': ages})

    name_domain = CategoricalDataDomain(
        ['Tom', 'Jack', 'Steve', 'Eve', 'Adam', 'Lucifer'])
    age_domain = RealDataDomain(0., 130.)
    domains = {'Name': name_domain, 'Age': age_domain}

    return PrivateTable(df, domains, PrivacyBudget(100000.0, 1000.))
def test_column_names(example_table: DataFrame):
    """Check that every iris domain column appears in the table's ``_columns``."""
    measurement_columns = (
        'Sepal Length', 'Sepal Width', 'Petal Length', 'Petal Width',
    )
    # Each measurement column gets its own RealDataDomain(0, 10) instance.
    domains = {name: RealDataDomain(0., 10.) for name in measurement_columns}
    domains['Class'] = CategoricalDataDomain(
        ['Iris-setosa', 'Iris-versicolor', 'Iris-virginica'])

    t = PrivateTable(example_table, domains, PrivacyBudget(1.0, 0.))

    for column in measurement_columns + ('Class',):
        assert column in t._columns
def test_private_numerical_hist(example_private_table: PrivateTable):
    """Check the private histogram of the numerical 'Age' column.

    bins:         |.......|.......|.......|
    boundaries:   a0      a1      a2      a3

    The noisy counts are sorted in place and compared element-wise with
    ``[1, 1, 2]``; every count must differ from its reference by less
    than 1.
    """
    bins: List[float] = [20, 30, 40, 50]  # [a0, a1, a2, a3]
    expected_counts = [1, 1, 2]
    err = [1, 1, 1]

    noisy_hist = example_private_table.num_hist('Age', bins,
                                                PrivacyBudget(10000.))
    noisy_hist.sort()

    deviation = np.abs(noisy_hist - expected_counts)
    assert all(deviation < err)
    del noisy_hist, bins
def test_private_categorical_hist(example_private_table: PrivateTable):
    """Check the private histogram of the categorical 'Education' column (adult).

    Bins: HS-grad, Bachelors, etc.

    The noisy counts are sorted in place and compared element-wise with
    the 16 sorted reference counts; every count must differ from its
    reference by less than 1.
    """
    expected_counts = [
        51, 168, 333, 413, 433, 514, 576, 646,
        933, 1067, 1175, 1382, 1723, 5355, 7291, 10501,
    ]
    err = [1] * 16

    noisy_hist = example_private_table.cat_hist('Education',
                                                PrivacyBudget(10000.))
    noisy_hist.sort()

    deviation = np.abs(noisy_hist - expected_counts)
    assert all(deviation < err)
    del noisy_hist
def test_column_names(example_table: DataFrame):
    """Check that 'Age' and 'Education' appear in the table's ``_columns``."""
    # NOTE: the leading space in every label is part of the literal value.
    education_levels = [
        ' Bachelors', ' HS-grad', ' 11th', ' Masters', ' 9th',
        ' Some-college', ' Assoc-acdm', ' Assoc-voc', ' 7th-8th',
        ' Doctorate', ' Prof-school', ' 5th-6th', ' 10th', ' 1st-4th',
        ' Preschool', ' 12th',
    ]
    domains = {
        'Age': RealDataDomain(0., 130.),
        'Education': CategoricalDataDomain(education_levels),
    }
    t = PrivateTable(example_table, domains, PrivacyBudget(1.0, 0.))

    for column in ('Age', 'Education'):
        assert column in t._columns
def example_private_table():
    """Build a PrivateTable over all five columns of the iris data file.

    Reads ``dataset/iris_data.txt`` with explicit column names and wraps
    the data in a ``PrivateTable``: the four measurement columns use the
    real domain [0, 10] and 'Class' uses a three-value categorical domain.
    """
    measurement_columns = [
        "Sepal Length", "Sepal Width", "Petal Length", "Petal Width",
    ]
    column_names = measurement_columns + ["Class"]

    csv_path = os.path.join("dataset", "iris_data.txt")
    iris_data = pd.read_csv(csv_path, names=column_names)

    # Rebuild the frame from plain Python lists, one per column.
    df = pd.DataFrame(
        {column: iris_data[column].tolist() for column in column_names}
    )

    # Each measurement column gets its own RealDataDomain(0, 10) instance.
    domains = {name: RealDataDomain(0., 10.) for name in measurement_columns}
    domains['Class'] = CategoricalDataDomain(
        ['Iris-setosa', 'Iris-versicolor', 'Iris-virginica'])

    return PrivateTable(df, domains, PrivacyBudget(100000.0, 1.))
def test_private_numerical_hist(example_private_table: PrivateTable):
    """Check the private histogram of the numerical 'Age' column (adult).

    Bin boundaries: 17, 18, 19 ... 90 (74 boundaries, 73 reference counts).

    The noisy counts are sorted in place and compared element-wise with
    the sorted reference counts; every count must differ from its
    reference by less than 1.
    """
    expected_counts = [
        1, 1, 3, 3, 6, 10, 12, 20, 22, 22,
        23, 29, 43, 45, 46, 51, 64, 67, 72, 89,
        108, 120, 150, 151, 178, 208, 230, 258, 300, 312,
        355, 358, 366, 366, 395, 415, 419, 464, 478, 543,
        550, 577, 595, 602, 708, 712, 720, 724, 734, 737,
        753, 765, 770, 780, 785, 794, 798, 808, 813, 816,
        827, 828, 835, 841, 858, 861, 867, 875, 876, 877,
        886, 888, 898,
    ]
    bins: List[float] = list(range(17, 91))
    err = [1] * 73

    noisy_hist = example_private_table.num_hist('Age', bins,
                                                PrivacyBudget(10000.))
    noisy_hist.sort()

    deviation = np.abs(noisy_hist - expected_counts)
    assert all(deviation < err)
    del noisy_hist, bins
def test_private_mean_sepal_length(example_private_table: PrivateTable):
    """Check the private mean of 'Sepal Length' (iris dataset).

    The noisy result is handed to ``check_absolute_error`` with reference
    value 5.843333333333335 and bound 1.
    """
    budget = PrivacyBudget(10000.)
    estimate = example_private_table.mean('Sepal Length', budget)
    check_absolute_error(estimate, 5.843333333333335, 1.)
def test_private_mean(example_private_table: PrivateTable):
    """Check the private mean of the 'Age' column.

    The noisy result is handed to ``check_absolute_error`` with reference
    value 33.2 and bound 1.
    """
    budget = PrivacyBudget(10000.)
    estimate = example_private_table.mean('Age', budget)
    check_absolute_error(estimate, 33.2, 1.)
def test_private_mean_petal_width(example_private_table: PrivateTable):
    """Check the private mean of 'Petal Width' (iris dataset).

    The noisy result is handed to ``check_absolute_error`` with reference
    value 1.1986666666666672 and bound 1.
    """
    budget = PrivacyBudget(10000.)
    estimate = example_private_table.mean('Petal Width', budget)
    check_absolute_error(estimate, 1.1986666666666672, 1.)
def test_private_gaussian_mean_sepal_width(example_private_table: PrivateTable):
    """Check the private Gaussian mean of 'Sepal Width' (iris dataset).

    A single query under ``PrivacyBudget(0.99, 0.5)`` is handed to
    ``check_absolute_error`` with reference value 3.0540000000000007 and
    bound 1.
    """
    budget = PrivacyBudget(0.99, 0.5)
    estimate = example_private_table.gaussian_mean('Sepal Width', budget)
    check_absolute_error(estimate, 3.0540000000000007, 1.)
def test_private_mode(example_private_table: PrivateTable):
    """Check that the private mode of the 'Name' column is "Jack"."""
    answer = example_private_table.mode('Name', PrivacyBudget(10000.))
    assert answer == "Jack"
    del answer
def test_private_min(example_private_table: PrivateTable):
    """Check the private minimum of the 'Age' column.

    The noisy result is handed to ``check_absolute_error`` with reference
    value 28 and bound 1.
    """
    estimate = example_private_table.min('Age', PrivacyBudget(10000.))
    check_absolute_error(estimate, 28., 1.)
    del estimate
def test_private_var(example_private_table: PrivateTable):
    """Check the private variance of 'Age' (adult dataset).

    The noisy result is handed to ``check_absolute_error`` with reference
    value 186.06140024879625 and bound 2.
    """
    estimate = example_private_table.var('Age', PrivacyBudget(10000.))
    check_absolute_error(estimate, 186.06140024879625, 2.)
    del estimate
def test_private_gaussian_mean(example_private_table: PrivateTable):
    """Check the private Gaussian mean of 'Age' (adult dataset).

    A single query under ``PrivacyBudget(0.99, 0.5)`` is handed to
    ``check_absolute_error`` with reference value 38.58164675532078 and
    bound 1.
    """
    budget = PrivacyBudget(0.99, 0.5)
    estimate = example_private_table.gaussian_mean('Age', budget)
    check_absolute_error(estimate, 38.58164675532078, 1.)
def test_private_mean(example_private_table: PrivateTable):
    """Check the private mean of 'Age' (adult dataset).

    The noisy result is handed to ``check_absolute_error`` with reference
    value 38.58164675532078 and bound 1.
    """
    budget = PrivacyBudget(10000.)
    estimate = example_private_table.mean('Age', budget)
    check_absolute_error(estimate, 38.58164675532078, 1.)
def test_private_median_petal_length(example_private_table: PrivateTable):
    """Check the private median of 'Petal Length' (iris dataset).

    The noisy result is handed to ``check_absolute_error`` with reference
    value 4.35 and bound 1.
    """
    estimate = example_private_table.median('Petal Length',
                                            PrivacyBudget(10000.))
    check_absolute_error(estimate, 4.35, 1.)
    del estimate
def test_private_std(example_private_table: PrivateTable):
    """Check the private standard deviation of the 'Age' column.

    The noisy result is handed to ``check_absolute_error`` with reference
    value 5.54 and bound 1.
    """
    estimate = example_private_table.std('Age', PrivacyBudget(10000.))
    check_absolute_error(estimate, 5.54, 1.)
    del estimate
def test_private_mode(example_private_table: PrivateTable):
    """Check that the private mode of 'Education' (adult dataset) is " HS-grad".

    The leading space in the expected label is part of the literal value.
    """
    answer = example_private_table.mode('Education', PrivacyBudget(10000.))
    assert answer == " HS-grad"
    del answer
def test_private_min(example_private_table: PrivateTable):
    """Check the private minimum of 'Age' (adult dataset).

    The noisy result is handed to ``check_absolute_error`` with reference
    value 17 and bound 1.
    """
    estimate = example_private_table.min('Age', PrivacyBudget(10000.))
    check_absolute_error(estimate, 17., 1.)
    del estimate
def test_private_max(example_private_table: PrivateTable):
    """Check the private maximum of the 'Age' column.

    The noisy result is handed to ``check_absolute_error`` with reference
    value 42 and bound 1.
    """
    estimate = example_private_table.max('Age', PrivacyBudget(10000.))
    check_absolute_error(estimate, 42., 1.)
    del estimate
def test_private_gaussian_mean_petal_length(example_private_table: PrivateTable):
    """Check the private Gaussian mean of 'Petal Length' (iris dataset).

    A single query under ``PrivacyBudget(0.99, 0.5)`` is handed to
    ``check_absolute_error`` with reference value 3.7586666666666693 and
    bound 1.
    """
    budget = PrivacyBudget(0.99, 0.5)
    estimate = example_private_table.gaussian_mean('Petal Length', budget)
    check_absolute_error(estimate, 3.7586666666666693, 1.)
def test_private_std(example_private_table: PrivateTable):
    """Check the private standard deviation of 'Age' (adult dataset).

    The noisy result is handed to ``check_absolute_error`` with reference
    value 13.640432553581146 and bound 1.
    """
    estimate = example_private_table.std('Age', PrivacyBudget(10000.))
    check_absolute_error(estimate, 13.640432553581146, 1.)
    del estimate
def test_private_var(example_private_table: PrivateTable):
    """Check the private variance of the 'Age' column.

    The noisy result is handed to ``check_absolute_error`` with reference
    value 30.69 and bound 2.
    """
    estimate = example_private_table.var('Age', PrivacyBudget(10000.))
    check_absolute_error(estimate, 30.69, 2.)
    del estimate
def test_private_median_sepal_width(example_private_table: PrivateTable):
    """Check the private median of 'Sepal Width' (iris dataset).

    The noisy result is handed to ``check_absolute_error`` with reference
    value 3.0 and bound 1.
    """
    estimate = example_private_table.median('Sepal Width',
                                            PrivacyBudget(10000.))
    check_absolute_error(estimate, 3.0, 1.)
    del estimate