Example #1
0
def test_private_gaussian_mean(example_private_table: PrivateTable):
    """check private gaussian mean implementation.

    Requests the Gaussian mean of 'Age' 100 times, each with
    PrivacyBudget(0.99, 0.5), averages the results and compares the average
    with 33.2 through check_absolute_error (error argument 10.).
    """
    draws = []
    for _ in range(100):
        draws.append(
            example_private_table.gaussian_mean('Age',
                                                PrivacyBudget(0.99, 0.5)))
    noisy_mean = np.mean(draws)
    check_absolute_error(noisy_mean, 33.2, 10.)
Example #2
0
def example_private_table():
    """Build a PrivateTable holding the 'Age' and 'Education' adult columns.

    Reads dataset/adult_data.txt with pandas (column names supplied
    explicitly), copies the two columns of interest into a fresh DataFrame
    and wraps it with one data domain per column and a
    PrivacyBudget(100000.0, 1.).
    """
    column_names = [
        "Age", "Workclass", "fnlwgt", "Education", "Education-Num",
        "Martial Status", "Occupation", "Relationship", "Race", "Sex",
        "Capital Gain", "Capital Loss", "Hours per week", "Country", "Target",
    ]
    csv_path = os.path.join("dataset", "adult_data.txt")
    adult_data = pd.read_csv(csv_path, names=column_names)

    # Only two of the loaded columns end up in the private table.
    df = pd.DataFrame({
        column: adult_data[column].tolist()
        for column in ('Age', 'Education')
    })

    # NOTE: every label starts with a space; reproduce them exactly.
    education_levels = [
        ' Bachelors', ' HS-grad', ' 11th', ' Masters', ' 9th',
        ' Some-college', ' Assoc-acdm', ' Assoc-voc', ' 7th-8th',
        ' Doctorate', ' Prof-school', ' 5th-6th', ' 10th', ' 1st-4th',
        ' Preschool', ' 12th',
    ]
    domains = {
        'Age': RealDataDomain(17., 90.),
        'Education': CategoricalDataDomain(education_levels),
    }
    return PrivateTable(df, domains, PrivacyBudget(100000.0, 1.))
Example #3
0
def test_private_categorical_hist(example_private_table: PrivateTable):
    """check private hist implementation for categorical column.

    The histogram of 'Name' is sorted and every entry must differ from the
    reference counts [1, 1, 2] by less than 1.
    """
    expected_counts = [1, 1, 2]
    tolerance = [1] * 3

    noisy_hist = example_private_table.cat_hist('Name', PrivacyBudget(10000.))
    # Sort in place so the counts are compared in ascending order.
    noisy_hist.sort()

    deviation = np.abs(noisy_hist - expected_counts)
    assert all(deviation < tolerance)
Example #4
0
def test_column_names(example_table: DataFrame):
    """check that 'Age' and 'Name' are listed in the table's columns."""
    name_domain = CategoricalDataDomain(
        ['Tom', 'Jack', 'Steve', 'Eve', 'Adam', 'Lucifer'])
    age_domain = RealDataDomain(0., 130.)
    domains = {'Name': name_domain, 'Age': age_domain}

    t = PrivateTable(example_table, domains, PrivacyBudget(1.0, 0.))

    for column in ('Age', 'Name'):
        assert column in t._columns
def test_private_categorical_hist(example_private_table: PrivateTable):
    """check private hist implementation for the Class column of the iris dataset.

    bins:       Iris-setosa, Iris-versicolor, Iris-virginica

    After sorting, every count must differ from 50 by less than 1.
    """
    expected_counts = [50, 50, 50]
    tolerance = [1] * 3

    noisy_hist = example_private_table.cat_hist('Class', PrivacyBudget(10000.))
    # Sort in place so the counts are compared in ascending order.
    noisy_hist.sort()

    deviation = np.abs(noisy_hist - expected_counts)
    assert all(deviation < tolerance)
Example #6
0
def example_private_table():
    """Build a four-row PrivateTable with 'Name' and 'Age' columns.

    'Name' is given a categorical domain of six names and 'Age' a real
    domain of [0, 130]; the table is created with
    PrivacyBudget(100000.0, 1000.).
    """
    df = pd.DataFrame({
        'Name': ['Tom', 'Jack', 'Steve', 'Jack'],
        'Age': [28, 34, 29, 42],
    })

    allowed_names = ['Tom', 'Jack', 'Steve', 'Eve', 'Adam', 'Lucifer']
    domains = {
        'Name': CategoricalDataDomain(allowed_names),
        'Age': RealDataDomain(0., 130.),
    }

    table_budget = PrivacyBudget(100000.0, 1000.)
    return PrivateTable(df, domains, table_budget)
def test_column_names(example_table: DataFrame):
    """check that every iris domain key is listed in the table's columns."""
    measurement_columns = ('Sepal Length', 'Sepal Width',
                           'Petal Length', 'Petal Width')

    # One separate RealDataDomain(0., 10.) per measurement, then the class labels.
    domains = {
        column: RealDataDomain(0., 10.) for column in measurement_columns
    }
    domains['Class'] = CategoricalDataDomain(
        ['Iris-setosa', 'Iris-versicolor', 'Iris-virginica'])

    t = PrivateTable(example_table, domains, PrivacyBudget(1.0, 0.))

    for column in (*measurement_columns, 'Class'):
        assert column in t._columns
Example #8
0
def test_private_numerical_hist(example_private_table: PrivateTable):
    """check private hist implementation for numerical column.

    bins:         |.......|.......|.......|
    boundaries:   20      30      40      50

    After sorting, every count must differ from [1, 1, 2] by less than 1.
    """
    boundaries: List[float] = [20, 30, 40, 50]
    expected_counts = [1, 1, 2]
    tolerance = [1] * 3

    budget = PrivacyBudget(10000.)
    noisy_hist = example_private_table.num_hist('Age', boundaries, budget)
    # Sort in place so the counts are compared in ascending order.
    noisy_hist.sort()

    deviation = np.abs(noisy_hist - expected_counts)
    assert all(deviation < tolerance)
Example #9
0
def test_private_categorical_hist(example_private_table: PrivateTable):
    """check private hist implementation for the Education column of the adult dataset.

    bins:       HS-grad, Bachelors etc

    After sorting, each of the 16 counts must differ from its reference
    value by less than 1.
    """
    expected_counts = [
        51, 168, 333, 413, 433, 514, 576, 646, 933, 1067, 1175, 1382, 1723,
        5355, 7291, 10501,
    ]
    tolerance = [1] * 16

    budget = PrivacyBudget(10000.)
    noisy_hist = example_private_table.cat_hist('Education', budget)
    # Sort in place so the counts line up with the ascending reference list.
    noisy_hist.sort()

    deviation = np.abs(noisy_hist - expected_counts)
    assert all(deviation < tolerance)
Example #10
0
def test_column_names(example_table: DataFrame):
    """check that 'Age' and 'Education' are listed in the table's columns."""
    age_domain = RealDataDomain(0., 130.)
    # NOTE: every label starts with a space; reproduce them exactly.
    education_domain = CategoricalDataDomain([
        ' Bachelors', ' HS-grad', ' 11th', ' Masters', ' 9th',
        ' Some-college', ' Assoc-acdm', ' Assoc-voc', ' 7th-8th',
        ' Doctorate', ' Prof-school', ' 5th-6th', ' 10th', ' 1st-4th',
        ' Preschool', ' 12th',
    ])
    domains = {'Age': age_domain, 'Education': education_domain}

    t = PrivateTable(example_table, domains, PrivacyBudget(1.0, 0.))

    for column in ('Age', 'Education'):
        assert column in t._columns
Example #11
0
def example_private_table():
    """Build a PrivateTable from all five columns of the iris data file.

    Reads dataset/iris_data.txt with pandas (column names supplied
    explicitly), copies every column into a fresh DataFrame and wraps it
    with one data domain per column and a PrivacyBudget(100000.0, 1.).
    """
    measurement_columns = ["Sepal Length", "Sepal Width", "Petal Length",
                           "Petal Width"]
    all_columns = measurement_columns + ["Class"]

    csv_path = os.path.join("dataset", "iris_data.txt")
    iris_data = pd.read_csv(csv_path, names=all_columns)

    df = pd.DataFrame({
        column: iris_data[column].tolist() for column in all_columns
    })

    # One separate RealDataDomain(0., 10.) per measurement, then the class labels.
    domains = {
        column: RealDataDomain(0., 10.) for column in measurement_columns
    }
    domains['Class'] = CategoricalDataDomain(
        ['Iris-setosa', 'Iris-versicolor', 'Iris-virginica'])

    return PrivateTable(df, domains, PrivacyBudget(100000.0, 1.))
Example #12
0
def test_private_numerical_hist(example_private_table: PrivateTable):
    """check private hist implementation for the Age column of the adult dataset.

    bins:         17, 18, 19 ... 90

    After sorting, each of the 73 counts must differ from its reference
    value by less than 1.
    """
    boundaries: List[float] = list(range(17, 91))
    expected_counts = [
        1, 1, 3, 3, 6, 10, 12, 20, 22, 22, 23, 29, 43, 45, 46, 51, 64, 67,
        72, 89, 108, 120, 150, 151, 178, 208, 230, 258, 300, 312, 355, 358,
        366, 366, 395, 415, 419, 464, 478, 543, 550, 577, 595, 602, 708,
        712, 720, 724, 734, 737, 753, 765, 770, 780, 785, 794, 798, 808,
        813, 816, 827, 828, 835, 841, 858, 861, 867, 875, 876, 877, 886,
        888, 898,
    ]
    tolerance = [1] * 73

    budget = PrivacyBudget(10000.)
    noisy_hist = example_private_table.num_hist('Age', boundaries, budget)
    # Sort in place so the counts line up with the ascending reference list.
    noisy_hist.sort()

    deviation = np.abs(noisy_hist - expected_counts)
    assert all(deviation < tolerance)
Example #13
0
def test_private_mean_sepal_length(example_private_table: PrivateTable):
    """check private mean of Sepal Length (iris dataset) against 5.8433..."""
    reference_mean = 5.843333333333335
    budget = PrivacyBudget(10000.)
    estimate = example_private_table.mean('Sepal Length', budget)
    check_absolute_error(estimate, reference_mean, 1.)
Example #14
0
def test_private_mean(example_private_table: PrivateTable):
    """check private mean of 'Age' against the reference value 33.2."""
    budget = PrivacyBudget(10000.)
    estimate = example_private_table.mean('Age', budget)
    check_absolute_error(estimate, 33.2, 1.)
Example #15
0
def test_private_mean_petal_width(example_private_table: PrivateTable):
    """check private mean of Petal Width (iris dataset) against 1.1986..."""
    reference_mean = 1.1986666666666672
    budget = PrivacyBudget(10000.)
    estimate = example_private_table.mean('Petal Width', budget)
    check_absolute_error(estimate, reference_mean, 1.)
Example #16
0
def test_private_gaussian_mean_sepal_width(example_private_table: PrivateTable):
    """check private gaussian mean of Sepal Width (iris dataset) against 3.054."""
    reference_mean = 3.0540000000000007
    budget = PrivacyBudget(0.99, 0.5)
    estimate = example_private_table.gaussian_mean('Sepal Width', budget)
    check_absolute_error(estimate, reference_mean, 1.)
Example #17
0
def test_private_mode(example_private_table: PrivateTable):
    """check that the private mode of 'Name' is "Jack"."""
    budget = PrivacyBudget(10000.)
    reported_mode = example_private_table.mode('Name', budget)
    assert reported_mode == "Jack"
Example #18
0
def test_private_min(example_private_table: PrivateTable):
    """check private min of 'Age' against the reference value 28."""
    budget = PrivacyBudget(10000.)
    estimate = example_private_table.min('Age', budget)
    check_absolute_error(estimate, 28., 1.)
Example #19
0
def test_private_var(example_private_table: PrivateTable):
    """check private var of Age (adult dataset) against 186.0614..."""
    reference_var = 186.06140024879625
    budget = PrivacyBudget(10000.)
    estimate = example_private_table.var('Age', budget)
    check_absolute_error(estimate, reference_var, 2.)
Example #20
0
def test_private_gaussian_mean(example_private_table: PrivateTable):
    """check private gaussian mean of Age (adult dataset) against 38.5816..."""
    reference_mean = 38.58164675532078
    budget = PrivacyBudget(0.99, 0.5)
    estimate = example_private_table.gaussian_mean('Age', budget)
    check_absolute_error(estimate, reference_mean, 1.)
Example #21
0
def test_private_mean(example_private_table: PrivateTable):
    """check private mean of Age (adult dataset) against 38.5816..."""
    reference_mean = 38.58164675532078
    budget = PrivacyBudget(10000.)
    estimate = example_private_table.mean('Age', budget)
    check_absolute_error(estimate, reference_mean, 1.)
Example #22
0
def test_private_median_petal_length(example_private_table: PrivateTable):
    """check private median of Petal Length (iris dataset) against 4.35."""
    budget = PrivacyBudget(10000.)
    estimate = example_private_table.median('Petal Length', budget)
    check_absolute_error(estimate, 4.35, 1.)
Example #23
0
def test_private_std(example_private_table: PrivateTable):
    """check private std of 'Age' against the reference value 5.54."""
    budget = PrivacyBudget(10000.)
    estimate = example_private_table.std('Age', budget)
    check_absolute_error(estimate, 5.54, 1.)
Example #24
0
def test_private_mode(example_private_table: PrivateTable):
    """check that the private mode of Education (adult dataset) is " HS-grad"."""
    budget = PrivacyBudget(10000.)
    reported_mode = example_private_table.mode('Education', budget)
    # NOTE: the expected label starts with a space.
    assert reported_mode == " HS-grad"
Example #25
0
def test_private_min(example_private_table: PrivateTable):
    """check private min of Age (adult dataset) against the reference value 17."""
    budget = PrivacyBudget(10000.)
    estimate = example_private_table.min('Age', budget)
    check_absolute_error(estimate, 17., 1.)
Example #26
0
def test_private_max(example_private_table: PrivateTable):
    """check private max of 'Age' against the reference value 42."""
    budget = PrivacyBudget(10000.)
    estimate = example_private_table.max('Age', budget)
    check_absolute_error(estimate, 42., 1.)
Example #27
0
def test_private_gaussian_mean_petal_length(example_private_table: PrivateTable):
    """check private gaussian mean of Petal Length (iris dataset) against 3.7586..."""
    reference_mean = 3.7586666666666693
    budget = PrivacyBudget(0.99, 0.5)
    estimate = example_private_table.gaussian_mean('Petal Length', budget)
    check_absolute_error(estimate, reference_mean, 1.)
Example #28
0
def test_private_std(example_private_table: PrivateTable):
    """check private std of Age (adult dataset) against 13.6404..."""
    reference_std = 13.640432553581146
    budget = PrivacyBudget(10000.)
    estimate = example_private_table.std('Age', budget)
    check_absolute_error(estimate, reference_std, 1.)
Example #29
0
def test_private_var(example_private_table: PrivateTable):
    """check private var of 'Age' against the reference value 30.69."""
    budget = PrivacyBudget(10000.)
    estimate = example_private_table.var('Age', budget)
    check_absolute_error(estimate, 30.69, 2.)
Example #30
0
def test_private_median_sepal_width(example_private_table: PrivateTable):
    """check private median of Sepal Width (iris dataset) against 3.0."""
    budget = PrivacyBudget(10000.)
    estimate = example_private_table.median('Sepal Width', budget)
    check_absolute_error(estimate, 3.0, 1.)