Esempio n. 1
0
 def __init__(self, total_privacy_budget: PrivacyBudget):
     """Set up privacy budget bookkeeping.

     :param total_privacy_budget: Upper limit on the privacy budget that the private table may consume.
         Once no privacy budget is left, queries should no longer be answered.
     """
     # Consumption starts from a budget constructed with (0., 0.).
     self.consumed_privacy_budget = PrivacyBudget(0., 0.)
     self.total_privacy_budget = total_privacy_budget
Esempio n. 2
0
def example_private_table():
    """Build a PrivateTable over the 'Age' and 'Education' columns of the adult dataset file.

    The CSV at ``dataset/adult_data.txt`` is read with explicit column names,
    then only the two columns of interest are copied into a fresh DataFrame.
    """
    column_names = [
        "Age", "Workclass", "fnlwgt", "Education",
        "Education-Num", "Martial Status",
        "Occupation", "Relationship", "Race", "Sex",
        "Capital Gain", "Capital Loss",
        "Hours per week", "Country", "Target"
    ]
    csv_path = os.path.join("dataset", "adult_data.txt")
    adult_data = pd.read_csv(csv_path, names=column_names)

    df = pd.DataFrame({
        'Age': adult_data["Age"].tolist(),
        'Education': adult_data["Education"].tolist()
    })

    # Category labels carry a leading space, exactly as spelled here.
    education_levels = [
        ' Bachelors', ' HS-grad', ' 11th', ' Masters', ' 9th',
        ' Some-college', ' Assoc-acdm', ' Assoc-voc', ' 7th-8th',
        ' Doctorate', ' Prof-school', ' 5th-6th', ' 10th', ' 1st-4th',
        ' Preschool', ' 12th'
    ]
    domains = {
        'Age': RealDataDomain(17., 90.),
        'Education': CategoricalDataDomain(education_levels)
    }
    total_budget = PrivacyBudget(100000.0, 1.)
    return PrivateTable(df, domains, total_budget)
Esempio n. 3
0
def test_private_gaussian_mean(example_private_table: PrivateTable):
    """Check the private Gaussian mean implementation.

    Averages 100 noisy estimates of the 'Age' mean, each requested with a
    fresh PrivacyBudget(0.99, 0.5), and checks the average against 33.2
    using ``check_absolute_error`` with bound 10.
    """
    estimates = [
        example_private_table.gaussian_mean('Age', PrivacyBudget(0.99, 0.5))
        for _ in range(100)
    ]
    averaged_estimate = np.mean(estimates)
    check_absolute_error(averaged_estimate, 33.2, 10.)
Esempio n. 4
0
    def update_privacy_loss(self,
                            sampling_ratio: float,
                            sigma: float,
                            steps: int,
                            moment_order: int = 32,
                            target_eps: Union[float, None] = None,
                            target_delta: Union[float, None] = None):
        r"""Calculate and update privacy loss. Must specify exactly either one of `target_eps` or `target_delta`.

        Log moments of orders ``1..moment_order`` are computed and converted
        to a privacy budget, which is added to the consumed budget only if the
        total budget is not exceeded.

        :param sampling_ratio: Ratio of data used to total data in one step
        :param sigma: Noise scale
        :param steps: Number of update performed
        :param moment_order: Maximum order of moment to calculate privacy budget, defaults to 32
        :param target_eps: Target value of :math:`\epsilon`, defaults to None
        :param target_delta: Target value of :math:`\delta`, defaults to None
        :raises AssertionError: If a given target is not positive, or if the
            resulting consumption would exceed the total privacy budget.
        """
        # Raise explicitly instead of using ``assert`` so that these checks
        # are still enforced when Python runs with ``-O``.
        if target_eps is not None and not target_eps > 0:
            raise AssertionError("Value of epsilon should be positive")
        if target_delta is not None and not target_delta > 0:
            raise AssertionError("Value of delta should be positive")
        # NOTE(review): the "exactly one target" requirement is not checked
        # here; presumably get_privacy_spent enforces it -- confirm.

        log_moments = [(i, compute_log_moment(sampling_ratio, sigma, steps, i))
                       for i in range(1, moment_order + 1)]
        privacy = get_privacy_spent(log_moments, target_eps, target_delta)
        privacy_budget = PrivacyBudget(privacy[0], privacy[1])

        e = self.consumed_privacy_budget + privacy_budget
        if not e <= self.total_privacy_budget:
            # State is left untouched when the budget would be overspent.
            raise AssertionError("there is not enough privacy budget.")

        self.consumed_privacy_budget = e
Esempio n. 5
0
def test_private_categorical_hist(example_private_table: PrivateTable):
    """Check the private histogram implementation for the categorical 'Name' column.

    After sorting, every noisy count must differ from [1, 1, 2] by less than 1.
    """
    expected_counts = [1, 1, 2]
    tolerance = [1] * len(expected_counts)

    budget = PrivacyBudget(10000.)
    noisy_hist = example_private_table.cat_hist('Name', budget)
    noisy_hist.sort()  # in-place sort so the comparison ignores bin order

    deviation = np.abs(noisy_hist - expected_counts)
    assert all(deviation < tolerance)
Esempio n. 6
0
def test_private_SGD(data):
    """Run private_SGD on a linear model and check the privacy budget reported as consumed.

    The first 800 entries of ``data`` are used for training, the rest for the
    test callback. The model is ``y = param[0] * x + param[1]``.
    """
    train_data = data[:800]
    test_data = data[800:]
    param = np.random.rand(2)  # param[0]: slope, param[1]: intercept

    def gradient_function(batch_data):
        """Return the squared-error gradient w.r.t. param[0] and param[1]."""
        x, y = batch_data
        residual = y - (param[0] * x + param[1])
        return [-2.0 * x * residual, -2.0 * residual]

    def get_weights_function():
        """Return a copy of the current parameters."""
        return np.copy(param)

    def learning_rate_function(step):
        """Piecewise-constant schedule: 0.1, then 0.01 from step 10, then 0.005 from step 50."""
        if step < 10:
            return 0.1
        if step < 50:
            return 0.01
        return 0.005

    def update_weights_function(new_weight):
        """Overwrite the parameters in place so the closures above see the update."""
        param[:] = new_weight

    def test_function():
        """Compute the mean squared error on the held-out data and check it against 0 with bound 20."""
        xs = np.array([sample[0] for sample in test_data])
        ys = np.array([sample[1] for sample in test_data])
        squared_errors = ((param[0] * xs + param[1]) - ys)**2

        loss = 1.0 / len(test_data) * np.sum(squared_errors)

        check_absolute_error(loss, 0., 20.)

    moment_accountant = MomentPrivacyBudgetTracker(PrivacyBudget(10, 0.001))

    private_SGD(
        gradient_function=gradient_function,
        get_weights_function=get_weights_function,
        update_weights_function=update_weights_function,
        learning_rate_function=learning_rate_function,
        train_data=train_data,
        group_size=100,
        gradient_norm_bound=10,
        number_of_steps=100,
        sigma=1,
        moment_privacy_budget_tracker=moment_accountant,
        test_interval=100,
        test_function=test_function,
    )

    # Epsilon is checked first, then delta, both with a 1e-6 bound.
    check_absolute_error(moment_accountant.consumed_privacy_budget.epsilon,
                         8.805554, 1e-6)
    check_absolute_error(moment_accountant.consumed_privacy_budget.delta,
                         0.000625, 1e-6)
Esempio n. 7
0
def test_private_categorical_hist(example_private_table: PrivateTable):
    """Check the private histogram implementation for the categorical 'Class' column of the iris dataset.

    bins:       Iris-setosa, Iris-versicolor, Iris-virginica

    After sorting, every noisy count must differ from 50 by less than 1.
    """
    expected_counts = [50, 50, 50]
    tolerance = [1] * len(expected_counts)

    budget = PrivacyBudget(10000.)
    noisy_hist = example_private_table.cat_hist('Class', budget)
    noisy_hist.sort()  # in-place sort so the comparison ignores bin order

    deviation = np.abs(noisy_hist - expected_counts)
    assert all(deviation < tolerance)
Esempio n. 8
0
def example_private_table():
    """Build a small PrivateTable with a categorical 'Name' column and a real-valued 'Age' column."""
    df = pd.DataFrame({
        'Name': ['Tom', 'Jack', 'Steve', 'Jack'],
        'Age': [28, 34, 29, 42]
    })

    # The name domain lists more categories than actually occur in the data.
    allowed_names = ['Tom', 'Jack', 'Steve', 'Eve', 'Adam', 'Lucifer']
    domains = {
        'Name': CategoricalDataDomain(allowed_names),
        'Age': RealDataDomain(0., 130.)
    }
    total_budget = PrivacyBudget(100000.0, 1000.)
    return PrivateTable(df, domains, total_budget)
Esempio n. 9
0
def test_column_names(example_table: DataFrame):
    """Check that 'Age' and 'Name' are present in the table's ``_columns``."""
    allowed_names = ['Tom', 'Jack', 'Steve', 'Eve', 'Adam', 'Lucifer']
    domains = {
        'Name': CategoricalDataDomain(allowed_names),
        'Age': RealDataDomain(0., 130.)
    }
    t = PrivateTable(example_table, domains, PrivacyBudget(1.0, 0.))
    for column in ('Age', 'Name'):
        assert column in t._columns
Esempio n. 10
0
def test_column_names(example_table: DataFrame):
    """Check that every column named in the domains is present in the table's ``_columns``."""
    measurement_columns = ('Sepal Length', 'Sepal Width',
                           'Petal Length', 'Petal Width')
    domains = {column: RealDataDomain(0., 10.) for column in measurement_columns}
    domains['Class'] = CategoricalDataDomain(
        ['Iris-setosa', 'Iris-versicolor', 'Iris-virginica'])

    t = PrivateTable(example_table, domains, PrivacyBudget(1.0, 0.))
    for column in (*measurement_columns, 'Class'):
        assert column in t._columns
Esempio n. 11
0
def test_private_numerical_hist(example_private_table: PrivateTable):
    """Check the private histogram implementation for the numerical 'Age' column.

    bins:         |.......|.......|.......|
    boundaries:   a0      a1      a2      a3

    After sorting, every noisy count must differ from [1, 1, 2] by less than 1.
    """
    boundaries: List[float] = [20, 30, 40, 50]  # [a0, a1, a2, a3]
    expected_counts = [1, 1, 2]
    tolerance = [1] * len(expected_counts)

    budget = PrivacyBudget(10000.)
    noisy_hist = example_private_table.num_hist('Age', boundaries, budget)
    noisy_hist.sort()  # in-place sort so the comparison ignores bin order

    deviation = np.abs(noisy_hist - expected_counts)
    assert all(deviation < tolerance)
Esempio n. 12
0
def test_column_names(example_table: DataFrame):
    """Check that the column names named in the domains are present in the table's ``_columns``."""
    # Category labels carry a leading space, exactly as spelled here.
    education_levels = [
        ' Bachelors', ' HS-grad', ' 11th', ' Masters', ' 9th',
        ' Some-college', ' Assoc-acdm', ' Assoc-voc', ' 7th-8th',
        ' Doctorate', ' Prof-school', ' 5th-6th', ' 10th', ' 1st-4th',
        ' Preschool', ' 12th'
    ]
    domains = {
        'Age': RealDataDomain(0., 130.),
        'Education': CategoricalDataDomain(education_levels)
    }
    t = PrivateTable(example_table, domains, PrivacyBudget(1.0, 0.))
    for column in ('Age', 'Education'):
        assert column in t._columns
Esempio n. 13
0
def example_private_table():
    """Build a PrivateTable over all five columns of the iris dataset file.

    The CSV at ``dataset/iris_data.txt`` is read with explicit column names and
    each column is copied into a fresh DataFrame.
    """
    measurement_columns = ["Sepal Length", "Sepal Width", "Petal Length",
                           "Petal Width"]
    all_columns = measurement_columns + ["Class"]

    csv_path = os.path.join("dataset", "iris_data.txt")
    iris_data = pd.read_csv(csv_path, names=all_columns)

    df = pd.DataFrame(
        {column: iris_data[column].tolist() for column in all_columns})

    domains = {column: RealDataDomain(0., 10.) for column in measurement_columns}
    domains['Class'] = CategoricalDataDomain(
        ['Iris-setosa', 'Iris-versicolor', 'Iris-virginica'])

    return PrivateTable(df, domains, PrivacyBudget(100000.0, 1.))
Esempio n. 14
0
def test_private_categorical_hist(example_private_table: PrivateTable):
    """Check the private histogram implementation for the categorical 'Education' column in the adult dataset.

    bins:       HS-grad, Bachelors etc

    After sorting, every noisy count must differ from the sorted reference
    counts by less than 1.
    """
    expected_counts = [
        51, 168, 333, 413, 433, 514, 576, 646, 933, 1067, 1175, 1382, 1723,
        5355, 7291, 10501
    ]
    tolerance = [1] * len(expected_counts)

    budget = PrivacyBudget(10000.)
    noisy_hist = example_private_table.cat_hist('Education', budget)
    noisy_hist.sort()  # in-place sort so the comparison ignores bin order

    deviation = np.abs(noisy_hist - expected_counts)
    assert all(deviation < tolerance)
Esempio n. 15
0
def test_private_numerical_hist(example_private_table: PrivateTable):
    """Check the private histogram implementation for the numerical 'Age' column in the adult dataset.

    bins:         17, 18, 19 ... 90

    After sorting, every noisy count must differ from the sorted reference
    counts by less than 1.
    """
    boundaries: List[float] = list(range(17, 91))
    expected_counts = [
        1, 1, 3, 3, 6, 10, 12, 20, 22, 22, 23, 29, 43, 45, 46, 51, 64, 67,
        72, 89, 108, 120, 150, 151, 178, 208, 230, 258, 300, 312, 355, 358,
        366, 366, 395, 415, 419, 464, 478, 543, 550, 577, 595, 602, 708,
        712, 720, 724, 734, 737, 753, 765, 770, 780, 785, 794, 798, 808,
        813, 816, 827, 828, 835, 841, 858, 861, 867, 875, 876, 877, 886,
        888, 898
    ]
    # 74 boundaries give 73 bins, one tolerance entry per bin.
    tolerance = [1] * 73

    budget = PrivacyBudget(10000.)
    noisy_hist = example_private_table.num_hist('Age', boundaries, budget)
    noisy_hist.sort()  # in-place sort so the comparison ignores bin order

    deviation = np.abs(noisy_hist - expected_counts)
    assert all(deviation < tolerance)
Esempio n. 16
0
    def update_privacy_loss(self,
                            privacy_budget: PrivacyBudget,
                            delta_prime: float,
                            k: int = 1):
        r"""Calculate and update privacy loss of multiple queries with the same privacy_budget.

        With :math:`(\epsilon, \delta)` taken from ``privacy_budget``, the
        budget charged for the ``k`` queries is

        .. math::
            \epsilon_{total} = \sqrt{2 k \ln(1 / \delta')} \, \epsilon
                               + k \epsilon (e^{\epsilon} - 1), \qquad
            \delta_{total} = k \delta + \delta'

        It is added to the consumed budget only if the total budget is not
        exceeded.

        :param privacy_budget: Privacy budget of each query
        :param delta_prime: Value of :math:`\delta'`
        :param k: Number of queries, defaults to 1
        :raises AssertionError: If ``delta_prime`` is not positive, or if the
            resulting consumption would exceed the total privacy budget.
        """
        # Raise explicitly instead of using ``assert`` so that these checks
        # are still enforced when Python runs with ``-O``.
        if not delta_prime > 0:
            raise AssertionError("Value of delta should be positive")

        epsilon = privacy_budget.epsilon
        kfold_privacy_budget = PrivacyBudget(
            np.sqrt(2 * k * np.log(1 / delta_prime)) * epsilon +
            k * epsilon * (np.exp(epsilon) - 1),
            k * privacy_budget.delta + delta_prime)

        e = self.consumed_privacy_budget + kfold_privacy_budget
        if not e <= self.total_privacy_budget:
            # State is left untouched when the budget would be overspent.
            raise AssertionError("there is not enough privacy budget.")

        self.consumed_privacy_budget = e
Esempio n. 17
0
def test_private_std(example_private_table: PrivateTable):
    """Check the private std of 'Age' against 5.54 using ``check_absolute_error`` with bound 1."""
    budget = PrivacyBudget(10000.)
    estimate = example_private_table.std('Age', budget)
    check_absolute_error(estimate, 5.54, 1.)
Esempio n. 18
0
def test_private_var(example_private_table: PrivateTable):
    """Check the private variance of 'Age' in the adult dataset against the reference value with bound 2."""
    reference_var = 186.06140024879625
    budget = PrivacyBudget(10000.)
    estimate = example_private_table.var('Age', budget)
    check_absolute_error(estimate, reference_var, 2.)
Esempio n. 19
0
def test_private_std(example_private_table: PrivateTable):
    """Check the private std of 'Age' in the adult dataset against the reference value with bound 1."""
    reference_std = 13.640432553581146
    budget = PrivacyBudget(10000.)
    estimate = example_private_table.std('Age', budget)
    check_absolute_error(estimate, reference_std, 1.)
Esempio n. 20
0
def test_private_mean(example_private_table: PrivateTable):
    """Check the private mean of 'Age' against 33.2 using ``check_absolute_error`` with bound 1."""
    budget = PrivacyBudget(10000.)
    estimate = example_private_table.mean('Age', budget)
    check_absolute_error(estimate, 33.2, 1.)
Esempio n. 21
0
def test_privacy_budget_class():
    """Check that adding two PrivacyBudget objects equals the budget built from the summed components."""
    first = PrivacyBudget(1., 0.01)
    second = PrivacyBudget(0.2, 0.004)
    expected_sum = PrivacyBudget(1 + 0.2, 0.01 + 0.004)
    assert expected_sum == first + second
Esempio n. 22
0
def test_private_mode(example_private_table: PrivateTable):
    """Check that the private mode of 'Education' in the adult dataset is " HS-grad"."""
    budget = PrivacyBudget(10000.)
    reported_mode = example_private_table.mode('Education', budget)
    # The label carries a leading space, exactly as spelled here.
    assert reported_mode == " HS-grad"
Esempio n. 23
0
def test_private_min(example_private_table: PrivateTable):
    """Check the private min of 'Age' in the adult dataset against 17 using ``check_absolute_error`` with bound 1."""
    budget = PrivacyBudget(10000.)
    estimate = example_private_table.min('Age', budget)
    check_absolute_error(estimate, 17., 1.)
Esempio n. 24
0
def test_private_mode(example_private_table: PrivateTable):
    """Check that the private mode of the 'Name' column is "Jack"."""
    budget = PrivacyBudget(10000.)
    reported_mode = example_private_table.mode('Name', budget)
    assert reported_mode == "Jack"
Esempio n. 25
0
def test_private_min(example_private_table: PrivateTable):
    """Check the private min of 'Age' against 28 using ``check_absolute_error`` with bound 1."""
    budget = PrivacyBudget(10000.)
    estimate = example_private_table.min('Age', budget)
    check_absolute_error(estimate, 28., 1.)
Esempio n. 26
0
def test_combine_privacy_losses():
    """Check that combining two budgets equals the budget built from the summed components."""
    losses = [PrivacyBudget(1., 0.01), PrivacyBudget(0.2, 0.004)]
    combined = combine_privacy_losses(losses)
    expected = PrivacyBudget(1. + 0.2, 0.01 + 0.004)
    assert combined == expected
Esempio n. 27
0
def test_private_var(example_private_table: PrivateTable):
    """Check the private variance of 'Age' against 30.69 using ``check_absolute_error`` with bound 2."""
    budget = PrivacyBudget(10000.)
    estimate = example_private_table.var('Age', budget)
    check_absolute_error(estimate, 30.69, 2.)
Esempio n. 28
0
def test_private_gaussian_mean(example_private_table: PrivateTable):
    """Check the private Gaussian mean of 'Age' in the adult dataset against the reference value with bound 1."""
    reference_mean = 38.58164675532078
    budget = PrivacyBudget(0.99, 0.5)
    estimate = example_private_table.gaussian_mean('Age', budget)
    check_absolute_error(estimate, reference_mean, 1.)
Esempio n. 29
0
def test_private_mean(example_private_table: PrivateTable):
    """Check the private mean of 'Age' in the adult dataset against the reference value with bound 1."""
    reference_mean = 38.58164675532078
    budget = PrivacyBudget(10000.)
    estimate = example_private_table.mean('Age', budget)
    check_absolute_error(estimate, reference_mean, 1.)
Esempio n. 30
0
def test_private_max(example_private_table: PrivateTable):
    """Check the private max of 'Age' against 42 using ``check_absolute_error`` with bound 1."""
    budget = PrivacyBudget(10000.)
    estimate = example_private_table.max('Age', budget)
    check_absolute_error(estimate, 42., 1.)