Beispiel #1
0
def test_basic_budget_accounting():
    """Check that one query reduces the remaining budget by its epsilon.

    A session is opened with a budget of ``10 * epsilon``; after a single
    ``get_count`` call made with ``epsilon`` the remaining budget is expected
    to be ``9 * epsilon``.

    The comparisons are tolerance-based rather than exact: when ``epsilon``
    is a float, subtracting it from ``10 * epsilon`` is not guaranteed to
    yield the bit-identical value of ``9 * epsilon`` (e.g. ``1.0 - 0.1`` vs
    ``9 * 0.1``), so ``==`` could reject a correct implementation.
    NOTE(review): assumes ``remaining_budget`` is a real number - confirm.
    """
    # Local import keeps this test self-contained; the module-level import
    # block is not visible from here.
    import math

    initial_budget = 10 * epsilon
    querier = dp.DpQuerySession(db_path, privacy_budget=initial_budget)
    assert math.isclose(
        querier.remaining_budget,
        initial_budget), "Remaining budget is computed incorrectly"

    querier.get_count(movie_name, rating_threshold, epsilon)
    assert math.isclose(
        querier.remaining_budget,
        9 * epsilon), "Remaining budget is computed incorrectly"
Beispiel #2
0
def test_multiple_queries_2():
    """Check that queries with different thresholds get different answers.

    Five queries are issued for the same movie, one per rating threshold
    from 1 to 5, against a budget of ``5 * epsilon``. Every response after
    the first is compared with the first one.
    """
    querier = dp.DpQuerySession(db_path, privacy_budget=5 * epsilon)
    thresholds = range(1, 6)
    values = np.zeros(5)
    for slot, stars in enumerate(thresholds):
        values[slot] = querier.get_count(movie_name,
                                         rating_threshold=stars,
                                         epsilon=epsilon)

    # A query is identified by both movie_name and rating_threshold, so the
    # five calls above are distinct queries; none of the later responses
    # should coincide with the first one.
    first_response = values[0]
    later_responses = values[1:]
    assert np.all(later_responses != first_response
                  ), "Got the exact same response to different queries."
Beispiel #3
0
def test_budget_depleted():
    """Check that querying past the privacy budget raises BudgetDepletedError.

    The session starts with ``2 * epsilon``; two distinct queries made with
    ``epsilon`` each are issued first, then a third query is attempted and
    must fail with ``dp.BudgetDepletedError``.
    """
    querier = dp.DpQuerySession(db_path, privacy_budget=2 * epsilon)
    for threshold in (1, 2):
        querier.get_count(movie_name, threshold, epsilon)

    # Budget is fully spent by now. The next query should raise BudgetDepletedError.
    try:
        querier.get_count(movie_name, 3, epsilon)
    except Exception as err:
        # Any exception is caught so that a wrong exception type is reported
        # with an explicit message instead of surfacing as an unrelated error.
        error_seen = True
        assert isinstance(
            err, dp.BudgetDepletedError
        ), f"Expected BudgetDepletedError exception, got {type(err)}"
    else:
        error_seen = False

    assert error_seen, "Does not raise an error when privacy budget is depleted."
Beispiel #4
0
def test_multiple_queries_1():
    """Check that repeating one query returns one answer and is charged once.

    The same query is issued 30 times against a budget of ``2 * epsilon``.
    All responses must be equal, and the remaining budget afterwards must be
    ``epsilon``.
    """
    num_trials = 30
    querier = dp.DpQuerySession(db_path, privacy_budget=2 * epsilon)

    values = np.zeros(num_trials)
    for trial in range(num_trials):
        values[trial] = querier.get_count(movie_name,
                                          rating_threshold,
                                          epsilon=epsilon)

    # Identical queries must reuse the same noise: with fresh noise per
    # response, an attacker could recover the real value by repeating the
    # query many times.
    reference = values[0]
    assert np.all(values == reference), (
        "The attacker might be able to retrieve the exact value of a given query because the "
        "noise is freshly drawn for every response.")

    # Only epsilon of the 2 * epsilon budget may have been spent in total.
    assert querier.remaining_budget == epsilon, (
        "Remaining budget is incorrectly computed when "
        "queries are repeated.")
Beispiel #5
0
def test_noise_distribution():
    """Check that the noise added to a count looks Laplace(0, 1/epsilon).

    The same query is issued 300 times, each time through a brand-new
    session, and the real count is subtracted from the responses. The
    resulting noise sample is run through a Kolmogorov-Smirnov test against
    a Laplace distribution with location 0 and scale ``1 / epsilon``.
    """
    num_trials = 300
    values = np.zeros(num_trials)
    for trial in range(num_trials):
        # A new session is created on every iteration, so each response
        # comes from a separate DpQuerySession object.
        session = dp.DpQuerySession(db_path, privacy_budget=10 * epsilon)
        values[trial] = session.get_count(movie_name,
                                          rating_threshold,
                                          epsilon=epsilon)

    real_count = _get_real_count(db_path, movie_name, rating_threshold)
    noise = values - real_count

    # Check that the noise follows Laplace distribution with scale 1/epsilon
    laplace_params = (0, 1. / epsilon)
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        p_value = stats.kstest(noise, "laplace", args=laplace_params).pvalue

    p_value_thresh = 0.005
    # Fail on a NaN p-value as well as on one at or below the threshold.
    assert not (
        np.isnan(p_value) or p_value <= p_value_thresh
    ), "The added noise does not seem to result in the required level of privacy."