import numpy as np
from numpy.linalg import norm

# NOTE: generate_database, linear_query, categorical_linear_query, Query,
# laplace, exponential, report_noisy_max, small_db and AT are assumed to come
# from the project's differential-privacy package; adjust this (hypothetical)
# import to match the actual module layout:
# from dp_mechanisms import (generate_database, linear_query,
#                            categorical_linear_query, Query, laplace,
#                            exponential, report_noisy_max, small_db, AT)


def test_query_dim_evaluation(iters=10):

    # random query dimension
    query_dim = np.random.randint(1, 20)

    for i in range(iters):

        # generate random database
        r = np.random.randint(0, 1000, 4)
        db = generate_database(r[0], r[1] + 1, r[2], r[2] + r[3] + 1, False)

        # generate multiple one-dimensional linear queries
        queries = []
        for j in range(query_dim):
            queries.append(linear_query(db.uni))

        # compose them into one multi-dimensional linear query
        func = lambda x: np.array([q._func(x) for q in queries])
        multidim_query = Query(db.uni,
                               func,
                               dim=query_dim,
                               sensitivity=query_dim)

        # test eval dim
        assert (multidim_query.dim == query_dim)

def test_laplace_accuracy_on_linear_queries(iters=10,
                                            lap_samples=10,
                                            eps=1e-5,
                                            beta=1e-5):

    for i in range(iters):

        # generate random database
        r = np.random.randint(0, 1000, 4)
        db = generate_database(r[0], r[1] + 1, r[2], r[2] + r[3] + 1, False)

        # generate random query
        query = linear_query(db.uni)

        # verify Laplace mechanism accuracy guarantee
        acceptable_err = np.log(query.dim / beta) * (query.sensitivity / eps)

        # sample Laplace values to estimate the probability of a too-large error
        bad_case_cntr = 0
        for j in range(lap_samples):
            y = laplace(db, query, eps)
            actual_err = np.max(np.abs(np.subtract(y, query.value(db))))
            if actual_err > acceptable_err:
                bad_case_cntr += 1

        # The Laplace mechanism guarantees that
        # P[max_i |f(x)_i - y_i| >= log(query.dim / beta) * (query.sensitivity / eps)] <= beta.
        # We use (bad_case_cntr / lap_samples) as an estimate of the probability of a too-large error.
        assert (beta >= (bad_case_cntr / lap_samples))
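
# The `laplace` mechanism exercised above is coordinate-wise Laplace noise
# calibrated to the query's L1-sensitivity. A minimal sketch, assuming
# query.value(db) returns a scalar or 1-D array and query.sensitivity is its
# L1-sensitivity (not necessarily the library's actual implementation):
def laplace_mechanism_sketch(db, query, eps):
    true_answer = np.asarray(query.value(db), dtype=float)
    scale = query.sensitivity / eps  # noise scale b = sensitivity / eps
    return true_answer + np.random.laplace(loc=0.0, scale=scale,
                                           size=true_answer.shape)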

def test_exponential_utility_on_linear_queries(iters=10,
                                               exp_samples=10,
                                               eps=1e-5,
                                               t=5):

    for i in range(iters):

        # generate random database
        r = np.random.randint(0, 1000, 4)
        db = generate_database(r[0], r[1] + 1, r[2], r[2] + r[3] + 1, False)

        # generate random utility
        utility = categorical_linear_query(db.uni)

        # verify Exponential mechanism accuracy guarantee
        acceptable_val = utility.optimal(db) - (
            (2 * utility.sensitivity) /
            eps) * (np.log(len(utility.categories)) + t)

        # sample exponential values to estimate the probability of error
        bad_case_cntr = 0
        for j in range(exp_samples):
            c = exponential(db, utility, eps)
            actual_val = utility.value(db, c)
            if actual_val <= acceptable_val:
                bad_case_cntr += 1

        # The Exponential mechanism guarantees that
        # P[u(db, c) <= utility.optimal(db) - (2 * utility.sensitivity / eps) * (ln(|utility.categories|) + t)] <= e^-t.
        # We use (bad_case_cntr / exp_samples) as an estimate of the probability of a too-low utility.
        assert (np.exp(-t) >= (bad_case_cntr / exp_samples))
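
# The `exponential` mechanism samples a category with probability proportional
# to exp(eps * u(db, c) / (2 * sensitivity)). A minimal sketch, assuming
# utility.value(db, c) scores category c and utility.categories lists the
# candidates (not necessarily the library's actual implementation):
def exponential_mechanism_sketch(db, utility, eps):
    scores = np.array([utility.value(db, c) for c in utility.categories])
    # shift by the max score for numerical stability; it cancels in the ratio
    weights = np.exp(eps * (scores - scores.max()) / (2 * utility.sensitivity))
    probs = weights / weights.sum()
    return utility.categories[np.random.choice(len(utility.categories), p=probs)]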

def test_utility_sensitivity(func_iters=10, db_iters=10):

    for i in range(func_iters):

        # generate random database
        r = np.random.randint(0, 100, 4)
        db = generate_database(r[0], r[1] + 1, r[2], r[2] + r[3] + 1, False)
        utility = categorical_linear_query(db.uni)

        assert (1 >= utility._eval_sensitivity(db_iters, random_db_size=r[0]))
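
# _eval_sensitivity is not shown here; a plausible (purely illustrative)
# empirical estimator samples random databases, perturbs one record, and takes
# the largest observed change in utility over all categories. make_random_db
# is a hypothetical factory callable used only for this sketch:
def estimate_sensitivity_sketch(utility, make_random_db, iters=10):
    worst = 0.0
    for _ in range(iters):
        db = make_random_db()
        neighbor = db.copy()
        # neighboring database: one extra record in a random histogram cell
        neighbor.data[np.random.randint(len(neighbor.data))] += 1
        for c in utility.categories:
            worst = max(worst,
                        abs(utility.value(db, c) - utility.value(neighbor, c)))
    return worst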

def test_database_rep_change():

    # generate random database
    r = np.random.randint(0, 1000, 4)
    exact_number = np.random.choice([True, False])

    # at least one record, so the probability representation is well defined
    db = generate_database(r[0] + 1, r[1] + 1, r[2], r[2] + r[3] + 1,
                           exact_number)

    # test rep change
    db.change_representation(rep='probability')
    assert (np.isclose(1.0, np.sum(db.data)))
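
# change_representation(rep='probability') presumably renormalizes the
# histogram so that its entries sum to 1, which is what the assertion above
# checks. A hedged equivalent on a raw histogram with at least one record:
def to_probability_rep_sketch(histogram):
    histogram = np.asarray(histogram, dtype=float)
    return histogram / histogram.sum()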

def test_linear_query_evaluation(iters=1000):

    for i in range(iters):

        # generate random database
        r = np.random.randint(0, 1000, 4)
        db = generate_database(r[0], r[1] + 1, r[2], r[2] + r[3] + 1, False)

        query, qvec = linear_query(db.uni, return_underlying_vector=True)

        assert (np.inner(qvec, db.data) == query.value(db))
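
# A linear query over a histogram database is an inner product with a fixed
# weight vector q in [0, 1]^|universe|, which is exactly what the assertion
# above checks; linear_query presumably draws such a vector at random.
# A hedged sketch (weights in [0, 1] keep the sensitivity at most 1,
# consistent with test_utility_sensitivity):
def linear_query_sketch(uni_size):
    qvec = np.random.uniform(0.0, 1.0, uni_size)  # one weight per universe object
    return qvec, (lambda data: np.inner(qvec, data))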

def test_utility_copy():
    # generate database
    db = generate_database(m=20, n=10, exact_number=True)
    uni = db.uni

    # generate utility
    utility = categorical_linear_query(uni)

    # copy query
    cutility = utility.copy()

    # change universe and verify independence
    utility._uni.objects = [1]
    assert (1 != len(cutility._uni.objects))

def test_utility_evaluation(iters=10):

    for i in range(iters):

        # generate random database
        r = np.random.randint(0, 1000, 4)
        db = generate_database(r[0], r[1] + 1, r[2], r[2] + r[3] + 1, False)
        utility, umat = categorical_linear_query(db.uni,
                                                 return_underlying_matrix=True)

        for c in utility.categories:
            assert (np.inner(db.data, umat[:, c]) == utility.value(db, c))

def test_database_copy():
    # generate database
    db = generate_database(m=20, n=10, exact_number=True)

    # copy database
    cdb = db.copy()

    # change and verify independence
    db.uni.objects = [1]
    assert (10 == len(cdb.uni.objects))

def test_query_copy():

    # generate database
    db = generate_database(m=20, n=10, exact_number=True)
    uni = db.uni

    # generate query
    query = linear_query(uni)

    # copy query
    cquery = query.copy()

    # change and verify independence
    query._uni.objects = [1]
    assert (1 != len(cquery._uni.objects))

def test_generate_database(iters=1000):

    for i in range(iters):

        # generate random database
        r = np.random.randint(0, 1000, 4)
        exact_number = np.random.choice([True, False])

        db = generate_database(r[0], r[1] + 1, r[2], r[2] + r[3] + 1,
                               exact_number)

        if exact_number:
            assert (r[1] + 1 == db.uni.size)
        else:
            assert (r[1] + 1 >= db.uni.size)

        assert (db.data.shape == db.uni.shape)
        assert (r[0] == np.sum(db.data))
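
# The assertions above pin down the generate_database contract: a histogram
# whose entries count the m records, over a universe of exactly n objects when
# exact_number=True and at most n otherwise. A hedged invariant checker:
def check_database_invariants_sketch(db, m, n, exact_number):
    assert db.data.shape == db.uni.shape  # one histogram cell per universe object
    assert m == np.sum(db.data)  # the histogram accounts for all m records
    assert (n == db.uni.size) if exact_number else (n >= db.uni.size)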

def test_report_noise_max(iters=10,
                          rnm_samples=10,
                          query_dim=5,
                          eps=1e-5,
                          beta=1e-5):

    for i in range(iters):
        # generate random database
        r = np.random.randint(0, 1000, 4)
        db = generate_database(r[0], r[1] + 1, r[2], r[2] + r[3] + 1, False)

        # generate multiple one-dimensional linear queries
        queries = []
        for j in range(query_dim):
            queries.append(linear_query(db.uni))

        # one-dimensional RNM must pick the only available index, 0
        assert (0 == report_noisy_max(db, queries[0], eps))

        # compose them into one multi-dimensional linear query
        func = lambda x: np.array([q._func(x) for q in queries])
        multidim_query = Query(db.uni,
                               func,
                               dim=query_dim,
                               sensitivity=query_dim)
        true_values = [q.value(db) for q in queries]
        true_argmax = np.argmax(true_values)

        # verify mechanism accuracy guarantee
        acceptable_err = np.log(1 / beta) / eps

        # sample multi-dimensional RNM values to estimate the probability of error
        bad_case_cntr = 0
        for j in range(rnm_samples):
            noise_argmax = report_noisy_max(db, multidim_query, eps)
            if true_values[true_argmax] - true_values[noise_argmax] > acceptable_err:
                bad_case_cntr += 1

        # Report Noisy Max guarantees that
        # P[y[true_argmax] - y[rnm_argmax] >= log(1 / beta) / eps] <= beta.
        # We use (bad_case_cntr / rnm_samples) as an estimate of the probability of a too-large error.
        assert (beta >= (bad_case_cntr / rnm_samples))
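
# report_noisy_max presumably adds independent Laplace(1/eps) noise to each
# answer of a sensitivity-1 query vector and reports only the argmax, which is
# why its privacy cost is a single eps regardless of dimension. A hedged
# sketch:
def report_noisy_max_sketch(db, query, eps):
    answers = np.atleast_1d(query.value(db))
    noisy = answers + np.random.laplace(scale=1.0 / eps, size=answers.shape)
    return int(np.argmax(noisy))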

def test_smalldb(iters=10,
                 num_queries=5,
                 smalldb_samples=10,
                 eps=1e-5,
                 alpha=1e-0,
                 beta=1e-5):

    for i in range(iters):
        # generate random database
        r = np.random.randint(0, 15, 2)
        db = generate_database(r[0] + 1, r[1] + 2, exact_number=True)

        # generate multiple one-dimensional linear queries
        queries = []
        for j in range(num_queries):
            queries.append(linear_query(db.uni))

        # verify mechanism accuracy guarantee
        lg_u = np.log(db.uni.size)
        lg_q = np.log(len(queries))
        lg_b = np.log(1 / beta)
        db_nrm = np.linalg.norm(db.data, ord=1)
        acceptable_err = np.power(db_nrm, 2/3) * \
                         np.power((1/eps) * (16*lg_u*lg_q + 4*lg_b), 1/3)

        # sample smalldb
        bad_case_cntr = 0
        for j in range(smalldb_samples):
            y = small_db(db, queries, eps, alpha)
            err = max([
                norm(np.subtract(q.value(db), q.value(y)), ord=1)
                for q in queries
            ])
            if err > acceptable_err:
                bad_case_cntr += 1

        # The Small DB mechanism guarantees that
        # P[err > ||x||_1^(2/3) * ((16 * log|U| * log|Q| + 4 * log(1/beta)) / eps)^(1/3)] <= beta.
        # We use (bad_case_cntr / smalldb_samples) as an estimate of the probability of a too-large error.
        assert (beta >= (bad_case_cntr / smalldb_samples))
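
# small_db presumably runs the exponential mechanism over all synthetic
# databases of size m' ~ log|Q| / alpha^2, scoring each candidate by (minus)
# its worst query error. A hedged sketch on raw histograms and raw query
# vectors; the candidate set is exponential in m', which is why Small DB is
# mainly of theoretical interest:
from itertools import combinations_with_replacement


def small_db_sketch(histogram, query_vectors, eps, alpha):
    uni_size = len(histogram)
    m_small = max(1, int(np.ceil(np.log(len(query_vectors)) / alpha ** 2)))
    mass = np.sum(histogram) / m_small  # rescale candidates to the true mass
    candidates = [
        mass * np.bincount(combo, minlength=uni_size)
        for combo in combinations_with_replacement(range(uni_size), m_small)
    ]
    # utility of a candidate: minus its largest error over all queries
    scores = np.array([
        -max(abs(np.inner(q, histogram) - np.inner(q, cand))
             for q in query_vectors) for cand in candidates
    ])
    # sensitivity <= 1 for [0, 1]-weighted linear queries; shift for stability
    weights = np.exp(eps * (scores - scores.max()) / 2.0)
    return candidates[np.random.choice(len(candidates),
                                       p=weights / weights.sum())]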

def test_at(iters=10, at_samples=10, eps=1e-5, alpha=1e-2):

    for i in range(iters):
        # generate random database
        r = np.random.randint(0, 1000, 3)
        db = generate_database(r[0] + 1, r[1] + 2, exact_number=True)

        # generate multiple one-dimensional linear queries
        queries = []
        for j in range(r[2] + 1):  # ensure at least one query
            queries.append(linear_query(db.uni))

        # AT arguments
        y = np.array([q.value(db) for q in queries])
        thresh = np.percentile(y, 80)
        num_at_queries = np.sum(y >= (thresh - alpha))
        num_queries = r[2] + 1
        # failure probability implied by the AT accuracy bound
        # alpha = (9c / eps) * (log(k) + log(4c / beta)), i.e.
        # beta = 4c * k * exp(-eps * alpha / (9c)), c = num_at_queries, k = num_queries
        beta = 4 * num_at_queries * num_queries * np.exp(
            -eps * alpha / (9 * num_at_queries))

        # sample AT, using a fresh instance per sample: each instance's query
        # budget is consumed by one full pass over the queries
        bad_case_cntr = 0
        for j in range(at_samples):
            at = AT(db, num_queries, num_at_queries, thresh, eps, 0)
            for k in range(num_queries):
                try:
                    y_hat = at.value(queries[k])
                    if ((y_hat is None and y[k] > thresh + alpha) or
                        (y_hat is not None and np.abs(y[k] - y_hat) > alpha)):
                        bad_case_cntr += 1
                except AssertionError:
                    # AT exceeded max queries or max AT queries
                    bad_case_cntr += 1

        assert (beta >= (bad_case_cntr / (at_samples * num_queries)))
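
# The AT (AboveThreshold / sparse vector) object presumably compares each
# noisy query answer against a noisy threshold and releases an estimate only
# for above-threshold queries. A hedged sketch of the classic algorithm; the
# noise scales follow one common parameterization, and SVT variants differ:
def above_threshold_sketch(db, queries, thresh, eps, max_hits):
    noisy_thresh = thresh + np.random.laplace(scale=2.0 / eps)
    hits = 0
    for q in queries:
        noisy_answer = q.value(db) + np.random.laplace(scale=4.0 * max_hits / eps)
        if noisy_answer >= noisy_thresh:
            yield noisy_answer  # above threshold: release the noisy estimate
            hits += 1
            if hits >= max_hits:
                return  # budget of above-threshold answers is exhausted
            # refresh the threshold noise after every released answer
            noisy_thresh = thresh + np.random.laplace(scale=2.0 / eps)
        else:
            yield None  # below threshold: release nothing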