# Code example #1
def test_index():
    """Select a single column by integer index and verify its released values."""
    with wn.Analysis(filter_level='all') as analysis:
        data = wn.Dataset(**dataset_bools)

        first_column = data[0]

        analysis.release()
        # compare the released column element-by-element against the fixture
        for actual, expected in zip(first_column.value,
                                    [True, True, False, False]):
            assert actual == expected
# Code example #2
def test_equal():
    """Element-wise equality of two boolean columns of the test dataset."""
    with wn.Analysis(filter_level='all') as analysis:
        data = wn.Dataset(**dataset_bools)

        matches = data[0] == data[1]

        analysis.release()
        expected = np.array([True, False, False, True])
        assert np.array_equal(matches.value, expected)
# Code example #3
def test_partition():
    """Partition a two-column dataset into 3 chunks and check each chunk."""
    with wn.Analysis(filter_level='all') as analysis:
        data = wn.Dataset(**dataset_bools)[[0, 1]]

        chunks = wn.partition(data, num_partitions=3)
        analysis.release()

        expected_chunks = [
            np.array([[True, True], [True, False]]),
            np.array([[False, True]]),
            np.array([[False, False]]),
        ]
        for index, expected in enumerate(expected_chunks):
            assert np.array_equal(chunks.value[index], expected)
# Code example #4
def test_dp_mean():
    """Compute a DP mean driven by an accuracy target, then inspect accuracy."""
    with wn.Analysis():
        data = wn.Dataset(**generate_synthetic(float, variants=['Random']))

        # request privacy parameters derived from an accuracy/alpha target
        accuracy_target = {'value': .2, 'alpha': .05}
        mean = wn.dp_mean(
            data['F_Random'],
            accuracy=accuracy_target,
            data_lower=0.,
            data_upper=10.,
            data_n=10,
        )

        print("accuracy", mean.get_accuracy(0.05))
        print(mean.from_accuracy(2.3, .05))
# Code example #5
def test_divide():
    """Check how nullity propagates through imputation, clamping and division."""
    with wn.Analysis():
        data_A = wn.Dataset(**generate_synthetic(float, variants=['Random']))

        raw_column = data_A['F_Random']
        imputed = wn.impute(raw_column, lower=0., upper=10.)
        clamped_nonzero = wn.clamp(imputed, lower=1., upper=10.)
        clamped_zero = wn.clamp(imputed, lower=0., upper=10.)

        # raw data may be null; imputation clears nullity
        assert raw_column.nullity
        assert not imputed.nullity
        # dividing by a column whose range includes zero is nullable again
        # (presumably division-by-zero yields null — behavior of the wn library)
        assert (2. / imputed).nullity
        assert (raw_column / imputed).nullity
        assert (2. / clamped_zero).nullity
# Code example #6
def test_insertion_simple():
    """
    Conduct a differentially private analysis with values inserted from other systems.

    The dataset path is empty and is never read: each aggregate is computed by an
    external system and injected with ``.set(...)``, so ``analysis.release()`` only
    executes the noise-addition mechanisms on the inserted values.
    :return: None
    """
    with wn.Analysis() as analysis:

        # construct a fake dataset that describes your actual data (will never be run)
        data = wn.Dataset(path="", column_names=["A", "B", "C", "D"])

        # pull a column out
        col_a = wn.to_float(data['A'])

        # describe the preprocessing you actually perform on the data
        col_a_clamped = wn.impute(wn.clamp(col_a, lower=0., upper=10.))
        col_a_resized = wn.resize(col_a_clamped, n=1000000)

        # run a fake aggregation (the value is supplied below, not computed)
        actual_mean = wn.mean(col_a_resized)

        # insert aggregated data from an external system
        actual_mean.set(10)

        # describe the differentially private operation
        gaussian_mean = wn.gaussian_mechanism(actual_mean,
                                              privacy_usage={
                                                  "epsilon": .4,
                                                  "delta": 1e-6
                                              })

        # check if the analysis is permissible
        analysis.validate()

        # compute the missing releasable nodes- in this case, only the gaussian mean
        analysis.release()

        # retrieve the noised mean
        print("gaussian mean", gaussian_mean.value)

        # release a couple other statistics using other mechanisms in the same batch
        actual_sum = wn.sum(col_a_clamped)
        actual_sum.set(123456)
        laplace_sum = wn.laplace_mechanism(actual_sum,
                                           privacy_usage={"epsilon": .1})

        actual_count = wn.count(col_a)
        actual_count.set(9876)

        # geometric mechanism bounded to the range [0, 10000]
        geo_count = wn.simple_geometric_mechanism(
            actual_count, 0, 10000, privacy_usage={"epsilon": .1})

        # release is incremental: only the newly added nodes are computed here
        analysis.release()
        print("laplace sum", laplace_sum.value)
        print("geometric count", geo_count.value)

        # categorical histogram: out-of-category values map to null_value "W"
        actual_histogram_b = wn.histogram(
            wn.clamp(data['B'], categories=['X', 'Y', 'Z'], null_value="W"))
        actual_histogram_b.set([12, 1280, 2345, 12])
        geo_histogram_b = wn.simple_geometric_mechanism(
            actual_histogram_b, 0, 10000, privacy_usage={"epsilon": .1})

        # boolean column: two-bucket histogram (inserted counts are 5000/5000)
        col_c = wn.to_bool(data['C'], true_label="T")
        actual_histogram_c = wn.histogram(col_c)
        actual_histogram_c.set([5000, 5000])
        lap_histogram_c = wn.laplace_mechanism(actual_histogram_c,
                                               privacy_usage={"epsilon": .1})

        analysis.release()
        print("noised histogram b", geo_histogram_b.value)
        print("noised histogram c", lap_histogram_c.value)
        print("C dimensionality", col_c.dimensionality)
        print("C categories", col_c.categories)

        # multicolumnar insertion

        # pull a column out (two columns at once this time)
        col_rest = wn.to_float(data[['C', 'D']])

        # describe the preprocessing you actually perform on the data
        # (per-column lower bounds, shared upper bound)
        col_rest_resized = wn.resize(wn.impute(
            wn.clamp(col_rest, lower=[0., 5.], upper=1000.)),
                                     n=10000)

        # run a fake aggregation
        actual_mean = wn.mean(col_rest_resized)

        # insert aggregated data from an external system (one mean per column)
        actual_mean.set([[10., 12.]])

        # describe the differentially private operation
        gaussian_mean = wn.gaussian_mechanism(actual_mean,
                                              privacy_usage={
                                                  "epsilon": .4,
                                                  "delta": 1e-6
                                              })

        # check if the analysis is permissible
        analysis.validate()

        # compute the missing releasable nodes- in this case, only the gaussian mean
        analysis.release()

        # retrieve the noised mean
        print("rest gaussian mean", gaussian_mean.value)