예제 #1
0
def test_equal():
    """Release ``index 0 == index 1`` over the generated data and check it.

    The comparison is built from columns 0 and 1 of ``generate_bools()`` and
    the released value must match the expected mask exactly (shape included,
    since ``np.array_equal`` is used).
    """
    with sn.Analysis(filter_level='all') as analysis:
        bools = generate_bools()

        # Build both operands first, then the element-wise comparison.
        first_column = sn.index(bools, indices=0)
        second_column = sn.index(bools, indices=1)
        equality = first_column == second_column

        analysis.release()

        expected_mask = np.array([True, False, False, True])
        assert np.array_equal(equality.value, expected_mask)
예제 #2
0
def test_index():
    """Release column 0 of the generated data and check its values.

    The released value must contain exactly the expected entries, in order.
    """
    expected = [True, True, False, False]

    with sn.Analysis(filter_level='all') as analysis:
        data = generate_bools()

        index_0 = sn.index(data, indices=0)

        analysis.release()

        # zip() stops at the shorter input, so without an explicit length
        # check an empty or truncated release would pass vacuously.
        actual = list(index_0.value)
        assert len(actual) == len(expected)
        assert all(a == b for a, b in zip(actual, expected))
예제 #3
0
def test_dp_linear_regression():
    """Print a DP linear regression fitted on the PUMS test data.

    Column 0 ("age") of the resized frame is passed as ``data_x`` and
    column 1 ("income") as ``data_y``. Nothing is asserted; the released
    value is only printed.
    """
    with sn.Analysis():
        pums = sn.Dataset(path=TEST_PUMS_PATH, column_names=TEST_PUMS_NAMES)

        # Cast the two columns to float, then resize with per-column bounds.
        age_income = sn.to_float(pums[["age", "income"]])
        bounded = sn.resize(age_income,
                            number_rows=1000,
                            lower=[0., 0.],
                            upper=[100., 500_000.])

        predictor = sn.index(bounded, indices=0)
        response = sn.index(bounded, indices=1)

        fit = sn.dp_linear_regression(data_x=predictor,
                                      data_y=response,
                                      privacy_usage={'epsilon': 10.},
                                      lower_slope=0.,
                                      upper_slope=1000.,
                                      lower_intercept=0.,
                                      upper_intercept=1000.)

        print(fit.value)
예제 #4
0
        def analyze(data):
            """Partition column 1 of ``data`` by column 0 and summarise parts.

            Column 0 is cast to int, clamped to the categories ``0..14``
            (null value ``-1``) and used as the partition key for column 1.
            For the partitions keyed 5, 8 and 12 a DP count and a mean over
            a resized copy of the partition are built.

            Returns:
                A pair ``(sn.union(means), sn.union(counts))``.
            """
            educ_column = sn.index(data, indices=0)
            educ_as_int = sn.to_int(educ_column, lower=0, upper=15)
            educ = sn.clamp(educ_as_int,
                            categories=list(range(15)),
                            null_value=-1)

            income = sn.index(data, indices=1)
            income_by_educ = sn.partition(income, by=educ)

            counts = {}
            means = {}
            for level in (5, 8, 12):
                part = income_by_educ[level]

                level_count = sn.dp_count(part, privacy_usage={"epsilon": 0.4})
                counts[level] = level_count

                # Resize target: row_min of 1 and four fifths (integer
                # division) of the DP count for this partition.
                target_rows = sn.row_min(1, level_count * 4 // 5)
                means[level] = sn.mean(sn.resize(part, number_rows=target_rows))

            return sn.union(means), sn.union(counts)
예제 #5
0
def test_everything(run=True):
    """Build one analysis that touches a broad set of ``sn`` components.

    Most statements are bare expressions whose results are discarded: they
    are evaluated inside the ``sn.Analysis`` context only for the components
    they construct. Statement order and call forms (positional vs keyword)
    are left exactly as written.

    Args:
        run: When truthy, ``analysis.release()`` is called after the
            ``with`` block has exited.

    Returns:
        The ``sn.Analysis`` object created by this function.
    """
    with sn.Analysis() as analysis:
        data = sn.Dataset(path=TEST_PUMS_PATH, column_names=TEST_PUMS_NAMES)

        # Typed views of individual columns.
        # NOTE(review): the "1" passed to to_bool is presumably the value
        # treated as true -- confirm against the to_bool signature.
        age_int = sn.to_int(data['age'], 0, 150)
        sex = sn.to_bool(data['sex'], "1")
        educ = sn.to_float(data['educ'])
        race = data['race']
        # This `income` binding is replaced further down by a masked index
        # of `numerics`.
        income = sn.to_float(data['income'])
        married = sn.to_bool(data['married'], "1")

        # Two-column float frame (age, income) used by the broadcasting and
        # indexing statements below.
        numerics = sn.to_float(data[['age', 'income']])

        # intentionally busted component
        # print("invalid component id ", (sex + "a").component_id)

        # broadcast scalar over 2d, broadcast scalar over 1d, columnar broadcasting, left and right mul
        numerics * 2. + 2. * educ

        # add different values for each column
        numerics + [[1., 2.]]

        # index by position (column 0) and by boolean mask (second column);
        # both names shadow earlier bindings or introduce new ones
        age = sn.index(numerics, indices=0)
        income = sn.index(numerics, mask=[False, True])

        # boolean ops and broadcasting
        mask = sex & married | (~married ^ False) | (age > 50.) | (age_int
                                                                   == 25)

        # numerical clamping
        sn.clamp(numerics, 0., [150., 150_000.])
        # categorical clamping on the raw (string) educ column
        sn.clamp(data['educ'],
                 categories=[str(i) for i in range(8, 10)],
                 null_value="-1")

        sn.count(mask)
        sn.covariance(age, income)
        sn.digitize(educ, edges=[1., 3., 10.], null_value=-1)

        # checks for safety against division by zero
        income / 2.
        income / sn.clamp(educ, 5., 20.)

        # DP counts over the whole dataset and over the boolean mask.
        sn.dp_count(data, privacy_usage={"epsilon": 0.5})
        sn.dp_count(mask, privacy_usage={"epsilon": 0.5})

        sn.dp_histogram(mask, privacy_usage={"epsilon": 0.5})
        # Rebind `age` to a clamped + imputed version; the DP statistics
        # below are built on this version.
        age = sn.impute(sn.clamp(age, 0., 150.))
        sn.dp_maximum(age, privacy_usage={"epsilon": 0.5})
        sn.dp_minimum(age, privacy_usage={"epsilon": 0.5})
        sn.dp_median(age, privacy_usage={"epsilon": 0.5})

        # Resized copy of `age` (800 rows) used for mean, raw moment and
        # variance.
        age_n = sn.resize(age, number_rows=800)
        sn.dp_mean(age_n, privacy_usage={"epsilon": 0.5})
        sn.dp_raw_moment(age_n, order=3, privacy_usage={"epsilon": 0.5})

        sn.dp_sum(age, privacy_usage={"epsilon": 0.5})
        sn.dp_variance(age_n, privacy_usage={"epsilon": 0.5})

        sn.filter(income, mask)
        # Non-private histogram kept in a variable so the mechanisms below
        # can be applied to it directly.
        race_histogram = sn.histogram(race,
                                      categories=["1", "2", "3"],
                                      null_value="3")
        sn.histogram(income, edges=[0., 10000., 50000.], null_value=-1)

        sn.dp_histogram(married, privacy_usage={"epsilon": 0.5})

        # Explicit mechanisms applied to the same histogram.
        # NOTE(review): the Laplace call also passes "delta" -- confirm
        # whether that is intended or simply ignored by the mechanism.
        sn.gaussian_mechanism(race_histogram,
                              privacy_usage={
                                  "epsilon": 0.5,
                                  "delta": .000001
                              })
        sn.laplace_mechanism(race_histogram,
                             privacy_usage={
                                 "epsilon": 0.5,
                                 "delta": .000001
                             })

        sn.raw_moment(educ, order=3)

        # educ is clamped to [0.001, 50.] before the log is taken.
        sn.log(sn.clamp(educ, 0.001, 50.))
        sn.maximum(educ)
        sn.mean(educ)
        sn.minimum(educ)

        # modulo and power operators
        educ % 2.
        educ**2.

        sn.quantile(educ, .32)

        # resize variants: numeric bounds, categorical with one weight
        # list, then two-column categorical with shared and per-column
        # weights (number_rows passed positionally in the last two)
        sn.resize(educ, number_rows=1200, lower=0., upper=50.)
        sn.resize(race,
                  number_rows=1200,
                  categories=["1", "2"],
                  weights=[1, 2])
        sn.resize(data[["age", "sex"]],
                  1200,
                  categories=[["1", "2"], ["a", "b"]],
                  weights=[1, 2])
        sn.resize(data[["age", "sex"]],
                  1200,
                  categories=[["1", "2"], ["a", "b", "c"]],
                  weights=[[1, 2], [3, 7, 2]])

        sn.sum(educ)
        sn.variance(educ)

    # Release happens after the context manager has exited, and only on
    # request, so callers can also obtain the unreleased analysis.
    if run:
        analysis.release()

    return analysis