def test_equal():
    """Elementwise equality of two boolean columns should match numpy semantics."""
    with sn.Analysis(filter_level='all') as analysis:
        data = generate_bools()
        # Compare column 0 against column 1 of the boolean test dataset.
        equality = sn.index(data, indices=0) == sn.index(data, indices=1)
        analysis.release()
        assert np.array_equal(equality.value, np.array([True, False, False, True]))
def test_index():
    """Indexing column 0 of the boolean test dataset yields its raw values."""
    with sn.Analysis(filter_level='all') as analysis:
        data = generate_bools()
        index_0 = sn.index(data, indices=0)
        analysis.release()
        # Compare released values pairwise against the expected first column.
        expected = [True, True, False, False]
        assert all(actual == wanted for actual, wanted in zip(index_0.value, expected))
def test_dp_linear_regression():
    """Smoke-test dp_linear_regression on the PUMS age/income columns."""
    with sn.Analysis():
        wn_data = sn.Dataset(path=TEST_PUMS_PATH, column_names=TEST_PUMS_NAMES)
        # Resize to a fixed row count with per-column clamping bounds,
        # as required before running the DP regression component.
        wn_data = sn.resize(
            sn.to_float(wn_data[["age", "income"]]),
            number_rows=1000,
            lower=[0., 0.],
            upper=[100., 500_000.])
        dp_linear_regression = sn.dp_linear_regression(
            data_x=sn.index(wn_data, indices=0),
            data_y=sn.index(wn_data, indices=1),
            privacy_usage={'epsilon': 10.},
            lower_slope=0., upper_slope=1000.,
            lower_intercept=0., upper_intercept=1000.)
        print(dp_linear_regression.value)
def analyze(data):
    """Partition income by education level and compute DP counts and means.

    Returns a pair ``(means, counts)`` of unioned per-partition results for
    the education levels 5, 8 and 12.
    """
    # Column 0: education, clamped to the categorical range 0..14
    # (unknown values map to -1).
    educ = sn.clamp(
        sn.to_int(sn.index(data, indices=0), lower=0, upper=15),
        categories=list(range(15)),
        null_value=-1)
    # Column 1: income, split into one partition per education category.
    income = sn.index(data, indices=1)
    repartitioned = sn.partition(income, by=educ)

    inner_count = {}
    inner_means = {}
    for key in [5, 8, 12]:
        educ_level_part = repartitioned[key]
        inner_count[key] = sn.dp_count(
            educ_level_part, privacy_usage={"epsilon": 0.4})
        # Resize each partition to a conservative estimate (80% of the DP
        # count, at least 1 row) before taking the mean.
        estimated_rows = sn.row_min(1, inner_count[key] * 4 // 5)
        inner_means[key] = sn.mean(
            sn.resize(educ_level_part, number_rows=estimated_rows))

    return sn.union(inner_means), sn.union(inner_count)
def test_everything(run=True):
    """Exercise a broad sweep of components in a single analysis graph.

    Builds (and optionally releases, when ``run`` is true) an analysis that
    touches casting, arithmetic broadcasting, boolean ops, clamping, the
    DP statistics, mechanisms, histograms and resizing. Returns the analysis.
    """
    with sn.Analysis() as analysis:
        data = sn.Dataset(path=TEST_PUMS_PATH, column_names=TEST_PUMS_NAMES)

        # Typed views over the raw columns.
        age_int = sn.to_int(data['age'], 0, 150)
        sex = sn.to_bool(data['sex'], "1")
        educ = sn.to_float(data['educ'])
        race = data['race']
        income = sn.to_float(data['income'])
        married = sn.to_bool(data['married'], "1")

        numerics = sn.to_float(data[['age', 'income']])

        # intentionally busted component
        # print("invalid component id ", (sex + "a").component_id)

        # broadcast scalar over 2d, broadcast scalar over 1d, columnar broadcasting, left and right mul
        numerics * 2. + 2. * educ

        # add different values for each column
        numerics + [[1., 2.]]

        # index into first column
        age = sn.index(numerics, indices=0)
        income = sn.index(numerics, mask=[False, True])

        # boolean ops and broadcasting
        mask = sex & married | (~married ^ False) | (age > 50.) | (age_int == 25)

        # numerical clamping
        sn.clamp(numerics, 0., [150., 150_000.])
        sn.clamp(data['educ'], categories=[str(i) for i in range(8, 10)], null_value="-1")

        sn.count(mask)
        sn.covariance(age, income)
        sn.digitize(educ, edges=[1., 3., 10.], null_value=-1)

        # checks for safety against division by zero
        income / 2.
        income / sn.clamp(educ, 5., 20.)

        sn.dp_count(data, privacy_usage={"epsilon": 0.5})
        sn.dp_count(mask, privacy_usage={"epsilon": 0.5})
        sn.dp_histogram(mask, privacy_usage={"epsilon": 0.5})

        age = sn.impute(sn.clamp(age, 0., 150.))
        sn.dp_maximum(age, privacy_usage={"epsilon": 0.5})
        sn.dp_minimum(age, privacy_usage={"epsilon": 0.5})
        sn.dp_median(age, privacy_usage={"epsilon": 0.5})

        # Statistics below need a fixed row count, hence the resize.
        age_n = sn.resize(age, number_rows=800)
        sn.dp_mean(age_n, privacy_usage={"epsilon": 0.5})
        sn.dp_raw_moment(age_n, order=3, privacy_usage={"epsilon": 0.5})
        sn.dp_sum(age, privacy_usage={"epsilon": 0.5})
        sn.dp_variance(age_n, privacy_usage={"epsilon": 0.5})

        sn.filter(income, mask)
        race_histogram = sn.histogram(race, categories=["1", "2", "3"], null_value="3")
        sn.histogram(income, edges=[0., 10000., 50000.], null_value=-1)
        sn.dp_histogram(married, privacy_usage={"epsilon": 0.5})

        sn.gaussian_mechanism(race_histogram, privacy_usage={
            "epsilon": 0.5,
            "delta": .000001
        })
        sn.laplace_mechanism(race_histogram, privacy_usage={
            "epsilon": 0.5,
            "delta": .000001
        })

        sn.raw_moment(educ, order=3)
        sn.log(sn.clamp(educ, 0.001, 50.))
        sn.maximum(educ)
        sn.mean(educ)
        sn.minimum(educ)
        educ % 2.
        educ ** 2.
        sn.quantile(educ, .32)

        sn.resize(educ, number_rows=1200, lower=0., upper=50.)
        sn.resize(race, number_rows=1200, categories=["1", "2"], weights=[1, 2])
        sn.resize(
            data[["age", "sex"]], 1200,
            categories=[["1", "2"], ["a", "b"]],
            weights=[1, 2])
        sn.resize(
            data[["age", "sex"]], 1200,
            categories=[["1", "2"], ["a", "b", "c"]],
            weights=[[1, 2], [3, 7, 2]])

        sn.sum(educ)
        sn.variance(educ)

        if run:
            analysis.release()

        return analysis