def test_err1(self):
    """Expect ValueError from SUM(income) when income has no upper bound.

    The schema is deep-copied before the bound is cleared. A shallow
    ``copy.copy`` would share the nested table/column objects with the
    module-level ``schema``, so clearing ``upper`` would leak into every
    other test that uses that schema.
    """
    s = copy.deepcopy(schema)
    s["PUMS.PUMS"]["income"].upper = None
    reader = PandasReader(df, s)
    private_reader = PrivateReader(reader, s, privacy=Privacy(epsilon=4.0))
    with pytest.raises(ValueError):
        private_reader.execute_df("SELECT SUM(income) FROM PUMS.PUMS")
def test_ok1(self):
    """Check that grouping by income runs even when income has no upper bound.

    The test passes as long as ``execute_df`` does not raise; the result
    itself is not inspected.

    The schema is deep-copied before the bound is cleared. A shallow
    ``copy.copy`` would share the nested table/column objects with the
    module-level ``schema``, so clearing ``upper`` would leak into every
    other test that uses that schema.
    """
    s = copy.deepcopy(schema)
    s["PUMS.PUMS"]["income"].upper = None
    reader = PandasReader(df, s)
    private_reader = PrivateReader(reader, s, privacy=Privacy(epsilon=4.0))
    private_reader.execute_df("SELECT income FROM PUMS.PUMS GROUP BY income")
def test_dpsu_vs_korolova(self):
    """Compare row counts with and without the ``dpsu`` option.

    Runs the same ngram-count query through two private readers that
    differ only in ``options.dpsu`` (left untouched on the first, set to
    False on the second) and asserts that the first returns more rows.
    """
    ngram_sql = "SELECT ngram, COUNT(*) as n FROM reddit.reddit GROUP BY ngram ORDER BY n desc"
    base_reader = PandasReader(df, schema)

    # First run: only max_contrib is overridden.
    default_reader = PrivateReader(base_reader, schema, privacy=Privacy(epsilon=3.0))
    default_reader.options.max_contrib = 10
    default_rows = default_reader.execute_df(ngram_sql)

    # Second run: identical settings, but with dpsu switched off.
    korolova_reader = PrivateReader(base_reader, schema, privacy=Privacy(epsilon=3.0))
    korolova_reader.options.dpsu = False
    korolova_reader.options.max_contrib = 10
    korolova_rows = korolova_reader.execute_df(ngram_sql)

    default_count = len(default_rows['n'])
    korolova_count = len(korolova_rows['n'])
    assert default_count > korolova_count

    # NOTE(review): final_df is not assigned anywhere in this method;
    # presumably it is defined at module level -- confirm, otherwise this
    # line raises NameError.
    assert len(final_df) < len(df)
def test_sum_noisy_postprocess(self):
    """Check that POWER(SUM(age), 2) yields a value above 1000 squared."""
    private_reader = PrivateReader(
        PandasReader(df, schema),
        schema,
        privacy=Privacy(epsilon=1.0),
    )
    frame = private_reader.execute_df(
        "SELECT POWER(SUM(age), 2) as age_total FROM PUMS.PUMS"
    )

    noisy_total = frame['age_total'][0]
    threshold = 1000 ** 2
    assert noisy_total > threshold
def test_group_by_noisy_typed_order_desc(self):
    """Check that ORDER BY c DESC puts the larger group count first."""
    private_reader = PrivateReader(
        PandasReader(df, schema),
        schema,
        privacy=Privacy(epsilon=4.0),
    )
    frame = private_reader.execute_df(
        "SELECT COUNT(*) AS c, married AS m FROM PUMS.PUMS GROUP BY married ORDER BY c DESC"
    )

    counts = frame['c']
    first, second = counts[0], counts[1]
    assert first > second