Esempio n. 1
0
 def test_err1(self):
     """SUM over a column whose upper bound is unset must raise ValueError."""
     # Deep copy on purpose: copy.copy() is shallow, so the nested "income"
     # column object would still be the one held by the shared `schema`, and
     # clearing its upper bound would leak into every other test using it.
     s = copy.deepcopy(schema)
     s["PUMS.PUMS"]["income"].upper = None
     reader = PandasReader(df, s)
     private_reader = PrivateReader(reader, s, privacy=Privacy(epsilon=4.0))
     with pytest.raises(ValueError):
         # The result is irrelevant; only the raised exception is checked.
         private_reader.execute_df("SELECT SUM(income) FROM PUMS.PUMS")
Esempio n. 2
0
 def test_ok1(self):
     """Grouping by a column whose upper bound is unset must execute without raising.

     The test makes no assertion on the returned frame; it passes as long as
     the query runs to completion.
     """
     # Deep copy on purpose: copy.copy() is shallow, so the nested "income"
     # column object would still be the one held by the shared `schema`, and
     # clearing its upper bound would leak into every other test using it.
     s = copy.deepcopy(schema)
     s["PUMS.PUMS"]["income"].upper = None
     reader = PandasReader(df, s)
     private_reader = PrivateReader(reader, s, privacy=Privacy(epsilon=4.0))
     private_reader.execute_df(
         "SELECT income FROM PUMS.PUMS GROUP BY income")
    def test_dpsu_vs_korolova(self):
        """Compare row counts of the same grouped query with default options and with dpsu disabled.

        Both private readers use epsilon=3.0 and max_contrib=10; the second
        one additionally sets ``options.dpsu = False``. The run with default
        options is expected to return more rows.
        """
        sql = "SELECT ngram, COUNT(*) as n FROM reddit.reddit GROUP BY ngram ORDER BY n desc"
        contribution_cap = 10
        base_reader = PandasReader(df, schema)

        # First run: options left at their defaults apart from max_contrib.
        default_reader = PrivateReader(base_reader, schema, privacy=Privacy(epsilon=3.0))
        default_reader.options.max_contrib = contribution_cap
        result = default_reader.execute_df(sql)

        # Second run: identical setup, but with dpsu explicitly switched off.
        no_dpsu_reader = PrivateReader(base_reader, schema, privacy=Privacy(epsilon=3.0))
        no_dpsu_reader.options.dpsu = False
        no_dpsu_reader.options.max_contrib = contribution_cap
        korolova_result = no_dpsu_reader.execute_df(sql)

        default_rows = len(result['n'])
        korolova_rows = len(korolova_result['n'])
        assert default_rows > korolova_rows
        # NOTE(review): `final_df` is not defined anywhere in this method.
        # Unless a module-level `final_df` exists elsewhere in the file, this
        # line raises NameError - confirm whether `result` was intended.
        assert len(final_df) < len(df)
Esempio n. 4
0
 def test_sum_noisy_postprocess(self):
     """POWER(SUM(age), 2) evaluated through the private reader must exceed 1000 squared."""
     sql = "SELECT POWER(SUM(age), 2) as age_total FROM PUMS.PUMS"
     base_reader = PandasReader(df, schema)
     noisy_reader = PrivateReader(base_reader, schema, privacy=Privacy(epsilon=1.0))
     frame = noisy_reader.execute_df(sql)
     squared_total = frame['age_total'][0]
     threshold = 1000 ** 2
     assert squared_total > threshold
Esempio n. 5
0
 def test_group_by_noisy_typed_order_desc(self):
     """Counts grouped by `married` and ordered by count DESC must have the first row above the second."""
     sql = "SELECT COUNT(*) AS c, married AS m FROM PUMS.PUMS GROUP BY married ORDER BY c DESC"
     base_reader = PandasReader(df, schema)
     noisy_reader = PrivateReader(base_reader, schema, privacy=Privacy(epsilon=4.0))
     frame = noisy_reader.execute_df(sql)
     counts = frame['c']
     # Descending order: the leading row must hold the larger count.
     assert counts[0] > counts[1]