Ejemplo n.º 1
0
 def test_yes_tau(self):
     """Repeated private runs of a sparse grouped count should not all return the same number of rows."""
     # should usually drop some rows
     query = "SELECT COUNT(*) AS c FROM PUMS.PUMS WHERE age > 90 GROUP BY educ"
     private_reader = PrivateQuery(CSVReader(schema, df), schema, 0.01)
     lengths = [len(private_reader.execute_typed(query)['c']) for _ in range(10)]
     print(lengths)
     first = lengths[0]
     # At least one run must differ in length from the first run.
     assert any(first != other for other in lengths)
Ejemplo n.º 2
0
    def run_agg_query(self, df, metadata_path, query, confidence):
        """Run an aggregate query repeatedly and collect the noisy results.

        The query string is parsed and preprocessed once. The post-processing
        step is then applied ``self.repeat_count`` times, each time on a fresh
        ``TypedRowset`` built from the preprocessed rows.

        Returns a numpy array with one entry per repetition: the first column
        of the row at index 1 of each post-processed result.

        NOTE: ``confidence`` is accepted but not used by the active code; an
        earlier bounds computation that consumed it is disabled, so only the
        noisy values are returned.
        """
        meta = MetadataLoader(metadata_path).read_schema()
        private_reader = PrivateQuery(CSVReader(meta, df), meta, self.epsilon)
        parsed = private_reader.parse_query_string(query)
        subquery, query, syms, types, sens, srs_orig = private_reader._preprocess(parsed)

        def draw_sample():
            # Rebuild the rowset for every draw so each post-processing pass
            # starts from the same preprocessed rows.
            fresh = TypedRowset(srs_orig.rows(), types, sens)
            processed = private_reader._postprocess(
                subquery, query, syms, types, sens, fresh)
            return processed.rows()[1:][0][0]

        return np.array([draw_sample() for _ in range(self.repeat_count)])
Ejemplo n.º 3
0
 def test_count_exact(self):
     """An exact COUNT(*) over PUMS.PUMS should report 1000."""
     query = "SELECT COUNT(*) AS c FROM PUMS.PUMS"
     rs = CSVReader(schema, df).execute(query)
     # The value is read from row index 1, first column.
     total = rs[1][0]
     assert total == 1000
Ejemplo n.º 4
0
 def _load_reader(dataset_document):
     """Build a CSVReader from the metadata and dataframe that DataverseAdapter loads for the document."""
     metadata = DataverseAdapter.load_metadata(dataset_document)
     frame = DataverseAdapter.load_df(dataset_document)
     return CSVReader(metadata, frame)
Ejemplo n.º 5
0
 def test_sum_no_rows_exact_typed(self):
     """A typed exact SUM over the ``age > 100`` filter should yield None in column 'c'.

     Per the test name, the filter is expected to match no rows.
     """
     reader = CSVReader(schema, df)
     query = QueryParser(schema).queries("SELECT SUM(age) as c FROM PUMS.PUMS WHERE age > 100")[0]
     trs = reader.execute_typed(query)
     # Identity check: None is a singleton, and `==` can be overridden by the
     # value's type, so `is None` is the reliable comparison.
     assert trs['c'][0] is None
Ejemplo n.º 6
0
 def test_group_by_noisy_typed_order_desc(self):
     """Noisy typed grouped counts ordered by ``c DESC`` should come back in descending order."""
     query = "SELECT COUNT(*) AS c, married AS m FROM PUMS.PUMS GROUP BY married ORDER BY c DESC"
     private_reader = PrivateQuery(CSVReader(schema, df), schema, 1.0)
     rs = private_reader.execute_typed(query)
     # Compare the first two entries of the 'c' column.
     assert rs['c'][0] > rs['c'][1]
Ejemplo n.º 7
0
 def test_group_by_noisy_order(self):
     """Noisy grouped counts ordered by ``c`` should come back in ascending order."""
     query = "SELECT COUNT(*) AS c, married AS m FROM PUMS.PUMS GROUP BY married ORDER BY c"
     private_reader = PrivateQuery(CSVReader(schema, df), schema, 1.0)
     rs = private_reader.execute(query)
     # Rows are read from index 1 onward (index 0 is presumably a header row - confirm).
     assert rs[1][0] < rs[2][0]
Ejemplo n.º 8
0
 def test_group_by_exact_order_expr_desc(self):
     """Exact grouped ``COUNT(*) * 5`` ordered by ``c DESC`` should return the scaled counts, largest first."""
     query = "SELECT COUNT(*) * 5 AS c, married AS m FROM PUMS.PUMS GROUP BY married ORDER BY c DESC"
     rs = CSVReader(schema, df).execute(query)
     # Expected unscaled group sizes are 549 and 451, in that order.
     top_count = rs[1][0]
     assert top_count == 549 * 5
     next_count = rs[2][0]
     assert next_count == 451 * 5
Ejemplo n.º 9
0
 def test_empty_result_typed(self):
     """Wrapping the result of the ``age > 100`` query in a TypedRowset should give length 0."""
     query = "SELECT age as a FROM PUMS.PUMS WHERE age > 100"
     raw_rows = CSVReader(schema, df).execute(query)
     # One 'int' column with no sensitivity value.
     typed = TypedRowset(raw_rows, ['int'], [None])
     assert len(typed) == 0
Ejemplo n.º 10
0
 def test_empty_result(self):
     """The raw result of the ``age > 100`` query should contain exactly one entry."""
     query = "SELECT age as a FROM PUMS.PUMS WHERE age > 100"
     rs = CSVReader(schema, df).execute(query)
     print(rs)
     # Presumably the single remaining entry is the header row - confirm against the reader.
     row_count = len(rs)
     assert row_count == 1
Ejemplo n.º 11
0
 def _load_reader(dataset_document):
     """Build a CSVReader from the metadata and dataframe that LocalCSVAdapter loads for the document."""
     metadata = LocalCSVAdapter.load_metadata(dataset_document)
     frame = LocalCSVAdapter.load_df(dataset_document)
     return CSVReader(metadata, frame)