def test_yes_tau(self):
    """Run the same noisy grouped COUNT ten times and expect varying row counts.

    The assertion holds only when at least one of the ten runs returns a
    different number of ``c`` values than the first run did.
    """
    # should usually drop some rows
    sql = "SELECT COUNT(*) AS c FROM PUMS.PUMS WHERE age > 90 GROUP BY educ"
    private_reader = PrivateQuery(CSVReader(schema, df), schema, 0.01)
    row_counts = [
        len(private_reader.execute_typed(sql)['c'])
        for _ in range(10)
    ]
    print(row_counts)
    baseline = row_counts[0]
    assert any(count != baseline for count in row_counts)
def run_agg_query(self, df, metadata_path, query, confidence):
    """Collect ``self.repeat_count`` noisy results for one aggregate query.

    The query is parsed and preprocessed once; each repetition then wraps the
    preprocessed rows in a fresh ``TypedRowset`` and runs ``_postprocess`` on
    it, keeping the first column of the second returned row.

    Args:
        df: Data passed to ``CSVReader`` together with the loaded metadata.
        metadata_path: Path handed to ``MetadataLoader`` to read the schema.
        query: Query string to parse and execute.
        confidence: Not read by this method. The bounds computation that
            consumed it is disabled, so no bounds are returned.

    Returns:
        A numpy array holding one noisy value per repetition.
    """
    metadata = MetadataLoader(metadata_path).read_schema()
    private_reader = PrivateQuery(
        CSVReader(metadata, df), metadata, self.epsilon
    )
    parsed = private_reader.parse_query_string(query)
    subquery, query, syms, types, sens, srs_orig = private_reader._preprocess(
        parsed
    )

    def draw_sample():
        # Rebuild the rowset from the preprocessed rows on every draw so each
        # _postprocess call receives its own TypedRowset instance.
        fresh = TypedRowset(srs_orig.rows(), types, sens)
        noised = private_reader._postprocess(
            subquery, query, syms, types, sens, fresh
        )
        # Skip row 0 (presumably the header row -- confirm) and take the
        # first column of the next row.
        return noised.rows()[1:][0][0]

    return np.array([draw_sample() for _ in range(self.repeat_count)])
def test_count_exact(self):
    """An exact COUNT(*) over PUMS.PUMS should report 1000."""
    result = CSVReader(schema, df).execute("SELECT COUNT(*) AS c FROM PUMS.PUMS")
    # Index 1 is the row checked here; row 0 is presumably the header -- confirm.
    count_row = result[1]
    assert count_row[0] == 1000
def _load_reader(dataset_document):
    """Build a ``CSVReader`` for ``dataset_document`` via ``DataverseAdapter``.

    The adapter supplies both the metadata and the data frame, in that order.
    """
    metadata = DataverseAdapter.load_metadata(dataset_document)
    frame = DataverseAdapter.load_df(dataset_document)
    return CSVReader(metadata, frame)
def test_sum_no_rows_exact_typed(self):
    """SUM over a filter that this test expects to match no rows should be None.

    The query is parsed with ``QueryParser`` first and the resulting query
    object (the first one returned) is passed to ``execute_typed``.
    """
    reader = CSVReader(schema, df)
    query = QueryParser(schema).queries(
        "SELECT SUM(age) as c FROM PUMS.PUMS WHERE age > 100"
    )[0]
    trs = reader.execute_typed(query)
    # Identity check: ``== None`` could be satisfied by an object with a
    # permissive ``__eq__``; ``is None`` only passes for the None singleton.
    assert trs['c'][0] is None
def test_group_by_noisy_typed_order_desc(self):
    """Noisy typed counts grouped by ``married`` should come back descending.

    Only the first two ``c`` values are compared.
    """
    sql = (
        "SELECT COUNT(*) AS c, married AS m FROM PUMS.PUMS "
        "GROUP BY married ORDER BY c DESC"
    )
    private_reader = PrivateQuery(CSVReader(schema, df), schema, 1.0)
    typed = private_reader.execute_typed(sql)
    first_count = typed['c'][0]
    second_count = typed['c'][1]
    assert first_count > second_count
def test_group_by_noisy_order(self):
    """Noisy counts grouped by ``married`` and ordered by ``c`` should ascend.

    Compares the first column of rows 1 and 2 of the untyped result.
    """
    sql = (
        "SELECT COUNT(*) AS c, married AS m FROM PUMS.PUMS "
        "GROUP BY married ORDER BY c"
    )
    private_reader = PrivateQuery(CSVReader(schema, df), schema, 1.0)
    rows = private_reader.execute(sql)
    smaller = rows[1][0]
    larger = rows[2][0]
    assert smaller < larger
def test_group_by_exact_order_expr_desc(self):
    """Exact ``COUNT(*) * 5`` per ``married`` group, ordered descending.

    Rows 1 and 2 are expected to hold 549 * 5 and 451 * 5 respectively.
    """
    sql = (
        "SELECT COUNT(*) * 5 AS c, married AS m FROM PUMS.PUMS "
        "GROUP BY married ORDER BY c DESC"
    )
    rows = CSVReader(schema, df).execute(sql)
    expected_first = 549 * 5
    expected_second = 451 * 5
    assert rows[1][0] == expected_first
    assert rows[2][0] == expected_second
def test_empty_result_typed(self):
    """A ``TypedRowset`` built from a query this test expects to match nothing has length 0."""
    raw_rows = CSVReader(schema, df).execute(
        "SELECT age as a FROM PUMS.PUMS WHERE age > 100"
    )
    # One 'int' column with no sensitivity value supplied.
    typed = TypedRowset(raw_rows, ['int'], [None])
    assert len(typed) == 0
def test_empty_result(self):
    """An untyped query this test expects to match nothing returns exactly one row.

    That single remaining row is presumably the header -- confirm against
    the reader implementation.
    """
    rows = CSVReader(schema, df).execute(
        "SELECT age as a FROM PUMS.PUMS WHERE age > 100"
    )
    print(rows)
    assert len(rows) == 1
def _load_reader(dataset_document):
    """Build a ``CSVReader`` for ``dataset_document`` via ``LocalCSVAdapter``.

    The adapter supplies both the metadata and the data frame, in that order.
    """
    metadata = LocalCSVAdapter.load_metadata(dataset_document)
    frame = LocalCSVAdapter.load_df(dataset_document)
    return CSVReader(metadata, frame)