Example #1
0
 def test_check_thresholds_gauss(self):
     """Compare ``PrivateReader.tau`` with a locally computed Gaussian threshold.

     For every combination of epsilon, max contributions and delta, a
     grouped COUNT query is executed through a ``PrivateReader`` and the
     reader's ``tau`` is checked against the reference value with
     ``rel_tol=0.03`` and ``abs_tol=2``.
     """
     epsilon_grid = (0.1, 2.0)
     contrib_grid = (1, 3)
     delta_grid = (10E-5, 10E-15)
     sql = "SELECT COUNT(*) FROM PUMS.PUMS GROUP BY married"
     reader = PandasReader(df, schema)
     parsed = QueryParser(schema).query(sql)
     for eps in epsilon_grid:
         for contrib in contrib_grid:
             for delta in delta_grid:
                 # The reference uses a slightly different formulation of the
                 # same formula (taken from different papers) to make sure
                 # the private reader round-trips.
                 noise_scale = math.sqrt(contrib) * math.sqrt(
                     2 * math.log(1.25 / delta)) / eps
                 tail_factor = math.sqrt(
                     2 * math.log(contrib / math.sqrt(2 * math.pi * delta)))
                 expected_tau = 1 + noise_scale * tail_factor
                 # NOTE(review): copy.copy is shallow, so setting max_ids here
                 # presumably also changes the table held by the shared
                 # module-level schema -- confirm this is acceptable.
                 local_schema = copy.copy(schema)
                 local_schema["PUMS.PUMS"].max_ids = contrib
                 private_reader = PrivateReader(
                     reader, local_schema, eps, delta)
                 assert private_reader._options.max_contrib == contrib
                 # Executed before tau is read; the result itself is not needed.
                 private_reader.execute_ast(parsed)
                 assert math.isclose(
                     private_reader.tau,
                     expected_tau,
                     rel_tol=0.03,
                     abs_tol=2,
                 )
Example #2
0
 def runRewrite(self):
     """Rewrite each query of the batch and check symbol types survive.

     Both the parsed query and its rewritten form must report symbols, and
     the symbols paired up position by position must have equal ``type()``.
     """
     parsed_batch = QueryParser(metadata).queries(self.queryBatch)
     for original in parsed_batch:
         print(original)
         rewritten = Rewriter(metadata).query(original)
         assert original.has_symbols()
         assert rewritten.has_symbols()
         # zip stops at the shorter symbol list, so only the common prefix
         # of the two lists is compared.
         symbol_pairs = zip(original.m_symbols, rewritten.m_symbols)
         assert all(
             before[1].type() == after[1].type()
             for before, after in symbol_pairs
         )
Example #3
0
 def test_empty_result_count_typed_notau_prepost(self):
     """Run a filtered COUNT repeatedly and check the result length.

     The query is executed once up front and then three more times; each
     of the three later results must have length 2.
     """
     sql = "SELECT COUNT(*) as c FROM PUMS.PUMS WHERE age > 100"
     source = PandasReader(df, schema)
     ast = QueryParser(schema).queries(sql)[0]
     private_reader = PrivateReader(source, schema, 1.0)
     # Initial execution whose result is deliberately discarded.
     private_reader._execute_ast(ast, True)
     for _ in range(3):
         rows = private_reader._execute_ast(ast, True)
         # Presumably a header row plus one data row -- TODO confirm.
         assert len(rows) == 2
Example #4
0
 def test_execute_without_dpsu(self):
     """With ``use_dpsu`` off, ``_get_reader`` must return the wrapped reader.

     The table's ``use_dpsu`` flag is cleared on a copy of the schema, the
     private reader's options are checked to reflect that, and the reader
     selected for a grouped COUNT must be ``private_reader.reader`` itself.
     """
     # NOTE(review): copy.copy is shallow, so clearing use_dpsu presumably
     # also affects the table in the shared schema -- confirm.
     plain_schema = copy.copy(schema)
     plain_schema["PUMS.PUMS"].use_dpsu = False
     base_reader = PandasReader(df, plain_schema)
     private_reader = PrivateReader(base_reader, plain_schema, 1.0)
     assert private_reader._options.use_dpsu == False
     sql = "SELECT COUNT(*) AS c FROM PUMS.PUMS GROUP BY married"
     ast = QueryParser(plain_schema).queries(sql)[0]
     selected = private_reader._get_reader(ast)
     assert selected is private_reader.reader
Example #5
0
 def runBuild(self):
     """Parse every query, walk its tree and check that it prints back.

     The printed AST must equal the input text once spaces, newlines and
     letter case are ignored; each AST is then passed to ``runParseAgain``.
     """

     def squash(text):
         # Strip spaces and newlines and fold case before comparing.
         return text.replace(' ', '').replace('\n', '').lower()

     for sql in self.queries:
         ast = QueryParser().query(sql)
         self.walk_children(ast)
         assert squash(sql) == squash(str(ast))
         self.runParseAgain(ast)
Example #6
0
 def test_viz_query_rewritten(self):
     """Both halves of a rewritten query must visualize to a ``Digraph``."""
     sql = "SELECT SUM(age) AS my_sum FROM PUMS.PUMS GROUP BY age"
     ast = QueryParser(schema).query(sql)
     private_reader = PrivateReader(PandasReader(df, schema), schema, 1.0)
     inner, outer = private_reader.rewrite_ast(ast)
     # Outer query first, then inner.
     for subquery in (outer, inner):
         graph = subquery.visualize(n_trunc=30)
         # For manual inspection:
         # graph.render('ast_digraph', view=True, cleanup=True)
         assert isinstance(graph, Digraph)
Example #7
0
 def runBuild(self, exc):
     """Assert that parsing every query in ``self.queries`` raises ``exc``.

     Args:
         exc: Exception class (or tuple of classes) that
             ``QueryParser().query`` is expected to raise for each query.

     Raises:
         AssertionError: If any query parses without raising ``exc``. The
             message names the query and the exception that was expected.
     """
     # A tuple of exception classes has no __name__, so fall back to repr.
     expected = getattr(exc, "__name__", repr(exc))
     for query in self.queries:
         try:
             QueryParser().query(query)
         except exc:
             # Expected failure: move on to the next query.
             continue
         raise AssertionError(
             "{0} should have thrown {1}, but succeeded".format(
                 query, expected))
Example #8
0
def preprocess_df_from_query(schema, df, query_string):
    """
    Returns a dataframe with the table's key column and a "group_cols"
    column built from the query's grouping keys.

    The grouping column names come from the query's grouping expressions,
    and the key column is the first key column that ``schema`` reports for
    the table the query reads from.
    """
    parser = QueryParser(schema)
    # NOTE(review): the same string is parsed twice (query() and
    # queries()[0]); presumably one parse would do -- confirm.
    single_ast = parser.query(query_string)
    first_ast = parser.queries(query_string)[0]

    grouping_names = []
    for grouping in first_ast.agg.groupingExpressions:
        grouping_names.append(grouping.expression.name)

    source_table = single_ast.source.find_node(Table)
    key_col = schema[source_table.name].key_cols()[0].name

    result = pd.DataFrame()
    result[key_col] = df[key_col]
    # NOTE(review): this is a tuple of per-row lists, not a list of tuples;
    # verify that matches what callers expect for "group_cols".
    row_values = df[grouping_names].values.tolist()
    result["group_cols"] = tuple(row_values)

    return result
Example #9
0
def test_rewriting():
    """Check that each rewritten query survives a print/parse round trip."""
    for raw in queries:
        parsed = QueryParser(metadata).query(str(raw))
        rewritten = Rewriter(metadata).query(parsed)
        # Printing the rewritten query and parsing it again must give an
        # equal AST.
        reparsed = QueryParser(metadata).query(str(rewritten))
        assert rewritten == reparsed
Example #10
0
 def test_simple(self):
     """A trivial SELECT must go through both ``parse_only`` and ``query``."""
     sql = "SELECT * FROM FOO;"
     # Parse without building first, then parse and build.
     QueryParser().parse_only(sql)
     QueryParser().query(sql)
Example #11
0
 def test_sum_noisy(self):
     """SUM(age) executed through the PandasReader must exceed 1000."""
     sql = "SELECT SUM(age) as age_total FROM PUMS.PUMS"
     source = PandasReader(df, schema)
     ast = QueryParser(schema).queries(sql)[0]
     result = source._execute_ast_df(ast)
     age_total = result['age_total'][0]
     assert age_total > 1000
Example #12
0
 def test_viz_query_symbols(self):
     """A query parsed with a schema must visualize to a ``Digraph``."""
     sql = "SELECT SUM(age) AS my_sum FROM PUMS.PUMS GROUP BY age"
     ast = QueryParser(schema).query(sql)
     highlight = {Table: 'red'}
     graph = ast.visualize(color_types=highlight, n_trunc=5)
     assert isinstance(graph, Digraph)
Example #13
0
 def test_unsupported(self):
     """A UNION ALL query must be rejected with ``ValueError``."""
     sql = "SELECT * FROM FOO UNION ALL SELECT * FROM BAR"
     with pytest.raises(ValueError):
         QueryParser().query(sql, True)
Example #14
0
 def test_bad_token(self):
     """An unknown keyword must raise ``ValueError`` starting "Bad token"."""
     sql = "SELECT * FROM FOO WHENCE ZIP ZAG"
     with pytest.raises(ValueError, match="^Bad token"):
         QueryParser().parse_only(sql)
Example #15
0
 def test_viz_query(self):
     """A query parsed without a schema must visualize to a ``Digraph``."""
     sql = "SELECT SUM(age) AS my_sum FROM pums.pums GROUP BY age"
     ast = QueryParser().query(sql)
     highlight = {Query: 'red'}
     graph = ast.visualize(color_types=highlight, n_trunc=30)
     assert isinstance(graph, Digraph)
Example #16
0
 def test_batch13(self):
     """The ``parse/test.sql`` batch must parse into exactly 13 queries."""
     sql_path = testpath + "parse/" + "test.sql"
     # Use a context manager so the file handle is closed deterministically
     # instead of being left for the garbage collector.
     with open(sql_path) as sql_file:
         batch_text = sql_file.read()
     qb = QueryParser().queries(batch_text)
     assert len(qb) == 13
Example #17
0
 def test_sum_no_rows_exact_typed(self):
     """The first ``c`` value of a filtered SUM must be ``None``.

     The query runs through ``PandasReader.execute_ast_typed``.
     """
     reader = PandasReader(df, schema)
     query = QueryParser(schema).queries(
         "SELECT SUM(age) as c FROM PUMS.PUMS WHERE age > 100")[0]
     trs = reader.execute_ast_typed(query)
     # Identity check: `== None` can be satisfied (or broken) by objects
     # with a custom __eq__, whereas the test wants the None singleton.
     assert trs['c'][0] is None
Example #18
0
 def test_tsql_escaped_error(self):
     """A bracket-escaped identifier must raise a "Lexer error" ValueError."""
     sql = "SELECT [FOO.BAR] FROM HR;"
     with pytest.raises(ValueError, match="^Lexer error"):
         QueryParser().parse_only(sql)
Example #19
0
 def test_count_no_rows_exact_typed(self):
     """The first ``c`` value of a filtered COUNT(*) must equal 0."""
     sql = "SELECT COUNT(*) as c FROM PUMS.PUMS WHERE age > 100"
     source = PandasReader(df, schema)
     ast = QueryParser(schema).queries(sql)[0]
     counts = source._execute_ast_df(ast)['c']
     assert counts[0] == 0
 def runValidate(self):
     """Expect ``ValueError`` for every query in ``self.queries``.

     Parsing and ``validateSingle`` both run inside the ``pytest.raises``
     block, so a ValueError from either step satisfies the expectation.
     """
     for sql in self.queries:
         print(sql)
         with pytest.raises(ValueError):
             self.validateSingle(QueryParser(metadata).query(sql))
 def runValidate(self):
     """Parse and validate every query in ``self.queries`` against metadata.

     Nothing is asserted explicitly; any exception from parsing or from
     ``Validate().validateQuery`` propagates to the caller.
     """
     for sql in self.queries:
         print(sql)
         parsed = QueryParser(metadata).query(sql)
         validator = Validate()
         validator.validateQuery(parsed, metadata)
Example #22
0
 def runParse(self):
     """Print each query and run it through ``parse_only``.

     A fresh ``QueryParser`` is used per query; any exception propagates.
     """
     for sql in self.queries:
         print(sql)
         parser = QueryParser()
         parser.parse_only(sql)
Example #23
0
def qp(query_string):
    """Parse ``query_string`` with a fresh, schema-less ``QueryParser``."""
    parser = QueryParser()
    return parser.query(query_string)
Example #24
0
 def runParseAgain(self, q):
     """Print the AST ``q`` to text, parse that text, and assert equality."""
     parser = QueryParser()
     reparsed = parser.query(str(q))
     assert q == reparsed