Esempio n. 1
0
 def test_check_thresholds_gauss(self):
     """Check ``PrivateReader.tau`` against a closed-form Gaussian threshold.

     For every combination of epsilon, max contribution ``d`` and delta the
     expected threshold is computed as
     ``1 + scale * sqrt(2 * ln(d / sqrt(2 * pi * delta)))`` where
     ``scale = sqrt(d) * sqrt(2 * ln(1.25 / delta)) / eps``. It is then
     compared, with loose tolerances, to the ``tau`` attribute read from the
     private reader after it has executed a grouped COUNT query.
     """
     epsilons = [0.1, 2.0]
     max_contribs = [1, 3]
     # NOTE(review): 10E-5 == 1e-4 and 10E-15 == 1e-14; confirm these are the
     # intended deltas rather than 1e-5 / 1e-15.
     deltas = [10E-5, 10E-15]
     query = "SELECT COUNT(*) FROM PUMS.PUMS GROUP BY married"
     reader = PandasReader(schema, df)
     qp = QueryParser(schema)
     q = qp.query(query)
     for eps in epsilons:
         for d in max_contribs:
             for delta in deltas:
                 # using slightly different formulations of same formula from different papers
                 # make sure private_reader round-trips
                 gaus_scale = math.sqrt(d) * math.sqrt(
                     2 * math.log(1.25 / delta)) / eps
                 gaus_rho = 1 + gaus_scale * math.sqrt(
                     2 * math.log(d / math.sqrt(2 * math.pi * delta)))
                 private_reader = PrivateReader(schema, reader, eps, delta)
                 q.max_ids = d  # hijack the AST
                 # The result rows are not needed; the call is made before
                 # ``tau`` is read, exactly as the test always did.
                 private_reader.execute_ast(q)
                 # Name the failing parameter combination: a bare assert in a
                 # triple loop does not say which case broke.
                 assert math.isclose(private_reader.tau,
                                     gaus_rho,
                                     rel_tol=0.03,
                                     abs_tol=2), (
                     "tau=%r, expected about %r (eps=%r, max_ids=%r, delta=%r)"
                     % (private_reader.tau, gaus_rho, eps, d, delta))
Esempio n. 2
0
 def test_execute_without_dpsu(self):
     """With ``options.use_dpsu`` set to False, ``_get_reader`` returns the wrapped reader.

     The check is an identity (``is``) comparison against
     ``private_reader.reader``, not an equality comparison.
     """
     sql = "SELECT COUNT(*) AS c FROM PUMS.PUMS GROUP BY married"
     base_reader = PandasReader(schema, df)
     private_reader = PrivateReader(schema, base_reader, 1.0)
     parsed = QueryParser(schema).queries(sql)[0]
     private_reader.options.use_dpsu = False
     chosen_reader = private_reader._get_reader(parsed)
     assert chosen_reader is private_reader.reader
Esempio n. 3
0
 def test_empty_result_count_typed_notau_prepost(self):
     """Repeated private execution of a filtered COUNT yields a length-1 result.

     The query filters on ``age > 100`` (expected, per the test name, to
     select no rows). ``_execute_ast`` is called with ``True`` as its second
     positional argument -- presumably the typed-result flag; confirm against
     ``PrivateReader``.
     """
     sql = "SELECT COUNT(*) as c FROM PUMS.PUMS WHERE age > 100"
     base_reader = PandasReader(schema, df)
     parsed = QueryParser(schema).queries(sql)[0]
     private_reader = PrivateReader(schema, base_reader, 1.0)
     # One call whose result is discarded, then three calls that are checked.
     private_reader._execute_ast(parsed, True)
     for _ in range(3):
         result = private_reader._execute_ast(parsed, True)
         assert len(result) == 1
Esempio n. 4
0
def preprocess_df_from_query(schema, df, query_string):
    """
    Build a two-column dataframe from ``df`` driven by the query's GROUP BY.

    The first column is the table's first key column (looked up in ``schema``
    under the name of the ``Table`` node found in the query source); the
    second, named "group_cols", holds the values of the grouping columns.

    NOTE(review): the query string is parsed twice (``query`` and
    ``queries``) -- confirm whether both parses are needed.
    NOTE(review): ``tuple(...)`` wraps only the outer list, so each per-row
    value appears to remain a list rather than a tuple -- confirm intent.
    """
    parser = QueryParser(schema)
    single_ast = parser.query(query_string)
    first_ast = parser.queries(query_string)[0]

    # Names of the expressions the query groups by.
    grouping_names = []
    for grouping in first_ast.agg.groupingExpressions:
        grouping_names.append(grouping.expression.name)

    source_table = single_ast.source.find_node(Table).name
    key_name = schema[source_table].key_cols()[0].name

    result = pd.DataFrame()
    result[key_name] = df[key_name]
    per_row_values = df[grouping_names].values.tolist()
    result["group_cols"] = tuple(per_row_values)

    return result
Esempio n. 5
0
 def runRewrite(self):
     """Rewrite every query parsed from ``self.queryBatch`` and compare symbol types.

     Each parsed query is printed and rewritten with a fresh
     ``Rewriter(metadata)``. Both the original and the rewritten query must
     report ``has_symbols()``, and the ``type()`` of element ``[1]`` of each
     paired ``m_symbols`` entry must match.
     """
     parsed_batch = QueryParser(metadata).queries(self.queryBatch)
     for original in parsed_batch:
         print(original)
         rewritten = Rewriter(metadata).query(original)
         assert original.has_symbols()
         assert rewritten.has_symbols()
         # zip stops at the shorter list, so only positions present in both
         # symbol lists are compared.
         type_matches = [
             before[1].type() == after[1].type()
             for before, after in zip(original.m_symbols, rewritten.m_symbols)
         ]
         assert all(type_matches)
Esempio n. 6
0
 def test_count_no_rows_exact_typed(self):
     """A COUNT(*) filtered on ``age > 100`` run through the plain reader gives 0.

     The query is executed with ``PandasReader.execute_ast_typed`` and the
     first value of result column ``c`` is compared to zero.
     """
     sql = "SELECT COUNT(*) as c FROM PUMS.PUMS WHERE age > 100"
     base_reader = PandasReader(schema, df)
     parsed = QueryParser(schema).queries(sql)[0]
     typed_rows = base_reader.execute_ast_typed(parsed)
     count_value = typed_rows['c'][0]
     assert count_value == 0
Esempio n. 7
0
 def test_sum_noisy(self):
     """SUM(age) over PUMS.PUMS, aliased ``age_total``, exceeds 1000.

     NOTE(review): despite the name, the query is executed only through
     ``PandasReader.execute_ast_typed``; no private reader appears here.
     """
     sql = "SELECT SUM(age) as age_total FROM PUMS.PUMS"
     base_reader = PandasReader(schema, df)
     parsed = QueryParser(schema).queries(sql)[0]
     typed_rows = base_reader.execute_ast_typed(parsed)
     total_age = typed_rows['age_total'][0]
     assert total_age > 1000
 def runValidate(self):
     """Expect each query string in ``self.queries`` to raise ``ValueError``.

     Parsing and ``validateSingle`` both run inside the ``pytest.raises``
     context, so a ``ValueError`` from either step satisfies the expectation.
     """
     for query_text in self.queries:
         print(query_text)
         with pytest.raises(ValueError):
             parser = QueryParser(metadata)
             parsed = parser.query(query_text)
             self.validateSingle(parsed)
 def runValidate(self):
     """Parse each query string in ``self.queries`` and validate it against ``metadata``.

     Every query is printed, parsed with a fresh ``QueryParser(metadata)``
     and passed to ``Validate().validateQuery``; any exception propagates.
     """
     for query_text in self.queries:
         print(query_text)
         parser = QueryParser(metadata)
         parsed = parser.query(query_text)
         validator = Validate()
         validator.validateQuery(parsed, metadata)
Esempio n. 10
0
 def runValidate(self):
     """Parse each query string in ``self.queries`` and hand it to ``validateSingle``.

     Every query is printed first, then parsed with a fresh
     ``QueryParser(metadata)``; any exception propagates to the caller.
     """
     for query_text in self.queries:
         print(query_text)
         parser = QueryParser(metadata)
         parsed = parser.query(query_text)
         self.validateSingle(parsed)
Esempio n. 11
0
def qp(query_string):
    """Parse ``query_string`` with a ``QueryParser`` built without arguments.

    Returns whatever ``QueryParser.query`` returns for the given string.
    """
    parser = QueryParser()
    return parser.query(query_string)