def test_viz_child_nodes(self):
    """Check that aggregate nodes of the rewritten outer query visualize as Digraphs.

    The query is rewritten through a ``PrivateReader`` (epsilon=1.0) and
    every ``AggFunction`` node found in the outer query is rendered with
    ``visualize(n_trunc=30)``.
    """
    sql = "SELECT AVG(age) AS my_sum FROM PUMS.PUMS GROUP BY age"
    private_reader = PrivateReader(
        PandasReader(df, schema),
        schema,
        privacy=Privacy(epsilon=1.0),
    )
    # Only the outer query is inspected; the inner one is discarded.
    _, outer_query = private_reader._rewrite(sql)
    for agg_node in outer_query.find_nodes(AggFunction):
        rendered = agg_node.visualize(n_trunc=30)
        assert isinstance(rendered, Digraph)
def test_case_sensitive(self):
    """Check that the key column and a quoted, differently-cased column stay distinct.

    ``pid`` is declared as the key, so its COUNT(DISTINCT ...) must be
    rewritten to an expression named 'keycount'; the quoted "PiD" column
    must not be.
    """
    query = 'SELECT COUNT (DISTINCT pid) AS foo, COUNT(DISTINCT "PiD") AS bar FROM PUMS.PUMS'

    columns = [Int('pid', is_key=True), Int('"PiD"')]
    sample = Table("PUMS", "PUMS", columns, 150)
    meta = Metadata([sample], "csv")

    private_reader = PrivateReader(
        PostgresReader("localhost", "PUMS", "admin", "password"),
        meta,
        privacy=Privacy(epsilon=3.0),
    )
    # Only the outer query's select list is inspected.
    _, outer_query = private_reader._rewrite(query)
    selected = outer_query.select.namedExpressions

    assert selected[0].expression.expression.name == 'keycount'
    assert selected[1].expression.expression.name != 'keycount'
def test_reuse_expression(self):
    """Check that AVG, SUM and COUNT over one column share two outer columns.

    After rewriting, the distinct ``Column`` names referenced by the outer
    select list must be exactly two and include 'count_age' and 'sum_age'.
    """
    sql = 'SELECT AVG(age), SUM(age), COUNT(age) FROM PUMS.PUMS'

    metadata = Metadata.from_file(meta_path)
    frame = pd.read_csv(csv_path)
    private_reader = PrivateReader(
        PandasReader(frame, metadata),
        metadata,
        privacy=Privacy(epsilon=3.0),
    )

    # The parse result is not used; the call is kept so a parse failure still surfaces.
    QueryParser(metadata).query(sql)

    _, outer_query = private_reader._rewrite(sql)
    column_nodes = outer_query.select.namedExpressions.find_nodes(Column)
    names = unique([node.name for node in column_nodes])

    assert len(names) == 2
    for expected in ('count_age', 'sum_age'):
        assert expected in names
"git rev-parse --show-toplevel".split(" ")).decode("utf-8").strip()  # tail of a call opened before this chunk; presumably yields git_root_dir -- confirm against the preceding lines

# Locations of the PUMS_pid metadata and CSV files under <git root>/datasets.
meta_path = os.path.join(git_root_dir, os.path.join("datasets", "PUMS_pid.yaml"))
csv_path = os.path.join(git_root_dir, os.path.join("datasets", "PUMS_pid.csv"))

from snsql.xpath.parse import XPath

# Module-level fixtures shared by the test classes below.
p = XPath()  # XPath parser used by every test to parse path strings
meta = Metadata.from_file(meta_path)
pums = pd.read_csv(csv_path)
query = 'SELECT AVG(age) + 3, STD(age), VAR(age), SUM(age) / 10, COUNT(age) + 2 FROM PUMS.PUMS'
q = QueryParser(meta).query(query)  # parsed, un-rewritten form of `query`
reader = SqlReader.from_connection(pums, "pandas", metadata=meta)
priv = PrivateReader(reader, meta, privacy=Privacy(epsilon=1.0))
# Rewritten form of the same query; not referenced by the tests visible
# here (presumably used by other test classes -- confirm).
subquery, root = priv._rewrite(query)


class TestXPathExecutionNoRewrite:
    """XPath evaluation tests that run against the parsed query `q` (no rewrite)."""

    def test_all_root_descend(self):
        """Evaluate '//*' on `q`: expect more than 40 results and a round-tripping str()."""
        path = '//*' # returns value
        xx = p.parse(path)
        res = xx.evaluate(q)
        assert (len(res) > 40)
        # The parsed expression must stringify back to the original path text.
        assert (str(xx) == path)

    def test_all_with_condition(self):
        """Evaluate '//*[@left]' on `q`: expect at least 3 results."""
        path = '//*[@left]' # returns value
        xx = p.parse(path)
        res = xx.evaluate(q)
        assert (len(res) >= 3)