def test_x00_replacement(): adapter = PostgresAdapter(replica_metadata={}) id_col = "id" content_col = "content" normal_val = "normal_value" weird_value = "weird\x00value" custom_replacement = "__CUSTOM_VALUE__" cols = [ Attribute(id_col, data_types.BIGINT), Attribute(content_col, data_types.VARCHAR) ] # test default replacement relation = Relation("db", "schema", "relation", TABLE, cols) relation.data = DataFrame({ id_col: [1, 2], content_col: [normal_val, weird_value] }) fixed_relation = adapter.replace_x00_values(relation) assert all(fixed_relation.data.loc[fixed_relation.data[id_col] == 1, [content_col]] == normal_val) assert all(fixed_relation.data.loc[fixed_relation.data[id_col] == 2, [content_col]] == "weirdvalue") # test custom replacement adapter = PostgresAdapter(replica_metadata={}, pg_0x00_replacement=custom_replacement) relation = Relation("db", "schema", "relation", TABLE, cols) relation.data = DataFrame({ id_col: [1, 2], content_col: [normal_val, weird_value] }) fixed_relation = adapter.replace_x00_values(relation) assert all(fixed_relation.data.loc[fixed_relation.data[id_col] == 1, [content_col]] == normal_val) assert all(fixed_relation.data.loc[fixed_relation.data[id_col] == 2, [content_col]] == f"weird{custom_replacement}value")
def test_run_deps_mixed_multi_deps(): r""" a --bidir--> c <--dir-- b \ / \ \ bidir dir \ | | \ V V dir-> d e """ relation_helper = RelationTestHelper() relation_a = Relation(name='rel_a', **relation_helper.rand_relation_helper()) relation_a.attributes = [ Attribute('col_a_c', dt.INTEGER), Attribute('col_a_d', dt.VARCHAR) ] relation_a.data = pd.DataFrame({ "col_a_c": [ 1, 2, 3, 4, 5, ], "col_a_d": ["var_a_1", "var_a_2", "var_a_3", "var_a_1", "var_a_2"], }) relation_b = Relation(name='rel_b', **relation_helper.rand_relation_helper()) relation_b.attributes = [Attribute('col_b_c', dt.VARCHAR)] relation_b.data = pd.DataFrame({ "col_b_c": [ "val1", "val2", "val3", "val4", "val5", ], }) relation_c = Relation(name='rel_c', **relation_helper.rand_relation_helper()) relation_c.attributes = [ Attribute('col_c_ae', dt.INTEGER), Attribute('col_c_bd', dt.VARCHAR) ] relation_c.data = pd.DataFrame({ "col_c_ae": [ 1, 1, 2, 2, 5, 5, 5, ], "col_c_bd": [ "val1", "val1", "val2", "val2", "val5", "val5", "val5", ] }) relation_d = Relation(name='rel_d', **relation_helper.rand_relation_helper()) relation_d.attributes = [ Attribute('col_d_a', dt.INTEGER), Attribute('col_d_c', dt.INTEGER) ] relation_e = Relation(name='rel_e', **relation_helper.rand_relation_helper()) relation_e.attributes = [Attribute('col_e_c', dt.INTEGER)] for relation in ( relation_a, relation_b, relation_c, relation_d, relation_e, ): relation = stub_out_sampling(relation) dag = nx.DiGraph() dag.add_edge(relation_a, relation_c, direction="bidirectional", remote_attribute="col_a_c", local_attribute="col_c_ae") dag.add_edge(relation_a, relation_d, direction="directional", remote_attribute="col_a_d", local_attribute="col_d_a") dag.add_edge(relation_b, relation_c, direction="directional", remote_attribute="col_b_c", local_attribute="col_c_bd") dag.add_edge(relation_c, relation_d, direction="bidirectional", remote_attribute="col_c_bd", local_attribute="col_d_c") dag.add_edge(relation_c, relation_e, direction="directional", remote_attribute="col_c_ae", local_attribute="col_e_c") adapter = SnowflakeAdapter() RuntimeSourceCompiler.compile_queries_for_relation(relation_a, dag, adapter, False) RuntimeSourceCompiler.compile_queries_for_relation(relation_b, dag, adapter, False) RuntimeSourceCompiler.compile_queries_for_relation(relation_c, dag, adapter, False) RuntimeSourceCompiler.compile_queries_for_relation(relation_d, dag, adapter, False) RuntimeSourceCompiler.compile_queries_for_relation(relation_e, dag, adapter, False) assert query_equalize(relation_a.compiled_query) == query_equalize(f""" WITH {relation_a.scoped_cte('SNOWSHU_FINAL_SAMPLE')} AS ( SELECT * FROM {relation_a.quoted_dot_notation} WHERE col_a_c in (SELECT col_c_ae FROM {relation_c.quoted_dot_notation}) ) ,{relation_a.scoped_cte('SNOWSHU_DIRECTIONAL_SAMPLE')} AS ( SELECT * FROM {relation_a.scoped_cte('SNOWSHU_FINAL_SAMPLE')} SAMPLE BERNOULLI (1500 ROWS) ) SELECT * FROM {relation_a.scoped_cte('SNOWSHU_DIRECTIONAL_SAMPLE')} """) assert query_equalize(relation_b.compiled_query) == query_equalize(f""" SELECT * FROM {relation_b.quoted_dot_notation} SAMPLE BERNOULLI (1500 ROWS) """) assert query_equalize(relation_c.compiled_query) == query_equalize(f""" SELECT * FROM {relation_c.quoted_dot_notation} WHERE col_c_bd in (SELECT col_d_c FROM {relation_d.quoted_dot_notation}) AND col_c_ae IN (1,2,3,4,5) AND col_c_bd IN ('val1','val2','val3','val4','val5') """) assert query_equalize(relation_d.compiled_query) == query_equalize(f""" SELECT * FROM {relation_d.quoted_dot_notation} WHERE col_d_a IN ('var_a_1','var_a_2','var_a_3') AND col_d_c IN ('val1','val2','val5') """) assert query_equalize(relation_e.compiled_query) == query_equalize(f""" WITH {relation_e.scoped_cte('SNOWSHU_FINAL_SAMPLE')} AS ( SELECT * FROM {relation_e.quoted_dot_notation} WHERE col_e_c IN (1,2,5) ) ,{relation_e.scoped_cte('SNOWSHU_DIRECTIONAL_SAMPLE')} AS ( SELECT * FROM {relation_e.scoped_cte('SNOWSHU_FINAL_SAMPLE')} SAMPLE BERNOULLI (1500 ROWS) ) SELECT * FROM {relation_e.scoped_cte('SNOWSHU_DIRECTIONAL_SAMPLE')} """)
def test_run_deps_bidirectional_multi_deps(): """ a --bidir--> c <--bidir-- b """ relation_helper = RelationTestHelper() relation_a = Relation(name='rel_a', **relation_helper.rand_relation_helper()) relation_a.attributes = [Attribute('col_a', dt.INTEGER)] relation_a.data = pd.DataFrame({"col_a": [ 1, 2, 3, 4, 5, ]}) relation_b = Relation(name='rel_b', **relation_helper.rand_relation_helper()) relation_b.attributes = [Attribute('col_b', dt.VARCHAR)] relation_b.data = pd.DataFrame({ "col_b": [ "val1", "val2", "val3", "val4", "val5", ], }) relation_c = Relation(name='rel_c', **relation_helper.rand_relation_helper()) relation_c.attributes = [ Attribute('col_c_a', dt.INTEGER), Attribute('col_c_b', dt.VARCHAR) ] for relation in ( relation_a, relation_b, relation_c, ): relation = stub_out_sampling(relation) dag = nx.DiGraph() dag.add_edge(relation_a, relation_c, direction="bidirectional", remote_attribute="col_a", local_attribute="col_c_a") dag.add_edge(relation_b, relation_c, direction="bidirectional", remote_attribute="col_b", local_attribute="col_c_b") adapter = SnowflakeAdapter() RuntimeSourceCompiler.compile_queries_for_relation(relation_a, dag, adapter, False) RuntimeSourceCompiler.compile_queries_for_relation(relation_b, dag, adapter, False) RuntimeSourceCompiler.compile_queries_for_relation(relation_c, dag, adapter, False) assert query_equalize(relation_a.compiled_query) == query_equalize(f""" WITH {relation_a.scoped_cte('SNOWSHU_FINAL_SAMPLE')} AS ( SELECT * FROM {relation_a.quoted_dot_notation} WHERE col_a in (SELECT col_c_a FROM {relation_c.quoted_dot_notation}) ) ,{relation_a.scoped_cte('SNOWSHU_DIRECTIONAL_SAMPLE')} AS ( SELECT * FROM {relation_a.scoped_cte('SNOWSHU_FINAL_SAMPLE')} SAMPLE BERNOULLI (1500 ROWS) ) SELECT * FROM {relation_a.scoped_cte('SNOWSHU_DIRECTIONAL_SAMPLE')} """) assert query_equalize(relation_b.compiled_query) == query_equalize(f""" WITH {relation_b.scoped_cte('SNOWSHU_FINAL_SAMPLE')} AS ( SELECT * FROM {relation_b.quoted_dot_notation} WHERE col_b in (SELECT col_c_b FROM {relation_c.quoted_dot_notation}) ) ,{relation_b.scoped_cte('SNOWSHU_DIRECTIONAL_SAMPLE')} AS ( SELECT * FROM {relation_b.scoped_cte('SNOWSHU_FINAL_SAMPLE')} SAMPLE BERNOULLI (1500 ROWS) ) SELECT * FROM {relation_b.scoped_cte('SNOWSHU_DIRECTIONAL_SAMPLE')} """) assert query_equalize(relation_c.compiled_query) == query_equalize(f""" SELECT * FROM {relation_c.quoted_dot_notation} WHERE col_c_a IN (1,2,3,4,5) AND col_c_b IN ('val1','val2','val3','val4','val5') """)