Beispiel #1
0
def test_run_deps_mixed_multi_deps():
    r"""
        a --bidir--> c <--dir-- b
         \          / \
          \     bidir  dir
           \       |   |
            \      V   V
             dir-> d   e
    """
    relation_helper = RelationTestHelper()
    relation_a = Relation(name='rel_a',
                          **relation_helper.rand_relation_helper())
    relation_a.attributes = [
        Attribute('col_a_c', dt.INTEGER),
        Attribute('col_a_d', dt.VARCHAR)
    ]
    relation_a.data = pd.DataFrame({
        "col_a_c": [
            1,
            2,
            3,
            4,
            5,
        ],
        "col_a_d": ["var_a_1", "var_a_2", "var_a_3", "var_a_1", "var_a_2"],
    })
    relation_b = Relation(name='rel_b',
                          **relation_helper.rand_relation_helper())
    relation_b.attributes = [Attribute('col_b_c', dt.VARCHAR)]
    relation_b.data = pd.DataFrame({
        "col_b_c": [
            "val1",
            "val2",
            "val3",
            "val4",
            "val5",
        ],
    })

    relation_c = Relation(name='rel_c',
                          **relation_helper.rand_relation_helper())
    relation_c.attributes = [
        Attribute('col_c_ae', dt.INTEGER),
        Attribute('col_c_bd', dt.VARCHAR)
    ]
    relation_c.data = pd.DataFrame({
        "col_c_ae": [
            1,
            1,
            2,
            2,
            5,
            5,
            5,
        ],
        "col_c_bd": [
            "val1",
            "val1",
            "val2",
            "val2",
            "val5",
            "val5",
            "val5",
        ]
    })

    relation_d = Relation(name='rel_d',
                          **relation_helper.rand_relation_helper())
    relation_d.attributes = [
        Attribute('col_d_a', dt.INTEGER),
        Attribute('col_d_c', dt.INTEGER)
    ]

    relation_e = Relation(name='rel_e',
                          **relation_helper.rand_relation_helper())
    relation_e.attributes = [Attribute('col_e_c', dt.INTEGER)]

    for relation in (
            relation_a,
            relation_b,
            relation_c,
            relation_d,
            relation_e,
    ):
        relation = stub_out_sampling(relation)

    dag = nx.DiGraph()
    dag.add_edge(relation_a,
                 relation_c,
                 direction="bidirectional",
                 remote_attribute="col_a_c",
                 local_attribute="col_c_ae")
    dag.add_edge(relation_a,
                 relation_d,
                 direction="directional",
                 remote_attribute="col_a_d",
                 local_attribute="col_d_a")
    dag.add_edge(relation_b,
                 relation_c,
                 direction="directional",
                 remote_attribute="col_b_c",
                 local_attribute="col_c_bd")
    dag.add_edge(relation_c,
                 relation_d,
                 direction="bidirectional",
                 remote_attribute="col_c_bd",
                 local_attribute="col_d_c")
    dag.add_edge(relation_c,
                 relation_e,
                 direction="directional",
                 remote_attribute="col_c_ae",
                 local_attribute="col_e_c")
    adapter = SnowflakeAdapter()
    RuntimeSourceCompiler.compile_queries_for_relation(relation_a, dag,
                                                       adapter, False)
    RuntimeSourceCompiler.compile_queries_for_relation(relation_b, dag,
                                                       adapter, False)
    RuntimeSourceCompiler.compile_queries_for_relation(relation_c, dag,
                                                       adapter, False)
    RuntimeSourceCompiler.compile_queries_for_relation(relation_d, dag,
                                                       adapter, False)
    RuntimeSourceCompiler.compile_queries_for_relation(relation_e, dag,
                                                       adapter, False)
    assert query_equalize(relation_a.compiled_query) == query_equalize(f"""
        WITH {relation_a.scoped_cte('SNOWSHU_FINAL_SAMPLE')} AS ( 
        SELECT 
            * 
        FROM 
            {relation_a.quoted_dot_notation} 
        WHERE 
            col_a_c 
        in (SELECT 
                col_c_ae 
            FROM 
                {relation_c.quoted_dot_notation}) ) 
        ,{relation_a.scoped_cte('SNOWSHU_DIRECTIONAL_SAMPLE')} AS ( 
            SELECT 
                * 
            FROM 
                {relation_a.scoped_cte('SNOWSHU_FINAL_SAMPLE')} SAMPLE BERNOULLI (1500 ROWS) 
        ) 
        SELECT 
            * 
        FROM 
        {relation_a.scoped_cte('SNOWSHU_DIRECTIONAL_SAMPLE')}
    """)
    assert query_equalize(relation_b.compiled_query) == query_equalize(f"""
        SELECT
            *
        FROM
            {relation_b.quoted_dot_notation}
        SAMPLE BERNOULLI (1500 ROWS)
    """)
    assert query_equalize(relation_c.compiled_query) == query_equalize(f"""
        SELECT 
            * 
        FROM 
            {relation_c.quoted_dot_notation} 
        WHERE 
            col_c_bd 
        in (SELECT 
                col_d_c 
            FROM 
                {relation_d.quoted_dot_notation})
        AND
            col_c_ae IN (1,2,3,4,5)
        AND
            col_c_bd IN ('val1','val2','val3','val4','val5')
    """)
    assert query_equalize(relation_d.compiled_query) == query_equalize(f"""
        SELECT 
            * 
        FROM 
            {relation_d.quoted_dot_notation}
        WHERE 
            col_d_a IN ('var_a_1','var_a_2','var_a_3') 
        AND
            col_d_c IN ('val1','val2','val5') 
    """)
    assert query_equalize(relation_e.compiled_query) == query_equalize(f"""
        WITH 
        {relation_e.scoped_cte('SNOWSHU_FINAL_SAMPLE')} AS ( 
        SELECT 
            * 
        FROM 
        {relation_e.quoted_dot_notation}
        WHERE 
            col_e_c IN (1,2,5) 
        )
        ,{relation_e.scoped_cte('SNOWSHU_DIRECTIONAL_SAMPLE')} AS ( 
        SELECT 
            * 
        FROM 
        {relation_e.scoped_cte('SNOWSHU_FINAL_SAMPLE')} SAMPLE BERNOULLI (1500 ROWS) 
        ) 
        SELECT 
            * 
        FROM 
        {relation_e.scoped_cte('SNOWSHU_DIRECTIONAL_SAMPLE')}
    """)
Beispiel #2
0
def test_run_deps_bidirectional_multi_deps():
    """
        a --bidir--> c <--bidir-- b
    """
    relation_helper = RelationTestHelper()
    relation_a = Relation(name='rel_a',
                          **relation_helper.rand_relation_helper())
    relation_a.attributes = [Attribute('col_a', dt.INTEGER)]
    relation_a.data = pd.DataFrame({"col_a": [
        1,
        2,
        3,
        4,
        5,
    ]})

    relation_b = Relation(name='rel_b',
                          **relation_helper.rand_relation_helper())
    relation_b.attributes = [Attribute('col_b', dt.VARCHAR)]
    relation_b.data = pd.DataFrame({
        "col_b": [
            "val1",
            "val2",
            "val3",
            "val4",
            "val5",
        ],
    })

    relation_c = Relation(name='rel_c',
                          **relation_helper.rand_relation_helper())
    relation_c.attributes = [
        Attribute('col_c_a', dt.INTEGER),
        Attribute('col_c_b', dt.VARCHAR)
    ]

    for relation in (
            relation_a,
            relation_b,
            relation_c,
    ):
        relation = stub_out_sampling(relation)

    dag = nx.DiGraph()
    dag.add_edge(relation_a,
                 relation_c,
                 direction="bidirectional",
                 remote_attribute="col_a",
                 local_attribute="col_c_a")
    dag.add_edge(relation_b,
                 relation_c,
                 direction="bidirectional",
                 remote_attribute="col_b",
                 local_attribute="col_c_b")
    adapter = SnowflakeAdapter()
    RuntimeSourceCompiler.compile_queries_for_relation(relation_a, dag,
                                                       adapter, False)
    RuntimeSourceCompiler.compile_queries_for_relation(relation_b, dag,
                                                       adapter, False)
    RuntimeSourceCompiler.compile_queries_for_relation(relation_c, dag,
                                                       adapter, False)
    assert query_equalize(relation_a.compiled_query) == query_equalize(f"""
        WITH {relation_a.scoped_cte('SNOWSHU_FINAL_SAMPLE')} AS ( 
        SELECT 
            * 
        FROM 
            {relation_a.quoted_dot_notation} 
        WHERE 
            col_a 
        in (SELECT 
                col_c_a 
            FROM 
                {relation_c.quoted_dot_notation}) ) 
        ,{relation_a.scoped_cte('SNOWSHU_DIRECTIONAL_SAMPLE')} AS ( 
            SELECT 
                * 
            FROM 
                {relation_a.scoped_cte('SNOWSHU_FINAL_SAMPLE')} SAMPLE BERNOULLI (1500 ROWS) 
        ) 
        SELECT 
            * 
        FROM 
        {relation_a.scoped_cte('SNOWSHU_DIRECTIONAL_SAMPLE')}
    """)
    assert query_equalize(relation_b.compiled_query) == query_equalize(f"""
        WITH 
        {relation_b.scoped_cte('SNOWSHU_FINAL_SAMPLE')} AS ( 
        SELECT 
            * 
        FROM 
        {relation_b.quoted_dot_notation}
        WHERE 
            col_b 
        in (SELECT 
                col_c_b 
            FROM 
                {relation_c.quoted_dot_notation})
        )
        ,{relation_b.scoped_cte('SNOWSHU_DIRECTIONAL_SAMPLE')} AS ( 
        SELECT 
            * 
        FROM 
        {relation_b.scoped_cte('SNOWSHU_FINAL_SAMPLE')} SAMPLE BERNOULLI (1500 ROWS) 
        ) 
        SELECT 
            * 
        FROM 
        {relation_b.scoped_cte('SNOWSHU_DIRECTIONAL_SAMPLE')}
    """)
    assert query_equalize(relation_c.compiled_query) == query_equalize(f"""
            SELECT
                *
            FROM {relation_c.quoted_dot_notation} 
            WHERE
                col_c_a IN (1,2,3,4,5)
            AND
                col_c_b IN ('val1','val2','val3','val4','val5')
    """)