def test_select_with():
    """Check a SELECT that starts with a WITH clause (two CTEs plus UNION).

    The query is added to a workflow through ``FugueWorkflow.select`` and the
    workflow is handed to ``assert_eq`` together with the Fugue SQL script
    that is expected to describe the same thing.
    """
    query = (
        "with x as ( select * from a ) , y as ( select * from b ) "
        "select * from x union select * from y"
    )
    expected_script = (
        " with x as (select * from a), y as (select * from b) "
        "select * from x union select * from y "
    )

    workflow = FugueWorkflow()
    workflow.select(query)

    assert_eq(expected_script, workflow)
def test_general_set_op():
    """Check set operations (UNION / INTERSECT) between mixed operands.

    Operands are plain selects, inline ``create`` statements, and ``process``
    / ``transform`` steps. The workflow built through the ``FugueWorkflow``
    API is handed to ``assert_eq`` together with the Fugue SQL script that is
    expected to describe the same workflow.

    The script keeps one statement per line: its ``#`` comment must end at
    the line break, otherwise the statements after it would be read as part
    of the comment.
    """
    dag = FugueWorkflow()
    a = dag.create(mock_create1, params=dict(n=1))
    b = dag.create(mock_create1, params=dict(n=2))

    # Both operands are named dataframes.
    dag.select("select * from", a, "AS a union all select * from", b, "AS b")

    # Left operand is an anonymous, inline-created dataframe.
    dag.select(
        "SELECT * FROM", dag.create(mock_create1), "union select * from", b, "AS b"
    )
    dag.select(
        "SELECT * FROM",
        dag.create(mock_create1),
        "intersect distinct SELECT * FROM",
        a.process(mock_processor1),
    )
    dag.select(
        "select * from",
        dag.create(mock_create1),
        "union SELECT * FROM",
        a.process(mock_processor1),
    )

    # Both operands derive from the same dataframe ``c``; the script below
    # expresses this with omitted dependencies.
    c = dag.create(mock_create1, params=dict(n=2))
    dag.select(
        "SELECT * FROM",
        c.transform(mock_transformer2),
        "union SELECT * FROM",
        c.process(mock_processor1),
    )

    assert_eq(
        """
        a=create using mock_create1(n=1)
        b=create using mock_create1(n=2)

        select * from a union all select * from b
        create using mock_create1 union select * from b
        create using mock_create1 intersect distinct process a using mock_processor1

        select * from (create using mock_create1) union process a using mock_processor1

        # operation on omitted dependencies should work as expected
        c=create using mock_create1(n=2)
        transform using mock_transformer2 union process using mock_processor1
        """,
        dag,
    )
def test_select_nested():
    """Check SELECT statements whose FROM clause is a nested query.

    Covers a nested plain select, an inline ``create`` used as a table (with
    an alias and with TABLESAMPLE), and an inline ``create`` inside a nested
    select. The workflow built through the ``FugueWorkflow`` API is handed to
    ``assert_eq`` together with the Fugue SQL script expected to match it.

    The script keeps one statement per line so that the ``# nested query``
    comment ends at its line break instead of hiding the statements after it.
    """
    dag = FugueWorkflow()
    # ``a`` and ``b`` are not referenced again from Python; the calls are kept
    # because the expected script creates the same two dataframes.
    a = dag.create(mock_create1, params=dict(n=1))  # noqa: F841
    b = dag.create(mock_create1, params=dict(n=2))  # noqa: F841

    dag.select("select * from (select * from a.b)")
    dag.select("select * from", dag.create(mock_create1), "AS bb")
    dag.select("select * from", dag.create(mock_create1), "TABLESAMPLE (5 PERCENT)")
    dag.select("select * from (select * from", dag.create(mock_create1), ")")

    assert_eq(
        """
        a=create using mock_create1(n=1)
        b=create using mock_create1(n=2)

        # nested query
        select * from (select * from a.b)
        select * from (create using mock_create1) AS bb
        select * from (create using mock_create1) TABLESAMPLE(5 PERCENT)
        select * from (select * from (create using mock_create1))
        """,
        dag,
    )
def test_select_plus_engine():
    """Check SELECT statements that name the SQL engine to run on.

    A local ``MockEngine`` is passed both as a class and, after being
    registered under the name ``"_mock"``, by name with engine parameters.
    The workflow built through the ``FugueWorkflow`` API is handed to
    ``assert_eq`` together with the Fugue SQL script (``connect <engine>
    select ...``) expected to match it.

    Every Python comment and every ``#`` note in the script sits on its own
    line, so none of them can hide the ``assert_eq`` call or the statements
    that follow.
    """

    class MockEngine(SqliteEngine):
        """``SqliteEngine`` subclass that accepts one extra parameter ``p``."""

        def __init__(self, execution_engine, p: int = 0):
            super().__init__(execution_engine)
            self.p = p

    # Registered by name so the engine can be selected with a string plus
    # ``sql_engine_params``.
    register_sql_engine("_mock", lambda e, **kwargs: MockEngine(e, **kwargs))

    dag = FugueWorkflow()
    dag.select("select * from xyz", sql_engine=MockEngine).persist()
    dag.select("select * from xyz", sql_engine="_mock", sql_engine_params={"p": 2})
    dag.select("select * from xyz order by t limit 10", sql_engine=MockEngine)
    dag.select(
        "with a as ( select * from b ) select * from a order by t limit 10",
        sql_engine=MockEngine,
    )

    # NOTE: the two cases below are intentionally disabled; the matching
    # script lines are marked "This is not supported".
    # temp = dag.select("select a , b from a", sql_engine=MockEngine)
    # temp.transform(mock_transformer2)
    # temp = dag.select("select aa , bb from a", sql_engine=MockEngine)
    # dag.select("select aa + bb as t from", temp)

    assert_eq(
        """
        connect MockEngine select * from xyz persist
        connect _mock(p=2) select * from xyz
        connect MockEngine select * from xyz order by t limit 10
        connect MockEngine with a as (select * from b) select * from a order by t limit 10

        # This is not supported
        # transform (connect MockEngine select a,b from a) using mock_transformer2

        # This is not supported
        # select aa+bb as t from (connect MockEngine select aa,bb from a)
        """,
        dag,
    )
def test_select():
    """Check the basic SELECT forms.

    Covers tables that are not workflow dataframes, workflow dataframes with
    alias and TABLESAMPLE, selects with an omitted FROM, multiple
    dependencies, and the persist / checkpoint / broadcast / print suffixes.
    The workflow built through the ``FugueWorkflow`` API is handed to
    ``assert_eq`` together with the Fugue SQL script expected to match it.

    Each Python statement and each script statement is on its own line, so
    the inline comment and the ``#`` section comments in the script end at
    the line break instead of hiding what follows.
    """
    dag = FugueWorkflow()
    a = dag.create(mock_create1, params=dict(n=1))
    b = dag.create(mock_create1, params=dict(n=2))

    # ``a.b`` is a raw table name, not a dataframe of this workflow.
    dag.select("select * from a.b")
    dag.select("select * from a.b TABLESAMPLE (5 PERCENT) AS x")
    dag.select("select * from a.b AS x")

    # Selecting from workflow dataframes.
    dag.select("select * from", a, "AS a")  # fugue sql adds 'AS a'
    dag.select("select * from", a, "TABLESAMPLE (5 PERCENT) AS a")
    x = dag.select("select * from", a, "TABLESAMPLE (5 PERCENT) AS x")

    # Chained selects; the script side writes these without a FROM clause.
    y = dag.select("select * FROM", x)
    dag.select("select * FROM", y, "where t = 100")

    # More than one dependency in a single statement.
    dag.select("select a.* from", a, "AS a join", b, "AS b on a.a == b.a")

    # Post-select operations.
    dag.select("select * from", a, "AS a").persist().broadcast().show()
    dag.select("select * from", a, "AS a").weak_checkpoint(
        level="a.b.c"
    ).broadcast().show()

    assert_eq(
        """
        a=create using mock_create1(n=1)
        b=create using mock_create1(n=2)

        # assignment and table not found
        x=select * from a.b

        # sample and alias when table not found
        select * from a.b TABLESAMPLE (5 PERCENT) AS x
        select * from a.b AS x

        # when table is found
        select * from a
        select * from a TABLESAMPLE(5 PERCENT)
        select * from a TABLESAMPLE(5 PERCENT) AS x

        # no from
        select *
        select * where t=100

        # multiple dependencies
        select a.* from a join b on a.a==b.a

        # persist & checkpoint & broadcast
        select * from a persist broadcast print
        select * from a persist (level="a.b.c") broadcast print
        """,
        dag,
    )