def test_persist_checkpoint_broadcast():
    """Check persist / checkpoint / broadcast chains against Fugue SQL text.

    Eight dataframes are created from ``mock_create1`` and each is chained
    with a different combination of ``persist``, ``checkpoint`` and
    ``broadcast``. The resulting workflow is handed to ``assert_eq`` together
    with the SQL text that should describe it.

    NOTE(review): ``assert_eq`` is defined outside this block; presumably it
    builds a workflow from the SQL and compares it to the programmatic one --
    confirm against its definition.
    """
    workflow = FugueWorkflow()

    def fresh():
        # Every statement in the expected SQL starts from its own create.
        return workflow.create(mock_create1)

    # persist / broadcast variants
    fresh().persist()
    fresh().persist("a.b")
    fresh().broadcast()
    fresh().persist("a.b").broadcast()

    # checkpoint variants (the plain checkpoint is intentionally issued twice,
    # matching the two corresponding statements in the expected SQL)
    fresh().checkpoint()
    fresh().checkpoint()
    fresh().checkpoint("xy z")
    fresh().checkpoint("xy z").broadcast()

    expected_sql = """
    create using mock_create1 persist
    a=create using mock_create1 persist a.b
    create using mock_create1 broadcast
    a=create using mock_create1 persist a.b broadcast

    create using mock_create1 checkpoint
    a?? create using mock_create1
    a=create using mock_create1 checkpoint "xy z"
    a??create using mock_create1 checkpoint "xy z" broadcast
    """
    assert_eq(expected_sql, workflow)
def test_select():
    """Check ``FugueWorkflow.select`` calls against equivalent Fugue SQL text.

    The workflow is built programmatically by mixing raw SQL fragments with
    dataframe handles (``a``, ``b``, ``x``, ``y`` and inline ``dag.create``
    results), then passed to ``assert_eq`` with the SQL text expected to
    describe the same workflow.

    The expected SQL is kept one statement per line: it contains ``#`` comment
    lines, so each comment has to end at a line break rather than run into the
    statement that follows it.

    NOTE(review): ``assert_eq`` is defined outside this block; presumably it
    builds a workflow from the SQL and compares it to ``dag`` -- confirm.
    """
    dag = FugueWorkflow()
    a = dag.create(mock_create1, params=dict(n=1))
    b = dag.create(mock_create1, params=dict(n=2))

    # Table names that are not dataframes known to the workflow.
    dag.select("select * from a.b")
    dag.select("select * from a.b TABLESAMPLE (5 PERCENT) AS x")
    dag.select("select * from a.b AS x")

    # Dataframe handles spliced into the statement.
    dag.select("select * from", a, "AS a")  # fugue sql adds 'AS a'
    dag.select("select * from", a, "TABLESAMPLE (5 PERCENT) AS a")
    x = dag.select("select * from", a, "TABLESAMPLE (5 PERCENT) AS x")

    # Chained selects; these correspond to the "no from" statements below.
    y = dag.select("select * FROM", x)
    # The result of the last chained select is not reused, so it is not bound.
    dag.select("select * FROM", y, "where t = 100")

    # Multiple dependencies.
    dag.select("select a.* from", a, "AS a join", b, "AS b on a.a == b.a")

    # Nested query and nested extensions.
    dag.select("select * from (select * from a.b)")
    dag.select("select * from", dag.create(mock_create1), "TABLESAMPLE (5 PERCENT)")
    dag.select("select * from", dag.create(mock_create1), "AS b")
    dag.select("select * from (select * from", dag.create(mock_create1), ")")

    # persist / broadcast / show chained after a select.
    dag.select("select * from", a, "AS a").persist().broadcast().show()
    dag.select("select * from", a, "AS a").persist("a.b.c").broadcast().show()

    assert_eq("""
    a=create using mock_create1(n=1)
    b=create using mock_create1(n=2)

    # assignment and table not found
    x=select * from a.b

    # sample and alias when table not found
    select * from a.b TABLESAMPLE (5 PERCENT) AS x
    select * from a.b AS x

    # when table is found
    select * from a
    select * from a TABLESAMPLE(5 PERCENT)
    select * from a TABLESAMPLE(5 PERCENT) AS x

    # no from
    select *
    select * where t=100

    # multiple dependencies
    select a.* from a join b on a.a==b.a

    # nested query
    select * from (select * from a.b)

    # nested fugue extensions
    select * from (create using mock_create1) TABLESAMPLE(5 PERCENT)
    select * from (create using mock_create1) AS b
    select * from (select * from (create using mock_create1))

    # persist & checkpoint & broadcast
    select * from a persist broadcast print
    select * from a persist a.b.c broadcast print
    """, dag)
def test_create_data():
    """Check that ``FugueWorkflow().df`` with inline rows matches a SQL ``create``.

    A two-row, single-column dataframe (schema ``a:int``) is created directly
    on a new workflow. The workflow reachable through the returned object's
    ``workflow`` attribute is then passed to ``assert_eq`` with the expected
    SQL text.

    NOTE(review): ``assert_eq`` is defined outside this block; presumably it
    builds a workflow from the SQL and compares the two -- confirm.
    """
    rows = [[0], [1]]
    created = FugueWorkflow().df(rows, "a:int")
    expected_sql = """
    a=create [[0],[1]] schema a:int
    """
    assert_eq(expected_sql, created.workflow)