Example #1
0
def test_persist_checkpoint_broadcast():
    """Build persist / broadcast / checkpoint steps and compare with SQL.

    Each group of Python API calls below corresponds, in order, to one
    group of statements in the expected Fugue SQL text.  Both the text and
    the workflow are handed to ``assert_eq`` (defined outside this block;
    presumably it checks the two describe the same workflow).
    """
    dag = FugueWorkflow()

    # persist, without and with an argument
    plain_persist = dag.create(mock_create1)
    plain_persist.persist()
    leveled_persist = dag.create(mock_create1)
    leveled_persist.persist("a.b")

    # broadcast, alone and chained after persist
    plain_broadcast = dag.create(mock_create1)
    plain_broadcast.broadcast()
    persisted = dag.create(mock_create1).persist("a.b")
    persisted.broadcast()

    # checkpoint: two calls without arguments, then two with "xy z"
    for _ in range(2):
        dag.create(mock_create1).checkpoint()
    named_checkpoint = dag.create(mock_create1)
    named_checkpoint.checkpoint("xy z")
    checkpointed = dag.create(mock_create1).checkpoint("xy z")
    checkpointed.broadcast()

    expected_sql = """
    create using mock_create1 persist
    a=create using mock_create1 persist a.b

    create using mock_create1 broadcast
    a=create using mock_create1 persist a.b broadcast

    create using mock_create1 checkpoint
    a?? create using mock_create1
    a=create using mock_create1 checkpoint "xy z"
    a??create using mock_create1 checkpoint "xy z" broadcast
    """
    assert_eq(expected_sql, dag)
Example #2
0
def test_select():
    """Build a series of ``dag.select`` calls and compare with SQL.

    The Python calls are grouped to follow the commented sections of the
    expected Fugue SQL text.  Both the text and the workflow are handed to
    ``assert_eq`` (defined outside this block; presumably it checks the two
    describe the same workflow).
    """
    dag = FugueWorkflow()
    first = dag.create(mock_create1, params={"n": 1})
    second = dag.create(mock_create1, params={"n": 2})

    # table names given purely as SQL text
    dag.select("select * from a.b")
    dag.select("select * from a.b TABLESAMPLE (5 PERCENT) AS x")
    dag.select("select * from a.b AS x")

    # dataframes passed in as objects
    dag.select("select * from", first, "AS a")  # fugue sql adds 'AS a'
    dag.select("select * from", first, "TABLESAMPLE (5 PERCENT) AS a")
    sampled = dag.select("select * from", first, "TABLESAMPLE (5 PERCENT) AS x")

    # each select here reads from the result of the previous one
    reselected = dag.select("select * FROM", sampled)
    filtered = dag.select("select * FROM", reselected, "where t = 100")

    # two dataframes in one statement
    dag.select(
        "select a.* from", first, "AS a join", second, "AS b on a.a == b.a"
    )

    # nested query written purely as SQL text
    dag.select("select * from (select * from a.b)")

    # freshly created dataframes used directly as select sources
    sample_source = dag.create(mock_create1)
    dag.select("select * from", sample_source, "TABLESAMPLE (5 PERCENT)")
    alias_source = dag.create(mock_create1)
    dag.select("select * from", alias_source, "AS b")
    nested_source = dag.create(mock_create1)
    dag.select("select * from (select * from", nested_source, ")")

    # persist, broadcast and show chained onto a select
    cached = dag.select("select * from", first, "AS a").persist()
    cached.broadcast().show()
    cached_leveled = dag.select("select * from", first, "AS a").persist("a.b.c")
    cached_leveled.broadcast().show()

    expected_sql = """
    a=create using mock_create1(n=1)
    b=create using mock_create1(n=2)
    
    # assignment and table not found
    x=select * from a.b
    
    # sample and alias when table not found
    select * from a.b TABLESAMPLE (5 PERCENT) AS x
    select * from a.b AS x
    
    # when table is found
    select * from a
    select * from a TABLESAMPLE(5 PERCENT)
    select * from a TABLESAMPLE(5 PERCENT) AS x

    # no from
    select *
    select * where t=100

    # multiple dependencies
    select a.* from a join b on a.a==b.a

    # nested query
    select * from (select * from a.b)

    # nested fugue extensions
    select * from (create using mock_create1) TABLESAMPLE(5 PERCENT)
    select * from (create using mock_create1) AS b
    select * from (select * from (create using mock_create1))

    # persist & checkpoint & broadcast
    select * from a persist broadcast print
    select * from a persist a.b.c broadcast print
    """
    assert_eq(expected_sql, dag)
Example #3
0
def test_create_data():
    """Create a dataframe from literal rows and compare with SQL.

    The rows and schema string are passed to ``FugueWorkflow().df``; the
    workflow reachable from the result is then handed to ``assert_eq``
    (defined outside this block) together with the expected SQL text.
    """
    rows = [[0], [1]]
    frame = FugueWorkflow().df(rows, "a:int")
    expected_sql = """
    a=create [[0],[1]] schema a:int
    """
    assert_eq(expected_sql, frame.workflow)