Exemple #1
0
def test_workflow_determinism_8():
    """Compare spec uuids of workflows that differ only in a ``select`` call.

    Four workflows are built from the same data and schema. Each one calls
    ``select`` on the created dataframe (the returned value is discarded) and
    then ``show``. The assertions expect the created dataframe's uuid to be
    the same in every workflow, and the workflow uuid to match only when the
    ``select`` arguments match.
    """

    def _build(*columns, **select_options):
        # One workflow: create the data, select from it, then show the source.
        workflow = FugueWorkflow()
        frame = workflow.create_data([[0], [0], [1]], "a:int32")
        frame.select(*columns, **select_options)
        frame.show()
        return workflow, frame

    base_dag, base_df = _build("a", "b")
    same_dag, same_df = _build("a", "b")
    swapped_dag, swapped_df = _build("b", "a")
    distinct_dag, distinct_df = _build("a", "b", distinct=True)

    # Identical construction: both task and workflow uuids agree.
    assert base_df.spec_uuid() == same_df.spec_uuid()
    assert base_dag.spec_uuid() == same_dag.spec_uuid()

    # Different select arguments: same source uuid, different workflow uuid.
    variants = ((swapped_dag, swapped_df), (distinct_dag, distinct_df))
    for other_dag, other_df in variants:
        assert base_df.spec_uuid() == other_df.spec_uuid()
        assert base_dag.spec_uuid() != other_dag.spec_uuid()
Exemple #2
0
def test_workflow():
    """Build a ``FugueWorkflow``, run it several ways and check the results.

    The test checks that a dataframe's task rejects ``copy``/``deepcopy``,
    that ``df(123)`` raises ``FugueWorkflowCompileError``, that ``run("abc")``
    raises ``TypeError``, and that results are as expected after running with
    no argument, with a ``FugueWorkflowContext`` and with an engine name.
    """
    wf = FugueWorkflow()

    source = wf.create_data([[0], [0], [1]], "a:int")
    # Every way of copying the underlying task is expected to be rejected.
    copy_attempts = (
        lambda: source._task.copy(),
        lambda: copy.copy(source._task),
        lambda: copy.deepcopy(source._task),
    )
    for attempt in copy_attempts:
        raises(InvalidOperationError, attempt)
    source.show()
    source.show()

    raises(FugueWorkflowCompileError, lambda: wf.df(123))

    transformed = source.transform(
        mock_tf1, "*,b:int", pre_partition={"by": ["a"]}
    )
    transformed.show()
    wf.create_data([[0], [1]], "b:int").show()
    local_df = ArrayDataFrame([[100]], "a:int")
    wf.show(source, transformed, local_df)
    partitioned = source.partition(by=["a"])
    transformed = partitioned.transform(mock_tf2).persist().broadcast()
    transformed.show()

    source_schema = "a:int"
    result_schema = "a:int,b:int"

    wf.run()
    df_eq(source.result, [[0], [0], [1]], source_schema)
    raises(TypeError, lambda: wf.run("abc"))
    wf.run(FugueWorkflowContext())
    df_eq(source.result, [[0], [0], [1]], source_schema)
    wf.run("NativeExecutionEngine")
    df_eq(transformed.result, [[0, 2], [0, 2], [1, 1]], result_schema)
    df_eq(transformed.compute(), [[0, 2], [0, 2], [1, 1]], result_schema)
    df_eq(
        transformed.compute(NativeExecutionEngine),
        [[0, 2], [0, 2], [1, 1]],
        result_schema,
    )
Exemple #3
0
def test_workflow_determinism_5():
    """Compare two workflows that differ only in the arguments to ``show``.

    Both workflows create the same data and apply the same transform. The
    second one calls ``show(rows=22)`` instead of ``show()``. The assertions
    expect equal uuids for the created and transformed dataframes and a
    different uuid for the workflow as a whole.
    """

    def _build(**show_options):
        workflow = FugueWorkflow()
        source = workflow.create_data([[0], [0], [1]], "a:int32")
        transformed = source.transform(
            mock_tf1, "*,b:int", pre_partition={"by": ["a"], "num": 2}
        )
        source.show(**show_options)
        return workflow, source, transformed

    plain_dag, plain_src, plain_out = _build()
    # The only difference between the two workflows is this show option.
    rows_dag, rows_src, rows_out = _build(rows=22)

    assert plain_src.spec_uuid() == rows_src.spec_uuid()
    assert plain_out.spec_uuid() == rows_out.spec_uuid()
    assert plain_dag.spec_uuid() != rows_dag.spec_uuid()
Exemple #4
0
def test_workflow_determinism_6():
    """Check how an extra, unused task affects spec uuids.

    ``dag1`` and ``dag2`` both create two dataframes and union them; ``dag1``
    additionally creates one dataframe that nothing consumes. The assertions
    expect the corresponding dataframes to have equal uuids across the two
    workflows, while the workflows themselves have different uuids.

    The second set of dataframes is built on ``dag2`` (previously it was
    built on ``dag1``, leaving ``dag2`` empty and making the final assertion
    trivially true).
    """
    dag1 = FugueWorkflow()
    # Extra task with no consumers: the only structural difference from dag2.
    dag1.create_data([[0], [0], [1]], "a:int32")
    a1 = dag1.create_data([[0], [0], [1]], "a:int32")
    b1 = dag1.create_data([[1], [0], [1]], "a:int32")
    c1 = a1.union(b1)

    dag2 = FugueWorkflow()
    a2 = dag2.create_data([[0], [0], [1]], "a:int32")
    b2 = dag2.create_data([[1], [0], [1]], "a:int32")
    c2 = a2.union(b2)

    assert a1.spec_uuid() == a2.spec_uuid()
    assert b1.spec_uuid() == b2.spec_uuid()
    assert c1.spec_uuid() == c2.spec_uuid()
    assert dag1.spec_uuid() != dag2.spec_uuid()
Exemple #5
0
def test_workflow_determinism_3():
    """Compare two workflows whose input data differs, using ``to_uuid``.

    Both workflows pass ``data_determiner=to_uuid`` to ``create_data`` and are
    otherwise built the same way, but from different rows. The assertions
    expect the created dataframe, the transformed dataframe and the workflow
    to all have different uuids.
    """

    def _build(rows):
        workflow = FugueWorkflow()
        source = workflow.create_data(
            rows, "a:int32", data_determiner=to_uuid
        )
        transformed = source.transform(
            mock_tf1, "*,b:int", pre_partition={"by": ["a"], "num": 2}
        )
        source.show()
        return workflow, source, transformed

    first_dag, first_src, first_out = _build([[0], [0], [1]])
    # Different rows are the only change between the two workflows.
    second_dag, second_src, second_out = _build([[1], [10], [20]])

    assert first_src.spec_uuid() != second_src.spec_uuid()
    assert first_out.spec_uuid() != second_out.spec_uuid()
    assert first_dag.spec_uuid() != second_dag.spec_uuid()
Exemple #6
0
def test_workflow_determinism():
    """Check spec uuids of equivalent and non-equivalent workflows.

    ``builder1`` and ``builder2`` describe the same workflow with different
    spellings (``"a:int32"`` vs ``Schema("a:int")``, ``num=2`` vs ``num="2"``,
    different keyword order) and are expected to have equal uuids.
    ``builder3`` changes the transform's output schema and ``builder4``
    changes the partition number; both are expected to keep the source uuid
    while changing the transform and workflow uuids.

    Each variant is built on its own workflow from its own source dataframe
    (previously the variants were added to ``builder2`` and derived from
    ``a2``, so the compared workflow was empty).
    """
    # TODO: need more thorough test, separate this to small ones and remove it
    builder1 = FugueWorkflow()
    a1 = builder1.create_data([[0], [0], [1]], "a:int32")
    b1 = a1.transform("mock_tf1",
                      "*,b:int",
                      pre_partition=dict(by=["a"], num=2))
    a1.show()

    builder2 = FugueWorkflow()
    a2 = builder2.create_data([[0], [0], [1]], Schema("a:int"))
    b2 = a2.transform("mock_tf1",
                      "*,b:int",
                      pre_partition=dict(num="2", by=["a"]))
    a2.show()

    # Calling spec_uuid twice on the same workflow must give the same value.
    assert builder1.spec_uuid() == builder1.spec_uuid()
    assert a1.spec_uuid() == a2.spec_uuid()
    assert b1.spec_uuid() == b2.spec_uuid()
    assert builder1.spec_uuid() == builder2.spec_uuid()

    # Variant: different transform output schema.
    builder3 = FugueWorkflow()
    a3 = builder3.create_data([[0], [0], [1]], Schema("a:int"))
    b3 = a3.transform("mock_tf1",
                      "*,b:str",
                      pre_partition=dict(num="2", by=["a"]))
    a3.show()

    assert a1.spec_uuid() == a3.spec_uuid()
    assert b1.spec_uuid() != b3.spec_uuid()
    assert builder1.spec_uuid() != builder3.spec_uuid()

    # Variant: different partition number.
    builder4 = FugueWorkflow()
    a4 = builder4.create_data([[0], [0], [1]], Schema("a:int"))
    b4 = a4.transform("mock_tf1",
                      "*,b:int",
                      pre_partition=dict(num="200", by=["a"]))
    a4.show()

    assert a1.spec_uuid() == a4.spec_uuid()
    assert b1.spec_uuid() != b4.spec_uuid()
    assert builder1.spec_uuid() != builder4.spec_uuid()
Exemple #7
0
def test_workflow_determinism_7():
    """Compare workflows with and without an ``out_transform`` step.

    Two workflows call ``out_transform(mock_tf1)`` on the created dataframe
    before ``show``; a third only calls ``show``. The assertions expect the
    created dataframe's uuid to be equal in all three, the first two workflow
    uuids to be equal, and the third workflow uuid to differ.
    """

    def _build(with_out_transform):
        workflow = FugueWorkflow()
        source = workflow.create_data([[0], [0], [1]], "a:int32")
        if with_out_transform:
            source.out_transform(mock_tf1)
        source.show()
        return workflow, source

    first_dag, first_src = _build(True)
    second_dag, second_src = _build(True)
    bare_dag, bare_src = _build(False)

    assert first_src.spec_uuid() == second_src.spec_uuid()
    assert first_dag.spec_uuid() == second_dag.spec_uuid()

    assert first_src.spec_uuid() == bare_src.spec_uuid()
    assert first_dag.spec_uuid() != bare_dag.spec_uuid()