def test_workflow_determinism_8():
    """Check how ``select`` arguments show up in task and workflow spec uuids.

    Four workflows are built from the same input data and differ only in the
    arguments passed to ``select``.  The assertions require that the spec uuid
    of the ``create_data`` frame is the same in every workflow, that two
    workflows with identical ``select`` calls have the same workflow spec
    uuid, and that changing the column order or passing ``distinct=True``
    changes the workflow spec uuid.
    """

    def build(*columns, **options):
        # Each call creates an independent workflow: data -> select -> show.
        workflow = FugueWorkflow()
        frame = workflow.create_data([[0], [0], [1]], "a:int32")
        frame.select(*columns, **options)
        frame.show()
        return workflow, frame

    base_dag, base_df = build("a", "b")
    same_dag, same_df = build("a", "b")
    swapped_dag, swapped_df = build("b", "a")
    distinct_dag, distinct_df = build("a", "b", distinct=True)

    # Identical definitions: both the source frame and the workflow match.
    assert base_df.spec_uuid() == same_df.spec_uuid()
    assert base_dag.spec_uuid() == same_dag.spec_uuid()

    # Different column order: source frame matches, workflow does not.
    assert base_df.spec_uuid() == swapped_df.spec_uuid()
    assert base_dag.spec_uuid() != swapped_dag.spec_uuid()

    # distinct=True: source frame matches, workflow does not.
    assert base_df.spec_uuid() == distinct_df.spec_uuid()
    assert base_dag.spec_uuid() != distinct_dag.spec_uuid()
def test_workflow():
    """Exercise building, running and reading results of a ``FugueWorkflow``.

    The test checks that a task cannot be copied, that ``df`` rejects an
    unsupported input, that the workflow can be run with no argument, with a
    ``FugueWorkflowContext`` and with an engine name (but not with an
    arbitrary string), and that ``result`` / ``compute`` return the expected
    data.
    """
    schema_a = "a:int"
    schema_ab = "a:int,b:int"

    dag = FugueWorkflow()
    source = dag.create_data([[0], [0], [1]], schema_a)

    # Every way of copying the underlying task must be rejected.
    copy_attempts = (
        lambda: source._task.copy(),
        lambda: copy.copy(source._task),
        lambda: copy.deepcopy(source._task),
    )
    for attempt in copy_attempts:
        raises(InvalidOperationError, attempt)

    # show() is called twice on the same frame.
    source.show()
    source.show()

    raises(FugueWorkflowCompileError, lambda: dag.df(123))

    transformed = source.transform(
        mock_tf1, "*,b:int", pre_partition={"by": ["a"]}
    )
    transformed.show()

    dag.create_data([[0], [1]], "b:int").show()

    # Workflow-level show accepts workflow frames and a plain dataframe.
    local_df = ArrayDataFrame([[100]], schema_a)
    dag.show(source, transformed, local_df)

    # Rebind to the frame whose result is checked below.
    transformed = (
        source.partition(by=["a"]).transform(mock_tf2).persist().broadcast()
    )
    transformed.show()

    dag.run()
    df_eq(source.result, [[0], [0], [1]], schema_a)

    raises(TypeError, lambda: dag.run("abc"))

    dag.run(FugueWorkflowContext())
    df_eq(source.result, [[0], [0], [1]], schema_a)

    dag.run("NativeExecutionEngine")
    df_eq(transformed.result, [[0, 2], [0, 2], [1, 1]], schema_ab)
    df_eq(transformed.compute(), [[0, 2], [0, 2], [1, 1]], schema_ab)
    df_eq(
        transformed.compute(NativeExecutionEngine),
        [[0, 2], [0, 2], [1, 1]],
        schema_ab,
    )
def test_workflow_determinism_5():
    """Check that a different ``show`` argument only changes the workflow uuid.

    Both workflows create the same data and apply the same transform; the
    second one calls ``show(rows=22)`` instead of ``show()``.  The assertions
    require the frame and transform spec uuids to match while the workflow
    spec uuids differ.
    """
    rows = [[0], [0], [1]]
    partition = {"by": ["a"], "num": 2}

    first_dag = FugueWorkflow()
    first_src = first_dag.create_data(rows, "a:int32")
    first_tf = first_src.transform(mock_tf1, "*,b:int", pre_partition=partition)
    first_src.show()

    second_dag = FugueWorkflow()
    second_src = second_dag.create_data(rows, "a:int32")
    second_tf = second_src.transform(
        mock_tf1, "*,b:int", pre_partition=partition
    )
    # The only difference between the two workflows is this argument.
    second_src.show(rows=22)

    assert first_src.spec_uuid() == second_src.spec_uuid()
    assert first_tf.spec_uuid() == second_tf.spec_uuid()
    assert first_dag.spec_uuid() != second_dag.spec_uuid()
def test_workflow_determinism_6():
    """Check that an extra, unconnected task only changes the workflow uuid.

    ``dag1`` contains one additional ``create_data`` task that nothing depends
    on; apart from that, ``dag1`` and ``dag2`` define the same two inputs and
    the same union.  The assertions require the corresponding frames to have
    equal spec uuids while the two workflows have different spec uuids.
    """
    dag1 = FugueWorkflow()
    # Extra task that exists only in dag1 and is not used by anything else.
    dag1.create_data([[0], [0], [1]], "a:int32")
    a1 = dag1.create_data([[0], [0], [1]], "a:int32")
    b1 = dag1.create_data([[1], [0], [1]], "a:int32")
    c1 = a1.union(b1)

    # Build the second workflow on dag2 itself.  Previously these frames were
    # created on dag1 (and the union used a1), which left dag2 empty and made
    # the final assertion compare against an empty workflow.
    dag2 = FugueWorkflow()
    a2 = dag2.create_data([[0], [0], [1]], "a:int32")
    b2 = dag2.create_data([[1], [0], [1]], "a:int32")
    c2 = a2.union(b2)

    assert a1.spec_uuid() == a2.spec_uuid()
    assert b1.spec_uuid() == b2.spec_uuid()
    assert c1.spec_uuid() == c2.spec_uuid()
    assert dag1.spec_uuid() != dag2.spec_uuid()
def test_workflow_determinism_3():
    """Check that different data changes every uuid when ``to_uuid`` is used.

    Both workflows have the same shape (create, transform, show) and pass
    ``data_determiner=to_uuid`` to ``create_data``; only the input rows
    differ.  The assertions require the frame, the transform and the workflow
    spec uuids to all differ.
    """

    def build(rows):
        # create -> transform -> show, with the data determiner set to to_uuid
        workflow = FugueWorkflow()
        source = workflow.create_data(rows, "a:int32", data_determiner=to_uuid)
        transformed = source.transform(
            mock_tf1, "*,b:int", pre_partition={"by": ["a"], "num": 2}
        )
        source.show()
        return workflow, source, transformed

    dag1, a1, b1 = build([[0], [0], [1]])
    # Same definition, different rows.
    dag2, a2, b2 = build([[1], [10], [20]])

    assert a1.spec_uuid() != a2.spec_uuid()
    assert b1.spec_uuid() != b2.spec_uuid()
    assert dag1.spec_uuid() != dag2.spec_uuid()
def test_workflow_determinism():
    """Check spec uuid equality across equivalently and differently built workflows.

    ``builder1`` and ``builder2`` express the same workflow in different but
    equivalent ways (schema given as a string vs. a ``Schema``, partition
    ``num`` given as an int vs. a string, keyword order swapped) and are
    asserted to produce equal spec uuids.  ``builder3`` changes the transform
    output schema and ``builder4`` changes the partition number; for both,
    the source frame uuid is asserted equal to ``builder1``'s while the
    transform and workflow uuids are asserted to differ.
    """
    # TODO: need more thorough test, separate this to small ones and remove it
    builder1 = FugueWorkflow()
    a1 = builder1.create_data([[0], [0], [1]], "a:int32")
    b1 = a1.transform("mock_tf1", "*,b:int", pre_partition=dict(by=["a"], num=2))
    a1.show()

    builder2 = FugueWorkflow()
    a2 = builder2.create_data([[0], [0], [1]], Schema("a:int"))
    b2 = a2.transform("mock_tf1", "*,b:int", pre_partition=dict(num="2", by=["a"]))
    a2.show()

    # Repeated calls on the same workflow return the same uuid.
    assert builder1.spec_uuid() == builder1.spec_uuid()
    assert a1.spec_uuid() == a2.spec_uuid()
    assert b1.spec_uuid() == b2.spec_uuid()
    assert builder1.spec_uuid() == builder2.spec_uuid()

    # Different transform output schema.  The frames are built on builder3
    # itself; previously they were added to builder2 / derived from a2, which
    # left builder3 empty and made the workflow comparison trivially true.
    builder3 = FugueWorkflow()
    a3 = builder3.create_data([[0], [0], [1]], Schema("a:int"))
    b3 = a3.transform("mock_tf1", "*,b:str", pre_partition=dict(num="2", by=["a"]))
    a3.show()
    assert a1.spec_uuid() == a3.spec_uuid()
    assert b1.spec_uuid() != b3.spec_uuid()
    assert builder1.spec_uuid() != builder3.spec_uuid()

    # Different partition number, again built on its own workflow.
    builder4 = FugueWorkflow()
    a4 = builder4.create_data([[0], [0], [1]], Schema("a:int"))
    b4 = a4.transform(
        "mock_tf1", "*,b:int", pre_partition=dict(num="200", by=["a"])
    )
    a4.show()
    assert a1.spec_uuid() == a4.spec_uuid()
    assert b1.spec_uuid() != b4.spec_uuid()
    assert builder1.spec_uuid() != builder4.spec_uuid()
def test_workflow_determinism_7():
    """Check that ``out_transform`` contributes to the workflow spec uuid.

    Two workflows attach ``out_transform(mock_tf1)`` to the source frame and a
    third does not.  The assertions require the source frame spec uuid to be
    the same in all three, the two workflows with the output transform to
    have equal spec uuids, and the workflow without it to differ.
    """

    def build(with_out_transform):
        # data -> (optional out_transform) -> show, on a fresh workflow
        workflow = FugueWorkflow()
        source = workflow.create_data([[0], [0], [1]], "a:int32")
        if with_out_transform:
            source.out_transform(mock_tf1)
        source.show()
        return workflow, source

    dag1, a1 = build(True)
    dag2, a2 = build(True)
    dag3, a3 = build(False)

    assert a1.spec_uuid() == a2.spec_uuid()
    assert dag1.spec_uuid() == dag2.spec_uuid()

    assert a1.spec_uuid() == a3.spec_uuid()
    assert dag1.spec_uuid() != dag3.spec_uuid()