def test_workflow():
    """Build a ``FugueWorkflow`` and check its compile, run and result behavior.

    The test verifies, in order, that:

    * the task behind a workflow dataframe rejects ``.copy()``, ``copy.copy``
      and ``copy.deepcopy`` with ``InvalidOperationError``;
    * ``builder.df(123)`` raises ``FugueWorkflowCompileError``;
    * ``run()`` works with no argument, with a ``FugueWorkflowContext`` and
      with the string ``"NativeExecutionEngine"``, while ``run("abc")``
      raises ``TypeError``;
    * ``result`` and ``compute()`` return the expected rows.
    """
    int_schema = "a:int"
    two_col_schema = "a:int,b:int"

    workflow = FugueWorkflow()
    source = workflow.create_data([[0], [0], [1]], int_schema)

    # Every way of copying the underlying task must be rejected.
    raises(InvalidOperationError, lambda: source._task.copy())
    raises(InvalidOperationError, lambda: copy.copy(source._task))
    raises(InvalidOperationError, lambda: copy.deepcopy(source._task))

    # The same dataframe is shown twice on purpose.
    source.show()
    source.show()

    # An int is not accepted by ``df``.
    raises(FugueWorkflowCompileError, lambda: workflow.df(123))

    derived = source.transform(mock_tf1, "*,b:int", pre_partition={"by": ["a"]})
    derived.show()
    workflow.create_data([[0], [1]], "b:int").show()

    # ``show`` is given workflow dataframes together with a plain ArrayDataFrame.
    standalone = ArrayDataFrame([[100]], int_schema)
    workflow.show(source, derived, standalone)

    # Rebind ``derived`` to the partitioned, persisted and broadcast pipeline.
    partitioned = source.partition(by=["a"])
    derived = partitioned.transform(mock_tf2).persist().broadcast()
    derived.show()

    # Default run.
    workflow.run()
    df_eq(source.result, [[0], [0], [1]], int_schema)

    # An arbitrary string is not a valid argument for ``run``.
    raises(TypeError, lambda: workflow.run("abc"))

    # Run with an explicit context object.
    workflow.run(FugueWorkflowContext())
    df_eq(source.result, [[0], [0], [1]], int_schema)

    # Run with the engine given by name.
    workflow.run("NativeExecutionEngine")
    df_eq(derived.result, [[0, 2], [0, 2], [1, 1]], two_col_schema)

    # ``compute`` is checked both without an engine and with an engine class.
    df_eq(derived.compute(), [[0, 2], [0, 2], [1, 1]], two_col_schema)
    df_eq(
        derived.compute(NativeExecutionEngine),
        [[0, 2], [0, 2], [1, 1]],
        two_col_schema,
    )
def test_print():
    """Compare a programmatically built workflow with its SQL ``print`` form.

    The workflow creates two dataframes with ``mock_create1``, shows the first
    one on its own, then shows both with ``rows=5``, ``show_count=True`` and a
    title containing a double quote. ``assert_eq`` receives the SQL text and
    the workflow.
    """
    # The literal is kept exactly as written; the title's double quote is
    # written as \\" inside the SQL text.
    expected_sql = """ a=create using mock_create1(n=1) print print 5 rows from a, (create using mock_create1(n=2)) rowcount title "\\"b B" """  # noqa: E501

    dag = FugueWorkflow()

    a = dag.create(mock_create1, params={"n": 1})
    a.show()

    b = dag.create(mock_create1, params={"n": 2})
    dag.show(a, b, rows=5, show_count=True, title='"b B')

    assert_eq(expected_sql, dag)
def test_yield():
    """Check that yielding leaves a workflow's ``spec_uuid`` unchanged.

    Four workflows are built from the same single-row dataframe and shown:
    one plain, one with ``yield_file_as``, one with a deterministic checkpoint
    followed by ``yield_file_as``, and one with a deterministic checkpoint
    followed by ``yield_dataframe_as``. All four ids must be equal.
    """
    schema = "a:int32"

    # Baseline: no yield at all.
    plain_dag = FugueWorkflow()
    plain_dag.df([[0]], schema).show()
    plain_id = plain_dag.spec_uuid()

    # Yield as file directly on the dataframe.
    file_df = FugueWorkflow().df([[0]], schema)
    file_df.yield_file_as("x")
    file_df.show()
    file_id = file_df.workflow.spec_uuid()

    # Deterministic checkpoint, then yield as file.
    checkpoint_file_df = FugueWorkflow().df([[0]], schema)
    checkpoint_file_df.deterministic_checkpoint().yield_file_as("y")
    checkpoint_file_df.show()
    checkpoint_file_id = checkpoint_file_df.workflow.spec_uuid()

    # Deterministic checkpoint, then yield as dataframe.
    checkpoint_frame_df = FugueWorkflow().df([[0]], schema)
    checkpoint_frame_df.deterministic_checkpoint().yield_dataframe_as("z")
    checkpoint_frame_df.show()
    checkpoint_frame_id = checkpoint_frame_df.workflow.spec_uuid()

    # yield doesn't change determinism
    assert plain_id == file_id
    assert plain_id == checkpoint_file_id
    assert plain_id == checkpoint_frame_id