def test_preview():
    """Verify that d6tflow.preview() reports task status correctly.

    Covers three scenarios for the Task1 -> Task2 -> Task3 flow:
    everything pending, everything complete after a run, and a
    parameterized variant (do_preprocess=False) where only the
    terminal task itself is pending.
    """
    import io
    from contextlib import redirect_stdout

    t3 = Task3()
    # Reset the whole flow so every task starts out PENDING.
    d6tflow.invalidate_upstream(t3, confirm=False)

    def _preview_output(task, run_first=False):
        # Capture preview()'s stdout. When run_first is set, execute the
        # flow under the same capture, matching the original test so any
        # run output is included in the counted text.
        with io.StringIO() as buf, redirect_stdout(buf):
            if run_first:
                d6tflow.run(task)
            d6tflow.preview(task)
            return buf.getvalue()

    # All three tasks pending after invalidation.
    output = _preview_output(t3)
    assert output.count('PENDING') == 3
    assert output.count('COMPLETE') == 0

    # All three tasks complete after running the flow.
    output = _preview_output(t3, run_first=True)
    assert output.count('PENDING') == 0
    assert output.count('COMPLETE') == 3

    # A differently-parameterized terminal task has no cached output,
    # but its upstream dependencies are still COMPLETE.
    output = _preview_output(Task3(do_preprocess=False))
    assert output.count('PENDING') == 1
    assert output.count('COMPLETE') == 2
def test_execute(cleanup):
    """End-to-end checks of d6tflow.run(): dependency cascades,
    forced re-runs (single task and whole flow), and the
    downstream/upstream invalidation helpers.
    """
    # execute: a clean run completes the whole Task1 -> Task2 -> Task3 flow
    t1 = Task1()
    t2 = Task2()
    t3 = Task3()
    for t in (t1, t2, t3):
        t.invalidate()
    d6tflow.run(t3)
    assert all(t.complete() for t in [t1, t2, t3])

    # cascade upstream: with dependency checking on, invalidating
    # upstream tasks makes the downstream task report incomplete
    t1.invalidate()
    t2.invalidate()
    assert not t3.complete()
    # no cascade upstream: with the check disabled, t3's own output
    # still exists, so it reports complete
    d6tflow.settings.check_dependencies = False
    assert t3.complete()
    d6tflow.run([t3])
    assert t3.complete() and not t1.complete()
    d6tflow.settings.check_dependencies = True
    d6tflow.run([t3])
    assert all(t.complete() for t in [t1, t2, t3])

    # forced single: a complete task is not re-run unless forced
    class TaskTest(d6tflow.tasks.TaskCachePandas):
        def run(self):
            self.save(df)
    d6tflow.run(TaskTest())
    assert TaskTest().output().load().equals(df)

    class TaskTest(d6tflow.tasks.TaskCachePandas):
        def run(self):
            self.save(df * 2)
    d6tflow.run(TaskTest())
    # still the OLD output: the task was already complete, so the
    # redefined run() body did not execute
    assert TaskTest().output().load().equals(df)
    d6tflow.run(TaskTest(), forced=TaskTest(), confirm=False)
    assert TaskTest().output().load().equals(df * 2)
    d6tflow.run([TaskTest()], forced=[TaskTest()], confirm=False)

    # forced flow: forcing an upstream task refreshes downstream outputs
    mtimes = [t1.output().path.stat().st_mtime,
              t2.output()['df2'].path.stat().st_mtime]
    d6tflow.run(t3, forced=t1, confirm=False)
    assert t1.output().path.stat().st_mtime > mtimes[0]
    assert t2.output()['df2'].path.stat().st_mtime > mtimes[1]

    # downstream invalidation clears t2/t3 but leaves t1 intact
    assert d6tflow.run(t3)
    d6tflow.invalidate_downstream(t2, t3, confirm=False)
    assert not (t2.complete() and t3.complete()) and t1.complete()

    # upstream invalidation clears the whole flow
    assert d6tflow.run(t3)
    d6tflow.invalidate_upstream(t3, confirm=False)
    assert not all(t.complete() for t in [t1, t2, t3])
# NOTE(review): this `run` appears to be a method of a task class whose
# `class` line lies above this chunk -- confirm its indentation against
# the full file.
def run(self):
    self.save({'df': df})

class Task3(d6tflow.tasks.TaskCache):
    """Terminal task that joins the Task2 and Task1a outputs."""
    persist = ['df']

    def requires(self):
        return {'2': Task2(), '1a': Task1a()}

    def run(self):
        self.save({'df': df})

# Scratch/demo statements exercising the flow helpers.
Task3().input()
importlib.reload(d6tflow)
d6tflow.show_flow(Task3())
d6tflow.invalidate_downstream(Task3())
d6tflow.invalidate_upstream(Task1(), Task3())
importlib.reload(d6tflow)
d6tflow.run_local([Task3()], forced=[Task1()])
d6tflow.run_local([Task3()])

import luigi.tools.deps
import luigi.tools.deps_tree
import yaml

# yaml.load() without an explicit Loader is deprecated since PyYAML 5.1
# and a TypeError in PyYAML >= 6; safe_load covers standard YAML and the
# context manager closes the file handle deterministically.
with open('tests/.creds.yml') as _f:
    cfg = yaml.safe_load(_f)
print(cfg)
print(cfg['d6tpipe_profile'])

from pathlib import Path
persist = ['df', 'df2'] idx3 = luigi.Parameter(default='test3') export = False def run(self): self.save({'df': df, 'df2': df}) @d6tflow.requires(Task1A, Task1B, Task1C) class Task1All(d6tflow.tasks.TaskCache): def run(self): self.save(df) d6tflow.run(Task1All()) d6tflow.invalidate_upstream(Task1All(), confirm=False) d6tflow.preview(Task1All()) task = Task1All() #************************************************** # tests #************************************************** import pytest import d6tflow.pipes def readfile(file_dir): with open(file_dir, 'r') as f:
import d6tflow

# Import workflow tasks and output visualizations
import flow_tasks
import flow_viz

# Instantiate the terminal task with its parameters
params = {'data_size': 6, 'mini_batch_size': 2}
task = flow_tasks.TaskModelTrain(**params)

# Optional: reset everything each time the workflow is run
d6tflow.invalidate_upstream(task, confirm=False)

# Preview what the terminal task will execute
d6tflow.preview(task, clip_params=True)

# Run the terminal task (and anything it depends on)
d6tflow.run(task)

# Show the output once the task has finished
if task.complete():
    flow_viz.show_test_prints(params)