def test_execute(cleanup):
    """Check running, dependency checking, forced reruns and invalidation.

    Relies on names defined outside this block: ``d6tflow``, ``Task1``,
    ``Task2``, ``Task3`` and ``df``. ``cleanup`` is accepted but never
    referenced in the body (presumably a pytest fixture -- confirm against
    the fixture definition).
    """
    # execute
    t1 = Task1()
    t2 = Task2()
    t3 = Task3()
    flow = [t1, t2, t3]
    for task in flow:
        task.invalidate()
    d6tflow.run(t3)
    assert all(t.complete() for t in flow)

    t1.invalidate()
    t2.invalidate()
    assert not t3.complete()  # cascade upstream

    d6tflow.settings.check_dependencies = False
    try:
        assert t3.complete()  # no cascade upstream
        d6tflow.run([t3])
        assert t3.complete() and not t1.complete()
    finally:
        # Restore the global setting even when an assertion above fails, so
        # later tests do not inherit check_dependencies=False.
        d6tflow.settings.check_dependencies = True
    d6tflow.run([t3])
    assert all(t.complete() for t in flow)

    # forced single
    class TaskTest(d6tflow.tasks.TaskCachePandas):
        def run(self):
            self.save(df)

    d6tflow.run(TaskTest())
    assert TaskTest().output().load().equals(df)

    # Deliberately redefine the task under the same name with a different
    # run(): the assertions below expect the stored output to stay `df`
    # until the task is forced.
    class TaskTest(d6tflow.tasks.TaskCachePandas):  # noqa: F811
        def run(self):
            self.save(df * 2)

    d6tflow.run(TaskTest())
    assert TaskTest().output().load().equals(df)
    d6tflow.run(TaskTest(), forced=TaskTest(), confirm=False)
    assert TaskTest().output().load().equals(df * 2)
    d6tflow.run([TaskTest()], forced=[TaskTest()], confirm=False)

    # forced flow: forcing t1 is expected to rewrite both t1's output and
    # t2's 'df2' output, which is checked through their modification times.
    mtimes = [
        t1.output().path.stat().st_mtime,
        t2.output()['df2'].path.stat().st_mtime,
    ]
    d6tflow.run(t3, forced=t1, confirm=False)
    assert t1.output().path.stat().st_mtime > mtimes[0]
    assert t2.output()['df2'].path.stat().st_mtime > mtimes[1]

    # downstream
    assert d6tflow.run(t3)
    d6tflow.invalidate_downstream(t2, t3, confirm=False)
    # NOTE(review): `not (a and b)` also passes when only one of t2/t3 is
    # incomplete -- confirm whether both were meant to be incomplete.
    assert not (t2.complete() and t3.complete()) and t1.complete()

    # upstream
    assert d6tflow.run(t3)
    d6tflow.invalidate_upstream(t3, confirm=False)
    assert not all(t.complete() for t in flow)
import importlib

import d6tflow

import cfg
import tasks
import visualize

# Preview the training task: its dependencies and their execution status.
default_training = tasks.TaskTrain()
d6tflow.preview(default_training)

# Run model training together with everything it depends on.
# See https://d6tflow.readthedocs.io/en/latest/run.html
d6tflow.run(default_training)

# Use the output.
visualize.accuracy()
visualize.plot_importances()

# Change a parameter and rerun; task output is parameter specific.
# See https://d6tflow.readthedocs.io/en/latest/advparam.html
no_preprocess = {'do_preprocess': False}
d6tflow.run(tasks.TaskTrain(**no_preprocess))
visualize.accuracy(**no_preprocess)

# Rerun the flow after code changes: reload the project modules first.
for module in (cfg, tasks):
    importlib.reload(module)

# Say TaskGetData changed: reset every task that depends on it, then
# preview and run the training task again.
d6tflow.invalidate_downstream(tasks.TaskGetData(), tasks.TaskTrain())
d6tflow.preview(tasks.TaskTrain())
d6tflow.run(tasks.TaskTrain())
# NOTE(review): the next two statements look like the tail of a task class
# whose header lies above this chunk -- confirm and re-indent under that class.
persist = ['df']
def run(self):
    self.save({'df': df})


class Task3(d6tflow.tasks.TaskCache):
    """Task that requires Task2 and Task1a and saves ``df`` under key 'df'.

    ``df``, ``Task2`` and ``Task1a`` are defined outside this chunk.
    """

    persist = ['df']

    def requires(self):
        """Return the upstream tasks, keyed by '2' and '1a'."""
        return {'2': Task2(), '1a': Task1a()}

    def run(self):
        """Save ``df`` as this task's 'df' output."""
        self.save({'df': df})


# Call input() on the task; the returned value is discarded.
Task3().input()

importlib.reload(d6tflow)
d6tflow.show_flow(Task3())
d6tflow.invalidate_downstream(Task3())
d6tflow.invalidate_upstream(Task1(), Task3())

importlib.reload(d6tflow)
d6tflow.run_local([Task3()], forced=[Task1()])
d6tflow.run_local([Task3()])

import luigi.tools.deps
import luigi.tools.deps_tree

import yaml

# yaml.load() without an explicit Loader is unsafe on old PyYAML and raises
# TypeError on PyYAML >= 6; safe_load parses plain YAML data on every version.
# The `with` block also closes the file handle that was previously leaked.
with open('tests/.creds.yml') as creds_file:
    cfg = yaml.safe_load(creds_file)
print(cfg)
print(cfg['d6tpipe_profile'])

from pathlib import Path