def test_preview():
    t1 = Task1()
    t2 = Task2()
    t3 = Task3()
    d6tflow.invalidate_upstream(t3, confirm=False)

    import io
    from contextlib import redirect_stdout

    with io.StringIO() as buf, redirect_stdout(buf):
        d6tflow.preview(t3)
        output = buf.getvalue()
        assert output.count('PENDING') == 3
        assert output.count('COMPLETE') == 0

    with io.StringIO() as buf, redirect_stdout(buf):
        d6tflow.run(t3)
        d6tflow.preview(t3)
        output = buf.getvalue()
        assert output.count('PENDING') == 0
        assert output.count('COMPLETE') == 3

    with io.StringIO() as buf, redirect_stdout(buf):
        d6tflow.preview(Task3(do_preprocess=False))
        output = buf.getvalue()
        assert output.count('PENDING') == 1
        assert output.count('COMPLETE') == 2
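The tasks and the shared dataframe `df` used throughout these tests are not shown in the snippet. Below is a minimal sketch of definitions that would satisfy the assertions above, assuming a simple Task1 -> Task2 -> Task3 chain with a `do_preprocess` parameter on Task3; the `TaskPqPandas` base class and the class bodies are illustrative, not the original fixtures.

import luigi
import pandas as pd
import d6tflow

df = pd.DataFrame({'a': range(3)})

class Task1(d6tflow.tasks.TaskPqPandas):
    def run(self):
        self.save(df)

class Task2(d6tflow.tasks.TaskPqPandas):
    persist = ['df2']
    def requires(self):
        return Task1()
    def run(self):
        self.save({'df2': self.input().load()})

class Task3(d6tflow.tasks.TaskPqPandas):
    do_preprocess = luigi.BoolParameter(default=True)
    def requires(self):
        return Task2()
    def run(self):
        self.save(self.input()['df2'].load())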
Example #2
def test_execute(cleanup):
    # execute
    t1 = Task1()
    t2 = Task2()
    t3 = Task3()
    for t in [t1, t2, t3]:
        t.invalidate()
    d6tflow.run(t3)
    assert all(t.complete() for t in [t1, t2, t3])
    t1.invalidate()
    t2.invalidate()
    assert not t3.complete()  # cascade upstream
    d6tflow.settings.check_dependencies = False
    assert t3.complete()  # no cascade upstream
    d6tflow.run([t3])
    assert t3.complete() and not t1.complete()
    d6tflow.settings.check_dependencies = True
    d6tflow.run([t3])
    assert all(t.complete() for t in [t1, t2, t3])

    # forced single
    class TaskTest(d6tflow.tasks.TaskCachePandas):
        def run(self):
            self.save(df)

    d6tflow.run(TaskTest())
    assert TaskTest().output().load().equals(df)
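    # redefine TaskTest in place: its output is already complete, so the next
    # d6tflow.run() call will not re-execute it unless it is explicitly forced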
    class TaskTest(d6tflow.tasks.TaskCachePandas):
        def run(self):
            self.save(df*2)

    d6tflow.run(TaskTest())
    assert TaskTest().output().load().equals(df)
    d6tflow.run(TaskTest(), forced=TaskTest(), confirm=False)
    assert TaskTest().output().load().equals(df * 2)
    d6tflow.run([TaskTest()], forced=[TaskTest()], confirm=False)

    # forced flow
    mtimes = [t1.output().path.stat().st_mtime, t2.output()['df2'].path.stat().st_mtime]
    d6tflow.run(t3, forced=t1, confirm=False)
    assert t1.output().path.stat().st_mtime > mtimes[0]
    assert t2.output()['df2'].path.stat().st_mtime > mtimes[1]

    # downstream
    assert d6tflow.run(t3)
    d6tflow.invalidate_downstream(t2, t3, confirm=False)
    assert not (t2.complete() and t3.complete()) and t1.complete()

    # upstream
    assert d6tflow.run(t3)
    d6tflow.invalidate_upstream(t3, confirm=False)
    assert not all(t.complete() for t in [t1,t2,t3])
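
test_execute takes a `cleanup` fixture that is not included in the snippet. A minimal sketch of such a fixture, assuming it only needs to point d6tflow at a throwaway data directory (the fixture body is an assumption; `d6tflow.set_dir` and pytest's `tmp_path` are real APIs):

import shutil

import pytest
import d6tflow

@pytest.fixture
def cleanup(tmp_path):
    # write all task outputs for this test into a temporary directory
    d6tflow.set_dir(str(tmp_path / 'data'))
    yield
    # remove whatever the tasks persisted
    shutil.rmtree(tmp_path / 'data', ignore_errors=True)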
Example #3
    def run(self):
        self.save({'df':df})

class Task3(d6tflow.tasks.TaskCache):
    persist=['df']
    def requires(self):
        return {'2':Task2(),'1a':Task1a()}
    def run(self):
        self.save({'df':df})

Task3().input()

importlib.reload(d6tflow)
d6tflow.show_flow(Task3())
d6tflow.invalidate_downstream(Task3())
d6tflow.invalidate_upstream(Task1(), Task3())

importlib.reload(d6tflow)
d6tflow.run_local([Task3()],forced=[Task1()])
d6tflow.run_local([Task3()])

import luigi.tools.deps
import luigi.tools.deps_tree

import yaml
with open('tests/.creds.yml') as f:
    cfg = yaml.safe_load(f)
print(cfg)
print(cfg['d6tpipe_profile'])

from pathlib import Path
Example #4
    persist = ['df', 'df2']
    idx3 = luigi.Parameter(default='test3')
    export = False

    def run(self):
        self.save({'df': df, 'df2': df})


# @d6tflow.requires declares Task1A, Task1B and Task1C as upstream dependencies
# of Task1All, so their outputs become its inputs
@d6tflow.requires(Task1A, Task1B, Task1C)
class Task1All(d6tflow.tasks.TaskCache):
    def run(self):
        self.save(df)


d6tflow.run(Task1All())
d6tflow.invalidate_upstream(Task1All(), confirm=False)
d6tflow.preview(Task1All())

task = Task1All()

#**************************************************
# tests
#**************************************************

import pytest

import d6tflow.pipes


def readfile(file_dir):
    with open(file_dir, 'r') as f:
        return f.read()
Example #5
import d6tflow

# Import workflow tasks and output visualizations
import flow_tasks, flow_viz

# Instantiate terminal task with parameters
params = {'data_size': 6, 'mini_batch_size': 2}
task = flow_tasks.TaskModelTrain(**params)

# optional: reset everything every time workflow is run
d6tflow.invalidate_upstream(task, confirm=False)

# Preview terminal task
d6tflow.preview(task, clip_params=True)

# Run terminal task
d6tflow.run(task)

# Show output
if task.complete():
    flow_viz.show_test_prints(params)
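
`flow_tasks` and `flow_viz` are project-specific modules that are not part of the snippet. A minimal sketch of the kind of terminal task the driver above expects, assuming `data_size` and `mini_batch_size` are plain luigi parameters (everything below is illustrative, not the original `flow_tasks` module):

# flow_tasks.py -- illustrative sketch only
import luigi
import d6tflow

class TaskGetData(d6tflow.tasks.TaskCache):
    data_size = luigi.IntParameter(default=6)

    def run(self):
        # toy "dataset": just a list of integers
        self.save(list(range(self.data_size)))

@d6tflow.requires(TaskGetData)
class TaskModelTrain(d6tflow.tasks.TaskCache):
    mini_batch_size = luigi.IntParameter(default=2)

    def run(self):
        data = self.inputLoad()
        # split the data into mini-batches as a stand-in for "training"
        batches = [data[i:i + self.mini_batch_size]
                   for i in range(0, len(data), self.mini_batch_size)]
        self.save(batches)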