Example #1
0
def test_upload_after_task_build(tmp_directory):
    """Building a task directly must call its product's upload exactly once."""
    pipeline = DAG()
    target = File('file.txt')
    # wrap (instead of replacing) upload so calls get recorded while still
    # being forwarded to the real method
    target.upload = Mock(wraps=target.upload)
    touch_task = PythonCallable(_touch, target, dag=pipeline)

    touch_task.build()

    target.upload.assert_called_once()
Example #2
0
def test_python_callable_with_file():
    """A rendered PythonCallable exposes its product path and source code."""
    pipeline = DAG()
    task = PythonCallable(touch, File('file.txt'), pipeline, name='name')
    task.render()

    expected_source = ('def touch(product):\n    '
                       'Path(str(product)).touch()\n')

    assert str(task.product) == 'file.txt'
    assert str(task.source) == expected_source
Example #3
0
def test_download_upload_without_client():
    """Download and upload must not fail for a File created without client."""
    pipeline = DAG()
    target = File('file.txt')
    PythonCallable(_touch, target, dag=pipeline)

    # neither call is expected to raise
    target.download()
    target.upload()
Example #4
0
def test_can_access_tasks_inside_dag_using_getitem():
    """
    Iterating a DAG yields the names of its own tasks plus the names of
    upstream tasks that were declared in a different DAG.
    """
    dag = DAG('dag')
    dag2 = DAG('dag2')

    ta = PythonCallable(touch, File(Path('a.txt')), dag, 'ta')
    tb = PythonCallable(touch, File(Path('b.txt')), dag, 'tb')
    tc = PythonCallable(touch, File(Path('c.txt')), dag, 'tc')

    # td is still discoverable from dag even though it was declared in dag2,
    # since it is a dependency for a task in dag. It gets its own product
    # path so it does not share 'c.txt' with tc
    td = PythonCallable(touch_root, File(Path('d.txt')), dag2, 'td')
    # te is not discoverable since it is not a dependency for any task in dag
    te = PythonCallable(touch, File(Path('e.txt')), dag2, 'te')

    td >> ta >> tb >> tc >> te

    assert set(dag) == {'ta', 'tb', 'tc', 'td'}
Example #5
0
def test_placeholder_is_copied_upon_initialization():
    """Tasks created from one Placeholder must not share the same object."""
    dag = DAG()
    # each class gets its own mocked client
    for client_key in (SQLScript, PostgresRelation):
        dag.clients[client_key] = Mock()

    shared = Placeholder('CREATE TABLE {{product}} AS SELECT * FROM TABLE')

    first = SQLScript(shared,
                      PostgresRelation(('schema', 'a_table', 'table')),
                      dag,
                      name='t1')
    second = SQLScript(shared,
                       PostgresRelation(('schema', 'another_table', 'table')),
                       dag,
                       name='t2')

    assert first.source._placeholder is not second.source._placeholder
Example #6
0
def test_debug_and_develop_in_abstract_class(method):
    """
    Calling ``method`` on a ConcreteTask must raise NotImplementedError with
    a message naming both the method and the task class.
    """
    concrete = ConcreteTask(product=File('some_file'), dag=DAG())

    with pytest.raises(NotImplementedError) as excinfo:
        getattr(concrete, method)()

    expected = f'"{method}" is not implemented in "ConcreteTask" tasks'
    assert expected == str(excinfo.value)
Example #7
0
def test_dag_task_status_life_cycle(executor, tmp_directory):
    """
    Follow DAG and task statuses across DAG.render, a failing DAG.build and
    a second DAG.render.

    Statuses are updated and propagated downstream automatically upon calls
    to render and build. The executor is received as an argument because the
    object that gets updated might not be the same one declared here (this
    happens when a task runs in a different process); hence, it is the
    executor's responsibility to notify tasks on success/fail scenarios so
    downstream tasks are updated correctly.
    """
    dag = DAG(executor=executor)
    ok = PythonCallable(touch_root, File('ok.txt'), dag, name='t1')
    failing = PythonCallable(failing_root, File('a_file.txt'), dag, name='t2')
    child = PythonCallable(touch, File('another_file.txt'), dag, name='t3')
    grandchild = PythonCallable(touch,
                                File('yet_another_file.txt'),
                                dag,
                                name='t4')
    independent = PythonCallable(touch_root, File('file.txt'), dag, name='t5')
    failing >> child >> grandchild

    declared = (ok, failing, child, grandchild, independent)

    def current_statuses():
        # statuses in declaration order: t1, t2, t3, t4, t5
        return [task.exec_status for task in declared]

    # nothing has been rendered yet
    assert dag._exec_status == DAGStatus.WaitingRender
    assert {task.exec_status
            for task in dag.values()} == {TaskStatus.WaitingRender}

    dag.render()

    assert dag._exec_status == DAGStatus.WaitingExecution
    assert current_statuses() == [
        TaskStatus.WaitingExecution,
        TaskStatus.WaitingExecution,
        TaskStatus.WaitingUpstream,
        TaskStatus.WaitingUpstream,
        TaskStatus.WaitingExecution,
    ]

    # a DAGBuildError is tolerated here; the outcome is verified through the
    # statuses asserted below
    try:
        dag.build()
    except DAGBuildError:
        pass

    assert dag._exec_status == DAGStatus.Errored
    assert current_statuses() == [
        TaskStatus.Executed,
        TaskStatus.Errored,
        TaskStatus.Aborted,
        TaskStatus.Aborted,
        TaskStatus.Executed,
    ]

    dag.render()

    assert dag._exec_status == DAGStatus.WaitingExecution
    assert current_statuses() == [
        TaskStatus.Skipped,
        TaskStatus.WaitingExecution,
        TaskStatus.WaitingUpstream,
        TaskStatus.WaitingUpstream,
        TaskStatus.Skipped,
    ]
Example #8
0
def test_params_are_copied_upon_initialization():
    """Two tasks created with the same params dict must not share it."""
    dag = DAG()
    shared_params = {'a': 1}

    first, second = [
        PythonCallable(touch,
                       File('file'),
                       dag,
                       name=task_name,
                       params=shared_params) for task_name in ('t1', 't2')
    ]

    assert first.params is not second.params
Example #9
0
def test_error_if_missing_product(tmp_directory):
    """A ShellScript whose source lacks {{product}} fails on initialization."""
    pipeline = DAG()

    with pytest.raises(SourceInitializationError) as excinfo:
        ShellScript('touch file.txt', File('file.txt'), pipeline, name='touch')

    message = str(excinfo.value)
    assert 'ShellScript must include {{product}} in its source' in message
Example #10
0
def test_forced_render_does_not_call_is_outdated(monkeypatch):
    """
    For products whose metadata is stored remotely, checking status is an
    expensive operation. Make sure a forced render does not call
    Product._is_outdated
    """
    dag = DAG()
    root = PythonCallable(touch_root, File('1.txt'), dag, name=1)
    leaf = PythonCallable(touch, File('2.txt'), dag, name=2)
    root >> leaf

    def _raise_if_called(self, outdated_by_code):
        raise ValueError(f'Called _is_outdated on {self}')

    # with this patch in place, any call to File._is_outdated raises, so the
    # render below only passes if the method is never invoked
    monkeypatch.setattr(File, '_is_outdated', _raise_if_called)

    dag.render(force=True)
Example #11
0
 def make():
     """
     Return a DAG using a Serial executor (build_in_subprocess=False) with
     a single task named 'first' that receives a fresh ``object()`` as
     'some_param'.
     """
     serial = Serial(build_in_subprocess=False)
     dag = DAG(executor=serial)
     task_params = {'some_param': object()}
     PythonCallable(touch_root_w_param,
                    File('1.txt'),
                    dag,
                    name='first',
                    params=task_params)
     return dag
Example #12
0
def test_can_access_sub_dag():
    """A DAG set as upstream of a task is listed in it under the DAG's name."""
    inner = DAG('sub_dag')

    first = PythonCallable(touch_root, File('a.txt'), inner, 'ta')
    second = PythonCallable(touch, File('b.txt'), inner, 'tb')
    third = PythonCallable(touch, File('c.txt'), inner, 'tc')

    first >> second >> third

    outer = DAG('dag')
    downstream = PythonCallable(touch, File(Path('d.txt')), outer, 'td')

    # the whole sub DAG (not one of its tasks) becomes the dependency
    downstream.set_upstream(inner)

    assert 'sub_dag' in downstream.upstream
Example #13
0
def test_grid_with_hook_lazy_import(backup_spec_with_functions_flat,
                                    tmp_imports):
    """
    With lazy_import=True, hooks declared in a grid spec keep their dotted
    path but have no callable loaded.
    """
    # hook attribute name -> dotted path declared in the spec
    hooks = {
        'on_render': 'hooks.on_render',
        'on_finish': 'hooks.on_finish',
        'on_failure': 'hooks.on_failure',
    }
    grid_spec = {
        'source': 'my_tasks_flat.raw.function',
        'name': 'function-',
        'product': 'some_file.txt',
        'grid': {
            'a': [1, 2],
            'b': [3, 4]
        },
        **hooks,
    }

    dag = DAG()
    task_spec = TaskSpec(grid_spec,
                         Meta.default_meta(),
                         project_root='.',
                         lazy_import=True)
    task_spec.to_task(dag=dag)

    for task in dag.values():
        for hook_name, dotted_path in hooks.items():
            hook = getattr(task, hook_name)
            assert hook.callable is None
            assert hook._spec.dotted_path == dotted_path
Example #14
0
def test_grid_and_params(backup_spec_with_functions_flat, tmp_imports,
                         grid_spec):
    """A spec that declares both 'grid' and 'params' must be rejected."""
    grid_spec['params'] = {'a': 1}
    meta = Meta.default_meta()

    with pytest.raises(DAGSpecInitializationError) as excinfo:
        TaskSpec(grid_spec, meta, project_root='.').to_task(dag=DAG())

    message = str(excinfo.value)
    assert "'params' is not allowed when using 'grid'" in message
Example #15
0
def test_build_triggers_metadata_download(tmp_directory):
    """Building a DAG asks the product's client for the remote metadata."""
    serial = Serial(build_in_subprocess=False)
    dag = DAG(executor=serial)

    def _fake_download(path, destination=None):
        # stand-in for the client's download: just create the requested path
        Path(path).touch()

    client = Mock()
    client.download.side_effect = _fake_download

    PythonCallable(_touch, File('file.txt', client=client), dag=dag)

    # this should download files instead of executing the task
    dag.build()

    expected_source = Path('.file.txt.metadata')
    expected_destination = Path('.file.txt.metadata.remote')
    client.download.assert_called_with(expected_source,
                                       destination=expected_destination)
Example #16
0
def test_error_raised_if_link_product_does_not_exist(tmp_directory):
    """Creating a Link for a product that does not exist must fail."""
    pipeline = DAG()
    expected = ('Link tasks should point to Products that already exist. '
                '"some_file" task product "some_file.txt" does not exist')

    with pytest.raises(RuntimeError) as excinfo:
        Link(File('some_file.txt'), pipeline, name='some_file')

    # look for the message in the full exception representation
    representation = str(excinfo.getrepr())
    assert expected in representation
Example #17
0
def test_unsupported_extension():
    """Loading a SQLDump whose product is a .json file is not implemented."""
    json_dump = SQLDump('SELECT * FROM table',
                        File('my_file.json'),
                        DAG(),
                        name='task',
                        client=Mock())

    with pytest.raises(NotImplementedError):
        json_dump.load()
Example #18
0
def test_grid_with_missing_name(backup_spec_with_functions_flat,
                                add_current_to_sys_path, spec):
    """A spec without the 'name' key cannot be converted to a task."""
    del spec['name']
    meta = Meta.default_meta()

    with pytest.raises(KeyError) as excinfo:
        TaskSpec(spec, meta, project_root='.').to_task(dag=DAG())

    message = str(excinfo.value)
    assert 'Error initializing task with spec' in message
Example #19
0
def test_render_pass_on_missing_product_parameter(tmp_directory):
    """Rendering works when the script never assigns the product variable."""
    script = Path('sample.py')

    script.write_text("""
# + tags=["parameters"]

# +
df = None
df.to_csv(product)
""")

    pipeline = DAG()
    NotebookRunner(script, product=File('out.ipynb'), dag=pipeline)

    # the render process injects the cell with the product variable so this
    # should not raise any errors, even if the raw source code does not contain
    # the product variable
    assert pipeline.render()
Example #20
0
def test_can_access_product_without_rendering_if_literal():
    """A literal product can be converted to str before the DAG is rendered."""
    pipeline = DAG()
    ShellScript('echo a > {{product}}', File('1.txt'), pipeline, 't1')

    # dag.render() is deliberately never called; str() must still work
    product = pipeline['t1'].product
    assert str(product) == '1.txt'
Example #21
0
def test_hook_with_wrong_signature(callback):
    """Assigning a hook with an unexpected argument name must be rejected."""
    def my_callback(unknown_arg):
        pass

    pipeline = DAG()
    task = PythonCallable(fn, File('file1.txt'), pipeline)

    # the error is expected at assignment time, the hook is never executed
    with pytest.raises(CallbackSignatureError):
        setattr(task, callback, my_callback)
Example #22
0
    def make():
        """
        Return a DAG using a Serial executor (build_in_subprocess=False)
        with a single task whose 'resources_' param points to 'resource.txt'.
        """
        serial = Serial(build_in_subprocess=False)
        dag = DAG(executor=serial)
        task_params = {'resources_': {'file': 'resource.txt'}}

        PythonCallable(task_with_resource,
                       File('output'),
                       dag,
                       params=task_params)

        return dag
Example #23
0
def test_task_grouping():
    """Upstream tasks added with a group name are exposed under that group."""
    dag = DAG()
    first = PythonCallable(touch_root, File('1.txt'), dag, name='first')
    second = PythonCallable(touch_root, File('2.txt'), dag, name='second')
    third = PythonCallable(touch, File('3.txt'), dag, name='third')

    # both parents go into the same group
    for parent in (first, second):
        third.set_upstream(parent, group_name='group')

    dag.render()

    assert set(third.upstream) == {'first', 'second'}

    grouped = third._upstream_product_grouped
    assert set(grouped) == {'group'}
    assert set(grouped['group']) == {'first', 'second'}

    upstream_param = third.params['upstream']
    assert set(upstream_param) == {'group'}

    # the params must hold the very same product objects
    assert upstream_param['group']['first'] is first.product
    assert upstream_param['group']['second'] is second.product
Example #24
0
def test_sql_dump_shows_executed_code_if_fails(tmp_directory):
    """A failing SQLDump reports both the executed SQL and the DB error."""
    db_path = Path(tmp_directory) / "database.db"
    client = SQLAlchemyClient(f'sqlite:///{db_path}')

    dag = DAG()
    SQLDump('SOME INVALID SQL',
            File('data.parquet'),
            dag,
            name='data',
            client=client)

    with pytest.raises(DAGBuildError) as excinfo:
        dag.build()

    message = str(excinfo.value)
    assert 'SOME INVALID SQL' in message
    assert 'near "SOME": syntax error' in message
Example #25
0
def test_grid_with_missing_name(backup_spec_with_functions_flat, tmp_imports,
                                grid_spec):
    """A grid spec without the 'name' key cannot be converted to a task."""
    del grid_spec['name']
    meta = Meta.default_meta()

    with pytest.raises(DAGSpecInitializationError) as excinfo:
        TaskSpec(grid_spec, meta, project_root='.').to_task(dag=DAG())

    message = str(excinfo.value)
    assert 'Error initializing task with source' in message
Example #26
0
def incomplete_doc(env):
    """

    Parameters
    ----------
    optional : int
        Optional parameter, defaults to 1
    """
    # NOTE(review): the docstring above documents "optional" while the
    # signature only takes "env" -- presumably deliberate (see the function
    # name), so it is left untouched; confirm before editing it
    # env is not used: a new DAG with no tasks is returned regardless
    return DAG()
Example #27
0
def test_build_task(tmp_directory, monkeypatch):
    """Building a ShellScript executes its source with the product rendered."""
    pipeline = DAG()
    touch_task = ShellScript('touch {{product}}',
                             File('file.txt'),
                             pipeline,
                             name='touch')

    # dag.build verifies products exist after execution, so the fake
    # execution has to create the file itself
    def _create_product(code):
        Path('file.txt').touch()

    # mock the actual execution to make this test work on windows
    fake_execute = Mock(side_effect=_create_product)
    monkeypatch.setattr(touch_task.client, 'execute', fake_execute)

    pipeline.build()

    fake_execute.assert_called_once_with('touch file.txt')
Example #28
0
def test_duplicated_files_metaproduct():
    """Rendering fails when a File appears in more than one task."""
    pipeline = DAG()
    PythonCallable(touch_root, File('a'), pipeline, name='task')

    # File('a') is repeated inside this dictionary of products
    repeated = {
        'product': File('a'),
        'another': File('b')
    }
    PythonCallable(touch_root, repeated, pipeline, name='another')

    expected = ("Tasks must generate unique Products. "
                "The following Products appear in more than one task "
                "{File('a'): ['task', 'another']}")

    with pytest.raises(DAGRenderError) as excinfo:
        pipeline.render()

    assert expected == str(excinfo.value)
def test_warn_on_sql_missing_docstrings():
    """The quality checker warns about a SQL task without a docstring."""
    pipeline = DAG()
    undocumented_sql = 'SELECT * FROM table'
    SQLDump(undocumented_sql,
            File('file1.txt'),
            pipeline,
            client=Mock(),
            name='sql')

    checker = DAGQualityChecker()

    with pytest.warns(UserWarning):
        checker(pipeline)
Example #30
0
def test_calling_unrendered_task(method):
    """Calling ``method`` on a task that was never rendered must fail."""
    pipeline = DAG()
    task = PythonCallable(touch, File('1.txt'), pipeline)

    expected_fragment = f'Cannot call task.{method}() on a task that has'

    with pytest.raises(TaskBuildError) as excinfo:
        getattr(task, method)()

    assert expected_fragment in str(excinfo.value)