예제 #1
0
def test_tracking_pass_through_default_tracking(pandas_data_frame_on_disk,
                                                mock_channel_tracker):
    """Check tracker calls for the pass-through task after the run is stopped.

    The task is handed the frame's file path as a plain string (not a
    DataFrame) and must return that very string.
    """
    _, df_file = pandas_data_frame_on_disk
    some_date = utcnow().isoformat()
    frame_path = str(df_file)

    task_result = task_pass_through_default(
        frame_path, some_date, expect_pass_through=True
    )
    assert task_result == frame_path

    # Normally triggered at process exit; invoked explicitly for the test.
    dbnd_run_stop()

    expected_calls = {
        "init_run": 1,
        "add_task_runs": 1,
        "log_metrics": 1,
        "log_targets": 1,
        "save_task_run_log": 2,
        "set_run_state": 1,
        # DAG start (with task start), task finished, dag stop
        "update_task_run_attempts": 3,
    }
    _check_tracking_calls(mock_channel_tracker, expected_calls)

    expected_params = {
        "data": frame_path,
        "dt": some_date,
        "expect_pass_through": True,
    }
    _assert_tracked_params(
        mock_channel_tracker, task_pass_through_default, **expected_params
    )
def track_airflow_dag_run_operator_run(task_cls, call_args, call_kwargs,
                                       airflow_task_context):
    """Run a decorated function as a dynamic task inside an Airflow task.

    Tracking is started for the DAG/task identified by
    ``airflow_task_context``, the function is executed through
    ``run_dynamic_task`` and, whatever the outcome, the dbnd run is stopped
    afterwards without updating the overall run state.

    Args:
        task_cls: task class passed through to ``run_dynamic_task``.
        call_args: positional arguments of the original call.
        call_kwargs: keyword arguments of the original call.
        airflow_task_context: object exposing ``dag_id``, ``execution_date``
            and ``task_id`` of the current Airflow task.

    Returns:
        The value loaded from the task's ``result`` target when the task
        definition declares a single result output; otherwise the task
        object itself.
    """
    from dbnd import dbnd_run_stop

    # This part will run DAG and Operator tasks. The returned value is not
    # needed here, so it is deliberately not bound to a local name.
    dbnd_run_start_airflow_dag_task(
        dag_id=airflow_task_context.dag_id,
        execution_date=airflow_task_context.execution_date,
        task_id=airflow_task_context.task_id,
    )

    # This is the real run of the decorated function.
    try:
        task_run = run_dynamic_task(
            parent_task_run=current_task_run(),
            task_cls=task_cls,
            call_args=call_args,
            call_kwargs=call_kwargs,
        )
        task = task_run.task
        if task.task_definition.single_result_output:
            # Inside a run we want the real value, not a deferred one.
            return task.__class__.result.load_from_target(task.result)
        # No single result output declared: hand back the task itself.
        return task
    finally:
        # update_run_state=False: during the actual Airflow task run we know
        # nothing about the whole run, e.g. whether it passed or failed.
        dbnd_run_stop(at_exit=False, update_run_state=False)
예제 #3
0
def test_tracking_user_exception():
    """A user exception inside a tracked task must be tracked as a failure.

    The tracking store's invoke entry point is mocked; call counts are
    checked both before and after the run is stopped, followed by the exact
    sequence of task-run and run states that were reported.
    """
    with mock.patch(COMPOSITE_TRACKING_STORE_INVOKE_REF) as mock_store:
        # The task raises; the error has to propagate to the caller.
        with pytest.raises(ZeroDivisionError):
            task_pass_through_exception()

        calls_before_stop = {
            "init_run": 1,
            "add_task_runs": 2,
            "set_task_run_state": 4,
            "save_task_run_log": 1,
        }
        _check_tracking_calls(mock_store, calls_before_stop)

        # Normally triggered at process exit; invoked explicitly for the test.
        dbnd_run_stop()

        calls_after_stop = {
            "init_run": 1,
            "add_task_runs": 2,
            "set_task_run_state": 6,
            "set_run_state": 1,
            "save_task_run_log": 3,
        }
        _check_tracking_calls(mock_store, calls_after_stop)

        def _reported_states(handler_name):
            # First positional argument is the handler name, second one the
            # kwargs dict that carries the reported state.
            return [
                invocation.args[1]["state"]
                for invocation in mock_store.call_args_list
                if invocation.args[0] == handler_name
            ]

        expected_task_run_states = [
            TaskRunState.RUNNING,  # driver
            TaskRunState.RUNNING,  # DAG
            TaskRunState.RUNNING,  # task
            TaskRunState.FAILED,  # task
            TaskRunState.UPSTREAM_FAILED,  # DAG
            TaskRunState.SUCCESS,  # driver
        ]
        assert expected_task_run_states == _reported_states("set_task_run_state")

        assert [RunState.FAILED] == _reported_states("set_run_state")
예제 #4
0
def test_tracking_pass_through_default_airflow(pandas_data_frame_on_disk,
                                               mock_channel_tracker):
    """Check tracker calls for the pass-through task, before and after stop.

    The task is handed the frame's file path as a plain string (not a
    DataFrame) and must return that very string. Call counts are verified
    once right after the task and once more after ``dbnd_run_stop()``.
    """
    _, df_file = pandas_data_frame_on_disk

    from targets.values import DateTimeValueType

    frame_path = str(df_file)
    some_date = DateTimeValueType().to_str(utcnow())

    task_result = task_pass_through_default(
        frame_path, some_date, expect_pass_through=True
    )
    assert task_result == frame_path

    calls_before_stop = {
        "init_run": 1,
        "add_task_runs": 1,  # real task only
        # DAG start (with task start), task finished
        "update_task_run_attempts": 2,
        "log_metrics": 1,
        "log_targets": 1,
        "save_task_run_log": 1,
    }
    _check_tracking_calls(mock_channel_tracker, calls_before_stop)

    expected_params = {
        "data": frame_path,
        "dt": some_date,
        "expect_pass_through": True,
    }
    _assert_tracked_params(
        mock_channel_tracker, task_pass_through_default, **expected_params
    )

    # Normally triggered at process exit; invoked explicitly for the test.
    dbnd_run_stop()

    calls_after_stop = {
        "init_run": 1,
        "add_task_runs": 1,
        "update_task_run_attempts": 3,  # as above + airflow root stop
        "log_metrics": 1,
        "log_targets": 1,
        "set_run_state": 1,
        "save_task_run_log": 2,  # as above + airflow root log
    }
    _check_tracking_calls(mock_channel_tracker, calls_after_stop)
예제 #5
0
def test_tracking_user_exception(mock_channel_tracker):
    """A user exception inside a tracked task must be tracked as a failure.

    Call counts on the mocked channel tracker are checked both before and
    after the run is stopped, followed by the exact sequence of task-run
    attempt states and run states that were reported.
    """
    # The task raises; the error has to propagate to the caller.
    with pytest.raises(ZeroDivisionError):
        task_pass_through_exception()

    calls_before_stop = {
        "init_run": 1,
        "add_task_runs": 2,
        "update_task_run_attempts": 4,
        "save_task_run_log": 1,
    }
    _check_tracking_calls(mock_channel_tracker, calls_before_stop)

    # Normally triggered at process exit; invoked explicitly for the test.
    dbnd_run_stop()

    calls_after_stop = {
        "init_run": 1,
        "add_task_runs": 2,
        "update_task_run_attempts": 6,
        "set_run_state": 1,
        "save_task_run_log": 3,
    }
    _check_tracking_calls(mock_channel_tracker, calls_after_stop)

    def _invocations_of(handler_name):
        # The first positional argument of every tracked call is the handler
        # callable; select the calls by that callable's __name__.
        return [
            invocation
            for invocation in mock_channel_tracker.call_args_list
            if invocation.args[0].__name__ == handler_name
        ]

    reported_attempt_states = [
        invocation.kwargs["task_run_attempt_updates"][0].state
        for invocation in _invocations_of("update_task_run_attempts")
    ]
    expected_attempt_states = [
        TaskRunState.RUNNING,  # driver
        TaskRunState.RUNNING,  # DAG
        TaskRunState.RUNNING,  # task
        TaskRunState.FAILED,  # task
        TaskRunState.UPSTREAM_FAILED,  # DAG
        TaskRunState.SUCCESS,  # driver
    ]
    assert expected_attempt_states == reported_attempt_states

    reported_run_states = [
        invocation.kwargs["state"]
        for invocation in _invocations_of("set_run_state")
    ]
    assert [RunState.FAILED] == reported_run_states
예제 #6
0
def set_airflow_context():
    """Yield while ``try_get_airflow_context`` is patched to a fixed context.

    For the duration of the ``yield`` the patched lookup returns an
    ``AirflowTaskContext`` for dag ``test_dag`` / task ``test_task``. On the
    way out the dbnd run is stopped and the project config is reset again.

    NOTE(review): written as a generator, presumably consumed as a pytest
    fixture or context manager - the decorator is not visible here.
    """
    patch_target = "dbnd._core.tracking.airflow_dag_inplace_tracking.try_get_airflow_context"
    with patch(patch_target) as context_lookup:
        try:
            reset_dbnd_project_config()

            fake_context = AirflowTaskContext(
                dag_id="test_dag",
                task_id="test_task",
                execution_date=utcnow().isoformat(),
            )
            context_lookup.return_value = fake_context
            yield
        finally:
            # Make sure dbnd_run_stop() runs (normally it happens on exit()).
            dbnd_run_stop()
            reset_dbnd_project_config()
예제 #7
0
def test_tracking_pass_through_nested_default(pandas_data_frame_on_disk,
                                              mock_channel_tracker):
    """Check tracker calls for the nested pass-through task.

    The task is handed the frame's file path as a plain string (not a
    DataFrame); the expected result is that path concatenated with itself.
    Call counts are verified before and after ``dbnd_run_stop()``.
    """
    _, df_file = pandas_data_frame_on_disk
    frame_path = str(df_file)
    run_date = utcnow().isoformat()

    task_result = task_pass_through_nested_default(
        frame_path, run_date, expect_pass_through=True
    )
    assert task_result == frame_path + frame_path

    calls_before_stop = {
        "init_run": 1,
        "add_task_runs": 2,
        "update_task_run_attempts": 4,
        "log_metrics": 1,
        "log_targets": 2,
        "save_task_run_log": 2,
    }
    _check_tracking_calls(mock_channel_tracker, calls_before_stop)

    # Normally triggered at process exit; invoked explicitly for the test.
    dbnd_run_stop()

    calls_after_stop = {
        "init_run": 1,
        "add_task_runs": 2,
        "update_task_run_attempts": 5,
        "log_metrics": 1,
        "log_targets": 2,
        "set_run_state": 1,
        "save_task_run_log": 3,
    }
    _check_tracking_calls(mock_channel_tracker, calls_after_stop)
예제 #8
0
def test_tracking_pass_through_nested_default(pandas_data_frame_on_disk):
    """Check tracking-store calls for the nested pass-through task.

    The tracking store's invoke entry point is mocked. The task is handed the
    frame's file path as a plain string (not a DataFrame); the expected
    result is that path concatenated with itself. Call counts are verified
    before and after ``dbnd_run_stop()``.
    """
    _, df_file = pandas_data_frame_on_disk
    frame_path = str(df_file)

    with mock.patch(COMPOSITE_TRACKING_STORE_INVOKE_REF) as mock_store:
        run_date = utcnow().isoformat()
        task_result = task_pass_through_nested_default(
            frame_path, run_date, expect_pass_through=True
        )
        assert task_result == frame_path + frame_path

        calls_before_stop = {
            "init_run": 1,
            "add_task_runs": 3,
            "set_task_run_state": 6,
            "log_metrics": 1,
            "log_target": 2,
            "save_task_run_log": 2,
        }
        _check_tracking_calls(mock_store, calls_before_stop)

        # Normally triggered at process exit; invoked explicitly for the test.
        dbnd_run_stop()

        calls_after_stop = {
            "init_run": 1,
            "add_task_runs": 3,
            "set_task_run_state": 8,
            "log_metrics": 1,
            "log_target": 2,
            "set_run_state": 1,
            "save_task_run_log": 4,
        }
        _check_tracking_calls(mock_store, calls_after_stop)
예제 #9
0
    return p * p


@task
def f1():
    """Add up ``f2(i)`` for i = 1, 2, 3 and assert that the total is 14.

    NOTE(review): 14 == 1 + 4 + 9, which matches ``f2`` squaring its
    argument - ``f2`` is defined outside this block, confirm there.
    """
    print("Running f1 function")
    # Accumulate under the name `total` so the builtin `sum` is not shadowed;
    # f2 is still called once per value, in ascending order.
    total = sum(f2(i) for i in range(1, 4))

    assert total == 14


if __name__ == "__main__":
    # Script entry point: behavior is selected by marker values looked up in
    # sys.argv (USE_DBND_START, FAIL_MAIN, USE_DBND_STOP).
    if USE_DBND_START in sys.argv:
        # NOTE(review): started twice back-to-back - presumably to exercise
        # repeated starts; confirm this duplication is intentional.
        dbnd_run_start()
        dbnd_run_start()

    f1()

    print("Done")

    if FAIL_MAIN in sys.argv:
        # Raised after f1() has completed and before the explicit stop
        # below, so with FAIL_MAIN set the explicit stop is never reached.
        raise Exception("main bummer!")

    if USE_DBND_STOP in sys.argv:
        # NOTE(review): stopped twice back-to-back - presumably to exercise
        # repeated stops; confirm this duplication is intentional.
        dbnd_run_stop()
        dbnd_run_stop()
예제 #10
0
 def new_post_execute(*args, **kwargs):
     """Stop the dbnd run, then call ``post_execute`` with the same arguments.

     The return value of ``post_execute`` is not propagated.
     NOTE(review): ``post_execute`` comes from an enclosing scope that is not
     visible here - presumably the original hook being wrapped; confirm.
     """
     # The stop is issued before delegating to the wrapped callable.
     dbnd_run_stop(at_exit=False)
     post_execute(*args, **kwargs)
예제 #11
0
 def test_sanity_with_airflow(self, with_airflow_tracking_env):
     """Smoke test: run ``fake_task_inside_dag`` and stop the dbnd run.

     There are no assertions; the test passes when nothing raises.
     """
     fake_task_inside_dag()
     # Stop the run explicitly inside the test.
     dbnd_run_stop()
     print("hey")
예제 #12
0
def test_log_file_write_fail(set_env):
    """Run the function and stop the run with ``save_log_preview`` replaced.

    ``TaskRunLogManager.save_log_preview`` is swapped for
    ``fake_save_log_preview`` for the duration of the test. There are no
    assertions; the test passes when nothing raises.
    NOTE(review): the fake is presumably a failing stand-in that simulates a
    log write error - confirm against its definition.
    """
    with mock.patch.object(TaskRunLogManager, "save_log_preview",
                           fake_save_log_preview):
        try_run_function()
        # Stop the run while the patch is still active.
        dbnd_run_stop()
예제 #13
0
def test_webserver_connection_fail(set_env):
    """Run the function and stop the run with ``save_task_run_log`` replaced.

    ``TaskRunTracker.save_task_run_log`` is swapped for
    ``fake_save_task_run_log`` for the duration of the test. There are no
    assertions; the test passes when nothing raises.
    NOTE(review): the fake is presumably a failing stand-in that simulates an
    unreachable webserver - confirm against its definition.
    """
    with mock.patch.object(TaskRunTracker, "save_task_run_log",
                           fake_save_task_run_log):
        try_run_function()
        # The function enables tracking (dbnd_run_start), but dbnd_run_stop is
        # never called by it, so the run is stopped explicitly here.
        dbnd_run_stop()