def test_tracking_pass_through_default_tracking(
    pandas_data_frame_on_disk, mock_channel_tracker
):
    """Run ``task_pass_through_default`` with a path string and verify tracking.

    The task is called with ``str(df_file)`` where a DataFrame is declared,
    the run is stopped explicitly, and the calls recorded on
    ``mock_channel_tracker`` plus the tracked parameters are checked.
    """
    _, df_file = pandas_data_frame_on_disk
    data_path = str(df_file)

    # A plain string stands in for the declared DataFrame; the task is
    # expected to hand it straight back.
    some_date = utcnow().isoformat()
    task_result = task_pass_through_default(
        data_path, some_date, expect_pass_through=True
    )
    assert task_result == data_path

    # Normally triggered at process exit; done by hand so the final calls
    # are recorded before the checks below.
    dbnd_run_stop()

    expected_calls = {
        "init_run": 1,
        "add_task_runs": 1,
        "log_metrics": 1,
        "log_targets": 1,
        "save_task_run_log": 2,
        "set_run_state": 1,
        # DAG start (with task start), task finished, DAG stop
        "update_task_run_attempts": 3,
    }
    _check_tracking_calls(mock_channel_tracker, expected_calls)

    _assert_tracked_params(
        mock_channel_tracker,
        task_pass_through_default,
        data=data_path,
        dt=some_date,
        expect_pass_through=True,
    )
def track_airflow_dag_run_operator_run(
    task_cls, call_args, call_kwargs, airflow_task_context
):
    """Run a decorated function as a dynamic task under an Airflow task context.

    Starts the DAG/operator tracking run for the given Airflow context, runs
    ``task_cls`` through ``run_dynamic_task`` and always calls
    ``dbnd_run_stop`` afterwards.

    Args:
        task_cls: task class passed to ``run_dynamic_task``.
        call_args: positional arguments of the original call.
        call_kwargs: keyword arguments of the original call.
        airflow_task_context: object providing ``dag_id``, ``execution_date``
            and ``task_id``.

    Returns:
        The loaded result value when the task definition has a single result
        output, otherwise the task object itself.
    """
    from dbnd import dbnd_run_stop

    # this part will run DAG and Operator Tasks
    # (called for its side effect only; the returned value is not needed)
    dbnd_run_start_airflow_dag_task(
        dag_id=airflow_task_context.dag_id,
        execution_date=airflow_task_context.execution_date,
        task_id=airflow_task_context.task_id,
    )

    # this is the real run of the decorated function
    try:
        task_run = run_dynamic_task(
            parent_task_run=current_task_run(),
            task_cls=task_cls,
            call_args=call_args,
            call_kwargs=call_kwargs,
        )
        t = task_run.task

        # if we are inside run, we want to have real values, not deferred!
        if t.task_definition.single_result_output:
            return t.__class__.result.load_from_target(t.result)

        # no single result output to load: hand back the task object itself
        return t
    finally:
        # we use update_run_state=False, since during airflow actual task run
        # we don't know anything about whole run - like is it passed or failed
        dbnd_run_stop(at_exit=False, update_run_state=False)
def test_tracking_user_exception():
    """Check the tracking-store calls when the tracked task raises.

    ``task_pass_through_exception`` is expected to raise
    ``ZeroDivisionError``; store invocations are counted before and after
    ``dbnd_run_stop()``, then the recorded task-run and run states are
    compared with the expected sequences.
    """
    with mock.patch(COMPOSITE_TRACKING_STORE_INVOKE_REF) as mock_store:

        def states_of(handler_name):
            # "state" payload of every store invocation with this handler name
            return [
                invocation.args[1]["state"]
                for invocation in mock_store.call_args_list
                if invocation.args[0] == handler_name
            ]

        # The user error must propagate out of the tracked call.
        with pytest.raises(ZeroDivisionError):
            task_pass_through_exception()

        calls_before_stop = {
            "init_run": 1,
            "add_task_runs": 2,
            "set_task_run_state": 4,
            "save_task_run_log": 1,
        }
        _check_tracking_calls(mock_store, calls_before_stop)

        # this should happen on process exit in normal circumstances
        dbnd_run_stop()

        calls_after_stop = {
            "init_run": 1,
            "add_task_runs": 2,
            "set_task_run_state": 6,
            "set_run_state": 1,
            "save_task_run_log": 3,
        }
        _check_tracking_calls(mock_store, calls_after_stop)

        expected_task_states = [
            TaskRunState.RUNNING,  # driver
            TaskRunState.RUNNING,  # DAG
            TaskRunState.RUNNING,  # task
            TaskRunState.FAILED,  # task
            TaskRunState.UPSTREAM_FAILED,  # DAG
            TaskRunState.SUCCESS,  # driver
        ]
        assert expected_task_states == states_of("set_task_run_state")

        assert [RunState.FAILED] == states_of("set_run_state")
def test_tracking_pass_through_default_airflow(
    pandas_data_frame_on_disk, mock_channel_tracker
):
    """Verify tracker calls for ``task_pass_through_default`` before and after stop.

    The task receives a path string where a DataFrame is declared and a date
    string produced by ``DateTimeValueType``; call counts on
    ``mock_channel_tracker`` are checked once right after the task and once
    more after ``dbnd_run_stop()``.
    """
    from targets.values import DateTimeValueType

    _, df_file = pandas_data_frame_on_disk
    data_path = str(df_file)

    # A plain string stands in for the declared DataFrame; the task is
    # expected to hand it straight back.
    some_date = DateTimeValueType().to_str(utcnow())
    task_result = task_pass_through_default(
        data_path, some_date, expect_pass_through=True
    )
    assert task_result == data_path

    calls_before_stop = {
        "init_run": 1,
        "add_task_runs": 1,  # real task only
        # DAG start (with task start), task finished
        "update_task_run_attempts": 2,
        "log_metrics": 1,
        "log_targets": 1,
        "save_task_run_log": 1,
    }
    _check_tracking_calls(mock_channel_tracker, calls_before_stop)

    _assert_tracked_params(
        mock_channel_tracker,
        task_pass_through_default,
        data=data_path,
        dt=some_date,
        expect_pass_through=True,
    )

    # this should happen on process exit in normal circumstances
    dbnd_run_stop()

    calls_after_stop = {
        "init_run": 1,
        "add_task_runs": 1,
        "update_task_run_attempts": 3,  # as above + airflow root stop
        "log_metrics": 1,
        "log_targets": 1,
        "set_run_state": 1,
        "save_task_run_log": 2,  # as above + airflow root log
    }
    _check_tracking_calls(mock_channel_tracker, calls_after_stop)
def test_tracking_user_exception(mock_channel_tracker):
    """Check the channel-tracker calls when the tracked task raises.

    ``task_pass_through_exception`` is expected to raise
    ``ZeroDivisionError``; tracker calls are counted before and after
    ``dbnd_run_stop()``, then the recorded attempt states and run states are
    compared with the expected sequences.
    """

    def calls_named(handler_name):
        # every recorded call whose first positional argument is a callable
        # with this ``__name__``
        return [
            invocation
            for invocation in mock_channel_tracker.call_args_list
            if invocation.args[0].__name__ == handler_name
        ]

    # The user error must propagate out of the tracked call.
    with pytest.raises(ZeroDivisionError):
        task_pass_through_exception()

    calls_before_stop = {
        "init_run": 1,
        "add_task_runs": 2,
        "update_task_run_attempts": 4,
        "save_task_run_log": 1,
    }
    _check_tracking_calls(mock_channel_tracker, calls_before_stop)

    # this should happen on process exit in normal circumstances
    dbnd_run_stop()

    calls_after_stop = {
        "init_run": 1,
        "add_task_runs": 2,
        "update_task_run_attempts": 6,
        "set_run_state": 1,
        "save_task_run_log": 3,
    }
    _check_tracking_calls(mock_channel_tracker, calls_after_stop)

    update_task_run_attempts_chain = [
        invocation.kwargs["task_run_attempt_updates"][0].state
        for invocation in calls_named("update_task_run_attempts")
    ]
    expected_attempt_states = [
        TaskRunState.RUNNING,  # driver
        TaskRunState.RUNNING,  # DAG
        TaskRunState.RUNNING,  # task
        TaskRunState.FAILED,  # task
        TaskRunState.UPSTREAM_FAILED,  # DAG
        TaskRunState.SUCCESS,  # driver
    ]
    assert expected_attempt_states == update_task_run_attempts_chain

    set_run_state_chain = [
        invocation.kwargs["state"] for invocation in calls_named("set_run_state")
    ]
    assert [RunState.FAILED] == set_run_state_chain
def set_airflow_context():
    """Yield while ``try_get_airflow_context`` is patched to a fixed context.

    For the duration of the ``yield`` the patched function returns an
    ``AirflowTaskContext`` for ``test_dag`` / ``test_task``. On the way out
    the run is stopped and the project config is reset, whether or not the
    consumer raised.
    """
    patched_target = (
        "dbnd._core.tracking.airflow_dag_inplace_tracking.try_get_airflow_context"
    )
    with patch(patched_target) as context_getter:
        try:
            reset_dbnd_project_config()
            fake_context = AirflowTaskContext(
                dag_id="test_dag",
                task_id="test_task",
                execution_date=utcnow().isoformat(),
            )
            context_getter.return_value = fake_context
            yield
        finally:
            # ensure dbnd_run_stop() is called (normally should happen on exit())
            dbnd_run_stop()
            reset_dbnd_project_config()
def test_tracking_pass_through_nested_default(
    pandas_data_frame_on_disk, mock_channel_tracker
):
    """Verify tracker calls for ``task_pass_through_nested_default``.

    The task is called with a path string where a DataFrame is declared and
    is expected to return that string concatenated with itself. Call counts
    on ``mock_channel_tracker`` are checked before and after
    ``dbnd_run_stop()``.
    """
    _, df_file = pandas_data_frame_on_disk
    data_path = str(df_file)

    # A plain string stands in for the declared DataFrame.
    task_result = task_pass_through_nested_default(
        data_path, utcnow().isoformat(), expect_pass_through=True
    )
    assert task_result == data_path + data_path

    calls_before_stop = {
        "init_run": 1,
        "add_task_runs": 2,
        "update_task_run_attempts": 4,
        "log_metrics": 1,
        "log_targets": 2,
        "save_task_run_log": 2,
    }
    _check_tracking_calls(mock_channel_tracker, calls_before_stop)

    # this should happen on process exit in normal circumstances
    dbnd_run_stop()

    calls_after_stop = {
        "init_run": 1,
        "add_task_runs": 2,
        "update_task_run_attempts": 5,
        "log_metrics": 1,
        "log_targets": 2,
        "set_run_state": 1,
        "save_task_run_log": 3,
    }
    _check_tracking_calls(mock_channel_tracker, calls_after_stop)
def test_tracking_pass_through_nested_default(pandas_data_frame_on_disk):
    """Verify tracking-store calls for ``task_pass_through_nested_default``.

    The store invoke entry point is patched; the task is called with a path
    string where a DataFrame is declared and is expected to return that
    string concatenated with itself. Store call counts are checked before
    and after ``dbnd_run_stop()``.
    """
    _, df_file = pandas_data_frame_on_disk
    data_path = str(df_file)

    with mock.patch(COMPOSITE_TRACKING_STORE_INVOKE_REF) as mock_store:
        # A plain string stands in for the declared DataFrame.
        task_result = task_pass_through_nested_default(
            data_path, utcnow().isoformat(), expect_pass_through=True
        )
        assert task_result == data_path + data_path

        calls_before_stop = {
            "init_run": 1,
            "add_task_runs": 3,
            "set_task_run_state": 6,
            "log_metrics": 1,
            "log_target": 2,
            "save_task_run_log": 2,
        }
        _check_tracking_calls(mock_store, calls_before_stop)

        # this should happen on process exit in normal circumstances
        dbnd_run_stop()

        calls_after_stop = {
            "init_run": 1,
            "add_task_runs": 3,
            "set_task_run_state": 8,
            "log_metrics": 1,
            "log_target": 2,
            "set_run_state": 1,
            "save_task_run_log": 4,
        }
        _check_tracking_calls(mock_store, calls_after_stop)
return p * p @task def f1(): print("Running f1 function") sum = 0 for i in range(1, 4): sum += f2(i) assert sum == 14 if __name__ == "__main__": if USE_DBND_START in sys.argv: dbnd_run_start() dbnd_run_start() f1() print("Done") if FAIL_MAIN in sys.argv: raise Exception("main bummer!") if USE_DBND_STOP in sys.argv: dbnd_run_stop() dbnd_run_stop()
def new_post_execute(*args, **kwargs):
    """Stop the dbnd run, then delegate to the wrapped ``post_execute``.

    All positional and keyword arguments are forwarded unchanged; the
    wrapped callable's return value is discarded.
    """
    # at_exit=False: this stop is issued explicitly from the hook
    dbnd_run_stop(at_exit=False)

    post_execute(*args, **kwargs)
def test_sanity_with_airflow(self, with_airflow_tracking_env):
    """Smoke test: run ``fake_task_inside_dag`` and stop the run without errors."""
    fake_task_inside_dag()

    # close the run explicitly before the test returns
    dbnd_run_stop()

    print("hey")
def test_log_file_write_fail(set_env):
    """Run the tracked function with ``save_log_preview`` replaced by a fake.

    The test passes when neither the tracked function nor the explicit stop
    raises while ``TaskRunLogManager.save_log_preview`` is patched.
    """
    log_preview_patch = mock.patch.object(
        TaskRunLogManager, "save_log_preview", fake_save_log_preview
    )
    with log_preview_patch:
        try_run_function()
        # stop while the replacement is still installed
        dbnd_run_stop()
def test_webserver_connection_fail(set_env):
    """Run the tracked function with ``save_task_run_log`` replaced by a fake.

    The test passes when neither the tracked function nor the explicit stop
    raises while ``TaskRunTracker.save_task_run_log`` is patched.
    """
    run_log_patch = mock.patch.object(
        TaskRunTracker, "save_task_run_log", fake_save_task_run_log
    )
    with run_log_patch:
        try_run_function()
        # function enables tracking -> dbnd_run_start, but dbnd_run_stop is
        # never called, so it is issued here by hand.
        dbnd_run_stop()