def test_listen_notify_single_run_event():
    """Events stored for a watched run should arrive on the watcher queue."""
    event_log_storage = PostgresEventLogStorage.create_nuked_storage(get_test_conn_string())

    @solid
    def return_one(_):
        return 1

    def _solids():
        return_one()

    watcher = create_event_watcher(get_test_conn_string())
    run_id = str(uuid.uuid4())
    watcher.watch_run(run_id)
    try:
        events, result = gather_events(_solids, run_config=RunConfig(run_id=run_id))
        for to_store in events:
            event_log_storage.store_event(to_store)

        # The first queue entry marks watcher startup.
        first = watcher.queue.get(block=True)
        assert isinstance(first, EventWatcherStart)

        # Five notifications follow, each tagged with the watched run id.
        for _ in range(5):
            notification = watcher.queue.get(block=True)
            assert isinstance(notification, EventWatcherEvent)
            assert notification.payload.run_id == result.run_id
    finally:
        watcher.close()
def test_basic_event_store():
    """Events stored to Postgres should round-trip through raw row fetches."""
    @solid
    def return_one(_):
        return 1

    def _solids():
        return_one()

    events, _result = gather_events(_solids)

    event_log_storage = PostgresEventLogStorage.create_nuked_storage(get_test_conn_string())
    for to_store in events:
        event_log_storage.store_event(to_store)

    rows = fetch_all_events(get_test_conn_string())
    out_events = [deserialize_json_to_dagster_namedtuple(row[0]) for row in rows]

    # A single-solid pipeline produces exactly this lifecycle sequence.
    expected_sequence = [
        DagsterEventType.PIPELINE_START,
        DagsterEventType.ENGINE_EVENT,
        DagsterEventType.STEP_START,
        DagsterEventType.STEP_OUTPUT,
        DagsterEventType.STEP_SUCCESS,
        DagsterEventType.ENGINE_EVENT,
        DagsterEventType.PIPELINE_SUCCESS,
    ]
    assert [evt.dagster_event.event_type for evt in out_events] == expected_sequence
def test_wipe_postgres_event_log():
    """wipe() should leave the event log empty for a previously-stored run."""
    @solid
    def return_one(_):
        return 1

    def _solids():
        return_one()  # pylint: disable=no-value-for-parameter

    events, result = gather_events(_solids)

    event_log_storage = PostgresEventLogStorage.create_nuked_storage(get_test_conn_string())
    for to_store in events:
        event_log_storage.store_event(to_store)

    expected_sequence = [
        DagsterEventType.PIPELINE_START,
        DagsterEventType.ENGINE_EVENT,
        DagsterEventType.STEP_START,
        DagsterEventType.STEP_OUTPUT,
        DagsterEventType.STEP_SUCCESS,
        DagsterEventType.ENGINE_EVENT,
        DagsterEventType.PIPELINE_SUCCESS,
    ]
    assert event_types(event_log_storage.get_logs_for_run(result.run_id)) == expected_sequence

    event_log_storage.wipe()
    assert event_log_storage.get_logs_for_run(result.run_id) == []
def test_add_get_postgres_run_storage_explicit_run(pg_db):
    """Round-trip a fully-specified PipelineRun through Postgres run storage.

    Renamed from ``test_add_get_postgres_run_storage``: this module defines a
    second test with that exact name further down, and the duplicate
    definition shadowed this one so pytest never collected or ran it.
    """
    run_storage = PostgresRunStorage.create_nuked_storage(get_test_conn_string())
    run_id = str(uuid.uuid4())
    run_to_add = PipelineRun(
        pipeline_name='pipeline_name',
        run_id=run_id,
        environment_dict={},
        mode='some_mode',
        # https://github.com/dagster-io/dagster/issues/1709
        # ExecutionSelector should be threaded all the way
        # down from the top
        selector=ExecutionSelector('pipeline_name'),
        reexecution_config=None,
        step_keys_to_execute=None,
        status=PipelineRunStatus.NOT_STARTED,
    )
    run_storage.add_run(run_to_add)

    # The stored run must come back equal under every lookup path.
    fetched_run = run_storage.get_run_by_id(run_id)
    assert run_to_add == fetched_run

    assert run_storage.has_run(run_id)
    assert not run_storage.has_run(str(uuid.uuid4()))

    assert run_storage.all_runs() == [run_to_add]
    assert run_storage.all_runs_for_pipeline('pipeline_name') == [run_to_add]
    assert run_storage.all_runs_for_pipeline('nope') == []

    run_storage.wipe()
    assert run_storage.all_runs() == []
def test_fetch_by_status():
    """get_runs_for_status should partition runs exactly by their status."""
    storage = PostgresRunStorage.create_nuked_storage(get_test_conn_string())
    assert storage

    one, two, three, four = (str(uuid.uuid4()) for _ in range(4))

    for run_id, status in [
        (one, PipelineRunStatus.NOT_STARTED),
        (two, PipelineRunStatus.STARTED),
        (three, PipelineRunStatus.STARTED),
        (four, PipelineRunStatus.FAILURE),
    ]:
        storage.add_run(build_run(run_id=run_id, pipeline_name='some_pipeline', status=status))

    def _ids_for(status):
        # Collect just the run ids for a status query.
        return {run.run_id for run in storage.get_runs_for_status(status)}

    assert _ids_for(PipelineRunStatus.NOT_STARTED) == {one}
    assert _ids_for(PipelineRunStatus.STARTED) == {two, three}
    assert _ids_for(PipelineRunStatus.FAILURE) == {four}
    assert _ids_for(PipelineRunStatus.SUCCESS) == set()
def test_basic_get_logs_for_run():
    """get_logs_for_run should return the full ordered event sequence."""
    @solid
    def return_one(_):
        return 1

    def _solids():
        return_one()

    events, result = gather_events(_solids)

    event_log_storage = PostgresEventLogStorage.create_nuked_storage(get_test_conn_string())
    for to_store in events:
        event_log_storage.store_event(to_store)

    expected_sequence = [
        DagsterEventType.PIPELINE_START,
        DagsterEventType.ENGINE_EVENT,
        DagsterEventType.STEP_START,
        DagsterEventType.STEP_OUTPUT,
        DagsterEventType.STEP_SUCCESS,
        DagsterEventType.ENGINE_EVENT,
        DagsterEventType.PIPELINE_SUCCESS,
    ]
    assert event_types(event_log_storage.get_logs_for_run(result.run_id)) == expected_sequence
def test_nuke():
    """wipe() should remove every stored run."""
    storage = PostgresRunStorage.create_nuked_storage(get_test_conn_string())
    assert storage

    new_run_id = str(uuid.uuid4())
    storage.add_run(build_run(run_id=new_run_id, pipeline_name='some_pipeline'))
    assert len(storage.all_runs()) == 1

    storage.wipe()
    assert list(storage.all_runs()) == []
def test_handle_run_event_pipeline_success_from_empty_run():
    """handle_run_event must only transition the run the event belongs to.

    Renamed from ``test_handle_run_event_pipeline_success_test``: a second
    test below reuses that exact name, so this definition was shadowed and
    never collected by pytest. The repeated DagsterEvent construction is
    factored into a local helper.
    """
    run_storage = PostgresRunStorage.create_nuked_storage(get_test_conn_string())
    run_id = str(uuid.uuid4())
    run_storage.add_run(PipelineRun.create_empty_run(pipeline_name='pipeline_name', run_id=run_id))

    def _pipeline_event(event_type):
        # Minimal DagsterEvent of the given type for this pipeline.
        return DagsterEvent(
            message='a message',
            event_type_value=event_type.value,
            pipeline_name='pipeline_name',
            step_key=None,
            solid_handle=None,
            step_kind_value=None,
            logging_tags=None,
        )

    run_storage.handle_run_event(run_id, _pipeline_event(DagsterEventType.PIPELINE_START))
    assert run_storage.get_run_by_id(run_id).status == PipelineRunStatus.STARTED

    # A success event for a different run id must not touch this run.
    run_storage.handle_run_event(
        str(uuid.uuid4()),  # diff run
        _pipeline_event(DagsterEventType.PIPELINE_SUCCESS),
    )
    assert run_storage.get_run_by_id(run_id).status == PipelineRunStatus.STARTED

    # The success event for the correct run flips it to SUCCESS.
    run_storage.handle_run_event(
        run_id,  # correct run
        _pipeline_event(DagsterEventType.PIPELINE_SUCCESS),
    )
    assert run_storage.get_run_by_id(run_id).status == PipelineRunStatus.SUCCESS
def test_listen_notify_filter_two_runs_event():
    """Watching two runs should deliver notifications for both of them."""
    event_log_storage = PostgresEventLogStorage.create_nuked_storage(get_test_conn_string())

    @solid
    def return_one(_):
        return 1

    def _solids():
        return_one()

    watcher = create_event_watcher(get_test_conn_string())
    run_id_one = str(uuid.uuid4())
    run_id_two = str(uuid.uuid4())
    watcher.watch_run(run_id_one)
    watcher.watch_run(run_id_two)

    try:
        events_one, result_one = gather_events(_solids, run_config=RunConfig(run_id=run_id_one))
        for to_store in events_one:
            event_log_storage.store_event(to_store)

        events_two, result_two = gather_events(_solids, run_config=RunConfig(run_id=run_id_two))
        for to_store in events_two:
            event_log_storage.store_event(to_store)

        # Startup marker precedes any run notifications.
        first = watcher.queue.get(block=True)
        assert isinstance(first, EventWatcherStart)

        # Ten notifications total, each belonging to one of the watched runs.
        watched_run_ids = {result_one.run_id, result_two.run_id}
        for _ in range(10):
            notification = watcher.queue.get(block=True)
            assert isinstance(notification, EventWatcherEvent)
            assert notification.payload.run_id in watched_run_ids
    finally:
        watcher.close()
def test_fetch_by_pipeline():
    """all_runs_for_pipeline should return only the matching pipeline's runs."""
    storage = PostgresRunStorage.create_nuked_storage(get_test_conn_string())
    assert storage

    matching_id = str(uuid.uuid4())
    other_id = str(uuid.uuid4())
    storage.add_run(build_run(run_id=matching_id, pipeline_name='some_pipeline'))
    storage.add_run(build_run(run_id=other_id, pipeline_name='some_other_pipeline'))
    assert len(storage.all_runs()) == 2

    some_runs = storage.all_runs_for_pipeline('some_pipeline')
    assert len(some_runs) == 1
    assert some_runs[0].run_id == matching_id
def test_listen_notify_filter_run_event():
    """Notifications should only be delivered for explicitly watched runs."""
    event_log_storage = PostgresEventLogStorage.create_nuked_storage(get_test_conn_string())

    @solid
    def return_one(_):
        return 1

    def _solids():
        return_one()  # pylint: disable=no-value-for-parameter

    watcher = create_event_watcher(get_test_conn_string())
    run_id_one = str(uuid.uuid4())
    run_id_two = str(uuid.uuid4())

    # only watch one of the runs
    watcher.watch_run(run_id_two)

    try:
        events_one, _result_one = gather_events(_solids, run_config=RunConfig(run_id=run_id_one))
        for to_store in events_one:
            event_log_storage.store_event(to_store)

        events_two, result_two = gather_events(_solids, run_config=RunConfig(run_id=run_id_two))
        for to_store in events_two:
            event_log_storage.store_event(to_store)

        first = watcher.queue.get(block=True)
        assert isinstance(first, EventWatcherStart)

        # Only run two is watched, so every notification belongs to it.
        for _ in range(5):
            notification = watcher.queue.get(block=True)
            assert isinstance(notification, EventWatcherEvent)
            assert notification.payload.run_id == result_two.run_id
    finally:
        watcher.close()
def test_fetch_by_tag():
    """all_runs_for_tag should match on both tag key and tag value."""
    storage = PostgresRunStorage.create_nuked_storage(get_test_conn_string())
    assert storage

    hello_id = str(uuid.uuid4())
    goodbye_id = str(uuid.uuid4())
    untagged_id = str(uuid.uuid4())
    storage.add_run(
        build_run(run_id=hello_id, pipeline_name='some_pipeline', tags={'mytag': 'hello'})
    )
    storage.add_run(
        build_run(run_id=goodbye_id, pipeline_name='some_pipeline', tags={'mytag': 'goodbye'})
    )
    storage.add_run(build_run(run_id=untagged_id, pipeline_name='some_pipeline'))
    assert len(storage.all_runs()) == 3

    some_runs = storage.all_runs_for_tag('mytag', 'hello')
    assert len(some_runs) == 1
    assert some_runs[0].run_id == hello_id
def test_basic_get_logs_for_run_multiple_runs_cursors():
    """Cursors should be applied per run, independently of other runs' logs."""
    event_log_storage = PostgresEventLogStorage.create_nuked_storage(get_test_conn_string())

    @solid
    def return_one(_):
        return 1

    def _solids():
        return_one()  # pylint: disable=no-value-for-parameter

    events_one, result_one = gather_events(_solids)
    for to_store in events_one:
        event_log_storage.store_event(to_store)

    events_two, result_two = gather_events(_solids)
    for to_store in events_two:
        event_log_storage.store_event(to_store)

    out_events_one = event_log_storage.get_logs_for_run(result_one.run_id, cursor=1)
    assert len(out_events_one) == 5
    assert event_types(out_events_one) == [
        # DagsterEventType.PIPELINE_START,
        # DagsterEventType.ENGINE_EVENT,
        DagsterEventType.STEP_START,
        DagsterEventType.STEP_OUTPUT,
        DagsterEventType.STEP_SUCCESS,
        DagsterEventType.ENGINE_EVENT,
        DagsterEventType.PIPELINE_SUCCESS,
    ]
    # Every returned record belongs to run one.
    assert {evt.run_id for evt in out_events_one} == {result_one.run_id}

    out_events_two = event_log_storage.get_logs_for_run(result_two.run_id, cursor=2)
    assert len(out_events_two) == 4
    assert event_types(out_events_two) == [
        # DagsterEventType.PIPELINE_START,
        # DagsterEventType.ENGINE_EVENT,
        # DagsterEventType.STEP_OUTPUT,
        DagsterEventType.STEP_START,
        DagsterEventType.STEP_SUCCESS,
        DagsterEventType.ENGINE_EVENT,
        DagsterEventType.PIPELINE_SUCCESS,
    ]
    # Every returned record belongs to run two.
    assert {evt.run_id for evt in out_events_two} == {result_two.run_id}
def test_basic_get_logs_for_run_multiple_runs():
    """Each run's logs should be retrievable independently and complete."""
    event_log_storage = PostgresEventLogStorage.create_nuked_storage(get_test_conn_string())

    @solid
    def return_one(_):
        return 1

    def _solids():
        return_one()

    events_one, result_one = gather_events(_solids)
    for to_store in events_one:
        event_log_storage.store_event(to_store)

    events_two, result_two = gather_events(_solids)
    for to_store in events_two:
        event_log_storage.store_event(to_store)

    # The distinct event types emitted by a single-solid pipeline run
    # (ENGINE_EVENT occurs twice in sequence but dedupes in the set).
    expected_types = {
        DagsterEventType.PIPELINE_START,
        DagsterEventType.ENGINE_EVENT,
        DagsterEventType.STEP_START,
        DagsterEventType.STEP_OUTPUT,
        DagsterEventType.STEP_SUCCESS,
        DagsterEventType.PIPELINE_SUCCESS,
    }

    out_events_one = event_log_storage.get_logs_for_run(result_one.run_id)
    assert len(out_events_one) == 7
    assert set(event_types(out_events_one)) == expected_types
    assert {evt.run_id for evt in out_events_one} == {result_one.run_id}

    out_events_two = event_log_storage.get_logs_for_run(result_two.run_id)
    assert len(out_events_two) == 7
    assert set(event_types(out_events_two)) == expected_types
    assert {evt.run_id for evt in out_events_two} == {result_two.run_id}
def test_add_get_postgres_run_storage(pg_db):
    """A run added via build_run should round-trip through all query paths."""
    run_storage = PostgresRunStorage.create_nuked_storage(get_test_conn_string())

    run_id = str(uuid.uuid4())
    run_to_add = build_run(pipeline_name='pipeline_name', run_id=run_id)
    assert run_storage.add_run(run_to_add)

    assert run_storage.get_run_by_id(run_id) == run_to_add
    assert run_storage.has_run(run_id)
    assert not run_storage.has_run(str(uuid.uuid4()))

    assert run_storage.all_runs() == [run_to_add]
    assert run_storage.all_runs_for_pipeline('pipeline_name') == [run_to_add]
    assert run_storage.all_runs_for_pipeline('nope') == []

    run_storage.wipe()
    assert run_storage.all_runs() == []
def test_basic_get_logs_for_run_cursor():
    """A cursor of N should skip the first N+1 stored records for the run."""
    event_log_storage = PostgresEventLogStorage.create_nuked_storage(get_test_conn_string())

    @solid
    def return_one(_):
        return 1

    def _solids():
        return_one()  # pylint: disable=no-value-for-parameter

    events, result = gather_events(_solids)
    for to_store in events:
        event_log_storage.store_event(to_store)

    # cursor=0 drops only the leading PIPELINE_START.
    assert event_types(event_log_storage.get_logs_for_run(result.run_id, cursor=0)) == [
        # DagsterEventType.PIPELINE_START,
        DagsterEventType.ENGINE_EVENT,
        DagsterEventType.STEP_START,
        DagsterEventType.STEP_OUTPUT,
        DagsterEventType.STEP_SUCCESS,
        DagsterEventType.ENGINE_EVENT,
        DagsterEventType.PIPELINE_SUCCESS,
    ]

    # cursor=1 additionally drops the first ENGINE_EVENT.
    assert event_types(event_log_storage.get_logs_for_run(result.run_id, cursor=1)) == [
        # DagsterEventType.PIPELINE_START,
        # DagsterEventType.ENGINE_EVENT,
        DagsterEventType.STEP_START,
        DagsterEventType.STEP_OUTPUT,
        DagsterEventType.STEP_SUCCESS,
        DagsterEventType.ENGINE_EVENT,
        DagsterEventType.PIPELINE_SUCCESS,
    ]
def test_fetch_by_status_cursored():
    """Status queries should honor cursor and limit parameters."""
    storage = PostgresRunStorage.create_nuked_storage(get_test_conn_string())
    assert storage

    one, two, three, four = (str(uuid.uuid4()) for _ in range(4))
    for run_id, status in [
        (one, PipelineRunStatus.STARTED),
        (two, PipelineRunStatus.STARTED),
        (three, PipelineRunStatus.NOT_STARTED),
        (four, PipelineRunStatus.STARTED),
    ]:
        storage.add_run(build_run(run_id=run_id, pipeline_name='some_pipeline', status=status))

    # Cursoring from the last STARTED run returns the earlier two.
    cursor_four_runs = storage.get_runs_for_status(PipelineRunStatus.STARTED, cursor=four)
    assert len(cursor_four_runs) == 2
    assert {run.run_id for run in cursor_four_runs} == {one, two}

    cursor_two_runs = storage.get_runs_for_status(PipelineRunStatus.STARTED, cursor=two)
    assert len(cursor_two_runs) == 1
    assert {run.run_id for run in cursor_two_runs} == {one}

    # Cursoring from the first STARTED run leaves nothing.
    cursor_one_runs = storage.get_runs_for_status(PipelineRunStatus.STARTED, cursor=one)
    assert not cursor_one_runs

    # A limit caps how many runs come back after the cursor.
    cursor_four_limit_one = storage.get_runs_for_status(
        PipelineRunStatus.STARTED, cursor=four, limit=1
    )
    assert len(cursor_four_limit_one) == 1
    assert cursor_four_limit_one[0].run_id == two
def test_slice():
    """Cursor+limit slicing should behave the same across all query paths."""
    storage = PostgresRunStorage.create_nuked_storage(get_test_conn_string())

    one, two, three = sorted(str(uuid.uuid4()) for _ in range(3))
    for run_id in (one, two, three):
        storage.add_run(
            build_run(run_id=run_id, pipeline_name='some_pipeline', tags={'mytag': 'hello'})
        )

    # all_runs: full listing, then cursor at the latest id with limit=1
    # yields exactly the middle run.
    assert len(storage.all_runs()) == 3
    sliced = storage.all_runs(cursor=three, limit=1)
    assert len(sliced) == 1
    assert sliced[0].run_id == two

    # all_runs_for_pipeline: same slicing semantics.
    assert len(storage.all_runs_for_pipeline('some_pipeline')) == 3
    sliced = storage.all_runs_for_pipeline('some_pipeline', cursor=three, limit=1)
    assert len(sliced) == 1
    assert sliced[0].run_id == two

    # all_runs_for_tag: same slicing semantics.
    assert len(storage.all_runs_for_tag('mytag', 'hello')) == 3
    sliced = storage.all_runs_for_tag('mytag', 'hello', cursor=three, limit=1)
    assert len(sliced) == 1
    assert sliced[0].run_id == two
def test_handle_run_event_pipeline_success_test():
    """PIPELINE_SUCCESS must only update the status of its own run."""
    run_storage = PostgresRunStorage.create_nuked_storage(get_test_conn_string())
    run_id = str(uuid.uuid4())
    run_to_add = PipelineRun(
        pipeline_name='pipeline_name',
        run_id=run_id,
        environment_dict={},
        mode='some_mode',
        # https://github.com/dagster-io/dagster/issues/1709
        # ExecutionSelector should be threaded all the way
        # down from the top
        selector=ExecutionSelector('pipeline_name'),
        reexecution_config=None,
        step_keys_to_execute=None,
        status=PipelineRunStatus.NOT_STARTED,
    )
    run_storage.add_run(run_to_add)

    def _make_event(event_type_value):
        # Build a minimal DagsterEvent with the given type value.
        return DagsterEvent(
            message='a message',
            event_type_value=event_type_value,
            pipeline_name='pipeline_name',
            step_key=None,
            solid_handle=None,
            step_kind_value=None,
            logging_tags=None,
        )

    run_storage.handle_run_event(run_id, _make_event(DagsterEventType.PIPELINE_START.value))
    assert run_storage.get_run_by_id(run_id).status == PipelineRunStatus.STARTED

    # Success on an unrelated run id leaves this run untouched.
    run_storage.handle_run_event(
        str(uuid.uuid4()),  # diff run
        _make_event(DagsterEventType.PIPELINE_SUCCESS.value),
    )
    assert run_storage.get_run_by_id(run_id).status == PipelineRunStatus.STARTED

    run_storage.handle_run_event(
        run_id,  # correct run
        _make_event(DagsterEventType.PIPELINE_SUCCESS.value),
    )
    assert run_storage.get_run_by_id(run_id).status == PipelineRunStatus.SUCCESS
def get_conn_with_run_events():
    """Open an autocommit connection to the test database with the
    run-events table dropped and freshly recreated.

    Returns:
        An open psycopg2 connection; the caller owns it and is responsible
        for closing it.
    """
    conn = psycopg2.connect(get_test_conn_string())
    # DDL must take effect immediately, outside any transaction.
    conn.set_isolation_level(psycopg2.extensions.ISOLATION_LEVEL_AUTOCOMMIT)
    # Fix: the original created two cursors and never closed either one;
    # use a single cursor in a context manager so it is always released.
    with conn.cursor() as cursor:
        cursor.execute(DROP_TABLE_SQL)
        cursor.execute(CREATE_TABLE_SQL)
    return conn