def test_asset_events_error_parsing(conn_string):
    event_log_storage = PostgresEventLogStorage.create_clean_storage(conn_string)
    _logs = []

    def mock_log(msg):
        _logs.append(msg)

    asset_key = AssetKey("asset_one")

    @solid
    def materialize_one(_):
        yield AssetMaterialization(asset_key=asset_key)
        yield Output(1)

    def _solids():
        materialize_one()

    events_one, _ = synthesize_events(_solids)
    for event in events_one:
        event_log_storage.store_event(event)

    with mock.patch(
        "dagster.core.storage.event_log.sql_event_log.logging.warning",
        side_effect=mock_log,
    ):
        # simulate a stored row that deserializes to something other than an EventRecord
        with mock.patch(
            "dagster.core.storage.event_log.sql_event_log.deserialize_json_to_dagster_namedtuple",
            return_value="not_an_event_record",
        ):
            assert asset_key in set(event_log_storage.get_all_asset_keys())
            events = event_log_storage.get_asset_events(asset_key)
            assert len(events) == 0
            assert len(_logs) == 1
            assert re.match("Could not resolve asset event record as EventRecord", _logs[0])

        _logs = []  # reset logs

        # simulate a stored row whose JSON payload cannot be parsed at all
        with mock.patch(
            "dagster.core.storage.event_log.sql_event_log.deserialize_json_to_dagster_namedtuple",
            side_effect=seven.JSONDecodeError("error", "", 0),
        ):
            assert asset_key in set(event_log_storage.get_all_asset_keys())
            events = event_log_storage.get_asset_events(asset_key)
            assert len(events) == 0
            assert len(_logs) == 1
            assert re.match("Could not parse asset event record id", _logs[0])
def clean_event_log_storage(conn_string, should_autocreate_tables=True):
    check.invariant(
        TestPostgresInstance.dagster_postgres_installed(),
        "dagster_postgres must be installed to test with postgres",
    )
    from dagster_postgres.event_log import (  # pylint: disable=import-error
        PostgresEventLogStorage,
    )

    storage = PostgresEventLogStorage.create_clean_storage(
        conn_string, should_autocreate_tables=should_autocreate_tables
    )
    assert storage
    return storage
def test_basic_get_logs_for_run_multiple_runs_cursors(conn_string):
    event_log_storage = PostgresEventLogStorage.create_clean_storage(conn_string)

    @solid
    def return_one(_):
        return 1

    def _solids():
        return_one()

    events_one, result_one = synthesize_events(_solids)
    for event in events_one:
        event_log_storage.store_event(event)

    events_two, result_two = synthesize_events(_solids)
    for event in events_two:
        event_log_storage.store_event(event)

    out_events_one = event_log_storage.get_logs_for_run(result_one.run_id, cursor=1)
    assert len(out_events_one) == 7

    assert set(event_types(out_events_one)) == set(
        [
            DagsterEventType.PIPELINE_START,
            DagsterEventType.ENGINE_EVENT,
            DagsterEventType.STEP_START,
            DagsterEventType.STEP_OUTPUT,
            DagsterEventType.STEP_SUCCESS,
            DagsterEventType.ENGINE_EVENT,
            DagsterEventType.PIPELINE_SUCCESS,
        ]
    )

    assert set(map(lambda e: e.run_id, out_events_one)) == {result_one.run_id}

    out_events_two = event_log_storage.get_logs_for_run(result_two.run_id, cursor=2)
    assert len(out_events_two) == 7

    assert set(event_types(out_events_two)) == set(
        [
            DagsterEventType.PIPELINE_START,
            DagsterEventType.ENGINE_EVENT,
            DagsterEventType.STEP_OUTPUT,
            DagsterEventType.STEP_START,
            DagsterEventType.STEP_SUCCESS,
            DagsterEventType.ENGINE_EVENT,
            DagsterEventType.PIPELINE_SUCCESS,
        ]
    )

    assert set(map(lambda e: e.run_id, out_events_two)) == {result_two.run_id}
def test_listen_notify_filter_two_runs_event(conn_string):
    event_log_storage = PostgresEventLogStorage.create_clean_storage(conn_string)

    @solid
    def return_one(_):
        return 1

    def _solids():
        return_one()

    event_list_one = []
    event_list_two = []

    run_id_one = make_new_run_id()
    run_id_two = make_new_run_id()

    event_log_storage.event_watcher.watch_run(run_id_one, 0, event_list_one.append)
    event_log_storage.event_watcher.watch_run(run_id_two, 0, event_list_two.append)

    try:
        events_one, _result_one = gather_events(_solids, run_config=RunConfig(run_id=run_id_one))
        for event in events_one:
            event_log_storage.store_event(event)

        events_two, _result_two = gather_events(_solids, run_config=RunConfig(run_id=run_id_two))
        for event in events_two:
            event_log_storage.store_event(event)

        start = time.time()
        while (
            len(event_list_one) < 7 or len(event_list_two) < 7
        ) and time.time() - start < TEST_TIMEOUT:
            pass

        assert len(event_list_one) == 7
        assert len(event_list_two) == 7
        assert all([isinstance(event, DagsterEventRecord) for event in event_list_one])
        assert all([isinstance(event, DagsterEventRecord) for event in event_list_two])
    finally:
        del event_log_storage
def test_basic_get_logs_for_run(conn_string):
    @solid
    def return_one(_):
        return 1

    def _solids():
        return_one()

    events, result = synthesize_events(_solids)

    event_log_storage = PostgresEventLogStorage.create_clean_storage(conn_string)

    for event in events:
        event_log_storage.store_event(event)

    out_events = event_log_storage.get_logs_for_run(result.run_id)
    assert event_types(out_events) == event_types(events)
def test_listen_notify_filter_two_runs_event(conn_string):
    event_log_storage = PostgresEventLogStorage.create_clean_storage(conn_string)

    @solid
    def return_one(_):
        return 1

    def _solids():
        return_one()

    event_list_one = []
    event_list_two = []

    run_id_one = make_new_run_id()
    run_id_two = make_new_run_id()

    event_log_storage.event_watcher.watch_run(run_id_one, 0, event_list_one.append)
    event_log_storage.event_watcher.watch_run(run_id_two, 0, event_list_two.append)

    try:
        events_one, _result_one = synthesize_events(_solids, run_id=run_id_one)
        for event in events_one:
            event_log_storage.store_event(event)

        events_two, _result_two = synthesize_events(_solids, run_id=run_id_two)
        for event in events_two:
            event_log_storage.store_event(event)

        # poll until the watcher callbacks have received all events for both runs,
        # or until TEST_TIMEOUT elapses
        start = time.time()
        while (
            len(event_list_one) < len(events_one) or len(event_list_two) < len(events_two)
        ) and time.time() - start < TEST_TIMEOUT:
            pass

        assert len(event_list_one) == len(events_one)
        assert len(event_list_two) == len(events_two)

        # uncomment when https://github.com/dagster-io/dagster/issues/3368 is resolved with structured event
        # assert all([isinstance(event, DagsterEventRecord) for event in event_list_one])
        # assert all([isinstance(event, DagsterEventRecord) for event in event_list_two])
    finally:
        del event_log_storage
def test_basic_event_store(conn_string):
    @solid
    def return_one(_):
        return 1

    def _solids():
        return_one()

    events, _result = synthesize_events(_solids)

    event_log_storage = PostgresEventLogStorage.create_clean_storage(conn_string)

    for event in events:
        event_log_storage.store_event(event)

    rows = fetch_all_events(conn_string)

    out_events = list(map(lambda r: deserialize_json_to_dagster_namedtuple(r[0]), rows))

    # messages can come out of order
    assert Counter(event_types(out_events)) == Counter(
        [
            DagsterEventType.PIPELINE_START,
            DagsterEventType.ENGINE_EVENT,
            DagsterEventType.STEP_START,
            DagsterEventType.STEP_SUCCESS,
            DagsterEventType.PIPELINE_SUCCESS,
            DagsterEventType.STEP_OUTPUT,
            DagsterEventType.OBJECT_STORE_OPERATION,
            DagsterEventType.ENGINE_EVENT,
        ]
    )

    assert sorted_event_types(out_events) == [
        DagsterEventType.PIPELINE_START,
        DagsterEventType.ENGINE_EVENT,
        DagsterEventType.STEP_START,
        DagsterEventType.STEP_OUTPUT,
        DagsterEventType.OBJECT_STORE_OPERATION,
        DagsterEventType.STEP_SUCCESS,
        DagsterEventType.ENGINE_EVENT,
        DagsterEventType.PIPELINE_SUCCESS,
    ]
def test_run_step_stats_with_retries(conn_string):
    event_log_storage = PostgresEventLogStorage.create_clean_storage(conn_string)

    # minimal upstream solid (not shown in the original snippet) supplying the str
    # input consumed by should_retry
    @solid(output_defs=[OutputDefinition(str)])
    def should_succeed(_):
        return "success"

    @solid(input_defs=[InputDefinition("_input", str)], output_defs=[OutputDefinition(str)])
    def should_retry(context, _input):
        raise RetryRequested(max_retries=3)

    def _one():
        should_retry(should_succeed())

    events, result = synthesize_events(_one, check_success=False)
    for event in events:
        event_log_storage.store_event(event)

    step_stats = event_log_storage.get_step_stats_for_run(result.run_id, step_keys=["should_retry"])
    assert len(step_stats) == 1
    assert step_stats[0].step_key == "should_retry"
    assert step_stats[0].status == StepEventStatus.FAILURE
    assert step_stats[0].end_time > step_stats[0].start_time
    # one original attempt plus max_retries == 3 retries
    assert step_stats[0].attempts == 4
def test_secondary_index_asset_keys(conn_string):
    event_log_storage = PostgresEventLogStorage.create_clean_storage(conn_string)

    asset_key_one = AssetKey(["one"])
    asset_key_two = AssetKey(["two"])

    @solid
    def materialize_one(_):
        yield AssetMaterialization(asset_key=asset_key_one)
        yield Output(1)

    @solid
    def materialize_two(_):
        yield AssetMaterialization(asset_key=asset_key_two)
        yield Output(1)

    def _one():
        materialize_one()

    def _two():
        materialize_two()

    events_one, _ = synthesize_events(_one)
    for event in events_one:
        event_log_storage.store_event(event)

    asset_keys = event_log_storage.get_all_asset_keys()
    assert len(asset_keys) == 1
    assert asset_key_one in set(asset_keys)

    migrate_asset_key_data(event_log_storage)

    asset_keys = event_log_storage.get_all_asset_keys()
    assert len(asset_keys) == 1
    assert asset_key_one in set(asset_keys)

    events_two, _ = synthesize_events(_two)
    for event in events_two:
        event_log_storage.store_event(event)

    asset_keys = event_log_storage.get_all_asset_keys()
    assert len(asset_keys) == 2
    assert asset_key_one in set(asset_keys)
    assert asset_key_two in set(asset_keys)
def test_basic_get_logs_for_run_cursor(conn_string):
    event_log_storage = PostgresEventLogStorage.create_clean_storage(conn_string)

    @solid
    def return_one(_):
        return 1

    def _solids():
        return_one()

    events, result = synthesize_events(_solids)

    for event in events:
        event_log_storage.store_event(event)

    assert event_types(event_log_storage.get_logs_for_run(result.run_id, cursor=0)) == [
        DagsterEventType.PIPELINE_START,
        DagsterEventType.ENGINE_EVENT,
        DagsterEventType.STEP_START,
        DagsterEventType.STEP_OUTPUT,
        DagsterEventType.OBJECT_STORE_OPERATION,
        DagsterEventType.STEP_SUCCESS,
        DagsterEventType.ENGINE_EVENT,
        DagsterEventType.PIPELINE_SUCCESS,
    ]

    assert event_types(event_log_storage.get_logs_for_run(result.run_id, cursor=1)) == [
        DagsterEventType.PIPELINE_START,
        DagsterEventType.ENGINE_EVENT,
        DagsterEventType.STEP_START,
        DagsterEventType.STEP_OUTPUT,
        DagsterEventType.OBJECT_STORE_OPERATION,
        DagsterEventType.STEP_SUCCESS,
        DagsterEventType.ENGINE_EVENT,
        DagsterEventType.PIPELINE_SUCCESS,
    ]
def test_listen_notify_filter_run_event(conn_string):
    event_log_storage = PostgresEventLogStorage.create_clean_storage(conn_string)

    @solid
    def return_one(_):
        return 1

    def _solids():
        return_one()

    run_id_one = str(uuid.uuid4())
    run_id_two = str(uuid.uuid4())

    # only watch one of the runs
    event_list = []
    event_log_storage.event_watcher.watch_run(run_id_two, 0, event_list.append)

    try:
        events_one, _result_one = gather_events(_solids, run_config=RunConfig(run_id=run_id_one))
        for event in events_one:
            event_log_storage.store_event(event)

        events_two, _result_two = gather_events(_solids, run_config=RunConfig(run_id=run_id_two))
        for event in events_two:
            event_log_storage.store_event(event)

        start = time.time()
        while len(event_list) < 7 and time.time() - start < TEST_TIMEOUT:
            pass

        assert len(event_list) == 7
        assert all([isinstance(event, DagsterEventRecord) for event in event_list])
    finally:
        del event_log_storage
def test_basic_get_logs_for_run_multiple_runs(conn_string):
    event_log_storage = PostgresEventLogStorage.create_clean_storage(conn_string)

    @solid
    def return_one(_):
        return 1

    def _solids():
        return_one()

    events_one, result_one = synthesize_events(_solids)
    for event in events_one:
        event_log_storage.store_event(event)

    events_two, result_two = synthesize_events(_solids)
    for event in events_two:
        event_log_storage.store_event(event)

    out_events_one = event_log_storage.get_logs_for_run(result_one.run_id)
    assert len(out_events_one) == len(events_one)

    assert set(event_types(out_events_one)) == set(event_types(events_one))

    assert set(map(lambda e: e.run_id, out_events_one)) == {result_one.run_id}

    stats_one = event_log_storage.get_stats_for_run(result_one.run_id)
    assert stats_one.steps_succeeded == 1

    out_events_two = event_log_storage.get_logs_for_run(result_two.run_id)
    assert len(out_events_two) == len(events_two)

    assert set(event_types(out_events_two)) == set(event_types(events_two))

    assert set(map(lambda e: e.run_id, out_events_two)) == {result_two.run_id}

    stats_two = event_log_storage.get_stats_for_run(result_two.run_id)
    assert stats_two.steps_succeeded == 1
def test_basic_event_store(conn_string):
    @solid
    def return_one(_):
        return 1

    def _solids():
        return_one()

    events, _result = synthesize_events(_solids)

    event_log_storage = PostgresEventLogStorage.create_clean_storage(conn_string)

    for event in events:
        event_log_storage.store_event(event)

    rows = fetch_all_events(conn_string)

    out_events = list(map(lambda r: deserialize_json_to_dagster_namedtuple(r[0]), rows))

    # messages can come out of order
    event_type_counts = Counter(event_types(out_events))
    assert event_type_counts
    assert Counter(event_types(out_events)) == Counter(event_types(events))
def event_log_storage(self, conn_string):  # pylint: disable=arguments-differ
    storage = PostgresEventLogStorage.create_clean_storage(conn_string)
    assert storage
    yield storage