def test_without_turbo_without_projects_needing_final(query: ClickhouseQuery) -> None:
    """
    Run the enforcer with no replacer state and default request settings.

    The test expects the query condition to equal the plain project filter
    and the FROM clause not to be flagged FINAL.
    """
    enforcer = PostReplacementConsistencyEnforcer("project_id", None)
    enforcer.process_query(query, HTTPRequestSettings())

    actual_condition = query.get_condition_from_ast()
    expected_condition = build_in("project_id", [2])
    assert actual_condition == expected_condition

    from_clause = query.get_from_clause()
    assert not from_clause.final
def test_not_many_groups_to_exclude(query: ClickhouseQuery) -> None:
    """
    Exclude three groups while max_group_ids_exclude is configured to 5.

    The test expects a ``notIn`` filter on ``group_id`` AND-ed with the
    project filter, and the FROM clause not to be flagged FINAL.
    """
    state.set_config("max_group_ids_exclude", 5)
    set_project_exclude_groups(2, [100, 101, 102], ReplacerState.EVENTS)

    enforcer = PostReplacementConsistencyEnforcer("project_id", ReplacerState.EVENTS)
    enforcer.process_query(query, HTTPRequestSettings())

    actual_condition = query.get_condition_from_ast()

    # Build the expected AST piece by piece.
    non_null_group_id = FunctionCall(
        None, "assumeNotNull", (Column(None, None, "group_id"),)
    )
    excluded_group_ids = FunctionCall(
        None,
        "tuple",
        tuple(Literal(None, group_id) for group_id in (100, 101, 102)),
    )
    exclusion_filter = FunctionCall(
        None, "notIn", (non_null_group_id, excluded_group_ids)
    )
    project_filter = build_in("project_id", [2])
    expected_condition = FunctionCall(
        None, BooleanFunctions.AND, (exclusion_filter, project_filter)
    )

    assert actual_condition == expected_condition

    from_clause = query.get_from_clause()
    assert not from_clause.final
def test_not_many_groups_to_exclude(query: ClickhouseQuery) -> None:
    """
    Exclude three groups while max_group_ids_exclude is configured to 5.

    The test expects a ``notIn`` filter on ``group_id`` combined (via
    ``build_and``) with the project filter, and the FROM clause not to be
    flagged FINAL.
    """
    state.set_config("max_group_ids_exclude", 5)
    # Arbitrary replacement type, no impact on tests
    replacement_type = ReplacementType.EXCLUDE_GROUPS
    set_project_exclude_groups(
        2, [100, 101, 102], ReplacerState.ERRORS, replacement_type
    )

    enforcer = PostReplacementConsistencyEnforcer("project_id", ReplacerState.ERRORS)
    enforcer.process_query(query, HTTPQuerySettings())

    actual_condition = query.get_condition()

    # Build the expected AST piece by piece.
    non_null_group_id = FunctionCall(
        None, "assumeNotNull", (Column(None, None, "group_id"),)
    )
    excluded_group_ids = FunctionCall(
        None,
        "tuple",
        tuple(Literal(None, group_id) for group_id in (100, 101, 102)),
    )
    exclusion_filter = FunctionCall(
        None, "notIn", (non_null_group_id, excluded_group_ids)
    )
    expected_condition = build_and(exclusion_filter, build_in("project_id", [2]))

    assert actual_condition == expected_condition

    from_clause = query.get_from_clause()
    assert not from_clause.final
def test_too_many_groups_to_exclude(query: ClickhouseQuery) -> None:
    """
    Exclude three groups while max_group_ids_exclude is configured to 2.

    The test expects the condition to equal the plain project filter and the
    FROM clause to be flagged FINAL.
    """
    state.set_config("max_group_ids_exclude", 2)
    set_project_exclude_groups(2, [100, 101, 102], ReplacerState.EVENTS)

    enforcer = PostReplacementConsistencyEnforcer("project_id", ReplacerState.EVENTS)
    enforcer.process_query(query, HTTPRequestSettings())

    actual_condition = query.get_condition_from_ast()
    expected_condition = build_in("project_id", [2])
    assert actual_condition == expected_condition

    from_clause = query.get_from_clause()
    assert from_clause.final
def test_without_turbo_with_projects_needing_final(query: ClickhouseQuery) -> None:
    """
    Mark project 2 as needing FINAL, then run the enforcer with default
    request settings.

    The test expects the condition to equal the plain project filter and
    ``query.get_final()`` to be truthy.
    """
    set_project_needs_final(2, ReplacerState.EVENTS)

    enforcer = PostReplacementConsistencyEnforcer("project_id", ReplacerState.EVENTS)
    enforcer.process_query(query, HTTPRequestSettings())

    actual_condition = query.get_condition_from_ast()
    expected_condition = build_in("project_id", [2])
    assert actual_condition == expected_condition

    is_final = query.get_final()
    assert is_final
def test_multiple_not_too_many_excludes(
    query_with_multiple_group_ids: ClickhouseQuery,
) -> None:
    """
    The query asks for several groups, and the number of groups to exclude is
    within the configured limit, although fewer groups are queried for than
    have been replaced.
    """
    query = query_with_multiple_group_ids
    processor = PostReplacementConsistencyEnforcer("project_id", ReplacerState.ERRORS)

    # Arbitrary replacement type, no impact on tests
    replacement_type = ReplacementType.EXCLUDE_GROUPS
    set_project_exclude_groups(
        2, [100, 101, 102], ReplacerState.ERRORS, replacement_type
    )

    processor._set_query_final(query, True)
    state.set_config("max_group_ids_exclude", 5)
    processor.process_query(query, HTTPQuerySettings())

    actual_condition = query.get_condition()

    exclusion_filter = build_not_in("group_id", [101, 102])
    original_filters = build_and(
        build_in("project_id", [2]),
        build_in("group_id", [101, 102]),
    )
    expected_condition = build_and(exclusion_filter, original_filters)

    assert actual_condition == expected_condition

    from_clause = query.get_from_clause()
    assert not from_clause.final
def test_single_too_many_exclude(query_with_single_group_id: ClickhouseQuery) -> None:
    """
    The query asks for one group that has been replaced, while the number of
    replaced groups exceeds the configured exclusion limit.
    """
    query = query_with_single_group_id
    processor = PostReplacementConsistencyEnforcer("project_id", ReplacerState.ERRORS)

    # Arbitrary replacement type, no impact on tests
    replacement_type = ReplacementType.EXCLUDE_GROUPS
    set_project_exclude_groups(
        2, [100, 101, 102], ReplacerState.ERRORS, replacement_type
    )

    processor._set_query_final(query, True)
    state.set_config("max_group_ids_exclude", 2)
    processor.process_query(query, HTTPQuerySettings())

    actual_condition = query.get_condition()

    exclusion_filter = build_not_in("group_id", [101])
    original_filters = build_and(
        build_in("project_id", [2]),
        build_in("group_id", [101]),
    )
    expected_condition = build_and(exclusion_filter, original_filters)

    assert actual_condition == expected_condition

    from_clause = query.get_from_clause()
    assert not from_clause.final
def test_without_turbo_with_projects_needing_final(query: ClickhouseQuery) -> None:
    """
    Mark project 2 as needing FINAL, then run the enforcer with default
    query settings.

    The test expects the condition to equal the plain project filter and the
    FROM clause to be flagged FINAL.
    """
    # Arbitrary replacement type, no impact on tests
    replacement_type = ReplacementType.EXCLUDE_GROUPS
    set_project_needs_final(2, ReplacerState.ERRORS, replacement_type)

    enforcer = PostReplacementConsistencyEnforcer("project_id", ReplacerState.ERRORS)
    enforcer.process_query(query, HTTPQuerySettings())

    actual_condition = query.get_condition()
    expected_condition = build_in("project_id", [2])
    assert actual_condition == expected_condition

    from_clause = query.get_from_clause()
    assert from_clause.final
def test_too_many_groups_to_exclude(query: ClickhouseQuery) -> None:
    """
    Exclude three groups while max_group_ids_exclude is configured to 2.

    The test expects the condition to equal the plain project filter and the
    FROM clause to be flagged FINAL.
    """
    state.set_config("max_group_ids_exclude", 2)
    # Arbitrary replacement type, no impact on tests
    replacement_type = ReplacementType.EXCLUDE_GROUPS
    set_project_exclude_groups(
        2, [100, 101, 102], ReplacerState.ERRORS, replacement_type
    )

    enforcer = PostReplacementConsistencyEnforcer("project_id", ReplacerState.ERRORS)
    enforcer.process_query(query, HTTPQuerySettings())

    actual_condition = query.get_condition()
    expected_condition = build_in("project_id", [2])
    assert actual_condition == expected_condition

    from_clause = query.get_from_clause()
    assert from_clause.final
def test_no_groups_too_many_excludes(query: ClickhouseQuery) -> None:
    """
    The query names no groups, and more groups have been replaced than the
    configured exclusion limit allows.
    """
    processor = PostReplacementConsistencyEnforcer("project_id", ReplacerState.ERRORS)

    # Arbitrary replacement type, no impact on tests
    replacement_type = ReplacementType.EXCLUDE_GROUPS
    set_project_exclude_groups(
        2, [100, 101, 102], ReplacerState.ERRORS, replacement_type
    )

    processor._set_query_final(query, True)
    state.set_config("max_group_ids_exclude", 1)
    processor.process_query(query, HTTPQuerySettings())

    actual_condition = query.get_condition()
    expected_condition = build_in("project_id", [2])
    assert actual_condition == expected_condition

    from_clause = query.get_from_clause()
    assert from_clause.final
def test_multiple_disjoint_replaced(
    query_with_multiple_group_ids: ClickhouseQuery,
) -> None:
    """
    The query asks for several groups and some groups have been replaced, but
    the two sets of group ids do not intersect (none of the queried groups
    has been replaced).
    """
    query = query_with_multiple_group_ids
    processor = PostReplacementConsistencyEnforcer("project_id", ReplacerState.ERRORS)

    # Arbitrary replacement type, no impact on tests
    replacement_type = ReplacementType.EXCLUDE_GROUPS
    set_project_exclude_groups(
        2, [110, 120, 130], ReplacerState.ERRORS, replacement_type
    )

    processor._set_query_final(query, True)
    state.set_config("max_group_ids_exclude", 5)
    processor.process_query(query, HTTPQuerySettings())

    actual_condition = query.get_condition()
    expected_condition = build_and(
        build_in("project_id", [2]),
        build_in("group_id", [101, 102]),
    )
    assert actual_condition == expected_condition

    from_clause = query.get_from_clause()
    assert not from_clause.final
def test_with_turbo(query: ClickhouseQuery) -> None:
    """
    Run the enforcer with ``turbo=True`` request settings and no replacer
    state; the test expects the condition to equal the plain project filter.
    """
    enforcer = PostReplacementConsistencyEnforcer("project_id", None)
    turbo_settings = HTTPRequestSettings(turbo=True)
    enforcer.process_query(query, turbo_settings)

    actual_condition = query.get_condition_from_ast()
    expected_condition = build_in("project_id", [2])
    assert actual_condition == expected_condition
required_columns = [ "event_id", "project_id", "group_id", "timestamp", "deleted", "retention_days", ] storage = WritableTableStorage( storage_key=StorageKey.ERRORS, storage_set_key=StorageSetKey.EVENTS, schema=schema, query_processors=[ PostReplacementConsistencyEnforcer( project_column="project_id", replacer_state_name=ReplacerState.ERRORS, ), MappingColumnPromoter(mapping_specs={"tags": promoted_tag_columns}), ArrayJoinKeyValueOptimizer("tags"), PrewhereProcessor(), ], stream_loader=KafkaStreamLoader( processor=ErrorsProcessor(promoted_tag_columns), default_topic="events", replacement_topic="errors-replacements", ), replacer_processor=ErrorsReplacer( write_schema=schema, read_schema=schema, required_columns=required_columns, tag_column_map={"tags": promoted_tag_columns, "contexts": {}},
def test_query_overlaps_replacements_processor(
    query: ClickhouseQuery,
    query_with_timestamp: ClickhouseQuery,
    query_with_future_timestamp: ClickhouseQuery,
) -> None:
    """
    Walk the enforcer through four scenarios and check, for each one, whether
    the processed query's FROM clause ends up flagged FINAL.
    """
    processor = PostReplacementConsistencyEnforcer("project_id", ReplacerState.ERRORS)

    def final_after_processing(target: ClickhouseQuery, preset_final: bool) -> bool:
        # Preset the FINAL flag, run the processor with fresh settings, and
        # report the resulting flag.
        processor._set_query_final(target, preset_final)
        processor.process_query(target, HTTPQuerySettings())
        return target.get_from_clause().final

    # replacement time unknown, default to "overlaps" but no groups to exclude
    # so shouldn't be final
    assert not final_after_processing(query_with_timestamp, True)

    # overlaps replacement and should be final due to too many groups to exclude
    state.set_config("max_group_ids_exclude", 2)
    # Arbitrary replacement type, no impact on tests
    replacement_type = ReplacementType.EXCLUDE_GROUPS
    set_project_exclude_groups(
        2, [100, 101, 102], ReplacerState.ERRORS, replacement_type
    )
    assert final_after_processing(query_with_timestamp, False)

    # query time range unknown and should be final due to too many groups to
    # exclude
    assert final_after_processing(query, False)

    # doesn't overlap replacements
    assert not final_after_processing(query_with_future_timestamp, True)
prewhere_candidates = [ "event_id", "group_id", "tags[sentry:release]", "sentry:release", "message", "title", "environment", "project_id", ] query_processors = [ PostReplacementConsistencyEnforcer( project_column="project_id", # key migration is on going. As soon as all the keys we are interested # into in redis are stored with "EVENTS" in the name, we can change this. replacer_state_name=None, ), EventsColumnProcessor(), MappingColumnPromoter( mapping_specs={ "tags": ChainMap( {col.flattened: col.flattened for col in promoted_tag_columns}, get_promoted_context_tag_col_mapping(), ), "contexts": get_promoted_context_col_mapping(), }, ), # This processor must not be ported to the errors dataset. We should # not support promoting tags/contexts with boolean values. There is # no way to convert them back consistently to the value provided by