def test_skip_execution_for_entity() -> None:
    state.set_config("subscription_mode_metrics_sets", "new")
    state.set_config("subscription_mode_metrics_counter", "new")

    # Skips execution if the entity name is not on the list
    dataset = get_dataset("metrics")
    entity_names = ["metrics_sets"]
    metrics = TestingMetricsBackend()
    next_step = mock.Mock()
    commit = mock.Mock()

    total_concurrent_queries = 4
    strategy = ExecuteQuery(
        dataset,
        entity_names,
        4,
        total_concurrent_queries,
        None,
        metrics,
        next_step,
        commit,
    )

    metrics_sets_message = next(generate_message(EntityKey.METRICS_SETS))
    strategy.submit(metrics_sets_message)
    metrics_counters_message = next(generate_message(EntityKey.METRICS_COUNTERS))
    strategy.submit(metrics_counters_message)

    assert (
        Increment("skipped_execution", 1, {"entity": "metrics_sets"})
        not in metrics.calls
    )
    assert (
        Increment("skipped_execution", 1, {"entity": "metrics_counters"})
        in metrics.calls
    )
def test_table_rate_limit(
    query: Query, limit_to_set: str, params: RateLimitParameters
) -> None:
    set_config(limit_to_set, 50)
    request_settings = HTTPRequestSettings(consistent=True)
    TableRateLimit().process_query(query, request_settings)
    rate_limiters = request_settings.get_rate_limit_params()
    assert params in rate_limiters
def test_too_many_concurrent_queries() -> None:
    state.set_config("subscription_mode_events", "new")
    state.set_config("executor_queue_size_factor", 1)
    dataset = get_dataset("events")
    entity_names = ["events"]
    metrics = TestingMetricsBackend()
    next_step = mock.Mock()
    commit = mock.Mock()

    total_concurrent_queries = 4
    strategy = ExecuteQuery(
        dataset,
        entity_names,
        4,
        total_concurrent_queries,
        None,
        metrics,
        next_step,
        commit,
    )

    make_message = generate_message(EntityKey.EVENTS)

    for _ in range(4):
        strategy.submit(next(make_message))

    with pytest.raises(MessageRejected):
        strategy.submit(next(make_message))

    strategy.close()
    strategy.join()
def test_query_extension_processing(
    raw_data: dict,
    expected_conditions: Sequence[Condition],
    expected_granularity: int,
):
    state.set_config('max_days', 1)
    extension = TimeSeriesExtension(
        default_granularity=60,
        default_window=datetime.timedelta(days=5),
        timestamp_column='timestamp',
    )
    valid_data = validate_jsonschema(raw_data, extension.get_schema())

    query = Query(
        {"conditions": []},
        TableSource("my_table", ColumnSet([])),
    )

    request_settings = RequestSettings(turbo=False, consistent=False, debug=False)

    extension.get_processor().process_query(query, valid_data, request_settings)
    assert query.get_conditions() == expected_conditions
    assert query.get_granularity() == expected_granularity
def test_prewhere(initial_table, consistent, expected_table) -> None:
    state.set_config("enable_events_readonly_table", True)
    body = {
        "conditions": [
            ["d", "=", "1"],
            ["c", "=", "3"],
            ["a", "=", "1"],
            ["b", "=", "2"],
        ],
    }
    cols = ColumnSet([("col", String())])

    query = Query(
        body,
        TableSource(initial_table, cols, [["time", "=", "1"]], ["c1"]),
    )

    request_settings = HTTPRequestSettings(consistent=consistent)
    processor = ReadOnlyTableSelector("sentry_dist", "sentry_ro")
    processor.process_query(query, request_settings)

    source = query.get_data_source()
    assert isinstance(source, TableSource)
    assert source.format_from() == expected_table
    assert source.get_columns() == cols
    assert source.get_prewhere_candidates() == ["c1"]
    assert source.get_mandatory_conditions() == [["time", "=", "1"]]
def test_load_balancing(
    override_cluster: Callable[[bool], FakeClickhouseCluster]
) -> None:
    """
    Test running two replacements in a row and verify the queries
    are properly load balanced on different nodes.
    """
    set_config("write_node_replacements_projects", "[1]")
    cluster = override_cluster(True)

    replacer = ReplacerWorker(
        get_writable_storage(StorageKey.ERRORS), DummyMetricsBackend()
    )
    replacement = LegacyReplacement(
        COUNT_QUERY_TEMPLATE,
        INSERT_QUERY_TEMPLATE,
        FINAL_QUERY_TEMPLATE,
        (NEEDS_FINAL, 1),
    )
    replacer.flush_batch([replacement, replacement])

    assert cluster.get_queries() == {
        "query_node": [
            "SELECT count() FROM errors_dist FINAL WHERE event_id = '6f0ccc03-6efb-4f7c-8005-d0c992106b31'",
            "SELECT count() FROM errors_dist FINAL WHERE event_id = '6f0ccc03-6efb-4f7c-8005-d0c992106b31'",
        ],
        "storage-0-0": [LOCAL_QUERY],
        "storage-0-1": [LOCAL_QUERY],
        "storage-1-0": [LOCAL_QUERY],
        "storage-1-1": [LOCAL_QUERY],
        "storage-2-0": [LOCAL_QUERY],
        "storage-2-1": [LOCAL_QUERY],
    }
def test_fallback_logic() -> None:
    state.set_config("use_fallback_host_in_native_connection_pool", 1)

    network_failure_connection = mock.Mock()
    network_failure_connection.execute.side_effect = EOFError()

    verification_connection = mock.Mock()
    verification_connection.execute.return_value = []

    pool = ClickhousePool(CLUSTER_HOST, CLUSTER_PORT, "test", "test", TEST_DB_NAME)

    # The execute method will try to reuse a single slot in the connection
    # pool but reestablish new connections with _create_conn if a connection
    # fails with a network-related error. It may be cleaner to move connection
    # negotiation/establishment into another class for separation of concerns.
    with mock.patch.object(
        pool, "_create_conn", lambda x, y=False: network_failure_connection
    ):
        pool.pool = queue.LifoQueue(1)
        pool.pool.put(network_failure_connection, block=False)
        pool.fallback_pool = queue.LifoQueue(1)
        pool.fallback_pool.put(verification_connection, block=False)
        pool.execute("SELECT something")

    assert (
        network_failure_connection.execute.call_count == 3
    ), "Expected three (failed) attempts with main connection pool"
    assert (
        verification_connection.execute.call_count == 1
    ), "Expected one (successful) attempt with fallback connection pool"
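# The test above exercises a retry-then-fallback pattern: a fixed number of
# attempts against the main connection, recreating it after network errors,
# before switching to the fallback host. The class below is a minimal,
# hypothetical sketch of that pattern only; the names (FallbackExecutor,
# main_factory, fallback_factory, attempts) are illustrative and this is not
# ClickhousePool's actual implementation.
class FallbackExecutor:
    def __init__(self, main_factory, fallback_factory, attempts: int = 3) -> None:
        self.main_factory = main_factory
        self.fallback_factory = fallback_factory
        self.attempts = attempts

    def execute(self, statement: str):
        # Retry the main connection a fixed number of times, building a fresh
        # connection after each network-related failure.
        for _ in range(self.attempts):
            try:
                return self.main_factory().execute(statement)
            except (EOFError, ConnectionError):
                continue
        # Main pool exhausted: give the fallback host a chance.
        return self.fallback_factory().execute(statement)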
def test_apply_quota(
    enabled: int,
    referrer: str,
    config_to_set: str,
    expected_quota: Optional[ResourceQuota],
) -> None:
    state.set_config(ENABLED_CONFIG, enabled)
    state.set_config(config_to_set, 5)

    query = Query(
        QueryEntity(EntityKey.EVENTS, EntityColumnSet([])),
        selected_columns=[
            SelectedExpression("column2", Column(None, None, "column2"))
        ],
        condition=binary_condition(
            ConditionFunctions.EQ,
            Column("_snuba_project_id", None, "project_id"),
            Literal(None, 1),
        ),
    )

    settings = HTTPQuerySettings()
    settings.referrer = referrer
    ResourceQuotaProcessor("project_id").process_query(query, settings)
    assert settings.get_resource_quota() == expected_quota
def test_dedupe(self):
    try:
        state.set_config("use_query_id", 1)
        state.set_config("use_cache", 1)
        uniq_name = uuid.uuid4().hex[:8]

        def do_request(result_container):
            result = json.loads(
                self.app.post(
                    "/query",
                    data=json.dumps(
                        {
                            "project": 1,
                            "granularity": 3600,
                            "aggregations": [
                                ["count()", "", uniq_name],
                                ["sleep(0.01)", "", "sleep"],
                            ],
                        }
                    ),
                ).data
            )
            result_container.append(result)

        # t0 and t1 are exact duplicate queries submitted concurrently. One of
        # them will execute normally and the other one should be held back by
        # the deduper, until it can use the cached result from the first.
        results = [[] for _ in range(3)]
        t0 = Thread(target=do_request, args=(results[0],))
        t1 = Thread(target=do_request, args=(results[1],))
        t0.start()
        t1.start()
        t0.join()
        t1.join()

        # A subsequent request will not be marked as a duplicate since we
        # waited for the first two to finish; it is still fresh.
        do_request(results[2])

        results = [r.pop() for r in results]

        # The results should all have the same data.
        datas = [r["data"] for r in results]
        assert datas[0] == [{uniq_name: 0, "sleep": 0}]
        assert all(d == datas[0] for d in datas)

        stats = [r["stats"] for r in results]
        # We don't know which order these will execute in, but one
        # of them will be a cached result.
        assert stats[0]["cache_hit"] in (True, False)
        assert stats[1]["cache_hit"] in (True, False)
        assert stats[0]["cache_hit"] != stats[1]["cache_hit"]

        # And the cached one should be the one marked as a dupe.
        assert stats[0]["cache_hit"] == stats[0]["is_duplicate"]
        assert stats[1]["cache_hit"] == stats[1]["is_duplicate"]
        assert stats[2]["is_duplicate"] == False
    finally:
        state.delete_config("use_query_id")
        state.delete_config("use_cache")
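# The comments in test_dedupe describe dedup-through-cache behaviour: identical
# concurrent queries share a single execution, and the waiting caller is served
# the cached result and flagged as a duplicate. The class below is a minimal,
# hypothetical single-flight sketch of that idea under those assumptions; it is
# not Snuba's actual readthrough cache.
import threading
from typing import Any, Callable, Dict, Tuple


class SingleFlightCache:
    def __init__(self) -> None:
        self._lock = threading.Lock()
        self._results: Dict[str, Any] = {}
        self._in_flight: Dict[str, threading.Event] = {}

    def get_or_execute(
        self, query_id: str, execute: Callable[[], Any]
    ) -> Tuple[Any, bool]:
        """Return (result, is_duplicate) for the given query id."""
        with self._lock:
            if query_id in self._results:
                # Already cached: this caller is a duplicate.
                return self._results[query_id], True
            event = self._in_flight.get(query_id)
            if event is None:
                # First caller for this query id: it runs the query itself.
                event = self._in_flight[query_id] = threading.Event()
                owner = True
            else:
                owner = False
        if owner:
            result = execute()
            with self._lock:
                self._results[query_id] = result
                del self._in_flight[query_id]
            event.set()
            return result, False
        # A concurrent duplicate: wait for the owner, then read the cached result.
        event.wait()
        with self._lock:
            return self._results[query_id], True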
def test_multiple_not_too_many_excludes(
    query_with_multiple_group_ids: ClickhouseQuery,
) -> None:
    """
    Query is looking for multiple groups and there are not too many groups to
    exclude, but there are fewer groups queried for than replaced.
    """
    enforcer = PostReplacementConsistencyEnforcer("project_id", ReplacerState.ERRORS)

    set_project_exclude_groups(
        2,
        [100, 101, 102],
        ReplacerState.ERRORS,
        ReplacementType.EXCLUDE_GROUPS,  # Arbitrary replacement type, no impact on tests
    )
    enforcer._set_query_final(query_with_multiple_group_ids, True)
    state.set_config("max_group_ids_exclude", 5)

    enforcer.process_query(query_with_multiple_group_ids, HTTPQuerySettings())
    assert query_with_multiple_group_ids.get_condition() == build_and(
        build_not_in("group_id", [101, 102]),
        build_and(
            build_in("project_id", [2]),
            build_in("group_id", [101, 102]),
        ),
    )
    assert not query_with_multiple_group_ids.get_from_clause().final
def test_project_rate_limiting(self) -> None:
    state.set_config("project_concurrent_limit", self.project_id)
    state.set_config(f"project_concurrent_limit_{self.project_id}", 0)

    response = self.post(
        "/events/snql",
        data=json.dumps(
            {
                "query": """MATCH (events)
                    SELECT platform
                    WHERE project_id = 2
                    AND timestamp >= toDateTime('2021-01-01')
                    AND timestamp < toDateTime('2021-01-02')
                    """,
            }
        ),
    )
    assert response.status_code == 200

    response = self.post(
        "/events/snql",
        data=json.dumps(
            {
                "query": f"""MATCH (events)
                    SELECT platform
                    WHERE project_id = {self.project_id}
                    AND timestamp >= toDateTime('2021-01-01')
                    AND timestamp < toDateTime('2021-01-02')
                    """
            }
        ),
    )
    assert response.status_code == 429
def test_query_overlaps_replacements_processor(
    query: ClickhouseQuery,
    query_with_timestamp: ClickhouseQuery,
    query_with_future_timestamp: ClickhouseQuery,
) -> None:
    enforcer = PostReplacementConsistencyEnforcer("project_id", ReplacerState.ERRORS)

    # Replacement time unknown, default to "overlaps", but no groups to
    # exclude, so the query shouldn't be final.
    enforcer._set_query_final(query_with_timestamp, True)
    enforcer.process_query(query_with_timestamp, HTTPQuerySettings())
    assert not query_with_timestamp.get_from_clause().final

    # Overlaps a replacement and should be final due to too many groups to exclude.
    state.set_config("max_group_ids_exclude", 2)
    set_project_exclude_groups(
        2,
        [100, 101, 102],
        ReplacerState.ERRORS,
        ReplacementType.EXCLUDE_GROUPS,  # Arbitrary replacement type, no impact on tests
    )
    enforcer._set_query_final(query_with_timestamp, False)
    enforcer.process_query(query_with_timestamp, HTTPQuerySettings())
    assert query_with_timestamp.get_from_clause().final

    # Query time range unknown and should be final due to too many groups to exclude.
    enforcer._set_query_final(query, False)
    enforcer.process_query(query, HTTPQuerySettings())
    assert query.get_from_clause().final

    # Doesn't overlap any replacements.
    enforcer._set_query_final(query_with_future_timestamp, True)
    enforcer.process_query(query_with_future_timestamp, HTTPQuerySettings())
    assert not query_with_future_timestamp.get_from_clause().final
def test_single_too_many_exclude(
    query_with_single_group_id: ClickhouseQuery,
) -> None:
    """
    Query is looking for a group that has been replaced, and there are too
    many groups to exclude.
    """
    enforcer = PostReplacementConsistencyEnforcer("project_id", ReplacerState.ERRORS)

    set_project_exclude_groups(
        2,
        [100, 101, 102],
        ReplacerState.ERRORS,
        ReplacementType.EXCLUDE_GROUPS,  # Arbitrary replacement type, no impact on tests
    )
    enforcer._set_query_final(query_with_single_group_id, True)
    state.set_config("max_group_ids_exclude", 2)

    enforcer.process_query(query_with_single_group_id, HTTPQuerySettings())
    assert query_with_single_group_id.get_condition() == build_and(
        build_not_in("group_id", [101]),
        build_and(
            build_in("project_id", [2]),
            build_in("group_id", [101]),
        ),
    )
    assert not query_with_single_group_id.get_from_clause().final
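# The replacement-consistency tests above all hinge on one decision: for the
# replaced groups that the query can actually return, either exclude them with
# a NOT IN condition or, if there are more of them than max_group_ids_exclude
# allows, fall back to querying the table with FINAL. The helper below is a
# minimal, hypothetical sketch of that decision only, derived from the assertions
# in these tests; choose_consistency_strategy is an illustrative name and not
# part of PostReplacementConsistencyEnforcer's API.
from typing import List, Sequence, Tuple


def choose_consistency_strategy(
    replaced_groups: Sequence[int],
    queried_groups: Sequence[int],
    max_group_ids_exclude: int,
) -> Tuple[str, List[int]]:
    # Only groups the query can return need excluding: if the query already
    # narrows to specific group ids, intersect with those.
    to_exclude = (
        [g for g in replaced_groups if g in queried_groups]
        if queried_groups
        else list(replaced_groups)
    )
    if not to_exclude:
        # Nothing relevant was replaced: the query runs untouched.
        return "no-op", []
    if len(to_exclude) > max_group_ids_exclude:
        # Too many groups to list in a NOT IN condition: force FINAL instead.
        return "final", []
    # Few enough groups: add a NOT IN condition and skip FINAL.
    return "exclude-groups", to_exclude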
def test_not_many_groups_to_exclude(query: ClickhouseQuery) -> None: state.set_config("max_group_ids_exclude", 5) set_project_exclude_groups(2, [100, 101, 102], ReplacerState.EVENTS) PostReplacementConsistencyEnforcer( "project_id", ReplacerState.EVENTS ).process_query(query, HTTPRequestSettings()) assert query.get_condition_from_ast() == FunctionCall( None, BooleanFunctions.AND, ( FunctionCall( None, "notIn", ( FunctionCall( None, "assumeNotNull", (Column(None, None, "group_id"),) ), FunctionCall( None, "tuple", (Literal(None, 100), Literal(None, 101), Literal(None, 102),), ), ), ), build_in("project_id", [2]), ), ) assert not query.get_from_clause().final
def test_format_expressions(query_body: str, expected_query: LogicalQuery) -> None:
    state.set_config("query_parsing_expand_aliases", 1)
    events = get_dataset("events")

    # TODO: Potentially remove this once entities have actual join relationships
    mapping = {
        "contains": (EntityKey.TRANSACTIONS, "event_id"),
        "assigned": (EntityKey.GROUPASSIGNEE, "group_id"),
        "bookmark": (EntityKey.GROUPEDMESSAGES, "first_release_id"),
        "activity": (EntityKey.SESSIONS, "org_id"),
    }

    def events_mock(relationship: str) -> JoinRelationship:
        entity_key, rhs_column = mapping[relationship]
        return JoinRelationship(
            rhs_entity=entity_key,
            join_type=JoinType.INNER,
            columns=[("event_id", rhs_column)],
            equivalences=[],
        )

    events_entity = get_entity(EntityKey.EVENTS)
    setattr(events_entity, "get_join_relationship", events_mock)

    query = parse_snql_query(query_body, events)

    eq, reason = query.equals(expected_query)
    assert eq, reason
def test_invalid_function_name(expression: FunctionCall, should_raise: bool) -> None:
    data_source = QueryEntity(EntityKey.EVENTS, ColumnSet([]))
    state.set_config("function-validator.enabled", True)

    with pytest.raises(InvalidExpressionException):
        FunctionCallsValidator().validate(expression, data_source)
def test_entity_column_validation(
    query_body: str, expected_query: LogicalQuery, set_configs: Any
) -> None:
    state.set_config("query_parsing_expand_aliases", 1)
    events = get_dataset("events")

    # TODO: Potentially remove this once entities have actual join relationships
    mapping = {
        "contains": (EntityKey.TRANSACTIONS, "event_id"),
        "connected": (EntityKey.SPANS, "trace_id"),
    }

    def events_mock(relationship: str) -> JoinRelationship:
        entity_key, rhs_column = mapping[relationship]
        return JoinRelationship(
            rhs_entity=entity_key,
            join_type=JoinType.INNER,
            columns=[("event_id", rhs_column)],
            equivalences=[],
        )

    events_entity = get_entity(EntityKey.EVENTS)
    old_get_join = events_entity.get_join_relationship

    try:
        setattr(events_entity, "get_join_relationship", events_mock)
        query = parse_snql_query(query_body, [], events)
        eq, reason = query.equals(expected_query)
        assert eq, reason
    finally:
        setattr(events_entity, "get_join_relationship", old_get_join)
def test_failing_query(
    override_cluster: Callable[[bool], FakeClickhouseCluster]
) -> None:
    """
    Test the execution of replacement queries on a single node when
    the query fails.
    """
    set_config("write_node_replacements_projects", "[1]")
    override_cluster(False)

    replacer = ReplacerWorker(
        get_writable_storage(StorageKey.ERRORS),
        "consumer_group",
        DummyMetricsBackend(),
    )

    with pytest.raises(ServerExplodedException):
        replacer.flush_batch(
            [
                LegacyReplacement(
                    COUNT_QUERY_TEMPLATE,
                    INSERT_QUERY_TEMPLATE,
                    FINAL_QUERY_TEMPLATE,
                    (NEEDS_FINAL, 1),
                    REPLACEMENT_TYPE,
                    REPLACEMENT_MESSAGE_METADATA,
                )
            ]
        )
def test_delete_configs(admin_api: FlaskClient[Any]) -> None:
    # Delete a config and its description.
    state.set_config("delete_this", "1")
    state.set_config_description("delete_this", "description for this config")
    assert state.get_uncached_config("delete_this") == 1
    assert state.get_config_description("delete_this") == "description for this config"

    response = admin_api.delete("/configs/delete_this")

    assert response.status_code == 200
    assert state.get_uncached_config("delete_this") is None
    assert state.get_config_description("delete_this") is None

    # Delete a config but not its description.
    state.set_config("delete_this", "1")
    state.set_config_description("delete_this", "description for this config")
    assert state.get_uncached_config("delete_this") == 1
    assert state.get_config_description("delete_this") == "description for this config"

    response = admin_api.delete("/configs/delete_this?keepDescription=true")

    assert response.status_code == 200
    assert state.get_uncached_config("delete_this") is None
    assert state.get_config_description("delete_this") == "description for this config"
def test_write_each_node(
    override_fixture: Callable[[bool], FakeClickhouseCluster],
    write_node_replacements_projects: str,
    expected_queries: Mapping[str, Sequence[str]],
    request: Any,
) -> None:
    """
    Test the execution of replacement queries on both storage nodes and
    query nodes.
    """
    set_config("write_node_replacements_projects", write_node_replacements_projects)
    override_func = request.getfixturevalue(override_fixture)
    test_cluster = override_func(True)

    replacer = ReplacerWorker(
        get_writable_storage(StorageKey.ERRORS),
        "consumer_group",
        DummyMetricsBackend(),
    )

    replacer.flush_batch(
        [
            LegacyReplacement(
                COUNT_QUERY_TEMPLATE,
                INSERT_QUERY_TEMPLATE,
                FINAL_QUERY_TEMPLATE,
                (NEEDS_FINAL, 1),
                REPLACEMENT_TYPE,
                REPLACEMENT_MESSAGE_METADATA,
            )
        ]
    )

    queries = test_cluster.get_queries()
    assert queries == expected_queries
def test_time_alignment(self):
    # Adding a half hour skew to the time.
    skew = timedelta(minutes=30)
    result = json.loads(
        self.app.post(
            '/query',
            data=json.dumps({
                'project': 1,
                'granularity': 60,
                'groupby': 'time',
                'from_date': (self.base_time + skew).replace(tzinfo=pytz.utc).isoformat(),
                'to_date': (self.base_time + skew + timedelta(minutes=self.minutes)).isoformat(),
                'orderby': 'time',
            }),
        ).data
    )
    bucket_time = parse_datetime(result['data'][0]['time']).replace(tzinfo=None)
    assert bucket_time == (self.base_time + skew)

    # But if we set time alignment to an hour, the buckets will fall back to
    # the 1hr boundary.
    state.set_config('date_align_seconds', 3600)
    result = json.loads(
        self.app.post(
            '/query',
            data=json.dumps({
                'project': 1,
                'granularity': 60,
                'groupby': 'time',
                'from_date': (self.base_time + skew).isoformat(),
                'to_date': (self.base_time + skew + timedelta(minutes=self.minutes)).isoformat(),
                'orderby': 'time',
            }),
        ).data
    )
    bucket_time = parse_datetime(result['data'][0]['time']).replace(tzinfo=None)
    assert bucket_time == self.base_time
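# The test above relies on date alignment: when date_align_seconds is set, the
# query's time bounds snap to that boundary, so a half-hour skew collapses back
# to the hour. The helper below is a minimal, hypothetical sketch of that
# rounding, assuming alignment means flooring to the nearest multiple of the
# configured number of seconds; it is not Snuba's actual alignment code.
from datetime import datetime, timedelta


def align_timestamp(ts: datetime, align_seconds: int) -> datetime:
    # Floor the naive timestamp to a multiple of align_seconds since the epoch.
    epoch = datetime(1970, 1, 1)
    seconds = int((ts - epoch).total_seconds())
    return epoch + timedelta(seconds=seconds - seconds % align_seconds)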
def test_app_id_attribution(self) -> None:
    state.set_config("use_attribution", 1)
    response = self.post(
        "/events/snql",
        data=json.dumps(
            {
                "query": f"""MATCH (events)
                SELECT count() AS count
                WHERE timestamp >= toDateTime('{self.base_time.isoformat()}')
                AND timestamp < toDateTime('{self.next_time.isoformat()}')
                AND project_id IN tuple({self.project_id})
                """,
                "app_id": "default",
            }
        ),
    )
    assert response.status_code == 200

    metric_calls = get_recorded_metric_calls("increment", "snuba.attribution.log")
    assert metric_calls is not None
    assert len(metric_calls) == 1
    assert metric_calls[0].value > 0
    assert metric_calls[0].tags["app_id"] == "default"
    assert metric_calls[0].tags["referrer"] == "test"
    assert metric_calls[0].tags["dataset"] == "events"
    assert metric_calls[0].tags["entity"] == "events"
    assert metric_calls[0].tags["table"].startswith("errors")
def test_project_rate_limiting_joins(self) -> None:
    state.set_config("project_concurrent_limit", self.project_id)
    state.set_config(f"project_concurrent_limit_{self.project_id}", 0)

    response = self.post(
        "/discover/snql",
        data=json.dumps(
            {
                "query": """MATCH (s: spans) -[contained]-> (t: transactions)
                    SELECT s.op, avg(s.duration_ms) AS avg BY s.op
                    WHERE s.project_id = 2
                    AND t.project_id = 2
                    AND t.finish_ts >= toDateTime('2021-01-01')
                    AND t.finish_ts < toDateTime('2021-01-02')
                    """,
            }
        ),
    )
    assert response.status_code == 200

    response = self.post(
        "/discover/snql",
        data=json.dumps(
            {
                "query": f"""MATCH (s: spans) -[contained]-> (t: transactions)
                    SELECT s.op, avg(s.duration_ms) AS avg BY s.op
                    WHERE s.project_id = {self.project_id}
                    AND t.project_id = {self.project_id}
                    AND t.finish_ts >= toDateTime('2021-01-01')
                    AND t.finish_ts < toDateTime('2021-01-02')
                    """,
            }
        ),
    )
    assert response.status_code == 429
def test_failures(query_body: str, message: str) -> None:
    state.set_config("query_parsing_expand_aliases", 1)

    # TODO: Potentially remove this once entities have actual join relationships
    mapping = {
        "contains": (EntityKey.TRANSACTIONS, "event_id"),
        "assigned": (EntityKey.GROUPASSIGNEE, "group_id"),
        "bookmark": (EntityKey.GROUPEDMESSAGES, "first_release_id"),
        "activity": (EntityKey.SESSIONS, "org_id"),
    }

    def events_mock(relationship: str) -> Optional[JoinRelationship]:
        if relationship not in mapping:
            return None
        entity_key, rhs_column = mapping[relationship]
        return JoinRelationship(
            rhs_entity=entity_key,
            join_type=JoinType.INNER,
            columns=[("event_id", rhs_column)],
            equivalences=[],
        )

    events = get_dataset("events")
    events_entity = get_entity(EntityKey.EVENTS)
    setattr(events_entity, "get_join_relationship", events_mock)

    with pytest.raises(ParsingException, match=re.escape(message)):
        parse_snql_query(query_body, [], events)
def test_record_queries(self, record_query_mock: Any) -> None:
    for use_split, expected_query_count in [(0, 1), (1, 2)]:
        state.set_config("use_split", use_split)
        record_query_mock.reset_mock()
        result = json.loads(
            self.post(
                "/events/snql",
                data=json.dumps(
                    {
                        "query": f"""MATCH (events)
                        SELECT event_id, title, transaction, tags[a], tags[b], message, project_id
                        WHERE timestamp >= toDateTime('2021-01-01')
                        AND timestamp < toDateTime('2022-01-01')
                        AND project_id IN tuple({self.project_id})
                        LIMIT 5""",
                    }
                ),
            ).data
        )

        assert len(result["data"]) == 1
        assert record_query_mock.call_count == 1
        metadata = record_query_mock.call_args[0][0]
        assert metadata["dataset"] == "events"
        assert metadata["request"]["referrer"] == "test"
        assert len(metadata["query_list"]) == expected_query_count
def test_killswitch():
    p = TupleUnaliaser()
    assert not p.should_run()
    set_config("tuple_unaliaser_rollout", 0)
    assert not p.should_run()
    set_config("tuple_unaliaser_rollout", 1)
    assert p.should_run()
def test_not_many_groups_to_exclude(query: ClickhouseQuery) -> None: state.set_config("max_group_ids_exclude", 5) set_project_exclude_groups( 2, [100, 101, 102], ReplacerState.ERRORS, ReplacementType. EXCLUDE_GROUPS, # Arbitrary replacement type, no impact on tests ) PostReplacementConsistencyEnforcer("project_id", ReplacerState.ERRORS).process_query( query, HTTPQuerySettings()) assert query.get_condition() == build_and( FunctionCall( None, "notIn", ( FunctionCall(None, "assumeNotNull", (Column(None, None, "group_id"), )), FunctionCall( None, "tuple", ( Literal(None, 100), Literal(None, 101), Literal(None, 102), ), ), ), ), build_in("project_id", [2]), ) assert not query.get_from_clause().final
def test_tags_processor(query_body, expected_query) -> None:
    state.set_config("ast_tag_processor_enabled", 1)
    dataset = get_dataset("transactions")
    query = parse_query(query_body, dataset)
    request_settings = HTTPRequestSettings()

    assert (
        DictClickhouseQuery(dataset, query, request_settings).format_sql()
        == expected_query
    )
def test_format_expressions(
    query_body: MutableMapping[str, Any], expected_query: Query
) -> None:
    state.set_config("query_parsing_expand_aliases", 1)
    events = get_dataset("events")
    query = parse_query(query_body, events)

    eq, reason = query.equals(expected_query)
    assert eq, reason
def set(*, key: str, value: str, force_type: bool) -> None:
    "Set a single key."
    try:
        state.set_config(key, value, user=get_user(), force=force_type)
    except MismatchedTypeException as exc:
        print(
            f"The new value type {exc.new_type} does not match the old value type "
            f"{exc.original_type}. Use the force option to disable this check."
        )