def validate(self, value) -> Request:
    """Validate *value* against the composite schema and build a Request.

    The validated payload is destructively partitioned: keys declared by
    the query schema, the settings schema, and each extension schema are
    popped out of ``value`` into their own dictionaries.
    """
    value = validate_jsonschema(value, self.__composite_schema)

    def take(schema):
        # Pop every schema-declared property that is present in the payload.
        return {k: value.pop(k) for k in schema['properties'] if k in value}

    query_body = take(self.__query_schema)
    settings = take(self.__settings_schema)
    extensions = {
        name: take(schema)
        for name, schema in self.__extension_schemas.items()
    }

    return Request(
        Query(query_body),
        RequestSettings(settings['turbo'], settings['consistent'], settings['debug']),
        extensions,
    )
def test_project_extension_query_adds_rate_limits():
    """Processing the project extension must append one project rate limit
    with the expected bucket and default limits."""
    extension = ProjectExtension(
        processor=ProjectExtensionProcessor(project_column="project_id")
    )
    valid_data = validate_jsonschema({'project': [2, 3]}, extension.get_schema())
    query = Query(
        {"conditions": []},
        TableSource("my_table", ColumnSet([])),
    )
    settings = RequestSettings(turbo=False, consistent=False, debug=False)
    limits_before = len(settings.get_rate_limit_params())

    extension.get_processor().process_query(query, valid_data, settings)

    limits_after = settings.get_rate_limit_params()
    # exactly one rate limit added by the processing
    assert len(limits_after) == limits_before + 1
    newest = limits_after[-1]
    assert newest.bucket == '2'
    assert newest.per_second_limit == 1000
    assert newest.concurrent_limit == 1000
def test_without_turbo_without_projects_needing_final(self):
    """With turbo off and no projects requiring FINAL, processing adds only
    the project condition and leaves FINAL unset."""
    settings = RequestSettings(turbo=False, consistent=False, debug=False)
    self.extension.get_processor().process_query(
        self.query, self.valid_data, settings
    )
    assert self.query.get_conditions() == [('project_id', 'IN', [2])]
    assert not self.query.get_final()
def test_join_optimizer_two_tables(
    selected_cols: Sequence[Any],
    conditions: Sequence[Condition],
    groupby: Groupby,
    expected: str,
) -> None:
    """SimpleJoinOptimizer must rewrite the FROM clause of a two-table join
    into the *expected* form for the given query body."""
    body = {
        "selected_columns": selected_cols,
        "conditions": conditions,
        "arrayjoin": None,
        "having": [],
        "groupby": groupby,
        "aggregations": [],
        "orderby": None,
        "limitby": None,
        "sample": 10,
        "limit": 100,
        "offset": 50,
        "totals": True,
        "granularity": 60,
    }
    query = Query(body, simple_join_structure)
    settings = RequestSettings(turbo=False, consistent=False, debug=False)

    SimpleJoinOptimizer().process_query(query, settings)

    assert query.get_data_source().format_from() == expected
def test_query_extension_processing(
    raw_data: dict,
    expected_conditions: Sequence[Condition],
    expected_granularity: int,
):
    """Time-series extension processing must add the expected timestamp
    conditions and granularity to the query."""
    state.set_config('max_days', 1)
    extension = TimeSeriesExtension(
        default_granularity=60,
        default_window=datetime.timedelta(days=5),
        timestamp_column='timestamp',
    )
    valid_data = validate_jsonschema(raw_data, extension.get_schema())
    query = Query(
        {"conditions": []},
        TableSource("my_table", ColumnSet([])),
    )
    settings = RequestSettings(turbo=False, consistent=False, debug=False)

    extension.get_processor().process_query(query, valid_data, settings)

    assert query.get_conditions() == expected_conditions
    assert query.get_granularity() == expected_granularity
def test_no_split(dataset_name: str):
    """A query that needs no splitting must reach the wrapped callable
    unchanged."""
    events = get_dataset(dataset_name)
    query = Query(
        {
            "selected_columns": ["event_id"],
            "conditions": [""],
            "orderby": "event_id",
            "sample": 10,
            "limit": 100,
            "offset": 50,
        },
        events.get_dataset_schemas().get_read_schema().get_data_source(),
    )

    @split_query
    def do_query(dataset: Dataset, request: Request, timer: Timer):
        # The splitter must hand the original query straight through.
        assert request.query == query

    request = Request(query, RequestSettings(False, False, False), {})
    do_query(events, request, None)
def test_when_there_are_too_many_groups_to_exclude(self):
    """When the excluded-group count exceeds max_group_ids_exclude, the
    processor falls back to FINAL instead of listing exclusions."""
    settings = RequestSettings(turbo=False, consistent=False, debug=False)
    state.set_config('max_group_ids_exclude', 2)
    replacer.set_project_exclude_groups(2, [100, 101, 102])

    self.extension.get_processor().process_query(
        self.query, self.valid_data, settings
    )

    assert self.query.get_conditions() == [('project_id', 'IN', [2])]
    assert self.query.get_final()
def test_project_extension_query_processing(
    raw_data: dict, expected_conditions: Sequence[Condition]
):
    """Project extension processing must append the expected project
    conditions to the query."""
    extension = ProjectExtension(processor=ProjectExtensionProcessor())
    valid_data = validate_jsonschema(raw_data, extension.get_schema())
    query = Query({"conditions": []})
    settings = RequestSettings(turbo=False, consistent=False, debug=False)

    extension.get_processor().process_query(query, valid_data, settings)

    assert query.get_conditions() == expected_conditions
def test_organization_extension_query_processing_happy_path():
    """A valid organization payload must add an org_id equality condition."""
    extension = OrganizationExtension()
    valid_data = validate_jsonschema({"organization": 2}, extension.get_schema())
    query = Query({"conditions": []})
    settings = RequestSettings(turbo=False, consistent=False, debug=False)

    extension.get_processor().process_query(query, valid_data, settings)

    assert query.get_conditions() == [("org_id", "=", 2)]
def test_col_split(
    dataset_name: str,
    first_query_data: Mapping[str, Any],
    second_query_data: Mapping[str, Any],
):
    """The column splitter must issue queries whose selected columns match
    either the first (minimal) or the second (full) expected column set."""
    first_cols = list(first_query_data[0].keys())
    second_cols = list(second_query_data[0].keys())

    @split_query
    def do_query(dataset: Dataset, request: Request, timer: Timer):
        selected_cols = request.query.get_selected_columns()
        if selected_cols == first_cols:
            return QueryResult({"data": first_query_data}, 200)
        if selected_cols == second_cols:
            return QueryResult({"data": second_query_data}, 200)
        raise ValueError(f"Unexpected selected columns: {selected_cols}")

    events = get_dataset(dataset_name)
    query = Query(
        {
            "selected_columns": second_cols,
            "conditions": [""],
            "orderby": "events.event_id",
            "sample": 10,
            "limit": 100,
            "offset": 50,
        },
        events.get_dataset_schemas().get_read_schema().get_data_source(),
    )
    request = Request(
        query,
        RequestSettings(False, False, False),
        {
            "project": {"project": 1},
            "timeseries": {
                "from_date": "2019-09-19T10:00:00",
                "to_date": "2019-09-19T12:00:00",
                "granularity": 3600,
            },
        },
    )
    do_query(events, request, None)
def test_project_extension_project_rate_limits_are_overridden():
    """Per-project config overrides must replace the default rate limits."""
    extension = ProjectExtension(processor=ProjectExtensionProcessor())
    valid_data = validate_jsonschema({'project': [2, 3]}, extension.get_schema())
    query = Query({'conditions': []})
    settings = RequestSettings(turbo=False, consistent=False, debug=False)

    # Override the defaults for project 2 before processing.
    state.set_config('project_per_second_limit_2', 5)
    state.set_config('project_concurrent_limit_2', 10)

    extension.get_processor().process_query(query, valid_data, settings)

    newest = settings.get_rate_limit_params()[-1]
    assert newest.bucket == '2'
    assert newest.per_second_limit == 5
    assert newest.concurrent_limit == 10