def test_slice_of_map_optimizer(
    unprocessed: Expression,
    expected: Expression,
    formatted_value: str,
) -> None:
    # Renamed from a copy-pasted "test_uuid_array_column_processor": the
    # processor under test here is SliceOfMapOptimizer.
    unprocessed_query = Query(
        Table("transactions", ColumnSet([])),
        selected_columns=[
            SelectedExpression("column2", Column(None, None, "column2"))
        ],
        condition=unprocessed,
    )
    expected_query = Query(
        Table("transactions", ColumnSet([])),
        selected_columns=[
            SelectedExpression("column2", Column(None, None, "column2"))
        ],
        condition=expected,
    )

    SliceOfMapOptimizer().process_query(unprocessed_query, HTTPRequestSettings())

    assert expected_query.get_condition() == unprocessed_query.get_condition()
    condition = unprocessed_query.get_condition()
    assert condition is not None
    ret = condition.accept(ClickhouseExpressionFormatter())
    assert ret == formatted_value

def test_timeseries_column_format_expressions(
    granularity: int, ast_value: FunctionCall, formatted_value: str
) -> None:
    unprocessed = Query(
        {"granularity": granularity},
        TableSource("transactions", ColumnSet([])),
        selected_columns=[
            Column("transaction.duration", "duration", None),
            Column("my_start", "bucketed_start", None),
        ],
    )
    expected = Query(
        {"granularity": granularity},
        TableSource("transactions", ColumnSet([])),
        selected_columns=[
            Column("transaction.duration", "duration", None),
            ast_value,
        ],
    )

    dataset = TransactionsDataset()
    TimeSeriesColumnProcessor(
        dataset._TimeSeriesDataset__time_group_columns
    ).process_query(unprocessed, HTTPRequestSettings())
    assert (
        expected.get_selected_columns_from_ast()
        == unprocessed.get_selected_columns_from_ast()
    )

    ret = unprocessed.get_selected_columns_from_ast()[1].accept(
        ClickhouseExpressionFormatter()
    )
    assert ret == formatted_value

def detect_table(
    query: Query,
    events_only_columns: ColumnSet,
    transactions_only_columns: ColumnSet,
) -> str:
    """
    Given a query, we attempt to guess whether it is better to fetch data
    from the "events" or "transactions" storage. This is going to be wrong
    in some cases.
    """
    # First check for a top level condition on the event type: a query for
    # type = error goes to events, a query for type = transaction goes to
    # transactions.
    conditions = query.get_conditions()
    if conditions:
        for condition in conditions:
            if is_condition(condition):
                if tuple(condition) == ("type", "=", "error"):
                    return EVENTS
                elif tuple(condition) == ("type", "=", "transaction"):
                    return TRANSACTIONS

    # Check for any conditions that reference a table specific field
    condition_columns = query.get_columns_referenced_in_conditions()
    if any(events_only_columns.get(col) for col in condition_columns):
        return EVENTS
    if any(transactions_only_columns.get(col) for col in condition_columns):
        return TRANSACTIONS

    # Check for any other references to a table specific field
    all_referenced_columns = query.get_all_referenced_columns()
    if any(events_only_columns.get(col) for col in all_referenced_columns):
        return EVENTS
    if any(transactions_only_columns.get(col) for col in all_referenced_columns):
        return TRANSACTIONS

    # Use events by default
    return EVENTS

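# Hedged usage sketch (not from the original source): how detect_table might
# route a legacy query. The dict-based Query constructor mirrors the one used
# in test_full_query below; the two ColumnSets here are hypothetical.
def example_detect_table_routing() -> None:
    events_only = ColumnSet([("group_id", UInt(64))])
    transactions_only = ColumnSet([("duration", UInt(32))])
    query = Query(
        {"conditions": [["type", "=", "transaction"]]},
        TableSource("discover", ColumnSet([])),
    )
    # The explicit type condition wins before any column-based heuristics.
    assert detect_table(query, events_only, transactions_only) == TRANSACTIONS
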
def test_type_condition_optimizer() -> None:
    cond1 = binary_condition(
        ConditionFunctions.EQ, Column(None, None, "col1"), Literal(None, "val1")
    )
    unprocessed_query = Query(
        Table("errors", ColumnSet([])),
        condition=binary_condition(
            BooleanFunctions.AND,
            binary_condition(
                ConditionFunctions.NEQ,
                Column(None, None, "type"),
                Literal(None, "transaction"),
            ),
            cond1,
        ),
    )
    expected_query = Query(
        Table("errors", ColumnSet([])),
        condition=binary_condition(BooleanFunctions.AND, Literal(None, 1), cond1),
    )
    TypeConditionOptimizer().process_query(unprocessed_query, HTTPQuerySettings())

    assert expected_query.get_condition() == unprocessed_query.get_condition()
    condition = unprocessed_query.get_condition()
    assert condition is not None
    ret = condition.accept(ClickhouseExpressionFormatter())
    assert ret == "1 AND equals(col1, 'val1')"

def test_fixed_string_array_column_processor(
    unprocessed: Expression,
    expected: Expression,
    formatted_value: str,
) -> None:
    # Renamed from a copy-pasted "test_uuid_array_column_processor": the
    # processor under test here is FixedStringArrayColumnProcessor.
    unprocessed_query = Query(
        Table("transactions", ColumnSet([])),
        selected_columns=[
            SelectedExpression("column2", Column(None, None, "column2"))
        ],
        condition=unprocessed,
    )
    expected_query = Query(
        Table("transactions", ColumnSet([])),
        selected_columns=[
            SelectedExpression("column2", Column(None, None, "column2"))
        ],
        condition=expected,
    )

    FixedStringArrayColumnProcessor(set(["column1", "column2"]), 32).process_query(
        unprocessed_query, HTTPQuerySettings()
    )
    assert unprocessed_query.get_selected_columns() == [
        SelectedExpression(
            "column2",
            Column(None, None, "column2"),
        )
    ]

    assert expected_query.get_condition() == unprocessed_query.get_condition()
    condition = unprocessed_query.get_condition()
    assert condition is not None
    ret = condition.accept(ClickhouseExpressionFormatter())
    assert ret == formatted_value

def test_timeseries_column_format_expressions(
    granularity: int, ast_value: FunctionCall, formatted_value: str
) -> None:
    unprocessed = Query(
        {"granularity": granularity},
        TableSource("transactions", ColumnSet([])),
        selected_columns=[
            SelectedExpression(
                "transaction.duration",
                Column("transaction.duration", None, "duration"),
            ),
            SelectedExpression("my_time", Column("my_time", None, "time")),
        ],
    )
    expected = Query(
        {"granularity": granularity},
        TableSource("transactions", ColumnSet([])),
        selected_columns=[
            SelectedExpression(
                "transaction.duration",
                Column("transaction.duration", None, "duration"),
            ),
            SelectedExpression(ast_value.alias, ast_value),
        ],
    )

    dataset = TransactionsDataset()
    for processor in dataset.get_query_processors():
        if isinstance(processor, TimeSeriesColumnProcessor):
            processor.process_query(unprocessed, HTTPRequestSettings())

    assert (
        expected.get_selected_columns_from_ast()
        == unprocessed.get_selected_columns_from_ast()
    )
    ret = unprocessed.get_selected_columns_from_ast()[1].expression.accept(
        ClickhouseExpressionFormatter()
    )
    assert ret == formatted_value

def test_query_data_source() -> None:
    """
    Tests using the Query as a data source
    """
    query = Query(
        Table("my_table", ColumnSet([])),
        selected_columns=[
            SelectedExpression(
                "col1", Column(alias="col1", table_name=None, column_name="col1")
            ),
            SelectedExpression(
                "some_func",
                FunctionCall(
                    "some_func",
                    "f",
                    (Column(alias="col1", table_name=None, column_name="col1"),),
                ),
            ),
            SelectedExpression(
                None, Column(alias="col2", table_name=None, column_name="col2")
            ),
        ],
    )
    assert query.get_columns() == ColumnSet(
        [("col1", Any()), ("some_func", Any()), ("_invalid_alias_2", Any())]
    )

def test_schema(self): cols = ColumnSet([("foo", UInt(8)), ("bar", Nested([("qux:mux", String())]))]) assert cols.for_schema() == "foo UInt8, bar Nested(`qux:mux` String)" assert cols["foo"].type == UInt(8) assert cols["bar.qux:mux"].type == Array(String())
def test_nested_query() -> None:
    """
    Simply builds a nested query.
    """
    nested = LogicalQuery(
        Entity(EntityKey.EVENTS, ColumnSet([("event_id", String())])),
        selected_columns=[
            SelectedExpression(
                "string_evt_id", Column("string_evt_id", None, "event_id")
            )
        ],
    )
    composite = CompositeQuery(
        from_clause=nested,
        selected_columns=[
            SelectedExpression("output", Column("output", None, "string_evt_id"))
        ],
    )

    # The iterator methods on the composite query do not descend into
    # the nested query
    assert composite.get_all_ast_referenced_columns() == {
        Column("output", None, "string_evt_id")
    }

    # The schema of the nested query is the selected clause of that query.
    assert composite.get_from_clause().get_columns() == ColumnSet(
        [("string_evt_id", Any())]
    )

def track_bad_query(
    query: Query,
    selected_entity: EntityKey,
    events_only_columns: ColumnSet,
    transactions_only_columns: ColumnSet,
) -> None:
    event_columns = set()
    transaction_columns = set()
    for col in query.get_all_ast_referenced_columns():
        if events_only_columns.get(col.column_name):
            event_columns.add(col.column_name)
        elif transactions_only_columns.get(col.column_name):
            transaction_columns.add(col.column_name)

    for subscript in query.get_all_ast_referenced_subscripts():
        schema_col_name = subscript_key_column_name(subscript)
        if events_only_columns.get(schema_col_name):
            event_columns.add(schema_col_name)
        if transactions_only_columns.get(schema_col_name):
            transaction_columns.add(schema_col_name)

    event_mismatch = event_columns and selected_entity == TRANSACTIONS
    transaction_mismatch = transaction_columns and selected_entity in [
        EVENTS,
        EVENTS_AND_TRANSACTIONS,
    ]

    if event_mismatch or transaction_mismatch:
        missing_columns = ",".join(
            sorted(event_columns if event_mismatch else transaction_columns)
        )
        selected_entity_str = (
            str(selected_entity.value)
            if isinstance(selected_entity, EntityKey)
            else selected_entity
        )
        metrics.increment(
            "query.impossible",
            tags={
                "selected_table": selected_entity_str,
                "missing_columns": missing_columns,
            },
        )

    if selected_entity == EVENTS_AND_TRANSACTIONS and (
        event_columns or transaction_columns
    ):
        # Not possible in future with merge table
        missing_events_columns = ",".join(sorted(event_columns))
        missing_transactions_columns = ",".join(sorted(transaction_columns))
        metrics.increment(
            "query.impossible-merge-table",
            tags={
                "missing_events_columns": missing_events_columns,
                "missing_transactions_columns": missing_transactions_columns,
            },
        )
    else:
        metrics.increment("query.success")

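# Hedged illustration (not from the original source) of the mismatch rules
# above using plain values: referencing an events-only column while the query
# was routed to TRANSACTIONS counts as an impossible query.
def example_event_mismatch() -> None:
    event_columns = {"group_id"}  # events-only columns the query referenced
    selected_entity = TRANSACTIONS  # entity the query was routed to
    event_mismatch = bool(event_columns) and selected_entity == TRANSACTIONS
    assert event_mismatch  # -> metrics.increment("query.impossible", ...)
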
def test_not_handled_processor() -> None:
    columnset = ColumnSet([])
    unprocessed = Query(
        QueryEntity(EntityKey.EVENTS, ColumnSet([])),
        selected_columns=[
            SelectedExpression(name=None, expression=Column(None, None, "id")),
            SelectedExpression(
                "result",
                FunctionCall("result", "notHandled", tuple()),
            ),
        ],
    )
    expected = Query(
        QueryEntity(EntityKey.EVENTS, ColumnSet([])),
        selected_columns=[
            SelectedExpression(name=None, expression=Column(None, None, "id")),
            SelectedExpression(
                "result",
                FunctionCall(
                    "result",
                    "arrayExists",
                    (
                        Lambda(
                            None,
                            ("x",),
                            binary_condition(
                                BooleanFunctions.AND,
                                FunctionCall(
                                    None, "isNotNull", (Argument(None, "x"),)
                                ),
                                binary_condition(
                                    ConditionFunctions.EQ,
                                    FunctionCall(
                                        None, "assumeNotNull", (Argument(None, "x"),)
                                    ),
                                    Literal(None, 0),
                                ),
                            ),
                        ),
                        Column(None, None, "exception_stacks.mechanism_handled"),
                    ),
                ),
            ),
        ],
    )
    processor = handled_functions.HandledFunctionsProcessor(
        "exception_stacks.mechanism_handled", columnset
    )
    processor.process_query(unprocessed, HTTPRequestSettings())

    assert expected.get_selected_columns() == unprocessed.get_selected_columns()

    ret = unprocessed.get_selected_columns()[1].expression.accept(
        ClickhouseExpressionFormatter()
    )
    assert ret == (
        "(arrayExists((x -> isNotNull(x) AND equals(assumeNotNull(x), 0)), "
        "exception_stacks.mechanism_handled) AS result)"
    )

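# Plain-Python rendering (hedged, not from the original source) of the
# arrayExists predicate built above: notHandled is true when at least one
# mechanism_handled flag is present (non-null) and equal to zero.
def example_not_handled(mechanism_handled):
    return any(x is not None and x == 0 for x in mechanism_handled)

assert example_not_handled([1, 0]) is True
assert example_not_handled([1, None]) is False
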
def test_events_column_format_expressions() -> None:
    unprocessed = Query(
        {},
        TableSource("events", ColumnSet([])),
        selected_columns=[
            SelectedExpression("dr_claw", Column("dr_claw", None, "culprit")),
            SelectedExpression(
                "the_group_id", Column("the_group_id", None, "group_id")
            ),
            SelectedExpression(
                "the_message", Column("the_message", None, "message")
            ),
        ],
    )
    expected = Query(
        {},
        TableSource("events", ColumnSet([])),
        selected_columns=[
            SelectedExpression("dr_claw", Column("dr_claw", None, "culprit")),
            SelectedExpression(
                "the_group_id",
                FunctionCall(
                    "the_group_id",
                    "nullIf",
                    (
                        Column(None, None, "group_id"),
                        Literal(None, 0),
                    ),
                ),
            ),
            SelectedExpression(
                "the_message",
                FunctionCall(
                    "the_message",
                    "coalesce",
                    (
                        Column(None, None, "search_message"),
                        Column(None, None, "message"),
                    ),
                ),
            ),
        ],
    )

    EventsColumnProcessor().process_query(unprocessed, HTTPRequestSettings())

    assert (
        expected.get_selected_columns_from_ast()
        == unprocessed.get_selected_columns_from_ast()
    )

    # Renamed from a reuse of `expected` to avoid shadowing the query above.
    expected_formatted = (
        "(nullIf(group_id, 0) AS the_group_id)",
        "(coalesce(search_message, message) AS the_message)",
    )

    for idx, column in enumerate(unprocessed.get_selected_columns_from_ast()[1:]):
        formatted = column.expression.accept(ClickhouseExpressionFormatter())
        assert expected_formatted[idx] == formatted

def test_failure_rate_format_expressions() -> None:
    unprocessed = Query(
        {},
        TableSource("events", ColumnSet([])),
        selected_columns=[
            SelectedExpression(name=None, expression=Column(None, None, "column2")),
            SelectedExpression("perf", FunctionCall("perf", "failure_rate", ())),
        ],
    )
    expected = Query(
        {},
        TableSource("events", ColumnSet([])),
        selected_columns=[
            SelectedExpression(name=None, expression=Column(None, None, "column2")),
            SelectedExpression(
                "perf",
                divide(
                    FunctionCall(
                        None,
                        "countIf",
                        (
                            combine_and_conditions(
                                [
                                    binary_condition(
                                        None,
                                        ConditionFunctions.NEQ,
                                        Column(None, None, "transaction_status"),
                                        Literal(None, code),
                                    )
                                    for code in [0, 1, 2]
                                ]
                            ),
                        ),
                    ),
                    count(),
                    "perf",
                ),
            ),
        ],
    )

    failure_rate_processor(ColumnSet([])).process_query(
        unprocessed, HTTPRequestSettings()
    )
    assert (
        expected.get_selected_columns_from_ast()
        == unprocessed.get_selected_columns_from_ast()
    )

    ret = unprocessed.get_selected_columns_from_ast()[1].expression.accept(
        ClickhouseExpressionFormatter()
    )
    assert ret == (
        "(divide(countIf(notEquals(transaction_status, 0) AND "
        "notEquals(transaction_status, 1) AND notEquals(transaction_status, 2)), "
        "count()) AS perf)"
    )

def test_failure_rate_format_expressions() -> None:
    unprocessed = Query(
        {},
        TableSource("events", ColumnSet([])),
        selected_columns=[
            SelectedExpression(name=None, expression=Column(None, None, "column2")),
            SelectedExpression("perf", FunctionCall("perf", "failure_rate", ())),
        ],
    )
    expected = Query(
        {},
        TableSource("events", ColumnSet([])),
        selected_columns=[
            SelectedExpression(name=None, expression=Column(None, None, "column2")),
            SelectedExpression(
                "perf",
                divide(
                    FunctionCall(
                        None,
                        "countIf",
                        (
                            binary_condition(
                                None,
                                ConditionFunctions.NOT_IN,
                                Column(None, None, "transaction_status"),
                                FunctionCall(
                                    None,
                                    "tuple",
                                    (
                                        Literal(alias=None, value=0),
                                        Literal(alias=None, value=1),
                                        Literal(alias=None, value=2),
                                    ),
                                ),
                            ),
                        ),
                    ),
                    count(),
                    "perf",
                ),
            ),
        ],
    )

    failure_rate_processor(ColumnSet([])).process_query(
        unprocessed, HTTPRequestSettings()
    )
    assert (
        expected.get_selected_columns_from_ast()
        == unprocessed.get_selected_columns_from_ast()
    )

    ret = unprocessed.get_selected_columns_from_ast()[1].expression.accept(
        ClickhouseExpressionFormatter()
    )
    assert ret == (
        "(divide(countIf(notIn(transaction_status, tuple(0, 1, 2))), count()) AS perf)"
    )

def test_uuid_array_column_processor(
    unprocessed: Expression,
    expected: Expression,
    formatted_value: str,
) -> None:
    unprocessed_query = Query(
        Table("transactions", ColumnSet([])),
        selected_columns=[
            SelectedExpression("column2", Column(None, None, "column2"))
        ],
        condition=unprocessed,
    )
    expected_query = Query(
        Table("transactions", ColumnSet([])),
        selected_columns=[
            SelectedExpression("column2", Column(None, None, "column2"))
        ],
        condition=expected,
    )

    UUIDArrayColumnProcessor(set(["column1", "column2"])).process_query(
        unprocessed_query, HTTPRequestSettings()
    )
    assert unprocessed_query.get_selected_columns() == [
        SelectedExpression(
            "column2",
            FunctionCall(
                None,
                "arrayMap",
                (
                    Lambda(
                        None,
                        ("x",),
                        FunctionCall(
                            None,
                            "replaceAll",
                            (
                                FunctionCall(
                                    None, "toString", (Argument(None, "x"),)
                                ),
                                Literal(None, "-"),
                                Literal(None, ""),
                            ),
                        ),
                    ),
                    Column(None, None, "column2"),
                ),
            ),
        )
    ]

    assert expected_query.get_condition() == unprocessed_query.get_condition()
    condition = unprocessed_query.get_condition()
    assert condition is not None
    ret = condition.accept(ClickhouseExpressionFormatter())
    assert ret == formatted_value

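# Hedged plain-Python equivalent (not from the original source) of the
# arrayMap(x -> replaceAll(toString(x), '-', ''), column) expression asserted
# above: each UUID in the array is stringified and its dashes stripped.
import uuid

def example_uuid_reformat(values):
    return [str(x).replace("-", "") for x in values]

assert example_uuid_reformat(
    [uuid.UUID("00000000-0000-0000-0000-000000000000")]
) == ["0" * 32]
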
def test_timeseries_format_expressions(
    granularity: int,
    condition: Optional[FunctionCall],
    exp_column: FunctionCall,
    exp_condition: Optional[FunctionCall],
    formatted_column: str,
    formatted_condition: str,
) -> None:
    unprocessed = Query(
        QueryEntity(EntityKey.EVENTS, ColumnSet([])),
        selected_columns=[
            SelectedExpression(
                "transaction.duration",
                Column("transaction.duration", None, "duration"),
            ),
            SelectedExpression("my_time", Column("my_time", None, "time")),
        ],
        condition=condition,
        groupby=[Column("my_time", None, "time")],
        granularity=granularity,
    )
    expected = Query(
        QueryEntity(EntityKey.EVENTS, ColumnSet([])),
        selected_columns=[
            SelectedExpression(
                "transaction.duration",
                Column("transaction.duration", None, "duration"),
            ),
            SelectedExpression(exp_column.alias, exp_column),
        ],
        condition=exp_condition,
    )

    entity = TransactionsEntity()
    processors = entity.get_query_processors()
    for processor in processors:
        if isinstance(processor, TimeSeriesProcessor):
            processor.process_query(unprocessed, HTTPRequestSettings())

    assert expected.get_selected_columns() == unprocessed.get_selected_columns()
    assert expected.get_condition() == unprocessed.get_condition()

    ret = unprocessed.get_selected_columns()[1].expression.accept(
        ClickhouseExpressionFormatter()
    )
    assert ret == formatted_column
    if condition:
        query_condition = unprocessed.get_condition()
        assert query_condition is not None
        ret = query_condition.accept(ClickhouseExpressionFormatter())
        assert formatted_condition == ret

    assert extract_granularity_from_query(unprocessed, "finish_ts") == granularity

def test_timeseries_format_expressions(
    granularity: int,
    condition: Optional[FunctionCall],
    exp_column: FunctionCall,
    exp_condition: Optional[FunctionCall],
    formatted_column: str,
    formatted_condition: str,
) -> None:
    unprocessed = Query(
        {},
        TableSource("transactions", ColumnSet([])),
        selected_columns=[
            SelectedExpression(
                "transaction.duration",
                Column("transaction.duration", None, "duration"),
            ),
            SelectedExpression("my_time", Column("my_time", None, "time")),
        ],
        condition=condition,
        granularity=granularity,
    )
    expected = Query(
        {"granularity": granularity},
        TableSource("transactions", ColumnSet([])),
        selected_columns=[
            SelectedExpression(
                "transaction.duration",
                Column("transaction.duration", None, "duration"),
            ),
            SelectedExpression(exp_column.alias, exp_column),
        ],
        condition=exp_condition,
    )

    entity = TransactionsEntity()
    processors = entity.get_query_processors()
    for processor in processors:
        if isinstance(processor, TimeSeriesProcessor):
            processor.process_query(unprocessed, HTTPRequestSettings())

    assert (
        expected.get_selected_columns_from_ast()
        == unprocessed.get_selected_columns_from_ast()
    )
    assert expected.get_condition_from_ast() == unprocessed.get_condition_from_ast()

    ret = unprocessed.get_selected_columns_from_ast()[1].expression.accept(
        ClickhouseExpressionFormatter()
    )
    assert ret == formatted_column
    if condition:
        ret = unprocessed.get_condition_from_ast().accept(
            ClickhouseExpressionFormatter()
        )
        assert formatted_condition == ret

def query() -> ClickhouseQuery:
    return ClickhouseQuery(
        LogicalQuery(
            {"conditions": [("project_id", "IN", [2])]},
            TableSource("my_table", ColumnSet([])),
            condition=build_in("project_id", [2]),
        )
    )

def test_joined_columns():
    schema = JoinedSchema(complex_join_structure)
    columns = schema.get_columns()

    expected_columns = ColumnSet(
        [
            ("t1.t1c1", UInt(64)),
            ("t1.t1c2", String()),
            ("t1.t1c3", Nested([("t11c4", UInt(64))])),
            ("t2.t2c1", UInt(64)),
            ("t2.t2c2", String()),
            ("t2.t2c3", Nested([("t21c4", UInt(64))])),
            ("t3.t3c1", UInt(64)),
            ("t3.t3c2", String()),
            ("t3.t3c3", Nested([("t31c4", UInt(64))])),
        ]
    )

    # Checks equality between flattened columns. Nested columns are
    # exploded here.
    assert {c.flattened for c in columns} == {
        c.flattened for c in expected_columns
    }

    # Checks equality between the structured set of columns. Nested columns
    # are not exploded.
    assert {repr(c) for c in columns.columns} == {
        repr(c) for c in expected_columns.columns
    }

def test_full_query():
    query = Query(
        {
            "selected_columns": ["c1", "c2", "c3"],
            "conditions": [["c1", "=", "a"]],
            "arrayjoin": "tags",
            "having": [["c4", "=", "c"]],
            "groupby": ["project_id"],
            "aggregations": [["count()", "", "count"]],
            "orderby": "event_id",
            "limitby": (100, "environment"),
            "sample": 10,
            "limit": 100,
            "offset": 50,
            "totals": True,
            "granularity": 60,
        },
        TableSource("my_table", ColumnSet([])),
    )

    assert query.get_selected_columns() == ["c1", "c2", "c3"]
    assert query.get_aggregations() == [["count()", "", "count"]]
    assert query.get_groupby() == ["project_id"]
    assert query.get_conditions() == [["c1", "=", "a"]]
    assert query.get_arrayjoin() == "tags"
    assert query.get_having() == [["c4", "=", "c"]]
    assert query.get_orderby() == "event_id"
    assert query.get_limitby() == (100, "environment")
    assert query.get_sample() == 10
    assert query.get_limit() == 100
    assert query.get_offset() == 50
    assert query.has_totals() is True
    assert query.get_granularity() == 60
    assert query.get_data_source().format_from() == "my_table"

def test_functions(
    default_validators: Mapping[str, FunctionCallValidator],
    entity_validators: Mapping[str, FunctionCallValidator],
    exception: Optional[Type[InvalidExpressionException]],
) -> None:
    fn_cached = functions.default_validators
    functions.default_validators = default_validators

    entity_return = MagicMock()
    entity_return.return_value = entity_validators
    events_entity = get_entity(EntityKey.EVENTS)
    cached = events_entity.get_function_call_validators
    setattr(events_entity, "get_function_call_validators", entity_return)
    data_source = QueryEntity(EntityKey.EVENTS, ColumnSet([]))

    expression = FunctionCall(
        None, "f", (Column(alias=None, table_name=None, column_name="col"),)
    )
    if exception is None:
        FunctionCallsValidator().validate(expression, data_source)
    else:
        with pytest.raises(exception):
            FunctionCallsValidator().validate(expression, data_source)

    # TODO: This setup/teardown should use a fixture instead.
    setattr(events_entity, "get_function_call_validators", cached)
    functions.default_validators = fn_cached

def test_hexint_column_processor(
    unprocessed: Expression, formatted_value: str
) -> None:
    unprocessed_query = Query(
        Table("transactions", ColumnSet([])),
        selected_columns=[
            SelectedExpression("column1", Column(None, None, "column1"))
        ],
        condition=unprocessed,
    )

    HexIntColumnProcessor(set(["column1"])).process_query(
        unprocessed_query, HTTPQuerySettings()
    )
    assert unprocessed_query.get_selected_columns() == [
        SelectedExpression(
            "column1",
            FunctionCall(
                None,
                "lower",
                (FunctionCall(None, "hex", (Column(None, None, "column1"),)),),
            ),
        )
    ]

    condition = unprocessed_query.get_condition()
    assert condition is not None
    ret = condition.accept(ClickhouseExpressionFormatter())
    assert ret == formatted_value

def __init__(self) -> None:
    read_columns = ColumnSet(
        [
            ("org_id", UInt(64)),
            ("project_id", UInt(64)),
            ("key_id", Nullable(UInt(64))),
            ("timestamp", DateTime()),
            ("outcome", UInt(8)),
            ("reason", LowCardinality(Nullable(String()))),
            ("event_id", Nullable(UUID())),
        ]
    )

    read_schema = MergeTreeSchema(
        columns=read_columns,
        local_table_name="outcomes_raw_local",
        dist_table_name="outcomes_raw_dist",
        order_by="(org_id, project_id, timestamp)",
        partition_by="(toMonday(timestamp))",
        settings={"index_granularity": 16384},
        migration_function=outcomes_raw_migrations,
    )

    dataset_schemas = DatasetSchemas(
        read_schema=read_schema, write_schema=None, intermediary_schemas=[]
    )

    super().__init__(
        dataset_schemas=dataset_schemas,
        time_group_columns={"time": "timestamp"},
        time_parse_columns=("timestamp",),
    )

def test_format_expressions(query: Query, expected_query: Query) -> None:
    processor = CustomFunction(
        ColumnSet(
            [("param1", String()), ("param2", UInt(8)), ("other_col", String())]
        ),
        "f_call",
        [("param1", ColType({String})), ("param2", ColType({UInt}))],
        partial_function(
            "f_call_impl(param1, inner_call(param2), my_const)",
            [("my_const", 420)],
        ),
    )
    # We cannot just run == on the query objects. The content of the two
    # objects is different: one is the AST and the other is the AST + raw body.
    processor.process_query(query, HTTPRequestSettings())
    assert (
        query.get_selected_columns_from_ast()
        == expected_query.get_selected_columns_from_ast()
    )
    assert query.get_groupby_from_ast() == expected_query.get_groupby_from_ast()
    assert query.get_condition_from_ast() == expected_query.get_condition_from_ast()
    assert query.get_arrayjoin_from_ast() == expected_query.get_arrayjoin_from_ast()
    assert query.get_having_from_ast() == expected_query.get_having_from_ast()
    assert query.get_orderby_from_ast() == expected_query.get_orderby_from_ast()

def __init__(self):
    read_columns = ColumnSet([
        ('org_id', UInt(64)),
        ('project_id', UInt(64)),
        ('key_id', Nullable(UInt(64))),
        ('timestamp', DateTime()),
        ('outcome', UInt(8)),
        ('reason', LowCardinality(Nullable(String()))),
        ('event_id', Nullable(UUID())),
    ])

    read_schema = MergeTreeSchema(
        columns=read_columns,
        local_table_name='outcomes_raw_local',
        dist_table_name='outcomes_raw_dist',
        order_by='(org_id, project_id, timestamp)',
        partition_by='(toMonday(timestamp))',
        settings={'index_granularity': 16384},
    )

    dataset_schemas = DatasetSchemas(
        read_schema=read_schema,
        write_schema=None,
        intermediary_schemas=[],
    )

    super().__init__(
        dataset_schemas=dataset_schemas,
        time_group_columns={'time': 'timestamp'},
        time_parse_columns=('timestamp',),
    )

def __init__(
    self,
    writable_storage_key: StorageKey,
    readable_storage_key: StorageKey,
    value_schema: Sequence[Column[SchemaModifiers]],
    mappers: TranslationMappers,
) -> None:
    writable_storage = get_writable_storage(writable_storage_key)
    readable_storage = get_storage(readable_storage_key)

    super().__init__(
        storages=[writable_storage, readable_storage],
        query_pipeline_builder=SimplePipelineBuilder(
            query_plan_builder=SingleStorageQueryPlanBuilder(
                readable_storage,
                mappers=TranslationMappers(
                    subscriptables=[
                        SubscriptableMapper(None, "tags", None, "tags"),
                    ],
                ).concat(mappers),
            )
        ),
        abstract_column_set=ColumnSet(
            [
                Column("org_id", UInt(64)),
                Column("project_id", UInt(64)),
                Column("metric_id", UInt(64)),
                Column("timestamp", DateTime()),
                Column("tags", Nested([("key", UInt(64)), ("value", UInt(64))])),
                *value_schema,
            ]
        ),
        join_relationships={},
        writable_storage=writable_storage,
        validators=[EntityRequiredColumnValidator({"org_id", "project_id"})],
        required_time_column="timestamp",
    )

def test_mand_conditions(table: str, mand_conditions: List[FunctionCall]) -> None:
    query = Query(
        Table(
            table,
            ColumnSet([]),
            final=False,
            sampling_rate=None,
            mandatory_conditions=mand_conditions,
        ),
        None,
        None,
        binary_condition(
            BooleanFunctions.AND,
            binary_condition(
                OPERATOR_TO_FUNCTION["="],
                Column("d", None, "d"),
                Literal(None, "1"),
            ),
            binary_condition(
                OPERATOR_TO_FUNCTION["="],
                Column("c", None, "c"),
                Literal(None, "3"),
            ),
        ),
    )

    query_ast_copy = copy.deepcopy(query)

    request_settings = HTTPRequestSettings(consistent=True)
    processor = MandatoryConditionApplier()
    processor.process_query(query, request_settings)

    query_ast_copy.add_condition_to_ast(combine_and_conditions(mand_conditions))

    assert query.get_condition_from_ast() == query_ast_copy.get_condition_from_ast()

def test_project_extension_query_adds_rate_limits():
    extension = ProjectExtension(
        processor=ProjectExtensionProcessor(project_column="project_id")
    )
    raw_data = {"project": [2, 3]}
    valid_data = validate_jsonschema(raw_data, extension.get_schema())
    query = Query(
        {"conditions": []},
        TableSource("my_table", ColumnSet([])),
    )
    request_settings = RequestSettings(turbo=False, consistent=False, debug=False)

    num_rate_limits_before_processing = len(
        request_settings.get_rate_limit_params()
    )
    extension.get_processor().process_query(query, valid_data, request_settings)

    rate_limits = request_settings.get_rate_limit_params()
    # make sure a rate limit was added by the processing
    assert len(rate_limits) == num_rate_limits_before_processing + 1

    most_recent_rate_limit = rate_limits[-1]
    assert most_recent_rate_limit.bucket == "2"
    assert most_recent_rate_limit.per_second_limit == 1000
    assert most_recent_rate_limit.concurrent_limit == 1000

def test_invalid_function_name(expression: FunctionCall, should_raise: bool) -> None:
    data_source = QueryEntity(EntityKey.EVENTS, ColumnSet([]))
    state.set_config("function-validator.enabled", True)

    with pytest.raises(InvalidExpressionException):
        FunctionCallsValidator().validate(expression, data_source)