def detect_table(
    query: Query,
    events_only_columns: ColumnSet,
    transactions_only_columns: ColumnSet,
) -> str:
    """
    Given a query, attempt to guess whether it is better to fetch data from
    the "events" or the "transactions" storage. This is a heuristic and is
    going to be wrong in some cases.

    The checks run in priority order and the first match wins:

    1. An entry of the query conditions accepted by ``is_condition`` that is
       exactly ``type = error`` (EVENTS) or ``type = transaction``
       (TRANSACTIONS).
    2. A column referenced in the conditions that is found in one of the
       table specific column sets.
    3. A column referenced anywhere in the query that is found in one of the
       table specific column sets.
    4. EVENTS as the default.

    In steps 2 and 3 the events-only columns are tested before the
    transactions-only ones, so a query that references both kinds resolves
    to EVENTS.

    :param query: The query to inspect.
    :param events_only_columns: Columns that only exist for events; a truthy
        result from ``.get(column_name)`` counts as a match.
    :param transactions_only_columns: Columns that only exist for
        transactions; looked up the same way.
    :return: Either ``EVENTS`` or ``TRANSACTIONS``.
    """
    # First check for a condition that matches either type = error or
    # type = transaction. Only these two exact forms are recognised; any
    # other condition on "type" falls through to the column based checks.
    conditions = query.get_conditions()
    if conditions:
        for condition in conditions:
            if not is_condition(condition):
                continue
            # Normalise to a tuple so the comparison works regardless of the
            # sequence type the condition is stored as.
            normalized = tuple(condition)
            if normalized == ("type", "=", "error"):
                return EVENTS
            if normalized == ("type", "=", "transaction"):
                return TRANSACTIONS

    # Check for any conditions that reference a table specific field
    condition_columns = query.get_columns_referenced_in_conditions()
    if any(events_only_columns.get(col) for col in condition_columns):
        return EVENTS
    if any(transactions_only_columns.get(col) for col in condition_columns):
        return TRANSACTIONS

    # Check for any other references to a table specific field. This is only
    # computed once the condition columns were inconclusive.
    all_referenced_columns = query.get_all_referenced_columns()
    if any(events_only_columns.get(col) for col in all_referenced_columns):
        return EVENTS
    if any(transactions_only_columns.get(col) for col in all_referenced_columns):
        return TRANSACTIONS

    # Use events by default
    return EVENTS
def test_referenced_columns():
    """
    Verify the column sets a Query reports for its conditions, its having
    clause and the query as a whole, across progressively more nested bodies.
    """
    events = get_dataset("events")
    data_source = events.get_dataset_schemas().get_read_schema().get_data_source()

    # Flat AND: a = 1 AND b = 1
    flat = Query({"conditions": [["a", "=", "1"], ["b", "=", "1"]]}, data_source)
    assert flat.get_all_referenced_columns() == {"a", "b"}
    assert flat.get_columns_referenced_in_conditions() == {"a", "b"}
    assert flat.get_columns_referenced_in_having() == set()

    # Nested OR: a = 1 AND (b = 1 OR c = 1)
    with_or = Query(
        {"conditions": [["a", "=", "1"], [["b", "=", "1"], ["c", "=", "1"]]]},
        data_source,
    )
    assert with_or.get_all_referenced_columns() == {"a", "b", "c"}
    assert with_or.get_columns_referenced_in_conditions() == {"a", "b", "c"}
    assert with_or.get_columns_referenced_in_having() == set()

    # Function on the left hand side: a = 1 AND (b = 1 OR foo(c) = 1)
    with_function = Query(
        {
            "conditions": [
                ["a", "=", "1"],
                [["b", "=", "1"], [["foo", ["c"]], "=", "1"]],
            ]
        },
        data_source,
    )
    assert with_function.get_all_referenced_columns() == {"a", "b", "c"}
    assert with_function.get_columns_referenced_in_conditions() == {"a", "b", "c"}
    assert with_function.get_columns_referenced_in_having() == set()

    # Nested function calls: a = 1 AND (b = 1 OR foo(c, bar(d)) = 1)
    with_nested_function = Query(
        {
            "conditions": [
                ["a", "=", "1"],
                [["b", "=", "1"], [["foo", ["c", ["bar", ["d"]]]], "=", "1"]],
            ]
        },
        data_source,
    )
    assert with_nested_function.get_all_referenced_columns() == {"a", "b", "c", "d"}
    assert with_nested_function.get_columns_referenced_in_conditions() == {
        "a",
        "b",
        "c",
        "d",
    }
    assert with_nested_function.get_columns_referenced_in_having() == set()

    # No conditions at all: columns come from arrayjoin, groupby, orderby,
    # aggregations and expressions in the selected columns.
    without_conditions = Query(
        {
            "arrayjoin": "tags_key",
            "groupby": ["time", "group_id"],
            "orderby": "-time",
            "selected_columns": [
                "group_id",
                "time",
                ["foo", ["c", ["bar", ["d"]]]],  # foo(c, bar(d))
            ],
            "aggregations": [["uniq", "tags_value", "values_seen"]],
        },
        data_source,
    )
    assert without_conditions.get_all_referenced_columns() == {
        "tags_key",
        "tags_value",
        "time",
        "group_id",
        "c",
        "d",
    }
    assert without_conditions.get_columns_referenced_in_conditions() == set()
    assert without_conditions.get_columns_referenced_in_having() == set()

    # Conditions plus having, with a prewhere set after construction.
    with_having = Query(
        {
            "conditions": [["a", "=", "1"]],
            "having": [
                ["b", "=", "1"],
                [["c", "=", "1"], [["foo", ["d", ["bar", ["e"]]]], "=", "1"]],
            ],
        },
        data_source,
    )
    with_having.set_prewhere([["pc6", "=", "10"]])
    assert with_having.get_all_referenced_columns() == {
        "a",
        "b",
        "c",
        "d",
        "e",
        "pc6",
    }
    assert with_having.get_columns_referenced_in_having() == {"b", "c", "d", "e"}