예제 #1
0
def test_edit_query():
    """Apply each Query mutator in turn and check the matching getter."""
    body = {
        "selected_columns": ["c1", "c2", "c3"],
        "conditions": [["c1", "=", "a"]],
        "arrayjoin": "tags",
        "having": [["c4", "=", "c"]],
        "groupby": ["project_id"],
        "aggregations": [["count()", "", "count"]],
        "orderby": "event_id",
        "limitby": (100, "environment"),
        "sample": 10,
        "limit": 100,
        "offset": 50,
        "totals": True,
    }
    source = TableSource("my_table", ColumnSet([]))
    query = Query(body, source)

    # Selected columns are replaced wholesale.
    replacement_columns = ["c4"]
    query.set_selected_columns(replacement_columns)
    assert query.get_selected_columns() == ["c4"]

    # Aggregations are replaced wholesale.
    replacement_aggregations = [["different_agg()", "", "something"]]
    query.set_aggregations(replacement_aggregations)
    assert query.get_aggregations() == [["different_agg()", "", "something"]]

    # add_groupby is expected to extend the group-by from the body.
    extra_groupby = ["more", "more2"]
    query.add_groupby(extra_groupby)
    assert query.get_groupby() == ["project_id", "more", "more2"]

    # add_conditions is expected to append after the condition from the body.
    extra_conditions = [["c5", "=", "9"]]
    query.add_conditions(extra_conditions)
    assert query.get_conditions() == [["c1", "=", "a"], ["c5", "=", "9"]]

    # set_conditions is expected to discard everything accumulated so far.
    replacement_conditions = [["c6", "=", "10"]]
    query.set_conditions(replacement_conditions)
    assert query.get_conditions() == [["c6", "=", "10"]]

    query.set_arrayjoin("not_tags")
    assert query.get_arrayjoin() == "not_tags"

    # Granularity is not part of the body above; it is only set here.
    query.set_granularity(7200)
    assert query.get_granularity() == 7200

    # Prewhere is not part of the body above; it is only set here.
    prewhere = [["pc6", "=", "10"]]
    query.set_prewhere(prewhere)
    assert query.get_prewhere() == [["pc6", "=", "10"]]
예제 #2
0
 def process_query(self, query: Query, request_settings: RequestSettings,) -> None:
     """
     Move the highest-priority eligible conditions of ``query`` into PREWHERE.

     A condition is eligible when ``util.is_condition`` accepts it (a single
     top-level condition, not an OR-nested group) and at least one column
     referenced by its left-hand side is among the data source's prewhere
     candidates. Eligible conditions are ranked by the best (lowest) position
     of their columns in the candidate list, and at most the configured
     maximum number of them are moved.

     The query is left untouched when the data source offers no prewhere
     candidates or when the query has no conditions. Otherwise the prewhere
     is always set, possibly to an empty list.

     :param query: the query to rewrite in place.
     :param request_settings: not used by this method.
     """
     limit: int = (
         self.__max_prewhere_conditions or settings.MAX_PREWHERE_CONDITIONS
     )

     candidate_keys = query.get_data_source().get_prewhere_candidates()
     if not candidate_keys:
         return

     where_conditions = query.get_conditions()
     if not where_conditions:
         return

     # Pass 1: keep the simple conditions that touch a candidate column,
     # paired with the columns their left-hand side references.
     eligible = []
     for cond in where_conditions:
         if not util.is_condition(cond):
             continue
         if any(col in candidate_keys for col in util.columns_in_expr(cond[0])):
             eligible.append((util.columns_in_expr(cond[0]), cond))

     # Pass 2: rank every eligible condition by the earliest position any of
     # its columns holds in the candidate list (earlier means higher priority).
     ranked = []
     for columns, cond in eligible:
         best_position = min(
             candidate_keys.index(col) for col in columns if col in candidate_keys
         )
         ranked.append((best_position, cond))
     # list.sort is stable, so equally ranked conditions keep their order.
     ranked.sort(key=lambda entry: entry[0])

     moved: Sequence[Condition] = []
     if ranked:
         moved = [cond for _, cond in ranked][:limit]
         # Whatever was promoted to PREWHERE must no longer appear in WHERE.
         query.set_conditions(
             [cond for cond in where_conditions if cond not in moved]
         )
     query.set_prewhere(moved)
예제 #3
0
def test_referenced_columns():
    """Check the columns Query reports as referenced for several bodies."""
    source = (
        get_dataset("events")
        .get_dataset_schemas()
        .get_read_schema()
        .get_data_source()
    )

    # Each entry is (body, all referenced columns, columns referenced in
    # conditions). None of these bodies carries a "having" clause, so the
    # having columns are expected to be empty for every one of them.
    cases = [
        # a = 1 AND b = 1
        (
            {"conditions": [["a", "=", "1"], ["b", "=", "1"]]},
            {"a", "b"},
            {"a", "b"},
        ),
        # a = 1 AND (b = 1 OR c = 1)
        (
            {"conditions": [["a", "=", "1"], [["b", "=", "1"], ["c", "=", "1"]]]},
            {"a", "b", "c"},
            {"a", "b", "c"},
        ),
        # a = 1 AND (b = 1 OR foo(c) = 1)
        (
            {
                "conditions": [
                    ["a", "=", "1"],
                    [["b", "=", "1"], [["foo", ["c"]], "=", "1"]],
                ]
            },
            {"a", "b", "c"},
            {"a", "b", "c"},
        ),
        # a = 1 AND (b = 1 OR foo(c, bar(d)) = 1)
        (
            {
                "conditions": [
                    ["a", "=", "1"],
                    [["b", "=", "1"], [["foo", ["c", ["bar", ["d"]]]], "=", "1"]],
                ]
            },
            {"a", "b", "c", "d"},
            {"a", "b", "c", "d"},
        ),
        # Other fields, including expressions in selected columns
        (
            {
                "arrayjoin": "tags_key",
                "groupby": ["time", "group_id"],
                "orderby": "-time",
                "selected_columns": [
                    "group_id",
                    "time",
                    ["foo", ["c", ["bar", ["d"]]]],  # foo(c, bar(d))
                ],
                "aggregations": [["uniq", "tags_value", "values_seen"]],
            },
            {"tags_key", "tags_value", "time", "group_id", "c", "d"},
            set(),
        ),
    ]
    for body, expected_all, expected_in_conditions in cases:
        query = Query(body, source)
        assert query.get_all_referenced_columns() == expected_all
        assert query.get_columns_referenced_in_conditions() == expected_in_conditions
        assert query.get_columns_referenced_in_having() == set()

    # Conditions plus a nested having clause, with a prewhere added after
    # construction: the prewhere column must show up among all referenced
    # columns.
    body = {
        "conditions": [["a", "=", "1"]],
        "having": [
            ["b", "=", "1"],
            [["c", "=", "1"], [["foo", ["d", ["bar", ["e"]]]], "=", "1"]],
        ],
    }
    query = Query(body, source)
    query.set_prewhere([["pc6", "=", "10"]])
    assert query.get_all_referenced_columns() == {"a", "b", "c", "d", "e", "pc6"}
    assert query.get_columns_referenced_in_having() == {"b", "c", "d", "e"}