예제 #1
0
    def test_schema(self):
        """Check the schema string and the per-column types of a ColumnSet."""
        column_defs = [
            ('foo', UInt(8)),
            ('bar', Nested([('qux:mux', String())])),
        ]
        column_set = ColumnSet(column_defs)

        # The expected schema string keeps the nested member name back-quoted.
        rendered_schema = column_set.for_schema()
        assert rendered_schema == 'foo UInt8, bar Nested(`qux:mux` String)'

        foo_column = column_set['foo']
        assert foo_column.type == UInt(8)

        # The nested member is looked up by its dotted path and is expected
        # to be typed as an array of the declared member type.
        nested_member = column_set['bar.qux:mux']
        assert nested_member.type == Array(String())
def test_uuid_array_column_processor(
    unprocessed: Expression,
    expected: Expression,
    formatted_value: str,
) -> None:
    """
    Run SliceOfMapOptimizer on a query whose condition is `unprocessed` and
    check that the resulting condition equals `expected` and formats to
    `formatted_value`.
    """

    def make_query(condition: Expression) -> Query:
        # Build fresh objects on every call so the two queries share nothing.
        return Query(
            Table("transactions", ColumnSet([])),
            selected_columns=[
                SelectedExpression("column2", Column(None, None, "column2"))
            ],
            condition=condition,
        )

    unprocessed_query = make_query(unprocessed)
    expected_query = make_query(expected)

    optimizer = SliceOfMapOptimizer()
    optimizer.process_query(unprocessed_query, HTTPRequestSettings())

    processed_condition = unprocessed_query.get_condition()
    assert expected_query.get_condition() == processed_condition
    assert processed_condition is not None

    formatted = processed_condition.accept(ClickhouseExpressionFormatter())
    assert formatted == formatted_value
예제 #3
0
def test_timeseries_column_format_expressions(
    granularity, ast_value, formatted_value
) -> None:
    """
    Apply TimeSeriesColumnProcessor to a two-column query and check that the
    second selected column becomes `ast_value` and formats to
    `formatted_value`.
    """

    def make_query(second_column) -> Query:
        # The two queries differ only in their second selected column.
        return Query(
            {"granularity": granularity},
            TableSource("transactions", ColumnSet([])),
            selected_columns=[
                Column("transaction.duration", "duration", None),
                second_column,
            ],
        )

    unprocessed = make_query(Column("my_start", "bucketed_start", None))
    expected = make_query(ast_value)

    # The time group columns are read through the name-mangled private
    # attribute of the dataset.
    dataset = TransactionsDataset()
    time_group_columns = dataset._TimeSeriesDataset__time_group_columns
    processor = TimeSeriesColumnProcessor(time_group_columns)
    processor.process_query(unprocessed, HTTPRequestSettings())

    processed_columns = unprocessed.get_selected_columns_from_ast()
    assert expected.get_selected_columns_from_ast() == processed_columns

    formatted = processed_columns[1].accept(ClickhouseExpressionFormatter())
    assert formatted == formatted_value
예제 #4
0
def detect_table(query: Query, events_only_columns: ColumnSet,
                 transactions_only_columns: ColumnSet) -> str:
    """
    Given a query, we attempt to guess whether it is better to fetch data from the
    "events" or "transactions" storage. This is going to be wrong in some cases.

    The checks below run in order and the first one that matches decides:

    1. a top level condition equal to ``type = error`` (EVENTS) or
       ``type = transaction`` (TRANSACTIONS);
    2. a column referenced in the conditions that is found in
       ``events_only_columns`` (EVENTS) or ``transactions_only_columns``
       (TRANSACTIONS);
    3. any other referenced column found in one of those two sets, with the
       same mapping;
    4. EVENTS when nothing else matched.
    """
    # First check for a top level condition that matches either
    # type = error or type = transaction. The `or ()` covers a falsy result
    # (no conditions), which is skipped entirely.
    for condition in query.get_conditions() or ():
        if not is_condition(condition):
            continue
        simple_condition = tuple(condition)
        if simple_condition == ("type", "=", "error"):
            return EVENTS
        if simple_condition == ("type", "=", "transaction"):
            return TRANSACTIONS

    # Check for any conditions that reference a table specific field
    condition_columns = query.get_columns_referenced_in_conditions()
    if any(events_only_columns.get(col) for col in condition_columns):
        return EVENTS
    if any(transactions_only_columns.get(col) for col in condition_columns):
        return TRANSACTIONS

    # Check for any other references to a table specific field
    all_referenced_columns = query.get_all_referenced_columns()
    if any(events_only_columns.get(col) for col in all_referenced_columns):
        return EVENTS
    if any(transactions_only_columns.get(col)
           for col in all_referenced_columns):
        return TRANSACTIONS

    # Use events by default
    return EVENTS
예제 #5
0
def test_type_condition_optimizer() -> None:
    """
    Check that TypeConditionOptimizer turns `type != 'transaction'` into the
    literal 1 while leaving the sibling condition unchanged.
    """
    col1_equals_val1 = binary_condition(
        ConditionFunctions.EQ, Column(None, None, "col1"), Literal(None, "val1")
    )
    type_is_not_transaction = binary_condition(
        ConditionFunctions.NEQ,
        Column(None, None, "type"),
        Literal(None, "transaction"),
    )

    unprocessed_query = Query(
        Table("errors", ColumnSet([])),
        condition=binary_condition(
            BooleanFunctions.AND, type_is_not_transaction, col1_equals_val1
        ),
    )
    expected_query = Query(
        Table("errors", ColumnSet([])),
        condition=binary_condition(
            BooleanFunctions.AND, Literal(None, 1), col1_equals_val1
        ),
    )

    optimizer = TypeConditionOptimizer()
    optimizer.process_query(unprocessed_query, HTTPQuerySettings())

    processed_condition = unprocessed_query.get_condition()
    assert expected_query.get_condition() == processed_condition
    assert processed_condition is not None

    formatted = processed_condition.accept(ClickhouseExpressionFormatter())
    assert formatted == "1 AND equals(col1, 'val1')"
def test_uuid_array_column_processor(
    unprocessed: Expression,
    expected: Expression,
    formatted_value: str,
) -> None:
    """
    Run FixedStringArrayColumnProcessor (columns column1/column2, length 32)
    on a query whose condition is `unprocessed`; the selected column must be
    unchanged, the condition must equal `expected` and format to
    `formatted_value`.
    """

    def selected_column2() -> SelectedExpression:
        return SelectedExpression("column2", Column(None, None, "column2"))

    def make_query(condition: Expression) -> Query:
        # Build fresh objects on every call so the two queries share nothing.
        return Query(
            Table("transactions", ColumnSet([])),
            selected_columns=[selected_column2()],
            condition=condition,
        )

    unprocessed_query = make_query(unprocessed)
    expected_query = make_query(expected)

    processor = FixedStringArrayColumnProcessor({"column1", "column2"}, 32)
    processor.process_query(unprocessed_query, HTTPQuerySettings())

    # The selected column is expected to come out exactly as it went in.
    assert unprocessed_query.get_selected_columns() == [selected_column2()]

    processed_condition = unprocessed_query.get_condition()
    assert expected_query.get_condition() == processed_condition
    assert processed_condition is not None

    formatted = processed_condition.accept(ClickhouseExpressionFormatter())
    assert formatted == formatted_value
예제 #7
0
def test_timeseries_column_format_expressions(granularity: int,
                                              ast_value: FunctionCall,
                                              formatted_value: str) -> None:
    """
    Run every TimeSeriesColumnProcessor of TransactionsDataset on a query
    selecting `duration` and `time`, and check that the second selected
    expression becomes `ast_value` and formats to `formatted_value`.
    """

    def make_query(second_selected: SelectedExpression) -> Query:
        # The two queries differ only in their second selected expression.
        return Query(
            {"granularity": granularity},
            TableSource("transactions", ColumnSet([])),
            selected_columns=[
                SelectedExpression(
                    "transaction.duration",
                    Column("transaction.duration", None, "duration"),
                ),
                second_selected,
            ],
        )

    unprocessed = make_query(
        SelectedExpression("my_time", Column("my_time", None, "time")))
    expected = make_query(SelectedExpression(ast_value.alias, ast_value))

    timeseries_processors = (
        candidate
        for candidate in TransactionsDataset().get_query_processors()
        if isinstance(candidate, TimeSeriesColumnProcessor)
    )
    for timeseries_processor in timeseries_processors:
        timeseries_processor.process_query(unprocessed, HTTPRequestSettings())

    processed_columns = unprocessed.get_selected_columns_from_ast()
    assert expected.get_selected_columns_from_ast() == processed_columns

    formatted = processed_columns[1].expression.accept(
        ClickhouseExpressionFormatter())
    assert formatted == formatted_value
예제 #8
0
def test_query_data_source() -> None:
    """
    Tests using the Query as a data source
    """

    def aliased_column(name: str) -> Column:
        # A column whose alias is the same as its column name.
        return Column(alias=name, table_name=None, column_name=name)

    selected = [
        SelectedExpression("col1", aliased_column("col1")),
        SelectedExpression(
            "some_func",
            FunctionCall("some_func", "f", (aliased_column("col1"),)),
        ),
        SelectedExpression(None, aliased_column("col2")),
    ]
    query = Query(Table("my_table", ColumnSet([])), selected_columns=selected)

    # The third expression has no name and is expected to show up as
    # `_invalid_alias_2`.
    expected_columns = ColumnSet(
        [("col1", Any()), ("some_func", Any()), ("_invalid_alias_2", Any())]
    )
    assert query.get_columns() == expected_columns
예제 #9
0
    def test_schema(self):
        """Verify for_schema() output and item lookup on a ColumnSet."""
        nested_members = [("qux:mux", String())]
        cols = ColumnSet([
            ("foo", UInt(8)),
            ("bar", Nested(nested_members)),
        ])

        expected_schema = "foo UInt8, bar Nested(`qux:mux` String)"
        assert cols.for_schema() == expected_schema

        # Plain column: the type is the one it was declared with.
        foo_type = cols["foo"].type
        assert foo_type == UInt(8)

        # Nested member addressed by dotted path: expected to be an Array of
        # the declared member type.
        nested_type = cols["bar.qux:mux"].type
        assert nested_type == Array(String())
예제 #10
0
def test_nested_query() -> None:
    """
    Simply builds a nested query.
    """

    events_entity = Entity(
        EntityKey.EVENTS, ColumnSet([("event_id", String())]))
    inner_selection = [
        SelectedExpression(
            "string_evt_id", Column("string_evt_id", None, "event_id")),
    ]
    nested = LogicalQuery(events_entity, selected_columns=inner_selection)

    outer_selection = [
        SelectedExpression(
            "output", Column("output", None, "string_evt_id")),
    ]
    composite = CompositeQuery(
        from_clause=nested,
        selected_columns=outer_selection,
    )

    # The iterator methods on the composite query do not descend into
    # the nested query
    referenced = composite.get_all_ast_referenced_columns()
    assert referenced == {Column("output", None, "string_evt_id")}

    # The schema of the nested query is the selected clause of that query.
    nested_schema = composite.get_from_clause().get_columns()
    assert nested_schema == ColumnSet([("string_evt_id", Any())])
예제 #11
0
파일: discover.py 프로젝트: getsentry/snuba
def track_bad_query(
    query: Query,
    selected_entity: EntityKey,
    events_only_columns: ColumnSet,
    transactions_only_columns: ColumnSet,
) -> None:
    """
    Record metrics about how the columns referenced by `query` match the
    selected entity.

    Referenced column names (and subscript key columns) found in
    `events_only_columns` / `transactions_only_columns` are collected, then:

    - "query.impossible" is incremented when events-only columns are used
      with TRANSACTIONS, or transactions-only columns are used with EVENTS
      or EVENTS_AND_TRANSACTIONS;
    - "query.impossible-merge-table" is incremented when the entity is
      EVENTS_AND_TRANSACTIONS and any such column was collected;
    - otherwise "query.success" is incremented.
    """
    event_columns = set()
    transaction_columns = set()

    for column in query.get_all_ast_referenced_columns():
        name = column.column_name
        # A plain column is attributed to one side only; events are checked
        # first.
        if events_only_columns.get(name):
            event_columns.add(name)
        elif transactions_only_columns.get(name):
            transaction_columns.add(name)

    for subscript in query.get_all_ast_referenced_subscripts():
        key_column = subscript_key_column_name(subscript)
        # Unlike plain columns, a subscript key column may be recorded on
        # both sides.
        if events_only_columns.get(key_column):
            event_columns.add(key_column)
        if transactions_only_columns.get(key_column):
            transaction_columns.add(key_column)

    event_mismatch = bool(event_columns) and selected_entity == TRANSACTIONS
    transaction_mismatch = bool(transaction_columns) and selected_entity in (
        EVENTS,
        EVENTS_AND_TRANSACTIONS,
    )

    if event_mismatch or transaction_mismatch:
        offending = event_columns if event_mismatch else transaction_columns
        if isinstance(selected_entity, EntityKey):
            selected_entity_str = str(selected_entity.value)
        else:
            selected_entity_str = selected_entity

        metrics.increment(
            "query.impossible",
            tags={
                "selected_table": selected_entity_str,
                "missing_columns": ",".join(sorted(offending)),
            },
        )

    any_specific_column = event_columns or transaction_columns
    if selected_entity == EVENTS_AND_TRANSACTIONS and any_specific_column:
        # Not possible in future with merge table
        metrics.increment(
            "query.impossible-merge-table",
            tags={
                "missing_events_columns": ",".join(sorted(event_columns)),
                "missing_transactions_columns": ",".join(
                    sorted(transaction_columns)),
            },
        )
    else:
        # NOTE(review): this branch pairs only with the merge-table check
        # above, so "query.success" is also emitted after "query.impossible"
        # was recorded for a non-merge entity -- confirm this is intended.
        metrics.increment("query.success")
예제 #12
0
def test_not_handled_processor() -> None:
    """
    Check that HandledFunctionsProcessor rewrites `notHandled()` into an
    `arrayExists` call over `exception_stacks.mechanism_handled`.
    """
    columnset = ColumnSet([])

    def make_query(result_expression: FunctionCall) -> Query:
        # The two queries differ only in the expression aliased "result".
        return Query(
            QueryEntity(EntityKey.EVENTS, ColumnSet([])),
            selected_columns=[
                SelectedExpression(name=None, expression=Column(None, None, "id")),
                SelectedExpression("result", result_expression),
            ],
        )

    unprocessed = make_query(FunctionCall("result", "notHandled", ()))

    # Expected rewrite: x -> isNotNull(x) AND assumeNotNull(x) = 0
    x_is_not_null = FunctionCall(None, "isNotNull", (Argument(None, "x"),))
    x_equals_zero = binary_condition(
        ConditionFunctions.EQ,
        FunctionCall(None, "assumeNotNull", (Argument(None, "x"),)),
        Literal(None, 0),
    )
    not_handled_lambda = Lambda(
        None,
        ("x",),
        binary_condition(BooleanFunctions.AND, x_is_not_null, x_equals_zero),
    )
    expected = make_query(
        FunctionCall(
            "result",
            "arrayExists",
            (
                not_handled_lambda,
                Column(None, None, "exception_stacks.mechanism_handled"),
            ),
        )
    )

    processor = handled_functions.HandledFunctionsProcessor(
        "exception_stacks.mechanism_handled", columnset
    )
    processor.process_query(unprocessed, HTTPRequestSettings())

    processed_columns = unprocessed.get_selected_columns()
    assert expected.get_selected_columns() == processed_columns

    formatted = processed_columns[1].expression.accept(
        ClickhouseExpressionFormatter()
    )
    assert formatted == (
        "(arrayExists((x -> isNotNull(x) AND equals(assumeNotNull(x), 0)), exception_stacks.mechanism_handled) AS result)"
    )
def test_events_column_format_expressions() -> None:
    """
    Check that EventsColumnProcessor rewrites `group_id` into
    `nullIf(group_id, 0)` and `message` into
    `coalesce(search_message, message)`, leaving `culprit` unchanged.
    """

    def make_query(group_id_expression, message_expression) -> Query:
        # The two queries differ only in their second and third expressions.
        return Query(
            {},
            TableSource("events", ColumnSet([])),
            selected_columns=[
                SelectedExpression(
                    "dr_claw", Column("dr_claw", None, "culprit")),
                SelectedExpression("the_group_id", group_id_expression),
                SelectedExpression("the_message", message_expression),
            ],
        )

    unprocessed = make_query(
        Column("the_group_id", None, "group_id"),
        Column("the_message", None, "message"),
    )
    expected = make_query(
        FunctionCall(
            "the_group_id",
            "nullIf",
            (Column(None, None, "group_id"), Literal(None, 0)),
        ),
        FunctionCall(
            "the_message",
            "coalesce",
            (
                Column(None, None, "search_message"),
                Column(None, None, "message"),
            ),
        ),
    )

    EventsColumnProcessor().process_query(unprocessed, HTTPRequestSettings())

    processed_columns = unprocessed.get_selected_columns_from_ast()
    assert expected.get_selected_columns_from_ast() == processed_columns

    expected_formatted = (
        "(nullIf(group_id, 0) AS the_group_id)",
        "(coalesce(search_message, message) AS the_message)",
    )

    # The first selected column is skipped; only the rewritten ones are
    # formatted.
    for position, selected in enumerate(processed_columns[1:]):
        formatted = selected.expression.accept(ClickhouseExpressionFormatter())
        assert expected_formatted[position] == formatted
예제 #14
0
def test_failure_rate_format_expressions() -> None:
    """
    Check that the failure rate processor expands `failure_rate()` into
    `countIf(<transaction_status != 0, 1 and 2>) / count()` aliased "perf".
    """

    def make_query(perf_expression) -> Query:
        # The two queries differ only in the expression named "perf".
        return Query(
            {},
            TableSource("events", ColumnSet([])),
            selected_columns=[
                SelectedExpression(
                    name=None, expression=Column(None, None, "column2")
                ),
                SelectedExpression("perf", perf_expression),
            ],
        )

    unprocessed = make_query(FunctionCall("perf", "failure_rate", ()))

    # One `transaction_status != code` condition per code, AND-ed together.
    status_conditions = [
        binary_condition(
            None,
            ConditionFunctions.NEQ,
            Column(None, None, "transaction_status"),
            Literal(None, code),
        )
        for code in (0, 1, 2)
    ]
    matching_count = FunctionCall(
        None, "countIf", (combine_and_conditions(status_conditions),)
    )
    expected = make_query(divide(matching_count, count(), "perf"))

    processor = failure_rate_processor(ColumnSet([]))
    processor.process_query(unprocessed, HTTPRequestSettings())

    processed_columns = unprocessed.get_selected_columns_from_ast()
    assert expected.get_selected_columns_from_ast() == processed_columns

    formatted = processed_columns[1].expression.accept(
        ClickhouseExpressionFormatter()
    )
    assert formatted == (
        "(divide(countIf(notEquals(transaction_status, 0) AND notEquals(transaction_status, 1) AND notEquals(transaction_status, 2)), count()) AS perf)"
    )
예제 #15
0
def test_failure_rate_format_expressions() -> None:
    """
    Check that the failure rate processor expands `failure_rate()` into
    `countIf(transaction_status NOT IN (0, 1, 2)) / count()` aliased "perf".
    """

    def make_query(perf_expression) -> Query:
        # The two queries differ only in the expression named "perf".
        return Query(
            {},
            TableSource("events", ColumnSet([])),
            selected_columns=[
                SelectedExpression(
                    name=None, expression=Column(None, None, "column2")),
                SelectedExpression("perf", perf_expression),
            ],
        )

    unprocessed = make_query(FunctionCall("perf", "failure_rate", ()))

    excluded_codes = FunctionCall(
        None,
        "tuple",
        tuple(Literal(alias=None, value=code) for code in (0, 1, 2)),
    )
    status_not_excluded = binary_condition(
        None,
        ConditionFunctions.NOT_IN,
        Column(None, None, "transaction_status"),
        excluded_codes,
    )
    matching_count = FunctionCall(None, "countIf", (status_not_excluded, ))
    expected = make_query(divide(matching_count, count(), "perf"))

    processor = failure_rate_processor(ColumnSet([]))
    processor.process_query(unprocessed, HTTPRequestSettings())

    processed_columns = unprocessed.get_selected_columns_from_ast()
    assert expected.get_selected_columns_from_ast() == processed_columns

    formatted = processed_columns[1].expression.accept(
        ClickhouseExpressionFormatter())
    assert formatted == (
        "(divide(countIf(notIn(transaction_status, tuple(0, 1, 2))), count()) AS perf)"
    )
예제 #16
0
def test_uuid_array_column_processor(
    unprocessed: Expression,
    expected: Expression,
    formatted_value: str,
) -> None:
    """
    Run UUIDArrayColumnProcessor (columns column1/column2) on a query whose
    condition is `unprocessed`; the selected column2 must be wrapped in an
    arrayMap call, the condition must equal `expected` and format to
    `formatted_value`.
    """

    def make_query(condition: Expression) -> Query:
        # Build fresh objects on every call so the two queries share nothing.
        return Query(
            Table("transactions", ColumnSet([])),
            selected_columns=[
                SelectedExpression("column2", Column(None, None, "column2"))
            ],
            condition=condition,
        )

    unprocessed_query = make_query(unprocessed)
    expected_query = make_query(expected)

    processor = UUIDArrayColumnProcessor({"column1", "column2"})
    processor.process_query(unprocessed_query, HTTPRequestSettings())

    # Expected selection:
    #   arrayMap(x -> replaceAll(toString(x), '-', ''), column2)
    strip_dashes = Lambda(
        None,
        ("x", ),
        FunctionCall(
            None,
            "replaceAll",
            (
                FunctionCall(None, "toString", (Argument(None, "x"), )),
                Literal(None, "-"),
                Literal(None, ""),
            ),
        ),
    )
    expected_selection = [
        SelectedExpression(
            "column2",
            FunctionCall(
                None,
                "arrayMap",
                (strip_dashes, Column(None, None, "column2")),
            ),
        )
    ]
    assert unprocessed_query.get_selected_columns() == expected_selection

    processed_condition = unprocessed_query.get_condition()
    assert expected_query.get_condition() == processed_condition
    assert processed_condition is not None

    formatted = processed_condition.accept(ClickhouseExpressionFormatter())
    assert formatted == formatted_value
예제 #17
0
def test_timeseries_format_expressions(
    granularity: int,
    condition: Optional[FunctionCall],
    exp_column: FunctionCall,
    exp_condition: Optional[FunctionCall],
    formatted_column: str,
    formatted_condition: str,
) -> None:
    """
    Run every TimeSeriesProcessor of TransactionsEntity on a query selecting
    `duration` and `time`, then check the rewritten time column, the
    rewritten condition, their formatted forms and the granularity that can
    be extracted from the processed query.
    """

    def events_entity() -> QueryEntity:
        return QueryEntity(EntityKey.EVENTS, ColumnSet([]))

    def duration_expression() -> SelectedExpression:
        return SelectedExpression(
            "transaction.duration",
            Column("transaction.duration", None, "duration"),
        )

    unprocessed = Query(
        events_entity(),
        selected_columns=[
            duration_expression(),
            SelectedExpression("my_time", Column("my_time", None, "time")),
        ],
        condition=condition,
        groupby=[Column("my_time", None, "time")],
        granularity=granularity,
    )
    expected = Query(
        events_entity(),
        selected_columns=[
            duration_expression(),
            SelectedExpression(exp_column.alias, exp_column),
        ],
        condition=exp_condition,
    )

    timeseries_processors = (
        candidate
        for candidate in TransactionsEntity().get_query_processors()
        if isinstance(candidate, TimeSeriesProcessor)
    )
    for timeseries_processor in timeseries_processors:
        timeseries_processor.process_query(unprocessed, HTTPRequestSettings())

    processed_columns = unprocessed.get_selected_columns()
    assert expected.get_selected_columns() == processed_columns
    assert expected.get_condition() == unprocessed.get_condition()

    rendered_column = processed_columns[1].expression.accept(
        ClickhouseExpressionFormatter())
    assert rendered_column == formatted_column

    # The condition is only formatted when one was supplied.
    if condition:
        query_condition = unprocessed.get_condition()
        assert query_condition is not None
        rendered_condition = query_condition.accept(
            ClickhouseExpressionFormatter())
        assert formatted_condition == rendered_condition

    extracted = extract_granularity_from_query(unprocessed, "finish_ts")
    assert extracted == granularity
예제 #18
0
def test_timeseries_format_expressions(
    granularity: int,
    condition: Optional[FunctionCall],
    exp_column: FunctionCall,
    exp_condition: Optional[FunctionCall],
    formatted_column: str,
    formatted_condition: str,
) -> None:
    """
    Run every TimeSeriesProcessor of TransactionsEntity on a query selecting
    `duration` and `time`, then check the rewritten time column and
    condition together with their formatted forms.
    """

    def transactions_source() -> TableSource:
        return TableSource("transactions", ColumnSet([]))

    def duration_expression() -> SelectedExpression:
        return SelectedExpression(
            "transaction.duration",
            Column("transaction.duration", None, "duration"),
        )

    unprocessed = Query(
        {},
        transactions_source(),
        selected_columns=[
            duration_expression(),
            SelectedExpression("my_time", Column("my_time", None, "time")),
        ],
        condition=condition,
        granularity=granularity,
    )
    expected = Query(
        {"granularity": granularity},
        transactions_source(),
        selected_columns=[
            duration_expression(),
            SelectedExpression(exp_column.alias, exp_column),
        ],
        condition=exp_condition,
    )

    timeseries_processors = (
        candidate
        for candidate in TransactionsEntity().get_query_processors()
        if isinstance(candidate, TimeSeriesProcessor)
    )
    for timeseries_processor in timeseries_processors:
        timeseries_processor.process_query(unprocessed, HTTPRequestSettings())

    processed_columns = unprocessed.get_selected_columns_from_ast()
    assert expected.get_selected_columns_from_ast() == processed_columns
    assert (expected.get_condition_from_ast() ==
            unprocessed.get_condition_from_ast())

    rendered_column = processed_columns[1].expression.accept(
        ClickhouseExpressionFormatter())
    assert rendered_column == formatted_column

    # The condition is only formatted when one was supplied.
    if condition:
        rendered_condition = unprocessed.get_condition_from_ast().accept(
            ClickhouseExpressionFormatter())
        assert formatted_condition == rendered_condition
예제 #19
0
def query() -> ClickhouseQuery:
    """
    Build a ClickhouseQuery wrapping a LogicalQuery on "my_table" whose
    condition on project_id is given both in the raw body and as the
    `condition` expression.
    """
    body = {"conditions": [("project_id", "IN", [2])]}
    logical_query = LogicalQuery(
        body,
        TableSource("my_table", ColumnSet([])),
        condition=build_in("project_id", [2]),
    )
    return ClickhouseQuery(logical_query)
예제 #20
0
def test_joined_columns():
    """
    Check that the columns of a JoinedSchema built from
    `complex_join_structure` match the expected prefixed columns, both in
    flattened and in structured form.
    """
    columns = JoinedSchema(complex_join_structure).get_columns()

    expected_columns = ColumnSet([
        ("t1.t1c1", UInt(64)),
        ("t1.t1c2", String()),
        ("t1.t1c3", Nested([("t11c4", UInt(64))])),
        ("t2.t2c1", UInt(64)),
        ("t2.t2c2", String()),
        ("t2.t2c3", Nested([("t21c4", UInt(64))])),
        ("t3.t3c1", UInt(64)),
        ("t3.t3c2", String()),
        ("t3.t3c3", Nested([("t31c4", UInt(64))])),
    ])

    # Checks equality between flattened columns. Nested columns are
    # exploded here
    actual_flattened = {column.flattened for column in columns}
    expected_flattened = {column.flattened for column in expected_columns}
    assert actual_flattened == expected_flattened

    # Checks equality between the structured set of columns. Nested columns
    # are not exploded.
    actual_structured = {repr(column) for column in columns.columns}
    expected_structured = {
        repr(column) for column in expected_columns.columns
    }
    assert actual_structured == expected_structured
예제 #21
0
def test_full_query():
    """
    Build a Query from a body that sets every clause and check that each
    getter returns the value given in the body.
    """
    body = {
        "selected_columns": ["c1", "c2", "c3"],
        "conditions": [["c1", "=", "a"]],
        "arrayjoin": "tags",
        "having": [["c4", "=", "c"]],
        "groupby": ["project_id"],
        "aggregations": [["count()", "", "count"]],
        "orderby": "event_id",
        "limitby": (100, "environment"),
        "sample": 10,
        "limit": 100,
        "offset": 50,
        "totals": True,
        "granularity": 60,
    }
    query = Query(body, TableSource("my_table", ColumnSet([])))

    # (getter, expected value) pairs, checked in this order.
    getter_expectations = [
        (query.get_selected_columns, ["c1", "c2", "c3"]),
        (query.get_aggregations, [["count()", "", "count"]]),
        (query.get_groupby, ["project_id"]),
        (query.get_conditions, [["c1", "=", "a"]]),
        (query.get_arrayjoin, "tags"),
        (query.get_having, [["c4", "=", "c"]]),
        (query.get_orderby, "event_id"),
        (query.get_limitby, (100, "environment")),
        (query.get_sample, 10),
        (query.get_limit, 100),
        (query.get_offset, 50),
    ]
    for getter, expected_value in getter_expectations:
        assert getter() == expected_value

    # has_totals() must be the boolean True itself, not merely truthy.
    assert query.has_totals() is True
    assert query.get_granularity() == 60

    data_source = query.get_data_source()
    assert data_source.format_from() == "my_table"
예제 #22
0
def test_functions(
    default_validators: Mapping[str, FunctionCallValidator],
    entity_validators: Mapping[str, FunctionCallValidator],
    exception: Optional[Type[InvalidExpressionException]],
) -> None:
    """
    Validate the call `f(col)` against the EVENTS entity with the given
    default and entity-level validators installed.

    `functions.default_validators` is replaced by `default_validators`, and
    the EVENTS entity's `get_function_call_validators` is replaced by a mock
    returning `entity_validators`. When `exception` is None the validation
    must pass; otherwise it must raise `exception`.

    Both patches are undone in `finally` blocks so that a failing assertion
    or an unexpected error does not leave the patched state in place.
    """
    fn_cached = functions.default_validators
    functions.default_validators = default_validators
    try:
        events_entity = get_entity(EntityKey.EVENTS)
        cached = events_entity.get_function_call_validators
        entity_return = MagicMock()
        entity_return.return_value = entity_validators
        setattr(events_entity, "get_function_call_validators", entity_return)
        try:
            data_source = QueryEntity(EntityKey.EVENTS, ColumnSet([]))
            expression = FunctionCall(
                None,
                "f",
                (Column(alias=None, table_name=None, column_name="col"), ),
            )
            if exception is None:
                FunctionCallsValidator().validate(expression, data_source)
            else:
                with pytest.raises(exception):
                    FunctionCallsValidator().validate(expression, data_source)
        finally:
            # TODO: This should use fixture to do this
            setattr(events_entity, "get_function_call_validators", cached)
    finally:
        functions.default_validators = fn_cached
예제 #23
0
def test_hexint_column_processor(unprocessed: Expression,
                                 formatted_value: str) -> None:
    """
    Run HexIntColumnProcessor (column column1) on a query whose condition is
    `unprocessed`; the selected column1 must become `lower(hex(column1))`
    and the condition must format to `formatted_value`.
    """
    unprocessed_query = Query(
        Table("transactions", ColumnSet([])),
        selected_columns=[
            SelectedExpression("column1", Column(None, None, "column1"))
        ],
        condition=unprocessed,
    )

    processor = HexIntColumnProcessor({"column1"})
    processor.process_query(unprocessed_query, HTTPQuerySettings())

    # Expected selection: lower(hex(column1))
    hex_of_column = FunctionCall(
        None, "hex", (Column(None, None, "column1"), ))
    expected_selection = [
        SelectedExpression(
            "column1",
            FunctionCall(None, "lower", (hex_of_column, )),
        )
    ]
    assert unprocessed_query.get_selected_columns() == expected_selection

    processed_condition = unprocessed_query.get_condition()
    assert processed_condition is not None

    formatted = processed_condition.accept(ClickhouseExpressionFormatter())
    assert formatted == formatted_value
예제 #24
0
    def __init__(self) -> None:
        """
        Declare the outcomes_raw read schema (there is no write schema) and
        hand it to the parent initializer together with the time column
        configuration.
        """
        read_schema = MergeTreeSchema(
            columns=ColumnSet([
                ("org_id", UInt(64)),
                ("project_id", UInt(64)),
                ("key_id", Nullable(UInt(64))),
                ("timestamp", DateTime()),
                ("outcome", UInt(8)),
                ("reason", LowCardinality(Nullable(String()))),
                ("event_id", Nullable(UUID())),
            ]),
            local_table_name="outcomes_raw_local",
            dist_table_name="outcomes_raw_dist",
            order_by="(org_id, project_id, timestamp)",
            partition_by="(toMonday(timestamp))",
            settings={"index_granularity": 16384},
            migration_function=outcomes_raw_migrations,
        )

        super().__init__(
            dataset_schemas=DatasetSchemas(
                read_schema=read_schema,
                write_schema=None,
                intermediary_schemas=[],
            ),
            time_group_columns={"time": "timestamp"},
            time_parse_columns=("timestamp", ),
        )
예제 #25
0
def test_format_expressions(query: Query, expected_query: Query) -> None:
    """Run the ``f_call`` custom function processor and compare the ASTs.

    ``query`` is processed in place; afterwards each AST component of
    ``query`` must equal the same component of ``expected_query``.
    """
    known_columns = ColumnSet(
        [
            ("param1", String()),
            ("param2", UInt(8)),
            ("other_col", String()),
        ]
    )
    signature = [("param1", ColType({String})), ("param2", ColType({UInt}))]
    expansion = partial_function(
        "f_call_impl(param1, inner_call(param2), my_const)",
        [("my_const", 420)],
    )
    custom_function = CustomFunction(known_columns, "f_call", signature, expansion)

    custom_function.process_query(query, HTTPRequestSettings())

    # The two query objects cannot be compared with a single ==: their
    # content differs (one holds only the AST, the other the AST plus the raw
    # body), so every AST component is compared on its own.
    ast_getters = (
        "get_selected_columns_from_ast",
        "get_groupby_from_ast",
        "get_condition_from_ast",
        "get_arrayjoin_from_ast",
        "get_having_from_ast",
        "get_orderby_from_ast",
    )
    for getter_name in ast_getters:
        actual = getattr(query, getter_name)()
        wanted = getattr(expected_query, getter_name)()
        assert actual == wanted
예제 #26
0
파일: outcomes_raw.py 프로젝트: Appva/snuba
    def __init__(self):
        """Set up the dataset with one MergeTree read schema and no write schema.

        The ``timestamp`` column is registered for time grouping (under the
        ``time`` key) and as the only time-parse column.
        """
        column_specs = [
            ('org_id', UInt(64)),
            ('project_id', UInt(64)),
            ('key_id', Nullable(UInt(64))),
            ('timestamp', DateTime()),
            ('outcome', UInt(8)),
            ('reason', LowCardinality(Nullable(String()))),
            ('event_id', Nullable(UUID())),
        ]

        merge_tree = MergeTreeSchema(
            columns=ColumnSet(column_specs),
            local_table_name='outcomes_raw_local',
            dist_table_name='outcomes_raw_dist',
            order_by='(org_id, project_id, timestamp)',
            partition_by='(toMonday(timestamp))',
            settings={'index_granularity': 16384},
        )

        # Read-only layout: nothing is supplied for the write side.
        schemas = DatasetSchemas(
            read_schema=merge_tree,
            write_schema=None,
            intermediary_schemas=[],
        )

        super().__init__(
            dataset_schemas=schemas,
            time_group_columns={'time': 'timestamp'},
            time_parse_columns=('timestamp', ),
        )
예제 #27
0
    def __init__(
        self,
        writable_storage_key: StorageKey,
        readable_storage_key: StorageKey,
        value_schema: Sequence[Column[SchemaModifiers]],
        mappers: TranslationMappers,
    ) -> None:
        """Initialise the entity from one writable and one readable storage.

        Args:
            writable_storage_key: Key resolved through ``get_writable_storage``.
            readable_storage_key: Key resolved through ``get_storage``; the
                query plan builder is bound to this storage.
            value_schema: Extra columns appended after the fixed columns of
                the abstract column set.
            mappers: Translation mappers concatenated after the built-in
                ``tags`` subscriptable mapper.
        """
        writer = get_writable_storage(writable_storage_key)
        reader = get_storage(readable_storage_key)

        # The built-in tags mapper comes first; caller mappers are appended.
        combined_mappers = TranslationMappers(
            subscriptables=[SubscriptableMapper(None, "tags", None, "tags")],
        ).concat(mappers)
        pipeline_builder = SimplePipelineBuilder(
            query_plan_builder=SingleStorageQueryPlanBuilder(
                reader,
                mappers=combined_mappers,
            )
        )

        tags_type = Nested([("key", UInt(64)), ("value", UInt(64))])
        columns = ColumnSet(
            [
                Column("org_id", UInt(64)),
                Column("project_id", UInt(64)),
                Column("metric_id", UInt(64)),
                Column("timestamp", DateTime()),
                Column("tags", tags_type),
                *value_schema,
            ]
        )

        super().__init__(
            storages=[writer, reader],
            query_pipeline_builder=pipeline_builder,
            abstract_column_set=columns,
            join_relationships={},
            writable_storage=writer,
            validators=[EntityRequiredColumnValidator({"org_id", "project_id"})],
            required_time_column="timestamp",
        )
예제 #28
0
def test_mand_conditions(table: str, mand_conditions: List[FunctionCall]) -> None:
    """Check that the applier ANDs the table's mandatory conditions into the query.

    The expected condition is produced on a deep copy of the query by adding
    ``combine_and_conditions(mand_conditions)`` to its AST directly.
    """
    source = Table(
        table,
        ColumnSet([]),
        final=False,
        sampling_rate=None,
        mandatory_conditions=mand_conditions,
    )

    equals = OPERATOR_TO_FUNCTION["="]
    d_equals_one = binary_condition(
        equals, Column("d", None, "d"), Literal(None, "1"),
    )
    c_equals_three = binary_condition(
        equals, Column("c", None, "c"), Literal(None, "3"),
    )
    query = Query(
        source,
        None,
        None,
        binary_condition(BooleanFunctions.AND, d_equals_one, c_equals_three),
    )

    # Snapshot taken before processing, so the reference is built from the
    # untouched query.
    reference = copy.deepcopy(query)

    settings = HTTPRequestSettings(consistent=True)
    applier = MandatoryConditionApplier()
    applier.process_query(query, settings)

    reference.add_condition_to_ast(combine_and_conditions(mand_conditions))

    actual_condition = query.get_condition_from_ast()
    assert actual_condition == reference.get_condition_from_ast()
예제 #29
0
def test_project_extension_query_adds_rate_limits():
    """Processing a project extension query must append exactly one rate limit.

    The newest rate limit is expected to use bucket ``'2'`` with per-second
    and concurrent limits of 1000.
    """
    processor = ProjectExtensionProcessor(project_column="project_id")
    extension = ProjectExtension(processor=processor)

    payload = {'project': [2, 3]}
    validated = validate_jsonschema(payload, extension.get_schema())

    query = Query(
        {"conditions": []},
        TableSource("my_table", ColumnSet([])),
    )
    settings = RequestSettings(turbo=False, consistent=False, debug=False)

    count_before = len(settings.get_rate_limit_params())
    extension.get_processor().process_query(query, validated, settings)
    limits_after = settings.get_rate_limit_params()

    # Exactly one rate limit must have been added by the processing.
    assert len(limits_after) == count_before + 1

    newest = limits_after[-1]
    assert newest.bucket == '2'
    assert newest.per_second_limit == 1000
    assert newest.concurrent_limit == 1000
예제 #30
0
def test_invalid_function_name(expression: FunctionCall,
                               should_raise: bool) -> None:
    """Expect the function-call validator to reject ``expression``.

    The validator is switched on through the ``function-validator.enabled``
    config key before the expression is validated against an EVENTS entity
    with an empty column set.
    """
    # NOTE(review): `should_raise` is not read in this body; every
    # parametrized case is expected to raise. Confirm whether that is intended.
    state.set_config("function-validator.enabled", True)
    events_entity = QueryEntity(EntityKey.EVENTS, ColumnSet([]))
    validator = FunctionCallsValidator()

    with pytest.raises(InvalidExpressionException):
        validator.validate(expression, events_entity)