Esempio n. 1
0
def _format_storage_query_and_run(
    timer: Timer,
    query_metadata: SnubaQueryMetadata,
    referrer: str,
    clickhouse_query: Union[Query, CompositeQuery[Table]],
    request_settings: RequestSettings,
    reader: Reader,
    robust: bool,
    concurrent_queries_gauge: Optional[Gauge] = None,
) -> QueryResult:
    """
    Format the storage query and hand it to the DB specific code for execution.

    The tables referenced by the FROM clause are collected first. The query
    is then formatted inside a "create_query" span and finally executed via
    ``raw_query`` inside a second span whose description is the SQL text.
    When ``concurrent_queries_gauge`` is given, the execution runs inside
    that gauge's context.
    """
    tables_collector = TablesCollector()
    tables_collector.visit(clickhouse_query.get_from_clause())
    table_names = ",".join(sorted(tables_collector.get_tables()))

    with sentry_sdk.start_span(
        description="create_query", op="db"
    ) as create_span:
        _apply_turbo_sampling_if_needed(clickhouse_query, request_settings)

        formatted_query = format_query(clickhouse_query)
        create_span.set_data("query", formatted_query.structured())
        create_span.set_data(
            "query_size_bytes",
            _string_size_in_bytes(formatted_query.get_sql()),
        )
        sentry_sdk.set_tag(
            "query_size_group",
            get_query_size_group(formatted_query.get_sql()),
        )
        metrics.increment("execute")

    timer.mark("prepare_query")

    # The collector was populated from the FROM clause before formatting.
    stats = {
        "clickhouse_table": table_names,
        "final": tables_collector.any_final(),
        "referrer": referrer,
        "sample": tables_collector.get_sample_rate(),
    }

    with sentry_sdk.start_span(
        description=formatted_query.get_sql(), op="db"
    ) as query_span:
        query_span.set_tag("table", table_names)

        def run_query() -> QueryResult:
            return raw_query(
                clickhouse_query,
                request_settings,
                formatted_query,
                reader,
                timer,
                query_metadata,
                stats,
                query_span.trace_id,
                robust=robust,
            )

        # Without a gauge there is nothing to wrap the execution in.
        if concurrent_queries_gauge is None:
            return run_query()

        with concurrent_queries_gauge:
            return run_query()
Esempio n. 2
0
def test_format_expressions(
    query: Query,
    formatted_seq: Sequence[Any],
    formatted_str: str,
    formatted_anonymized_str: str,
) -> None:
    """
    Checks that formatting ``query`` yields the expected SQL string and
    structured sequence, and that the anonymized formatter yields the
    expected anonymized SQL string.
    """
    formatted = format_query(query)
    anonymized = format_query_anonymized(query)

    assert formatted.get_sql() == formatted_str
    assert formatted.structured() == formatted_seq
    assert anonymized.get_sql() == formatted_anonymized_str
Esempio n. 3
0
def test_format_clickhouse_specific_query() -> None:
    """
    Builds a query that uses a few of the ClickHouse specific fields
    (final, sampling rate, array join, totals, limit by) and checks the
    structured output of the formatter.
    """
    from_table = Table(
        "my_table", ColumnSet([]), final=True, sampling_rate=0.1
    )
    selected = [
        SelectedExpression("column1", Column(None, None, "column1")),
        SelectedExpression("column2", Column(None, "table1", "column2")),
    ]
    where_clause = binary_condition(
        "eq",
        lhs=Column(None, None, "column1"),
        rhs=Literal(None, "blabla"),
    )
    grouping = [
        Column(None, None, "column1"),
        Column(None, "table1", "column2"),
    ]
    having_clause = binary_condition(
        "eq",
        lhs=Column(None, None, "column1"),
        rhs=Literal(None, 123),
    )
    ordering = [
        OrderBy(OrderByDirection.ASC, Column(None, None, "column1")),
    ]

    query = ClickhouseQuery(
        from_table,
        selected_columns=selected,
        condition=where_clause,
        groupby=grouping,
        having=having_clause,
        order_by=ordering,
        array_join=Column(None, None, "column1"),
        totals=True,
        limitby=LimitBy(10, Column(None, None, "environment")),
    )
    query.set_offset(50)
    query.set_limit(100)

    formatted = format_query(query, HTTPRequestSettings())

    expected_structure = [
        "SELECT column1, table1.column2",
        ["FROM", "my_table FINAL SAMPLE 0.1"],
        "ARRAY JOIN column1",
        "WHERE eq(column1, 'blabla')",
        "GROUP BY column1, table1.column2 WITH TOTALS",
        "HAVING eq(column1, 123)",
        "ORDER BY column1 ASC",
        "LIMIT 10 BY environment",
        "LIMIT 100 OFFSET 50",
    ]

    assert formatted.structured() == expected_structure
Esempio n. 4
0
def _dry_run_query_runner(
    clickhouse_query: Union[Query, CompositeQuery[Table]],
    request_settings: RequestSettings,
    reader: Reader,
) -> QueryResult:
    """
    Formats the query without executing it.

    The returned result has empty "data" and "meta" lists, empty "stats",
    and carries the generated SQL under "sql". ``reader`` is accepted but
    not used by this function.
    """
    with sentry_sdk.start_span(
        description="dryrun_create_query", op="db"
    ) as dry_run_span:
        formatted_query = format_query(clickhouse_query, request_settings)
        dry_run_span.set_data("query", formatted_query.structured())

    empty_result = {"data": [], "meta": []}
    extra = {"stats": {}, "sql": formatted_query.get_sql()}
    return QueryResult(empty_result, extra)
def test_aliasing() -> None:
    """
    Checks that aliases are generated properly when the query refers to
    both tags_key and tags_value.
    """
    request_body = {
        "aggregations": [],
        "groupby": [],
        "selected_columns": ["tags_value"],
        "conditions": [["tags_key", "IN", ["t1", "t2"]]],
    }
    expected_sql = (
        "SELECT (tupleElement((arrayJoin(arrayMap((x, y -> tuple(x, y)), "
        "tags.key, tags.value)) AS snuba_all_tags), 2) AS _snuba_tags_value) "
        "FROM transactions_local "
        "WHERE in((tupleElement(snuba_all_tags, 1) AS _snuba_tags_key), tuple('t1', 't2'))"
    )

    processed_query = parse_and_process(request_body)
    formatted = format_query(processed_query, HTTPRequestSettings())

    assert formatted.get_sql() == expected_sql
Esempio n. 6
0
def _format_storage_query_and_run(
    timer: Timer,
    query_metadata: SnubaQueryMetadata,
    referrer: str,
    clickhouse_query: Union[Query, CompositeQuery[Table]],
    request_settings: RequestSettings,
    reader: Reader,
) -> QueryResult:
    """
    Format the storage query and hand it to the DB specific code for execution.

    The tables referenced by the FROM clause are collected first. The query
    is then formatted inside a "create_query" span and executed via
    ``raw_query`` inside a second span whose description is the SQL text.
    """
    tables_collector = TablesCollector()
    tables_collector.visit(clickhouse_query.get_from_clause())
    table_names = ",".join(sorted(tables_collector.get_tables()))

    with sentry_sdk.start_span(
        description="create_query", op="db"
    ) as create_span:
        formatted_query = format_query(clickhouse_query, request_settings)
        create_span.set_data("query", formatted_query.structured())
        metrics.increment("execute")

    timer.mark("prepare_query")

    # Per-query stats handed to raw_query along with the formatted query.
    stats = {
        "clickhouse_table": table_names,
        "final": tables_collector.any_final(),
        "referrer": referrer,
        "sample": tables_collector.get_sample_rate(),
    }

    with sentry_sdk.start_span(
        description=formatted_query.get_sql(), op="db"
    ) as query_span:
        query_span.set_tag("table", table_names)

        result = raw_query(
            clickhouse_query,
            request_settings,
            formatted_query,
            reader,
            timer,
            query_metadata,
            stats,
            query_span.trace_id,
        )
        return result
Esempio n. 7
0
def test_format_expressions(
    query: Query,
    formatted_seq: Sequence[Any],
    formatted_str: str,
) -> None:
    """
    Checks that formatting ``query`` with default HTTP request settings
    yields the expected SQL string and structured sequence.
    """
    formatted = format_query(query, HTTPRequestSettings())

    assert formatted.get_sql() == formatted_str
    assert formatted.structured() == formatted_seq
Esempio n. 8
0
def _format_storage_query_and_run(
    timer: Timer,
    query_metadata: SnubaQueryMetadata,
    referrer: str,
    clickhouse_query: Union[Query, CompositeQuery[Table]],
    query_settings: QuerySettings,
    reader: Reader,
    robust: bool,
    concurrent_queries_gauge: Optional[Gauge] = None,
) -> QueryResult:
    """
    Format the storage query and hand it to the DB specific code for execution.

    The tables referenced by the FROM clause are collected first. The query
    is then formatted inside a "create_query" span, where its UTF-8 encoded
    size is also recorded. If that size exceeds ``MAX_QUERY_SIZE_BYTES`` a
    ``QueryException`` chained from a ``QueryTooLongException`` is raised
    instead of running the query. Otherwise the query is executed via
    ``raw_query`` inside a second span, within ``concurrent_queries_gauge``
    when one is given.
    """
    tables_collector = TablesCollector()
    tables_collector.visit(clickhouse_query.get_from_clause())
    table_names = ",".join(sorted(tables_collector.get_tables()))

    with sentry_sdk.start_span(
        description="create_query", op="db"
    ) as create_span:
        _apply_turbo_sampling_if_needed(clickhouse_query, query_settings)

        formatted_query = format_query(clickhouse_query)
        encoded_sql = formatted_query.get_sql().encode("utf-8")
        query_size_bytes = len(encoded_sql)
        create_span.set_data("query", formatted_query.structured())
        create_span.set_data("query_size_bytes", query_size_bytes)
        sentry_sdk.set_tag(
            "query_size_group", get_query_size_group(query_size_bytes)
        )
        metrics.increment("execute")

    timer.mark("prepare_query")

    # The collector was populated from the FROM clause before formatting.
    stats = {
        "clickhouse_table": table_names,
        "final": tables_collector.any_final(),
        "referrer": referrer,
        "sample": tables_collector.get_sample_rate(),
    }

    if query_size_bytes > MAX_QUERY_SIZE_BYTES:
        # Attach the stats and SQL gathered so far to the raised error.
        size_error = QueryException(
            extra=QueryExtraData(
                stats=stats,
                sql=formatted_query.get_sql(),
                experiments=clickhouse_query.get_experiments(),
            )
        )
        raise size_error from QueryTooLongException(
            f"After processing, query is {query_size_bytes} bytes, "
            "which is too long for ClickHouse to process. "
            f"Max size is {MAX_QUERY_SIZE_BYTES} bytes."
        )

    with sentry_sdk.start_span(
        description=formatted_query.get_sql(), op="db"
    ) as query_span:
        query_span.set_tag("table", table_names)

        def run_query() -> QueryResult:
            return raw_query(
                clickhouse_query,
                query_settings,
                formatted_query,
                reader,
                timer,
                query_metadata,
                stats,
                query_span.trace_id,
                robust=robust,
            )

        # Without a gauge there is nothing to wrap the execution in.
        if concurrent_queries_gauge is None:
            return run_query()

        with concurrent_queries_gauge:
            return run_query()