Example #1
0
def test_format_expressions(
    name: str, query: ClickhouseQuery, expected_query: ClickhouseQuery
) -> None:
    """Run the tag promoter on ``query`` and compare its selected columns.

    A ``MappingColumnPromoter`` configured with the single mapping
    ``tags[promoted_tag] -> promoted`` processes ``query`` in place; the
    resulting selected columns must equal those of ``expected_query``.

    ``name`` is not used in the body (presumably a test-case label supplied
    by the caller -- confirm against the parametrization).
    """
    promoter = MappingColumnPromoter({"tags": {"promoted_tag": "promoted"}})
    promoter.process_query(query, HTTPQuerySettings())

    actual = query.get_selected_columns()
    wanted = expected_query.get_selected_columns()
    assert actual == wanted
Example #2
0
    dist_table_name="discover_dist",
    storage_set_key=StorageSetKey.DISCOVER,
    mandatory_conditions=mandatory_conditions,
)

storage = ReadableTableStorage(
    storage_key=StorageKey.DISCOVER,
    storage_set_key=StorageSetKey.DISCOVER,
    schema=schema,
    query_processors=[
        MappingColumnPromoter(
            mapping_specs={
                "tags": {
                    "environment": "environment",
                    "sentry:release": "release",
                    "sentry:dist": "dist",
                    "sentry:user": "******",
                },
                "contexts": {"trace.trace_id": "trace_id"},
            }
        ),
        MappingOptimizer("tags", "_tags_hash_map", "tags_hash_map_enabled"),
        ArrayJoinKeyValueOptimizer("tags"),
        UUIDColumnProcessor(set(["event_id", "trace_id"])),
        EventsBooleanContextsProcessor(),
        PrewhereProcessor(
            [
                "event_id",
                "release",
                "message",
                "transaction_name",
Example #3
0
    "project_id",
    "group_id",
    "timestamp",
    "deleted",
    "retention_days",
]

storage = WritableTableStorage(
    storage_key=StorageKey.ERRORS,
    storage_set_key=StorageSetKey.EVENTS,
    schema=schema,
    query_processors=[
        PostReplacementConsistencyEnforcer(
            project_column="project_id", replacer_state_name=ReplacerState.ERRORS,
        ),
        MappingColumnPromoter(mapping_specs={"tags": promoted_tag_columns}),
        ArrayJoinKeyValueOptimizer("tags"),
        PrewhereProcessor(),
    ],
    stream_loader=KafkaStreamLoader(
        processor=ErrorsProcessor(promoted_tag_columns),
        default_topic="events",
        replacement_topic="errors-replacements",
    ),
    replacer_processor=ErrorsReplacer(
        write_schema=schema,
        read_schema=schema,
        required_columns=required_columns,
        tag_column_map={"tags": promoted_tag_columns, "contexts": {}},
        promoted_tags={"tags": list(promoted_tag_columns.keys()), "contexts": []},
        state_name=ReplacerState.ERRORS,
Example #4
0
    "group_id",
    "tags[sentry:release]",
    "release",
    "message",
    "environment",
    "project_id",
]

query_processors = [
    UniqInSelectAndHavingProcessor(),
    PostReplacementConsistencyEnforcer(
        project_column="project_id",
        replacer_state_name=ReplacerState.ERRORS,
    ),
    MappingColumnPromoter(mapping_specs={
        "tags": promoted_tag_columns,
        "contexts": promoted_context_columns,
    }),
    UserColumnProcessor(),
    UUIDColumnProcessor({"event_id", "primary_hash", "trace_id"}),
    HexIntColumnProcessor({"span_id"}),
    UUIDArrayColumnProcessor({"hierarchical_hashes"}),
    SliceOfMapOptimizer(),
    EventsBooleanContextsProcessor(),
    TypeConditionOptimizer(),
    MappingOptimizer("tags", "_tags_hash_map", "events_tags_hash_map_enabled"),
    EmptyTagConditionProcessor(),
    ArrayJoinKeyValueOptimizer("tags"),
    PrewhereProcessor(
        prewhere_candidates,
        # Environment and release are excluded from prewhere in case of final
        # queries because of a Clickhouse bug.
Example #5
0
    "project_id",
]

query_processors = [
    PostReplacementConsistencyEnforcer(
        project_column="project_id",
        # Key migration is ongoing. As soon as all the keys we are interested
        # in in Redis are stored with "EVENTS" in the name, we can change this.
        replacer_state_name=None,
    ),
    EventsColumnProcessor(),
    MappingColumnPromoter(
        mapping_specs={
            "tags": ChainMap(
                {col.flattened: col.flattened for col in promoted_tag_columns},
                get_promoted_context_tag_col_mapping(),
            ),
            "contexts": get_promoted_context_col_mapping(),
        },
    ),
    # This processor must not be ported to the errors dataset. We should
    # not support promoting tags/contexts with boolean values. There is
    # no way to convert them back consistently to the value provided by
    # the client when the event is ingested, in all ways to access
    # tags/contexts. Once the errors dataset is in use, we will not have
    # boolean promoted tags/contexts so this constraint will be easy
    # to enforce.
    EventsBooleanContextsProcessor(),
    MappingOptimizer("tags", "_tags_hash_map", "events_tags_hash_map_enabled"),
    ArrayJoinKeyValueOptimizer("tags"),
    PrewhereProcessor(),
Example #6
0
def test_events_promoted_boolean_context() -> None:
    """Check the rewrite of a promoted boolean context lookup.

    The input query selects ``contexts[device.charging]`` as an
    ``arrayElement`` lookup over the nested ``contexts`` column. After
    ``MappingColumnPromoter`` (with ``cast_to_string=True``) and then
    ``EventsPromotedBooleanContextsProcessor`` have processed the query, its
    selected columns must equal an ``if`` expression built on
    ``toString(device_charging)``.
    """
    alias = "contexts[device.charging]"
    columns = ColumnSet(
        [
            ("device_charging", UInt(8, Modifier(nullable=True))),
            ("contexts", Nested([("key", String()), ("value", String())])),
        ]
    )

    # Input expression: arrayElement(contexts.value, indexOf(contexts.key, ...)).
    key_position = FunctionCall(
        None,
        "indexOf",
        (Column(None, None, "contexts.key"), Literal(None, "device.charging")),
    )
    context_lookup = FunctionCall(
        alias,
        "arrayElement",
        (Column(None, None, "contexts.value"), key_position),
    )
    query = ClickhouseQuery(
        Table("events", columns),
        selected_columns=[SelectedExpression(alias, context_lookup)],
    )

    # Expected expression: an "if" whose condition is an IN check of the
    # stringified promoted column against the literals "1" and "True".
    charging_as_string = FunctionCall(
        None, "toString", (Column(None, None, "device_charging"),)
    )
    is_truthy = binary_condition(
        ConditionFunctions.IN,
        charging_as_string,
        literals_tuple(None, [Literal(None, "1"), Literal(None, "True")]),
    )
    boolean_as_text = FunctionCall(
        alias,
        "if",
        (is_truthy, Literal(None, "True"), Literal(None, "False")),
    )
    expected = ClickhouseQuery(
        Table("events", columns),
        selected_columns=[SelectedExpression(alias, boolean_as_text)],
    )

    settings = HTTPQuerySettings()
    promoter = MappingColumnPromoter(
        {"contexts": {"device.charging": "device_charging"}}, cast_to_string=True
    )
    promoter.process_query(query, settings)
    EventsPromotedBooleanContextsProcessor().process_query(query, settings)

    actual = query.get_selected_columns()
    wanted = expected.get_selected_columns()
    assert actual == wanted
Example #7
0
def test_events_boolean_context() -> None:
    """Check the rewrite of a boolean context lookup into ``multiIf``.

    The input query selects ``contexts[device.charging]`` as an
    ``arrayElement`` lookup over the nested ``contexts`` column. After
    ``MappingColumnPromoter`` and then ``EventsBooleanContextsProcessor``
    have processed the query, its selected columns must equal a ``multiIf``
    expression built on ``toString(device_charging)``.
    """
    alias = "contexts[device.charging]"
    columns = ColumnSet(
        [
            ("device_charging", Nullable(UInt(8))),
            ("contexts", Nested([("key", String()), ("value", String())])),
        ]
    )

    def charging_as_string() -> FunctionCall:
        # Build a fresh toString(device_charging) node for each use site.
        return FunctionCall(
            None, "toString", (Column(None, None, "device_charging"),)
        )

    # Input expression: arrayElement(contexts.value, indexOf(contexts.key, ...)).
    key_position = FunctionCall(
        None,
        "indexOf",
        (Column(None, None, "contexts.key"), Literal(None, "device.charging")),
    )
    context_lookup = FunctionCall(
        alias,
        "arrayElement",
        (Column(None, None, "contexts.value"), key_position),
    )
    query = ClickhouseQuery(
        LogicalQuery(
            {},
            TableSource("events", columns),
            selected_columns=[SelectedExpression(alias, context_lookup)],
        )
    )

    # Expected multiIf arguments, in order: an EQ check against the empty
    # string paired with "", an IN check against ("1", "True") paired with
    # "True", and finally the fallback "False".
    is_empty = binary_condition(
        None, ConditionFunctions.EQ, charging_as_string(), Literal(None, ""),
    )
    is_truthy = binary_condition(
        None,
        ConditionFunctions.IN,
        charging_as_string(),
        literals_tuple(None, [Literal(None, "1"), Literal(None, "True")]),
    )
    boolean_as_text = FunctionCall(
        alias,
        "multiIf",
        (
            is_empty,
            Literal(None, ""),
            is_truthy,
            Literal(None, "True"),
            Literal(None, "False"),
        ),
    )
    expected = ClickhouseQuery(
        LogicalQuery(
            {},
            TableSource("events", columns),
            selected_columns=[SelectedExpression(alias, boolean_as_text)],
        )
    )

    settings = HTTPRequestSettings()
    promoter = MappingColumnPromoter(
        {"contexts": {"device.charging": "device_charging"}}
    )
    promoter.process_query(query, settings)
    EventsBooleanContextsProcessor().process_query(query, settings)

    actual = query.get_selected_columns_from_ast()
    wanted = expected.get_selected_columns_from_ast()
    assert actual == wanted