Exemple #1
0
def wip_snql_query(
    selected_columns,
    query,
    params,
    equations=None,
    orderby=None,
    offset=None,
    limit=50,
    referrer=None,
    auto_fields=False,
    auto_aggregations=False,
    use_aggregate_conditions=False,
    conditions=None,
    functions_acl=None,
):
    """
    Replacement API for query using snql, this function is still a work in
    progress and is not ready for use in production.

    NOTE(review): several accepted parameters (equations, offset,
    auto_fields, auto_aggregations, conditions, functions_acl) are not yet
    forwarded to QueryBuilder — presumably pending implementation; confirm
    before relying on them.
    """
    # Only the options QueryBuilder currently understands are forwarded.
    snql_query = QueryBuilder(
        Dataset.Discover,
        params,
        query=query,
        selected_columns=selected_columns,
        orderby=orderby,
        use_aggregate_conditions=use_aggregate_conditions,
        limit=limit,
    ).get_snql_query()
    return raw_snql_query(snql_query, referrer)
Exemple #2
0
    def test_environment_param(self):
        """Empty environment entries become IS NULL, concrete ones EQ/IN."""
        # An empty environment plus a concrete one is OR'd together.
        self.params["environment"] = ["", "prod"]
        builder = QueryBuilder(
            Dataset.Discover, self.params, selected_columns=["environment"]
        )
        expected = [
            *self.default_conditions,
            Or(
                [
                    Condition(Column("environment"), Op.IS_NULL),
                    Condition(Column("environment"), Op.EQ, "prod"),
                ]
            ),
        ]
        self.assertCountEqual(builder.where, expected)
        builder.get_snql_query().validate()

        # Multiple concrete environments collapse into a single IN condition.
        self.params["environment"] = ["dev", "prod"]
        builder = QueryBuilder(
            Dataset.Discover, self.params, selected_columns=["environment"]
        )
        self.assertCountEqual(
            builder.where,
            [
                *self.default_conditions,
                Condition(Column("environment"), Op.IN, ["dev", "prod"]),
            ],
        )
        builder.get_snql_query().validate()
Exemple #3
0
 def test_turbo(self):
     """The turbo flag must propagate from the builder to the snql query."""
     builder = QueryBuilder(
         Dataset.Discover,
         self.params,
         "",
         selected_columns=["count()"],
         turbo=True,
     )
     assert builder.turbo.value
     built = builder.get_snql_query()
     built.validate()
     assert built.turbo.value
Exemple #4
0
 def test_sample_rate(self):
     """sample_rate is stored on the builder and applied to match.sample."""
     builder = QueryBuilder(
         Dataset.Discover,
         self.params,
         "",
         selected_columns=["count()"],
         sample_rate=0.1,
     )
     assert builder.sample_rate == 0.1
     built = builder.get_snql_query()
     built.validate()
     assert built.match.sample == 0.1
Exemple #5
0
    def test_environment_filter(self):
        """environment: filters translate to EQ (single) or IN (list)."""
        cases = (
            ("environment:prod", Condition(Column("environment"), Op.EQ, "prod")),
            (
                "environment:[dev, prod]",
                Condition(Column("environment"), Op.IN, ["dev", "prod"]),
            ),
        )
        for query_string, expected_condition in cases:
            builder = QueryBuilder(
                Dataset.Discover,
                self.params,
                query_string,
                ["environment"],
            )
            self.assertCountEqual(
                builder.where,
                [expected_condition, *self.default_conditions],
            )
            builder.get_snql_query().validate()
Exemple #6
0
    def test_simple_orderby(self):
        """An orderby field resolves to its snql expression; '-' flips direction."""
        for orderby_field, direction in (
            ("user.email", Direction.ASC),
            ("-user.email", Direction.DESC),
        ):
            builder = QueryBuilder(
                Dataset.Discover,
                self.params,
                selected_columns=["user.email", "release"],
                orderby=[orderby_field],
            )
            self.assertCountEqual(builder.where, self.default_conditions)
            self.assertCountEqual(
                builder.orderby,
                [
                    OrderBy(
                        Function("toString", [Column("email")], "user.email"),
                        direction,
                    )
                ],
            )
            builder.get_snql_query().validate()
Exemple #7
0
def resolve_team_key_transaction_alias(
    builder: QueryBuilder, resolve_metric_index: bool = False
) -> SelectType:
    """Resolve the team-key-transaction alias to a snql expression.

    Looks up the (project_id, transaction) pairs marked as key transactions
    for the teams/projects in the builder's params and returns an ``in(...)``
    expression matching those pairs. When no key transactions exist, returns
    a constant ``toInt8(0)`` expression instead.

    :param builder: Query builder supplying ``organization_id``,
        ``project_id`` and ``team_id`` params, plus column resolution.
    :param resolve_metric_index: When True, transaction names are replaced
        with their indexer-resolved ids (for the metrics dataset).
    :raises TypeError: If any of the required params is None/missing.
    """
    org_id = builder.params.get("organization_id")
    project_ids = builder.params.get("project_id")
    team_ids = builder.params.get("team_id")

    if org_id is None or team_ids is None or project_ids is None:
        raise TypeError("Team key transactions parameters cannot be None")

    # order_by + distinct keep the selection deterministic; the slice caps
    # how many pairs end up in the generated IN clause.
    team_key_transactions = list(
        TeamKeyTransaction.objects.filter(
            organization_id=org_id,
            project_team__in=ProjectTeam.objects.filter(
                project_id__in=project_ids, team_id__in=team_ids
            ),
        )
        .order_by("transaction", "project_team__project_id")
        .values_list("project_team__project_id", "transaction")
        .distinct("transaction", "project_team__project_id")[
            : fields.MAX_QUERYABLE_TEAM_KEY_TRANSACTIONS
        ]
    )

    count = len(team_key_transactions)
    if resolve_metric_index:
        team_key_transactions = [
            (project, indexer.resolve(transaction))
            for project, transaction in team_key_transactions
        ]

    # NOTE: this raw count is not 100% accurate because if it exceeds
    # `MAX_QUERYABLE_TEAM_KEY_TRANSACTIONS`, it will not be reflected
    sentry_sdk.set_tag("team_key_txns.count", count)
    sentry_sdk.set_tag(
        "team_key_txns.count.grouped", format_grouped_length(count, [10, 100, 250, 500])
    )

    if count == 0:
        return Function("toInt8", [0], constants.TEAM_KEY_TRANSACTION_ALIAS)

    return Function(
        "in",
        [
            (builder.column("project_id"), builder.column("transaction")),
            team_key_transactions,
        ],
        constants.TEAM_KEY_TRANSACTION_ALIAS,
    )
Exemple #8
0
 def test_spans_columns(self):
     """Span array columns resolve to arrayJoin-wrapped snql expressions."""
     builder = QueryBuilder(
         Dataset.Discover,
         self.params,
         "",
         selected_columns=[
             "array_join(spans_op)",
             "array_join(spans_group)",
             "sumArray(spans_exclusive_time)",
         ],
         functions_acl=["array_join", "sumArray"],
     )
     expected = [
         Function("arrayJoin", [Column("spans.op")], "array_join_spans_op"),
         Function("arrayJoin", [Column("spans.group")], "array_join_spans_group"),
         # sumArray(col) becomes sum(arrayJoin(col)).
         Function(
             "sum",
             [Function("arrayJoin", [Column("spans.exclusive_time")])],
             "sumArray_spans_exclusive_time",
         ),
     ]
     self.assertCountEqual(builder.columns, expected)
Exemple #9
0
def release_filter_converter(builder: QueryBuilder, search_filter: SearchFilter) -> WhereType:
    """Parse releases for potential aliases like `latest`.

    Wildcard filters pass through untouched. Otherwise each release value is
    expanded via ``parse_release`` (which may resolve aliases such as
    ``latest`` to several concrete versions), so equality operators are
    widened to their IN/NOT IN equivalents.

    :param builder: Query builder supplying project/environment/org params.
    :param search_filter: The ``release:...`` filter to convert.
    :returns: The condition produced by the builder's default converter.
    """
    if search_filter.value.is_wildcard():
        operator = search_filter.operator
        value = search_filter.value
    else:
        operator_conversions = {"=": "IN", "!=": "NOT IN"}
        operator = operator_conversions.get(search_filter.operator, search_filter.operator)
        # Flatten the per-value parse results with a nested comprehension;
        # this avoids the O(n^2) repeated list concatenation of
        # reduce(lambda x, y: x + y, ...).
        value = SearchValue(
            [
                release
                for v in to_list(search_filter.value.value)
                for release in parse_release(
                    v,
                    builder.params["project_id"],
                    builder.params.get("environment_objects"),
                    builder.params.get("organization_id"),
                )
            ]
        )

    return builder._default_filter_converter(SearchFilter(search_filter.key, operator, value))
Exemple #10
0
 def test_count_if_with_tags(self):
     """count_if on an unknown field falls back to a tags[...] column."""
     builder = QueryBuilder(
         Dataset.Discover,
         self.params,
         "",
         selected_columns=[
             "count_if(foo,equals,bar)",
             'count_if(foo,notEquals,"baz")',
         ],
     )
     self.assertCountEqual(builder.where, self.default_conditions)
     expected_aggregates = [
         Function(
             "countIf",
             [Function("equals", [Column("tags[foo]"), "bar"])],
             "count_if_foo_equals_bar",
         ),
         Function(
             "countIf",
             [Function("notEquals", [Column("tags[foo]"), "baz"])],
             "count_if_foo_notEquals__baz",
         ),
     ]
     self.assertCountEqual(builder.aggregates, expected_aggregates)
Exemple #11
0
 def test_count_if(self):
     """count_if on a known field resolves to countIf on its snql column."""
     builder = QueryBuilder(
         Dataset.Discover,
         self.params,
         "",
         selected_columns=[
             "count_if(event.type,equals,transaction)",
             'count_if(event.type,notEquals,"transaction")',
         ],
     )
     self.assertCountEqual(builder.where, self.default_conditions)
     expected_aggregates = [
         Function(
             "countIf",
             [Function("equals", [Column("type"), "transaction"])],
             "count_if_event_type_equals_transaction",
         ),
         Function(
             "countIf",
             [Function("notEquals", [Column("type"), "transaction"])],
             "count_if_event_type_notEquals__transaction",
         ),
     ]
     self.assertCountEqual(builder.aggregates, expected_aggregates)
Exemple #12
0
    def test_project_alias_column_with_project_condition(self):
        """A project filter narrows both the where clause and the transform."""
        project1 = self.create_project()
        project2 = self.create_project()
        self.params["project_id"] = [project1.id, project2.id]
        builder = QueryBuilder(
            Dataset.Discover,
            self.params,
            f"project:{project1.slug}",
            selected_columns=["project"],
        )

        expected_where = [
            Condition(Column("project_id"), Op.EQ, project1.id),
            Condition(Column("timestamp"), Op.GTE, self.start),
            Condition(Column("timestamp"), Op.LT, self.end),
        ]
        self.assertCountEqual(builder.where, expected_where)
        # Because of the condition on project there should only be 1 project in the transform
        self.assertCountEqual(
            builder.select,
            [
                Function(
                    "transform",
                    [Column("project_id"), [project1.id], [project1.slug], ""],
                    "project",
                )
            ],
        )
Exemple #13
0
    def test_project_alias_column(self):
        """The project alias selects a transform from project ids to slugs."""
        # TODO(snql-boolean): Update this to match the corresponding test in test_filter
        project1 = self.create_project()
        project2 = self.create_project()
        self.params["project_id"] = [project1.id, project2.id]
        builder = QueryBuilder(
            Dataset.Discover, self.params, selected_columns=["project"]
        )

        expected_where = [
            Condition(Column("project_id"), Op.IN, [project1.id, project2.id]),
            Condition(Column("timestamp"), Op.GTE, self.start),
            Condition(Column("timestamp"), Op.LT, self.end),
        ]
        self.assertCountEqual(builder.where, expected_where)
        self.assertCountEqual(
            builder.select,
            [
                Function(
                    "transform",
                    [
                        Column("project_id"),
                        [project1.id, project2.id],
                        [project1.slug, project2.slug],
                        "",
                    ],
                    "project",
                )
            ],
        )
Exemple #14
0
 def test_retention(self):
     """Queries whose date range falls outside retention must be rejected."""
     # Shrink retention to 10 days so the params' window is out of range.
     with self.options({"system.event-retention-days": 10}):
         with self.assertRaises(QueryOutsideRetentionError):
             QueryBuilder(
                 Dataset.Discover,
                 self.params,
                 "",
                 selected_columns=[],
             )
 def data_fn(offset: int, limit: int) -> Any:
     """Fetch one page of span ops, ordered by descending count."""
     snql_query = QueryBuilder(
         dataset=Dataset.Discover,
         params=params,
         selected_columns=["spans_op", "count()"],
         array_join="spans_op",
         query=query,
         limit=limit,
         offset=offset,
         orderby="-count",
     ).get_snql_query()
     rows = raw_snql_query(snql_query, "api.organization-events-span-ops")["data"]
     return [SpanOp(op=row["spans_op"], count=row["count"]) for row in rows]
Exemple #16
0
 def test_array_combinator_is_private(self):
     """sumArray is private: without an ACL entry it must be rejected."""
     # assertRaisesRegexp is a deprecated alias (since Python 3.2);
     # use assertRaisesRegex instead.
     with self.assertRaisesRegex(
         InvalidSearchQuery, "sum: no access to private function"
     ):
         QueryBuilder(
             Dataset.Discover,
             self.params,
             "",
             selected_columns=["sumArray(measurements_value)"],
         )
Exemple #17
0
 def test_array_combinator_with_non_array_arg(self):
     """Array combinators must reject arguments that are not array columns."""
     # assertRaisesRegexp is a deprecated alias (since Python 3.2);
     # use assertRaisesRegex instead.
     with self.assertRaisesRegex(
         InvalidSearchQuery, "stuff is not a valid array column"
     ):
         QueryBuilder(
             Dataset.Discover,
             self.params,
             "",
             selected_columns=["sumArray(stuff)"],
             functions_acl=["sumArray"],
         )
Exemple #18
0
 def test_array_join_clause(self):
     """array_join columns are aliased and recorded on the builder."""
     builder = QueryBuilder(
         Dataset.Discover,
         self.params,
         "",
         selected_columns=["spans_op", "count()"],
         array_join="spans_op",
     )
     expected_columns = [
         AliasedExpression(Column("spans.op"), "spans_op"),
         Function("count", [], "count"),
     ]
     self.assertCountEqual(builder.columns, expected_columns)
     assert builder.array_join == Column("spans.op")
     builder.get_snql_query().validate()
Exemple #19
0
 def test_orderby_duplicate_columns(self):
     """Ordering by a column selected twice still yields a single orderby."""
     builder = QueryBuilder(
         Dataset.Discover,
         self.params,
         selected_columns=["user.email", "user.email"],
         orderby=["user.email"],
     )
     expected = [OrderBy(Column("email"), Direction.ASC)]
     self.assertCountEqual(builder.orderby, expected)
Exemple #20
0
    def test_simple_limitby(self):
        """A limitby tuple resolves to a snql LimitBy on the resolved column."""
        builder = QueryBuilder(
            dataset=Dataset.Discover,
            params=self.params,
            query="",
            selected_columns=["message"],
            orderby="message",
            limitby=("message", 1),
            limit=4,
        )
        assert builder.limitby == LimitBy(Column("message"), 1)
Exemple #21
0
    def test_simple_query(self):
        """A basic query resolves both where conditions and selected columns."""
        builder = QueryBuilder(
            Dataset.Discover,
            self.params,
            "user.email:[email protected] release:1.2.1",
            ["user.email", "release"],
        )

        expected_where = [
            Condition(Column("email"), Op.EQ, "*****@*****.**"),
            Condition(Column("release"), Op.EQ, "1.2.1"),
            *self.default_conditions,
        ]
        self.assertCountEqual(builder.where, expected_where)

        expected_select = [
            Function("toString", [Column("email")], "user.email"),
            Column("release"),
        ]
        self.assertCountEqual(builder.select, expected_select)
        builder.get_snql_query().validate()
Exemple #22
0
def team_key_transaction_filter(builder: QueryBuilder, search_filter: SearchFilter) -> WhereType:
    """Convert a team_key_transaction search filter into a snql condition."""
    value = search_filter.value.value
    key_transaction_expr = builder.resolve_field_alias(constants.TEAM_KEY_TRANSACTION_ALIAS)

    # An empty raw value (the has:/!has: form) is treated as (not) equal to 0.
    if search_filter.value.raw_value == "":
        operator = Op.NEQ if search_filter.operator == "!=" else Op.EQ
        return Condition(key_transaction_expr, operator, 0)

    if value in ("1", 1):
        return Condition(key_transaction_expr, Op.EQ, 1)
    if value in ("0", 0):
        return Condition(key_transaction_expr, Op.EQ, 0)

    raise InvalidSearchQuery(
        "Invalid value for key_transaction condition. Accepted values are 1, 0"
    )
Exemple #23
0
 def test_project_in_condition_filters_not_in_project_filter(self):
     """Filtering by a project outside params' project_id list must fail."""
     # TODO(snql-boolean): Update this to match the corresponding test in test_filter
     project1 = self.create_project()
     project2 = self.create_project()
     # params is assumed to be validated at this point, so this query should be invalid
     self.params["project_id"] = [project2.id]
     # assertRaisesRegexp is a deprecated alias; use assertRaisesRegex. The
     # f-string already stringifies the slug, so the explicit str() was redundant.
     with self.assertRaisesRegex(
         InvalidSearchQuery,
         re.escape(
             f"Invalid query. Project(s) {project1.slug} do not exist or are not actively selected."
         ),
     ):
         QueryBuilder(
             Dataset.Discover,
             self.params,
             f"project:{project1.slug}",
             selected_columns=["environment"],
         )
Exemple #24
0
 def test_array_combinator(self):
     """sumArray(col) resolves to sum(arrayJoin(col)) when the ACL allows it."""
     builder = QueryBuilder(
         Dataset.Discover,
         self.params,
         "",
         selected_columns=["sumArray(measurements_value)"],
         functions_acl=["sumArray"],
     )
     expected = [
         Function(
             "sum",
             [Function("arrayJoin", [Column("measurements.value")])],
             "sumArray_measurements_value",
         )
     ]
     self.assertCountEqual(builder.columns, expected)
Exemple #25
0
 def test_array_join(self):
     """array_join functions appear both in the columns and in the groupby."""
     builder = QueryBuilder(
         Dataset.Discover,
         self.params,
         "",
         selected_columns=["array_join(measurements_key)", "count()"],
         functions_acl=["array_join"],
     )
     array_join_column = Function(
         "arrayJoin",
         [Column("measurements.key")],
         "array_join_measurements_key",
     )
     self.assertCountEqual(
         builder.columns,
         [array_join_column, Function("count", [], "count")],
     )
     # make sure the array join columns are present in groupby
     self.assertCountEqual(builder.groupby, [array_join_column])
Exemple #26
0
    def test_project_in_condition_filters(self):
        """A project slug filter becomes an EQ condition on project_id."""
        # TODO(snql-boolean): Update this to match the corresponding test in test_filter
        project1 = self.create_project()
        project2 = self.create_project()
        self.params["project_id"] = [project1.id, project2.id]
        builder = QueryBuilder(
            Dataset.Discover,
            self.params,
            f"project:{project1.slug}",
            selected_columns=["environment"],
        )

        expected = [
            Condition(Column("project_id"), Op.EQ, project1.id),
            Condition(Column("timestamp"), Op.GTE, self.start),
            Condition(Column("timestamp"), Op.LT, self.end),
        ]
        self.assertCountEqual(builder.where, expected)
Exemple #27
0
def project_slug_converter(
    builder: QueryBuilder, search_filter: SearchFilter
) -> Optional[WhereType]:
    """Convert project slugs to ids and create a filter based on those.
    This is cause we only store project ids in clickhouse.

    :param builder: Query builder whose slug->id mapping and
        projects_to_filter state are consulted and updated.
    :param search_filter: A ``project:...`` filter holding one or more slugs.
    :returns: The converted condition, or None if no selected project matched.
    :raises InvalidSearchQuery: For ``project:""`` queries, or when an
        equality filter names projects that are missing or not selected.
    """
    value = search_filter.value.value

    if Op(search_filter.operator) == Op.EQ and value == "":
        raise InvalidSearchQuery(
            'Cannot query for has:project or project:"" as every event will have a project'
        )

    slugs = to_list(value)
    project_slugs: Mapping[str, int] = {
        slug: project_id for slug, project_id in builder.project_slugs.items() if slug in slugs
    }
    missing: List[str] = [slug for slug in slugs if slug not in project_slugs]
    if missing and search_filter.operator in constants.EQUALITY_OPERATORS:
        raise InvalidSearchQuery(
            f"Invalid query. Project(s) {', '.join(missing)} do not exist or are not actively selected."
        )
    # Sorted for consistent query results. sorted() already returns a list,
    # so the redundant list() wrapper was removed.
    project_ids = sorted(project_slugs.values())
    if project_ids:
        # Create a new search filter with the correct values
        converted_filter = builder.convert_search_filter_to_condition(
            SearchFilter(
                SearchKey("project.id"),
                search_filter.operator,
                SearchValue(project_ids if search_filter.is_in_filter else project_ids[0]),
            )
        )
        if converted_filter:
            if search_filter.operator in constants.EQUALITY_OPERATORS:
                builder.projects_to_filter.update(project_ids)
            return converted_filter

    return None
Exemple #28
0
def resolve_project_slug_alias(builder: QueryBuilder, alias: str) -> SelectType:
    """Build a transform() expression mapping project ids to their slugs."""
    candidate_ids = {
        pid for pid in builder.params.get("project_id", []) if isinstance(pid, int)
    }

    # Try to reduce the size of the transform by using any existing conditions on projects
    # Do not optimize projects list if conditions contain OR operator
    if not builder.has_or_condition and len(builder.projects_to_filter) > 0:
        candidate_ids &= builder.projects_to_filter

    rows = Project.objects.filter(id__in=candidate_ids).values("slug", "id")
    ids = [row["id"] for row in rows]
    slugs = [row["slug"] for row in rows]

    return Function(
        "transform",
        [builder.column("project.id"), ids, slugs, ""],
        alias,
    )
Exemple #29
0
def query_example_transactions(
    params: ParamsType,
    query: Optional[str],
    direction: str,
    orderby: str,
    spans: List[Span],
    per_suspect: int = 5,
    offset: Optional[int] = None,
) -> Dict[Span, List[EventID]]:
    """Fetch example event ids for each suspect span (op, group) pair.

    Queries Discover for events whose array-joined spans match the given
    suspects and groups the resulting event ids per suspect span.

    :param params: Snuba query params (projects, organization, date range).
    :param query: Optional user search query applied to the events.
    :param direction: Orderby direction prefix ("" ascending, "-" descending).
    :param orderby: Key into SPAN_PERFORMANCE_COLUMNS choosing the sort columns.
    :param spans: Suspect spans to fetch examples for.
    :param per_suspect: Target number of examples per suspect span.
    :param offset: Optional pagination offset.
    :returns: Mapping from each suspect Span to its example EventIDs
        (suspects with no matching events map to empty lists).
    """
    # there aren't any suspects, early return to save an empty query
    if not spans or per_suspect == 0:
        return {}

    orderby_columns = SPAN_PERFORMANCE_COLUMNS[orderby].suspect_example_sort

    selected_columns: List[str] = [
        "id",
        "project.id",
        "project",
        "array_join(spans_op)",
        "array_join(spans_group)",
        *orderby_columns,
    ]

    builder = QueryBuilder(
        dataset=Dataset.Discover,
        params=params,
        selected_columns=selected_columns,
        query=query,
        orderby=[direction + column for column in orderby_columns],
        # we want only `per_suspect` examples for each suspect
        limit=len(spans) * per_suspect,
        offset=offset,
        functions_acl=[
            "array_join", "sumArray", "percentileArray", "maxArray"
        ],
    )

    # we are only interested in the specific op, group pairs from the suspects
    builder.add_conditions([
        Condition(
            Function(
                "tuple",
                [
                    builder.resolve_function("array_join(spans_op)"),
                    builder.resolve_function("array_join(spans_group)"),
                ],
            ),
            Op.IN,
            Function(
                "tuple",
                [
                    Function("tuple", [suspect.op, suspect.group])
                    for suspect in spans
                ],
            ),
        ),
    ])

    if len(spans) > 1:
        # Hack: the limit by clause only allows columns but here we want to
        # do a limitby on the two array joins. For the time being, directly
        # do the limitby on the internal snuba name for the span group column
        # but this should not be relied upon in production, and if two spans
        # differ only by the span op, this will result in a incorrect query
        builder.limitby = LimitBy(Column("_snuba_array_join_spans_group"),
                                  per_suspect)

    snql_query = builder.get_snql_query()
    results = raw_snql_query(
        snql_query, "api.organization-events-spans-performance-examples")

    # Pre-seed the result map so every suspect appears even with no examples.
    examples: Dict[Span, List[EventID]] = {
        Span(suspect.op, suspect.group): []
        for suspect in spans
    }

    for example in results["data"]:
        key = Span(example["array_join_spans_op"],
                   example["array_join_spans_group"])
        value = EventID(example["project.id"], example["project"],
                        example["id"])
        examples[key].append(value)

    return examples
Exemple #30
0
def query_suspect_span_groups(
    params: ParamsType,
    fields: List[str],
    query: Optional[str],
    span_ops: Optional[List[str]],
    span_groups: Optional[List[str]],
    direction: str,
    orderby: str,
    limit: int,
    offset: int,
) -> List[SuspectSpan]:
    """Query Discover for suspect span (op, group) groups with aggregates.

    :param params: Snuba query params (projects, organization, date range).
    :param fields: Extra user-requested columns; equation columns are split
        out and passed separately as equations.
    :param query: Optional user search query applied to the events.
    :param span_ops: Optional allowlist of span ops to restrict the results.
    :param span_groups: Optional allowlist of span groups to restrict results.
    :param direction: Orderby direction prefix ("" ascending, "-" descending).
    :param orderby: Key into SPAN_PERFORMANCE_COLUMNS choosing sort columns.
    :param limit: Maximum number of suspect groups to return.
    :param offset: Pagination offset.
    :returns: One SuspectSpan per (op, group) row returned by snuba.
    """
    suspect_span_columns = SPAN_PERFORMANCE_COLUMNS[orderby]

    selected_columns: List[str] = [
        column
        for column in suspect_span_columns.suspect_op_group_columns + fields
        if not is_equation(column)
    ] + [
        "array_join(spans_op)",
        "array_join(spans_group)",
        "count()",
        "count_unique(id)",
    ]

    equations: List[str] = [
        strip_equation(column)
        for column in suspect_span_columns.suspect_op_group_columns
        if is_equation(column)
    ]

    # TODO: This adds all the possible fields to the query by default. However,
    # due to the way shards aggregate the rows, this can be slow. As an
    # optimization, allow the fields to be user specified to only get the
    # necessary aggregations.
    #
    # As part of the transition, continue to add all possible fields when its
    # not specified, but this should be removed in the future.
    if not fields:
        for column in SPAN_PERFORMANCE_COLUMNS.values():
            for col in column.suspect_op_group_sort:
                if not col.startswith("equation["):
                    selected_columns.append(col)

    builder = QueryBuilder(
        dataset=Dataset.Discover,
        params=params,
        selected_columns=selected_columns,
        equations=equations,
        query=query,
        orderby=[
            direction + column
            for column in suspect_span_columns.suspect_op_group_sort
        ],
        auto_aggregations=True,
        use_aggregate_conditions=True,
        limit=limit,
        offset=offset,
        functions_acl=[
            "array_join", "sumArray", "percentileArray", "maxArray"
        ],
    )

    extra_conditions = []

    # Restrict to the requested ops/groups when allowlists were provided.
    if span_ops:
        extra_conditions.append(
            Condition(
                builder.resolve_function("array_join(spans_op)"),
                Op.IN,
                Function("tuple", span_ops),
            ))

    if span_groups:
        extra_conditions.append(
            Condition(
                builder.resolve_function("array_join(spans_group)"),
                Op.IN,
                Function("tuple", span_groups),
            ))

    if extra_conditions:
        builder.add_conditions(extra_conditions)

    snql_query = builder.get_snql_query()
    results = raw_snql_query(
        snql_query, "api.organization-events-spans-performance-suspects")

    return [
        SuspectSpan(
            op=suspect["array_join_spans_op"],
            group=suspect["array_join_spans_group"],
            frequency=suspect.get("count_unique_id"),
            count=suspect.get("count"),
            avg_occurrences=suspect.get("equation[0]"),
            sum_exclusive_time=suspect.get("sumArray_spans_exclusive_time"),
            p50_exclusive_time=suspect.get(
                "percentileArray_spans_exclusive_time_0_50"),
            p75_exclusive_time=suspect.get(
                "percentileArray_spans_exclusive_time_0_75"),
            p95_exclusive_time=suspect.get(
                "percentileArray_spans_exclusive_time_0_95"),
            p99_exclusive_time=suspect.get(
                "percentileArray_spans_exclusive_time_0_99"),
        ) for suspect in results["data"]
    ]