Example #1
def get_timeseries_snuba_filter(selected_columns,
                                query,
                                params,
                                rollup,
                                default_count=True):
    snuba_filter = get_filter(query, params)
    if not snuba_filter.start and not snuba_filter.end:
        raise InvalidSearchQuery(
            "Cannot get timeseries result without a start and end.")

    snuba_filter.update_with(
        resolve_field_list(selected_columns, snuba_filter, auto_fields=False))

    # Resolve the public aliases into the discover dataset names.
    snuba_filter, translated_columns = resolve_discover_aliases(snuba_filter)
    if not snuba_filter.aggregations:
        raise InvalidSearchQuery(
            "Cannot get timeseries result with no aggregation.")

    # Change the alias of the first aggregation to count. This ensures compatibility
    # with what other parts of the timeseries endpoint expect.
    if len(snuba_filter.aggregations) == 1 and default_count:
        snuba_filter.aggregations[0][2] = "count"

    return snuba_filter, translated_columns
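
Note: in this legacy Snuba filter format the alias is the last element of each aggregation (see the agg_clause[-1] checks in Example #9), so overwriting index 2 renames the output column. A minimal standalone sketch of that rename (the triple below is illustrative, not taken from a real query):

# Illustrative aggregation entry: [snuba function, source column, output alias]
aggregations = [["uniq", "user", "count_unique_user"]]

# Force the alias of the single aggregation to "count", mirroring the
# default_count behaviour in get_timeseries_snuba_filter above.
if len(aggregations) == 1:
    aggregations[0][2] = "count"

assert aggregations == [["uniq", "user", "count"]]
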
    def build_snuba_filter(
        self,
        query: str,
        environment: Optional[Environment],
        params: Optional[Mapping[str, Any]] = None,
    ) -> Filter:
        resolve_func = resolve_column(Dataset(self.dataset.value))
        aggregations = [self.aggregate]
        # This aggregation is added to return the total number of sessions in crash
        # rate alerts, which is used to determine whether we are below a general minimum alert threshold
        count_col = re.search(r"(sessions|users)", self.aggregate)
        if not count_col:
            raise UnsupportedQuerySubscription(
                "Only crash free percentage queries are supported for subscriptions"
                "over the sessions dataset"
            )
        count_col_matched = count_col.group()

        aggregations += [f"identity({count_col_matched}) AS {CRASH_RATE_ALERT_SESSION_COUNT_ALIAS}"]
        functions_acl = ["identity"]
        snuba_filter = get_filter(query, params=params)
        snuba_filter.update_with(
            resolve_field_list(
                aggregations, snuba_filter, auto_fields=False, functions_acl=functions_acl
            )
        )
        snuba_filter = resolve_snuba_aliases(snuba_filter, resolve_func)[0]
        if environment:
            snuba_filter.conditions.append(["environment", "=", environment.name])
        return snuba_filter
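
The count column detection above is a plain regex match on the aggregate string. A minimal standalone sketch of the same check (the aggregate string is illustrative, and ValueError stands in for UnsupportedQuerySubscription so the sketch runs without Sentry imports):

import re

def pick_count_column(aggregate):
    # Match either "sessions" or "users" inside the aggregate expression.
    match = re.search(r"(sessions|users)", aggregate)
    if not match:
        # Stand-in for the UnsupportedQuerySubscription raised above.
        raise ValueError("only session/user based aggregates are supported")
    return match.group()

assert pick_count_column("percentage(sessions_crashed, sessions)") == "sessions"
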
    def get_field(self, request, snuba_args):
        y_axis = request.GET.get("yAxis", None)
        # These aliases are used by v1 of events.
        if not y_axis or y_axis == "event_count":
            y_axis = "count()"
        elif y_axis == "user_count":
            y_axis = "count_unique(user)"

        snuba_filter = eventstore.Filter(
            {
                "start": snuba_args.get("start"),
                "end": snuba_args.get("end"),
                "rollup": snuba_args.get("rollup"),
            }
        )
        try:
            resolved = resolve_field_list([y_axis], snuba_filter)
        except InvalidSearchQuery as err:
            raise ParseError(detail=str(err))
        try:
            aggregate = resolved["aggregations"][0]
        except IndexError:
            raise ParseError(detail="Invalid yAxis value requested.")
        aggregate[2] = "count"
        snuba_args["aggregations"] = [aggregate]

        return snuba_args
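
The yAxis handling above is just a legacy-alias translation. A minimal standalone sketch of that mapping (the dict is a hypothetical restatement of the if/elif chain, not an existing helper):

# Legacy v1 aliases mapped onto discover-style aggregate functions.
Y_AXIS_ALIASES = {
    "event_count": "count()",
    "user_count": "count_unique(user)",
}

def normalize_y_axis(y_axis):
    # A missing value falls back to count(), as in get_field above.
    if not y_axis:
        return "count()"
    return Y_AXIS_ALIASES.get(y_axis, y_axis)

assert normalize_y_axis(None) == "count()"
assert normalize_y_axis("user_count") == "count_unique(user)"
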
Example #4
    def validate(self, data):
        if not data.get("id"):
            keys = set(data.keys())
            if self.required_for_create - keys:
                raise serializers.ValidationError(
                    {
                        "fields": "fields are required during creation.",
                        "conditions": "conditions are required during creation.",
                    }
                )

        # Validate the query that would be created when run.
        conditions = self._get_attr(data, "conditions", "")
        fields = self._get_attr(data, "fields", []).copy()
        orderby = self._get_attr(data, "orderby", "")
        equations, fields = categorize_columns(fields)

        if equations is not None:
            resolved_equations, _ = resolve_equation_list(equations, fields)
        else:
            resolved_equations = []

        try:
            # The eps/epm functions require an interval argument, or a start/end
            # from which the interval can be computed. This uses a hard coded
            # start/end to ensure the validation succeeds since the values
            # themselves don't matter.
            params = {
                "start": datetime.now() - timedelta(days=1),
                "end": datetime.now(),
                "project_id": [p.id for p in self.context.get("projects")],
                "organization_id": self.context.get("organization").id,
            }

            snuba_filter = get_filter(conditions, params=params)
        except InvalidSearchQuery as err:
            raise serializers.ValidationError({"conditions": f"Invalid conditions: {err}"})

        if orderby:
            snuba_filter.orderby = get_function_alias(orderby)
        try:
            resolve_field_list(fields, snuba_filter, resolved_equations=resolved_equations)
        except InvalidSearchQuery as err:
            raise serializers.ValidationError({"fields": f"Invalid fields: {err}"})
        return data
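
The hard coded one-day window in validate exists only so eps/epm can derive an interval while the query is checked. A standalone sketch of building those params (build_validation_params and the ids are placeholders for illustration):

from datetime import datetime, timedelta

def build_validation_params(project_ids, organization_id):
    # The actual values don't matter: a fixed one-day range is enough for
    # eps/epm to compute an interval during validation.
    return {
        "start": datetime.now() - timedelta(days=1),
        "end": datetime.now(),
        "project_id": project_ids,
        "organization_id": organization_id,
    }

params = build_validation_params([1, 2], 42)  # placeholder ids
assert params["end"] - params["start"] >= timedelta(days=1)
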
Example #5
def build_snuba_filter(dataset, query, aggregate, environment, event_types, params=None):
    resolve_func = (
        resolve_column(Dataset.Events)
        if dataset == QueryDatasets.EVENTS
        else resolve_column(Dataset.Transactions)
    )
    query = apply_dataset_query_conditions(dataset, query, event_types)
    snuba_filter = get_filter(query, params=params)
    snuba_filter.update_with(resolve_field_list([aggregate], snuba_filter, auto_fields=False))
    snuba_filter = resolve_snuba_aliases(snuba_filter, resolve_func)[0]
    if snuba_filter.group_ids:
        snuba_filter.conditions.append(["group_id", "IN", list(map(int, snuba_filter.group_ids))])
    if environment:
        snuba_filter.conditions.append(["environment", "=", environment.name])
    return snuba_filter
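
The group_id handling above only adds an IN condition when the filter carries group ids, coercing them to ints first. A standalone sketch of that step (append_group_condition is a hypothetical helper; the conditions list is illustrative):

def append_group_condition(conditions, group_ids):
    # Only add the clause when group ids are present, coercing them to ints
    # for the Snuba IN condition, as in build_snuba_filter above.
    if group_ids:
        conditions.append(["group_id", "IN", list(map(int, group_ids))])
    return conditions

assert append_group_condition([], ["1", "2"]) == [["group_id", "IN", [1, 2]]]
assert append_group_condition([], None) == []
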
Example #6
def build_snuba_filter(dataset,
                       query,
                       aggregate,
                       environment,
                       event_types,
                       params=None):
    resolve_func = {
        QueryDatasets.EVENTS: resolve_column(Dataset.Events),
        QueryDatasets.SESSIONS: resolve_column(Dataset.Sessions),
        QueryDatasets.TRANSACTIONS: resolve_column(Dataset.Transactions),
    }[dataset]

    functions_acl = None

    aggregations = [aggregate]
    if dataset == QueryDatasets.SESSIONS:
        # This aggregation is added to return the total number of sessions in crash
        # rate alerts, which is used to determine whether we are below a general minimum alert threshold
        count_col = re.search(r"(sessions|users)", aggregate)
        count_col_matched = count_col.group()

        aggregations += [
            f"identity({count_col_matched}) AS {CRASH_RATE_ALERT_SESSION_COUNT_ALIAS}"
        ]
        functions_acl = ["identity"]

    query = apply_dataset_query_conditions(dataset, query, event_types)
    snuba_filter = get_filter(query, params=params)
    snuba_filter.update_with(
        resolve_field_list(aggregations,
                           snuba_filter,
                           auto_fields=False,
                           functions_acl=functions_acl))
    snuba_filter = resolve_snuba_aliases(snuba_filter, resolve_func)[0]
    if snuba_filter.group_ids:
        snuba_filter.conditions.append(
            ["group_id", "IN",
             list(map(int, snuba_filter.group_ids))])
    if environment:
        snuba_filter.conditions.append(["environment", "=", environment.name])
    return snuba_filter
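
The resolve_func selection above is a plain dict dispatch keyed by dataset. A standalone sketch of the same pattern with placeholder enum values and resolvers (none of these names are real Sentry APIs):

from enum import Enum

class QueryDataset(Enum):  # placeholder standing in for QueryDatasets
    EVENTS = "events"
    SESSIONS = "sessions"
    TRANSACTIONS = "transactions"

def pick_resolver(dataset):
    # Placeholder resolvers standing in for resolve_column(Dataset.X);
    # an unknown dataset raises KeyError, just like the dict lookup above.
    resolvers = {
        QueryDataset.EVENTS: lambda col: f"events.{col}",
        QueryDataset.SESSIONS: lambda col: f"sessions.{col}",
        QueryDataset.TRANSACTIONS: lambda col: f"transactions.{col}",
    }
    return resolvers[dataset]

assert pick_resolver(QueryDataset.SESSIONS)("users") == "sessions.users"
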
    def build_snuba_filter(
        self,
        query: str,
        environment: Optional[Environment],
        params: Optional[Mapping[str, Any]] = None,
    ) -> Filter:
        resolve_func = resolve_column(Dataset(self.dataset.value))

        query = apply_dataset_query_conditions(QueryDatasets(self.dataset), query, self.event_types)
        snuba_filter = get_filter(query, params=params)
        snuba_filter.update_with(
            resolve_field_list([self.aggregate], snuba_filter, auto_fields=False)
        )
        snuba_filter = resolve_snuba_aliases(snuba_filter, resolve_func)[0]
        if snuba_filter.group_ids:
            snuba_filter.conditions.append(
                ["group_id", "IN", list(map(int, snuba_filter.group_ids))]
            )
        if environment:
            snuba_filter.conditions.append(["environment", "=", environment.name])
        return snuba_filter
Example #8
    def validate(self, data):
        organization = self.context["organization"]
        query_info = data["query_info"]

        # Validate the project field, if provided
        # A PermissionDenied error will be raised in `get_projects_by_id` if the request is invalid
        project_query = query_info.get("project")
        if project_query:
            get_projects_by_id = self.context["get_projects_by_id"]
            # Coerce the query into a set
            if isinstance(project_query, list):
                projects = get_projects_by_id(set(map(int, project_query)))
            else:
                projects = get_projects_by_id({int(project_query)})
            query_info["project"] = [project.id for project in projects]

        # Discover Pre-processing
        if data["query_type"] == ExportQueryType.DISCOVER_STR:
            # coerce the fields into a list as needed
            fields = query_info.get("field", [])
            if not isinstance(fields, list):
                fields = [fields]

            if len(fields) > MAX_FIELDS:
                detail = f"You can export up to {MAX_FIELDS} fields at a time. Please delete some and try again."
                raise serializers.ValidationError(detail)
            elif len(fields) == 0:
                raise serializers.ValidationError("at least one field is required to export")

            if "query" not in query_info:
                detail = "query is a required to export, please pass an empty string if you don't want to set one"
                raise serializers.ValidationError(detail)

            query_info["field"] = fields

            if not query_info.get("project"):
                projects = self.context["get_projects"]()
                query_info["project"] = [project.id for project in projects]

            # make sure to fix the export start/end times to ensure consistent results
            try:
                start, end = get_date_range_from_params(query_info)
            except InvalidParams as e:
                sentry_sdk.set_tag("query.error_reason", "Invalid date params")
                raise serializers.ValidationError(str(e))

            if "statsPeriod" in query_info:
                del query_info["statsPeriod"]
            if "statsPeriodStart" in query_info:
                del query_info["statsPeriodStart"]
            if "statsPeriodEnd" in query_info:
                del query_info["statsPeriodEnd"]
            query_info["start"] = start.isoformat()
            query_info["end"] = end.isoformat()

            # validate the query string by trying to parse it
            processor = DiscoverProcessor(
                discover_query=query_info,
                organization_id=organization.id,
            )
            try:
                snuba_filter = get_filter(query_info["query"], processor.params)
                resolve_field_list(
                    fields.copy(),
                    snuba_filter,
                    auto_fields=True,
                    auto_aggregations=True,
                )
            except InvalidSearchQuery as err:
                raise serializers.ValidationError(str(err))

        return data
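
Both the project and field handling in validate follow the same coercion pattern: accept a scalar or a list and normalise it before validating. A standalone sketch (the helper names and the field limit are placeholders, not the real MAX_FIELDS):

def coerce_project_ids(project_query):
    # A single value becomes a one-element set; a list is converted wholesale.
    if isinstance(project_query, list):
        return set(map(int, project_query))
    return {int(project_query)}

def coerce_fields(fields, max_fields=20):  # placeholder limit
    fields = fields if isinstance(fields, list) else [fields]
    if not fields:
        raise ValueError("at least one field is required to export")
    if len(fields) > max_fields:
        raise ValueError(f"you can export up to {max_fields} fields at a time")
    return fields

assert coerce_project_ids("7") == {7}
assert coerce_fields("title") == ["title"]
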
Example #9
def prepare_discover_query(
    selected_columns,
    query,
    params,
    orderby=None,
    auto_fields=False,
    auto_aggregations=False,
    use_aggregate_conditions=False,
    conditions=None,
    functions_acl=None,
):
    with sentry_sdk.start_span(op="discover.discover",
                               description="query.filter_transform") as span:
        span.set_data("query", query)

        snuba_filter = get_filter(query, params)
        if not use_aggregate_conditions:
            assert (
                not auto_aggregations
            ), "Auto aggregations cannot be used without enabling aggregate conditions"
            snuba_filter.having = []

    with sentry_sdk.start_span(op="discover.discover",
                               description="query.field_translations"):
        if orderby is not None:
            orderby = list(orderby) if isinstance(orderby, (list, tuple)) else [orderby]
            snuba_filter.orderby = [get_function_alias(o) for o in orderby]

        resolved_fields = resolve_field_list(
            selected_columns,
            snuba_filter,
            auto_fields=auto_fields,
            auto_aggregations=auto_aggregations,
            functions_acl=functions_acl,
        )

        snuba_filter.update_with(resolved_fields)

        # Resolve the public aliases into the discover dataset names.
        snuba_filter, translated_columns = resolve_discover_aliases(
            snuba_filter)

        # Make sure that any aggregate conditions are also in the selected columns
        for having_clause in snuba_filter.having:
            # The first element of the having can be an alias, or a nested array of functions. Loop through to make sure
            # any referenced functions are in the aggregations.
            error_extra = ", and could not be automatically added" if auto_aggregations else ""
            if isinstance(having_clause[0], (list, tuple)):
                # Functions are of the form [fn, [args]]
                args_to_check = [[having_clause[0]]]
                conditions_not_in_aggregations = []
                while len(args_to_check) > 0:
                    args = args_to_check.pop()
                    for arg in args:
                        if arg[0] in [SNUBA_AND, SNUBA_OR]:
                            args_to_check.extend(arg[1])
                        # Only need to iterate on arg[1] if it's a list
                        elif isinstance(arg[1], (list, tuple)):
                            alias = arg[1][0]
                            found = any(
                                alias == agg_clause[-1]
                                for agg_clause in snuba_filter.aggregations)
                            if not found:
                                conditions_not_in_aggregations.append(alias)

                if len(conditions_not_in_aggregations) > 0:
                    raise InvalidSearchQuery(
                        "Aggregate(s) {} used in a condition but are not in the selected columns{}."
                        .format(
                            ", ".join(conditions_not_in_aggregations),
                            error_extra,
                        ))
            else:
                found = any(having_clause[0] == agg_clause[-1]
                            for agg_clause in snuba_filter.aggregations)
                if not found:
                    raise InvalidSearchQuery(
                        "Aggregate {} used in a condition but is not a selected column{}."
                        .format(
                            having_clause[0],
                            error_extra,
                        ))

        if conditions is not None:
            snuba_filter.conditions.extend(conditions)

    return PreparedQuery(snuba_filter, translated_columns, resolved_fields)
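
The having-clause check above walks a nested [function, [args]] structure and compares the aliases it references against the aggregation aliases (the alias being the last element of each aggregation). A simplified standalone sketch of that idea (operator names, helper name and the having/aggregation literals are all illustrative, and the traversal is deliberately simpler than the original):

SNUBA_AND, SNUBA_OR = "and", "or"  # placeholder operator names

def missing_aggregate_aliases(having_clause, aggregations):
    # Collect aliases referenced by a (possibly nested) condition that have
    # no matching aggregation alias.
    known = {agg[-1] for agg in aggregations}
    missing = []
    args_to_check = [[having_clause[0]]]
    while args_to_check:
        for arg in args_to_check.pop():
            if arg[0] in (SNUBA_AND, SNUBA_OR):
                # Queue the nested sub-expressions for inspection.
                args_to_check.append(arg[1])
            elif isinstance(arg[1], (list, tuple)):
                alias = arg[1][0]
                if alias not in known:
                    missing.append(alias)
    return missing

having = [["divide", ["failure_count", "count"]], ">", 0.5]
aggregations = [["countIf(duration, 0)", "", "failure_count"], ["count()", "", "count"]]
assert missing_aggregate_aliases(having, aggregations) == []
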
Example #10
    def validate(self, data):
        if not data.get("id"):
            keys = set(data.keys())
            if self.required_for_create - keys:
                raise serializers.ValidationError({
                    "fields":
                    "fields are required during creation.",
                    "conditions":
                    "conditions are required during creation.",
                })

        # Validate the query that would be created when run.
        conditions = self._get_attr(data, "conditions", "")
        fields = self._get_attr(data, "fields", []).copy()
        orderby = self._get_attr(data, "orderby", "")
        equations, fields = categorize_columns(fields)
        is_table = is_table_display_type(self.context.get("displayType"))

        if equations is not None:
            try:
                resolved_equations, _, _ = resolve_equation_list(
                    equations,
                    fields,
                    auto_add=not is_table,
                    aggregates_only=not is_table,
                )
            except (InvalidSearchQuery, ArithmeticError) as err:
                raise serializers.ValidationError(
                    {"fields": f"Invalid fields: {err}"})
        else:
            resolved_equations = []

        try:
            parse_search_query(conditions)
        except InvalidSearchQuery as err:
            # We don't know if the widget that this query belongs to is an
            # Issue widget or Discover widget. Pass the error back to the
            # Widget serializer to decide whether or not to raise this
            # error based on the Widget's type
            data["issue_query_error"] = {
                "conditions": [f"Invalid conditions: {err}"]
            }

        try:
            # The eps/epm functions require an interval argument, or a start/end
            # from which the interval can be computed. This uses a hard coded
            # start/end to ensure the validation succeeds since the values
            # themselves don't matter.
            params = {
                "start": datetime.now() - timedelta(days=1),
                "end": datetime.now(),
                "project_id": [p.id for p in self.context.get("projects")],
                "organization_id": self.context.get("organization").id,
            }

            snuba_filter = get_filter(conditions, params=params)
        except InvalidSearchQuery as err:
            data["discover_query_error"] = {
                "conditions": [f"Invalid conditions: {err}"]
            }
            return data

        if orderby:
            snuba_filter.orderby = get_function_alias(orderby)
        try:
            resolve_field_list(fields,
                               snuba_filter,
                               resolved_equations=resolved_equations)
        except InvalidSearchQuery as err:
            # We don't know if the widget that this query belongs to is an
            # Issue widget or Discover widget. Pass the error back to the
            # Widget serializer to decide whether or not to raise this
            # error based on the Widget's type
            data["discover_query_error"] = {"fields": f"Invalid fields: {err}"}

        return data
Example #11
def get_timeseries_snuba_filter(selected_columns, query, params):
    snuba_filter = get_filter(query, params)
    if not snuba_filter.start and not snuba_filter.end:
        raise InvalidSearchQuery(
            "Cannot get timeseries result without a start and end.")

    columns = []
    equations = []

    for column in selected_columns:
        if is_equation(column):
            equations.append(strip_equation(column))
        else:
            columns.append(column)

    if len(equations) > 0:
        resolved_equations, updated_columns = resolve_equation_list(
            equations, columns, aggregates_only=True, auto_add=True)
    else:
        resolved_equations = []
        updated_columns = columns

    # For the new apdex, we need to add the project threshold config as a selected
    # column, which means the group by for the time series won't work.
    # As a temporary solution, we will calculate the mean of all the project
    # level thresholds in the request and use the legacy apdex, user_misery
    # or count_miserable calculation.
    # TODO(snql): Alias the project_threshold_config column so it doesn't
    # have to be in the SELECT statement and group by, to be able to use the new
    # apdex, user_misery and count_miserable.
    threshold = None
    for agg in CONFIGURABLE_AGGREGATES:
        if agg not in updated_columns:
            continue

        if threshold is None:
            project_ids = params.get("project_id")
            threshold_configs = list(
                ProjectTransactionThreshold.objects.filter(
                    organization_id=params["organization_id"],
                    project_id__in=project_ids,
                ).values_list("threshold", flat=True))

            projects_without_threshold = len(project_ids) - len(
                threshold_configs)
            threshold_configs.extend([DEFAULT_PROJECT_THRESHOLD] *
                                     projects_without_threshold)
            threshold = int(mean(threshold_configs))

        updated_columns.remove(agg)
        updated_columns.append(
            CONFIGURABLE_AGGREGATES[agg].format(threshold=threshold))

    snuba_filter.update_with(
        resolve_field_list(updated_columns,
                           snuba_filter,
                           auto_fields=False,
                           resolved_equations=resolved_equations))

    # Resolve the public aliases into the discover dataset names.
    snuba_filter, translated_columns = resolve_discover_aliases(snuba_filter)
    if not snuba_filter.aggregations:
        raise InvalidSearchQuery(
            "Cannot get timeseries result with no aggregation.")

    return snuba_filter, translated_columns
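
The threshold fallback above pads projects without an explicit config with a default before averaging. A standalone sketch of that calculation (the helper name and the default value are illustrative):

from statistics import mean

DEFAULT_PROJECT_THRESHOLD = 300  # illustrative default

def average_threshold(project_ids, configured_thresholds):
    # Projects without an explicit config fall back to the default before
    # the mean is taken, mirroring get_timeseries_snuba_filter above.
    thresholds = list(configured_thresholds)
    thresholds.extend([DEFAULT_PROJECT_THRESHOLD] * (len(project_ids) - len(thresholds)))
    return int(mean(thresholds))

assert average_threshold([1, 2, 3], [100, 200]) == 200  # (100 + 200 + 300) / 3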