Ejemplo n.º 1
0
    def validate(self, data):
        if not data.get("id"):
            keys = set(data.keys())
            if self.required_for_create - keys:
                raise serializers.ValidationError(
                    {
                        "fields": "fields are required during creation.",
                        "conditions": "conditions are required during creation.",
                    }
                )

        # Validate the query that would be created when run.
        conditions = self._get_attr(data, "conditions", "")
        fields = self._get_attr(data, "fields", []).copy()
        orderby = self._get_attr(data, "orderby", "")
        equations, fields = categorize_columns(fields)

        if equations is not None:
            resolved_equations, _ = resolve_equation_list(equations, fields)
        else:
            resolved_equations = []

        try:
            # When using the eps/epm functions, they require an interval argument
            # or to provide the start/end so that the interval can be computed.
            # This uses a hard coded start/end to ensure the validation succeeds
            # since the values themselves don't matter.
            params = {
                "start": datetime.now() - timedelta(days=1),
                "end": datetime.now(),
                "project_id": [p.id for p in self.context.get("projects")],
                "organization_id": self.context.get("organization").id,
            }

            snuba_filter = get_filter(conditions, params=params)
        except InvalidSearchQuery as err:
            raise serializers.ValidationError({"conditions": f"Invalid conditions: {err}"})

        if orderby:
            snuba_filter.orderby = get_function_alias(orderby)
        try:
            resolve_field_list(fields, snuba_filter, resolved_equations=resolved_equations)
        except InvalidSearchQuery as err:
            raise serializers.ValidationError({"fields": f"Invalid fields: {err}"})
        return data
Ejemplo n.º 2
0
    def validate(self, data):
        if not data.get("id"):
            keys = set(data.keys())
            if self.required_for_create - keys:
                raise serializers.ValidationError({
                    "fields":
                    "fields are required during creation.",
                    "conditions":
                    "conditions are required during creation.",
                })

        # Validate the query that would be created when run.
        conditions = self._get_attr(data, "conditions", "")
        fields = self._get_attr(data, "fields", []).copy()
        orderby = self._get_attr(data, "orderby", "")
        equations, fields = categorize_columns(fields)
        is_table = is_table_display_type(self.context.get("displayType"))

        if equations is not None:
            try:
                resolved_equations, _, _ = resolve_equation_list(
                    equations,
                    fields,
                    auto_add=not is_table,
                    aggregates_only=not is_table,
                )
            except (InvalidSearchQuery, ArithmeticError) as err:
                raise serializers.ValidationError(
                    {"fields": f"Invalid fields: {err}"})
        else:
            resolved_equations = []

        try:
            parse_search_query(conditions)
        except InvalidSearchQuery as err:
            # We don't know if the widget that this query belongs to is an
            # Issue widget or Discover widget. Pass the error back to the
            # Widget serializer to decide if whether or not to raise this
            # error based on the Widget's type
            data["issue_query_error"] = {
                "conditions": [f"Invalid conditions: {err}"]
            }

        try:
            # When using the eps/epm functions, they require an interval argument
            # or to provide the start/end so that the interval can be computed.
            # This uses a hard coded start/end to ensure the validation succeeds
            # since the values themselves don't matter.
            params = {
                "start": datetime.now() - timedelta(days=1),
                "end": datetime.now(),
                "project_id": [p.id for p in self.context.get("projects")],
                "organization_id": self.context.get("organization").id,
            }

            snuba_filter = get_filter(conditions, params=params)
        except InvalidSearchQuery as err:
            data["discover_query_error"] = {
                "conditions": [f"Invalid conditions: {err}"]
            }
            return data

        if orderby:
            snuba_filter.orderby = get_function_alias(orderby)
        try:
            resolve_field_list(fields,
                               snuba_filter,
                               resolved_equations=resolved_equations)
        except InvalidSearchQuery as err:
            # We don't know if the widget that this query belongs to is an
            # Issue widget or Discover widget. Pass the error back to the
            # Widget serializer to decide if whether or not to raise this
            # error based on the Widget's type
            data["discover_query_error"] = {"fields": f"Invalid fields: {err}"}

        return data
Ejemplo n.º 3
0
def prepare_discover_query(
    selected_columns,
    query,
    params,
    equations=None,
    orderby=None,
    auto_fields=False,
    auto_aggregations=False,
    use_aggregate_conditions=False,
    conditions=None,
    functions_acl=None,
):
    with sentry_sdk.start_span(op="discover.discover",
                               description="query.filter_transform") as span:
        span.set_data("query", query)

        snuba_filter = get_filter(query, params)
        if not use_aggregate_conditions:
            assert (
                not auto_aggregations
            ), "Auto aggregations cannot be used without enabling aggregate conditions"
            snuba_filter.having = []

    with sentry_sdk.start_span(op="discover.discover",
                               description="query.field_translations"):
        if orderby is not None:
            orderby = list(orderby) if isinstance(orderby,
                                                  (list,
                                                   tuple)) else [orderby]
            snuba_filter.orderby = [get_function_alias(o) for o in orderby]

        resolved_fields = resolve_field_list(
            selected_columns,
            snuba_filter,
            auto_fields=auto_fields,
            auto_aggregations=auto_aggregations,
            functions_acl=functions_acl,
        )

        snuba_filter.update_with(resolved_fields)

        if equations is not None:
            resolved_equations = resolve_equation_list(equations, snuba_filter)

            snuba_filter.update_with(resolved_equations)

        # Resolve the public aliases into the discover dataset names.
        snuba_filter, translated_columns = resolve_discover_aliases(
            snuba_filter)

        # Make sure that any aggregate conditions are also in the selected columns
        for having_clause in snuba_filter.having:
            # The first element of the having can be an alias, or a nested array of functions. Loop through to make sure
            # any referenced functions are in the aggregations.
            error_extra = ", and could not be automatically added" if auto_aggregations else ""
            if isinstance(having_clause[0], (list, tuple)):
                # Functions are of the form [fn, [args]]
                args_to_check = [[having_clause[0]]]
                conditions_not_in_aggregations = []
                while len(args_to_check) > 0:
                    args = args_to_check.pop()
                    for arg in args:
                        if arg[0] in [SNUBA_AND, SNUBA_OR]:
                            args_to_check.extend(arg[1])
                        # Only need to iterate on arg[1] if its a list
                        elif isinstance(arg[1], (list, tuple)):
                            alias = arg[1][0]
                            found = any(
                                alias == agg_clause[-1]
                                for agg_clause in snuba_filter.aggregations)
                            if not found:
                                conditions_not_in_aggregations.append(alias)

                if len(conditions_not_in_aggregations) > 0:
                    raise InvalidSearchQuery(
                        "Aggregate(s) {} used in a condition but are not in the selected columns{}."
                        .format(
                            ", ".join(conditions_not_in_aggregations),
                            error_extra,
                        ))
            else:
                found = any(having_clause[0] == agg_clause[-1]
                            for agg_clause in snuba_filter.aggregations)
                if not found:
                    raise InvalidSearchQuery(
                        "Aggregate {} used in a condition but is not a selected column{}."
                        .format(
                            having_clause[0],
                            error_extra,
                        ))

        if conditions is not None:
            snuba_filter.conditions.extend(conditions)

    return PreparedQuery(snuba_filter, translated_columns, resolved_fields)
Ejemplo n.º 4
0
    def validate(self, data):
        organization = self.context["organization"]
        query_info = data["query_info"]

        # Validate the project field, if provided
        # A PermissionDenied error will be raised in `get_projects_by_id` if the request is invalid
        project_query = query_info.get("project")
        if project_query:
            get_projects_by_id = self.context["get_projects_by_id"]
            # Coerce the query into a set
            if isinstance(project_query, list):
                projects = get_projects_by_id(set(map(int, project_query)))
            else:
                projects = get_projects_by_id({int(project_query)})
            query_info["project"] = [project.id for project in projects]

        # Discover Pre-processing
        if data["query_type"] == ExportQueryType.DISCOVER_STR:
            # coerce the fields into a list as needed
            base_fields = query_info.get("field", [])
            if not isinstance(base_fields, list):
                base_fields = [base_fields]

            equations, fields = categorize_columns(base_fields)

            if len(base_fields) > MAX_FIELDS:
                detail = f"You can export up to {MAX_FIELDS} fields at a time. Please delete some and try again."
                raise serializers.ValidationError(detail)
            elif len(base_fields) == 0:
                raise serializers.ValidationError(
                    "at least one field is required to export")

            if "query" not in query_info:
                detail = "query is a required to export, please pass an empty string if you don't want to set one"
                raise serializers.ValidationError(detail)

            query_info["field"] = fields
            query_info["equations"] = equations

            if not query_info.get("project"):
                projects = self.context["get_projects"]()
                query_info["project"] = [project.id for project in projects]

            # make sure to fix the export start/end times to ensure consistent results
            try:
                start, end = get_date_range_from_params(query_info)
            except InvalidParams as e:
                sentry_sdk.set_tag("query.error_reason", "Invalid date params")
                raise serializers.ValidationError(str(e))

            if "statsPeriod" in query_info:
                del query_info["statsPeriod"]
            if "statsPeriodStart" in query_info:
                del query_info["statsPeriodStart"]
            if "statsPeriodEnd" in query_info:
                del query_info["statsPeriodEnd"]
            query_info["start"] = start.isoformat()
            query_info["end"] = end.isoformat()
            query_info["use_snql"] = features.has(
                "organizations:discover-use-snql", organization)

            # validate the query string by trying to parse it
            processor = DiscoverProcessor(
                discover_query=query_info,
                organization_id=organization.id,
            )
            try:
                snuba_filter = get_filter(query_info["query"],
                                          processor.params)
                if len(equations) > 0:
                    resolved_equations, _, _ = resolve_equation_list(
                        equations, fields)
                else:
                    resolved_equations = []
                resolve_field_list(
                    fields.copy(),
                    snuba_filter,
                    auto_fields=True,
                    auto_aggregations=True,
                    resolved_equations=resolved_equations,
                )
            except InvalidSearchQuery as err:
                raise serializers.ValidationError(str(err))

        return data
Ejemplo n.º 5
0
def get_timeseries_snuba_filter(selected_columns, query, params):
    snuba_filter = get_filter(query, params)
    if not snuba_filter.start and not snuba_filter.end:
        raise InvalidSearchQuery(
            "Cannot get timeseries result without a start and end.")

    columns = []
    equations = []

    for column in selected_columns:
        if is_equation(column):
            equations.append(strip_equation(column))
        else:
            columns.append(column)

    if len(equations) > 0:
        resolved_equations, updated_columns = resolve_equation_list(
            equations, columns, aggregates_only=True, auto_add=True)
    else:
        resolved_equations = []
        updated_columns = columns

    # For the new apdex, we need to add project threshold config as a selected
    # column which means the group by for the time series won't work.
    # As a temporary solution, we will calculate the mean of all the project
    # level thresholds in the request and use the legacy apdex, user_misery
    # or count_miserable calculation.
    # TODO(snql): Alias the project_threshold_config column so it doesn't
    # have to be in the SELECT statement and group by to be able to use new apdex,
    # user_misery and count_miserable.
    threshold = None
    for agg in CONFIGURABLE_AGGREGATES:
        if agg not in updated_columns:
            continue

        if threshold is None:
            project_ids = params.get("project_id")
            threshold_configs = list(
                ProjectTransactionThreshold.objects.filter(
                    organization_id=params["organization_id"],
                    project_id__in=project_ids,
                ).values_list("threshold", flat=True))

            projects_without_threshold = len(project_ids) - len(
                threshold_configs)
            threshold_configs.extend([DEFAULT_PROJECT_THRESHOLD] *
                                     projects_without_threshold)
            threshold = int(mean(threshold_configs))

        updated_columns.remove(agg)
        updated_columns.append(
            CONFIGURABLE_AGGREGATES[agg].format(threshold=threshold))

    snuba_filter.update_with(
        resolve_field_list(updated_columns,
                           snuba_filter,
                           auto_fields=False,
                           resolved_equations=resolved_equations))

    # Resolve the public aliases into the discover dataset names.
    snuba_filter, translated_columns = resolve_discover_aliases(snuba_filter)
    if not snuba_filter.aggregations:
        raise InvalidSearchQuery(
            "Cannot get timeseries result with no aggregation.")

    return snuba_filter, translated_columns