Example 1
    def validate_fields(self, fields):
        snuba_filter = get_filter("")
        try:
            resolve_field_list(fields, snuba_filter)
            return fields
        except InvalidSearchQuery as err:
            raise serializers.ValidationError("Invalid fields: {}".format(err))
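
Note: the empty filter from get_filter("") is enough here, because resolve_field_list only needs a filter object to resolve the aliases against; it raises InvalidSearchQuery on malformed fields. A minimal standalone sketch of the same pattern (the import path is an assumption and has moved between Sentry releases):

from sentry.api.event_search import (  # assumed import path
    InvalidSearchQuery, get_filter, resolve_field_list)

def fields_are_valid(fields):
    # Resolving against an empty filter exercises alias/function parsing
    # without needing start/end or project parameters.
    try:
        resolve_field_list(fields, get_filter(""))
        return True
    except InvalidSearchQuery:
        return False
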
Example 2
    def validate(self, data):
        if not data.get("id"):
            keys = set(data.keys())
            if self.required_for_create - keys:
                raise serializers.ValidationError({
                    "fields": "fields are required during creation.",
                    "conditions": "conditions are required during creation.",
                })

        # Validate the query that would be created when run.
        conditions = self._get_attr(data, "conditions", "")
        fields = self._get_attr(data, "fields", [])
        orderby = self._get_attr(data, "orderby", "")
        try:
            snuba_filter = get_filter(conditions)
        except InvalidSearchQuery as err:
            raise serializers.ValidationError(
                {"conditions": f"Invalid conditions: {err}"})

        if orderby:
            snuba_filter.orderby = get_function_alias(orderby)
        try:
            resolve_field_list(fields, snuba_filter)
        except InvalidSearchQuery as err:
            raise serializers.ValidationError(
                {"fields": f"Invalid fields: {err}"})
        return data
Example 3
def _create_in_snuba(subscription):
    snuba_query = subscription.snuba_query
    snuba_filter = get_filter(snuba_query.query)
    snuba_filter.update_with(
        resolve_field_list([snuba_query.aggregate],
                           snuba_filter,
                           auto_fields=False))
    snuba_filter = resolve_discover_aliases(snuba_filter)[0]
    if snuba_query.environment:
        snuba_filter.conditions.append(
            ["environment", "=", snuba_query.environment.name])
    conditions = apply_dataset_conditions(QueryDatasets(snuba_query.dataset),
                                          snuba_filter.conditions)
    response = _snuba_pool.urlopen(
        "POST",
        "/%s/subscriptions" % (snuba_query.dataset, ),
        body=json.dumps({
            "project_id": subscription.project_id,
            "dataset": snuba_query.dataset,
            "conditions": conditions,
            "aggregations": snuba_filter.aggregations,
            "time_window": snuba_query.time_window,
            "resolution": snuba_query.resolution,
        }),
    )
    if response.status != 202:
        raise SnubaError("HTTP %s response from Snuba!" % response.status)
    return json.loads(response.data)["subscription_id"]
Example 4
    def get_field(self, request, snuba_args):
        y_axis = request.GET.get("yAxis", None)
        # These aliases are used by v1 of events.
        if not y_axis or y_axis == "event_count":
            y_axis = "count()"
        elif y_axis == "user_count":
            y_axis = "count_unique(user)"

        snuba_filter = eventstore.Filter({
            "start": snuba_args.get("start"),
            "end": snuba_args.get("end"),
            "rollup": snuba_args.get("rollup"),
        })
        try:
            resolved = resolve_field_list([y_axis], snuba_filter)
        except InvalidSearchQuery as err:
            raise ParseError(detail=str(err))
        try:
            aggregate = resolved["aggregations"][0]
        except IndexError:
            raise ParseError(detail="Invalid yAxis value requested.")
        aggregate[2] = "count"
        snuba_args["aggregations"] = [aggregate]

        return snuba_args
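
A hedged note on the mutation above: each entry in resolved["aggregations"] is a [function, column, alias] triple (the same slot the timeseries helpers below overwrite via snuba_filter.aggregations[0][2] = "count"), so aggregate[2] = "count" renames only the output alias:

# Illustrative shape only; the exact function name is an assumption
# about this Sentry version.
resolved = resolve_field_list(["count_unique(user)"], snuba_filter)
resolved["aggregations"][0]  # -> ["uniq", "user", "count_unique_user"]
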
Example 5
def get_timeseries_snuba_filter(selected_columns,
                                query,
                                params,
                                rollup,
                                default_count=True):
    snuba_filter = get_filter(query, params)
    if not snuba_filter.start and not snuba_filter.end:
        raise InvalidSearchQuery(
            "Cannot get timeseries result without a start and end.")

    snuba_filter.update_with(
        resolve_field_list(selected_columns, snuba_filter, auto_fields=False))

    # Resolve the public aliases into the discover dataset names.
    snuba_filter, translated_columns = resolve_discover_aliases(snuba_filter)
    if not snuba_filter.aggregations:
        raise InvalidSearchQuery(
            "Cannot get timeseries result with no aggregation.")

    # Change the alias of the first aggregation to count. This ensures
    # compatibility with the expectations of other parts of the timeseries endpoint.
    if len(snuba_filter.aggregations) == 1 and default_count:
        snuba_filter.aggregations[0][2] = "count"

    return snuba_filter, translated_columns
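
A hedged usage sketch: callers presumably pass datetime bounds and project ids through params, mirroring the params dicts used elsewhere in these examples (all values below are illustrative):

from datetime import datetime, timedelta

params = {
    "start": datetime.now() - timedelta(days=1),
    "end": datetime.now(),
    "project_id": [1],
}
snuba_filter, translated_columns = get_timeseries_snuba_filter(
    ["count()"], "event.type:error", params, rollup=3600)
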
Example 6
    def get_snuba_query_args(self, request, organization, params):
        query = request.GET.get('query')

        group_ids = request.GET.getlist('group')
        if group_ids:
            # TODO(mark) This parameter should be removed in the long term.
            # Instead of using this parameter clients should use `issue.id`
            # in their query string.
            try:
                group_ids = set(map(int, filter(None, group_ids)))
            except ValueError:
                raise OrganizationEventsError('Invalid group parameter. Values must be numbers')

            projects = Project.objects.filter(
                organization=organization,
                group__id__in=group_ids,
            ).distinct()
            if any(p for p in projects if not request.access.has_project_access(p)):
                raise PermissionDenied
            params['issue.id'] = list(group_ids)
            params['project_id'] = list(set([p.id for p in projects] + params['project_id']))

        try:
            snuba_args = get_snuba_query_args(query=query, params=params)
        except InvalidSearchQuery as exc:
            raise OrganizationEventsError(exc.message)

        sort = request.GET.getlist('sort')
        if sort:
            snuba_args['orderby'] = sort

        # Deprecated. `sort` should be used as it is supported by
        # more endpoints.
        orderby = request.GET.getlist('orderby')
        if orderby and 'orderby' not in snuba_args:
            snuba_args['orderby'] = orderby

        if request.GET.get('rollup'):
            try:
                snuba_args['rollup'] = int(request.GET.get('rollup'))
            except ValueError:
                raise OrganizationEventsError('rollup must be an integer.')

        fields = request.GET.getlist('field')[:]
        if fields:
            try:
                snuba_args.update(resolve_field_list(fields, snuba_args))
            except InvalidSearchQuery as exc:
                raise OrganizationEventsError(exc.message)

        # TODO(lb): remove once boolean search is fully functional
        has_boolean_op_flag = features.has(
            'organizations:boolean-search',
            organization,
            actor=request.user
        )
        if snuba_args.pop('has_boolean_terms', False) and not has_boolean_op_flag:
            raise OrganizationEventsError(
                'Boolean search operator OR and AND not allowed in this search.')
        return snuba_args
Example 7
def get_timeseries_snuba_filter(selected_columns,
                                query,
                                params,
                                rollup,
                                reference_event=None):
    # TODO(evanh): These can be removed once we migrate the frontend / saved queries
    # to use the new function values
    selected_columns, _ = transform_deprecated_functions_in_columns(
        selected_columns)
    query = transform_deprecated_functions_in_query(query)

    snuba_filter = get_filter(query, params)
    if not snuba_filter.start and not snuba_filter.end:
        raise InvalidSearchQuery(
            "Cannot get timeseries result without a start and end.")

    snuba_filter.update_with(
        resolve_field_list(selected_columns, snuba_filter, auto_fields=False))
    if reference_event:
        ref_conditions = create_reference_event_conditions(reference_event)
        if ref_conditions:
            snuba_filter.conditions.extend(ref_conditions)

    # Resolve the public aliases into the discover dataset names.
    snuba_filter, translated_columns = resolve_discover_aliases(snuba_filter)
    if not snuba_filter.aggregations:
        raise InvalidSearchQuery(
            "Cannot get timeseries result with no aggregation.")

    # Change the alias of the first aggregation to count. This ensures
    # compatibility with the expectations of other parts of the timeseries endpoint.
    if len(snuba_filter.aggregations) == 1:
        snuba_filter.aggregations[0][2] = "count"

    return snuba_filter, translated_columns
Example 8
def build_snuba_filter(dataset, query, aggregate, environment, params=None):
    snuba_filter = get_filter(query, params=params)
    snuba_filter.update_with(resolve_field_list([aggregate], snuba_filter, auto_fields=False))
    snuba_filter = resolve_snuba_aliases(snuba_filter, resolve_column(Dataset.Discover))[0]
    if environment:
        snuba_filter.conditions.append(["environment", "=", environment.name])
    snuba_filter.conditions = apply_dataset_conditions(dataset, snuba_filter.conditions)
    return snuba_filter
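
A usage sketch under stated assumptions: alert-rule code would call this with a search query and a single aggregate; the dataset enum, query, and params below are illustrative stand-ins:

# Hypothetical call; QueryDatasets.EVENTS and the values are illustrative.
snuba_filter = build_snuba_filter(
    dataset=QueryDatasets.EVENTS,
    query="level:error",
    aggregate="count()",
    environment=None,
    params={"project_id": [1]},
)
# snuba_filter.conditions and snuba_filter.aggregations are then ready
# to be sent to Snuba.
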
Example 9
def query(selected_columns,
          query,
          params,
          orderby=None,
          referrer=None,
          auto_fields=False):
    """
    High-level API for doing arbitrary user queries against events.

    This function operates on the Discover public event schema and
    virtual fields/aggregate functions for selected columns and
    conditions are supported through this function.

    The resulting list will have all internal field names mapped
    back into their public schema names.

    selected_columns (Sequence[str]) List of public aliases to fetch.
    query (str) Filter query string to create conditions from.
    params (Dict[str, str]) Filtering parameters with start, end, project_id, environment.
    orderby (None|str|Sequence[str]) The field to order results by.
    referrer (str|None) A referrer string to help locate the origin of this query.
    auto_fields (bool) Set to true to have project + eventid fields automatically added.
    """
    snuba_filter = get_filter(query, params)

    # TODO(mark) Refactor the need for this translation shim once all of
    # discover is using this module. Remember to update all the functions
    # in this module.
    snuba_args = {
        "start": snuba_filter.start,
        "end": snuba_filter.end,
        "conditions": snuba_filter.conditions,
        "filter_keys": snuba_filter.filter_keys,
        "orderby": orderby,
    }
    snuba_args.update(
        resolve_field_list(selected_columns,
                           snuba_args,
                           auto_fields=auto_fields))

    # Resolve the public aliases into the discover dataset names.
    snuba_args, translated_columns = resolve_discover_aliases(snuba_args)

    result = raw_query(
        start=snuba_args.get("start"),
        end=snuba_args.get("end"),
        groupby=snuba_args.get("groupby"),
        conditions=snuba_args.get("conditions"),
        aggregations=snuba_args.get("aggregations"),
        selected_columns=snuba_args.get("selected_columns"),
        filter_keys=snuba_args.get("filter_keys"),
        orderby=snuba_args.get("orderby"),
        dataset=Dataset.Discover,
        referrer=referrer,
    )

    return transform_results(result, translated_columns, snuba_args)
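
A hedged example of calling this API (the column list, query string, and referrer are illustrative; params follows the docstring above):

from datetime import datetime, timedelta

start, end = datetime.now() - timedelta(days=1), datetime.now()
results = query(
    selected_columns=["title", "count()"],
    query="event.type:error",
    params={"start": start, "end": end, "project_id": [1]},
    orderby="-count",
    referrer="api.example",  # illustrative referrer string
    auto_fields=True,
)
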
Example 10
def build_snuba_filter(dataset, query, aggregate, environment, params=None):
    resolve_func = (
        resolve_column(Dataset.Events)
        if dataset == QueryDatasets.EVENTS
        else resolve_column(Dataset.Transactions)
    )
    query = apply_dataset_query_conditions(dataset, query)
    snuba_filter = get_filter(query, params=params)
    snuba_filter.update_with(
        resolve_field_list([aggregate], snuba_filter, auto_fields=False))
    snuba_filter = resolve_snuba_aliases(snuba_filter, resolve_func)[0]
    if environment:
        snuba_filter.conditions.append(["environment", "=", environment.name])
    return snuba_filter
Example 11
    def validate(self, data):
        if not data.get("id"):
            keys = set(data.keys())
            if self.required_for_create - keys:
                raise serializers.ValidationError({
                    "fields": "fields are required during creation.",
                    "conditions": "conditions are required during creation.",
                })

        # Validate the query that would be created when run.
        conditions = self._get_attr(data, "conditions", "")
        fields = self._get_attr(data, "fields", []).copy()
        orderby = self._get_attr(data, "orderby", "")
        try:
            # The eps/epm functions require an interval argument, or start/end
            # values from which the interval can be computed. This uses a
            # hard-coded start/end to ensure the validation succeeds, since
            # the values themselves don't matter.
            params = {
                "start": datetime.now() - timedelta(days=1),
                "end": datetime.now(),
                "project_id": [p.id for p in self.context.get("projects")],
            }

            snuba_filter = get_filter(conditions, params=params)
        except InvalidSearchQuery as err:
            raise serializers.ValidationError(
                {"conditions": f"Invalid conditions: {err}"})

        if orderby:
            snuba_filter.orderby = get_function_alias(orderby)
        try:
            resolve_field_list(fields, snuba_filter)
        except InvalidSearchQuery as err:
            raise serializers.ValidationError(
                {"fields": f"Invalid fields: {err}"})
        return data
Example 12
    def get_snuba_query_args(self, request, organization, params):
        query = request.GET.get("query")
        try:
            snuba_args = get_snuba_query_args(query=query, params=params)
        except InvalidSearchQuery as exc:
            raise OrganizationEventsError(exc.message)

        sort = request.GET.getlist("sort")
        if sort:
            snuba_args["orderby"] = sort

        # Deprecated. `sort` should be used as it is supported by
        # more endpoints.
        orderby = request.GET.getlist("orderby")
        if orderby and "orderby" not in snuba_args:
            snuba_args["orderby"] = orderby

        if request.GET.get("rollup"):
            try:
                snuba_args["rollup"] = int(request.GET.get("rollup"))
            except ValueError:
                raise OrganizationEventsError("rollup must be an integer.")

        fields = request.GET.getlist("field")[:]
        if fields:
            try:
                snuba_args.update(resolve_field_list(fields, snuba_args))
            except InvalidSearchQuery as exc:
                raise OrganizationEventsError(exc.message)

        reference_event_id = request.GET.get("referenceEvent")
        if reference_event_id:
            reference_event = find_reference_event(snuba_args,
                                                   reference_event_id)
            snuba_args["conditions"] = get_reference_event_conditions(
                snuba_args, reference_event.snuba_data)

        # TODO(lb): remove once boolean search is fully functional
        has_boolean_op_flag = features.has("organizations:boolean-search",
                                           organization,
                                           actor=request.user)
        if snuba_args.pop("has_boolean_terms", False) and not has_boolean_op_flag:
            raise OrganizationEventsError(
                "Boolean search operator OR and AND not allowed in this search."
            )
        return snuba_args
Example 13
    def get_field(self, request, snuba_args):
        y_axis = request.GET.get("yAxis", None)
        # These aliases are used by v1 of events.
        if not y_axis or y_axis == "event_count":
            y_axis = "count()"
        elif y_axis == "user_count":
            y_axis = "count_unique(user)"

        try:
            resolved = resolve_field_list([y_axis], {})
        except InvalidSearchQuery as err:
            raise ParseError(detail=six.text_type(err))
        aggregate = resolved["aggregations"][0]
        aggregate[2] = "count"
        snuba_args["aggregations"] = [aggregate]

        return snuba_args
Example 14
    def get_snuba_query_args(self, request, organization, params):
        query = request.GET.get("query")
        try:
            filter = get_filter(query, params)
        except InvalidSearchQuery as exc:
            raise OrganizationEventsError(exc.message)

        snuba_args = {
            "start": filter.start,
            "end": filter.end,
            "conditions": filter.conditions,
            "filter_keys": filter.filter_keys,
        }

        sort = request.GET.getlist("sort")
        if sort:
            snuba_args["orderby"] = sort

        # Deprecated. `sort` should be used as it is supported by
        # more endpoints.
        orderby = request.GET.getlist("orderby")
        if orderby and "orderby" not in snuba_args:
            snuba_args["orderby"] = orderby

        if request.GET.get("rollup"):
            try:
                snuba_args["rollup"] = int(request.GET.get("rollup"))
            except ValueError:
                raise OrganizationEventsError("rollup must be an integer.")

        fields = request.GET.getlist("field")[:]
        if fields:
            try:
                snuba_args.update(resolve_field_list(fields, snuba_args))
            except InvalidSearchQuery as exc:
                raise OrganizationEventsError(exc.message)

        reference_event_id = request.GET.get("referenceEvent")
        if reference_event_id:
            snuba_args["conditions"] = get_reference_event_conditions(
                snuba_args, reference_event_id
            )

        return snuba_args
Example 15
def query(
    selected_columns,
    query,
    params,
    orderby=None,
    offset=None,
    limit=50,
    reference_event=None,
    referrer=None,
    auto_fields=False,
    use_aggregate_conditions=False,
):
    """
    High-level API for doing arbitrary user queries against events.

    This function operates on the Discover public event schema and
    virtual fields/aggregate functions for selected columns and
    conditions are supported through this function.

    The resulting list will have all internal field names mapped
    back into their public schema names.

    selected_columns (Sequence[str]) List of public aliases to fetch.
    query (str) Filter query string to create conditions from.
    params (Dict[str, str]) Filtering parameters with start, end, project_id, environment.
    orderby (None|str|Sequence[str]) The field to order results by.
    offset (None|int) The record offset to read.
    limit (int) The number of records to fetch.
    reference_event (ReferenceEvent) A reference event object. Used to generate additional
                    conditions based on the provided reference.
    referrer (str|None) A referrer string to help locate the origin of this query.
    auto_fields (bool) Set to true to have project + eventid fields automatically added.
    """
    if not selected_columns:
        raise InvalidSearchQuery("No columns selected")

    snuba_filter = get_filter(query, params)

    # TODO(mark) Refactor the need for this translation shim once all of
    # discover is using this module. Remember to update all the functions
    # in this module.
    snuba_args = {
        "start": snuba_filter.start,
        "end": snuba_filter.end,
        "conditions": snuba_filter.conditions,
        "filter_keys": snuba_filter.filter_keys,
        "orderby": orderby,
        "having": [],
    }

    if use_aggregate_conditions:
        snuba_args["having"] = snuba_filter.having

    snuba_args.update(
        resolve_field_list(selected_columns,
                           snuba_args,
                           params=params,
                           auto_fields=auto_fields))

    if reference_event:
        ref_conditions = create_reference_event_conditions(reference_event)
        if ref_conditions:
            snuba_args["conditions"].extend(ref_conditions)

    # Resolve the public aliases into the discover dataset names.
    snuba_args, translated_columns = resolve_discover_aliases(snuba_args)

    # Make sure that any aggregate conditions are also in the selected columns
    for having_clause in snuba_args.get("having"):
        found = any(having_clause[0] == agg_clause[-1]
                    for agg_clause in snuba_args.get("aggregations"))
        if not found:
            raise InvalidSearchQuery(
                u"Aggregate {} used in a condition but is not a selected column."
                .format(having_clause[0]))

    result = raw_query(
        start=snuba_args.get("start"),
        end=snuba_args.get("end"),
        groupby=snuba_args.get("groupby"),
        conditions=snuba_args.get("conditions"),
        aggregations=snuba_args.get("aggregations"),
        selected_columns=snuba_args.get("selected_columns"),
        filter_keys=snuba_args.get("filter_keys"),
        having=snuba_args.get("having"),
        orderby=snuba_args.get("orderby"),
        dataset=Dataset.Discover,
        limit=limit,
        offset=offset,
        referrer=referrer,
    )

    return transform_results(result, translated_columns, snuba_args)
Example 16
def query(
    selected_columns,
    query,
    params,
    orderby=None,
    offset=None,
    limit=50,
    reference_event=None,
    referrer=None,
    auto_fields=False,
    use_aggregate_conditions=False,
    conditions=None,
):
    """
    High-level API for doing arbitrary user queries against events.

    This function operates on the Discover public event schema and
    virtual fields/aggregate functions for selected columns and
    conditions are supported through this function.

    The resulting list will have all internal field names mapped
    back into their public schema names.

    selected_columns (Sequence[str]) List of public aliases to fetch.
    query (str) Filter query string to create conditions from.
    params (Dict[str, str]) Filtering parameters with start, end, project_id, environment.
    orderby (None|str|Sequence[str]) The field to order results by.
    offset (None|int) The record offset to read.
    limit (int) The number of records to fetch.
    reference_event (ReferenceEvent) A reference event object. Used to generate additional
                    conditions based on the provided reference.
    referrer (str|None) A referrer string to help locate the origin of this query.
    auto_fields (bool) Set to true to have project + eventid fields automatically added.
    conditions (Sequence[any]) List of conditions that are passed directly to snuba without
                    any additional processing.
    """
    if not selected_columns:
        raise InvalidSearchQuery("No columns selected")

    # TODO(evanh): These can be removed once we migrate the frontend / saved queries
    # to use the new function values
    selected_columns, function_translations = transform_deprecated_functions_in_columns(
        selected_columns)
    query = transform_deprecated_functions_in_query(query)

    snuba_filter = get_filter(query, params)
    if not use_aggregate_conditions:
        snuba_filter.having = []

    # We need to run a separate query to properly bucket the values for the
    # histogram. Do that here, and format the bucket number into the columns
    # before passing it through to event search.
    idx = 0
    for col in selected_columns:
        if col.startswith("histogram("):
            histogram_column = find_histogram_buckets(col, params,
                                                      snuba_filter.conditions)
            selected_columns[idx] = histogram_column
            function_translations[get_function_alias(
                histogram_column)] = get_function_alias(col)
            break

        idx += 1

    # Check whether we are ordering by any functions and convert the orderby
    # to the correct alias.
    if orderby:
        orderby = orderby if isinstance(orderby, (list, tuple)) else [orderby]
        new_orderby = []
        for ordering in orderby:
            is_reversed = ordering.startswith("-")
            ordering = ordering.lstrip("-")
            for snuba_name, sentry_name in six.iteritems(
                    function_translations):
                if sentry_name == ordering:
                    ordering = snuba_name
                    break

            ordering = "{}{}".format("-" if is_reversed else "", ordering)
            new_orderby.append(ordering)

        snuba_filter.orderby = new_orderby

    snuba_filter.update_with(
        resolve_field_list(selected_columns,
                           snuba_filter,
                           auto_fields=auto_fields))

    if reference_event:
        ref_conditions = create_reference_event_conditions(reference_event)
        if ref_conditions:
            snuba_filter.conditions.extend(ref_conditions)

    # Resolve the public aliases into the discover dataset names.
    snuba_filter, translated_columns = resolve_discover_aliases(
        snuba_filter, function_translations)

    # Make sure that any aggregate conditions are also in the selected columns
    for having_clause in snuba_filter.having:
        found = any(having_clause[0] == agg_clause[-1]
                    for agg_clause in snuba_filter.aggregations)
        if not found:
            raise InvalidSearchQuery(
                u"Aggregate {} used in a condition but is not a selected column."
                .format(having_clause[0]))

    if conditions is not None:
        snuba_filter.conditions.extend(conditions)

    result = raw_query(
        start=snuba_filter.start,
        end=snuba_filter.end,
        groupby=snuba_filter.groupby,
        conditions=snuba_filter.conditions,
        aggregations=snuba_filter.aggregations,
        selected_columns=snuba_filter.selected_columns,
        filter_keys=snuba_filter.filter_keys,
        having=snuba_filter.having,
        orderby=snuba_filter.orderby,
        dataset=Dataset.Discover,
        limit=limit,
        offset=offset,
        referrer=referrer,
    )

    return transform_results(result, translated_columns, snuba_filter,
                             selected_columns)
Example 17
def prepare_discover_query(
    selected_columns,
    query,
    params,
    orderby=None,
    auto_fields=False,
    auto_aggregations=False,
    use_aggregate_conditions=False,
    conditions=None,
    functions_acl=None,
):
    with sentry_sdk.start_span(op="discover.discover",
                               description="query.filter_transform") as span:
        span.set_data("query", query)

        snuba_filter = get_filter(query, params)
        if not use_aggregate_conditions:
            assert (
                not auto_aggregations
            ), "Auto aggregations cannot be used without enabling aggregate conditions"
            snuba_filter.having = []

    with sentry_sdk.start_span(op="discover.discover",
                               description="query.field_translations"):
        if orderby is not None:
            orderby = list(orderby) if isinstance(orderby, (list, tuple)) else [orderby]
            snuba_filter.orderby = [get_function_alias(o) for o in orderby]

        resolved_fields = resolve_field_list(
            selected_columns,
            snuba_filter,
            auto_fields=auto_fields,
            auto_aggregations=auto_aggregations,
            functions_acl=functions_acl,
        )

        snuba_filter.update_with(resolved_fields)

        # Resolve the public aliases into the discover dataset names.
        snuba_filter, translated_columns = resolve_discover_aliases(
            snuba_filter)

        # Make sure that any aggregate conditions are also in the selected columns
        for having_clause in snuba_filter.having:
            # The first element of the having clause can be an alias or a nested
            # array of functions. Loop through to make sure any referenced
            # functions are in the aggregations.
            error_extra = ", and could not be automatically added" if auto_aggregations else ""
            if isinstance(having_clause[0], (list, tuple)):
                # Functions are of the form [fn, [args]]
                args_to_check = [[having_clause[0]]]
                conditions_not_in_aggregations = []
                while len(args_to_check) > 0:
                    args = args_to_check.pop()
                    for arg in args:
                        if arg[0] in [SNUBA_AND, SNUBA_OR]:
                            args_to_check.extend(arg[1])
                        # Only need to iterate on arg[1] if it's a list
                        elif isinstance(arg[1], (list, tuple)):
                            alias = arg[1][0]
                            found = any(
                                alias == agg_clause[-1]
                                for agg_clause in snuba_filter.aggregations)
                            if not found:
                                conditions_not_in_aggregations.append(alias)

                if len(conditions_not_in_aggregations) > 0:
                    raise InvalidSearchQuery(
                        "Aggregate(s) {} used in a condition but are not in the selected columns{}."
                        .format(
                            ", ".join(conditions_not_in_aggregations),
                            error_extra,
                        ))
            else:
                found = any(having_clause[0] == agg_clause[-1]
                            for agg_clause in snuba_filter.aggregations)
                if not found:
                    raise InvalidSearchQuery(
                        "Aggregate {} used in a condition but is not a selected column{}."
                        .format(
                            having_clause[0],
                            error_extra,
                        ))

        if conditions is not None:
            snuba_filter.conditions.extend(conditions)

    return PreparedQuery(snuba_filter, translated_columns, resolved_fields)
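
PreparedQuery appears to be a plain 3-tuple or namedtuple built in the same order as the return statement above, so a caller can unpack it directly; a hedged sketch with illustrative arguments:

from datetime import datetime, timedelta

params = {
    "start": datetime.now() - timedelta(days=1),
    "end": datetime.now(),
    "project_id": [1],
}
# Unpacking order assumed from the PreparedQuery(...) constructor call above.
snuba_filter, translated_columns, resolved_fields = prepare_discover_query(
    ["transaction", "count()"],
    "event.type:transaction",
    params,
    use_aggregate_conditions=True,
)
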
Example 18
def query(
    selected_columns,
    query,
    params,
    orderby=None,
    offset=None,
    limit=50,
    referrer=None,
    auto_fields=False,
    auto_aggregations=False,
    use_aggregate_conditions=False,
    conditions=None,
    functions_acl=None,
):
    """
    High-level API for doing arbitrary user queries against events.

    This function operates on the Discover public event schema and
    virtual fields/aggregate functions for selected columns and
    conditions are supported through this function.

    The resulting list will have all internal field names mapped
    back into their public schema names.

    selected_columns (Sequence[str]) List of public aliases to fetch.
    query (str) Filter query string to create conditions from.
    params (Dict[str, str]) Filtering parameters with start, end, project_id, environment.
    orderby (None|str|Sequence[str]) The field to order results by.
    offset (None|int) The record offset to read.
    limit (int) The number of records to fetch.
    referrer (str|None) A referrer string to help locate the origin of this query.
    auto_fields (bool) Set to true to have project + eventid fields automatically added.
    auto_aggregations (bool) Whether aggregates should be added automatically if they're used
                    in conditions, and there's at least one aggregate already.
    use_aggregate_conditions (bool) Set to true if aggregates conditions should be used at all.
    conditions (Sequence[any]) List of conditions that are passed directly to snuba without
                    any additional processing.
    """
    if not selected_columns:
        raise InvalidSearchQuery("No columns selected")

    # We clobber this value throughout this code, so copy the value
    selected_columns = selected_columns[:]

    with sentry_sdk.start_span(op="discover.discover",
                               description="query.filter_transform") as span:
        span.set_data("query", query)

        snuba_filter = get_filter(query, params)
        if not use_aggregate_conditions:
            assert (
                not auto_aggregations
            ), "Auto aggregations cannot be used without enabling aggregate conditions"
            snuba_filter.having = []

    function_translations = {}

    with sentry_sdk.start_span(op="discover.discover",
                               description="query.field_translations"):
        if orderby is not None:
            orderby = list(orderby) if isinstance(orderby, (list, tuple)) else [orderby]
            snuba_filter.orderby = [get_function_alias(o) for o in orderby]

        resolved_fields = resolve_field_list(
            selected_columns,
            snuba_filter,
            auto_fields=auto_fields,
            auto_aggregations=auto_aggregations,
            functions_acl=functions_acl,
        )

        snuba_filter.update_with(resolved_fields)

        # Resolve the public aliases into the discover dataset names.
        snuba_filter, translated_columns = resolve_discover_aliases(
            snuba_filter, function_translations)

        # Make sure that any aggregate conditions are also in the selected columns
        for having_clause in snuba_filter.having:
            # The first element of the having clause can be an alias or a nested
            # array of functions. Loop through to make sure any referenced
            # functions are in the aggregations.
            error_extra = ", and could not be automatically added" if auto_aggregations else ""
            if isinstance(having_clause[0], (list, tuple)):
                # Functions are of the form [fn, [args]]
                args_to_check = [[having_clause[0]]]
                conditions_not_in_aggregations = []
                while len(args_to_check) > 0:
                    args = args_to_check.pop()
                    for arg in args:
                        if arg[0] in [SNUBA_AND, SNUBA_OR]:
                            args_to_check.extend(arg[1])
                        # Only need to iterate on arg[1] if it's a list
                        elif isinstance(arg[1], (list, tuple)):
                            alias = arg[1][0]
                            found = any(
                                alias == agg_clause[-1]
                                for agg_clause in snuba_filter.aggregations)
                            if not found:
                                conditions_not_in_aggregations.append(alias)

                if len(conditions_not_in_aggregations) > 0:
                    raise InvalidSearchQuery(
                        "Aggregate(s) {} used in a condition but are not in the selected columns{}."
                        .format(
                            ", ".join(conditions_not_in_aggregations),
                            error_extra,
                        ))
            else:
                found = any(having_clause[0] == agg_clause[-1]
                            for agg_clause in snuba_filter.aggregations)
                if not found:
                    raise InvalidSearchQuery(
                        "Aggregate {} used in a condition but is not a selected column{}."
                        .format(
                            having_clause[0],
                            error_extra,
                        ))

        if conditions is not None:
            snuba_filter.conditions.extend(conditions)

    with sentry_sdk.start_span(op="discover.discover",
                               description="query.snuba_query"):
        result = raw_query(
            start=snuba_filter.start,
            end=snuba_filter.end,
            groupby=snuba_filter.groupby,
            conditions=snuba_filter.conditions,
            aggregations=snuba_filter.aggregations,
            selected_columns=snuba_filter.selected_columns,
            filter_keys=snuba_filter.filter_keys,
            having=snuba_filter.having,
            orderby=snuba_filter.orderby,
            dataset=Dataset.Discover,
            limit=limit,
            offset=offset,
            referrer=referrer,
        )

    with sentry_sdk.start_span(op="discover.discover",
                               description="query.transform_results") as span:
        span.set_data("result_count", len(result.get("data", [])))
        return transform_results(result, resolved_fields["functions"],
                                 translated_columns, snuba_filter,
                                 selected_columns)
Example 19
def timeseries_query(selected_columns,
                     query,
                     params,
                     rollup,
                     reference_event=None,
                     referrer=None):
    """
    High-level API for doing arbitrary user timeseries queries against events.

    This function operates on the public event schema and
    virtual fields/aggregate functions for selected columns and
    conditions are supported through this function.

    This function is intended to only get timeseries based
    results and thus requires the `rollup` parameter.

    Returns a SnubaTSResult object that has been zerofilled in
    case of gaps.

    selected_columns (Sequence[str]) List of public aliases to fetch.
    query (str) Filter query string to create conditions from.
    params (Dict[str, str]) Filtering parameters with start, end, project_id, environment.
    rollup (int) The bucket width in seconds.
    reference_event (ReferenceEvent) A reference event object. Used to generate additional
                    conditions based on the provided reference.
    referrer (str|None) A referrer string to help locate the origin of this query.
    """
    snuba_filter = get_filter(query, params)
    snuba_args = {
        "start": snuba_filter.start,
        "end": snuba_filter.end,
        "conditions": snuba_filter.conditions,
        "filter_keys": snuba_filter.filter_keys,
    }
    if not snuba_args["start"] and not snuba_args["end"]:
        raise InvalidSearchQuery(
            "Cannot get timeseries result without a start and end.")

    snuba_args.update(
        resolve_field_list(selected_columns, snuba_args, auto_fields=False))
    if reference_event:
        ref_conditions = create_reference_event_conditions(reference_event)
        if ref_conditions:
            snuba_args["conditions"].extend(ref_conditions)

    # Resolve the public aliases into the discover dataset names.
    snuba_args, _ = resolve_discover_aliases(snuba_args)
    if not snuba_args["aggregations"]:
        raise InvalidSearchQuery(
            "Cannot get timeseries result with no aggregation.")

    result = raw_query(
        aggregations=snuba_args.get("aggregations"),
        conditions=snuba_args.get("conditions"),
        filter_keys=snuba_args.get("filter_keys"),
        start=snuba_args.get("start"),
        end=snuba_args.get("end"),
        rollup=rollup,
        orderby="time",
        groupby=["time"],
        dataset=Dataset.Discover,
        limit=10000,
        referrer=referrer,
    )
    result = zerofill(result["data"], snuba_args["start"], snuba_args["end"],
                      rollup, "time")

    return SnubaTSResult(result, snuba_filter.start, snuba_filter.end, rollup)
Example 20
    def validate(self, data):
        organization = self.context["organization"]
        query_info = data["query_info"]

        # Validate the project field, if provided
        # A PermissionDenied error will be raised in `get_projects_by_id` if the request is invalid
        project_query = query_info.get("project")
        if project_query:
            get_projects_by_id = self.context["get_projects_by_id"]
            # Coerce the query into a set
            if isinstance(project_query, list):
                projects = get_projects_by_id(set(map(int, project_query)))
            else:
                projects = get_projects_by_id({int(project_query)})
            query_info["project"] = [project.id for project in projects]

        # Discover Pre-processing
        if data["query_type"] == ExportQueryType.DISCOVER_STR:
            # Coerce the fields into a list as needed
            fields = query_info.get("field", [])
            if not isinstance(fields, list):
                fields = [fields]

            if len(fields) > MAX_FIELDS:
                detail = f"You can export up to {MAX_FIELDS} fields at a time. Please delete some and try again."
                raise serializers.ValidationError(detail)

            query_info["field"] = fields

            if "project" not in query_info:
                projects = self.context["get_projects"]()
                query_info["project"] = [project.id for project in projects]

            # Pin the export start/end times to ensure consistent results
            try:
                start, end = get_date_range_from_params(query_info)
            except InvalidParams as e:
                sentry_sdk.set_tag("query.error_reason", "Invalid date params")
                raise serializers.ValidationError(str(e))

            if "statsPeriod" in query_info:
                del query_info["statsPeriod"]
            if "statsPeriodStart" in query_info:
                del query_info["statsPeriodStart"]
            if "statsPeriodEnd" in query_info:
                del query_info["statsPeriodEnd"]
            query_info["start"] = start.isoformat()
            query_info["end"] = end.isoformat()

            # validate the query string by trying to parse it
            processor = DiscoverProcessor(
                discover_query=query_info,
                organization_id=organization.id,
            )
            try:
                snuba_filter = get_filter(query_info["query"],
                                          processor.params)
                resolve_field_list(
                    fields.copy(),
                    snuba_filter,
                    auto_fields=True,
                    auto_aggregations=True,
                )
            except InvalidSearchQuery as err:
                raise serializers.ValidationError(str(err))

        return data
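
A hedged sketch of the statsPeriod normalization step above: get_date_range_from_params resolves a relative period into absolute datetimes, which the serializer writes back as ISO strings so repeated export runs stay consistent (keys mirror the validate() code; the input payload is illustrative):

query_info = {"statsPeriod": "24h", "field": ["title"], "query": ""}
start, end = get_date_range_from_params(query_info)
query_info.pop("statsPeriod", None)
query_info["start"] = start.isoformat()
query_info["end"] = end.isoformat()
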
Example 21
def timeseries_query(selected_columns, query, params, rollup, reference_event=None, referrer=None):
    """
    High-level API for doing arbitrary user timeseries queries against events.

    This function operates on the public event schema and
    virtual fields/aggregate functions for selected columns and
    conditions are supported through this function.

    This function is intended to only get timeseries based
    results and thus requires the `rollup` parameter.

    Returns a SnubaTSResult object that has been zerofilled in
    case of gaps.

    selected_columns (Sequence[str]) List of public aliases to fetch.
    query (str) Filter query string to create conditions from.
    params (Dict[str, str]) Filtering parameters with start, end, project_id, environment.
    rollup (int) The bucket width in seconds.
    reference_event (ReferenceEvent) A reference event object. Used to generate additional
                    conditions based on the provided reference.
    referrer (str|None) A referrer string to help locate the origin of this query.
    """
    # TODO(evanh): These can be removed once we migrate the frontend / saved queries
    # to use the new function values
    selected_columns, _ = transform_deprecated_functions_in_columns(selected_columns)
    query = transform_deprecated_functions_in_query(query)

    snuba_filter = get_filter(query, params)
    if not snuba_filter.start and not snuba_filter.end:
        raise InvalidSearchQuery("Cannot get timeseries result without a start and end.")

    snuba_filter.update_with(resolve_field_list(selected_columns, snuba_filter, auto_fields=False))
    if reference_event:
        ref_conditions = create_reference_event_conditions(reference_event)
        if ref_conditions:
            snuba_filter.conditions.extend(ref_conditions)

    # Resolve the public aliases into the discover dataset names.
    snuba_filter, _ = resolve_discover_aliases(snuba_filter)
    if not snuba_filter.aggregations:
        raise InvalidSearchQuery("Cannot get timeseries result with no aggregation.")

    # Change the alias of the first aggregation to count. This ensures
    # compatibility with the expectations of other parts of the timeseries endpoint.
    if len(snuba_filter.aggregations) == 1:
        snuba_filter.aggregations[0][2] = "count"

    result = raw_query(
        aggregations=snuba_filter.aggregations,
        conditions=snuba_filter.conditions,
        filter_keys=snuba_filter.filter_keys,
        start=snuba_filter.start,
        end=snuba_filter.end,
        rollup=rollup,
        orderby="time",
        groupby=["time"],
        dataset=Dataset.Discover,
        limit=10000,
        referrer=referrer,
    )
    result = zerofill(result["data"], snuba_filter.start, snuba_filter.end, rollup, "time")

    return SnubaTSResult({"data": result}, snuba_filter.start, snuba_filter.end, rollup)
Example 22
def query(
    selected_columns,
    query,
    params,
    orderby=None,
    offset=None,
    limit=50,
    reference_event=None,
    referrer=None,
    auto_fields=False,
    use_aggregate_conditions=False,
    conditions=None,
):
    """
    High-level API for doing arbitrary user queries against events.

    This function operates on the Discover public event schema and
    virtual fields/aggregate functions for selected columns and
    conditions are supported through this function.

    The resulting list will have all internal field names mapped
    back into their public schema names.

    selected_columns (Sequence[str]) List of public aliases to fetch.
    query (str) Filter query string to create conditions from.
    params (Dict[str, str]) Filtering parameters with start, end, project_id, environment.
    orderby (None|str|Sequence[str]) The field to order results by.
    offset (None|int) The record offset to read.
    limit (int) The number of records to fetch.
    reference_event (ReferenceEvent) A reference event object. Used to generate additional
                    conditions based on the provided reference.
    referrer (str|None) A referrer string to help locate the origin of this query.
    auto_fields (bool) Set to true to have project + eventid fields automatically added.
    conditions (Sequence[any]) List of conditions that are passed directly to snuba without
                    any additional processing.
    """
    if not selected_columns:
        raise InvalidSearchQuery("No columns selected")

    # We clobber this value throughout this code, so copy the value
    selected_columns = selected_columns[:]

    with sentry_sdk.start_span(op="discover.discover",
                               description="query.filter_transform") as span:
        span.set_data("query", query)

        snuba_filter = get_filter(query, params)
        if not use_aggregate_conditions:
            snuba_filter.having = []

    # We need to run a separate query to properly bucket the values for the
    # histogram. Do that here, and format the bucket number into the columns
    # before passing it through to event search.
    idx = 0
    function_translations = {}
    for col in selected_columns:
        if col.startswith("histogram("):
            with sentry_sdk.start_span(
                    op="discover.discover",
                    description="query.histogram_calculation") as span:
                span.set_data("histogram", col)
                histogram_column = find_histogram_buckets(
                    col, params, snuba_filter.conditions)
                selected_columns[idx] = histogram_column
                snuba_name = get_function_alias(histogram_column)
                sentry_name = get_function_alias(col)
                function_translations[snuba_name] = sentry_name
                # Since we're completely renaming the histogram function, we need to also check if we are
                # ordering by the histogram values, and change that.
                if orderby is not None:
                    orderby = list(orderby) if isinstance(
                        orderby, (list, tuple)) else [orderby]
                    for i, ordering in enumerate(orderby):
                        if sentry_name == ordering.lstrip("-"):
                            ordering = "{}{}".format(
                                "-" if ordering.startswith("-") else "",
                                snuba_name)
                            orderby[i] = ordering

            break

        idx += 1

    with sentry_sdk.start_span(op="discover.discover",
                               description="query.field_translations"):
        if orderby is not None:
            orderby = list(orderby) if isinstance(orderby, (list, tuple)) else [orderby]
            snuba_filter.orderby = [get_function_alias(o) for o in orderby]

        snuba_filter.update_with(
            resolve_field_list(selected_columns,
                               snuba_filter,
                               auto_fields=auto_fields))

        if reference_event:
            ref_conditions = create_reference_event_conditions(reference_event)
            if ref_conditions:
                snuba_filter.conditions.extend(ref_conditions)

        # Resolve the public aliases into the discover dataset names.
        snuba_filter, translated_columns = resolve_discover_aliases(
            snuba_filter, function_translations)

        # Make sure that any aggregate conditions are also in the selected columns
        for having_clause in snuba_filter.having:
            # The first element of the having clause can be an alias or a nested
            # array of functions. Loop through to make sure any referenced
            # functions are in the aggregations.
            if isinstance(having_clause[0], (list, tuple)):
                # Functions are of the form [fn, [args]]
                args_to_check = [[having_clause[0]]]
                conditions_not_in_aggregations = []
                while len(args_to_check) > 0:
                    args = args_to_check.pop()
                    for arg in args:
                        if arg[0] in [SNUBA_AND, SNUBA_OR]:
                            args_to_check.extend(arg[1])
                        else:
                            alias = arg[1][0]
                            found = any(
                                alias == agg_clause[-1]
                                for agg_clause in snuba_filter.aggregations)
                            if not found:
                                conditions_not_in_aggregations.append(alias)

                if len(conditions_not_in_aggregations) > 0:
                    raise InvalidSearchQuery(
                        u"Aggregate(s) {} used in a condition but are not in the selected columns."
                        .format(", ".join(conditions_not_in_aggregations)))
            else:
                found = any(having_clause[0] == agg_clause[-1]
                            for agg_clause in snuba_filter.aggregations)
                if not found:
                    raise InvalidSearchQuery(
                        u"Aggregate {} used in a condition but is not a selected column."
                        .format(having_clause[0]))

        if conditions is not None:
            snuba_filter.conditions.extend(conditions)

    with sentry_sdk.start_span(op="discover.discover",
                               description="query.snuba_query"):
        result = raw_query(
            start=snuba_filter.start,
            end=snuba_filter.end,
            groupby=snuba_filter.groupby,
            conditions=snuba_filter.conditions,
            aggregations=snuba_filter.aggregations,
            selected_columns=snuba_filter.selected_columns,
            filter_keys=snuba_filter.filter_keys,
            having=snuba_filter.having,
            orderby=snuba_filter.orderby,
            dataset=Dataset.Discover,
            limit=limit,
            offset=offset,
            referrer=referrer,
        )

    with sentry_sdk.start_span(op="discover.discover",
                               description="query.transform_results") as span:
        span.set_data("result_count", len(result.get("data", [])))
        return transform_results(result, translated_columns, snuba_filter,
                                 selected_columns)