def validate_fields(self, fields):
    """Validate a list of discover field names.

    Resolves `fields` against an empty filter and returns them unchanged on
    success; raises a DRF ValidationError wrapping the underlying
    InvalidSearchQuery otherwise.
    """
    snuba_filter = get_filter("")
    try:
        # resolve_field_list can mutate the list it is given (other callers in
        # this codebase defensively pass a copy), so validate a copy to keep
        # the serializer's data untouched.
        resolve_field_list(list(fields), snuba_filter)
        return fields
    except InvalidSearchQuery as err:
        raise serializers.ValidationError(f"Invalid fields: {err}")
def validate(self, data):
    """Cross-field validation for a discover query payload.

    On creation (no "id"), requires the fields listed in
    `self.required_for_create`. Always validates that the conditions,
    orderby and fields would produce a runnable query.

    Raises:
        serializers.ValidationError: keyed by the offending field.
    """
    if not data.get("id"):
        keys = set(data.keys())
        if self.required_for_create - keys:
            raise serializers.ValidationError({
                "fields": "fields are required during creation.",
                "conditions": "conditions are required during creation.",
            })

    # Validate the query that would be created when run.
    conditions = self._get_attr(data, "conditions", "")
    # resolve_field_list mutates the list it is given, so work on a copy to
    # avoid clobbering the serializer's validated data.
    fields = self._get_attr(data, "fields", []).copy()
    orderby = self._get_attr(data, "orderby", "")
    try:
        snuba_filter = get_filter(conditions)
    except InvalidSearchQuery as err:
        raise serializers.ValidationError(
            {"conditions": f"Invalid conditions: {err}"})

    if orderby:
        snuba_filter.orderby = get_function_alias(orderby)
    try:
        resolve_field_list(fields, snuba_filter)
    except InvalidSearchQuery as err:
        raise serializers.ValidationError(
            {"fields": f"Invalid fields: {err}"})

    return data
def _create_in_snuba(subscription):
    """Register the subscription's query with Snuba.

    Builds the resolved conditions/aggregations for the subscription's snuba
    query, POSTs them to Snuba's subscriptions endpoint for the query's
    dataset, and returns the subscription id Snuba assigns.

    Raises:
        SnubaError: if Snuba responds with anything other than 202.
    """
    snuba_query = subscription.snuba_query
    snuba_filter = get_filter(snuba_query.query)
    snuba_filter.update_with(
        resolve_field_list([snuba_query.aggregate], snuba_filter, auto_fields=False))
    # Translate public aliases into dataset column names before shipping the
    # conditions to Snuba.
    snuba_filter = resolve_discover_aliases(snuba_filter)[0]
    if snuba_query.environment:
        snuba_filter.conditions.append(
            ["environment", "=", snuba_query.environment.name])
    conditions = apply_dataset_conditions(QueryDatasets(snuba_query.dataset),
                                          snuba_filter.conditions)
    response = _snuba_pool.urlopen(
        "POST",
        "/%s/subscriptions" % (snuba_query.dataset, ),
        body=json.dumps({
            "project_id": subscription.project_id,
            "dataset": snuba_query.dataset,
            "conditions": conditions,
            "aggregations": snuba_filter.aggregations,
            # time_window/resolution come straight from the stored query;
            # units are whatever SnubaQuery stores them in — not visible here.
            "time_window": snuba_query.time_window,
            "resolution": snuba_query.resolution,
        }),
    )
    # Snuba acknowledges a successfully created subscription with 202.
    if response.status != 202:
        raise SnubaError("HTTP %s response from Snuba!" % response.status)
    return json.loads(response.data)["subscription_id"]
def get_field(self, request, snuba_args):
    """Resolve the `yAxis` query parameter into a single count aggregation.

    Legacy v1 aliases ("event_count", "user_count") are mapped onto their
    modern function forms; the resolved aggregation is renamed to "count"
    and stored on `snuba_args`, which is returned.

    Raises:
        ParseError: when the yAxis value cannot be resolved or does not
            produce an aggregation.
    """
    requested = request.GET.get("yAxis", None)
    # These aliases are used by v1 of events.
    legacy_aliases = {
        "event_count": "count()",
        "user_count": "count_unique(user)",
    }
    y_axis = "count()" if not requested else legacy_aliases.get(requested, requested)

    snuba_filter = eventstore.Filter({
        "start": snuba_args.get("start"),
        "end": snuba_args.get("end"),
        "rollup": snuba_args.get("rollup"),
    })
    try:
        resolved = resolve_field_list([y_axis], snuba_filter)
    except InvalidSearchQuery as err:
        raise ParseError(detail=str(err))

    aggregations = resolved["aggregations"]
    if not aggregations:
        raise ParseError(detail="Invalid yAxis value requested.")

    aggregate = aggregations[0]
    aggregate[2] = "count"
    snuba_args["aggregations"] = [aggregate]
    return snuba_args
def get_timeseries_snuba_filter(selected_columns, query, params, rollup, default_count=True):
    """Build a snuba filter suitable for a timeseries query.

    Returns a (snuba_filter, translated_columns) pair. Requires the filter
    to carry a start/end range and at least one aggregation.

    Raises:
        InvalidSearchQuery: when no date range or no aggregation is present.
    """
    snuba_filter = get_filter(query, params)
    if not (snuba_filter.start or snuba_filter.end):
        raise InvalidSearchQuery(
            "Cannot get timeseries result without a start and end.")

    resolved = resolve_field_list(selected_columns, snuba_filter, auto_fields=False)
    snuba_filter.update_with(resolved)

    # Resolve the public aliases into the discover dataset names.
    snuba_filter, translated_columns = resolve_discover_aliases(snuba_filter)

    if not snuba_filter.aggregations:
        raise InvalidSearchQuery(
            "Cannot get timeseries result with no aggregation.")

    # Change the alias of the first aggregation to count. This ensures
    # compatibility with other parts of the timeseries endpoint expectations
    if default_count and len(snuba_filter.aggregations) == 1:
        snuba_filter.aggregations[0][2] = "count"

    return snuba_filter, translated_columns
def get_snuba_query_args(self, request, organization, params):
    """Build snuba query arguments from request query-string parameters.

    Handles the deprecated `group` parameter, sort/orderby, rollup, field
    resolution, and the boolean-search feature gate.

    Raises:
        OrganizationEventsError: for invalid user input.
        PermissionDenied: when a requested group's project is inaccessible.
    """
    query = request.GET.get('query')

    group_ids = request.GET.getlist('group')
    if group_ids:
        # TODO(mark) This parameter should be removed in the long term.
        # Instead of using this parameter clients should use `issue.id`
        # in their query string.
        try:
            group_ids = set(map(int, filter(None, group_ids)))
        except ValueError:
            raise OrganizationEventsError('Invalid group parameter. Values must be numbers')

        projects = Project.objects.filter(
            organization=organization,
            group__id__in=group_ids,
        ).distinct()
        if any(p for p in projects if not request.access.has_project_access(p)):
            raise PermissionDenied
        params['issue.id'] = list(group_ids)
        params['project_id'] = list(set([p.id for p in projects] + params['project_id']))

    try:
        snuba_args = get_snuba_query_args(query=query, params=params)
    except InvalidSearchQuery as exc:
        # Python 3 exceptions have no `.message` attribute; str() is the
        # portable way to get the error text.
        raise OrganizationEventsError(str(exc))

    sort = request.GET.getlist('sort')
    if sort:
        snuba_args['orderby'] = sort

    # Deprecated. `sort` should be used as it is supported by
    # more endpoints.
    orderby = request.GET.getlist('orderby')
    if orderby and 'orderby' not in snuba_args:
        snuba_args['orderby'] = orderby

    if request.GET.get('rollup'):
        try:
            snuba_args['rollup'] = int(request.GET.get('rollup'))
        except ValueError:
            raise OrganizationEventsError('rollup must be an integer.')

    fields = request.GET.getlist('field')[:]
    if fields:
        try:
            snuba_args.update(resolve_field_list(fields, snuba_args))
        except InvalidSearchQuery as exc:
            raise OrganizationEventsError(str(exc))

    # TODO(lb): remove once boolean search is fully functional
    has_boolean_op_flag = features.has(
        'organizations:boolean-search',
        organization,
        actor=request.user
    )
    if snuba_args.pop('has_boolean_terms', False) and not has_boolean_op_flag:
        raise OrganizationEventsError(
            'Boolean search operator OR and AND not allowed in this search.')
    return snuba_args
def get_timeseries_snuba_filter(selected_columns, query, params, rollup, reference_event=None):
    """Build a snuba filter for a timeseries query.

    Returns a (snuba_filter, translated_columns) pair. `rollup` is accepted
    for interface parity but not used here — presumably consumed by the
    caller when issuing the query (confirm against call sites).

    Raises:
        InvalidSearchQuery: when no date range or no aggregation is present.
    """
    # TODO(evanh): These can be removed once we migrate the frontend / saved
    # queries to use the new function values
    selected_columns, _ = transform_deprecated_functions_in_columns(
        selected_columns)
    query = transform_deprecated_functions_in_query(query)

    snuba_filter = get_filter(query, params)
    if not snuba_filter.start and not snuba_filter.end:
        raise InvalidSearchQuery(
            "Cannot get timeseries result without a start and end.")
    snuba_filter.update_with(
        resolve_field_list(selected_columns, snuba_filter, auto_fields=False))

    # Narrow the query to rows matching the reference event, when given.
    if reference_event:
        ref_conditions = create_reference_event_conditions(reference_event)
        if ref_conditions:
            snuba_filter.conditions.extend(ref_conditions)

    # Resolve the public aliases into the discover dataset names.
    snuba_filter, translated_columns = resolve_discover_aliases(snuba_filter)

    if not snuba_filter.aggregations:
        raise InvalidSearchQuery(
            "Cannot get timeseries result with no aggregation.")

    # Change the alias of the first aggregation to count. This ensures compatibility
    # with other parts of the timeseries endpoint expectations
    if len(snuba_filter.aggregations) == 1:
        snuba_filter.aggregations[0][2] = "count"

    return snuba_filter, translated_columns
def build_snuba_filter(dataset, query, aggregate, environment, params=None):
    """Build a snuba filter for an alert-rule style query.

    Resolves the single aggregate into the filter, maps public aliases onto
    Discover dataset columns, then appends the environment condition (when
    given) and any dataset-specific conditions.
    """
    snuba_filter = get_filter(query, params=params)
    resolved_fields = resolve_field_list([aggregate], snuba_filter, auto_fields=False)
    snuba_filter.update_with(resolved_fields)

    # Translate public aliases into Discover dataset column names; only the
    # filter (first element) is needed here.
    column_resolver = resolve_column(Dataset.Discover)
    snuba_filter = resolve_snuba_aliases(snuba_filter, column_resolver)[0]

    if environment:
        snuba_filter.conditions.append(["environment", "=", environment.name])

    snuba_filter.conditions = apply_dataset_conditions(dataset, snuba_filter.conditions)
    return snuba_filter
def query(selected_columns, query, params, orderby=None, referrer=None, auto_fields=False):
    """
    High-level API for doing arbitrary user queries against events.

    This function operates on the Discover public event schema and
    virtual fields/aggregate functions for selected columns and
    conditions are supported through this function.

    The resulting list will have all internal field names mapped
    back into their public schema names.

    selected_columns (Sequence[str]) List of public aliases to fetch.
    query (str) Filter query string to create conditions from.
    params (Dict[str, str]) Filtering parameters with start, end, project_id, environment
    orderby (None|str|Sequence[str]) The field to order results by.
    referrer (str|None) A referrer string to help locate the origin of this query.
    auto_fields (bool) Set to true to have project + eventid fields automatically added.
    """
    snuba_filter = get_filter(query, params)

    # TODO(mark) Refactor the need for this translation shim once all of
    # discover is using this module. Remember to update all the functions
    # in this module.
    snuba_args = {
        "start": snuba_filter.start,
        "end": snuba_filter.end,
        "conditions": snuba_filter.conditions,
        "filter_keys": snuba_filter.filter_keys,
        "orderby": orderby,
    }
    # Merge resolved groupby/aggregations/selected_columns into the args.
    snuba_args.update(
        resolve_field_list(selected_columns, snuba_args, auto_fields=auto_fields))

    # Resolve the public aliases into the discover dataset names.
    snuba_args, translated_columns = resolve_discover_aliases(snuba_args)

    result = raw_query(
        start=snuba_args.get("start"),
        end=snuba_args.get("end"),
        groupby=snuba_args.get("groupby"),
        conditions=snuba_args.get("conditions"),
        aggregations=snuba_args.get("aggregations"),
        selected_columns=snuba_args.get("selected_columns"),
        filter_keys=snuba_args.get("filter_keys"),
        orderby=snuba_args.get("orderby"),
        dataset=Dataset.Discover,
        referrer=referrer,
    )

    # Map internal column names back to the public aliases for the caller.
    return transform_results(result, translated_columns, snuba_args)
def build_snuba_filter(dataset, query, aggregate, environment, params=None):
    """Build a snuba filter for a subscription query on a specific dataset.

    Dataset-specific conditions are folded into the query string up front,
    the aggregate is resolved into the filter, aliases are resolved with the
    dataset's column resolver, and an environment condition is appended when
    one is provided.
    """
    # Pick the column resolver matching the dataset backing this query.
    if dataset == QueryDatasets.EVENTS:
        column_resolver = resolve_column(Dataset.Events)
    else:
        column_resolver = resolve_column(Dataset.Transactions)

    query = apply_dataset_query_conditions(dataset, query)
    snuba_filter = get_filter(query, params=params)

    resolved_fields = resolve_field_list([aggregate], snuba_filter, auto_fields=False)
    snuba_filter.update_with(resolved_fields)

    # Only the filter (first element) of the resolve result is needed.
    snuba_filter = resolve_snuba_aliases(snuba_filter, column_resolver)[0]

    if environment:
        snuba_filter.conditions.append(["environment", "=", environment.name])
    return snuba_filter
def validate(self, data):
    """Cross-field validation for a discover query payload.

    On creation (no "id"), requires the fields listed in
    `self.required_for_create`. Always validates that the conditions,
    orderby and fields would produce a runnable query.

    Raises:
        serializers.ValidationError: keyed by the offending field.
    """
    if not data.get("id"):
        keys = set(data.keys())
        if self.required_for_create - keys:
            raise serializers.ValidationError({
                "fields": "fields are required during creation.",
                "conditions": "conditions are required during creation.",
            })

    # Validate the query that would be created when run.
    conditions = self._get_attr(data, "conditions", "")
    # Copy because resolve_field_list mutates the list it is given.
    fields = self._get_attr(data, "fields", []).copy()
    orderby = self._get_attr(data, "orderby", "")
    try:
        # When using the eps/epm functions, they require an interval argument
        # or to provide the start/end so that the interval can be computed.
        # This uses a hard coded start/end to ensure the validation succeeds
        # since the values themselves don't matter.
        params = {
            "start": datetime.now() - timedelta(days=1),
            "end": datetime.now(),
            "project_id": [p.id for p in self.context.get("projects")],
        }
        snuba_filter = get_filter(conditions, params=params)
    except InvalidSearchQuery as err:
        raise serializers.ValidationError(
            {"conditions": f"Invalid conditions: {err}"})

    if orderby:
        snuba_filter.orderby = get_function_alias(orderby)
    try:
        resolve_field_list(fields, snuba_filter)
    except InvalidSearchQuery as err:
        raise serializers.ValidationError(
            {"fields": f"Invalid fields: {err}"})

    return data
def get_snuba_query_args(self, request, organization, params):
    """Build snuba query arguments from request query-string parameters.

    Handles sort/orderby, rollup, field resolution, reference-event
    conditions, and the boolean-search feature gate.

    Raises:
        OrganizationEventsError: for invalid user input.
    """
    query = request.GET.get("query")
    try:
        snuba_args = get_snuba_query_args(query=query, params=params)
    except InvalidSearchQuery as exc:
        # Python 3 exceptions have no `.message` attribute; str() is the
        # portable way to get the error text.
        raise OrganizationEventsError(str(exc))

    sort = request.GET.getlist("sort")
    if sort:
        snuba_args["orderby"] = sort

    # Deprecated. `sort` should be used as it is supported by
    # more endpoints.
    orderby = request.GET.getlist("orderby")
    if orderby and "orderby" not in snuba_args:
        snuba_args["orderby"] = orderby

    if request.GET.get("rollup"):
        try:
            snuba_args["rollup"] = int(request.GET.get("rollup"))
        except ValueError:
            raise OrganizationEventsError("rollup must be an integer.")

    fields = request.GET.getlist("field")[:]
    if fields:
        try:
            snuba_args.update(resolve_field_list(fields, snuba_args))
        except InvalidSearchQuery as exc:
            raise OrganizationEventsError(str(exc))

    reference_event_id = request.GET.get("referenceEvent")
    if reference_event_id:
        reference_event = find_reference_event(snuba_args, reference_event_id)
        snuba_args["conditions"] = get_reference_event_conditions(
            snuba_args, reference_event.snuba_data)

    # TODO(lb): remove once boolean search is fully functional
    has_boolean_op_flag = features.has("organizations:boolean-search",
                                       organization,
                                       actor=request.user)
    if snuba_args.pop("has_boolean_terms", False) and not has_boolean_op_flag:
        raise OrganizationEventsError(
            "Boolean search operator OR and AND not allowed in this search."
        )
    return snuba_args
def get_field(self, request, snuba_args):
    """Resolve the `yAxis` query parameter into a single count aggregation.

    Legacy v1 aliases ("event_count", "user_count") are mapped onto their
    modern function forms; the resolved aggregation is renamed to "count"
    and stored on `snuba_args`, which is returned.

    Raises:
        ParseError: when the yAxis value cannot be resolved or does not
            produce an aggregation.
    """
    y_axis = request.GET.get("yAxis", None)
    # These aliases are used by v1 of events.
    if not y_axis or y_axis == "event_count":
        y_axis = "count()"
    elif y_axis == "user_count":
        y_axis = "count_unique(user)"
    try:
        resolved = resolve_field_list([y_axis], {})
    except InvalidSearchQuery as err:
        raise ParseError(detail=six.text_type(err))
    try:
        # A non-aggregate yAxis yields no aggregations; surface that as a
        # request error instead of an unhandled IndexError (matches the
        # handling in the other get_field implementation).
        aggregate = resolved["aggregations"][0]
    except IndexError:
        raise ParseError(detail="Invalid yAxis value requested.")
    aggregate[2] = "count"
    snuba_args["aggregations"] = [aggregate]
    return snuba_args
def get_snuba_query_args(self, request, organization, params):
    """Build snuba query arguments from request query-string parameters.

    Parses the filter query into start/end/conditions/filter_keys, then
    layers on sort/orderby, rollup, resolved fields, and reference-event
    conditions.

    Raises:
        OrganizationEventsError: for invalid user input.
    """
    query = request.GET.get("query")
    try:
        filter = get_filter(query, params)
    except InvalidSearchQuery as exc:
        # Python 3 exceptions have no `.message` attribute; str() is the
        # portable way to get the error text.
        raise OrganizationEventsError(str(exc))

    snuba_args = {
        "start": filter.start,
        "end": filter.end,
        "conditions": filter.conditions,
        "filter_keys": filter.filter_keys,
    }

    sort = request.GET.getlist("sort")
    if sort:
        snuba_args["orderby"] = sort

    # Deprecated. `sort` should be used as it is supported by
    # more endpoints.
    orderby = request.GET.getlist("orderby")
    if orderby and "orderby" not in snuba_args:
        snuba_args["orderby"] = orderby

    if request.GET.get("rollup"):
        try:
            snuba_args["rollup"] = int(request.GET.get("rollup"))
        except ValueError:
            raise OrganizationEventsError("rollup must be an integer.")

    fields = request.GET.getlist("field")[:]
    if fields:
        try:
            snuba_args.update(resolve_field_list(fields, snuba_args))
        except InvalidSearchQuery as exc:
            raise OrganizationEventsError(str(exc))

    reference_event_id = request.GET.get("referenceEvent")
    if reference_event_id:
        snuba_args["conditions"] = get_reference_event_conditions(
            snuba_args, reference_event_id
        )
    return snuba_args
def query(
    selected_columns,
    query,
    params,
    orderby=None,
    offset=None,
    limit=50,
    reference_event=None,
    referrer=None,
    auto_fields=False,
    use_aggregate_conditions=False,
):
    """
    High-level API for doing arbitrary user queries against events.

    This function operates on the Discover public event schema and
    virtual fields/aggregate functions for selected columns and
    conditions are supported through this function.

    The resulting list will have all internal field names mapped
    back into their public schema names.

    selected_columns (Sequence[str]) List of public aliases to fetch.
    query (str) Filter query string to create conditions from.
    params (Dict[str, str]) Filtering parameters with start, end, project_id, environment
    orderby (None|str|Sequence[str]) The field to order results by.
    offset (None|int) The record offset to read.
    limit (int) The number of records to fetch.
    reference_event (ReferenceEvent) A reference event object. Used to generate additional
                    conditions based on the provided reference.
    referrer (str|None) A referrer string to help locate the origin of this query.
    auto_fields (bool) Set to true to have project + eventid fields automatically added.
    """
    if not selected_columns:
        raise InvalidSearchQuery("No columns selected")

    snuba_filter = get_filter(query, params)

    # TODO(mark) Refactor the need for this translation shim once all of
    # discover is using this module. Remember to update all the functions
    # in this module.
    snuba_args = {
        "start": snuba_filter.start,
        "end": snuba_filter.end,
        "conditions": snuba_filter.conditions,
        "filter_keys": snuba_filter.filter_keys,
        "orderby": orderby,
        "having": [],
    }
    # Aggregate (HAVING) conditions are only honoured when explicitly enabled.
    if use_aggregate_conditions:
        snuba_args["having"] = snuba_filter.having

    snuba_args.update(
        resolve_field_list(selected_columns, snuba_args, params=params,
                           auto_fields=auto_fields))

    if reference_event:
        ref_conditions = create_reference_event_conditions(reference_event)
        if ref_conditions:
            snuba_args["conditions"].extend(ref_conditions)

    # Resolve the public aliases into the discover dataset names.
    snuba_args, translated_columns = resolve_discover_aliases(snuba_args)

    # Make sure that any aggregate conditions are also in the selected columns
    for having_clause in snuba_args.get("having"):
        found = any(having_clause[0] == agg_clause[-1]
                    for agg_clause in snuba_args.get("aggregations"))
        if not found:
            raise InvalidSearchQuery(
                u"Aggregate {} used in a condition but is not a selected column."
                .format(having_clause[0]))

    result = raw_query(
        start=snuba_args.get("start"),
        end=snuba_args.get("end"),
        groupby=snuba_args.get("groupby"),
        conditions=snuba_args.get("conditions"),
        aggregations=snuba_args.get("aggregations"),
        selected_columns=snuba_args.get("selected_columns"),
        filter_keys=snuba_args.get("filter_keys"),
        having=snuba_args.get("having"),
        orderby=snuba_args.get("orderby"),
        dataset=Dataset.Discover,
        limit=limit,
        offset=offset,
        referrer=referrer,
    )

    # Map internal column names back to the public aliases for the caller.
    return transform_results(result, translated_columns, snuba_args)
def query(
    selected_columns,
    query,
    params,
    orderby=None,
    offset=None,
    limit=50,
    reference_event=None,
    referrer=None,
    auto_fields=False,
    use_aggregate_conditions=False,
    conditions=None,
):
    """
    High-level API for doing arbitrary user queries against events.

    This function operates on the Discover public event schema and
    virtual fields/aggregate functions for selected columns and
    conditions are supported through this function.

    The resulting list will have all internal field names mapped
    back into their public schema names.

    selected_columns (Sequence[str]) List of public aliases to fetch.
    query (str) Filter query string to create conditions from.
    params (Dict[str, str]) Filtering parameters with start, end, project_id, environment
    orderby (None|str|Sequence[str]) The field to order results by.
    offset (None|int) The record offset to read.
    limit (int) The number of records to fetch.
    reference_event (ReferenceEvent) A reference event object. Used to generate additional
                    conditions based on the provided reference.
    referrer (str|None) A referrer string to help locate the origin of this query.
    auto_fields (bool) Set to true to have project + eventid fields automatically added.
    use_aggregate_conditions (bool) Set to true if aggregates conditions should be used at all.
    conditions (Sequence[any]) List of conditions that are passed directly to snuba without
                    any additional processing.
    """
    if not selected_columns:
        raise InvalidSearchQuery("No columns selected")

    # TODO(evanh): These can be removed once we migrate the frontend / saved queries
    # to use the new function values
    selected_columns, function_translations = transform_deprecated_functions_in_columns(
        selected_columns)
    query = transform_deprecated_functions_in_query(query)

    snuba_filter = get_filter(query, params)
    if not use_aggregate_conditions:
        snuba_filter.having = []

    # We need to run a separate query to be able to properly bucket the values for the histogram
    # Do that here, and format the bucket number in to the columns before passing it through
    # to event search.
    idx = 0
    for col in selected_columns:
        if col.startswith("histogram("):
            histogram_column = find_histogram_buckets(col, params,
                                                      snuba_filter.conditions)
            # Only the first histogram column is rewritten; the loop stops
            # after one match.
            selected_columns[idx] = histogram_column
            function_translations[get_function_alias(
                histogram_column)] = get_function_alias(col)
            break
        idx += 1

    # Check to see if we are ordering by any functions and convert the orderby to be the correct alias.
    if orderby:
        orderby = orderby if isinstance(orderby, (list, tuple)) else [orderby]
        new_orderby = []
        for ordering in orderby:
            # A leading "-" marks a descending sort; strip it while
            # translating and re-attach it afterwards.
            is_reversed = ordering.startswith("-")
            ordering = ordering.lstrip("-")
            for snuba_name, sentry_name in six.iteritems(
                    function_translations):
                if sentry_name == ordering:
                    ordering = snuba_name
                    break
            ordering = "{}{}".format("-" if is_reversed else "", ordering)
            new_orderby.append(ordering)

        snuba_filter.orderby = new_orderby

    snuba_filter.update_with(
        resolve_field_list(selected_columns, snuba_filter,
                           auto_fields=auto_fields))

    if reference_event:
        ref_conditions = create_reference_event_conditions(reference_event)
        if ref_conditions:
            snuba_filter.conditions.extend(ref_conditions)

    # Resolve the public aliases into the discover dataset names.
    snuba_filter, translated_columns = resolve_discover_aliases(
        snuba_filter, function_translations)

    # Make sure that any aggregate conditions are also in the selected columns
    for having_clause in snuba_filter.having:
        found = any(having_clause[0] == agg_clause[-1]
                    for agg_clause in snuba_filter.aggregations)
        if not found:
            raise InvalidSearchQuery(
                u"Aggregate {} used in a condition but is not a selected column."
                .format(having_clause[0]))

    # Extra raw conditions are appended verbatim, after all validation.
    if conditions is not None:
        snuba_filter.conditions.extend(conditions)

    result = raw_query(
        start=snuba_filter.start,
        end=snuba_filter.end,
        groupby=snuba_filter.groupby,
        conditions=snuba_filter.conditions,
        aggregations=snuba_filter.aggregations,
        selected_columns=snuba_filter.selected_columns,
        filter_keys=snuba_filter.filter_keys,
        having=snuba_filter.having,
        orderby=snuba_filter.orderby,
        dataset=Dataset.Discover,
        limit=limit,
        offset=offset,
        referrer=referrer,
    )

    return transform_results(result, translated_columns, snuba_filter,
                             selected_columns)
def prepare_discover_query(
    selected_columns,
    query,
    params,
    orderby=None,
    auto_fields=False,
    auto_aggregations=False,
    use_aggregate_conditions=False,
    conditions=None,
    functions_acl=None,
):
    """Parse and resolve a discover query without executing it.

    Builds the snuba filter, resolves fields/aliases, and validates that
    every aggregate referenced in a HAVING condition is present in the
    selected aggregations. Returns a PreparedQuery of
    (snuba_filter, translated_columns, resolved_fields).

    Raises:
        InvalidSearchQuery: when an aggregate condition references an
            aggregate that is not (and cannot be automatically) selected.
    """
    with sentry_sdk.start_span(op="discover.discover",
                               description="query.filter_transform") as span:
        span.set_data("query", query)
        snuba_filter = get_filter(query, params)
        if not use_aggregate_conditions:
            # auto_aggregations only makes sense when aggregate conditions
            # are honoured, hence the guard.
            assert (
                not auto_aggregations
            ), "Auto aggregations cannot be used without enabling aggregate conditions"
            snuba_filter.having = []

    with sentry_sdk.start_span(op="discover.discover",
                               description="query.field_translations"):
        if orderby is not None:
            orderby = list(orderby) if isinstance(orderby, (list, tuple)) else [orderby]
            snuba_filter.orderby = [get_function_alias(o) for o in orderby]

        resolved_fields = resolve_field_list(
            selected_columns,
            snuba_filter,
            auto_fields=auto_fields,
            auto_aggregations=auto_aggregations,
            functions_acl=functions_acl,
        )

        snuba_filter.update_with(resolved_fields)

        # Resolve the public aliases into the discover dataset names.
        snuba_filter, translated_columns = resolve_discover_aliases(
            snuba_filter)

        # Make sure that any aggregate conditions are also in the selected columns
        for having_clause in snuba_filter.having:
            # The first element of the having can be an alias, or a nested array of functions. Loop through to make sure
            # any referenced functions are in the aggregations.
            error_extra = ", and could not be automatically added" if auto_aggregations else ""
            if isinstance(having_clause[0], (list, tuple)):
                # Functions are of the form [fn, [args]]
                args_to_check = [[having_clause[0]]]
                conditions_not_in_aggregations = []
                # Breadth-less walk: AND/OR nodes push their children back
                # onto the work list; leaf functions have their alias checked.
                while len(args_to_check) > 0:
                    args = args_to_check.pop()
                    for arg in args:
                        if arg[0] in [SNUBA_AND, SNUBA_OR]:
                            args_to_check.extend(arg[1])
                        # Only need to iterate on arg[1] if its a list
                        elif isinstance(arg[1], (list, tuple)):
                            alias = arg[1][0]
                            found = any(
                                alias == agg_clause[-1]
                                for agg_clause in snuba_filter.aggregations)
                            if not found:
                                conditions_not_in_aggregations.append(alias)

                if len(conditions_not_in_aggregations) > 0:
                    raise InvalidSearchQuery(
                        "Aggregate(s) {} used in a condition but are not in the selected columns{}."
                        .format(
                            ", ".join(conditions_not_in_aggregations),
                            error_extra,
                        ))
            else:
                found = any(having_clause[0] == agg_clause[-1]
                            for agg_clause in snuba_filter.aggregations)
                if not found:
                    raise InvalidSearchQuery(
                        "Aggregate {} used in a condition but is not a selected column{}."
                        .format(
                            having_clause[0],
                            error_extra,
                        ))

        # Extra raw conditions are appended verbatim, after validation.
        if conditions is not None:
            snuba_filter.conditions.extend(conditions)

    return PreparedQuery(snuba_filter, translated_columns, resolved_fields)
def query(
    selected_columns,
    query,
    params,
    orderby=None,
    offset=None,
    limit=50,
    referrer=None,
    auto_fields=False,
    auto_aggregations=False,
    use_aggregate_conditions=False,
    conditions=None,
    functions_acl=None,
):
    """
    High-level API for doing arbitrary user queries against events.

    This function operates on the Discover public event schema and
    virtual fields/aggregate functions for selected columns and
    conditions are supported through this function.

    The resulting list will have all internal field names mapped
    back into their public schema names.

    selected_columns (Sequence[str]) List of public aliases to fetch.
    query (str) Filter query string to create conditions from.
    params (Dict[str, str]) Filtering parameters with start, end, project_id, environment
    orderby (None|str|Sequence[str]) The field to order results by.
    offset (None|int) The record offset to read.
    limit (int) The number of records to fetch.
    referrer (str|None) A referrer string to help locate the origin of this query.
    auto_fields (bool) Set to true to have project + eventid fields automatically added.
    auto_aggregations (bool) Whether aggregates should be added automatically if they're used
                    in conditions, and there's at least one aggregate already.
    use_aggregate_conditions (bool) Set to true if aggregates conditions should be used at all.
    conditions (Sequence[any]) List of conditions that are passed directly to snuba without
                    any additional processing.
    """
    if not selected_columns:
        raise InvalidSearchQuery("No columns selected")

    # We clobber this value throughout this code, so copy the value
    selected_columns = selected_columns[:]

    with sentry_sdk.start_span(op="discover.discover",
                               description="query.filter_transform") as span:
        span.set_data("query", query)
        snuba_filter = get_filter(query, params)
        if not use_aggregate_conditions:
            # auto_aggregations only makes sense when aggregate conditions
            # are honoured, hence the guard.
            assert (
                not auto_aggregations
            ), "Auto aggregations cannot be used without enabling aggregate conditions"
            snuba_filter.having = []

    function_translations = {}

    with sentry_sdk.start_span(op="discover.discover",
                               description="query.field_translations"):
        if orderby is not None:
            orderby = list(orderby) if isinstance(orderby, (list, tuple)) else [orderby]
            snuba_filter.orderby = [get_function_alias(o) for o in orderby]

        resolved_fields = resolve_field_list(
            selected_columns,
            snuba_filter,
            auto_fields=auto_fields,
            auto_aggregations=auto_aggregations,
            functions_acl=functions_acl,
        )

        snuba_filter.update_with(resolved_fields)

        # Resolve the public aliases into the discover dataset names.
        snuba_filter, translated_columns = resolve_discover_aliases(
            snuba_filter, function_translations)

        # Make sure that any aggregate conditions are also in the selected columns
        for having_clause in snuba_filter.having:
            # The first element of the having can be an alias, or a nested array of functions. Loop through to make sure
            # any referenced functions are in the aggregations.
            error_extra = ", and could not be automatically added" if auto_aggregations else ""
            if isinstance(having_clause[0], (list, tuple)):
                # Functions are of the form [fn, [args]]
                args_to_check = [[having_clause[0]]]
                conditions_not_in_aggregations = []
                # Walk nested AND/OR nodes, checking each leaf function's
                # alias against the selected aggregations.
                while len(args_to_check) > 0:
                    args = args_to_check.pop()
                    for arg in args:
                        if arg[0] in [SNUBA_AND, SNUBA_OR]:
                            args_to_check.extend(arg[1])
                        # Only need to iterate on arg[1] if its a list
                        elif isinstance(arg[1], (list, tuple)):
                            alias = arg[1][0]
                            found = any(
                                alias == agg_clause[-1]
                                for agg_clause in snuba_filter.aggregations)
                            if not found:
                                conditions_not_in_aggregations.append(alias)

                if len(conditions_not_in_aggregations) > 0:
                    raise InvalidSearchQuery(
                        "Aggregate(s) {} used in a condition but are not in the selected columns{}."
                        .format(
                            ", ".join(conditions_not_in_aggregations),
                            error_extra,
                        ))
            else:
                found = any(having_clause[0] == agg_clause[-1]
                            for agg_clause in snuba_filter.aggregations)
                if not found:
                    raise InvalidSearchQuery(
                        "Aggregate {} used in a condition but is not a selected column{}."
                        .format(
                            having_clause[0],
                            error_extra,
                        ))

        # Extra raw conditions are appended verbatim, after validation.
        if conditions is not None:
            snuba_filter.conditions.extend(conditions)

    with sentry_sdk.start_span(op="discover.discover",
                               description="query.snuba_query"):
        result = raw_query(
            start=snuba_filter.start,
            end=snuba_filter.end,
            groupby=snuba_filter.groupby,
            conditions=snuba_filter.conditions,
            aggregations=snuba_filter.aggregations,
            selected_columns=snuba_filter.selected_columns,
            filter_keys=snuba_filter.filter_keys,
            having=snuba_filter.having,
            orderby=snuba_filter.orderby,
            dataset=Dataset.Discover,
            limit=limit,
            offset=offset,
            referrer=referrer,
        )

    with sentry_sdk.start_span(op="discover.discover",
                               description="query.transform_results") as span:
        span.set_data("result_count", len(result.get("data", [])))
        return transform_results(result, resolved_fields["functions"],
                                 translated_columns, snuba_filter,
                                 selected_columns)
def timeseries_query(selected_columns, query, params, rollup, reference_event=None, referrer=None):
    """
    High-level API for doing arbitrary user timeseries queries against events.

    This function operates on the public event schema and
    virtual fields/aggregate functions for selected columns and
    conditions are supported through this function.

    This function is intended to only get timeseries based
    results and thus requires the `rollup` parameter.

    Returns a SnubaTSResult object that has been zerofilled in
    case of gaps.

    selected_columns (Sequence[str]) List of public aliases to fetch.
    query (str) Filter query string to create conditions from.
    params (Dict[str, str]) Filtering parameters with start, end, project_id, environment,
    rollup (int) The bucket width in seconds
    reference_event (ReferenceEvent) A reference event object. Used to generate additional
                    conditions based on the provided reference.
    referrer (str|None) A referrer string to help locate the origin of this query.
    """
    snuba_filter = get_filter(query, params)
    snuba_args = {
        "start": snuba_filter.start,
        "end": snuba_filter.end,
        "conditions": snuba_filter.conditions,
        "filter_keys": snuba_filter.filter_keys,
    }
    # A timeseries needs an explicit window to bucket over.
    if not snuba_args["start"] and not snuba_args["end"]:
        raise InvalidSearchQuery(
            "Cannot get timeseries result without a start and end.")

    snuba_args.update(
        resolve_field_list(selected_columns, snuba_args, auto_fields=False))

    if reference_event:
        ref_conditions = create_reference_event_conditions(reference_event)
        if ref_conditions:
            snuba_args["conditions"].extend(ref_conditions)

    # Resolve the public aliases into the discover dataset names.
    snuba_args, _ = resolve_discover_aliases(snuba_args)
    if not snuba_args["aggregations"]:
        raise InvalidSearchQuery(
            "Cannot get timeseries result with no aggregation.")

    result = raw_query(
        aggregations=snuba_args.get("aggregations"),
        conditions=snuba_args.get("conditions"),
        filter_keys=snuba_args.get("filter_keys"),
        start=snuba_args.get("start"),
        end=snuba_args.get("end"),
        rollup=rollup,
        orderby="time",
        groupby=["time"],
        dataset=Dataset.Discover,
        limit=10000,
        referrer=referrer,
    )
    # Fill empty buckets so consumers see a continuous series.
    result = zerofill(result["data"], snuba_args["start"], snuba_args["end"],
                      rollup, "time")

    return SnubaTSResult(result, snuba_filter.start, snuba_filter.end, rollup)
def validate(self, data):
    """
    Validate and normalize the export request payload.

    Coerces `project` into a list of accessible project ids, caps discover
    exports at MAX_FIELDS fields, pins relative time params to an absolute
    start/end pair, and parses the discover query string so invalid queries
    are rejected up front. Returns the (mutated) `data`.
    """
    organization = self.context["organization"]
    query_info = data["query_info"]

    # Validate the project field, if provided
    # A PermissionDenied error will be raised in `get_projects_by_id` if the request is invalid
    requested_projects = query_info.get("project")
    if requested_projects:
        get_projects_by_id = self.context["get_projects_by_id"]
        # Coerce the query into a set of integer ids before lookup.
        if isinstance(requested_projects, list):
            project_ids = set(map(int, requested_projects))
        else:
            project_ids = {int(requested_projects)}
        query_info["project"] = [
            project.id for project in get_projects_by_id(project_ids)
        ]

    # Discover pre-processing only applies to discover-type exports.
    if data["query_type"] == ExportQueryType.DISCOVER_STR:
        # A single field may arrive as a bare string; normalize to a list.
        fields = query_info.get("field", [])
        if not isinstance(fields, list):
            fields = [fields]
        if len(fields) > MAX_FIELDS:
            detail = f"You can export up to {MAX_FIELDS} fields at a time. Please delete some and try again."
            raise serializers.ValidationError(detail)
        query_info["field"] = fields

        # Default to every project the requester can see.
        if "project" not in query_info:
            query_info["project"] = [
                project.id for project in self.context["get_projects"]()
            ]

        # make sure to fix the export start/end times to ensure consistent results
        try:
            start, end = get_date_range_from_params(query_info)
        except InvalidParams as e:
            sentry_sdk.set_tag("query.error_reason", "Invalid date params")
            raise serializers.ValidationError(str(e))

        # Relative period keys are superseded by the absolute range below.
        for relative_key in ("statsPeriod", "statsPeriodStart", "statsPeriodEnd"):
            query_info.pop(relative_key, None)
        query_info["start"] = start.isoformat()
        query_info["end"] = end.isoformat()

        # validate the query string by trying to parse it
        processor = DiscoverProcessor(
            discover_query=query_info,
            organization_id=organization.id,
        )
        try:
            snuba_filter = get_filter(query_info["query"], processor.params)
            resolve_field_list(
                fields.copy(),
                snuba_filter,
                auto_fields=True,
                auto_aggregations=True,
            )
        except InvalidSearchQuery as err:
            raise serializers.ValidationError(str(err))

    return data
def timeseries_query(selected_columns, query, params, rollup, reference_event=None, referrer=None):
    """
    Run a user timeseries query against events.

    Operates on the public event schema: virtual fields and aggregate
    functions in ``selected_columns`` and ``query`` are resolved through this
    function. Only timeseries results are produced, so ``rollup`` is
    required.

    Returns a SnubaTSResult object that has been zerofilled in case of gaps.

    selected_columns (Sequence[str]) List of public aliases to fetch.
    query (str) Filter query string to create conditions from.
    params (Dict[str, str]) Filtering parameters with start, end, project_id, environment,
    rollup (int) The bucket width in seconds
    reference_event (ReferenceEvent) A reference event object. Used to generate additional
                    conditions based on the provided reference.
    referrer (str|None) A referrer string to help locate the origin of this query.
    """
    # TODO(evanh): These can be removed once we migrate the frontend / saved queries
    # to use the new function values
    selected_columns, _ = transform_deprecated_functions_in_columns(selected_columns)
    query = transform_deprecated_functions_in_query(query)

    snuba_filter = get_filter(query, params)
    if not snuba_filter.start and not snuba_filter.end:
        raise InvalidSearchQuery("Cannot get timeseries result without a start and end.")

    snuba_filter.update_with(
        resolve_field_list(selected_columns, snuba_filter, auto_fields=False)
    )

    if reference_event:
        extra_conditions = create_reference_event_conditions(reference_event)
        if extra_conditions:
            snuba_filter.conditions.extend(extra_conditions)

    # Resolve the public aliases into the discover dataset names.
    snuba_filter, _ = resolve_discover_aliases(snuba_filter)
    if not snuba_filter.aggregations:
        raise InvalidSearchQuery("Cannot get timeseries result with no aggregation.")

    # Change the alias of the first aggregation to count. This ensures compatibility
    # with other parts of the timeseries endpoint expectations
    if len(snuba_filter.aggregations) == 1:
        snuba_filter.aggregations[0][2] = "count"

    snuba_result = raw_query(
        aggregations=snuba_filter.aggregations,
        conditions=snuba_filter.conditions,
        filter_keys=snuba_filter.filter_keys,
        start=snuba_filter.start,
        end=snuba_filter.end,
        rollup=rollup,
        orderby="time",
        groupby=["time"],
        dataset=Dataset.Discover,
        limit=10000,
        referrer=referrer,
    )
    # Fill empty buckets so the series is continuous across the range.
    buckets = zerofill(
        snuba_result["data"], snuba_filter.start, snuba_filter.end, rollup, "time"
    )
    return SnubaTSResult({"data": buckets}, snuba_filter.start, snuba_filter.end, rollup)
def query(
    selected_columns,
    query,
    params,
    orderby=None,
    offset=None,
    limit=50,
    reference_event=None,
    referrer=None,
    auto_fields=False,
    use_aggregate_conditions=False,
    conditions=None,
):
    """
    High-level API for doing arbitrary user queries against events.

    This function operates on the Discover public event schema and
    virtual fields/aggregate functions for selected columns and
    conditions are supported through this function.

    The resulting list will have all internal field names mapped
    back into their public schema names.

    selected_columns (Sequence[str]) List of public aliases to fetch.
    query (str) Filter query string to create conditions from.
    params (Dict[str, str]) Filtering parameters with start, end, project_id, environment
    orderby (None|str|Sequence[str]) The field to order results by.
    offset (None|int) The record offset to read.
    limit (int) The number of records to fetch.
    reference_event (ReferenceEvent) A reference event object. Used to generate additional
                    conditions based on the provided reference.
    referrer (str|None) A referrer string to help locate the origin of this query.
    auto_fields (bool) Set to true to have project + eventid fields automatically added.
    use_aggregate_conditions (bool) When false, aggregate (HAVING) conditions parsed from
                    the query string are discarded.
    conditions (Sequence[any]) List of conditions that are passed directly to snuba without
                    any additional processing.
    """
    if not selected_columns:
        raise InvalidSearchQuery("No columns selected")
    else:
        # We clobber this value throughout this code, so copy the value
        selected_columns = selected_columns[:]

    with sentry_sdk.start_span(op="discover.discover", description="query.filter_transform") as span:
        span.set_data("query", query)
        snuba_filter = get_filter(query, params)
        # Dropping `having` disables aggregate conditions entirely.
        if not use_aggregate_conditions:
            snuba_filter.having = []

    # We need to run a separate query to be able to properly bucket the values for the histogram
    # Do that here, and format the bucket number in to the columns before passing it through
    # to event search.
    idx = 0
    function_translations = {}
    for col in selected_columns:
        if col.startswith("histogram("):
            with sentry_sdk.start_span(
                op="discover.discover", description="query.histogram_calculation"
            ) as span:
                span.set_data("histogram", col)
                histogram_column = find_histogram_buckets(col, params, snuba_filter.conditions)
                # Replace the public histogram() column in place with the bucketed version.
                selected_columns[idx] = histogram_column
                snuba_name = get_function_alias(histogram_column)
                sentry_name = get_function_alias(col)
                # Remember the rename so results can be translated back to the public alias.
                function_translations[snuba_name] = sentry_name
                # Since we're completely renaming the histogram function, we need to also check if we are
                # ordering by the histogram values, and change that.
                if orderby is not None:
                    orderby = list(orderby) if isinstance(orderby, (list, tuple)) else [orderby]
                    for i, ordering in enumerate(orderby):
                        if sentry_name == ordering.lstrip("-"):
                            # Preserve the descending "-" prefix on the renamed column.
                            ordering = "{}{}".format(
                                "-" if ordering.startswith("-") else "", snuba_name
                            )
                            orderby[i] = ordering
                            break
        idx += 1

    with sentry_sdk.start_span(op="discover.discover", description="query.field_translations"):
        if orderby is not None:
            orderby = list(orderby) if isinstance(orderby, (list, tuple)) else [orderby]
            snuba_filter.orderby = [get_function_alias(o) for o in orderby]

        snuba_filter.update_with(
            resolve_field_list(selected_columns, snuba_filter, auto_fields=auto_fields)
        )

    if reference_event:
        ref_conditions = create_reference_event_conditions(reference_event)
        if ref_conditions:
            snuba_filter.conditions.extend(ref_conditions)

    # Resolve the public aliases into the discover dataset names.
    snuba_filter, translated_columns = resolve_discover_aliases(
        snuba_filter, function_translations
    )

    # Make sure that any aggregate conditions are also in the selected columns
    for having_clause in snuba_filter.having:
        # The first element of the having can be an alias, or a nested array of functions. Loop through to make sure
        # any referenced functions are in the aggregations.
        if isinstance(having_clause[0], (list, tuple)):
            # Functions are of the form [fn, [args]]
            args_to_check = [[having_clause[0]]]
            conditions_not_in_aggregations = []
            # Breadth-ish walk: AND/OR nodes push their child argument lists back onto the stack;
            # leaf nodes name the aggregate alias being compared.
            while len(args_to_check) > 0:
                args = args_to_check.pop()
                for arg in args:
                    if arg[0] in [SNUBA_AND, SNUBA_OR]:
                        args_to_check.extend(arg[1])
                    else:
                        alias = arg[1][0]
                        # The alias is the last element of each aggregation clause.
                        found = any(
                            alias == agg_clause[-1] for agg_clause in snuba_filter.aggregations
                        )
                        if not found:
                            conditions_not_in_aggregations.append(alias)

            if len(conditions_not_in_aggregations) > 0:
                raise InvalidSearchQuery(
                    u"Aggregate(s) {} used in a condition but are not in the selected columns.".format(
                        ", ".join(conditions_not_in_aggregations)
                    )
                )
        else:
            # Simple case: the having clause references one aggregate alias directly.
            found = any(
                having_clause[0] == agg_clause[-1] for agg_clause in snuba_filter.aggregations
            )
            if not found:
                raise InvalidSearchQuery(
                    u"Aggregate {} used in a condition but is not a selected column.".format(
                        having_clause[0]
                    )
                )

    # Caller-supplied raw snuba conditions are appended verbatim.
    if conditions is not None:
        snuba_filter.conditions.extend(conditions)

    with sentry_sdk.start_span(op="discover.discover", description="query.snuba_query"):
        result = raw_query(
            start=snuba_filter.start,
            end=snuba_filter.end,
            groupby=snuba_filter.groupby,
            conditions=snuba_filter.conditions,
            aggregations=snuba_filter.aggregations,
            selected_columns=snuba_filter.selected_columns,
            filter_keys=snuba_filter.filter_keys,
            having=snuba_filter.having,
            orderby=snuba_filter.orderby,
            dataset=Dataset.Discover,
            limit=limit,
            offset=offset,
            referrer=referrer,
        )

    with sentry_sdk.start_span(op="discover.discover", description="query.transform_results") as span:
        span.set_data("result_count", len(result.get("data", [])))
        # Map internal dataset names back to the public aliases before returning.
        return transform_results(result, translated_columns, snuba_filter, selected_columns)