def get_reference_event_conditions(snuba_args, event_slug):
    """
    Returns a list of additional conditions/filter_keys to
    scope a query by the groupby fields using values from
    the reference event

    This is a key part of pagination in the event details
    modal and summary graph navigation.
    """
    groupby = snuba_args.get("groupby", [])
    columns = eventstore.get_columns_from_aliases(groupby)
    field_names = [get_snuba_column_name(field) for field in groupby]

    # Fetch the reference event ensuring the fields in the groupby
    # clause are present.
    event_data = find_reference_event(snuba_args, event_slug, columns)

    conditions = []
    tags = {}
    if "tags.key" in event_data and "tags.value" in event_data:
        # Zip the parallel key/value arrays into a plain mapping.
        tags = dict(zip(event_data["tags.key"], event_data["tags.value"]))

    for field in field_names:
        match = TAG_KEY_RE.match(field)
        if match:
            value = tags.get(match.group(1), None)
        else:
            value = event_data.get(field, None)
        # If the value is a sequence use the first element as snuba
        # doesn't support `=` or `IN` operations on fields like exception_frames.filename.
        # NOTE: the previous `value.pop()` took the *last* element of a list
        # (contradicting this comment) and mutated the reference event data in
        # place; `next(iter(...))` yields the first element without mutation.
        if isinstance(value, (list, set)) and value:
            value = next(iter(value))
        if value:
            conditions.append([field, "=", value])
    return conditions
def get_reference_event_conditions(snuba_args, reference_event):
    """
    Returns a list of additional conditions/filter_keys to
    scope a query by the groupby fields using values from
    the reference event

    This is a key part of pagination in the event details
    modal and summary graph navigation.
    """
    # Build a tag-key -> tag-value lookup when the reference event
    # carries the parallel tags.key / tags.value arrays.
    tag_lookup = {}
    if "tags.key" in reference_event and "tags.value" in reference_event:
        tag_lookup = dict(zip(reference_event["tags.key"], reference_event["tags.value"]))

    # If we were given an project/event to use build additional
    # conditions using that event and the non-aggregated columns
    # we received in the querystring. This lets us find the oldest/newest.
    # This only handles simple fields on the snuba_data dict.
    scoped_conditions = []
    for groupby_field in snuba_args.get("groupby", []):
        column = get_snuba_column_name(groupby_field)
        if column.startswith("tags["):
            candidate = tag_lookup.get(groupby_field, None)
        else:
            candidate = reference_event.get(column, None)
        if candidate:
            scoped_conditions.append([column, "=", candidate])

    return scoped_conditions
def get_tag_value_paginator_for_projects(self, projects, environments, key, start, end,
                                         query=None, order_by='-last_seen'):
    """
    Return a SequencePaginator of TagValue objects for ``key`` across the
    given projects (optionally scoped to ``environments``), sourced from a
    snuba aggregation query.

    Only ``order_by='-last_seen'`` is supported; anything else raises
    ValueError. ``query``, when given, substring-matches candidate values.
    """
    from sentry.api.paginator import SequencePaginator

    if not order_by == '-last_seen':
        raise ValueError("Unsupported order_by: %s" % order_by)

    snuba_key = snuba.get_snuba_column_name(key)

    conditions = []

    # Columns we must not query directly are routed through the tags map.
    if snuba_key in BLACKLISTED_COLUMNS:
        snuba_key = 'tags[%s]' % (key, )

    if query:
        # Substring match on the value when a search query was provided.
        conditions.append([snuba_key, 'LIKE', u'%{}%'.format(query)])
    else:
        # Otherwise just exclude empty values.
        conditions.append([snuba_key, '!=', ''])

    filters = {
        'project_id': projects,
    }
    if environments:
        filters['environment'] = environments

    # One aggregate row per distinct value: count plus first/last seen bounds.
    results = snuba.query(
        start=start,
        end=end,
        groupby=[snuba_key],
        filter_keys=filters,
        aggregations=[
            ['count()', '', 'times_seen'],
            ['min', 'timestamp', 'first_seen'],
            ['max', 'timestamp', 'last_seen'],
        ],
        conditions=conditions,
        orderby=order_by,
        # TODO: This means they can't actually paginate all TagValues.
        limit=1000,
        arrayjoin=snuba.get_arrayjoin(snuba_key),
        referrer='tagstore.get_tag_value_paginator_for_projects',
    )

    tag_values = [
        TagValue(key=key, value=value, **fix_tag_value_data(data))
        for value, data in six.iteritems(results)
    ]

    desc = order_by.startswith('-')
    score_field = order_by.lstrip('-')
    # Score by the millisecond timestamp of the ordering field.
    return SequencePaginator(
        [(int(to_timestamp(getattr(tv, score_field)) * 1000), tv)
         for tv in tag_values],
        reverse=desc)
def test_get_snuba_column_name(self):
    """Alias-to-snuba-column translation for known columns and tag fallbacks."""
    expectations = [
        ("project_id", "project_id"),
        ("start", "start"),
        ("'thing'", "'thing'"),
        ("id", "event_id"),
        ("geo.region", "geo_region"),
        ("tags[sentry:user]", "tags[sentry:user]"),
        ("organization", "tags[organization]"),
        ("unknown-key", "tags[unknown-key]"),
    ]
    for alias, expected in expectations:
        assert get_snuba_column_name(alias) == expected
def get_tag_value_paginator_for_projects(self, projects, environments, key,
                                         start=None, end=None, query=None,
                                         order_by="-last_seen"):
    """
    Return a SequencePaginator of TagValue objects for ``key`` across the
    given projects (optionally scoped to ``environments``), sourced from a
    snuba aggregation query.

    Only ``order_by="-last_seen"`` is supported; anything else raises
    ValueError. ``query``, when given, substring-matches candidate values.
    """
    from sentry.api.paginator import SequencePaginator

    if not order_by == "-last_seen":
        raise ValueError("Unsupported order_by: %s" % order_by)

    snuba_key = snuba.get_snuba_column_name(key)

    conditions = []

    # Columns we must not query directly are routed through the tags map.
    if snuba_key in BLACKLISTED_COLUMNS:
        snuba_key = "tags[%s]" % (key, )

    if query:
        # Substring match on the value when a search query was provided.
        conditions.append([snuba_key, "LIKE", u"%{}%".format(query)])
    else:
        # Otherwise just exclude empty values.
        conditions.append([snuba_key, "!=", ""])

    filters = {"project_id": projects}
    if environments:
        filters["environment"] = environments

    # One aggregate row per distinct value: count plus first/last seen bounds.
    results = snuba.query(
        start=start,
        end=end,
        groupby=[snuba_key],
        filter_keys=filters,
        aggregations=[
            ["count()", "", "times_seen"],
            ["min", "timestamp", "first_seen"],
            ["max", "timestamp", "last_seen"],
        ],
        conditions=conditions,
        orderby=order_by,
        # TODO: This means they can't actually paginate all TagValues.
        limit=1000,
        arrayjoin=snuba.get_arrayjoin(snuba_key),
        referrer="tagstore.get_tag_value_paginator_for_projects",
    )

    tag_values = [
        TagValue(key=key, value=value, **fix_tag_value_data(data))
        for value, data in six.iteritems(results)
    ]

    desc = order_by.startswith("-")
    score_field = order_by.lstrip("-")
    # Score by the millisecond timestamp of the ordering field.
    return SequencePaginator(
        [(int(to_timestamp(getattr(tv, score_field)) * 1000), tv) for tv in tag_values],
        reverse=desc,
    )
def get(self, request, organization):
    """
    Return the top values seen for a single tag/column ``key`` across the
    events matching the request's query, as ``{"key": ..., "topValues": [...]}``.

    Responds 400 on invalid query args, invalid key, or when no valid
    project is included.
    """
    try:
        snuba_args = self.get_snuba_query_args(request, organization)
    except OrganizationEventsError as exc:
        return Response({'detail': exc.message}, status=400)
    except NoProjects:
        return Response({'detail': 'A valid project must be included.'}, status=400)

    try:
        key = self._validate_key(request)
        self._validate_project_ids(request, organization, snuba_args)
    except OrganizationEventsError as error:
        return Response({'detail': six.text_type(error)}, status=400)

    colname = get_snuba_column_name(key)
    # The pseudo "project" key is answered from project_id, not a tag column.
    if key == PROJECT_KEY:
        colname = 'project_id'

    # Count events per distinct value, most frequent first.
    top_values = raw_query(
        start=snuba_args['start'],
        end=snuba_args['end'],
        conditions=snuba_args['conditions'] + [[colname, 'IS NOT NULL', None]],
        filter_keys=snuba_args['filter_keys'],
        groupby=[colname],
        aggregations=[('count()', None, 'count')],
        orderby='-count',
        limit=TOP_VALUES_DEFAULT_LIMIT,
        referrer='api.organization-events-distribution',
    )['data']

    projects = {p.id: p.slug for p in self.get_projects(request, organization)}

    if key == PROJECT_KEY:
        # Map project ids back to slugs for display.
        resp = {
            'key': PROJECT_KEY,
            'topValues': [
                {
                    'value': projects[v['project_id']],
                    'name': projects[v['project_id']],
                    'count': v['count'],
                }
                for v in top_values
            ]
        }
    else:
        resp = {
            'key': key,
            'topValues': [
                {
                    'value': v[colname],
                    'name': tagstore.get_tag_value_label(colname, v[colname]),
                    'count': v['count'],
                }
                for v in top_values
            ],
        }
    return Response(resp)
def get_tag_value_paginator_for_projects(self, projects, environments, key, start, end,
                                         query=None, order_by='-last_seen'):
    """
    Return a SequencePaginator of TagValue objects for ``key`` across the
    given projects (optionally scoped to ``environments``), sourced from a
    snuba aggregation query.

    Only ``order_by='-last_seen'`` is supported; anything else raises
    ValueError. ``query``, when given, substring-matches candidate values.
    """
    from sentry.api.paginator import SequencePaginator

    if not order_by == '-last_seen':
        raise ValueError("Unsupported order_by: %s" % order_by)

    snuba_key = snuba.get_snuba_column_name(key)

    conditions = []

    # Columns we must not query directly are routed through the tags map.
    if snuba_key in BLACKLISTED_COLUMNS:
        snuba_key = 'tags[%s]' % (key,)

    if query:
        # Substring match on the value when a search query was provided.
        conditions.append([snuba_key, 'LIKE', u'%{}%'.format(query)])
    else:
        # Otherwise just exclude empty values.
        conditions.append([snuba_key, '!=', ''])

    filters = {
        'project_id': projects,
    }
    if environments:
        filters['environment'] = environments

    # One aggregate row per distinct value: count plus first/last seen bounds.
    results = snuba.query(
        start=start,
        end=end,
        groupby=[snuba_key],
        filter_keys=filters,
        aggregations=[
            ['count()', '', 'times_seen'],
            ['min', 'timestamp', 'first_seen'],
            ['max', 'timestamp', 'last_seen'],
        ],
        conditions=conditions,
        orderby=order_by,
        # TODO: This means they can't actually paginate all TagValues.
        limit=1000,
        arrayjoin=snuba.get_arrayjoin(snuba_key),
        referrer='tagstore.get_tag_value_paginator_for_projects',
    )

    tag_values = [
        TagValue(
            key=key,
            value=value,
            **fix_tag_value_data(data)
        ) for value, data in six.iteritems(results)
    ]

    desc = order_by.startswith('-')
    score_field = order_by.lstrip('-')
    # Score by the millisecond timestamp of the ordering field.
    return SequencePaginator(
        [(int(to_timestamp(getattr(tv, score_field)) * 1000), tv)
         for tv in tag_values],
        reverse=desc
    )
def test_get_snuba_column_name(self):
    """Alias-to-snuba-column translation for known columns and tag fallbacks."""
    expectations = [
        ("project_id", "project_id"),
        ("start", "start"),
        ("'thing'", "'thing'"),
        ("id", "event_id"),
        ("geo.region", "geo_region"),
        # This is odd behavior but captures what we do currently.
        ("tags[sentry:user]", "tags[tags[sentry:user]]"),
        ("organization", "tags[organization]"),
    ]
    for alias, expected in expectations:
        assert get_snuba_column_name(alias) == expected
def parse_columns_in_functions(col, context=None, index=None):
    """
    Checks expressions for arguments that should be considered a column while
    ignoring strings that represent clickhouse function names

    if col is a list, means the expression has functions and we need
    to parse for arguments that should be considered column names.

    Assumptions here:
     * strings that represent clickhouse function names are always followed by a list or tuple
     * strings that are quoted with single quotes are used as string literals for CH
     * otherwise we should attempt to get the snuba column name (or custom tag)

    ``context``/``index``, when given, identify the slot (``context[index]``)
    to overwrite with the resolved column name; the structure is mutated in
    place and the function returns None.
    """
    function_name_index = get_function_index(col)

    if function_name_index is not None:
        # if this is non zero, that means there are strings before this index
        # that should be converted to snuba column names
        # e.g. ['func1', ['column', 'func2', ['arg1']]]
        if function_name_index > 0:
            for i in range(function_name_index):
                if context is not None:
                    context[i] = get_snuba_column_name(col[i])

        args = col[function_name_index + 1]

        # check for nested functions in args
        # NOTE: must compare against None -- a nested function at index 0 made
        # the bare truthiness test (`if get_function_index(args):`) fall through
        # to the argument branch, mangling the function name into a tag column.
        if get_function_index(args) is not None:
            # look for columns
            return parse_columns_in_functions(args, args)

        # check each argument for column names
        else:
            for (i, arg) in enumerate(args):
                parse_columns_in_functions(arg, args, i)
    else:
        # probably a column name
        if context is not None and index is not None:
            context[index] = get_snuba_column_name(col)
def transform_aliases_and_query(**kwargs):
    """
    Convert aliases in selected_columns, groupby, aggregation, conditions,
    orderby and arrayjoin fields to their internal Snuba format and post the
    query to Snuba. Convert back translated aliases before returning snuba results.

    :deprecated: This method is deprecated. You should use sentry.snuba.discover instead.
    """
    arrayjoin_map = {"error": "exception_stacks", "stack": "exception_frames"}
    # alias -> external name map used to translate results back for callers.
    translated_columns = {}
    # Output aliases of functions/aggregations; these must not be re-resolved
    # as snuba columns when seen again in groupby/conditions/orderby.
    derived_columns = set()

    selected_columns = kwargs.get("selected_columns")
    groupby = kwargs.get("groupby")
    aggregations = kwargs.get("aggregations")
    conditions = kwargs.get("conditions")
    filter_keys = kwargs["filter_keys"]
    arrayjoin = kwargs.get("arrayjoin")
    orderby = kwargs.get("orderby")
    having = kwargs.get("having", [])
    dataset = Dataset.Events

    if selected_columns:
        for (idx, col) in enumerate(selected_columns):
            if isinstance(col, list):
                # if list, means there are potentially nested functions and need to
                # iterate and translate potential columns
                parse_columns_in_functions(col)
                selected_columns[idx] = col
                # col[2] is the expression's output alias.
                translated_columns[col[2]] = col[2]
                derived_columns.add(col[2])
            else:
                name = get_snuba_column_name(col)
                selected_columns[idx] = name
                translated_columns[name] = col

    if groupby:
        for (idx, col) in enumerate(groupby):
            if col not in derived_columns:
                name = get_snuba_column_name(col)
            else:
                name = col

            groupby[idx] = name
            translated_columns[name] = col

    for aggregation in aggregations or []:
        derived_columns.add(aggregation[2])
        # aggregation[1] is the aggregated column (or columns) and needs
        # translation too.
        if isinstance(aggregation[1], str):
            aggregation[1] = get_snuba_column_name(aggregation[1])
        elif isinstance(aggregation[1], (set, tuple, list)):
            aggregation[1] = [get_snuba_column_name(col) for col in aggregation[1]]

    # Rekey filter_keys in place to use snuba column names.
    for col in list(filter_keys.keys()):
        name = get_snuba_column_name(col)
        filter_keys[name] = filter_keys.pop(col)

    if conditions:
        aliased_conditions = []
        for condition in conditions:
            field = condition[0]
            # Conditions on derived (aggregated) columns belong in HAVING,
            # not WHERE.
            if not isinstance(field, (list, tuple)) and field in derived_columns:
                having.append(condition)
            else:
                aliased_conditions.append(condition)
        kwargs["conditions"] = aliased_conditions

    if having:
        kwargs["having"] = having

    if orderby:
        # Normalize to a list, then translate each field while preserving the
        # leading "-" descending marker.
        orderby = orderby if isinstance(orderby, (list, tuple)) else [orderby]
        translated_orderby = []

        for field_with_order in orderby:
            field = field_with_order.lstrip("-")
            translated_orderby.append(
                "{}{}".format(
                    "-" if field_with_order.startswith("-") else "",
                    field if field in derived_columns else get_snuba_column_name(field),
                )
            )

        kwargs["orderby"] = translated_orderby

    kwargs["arrayjoin"] = arrayjoin_map.get(arrayjoin, arrayjoin)
    kwargs["dataset"] = dataset

    result = aliased_query(**kwargs)

    snuba_filter = eventstore.Filter(
        rollup=kwargs.get("rollup"),
        start=kwargs.get("start"),
        end=kwargs.get("end"),
        orderby=kwargs.get("orderby"),
    )
    # Translate snuba column names in the result back to the caller's aliases.
    return transform_data(result, translated_columns, snuba_filter)
def test_get_snuba_column_name(self):
    """Alias translation, including dataset-dependent measurement/span fields."""
    # Cases resolved without an explicit dataset.
    default_cases = [
        ("project_id", "project_id"),
        ("start", "start"),
        ("'thing'", "'thing'"),
        ("id", "event_id"),
        ("geo.region", "geo_region"),
        ("tags[sentry:user]", "tags[sentry:user]"),
        ("organization", "tags[organization]"),
        ("unknown-key", "tags[unknown-key]"),
    ]
    for alias, expected in default_cases:
        assert get_snuba_column_name(alias) == expected

    dataset_cases = [
        # measurements are not available on the Events dataset, so it's seen as a tag
        ("measurements_key", Dataset.Events, "tags[measurements_key]"),
        ("measurements.key", Dataset.Events, "tags[measurements.key]"),
        # measurements are available on the Discover and Transactions dataset, so its parsed as such
        ("measurements_key", Dataset.Discover, "measurements.key"),
        ("measurements_key", Dataset.Transactions, "measurements.key"),
        ("measurements.key", Dataset.Discover, "measurements[key]"),
        ("measurements.key", Dataset.Transactions, "measurements[key]"),
        ("measurements.KEY", Dataset.Discover, "measurements[key]"),
        ("measurements.KEY", Dataset.Transactions, "measurements[key]"),
        # span op breakdowns are not available on the Events dataset, so it's seen as a tag
        ("span_op_breakdowns_key", Dataset.Events, "tags[span_op_breakdowns_key]"),
        ("span_op_breakdowns.key", Dataset.Events, "tags[span_op_breakdowns.key]"),
        # span op breakdowns are available on the Discover and Transactions dataset, so its parsed as such
        ("span_op_breakdowns_key", Dataset.Discover, "span_op_breakdowns.key"),
        ("span_op_breakdowns_key", Dataset.Transactions, "span_op_breakdowns.key"),
        ("spans.key", Dataset.Discover, "span_op_breakdowns[ops.key]"),
        ("spans.key", Dataset.Transactions, "span_op_breakdowns[ops.key]"),
        ("spans.KEY", Dataset.Discover, "span_op_breakdowns[ops.key]"),
        ("spans.KEY", Dataset.Transactions, "span_op_breakdowns[ops.key]"),
    ]
    for alias, dataset, expected in dataset_cases:
        assert get_snuba_column_name(alias, dataset) == expected
def get_tag_value_paginator_for_projects(
    self,
    projects,
    environments,
    key,
    start=None,
    end=None,
    query=None,
    order_by="-last_seen",
    include_transactions=False,
):
    """
    Return a SequencePaginator of TagValue objects for ``key`` across the
    given projects, optionally including the transactions (Discover) dataset.

    Only ``order_by="-last_seen"`` is supported; anything else raises
    ValueError. ``query``, when given, narrows candidate values. Several keys
    get special handling: fixed-value columns, transaction status codes,
    fuzzy numeric keys, the project alias, and the user-display alias.
    """
    from sentry.api.paginator import SequencePaginator

    if not order_by == "-last_seen":
        raise ValueError("Unsupported order_by: %s" % order_by)

    dataset = Dataset.Events
    snuba_key = snuba.get_snuba_column_name(key)
    # If the key is only a tag on Events but resolves to a real column on
    # Discover, prefer the Discover dataset when transactions are included.
    if include_transactions and snuba_key.startswith("tags["):
        snuba_key = snuba.get_snuba_column_name(key, dataset=Dataset.Discover)
        if not snuba_key.startswith("tags["):
            dataset = Dataset.Discover

    # We cannot search the values of these columns like we do other columns because they are
    # a different type, and as such, LIKE and != do not work on them. Furthermore, because the
    # use case for these values in autosuggestion is minimal, so we choose to disable them here.
    #
    # event_id:     This is a FixedString which disallows us to use LIKE on it when searching,
    #               but does work with !=. However, for consistency sake we disallow it
    #               entirely, furthermore, suggesting an event_id is not a very useful feature
    #               as they are not human readable.
    # timestamp:    This is a DateTime which disallows us to use both LIKE and != on it when
    #               searching. Suggesting a timestamp can potentially be useful but as it does
    #               work at all, we opt to disable it here. A potential solution can be to
    #               generate a time range to bound where they are searching. e.g. if a user
    #               enters 2020-07 we can generate the following conditions:
    #               >= 2020-07-01T00:00:00 AND <= 2020-07-31T23:59:59
    # time:         This is a column computed from timestamp so it suffers the same issues
    if snuba_key in {"event_id", "timestamp", "time"}:
        return SequencePaginator([])

    # These columns have fixed values and we don't need to emit queries to find out the
    # potential options.
    if key in {"error.handled", "error.unhandled"}:
        return SequencePaginator(
            [
                (
                    1,
                    TagValue(
                        key=key, value="true", times_seen=None, first_seen=None, last_seen=None
                    ),
                ),
                (
                    2,
                    TagValue(
                        key=key, value="false", times_seen=None, first_seen=None, last_seen=None
                    ),
                ),
            ]
        )

    conditions = []
    # transaction status needs a special case so that the user interacts with the names and not codes
    transaction_status = snuba_key == "transaction_status"
    if include_transactions and transaction_status:
        # Here we want to use the status codes during filtering,
        # but want to do this with names that include our query
        status_codes = [
            span_key
            for span_key, value in six.iteritems(SPAN_STATUS_CODE_TO_NAME)
            if (query and query in value) or (not query)
        ]
        if status_codes:
            conditions.append([snuba_key, "IN", status_codes])
        else:
            # No status name matched the query; nothing to paginate.
            return SequencePaginator([])
    elif key in FUZZY_NUMERIC_KEYS:
        # Numeric keys get a +/- distance range instead of a LIKE match.
        converted_query = int(query) if query is not None and query.isdigit() else None
        if converted_query is not None:
            conditions.append([snuba_key, ">=", converted_query - FUZZY_NUMERIC_DISTANCE])
            conditions.append([snuba_key, "<=", converted_query + FUZZY_NUMERIC_DISTANCE])
    elif include_transactions and key == PROJECT_ALIAS:
        # Resolve the project alias against the Project model (by slug
        # substring) and query snuba by the matching project ids.
        project_filters = {
            "id__in": projects,
        }
        if query:
            project_filters["slug__icontains"] = query
        project_queryset = Project.objects.filter(**project_filters).values("id", "slug")

        if not project_queryset.exists():
            return SequencePaginator([])

        project_slugs = {project["id"]: project["slug"] for project in project_queryset}
        projects = [project["id"] for project in project_queryset]
        snuba_key = "project_id"
        dataset = Dataset.Discover
    else:
        snuba_name = snuba_key

        is_user_alias = include_transactions and key == USER_DISPLAY_ALIAS
        if is_user_alias:
            # user.alias is a pseudo column in discover. It is computed by coalescing
            # together multiple user attributes. Here we get the coalesce function used,
            # and resolve it to the corresponding snuba query
            dataset = Dataset.Discover
            resolver = snuba.resolve_column(dataset)
            snuba_name = FIELD_ALIASES[USER_DISPLAY_ALIAS].get_field()
            snuba.resolve_complex_column(snuba_name, resolver)
        elif snuba_name in BLACKLISTED_COLUMNS:
            # Columns we must not query directly are routed through the tags map.
            snuba_name = "tags[%s]" % (key,)

        if query:
            conditions.append([snuba_name, "LIKE", u"%{}%".format(query)])
        else:
            conditions.append([snuba_name, "!=", ""])

    filters = {"project_id": projects}
    if environments:
        filters["environment"] = environments

    # One aggregate row per distinct value: count plus first/last seen bounds.
    results = snuba.query(
        dataset=dataset,
        start=start,
        end=end,
        groupby=[snuba_key],
        filter_keys=filters,
        aggregations=[
            ["count()", "", "times_seen"],
            ["min", "timestamp", "first_seen"],
            ["max", "timestamp", "last_seen"],
        ],
        conditions=conditions,
        orderby=order_by,
        # TODO: This means they can't actually paginate all TagValues.
        limit=1000,
        arrayjoin=snuba.get_arrayjoin(snuba_key),
        referrer="tagstore.get_tag_value_paginator_for_projects",
    )

    if include_transactions:
        # With transaction_status we need to map the ids back to their names
        if transaction_status:
            results = OrderedDict(
                [
                    (SPAN_STATUS_CODE_TO_NAME[result_key], data)
                    for result_key, data in six.iteritems(results)
                ]
            )
        # With project names we map the ids back to the project slugs
        elif key == PROJECT_ALIAS:
            results = OrderedDict(
                [
                    (project_slugs[value], data)
                    for value, data in six.iteritems(results)
                    if value in project_slugs
                ]
            )

    tag_values = [
        TagValue(key=key, value=six.text_type(value), **fix_tag_value_data(data))
        for value, data in six.iteritems(results)
    ]

    desc = order_by.startswith("-")
    score_field = order_by.lstrip("-")
    # Score by the millisecond timestamp of the ordering field.
    return SequencePaginator(
        [(int(to_timestamp(getattr(tv, score_field)) * 1000), tv) for tv in tag_values],
        reverse=desc,
    )
def get(self, request, organization):
    """
    Return the top values seen for a single tag/column ``key`` across the
    events matching the request's query, as ``{"key": ..., "topValues": [...]}``.

    Responds 404 when the events-v2 feature is disabled and 400 on invalid
    query args, invalid key, or when no valid project is included.
    """
    if not features.has(
            "organizations:events-v2", organization, actor=request.user):
        return Response(status=404)

    try:
        params = self.get_filter_params(request, organization)
        snuba_args = self.get_snuba_query_args(request, organization, params)
    except OrganizationEventsError as exc:
        return Response({"detail": exc.message}, status=400)
    except NoProjects:
        return Response({"detail": "A valid project must be included."}, status=400)

    try:
        key = self._validate_key(request)
        self._validate_project_ids(request, organization, snuba_args)
    except OrganizationEventsError as error:
        return Response({"detail": six.text_type(error)}, status=400)

    if key == PROJECT_KEY:
        # The pseudo "project" key is answered from project_id; project_id is
        # never null so no extra condition is needed.
        colname = "project_id"
        conditions = snuba_args["conditions"]
    else:
        colname = get_snuba_column_name(key)
        conditions = snuba_args["conditions"] + [[colname, "IS NOT NULL", None]]

    # Count events per distinct value, most frequent first.
    top_values = raw_query(
        start=snuba_args["start"],
        end=snuba_args["end"],
        conditions=conditions,
        filter_keys=snuba_args["filter_keys"],
        groupby=[colname],
        aggregations=[("count()", None, "count")],
        orderby="-count",
        limit=TOP_VALUES_DEFAULT_LIMIT,
        referrer="api.organization-events-distribution",
    )["data"]

    projects = {p.id: p.slug for p in self.get_projects(request, organization)}

    if key == PROJECT_KEY:
        # Map project ids back to slugs for display.
        resp = {
            "key": PROJECT_KEY,
            "topValues": [{
                "value": projects[v["project_id"]],
                "name": projects[v["project_id"]],
                "count": v["count"],
            } for v in top_values],
        }
    else:
        resp = {
            "key": key,
            "topValues": [{
                "value": v[colname],
                "name": tagstore.get_tag_value_label(colname, v[colname]),
                "count": v["count"],
            } for v in top_values],
        }
    return Response(resp)
def get_tag_value_paginator_for_projects(self, projects, environments, key,
                                         start=None, end=None, query=None,
                                         order_by="-last_seen"):
    """
    Return a SequencePaginator of TagValue objects for ``key`` across the
    given projects, querying the Discover dataset when the key resolves to a
    real column there.

    Only ``order_by="-last_seen"`` is supported; anything else raises
    ValueError. ``query``, when given, narrows candidate values.
    """
    from sentry.api.paginator import SequencePaginator

    if not order_by == "-last_seen":
        raise ValueError("Unsupported order_by: %s" % order_by)

    dataset = Dataset.Events
    snuba_key = snuba.get_snuba_column_name(key)
    # If the key is only a tag on Events but resolves to a real column on
    # Discover, prefer the Discover dataset.
    if snuba_key.startswith("tags["):
        snuba_key = snuba.get_snuba_column_name(key, dataset=Dataset.Discover)
        if not snuba_key.startswith("tags["):
            dataset = Dataset.Discover

    conditions = []
    # transaction status needs a special case so that the user interacts with the names and not codes
    transaction_status = snuba_key == "transaction_status"
    if transaction_status:
        conditions.append([
            snuba_key,
            "IN",
            # Here we want to use the status codes during filtering,
            # but want to do this with names that include our query
            [
                span_key
                for span_key, value in six.iteritems(SPAN_STATUS_CODE_TO_NAME)
                if (query and query in value) or (not query)
            ],
        ])
    elif key in FUZZY_NUMERIC_KEYS:
        # Numeric keys get a +/- distance range instead of a LIKE match.
        converted_query = int(query) if query is not None and query.isdigit() else None
        if converted_query is not None:
            conditions.append([snuba_key, ">=", converted_query - FUZZY_NUMERIC_DISTANCE])
            conditions.append([snuba_key, "<=", converted_query + FUZZY_NUMERIC_DISTANCE])
    elif key == PROJECT_ALIAS:
        # Resolve the project alias against the Project model (by slug
        # substring) and query snuba by the matching project ids.
        project_filters = {
            "id__in": projects,
        }
        if query:
            project_filters["slug__icontains"] = query
        project_queryset = Project.objects.filter(**project_filters).values("id", "slug")
        project_slugs = {project["id"]: project["slug"] for project in project_queryset}
        # NOTE(review): when no project matches, this falls through with the
        # original key/projects instead of returning an empty paginator, and
        # the project_slugs remap below can then KeyError — verify intended.
        if project_queryset.exists():
            projects = [project["id"] for project in project_queryset]
            snuba_key = "project_id"
            dataset = Dataset.Discover
    else:
        # Columns we must not query directly are routed through the tags map.
        if snuba_key in BLACKLISTED_COLUMNS:
            snuba_key = "tags[%s]" % (key, )

        if query:
            conditions.append([snuba_key, "LIKE", u"%{}%".format(query)])
        else:
            conditions.append([snuba_key, "!=", ""])

    filters = {"project_id": projects}
    if environments:
        filters["environment"] = environments

    # One aggregate row per distinct value: count plus first/last seen bounds.
    results = snuba.query(
        dataset=dataset,
        start=start,
        end=end,
        groupby=[snuba_key],
        filter_keys=filters,
        aggregations=[
            ["count()", "", "times_seen"],
            ["min", "timestamp", "first_seen"],
            ["max", "timestamp", "last_seen"],
        ],
        conditions=conditions,
        orderby=order_by,
        # TODO: This means they can't actually paginate all TagValues.
        limit=1000,
        arrayjoin=snuba.get_arrayjoin(snuba_key),
        referrer="tagstore.get_tag_value_paginator_for_projects",
    )

    # With transaction_status we need to map the ids back to their names
    if transaction_status:
        results = OrderedDict([
            (SPAN_STATUS_CODE_TO_NAME[result_key], data)
            for result_key, data in six.iteritems(results)
        ])
    # With project names we map the ids back to the project slugs
    elif key == PROJECT_ALIAS:
        results = OrderedDict([(project_slugs[value], data)
                               for value, data in six.iteritems(results)])

    tag_values = [
        TagValue(key=key, value=six.text_type(value), **fix_tag_value_data(data))
        for value, data in six.iteritems(results)
    ]

    desc = order_by.startswith("-")
    score_field = order_by.lstrip("-")
    # Score by the millisecond timestamp of the ordering field.
    return SequencePaginator(
        [(int(to_timestamp(getattr(tv, score_field)) * 1000), tv) for tv in tag_values],
        reverse=desc,
    )