def __get_event_id_from_filter(self, filter=None, orderby=None):
    """Look up a single neighbouring event for pagination.

    Runs a limit-1 query on the Discover dataset and returns a
    ``(project_id, event_id)`` pair of text strings, or ``None`` when no
    row matches or the combined paging/date conditions are impossible.
    """
    try:
        # NOTE(review): this variant references `Dataset.Discover` bare while
        # a sibling uses `snuba.Dataset.Discover` — confirm `Dataset` is
        # imported at module level.
        result = snuba.dataset_query(
            selected_columns=["event_id", "project_id", "timestamp"],
            conditions=filter.conditions,
            filter_keys=filter.filter_keys,
            start=filter.start,
            end=filter.end,
            limit=1,
            referrer="eventstore.discover_dataset.get_next_or_prev_event_id",
            orderby=orderby,
            dataset=Dataset.Discover,
        )
    except (snuba.QueryOutsideRetentionError, snuba.QueryOutsideGroupActivityError):
        # Paging conditions combined with the current event's date can
        # produce an impossible window; treat that as "no neighbour".
        return None

    if "error" in result:
        return None
    rows = result["data"]
    if len(rows) == 0:
        return None

    first = rows[0]
    return (six.text_type(first["project_id"]), six.text_type(first["event_id"]))
def get_events(
    self,
    filter,
    additional_columns=None,
    orderby=None,
    limit=DEFAULT_LIMIT,
    offset=DEFAULT_OFFSET,
    referrer="eventstore.get_events",
):
    """
    Get events from Snuba.

    Queries the dataset with the given filter's conditions/date range and
    wraps each resulting row in a ``SnubaEvent``. Returns an empty list
    when Snuba reports an error.
    """
    assert filter, "You must provide a filter"

    result = snuba.dataset_query(
        selected_columns=self.__get_columns(additional_columns),
        start=filter.start,
        end=filter.end,
        conditions=filter.conditions,
        filter_keys=filter.filter_keys,
        orderby=orderby or DESC_ORDERING,
        limit=limit,
        offset=offset,
        referrer=referrer,
    )

    if "error" in result:
        return []
    return [SnubaEvent(row) for row in result["data"]]
def _get_terminal_event_id(self, direction, snuba_args, event):
    """Return the event_id at the far end of the result set.

    For ``Direction.NEXT`` this is the newest event strictly after
    ``event``; otherwise the oldest event strictly before it. Returns
    ``None`` when no such event exists.
    """
    if direction == Direction.NEXT:
        boundary = ["timestamp", ">", event.timestamp]
        # Newest first so the limit-1 row is the terminal (latest) event.
        orderby = ["-timestamp", "-event_id"]
    else:
        boundary = ["timestamp", "<", event.timestamp]
        orderby = ["timestamp", "event_id"]

    # Copy before extending so the caller's condition list is untouched.
    conditions = snuba_args["conditions"][:]
    conditions.append(boundary)

    result = snuba.dataset_query(
        selected_columns=["event_id"],
        start=snuba_args.get("start", None),
        end=snuba_args.get("end", None),
        conditions=conditions,
        dataset=snuba.detect_dataset(snuba_args, aliased_conditions=True),
        filter_keys=snuba_args["filter_keys"],
        orderby=orderby,
        limit=1,
    )

    rows = result["data"] if result and "data" in result else []
    if len(rows) == 0:
        return None
    return rows[0]["event_id"]
def __get_event_id_from_filter(self, filter=None, orderby=None):
    """Resolve one ``(project_id, event_id)`` pair for a filter.

    Queries the Discover dataset (which covers both errors and
    transactions — required when paginating in discover) and returns a
    tuple of text strings, or ``None`` when nothing matches or the
    paging conditions are contradictory.
    """
    selected = [Columns.EVENT_ID.value.alias, Columns.PROJECT_ID.value.alias]

    try:
        result = snuba.dataset_query(
            selected_columns=selected,
            conditions=filter.conditions,
            filter_keys=filter.filter_keys,
            start=filter.start,
            end=filter.end,
            limit=1,
            referrer="eventstore.get_next_or_prev_event_id",
            orderby=orderby,
            dataset=snuba.Dataset.Discover,
        )
    except (snuba.QueryOutsideRetentionError, snuba.QueryOutsideGroupActivityError):
        # The date conditions for paging plus the current event can form
        # an impossible window — there is simply no neighbouring event.
        return None

    if "error" in result:
        return None
    data = result["data"]
    if not data:
        return None

    row = data[0]
    return (six.text_type(row["project_id"]), six.text_type(row["event_id"]))
def __get_next_or_prev_event_id(self, filter=None, orderby=None):
    """Fetch the adjacent event's ``(project_id, event_id)`` pair.

    The dataset is auto-detected from the selected columns and the
    filter's conditions. Returns ``None`` on a Snuba error or when no
    row matches.
    """
    columns = ["event_id", "project_id"]

    # Let snuba pick the dataset based on what the query actually touches.
    dataset = snuba.detect_dataset(
        {"selected_columns": columns, "conditions": filter.conditions},
        aliased_conditions=True,
    )

    result = snuba.dataset_query(
        selected_columns=columns,
        conditions=filter.conditions,
        filter_keys=filter.filter_keys,
        start=filter.start,
        end=filter.end,
        limit=1,
        referrer="eventstore.get_next_or_prev_event_id",
        orderby=orderby,
        dataset=dataset,
    )

    if "error" in result:
        return None
    data = result["data"]
    if not data:
        return None

    return (six.text_type(data[0]["project_id"]), six.text_type(data[0]["event_id"]))
def snuba_search(
    start,
    end,
    project_ids,
    environment_ids,
    sort_field,
    cursor=None,
    candidate_ids=None,
    limit=None,
    offset=0,
    get_sample=False,
    search_filters=None,
):
    """
    This function doesn't strictly benefit from or require being pulled
    out of the main query method above, but the query method is already
    large and this function at least extracts most of the Snuba-specific
    logic.

    Returns a tuple of:
    * a sorted list of (group_id, group_score) tuples sorted descending by score,
    * the count of total results (rows) available for this query.
    """
    filters = {"project_id": project_ids}

    if environment_ids is not None:
        filters["environment"] = environment_ids

    if candidate_ids:
        filters["group_id"] = sorted(candidate_ids)

    conditions = []
    having = []
    # FIX: iterate `search_filters or []` — the default is None, which the
    # original iterated directly and would raise TypeError.
    for search_filter in search_filters or []:
        if (
            # Don't filter on issue fields here, they're not available
            search_filter.key.name in issue_only_fields
            # We special case date
            or search_filter.key.name == "date"
        ):
            continue
        converted_filter = convert_search_filter_to_snuba_query(search_filter)

        # Ensure that no user-generated tags that clashes with aggregation_defs is added to having
        if search_filter.key.name in aggregation_defs and not search_filter.key.is_tag:
            having.append(converted_filter)
        else:
            conditions.append(converted_filter)

    extra_aggregations = dependency_aggregations.get(sort_field, [])
    required_aggregations = set([sort_field, "total"] + extra_aggregations)
    for h in having:
        alias = h[0]
        required_aggregations.add(alias)

    aggregations = []
    for alias in required_aggregations:
        aggregations.append(aggregation_defs[alias] + [alias])

    if cursor is not None:
        having.append((sort_field, ">=" if cursor.is_prev else "<=", cursor.value))

    selected_columns = []
    if get_sample:
        # FIX: hashlib.md5 requires bytes on Python 3; repr() output is
        # ASCII, so encoding is safe on both Python 2 and 3.
        query_hash = md5(repr(conditions).encode("utf-8")).hexdigest()[:8]
        selected_columns.append(("cityHash64", ("'{}'".format(query_hash), "group_id"), "sample"))
        sort_field = "sample"
        orderby = [sort_field]
        referrer = "search_sample"
    else:
        # Get the top matching groups by score, i.e. the actual search results
        # in the order that we want them.
        orderby = ["-{}".format(sort_field), "group_id"]  # ensure stable sort within the same score
        referrer = "search"

    snuba_results = snuba.dataset_query(
        dataset=Dataset.Events,
        start=start,
        end=end,
        selected_columns=selected_columns,
        groupby=["group_id"],
        conditions=conditions,
        having=having,
        filter_keys=filters,
        aggregations=aggregations,
        orderby=orderby,
        referrer=referrer,
        limit=limit,
        offset=offset,
        totals=True,  # Needs to have totals_mode=after_having_exclusive so we get groups matching HAVING only
        turbo=get_sample,  # Turn off FINAL when in sampling mode
        sample=1,  # Don't use clickhouse sampling, even when in turbo mode.
    )
    rows = snuba_results["data"]
    total = snuba_results["totals"]["total"]

    if not get_sample:
        metrics.timing("snuba.search.num_result_groups", len(rows))

    return [(row["group_id"], row[sort_field]) for row in rows], total
def transform_aliases_and_query(**kwargs):
    """
    Convert aliases in selected_columns, groupby, aggregation, conditions,
    orderby and arrayjoin fields to their internal Snuba format and post the
    query to Snuba. Convert back translated aliases before returning snuba results.

    :deprecated: This method is deprecated. You should use sentry.snuba.discover instead.
    """
    arrayjoin_map = {"error": "exception_stacks", "stack": "exception_frames"}
    translated_columns = {}
    derived_columns = set()

    selected_columns = kwargs.get("selected_columns")
    groupby = kwargs.get("groupby")
    aggregations = kwargs.get("aggregations")
    conditions = kwargs.get("conditions")
    filter_keys = kwargs["filter_keys"]
    arrayjoin = kwargs.get("arrayjoin")
    orderby = kwargs.get("orderby")
    having = kwargs.get("having", [])
    dataset = Dataset.Events

    if selected_columns:
        for (idx, col) in enumerate(selected_columns):
            if isinstance(col, list):
                # if list, means there are potentially nested functions and need to
                # iterate and translate potential columns
                parse_columns_in_functions(col)
                selected_columns[idx] = col
                translated_columns[col[2]] = col[2]
                derived_columns.add(col[2])
            else:
                name = get_snuba_column_name(col)
                selected_columns[idx] = name
                translated_columns[name] = col

    if groupby:
        for (idx, col) in enumerate(groupby):
            if col not in derived_columns:
                name = get_snuba_column_name(col)
            else:
                name = col
            groupby[idx] = name
            translated_columns[name] = col

    for aggregation in aggregations or []:
        derived_columns.add(aggregation[2])
        if isinstance(aggregation[1], six.string_types):
            aggregation[1] = get_snuba_column_name(aggregation[1])
        elif isinstance(aggregation[1], (set, tuple, list)):
            aggregation[1] = [get_snuba_column_name(col) for col in aggregation[1]]

    # FIX: snapshot the keys before mutating — popping/inserting while
    # iterating the live key view raises RuntimeError on Python 3.
    for col in list(filter_keys.keys()):
        name = get_snuba_column_name(col)
        filter_keys[name] = filter_keys.pop(col)

    if conditions:
        aliased_conditions = []
        for condition in conditions:
            field = condition[0]
            # Conditions on derived (aggregate) columns must move to HAVING;
            # function-style conditions (list/tuple fields) stay in WHERE.
            if not isinstance(field, (list, tuple)) and field in derived_columns:
                having.append(condition)
            else:
                aliased_conditions.append(condition)
        kwargs["conditions"] = aliased_conditions

    if having:
        kwargs["having"] = having

    if orderby:
        orderby = orderby if isinstance(orderby, (list, tuple)) else [orderby]
        translated_orderby = []
        for field_with_order in orderby:
            field = field_with_order.lstrip("-")
            translated_orderby.append(
                u"{}{}".format(
                    "-" if field_with_order.startswith("-") else "",
                    field if field in derived_columns else get_snuba_column_name(field),
                )
            )
        kwargs["orderby"] = translated_orderby

    kwargs["arrayjoin"] = arrayjoin_map.get(arrayjoin, arrayjoin)
    kwargs["dataset"] = dataset

    result = dataset_query(**kwargs)

    # Map the internal Snuba column names back to the caller's aliases.
    return transform_results(result, translated_columns, kwargs)