def __get_event_id_from_filter(self, filter=None, orderby=None):
    """Look up a single neighbouring event for pagination.

    Runs a limit-1 query on the Discover dataset and returns a
    ``(project_id, event_id)`` pair of text strings, or ``None`` when no
    row matches or the combined paging/date conditions are impossible.
    """
    try:
        # NOTE(review): this variant references `Dataset.Discover` bare while
        # a sibling uses `snuba.Dataset.Discover` — confirm `Dataset` is
        # imported at module level.
        result = snuba.dataset_query(
            selected_columns=["event_id", "project_id", "timestamp"],
            conditions=filter.conditions,
            filter_keys=filter.filter_keys,
            start=filter.start,
            end=filter.end,
            limit=1,
            referrer="eventstore.discover_dataset.get_next_or_prev_event_id",
            orderby=orderby,
            dataset=Dataset.Discover,
        )
    except (snuba.QueryOutsideRetentionError, snuba.QueryOutsideGroupActivityError):
        # Paging conditions combined with the current event's date can
        # produce an impossible window; treat that as "no neighbour".
        return None

    if "error" in result:
        return None
    rows = result["data"]
    if len(rows) == 0:
        return None

    first = rows[0]
    return (six.text_type(first["project_id"]), six.text_type(first["event_id"]))
def get_events(
    self,
    filter,
    additional_columns=None,
    orderby=None,
    limit=DEFAULT_LIMIT,
    offset=DEFAULT_OFFSET,
    referrer="eventstore.get_events",
):
    """
    Get events from Snuba.

    Queries the dataset with the given filter's conditions/date range and
    wraps each resulting row in a ``SnubaEvent``. Returns an empty list
    when Snuba reports an error.
    """
    assert filter, "You must provide a filter"

    result = snuba.dataset_query(
        selected_columns=self.__get_columns(additional_columns),
        start=filter.start,
        end=filter.end,
        conditions=filter.conditions,
        filter_keys=filter.filter_keys,
        orderby=orderby or DESC_ORDERING,
        limit=limit,
        offset=offset,
        referrer=referrer,
    )

    if "error" in result:
        return []
    return [SnubaEvent(row) for row in result["data"]]
def _get_terminal_event_id(self, direction, snuba_args, event):
    """Return the event_id at the far end of the result set.

    For ``Direction.NEXT`` this is the newest event strictly after
    ``event``; otherwise the oldest event strictly before it. Returns
    ``None`` when no such event exists.
    """
    if direction == Direction.NEXT:
        boundary = ["timestamp", ">", event.timestamp]
        # Newest first so the limit-1 row is the terminal (latest) event.
        orderby = ["-timestamp", "-event_id"]
    else:
        boundary = ["timestamp", "<", event.timestamp]
        orderby = ["timestamp", "event_id"]

    # Copy before extending so the caller's condition list is untouched.
    conditions = snuba_args["conditions"][:]
    conditions.append(boundary)

    result = snuba.dataset_query(
        selected_columns=["event_id"],
        start=snuba_args.get("start", None),
        end=snuba_args.get("end", None),
        conditions=conditions,
        dataset=snuba.detect_dataset(snuba_args, aliased_conditions=True),
        filter_keys=snuba_args["filter_keys"],
        orderby=orderby,
        limit=1,
    )

    rows = result["data"] if result and "data" in result else []
    if len(rows) == 0:
        return None
    return rows[0]["event_id"]
def __get_event_id_from_filter(self, filter=None, orderby=None):
    """Resolve one ``(project_id, event_id)`` pair for a filter.

    Queries the Discover dataset (which covers both errors and
    transactions — required when paginating in discover) and returns a
    tuple of text strings, or ``None`` when nothing matches or the
    paging conditions are contradictory.
    """
    selected = [Columns.EVENT_ID.value.alias, Columns.PROJECT_ID.value.alias]

    try:
        result = snuba.dataset_query(
            selected_columns=selected,
            conditions=filter.conditions,
            filter_keys=filter.filter_keys,
            start=filter.start,
            end=filter.end,
            limit=1,
            referrer="eventstore.get_next_or_prev_event_id",
            orderby=orderby,
            dataset=snuba.Dataset.Discover,
        )
    except (snuba.QueryOutsideRetentionError, snuba.QueryOutsideGroupActivityError):
        # The date conditions for paging plus the current event can form
        # an impossible window — there is simply no neighbouring event.
        return None

    if "error" in result:
        return None
    data = result["data"]
    if not data:
        return None

    row = data[0]
    return (six.text_type(row["project_id"]), six.text_type(row["event_id"]))
def __get_next_or_prev_event_id(self, filter=None, orderby=None):
    """Fetch the adjacent event's ``(project_id, event_id)`` pair.

    The dataset is auto-detected from the selected columns and the
    filter's conditions. Returns ``None`` on a Snuba error or when no
    row matches.
    """
    columns = ["event_id", "project_id"]

    # Let snuba pick the dataset based on what the query actually touches.
    dataset = snuba.detect_dataset(
        {"selected_columns": columns, "conditions": filter.conditions},
        aliased_conditions=True,
    )

    result = snuba.dataset_query(
        selected_columns=columns,
        conditions=filter.conditions,
        filter_keys=filter.filter_keys,
        start=filter.start,
        end=filter.end,
        limit=1,
        referrer="eventstore.get_next_or_prev_event_id",
        orderby=orderby,
        dataset=dataset,
    )

    if "error" in result:
        return None
    data = result["data"]
    if not data:
        return None

    return (six.text_type(data[0]["project_id"]), six.text_type(data[0]["event_id"]))
def snuba_search(
    start,
    end,
    project_ids,
    environment_ids,
    sort_field,
    cursor=None,
    candidate_ids=None,
    limit=None,
    offset=0,
    get_sample=False,
    search_filters=None,
):
    """
    This function doesn't strictly benefit from or require being pulled
    out of the main query method above, but the query method is already
    large and this function at least extracts most of the Snuba-specific
    logic.

    Returns a tuple of:
    * a sorted list of (group_id, group_score) tuples sorted descending by score,
    * the count of total results (rows) available for this query.
    """
    filters = {"project_id": project_ids}

    if environment_ids is not None:
        filters["environment"] = environment_ids

    if candidate_ids:
        filters["group_id"] = sorted(candidate_ids)

    conditions = []
    having = []
    # FIX: iterate `search_filters or []` — the default is None, which the
    # original iterated directly and would raise TypeError.
    for search_filter in search_filters or []:
        if (
            # Don't filter on issue fields here, they're not available
            search_filter.key.name in issue_only_fields
            # We special case date
            or search_filter.key.name == "date"
        ):
            continue
        converted_filter = convert_search_filter_to_snuba_query(search_filter)

        # Ensure that no user-generated tags that clashes with aggregation_defs is added to having
        if search_filter.key.name in aggregation_defs and not search_filter.key.is_tag:
            having.append(converted_filter)
        else:
            conditions.append(converted_filter)

    extra_aggregations = dependency_aggregations.get(sort_field, [])
    required_aggregations = set([sort_field, "total"] + extra_aggregations)
    for h in having:
        alias = h[0]
        required_aggregations.add(alias)

    aggregations = []
    for alias in required_aggregations:
        aggregations.append(aggregation_defs[alias] + [alias])

    if cursor is not None:
        having.append((sort_field, ">=" if cursor.is_prev else "<=", cursor.value))

    selected_columns = []
    if get_sample:
        # FIX: hashlib.md5 requires bytes on Python 3; repr() output is
        # ASCII, so encoding is safe on both Python 2 and 3.
        query_hash = md5(repr(conditions).encode("utf-8")).hexdigest()[:8]
        selected_columns.append(("cityHash64", ("'{}'".format(query_hash), "group_id"), "sample"))
        sort_field = "sample"
        orderby = [sort_field]
        referrer = "search_sample"
    else:
        # Get the top matching groups by score, i.e. the actual search results
        # in the order that we want them.
        orderby = ["-{}".format(sort_field), "group_id"]  # ensure stable sort within the same score
        referrer = "search"

    snuba_results = snuba.dataset_query(
        dataset=Dataset.Events,
        start=start,
        end=end,
        selected_columns=selected_columns,
        groupby=["group_id"],
        conditions=conditions,
        having=having,
        filter_keys=filters,
        aggregations=aggregations,
        orderby=orderby,
        referrer=referrer,
        limit=limit,
        offset=offset,
        totals=True,  # Needs to have totals_mode=after_having_exclusive so we get groups matching HAVING only
        turbo=get_sample,  # Turn off FINAL when in sampling mode
        sample=1,  # Don't use clickhouse sampling, even when in turbo mode.
    )
    rows = snuba_results["data"]
    total = snuba_results["totals"]["total"]

    if not get_sample:
        metrics.timing("snuba.search.num_result_groups", len(rows))

    return [(row["group_id"], row[sort_field]) for row in rows], total
def transform_aliases_and_query(**kwargs):
    """
    Convert aliases in selected_columns, groupby, aggregation, conditions,
    orderby and arrayjoin fields to their internal Snuba format and post the
    query to Snuba. Convert back translated aliases before returning snuba results.

    :deprecated: This method is deprecated. You should use sentry.snuba.discover instead.
    """
    arrayjoin_map = {"error": "exception_stacks", "stack": "exception_frames"}
    translated_columns = {}
    derived_columns = set()

    selected_columns = kwargs.get("selected_columns")
    groupby = kwargs.get("groupby")
    aggregations = kwargs.get("aggregations")
    conditions = kwargs.get("conditions")
    filter_keys = kwargs["filter_keys"]
    arrayjoin = kwargs.get("arrayjoin")
    orderby = kwargs.get("orderby")
    having = kwargs.get("having", [])
    dataset = Dataset.Events

    if selected_columns:
        for (idx, col) in enumerate(selected_columns):
            if isinstance(col, list):
                # if list, means there are potentially nested functions and need to
                # iterate and translate potential columns
                parse_columns_in_functions(col)
                selected_columns[idx] = col
                translated_columns[col[2]] = col[2]
                derived_columns.add(col[2])
            else:
                name = get_snuba_column_name(col)
                selected_columns[idx] = name
                translated_columns[name] = col

    if groupby:
        for (idx, col) in enumerate(groupby):
            if col not in derived_columns:
                name = get_snuba_column_name(col)
            else:
                name = col
            groupby[idx] = name
            translated_columns[name] = col

    for aggregation in aggregations or []:
        derived_columns.add(aggregation[2])
        if isinstance(aggregation[1], six.string_types):
            aggregation[1] = get_snuba_column_name(aggregation[1])
        elif isinstance(aggregation[1], (set, tuple, list)):
            aggregation[1] = [get_snuba_column_name(col) for col in aggregation[1]]

    # FIX: snapshot the keys before mutating — popping/inserting while
    # iterating the live key view raises RuntimeError on Python 3.
    for col in list(filter_keys.keys()):
        name = get_snuba_column_name(col)
        filter_keys[name] = filter_keys.pop(col)

    if conditions:
        aliased_conditions = []
        for condition in conditions:
            field = condition[0]
            # Conditions on derived (aggregate) columns must move to HAVING;
            # function-style conditions (list/tuple fields) stay in WHERE.
            if not isinstance(field, (list, tuple)) and field in derived_columns:
                having.append(condition)
            else:
                aliased_conditions.append(condition)
        kwargs["conditions"] = aliased_conditions

    if having:
        kwargs["having"] = having

    if orderby:
        orderby = orderby if isinstance(orderby, (list, tuple)) else [orderby]
        translated_orderby = []
        for field_with_order in orderby:
            field = field_with_order.lstrip("-")
            translated_orderby.append(
                u"{}{}".format(
                    "-" if field_with_order.startswith("-") else "",
                    field if field in derived_columns else get_snuba_column_name(field),
                )
            )
        kwargs["orderby"] = translated_orderby

    kwargs["arrayjoin"] = arrayjoin_map.get(arrayjoin, arrayjoin)
    kwargs["dataset"] = dataset

    result = dataset_query(**kwargs)

    # Map the internal Snuba column names back to the caller's aliases.
    return transform_results(result, translated_columns, kwargs)