Example #1
def _create_in_snuba(subscription):
    conditions = resolve_discover_aliases(get_filter(subscription.query))[0].conditions
    try:
        environment = subscription.environments.all()[:1].get()
    except Environment.DoesNotExist:
        environment = None

    if environment:
        conditions.append(["environment", "=", environment.name])
    conditions = apply_dataset_conditions(QueryDatasets(subscription.dataset), conditions)
    response = _snuba_pool.urlopen(
        "POST",
        "/%s/subscriptions" % (subscription.dataset,),
        body=json.dumps(
            {
                "project_id": subscription.project_id,
                "dataset": subscription.dataset,
                # We only care about conditions here. Filter keys only matter for
                # filtering to project and groups. Projects are handled with an
                # explicit param, and groups can't be queried here.
                "conditions": conditions,
                "aggregations": [
                    query_aggregation_to_snuba[QueryAggregations(subscription.aggregation)]
                ],
                "time_window": subscription.time_window,
                "resolution": subscription.resolution,
            }
        ),
    )
    if response.status != 202:
        raise SnubaError("HTTP %s response from Snuba!" % response.status)
    return json.loads(response.data)["subscription_id"]
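For reference, a minimal sketch of the JSON body this function POSTs to Snuba's /{dataset}/subscriptions endpoint; all concrete values below are hypothetical, and the aggregation triple follows Snuba's [function, column, alias] convention:

import json

# Hypothetical payload mirroring the fields serialized above.
payload = json.dumps(
    {
        "project_id": 1,
        "dataset": "events",
        "conditions": [
            ["environment", "=", "production"],
            ["type", "=", "error"],  # an assumed dataset-specific condition
        ],
        "aggregations": [["count()", "", "count"]],  # [function, column, alias]
        "time_window": 600,  # seconds
        "resolution": 60,  # seconds
    }
)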
Example #2
def _create_in_snuba(project, dataset, query, aggregation, time_window,
                     resolution, environments):
    conditions = resolve_discover_aliases(
        {"conditions": get_filter(query).conditions})[0]["conditions"]
    if environments:
        conditions.append(
            ["environment", "IN", [env.name for env in environments]])
    response = _snuba_pool.urlopen(
        "POST",
        "/%s/subscriptions" % (dataset.value, ),
        body=json.dumps({
            "project_id": project.id,
            "dataset": dataset.value,
            # We only care about conditions here. Filter keys only matter for
            # filtering to project and groups. Projects are handled with an
            # explicit param, and groups can't be queried here.
            "conditions": conditions,
            "aggregations": [query_aggregation_to_snuba[aggregation]],
            "time_window": int(time_window.total_seconds()),
            "resolution": int(resolution.total_seconds()),
        }),
    )
    if response.status != 202:
        raise SnubaError("HTTP %s response from Snuba!" % response.status)
    return json.loads(response.data)["subscription_id"]
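Unlike Example #1, this variant receives time_window and resolution as timedelta objects and converts them to whole seconds before serializing. A quick sketch of that conversion:

from datetime import timedelta

# Snuba expects whole seconds, so the timedeltas are converted on the way out.
time_window = timedelta(minutes=10)
resolution = timedelta(minutes=1)
assert int(time_window.total_seconds()) == 600
assert int(resolution.total_seconds()) == 60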
Example #3
def build_incident_query_params(incident,
                                start=None,
                                end=None,
                                windowed_stats=False):
    params = {}
    params["start"], params["end"] = calculate_incident_time_range(
        incident, start, end, windowed_stats=windowed_stats)

    group_ids = list(
        IncidentGroup.objects.filter(incident=incident).values_list("group_id",
                                                                    flat=True))
    if group_ids:
        params["group_ids"] = group_ids
    project_ids = list(
        IncidentProject.objects.filter(incident=incident).values_list(
            "project_id", flat=True))
    if project_ids:
        params["project_id"] = project_ids

    snuba_filter = get_filter(incident.alert_rule.snuba_query.query, params)
    conditions = resolve_discover_aliases(snuba_filter)[0].conditions
    if incident.alert_rule:
        conditions = apply_dataset_conditions(
            QueryDatasets(incident.alert_rule.snuba_query.dataset), conditions)
    return {
        "start": snuba_filter.start,
        "end": snuba_filter.end,
        "conditions": conditions,
        "filter_keys": snuba_filter.filter_keys,
        "having": [],
    }
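The returned dict is fed directly into a Snuba query. A sketch of its shape with hypothetical values; per the comments in the other examples, filter_keys only restricts projects and groups, while everything else rides in conditions:

from datetime import datetime, timedelta

# Hypothetical shape of the dict built above.
query_params = {
    "start": datetime(2021, 1, 1) - timedelta(hours=1),
    "end": datetime(2021, 1, 1),
    "conditions": [["environment", "=", "production"]],
    "filter_keys": {"project_id": [1, 2]},
    "having": [],
}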
Example #4
def _create_in_snuba(subscription):
    snuba_query = subscription.snuba_query
    snuba_filter = get_filter(snuba_query.query)
    snuba_filter.update_with(
        resolve_field_list([snuba_query.aggregate],
                           snuba_filter,
                           auto_fields=False))
    snuba_filter = resolve_discover_aliases(snuba_filter)[0]
    if snuba_query.environment:
        snuba_filter.conditions.append(
            ["environment", "=", snuba_query.environment.name])
    conditions = apply_dataset_conditions(QueryDatasets(snuba_query.dataset),
                                          snuba_filter.conditions)
    response = _snuba_pool.urlopen(
        "POST",
        "/%s/subscriptions" % (snuba_query.dataset, ),
        body=json.dumps({
            "project_id": subscription.project_id,
            "dataset": snuba_query.dataset,
            "conditions": conditions,
            "aggregations": snuba_filter.aggregations,
            "time_window": snuba_query.time_window,
            "resolution": snuba_query.resolution,
        }),
    )
    if response.status != 202:
        raise SnubaError("HTTP %s response from Snuba!" % response.status)
    return json.loads(response.data)["subscription_id"]
Example #5
def _create_in_snuba(subscription):
    conditions = resolve_discover_aliases(
        {"conditions": get_filter(subscription.query).conditions})[0]["conditions"]
    environments = list(subscription.environments.all())
    if environments:
        conditions.append(
            ["environment", "IN", [env.name for env in environments]])
    response = _snuba_pool.urlopen(
        "POST",
        "/%s/subscriptions" % (subscription.dataset, ),
        body=json.dumps({
            "project_id": subscription.project_id,
            "dataset": subscription.dataset,
            # We only care about conditions here. Filter keys only matter for
            # filtering to project and groups. Projects are handled with an
            # explicit param, and groups can't be queried here.
            "conditions": conditions,
            "aggregations": [
                query_aggregation_to_snuba[QueryAggregations(subscription.aggregation)]
            ],
            "time_window": subscription.time_window,
            "resolution": subscription.resolution,
        }),
    )
    if response.status != 202:
        raise SnubaError("HTTP %s response from Snuba!" % response.status)
    return json.loads(response.data)["subscription_id"]
Example #6
def build_snuba_filter(dataset, query, aggregate, environment, params=None):
    snuba_filter = get_filter(query, params=params)
    snuba_filter.update_with(
        resolve_field_list([aggregate], snuba_filter, auto_fields=False))
    snuba_filter = resolve_discover_aliases(snuba_filter)[0]
    if environment:
        snuba_filter.conditions.append(["environment", "=", environment.name])
    snuba_filter.conditions = apply_dataset_conditions(dataset,
                                                       snuba_filter.conditions)
    return snuba_filter
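Every condition appended here is a Snuba [lhs, operator, rhs] triple; a minimal sketch of the list manipulation, with assumed values:

# Snuba conditions are [lhs, operator, rhs] triples; environment filters are
# just more triples appended to the list.
conditions = [["type", "=", "error"]]  # an assumed dataset condition
conditions.append(["environment", "=", "production"])  # single environment
conditions.append(["environment", "IN", ["staging", "production"]])  # multi-environment variant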
Example #7
def query_tag_data(
    params: Mapping[str, str],
    referrer: str,
    filter_query: Optional[str] = None,
    aggregate_column: Optional[str] = None,
) -> Optional[Dict]:
    """
    Fetch general data about all the transactions with this transaction name to feed into the facet query
    :return: Returns the row with aggregate and count if the query was successful.
             Returns None if the query was not successful, which causes the endpoint to return early.
    """
    with sentry_sdk.start_span(op="discover.discover",
                               description="facets.filter_transform") as span:
        span.set_data("query", filter_query)
        snuba_filter = get_filter(filter_query, params)

        # Resolve the public aliases into the discover dataset names.
        snuba_filter, translated_columns = discover.resolve_discover_aliases(
            snuba_filter)

    translated_aggregate_column = discover.resolve_discover_column(
        aggregate_column)

    with sentry_sdk.start_span(op="discover.discover",
                               description="facets.frequent_tags"):
        # Get the average and count to use to filter the next request to facets
        tag_data = discover.query(
            selected_columns=[
                "count()",
                f"avg({aggregate_column}) as aggregate",
                f"max({aggregate_column}) as max",
                f"min({aggregate_column}) as min",
            ],
            conditions=[
                [translated_aggregate_column, "IS NOT NULL", None],
            ],
            query=filter_query,
            params=params,
            orderby=["-count"],
            referrer=f"{referrer}.all_transactions",
            limit=1,
        )

        if len(tag_data["data"]) != 1:
            return None

        counts = [r["count"] for r in tag_data["data"]]
        aggregates = [r["aggregate"] for r in tag_data["data"]]

        # Return early to avoid further queries when there are zero matching
        # transactions or the aggregate column doesn't exist
        if counts[0] == 0 or aggregates[0] is None:
            return None
    if not tag_data["data"][0]:
        return None
    return tag_data["data"][0]
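The single row returned here drives the follow-up facet query. A hypothetical example of its shape, assuming aggregate_column="duration":

# Hypothetical row returned by query_tag_data.
row = {
    "count": 120000,  # number of matching transactions
    "aggregate": 532.6,  # avg(duration)
    "max": 9023.0,
    "min": 3.2,
}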
Example #8
def bulk_build_incident_query_params(incidents,
                                     start=None,
                                     end=None,
                                     windowed_stats=False):
    incident_groups = defaultdict(list)
    for incident_id, group_id in IncidentGroup.objects.filter(
            incident__in=incidents).values_list("incident_id", "group_id"):
        incident_groups[incident_id].append(group_id)
    incident_projects = defaultdict(list)
    for incident_id, project_id in IncidentProject.objects.filter(
            incident__in=incidents).values_list("incident_id", "project_id"):
        incident_projects[incident_id].append(project_id)

    attach_foreignkey(incidents, Incident.alert_rule)

    query_args_list = []
    for incident in incidents:
        params = {}

        params["start"], params["end"] = calculate_incident_time_range(
            incident, start, end, windowed_stats=windowed_stats)

        group_ids = incident_groups[incident.id]
        if group_ids:
            params["group_ids"] = group_ids
        project_ids = incident_projects[incident.id]
        if project_ids:
            params["project_id"] = project_ids

        snuba_filter = get_filter(incident.alert_rule.snuba_query.query,
                                  params)
        conditions = resolve_discover_aliases(snuba_filter)[0].conditions
        if incident.alert_rule:
            conditions = apply_dataset_conditions(
                QueryDatasets(incident.alert_rule.snuba_query.dataset),
                conditions)
        snuba_args = {
            "start": snuba_filter.start,
            "end": snuba_filter.end,
            "conditions": conditions,
            "filter_keys": snuba_filter.filter_keys,
            "having": [],
        }
        query_args_list.append(snuba_args)

    return query_args_list
Example #9
def bulk_build_incident_query_params(incidents,
                                     start=None,
                                     end=None,
                                     prewindow=False):
    incident_groups = defaultdict(list)
    for incident_id, group_id in IncidentGroup.objects.filter(
            incident__in=incidents).values_list("incident_id", "group_id"):
        incident_groups[incident_id].append(group_id)
    incident_projects = defaultdict(list)
    for incident_id, project_id in IncidentProject.objects.filter(
            incident__in=incidents).values_list("incident_id", "project_id"):
        incident_projects[incident_id].append(project_id)

    query_args_list = []
    for incident in incidents:
        params = {
            "start": incident.date_started if start is None else start,
            "end": incident.current_end_date if end is None else end,
        }
        if prewindow:
            prewindow_time_range = calculate_incident_prewindow(
                params["start"], params["end"])
            params["start"] = params["start"] - prewindow_time_range
        group_ids = incident_groups[incident.id]
        if group_ids:
            params["group_ids"] = group_ids
        project_ids = incident_projects[incident.id]
        if project_ids:
            params["project_id"] = project_ids

        snuba_filter = get_filter(incident.query, params)
        snuba_args = {
            "start": snuba_filter.start,
            "end": snuba_filter.end,
            "conditions": snuba_filter.conditions,
            "filter_keys": snuba_filter.filter_keys,
            "having": [],
        }
        snuba_args["conditions"] = resolve_discover_aliases(
            snuba_args)[0]["conditions"]
        query_args_list.append(snuba_args)

    return query_args_list
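Example #10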
def query_facet_performance(
    params: Mapping[str, str],
    tag_data: Mapping[str, Any],
    aggregate_column: Optional[str] = None,
    filter_query: Optional[str] = None,
    orderby: Optional[List[str]] = None,
    referrer: Optional[str] = None,
    limit: Optional[int] = None,
    offset: Optional[int] = None,
    all_tag_keys: Optional[bool] = None,
    tag_key: Optional[str] = None,
) -> Dict:
    with sentry_sdk.start_span(op="discover.discover",
                               description="facets.filter_transform") as span:
        span.set_data("query", filter_query)
        snuba_filter = discover.get_filter(filter_query, params)

        # Resolve the public aliases into the discover dataset names.
        snuba_filter, translated_columns = discover.resolve_discover_aliases(
            snuba_filter)
    translated_aggregate_column = discover.resolve_discover_column(
        aggregate_column)

    # Aggregate (avg) and count of all transactions for this query
    transaction_aggregate = tag_data["aggregate"]

    # Dynamically sample so at least 50000 transactions are selected
    sample_start_count = 50000
    transaction_count = tag_data["count"]
    sampling_enabled = transaction_count > sample_start_count

    # log-e growth starting at 50,000
    target_sample = max(
        sample_start_count * (math.log(transaction_count) -
                              (math.log(sample_start_count) - 1)),
        sample_start_count,
    )

    dynamic_sample_rate = 0 if transaction_count <= 0 else (target_sample /
                                                            transaction_count)
    sample_rate = min(max(dynamic_sample_rate, 0),
                      1) if sampling_enabled else None
    frequency_sample_rate = sample_rate if sample_rate else 1

    # Exclude tags that have high cardinality and are generally unrelated to performance
    excluded_tags = [
        "tags_key",
        "NOT IN",
        [
            "trace", "trace.ctx", "trace.span", "project", "browser",
            "celery_task_id", "url"
        ],
    ]

    with sentry_sdk.start_span(op="discover.discover",
                               description="facets.aggregate_tags") as span:
        span.set_data("sample_rate", sample_rate)
        span.set_data("target_sample", target_sample)
        conditions = snuba_filter.conditions
        aggregate_comparison = transaction_aggregate * 1.005 if transaction_aggregate else 0
        having = [excluded_tags]
        if not all_tag_keys and not tag_key:
            having.append(["aggregate", ">", aggregate_comparison])

        resolved_orderby = [] if orderby is None else orderby

        conditions.append([translated_aggregate_column, "IS NOT NULL", None])

        if tag_key:
            conditions.append(["tags_key", "IN", [tag_key]])
        tag_key_limit = limit if tag_key else 1

        tag_selected_columns = [
            [
                "divide",
                [
                    [
                        "sum",
                        [
                            "minus",
                            [
                                translated_aggregate_column,
                                str(transaction_aggregate),
                            ],
                        ],
                    ],
                    frequency_sample_rate,
                ],
                "sumdelta",
            ],
            ["count", [], "count"],
            [
                "divide",
                [
                    [
                        "divide",
                        [["count", []], frequency_sample_rate],
                    ],
                    transaction_count,
                ],
                "frequency",
            ],
            ["divide", ["aggregate", transaction_aggregate], "comparison"],
            ["avg", [translated_aggregate_column], "aggregate"],
        ]

        results = discover.raw_query(
            selected_columns=tag_selected_columns,
            conditions=conditions,
            start=snuba_filter.start,
            end=snuba_filter.end,
            filter_keys=snuba_filter.filter_keys,
            orderby=resolved_orderby + ["tags_key"],
            groupby=["tags_key", "tags_value"],
            having=having,
            dataset=Dataset.Discover,
            referrer=f"{referrer}.tag_values".format(referrer, "tag_values"),
            sample=sample_rate,
            turbo=sample_rate is not None,
            limitby=[tag_key_limit, "tags_key"],
            limit=limit,
            offset=offset,
        )

        results["meta"] = discover.transform_meta(results, {})

        return results
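A worked sketch of the dynamic sampling math above, assuming a hypothetical 500,000 matching transactions:

import math

# Worked example of the log-e sampling curve with assumed counts.
sample_start_count = 50000
transaction_count = 500000
target_sample = max(
    sample_start_count * (math.log(transaction_count) -
                          (math.log(sample_start_count) - 1)),
    sample_start_count,
)
sample_rate = min(max(target_sample / transaction_count, 0), 1)
# target_sample is ~165,129, so sample_rate is ~0.33: roughly a third of the
# transactions are sampled while still selecting well over 50,000 rows.

Example #11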
def query_top_tags(
    params: Mapping[str, str],
    tag_key: str,
    limit: int,
    referrer: str,
    orderby: Optional[List[str]],
    offset: Optional[int] = None,
    aggregate_column: Optional[str] = None,
    filter_query: Optional[str] = None,
) -> Optional[List[Any]]:
    """
    Fetch counts by tag value, finding the top tag values for a tag key up to the given limit.
    :return: Returns the rows with the value, the aggregate and the count if the query was successful.
             Returns None if the query was not successful, which causes the endpoint to return early.
    """
    with sentry_sdk.start_span(
        op="discover.discover", description="facets.filter_transform"
    ) as span:
        span.set_data("query", filter_query)
        snuba_filter = get_filter(filter_query, params)

        # Resolve the public aliases into the discover dataset names.
        snuba_filter, translated_columns = discover.resolve_discover_aliases(snuba_filter)

    translated_aggregate_column = discover.resolve_discover_column(aggregate_column)

    with sentry_sdk.start_span(op="discover.discover", description="facets.top_tags"):

        if not orderby:
            orderby = ["-count"]

        for i, sort in enumerate(orderby):
            if "frequency" in sort:
                # Replace "frequency" with "count": they sort by the same underlying
                # data dimension, so the existing histogram query doesn't need modification.
                orderby[i] = sort.replace("frequency", "count")

        if "tags_value" not in orderby:
            orderby = orderby + ["tags_value"]

        # Get the average and count to use to filter the next request to facets
        tag_data = discover.query(
            selected_columns=[
                "count()",
                f"avg({aggregate_column}) as aggregate",
                "array_join(tags.value) as tags_value",
            ],
            query=filter_query,
            params=params,
            orderby=orderby,
            conditions=[
                [translated_aggregate_column, "IS NOT NULL", None],
                ["tags_key", "IN", [tag_key]],
            ],
            functions_acl=["array_join"],
            referrer=f"{referrer}.top_tags",
            limit=limit,
            offset=offset,
        )

        if len(tag_data["data"]) <= 0:
            return None

        counts = [r["count"] for r in tag_data["data"]]

        # Return early to avoid further queries when there are zero matching transactions
        if counts[0] == 0:
            return None
    if not tag_data["data"]:
        return None
    return tag_data["data"]
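The orderby rewrite above maps "frequency" sorts onto "count" before the query is issued; a minimal sketch of that transformation:

# "frequency" and "count" sort by the same underlying dimension, so rewriting
# the sort avoids modifying the existing histogram query.
orderby = ["-frequency"]
orderby = [sort.replace("frequency", "count") for sort in orderby]
if "tags_value" not in orderby:
    orderby = orderby + ["tags_value"]
assert orderby == ["-count", "tags_value"]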