Example #1
0
def query_suspect_span_groups(
    params: ParamsType,
    fields: List[str],
    query: Optional[str],
    span_ops: Optional[List[str]],
    span_groups: Optional[List[str]],
    direction: str,
    orderby: str,
    limit: int,
    offset: int,
) -> List[SuspectSpan]:
    """Query aggregated statistics for span (op, group) pairs.

    Builds a Discover query grouped on ``array_join(spans_op)`` and
    ``array_join(spans_group)``, runs it, and converts every returned row
    into a ``SuspectSpan``.

    Args:
        params: Passed through unchanged to ``QueryBuilder``.
        fields: Extra columns to select. Entries recognised by
            ``is_equation`` are left out of the select list and are not
            added to the equations either. When empty, every
            non-``equation[`` sort column of every
            ``SPAN_PERFORMANCE_COLUMNS`` entry is selected as well.
        query: Search query forwarded to ``QueryBuilder``.
        span_ops: When non-empty, only rows whose span op is in this list
            are kept.
        span_groups: When non-empty, only rows whose span group is in this
            list are kept.
        direction: String prepended to each sort column (presumably ``""``
            or ``"-"`` -- confirm against the caller).
        orderby: Key into ``SPAN_PERFORMANCE_COLUMNS`` choosing the column
            set and the sort columns.
        limit: Row limit forwarded to ``QueryBuilder``.
        offset: Row offset forwarded to ``QueryBuilder``.

    Returns:
        One ``SuspectSpan`` per row in the query result's ``"data"`` list.

    Raises:
        KeyError: If ``orderby`` is not a key of
            ``SPAN_PERFORMANCE_COLUMNS``.
    """
    perf_columns = SPAN_PERFORMANCE_COLUMNS[orderby]
    group_columns = perf_columns.suspect_op_group_columns

    # Plain columns go in the select list; equation columns are passed to
    # the builder separately through ``equations``.
    selected_columns: List[str] = []
    for candidate in group_columns + fields:
        if not is_equation(candidate):
            selected_columns.append(candidate)
    selected_columns += [
        "array_join(spans_op)",
        "array_join(spans_group)",
        "count()",
        "count_unique(id)",
    ]

    # Only the equations that belong to the chosen column set are used;
    # equations supplied through ``fields`` are not included here.
    equations: List[str] = []
    for candidate in group_columns:
        if is_equation(candidate):
            equations.append(strip_equation(candidate))

    # TODO: When no fields are given, every possible field is added to the
    # query. Because of the way shards aggregate the rows this can be slow,
    # so callers should specify the fields they need to limit the
    # aggregations that are computed.
    #
    # This fallback exists for the transition period only and should be
    # removed in the future.
    if not fields:
        selected_columns.extend(
            sort_column
            for column_set in SPAN_PERFORMANCE_COLUMNS.values()
            for sort_column in column_set.suspect_op_group_sort
            if not sort_column.startswith("equation[")
        )

    sort_keys = [
        direction + sort_column
        for sort_column in perf_columns.suspect_op_group_sort
    ]

    builder = QueryBuilder(
        dataset=Dataset.Discover,
        params=params,
        selected_columns=selected_columns,
        equations=equations,
        query=query,
        orderby=sort_keys,
        auto_aggregations=True,
        use_aggregate_conditions=True,
        limit=limit,
        offset=offset,
        functions_acl=[
            "array_join", "sumArray", "percentileArray", "maxArray"
        ],
    )

    # Optional IN-filters on the array-joined op and group columns; a
    # filter is only created when its list of allowed values is non-empty.
    filters = [
        Condition(
            builder.resolve_function(function),
            Op.IN,
            Function("tuple", allowed),
        )
        for function, allowed in (
            ("array_join(spans_op)", span_ops),
            ("array_join(spans_group)", span_groups),
        )
        if allowed
    ]
    if filters:
        builder.add_conditions(filters)

    results = raw_snql_query(
        builder.get_snql_query(),
        "api.organization-events-spans-performance-suspects",
    )

    # op and group are always selected, so they are indexed directly; the
    # remaining aggregates are read with ``.get`` so a missing key yields
    # ``None`` instead of raising.
    suspects: List[SuspectSpan] = []
    for row in results["data"]:
        suspects.append(
            SuspectSpan(
                op=row["array_join_spans_op"],
                group=row["array_join_spans_group"],
                frequency=row.get("count_unique_id"),
                count=row.get("count"),
                avg_occurrences=row.get("equation[0]"),
                sum_exclusive_time=row.get("sumArray_spans_exclusive_time"),
                p50_exclusive_time=row.get(
                    "percentileArray_spans_exclusive_time_0_50"),
                p75_exclusive_time=row.get(
                    "percentileArray_spans_exclusive_time_0_75"),
                p95_exclusive_time=row.get(
                    "percentileArray_spans_exclusive_time_0_95"),
                p99_exclusive_time=row.get(
                    "percentileArray_spans_exclusive_time_0_99"),
            ))
    return suspects
Example #2
0
def query_example_transactions(
    params: ParamsType,
    query: Optional[str],
    direction: str,
    orderby: str,
    spans: List[Span],
    per_suspect: int = 5,
    offset: Optional[int] = None,
) -> Dict[Span, List[EventID]]:
    """Find example events for each of the given span (op, group) pairs.

    Args:
        params: Passed through unchanged to ``QueryBuilder``.
        query: Search query forwarded to ``QueryBuilder``.
        direction: String prepended to each sort column (presumably ``""``
            or ``"-"`` -- confirm against the caller).
        orderby: Key into ``SPAN_PERFORMANCE_COLUMNS``; its
            ``suspect_example_sort`` columns are selected and sorted on.
        spans: The op/group pairs to find examples for.
        per_suspect: Number of examples wanted per pair. The overall query
            limit is ``len(spans) * per_suspect``.
        offset: Row offset forwarded to ``QueryBuilder``.

    Returns:
        A dict with one entry per pair in ``spans``, mapping it to the list
        of ``EventID`` values built from the matching result rows. The dict
        is empty when ``spans`` is empty or ``per_suspect`` is ``0``.

    Raises:
        KeyError: If ``orderby`` is not a key of
            ``SPAN_PERFORMANCE_COLUMNS``.
    """
    # Nothing to look for: return early and save an empty query.
    if not spans:
        return {}
    if per_suspect == 0:
        return {}

    sort_columns = SPAN_PERFORMANCE_COLUMNS[orderby].suspect_example_sort

    selected_columns: List[str] = [
        "id",
        "project.id",
        "project",
        "array_join(spans_op)",
        "array_join(spans_group)",
    ] + list(sort_columns)

    builder = QueryBuilder(
        dataset=Dataset.Discover,
        params=params,
        selected_columns=selected_columns,
        query=query,
        orderby=[direction + sort_column for sort_column in sort_columns],
        # room for `per_suspect` examples for every suspect
        limit=len(spans) * per_suspect,
        offset=offset,
        functions_acl=[
            "array_join", "sumArray", "percentileArray", "maxArray"
        ],
    )

    # Restrict the rows to exactly the (op, group) pairs of the suspects:
    # (array_join(spans_op), array_join(spans_group)) IN ((op, group), ...)
    op_column = builder.resolve_function("array_join(spans_op)")
    group_column = builder.resolve_function("array_join(spans_group)")
    wanted_pairs = [
        Function("tuple", [suspect.op, suspect.group]) for suspect in spans
    ]
    pair_filter = Condition(
        Function("tuple", [op_column, group_column]),
        Op.IN,
        Function("tuple", wanted_pairs),
    )
    builder.add_conditions([pair_filter])

    if len(spans) > 1:
        # HACK: a LIMIT BY clause only accepts columns, while the intent is
        # to limit by the two array joins. For now the limit is applied
        # directly to the internal snuba name of the span group column.
        # This should not be relied upon in production, and when two spans
        # differ only by their op the resulting query is incorrect.
        builder.limitby = LimitBy(
            Column("_snuba_array_join_spans_group"),
            per_suspect,
        )

    results = raw_snql_query(
        builder.get_snql_query(),
        "api.organization-events-spans-performance-examples",
    )

    # Start every requested pair with an empty list so that pairs without
    # any matching row still appear in the result.
    examples: Dict[Span, List[EventID]] = {}
    for suspect in spans:
        examples[Span(suspect.op, suspect.group)] = []

    for row in results["data"]:
        pair = Span(
            row["array_join_spans_op"],
            row["array_join_spans_group"],
        )
        event = EventID(row["project.id"], row["project"], row["id"])
        examples[pair].append(event)

    return examples
Example #3
0
def query_suspect_span_groups(
    params: ParamsType,
    fields: List[str],
    query: Optional[str],
    span_ops: Optional[List[str]],
    span_groups: Optional[List[str]],
    direction: str,
    orderby: str,
    limit: int,
    offset: int,
) -> List[SuspectSpan]:
    """Query aggregated statistics for span (op, group) pairs.

    Builds a Discover query grouped on ``array_join(spans_op)`` and
    ``array_join(spans_group)``, runs it, and converts every returned row
    into a ``SuspectSpan``. The span description of each row is obtained by
    calling ``get_span_description`` once per row.

    Args:
        params: Passed through unchanged to ``QueryBuilder``. The first
            entry of ``params["project_id"]`` is used to build the
            ``EventID`` for the description lookup.
        fields: Extra columns to query. Entries recognised by
            ``is_equation`` are passed as equations, all others as selected
            columns.
        query: Search query forwarded to ``QueryBuilder``.
        span_ops: When non-empty, only rows whose span op is in this list
            are kept.
        span_groups: When non-empty, only rows whose span group is in this
            list are kept.
        direction: String prepended to each sort column (presumably ``""``
            or ``"-"`` -- confirm against the caller).
        orderby: Key into ``SPAN_PERFORMANCE_COLUMNS`` choosing the column
            set and the sort columns.
        limit: Row limit forwarded to ``QueryBuilder``.
        offset: Row offset forwarded to ``QueryBuilder``.

    Returns:
        One ``SuspectSpan`` per row in the query result's ``"data"`` list.

    Raises:
        KeyError: If ``orderby`` is not a key of
            ``SPAN_PERFORMANCE_COLUMNS``.
    """
    perf_columns = SPAN_PERFORMANCE_COLUMNS[orderby]
    requested = perf_columns.suspect_op_group_columns + fields

    # Split the requested columns: plain ones are selected directly, while
    # equations are stripped and handed to the builder separately.
    selected_columns: List[str] = []
    for candidate in requested:
        if not is_equation(candidate):
            selected_columns.append(candidate)
    selected_columns += [
        "array_join(spans_op)",
        "array_join(spans_group)",
        # a single event id is enough to fetch the span description from
        # nodestore
        "any(id)",
    ]

    equations: List[str] = []
    for candidate in requested:
        if is_equation(candidate):
            equations.append(strip_equation(candidate))

    sort_keys = [
        direction + sort_column for sort_column in perf_columns.suspect_op_group_sort
    ]

    builder = QueryBuilder(
        dataset=Dataset.Discover,
        params=params,
        selected_columns=selected_columns,
        equations=equations,
        query=query,
        orderby=sort_keys,
        auto_aggregations=True,
        use_aggregate_conditions=True,
        limit=limit,
        offset=offset,
        functions_acl=["array_join", "sumArray", "percentileArray", "maxArray"],
    )

    # Optional IN-filters on the array-joined op and group columns; a
    # filter is only created when its list of allowed values is non-empty.
    filters = [
        Condition(
            builder.resolve_function(function),
            Op.IN,
            Function("tuple", allowed),
        )
        for function, allowed in (
            ("array_join(spans_op)", span_ops),
            ("array_join(spans_group)", span_groups),
        )
        if allowed
    ]
    if filters:
        builder.add_conditions(filters)

    results = raw_snql_query(
        builder.get_snql_query(),
        "api.organization-events-spans-performance-suspects",
    )

    # op, group and any(id) are always selected, so they are indexed
    # directly; the remaining aggregates are read with ``.get`` so a missing
    # key yields ``None`` instead of raising.
    suspects: List[SuspectSpan] = []
    for row in results["data"]:
        suspects.append(
            SuspectSpan(
                op=row["array_join_spans_op"],
                group=row["array_join_spans_group"],
                description=get_span_description(
                    EventID(params["project_id"][0], row["any_id"]),
                    span_op=row["array_join_spans_op"],
                    span_group=row["array_join_spans_group"],
                ),
                frequency=row.get("count_unique_id"),
                count=row.get("count"),
                avg_occurrences=row.get("equation[0]"),
                sum_exclusive_time=row.get("sumArray_spans_exclusive_time"),
                p50_exclusive_time=row.get("percentileArray_spans_exclusive_time_0_50"),
                p75_exclusive_time=row.get("percentileArray_spans_exclusive_time_0_75"),
                p95_exclusive_time=row.get("percentileArray_spans_exclusive_time_0_95"),
                p99_exclusive_time=row.get("percentileArray_spans_exclusive_time_0_99"),
            )
        )
    return suspects