Code Example #1
def get_equation_list(self, organization: Organization,
                      request: Request) -> Sequence[str]:
    """equations have a prefix so that they can be easily included alongside our existing fields"""
    return [
        strip_equation(field) for field in request.GET.getlist("field")[:]
        if is_equation(field)
    ]
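
The is_equation / strip_equation helpers used throughout these examples are small prefix utilities. A minimal sketch of their presumed behavior, assuming the "equation|" marker that the docstring above alludes to:

EQUATION_PREFIX = "equation|"  # assumed marker; the docstring above only says equations carry a prefix

def is_equation(field: str) -> bool:
    # A field is an equation when it carries the marker prefix,
    # letting it travel in the same "field" query parameter as plain fields.
    return field.startswith(EQUATION_PREFIX)

def strip_equation(field: str) -> str:
    # Drop the marker, leaving only the arithmetic expression.
    return field[len(EQUATION_PREFIX):]

# strip_equation("equation|count() / count_unique(user)")
# -> "count() / count_unique(user)"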
Code Example #2
    def serialize_multiple_axis(
        self,
        serializer: BaseSnubaSerializer,
        event_result: SnubaTSResult,
        columns: Sequence[str],
        query_columns: Sequence[str],
        allow_partial_buckets: bool,
        zerofill_results: bool = True,
    ) -> Dict[str, Any]:
        # Return with requested yAxis as the key
        result = {}
        equations = 0
        for index, query_column in enumerate(query_columns):
            result[columns[index]] = serializer.serialize(
                event_result,
                resolve_axis_column(query_column, equations),
                order=index,
                allow_partial_buckets=allow_partial_buckets,
                zerofill_results=zerofill_results,
            )
            if is_equation(query_column):
                equations += 1
        # Set order if multi-axis + top events
        if "order" in event_result.data:
            result["order"] = event_result.data["order"]

        return result
Code Example #3
def get_field_list(self, organization: Organization,
                   request: HttpRequest) -> Sequence[str]:
    if self.has_arithmetic(organization, request):
        return [
            field for field in request.GET.getlist("field")[:]
            if not is_equation(field)
        ]
    else:
        return request.GET.getlist("field")[:]
Code Example #4
def is_aggregate(field: str) -> bool:
    field_match = re.match(AGGREGATE_PATTERN, field)
    if field_match:
        return True

    equation_match = re.match(AGGREGATE_BASE, field) and is_equation(field)
    if equation_match:
        return True

    return False
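
A quick illustration of the two branches; these calls are hypothetical and assume AGGREGATE_PATTERN matches function-style fields while AGGREGATE_BASE also matches a parenthesized aggregate buried inside an equation string:

# Hypothetical inputs, not from the source:
is_aggregate("p95(transaction.duration)")  # True via AGGREGATE_PATTERN
is_aggregate("equation|count() / 2")       # True via AGGREGATE_BASE + is_equation
is_aggregate("transaction")                # False: plain field, no aggregate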
Code Example #5
def serialize_multiple_axis(self, serializer, event_result, columns,
                            query_columns, allow_partial_buckets):
    # Return with requested yAxis as the key
    result = {}
    equations = 0
    for index, query_column in enumerate(query_columns):
        result[columns[index]] = serializer.serialize(
            event_result,
            resolve_axis_column(query_column, equations),
            order=index,
            allow_partial_buckets=allow_partial_buckets,
        )
        if is_equation(query_column):
            equations += 1
    # Set order if multi-axis + top events
    if "order" in event_result.data:
        result["order"] = event_result.data["order"]
    return result
Code Example #6
def get_field_list(self, organization: Organization, request: Request) -> Sequence[str]:
    return [field for field in request.GET.getlist("field")[:] if not is_equation(field)]
Code Example #7
def resolve_axis_column(column: str, index: int = 0) -> str:
    return cast(
        str, get_function_alias(column) if not is_equation(column) else f"equation[{index}]"
    )
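
Plain columns resolve to their function alias, while equations resolve to a positional equation[index] key; this is what lets serialize_multiple_axis above count equations as it iterates. A sketch, assuming get_function_alias normalizes "count()" to "count":

resolve_axis_column("count()")                  # -> "count"
resolve_axis_column("equation|count() / 2", 0)  # -> "equation[0]"
resolve_axis_column("equation|count() * 2", 1)  # -> "equation[1]"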
Code Example #8
def query_suspect_span_groups(
    params: ParamsType,
    fields: List[str],
    query: Optional[str],
    span_ops: Optional[List[str]],
    span_groups: Optional[List[str]],
    direction: str,
    orderby: str,
    limit: int,
    offset: int,
) -> List[SuspectSpan]:
    suspect_span_columns = SPAN_PERFORMANCE_COLUMNS[orderby]

    selected_columns: List[str] = [
        column
        for column in suspect_span_columns.suspect_op_group_columns + fields
        if not is_equation(column)
    ] + [
        "array_join(spans_op)",
        "array_join(spans_group)",
        "count()",
        "count_unique(id)",
    ]

    equations: List[str] = [
        strip_equation(column)
        for column in suspect_span_columns.suspect_op_group_columns
        if is_equation(column)
    ]

    # TODO: This adds all the possible fields to the query by default. However,
    # due to the way shards aggregate the rows, this can be slow. As an
    # optimization, allow the fields to be user specified to only get the
    # necessary aggregations.
    #
    # As part of the transition, continue to add all possible fields when it's
    # not specified, but this should be removed in the future.
    if not fields:
        for column in SPAN_PERFORMANCE_COLUMNS.values():
            for col in column.suspect_op_group_sort:
                if not col.startswith("equation["):
                    selected_columns.append(col)

    builder = QueryBuilder(
        dataset=Dataset.Discover,
        params=params,
        selected_columns=selected_columns,
        equations=equations,
        query=query,
        orderby=[
            direction + column
            for column in suspect_span_columns.suspect_op_group_sort
        ],
        auto_aggregations=True,
        use_aggregate_conditions=True,
        limit=limit,
        offset=offset,
        functions_acl=[
            "array_join", "sumArray", "percentileArray", "maxArray"
        ],
    )

    extra_conditions = []

    if span_ops:
        extra_conditions.append(
            Condition(
                builder.resolve_function("array_join(spans_op)"),
                Op.IN,
                Function("tuple", span_ops),
            ))

    if span_groups:
        extra_conditions.append(
            Condition(
                builder.resolve_function("array_join(spans_group)"),
                Op.IN,
                Function("tuple", span_groups),
            ))

    if extra_conditions:
        builder.add_conditions(extra_conditions)

    snql_query = builder.get_snql_query()
    results = raw_snql_query(
        snql_query, "api.organization-events-spans-performance-suspects")

    return [
        SuspectSpan(
            op=suspect["array_join_spans_op"],
            group=suspect["array_join_spans_group"],
            frequency=suspect.get("count_unique_id"),
            count=suspect.get("count"),
            avg_occurrences=suspect.get("equation[0]"),
            sum_exclusive_time=suspect.get("sumArray_spans_exclusive_time"),
            p50_exclusive_time=suspect.get(
                "percentileArray_spans_exclusive_time_0_50"),
            p75_exclusive_time=suspect.get(
                "percentileArray_spans_exclusive_time_0_75"),
            p95_exclusive_time=suspect.get(
                "percentileArray_spans_exclusive_time_0_95"),
            p99_exclusive_time=suspect.get(
                "percentileArray_spans_exclusive_time_0_99"),
        ) for suspect in results["data"]
    ]
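
Note the lookup suspect.get("equation[0]") in the result rows: the first stripped equation handed to QueryBuilder comes back under the positional alias equation[0], mirroring resolve_axis_column in Code Example #7. As a hypothetical illustration (the real values live in SPAN_PERFORMANCE_COLUMNS):

# Hypothetical column list, not the actual SPAN_PERFORMANCE_COLUMNS values:
suspect_op_group_columns = [
    "equation|count() / count_unique(id)",  # selected under alias "equation[0]"
    "sum(spans.exclusive_time)",
]
# avg_occurrences is then read back with suspect.get("equation[0]")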
Code Example #9
def get_top5_display_mode(field: str) -> str:
    if is_equation(field):
        return "top5line"

    return "top5line" if field.split("(")[0] in line_plot_fields else "top5"
Code Example #10
def query_suspect_span_groups(
    params: ParamsType,
    fields: List[str],
    query: Optional[str],
    span_ops: Optional[List[str]],
    span_groups: Optional[List[str]],
    direction: str,
    orderby: str,
    limit: int,
    offset: int,
) -> List[SuspectSpan]:
    suspect_span_columns = SPAN_PERFORMANCE_COLUMNS[orderby]

    selected_columns: List[str] = [
        column
        for column in suspect_span_columns.suspect_op_group_columns + fields
        if not is_equation(column)
    ] + [
        "array_join(spans_op)",
        "array_join(spans_group)",
        # want a single event id to fetch from nodestore for the span description
        "any(id)",
    ]

    equations: List[str] = [
        strip_equation(column)
        for column in suspect_span_columns.suspect_op_group_columns + fields
        if is_equation(column)
    ]

    builder = QueryBuilder(
        dataset=Dataset.Discover,
        params=params,
        selected_columns=selected_columns,
        equations=equations,
        query=query,
        orderby=[direction + column for column in suspect_span_columns.suspect_op_group_sort],
        auto_aggregations=True,
        use_aggregate_conditions=True,
        limit=limit,
        offset=offset,
        functions_acl=["array_join", "sumArray", "percentileArray", "maxArray"],
    )

    extra_conditions = []

    if span_ops:
        extra_conditions.append(
            Condition(
                builder.resolve_function("array_join(spans_op)"),
                Op.IN,
                Function("tuple", span_ops),
            )
        )

    if span_groups:
        extra_conditions.append(
            Condition(
                builder.resolve_function("array_join(spans_group)"),
                Op.IN,
                Function("tuple", span_groups),
            )
        )

    if extra_conditions:
        builder.add_conditions(extra_conditions)

    snql_query = builder.get_snql_query()
    results = raw_snql_query(snql_query, "api.organization-events-spans-performance-suspects")

    return [
        SuspectSpan(
            op=suspect["array_join_spans_op"],
            group=suspect["array_join_spans_group"],
            description=get_span_description(
                EventID(params["project_id"][0], suspect["any_id"]),
                span_op=suspect["array_join_spans_op"],
                span_group=suspect["array_join_spans_group"],
            ),
            frequency=suspect.get("count_unique_id"),
            count=suspect.get("count"),
            avg_occurrences=suspect.get("equation[0]"),
            sum_exclusive_time=suspect.get("sumArray_spans_exclusive_time"),
            p50_exclusive_time=suspect.get("percentileArray_spans_exclusive_time_0_50"),
            p75_exclusive_time=suspect.get("percentileArray_spans_exclusive_time_0_75"),
            p95_exclusive_time=suspect.get("percentileArray_spans_exclusive_time_0_95"),
            p99_exclusive_time=suspect.get("percentileArray_spans_exclusive_time_0_99"),
        )
        for suspect in results["data"]
    ]
Code Example #11
def get_timeseries_snuba_filter(selected_columns, query, params):
    snuba_filter = get_filter(query, params)
    if not snuba_filter.start and not snuba_filter.end:
        raise InvalidSearchQuery(
            "Cannot get timeseries result without a start and end.")

    columns = []
    equations = []

    for column in selected_columns:
        if is_equation(column):
            equations.append(strip_equation(column))
        else:
            columns.append(column)

    if len(equations) > 0:
        resolved_equations, updated_columns = resolve_equation_list(
            equations, columns, aggregates_only=True, auto_add=True)
    else:
        resolved_equations = []
        updated_columns = columns

    # For the new apdex, we need to add project threshold config as a selected
    # column which means the group by for the time series won't work.
    # As a temporary solution, we will calculate the mean of all the project
    # level thresholds in the request and use the legacy apdex, user_misery
    # or count_miserable calculation.
    # TODO(snql): Alias the project_threshold_config column so it doesn't
    # have to be in the SELECT statement and group by to be able to use new apdex,
    # user_misery and count_miserable.
    threshold = None
    for agg in CONFIGURABLE_AGGREGATES:
        if agg not in updated_columns:
            continue

        if threshold is None:
            project_ids = params.get("project_id")
            threshold_configs = list(
                ProjectTransactionThreshold.objects.filter(
                    organization_id=params["organization_id"],
                    project_id__in=project_ids,
                ).values_list("threshold", flat=True))

            projects_without_threshold = len(project_ids) - len(
                threshold_configs)
            threshold_configs.extend([DEFAULT_PROJECT_THRESHOLD] *
                                     projects_without_threshold)
            threshold = int(mean(threshold_configs))

        updated_columns.remove(agg)
        updated_columns.append(
            CONFIGURABLE_AGGREGATES[agg].format(threshold=threshold))

    snuba_filter.update_with(
        resolve_field_list(updated_columns,
                           snuba_filter,
                           auto_fields=False,
                           resolved_equations=resolved_equations))

    # Resolve the public aliases into the discover dataset names.
    snuba_filter, translated_columns = resolve_discover_aliases(snuba_filter)
    if not snuba_filter.aggregations:
        raise InvalidSearchQuery(
            "Cannot get timeseries result with no aggregation.")

    return snuba_filter, translated_columns
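
The loop at the top of this function is the recurring split seen across these examples: prefixed fields are stripped and routed to the equation resolver, everything else stays a plain column. A minimal illustration, assuming the "equation|" prefix:

selected_columns = ["count()", "equation|count() / count_unique(user)"]
# after the split loop:
#   columns   == ["count()"]
#   equations == ["count() / count_unique(user)"]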
Code Example #12
    def validate(self, data):
        organization = self.context["organization"]
        query_info = data["query_info"]

        # Validate the project field, if provided
        # A PermissionDenied error will be raised in `get_projects_by_id` if the request is invalid
        project_query = query_info.get("project")
        if project_query:
            get_projects_by_id = self.context["get_projects_by_id"]
            # Coerce the query into a set
            if isinstance(project_query, list):
                projects = get_projects_by_id(set(map(int, project_query)))
            else:
                projects = get_projects_by_id({int(project_query)})
            query_info["project"] = [project.id for project in projects]

        # Discover Pre-processing
        if data["query_type"] == ExportQueryType.DISCOVER_STR:
            # coerce the fields into a list as needed
            base_fields = query_info.get("field", [])
            if not isinstance(base_fields, list):
                base_fields = [base_fields]

            equations = []
            fields = []
            if self.context.get("has_arithmetic"):
                for field in base_fields:
                    if is_equation(field):
                        equations.append(strip_equation(field))
                    else:
                        fields.append(field)
            else:
                fields = base_fields

            if len(base_fields) > MAX_FIELDS:
                detail = f"You can export up to {MAX_FIELDS} fields at a time. Please delete some and try again."
                raise serializers.ValidationError(detail)
            elif len(base_fields) == 0:
                raise serializers.ValidationError(
                    "at least one field is required to export")

            if "query" not in query_info:
                detail = "query is a required to export, please pass an empty string if you don't want to set one"
                raise serializers.ValidationError(detail)

            query_info["field"] = fields
            query_info["equations"] = equations

            if not query_info.get("project"):
                projects = self.context["get_projects"]()
                query_info["project"] = [project.id for project in projects]

            # make sure to fix the export start/end times to ensure consistent results
            try:
                start, end = get_date_range_from_params(query_info)
            except InvalidParams as e:
                sentry_sdk.set_tag("query.error_reason", "Invalid date params")
                raise serializers.ValidationError(str(e))

            if "statsPeriod" in query_info:
                del query_info["statsPeriod"]
            if "statsPeriodStart" in query_info:
                del query_info["statsPeriodStart"]
            if "statsPeriodEnd" in query_info:
                del query_info["statsPeriodEnd"]
            query_info["start"] = start.isoformat()
            query_info["end"] = end.isoformat()

            # validate the query string by trying to parse it
            processor = DiscoverProcessor(
                discover_query=query_info,
                organization_id=organization.id,
            )
            try:
                snuba_filter = get_filter(query_info["query"],
                                          processor.params)
                if len(equations) > 0:
                    resolved_equations, _ = resolve_equation_list(
                        equations, fields)
                else:
                    resolved_equations = []
                resolve_field_list(
                    fields.copy(),
                    snuba_filter,
                    auto_fields=True,
                    auto_aggregations=True,
                    resolved_equations=resolved_equations,
                )
            except InvalidSearchQuery as err:
                raise serializers.ValidationError(str(err))

        return data