Beispiel #1
0
    def __init__(
        self,
        dataset: Dataset,
        params: ParamsType,
        query: Optional[str] = None,
        selected_columns: Optional[List[str]] = None,
        orderby: Optional[List[str]] = None,
        auto_aggregations: bool = False,
        use_aggregate_conditions: bool = False,
        limit: Optional[int] = 50,
        offset: Optional[int] = 0,
        limitby: Optional[Tuple[str, int]] = None,
    ):
        """Populate the builder's clauses from the raw request arguments.

        Args:
            dataset: Forwarded to the parent constructor.
            params: Forwarded to the parent constructor.
            query: Search string handed to ``resolve_conditions``.
            selected_columns: Column names handed to ``resolve_select``.
            orderby: Sort fields handed to ``resolve_orderby``.
            auto_aggregations: Stored on the instance as-is.
            use_aggregate_conditions: Forwarded to ``resolve_conditions``.
            limit: Wrapped in ``Limit``; ``None`` is stored as ``None``.
            offset: Wrapped in ``Offset``; ``None`` is stored as ``None``.
            limitby: Handed to ``resolve_limitby``.
        """
        super().__init__(dataset, params)

        # TODO: implement this in `resolve_select`
        self.auto_aggregations = auto_aggregations

        # Only wrap the paging values when the caller actually supplied them.
        self.limit = Limit(limit) if limit is not None else None
        self.offset = Offset(offset) if offset is not None else None

        self.limitby = self.resolve_limitby(limitby)

        resolved_where, resolved_having = self.resolve_conditions(
            query,
            use_aggregate_conditions=use_aggregate_conditions,
        )
        self.where = resolved_where
        self.having = resolved_having

        # Conditions must be resolved before the params: the conditions may
        # themselves contain projects, which the param resolution relies on.
        self.where += self.resolve_params()

        self.columns = self.resolve_select(selected_columns)
        self.orderby = self.resolve_orderby(orderby)
Beispiel #2
0
    def __init__(
        self,
        dataset: Dataset,
        params: ParamsType,
        granularity: int,
        query: Optional[str] = None,
        selected_columns: Optional[List[str]] = None,
        equations: Optional[List[str]] = None,
        functions_acl: Optional[List[str]] = None,
        limit: Optional[int] = 10000,
    ):
        """Set up a builder whose results are always grouped by the time column.

        Args:
            dataset: Forwarded to the parent constructor.
            params: Forwarded to the parent constructor.
            granularity: Wrapped in ``Granularity`` and stored on the instance.
            query: Search string handed to ``resolve_conditions``.
            selected_columns: Column names handed to ``resolve_select``.
            equations: Equations handed to ``resolve_select``.
            functions_acl: Forwarded to the parent constructor.
            limit: Wrapped in ``Limit``; ``None`` is stored as ``None``.
        """
        equation_settings = {
            "auto_add": True,
            "aggregates_only": True,
        }
        super().__init__(
            dataset,
            params,
            auto_fields=False,
            functions_acl=functions_acl,
            equation_config=equation_settings,
        )

        # Aggregate conditions are always disabled for this builder.
        resolved_where, resolved_having = self.resolve_conditions(
            query,
            use_aggregate_conditions=False,
        )
        self.where = resolved_where
        self.having = resolved_having

        self.limit = Limit(limit) if limit is not None else None

        # Conditions must be resolved before the params: the conditions may
        # themselves contain projects, which the param resolution relies on.
        self.where += self.resolve_params()
        self.columns = self.resolve_select(selected_columns, equations)
        self.granularity = Granularity(granularity)

        # A timeseries is always grouped by time and nothing else.
        self.groupby = [self.time_column]
 def data_fn(offset, limit):
     """Fetch one page of trend results for the given offset and limit.

     Relies on names bound in the enclosing scope (``use_snql``,
     ``trend_query``, ``selected_columns``, ``trend_columns``, ``query``,
     ``params``, ``orderby``), which is not visible from here.
     """
     if not use_snql:
         # Legacy path: let discover build and run the query itself.
         return discover.query(
             selected_columns=selected_columns + trend_columns,
             query=query,
             params=params,
             orderby=orderby,
             offset=offset,
             limit=limit,
             referrer="api.trends.get-percentage-change",
             auto_fields=True,
             auto_aggregations=True,
             use_aggregate_conditions=True,
         )

     # SnQL path: apply the paging window to the shared query object, run it,
     # then pass the raw result through discover's result transformation.
     trend_query.offset = Offset(offset)
     trend_query.limit = Limit(limit)
     raw_result = raw_snql_query(
         trend_query.get_snql_query(),
         referrer="api.trends.get-percentage-change.wip-snql",
     )
     return discover.transform_results(
         raw_result, trend_query.function_alias_map, {}, None
     )
Beispiel #4
0
def run_outcomes_query_timeseries(query: QueryDefinition) -> ResultSet:
    """Run the outcomes query as a timeseries and return the formatted rows.

    The query definition's own group-by columns are extended with the
    ``TS_COL`` column, and the rollup is used as the granularity. The query
    runs with a fixed limit of 10000 and an offset of 0.
    """
    dataset_name = query.dataset.value
    entity = Entity(query.match)
    selected = query.select_params
    grouping = query.group_by + [Column(TS_COL)]
    conditions = query.conditions

    snql_query = Query(
        dataset=dataset_name,
        match=entity,
        select=selected,
        groupby=grouping,
        where=conditions,
        limit=Limit(10000),
        offset=Offset(0),
        granularity=Granularity(query.rollup),
    )

    response = raw_snql_query(snql_query, referrer="outcomes.timeseries")
    rows = response["data"]
    return _format_rows(rows, query)
Beispiel #5
0
    def __init__(
        self,
        num_buckets: int,
        histogram_column: str,
        histogram_rows: Optional[int],
        histogram_params: HistogramParams,
        key_column: Optional[str],
        field_names: Optional[List[Union[str, Any, None]]],
        groupby: Optional[List[str]],
        *args: Any,
        **kwargs: Any,
    ):
        """Build a histogram query on top of the parent query builder.

        The parent builder is initialised first; its resolved columns and
        aggregates are then replaced by ``count()`` plus the histogram column,
        and the bins are bounded with extra ``where`` conditions.

        Args:
            num_buckets: Number of buckets; used for the upper bin bound and
                for the row limit.
            histogram_column: Column expression resolved via
                ``resolve_column`` and used as the histogram bin.
            histogram_rows: Number of groups used to compute the limit. When
                ``None``, ``len(selected_columns)`` is used instead.
            histogram_params: Supplies ``start_offset`` and ``bucket_size``
                for the bin bounds.
            key_column: Optional column that is both filtered on (together
                with ``field_names``) and appended to the selected columns.
            field_names: Candidate values for ``key_column``; only the ``str``
                entries are used in the ``IN`` condition.
            groupby: Stored as ``additional_groupby``.
            *args: Forwarded to the parent constructor.
            **kwargs: Forwarded to the parent constructor. Must contain
                ``selected_columns``; ``functions_acl`` is extended with
                ``base_function_acl`` before forwarding.

        Raises:
            KeyError: If ``selected_columns`` is not passed as a keyword.
        """
        # ``dict.get`` only uses its default when the key is missing, so an
        # explicit ``functions_acl=None`` would reach the ``+`` below and
        # raise ``TypeError``. Treat ``None`` the same as "not provided".
        caller_acl = kwargs.get("functions_acl") or []
        kwargs["functions_acl"] = caller_acl + self.base_function_acl
        super().__init__(*args, **kwargs)
        self.additional_groupby = groupby
        selected_columns = kwargs["selected_columns"]

        resolved_histogram = self.resolve_column(histogram_column)

        # Reset & ignore the columns resolved by the parent QueryBuilder.
        self.aggregates: List[CurriedFunction] = []
        self.columns = [self.resolve_column("count()"), resolved_histogram]

        if key_column is not None and field_names is not None:
            key_values: List[str] = [
                field for field in field_names if isinstance(field, str)
            ]
            self.where.append(
                Condition(self.resolve_column(key_column), Op.IN, key_values)
            )

        # Bound the bins on both sides to get the desired range of results.
        min_bin = histogram_params.start_offset
        self.where.append(Condition(resolved_histogram, Op.GTE, min_bin))
        max_bin = (
            histogram_params.start_offset
            + histogram_params.bucket_size * num_buckets
        )
        self.where.append(Condition(resolved_histogram, Op.LTE, max_bin))

        if key_column is not None:
            self.columns.append(self.resolve_column(key_column))

        # One row per (group, bucket) pair.
        groups = len(selected_columns) if histogram_rows is None else histogram_rows
        self.limit = Limit(groups * num_buckets)
        # Always sort by bin last, after any ordering the parent resolved.
        self.orderby = (self.orderby if self.orderby else []) + [
            OrderBy(resolved_histogram, Direction.ASC)
        ]
Beispiel #6
0
    def __init__(
        self,
        dataset: Dataset,
        params: ParamsType,
        query: Optional[str] = None,
        selected_columns: Optional[List[str]] = None,
        orderby: Optional[List[str]] = None,
        limit: int = 50,
    ):
        """Initialise the builder and resolve its clauses in a fixed order.

        Args:
            dataset: Forwarded to the parent constructor.
            params: Forwarded to the parent constructor.
            query: Search string; ``resolve_where`` is only called when this
                is not ``None``.
            selected_columns: Column names; ``resolve_select`` is only called
                when this is not ``None``.
            orderby: Forwarded to the parent constructor.
            limit: Wrapped in ``Limit`` and stored on the instance.
        """
        super().__init__(dataset, params, orderby)

        self.limit = Limit(limit)

        # The resolver calls below are invoked for their side effects; their
        # return values are discarded here.
        if query is not None:
            self.resolve_where(query)
        # params depends on get_filter since there may be projects in the query
        self.resolve_params()
        if selected_columns is not None:
            self.resolve_select(selected_columns)
    def test_cache(self):
        """Smoke-test that a query issued with ``use_cache=True`` succeeds.

        Queries the last day of events for this test's project and expects no
        rows back.
        """
        project_filter = Condition(Column("project_id"), Op.EQ, self.project.id)
        after_yesterday = Condition(
            Column("timestamp"), Op.GTE, timezone.now() - timedelta(days=1)
        )
        before_now = Condition(Column("timestamp"), Op.LT, timezone.now())

        query = Query(
            "events",
            Entity("events"),
            select=[Column("event_id")],
            where=[project_filter, after_yesterday, before_now],
            limit=Limit(1),
        )

        results = snuba.raw_snql_query(query, use_cache=True)

        assert results["data"] == []
Beispiel #8
0
    def __init__(
        self,
        dataset: Dataset,
        params: ParamsType,
        query: Optional[str] = None,
        selected_columns: Optional[List[str]] = None,
        equations: Optional[List[str]] = None,
        orderby: Optional[List[str]] = None,
        auto_fields: bool = False,
        auto_aggregations: bool = False,
        use_aggregate_conditions: bool = False,
        functions_acl: Optional[List[str]] = None,
        array_join: Optional[str] = None,
        limit: Optional[int] = 50,
        offset: Optional[int] = 0,
        limitby: Optional[Tuple[str, int]] = None,
        turbo: bool = False,
        sample_rate: Optional[float] = None,
    ):
        """Populate the builder's clauses from the raw request arguments.

        Args:
            dataset: Forwarded to the parent constructor.
            params: Forwarded to the parent constructor.
            query: Search string handed to ``resolve_conditions``.
            selected_columns: Column names handed to ``resolve_select``.
            equations: Equations handed to ``resolve_select``.
            orderby: Sort fields handed to ``resolve_orderby``.
            auto_fields: Forwarded to the parent constructor.
            auto_aggregations: Stored on the instance as-is.
            use_aggregate_conditions: Forwarded to ``resolve_conditions``.
            functions_acl: Forwarded to the parent constructor.
            array_join: Column resolved via ``resolve_column``; ``None`` is
                stored as ``None``.
            limit: Wrapped in ``Limit``; ``None`` is stored as ``None``.
            offset: Wrapped in ``Offset``; ``None`` is stored as ``None``.
            limitby: Handed to ``resolve_limitby``.
            turbo: Wrapped in ``Turbo`` and stored on the instance.
            sample_rate: Stored on the instance as-is.
        """
        super().__init__(dataset, params, auto_fields, functions_acl)

        self.auto_aggregations = auto_aggregations

        # Only wrap the paging values when the caller actually supplied them.
        self.limit = Limit(limit) if limit is not None else None
        self.offset = Offset(offset) if offset is not None else None

        self.limitby = self.resolve_limitby(limitby)
        self.turbo = Turbo(turbo)
        self.sample_rate = sample_rate

        resolved_where, resolved_having = self.resolve_conditions(
            query,
            use_aggregate_conditions=use_aggregate_conditions,
        )
        self.where = resolved_where
        self.having = resolved_having

        # Conditions must be resolved before the params: the conditions may
        # themselves contain projects, which the param resolution relies on.
        self.where += self.resolve_params()

        self.columns = self.resolve_select(selected_columns, equations)
        self.orderby = self.resolve_orderby(orderby)

        if array_join is None:
            self.array_join = None
        else:
            self.array_join = self.resolve_column(array_join)
Beispiel #9
0
    def __init__(
        self,
        dataset: Dataset,
        params: ParamsType,
        query: Optional[str] = None,
        selected_columns: Optional[List[str]] = None,
        orderby: Optional[List[str]] = None,
        use_aggregate_conditions: bool = False,
        limit: int = 50,
    ):
        """Initialise the builder and resolve its clauses in a fixed order.

        Args:
            dataset: Forwarded to the parent constructor.
            params: Forwarded to the parent constructor.
            query: Search string handed to ``parse_query``.
            selected_columns: Column names handed to ``resolve_select``.
            orderby: Sort fields handed to ``resolve_orderby``.
            use_aggregate_conditions: Forwarded to ``resolve_having``.
            limit: Wrapped in ``Limit`` and stored on the instance.
        """
        super().__init__(dataset, params)

        self.limit = Limit(limit)

        # Parse once; the same parsed terms feed both the WHERE and the
        # HAVING resolution.
        parsed_terms = self.parse_query(query)
        self.where = self.resolve_where(parsed_terms)
        self.having = self.resolve_having(
            parsed_terms, use_aggregate_conditions=use_aggregate_conditions)

        # params depends on get_filter since there may be projects in the query
        self.where += self.resolve_params()

        self.columns = self.resolve_select(selected_columns)
        self.orderby = self.resolve_orderby(orderby)
                         "p50"),
     ],
     groupby=[Column("title")],
     where=[
         Condition(Column("timestamp"), Op.GT, NOW),
         Condition(Function("toHour", [Column("timestamp")]), Op.LTE,
                   NOW),
         Condition(Column("project_id"), Op.IN,
                   Function("tuple", [1, 2, 3])),
     ],
     having=[
         Condition(Function("uniq", [Column("event_id")]), Op.GT, 1)
     ],
     orderby=[OrderBy(Column("title"), Direction.ASC)],
     limitby=LimitBy(Column("title"), 5),
     limit=Limit(10),
     offset=Offset(1),
     granularity=Granularity(3600),
     totals=Totals(True),
 ),
 (
     "MATCH (events SAMPLE 1000)",
     "SELECT title, uniq(event_id) AS uniq_events, quantile(0.5)(duration) AS p50",
     "BY title",
     ("WHERE timestamp > toDateTime('2021-01-02T03:04:05.000006') "
      "AND toHour(timestamp) <= toDateTime('2021-01-02T03:04:05.000006') "
      "AND project_id IN tuple(1, 2, 3)"),
     "HAVING uniq(event_id) > 1",
     "ORDER BY title ASC",
     "LIMIT 5 BY title",
     "LIMIT 10",
Beispiel #11
0
def test_limit(value: Any, exception: Optional[Exception]) -> None:
    """Check that ``Limit`` either stores ``value`` or rejects it as expected.

    When ``exception`` is given, constructing ``Limit(value)`` must raise an
    exception of the same type whose message matches it literally.
    """
    if exception is None:
        assert Limit(value).limit == value
        return

    expected_type = type(exception)
    # Escape the message so it is matched as plain text, not as a regex.
    expected_message = re.escape(str(exception))
    with pytest.raises(expected_type, match=expected_message):
        Limit(value)
Beispiel #12
0
 def set_limit(self, limit: int) -> "Query":
     """Return the result of replacing the ``limit`` field with ``Limit(limit)``."""
     wrapped_limit = Limit(limit)
     return self._replace("limit", wrapped_limit)