コード例 #1
0
 def build_request(self, dataset: Dataset, timestamp: datetime,
                   offset: Optional[int], timer: Timer) -> Request:
     """
     Assemble the Request for this subscription, suitable for running
     through `parse_and_run_query`.

     The queried range ends at `timestamp` and starts `self.time_window`
     before it. When an offset is given, a condition bounding
     `ifnull(offset, 0)` by that value is appended to the subscription's
     own conditions.

     :param dataset: The Dataset to build the request for
     :param timestamp: Date that the query should run up until
     :param offset: Maximum offset we should query for
     :param timer: Timer handed through to the request builder
     """
     default_entity = dataset.get_default_entity()
     request_schema = RequestSchema.build_with_extensions(
         default_entity.get_extensions(),
         SubscriptionRequestSettings,
         Language.LEGACY,
     )

     # No offset means no additional filtering on top of the stored conditions.
     offset_filter: Sequence[Condition] = (
         [] if offset is None else [[["ifnull", ["offset", 0]], "<=", offset]]
     )

     query_body = {
         "project": self.project_id,
         "conditions": [*self.conditions, *offset_filter],
         "aggregations": self.aggregations,
         "from_date": (timestamp - self.time_window).isoformat(),
         "to_date": timestamp.isoformat(),
     }
     return build_request(
         query_body, request_schema, timer, dataset, SUBSCRIPTION_REFERRER
     )
コード例 #2
0
ファイル: factory.py プロジェクト: pombredanne/snuba
def enforce_table_writer(dataset: Dataset) -> TableWriter:
    """
    Return the table writer of the writable storage that belongs to the
    dataset's default entity.

    :param dataset: The Dataset whose default entity is inspected.
    :raises AssertionError: If the default entity has no writable storage.
    """
    writable_storage = dataset.get_default_entity().get_writable_storage()

    # Raise explicitly instead of using `assert`: assert statements are
    # removed under `python -O`, which would let a None storage reach the
    # call below and fail with an unrelated AttributeError.
    if writable_storage is None:
        raise AssertionError(
            f"Dataset {dataset} does not have a writable storage."
        )
    return writable_storage.get_table_writer()
コード例 #3
0
ファイル: views.py プロジェクト: getsentry/snuba
    def eventstream(*, dataset: Dataset) -> RespTuple:
        """
        Process a single eventstream record posted in the HTTP request body.

        The body is decoded as JSON; element 0 is the protocol version (only
        2 is accepted) and element 1 is the record type. Records of type
        "insert" are pushed through a consumer processing strategy built
        from the storage's stream loader; every other type is handed to a
        ReplacerWorker.

        :param dataset: The Dataset whose default entity's writable storage
            receives the record.
        :raises RuntimeError: If the protocol version is not 2.
        :raises AssertionError: If the default entity has no writable storage.
        :return: An ("ok", 200, headers) response tuple.
        """
        record = json.loads(http_request.data)

        version = record[0]
        if version != 2:
            raise RuntimeError("Unsupported protocol version: %s" % record)

        # Wrap the raw request body in a Message built from fixed
        # partition/offset values and the current time.
        message: Message[KafkaPayload] = Message(
            Partition(Topic("topic"), 0),
            0,
            KafkaPayload(None, http_request.data, []),
            datetime.now(),
        )

        type_ = record[1]

        storage = dataset.get_default_entity().get_writable_storage()
        assert storage is not None

        if type_ == "insert":
            from arroyo.processing.strategies.streaming import (
                KafkaConsumerStrategyFactory,
            )

            from snuba.consumers.consumer import build_batch_writer, process_message

            table_writer = storage.get_table_writer()
            stream_loader = table_writer.get_stream_loader()
            # Batch size of one: the single submitted message forms the
            # whole batch. The commit callback is a no-op.
            strategy = KafkaConsumerStrategyFactory(
                stream_loader.get_pre_filter(),
                functools.partial(
                    # Same consumer group name as the replacer branch below
                    # (this literal was previously misspelled).
                    process_message, stream_loader.get_processor(), "consumer_group"
                ),
                build_batch_writer(table_writer, metrics=metrics),
                max_batch_size=1,
                max_batch_time=1.0,
                processes=None,
                input_block_size=None,
                output_block_size=None,
            ).create(lambda offsets: None)
            strategy.submit(message)
            strategy.close()
            strategy.join()
        else:
            from snuba.replacer import ReplacerWorker

            worker = ReplacerWorker(storage, "consumer_group", metrics=metrics)
            processed = worker.process_message(message)
            # Only flush when the worker actually produced something.
            if processed is not None:
                batch = [processed]
                worker.flush_batch(batch)

        return ("ok", 200, {"Content-Type": "text/plain"})
コード例 #4
0
ファイル: __init__.py プロジェクト: isabella232/snuba
def parse_query(body: MutableMapping[str, Any], dataset: Dataset) -> Query:
    """
    Build the query AST from the initial query body.

    Only the body itself is considered here; extensions are handled by
    extension processors, which are expected to update the AST afterwards.

    Parsing happens in two phases. First the json body is turned into a
    minimal query object, resolving expressions, conditions, etc. Then a
    round of processing produces a sane query for the dataset specific
    section:
    - Alias shadowing is prevented.
    - Columns in the tags[asd] form become SubscriptableReference.
    - Every column that has no alias, and that is not itself a reference to
      an existing alias, receives one. A column may be turned into a
      different expression during query processing, so the original name
      has to be preserved for the result set to still expose the column
      under the name the user provided, whatever transformation was
      applied. With aliases assigned at this stage, processors only need
      to preserve them to keep the query correct.
    - Every reference to an alias is expanded by inlining the expression,
      which makes aliasing transparent to all query processing phases.
      Alias references are reintroduced (packaged back) at the end of
      query processing.
    """
    # TODO: Parse the entity out of the query body and select the correct one from the dataset
    query = _parse_query_impl(body, dataset.get_default_entity())

    # TODO: These should support composite queries.
    # WARNING: These steps assume table resolution did not happen yet. If
    # it is put earlier than here (unlikely), we need to adapt them.
    steps_before_table_resolution = (
        _validate_empty_table_names,
        _validate_aliases,
        _parse_subscriptables,
        _apply_column_aliases,
        _expand_aliases,
    )
    remaining_steps = (
        _deescape_aliases,
        _mangle_aliases,
        _validate_arrayjoin,
    )
    # Order matters: each step runs on the query as left by the previous one.
    for step in (*steps_before_table_resolution, *remaining_steps):
        step(query)

    # XXX: Select the entity to be used for the query. This step is temporary. Eventually
    # entity selection will be moved to Sentry and specified for all SnQL queries.
    selected_entity = dataset.select_entity(query)
    data_model = get_entity(selected_entity).get_data_model()
    query.set_from_clause(QueryEntity(selected_entity, data_model))

    validate_query(query)
    return query
コード例 #5
0
ファイル: views.py プロジェクト: fpacifici/snuba
def dataset_query(dataset: Dataset, body, timer: Timer) -> Response:
    """
    Run a POSTed query against the dataset and return a JSON Response.

    On success the response carries the query result plus timing (and the
    result's extra data when stats are enabled in settings or the request
    asks for debug). A QueryException is turned into a JSON error response:
    429 when caused by a rate limit, 500 otherwise. A QueryException whose
    cause is not an Exception is re-raised.
    """
    assert http_request.method == "POST"

    with sentry_sdk.start_span(description="build_schema", op="validate"):
        extensions = dataset.get_default_entity().get_extensions()
        schema = RequestSchema.build_with_extensions(extensions, HTTPRequestSettings)

    request = build_request(body, schema, timer, dataset, http_request.referrer)

    try:
        result = parse_and_run_query(dataset, request, timer)
    except QueryException as exception:
        cause = exception.__cause__
        status: int
        details: Mapping[str, Any]

        # Classify the failure by the exception that caused it.
        if isinstance(cause, RateLimitExceeded):
            status, details = (
                429,
                {"type": "rate-limited", "message": "rate limit exceeded"},
            )
        elif isinstance(cause, ClickhouseError):
            status, details = (
                500,
                {"type": "clickhouse", "message": str(cause), "code": cause.code},
            )
        elif isinstance(cause, Exception):
            status, details = 500, {"type": "unknown", "message": str(cause)}
        else:
            raise  # exception should have been chained

        error_body = {"error": details, "timing": timer.for_json()}
        error_body.update(exception.extra)
        return Response(
            json.dumps(error_body), status, {"Content-Type": "application/json"}
        )

    payload: MutableMapping[str, Any] = {**result.result, "timing": timer.for_json()}

    include_stats = settings.STATS_IN_RESPONSE or request.settings.get_debug()
    if include_stats:
        payload.update(result.extra)

    return Response(json.dumps(payload), 200, {"Content-Type": "application/json"})
コード例 #6
0
ファイル: views.py プロジェクト: fpacifici/snuba
def dataset_query_view(*, dataset: Dataset, timer: Timer):
    """
    Serve the query endpoint of a dataset.

    GET renders the "query.html" template with a JSON query template
    generated from the dataset's request schema. POST parses the request
    body and runs it through `dataset_query`.

    :param dataset: The Dataset being queried.
    :param timer: Timer passed on to `dataset_query` for POST requests.
    :raises AssertionError: If the request method is neither GET nor POST.
    """
    if http_request.method == "GET":
        schema = RequestSchema.build_with_extensions(
            dataset.get_default_entity().get_extensions(), HTTPRequestSettings
        )
        return render_template(
            "query.html",
            query_template=json.dumps(schema.generate_template(), indent=4,),
        )
    elif http_request.method == "POST":
        body = parse_request_body(http_request)
        _trace_transaction(dataset)
        return dataset_query(dataset, body, timer)
    else:
        # Raise explicitly: `assert False` is removed under `python -O`,
        # which would make this branch silently return None.
        raise AssertionError("unexpected fallthrough")
コード例 #7
0
ファイル: views.py プロジェクト: chhetripradeep/snuba
def dataset_query(
    dataset: Dataset, body: MutableMapping[str, Any], timer: Timer, language: Language
) -> Response:
    """
    Run a POSTed query, written in the given language, against the dataset
    and return a JSON Response.

    SnQL requests additionally emit adoption metrics (incoming, success
    with the query type, failed with the status). On success the response
    carries the query result plus timing (and the result's extra data when
    stats are enabled in settings or the request asks for debug). A
    QueryException is turned into a JSON error response: 429 when caused by
    a rate limit, 500 otherwise. A QueryException whose cause is not an
    Exception is re-raised.
    """
    assert http_request.method == "POST"
    referrer = http_request.referrer or "<unknown>"  # mypy
    is_snql = language == Language.SNQL

    parser: Callable[
        [RequestParts, RequestSettings, Dataset],
        Union[Query, CompositeQuery[Entity]],
    ]
    if is_snql:
        metrics.increment("snql.query.incoming", tags={"referrer": referrer})
        parser = partial(parse_snql_query, [])
    else:
        parser = parse_legacy_query

    with sentry_sdk.start_span(description="build_schema", op="validate"):
        extensions = dataset.get_default_entity().get_extensions()
        schema = RequestSchema.build_with_extensions(
            extensions, HTTPRequestSettings, language
        )

    request = build_request(
        body, parser, HTTPRequestSettings, schema, dataset, timer, referrer
    )

    try:
        result = parse_and_run_query(dataset, request, timer)

        # Some metrics to track the adoption of SnQL
        if is_snql:
            query_type = "simple"
            if isinstance(request.query, CompositeQuery):
                from_clause = request.query.get_from_clause()
                query_type = (
                    "join" if isinstance(from_clause, JoinClause) else "subquery"
                )
            metrics.increment(
                "snql.query.success", tags={"referrer": referrer, "type": query_type}
            )

    except QueryException as exception:
        cause = exception.__cause__
        status: int
        details: Mapping[str, Any]

        # Classify the failure by the exception that caused it.
        if isinstance(cause, RateLimitExceeded):
            status, details = (
                429,
                {"type": "rate-limited", "message": "rate limit exceeded"},
            )
        elif isinstance(cause, ClickhouseError):
            status, details = (
                500,
                {"type": "clickhouse", "message": str(cause), "code": cause.code},
            )
        elif isinstance(cause, Exception):
            status, details = 500, {"type": "unknown", "message": str(cause)}
        else:
            raise  # exception should have been chained

        if is_snql:
            metrics.increment(
                "snql.query.failed", tags={"referrer": referrer, "status": str(status)},
            )

        error_body = {"error": details, "timing": timer.for_json()}
        error_body.update(exception.extra)
        return Response(
            json.dumps(error_body), status, {"Content-Type": "application/json"}
        )

    payload: MutableMapping[str, Any] = {**result.result, "timing": timer.for_json()}

    include_stats = settings.STATS_IN_RESPONSE or request.settings.get_debug()
    if include_stats:
        payload.update(result.extra)

    return Response(json.dumps(payload), 200, {"Content-Type": "application/json"})
コード例 #8
0
ファイル: views.py プロジェクト: getsentry/snuba
 def write(*, dataset: Dataset) -> RespTuple:
     """Hand the dataset's default entity to `_write_to_entity` and return its response."""
     default_entity = dataset.get_default_entity()
     return _write_to_entity(entity=default_entity)