Ejemplo n.º 1
0
def parse_and_run_query(dataset: Dataset, request: Request,
                        timer: Timer) -> RawQueryResult:
    """
    Run a Snuba query and record metadata for every split query executed,
    on both the success and the failure path.
    """
    # Snapshot the request before the pipeline can mutate it in place, so
    # the recorded metadata reflects what the caller actually sent.
    original_request = copy.deepcopy(request)
    query_metadata = SnubaQueryMetadata(
        request=original_request,
        dataset=get_dataset_name(dataset),
        timer=timer,
        query_list=[],
    )

    with sentry_sdk.configure_scope() as scope:
        span = scope.span
        if span:
            span.set_tag("dataset", get_dataset_name(dataset))
            span.set_tag("referrer", http_request.referrer)

    try:
        result = _run_query_pipeline(
            dataset=dataset,
            request=request,
            timer=timer,
            query_metadata=query_metadata,
        )
        record_query(original_request, timer, query_metadata)
    except RawQueryException:
        # Still record metadata for failed queries before propagating.
        record_query(original_request, timer, query_metadata)
        raise

    return result
Ejemplo n.º 2
0
def parse_and_run_query(
    dataset: Dataset,
    request: Request,
    timer: Timer,
    robust: bool = False,
    concurrent_queries_gauge: Optional[Gauge] = None,
) -> QueryResult:
    """
    Run a Snuba query, recording metadata about each split query that was
    executed, whether the pipeline succeeds or raises.
    """
    metadata = SnubaQueryMetadata(
        request=request,
        dataset=get_dataset_name(dataset),
        timer=timer,
        query_list=[],
    )

    try:
        result = _run_query_pipeline(
            dataset=dataset,
            request=request,
            timer=timer,
            query_metadata=metadata,
            robust=robust,
            concurrent_queries_gauge=concurrent_queries_gauge,
        )
        # Dry runs execute nothing, so there is nothing worth recording.
        if not request.settings.get_dry_run():
            record_query(request, timer, metadata, result.extra)
    except QueryException as exc:
        # Failed queries are recorded too, with the failure's extra data.
        record_query(request, timer, metadata, exc.extra)
        raise

    return result
Ejemplo n.º 3
0
def _trace_transaction(dataset: Dataset) -> None:
    """Tag the active Sentry span and rename the active transaction so both
    carry the dataset name and the HTTP referrer."""
    with sentry_sdk.configure_scope() as scope:
        span = scope.span
        if span:
            span.set_tag("dataset", get_dataset_name(dataset))
            span.set_tag("referrer", http_request.referrer)

        txn = scope.transaction
        if txn:
            scope.transaction = (
                f"{txn.name}__{get_dataset_name(dataset)}__{http_request.referrer}"
            )
Ejemplo n.º 4
0
def parse_and_run_query(
    dataset: Dataset,
    request: Request,
    timer: Timer,
    robust: bool = False,
    concurrent_queries_gauge: Optional[Gauge] = None,
) -> QueryResult:
    """
    Runs a Snuba Query, then records the metadata about each split query that was run.

    Metadata (entity, time range, projects, anonymized SnQL) is gathered up
    front; the query is recorded on both the success and the failure path,
    and QueryException is re-raised after recording.
    """
    # For logical (non-composite) queries, resolve the entity and — when the
    # entity declares a required time column — the query's time range, so
    # both can be attached to the recorded metadata.
    start, end = None, None
    entity_name = "unknown"
    if isinstance(request.query, LogicalQuery):
        entity_key = request.query.get_from_clause().key
        entity = get_entity(entity_key)
        entity_name = entity_key.value
        if entity.required_time_column is not None:
            start, end = get_time_range(request.query,
                                        entity.required_time_column)

    query_metadata = SnubaQueryMetadata(
        request=request,
        start_timestamp=start,
        end_timestamp=end,
        dataset=get_dataset_name(dataset),
        entity=entity_name,
        timer=timer,
        query_list=[],
        projects=ProjectsFinder().visit(request.query),
        snql_anonymized=request.snql_anonymized,
    )

    try:
        result = _run_query_pipeline(
            dataset=dataset,
            request=request,
            timer=timer,
            query_metadata=query_metadata,
            robust=robust,
            concurrent_queries_gauge=concurrent_queries_gauge,
        )
        # Propagate the FINAL flag into the result extras before recording.
        _set_query_final(request, result.extra)
        # Dry runs execute nothing, so they are not recorded.
        if not request.query_settings.get_dry_run():
            record_query(request, timer, query_metadata, result.extra)
    except QueryException as error:
        # Record the failed query (with the failure's extras) and re-raise.
        _set_query_final(request, error.extra)
        record_query(request, timer, query_metadata, error.extra)
        raise error

    return result
Ejemplo n.º 5
0
    def selector(query: Union[CompositeQuery[QueryEntity], LogicalQuery]) -> None:
        """Rewrite a generic "discover" query in place to target the concrete
        entity selected by the dataset, fixing up time-column aliases."""
        # If you are doing a JOIN, then you have to specify the entity
        if isinstance(query, CompositeQuery):
            return

        if get_dataset_name(dataset) == "discover":
            query_entity = query.get_from_clause()
            # The legacy -> snql parser will mark queries with no entity specified as the "discover" entity
            # so only do this selection in that case. If someone wants the "discover" entity specifically
            # then their query will have to only use fields from that entity.
            if query_entity.key == EntityKey.DISCOVER:
                # Let the dataset pick the concrete entity, then swap it into
                # the query's FROM clause.
                selected_entity_key = dataset.select_entity(query)
                selected_entity = get_entity(selected_entity_key)
                query_entity = QueryEntity(
                    selected_entity_key, selected_entity.get_data_model()
                )
                query.set_from_clause(query_entity)

                # XXX: This exists only to ensure that the generated SQL matches legacy queries.
                def replace_time_condition_aliases(exp: Expression) -> Expression:
                    # Rewrite binary conditions on the "_snuba_timestamp"
                    # column alias to use the selected entity's required
                    # time column instead.
                    if (
                        isinstance(exp, FunctionCall)
                        and len(exp.parameters) == 2
                        and isinstance(exp.parameters[0], Column)
                        and exp.parameters[0].alias == "_snuba_timestamp"
                    ):
                        return FunctionCall(
                            exp.alias,
                            exp.function_name,
                            (
                                Column(
                                    f"_snuba_{selected_entity.required_time_column}",
                                    exp.parameters[0].table_name,
                                    exp.parameters[0].column_name,
                                ),
                                exp.parameters[1],
                            ),
                        )

                    return exp

                condition = query.get_condition()
                if condition is not None:
                    query.set_ast_condition(
                        condition.transform(replace_time_condition_aliases)
                    )
Ejemplo n.º 6
0
    def __init__(
        self,
        dataset: Dataset,
        executor: ThreadPoolExecutor,
        schedulers: Mapping[int, Scheduler[Subscription]],
        producer: Producer[SubscriptionTaskResult],
        topic: Topic,
        metrics: MetricsBackend,
    ) -> None:
        """Store the collaborators and build the concurrency gauges."""
        self.__dataset = dataset
        self.__dataset_name = get_dataset_name(dataset)
        self.__executor = executor
        self.__schedulers = schedulers
        self.__producer = producer
        self.__topic = topic
        self.__metrics = metrics

        # Thread-safe gauges tracking in-flight executions: one overall,
        # one for the ClickHouse layer specifically.
        self.__concurrent_gauge: Gauge = ThreadSafeGauge(
            metrics, "executor.concurrent"
        )
        self.__concurrent_clickhouse_gauge: Gauge = ThreadSafeGauge(
            metrics, "executor.concurrent.clickhouse"
        )
Ejemplo n.º 7
0
def parse_and_run_query(dataset: Dataset, request: Request,
                        timer: Timer) -> QueryResult:
    """
    Run a Snuba query; metadata about each split query is recorded whether
    the pipeline succeeds or raises.
    """
    # Deep-copy before the pipeline can mutate the request in place, so the
    # recorded metadata reflects the original request.
    recorded_request = copy.deepcopy(request)
    metadata = SnubaQueryMetadata(
        request=recorded_request,
        dataset=get_dataset_name(dataset),
        timer=timer,
        query_list=[],
    )

    try:
        result = _run_query_pipeline(
            request=request, timer=timer, query_metadata=metadata
        )
        record_query(recorded_request, timer, metadata)
    except QueryException:
        # Failed queries are recorded too before propagating.
        record_query(recorded_request, timer, metadata)
        raise

    return result
Ejemplo n.º 8
0
def test_get_dataset_name() -> None:
    """get_dataset followed by get_dataset_name round-trips the name."""
    name = "events"
    assert get_dataset_name(get_dataset(name)) == name
Ejemplo n.º 9
0
def dataset_query(
    dataset: Dataset, body: MutableMapping[str, Any], timer: Timer
) -> Response:
    """HTTP POST handler: validate the body against the request schema, run
    the query, and translate QueryException causes into HTTP responses."""
    assert http_request.method == "POST"
    referrer = http_request.referrer or "<unknown>"  # mypy

    # Try to detect if new requests are being sent to the api
    # after the shutdown command has been issued, and if so
    # how long after. I don't want to do a disk check for
    # every query, so randomly sample until the shutdown file
    # is detected, and then log everything
    if IS_SHUTTING_DOWN or random.random() < 0.05:
        if IS_SHUTTING_DOWN or check_down_file_exists():
            tags = {"dataset": get_dataset_name(dataset)}
            metrics.increment("post.shutdown.query", tags=tags)
            diff = time.time() - (shutdown_time() or 0.0)  # this should never be None
            metrics.timing("post.shutdown.query.delay", diff, tags=tags)

    with sentry_sdk.start_span(description="build_schema", op="validate"):
        schema = RequestSchema.build(HTTPQuerySettings)

    request = build_request(
        body, parse_snql_query, HTTPQuerySettings, schema, dataset, timer, referrer
    )

    try:
        result = parse_and_run_query(dataset, request, timer)
    except QueryException as exception:
        # Map the exception's cause to an HTTP status + error payload.
        status = 500
        details: Mapping[str, Any]

        cause = exception.__cause__
        if isinstance(cause, RateLimitExceeded):
            status = 429
            details = {
                "type": "rate-limited",
                "message": str(cause),
            }
            logger.warning(
                str(cause),
                exc_info=True,
            )
        elif isinstance(cause, ClickhouseError):
            status = get_http_status_for_clickhouse_error(cause)
            details = {
                "type": "clickhouse",
                "message": str(cause),
                "code": cause.code,
            }
        elif isinstance(cause, QueryTooLongException):
            status = 400
            details = {"type": "query-too-long", "message": str(cause)}
        elif isinstance(cause, Exception):
            details = {
                "type": "unknown",
                "message": str(cause),
            }
        else:
            raise  # exception should have been chained

        return Response(
            json.dumps(
                {"error": details, "timing": timer.for_json(), **exception.extra}
            ),
            status,
            {"Content-Type": "application/json"},
        )

    payload: MutableMapping[str, Any] = {**result.result, "timing": timer.for_json()}

    # Expose pipeline internals only when globally enabled or in debug mode.
    if settings.STATS_IN_RESPONSE or request.query_settings.get_debug():
        payload.update(result.extra)

    return Response(json.dumps(payload), 200, {"Content-Type": "application/json"})
Ejemplo n.º 10
0
 def to_url(self, value: Dataset) -> str:
     """Serialize *value* to its dataset name for use in a URL."""
     return get_dataset_name(value)
Ejemplo n.º 11
0
 def test(self) -> None:
     """get_dataset followed by get_dataset_name round-trips the name."""
     expected = "events"
     assert get_dataset_name(get_dataset(expected)) == expected