def build_request(
    self, dataset: Dataset, timestamp: datetime, offset: Optional[int], timer: Timer
) -> Request:
    """
    Build a Request that can be executed through `parse_and_run_query`.

    :param dataset: The Dataset to build the request for
    :param timestamp: Date that the query should run up until
    :param offset: Maximum offset we should query for
    """
    request_schema = RequestSchema.build_with_extensions(
        dataset.get_extensions(), SubscriptionRequestSettings,
    )
    # When an offset cap is provided, constrain rows to those at or below it;
    # otherwise no extra condition is added.
    offset_conditions: Sequence[Condition] = (
        [] if offset is None else [[["ifnull", ["offset", 0]], "<=", offset]]
    )
    payload = {
        "project": self.project_id,
        "conditions": [*self.conditions, *offset_conditions],
        "aggregations": self.aggregations,
        "from_date": (timestamp - self.time_window).isoformat(),
        "to_date": timestamp.isoformat(),
    }
    return validate_request_content(
        payload, request_schema, timer, dataset, SUBSCRIPTION_REFERRER,
    )
def dataset_query(dataset: Dataset, body, timer: Timer) -> Response:
    """
    Execute a POSTed query body against the dataset and wrap either the
    result or the failure details in a JSON HTTP response.
    """
    assert http_request.method == "POST"

    with sentry_sdk.start_span(description="build_schema", op="validate"):
        schema = RequestSchema.build_with_extensions(
            dataset.get_extensions(), HTTPRequestSettings
        )
    request = build_request(body, schema, timer, dataset, http_request.referrer)

    try:
        result = parse_and_run_query(dataset, request, timer)
    except QueryException as exception:
        # Map the chained cause onto an HTTP status and an error payload.
        cause = exception.__cause__
        status = 500
        details: Mapping[str, Any]
        if isinstance(cause, RateLimitExceeded):
            status = 429
            details = {
                "type": "rate-limited",
                "message": "rate limit exceeded",
            }
        elif isinstance(cause, ClickhouseError):
            details = {
                "type": "clickhouse",
                "message": str(cause),
                "code": cause.code,
            }
        elif isinstance(cause, Exception):
            details = {
                "type": "unknown",
                "message": str(cause),
            }
        else:
            raise  # exception should have been chained
        error_body = {"error": details, "timing": timer.for_json(), **exception.extra}
        return Response(
            json.dumps(error_body), status, {"Content-Type": "application/json"}
        )

    payload: MutableMapping[str, Any] = {**result.result, "timing": timer.for_json()}
    # Extra execution stats are only exposed when configured or in debug mode.
    if settings.STATS_IN_RESPONSE or request.settings.get_debug():
        payload.update(result.extra)
    return Response(json.dumps(payload), 200, {"Content-Type": "application/json"})
def dataset_query_view(*, dataset: Dataset, timer: Timer):
    """
    HTTP entry point for dataset queries: GET renders the interactive query
    template, POST parses the body and runs the query.
    """
    method = http_request.method
    if method == "POST":
        return dataset_query(dataset, parse_request_body(http_request), timer)
    if method == "GET":
        schema = RequestSchema.build_with_extensions(
            dataset.get_extensions(), HTTPRequestSettings
        )
        template = json.dumps(schema.generate_template(), indent=4,)
        return render_template("query.html", query_template=template)
    assert False, "unexpected fallthrough"
def dataset_query(dataset: Dataset, body, timer: Timer) -> Response:
    """
    Validate a POSTed query body against the dataset's schema, run it, and
    return the formatted result.
    """
    assert http_request.method == "POST"
    ensure_table_exists(dataset)
    schema = RequestSchema.build_with_extensions(
        dataset.get_extensions(), HTTPRequestSettings
    )
    request = validate_request_content(
        body, schema, timer, dataset, http_request.referrer
    )
    return format_result(run_query(dataset, request, timer))
def parse_and_run_query(
    dataset: Dataset, request: Request, timer: Timer
) -> ClickhouseQueryResult:
    """
    Process a validated Request against a Dataset and execute it on
    Clickhouse, returning the raw query result.

    Applies, in order: the dataset's extension processors, the dataset's
    default conditions, turbo adjustments, the dataset query processors and
    the data source's mandatory conditions, then builds the Clickhouse query
    and runs it through `raw_query`.

    :param dataset: Dataset the request targets.
    :param request: Already-validated request (query, settings, extensions).
    :param timer: Timer used to mark pipeline phases.
    """
    from_date, to_date = TimeSeriesExtensionProcessor.get_time_limit(
        request.extensions["timeseries"]
    )

    # Sampling without turbo mode defeats the purpose of sampling; count how
    # often it happens, per referrer.
    if (
        request.query.get_sample() is not None and request.query.get_sample() != 1.0
    ) and not request.settings.get_turbo():
        metrics.increment("sample_without_turbo", tags={"referrer": request.referrer})

    extensions = dataset.get_extensions()
    for name, extension in extensions.items():
        extension.get_processor().process_query(
            request.query, request.extensions[name], request.settings
        )
    request.query.add_conditions(dataset.default_conditions())

    if request.settings.get_turbo():
        request.query.set_final(False)

    for processor in dataset.get_query_processors():
        processor.process_query(request.query, request.settings)

    relational_source = request.query.get_data_source()
    request.query.add_conditions(relational_source.get_mandatory_conditions())
    source = relational_source.format_from()

    with sentry_sdk.start_span(description="create_query", op="db"):
        # TODO: consider moving the performance logic and the pre_where generation into
        # ClickhouseQuery since they are Clickhouse specific
        query = DictClickhouseQuery(dataset, request.query, request.settings)
    timer.mark("prepare_query")

    num_days = (to_date - from_date).days
    stats = {
        "clickhouse_table": source,
        "final": request.query.get_final(),
        "referrer": request.referrer,
        "num_days": num_days,
        "sample": request.query.get_sample(),
    }

    with sentry_sdk.configure_scope() as scope:
        if scope.span:
            scope.span.set_tag("dataset", type(dataset).__name__)
            # Use the referrer carried by the Request rather than the Flask
            # http_request: this function is also reached outside an HTTP
            # context (e.g. subscription queries), and the rest of this
            # function already reports request.referrer.
            scope.span.set_tag("referrer", request.referrer)
            scope.span.set_tag("timeframe_days", num_days)

    with sentry_sdk.start_span(description=query.format_sql(), op="db") as span:
        span.set_tag("dataset", type(dataset).__name__)
        span.set_tag("table", source)
        try:
            span.set_tag(
                "ast_query",
                AstClickhouseQuery(request.query, request.settings).format_sql(),
            )
        except Exception:
            # Best effort: the AST rendering is informational only and must
            # not fail the query.
            logger.exception("Failed to format ast query")
        result = raw_query(
            request, query, NativeDriverReader(clickhouse_ro), timer, stats
        )

    with sentry_sdk.configure_scope() as scope:
        if scope.span:
            # raw_query may have recorded the thread count it used.
            if "max_threads" in stats:
                scope.span.set_tag("max_threads", stats["max_threads"])

    return result
def _run_query_pipeline(
    dataset: Dataset,
    request: Request,
    timer: Timer,
    query_metadata: SnubaQueryMetadata,
) -> RawQueryResult:
    """
    Run the full processing and execution pipeline for a Snuba query,
    turning a Dataset plus a Request into query results.

    Stages:
    - apply dataset specific syntax extensions (QueryExtension)
    - apply dataset query processors to the abstract Snuba query
    - build a StorageQueryPlan through the dataset's StorageQueryPlanBuilder,
      translating the Snuba Query into the Storage Query (contextual to the
      storage/s); nothing past this point should depend on the dataset
    - run the storage specific query processors
    - hand the built Query and a QueryRunner to the QueryExecutionStrategy
      to actually run the DB query
    """
    # TODO: this will work perfectly with datasets that are not time series. Remove it.
    from_date, to_date = TimeSeriesExtensionProcessor.get_time_limit(
        request.extensions["timeseries"]
    )

    if (
        request.query.get_sample() is not None and request.query.get_sample() != 1.0
    ) and not request.settings.get_turbo():
        metrics.increment("sample_without_turbo", tags={"referrer": request.referrer})

    for name, extension in dataset.get_extensions().items():
        extension.get_processor().process_query(
            request.query, request.extensions[name], request.settings
        )

    # TODO: Fit this in a query processor. All query transformations should be driven by
    # datasets/storages and never hardcoded.
    if request.settings.get_turbo():
        request.query.set_final(False)

    for dataset_processor in dataset.get_query_processors():
        dataset_processor.process_query(request.query, request.settings)

    plan = dataset.get_query_plan_builder().build_plan(request)

    # TODO: This below should be a storage specific query processor.
    data_source = request.query.get_data_source()
    request.query.add_conditions(data_source.get_mandatory_conditions())

    for storage_processor in plan.query_processors:
        storage_processor.process_query(request.query, request.settings)

    runner = partial(
        _format_storage_query_and_run,
        dataset,
        timer,
        query_metadata,
        from_date,
        to_date,
    )
    return plan.execution_strategy.execute(request, runner)
def _run_query_pipeline(
    dataset: Dataset,
    request: Request,
    timer: Timer,
    query_metadata: SnubaQueryMetadata,
) -> QueryResult:
    """
    Run the full processing and execution pipeline for a Snuba query,
    turning a Dataset plus a Request into query results.

    Stages:
    - apply dataset specific syntax extensions (QueryExtension)
    - apply dataset query processors to the abstract Snuba query
    - build a ClickhouseQueryPlan through the dataset's
      ClickhouseQueryPlanBuilder, translating the Snuba Query into the
      Storage Query (contextual to the storage/s); nothing past this point
      should depend on the dataset
    - run the plan specific query processors
    - hand the built Query, the processors to run for each DB query and a
      QueryRunner to the QueryExecutionStrategy to actually run the DB query
    """
    # TODO: this will work perfectly with datasets that are not time series. Remove it.
    from_date, to_date = TimeSeriesExtensionProcessor.get_time_limit(
        request.extensions["timeseries"]
    )

    if (
        request.query.get_sample() is not None and request.query.get_sample() != 1.0
    ) and not request.settings.get_turbo():
        metrics.increment("sample_without_turbo", tags={"referrer": request.referrer})

    for name, extension in dataset.get_extensions().items():
        with sentry_sdk.start_span(
            description=type(extension.get_processor()).__name__, op="extension"
        ):
            extension.get_processor().process_query(
                request.query, request.extensions[name], request.settings
            )

    # TODO: Fit this in a query processor. All query transformations should be driven by
    # datasets/storages and never hardcoded.
    if request.settings.get_turbo():
        request.query.set_final(False)

    for dataset_processor in dataset.get_query_processors():
        with sentry_sdk.start_span(
            description=type(dataset_processor).__name__, op="processor"
        ):
            dataset_processor.process_query(request.query, request.settings)

    plan = dataset.get_query_plan_builder().build_plan(request)
    # From this point on. The logical query should not be used anymore by anyone.
    # The Clickhouse Query is the one to be used to run the rest of the query pipeline.

    # TODO: Break the Query Plan execution out of this method. With the division
    # between plan specific processors and DB query specific processors and with
    # the soon to come ClickhouseCluster, there is more coupling between the
    # components of the query plan.
    for plan_processor in plan.plan_processors:
        with sentry_sdk.start_span(
            description=type(plan_processor).__name__, op="processor"
        ):
            plan_processor.process_query(plan.query, request.settings)

    runner = partial(
        _format_storage_query_and_run,
        timer,
        query_metadata,
        from_date,
        to_date,
        request.referrer,
    )
    return plan.execution_strategy.execute(plan.query, request.settings, runner)