Example 1: lazily creating and caching a QueryContextFactory
    def get_query_context_factory(self) -> QueryContextFactory:
        if self.query_context_factory is None:
            # pylint: disable=import-outside-toplevel
            from superset.common.query_context_factory import QueryContextFactory

            self.query_context_factory = QueryContextFactory()
        return self.query_context_factory
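
This accessor lazily creates and caches a single QueryContextFactory; the import is deferred into the method body so the module can load without triggering a circular import. As a hedged sketch only, here is how the cached factory might then be used to build a QueryContext; the owner object, the datasource values, and the ChartDataResultType import path are assumptions, while the create() arguments mirror the later examples.

from superset.common.chart_data import ChartDataResultType  # import path may differ by Superset version


def build_example_query_context(owner):
    # Hedged sketch: `owner` is any object exposing get_query_context_factory(),
    # and the datasource type/id here are placeholders.
    factory = owner.get_query_context_factory()
    return factory.create(
        datasource={"type": "table", "id": 1},
        queries=[{"columns": ["col1"], "metrics": ["count"]}],
        result_type=ChartDataResultType.FULL,
        force=False,
    )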
Example 2: test asserting flattened column names and label_map after pivot + flatten post-processing
def test_get_label_map(app_context, virtual_dataset_comma_in_column_value):
    qc = QueryContextFactory().create(
        datasource={
            "type": virtual_dataset_comma_in_column_value.type,
            "id": virtual_dataset_comma_in_column_value.id,
        },
        queries=[{
            "columns": ["col1", "col2"],
            "metrics": ["count"],
            "post_processing": [
                {
                    "operation": "pivot",
                    "options": {
                        "aggregates": {
                            "count": {
                                "operator": "mean"
                            }
                        },
                        "columns": ["col2"],
                        "index": ["col1"],
                    },
                },
                {
                    "operation": "flatten"
                },
            ],
        }],
        result_type=ChartDataResultType.FULL,
        force=True,
    )
    query_object = qc.queries[0]
    payload = qc.get_df_payload(query_object)
    df = payload["df"]
    label_map = payload["label_map"]
    assert list(df.columns.values) == [
        "col1",
        "count" + FLAT_COLUMN_SEPARATOR + "col2, row1",
        "count" + FLAT_COLUMN_SEPARATOR + "col2, row2",
        "count" + FLAT_COLUMN_SEPARATOR + "col2, row3",
    ]
    assert label_map == {
        "col1": ["col1"],
        "count, col2, row1": ["count", "col2, row1"],
        "count, col2, row2": ["count", "col2, row2"],
        "count, col2, row3": ["count", "col2, row3"],
    }
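
The expected columns show that FLAT_COLUMN_SEPARATOR resolves to ", " here: the pivot step produces one column per distinct col2 value, the flatten step joins the metric name with that value, and label_map keeps the original components behind each flattened label. Below is a minimal pandas sketch of that pivot-then-flatten shape, using made-up rows and a hard-coded separator as assumptions; it is not the Superset post-processing code itself.

import pandas as pd

# Made-up rows approximating the virtual dataset used by the test above.
df = pd.DataFrame({
    "col1": ["a", "a", "b"],
    "col2": ["col2, row1", "col2, row2", "col2, row1"],
    "count": [1, 2, 3],
})

# "pivot" step: one column per distinct col2 value, aggregated with mean.
pivoted = df.pivot_table(index=["col1"], columns=["col2"], values="count", aggfunc="mean")

# "flatten" step: join metric name and col2 value into a single label,
# assuming the separator is ", " (what the assertions above imply).
SEPARATOR = ", "
pivoted.columns = [SEPARATOR.join(["count", str(col)]) for col in pivoted.columns]
flat = pivoted.reset_index()

print(list(flat.columns))  # ['col1', 'count, col2, row1', 'count, col2, row2']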
Example 3: a command run() fetching dataset samples through a QueryContext
    def run(self) -> Dict[str, Any]:
        self.validate()
        if not self._model:
            raise DatasetNotFoundError()

        qc_instance = QueryContextFactory().create(
            datasource={
                "type": self._model.type,
                "id": self._model.id,
            },
            queries=[{}],
            result_type=ChartDataResultType.SAMPLES,
            force=self._force,
        )
        results = qc_instance.get_payload()
        try:
            return results["queries"][0]
        except (IndexError, KeyError) as exc:
            raise DatasetSamplesFailedError from exc
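
This run() validates the command, fetches a SAMPLES-typed payload for the dataset with an empty query, and turns a missing or empty "queries" entry into a domain error. A hedged usage sketch follows; the command class owning this method is not named in the excerpt, so it is passed in as a parameter here.

def print_sample_rows(command) -> None:
    # Hedged sketch: `command` stands for an instance of the samples command
    # class whose run() method is shown above.
    result = command.run()  # first entry of the payload's "queries" list
    # "data" and "status" keys also appear in the get_samples() example below.
    for row in result.get("data", []):
        print(row)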
Example 4: get_samples() combining a paginated samples query with a COUNT(*) total
def get_samples(  # pylint: disable=too-many-arguments,too-many-locals
    datasource_type: str,
    datasource_id: int,
    force: bool = False,
    page: int = 1,
    per_page: int = 1000,
    payload: Optional[SamplesPayloadSchema] = None,
) -> Dict[str, Any]:
    datasource = DatasourceDAO.get_datasource(
        session=db.session,
        datasource_type=datasource_type,
        datasource_id=datasource_id,
    )

    limit_clause = get_limit_clause(page, per_page)

    # todo(yongjie): Constructing count(*) and samples in the same query_context,
    #  then remove query_type==SAMPLES
    # constructing samples query
    samples_instance = QueryContextFactory().create(
        datasource={
            "type": datasource.type,
            "id": datasource.id,
        },
        queries=[{
            **payload,
            **limit_clause
        } if payload else limit_clause],
        result_type=ChartDataResultType.SAMPLES,
        force=force,
    )

    # constructing count(*) query
    count_star_metric = {
        "metrics": [{
            "expressionType": "SQL",
            "sqlExpression": "COUNT(*)",
            "label": "COUNT(*)",
        }]
    }
    count_star_instance = QueryContextFactory().create(
        datasource={
            "type": datasource.type,
            "id": datasource.id,
        },
        queries=[{
            **payload,
            **count_star_metric
        } if payload else count_star_metric],
        result_type=ChartDataResultType.FULL,
        force=force,
    )
    samples_results = samples_instance.get_payload()
    count_star_results = count_star_instance.get_payload()

    try:
        sample_data = samples_results["queries"][0]
        count_star_data = count_star_results["queries"][0]
        failed_status = (sample_data.get("status") == QueryStatus.FAILED or
                         count_star_data.get("status") == QueryStatus.FAILED)
        error_msg = sample_data.get("error") or count_star_data.get("error")
        if failed_status and error_msg:
            cache_key = sample_data.get("cache_key")
            QueryCacheManager.delete(cache_key, region=CacheRegion.DATA)
            raise DatasetSamplesFailedError(error_msg)

        sample_data["page"] = page
        sample_data["per_page"] = per_page
        sample_data["total_count"] = count_star_data["data"][0]["COUNT(*)"]
        return sample_data
    except (IndexError, KeyError) as exc:
        raise DatasetSamplesFailedError from exc
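
get_samples() builds two query contexts against the same datasource: a SAMPLES query constrained by a limit clause derived from page and per_page, and a FULL query whose only metric is COUNT(*); it then stitches the pagination fields and the total count into the sample payload, deleting the cached entry when either query failed. get_limit_clause() is not shown above; below is a hedged sketch of the shape it presumably returns, using the standard row_limit/row_offset query-object keys, while the real Superset helper may validate bounds and differ in detail.

from typing import Any, Dict

def get_limit_clause(page: int, per_page: int) -> Dict[str, Any]:
    # Hedged sketch only: pages are 1-based, so the offset skips the rows of
    # earlier pages. The real helper may clamp per_page and validate inputs.
    return {
        "row_limit": per_page,
        "row_offset": (page - 1) * per_page,
    }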