Code Example #1
    def cache_key(self, query_obj: QueryObject,
                  **kwargs: Any) -> Optional[str]:
        extra_cache_keys = self.datasource.get_extra_cache_keys(
            query_obj.to_dict())

        cache_key = (query_obj.cache_key(
            datasource=self.datasource.uid,
            extra_cache_keys=extra_cache_keys,
            rls=security_manager.get_rls_ids(self.datasource)
            if is_feature_enabled("ROW_LEVEL_SECURITY")
            and self.datasource.is_rls_supported else [],
            changed_on=self.datasource.changed_on,
            **kwargs) if query_obj else None)
        return cache_key
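
For context, query_obj.cache_key() above boils down to hashing a deterministic serialization of the query's cache-relevant attributes. A minimal standalone sketch of that idea, assuming md5 over sorted JSON; the helper name make_cache_key is hypothetical, not Superset's API:

    import hashlib
    import json
    from typing import Any, Dict

    def make_cache_key(cache_dict: Dict[str, Any]) -> str:
        # Serialize deterministically (sorted keys, str() fallback for
        # non-JSON types such as datetimes), then hash.
        serialized = json.dumps(cache_dict, default=str, sort_keys=True)
        return hashlib.md5(serialized.encode("utf-8")).hexdigest()

    # Identical inputs always yield the same key, so changed_on and the RLS
    # ids act as cache-busting components of the key.
    key = make_cache_key({
        "datasource": "1__table",
        "extra_cache_keys": [],
        "rls": [],
        "changed_on": "2021-01-01T00:00:00",
    })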
Code Example #2
    def cache_key(self, query_obj: QueryObject,
                  **kwargs: Any) -> Optional[str]:
        extra_cache_keys = self.datasource.get_extra_cache_keys(
            query_obj.to_dict())
        logger.debug("Extra cache keys: %s", extra_cache_keys)

        cache_key = (query_obj.cache_key(
            datasource=self.datasource.uid,
            extra_cache_keys=extra_cache_keys,
            rls=security_manager.get_rls_ids(self.datasource)
            if config["ENABLE_ROW_LEVEL_SECURITY"]
            and self.datasource.is_rls_supported else [],
            changed_on=self.datasource.changed_on,
            **kwargs) if query_obj else None)
        return cache_key
Code Example #3
    def get_query_result(self, query_object: QueryObject) -> Dict[str, Any]:
        """Returns a pandas dataframe based on the query object"""

        # Here, we assume that all queries use the same datasource, which is
        # a valid assumption in the current setting. In the long term, we may
        # support multiple queries against different datasources.

        timestamp_format = None
        if self.datasource.type == "table":
            dttm_col = self.datasource.get_column(query_object.granularity)
            if dttm_col:
                timestamp_format = dttm_col.python_date_format

        # The datasource here can be a different backend, but the interface is common
        result = self.datasource.query(query_object.to_dict())

        df = result.df
        # Transform the timestamps received from the database into a
        # pandas-supported datetime format. If no python_date_format is
        # specified, the values are assumed to be in the default ISO date
        # format. If the format is a unix epoch, the corresponding parsing
        # logic is used.
        if not df.empty:
            if DTTM_ALIAS in df.columns:
                if timestamp_format in ("epoch_s", "epoch_ms"):
                    # Column has already been formatted as a timestamp.
                    df[DTTM_ALIAS] = df[DTTM_ALIAS].apply(pd.Timestamp)
                else:
                    df[DTTM_ALIAS] = pd.to_datetime(
                        df[DTTM_ALIAS], utc=False, format=timestamp_format
                    )
                if self.datasource.offset:
                    df[DTTM_ALIAS] += timedelta(hours=self.datasource.offset)
                df[DTTM_ALIAS] += query_object.time_shift

            if self.enforce_numerical_metrics:
                self.df_metrics_to_num(df, query_object)

            # replace() is not in-place; the result must be assigned back
            df = df.replace([np.inf, -np.inf], np.nan)

        df = query_object.exec_post_processing(df)

        return {
            "query": result.query,
            "status": result.status,
            "error_message": result.error_message,
            "df": df,
        }
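
The two parsing branches above can be illustrated in isolation. A minimal sketch, assuming sample data; the column name __timestamp mirrors DTTM_ALIAS:

    from datetime import datetime, timedelta

    import pandas as pd

    df = pd.DataFrame({"__timestamp": ["2021-01-01 12:00:00"]})

    # Branch 1: an explicit python_date_format is handed to pandas; with
    # format=None, pandas falls back to ISO-style inference.
    df["__timestamp"] = pd.to_datetime(
        df["__timestamp"], utc=False, format="%Y-%m-%d %H:%M:%S"
    )

    # Branch 2 (epoch_s / epoch_ms): per the snippet's comment, the database
    # layer has already converted these values, so each one is simply
    # wrapped in a pd.Timestamp.
    already_converted = pd.Series([datetime(2021, 1, 1)])
    wrapped = already_converted.apply(pd.Timestamp)

    # Timezone offset handling, as in the snippet: shift by whole hours.
    df["__timestamp"] += timedelta(hours=1)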
Code Example #4
File: query_context.py  Project: zphj1987/superset
    def get_single_payload(
        self,
        query_obj: QueryObject,
        force_cached: Optional[bool] = False,
    ) -> Dict[str, Any]:
        """Return results payload for a single quey"""
        if self.result_type == utils.ChartDataResultType.QUERY:
            return {
                "query": self.datasource.get_query_str(query_obj.to_dict()),
                "language": self.datasource.query_language,
            }

        if self.result_type == utils.ChartDataResultType.SAMPLES:
            row_limit = query_obj.row_limit or math.inf
            query_obj = copy.copy(query_obj)
            query_obj.is_timeseries = False
            query_obj.orderby = []
            query_obj.groupby = []
            query_obj.metrics = []
            query_obj.post_processing = []
            query_obj.row_limit = min(row_limit, config["SAMPLES_ROW_LIMIT"])
            query_obj.row_offset = 0
            query_obj.columns = [
                o.column_name for o in self.datasource.columns
            ]

        payload = self.get_df_payload(query_obj, force_cached=force_cached)
        df = payload["df"]
        status = payload["status"]
        if status != utils.QueryStatus.FAILED:
            payload["colnames"] = list(df.columns)
            payload["coltypes"] = utils.extract_dataframe_dtypes(df)
            payload["data"] = self.get_data(df)
        del payload["df"]

        filters = query_obj.filter
        filter_columns = cast(List[str], [flt.get("col") for flt in filters])
        columns = set(self.datasource.column_names)
        applied_time_columns, rejected_time_columns = utils.get_time_filter_status(
            self.datasource, query_obj.applied_time_extras)
        payload["applied_filters"] = [{
            "column": col
        } for col in filter_columns if col in columns] + applied_time_columns
        payload["rejected_filters"] = [{
            "reason": "not_in_datasource",
            "column": col
        } for col in filter_columns if col not in columns
                                       ] + rejected_time_columns

        if (self.result_type == utils.ChartDataResultType.RESULTS
                and status != utils.QueryStatus.FAILED):
            return {"data": payload["data"]}
        return payload
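
The applied/rejected bookkeeping near the end of the method is a plain set-membership split. A standalone sketch of the same logic, with made-up column names:

    from typing import Any, Dict, List, Set

    def split_filters(
        filter_columns: List[str], datasource_columns: Set[str]
    ) -> Dict[str, List[Dict[str, Any]]]:
        # Filters on known columns are reported as applied; the rest are
        # rejected with a reason, mirroring the payload keys above.
        return {
            "applied": [
                {"column": col}
                for col in filter_columns
                if col in datasource_columns
            ],
            "rejected": [
                {"reason": "not_in_datasource", "column": col}
                for col in filter_columns
                if col not in datasource_columns
            ],
        }

    # "state" exists on the datasource, "typo_col" does not.
    result = split_filters(["state", "typo_col"], {"state", "city"})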
Code Example #5
    def query_cache_key(self, query_obj: QueryObject,
                        **kwargs: Any) -> Optional[str]:
        """
        Returns a QueryObject cache key for objects in self.queries
        """
        datasource = self._qc_datasource
        extra_cache_keys = datasource.get_extra_cache_keys(query_obj.to_dict())

        cache_key = (query_obj.cache_key(
            datasource=datasource.uid,
            extra_cache_keys=extra_cache_keys,
            rls=security_manager.get_rls_cache_key(datasource),
            changed_on=datasource.changed_on,
            **kwargs,
        ) if query_obj else None)
        return cache_key
Code Example #6
    def create(  # pylint: disable=too-many-arguments
        self,
        parent_result_type: ChartDataResultType,
        datasource: Optional[DatasourceDict] = None,
        extras: Optional[Dict[str, Any]] = None,
        row_limit: Optional[int] = None,
        time_range: Optional[str] = None,
        time_shift: Optional[str] = None,
        **kwargs: Any,
    ) -> QueryObject:
        datasource_model_instance = None
        if datasource:
            datasource_model_instance = self._convert_to_model(datasource)
        processed_extras = self._process_extras(extras)
        result_type = kwargs.setdefault("result_type", parent_result_type)
        row_limit = self._process_row_limit(row_limit, result_type)
        from_dttm, to_dttm = self._get_dttms(time_range, time_shift,
                                             processed_extras)
        kwargs["from_dttm"] = from_dttm
        kwargs["to_dttm"] = to_dttm
        return QueryObject(
            datasource=datasource_model_instance,
            extras=extras,
            row_limit=row_limit,
            time_range=time_range,
            time_shift=time_shift,
            **kwargs,
        )
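
One subtlety worth noting is the kwargs.setdefault("result_type", parent_result_type) line: it returns the effective value and writes it back into kwargs, so the same value later reaches QueryObject via **kwargs. A minimal sketch of that behavior:

    # setdefault both reads and writes: it returns the effective value and
    # guarantees the key is present for downstream **kwargs expansion.
    kwargs: dict = {}
    result_type = kwargs.setdefault("result_type", "full")
    assert result_type == "full" and kwargs["result_type"] == "full"

    kwargs = {"result_type": "samples"}
    result_type = kwargs.setdefault("result_type", "full")
    assert result_type == "samples"  # an explicit value wins over the default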
Code Example #7
    def get_query_result(self, query_object: QueryObject) -> QueryResult:
        """Returns a pandas dataframe based on the query object"""
        query_context = self._query_context
        # Here, we assume that all queries use the same datasource, which is
        # a valid assumption in the current setting. In the long term, we may
        # support multiple queries against different datasources.

        # The datasource here can be a different backend, but the interface is common
        # pylint: disable=import-outside-toplevel
        from superset.models.sql_lab import Query

        query = ""
        if isinstance(query_context.datasource, Query):
            # todo(hugh): add logic to manage all sip68 models here
            result = query_context.datasource.exc_query(query_object.to_dict())
        else:
            result = query_context.datasource.query(query_object.to_dict())
            query = result.query + ";\n\n"

        df = result.df
        # Transform the timestamps received from the database into a
        # pandas-supported datetime format. If no python_date_format is
        # specified, the values are assumed to be in the default ISO date
        # format. If the format is a unix epoch, the corresponding parsing
        # logic is used.
        if not df.empty:
            df = self.normalize_df(df, query_object)

            if query_object.time_offsets:
                time_offsets = self.processing_time_offsets(df, query_object)
                df = time_offsets["df"]
                queries = time_offsets["queries"]

                query += ";\n\n".join(queries)
                query += ";\n\n"

            # Re-raising QueryObjectValidationError
            try:
                df = query_object.exec_post_processing(df)
            except InvalidPostProcessingError as ex:
                raise QueryObjectValidationError from ex

        result.df = df
        result.query = query
        result.from_dttm = query_object.from_dttm
        result.to_dttm = query_object.to_dttm
        return result
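
The query string assembled above ends up holding every executed statement, separated by ";\n\n": the main query first, then one per time offset. A toy sketch of that assembly, with assumed SQL strings:

    # Main statement first, then one statement per time offset, so the
    # returned payload shows everything that was executed.
    query = "SELECT ds, metric FROM t" + ";\n\n"
    offset_queries = [
        "SELECT ds, metric FROM t_offset_1w",
        "SELECT ds, metric FROM t_offset_1y",
    ]
    query += ";\n\n".join(offset_queries)
    query += ";\n\n"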
Code Example #8
    def get_single_payload(self, query_obj: QueryObject) -> Dict[str, Any]:
        """Returns a payload of metadata and data"""
        if self.result_type == utils.ChartDataResultType.QUERY:
            return {
                "query": self.datasource.get_query_str(query_obj.to_dict()),
                "language": self.datasource.query_language,
            }
        if self.result_type == utils.ChartDataResultType.SAMPLES:
            row_limit = query_obj.row_limit or math.inf
            query_obj = copy.copy(query_obj)
            query_obj.groupby = []
            query_obj.metrics = []
            query_obj.post_processing = []
            query_obj.row_limit = min(row_limit, config["SAMPLES_ROW_LIMIT"])
            query_obj.row_offset = 0
            query_obj.columns = [o.column_name for o in self.datasource.columns]
        payload = self.get_df_payload(query_obj)
        df = payload["df"]
        status = payload["status"]
        if status != utils.QueryStatus.FAILED:
            if df.empty:
                payload["error"] = "No data"
            else:
                payload["data"] = self.get_data(df)
        del payload["df"]
        if self.result_type == utils.ChartDataResultType.RESULTS:
            return {"data": payload["data"]}
        return payload
Code Example #9
    def query_cache_key(self, query_obj: QueryObject,
                        **kwargs: Any) -> Optional[str]:
        """
        Returns a QueryObject cache key for objects in self.queries
        """
        datasource = self._qc_datasource
        extra_cache_keys = datasource.get_extra_cache_keys(query_obj.to_dict())

        cache_key = (query_obj.cache_key(
            datasource=datasource.uid,
            extra_cache_keys=extra_cache_keys,
            rls=security_manager.get_rls_ids(datasource)
            if is_feature_enabled("ROW_LEVEL_SECURITY")
            and datasource.is_rls_supported else [],
            changed_on=datasource.changed_on,
            **kwargs,
        ) if query_obj else None)
        return cache_key
Code Example #10
def _get_drill_detail(
    query_context: QueryContext, query_obj: QueryObject, force_cached: bool = False
) -> Dict[str, Any]:
    # TODO(yongjie): remove this function once it has been decided whether
    # samples should be applied to the time filter.
    datasource = _get_datasource(query_context, query_obj)
    query_obj = copy.copy(query_obj)
    query_obj.is_timeseries = False
    query_obj.orderby = []
    query_obj.metrics = None
    query_obj.post_processing = []
    qry_obj_cols = []
    for o in datasource.columns:
        if isinstance(o, dict):
            qry_obj_cols.append(o.get("column_name"))
        else:
            qry_obj_cols.append(o.column_name)
    query_obj.columns = qry_obj_cols
    return _get_full(query_context, query_obj, force_cached)
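
The isinstance branch exists because, around the SIP-68 refactor, datasource.columns may hold either plain dicts or ORM column objects. A standalone sketch of the same normalization; the Column dataclass is a stand-in, not Superset's model:

    from dataclasses import dataclass
    from typing import Any, Dict, List, Optional, Union

    @dataclass
    class Column:  # stand-in for an ORM column model (assumed)
        column_name: str

    def column_names(
        columns: List[Union[Dict[str, Any], Column]]
    ) -> List[Optional[str]]:
        # Dicts and model objects expose the name differently.
        return [
            col.get("column_name") if isinstance(col, dict) else col.column_name
            for col in columns
        ]

    names = column_names([{"column_name": "id"}, Column("name")])  # ["id", "name"]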
Code Example #11
def _get_query(
    query_context: QueryContext,
    query_obj: QueryObject,
    _: bool,
) -> Dict[str, Any]:
    datasource = _get_datasource(query_context, query_obj)
    result = {"language": datasource.query_language}
    try:
        result["query"] = datasource.get_query_str(query_obj.to_dict())
    except QueryObjectValidationError as err:
        result["error"] = err.message
    return result
Code Example #12
def _get_samples(query_context: QueryContext,
                 query_obj: QueryObject,
                 force_cached: bool = False) -> Dict[str, Any]:
    datasource = _get_datasource(query_context, query_obj)
    query_obj = copy.copy(query_obj)
    query_obj.is_timeseries = False
    query_obj.orderby = []
    query_obj.metrics = None
    query_obj.post_processing = []
    query_obj.columns = [o.column_name for o in datasource.columns]
    query_obj.from_dttm = None
    query_obj.to_dttm = None
    return _get_full(query_context, query_obj, force_cached)
Code Example #13
    def __init__(  # pylint: disable=too-many-arguments
        self,
        datasource: Dict[str, Any],
        queries: List[Dict[str, Any]],
        force: bool = False,
        custom_cache_timeout: Optional[int] = None,
        result_type: Optional[utils.ChartDataResultType] = None,
        result_format: Optional[utils.ChartDataResultFormat] = None,
    ) -> None:
        self.datasource = ConnectorRegistry.get_datasource(
            str(datasource["type"]), int(datasource["id"]), db.session)
        self.queries = [QueryObject(**query_obj) for query_obj in queries]
        self.force = force
        self.custom_cache_timeout = custom_cache_timeout
        self.result_type = result_type or utils.ChartDataResultType.FULL
        self.result_format = result_format or utils.ChartDataResultFormat.JSON
Code Example #14
    def get_single_payload(self, query_obj: QueryObject) -> Dict[str, Any]:
        """Returns a payload of metadata and data"""
        if self.result_type == utils.ChartDataResultType.QUERY:
            return {
                "query": self.datasource.get_query_str(query_obj.to_dict()),
                "language": self.datasource.query_language,
            }
        if self.result_type == utils.ChartDataResultType.SAMPLES:
            row_limit = query_obj.row_limit or math.inf
            query_obj = copy.copy(query_obj)
            query_obj.orderby = []
            query_obj.groupby = []
            query_obj.metrics = []
            query_obj.post_processing = []
            query_obj.row_limit = min(row_limit, config["SAMPLES_ROW_LIMIT"])
            query_obj.row_offset = 0
            query_obj.columns = [
                o.column_name for o in self.datasource.columns
            ]
        payload = self.get_df_payload(query_obj)
        # TODO: implement
        payload["annotation_data"] = []
        df = payload["df"]
        status = payload["status"]
        if status != utils.QueryStatus.FAILED:
            payload["data"] = self.get_data(df)
        del payload["df"]

        filters = query_obj.filter
        filter_columns = cast(List[str], [flt.get("col") for flt in filters])
        columns = set(self.datasource.column_names)
        applied_time_columns, rejected_time_columns = utils.get_time_filter_status(
            self.datasource, query_obj.applied_time_extras)
        payload["applied_filters"] = [{
            "column": col
        } for col in filter_columns if col in columns] + applied_time_columns
        payload["rejected_filters"] = [{
            "reason": "not_in_datasource",
            "column": col
        } for col in filter_columns if col not in columns
                                       ] + rejected_time_columns

        if self.result_type == utils.ChartDataResultType.RESULTS:
            return {"data": payload["data"]}
        return payload
Code Example #15
def _get_samples(
    query_context: QueryContext, query_obj: QueryObject, force_cached: bool = False
) -> Dict[str, Any]:
    datasource = _get_datasource(query_context, query_obj)
    query_obj = copy.copy(query_obj)
    query_obj.is_timeseries = False
    query_obj.orderby = []
    query_obj.metrics = None
    query_obj.post_processing = []
    qry_obj_cols = []
    for o in datasource.columns:
        if isinstance(o, dict):
            qry_obj_cols.append(o.get("column_name"))
        else:
            qry_obj_cols.append(o.column_name)
    query_obj.columns = qry_obj_cols
    query_obj.from_dttm = None
    query_obj.to_dttm = None
    return _get_full(query_context, query_obj, force_cached)
Code Example #16
    def __init__(
        self,
        datasource: DatasourceDict,
        queries: List[Dict[str, Any]],
        force: bool = False,
        custom_cache_timeout: Optional[int] = None,
        result_type: Optional[ChartDataResultType] = None,
        result_format: Optional[ChartDataResultFormat] = None,
    ) -> None:
        self.datasource = ConnectorRegistry.get_datasource(
            str(datasource["type"]), int(datasource["id"]), db.session)
        self.queries = [QueryObject(**query_obj) for query_obj in queries]
        self.force = force
        self.custom_cache_timeout = custom_cache_timeout
        self.result_type = result_type or ChartDataResultType.FULL
        self.result_format = result_format or ChartDataResultFormat.JSON
        self.cache_values = {
            "datasource": datasource,
            "queries": queries,
            "result_type": self.result_type,
            "result_format": self.result_format,
        }
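
Unlike the older constructor in Code Example #13, this version also records its raw inputs in cache_values. Keeping the untouched request payload around means the whole context can be serialized and rebuilt later, for example by an async worker. A minimal sketch of that round trip, with assumed payload values:

    import json

    cache_values = {
        "datasource": {"type": "table", "id": 1},
        "queries": [{"metrics": ["count"], "row_limit": 100}],
        "result_type": "full",
        "result_format": "json",
    }

    # The raw dicts are JSON-serializable, unlike the hydrated ORM objects,
    # so they survive a round trip through a cache or task queue.
    rebuilt = json.loads(json.dumps(cache_values))
    assert rebuilt == cache_values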