def get_query_result(self, query_object: QueryObject) -> QueryResult:
    """Execute ``query_object`` against the datasource and return its result.

    The raw dataframe is normalized (timestamps parsed per the configured
    ``python_date_format``, or ISO / epoch parsing by default), optionally
    augmented with time-offset comparison queries, and run through the query
    object's post-processing pipeline before being handed back.
    """
    # All queries are assumed to share a single datasource for the current
    # setting; multiple datasources per context may be supported long term.
    # The datasource can be a different backend, but the interface is common.
    result = self.datasource.query(query_object.to_dict())

    executed_sql = f"{result.query};\n\n"
    dataframe = result.df

    # Transform database timestamps into pandas-supported datetimes.
    # An empty result needs no normalization or post-processing.
    if not dataframe.empty:
        dataframe = self.normalize_df(dataframe, query_object)

        if query_object.time_offsets:
            offsets = self.processing_time_offsets(dataframe, query_object)
            dataframe = offsets["df"]
            # Append the comparison queries so callers see all SQL that ran.
            executed_sql += ";\n\n".join(offsets["queries"])
            executed_sql += ";\n\n"

        dataframe = query_object.exec_post_processing(dataframe)

    result.df = dataframe
    result.query = executed_sql
    return result
def get_query_result(self, query_object: QueryObject) -> Dict[str, Any]:
    """Returns a pandas dataframe based on the query object.

    Executes the query against the datasource, normalizes the time column,
    coerces metrics to numeric types, and applies post-processing. Returns a
    dict with the executed ``query``, ``status``, ``error_message`` and the
    resulting ``df``.
    """
    # Here, we assume that all the queries will use the same datasource, which is
    # a valid assumption for current setting. In the long term, we may
    # support multiple queries from different data sources.
    timestamp_format = None
    if self.datasource.type == "table":
        dttm_col = self.datasource.get_column(query_object.granularity)
        if dttm_col:
            timestamp_format = dttm_col.python_date_format

    # The datasource here can be different backend but the interface is common
    result = self.datasource.query(query_object.to_dict())
    df = result.df

    # Transform the timestamp we received from database to pandas supported
    # datetime format. If no python_date_format is specified, the pattern will
    # be considered as the default ISO date format.
    # If the datetime format is unix, the parse will use the corresponding
    # parsing logic
    if not df.empty:
        if DTTM_ALIAS in df.columns:
            if timestamp_format in ("epoch_s", "epoch_ms"):
                # Column has already been formatted as a timestamp.
                df[DTTM_ALIAS] = df[DTTM_ALIAS].apply(pd.Timestamp)
            else:
                df[DTTM_ALIAS] = pd.to_datetime(
                    df[DTTM_ALIAS], utc=False, format=timestamp_format
                )
            if self.datasource.offset:
                df[DTTM_ALIAS] += timedelta(hours=self.datasource.offset)
            df[DTTM_ALIAS] += query_object.time_shift

        if self.enforce_numerical_metrics:
            self.df_metrics_to_num(df, query_object)

        # BUG FIX: the original called df.replace(...) without assigning the
        # result; pandas' replace is not in-place by default, so infinities
        # were never actually converted to NaN.
        df = df.replace([np.inf, -np.inf], np.nan)

        df = query_object.exec_post_processing(df)

    return {
        "query": result.query,
        "status": result.status,
        "error_message": result.error_message,
        "df": df,
    }
def get_query_result(self, query_object: QueryObject) -> QueryResult:
    """Returns a pandas dataframe based on the query object.

    Dispatches to the SIP-68 ``Query`` model's ``exc_query`` when the
    context's datasource is a saved query, otherwise to the regular
    datasource ``query``. The dataframe is normalized, augmented with any
    time-offset comparison queries, post-processed, and returned together
    with the executed SQL and the query's time range.

    :raises QueryObjectValidationError: when post-processing fails.
    """
    query_context = self._query_context
    # Here, we assume that all the queries will use the same datasource, which is
    # a valid assumption for current setting. In the long term, we may
    # support multiple queries from different data sources.

    # The datasource here can be different backend but the interface is common
    # pylint: disable=import-outside-toplevel
    from superset.models.sql_lab import Query

    query = ""
    if isinstance(query_context.datasource, Query):
        # todo(hugh): add logic to manage all sip68 models here
        result = query_context.datasource.exc_query(query_object.to_dict())
    else:
        result = query_context.datasource.query(query_object.to_dict())
        query = result.query + ";\n\n"

    df = result.df
    # Transform the timestamp we received from database to pandas supported
    # datetime format. If no python_date_format is specified, the pattern will
    # be considered as the default ISO date format.
    # If the datetime format is unix, the parse will use the corresponding
    # parsing logic
    if not df.empty:
        df = self.normalize_df(df, query_object)

        if query_object.time_offsets:
            time_offsets = self.processing_time_offsets(df, query_object)
            df = time_offsets["df"]
            queries = time_offsets["queries"]

            query += ";\n\n".join(queries)
            query += ";\n\n"

        # Re-raise as QueryObjectValidationError so callers surface it as a
        # validation failure. FIX: forward the underlying message — the
        # original raised a bare QueryObjectValidationError, discarding the
        # post-processing error text users need to diagnose the problem.
        try:
            df = query_object.exec_post_processing(df)
        except InvalidPostProcessingError as ex:
            raise QueryObjectValidationError(str(ex)) from ex

    result.df = df
    result.query = query
    result.from_dttm = query_object.from_dttm
    result.to_dttm = query_object.to_dttm
    return result