def normalize_col(
    df: pd.DataFrame,
    timestamp_format: Optional[str],
    offset: int,
    time_shift: Optional[timedelta],
) -> pd.DataFrame:
    """Run ``normalize_dttm_col`` on a copy of ``df`` and return that copy.

    The frame passed in by the caller is never handed to
    ``normalize_dttm_col``; only the copy is. The remaining arguments are
    forwarded positionally and unchanged.

    :param df: frame to normalize; it is copied first
    :param timestamp_format: forwarded as-is to ``normalize_dttm_col``
    :param offset: forwarded as-is to ``normalize_dttm_col``
    :param time_shift: forwarded as-is to ``normalize_dttm_col``
    :returns: the copy after ``normalize_dttm_col`` has been applied to it
    """
    normalized = df.copy()
    # Called for its effect on ``normalized``; any return value is ignored.
    normalize_dttm_col(normalized, timestamp_format, offset, time_shift)
    return normalized
def test_normalize_dttm_col(self):
    """Check ``normalize_dttm_col`` across timestamp formats, offsets and shifts.

    Every case goes through ``normalize_col``, which applies
    ``normalize_dttm_col`` to a copy of the fixture and returns that copy.
    The other callers in this file invoke ``normalize_dttm_col`` as a bare
    statement and keep using the frame they passed in, so its return value
    must not be subscripted directly, and the shared fixture must not be
    modified from one assertion to the next.
    """
    ts = pd.Timestamp(2021, 2, 15, 19, 0, 0, 0)
    df = pd.DataFrame([{"__timestamp": ts, "a": 1}])

    # test regular (non-numeric) format
    assert normalize_col(df, None, 0, None)[DTTM_ALIAS][0] == ts
    assert normalize_col(df, "epoch_ms", 0, None)[DTTM_ALIAS][0] == ts
    assert normalize_col(df, "epoch_s", 0, None)[DTTM_ALIAS][0] == ts

    # test offset
    assert normalize_col(df, None, 1, None)[DTTM_ALIAS][0] == pd.Timestamp(
        2021, 2, 15, 20, 0, 0, 0
    )

    # test offset and timedelta
    assert normalize_col(df, None, 1, timedelta(minutes=30))[DTTM_ALIAS][
        0
    ] == pd.Timestamp(2021, 2, 15, 20, 30, 0, 0)

    # test numeric epoch_s format
    df = pd.DataFrame([{"__timestamp": ts.timestamp(), "a": 1}])
    assert normalize_col(df, "epoch_s", 0, None)[DTTM_ALIAS][0] == ts

    # test numeric epoch_ms format
    df = pd.DataFrame([{"__timestamp": ts.timestamp() * 1000, "a": 1}])
    assert normalize_col(df, "epoch_ms", 0, None)[DTTM_ALIAS][0] == ts
def get_query_result(self, query_object: QueryObject) -> Dict[str, Any]:
    """Run ``query_object`` against the datasource and package the outcome.

    When the query returns rows, the frame is passed through
    ``self.normalize_df`` and then through the query object's
    post-processing. An empty frame is returned untouched.

    :param query_object: the query to execute; also supplies the
        post-processing applied to a non-empty result
    :returns: a dict with the keys ``query``, ``status``, ``error_message``
        (copied from the datasource result) and ``df`` (the resulting frame)
    """
    # Here, we assume that all the queries will use the same datasource, which
    # is a valid assumption for current setting. In the long term, we may
    # support multiple queries from different data sources.

    # The datasource here can be different backend but the interface is common
    result = self.datasource.query(query_object.to_dict())
    df = result.df

    if not df.empty:
        # normalize_df holds the shared clean-up steps (timestamp column
        # normalization, optional numeric coercion of metrics, +/-inf -> NaN),
        # so they are not repeated here.
        df = self.normalize_df(df, query_object)
        df = query_object.exec_post_processing(df)

    return {
        "query": result.query,
        "status": result.status,
        "error_message": result.error_message,
        "df": df,
    }
def normalize_df(self, df: pd.DataFrame, query_object: QueryObject) -> pd.DataFrame:
    """Clean up a query result frame in place and return it.

    Steps, in order:

    1. For a ``"table"`` datasource, look up the column named by
       ``query_object.granularity`` and, if found, take its
       ``python_date_format`` as the timestamp format (otherwise ``None``).
    2. Call ``normalize_dttm_col`` on ``df`` with that format, the datasource
       offset and the query's time shift.
    3. If ``self.enforce_numerical_metrics`` is set, call
       ``self.df_metrics_to_num``.
    4. Replace positive and negative infinity with NaN.

    :param df: frame to clean up; it is modified in place
    :param query_object: supplies ``granularity`` and ``time_shift``
    :returns: the same ``df`` object that was passed in
    """
    datasource = self.datasource

    # Only "table" datasources are asked for the granularity column.
    dttm_col = (
        datasource.get_column(query_object.granularity)
        if datasource.type == "table"
        else None
    )
    timestamp_format = dttm_col.python_date_format if dttm_col else None

    normalize_dttm_col(
        df=df,
        timestamp_format=timestamp_format,
        offset=datasource.offset,
        time_shift=query_object.time_shift,
    )

    if self.enforce_numerical_metrics:
        self.df_metrics_to_num(df, query_object)

    # In-place so that callers holding a reference to ``df`` see the change.
    df.replace(to_replace=[np.inf, -np.inf], value=np.nan, inplace=True)
    return df