def pivot_table(df: pd.DataFrame, form_data: Dict[str, Any]) -> pd.DataFrame:
    """
    Pivot table (v1).
    """
    if form_data.get("granularity") == "all" and DTTM_ALIAS in df:
        del df[DTTM_ALIAS]

    # v1 func names => v2 func names
    func_map = {
        "sum": "Sum",
        "mean": "Average",
        "min": "Minimum",
        "max": "Maximum",
        "std": "Sample Standard Deviation",
        "var": "Sample Variance",
    }

    return pivot_df(
        df,
        rows=form_data.get("groupby") or [],
        columns=form_data.get("columns") or [],
        metrics=[get_metric_name(m) for m in form_data["metrics"]],
        aggfunc=func_map.get(form_data.get("pandas_aggfunc", "sum"), "Sum"),
        transpose_pivot=bool(form_data.get("transpose_pivot")),
        combine_metrics=bool(form_data.get("combine_metric")),
        show_rows_total=bool(form_data.get("pivot_margins")),
        show_columns_total=bool(form_data.get("pivot_margins")),
        apply_metrics_on_rows=False,
    )
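# Illustration only: a minimal form_data payload accepted by the v1
# `pivot_table` above; the group-by, column, and metric names are
# hypothetical.
example_form_data = {
    "granularity": "all",
    "groupby": ["country"],
    "columns": ["year"],
    "metrics": ["count"],        # saved metric referenced by name
    "pandas_aggfunc": "mean",    # mapped to "Average" via func_map
    "transpose_pivot": False,
    "combine_metric": False,
    "pivot_margins": True,       # enables both row and column totals
}
# pivoted = pivot_table(df, example_form_data)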
def adhoc_metric_to_sqla(self, metric, cols):
    """
    Turn an adhoc metric into a sqlalchemy column.

    :param dict metric: Adhoc metric definition
    :param dict cols: Columns for the current table
    :returns: The metric defined as a sqlalchemy column
    :rtype: sqlalchemy.sql.column
    """
    expression_type = metric.get("expressionType")
    label = utils.get_metric_name(metric)

    if expression_type == utils.ADHOC_METRIC_EXPRESSION_TYPES["SIMPLE"]:
        column_name = metric.get("column").get("column_name")
        table_column = cols.get(column_name)
        if table_column:
            sqla_column = table_column.get_sqla_col()
        else:
            sqla_column = column(column_name)
        sqla_metric = self.sqla_aggregations[metric.get("aggregate")](sqla_column)
    elif expression_type == utils.ADHOC_METRIC_EXPRESSION_TYPES["SQL"]:
        sqla_metric = literal_column(metric.get("sqlExpression"))
    else:
        return None

    return self.make_sqla_column_compatible(sqla_metric, label)
def adhoc_metric_to_sqla(self, metric, cols):
    """
    Turn an adhoc metric into a sqlalchemy column.

    :param dict metric: Adhoc metric definition
    :param dict cols: Columns for the current table
    :returns: The metric defined as a sqlalchemy column
    :rtype: sqlalchemy.sql.column
    """
    expression_type = metric.get('expressionType')
    label = utils.get_metric_name(metric)
    label = self.get_label(label)

    if expression_type == utils.ADHOC_METRIC_EXPRESSION_TYPES['SIMPLE']:
        column_name = metric.get('column').get('column_name')
        sqla_column = column(column_name)
        table_column = cols.get(column_name)
        if table_column:
            sqla_column = table_column.get_sqla_col()
        sqla_metric = self.sqla_aggregations[metric.get('aggregate')](sqla_column)
        sqla_metric = sqla_metric.label(label)
        return sqla_metric
    elif expression_type == utils.ADHOC_METRIC_EXPRESSION_TYPES['SQL']:
        sqla_metric = literal_column(metric.get('sqlExpression'))
        sqla_metric = sqla_metric.label(label)
        return sqla_metric
    else:
        return None
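# Illustration only: the two adhoc-metric shapes handled by
# `adhoc_metric_to_sqla` above (column and expression values are
# hypothetical). SIMPLE metrics aggregate a single column; SQL metrics
# carry a free-form expression.
simple_adhoc_metric = {
    "expressionType": "SIMPLE",
    "column": {"column_name": "num"},
    "aggregate": "SUM",
    "label": "sum__num",
}
sql_adhoc_metric = {
    "expressionType": "SQL",
    "sqlExpression": "SUM(num) / COUNT(*)",
    "label": "avg_num",
}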
def data_for_slices(self, slices: List[Slice]) -> Dict[str, Any]:
    """
    The representation of the datasource containing only the required
    data to render the provided slices.

    Used to reduce the payload when loading a dashboard.
    """
    data = self.data
    metric_names = set()
    column_names = set()
    for slc in slices:
        form_data = slc.form_data
        # pull out all required metrics from the form_data
        for param in METRIC_FORM_DATA_PARAMS:
            for metric in utils.get_iterable(form_data.get(param) or []):
                metric_names.add(utils.get_metric_name(metric))
                if utils.is_adhoc_metric(metric):
                    column_names.add(
                        (metric.get("column") or {}).get("column_name")
                    )

        # pull out all required columns from the form_data
        for filter_ in form_data.get("adhoc_filters") or []:
            if filter_["clause"] == "WHERE" and filter_.get("subject"):
                column_names.add(filter_.get("subject"))

        for param in COLUMN_FORM_DATA_PARAMS:
            for column in utils.get_iterable(form_data.get(param) or []):
                column_names.add(column)

    filtered_metrics = [
        metric
        for metric in data["metrics"]
        if metric["metric_name"] in metric_names
    ]
    filtered_columns = [
        column
        for column in data["columns"]
        if column["column_name"] in column_names
    ]

    del data["description"]
    data.update({"metrics": filtered_metrics})
    data.update({"columns": filtered_columns})

    verbose_map = {"__timestamp": "Time"}
    verbose_map.update(
        {
            metric["metric_name"]: metric["verbose_name"] or metric["metric_name"]
            for metric in filtered_metrics
        }
    )
    verbose_map.update(
        {
            column["column_name"]: column["verbose_name"] or column["column_name"]
            for column in filtered_columns
        }
    )
    data["verbose_map"] = verbose_map

    return data
def test_get_metric_name_adhoc():
    metric = deepcopy(SIMPLE_SUM_ADHOC_METRIC)
    assert get_metric_name(metric) == "my SUM"
    del metric["label"]
    assert get_metric_name(metric) == "SUM(my_col)"
    metric["label"] = ""
    assert get_metric_name(metric) == "SUM(my_col)"
    del metric["aggregate"]
    assert get_metric_name(metric) == "my_col"
    metric["aggregate"] = ""
    assert get_metric_name(metric) == "my_col"

    metric = deepcopy(SQL_ADHOC_METRIC)
    assert get_metric_name(metric) == "my_sql"
    del metric["label"]
    assert get_metric_name(metric) == "SUM(my_col)"
    metric["label"] = ""
    assert get_metric_name(metric) == "SUM(my_col)"
def pivot_table_v2(df: pd.DataFrame, form_data: Dict[str, Any]) -> pd.DataFrame:
    """
    Pivot table v2.
    """
    if form_data.get("granularity_sqla") == "all" and DTTM_ALIAS in df:
        del df[DTTM_ALIAS]

    return pivot_df(
        df,
        rows=form_data.get("groupbyRows") or [],
        columns=form_data.get("groupbyColumns") or [],
        metrics=[get_metric_name(m) for m in form_data["metrics"]],
        aggfunc=form_data.get("aggregateFunction", "Sum"),
        transpose_pivot=bool(form_data.get("transposePivot")),
        combine_metrics=bool(form_data.get("combineMetric")),
        show_rows_total=bool(form_data.get("rowTotals")),
        show_columns_total=bool(form_data.get("colTotals")),
        apply_metrics_on_rows=form_data.get("metricsLayout") == "ROWS",
    )
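# Assumption: `pivot_df` is the shared helper that both `pivot_table` (v1)
# and `pivot_table_v2` delegate to. A minimal sketch of the signature these
# call sites imply, not the actual implementation:
from typing import List

import pandas as pd


def pivot_df(
    df: pd.DataFrame,
    rows: List[str],
    columns: List[str],
    metrics: List[str],
    aggfunc: str = "Sum",
    transpose_pivot: bool = False,
    combine_metrics: bool = False,
    show_rows_total: bool = False,
    show_columns_total: bool = False,
    apply_metrics_on_rows: bool = False,
) -> pd.DataFrame:
    ...  # pivots `df` and returns the reshaped frame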
def pivot_table(df: pd.DataFrame, form_data: Dict[str, Any]) -> pd.DataFrame:
    """
    Pivot table.
    """
    if form_data.get("granularity") == "all" and DTTM_ALIAS in df:
        del df[DTTM_ALIAS]

    metrics = [get_metric_name(m) for m in form_data["metrics"]]
    aggfuncs: Dict[str, Union[str, Callable[[Any], Any]]] = {}
    for metric in metrics:
        aggfunc = form_data.get("pandas_aggfunc") or "sum"
        if pd.api.types.is_numeric_dtype(df[metric]):
            # ensure that pandas's sum mimics SQL's SUM
            if aggfunc == "sum":
                aggfunc = sql_like_sum
        # only min and max work properly for non-numerics
        elif aggfunc not in {"min", "max"}:
            aggfunc = "max"
        aggfuncs[metric] = aggfunc

    groupby = form_data.get("groupby") or []
    columns = form_data.get("columns") or []
    if form_data.get("transpose_pivot"):
        groupby, columns = columns, groupby

    df = df.pivot_table(
        index=groupby,
        columns=columns,
        values=metrics,
        aggfunc=aggfuncs,
        margins=form_data.get("pivot_margins"),
    )

    # Re-order the columns adhering to the metric ordering.
    df = df[metrics]

    # Display metrics side by side with each column
    if form_data.get("combine_metric"):
        df = df.stack(0).unstack().reindex(level=-1, columns=metrics)

    # flatten column names
    df.columns = [" ".join(column) for column in df.columns]

    return df
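# Assumption: `sql_like_sum` exists to make pandas match SQL SUM semantics,
# where summing only NULLs yields NULL rather than 0. A minimal sketch:
from typing import Any

import pandas as pd


def sql_like_sum(series: pd.Series) -> Any:
    # min_count=1 makes an all-NaN group sum to NaN instead of pandas's
    # default of 0, mirroring SQL's SUM over all-NULL values
    return series.sum(min_count=1)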
def __init__(
    self,
    granularity: str,
    metrics: List[Union[Dict, str]],
    groupby: Optional[List[str]] = None,
    filters: Optional[List[str]] = None,
    time_range: Optional[str] = None,
    time_shift: Optional[str] = None,
    is_timeseries: bool = False,
    timeseries_limit: int = 0,
    row_limit: int = app.config["ROW_LIMIT"],
    timeseries_limit_metric: Optional[Dict] = None,
    order_desc: bool = True,
    extras: Optional[Dict] = None,
    columns: Optional[List[str]] = None,
    orderby: Optional[List[List]] = None,
    relative_start: str = app.config["DEFAULT_RELATIVE_START_TIME"],
    relative_end: str = app.config["DEFAULT_RELATIVE_END_TIME"],
):
    self.granularity = granularity
    self.from_dttm, self.to_dttm = utils.get_since_until(
        relative_start=relative_start,
        relative_end=relative_end,
        time_range=time_range,
        time_shift=time_shift,
    )
    self.is_timeseries = is_timeseries
    self.time_range = time_range
    self.time_shift = utils.parse_human_timedelta(time_shift)
    self.groupby = groupby or []
    self.metrics = [utils.get_metric_name(metric) for metric in metrics]
    self.row_limit = row_limit
    self.filter = filters or []
    self.timeseries_limit = timeseries_limit
    self.timeseries_limit_metric = timeseries_limit_metric
    self.order_desc = order_desc
    self.extras = extras or {}
    self.columns = columns or []
    self.orderby = orderby or []
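# Illustration only: `self.metrics` normalizes a mix of saved-metric names
# and adhoc metric dicts into plain names via `utils.get_metric_name`, e.g.
# (hypothetical values):
#
#   ["count", {"expressionType": "SIMPLE",
#              "column": {"column_name": "num"},
#              "aggregate": "SUM",
#              "label": "sum__num"}]
#   => ["count", "sum__num"]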
def test_get_metric_name_invalid_metric():
    metric = deepcopy(SIMPLE_SUM_ADHOC_METRIC)
    del metric["label"]
    del metric["column"]
    with pytest.raises(ValueError):
        get_metric_name(metric)

    metric = deepcopy(SIMPLE_SUM_ADHOC_METRIC)
    del metric["label"]
    metric["expressionType"] = "FOO"
    with pytest.raises(ValueError):
        get_metric_name(metric)

    metric = deepcopy(SQL_ADHOC_METRIC)
    del metric["label"]
    metric["expressionType"] = "FOO"
    with pytest.raises(ValueError):
        get_metric_name(metric)
def data_for_slices(self, slices: List[Slice]) -> Dict[str, Any]:
    """
    The representation of the datasource containing only the required
    data to render the provided slices.

    Used to reduce the payload when loading a dashboard.
    """
    data = self.data
    metric_names = set()
    column_names = set()
    for slc in slices:
        form_data = slc.form_data
        # pull out all required metrics from the form_data
        for param in METRIC_FORM_DATA_PARAMS:
            for metric in utils.get_iterable(form_data.get(param) or []):
                metric_names.add(utils.get_metric_name(metric))
                if utils.is_adhoc_metric(metric):
                    column_names.add(
                        (metric.get("column") or {}).get("column_name")
                    )

        # Columns used in query filters
        column_names.update(
            filter_["subject"]
            for filter_ in form_data.get("adhoc_filters") or []
            if filter_.get("clause") == "WHERE" and filter_.get("subject")
        )

        # columns used by Filter Box
        column_names.update(
            filter_config["column"]
            for filter_config in form_data.get("filter_configs") or []
            if "column" in filter_config
        )

        # pull out all required columns from the form_data; the loop over
        # COLUMN_FORM_DATA_PARAMS must come first so `param` is bound
        # before it is used
        column_names.update(
            column
            for param in COLUMN_FORM_DATA_PARAMS
            for column in utils.get_iterable(form_data.get(param) or [])
        )

    filtered_metrics = [
        metric
        for metric in data["metrics"]
        if metric["metric_name"] in metric_names
    ]

    filtered_columns: List[Column] = []
    column_types: Set[GenericDataType] = set()
    for column in data["columns"]:
        generic_type = column.get("type_generic")
        if generic_type is not None:
            column_types.add(generic_type)
        if column["column_name"] in column_names:
            filtered_columns.append(column)

    data["column_types"] = list(column_types)
    del data["description"]
    data.update({"metrics": filtered_metrics})
    data.update({"columns": filtered_columns})

    verbose_map = {"__timestamp": "Time"}
    verbose_map.update(
        {
            metric["metric_name"]: metric["verbose_name"] or metric["metric_name"]
            for metric in filtered_metrics
        }
    )
    verbose_map.update(
        {
            column["column_name"]: column["verbose_name"] or column["column_name"]
            for column in filtered_columns
        }
    )
    data["verbose_map"] = verbose_map

    return data
def pivot_table_v2(  # pylint: disable=too-many-branches
    df: pd.DataFrame, form_data: Dict[str, Any]
) -> pd.DataFrame:
    """
    Pivot table v2.
    """
    if form_data.get("granularity_sqla") == "all" and DTTM_ALIAS in df:
        del df[DTTM_ALIAS]

    # TODO (betodealmeida): implement metricsLayout
    metrics = [get_metric_name(m) for m in form_data["metrics"]]
    aggregate_function = form_data.get("aggregateFunction", "Sum")
    groupby = form_data.get("groupbyRows") or []
    columns = form_data.get("groupbyColumns") or []
    if form_data.get("transposePivot"):
        groupby, columns = columns, groupby

    df = df.pivot_table(
        index=groupby,
        columns=columns,
        values=metrics,
        aggfunc=pivot_v2_aggfunc_map[aggregate_function],
        margins=True,
    )

    # The pandas `pivot_table` method either brings both row/column
    # totals, or none at all. We pass `margins=True` to get both, and
    # remove any dimension that was not requested.
    if not form_data.get("rowTotals"):
        df.drop(df.columns[len(df.columns) - 1], axis=1, inplace=True)
    if not form_data.get("colTotals"):
        df = df[:-1]

    # Compute fractions, if needed. If `colTotals` or `rowTotals` are
    # present we need to adjust for including them in the sum
    if aggregate_function.endswith(" as Fraction of Total"):
        total = df.sum().sum()
        df = df.astype(total.dtypes) / total
        if form_data.get("colTotals"):
            df *= 2
        if form_data.get("rowTotals"):
            df *= 2
    elif aggregate_function.endswith(" as Fraction of Columns"):
        total = df.sum(axis=0)
        df = df.astype(total.dtypes).div(total, axis=1)
        if form_data.get("colTotals"):
            df *= 2
    elif aggregate_function.endswith(" as Fraction of Rows"):
        total = df.sum(axis=1)
        df = df.astype(total.dtypes).div(total, axis=0)
        if form_data.get("rowTotals"):
            df *= 2

    # Re-order the columns adhering to the metric ordering.
    df = df[metrics]

    # Display metrics side by side with each column
    if form_data.get("combineMetric"):
        df = df.stack(0).unstack().reindex(level=-1, columns=metrics)

    # flatten column names
    df.columns = [" ".join(str(name) for name in column) for column in df.columns]

    return df
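# Assumption: `pivot_v2_aggfunc_map` translates the UI aggregate names (the
# values of `func_map` in the v1 shim above) into callables accepted by
# pandas's `pivot_table`. A plausible sketch of a few entries:
import pandas as pd

pivot_v2_aggfunc_map = {
    "Count": pd.Series.count,
    "Count Unique Values": pd.Series.nunique,
    "Sum": pd.Series.sum,
    "Average": pd.Series.mean,
    "Minimum": pd.Series.min,
    "Maximum": pd.Series.max,
    # the " as Fraction of ..." variants reuse a base aggregator; the
    # fraction itself is computed after pivoting, as shown above
    "Sum as Fraction of Total": pd.Series.sum,
}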
def data_for_slices(  # pylint: disable=too-many-locals
    self, slices: List[Slice]
) -> Dict[str, Any]:
    """
    The representation of the datasource containing only the required
    data to render the provided slices.

    Used to reduce the payload when loading a dashboard.
    """
    data = self.data
    metric_names = set()
    column_names = set()
    for slc in slices:
        form_data = slc.form_data
        # pull out all required metrics from the form_data
        for metric_param in METRIC_FORM_DATA_PARAMS:
            for metric in utils.get_iterable(form_data.get(metric_param) or []):
                metric_names.add(utils.get_metric_name(metric))
                if utils.is_adhoc_metric(metric):
                    column_names.add(
                        (metric.get("column") or {}).get("column_name")
                    )

        # Columns used in query filters
        column_names.update(
            filter_["subject"]
            for filter_ in form_data.get("adhoc_filters") or []
            if filter_.get("clause") == "WHERE" and filter_.get("subject")
        )

        # columns used by Filter Box
        column_names.update(
            filter_config["column"]
            for filter_config in form_data.get("filter_configs") or []
            if "column" in filter_config
        )

        # for legacy dashboard imports which have the wrong query_context in them
        try:
            query_context = slc.get_query_context()
        except DatasetNotFoundError:
            query_context = None

        # legacy charts don't have a query_context
        if query_context:
            column_names.update(
                [
                    utils.get_column_name(column)
                    for query in query_context.queries
                    for column in query.columns
                ]
                or []
            )
        else:
            _columns = [
                utils.get_column_name(column)
                if utils.is_adhoc_column(column)
                else column
                for column_param in COLUMN_FORM_DATA_PARAMS
                for column in utils.get_iterable(form_data.get(column_param) or [])
            ]
            column_names.update(_columns)

    filtered_metrics = [
        metric
        for metric in data["metrics"]
        if metric["metric_name"] in metric_names
    ]

    filtered_columns: List[Column] = []
    column_types: Set[GenericDataType] = set()
    for column in data["columns"]:
        generic_type = column.get("type_generic")
        if generic_type is not None:
            column_types.add(generic_type)
        if column["column_name"] in column_names:
            filtered_columns.append(column)

    data["column_types"] = list(column_types)
    del data["description"]
    data.update({"metrics": filtered_metrics})
    data.update({"columns": filtered_columns})

    verbose_map = {"__timestamp": "Time"}
    verbose_map.update(
        {
            metric["metric_name"]: metric["verbose_name"] or metric["metric_name"]
            for metric in filtered_metrics
        }
    )
    verbose_map.update(
        {
            column["column_name"]: column["verbose_name"] or column["column_name"]
            for column in filtered_columns
        }
    )
    data["verbose_map"] = verbose_map

    return data
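# Illustration only: for a slice whose form_data references the saved metric
# "count" and filters on the column "gender" (hypothetical names and
# verbose names), the trimmed payload would look roughly like:
#
#   data["metrics"]     => entries whose metric_name is in {"count"}
#   data["columns"]     => entries whose column_name is in {"gender"}
#   data["verbose_map"] => {"__timestamp": "Time", "count": "COUNT(*)",
#                           "gender": "gender"}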
def test_get_metric_name_saved_metric():
    assert get_metric_name(STR_METRIC) == "my_metric"
    assert get_metric_name(STR_METRIC, {STR_METRIC: "My Metric"}) == "My Metric"
def test_get_column_name_physical_column():
    assert get_column_name(STR_COLUMN) == "my_column"
    assert get_column_name(STR_COLUMN, {STR_COLUMN: "My Column"}) == "My Column"
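# Assumption: a minimal sketch of `get_metric_name` consistent with the
# tests above -- prefer the label, then derive a name from the metric
# definition, optionally mapping saved-metric names through a verbose map.
# Not the actual implementation.
from typing import Any, Dict, Optional, Union


def get_metric_name(
    metric: Union[str, Dict[str, Any]],
    verbose_map: Optional[Dict[str, str]] = None,
) -> str:
    if isinstance(metric, str):
        # saved metric referenced by name; verbose_map supplies a display name
        return (verbose_map or {}).get(metric, metric)

    label = metric.get("label")
    if label:
        return label

    expression_type = metric.get("expressionType")
    if expression_type == "SQL":
        sql_expression = metric.get("sqlExpression")
        if sql_expression:
            return sql_expression
    elif expression_type == "SIMPLE":
        column_name = (metric.get("column") or {}).get("column_name")
        if column_name:
            aggregate = metric.get("aggregate")
            return f"{aggregate}({column_name})" if aggregate else column_name

    raise ValueError("Invalid metric object")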