コード例 #1
0
ファイル: post_processing.py プロジェクト: saltict/superset
def pivot_table(df: pd.DataFrame, form_data: Dict[str, Any]) -> pd.DataFrame:
    """
    Pivot table (v1).

    Adapts legacy (v1) form data into a call to ``pivot_df``, mapping the
    v1 pandas aggregation names onto their v2 display labels.
    """
    # The temporal column is meaningless when no time grain is applied.
    if form_data.get("granularity") == "all" and DTTM_ALIAS in df:
        del df[DTTM_ALIAS]

    # v1 pandas aggregation function names => v2 display names
    v1_to_v2_funcs = {
        "sum": "Sum",
        "mean": "Average",
        "min": "Minimum",
        "max": "Maximum",
        "std": "Sample Standard Deviation",
        "var": "Sample Variance",
    }

    # v1 exposes a single "pivot_margins" toggle for both totals.
    show_totals = bool(form_data.get("pivot_margins"))
    return pivot_df(
        df,
        rows=form_data.get("groupby") or [],
        columns=form_data.get("columns") or [],
        metrics=[get_metric_name(metric) for metric in form_data["metrics"]],
        aggfunc=v1_to_v2_funcs.get(form_data.get("pandas_aggfunc", "sum"), "Sum"),
        transpose_pivot=bool(form_data.get("transpose_pivot")),
        combine_metrics=bool(form_data.get("combine_metric")),
        show_rows_total=show_totals,
        show_columns_total=show_totals,
        apply_metrics_on_rows=False,
    )
コード例 #2
0
    def adhoc_metric_to_sqla(self, metric, cols):
        """
        Turn an adhoc metric into a sqlalchemy column.

        :param dict metric: Adhoc metric definition
        :param dict cols: Columns for the current table
        :returns: The metric defined as a sqlalchemy column
        :rtype: sqlalchemy.sql.column
        """
        label = utils.get_metric_name(metric)
        expression_type = metric.get("expressionType")

        if expression_type == utils.ADHOC_METRIC_EXPRESSION_TYPES["SQL"]:
            sqla_metric = literal_column(metric.get("sqlExpression"))
        elif expression_type == utils.ADHOC_METRIC_EXPRESSION_TYPES["SIMPLE"]:
            column_name = metric.get("column").get("column_name")
            table_column = cols.get(column_name)
            # Prefer the datasource's column definition; fall back to a
            # bare column reference when the column is unknown.
            sqla_column = (
                table_column.get_sqla_col()
                if table_column
                else column(column_name)
            )
            aggregation = self.sqla_aggregations[metric.get("aggregate")]
            sqla_metric = aggregation(sqla_column)
        else:
            # Unknown expression type: nothing sensible to build.
            return None

        return self.make_sqla_column_compatible(sqla_metric, label)
コード例 #3
0
ファイル: models.py プロジェクト: stendu/incubator-superset
    def adhoc_metric_to_sqla(self, metric, cols):
        """
        Turn an adhoc metric into a sqlalchemy column.

        :param dict metric: Adhoc metric definition
        :param dict cols: Columns for the current table
        :returns: The metric defined as a sqlalchemy column
        :rtype: sqlalchemy.sql.column
        """
        expression_type = metric.get('expressionType')
        label = self.get_label(utils.get_metric_name(metric))

        if expression_type == utils.ADHOC_METRIC_EXPRESSION_TYPES['SIMPLE']:
            column_name = metric.get('column').get('column_name')
            table_column = cols.get(column_name)
            # Prefer the datasource's column definition when available;
            # otherwise reference the column by name directly.
            if table_column:
                sqla_column = table_column.get_sqla_col()
            else:
                sqla_column = column(column_name)
            aggregation = self.sqla_aggregations[metric.get('aggregate')]
            return aggregation(sqla_column).label(label)
        if expression_type == utils.ADHOC_METRIC_EXPRESSION_TYPES['SQL']:
            return literal_column(metric.get('sqlExpression')).label(label)
        # Unknown expression type: nothing sensible to build.
        return None
コード例 #4
0
ファイル: models.py プロジェクト: neuroradiology/caravel
    def adhoc_metric_to_sqla(self, metric, cols):
        """
        Turn an adhoc metric into a sqlalchemy column.

        :param dict metric: Adhoc metric definition
        :param dict cols: Columns for the current table
        :returns: The metric defined as a sqlalchemy column
        :rtype: sqlalchemy.sql.column
        """
        label = utils.get_metric_name(metric)
        expression_type = metric.get('expressionType')

        if expression_type == utils.ADHOC_METRIC_EXPRESSION_TYPES['SIMPLE']:
            column_name = metric.get('column').get('column_name')
            table_column = cols.get(column_name)
            # Use the datasource column when known, else a bare column ref.
            sqla_column = (
                table_column.get_sqla_col() if table_column
                else column(column_name)
            )
            aggregation = self.sqla_aggregations[metric.get('aggregate')]
            sqla_metric = aggregation(sqla_column)
        elif expression_type == utils.ADHOC_METRIC_EXPRESSION_TYPES['SQL']:
            sqla_metric = literal_column(metric.get('sqlExpression'))
        else:
            # Unknown expression type: nothing sensible to build.
            return None

        return self.make_sqla_column_compatible(sqla_metric, label)
コード例 #5
0
    def data_for_slices(self, slices: List[Slice]) -> Dict[str, Any]:
        """
        The representation of the datasource containing only the required data
        to render the provided slices.

        Used to reduce the payload when loading a dashboard.
        """
        data = self.data
        metric_names = set()
        column_names = set()
        for slc in slices:
            form_data = slc.form_data

            # Metrics referenced by this chart's form_data; adhoc metrics
            # also reference a column that must be retained.
            for param in METRIC_FORM_DATA_PARAMS:
                for metric in utils.get_iterable(form_data.get(param) or []):
                    metric_names.add(utils.get_metric_name(metric))
                    if utils.is_adhoc_metric(metric):
                        adhoc_column = metric.get("column") or {}
                        column_names.add(adhoc_column.get("column_name"))

            # Columns referenced by WHERE-clause adhoc filters.
            column_names.update(
                filter_.get("subject")
                for filter_ in form_data.get("adhoc_filters") or []
                if filter_["clause"] == "WHERE" and filter_.get("subject")
            )

            # Columns referenced directly by the form_data.
            for param in COLUMN_FORM_DATA_PARAMS:
                column_names.update(
                    utils.get_iterable(form_data.get(param) or [])
                )

        filtered_metrics = [
            metric
            for metric in data["metrics"]
            if metric["metric_name"] in metric_names
        ]
        filtered_columns = [
            col
            for col in data["columns"]
            if col["column_name"] in column_names
        ]

        # Trim the payload down to what the slices actually need.
        del data["description"]
        data["metrics"] = filtered_metrics
        data["columns"] = filtered_columns

        verbose_map = {"__timestamp": "Time"}
        for metric in filtered_metrics:
            verbose_map[metric["metric_name"]] = (
                metric["verbose_name"] or metric["metric_name"]
            )
        for col in filtered_columns:
            verbose_map[col["column_name"]] = (
                col["verbose_name"] or col["column_name"]
            )
        data["verbose_map"] = verbose_map

        return data
コード例 #6
0
def test_get_metric_name_adhoc():
    # SIMPLE adhoc metric: an explicit label wins; with no (or empty)
    # label the name is derived as "<AGGREGATE>(<column_name>)", falling
    # back to the bare column name when no aggregate is set.
    metric = deepcopy(SIMPLE_SUM_ADHOC_METRIC)
    assert get_metric_name(metric) == "my SUM"
    del metric["label"]
    assert get_metric_name(metric) == "SUM(my_col)"
    metric["label"] = ""
    assert get_metric_name(metric) == "SUM(my_col)"
    del metric["aggregate"]
    assert get_metric_name(metric) == "my_col"
    metric["aggregate"] = ""
    assert get_metric_name(metric) == "my_col"

    # SQL adhoc metric: an explicit label wins; without one the name is
    # derived from the metric itself (presumably its sqlExpression —
    # fixture defined elsewhere).
    metric = deepcopy(SQL_ADHOC_METRIC)
    assert get_metric_name(metric) == "my_sql"
    del metric["label"]
    assert get_metric_name(metric) == "SUM(my_col)"
    metric["label"] = ""
    assert get_metric_name(metric) == "SUM(my_col)"
コード例 #7
0
ファイル: post_processing.py プロジェクト: saltict/superset
def pivot_table_v2(df: pd.DataFrame, form_data: Dict[str, Any]) -> pd.DataFrame:
    """
    Pivot table v2.

    Thin adapter that forwards the v2 (camelCase) form data to ``pivot_df``.
    """
    # The temporal column is meaningless when no time grain is applied.
    if form_data.get("granularity_sqla") == "all" and DTTM_ALIAS in df:
        del df[DTTM_ALIAS]

    metric_labels = [get_metric_name(metric) for metric in form_data["metrics"]]
    return pivot_df(
        df,
        rows=form_data.get("groupbyRows") or [],
        columns=form_data.get("groupbyColumns") or [],
        metrics=metric_labels,
        aggfunc=form_data.get("aggregateFunction", "Sum"),
        transpose_pivot=bool(form_data.get("transposePivot")),
        combine_metrics=bool(form_data.get("combineMetric")),
        show_rows_total=bool(form_data.get("rowTotals")),
        show_columns_total=bool(form_data.get("colTotals")),
        apply_metrics_on_rows=form_data.get("metricsLayout") == "ROWS",
    )
コード例 #8
0
def pivot_table(df: pd.DataFrame, form_data: Dict[str, Any]) -> pd.DataFrame:
    """
    Pivot table.

    Pivots ``df`` per the legacy pivot-table form data: rows come from
    ``groupby``, columns from ``columns``, and each metric is aggregated
    with ``pandas_aggfunc`` (defaulting to ``sum``).

    :param df: The query result to pivot
    :param form_data: The chart's form data
    :returns: The pivoted DataFrame with flattened (space-joined) columns
    """
    # The temporal column is meaningless when no time grain is applied.
    if form_data.get("granularity") == "all" and DTTM_ALIAS in df:
        del df[DTTM_ALIAS]

    metrics = [get_metric_name(m) for m in form_data["metrics"]]
    aggfuncs: Dict[str, Union[str, Callable[[Any], Any]]] = {}
    for metric in metrics:
        aggfunc = form_data.get("pandas_aggfunc") or "sum"
        if pd.api.types.is_numeric_dtype(df[metric]):
            # sql_like_sum presumably mirrors SQL SUM semantics — see its
            # definition elsewhere in this module.
            if aggfunc == "sum":
                aggfunc = sql_like_sum
        elif aggfunc not in {"min", "max"}:
            # Only min/max are meaningful for non-numeric metrics.
            aggfunc = "max"
        aggfuncs[metric] = aggfunc

    groupby = form_data.get("groupby") or []
    columns = form_data.get("columns") or []
    if form_data.get("transpose_pivot"):
        groupby, columns = columns, groupby

    df = df.pivot_table(
        index=groupby,
        columns=columns,
        values=metrics,
        aggfunc=aggfuncs,
        margins=form_data.get("pivot_margins"),
    )

    # Re-order the columns adhering to the metric ordering.
    df = df[metrics]

    # Display metrics side by side with each column
    if form_data.get("combine_metric"):
        df = df.stack(0).unstack().reindex(level=-1, columns=metrics)

    # Flatten column names. Each level is coerced to str because grouping
    # values may be non-string (numeric or date columns), and str.join
    # would raise a TypeError on them (consistent with pivot_table_v2).
    df.columns = [
        " ".join(str(name) for name in column) for column in df.columns
    ]

    return df
コード例 #9
0
    def __init__(
        self,
        granularity: str,
        metrics: List[Union[Dict, str]],
        groupby: Optional[List[str]] = None,
        filters: Optional[List[str]] = None,
        time_range: Optional[str] = None,
        time_shift: Optional[str] = None,
        is_timeseries: bool = False,
        timeseries_limit: int = 0,
        row_limit: int = app.config["ROW_LIMIT"],
        timeseries_limit_metric: Optional[Dict] = None,
        order_desc: bool = True,
        extras: Optional[Dict] = None,
        columns: Optional[List[str]] = None,
        orderby: Optional[List[List]] = None,
        relative_start: str = app.config["DEFAULT_RELATIVE_START_TIME"],
        relative_end: str = app.config["DEFAULT_RELATIVE_END_TIME"],
    ):
        """
        Build a query object from raw query parameters.

        Metrics may be given as dicts (adhoc) or strings (saved) and are
        normalized to metric names; the time range/shift are resolved to
        concrete from/to datetimes.

        NOTE(review): the ``app.config[...]`` defaults are evaluated once,
        at function-definition time — config changes made after this module
        is imported will not affect these defaults.
        """
        self.granularity = granularity
        # Resolve the textual time range/shift into concrete bounds.
        self.from_dttm, self.to_dttm = utils.get_since_until(
            relative_start=relative_start,
            relative_end=relative_end,
            time_range=time_range,
            time_shift=time_shift,
        )
        self.is_timeseries = is_timeseries
        self.time_range = time_range
        # Keep the parsed timedelta, not the raw human-readable string.
        self.time_shift = utils.parse_human_timedelta(time_shift)
        self.groupby = groupby or []

        # Normalize adhoc/saved metrics to their names.
        self.metrics = [utils.get_metric_name(metric) for metric in metrics]
        self.row_limit = row_limit
        self.filter = filters or []
        self.timeseries_limit = timeseries_limit
        self.timeseries_limit_metric = timeseries_limit_metric
        self.order_desc = order_desc
        self.extras = extras or {}
        self.columns = columns or []
        self.orderby = orderby or []
コード例 #10
0
def test_get_metric_name_invalid_metric():
    # A SIMPLE adhoc metric without a label needs its column to derive a
    # name; removing both must raise.
    metric = deepcopy(SIMPLE_SUM_ADHOC_METRIC)
    del metric["label"]
    del metric["column"]
    with pytest.raises(ValueError):
        get_metric_name(metric)

    # An unrecognized expressionType leaves no way to derive a name.
    metric = deepcopy(SIMPLE_SUM_ADHOC_METRIC)
    del metric["label"]
    metric["expressionType"] = "FOO"
    with pytest.raises(ValueError):
        get_metric_name(metric)

    # Same for a SQL adhoc metric with a bogus expressionType.
    metric = deepcopy(SQL_ADHOC_METRIC)
    del metric["label"]
    metric["expressionType"] = "FOO"
    with pytest.raises(ValueError):
        get_metric_name(metric)
コード例 #11
0
ファイル: models.py プロジェクト: zuzhi/superset
    def data_for_slices(self, slices: List[Slice]) -> Dict[str, Any]:
        """
        The representation of the datasource containing only the required data
        to render the provided slices.

        Used to reduce the payload when loading a dashboard.

        :param slices: The charts whose referenced metrics/columns to keep
        :returns: The trimmed datasource payload
        """
        data = self.data
        metric_names = set()
        column_names = set()
        for slc in slices:
            form_data = slc.form_data

            # pull out all required metrics from the form_data; adhoc
            # metrics additionally reference a column that must be kept
            for param in METRIC_FORM_DATA_PARAMS:
                for metric in utils.get_iterable(form_data.get(param) or []):
                    metric_names.add(utils.get_metric_name(metric))
                    if utils.is_adhoc_metric(metric):
                        column_names.add(
                            (metric.get("column") or {}).get("column_name")
                        )

            # Columns used in query filters
            column_names.update(
                filter_["subject"]
                for filter_ in form_data.get("adhoc_filters") or []
                if filter_.get("clause") == "WHERE" and filter_.get("subject")
            )

            # columns used by Filter Box
            column_names.update(
                filter_config["column"]
                for filter_config in form_data.get("filter_configs") or []
                if "column" in filter_config
            )

            # Columns referenced directly by the form_data.
            # BUGFIX: the `for` clauses were previously reversed —
            # `param` was referenced before being bound, so the eagerly
            # evaluated outer iterable silently reused the leftover
            # `param` from the metrics loop above.
            column_names.update(
                column
                for param in COLUMN_FORM_DATA_PARAMS
                for column in utils.get_iterable(form_data.get(param) or [])
            )

        filtered_metrics = [
            metric
            for metric in data["metrics"]
            if metric["metric_name"] in metric_names
        ]

        # Keep only referenced columns, but record the generic type of
        # every column so the frontend knows what the dataset contains.
        filtered_columns: List[Column] = []
        column_types: Set[GenericDataType] = set()
        for column in data["columns"]:
            generic_type = column.get("type_generic")
            if generic_type is not None:
                column_types.add(generic_type)
            if column["column_name"] in column_names:
                filtered_columns.append(column)

        data["column_types"] = list(column_types)
        del data["description"]
        data.update({"metrics": filtered_metrics})
        data.update({"columns": filtered_columns})
        verbose_map = {"__timestamp": "Time"}
        verbose_map.update(
            {
                metric["metric_name"]: metric["verbose_name"] or metric["metric_name"]
                for metric in filtered_metrics
            }
        )
        verbose_map.update(
            {
                column["column_name"]: column["verbose_name"] or column["column_name"]
                for column in filtered_columns
            }
        )
        data["verbose_map"] = verbose_map

        return data
コード例 #12
0
def pivot_table_v2(  # pylint: disable=too-many-branches
        df: pd.DataFrame, form_data: Dict[str, Any]) -> pd.DataFrame:
    """
    Pivot table v2.

    Pivots ``df`` using the v2 (camelCase) form data: optionally
    transposes rows/columns, strips unrequested grand totals, and can
    convert values into fractions of the total, of each column, or of
    each row.
    """
    # The temporal column is meaningless when no time grain is applied.
    if form_data.get("granularity_sqla") == "all" and DTTM_ALIAS in df:
        del df[DTTM_ALIAS]

    # TODO (betodealmeida): implement metricsLayout
    metrics = [get_metric_name(m) for m in form_data["metrics"]]
    aggregate_function = form_data.get("aggregateFunction", "Sum")
    groupby = form_data.get("groupbyRows") or []
    columns = form_data.get("groupbyColumns") or []
    if form_data.get("transposePivot"):
        groupby, columns = columns, groupby

    df = df.pivot_table(
        index=groupby,
        columns=columns,
        values=metrics,
        aggfunc=pivot_v2_aggfunc_map[aggregate_function],
        margins=True,
    )

    # The pandas `pivot_table` method either brings both row/column
    # totals, or none at all. We pass `margins=True` to get both, and
    # remove any dimension that was not requested.
    if not form_data.get("rowTotals"):
        # Row totals are the last column added by `margins=True`.
        df.drop(df.columns[len(df.columns) - 1], axis=1, inplace=True)
    if not form_data.get("colTotals"):
        # Column totals are the last row added by `margins=True`.
        df = df[:-1]

    # Compute fractions, if needed. If `colTotals` or `rowTotals` are
    # present we need to adjust for including them in the sum: a retained
    # margin doubles the summed denominator, so multiply back by 2.
    if aggregate_function.endswith(" as Fraction of Total"):
        total = df.sum().sum()
        # NOTE(review): `sum().sum()` yields a scalar, which exposes
        # `.dtype` rather than `.dtypes` — confirm this line cannot raise
        # AttributeError with the pandas version in use.
        df = df.astype(total.dtypes) / total
        if form_data.get("colTotals"):
            df *= 2
        if form_data.get("rowTotals"):
            df *= 2
    elif aggregate_function.endswith(" as Fraction of Columns"):
        total = df.sum(axis=0)
        df = df.astype(total.dtypes).div(total, axis=1)
        if form_data.get("colTotals"):
            df *= 2
    elif aggregate_function.endswith(" as Fraction of Rows"):
        total = df.sum(axis=1)
        df = df.astype(total.dtypes).div(total, axis=0)
        if form_data.get("rowTotals"):
            df *= 2

    # Re-order the columns adhering to the metric ordering.
    df = df[metrics]

    # Display metrics side by side with each column
    if form_data.get("combineMetric"):
        df = df.stack(0).unstack().reindex(level=-1, columns=metrics)

    # flatten column names (levels may be non-string, hence str())
    df.columns = [
        " ".join(str(name) for name in column) for column in df.columns
    ]

    return df
コード例 #13
0
    def data_for_slices(  # pylint: disable=too-many-locals
            self, slices: List[Slice]) -> Dict[str, Any]:
        """
        The representation of the datasource containing only the required data
        to render the provided slices.

        Used to reduce the payload when loading a dashboard.

        :param slices: The charts whose referenced metrics/columns to keep
        :returns: The trimmed datasource payload
        """
        data = self.data
        metric_names = set()
        column_names = set()
        for slc in slices:
            form_data = slc.form_data
            # pull out all required metrics from the form_data; adhoc
            # metrics additionally reference a column that must be kept
            for metric_param in METRIC_FORM_DATA_PARAMS:
                for metric in utils.get_iterable(
                        form_data.get(metric_param) or []):
                    metric_names.add(utils.get_metric_name(metric))
                    if utils.is_adhoc_metric(metric):
                        column_names.add((metric.get("column")
                                          or {}).get("column_name"))

            # Columns used in query filters
            column_names.update(
                filter_["subject"]
                for filter_ in form_data.get("adhoc_filters") or []
                if filter_.get("clause") == "WHERE" and filter_.get("subject"))

            # columns used by Filter Box
            column_names.update(
                filter_config["column"]
                for filter_config in form_data.get("filter_configs") or []
                if "column" in filter_config)

            # for legacy dashboard imports which have the wrong query_context in them
            try:
                query_context = slc.get_query_context()
            except DatasetNotFoundError:
                query_context = None

            # legacy charts don't have query_context charts
            if query_context:
                # Prefer the columns resolved from the query context.
                column_names.update([
                    utils.get_column_name(column)
                    for query in query_context.queries
                    for column in query.columns
                ] or [])
            else:
                # Fall back to the columns referenced by the form_data;
                # adhoc columns are resolved to their names first.
                _columns = [
                    utils.get_column_name(column)
                    if utils.is_adhoc_column(column) else column
                    for column_param in COLUMN_FORM_DATA_PARAMS
                    for column in utils.get_iterable(
                        form_data.get(column_param) or [])
                ]
                column_names.update(_columns)

        filtered_metrics = [
            metric for metric in data["metrics"]
            if metric["metric_name"] in metric_names
        ]

        # Keep only referenced columns, but record the generic type of
        # every column so the frontend knows what the dataset contains.
        filtered_columns: List[Column] = []
        column_types: Set[GenericDataType] = set()
        for column in data["columns"]:
            generic_type = column.get("type_generic")
            if generic_type is not None:
                column_types.add(generic_type)
            if column["column_name"] in column_names:
                filtered_columns.append(column)

        data["column_types"] = list(column_types)
        del data["description"]
        data.update({"metrics": filtered_metrics})
        data.update({"columns": filtered_columns})
        # Map names to their verbose labels, falling back to the name.
        verbose_map = {"__timestamp": "Time"}
        verbose_map.update({
            metric["metric_name"]: metric["verbose_name"]
            or metric["metric_name"]
            for metric in filtered_metrics
        })
        verbose_map.update({
            column["column_name"]: column["verbose_name"]
            or column["column_name"]
            for column in filtered_columns
        })
        data["verbose_map"] = verbose_map

        return data
コード例 #14
0
def test_get_metric_name_saved_metric():
    # A saved metric given as a plain string resolves to itself.
    assert get_metric_name(STR_METRIC) == "my_metric"
コード例 #15
0
def test_get_metric_name_saved_metric():
    # A saved metric given as a plain string resolves to itself; a
    # verbose map overrides the display name when provided.
    assert get_metric_name(STR_METRIC) == "my_metric"
    assert get_metric_name(STR_METRIC,
                           {STR_METRIC: "My Metric"}) == "My Metric"
コード例 #16
0
def test_get_column_name_physical_column():
    # A physical column given as a plain string resolves to itself; a
    # verbose map overrides the display name when provided.
    # BUGFIX: the second assertion called get_metric_name (copy-paste
    # from the metric tests) instead of get_column_name under test.
    assert get_column_name(STR_COLUMN) == "my_column"
    assert get_column_name(STR_COLUMN,
                           {STR_COLUMN: "My Column"}) == "My Column"