Beispiel #1
0
def test_plot_multivariate():
    """Tests `plot_multivariate` with default, custom, and preset styles.

    Covers, in order: default arguments, a per-column style override dict,
    the default title built from the axis labels, and the ``"plotly"``,
    ``"auto"`` and ``"auto-fill"`` values of ``y_col_style_dict``.
    """
    x_col = "time"
    df = pd.DataFrame({
        x_col: [dt(2018, 1, 1),
                dt(2018, 1, 2),
                dt(2018, 1, 3)],
        "oranges": [8.5, 2.0, 3.0],
        "apples": [1.4, 2.1, 3.4],
        "bananas": [4.2, 3.1, 3.0],
    })

    # plot with default values
    fig = plot_multivariate(
        df=df,
        x_col=x_col)
    assert fig.layout.showlegend
    assert fig.layout.xaxis.title.text == x_col
    assert fig.layout.yaxis.title.text == cst.VALUE_COL
    assert len(fig.data) == 3
    assert fig.data[0].mode == "lines"
    # traces follow the column order of `df`
    assert fig.data[0].name == "oranges"
    assert fig.data[1].name == "apples"
    assert fig.data[2].name == "bananas"

    # plot with style override
    fig = plot_multivariate(
        df=df,
        x_col=x_col,
        y_col_style_dict={
            "oranges": {
                "legendgroup": "one",
                "line": {
                    "color": "red",
                    "dash": "dot"
                }
            },
            "apples": None,
            "bananas": {
                "name": "plantain",
                "legendgroup": "one",
                "mode": "markers",
                "line": None  # Remove line params since we use mode="markers"
            }
        },
        xlabel="xlab",
        ylabel="ylab",
        title="New Title",
        showlegend=False
    )
    assert not fig.layout.showlegend
    assert fig.layout.xaxis.title.text == "xlab"
    assert fig.layout.yaxis.title.text == "ylab"
    assert len(fig.data) == 3
    assert fig.data[0].mode == "lines"
    assert fig.data[0].legendgroup == "one"
    assert fig.data[0].line.color == "red"
    assert fig.data[0].name == "oranges"
    assert fig.data[1].name == "apples"
    assert fig.data[2].name == "plantain"  # "name" in the style dict renames the trace
    assert fig.data[2].mode == "markers"

    # ylabel is used for default title
    fig = plot_multivariate(
        df=df,
        x_col=x_col,
        xlabel="xlab",
        ylabel="ylab")
    assert fig.layout.title.text == "ylab vs xlab"

    # plotly style
    fig = plot_multivariate(
        df=df,
        x_col=x_col,
        y_col_style_dict="plotly")
    assert [trace.name for trace in fig.data] == ["oranges", "apples", "bananas"]
    assert fig.data[0].line.color is None
    assert fig.data[1].fill is None

    # auto style
    fig = plot_multivariate(
        df=df,
        x_col=x_col,
        y_col_style_dict="auto")
    # sorted ascending
    assert [trace.name for trace in fig.data] == ["apples", "bananas", "oranges"]
    assert fig.data[0].line.color == "rgba(0, 145, 202, 1.0)"
    assert fig.data[1].fill is None

    # auto-fill style
    fig = plot_multivariate(
        df=df,
        x_col=x_col,
        y_col_style_dict="auto-fill",
        default_color="blue")
    # sorted ascending
    assert [trace.name for trace in fig.data] == ["apples", "bananas", "oranges"]
    assert fig.data[0].line.color == "blue"
    assert fig.data[1].fill == "tonexty"
    assert fig.data[2].fill == "tonexty"
Beispiel #2
0
    13,  # accepts the same parameters as `plot_quantiles_and_overlays`
    show_mean=True,
    show_quantiles=False,
    show_overlays=True,
    center_values=
    False,  # note! does not center, to compute raw differences from the mean below
    overlay_label_time_feature="str_dow",
)
# Subtracts the mean from every overlay to get the deviation from the mean.
overlay_minus_mean = (
    grouped_df[OVERLAY_COL_GROUP] - grouped_df[MEAN_COL_GROUP].values
)
x_col = overlay_minus_mean.index.name
# `plot_multivariate` expects the x-value to be a column, so the index
# is moved into the frame.
overlay_minus_mean = overlay_minus_mean.reset_index()
# Plots the deviation from the mean.
fig = plot_multivariate(
    df=overlay_minus_mean,
    x_col=x_col,
    ylabel=ts.original_value_col,
    title="day of week effect over time",
)
plotly.io.show(fig)

# %%
# The pattern looks fairly stable until Nov 2013, when Monday
# far surpasses Sunday as the weekly peak. The relative values on Monday
# and Tuesday increase, and the relative values on Saturday and Sunday decline.
# Thus, it may be useful to include a seasonality changepoint around
# that time.
#
# .. tip::
#   You can interact with the plot to focus on a particular day
#   by double clicking its name in the legend. Double click again
#   to unselect, or single click to show/hide a single series.
Beispiel #3
0
    def plot_grouping_evaluation_metrics(
            self,
            metric_dict: Dict,
            config_names: List = None,
            which: str = "train",
            groupby_time_feature: str = None,
            groupby_sliding_window_size: int = None,
            groupby_custom_column: pd.Series = None,
            xlabel=None,
            ylabel="Metric value",
            title=None,
            showlegend=True):
        """Returns a line plot of the grouped evaluation metrics.

        The values are obtained from ``self.get_grouping_evaluation_metrics``,
        averaged across splits for each (config, group) pair, and passed to
        ``plot_multivariate`` with the grouping column on the x-axis.
        The grouping method is chosen by ``groupby_time_feature``,
        ``groupby_sliding_window_size`` and ``groupby_custom_column``.
        See `~greykite.framework.output.univariate_forecast.UnivariateForecast.get_grouping_evaluation`
        for details on grouping method.

        Parameters
        ----------
        metric_dict : `dict` [`str`, `callable`]
            Evaluation metrics to compute.
            Passed through to ``self.get_grouping_evaluation_metrics``.
            To get the best visualization, keep number of metrics <= 2.
        config_names : `list` [`str`], default None
            Which config results to plot. A list of config names.
            If None, uses all the available config keys.
        which: `str`
            "train" or "test". Which dataset to evaluate.
        groupby_time_feature : `str` or None, optional
            If provided, groups by a column generated by
            `~greykite.common.features.timeseries_features.build_time_features_df`.
            See that function for valid values.
        groupby_sliding_window_size : `int` or None, optional
            If provided, sequentially partitions data into groups of size
            ``groupby_sliding_window_size``.
        groupby_custom_column : `pandas.Series` or None, optional
            If provided, groups by this column value. Should be same length as the DataFrame.
        xlabel : `str` or None, default None
            x-axis label. If None, the name of the groupby column is used.
        ylabel : `str` or None, default "Metric value"
            y-axis label.
        title : `str` or None, default None
            Plot title. If None, a default is built from ``which`` and ``xlabel``.
        showlegend : `bool`, default True
            Whether to show the legend.

        Returns
        -------
        fig : `plotly.graph_objs.Figure`
            Interactive plotly graph.
        """
        metrics_by_group = self.get_grouping_evaluation_metrics(
            metric_dict=metric_dict,
            config_names=config_names,
            which=which,
            groupby_time_feature=groupby_time_feature,
            groupby_sliding_window_size=groupby_sliding_window_size,
            groupby_custom_column=groupby_custom_column)

        # The groupby column is found by elimination: it is the first column
        # that is neither an identifier column nor matched by `which`.
        candidate_cols = [
            name for name in metrics_by_group.columns
            if name not in ("config_name", "split_num")
        ]
        groupby_col = get_pattern_cols(
            candidate_cols,
            pos_pattern=".*",
            neg_pattern=which)[0]

        # Averages values across splits for each (config, group) pair;
        # `split_num` is dropped first so it is not averaged.
        averaged = (
            metrics_by_group
            .drop(columns=["split_num"])
            .groupby(["config_name", groupby_col])
            .mean()
        )
        # Drops rows with all NA values, moves config_name from the row
        # multiindex to the column multiindex, then sorts the column labels.
        wide = averaged.dropna(how="all")
        wide = wide.unstack(level=0)
        wide = wide.sort_index(axis=1)

        # Flattens the multiindex columns: the levels of each column label
        # are joined with "_", and the groupby column comes first.
        flat_names = [groupby_col]
        flat_names.extend("_".join(levels) for levels in wide.columns)
        plot_df = pd.DataFrame(wide.to_records())
        plot_df.columns = flat_names

        xlabel = groupby_col if xlabel is None else xlabel
        if title is None:
            title = f"{which} performance by {xlabel} across rolling windows"

        return plot_multivariate(
            df=plot_df,
            x_col=groupby_col,
            y_col_style_dict="plotly",
            xlabel=xlabel,
            ylabel=ylabel,
            title=title,
            showlegend=showlegend)
    def plot_flexible_grouping_evaluation(
            self,
            which="train",
            groupby_time_feature=None,
            groupby_sliding_window_size=None,
            groupby_custom_column=None,
            map_func_dict=None,
            agg_kwargs=None,
            extend_col_names=False,
            y_col_style_dict="auto-fill",
            default_color="rgba(0, 145, 202, 1.0)",
            xlabel=None,
            ylabel=None,
            title=None,
            showlegend=True):
        """Plots group-wise evaluation metrics. Whereas
        `~greykite.framework.output.univariate_forecast.UnivariateForecast.plot_grouping_evaluation`
        shows one metric, this can show any number of custom metrics.

        For example:

            * Mean and quantiles of squared error by group.
            * Mean and quantiles of residuals by group.
            * Mean and quantiles of actual and forecast by group.
            * % of actuals outside prediction intervals by group
            * any combination of the above metrics by the same group

        The metrics are computed by ``self.get_flexible_grouping_evaluation``;
        the name of the returned index is used as the x-axis column.
        See `~greykite.framework.output.univariate_forecast.UnivariateForecast.get_flexible_grouping_evaluation`
        for details.

        Parameters
        ----------
        which: `str`
            "train" or "test". Which dataset to evaluate.
        groupby_time_feature : `str` or None, optional
            If provided, groups by a column generated by
            `~greykite.common.features.timeseries_features.build_time_features_df`.
            See that function for valid values.
        groupby_sliding_window_size : `int` or None, optional
            If provided, sequentially partitions data into groups of size
            ``groupby_sliding_window_size``.
        groupby_custom_column : `pandas.Series` or None, optional
            If provided, groups by this column value. Should be same length as the DataFrame.
        map_func_dict : `dict` [`str`, `callable`] or None, default None
            Grouping evaluation metric specification, along with ``agg_kwargs``.
            See `~greykite.framework.output.univariate_forecast.UnivariateForecast.get_flexible_grouping_evaluation`.
        agg_kwargs : `dict` or None, default None
            Grouping evaluation metric specification, along with ``map_func_dict``.
            See `~greykite.framework.output.univariate_forecast.UnivariateForecast.get_flexible_grouping_evaluation`.
        extend_col_names : `bool` or None, default False
            How to name the grouping metrics.
            See `~greykite.framework.output.univariate_forecast.UnivariateForecast.get_flexible_grouping_evaluation`.
        y_col_style_dict: `dict` [`str`, `dict` or None] or "plotly" or "auto" or "auto-fill", default "auto-fill"
            The column(s) to plot on the y-axis, and how to style them. The names should match
            those generated by ``agg_kwargs`` and ``extend_col_names``.
            The function
            `~greykite.framework.output.univariate_forecast.UnivariateForecast.get_flexible_grouping_evaluation`
            can be used to check the column names.

            For convenience, start with "auto-fill" or "plotly", then adjust styling as needed.

            See `~greykite.common.viz.timeseries_plotting.plot_multivariate` for details.

        default_color: `str`, default "rgba(0, 145, 202, 1.0)" (blue)
            Default line color when ``y_col_style_dict`` is one of "auto", "auto-fill".
        xlabel : `str` or None, default None
            x-axis label. If None, default is ``x_col``.
        ylabel : `str` or None, default None
            y-axis label. If None, y-axis is not labeled.
        title : `str` or None, default None
            Plot title. If None and ``ylabel`` is provided, a default title is used.
        showlegend : `bool`, default True
            Whether to show the legend.

        Returns
        -------
        fig : `plotly.graph_objs.Figure`
            Interactive plotly graph showing the evaluation metrics.

            See `~greykite.common.viz.timeseries_plotting.plot_forecast_vs_actual`
            return value for how to plot the figure and add customization.

        See Also
        --------
        `~greykite.framework.output.univariate_forecast.UnivariateForecast.get_flexible_grouping_evaluation` : called by this function
        `~greykite.common.viz.timeseries_plotting.plot_multivariate` : called by this function
        """
        metrics_df = self.get_flexible_grouping_evaluation(
            which=which,
            groupby_time_feature=groupby_time_feature,
            groupby_sliding_window_size=groupby_sliding_window_size,
            groupby_custom_column=groupby_custom_column,
            map_func_dict=map_func_dict,
            agg_kwargs=agg_kwargs,
            extend_col_names=extend_col_names)

        # The index name is read before the index is moved into a regular
        # column, so it can be passed as the x-axis column.
        group_col = metrics_df.index.name
        metrics_df.reset_index(inplace=True)

        plot_options = {
            "y_col_style_dict": y_col_style_dict,
            "default_color": default_color,
            "xlabel": xlabel,
            "ylabel": ylabel,
            "title": title,
            "showlegend": showlegend,
        }
        return plot_multivariate(metrics_df, x_col=group_col, **plot_options)
Beispiel #5
0
    def plot_forecasts_by_step(self,
                               forecast_step: int,
                               config_names: List = None,
                               xlabel: str = TIME_COL,
                               ylabel: str = VALUE_COL,
                               title: str = None,
                               showlegend: bool = True):
        """Returns a ``forecast_step`` ahead rolling forecast plot.
        The plot consists of one line for each valid entry of ``config_names``.
        The ``ACTUAL_COL`` column of ``self.forecasts`` is plotted alongside them.

        For a more customizable plot, see
        :func:`~greykite.common.viz.timeseries_plotting.plot_multivariate`

        Parameters
        ----------
        forecast_step : `int`
            Which forecast step to plot. A forecast step is an integer between 1 and the
            forecast horizon, inclusive, indicating the number of periods from train end date
            to the prediction date (# steps ahead).
        config_names : `list` [`str`], default None
            Which config results to plot. A list of config names.
            If None, uses all the available config keys.
        xlabel : `str` or None, default TIME_COL
            x-axis label.
        ylabel : `str` or None, default VALUE_COL
            y-axis label.
        title : `str` or None, default None
            Plot title. If None, default is based on ``forecast_step``.
        showlegend : `bool`, default True
            Whether to show the legend.

        Returns
        -------
        fig : `plotly.graph_objs.Figure`
            Interactive plotly graph.
            Plots multiple column(s) in ``self.forecasts`` against ``TIME_COL``.

            See `~greykite.common.viz.timeseries_plotting.plot_forecast_vs_actual`
            return value for how to plot the figure and add customization.

        Raises
        ------
        ValueError
            If ``forecast_step`` is less than 1 or greater than the
            forecast horizon (``self.tscv.forecast_horizon``).
        """
        if self.forecasts is None:
            self.extract_forecasts()

        # Rejects steps below the documented range; without this check the
        # row filter below matches nothing and an empty plot is returned.
        if forecast_step < 1:
            raise ValueError(
                f"`forecast_step` ({forecast_step}) must be greater than or equal to 1.")
        if forecast_step > self.tscv.forecast_horizon:
            raise ValueError(
                f"`forecast_step` ({forecast_step}) must be less than or equal to "
                f"forecast horizon ({self.tscv.forecast_horizon}).")

        config_names = self.get_valid_config_names(config_names)
        # Time column (x-axis), actuals, then one prediction column per config.
        plot_cols = [TIME_COL, ACTUAL_COL]
        plot_cols += [f"{config_name}_{PREDICTED_COL}" for config_name in config_names]

        # Keeps only the rows for the requested forecast step.
        is_requested_step = self.forecasts[FORECAST_STEP_COL] == forecast_step
        df = self.forecasts.loc[is_requested_step, plot_cols]

        if title is None:
            title = f"{forecast_step}-step ahead rolling forecasts"
        fig = plot_multivariate(df=df,
                                x_col=TIME_COL,
                                y_col_style_dict="plotly",
                                xlabel=xlabel,
                                ylabel=ylabel,
                                title=title,
                                showlegend=showlegend)

        return fig