def plot_regress_exog(res, exog_idx, exog_name='', fig=None):
    """Draw a 2 by 2 grid of regression diagnostics for one regressor.

    Each panel is plotted against the selected column of the design matrix
    and shows, in order: the observed response, the residuals (with a
    horizontal line at zero), the fitted values, and the fitted values plus
    the residuals.

    Parameters
    ----------
    res : result instance
        Results object providing ``resid``, ``fittedvalues``, ``model.endog``
        and ``model.exog``.
    exog_idx : int
        Column index of the regressor in ``res.model.exog``.
    exog_name : str, optional
        Name used in the figure title.  When left as the empty string,
        ``'variable <exog_idx>'`` is used instead.
    fig : Matplotlib figure instance, optional
        Handed to ``utils.create_mpl_fig``; the four subplots are added to
        the figure that call returns.

    Returns
    -------
    fig : matplotlib figure instance
        The figure holding the four subplots.
    """
    fig = utils.create_mpl_fig(fig)

    title_name = exog_name if exog_name != '' else 'variable %d' % exog_idx

    regressor = res.model.exog[:, exog_idx]

    # One row per panel: (subplot position, y-values, title, zero line?).
    # The y-values are wrapped in callables so that each attribute is only
    # read once its subplot has been created.
    panels = (
        (1, lambda: res.model.endog, 'endog versus exog', False),
        (2, lambda: res.resid, 'residuals versus exog', True),
        (3, lambda: res.fittedvalues, 'Fitted versus exog', False),
        (4, lambda: res.fittedvalues + res.resid,
         'Fitted plus residuals versus exog', False),
    )
    for position, y_values, panel_title, zero_line in panels:
        ax = fig.add_subplot(2, 2, position)
        ax.plot(regressor, y_values(), 'o')
        if zero_line:
            ax.axhline(y=0)
        ax.set_title(panel_title, fontsize='small')

    fig.suptitle('Regression Plots for %s' % title_name)

    return fig
    def plot_cusum(self, alpha=0.05, legend_loc='upper left',
                   fig=None, figsize=None):
        r"""
        Plot the CUSUM statistic and significance bounds.

        Parameters
        ----------
        alpha : float, optional
            Significance level of the plotted bounds, given as a fraction
            (the default 0.05 is labelled as "5% significance").
        legend_loc : string, optional
            The location of the legend in the plot. Default is upper left.
        fig : Matplotlib Figure instance, optional
            If given, subplots are created in this figure instead of in a new
            figure. Note that the grid will be created in the provided
            figure using `fig.add_subplot()`.
        figsize : tuple, optional
            If a figure is created, this argument allows specifying a size.
            The tuple is (width, height).

        Returns
        -------
        fig : Matplotlib Figure instance
            The figure containing the CUSUM plot.

        Notes
        -----
        Evidence of parameter instability may be found if the CUSUM statistic
        moves out of the significance bounds.

        References
        ----------
        .. [*] Brown, R. L., J. Durbin, and J. M. Evans. 1975.
           "Techniques for Testing the Constancy of
           Regression Relationships over Time."
           Journal of the Royal Statistical Society.
           Series B (Methodological) 37 (2): 149-92.

        """
        # Create the plot
        from statsmodels.graphics.utils import _import_mpl, create_mpl_fig
        _import_mpl()
        fig = create_mpl_fig(fig, figsize)
        ax = fig.add_subplot(1, 1, 1)

        # Get dates, if applicable
        if hasattr(self.data, 'dates') and self.data.dates is not None:
            dates = self.data.dates._mpl_repr()
        else:
            dates = np.arange(self.nobs)
        # Skip the initial periods (diffuse / burned) that have no statistic
        d = max(self.nobs_diffuse, self.loglikelihood_burn)
        start, end = dates[d], dates[-1]

        # Plot cusum series and reference line
        ax.plot(dates[d:], self.cusum, label='CUSUM')
        ax.hlines(0, start, end, color='k', alpha=0.3)

        # Plot significance bounds
        lower_line, upper_line = self._cusum_significance_bounds(alpha)
        # '%g' instead of '%d': '%d' truncates, so floating point noise
        # (0.29 * 100 == 28.999999999999996) produced "28%" and fractional
        # levels such as 2.5% were shown as "2%".
        ax.plot([start, end], upper_line, 'k--',
                label='%g%% significance' % (alpha * 100))
        ax.plot([start, end], lower_line, 'k--')

        ax.legend(loc=legend_loc)

        return fig
Exemple #3
0
    def plot_cusum(self, alpha=0.05, legend_loc='upper left',
                   fig=None, figsize=None):
        r"""
        Plot the CUSUM statistic and significance bounds.

        Parameters
        ----------
        alpha : float, optional
            Significance level of the plotted bounds, given as a fraction
            (the default 0.05 is labelled as "5% significance").
        legend_loc : string, optional
            The location of the legend in the plot. Default is upper left.
        fig : Matplotlib Figure instance, optional
            If given, subplots are created in this figure instead of in a new
            figure. Note that the grid will be created in the provided
            figure using `fig.add_subplot()`.
        figsize : tuple, optional
            If a figure is created, this argument allows specifying a size.
            The tuple is (width, height).

        Returns
        -------
        fig : Matplotlib Figure instance
            The figure containing the CUSUM plot.

        Notes
        -----
        Evidence of parameter instability may be found if the CUSUM statistic
        moves out of the significance bounds.

        References
        ----------
        .. [*] Brown, R. L., J. Durbin, and J. M. Evans. 1975.
           "Techniques for Testing the Constancy of
           Regression Relationships over Time."
           Journal of the Royal Statistical Society.
           Series B (Methodological) 37 (2): 149-92.

        """
        # Create the plot
        from statsmodels.graphics.utils import _import_mpl, create_mpl_fig
        _import_mpl()
        fig = create_mpl_fig(fig, figsize)
        ax = fig.add_subplot(1, 1, 1)

        # Get dates, if applicable
        if hasattr(self.data, 'dates') and self.data.dates is not None:
            dates = self.data.dates._mpl_repr()
        else:
            dates = np.arange(self.nobs)
        # Skip the initial periods (diffuse / burned) that have no statistic
        d = max(self.nobs_diffuse, self.loglikelihood_burn)
        start, end = dates[d], dates[-1]

        # Plot cusum series and reference line
        ax.plot(dates[d:], self.cusum, label='CUSUM')
        ax.hlines(0, start, end, color='k', alpha=0.3)

        # Plot significance bounds
        lower_line, upper_line = self._cusum_significance_bounds(alpha)
        # '%g' instead of '%d': '%d' truncates, so floating point noise
        # (0.29 * 100 == 28.999999999999996) produced "28%" and fractional
        # levels such as 2.5% were shown as "2%".
        ax.plot([start, end], upper_line, 'k--',
                label='%g%% significance' % (alpha * 100))
        ax.plot([start, end], lower_line, 'k--')

        ax.legend(loc=legend_loc)

        return fig
Exemple #4
0
    def plot_diagnostics(self, variable=0, lags=10, fig=None, figsize=None):
        """Plot residual diagnostics for the fitted ARIMA.

        Draws four diagnostic panels for the standardized residuals of one
        endogenous variable.

        Parameters
        ----------
        variable : integer, optional
            Index of the endogenous variable whose residuals are plotted.
            Default is 0.

        lags : integer, optional
            Number of lags shown in the correlogram. Default is 10.

        fig : Matplotlib Figure instance, optional
            If given, the 2x2 grid of subplots is added to this figure with
            `fig.add_subplot()` instead of to a new figure.

        figsize : tuple, optional
            Size (width, height) used if a figure is created.

        Returns
        -------
        fig : Matplotlib Figure instance
            The figure holding the four panels.

        Notes
        -----
        The 2x2 grid is filled row by row:

        1. Top-left: standardized residuals over time
        2. Top-right: histogram plus estimated density of the standardized
           residuals, with a Normal(0,1) density for reference.
        3. Bottom-left: Normal Q-Q plot, with Normal reference line.
        4. Bottom-right: correlogram

        See Also
        --------
        statsmodels.graphics.gofplots.qqplot
        pmdarima.utils.visualization.plot_acf

        References
        ----------
        .. [1] https://www.statsmodels.org/dev/_modules/statsmodels/tsa/statespace/mlemodel.html#MLEResults.plot_diagnostics
        """  # noqa: E501
        # Fetching pyplot doubles as the "is matplotlib installed?" check
        # and sets up the backend.
        _get_plt()

        # The figure is built here rather than by delegating to the wrapped
        # statsmodels results object, so all results wrappers share one path.
        from statsmodels.graphics import utils as sm_graphics
        fig = sm_graphics.create_mpl_fig(fig, figsize)

        wrapped = self.arima_res_
        model_data = wrapped.data

        # Residuals from burned or diffuse likelihood periods are dropped.
        # Results objects exposing ``loglikelihood_burn`` provide
        # standardized forecast errors directly; otherwise the plain
        # residuals are standardized by hand.
        if hasattr(wrapped, 'loglikelihood_burn'):
            offset = wrapped.loglikelihood_burn
            # ``nobs_diffuse`` is not present on older statsmodels releases
            if hasattr(wrapped, 'nobs_diffuse'):
                offset = np.maximum(offset, wrapped.nobs_diffuse)

            forecast_errors = \
                wrapped.filter_results.standardized_forecasts_error
            resid = forecast_errors[variable, offset:]
        else:
            offset = 0
            raw = wrapped.resid
            resid = (raw - np.nanmean(raw)) / np.nanstd(raw)

        # Panel 1 (top-left): residuals against time
        ax = fig.add_subplot(221)
        has_dates = (hasattr(model_data, 'dates')
                     and model_data.dates is not None)
        if has_dates:
            time_axis = model_data.dates[offset:]._mpl_repr()
        else:
            time_axis = np.arange(len(resid))
        ax.plot(time_axis, resid)
        first, last = time_axis[0], time_axis[-1]
        ax.hlines(0, first, last, alpha=0.5)
        ax.set_xlim(first, last)
        ax.set_title('Standardized residual')

        # Panel 2 (top-right): histogram, Gaussian KDE and N(0,1) density.
        # Histogram and KDE can only be computed on the non-NaN residuals.
        finite_resid = resid[~np.isnan(resid)]
        ax = fig.add_subplot(222)
        # The density keyword for ``hist`` differs between matplotlib
        # versions ('normed' vs 'density'); the compat helper supplies the
        # right one and any deprecation warning is swallowed.
        with warnings.catch_warnings(record=True):
            ax.hist(finite_resid, label='Hist',
                    **mpl_compat.mpl_hist_arg())

        density = gaussian_kde(finite_resid)
        half_width = 1.96 * 2
        support = np.linspace(-half_width, half_width)
        ax.plot(support, density(support), label='KDE')
        ax.plot(support, norm.pdf(support), label='N(0,1)')
        ax.set_xlim((-half_width, half_width))
        ax.legend()
        ax.set_title('Histogram plus estimated density')

        # Panel 3 (bottom-left): QQ plot
        ax = fig.add_subplot(223)
        from statsmodels.graphics import gofplots
        gofplots.qqplot(finite_resid, line='s', ax=ax)
        ax.set_title('Normal Q-Q')

        # Panel 4 (bottom-right): correlogram
        ax = fig.add_subplot(224)
        from statsmodels.graphics import tsaplots
        tsaplots.plot_acf(resid, ax=ax, lags=lags)
        ax.set_title('Correlogram')

        ax.set_ylim(-1, 1)

        return fig
Exemple #5
0
    def plot_recursive_coefficient(
        self,
        variables=None,
        alpha=0.05,
        legend_loc="upper left",
        fig=None,
        figsize=None,
    ):
        r"""
        Plot the recursively estimated coefficients on a given variable

        Parameters
        ----------
        variables : {int, str, Iterable[int], Iterable[str], None}, optional
            Integer index or string name of the variables whose coefficients
            to plot. Can also be an iterable of integers or strings. Default
            plots all coefficients.
        alpha : float, optional
            The confidence intervals for the coefficient are (1 - alpha)%. Set
            to None to exclude confidence intervals.
        legend_loc : str, optional
            The location of the legend in the plot. Default is upper left.
        fig : Figure, optional
            If given, subplots are created in this figure instead of in a new
            figure. Note that the grid will be created in the provided
            figure using `fig.add_subplot()`.
        figsize : tuple, optional
            If a figure is created, this argument allows specifying a size.
            The tuple is (width, height).

        Returns
        -------
        Figure
            The matplotlib Figure object.

        Raises
        ------
        ValueError
            If an entry of `variables` is neither an integer nor one of the
            model's parameter names.
        """
        from statsmodels.graphics.utils import _import_mpl, create_mpl_fig

        if alpha is not None:
            ci = self._conf_int(alpha, None)

        row_labels = self.model.data.row_labels
        if row_labels is None:
            row_labels = np.arange(self._params.shape[0])
        k_variables = self._params.shape[1]
        param_names = self.model.data.param_names
        if variables is None:
            variable_idx = list(range(k_variables))
        else:
            if isinstance(variables, (int, str)):
                variables = [variables]
            variable_idx = []
            # Iterate directly so that any iterable works (as documented),
            # not only sequences that support len() and indexing.
            for variable in variables:
                if variable in param_names:
                    variable_idx.append(param_names.index(variable))
                elif isinstance(variable, int):
                    variable_idx.append(variable)
                else:
                    msg = ("variable {0} is not an integer and was not found "
                           "in the list of variable "
                           "names: {1}".format(variable,
                                               ", ".join(param_names)))
                    raise ValueError(msg)

        _import_mpl()
        fig = create_mpl_fig(fig, figsize)

        import pandas as pd

        if isinstance(row_labels, pd.PeriodIndex):
            row_labels = row_labels.to_timestamp()
        row_labels = np.asarray(row_labels)
        n_plots = len(variable_idx)
        for position, i in enumerate(variable_idx, start=1):
            ax = fig.add_subplot(n_plots, 1, position)
            params = self._params[:, i]
            # Only rows with an estimated (non-NaN) coefficient are drawn
            valid = ~np.isnan(params)
            row_lbl = row_labels[valid]
            ax.plot(row_lbl, params[valid])
            if alpha is not None:
                this_ci = np.reshape(ci[:, :, i], (-1, 2))
                if not np.all(np.isnan(this_ci)):
                    ax.plot(row_lbl,
                            this_ci[:, 0][valid],
                            "k:",
                            label="Lower CI")
                    ax.plot(row_lbl,
                            this_ci[:, 1][valid],
                            "k:",
                            label="Upper CI")
                    # The legend is shown on the first subplot only
                    if position == 1:
                        ax.legend(loc=legend_loc)
            # With no valid rows there are no end points to pin the axis to;
            # indexing row_lbl[0] would raise IndexError.
            if row_lbl.size:
                ax.set_xlim(row_lbl[0], row_lbl[-1])
            ax.set_title(param_names[i])

        fig.tight_layout()
        return fig
def plot_partregress_grid(results, exog_idx=None, grid=None, fig=None):
    """Plot partial regression for a set of regressors.

    Parameters
    ----------
    results : results instance
        A regression model results instance
    exog_idx : None, list of ints, list of strings
        (column) indices of the exog used in the plot, default is all.
    grid : None or tuple of int (nrows, ncols)
        If grid is given, then it is used for the arrangement of the subplots.
        If grid is None, then ncol is one, if there are only 2 subplots, and
        the number of columns is two otherwise.
    fig : Matplotlib figure instance, optional
        Passed to ``utils.create_mpl_fig``; the subplots are added to the
        figure that call returns.

    Returns
    -------
    fig : Matplotlib figure instance
        The figure holding the partial regression subplots.

    Notes
    -----
    A subplot is created for each explanatory variable given by exog_idx.
    The partial regression plot shows the relationship between the response
    and the given explanatory variable after removing the effect of all other
    explanatory variables in exog.

    Subplot titles use a small font whenever the layout has more than one
    column.

    See Also
    --------
    plot_partregress : Plot partial regression for a single regressor.
    plot_ccpr

    References
    ----------
    See http://www.itl.nist.gov/div898/software/dataplot/refman1/auxillar/partregr.htm

    """
    import pandas
    fig = utils.create_mpl_fig(fig)

    _, exog_idx = utils.maybe_name_or_idx(exog_idx, results.model)

    #maybe add option for using wendog, wexog instead
    y = pandas.Series(results.model.endog, name=results.model.endog_names)
    exog = results.model.exog

    k_vars = exog.shape[1]
    #this function doesn't make sense if k_vars=1

    if grid is not None:
        nrows, ncols = grid
    elif len(exog_idx) > 2:
        nrows = int(np.ceil(len(exog_idx)/2.))
        ncols = 2
    else:
        nrows = len(exog_idx)
        ncols = 1

    # Derived from the final layout so that it is always defined; it used to
    # be assigned only when `grid` was None, which made every call with an
    # explicit grid fail with an unbound local variable.
    if ncols > 1:
        title_kwargs = {"fontdict" : {"fontsize" : 'small'}}
    else:
        title_kwargs = {}

    # for indexing purposes
    other_names = np.array(results.model.exog_names)
    for i, idx in enumerate(exog_idx):
        # all columns except the one currently being plotted
        others = lrange(k_vars)
        others.pop(idx)
        exog_others = pandas.DataFrame(exog[:, others],
                                       columns=other_names[others])
        ax = fig.add_subplot(nrows, ncols, i+1)
        plot_partregress(y, pandas.Series(exog[:, idx],
                                          name=other_names[idx]),
                         exog_others, ax=ax, title_kwargs=title_kwargs,
                         obs_labels=False)
        # per-panel titles are cleared; the figure gets one shared title
        ax.set_title("")

    fig.suptitle("Partial Regression Plot", fontsize="large")

    fig.tight_layout()

    # leave room at the top for the suptitle
    fig.subplots_adjust(top=.95)
    return fig
def plot_ccpr_grid(results, exog_idx=None, grid=None, fig=None):
    """Generate CCPR plots against a set of regressors, plot in a grid.

    Generates a grid of CCPR (component and component-plus-residual) plots.
    Regressors whose column has zero variance (constants) are skipped.

    Parameters
    ----------
    results : result instance
        uses exog and params of the result instance
    exog_idx : None or list of int
        (column) indices of the exog used in the plot
    grid : None or tuple of int (nrows, ncols)
        If grid is given, then it is used for the arrangement of the subplots.
        If grid is None, then ncol is one, if there are only 2 subplots, and
        the number of columns is two otherwise.
    fig : Matplotlib figure instance, optional
        Passed to ``utils.create_mpl_fig``; the subplots are added to the
        figure that call returns.

    Returns
    -------
    fig : Matplotlib figure instance
        The figure holding the CCPR subplots.

    Notes
    -----
    Partial residual plots are formed as::

        Res + Betahat(i)*Xi versus Xi

    and CCPR adds::

        Betahat(i)*Xi versus Xi

    See Also
    --------
    plot_ccpr : Creates CCPR plot for a single regressor.

    References
    ----------
    See http://www.itl.nist.gov/div898/software/dataplot/refman1/auxillar/ccpr.htm
    """
    fig = utils.create_mpl_fig(fig)

    _, exog_idx = utils.maybe_name_or_idx(exog_idx, results.model)

    if grid is not None:
        nrows, ncols = grid
    elif len(exog_idx) > 2:
        nrows = int(np.ceil(len(exog_idx)/2.))
        ncols = 2
    else:
        nrows = len(exog_idx)
        ncols = 1

    # Number of constant columns skipped so far.  This has to be a running
    # count rather than a 0/1 flag: with a flag, a second constant column
    # left an empty slot in the middle of the subplot grid.
    seen_constant = 0
    for i, idx in enumerate(exog_idx):
        if results.model.exog[:, idx].var() == 0:
            seen_constant += 1
            continue

        # shift left by the skipped columns to keep positions consecutive
        ax = fig.add_subplot(nrows, ncols, i+1-seen_constant)
        fig = plot_ccpr(results, exog_idx=idx, ax=ax)
        # per-panel titles are cleared; the figure gets one shared title
        ax.set_title("")

    fig.suptitle("Component-Component Plus Residual Plot", fontsize="large")

    fig.tight_layout()

    # leave room at the top for the suptitle
    fig.subplots_adjust(top=.95)
    return fig
Exemple #8
0
    def plot_recursive_coefficient(self, variables=0, alpha=0.05,
                                   legend_loc='upper left', fig=None,
                                   figsize=None):
        r"""
        Plot the recursively estimated coefficients on a given variable

        Parameters
        ----------
        variables : int or str or iterable of int or string, optional
            Integer index or string name of the variable whose coefficient will
            be plotted. Can also be an iterable of integers or strings. Default
            is the first variable.
        alpha : float, optional
            The confidence intervals for the coefficient are (1 - alpha) %.
            If None, no confidence intervals are drawn.
        legend_loc : string, optional
            The location of the legend in the plot. Default is upper left.
        fig : Matplotlib Figure instance, optional
            If given, subplots are created in this figure instead of in a new
            figure. Note that the grid will be created in the provided
            figure using `fig.add_subplot()`.
        figsize : tuple, optional
            If a figure is created, this argument allows specifying a size.
            The tuple is (width, height).

        Returns
        -------
        fig : Matplotlib Figure instance
            The figure with one subplot per requested variable.

        Notes
        -----
        Unless `alpha` is None, all plots contain (1 - `alpha`) %  confidence
        intervals.
        """
        # Get variables
        if isinstance(variables, (int, str)):
            variables = [variables]

        # Translate names into exog column indices.  A new list is built so
        # that the caller's sequence is never modified and immutable inputs
        # such as tuples of names are accepted.
        exog_names = self.model.exog_names
        variables = [
            exog_names.index(variable) if isinstance(variable, str)
            else variable
            for variable in variables
        ]
        k_variables = len(variables)

        # Create the plot
        from scipy.stats import norm
        from statsmodels.graphics.utils import _import_mpl, create_mpl_fig
        plt = _import_mpl()
        fig = create_mpl_fig(fig, figsize)

        # Get dates, if applicable (identical for every subplot)
        if hasattr(self.data, 'dates') and self.data.dates is not None:
            dates = self.data.dates._mpl_repr()
        else:
            dates = np.arange(self.nobs)
        # Skip the initial diffuse / burned periods
        d = max(self.nobs_diffuse, self.loglikelihood_burn)

        coef = self.recursive_coefficients

        # Get the critical value for confidence intervals
        if alpha is not None:
            critical_value = norm.ppf(1 - alpha / 2.)

        for i, variable in enumerate(variables):
            ax = fig.add_subplot(k_variables, 1, i + 1)

            # Plot the coefficient
            ax.plot(dates[d:], coef.filtered[variable, d:],
                    label='Recursive estimates: %s' % exog_names[variable])

            # Legend
            handles, labels = ax.get_legend_handles_labels()

            if alpha is not None:
                # Plot confidence intervals
                std_errors = np.sqrt(coef.filtered_cov[variable, variable, :])
                ci_lower = (
                    coef.filtered[variable] - critical_value * std_errors)
                ci_upper = (
                    coef.filtered[variable] + critical_value * std_errors)
                ci_poly = ax.fill_between(
                    dates[d:], ci_lower[d:], ci_upper[d:], alpha=0.2
                )

                # Only add CI to legend for the first plot
                if i == 0:
                    ci_label = ('$%.3g \\%%$ confidence interval'
                                % ((1 - alpha)*100))

                    # Proxy artist for fill_between legend entry
                    # See https://matplotlib.org/1.3.1/users/legend_guide.html
                    p = plt.Rectangle((0, 0), 1, 1,
                                      fc=ci_poly.get_facecolor()[0])

                    handles.append(p)
                    labels.append(ci_label)

            ax.legend(handles, labels, loc=legend_loc)

            # Remove xticks for all but the last plot
            if i < k_variables - 1:
                ax.xaxis.set_ticklabels([])

        fig.tight_layout()

        return fig
Exemple #9
0
def feature_regression_summary(df,
                               feat_idx,
                               target,
                               model_fit_results,
                               display_regress_diagnostics=False):
    """Compute the VIF of one feature and optionally show its diagnostics.

    Parameters
    ----------
    df : object with a ``columns`` attribute
        Feature table; presumably a pandas DataFrame holding only the
        regressors -- TODO confirm against callers.
    feat_idx : int
        Position of the feature in ``df.columns``.
    target
        Not used inside this function.
    model_fit_results : results instance
        Fitted model results; passed to the statsmodels plotting helpers
        and read for ``pvalues``.
    display_regress_diagnostics : bool, optional
        When true, a figure with five panels is shown, followed by two HTML
        snippets reporting the VIF and the feature's p-value.

    Returns
    -------
    v
        The value returned by ``vif`` for this feature.
    """

    feat = df.columns[feat_idx]

    # `vif` is defined outside this block; presumably statsmodels'
    # variance_inflation_factor -- verify against the file's imports.
    v = vif(np.matrix(df), feat_idx)
    # A VIF above 10 is treated as high collinearity in the report below.
    colinear = v > 10

    if display_regress_diagnostics:
        # Layout: a 3x2 grid.  Row 0: fit plot and residuals versus exog;
        # row 1: partial regression and CCPR; row 2 (both columns): the
        # distribution of the feature.
        fig = plt.figure(constrained_layout=True,
                         figsize=(2.25 * plot_edge, 4 * plot_edge))
        fig = smgrpu.create_mpl_fig(fig)
        gs = GridSpec(3, 2, figure=fig)
        ax_fit = fig.add_subplot(gs[0, 0])
        ax_partial_residuals = fig.add_subplot(gs[0, 1])
        ax_partregress = fig.add_subplot(gs[1, 0])
        ax_ccpr = fig.add_subplot(gs[1, 1])
        ax_dist = fig.add_subplot(gs[2:4, 0:2])

        exog_name, exog_idx = smgrpu.maybe_name_or_idx(feat,
                                                       model_fit_results.model)
        smresults = smtt.maybe_unwrap_results(model_fit_results)
        # NOTE(review): y_name and prstd/iv_l/iv_u are assigned but never
        # read in this function.
        y_name = smresults.model.endog_names
        x1 = smresults.model.exog[:, exog_idx]
        prstd, iv_l, iv_u = wls_prediction_std(smresults)

        # endog versus exog
        # use the statsmodels wrapper since it is available
        sm.graphics.plot_fit(model_fit_results, feat, ax=ax_fit)

        # residuals versus exog
        ax_partial_residuals.plot(x1, smresults.resid, 'o')
        ax_partial_residuals.axhline(y=0, color='black')
        ax_partial_residuals.set_title('Residuals versus %s' % exog_name,
                                       fontsize='large')
        ax_partial_residuals.set_xlabel(exog_name)
        ax_partial_residuals.set_ylabel("resid")

        # Partial Regression plot: fitted versus exog
        # Boolean mask selecting every exog column except the feature's.
        exog_noti = np.ones(smresults.model.exog.shape[1], bool)
        exog_noti[exog_idx] = False
        exog_others = smresults.model.exog[:, exog_noti]
        from pandas import Series
        smgrp.plot_partregress(smresults.model.data.orig_endog,
                               Series(x1,
                                      name=exog_name,
                                      index=smresults.model.data.row_labels),
                               exog_others,
                               obs_labels=False,
                               ax=ax_partregress)

        # CCPR: fitted plus residual versus exog
        # use the statsmodels wrapper since it is available
        sm.graphics.plot_ccpr(model_fit_results, feat, ax=ax_ccpr)
        ax_ccpr.set_title('CCPR Plot', fontsize='large')

        # Distribution of the raw feature values across the bottom row.
        sns.distplot(df[feat], ax=ax_dist)

        fig.suptitle('Regression Plots for %s' % exog_name, fontsize="large")
        #fig.tight_layout()
        #fig.subplots_adjust(top=.90)
        plt.show()

        # Report the VIF, rounded to two decimals, with a collinearity note.
        display(
            HTML(
                "Variance Inflation Factor (<i>VIF</i>) for <b>{}</b>: <b>{}</b> {}"
                .format(
                    feat, round(v, 2), "$\\le 10 \\iff$ low colinearity" if
                    not colinear else "$> 10 \\iff$ <b>HIGH COLINEARITY</b>")))
        # The p-value is read at feat_idx + 1; presumably index 0 of the
        # fitted parameters is an intercept that is not a column of df --
        # TODO confirm.
        display(
            HTML(
                "<b><i>p-value</i></b> (<i>VIF</i>) for <b>{}</b>: <b>{}</b><br><br>"
                .format(feat, model_fit_results.pvalues[feat_idx + 1])))

    return v
def plot_ccpr_grid(results, exog_idx=None, grid=None, fig=None):
    """Generate CCPR plots against a set of regressors, plot in a grid.

    Generates a grid of CCPR (component and component-plus-residual) plots.
    Regressors whose column has zero variance (constants) are skipped.

    Parameters
    ----------
    results : result instance
        uses exog and params of the result instance
    exog_idx : None or list of int
        (column) indices of the exog used in the plot
    grid : None or tuple of int (nrows, ncols)
        If grid is given, then it is used for the arrangement of the subplots.
        If grid is None, then ncol is one, if there are only 2 subplots, and
        the number of columns is two otherwise.
    fig : Matplotlib figure instance, optional
        Passed to ``utils.create_mpl_fig``; the subplots are added to the
        figure that call returns.

    Returns
    -------
    fig : Matplotlib figure instance
        The figure holding the CCPR subplots.

    Notes
    -----
    Partial residual plots are formed as::

        Res + Betahat(i)*Xi versus Xi

    and CCPR adds::

        Betahat(i)*Xi versus Xi

    See Also
    --------
    plot_ccpr : Creates CCPR plot for a single regressor.

    References
    ----------
    See http://www.itl.nist.gov/div898/software/dataplot/refman1/auxillar/ccpr.htm
    """
    fig = utils.create_mpl_fig(fig)

    _, exog_idx = utils.maybe_name_or_idx(exog_idx, results.model)

    if grid is not None:
        nrows, ncols = grid
    elif len(exog_idx) > 2:
        nrows = int(np.ceil(len(exog_idx) / 2.))
        ncols = 2
    else:
        nrows = len(exog_idx)
        ncols = 1

    # Number of constant columns skipped so far.  This has to be a running
    # count rather than a 0/1 flag: with a flag, a second constant column
    # left an empty slot in the middle of the subplot grid.
    seen_constant = 0
    for i, idx in enumerate(exog_idx):
        if results.model.exog[:, idx].var() == 0:
            seen_constant += 1
            continue

        # shift left by the skipped columns to keep positions consecutive
        ax = fig.add_subplot(nrows, ncols, i + 1 - seen_constant)
        fig = plot_ccpr(results, exog_idx=idx, ax=ax)
        # per-panel titles are cleared; the figure gets one shared title
        ax.set_title("")

    fig.suptitle("Component-Component Plus Residual Plot", fontsize="large")

    fig.tight_layout()

    # leave room at the top for the suptitle
    fig.subplots_adjust(top=.95)
    return fig
def plot_partregress(results, exog_idx=None, xnames=None, grid=None, fig=None):
    """Plot partial regression for a set of regressors.

    Parameters
    ----------
    results : results instance
        A regression model results instance
    exog_idx : None or list of int
        (column) indices of the exog used in the plot, default is all.
        Only consulted when `xnames` is None.
    xnames : None or list of strings
        Names of the regressors to plot; each must appear in
        results.model.exog_names.  When given, the columns are looked up by
        name and `exog_idx` is ignored.  Default is the names belonging to
        `exog_idx`.
    grid : None or tuple of int (nrows, ncols)
        If grid is given, then it is used for the arrangement of the subplots.
        If grid is None, then ncol is one, if there are only 2 subplots, and
        the number of columns is two otherwise.
    fig : Matplotlib figure instance, optional
        Passed to ``utils.create_mpl_fig``; the subplots are added to the
        figure that call returns.

    Returns
    -------
    fig : Matplotlib figure instance
        The figure holding the partial regression subplots.

    Raises
    ------
    ValueError
        If a name in `xnames` is not in results.model.exog_names.

    Notes
    -----
    A subplot is created for each selected explanatory variable.
    The partial regression plot shows the relationship between the response
    and the given explanatory variable after removing the effect of all other
    explanatory variables in exog.

    See Also
    --------
    plot_partregress_ax : Plot partial regression for a single regressor.
    plot_ccpr

    References
    ----------
    See http://www.itl.nist.gov/div898/software/dataplot/refman1/auxillar/partregr.htm

    """
    fig = utils.create_mpl_fig(fig)

    #maybe add option for using wendog, wexog instead
    y = results.model.endog
    exog = results.model.exog

    k_vars = exog.shape[1]
    #this function doesn't make sense if k_vars=1

    exog_names = results.model.exog_names
    if xnames is not None:
        # names take precedence: resolve them to column indices
        exog_idx = [exog_names.index(name) for name in xnames]
    else:
        # honour a caller-supplied exog_idx (it used to be overwritten with
        # "all columns"); fall back to every column when it is not given
        if exog_idx is None:
            exog_idx = range(k_vars)
        exog_idx = list(exog_idx)
        xnames = [exog_names[idx] for idx in exog_idx]

    if grid is not None:
        nrows, ncols = grid
    elif len(exog_idx) > 2:
        nrows = int(np.ceil(len(exog_idx)/2.))
        ncols = 2
    else:
        nrows = len(exog_idx)
        ncols = 1

    for i, idx in enumerate(exog_idx):
        # all columns except the one being plotted; list() is required
        # because a Python 3 range object has no pop()
        others = list(range(k_vars))
        others.pop(idx)
        exog_others = exog[:, others]
        ax = fig.add_subplot(nrows, ncols, i+1)
        plot_partregress_ax(y, exog[:, idx], exog_others, ax=ax,
                            varname=xnames[i])

    # tight_layout pads the figure so labels do not get clipped.  It is
    # missing from old matplotlib releases, so test for the method itself
    # instead of comparing version strings lexicographically.
    if hasattr(fig, 'tight_layout'):
        fig.tight_layout()

    return fig
def plot_ccpr_grid(results, exog_idx=None, grid=None, fig=None):
    """Generate CCPR plots against a set of regressors, plot in a grid.

    Generates a grid of CCPR (component and component-plus-residual) plots.
    Regressors whose column in ``results.model.exog`` has zero variance
    (e.g. a constant) are skipped and do not occupy a grid cell.

    Parameters
    ----------
    results : result instance
        uses exog and params of the result instance
    exog_idx : None or list of int
        (column) indices of the exog used in the plot.  The value is
        resolved through ``utils.maybe_name_or_idx``.
    grid : None or tuple of int (nrows, ncols)
        If grid is given, then it is used for the arrangement of the subplots.
        If grid is None, then ncol is one, if there are only 2 subplots, and
        the number of columns is two otherwise.
    fig : Matplotlib figure instance, optional
        If given, the subplots are added to this figure.  Otherwise a new
        figure is created.

    Returns
    -------
    fig : Matplotlib figure instance
        The figure the subplots were drawn on.

    Notes
    -----
    Partial residual plots are formed as::

        Res + Betahat(i)*Xi versus Xi

    and CCPR adds::

        Betahat(i)*Xi versus Xi

    See Also
    --------
    plot_ccpr : Creates CCPR plot for a single regressor.

    Examples
    --------
    Using the state crime dataset, separately plot the effect of each
    variable on the outcome, murder rate, while accounting for the effect
    of all other variables in the model.

    >>> import statsmodels.api as sm
    >>> import matplotlib.pyplot as plt
    >>> import statsmodels.formula.api as smf

    >>> fig = plt.figure(figsize=(8, 8))
    >>> crime_data = sm.datasets.statecrime.load_pandas()
    >>> results = smf.ols('murder ~ hs_grad + urban + poverty + single',
    ...                   data=crime_data.data).fit()
    >>> sm.graphics.plot_ccpr_grid(results, fig=fig)
    >>> plt.show()

    .. plot:: plots/graphics_regression_ccpr_grid.py

    References
    ----------
    See http://www.itl.nist.gov/div898/software/dataplot/refman1/auxillar/ccpr.htm
    """
    fig = utils.create_mpl_fig(fig)

    # Only the resolved indices are needed; the names are not used here.
    _, exog_idx = utils.maybe_name_or_idx(exog_idx, results.model)

    if grid is not None:
        nrows, ncols = grid
    elif len(exog_idx) > 2:
        nrows = int(np.ceil(len(exog_idx) / 2.))
        ncols = 2
    else:
        nrows = len(exog_idx)
        ncols = 1

    # Count every skipped constant column.  A plain 0/1 flag would shift the
    # subplot position by at most one, so a second constant column would make
    # a later plot land on an already used grid cell.
    n_skipped = 0
    for i, idx in enumerate(exog_idx):
        if results.model.exog[:, idx].var() == 0:
            n_skipped += 1
            continue

        ax = fig.add_subplot(nrows, ncols, i + 1 - n_skipped)
        # plot_ccpr presumably returns the figure owning `ax` -- confirm.
        fig = plot_ccpr(results, exog_idx=idx, ax=ax)
        # Per-axes titles are cleared; a single suptitle is used instead.
        ax.set_title("")

    fig.suptitle("Component-Component Plus Residual Plot", fontsize="large")

    fig.tight_layout()

    # Leave room at the top for the suptitle after tight_layout.
    fig.subplots_adjust(top=.95)
    return fig
    def plot_recursive_coefficient(self, variables=0, alpha=0.05,
                                   legend_loc='upper left', fig=None,
                                   figsize=None):
        r"""
        Plot the recursively estimated coefficients on a given variable

        Parameters
        ----------
        variables : int or str or iterable of int or string, optional
            Integer index or string name of the variable whose coefficient will
            be plotted. Can also be an iterable of integers or strings. Default
            is the first variable. The passed object is not modified.
        alpha : float or None, optional
            The confidence intervals for the coefficient are (1 - alpha) %.
            If None, no confidence intervals are drawn.
        legend_loc : string, optional
            The location of the legend in the plot. Default is upper left.
        fig : Matplotlib Figure instance, optional
            If given, subplots are created in this figure instead of in a new
            figure. Note that the grid will be created in the provided
            figure using `fig.add_subplot()`.
        figsize : tuple, optional
            If a figure is created, this argument allows specifying a size.
            The tuple is (width, height).

        Returns
        -------
        fig : Matplotlib Figure instance
            The figure holding one subplot per requested variable.

        Notes
        -----
        All plots contain (1 - `alpha`) %  confidence intervals.
        """
        # Normalise `variables` into a new list of integer indices.  Building
        # a fresh list (instead of assigning into the argument) keeps the
        # caller's list untouched and lets tuples and other iterables work.
        if isinstance(variables, (int, str)):
            variables = [variables]
        exog_names = self.model.exog_names
        variables = [exog_names.index(v) if isinstance(v, str) else v
                     for v in variables]
        k_variables = len(variables)

        # Create the plot
        from scipy.stats import norm
        from statsmodels.graphics.utils import _import_mpl, create_mpl_fig
        plt = _import_mpl()
        fig = create_mpl_fig(fig, figsize)

        # The quantities below do not depend on the variable being plotted,
        # so they are computed once instead of once per subplot.
        if hasattr(self.data, 'dates') and self.data.dates is not None:
            dates = self.data.dates._mpl_repr()
        else:
            dates = np.arange(self.nobs)
        # Leading observations excluded from every plotted series.
        d = max(self.nobs_diffuse, self.loglikelihood_burn)
        coef = self.recursive_coefficients
        if alpha is not None:
            # Two-sided normal critical value for the confidence band.
            critical_value = norm.ppf(1 - alpha / 2.)

        for i, variable in enumerate(variables):
            ax = fig.add_subplot(k_variables, 1, i + 1)

            # Plot the coefficient
            ax.plot(dates[d:], coef.filtered[variable, d:],
                    label='Recursive estimates: %s' % exog_names[variable])

            # Legend
            handles, labels = ax.get_legend_handles_labels()

            if alpha is not None:
                # Plot confidence intervals
                std_errors = np.sqrt(coef.filtered_cov[variable, variable, :])
                ci_lower = (
                    coef.filtered[variable] - critical_value * std_errors)
                ci_upper = (
                    coef.filtered[variable] + critical_value * std_errors)
                ci_poly = ax.fill_between(
                    dates[d:], ci_lower[d:], ci_upper[d:], alpha=0.2
                )

                # Only add CI to legend for the first plot
                if i == 0:
                    ci_label = ('$%.3g \\%%$ confidence interval'
                                % ((1 - alpha)*100))
                    # Proxy artist for fill_between legend entry
                    # See http://matplotlib.org/1.3.1/users/legend_guide.html
                    p = plt.Rectangle((0, 0), 1, 1,
                                      fc=ci_poly.get_facecolor()[0])

                    handles.append(p)
                    labels.append(ci_label)

            ax.legend(handles, labels, loc=legend_loc)

            # Remove xticks for all but the last plot
            if i < k_variables - 1:
                ax.xaxis.set_ticklabels([])

        fig.tight_layout()

        return fig
def plot_partregress_grid(results, exog_idx=None, grid=None, fig=None):
    """Plot partial regression for a set of regressors.

    Parameters
    ----------
    results : results instance
        A regression model results instance
    exog_idx : None, list of ints, list of strings
        (column) indices of the exog used in the plot, default is all.
    grid : None or tuple of int (nrows, ncols)
        If grid is given, then it is used for the arrangement of the subplots.
        If grid is None, then ncol is one, if there are only 2 subplots, and
        the number of columns is two otherwise.
    fig : Matplotlib figure instance, optional
        If given, the subplots are added to this figure and it is returned.
        Otherwise a new figure is created.

    Returns
    -------
    fig : Matplotlib figure instance
        If `fig` is None, the created figure.  Otherwise `fig` itself.

    Notes
    -----
    A subplot is created for each explanatory variable given by exog_idx.
    The partial regression plot shows the relationship between the response
    and the given explanatory variable after removing the effect of all other
    explanatory variables in exog.

    See Also
    --------
    plot_partregress : Plot partial regression for a single regressor.
    plot_ccpr : Plot CCPR against one regressor

    Examples
    --------
    Using the state crime dataset, separately plot the effect of each
    variable on the outcome, murder rate, while accounting for the effect
    of all other variables in the model, visualized with a grid of partial
    regression plots.

    >>> from statsmodels.graphics.regressionplots import plot_partregress_grid
    >>> import statsmodels.api as sm
    >>> import matplotlib.pyplot as plt
    >>> import statsmodels.formula.api as smf

    >>> fig = plt.figure(figsize=(8, 6))
    >>> crime_data = sm.datasets.statecrime.load_pandas()
    >>> results = smf.ols('murder ~ hs_grad + urban + poverty + single',
    ...                   data=crime_data.data).fit()
    >>> plot_partregress_grid(results, fig=fig)
    >>> plt.show()

    .. plot:: plots/graphics_regression_partregress_grid.py

    References
    ----------
    See http://www.itl.nist.gov/div898/software/dataplot/refman1/auxillar/partregr.htm

    """
    import pandas
    fig = utils.create_mpl_fig(fig)

    # Only the resolved indices are needed; the names are not used here.
    _, exog_idx = utils.maybe_name_or_idx(exog_idx, results.model)

    #maybe add option for using wendog, wexog instead
    y = pandas.Series(results.model.endog, name=results.model.endog_names)
    exog = results.model.exog

    k_vars = exog.shape[1]
    #this function doesn't make sense if k_vars=1

    if grid is not None:
        nrows, ncols = grid
    elif len(exog_idx) > 2:
        nrows = int(np.ceil(len(exog_idx) / 2.))
        ncols = 2
    else:
        nrows = len(exog_idx)
        ncols = 1

    # Derive the title style from the final column count so that it is
    # always defined.  Previously it was assigned only when `grid` was None,
    # and an explicit `grid` raised UnboundLocalError in the loop below.
    if ncols > 1:
        title_kwargs = {"fontdict": {"fontsize": 'small'}}
    else:
        title_kwargs = {}

    # for indexing purposes
    other_names = np.array(results.model.exog_names)
    for i, idx in enumerate(exog_idx):
        # Column positions of every regressor except the one being plotted.
        others = lrange(k_vars)
        others.pop(idx)
        exog_others = pandas.DataFrame(exog[:, others],
                                       columns=other_names[others])
        ax = fig.add_subplot(nrows, ncols, i + 1)
        plot_partregress(y, pandas.Series(exog[:, idx],
                                          name=other_names[idx]),
                         exog_others, ax=ax, title_kwargs=title_kwargs,
                         obs_labels=False)
        # Per-axes titles are cleared; a single suptitle is used instead.
        ax.set_title("")

    fig.suptitle("Partial Regression Plot", fontsize="large")

    fig.tight_layout()

    # Leave room at the top for the suptitle after tight_layout.
    fig.subplots_adjust(top=.95)
    return fig
def plot_regress_exog(results, exog_idx, fig=None):
    """Draw a 2 by 2 panel of regression diagnostics for one regressor.

    In subplot order the panels show: observed and fitted values against
    the regressor (with vertical bars spanning the interval bounds returned
    by ``wls_prediction_std``), residuals against the regressor, a partial
    regression plot, and a CCPR plot.

    Parameters
    ----------
    results : result instance
        result instance with resid, fittedvalues, model.endog and model.exog
        as attributes
    exog_idx : int or str
        Name or index of regressor in exog matrix
    fig : Matplotlib figure instance, optional
        If given, the panels are added to this figure.  Otherwise a new
        figure is created.

    Returns
    -------
    fig : matplotlib figure instance

    Examples
    --------
    Load the Statewide Crime data set and build a model with regressors
    including the rate of high school graduation (hs_grad), population in urban
    areas (urban), households below poverty line (poverty), and single person
    households (single).  Outcome variable is the murder rate (murder).

    Build a 2 by 2 figure based on poverty showing fitted versus actual murder
    rate, residuals versus the poverty rate, partial regression plot of poverty,
    and CCPR plot for poverty rate.

    >>> import statsmodels.api as sm
    >>> import matplotlib.pyplot as plt
    >>> import statsmodels.formula.api as smf

    >>> fig = plt.figure(figsize=(8, 6))
    >>> crime_data = sm.datasets.statecrime.load_pandas()
    >>> results = smf.ols('murder ~ hs_grad + urban + poverty + single',
    ...                   data=crime_data.data).fit()
    >>> sm.graphics.plot_regress_exog(results, 'poverty', fig=fig)
    >>> plt.show()

    .. plot:: plots/graphics_regression_regress_exog.py

    """
    fig = utils.create_mpl_fig(fig)

    exog_name, exog_idx = utils.maybe_name_or_idx(exog_idx, results.model)
    results = maybe_unwrap_results(results)
    model = results.model

    endog_label = model.endog_names
    regressor = model.exog[:, exog_idx]
    # Only the interval bounds are drawn; the first returned value is unused.
    _, lower, upper = wls_prediction_std(results)

    # Panel 1: observed and fitted response against the regressor.
    panel = fig.add_subplot(2, 2, 1)
    panel.plot(regressor, model.endog, 'o', color='b', alpha=0.9,
               label=endog_label)
    panel.plot(regressor, results.fittedvalues, 'D', color='r',
               label='fitted', alpha=.5)
    panel.vlines(regressor, lower, upper, linewidth=1, color='k', alpha=.7)
    panel.set_title('Y and Fitted vs. X', fontsize='large')
    panel.set_xlabel(exog_name)
    panel.set_ylabel(endog_label)
    panel.legend(loc='best')

    # Panel 2: residuals against the regressor, with a zero reference line.
    panel = fig.add_subplot(2, 2, 2)
    panel.plot(regressor, results.resid, 'o')
    panel.axhline(y=0, color='black')
    panel.set_title('Residuals versus %s' % exog_name, fontsize='large')
    panel.set_xlabel(exog_name)
    panel.set_ylabel("resid")

    # Panel 3: partial regression of the response on this regressor,
    # using every other exog column as the control set.
    panel = fig.add_subplot(2, 2, 3)
    keep_columns = np.ones(model.exog.shape[1], bool)
    keep_columns[exog_idx] = False
    control_exog = model.exog[:, keep_columns]
    from pandas import Series
    regressor_series = Series(regressor, name=exog_name,
                              index=model.data.row_labels)
    fig = plot_partregress(model.data.orig_endog, regressor_series,
                           control_exog, obs_labels=False, ax=panel)
    panel.set_title('Partial regression plot', fontsize='large')

    # Panel 4: component and component-plus-residual plot.
    panel = fig.add_subplot(2, 2, 4)
    fig = plot_ccpr(results, exog_idx, ax=panel)
    panel.set_title('CCPR Plot', fontsize='large')

    fig.suptitle('Regression Plots for %s' % exog_name, fontsize="large")
    fig.tight_layout()
    # Leave room at the top for the suptitle after tight_layout.
    fig.subplots_adjust(top=.90)
    return fig
Exemple #16
0
    def plot_cusum_squares(self, alpha=0.05, legend_loc='upper left',
                           fig=None, figsize=None):
        r"""
        Plot the CUSUM of squares statistic and significance bounds.

        Parameters
        ----------
        alpha : float, optional
            The plotted significance bounds are alpha %.
        legend_loc : string, optional
            The location of the legend in the plot. Default is upper left.
        fig : Matplotlib Figure instance, optional
            If given, subplots are created in this figure instead of in a new
            figure. Note that the grid will be created in the provided
            figure using `fig.add_subplot()`.
        figsize : tuple, optional
            If a figure is created, this argument allows specifying a size.
            The tuple is (width, height).

        Returns
        -------
        fig : Matplotlib Figure instance
            The figure containing the single CUSUM of squares subplot.

        Notes
        -----
        Evidence of parameter instability may be found if the CUSUM of squares
        statistic moves out of the significance bounds.

        Critical values used in creating the significance bounds are computed
        using the approximate formula of [2]_.

        References
        ----------
        .. [1] Brown, R. L., J. Durbin, and J. M. Evans. 1975.
           "Techniques for Testing the Constancy of
           Regression Relationships over Time."
           Journal of the Royal Statistical Society.
           Series B (Methodological) 37 (2): 149-92.
        .. [2] Edgerton, David, and Curt Wells. 1994.
           "Critical Values for the Cusumsq Statistic
           in Medium and Large Sized Samples."
           Oxford Bulletin of Economics and Statistics 56 (3): 355-65.

        """
        from statsmodels.graphics.utils import _import_mpl, create_mpl_fig
        # The returned module is not used below; the call is kept because it
        # presumably verifies that matplotlib can be imported -- confirm.
        _import_mpl()
        fig = create_mpl_fig(fig, figsize)
        axis = fig.add_subplot(1, 1, 1)

        # Number of leading observations left out of the plotted series.
        burn = self.loglikelihood_burn

        # Use the model's dates for the x axis when present, otherwise the
        # integer observation index.
        has_dates = (hasattr(self.data, 'dates')
                     and self.data.dates is not None)
        if has_dates:
            time_index = self.data.dates._mpl_repr()
        else:
            time_index = np.arange(self.nobs)
        plotted_index = time_index[burn:]

        # Statistic together with a straight reference line that starts at
        # zero at the first plotted observation.
        axis.plot(plotted_index, self.cusum_squares, label='CUSUM of squares')
        n_plotted = self.nobs - burn
        reference = (np.arange(burn, self.nobs) - burn) / n_plotted
        axis.plot(plotted_index, reference, 'k', alpha=0.3)

        # Significance bounds are drawn between the first and last plotted
        # x positions.
        bounds = self._cusum_squares_significance_bounds(alpha)
        lower_bound, upper_bound = bounds
        endpoints = [time_index[burn], time_index[-1]]
        axis.plot(endpoints, upper_bound, 'k--',
                  label='%d%% significance' % (alpha * 100))
        axis.plot(endpoints, lower_bound, 'k--')

        axis.legend(loc=legend_loc)

        return fig
def plot_regress_exog(results, exog_idx, fig=None):
    """Plot regression results against one regressor.

    This plots four graphs in a 2 by 2 figure: observed and fitted values
    versus the regressor (with vertical bars spanning the interval bounds
    returned by ``wls_prediction_std``), residuals versus the regressor,
    a partial regression plot and a CCPR plot.

    Parameters
    ----------
    results : result instance
        result instance with resid, fittedvalues, model.endog and model.exog
        as attributes
    exog_idx : int
        index of regressor in exog matrix.  The value is resolved through
        ``utils.maybe_name_or_idx``.
    fig : Matplotlib figure instance, optional
        If given, the four subplots are added to this figure.  Otherwise a
        new figure is created.

    Returns
    -------
    fig : matplotlib figure instance
    """
    fig = utils.create_mpl_fig(fig)

    exog_name, exog_idx = utils.maybe_name_or_idx(exog_idx, results.model)
    results = maybe_unwrap_results(results)
    fitted_model = results.model

    response_name = fitted_model.endog_names
    xvals = fitted_model.exog[:, exog_idx]
    # The first returned value is not plotted; only the bounds are used.
    _, bound_low, bound_high = wls_prediction_std(results)

    # Top-left: response and fitted values, with interval bars.
    top_left = fig.add_subplot(2, 2, 1)
    top_left.plot(xvals, fitted_model.endog, 'o', color='b', alpha=0.9,
                  label=response_name)
    top_left.plot(xvals, results.fittedvalues, 'D', color='r',
                  label='fitted', alpha=.5)
    top_left.vlines(xvals, bound_low, bound_high, linewidth=1, color='k',
                    alpha=.7)
    top_left.set_title('Y and Fitted vs. X', fontsize='large')
    top_left.set_xlabel(exog_name)
    top_left.set_ylabel(response_name)
    top_left.legend(loc='best')

    # Top-right: residuals around a horizontal zero line.
    top_right = fig.add_subplot(2, 2, 2)
    top_right.plot(xvals, results.resid, 'o')
    top_right.axhline(y=0, color='black')
    top_right.set_title('Residuals versus %s' % exog_name, fontsize='large')
    top_right.set_xlabel(exog_name)
    top_right.set_ylabel("resid")

    # Bottom-left: partial regression against all remaining exog columns.
    bottom_left = fig.add_subplot(2, 2, 3)
    is_other = np.ones(fitted_model.exog.shape[1], bool)
    is_other[exog_idx] = False
    remaining_exog = fitted_model.exog[:, is_other]
    from pandas import Series
    labelled_x = Series(xvals, name=exog_name,
                        index=fitted_model.data.row_labels)
    fig = plot_partregress(fitted_model.data.orig_endog, labelled_x,
                           remaining_exog, obs_labels=False, ax=bottom_left)
    bottom_left.set_title('Partial regression plot', fontsize='large')

    # Bottom-right: component and component-plus-residual plot.
    bottom_right = fig.add_subplot(2, 2, 4)
    fig = plot_ccpr(results, exog_idx, ax=bottom_right)
    bottom_right.set_title('CCPR Plot', fontsize='large')

    fig.suptitle('Regression Plots for %s' % exog_name, fontsize="large")
    fig.tight_layout()
    # Leave room at the top for the suptitle after tight_layout.
    fig.subplots_adjust(top=.90)
    return fig
Exemple #18
0
def plot_partregress_grid(results, exog_idx=None, grid=None, fig=None):
    """Plot partial regression for a set of regressors.

    Parameters
    ----------
    results : results instance
        A regression model results instance
    exog_idx : None, list of ints, list of strings
        (column) indices of the exog used in the plot, default is all.
    grid : None or tuple of int (nrows, ncols)
        If grid is given, then it is used for the arrangement of the subplots.
        If grid is None, then ncol is one, if there are only 2 subplots, and
        the number of columns is two otherwise.
    fig : Matplotlib figure instance, optional
        If given, the subplots are added to this figure and it is returned.
        Otherwise a new figure is created.

    Returns
    -------
    fig : Matplotlib figure instance
        If `fig` is None, the created figure.  Otherwise `fig` itself.

    Notes
    -----
    A subplot is created for each explanatory variable given by exog_idx.
    The partial regression plot shows the relationship between the response
    and the given explanatory variable after removing the effect of all other
    explanatory variables in exog.

    See Also
    --------
    plot_partregress : Plot partial regression for a single regressor.
    plot_ccpr : Plot CCPR against one regressor

    Examples
    --------
    Using the state crime dataset, separately plot the effect of each
    variable on the outcome, murder rate, while accounting for the effect
    of all other variables in the model, visualized with a grid of partial
    regression plots.

    >>> from statsmodels.graphics.regressionplots import plot_partregress_grid
    >>> import statsmodels.api as sm
    >>> import matplotlib.pyplot as plt
    >>> import statsmodels.formula.api as smf

    >>> fig = plt.figure(figsize=(8, 6))
    >>> crime_data = sm.datasets.statecrime.load_pandas()
    >>> results = smf.ols('murder ~ hs_grad + urban + poverty + single',
    ...                   data=crime_data.data).fit()
    >>> plot_partregress_grid(results, fig=fig)
    >>> plt.show()

    .. plot:: plots/graphics_regression_partregress_grid.py

    References
    ----------
    See http://www.itl.nist.gov/div898/software/dataplot/refman1/auxillar/partregr.htm

    """
    import pandas
    fig = utils.create_mpl_fig(fig)

    # Only the resolved indices are needed; the names are not used here.
    _, exog_idx = utils.maybe_name_or_idx(exog_idx, results.model)

    #maybe add option for using wendog, wexog instead
    y = pandas.Series(results.model.endog, name=results.model.endog_names)
    exog = results.model.exog

    k_vars = exog.shape[1]
    #this function doesn't make sense if k_vars=1

    if grid is not None:
        nrows, ncols = grid
    elif len(exog_idx) > 2:
        nrows = int(np.ceil(len(exog_idx) / 2.))
        ncols = 2
    else:
        nrows = len(exog_idx)
        ncols = 1

    # The title style must exist on every path.  It used to be set only in
    # the automatic-layout branches, so calling with an explicit `grid`
    # failed with UnboundLocalError at the plot_partregress call.  Keying it
    # on the column count reproduces the automatic-layout behaviour.
    if ncols > 1:
        title_kwargs = {"fontdict": {"fontsize": 'small'}}
    else:
        title_kwargs = {}

    # for indexing purposes
    other_names = np.array(results.model.exog_names)
    for i, idx in enumerate(exog_idx):
        # Column positions of every regressor except the one being plotted.
        others = lrange(k_vars)
        others.pop(idx)
        exog_others = pandas.DataFrame(exog[:, others],
                                       columns=other_names[others])
        ax = fig.add_subplot(nrows, ncols, i + 1)
        plot_partregress(y,
                         pandas.Series(exog[:, idx], name=other_names[idx]),
                         exog_others,
                         ax=ax,
                         title_kwargs=title_kwargs,
                         obs_labels=False)
        # Per-axes titles are cleared; a single suptitle is used instead.
        ax.set_title("")

    fig.suptitle("Partial Regression Plot", fontsize="large")

    fig.tight_layout()

    # Leave room at the top for the suptitle after tight_layout.
    fig.subplots_adjust(top=.95)
    return fig
def plot_partregress_grid(results, exog_idx=None, grid=None, fig=None):
    """Plot partial regression for a set of regressors.

    Parameters
    ----------
    results : results instance
        A regression model results instance
    exog_idx : None, list of ints, list of strings
        (column) indices of the exog used in the plot, default is all.
    grid : None or tuple of int (nrows, ncols)
        If grid is given, then it is used for the arrangement of the subplots.
        If grid is None, then ncol is one, if there are only 2 subplots, and
        the number of columns is two otherwise.
    fig : Matplotlib figure instance, optional
        If given, the subplots are added to this figure and it is returned.
        Otherwise a new figure is created.

    Returns
    -------
    fig : Matplotlib figure instance
        If `fig` is None, the created figure.  Otherwise `fig` itself.

    Notes
    -----
    A subplot is created for each explanatory variable given by exog_idx.
    The partial regression plot shows the relationship between the response
    and the given explanatory variable after removing the effect of all other
    explanatory variables in exog.

    See Also
    --------
    plot_partregress : Plot partial regression for a single regressor.
    plot_ccpr

    References
    ----------
    See http://www.itl.nist.gov/div898/software/dataplot/refman1/auxillar/partregr.htm

    """
    import pandas
    fig = utils.create_mpl_fig(fig)

    # Only the resolved indices are needed; the names are not used here.
    _, exog_idx = utils.maybe_name_or_idx(exog_idx, results.model)

    #maybe add option for using wendog, wexog instead
    y = pandas.Series(results.model.endog, name=results.model.endog_names)
    exog = results.model.exog

    k_vars = exog.shape[1]
    #this function doesn't make sense if k_vars=1

    if grid is not None:
        nrows, ncols = grid
    elif len(exog_idx) > 2:
        nrows = int(np.ceil(len(exog_idx) / 2.))
        ncols = 2
    else:
        nrows = len(exog_idx)
        ncols = 1

    # Choose the title style from the resulting column count.  It was
    # previously assigned only when `grid` was None, which left the name
    # unbound (UnboundLocalError) whenever a grid was supplied.
    if ncols > 1:
        title_kwargs = {"fontdict": {"fontsize": 'small'}}
    else:
        title_kwargs = {}

    # for indexing purposes
    other_names = np.array(results.model.exog_names)
    for i, idx in enumerate(exog_idx):
        # Column positions of every regressor except the one being plotted.
        others = lrange(k_vars)
        others.pop(idx)
        exog_others = pandas.DataFrame(exog[:, others],
                                       columns=other_names[others])
        ax = fig.add_subplot(nrows, ncols, i + 1)
        plot_partregress(y,
                         pandas.Series(exog[:, idx], name=other_names[idx]),
                         exog_others,
                         ax=ax,
                         title_kwargs=title_kwargs,
                         obs_labels=False)
        # Per-axes titles are cleared; a single suptitle is used instead.
        ax.set_title("")

    fig.suptitle("Partial Regression Plot", fontsize="large")

    fig.tight_layout()

    # Leave room at the top for the suptitle after tight_layout.
    fig.subplots_adjust(top=.95)
    return fig
def plot_ccpr(res, exog_idx=None, grid=None, fig=None):
    """Generate CCPR plots against a set of regressors, plot in a grid.

    Generates a grid of CCPR (component and component-plus-residual) plots.

    Parameters
    ----------
    res : result instance
        uses exog and params of the result instance
    exog_idx : None or list of int
        (column) indices of the exog used in the plot.  If None, every
        column of ``res.model.exog`` is plotted.
    grid : None or tuple of int (nrows, ncols)
        If grid is given, then it is used for the arrangement of the subplots.
        If grid is None, then ncol is one, if there are only 2 subplots, and
        the number of columns is two otherwise.
    fig : Matplotlib figure instance, optional
        If given, the subplots are added to this figure and it is returned.
        Otherwise a new figure is created.

    Returns
    -------
    fig : Matplotlib figure instance
        If `fig` is None, the created figure.  Otherwise `fig` itself.

    Notes
    -----
    Partial residual plots are formed as::

        Res + Betahat(i)*Xi versus Xi

    and CCPR adds::

        Betahat(i)*Xi versus Xi

    See Also
    --------
    plot_ccpr_ax : Creates CCPR plot for a single regressor.

    References
    ----------
    See http://www.itl.nist.gov/div898/software/dataplot/refman1/auxillar/ccpr.htm

    """
    fig = utils.create_mpl_fig(fig)

    # The documented default of None used to fail on len(None); treat it as
    # "all regressors" instead.
    if exog_idx is None:
        exog_idx = list(range(res.model.exog.shape[1]))

    if grid is not None:
        nrows, ncols = grid
    elif len(exog_idx) > 2:
        nrows = int(np.ceil(len(exog_idx) / 2.))
        ncols = 2
    else:
        nrows = len(exog_idx)
        ncols = 1

    # One subplot per requested regressor, filled in grid order.
    for i, idx in enumerate(exog_idx):
        ax = fig.add_subplot(nrows, ncols, i + 1)
        plot_ccpr_ax(res, exog_idx=idx, ax=ax)

    return fig
Exemple #21
0
def plot_regress_exog(results, exog_idx, fig=None):
    """Produce four regression diagnostic plots for a single regressor.

    The plots are arranged in a 2 by 2 figure: observed and fitted values
    versus the regressor (with vertical bars spanning the interval bounds
    returned by ``wls_prediction_std``), residuals versus the regressor,
    a partial regression plot and a CCPR plot.

    Parameters
    ----------
    results : result instance
        result instance with resid, fittedvalues, model.endog and model.exog
        as attributes
    exog_idx : int or str
        Name or index of regressor in exog matrix
    fig : Matplotlib figure instance, optional
        If given, the four subplots are added to this figure.  Otherwise a
        new figure is created.

    Returns
    -------
    fig : matplotlib figure instance

    Examples
    --------
    Load the Statewide Crime data set and build a model with regressors
    including the rate of high school graduation (hs_grad), population in urban
    areas (urban), households below poverty line (poverty), and single person
    households (single).  Outcome variable is the murder rate (murder).

    Build a 2 by 2 figure based on poverty showing fitted versus actual murder
    rate, residuals versus the poverty rate, partial regression plot of poverty,
    and CCPR plot for poverty rate.

    >>> import statsmodels.api as sm
    >>> import matplotlib.pyplot as plt
    >>> import statsmodels.formula.api as smf

    >>> fig = plt.figure(figsize=(8, 6))
    >>> crime_data = sm.datasets.statecrime.load_pandas()
    >>> results = smf.ols('murder ~ hs_grad + urban + poverty + single',
    ...                   data=crime_data.data).fit()
    >>> sm.graphics.plot_regress_exog(results, 'poverty', fig=fig)
    >>> plt.show()

    .. plot:: plots/graphics_regression_regress_exog.py

    """
    fig = utils.create_mpl_fig(fig)

    exog_name, exog_idx = utils.maybe_name_or_idx(exog_idx, results.model)
    results = maybe_unwrap_results(results)
    mod = results.model

    y_label = mod.endog_names
    x_col = mod.exog[:, exog_idx]
    # The first returned value is not plotted; only the bounds are used.
    _, interval_lo, interval_hi = wls_prediction_std(results)

    # (1) Observed response and fitted values, with interval bars.
    axes = fig.add_subplot(2, 2, 1)
    axes.plot(x_col, mod.endog, 'o', color='b', alpha=0.9, label=y_label)
    axes.plot(x_col, results.fittedvalues, 'D', color='r', label='fitted',
              alpha=.5)
    axes.vlines(x_col, interval_lo, interval_hi, linewidth=1, color='k',
                alpha=.7)
    axes.set_title('Y and Fitted vs. X', fontsize='large')
    axes.set_xlabel(exog_name)
    axes.set_ylabel(y_label)
    axes.legend(loc='best')

    # (2) Residuals, with a horizontal reference line at zero.
    axes = fig.add_subplot(2, 2, 2)
    axes.plot(x_col, results.resid, 'o')
    axes.axhline(y=0, color='black')
    axes.set_title('Residuals versus %s' % exog_name, fontsize='large')
    axes.set_xlabel(exog_name)
    axes.set_ylabel("resid")

    # (3) Partial regression: all exog columns except the selected one are
    # passed as the remaining regressors.
    axes = fig.add_subplot(2, 2, 3)
    not_selected = np.ones(mod.exog.shape[1], bool)
    not_selected[exog_idx] = False
    other_columns = mod.exog[:, not_selected]
    from pandas import Series
    x_series = Series(x_col, name=exog_name, index=mod.data.row_labels)
    fig = plot_partregress(mod.data.orig_endog, x_series, other_columns,
                           obs_labels=False, ax=axes)
    axes.set_title('Partial regression plot', fontsize='large')

    # (4) Component and component-plus-residual plot.
    axes = fig.add_subplot(2, 2, 4)
    fig = plot_ccpr(results, exog_idx, ax=axes)
    axes.set_title('CCPR Plot', fontsize='large')

    fig.suptitle('Regression Plots for %s' % exog_name, fontsize="large")
    fig.tight_layout()
    # Leave room at the top for the suptitle after tight_layout.
    fig.subplots_adjust(top=.90)
    return fig
Exemple #22
0
    def plot_cusum_squares(self,
                           alpha=0.05,
                           legend_loc='upper left',
                           fig=None,
                           figsize=None):
        r"""
        Plot the CUSUM of squares statistic and significance bounds.

        Parameters
        ----------
        alpha : float, optional
            The plotted significance bounds are alpha %.
        legend_loc : string, optional
            The location of the legend in the plot. Default is upper left.
        fig : Matplotlib Figure instance, optional
            If given, subplots are created in this figure instead of in a new
            figure. Note that the grid will be created in the provided
            figure using `fig.add_subplot()`.
        figsize : tuple, optional
            If a figure is created, this argument allows specifying a size.
            The tuple is (width, height).

        Returns
        -------
        fig : Matplotlib Figure instance
            The figure containing the single CUSUM of squares subplot.

        Notes
        -----
        Evidence of parameter instability may be found if the CUSUM of squares
        statistic moves out of the significance bounds.

        Critical values used in creating the significance bounds are computed
        using the approximate formula of [2]_.

        References
        ----------
        .. [1] Brown, R. L., J. Durbin, and J. M. Evans. 1975.
           "Techniques for Testing the Constancy of
           Regression Relationships over Time."
           Journal of the Royal Statistical Society.
           Series B (Methodological) 37 (2): 149-92.
        .. [2] Edgerton, David, and Curt Wells. 1994.
           "Critical Values for the Cusumsq Statistic
           in Medium and Large Sized Samples."
           Oxford Bulletin of Economics and Statistics 56 (3): 355-65.

        """
        from statsmodels.graphics.utils import _import_mpl, create_mpl_fig
        # Return value unused here; the call is presumably kept as a check
        # that matplotlib is importable -- confirm.
        _import_mpl()
        fig = create_mpl_fig(fig, figsize)
        ax = fig.add_subplot(1, 1, 1)

        # x axis: the model's dates if it has any, else observation numbers.
        if not hasattr(self.data, 'dates') or self.data.dates is None:
            x_all = np.arange(self.nobs)
        else:
            x_all = self.data.dates._mpl_repr()

        # Observations before this offset are left out of the plot.
        start = self.loglikelihood_burn
        x_shown = x_all[start:]

        # Statistic and a straight reference line starting at zero.
        ax.plot(x_shown, self.cusum_squares, label='CUSUM of squares')
        steps = np.arange(start, self.nobs) - start
        ax.plot(x_shown, steps / (self.nobs - start), 'k', alpha=0.3)

        # Both bounds are drawn between the first and last shown x values.
        lower_line, upper_line = self._cusum_squares_significance_bounds(alpha)
        x_ends = [x_all[start], x_all[-1]]
        significance_label = '%d%% significance' % (alpha * 100)
        ax.plot(x_ends, upper_line, 'k--', label=significance_label)
        ax.plot(x_ends, lower_line, 'k--')

        ax.legend(loc=legend_loc)

        return fig
Exemple #23
0
def plot_ccpr_grid(results, exog_idx=None, grid=None, fig=None):
    """Generate CCPR plots against a set of regressors, plot in a grid.

    Generates a grid of CCPR (component and component-plus-residual) plots,
    one subplot per regressor.  Regressors whose column in
    ``results.model.exog`` has zero variance (e.g. a constant) are skipped
    and do not occupy a slot in the grid.

    Parameters
    ----------
    results : result instance
        uses exog and params of the result instance
    exog_idx : None or list of int
        (column) indices of the exog used in the plot.  The value is
        resolved through ``utils.maybe_name_or_idx``.
    grid : None or tuple of int (nrows, ncols)
        If grid is given, then it is used for the arrangement of the subplots.
        If grid is None, a single column is used when at most two regressors
        are requested, and two columns otherwise.
    fig : Matplotlib figure instance, optional
        Figure to draw the subplots into; it is passed through
        ``utils.create_mpl_fig`` (presumably a new figure is created when
        None is given -- confirm against that helper).

    Returns
    -------
    fig : Matplotlib figure instance
        The figure holding the grid of CCPR subplots.

    Notes
    -----
    Partial residual plots are formed as::

        Res + Betahat(i)*Xi versus Xi

    and CCPR adds::

        Betahat(i)*Xi versus Xi

    The default grid is sized from the number of requested regressors,
    including any zero-variance ones that end up being skipped.

    See Also
    --------
    plot_ccpr : Creates CCPR plot for a single regressor.

    Examples
    --------
    Using the state crime dataset, separately plot the effect of each
    variable on the outcome, murder rate, while accounting for the effect
    of all other variables in the model.

    >>> import statsmodels.api as sm
    >>> import matplotlib.pyplot as plt
    >>> import statsmodels.formula.api as smf

    >>> fig = plt.figure(figsize=(8, 8))
    >>> crime_data = sm.datasets.statecrime.load_pandas()
    >>> results = smf.ols('murder ~ hs_grad + urban + poverty + single',
    ...                   data=crime_data.data).fit()
    >>> sm.graphics.plot_ccpr_grid(results, fig=fig)
    >>> plt.show()

    .. plot:: plots/graphics_regression_ccpr_grid.py

    References
    ----------
    See http://www.itl.nist.gov/div898/software/dataplot/refman1/auxillar/ccpr.htm
    """
    fig = utils.create_mpl_fig(fig)

    exog_name, exog_idx = utils.maybe_name_or_idx(exog_idx, results.model)

    if grid is not None:
        nrows, ncols = grid
    elif len(exog_idx) > 2:
        # Two columns; round the row count up so an odd regressor fits.
        nrows = (len(exog_idx) + 1) // 2
        ncols = 2
    else:
        nrows = len(exog_idx)
        ncols = 1

    # Subplot positions are 1-based and advance only for regressors that are
    # actually drawn.  A plain "seen a constant" flag would compensate for at
    # most one skipped column and leave a hole (or overflow a user-supplied
    # grid) as soon as a second zero-variance column shows up.
    position = 0
    for idx in exog_idx:
        if results.model.exog[:, idx].var() == 0:
            continue

        position += 1
        ax = fig.add_subplot(nrows, ncols, position)
        fig = plot_ccpr(results, exog_idx=idx, ax=ax)
        # Per-axes titles are blanked; the figure carries one shared title.
        ax.set_title("")

    fig.suptitle("Component-Component Plus Residual Plot", fontsize="large")

    fig.tight_layout()

    # Leave head-room for the suptitle, which tight_layout does not reserve.
    fig.subplots_adjust(top=.95)
    return fig
Exemple #24
0
    def plot_predict(
        self,
        steps: int = 1,
        theta: float = 2,
        alpha: Optional[float] = 0.05,
        in_sample: bool = False,
        fig: Optional["matplotlib.figure.Figure"] = None,
        figsize: Tuple[float, float] = None,
    ) -> "matplotlib.figure.Figure":
        r"""
        Plot forecasts, optionally with prediction intervals and the sample

        Parameters
        ----------
        steps : int, default 1
            Number of steps ahead; forwarded to ``forecast`` and
            ``prediction_intervals``.
        theta : float, default 2
            The theta value to use when computing the weight to combine
            the trend and the SES forecasts; forwarded to ``forecast`` and
            ``prediction_intervals``.
        alpha : {float, None}, default 0.05
            The tail probability not covered by the confidence interval. Must
            be in (0, 1). Confidence interval is constructed assuming normally
            distributed shocks. If None, no interval band is drawn.
        in_sample : bool, default False
            If True, the original endog series is drawn as well. Its own
            index is used when it is a pandas Series; otherwise it is
            plotted against 0, 1, ..., nobs - 1.
        fig : Figure, default None
            An existing figure handle. If not provided, a new figure is
            created.
        figsize: tuple[float, float], default None
            Tuple containing the figure size.

        Returns
        -------
        Figure
            Figure handle containing the plot.

        Notes
        -----
        The variance of the h-step forecast is assumed to follow from the
        integrated Moving Average structure of the Theta model, and so is
        :math:`\sigma^2(\alpha^2 + (h-1))`. The prediction interval assumes
        that innovations are normally distributed.
        """
        from statsmodels.graphics.utils import _import_mpl, create_mpl_fig

        _import_mpl()
        fig = create_mpl_fig(fig, figsize)
        assert fig is not None
        forecasts = self.forecast(steps, theta)
        forecast_index = forecasts.index

        axes = fig.add_subplot(111)
        observed = self.model.endog_orig
        # Positional fallback index, replaced below when the data has its own
        sample_index = NumericIndex(np.arange(observed.shape[0]))
        if in_sample:
            if isinstance(observed, pd.Series):
                sample_index = observed.index
            axes.plot(sample_index, observed)
        axes.plot(forecast_index, forecasts)

        if alpha is not None:
            bounds = self.prediction_intervals(steps, theta, alpha)
            # Shaded band between the lower and upper interval limits
            axes.fill_between(
                forecast_index,
                bounds["lower"],
                bounds["upper"],
                color="gray",
                alpha=0.5,
                label=f"{1 - alpha:.0%} confidence interval",
            )

        axes.legend(loc="best", frameon=False)
        fig.tight_layout(pad=1.0)

        return fig
def plot_regress_exog(results, exog_idx, fig=None):
    """Plot regression results against one regressor.

    Draws four panels in a 2 by 2 figure, in subplot order:

    1. observed endog and fitted values versus the regressor, with one
       vertical segment per observation spanning the lower and upper bounds
       returned by ``wls_prediction_std``
    2. residuals versus the regressor, with a horizontal line at zero
    3. the partial regression plot produced by ``plot_partregress``
    4. the CCPR plot produced by ``plot_ccpr``

    Parameters
    ----------
    results : result instance
        result instance with resid, fittedvalues, model.endog, model.exog,
        model.endog_names and model.data as attributes
    exog_idx : int
        index of regressor in exog matrix; resolved to a name and a column
        index through ``utils.maybe_name_or_idx``
    fig : Matplotlib figure instance, optional
        Figure to draw into; passed through ``utils.create_mpl_fig``.

    Returns
    -------
    fig : matplotlib figure instance
    """

    fig = utils.create_mpl_fig(fig)

    # Name resolution uses the model of the results object as passed in,
    # before any wrapper is stripped.
    exog_name, exog_idx = utils.maybe_name_or_idx(exog_idx, results.model)
    results = maybe_unwrap_results(results)

    model = results.model
    endog_name = model.endog_names
    regressor = model.exog[:, exog_idx]
    _, lower, upper = wls_prediction_std(results)

    # Panel 1: observed and fitted values with interval segments
    panel = fig.add_subplot(2, 2, 1)
    panel.plot(regressor, model.endog, 'o', color='b', alpha=0.9,
               label=endog_name)
    panel.plot(regressor, results.fittedvalues, 'D', color='r',
               label='fitted', alpha=.5)
    panel.vlines(regressor, lower, upper, linewidth=1, color='k', alpha=.7)
    panel.set_title('Y and Fitted vs. X', fontsize='large')
    panel.set_xlabel(exog_name)
    panel.set_ylabel(endog_name)
    panel.legend(loc='best')

    # Panel 2: residuals around a zero reference line
    panel = fig.add_subplot(2, 2, 2)
    panel.plot(regressor, results.resid, 'o')
    panel.axhline(y=0, color='black')
    panel.set_title('Residuals versus %s' % exog_name, fontsize='large')
    panel.set_xlabel(exog_name)
    panel.set_ylabel("resid")

    # Panel 3: partial regression of endog on this regressor, with every
    # other exog column passed as the conditioning set
    panel = fig.add_subplot(2, 2, 3)
    other_columns = np.ones(model.exog.shape[1], bool)
    other_columns[exog_idx] = False
    from pandas import Series
    regressor_series = Series(regressor, name=exog_name,
                              index=model.data.row_labels)
    fig = plot_partregress(model.data.orig_endog, regressor_series,
                           model.exog[:, other_columns],
                           obs_labels=False, ax=panel)
    panel.set_title('Partial regression plot', fontsize='large')

    # Panel 4: component and component-plus-residual plot
    panel = fig.add_subplot(2, 2, 4)
    fig = plot_ccpr(results, exog_idx, ax=panel)
    panel.set_title('CCPR Plot', fontsize='large')

    fig.suptitle('Regression Plots for %s' % exog_name, fontsize="large")

    fig.tight_layout()

    # Make room at the top for the suptitle
    fig.subplots_adjust(top=.90)
    return fig
Exemple #26
0
def plot_diagnostics(residuals,
                     variable=0,
                     lags=40,
                     fig=None,
                     figsize=(15, 7),
                     savefig=False,
                     path=None):
    """Draw a 2 by 2 panel of diagnostic plots for a residual series.

    The residuals are standardized (NaN-aware mean removed, divided by the
    NaN-aware standard deviation) and then shown as:

    - top-left: standardized residuals against their position 0..n-1
    - top-right: histogram, Gaussian kernel density estimate and the
      N(0, 1) density, using only the non-NaN residuals
    - bottom-left: normal Q-Q plot of the non-NaN residuals
    - bottom-right: partial autocorrelation function up to ``lags``

    Parameters
    ----------
    residuals : array_like
        Residual series to diagnose.  NaN entries are ignored when
        standardizing and dropped for the histogram, density and Q-Q panels.
    variable : int, optional
        Not used by this function; kept so existing callers keep working.
    lags : int, optional
        Number of lags passed to ``plot_pacf``.
    fig : Matplotlib figure instance, optional
        Figure to draw into; passed through ``create_mpl_fig``.
    figsize : tuple, optional
        Figure size passed to ``create_mpl_fig``.
    savefig : bool, optional
        If true, a figure title is added, the figure is written to ``path``
        at 500 dpi and ``fig.show()`` is called.
    path : str or path-like, optional
        Destination handed to ``fig.savefig``.  Required when ``savefig``
        is true.

    Returns
    -------
    fig : Matplotlib figure instance

    Raises
    ------
    ValueError
        If ``savefig`` is true but ``path`` is None.
    """
    # Fail fast: without a destination the save would only blow up after all
    # four panels had been drawn, with a far less helpful message.
    if savefig and path is None:
        raise ValueError("`path` must be provided when `savefig` is True")

    _import_mpl()
    fig = create_mpl_fig(fig, figsize)

    # The whole series passed in is used; no burn-in observations are
    # dropped here.

    # Standardize residual
    # Source: https://alkaline-ml.com/pmdarima/1.1.1/_modules/pmdarima/arima/arima.html
    resid = residuals
    resid = (resid - np.nanmean(resid)) / np.nanstd(resid)

    # Top-left: residuals vs position in the series
    ax = fig.add_subplot(221)
    x = np.arange(len(resid))
    ax.plot(x, resid)
    ax.hlines(0, x[0], x[-1], alpha=0.5)
    ax.set_xlim(x[0], x[-1])
    ax.set_title('Standardized residual')

    # Top-right: histogram, Gaussian kernel density, Normal density
    # Can only do histogram and Gaussian kernel density on the non-null
    # elements
    resid_nonmissing = resid[~(np.isnan(resid))]
    ax = fig.add_subplot(222)

    # gh5792: Remove  except after support for matplotlib>2.1 required
    try:
        ax.hist(resid_nonmissing, density=True, label='Hist')
    except AttributeError:
        ax.hist(resid_nonmissing, normed=True, label='Hist')

    from scipy.stats import gaussian_kde, norm
    kde = gaussian_kde(resid_nonmissing)
    # Evaluate both densities on a fixed window of +/- 2 * 1.96
    xlim = (-1.96 * 2, 1.96 * 2)
    x = np.linspace(xlim[0], xlim[1])
    ax.plot(x, kde(x), label='KDE')
    ax.plot(x, norm.pdf(x), label='N(0,1)')
    ax.set_xlim(xlim)
    ax.legend()
    ax.set_title('Histogram plus estimated density')

    # Bottom-left: QQ plot
    ax = fig.add_subplot(223)
    from statsmodels.graphics.gofplots import qqplot
    qqplot(resid_nonmissing, line='s', ax=ax)
    ax.set_title('Normal Q-Q')

    # Bottom-right: partial autocorrelation function
    ax = fig.add_subplot(224)
    from statsmodels.graphics.tsaplots import plot_pacf
    plot_pacf(resid, ax=ax, lags=lags)
    ax.set_title('Partial Autocorrelation function')

    # NOTE(review): the y-axis is clamped to +/-0.1, so any larger partial
    # autocorrelation is drawn off-scale -- confirm this zoom is intended.
    ax.set_ylim(-0.1, 0.1)

    if savefig:
        fig.suptitle('Residual diagnostic', fontsize=20)
        fig.savefig(path, dpi=500)
        fig.show()
    return fig