def plot_regress_exog(res, exog_idx, exog_name='', fig=None):
    """Draw a 2 by 2 panel of regression diagnostics for one regressor.

    In subplot order the panels show, each against the selected exog
    column: the observed endog, the residuals (with a horizontal line at
    zero), the fitted values, and the fitted values plus residuals.

    Parameters
    ----------
    res : result instance
        Results object exposing ``resid``, ``fittedvalues``, ``model.endog``
        and ``model.exog``.
    exog_idx : int
        Column index of the regressor in ``res.model.exog``.
    exog_name : str, optional
        Name shown in the figure title. When left empty, the title falls
        back to ``'variable <exog_idx>'``.
    fig : Matplotlib figure instance, optional
        Handed to ``utils.create_mpl_fig``; the four subplots are added to
        the figure that call returns.

    Returns
    -------
    fig : matplotlib figure instance

    Notes
    -----
    This is currently very simple, no options or varnames yet.
    """
    fig = utils.create_mpl_fig(fig)

    if exog_name == '':
        exog_name = 'variable %d' % exog_idx

    regressor = res.model.exog[:, exog_idx]

    # (title, lazy y-values, draw zero line?) for subplots 1..4. The values
    # are fetched lazily so each series is read right before it is plotted.
    panels = (
        ('endog versus exog', lambda: res.model.endog, False),
        ('residuals versus exog', lambda: res.resid, True),
        ('Fitted versus exog', lambda: res.fittedvalues, False),
        ('Fitted plus residuals versus exog',
         lambda: res.fittedvalues + res.resid, False),
    )
    for position, (title, get_values, zero_line) in enumerate(panels, 1):
        axis = fig.add_subplot(2, 2, position)
        axis.plot(regressor, get_values(), 'o')
        if zero_line:
            axis.axhline(y=0)
        axis.set_title(title, fontsize='small')

    fig.suptitle('Regression Plots for %s' % exog_name)
    return fig
def plot_cusum(self, alpha=0.05, legend_loc='upper left', fig=None,
               figsize=None):
    r"""
    Draw the CUSUM statistic together with its significance bounds.

    Parameters
    ----------
    alpha : float, optional
        The plotted significance bounds are alpha %.
    legend_loc : string, optional
        Legend location passed to ``ax.legend``. Default is upper left.
    fig : Matplotlib Figure instance, optional
        If given, the single subplot is added to this figure (via
        `fig.add_subplot()`) instead of to a new figure.
    figsize : tuple, optional
        (width, height) used when a figure is created.

    Returns
    -------
    fig : Matplotlib Figure instance

    Notes
    -----
    Evidence of parameter instability may be found if the CUSUM statistic
    moves out of the significance bounds.

    References
    ----------
    .. [*] Brown, R. L., J. Durbin, and J. M. Evans. 1975.
       "Techniques for Testing the Constancy of
       Regression Relationships over Time."
       Journal of the Royal Statistical Society.
       Series B (Methodological) 37 (2): 149-92.
    """
    from statsmodels.graphics.utils import _import_mpl, create_mpl_fig

    _import_mpl()
    fig = create_mpl_fig(fig, figsize)
    axis = fig.add_subplot(1, 1, 1)

    # x-axis: the model's dates when it has them, otherwise 0..nobs-1.
    model_dates = getattr(self.data, 'dates', None)
    if model_dates is None:
        x_values = np.arange(self.nobs)
    else:
        x_values = model_dates._mpl_repr()

    # The series is drawn from the first period after the diffuse / burned
    # observations.
    start = max(self.nobs_diffuse, self.loglikelihood_burn)

    axis.plot(x_values[start:], self.cusum, label='CUSUM')
    x_first = x_values[start]
    x_last = x_values[-1]
    axis.hlines(0, x_first, x_last, color='k', alpha=0.3)

    # Each bound is drawn as a straight segment between the two endpoints.
    lower_line, upper_line = self._cusum_significance_bounds(alpha)
    bounds_label = '%d%% significance' % (alpha * 100)
    axis.plot([x_first, x_last], upper_line, 'k--', label=bounds_label)
    axis.plot([x_first, x_last], lower_line, 'k--')

    axis.legend(loc=legend_loc)
    return fig
def plot_diagnostics(self, variable=0, lags=10, fig=None, figsize=None):
    """Plot an ARIMA's diagnostics.

    Diagnostic plots for standardized residuals of one endogenous variable

    Parameters
    ----------
    variable : integer, optional
        Index of the endogenous variable for which the diagnostic plots
        should be created. Default is 0.

    lags : integer, optional
        Number of lags to include in the correlogram. Default is 10.

    fig : Matplotlib Figure instance, optional
        If given, subplots are created in this figure instead of in a new
        figure. Note that the 2x2 grid will be created in the provided
        figure using `fig.add_subplot()`.

    figsize : tuple, optional
        If a figure is created, this argument allows specifying a size.
        The tuple is (width, height).

    Returns
    -------
    fig : Matplotlib Figure instance

    Notes
    -----
    Produces a 2x2 plot grid with the following plots (filled row by row,
    starting top left):

    1. Standardized residuals over time
    2. Histogram plus estimated density of standardized residuals, along
       with a Normal(0,1) density plotted for reference.
    3. Normal Q-Q plot, with Normal reference line.
    4. Correlogram

    See Also
    --------
    statsmodels.graphics.gofplots.qqplot
    pmdarima.utils.visualization.plot_acf

    References
    ----------
    .. [1] https://www.statsmodels.org/dev/_modules/statsmodels/tsa/statespace/mlemodel.html#MLEResults.plot_diagnostics
    """  # noqa: E501
    # Implicitly checks that matplotlib is installed and configures the
    # backend.
    _get_plt()

    # Re-implemented here, rather than delegating to the statsmodels
    # results wrapper, so every results type shares one code path.
    from statsmodels.graphics import utils as sm_graphics
    fig = sm_graphics.create_mpl_fig(fig, figsize)

    fitted = self.arima_res_
    model_data = fitted.data

    # Residuals tied to burned or diffuse likelihood periods are dropped.
    # Only results exposing ``loglikelihood_burn`` carry pre-standardized
    # forecast errors; otherwise the raw residuals are standardized here.
    if hasattr(fitted, 'loglikelihood_burn'):
        n_skip = fitted.loglikelihood_burn
        # ``nobs_diffuse`` does not exist on older statsmodels releases.
        if hasattr(fitted, 'nobs_diffuse'):
            n_skip = np.maximum(n_skip, fitted.nobs_diffuse)

        forecast_errors = fitted.filter_results.standardized_forecasts_error
        resid = forecast_errors[variable, n_skip:]
    else:
        n_skip = 0
        raw = fitted.resid
        resid = (raw - np.nanmean(raw)) / np.nanstd(raw)

    # Top-left: residuals vs time
    ax = fig.add_subplot(221)
    model_dates = getattr(model_data, 'dates', None)
    if model_dates is None:
        time_axis = np.arange(len(resid))
    else:
        time_axis = model_dates[n_skip:]._mpl_repr()

    ax.plot(time_axis, resid)
    t_first = time_axis[0]
    t_last = time_axis[-1]
    ax.hlines(0, t_first, t_last, alpha=0.5)
    ax.set_xlim(t_first, t_last)
    ax.set_title('Standardized residual')

    # Top-right: histogram, Gaussian kernel density, Normal density.
    # Histogram and KDE are only defined on the non-null elements.
    resid_nonmissing = resid[~(np.isnan(resid))]
    ax = fig.add_subplot(222)

    # Silence the normed -> density deprecation warning; the compat helper
    # supplies whichever keyword the installed matplotlib supports.
    with warnings.catch_warnings(record=True):
        ax.hist(
            resid_nonmissing,
            label='Hist',
            **mpl_compat.mpl_hist_arg()
        )

    kde = gaussian_kde(resid_nonmissing)
    half_width = 1.96 * 2
    xlim = (-half_width, half_width)
    density_grid = np.linspace(*xlim)
    ax.plot(density_grid, kde(density_grid), label='KDE')
    ax.plot(density_grid, norm.pdf(density_grid), label='N(0,1)')
    ax.set_xlim(xlim)
    ax.legend()
    ax.set_title('Histogram plus estimated density')

    # Bottom-left: QQ plot
    ax = fig.add_subplot(223)
    from statsmodels.graphics import gofplots
    gofplots.qqplot(resid_nonmissing, line='s', ax=ax)
    ax.set_title('Normal Q-Q')

    # Bottom-right: Correlogram
    ax = fig.add_subplot(224)
    from statsmodels.graphics import tsaplots
    tsaplots.plot_acf(resid, ax=ax, lags=lags)
    ax.set_title('Correlogram')
    ax.set_ylim(-1, 1)

    return fig
def plot_recursive_coefficient(
    self,
    variables=None,
    alpha=0.05,
    legend_loc="upper left",
    fig=None,
    figsize=None,
):
    r"""
    Plot the recursively estimated coefficients on a given variable

    Parameters
    ----------
    variables : {int, str, Iterable[int], Iterable[str], None}, optional
        Integer index or string name of the variables whose coefficients
        to plot. Can also be an iterable of integers or strings (any
        iterable is accepted, including generators). Default plots all
        coefficients.
    alpha : float, optional
        The confidence intervals for the coefficient are (1 - alpha)%. Set
        to None to exclude confidence intervals.
    legend_loc : str, optional
        The location of the legend in the plot. Default is upper left.
    fig : Figure, optional
        If given, subplots are created in this figure instead of in a new
        figure. Note that the grid will be created in the provided
        figure using `fig.add_subplot()`.
    figsize : tuple, optional
        If a figure is created, this argument allows specifying a size.
        The tuple is (width, height).

    Returns
    -------
    Figure
        The matplotlib Figure object.

    Raises
    ------
    ValueError
        If an entry of `variables` is neither a known parameter name nor
        an integer.
    """
    from statsmodels.graphics.utils import _import_mpl, create_mpl_fig

    if alpha is not None:
        ci = self._conf_int(alpha, None)

    row_labels = self.model.data.row_labels
    if row_labels is None:
        row_labels = np.arange(self._params.shape[0])
    k_variables = self._params.shape[1]
    param_names = self.model.data.param_names

    if variables is None:
        variable_idx = list(range(k_variables))
    else:
        if isinstance(variables, (int, str)):
            variables = [variables]
        variable_idx = []
        # Iterate directly so any iterable works, not only sequences that
        # support len() and integer indexing.
        for variable in variables:
            if variable in param_names:
                variable_idx.append(param_names.index(variable))
            elif isinstance(variable, int):
                variable_idx.append(variable)
            else:
                msg = ("variable {0} is not an integer and was not found "
                       "in the list of variable "
                       "names: {1}".format(variable,
                                           ", ".join(param_names)))
                raise ValueError(msg)

    _import_mpl()
    fig = create_mpl_fig(fig, figsize)

    import pandas as pd

    # PeriodIndex labels are converted to timestamps before plotting.
    if isinstance(row_labels, pd.PeriodIndex):
        row_labels = row_labels.to_timestamp()
    row_labels = np.asarray(row_labels)

    n_plots = len(variable_idx)
    for loc, i in enumerate(variable_idx):
        ax = fig.add_subplot(n_plots, 1, loc + 1)
        params = self._params[:, i]
        # Only rows with an estimate are drawn.
        valid = ~np.isnan(params)
        row_lbl = row_labels[valid]
        ax.plot(row_lbl, params[valid])
        if alpha is not None:
            this_ci = np.reshape(ci[:, :, i], (-1, 2))
            if not np.all(np.isnan(this_ci)):
                ax.plot(row_lbl, this_ci[:, 0][valid], "k:",
                        label="Lower CI")
                ax.plot(row_lbl, this_ci[:, 1][valid], "k:",
                        label="Upper CI")
                # The legend is only attached to the first subplot.
                if loc == 0:
                    ax.legend(loc=legend_loc)
        # With no valid rows there are no endpoints to pin the x-limits to.
        if row_lbl.size:
            ax.set_xlim(row_lbl[0], row_lbl[-1])
        ax.set_title(param_names[i])

    fig.tight_layout()
    return fig
def plot_partregress_grid(results, exog_idx=None, grid=None, fig=None):
    """Plot partial regression for a set of regressors.

    Parameters
    ----------
    results : results instance
        A regression model results instance
    exog_idx : None, list of ints, list of strings
        (column) indices of the exog used in the plot, default is all.
    grid : None or tuple of int (nrows, ncols)
        If grid is given, then it is used for the arrangement of the
        subplots. If grid is None, then ncol is one, if there are only 2
        subplots, and the number of columns is two otherwise.
    fig : Matplotlib figure instance, optional
        If given, the subplots are added to this figure. Otherwise a new
        figure is created.

    Returns
    -------
    fig : Matplotlib figure instance
        If `fig` is None, the created figure.  Otherwise `fig` itself.

    Notes
    -----
    A subplot is created for each explanatory variable given by exog_idx.
    The partial regression plot shows the relationship between the response
    and the given explanatory variable after removing the effect of all other
    explanatory variables in exog.

    See Also
    --------
    plot_partregress : Plot partial regression for a single regressor.
    plot_ccpr

    References
    ----------
    See http://www.itl.nist.gov/div898/software/dataplot/refman1/auxillar/partregr.htm
    """
    import pandas

    fig = utils.create_mpl_fig(fig)

    _, exog_idx = utils.maybe_name_or_idx(exog_idx, results.model)

    # maybe add option for using wendog, wexog instead
    y = pandas.Series(results.model.endog, name=results.model.endog_names)
    exog = results.model.exog

    k_vars = exog.shape[1]
    # this function doesn't make sense if k_vars=1

    # Bound up front so that an explicit `grid` no longer leaves
    # `title_kwargs` undefined (previously a NameError in the loop).
    title_kwargs = {}
    if grid is not None:
        nrows, ncols = grid
    elif len(exog_idx) > 2:
        nrows = int(np.ceil(len(exog_idx) / 2.))
        ncols = 2
        title_kwargs = {"fontdict": {"fontsize": 'small'}}
    else:
        nrows = len(exog_idx)
        ncols = 1

    # An array, so the names can be fancy-indexed with a list of positions.
    other_names = np.array(results.model.exog_names)
    for i, idx in enumerate(exog_idx):
        # Every column except the one being plotted.
        others = list(range(k_vars))
        others.pop(idx)
        exog_others = pandas.DataFrame(exog[:, others],
                                       columns=other_names[others])
        ax = fig.add_subplot(nrows, ncols, i + 1)
        plot_partregress(y, pandas.Series(exog[:, idx],
                                          name=other_names[idx]),
                         exog_others, ax=ax, title_kwargs=title_kwargs,
                         obs_labels=False)
        # Per-axes titles are blanked; the figure carries one suptitle.
        ax.set_title("")

    fig.suptitle("Partial Regression Plot", fontsize="large")
    fig.tight_layout()
    fig.subplots_adjust(top=.95)

    return fig
def plot_ccpr_grid(results, exog_idx=None, grid=None, fig=None):
    """Generate CCPR plots against a set of regressors, plot in a grid.

    Generates a grid of CCPR (component and component-plus-residual) plots.

    Parameters
    ----------
    results : result instance
        uses exog and params of the result instance
    exog_idx : None or list of int
        (column) indices of the exog used in the plot
    grid : None or tuple of int (nrows, ncols)
        If grid is given, then it is used for the arrangement of the
        subplots. If grid is None, then ncol is one, if there are only 2
        subplots, and the number of columns is two otherwise.
    fig : Matplotlib figure instance, optional
        If given, the subplots are added to this figure. Otherwise a new
        figure is created.

    Returns
    -------
    fig : Matplotlib figure instance
        The figure holding the grid of CCPR plots.

    Notes
    -----
    Partial residual plots are formed as::

        Res + Betahat(i)*Xi versus Xi

    and CCPR adds::

        Betahat(i)*Xi versus Xi

    Columns with zero variance (constants) are skipped. The grid shape is
    still derived from the full length of `exog_idx`, so skipped columns
    leave unused cells at the end of the grid.

    See Also
    --------
    plot_ccpr : Creates CCPR plot for a single regressor.

    References
    ----------
    See http://www.itl.nist.gov/div898/software/dataplot/refman1/auxillar/ccpr.htm
    """
    fig = utils.create_mpl_fig(fig)

    _, exog_idx = utils.maybe_name_or_idx(exog_idx, results.model)

    if grid is not None:
        nrows, ncols = grid
    elif len(exog_idx) > 2:
        nrows = int(np.ceil(len(exog_idx) / 2.))
        ncols = 2
    else:
        nrows = len(exog_idx)
        ncols = 1

    # Count every skipped constant column. A 0/1 flag left gaps in the
    # grid when more than one column had zero variance.
    n_skipped = 0
    for i, idx in enumerate(exog_idx):
        if results.model.exog[:, idx].var() == 0:
            n_skipped += 1
            continue

        ax = fig.add_subplot(nrows, ncols, i + 1 - n_skipped)
        fig = plot_ccpr(results, exog_idx=idx, ax=ax)
        # Per-axes titles are blanked; the figure carries one suptitle.
        ax.set_title("")

    fig.suptitle("Component-Component Plus Residual Plot", fontsize="large")
    fig.tight_layout()
    fig.subplots_adjust(top=.95)

    return fig
def plot_recursive_coefficient(self, variables=0, alpha=0.05,
                               legend_loc='upper left', fig=None,
                               figsize=None):
    r"""
    Plot the recursively estimated coefficients on a given variable

    Parameters
    ----------
    variables : int or str or iterable of int or string, optional
        Integer index or string name of the variable whose coefficient will
        be plotted. Can also be an iterable of integers or strings. Default
        is the first variable. The passed object is not modified.
    alpha : float, optional
        The confidence intervals for the coefficient are (1 - alpha) %.
        If None, no confidence band is drawn.
    legend_loc : string, optional
        The location of the legend in the plot. Default is upper left.
    fig : Matplotlib Figure instance, optional
        If given, subplots are created in this figure instead of in a new
        figure. Note that the grid will be created in the provided
        figure using `fig.add_subplot()`.
    figsize : tuple, optional
        If a figure is created, this argument allows specifying a size.
        The tuple is (width, height).

    Returns
    -------
    fig : Matplotlib Figure instance

    Notes
    -----
    All plots contain (1 - `alpha`) %  confidence intervals.
    """
    # Get variables
    if isinstance(variables, (int, str)):
        variables = [variables]

    # Resolve string names to positions in a NEW list. Writing back into
    # `variables` mutated the caller's list and failed for tuples.
    exog_names = self.model.exog_names
    variable_idx = [
        exog_names.index(variable) if isinstance(variable, str) else variable
        for variable in variables
    ]
    k_variables = len(variable_idx)

    # Create the plot
    from scipy.stats import norm
    from statsmodels.graphics.utils import _import_mpl, create_mpl_fig
    plt = _import_mpl()
    fig = create_mpl_fig(fig, figsize)

    # The x-axis, burn-in offset and coefficient container are the same for
    # every subplot, so they are computed once.
    if hasattr(self.data, 'dates') and self.data.dates is not None:
        dates = self.data.dates._mpl_repr()
    else:
        dates = np.arange(self.nobs)
    d = max(self.nobs_diffuse, self.loglikelihood_burn)
    coef = self.recursive_coefficients

    for i, variable in enumerate(variable_idx):
        ax = fig.add_subplot(k_variables, 1, i + 1)

        # Plot the coefficient
        ax.plot(dates[d:], coef.filtered[variable, d:],
                label='Recursive estimates: %s' % exog_names[variable])

        # Legend
        handles, labels = ax.get_legend_handles_labels()

        if alpha is not None:
            # Two-sided normal critical value for the confidence band.
            critical_value = norm.ppf(1 - alpha / 2.)

            # Plot confidence intervals
            std_errors = np.sqrt(coef.filtered_cov[variable, variable, :])
            ci_lower = (
                coef.filtered[variable] - critical_value * std_errors)
            ci_upper = (
                coef.filtered[variable] + critical_value * std_errors)
            ci_poly = ax.fill_between(
                dates[d:], ci_lower[d:], ci_upper[d:], alpha=0.2
            )
            ci_label = ('$%.3g \\%%$ confidence interval'
                        % ((1 - alpha)*100))

            # Only add CI to legend for the first plot
            if i == 0:
                # Proxy artist for fill_between legend entry
                # See https://matplotlib.org/1.3.1/users/legend_guide.html
                p = plt.Rectangle((0, 0), 1, 1,
                                  fc=ci_poly.get_facecolor()[0])

                handles.append(p)
                labels.append(ci_label)

        ax.legend(handles, labels, loc=legend_loc)

        # Remove xticks for all but the last plot
        if i < k_variables - 1:
            ax.xaxis.set_ticklabels([])

    fig.tight_layout()
    return fig
def feature_regression_summary(df, feat_idx, target, model_fit_results, display_regress_diagnostics=False):
    """Summarize one feature of a fitted regression: VIF, p-value, optional plots.

    Parameters
    ----------
    df : DataFrame-like
        Feature table; ``df.columns[feat_idx]`` names the feature and
        ``df[feat]`` is used for the distribution plot.
    feat_idx : int
        Position of the feature in ``df.columns``.
    target
        Not referenced anywhere in this function body.
    model_fit_results
        Fitted model results; passed to the statsmodels plotting helpers and
        read for ``pvalues``.
    display_regress_diagnostics : bool, optional
        When true, a 3x2 grid of diagnostic plots is drawn and shown.

    Returns
    -------
    v
        The value returned by ``vif(np.matrix(df), feat_idx)``.

    Notes
    -----
    NOTE(review): the original indentation of this block was lost. The two
    trailing ``display(...)`` calls are laid out here as unconditional
    (outside the ``if``) -- confirm against the original file.
    """
    feat = df.columns[feat_idx]

    # `vif` is defined elsewhere; presumably the variance inflation factor
    # of column `feat_idx` -- verify against its definition.
    v = vif(np.matrix(df), feat_idx)
    # Threshold of 10 matches the message rendered below.
    colinear = v > 10

    if display_regress_diagnostics:
        # 'endog versus exog', 'residuals versus exog', 'fitted versus exog'
        # and 'fitted plus residual versus exog'
        fig = plt.figure(constrained_layout=True, figsize=(2.25 * plot_edge, 4 * plot_edge))
        fig = smgrpu.create_mpl_fig(fig)
        gs = GridSpec(3, 2, figure=fig)
        ax_fit = fig.add_subplot(gs[0, 0])
        ax_partial_residuals = fig.add_subplot(gs[0, 1])
        ax_partregress = fig.add_subplot(gs[1, 0])
        ax_ccpr = fig.add_subplot(gs[1, 1])
        # The slice 2:4 runs past the 3-row grid; only the third row exists,
        # so this axis spans the whole bottom row.
        ax_dist = fig.add_subplot(gs[2:4, 0:2])

        exog_name, exog_idx = smgrpu.maybe_name_or_idx(feat, model_fit_results.model)
        smresults = smtt.maybe_unwrap_results(model_fit_results)
        # NOTE(review): `y_name`, `prstd`, `iv_l` and `iv_u` are assigned but
        # never read below.
        y_name = smresults.model.endog_names
        x1 = smresults.model.exog[:, exog_idx]
        prstd, iv_l, iv_u = wls_prediction_std(smresults)

        # endog versus exog
        # use the statsmodels wrapper since it is available
        sm.graphics.plot_fit(model_fit_results, feat, ax=ax_fit)

        # residuals versus exog
        ax_partial_residuals.plot(x1, smresults.resid, 'o')
        ax_partial_residuals.axhline(y=0, color='black')
        ax_partial_residuals.set_title('Residuals versus %s' % exog_name, fontsize='large')
        ax_partial_residuals.set_xlabel(exog_name)
        ax_partial_residuals.set_ylabel("resid")

        # Partial Regression plot: fitted versus exog
        # Boolean mask selecting every exog column except the feature itself.
        exog_noti = np.ones(smresults.model.exog.shape[1], bool)
        exog_noti[exog_idx] = False
        exog_others = smresults.model.exog[:, exog_noti]
        from pandas import Series
        smgrp.plot_partregress(smresults.model.data.orig_endog,
                               Series(x1, name=exog_name, index=smresults.model.data.row_labels),
                               exog_others, obs_labels=False, ax=ax_partregress)

        # CCPR: fitted plus residual versus exog
        # use the statsmodels wrapper since it is available
        sm.graphics.plot_ccpr(model_fit_results, feat, ax=ax_ccpr)
        ax_ccpr.set_title('CCPR Plot', fontsize='large')

        # Distribution of the raw feature values across the bottom row.
        sns.distplot(df[feat], ax=ax_dist)

        fig.suptitle('Regression Plots for %s' % exog_name, fontsize="large")

        #fig.tight_layout()

        #fig.subplots_adjust(top=.90)

        plt.show()

    # Rendered VIF line, with the collinearity verdict from the threshold.
    display(
        HTML(
            "Variance Inflation Factor (<i>VIF</i>) for <b>{}</b>: <b>{}</b> {}"
            .format(
                feat,
                round(v, 2),
                "$\\le 10 \\iff$ low colinearity" if not colinear else "$> 10 \\iff$ <b>HIGH COLINEARITY</b>")))
    # NOTE(review): the `+ 1` offset presumably skips an intercept stored
    # first in `pvalues` -- confirm against how the model was built.
    display(
        HTML(
            "<b><i>p-value</i></b> (<i>VIF</i>) for <b>{}</b>: <b>{}</b><br><br>"
            .format(feat, model_fit_results.pvalues[feat_idx + 1])))

    return v
def plot_ccpr_grid(results, exog_idx=None, grid=None, fig=None):
    """Generate CCPR plots against a set of regressors, plot in a grid.

    Generates a grid of CCPR (component and component-plus-residual) plots.

    Parameters
    ----------
    results : result instance
        uses exog and params of the result instance
    exog_idx : None or list of int
        (column) indices of the exog used in the plot
    grid : None or tuple of int (nrows, ncols)
        If grid is given, then it is used for the arrangement of the
        subplots. If grid is None, then ncol is one, if there are only 2
        subplots, and the number of columns is two otherwise.
    fig : Matplotlib figure instance, optional
        If given, the subplots are added to this figure. Otherwise a new
        figure is created.

    Returns
    -------
    fig : Matplotlib figure instance
        The figure holding the grid of CCPR plots.

    Notes
    -----
    Partial residual plots are formed as::

        Res + Betahat(i)*Xi versus Xi

    and CCPR adds::

        Betahat(i)*Xi versus Xi

    Columns with zero variance (constants) are skipped. The grid shape is
    still derived from the full length of `exog_idx`, so skipped columns
    leave unused cells at the end of the grid.

    See Also
    --------
    plot_ccpr : Creates CCPR plot for a single regressor.

    References
    ----------
    See http://www.itl.nist.gov/div898/software/dataplot/refman1/auxillar/ccpr.htm
    """
    fig = utils.create_mpl_fig(fig)

    _, exog_idx = utils.maybe_name_or_idx(exog_idx, results.model)

    if grid is not None:
        nrows, ncols = grid
    elif len(exog_idx) > 2:
        nrows = int(np.ceil(len(exog_idx) / 2.))
        ncols = 2
    else:
        nrows = len(exog_idx)
        ncols = 1

    # Count every skipped constant column. A 0/1 flag left gaps in the
    # grid when more than one column had zero variance.
    n_skipped = 0
    for i, idx in enumerate(exog_idx):
        if results.model.exog[:, idx].var() == 0:
            n_skipped += 1
            continue

        ax = fig.add_subplot(nrows, ncols, i + 1 - n_skipped)
        fig = plot_ccpr(results, exog_idx=idx, ax=ax)
        # Per-axes titles are blanked; the figure carries one suptitle.
        ax.set_title("")

    fig.suptitle("Component-Component Plus Residual Plot", fontsize="large")
    fig.tight_layout()
    fig.subplots_adjust(top=.95)

    return fig
def plot_partregress(results, exog_idx=None, xnames=None, grid=None, fig=None):
    """Plot partial regression for a set of regressors.

    Parameters
    ----------
    results : results instance
        A regression model results instance
    exog_idx : None or list of int
        (column) indices of the exog used in the plot, default is all.
        When None and `xnames` is given, the indices are looked up from
        `xnames` in ``results.model.exog_names``.
    xnames : None or list of strings
        Names for the numbers given in exog_idx. Default is taken from
        results.model.exog_names.
    grid : None or tuple of int (nrows, ncols)
        If grid is given, then it is used for the arrangement of the
        subplots. If grid is None, then ncol is one, if there are only 2
        subplots, and the number of columns is two otherwise.
    fig : Matplotlib figure instance, optional
        If given, the subplots are added to this figure. Otherwise a new
        figure is created.

    Returns
    -------
    fig : Matplotlib figure instance
        If `fig` is None, the created figure.  Otherwise `fig` itself.

    Raises
    ------
    ValueError
        If both `exog_idx` and `xnames` are given with different lengths.

    Notes
    -----
    A subplot is created for each explanatory variable given by exog_idx.
    The partial regression plot shows the relationship between the response
    and the given explanatory variable after removing the effect of all other
    explanatory variables in exog.

    See Also
    --------
    plot_partregress_ax : Plot partial regression for a single regressor.
    plot_ccpr

    References
    ----------
    See http://www.itl.nist.gov/div898/software/dataplot/refman1/auxillar/partregr.htm
    """
    fig = utils.create_mpl_fig(fig)

    # maybe add option for using wendog, wexog instead
    y = results.model.endog
    exog = results.model.exog

    k_vars = exog.shape[1]
    # this function doesn't make sense if k_vars=1

    model_names = results.model.exog_names
    if exog_idx is None:
        if xnames is None:
            # Default: every regressor, labelled with the model's names.
            exog_idx = list(range(k_vars))
            xnames = model_names
        else:
            exog_idx = [model_names.index(name) for name in xnames]
    else:
        # Honor the caller's column selection; it used to be overwritten.
        exog_idx = list(exog_idx)
        if xnames is None:
            xnames = [model_names[idx] for idx in exog_idx]
        elif len(xnames) != len(exog_idx):
            raise ValueError("xnames and exog_idx must have the same length")

    if grid is not None:
        nrows, ncols = grid
    elif len(exog_idx) > 2:
        nrows = int(np.ceil(len(exog_idx) / 2.))
        ncols = 2
    else:
        nrows = len(exog_idx)
        ncols = 1

    for i, idx in enumerate(exog_idx):
        # Materialize a list: on Python 3 a bare range() has no pop().
        others = list(range(k_vars))
        others.pop(idx)
        exog_others = exog[:, others]
        ax = fig.add_subplot(nrows, ncols, i + 1)
        plot_partregress_ax(y, exog[:, idx], exog_others, ax=ax,
                            varname=xnames[i])

    # tight_layout pads the figure so labels do not get clipped. Feature
    # detection replaces the string comparison of version numbers, which
    # is not a reliable ordering.
    if hasattr(fig, 'tight_layout'):
        fig.tight_layout()

    return fig
def plot_ccpr_grid(results, exog_idx=None, grid=None, fig=None):
    """Generate CCPR plots against a set of regressors, plot in a grid.

    Generates a grid of CCPR (component and component-plus-residual) plots.

    Parameters
    ----------
    results : result instance
        uses exog and params of the result instance
    exog_idx : None or list of int
        (column) indices of the exog used in the plot
    grid : None or tuple of int (nrows, ncols)
        If grid is given, then it is used for the arrangement of the
        subplots. If grid is None, then ncol is one, if there are only 2
        subplots, and the number of columns is two otherwise.
    fig : Matplotlib figure instance, optional
        If given, the subplots are added to this figure. Otherwise a new
        figure is created.

    Returns
    -------
    fig : Matplotlib figure instance
        The figure holding the grid of CCPR plots.

    Notes
    -----
    Partial residual plots are formed as::

        Res + Betahat(i)*Xi versus Xi

    and CCPR adds::

        Betahat(i)*Xi versus Xi

    Columns with zero variance (constants) are skipped. The grid shape is
    still derived from the full length of `exog_idx`, so skipped columns
    leave unused cells at the end of the grid.

    See Also
    --------
    plot_ccpr : Creates CCPR plot for a single regressor.

    Examples
    --------
    Using the state crime dataset separately plot the effect of each
    variable on the outcome, murder rate, while accounting for the effect
    of all other variables in the model.

    >>> import statsmodels.api as sm
    >>> import matplotlib.pyplot as plt
    >>> import statsmodels.formula.api as smf

    >>> fig = plt.figure(figsize=(8, 8))
    >>> crime_data = sm.datasets.statecrime.load_pandas()
    >>> results = smf.ols('murder ~ hs_grad + urban + poverty + single',
    ...                   data=crime_data.data).fit()
    >>> sm.graphics.plot_ccpr_grid(results, fig=fig)
    >>> plt.show()

    .. plot:: plots/graphics_regression_ccpr_grid.py

    References
    ----------
    See http://www.itl.nist.gov/div898/software/dataplot/refman1/auxillar/ccpr.htm
    """
    fig = utils.create_mpl_fig(fig)

    _, exog_idx = utils.maybe_name_or_idx(exog_idx, results.model)

    if grid is not None:
        nrows, ncols = grid
    elif len(exog_idx) > 2:
        nrows = int(np.ceil(len(exog_idx) / 2.))
        ncols = 2
    else:
        nrows = len(exog_idx)
        ncols = 1

    # Count every skipped constant column. A 0/1 flag left gaps in the
    # grid when more than one column had zero variance.
    n_skipped = 0
    for i, idx in enumerate(exog_idx):
        if results.model.exog[:, idx].var() == 0:
            n_skipped += 1
            continue

        ax = fig.add_subplot(nrows, ncols, i + 1 - n_skipped)
        fig = plot_ccpr(results, exog_idx=idx, ax=ax)
        # Per-axes titles are blanked; the figure carries one suptitle.
        ax.set_title("")

    fig.suptitle("Component-Component Plus Residual Plot", fontsize="large")
    fig.tight_layout()
    fig.subplots_adjust(top=.95)

    return fig
def plot_recursive_coefficient(self, variables=0, alpha=0.05,
                               legend_loc='upper left', fig=None,
                               figsize=None):
    r"""
    Plot the recursively estimated coefficients on a given variable

    Parameters
    ----------
    variables : int or str or iterable of int or string, optional
        Integer index or string name of the variable whose coefficient will
        be plotted. Can also be an iterable of integers or strings. Default
        is the first variable. The passed object is not modified.
    alpha : float, optional
        The confidence intervals for the coefficient are (1 - alpha) %.
        If None, no confidence band is drawn.
    legend_loc : string, optional
        The location of the legend in the plot. Default is upper left.
    fig : Matplotlib Figure instance, optional
        If given, subplots are created in this figure instead of in a new
        figure. Note that the grid will be created in the provided
        figure using `fig.add_subplot()`.
    figsize : tuple, optional
        If a figure is created, this argument allows specifying a size.
        The tuple is (width, height).

    Returns
    -------
    fig : Matplotlib Figure instance

    Notes
    -----
    All plots contain (1 - `alpha`) %  confidence intervals.
    """
    # Get variables
    if isinstance(variables, (int, str)):
        variables = [variables]

    # Resolve string names to positions in a NEW list. Writing back into
    # `variables` mutated the caller's list and failed for tuples.
    exog_names = self.model.exog_names
    variable_idx = [
        exog_names.index(variable) if isinstance(variable, str) else variable
        for variable in variables
    ]
    k_variables = len(variable_idx)

    # Create the plot
    from scipy.stats import norm
    from statsmodels.graphics.utils import _import_mpl, create_mpl_fig
    plt = _import_mpl()
    fig = create_mpl_fig(fig, figsize)

    # The x-axis, burn-in offset and coefficient container are the same for
    # every subplot, so they are computed once.
    if hasattr(self.data, 'dates') and self.data.dates is not None:
        dates = self.data.dates._mpl_repr()
    else:
        dates = np.arange(self.nobs)
    d = max(self.nobs_diffuse, self.loglikelihood_burn)
    coef = self.recursive_coefficients

    for i, variable in enumerate(variable_idx):
        ax = fig.add_subplot(k_variables, 1, i + 1)

        # Plot the coefficient
        ax.plot(dates[d:], coef.filtered[variable, d:],
                label='Recursive estimates: %s' % exog_names[variable])

        # Legend
        handles, labels = ax.get_legend_handles_labels()

        if alpha is not None:
            # Two-sided normal critical value for the confidence band.
            critical_value = norm.ppf(1 - alpha / 2.)

            # Plot confidence intervals
            std_errors = np.sqrt(coef.filtered_cov[variable, variable, :])
            ci_lower = (
                coef.filtered[variable] - critical_value * std_errors)
            ci_upper = (
                coef.filtered[variable] + critical_value * std_errors)
            ci_poly = ax.fill_between(
                dates[d:], ci_lower[d:], ci_upper[d:], alpha=0.2
            )
            ci_label = ('$%.3g \\%%$ confidence interval'
                        % ((1 - alpha)*100))

            # Only add CI to legend for the first plot
            if i == 0:
                # Proxy artist for fill_between legend entry
                # See http://matplotlib.org/1.3.1/users/legend_guide.html
                p = plt.Rectangle((0, 0), 1, 1,
                                  fc=ci_poly.get_facecolor()[0])

                handles.append(p)
                labels.append(ci_label)

        ax.legend(handles, labels, loc=legend_loc)

        # Remove xticks for all but the last plot
        if i < k_variables - 1:
            ax.xaxis.set_ticklabels([])

    fig.tight_layout()
    return fig
def plot_partregress_grid(results, exog_idx=None, grid=None, fig=None):
    """Plot partial regression for a set of regressors.

    Parameters
    ----------
    results : results instance
        A regression model results instance
    exog_idx : None, list of ints, list of strings
        (column) indices of the exog used in the plot, default is all.
    grid : None or tuple of int (nrows, ncols)
        If grid is given, then it is used for the arrangement of the
        subplots. If grid is None, then ncol is one, if there are only 2
        subplots, and the number of columns is two otherwise.
    fig : Matplotlib figure instance, optional
        If given, the subplots are added to this figure. Otherwise a new
        figure is created.

    Returns
    -------
    fig : Matplotlib figure instance
        If `fig` is None, the created figure.  Otherwise `fig` itself.

    Notes
    -----
    A subplot is created for each explanatory variable given by exog_idx.
    The partial regression plot shows the relationship between the response
    and the given explanatory variable after removing the effect of all other
    explanatory variables in exog.

    See Also
    --------
    plot_partregress : Plot partial regression for a single regressor.
    plot_ccpr : Plot CCPR against one regressor

    Examples
    --------
    Using the state crime dataset separately plot the effect of each
    variable on the outcome, murder rate, while accounting for the effect
    of all other variables in the model, visualized with a grid of partial
    regression plots.

    >>> from statsmodels.graphics.regressionplots import plot_partregress_grid
    >>> import statsmodels.api as sm
    >>> import matplotlib.pyplot as plt
    >>> import statsmodels.formula.api as smf

    >>> fig = plt.figure(figsize=(8, 6))
    >>> crime_data = sm.datasets.statecrime.load_pandas()
    >>> results = smf.ols('murder ~ hs_grad + urban + poverty + single',
    ...                   data=crime_data.data).fit()
    >>> plot_partregress_grid(results, fig=fig)
    >>> plt.show()

    .. plot:: plots/graphics_regression_partregress_grid.py

    References
    ----------
    See http://www.itl.nist.gov/div898/software/dataplot/refman1/auxillar/partregr.htm
    """
    import pandas

    fig = utils.create_mpl_fig(fig)

    _, exog_idx = utils.maybe_name_or_idx(exog_idx, results.model)

    # maybe add option for using wendog, wexog instead
    y = pandas.Series(results.model.endog, name=results.model.endog_names)
    exog = results.model.exog

    k_vars = exog.shape[1]
    # this function doesn't make sense if k_vars=1

    # Bound up front so that an explicit `grid` no longer leaves
    # `title_kwargs` undefined (previously a NameError in the loop).
    title_kwargs = {}
    if grid is not None:
        nrows, ncols = grid
    elif len(exog_idx) > 2:
        nrows = int(np.ceil(len(exog_idx) / 2.))
        ncols = 2
        title_kwargs = {"fontdict": {"fontsize": 'small'}}
    else:
        nrows = len(exog_idx)
        ncols = 1

    # An array, so the names can be fancy-indexed with a list of positions.
    other_names = np.array(results.model.exog_names)
    for i, idx in enumerate(exog_idx):
        # Every column except the one being plotted.
        others = list(range(k_vars))
        others.pop(idx)
        exog_others = pandas.DataFrame(exog[:, others],
                                       columns=other_names[others])
        ax = fig.add_subplot(nrows, ncols, i + 1)
        plot_partregress(y, pandas.Series(exog[:, idx],
                                          name=other_names[idx]),
                         exog_others, ax=ax, title_kwargs=title_kwargs,
                         obs_labels=False)
        # Per-axes titles are blanked; the figure carries one suptitle.
        ax.set_title("")

    fig.suptitle("Partial Regression Plot", fontsize="large")
    fig.tight_layout()
    fig.subplots_adjust(top=.95)

    return fig
def plot_regress_exog(results, exog_idx, fig=None):
    """Plot regression results against one regressor.

    This plots four graphs in a 2 by 2 figure: 'Y and Fitted vs. X',
    'Residuals versus <regressor>', 'Partial regression plot' and
    'CCPR Plot'.

    Parameters
    ----------
    results : result instance
        result instance with resid, model.endog and model.exog as attributes
    exog_idx : int or str
        Name or index of regressor in exog matrix
    fig : Matplotlib figure instance, optional
        Passed to ``utils.create_mpl_fig``; the four subplots are added to
        the figure it returns.

    Returns
    -------
    fig : matplotlib figure instance
        The figure as returned by the last plotting helper (``plot_ccpr``).

    Examples
    --------
    Load the Statewide Crime data set and build a model with regressors
    including the rate of high school graduation (hs_grad), population in
    urban areas (urban), households below poverty line (poverty), and single
    person households (single).  Outcome variable is the murder rate
    (murder).

    Build a 2 by 2 figure based on poverty showing fitted versus actual
    murder rate, residuals versus the poverty rate, partial regression plot
    of poverty, and CCPR plot for poverty rate.

    >>> import statsmodels.api as sm
    >>> import matplotlib.pyplot as plt
    >>> import statsmodels.formula.api as smf

    >>> fig = plt.figure(figsize=(8, 6))
    >>> crime_data = sm.datasets.statecrime.load_pandas()
    >>> results = smf.ols('murder ~ hs_grad + urban + poverty + single',
    ...                   data=crime_data.data).fit()
    >>> sm.graphics.plot_regress_exog(results, 'poverty', fig=fig)
    >>> plt.show()

    .. plot:: plots/graphics_regression_regress_exog.py
    """
    fig = utils.create_mpl_fig(fig)

    exog_name, exog_idx = utils.maybe_name_or_idx(exog_idx, results.model)
    results = maybe_unwrap_results(results)

    # maybe add option for wendog, wexog
    y_name = results.model.endog_names
    x1 = results.model.exog[:, exog_idx]
    # only the interval bounds are drawn; the first return value is unused
    _, iv_l, iv_u = wls_prediction_std(results)

    # top-left: observed and fitted values with interval bars
    ax = fig.add_subplot(2, 2, 1)
    ax.plot(x1, results.model.endog, 'o', color='b', alpha=0.9, label=y_name)
    ax.plot(x1, results.fittedvalues, 'D', color='r', label='fitted',
            alpha=.5)
    ax.vlines(x1, iv_l, iv_u, linewidth=1, color='k', alpha=.7)
    ax.set_title('Y and Fitted vs. X', fontsize='large')
    ax.set_xlabel(exog_name)
    ax.set_ylabel(y_name)
    ax.legend(loc='best')

    # top-right: residuals against the regressor
    ax = fig.add_subplot(2, 2, 2)
    ax.plot(x1, results.resid, 'o')
    ax.axhline(y=0, color='black')
    ax.set_title('Residuals versus %s' % exog_name, fontsize='large')
    ax.set_xlabel(exog_name)
    ax.set_ylabel("resid")

    # bottom-left: partial regression against all remaining columns
    ax = fig.add_subplot(2, 2, 3)
    exog_noti = np.ones(results.model.exog.shape[1], bool)
    exog_noti[exog_idx] = False
    exog_others = results.model.exog[:, exog_noti]
    from pandas import Series
    fig = plot_partregress(results.model.data.orig_endog,
                           Series(x1, name=exog_name,
                                  index=results.model.data.row_labels),
                           exog_others, obs_labels=False, ax=ax)
    ax.set_title('Partial regression plot', fontsize='large')

    # bottom-right: component and component-plus-residual plot
    ax = fig.add_subplot(2, 2, 4)
    fig = plot_ccpr(results, exog_idx, ax=ax)
    ax.set_title('CCPR Plot', fontsize='large')

    fig.suptitle('Regression Plots for %s' % exog_name, fontsize="large")

    fig.tight_layout()

    # leave room for the suptitle after tight_layout
    fig.subplots_adjust(top=.90)
    return fig
def plot_cusum_squares(self, alpha=0.05, legend_loc='upper left',
                       fig=None, figsize=None):
    r"""
    Plot the CUSUM of squares statistic and significance bounds.

    Parameters
    ----------
    alpha : float, optional
        The plotted significance bounds are alpha %.
    legend_loc : string, optional
        The location of the legend in the plot. Default is upper left.
    fig : Matplotlib Figure instance, optional
        If given, subplots are created in this figure instead of in a new
        figure. Note that the grid will be created in the provided
        figure using `fig.add_subplot()`.
    figsize : tuple, optional
        If a figure is created, this argument allows specifying a size.
        The tuple is (width, height).

    Returns
    -------
    fig : Matplotlib Figure instance
        The figure containing the single CUSUM-of-squares axes.

    Notes
    -----
    Evidence of parameter instability may be found if the CUSUM of squares
    statistic moves out of the significance bounds.

    Critical values used in creating the significance bounds are computed
    using the approximate formula of [2]_.

    References
    ----------
    .. [1] Brown, R. L., J. Durbin, and J. M. Evans. 1975.
       "Techniques for Testing the Constancy of
       Regression Relationships over Time."
       Journal of the Royal Statistical Society.
       Series B (Methodological) 37 (2): 149-92.
    .. [2] Edgerton, David, and Curt Wells. 1994.
       "Critical Values for the Cusumsq Statistic
       in Medium and Large Sized Samples."
       Oxford Bulletin of Economics and Statistics 56 (3): 355-65.
    """
    # Set up the figure and its single axes
    from statsmodels.graphics.utils import _import_mpl, create_mpl_fig
    _import_mpl()
    fig = create_mpl_fig(fig, figsize)
    ax = fig.add_subplot(1, 1, 1)

    # x-axis values: plain observation numbers unless dates are available
    if not hasattr(self.data, 'dates') or self.data.dates is None:
        dates = np.arange(self.nobs)
    else:
        dates = self.data.dates._mpl_repr()

    burn = self.loglikelihood_burn
    n_after_burn = self.nobs - burn
    span = [dates[burn], dates[-1]]

    # Statistic plus the straight reference line it is compared against
    ax.plot(dates[burn:], self.cusum_squares, label='CUSUM of squares')
    ref_line = (np.arange(burn, self.nobs) - burn) / n_after_burn
    ax.plot(dates[burn:], ref_line, 'k', alpha=0.3)

    # Dashed significance bounds; only the upper one gets a legend entry
    lower_line, upper_line = self._cusum_squares_significance_bounds(alpha)
    bound_label = '%d%% significance' % (alpha * 100)
    ax.plot(span, upper_line, 'k--', label=bound_label)
    ax.plot(span, lower_line, 'k--')

    ax.legend(loc=legend_loc)

    return fig
def plot_regress_exog(results, exog_idx, fig=None):
    """Plot regression results against one regressor.

    Draws a 2 by 2 figure with the panels 'Y and Fitted vs. X',
    'Residuals versus <regressor>', 'Partial regression plot' and
    'CCPR Plot'.

    Parameters
    ----------
    results : result instance
        result instance with resid, model.endog and model.exog as attributes
    exog_idx : int
        index of regressor in exog matrix (resolved through
        ``utils.maybe_name_or_idx``)
    fig : Matplotlib figure instance, optional
        Passed to ``utils.create_mpl_fig``; the four subplots are added to
        the figure it returns.

    Returns
    -------
    fig : matplotlib figure instance
    """
    from pandas import Series

    fig = utils.create_mpl_fig(fig)

    exog_name, exog_idx = utils.maybe_name_or_idx(exog_idx, results.model)
    results = maybe_unwrap_results(results)

    # maybe add option for wendog, wexog
    model = results.model
    y_name = model.endog_names
    regressor = model.exog[:, exog_idx]
    _, lower, upper = wls_prediction_std(results)

    # Panel 1: observed and fitted values with interval bars
    ax_fit = fig.add_subplot(2, 2, 1)
    ax_fit.plot(regressor, model.endog, 'o', color='b', alpha=0.9,
                label=y_name)
    ax_fit.plot(regressor, results.fittedvalues, 'D', color='r',
                label='fitted', alpha=.5)
    ax_fit.vlines(regressor, lower, upper, linewidth=1, color='k', alpha=.7)
    ax_fit.set_title('Y and Fitted vs. X', fontsize='large')
    ax_fit.set_xlabel(exog_name)
    ax_fit.set_ylabel(y_name)
    ax_fit.legend(loc='best')

    # Panel 2: residuals against the regressor
    ax_resid = fig.add_subplot(2, 2, 2)
    ax_resid.plot(regressor, results.resid, 'o')
    ax_resid.axhline(y=0, color='black')
    ax_resid.set_title('Residuals versus %s' % exog_name, fontsize='large')
    ax_resid.set_xlabel(exog_name)
    ax_resid.set_ylabel("resid")

    # Panel 3: partial regression on every column except the selected one
    ax_partial = fig.add_subplot(2, 2, 3)
    keep = np.ones(model.exog.shape[1], bool)
    keep[exog_idx] = False
    remaining = model.exog[:, keep]
    x_series = Series(regressor, name=exog_name,
                      index=model.data.row_labels)
    fig = plot_partregress(model.data.orig_endog, x_series, remaining,
                           obs_labels=False, ax=ax_partial)
    ax_partial.set_title('Partial regression plot', fontsize='large')

    # Panel 4: component and component-plus-residual
    ax_ccpr = fig.add_subplot(2, 2, 4)
    fig = plot_ccpr(results, exog_idx, ax=ax_ccpr)
    ax_ccpr.set_title('CCPR Plot', fontsize='large')

    fig.suptitle('Regression Plots for %s' % exog_name, fontsize="large")
    fig.tight_layout()
    fig.subplots_adjust(top=.90)
    return fig
def plot_partregress_grid(results, exog_idx=None, grid=None, fig=None):
    """Plot partial regression for a set of regressors.

    Parameters
    ----------
    results : results instance
        A regression model results instance
    exog_idx : None, list of ints, list of strings
        (column) indices of the exog used in the plot, default is all.
    grid : None or tuple of int (nrows, ncols)
        If grid is given, then it is used for the arrangement of the subplots.
        If grid is None, then ncol is one, if there are only 2 subplots, and
        the number of columns is two otherwise.
    fig : Matplotlib figure instance, optional
        Passed to ``utils.create_mpl_fig``; the subplots are added to the
        figure it returns.

    Returns
    -------
    fig : Matplotlib figure instance
        The figure holding one subplot per entry of `exog_idx`.

    Notes
    -----
    A subplot is created for each explanatory variable given by exog_idx.
    The partial regression plot shows the relationship between the response
    and the given explanatory variable after removing the effect of all other
    explanatory variables in exog.

    A small title font is requested from ``plot_partregress`` for any
    multi-column layout, including one passed in through `grid`.

    See Also
    --------
    plot_partregress : Plot partial regression for a single regressor.
    plot_ccpr : Plot CCPR against one regressor

    Examples
    --------
    Using the state crime dataset, separately plot the effect of each
    variable on the outcome, murder rate, while accounting for the effect
    of all other variables in the model, visualized with a grid of partial
    regression plots.

    >>> from statsmodels.graphics.regressionplots import plot_partregress_grid
    >>> import statsmodels.api as sm
    >>> import matplotlib.pyplot as plt
    >>> import statsmodels.formula.api as smf

    >>> fig = plt.figure(figsize=(8, 6))
    >>> crime_data = sm.datasets.statecrime.load_pandas()
    >>> results = smf.ols('murder ~ hs_grad + urban + poverty + single',
    ...                   data=crime_data.data).fit()
    >>> plot_partregress_grid(results, fig=fig)
    >>> plt.show()

    .. plot:: plots/graphics_regression_partregress_grid.py

    References
    ----------
    See http://www.itl.nist.gov/div898/software/dataplot/refman1/auxillar/partregr.htm
    """
    import pandas

    fig = utils.create_mpl_fig(fig)

    # The returned names are not used; column names come from exog_names.
    _, exog_idx = utils.maybe_name_or_idx(exog_idx, results.model)

    # maybe add option for using wendog, wexog instead
    y = pandas.Series(results.model.endog, name=results.model.endog_names)
    exog = results.model.exog

    k_vars = exog.shape[1]
    # this function doesn't make sense if k_vars=1

    n_plots = len(exog_idx)
    if grid is None:
        # two columns once there are more than two panels, else one column
        ncols = 2 if n_plots > 2 else 1
        nrows = int(np.ceil(n_plots / 2.)) if n_plots > 2 else n_plots
    else:
        nrows, ncols = grid

    # Defined on every path: a caller-supplied grid used to skip this
    # assignment and trigger a NameError when the panels were drawn.
    if ncols > 1:
        title_kwargs = {"fontdict": {"fontsize": 'small'}}
    else:
        title_kwargs = {}

    # for indexing purposes
    other_names = np.array(results.model.exog_names)
    for i, idx in enumerate(exog_idx):
        # all columns but the plotted one act as the partialled-out set
        others = lrange(k_vars)
        others.pop(idx)
        exog_others = pandas.DataFrame(exog[:, others],
                                       columns=other_names[others])
        ax = fig.add_subplot(nrows, ncols, i + 1)
        plot_partregress(y, pandas.Series(exog[:, idx],
                                          name=other_names[idx]),
                         exog_others, ax=ax, title_kwargs=title_kwargs,
                         obs_labels=False)
        # panel titles are cleared in favour of the shared suptitle
        ax.set_title("")

    fig.suptitle("Partial Regression Plot", fontsize="large")
    fig.tight_layout()
    # leave room for the suptitle after tight_layout
    fig.subplots_adjust(top=.95)

    return fig
def plot_partregress_grid(results, exog_idx=None, grid=None, fig=None):
    """Plot partial regression for a set of regressors.

    Parameters
    ----------
    results : results instance
        A regression model results instance
    exog_idx : None, list of ints, list of strings
        (column) indices of the exog used in the plot, default is all.
    grid : None or tuple of int (nrows, ncols)
        If grid is given, then it is used for the arrangement of the subplots.
        If grid is None, then ncol is one, if there are only 2 subplots, and
        the number of columns is two otherwise.
    fig : Matplotlib figure instance, optional
        Passed to ``utils.create_mpl_fig``; the subplots are added to the
        figure it returns.

    Returns
    -------
    fig : Matplotlib figure instance
        The figure holding one subplot per entry of `exog_idx`.

    Notes
    -----
    A subplot is created for each explanatory variable given by exog_idx.
    The partial regression plot shows the relationship between the response
    and the given explanatory variable after removing the effect of all other
    explanatory variables in exog.

    See Also
    --------
    plot_partregress : Plot partial regression for a single regressor.
    plot_ccpr : Plot CCPR against one regressor.

    References
    ----------
    See http://www.itl.nist.gov/div898/software/dataplot/refman1/auxillar/partregr.htm
    """
    import pandas

    fig = utils.create_mpl_fig(fig)

    # Names returned here are unused; labels are taken from exog_names below.
    _, exog_idx = utils.maybe_name_or_idx(exog_idx, results.model)

    # maybe add option for using wendog, wexog instead
    y = pandas.Series(results.model.endog, name=results.model.endog_names)
    exog = results.model.exog

    k_vars = exog.shape[1]
    # this function doesn't make sense if k_vars=1

    if grid is not None:
        nrows, ncols = grid
    elif len(exog_idx) > 2:
        nrows = int(np.ceil(len(exog_idx) / 2.))
        ncols = 2
    else:
        nrows = len(exog_idx)
        ncols = 1

    # title_kwargs must exist whichever branch ran above. It was formerly
    # assigned only for the automatic layout, so an explicit `grid` ended in
    # a NameError. Multi-column layouts get the small title font.
    title_kwargs = {"fontdict": {"fontsize": 'small'}} if ncols > 1 else {}

    # for indexing purposes
    other_names = np.array(results.model.exog_names)
    for i, idx in enumerate(exog_idx):
        # columns to partial out: everything except column `idx`
        others = lrange(k_vars)
        others.pop(idx)
        exog_others = pandas.DataFrame(exog[:, others],
                                       columns=other_names[others])
        ax = fig.add_subplot(nrows, ncols, i + 1)
        plot_partregress(y, pandas.Series(exog[:, idx],
                                          name=other_names[idx]),
                         exog_others, ax=ax, title_kwargs=title_kwargs,
                         obs_labels=False)
        # individual titles are removed; the suptitle labels the figure
        ax.set_title("")

    fig.suptitle("Partial Regression Plot", fontsize="large")
    fig.tight_layout()
    # leave room for the suptitle after tight_layout
    fig.subplots_adjust(top=.95)

    return fig
def plot_ccpr(res, exog_idx=None, grid=None, fig=None):
    """Generate CCPR plots against a set of regressors, plot in a grid.

    Generates a grid of CCPR (component and component-plus-residual) plots.

    Parameters
    ----------
    res : result instance
        uses exog and params of the result instance
    exog_idx : None or list of int
        (column) indices of the exog used in the plot. If None, every
        column of ``res.model.exog`` is plotted.
    grid : None or tuple of int (nrows, ncols)
        If grid is given, then it is used for the arrangement of the subplots.
        If grid is None, then ncol is one, if there are only 2 subplots, and
        the number of columns is two otherwise.
    fig : Matplotlib figure instance, optional
        Passed to ``utils.create_mpl_fig``; the subplots are added to the
        figure it returns.

    Returns
    -------
    fig : Matplotlib figure instance
        The figure holding one subplot per entry of `exog_idx`.

    Notes
    -----
    Partial residual plots are formed as::

        Res + Betahat(i)*Xi versus Xi

    and CCPR adds::

        Betahat(i)*Xi versus Xi

    See Also
    --------
    plot_ccpr_ax : Creates CCPR plot for a single regressor.

    References
    ----------
    See http://www.itl.nist.gov/div898/software/dataplot/refman1/auxillar/ccpr.htm
    """
    fig = utils.create_mpl_fig(fig)

    # The signature advertises exog_idx=None, but len(None) below used to
    # raise TypeError; fall back to all columns of the design matrix.
    if exog_idx is None:
        exog_idx = range(res.model.exog.shape[1])
    exog_idx = list(exog_idx)

    if grid is not None:
        nrows, ncols = grid
    elif len(exog_idx) > 2:
        nrows = int(np.ceil(len(exog_idx) / 2.))
        ncols = 2
    else:
        nrows = len(exog_idx)
        ncols = 1

    for i, idx in enumerate(exog_idx):
        ax = fig.add_subplot(nrows, ncols, i + 1)
        plot_ccpr_ax(res, exog_idx=idx, ax=ax)

    return fig
def plot_ccpr_grid(results, exog_idx=None, grid=None, fig=None):
    """Generate CCPR plots against a set of regressors, plot in a grid.

    Generates a grid of CCPR (component and component-plus-residual) plots.

    Parameters
    ----------
    results : result instance
        uses exog and params of the result instance
    exog_idx : None or list of int
        (column) indices of the exog used in the plot
    grid : None or tuple of int (nrows, ncols)
        If grid is given, then it is used for the arrangement of the subplots.
        If grid is None, then ncol is one, if there are only 2 subplots, and
        the number of columns is two otherwise.
    fig : Matplotlib figure instance, optional
        Passed to ``utils.create_mpl_fig``; the subplots are added to the
        figure it returns.

    Returns
    -------
    fig : Matplotlib figure instance
        The figure as returned by the last ``plot_ccpr`` call.

    Notes
    -----
    Partial residual plots are formed as::

        Res + Betahat(i)*Xi versus Xi

    and CCPR adds::

        Betahat(i)*Xi versus Xi

    Columns of ``results.model.exog`` with zero variance (e.g. a constant)
    are skipped, and the remaining panels are packed into consecutive
    subplot positions.

    See Also
    --------
    plot_ccpr : Creates CCPR plot for a single regressor.

    Examples
    --------
    Using the state crime dataset, separately plot the effect of each
    variable on the outcome, murder rate, while accounting for the effect
    of all other variables in the model.

    >>> import statsmodels.api as sm
    >>> import matplotlib.pyplot as plt
    >>> import statsmodels.formula.api as smf

    >>> fig = plt.figure(figsize=(8, 8))
    >>> crime_data = sm.datasets.statecrime.load_pandas()
    >>> results = smf.ols('murder ~ hs_grad + urban + poverty + single',
    ...                   data=crime_data.data).fit()
    >>> sm.graphics.plot_ccpr_grid(results, fig=fig)
    >>> plt.show()

    .. plot:: plots/graphics_regression_ccpr_grid.py

    References
    ----------
    See http://www.itl.nist.gov/div898/software/dataplot/refman1/auxillar/ccpr.htm
    """
    fig = utils.create_mpl_fig(fig)

    # only the resolved column indices are needed here
    _, exog_idx = utils.maybe_name_or_idx(exog_idx, results.model)

    if grid is not None:
        nrows, ncols = grid
    elif len(exog_idx) > 2:
        nrows = int(np.ceil(len(exog_idx) / 2.))
        ncols = 2
    else:
        nrows = len(exog_idx)
        ncols = 1

    # Count skipped columns rather than flagging them: with a 0/1 flag a
    # second zero-variance column left a gap in the subplot positions.
    n_skipped = 0
    for i, idx in enumerate(exog_idx):
        if results.model.exog[:, idx].var() == 0:
            n_skipped += 1
            continue

        ax = fig.add_subplot(nrows, ncols, i + 1 - n_skipped)
        fig = plot_ccpr(results, exog_idx=idx, ax=ax)
        # per-panel titles are blanked; the figure carries a single suptitle
        ax.set_title("")

    fig.suptitle("Component-Component Plus Residual Plot", fontsize="large")

    fig.tight_layout()

    # leave room for the suptitle after tight_layout
    fig.subplots_adjust(top=.95)
    return fig
def plot_predict(
    self,
    steps: int = 1,
    theta: float = 2,
    alpha: Optional[float] = 0.05,
    in_sample: bool = False,
    fig: Optional["matplotlib.figure.Figure"] = None,
    figsize: Tuple[float, float] = None,
) -> "matplotlib.figure.Figure":
    r"""
    Plot forecasts, prediction intervals and in-sample values

    Parameters
    ----------
    steps : int, default 1
        The number of steps ahead to compute the forecast components.
    theta : float, default 2
        The theta value to use when computing the weight to combine
        the trend and the SES forecasts.
    alpha : {float, None}, default 0.05
        The tail probability not covered by the confidence interval. Must
        be in (0, 1). Confidence interval is constructed assuming normally
        distributed shocks. If None, figure will not show the confidence
        interval.
    in_sample : bool, default False
        Flag indicating whether to include the in-sample period in the
        plot.
    fig : Figure, default None
        An existing figure handle. If not provided, a new figure is
        created.
    figsize: tuple[float, float], default None
        Tuple containing the figure size.

    Returns
    -------
    Figure
        Figure handle containing the plot.

    Notes
    -----
    The variance of the h-step forecast is assumed to follow from the
    integrated Moving Average structure of the Theta model, and so is
    :math:`\sigma^2(\alpha^2 + (h-1))`. The prediction interval assumes that
    innovations are normally distributed.
    """
    from statsmodels.graphics.utils import _import_mpl, create_mpl_fig

    _import_mpl()
    fig = create_mpl_fig(fig, figsize)
    assert fig is not None

    forecasts = self.forecast(steps, theta)
    horizon = forecasts.index

    ax = fig.add_subplot(111)
    observed = self.model.endog_orig
    # positional x-axis by default; replaced by the Series index if present
    x_observed = NumericIndex(np.arange(observed.shape[0]))
    if in_sample:
        if isinstance(observed, pd.Series):
            x_observed = observed.index
        ax.plot(x_observed, observed)
    ax.plot(horizon, forecasts)

    if alpha is not None:
        bands = self.prediction_intervals(steps, theta, alpha)
        ax.fill_between(
            horizon,
            bands["lower"],
            bands["upper"],
            color="gray",
            alpha=0.5,
            label=f"{1 - alpha:.0%} confidence interval",
        )

    ax.legend(loc="best", frameon=False)
    fig.tight_layout(pad=1.0)

    return fig
def plot_regress_exog(results, exog_idx, fig=None):
    """Plot regression results against one regressor.

    Produces a 2 by 2 figure whose panels are titled 'Y and Fitted vs. X',
    'Residuals versus <regressor>', 'Partial regression plot' and
    'CCPR Plot'.

    Parameters
    ----------
    results : result instance
        result instance with resid, model.endog and model.exog as attributes
    exog_idx : int
        index of regressor in exog matrix (resolved through
        ``utils.maybe_name_or_idx``)
    fig : Matplotlib figure instance, optional
        Passed to ``utils.create_mpl_fig``; the four subplots are added to
        the figure it returns.

    Returns
    -------
    fig : matplotlib figure instance
    """
    def _annotate(axis, title, xlabel=None, ylabel=None):
        # title first, then whichever axis labels were requested
        axis.set_title(title, fontsize='large')
        if xlabel is not None:
            axis.set_xlabel(xlabel)
        if ylabel is not None:
            axis.set_ylabel(ylabel)

    fig = utils.create_mpl_fig(fig)

    exog_name, exog_idx = utils.maybe_name_or_idx(exog_idx, results.model)
    results = maybe_unwrap_results(results)

    # maybe add option for wendog, wexog
    y_name = results.model.endog_names
    design = results.model.exog
    xvals = design[:, exog_idx]
    interval = wls_prediction_std(results)
    band_lo, band_hi = interval[1], interval[2]

    # (1) observed vs fitted, with interval bars
    panel = fig.add_subplot(2, 2, 1)
    panel.plot(xvals, results.model.endog, 'o', color='b', alpha=0.9,
               label=y_name)
    panel.plot(xvals, results.fittedvalues, 'D', color='r', label='fitted',
               alpha=.5)
    panel.vlines(xvals, band_lo, band_hi, linewidth=1, color='k', alpha=.7)
    _annotate(panel, 'Y and Fitted vs. X', exog_name, y_name)
    panel.legend(loc='best')

    # (2) residuals
    panel = fig.add_subplot(2, 2, 2)
    panel.plot(xvals, results.resid, 'o')
    panel.axhline(y=0, color='black')
    _annotate(panel, 'Residuals versus %s' % exog_name, exog_name, "resid")

    # (3) partial regression against the other columns
    panel = fig.add_subplot(2, 2, 3)
    not_selected = np.ones(design.shape[1], bool)
    not_selected[exog_idx] = False
    from pandas import Series
    fig = plot_partregress(
        results.model.data.orig_endog,
        Series(xvals, name=exog_name, index=results.model.data.row_labels),
        design[:, not_selected],
        obs_labels=False,
        ax=panel,
    )
    _annotate(panel, 'Partial regression plot')

    # (4) CCPR
    panel = fig.add_subplot(2, 2, 4)
    fig = plot_ccpr(results, exog_idx, ax=panel)
    _annotate(panel, 'CCPR Plot')

    fig.suptitle('Regression Plots for %s' % exog_name, fontsize="large")
    fig.tight_layout()
    fig.subplots_adjust(top=.90)
    return fig
def plot_diagnostics(residuals, variable=0, lags=40, fig=None,
                     figsize=(15, 7), savefig=False, path=None):
    """Draw a 2 by 2 panel of diagnostic plots for a series of residuals.

    The residuals are standardized (NaN-aware mean and standard deviation)
    and shown as: the standardized series, a histogram with a Gaussian KDE
    and the N(0,1) density, a normal Q-Q plot, and the partial
    autocorrelation function.

    Parameters
    ----------
    residuals : array_like
        One-dimensional residuals; converted with ``np.asarray`` so lists
        are accepted. NaN entries are ignored when standardizing and are
        dropped for the histogram, KDE and Q-Q panels.
    variable : int, optional
        Unused; kept so existing callers that pass it keep working.
    lags : int, optional
        Forwarded to ``plot_pacf`` as the number of lags. Default is 40.
    fig : Matplotlib Figure instance, optional
        Passed to ``create_mpl_fig`` together with `figsize`; the subplots
        are added to the figure it returns.
    figsize : tuple, optional
        Figure size forwarded to ``create_mpl_fig``. Default is (15, 7).
    savefig : bool, optional
        If true, a 'Residual diagnostic' suptitle is added, the figure is
        written to `path` at 500 dpi and ``fig.show()`` is called.
    path : str, optional
        Destination handed to ``fig.savefig``; needs to be set when
        `savefig` is true.

    Returns
    -------
    fig : Matplotlib Figure instance
        The figure containing the four diagnostic panels.
    """
    _import_mpl()
    fig = create_mpl_fig(fig, figsize)

    # Standardize the residuals. Approach follows
    # https://alkaline-ml.com/pmdarima/1.1.1/_modules/pmdarima/arima/arima.html
    resid = np.asarray(residuals, dtype=float)
    resid = (resid - np.nanmean(resid)) / np.nanstd(resid)

    # Top-left: residuals vs time
    ax = fig.add_subplot(221)
    x = np.arange(len(resid))
    ax.plot(x, resid)
    ax.hlines(0, x[0], x[-1], alpha=0.5)
    ax.set_xlim(x[0], x[-1])
    ax.set_title('Standardized residual')

    # Top-right: histogram, Gaussian kernel density, Normal density
    # Can only do histogram and Gaussian kernel density on the non-null
    # elements
    resid_nonmissing = resid[~(np.isnan(resid))]
    ax = fig.add_subplot(222)

    # gh5792: Remove except after support for matplotlib>2.1 required
    try:
        ax.hist(resid_nonmissing, density=True, label='Hist')
    except AttributeError:
        ax.hist(resid_nonmissing, normed=True, label='Hist')

    from scipy.stats import gaussian_kde, norm
    kde = gaussian_kde(resid_nonmissing)
    # evaluate the densities over roughly +/- 4 standard deviations
    xlim = (-1.96 * 2, 1.96 * 2)
    x = np.linspace(xlim[0], xlim[1])
    ax.plot(x, kde(x), label='KDE')
    ax.plot(x, norm.pdf(x), label='N(0,1)')
    ax.set_xlim(xlim)
    ax.legend()
    ax.set_title('Histogram plus estimated density')

    # Bottom-left: QQ plot
    ax = fig.add_subplot(223)
    from statsmodels.graphics.gofplots import qqplot
    qqplot(resid_nonmissing, line='s', ax=ax)
    ax.set_title('Normal Q-Q')

    # Bottom-right: partial autocorrelation
    ax = fig.add_subplot(224)
    from statsmodels.graphics.tsaplots import plot_pacf
    plot_pacf(resid, ax=ax, lags=lags)
    ax.set_title('Partial Autocorrelation function')

    # NOTE(review): the y-range is fixed to +/-0.1, so larger partial
    # autocorrelations are clipped out of view - confirm this is intended.
    ax.set_ylim(-0.1, 0.1)

    if savefig:
        fig.suptitle('Residual diagnostic', fontsize=20)
        fig.savefig(path, dpi=500)
        fig.show()

    return fig