Ejemplo n.º 1
0
    def showPlot(self):
        """Draw the currently selected diagnostic plot for the fitted model.

        Reads the plot kind from ``self.plotvar`` and the independent
        variable from ``self.indvar`` (falling back to
        ``self.model.exog_names[1]`` when none is selected), clears the plot
        frame's figure, draws the requested plot into it and redraws the
        canvas. If there is no fitted model a warning is shown in the plot
        frame and nothing else happens.
        """

        pf = self.pf
        fit = self.fit
        # Identity test: ``== None`` would dispatch to any custom ``__eq__``
        # defined on the fit object.
        if fit is None:
            pf.showWarning('no fitted model')
            return

        fig = pf.fig
        fig.clear()
        ax = fig.add_subplot(111)

        # plotframe options
        pf.mplopts.applyOptions()
        kwds = pf.mplopts.kwds
        kind = self.plotvar.get()
        indvar = self.indvar.get()
        if indvar == '':
            # NOTE(review): index 1 presumably skips a constant term stored
            # at index 0 -- confirm against how the model is built.
            indvar = self.model.exog_names[1]

        if kind == 'default':
            # Only these model types have a default plot; any other model
            # leaves the axes empty.
            if isinstance(self.model, (sm.OLS, sm.GLS)):
                self.plotRegression(fit, indvar, ax=ax, **kwds)
            elif isinstance(self.model, sm.Logit):
                self.plotLogit(fit, indvar, ax)
        elif kind == 'predicted vs test':
            self.plotPrediction(fit, ax)
        elif kind == 'fit line':
            try:
                sm.graphics.plot_fit(fit, indvar, ax=ax)
            except ValueError:
                pf.showWarning('%s is not an independent variable' % indvar,
                               ax=ax)
        elif kind == 'regression plots':
            # This plot is given the whole figure, so drop the single axes
            # created above first.
            fig.clear()
            sm.graphics.plot_regress_exog(fit, indvar, fig=fig)
        elif kind == 'influence':
            sm.graphics.influence_plot(fit, ax=ax, criterion="cooks")
        elif kind == 'leverage':
            from statsmodels.graphics.regressionplots import plot_leverage_resid2
            plot_leverage_resid2(fit, ax=ax)
        elif kind == 'qqplot':
            sm.graphics.qqplot(fit.resid, line='r', ax=ax)
        elif kind == 'all regressors':
            # Same as 'regression plots': the figure is handed over whole.
            fig.clear()
            sm.graphics.plot_partregress_grid(fit, fig=fig)

        fig.tight_layout()
        fig.canvas.draw()
Ejemplo n.º 2
0
    def showPlot(self):
        """Render the plot chosen in the UI for the current fitted model.

        The plot kind comes from ``self.plotvar`` and the independent
        variable from ``self.indvar``; an empty selection falls back to
        ``self.model.exog_names[1]``. The plot frame's figure is cleared,
        the requested plot is drawn and the canvas is redrawn. When no model
        has been fitted, a warning is shown in the plot frame and the method
        returns without drawing.
        """

        pf = self.pf
        fit = self.fit
        # ``is None`` rather than ``== None``: equality could be overridden
        # by the fit object.
        if fit is None:
            pf.showWarning('no fitted model')
            return

        fig = pf.fig
        fig.clear()
        ax = fig.add_subplot(111)

        # plotframe options
        pf.mplopts.applyOptions()
        kwds = pf.mplopts.kwds
        kind = self.plotvar.get()
        indvar = self.indvar.get()
        if indvar == '':
            # NOTE(review): presumably exog_names[0] is the constant term,
            # hence index 1 -- confirm.
            indvar = self.model.exog_names[1]

        if kind == 'default':
            # Models other than OLS/GLS/Logit get no default plot.
            if isinstance(self.model, (sm.OLS, sm.GLS)):
                self.plotRegression(fit, indvar, ax=ax, **kwds)
            elif isinstance(self.model, sm.Logit):
                self.plotLogit(fit, indvar, ax)
        elif kind == 'predicted vs test':
            self.plotPrediction(fit, ax)
        elif kind == 'fit line':
            try:
                sm.graphics.plot_fit(fit, indvar, ax=ax)
            except ValueError:
                pf.showWarning('%s is not an independent variable' % indvar,
                               ax=ax)
        elif kind == 'regression plots':
            # The figure is passed whole, so remove the axes added above.
            fig.clear()
            sm.graphics.plot_regress_exog(fit, indvar, fig=fig)
        elif kind == 'influence':
            sm.graphics.influence_plot(fit, ax=ax, criterion="cooks")
        elif kind == 'leverage':
            from statsmodels.graphics.regressionplots import plot_leverage_resid2
            plot_leverage_resid2(fit, ax=ax)
        elif kind == 'qqplot':
            sm.graphics.qqplot(fit.resid, line='r', ax=ax)
        elif kind == 'all regressors':
            # The figure is passed whole, so remove the axes added above.
            fig.clear()
            sm.graphics.plot_partregress_grid(fit, fig=fig)

        fig.tight_layout()
        fig.canvas.draw()
def plot_gof_figures(model):
    """Plot a multipanel figure of goodness-of-fit plots.

    Arguments:
        model: a fitted ols() object from statsmodels.formula.api

    Output:
        Draws a 2x2 figure (16x16 inches) containing:
        * Residual vs fitted value plot
        * Q-Q plot
        * Scale-location plot
        * Leverage vs normalized residual plot
    """
    fig = plt.figure(figsize=(16, 16))

    # Panel 1: residuals against fitted values with a lowess smoother.
    ax = fig.add_subplot(2, 2, 1)
    # x/y are passed by keyword: seaborn >= 0.12 no longer accepts them
    # positionally.
    sns.residplot(x=model.fittedvalues, y=model.resid, lowess=True, ax=ax)
    ax.set_xlabel('Fitted values')
    ax.set_ylabel('Residuals')
    ax.set_title('Residuals vs Fitted')

    # Panel 2: normal Q-Q plot of the residuals.
    ax = fig.add_subplot(2, 2, 2)
    gofplots.qqplot(model.resid, fit=True, line='45', ax=ax)
    ax.set_title('Normal Q-Q')

    # Panel 3: scale-location. The axis label promises *standardized*
    # residuals, so use the internally studentized residuals rather than the
    # raw ones.
    ax = fig.add_subplot(2, 2, 3)
    standardized = model.get_influence().resid_studentized_internal
    ax.scatter(model.fittedvalues, np.sqrt(np.abs(standardized)))
    ax.set_xlabel('Fitted values')
    ax.set_ylabel('Square root of the standardized residuals')
    ax.set_title('Scale-Location')

    # Panel 4: leverage against squared normalized residuals.
    ax = fig.add_subplot(2, 2, 4)
    regressionplots.plot_leverage_resid2(model, ax=ax)
Ejemplo n.º 4
0
lzip(name, test)

# ## Influence tests
#
# Once created, an object of class ``OLSInfluence`` holds attributes and
# methods that allow users to assess the influence of each observation. For
# example, we can compute and extract the first few rows of DFbetas by:

from statsmodels.stats.outliers_influence import OLSInfluence
test_class = OLSInfluence(results)
test_class.dfbetas[:5, :]

# Explore other options by typing ``dir(test_class)``
#
# Useful information on leverage can also be plotted:

from statsmodels.graphics.regressionplots import plot_leverage_resid2
print(plot_leverage_resid2(results))

# Other plotting options can be found on the [Graphics page.](http://www.statsmodels.org/stable/graphics.html)

# ## Multicollinearity
#
# Condition number:

np.linalg.cond(results.model.exog)

# ## Heteroskedasticity tests
#
# Breusch-Pagan test:

name = ['Lagrange multiplier statistic', 'p-value', 'f-value', 'f p-value']
# ``het_breuschpagan`` is the correctly spelled name; ``het_breushpagan`` was
# only a deprecated alias of it.
test = sms.het_breuschpagan(results.resid, results.model.exog)
# ## Influence tests
#
# Once created, an object of class ``OLSInfluence`` holds attributes and
# methods that allow users to assess the influence of each observation. For
# example, we can compute and extract the first few rows of DFbetas by:

from statsmodels.stats.outliers_influence import OLSInfluence
test_class = OLSInfluence(results)
test_class.dfbetas[:5,:]


# Explore other options by typing ``dir(test_class)``
#
# Useful information on leverage can also be plotted:

from statsmodels.graphics.regressionplots import plot_leverage_resid2
print(plot_leverage_resid2(results))


# Other plotting options can be found on the [Graphics page.](http://www.statsmodels.org/stable/graphics.html)

# ## Multicollinearity
#
# Condition number:

np.linalg.cond(results.model.exog)


# ## Heteroskedasticity tests
#
# Breusch-Pagan test:
Ejemplo n.º 6
0
 def test_plot_leverage_resid2(self):
     """Check that ``plot_leverage_resid2`` returns a matplotlib ``Figure``."""
     fig = plot_leverage_resid2(self.res)
     try:
         assert_equal(isinstance(fig, plt.Figure), True)
     finally:
         # Close the figure even when the assertion fails so it does not
         # stay open for the rest of the test run.
         plt.close(fig)
Ejemplo n.º 7
0
 def test_plot_leverage_resid2(self, close_figures):
     """Verify that ``plot_leverage_resid2(self.res)`` yields a ``plt.Figure``.

     ``close_figures`` is requested by name only and never used in the body;
     presumably it is a fixture that closes figures -- confirm where it is
     defined.
     """
     returned = plot_leverage_resid2(self.res)
     is_figure = isinstance(returned, plt.Figure)
     assert_equal(is_figure, True)
Ejemplo n.º 8
0
#
# Once created, an object of class ``OLSInfluence`` holds attributes and
# methods that allow users to assess the influence of each observation. For
# example, we can compute and extract the first few rows of DFbetas by:

from statsmodels.stats.outliers_influence import OLSInfluence
test_class = OLSInfluence(results)
test_class.dfbetas[:5, :]

# Explore other options by typing ``dir(test_class)``
#
# Useful information on leverage can also be plotted:

from statsmodels.graphics.regressionplots import plot_leverage_resid2
# Create the axes up front so the figure size can be chosen, then draw the
# leverage plot into them; ``fig`` is rebound to the function's return value.
fig, ax = plt.subplots(figsize=(8, 6))
fig = plot_leverage_resid2(results, ax=ax)

# Other plotting options can be found on the [Graphics
# page.](https://www.statsmodels.org/stable/graphics.html)

# ## Multicollinearity
#
# Condition number:

np.linalg.cond(results.model.exog)

# ## Heteroskedasticity tests
#
# Breusch-Pagan test:

name = ['Lagrange multiplier statistic', 'p-value',
Ejemplo n.º 9
0
 def test_graphs(self, res):
     """Call three regression plotting helpers on ``res`` without checks.

     The exogenous column indices are counted back from the number of
     columns in ``self.X``.
     """
     n_columns = self.X.shape[1]
     fit_idx = n_columns - 3
     regress_idx = n_columns - 1
     rp.plot_fit(res, exog_idx=fit_idx)
     rp.plot_regress_exog(res, exog_idx=regress_idx, fig=None)
     # Leverage against squared residuals.
     rp.plot_leverage_resid2(res)
Ejemplo n.º 10
0
from statsmodels.stats.outliers_influence import summary_table
#from sklearn.linear_model import LinearRegression
#import scipy, scipy.stats
#from statsmodels.sandbox.regression.predstd import wls_prediction_std

filename = '../Carseats.csv'

# '?' marks missing values in the file; rows containing any are dropped.
data = pd.read_csv(filename, na_values='?', index_col=0).dropna()

# Multiple linear regression
mlinreg = smf.ols(formula='Sales ~ Price + Urban + US', data=data).fit()

print(mlinreg.summary())

# Multiple linear regression without Urban
mlinreg2 = smf.ols(formula='Sales ~ Price + US', data=data).fit()

print(mlinreg2.summary())

f, axarr = plt.subplots(2)
# Check the normalized (Pearson) residuals for non-linearity and outliers.
# The x axis is labelled 'Fitted values', so plot against the model's fitted
# values rather than the observed 'Sales'. x/y are passed by keyword because
# seaborn >= 0.12 no longer accepts them positionally.
sns.regplot(x=mlinreg2.fittedvalues, y=mlinreg2.resid_pearson, lowess=True,
            ax=axarr[0], line_kws={'color': 'r', 'lw': 1})
axarr[0].set_title('Normalized residual plot')
axarr[0].set_xlabel('Fitted values')
axarr[0].set_ylabel('Normalized residuals')

# Statsmodels leverage plot
f = plot_leverage_resid2(mlinreg2, ax=axarr[1])

plt.show()
Ejemplo n.º 11
0
# Confidence interval: the values giving the 95% confidence interval for the
# independent variable's slope.

# A p-value <= 0.05 signifies strong evidence against the null hypothesis, so
# you reject the null hypothesis. A p-value > 0.05 signifies weak evidence
# against the null hypothesis, so you fail to reject the null hypothesis.

# Regression diagnostics

# 1) Outliers: data points that are far away from the fitted regression line
# are called outliers, and these can impact the accuracy of the model.
# Plotting normalized residuals vs. leverage gives a good picture of the
# outlying points. A residual is the difference between the actual and the
# predicted value, and leverage is a measure of how far away the independent
# variable values of an observation are from those of the other observations.

from statsmodels.graphics.regressionplots import plot_leverage_resid2
# Create the axes first so the figure size can be set, then draw into them.
fig, ax = plt.subplots(figsize=(8, 6))
fig = plot_leverage_resid2(lm, ax=ax)
plt.show()

# Running a Bonferroni outlier test gives a p-value for each observation;
# observations with a p-value < 0.05 are the outliers affecting the accuracy.

# Find outliers
# Bonferroni outlier test
test = lm.outlier_test()
print('Bad data points (bonf(p) < 0.05):')
# Filter on the third column -- presumably the bonf(p) column named in the
# message above; confirm against the outlier_test() output.
print(test[test.iloc[:, 2] < 0.05])

# 2) Homoscedasticity and normality: the error variance should be constant,
# which is known as homoscedasticity, and the errors should be normally
# distributed.

# Plot the residuals as points to check homoscedasticity.
plt.plot(lm.resid, 'o')
Ejemplo n.º 12
0
■ トライ&エラーを補助してくれる可視化ツール

回帰分析にはトライ&エラーが付き物です。むしろ、ほとんどの経済現象は、線形式で完全に記述できるはずがありませんから、色々な回帰式を当てはめたり、サンプル期間変えたりして初めて、経済現象の全体像を掴むことができるのだと思います。

statsmodelsには、こうしたトライ&エラーをサポートする可視化ツールが付いています。以下のモジュールをimportしてみましょう。

"""

# /// Graphical Diagnostic Tools /// ---------------------------------
import statsmodels.graphics.regressionplots as regplot
# The string below says: first, influence_plot() detects whether the sample
# contains periods that behaved like outliers.
"""
まず、以下のinfluence_plot()は、サンプルの中にはずれ値的な動きをした期間があるかを検出してくれます。
"""
# Check the effect of outliers
regplot.influence_plot(rlt)  # Studentized residuals
regplot.plot_leverage_resid2(rlt)  # Leverage vs. resid^2
# The string below says: plot_regress_exog() shows, for an individual
# explanatory variable, how the error term relates to that variable, plus
# partial regression plots of its explanatory power with other factors
# controlled for. If the error or the explanatory power moves with the
# variable's value, influence from other variables or non-linearity is
# likely present.
"""
また、plot_regress_exog()は、個別の説明変数ごとに、誤差項と説明変数の関係や、他の要因をコントロールした上での当該変数の説明力を見る偏回帰プロットが表示されます。誤差や説明力が説明変数の値に連動して変化するなら、他変数からの影響や非線形性が現れていると考えられます。
"""

# Plots for the selected exog (index 1), other regressors controlled for
regplot.plot_regress_exog(rlt, 1)

# Number of rows in DD.
N = DD.shape[0]

# 'Pic' and 'GAP' columns scaled by 100 -- presumably to express them in
# percent; confirm against how DD is built.
x = DD['Pic'].values * 100
y = DD['GAP'].values * 100

# One random value per row, drawn from [0, 1) and divided by 10.
radii = np.random.random(size=N) / 10
colors = [
    "#%02x%02x%02x" % (r, g, 150)
Ejemplo n.º 13
0
# Check the normalized (Pearson) residuals for non-linearity and outliers.
# The x axis is labelled 'Fitted values', so plot against the model's fitted
# values rather than the observed 'y'. x/y are passed by keyword because
# seaborn >= 0.12 no longer accepts them positionally.
sns.regplot(x=yallreg.fittedvalues,
            y=yallreg.resid_pearson,
            lowess=True,
            ax=axarr[0],
            line_kws={
                'color': 'r',
                'lw': 1
            })
axarr[0].set_title('Normalized residual plot')
axarr[0].set_xlabel('Fitted values')
axarr[0].set_ylabel('Normalized residuals')

# Statsmodels leverage plot
f = plot_leverage_resid2(yallreg, ax=axarr[1])

# Regression: y vs x1
yx1reg = smf.ols(formula='y ~ x1', data=data).fit()
print('\n\nRegression result: y vs x1')
print(yx1reg.summary())

# Regression: y vs x2
yx2reg = smf.ols(formula='y ~ x2', data=data).fit()
print('\n\nRegression result: y vs x2')
print(yx2reg.summary())

# Add a new data point on a copy so the original frame stays untouched.
data_new = data.copy()
# NOTE(review): using the row count as the new label assumes it is not
# already present in the index -- confirm the index of ``data``.
last = len(data_new.index)
data_new.loc[last, 'x1'] = 0.1
Ejemplo n.º 14
0
statsmodelsには、こうしたトライ&エラーをサポートする可視化ツールが付いています。以下のモジュールをimportしてみましょう。

"""


# /// Graphical Diagnostic Tools /// ---------------------------------
import statsmodels.graphics.regressionplots as regplot


# The string below says: first, influence_plot() detects whether the sample
# contains periods that behaved like outliers.
"""
まず、以下のinfluence_plot()は、サンプルの中にはずれ値的な動きをした期間があるかを検出してくれます。
"""
# Check the effect of outliers
regplot.influence_plot(rlt) # Studentized residuals
regplot.plot_leverage_resid2(rlt) # Leverage vs. resid^2


# The string below says: plot_regress_exog() shows, for an individual
# explanatory variable, how the error term relates to that variable, plus
# partial regression plots of its explanatory power with other factors
# controlled for. If the error or the explanatory power moves with the
# variable's value, influence from other variables or non-linearity is
# likely present.
"""
また、plot_regress_exog()は、個別の説明変数ごとに、誤差項と説明変数の関係や、他の要因をコントロールした上での当該変数の説明力を見る偏回帰プロットが表示されます。誤差や説明力が説明変数の値に連動して変化するなら、他変数からの影響や非線形性が現れていると考えられます。
"""

# Plots for the selected exog (index 1), other regressors controlled for
regplot.plot_regress_exog(rlt,1)




# Number of rows in DD.
N = DD.shape[0]

# 'Pic' column scaled by 100 -- presumably to express it in percent; confirm.
x = DD['Pic'].values*100