예제 #1
0
    def simple_regession(self):
        '''Fit mpg ~ horsepower by OLS and report the exercise 03-08 results.

        Prints the design matrix, the fit summary, the predicted mpg at
        horsepower = 98 and the 95% prediction interval for that point,
        then plots the fitted line over the observations and passes the
        fit to ``lrplot.plot_R_graphs``.

        Reads ``self.df`` and uses its 'mpg' and 'horsepower' columns.
        Returns nothing; all output is printed or plotted.

        The answer of exercise03-08:
        (a)
            (i)  Yes, from F-stat
            (ii) Explain it from RSE and R^2 stat
            (iii)negative
            (iv) Code (point prediction and 95% prediction interval)
        (b) Code
        (c) Residual/fitted: non-linearity
        '''
        y = self.df['mpg']
        # Explicit intercept column; this is the array-API counterpart of
        # the formula "mpg ~ horsepower".
        X = sm.add_constant(self.df[['horsepower']])
        # Single-argument print(...) calls emit the same text under both
        # Python 2 and Python 3.
        print(X)
        res = sm.OLS(y, X).fit()
        print(res.summary())

        # One new observation in design-matrix layout: [const, horsepower].
        new_exog = [[1, 98]]
        print("The prediction is:  %s" % (res.predict(exog=new_exog),))

        # obs_ci_* are the bounds for a new observation (prediction
        # interval); the mean_ci_* columns would be the confidence interval
        # of the fitted mean instead.
        interval = res.get_prediction(exog=new_exog).summary_frame(alpha=0.05)
        print("The prediction interval is: ")
        print(interval[['obs_ci_lower', 'obs_ci_upper']])

        fig = rp.abline_plot(model_results=res)
        ax = fig.axes[0]
        # White markers need an explicit edge colour: since matplotlib 2.0
        # the default edge follows the face colour, which would hide them.
        ax.scatter(X['horsepower'], y, c='w', edgecolors='k')
        plt.show()
        lrplot.plot_R_graphs(res)
예제 #2
0
        The answer of exercise-03-09:
        (f) From the correlation matrix in 9a., displacement, horsepower and weight show a similar nonlinear pattern against our response mpg.
            This nonlinear pattern is very close to a log form.
            So in the next attempt, we use log(mpg) as our response variable.
        """
        # Why choose these predictors? By brute force and choose the least p-value?
        mod = smf.ols(
            formula="np.log(mpg) ~ cylinders+displacement+horsepower+weight+acceleration+year+origin", data=self.df
        )
        res = mod.fit()
        print res.summary()
        return res


if __name__ == "__main__":
    # Script entry point: run the multivariate regression of exercise 03-09
    # and show the diagnostic plots for the resulting fit.
    exercise = Exec09()

    # Optional data exploration (enable as needed):
    #   exercise.plot_scatter_matrix()
    #   exercise.show_covariance()

    # Per the original note, this is expected to be a RegressionResults
    # object -- confirm against multi_variate_regression().
    fitted = exercise.multi_variate_regression()

    # Optional individual diagnostics (enable as needed):
    #   lrp.plot_scale_location(fitted)
    #   lrp.plot_qq(fitted)
    #   lrp.plot_fitted_student_residual(exercise.df, fitted)
    #   exercise.get_leverages_resid(fitted)
    #   exercise.get_vifs(exercise.X)
    #   exercise.regress_with_interaction()

    # Alternative fit to plot instead of the one above:
    #   fitted = exercise.regress_with_poly_2()
    lrp.plot_R_graphs(fitted)