def liner_demo(self):
    """Fit a log-linear OLS demo model and print its summary table.

    The response is ``log(depend_var)``; the regressors are
    ``constant_column1`` and ``constant_column2`` plus an added intercept.
    """
    # NOTE(review): the frame is created empty, so the column lookups below
    # raise KeyError until real data is loaded here.
    frame = pd.DataFrame()
    log_response = np.log(frame["depend_var"])
    design = sm.add_constant(frame[["constant_column1", "constant_column2"]])
    fitted = sm.OLS(log_response, design).fit()
    print(fitted.summary())
def GLM_poisson(data_to_fit, target):
    """Fit a Poisson GLM with a log link and print its summary.

    Args:
        data_to_fit: Explanatory variables; an intercept column is added
            before fitting.
        target: Response values passed to the model as ``endog``.

    Returns:
        The fitted statsmodels results object.
    """
    import statsmodels.api as sm

    design = sm.add_constant(data_to_fit)
    # The log link is the Poisson family's default. Relying on the default
    # avoids passing the deprecated lowercase ``links.log`` *class*, which
    # recent statsmodels releases reject in favour of a link instance.
    poisson_model = sm.GLM(target, design, family=sm.families.Poisson())
    results = poisson_model.fit()
    print(results.summary())
    return results
def regression(data_to_fit, target):
    """Fit an ordinary least squares model with an intercept.

    Prints the statsmodels summary table as a side effect.

    Args:
        data_to_fit: Explanatory variables; an intercept column is added
            before fitting.
        target: Response values passed to the model as ``endog``.

    Returns:
        The fitted statsmodels results object.
    """
    # The previous function-scope import of ``wls_prediction_std`` from the
    # statsmodels sandbox was never used and has been dropped.
    import statsmodels.api as sm

    design = sm.add_constant(data_to_fit)
    results = sm.OLS(target, design).fit()
    print(results.summary())
    return results
def next(self):
    """Regress ``data0`` on ``data1`` over the lookback window.

    Takes the last ``self.p.period`` values of both data feeds, fits
    ``data0 ~ const + data1`` by OLS and writes the fitted slope and
    intercept to the current bar of the ``slope`` / ``intercept`` lines.
    """
    window = self.p.period
    p0 = pd.Series(self.data0.get(size=window))
    p1 = pd.Series(self.data1.get(size=window))
    p1 = sm.add_constant(p1, prepend=True)
    # With prepend=True the constant is the FIRST column of the design
    # matrix, so the fitted params come back ordered (intercept, slope).
    # Unpacking them as (slope, intercept) swapped the two output lines.
    intercept, slope = sm.OLS(p0, p1).fit().params
    self.lines.slope[0] = slope
    self.lines.intercept[0] = intercept
def regression_to_dataframe(data_to_fit, target):
    """Fit an OLS model and return its key statistics in one flat Series.

    Prints the statsmodels summary table as a side effect.

    Args:
        data_to_fit: Explanatory variables; an intercept column is added
            before fitting.
        target: Response values passed to the model as ``endog``.

    Returns:
        Despite the function name, a hierarchically indexed ``pandas.Series``
        (the unstacked frame with missing entries dropped). The outer level
        is one of ``params``, ``pvals``, ``std``, ``statistics``; the inner
        level is the term name, ``r2`` / ``adj_r2``, or ``_f_test``.
    """
    # The previous function-scope import of ``wls_prediction_std`` from the
    # statsmodels sandbox was never used and has been dropped.
    import statsmodels.api as sm

    X = sm.add_constant(data_to_fit)
    model_result = sm.OLS(target, X).fit()
    print(model_result.summary())

    # Model-level goodness-of-fit figures.
    statistics = pd.Series({'r2': model_result.rsquared,
                            'adj_r2': model_result.rsquared_adj})
    # Put them together with the per-term results.
    result_df = pd.DataFrame({'params': model_result.params,
                              'pvals': model_result.pvalues,
                              'std': model_result.bse,
                              'statistics': statistics})
    # Add the overall F statistic and its p-value.
    fisher_df = pd.DataFrame({'params': {'_f_test': model_result.fvalue},
                              'pvals': {'_f_test': model_result.f_pvalue}})
    # Merge them and unstack to obtain a hierarchically indexed Series.
    res_series = pd.concat([result_df, fisher_df]).unstack()
    return res_series.dropna()
def cointegration(self, priceX, priceY):
    """Check whether two price series appear cointegrated.

    Regresses ``log(priceY)`` on ``log(priceX)`` plus an intercept, runs
    ``ADF`` on the regression residuals and prints a short report.

    Args:
        priceX: Price series used as the regressor.
        priceY: Price series used as the response.

    Returns:
        ``(intercept, beta)`` of the regression when the ADF p-value is
        below 0.05; ``None`` when it is not, or when either input is missing.
    """
    if priceX is None or priceY is None:
        print('缺少价格序列.')
        # Stop here: previously execution fell through and np.log(None)
        # raised a TypeError right after the message was printed.
        return None

    priceX = np.log(priceX)
    priceY = np.log(priceY)
    results = sm.OLS(priceY, sm.add_constant(priceX)).fit()
    resid = results.resid
    # NOTE(review): ADF is defined outside this block; it is presumably a
    # unit-root test object exposing ``.pvalue`` -- confirm.
    adfSpread = ADF(resid)

    # Read the coefficients positionally from a plain array so this works
    # for both ndarray and pandas inputs (integer ``Series[...]`` lookups
    # with a label index are deprecated in pandas).
    coefs = np.asarray(results.params)
    intercept, beta = coefs[0], coefs[1]
    report_values = (adfSpread.pvalue, intercept, beta)

    if adfSpread.pvalue >= 0.05:
        print('''交易价格不具有协整关系. P-value of ADF test: %f Coefficients of regression: Intercept: %f Beta: %f ''' % report_values)
        return None

    print('''交易价格具有协整关系. P-value of ADF test: %f Coefficients of regression: Intercept: %f Beta: %f ''' % report_values)
    return intercept, beta
# Figure setup: both regression plots are drawn with a 6x5 inch figure size.
seaborn.set(rc={'figure.figsize': (6, 5)})
seaborn.regplot(x=x_values, y=y_values)
seaborn.regplot(x=x_values, y=mean_vol)

# Matplotlib labelling
# plt.scatter(x, y, label='skitscat', color='k')
plt.xlabel('Rolling HVs / Rolling Return')
plt.ylabel('Lagged Rolling HVs')
plt.title('Scatter Plot')
plt.legend()
plt.show()

# Original author's note: this regression section can probably be deleted.
# Column 0 is the response; columns 1 and 2 are the regressors, with an
# intercept column added. Rows with missing values are dropped by OLS.
y = scatter_plot_df.iloc[:, [0]]
X = sm.add_constant(scatter_plot_df.iloc[:, [1, 2]])
model = sm.OLS(y, X, missing='drop')
results = model.fit()

# Report the fitted coefficients on one line, then the raw params.
# NOTE(review): the value labelled "Corr." is results.rsquared and the value
# labelled "n" is results.df_resid -- confirm these labels are intended.
hv_label = "Rolling_HVs({})".format(rolling_HVs_window)
returns_label = "Rolling_Returns({})".format(rolling_returns_window)
report_template = "Constant: {:.2f}, HVs: {:.2f}, Returns: {:.2f} Corr.: {:.2f}, n = {:.0f}"
print(report_template.format(
    results.params['const'],
    results.params[hv_label],
    results.params[returns_label],
    results.rsquared,
    results.df_resid,
))
print(results.params)
y_pred = regressor.predict(X_test)

# Building the optimal model using Backward Elimination.
# Only statsmodels.api is imported: the earlier
# `import statsmodels.formula.api as sm` was rebound by this import before
# `sm` was ever used, so it has been dropped.
import statsmodels.api as sm

# OLS does not add an intercept on its own, so a column of 1s is needed as
# the x0 term. Passing the ones as `arr` and X as `values` puts the 1s in the
# FIRST column (swapping the two would append them as the last column).
# The column is sized from X instead of a hard-coded 50 rows.
X = np.append(arr=np.ones((X.shape[0], 1)).astype(int), values=X, axis=1)

############# More Efficient Method #############
# sm.add_constant builds the intercept column in a single call.
# NOTE(review): X already carries the 1s column at this point, so with the
# default has_constant='skip' this presumably returns X unchanged -- confirm.
X_with_constant = sm.add_constant(X)
#################################################

# Run backward elimination by removing the highest p-value variable each
# round. NOTE: the value returned by summary() is discarded here, so the
# table is only shown when these lines are run interactively.
X_opt = X[:, [0, 1, 2, 3, 4, 5]]
regressor_OLS = sm.OLS(endog=y, exog=X_opt).fit()
regressor_OLS.summary()

X_opt = X[:, [0, 1, 3, 4, 5]]
regressor_OLS = sm.OLS(endog=y, exog=X_opt).fit()
regressor_OLS.summary()

X_opt = X[:, [0, 3, 4, 5]]
regressor_OLS = sm.OLS(endog=y, exog=X_opt).fit()
regressor_OLS.summary()

X_opt = X[:, [0, 3, 5]]
regressor_OLS = sm.OLS(endog=y, exog=X_opt).fit()
regressor_OLS.summary()
def predict(self, X):
    """Predict with the fitted statsmodels results.

    Args:
        X: Explanatory variables, laid out like the ones given to ``fit``
            (without the intercept column).

    Returns:
        Whatever ``self.results_.predict`` returns for the prepared design.
    """
    if self.fit_intercept:
        # has_constant='add' always inserts the intercept column. The default
        # ('skip') leaves it out when X has a single row or a column that
        # happens to be constant, which breaks the column count expected by
        # the fitted model.
        X = sm.add_constant(X, has_constant='add')
    return self.results_.predict(X)


def fit(self, X, y):
    """Build ``self.model_class(y, X)`` and fit it.

    Stores the model in ``self.model_`` and the fitted results in
    ``self.results_``.

    Args:
        X: Explanatory variables (without the intercept column).
        y: Response values.

    Returns:
        ``self``, so calls can be chained (``est.fit(X, y).predict(X)``).
    """
    if self.fit_intercept:
        # Same policy as predict(): always add the intercept so the design
        # matrices built at fit and predict time have matching columns.
        X = sm.add_constant(X, has_constant='add')
    self.model_ = self.model_class(y, X)
    self.results_ = self.model_.fit()
    return self