def reg_m(y, x):
    """Fit a GLSAR regression of ``y`` on several regressors plus a constant.

    Parameters
    ----------
    y : array-like
        Response values.
    x : sequence of array-like
        Regressors of equal length. The design matrix is assembled so that
        the columns appear in reverse order of ``x`` followed by a column of
        ones (each later regressor is stacked in front of the matrix).

    Returns
    -------
    The results object returned by ``sm.GLSAR(y, design).fit()``.
    """
    intercept = np.ones(len(x[0]))
    design = sm.add_constant(np.column_stack((x[0], intercept)))
    for regressor in x[1:]:
        # Prepend every further regressor to the matrix built so far.
        design = sm.add_constant(np.column_stack((regressor, design)))
    return sm.GLSAR(y, design).fit()
def simple_regression_example():
    """Fit the Spector dataset with four least-squares estimators.

    Prints, in this order, the summaries of:

    * OLS   - ordinary least squares for i.i.d. errors (Sigma = I),
    * GLS   - generalized least squares for arbitrary covariance Sigma,
    * WLS   - weighted least squares for heteroskedastic errors diag(Sigma),
    * GLSAR - feasible GLS with autocorrelated AR(p) errors, Sigma = Sigma(rho).
    """
    # Load the data and append (not prepend) the intercept column.
    spector_data = sm.datasets.spector.load()
    spector_data.exog = sm.add_constant(spector_data.exog, prepend=False)

    for estimator in (sm.OLS, sm.GLS, sm.WLS, sm.GLSAR):
        fitted = estimator(spector_data.endog, spector_data.exog).fit()
        print(fitted.summary())
def linear_model(df, with_wind=True, with_ar=True):
    """Define the linear model with optional wind and autoregression.

    See the latest report for a detailed description.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns ``'height'`` and ``'year'`` and, when
        ``with_wind`` is true, ``'u2'`` and ``'v2'``.
    with_wind : bool
        Add the squared wind components as regressors.
    with_ar : bool
        Fit a GLSAR model with AR(1) errors instead of plain OLS.

    Returns
    -------
    fit
        The fitted results (HC0 covariance).
    names : list of str
        Labels of the regressors, in column order (constant first).
    """
    y = df['height']
    years_since_1970 = df['year'] - 1970
    X = np.c_[
        years_since_1970,
        # Cosine/sine pair with an 18.613-year period (the "Nodal U/V" terms).
        np.cos(2 * np.pi * years_since_1970 / 18.613),
        np.sin(2 * np.pi * years_since_1970 / 18.613),
    ]
    names = ['Constant', 'Trend', 'Nodal U', 'Nodal V']
    if with_wind:
        X = np.c_[X, df['u2'], df['v2']]
        names.extend(['Wind $u^2$', 'Wind $v^2$'])
    X = sm.add_constant(X)
    if with_ar:
        # GLSAR.fit() keeps rho at its initial value (zeros), so the AR(1)
        # coefficient is only estimated by iterative_fit(); this also matches
        # broken_linear_model / quadratic_model.
        model = sm.GLSAR(y, X, missing='drop', rho=1)
        fit = model.iterative_fit(cov_type='HC0')
    else:
        model = sm.OLS(y, X, missing='drop')
        fit = model.fit(cov_type='HC0')
    return fit, names
def __init__(self, df, X, y, num_offices, mod_type='dem'):
    """Fit the vote model and precompute the no-office vote prediction.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs the columns ``'CVAP_EST'`` and ``'NAME'``; a
        ``'votes_predicted'`` column is written into it.
    X : pandas.DataFrame
        Design matrix whose last three columns belong to the coefficients
        stored as ``one_*``, ``two_*`` and ``int_*``.
    y : array-like
        Regression target.
    num_offices : int
        Stored unchanged on the instance.
    mod_type : str
        ``'dem'`` (default) or ``'rep'``.
    """
    self.df = df
    self.num_offices = num_offices
    self.mod_type = mod_type

    # Fit a linear model to get the coefficients
    fitted = sm.GLSAR(y, X, rho=1).iterative_fit(1)
    self.one_coef, self.two_coef, self.int_coef = fitted.params[-3:]
    self.one_std, self.two_std, self.int_std = fitted.bse[-3:]

    # Use the model to predict without offices (drop the last three terms)
    baseline_columns = X.columns[:-3]
    baseline_share = X[baseline_columns].dot(fitted.params[:-3]).values

    # Store the model's vote predictions (disallow negative votes)
    predicted_votes = baseline_share * self.df['CVAP_EST']
    self.df['votes_predicted'] = predicted_votes.apply(lambda v: max(v, 0))

    # Greatly increase the speed of simulation using a dictionary
    self.data_dict = df.set_index('NAME').to_dict('index')

    if self.mod_type == 'rep':
        # cook score is negative for republicans
        # NOTE(review): this flips the sign of the standard error, not the
        # coefficient - confirm that int_std (and not int_coef) is intended.
        self.int_std *= -1
def fit(y, X, reg_names):
    """Run a multiple linear regression with AR(2) errors and pack the result.

    Parameters
    ----------
    y : object with a ``.values`` attribute
        Response series.
    X : array-like
        Regressor matrix; its first column is skipped in the output
        (presumably the intercept - confirm against the caller).
    reg_names : sequence of str
        Names for the reported regressors (``reg_name`` coordinate).

    Returns
    -------
    xr.Dataset
        Variables ``coef``, ``conf_int`` (lower/upper), ``p_value``, ``DWT``
        (Durbin-Watson statistic of the whitened residuals) and ``CoD``
        (R squared). If the regression fails for any reason, every variable
        is filled with NaN instead of raising.
    """
    nr = len(reg_names)
    coords = {'reg_name': (['reg_name'], reg_names),
              'limit': (['limit'], ['lower', 'upper'])}
    try:
        mod = sm.GLSAR(y.values, X, 2, missing='drop')  # MLR analysis with AR2 modeling
        res = mod.iterative_fit()
        output = xr.Dataset(
            {'coef': (['reg_name'], res.params[1:]),
             'conf_int': (['reg_name', 'limit'], res.conf_int()[1:, :]),
             'p_value': (['reg_name'], res.pvalues[1:]),
             'DWT': (sms.durbin_watson(res.wresid)),
             'CoD': (res.rsquared)},
            coords=coords)
    except Exception:
        # Deliberate best-effort fallback, but no longer a bare ``except:``
        # so KeyboardInterrupt / SystemExit still propagate.
        nans = np.full([nr], np.nan)
        output = xr.Dataset(
            {'coef': (['reg_name'], nans),
             'conf_int': (['reg_name', 'limit'], np.array([nans, nans]).T),
             'p_value': (['reg_name'], nans),
             'DWT': (np.nan),
             'CoD': (np.nan)},
            coords=coords)
    return output
def xr_regression(y):
    """Regress ``y`` on the module-level ``reg`` and return whitened residuals.

    An intercept is prepended to ``reg``; the model is GLSAR with AR order 0
    (i.e. MLR analysis without AR2 modeling) and rows with missing values are
    dropped. The residuals are wrapped in an ``xr.DataArray``.
    """
    design = sm.add_constant(reg, prepend=True)  # regressor matrix
    response = y.values.squeeze()
    result = sm.GLSAR(response, design, 0, missing='drop').iterative_fit()
    return xr.DataArray(result.wresid)
def glsar_model():
    """Build a small GLSAR model (AR order 2) on a fixed seven-point dataset.

    Returns a ``ModelWithResults`` holding the fitted results (``model``),
    the unfitted estimator (``alg``) and the design matrix
    (``inference_dataframe``).
    """
    # Generalized Least Squares with AR covariance structure
    design = sm.add_constant(range(1, 8))
    response = [1, 3, 4, 5, 8, 10, 9]
    estimator = sm.GLSAR(response, design, rho=2)
    fitted = estimator.fit()
    return ModelWithResults(model=fitted, alg=estimator, inference_dataframe=design)
def regr_gls_sm(y: Union[np.ndarray, pd.DataFrame], x: Union[np.ndarray, pd.DataFrame], **param):
    """Fit GLS and GLSAR(1) regressions of ``y`` on ``x`` with an intercept.

    Steps:

    1. Fit OLS and take its residuals.
    2. Regress the residuals on their first lag to estimate ``rho``.
    3. Build the AR(1) error covariance ``sigma = rho ** |i - j|`` and fit
       GLS with it.
    4. Independently fit ``GLSAR`` with one lag via ``iterative_fit(1)``.

    Parameters
    ----------
    y : array-like
        Response; always converted to a NumPy array.
    x : numpy.ndarray, pandas.DataFrame or sequence of columns
        Regressors. Arrays/DataFrames are used as given; anything else is
        treated as a sequence of columns and transposed.
    **param
        Accepted for interface compatibility; not used.

    Returns
    -------
    (fit_gls, fit_glsar)
        The fitted GLS and GLSAR results.
    """
    if isinstance(x, (np.ndarray, pd.DataFrame)):
        X = sm.add_constant(x)
    else:
        X = sm.add_constant(np.array(x).T)

    # The original type check on y was always true, i.e. y was always
    # converted; keep that behavior explicitly.
    y = np.asarray(y)

    fit_ols = sm.OLS(y, X).fit()
    # resid is a pandas Series for DataFrame regressors but a plain ndarray
    # otherwise (no .to_numpy()); np.asarray handles both.
    ols_resid = np.asarray(fit_ols.resid)

    resid_fit = sm.OLS(ols_resid[1:], sm.add_constant(ols_resid[:-1])).fit()
    rho = resid_fit.params[1]

    # The error covariance structure is rho**order, which defines an
    # autocorrelation structure.
    order = toeplitz(range(len(ols_resid)))
    sigma = rho ** order
    fit_gls = sm.GLS(y, X, sigma=sigma).fit()

    fit_glsar = sm.GLSAR(y, X, 1).iterative_fit(1)
    return fit_gls, fit_glsar
def fit_data(self, y, x, rho=1):
    """Fit a GLSAR model, print its summary and keep coefficients and errors.

    The model is fitted with ``iterative_fit(1)``, the iterative two-stage
    procedure for a GLS model with AR(p) errors in which the AR parameters
    and the regression coefficients are estimated in turn.

    Parameters
    ----------
    y : array-like
        Response.
    x : array-like
        Regressors.
    rho : int
        Order of the autoregressive covariance.

    Side effects: sets ``self.params`` and ``self.bse`` from the results.
    """
    fitted = sm.GLSAR(y, x, rho=rho).iterative_fit(1)
    print(fitted.summary())
    self.params, self.bse = fitted.params, fitted.bse
def SPGLSAR(context):
    """Fit a GLSAR model from the arguments carried by ``context``.

    Reads from ``context.args``: ``inputData`` (the data sent by the previous
    node), ``featureColumns``, ``labelColumn``, ``rho``, ``missing`` and
    ``method``. Returns the fitted results object.
    """
    # Pull the relevant data out of the context
    args = context.args
    # args.inputData is the data sent by the previous node
    frame = args.inputData

    exog = frame[args.featureColumns].values
    endog = frame[args.labelColumn].values

    glsar_model = sm.GLSAR(endog, exog, rho=args.rho, missing=args.missing)
    return glsar_model.fit(method=args.method)
def _fit_ufunc(y, X, rho=2):
    """Run an MLR with AR(``rho``) errors and return its statistics as an array.

    Parameters
    ----------
    y : array-like
        Response.
    X : array-like
        Regressors given as (n_regressors, n_samples); transposed internally.
    rho : int
        Order of the AR error model (default 2).

    Returns
    -------
    numpy.ndarray of shape (4, n_regressors)
        Rows: coefficients, p-values, Durbin-Watson statistic of the whitened
        residuals (repeated), R squared (repeated). All NaN if the regression
        fails for any reason.
    """
    X = X.T
    nr = X.shape[1]
    try:
        mod = sm.GLSAR(y, X, rho, missing='drop')  # MLR analysis with AR2 modeling
        res = mod.iterative_fit()
        out = np.array((
            res.params[:],
            res.pvalues[:],
            [sms.durbin_watson(res.wresid)] * nr,
            [res.rsquared] * nr,
        ))
    except Exception:
        # Best-effort fallback; unlike a bare ``except:`` this does not
        # swallow KeyboardInterrupt / SystemExit.
        out = np.full((4, nr), np.nan)
    return out
def fn_apis_statsmodels_glsar():
    """HTTP handler: fit GLSAR on the ``x``/``y`` query parameters.

    Query parameters
    ----------------
    x : str
        Python literal for the regressor rows, e.g. ``[[4,67,662],[9,19,618]]``.
    y : str
        Python literal for the responses, e.g. ``[2857.0163,2547.5962]``.

    Returns
    -------
    Response
        JSON document with the attributes of the fitted results (``aic``,
        ``bic``, ``bse``, ... as collected by ``obj_rs``) plus CORS headers.

    Raises
    ------
    ValueError / SyntaxError
        If a parameter is missing or is not a plain Python literal.
    """
    import ast

    import numpy as np

    x = request.args.get('x')
    y = request.args.get('y')
    # SECURITY: these strings come straight from the request. The previous
    # eval() executed arbitrary code supplied by the client;
    # ast.literal_eval only accepts literals (lists, numbers, ...).
    x1 = np.array(ast.literal_eval(x))
    y1 = np.array(ast.literal_eval(y))

    x1 = sm.add_constant(x1)
    model = sm.GLSAR(y1, x1)
    rs = model.fit()
    c = obj_rs(
        rs.aic, rs.bic, rs.bse.tolist(), rs.centered_tss,
        rs.condition_number, rs.conf_int().tolist(),
        rs.cov_HC0.tolist(), rs.cov_HC1.tolist(),
        rs.cov_HC2.tolist(), rs.cov_HC3.tolist(),
        rs.cov_kwds, rs.cov_params().tolist(), rs.cov_type,
        rs.df_model, rs.df_resid, rs.eigenvals.tolist(), rs.ess,
        rs.f_pvalue, rs.fittedvalues.tolist(), rs.fvalue, rs.k_constant,
        rs.llf, rs.mse_model, rs.mse_resid, rs.mse_total, rs.nobs,
        rs.normalized_cov_params.tolist(), rs.params.tolist(),
        rs.pvalues.tolist(), rs.resid.tolist(),
        rs.resid_pearson.tolist(), rs.rsquared, rs.rsquared_adj,
        rs.scale, rs.ssr, rs.tvalues.tolist(), rs.uncentered_tss,
        rs.use_t, rs.wresid.tolist())
    c = c.__dict__
    tmp = json.dumps(c, ensure_ascii=False, indent=4)
    return Response(tmp,
                    mimetype='application/json',
                    headers={
                        "Access-Control-Allow-Origin": "http://127.0.0.0:5000",
                        "Access-Control-Allow-Methods": "GET",
                        "Access-Control-Allow-Headers": "x-requested-with,content-type",
                        "Access-Control-Allow-Credentials": "true"
                    })
def stats(predictor, response, model):
    """Fit the requested statistical model and print its summary.

    Parameters
    ----------
    predictor : array-like
        Passed as the FIRST positional argument of the statsmodels class.
        NOTE(review): statsmodels takes ``(endog, exog)`` in that order, so
        this is the dependent variable - confirm the naming with callers.
    response : array-like
        Passed as the second positional argument.
    model : str or model object
        One of ``'logit'``, ``'lsr'`` (OLS), ``'probit'``, ``'gls'``,
        ``'glsar'``, ``'quantreg'``. Any non-string object is used as an
        already constructed model (it only needs ``fit()``), as before.

    Raises
    ------
    ValueError
        If ``model`` is a string that names no supported model. Previously
        this surfaced as an ``AttributeError`` on the string itself.
    """
    # Code -> attribute of the statsmodels API module.
    model_classes = {
        'logit': 'Logit',
        'lsr': 'OLS',
        'probit': 'Probit',
        'gls': 'GLS',
        'glsar': 'GLSAR',
        'quantreg': 'QuantReg',
    }
    if isinstance(model, str) and model not in model_classes:
        raise ValueError(
            "unknown model {!r}; expected one of {}".format(
                model, sorted(model_classes)))

    predictor = np.asarray(predictor)
    response = np.asarray(response)

    if isinstance(model, str):
        model = getattr(sm, model_classes[model])(predictor, response)

    model = model.fit()
    print(model.summary())
def broken_linear_model(df, with_wind=True):
    """Fit a model in which sea level starts to rise faster in 1993.

    On top of the linear trend, an extra trend term is active only for years
    after 1993. The model is GLSAR with AR(1) errors, fitted iteratively with
    an HC0 covariance.

    Returns ``(fit, names)``: the fitted results and the regressor labels in
    column order (constant first).
    """
    y = df['height']
    year = df['year']
    since_1970 = year - 1970
    columns = [
        since_1970,
        (year > 1993) * (year - 1993),
        np.cos(2 * np.pi * since_1970 / 18.613),
        np.sin(2 * np.pi * since_1970 / 18.613),
    ]
    names = ['Constant', 'Trend', '+trend (1993)', 'Nodal U', 'Nodal V']
    if with_wind:
        columns += [df['u2'], df['v2']]
        names.extend(['Wind $u^2$', 'Wind $v^2$'])
    X = sm.add_constant(np.c_[tuple(columns)])
    fit = sm.GLSAR(y, X, rho=1).iterative_fit(cov_type='HC0')
    return fit, names
def quadratic_model(df, with_wind=True):
    """Fit a parabolic (trend + acceleration) model.

    This corresponds to the hypothesis that sea level is accelerating. The
    model is GLSAR with AR(1) errors, fitted iteratively with an HC0
    covariance.

    Returns ``(fit, names)``: the fitted results and the regressor labels in
    column order (constant first).
    """
    y = df['height']
    since_1970 = df['year'] - 1970
    columns = [
        since_1970,
        since_1970 * since_1970,
        np.cos(2 * np.pi * since_1970 / 18.613),
        np.sin(2 * np.pi * since_1970 / 18.613),
    ]
    names = ['Constant', 'Trend', 'Acceleration', 'Nodal U', 'Nodal V']
    if with_wind:
        columns += [df['u2'], df['v2']]
        names.extend(['Wind $u^2$', 'Wind $v^2$'])
    X = sm.add_constant(np.c_[tuple(columns)])
    fit = sm.GLSAR(y, X, rho=1).iterative_fit(cov_type='HC0')
    return fit, names
def regression_model(X):
    """Fit GLSAR on the 2016 closes, extrapolate over 2017 and return the RMSE.

    Parameters
    ----------
    X : pandas.DataFrame
        Frame with a ``Close`` column whose index is the date (named
        ``Date``), covering 2016 up to the end of the dataset. The code
        assumes the data from 2016 onwards consist of exactly the 2016 and
        2017 rows.

    Returns
    -------
    The ``rmse`` between the 2017 closes and the fitted values for that
    period. As a side effect the train/test/fitted series are plotted.
    """
    data = X.reset_index()
    data['Date'] = data['Date'].astype('datetime64[ns]')
    data.set_index("Date", inplace=True)

    # Row selection by partial date string needs .loc; ``data["2017"]`` was
    # removed in pandas 2.0. .loc also returns fresh frames, so nothing below
    # mutates a slice of ``data``.
    train_data = data.loc["2016"]
    test_data = data.loc["2017"]
    n_train = len(train_data)
    n_total = n_train + len(test_data)

    # The single regressor is the running observation number 0..n_train-1.
    train_flat = train_data.reset_index()
    model = sm.GLSAR(train_flat.Close, train_flat.index.values, 50)
    results = model.fit()
    predictions = results.predict(list(range(n_total)))

    predictions = pd.DataFrame(predictions, index=data.loc["2016":].index)
    merged = pd.concat([train_data.Close, test_data.Close, predictions], axis=1)
    merged.columns = ["Train Data", "Test Data", "Fitted Values"]
    merged.plot()
    return rmse(np.array(test_data.Close),
                np.array(predictions[-len(test_data):]).flatten())
def occlude_dataset(DNN, attribution, percentiles, test=False, keep=False,
                    random=False, batch_size=128, savedir=''):
    """Compute attribution maps and save occluded copies of the dataset.

    For the first ``ceil(len(Xs) / batch_size)`` samples of the module-level
    train (``Xtrain``/``Ytrain``) or test (``Xtest``/``Ytest``) data, an
    attribution map is computed per sample:

    * ``'LRP'`` in ``attribution``: epsilon-LRP relevance of the predicted
      class, summed over axis 3;
    * ``'proposed_method'`` in ``attribution``: residuals of a GLSAR (AR
      order 2) regression of the flattened input on arrays loaded from
      ``../r_array/*.npy``, reshaped to (1, 32, 32);
    * otherwise: the input itself.

    A small Gaussian noise (scale 1e-4) is added to every map. Then, for each
    value in ``percentiles``, ``remove(data, maps, percent, keep)`` is called
    and the occluded images and the labels are written with ``save`` under
    ``savedir``.

    Parameters
    ----------
    DNN : object providing ``forward`` and ``lrp``
    attribution : str
    percentiles : iterable
    test : bool
        Use the test split (and render debug images) instead of train.
    keep : bool
        Forwarded to ``remove``.
    random : bool
        Not used.
    batch_size : int
        Only determines how many samples are processed (see above).
    savedir : str
        Prefix for the output file names.
    """
    print("Condition of test : {}".format(test))
    if test:
        Xs = Xtest
        ys = Ytest
    else:
        Xs = Xtrain
        ys = Ytrain
    print("initial batch_size is : {}".format(batch_size))
    total_batch = math.ceil(len(Xs) / batch_size)
    print("batch size is :{}".format(total_batch))

    hmaps = []
    data = []
    label = []
    # NOTE(review): samples are taken one at a time (Xs[i:i + 1]), so only
    # the first ``total_batch`` samples are processed - confirm intent.
    for i in tqdm(range(total_batch)):
        if 'LRP' in attribution:
            x = Xs[i:i + 1, ...]
            y = ys[i:i + 1, ...]
            ypred = DNN.forward(x)
            # Keep only the relevance of the predicted class.
            m = np.zeros_like(ypred)
            m[:, np.argmax(ypred)] = 1
            Rinit = ypred * m
            # The former ``Rinit.astype(np.float)`` discarded its result and
            # fails on NumPy >= 1.24 (np.float was removed); dropped.
            R = DNN.lrp(Rinit, 'epsilon', 1.)
            R = R.sum(axis=3)
            if not np == numpy:
                R = np.asnumpy(R)
            if test:
                LRP_test = render.digit_to_rgb(R, scaling=3)
            attrs = R
        elif 'proposed_method' in attribution:
            x = Xs[i:i + 1, ...]
            y = ys[i:i + 1, ...]
            ypred = DNN.forward(x)
            m = np.zeros_like(ypred)
            m[:, np.argmax(ypred)] = 1
            Rinit = ypred * m
            R = DNN.lrp(Rinit, 'epsilon', 1.)
            R = R.sum(axis=3)
            if not np == numpy:
                xs = np.asnumpy(x)
                R = np.asnumpy(R)
            # NOTE(review): this overrides the asnumpy() conversion above;
            # kept as in the original - confirm which one is intended.
            xs = x
            tar = xs

            # Assemble the regressor matrix from the stored arrays.
            a = np.load('../r_array/convolution.npy')
            a = np.reshape(a, [a.shape[1] * a.shape[2], 1])
            b = np.load('../r_array/rect.npy')
            b = np.pad(b, ((0, 0), (2, 2), (2, 2), (0, 0)))
            b = np.reshape(b, [b.shape[1] * b.shape[2], b.shape[0] * b.shape[3]])
            c = np.load('../r_array/sumpoll.npy')
            c = np.pad(c, ((0, 0), (2, 2), (2, 2), (0, 0)))
            c = np.reshape(c, [c.shape[1] * c.shape[2], c.shape[3]])
            new_b = np.hstack((b, c))
            new = np.hstack((a, new_b))

            tar = np.reshape(tar, [tar.shape[0] * tar.shape[1] * tar.shape[2]])
            y_tran = tar.transpose()
            new = sm.add_constant(new)
            model = sm.GLSAR(y_tran, new, rho=2)
            result = model.iterative_fit(maxiter=30)
            find = result.resid
            check = np.reshape(find, [1, 32, 32])
            if test:
                proposed_test = render.digit_to_rgb(check, scaling=3)
            attrs = check
        else:
            x = Xs[i:i + 1, ...]
            y = ys[i:i + 1, ...]
            if not np == numpy:
                xs = np.asnumpy(x)
            # NOTE(review): overrides the conversion above, as in the original.
            xs = x
            if test:
                digit = render.digit_to_rgb(xs, scaling=3)
            attrs = xs

        # In-place add: in the fallback branch ``attrs`` is ``x`` itself.
        attrs += np.random.normal(scale=1e-4, size=attrs.shape)
        hmaps.append(attrs)
        data.append(x)
        label.append(y)

    print("Interpretation is done, concatenate...")
    hmaps = np.concatenate(hmaps, axis=0)
    data = np.concatenate(data, axis=0)
    print("concatenate is done...")
    print("print final : {}".format(hmaps.shape))

    for percent in tqdm(percentiles):
        batch_attrs = hmaps
        occluded_images = remove(data, batch_attrs, percent, keep)
        print("Save directory is {}".format(savedir))
        save(
            occluded_images, savedir + '{}_{}_{}.pickle'.format(
                'test' if test else 'train', attribution, percent))
        save(
            np.concatenate(label, axis=0),
            savedir + '{}_{}_{}_{}.pickle'.format(
                'test' if test else 'train', attribution, percent, 'label'))
        print("Occlude image {} percentile...".format(percent))
# OLS fit of northern-hemisphere temperature on the eight proxies.
lmod = smf.ols(
    formula='nhtemp ~ wusa + jasper + westgreen + chesapeake + tornetrask + urals + mongolia + tasman',
    data=globwarm,
).fit()
lmod.summary()

# Successive residuals against each other: visual check for autocorrelation.
plt.scatter(lmod.resid.iloc[:-1], lmod.resid.iloc[1:])
plt.axhline(0, alpha=0.5)
plt.axvline(0, alpha=0.5)
plt.show()
np.corrcoef(lmod.resid.iloc[:-1], lmod.resid.iloc[1:])

# GLSAR with AR(1) errors, estimated with the built-in iteration.
globwarm = globwarm.dropna()
X = sm.add_constant(globwarm.iloc[:, 1:9])
gmod = sm.GLSAR(globwarm.nhtemp, X, rho=1)
res = gmod.iterative_fit(maxiter=6)
gmod.rho

# The same iteration spelled out: fit, re-estimate rho from the residuals
# with Yule-Walker, rebuild the model with the new rho.
gmod = sm.GLSAR(globwarm.nhtemp, X, rho=1)
for i in range(6):
    results = gmod.fit()
    print("AR coefficients: {0}".format(gmod.rho))
    rho, sigma = sm.regression.yule_walker(results.resid, order=gmod.order)
    gmod = sm.GLSAR(globwarm.nhtemp, X, rho)

# Oat variety data for the next section.
oatvar = pd.read_csv("oatvar.csv", index_col=0)
oatvar['variety'] = oatvar['variety'].astype('category')
oatvar['grams'] = oatvar['yield']
oatvar.head()
#endregion

#region REGRESSION MODELS
# Each model is built first and fitted second; the summaries are printed in
# the order OLS, GLS, GLSAR, GMM, IV2SLS.

# OLS family
y = df.y_count

ols_model = sm.OLS(y, X)  # i.i.d. errors
m_OLS = ols_model.fit()
print(m_OLS.summary2())

gls_model = sm.GLS(y, X)  # arbitrary covariance between errors
m_GLS = gls_model.fit()
print(m_GLS.summary2())

glsar_model = sm.GLSAR(y, X)  # feasible GLS with autocorrelated AR(phi) errors
m_GLSAR = glsar_model.fit()
print(m_GLSAR.summary2())

# GMM family (not tested yet)
gmm_model = sm.GMM(y, X)  # i.i.d. errors
m_GMM = gmm_model.fit()
print(m_GMM.summary2())

# ML family

# Instrumental Variables
iv_model = sm.IV2SLS(y, X, instrument=X_augmented)
m_IV2SLS = iv_model.fit()
print(m_IV2SLS.summary2())
#endregion
rep_turnout, election_data, romney_offices, featurizer, mod_type='rep', k=2) X_obama, y_obama, feat_names_obama = make_X_y(obama_df, mod_type='dem') X_romney, y_romney, feat_names_romney = make_X_y(romney_df, mod_type='rep') dem = OnePartyStrat(obama_df, X_obama, y_obama, 800) rep = OnePartyStrat(romney_df, X_romney, y_romney, 800, mod_type='rep') # Activate these lines to set the republican effect equal # to the democtratic effect model = sm.GLSAR(y_obama, X_obama, rho=1).iterative_fit(1) [one_coef, two_coef, int_coef] = model.params[-3:] [one_std, two_std, int_std] = model.bse[-3:] rep.set_params(one_coef, two_coef, -int_coef, one_std, two_std, int_std) # Simulate No Offices Placed a = .000001 # dem.set_params(a, a, a, a, a, a) # rep.set_params(a, a, a, a, a, a) electoral = featurizer.get_electoral_df() print 'Running Simulation' sim = Simulation(100, electoral, dem, rep) sim.run() sim.plot_swing()
# Pad the two middle axes of ``c`` by 2 on each side and flatten them into
# rows, keeping the last axis as columns.
c = np.pad(c, ((0, 0), (2, 2), (2, 2), (0, 0)))
_, c_rows, c_cols, c_channels = c.shape
c = np.reshape(c, [c_rows * c_cols, c_channels])

# Regressor matrix: [a | b | c].
new_b = np.hstack((b, c))
new = np.hstack((a, new_b))

# Flatten the target to one dimension.
y = np.reshape(y, [y.shape[0] * y.shape[1] * y.shape[2]])
y_tran = y.transpose()
new = sm.add_constant(new)

print("y_tran shape : ")
print(y_tran.shape)
print("X shape : ")
print(new.shape)

# GLSAR with AR order 2; the residuals are the attribution map.
model = sm.GLSAR(y_tran, new, rho=2)
result = model.iterative_fit(maxiter=5000)
find = result.resid
check = np.reshape(find, [1, 32, 32])

# Render the input, the LRP map and the proposed map for comparison.
print(xs.shape)
digit = render.digit_to_rgb(xs, scaling=3)
LRP_test = render.digit_to_rgb(R, scaling=3)
proposed_test = render.digit_to_rgb(check, scaling=3)
hm_digit = render.hm_to_rgb(xs, X=xs, scaling=3, sigma=2)
hm_R = render.hm_to_rgb(check, X=xs, scaling=3, sigma=2)
def set(self, endog, exog=None):
    """Create the GLSAR model for the given data and store it on ``self.model``.

    ``self.rmodelparams`` is forwarded to ``sm.GLSAR`` as keyword arguments.
    """
    extra_kwargs = self.rmodelparams
    self.model = sm.GLSAR(endog, exog=exog, **extra_kwargs)
def fit(self, U, Y):
    """Learn the regression weights and derive the recurrence matrix.

    Input:
    -----
    U : numpy array (n_samples x n_times x n_channels_u)
    Y : numpy array (n_samples x n_times x n_channels_y)

    The regression problem comes from ``self.formulate_regression(U, Y)`` and
    is solved according to ``self.solver``:

    * ``"ridge"``    - ``Ridge(alpha=self.penal_weight)``
    * ``"feasible"`` - statsmodels ``GLSAR``
    * ``"dmd"``      - pseudo-inverse through ``quick_svd``

    Sets ``self.model`` (ridge/feasible only), ``self.residuals``,
    ``self.weights``, ``self.weights_y``, ``self.weights_u`` and ``self.A``.

    Raises
    ------
    ValueError
        If ``self.solver`` is none of the names above (previously an
        ``UnboundLocalError`` further down).
    """
    if self.log:
        print("----------------------------------------- \n")
        print("\n TRAIN \n")
        print("----------------------------------------- \n")

    target, regression_mat = self.formulate_regression(U, Y)

    # plug-in model to solve the regression
    if self.log:
        print("Solving the Least-Squares...\n")

    if self.solver == "ridge":
        self.model = Ridge(alpha=self.penal_weight)
        self.model.fit(regression_mat, target)
        weights = self.model.coef_.T
        residuals = target - regression_mat @ weights
        if self.log:
            print("shape of weights: ", weights.T.shape, "\n")

    elif self.solver == "feasible":
        # using statsmodels
        self.model = sm.GLSAR(target, regression_mat)
        result = self.model.fit()
        weights = result.params
        if len(weights.shape) == 1:  # fixing dimensionality bug
            weights = weights.reshape(weights.shape[-1], -1)
        # Give the prediction the target's shape so that a 1-D target is not
        # broadcast against the (n, 1) prediction into an (n, n) array.
        prediction = np.reshape(regression_mat @ weights, np.shape(target))
        residuals = target - prediction
        if self.log:
            print("shape of weights: ", weights.T.shape, "\n")

    elif self.solver == "dmd":
        # pseudo-inverse the transposed system
        # (n_feats_x + n_feats_u, n_common_times * n_samples)
        snapshots = regression_mat.T
        snapshots_next = target.T
        if self.log:
            print("shape of snapshot matrix: ", snapshots.shape)
        # compute surrogate right-side svd
        UU, DD, VVt = quick_svd(snapshots, rank_perc=self.penal_weight)
        # inverse the system and obtain weights
        if self.log:
            print("\n Inverting the system... \n")
        weights = snapshots_next @ VVt.T @ np.diag(1. / DD) @ UU.T
        weights = weights.T
        residuals = target - regression_mat @ weights
        if self.log:
            print("shape of weights: ", weights.T.shape)

    else:
        raise ValueError(
            "unknown solver {!r}; expected 'ridge', 'feasible' or 'dmd'".format(
                self.solver))

    # record learnt coefficients
    self.residuals = residuals
    # The slicing below needs a 2-D array. The former ``len(shape) == 0``
    # guard could never work ([None, :] on a 0-d array raises); atleast_2d
    # turns a 1-D vector into one row and leaves 2-D input untouched.
    self.weights = np.atleast_2d(weights.T)

    # reformat weights to [(n_output_channels, n_lags, n_input_channels)
    # for u, for y]
    n_y_cols = self.lag_y * self.n_channels_y
    self.weights_y = self.weights[:, :n_y_cols].reshape(
        self.n_channels_y, self.lag_y, self.n_channels_y)
    self.weights_u = self.weights[:, n_y_cols:].reshape(
        self.n_channels_y, self.lag_u, self.n_channels_u)

    # form recurrence matrix from weights
    if self.lag_y != 0:
        self.A = np.concatenate([
            self.weights_y.reshape(self.n_channels_y, -1),
            np.eye(N=self.n_feats_x - self.n_channels_y,
                   M=self.n_feats_x,
                   k=self.n_channels_y)
        ], axis=0)
    else:
        self.A = np.zeros((self.n_feats_x, self.n_feats_x))
import wooldridge as woo
import pandas as pd
import numpy as np
import statsmodels.api as sm
import patsy as pt

barium = woo.dataWoo('barium')
T = len(barium)

# monthly time series starting Feb. 1978:
barium.index = pd.date_range(start='1978-02', periods=T, freq='M')

# perform the Cochrane-Orcutt estimation (iterative procedure):
formula = ('np.log(chnimp) ~ np.log(chempi) + np.log(gas) +'
           'np.log(rtwex) + befile6 + affile6 + afdec6')
y, X = pt.dmatrices(formula, data=barium, return_type='dataframe')
reg = sm.GLSAR(y, X)
CORC_results = reg.iterative_fit(maxiter=100)

# coefficients and standard errors side by side:
table = pd.DataFrame({
    'b_CORC': CORC_results.params,
    'se_CORC': CORC_results.bse,
})
print(f'reg.rho: {reg.rho}\n')
print(f'table: \n{table}\n')
import pandas as pd
import numpy as np
import statsmodels.api as sm
import matplotlib.pyplot as plt

# Regress ``ice`` on ``temp`` and ``street`` (plus an intercept) with GLSAR
# and print the summary.
data = pd.read_csv('ice.csv')
y = data['ice']
x = sm.add_constant(data[['temp', 'street']])

model = sm.GLSAR(y, x)
est = model.fit()
print(est.summary())
subsets.remove('hf_test_1') if len(rm_test_event) < 2: subsets.remove('rm_test_2') subsets.remove('rm_test_agg') for m in model_types: print('\n- ' + m + ' -') if m == 'MLR': # Multiple Linear Regression model = sm.OLS(train['log' + f], sm.add_constant(X_train), hasconst=True).fit() elif m == 'GLS': # Generalized Least Squares model = sm.GLSAR(train['log' + f], sm.add_constant(X_train), rho=2, missing='drop', hasconst=True).iterative_fit(maxiter=5) elif m == 'RF': # Random Forests print('\n- - Random Forest - -') model = RandomForestRegressor(n_estimators=1000, oob_score=True, max_features=0.75, random_state=0) model.fit(X_train, train['log' + f]) elif m == 'ANN': # Artificial Neural Network (MLP) print('\n- - Artificial Neural Network - -') nodes = 2 * len( final_vars
# NOTE: print is called with a single parenthesized argument throughout, which
# behaves the same on Python 2 and Python 3 (the bare print statement is a
# syntax error on Python 3).
print('Making df and fitting NMF...')
obama_df = make_joined_df(census_data, CVAP, dem_turnout, election_data,
                          obama_offices, featurizer, mod_type='dem', k=2)
romney_df = make_joined_df(census_data, CVAP, rep_turnout, election_data,
                           romney_offices, featurizer, mod_type='rep', k=2)

# Design matrix, target and feature names per party.
X_obama, y_obama, feat_names_obama = make_X_y(obama_df, mod_type='dem')
X_romney, y_romney, feat_names_romney = make_X_y(romney_df, mod_type='rep')

print('Fitting models and printing results...')
# GLSAR with AR(1) errors on the democratic data, one estimation iteration.
glsar_model = sm.GLSAR(y_obama, X_obama, rho=1)
glsar_results = glsar_model.iterative_fit(1)
print(glsar_results.summary())

# Plot residuals against fit target
# plot_box_resids(glsar_results, glsar_results.fittedvalues)
# np.corrcoef(lmod.resid.iloc[:-1],lmod.resid.iloc[1:]).round(3) # lmod.resid.autocorr() # X = lmod.model.wexog y = lmod.model.wendog gmod = sm.GLSAR(y, X, rho=1) res=gmod.iterative_fit(maxiter=6) gmod.rho.round(3) # res.summary().tables[1] # ## Weighted Least Squares # import faraway.datasets.fpe fpe = faraway.datasets.fpe.load() fpe.head()
order = toeplitz(range(len(ols_resid)))

# so that our error covariance structure is actually rho**order
# which defines an autocorrelation structure
sigma = rho**order
gls_model = sm.GLS(data.endog, data.exog, sigma=sigma)
gls_results = gls_model.fit()

# Of course, the exact rho in this instance is not known, so it might make
# more sense to use feasible GLS, which currently only has experimental
# support.

# We can use the GLSAR model with one lag, to get to a similar result
glsar_model = sm.GLSAR(data.endog, data.exog, 1)
glsar_results = glsar_model.iterative_fit(1)

# Comparing GLS and GLSAR results, we see that there are some small
# differences in the parameter estimates and the resulting standard
# errors of the parameter estimates. This might be due to the numerical
# differences in the algorithm, e.g. the treatment of initial conditions,
# because of the small number of observations in the longley dataset.

# print is called with a single parenthesized argument, which behaves the
# same on Python 2 and Python 3.
print(gls_results.params)
print(glsar_results.params)
print(gls_results.bse)
print(glsar_results.bse)
def RegressionAnalysis(df, Independent, Explanatory, Indicators=None,
                       prefix=None):
    """Fit the requested statsmodels models and collect their indicators.

    Arguments:
    ----------
    - df: Pandas DataFrame
        Contains the data to be analyzed.
    - Independent: str
        Name of the column in df that is passed to every model as the
        response (first, "endog" argument). The parameter name is kept
        for backward compatibility even though this is the variable being
        explained.
    - Explanatory: str or list
        Name of the column in df holding the regressor, or a list of
        column names for a multivariate analysis. A constant is added
        with ``sm.add_constant`` before fitting.
    - Indicators: list or None
        Names of the indicators/models to compute. ``None`` selects every
        supported name. Names are given WITHOUT ``prefix``.
    - prefix: str or None
        Text prepended to every output name; ``None`` means no prefix.

    Return:
    ----------
    - df: Pandas DataFrame
        The frame returned by the fitted-values / residuals helpers after
        each selected model has been processed.
    - OneValueIndicators: Pandas DataFrame
        The collected one-value indicators, built with
        ``pd.DataFrame.from_dict(..., orient="index")``.

    Notes:
    ----------
    - The Yule Walker and Burg estimates are computed on the explanatory
      series (column 1 of the design matrix) and only when exactly one
      explanatory column was supplied. Only the first coefficient
      (``rho[0]``) is stored, whatever the order.
    - The GLM Pearson chi2 is stored under ``<name> Pearson chi2`` so that
      several GLM families no longer overwrite one shared key.
    - NOTE(review): ``statsmodels.tsa.ar_model.AR`` and
      ``statsmodels.tsa.arima_model.ARMA`` / ``ARIMA`` appear to have been
      removed from recent statsmodels releases -- confirm the pinned
      version before relying on the "AR", "ARMA" and "ARIMA" indicators.
    """
    if Indicators is None:
        Indicators = [
            "OLS", "GLSAR", "RecursiveLS",
            "Yule Walker Order 1", "Yule Walker Order 2",
            "Yule Walker Order 3",
            "Burg Order 1", "Burg Order 2", "Burg Order 3",
            "QuantReg",
            "GLM Binomial", "GLM Gamma", "GLM Gaussian",
            "GLM Inverse Gaussian", "GLM Negative Binomial", "GLM Poisson",
            "GLM Tweedie",
            "AR", "ARMA", "ARIMA",
            "Granger Causality", "Levinson Durbin", "Cointegration",
        ]
    if prefix is None:
        prefix = ""

    # Pre-processing
    Independent = pd.DataFrame(df[Independent])
    Explanatory = pd.DataFrame(df[Explanatory])
    y_sm = np.array(Independent).reshape((-1, 1))
    x_sm = sm.add_constant(np.array(Explanatory))

    NumDecimal = 3  # Number of decimals for rounding numbers
    OneValueIndicators = {}

    ##################################################
    ##### PART 1: Linear Regression
    ##################################################

    ########## Section 1: OLS
    if "OLS" in Indicators:
        name = prefix + "OLS"
        results = sm.OLS(y_sm, x_sm).fit()
        OneValueIndicators = Statsmodels_Regression_All_OneValueIndicators(
            OneValueIndicators, name, results, Explanatory, NumDecimal)
        df = _append_fit_series(df, results, name)

    ########## Section 2: WLS -- Not Implemented
    ########## Section 3: GLS -- Not Implemented

    ########## Section 4: GLSAR
    if "GLSAR" in Indicators:
        name = prefix + "GLSAR"
        results = sm.GLSAR(y_sm, x_sm, 1).iterative_fit(1)
        OneValueIndicators = Statsmodels_Regression_All_OneValueIndicators(
            OneValueIndicators, name, results, Explanatory, NumDecimal)
        df = _append_fit_series(df, results, name)

    ########## Section 5: RLS
    if "RecursiveLS" in Indicators:
        name = prefix + "RecursiveLS"
        results = sm.RecursiveLS(y_sm, x_sm).fit()
        OneValueIndicators = Statsmodels_Regression_All_OneValueIndicators(
            OneValueIndicators, name, results, Explanatory, NumDecimal)
        OneValueIndicators[name + " Z Value"] = results.zvalues
        df = _append_fit_series(df, results, name)
        # Cumsum: Not Implemented

    # The two AR(p) estimators below work on a single series, so they are
    # skipped unless exactly one explanatory column was given.
    single_regressor = len(Explanatory.columns) == 1

    ########## Sections 6-8: Yule Walker, orders 1 to 3
    for order in (1, 2, 3):
        label = "Yule Walker Order %d" % order
        if label in Indicators and single_regressor:
            name = prefix + label
            rho, sigma = statsmodels.regression.linear_model.yule_walker(
                x_sm[:, 1].flatten(), order=order)
            OneValueIndicators[name + " Rho"] = round(rho[0], NumDecimal)
            OneValueIndicators[name + " Sigma"] = round(sigma, NumDecimal)

    ########## Sections 9-11: Burg's AR(p), orders 1 to 3
    for order in (1, 2, 3):
        label = "Burg Order %d" % order
        if label in Indicators and single_regressor:
            name = prefix + label
            rho, sigma2 = statsmodels.regression.linear_model.burg(
                x_sm[:, 1].flatten(), order=order)
            OneValueIndicators[name + " Rho"] = round(rho[0], NumDecimal)
            OneValueIndicators[name + " Sigma2"] = round(sigma2, NumDecimal)

    ########## Section 12: Quantile Regression
    if "QuantReg" in Indicators:
        name = prefix + "QuantReg"
        results = sm.QuantReg(y_sm, x_sm).fit()
        OneValueIndicators = Statsmodels_Regression_All_OneValueIndicators(
            OneValueIndicators, name, results, Explanatory, NumDecimal)
        df = _append_fit_series(df, results, name)

    ##################################################
    ##### PART 2: Generalized Linear Models
    ##################################################
    # (indicator name, attribute of sm.families).  The family is looked up
    # and instantiated only when its indicator is selected.
    glm_families = (
        ("GLM Binomial", "Binomial"),
        ("GLM Gamma", "Gamma"),
        ("GLM Gaussian", "Gaussian"),
        ("GLM Inverse Gaussian", "InverseGaussian"),
        ("GLM Negative Binomial", "NegativeBinomial"),
        ("GLM Poisson", "Poisson"),
        ("GLM Tweedie", "Tweedie"),
    )
    for label, family_name in glm_families:
        if label in Indicators:
            name = prefix + label
            family = getattr(sm.families, family_name)()
            results = sm.GLM(y_sm, x_sm, family=family).fit()
            OneValueIndicators = (
                Statsmodels_Regression_All_OneValueIndicators(
                    OneValueIndicators, name, results, Explanatory,
                    NumDecimal))
            # Keyed by model name so the families do not overwrite each
            # other's statistic.
            OneValueIndicators[name + " Pearson chi2"] = round(
                results.pearson_chi2, NumDecimal)
            df = _append_fit_series(df, results, name)

    ##################################################
    ##### PART 3: Robust Linear Models -- Not Implemented
    ##################################################

    ##################################################
    ##### PART 4: AR models
    ##################################################
    if "AR" in Indicators:
        name = prefix + "AR"
        results = statsmodels.tsa.ar_model.AR(Independent).fit()
        OneValueIndicators = Statsmodels_Regression_All_OneValueIndicators(
            OneValueIndicators, name, results, Explanatory, NumDecimal)
        OneValueIndicators[name + " Final Prediction Error"] = results.fpe
        OneValueIndicators[
            name + " Hannan-Quinn Information Criterion"] = results.hqic
        OneValueIndicators[name + " Roots"] = results.roots
        df = _append_fit_series(df, results, name)

    ##################################################
    ##### PART 5: ARMA
    ##################################################
    if "ARMA" in Indicators:
        name = prefix + "ARMA"
        results = statsmodels.tsa.arima_model.ARMA(
            y_sm, (5, 5), x_sm).fit()
        OneValueIndicators = Statsmodels_Regression_All_OneValueIndicators(
            OneValueIndicators, name, results, Explanatory, NumDecimal)
        OneValueIndicators[name + " AR Params"] = results.arparams
        OneValueIndicators[name + " AR Roots"] = results.arroots
        OneValueIndicators[name + " AR Freq"] = results.arfreq
        OneValueIndicators[
            name + " Hannan-Quinn Information Criterion"] = results.hqic
        OneValueIndicators[name + " MA Params"] = results.maparams
        # MA roots / frequencies are best effort: when they cannot be
        # computed the entry is simply left out.
        try:
            OneValueIndicators[name + " MA Roots"] = results.maroots
        except Exception:
            pass
        try:
            OneValueIndicators[name + " MA Freq"] = results.mafreq
        except Exception:
            pass
        OneValueIndicators[name + " Sigma2"] = results.sigma2
        df = _append_fit_series(df, results, name)

    ##################################################
    ##### PART 6: ARIMA
    ##################################################
    if "ARIMA" in Indicators:
        name = prefix + "ARIMA"
        results = statsmodels.tsa.arima_model.ARIMA(
            Independent, (2, 2, 2), Explanatory).fit()
        OneValueIndicators = Statsmodels_Regression_All_OneValueIndicators(
            OneValueIndicators, name, results, Explanatory, NumDecimal)
        OneValueIndicators[name + " AR Params"] = results.arparams
        OneValueIndicators[name + " AR Roots"] = results.arroots
        OneValueIndicators[name + " AR Freq"] = results.arfreq
        OneValueIndicators[
            name + " Hannan-Quinn Information Criterion"] = results.hqic
        OneValueIndicators[name + " MA Params"] = results.maparams
        OneValueIndicators[name + " MA Roots"] = results.maroots
        OneValueIndicators[name + " MA Freq"] = results.mafreq
        OneValueIndicators[name + " Sigma2"] = results.sigma2
        df = _append_fit_series(df, results, name)

    ##################################################
    ##### PART 7: Univariate Analysis
    ##################################################
    # Selection is tested on the bare name, like every section above.  The
    # prefixed spelling is still accepted because earlier versions tested
    # the name only after the prefix had been applied.

    # Granger Causality
    label = "Granger Causality"
    name = prefix + label
    if label in Indicators or name in Indicators:
        OneValueIndicators[name] = ts.grangercausalitytests(
            Independent.merge(Explanatory, how="inner",
                              left_index=True, right_index=True),
            maxlag=10)

    # Levinson Durbin
    label = "Levinson Durbin"
    name = prefix + label
    if label in Indicators or name in Indicators:
        OneValueIndicators[name] = ts.levinson_durbin(Independent)

    # Cointegration
    label = "Cointegration"
    name = prefix + label
    if label in Indicators or name in Indicators:
        OneValueIndicators[name] = ts.coint(
            Independent, Explanatory, trend="ct", return_results=False)

    ##################################################
    ##### Not Implemented
    ##################################################
    # BDS Statistic (residuals analysis): Not Implemented
    # Return's Ljung-Box Q Statistic (AR): Not Implemented

    OneValueIndicators = pd.DataFrame.from_dict(OneValueIndicators,
                                                orient="index")
    return df, OneValueIndicators


def _append_fit_series(df, results, name):
    """Add the fitted values and the residuals of one fit to ``df``."""
    df = Statsmodels_FittedValues(df, results, name)
    return Statsmodels_LR_Residuals(df, results, name)