def artificial_data():
    """Fit a VAR to a noisy synthetic two-variable series and plot forecasts.

    The series consists of the signal returned by ``sin_`` plus Gaussian
    noise, and the square of that noisy signal. Two figures are shown:

    1. a single forecast of ``N`` steps made from the end of the series;
    2. the forecast returned by ``test_forecast``.
    """
    N = 301
    # Alternative test signal: x, y = n_hat(N, 6)
    x, y = sin_(N, 5)
    y = y + 0.01 * np.random.normal(0., .5, len(y))
    z = y * y
    # One [y, z] row per time point. np.column_stack replaces the
    # deprecated np.matrix(...).transpose(); Y stays a plain list of rows.
    Y = np.column_stack([y, z]).tolist()

    # ---- single prediction -------------------------------------------
    title('single prediction')
    model = VAR(Y)
    model_fit = model.fit(maxlags=15, ic='aic')
    # forecast() expects an array holding the most recent observations,
    # so the list slice is converted explicitly.
    last_obs = np.asarray(Y[-model_fit.k_ar:])
    pred = model_fit.forecast(last_obs, N)
    xx = np.arange(N, N + len(pred))
    assert (len(pred) == N)
    plot(x, Y)
    plot(xx, pred, '--')
    show()

    # ---- dynamic prediction ------------------------------------------
    title('dynamic prediction')
    xx, pred = test_forecast(x, Y, len_for_prediction=100, n_pred=100,
                             maxlags=15, ic='aic')
    plot(x, Y)
    plot(xx, pred, '--')
    show()
def var_fit(self, endog, maxlags=5, ic='aic', verbose=False, trend='c'):
    """Fit a VAR model, selecting the lag order by an information criterion.

    Parameters
    ----------
    endog : array-like, shape (n_time_points, n_variables)
        2-d endogenous response variable.
    maxlags : int
        Maximum number of lags to check for order selection.
    ic : {'aic', 'fpe', 'hqic', 'bic', None}, optional (default 'aic')
        Information criterion to use for VAR order selection.
        aic : Akaike
        fpe : Final prediction error
        hqic : Hannan-Quinn
        bic : Bayesian a.k.a. Schwarz
    verbose : bool, default False
        Print order selection output to the screen.
    trend : str {"c", "ct", "ctt", "nc"}, optional (default "c")
        "c" - add constant
        "ct" - constant and trend
        "ctt" - constant, linear and quadratic trend
        "nc" - no constant, no trend
        Note that these are prepended to the columns of the dataset.
        The value is passed to ``VAR.fit`` unchanged.

    Returns
    -------
    self
        The fitted results are stored on ``self.var_result``.
    """
    self.var_result = VAR(endog).fit(maxlags=maxlags, ic=ic,
                                     verbose=verbose, trend=trend)
    # The documented contract is "Returns self"; the original fell off
    # the end and returned None.
    return self
def parametersAR(self, lag=1):
    """Fit a VAR of order ``lag`` to the historical b0/b1/b2 parameters.

    The fitted results object is stored on ``self._varModel``.

    Returns
    -------
    bool
        Always ``True``.
    """
    history = self.parametersHistorical()
    factors = history[['b0', 'b1', 'b2']]
    fitted = VAR(factors).fit(lag)
    self._varModel = fitted
    # The summary is built here but its value is not used.
    fitted.summary()
    return True
def var(data):
    """Fit a VAR(9) on scaled data and forecast the first column.

    The frame is interpolated, cast to float32, passed through a scaling
    (optionally scaling + PCA) pipeline, and split 70/30 by position.
    A VAR of order 9 is fitted on the transformed training part.

    Relies on the module-level names ``DOpca`` and ``n_pca``.

    Parameters
    ----------
    data : pandas.DataFrame
        Time-indexed observations; column 0 is the series reported back.

    Returns
    -------
    tuple
        ``(trainPredict, testPredict, time_, trainY, testY)``. The two
        predictions are ``pandas.Series`` on success; if mapping the
        forecasts back to the original space fails, both are ``-999``.
        ``time_`` is the elapsed wall-clock time in seconds.
    """
    start_time_ = time.time()
    data = data.interpolate(limit=30000000, limit_direction='both').astype('float32')

    if DOpca:
        steps = [('scale', StandardScaler()), ('pca', PCA(n_components=n_pca))]
    else:
        steps = [('scale', StandardScaler())]
    pipe = Pipeline(steps=steps)
    # NOTE(review): the pipeline is fitted on the full frame, i.e. it also
    # sees the test portion -- confirm this is intended.
    pipe.fit(data)

    # Positional 70/30 split. An earlier date-based split on `split_date`
    # was overwritten by this one before being used, so it is not done.
    split = int(0.7 * len(data))
    train, test = data[:split], data[split:]
    sc_train, sc_test = pipe.transform(train), pipe.transform(test)

    model_fit = VAR(endog=sc_train).fit(9)
    trainPredict = model_fit.forecast(sc_train, steps=len(sc_train))
    testPredict = model_fit.forecast(sc_test, steps=len(sc_test))
    try:
        trainPredict = pipe.inverse_transform(trainPredict)
        testPredict = pipe.inverse_transform(testPredict)
        trainPredict = pd.Series(data=(trainPredict[:, 0]), index=train.index)
        testPredict = pd.Series(data=(testPredict[:, 0]), index=test.index)
    except Exception:
        # Best-effort: keep the sentinel callers already expect, but no
        # longer swallow KeyboardInterrupt/SystemExit as a bare except did.
        trainPredict, testPredict = -999, -999

    trainY = pd.Series(data=(train.iloc[:, 0]), index=train.index)
    testY = pd.Series(data=(test.iloc[:, 0]), index=test.index)
    time_ = time.time() - start_time_
    return trainPredict, testPredict, time_, trainY, testY
def fit_model(self):
    """Fit a VAR on ``self.train`` with default settings.

    The fitted results object is stored on ``self.model_fit``.
    """
    self.model_fit = VAR(endog=self.train).fit()
def var_predict(df, n_forwards=(1, 3), n_lags=4, test_ratio=0.2):
    """Forecast the test part of ``df`` with a VAR and dump the result to CSV.

    :param df: pandas.DataFrame, rows are time steps, columns are series.
    :param n_forwards: sequence of forecast horizons (in steps).
    :param n_lags: order of the VAR model.
    :param test_ratio: fraction of rows (taken from the end) used as test set.
    :return: ([one prediction DataFrame per horizon], df_test)

    Side effects: writes ``./df_predict.csv`` (prediction for the last
    horizon in ``n_forwards``) and ``./df_test.csv``.
    """
    n_sample, n_output = df.shape
    n_test = int(round(n_sample * test_ratio))
    n_train = n_sample - n_test
    df_train = df[:n_train]
    df_test = df[n_train:]

    # Scale with the global mean/std of the training values only.
    train_values = df_train.values
    scaler = StandardScaler(mean=train_values.mean(), std=train_values.std())
    var_result = VAR(scaler.transform(train_values)).fit(n_lags)
    max_n_forwards = np.max(n_forwards)

    # Slide an n_lags-long window over the data and forecast from each one.
    result = np.zeros(shape=(len(n_forwards), n_test, n_output))
    first_window = n_train - n_lags - max_n_forwards + 1
    for window_start in range(first_window, n_sample - n_lags):
        window = df.values[window_start:window_start + n_lags]
        prediction = var_result.forecast(scaler.transform(window), max_n_forwards)
        # Test-set row index that a 0-step-ahead forecast would land on.
        base = window_start - n_train + n_lags - 1
        for slot, horizon in enumerate(n_forwards):
            target = base + horizon
            if 0 <= target < n_test:
                result[slot, target, :] = prediction[horizon - 1, :]

    df_predicts = [
        pd.DataFrame(scaler.inverse_transform(result[slot]),
                     index=df_test.index, columns=df_test.columns)
        for slot in range(len(n_forwards))
    ]
    # Only the frame for the last horizon is written out.
    df_predict = df_predicts[-1]
    df_predict.to_csv("./df_predict.csv", sep=',', index=False)
    df_test.to_csv("./df_test.csv", sep=',', index=False)
    return df_predicts, df_test
def test_select_order(self):
    """Smoke test: fit with AIC and FPE selection, then call select_order."""
    for criterion in ('aic', 'fpe'):
        self.model.fit(10, ic=criterion, verbose=True)

    # bug
    VAR(self.model.endog).select_order()
def control_lqr(env, agent, model_fit, data, lag=4):
    """Run one rendered episode under an LQR gain derived from a VAR fit.

    The gain ``K`` is first computed from ``model_fit.coefs[0]``. During
    the episode every observation is appended to ``data`` and, every 10
    steps (including step 0), a VAR of order ``lag`` is re-fitted on the
    grown data and ``K`` is recomputed from its first lag matrix.

    The loop runs for at most 500 steps and stops early when the
    environment reports ``done``.
    """
    B = np.array([[0], [0], [-.01], [-.01]])
    Q = np.diag((10., 1., 10., 1.))

    print(model_fit.coefs)
    K = lqr(model_fit.coefs[0], B, Q, 1)
    print("K=")
    print(K)

    obs = env.reset()
    action = agent.begin_episode(obs)
    for step in range(500):
        env.render()
        time.sleep(0.15)  # slow the episode down so it can be watched

        data = np.vstack([data, obs])
        if step % 10 == 0:
            # periodic re-identification of the dynamics
            refit = VAR(data).fit(lag)
            K = lqr(refit.coefs[0], B, Q, 1)

        action = get_control(K, obs)
        obs, reward, done, info = env.step(action)
        if not done:
            continue
        print("Final episode: lasted {} timesteps, data: {}".format(
            step + 1, obs))
        break
def generate_final_predictions(df_coords, lag_order=3, display=False):
    """Train a VAR on all of ``df_coords`` and forecast two steps ahead.

    Parameters
    ----------
    df_coords : pandas.DataFrame
        Two-column coordinate history; the forecast columns are labelled
        ``future_latitude`` and ``future_longitude``.
    lag_order : int
        Order of the VAR model.
    display : bool
        When true, print the model summary and the forecast table.

    Returns
    -------
    pandas.DataFrame
        Columns ``year``, ``future_latitude``, ``future_longitude`` for
        the years 2022 and 2023.
    """
    results = VAR(endog=df_coords).fit(lag_order)
    # `results.endog` is the supported name for the fitted data; the old
    # `.y` alias is deprecated.
    forecast = results.forecast(results.endog, steps=2)

    df_forecast = pd.DataFrame(forecast,
                               columns=['future_latitude', 'future_longitude'])
    df_forecast['year'] = [2022, 2023]
    df_forecast = df_forecast[['year', 'future_latitude', 'future_longitude']]

    if display:
        print()
        print('Final model information:')
        print()
        print(results.summary())
        print()
        print('Future hotspot forecasts:')
        print()
        print(df_forecast)
    return df_forecast
def extract_model(self, input, save_status=False):
    """Forecast ``input`` month-start periods past the end of the cleaned data.

    Parameters
    ----------
    input : value convertible with ``int``
        Number of periods to forecast.
    save_status : bool
        When exactly ``True``, ``self._save_image_model`` is called with
        the history-plus-forecast frame and the forecast frame.

    Returns
    -------
    pandas.DataFrame or str
        The forecast rows (columns Q1, Q2, Q3), or a help message when
        ``self._model_clean()`` returns ``False``.
    """
    total = self._model_clean()
    if total is False:
        return "Try to find available area by:\n sh casa.sh --find aptartment name\n"

    horizon = int(input)
    # periods = horizon + 1 and dropping the first entry yields exactly
    # `horizon` dates strictly after the last observed one.
    future_index = pd.date_range(start=total.index[-1], periods=horizon + 1,
                                 freq='MS')[1:]

    fitted = VAR(total).fit()
    forecast = pd.DataFrame(fitted.forecast(y=total.values, steps=horizon),
                            columns=['Q1', 'Q2', 'Q3'], index=future_index)
    combined = pd.concat([total, forecast], axis=0)
    future_rows = combined.loc[future_index]

    if save_status is True:
        self._save_image_model(eval_model=combined, pred_model=forecast,
                               pred_num=input)
    return future_rows
def var_predict(df, n_forwards=(1, 3), n_lags=4, test_ratio=0.2):
    """
    Multivariate time series forecasting using a Vector Auto-Regressive model.

    :param df: pandas.DataFrame, index: time, columns: sensor id, content: data.
    :param n_forwards: a tuple of horizons.
    :param n_lags: the order of the VAR model.
    :param test_ratio: fraction of samples (from the end) held out as test set.
    :return: [list of predictions, one per horizon], df_test
    """
    n_sample, n_output = df.shape
    n_test = int(round(n_sample * test_ratio))
    n_train = n_sample - n_test
    df_train, df_test = df[:n_train], df[n_train:]

    # The scaler is parameterized by the training set's global mean/std.
    train_matrix = df_train.values
    scaler = StandardScaler(mean=train_matrix.mean(), std=train_matrix.std())
    var_result = VAR(scaler.transform(train_matrix)).fit(n_lags)
    max_n_forwards = np.max(n_forwards)

    # Forecast from every n_lags-long window whose horizons can fall in
    # the test range.
    result = np.zeros(shape=(len(n_forwards), n_test, n_output))
    all_values = df.values
    begin = n_train - n_lags - max_n_forwards + 1
    end = n_sample - n_lags
    for offset in range(begin, end):
        history = scaler.transform(all_values[offset:offset + n_lags])
        prediction = var_result.forecast(history, max_n_forwards)
        for k, step in enumerate(n_forwards):
            row = offset - n_train + n_lags + step - 1
            if row < 0 or row >= n_test:
                continue
            result[k, row, :] = prediction[step - 1, :]

    df_predicts = []
    for k in range(len(n_forwards)):
        restored = scaler.inverse_transform(result[k])
        df_predicts.append(pd.DataFrame(restored, index=df_test.index,
                                        columns=df_test.columns))
    return df_predicts, df_test
def _run_varLiNGAM(self, xt, verbose=False):
    """Run the VarLiNGAM algorithm on data.

    Args:
        xt: time series matrix with size n*m (length*num_variables)
        verbose: forwarded to the ``LiNGAM`` constructor

    Returns:
        Tuple: (Bo, Bhat) instantaneous and lagged causal coefficients
    """
    n_vars = xt.shape[1]

    # Step 1: VAR estimation with the configured lag
    var_results = VAR(xt).fit(self.lag)
    # Drop the first row of the parameter matrix (the intercept under the
    # default trend).
    # NOTE(review): the remaining rows are used without transposing and
    # the product below needs them to have n_vars rows -- confirm the
    # orientation and behaviour for self.lag > 1.
    lagged_params = var_results.params[1:, :]

    # Step 2: LiNGAM on the VAR residuals gives the instantaneous matrix
    residuals = pd.DataFrame(var_results.resid)
    Bo_ = LiNGAM(verbose=verbose)._run_LiNGAM(residuals)

    # Step 3: lagged coefficients, (I - Bo) . M
    Bhat_ = np.dot(np.identity(n_vars) - Bo_, lagged_params)
    return (Bo_, Bhat_)
def var(flow, target):
    """Score one-step VAR forecasts of ``flow`` against ``target``.

    For every sample ``i`` and channel ``j`` a two-variable series is
    built from consecutive pairs ``(flow[i, k, j], flow[i, k + 1, j])``,
    a default VAR is fitted, and the second component of the one-step
    forecast is stored. A sample/channel whose fit or forecast fails
    gets ``0.0``.

    Relies on the module-level names ``mask_threshold`` and ``verbose``.
    Side effect: silences all warnings process-wide.

    Returns
    -------
    tuple
        ``(in_rmse, out_rmse, in_mae, out_mae)`` computed on columns 0
        ("in") and 1 ("out"), restricted to entries whose target exceeds
        ``mask_threshold``.
    """
    warnings.filterwarnings("ignore")
    in_mask = np.greater(target[:, 0], mask_threshold)
    out_mask = np.greater(target[:, 1], mask_threshold)

    n_rows, n_steps, n_channels = flow.shape[0], flow.shape[1], flow.shape[-1]
    result = np.zeros((n_rows, n_channels))
    for i in range(n_rows):
        if verbose and (i + 1) % 10000 == 0:
            print("VAR: line {} of {}".format(i + 1, n_rows))
        for j in range(n_channels):
            data = [[flow[i, k, j], flow[i, k + 1, j]]
                    for k in range(n_steps - 1)]
            model = VAR(data)
            try:
                model_fit = model.fit()
                # `.endog` replaces the deprecated `.y` alias. Where the
                # alias is missing, the old bare except would have hidden
                # the AttributeError and left every forecast at 0.0.
                result[i, j] = model_fit.forecast(model_fit.endog, steps=1)[0][1]
            except Exception:
                # Degenerate series (e.g. constant columns) cannot be
                # fitted; fall back to 0.0 as before.
                result[i, j] = 0.0

    in_err = target[:, 0][in_mask] - result[:, 0][in_mask]
    out_err = target[:, 1][out_mask] - result[:, 1][out_mask]
    in_rmse = np.sqrt(np.mean(np.square(in_err)))
    out_rmse = np.sqrt(np.mean(np.square(out_err)))
    in_mae = np.mean(np.abs(in_err))
    out_mae = np.mean(np.abs(out_err))
    return in_rmse, out_rmse, in_mae, out_mae
def extract(self, instance):
    """Return the VAR parameters of ``instance.eeg_data`` as a flat vector.

    A VAR with ``self.lags`` lags is fitted on the transposed EEG data
    and its parameter matrix is flattened row by row.
    """
    assert isinstance(instance, Instance)
    fitted = VAR(instance.eeg_data.T).fit(self.lags)
    # Row-major flattening into a fresh 1d ndarray.
    features = fitted.params.flatten()
    self.assert_features(features)
    return features
def setup_class(cls):
    """Prepare shared fixtures: growth-rate data and two fitted VARs.

    The data are 400 * log-differences of realgdp, realcons and realinv
    from the macrodata set. ``cls.res0`` is fitted with ``maxlags=2``
    and ``cls.resl1`` with ``maxlags=1``.
    """
    columns = ["realgdp", "realcons", "realinv"]
    levels = macrodata.load_pandas().data[columns].values
    data = np.diff(np.log(levels), axis=0) * 400

    cls.res0 = VAR(data).fit(maxlags=2)
    cls.resl1 = VAR(data).fit(maxlags=1)
    cls.data = data
def extract(self, instance):
    """Fit a VAR(``self.lags``) on the instance's EEG data and flatten it.

    The returned feature vector is the fitted parameter matrix read out
    row by row as a 1d ndarray.
    """
    assert isinstance(instance, Instance)
    params = VAR(instance.eeg_data.T).fit(self.lags).params
    # One long vector: each row of the parameter matrix after the other.
    features = params.reshape(-1).copy()
    self.assert_features(features)
    return features
def var_predict(train_data, num_out):
    """Forecast the first variable of each series with a VAR(2).

    Parameters
    ----------
    train_data : iterable
        Each item is one multivariate series accepted by ``VAR``.
    num_out : int
        Number of steps to forecast for every series.

    Returns
    -------
    numpy.ndarray
        One row per input series holding the ``num_out`` forecasts of
        that series' first variable.
    """
    var_preds = []
    for series in train_data:
        fitted = VAR(series).fit(2)
        # `.endog` is the supported name for the fitted data; the old
        # `.y` alias is deprecated.
        yhat = fitted.forecast(fitted.endog, steps=num_out)
        var_preds.append(yhat[:, 0])
    return np.array(var_preds)
def var_simulate(data, n_simulate, pca_n=200):
    """Simulate data from a VAR(1) fitted in PCA space.

    ``data`` is reduced with ``pca(data, pca_n)``, a VAR with
    ``maxlags=1`` is fitted on the component scores, ``n_simulate``
    steps are simulated, and the simulated scores are multiplied by the
    ``'Va'`` entry of the PCA result to project them back.
    """
    reduction = pca(data, pca_n)
    fitted = VAR(reduction['pc_scores']).fit(maxlags=1)
    simulated_scores = fitted.simulate_var(n_simulate)
    # Back-projection from component space.
    return simulated_scores @ reduction['Va']
def extractCoeff(timeseries_data, lag_order):
    """Fit a VAR without deterministic terms and return its lag matrices.

    The original note describes the input as a 7680x16 array and the
    output as a 5x16x16 coefficient array.

    @return: the ``coefs`` attribute of the fitted results
    """
    # NOTE(review): the 'nc' spelling of "no constant" is
    # version-dependent in statsmodels -- confirm against the installed
    # release.
    fitted = VAR(timeseries_data).fit(lag_order, trend='nc')
    return fitted.coefs
def load_results_statsmodels(dataset):
    """Fit one VAR per deterministic-term setting for ``dataset``.

    Iterates over the module-level ``dt_s_list``. For each entry the
    first element is passed as ``trend`` and the second is given to
    ``generate_exog_from_season`` to build the exogenous regressors.

    Returns
    -------
    dict
        Maps each entry of ``dt_s_list`` to its fitted results
        (``maxlags=4``, ``method="ols"``).
    """
    fitted_by_terms = {}
    for terms in dt_s_list:
        endog = data[dataset]
        seasonal_exog = generate_exog_from_season(terms[1], len(endog))
        fitted_by_terms[terms] = VAR(endog, seasonal_exog).fit(
            maxlags=4, trend=terms[0], method="ols")
    return fitted_by_terms
def train(self, array_X, array_Y):
    """Fit a VAR on ``[array_Y | array_X]`` and return in-sample fits of Y.

    The inputs are stored on ``self.train_X`` / ``self.train_Y``.
    ``array_Y`` becomes the first column of the model data.

    Returns
    -------
    numpy.ndarray
        Fitted values of the first variable, with the first fitted value
        duplicated at the front.
    """
    self.train_X = array_X
    self.train_Y = array_Y

    # atleast_2d(...).T gives the same column vector as the deprecated
    # numpy.matrix(array_Y).T did.
    y_column = numpy.atleast_2d(array_Y).T
    array = numpy.concatenate((y_column, array_X), axis=1)

    fit = VAR(endog=pd.DataFrame(data=array)).fit()
    res = fit.fittedvalues.values[:, 0]
    # Prepend the first fitted value as padding.
    res = numpy.hstack((res[0], res))
    return res
def VARmethod(paramsList=['pollution.csv', '0.93','pm','date'], specialParams=['2','1','4','0','1', '1', '7']):
    """Forecast daily-resampled columns with a VAR(1) and write ``result.csv``.

    Parameters
    ----------
    paramsList : list
        ``[csv_path, train_fraction, value_column, ..., date_column]``.
        Entries from index 2 on are the columns read from the CSV; the
        last one is parsed as a timestamp (``%Y/%m/%d %H:%M``).
    specialParams : list
        Not used by this function.

    Side effects: reads the CSV, prints the forecast and the RMSE for
    every value column, and writes ``result.csv`` to the working
    directory.

    NOTE(review): the model is fitted on first differences but the
    forecast is compared with the undifferenced test values; the test
    slice starts one row after the training slice ends; ``VAR`` receives
    a single differenced column. All of this is kept as in the original
    -- confirm intent.
    """
    path = paramsList[0]
    trainRows = float(paramsList[1])
    saveto = 'result.csv'
    df = pd.read_csv(path, usecols=paramsList[2:])
    allRows = df.shape[0]
    # .copy(): columns are added to these slices below.
    train = df[0:int(allRows*trainRows)].copy()
    test = df[int(allRows*trainRows)+1:].copy()

    df['Timestamp'] = pd.to_datetime(df[paramsList[-1]], format='%Y/%m/%d %H:%M')
    df.index = df['Timestamp']
    df = df.resample('D').mean()

    train['Timestamp'] = pd.to_datetime(train[paramsList[-1]], format='%Y/%m/%d %H:%M')
    train.index = train['Timestamp']
    train = train.resample('D').mean()

    test['Timestamp'] = pd.to_datetime(test[paramsList[-1]], format='%Y/%m/%d %H:%M')
    test.index = test['Timestamp']
    test = test.resample('D').mean()

    y_hat = test.copy()
    nullArray = train.copy()
    nullArray['time'] = train.index
    # Everything above is generic preparation -----------------------------
    for i in range(2,len(paramsList)-1):
        # https://blog.csdn.net/mooncrystal123/article/details/86736397
        # https://blog.csdn.net/qq_41518277/article/details/85101141
        var_data = train[paramsList[i]].diff(1).dropna()
        model = VAR(endog=var_data)
        # (Optional) estimate the optimal lag order and print the result:
        #   lag_order = model.select_order(); print(lag_order.summary())
        model_fit = model.fit(1)
        # `.endog` replaces the deprecated `.y` alias of the results object.
        prediction = model_fit.forecast(model_fit.endog, steps=len(test[paramsList[i]]))
        print(prediction)
        y_hat[paramsList[i]] = prediction
        rms = sqrt(mean_squared_error(test[paramsList[i]], y_hat[paramsList[i]]))
        print(rms)
    # ----------------------------------------------------------------------
    y_hat['time'] = test.index
    yhat_naive = np.array(y_hat)
    nArray = np.array(nullArray)
    newArray = np.concatenate((nArray,yhat_naive),axis=0)
    s = pd.DataFrame(newArray, columns=paramsList[2:])
    for i in range(2,len(paramsList)-1):
        # Blank out the leading share of each value column. A single
        # .iloc assignment replaces the original chained assignment.
        s.iloc[0:int(len(s)*trainRows), s.columns.get_loc(paramsList[i])] = ""
    s.to_csv(saveto,index=False,header=True,float_format='%.2f')
def test_irf_err_bands():
    """Smoke test: every IRF error-band method runs without raising."""
    results = VAR(get_macrodata()).fit(maxlags=2)
    irf = results.irf()
    for method_name in ("err_band_sz1", "err_band_sz2",
                        "err_band_sz3", "errband_mc"):
        getattr(irf, method_name)()
def test_var_cov_params_pandas(bivariate_var_data):
    """cov_params() of a pandas-backed VAR(2) is a labelled DataFrame.

    The index and columns must both be the (regressor, equation)
    MultiIndex.
    """
    frame = pd.DataFrame(bivariate_var_data, columns=['x', 'y'])
    cov = VAR(frame).fit(2).cov_params()
    assert isinstance(cov, pd.DataFrame)

    regressors = ('const', 'L1.x', 'L1.y', 'L2.x', 'L2.y')
    expected = pd.MultiIndex.from_product((regressors, ('x', 'y')))
    assert_index_equal(cov.index, cov.columns)
    assert_index_equal(cov.index, expected)
def extract(self, instance):
    """Flatten the parameters of a VAR fitted with ``maxlags=2``.

    ``instance`` is an object of class Instance.

    Wittawat's original question, kept for the record: if VAR selects
    the lag order automatically, could different instances end up with
    different lag values?
    """
    var_results = VAR(instance.eeg_data.T).fit(maxlags=2)
    coefficients = var_results.params
    # Read the matrix out row by row into a single 1d ndarray.
    features = coefficients.flatten()
    self.assert_features(features)
    return features
def parametersVAR(self, tenors, yields, lag=1, steps=1, alpha=0.01):
    """Fit a VAR on the calibrated curve factors and forecast them.

    Parameters
    ----------
    tenors, yields
        Passed to ``self.calibrateCurveParametersHistorical``;
        ``yields.index`` becomes the index of the parameter history.
    lag : int
        Order of the VAR fitted on ``b0``, ``b1``, ``b2``.
    steps : int
        Number of steps to forecast.
    alpha : float
        Significance level passed to ``forecast_interval``.

    Returns
    -------
    tuple
        ``(fparam, last)`` where ``fparam`` is the result of
        ``forecast_interval`` and ``last`` is the most recent
        ``b0, b1, b2`` row as a (1, 3) array.

    The fitted results are stored on ``self._varModel``.
    """
    params = pd.DataFrame(
        data=self.calibrateCurveParametersHistorical(tenors, yields),
        columns=['tau', 'b0', 'b1', 'b2'], index=yields.index)
    factors = params[['b0', 'b1', 'b2']]

    self._varModel = VAR(factors).fit(lag)
    self._varModel.summary()

    last = factors.tail(1).values
    # A VAR(p) forecast needs the last p observations. Seeding it with a
    # single row, as before, only works for lag == 1.
    history = factors.tail(max(self._varModel.k_ar, 1)).values
    fparam = self._varModel.forecast_interval(history, steps, alpha=alpha)
    return fparam, last
def extract(self, instance):
    """Return the ``maxlags=2`` VAR parameters of the instance as features.

    ``instance`` is an object of class Instance. The open question from
    the original author (Wittawat) was whether automatic lag-order
    selection could give different instances different lag values.
    """
    params = VAR(instance.eeg_data.T).fit(maxlags=2).params
    # Row-major copy of the parameter matrix as a 1d ndarray.
    features = params.reshape(-1).copy()
    self.assert_features(features)
    return features
def baseline_VAR(flow_df, road_adj, hops=5, history_window=4, prediction_window=1, test_ratio=0.25):
    """Per-road VAR baseline using each road's multi-hop neighbourhood.

    For every road a separate VAR of order ``history_window`` is fitted
    on the training rows of that road plus all roads flagged as
    neighbours. Each test sample is forecast from a sliding window of
    ``history_window`` rows, and the ``prediction_window``-th step of the
    road's own series (column 0 of the filtered data) is kept.

    Parameters
    ----------
    flow_df : pandas.DataFrame
        Shape (n_timestamp, n_road).
    road_adj : array
        Road adjacency; it is symmetrized by adding its transpose.
    hops : int
        Controls how many times the symmetrized adjacency is multiplied
        in when accumulating the neighbourhood matrix.
    history_window : int
        VAR order and length of each forecasting window.
    prediction_window : int
        Forecast horizon; only the last forecast step is kept.
    test_ratio : float
        Fraction of rows (from the end) used for testing.

    Returns
    -------
    tuple
        ``(Y_pred, Y_true)``, both of shape (n_sample, n_road); negative
        predictions are clipped to 0.
    """
    n_timestamp, n_road = flow_df.shape
    n_timestamp_train = int(round(n_timestamp * (1 - test_ratio)))
    n_timestamp_test = n_timestamp - n_timestamp_train
    # find neighbors for each node: accumulate products of the
    # symmetrized adjacency; any positive entry marks a neighbour
    symm_adj = road_adj + road_adj.transpose()
    neighbor_adj = symm_adj
    for hop in range(hops - 1):
        neighbor_adj = np.matmul(neighbor_adj, symm_adj) + symm_adj
    np.fill_diagonal(neighbor_adj, 0)  # exclude self
    train_data = np.array(
        flow_df.iloc[:n_timestamp_train])  # (n_timestamp_train, n_road)
    test_data = np.array(
        flow_df.iloc[n_timestamp_train:])  # (n_timestamp_test, n_road)
    # Targets start after the first full history window plus the horizon.
    Y_true = test_data[history_window +
                       (prediction_window -
                        1):n_timestamp_test]  # (n_sample, n_road)
    Y_pred = np.zeros(Y_true.shape)  # (n_sample, n_road)
    for road_index in range(n_road):
        # The road itself goes first so that its forecast is column 0.
        filtered_roads = [road_index] + list(
            np.where(neighbor_adj[road_index] > 0)[0])
        filtered_train_data = np.array(train_data[:, filtered_roads])
        filtered_test_data = np.array(test_data[:, filtered_roads])
        model = VAR(filtered_train_data)
        model_fitted = model.fit(history_window)
        # Stack history_window shifted copies of the test data so that
        # X_test[s, :, i] is the row i steps into sample s's window.
        X_test = np.concatenate([
            np.expand_dims(
                filtered_test_data[i:(n_timestamp_test - history_window -
                                      prediction_window + 1 + i)],
                axis=2) for i in range(history_window)
        ], axis=2)  # (n_sample, n_road, history_window)
        for i in range(Y_pred.shape[0]):  # n_sample
            # Transpose to (history_window, n_filtered_roads); keep the
            # last forecast step of the road's own column.
            Y_pred[i, road_index] = model_fitted.forecast(
                X_test[i].transpose(), steps=prediction_window)[-1, :][0]
    # Original author's checks: no very large values were seen; negative
    # values accounted for about 0.2%.
    Y_pred[Y_pred < 0] = 0  # correct negative values
    return Y_pred, Y_true
def test_irf_trend():
    """IRF standard errors are similar across trend specifications.

    See #1636. This is a rough comparison: the default-trend fit is
    compared with (a) a no-trend fit on demeaned data and (b) a
    constant-plus-trend fit on data with a small linear trend added.
    """
    lags, horizon = 4, 10
    data = get_macrodata().view((float, 3), type=np.ndarray)

    # Reference: default trend
    irf = VAR(data).fit(lags).irf(horizon)

    # (a) subtract the mean and fit without deterministic terms
    demeaned = data - data.mean(0)
    irf_nc = VAR(demeaned).fit(lags, trend="n").irf(horizon)
    assert_allclose(irf_nc.stderr()[1:4], irf.stderr()[1:4], rtol=0.01)

    # (b) add a small linear trend and fit with constant + trend.
    # (A pandas variant would add trend[:, None] to data.values and
    # rebuild the DataFrame; currently not used.)
    n_obs = len(data)
    trend = 1e-3 * np.arange(n_obs) / (n_obs - 1)
    trended = data + trend[:, None]
    irf_t = VAR(trended).fit(lags, trend="ct").irf(horizon)
    assert_allclose(irf_t.stderr()[1:4], irf.stderr()[1:4], rtol=0.03)
def model_var(train_data, test_data, train_data1, test_data1):
    """Fit a bivariate VAR and print the 10-step forecast RMSE.

    The model data has ``train_data1`` as its first column and
    ``train_data`` as its second. The first forecast column is scored
    against the first 10 values of ``test_data1``.

    ``test_data`` is accepted for interface compatibility but not used.
    Prints the data shape, the model summary and the RMSE; returns
    nothing.
    """
    # reshape(-1, 1) accepts any series length instead of the former
    # hard-coded 372.
    x = train_data1.reshape((-1, 1))
    x1 = train_data.reshape((-1, 1))
    lis = np.concatenate((x, x1), axis=1)
    print(np.shape(lis))

    # forecast
    model_fit = VAR(endog=lis).fit()
    print(model_fit.summary())
    # `.endog` replaces the deprecated `.y` alias of the results object.
    predictions = model_fit.forecast(model_fit.endog, steps=10)
    # The label says RMSE, so take the square root of the MSE.
    rmse = np.sqrt(mean_squared_error(test_data1[0:10], predictions[:, 0]))
    print('VAR RMSE: ', rmse)
def sensitivity(df, col_name, ratio, percentage=0.9):
    """Scale one value of ``df_sen`` and return the resulting 2-step forecast.

    The second-to-last value of column ``col_name`` in the module-level
    frame ``df_sen`` is multiplied by ``ratio`` in place. A default VAR
    is then fitted on all rows but the last, and the 2-step-ahead
    forecast of the variable at column position 3 is returned.

    NOTE(review): ``df`` and ``percentage`` are not used, and the global
    ``df_sen`` is mutated on every call -- confirm whether this should
    work on a copy of ``df`` instead.
    """
    # A single .iloc assignment replaces the chained
    # `df_sen[col][...] = ...`, which may write to a temporary copy.
    col = df_sen.columns.get_loc(col_name)
    df_sen.iloc[-2, col] = df_sen.iloc[-2, col] * ratio

    train = df_sen[:-1]
    model_sen_fit = VAR(endog=train).fit()
    # `.endog` replaces the deprecated `.y` alias of the results object.
    yhat_sen_cal = model_sen_fit.forecast(model_sen_fit.endog, steps=2)
    return yhat_sen_cal[:, 3][-1]
def _VAR(train, test=None):
    """Fit a default VAR on ``train`` and forecast from its last rows.

    Parameters
    ----------
    train : numpy.ndarray
        2-d training data (rows are time steps).
    test : array-like or None
        When given, ``len(test)`` steps are forecast and the test MSE is
        printed; otherwise ``len(train)`` steps are forecast.

    Returns
    -------
    numpy.ndarray
        The forecasts.
    """
    model_fit = VAR(train).fit()  # alternative: maxlags=299, ic='aic'
    print('Lag: %s' % model_fit.k_ar)

    # The last 10 rows seed the forecast.
    history = train[-10:, :]
    # `is not None`: comparing an array with `!= None` is elementwise and
    # makes the `if` raise "truth value of an array is ambiguous".
    if test is not None:
        predictions = model_fit.forecast(history, len(test))
        error = mean_squared_error(test, predictions)
        print('Test MSE: %.3f' % error)
    else:
        predictions = model_fit.forecast(history, len(train))
    return predictions
def vars_test():
    """Fit a VAR(2) on log-differenced balance/purchase/redeem data and plot it."""
    frame = get_dataframe()
    # Columns of interest in the frame:
    # date, tBalance_all, total_purchase, total_redeem, total_diff
    mdata = frame[["tBalance_all", "total_purchase", "total_redeem"]]
    mdata.index = pandas.DatetimeIndex(frame["date"])

    # Log-differences, dropping the NaN row introduced by diff()
    growth = np.log(mdata).diff().dropna()

    results = VAR(growth).fit(2)
    results.summary()
    results.plot()
class TestVARResultsLutkepohl(object):
    """
    Verify calculations using results from Lutkepohl's book
    """

    def __init__(self):
        """Fit a VAR(2) on the log-differenced "e1" data set.

        The last 16 observations are left out of the fit. Nothing beyond
        ``self.p`` is set up when pandas is unavailable.
        """
        self.p = 2
        if not have_pandas():
            return
        sdata, dates = get_lutkepohl_data("e1")
        names = sdata.dtype.names
        data = data_util.struct_to_ndarray(sdata)
        adj_data = np.diff(np.log(data), axis=0)
        # Earlier constructor form, kept for reference:
        # est = VAR(adj_data, p=2, dates=dates[1:], names=names)
        self.model = VAR(adj_data[:-16], dates=dates[1:-16], names=names,
                         freq="Q")
        self.res = self.model.fit(maxlags=self.p)
        self.irf = self.res.irf(10)
        self.lut = E1_Results()

    def test_approx_mse(self):
        """Second matrix of forecast_cov(3) matches the book (3.5.18, p. 99)."""
        if not have_pandas():
            raise nose.SkipTest
        # 3.5.18, p. 99
        mse2 = np.array([[25.12, 0.580, 1.300], [0.580, 1.581, 0.586],
                         [1.300, 0.586, 1.009]]) * 1e-4
        assert_almost_equal(mse2, self.res.forecast_cov(3)[1], DECIMAL_3)

    def test_irf_stderr(self):
        """Non-orthogonalized IRF std. errors match the reference values."""
        if not have_pandas():
            raise nose.SkipTest
        irf_stderr = self.irf.stderr(orth=False)
        # Entry 0 of the computed array is skipped; reference entry i - 1
        # is compared with computed entry i.
        for i in range(1, 1 + len(self.lut.irf_stderr)):
            assert_almost_equal(np.round(irf_stderr[i], 3),
                                self.lut.irf_stderr[i - 1])

    def test_cum_irf_stderr(self):
        """Cumulative-effect std. errors match the reference values."""
        if not have_pandas():
            raise nose.SkipTest
        stderr = self.irf.cum_effect_stderr(orth=False)
        for i in range(1, 1 + len(self.lut.cum_irf_stderr)):
            assert_almost_equal(np.round(stderr[i], 3),
                                self.lut.cum_irf_stderr[i - 1])

    def test_lr_effect_stderr(self):
        """Long-run effect std. errors match the reference values."""
        if not have_pandas():
            raise nose.SkipTest
        stderr = self.irf.lr_effect_stderr(orth=False)
        # The orthogonalized variant is computed but not compared.
        orth_stderr = self.irf.lr_effect_stderr(orth=True)
        assert_almost_equal(np.round(stderr, 3), self.lut.lr_stderr)
def test_var_constant():
    """Fitting a VAR on data with a constant column raises ValueError.

    See 2043.
    """
    import datetime
    from pandas import DataFrame, DatetimeIndex

    values = np.array([[2., 2.], [1, 2.], [1, 2.], [1, 2.], [1., 2.]])
    data = DataFrame(values)

    # Daily timestamps starting now, one per row.
    stamp = datetime.datetime.now()
    one_day = datetime.timedelta(days=1)
    stamps = []
    for _ in range(data.shape[0]):
        stamps.append(stamp)
        stamp = stamp + one_day
    data.index = DatetimeIndex(stamps)

    model = VAR(data)
    with pytest.raises(ValueError):
        model.fit(1)
def test_lag_order_selection():
    """Selected lag orders match the reference values for every criterion.

    Runs over all ``datasets`` and all deterministic-term settings in
    ``dt_s_list``. In debug mode the test is skipped unless "lag order"
    is listed in ``to_test``, and progress is printed.
    """
    if debug_mode and "lag order" not in to_test:
        return
    if debug_mode:
        print("\n\nLAG ORDER SELECTION", end="")

    criteria = ["aic", "fpe", "hqic", "bic"]
    for ds in datasets:
        for dt in dt_s_list:
            if debug_mode:
                print("\n" + dt_s_tup_to_string(dt) + ": ", end="")

            endog_tot = data[ds]
            exog = generate_exog_from_season(dt[1], len(endog_tot))
            selection = VAR(endog_tot, exog).select_order(10, trend=dt[0])

            for ic in criteria:
                err_msg = build_err_msg(ds, dt,
                                        "LAG ORDER SELECTION - " + ic.upper())
                obtained = getattr(selection, ic)
                desired = results_ref[ds][dt]["lagorder"][ic]
                assert_allclose(obtained, desired, rtol, atol, False, err_msg)
def __init__(self):
    """Fit a VAR(2) on the log-differenced "e1" data for the tests.

    The last 16 observations are excluded from the fit. The fitted
    results, the 10-period impulse responses and the reference results
    are stored on the instance.
    """
    self.p = 2

    sdata, dates = get_lutkepohl_data('e1')
    levels = data_util.struct_to_ndarray(sdata)
    growth = np.diff(np.log(levels), axis=0)

    # Differencing drops the first observation, hence dates[1:...].
    self.model = VAR(growth[:-16], dates=dates[1:-16], freq='Q')
    self.res = self.model.fit(maxlags=self.p)
    self.irf = self.res.irf(10)
    self.lut = E1_Results()
def test_var_constant():
    """Fitting a VAR on data with a constant column raises ValueError.

    See 2043. ``ValueWarning`` is ignored while the model is built.
    """
    import datetime
    from pandas import DataFrame, DatetimeIndex

    values = np.array([[2., 2.], [1, 2.], [1, 2.], [1, 2.], [1., 2.]])
    data = DataFrame(values)

    # Daily timestamps starting now, one per row.
    current = datetime.datetime.now()
    step = datetime.timedelta(days=1)
    stamps = []
    for _ in range(data.shape[0]):
        stamps.append(current)
        current = current + step
    data.index = DatetimeIndex(stamps)

    # pytest.warns(ValueWarning) does not silence the warning in the test
    # output, so it is filtered explicitly instead.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=ValueWarning)
        model = VAR(data)
    with pytest.raises(ValueError):
        model.fit(1)
def test2():
    """Fit VARs on macrodata growth rates and print a 30-step forecast.

    A VAR(2) is fitted first. The model is then re-fitted with AIC lag
    selection (up to 50 lags) and the forecast is seeded with the last
    ``k_ar`` observations.
    """
    mdata = statsmodels.datasets.macrodata.load_pandas().data
    dates = mdata[["year", "quarter"]].astype(int).astype(str)
    quarterly = dates["year"] + "Q" + dates["quarter"]
    mdata = mdata[["realgdp", "realcons", "realinv"]]
    mdata.index = pandas.DatetimeIndex(quarterly)
    data = np.log(mdata).diff().dropna()

    model = VAR(data)
    results = model.fit(2)
    results.summary()

    results = model.fit(maxlags=50, ic="aic")
    lag_order = results.k_ar
    # print() call instead of the Python 2 print statement, which is a
    # SyntaxError on Python 3.
    print(results.forecast(data.values[-lag_order:], 30))
def __init__(self):
    """Fit a VAR(2) on the log-differenced "e1" data for the tests.

    Only ``self.p`` is set when pandas is unavailable. Otherwise the
    model (fitted without the last 16 observations), its results, the
    10-period impulse responses and the reference results are stored.
    """
    self.p = 2
    if not have_pandas():
        return

    sdata, dates = get_lutkepohl_data("e1")
    names = sdata.dtype.names
    levels = data_util.struct_to_ndarray(sdata)
    growth = np.diff(np.log(levels), axis=0)

    # Differencing drops the first observation, hence dates[1:...].
    self.model = VAR(growth[:-16], dates=dates[1:-16], names=names,
                     freq="Q")
    self.res = self.model.fit(maxlags=self.p)
    self.irf = self.res.irf(10)
    self.lut = E1_Results()
# Annualized Sharpe ratio of the benchmark (252 trading days)
sr_bm = np.sqrt(252)*sharpe(rets_bm)
# print() call instead of the Python 2 print statement
print(mn_bm, sd_bm, sr_bm)

# TODO: calc betas / alphas
# TODO: forecast returns and correlation; use them for weighting
rets.iloc[:,0:10].plot()

### DETOUR TO VAR FORECASTING
from statsmodels.tsa.vector_ar.var_model import VAR, VARResults, VARProcess
import statsmodels
statsmodels.version.version

# Check for NAs in the data. The number of series had to be reduced
# because the full 30 gave a singular matrix.
v1 = VAR(rets_train[series_red], freq='D')
v1.select_order(maxlags=30)
results = v1.fit(5)

# Diagnostics of the fitted model
# (also available: results.summary(), results.plot_acorr(), plt.show())
results.plot()

# Make a forecast covering the first 3 months of the test period
test_index = rets_test.index
fc_range = pd.date_range(start=test_index[0], periods=2, freq='3M')
fc_periods = len(rets_test[fc_range[0]:fc_range[1]])
lag_order = results.k_ar
# Only the last `lag_order` observations are needed to seed the forecast.
fc = results.forecast(rets_train[series_red].values[-lag_order:], fc_periods)
fc.shape
fc[:,-1]
def test_constructor(self):
    """VAR can be built and fitted from a plain ndarray without names."""
    plain = self.data.view((float, 3))
    VAR(plain).fit(self.p)
import pandas as pd
import numpy as np
import statsmodels.api as sm
import pylab
from statsmodels.tsa.base.datetools import dates_from_str
from statsmodels.tsa.vector_ar.var_model import VAR

# Load the macro data set and build a quarterly DatetimeIndex
mdata = sm.datasets.macrodata.load_pandas().data
dates = mdata[['year', 'quarter']].astype(int).astype(str)
quarterly = dates["year"] + "Q" + dates["quarter"]
quarterly = dates_from_str(quarterly)
mdata = mdata[['realgdp','realcons','realinv']]
mdata.index = pd.DatetimeIndex(quarterly)
data = np.log(mdata).diff().dropna()  # log difference

# Make a VAR model and fit it with two lags
model = VAR(data)
results = model.fit(2)
# print() call instead of the Python 2 print statement
print(results.summary())
results.plot()
results.plot_acorr()  # autocorrelation

# Lag-order selection, then a re-fit with the AIC-selected order
model.select_order(15)
results = model.fit(maxlags=15, ic='aic')

# Impulse responses over 10 periods
irf = results.irf(10)
irf.plot(orth=True)  # orthogonalization
pylab.show()