def data2AB(data, x0=None):
    n = data.shape[0]
    T = data.shape[1]
    YY = np.dot(data[:, 1:], data[:, 1:].T)
    XX = np.dot(data[:, :-1], data[:, :-1].T)
    YX = np.dot(data[:, 1:], data[:, :-1].T)
    model = VAR(data.T)
    r = model.fit(1)
    A = r.coefs[0, :, :]
    # A = np.ones((n, n))
    B = np.ones((n, n))
    np.fill_diagonal(B, 0)
    B[np.triu_indices(n)] = 0
    K = int(np.sum(np.abs(B)))  # abs(A) + abs(B)
    a_idx = np.where(A != 0)
    b_idx = np.where(B != 0)
    np.fill_diagonal(B, 1)

    try:
        s = x0.shape
        x = x0
    except AttributeError:
        x = np.r_[A.flatten(), 0.1 * np.random.randn(K)]

    o = optimize.fmin_bfgs(nllf2, x,
                           args=(np.double(A), np.double(B),
                                 YY, XX, YX, T, a_idx, b_idx),
                           gtol=1e-12, maxiter=500,
                           disp=False, full_output=True)
    A, B = x2M(o[0], np.double(A), np.double(B), a_idx, b_idx)
    B = B + B.T
    return A, B
def VARprocess(df, log=False):
    # Log transformation, relative difference and drop NULL values
    if log:
        df = np.log(df + 0.1).diff().dropna()
    # Vector Autoregression Process generation
    maxAttr = len(df.columns)
    # Find the right lag order
    orderFound = False
    while not orderFound:
        try:
            model = VAR(df.iloc[:, 0:maxAttr])
            order = model.select_order()
            orderFound = True
        except Exception:
            exc_type, exc_obj, exc_tb = sys.exc_info()
            if str(exc_obj) == "data already contains a constant.":
                maxAttr = maxAttr - 1
            else:
                maxAttr = int(str(exc_obj).split("-th")[0]) - 1
            print("Exception, reducing to n_attributes", maxAttr)
            orderFound = False
    n_lags = max(order.items(), key=operator.itemgetter(1))[1]
    method = max(order.items(), key=operator.itemgetter(1))[0]
    print("n_lags", n_lags)
    print("method", method)
    results = model.fit(maxlags=n_lags, ic=method)
    return results
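Note that `VARprocess` above relies on the older statsmodels behaviour in which `VAR.select_order()` returned a plain dict of information criteria (hence the original `iteritems()` call). In recent statsmodels releases, `select_order(maxlags=...)` returns a `LagOrderResults` object instead; the following is a minimal sketch of equivalent lag selection under that assumption, using toy data:

import numpy as np
import pandas as pd
from statsmodels.tsa.api import VAR

# Toy stationary two-column data; any multivariate DataFrame works here.
rng = np.random.default_rng(0)
df = pd.DataFrame(rng.standard_normal((200, 2)), columns=["y1", "y2"])

model = VAR(df)
sel = model.select_order(maxlags=8)   # LagOrderResults in recent statsmodels
print(sel.summary())                  # AIC/BIC/FPE/HQIC for each candidate lag
n_lags = sel.aic                      # lag picked by AIC (sel.bic, sel.hqic also available)
results = model.fit(n_lags)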
def data2VARgraph_model(data, pval=0.05):
    model = VAR(data.T)
    r = model.fit(1)
    A = r.coefs[0, :, :]
    n = A.shape[0]
    g = {str(i): {} for i in range(1, n + 1)}
    for i in range(n):
        for j in range(n):
            if np.abs(A[j, i]) > pval:
                g[str(i + 1)][str(j + 1)] = set([(0, 1)])
    return g, r
def get_fedea_on_gdp():
    qbuilder = inquisitor.Inquisitor(token)
    df = qbuilder.series(ticker=['ESE.940000D259D.Q.ES', 'FEEA.PURE064A.M.ES'])
    df.dropna(inplace=True)
    df['fedea'] = df['FEEA.PURE064A.M.ES'].diff()
    df['gdp'] = df['ESE.940000D259D.Q.ES'] / 100
    data1 = df[['fedea', 'gdp']]
    data1.dropna(inplace=True)
    model1 = VAR(data1)
    results1 = model1.fit(4)
    irf1 = results1.irf(8)
    fedea_on_gdp = irf1.orth_lr_effects[1, 0] / data1['fedea'].std()
    return fedea_on_gdp
def get_irf(nd, subset):
    '''
    http://statsmodels.sourceforge.net/0.6.0/vector_ar.html
    '''
    data = nd.reindex(columns=subset)
    data = data.dropna()
    data.describe()
    model = VAR(data)
    results = model.fit(6)
    irf = results.irf(12)
    cum_effects = irf.orth_cum_effects
    return cum_effects[12, 2, 0]
def determineOrderOfP():
    X_train = readVectorAutoRegressiveMethodXTrain()
    for i in [1, 2, 3, 4, 5, 6, 7]:
        vectorAutoRegressiveMethodModel = VAR(X_train)
        vectorAutoRegressiveMethodModelResult = vectorAutoRegressiveMethodModel.fit(i)
        print('Order =', i)
        print('AIC: ', vectorAutoRegressiveMethodModelResult.aic)
        print('BIC: ', vectorAutoRegressiveMethodModelResult.bic)
        print()
def vector_auto_reg(self, y, dates, p, clean_data="greedy"):
    s = self.map_column_to_sheet(y[0])
    v = np.copy(y)
    v = np.append(v, dates)

    # prepare data
    dfClean = s.cleanData(v, clean_data)
    time_series = dfClean[y]
    dates = dfClean[dates]
    time_series = time_series.set_index(dates)

    # run pth-order VAR
    model = VAR(time_series)
    results = model.fit(p)
    return results
def VAR_Model(modeldata):
    model = VAR(modeldata)
    res = {}
    AIC = []
    for i in range(100):
        result = model.fit(i)
        aic = result.aic
        AIC.append(aic)
        if (aic <= pr.AICvalue_limit) and (aic >= -pr.AICvalue_limit):
            break
    lag_order = i - 1
    varmodel = model.fit(lag_order)
    residuals = DataFrame(varmodel.resid)
    rmean = abs(residuals.mean())
    # print("Residual Error = {}".format(rmean[0]))
    res.update({'Residual Mean': rmean, 'Lag Order': lag_order})
    return varmodel, res
def var_forecast(train_df, test_df, params):
    _order = params['order']
    _input = list(params['input'])
    _output = params['output']
    _step = params.get('step', 1)

    model = VAR(train_df[_input].values)
    results = model.fit(_order)
    lag_order = results.k_ar
    params['order'] = lag_order

    forecast = []
    for i in np.arange(0, len(test_df) - lag_order - _step + 1):
        fcst = results.forecast(test_df[_input].values[i:i + lag_order], _step)
        forecast.append(fcst[-1])

    forecast_df = pd.DataFrame(columns=test_df[_input].columns, data=forecast)
    return forecast_df[_output].values
def var_model(data, look_ahead=5):
    '''Fits a vector autoregression model to the data and forecasts closing
    prices a number of days ahead equal to the look_ahead value.'''
    data = data.set_index('date')
    data = data.drop('index', axis=1)
    model = VAR(data)
    results = model.fit(maxlags=15)
    forecast = pd.DataFrame(
        results.forecast(data.values[0:], look_ahead),
        columns=['close', 'high', 'low', 'open', 'volume', 'sentiment'])
    # future = pd.date_range(start='1-1-2019', periods=5)
    future = pd.date_range(start=data.iloc[-1].name + dt.timedelta(days=1),
                           periods=look_ahead)
    forecast = forecast.set_index(future)
    # dill.dump(forecast, open())
    data_w_forecast = data.append(forecast)
    return data_w_forecast
def causality_test(var1, var2):
    data1 = pd.Series(var1, name='Var1')
    data2 = pd.Series(var2, name='Var2')
    mdata = pd.concat([data1, data2], axis=1)
    mdata.index = pd.date_range('1950-01-01', periods=600, freq='M')
    model = VAR(mdata)
    results = model.fit(7)
    foo = results.test_causality('Var2', ['Var1'], kind='f')
    crit = foo['crit_value']
    stat = foo['statistic']
    if stat > crit:
        cause = 1
    else:
        cause = 0
    return cause
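`causality_test` above indexes the result of `test_causality` like a dict (`foo['crit_value']`), which only works with old statsmodels versions. Recent releases return a `CausalityTestResults` object with attributes instead of keys; below is a minimal sketch of the same decision under that assumption (the toy data and lag order are illustrative):

import numpy as np
import pandas as pd
from statsmodels.tsa.api import VAR

# Toy bivariate series standing in for var1/var2 above.
rng = np.random.default_rng(1)
mdata = pd.DataFrame(rng.standard_normal((600, 2)), columns=["Var1", "Var2"],
                     index=pd.date_range("1950-01-01", periods=600, freq="M"))

results = VAR(mdata).fit(7)
gc = results.test_causality("Var2", ["Var1"], kind="f")   # CausalityTestResults
cause = 1 if gc.test_statistic > gc.crit_value else 0     # same decision rule as above
print(gc.summary())                                       # or inspect gc.pvalue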
def trainVectorAutoRegressiveMethodModelOnFullDataset():
    vectorAutoRegressiveMethodDataset = importVectorAutoRegressiveMethodDataset(
        "M2SLMoneyStock.csv", "PCEPersonalSpending.csv")
    # training model on the whole dataset
    vectorAutoRegressiveMethodModel = VAR(vectorAutoRegressiveMethodDataset)
    # we are taking p = 5 as we have created different models based on the different p values.
    # The model gives the minimum AIC and BIC for p = 5.
    vectorAutoRegressiveMethodModelResult = vectorAutoRegressiveMethodModel.fit(5)
    # saving the model in pickle files
    saveVectorAutoRegressiveMethodModelForFullDataset(
        vectorAutoRegressiveMethodModelResult)
    print(vectorAutoRegressiveMethodModelResult.summary())
def VAR_forecast(df):
    """Runs a VAR forecast on the passed dataset

    Parameters
    ----------
    df
        The dataset containing historical observations of day-ahead prices

    Returns
    -------
    forecasted
        A list of forecasted electricity prices for the next 24 hours
    """
    column_name = "Day-ahead Price [EUR/MWh]"

    # Open CSV file and set timestamp column as index
    df.rename(columns={df.columns[0]: "cet_timestamp"}, inplace=True)
    df["cet_timestamp"] = pd.to_datetime(df["cet_timestamp"], format="%Y-%m-%d %H:%M")
    df.set_index("cet_timestamp", inplace=True)

    # Difference the series and drop the resulting NaN row
    df_diff = df.diff().dropna()

    # Generate new dates
    dates = list()
    last_date = df.index[-1:][0]
    for i in range(1, 25):
        last_date += timedelta(hours=1)
        dates.append(last_date.strftime("%Y-%m-%d %H:%M:%S"))

    var_model = VAR(df_diff).fit(26)
    var_forecast = var_model.forecast(y=var_model.y, steps=24)
    var_forecast_df = pd.DataFrame(var_forecast, columns=df.columns, index=dates)
    var_forecast_df = invert_transformation(df, var_forecast_df)

    '''
    # For mean absolute error
    last_24hours = last_date - timedelta(hours=24)
    # History - 24 hours
    history = df_diff[df_diff.index <= last_24hours]
    var_mae_model = VAR(history).fit(26)
    var_mae_forecast = var_mae_model.forecast(y=var_mae_model.y, steps=24)
    mae = mean_absolute_error(history['Day-ahead Price [EUR/MWh]'].values,
                              np.array(var_mae_forecast))
    print(mae)
    '''

    return list(var_forecast_df[column_name].values), datetime.now().strftime("%Y-%m-%d")
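Several snippets here (`VAR_forecast` above, and `fit_forecast` and `process` further down) call an `invert_transformation` helper whose definition is not included. Its exact form varies by project, but a common pattern for undoing a first difference is to cumulatively sum the forecasted differences onto the last observed level. The sketch below is a hypothetical stand-in, not the original helper:

import pandas as pd

def invert_transformation_sketch(df_train: pd.DataFrame, df_forecast: pd.DataFrame) -> pd.DataFrame:
    """Undo a first difference: add the cumulative sum of forecasted
    differences onto the last observed level of each column."""
    df_fc = df_forecast.copy()
    for col in df_train.columns:
        df_fc[col] = df_train[col].iloc[-1] + df_fc[col].cumsum()
    return df_fc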
def fit(self, p, transformation="dct"):
    if p < 1:
        raise ValueError(f"{p} is an invalid lag")
    self.p = p
    self.transformation = transformation

    # Applies the transformation across the rows
    l_train_tensor = self.__apply_trans(self.train, transformation, 2)
    train_model_sets = self.__split_cols_into_model_sets(l_train_tensor)

    # Fits all of the VAR models
    fits = []
    for i in range(self.matrix_shape[1]):
        train_df = pd.DataFrame(train_model_sets[i])
        model = VAR(train_df)
        fit = model.fit(p)
        fits.append(fit)
    self.var_fits = fits

    # Groups all of the coefficient matrices into coefficient tensors
    coefs = np.empty((p, self.matrix_shape[1], self.matrix_shape[0], self.matrix_shape[0]))
    c = np.empty(self.matrix_shape)
    for i in range(self.matrix_shape[1]):
        curr_coefs = fits[i].coefs
        for j in range(p):
            coefs[j][i] = curr_coefs[j]
        # Adds onto c
        c[:, i] = fits[i].params[fits[i].params.index == "const"].iloc[0]

    # Performs an inverse transform on all of them
    for i in range(p):
        coefs[i] = self.__apply_inverse_trans(coefs[i], transformation, 0)
    # Performs the inverse transformation on the constant matrix
    c = self.__apply_inverse_trans(c, transformation, 1)

    self.coefs = coefs
    self.c = c
def test_gc2(data, gc_format, maxlag=None, signif=0.05, verbose=False):
    from statsmodels.tsa.api import VAR
    model = VAR(data)
    if maxlag:
        res = model.fit(maxlag, verbose=verbose)
    else:
        res = model.fit(verbose=verbose)
    gc_res = res.test_causality(gc_format[0], gc_format[1], signif=signif, verbose=verbose)
    # results = pd.Seires({k: v for k, v in gc_res.iteritems() if k in ['conclusion','pvalue']})
    results = pd.Series(gc_res)
    results['H0'] = "'{}' do not Granger-cause '{}'".format(gc_format[1], gc_format[0])
    results['VAR'] = res
    results['best_order'] = (len(model.exog_names) - 1) / data.shape[1]
    return results
def calc_granger_caulity(data):
    """Summary

    Args:
        data (array): array without nan values

    Returns:
        TYPE: Description
    """
    from statsmodels.tsa.api import VAR
    # data_dropna = data[~np.isnan(data).any(1)]
    try:
        model = VAR(data)
        res = model.fit(verbose=False)
        out = res.test_causality(0, 1, verbose=False)['statistic']
    except ValueError as e:
        # print 'calc_granger_caulity: ', 'most factors are zeros.'
        out = np.nan
    return out
def granger_causality(self, data):
    columns = []
    for i in range(data.shape[0]):
        for j in range(data.shape[2]):
            columns.append(str(i) + str(j))
    # print(columns)
    topic_oriented_data = data[0]
    for i in range(1, len(data)):
        topic_oriented_data = np.concatenate((topic_oriented_data, data[i]), 1)
    topic_oriented_data = pandas.DataFrame(topic_oriented_data, columns=columns)
    print(topic_oriented_data)
    # print(type(topic_oriented_data))
    var_model = VAR(topic_oriented_data)
    results = var_model.fit(2)
    gc_result = results.test_causality(columns, columns, kind='f')
    print(gc_result.summary())
def granger(cause, effect, lag):
    data = pd.DataFrame({'cause': cause, 'effect': effect})
    return_value = 1
    model = VAR(data)
    try:
        if lag == -1:
            results = model.fit(maxlags=15, trend='nc', ic='aic')
        else:
            results = model.fit(lag)
    except Exception:
        # cannot find a lag in the interval [1, maxlags], so assume no causality
        return 1
    try:
        x = results.test_causality('effect', 'cause', kind='wald').summary().data
    except Exception:
        return 0
    return_value = x[1][2]
    return return_value
def varmodel(self):
    self.mvdfg.index = pd.to_datetime(self.mvdfg.index)
    self.var_predicted = pd.DataFrame()
    self.var_forecast = pd.DataFrame()
    self.var_data_train = pd.DataFrame()
    self.var_data_test = pd.DataFrame()
    maxlag = 3

    if splitdf.upper() == 'Y':
        # Validation model
        self.var_data_train = self.mvdfg[(pd.to_datetime(self.mvdfg.index)) <= testdate]
        self.var_data_test = self.mvdfg[(pd.to_datetime(self.mvdfg.index)) > testdate]
        var_model = VAR(self.var_data_train)
        results = var_model.fit(maxlags=maxlag, ic='aic')
        print(results.summary())
        lag_order = results.k_ar
        var_steps = len(self.var_data_test)
        pred_values = results.forecast(self.var_data_train.values[-lag_order:], var_steps)
        self.predicted = pd.DataFrame(pred_values,
                                      index=self.mvdfg.index[-var_steps:],
                                      columns=self.mvdfg.columns)
        self.var_predicted = self.predicted

    # Forecast
    startdate = self.mvdfg.index.max() + pd.offsets.DateOffset(months=1)
    maxdate = self.mvdfg.index.max() + pd.offsets.DateOffset(months=forecaststeps + 1)
    var_fc_index = np.asarray(pd.date_range(startdate, maxdate, freq='m').strftime('%Y-%m-01'))
    var_fc_index = pd.to_datetime(var_fc_index)
    var_forecast_model = VAR(self.mvdfg)
    fc_results = var_forecast_model.fit(maxlags=maxlag, ic='aic')
    print(fc_results.summary())
    fc_lag_order = fc_results.k_ar
    fc_values = fc_results.forecast(self.mvdfg.values[-fc_lag_order:], forecaststeps)
    self.forecast = pd.DataFrame(fc_values, index=var_fc_index, columns=self.mvdfg.columns)
    self.var_forecast = self.forecast
    print(self.var_forecast)
    return self.var_predicted, self.var_forecast
def gaussian_var_copula_entropy_rate(sample, p=None, robust=False, p_ic='hqic'):
    """
    Estimates the entropy rate of the copula-uniform dual representation of a stationary
    Gaussian VAR(p) (or AR(p)) process from a sample path.

    We recall that the copula-uniform representation of a :math:`\\mathbb{R}^d`-valued
    process :math:`\\{x_t\\} := \\{(x_{1t}, \\dots, x_{dt}) \\}` is, by definition, the process
    :math:`\\{ u_t \\} := \\{ \\left( F_{1t}\\left(x_{1t}\\right), \\dots, F_{dt}\\left(x_{dt}\\right) \\right) \\}`
    where :math:`F_{it}` is the cumulative density function of :math:`x_{it}`.

    It can be shown that

    .. math::
        h\\left( \\{ x_t \\}\\right) = h\\left( \\{ u_t \\}\\right) + \\sum_{i=1}^d h\\left( x_{i*}\\right)

    where :math:`h\\left(x_{i*}\\right)` is the entropy of the i-th coordinate process at any time.

    Parameters
    ----------
    sample: (T, d) np.array
        Array of T sample observations of a :math:`d`-dimensional process.
    p : int or None
        Number of lags to compute for the autocovariance function. If :code:`p=None`
        (the default), it is inferred by fitting a VAR model on the sample, using as
        information criterion :code:`p_ic`.
    robust: bool
        If True, the Pearson autocovariance function is estimated by first estimating
        a Spearman rank correlation, and then inferring the equivalent Pearson
        autocovariance function, under the Gaussian assumption.
    p_ic : str
        The criterion used to learn the optimal value of :code:`p` (by fitting a VAR(p)
        model) when :code:`p=None`. Should be one of 'hqic' (Hannan-Quinn Information
        Criterion), 'aic' (Akaike Information Criterion), 'bic' (Bayes Information
        Criterion) and 't-stat' (based on last lag). Same as the 'ic' parameter of
        :code:`statsmodels.tsa.api.VAR`.

    Returns
    -------
    h : float
        The entropy rate of the copula-uniform dual representation of the input process.
    p : int
        Order of the VAR(p).
    """
    _sample = sample[~np.isnan(sample).any(axis=1)] if len(sample.shape) > 1 else sample[~np.isnan(sample)]

    if p == None:
        # Fit an AR and use the fitted p.
        max_lag = int(round(12 * (_sample.shape[0] / 100.) ** (1 / 4.)))
        if len(_sample.shape) == 1 or _sample.shape[1] == 1:
            m = AR(_sample)
            p = m.fit(ic=p_ic).k_ar
        else:
            m = VAR(_sample)
            p = m.fit(ic=p_ic).k_ar

    x = _sample if len(_sample.shape) > 1 else _sample[:, None]
    res = -np.sum(0.5 * np.log(2. * np.pi * np.e * np.var(x, axis=0)))
    res += gaussian_var_entropy_rate(x, p, robust=robust)

    return res, p
def fit_forecast(dataset):
    cols = dataset.columns

    # creating the train and validation set
    train = dataset[:int(0.8 * (len(dataset)))]
    valid = dataset[int(0.8 * (len(dataset))):]

    train_differenced, round_no = remove_stationary(train)
    model = VAR(train_differenced)
    model_fit = model.fit()

    # make prediction on validation
    prediction = model_fit.forecast(model_fit.endog, steps=len(valid))

    # converting predictions to dataframe
    forecast = pd.DataFrame(prediction, index=dataset.index[-len(valid):], columns=cols)
    if round_no != 0:
        forecast = invert_transformation(train, forecast, (round_no == 2))

    # check rmse
    rmses = {}
    for i in cols:
        rmses[i + '_RMSE'] = sqrt(mean_squared_error(forecast[i], valid[i]))
    return forecast, valid, rmses
def var_param_min_search(data, limit_p):
    # Parameters of each minimum
    param_aic = []
    param_bic = []
    # Minimums of each criterion
    mins_aic = []
    mins_bic = []
    # Current minimums of each criterion
    current_min_aic = None
    current_min_bic = None

    for i in range(limit_p):
        model = VAR(data)
        model_fit = model.fit(i)
        current_aic = model_fit.aic
        current_bic = model_fit.bic

        # Check for new AIC minimum
        if current_min_aic is None or current_min_aic > current_aic:
            current_min_aic = current_aic
            param_aic.append(str(i))
            mins_aic.append(current_aic)

        # Check for new BIC minimum
        if current_min_bic is None or current_min_bic > current_bic:
            current_min_bic = current_bic
            param_bic.append(str(i))
            mins_bic.append(current_bic)

    res = {
        'aic': {
            'parameters': param_aic,
            'mins': mins_aic
        },
        'bic': {
            'parameters': param_bic,
            'mins': mins_bic
        }
    }
    return res
def fit(data, maxlag):
    # with open('varModel.json') as f:
    #     data = json.load(f)
    mdata = prepareData(data)
    equation = dict()
    equation["aic"] = []
    equation["BIC"] = []
    equation["hqic"] = []
    equation["min"] = []
    model = VAR(mdata)
    for x in range(0, maxlag):
        fitedModel = model.fit(x + 1)
        equation["aic"].append(fitedModel.aic)
        equation["BIC"].append(fitedModel.bic)
        equation["hqic"].append(fitedModel.hqic)
    minLag = model.fit(maxlags=maxlag, ic='bic')
    equation["min"].append(minLag.aic)
    equation["min"].append(minLag.bic)
    equation["min"].append(minLag.hqic)
    return equation
def evaluate_svar_model(X, p, s, feat_nm, agg_level):
    feat_list = X.columns.values.tolist()

    # add seasonal variables
    decomposition = seasonal_decompose(X[feat_nm], model='additive', freq=s)
    X['seasonal'] = decomposition.seasonal
    trend = decomposition.trend
    trend = trend.fillna(method='ffill')  # fill missing values with previous values
    trend = trend.fillna(method='bfill')  # fill first missing value with the one before it
    X['trend'] = trend

    # prepare training dataset
    train_size = len(X)
    train = X
    model = VAR(train)
    model_fit = model.fit(p)

    test_size = test_size_level[agg_level]
    yhat = model_fit.forecast(train.values[:p], train_size + max(test_size) - p)
    index = feat_list.index(feat_nm)  # index of feature we want to analyse
    yhat_feat = [item[index] for item in yhat]  # model output relevant to that feature
    predictions = yhat_feat

    pred0 = predictions[train_size - p + test_size[0] - 1]
    pred1 = predictions[train_size - p + test_size[1] - 1]
    pred2 = predictions[train_size - p + test_size[2] - 1]
    avg0 = sum(predictions[train_size - p:train_size - p + test_size[0]]) / test_size[0]
    avg1 = sum(predictions[train_size - p:train_size - p + test_size[1]]) / test_size[1]
    avg2 = sum(predictions[train_size - p:train_size - p + test_size[2]]) / test_size[2]

    return [pred0, pred1, pred2, avg0, avg1, avg2]
def lag_selection(ytw, corp, tb):
    import cs_data_analysis as da
    from statsmodels.tsa.api import VAR
    import numpy as np
    '''
    CS-Aaa-3MO  CS-Aa-3MO  CS-A-3MO  CS-Baa-3MO
    CS-Aaa-1YR  CS-Aa-1YR  CS-A-1YR  CS-Baa-1YR
    CS-Aaa-5YR  CS-Aa-5YR  CS-A-5YR  CS-Baa-5YR
    TB-3MO-TY   TB-1YR-TY  TB-5YR-TY
    '''
    debug(ytw.shape)
    endog = ytw[[corp, tb]]
    lag_count = 10
    ic_aic = np.zeros((lag_count, 1))  # AIC, BIC, HQIC
    ic_bic = np.zeros((lag_count, 1))
    for i in range(lag_count):
        debug(f"{'-'*4} period: {lag_count} {'-'*4}")
        # https://www.statsmodels.org/stable/generated/statsmodels.tsa.vector_ar.var_model.VAR.fit.html
        model = VAR(endog=endog)
        model_fit = model.fit(maxlags=i + 1, trend='ct', verbose=True)
        debug(f"aic: {model_fit.aic:.6f}")
        debug(f"bic: {model_fit.bic:.6f}")
        debug(f"hqic: {model_fit.hqic:.6f}")
        ic_aic[i] = model_fit.aic
        ic_bic[i] = model_fit.bic
        results = model_fit.summary()
        debug(results)

    ic_aic_min, aic_model_min = np.min(ic_aic), np.argmin(ic_aic)
    ic_bic_min, bic_model_min = np.min(ic_bic), np.argmin(ic_bic)
    debug('Relative Likelihoods')
    debug(np.exp((ic_aic_min - ic_aic) / 2))
    debug(f'number of parameters in minimum AIC model {(aic_model_min + 1)}')
    debug(np.exp((ic_bic_min - ic_bic) / 2))
    debug(f'number of parameters in minimum BIC model {(bic_model_min + 1)}')
    return aic_model_min + 1, bic_model_min + 1
def full_model(data, caused, L):
    """
    :param data: multivariate time series passed to the VAR model
    :type data: pd.DataFrame
    :param caused: name of the caused (target) column
    :param L: lag order of the VAR model
    :return: sum of squared residuals for the caused column
    """
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore")
        model = VAR(data)
        model_fit = model.fit(L)
    SSE = np.sum((model_fit.resid) ** 2)[caused]

    # ONLY FOR DEBUGGING
    # plt.plot(data['X'])
    # plt.plot(model_fit.fittedvalues['X'])
    # plt.show()
    # DEBUGGING END
    return SSE
def VARprocess(df, log=False):
    """
    Description: This function applies Vector Auto Regression
    Input: dataframe
    Output: VARresults object
    """
    # Log transformation, relative difference and drop NULL values
    if log:
        df = np.log(df + 0.1).diff().dropna()
    # Vector Autoregression Process generation
    maxAttr = len(df.columns)
    # Find the right lag order
    orderFound = False
    print("7.1.0 ----- Finding an order for the VAR")
    maxIter = 0
    while not orderFound and maxIter < 15:
        maxIter = maxIter + 1
        try:
            model = VAR(df)
            order = model.select_order()
            orderFound = True
            print(" !!! loop stuck")
        except Exception:
            exc_type, exc_obj, exc_tb = sys.exc_info()
            # if str(exc_obj) == "data already contains a constant.":
            maxAttr = maxAttr - 1
            # else:
            #     maxAttr = int(str(exc_obj).split("-th")[0]) - 1
            #     print("Exception, reducing to n_attributes", maxAttr)
            orderFound = False
    print("7.1.1 ----- Model fitting")
    if orderFound:
        n_lags = max(order.items(), key=operator.itemgetter(1))[1]
        method = max(order.items(), key=operator.itemgetter(1))[0]
        results = model.fit(maxlags=n_lags, ic=method)
    else:
        results = model.fit()
    return results
def data2AB(data, x0=None):
    n = data.shape[0]
    T = data.shape[1]
    YY = np.dot(data[:, 1:], data[:, 1:].T)
    XX = np.dot(data[:, :-1], data[:, :-1].T)
    YX = np.dot(data[:, 1:], data[:, :-1].T)
    model = VAR(data.T)
    r = model.fit(1)
    A = r.coefs[0, :, :]
    # A = np.ones((n,n))
    B = np.ones((n, n))
    np.fill_diagonal(B, 0)
    B[np.triu_indices(n)] = 0
    K = int(np.sum(np.abs(B)))  # abs(A) + abs(B)
    a_idx = np.where(A != 0)
    b_idx = np.where(B != 0)
    np.fill_diagonal(B, 1)

    try:
        s = x0.shape
        x = x0
    except AttributeError:
        x = np.r_[A.flatten(), 0.1 * np.random.randn(K)]

    o = optimize.fmin_bfgs(nllf2, x,
                           args=(np.double(A), np.double(B),
                                 YY, XX, YX, T, a_idx, b_idx),
                           gtol=1e-12, maxiter=500,
                           disp=False, full_output=True)
    ipdb.set_trace()  # debugger breakpoint left in the original source
    A, B = x2M(o[0], np.double(A), np.double(B), a_idx, b_idx)
    B = B + B.T
    return A, B
def var_prediction(df, train_perc, incidence_file, window=18, diff=True):
    # Clean the dataframe
    df_aux = df.drop('Unnamed: 0', axis=1)
    df_aux = df_aux.drop('tref_start', axis=1)
    X = df_aux.values[:, :]
    if diff:
        X = np.diff(X, axis=0)

    # Fit the scaler on the training portion
    v = int(len(X) * train_perc)
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X[:v])

    # Train the model
    model = VAR(X_train)
    results = model.fit(window)

    # Validation dataframe with incidents
    df = df.iloc[v:]
    incidencias = None
    if incidence_file is not None:
        inc = get_working_incidence(incidence_file)
        df = generar_incidencias(df, inc).sort_values(by=['tref_start'])
        # Array of incidents
        incidencias = df['incidencia'].values[window:]
        df = df.drop('incidencia', axis=1)

    # Get the real network values
    df = df.drop('Unnamed: 0', axis=1)
    df = df.drop('tref_start', axis=1)
    X = df.values[:, :]
    if diff:
        X = np.diff(X, axis=0)
        if incidence_file is not None:
            incidencias = incidencias[1:]
    X = scaler.transform(X)

    # Compute the predictions
    ys = X[window:]
    yhats = []
    for i in range(window, len(X)):
        yhats.append(results.forecast(X[i - window:i], 1)[0])
    return ys, np.array(yhats), incidencias
def infl_forecast_values(year='2001', month='02', n_steps=6):
    # n_steps is how far into the future you look
    # crop the data depending on n_steps and date
    orig_df = load_data()
    date = form_date(year, month)
    train, test = crop_data(orig_df, date, n_steps)

    # take first difference
    first_row, train_1 = take_diff(train)
    first_YOY = first_row['YOY']

    # create VAR model
    model = VAR(train_1, freq='MS')
    # for now fit to 4
    results = model.fit(4)
    lag_order = results.k_ar
    prediction_input = train_1.values[-lag_order:]

    # I want the last column
    infl_results = results.forecast(prediction_input, n_steps)[:, 1]
    return infl_results
def process(data, cid):
    """
    Call make_stationary() to check for stationarity and make the time series stationary.
    Make a VAR model and call the fit method with the desired lag order.
    Forecast with the VAR model and return the forecasted data.
    """
    nobs = 1
    df = data.copy()
    df_differenced = make_stationary(df)  # check for stationarity and make the time series stationary
    model = VAR(df_differenced)  # Make a VAR model
    model_fit = model.fit(10)  # call fit method with lag order
    model_fit.summary()  # summary result of the fitted model
    lag_order = model_fit.k_ar  # Get the lag order
    forecast_input = df_differenced.values[-lag_order:]  # Input data for forecasting

    # Forecast and invert the transformation to get the real forecast values
    fc = model_fit.forecast(y=forecast_input, steps=nobs)
    inp_file = os.getcwd() + cid
    joblib.dump(model_fit, inp_file)
def run(self):
    if not self._args:
        return None

    data = self._data_service.get_data(self._args)
    split_data = {
        self._args['dependent_variable']:
            data[data['ticker'] == self._args['dependent_variable']]['close']
    }
    for i, ticker in enumerate(self._args['independent_variables']):
        split_data[ticker] = data[data['ticker'] == ticker]['close']

    data = pd.DataFrame(split_data).dropna()
    model = VAR(data)
    result = model.fit(2)
    print(result.summary())
    result.test_causality(self._args['dependent_variable'],
                          self._args['independent_variables'],
                          kind='f')
    return result
def temporal_detect_individual(target_idx, dta, maxlag):
    num_ts = len(dta[0])
    len_ts = len(dta)
    tmp_target = [dta[j][target_idx] for j in range(len_ts)]
    res_lag = []
    for i in range(num_ts):
        if i != target_idx:
            tmp_ts = [dta[j][i] for j in range(len_ts)]
            tmp_x = list(zip(tmp_target, tmp_ts))
            # print(np.shape(tmp_x))
            model = VAR(tmp_x)
            best_lag = model.select_order(maxlag, verbose=False)
            res_lag.append(best_lag)
    return res_lag
def GMMGranger(k, t, n):
    bet = 0
    yes = 0
    while bet <= n - 1:
        xseries = GMM(k, t)
        yseries = GMM(k + 3, t)
        data = pd.DataFrame([xseries, yseries]).transpose()
        model = VAR(np.asarray(data))
        try:
            results = model.fit(maxlags=15, ic='aic', trend='nc')
        except Exception:
            continue
        bet += 1
        if results.test_causality(0, 1, kind='wald').summary().data[1][2] > 0.05:
            if results.test_causality(1, 0, kind='wald').summary().data[1][2] > 0.05:
                yes += 1
    return float(yes) / n


# accuracy = GMMGranger(5, 200, 100)
# print(accuracy)
def test_gc(data, maxlag=None, signif=0.05, verbose=False):
    """Summary
    Apply the Granger causality test to all permutations of column pairs

    Args:
        data (TYPE): Description
        maxlag (None, optional): Description
        signif (float, optional): Description
        verbose (bool, optional): Description

    Returns:
        TYPE: dataframe
    """
    from statsmodels.tsa.api import VAR
    if isinstance(data, pd.core.frame.DataFrame):
        colns = data.columns
        arr = data.values
    else:
        arr = np.array(data)

    model = VAR(arr)
    if maxlag:
        res = model.fit(maxlag, verbose=verbose)
    else:
        res = model.fit(verbose=verbose)

    gc_test = []
    obs_name = res.names
    for c1, c2 in permutations(obs_name, 2):
        gc_res = res.test_causality(c1, c2, signif=signif, verbose=verbose)
        coln1, coln2 = colns[[obs_name.index(c1), obs_name.index(c2)]]
        gc_res = pd.Series(gc_res, name=(coln1, coln2))
        gc_res['H0'] = "'{}' do not Granger-cause '{}'".format(coln2, coln1)
        gc_test.append(gc_res)

    results = pd.DataFrame(gc_test)
    results['VAR'] = model
    results['best_order'] = (len(model.exog_names) - 1) / data.shape[1]
    return results
import numpy as np
import statsmodels.api as sm
from statsmodels.tsa.api import VAR

# some example data
mdata = sm.datasets.macrodata.load().data
mdata = mdata[['realgdp', 'realcons', 'realinv']]
names = mdata.dtype.names
data = mdata.view((float, 3))

use_growthrate = False  # True # False
if use_growthrate:
    data = 100 * 4 * np.diff(np.log(data), axis=0)

model = VAR(data, names=names)
res = model.fit(4)

nobs_all = data.shape[0]

# in-sample 1-step ahead forecasts
fc_in = np.array([np.squeeze(res.forecast(model.y[t - 20:t], 1))
                  for t in range(nobs_all - 6, nobs_all)])
print(fc_in - res.fittedvalues[-6:])

# out-of-sample 1-step ahead forecasts
fc_out = np.array([np.squeeze(VAR(data[:t]).fit(2).forecast(data[t - 20:t], 1))
                   for t in range(nobs_all - 6, nobs_all)])
print(fc_out - data[nobs_all - 6:nobs_all])
# print(df)
df = df.fillna(0)
original_df[m] = df
stat_df = df.diff().dropna()

# get rid of columns that are zeros at the end; we just assume they will continue to be zeros
for col_name in stat_df.columns.values:
    if stat_df[col_name][-1] == 0 and stat_df[col_name][-2] == 0:  # and stat_df[col_name][-3] == 0:
        print(col_name)
        del stat_df[col_name]
        no_forecast.setdefault(m, []).append(col_name)

# print(stat_df)
forecast_cols[m] = stat_df.columns.values
# new_df = stat_df[['P17', 'P15', 'P16']]
model = VAR(stat_df)
maxlags = 3
try:
    results = model.fit(maxlags, ic='aic', verbose=True)
except Exception as exc:
    maxlags = 1
    results = model.fit(maxlags, ic='aic', verbose=True)

# if m == 'M2':
#     import pdb
#     pdb.set_trace()

# import pdb
# pdb.set_trace()
# results = model.fit(4)
# print(results.summary())
lag_order = results.k_ar
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm
from statsmodels.tsa.api import VAR
from scipy.signal import lfilter

mdata = sm.datasets.macrodata.load().data
mdata = mdata[["realgdp", "realcons", "realinv"]]
names = mdata.dtype.names
data = mdata.view((float, 3))
data = np.diff(np.log(data), axis=0)

model = VAR(data)
res = model.fit(2)
res.plot_sample_acorr()

irf = res.irf(10)
irf.plot()
plt.show()
plt.savefig("image.png")

res.plot_forecast(5)
res.fevd().plot()
plt.show()
plt.savefig("image2.png")
data = pd.read_csv("/home/dusty/Econ8310/DataSets/pollutionBeijing.csv")

format = '%Y-%m-%d %H:%M:%S'
data['datetime'] = pd.to_datetime(data['datetime'], format=format)
data.set_index(pd.DatetimeIndex(data['datetime']), inplace=True)

# Select variables for VAR model
varData = data[['pm2.5', 'TEMP', 'PRES', 'Iws']].dropna()[:-50]
test = data[['pm2.5', 'TEMP', 'PRES', 'Iws']].dropna()[-50:]

# endVal = varData.loc["2014-01-04 00:00:00"]
# varData = varData.diff(1)

model = VAR(varData)  # define the model and data
# model.select_order()  # uses information criteria to select the model order
reg = model.fit(30)  # order chosen based on BIC criterion

# Forecasting
fcast = reg.forecast(varData['2013-01-04':].values, steps=50)


def dediff(todaysVal, forecast):
    future = forecast
    for i in range(np.shape(forecast)[0]):
        if (i == 0):
def estimate_VAR():
    df = load_external()
    d = load_es_uncertainty()
    df1 = load_eu_uncertainty()
    nd = d.join(df).join(df1)
    plot_index_comparison(nd)
    plot_eu_epu(nd)
    plot_cinco_elpais(nd)
    nd = transform_data(nd)
    plot_epu_gdp(nd)

    benchmark_subset = ['EPU', 'europe', 'fedea', 'inflation', 'differential']
    nd['EPU'] = nd['policy'].diff(periods=1)
    data = nd.reindex(columns=benchmark_subset)
    data = data.dropna()
    data.describe()

    model = VAR(data)
    results = model.fit(6)
    irf = results.irf(12)
    irf.plot(orth=True, impulse='EPU', subplot_params={'fontsize': 12})
    # irf.plot_cum_effects(orth=True, impulse='EPU', subplot_params={'fontsize': 12})
    cum_effects = irf.orth_cum_effects

    fedea_on_gdp = get_fedea_on_gdp()
    elasticity = -100 * fedea_on_gdp * cum_effects[12, 2, 0]
    print('Effects of a 1 sd uncertainty shock on gdp growth (negative): %0.3f%%' % elasticity)
    print('Inflation increases by %0.2f' % (100 * cum_effects[12, 3, 0], ))
    print('Bond spreads increase by %0.1f basis points' % (100 * cum_effects[12, 4, 0], ))

    full_sset = ['ibex', 'vol', 'resid', 'europe', 'fedea', 'inflation', 'differential']

    def get_irf(nd, subset):
        '''
        http://statsmodels.sourceforge.net/0.6.0/vector_ar.html
        '''
        data = nd.reindex(columns=subset)
        data = data.dropna()
        data.describe()
        model = VAR(data)
        results = model.fit(6)
        irf = results.irf(12)
        cum_effects = irf.orth_cum_effects
        return cum_effects[12, 2, 0]

    for colname in colnames:
        nd['uncert'] = nd[colname] / nd.articles
        nd['uncert'] = nd['uncert'] / nd['uncert'].mean() * 100
        nd['uncert'] = nd['uncert'].diff(periods=1)
        subset = ['uncert', 'europe', 'fedea', 'inflation', 'differential']
        cum_effect = get_irf(nd, subset)
        print('**%s** | %d | %.04f' % (colname, nd[colname].sum(), 100 * fedea_on_gdp * cum_effect))

    aa = d.mean()[colnames]
    plt.figure(6)
    h = plt.bar(range(len(aa)), aa, label=list(aa.index))
    plt.subplots_adjust(bottom=0.3)
    xticks_pos = [0.65 * patch.get_width() + patch.get_xy()[0] for patch in h]
    plt.xticks(xticks_pos, list(aa.index), ha='right', rotation=45)
    plt.savefig(os.path.join(rootdir, 'figures', 'frequency_types.%s' % fig_fmt), format=fig_fmt)
import numpy as np
import pandas
from statsmodels import datasets as ds  # assumed alias for statsmodels.datasets
from statsmodels.tsa.api import VAR
from statsmodels.tsa.base.datetools import dates_from_str

mdata = ds.macrodata.load_pandas().data

# prepare the dates index
dates = mdata[['year', 'quarter']].astype(int).astype('S4')
quarterly = dates["year"] + "Q" + dates["quarter"]
quarterly = dates_from_str(quarterly)

mdata = mdata[['realgdp', 'realcons', 'realinv']]
mdata.index = pandas.DatetimeIndex(quarterly)
data = np.log(mdata).diff().dropna()

model = VAR(data)
est = model.fit(maxlags=2)


def plot_input():
    est.plot()


def plot_acorr():
    est.plot_acorr()


def plot_irf():
    est.irf().plot()


def plot_irf_cum():
    irf = est.irf()
    irf.plot_cum_effects()