# Shared imports used throughout these examples:
import numpy as np
import pandas as pd
from statsmodels.tsa.api import VAR

def varmodel(self):
    # Note: splitdf, testdate and forecaststeps are configuration values read
    # from enclosing scope in the original source.
    self.mvdfg.index = pd.to_datetime(self.mvdfg.index)
    self.var_predicted = pd.DataFrame()
    self.var_forecast = pd.DataFrame()
    self.var_data_train = pd.DataFrame()
    self.var_data_test = pd.DataFrame()
    maxlag = 3
    if splitdf.upper() == 'Y':
        # Validation model: hold out everything after testdate
        self.var_data_train = self.mvdfg[self.mvdfg.index <= testdate]
        self.var_data_test = self.mvdfg[self.mvdfg.index > testdate]
        var_model = VAR(self.var_data_train)
        results = var_model.fit(maxlags=maxlag, ic='aic')
        print(results.summary())
        lag_order = results.k_ar
        var_steps = len(self.var_data_test)
        pred_values = results.forecast(self.var_data_train.values[-lag_order:], var_steps)
        self.predicted = pd.DataFrame(pred_values, index=self.mvdfg.index[-var_steps:],
                                      columns=self.mvdfg.columns)
        self.var_predicted = self.predicted
    # Forecast beyond the sample: monthly index starting one month after the data ends
    startdate = self.mvdfg.index.max() + pd.offsets.DateOffset(months=1)
    var_fc_index = pd.date_range(startdate, periods=forecaststeps, freq='MS')
    var_forecast_model = VAR(self.mvdfg)
    fc_results = var_forecast_model.fit(maxlags=maxlag, ic='aic')
    print(fc_results.summary())
    fc_lag_order = fc_results.k_ar
    fc_values = fc_results.forecast(self.mvdfg.values[-fc_lag_order:], forecaststeps)
    self.forecast = pd.DataFrame(fc_values, index=var_fc_index, columns=self.mvdfg.columns)
    self.var_forecast = self.forecast
    print(self.var_forecast)
    return self.var_predicted, self.var_forecast
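A minimal driver sketch for the method above: splitdf, testdate and forecaststeps are read from enclosing scope in the original, so hypothetical values and synthetic data are used here.

import numpy as np
import pandas as pd
from types import SimpleNamespace

# hypothetical configuration the method reads from module scope
splitdf = 'Y'
testdate = pd.Timestamp('2020-06-01')
forecaststeps = 6

# synthetic monthly data; varmodel() is the function defined above
idx = pd.date_range('2018-01-01', periods=36, freq='MS')
rng = np.random.default_rng(0)
demo = SimpleNamespace(mvdfg=pd.DataFrame(rng.standard_normal((36, 2)),
                                          index=idx, columns=['a', 'b']))
predicted, forecast = varmodel(demo)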
Example #2
    def pca(self):
        '''pca estimation of DFM'''
        #calculate loading and factor using pca
        self.pca_loading, self.pca_factor = hetero_pca(self.observations,
                                                       self.n_factor)
        self.n_factor = self.pca_factor.shape[0]
        #calculate common part
        self.pca_common = self.pca_loading @ self.pca_factor

        #calculate observation residuals
        obs_res = self.observations - self.pca_common

        #calculate observation residual covariance matrix
        self.obs_res_cov = np.cov(obs_res)

        #fit factors with VAR (a single factor needs no VAR fit here)
        if self.n_factor > 1:
            model = VAR(self.pca_factor.T)
            #select and save lag number
            results = model.fit(maxlags=self.max_lag, ic='aic')
            self.lag = max(results.k_ar, 1)
            #save dynamic matrices
            self.pca_var_param = results.coefs
            self.pca_factor_var = results
            #calculate factor residuals and their covariance matrix
            factor_resid = results.resid  #(n_time - lag, n_factor)
            self.pca_factor_res_cov = np.cov(factor_resid.T)
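hetero_pca is defined elsewhere in that project; as a rough stand-in with the same (loading, factor) contract, a plain SVD-based PCA could look like this sketch, assuming observations has shape (n_series, n_time).

import numpy as np

def plain_pca(observations, n_factor):
    # center each series, then keep the top-k singular pairs
    centered = observations - observations.mean(axis=1, keepdims=True)
    u, s, vt = np.linalg.svd(centered, full_matrices=False)
    loading = u[:, :n_factor] * s[:n_factor]   # (n_series, n_factor)
    factor = vt[:n_factor]                     # (n_factor, n_time)
    return loading, factor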
Example #3
    def to_state_space_rep(self):
        '''use state space rep to transfer VAR(p) to VAR(1)'''
        #put factors and their lags into one vector
        stacked_factor = np.array([
            np.roll(self.pca_factor, i, axis=1)[:, (self.lag - 1):]
            for i in range(self.lag)
        ])
        self.stacked_factor = stacked_factor.reshape(self.lag * self.n_factor,
                                                     -1)

        #drop the first (lag - 1) observations to align with the stacked factors
        self.observations = self.observations[:, (self.lag - 1):]

        #estimate dynamics of factors using VAR(1)
        model = VAR(self.stacked_factor.T)
        results = model.fit(1)
        self.stacked_var_param = results.coefs[0]  #choose the lag-1 coefs

        # construct loading of state space model which is of the form [\Lambda 0 0]
        self.stacked_loading = np.concatenate(
            (self.pca_loading,
             np.zeros((self.pca_loading.shape[0], self.pca_loading.shape[1] *
                       (self.lag - 1)))),
            axis=1)
        # construct residue matrix G
        G = np.concatenate(
            (np.eye(self.n_factor),
             np.zeros((self.n_factor * (self.lag - 1), self.n_factor))),
            axis=0)
        # construct covariance matrix for stacked factor residues
        self.stacked_factor_res_cov = G @ self.pca_factor_res_cov @ (G.T)
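The stacking above is the standard companion-form construction; the same VAR(1) transition matrix can also be built directly from fitted VAR(p) coefficients, as in this sketch.

import numpy as np

def companion_matrix(coefs):
    # coefs: (p, k, k) array, e.g. VARResults.coefs
    p, k, _ = coefs.shape
    top = np.concatenate(coefs, axis=1)   # [A_1 A_2 ... A_p], shape (k, k*p)
    bottom = np.eye(k * (p - 1), k * p)   # [I 0], shifts each lag down one slot
    return np.vstack([top, bottom])       # (k*p, k*p)

# the process is stable iff all companion eigenvalues lie inside the unit circle:
# np.all(np.abs(np.linalg.eigvals(companion_matrix(results.coefs))) < 1)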
Example #4
def predict_var(data):
    ''' Predict continuous data using vector autoregression. Evaluation metric: RMSE
        As the Summary print is too long for some editors, it is stored as a Text file
        (var_res.txt)
    '''
    data.drop(['risk_premium'], axis=1, inplace=True) # TODO: statsmodels VAR only seems to work with 10 variables here;
                                                      #      risk_premium was dropped arbitrarily -- leaving it in
                                                      #      raises an error message.
    data = diff_n_times(data, 1) # TODO: you should probably not difference all columns,
                                 # but selectively the ones which are not stationary.
                                 # This code includes a number of evaluation functions to check for stationarity.
    subsets = create_subsets_nolag(data)
    for j in subsets:
        cmb = np.concatenate((j[0], np.expand_dims(j[1], 1)), axis=1)
        nobs = int(0.2*len(j[1]))
        train = cmb[:-nobs] # Simple train, validation split
        valid = cmb[-nobs:]
        train = pd.DataFrame(train, columns=list(j[0].columns)+['log_return'])
        model = VAR(train)
        results = model.fit(2)
        with open("var_res.txt", "a") as file1:
            file1.write(j[2])
            file1.write(str(results.summary()))
        print(j[2])
        get_durbin_watson(list(j[0].columns)+['log_return'], results)
        res = []
        for x, y in zip(valid[:-2], valid[1:-1]):
            pred = results.forecast([x, y], 1)
            res.append(pred)
        res = np.vstack(res)
        df_res = undiff_once(train, valid, res)
        print('RMSE: ')
        print(rmse_loss(df_res['log_return'], df_res['log_return_forecast']))
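diff_n_times, undiff_once and create_subsets_nolag are project-specific helpers; a plausible sketch of the differencing helper, consistent with how it is used above:

def diff_n_times(df, n):
    # difference every column n times, dropping the NaN head each round
    for _ in range(n):
        df = df.diff().dropna()
    return df

# undoing one round of differencing amounts to adding the last observed level
# and cumulatively summing the forecast differences:
# levels_forecast = last_level + forecast_diffs.cumsum()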
Example #5
def make_var_model(data, lags=1, actual_plot=False):

    # make a VAR model
    model = VAR(data)

    result_dict = {}
    for lag in range(1, lags + 1):

        results = model.fit(maxlags=lag)

        print('Exogenous Variables for the model with Lag: %d \n ' % lag
              + str(results.exog_names))
        print(results.summary())

        if actual_plot:
            results.plot()

        fitted_values = results.fittedvalues

        lag_order = results.k_ar

        forecast_values = pd.DataFrame(data=results.forecast(
            y=data.values[-lag_order:], steps=5),
                                       columns=results.names)

        results.forecast_interval(y=data.values[-lag_order:], steps=5)
        results.plot_forecast(steps=5, plot_stderr=False)

        result_dict['Lag_Order_{}'.format(lag)] = results

    return result_dict
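A usage sketch, where prices is a hypothetical stationary DataFrame with a DatetimeIndex:

results_by_lag = make_var_model(prices, lags=3)
best = results_by_lag['Lag_Order_2']
print(best.aic, best.bic)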
Example #6
def fit_model(data, p, mod):
    """
    The function that estimates the coefficients of the AR model.
    
    Input:
      - data: The loaded dataset
      - p: The order of the AR model
      - mod: A string "AR", "myVAR" or "VAR" that selects the model to be used
    
    Returns:
      - A: The coefficients (scalar or matrix)
    """
    if mod == "AR":
        A = fit_ar(data, p)
            
    elif mod == "myVAR":
        data_vectorized = data.reshape((data.shape[0]*data.shape[1], data.shape[2]))
        A = estimate_matrix_coefficients(data_vectorized, p)
        A = A[1:]  # drop the first entry (presumably the intercept term)
        
    elif mod == "VAR":           
        data_vectorized = data.reshape((data.shape[0]*data.shape[1], data.shape[2]))
        model = VAR(data_vectorized.T)
        results = model.fit(p)
        A2 = results.coefs
        A = []
        for i in range(A2.shape[0]):
            A.append(A2[i, ...])
            
    return A
    def fit_VAR_model(self, lags):
        model = VAR(self.df)
        self.model_fitted = model.fit(lags)
        print("\n**********model_fitted, lag: " + str(lags) + "***********\n")
        print(self.model_fitted.summary())

        return self.model_fitted
    def find_optimal_lag_length(
        self, cols, time, min_lag=1, max_lag=8, criterion="aic"
    ):

        try:
            s = self.map_column_to_sheet(cols)
            multi = False
        except Exception:
            s = self.map_column_to_sheet(cols[0])
            multi = True

        df = s.df

        if multi:
            try:
                args_vector = np.append(cols, time)
                data = df[args_vector]
                data = data.set_index(time)
            except Exception:
                data = df[cols]

            model = VAR(data)

        else:
            try:
                args_vector = np.array([cols, time])
                data = df[args_vector]
                data = data.set_index(time)
            except Exception:
                data = df[cols]

            model = s_ar.AR(data)

        info_loss = np.zeros(max_lag - min_lag + 1)

        if criterion == "aic":
            for i in range(max_lag - min_lag + 1):
                fit = model.fit(i + min_lag)
                info_loss[i] = fit.aic

        elif criterion == "bic":
            for i in range(max_lag - min_lag + 1):
                fit = model.fit(i + min_lag)
                info_loss[i] = fit.bic

        else:
            print("ERROR: Criterion argument not supported.")
            return

        optimal = int(np.argmin(info_loss)) + min_lag

        utterance = (
            "The optimal lag length according to the "
            + str(criterion)
            + " criterion is "
        )
        utterance = utterance + str(optimal) + "."

        return QueryResult(optimal, utterance)
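For the multivariate branch, newer statsmodels versions perform this search directly; a sketch of the built-in alternative:

sel = VAR(data).select_order(maxlags=8)
print(sel.selected_orders)  # e.g. {'aic': 3, 'bic': 1, 'hqic': 2, 'fpe': 3}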
Example #9
def forecast(data, lag, forcastStep):
    #with open('varModel.json') as f:
    #    data = json.load(f)

    mdata = prepareData(data)
    model = VAR(mdata)

    results = model.fit(lag)


    fevd = results.fevd(forcastStep)

    fevModel = []

    for val in range(len(fevd.names)):
        for i in range(fevd.periods):
            for j in range(len(fevd.names)):
                fevItem = {}
                fevItem["ind"] = val
                fevItem["period"] = i
                fevItem["compindex"] = j
                fevItem["val"] = fevd.decomp[val][i][j]
                fevModel.append(fevItem)

    #irf = results.irf(forcastStep)
    #print(irf.orth_irfs)
    #print(irf.svar_irfs)
    #print(irf.irfs)

    mid, lower, upper = createForcastModel(results, forcastStep)
    print(fevd.summary())
    return mid, lower, upper, fevModel
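Given the indexing above, fevd.decomp behaves as a (n_variables, periods, n_variables) array, so the triple loop can equivalently be written as a comprehension:

fevModel = [
    {"ind": val, "period": i, "compindex": j, "val": fevd.decomp[val][i][j]}
    for val in range(len(fevd.names))
    for i in range(fevd.periods)
    for j in range(len(fevd.names))
]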
Example #10
def VARPredict(pdData, steps: int):
    """
    Takes a pandas DataFrame, then predicts `steps` ahead.

    pdData: pandas DataFrame
    steps: int

    Returns a (steps, n_series)-shaped ndarray of forecasts.
    """
    # Compute VAR
    model = VAR(pdData)

    # fit data; fit_ic is a module-level setting (e.g. 'aic').
    # Note: recent statsmodels spells the no-trend option 'n' rather than 'nc'.
    try:
        results = model.fit(maxlags=15, ic=fit_ic, trend='nc')
    except Exception as e:
        print(e)
        print(pdData)
        exit(1)
    #print(results.summary())

    #results.plot()
    #plt.show()
    #results.plot_acorr()
    #plt.show()

    #Forecast diffs
    forecast = results.forecast(pdData.values[-results.k_ar:], steps)
    #results.plot_forecast(10)
    #plt.show()

    return forecast
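A usage sketch, assuming levels data in a hypothetical DataFrame df and the module-level fit_ic setting:

import numpy as np

fit_ic = 'aic'                # assumed module-level setting read by VARPredict
diffs = df.diff().dropna()    # difference toward stationarity
fc_diff = VARPredict(diffs, steps=5)
fc_levels = df.iloc[-1].values + np.cumsum(fc_diff, axis=0)  # integrate back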
def load_data():

    #import data
    X, Y = data.import_data(set='train')

    #do not plug in returns, but residuals
    #plug in residuals
    VAR_model = VAR(X)

    results = VAR_model.fit(1)
    ar_returns = results.fittedvalues

    #columns to drop from dataframe
    columns = [
        'XMRspread', 'XMRvolume', 'XMRbasevolume', 'XRPspread', 'XRPvolume',
        'XRPbasevolume', 'LTCspread', 'LTCvolume', 'LTCbasevolume',
        'DASHspread', 'DASHvolume', 'DASHbasevolume', 'ETHspread', 'ETHvolume',
        'ETHbasevolume'
    ]
    ar_returns.drop(columns, axis=1, inplace=True)

    X = X.loc[ar_returns.index]
    x_returns = X[ar_returns.columns]
    residual_df = x_returns - ar_returns
    X = X.join(residual_df, how='inner', rsuffix='residual')

    y_ar_returns = ar_returns.copy()
    y_ar_returns.columns = Y.columns
    Y = (Y.loc[X.index] - y_ar_returns.shift(-1)).dropna()
    X = X.loc[Y.index]

    x = X.to_numpy()
    y = Y.to_numpy()

    return x, y, X, Y
def gc_graph(X, p = 2, signif = 0.01):
  '''
  X should be a pandas dataframe ready to be consumed by VAR(-)

  p is the model order we will use.

  We then produce a granger causality graph, where GC is tested w.r.t.
  the whole information set.
  '''
  # assumes: import networkx as nx; from itertools import product
  G = nx.DiGraph()
  G.add_nodes_from(X.columns.values)
  
  model = VAR(X)
  results = model.fit(p)

  #Suppress output from test_causality
  import os, sys
  stdout_real = sys.stdout
  sys.stdout = open(os.devnull, 'w')

  #itertools product: test every ordered pair of variables
  for e in product(X.columns.values, X.columns.values):
    gc = results.test_causality(*e, signif = signif)
    #older statsmodels returns a dict; newer versions return an object with a
    #.conclusion attribute instead of supporting gc['conclusion']
    if gc['conclusion'] == 'reject':
      G.add_edge(e[0], e[1])

  sys.stdout = stdout_real
  return G
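A self-contained usage sketch on synthetic data (assuming the dict-style test_causality return noted above):

import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
df = pd.DataFrame(rng.standard_normal((200, 3)), columns=['a', 'b', 'c'])
G = gc_graph(df, p=2, signif=0.01)
print(sorted(G.edges()))  # pairs where the no-causality null was rejected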
def test_gc(data, index, maxlag, header, alpha):
    VARResults.test_causality = a_test_causality

    # g = Digraph('G', filename='granger_all_new.gv', strict=True)

    # edgegranger = []

    model = VAR(data)
    result = {}
    lag_dic = {}
    res_output = []
    Granger_automated(maxlag, model, lag_dic, res_output, result, header,
                      alpha, index)
    print(result)
    print(res_output)

    if res_output:
        output_df = pd.DataFrame(res_output)
        output_df.columns = [
            'Effect-Node', 'Cause-Node', 'Time-Lag', 'Strength', 'Method',
            'Partition'
        ]
        output_df = output_df.sort_values(by=['Strength'])

        print(output_df.head(20))

        # print(g)
        # print(g.view())
        # g

        # output_df.to_csv("gc_baseline_out.csv", header=False, index=False)
        # numpy_output = output_df.to_numpy
        # print(numpy_output)

    return res_output
Example #14
 def stats(self, p):
     '''
     VAR model from statsmodels for testing
     p: lag
     '''
     Var_result = CRESULT()
     varmodel = VAR(self.data)
     results = varmodel.fit(p)
     Var_result.summary = results.summary()
     # AIC and BIC
     Var_result.aic = results.aic
     Var_result.bic = results.bic
     # Coefficients
     if p == 1:
         Var_result.coefs = pd.DataFrame(results.coefs[0],
                                         index=self.data.columns,
                                         columns='Lag_' + self.data.columns)
     else:
         Var_result.coefs = results.coefs
     # Residual correlation
     Var_result.corr = pd.DataFrame(results.resid_corr,
                                    index=self.data.columns,
                                    columns=self.data.columns)
     # Stability: moduli of the eigenvalues of each lag's coefficient matrix
     eigval_list = [abs(np.linalg.eig(i)[0]) for i in results.coefs]
     eigval_df = pd.DataFrame(eigval_list).T
     eigval_df.columns = ['lag' + str(i) for i in range(1, p + 1)]
     Var_result.stable = eigval_df
     return Var_result
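The per-lag eigenvalue table is a heuristic; the standard stability condition uses the companion-matrix eigenvalues, which statsmodels exposes directly on the fitted results:

# inside stats(), after fitting:
print(results.is_stable(verbose=True))  # True iff all companion eigenvalues
                                        # lie inside the unit circle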
Example #15
def VAR_IRF(df, n=10, future=20):
    m = VAR(df)
    print(m.select_order(n).summary())  # show information criteria per lag
    n = int(input('order:'))
    model = m.fit(maxlags=n)
    print('\n\n', model.summary())
    model.irf(future).plot()
def run_VAR(data, param):

    p = param['p']
    testsize = param['testsize']

    T = data.shape[-1]
    T_test = int((T * testsize) // 1)
    result_full = np.zeros([data.shape[0], T_test])

    total_time = 0
    n_round = 0

    for i in range(T_test):

        ts = data[..., i:T - T_test + i].copy()
        n_round += 1
        model = VAR(ts)
        start = time.time()
        result = model.fit(p).forecast(ts, 1)
        end = time.time()
        total_time = total_time + (end - start)
        result_full[..., i] = result[..., -1]

    label = data[..., -T_test:]

    stat = {}
    stat['acc'] = get_acc(result_full, label)
    stat['nrmse'] = nrmse(result_full, label)
    stat['ave_time'] = total_time / n_round

    return stat
Example #17
def forecast(df_train, number_of_forecast_points, forecast_index, lag_order=5, diff=2):
    """
    Learn and forecast with VAR model (Max : 2 differencing)
    df_train (dataframe) : input data
    number_of_forecast_points (int) : number of time step that want to predict
    forecast_index (list) : index name of each predicted value
    lag_order (int) : window size of input (How many previous timestep will be used as input)
    diff (int 0, 1, 2) : number of differencing
    return 
    real_forecast (dataframe) : dataframe with predicted value
    model (Object) : fitted model
    """
    assert diff == 0 or diff == 1 or diff == 2, 'diff = 1 or 2 only'

    df_differenced = df_train
    for _ in range(diff):
        df_differenced = df_differenced.diff().dropna()

    model = VAR(df_differenced)
    model_fitted = model.fit(lag_order)

    forecast_input = df_differenced.values[-lag_order:]

    fc = model_fitted.forecast(y=forecast_input, steps=number_of_forecast_points)

    if diff == 0:
        real_forecast = pd.DataFrame(fc, index=forecast_index, columns=df_train.columns + '_forecast')
    elif diff == 1:
        df_forecast = pd.DataFrame(fc, index=forecast_index, columns=df_train.columns + '_1d')
        real_forecast = invert_transformation(df_train, df_forecast, second_diff=False)
    elif diff == 2:
        df_forecast = pd.DataFrame(fc, index=forecast_index, columns=df_train.columns + '_2d')
        real_forecast = invert_transformation(df_train, df_forecast, second_diff=True) 

    return real_forecast, model_fitted
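invert_transformation is defined elsewhere; a sketch consistent with how it is called here (columns suffixed '_1d'/'_2d' in, '_forecast' out) might be:

def invert_transformation(df_train, df_forecast, second_diff=False):
    # undo one or two rounds of differencing, column by column
    df_fc = df_forecast.copy()
    for col in df_train.columns:
        if second_diff:
            # roll the 2nd differences back to 1st differences
            df_fc[col + '_1d'] = (df_train[col].iloc[-1] - df_train[col].iloc[-2]) \
                                 + df_fc[col + '_2d'].cumsum()
        # roll the 1st differences back to levels
        df_fc[col + '_forecast'] = df_train[col].iloc[-1] + df_fc[col + '_1d'].cumsum()
    return df_fc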
Example #18
def predict(dataset=dataset_orig, future=1):
    # the defaults are immediately overridden by the command-line arguments
    args = parse_args()
    with open(args.path, 'rb') as f:
        dataset = pickle.load(f)
    future = args.future

    data = np.zeros((len(dataset), 4))
    data[:] = dataset

    for step in range(future):
        data_st2 = np.zeros((len(data), 4))
        data_st = np.log(data)

        data_st2[0] = data_st[0]
        data_st2[1:] = np.diff(data_st, axis=0)

        model = VAR(data_st2)
        results = model.fit()
        #print(results.summary())
        prediction_st2 = results.forecast(data_st2, 1)
        prediction_st = np.zeros((2, 4))
        prediction_st[0] = data_st[-1]
        prediction_st[1:] = prediction_st2
        prediction = np.cumsum(prediction_st, axis=0)[1:]
        prediction = np.exp(prediction)

        data = np.append(data, prediction, 0)

    print(data[-future:])
    return data[-future:]
Example #19
def generate_forecast_1(date='2003-01-01', n_steps=6):
    # n_steps is how far into future you look
    # crop the data depending on n_steps and date
    neg_YOY_CPI = load_sentiment_YOY_CPI()

    # if date is most recent then test is empty
    train, test = crop_data(neg_YOY_CPI, date, n_steps)

    #take first difference and record first row
    first_row = train.iloc[0]
    train_1 = train.diff().dropna()
    first_YOY = first_row['YOY']
    prev = train_1.values[:, 1]

    model = VAR(train_1, freq='MS')  # create VAR model
    results = model.fit(4)  #for now fit to 4
    lag_order = results.k_ar
    prediction_input = train_1.values[-lag_order:]

    # I want last column
    infl_results = results.forecast(prediction_input, n_steps)[:, 1]

    # return triple: previous, forecast_1, first_YOY

    return prev, infl_results, first_YOY
Example #20
def stoc_simulate(getfit_data, N=5000, nlag=8):
    #Transform tau to log scale
    fit_par_VAR = getfit_data['fit_par'].iloc[:, 0:3]
    fit_par_VAR.insert(3, '3', np.log(getfit_data['fit_par'].iloc[:, 3]))

    #Stochastic VAR fitting
    model = VAR(fit_par_VAR)
    results = model.fit(nlag)

    #Extract simulated scenarios: correlated draws via the Cholesky factor of
    #the residual correlation matrix
    u_L = np.linalg.cholesky(results.resid_corr)
    u_std = np.std(results.resid, axis=0)
    u_rand = np.random.normal(size=[fit_par_VAR.shape[1], N])
    u = np.dot(u_L, u_rand)  # correlated standard-normal draws

    Var_Rand = np.dot(u.transpose(), np.diag(u_std))
    Var_Betas = results.coefs
    Var_C = results.intercept

    return {
        'Var_Rand': Var_Rand,
        'Var_Betas': Var_Betas,
        'Var_C': Var_C,
        'nlag': nlag
    }
Example #21
def VARprocess(df, log=False):
    # Log transformation, relative difference and drop NULL values
    if log:
        df = np.log(df + 0.1).diff().dropna()
    # Vector Autoregression Process generation
    maxAttr = len(df.columns)
    # Find the right lag order
    orderFound = False
    while not orderFound:
        try:
            model = VAR(df.iloc[:, 0:maxAttr])
            order = model.select_order()
            orderFound = True
        except Exception:
            exc_type, exc_obj, exc_tb = sys.exc_info()
            if str(exc_obj) == "data already contains a constant.":
                maxAttr = maxAttr - 1
            else:
                maxAttr = int(str(exc_obj).split("-th")[0]) - 1
            print("Exception, reducing to n_attributes ", maxAttr)
            orderFound = False

    # Note: this targets the older statsmodels API in which select_order()
    # returned a {criterion: lag} dict; newer versions return a LagOrderResults
    # object with a .selected_orders mapping instead.
    n_lags = max(order.items(), key=operator.itemgetter(1))[1]
    method = max(order.items(), key=operator.itemgetter(1))[0]
    print("n_lags ", n_lags)
    print("method ", method)
    results = model.fit(maxlags=n_lags, ic=method)
    return results
def compute_pair_metrics(security, candidates):
    security = security.div(security.iloc[0])
    ticker = security.name
    candidates = candidates.div(candidates.iloc[0])
    spreads = candidates.sub(security, axis=0)
    n, m = spreads.shape
    X = np.ones(shape=(n, 2))
    X[:, 1] = np.arange(1, n + 1)
    drift = ((
        np.linalg.inv(X.T @ X) @ X.T @ spreads).iloc[1].to_frame('drift'))
    vol = spreads.std().to_frame('vol')
    corr_ret = (candidates.pct_change().corrwith(
        security.pct_change()).to_frame('corr_ret'))
    corr = candidates.corrwith(security).to_frame('corr')
    metrics = drift.join(vol).join(corr).join(corr_ret).assign(n=n)
    tests = []
    for candidate, prices in candidates.items():
        df = pd.DataFrame({'s1': security, 's2': prices})
        var = VAR(df.values)
        lags = var.select_order()  # select VAR order
        k_ar_diff = lags.selected_orders['aic']
        # Johansen Test with constant Term and estd. lag order
        cj0 = coint_johansen(df, det_order=0, k_ar_diff=k_ar_diff)
        # Engle-Granger Tests
        t1, p1 = coint(security, prices, trend='c')[:2]
        t2, p2 = coint(prices, security, trend='c')[:2]
        tests.append([ticker, candidate, t1, p1, t2, p2, k_ar_diff, *cj0.lr1])
    columns = [
        's1', 's2', 't1', 'p1', 't2', 'p2', 'k_ar_diff', 'trace0', 'trace1'
    ]
    tests = pd.DataFrame(tests, columns=columns).set_index('s2')
    return metrics.join(tests)
Example #23
def rolling_forecast(trainset, testset, lags):

    Pmse = []
    forecastreturn = []
    accuracys = []
    ntest = len(testset)
    for i in range(0, ntest):
        if i == 0:
            X_in = trainset
        else:
            # DataFrame.append was removed in pandas 2.0; use pd.concat instead
            X_in = pd.concat([trainset, testset.iloc[:i, :]])

        X_out = testset.iloc[i, 0]

        # building model
        model = VAR(X_in)
        results = model.fit(lags)
        forecasttest = results.forecast(results.endog, steps=1)[0][0]
        if (forecasttest * X_out) > 0:
            accuracy = 1
        else:
            accuracy = 0
        accuracys.append(accuracy)
        forecastreturn.append(forecasttest)
        Pmse.append(np.square(forecasttest - X_out))
    return Pmse, forecastreturn, accuracys
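A usage sketch, where returns is a hypothetical DataFrame of stationary returns with the target series in its first column:

import numpy as np

n_test = 50
pmse, preds, hits = rolling_forecast(returns.iloc[:-n_test],
                                     returns.iloc[-n_test:], lags=2)
print('mean squared forecast error:', np.mean(pmse))
print('directional accuracy:', np.mean(hits))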
Example #24
 def best_lag_dw(self, df, threshold=0.2):
     model = VAR(df, freq="MS")
     # Assumes stationary data.
      best_aic = float("inf")
     best_lag = None
     best_dw = None
     # Searching for best lag order.
     for i in range(1, 16):
         result = model.fit(i)
         #print("Lag order: ", i, " AIC: ", result.aic)
         # Checking with Durbin-Watson test for autocorrelation as well.
         dw_out = durbin_watson(result.resid)
         #print("DW test: ", dw_out)
         #print(abs(2.0-dw_out[0]))
         if ((result.aic < best_aic)
                 and (abs(2.0 - round(dw_out[0], 2)) <= threshold)
                 and (abs(2.0 - round(dw_out[1], 2)) <= threshold)):
             #print("ENTRA")
             best_aic = result.aic
             best_lag = i
             best_dw = dw_out
     print("Best lag order: ", best_lag, " with an AIC score of: ",
           best_aic)
     print("Durbin-Watson results:")
     for col, val in zip(df.columns, best_dw):
         print(col, ':', round(val, 2))
     print("-------------------------------------------------")
     return best_aic, best_lag, best_dw
Example #25
def var_prediction(df, train_perc, incidence_file, window=18, diff=True):
    # Clean up the df
    df_aux = df.drop('Unnamed: 0', axis=1)
    df_aux = df_aux.drop('tref_start', axis=1)
    X = df_aux.values[:, :]
    if diff:
        X = np.diff(X, axis=0)
    # Fit a scaler on the training portion
    v = int(len(X) * train_perc)
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X[:v])
    # Train the model
    model = VAR(X_train)
    results = model.fit(window)
    # Validation df with incidents
    inc = get_working_incidence(incidence_file)
    df = df.iloc[v:]
    df = generar_incidencias(df, inc).sort_values(by=['tref_start'])
    # Array of incidents
    incidencias = df['incidencia'].values[window:]
    df = df.drop('incidencia', axis=1)
    # Get the real network values
    df = df.drop('Unnamed: 0', axis=1)
    df = df.drop('tref_start', axis=1)
    X = df.values[:, :]
    if diff:
        X = np.diff(X, axis=0)
        incidencias = incidencias[1:]
    X = scaler.transform(X)
    # Get the predictions
    ys = X[window:]
    yhats = []
    for i in range(window, len(X)):
        yhats.append(results.forecast(X[i - window:i], 1)[0])
    return ys, yhats, incidencias
    def fit(self, alpha=0.05):
        """
        :param alpha: threshold of F-test
        :return: granger causality denpendencies
        """
        model_full = VAR(self.X)
        model_full_fit = model_full.fit(maxlags=self.p, ic='aic')

        # make prediction
        x_hat = self.predict(model_full_fit, self.X)

        # compute error
        err_full = np.subtract(x_hat.values, self.X.values[self.p:])
        var_full = list(np.var(err_full, axis=0))

        for j in range(self.d):
            x_temp = self.X.drop(columns=[self.names[j]])
            model_rest = VAR(x_temp)
            model_rest_fit = model_rest.fit(maxlags=self.p, ic='aic')

            # make prediction
            x_hat = self.predict(model_rest_fit, x_temp)

            # compute error
            err_rest = np.subtract(x_hat.values, x_temp.values[self.p:])
            var_rest = list(np.var(err_rest, axis=0))

            # F test (extremely sensitive to non-normality of X and Y)
            var_full_rest = var_full.copy()
            del var_full_rest[j]
            m = x_hat.shape[0]

            for i in range(len(x_hat.columns.values)):
                # Start Test using F-test
                p_value = self.f_test(var_rest[i], var_full_rest[i], m)
                if p_value < alpha:
                    self.pa[x_hat.columns.values[i]].append(self.names[j])

        res_df = pd.DataFrame(np.ones([self.d, self.d]),
                              columns=self.names,
                              index=self.names)
        for e in self.pa.keys():
            for c in self.pa[e]:
                res_df.loc[c, e] = 2
                if res_df.loc[e, c] == 0:
                    res_df.loc[e, c] = 1
        return res_df
Example #27
def forecast_DNS_VAR(ts, pred):
    # IMPORTANT: ts has undergone the DNS_OLS function previously; pred is the
    # number of months to forecast after the last entry of the time series ts.

    model = VAR(ts)
    # See Diebold and Rudebusch; all use VAR(1). Note that statsmodels
    # documents only method='ols' for VAR.fit.
    model_fitted = model.fit(1, method='mle')

    lag_order = model_fitted.k_ar

    return model_fitted.forecast(ts.values[-lag_order:], pred)
 def impact_value(self, data, lag):
     model = VAR(data)
     results = model.fit(lag)
     # sum the cross-coefficients of the second variable in the first
     # variable's equation over all lags
     numerator = 0
     for i in range(1, lag + 1):
         numerator += results.params[results.params.columns.values[0]][
             'L' + str(i) + '.' + results.params.columns.values[1]]
     return numerator / np.abs(lag)
Example #29
def time_series(data, future_forcast, location):
    #[[people, violations, time, location],[people, violations, time, location],[people, violations, time, location]]
    columns = ["people", "violations", "time", "location"]

    df = pd.DataFrame(data=data, columns=columns)
    df = df[df["location"] == location].reset_index(drop=True)
    # keep only the hour of each timestamp
    df['time'] = pd.to_datetime(df['time']).dt.hour

    dict_p = {}
    dict_v = {}
    for i in range(len(df)):
        if (df['time'][i] not in dict_p.keys()):
            dict_p[df['time'][i]] = [df["people"][i]]
        else:
            dict_p[df['time'][i]].append(df["people"][i])
        if (df['time'][i] not in dict_v.keys()):
            dict_v[df['time'][i]] = [df["violations"][i]]
        else:
            dict_v[df['time'][i]].append(df["violations"][i])

    people = []
    violations = []
    times = []

    for k, v in dict_p.items():
        people.append(sum(v) / float(len(v)))
        timet = pd.Timestamp(year=2000,
                             month=1,
                             day=1,
                             hour=k,
                             minute=0,
                             second=0)
        times.append(timet)

    for k, v in dict_v.items():
        violations.append(sum(v) / float(len(v)))

    n_df = pd.DataFrame(columns=["people", "violations", "time"])
    n_df["people"] = people
    n_df["violations"] = violations
    n_df["time"] = times
    n_df = n_df.sort_values(by=['time'])
    n_df.time = pd.DatetimeIndex(n_df.time).to_period('H')
    data1 = n_df[["people", 'violations']]
    data1.index = n_df["time"]
    print(data1)

    model = VAR(data1)
    model_fit = model.fit()
    # average hourly spacing between consecutive rows (n_df is sorted by time);
    # forecast() needs an integer step count
    hours = [t.hour for t in n_df["time"]]
    freq = (hours[-1] - hours[0]) / (len(hours) - 1)
    steps = int(future_forcast / freq)
    pred = model_fit.forecast(model_fit.endog, steps)
    return pred[0], pred[1]
 def decide_degree_best(self):
     # make a VAR model
     model = VAR(self.X)

     # determine the optimal VAR model order using AIC
     print(model.select_order(15).summary())
     results = model.fit(maxlags=15, ic='aic')
     print(results.summary())