Example #1
def VARprocess(df, log=False):
    # Log transformation, relative difference and drop NULL values
    if log:
        df = np.log(df + 0.1).diff().dropna()
    # Vector autoregression process generation
    maxAttr = len(df.columns)
    # Find the right lag order, dropping columns until the VAR can be estimated
    orderFound = False
    while not orderFound:
        try:
            model = VAR(df.iloc[:, 0:maxAttr])
            order = model.select_order()
            orderFound = True
        except Exception:
            exc_type, exc_obj, exc_tb = sys.exc_info()
            if str(exc_obj) == "data already contains a constant.":
                maxAttr = maxAttr - 1
            else:
                maxAttr = int(str(exc_obj).split("-th")[0]) - 1
            print("Exception, reducing to n_attributes", maxAttr)
            orderFound = False

    # select_order() returns a dict of {criterion: lag} in older statsmodels;
    # pick the criterion that selected the largest lag order
    n_lags = max(order.items(), key=operator.itemgetter(1))[1]
    method = max(order.items(), key=operator.itemgetter(1))[0]
    print("n_lags", n_lags)
    print("method", method)
    results = model.fit(maxlags=n_lags, ic=method)
    return results
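Note: Example #1 targets the older statsmodels API, where select_order() returned a plain dict. On recent statsmodels it returns a LagOrderResults object, so the same criterion/lag lookup would look like the sketch below (maxlags=10 is an arbitrary choice, not from the source):

from statsmodels.tsa.api import VAR

def best_criterion_and_lag(df, maxlags=10):
    order = VAR(df).select_order(maxlags=maxlags)
    # selected_orders maps criterion name -> chosen lag, e.g. {'aic': 3, ...}
    method, n_lags = max(order.selected_orders.items(), key=lambda kv: kv[1])
    return method, n_lags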
Example #2
def data2AB(data, x0=None):
    n = data.shape[0]
    T = data.shape[1]
    YY = np.dot(data[:, 1:], data[:, 1:].T)
    XX = np.dot(data[:, :-1], data[:, :-1].T)
    YX = np.dot(data[:, 1:], data[:, :-1].T)

    model = VAR(data.T)
    r = model.fit(1)
    A = r.coefs[0, :, :]

    # B: strictly lower-triangular pattern of contemporaneous effects
    B = np.ones((n, n))
    np.fill_diagonal(B, 0)
    B[np.triu_indices(n)] = 0
    K = int(np.sum(np.abs(B)))  # number of free parameters in B

    a_idx = np.where(A != 0)
    b_idx = np.where(B != 0)
    np.fill_diagonal(B, 1)

    # start from x0 if provided, otherwise perturb the VAR estimate
    if x0 is not None:
        x = x0
    else:
        x = np.r_[A.flatten(), 0.1 * np.random.randn(K)]
    o = optimize.fmin_bfgs(nllf2, x,
                           args=(np.double(A), np.double(B),
                                 YY, XX, YX, T, a_idx, b_idx),
                           gtol=1e-12, maxiter=500,
                           disp=False, full_output=True)
    A, B = x2M(o[0], np.double(A), np.double(B), a_idx, b_idx)
    B = B + B.T
    return A, B
Example #3
def causality_VAR(post_ts, max_order):
    model = VAR(post_ts)
    # select_order() returns a dict of {criterion: lag} in older statsmodels
    best_lag = model.select_order(max_order, verbose=False)

    print('best lag:', best_lag)

    result = model.fit(best_lag['aic'])

    return result, best_lag
Example #4
    def _train(self,
               data: np.ndarray,
               max_lag: int = 300,
               *args: Any,
               **kwargs: Any) -> None:
        # first-difference the data, then fit a VAR with no deterministic trend
        data_fd = np.diff(data, axis=0)
        assert data_fd.shape[0] >= max_lag
        model = VAR(endog=data_fd)
        self.max_lag = max_lag
        self._model = model.fit(maxlags=max_lag, trend="n")
Example #5
File: code1.py Project: dgydgydgy/Code-
def model(data):
    model = VAR(data)
    res = model.fit(maxlags=1)

    # test whether variables 1 and 2 jointly Granger-cause variable 1;
    # older statsmodels returned a dict, hence the ['pvalue'] lookup
    output = res.test_causality(1, [1, 2], kind='f')

    return output['pvalue']
Example #6
def forecast_DNS_VARm(ts, pred):
    model = VAR(ts)
    x = model.select_order(maxlags=3)
    lag_order = x.selected_orders["bic"]  # pick the best lag by the BIC criterion
    if lag_order == 0:  # constrain the lag so the model does not degenerate into a random walk
        lag_order = 1
    model_fitted = model.fit(lag_order)

    return model_fitted.forecast(ts.values[-lag_order:], pred)
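A minimal usage sketch on synthetic data (the factor names are illustrative, not from the source; assumes the imports used by the example):

import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
ts = pd.DataFrame(rng.normal(size=(100, 3)),
                  columns=['level', 'slope', 'curvature'])
print(forecast_DNS_VARm(ts, pred=12).shape)  # (12, 3): pred steps x 3 factors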
Example #7
def var(X, lookback=4, forward=1):
    """
    Prediction using VAR model
    """
    dat = X.iloc[:, -lookback:].values.T
    # tiny jitter to avoid singular matrices when some series are constant
    dat += 1e-10 * np.random.rand(dat.shape[0], dat.shape[1])
    model = VAR(dat)
    results = model.fit()
    lag_order = results.k_ar

    return results.forecast(dat[-lag_order:], forward)[-1]
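A hypothetical call on synthetic data, laid out as the slicing above expects (series in rows, time in columns):

import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
X = pd.DataFrame(rng.normal(size=(3, 60)))  # 3 series, 60 time steps
print(var(X, lookback=20, forward=1))       # one forecast value per series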
Example #8
def data2VARgraph_model(data, pval=0.05):
    model = VAR(data.T)
    r = model.fit(1)
    A = r.coefs[0, :, :]
    n = A.shape[0]
    g = {str(i): {} for i in range(1, n + 1)}

    for i in range(n):
        for j in range(n):
            if np.abs(A[j, i]) > pval:  # threshold on coefficient magnitude
                g[str(i + 1)][str(j + 1)] = {(0, 1)}
    return g, r
Example #9
def run_model(model_name, hidden_size):

    # import data
    # X, Y = data.import_data(set='cross_val')
    X, Y = data.import_data(set='train')

    # do not plug in returns, but residuals
    # plug in residuals
    VAR_model = VAR(X)

    results = VAR_model.fit(1)
    ar_returns = results.fittedvalues

    # columns to drop from dataframe
    columns = ['XMRspread', 'XMRvolume', 'XMRbasevolume', 'XRPspread', 'XRPvolume', 'XRPbasevolume', 'LTCspread',
               'LTCvolume', 'LTCbasevolume', 'DASHspread', 'DASHvolume', 'DASHbasevolume', 'ETHspread', 'ETHvolume',
               'ETHbasevolume']
    ar_returns.drop(columns=columns, inplace=True)

    X = X.loc[ar_returns.index]
    x_returns = X[ar_returns.columns]
    residual_df = x_returns - ar_returns
    X = X.join(residual_df, how='inner', rsuffix='residual')

    y_ar_returns = ar_returns
    y_ar_returns.columns = Y.columns
    Y = (Y.loc[X.index] - y_ar_returns.shift(-1)).dropna()
    y_ar_returns = y_ar_returns.shift(-1).dropna()
    X = X.loc[Y.index]

    x = X.to_numpy()
    y = Y.to_numpy()

    # set prediction matrix
    y_pred = np.zeros(shape=y.shape)

    # set model
    model = RNN(hidden_size=hidden_size, input_size=len(X.iloc[0:1].values[0]), output_size=len(Y.iloc[0:1].values[0]))
    model.load_state_dict(
        torch.load(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')) +
                   '/model_params/{}.pth.tar'.format(model_name)))

    for i in range(len(x)):
        inp = Variable(torch.from_numpy(x[i]).float())

        output = model.forward(inp)

        y_pred[i] = output.data.numpy()

    y_pred = y_pred + y_ar_returns.to_numpy()

    Y_pred = pd.DataFrame(data=y_pred, index=Y.index, columns=Y.columns)

    return Y_pred, Y
Example #10
def granger(cause, effect, lag):
    data = pd.DataFrame({'cause': cause, 'effect': effect})
    model = VAR(data)
    try:
        if lag == -1:
            results = model.fit(maxlags=15, trend='n', ic='aic')  # no constant ('nc' in older statsmodels)
        else:
            results = model.fit(lag)
    except Exception:
        # no usable lag in [1, maxlags]: treat as no causality
        return 1
    try:
        x = results.test_causality('effect', 'cause',
                                   kind='wald').summary().data
    except Exception:
        return 0
    return x[1][2]  # the p-value cell of the summary table
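An illustrative call on synthetic series (the data and coefficients are made up; lag=-1 lets AIC pick the order):

import numpy as np

rng = np.random.default_rng(1)
cause = rng.normal(size=200)
effect = 0.5 * np.roll(cause, 1) + 0.1 * rng.normal(size=200)
print(granger(cause, effect, lag=-1))  # a small p-value suggests causality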
Example #11
File: models.py Project: ruzzzzz/Citrus
    def var(self, df, host):

        df_diffed, no_diffs = Helper.diff_test(df)

        print(df_diffed)
        df_diffed = df_diffed.replace([np.inf, -np.inf], np.nan)  # assign the result: replace() is not in-place here
        cols = df_diffed.columns
        df_diffed = df_diffed.dropna()

        print("Length  : " + str(len(df_diffed)))
        nobs = int(len(df_diffed) / 10) + 2
        train = df_diffed[:-nobs]
        test = df_diffed[-nobs:]
        #print(train)
        model = VAR(train)

        maxlags = int(nobs / 2) + 1

        aic = model.select_order(maxlags).selected_orders['aic']

        results = model.fit(aic)
        print(results.summary())

        lagged_values = train.values[-maxlags:]
        #print(lagged_values)
        forecast = results.forecast(y=lagged_values, steps=nobs)

        idx = pd.date_range(test.first_valid_index(), periods=nobs)

        df_forecast = pd.DataFrame(data=forecast, index=idx, columns=cols)
        #print(df_forecast)

        df_fixed = Helper.reverse_diff(df_forecast, df, nobs, no_diffs)

        test_range = df[-nobs:]
        print("-- TEST Result -- \n")
        print(test_range)
        print("-- TEST Result END -- \n")
        print("-- Forecast Result -- \n")
        print(df_fixed)
        print("-- Forecast Result END -- \n")

        for col in df.columns:
            print("-- RMSE --")
            print(rmse(test_range[col], df_fixed[col + '_forecast']))
            print("-- Mean --")
            print(test_range[col].mean())
            df[col].plot(legend=True)
            df_fixed[col + '_forecast'].plot(legend=True)
            plt.show()
Example #12
def get_optimal_lag_exper(p_src_index, src_neighbor_indices,
                          normalized_cells_response_curve):
    from statsmodels.tsa.api import VAR

    #get the src neighbors
    number_of_points = len(src_neighbor_indices)

    optimal_lag_vector = dict()

    for p_dst_index in src_neighbor_indices:
        src_dst_data = None
        try:
            src_dst_data = normalized_cells_response_curve[
                [p_src_index, p_dst_index], :]
            src_dst_data = np.transpose(src_dst_data)
            model = VAR(src_dst_data)
            maxlags = None

            lag_order_results = model.select_order(maxlags=maxlags)

            lags = [
                lag_order_results.aic, lag_order_results.bic,
                lag_order_results.fpe, lag_order_results.hqic
            ]

            min_i = np.argmin(lags)

            model = model.fit(maxlags=lags[min_i], ic=None)

            p_value_whiteness = model.test_whiteness(nlags=lags[min_i]).pvalue

            if np.isnan(p_value_whiteness) or p_value_whiteness < 0.05:
                raise ValueError('found autocorrelation in residuals.')

                #i = models[min_i].k_ar + 1
                #while i < 12 * (models[min_i].nobs/100.)**(1./4):
                #    result_auto_co = model._estimate_var(i,  trend='c')
                #    if result_auto_co.test_whiteness(nlags=i).pvalue > 0.05:
                #        break
                #    i += 1

                #    print 'error order:' + str(models[min_i].k_ar)
                #    print 'found correlation ' + str(i)

            optimal_lag_vector[p_dst_index] = lags[min_i]
        except:
            print('src index: ' + str(p_src_index) + ' dst index: ' +
                  str(p_dst_index))
            if src_dst_data is not None:
                print(src_dst_data)
            raise

    return optimal_lag_vector
Example #14
    def var_simul(self, M=1e4, N=10, max_lags=4):
        '''
        M: number of sample trajectories,
        N: number of steps ahead.
        '''
        model = VAR(self.returns)
        results = model.fit(max_lags, ic="aic")
        rets_simul = np.zeros((int(M), int(N), self.n), np.float64)
        for i in range(int(M)):
            rets_simul[i] = results.simulate_var(int(N))
        return rets_simul
Example #15
    def modelorder(self):
        model = VAR(self.mvdfg)
        mo_data = []
        mo_indexed = []
        lags = range(1, len(self.mvdfg.columns) - 2)
        for i in lags:
            result = model.fit(i)
            mo_indexed.append(f'Lag Order {i}')
            mo_data.append([result.aic, result.bic, result.fpe, result.hqic])
        self.mo_df = pd.DataFrame(data=mo_data, index=mo_indexed,
                                  columns=['AIC', 'BIC', 'FPE', 'HQIC'])
        print(self.mo_df)
        return self.mo_df
Example #16
def var(ds):  # numpy array input
    lag = 10
    days = 5
    dslog = np.log(ds)
    df = np.nan_to_num(np.diff(dslog, axis=0))
    model = VAR(df)
    results = model.fit(maxlags=lag, ic='aic')
    lag_order = results.k_ar
    if lag_order < 5:
        lag_order = 10
    fc = results.forecast(df[-lag_order:], days)
    # undo the log-difference transform to recover price levels
    fcdenorm = np.exp(np.cumsum(fc, axis=0) + dslog[-1:])
    fcdenorm = np.vstack((ds[-1:], fcdenorm))
    return np.round(fcdenorm[-5:], 5), lag_order
Example #17
def fit_forecast_next(dataset):
    cols = dataset.columns
    dataset_differenced, round_no = remove_stationary(dataset)
    model = VAR(dataset_differenced)
    model_fit = model.fit()
    # make prediction on validation
    prediction = model_fit.forecast(model_fit.endog, steps=1)
    # converting predictions to dataframe
    forecast = pd.DataFrame(prediction, index=dataset.index[-1:], columns=cols)
    if round_no != 0:
        forecast = invert_transformation(dataset, forecast, (round_no == 2))
    # check rmse
    return forecast
Example #18
def fit(data, maxlag):
    mdata = prepareData(data)
    equation = dict()
    equation["aic"] = []
    equation["bic"] = []
    equation["hqic"] = []
    equation["min"] = []
    model = VAR(mdata)
    for x in range(0, maxlag):
        fittedModel = model.fit(x + 1)
        equation["aic"].append(fittedModel.aic)
        equation["bic"].append(fittedModel.bic)
        equation["hqic"].append(fittedModel.hqic)

    # fit once more with the BIC-selected lag and record its criteria
    minLag = model.fit(maxlags=maxlag, ic='bic')
    equation["min"].append(minLag.aic)
    equation["min"].append(minLag.bic)
    equation["min"].append(minLag.hqic)

    return equation
Example #19
def VARprocess(df, log=False):
    """
    Description: This function applies Vector Auto Regression
    Input: dataframe
    Output: VARResults object
    """
    # Log transformation, relative difference and drop NULL values
    if log:
        df = np.log(df + 0.1).diff().dropna()
    # Vector autoregression process generation
    maxAttr = len(df.columns)
    # Find the right lag order
    orderFound = False
    print("7.1.0 ----- Finding an order for the VAR")
    maxIter = 0
    while not orderFound and maxIter < 15:
        maxIter = maxIter + 1
        try:
            model = VAR(df)
            order = model.select_order()
            orderFound = True
        except Exception:
            exc_type, exc_obj, exc_tb = sys.exc_info()
            maxAttr = maxAttr - 1
            orderFound = False
    print("7.1.1 ----- Model fitting")
    if orderFound:
        # select_order() returns a dict of {criterion: lag} in older statsmodels
        n_lags = max(order.items(), key=operator.itemgetter(1))[1]
        method = max(order.items(), key=operator.itemgetter(1))[0]
        results = model.fit(maxlags=n_lags, ic=method)
    else:
        results = model.fit()
    return results
Example #20
def get_fedea_on_gdp():
    qbuilder = inquisitor.Inquisitor(token)
    df = qbuilder.series(ticker = ['ESE.940000D259D.Q.ES','FEEA.PURE064A.M.ES'])
    df.dropna(inplace = True)
    df['fedea'] = df['FEEA.PURE064A.M.ES'].diff()
    df['gdp'] = df['ESE.940000D259D.Q.ES'] / 100
    data1 = df[['fedea', 'gdp']].dropna()
    model1 = VAR(data1)
    results1 = model1.fit(4)
    
    irf1 = results1.irf(8)
    fedea_on_gdp = irf1.orth_lr_effects[1,0] / data1['fedea'].std()
    return fedea_on_gdp
Example #22
def get_optimal_lag(p_src_index, neighbor_indices,
                    normalized_cells_response_curve):
    #get the src neighbors
    number_of_points = len(neighbor_indices)

    src_neighbor_indices = neighbor_indices[p_src_index]

    optimal_lag_vector = np.zeros(number_of_points)

    for p_dst_index in src_neighbor_indices:
        #find the common neighbours
        dst_neighbor_indices = neighbor_indices[p_dst_index]
        disjoint_neighbours = get_disjoint_neighbours(p_src_index, p_dst_index,
                                                      neighbor_indices)

        src_dst_data = normalized_cells_response_curve[
            [p_src_index, p_dst_index], :]
        src_dst_data = np.transpose(src_dst_data)
        model = VAR(src_dst_data)
        maxlags = None

        lag_order_results = model.select_order(maxlags=maxlags)

        lags = [
            lag_order_results.aic, lag_order_results.bic,
            lag_order_results.fpe, lag_order_results.hqic
        ]

        min_i = np.argmin(lags)

        model = model.fit(maxlags=lags[min_i], ic=None)

        if model.test_whiteness(nlags=lags[min_i]).pvalue < 0.05:
            raise ValueError('found autocorrelation in residuals.')

            #i = models[min_i].k_ar + 1
            #while i < 12 * (models[min_i].nobs/100.)**(1./4):
            #    result_auto_co = model._estimate_var(i,  trend='c')
            #    if result_auto_co.test_whiteness(nlags=i).pvalue > 0.05:
            #        break
            #    i += 1

            #    print 'error order:' + str(models[min_i].k_ar)
            #    print 'found correlation ' + str(i)

        optimal_lag_vector[p_dst_index] = lags[min_i]

        break

    return optimal_lag_vector
Example #23
def SW_PCA_VAR(X, Y, ws=50, r=2):
    """
    Vector auto-regression model.
    """
    log_series_hat = sliding_window(X, ws)
    pca = PCA(n_components=r)
    components = pca.fit_transform(log_series_hat)
    model = VAR(components)
    results = model.fit(2)
    fitted_components = np.zeros(components.shape)
    fitted_components[:2, :] = components[:2, :]
    fitted_components[2:, :] = results.fittedvalues
    Y_hat = pca.inverse_transform(fitted_components)
    return fitted_components, Y_hat
Example #24
def test_gc(data, maxlag=None, signif=0.05, verbose=False):
    """
    Apply the Granger causality test to every ordered pair of columns.

    Args:
        data: DataFrame or array-like of observations.
        maxlag (None, optional): lag order; chosen automatically when None.
        signif (float, optional): significance level.
        verbose (bool, optional): print fitting details.

    Returns:
        DataFrame with one row per (caused, causing) pair.
    """
    from statsmodels.tsa.api import VAR
    if isinstance(data, pd.core.frame.DataFrame):
        colns = data.columns
        arr = data.values
    else:
        arr = np.array(data)
        colns = pd.RangeIndex(arr.shape[1])  # fall back to positional labels so the lookup below works

    model = VAR(arr)
    if maxlag:
        res = model.fit(maxlag, verbose=verbose)
    else:
        res = model.fit(verbose=verbose)
    gc_test = []
    obs_name = res.names
    for c1, c2 in permutations(obs_name, 2):
        gc_res = res.test_causality(c1, c2, signif=signif, verbose=verbose)
        coln1, coln2 = colns[[obs_name.index(c1), obs_name.index(c2)]]
        gc_res = pd.Series(gc_res, name=(coln1, coln2))
        gc_res['H0'] = "'{}' do not Granger-cause '{}'".format(coln2, coln1)
        gc_test.append(gc_res)
    results = pd.DataFrame(gc_test)
    results['VAR'] = model
    results['best_order'] = (len(model.exog_names) - 1) / data.shape[1]
    return results
Example #25
def get_optimal_lag(p_src_index, neighbor_indices,
                    normalized_cells_response_curve):
    #get the src neighbors
    number_of_points = len(neighbor_indices)

    src_neighbor_indices = neighbor_indices[p_src_index]

    optimal_lag_vector = np.zeros(number_of_points)

    for p_dst_index in src_neighbor_indices:
        src_dst_data = normalized_cells_response_curve[
            [p_src_index, p_dst_index], :]
        src_dst_data = np.transpose(src_dst_data)
        model = VAR(src_dst_data)
        maxlags = None

        lag_order_results = model.select_order(maxlags=maxlags)

        lags = [
            lag_order_results.aic, lag_order_results.bic,
            lag_order_results.fpe, lag_order_results.hqic
        ]

        min_i = np.argmin(lags)

        var_result = model.fit(maxlags=lags[min_i], ic=None)

        portmanteau_test = var_result.test_whiteness(lags[min_i]).pvalue
        if portmanteau_test < 0.05:
            raise ValueError('found autocorrelation in residuals.' +
                             str(portmanteau_test))
            '''                        
            i = lags[min_i] + 1
            while i < 12 * (model.nobs/100.)**(1./4):                
                var_result = model.fit(i, ic=None)
                if var_result.test_whiteness(max(10, i + 1)).pvalue >= 0.05:
                    break
                i += 1
                
                #print('error order:' + str(lags[min_i]))
                #print('found correlation ' + str(i))

            optimal_lag_vector[p_dst_index] = i    
        
            else:
            '''
        optimal_lag_vector[p_dst_index] = lags[min_i]

    return optimal_lag_vector
Example #26
    def select_order_of_VAR_model(self):
        model = VAR(self.df)
        print("\n*********checking different orders of lag************\n")
        for i in range(1, 10):
            result = model.fit(i)
            print('Lag Order =', i)
            print('AIC : ', result.aic)
            print('BIC : ', result.bic)
            print('FPE : ', result.fpe)
            print('HQIC: ', result.hqic, '\n')

        #alternative
        print("\n*********select_order method used: ************\n")
        x = model.select_order(maxlags=self.max_lags)
        print(x.summary())
Example #27
def eval_ar(Y, T1, T2, ic, p, loss):
    MSFE = []
    for u in range(T1, T2):
        trainY = Y[p:(u + p), :]
        var_mod = VAR(trainY)
        mod = var_mod.fit(maxlags=p, ic=ic)
        lag_order = mod.k_ar
        if lag_order != 0:
            yhat = mod.forecast(trainY[-lag_order:], 1)
        else:
            yhat = mod.params
        MSFE_temp = calc_loss(Y[u + p, :] - yhat, loss)
        MSFE.append(MSFE_temp)
    MSFE = np.array(MSFE)
    return np.mean(MSFE)
Example #28
    def get_irf(nd, subset):
        '''
        http://statsmodels.sourceforge.net/0.6.0/vector_ar.html
        '''
        data = nd.reindex(columns=subset)
        data = data.dropna()
        data.describe()
        model = VAR(data)
        results = model.fit(6)

        irf = results.irf(12)

        cum_effects = irf.orth_cum_effects

        return cum_effects[12, 2, 0]
Example #29
File: TS_VAR.py Project: dryabokon/tools
    def predict(self, test_X, test_Y):
        predictions = numpy.empty(0)

        array_train = numpy.concatenate((numpy.array([self.train_Y]).T, self.train_X), axis=1)
        array_test = numpy.concatenate((numpy.array([test_Y]).T, test_X), axis=1)

        for t in range(0, test_Y.shape[0]):
            array = numpy.vstack((array_train, array_test[:t]))
            model = VAR(endog=pd.DataFrame(data=array))
            fit = model.fit()
            lag = fit.k_ar
            pred = fit.forecast(array[-lag:], 1)[0]
            predictions = numpy.append(predictions, pred[0])

        return predictions
Example #31
File: linvar.py Project: i6092467/GVAR
class LinVAR:
    def __init__(self, X: np.ndarray, K=1):
        """
        Linear VAR model.

        @param X: numpy array with data of shape T x p.
        @param K: order of the VAR model (maximum lag).
        """
        # X.shape: T x p
        super(LinVAR, self).__init__()

        self.model = VAR(X)
        self.p = X.shape[1]
        self.K = K

        # Fit the model
        self.model_results = self.model.fit(maxlags=self.K)

    def infer_causal_structure(self, kind="f", adjust=False, signed=False):
        """
        Infer GC based on the fitted VAR model.

        @param kind: type of the statistical test for GC (as implemented within statsmodels). Default: F-test.
        @param adjust: whether to adjust p-values. If True, p-values are adjusted using the Benjamini-Hochberg
        procedure for controlling the FDR.
        @param signed: whether to return coefficient signs.
        @return: p x p array with p-values, p x p array with hypothesis test results, and, if signed == True,
        p x p array with coefficient signs.
        """
        pvals = np.zeros((self.p, self.p))
        for i in range(self.p):
            for j in range(self.p):
                pvals[i, j] = self.model_results.test_causality(
                    caused=i, causing=j, kind=kind).pvalue
        reject = pvals <= 0.05
        if adjust:
            reject, pvals, alpha_Sidak, alpha_Bonf = multitest.multipletests(
                pvals.ravel(), method="fdr_bh")
            pvals = np.reshape(pvals, (self.p, self.p))
            reject = np.reshape(reject, (self.p, self.p))
        if signed:
            return pvals, reject, np.sign(self.model_results.params[1:, :].T *
                                          reject)
        else:
            return pvals, reject
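A hypothetical usage on synthetic data (shapes as documented above; assumes numpy, statsmodels' VAR, and statsmodels.stats.multitest are imported as the class requires):

import numpy as np

X = np.random.default_rng(0).normal(size=(500, 4))  # T = 500, p = 4
linvar = LinVAR(X, K=2)
pvals, reject = linvar.infer_causal_structure(adjust=True)
print(pvals.shape, int(reject.sum()))  # (4, 4) and the number of detected GC links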
Example #32
def construct_model_based_connectivity(event_type, reader, pairs, BUFFER,
                                       freqs, EPSILON, window_size):
    sess_events = reader.load('task_events')
    events = sess_events[sess_events.type == event_type]
    rel_start = 0
    if event_type == 'COUNTDOWN_START':
        countdown_end_events = sess_events[sess_events.type == 'COUNTDOWN_END']
        countdown_times = []
        for i in np.arange(len(countdown_end_events)):
            countdown_times.append(countdown_end_events.iloc[i]['mstime'] -
                                   events.iloc[i]['mstime'])

        rel_stop = np.min(countdown_times)
    else:
        rel_stop = 1366  # also covers the 'WORD' case

    events_eeg = reader.load_eeg(events,
                                 rel_start=rel_start,
                                 rel_stop=rel_stop,
                                 scheme=pairs)
    events_eeg = events_eeg.to_ptsa()
    events_eeg = events_eeg.filtered(freq_range=[58.0, 62.0])
    events_eeg = events_eeg.add_mirror_buffer(BUFFER)
    wf = morlet.MorletWaveletFilter(events_eeg, freqs=freqs)
    power_wavelet, phase_wavelet = wf.filter()
    power_wavelet = power_wavelet.remove_buffer(BUFFER)
    power_wavelet = power_wavelet.transpose('channel', 'event', 'time',
                                            'frequency')
    power_wavelet = np.log10(power_wavelet + EPSILON)
    n_times = power_wavelet.shape[2]
    intervals = np.array_split(np.arange(n_times), int(n_times / window_size))
    power_wavelet_aggregate = np.zeros(shape=list(power_wavelet.shape[:2]) +
                                       [len(intervals)])
    for i in np.arange(len(intervals)):
        power_wavelet_aggregate[:, :, i] = power_wavelet[:, :, intervals[i],
                                                         0].mean('time')

    dims = power_wavelet_aggregate.shape
    power_wavelet_aggregate = power_wavelet_aggregate.reshape(
        dims[0], dims[1] * dims[2])
    model = VAR(power_wavelet_aggregate[:, :].T)
    results = model.fit(maxlags=1)
    conn_mat = results.coefs[0, :, :]
    return conn_mat
Example #33
    def VAR(self):
        '''
        Rolling estimation of a k-lag VAR model,
        saving the coefficient matrices and the residual covariance matrices.
        Handles vector autoregression with k-lag > 1.

        k_lag: lag order of the vector autoregression
        '''
        for i in range(self.gundong_time, self.row + 1, 1):
            datai = self.data.iloc[i - self.gundong_time:i, :]
            model = VAR(datai)
            # fit with a lag of k_lag periods
            results = model.fit(self.k_lag)
            coef = results.params
            self.save_data_coef[i - self.gundong_time, :, :] = coef.iloc[1:1 + self.k_lag * self.column, :].T
            self.save_data_cov[i - self.gundong_time, :, :] = results.sigma_u
Example #34
def trainVectorAutoRegressiveMethodModel():

    X_train = readVectorAutoRegressiveMethodXTrain()

    # training the model on the training set
    vectorAutoRegressiveMethodModel = VAR(X_train)

    # we take p = 5: of the models built for different p values,
    # p = 5 gives the minimum AIC and BIC
    vectorAutoRegressiveMethodModelResult = vectorAutoRegressiveMethodModel.fit(5)

    # saving the model in a pickle file
    saveVectorAutoRegressiveMethodModel(vectorAutoRegressiveMethodModelResult)

    print(vectorAutoRegressiveMethodModelResult.summary())
Example #35
    def test(self):
        mdata = sm.datasets.macrodata.load_pandas().data
        dates = mdata[['year', 'quarter']].astype(int).astype(str)
        quarterly = dates["year"] + "Q" + dates["quarter"]
        from statsmodels.tsa.base.datetools import dates_from_str
        quarterly = dates_from_str(quarterly)
        mdata = mdata[['realgdp', 'realcons', 'realinv']]
        mdata.index = pandas.DatetimeIndex(quarterly)
        data = np.log(mdata).diff().dropna()
        # print(type(data))
        # print(data)

        model = VAR(data)
        results = model.fit(2)
        # print(results.summary())
        gc_result = results.test_causality(['realgdp', 'realcons', 'realinv'], ['realgdp', 'realcons', 'realinv'], kind='f')
        print(gc_result.summary())
Example #36
File: var.py Project: id774/sandbox
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm
from statsmodels.tsa.api import VAR

mdata = sm.datasets.macrodata.load().data
mdata = mdata[["realgdp", "realcons", "realinv"]]
names = mdata.dtype.names
data = mdata.view((float, 3))
data = np.diff(np.log(data), axis=0)
model = VAR(data)
res = model.fit(2)

res.plot_sample_acorr()

irf = res.irf(10)
irf.plot()
plt.savefig("image.png")  # save before show(), which clears the current figure
plt.show()

res.plot_forecast(5)

res.fevd().plot()
plt.savefig("image2.png")
plt.show()
Example #37
data['datetime'] = pd.to_datetime(data['datetime'], format=format)
data.set_index(pd.DatetimeIndex(data['datetime']), inplace=True)


# Select variables for VAR model
varData = data[['pm2.5','TEMP','PRES', 'Iws']].dropna()[:-50]
test = data[['pm2.5','TEMP','PRES', 'Iws']].dropna()[-50:]

# endVal = varData.loc["2014-01-04 00:00:00"]
# varData = varData.diff(1)


model = VAR(varData)  # define the model and data
# model.select_order()  # uses information criteria to select the model order
reg = model.fit(30)  # order chosen based on the BIC criterion



# Forecasting

fcast = reg.forecast(varData['2013-01-04':].values, steps = 50)


def dediff(todaysVal, forecast):
    # cumulatively add differenced forecasts back onto the last observed level;
    # note that `future` aliases `forecast`, so the input array is modified in place
    future = forecast
    for i in range(np.shape(forecast)[0]):
        if i == 0:
            future[i] = todaysVal + forecast[0]
        else:
            future[i] = future[i-1] + forecast[i]
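A minimal illustration of dediff on made-up numbers; since the function modifies its argument in place, the reconstructed levels end up in the array passed in:

import numpy as np

today = np.array([10.0, 5.0])
diffs = np.array([[1.0, -0.5], [0.5, 0.2], [0.2, 0.1]])
dediff(today, diffs)  # diffs now holds the de-differenced levels
print(diffs)          # [[11.   4.5] [11.5  4.7] [11.7  4.8]]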
Example #38

import numpy as np
import pandas
import statsmodels.datasets as ds
from statsmodels.tsa.api import VAR
from statsmodels.tsa.base.datetools import dates_from_str

mdata = ds.macrodata.load_pandas().data

# prepare the dates index
dates = mdata[['year', 'quarter']].astype(int).astype(str)
quarterly = dates["year"] + "Q" + dates["quarter"]
quarterly = dates_from_str(quarterly)

mdata = mdata[['realgdp','realcons','realinv']]
mdata.index = pandas.DatetimeIndex(quarterly)
data = np.log(mdata).diff().dropna()

model = VAR(data)
est = model.fit(maxlags=2)

def plot_input():
    est.plot()

def plot_acorr():
    est.plot_acorr()

def plot_irf():
    est.irf().plot()

def plot_irf_cum():
    irf = est.irf()
    irf.plot_cum_effects()

def plot_forecast():
    est.plot_forecast(10)
Example #39
def estimate_VAR():
    df = load_external()
    d = load_es_uncertainty()
    df1 = load_eu_uncertainty()
    nd = d.join(df).join(df1)
    plot_index_comparison(nd)
    plot_eu_epu(nd)
    plot_cinco_elpais(nd)
    nd = transform_data(nd)
    plot_epu_gdp(nd)
    
    benchmark_subset = ['EPU','europe', 'fedea', 'inflation', 'differential'] 

    nd['EPU'] = nd['policy'].diff(periods = 1)
    data = nd.reindex(columns=benchmark_subset)
    data = data.dropna()
    data.describe()
    model = VAR(data)
    results = model.fit(6)

    irf = results.irf(12)
    irf.plot(orth=True, impulse='EPU', subplot_params = {'fontsize' : 12})
    #irf.plot_cum_effects(orth=True, impulse='EPU', subplot_params = {'fontsize' : 12}) #

    cum_effects = irf.orth_cum_effects 
    
    fedea_on_gdp = get_fedea_on_gdp()
    elasticity = -100 * fedea_on_gdp * cum_effects[12, 2, 0]
    print('Effects of a 1 sd uncertainty shock on gdp growth (negative): %0.3f%%' % elasticity)
    print('Inflation increases by %0.2f' % (100 * cum_effects[12, 3, 0]))
    print('Bond spreads increase by %0.1f basis points' % (100 * cum_effects[12, 4, 0]))
    full_sset = ['ibex','vol','resid','europe', 'fedea', 'inflation', 'differential' ]
    

    def get_irf(nd, subset):
        '''
        http://statsmodels.sourceforge.net/0.6.0/vector_ar.html
        '''
        data = nd.reindex(columns=subset)
        data = data.dropna()
        data.describe()
        model = VAR(data)
        results = model.fit(6)
    
        irf = results.irf(12)
        
        cum_effects = irf.orth_cum_effects 
        
        return cum_effects[12,2,0]
    
    for colname in colnames:
        nd['uncert'] = nd[colname] / nd.articles
        nd['uncert'] = nd['uncert'] / nd['uncert'].mean() * 100
        nd['uncert'] = nd['uncert'].diff(periods = 1)
        subset = ['uncert','europe', 'fedea', 'inflation', 'differential' ]
        cum_effect= get_irf(nd, subset)
        print('**%s** | %d | %.04f' % (colname, nd[colname].sum(), 100 * fedea_on_gdp * cum_effect))
    
    aa = d.mean()[colnames]
    plt.figure(6)
    h = plt.bar(range(len(aa)),aa,label = list(aa.index) )
    plt.subplots_adjust(bottom=0.3)
    
    xticks_pos = [0.65*patch.get_width() + patch.get_xy()[0] for patch in h]
    
    plt.xticks(xticks_pos, list(aa.index),  ha='right', rotation=45)
    plt.savefig(os.path.join(rootdir, 'figures','frequency_types.%s' % fig_fmt), format=fig_fmt)
Example #40
File: ex_var.py Project: AnaMP/statsmodels
import numpy as np
import statsmodels.api as sm
from statsmodels.tsa.api import VAR

# some example data
mdata = sm.datasets.macrodata.load().data
mdata = mdata[['realgdp','realcons','realinv']]
names = mdata.dtype.names
data = mdata.view((float,3))

use_growthrate = False
if use_growthrate:
    data = 100 * 4 * np.diff(np.log(data), axis=0)

model = VAR(data, names=names)
res = model.fit(4)

nobs_all = data.shape[0]

#in-sample 1-step ahead forecasts
fc_in = np.array([np.squeeze(res.forecast(model.y[t-20:t], 1))
                  for t in range(nobs_all-6,nobs_all)])

print(fc_in - res.fittedvalues[-6:])

#out-of-sample 1-step ahead forecasts
fc_out = np.array([np.squeeze(VAR(data[:t]).fit(2).forecast(data[t-20:t], 1))
                   for t in range(nobs_all-6,nobs_all)])

print(fc_out - data[nobs_all-6:nobs_all])
print(fc_out - res.fittedvalues[-6:])
Example #41
File: var.py Project: DaryaVM/kaggle_asus
    stat_df = df.diff().dropna()

    
    # drop columns that are zero at the end; we assume they will stay zero
    for col_name in stat_df.columns.values:
        if stat_df[col_name][-1] == 0 and stat_df[col_name][-2] == 0:
            print(col_name)
            del stat_df[col_name]
            no_forecast.setdefault(m, []).append(col_name)
    #print stat_df
    forecast_cols[m] = stat_df.columns.values
    #new_df = stat_df[['P17','P15','P16']]
    model = VAR(stat_df)
    maxlags = 3
    try:
        results = model.fit(maxlags, ic='aic', verbose=True)
    except Exception as exc:
        maxlags = 1
        results = model.fit(maxlags, ic='aic', verbose=True)
    
    # print(results.summary())
    lag_order = results.k_ar
    #print "lag_order\n" ,lag_order
    #print "stat_df.values[-log_order] ---\n", stat_df.values[-lag_order:]
    #print "----------------\n"