Example #1
0
def artificial_data():
    """Plot single-shot and dynamic VAR forecasts on a synthetic 2-variable series.

    The series is built from the ``sin_`` helper plus Gaussian noise; the
    second variable is the element-wise square of the first. Two figures are
    shown: one forecast of ``n_points`` steps from a single fit, and one
    produced by the ``test_forecast`` helper.
    """
    n_points = 301
    x, y = sin_(n_points, 5)
    noise = np.random.normal(0., .5, len(y))
    y = y + 0.01 * noise
    squared = y * y
    # Rows are time points, columns are the two variables.
    Y = np.matrix([y, squared]).transpose().tolist()

    # --- forecast the whole horizon from one fit ---
    title('single prediction')
    fitted = VAR(Y).fit(maxlags=15, ic='aic')
    # Seed the forecast with the last k_ar observations (the selected order).
    pred = fitted.forecast(Y[-fitted.k_ar:], n_points)
    future_x = np.arange(n_points, n_points + len(pred))
    assert (len(pred) == n_points)
    plot(x, Y)
    plot(future_x, pred, '--')
    show()

    # --- forecast via the project's test_forecast helper ---
    title('dynamic prediction')
    future_x, pred = test_forecast(x,
                                   Y,
                                   len_for_prediction=100,
                                   n_pred=100,
                                   maxlags=15,
                                   ic='aic')
    plot(x, Y)
    plot(future_x, pred, '--')
    show()
Example #2
0
 def var_fit(self, endog, maxlags=5, ic='aic', verbose=False, trend='c'):
     '''
     Fit a VAR model, choosing the lag order by an information criterion.

     Parameters
     ----------
     endog : array-like, (shape: (n_time_points, n_variables))
         2-d endogenous response variable.
     maxlags : int
         Maximum number of lags to check for order selection.
     ic : {'aic', 'fpe', 'hqic', 'bic', None}, optional, (default="aic")
         Information criterion to use for VAR order selection.
         aic : Akaike
         fpe : Final prediction error
         hqic : Hannan-Quinn
         bic : Bayesian a.k.a. Schwarz
     verbose : bool, default False
         Print order selection output to the screen
     trend : str {"c", "ct", "ctt", "nc"}, optional, (default="c")
         "c" - add constant
         "ct" - constant and trend
         "ctt" - constant, linear and quadratic trend
         "nc" - no constant, no trend
         Note that these are prepended to the columns of the dataset.

     Returns
     -------
     self
         This instance, with ``self.var_result`` set to the fitted results.
     '''
     self.var_result = VAR(endog).fit(maxlags=maxlags,
                                      ic=ic,
                                      verbose=verbose,
                                      trend=trend)
     # The documented contract is "returns self"; previously None was returned.
     return self
Example #3
0
 def parametersAR(self, lag=1):
     """Fit a VAR of order ``lag`` on the historical b0/b1/b2 columns.

     The fitted results are stored on ``self._varModel``. Always returns True.
     """
     factor_history = self.parametersHistorical()[['b0', 'b1', 'b2']]
     self._varModel = VAR(factor_history).fit(lag)
     # The summary is built but its value is discarded, as before.
     self._varModel.summary()
     return True
Example #4
0
def var(data):
    """Fit a VAR(9) on the first 70% of ``data`` and forecast train/test spans.

    ``data`` is interpolated, cast to float32, and passed through a scaling
    pipeline (with an optional PCA step when the module-level ``DOpca`` flag
    is set) before fitting.

    Returns
    -------
    tuple
        ``(trainPredict, testPredict, time_, trainY, testY)``. The predictions
        are Series of the first column on the train/test index, or the
        sentinel ``-999`` for both when the inverse transform or Series
        construction fails. ``time_`` is the elapsed wall-clock time in
        seconds; ``trainY``/``testY`` are the first column of each split.
    """
    start_time_ = time.time()
    data = data.interpolate(limit=30000000,
                            limit_direction='both').astype('float32')

    if DOpca:
        steps = [('scale', StandardScaler()), ('pca', PCA(n_components=n_pca))]
    else:
        steps = [('scale', StandardScaler())]
    pipe = Pipeline(steps=steps)
    # NOTE(review): the pipeline is fitted on the full series, test rows
    # included; this is kept as-is to preserve the existing results.
    pipe.fit(data)

    # A 70/30 positional split. An earlier split on the global ``split_date``
    # was always overwritten by this one, so it has been removed.
    split_at = int(0.7 * (len(data)))
    train, test = data[:split_at], data[split_at:]
    sc_train, sc_test = pipe.transform(train), pipe.transform(test)

    model_fit = VAR(endog=sc_train).fit(9)

    trainPredict = model_fit.forecast(sc_train, steps=len(sc_train))
    testPredict = model_fit.forecast(sc_test, steps=len(sc_test))
    try:
        trainPredict = pipe.inverse_transform(trainPredict)
        testPredict = pipe.inverse_transform(testPredict)

        trainPredict = pd.Series(data=(trainPredict[:, 0]), index=train.index)
        testPredict = pd.Series(data=(testPredict[:, 0]), index=test.index)
    except Exception:
        # Best-effort: keep the sentinel contract, but no longer swallow
        # KeyboardInterrupt / SystemExit as the bare ``except:`` did.
        trainPredict, testPredict = -999, -999
    trainY = pd.Series(data=(train.iloc[:, 0]), index=train.index)
    testY = pd.Series(data=(test.iloc[:, 0]), index=test.index)
    time_ = time.time() - start_time_
    return trainPredict, testPredict, time_, trainY, testY
Example #5
0
    def fit_model(self):
        """Fit a VAR on ``self.train`` and keep the results in ``self.model_fit``."""
        self.model_fit = VAR(endog=self.train).fit()
Example #6
0
def var_predict(df, n_forwards=(1, 3), n_lags=4, test_ratio=0.2):
    """Rolling multi-horizon VAR forecast over the test tail of ``df``.

    A VAR of order ``n_lags`` is fitted on the scaled training rows. Every
    window of ``n_lags`` consecutive rows is then forecast
    ``max(n_forwards)`` steps ahead, and the requested horizons that land
    inside the test range are stored.

    Side effect: writes ``./df_predict.csv`` (the frame for the last horizon
    in ``n_forwards``) and ``./df_test.csv``.

    :return: ([one prediction DataFrame per horizon], df_test)
    """
    n_sample, n_output = df.shape
    n_test = int(round(n_sample * test_ratio))
    n_train = n_sample - n_test
    df_train = df[:n_train]
    df_test = df[n_train:]

    train_values = df_train.values
    # Scalar mean/std over the whole training block.
    scaler = StandardScaler(mean=train_values.mean(), std=train_values.std())
    var_result = VAR(scaler.transform(train_values)).fit(n_lags)

    max_n_forwards = np.max(n_forwards)
    result = np.zeros(shape=(len(n_forwards), n_test, n_output))
    # Start early enough that the longest horizon reaches the first test row.
    first_window = n_train - n_lags - max_n_forwards + 1
    for window_start in range(first_window, n_sample - n_lags):
        window = df.values[window_start:window_start + n_lags]
        prediction = var_result.forecast(scaler.transform(window),
                                         max_n_forwards)
        for slot, horizon in enumerate(n_forwards):
            row = window_start - n_train + n_lags + horizon - 1
            if row < 0 or row >= n_test:
                continue
            result[slot, row, :] = prediction[horizon - 1, :]

    df_predicts = []
    for slot in range(len(n_forwards)):
        df_predict = pd.DataFrame(scaler.inverse_transform(result[slot]),
                                  index=df_test.index,
                                  columns=df_test.columns)
        df_predicts.append(df_predict)

    # Only the frame left over from the final loop iteration is exported.
    df_predict.to_csv("./df_predict.csv", sep=',', index=False)
    df_test.to_csv("./df_test.csv", sep=',', index=False)
    return df_predicts, df_test
Example #7
0
    def test_select_order(self):
        """Smoke-test lag selection through ``fit`` and ``select_order``."""
        for criterion in ('aic', 'fpe'):
            self.model.fit(10, ic=criterion, verbose=True)

        # Marked "bug" by the original author: select_order on a model
        # rebuilt from the raw endog array.
        VAR(self.model.endog).select_order()
def control_lqr(env, agent, model_fit, data, lag=4):
    """Run one episode of up to 500 steps under a periodically refreshed LQR gain.

    The initial gain comes from the first lag-coefficient matrix of
    ``model_fit``. Each step appends the current observation to ``data``;
    every 10th step (including step 0) a new VAR of order ``lag`` is fitted
    on the accumulated data and the gain is recomputed.
    """
    input_matrix = np.array([[0], [0], [-.01], [-.01]])
    state_cost = np.diag((10., 1., 10., 1.))

    print(model_fit.coefs)

    K = lqr(model_fit.coefs[0], input_matrix, state_cost, 1)
    print("K=")
    print(K)

    obs = env.reset()
    action = agent.begin_episode(obs)
    for step in range(500):
        env.render()
        time.sleep(0.15)  # slow the loop down so the rendering is watchable

        data = np.vstack([data, obs])
        if step % 10 == 0:
            refit = VAR(data).fit(lag)
            K = lqr(refit.coefs[0], input_matrix, state_cost, 1)

        action = get_control(K, obs)

        # Advance the environment with the chosen control.
        obs, _reward, done, _info = env.step(action)

        if not done:
            continue
        print("Final episode: lasted {} timesteps, data: {}".format(
            step + 1, obs))
        break
Example #9
0
File: VAR.py Project: aparwal7/6242
def generate_final_predictions(df_coords, lag_order=3, display=False):
    '''
    Fit a VAR of order ``lag_order`` on all of ``df_coords`` and forecast two
    steps ahead, labelled as the years 2022 and 2023.

    Returns a DataFrame with columns ``year``, ``future_latitude`` and
    ``future_longitude``. When ``display`` is truthy the model summary and
    the forecast are also printed.
    '''
    fitted = VAR(endog=df_coords).fit(lag_order)
    two_step = fitted.forecast(fitted.y, steps=2)

    df_forecast = pd.DataFrame(
        two_step, columns=['future_latitude', 'future_longitude'])
    df_forecast['year'] = [2022, 2023]
    # Put the year column first.
    df_forecast = df_forecast[['year', 'future_latitude', 'future_longitude']]

    if not display:
        return df_forecast

    print()
    print('Final model information:')
    print()
    print(fitted.summary())
    print()
    print('Future hotspot forecasts:')
    print()
    print(df_forecast)
    return df_forecast
Example #10
0
    def extract_model(self, input, save_status=False):
        """Forecast ``int(input)`` month-start periods beyond the cleaned data.

        Returns a usage-hint string when ``self._model_clean()`` returns
        False. Otherwise returns a DataFrame of Q1/Q2/Q3 forecasts indexed by
        the new dates. When ``save_status`` is exactly True the result is
        also passed to ``self._save_image_model``.
        """
        total = self._model_clean()
        if total is False:
            return "Try to find available area by:\n  sh casa.sh --find aptartment name\n"

        horizon = int(input)

        # Ask for one extra period and drop the first so the new index starts
        # after the last observed date.
        extended = pd.date_range(start=total.index[-1],
                                 periods=horizon + 1,
                                 freq='MS')
        new_index = extended[1:]

        fitted = VAR(total).fit()
        raw_forecast = fitted.forecast(y=total.values, steps=horizon)
        pred = pd.DataFrame(raw_forecast,
                            columns=['Q1', 'Q2', 'Q3'],
                            index=new_index)

        final_df = pd.concat([total, pred], axis=0)
        final = final_df.loc[new_index]

        if save_status is True:
            self._save_image_model(eval_model=final_df,
                                   pred_model=pred,
                                   pred_num=input)

        return final
def var_predict(df, n_forwards=(1, 3), n_lags=4, test_ratio=0.2):
    """
    Multivariate time series forecasting with a Vector Auto-Regressive model.

    :param df: pandas.DataFrame, index: time, columns: sensor id, content: data.
    :param n_forwards: a tuple of forecast horizons.
    :param n_lags: the order of the VAR model.
    :param test_ratio: fraction of rows (taken from the end) used for testing.
    :return: [list of predictions, one DataFrame per horizon], df_test
    """
    n_sample, n_output = df.shape
    n_test = int(round(n_sample * test_ratio))
    n_train = n_sample - n_test
    df_train, df_test = df[:n_train], df[n_train:]

    scaler = StandardScaler(mean=df_train.values.mean(),
                            std=df_train.values.std())
    var_result = VAR(scaler.transform(df_train.values)).fit(n_lags)
    max_n_forwards = np.max(n_forwards)

    def _forecast_from(window_start):
        # Forecast max_n_forwards steps from the n_lags rows at window_start.
        window = df.values[window_start: window_start + n_lags]
        return var_result.forecast(scaler.transform(window), max_n_forwards)

    result = np.zeros(shape=(len(n_forwards), n_test, n_output))
    start = n_train - n_lags - max_n_forwards + 1
    for input_ind in range(start, n_sample - n_lags):
        prediction = _forecast_from(input_ind)
        # Test-set row hit by a 1-step forecast from this window.
        one_step_row = input_ind - n_train + n_lags
        for horizon_idx, n_forward in enumerate(n_forwards):
            result_ind = one_step_row + n_forward - 1
            if 0 <= result_ind < n_test:
                result[horizon_idx, result_ind, :] = prediction[n_forward - 1, :]

    df_predicts = [
        pd.DataFrame(scaler.inverse_transform(result[horizon_idx]),
                     index=df_test.index,
                     columns=df_test.columns)
        for horizon_idx in range(len(n_forwards))
    ]
    return df_predicts, df_test
Example #12
0
    def _run_varLiNGAM(self, xt, verbose=False):
        """ Run the VarLiNGAM algorithm on data.

        Args:
            xt : time series matrix with size n*m (length*num_variables)
            verbose : forwarded to the LiNGAM constructor

        Returns:
            Tuple: (Bo, Bhat) Instantaneous and lagged causal coefficients

        """
        identity = np.identity(xt.shape[1])

        # Step 1: fit a VAR of order self.lag; drop the first parameter row.
        var_results = VAR(xt).fit(self.lag)
        lagged_params = var_results.params[1:, :]

        # Steps 2-3: LiNGAM on the VAR residuals gives the instantaneous matrix.
        residual_frame = pd.DataFrame(var_results.resid)
        Bo_ = LiNGAM(verbose=verbose)._run_LiNGAM(residual_frame)

        # Step 4: lagged coefficients corrected by the instantaneous effects.
        Bhat_ = np.dot(identity - Bo_, lagged_params)
        return (Bo_, Bhat_)
Example #13
0
    def test_select_order(self):
        """Exercise order selection by AIC and FPE, then ``select_order`` directly."""
        self.model.fit(10, ic='aic', verbose=True)
        self.model.fit(10, ic='fpe', verbose=True)

        # Original author's note: "bug" -- a fresh model built from the endog
        # array is used for the direct select_order call.
        fresh_model = VAR(self.model.endog)
        fresh_model.select_order()
Example #14
0
def var(flow, target):
    """Score one-step VAR forecasts of ``flow`` against ``target``.

    For every ``flow[i, :, j]`` series, consecutive pairs
    ``(value[k], value[k + 1])`` form a two-variable dataset. A default VAR is
    fitted on it and the second variable of the one-step forecast is stored in
    ``result[i, j]``. If fitting or forecasting fails the prediction is 0.0.

    Errors are computed only where the target exceeds the module-level
    ``mask_threshold``. Column 0 is reported as "in" and column 1 as "out".

    Note: silences all warnings process-wide via ``warnings.filterwarnings``.

    Returns
    -------
    tuple
        ``(in_rmse, out_rmse, in_mae, out_mae)``
    """
    warnings.filterwarnings("ignore")
    in_mask = np.greater(target[:, 0], mask_threshold)
    out_mask = np.greater(target[:, 1], mask_threshold)
    n_rows, n_steps, n_channels = flow.shape[0], flow.shape[1], flow.shape[-1]
    result = np.zeros((n_rows, n_channels))
    for i in range(n_rows):
        if verbose and (i + 1) % 10000 == 0:
            print("VAR: line {} of {}".format(i + 1, n_rows))
        for j in range(n_channels):
            data = [[flow[i, k, j], flow[i, k + 1, j]]
                    for k in range(n_steps - 1)]
            model = VAR(data)
            try:
                model_fit = model.fit()
                result[i, j] = model_fit.forecast(model_fit.y, steps=1)[0][1]
            except Exception:
                # Degenerate series (e.g. constant) cannot be fitted; fall
                # back to 0.0 without swallowing KeyboardInterrupt/SystemExit.
                result[i, j] = 0.0
    in_err = target[:, 0][in_mask] - result[:, 0][in_mask]
    out_err = target[:, 1][out_mask] - result[:, 1][out_mask]
    in_rmse = np.sqrt(np.mean(np.square(in_err)))
    out_rmse = np.sqrt(np.mean(np.square(out_err)))
    in_mae = np.mean(np.abs(in_err))
    out_mae = np.mean(np.abs(out_err))
    return in_rmse, out_rmse, in_mae, out_mae
Example #15
0
 def extract(self, instance):
     """Return the VAR(``self.lags``) parameters of the instance's EEG data as a 1-d array."""
     assert isinstance(instance, Instance)
     var_params = VAR(instance.eeg_data.T).fit(self.lags).params
     # Reshape to a single column, then let hstack collapse it to one vector.
     n_params = np.prod(var_params.shape)
     column = var_params.reshape((n_params, 1))
     features = np.hstack(column)
     self.assert_features(features)
     return features
Example #16
0
 def setup_class(cls):
     """Fit VAR(2) and VAR(1) on annualised log-differences of three macro series."""
     frame = macrodata.load_pandas().data
     levels = frame[["realgdp", "realcons", "realinv"]].values
     # Quarterly log-difference scaled by 400.
     growth = np.diff(np.log(levels), axis=0) * 400
     cls.res0 = VAR(growth).fit(maxlags=2)
     cls.resl1 = VAR(growth).fit(maxlags=1)
     cls.data = growth
 def extract(self, instance):
     """Fit a VAR with ``self.lags`` lags on ``instance.eeg_data.T`` and flatten its parameters.

     Returns the parameters as a 1-d ndarray after ``self.assert_features``
     has checked them.
     """
     assert(isinstance(instance, Instance))
     fitted = VAR(instance.eeg_data.T).fit(self.lags)
     params = fitted.params
     as_column = params.reshape((np.prod(params.shape), 1))
     # hstack over the rows of the column collapses it into one vector.
     features = np.hstack(as_column)
     self.assert_features(features)
     return features
Example #18
0
def var_predict(train_data, num_out):
    """Forecast ``num_out`` steps of the first variable for each series in ``train_data``.

    Each element of ``train_data`` gets its own VAR(2). The returned array
    stacks the first-column forecasts, one row per input series.
    """
    def _first_variable_forecast(series):
        fitted = VAR(series).fit(2)
        return fitted.forecast(fitted.y, steps=num_out)[:, 0]

    return np.array([_first_variable_forecast(x) for x in train_data])
Example #19
0
def var_simulate(data, n_simulate, pca_n=200):
    """Simulate ``n_simulate`` steps from a VAR(1) fitted in PCA space.

    ``data`` is reduced with the ``pca`` helper, a VAR with ``maxlags=1`` is
    fitted on the component scores, and the simulated scores are projected
    back through the ``'Va'`` matrix of the PCA result.
    """
    reduced = pca(data, pca_n)
    fitted = VAR(reduced['pc_scores']).fit(maxlags=1)
    simulated_scores = fitted.simulate_var(n_simulate)
    # Back-project the simulated component time courses.
    return simulated_scores @ reduced['Va']
Example #20
0
def extractCoeff(timeseries_data, lag_order):
    '''
    Fit a VAR of order ``lag_order`` with no deterministic terms
    (``trend='nc'``) and return its lag coefficient matrices.

    The original note describes the input as a 7680x16 array and the result
    as 5x16x16 (presumably for lag_order=5 -- confirm against the caller).
    '''
    fitted = VAR(timeseries_data).fit(lag_order, trend='nc')
    return fitted.coefs
Example #21
0
def load_results_statsmodels(dataset):
    """Fit a VAR(4) by OLS for every deterministic-term setup in ``dt_s_list``.

    Each entry of ``dt_s_list`` is indexed as ``(trend, season, ...)``: the
    season drives the exogenous regressors and the trend is passed to
    ``fit``. Returns a dict keyed by those entries.
    """
    fitted_by_terms = dict.fromkeys(dt_s_list)
    for terms in dt_s_list:
        endog = data[dataset]
        seasonal_exog = generate_exog_from_season(terms[1], len(endog))
        fitted_by_terms[terms] = VAR(endog, seasonal_exog).fit(
            maxlags=4, trend=terms[0], method="ols")
    return fitted_by_terms
Example #22
0
 def train(self, array_X, array_Y):
     """Fit a VAR on ``[Y | X]`` and return the in-sample fit of Y.

     The inputs are stored on ``self.train_X`` / ``self.train_Y``. The first
     fitted value is repeated once at the front of the returned array.
     """
     self.train_X, self.train_Y = array_X, array_Y
     target_column = numpy.matrix(array_Y).T
     stacked = numpy.concatenate((target_column, array_X), axis=1)
     fitted = VAR(endog=pd.DataFrame(data=stacked)).fit()
     fitted_target = fitted.fittedvalues.values[:, 0]
     # Prepend a copy of the first fitted value.
     return numpy.hstack((fitted_target[0], fitted_target))
Example #23
0
def load_results_statsmodels(dataset):
    """Return ``{deterministic-term tuple: fitted VAR results}`` for ``dataset``.

    For each tuple in ``dt_s_list``, element 1 is used to build the
    exogenous regressors and element 0 is passed as the ``trend`` argument
    of an OLS fit with ``maxlags=4``.
    """
    results_per_deterministic_terms = dict.fromkeys(dt_s_list)
    for key in dt_s_list:
        trend_spec, season_spec = key[0], key[1]
        endog = data[dataset]
        exog = generate_exog_from_season(season_spec, len(endog))
        fitted = VAR(endog, exog).fit(maxlags=4, trend=trend_spec,
                                      method="ols")
        results_per_deterministic_terms[key] = fitted
    return results_per_deterministic_terms
Example #24
0
def VARmethod(paramsList=['pollution.csv', '0.93','pm','date'], specialParams=['2','1','4','0','1', '1', '7']):
    """Forecast each value column of a CSV with a lag-1 VAR and write ``result.csv``.

    ``paramsList`` is read positionally: element 0 is the CSV path, element 1
    the training fraction (as a string), elements 2..-2 the value columns and
    the last element the timestamp column. ``specialParams`` is not used in
    this function.

    Side effects: prints each forecast and its RMSE, and writes
    ``result.csv`` in the working directory. Returns None.

    NOTE(review): both defaults are mutable lists; they are not mutated here.
    """
    path = paramsList[0]
    trainRows = float(paramsList[1])
    saveto = 'result.csv'
    df = pd.read_csv(path, usecols=paramsList[2:])
    allRows = df.shape[0]


    # NOTE(review): the test slice starts one row after the end of the train
    # slice, so the row at the split index is in neither set -- confirm intent.
    train = df[0:int(allRows*trainRows)]
    test = df[int(allRows*trainRows)+1:]


    # Index each frame by the parsed timestamp and aggregate to daily means.
    df['Timestamp'] = pd.to_datetime(df[paramsList[-1]], format='%Y/%m/%d %H:%M')
    df.index = df['Timestamp']
    df = df.resample('D').mean()

    train['Timestamp'] = pd.to_datetime(train[paramsList[-1]], format='%Y/%m/%d %H:%M')
    train.index = train['Timestamp']
    train = train.resample('D').mean()

    test['Timestamp'] = pd.to_datetime(test[paramsList[-1]], format='%Y/%m/%d %H:%M')
    test.index = test['Timestamp']
    test = test.resample('D').mean()

    y_hat = test.copy()
    nullArray = train.copy()
    nullArray['time'] = train.index
    # Everything above is generic / reusable ----------------------------

    # One model per value column (every entry between index 2 and the
    # timestamp column).
    for i in range(2,len(paramsList)-1):
        #https://blog.csdn.net/mooncrystal123/article/details/86736397
        #https://blog.csdn.net/qq_41518277/article/details/85101141
        # First-difference the training column before fitting.
        var_data = train[paramsList[i]].diff(1).dropna()
        #model = VAR(endog=var_data, dates=pd.date_range(train.index[0], train.index[-1]),freq='M')
        model = VAR(endog=var_data)
        # Estimate the optimal lag order
        #lag_order = model.select_order()
        # Print the results
        #print(lag_order.summary())
        model_fit = model.fit(1)
        prediction = model_fit.forecast(model_fit.y, steps=len(test[paramsList[i]]))
        print(prediction)
        y_hat[paramsList[i]] = prediction


        rms = sqrt(mean_squared_error(test[paramsList[i]], y_hat[paramsList[i]]))
        print(rms)
    # --------------------------------------
    # Stack the training rows on top of the forecasts and export them.
    y_hat['time'] = test.index
    yhat_naive = np.array(y_hat)
    nArray = np.array(nullArray)
    newArray = np.concatenate((nArray,yhat_naive),axis=0)
    s = pd.DataFrame(newArray, columns=paramsList[2:])
    for i in range(2,len(paramsList)-1):
        # Blank out the leading (training) portion of each value column.
        s[paramsList[i]][0:int(len(s)*trainRows)] = ""
    s.to_csv(saveto,index=False,header=True,float_format='%.2f')
Example #25
0
def test_irf_err_bands():
    """Smoke test: every IRF error-band routine runs on a VAR(2) of the macro data."""
    irf = VAR(get_macrodata()).fit(maxlags=2).irf()
    # Results are discarded on purpose; only successful execution is checked.
    irf.err_band_sz1()
    irf.err_band_sz2()
    irf.err_band_sz3()
    irf.errband_mc()
Example #26
0
def test_var_cov_params_pandas(bivariate_var_data):
    """``cov_params`` of a pandas-backed VAR(2) is a labelled, square DataFrame."""
    frame = pd.DataFrame(bivariate_var_data, columns=['x', 'y'])
    cov = VAR(frame).fit(2).cov_params()
    assert isinstance(cov, pd.DataFrame)

    # Expected labels: every regressor crossed with every equation.
    regressors = ('const', 'L1.x', 'L1.y', 'L2.x', 'L2.y')
    expected_index = pd.MultiIndex.from_product((regressors, ('x', 'y')))
    assert_index_equal(cov.index, cov.columns)
    assert_index_equal(cov.index, expected_index)
    def extract(self, instance):
        """Return flattened VAR parameters (``maxlags=2``) of ``instance.eeg_data.T``.

        Original author's open question (Wittawat): the lag order actually
        used may differ between instances -- not verified here.
        """
        fitted = VAR(instance.eeg_data.T).fit(maxlags=2)
        params = fitted.params
        n_entries = np.prod(params.shape)
        # Column vector -> hstack collapses it into a 1-d ndarray.
        features = np.hstack(params.reshape((n_entries, 1)))
        self.assert_features(features)
        return features
Example #28
0
 def parametersVAR(self, tenors, yields, lag=1, steps=1, alpha=0.01):
     """Fit a VAR on calibrated b0/b1/b2 factors and forecast them with intervals.

     The historical curve parameters come from
     ``self.calibrateCurveParametersHistorical`` and are indexed like
     ``yields``. The fitted model is stored on ``self._varModel``.

     Returns ``(forecast_interval result, last observed [b0, b1, b2] row)``.
     """
     calibrated = self.calibrateCurveParametersHistorical(tenors, yields)
     params = pd.DataFrame(data=calibrated,
                           columns=['tau', 'b0', 'b1', 'b2'],
                           index=yields.index)
     factor_cols = ['b0', 'b1', 'b2']
     self._varModel = VAR(params[factor_cols]).fit(lag)
     self._varModel.summary()
     # The most recent factor row seeds the forecast and is returned as well.
     last_factors = params.tail(1)[factor_cols].values
     fparam = self._varModel.forecast_interval(last_factors, steps,
                                               alpha=alpha)
     return fparam, last_factors
Example #29
0
    def extract(self, instance):
        """Fit a VAR (``maxlags=2``) on the transposed EEG data; return its parameters as 1-d.

        ``instance`` is expected to be an ``Instance`` object. The original
        author (Wittawat) asks whether different instances could end up with
        different lag orders -- left unanswered here.
        """
        var_params = VAR(instance.eeg_data.T).fit(maxlags=2).params
        as_column = var_params.reshape((np.prod(var_params.shape), 1))
        features = np.hstack(as_column)
        self.assert_features(features)
        return features
Example #30
0
def baseline_VAR(flow_df,
                 road_adj,
                 hops=5,
                 history_window=4,
                 prediction_window=1,
                 test_ratio=0.25):
    """Per-road VAR baseline using each road's multi-hop neighbours.

    The rows of ``flow_df`` are split into a leading training part and a
    trailing test part of roughly ``test_ratio``. For every road, a VAR of
    order ``history_window`` is fitted on the training columns of that road
    and its neighbours. Each sliding window of ``history_window`` test rows is
    then forecast ``prediction_window`` steps ahead, and the last step of the
    road's own series is kept.

    Returns ``(Y_pred, Y_true)``, both shaped (n_sample, n_road). Negative
    predictions are clipped to 0.
    """

    n_timestamp, n_road = flow_df.shape
    n_timestamp_train = int(round(n_timestamp * (1 - test_ratio)))
    n_timestamp_test = n_timestamp - n_timestamp_train

    # find neighbors for each node
    # Symmetrise the adjacency, then accumulate its powers up to `hops`;
    # a positive entry is later treated as "is a neighbour".
    symm_adj = road_adj + road_adj.transpose()
    neighbor_adj = symm_adj
    for hop in range(hops - 1):
        neighbor_adj = np.matmul(neighbor_adj, symm_adj) + symm_adj
    np.fill_diagonal(neighbor_adj, 0)  # exclude self

    train_data = np.array(
        flow_df.iloc[:n_timestamp_train])  # (n_timestamp_train, n_road)
    test_data = np.array(
        flow_df.iloc[n_timestamp_train:])  # (n_timestamp_test, n_road)

    # Targets start once a full history window plus the forecast horizon fits
    # inside the test period.
    Y_true = test_data[history_window +
                       (prediction_window -
                        1):n_timestamp_test]  # (n_sample, n_road)
    Y_pred = np.zeros(Y_true.shape)  # (n_sample, n_road)

    for road_index in range(n_road):

        # The road itself goes first, so column 0 of every forecast is that
        # road's own series.
        filtered_roads = [road_index] + list(
            np.where(neighbor_adj[road_index] > 0)[0])
        filtered_train_data = np.array(train_data[:, filtered_roads])
        filtered_test_data = np.array(test_data[:, filtered_roads])

        model = VAR(filtered_train_data)
        model_fitted = model.fit(history_window)

        # Stack `history_window` shifted copies of the test data along a new
        # last axis; the "n_road" axis here is the filtered road set.
        X_test = np.concatenate([
            np.expand_dims(
                filtered_test_data[i:(n_timestamp_test - history_window -
                                      prediction_window + 1 + i)],
                axis=2) for i in range(history_window)
        ],
                                axis=2)  # (n_sample, n_road, history_window)
        for i in range(Y_pred.shape[0]):  # n_sample
            # Transpose the window to (history_window, roads); keep the last
            # forecast step of the first variable.
            Y_pred[i, road_index] = model_fitted.forecast(
                X_test[i].transpose(), steps=prediction_window)[-1, :][0]

#     max_value = Y_true.max()
#     print((Y_pred > max_value).sum()) # no super large values
#     print((Y_pred < 0).sum()) # negative values account for 0.2%
    Y_pred[Y_pred < 0] = 0  # correct negative values

    return Y_pred, Y_true
Example #31
0
def test_irf_trend():
    """IRF standard errors are similar across trend specifications (see #1636).

    Rough comparison: demeaned data fitted without a constant, and data with
    a small added linear trend fitted with ``trend="ct"``, should both give
    IRF standard errors close to the default fit on the raw data.
    """
    data = get_macrodata().view((float, 3), type=np.ndarray)

    def _irf(values, **fit_kwargs):
        # VAR(4) impulse responses over 10 periods.
        return VAR(values).fit(4, **fit_kwargs).irf(10)

    irf = _irf(data)

    # Demeaned data, no deterministic term.
    data_nc = data - data.mean(0)
    irf_nc = _irf(data_nc, trend="n")

    assert_allclose(irf_nc.stderr()[1:4], irf.stderr()[1:4], rtol=0.01)

    # Add a small linear trend and fit with constant + trend.
    n_obs = len(data)
    trend = 1e-3 * np.arange(n_obs) / (n_obs - 1)
    data_t = data + trend[:, None]
    irf_t = _irf(data_t, trend="ct")

    assert_allclose(irf_t.stderr()[1:4], irf.stderr()[1:4], rtol=0.03)
Example #32
0
def model_var(train_data,test_data,train_data1,test_data1):
    """Fit a two-variable VAR and report the 10-step forecast RMSE.

    ``train_data1`` and ``train_data`` are stacked as columns 0 and 1 of the
    endogenous matrix. The first column of the 10-step forecast is compared
    with the first 10 values of ``test_data1``. ``test_data`` is accepted for
    interface compatibility and is not used.

    Prints the stacked shape, the model summary and the RMSE. Returns None.
    """
    # Column vectors of whatever length the inputs have (was hard-coded to 372).
    x = train_data1.reshape((-1, 1))
    x1 = train_data.reshape((-1, 1))
    lis = np.concatenate((x, x1), axis=1)
    print(np.shape(lis))
    #forecast
    model_fit = VAR(endog=lis).fit()
    print(model_fit.summary())
    predictions = model_fit.forecast(model_fit.y, steps=10)
    # The label says RMSE, so take the square root of the MSE.
    rmse = np.sqrt(mean_squared_error(test_data1[0:10], predictions[:, 0]))
    print('VAR RMSE: ', rmse)
Example #33
0
    def sensitivity(df, col_name, ratio, percentage=0.9):
        """Scale the second-to-last value of ``col_name`` and return the re-forecast.

        Operates on ``df_sen`` from the enclosing scope and modifies it in
        place; the ``df`` and ``percentage`` arguments are not used. Returns
        the last of two forecast steps for the variable at column index 3.
        """
        df_sen[col_name].iloc[-2] = df_sen[col_name].iloc[-2] * ratio

        # Fit on everything except the final row.
        fitted = VAR(endog=df_sen[:-1]).fit()

        two_step = fitted.forecast(fitted.y, steps=2)
        return two_step[-1, 3]
Example #34
0
def _VAR(train, test=None):
    """Fit a default VAR on ``train`` and forecast from its last 10 rows.

    Parameters
    ----------
    train : 2-d array, indexed as ``train[-10:, :]``
    test : array-like or None
        When given, ``len(test)`` steps are forecast and the MSE against
        ``test`` is printed. Otherwise ``len(train)`` steps are forecast.

    Returns
    -------
    The forecast array.
    """
    model_fit = VAR(train).fit()  #maxlags=299, ic='aic')
    print('Lag: %s' % model_fit.k_ar)
    history = train[-10:, :]
    # Identity check: ``test != None`` on an ndarray compares element-wise and
    # makes the ``if`` raise "truth value of an array is ambiguous".
    if test is not None:
        predictions = model_fit.forecast(history, len(test))
        error = mean_squared_error(test, predictions)
        print('Test MSE: %.3f' % error)
    else:
        predictions = model_fit.forecast(history, len(train))
    return predictions
Example #35
0
def vars_test():
    """Fit a VAR(2) on log-differenced balance/purchase/redeem series and plot it."""
    frame = get_dataframe()
    series = frame[["tBalance_all", "total_purchase", "total_redeem"]]
    series.index = pandas.DatetimeIndex(frame["date"])
    # Log-difference; the first row becomes NaN and is dropped.
    growth = np.log(series).diff().dropna()

    results = VAR(growth).fit(2)
    results.summary()

    results.plot()
Example #36
0
class TestVARResultsLutkepohl(object):
    """
    Check VAR results against reference values from Lutkepohl's book.

    The model is a VAR(2) on log-differenced "e1" data with the last 16
    observations held out. All tests are skipped when pandas is unavailable.
    """

    def __init__(self):
        self.p = 2

        if not have_pandas():
            return

        sdata, dates = get_lutkepohl_data("e1")
        series_names = sdata.dtype.names
        levels = data_util.struct_to_ndarray(sdata)
        growth = np.diff(np.log(levels), axis=0)

        # Differencing drops one observation, hence dates[1:].
        self.model = VAR(growth[:-16],
                         dates=dates[1:-16],
                         names=series_names,
                         freq="Q")
        self.res = self.model.fit(maxlags=self.p)
        self.irf = self.res.irf(10)
        self.lut = E1_Results()

    def test_approx_mse(self):
        if not have_pandas():
            raise nose.SkipTest

        # 3.5.18, p. 99
        expected_mse = 1e-4 * np.array([
            [25.12, 0.580, 1.300],
            [0.580, 1.581, 0.586],
            [1.300, 0.586, 1.009],
        ])
        obtained = self.res.forecast_cov(3)[1]

        assert_almost_equal(expected_mse, obtained, DECIMAL_3)

    def test_irf_stderr(self):
        if not have_pandas():
            raise nose.SkipTest

        irf_stderr = self.irf.stderr(orth=False)
        # Reference entry k corresponds to period k + 1.
        for period, expected in enumerate(self.lut.irf_stderr, start=1):
            assert_almost_equal(np.round(irf_stderr[period], 3), expected)

    def test_cum_irf_stderr(self):
        if not have_pandas():
            raise nose.SkipTest

        cum_stderr = self.irf.cum_effect_stderr(orth=False)
        for period, expected in enumerate(self.lut.cum_irf_stderr, start=1):
            assert_almost_equal(np.round(cum_stderr[period], 3), expected)

    def test_lr_effect_stderr(self):
        if not have_pandas():
            raise nose.SkipTest

        stderr = self.irf.lr_effect_stderr(orth=False)
        # The orthogonalised variant is only exercised, not compared.
        self.irf.lr_effect_stderr(orth=True)
        assert_almost_equal(np.round(stderr, 3), self.lut.lr_stderr)
Example #37
0
def test_var_constant():
    """Fitting a VAR on data containing a constant column raises ValueError (see 2043)."""
    import datetime
    from pandas import DataFrame, DatetimeIndex

    values = np.array([[2., 2.], [1, 2.], [1, 2.], [1, 2.], [1., 2.]])
    data = DataFrame(values)

    # Daily index starting now, one entry per row.
    start = datetime.datetime.now()
    one_day = datetime.timedelta(days=1)
    data.index = DatetimeIndex(
        [start + k * one_day for k in range(data.shape[0])])

    model = VAR(data)
    with pytest.raises(ValueError):
        model.fit(1)
Example #38
0
def test_lag_order_selection():
    """Selected lag orders match the reference for every dataset / term setup.

    For each combination the model's ``select_order(10)`` result is compared,
    per information criterion, against ``results_ref``.
    """
    if debug_mode and "lag order" not in to_test:
        return
    if debug_mode:
        print("\n\nLAG ORDER SELECTION", end="")

    criteria = ["aic", "fpe", "hqic", "bic"]
    for ds in datasets:
        for dt in dt_s_list:
            if debug_mode:
                print("\n" + dt_s_tup_to_string(dt) + ": ", end="")
            endog_tot = data[ds]
            # dt[1] drives the exogenous regressors, dt[0] the trend.
            exog = generate_exog_from_season(dt[1], len(endog_tot))
            selection = VAR(endog_tot, exog).select_order(10, trend=dt[0])
            for ic in criteria:
                err_msg = build_err_msg(ds, dt,
                                        "LAG ORDER SELECTION - " + ic.upper())
                obtained = getattr(selection, ic)
                desired = results_ref[ds][dt]["lagorder"][ic]
                assert_allclose(obtained, desired, rtol, atol, False, err_msg)
Example #39
0
    def __init__(self):
        """Fit the VAR(2) reference model on log-differenced "e1" data.

        The last 16 observations are held out. The fitted results, a
        10-period IRF analysis and the reference values are stored on the
        instance.
        """
        self.p = 2
        sdata, dates = get_lutkepohl_data('e1')

        levels = data_util.struct_to_ndarray(sdata)
        growth = np.diff(np.log(levels), axis=0)

        # Differencing drops one observation, hence dates[1:].
        self.model = VAR(growth[:-16], dates=dates[1:-16], freq='Q')
        self.res = self.model.fit(maxlags=self.p)
        self.irf = self.res.irf(10)
        self.lut = E1_Results()
Example #40
0
def test_var_constant():
    """A constant column in the data makes ``VAR.fit`` raise ValueError (see 2043)."""
    import datetime
    from pandas import DataFrame, DatetimeIndex

    values = np.array([[2., 2.], [1, 2.], [1, 2.], [1, 2.], [1., 2.]])
    data = DataFrame(values)

    first_stamp = datetime.datetime.now()
    step = datetime.timedelta(days=1)
    stamps = [first_stamp + row * step for row in range(data.shape[0])]
    data.index = DatetimeIndex(stamps)

    # pytest.warns(ValueWarning) did not silence the warning in the test
    # output (per the original note), so it is filtered explicitly.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=ValueWarning)
        model = VAR(data)
    with pytest.raises(ValueError):
        model.fit(1)
Example #41
0
def test2():
    """Fit VARs on log-differenced macro data and print a 30-step forecast.

    A VAR(2) is fitted first and its summary is built and discarded. The
    model is then refitted with AIC order selection (up to 50 lags), and a
    30-step forecast seeded with the last ``k_ar`` observations is printed.
    """
    mdata = statsmodels.datasets.macrodata.load_pandas().data
    dates = mdata[["year", "quarter"]].astype(int).astype(str)
    quarterly = dates["year"] + "Q" + dates["quarter"]

    mdata = mdata[["realgdp", "realcons", "realinv"]]
    mdata.index = pandas.DatetimeIndex(quarterly)
    data = np.log(mdata).diff().dropna()

    model = VAR(data)
    results = model.fit(2)
    results.summary()
    results = model.fit(maxlags=50, ic="aic")

    lag_order = results.k_ar
    # print() with a single argument behaves identically on Python 2 and 3;
    # the former print statement was a SyntaxError on Python 3.
    print(results.forecast(data.values[-lag_order:], 30))
Example #42
0
    def __init__(self):
        """Build the VAR(2) reference fixture; only ``self.p`` is set without pandas."""
        self.p = 2

        if not have_pandas():
            return

        sdata, dates = get_lutkepohl_data("e1")
        column_names = sdata.dtype.names
        levels = data_util.struct_to_ndarray(sdata)
        log_growth = np.diff(np.log(levels), axis=0)

        # Hold out the last 16 observations; dates[1:] matches the
        # differenced series.
        self.model = VAR(log_growth[:-16],
                         dates=dates[1:-16],
                         names=column_names,
                         freq="Q")
        self.res = self.model.fit(maxlags=self.p)
        self.irf = self.res.irf(10)
        self.lut = E1_Results()
Example #43
0
# Annualise the benchmark Sharpe ratio with sqrt(252).
sr_bm = np.sqrt(252)*sharpe(rets_bm)
# One pre-formatted argument prints the same text on Python 2 and 3; the
# former multi-argument print statement was a SyntaxError on Python 3.
print("%s %s %s" % (mn_bm, sd_bm, sr_bm))
#calc beta's alpha's


#do forecast of returns, correlation. Use to Weight
rets.iloc[:,0:10].plot()
###DETOUR TO VAR FORECASTING

from statsmodels.tsa.vector_ar.var_model import VAR, VARResults, VARProcess
import statsmodels
# Bare expression: only displays a value in an interactive session.
statsmodels.version.version

#Check for NA's in data - have to reduce number of series used as full 30
#gave singular matrix
v1 = VAR(rets_train[series_red], freq='D')
v1.select_order(maxlags=30)
results = v1.fit(5) #From fitted
# results.summary()
results.plot()
# results.plot_acorr()
# plt.show()

#Make forecast for 3months
test_index = rets_test.index
# Two dates one '3M' period apart, starting at the first test date; the
# horizon is the number of test rows between them.
fc_range = pd.date_range(start=test_index[0], periods=2, freq='3M')
fc_periods = len(rets_test[fc_range[0]:fc_range[1]])
lag_order = results.k_ar
# The full training history is passed to forecast().
fc = results.forecast(rets_train[series_red].values,fc_periods)
# Bare expressions below only display values in an interactive session.
fc.shape
fc[:,-1]
Example #44
0
 def test_constructor(self):
     """VAR can be built and fitted from a plain ndarray without names."""
     unnamed = self.data.view((float, 3))
     VAR(unnamed).fit(self.p)
Example #45
0
import pandas as pd
import numpy as np
import statsmodels.api as sm
import pylab
from statsmodels.tsa.base.datetools import dates_from_str
from statsmodels.tsa.vector_ar.var_model import VAR

# Load the macro dataset and build a datetime index from year/quarter.
mdata = sm.datasets.macrodata.load_pandas().data
dates = mdata[['year', 'quarter']].astype(int).astype(str)
quarterly = dates["year"] + "Q" + dates["quarter"]
quarterly = dates_from_str(quarterly)

mdata = mdata[['realgdp','realcons','realinv']]
mdata.index = pd.DatetimeIndex(quarterly)
data = np.log(mdata).diff().dropna() # log difference

# make a VAR model
model = VAR(data)
results = model.fit(2)
# print() with one argument behaves identically on Python 2 and 3; the
# former print statement was a SyntaxError on Python 3.
print(results.summary())
results.plot()
results.plot_acorr() #autocorrelation

# Lag-order selection, then refit with the AIC-chosen order (up to 15 lags).
model.select_order(15)
results = model.fit(maxlags=15, ic='aic')

# Impulse responses over 10 periods.
irf = results.irf(10)
irf.plot(orth=True) #Orthogonalization

pylab.show()