Example #1
def data2AB(data, x0=None):
    n = data.shape[0]
    T = data.shape[1]
    YY = np.dot(data[:, 1:], data[:, 1:].T)
    XX = np.dot(data[:, :-1], data[:, :-1].T)
    YX = np.dot(data[:, 1:], data[:, :-1].T)

    model = VAR(data.T)
    r = model.fit(1)
    A = r.coefs[0,:,:]

    # A = np.ones((n,n))
    B = np.ones((n, n))
    np.fill_diagonal(B, 0)
    B[np.triu_indices(n)] = 0
    K = int(np.sum(np.abs(B)))  # abs(A)+abs(B))); np.int and scipy.sum were removed

    a_idx = np.where(A != 0)
    b_idx = np.where(B != 0)
    np.fill_diagonal(B, 1)

    try:
        s = x0.shape
        x = x0
    except AttributeError:
        x = np.r_[A.flatten(), 0.1 * np.random.randn(K)]  # scipy.randn was removed
    o = optimize.fmin_bfgs(nllf2, x,
                           args=(np.double(A), np.double(B),
                                 YY, XX, YX, T, a_idx, b_idx),
                           gtol=1e-12, maxiter=500,
                           disp=False, full_output=True)
    A, B = x2M(o[0], np.double(A), np.double(B), a_idx, b_idx)
    B = B+B.T
    return  A, B
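For reference, a minimal import preamble this snippet assumes (a sketch; nllf2 and x2M are project-specific helpers that the example does not include):

import numpy as np
from scipy import optimize
from statsmodels.tsa.api import VAR
# nllf2 (the objective function) and x2M (parameter-vector unpacking) must
# also be in scope; they are project helpers not shown in this example.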
Example #2
def VARprocess(df,log=False):
    # Log transformation, relative difference and drop NULL values
    if (log):    
        df = np.log(df+0.1).diff().dropna()
    # Vector Autoregression Process generation     
    maxAttr = len(df.columns) 
    # Find the right lag order
    orderFound = False
    while not orderFound:
        try:
            model = VAR(df.iloc[:, 0:maxAttr])
            order = model.select_order()
            orderFound = True
        except Exception:
            exc_type, exc_obj, exc_tb = sys.exc_info()
            if str(exc_obj) == "data already contains a constant.":
                maxAttr = maxAttr - 1
            else:
                maxAttr = int(str(exc_obj).split("-th")[0]) - 1
            print("Exception, reducing to n_attributes", maxAttr)
            orderFound = False

    # select_order() returned a dict of criterion -> lag on old statsmodels
    n_lags = max(order.items(), key=operator.itemgetter(1))[1]
    method = max(order.items(), key=operator.itemgetter(1))[0]
    print("n_lags", n_lags)
    print("method", method)
    results = model.fit(maxlags=n_lags, ic=method)
    return results
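The loop above targets an old statsmodels API in which select_order() returned a dict mapping criterion to lag. On recent statsmodels it returns a LagOrderResults object instead; a minimal sketch of the modern equivalent on synthetic data:

import numpy as np
import pandas as pd
from statsmodels.tsa.api import VAR

rng = np.random.default_rng(0)
df = pd.DataFrame(rng.standard_normal((200, 2)), columns=['a', 'b'])

model = VAR(df)
sel = model.select_order(maxlags=8)      # LagOrderResults on modern statsmodels
n_lags = sel.selected_orders['aic']      # lag recommended by AIC
results = model.fit(maxlags=n_lags, ic='aic')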
Example #3
def data2VARgraph_model(data, pval=0.05):
    model = VAR(data.T)
    r = model.fit(1)
    A = r.coefs[0,:,:]
    n = A.shape[0]
    g = {str(i):{} for i in range(1,n+1)}

    for i in range(n):
        for j in range(n):
            if np.abs(A[j,i]) > pval: g[str(i+1)][str(j+1)] = set([(0,1)])
    return g, r
Example #4
def get_fedea_on_gdp():
    qbuilder = inquisitor.Inquisitor(token)
    df = qbuilder.series(ticker = ['ESE.940000D259D.Q.ES','FEEA.PURE064A.M.ES'])
    df.dropna(inplace = True)
    df['fedea'] = df['FEEA.PURE064A.M.ES'].diff()
    df['gdp'] = df['ESE.940000D259D.Q.ES'] / 100
    data1 = df[['fedea', 'gdp']].dropna()  # avoids chained-assignment warnings
    model1 = VAR(data1)
    results1 = model1.fit(4)
    
    irf1 = results1.irf(8)
    fedea_on_gdp = irf1.orth_lr_effects[1,0] / data1['fedea'].std()
    return fedea_on_gdp
Example #5
def get_irf(nd, subset):
    '''
    http://statsmodels.sourceforge.net/0.6.0/vector_ar.html
    '''
    data = nd.reindex(columns=subset)
    data = data.dropna()
    data.describe()
    model = VAR(data)
    results = model.fit(6)

    irf = results.irf(12)

    cum_effects = irf.orth_cum_effects

    return cum_effects[12, 2, 0]
Example #6
def determineOrderOfP():

    X_train = readVectorAutoRegressiveMethodXTrain()

    for i in [1, 2, 3, 4, 5, 6, 7]:
        vectorAutoRegressiveMethodModel = VAR(X_train)
        vectorAutoRegressiveMethodModelResult = vectorAutoRegressiveMethodModel.fit(i)
        print('Order =', i)
        print('AIC: ', vectorAutoRegressiveMethodModelResult.aic)
        print('BIC: ', vectorAutoRegressiveMethodModelResult.bic)
        print()
Example #7
    def vector_auto_reg(self, y, dates, p, clean_data="greedy"):

        s = self.map_column_to_sheet(y[0])
        v = np.copy(y)
        v = np.append(v, dates)

        # prepare data
        dfClean = s.cleanData(v, clean_data)
        time_series = dfClean[y]
        dates = dfClean[dates]

        time_series = time_series.set_index(dates)

        # run pth-order VAR
        model = VAR(time_series)
        results = model.fit(p)

        return results
Example #8
def VAR_Model(modeldata):
    model = VAR(modeldata)
    res = {}
    AIC = []
    for i in range(100):
        result = model.fit(i)
        aic = result.aic
        AIC.append(aic)
        if (aic <= pr.AICvalue_limit) and (aic >= -pr.AICvalue_limit):
            break
    lag_order = i - 1

    varmodel = model.fit(lag_order)
    residuals = DataFrame(varmodel.resid)
    rmean = abs(residuals.mean())
    #print("Residual Error = {}".format(rmean[0]))
    res.update({'Residual Mean': rmean, 'Lag Order': lag_order})
    return varmodel, res
Example #9
def var_forecast(train_df, test_df, params):
    _order = params['order']
    _input = list(params['input'])
    _output = params['output']
    _step = params.get('step', 1)

    model = VAR(train_df[_input].values)
    results = model.fit(_order)
    lag_order = results.k_ar
    params['order'] = lag_order

    forecast = []
    for i in np.arange(0, len(test_df) - lag_order - _step + 1):
        fcst = results.forecast(test_df[_input].values[i:i + lag_order], _step)
        forecast.append(fcst[-1])

    forecast_df = pd.DataFrame(columns=test_df[_input].columns, data=forecast)
    return forecast_df[_output].values
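A minimal call sketch with synthetic frames; the column names and the params layout here are assumptions matching how the function reads them:

import numpy as np
import pandas as pd

rng = np.random.default_rng(1)
cols = ['x', 'y']
train_df = pd.DataFrame(rng.standard_normal((200, 2)), columns=cols)
test_df = pd.DataFrame(rng.standard_normal((50, 2)), columns=cols)

params = {'order': 2, 'input': cols, 'output': 'y', 'step': 1}
preds = var_forecast(train_df, test_df, params)  # one-step-ahead forecasts for 'y'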
Example #10
def var_model(data, look_ahead=5):
    '''Fits a vector autoregression model to the data and forecasts closing 
    prices a number of days ahead equal to the look_ahead value.'''
    data = data.set_index('date')
    data = data.drop('index', axis=1)
    model = VAR(data)
    results = model.fit(maxlags=15)
    forecast = pd.DataFrame(
        results.forecast(data.values[0:], look_ahead),
        columns=['close', 'high', 'low', 'open', 'volume', 'sentiment'])
    #future = pd.date_range(start='1-1-2019', periods=5)
    future = pd.date_range(start=data.iloc[-1].name + dt.timedelta(days=1),
                           periods=look_ahead)
    forecast = forecast.set_index(future)
    # dill.dump(forecast, open())
    data_w_forecast = pd.concat([data, forecast])  # DataFrame.append was removed in pandas 2.0

    return data_w_forecast
Example #11
def causality_test(var1, var2):
    data1 = pd.Series(var1, name='Var1')
    data2 = pd.Series(var2, name='Var2')
    mdata = pd.concat([data1, data2], axis=1)
    mdata.index = pd.date_range('1950-01-01', periods=600, freq='M')

    model = VAR(mdata)
    results = model.fit(7)

    res = results.test_causality('Var2', ['Var1'], kind='f')
    # Older statsmodels returned a dict here; newer versions expose attributes
    crit = res.crit_value
    stat = res.test_statistic
    if stat > crit:
        cause = 1
    else:
        cause = 0

    return cause
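A quick sanity check with synthetic series (illustrative only; the helper hard-codes 600 monthly observations, so the inputs must have length 600):

import numpy as np

rng = np.random.default_rng(2)
x = rng.standard_normal(600)
y = np.r_[0.0, x[:-1]] + 0.5 * rng.standard_normal(600)  # y trails x by one step

print(causality_test(x, y))  # should typically print 1: x Granger-causes y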
Example #12
def trainVectorAutoRegressiveMethodModelOnFullDataset():

    vectorAutoRegressiveMethodDataset = importVectorAutoRegressiveMethodDataset(
        "M2SLMoneyStock.csv", "PCEPersonalSpending.csv")

    # training the model on the whole dataset
    vectorAutoRegressiveMethodModel = VAR(vectorAutoRegressiveMethodDataset)

    # p = 5 is used because, across the models built for different p values,
    # it gave the minimum AIC and BIC
    vectorAutoRegressiveMethodModelResult = vectorAutoRegressiveMethodModel.fit(5)

    #saving the model in pickle files
    saveVectorAutoRegressiveMethodModelForFullDataset(
        vectorAutoRegressiveMethodModelResult)

    print(vectorAutoRegressiveMethodModelResult.summary())
Example #13
def VAR_forecast(df):
    """Forecasts day-ahead electricity prices from the passed dataset
    Parameters
    ----------
    df
        The dataset containing historical observations of day-ahead prices
    Returns
    -------
    forecasted
        A list of forecasted electricity prices for the next 24 hours
    """

    column_name = "Day-ahead Price [EUR/MWh]"

    # Open CSV File and set timestamp column as index
    df.rename(columns={df.columns[0]: "cet_timestamp"}, inplace=True)
    df["cet_timestamp"] = pd.to_datetime(df["cet_timestamp"], format="%Y-%m-%d %H:%M")
    df.set_index("cet_timestamp", inplace=True)

    # Difference the series to remove the trend and drop the resulting NaN row
    df_diff = df.diff().dropna()

    # Generate new dates
    dates = list()
    last_date = df.index[-1]
    for i in range(1, 25):
        last_date += timedelta(hours=1)
        dates.append(last_date.strftime("%Y-%m-%d %H:%M:%S"))

    var_model = VAR(df_diff).fit(26)
    # VARResults.y is a deprecated alias; endog holds the same array on newer statsmodels
    var_forecast = var_model.forecast(y=var_model.endog, steps=24)
    var_forecast_df = pd.DataFrame(var_forecast, columns=df.columns, index=dates)
    var_forecast_df = invert_transformation(df, var_forecast_df)
    '''
    # For mean absolute error
    last_24hours = last_date - timedelta(hours=24)
    # History - 24hours
    history = df_diff[df_diff.index <= last_24hours]
    var_mae_model = VAR(history).fit(26)
    var_mae_forecast = var_mae_model.forecast(y = var_mae_model.y, steps=24)
    mae = mean_absolute_error(history['Day-ahead Price [EUR/MWh]'].values, np.array(var_mae_forecast))
    print(mae)
    '''
    return list(var_forecast_df[column_name].values), datetime.now().strftime("%Y-%m-%d")
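invert_transformation is not shown in this example; a minimal sketch of the usual first-difference inversion, assuming df holds the original levels and the forecast frame holds differenced values (the project's actual helper may differ):

def invert_transformation(df_orig, df_forecast):
    # Undo one round of differencing: cumulative-sum the forecast deltas
    # and anchor them at the last observed level of each column.
    df_out = df_forecast.copy()
    for col in df_orig.columns:
        df_out[col] = df_orig[col].iloc[-1] + df_forecast[col].cumsum()
    return df_out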
Example #14
    def fit(self, p, transformation="dct"):

        if p < 1:
            raise ValueError(f"{p} is an invalid lag")
        self.p = p

        self.transformation = transformation
        l_train_tensor = self.__apply_trans(
            self.train, transformation,
            2)  # Applies the transformation across the rows

        train_model_sets = self.__split_cols_into_model_sets(l_train_tensor)

        # Fits all of the var models
        fits = []
        for i in range(self.matrix_shape[1]):
            train_df = pd.DataFrame(train_model_sets[i])
            model = VAR(train_df)
            fit = model.fit(p)
            fits.append(fit)
        self.var_fits = fits

        # Groups all of the coef matrix to coef tensors
        coefs = np.empty((p, self.matrix_shape[1], self.matrix_shape[0],
                          self.matrix_shape[0]))
        c = np.empty(self.matrix_shape)
        for i in range(self.matrix_shape[1]):
            curr_coefs = fits[i].coefs
            for j in range(p):
                coefs[j][i] = curr_coefs[j]

            # Adds onto c
            c[:, i] = fits[i].params[fits[i].params.index == "const"].iloc[0]

        # Performs an inverse transform on all of them
        for i in range(p):
            coefs[i] = self.__apply_inverse_trans(coefs[i], transformation, 0)

        # Performs the inverse transformation to the const matrix
        c = self.__apply_inverse_trans(c, transformation, 1)

        self.coefs = coefs
        self.c = c
Example #15
def test_gc2(data, gc_format, maxlag=None, signif=0.05, verbose=False):
    from statsmodels.tsa.api import VAR
    model = VAR(data)
    if maxlag:
        res = model.fit(maxlag, verbose=verbose)
    else:
        res = model.fit(verbose=verbose)
    gc_res = res.test_causality(gc_format[0],
                                gc_format[1],
                                signif=signif,
                                verbose=verbose)
    # On old statsmodels, test_causality returned a dict-like result
    results = pd.Series(gc_res)
    results['H0'] = "'{}' do not Granger-cause '{}'".format(
        gc_format[1], gc_format[0])
    results['VAR'] = res
    results['best_order'] = res.k_ar  # fitted lag order
    return results
Example #16
def calc_granger_caulity(data):
    """Compute a Granger causality statistic between the two columns of data.

    Args:
        data (array): array without nan values

    Returns:
        float: the test statistic, or np.nan if the fit fails
    """
    from statsmodels.tsa.api import VAR
    # data_dropna = data[~np.isnan(data).any(1)]
    try:
        model = VAR(data)
        res = model.fit(verbose=False)
        out = res.test_causality(0, 1, verbose=False)['statistic']
    except ValueError as e:
        # print 'calc_granger_caulity: ', 'most factors are zeros.'
        out = np.nan
    return out
Example #17
    def granger_causality(self, data):
        columns = []
        for i in range(data.shape[0]):
            for j in range(data.shape[2]):
                columns.append(str(i) + str(j))
        # print(columns)

        topic_oriented_data = data[0]
        for i in range(1, len(data)):
            topic_oriented_data = np.concatenate((topic_oriented_data, data[i]), 1)

        topic_oriented_data = pandas.DataFrame(topic_oriented_data, columns=columns)
        print(topic_oriented_data)
        # print(type(topic_oriented_data))


        var_model = VAR(topic_oriented_data)
        results = var_model.fit(2)
        gc_result = results.test_causality(columns, columns, kind='f')
        print(gc_result.summary())
Example #18
def granger(cause, effect, lag):
    data = pd.DataFrame({'cause': cause, 'effect': effect})
    model = VAR(data)
    try:
        if lag == -1:
            results = model.fit(maxlags=15, trend='nc', ic='aic')  # 'nc' is spelled 'n' on newer statsmodels
        else:
            results = model.fit(lag)
    except Exception:
        # no usable lag in [1, maxlags]: treat the pair as having no causality
        return 1
    try:
        x = results.test_causality('effect', 'cause',
                                   kind='wald').summary().data
    except Exception:
        return 0
    return_value = x[1][2]  # p-value cell of the causality summary table

    return return_value
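A minimal usage sketch on synthetic data (purely illustrative):

import numpy as np

rng = np.random.default_rng(3)
cause = rng.standard_normal(300)
effect = np.r_[0.0, cause[:-1]] + 0.1 * rng.standard_normal(300)  # effect trails cause

p_value = granger(cause, effect, 2)  # summary cell x[1][2] is the p-value
print(p_value)                       # expected to be near zero for this construction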
Example #19
    def varmodel(self):
        self.mvdfg.index = pd.to_datetime(self.mvdfg.index)
        self.var_predicted = pd.DataFrame()
        self.var_forecast = pd.DataFrame()
        self.var_data_train = pd.DataFrame()
        self.var_data_test = pd.DataFrame()
        maxlag = 3
        if splitdf.upper() == 'Y':
            # Validation model
            self.var_data_train = self.mvdfg[(pd.to_datetime(self.mvdfg.index)) <= testdate]
            self.var_data_test = self.mvdfg[(pd.to_datetime(self.mvdfg.index)) > testdate]
            var_model = VAR(self.var_data_train)
            results = var_model.fit(maxlags=maxlag, ic='aic')
            print(results.summary())
            lag_order = results.k_ar
            var_steps = len(self.var_data_test)
            pred_values = results.forecast(self.var_data_train.values[-lag_order:], var_steps)
            self.predicted = pd.DataFrame(pred_values, index=self.mvdfg.index[-var_steps:], columns=self.mvdfg.columns)
            self.var_predicted = self.predicted
        # Forecast
        startdate = self.mvdfg.index.max() + pd.offsets.DateOffset(months=1)
        maxdate = self.mvdfg.index.max() + pd.offsets.DateOffset(months=forecaststeps + 1)
        var_fc_index = np.asarray((pd.date_range(startdate, maxdate, freq='m').strftime('%Y-%m-01')))
        var_fc_index = pd.to_datetime(var_fc_index)
        var_forecast_model = VAR(self.mvdfg)
        fc_results = var_forecast_model.fit(maxlags=maxlag, ic='aic')
        print(fc_results.summary())
        fc_lag_order = fc_results.k_ar
        fc_values = fc_results.forecast(self.mvdfg.values[-fc_lag_order:], forecaststeps)
        self.forecast = pd.DataFrame(fc_values, index=var_fc_index, columns=self.mvdfg.columns)
        self.var_forecast = self.forecast
        print(self.var_forecast)
        return self.var_predicted, self.var_forecast
Example #20
def gaussian_var_copula_entropy_rate(sample, p=None, robust=False, p_ic='hqic'):
	"""
	Estimates the entropy rate of the copula-uniform dual representation of a stationary Gaussian VAR(p) (or AR(p)) process from a sample path.

	We recall that the copula-uniform representation of a :math:`\\mathbb{R}^d`-valued process :math:`\\{x_t\\} := \\{(x_{1t}, \\dots, x_{dt}) \\}`
	is, by definition, the process :math:`\\{ u_t \\} := \\{ \\left( F_{1t}\\left(x_{1t}\\right), \\dots, F_{dt}\\left(x_{dt}\\right) \\right) \\}` 
	where :math:`F_{it}` is the cumulative distribution function of :math:`x_{it}`.

	It can be shown that 

	.. math::
		h\\left( \\{ x_t \\}\\right) = h\\left( \\{ u_t \\}\\right) + \\sum_{i=1}^d h\\left( x_{i*}\\right) 

	where :math:`h\\left(x_{i*}\\right)` is the entropy of the i-th coordinate process at any time.



	Parameters
	----------
	sample: (T, d) np.array 
		Array of T sample observations of a :math:`d`-dimensional process.
	p : int or None
		Number of lags to compute for the autocovariance function. If :code:`p=None` (the default), it is inferred by fitting a VAR model on the sample, using as information criterion :code:`p_ic`.
	robust: bool
		If True, the Pearson autocovariance function is estimated by first estimating a Spearman rank correlation, and then inferring the equivalent Pearson autocovariance function, under the Gaussian assumption.
	p_ic : str
		The criterion used to learn the optimal value of :code:`p` (by fitting a VAR(p) model) when :code:`p=None`. 
		Should be one of 'hqic' (Hannan-Quinn Information Criterion), 'aic' (Akaike Information Criterion), 'bic' (Bayes Information Criterion) and 't-stat' (based on last lag). 
		Same as the 'ic' parameter of :code:`statsmodels.tsa.api.VAR`.
	Returns
	-------
	h : float
		The entropy rate of the copula-uniform dual representation of the input process.
	p : int
		Order of the VAR(p).
	"""
	_sample = sample[~np.isnan(sample).any(axis=1)] if len(sample.shape) > 1 else sample[~np.isnan(sample)]
	if p is None:
		# Fit an AR and use the fitted p.
		max_lag = int(round(12*(_sample.shape[0]/100.)**(1/4.)))
		if len(_sample.shape) == 1 or _sample.shape[1] == 1:
			m = AR(_sample)
			p = m.fit(ic=p_ic).k_ar
		else:
			m = VAR(_sample)
			p = m.fit(ic=p_ic).k_ar

	x = _sample if len(_sample.shape) > 1 else _sample[:, None]
	res = -np.sum(0.5*np.log(2.*np.pi*np.e*np.var(x, axis=0)))
	res += gaussian_var_entropy_rate(x, p, robust=robust)

	return res, p
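For context, the two terms computed at the end implement the displayed identity rearranged as :math:`h\left(\{u_t\}\right) = h\left(\{x_t\}\right) - \sum_{i=1}^d h\left(x_{i*}\right)`; for a Gaussian coordinate with variance :math:`\sigma_i^2` the marginal entropy is :math:`\frac{1}{2}\log\left(2\pi e \sigma_i^2\right)`, which is exactly the `-np.sum(0.5*np.log(2.*np.pi*np.e*np.var(x, axis=0)))` term, while `gaussian_var_entropy_rate` supplies :math:`h\left(\{x_t\}\right)`.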
Example #21
def fit_forecast(dataset):
    cols = dataset.columns
    # creating the train and validation set
    train = dataset[:int(0.8 * (len(dataset)))]
    valid = dataset[int(0.8 * (len(dataset))):]
    train_differenced, round_no = remove_stationary(train)
    model = VAR(train_differenced)
    model_fit = model.fit()
    # make prediction on validation
    prediction = model_fit.forecast(model_fit.endog, steps=len(valid))
    # converting predictions to dataframe
    forecast = pd.DataFrame(prediction,
                            index=dataset.index[-len(valid):],
                            columns=cols)
    if round_no != 0:
        forecast = invert_transformation(train, forecast, (round_no == 2))
    # check rmse
    rmses = {}
    for i in cols:
        rmses[i + '_RMSE'] = sqrt(mean_squared_error(forecast[i], valid[i]))

    return forecast, valid, rmses
Example #22
File: mtsu.py, Project: Seawyn/Time-Series
def var_param_min_search(data, limit_p):
    # Parameters of each minimum
    param_aic = []
    param_bic = []
    # Minimums of each criterion
    mins_aic = []
    mins_bic = []
    # Current minimums of each criterion
    current_min_aic = None
    current_min_bic = None
    for i in range(limit_p):
        model = VAR(data)
        model_fit = model.fit(i)
        current_aic = model_fit.aic
        current_bic = model_fit.bic

        # Check for new AIC minimum
        if current_min_aic is None or current_min_aic > current_aic:
            current_min_aic = current_aic
            param_aic.append(str(i))
            mins_aic.append(current_aic)

        # Check for new BIC minimum
        if current_min_bic is None or current_min_bic > current_bic:
            current_min_bic = current_bic
            param_bic.append(str(i))
            mins_bic.append(current_bic)

    res = {
        'aic': {
            'parameters': param_aic,
            'mins': mins_aic
        },
        'bic': {
            'parameters': param_bic,
            'mins': mins_bic
        }
    }
    return res
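A minimal call on synthetic data (illustrative; note the loop above starts at lag 0):

import numpy as np
import pandas as pd

rng = np.random.default_rng(4)
df = pd.DataFrame(rng.standard_normal((150, 2)), columns=['u', 'v'])

res = var_param_min_search(df, limit_p=6)
print(res['aic']['parameters'][-1])  # last lag that lowered the AIC minimum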
Example #23
def fit(data, maxlag):
    #with open('varModel.json') as f:
    #   data = json.load(f)
    mdata = prepareData(data)
    equation = dict()
    equation["aic"] = []
    equation["BIC"] = []
    equation["hqic"] = []
    equation["min"] = []
    model = VAR(mdata)
    for x in range(0, maxlag):
        fitted_model = model.fit(x + 1)
        equation["aic"].append(fitted_model.aic)
        equation["BIC"].append(fitted_model.bic)
        equation["hqic"].append(fitted_model.hqic)

    minLag = model.fit(maxlags=maxlag, ic='bic')
    equation["min"].append(minLag.aic)
    equation["min"].append(minLag.bic)
    equation["min"].append(minLag.hqic)

    return equation
Example #24
def evaluate_svar_model(X, p, s, feat_nm, agg_level):
    feat_list = X.columns.values.tolist()
    #add seasonal variables
    decomposition = seasonal_decompose(X[feat_nm], model='additive', period=s)  # older statsmodels used freq=s
    X['seasonal'] = decomposition.seasonal
    trend = decomposition.trend
    trend = trend.ffill()  # fill missing values with the previous value
    trend = trend.bfill()  # fill leading missing values with the next value
    X['trend'] = trend

    # prepare training dataset
    train_size = len(X)
    train = X

    model = VAR(train)
    model_fit = model.fit(p)

    test_size = test_size_level[agg_level]
    yhat = model_fit.forecast(train.values[:p],
                              train_size + max(test_size) - p)
    index = feat_list.index(feat_nm)  # index of feature we want to analyse

    yhat_feat = [item[index]
                 for item in yhat]  # model output relevant to that features
    predictions = yhat_feat

    pred0 = predictions[train_size - p + test_size[0] - 1]
    pred1 = predictions[train_size - p + test_size[1] - 1]
    pred2 = predictions[train_size - p + test_size[2] - 1]
    avg0 = sum(predictions[train_size - p:train_size - p +
                           test_size[0]]) / test_size[0]
    avg1 = sum(predictions[train_size - p:train_size - p +
                           test_size[1]]) / test_size[1]
    avg2 = sum(predictions[train_size - p:train_size - p +
                           test_size[2]]) / test_size[2]

    return [pred0, pred1, pred2, avg0, avg1, avg2]
Example #25
def lag_selection(ytw, corp, tb):
    import cs_data_analysis as da
    from statsmodels.tsa.api import VAR
    import numpy as np
    '''
    CS-Aaa-3MO	 CS-Aa-3MO	 CS-A-3MO	 CS-Baa-3MO	 CS-Aaa-1YR	 CS-Aa-1YR	 CS-A-1YR	 CS-Baa-1YR	 CS-Aaa-5YR	 CS-Aa-5YR	 CS-A-5YR	 CS-Baa-5YR		 
    TB-3MO-TY	 TB-1YR-TY	 TB-5YR-TY
    '''
    debug(ytw.shape)
    endog = ytw[[corp, tb]]

    lag_count = 10
    ic_aic = np.zeros((lag_count, 1))  # AIC, BIC, HQIC
    ic_bic = np.zeros((lag_count, 1))

    for i in range(lag_count):
        debug(f"{'-'*4} period: {i + 1} {'-'*4}")
        # https://www.statsmodels.org/stable/generated/statsmodels.tsa.vector_ar.var_model.VAR.fit.html
        model = VAR(endog=endog)
        model_fit = model.fit(maxlags=i + 1, trend='ct', verbose=True)
        debug(f"aic: {model_fit.aic:.6f}")
        debug(f"bic: {model_fit.bic:.6f}")
        debug(f"hqic: {model_fit.hqic:.6f}")
        ic_aic[i] = model_fit.aic
        ic_bic[i] = model_fit.bic
        results = model_fit.summary()
        debug(results)

    ic_aic_min, aic_model_min = np.min(ic_aic), np.argmin(ic_aic)
    ic_bic_min, bic_model_min = np.min(ic_bic), np.argmin(ic_bic)

    debug('Relative Likelihoods')
    debug(np.exp((ic_aic_min - ic_aic) / 2))
    debug(f'number of parameters in minimum AIC model {(aic_model_min + 1)}')

    debug(np.exp((ic_bic_min - ic_bic) / 2))
    debug(f'number of parameters in minimum BIC model {(bic_model_min + 1)}')

    return aic_model_min + 1, bic_model_min + 1
Example #26
def full_model(data, caused, L):
    """
    Fit a VAR(L) model on data and return the sum of squared residuals
    for the caused column.

    :param data: multivariate time series
    :type data: pd.DataFrame
    :param caused: name of the column whose squared residuals are summed
    :param L: lag order of the VAR
    :return: SSE for the caused column
    """
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore")
        model = VAR(data)
        model_fit = model.fit(L)
    SSE = np.sum((model_fit.resid)**2)[caused]

    #ONLY FOR DEBUGGING
    #plt.plot(data['X'])
    #plt.plot(model_fit.fittedvalues['X'])
    #plt.show()
    #DEBUGGING END

    return SSE
Example #27
def VARprocess(df, log=False):
    """
    Description: This function applies Vector Auto Regression
    Input: dataframe
    Output: VARresults object
    """
    # Log transformation, relative difference and drop NULL values
    if (log):
        df = np.log(df + 0.1).diff().dropna()
    # Vector Autoregression Process generation
    maxAttr = len(df.columns)
    # Find the right lag order
    orderFound = False
    print("7.1.0 ----- Finding an order for the VAR")
    maxIter = 0
    while not orderFound and maxIter < 15:
        maxIter = maxIter + 1
        try:
            model = VAR(df)
            order = model.select_order()
            orderFound = True
            print(" !!! loop stuck")
        except Exception:
            exc_type, exc_obj, exc_tb = sys.exc_info()
            #if str(exc_obj)=="data already contains a constant.":
            maxAttr = maxAttr - 1
            #else:
            #maxAttr = int(str(exc_obj).split("-th")[0])-1
            #print "Exception, reducing to n_attributes ",maxAttr
            orderFound = False
    print("7.1.1 ----- Model fitting")
    if orderFound:
        # select_order() returned a dict of criterion -> lag on old statsmodels
        n_lags = max(order.items(), key=operator.itemgetter(1))[1]
        method = max(order.items(), key=operator.itemgetter(1))[0]
        results = model.fit(maxlags=n_lags, ic=method)
    else:
        results = model.fit()
    return results
Example #28
def data2AB(data, x0=None):
    n = data.shape[0]
    T = data.shape[1]
    YY = np.dot(data[:, 1:], data[:, 1:].T)
    XX = np.dot(data[:, :-1], data[:, :-1].T)
    YX = np.dot(data[:, 1:], data[:, :-1].T)

    model = VAR(data.T)
    r = model.fit(1)
    A = r.coefs[0, :, :]

    #A = np.ones((n,n))
    B = np.ones((n, n))
    np.fill_diagonal(B, 0)
    B[np.triu_indices(n)] = 0
    K = int(np.sum(np.abs(B)))  # abs(A)+abs(B))); np.int and scipy.sum were removed

    a_idx = np.where(A != 0)
    b_idx = np.where(B != 0)
    np.fill_diagonal(B, 1)

    try:
        s = x0.shape
        x = x0
    except AttributeError:
        x = np.r_[A.flatten(), 0.1 * np.random.randn(K)]  # scipy.randn was removed
    o = optimize.fmin_bfgs(nllf2,
                           x,
                           args=(np.double(A), np.double(B), YY, XX, YX, T,
                                 a_idx, b_idx),
                           gtol=1e-12,
                           maxiter=500,
                           disp=False,
                           full_output=True)
    A, B = x2M(o[0], np.double(A), np.double(B), a_idx, b_idx)
    B = B + B.T
    return A, B
Example #29
def var_prediction(df, train_perc, incidence_file, window=18, diff=True):
    # Clean the df
    df_aux = df.drop('Unnamed: 0', axis=1)
    df_aux = df_aux.drop('tref_start', axis=1)
    X = df_aux.values[:, :]
    if diff:
        X = np.diff(X, axis=0)
    # Fit a standardizer on the training split
    v = int(len(X) * train_perc)
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X[:v])
    # Train the model
    model = VAR(X_train)
    results = model.fit(window)
    # Validation df with incidents
    df = df.iloc[v:]
    incidencias = None
    if incidence_file is not None:
        inc = get_working_incidence(incidence_file)
        df = generar_incidencias(df, inc).sort_values(by=['tref_start'])
        # Incident array
        incidencias = df['incidencia'].values[window:]
        df = df.drop('incidencia', axis=1)
    # Get the real network values
    df = df.drop('Unnamed: 0', axis=1)
    df = df.drop('tref_start', axis=1)
    X = df.values[:, :]
    if diff:
        X = np.diff(X, axis=0)
        if incidence_file is not None:
            incidencias = incidencias[1:]
    X = scaler.transform(X)
    # Compute the predictions
    ys = X[window:]
    yhats = []
    for i in range(window, len(X)):
        yhats.append(results.forecast(X[i - window:i], 1)[0])
    return ys, np.array(yhats), incidencias
Example #30
def infl_forecast_values(year='2001', month='02', n_steps=6):
    # n_steps is how far into future you look
    # crop the data depending on n_steps and date
    orig_df = load_data()
    date = form_date(year, month)
    train, test = crop_data(orig_df, date, n_steps)

    #take first difference
    first_row, train_1 = take_diff(train)
    first_YOY = first_row['YOY']

    # create VAR model
    model = VAR(train_1, freq='MS')

    #for now fit to 4
    results = model.fit(4)

    lag_order = results.k_ar
    prediction_input = train_1.values[-lag_order:]

    # I want last column
    infl_results = results.forecast(prediction_input, n_steps)[:, 1]
    return infl_results
Example #31
def process(data, cid):
    """ Call make_stationary() to check for stationarity and make the time series stationary.
        Build a VAR model and call its fit method with the desired lag order.
        Forecast one step ahead and persist the fitted model to disk.
    """

    nobs = 1
    df = data.copy()
    df_differenced = make_stationary(
        df)  # check for Stationarity and make the Time Series Stationary

    model = VAR(df_differenced)  # Make a VAR model
    model_fit = model.fit(10)  # call fit method with lag order
    model_fit.summary()  # summary result of the model fitted
    lag_order = model_fit.k_ar  # Get the lag order
    forecast_input = df_differenced.values[
        -lag_order:]  # Input data for forecasting

    # Forecast and Invert the transformation to get the real forecast values
    fc = model_fit.forecast(y=forecast_input, steps=nobs)

    inp_file = os.path.join(os.getcwd(), cid)
    joblib.dump(model_fit, inp_file)
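make_stationary is external to this snippet; a minimal sketch of a typical ADF-based differencing helper — an assumption about what the project does, not its actual code:

from statsmodels.tsa.stattools import adfuller

def make_stationary(df, signif=0.05, max_diff=2):
    # Hypothetical helper: difference the frame until every column
    # passes an augmented Dickey-Fuller test (p-value below signif).
    for _ in range(max_diff):
        pvalues = [adfuller(df[col].dropna())[1] for col in df.columns]
        if all(p < signif for p in pvalues):
            break
        df = df.diff().dropna()
    return df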
Example #32
    def run(self):
        if not self._args:
            return None

        data = self._data_service.get_data(self._args)

        split_data = {
            self._args['dependent_variable']:
            data[data['ticker'] == self._args['dependent_variable']]['close']
        }
        for i, ticker in enumerate(self._args['independent_variables']):
            split_data[ticker] = data[data['ticker'] == ticker]['close']

        data = pd.DataFrame(split_data).dropna()

        model = VAR(data)
        result = model.fit(2)
        print(result.summary())
        result.test_causality(self._args['dependent_variable'],
                              self._args['independent_variables'],
                              kind='f')

        return result
Example #33
def temporal_detect_individual(target_idx, dta, maxlag):
    
    num_ts = len(dta[0])
    len_ts = len(dta)
     
    tmp_target = [ dta[j][target_idx] for j in range(len_ts) ] 
    
    res_lag = []
    
    for i in range(num_ts):
        if i != target_idx:
            
            tmp_ts = [dta[j][i] for j in range(len_ts)]
            # zip() returns an iterator on Python 3; stack into a (T, 2) array for VAR
            tmp_x = np.column_stack((tmp_target, tmp_ts))

            model = VAR(tmp_x)
            best_lag = model.select_order(maxlag, verbose=False)
            
            res_lag.append(best_lag)
    
    return res_lag 
Example #34
def GMMGranger(k, t, n):
    bet = 0
    yes = 0
    while bet <= n - 1:
        xseries = GMM(k, t)
        yseries = GMM(k + 3, t)
        data = pd.DataFrame([xseries, yseries]).transpose()
        model = VAR(np.asarray(data))
        try:
            results = model.fit(maxlags=15, ic='aic', trend='nc')
        except Exception:
            continue
        bet += 1
        if results.test_causality(0, 1,
                                  kind='wald').summary().data[1][2] > 0.05:
            if results.test_causality(1, 0,
                                      kind='wald').summary().data[1][2] > 0.05:
                yes += 1
    return float(yes) / n


#accuracy=GMMGranger(5,200,100)
#print accuracy
Example #35
def test_gc(data, maxlag=None, signif=0.05, verbose=False):
    """Apply a Granger causality test to every ordered pair of columns.

    Args:
        data (pd.DataFrame or array-like): multivariate time series
        maxlag (int, optional): lag order passed to VAR.fit
        signif (float, optional): significance level for the test
        verbose (bool, optional): verbosity flag

    Returns:
        pd.DataFrame: one row of test results per ordered column pair
    """
    from statsmodels.tsa.api import VAR
    if isinstance(data, pd.core.frame.DataFrame):
        colns = data.columns
        arr = data.values
    else:
        arr = np.array(data)
        colns = np.arange(arr.shape[1])  # positional names when no DataFrame is given

    model = VAR(arr)
    if maxlag:
        res = model.fit(maxlag, verbose=verbose)
    else:
        res = model.fit(verbose=verbose)
    gc_test = []
    obs_name = res.names
    for c1, c2 in permutations(obs_name, 2):
        gc_res = res.test_causality(c1, c2, signif=signif, verbose=verbose)
        coln1, coln2 = colns[[obs_name.index(c1), obs_name.index(c2)]]
        gc_res = pd.Series(gc_res, name=(coln1, coln2))
        gc_res['H0'] = "'{}' do not Granger-cause '{}'".format(coln2, coln1)
        gc_test.append(gc_res)
    results = pd.DataFrame(gc_test)
    results['VAR'] = model
    results['best_order'] = res.k_ar  # fitted lag order
    return results
Example #36
File: ex_var.py, Project: AnaMP/statsmodels
import numpy as np
import statsmodels.api as sm
from statsmodels.tsa.api import VAR

# some example data
mdata = sm.datasets.macrodata.load().data
mdata = mdata[['realgdp','realcons','realinv']]
names = mdata.dtype.names
data = mdata.view((float,3))

use_growthrate = False #True #False
if use_growthrate:
    data = 100 * 4 * np.diff(np.log(data), axis=0)

model = VAR(data, names=names)
res = model.fit(4)

nobs_all = data.shape[0]

#in-sample 1-step ahead forecasts (model.y is the old name for model.endog)
fc_in = np.array([np.squeeze(res.forecast(model.endog[t-20:t], 1))
                  for t in range(nobs_all-6, nobs_all)])

print(fc_in - res.fittedvalues[-6:])

#out-of-sample 1-step ahead forecasts
fc_out = np.array([np.squeeze(VAR(data[:t]).fit(2).forecast(data[t-20:t], 1))
                   for t in range(nobs_all-6,nobs_all)])

print(fc_out - data[nobs_all-6:nobs_all])
Example #37
File: var.py, Project: DaryaVM/kaggle_asus
    #print df    
    df = df.fillna(0)
    original_df[m] = df
    stat_df = df.diff().dropna()

    
    # get rid of columns that are zeros at the end; we assume they will continue to be zeros
    for col_name in stat_df.columns.values:
        if stat_df[col_name][-1] == 0 and stat_df[col_name][-2] == 0:  # and stat_df[col_name][-3] == 0:
            print(col_name)
            del stat_df[col_name]
            no_forecast.setdefault(m, []).append(col_name)
    #print stat_df
    forecast_cols[m] = stat_df.columns.values
    #new_df = stat_df[['P17','P15','P16']]
    model = VAR(stat_df)
    maxlags = 3
    try:
        results = model.fit(maxlags, ic='aic', verbose=True)
    except Exception as exc:
        maxlags = 1
        results = model.fit(maxlags, ic='aic', verbose=True)
    
    #if m == 'M2':
    #   import pdb
    #   pdb.set_trace()
    #import pdb
    #pdb.set_trace()
#    results = model.fit(4)
    #print results.summary()
    lag_order = results.k_ar
Example #38
File: var.py, Project: id774/sandbox
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm
from statsmodels.tsa.api import VAR
from scipy.signal import lfilter

mdata = sm.datasets.macrodata.load().data
mdata = mdata[["realgdp", "realcons", "realinv"]]
names = mdata.dtype.names
data = mdata.view((float, 3))
data = np.diff(np.log(data), axis=0)
model = VAR(data)
res = model.fit(2)

res.plot_sample_acorr()

irf = res.irf(10)
irf.plot()
plt.savefig("image.png")  # save before show(); show() clears the active figure
plt.show()

res.plot_forecast(5)

res.fevd().plot()
plt.savefig("image2.png")
plt.show()
Example #39
data = pd.read_csv("/home/dusty/Econ8310/DataSets/pollutionBeijing.csv")

format = '%Y-%m-%d %H:%M:%S'
data['datetime'] = pd.to_datetime(data['datetime'], format=format)
data.set_index(pd.DatetimeIndex(data['datetime']), inplace=True)


# Select variables for VAR model
varData = data[['pm2.5','TEMP','PRES', 'Iws']].dropna()[:-50]
test = data[['pm2.5','TEMP','PRES', 'Iws']].dropna()[-50:]

# endVal = varData.loc["2014-01-04 00:00:00"]
# varData = varData.diff(1)


model = VAR(varData)  # define the model and data
# model.select_order()  # uses information criteria to select the model order
reg = model.fit(30)  # order chosen based on BIC criterion



# Forecasting

fcast = reg.forecast(varData['2013-01-04':].values, steps = 50)


def dediff(todaysVal, forecast):
    future = forecast
    for i in range(np.shape(forecast)[0]):
        if (i==0):
Example #40
def estimate_VAR():
    df = load_external()
    d = load_es_uncertainty()
    df1 = load_eu_uncertainty()
    nd = d.join(df).join(df1)
    plot_index_comparison(nd)
    plot_eu_epu(nd)
    plot_cinco_elpais(nd)
    nd = transform_data(nd)
    plot_epu_gdp(nd)
    
    benchmark_subset = ['EPU','europe', 'fedea', 'inflation', 'differential'] 

    nd['EPU'] = nd['policy'].diff(periods = 1)
    data = nd.reindex(columns=benchmark_subset)
    data = data.dropna()
    data.describe()
    model = VAR(data)
    results = model.fit(6)

    irf = results.irf(12)
    irf.plot(orth=True, impulse='EPU', subplot_params = {'fontsize' : 12})
    #irf.plot_cum_effects(orth=True, impulse='EPU', subplot_params = {'fontsize' : 12}) #

    cum_effects = irf.orth_cum_effects 
    
    fedea_on_gdp = get_fedea_on_gdp()
    elasticity = -100*fedea_on_gdp*cum_effects[12,2,0]
    print('Effects of a 1 sd uncertainty shock on gdp growth (negative): %0.3f%%' % elasticity)
    print('Inflation increases by %0.2f' % (100 * cum_effects[12, 3, 0], ))
    print('Bond spreads increase by %0.1f basis points' % (100 * cum_effects[12, 4, 0], ))
    full_sset = ['ibex','vol','resid','europe', 'fedea', 'inflation', 'differential' ]
    

    def get_irf(nd, subset):
        '''
        http://statsmodels.sourceforge.net/0.6.0/vector_ar.html
        '''
        data = nd.reindex(columns=subset)
        data = data.dropna()
        data.describe()
        model = VAR(data)
        results = model.fit(6)
    
        irf = results.irf(12)
        
        cum_effects = irf.orth_cum_effects 
        
        return cum_effects[12,2,0]
    
    for colname in colnames:
        nd['uncert'] = nd[colname] / nd.articles
        nd['uncert'] = nd['uncert'] / nd['uncert'].mean() * 100
        nd['uncert'] = nd['uncert'].diff(periods = 1)
        subset = ['uncert','europe', 'fedea', 'inflation', 'differential' ]
        cum_effect= get_irf(nd, subset)
        print('**%s** | %d | %.04f' % (colname, nd[colname].sum(), 100 * fedea_on_gdp * cum_effect))
    
    aa = d.mean()[colnames]
    plt.figure(6)
    h = plt.bar(range(len(aa)),aa,label = list(aa.index) )
    plt.subplots_adjust(bottom=0.3)
    
    xticks_pos = [0.65*patch.get_width() + patch.get_xy()[0] for patch in h]
    
    plt.xticks(xticks_pos, list(aa.index),  ha='right', rotation=45)
    plt.savefig(os.path.join(rootdir, 'figures','frequency_types.%s' % fig_fmt), format=fig_fmt)
Example #41
import numpy as np
import pandas
import statsmodels.datasets as ds  # assumed alias: this snippet references ds.macrodata
from statsmodels.tsa.api import VAR
from statsmodels.tsa.base.datetools import dates_from_str

mdata = ds.macrodata.load_pandas().data

# prepare the dates index
dates = mdata[['year', 'quarter']].astype(int).astype(str)
quarterly = dates["year"] + "Q" + dates["quarter"]
quarterly = dates_from_str(quarterly)

mdata = mdata[['realgdp','realcons','realinv']]
mdata.index = pandas.DatetimeIndex(quarterly)
data = np.log(mdata).diff().dropna()

model = VAR(data)
est = model.fit(maxlags=2)

def plot_input():
    est.plot()

def plot_acorr():
    est.plot_acorr()

def plot_irf():
    est.irf().plot()

def plot_irf_cum():
    irf = est.irf()
    irf.plot_cum_effects()