コード例 #1
0
ファイル: vector_model.py プロジェクト: raisulru/MMHE
    def get(self, request, *args, **kwargs):
        """Backtest a VARMA(1, 1) price forecast and return it with its MAPE.

        Query parameters:
            startdate: first training date, ``YYYY-MM-DD`` (default
                ``1970-01-30``).
            enddate: ``YYYY-MM-DD`` date; the training slice ends one month
                after it (default ``2018-01-01``).

        Returns:
            Response whose payload is the forecast frame (``price`` and
            ``actual`` columns keyed by stringified index) plus a ``mape``
            entry taken from ``forecast_accuracy``.
        """
        import json  # local import: needed to parse pandas' JSON output

        start_date = self.request.query_params.get('startdate', '1970-01-30')
        end_date = self.request.query_params.get('enddate', '2018-01-01')

        data = read_frame(PriceProduction.objects.all())
        data['date'] = pd.to_datetime(data['date'])
        data = data.drop('id', axis=1)
        data = data.set_index('date')

        startdate = dat.strptime(start_date, '%Y-%m-%d')
        enddate = dat.strptime(end_date, '%Y-%m-%d')

        # NOTE(review): ``nextmonth`` bounds both slices, so with inclusive
        # label slicing that row is shared by train and test; presumably
        # that is why the horizon is ``len(test) - 1`` -- confirm.
        nextmonth = enddate + relativedelta.relativedelta(months=1)
        train, test = data[startdate:nextmonth], data[nextmonth:]
        # VARMAX takes a two-element (p, q) order; only the first two
        # entries of the former (1, 1, 1) were ever read.
        model = VARMAX(train, order=(1, 1))
        model_fit = model.fit(disp=False)
        yhat = model_fit.forecast(len(test) - 1)

        yhat['actual'] = test['price']
        predictdata = yhat.drop("production", axis=1)
        metrics = forecast_accuracy(predictdata['price'],
                                    predictdata['actual'])
        predictdata.index = predictdata.index.astype("str")
        print(predictdata)
        # to_json() emits real JSON (null/true/false), which
        # ast.literal_eval cannot parse; json.loads handles it.
        payload = json.loads(predictdata.to_json())
        payload['mape'] = metrics['mape']
        return Response(payload)
コード例 #2
0
    def find_best_parameters(self, data: pd.DataFrame):
        """Grid-search the companion column and (p, q) order for a VARMA model.

        The first column of ``data`` is treated as the target. Every other
        column is paired with it in turn, a ``VARMAX`` model is fitted for
        each (p, q) in ``0..self.p_max`` x ``0..self.q_max`` (skipping
        (0, 0)), and each fit is scored with the results attribute named by
        ``self.scoring``. The lowest-scoring combination is stored in
        ``self.best_p``, ``self.best_d`` (position of the companion column)
        and ``self.best_q``.

        Parameters
        ----------
        data: pd.DataFrame
            Frame whose first column is the target variable.

        Raises
        ------
        ValueError
            If ``data`` has fewer than two columns, so no pair can be built.
        """
        # TODO: #14 Make sure that we have a way to not rely on column order to determine the target
        n_columns = data.shape[1]
        if n_columns < 2:
            raise ValueError(
                'find_best_parameters needs a target column plus at least '
                'one additional column')
        cols = data.columns.tolist()
        ar_range = range(0, self.p_max + 1)
        ma_range = range(0, self.q_max + 1)
        iteration = 1
        # Maps (p, column position, q) -> best score found for that column.
        results = {}

        for d_val in range(1, n_columns):
            # Pair the target with one other endogenous column at a time.
            y_train = data.iloc[:, [0, d_val]]
            print('\nAdditional Variable in VAR model = %s' % cols[d_val])
            info_criteria = pd.DataFrame(
                index=['AR{}'.format(p) for p in ar_range],
                columns=['MA{}'.format(q) for q in ma_range]
            )
            for p_val, q_val in itertools.product(ar_range, ma_range):
                row, col = 'AR{}'.format(p_val), 'MA{}'.format(q_val)
                if p_val == 0 and q_val == 0:
                    # Order (0, 0) is not fitted; mark it as missing.
                    info_criteria.loc[row, col] = np.nan
                    print(' Iteration %d completed' % iteration)
                else:
                    try:
                        model = VARMAX(y_train, order=(p_val, q_val), trend='c')
                        # The statsmodels keyword is ``maxiter``; the former
                        # ``max_iter`` was silently ignored.
                        model = model.fit(maxiter=1000, disp=False)
                        # attrgetter replaces eval('model.' + scoring) and
                        # still accepts dotted attribute paths.
                        info_criteria.loc[row, col] = operator.attrgetter(
                            self.scoring)(model)
                        print(' Iteration %d completed' % iteration)
                    except Exception as exc:
                        # Best effort: a failed fit leaves the cell missing.
                        print(' Iteration %d failed: %s' % (iteration, exc))
                iteration += 1
            info_criteria = info_criteria[info_criteria.columns].astype(float)
            interim_p, interim_q, interim_bic = find_lowest_pq(info_criteria)
            if self.verbose == 1:
                _, axis = plt.subplots(figsize=(20, 10))
                axis = sns.heatmap(
                    info_criteria,
                    mask=info_criteria.isnull(),
                    ax=axis,
                    annot=True,
                    fmt='.0f'
                )
                axis.set_title(self.scoring)
            results[(interim_p, d_val, interim_q)] = interim_bic

        (best_p, best_d, best_q), _ = min(results.items(),
                                          key=operator.itemgetter(1))
        self.best_p = int(best_p)
        self.best_d = int(best_d)
        self.best_q = int(best_q)
        print('Best variable selected for VAR: %s' % cols[self.best_d])
コード例 #3
0
def varma_prediction(train,test,steps):
    """Fit a VARMA model on ``train`` and return its forecast as a DataFrame.

    The (p, q) order comes from ``get_var_pq_params(train)``. The forecast
    horizon is ``steps`` when it is truthy, otherwise ``len(test)``. The
    returned frame uses ``train.columns`` as its columns.
    """
    ar_order, ma_order = get_var_pq_params(train)
    fitted = VARMAX(train, order=(ar_order, ma_order)).fit(disp=False)
    horizon = steps if steps else len(test)
    forecast = fitted.forecast(steps=horizon)
    return pd.DataFrame(forecast, columns=train.columns)
コード例 #4
0
def varma_forecast(history, config):
    """Return the first element of a one-step-ahead VARMA prediction.

    ``config`` unpacks to ``(order, trend)``. The model is built with
    stationarity and invertibility enforcement switched off, fitted quietly,
    and asked to predict the single position ``len(history)``.
    """
    order, trend = config
    fitted = VARMAX(
        history,
        order=order,
        trend=trend,
        enforce_stationarity=False,
        enforce_invertibility=False,
    ).fit(disp=False)
    # Start and end are both the first out-of-sample position.
    next_position = len(history)
    return fitted.predict(next_position, next_position)[0]
コード例 #5
0
def model_varmax(train_data,test_data,train_data1,test_data1):
    """Fit a two-series VARMA(1, 1) model and print its 10-step RMSE.

    ``train_data1`` and ``train_data`` are stacked as the first and second
    endogenous columns. The first column of the 10-step forecast is scored
    against ``test_data1[0:10]``. ``test_data`` is accepted but not used.
    Nothing is returned; the shape, coefficient table and RMSE are printed.
    """
    # reshape(-1, 1) yields one column for any series length; the previous
    # hard-coded (372, 1) only worked for exactly 372 observations.
    x = train_data1.reshape((-1, 1))
    x1 = train_data.reshape((-1, 1))
    lis = np.concatenate((x, x1), axis=1)
    print(np.shape(lis))
    # forecast
    model = VARMAX(lis, order=(1, 1))
    model_fit = model.fit(disp=-1)
    print(model_fit.summary().tables[1])
    predictions = model_fit.forecast(steps=10)
    # The label says RMSE, so take the square root of the mean squared error.
    rmse = np.sqrt(mean_squared_error(predictions[:, 0], test_data1[0:10]))
    print('VARMAX RMSE: ', rmse)
    def VectorAutoRegressiveMovingAverage(self):
        """Forecast ``self.end - self.start`` steps with a VARMA(1, 1) model.

        Returns whatever the fitted model's ``forecast`` call produces.
        """
        # currently, exodata not used.

        # NOTE(review): ``data`` is not defined inside this method; it is
        # presumably a module-level or enclosing-scope frame -- confirm.
        datalist = data.values.tolist()
        # One joint model is fitted across all series.
        model = VARMAX(datalist, order=(1, 1))
        model_fit = model.fit(disp=False)
        # VARMAX results take only the horizon; passing ``model_fit.y`` is
        # the VAR results API and does not exist on this results object.
        datahat = model_fit.forecast(steps=(self.end - self.start))
        return datahat
コード例 #7
0
ファイル: vector_model.py プロジェクト: raisulru/MMHE
    def get(self, request, *args, **kwargs):
        """Forecast ``price`` for the next ``nsteps`` periods with VARMA(1, 1).

        Query parameters:
            nsteps: forecast horizon, parsed with ``int`` (default 10).

        Returns:
            Response whose payload maps each stringified forecast index to
            the predicted ``price``.
        """
        import json  # local import: needed to parse pandas' JSON output

        n_steps = int(self.request.query_params.get('nsteps', 10))

        data = read_frame(PriceProduction.objects.all())
        data['date'] = pd.to_datetime(data['date'])
        data = data.drop('id', axis=1)
        data = data.set_index('date')
        # VARMAX takes a two-element (p, q) order; only the first two
        # entries of the former (1, 1, 1) were ever read.
        model = VARMAX(data, order=(1, 1))
        model_fit = model.fit(disp=False)
        price_forecast = model_fit.forecast(n_steps)['price']
        price_forecast.index = price_forecast.index.astype("str")
        # to_json() emits real JSON, which ast.literal_eval cannot parse
        # once it contains null/true/false; json.loads handles it.
        return Response(json.loads(price_forecast.to_json()))
    def VARMA(self, order=(1, 1), name="VARMA"):
        """Fit a VARMA model on the training data and plot its forecast.

        Parameters:
            order: (p, q) order passed to ``VARMAX``.
            name: heading printed before fitting and used as the plot title.

        The forecast is labelled with the index of ``self.data_test`` and the
        columns of ``self.data_train``, then drawn in red on top of
        ``self.data_train_and_test``.
        """
        print("=" * 30 + "\n" + name + "\n" + "=" * 30 + "\n")
        # fit model
        model = VARMAX(self.data_train, order=order)
        model_fit = model.fit(disp=False)
        # The forecast is indexed by the test index below, so the horizon
        # must be its length rather than a hard-coded 42.
        yhat = model_fit.forecast(steps=len(self.data_test))
        prediction = pd.DataFrame(yhat,
                                  index=self.data_test.index.values,
                                  columns=self.data_train.columns.values)

        plt.plot(self.data_train_and_test)
        plt.plot(prediction, color='red')
        plt.title(name)
        plt.show()
コード例 #9
0
    def test_4(self):
        """Export a VARMAX(1, 2) fit to PMML and compare scored output.

        The 5-step forecasts and the 95% interval bounds returned by the
        scoring server must be ``np.allclose`` to the statsmodels values for
        each of the three San Diego series.
        """
        data = self.getMultiDimensionalData()
        result = VARMAX(data, order=(1, 2)).fit()

        f_name = 'varmax_12.pmml'
        StatsmodelsToPmml(result, f_name, model_name="varmax_test", conf_int=[95])

        model_name = self.adapa_utility.upload_to_zserver(f_name)
        z_pred = self.adapa_utility.score_in_zserver(model_name, {'h': 5}, 'TS')
        forecasts = result.get_forecast(5)

        z_outputs = z_pred['outputs'][0]
        series_names = ('SanDiegoHum', 'SanDiegoPressure', 'SanDiegoTemp')

        # Collect (server values, statsmodels values) pairs: point forecasts
        # first, then lower bounds, then upper bounds.
        comparisons = []
        for series in series_names:
            comparisons.append((
                list(z_outputs['predicted_' + series].values()),
                forecasts.predicted_mean[series].values.tolist(),
            ))
        for bound in ('lower', 'upper'):
            for series in series_names:
                comparisons.append((
                    list(z_outputs['conf_int_95_' + bound + '_' + series].values()),
                    forecasts.conf_int()[bound + ' ' + series].values.tolist(),
                ))

        for z_values, model_values in comparisons:
            self.assertEqual(np.allclose(z_values, model_values), True)
コード例 #10
0
    def predict(self, action):
        """Slide ``action`` into the history and forecast one step ahead.

        The oldest entry of ``self.ts`` is removed and ``action`` appended.
        A VARMAX model of order ``(self.p, self.p)`` is then fitted on the
        updated history. The one-step forecast is stored in ``self.y_pred``,
        printed, and returned.
        """
        history = self.ts
        # Fixed-length window: drop the oldest row, add the newest.
        del history[0]
        history.append(action)

        fitted = VARMAX(history, order=(self.p, self.p)).fit(disp=False)
        self.y_pred = fitted.forecast(steps=1)

        print(self.y_pred)

        return self.y_pred
コード例 #11
0
def trainVectorARMAMethodModel():
    """Fit a VARMA(1, 2) model with constant trend, save it, print a summary.

    Training data comes from ``readVectorARMAMethodXTrain()`` and the fitted
    results are handed to ``saveVectorARMAMethodModel``.
    """
    training_set = readVectorARMAMethodXTrain()

    # NOTE(review): an earlier comment here said p = 5 minimised AIC/BIC,
    # but the order actually passed is (1, 2) -- confirm which is intended.
    fitted_model = VARMAX(training_set, order=(1, 2), trend="c").fit(
        maxiter=1000, disp=False)

    # Persist the fitted results before reporting on them.
    saveVectorARMAMethodModel(fitted_model)

    print(fitted_model.summary())
コード例 #12
0
    def _fit(self, train_data):
        """Fit a VARMA model of order ``(self._p, self._q)`` on `train_data`.

        The fitted results object is stored in ``self._model``.

        Parameters
        ----------
        train_data: pd.DataFrame
            A pandas DataFrame representing the data used for training.

        Returns
        -------
        None

        """
        self._model = VARMAX(
            train_data,
            order=(self._p, self._q),
        ).fit(disp=False)
コード例 #13
0
    def varma_final(self):
        """Walk-forward VARMA(1, 1) forecast, one step per test observation.

        ``self.total`` is log-transformed, passed through ``self.difference``
        and stripped of missing rows. For each element of ``self.test`` the
        model is refitted on the history, a one-step forecast is taken, and
        that forecast is appended to the history for the next step.

        Returns:
            list of one-row DataFrames (one per test observation), rounded
            to two decimals with negative values replaced by 0.
        """
        history = pd.DataFrame(
            self.difference(numpy.log(numpy.array(self.total)))
        ).dropna()
        predictions = []
        for _ in range(len(self.test)):
            model_fit = VARMAX(history, order=(1, 1)).fit(disp=False)
            # Re-wrap the forecast so its columns line up with the history
            # regardless of how the model labelled them.
            step = pd.DataFrame(numpy.asarray(model_fit.forecast()),
                                columns=history.columns)
            # Element-wise clamp; ``if frame < 0`` is ambiguous and raises.
            # NOTE(review): values are in log-differenced space, where
            # negatives are legitimate -- confirm the clamp is wanted.
            predictions.append(step.round(2).clip(lower=0))
            # DataFrame.append returned a new frame (and no longer exists in
            # pandas 2), so the history was never actually extended before.
            history = pd.concat([history, step], ignore_index=True)

        return predictions
コード例 #14
0
    def varmax_model_fit(self, x_train, x_test, df_time, oreder = (1, 0), col_exog=None, verbose = 1):
        """Fit a VARMAX model, report Durbin-Watson statistics, and forecast.

        Parameters:
            x_train: training frame; columns named in ``col_exog`` are
                removed from it IN PLACE and used as exogenous regressors.
            x_test: test frame; exogenous columns are removed IN PLACE and
                its index is replaced by ``df_time['test']``.
            df_time: mapping whose ``'test'`` entry labels the forecast rows.
            oreder: (p, q) order passed to ``VARMAX``.
            col_exog: optional iterable of exogenous column names.
            verbose: when 1, tables and per-column plots are written out
                through ``st``.

        Returns:
            DataFrame of forecasts indexed by ``df_time['test']`` with the
            columns of ``x_test``.
        """
        # ``None`` replaces the former mutable ``[]`` default.
        exog_cols = list(col_exog) if col_exog else []
        if exog_cols:
            # Copy the regressors out before dropping them from the inputs.
            exo_train = x_train[exog_cols].copy()
            exo_test = x_test[exog_cols].copy()
            x_train.drop(columns=exog_cols, inplace=True)
            x_test.drop(columns=exog_cols, inplace=True)

            model = VARMAX(x_train, order=oreder, exog=exo_train)
        else:
            model = VARMAX(x_train, order=oreder)

        result = model.fit()
        out = durbin_watson(result.resid)
        # One rounded statistic per endogenous column.
        df_results = pd.DataFrame()
        for col, val in zip(x_train.columns, out):
            df_results[col] = [round(val, 2)]
        if verbose == 1:
            st.subheader('durbin_watson test')
            st.write('the closer the result is to 2 then there is no correlation, the closer to 0 or 4 then correlation implies')
            st.write(df_results.T)

        if exog_cols:
            df_forecast = result.forecast(steps=x_test.shape[0], exog = exo_test)
        else:
            df_forecast = result.forecast(steps=x_test.shape[0])

        df_forecast.index = df_time['test']
        df_forecast.columns = x_test.columns
        x_test.index = df_time['test']
        if verbose == 1:
            st.write(df_forecast)
            for i, col in enumerate(x_test):
                fig = ds().nuova_fig(555+i)
                st.subheader(col)
                df_forecast[col].plot(label = 'Predicition')
                x_test[col].plot(label = 'True')
                ds().legenda()
                st.pyplot(fig)
        return df_forecast
コード例 #15
0
    def initialize(self, params):
        """Store the configuration and reset the history to zeros.

        Parameters:
            params: mapping with ``'p'`` (number of history rows) and
                ``'dim'`` (number of values per row).

        Sets ``self.p``, ``self.action_dim``, ``self.ts`` (``p`` rows of
        ``dim`` zeros) and ``self.initialized``.
        """
        self.p = params['p']
        self.action_dim = params['dim']

        # Build each row separately; ``[[0] * dim] * p`` would repeat one
        # shared list object p times.
        self.ts = [[0] * self.action_dim for _ in range(self.p)]

        # The former debugging scaffold (unused random data, a throwaway
        # model fit and ``exit()``) terminated the process before this
        # flag could ever be set.
        self.initialized = True
コード例 #16
0
    def precictTrajectory(self):
        """Return a five-step VARMA(1, 1) forecast formatted as GPS points.

        NOTE(review): the model is fitted on 100 synthetic random rows, not
        on the result of ``self.gps_points()`` (the mapping from those
        points is commented out below) -- confirm this placeholder is
        intended.

        Returns:
            dict with ``object_id`` taken from ``self.lastappeared`` and
            ``gps_points``, a list of ``{"long", "lat"}`` dicts built from
            the first two values of each forecast row.
        """
        horizon = 5
        gps_points = self.gps_points()
        # data = [[p["long"],p["lat"]] for p in gps_points]
        samples = []
        for _ in range(100):
            base = random()
            samples.append([base, base + random()])

        forecast = VARMAX(samples, order=(1, 1)).fit(disp=False).forecast(horizon)

        trajectory = {"object_id": self.lastappeared.object_id}
        trajectory["gps_points"] = [
            {"long": row[0], "lat": row[1]} for row in forecast
        ]
        return trajectory
コード例 #17
0
    def _fit(self, train_features, train_target):
        """Fit a VARMAX model on `train_target` with `train_features` as exog.

        The model order is ``(self._p, self._q)``. The fitted results are
        stored in ``self._model`` and ``self._is_fit`` is set to True.

        Parameters
        ----------
        train_features: pd.DataFrame
            A pandas DataFrame representing the training features; passed
            to the model as exogenous regressors.
        train_target: pd.Series
            The target data; passed to the model as endogenous data.

        Returns
        -------
        None

        """
        unfitted = VARMAX(
            endog=train_target,
            exog=train_features,
            order=(self._p, self._q),
        )
        self._model = unfitted.fit(disp=False)
        self._is_fit = True
コード例 #18
0
ファイル: regression.py プロジェクト: ryansteed/datamaster
def regress_varmax(df_endog, bin_size_weeks, n):
    """
    Transforms the endogenous series, drops near-constant columns and trains
    a pure-autoregressive VARMAX model of order (4, 0), logging the summary.

    :param df_endog: the multiple endogenous time series, not yet transformed
    :param bin_size_weeks: the bin size in weeks
    :type bin_size_weeks: pd.Timedelta
    :param n: the number of steps required in each patent series - must make a square matrix!
    :return: None
    """
    ar_order = 4
    transformed = VARMAXTransformer("varmax").transform(df_endog, bin_size_weeks, n)

    # Keep only columns with more distinct values than the AR order.
    distinct_counts = transformed.apply(pd.Series.nunique, axis=0)
    transformed = transformed.loc[:, distinct_counts > ar_order]
    logger.debug(transformed)
    logger.debug(transformed.describe())

    logger.debug("Training VARMAX...")
    fitted = VARMAX(transformed.values, order=(ar_order, 0)).fit(
        maxiter=1000, disp=True)
    logger.debug(fitted.summary())
コード例 #19
0
def build_var_model(df,
                    criteria,
                    forecast_period=2,
                    p_max=3,
                    q_max=3,
                    verbose=0):
    """
    Builds a VARMA model from a multivariate time series data frame.

    The first column of ``df`` must be the target variable. The last
    ``forecast_period`` rows are held out as a test set. Each remaining
    column is paired with the target in turn and a VARMAX model is fitted
    for every (p, q) up to ``p_max`` / ``q_max`` (skipping (0, 0)). Each fit
    is scored by the results attribute named ``criteria``, and the
    lowest-scoring (column, p, q) is refitted as the final model.

    Only stationary series should be passed in; trending or non-stationary
    data will give spurious results.

    :param df: data frame with time as the index, target in the first column
    :param criteria: name of the fit-results attribute to minimise
        (presumably e.g. 'aic' or 'bic' -- confirm against callers)
    :param forecast_period: number of trailing rows held out and forecast
    :param p_max: highest AR order tried
    :param q_max: highest MA order tried
    :param verbose: when 1, prints the split and draws diagnostic plots
    :return: (fitted model, forecast summary frame, rmse, norm_rmse)
    :raises ValueError: if ``df`` has fewer than two columns
    """
    df = df[:]
    n_columns = df.shape[1]
    if n_columns < 2:
        raise ValueError(
            'build_var_model needs a target column plus at least one '
            'additional column')
    cols = df.columns.tolist()
    ts_train = df[:-forecast_period]
    ts_test = df[-forecast_period:]
    if verbose == 1:
        print(
            'Data Set split into train %s and test %s for Cross Validation Purposes'
            % (ts_train.shape, ts_test.shape))
    ar_range = range(0, p_max + 1)
    ma_range = range(0, q_max + 1)
    iteration = 1
    # Maps (p, column position, q) -> best score found for that column.
    results = {}
    for d_val in range(1, n_columns):
        y_train = ts_train.iloc[:, [0, d_val]]
        print('\nAdditional Variable in VAR model = %s' % cols[d_val])
        info_criteria = pd.DataFrame(
            index=['AR{}'.format(p) for p in ar_range],
            columns=['MA{}'.format(q) for q in ma_range])
        for p_val, q_val in itertools.product(ar_range, ma_range):
            row, col = 'AR{}'.format(p_val), 'MA{}'.format(q_val)
            if p_val == 0 and q_val == 0:
                # Order (0, 0) is not fitted; mark it as missing.
                info_criteria.loc[row, col] = np.nan
                print(' Iteration %d completed' % iteration)
            else:
                try:
                    model = VARMAX(y_train, order=(p_val, q_val), trend='c')
                    # The statsmodels keywords are ``maxiter`` and ``disp``;
                    # the former ``max_iter`` / ``displ`` were silently
                    # ignored.
                    model = model.fit(maxiter=1000, disp=False)
                    # attrgetter replaces eval('model.' + criteria) and
                    # still accepts dotted attribute paths.
                    info_criteria.loc[row, col] = operator.attrgetter(
                        criteria)(model)
                    print(' Iteration %d completed' % iteration)
                except Exception as exc:
                    # Best effort: a failed fit leaves the cell missing.
                    # ``Exception`` (not a bare except) lets Ctrl-C through.
                    print(' Iteration %d failed: %s' % (iteration, exc))
            iteration += 1
        info_criteria = info_criteria[info_criteria.columns].astype(float)
        interim_p, interim_q, interim_bic = find_lowest_pq(info_criteria)
        if verbose == 1:
            _, ax = plt.subplots(figsize=(20, 10))
            ax = sns.heatmap(info_criteria,
                             mask=info_criteria.isnull(),
                             ax=ax,
                             annot=True,
                             fmt='.0f')
            ax.set_title(criteria)
        results[(interim_p, d_val, interim_q)] = interim_bic

    (best_p, best_d, best_q), _ = min(results.items(),
                                      key=operator.itemgetter(1))
    best_p, best_d, best_q = int(best_p), int(best_d), int(best_q)
    print('Best variable selected for VAR: %s' %
          ts_train.columns.tolist()[best_d])
    # Refit the winning configuration without the iteration cap used above.
    y_train = ts_train.iloc[:, [0, best_d]]
    bestmodel = VARMAX(y_train, order=(best_p, best_q), trend='c')
    bestmodel = bestmodel.fit()
    if verbose == 1:
        bestmodel.plot_diagnostics(figsize=(16, 12))
        ax = bestmodel.impulse_responses(
            12, orthogonalized=True).plot(figsize=(12, 4))
        ax.set(xlabel='Time Steps', title='Impulse Response Functions')
    res2 = bestmodel.get_forecast(forecast_period)
    res2_df = res2.summary_frame()
    rmse, norm_rmse = print_dynamic_rmse(ts_test.iloc[:, 0],
                                         res2_df['mean'].values,
                                         ts_train.iloc[:, 0])
    return bestmodel, res2_df, rmse, norm_rmse
コード例 #20
0
    print('stationary')
x.plot()
plt.show()
plt.pause(5)

test_stationarity(data['qty'])

# Replace qty with its log-difference, then drop the rows left missing.
data['qty'] = pd.Series(np.log(data['qty']).diff().dropna())
data.dropna(inplace=True)
# data.plot()
# plt.show()

# Chronological hold-out: first 80% for training, last 20% for validation.
# (The validation slice used to start at 20%, overlapping the training rows.)
# train, validate = train_test_split(data, test_size = 0.3)
train = data[:int(0.8 * (len(data)))]
validate = data[int(0.8 * (len(data))):]

model = VARMAX(endog=train, enforce_stationarity=True)
# NOTE(review): the statsmodels keyword is ``maxiter``; ``maxiters`` is left
# untouched because honouring a one-iteration cap would change the fit --
# confirm the intended value.
model_fit = model.fit(maxiters=1)
print('-----------RESULTS----------------')
print(model_fit.summary())
# '%Y%m%d' matches the 8-digit date; the former '%Y%m%' is not a valid
# strptime format.
# NOTE(review): predict() is documented with start/end rather than ``steps``
# -- confirm this call against the installed statsmodels version.
prediction = model_fit.predict(start=datetime.strptime('20180101', '%Y%m%d'),
                               steps=len(validate))
print(prediction)
# NOTE(review): ``result`` is not defined in this part of the file; the
# attributes used below (k_ar, plot_forecast, forecast(y=...)) look like a
# fitted VAR results object created earlier -- confirm.
print('Variables for th model %s' % result.exog_names)
order = result.k_ar
forecast_values = pd.DataFrame(
    data=result.forecast(y=data['qty'].values, steps=5))
result.plot_forecast(steps=5, plot_stderr=False)

# VARMAX results take only the horizon; ``model_fit.y`` belongs to the VAR
# results API.
pred = model_fit.forecast(steps=len(validate))
コード例 #21
0
Vector Autoregression Moving-Average with Exogenous Regressors (VARMAX)
The Vector Autoregression Moving-Average with Exogenous Regressors (VARMAX) is an extension of the VARMA model that also includes the modeling of exogenous variables. It is a multivariate version of the ARMAX method.

Exogenous variables are also called covariates and can be thought of as parallel input sequences that have observations at the same time steps as the original series. The primary series are referred to as endogenous data to contrast them with the exogenous sequence(s). The observations for exogenous variables are included in the model directly at each time step and are not modeled in the same way as the primary endogenous sequence (e.g. as an AR, MA, etc. process).

The VARMAX method can also be used to model the subsumed models with exogenous variables, such as VARX and VMAX.

The method is suitable for multivariate time series without trend and seasonal components with exogenous variables.
'''

from random import random

# VARMAX example
from statsmodels.tsa.statespace.varmax import VARMAX

# Contrived two-series dataset: the second value of each row depends on
# the first.
data = []
for i in range(100):
    v1 = random()
    v2 = v1 + random()
    data.append([v1, v2])
# One exogenous observation per time step: the step number plus noise.
data_exog = [step + random() for step in range(100)]
# Fit a VARMAX(1, 1) model with the exogenous regressor.
model = VARMAX(data, exog=data_exog, order=(1, 1))
model_fit = model.fit(disp=False)
# Forecast with a single out-of-sample exogenous value.
data_exog2 = [[100]]
yhat = model_fit.forecast(exog=data_exog2)
print(yhat)
コード例 #22
0
# Autocorrelation of the 'energy_sum' column of endog_diff, up to lag 20.
plot_acf(endog_diff['energy_sum'], lags=20)


# In[51]:


# Partial autocorrelation of the same column, up to lag 20.
plot_pacf(endog_diff['energy_sum'], lags=20)


# In[54]:


# Fit a VARMAX model of order (15, 0) on endog_diff with exog as regressors,
# allowing up to 5000 optimizer iterations and suppressing optimizer output.
from statsmodels.tsa.statespace.varmax import VARMAX
model_varmax = VARMAX(endog=endog_diff, exog=exog, order=(15, 0))
results_varmax = model_varmax.fit(maxiter=5000, disp=False)
# Cell-final expression: the summary is only displayed when run as a
# notebook cell.
results_varmax.summary()


# In[55]:


# Diagnostic plots for the fitted model.
results_varmax.plot_diagnostics()


# In[56]:


#exog_test = merged_df_varmax_test[['humidity', 'temperatureLow', 'month_1', 'month_2', 'month_3',
#                                   'month_4','month_5', 'month_6', 'month_7', 'month_8', 'month_9', 'month_10','month_11', 'month_12']]
#exog_test = merged_df_varmax_test[['humidity', 'day_0', 'day_1', 'day_2', 'day_3', 'day_4', 'day_5', 'day_6',
コード例 #23
0
def varmax(
    tickers,
    p: int = 2,
    q: int = 0,
):
    """Fit a VARMAX(p, q) model on ticker returns and save its residuals.

    The rows of ``data`` are split in order into 70% train, 15% validation
    and 15% test. The ``<ticker>_returns`` columns are the endogenous
    series; every other column is used as an exogenous regressor. The model
    is fitted on the training rows, forecasts are produced for the
    validation and test rows, and the train/validation/test residuals are
    written to CSV files under ``../output/VARMAX_results/``.

    NOTE(review): ``data`` and ``n`` are not defined in this function;
    presumably they are module-level (a DataFrame and its row count) -
    confirm against the rest of the file.

    Args:
        tickers: Iterable of ticker names; ``<ticker>_returns`` must be a
            column of ``data`` for each of them.
        p: Autoregressive order passed to ``VARMAX``.
        q: Moving-average order passed to ``VARMAX``.

    Returns:
        None. Results are printed and written to disk.
    """

    # Split data: cumulative fractions mark where each partition ends.
    train_val_test_split = {'train': 0.7, 'val': 0.85, 'test': 1}
    train_end = int(n * train_val_test_split['train'])
    val_end = int(n * train_val_test_split['val'])
    test_end = int(n * train_val_test_split['test'])
    train_data = data[0:train_end]
    val_data = data[train_end:val_end]
    test_data = data[val_end:test_end]

    # split data in X and Y
    y_list = [ticker + '_returns' for ticker in tickers]

    # Train
    endog_y = train_data[y_list]
    exog_x = train_data.drop(columns=y_list)

    # Validate
    endog_y_val = val_data[y_list]
    exog_x_val = val_data.drop(columns=y_list)

    # Test
    endog_y_test = test_data[y_list]
    exog_x_test = test_data.drop(columns=y_list)

    # Fit model
    model = VARMAX(endog=endog_y.values, exog=exog_x.values, order=(p, q))
    # NOTE(review): `order` is not a documented argument of `fit`; it is
    # presumably ignored there. Kept as-is - confirm and remove.
    model_fit = model.fit(disp=False, order=(p, q), maxiter=200, method='nm')

    # Validate
    predictions_val = model_fit.forecast(steps=exog_x_val.shape[0],
                                         exog=exog_x_val.values)
    val_residual = endog_y_val.values - predictions_val
    # Mean squared validation error over all steps and series. The original
    # printed a hard-coded 0 because its accumulation loop was commented out.
    MSE = float(np.mean(val_residual**2))
    print('p:', p, ' MSE:', MSE)

    # Test -- this is just here for simplicity!!
    # Note that this forecast starts right after the training sample, just
    # like the validation forecast, but is compared with the test rows.
    predictions_test = model_fit.forecast(steps=exog_x_test.shape[0],
                                          exog=exog_x_test.values)
    test_residual = endog_y_test.values - predictions_test

    train_residuals = model_fit.resid
    pd.DataFrame(train_residuals).to_csv(
        '../output/VARMAX_results/residual_data_train.csv')
    pd.DataFrame(val_residual).to_csv(
        '../output/VARMAX_results/residual_data_val.csv')
    pd.DataFrame(test_residual).to_csv(
        '../output/VARMAX_results/residual_data_test.csv')

    # Optional per-ticker export of real vs. predicted values. Disabled by
    # default. The flag used to be named `q`, which rebound the MA-order
    # parameter; it now has its own name.
    save_predictions = False
    if save_predictions:
        for i, ticker in enumerate(tickers):
            # File names now match their contents (they used to be swapped).
            pd.DataFrame(endog_y_val.values[:, i]).to_csv(
                '../output/VARMAX_results/val_files/' + ticker +
                '_val_real.csv',
                index=False)
            pd.DataFrame(predictions_val[:, i]).to_csv(
                '../output/VARMAX_results/val_files/' + ticker +
                '_val_predictions.csv',
                index=False)
            pd.DataFrame(endog_y_test.values[:, i]).to_csv(
                '../output/VARMAX_results/test_files/' + ticker +
                '_test_real.csv',
                index=False)
            pd.DataFrame(predictions_test[:, i]).to_csv(
                '../output/VARMAX_results/test_files/' + ticker +
                '_test_predictions.csv',
                index=False)

    # Evaluate: optional per-ticker metrics and plots on the validation set.
    # Disabled by default.
    # NOTE(review): `opens_val` is not defined in this function; presumably
    # the opening prices for the validation rows - confirm before enabling.
    make_plots = False
    if make_plots:
        n_val = endog_y_val.shape[0]
        for i, ticker in enumerate(tickers):
            # Turn returns back into prices using the opening price.
            pred = (predictions_val[:, i] + 1) * opens_val.values[:, i]
            real = (endog_y_val.values[:, i] + 1) * opens_val.values[:, i]
            MSE = sum((pred - real)**2) / n_val
            # Baseline: predict each price with the previous real price.
            dummy_mse = sum((real[1:] - real[:-1])**2) / (n_val - 1)
            print('=========', ticker, '=========')
            print('Dummy MSE:', dummy_mse)
            print('MSE:', MSE)

            # Direction of the move: 1 for positive returns, 0 for negative.
            # Work on copies so the forecast and validation arrays are not
            # overwritten in place.
            pred_zero_one = predictions_val[:, i].copy()
            pred_zero_one[pred_zero_one > 0] = 1
            pred_zero_one[pred_zero_one < 0] = 0
            print('Predicted ones:', np.mean(pred_zero_one))
            real_zero_one = endog_y_val.values[:, i].copy()
            real_zero_one[real_zero_one > 0] = 1
            real_zero_one[real_zero_one < 0] = 0
            print('Real ones:', np.mean(real_zero_one))

            # Confusion-matrix counts for the up/down classification.
            TP = np.sum(np.logical_and(pred_zero_one == 1, real_zero_one == 1))
            TN = np.sum(np.logical_and(pred_zero_one == 0, real_zero_one == 0))
            FP = np.sum(np.logical_and(pred_zero_one == 1, real_zero_one == 0))
            FN = np.sum(np.logical_and(pred_zero_one == 0, real_zero_one == 1))
            print('True positive:', TP)
            print('True Negative:', TN)
            print('False positive:', FP)
            print('False Negative:', FN)
            accuracy = (TP + TN) / (TP + TN + FP + FN)
            print('Dummy guess:',
                  max(np.mean(real_zero_one), 1 - np.mean(real_zero_one)))
            print('Accuracy:', max(accuracy, 1 - accuracy))

            plt.plot(real,
                     color='red',
                     label='Real ' + ticker + ' Stock Price')
            plt.plot(pred,
                     color='blue',
                     label='Predicted ' + ticker + ' Stock Price')
            plt.title(ticker + ' Stock Price Prediction')
            plt.xlabel('Time')
            plt.ylabel(ticker + ' Stock Price')
            plt.legend()
            plt.savefig('../output/VARMAX_results/VARMAX_test_' + ticker +
                        '.png')
            plt.close()
コード例 #24
0
            'Central_Bank_Rate_(CBR)'] = true_predictions_localrates[
                'Central_Bank_Rate_(CBR)'].apply(np.floor)
        true_predictions_localrates.index = pd.to_datetime(
            true_predictions_localrates.index)
        index_localrates = pd.date_range(appdata_localrates.index.max() +
                                         timedelta(1),
                                         periods=periods_input,
                                         freq='MS')
        true_predictions_localrates.index = index_localrates.date
        # true_predictions_localrates.index = pd.to_datetime(true_predictions_localrates.index).strftime('%Y-%m')
        # st.subheader("Local Rates Forecasted Values with Vector Autoregression")
        # st.dataframe(true_predictions_localrates)

        # Local Rates - VARMA
        model_localrates_varma = VARMAX(appdata_localrates, order=(1, 2))
        model_localrates_varma_fit = model_localrates_varma.fit(disp=False)
        yhat_localrates_varma = model_localrates_varma_fit.forecast(
            steps=periods_input)
        yhat_localrates_varma_df = pd.DataFrame(
            yhat_localrates_varma, columns=appdata_localrates.columns).abs()
        yhat_localrates_varma_df.index = pd.date_range(
            appdata_localrates.index.max() + timedelta(1),
            periods=periods_input,
            freq='MS')
        yhat_localrates_varma_df[
            'Central_Bank_Rate_(CBR)'] = yhat_localrates_varma_df[
                'Central_Bank_Rate_(CBR)'].apply(np.floor)
        yhat_localrates_varma_df.index = yhat_localrates_varma_df.index.date
        # yhat_localrates_varma_df.index = pd.to_datetime(yhat_localrates_varma_df.index).strftime('%Y-%m')
        # st.subheader("Local Rates Forecasted Values with Vector Autoregression Moving Average")
        # st.dataframe(yhat_localrates_varma_df)
コード例 #25
0
# Run auto_arima on each series individually (bare expressions: the results
# are only displayed when this runs as notebook cells).
# NOTE(review): auto_arima is presumably pmdarima's; its import is outside
# this excerpt - confirm.
auto_arima(df1['Money'], maxiter=1000)

auto_arima(df1['Spending'], maxiter=1000)

# Difference twice and drop the NaN rows the differencing introduces.
df_transformed = df1.diff().diff().dropna()

# Hold out the last `nobs` observations for testing.
nobs = 12

train, test = df_transformed[0:-nobs], df_transformed[-nobs:]

model = VARMAX(train, order=(1, 2), trend='c')

# `maxiter` was misspelled `maximer`, so the intended iteration cap was
# never applied.
results = model.fit(maxiter=1000, disp=False)

results.summary()

df_forecast = results.forecast(nobs)

df_forecast

# Invert the second difference: each forecast first difference is the last
# observed first difference plus the running sum of the forecast second
# differences (the cumsum was missing here).
df_forecast['Money1d'] = (
    df1['Money'].iloc[-nobs - 1] -
    df1['Money'].iloc[-nobs - 2]) + df_forecast['Money'].cumsum()

# Invert the first difference: last observed level plus the running sum of
# the forecast first differences.
df_forecast['MoneyForecast'] = df1['Money'].iloc[
    -nobs - 1] + df_forecast['Money1d'].cumsum()

df_forecast['Spending1d'] = (
    df1['Spending'].iloc[-nobs - 1] -
コード例 #26
0
                        encoding="utf-8-sig",
                        converters={0: to_dt},
                        names=["TS", "x", "y", "z"])

    req_period = datetime.timedelta(milliseconds=100)

    even_frame = frame.resample(req_period).mean().interpolate()

    #aclr_x=even_frame["x"]
    aclr_x = even_frame
    seria_len = len(aclr_x)

    train_seria, test_seria = aclr_x[:seria_len // 2], aclr_x[seria_len // 2:]

    model = VARMAX(train_seria, order=(5, 5))
    #model = VARMAX(train_seria, order=(3, 3))
    #model = VARMAX(train_seria,)
    model_fit = model.fit()

    predictions = model_fit.forecast(len(test_seria))
    print(type(predictions))
    print(predictions.shape)

    for axis in range(3):
        plt.subplot(3, 1, axis + 1)
        # plt.plot(test_seria.index[:100], predictions[:, axis][:100], label="predictions")
        plt.plot(predictions.iloc[:100, axis], label="predicted")
        plt.plot(test_seria.iloc[:100, axis], label="expected")
        plt.legend(loc="upper right")
    plt.show()
コード例 #27
0
# Print three attributes of jh_results.
print(jh_results.cvt)  # dim = (n,3) critical value table (90%, 95%, 99%)
print(jh_results.evec)
print(jh_results.eig)

# Split in order: the first 80% of rows for training, the rest for validation.
split_at = int(0.8 * len(johan_test_temp))
train, valid = johan_test_temp[:split_at], johan_test_temp[split_at:]

# Same 80/20 split applied to the original dataframe.
split_at_orignal = int(0.8 * len(orignal_dataframe))
train_orignal = orignal_dataframe[:split_at_orignal]
valid_orignal = orignal_dataframe[split_at_orignal:]

order = [2,3,4,5,6]
for i in order:

    model = VARMAX(train, order=(i,0), trend='c')
    model_result = model.fit(maxiter= 1000)
    print(model_result.summary())
    model_result.plot_diagnostics(variable=0)
    plt.show()
    model_result.plot_diagnostics(variable=1)
    plt.show()
    model_result.plot_diagnostics(variable=2)
    plt.show()
    """
    VAR_forecast_value_hrf_pressure = np.exp(train["value_hrf_pressure"]) * train_orignal['value_hrf_pressure'][-2:]
    VAR_forecast_value_hrf_humidity = np.exp(train["value_hrf_humidity"]) * train_orignal['value_hrf_humidity'][-2:]
    #VAR_forecast_value_hrf_pressure = np.exp(train["value_hrf_temperature_bmp180"]) * train['value_hrf_temperature_bmp180'][-2:]

    rmse_value_hrf_pressure = math.sqrt(mean_squared_error(train_orignal['value_hrf_pressure'][-2:], VAR_forecast_value_hrf_pressure))
    rmse_value_hrf_humidity = math.sqrt(mean_squared_error(train_orignal['value_hrf_humidity'][-2:], VAR_forecast_value_hrf_humidity))
    print(rmse_value_hrf_pressure)