def _estimate_varma_coefs(self, X):
        # With no information criterion configured, fit a single VARMAX at
        # the requested order; otherwise grid-search every (p, q) up to
        # self._order and keep the best-scoring fit.
        if self._criterion not in ["aic", "bic", "hqic"]:
            result = VARMAX(X, order=self._order,
                            trend="c").fit(maxiter=self._max_iter)
        else:
            min_value = float("Inf")
            result = None

            orders = [(p, q) for p in range(self._order[0] + 1)
                      for q in range(self._order[1] + 1)]
            orders.remove((0, 0))

            for order in orders:
                fitted = VARMAX(X, order=order,
                                trend="c").fit(maxiter=self._max_iter)

                value = getattr(fitted, self._criterion)
                if value < min_value:
                    min_value = value
                    result = fitted

        return (
            result.coefficient_matrices_var,
            result.coefficient_matrices_vma,
            result.specification["order"],
            result.resid,
        )
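A usage sketch for this method, assuming a hypothetical host object carrying the _criterion, _order and _max_iter attributes it reads (all names below are illustrative):

import numpy as np
import pandas as pd
from statsmodels.tsa.statespace.varmax import VARMAX

class _Host:
    """Hypothetical carrier for the configuration the method reads."""
    _criterion = "aic"   # triggers the grid-search branch
    _order = (1, 1)      # upper bounds for the (p, q) search
    _max_iter = 100

rng = np.random.default_rng(0)
X = pd.DataFrame(rng.standard_normal((200, 2)), columns=["y1", "y2"])

# the method is defined at module level here, so call it directly
ar_coefs, ma_coefs, order, resid = _estimate_varma_coefs(_Host(), X)
print(order)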
Example #2
    def get(self, request, *args, **kwargs):
        start_date = self.request.query_params.get('startdate', '1970-01-30')
        end_date = self.request.query_params.get('enddate', '2018-01-01')

        data = read_frame(PriceProduction.objects.all())
        data['date'] = pd.to_datetime(data['date'])
        data = data.drop('id', axis=1)
        data = data.set_index('date')

        startdate = dat.strptime(start_date, '%Y-%m-%d')
        enddate = dat.strptime(end_date, '%Y-%m-%d')

        nextmonth = enddate + relativedelta.relativedelta(months=1)
        # label slicing is inclusive, so `nextmonth` lands in both frames,
        # which is why one fewer step than len(test) is forecast below
        train, test = data[startdate:nextmonth], data[nextmonth:]
        model = VARMAX(train, order=(1, 1))  # VARMAX takes a (p, q) order
        model_fit = model.fit(disp=False)
        yhat = model_fit.forecast(len(test) - 1)

        yhat['actual'] = test['price']
        predictdata = yhat.drop("production", axis=1)
        metrics = forecast_accuracy(predictdata['price'],
                                    predictdata['actual'])
        predictdata.index = predictdata.index.astype("str")
        print(predictdata)
        json = predictdata.to_json()
        json = ast.literal_eval(json)
        json['mape'] = metrics['mape']
        return Response(json)
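The view relies on a forecast_accuracy helper that is not shown in the snippet; a minimal sketch, assuming it compares two aligned series and returns at least the 'mape' key the view reads:

import numpy as np

def forecast_accuracy(forecast, actual):
    """Hypothetical helper: mean absolute percentage error of the forecast."""
    forecast = np.asarray(forecast, dtype=float)
    actual = np.asarray(actual, dtype=float)
    mape = float(np.mean(np.abs((forecast - actual) / actual)))
    return {"mape": mape}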
Example #3
    def find_best_parameters(self, data: pd.DataFrame):
        """
        Given a dataset, finds the best parameters using the settings in the class
        """
        # dmax is the number of columns in the frame; it bounds the loop below
        dmax = data.shape[1]
        cols = data.columns.tolist()
        # TODO: #14 Make sure that we have a way to not rely on column order to determine the target
        # It is assumed that the first column of the dataframe is the target
        # variable; make sure that is the case before calling this method.
        i = 1
        results_dict = {}

        for d_val in range(1, dmax):
            # Takes the target column and one other endogenous column at a time
            # and makes a prediction based on that. Then selects the best
            # exogenous column at the end.
            y_train = data.iloc[:, [0, d_val]]
            print('\nAdditional Variable in VAR model = %s' % cols[d_val])
            info_criteria = pd.DataFrame(
                index=['AR{}'.format(i) for i in range(0, self.p_max+1)],
                columns=['MA{}'.format(i) for i in range(0, self.q_max+1)]
            )
            for p_val, q_val in itertools.product(range(0, self.p_max+1), range(0, self.q_max+1)):
                if p_val == 0 and q_val == 0:
                    info_criteria.loc['AR{}'.format(p_val), 'MA{}'.format(q_val)] = np.nan
                    print(' Iteration %d completed' % i)
                    i += 1
                else:
                    try:
                        model = VARMAX(y_train, order=(p_val, q_val), trend='c')
                        model = model.fit(maxiter=1000, disp=False)  # statsmodels uses `maxiter`, not `max_iter`
                        info_criteria.loc['AR{}'.format(p_val), 'MA{}'.format(q_val)] = getattr(model, self.scoring)
                    except Exception:
                        pass
                    print(' Iteration %d completed' % i)
                    i += 1
            info_criteria = info_criteria[info_criteria.columns].astype(float)
            interim_d = d_val  # column index of the variable added in this pass
            interim_p, interim_q, interim_bic = find_lowest_pq(info_criteria)
            if self.verbose == 1:
                _, axis = plt.subplots(figsize=(20, 10))
                axis = sns.heatmap(
                    info_criteria,
                    mask=info_criteria.isnull(),
                    ax=axis,
                    annot=True,
                    fmt='.0f'
                )
                axis.set_title(self.scoring)
            results_dict[str(interim_p) + ' ' + str(interim_d) + ' ' + str(interim_q)] = interim_bic
        best_bic = min(results_dict.items(), key=operator.itemgetter(1))[1]
        best_pdq = min(results_dict.items(), key=operator.itemgetter(1))[0]
        self.best_p = int(best_pdq.split(' ')[0])
        self.best_d = int(best_pdq.split(' ')[1])
        self.best_q = int(best_pdq.split(' ')[2])
        print('Best variable selected for VAR: %s' % data.columns.tolist()[self.best_d])
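find_lowest_pq is referenced above but not shown; a plausible sketch, assuming it returns the (p, q) labels of the lowest-scoring cell together with that score:

import pandas as pd

def find_lowest_pq(info_criteria: pd.DataFrame):
    """Hypothetical helper: locate the (p, q) cell with the lowest score.

    Rows are labelled 'AR0'..'ARp' and columns 'MA0'..'MAq', as built above.
    """
    stacked = info_criteria.stack()      # drops the NaN placeholder cells
    ar_label, ma_label = stacked.idxmin()
    best_p = int(ar_label[2:])           # 'AR3' -> 3
    best_q = int(ma_label[2:])           # 'MA1' -> 1
    return best_p, best_q, float(stacked.min())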
Example #4
def varma_prediction(train, test, steps):
    p, q = get_var_pq_params(train)
    model = VARMAX(train, order=(p, q))
    model_fit = model.fit(disp=False)
    if not steps:
        prediction = model_fit.forecast(steps=len(test))
    else:
        prediction = model_fit.forecast(steps=steps)
    multi_predicts_df = pd.DataFrame(prediction, columns=train.columns)
    return multi_predicts_df
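get_var_pq_params is defined elsewhere in that project; a usage sketch with a trivial stand-in that always returns (1, 1):

import numpy as np
import pandas as pd
from statsmodels.tsa.statespace.varmax import VARMAX

def get_var_pq_params(train):
    """Stand-in for the real order-selection helper (assumption)."""
    return 1, 1

rng = np.random.default_rng(1)
df = pd.DataFrame(rng.standard_normal((120, 2)), columns=["a", "b"])
train, test = df.iloc[:100], df.iloc[100:]
preds = varma_prediction(train, test, steps=None)  # falls back to len(test)
print(preds.shape)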
Example #5
def varma_forecast(history, config):
    order, trend = config
    # define model
    model = VARMAX(history, order=order, trend=trend, enforce_stationarity=False,
                    enforce_invertibility=False)
    # fit model
    model_fit = model.fit(disp=False)
    # make one step forecast
    yhat = model_fit.predict(len(history), len(history))
    return yhat[0]
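A walk-forward usage sketch with a contrived bivariate history; config pairs an order with a trend string, matching the unpacking above:

from random import random

history = [[random(), random()] for _ in range(60)]
config = ((1, 0), 'c')  # ((p, q), trend)

# refit on the growing history and roll one step ahead each iteration
for _ in range(5):
    yhat = varma_forecast(history, config)
    history.append(list(yhat))
    print(yhat)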
Example #6
def model_varmax(train_data, test_data, train_data1, test_data1):
    # stack the two univariate series into one (372, 2) endogenous array
    x = train_data1.reshape((372, 1))
    x1 = train_data.reshape((372, 1))
    lis = np.concatenate((x, x1), axis=1)
    print(np.shape(lis))
    #forecast
    model = VARMAX(lis, order=(1, 1))
    model_fit = model.fit(disp=False)
    print(model_fit.summary().tables[1])
    predictions = model_fit.forecast(steps=10)
    # take the square root so the printed value matches the RMSE label
    print('VARMAX RMSE: ',
          np.sqrt(mean_squared_error(test_data1[0:10], predictions[:, 0])))
Example #7
    def VectorAutoRegressiveMovingAverage(self):

        #currently, exodata not used.

        #make a dataframe the size of prediction
        datahat = pd.DataFrame(np.zeros(shape=((self.end - self.start), 3)))
        #convert to a list; `self.data` is assumed here (the original
        #referenced an undefined `data` name)
        datalist = self.data.values.tolist()
        # fit one multivariate model and forecast all axes together
        model = VARMAX(datalist, order=(1, 1))
        model_fit = model.fit(disp=False)
        # forecast() takes only the step count
        datahat = model_fit.forecast(steps=(self.end - self.start))
        return datahat
Example #8
    def get(self, request, *args, **kwargs):
        n_steps = int(self.request.query_params.get('nsteps', 10))

        data = read_frame(PriceProduction.objects.all())
        data['date'] = pd.to_datetime(data['date'])
        data = data.drop('id', axis=1)
        data = data.set_index('date')
        model = VARMAX(data, order=(1, 1))  # VARMAX takes a (p, q) order
        model_fit = model.fit(disp=False)
        yhat = model_fit.forecast(n_steps)
        yhat = yhat['price']
        yhat.index = yhat.index.astype("str")
        json = yhat.to_json()
        json = ast.literal_eval(json)
        return Response(json)
Example #9
    def VARMA(self, order=(1, 1), name="VARMA"):
        print("=" * 30 + "\n" + name + "\n" + "=" * 30 + "\n")
        # fit model
        model = VARMAX(self.data_train, order=order)
        model_fit = model.fit(disp=False)
        # make prediction
        yhat = model_fit.forecast(steps=42)
        prediction = pd.DataFrame(yhat,
                                  index=self.data_test.index.values,
                                  columns=self.data_train.columns.values)

        plt.plot(self.data_train_and_test)
        plt.plot(prediction, color='red')
        plt.title(name)
        plt.show()
Example #10
    def test_4(self):
        data = self.getMultiDimensionalData()
        model = VARMAX(data,order=(1,2))
        result = model.fit()

        f_name='varmax_12.pmml'
        StatsmodelsToPmml(result, f_name,model_name="varmax_test",conf_int=[95])

        model_name = self.adapa_utility.upload_to_zserver(f_name)
        z_pred = self.adapa_utility.score_in_zserver(model_name, {'h':5},'TS')
        forecasts=result.get_forecast(5)

        z_forecast_hum = list(z_pred['outputs'][0]['predicted_SanDiegoHum'].values())
        model_forecast_hum = forecasts.predicted_mean['SanDiegoHum'].values.tolist()

        z_forecast_pressure = list(z_pred['outputs'][0]['predicted_SanDiegoPressure'].values())
        model_forecast_pressure = forecasts.predicted_mean['SanDiegoPressure'].values.tolist()

        z_forecast_temp = list(z_pred['outputs'][0]['predicted_SanDiegoTemp'].values())
        model_forecast_temp = forecasts.predicted_mean['SanDiegoTemp'].values.tolist()

        z_conf_int_95_lower_hum = list(z_pred['outputs'][0]['conf_int_95_lower_SanDiegoHum'].values())
        model_conf_int_95_lower_hum = forecasts.conf_int()['lower SanDiegoHum'].values.tolist()

        z_conf_int_95_lower_pressure = list(z_pred['outputs'][0]['conf_int_95_lower_SanDiegoPressure'].values())
        model_conf_int_95_lower_pressure = forecasts.conf_int()['lower SanDiegoPressure'].values.tolist()

        z_conf_int_95_lower_temp = list(z_pred['outputs'][0]['conf_int_95_lower_SanDiegoTemp'].values())
        model_conf_int_95_lower_temp = forecasts.conf_int()['lower SanDiegoTemp'].values.tolist()

        z_conf_int_95_upper_hum = list(z_pred['outputs'][0]['conf_int_95_upper_SanDiegoHum'].values())
        model_conf_int_95_upper_hum = forecasts.conf_int()['upper SanDiegoHum'].values.tolist()

        z_conf_int_95_upper_pressure = list(z_pred['outputs'][0]['conf_int_95_upper_SanDiegoPressure'].values())
        model_conf_int_95_upper_pressure = forecasts.conf_int()['upper SanDiegoPressure'].values.tolist()

        z_conf_int_95_upper_temp = list(z_pred['outputs'][0]['conf_int_95_upper_SanDiegoTemp'].values())
        model_conf_int_95_upper_temp = forecasts.conf_int()['upper SanDiegoTemp'].values.tolist()

        self.assertTrue(np.allclose(z_forecast_hum, model_forecast_hum))
        self.assertTrue(np.allclose(z_forecast_pressure, model_forecast_pressure))
        self.assertTrue(np.allclose(z_forecast_temp, model_forecast_temp))
        self.assertTrue(np.allclose(z_conf_int_95_lower_hum, model_conf_int_95_lower_hum))
        self.assertTrue(np.allclose(z_conf_int_95_lower_pressure, model_conf_int_95_lower_pressure))
        self.assertTrue(np.allclose(z_conf_int_95_lower_temp, model_conf_int_95_lower_temp))
        self.assertTrue(np.allclose(z_conf_int_95_upper_hum, model_conf_int_95_upper_hum))
        self.assertTrue(np.allclose(z_conf_int_95_upper_pressure, model_conf_int_95_upper_pressure))
        self.assertTrue(np.allclose(z_conf_int_95_upper_temp, model_conf_int_95_upper_temp))
Example #11
def trainVectorARMAMethodModel():

    X_train = readVectorARMAMethodXTrain()

    #training model on the training set
    vectorARMAMethodModel = VARMAX(X_train, order=(1, 2), trend="c")

    #the order above was chosen by fitting models with different (p, q)
    #values and keeping the one with the minimum AIC and BIC
    vectorARMAMethodModelResult = vectorARMAMethodModel.fit(maxiter=1000,
                                                            disp=False)

    #saving the model in pickle file
    saveVectorARMAMethodModel(vectorARMAMethodModelResult)

    print(vectorARMAMethodModelResult.summary())
Example #12
 def get_best_model(self, data: pd.DataFrame):
     """
     Returns the 'unfit' VARMAX model with the given dataset and the
     selected best parameters. This can be used to fit or refit the model.
     """
     bestmodel = VARMAX(data, order=(self.best_p, self.best_q), trend='c')
     return bestmodel
Example #13
    def predict(self, action):
        """ Description: returns action based on input state x """
        #store the new action
        #self.ts = np.roll(self.ts, -1, axis = 0)
        #self.ts[-1] = action
        del self.ts[0]
        self.ts.append(action)
        #print(self.ts)

        model = VARMAX(self.ts, order=(self.p, self.p))
        model_fit = model.fit(disp=False)
        self.y_pred = model_fit.forecast(steps=1)

        print(self.y_pred)

        return self.y_pred
Example #14
    def _fit(self, train_data):
        """Fits the model based on training data `train_data`.

        Parameters
        ----------
        train_data: pd.DataFrame
            A pandas DataFrame representing the data used for training.

        Returns
        -------
        None

        """
        varma_order = (self._p, self._q)
        model = VARMAX(train_data, order=varma_order)
        self._model = model.fit(disp=False)
Example #15
def model_var1(endog=None, params=None, measurement_error=False, init=None):
    if endog is None:
        endog = (np.log(
            macrodata[['realgdp','realcons']]).iloc[:21].diff().iloc[1:] * 400)
    if params is None:
        params = np.r_[0.5, 0.3, 0.2, 0.4, 2**0.5, 0, 3**0.5]
        if measurement_error:
            params = np.r_[params, 4, 5]

    # Model
    mod = VARMAX(endog, order=(1, 0), trend='n',
                 measurement_error=measurement_error)
    mod.update(params)
    ssm = mod.ssm
    if init is None:
        init = Initialization(ssm.k_states, 'diffuse')
    ssm.initialize(init)

    return mod, ssm
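A usage sketch for model_var1, assuming it runs in a module where macrodata holds the statsmodels macro dataset and Initialization is imported:

import numpy as np
from statsmodels.datasets import macrodata as macrodata_ds
from statsmodels.tsa.statespace.initialization import Initialization
from statsmodels.tsa.statespace.varmax import VARMAX

macrodata = macrodata_ds.load_pandas().data

mod, ssm = model_var1()
# log-likelihood of the fixed parameter vector under diffuse initialization
print(ssm.loglike())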
Example #16
    def varma_final(self):
        predictions = []
        input_data = numpy.array(self.total)
        input_data = numpy.log(input_data)
        input_data = self.difference(input_data)
        input_data = pd.DataFrame(input_data)
        input_data = input_data.dropna()
        for i in range(0, len(self.test)):
            model = VARMAX(input_data, order=(1, 1))
            model_fit = model.fit(disp=False)
            yhat = model_fit.forecast()
            predictions.append(yhat)
            # DataFrame.append returns a new frame, so keep the result
            input_data = pd.concat([input_data, yhat], ignore_index=True)
        for i in range(0, len(predictions)):
            # round to two decimals and floor negative forecasts at zero
            # (the original compared a DataFrame to 0, which raises)
            predictions[i] = predictions[i].round(2).clip(lower=0)

        return predictions
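self.difference is not shown; a minimal first-difference sketch consistent with how it is used above:

import numpy as np

def difference(series, interval=1):
    """Hypothetical helper: differences of an array at the given lag."""
    series = np.asarray(series, dtype=float)
    return series[interval:] - series[:-interval]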
Example #18
    def varmax_model_fit(self, x_train, x_test, df_time, order=(1, 0), col_exog=[], verbose=1):
        if col_exog:
            exo_train = pd.DataFrame()
            exo_test = pd.DataFrame()
            for col in col_exog:
                exo_train[col] = x_train[col]
                x_train.drop([col], axis=1, inplace = True)
                exo_test[col] = x_test[col]
                x_test.drop([col], axis=1, inplace = True)

            model = VARMAX(x_train, order=order, exog=exo_train)
        else:
            model = VARMAX(x_train, order=order)

        result = model.fit()
        out = durbin_watson(result.resid)
        df_results = pd.DataFrame()
        for col, val in zip(x_train.columns, out):
            df_results[col] = [round(val, 2)]
        if verbose == 1:
            st.subheader('durbin_watson test')
            st.write('the closer the statistic is to 2, the weaker the serial correlation; values near 0 or 4 imply strong positive or negative correlation')
            st.write(df_results.T)

        if col_exog:
            df_forecast = result.forecast(steps=x_test.shape[0], exog = exo_test)
        else:
            df_forecast = result.forecast(steps=x_test.shape[0])

        df_forecast.index = df_time['test']
        df_forecast.columns = x_test.columns
        x_test.index = df_time['test']
        if verbose == 1:
            st.write(df_forecast)
            for i, col in enumerate(x_test):
                fig = ds().nuova_fig(555+i)
                st.subheader(col)
                df_forecast[col].plot(label = 'Predicition')
                x_test[col].plot(label = 'True')
                ds().legenda()
                st.pyplot(fig)
        return df_forecast
Example #19
    def initialize(self, params):
        self.p = params['p']
        self.action_dim = params['dim']

        # one zero row per lag; a list comprehension avoids aliasing the rows
        self.ts = [[0] * self.action_dim for _ in range(self.p)]

        # contrived dataset (built but not used below)
        data = list()
        for i in range(100):
            v1 = random()
            v2 = v1 + random()
            row = [v1, v2]
            data.append(row)
        model = VARMAX(self.ts, order=(16, 16))
        print("VARMAX")
        model_fit = model.fit()
        print("fit")
        exit()  # debug stop: halts the process after the trial fit

        self.initialized = True
Example #20
    def predictTrajectory(self):
        predict_num = 5
        gps_points = self.gps_points()
        # data = [[p["long"],p["lat"]] for p in gps_points]
        data = list()
        for i in range(100):
            v1 = random()
            v2 = v1 + random()
            row = [v1, v2]
            data.append(row)
        model = VARMAX(data, order=(1, 1))
        model_fit = model.fit(disp=False)

        yhat = model_fit.forecast(predict_num)

        return {
            "object_id": self.lastappeared.object_id,
            "gps_points": [{
                "long": p[0],
                "lat": p[1]
            } for p in yhat]
        }
Example #21
    def _fit(self, train_features, train_target):
        """Fits the model based on `train_features` and `train_target`.

        A VARMAX model is built to predict the target variables with data
        given by `train_target` based on the features with data given by
        `train_features`.

        Parameters
        ----------
        train_features: pd.DataFrame
            A pandas DataFrame representing the training features.
        train_target: pd.Series
            A pandas Series representing the target variable.

        Returns
        -------
        None

        """
        varmax_order = (self._p, self._q)
        model = VARMAX(train_target, train_features, order=varmax_order)
        self._model = model.fit(disp=False)
        self._is_fit = True
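A usage sketch, with a SimpleNamespace standing in for the wrapper instance (its _p/_q attributes hold the VARMA order; the data below is contrived):

import numpy as np
import pandas as pd
from types import SimpleNamespace

rng = np.random.default_rng(4)
train_target = pd.DataFrame(rng.standard_normal((200, 2)), columns=["t1", "t2"])
train_features = pd.DataFrame(rng.standard_normal((200, 1)), columns=["f1"])

obj = SimpleNamespace(_p=1, _q=0)
_fit(obj, train_features, train_target)  # sets obj._model and obj._is_fit
print(obj._model.summary())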
Example #22
def varmax_model(target_variable, exog_variables, start_date, end_date, plot):
    from statsmodels.tsa.statespace.varmax import VARMAX
    import numpy as np
    
    #Split target variable into training/test set
    train = target_variable[:int(0.7*(len(target_variable)))]
    test = target_variable[int(0.7*(len(target_variable))):]

    exog_variables_train = []
    exog_variables_test = []
    
    #Split external variables into test/training sets
    for variable in exog_variables:
        variable = variable.values
        exog_variables_train.append(variable[:int(0.7*(len(variable)))])
        exog_variables_test.append(variable[int(0.7*(len(variable))):])
        
        
    exog_train = np.column_stack(exog_variables_train)
    exog_test = np.column_stack(exog_variables_test)

    #Fit the model
    y_hat_avg = test.copy()  # copy so the caller's test frame is not mutated
    model = VARMAX(train, exog=exog_train, order=(1, 1)).fit(disp=False)
    # make prediction
    y_hat_avg["VARMAX"] = model.predict(exog=exog_test, start=start_date, end=end_date)

    if plot:
        import matplotlib.pyplot as plt
        plt.figure(figsize=(16,8))
        #plt.plot(train[train.columns[0]], label='dod_model.Train')
        plt.plot(test[test.columns[0]], label='Test')
        plt.plot(y_hat_avg['VARMAX'] ,label='VARMAX')
        plt.legend(loc='best')
        plt.show()
        
    print(y_hat_avg)
Example #23
def regress_varmax(df_endog, bin_size_weeks, n):
    """
    Trains a varmax model on time series for each patent up to n steps,
    working forwards from the publication date or working backwards from the current date. Also includes exogenous
    patent features.

    :param df_endog: the multiple endogenous time series, not yet transformed
    :param bin_size_weeks: the bin size in weeks
    :type bin_size_weeks: pd.Timedelta
    :param n: the number of steps required in each patent series - must make a square matrix!
    :return: None
    """
    df_endog = VARMAXTransformer("varmax").transform(df_endog, bin_size_weeks, n)

    # remove columns with low variance
    order = 4
    df_endog = df_endog.loc[:, df_endog.apply(pd.Series.nunique, axis=0) > order]
    logger.debug(df_endog)
    logger.debug(df_endog.describe())

    logger.debug("Training VARMAX...")
    model = VARMAX(df_endog.values, order=(order, 0))
    res = model.fit(maxiter=1000, disp=True)
    logger.debug(res.summary())
Example #24
 def get_VAR_models(self, data, exog_data=None, order=None, type='VAR'):
     '''
     generate the model VAR. Vector Autoregression (VAR) is a multivariate forecasting algorithm that is used when two or more time series influence each other.
     You need at least two time series (variables), and the series should influence each other.
     :param data: matrix with all the data, pandas. The model will try to predict the next value for each of the features.
     :param exog_data: features that are not strictly part of the mutual dynamics can be passed here as exogenous regressors, pandas
     :param order: (p, q) order of the model for the number of AR and MA parameters to use, needed only with VARMAX
     :param type: VAR, VARMAX
     :return: model
     '''
     if type == 'VAR':
         model = VAR(data, exog=exog_data)
     if type == 'VARMAX':
         model = VARMAX(data, exog=exog_data, order=order)
     return model
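A usage sketch with contrived data; self is unused in the method, so None stands in for the instance here:

import numpy as np
import pandas as pd
from statsmodels.tsa.api import VAR
from statsmodels.tsa.statespace.varmax import VARMAX

rng = np.random.default_rng(2)
df = pd.DataFrame(rng.standard_normal((150, 2)), columns=["y1", "y2"])

var_model = get_VAR_models(None, df, type='VAR')
varmax_model = get_VAR_models(None, df, order=(1, 1), type='VARMAX')
print(var_model.fit(2).summary())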
Example #25
def VARMAXgridsearch(modeldata, cfg_list):
    results = []
    for order in cfg_list:
        # define and fit the model for this candidate (p, q) order
        temp_dict = {}
        varmaxmodel = VARMAX(modeldata, order=order).fit()
        residuals = DataFrame(varmaxmodel.resid)
        mean_error = abs(residuals.mean())
        temp_dict.update({
            'order': order,
            'model': varmaxmodel,
            'meanError': mean_error[0]
        })
        #print("\n {}".format(temp_dict))
        results.append(temp_dict)
    return results
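A usage sketch with a small candidate list. Note that ranking by mean in-sample residual rewards overfitting; an information criterion such as AIC would usually be a sturdier objective. A plain ndarray is passed so the integer column access in the function keeps working:

import numpy as np
from pandas import DataFrame
from statsmodels.tsa.statespace.varmax import VARMAX

rng = np.random.default_rng(3)
modeldata = rng.standard_normal((120, 2))  # contrived bivariate series
cfg_list = [(1, 0), (0, 1), (1, 1)]        # candidate (p, q) orders

results = VARMAXgridsearch(modeldata, cfg_list)
best = min(results, key=lambda r: r['meanError'])
print(best['order'], best['meanError'])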
Example #26

## VARMAX: like VARMA, but with exogenous variables
# VARMAX example
from statsmodels.tsa.statespace.varmax import VARMAX
from random import random
# contrived dataset with dependency
data = list()
for i in range(100):
    v1 = random()
    v2 = v1 + random()
    row = [v1, v2]
    data.append(row)
data_exog = [x + random() for x in range(100)]
# fit model
model = VARMAX(data, exog=data_exog, order=(1, 1))
model_fit = model.fit(disp=False)
# make prediction
data_exog2 = [[100]]
yhat = model_fit.forecast(exog=data_exog2)
print(yhat)


## Exponential Smoothing: like autoregression, but with exponentially decaying weights on lagged values
## Can be used to capture trend or seasonal effects
# HWES example
from statsmodels.tsa.holtwinters import ExponentialSmoothing
from random import random
# contrived dataset
data = [x + random() for x in range(1, 100)]
# fit model
model = ExponentialSmoothing(data)
model_fit = model.fit()
# make prediction
yhat = model_fit.predict(len(data), len(data))
print(yhat)
Example #27
# the head of this snippet was truncated; `t` and `p1` are reconstructed
# here as assumptions (p1 is given the same shape as q1 below)
t = 1000
p1 = np.asmatrix([
                    [0., 0.],
                    [0., 0.],
                 ])
q1 = np.asmatrix([
                    [-0., 0.],
                    [0., -0.],
                 ])
p = [p1]#, p2] #, p3]
q = [q1]#q1]

# y0 = np.asmatrix([[0., 0., 0.]]).T #, [0., 0., 0.]

X = sim.varmapqGaussian(t = t, pMatrix = p, qMatrix = q)#, y0 = y0)

y = VARMAX(X.T, order=(1, 1)).fit()

print(y.summary())

x1 = np.asarray(X[0,:]).reshape(t)
x2 = np.asarray(X[1,:]).reshape(t)
# x3 = np.asarray(X[2,:]).reshape(t)



# nprocess = X.shape[0]
pLag = len(p)
qLag = len(q)
# 

params = logL.maxVARMApqN(X, pLag, qLag)
Example #28
def submit_ts():

    f = request.files['userfile']
    f.save(f.filename)
    print(f)

    s1 = request.form['query1']
    s2 = request.form['query2']
    s3 = int(request.form['query3'])
    s4 = request.form['query4']
    s5 = request.form['query5']
    if s5 == 'Yes':
        s6 = request.form['query6']
        s7 = request.form['query7']

    t = int(request.form['query8'])

    d1 = f.filename
    print(d1)
    d3 = pd.read_csv(d1)

    if s3 == 1:
        d3[s1] = pd.to_datetime(d3[s1], format=s2, infer_datetime_format=True)
        list1 = []
        list3 = []
        list9 = []
        """
        for i in range(len(d3[s4])):
            try:
                list1.append(int(d3[s4][i]))
            except:
                list3.append(i)
                continue
        for i in range(len(list3)):
            n2=d3[s4][list3[i]]
            d3[s4].replace(n2,np.nan,inplace=True)
        for i in range(len(d3)):
            d3[s4].fillna(d3[s4].median(),inplace=True)
        d3[s4]=d3[s4].astype(int)"""
        if s5 == 'No':
            datewise = d3.groupby([s1]).agg({s4: 'sum'})
        elif s5 == 'Yes':
            s8 = d3[d3[s6] == s7]
            datewise = s8.groupby([s1]).agg({s4: 'sum'})

        #ARIMA

        datewise = datewise.astype('float32')
        model_train = datewise.iloc[:int(datewise.shape[0] * 0.95)]
        valid = datewise.iloc[int(datewise.shape[0] * 0.95):]
        n11 = pd.infer_freq(datewise.index, warn=True)
        list9 = []
        model_arima = auto_arima(model_train[s4],
                                 trace=True,
                                 error_action='ignore',
                                 start_p=1,
                                 start_q=1,
                                 max_p=3,
                                 max_q=3,
                                 suppress_warnings=True,
                                 stepwise=False,
                                 seasonal=False)
        model_arima.fit(model_train[s4])
        prediction_arima = model_arima.predict(len(valid))
        print("Root Mean Square Error for ARIMA Model: ",
              np.sqrt(mean_squared_error(list(valid[s4]), (prediction_arima))))
        list9.append(
            np.sqrt(mean_squared_error(list(valid[s4]), (prediction_arima))))
        print('\n')
        m1 = model_arima.order
        model = ARIMA(datewise[s4], order=m1)
        results = model.fit()
        s = t - 1
        forecast_arima = results.predict(len(datewise),
                                         len(datewise) + s,
                                         typ='levels').rename(s4)

        #Prophet
        datewise1 = datewise.reset_index()
        datewise1.rename(columns={s1: 'ds', s4: 'y'}, inplace=True)
        train = datewise1.iloc[:int(datewise1.shape[0] * 0.95)]
        valid = datewise1.iloc[int(datewise1.shape[0] * 0.95):]
        m = Prophet(weekly_seasonality=True)
        m.fit(train)
        future = m.make_future_dataframe(periods=len(valid), freq=n11)
        forecast = m.predict(future)
        predictions = forecast.tail(len(valid))['yhat']
        print('\n')
        print("Root Mean Squared Error for Prophet Model: ",
              rmse(valid['y'], predictions))
        print('\n')
        list9.append(rmse(valid['y'], predictions))
        m = Prophet(weekly_seasonality=True)
        m.fit(datewise1)
        future = m.make_future_dataframe(periods=t, freq=n11)
        forecast = m.predict(future)
        forecast_prophet = forecast[['ds', 'yhat']].tail(t)

        #LSTM

        train = datewise.iloc[:int(datewise.shape[0] * 0.95)]
        test = datewise.iloc[int(datewise.shape[0] * 0.95):]
        scaler = MinMaxScaler()
        scaler.fit(train)
        scaled_train = scaler.transform(train)
        scaled_test = scaler.transform(test)

        n_input = len(test)
        n_features = 1
        generator = TimeseriesGenerator(scaled_train,
                                        scaled_train,
                                        length=n_input,
                                        batch_size=1)
        model = Sequential()
        model.add(
            LSTM(150, activation='relu', input_shape=(n_input, n_features)))
        model.add(Dense(1))
        model.compile(optimizer='adam', loss='mse')

        model.fit_generator(generator, epochs=30)

        test_predictions = []
        first_eval_batch = scaled_train[-n_input:]
        current_batch = first_eval_batch.reshape((1, n_input, n_features))
        for i in range(len(test)):
            current_pred = model.predict(current_batch)[0]
            test_predictions.append(current_pred)
            current_batch = np.append(current_batch[:, 1:, :],
                                      [[current_pred]],
                                      axis=1)
        true_predictions = scaler.inverse_transform(test_predictions)
        test['predictions'] = true_predictions
        list9.append(rmse(test[s4], test['predictions']))
        print('\n')
        print("Root Mean Square Error for LSTM Model: ",
              rmse(test[s4], test['predictions']))
        print('\n')
        train = datewise
        scaler.fit(train)
        train = scaler.transform(train)
        n_input = len(test)
        n_features = 1
        generator = TimeseriesGenerator(train,
                                        train,
                                        length=n_input,
                                        batch_size=1)
        model.fit_generator(generator, epochs=30)
        test_predictions = []
        first_eval_batch = train[-n_input:]
        current_batch = first_eval_batch.reshape((1, n_input, n_features))
        for i in range(t):
            current_pred = model.predict(current_batch)[0]
            test_predictions.append(current_pred)
            current_batch = np.append(current_batch[:, 1:, :],
                                      [[current_pred]],
                                      axis=1)
        from pandas.tseries.offsets import DateOffset
        add_dates = [
            datewise.index[-1] + DateOffset(months=x) for x in range(0, t + 1)
        ]
        future_dates = pd.DataFrame(index=add_dates[1:],
                                    columns=datewise.columns)
        df_predict = pd.DataFrame(scaler.inverse_transform(test_predictions),
                                  index=future_dates[-t:].index,
                                  columns=[s4])
        d_proj = df_predict
        d_proj.reset_index(drop=True, inplace=True)
        forecast_prophet.reset_index(drop=True, inplace=True)
        d1 = pd.DataFrame(forecast_prophet['ds'])
        lstm = pd.concat([d1, d_proj], axis=1)
        #print('\n')
        #t=str(t)
        #print('Forecasted Data of '+s4+' feature for '+t+ ' days : ' )
        #print('\n')
        small = float('inf')
        for i in range(len(list9)):
            if list9[i] < small:
                small = list9[i]
        no = list9.index(small)

        if no == 0:
            forecast_arima = pd.DataFrame(forecast_arima)
            forecast_arima.reset_index(drop=True, inplace=True)
            d18 = pd.DataFrame(forecast_prophet['ds'])
            d18.reset_index(drop=True, inplace=True)
            forecast_arima = pd.concat([d18, forecast_arima], axis=1)
            forecast_arima.rename(columns={'ds': s1}, inplace=True)
            forecast_data = forecast_arima
            forecast_data1 = forecast_data.set_index(s1)
            #print(forecast_data1)

        elif no == 1:
            forecast_prophet.rename(columns={
                'ds': s1,
                'yhat': s4
            },
                                    inplace=True)
            forecast_data = forecast_prophet
            forecast_data1 = forecast_data.set_index(s1)
            #plt.plot(datewise[s4],label="Original Data")
            #plt.plot(forecast_data[s4],label="Forecasted Data")
            #plt.legend()
            #plt.xlabel("Date")
            #plt.ylabel('Confirmed Cases')
            #plt.title("Confirmed Cases Prophet Model Forecasting")
            #plt.xticks(rotation=90)

        elif no == 2:
            lstm.rename(columns={'ds': s1, 'yhat': s4}, inplace=True)
            forecast_data = lstm
            forecast_data1 = forecast_data.set_index(s1)
            #plt.plot(datewise[s4],label="Original Data")
            #plt.plot(forecast_data[s4],label="Forecasted Data")
            #plt.legend()
            #plt.xlabel("Date")
            #plt.ylabel('Confirmed Cases')
            #plt.title("Confirmed Cases LSTM Model Forecasting")
            #plt.xticks(rotation=90)"""

        fig, ax = plt.subplots(nrows=1, ncols=1)
        ax.plot(datewise[s4], label="Original Data")
        ax.plot(forecast_data1[s4], label="Forecasted Data")
        ax.legend()
        ax.set_xlabel("Date")
        ax.set_ylabel(s4)
        ax.set_title('forecasted data of ' + s4)
        plt.xticks(rotation=90)
        plt.show()
        n = randint(0, 1000000000000)
        n = str(n)
        fig.savefig(
            os.path.join(app.config["IMAGE_UPLOADS"], n + 'time_series.png'))

        full_filename = os.path.join(app.config["IMAGE_UPLOADS"],
                                     n + 'time_series.png')
    # VARMAX
    if s3 > 1:
        n2 = s4
        n4 = n2.split()
        n5 = n2.split()
        if s5 == 'No':
            datewise = d3.groupby([s1]).agg({n4[0]: 'sum'})
            n4.pop(0)
            for i in range(len(n4)):
                d3i = d3.groupby([s1]).agg({n4[i]: 'sum'})
                datewise = pd.concat([datewise, d3i], axis=1)
        elif s5 == 'Yes':
            #s6=str(input('Enter the feature name from which who want to pick the category (eg:- country): '))
            #s7=str(input('Ente the category name from'+' '+s6+' '+'to forecast'+' '+s4+' '+'  : '))
            s8 = d3[d3[s6] == s7]
            datewise = s8.groupby([s1]).agg({n4[0]: 'sum'})
            n4.pop(0)
            for i in range(len(n4)):
                d3i = s8.groupby([s1]).agg({n4[i]: 'sum'})
                datewise = pd.concat([datewise, d3i], axis=1)
                #datewise=pd.concat([datewise,d3i],axis=1)
        list1 = []
        list2 = []
        list3 = []
        list4 = []
        for i in range(len(n5)):
            model_arima = auto_arima(datewise[n5[i]],
                                     trace=True,
                                     error_action='ignore',
                                     start_p=1,
                                     start_q=1,
                                     max_p=3,
                                     max_q=3,
                                     suppress_warnings=True,
                                     stepwise=False,
                                     seasonal=False)
            list1.append(model_arima.order)
        for i in range(len(list1)):
            list2.append(list1[i][0])
            list3.append(list1[i][1])
            list4.append(list1[i][2])
        list2.sort(reverse=True)
        p = list2[0]
        list3.sort(reverse=True)
        d = list3[0]
        list4.sort(reverse=True)
        q = list4[0]
        if d < 1:
            df_transformed = datewise
        elif d == 1:
            df_transformed = datewise.diff()
            df_transformed = df_transformed.dropna()
        elif d > 1:
            df_transformed = datewise.diff().diff()
            df_transformed = df_transformed.dropna()

        nobs = 12
        train, test = df_transformed[0:-nobs], df_transformed[-nobs:]
        model = VARMAX(train, order=(p, q), trend='c')
        results = model.fit(maxiter=100, disp=False)
        results.summary()
        df_forecast = results.forecast(nobs)
        for i in range(len(n5)):
            j = '1d'
            df_forecast[n5[i] + j] = (
                datewise[n5[i]].iloc[-nobs - 1] -
                datewise[n5[i]].iloc[-nobs - 2]) + df_forecast[n5[i]].cumsum()
            df_forecast[n5[i] + ' Forecasted'] = datewise[n5[i]].iloc[
                -nobs - 1] + df_forecast[n5[i]].cumsum()
        list89 = df_forecast.columns
        list98 = []
        for i in range(len(list89)):
            if list89[i][-11:] == ' Forecasted':
                list98.append(list89[i])
        d_new = pd.concat([datewise.iloc[-12:], df_forecast[list98]], axis=1)
        for i in range(len(n5)):
            RMSE = rmse(datewise[n5[i]][-nobs:], df_forecast[list98[i]])
            print('Root Mean Square Error for ' + n5[i] + ':', RMSE)
        model = VARMAX(df_transformed, order=(p, q), trend='c')
        results = model.fit(maxiter=100, disp=False)
        results.summary()
        #t=int(input('Enter number of days to forecast ? :'))
        df_forecast = results.forecast(t)
        for i in range(len(n5)):
            j = '2d'
            df_forecast[n5[i] + j] = (
                datewise[n5[i]].iloc[-t - 1] -
                datewise[n5[i]].iloc[-t - 2]) + df_forecast[n5[i]].cumsum()
            df_forecast[n5[i] + ' Forecasted'] = datewise[n5[i]].iloc[
                -t - 1] + df_forecast[n5[i]].cumsum()
        list89 = df_forecast.columns
        list98 = []
        for i in range(len(list89)):
            if list89[i][-11:] == ' Forecasted':
                list98.append(list89[i])
        df_forecast = df_forecast[list98]
        df_forecast.reset_index(inplace=True)
        df_forecast.rename(columns={'index': s1}, inplace=True)
        df_forecast.set_index(s1, inplace=True)
        forecast_data1 = df_forecast[list98]

        fig, b = plt.subplots(len(n5), 2, figsize=(15, 5))
        for i in range(len(n5)):
            datewise[n5[i]].plot(kind='line', ax=b[i][0], title=n5[i])
            df_forecast[list98[i]].plot(kind='line',
                                        ax=b[i][1],
                                        title='Forecasted data of ' + n5[i],
                                        color='orange')
            fig.tight_layout(pad=1.0)
        plt.show()

        n = randint(0, 1000000000000)
        n = str(n)
        fig.savefig(
            os.path.join(app.config["IMAGE_UPLOADS"], n + 'time_series.png'))

        full_filename = os.path.join(app.config["IMAGE_UPLOADS"],
                                     n + 'time_series.png')

    return render_template('step1_img.html',
                           user_image=full_filename,
                           tables=[forecast_data1.to_html(classes='page')],
                           titles=['na', 'Job'],
                           query1=request.form['query1'],
                           query2=request.form['query2'],
                           query3=request.form['query3'],
                           query4=request.form['query4'],
                           query5=request.form['query5'],
                           query6=request.form.get('query6', ''),
                           query7=request.form.get('query7', ''),
                           query8=request.form['query8'])
Example #29
def to_dt(value):
    # hypothetical reconstruction of the truncated timestamp converter
    return pd.to_datetime(value)

def plot_varmax_predictions(csv_path):
    # the head of this snippet was truncated; this wrapper and read_csv call
    # are reconstructed as assumptions
    frame = pd.read_csv(csv_path,
                        index_col=0,
                        encoding="utf-8-sig",
                        converters={0: to_dt},
                        names=["TS", "x", "y", "z"])

    req_period = datetime.timedelta(milliseconds=100)

    even_frame = frame.resample(req_period).mean().interpolate()

    #aclr_x=even_frame["x"]
    aclr_x = even_frame
    seria_len = len(aclr_x)

    train_seria, test_seria = aclr_x[:seria_len // 2], aclr_x[seria_len // 2:]

    model = VARMAX(train_seria, order=(5, 5))
    #model = VARMAX(train_seria, order=(3, 3))
    #model = VARMAX(train_seria,)
    model_fit = model.fit()

    predictions = model_fit.forecast(len(test_seria))
    print(type(predictions))
    print(predictions.shape)

    for axis in range(3):
        plt.subplot(3, 1, axis + 1)
        # plt.plot(test_seria.index[:100], predictions[:, axis][:100], label="predictions")
        plt.plot(predictions.iloc[:100, axis], label="predicted")
        plt.plot(test_seria.iloc[:100, axis], label="expected")
        plt.legend(loc="upper right")
    plt.show()
Example #30
'''
Vector Autoregression Moving-Average with Exogenous Regressors (VARMAX)
The Vector Autoregression Moving-Average with Exogenous Regressors (VARMAX) is an extension of the VARMA model that also includes the modeling of exogenous variables. It is a multivariate version of the ARMAX method.

Exogenous variables are also called covariates and can be thought of as parallel input sequences that have observations at the same time steps as the original series. The primary series are referred to as endogenous data, to contrast them with the exogenous sequences. The observations for exogenous variables are included in the model directly at each time step and are not modeled in the same way as the primary endogenous sequence (e.g. as an AR, MA, etc. process).

The VARMAX method can also be used to model the subsumed models with exogenous variables, such as VARX and VMAX.

The method is suitable for multivariate time series without trend and seasonal components with exogenous variables.
'''

from random import random

# VARMAX example
from statsmodels.tsa.statespace.varmax import VARMAX

# contrived dataset with dependency
data = list()
for i in range(100):
    v1 = random()
    v2 = v1 + random()
    row = [v1, v2]
    data.append(row)
data_exog = [x + random() for x in range(100)]
# fit model
model = VARMAX(data, exog=data_exog, order=(1, 1))
model_fit = model.fit(disp=False)
# make prediction
data_exog2 = [[100]]
yhat = model_fit.forecast(exog=data_exog2)
print(yhat)
# -*- coding: utf-8 -*-
"""
Created on Mon Oct 21 10:47:28 2019

@author: Nielsen
"""

# VARMA example
from statsmodels.tsa.statespace.varmax import VARMAX
from random import random
# contrived dataset with dependency
data = list()
for i in range(100):
    v1 = random()
    v2 = v1 + random()
    row = [v1, v2]
    data.append(row)
# fit model
model = VARMAX(data, order=(1, 1))
model_fit = model.fit(disp=False)
# make prediction
yhat = model_fit.forecast()
print(yhat)