Example 1
    def _estimate_varma_coefs(self, X):
        if self._criterion not in ["aic", "bic", "hqic"]:
            result = VARMAX(X, order=self._order,
                            trend="c").fit(maxiter=self._max_iter)
        else:
            min_value = float("Inf")
            result = None

            orders = [(p, q) for p in range(self._order[0] + 1)
                      for q in range(self._order[1] + 1)]
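            # skip the degenerate (0, 0) order, which has no AR or MA terms to estimate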
            orders.remove((0, 0))

            for order in orders:
                fitted = VARMAX(X, order=order,
                                trend="c").fit(maxiter=self._max_iter)

                value = getattr(fitted, self._criterion)
                if value < min_value:
                    min_value = value
                    result = fitted

        return (
            result.coefficient_matrices_var,
            result.coefficient_matrices_vma,
            result.specification["order"],
            result.resid,
        )
Example 2
    def get(self, request, *args, **kwargs):
        start_date = self.request.query_params.get('startdate', '1970-01-30')
        end_date = self.request.query_params.get('enddate', '2018-01-01')

        data = read_frame(PriceProduction.objects.all())
        data['date'] = pd.to_datetime(data['date'])
        data = data.drop('id', axis=1)
        data = data.set_index('date')

        startdate = dat.strptime(start_date, '%Y-%m-%d')
        enddate = dat.strptime(end_date, '%Y-%m-%d')

        nextmonth = enddate + relativedelta.relativedelta(months=1)
        train, test = data[startdate:nextmonth], data[nextmonth:]
        model = VARMAX(train, order=(1, 1))  # VARMAX takes a (p, q) order, not (p, d, q)
        model_fit = model.fit(disp=False)
        yhat = model_fit.forecast(len(test) - 1)

        yhat['actual'] = test['price']
        predictdata = yhat.drop("production", axis=1)
        metrics = forecast_accuracy(predictdata['price'],
                                    predictdata['actual'])
        predictdata.index = predictdata.index.astype("str")
        print(predictdata)
        json = predictdata.to_json()
        json = ast.literal_eval(json)
        json['mape'] = metrics['mape']
        return Response(json)
Example 3
 def get_best_model(self, data: pd.DataFrame):
     """
     Returns the 'unfit' VARMAX model with the given dataset and the
     selected best parameters. This can be used to fit or refit the model.
     """
     bestmodel = VARMAX(data, order=(self.best_p, self.best_q), trend='c')
     return bestmodel
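
A brief usage sketch (hypothetical: `selector` is an instance of this class whose `best_p` and `best_q` were already filled in by a parameter search):

# hypothetical usage: fit the returned 'unfit' model, then forecast
best_model = selector.get_best_model(train_df)
results = best_model.fit(disp=False)
forecast = results.forecast(steps=10)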
Example 4
    def find_best_parameters(self, data: pd.DataFrame):
        """
        Given a dataset, finds the best parameters using the settings in the class
        """
        #### dmax here means the column number of the data frame: it serves as a placeholder for columns
        dmax = data.shape[1]
        ###############################################################################################
        cols = data.columns.tolist()
        # TODO: #14 Make sure that we have a way to not rely on column order to determine the target
        # It is assumed that the first column of the dataframe is the target variable ####
        ### make sure that is the case before doing this program ####################
        i = 1
        results_dict = {}

        for d_val in range(1, dmax):
            # Takes the target column and one other endogenous column at a time
            # and makes a prediction based on that. Then selects the best
            # exogenous column at the end.
            y_train = data.iloc[:, [0, d_val]]
            print('\nAdditional Variable in VAR model = %s' % cols[d_val])
            info_criteria = pd.DataFrame(
                index=['AR{}'.format(i) for i in range(0, self.p_max+1)],
                columns=['MA{}'.format(i) for i in range(0, self.q_max+1)]
            )
            for p_val, q_val in itertools.product(range(0, self.p_max+1), range(0, self.q_max+1)):
                if p_val == 0 and q_val == 0:
                    info_criteria.loc['AR{}'.format(p_val), 'MA{}'.format(q_val)] = np.nan
                    print(' Iteration %d completed' % i)
                    i += 1
                else:
                    try:
                        model = VARMAX(y_train, order=(p_val, q_val), trend='c')
                        model = model.fit(maxiter=1000, disp=False)
                        info_criteria.loc['AR{}'.format(p_val), 'MA{}'.format(q_val)] = getattr(model, self.scoring)
                        print(' Iteration %d completed' % i)
                        i += 1
                    except Exception:
                        print(' Iteration %d completed' % i)
                        i += 1
            info_criteria = info_criteria[info_criteria.columns].astype(float)
            interim_d = d_val
            interim_p, interim_q, interim_bic = find_lowest_pq(info_criteria)
            if self.verbose == 1:
                _, axis = plt.subplots(figsize=(20, 10))
                axis = sns.heatmap(
                    info_criteria,
                    mask=info_criteria.isnull(),
                    ax=axis,
                    annot=True,
                    fmt='.0f'
                )
                axis.set_title(self.scoring)
            results_dict[str(interim_p) + ' ' + str(interim_d) + ' ' + str(interim_q)] = interim_bic
        best_bic = min(results_dict.items(), key=operator.itemgetter(1))[1]
        best_pdq = min(results_dict.items(), key=operator.itemgetter(1))[0]
        self.best_p = int(best_pdq.split(' ')[0])
        self.best_d = int(best_pdq.split(' ')[1])
        self.best_q = int(best_pdq.split(' ')[2])
        print('Best variable selected for VAR: %s' % data.columns.tolist()[self.best_d])
Example 5
def varma_prediction(train, test, steps):
    p, q = get_var_pq_params(train)
    model = VARMAX(train, order=(p, q))
    model_fit = model.fit(disp=False)
    if not steps:
        prediction = model_fit.forecast(steps=len(test))
    else:
        prediction = model_fit.forecast(steps=steps)
    multi_predicts_df = pd.DataFrame(prediction, columns=train.columns)
    return multi_predicts_df
Example 6
def varma_forecast(history, config):
    order, trend = config
    # define model
    model = VARMAX(history, order=order, trend=trend, enforce_stationarity=False,
                    enforce_invertibility=False)
    # fit model
    model_fit = model.fit(disp=False)
    # make one step forecast
    yhat = model_fit.predict(len(history), len(history))
    return yhat[0]
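
A minimal, hypothetical call of `varma_forecast` (the config tuple is unpacked as `(order, trend)` above):

# hypothetical usage: contrived bivariate history, VARMA(1, 1) with constant trend
from random import random
history = [[random(), random()] for _ in range(100)]
yhat = varma_forecast(history, ((1, 1), 'c'))
print(yhat)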
Example 7
    def varmax_model_fit(self, x_train, x_test, df_time, order=(1, 0), col_exog=[], verbose=1):
        if col_exog:
            exo_train = pd.DataFrame()
            exo_test = pd.DataFrame()
            for col in col_exog:
                exo_train[col] = x_train[col]
                x_train.drop([col], axis=1, inplace=True)
                exo_test[col] = x_test[col]
                x_test.drop([col], axis=1, inplace=True)

            model = VARMAX(x_train, order=order, exog=exo_train)
        else:
            model = VARMAX(x_train, order=order)

        result = model.fit()
        out = durbin_watson(result.resid)
        df_results = pd.DataFrame()
        for col, val in zip(x_train.columns, out):
            df_results[col] = [round(val, 2)]
        if verbose == 1:
            st.subheader('durbin_watson test')
            st.write('values near 2 indicate no autocorrelation in the residuals; values near 0 or 4 indicate positive or negative autocorrelation')
            st.write(df_results.T)

        if col_exog:
            df_forecast = result.forecast(steps=x_test.shape[0], exog = exo_test)
        else:
            df_forecast = result.forecast(steps=x_test.shape[0])

        df_forecast.index = df_time['test']
        df_forecast.columns = x_test.columns
        x_test.index = df_time['test']
        if verbose == 1:
            st.write(df_forecast)
            for i, col in enumerate(x_test):
                fig = ds().nuova_fig(555+i)
                st.subheader(col)
                df_forecast[col].plot(label='Prediction')
                x_test[col].plot(label = 'True')
                ds().legenda()
                st.pyplot(fig)
        return df_forecast
Example 8
def model_varmax(train_data,test_data,train_data1,test_data1):
    x = train_data1.reshape((372,1))
    x1 = train_data.reshape((372,1))
    lis = np.concatenate((x,x1), axis = 1)
    print(np.shape(lis))
    #forecast
    model = VARMAX(lis, order=(1,1))
    model_fit = model.fit(disp=False)
    print(model_fit.summary().tables[1])
    predictions = model_fit.forecast(steps=10)
    print('VARMAX RMSE: ', np.sqrt(mean_squared_error(predictions[:,0], test_data1[0:10])))
Example 9
    def VectorAutoRegressiveMovingAverage(self):

        # currently, exodata is not used

        # convert to a list of rows; `data` is assumed to be defined elsewhere
        # (e.g. an instance attribute or module-level frame)
        datalist = data.values.tolist()
        # fit one joint VARMA(1, 1) model over all axes
        model = VARMAX(datalist, order=(1, 1))
        model_fit = model.fit(disp=False)
        # forecast() takes only the number of steps to predict
        datahat = model_fit.forecast(steps=(self.end - self.start))
        return datahat
Example 10
    def get(self, request, *args, **kwargs):
        n_steps = int(self.request.query_params.get('nsteps', 10))

        data = read_frame(PriceProduction.objects.all())
        data['date'] = pd.to_datetime(data['date'])
        data = data.drop('id', axis=1)
        data = data.set_index('date')
        model = VARMAX(data, order=(1, 1))  # (p, q) order; VARMAX has no differencing term
        model_fit = model.fit(disp=False)
        yhat = model_fit.forecast(n_steps)
        yhat = yhat['price']
        yhat.index = yhat.index.astype("str")
        json = yhat.to_json()
        json = ast.literal_eval(json)
        return Response(json)
Example 11
    def VARMA(self, order=(1, 1), name="VARMA"):
        print("=" * 30 + "\n" + name + "\n" + "=" * 30 + "\n")
        # fit model
        model = VARMAX(self.data_train, order=order)
        model_fit = model.fit(disp=False)
        # make prediction
        yhat = model_fit.forecast(steps=42)
        prediction = pd.DataFrame(yhat,
                                  index=self.data_test.index.values,
                                  columns=self.data_train.columns.values)

        plt.plot(self.data_train_and_test)
        plt.plot(prediction, color='red')
        plt.title(name)
        plt.show()
Example 12
 def get_VAR_models(self, data, exog_data=None, order=None, type='VAR'):
     '''
     Generate the VAR model. Vector Autoregression (VAR) is a multivariate forecasting algorithm used when two or more time series influence each other.
     You need at least two time series (variables), and they should influence each other.
     :param data: matrix with all the data, pandas. The model will try to predict the next value for each of the features.
     :param exog_data: features that are not strictly endogenous can be passed in this matrix, pandas
     :param order: (p, q) order of the model for the number of AR and MA parameters to use, needed only with VARMAX
     :param type: VAR, VARMAX
     :return: model
     '''
     if type == 'VAR':
         model = VAR(data, exog=exog_data)
     elif type == 'VARMAX':
         model = VARMAX(data, exog=exog_data, order=order)
     return model
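
A hedged usage sketch (hypothetical `helper` instance and multivariate frame `df`):

# hypothetical usage: the method returns an unfit model, so fit it afterwards
model = helper.get_VAR_models(df, order=(1, 1), type='VARMAX')
results = model.fit(disp=False)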
Example 13
    def test_4(self):
        data = self.getMultiDimensionalData()
        model = VARMAX(data,order=(1,2))
        result = model.fit()

        f_name='varmax_12.pmml'
        StatsmodelsToPmml(result, f_name,model_name="varmax_test",conf_int=[95])

        model_name = self.adapa_utility.upload_to_zserver(f_name)
        z_pred = self.adapa_utility.score_in_zserver(model_name, {'h':5},'TS')
        forecasts=result.get_forecast(5)

        z_forecast_hum = list(z_pred['outputs'][0]['predicted_SanDiegoHum'].values())
        model_forecast_hum = forecasts.predicted_mean['SanDiegoHum'].values.tolist()

        z_forecast_pressure = list(z_pred['outputs'][0]['predicted_SanDiegoPressure'].values())
        model_forecast_pressure = forecasts.predicted_mean['SanDiegoPressure'].values.tolist()

        z_forecast_temp = list(z_pred['outputs'][0]['predicted_SanDiegoTemp'].values())
        model_forecast_temp = forecasts.predicted_mean['SanDiegoTemp'].values.tolist()

        z_conf_int_95_lower_hum = list(z_pred['outputs'][0]['conf_int_95_lower_SanDiegoHum'].values())
        model_conf_int_95_lower_hum = forecasts.conf_int()['lower SanDiegoHum'].values.tolist()

        z_conf_int_95_lower_pressure = list(z_pred['outputs'][0]['conf_int_95_lower_SanDiegoPressure'].values())
        model_conf_int_95_lower_pressure = forecasts.conf_int()['lower SanDiegoPressure'].values.tolist()

        z_conf_int_95_lower_temp = list(z_pred['outputs'][0]['conf_int_95_lower_SanDiegoTemp'].values())
        model_conf_int_95_lower_temp = forecasts.conf_int()['lower SanDiegoTemp'].values.tolist()

        z_conf_int_95_upper_hum = list(z_pred['outputs'][0]['conf_int_95_upper_SanDiegoHum'].values())
        model_conf_int_95_upper_hum = forecasts.conf_int()['upper SanDiegoHum'].values.tolist()

        z_conf_int_95_upper_pressure = list(z_pred['outputs'][0]['conf_int_95_upper_SanDiegoPressure'].values())
        model_conf_int_95_upper_pressure = forecasts.conf_int()['upper SanDiegoPressure'].values.tolist()

        z_conf_int_95_upper_temp = list(z_pred['outputs'][0]['conf_int_95_upper_SanDiegoTemp'].values())
        model_conf_int_95_upper_temp = forecasts.conf_int()['upper SanDiegoTemp'].values.tolist()

        self.assertTrue(np.allclose(z_forecast_hum, model_forecast_hum))
        self.assertTrue(np.allclose(z_forecast_pressure, model_forecast_pressure))
        self.assertTrue(np.allclose(z_forecast_temp, model_forecast_temp))
        self.assertTrue(np.allclose(z_conf_int_95_lower_hum, model_conf_int_95_lower_hum))
        self.assertTrue(np.allclose(z_conf_int_95_lower_pressure, model_conf_int_95_lower_pressure))
        self.assertTrue(np.allclose(z_conf_int_95_lower_temp, model_conf_int_95_lower_temp))
        self.assertTrue(np.allclose(z_conf_int_95_upper_hum, model_conf_int_95_upper_hum))
        self.assertTrue(np.allclose(z_conf_int_95_upper_pressure, model_conf_int_95_upper_pressure))
        self.assertTrue(np.allclose(z_conf_int_95_upper_temp, model_conf_int_95_upper_temp))
Example 14
    def _fit(self, train_data):
        """Fits the model based on training data `train_data`.

        Parameters
        ----------
        train_data: pd.DataFrame
            A pandas DataFrame representing the data used for training.

        Returns
        -------
        None

        """
        varma_order = (self._p, self._q)
        model = VARMAX(train_data, order=varma_order)
        self._model = model.fit(disp=False)
Example 15
def trainVectorARMAMethodModel():

    X_train = readVectorARMAMethodXTrain()

    #training model on the training set
    vectorARMAMethodModel = VARMAX(X_train, order=(1, 2), trend="c")

    #the (p, q) order above was chosen by comparing candidate models on their AIC and BIC
    vectorARMAMethodModelResult = vectorARMAMethodModel.fit(maxiter=1000,
                                                            disp=False)

    #saving the model in pickle file
    saveVectorARMAMethodModel(vectorARMAMethodModelResult)

    print(vectorARMAMethodModelResult.summary())
Example 16
    def predict(self, action):
        """ Returns a one-step forecast after appending the new action to the stored series. """
        # store the new action (an equivalent rolling alternative: np.roll the array and overwrite the last row)
        del self.ts[0]
        self.ts.append(action)
        #print(self.ts)

        model = VARMAX(self.ts, order=(self.p, self.p))
        model_fit = model.fit(disp=False)
        self.y_pred = model_fit.forecast(steps=1)

        print(self.y_pred)

        return self.y_pred
Example 17
def VARMAXgridsearch(modeldata, cfg_list):
    results = []
    for order in cfg_list:
        # fit a model for this candidate order and record its mean residual
        temp_dict = {}
        varmaxmodel = VARMAX(modeldata, order=order).fit()
        residuals = DataFrame(varmaxmodel.resid)
        mean_error = abs(residuals.mean())
        temp_dict.update({
            'order': order,
            'model': varmaxmodel,
            'meanError': mean_error[0]
        })
        results.append(temp_dict)
    return results
Example 18
    def varma_final(self):
        predictions = []
        input_data = numpy.array(self.total)
        input_data = numpy.log(input_data)
        input_data = self.difference(input_data)
        input_data = pd.DataFrame(input_data)
        input_data = input_data.dropna()
        for i in range(0, len(self.test)):
            model = VARMAX(input_data, order=(1, 1))
            model_fit = model.fit(disp=False)
            yhat = model_fit.forecast()
            predictions.append(yhat)
            # DataFrame.append returns a new frame, so reassign (it is not in-place)
            input_data = pd.concat([input_data, yhat], ignore_index=True)
        # round each one-row forecast frame and floor negatives at zero;
        # a plain `if predictions[i] < 0` on a DataFrame is ambiguous and raises
        for i in range(0, len(predictions)):
            predictions[i] = predictions[i].round(2).clip(lower=0)

        return predictions
Example 19
def model_var1(endog=None, params=None, measurement_error=False, init=None):
    if endog is None:
        levels = macrodata[['realgdp', 'realcons']]
        endog = np.log(levels).iloc[:21].diff().iloc[1:] * 400
    if params is None:
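        # params layout with trend='n': the four VAR(1) coefficients (2x2, row-major),
        # followed by the lower-triangular Cholesky elements of the error covariance;
        # measurement error variances, when enabled, are appended last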
        params = np.r_[0.5, 0.3, 0.2, 0.4, 2**0.5, 0, 3**0.5]
        if measurement_error:
            params = np.r_[params, 4, 5]

    # Model
    mod = VARMAX(endog, order=(1, 0), trend='n',
                 measurement_error=measurement_error)
    mod.update(params)
    ssm = mod.ssm
    if init is None:
        init = Initialization(ssm.k_states, 'diffuse')
    ssm.initialize(init)

    return mod, ssm
Example 20
    def initialize(self, params):
        self.p = params['p']
        self.action_dim = params['dim']

        # build p independent rows; `[[0] * dim] * p` would alias the same inner list p times
        self.ts = [[0] * self.action_dim for _ in range(self.p)]

        data = list()
        for i in range(100):
            v1 = random()
            v2 = v1 + random()
            row = [v1, v2]
            data.append(row)
        model = VARMAX(self.ts, order=(16, 16))
        print("VARMAX")
        model_fit = model.fit()
        print("fit")
        exit()

        self.initialized = True
Example 21
    def predictTrajectory(self):
        predict_num = 5
        gps_points = self.gps_points()
        # data = [[p["long"],p["lat"]] for p in gps_points]
        data = list()
        for i in range(100):
            v1 = random()
            v2 = v1 + random()
            row = [v1, v2]
            data.append(row)
        model = VARMAX(data, order=(1, 1))
        model_fit = model.fit(disp=False)

        yhat = model_fit.forecast(predict_num)

        return {
            "object_id": self.lastappeared.object_id,
            "gps_points": [{
                "long": p[0],
                "lat": p[1]
            } for p in yhat]
        }
Example 22
    def _fit(self, train_features, train_target):
        """Fits the model based on `train_features` and `train_target`.

        A VARMAX model is built to predict the target variables with data
        given by `train_target` based on the features with data given by
        `train_features`.

        Parameters
        ----------
        train_features: pd.DataFrame
            A pandas DataFrame representing the training features.
        train_target: pd.Series
            A pandas Series representing the target variable.

        Returns
        -------
        None

        """
        varmax_order = (self._p, self._q)
        model = VARMAX(train_target, train_features, order=varmax_order)
        self._model = model.fit(disp=False)
        self._is_fit = True
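
Because the model is fit with `train_features` as `exog`, later forecasts must supply feature values for the horizon; a hedged sketch with hypothetical names:

# hypothetical usage: one row of exog values per forecasted step
preds = estimator._model.forecast(steps=len(future_features), exog=future_features)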
Example 23
def varmax_model(target_variable, exog_variables, start_date, end_date, plot):
    from statsmodels.tsa.statespace.varmax import VARMAX
    import numpy as np
    
    #Split target variable into training/test set
    train = target_variable[:int(0.7*(len(target_variable)))]
    test = target_variable[int(0.7*(len(target_variable))):]

    exog_variables_train = []
    exog_variables_test = []
    
    #Split external variables into test/training sets
    for variable in exog_variables:
        variable = variable.values
        exog_variables_train.append(variable[:int(0.7*(len(variable)))])
        exog_variables_test.append(variable[int(0.7*(len(variable))):])
        
        
    exog_train = np.column_stack(exog_variables_train)
    exog_test = np.column_stack(exog_variables_test)

    #Fit the model
    y_hat_avg = test.copy()  # copy so the new column does not mutate the original test frame
    model = VARMAX(train, exog=exog_train, order=(1, 1)).fit(disp=False)
    # make prediction
    y_hat_avg["VARMAX"] = model.predict(exog=exog_test, start=start_date, end=end_date)
    
    if plot:
        import matplotlib.pyplot as plt
        plt.figure(figsize=(16,8))
        #plt.plot(train[train.columns[0]], label='dod_model.Train')
        plt.plot(test[test.columns[0]], label='Test')
        plt.plot(y_hat_avg['VARMAX'] ,label='VARMAX')
        plt.legend(loc='best')
        plt.show()
        
    print(y_hat_avg)
Example 24
def regress_varmax(df_endog, bin_size_weeks, n):
    """
    Trains a varmax model on time series for each patent up to n steps,
    working forwards from the publication date or working backwards from the current date. Also includes exogenous
    patent features.

    :param df_endog: the multiple endogenous time series, not yet transformed
    :param bin_size_weeks: the bin size in weeks
    :type bin_size_weeks: pd.Timedelta
    :param n: the number of steps required in each patent series - must make a square matrix!
    :return: None
    """
    df_endog = VARMAXTransformer("varmax").transform(df_endog, bin_size_weeks, n)

    # remove columns with low variance
    order = 4
    df_endog = df_endog.loc[:, df_endog.apply(pd.Series.nunique, axis=0) > order]
    logger.debug(df_endog)
    logger.debug(df_endog.describe())

    logger.debug("Training VARMAX...")
    model = VARMAX(df_endog.values, order=(order, 0))
    res = model.fit(maxiter=1000, disp=True)
    logger.debug(res.summary())
Example 25
print(yhat)

## VARMAX: like VAR, but adds moving-average terms and exogenous variables
# VARMAX example
from statsmodels.tsa.statespace.varmax import VARMAX
from random import random
# contrived dataset with dependency
data = list()
for i in range(100):
    v1 = random()
    v2 = v1 + random()
    row = [v1, v2]
    data.append(row)
data_exog = [x + random() for x in range(100)]
# fit model
model = VARMAX(data, exog=data_exog, order=(1, 1))
model_fit = model.fit(disp=False)
# make prediction
data_exog2 = [[100]]
yhat = model_fit.forecast(exog=data_exog2)
print(yhat)


## Exponential Smoothing: Like autoregression but time decay of lagged values
## Can use to get trend or seasonal effect
# HWES example
from statsmodels.tsa.holtwinters import ExponentialSmoothing
from random import random
# contrived dataset
data = [x + random() for x in range(1, 100)]
# fit model
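# (the snippet is cut off here; a plausible completion, following the pattern
#  of the other examples in this file:)
model = ExponentialSmoothing(data)
model_fit = model.fit()
# make a one-step prediction
yhat = model_fit.predict(len(data), len(data))
print(yhat)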
Example 26
# (snippet truncated above: the definition of the AR coefficient matrix p1 is cut off)
q1 = np.asmatrix([
                    [-0., 0.],
                    [0., -0.],
                 ])
p = [p1]#, p2] #, p3]
q = [q1]#q1]

# y0 = np.asmatrix([[0., 0., 0.]]).T #, [0., 0., 0.]

X = sim.varmapqGaussian(t = t, pMatrix = p, qMatrix = q)#, y0 = y0)

y = VARMAX(X.T, order=(1, 1)).fit()

print(y.summary())

x1 = np.asarray(X[0,:]).reshape(t)
x2 = np.asarray(X[1,:]).reshape(t)
# x3 = np.asarray(X[2,:]).reshape(t)



# nprocess = X.shape[0]
pLag = len(p)
qLag = len(q)
# 

params = logL.maxVARMApqN(X, pLag, qLag)
Example 27
def submit_ts():

    f = request.files['userfile']
    f.save(f.filename)
    print(f)

    s1 = request.form['query1']
    s2 = request.form['query2']
    s3 = int(request.form['query3'])
    s4 = request.form['query4']
    s5 = request.form['query5']
    if s5 == 'Yes':
        s6 = request.form['query6']
        s7 = request.form['query7']

    t = int(request.form['query8'])

    d1 = f.filename
    print(d1)
    d3 = pd.read_csv(d1)

    if s3 == 1:
        d3[s1] = pd.to_datetime(d3[s1], format=s2, infer_datetime_format=True)
        list1 = []
        list3 = []
        list9 = []
        """
        for i in range(len(d3[s4])):
            try:
                list1.append(int(d3[s4][i]))
            except:
                list3.append(i)
                continue
        for i in range(len(list3)):
            n2=d3[s4][list3[i]]
            d3[s4].replace(n2,np.nan,inplace=True)
        for i in range(len(d3)):
            d3[s4].fillna(d3[s4].median(),inplace=True)
        d3[s4]=d3[s4].astype(int)"""
        if s5 == 'No':
            datewise = d3.groupby([s1]).agg({s4: 'sum'})
        elif s5 == 'Yes':
            s8 = d3[d3[s6] == s7]
            datewise = s8.groupby([s1]).agg({s4: 'sum'})

        #ARIMA

        datewise = datewise.astype('float32')
        model_train = datewise.iloc[:int(datewise.shape[0] * 0.95)]
        valid = datewise.iloc[int(datewise.shape[0] * 0.95):]
        n11 = pd.infer_freq(datewise.index, warn=True)
        list9 = []
        model_arima = auto_arima(model_train[s4],
                                 trace=True,
                                 error_action='ignore',
                                 start_p=1,
                                 start_q=1,
                                 max_p=3,
                                 max_q=3,
                                 suppress_warnings=True,
                                 stepwise=False,
                                 seasonal=False)
        model_arima.fit(model_train[s4])
        prediction_arima = model_arima.predict(len(valid))
        print("Root Mean Square Error for ARIMA Model: ",
              np.sqrt(mean_squared_error(list(valid[s4]), (prediction_arima))))
        list9.append(
            np.sqrt(mean_squared_error(list(valid[s4]), (prediction_arima))))
        print('\n')
        m1 = model_arima.order
        model = ARIMA(datewise[s4], order=m1)
        results = model.fit()
        s = t - 1
        forecast_arima = results.predict(len(datewise),
                                         len(datewise) + s,
                                         typ='levels').rename(s4)

        #Prophet
        datewise1 = datewise.reset_index()
        datewise1.rename(columns={s1: 'ds', s4: 'y'}, inplace=True)
        train = datewise1.iloc[:int(datewise1.shape[0] * 0.95)]
        valid = datewise1.iloc[int(datewise1.shape[0] * 0.95):]
        m = Prophet(weekly_seasonality=True)
        m.fit(train)
        future = m.make_future_dataframe(periods=len(valid), freq=n11)
        forecast = m.predict(future)
        predictions = forecast.tail(len(valid))['yhat']
        print('\n')
        print("Root Mean Squared Error for Prophet Model: ",
              rmse(valid['y'], predictions))
        print('\n')
        list9.append(rmse(valid['y'], predictions))
        m = Prophet(weekly_seasonality=True)
        m.fit(datewise1)
        future = m.make_future_dataframe(periods=t, freq=n11)
        forecast = m.predict(future)
        forecast_prophet = forecast[['ds', 'yhat']].tail(t)

        #LSTM

        train = datewise.iloc[:int(datewise.shape[0] * 0.95)]
        test = datewise.iloc[int(datewise.shape[0] * 0.95):]
        scaler = MinMaxScaler()
        scaler.fit(train)
        scaled_train = scaler.transform(train)
        scaled_test = scaler.transform(test)

        n_input = len(test)
        n_features = 1
        generator = TimeseriesGenerator(scaled_train,
                                        scaled_train,
                                        length=n_input,
                                        batch_size=1)
        model = Sequential()
        model.add(
            LSTM(150, activation='relu', input_shape=(n_input, n_features)))
        model.add(Dense(1))
        model.compile(optimizer='adam', loss='mse')

        model.fit_generator(generator, epochs=30)

        first_eval_batch = scaled_train[-n_input:]
        test_predictions = []
        first_eval_batch = scaled_train[-n_input:]
        current_batch = first_eval_batch.reshape((1, n_input, n_features))
        for i in range(len(test)):
            current_pred = model.predict(current_batch)[0]
            test_predictions.append(current_pred)
            current_batch = np.append(current_batch[:, 1:, :],
                                      [[current_pred]],
                                      axis=1)
        true_predictions = scaler.inverse_transform(test_predictions)
        test['predictions'] = true_predictions
        list9.append(rmse(test[s4], test['predictions']))
        print('\n')
        print("Root Mean Square Error for LSTM Model: ",
              rmse(test[s4], test['predictions']))
        print('\n')
        train = datewise
        scaler.fit(train)
        train = scaler.transform(train)
        n_input = len(test)
        n_features = 1
        generator = TimeseriesGenerator(train,
                                        train,
                                        length=n_input,
                                        batch_size=1)
        model.fit_generator(generator, epochs=30)
        test_predictions = []
        first_eval_batch = train[-n_input:]
        current_batch = first_eval_batch.reshape((1, n_input, n_features))
        for i in range(t):
            current_pred = model.predict(current_batch)[0]
            test_predictions.append(current_pred)
            current_batch = np.append(current_batch[:, 1:, :],
                                      [[current_pred]],
                                      axis=1)
        from pandas.tseries.offsets import DateOffset
        add_dates = [
            datewise.index[-1] + DateOffset(months=x) for x in range(0, t + 1)
        ]
        future_dates = pd.DataFrame(index=add_dates[1:],
                                    columns=datewise.columns)
        df_predict = pd.DataFrame(scaler.inverse_transform(test_predictions),
                                  index=future_dates[-t:].index,
                                  columns=[s4])
        d_proj = df_predict
        d_proj.reset_index(drop=True, inplace=True)
        forecast_prophet.reset_index(drop=True, inplace=True)
        d1 = pd.DataFrame(forecast_prophet['ds'])
        lstm = pd.concat([d1, d_proj], axis=1)
        #print('\n')
        #t=str(t)
        #print('Forecasted Data of '+s4+' feature for '+t+ ' days : ' )
        #print('\n')
        small = min(list9)
        no = list9.index(small)

        if no == 0:
            forecast_arima = pd.DataFrame(forecast_arima)
            forecast_arima.reset_index(drop=True, inplace=True)
            d18 = pd.DataFrame(forecast_prophet['ds'])
            d18.reset_index(drop=True, inplace=True)
            forecast_arima = pd.concat([d18, forecast_arima], axis=1)
            forecast_arima.rename(columns={'ds': s1}, inplace=True)
            forecast_data = forecast_arima
            forecast_data1 = forecast_data.set_index(s1)
            #print(forecast_data1)

        elif no == 1:
            forecast_prophet.rename(columns={
                'ds': s1,
                'yhat': s4
            },
                                    inplace=True)
            forecast_data = forecast_prophet
            forecast_data1 = forecast_data.set_index(s1)
            #plt.plot(datewise[s4],label="Original Data")
            #plt.plot(forecast_data[s4],label="Forecasted Data")
            #plt.legend()
            #plt.xlabel("Date")
            #plt.ylabel('Confirmed Cases')
            #plt.title("Confirmed Cases Prophet Model Forecasting")
            #plt.xticks(rotation=90)

        elif no == 2:
            lstm.rename(columns={'ds': s1, 'yhat': s4}, inplace=True)
            forecast_data = lstm
            forecast_data1 = forecast_data.set_index(s1)
            #plt.plot(datewise[s4],label="Original Data")
            #plt.plot(forecast_data[s4],label="Forecasted Data")
            #plt.legend()
            #plt.xlabel("Date")
            #plt.ylabel('Confirmed Cases')
            #plt.title("Confirmed Cases LSTM Model Forecasting")
            #plt.xticks(rotation=90)

        fig, ax = plt.subplots(nrows=1, ncols=1)
        ax.plot(datewise[s4], label="Original Data")
        ax.plot(forecast_data1[s4], label="Forecasted Data")
        ax.legend()
        ax.set_xlabel("Date")
        ax.set_ylabel(s4)
        ax.set_title('forecasted data of ' + s4)
        plt.xticks(rotation=90)
        plt.show()
        n = randint(0, 1000000000000)
        n = str(n)
        fig.savefig(
            os.path.join(app.config["IMAGE_UPLOADS"], n + 'time_series.png'))

        full_filename = os.path.join(app.config["IMAGE_UPLOADS"],
                                     n + 'time_series.png')
    # VARMAX
    if s3 > 1:
        n2 = s4
        n4 = n2.split()
        n5 = n2.split()
        if s5 == 'No':
            datewise = d3.groupby([s1]).agg({n4[0]: 'sum'})
            n4.pop(0)
            for i in range(len(n4)):
                d3i = d3.groupby([s1]).agg({n4[i]: 'sum'})
                datewise = pd.concat([datewise, d3i], axis=1)
        elif s5 == 'Yes':
            #s6=str(input('Enter the feature name from which who want to pick the category (eg:- country): '))
            #s7=str(input('Ente the category name from'+' '+s6+' '+'to forecast'+' '+s4+' '+'  : '))
            s8 = d3[d3[s6] == s7]
            datewise = s8.groupby([s1]).agg({n4[0]: 'sum'})
            n4.pop(0)
            for i in range(len(n4)):
                d3i = s8.groupby([s1]).agg({n4[i]: 'sum'})
                datewise = pd.concat([datewise, d3i], axis=1)
                #datewise=pd.concat([datewise,d3i],axis=1)
        list1 = []
        list2 = []
        list3 = []
        list4 = []
        for i in range(len(n5)):
            model_arima = auto_arima(datewise[n5[i]],
                                     trace=True,
                                     error_action='ignore',
                                     start_p=1,
                                     start_q=1,
                                     max_p=3,
                                     max_q=3,
                                     suppress_warnings=True,
                                     stepwise=False,
                                     seasonal=False)
            list1.append(model_arima.order)
        for i in range(len(list1)):
            list2.append(list1[i][0])
            list3.append(list1[i][1])
            list4.append(list1[i][2])
        list2.sort(reverse=True)
        p = list2[0]
        list3.sort(reverse=True)
        d = list3[0]
        list4.sort(reverse=True)
        q = list4[0]
        if d < 1:
            df_transformed = datewise
        elif d == 1:
            df_transformed = datewise.diff()
            df_transformed = df_transformed.dropna()
        elif d > 1:
            df_transformed = datewise.diff().diff()
            df_transformed = df_transformed.dropna()

        nobs = 12
        train, test = df_transformed[0:-nobs], df_transformed[-nobs:]
        model = VARMAX(train, order=(p, q), trend='c')
        results = model.fit(maxiter=100, disp=False)
        results.summary()
        df_forecast = results.forecast(nobs)
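        # invert the differencing: cumulatively sum the forecasted differences
        # and anchor them on the last observed levels of each series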
        for i in range(len(n5)):
            j = '1d'
            df_forecast[n5[i] + j] = (
                datewise[n5[i]].iloc[-nobs - 1] -
                datewise[n5[i]].iloc[-nobs - 2]) + df_forecast[n5[i]].cumsum()
            df_forecast[n5[i] + ' Forecasted'] = datewise[n5[i]].iloc[
                -nobs - 1] + df_forecast[n5[i]].cumsum()
        list89 = df_forecast.columns
        list98 = []
        for i in range(len(list89)):
            if list89[i][-11:] == ' Forecasted':
                list98.append(list89[i])
        d_new = pd.concat([datewise.iloc[-12:], df_forecast[list98]], axis=1)
        for i in range(len(n5)):
            RMSE = rmse(datewise[n5[i]][-nobs:], df_forecast[list98[i]])
            print('Root Mean Square Error for ' + n5[i] + ':', RMSE)
        model = VARMAX(df_transformed, order=(p, q), trend='c')
        results = model.fit(maxiter=100, disp=False)
        results.summary()
        #t=int(input('Enter number of days to forecast ? :'))
        df_forecast = results.forecast(t)
        for i in range(len(n5)):
            j = '2d'
            df_forecast[n5[i] + j] = (
                datewise[n5[i]].iloc[-t - 1] -
                datewise[n5[i]].iloc[-t - 2]) + df_forecast[n5[i]].cumsum()
            df_forecast[n5[i] + ' Forecasted'] = datewise[n5[i]].iloc[
                -t - 1] + df_forecast[n5[i]].cumsum()
        list89 = df_forecast.columns
        list98 = []
        for i in range(len(list89)):
            if list89[i][-11:] == ' Forecasted':
                list98.append(list89[i])
        df_forecast = df_forecast[list98]
        df_forecast.reset_index(inplace=True)
        df_forecast.rename(columns={'index': s1}, inplace=True)
        df_forecast.set_index(s1, inplace=True)
        forecast_data1 = df_forecast[list98]

        fig, b = plt.subplots(len(n5), 2, figsize=(15, 5))
        for i in range(len(n5)):
            datewise[n5[i]].plot(kind='line', ax=b[i][0], title=n5[i])
            df_forecast[list98[i]].plot(kind='line',
                                        ax=b[i][1],
                                        title='Forecasted data of ' + n5[i],
                                        color='orange')
            fig.tight_layout(pad=1.0)
        plt.show()

        n = randint(0, 1000000000000)
        n = str(n)
        fig.savefig(
            os.path.join(app.config["IMAGE_UPLOADS"], n + 'time_series.png'))

        full_filename = os.path.join(app.config["IMAGE_UPLOADS"],
                                     n + 'time_series.png')

    return render_template('step1_img.html',
                           user_image=full_filename,
                           tables=[forecast_data1.to_html(classes='page')],
                           titles=['na', 'Job'],
                           query1=request.form['query1'],
                           query2=request.form['query2'],
                           query3=request.form['query3'],
                           query4=request.form['query4'],
                           query5=request.form['query5'],
                           query6=request.form['query6'],
                           query7=request.form['query7'],
                           query8=request.form['query8'])
Example 28

plot_acf(endog_diff['energy_sum'], lags=20)


# In[51]:


plot_pacf(endog_diff['energy_sum'], lags=20)


# In[54]:


from statsmodels.tsa.statespace.varmax import VARMAX
model_varmax = VARMAX(endog=endog_diff, exog=exog, order=(15, 0))
results_varmax = model_varmax.fit(maxiter=5000, disp=False)
results_varmax.summary()


# In[55]:


results_varmax.plot_diagnostics()


# In[56]:


#exog_test = merged_df_varmax_test[['humidity', 'temperatureLow', 'month_1', 'month_2', 'month_3',
#                                   'month_4','month_5', 'month_6', 'month_7', 'month_8', 'month_9', 'month_10','month_11', 'month_12']]
Example 29
                        index_col=0,
                        encoding="utf-8-sig",
                        converters={0: to_dt},
                        names=["TS", "x", "y", "z"])

    req_period = datetime.timedelta(milliseconds=100)

    even_frame = frame.resample(req_period).mean().interpolate()

    #aclr_x=even_frame["x"]
    aclr_x = even_frame
    seria_len = len(aclr_x)

    train_seria, test_seria = aclr_x[:seria_len // 2], aclr_x[seria_len // 2:]

    model = VARMAX(train_seria, order=(5, 5))
    #model = VARMAX(train_seria, order=(3, 3))
    #model = VARMAX(train_seria,)
    model_fit = model.fit()

    predictions = model_fit.forecast(len(test_seria))
    print(type(predictions))
    print(predictions.shape)

    for axis in range(3):
        plt.subplot(3, 1, axis + 1)
        # plt.plot(test_seria.index[:100], predictions[:, axis][:100], label="predictions")
        plt.plot(predictions.iloc[:100, axis], label="predicted")
        plt.plot(test_seria.iloc[:100, axis], label="expected")
        plt.legend(loc="upper right")
    plt.show()
Example 30
# -*- coding: utf-8 -*-
"""
Created on Mon Oct 21 10:47:28 2019

@author: Nielsen
"""

# VARMA example
from statsmodels.tsa.statespace.varmax import VARMAX
from random import random
# contrived dataset with dependency
data = list()
for i in range(100):
    v1 = random()
    v2 = v1 + random()
    row = [v1, v2]
    data.append(row)
# fit model
model = VARMAX(data, order=(1, 1))
model_fit = model.fit(disp=False)
# make prediction
yhat = model_fit.forecast()
print(yhat)