def _estimate_varma_coefs(self, X): if self._criterion not in ["aic", "bic", "hqic"]: result = VARMAX(X, order=self._order, trend="c").fit(maxiter=self._max_iter) else: min_value = float("Inf") result = None orders = [(p, q) for p in range(self._order[0] + 1) for q in range(self._order[1] + 1)] orders.remove((0, 0)) for order in orders: fitted = VARMAX(X, order=order, trend="c").fit(maxiter=self._max_iter) value = getattr(fitted, self._criterion) if value < min_value: min_value = value result = fitted return ( result.coefficient_matrices_var, result.coefficient_matrices_vma, result.specification["order"], result.resid, )
def get(self, request, *args, **kwargs):
    """Forecast the held-out period with VARMAX and report forecast accuracy.

    Reads the optional ``startdate`` / ``enddate`` query parameters
    (``YYYY-MM-DD``, defaulting to ``1970-01-30`` / ``2018-01-01``).  Rows from
    ``startdate`` up to one month after ``enddate`` form the training set;
    rows from that point on form the test set.  The response holds the
    forecast ``price`` column, the matching ``actual`` prices and the ``mape``
    entry returned by ``forecast_accuracy``.
    """
    # Imported locally under an alias so this block is self-contained and the
    # name cannot clash with a variable called ``json``.
    import json as json_lib

    start_date = self.request.query_params.get('startdate', '1970-01-30')
    end_date = self.request.query_params.get('enddate', '2018-01-01')

    data = read_frame(PriceProduction.objects.all())
    data['date'] = pd.to_datetime(data['date'])
    data = data.drop('id', axis=1)
    data = data.set_index('date')

    startdate = dat.strptime(start_date, '%Y-%m-%d')
    enddate = dat.strptime(end_date, '%Y-%m-%d')
    nextmonth = enddate + relativedelta.relativedelta(months=1)
    train, test = data[startdate:nextmonth], data[nextmonth:]

    # VARMAX takes order=(p, q); the third element of the former (1, 1, 1)
    # was never used by the model.
    model = VARMAX(train, order=(1, 1))
    model_fit = model.fit(disp=False)
    yhat = model_fit.forecast(len(test) - 1)
    yhat['actual'] = test['price']

    predictdata = yhat.drop("production", axis=1)
    metrics = forecast_accuracy(predictdata['price'], predictdata['actual'])
    predictdata.index = predictdata.index.astype("str")
    print(predictdata)

    # ``to_json`` emits JSON (``null`` for missing values), which
    # ``ast.literal_eval`` cannot parse; use a real JSON parser instead.
    payload = json_lib.loads(predictdata.to_json())
    payload['mape'] = metrics['mape']
    return Response(payload)
def find_best_parameters(self, data: pd.DataFrame):
    """
    Given a dataset, finds the best parameters using the settings in the class

    The first column of ``data`` is treated as the target.  It is paired with
    each remaining column in turn; for every pair a grid of VARMAX(p, q)
    models with ``0 <= p <= self.p_max`` and ``0 <= q <= self.q_max`` is
    fitted and scored with the result attribute named by ``self.scoring``.
    The best combination is stored in ``self.best_p``, ``self.best_d`` (the
    column position of the selected companion variable) and ``self.best_q``.

    Raises:
        ValueError: if ``data`` has no column besides the target, so there is
            nothing to search over.
    """
    # dmax is the number of columns; it bounds the companion-variable loop.
    dmax = data.shape[1]
    cols = data.columns.tolist()
    # TODO: #14 Make sure that we have a way to not rely on column order to determine the target
    # It is assumed that the first column of the dataframe is the target variable.
    i = 1
    results_dict = {}
    for d_val in range(1, dmax):
        # Takes the target column and one other endogenous column at a time
        # and makes a prediction based on that. Then selects the best
        # exogenous column at the end.
        y_train = data.iloc[:, [0, d_val]]
        print('\nAdditional Variable in VAR model = %s' % cols[d_val])
        info_criteria = pd.DataFrame(
            index=['AR{}'.format(p) for p in range(0, self.p_max + 1)],
            columns=['MA{}'.format(q) for q in range(0, self.q_max + 1)],
        )
        for p_val, q_val in itertools.product(range(0, self.p_max + 1),
                                              range(0, self.q_max + 1)):
            row_label = 'AR{}'.format(p_val)
            col_label = 'MA{}'.format(q_val)
            if p_val == 0 and q_val == 0:
                # Order (0, 0) is not a model; mark the cell as missing.
                info_criteria.loc[row_label, col_label] = np.nan
            else:
                try:
                    model = VARMAX(y_train, order=(p_val, q_val), trend='c')
                    # ``maxiter`` is the iteration-limit keyword of ``fit``.
                    model = model.fit(maxiter=1000, disp=False)
                    # getattr instead of eval: same lookup without executing
                    # an arbitrary expression built from ``self.scoring``.
                    info_criteria.loc[row_label, col_label] = getattr(model, self.scoring)
                except Exception:
                    # Best effort: a failed fit leaves the cell as NaN and
                    # the search continues with the next order.
                    pass
            # Report and advance the counter the same way on every path.
            print(' Iteration %d completed' % i)
            i += 1
        info_criteria = info_criteria[info_criteria.columns].astype(float)
        interim_p, interim_q, interim_bic = find_lowest_pq(info_criteria)
        if self.verbose == 1:
            _, axis = plt.subplots(figsize=(20, 10))
            axis = sns.heatmap(
                info_criteria,
                mask=info_criteria.isnull(),
                ax=axis,
                annot=True,
                fmt='.0f'
            )
            axis.set_title(self.scoring)
        results_dict[(interim_p, d_val, interim_q)] = interim_bic
    if not results_dict:
        raise ValueError(
            'find_best_parameters needs at least one column besides the target'
        )
    # Pick the (p, d, q) triple with the lowest score in a single pass.
    best_pdq, _ = min(results_dict.items(), key=operator.itemgetter(1))
    self.best_p = int(best_pdq[0])
    self.best_d = int(best_pdq[1])
    self.best_q = int(best_pdq[2])
    print('Best variable selected for VAR: %s' % data.columns.tolist()[self.best_d])
def varma_prediction(train,test,steps):
    """Fit a VARMA model on ``train`` and return its forecast as a DataFrame.

    The (p, q) order comes from ``get_var_pq_params(train)``.  The forecast
    horizon is ``steps`` when that is truthy, otherwise ``len(test)``.  The
    returned frame uses the column labels of ``train``.
    """
    p, q = get_var_pq_params(train)
    fitted = VARMAX(train, order=(p, q)).fit(disp=False)
    horizon = steps if steps else len(test)
    forecast = fitted.forecast(steps=horizon)
    return pd.DataFrame(forecast, columns=train.columns)
def varma_forecast(history, config):
    """Return the first element of a one-step-ahead VARMAX prediction.

    ``config`` unpacks to ``(order, trend)``.  The model is built without
    enforcing stationarity or invertibility, fitted silently, and asked to
    predict the single position just after the end of ``history``.
    """
    order, trend = config
    # Define and fit the model in one go.
    fitted = VARMAX(
        history,
        order=order,
        trend=trend,
        enforce_stationarity=False,
        enforce_invertibility=False,
    ).fit(disp=False)
    # start == end == len(history): exactly one step past the sample.
    next_index = len(history)
    return fitted.predict(next_index, next_index)[0]
def model_varmax(train_data,test_data,train_data1,test_data1):
    """Fit a two-series VARMAX(1, 1) model and print its 10-step RMSE.

    ``train_data1`` and ``train_data`` are each reshaped to a column and
    stacked side by side (``train_data1`` first).  The first forecast column
    is compared with the first ten entries of ``test_data1``.

    ``test_data`` is accepted for interface compatibility but not used.
    """
    # reshape((-1, 1)) accepts any series length; the former hard-coded
    # (372, 1) only worked for exactly 372 observations.
    x = train_data1.reshape((-1, 1))
    x1 = train_data.reshape((-1, 1))
    lis = np.concatenate((x, x1), axis=1)
    print(np.shape(lis))
    # forecast
    model = VARMAX(lis, order=(1, 1))
    model_fit = model.fit(disp=-1)
    print(model_fit.summary().tables[1])
    predictions = model_fit.forecast(steps=10)
    # The label says RMSE, so take the square root of the mean squared error.
    rmse = np.sqrt(mean_squared_error(predictions[:, 0], test_data1[0:10]))
    print('VARMAX RMSE: ', rmse)
def VectorAutoRegressiveMovingAverage(self):
    """Forecast ``self.end - self.start`` steps with a VARMAX(1, 1) model.

    Returns whatever ``forecast`` returns for list input.
    """
    # currently, exodata not used.
    horizon = self.end - self.start
    # NOTE(review): ``data`` is not defined in this method; presumably a
    # module-level DataFrame - confirm against the rest of the file.
    datalist = data.values.tolist()
    # One joint model covers all axes.
    model = VARMAX(datalist, order=(1, 1))
    model_fit = model.fit(disp=False)
    # ``forecast`` takes only the horizon; passing ``model_fit.y``
    # positionally collided with the ``steps`` keyword.
    return model_fit.forecast(steps=horizon)
def get(self, request, *args, **kwargs):
    """Return an ``nsteps``-ahead VARMAX forecast of the ``price`` series.

    ``nsteps`` is read from the query string (default 10).  The model is
    fitted on every ``PriceProduction`` row, indexed by ``date``; the response
    maps each forecast timestamp (as a string) to the predicted price.
    """
    # Imported locally under an alias so this block is self-contained and the
    # name cannot clash with a variable called ``json``.
    import json as json_lib

    n_steps = int(self.request.query_params.get('nsteps', 10))

    data = read_frame(PriceProduction.objects.all())
    data['date'] = pd.to_datetime(data['date'])
    data = data.drop('id', axis=1)
    data = data.set_index('date')

    # VARMAX takes order=(p, q); the third element of the former (1, 1, 1)
    # was never used by the model.
    model = VARMAX(data, order=(1, 1))
    model_fit = model.fit(disp=False)

    yhat = model_fit.forecast(n_steps)
    yhat = yhat['price']
    yhat.index = yhat.index.astype("str")

    # ``to_json`` emits JSON (``null`` for missing values), which
    # ``ast.literal_eval`` cannot parse; use a real JSON parser instead.
    return Response(json_lib.loads(yhat.to_json()))
def VARMA(self, order=(1, 1), name="VARMA"):
    """Fit a VARMA model on the training data and plot its forecast.

    Prints a banner containing ``name``, fits ``VARMAX`` with the given
    ``order`` on ``self.data_train``, forecasts one step per row of
    ``self.data_test`` and draws the forecast in red over
    ``self.data_train_and_test``.
    """
    print("=" * 30 + "\n" + name + "\n" + "=" * 30 + "\n")
    # fit model
    model = VARMAX(self.data_train, order=order)
    model_fit = model.fit(disp=False)
    # make prediction: the horizon follows the test set instead of a fixed 42
    horizon = len(self.data_test)
    yhat = model_fit.forecast(steps=horizon)
    # Use the raw values so rows are matched to the test index by position;
    # passing a DataFrame would re-align on labels and could yield NaN rows.
    prediction = pd.DataFrame(getattr(yhat, "values", yhat),
                              index=self.data_test.index.values,
                              columns=self.data_train.columns.values)
    plt.plot(self.data_train_and_test)
    plt.plot(prediction, color='red')
    plt.title(name)
    plt.show()
def test_4(self):
    """VARMAX(1, 2): zserver scores of the exported PMML match statsmodels.

    Exports the fitted model to ``varmax_12.pmml``, scores a 5-step horizon
    on the server, and compares point forecasts plus the 95% lower and upper
    bounds for each of the three San Diego series.
    """
    series_names = ('SanDiegoHum', 'SanDiegoPressure', 'SanDiegoTemp')

    data = self.getMultiDimensionalData()
    result = VARMAX(data, order=(1, 2)).fit()
    f_name = 'varmax_12.pmml'
    StatsmodelsToPmml(result, f_name, model_name="varmax_test", conf_int=[95])
    model_name = self.adapa_utility.upload_to_zserver(f_name)
    z_pred = self.adapa_utility.score_in_zserver(model_name, {'h': 5}, 'TS')
    forecasts = result.get_forecast(5)

    server_output = z_pred['outputs'][0]
    intervals = forecasts.conf_int()

    # Collect every (server, statsmodels) pair first, in the original order:
    # point forecasts, then lower bounds, then upper bounds.
    pairs = []
    for series in series_names:
        pairs.append((
            list(server_output['predicted_' + series].values()),
            forecasts.predicted_mean[series].values.tolist(),
        ))
    for bound in ('lower', 'upper'):
        for series in series_names:
            pairs.append((
                list(server_output['conf_int_95_' + bound + '_' + series].values()),
                intervals[bound + ' ' + series].values.tolist(),
            ))

    for z_values, model_values in pairs:
        self.assertEqual(np.allclose(z_values, model_values), True)
def trainVectorARMAMethodModel():
    """Train the VARMA model on the stored training set and persist it.

    Loads the training data with ``readVectorARMAMethodXTrain``, fits a
    ``VARMAX`` model of order ``(1, 2)`` with a constant trend (at most 1000
    optimizer iterations, silent), saves the fitted result through
    ``saveVectorARMAMethodModel`` and prints its summary.
    """
    # NOTE(review): an earlier comment here said p = 5 was selected by
    # AIC/BIC, but the order actually used is (1, 2) - confirm which is meant.
    training_set = readVectorARMAMethodXTrain()
    fitted_result = VARMAX(training_set, order=(1, 2), trend="c").fit(
        maxiter=1000, disp=False
    )
    # Persist the fitted model (pickle file, per the helper's name).
    saveVectorARMAMethodModel(fitted_result)
    print(fitted_result.summary())
def get_best_model(self, data: pd.DataFrame):
    """
    Returns the 'unfit' VARMAX model built on the given dataset with the
    selected best (p, q) order and a constant trend.
    This can be used to fit or refit the model.
    """
    best_order = (self.best_p, self.best_q)
    return VARMAX(data, order=best_order, trend='c')
def predict(self, action):
    """
    Description: slides ``action`` into the history window, refits a
    VARMAX(p, p) model on the window and returns its one-step forecast
    (also stored in ``self.y_pred`` and printed).
    """
    # Drop the oldest entry and store the new action at the end.
    self.ts.pop(0)
    self.ts.append(action)

    window_order = (self.p, self.p)
    fitted = VARMAX(self.ts, order=window_order).fit(disp=False)
    self.y_pred = fitted.forecast(steps=1)
    print(self.y_pred)
    return self.y_pred
def _fit(self, train_data):
    """Fit a VARMA(p, q) model on ``train_data`` and keep the fitted result.

    Parameters
    ----------
    train_data: pd.DataFrame
        A pandas DataFrame holding the training observations.

    Returns
    -------
    None
        The fitted result is stored in ``self._model``.
    """
    self._model = VARMAX(train_data, order=(self._p, self._q)).fit(disp=False)
def model_var1(endog=None, params=None, measurement_error=False, init=None):
    """Build a VAR(1) model without trend and initialize its state space.

    Defaults: ``endog`` is 400 times the first difference of log ``realgdp``
    and ``realcons`` over the first 21 rows of ``macrodata``; ``params`` is
    a fixed 7-element vector, extended by ``4, 5`` when ``measurement_error``
    is set; ``init`` is a diffuse initialization.

    Returns the ``(mod, ssm)`` pair.
    """
    if endog is None:
        log_levels = np.log(macrodata[['realgdp', 'realcons']])
        endog = log_levels.iloc[:21].diff().iloc[1:] * 400

    if params is None:
        # NOTE(review): the measurement-error extension is applied to the
        # default vector only - confirm against the upstream source.
        params = np.r_[0.5, 0.3, 0.2, 0.4, 2**0.5, 0, 3**0.5]
        if measurement_error:
            params = np.r_[params, 4, 5]

    # Model
    mod = VARMAX(endog, order=(1, 0), trend='n',
                 measurement_error=measurement_error)
    mod.update(params)
    ssm = mod.ssm

    initialization = init if init is not None else Initialization(ssm.k_states, 'diffuse')
    ssm.initialize(initialization)
    return mod, ssm
def varma_final(self):
    """Roll a VARMA(1, 1) model forward one step per test observation.

    ``self.total`` is log-transformed, differenced with ``self.difference``
    and stripped of missing rows.  For each element of ``self.test`` the
    model is refitted, the one-step forecast is recorded and appended to the
    history used for the next fit.

    Returns:
        list: one single-row DataFrame per step, rounded to two decimals
        with negative values replaced by 0.
    """
    input_data = numpy.log(numpy.array(self.total))
    input_data = self.difference(input_data)
    input_data = pd.DataFrame(input_data).dropna()

    predictions = []
    for _ in range(len(self.test)):
        model_fit = VARMAX(input_data, order=(1, 1)).fit(disp=False)
        # Normalise the forecast to a one-row frame with the history's
        # columns so it can be appended regardless of its index.
        yhat = pd.DataFrame(numpy.asarray(model_fit.forecast()),
                            columns=input_data.columns)
        predictions.append(yhat)
        # ``DataFrame.append`` returned a new frame that was discarded (and
        # no longer exists in pandas 2); grow the history explicitly.
        input_data = pd.concat([input_data, yhat], ignore_index=True)

    # Element-wise clean-up; ``if frame < 0`` is ambiguous for several columns.
    return [step.round(2).clip(lower=0) for step in predictions]
def model_var1(endog=None, params=None, measurement_error=False, init=None):
    """Return a trend-free VAR(1) ``VARMAX`` model and its state space.

    With no arguments the model is built on annualized growth rates
    (400 x differenced logs) of ``realgdp`` and ``realcons`` from the first
    21 rows of ``macrodata``, updated with a fixed parameter vector and
    initialized diffusely.
    """
    if endog is None:
        growth = np.log(macrodata[['realgdp', 'realcons']]).iloc[:21].diff()
        endog = growth.iloc[1:] * 400

    if params is None:
        # NOTE(review): the two measurement-error variances are appended to
        # the default vector only - confirm against the upstream source.
        params = np.r_[0.5, 0.3, 0.2, 0.4, 2**0.5, 0, 3**0.5]
        if measurement_error:
            params = np.r_[params, 4, 5]

    # Model
    model_options = dict(order=(1, 0), trend='n',
                         measurement_error=measurement_error)
    mod = VARMAX(endog, **model_options)
    mod.update(params)

    ssm = mod.ssm
    if init is None:
        init = Initialization(ssm.k_states, 'diffuse')
    ssm.initialize(init)

    return mod, ssm
def varmax_model_fit(self, x_train, x_test, df_time, oreder = (1, 0), col_exog=[], verbose = 1):
    """Fit a VARMAX model, show a Durbin-Watson check and forecast the test span.

    Columns named in ``col_exog`` are moved out of ``x_train`` / ``x_test``
    and used as exogenous regressors.  The forecast covers one step per row
    of ``x_test``, is indexed by ``df_time['test']`` and takes the column
    labels of ``x_test``.  With ``verbose == 1`` the diagnostics, the
    forecast table and one plot per column are written to Streamlit.

    Note: ``x_train`` and ``x_test`` are modified in place (exogenous columns
    dropped, ``x_test`` re-indexed).

    Returns the forecast DataFrame.
    """
    has_exog = bool(col_exog)
    show = verbose == 1

    model_kwargs = {'order': oreder}
    if has_exog:
        exo_train, exo_test = pd.DataFrame(), pd.DataFrame()
        for name in col_exog:
            exo_train[name] = x_train[name]
            x_train.drop([name], axis=1, inplace=True)
            exo_test[name] = x_test[name]
            x_test.drop([name], axis=1, inplace=True)
        model_kwargs['exog'] = exo_train
    result = VARMAX(x_train, **model_kwargs).fit()

    # One Durbin-Watson statistic per endogenous column, rounded for display.
    dw_stats = durbin_watson(result.resid)
    df_results = pd.DataFrame(
        {col: [round(val, 2)] for col, val in zip(x_train.columns, dw_stats)}
    )
    if show:
        st.subheader('durbin_watson test')
        st.write('the closer the result is to 2 then there is no correlation, the closer to 0 or 4 then correlation implies')
        st.write(df_results.T)

    forecast_kwargs = {'steps': x_test.shape[0]}
    if has_exog:
        forecast_kwargs['exog'] = exo_test
    df_forecast = result.forecast(**forecast_kwargs)

    df_forecast.index = df_time['test']
    df_forecast.columns = x_test.columns
    x_test.index = df_time['test']

    if show:
        st.write(df_forecast)
        for i, col in enumerate(x_test):
            fig = ds().nuova_fig(555+i)
            st.subheader(col)
            df_forecast[col].plot(label = 'Predicition')
            x_test[col].plot(label = 'True')
            ds().legenda()
            st.pyplot(fig)
    return df_forecast
def initialize(self, params):
    """Set up the history window from ``params`` and mark the object ready.

    ``params['p']`` becomes ``self.p`` and ``params['dim']`` becomes
    ``self.action_dim``.  ``self.ts`` is created as ``p`` independent rows of
    ``action_dim`` zeros.

    The previous body also fitted a throw-away ``VARMAX(order=(16, 16))`` on
    the all-zero window and then called ``exit()``, which terminated the
    process before ``self.initialized`` could be set; that debugging scaffold
    is removed.
    """
    self.p = params['p']
    self.action_dim = params['dim']
    # A comprehension gives every row its own list; ``[[0] * dim] * p`` would
    # repeat one shared row object p times.
    self.ts = [[0] * self.action_dim for _ in range(self.p)]
    self.initialized = True
def precictTrajectory(self):
    """Return a 5-step VARMA(1, 1) trajectory forecast as long/lat points.

    NOTE(review): ``self.gps_points()`` is called but its result is not used;
    the model is fitted on 100 rows of contrived random data instead -
    confirm whether the real GPS points were meant to be used.
    """
    predict_num = 5
    self.gps_points()
    # data = [[p["long"],p["lat"]] for p in gps_points]

    # Contrived two-column series: the second value depends on the first.
    data = []
    for _ in range(100):
        base = random()
        data.append([base, base + random()])

    forecast = VARMAX(data, order=(1, 1)).fit(disp=False).forecast(predict_num)
    return {
        "object_id": self.lastappeared.object_id,
        "gps_points": [
            {"long": point[0], "lat": point[1]} for point in forecast
        ],
    }
def _fit(self, train_features, train_target):
    """Fit a VARMAX(p, q) model of the targets given the features.

    ``train_target`` is used as the endogenous data and ``train_features``
    as the exogenous regressors.

    Parameters
    ----------
    train_features: pd.DataFrame
        A pandas DataFrame representing the training features.
    train_target: pd.Series
        A pandas Series representing the target variable.

    Returns
    -------
    None
        The fitted result is stored in ``self._model`` and ``self._is_fit``
        is set to ``True``.
    """
    unfitted = VARMAX(endog=train_target, exog=train_features,
                      order=(self._p, self._q))
    self._model = unfitted.fit(disp=False)
    self._is_fit = True
def varmax_model(target_variable, exog_variables, start_date, end_date, plot):
    """Fit VARMAX(1, 1) with exogenous inputs on a 70/30 split and print the result.

    The first 70% of ``target_variable`` is the training set and the rest the
    test set; each entry of ``exog_variables`` is split the same way on its
    ``.values`` and the pieces are stacked as columns.  The prediction from
    ``start_date`` to ``end_date`` is stored in a ``"VARMAX"`` column on the
    test slice, optionally plotted, and the slice is printed.
    """
    from statsmodels.tsa.statespace.varmax import VARMAX
    import numpy as np

    # Split target variable into training/test set
    split_at = int(0.7 * (len(target_variable)))
    train, test = target_variable[:split_at], target_variable[split_at:]

    # Split external variables into test/training sets
    exog_variables_train, exog_variables_test = [], []
    for variable in exog_variables:
        values = variable.values
        cut = int(0.7 * (len(values)))
        exog_variables_train.append(values[:cut])
        exog_variables_test.append(values[cut:])
    exog_train = np.column_stack(exog_variables_train)
    exog_test = np.column_stack(exog_variables_test)

    # Fit the model; y_hat_avg is the same object as the test slice.
    y_hat_avg = test
    fitted = VARMAX(train, exog=exog_train, order=(1, 1)).fit(disp=False)

    # make prediction
    y_hat_avg["VARMAX"] = fitted.predict(exog=exog_test, start=start_date, end=end_date)

    if plot == True:
        import matplotlib.pyplot as plt
        plt.figure(figsize=(16, 8))
        plt.plot(test[test.columns[0]], label='Test')
        plt.plot(y_hat_avg['VARMAX'], label='VARMAX')
        plt.legend(loc='best')
        plt.show()
    print(y_hat_avg)
def regress_varmax(df_endog, bin_size_weeks, n):
    """
    Trains a VARMAX model of order (4, 0) on the transformed patent series
    and logs its summary.

    The input is first passed through ``VARMAXTransformer("varmax")``; columns
    with four or fewer distinct values are then dropped before fitting.

    :param df_endog: the multiple endogenous time series, not yet transformed
    :param bin_size_weeks: the bin size in weeks
    :type bin_size_weeks: pd.Timedelta
    :param n: the number of steps required in each patent series
    :return: None
    """
    order = 4
    df_endog = VARMAXTransformer("varmax").transform(df_endog, bin_size_weeks, n)

    # remove columns with low variance: keep those with more than `order`
    # distinct values
    distinct_counts = df_endog.apply(pd.Series.nunique, axis=0)
    df_endog = df_endog.loc[:, distinct_counts > order]

    logger.debug(df_endog)
    logger.debug(df_endog.describe())
    logger.debug("Training VARMAX...")
    res = VARMAX(df_endog.values, order=(order, 0)).fit(maxiter=1000, disp=True)
    logger.debug(res.summary())
def get_VAR_models(self, data, exog_data=None, order=None, type='VAR'):
    '''
    generate the model VAR. Vector Autoregression (VAR) is a multivariate
    forecasting algorithm that is used when two or more time series influence
    each other. You need at least two time series (variables).

    :param data: matrix with all the data, pandas. The model will try to
        predict the next value for each of the features.
    :param exog_data: features that are not strictly influenced by the others
        can be put in this matrix, pandas
    :param order: (p,q) order of the model for the number of AR and MA
        parameters to use, needed only with VARMAX
    :param type: VAR, VARMAX
    :return: model (not fitted)
    :raises ValueError: if ``type`` is neither 'VAR' nor 'VARMAX'
    '''
    if type == 'VAR':
        return VAR(data, exog=exog_data)
    if type == 'VARMAX':
        return VARMAX(data, exog=exog_data, order=order)
    # Previously an unknown type fell through to ``return model`` and failed
    # with an UnboundLocalError that did not mention the bad argument.
    raise ValueError(
        "Unknown model type %r; expected 'VAR' or 'VARMAX'" % (type,)
    )
def VARMAXgridsearch(modeldata, cfg_list):
    """Fit one VARMAX model per order in ``cfg_list`` and collect the results.

    Args:
        modeldata: endogenous data passed to ``VARMAX``.
        cfg_list: iterable of ``order`` values to try.

    Returns:
        list[dict]: one dict per order with the keys ``'order'``, ``'model'``
        (the fitted result) and ``'meanError'`` (absolute mean residual of
        the first residual column).
    """
    results = []
    for order in cfg_list:
        varmaxmodel = VARMAX(modeldata, order=order).fit()
        residuals = DataFrame(varmaxmodel.resid)
        mean_error = abs(residuals.mean())
        results.append({
            'order': order,
            'model': varmaxmodel,
            # ``.iloc[0]`` selects the first column by position; ``[0]`` is a
            # label lookup that only works when a column is labelled 0.
            'meanError': mean_error.iloc[0],
        })
    return results
print(yhat) ## Varmax: Like VAR, but with seasonality and exogenous variable # VARMAX example from statsmodels.tsa.statespace.varmax import VARMAX from random import random # contrived dataset with dependency data = list() for i in range(100): v1 = random() v2 = v1 + random() row = [v1, v2] data.append(row) data_exog = [x + random() for x in range(100)] # fit model model = VARMAX(data, exog=data_exog, order=(1, 1)) model_fit = model.fit(disp=False) # make prediction data_exog2 = [[100]] yhat = model_fit.forecast(exog=data_exog2) print(yhat) ## Exponential Smoothing: Like autoregression but time decay of lagged values ## Can use to get trend or seasonal effect # HWES example from statsmodels.tsa.holtwinters import ExponentialSmoothing from random import random # contrived dataset data = [x + random() for x in range(1, 100)] # fit model
]) q1 = np.asmatrix([ [-0., 0.], [0., -0.], ]) p = [p1]#, p2] #, p3] q = [q1]#q1] # y0 = np.asmatrix([[0., 0., 0.]]).T #, [0., 0., 0.] X = sim.varmapqGaussian(t = t, pMatrix = p, qMatrix = q)#, y0 = y0) y = VARMAX(X.T, order = (1,1)).fit() print(y.summary()) x1 = np.asarray(X[0,:]).reshape(t) x2 = np.asarray(X[1,:]).reshape(t) # x3 = np.asarray(X[2,:]).reshape(t) # nprocess = X.shape[0] pLag = len(p) qLag = len(q) # params = logL.maxVARMApqN(X, pLag, qLag)
def submit_ts():
    """Handle the time-series upload form and render the forecast page.

    Saves the uploaded CSV, reads the form fields and then takes one of two
    paths depending on ``query3``:

    * ``query3 == 1``: one target column (``query4``).  ARIMA, Prophet and an
      LSTM are each scored on the last 5% of the date-aggregated series; the
      model with the lowest RMSE supplies the ``query8``-step forecast.
    * ``query3 > 1``: ``query4`` is a whitespace-separated list of columns,
      modelled jointly with VARMAX.

    In both paths a plot is written under ``app.config["IMAGE_UPLOADS"]`` and
    the forecast table is passed to the ``step1_img.html`` template.

    Form fields as used below: ``query1`` date column, ``query2`` date
    format, ``query5`` 'Yes'/'No' category filter switch, ``query6`` /
    ``query7`` filter column and value, ``query8`` forecast horizon.
    """
    f = request.files['userfile']
    # NOTE(review): the file is saved under the client-supplied filename,
    # unsanitised, relative to the working directory - confirm this is safe.
    f.save(f.filename)
    print(f)
    s1 = request.form['query1']
    s2 = request.form['query2']
    s3 = int(request.form['query3'])
    s4 = request.form['query4']
    s5 = request.form['query5']
    if s5 == 'Yes':
        # s6 / s7 exist only on this path; they are read again further down
        # only under the same ``s5 == 'Yes'`` condition.
        s6 = request.form['query6']
        s7 = request.form['query7']
    t = int(request.form['query8'])
    d1 = f.filename
    print(d1)
    d3 = pd.read_csv(d1)
    if s3 == 1:
        d3[s1] = pd.to_datetime(d3[s1], format=s2, infer_datetime_format=True)
        list1 = []
        list3 = []
        list9 = []
        # The bare string below is disabled clean-up code kept as a no-op.
        """ for i in range(len(d3[s4])): try: list1.append(int(d3[s4][i])) except: list3.append(i) continue for i in range(len(list3)): n2=d3[s4][list3[i]] d3[s4].replace(n2,np.nan,inplace=True) for i in range(len(d3)): d3[s4].fillna(d3[s4].median(),inplace=True) d3[s4]=d3[s4].astype(int)"""
        # Aggregate the target per date, optionally for one category only.
        if s5 == 'No':
            datewise = d3.groupby([s1]).agg({s4: 'sum'})
        elif s5 == 'Yes':
            s8 = d3[d3[s6] == s7]
            datewise = s8.groupby([s1]).agg({s4: 'sum'})
        #ARIMA
        datewise = datewise.astype('float32')
        # First 95% of the rows train the model, the last 5% validate it.
        model_train = datewise.iloc[:int(datewise.shape[0] * 0.95)]
        valid = datewise.iloc[int(datewise.shape[0] * 0.95):]
        n11 = pd.infer_freq(datewise.index, warn=True)
        # list9 collects the validation RMSEs in the order ARIMA, Prophet,
        # LSTM; that order is what the ``no`` index below refers to.
        list9 = []
        model_arima = auto_arima(model_train[s4],
                                 trace=True,
                                 error_action='ignore',
                                 start_p=1,
                                 start_q=1,
                                 max_p=3,
                                 max_q=3,
                                 suppress_warnings=True,
                                 stepwise=False,
                                 seasonal=False)
        model_arima.fit(model_train[s4])
        prediction_arima = model_arima.predict(len(valid))
        print("Root Mean Square Error for ARIMA Model: ",
              np.sqrt(mean_squared_error(list(valid[s4]), (prediction_arima))))
        list9.append(
            np.sqrt(mean_squared_error(list(valid[s4]), (prediction_arima))))
        print('\n')
        # Refit on the full series with the selected order for the forecast.
        m1 = model_arima.order
        model = ARIMA(datewise[s4], order=m1)
        results = model.fit()
        s = t - 1
        forecast_arima = results.predict(len(datewise),
                                         len(datewise) + s,
                                         typ='levels').rename(s4)
        #Prophet
        datewise1 = datewise.reset_index()
        datewise1.rename(columns={s1: 'ds', s4: 'y'}, inplace=True)
        train = datewise1.iloc[:int(datewise1.shape[0] * 0.95)]
        valid = datewise1.iloc[int(datewise1.shape[0] * 0.95):]
        m = Prophet(weekly_seasonality=True)
        m.fit(train)
        future = m.make_future_dataframe(periods=len(valid), freq=n11)
        forecast = m.predict(future)
        predictions = forecast.tail(len(valid))['yhat']
        print('\n')
        print("Root Mean Squared Error for Prophet Model: ",
              rmse(valid['y'], predictions))
        print('\n')
        list9.append(rmse(valid['y'], predictions))
        # Second Prophet model, fitted on all rows, produces the t-step forecast.
        m = Prophet(weekly_seasonality=True)
        m.fit(datewise1)
        future = m.make_future_dataframe(periods=t, freq=n11)
        forecast = m.predict(future)
        forecast_prophet = forecast[['ds', 'yhat']].tail(t)
        #LSTM
        train = datewise.iloc[:int(datewise.shape[0] * 0.95)]
        test = datewise.iloc[int(datewise.shape[0] * 0.95):]
        scaler = MinMaxScaler()
        scaler.fit(train)
        scaled_train = scaler.transform(train)
        scaled_test = scaler.transform(test)
        # The look-back window has the same length as the validation slice.
        n_input = len(test)
        n_features = 1
        generator = TimeseriesGenerator(scaled_train,
                                        scaled_train,
                                        length=n_input,
                                        batch_size=1)
        model = Sequential()
        model.add(
            LSTM(150, activation='relu', input_shape=(n_input, n_features)))
        model.add(Dense(1))
        model.compile(optimizer='adam', loss='mse')
        model.fit_generator(generator, epochs=30)
        first_eval_batch = scaled_train[-n_input:]
        test_predictions = []
        first_eval_batch = scaled_train[-n_input:]
        current_batch = first_eval_batch.reshape((1, n_input, n_features))
        # Recursive forecast: each prediction is pushed into the window and
        # the oldest value is dropped.
        for i in range(len(test)):
            current_pred = model.predict(current_batch)[0]
            test_predictions.append(current_pred)
            current_batch = np.append(current_batch[:, 1:, :],
                                      [[current_pred]],
                                      axis=1)
        true_predictions = scaler.inverse_transform(test_predictions)
        test['predictions'] = true_predictions
        list9.append(rmse(test[s4], test['predictions']))
        print('\n')
        print("Root Mean Square Error for LSTM Model: ",
              rmse(test[s4], test['predictions']))
        print('\n')
        # Continue training the same network on the full (re-scaled) series.
        train = datewise
        scaler.fit(train)
        train = scaler.transform(train)
        n_input = len(test)
        n_features = 1
        generator = TimeseriesGenerator(train,
                                        train,
                                        length=n_input,
                                        batch_size=1)
        model.fit_generator(generator, epochs=30)
        test_predictions = []
        first_eval_batch = train[-n_input:]
        current_batch = first_eval_batch.reshape((1, n_input, n_features))
        for i in range(t):
            current_pred = model.predict(current_batch)[0]
            test_predictions.append(current_pred)
            current_batch = np.append(current_batch[:, 1:, :],
                                      [[current_pred]],
                                      axis=1)
        from pandas.tseries.offsets import DateOffset
        # NOTE(review): future dates are spaced by whole months regardless of
        # the inferred frequency n11 - confirm this matches the data.
        add_dates = [
            datewise.index[-1] + DateOffset(months=x) for x in range(0, t + 1)
        ]
        future_dates = pd.DataFrame(index=add_dates[1:],
                                    columns=datewise.columns)
        df_predict = pd.DataFrame(scaler.inverse_transform(test_predictions),
                                  index=future_dates[-t:].index,
                                  columns=[s4])
        d_proj = df_predict
        d_proj.reset_index(drop=True, inplace=True)
        forecast_prophet.reset_index(drop=True, inplace=True)
        # The LSTM table reuses Prophet's forecast dates as its 'ds' column.
        d1 = pd.DataFrame(forecast_prophet['ds'])
        lstm = pd.concat([d1, d_proj], axis=1)
        #print('\n')
        #t=str(t)
        #print('Forecasted Data of '+s4+' feature for '+t+ ' days : ' )
        #print('\n')
        # Find the smallest RMSE; ``no`` is its position in list9
        # (0 = ARIMA, 1 = Prophet, 2 = LSTM).
        small = float('inf')
        for i in range(len(list9)):
            if list9[i] < small:
                small = list9[i]
        no = list9.index(small)
        if no == 0:
            forecast_arima = pd.DataFrame(forecast_arima)
            forecast_arima.reset_index(drop=True, inplace=True)
            d18 = pd.DataFrame(forecast_prophet['ds'])
            d18.reset_index(drop=True, inplace=True)
            forecast_arima = pd.concat([d18, forecast_arima], axis=1)
            forecast_arima.rename(columns={'ds': s1}, inplace=True)
            forecast_data = forecast_arima
            forecast_data1 = forecast_data.set_index(s1)
            forecast_data1
            #print(forecast_data1)
        elif no == 1:
            forecast_prophet.rename(columns={
                'ds': s1,
                'yhat': s4
            },
                                    inplace=True)
            forecast_data = forecast_prophet
            forecast_data1 = forecast_data.set_index(s1)
            #plt.plot(datewise[s4],label="Original Data")
            #plt.plot(forecast_data[s4],label="Forecasted Data")
            #plt.legend()
            #plt.xlabel("Date")
            #plt.ylabel('Confirmed Cases')
            #plt.title("Confirmed Cases Prophet Model Forecasting")
            #plt.xticks(rotation=90)
        elif no == 2:
            lstm.rename(columns={'ds': s1, 'yhat': s4}, inplace=True)
            forecast_data = lstm
            forecast_data1 = forecast_data.set_index(s1)
            #plt.plot(datewise[s4],label="Original Data")
            #plt.plot(forecast_data[s4],label="Forecasted Data")
            #plt.legend()
            #plt.xlabel("Date")
            #plt.ylabel('Confirmed Cases')
            #plt.title("Confirmed Cases LSTM Model Forecasting")
            #plt.xticks(rotation=90)"""
        fig, ax = plt.subplots(nrows=1, ncols=1)
        ax.plot(datewise[s4], label="Original Data")
        ax.plot(forecast_data1[s4], label="Forecasted Data")
        ax.legend()
        ax.set_xlabel("Date")
        ax.set_ylabel(s4)
        ax.set_title('forecasted data of ' + s4)
        plt.xticks(rotation=90)
        plt.show()
        # A random number prefix keeps image file names from colliding.
        n = randint(0, 1000000000000)
        n = str(n)
        fig.savefig(
            os.path.join(app.config["IMAGE_UPLOADS"], n + 'time_series.png'))
        full_filename = os.path.join(app.config["IMAGE_UPLOADS"],
                                     n + 'time_series.png')
    # VARMAX
    if s3 > 1:
        # n4 is consumed while building ``datewise``; n5 keeps the full list
        # of target column names.
        n2 = s4
        n4 = n2.split()
        n5 = n2.split()
        if s5 == 'No':
            datewise = d3.groupby([s1]).agg({n4[0]: 'sum'})
            n4.pop(0)
            for i in range(len(n4)):
                d3i = d3.groupby([s1]).agg({n4[i]: 'sum'})
                datewise = pd.concat([datewise, d3i], axis=1)
        elif s5 == 'Yes':
            #s6=str(input('Enter the feature name from which who want to pick the category (eg:- country): '))
            #s7=str(input('Ente the category name from'+' '+s6+' '+'to forecast'+' '+s4+' '+' : '))
            s8 = d3[d3[s6] == s7]
            datewise = s8.groupby([s1]).agg({n4[0]: 'sum'})
            n4.pop(0)
            for i in range(len(n4)):
                d3i = s8.groupby([s1]).agg({n4[i]: 'sum'})
                datewise = pd.concat([datewise, d3i], axis=1)
                #datewise=pd.concat([datewise,d3i],axis=1)
        list1 = []
        list2 = []
        list3 = []
        list4 = []
        # Run auto_arima per column and keep each selected (p, d, q).
        for i in range(len(n5)):
            model_arima = auto_arima(datewise[n5[i]],
                                     trace=True,
                                     error_action='ignore',
                                     start_p=1,
                                     start_q=1,
                                     max_p=3,
                                     max_q=3,
                                     suppress_warnings=True,
                                     stepwise=False,
                                     seasonal=False)
            list1.append(model_arima.order)
        for i in range(len(list1)):
            list2.append(list1[i][0])
            list3.append(list1[i][1])
            list4.append(list1[i][2])
        # The joint model uses the largest p, d and q found across columns.
        list2.sort(reverse=True)
        p = list2[0]
        list3.sort(reverse=True)
        d = list3[0]
        list4.sort(reverse=True)
        q = list4[0]
        # Difference the data d times (at most twice).
        if d < 1:
            df_transformed = datewise
        elif d == 1:
            df_transformed = datewise.diff()
            df_transformed = df_transformed.dropna()
        elif d > 1:
            df_transformed = datewise.diff().diff()
            df_transformed = df_transformed.dropna()
        # Hold out the last 12 observations to score the model.
        nobs = 12
        train, test = df_transformed[0:-nobs], df_transformed[-nobs:]
        model = VARMAX(train, order=(p, q), trend='c')
        results = model.fit(maxiter=100, disp=False)
        results.summary()
        df_forecast = results.forecast(nobs)
        # Undo the differencing by cumulating forecasts from the last
        # training-period levels.
        # NOTE(review): the same inversion is applied whatever the value of d
        # - confirm it is right for d == 0 and d == 2.
        for i in range(len(n5)):
            j = '1d'
            df_forecast[n5[i] + j] = (
                datewise[n5[i]].iloc[-nobs - 1] -
                datewise[n5[i]].iloc[-nobs - 2]) + df_forecast[n5[i]].cumsum()
            df_forecast[n5[i] + 'forecasteed'] = datewise[n5[i]].iloc[
                -nobs - 1] + df_forecast[n5[i]].cumsum()
        list89 = df_forecast.columns
        list98 = []
        # Keep only the columns whose name ends with the 11-character suffix.
        for i in range(len(list89)):
            if list89[i][-11:] == 'forecasteed':
                list98.append(list89[i])
        d_new = pd.concat([datewise.iloc[-12:], df_forecast[list98]], axis=1)
        for i in range(len(n5)):
            RMSE = rmse(datewise[n5[i]][-nobs:], df_forecast[list98[i]])
            print('Root Mean Square Error for ' + n5[i] + ':', RMSE)
        # Refit on all transformed data for the final t-step forecast.
        model = VARMAX(df_transformed, order=(p, q), trend='c')
        results = model.fit(maxiter=100, disp=False)
        results.summary()
        #t=int(input('Enter number of days to forecast ? :'))
        df_forecast = results.forecast(t)
        # NOTE(review): the forecast starts after the last observation, yet
        # it is re-based on the level at position ``-t - 1`` - confirm this
        # offset is intended.
        for i in range(len(n5)):
            j = '2d'
            df_forecast[n5[i] + j] = (
                datewise[n5[i]].iloc[-t - 1] -
                datewise[n5[i]].iloc[-t - 2]) + df_forecast[n5[i]].cumsum()
            df_forecast[n5[i] + ' Forecasted'] = datewise[n5[i]].iloc[
                -t - 1] + df_forecast[n5[i]].cumsum()
        list89 = df_forecast.columns
        list98 = []
        for i in range(len(list89)):
            if list89[i][-11:] == ' Forecasted':
                list98.append(list89[i])
        df_forecast = df_forecast[list98]
        df_forecast.reset_index(inplace=True)
        df_forecast.rename(columns={'index': s1}, inplace=True)
        df_forecast.set_index(s1, inplace=True)
        forecast_data1 = df_forecast[list98]
        # One row of axes per series: history on the left, forecast on the right.
        fig, b = plt.subplots(len(n5), 2, figsize=(15, 5))
        for i in range(len(n5)):
            datewise[n5[i]].plot(kind='line', ax=b[i][0], title=n5[i])
            df_forecast[list98[i]].plot(kind='line',
                                        ax=b[i][1],
                                        title='Forecasted data of ' + n5[i],
                                        color='orange')
        fig.tight_layout(pad=1.0)
        plt.show()
        n = randint(0, 1000000000000)
        n = str(n)
        fig.savefig(
            os.path.join(app.config["IMAGE_UPLOADS"], n + 'time_series.png'))
        full_filename = os.path.join(app.config["IMAGE_UPLOADS"],
                                     n + 'time_series.png')
    # NOTE(review): full_filename / forecast_data1 are only assigned when
    # s3 >= 1, and query6 / query7 are read here even when s5 is 'No' -
    # confirm the form always supplies them.
    return render_template('step1_img.html',
                           user_image=full_filename,
                           tables=[forecast_data1.to_html(classes='page')],
                           titles=['na', 'Job'],
                           query1=request.form['query1'],
                           query2=request.form['query2'],
                           query3=request.form['query3'],
                           query4=request.form['query4'],
                           query5=request.form['query5'],
                           query6=request.form['query6'],
                           query7=request.form['query7'],
                           query8=request.form['query8'])
index_col=0, encoding="utf-8-sig", converters={0: to_dt}, names=["TS", "x", "y", "z"]) req_period = datetime.timedelta(milliseconds=100) even_frame = frame.resample(req_period).mean().interpolate() #aclr_x=even_frame["x"] aclr_x = even_frame seria_len = len(aclr_x) train_seria, test_seria = aclr_x[:seria_len // 2], aclr_x[seria_len // 2:] model = VARMAX(train_seria, order=(5, 5)) #model = VARMAX(train_seria, order=(3, 3)) #model = VARMAX(train_seria,) model_fit = model.fit() predictions = model_fit.forecast(len(test_seria)) print(type(predictions)) print(predictions.shape) for axis in range(3): plt.subplot(3, 1, axis + 1) # plt.plot(test_seria.index[:100], predictions[:, axis][:100], label="predictions") plt.plot(predictions.iloc[:100, axis], label="predicted") plt.plot(test_seria.iloc[:100, axis], label="expected") plt.legend(loc="upper right") plt.show()
Vector Autoregression Moving-Average with Exogenous Regressors (VARMAX) The Vector Autoregression Moving-Average with Exogenous Regressors (VARMAX) is an extension of the VARMA model that also includes the modeling of exogenous variables. It is a multivariate version of the ARMAX method. Exogenous variables are also called covariates and can be thought of as parallel input sequences that have observations at the same time steps as the original series. The primary series are referred to as endogenous data to contrast them with the exogenous sequence(s). The observations for exogenous variables are included in the model directly at each time step and are not modeled in the same way as the primary endogenous sequence (e.g. as an AR, MA, etc. process). The VARMAX method can also be used to model the subsumed models with exogenous variables, such as VARX and VMAX. The method is suitable for multivariate time series without trend and seasonal components with exogenous variables. ''' from random import random # VARMAX example from statsmodels.tsa.statespace.varmax import VARMAX # contrived dataset with dependency data = list() for i in range(100): v1 = random() v2 = v1 + random() row = [v1, v2] data.append(row) data_exog = [x + random() for x in range(100)] # fit model model = VARMAX(data, exog=data_exog, order=(1, 1)) model_fit = model.fit(disp=False) # make prediction data_exog2 = [[100]] yhat = model_fit.forecast(exog=data_exog2) print(yhat)
# -*- coding: utf-8 -*-
""" Created on Mon Oct 21 10:47:28 2019 @author: Nielsen """
# VARMA example: fit a VARMA(1, 1) model on a contrived two-column series and
# print its one-step-ahead forecast.
from statsmodels.tsa.statespace.varmax import VARMAX
from random import random

# Contrived dataset with dependency: the second value of each row is the
# first value plus fresh noise.
data = []
for _ in range(100):
    first = random()
    data.append([first, first + random()])

# Fit the model.
model = VARMAX(data, order=(1, 1))
model_fit = model.fit(disp=False)

# Make the prediction (default horizon of ``forecast``).
yhat = model_fit.forecast()
print(yhat)