def processing_df3(df3):
    """Process the df3 migration dataset.

    Adds day/weekday/city features, splits rows into Beijing-inbound vs.
    outbound, computes per-day mean/std of the migration 'index', and uses
    AR models to forecast 9 further days of those statistics.

    The original repeated the same AR-forecast block four times and the
    same reindex-to-dict block eight times; that duplication is factored
    into private helpers here, with identical results.

    :param df3: DataFrame with 'date' (YYYYMMDD), 'arrival_city' and 'index'
        columns covering Jan 17 - Feb 15 (30 days)
    :return: eight dicts keyed by day number (0-29 observed, 30-38 forecast):
        (mean_in, std_in, mean_out, std_out,
         pred_mean_in, pred_std_in, pred_mean_out, pred_std_out)
    """
    df3['date'] = pd.to_datetime(df3['date'], format='%Y%m%d')
    # The data covers Jan 17 - Feb 15 (30 days); map the calendar day to a
    # 0-29 index: Jan 17-31 -> 0-14, Feb 1-15 -> 15-29.
    df3['day'] = df3['date'].dt.day.apply(
        lambda x: x - 17 if x >= 17 else x + 14)
    df3['weekday'] = df3['date'].dt.weekday  # 0-6, day of the week
    # Flag whether the arrival city is Beijing (1 = moving into Beijing).
    df3['city'] = df3['arrival_city'].apply(lambda x: 1 if x == '北京市' else 0)

    def _day_stats(frame):
        """Per-day mean and std of 'index' for one direction of travel."""
        grouped = frame.groupby(['day'])
        return grouped.mean()['index'], grouped.std()['index']

    def _as_dict(series):
        """Re-key the 30 observed days to 0..29 and return as a dict."""
        relabeled = series.copy()
        relabeled.index = np.arange(len(relabeled))
        return dict(relabeled)

    def _forecast_dict(series):
        """AR-forecast 9 more days (30..38) and return as a dict."""
        model = AR(series.values).fit()
        pred = model.predict(len(series), len(series) + 8, dynamic=True)
        return dict(pd.Series(pred, index=np.arange(30, 39, 1)))

    inbj = df3[df3['city'] == 1]   # inbound (arriving in Beijing)
    outbj = df3[df3['city'] == 0]  # outbound

    index_mean_in, index_std_in = _day_stats(inbj)
    index_mean_out, index_std_out = _day_stats(outbj)

    dict_imi = _as_dict(index_mean_in)
    dict_isi = _as_dict(index_std_in)
    dict_imo = _as_dict(index_mean_out)
    dict_iso = _as_dict(index_std_out)

    # Forecast the feature values needed for the test period.
    dict_pimi = _forecast_dict(index_mean_in)
    dict_pisi = _forecast_dict(index_std_in)
    dict_pimo = _forecast_dict(index_mean_out)
    dict_piso = _forecast_dict(index_std_out)

    return (dict_imi, dict_isi, dict_imo, dict_iso,
            dict_pimi, dict_pisi, dict_pimo, dict_piso)
def factorEstimator(series):
    """Rolling one-step AR forecast.

    For each 258-wide window of *series*, fit an AR model with BIC lag
    selection and keep the last predicted value. The first 258 raw
    observations are passed through unchanged.
    """
    window = 258
    forecasts = []
    for start in range(len(series) - window):
        model = AR(series[start:start + window])
        fitted = model.fit(ic='bic')
        forecasts.append(model.predict(fitted.params)[-1])
    return pd.Series(list(series[:window]) + list(forecasts))
def test_ar_dates():
    """Smoke test: AR accepts a date-indexed series and predicts over dates."""
    sunspots = sm.datasets.sunspots.load(as_pandas=False)
    idx = date_range(start='1700', periods=len(sunspots.endog), freq='A')
    series = Series(sunspots.endog, index=idx)
    # The legacy AR class warns about its deprecation.
    with pytest.warns(FutureWarning):
        fitted = AR(series, freq='A').fit(maxlag=9, method='mle', disp=-1)
    forecast = fitted.predict(start='2005', end='2015')
    expected_idx = date_range(start='2005', end='2016', freq='A')[:11]
    assert_equal(fitted.data.predict_dates, expected_idx)
    assert_equal(forecast.index, expected_idx)
def do_forecast_ar_model(self, today, train, test):
    """Fit an AR model on *train* (NaNs filled with 0) and forecast one
    value per entry of *test*.

    Returns a tuple (predictions, mse, mae, mase) computed against *test*
    via the utils helper.
    """
    fitted = AR(train.fillna(0)).fit()
    logging.info("Fitted AR...")
    first_step = len(train)
    last_step = first_step + len(test) - 1
    AResults = fitted.predict(start=first_step, end=last_step)
    logging.info("Predicted AR")
    mse = self.utils_cl.compute_mse(test, AResults)
    mae = self.utils_cl.compute_mae(test, AResults)
    mase = self.utils_cl.compute_mase(today, test, AResults)
    logging.info("Exit do_forecast_ar_model")
    return AResults, mse, mae, mase
class AutoRegression:
    """Thin wrapper around statsmodels' AR for fit-then-forecast usage."""

    def __init__(self, train_series, p):
        # p is stored for reference; AR selects its own lag on fit().
        self.train_series = train_series
        self.ar = AR(train_series)
        self.p = p

    def fit(self):
        """Fit the underlying AR model (replaces self.ar with the results)."""
        self.ar = self.ar.fit(disp=0)

    def preprocessing(self):
        # No preprocessing needed for this model.
        pass

    def predict(self, step_size):
        """Forecast step_size values immediately after the training data."""
        first = len(self.train_series)
        last = first + step_size - 1
        return self.ar.predict(start=first, end=last, dynamic=False)
def predict(x, y, pred):
    """Fit an AR model on *y* and predict at *pred*.

    *x* is accepted for interface compatibility but unused: the AR model
    is univariate. The exponentially decaying sample weights are computed
    but currently unused — the weighted fit is disabled below.

    Fixes vs. original: removed the unused in-function import of
    LinearRegression and replaced the per-element Python loop building the
    weights with the equivalent vectorized expression.
    """
    mu = 0.9
    ns = len(y)
    # Exponential decay weights [mu^0, mu^1, ...] flipped so the newest
    # sample is heaviest. Currently dead code (see disabled fit below).
    weights = np.flip(mu ** np.arange(ns), 0)
    lr = AR()
    # lr.fit(x, y, sample_weight=weights)
    lr.fit(y)
    y_pred = lr.predict(pred)
    return y_pred
def project(ser, start, end):
    """Fit AR model to series and project to end of index.

    Primarily useful for filling in missing values at the end of time
    series to ensure they match.

    ser: series to fit trend to
    start: date to begin fitting
    end: date to end fitting

    Returns: new_ser: series with missing end values replaced by fitted
    values.
    """
    from statsmodels.tsa.ar_model import AR
    fitted = AR(ser[start:end]).fit()
    last_position = ser.index.shape[0]
    return fitted.predict(start=fitted.k_ar, end=last_position)
class AutoRegression:
    """AR model that holds out the last num_output points for evaluation."""

    def __init__(self, ts_data, num_output):
        self.ts_data = ts_data
        self.t = len(self.ts_data)
        self.num_output = num_output
        self.model = None
        self.fitted_params = None
        self.pred_test_output = None

    def train_ar(self, lag):
        """Fit AR(lag) without trend on all but the held-out tail, then
        forecast over the held-out range by rolling predictions forward."""
        training_slice = self.ts_data[:-self.num_output]
        self.model = AR(training_slice).fit(maxlag=lag, trend='nc')
        self.fitted_params = self.model.params[::-1]
        first = self.t - self.num_output
        last = self.t - 1
        self.pred_test_output = self.model.predict(start=first, end=last)

    def evaluate(self):
        """Return the sMAPE of the forecasts against the held-out tail."""
        actual = self.ts_data[-self.num_output:]
        return smape(actual, self.pred_test_output)[0]
def project(ser, start, end):
    """Fit AR model to series and project to end of index.

    Primarily useful for filling in missing values at the end of time
    series to ensure they match.

    ser: series to fit trend to
    start: date to begin fitting
    end: date to end fitting

    Returns: new_ser: series with missing end values replaced by fitted
    values.
    """
    from statsmodels.tsa.ar_model import AR
    window = ser[start:end]
    results = AR(window).fit()
    return results.predict(start=results.k_ar, end=ser.index.shape[0])
class ArRetrainAgent(Agent):
    """Agent that uses autoregression with public data only.

    It fits its model again after each day.

    Attributes:
        model: an autoregression model trained each day on all available
            history used for predicting future aggregate energy consumption.
    """

    def __init__(self, account=ACCOUNT_0, logging=True, **kwargs):
        super(ArRetrainAgent, self).__init__(account, logging, **kwargs)
        self.model = AR(self.aggregate_history).fit()
        self.log('ArRetrainAgent')

    def predict_for_tomorrow(self):
        """Refit on the full history and return NUM_PREDICTIONS int forecasts.

        Predicts 2*NUM_PREDICTIONS steps starting at the end of the
        training data (there is a 1-day offset between the period predicted
        for and the training data) and keeps only the last NUM_PREDICTIONS.
        """
        history_len = len(self.aggregate_history)
        self.model = AR(self.aggregate_history).fit()
        forecast = self.model.predict(
            start=history_len,
            end=history_len + 2 * NUM_PREDICTIONS - 1,
            dynamic=False)
        return list(map(int, forecast[-NUM_PREDICTIONS:]))
def AR_prediction(data, test_data, test_for_AR, ar_summary):
    """For each ticker, fit an AR model on its log returns, predict over
    the test window, plot the comparison, and append the forecast plus
    RMSE to a results file."""
    print("AR_prediction() start execute")
    log_returns = to_log_return(data)
    with open('prediction_results/AR_model_prediction.txt', 'w') as results_file:
        for ticker in test_for_AR:
            print("AR_prediction() start execute in ticker: " + ticker)
            log_rtn = log_returns[ticker].dropna()
            # Lag order comes from the precomputed per-ticker summary.
            fitted = AR(log_rtn).fit(ar_summary[ticker][0])
            result_show = fitted.predict(test_data.index[0], test_data.index[-1])
            test_log_returns = to_log_return(test_data)
            test_log_rtn = test_log_returns[ticker].dropna()
            test_log_rtn = test_log_rtn[result_show.index]
            visualization(test_log_rtn, result_show, 'AR_prediction', ticker)
            squared_errors = (result_show - test_log_rtn).dropna() ** 2
            rmse = sqrt(sum(squared_errors) / test_log_rtn.size)
            message = "The prediction for {} is \n {}\n RMSE:{}\n".format(
                ticker, result_show, rmse)
            results_file.write(message)
    return
class AutoRegressionModel(Model):
    # Wraps a statsmodels AR model over a dataframe with 'ds' (epoch
    # seconds) and 'y' columns, resampled to a fixed frequency.

    def __init__(self, series_name, dataset, freq='2H'):
        """Start modelling a time series.

        :param series_name: name of the series
        :param dataset: dataframe (pandas) with datapoints; first two
            columns are renamed to 'ds' (epoch seconds) and 'y'
        :param freq: resampling frequency passed to DataFrame.asfreq
            (default two-hourly)
        """
        super().__init__(series_name, dataset)
        self._model = None
        self._dataset = dataset
        self.forecast_values = None
        self.is_stationary = False
        self._dataset.columns = ['ds', 'y']
        # self._dataset['ds'] = pd.to_datetime(self._dataset['ds'], unit='s')
        # 'ds' holds epoch seconds; build a datetime index from it.
        self._dataset['datetime'] = pd.to_datetime(self._dataset['ds'], unit='s')
        self._dataset = self._dataset.set_index('datetime')
        self._dataset.drop(['ds'], axis=1, inplace=True)
        self._dataset.head()  # NOTE(review): result unused; likely leftover debugging
        # Pad-fill to a regular frequency, which the AR model requires.
        self._dataset = self._dataset.asfreq(freq=freq, method="pad")

    def create_model(self):
        # Fit an AR model on the 'y' column, dropping missing values.
        series = pd.Series(self._dataset['y'], index=self._dataset.index)
        self._model = AR(series, missing='drop')
        self._model = self._model.fit()

    def do_forecast(self, update=False):
        """
        When a model is present, a set of forecasted future values can be
        generated.

        :param update: recompute even if cached forecast values exist
        :return: list of [epoch_seconds, forecast_value] pairs (201 steps
            past the end of the observed data)
        """
        # freq = pd.Timedelta(self._find_frequency(self._dataset['ds'])).ceil('H')
        # periods = int(datetime.timedelta(days=7) / freq)
        # print(freq, periods)
        #
        # if periods < 20:
        #     periods = 20
        # l = 0/0
        if update or self.forecast_values is None:
            # Forecast steps len(data) .. len(data)+200 (201 values).
            yhat = self._model.predict(len(self._dataset),
                                       len(self._dataset) + 200)
            # yhat = self._model.predict(start=1, end=5)
            indexed_forecast_values = []
            # values = yhat.to_frame()
            values = pd.DataFrame({'ds': yhat.index, 'yhat': yhat.values})
            for index, row in values.iterrows():
                # Convert each forecast timestamp back to epoch seconds
                # (local time, via time.mktime) for the output pairs.
                indexed_forecast_values.append([
                    int(
                        time.mktime(
                            datetime.datetime.strptime(str(
                                row['ds']), "%Y-%m-%d %H:%M:%S").timetuple())),
                    row['yhat']
                ])
            self.forecast_values = indexed_forecast_values
            return self.forecast_values
        else:
            return self.forecast_values
# NOTE(review): fragment of a larger model-selection if/elif chain — the
# branch header for this excerpt is not visible here; indentation below is
# reconstructed from the dangling else/if structure.
    # SVR baseline fit and score. NOTE(review): r2_test actually holds the
    # MSE, not an R^2 score — misleading name.
    regressor = SVR(kernel='rbf')
    regressor.fit(X_train, y_train)
    y_pred = regressor.predict(X_test)
    r2_test = mean_squared_error(y_test, y_pred)

    # Small dense feed-forward network on the same split.
    K.clear_session()
    model = Sequential()
    model.add(
        Dense(50,
              input_shape=(X_test.shape[1], ),
              activation='relu',
              kernel_initializer='lecun_uniform'))
    model.add(Dense(50, input_shape=(X_test.shape[1], ), activation='relu'))
    model.add(Dense(1))
    model.compile(optimizer=Adam(lr=0.001), loss='mean_squared_error')
    model.fit(X_train, y_train, batch_size=12, epochs=24, verbose=0)
    y_pred = model.predict(X_test)
    # NOTE(review): label says R-Squared but the value printed is the MSE.
    print('R-Squared: %f' % (mean_squared_error(y_test, y_pred)))

    # Undo the scaler before plotting actual vs. forecast.
    plt.figure(figsize=(16, 8))
    plt.plot(sc.inverse_transform(y_test), label='Resampled')
    plt.plot(sc.inverse_transform(y_pred), label='Forecast')
    plt.legend(loc='best')
    plt.show()
    plot = False
else:
    sys.exit("Error: Invalid model '" + model_type + "' specified!")

# Common plot for model types that did not plot inline above.
if plot:
    plt.figure(figsize=(16, 8))
    plt.plot(heart_rate, label='Original')
    plt.plot(heart_rate_resampled, label='Resampled')
    plt.plot(heart_rate_forecast, label=model_type + ' Forecast')
# - # That's not so good! Let's calculate mean absolute error: mape(df_valid, forecast) # Now let's try larger models by increasing order of AR. It looks at a longer term trend now. model = AR(df_train) model = model.fit(maxlag=7, trend='nc') start = len(df_train) end = len(df_train) + len(df_valid) - 1 forecast = model.predict(start, end) fig = plt.figure() ax = fig.gca() df_train['target'].plot(ax=ax, legend=True, label="Train") df_valid['target'].plot(ax=ax, legend=True, label="Actual") forecast.plot(ax=ax, legend=True, label="Forecast") mape(df_valid, forecast) # Note that the MAPE is lower, meaning it is a better fit # <div class="alert alert-success"> # <h2>Exercise</h2> # # Try a few other values yourself and see if you get a better/lower result than mape=0.4 # # - try trend='nc', which makes it return to the mean.
def test_ar_predict_no_fit():
    """Calling predict before fit must raise a RuntimeError."""
    sunspots = sm.datasets.sunspots.load(as_pandas=False)
    # The legacy AR class warns about its deprecation on construction.
    with pytest.warns(FutureWarning):
        unfit_model = AR(sunspots.endog)
    with pytest.raises(RuntimeError, match='Model must be fit'):
        unfit_model.predict([.1])
lag = X[-window_size:]
np.save('man_data.npy', lag)  # persist the lag window for the manual model
np.save('man_obs.npy', [series.values[-1]])  # persist the last observation

# ---------- Making time-series predictions ----------
# Load the persisted AR model and its supporting arrays.
model = ARResults.load('ar_model.pkl')
data = np.load('ar_data.npy')
last_ob = np.load('ar_obs.npy')
# Forecast exactly one step past the training data.
predictions = model.predict(start=len(data), end=len(data))
# Undo the differencing transform by adding back the last observation.
yhat = predictions[0] + last_ob[0]
print('Prediction: %f' % yhat)


# The same forecast can also be computed manually from the coefficients:
def predict(coef, history):
    """One-step AR forecast: intercept plus lag-weighted history.

    BUG FIX (review): the original multiplied every coefficient by
    history[-1] (the most recent observation only); each coefficient
    must multiply its own lag, history[-i].
    """
    yhat = coef[0]
    for i in range(1, len(coef)):
        yhat += coef[i] * history[-i]
    return yhat


# Load the manually-saved model coefficients.
coef = np.load('man_model.npy')
# NOTE(review): fragment — this excerpt begins inside a model.fit(...) call
# whose opening line is not visible here.
                    train_y,
                    epochs=1,
                    batch_size=batch_size,
                    validation_data=(test_X, test_y),
                    verbose=2,
                    shuffle=False)
model.reset_states()
# plot history
#pyplot.plot(history.history['loss'], label='train')
#pyplot.plot(history.history['val_loss'], label='test')
#pyplot.legend()
#pyplot.show()
# make a prediction
# test data should be predicted with batch size
yhat = model.predict(test_X, batch_size)
# invert scaling for forecast
# take every 7 variables on certain time ranges like 0 to 6 and 7 to 13 and so forth
# assign this variables to dictionary
# make inverse transform
t1 = list()
t2 = list()
dic = {}
# Build column-index lists: t1 collects the first column of each feature
# group, t2 the last; list(set(...)) de-duplicates on every append and the
# final sorted() restores ordering. NOTE(review): loop nesting below is
# reconstructed from a whitespace-mangled source — confirm against the
# original file.
for g in range(0, n_seq, features):
    for j in range(features - 1, n_seq, features):
        t1.append(g)
        t1 = list(set(t1))
        t2.append(j)
        t2 = list(set(t2))
t1 = sorted(t1)
def auto_regressive(data, p=6):
    """
    Auto regressive mode to predict accident rate in Jan 2017
    """
    fitted = AR(data).fit(maxlag=p)
    next_step = len(data)
    # Forecast exactly one step past the end of the data.
    return fitted.predict(next_step, next_step, dynamic=False)
class PredictPrices(object):
    """
    This class trains on a time series data and forecast the next day value
    """

    def __init__(self, data_path=None, inspect_data=False, n_estimators=10,
                 number_of_test_values=1):
        """
        :param data_path:string The location of the data file (loads CSV files only)
        :param inspect_data:bool Go over all the input values and make sure everything comes as expected
        :param n_estimators:int The amount of estimators to use
        :param number_of_test_values:int The amount of given test values
        """
        # BUG FIX (review): the logger must be created before it is used;
        # the original called self.logger.info(...) one line before
        # assigning self.logger, which raises AttributeError.
        self.logger = _logger()
        self.logger.info('Prices prediction process begins')
        self.data_path = data_path
        self.number_of_test_values = number_of_test_values
        self.inspect_data = inspect_data
        self.n_estimators = n_estimators
        self.validate_input()
        self.products = []
        self.model = None
        self.model_fit = None
        self.t = Timer()

    def train(self, x_train, x_test):
        """Search maxlag / trend / predictor style on x_train, keeping the
        combination whose one-step prediction is closest to x_test.

        Returns (fitted model, history list, vanilla_predictor flag).
        """
        self.model = AR(x_train)
        history = [x_train[i] for i in range(len(x_train))]
        min_diff = math.inf
        optimized_maxlag = 0
        best_trend = vanilla_predictor = None
        for i in range(1, len(x_train)):
            for trend in [None, 'nc']:
                for vanilla in [True, False]:
                    if trend is None:
                        self.model_fit = self.model.fit(maxlag=i, disp=False)
                    else:
                        self.model_fit = self.model.fit(maxlag=i, disp=False,
                                                        trend=trend)
                    y_predicted = self.predict(history, vanilla_predictor=vanilla)
                    temp_diff = abs(y_predicted - x_test)
                    if temp_diff < min_diff:
                        best_trend = trend
                        min_diff = temp_diff
                        optimized_maxlag = i
                        vanilla_predictor = vanilla
        # Refit with the best combination found above.
        if best_trend is None:
            self.model_fit = self.model.fit(maxlag=optimized_maxlag, disp=False)
        else:
            self.model_fit = self.model.fit(maxlag=optimized_maxlag, disp=False,
                                            trend=best_trend)
        return self.model_fit, history, vanilla_predictor

    def predict(self, history, vanilla_predictor=False):
        """One-step forecast: statsmodels' own predict (vanilla) or the
        manual intercept-plus-lag-weighted-history dot product."""
        coef = self.model_fit.params
        if vanilla_predictor:
            return self.model.predict(params=coef)[0]
        yhat = coef[0]
        for i in range(1, len(coef)):
            yhat += coef[i] * history[-i]
        return yhat

    def import_and_arrange_data(self):
        """Read the CSV and return a {product: list of prices} mapping."""
        train = pd.read_csv(self.data_path)
        # BUG FIX (review): fillna is not in-place — the original discarded
        # the result, leaving NaNs in the data. Empty cells (if there are
        # any) are replaced with the average of the product prices.
        train = train.fillna(train.mean())
        if self.inspect_data:
            self.logger.info(train.describe())
        self.products = train.columns.tolist()
        values_per_product = {}
        for product in self.products:
            values_per_product[product] = train[product].values.tolist()
        return values_per_product

    def import_train_and_predict(self):
        """End-to-end run: load data, train per product, predict and log."""
        t = Timer()
        x_test_predicted = []
        x_test = []
        train_per_product = self.import_and_arrange_data()
        for product, values in train_per_product.items():
            if self.number_of_test_values >= len(values):
                raise Exception('The number of test values must be smaller than the amount of train set values')
            x_train = values[:-self.number_of_test_values]
            x_test.append(values[-self.number_of_test_values])
            self.model_fit, history, vanilla_predictor = self.train(
                x_train, values[-self.number_of_test_values])
            x_test_predicted.append(self.predict(history, vanilla_predictor))
        for x_test_, x_predicted in zip(x_test, x_test_predicted):
            self.logger.info('Predicted value: %.3f, real value: %s' % (x_predicted, x_test_))
            self.logger.info('Abs diff between predicted and true value: %.3f\n' % (abs(x_test_ - x_predicted)))
        self.logger.info('Price prediction took %s' % t.print_timer())

    def validate_input(self):
        """Fail fast on bad constructor arguments."""
        self.logger.info('Validating input')
        if self.data_path is None:
            raise Exception('You must provide the location of the data')
        if self.data_path[-3:] != 'csv':
            raise TypeError('This code support CSV files only')
        # Type-check before the range check so non-int values always get a
        # clear TypeError (original compared floats with <= first).
        if not isinstance(self.number_of_test_values, int):
            raise TypeError('The number of test values must be integer')
        if self.number_of_test_values <= 0:
            raise ValueError('The number of test values must be greater than 0')
        if not isinstance(self.inspect_data, bool):
            raise TypeError('Inspect data must be Boolean')
        if not isinstance(self.n_estimators, int):
            raise TypeError('The number of estimators must an integer')
        self.logger.info('Validation complete')
plt.ylabel('unit')
plt.title('MA fitting result')
plt.legend(loc="best")
plt.show()

# AR fitting result
# Autoregression order p
ar_p=2
# Fit the data with an AR(2) model
model_fit=AR(data_df).fit(ar_p)
# Print the AR(2) regression coefficients
print('AR(%d)迴歸係數:'%(ar_p),model_fit.params)
# Use the fitted AR(2) model to predict through ROC year 101
# (ROC year + 1911 = Gregorian year, i.e. 2012)
ar_result=model_fit.predict(end=pd.datetime.strptime(str(101+1911), '%Y')).tolist()
# An AR(p) model has no predictions for the first p years, so pad with NaN
for i in range(0,ar_p):
    ar_result.insert(0,np.nan)
# Print the forecast for ROC year 101 (the last value of ar_result)
print("民101年的預測結果:",ar_result[-1])
# Plot actual vs. fitted/forecast
plt.plot(data_df.index,data_df.values, label='real')
# Append the forecast year (ROC 101) to the x axis
xlist=data_df.index.tolist()
xlist.append(str(101+1911))
plt.plot(xlist,ar_result, color='red', linestyle='--', label='AR(%d)'%(ar_p))
plt.xlabel('year')
plt.ylabel('unit')
# %%
# Exploring the data
plt.plot(data[::12], '-')
plt.show()
plt.scatter(data.index[::12], data[::12], s=2)
plt.show()

# yearly mean temperature
data['temp'].groupby(data.index.year).mean()
plt.plot(data['temp'].groupby(data.index.year).mean())
plt.show()

# monthly mean temperature
data['temp'].groupby(data.index.month).mean()
plt.plot(data['temp'].groupby(data.index.month).mean())
plt.show()

data['temp'].groupby(data.index.year).count()

# %%
# 12-month moving average smooths out the seasonal cycle.
data['moving_average'] = data['temp'].rolling(12).mean()

# %%
# auto-regressor from stats model
from statsmodels.tsa.ar_model import AR

data['temp']
ar = AR(data['temp'], dates=None, missing='drop')
# BUG FIX (review): AR.fit() returns a results object (it does not fit in
# place), and predict() must be called on that results object with only
# start/end — the original discarded the fit() result and then called
# predict on the UNFITTED model, passing the data as a bogus positional
# argument.
ar_results = ar.fit()
ar_results.predict(start='2013-01-01', end='2013-05-01')
def fit_AR(series, t_pred=12):
    """Fit an AR(1) model to *series* and return its predictions through
    step t_pred, rounded to whole numbers."""
    model = AR(endog=series)
    results = model.fit(maxlag=1)
    forecast = model.predict(params=results.params, end=t_pred)
    return np.round(forecast, 0)
# Autocorrelation / partial autocorrelation plots to pick the AR order.
plot_acf(df.temp.tolist(), lags=30, ax=axes[0])
plot_pacf(df.temp.tolist(), lags=30, ax=axes[1])
fig.savefig('../Images/3_pacf.png')

# Create train data (hold out the last 13 observations)
train_df = df["temp"][:-13]
date = df.index[:-13]

# 1. AR model
# with statsmodel
from statsmodels.tsa.ar_model import AR

# Up to 52 lags (one year of weekly data, presumably — TODO confirm),
# with AIC-based lag selection.
ar = AR(train_df, dates=date).fit(maxlag=52, ic='aic')
# prediction is
ar_predict = ar.predict('2019-10-22', '2020-10-21')

# Visualization
fig = go.Figure()
fig.add_trace(go.Scatter(name="Raw Data", x=df.index, y=df.temp))
fig.add_trace(
    go.Scatter(name="AR model Prediction", x=ar_predict.index, y=ar_predict))
fig.update_xaxes(rangeslider_visible=True)
fig.update_layout(title_text="AR MODEL",
                  xaxis_title="Date",
                  yaxis_title="Temperature, C")
plotly.offline.plot(fig, filename=r'../Images/4_AR.png')

# 2. ARMA Model
# with statsmodel, aic check of params
from statsmodels.tsa import stattools as st
class VarModel(BaseModel):
    # VAR model for multivariate data; falls back to univariate AR when the
    # frame has a single column. Only the selected lag order is persisted;
    # the model itself is refit inside predict().

    def __init__(self, feat_id, run_id, data=None):
        self.model_type = 'VAR'
        self.opt_p = 1  # selected lag order; written by train(), persisted by save()
        super().__init__(feat_id, run_id, data)

    def train(self, data):
        """Select the optimal lag order (AIC, max 30) for VAR or AR."""
        if len(data.columns) > 1:
            self.model = VAR(data)
            self.opt_p = self.model.select_order(30).aic
        else:
            self.model = AR(data)
            self.opt_p = self.model.select_order(30, 'aic')

    def save(self):
        # Persist only the lag order; feat_id is sanitised so it is a
        # valid filename on all platforms.
        joblib.dump(
            self.opt_p,
            os.path.join(
                'temp', self.run_id, 'models', 'VAR', '{}_VAR.pkl'.format(
                    replace_multiple(
                        self.feat_id,
                        ['/', '\\', ':', '?', '*', '"', '<', '>', '|'],
                        "x"))))

    def load(self):
        # Counterpart of save(): restore the persisted lag order.
        self.opt_p = \
            joblib.load(os.path.join('runs', self.run_id, 'models', 'VAR',
                                     '{}_VAR.pkl'.format(replace_multiple(self.feat_id,
                                                                          ['/', '\\',
                                                                           ':', '?',
                                                                           '*', '"',
                                                                           '<', '>',
                                                                           '|'],
                                                                          "x"))))

    def result(self, history, actual, prediction, forecast, anomaly_scores):
        """Assemble the output payload: errors, forecasts and alert rows.

        Rows with outlier == -1 in anomaly_scores are split into future
        alerts (forecast tail) and past alerts (history head).
        """
        mse = mean_squared_error(actual, prediction)
        mae = mean_absolute_error(actual, prediction)
        rmse = np.sqrt(mse)
        anomaly_scores['points'] = anomaly_scores.index
        future_alert = anomaly_scores.tail(len(forecast))
        past_alert = anomaly_scores.iloc[:len(history)]
        future_alert = future_alert[future_alert['outlier'] == -1]
        past_alert = past_alert[past_alert['outlier'] == -1]
        output = {
            'history': history.tolist(),
            'expected': prediction.tolist(),
            'forecast': forecast.tolist(),
            'rmse': rmse,
            'mse': mse,
            'mae': mae,
            'future_alerts': future_alert.fillna(0).to_dict(orient='record'),
            'past_alerts': past_alert.fillna(0).to_dict(orient='record'),
            'model': self.model_type
        }
        return output

    def predict(self, data, start_idx, end_idx):
        """Refit on *data* with the stored lag order and predict the range
        [start_idx, end_idx]; returns a DataFrame with the input columns."""
        if len(data.columns) > 1:
            self.model = VAR(data)
            result = self.model.fit(self.opt_p)
            y_pred = self.model.predict(result.params,
                                        start=start_idx,
                                        end=end_idx,
                                        lags=self.opt_p)
            return pd.DataFrame(data=y_pred, columns=data.columns.values)
        else:
            self.model = AR(data)
            self.model = self.model.fit(self.opt_p)
            y_pred = self.model.predict(start=start_idx, end=end_idx)
            return pd.DataFrame(data=y_pred, columns=data.columns.values)
plt.legend()
plt.show()

# plot
y_train.plot(color="blue")
y_test.plot(color="pink")
predictions.plot(color="purple")

#######################---ARIMA---##############################
from pmdarima.arima import auto_arima

# auto_arima searches the (p, d, q) order space automatically.
model = auto_arima(y_train,
                   trace=True,
                   error_action='ignore',
                   suppress_warnings=True)
model.fit(y_train)

# Forecast one value per test observation and score on R^2.
forecast = model.predict(n_periods=len(y_test))
forecast = pd.DataFrame(forecast, index=y_test.index, columns=['Prediction'])
print("R2_score:%.3f" % r2_score(y_test, forecast))

#plot the predictions for validation set
plt.plot(y_train, label='Train')
plt.plot(y_test, label='Valid')
plt.plot(forecast, label='Prediction')
plt.legend()
plt.show()
#################################################################
#%% #build a basic lagged model labels = gas.pop('RSGASSN') X_train, X_test, y_train, y_test = train_test_split(gas, labels, test_size=.25, shuffle=False) model = AR(y_train) model = model.fit() print('Lag: ' + str(model.k_ar)) print('Coefficients: ' + str(model.params)) predictions = model.predict(start=len(y_train), end=len(gas) - 1) plt.plot(y_test) plt.plot(predictions, color='red') plt.show() #%% #now build a lagged model usng all features from statsmodels.tsa.vector_ar.var_model import VAR gas = pd.concat([gas, labels], axis=1) model = VAR(endog=gas[:75]) model = model.fit()
# Split the differenced series at the 2014-08-01 boundary.
train, test = diff[diff.index < pd.to_datetime('2014-08-01')], diff[
    diff.index > pd.to_datetime('2014-07-31')]
model_test = ARIMA(train, order=(3, 0, 5)).fit()
# forecast() returns (forecast, stderr, conf_int); take the point forecasts.
predict = model_test.forecast(steps=31)[0]
# Undo the differencing: cumulatively add forecast deltas onto the last
# training value, then drop the seed value.
x = []
x.append(train[-1])
for i in predict:
    x.append(x[-1] + i)
y = x[1:]
df = test.reset_index()
df['pred'] = y
df.set_index('report_date', inplace=True)
df.plot(figsize=(12, 8))
plt.show()

# In[408]:

# AR(3)
from statsmodels.tsa.ar_model import AR

# Fit an AR model on the full differenced series with HQIC lag selection
# (variable name suggests order 3 — the actual order depends on HQIC).
ar_3 = AR(diff).fit(ic='hqic')
ar_3_pre = ar_3.predict()

# In[411]:

# Align in-sample predictions with the tail of the series (the first k_ar
# points have no prediction, hence the date cut-off).
diff_copy = diff[diff.index > pd.to_datetime('2014-04-13')]
ardf = diff_copy.reset_index()
ardf['pred'] = ar_3_pre.values
ardf.set_index('report_date', inplace=True)
ardf.plot(figsize=(12, 8))
plt.show()
# Let's train the regressor we want to work with: # In[136]: model = RandomForestRegressor() model.fit(XX_train, yy_train) # We reuse the previous evaluation code but this time we make predictions with this regressor # In[137]: # walk-forward validation history = [x for x in train] prediction_sl = list() for i in range(len(test)): yhat = model.predict(XX_test[i, :])[0] yhat = inverse_difference(history, yhat, months_in_year) prediction_sl.append(yhat) # observation obs = test[i] history.append(obs) prediction_sl[i] = yhat # print('>Predicted=%.3f, Expected=%3.f' % (yhat, obs)) # In[138]: rmse = sqrt(mean_squared_error(test, prediction_sl)) print('RMSE: %.3f' % rmse) # In[139]: