def multi_output(input1):
    """Fit Holt's exponential smoothing to one column of ``df`` and plot a 7-day forecast.

    Reads the module-level DataFrame ``df``, which must contain a ``'Date'``
    column and the column named by ``input1``.

    Args:
        input1: Name of the ``df`` column holding the observed series.

    Returns:
        A 3-tuple ``(fig, mae_text, mape_text)``: the Plotly figure showing the
        actual values, the fitted values and the forecast, followed by two
        human-readable strings with the mean absolute error and the mean
        absolute percentage error of the fit.
    """
    observed = df[input1]

    # Fit the exponential smoothing model and take its in-sample fitted values.
    model = Holt(observed).fit()
    exp_sm = model.fittedvalues

    mae = np.round(mean_absolute_error(observed, exp_sm), decimals=2)

    # MAPE is only computed over strictly positive actual values.  A boolean
    # mask keeps every actual value paired with its own fitted value; slicing
    # the tail of the fitted series is only correct when all non-positive
    # values sit at the very start.
    positive = np.asarray(observed) > 0
    y_true = np.asarray(observed)[positive]
    y_pred = np.asarray(exp_sm)[positive]
    mape = np.round(mean_absolute_percentage_error(y_true, y_pred), decimals=2)

    # 7-day forecast: the seven observations following the end of the sample.
    preds = model.predict(start=len(df), end=len(df) + 6)
    # `date_range(..., closed='right')` was removed in pandas 2.0.  Generating
    # eight stamps and dropping the first yields the same seven dates on every
    # pandas version.
    last_date = df['Date'].iloc[-1]
    dates = pd.date_range(last_date, periods=8)[1:]

    # Line plot of the actual data points, the fitted values and the forecast.
    fig = px.line(df, x='Date', y=input1, title='Number of COVID19 cases')
    fig['data'][0]['showlegend'] = True
    fig['data'][0]['name'] = 'Actual Values'
    fig.add_scatter(x=df['Date'], y=exp_sm, mode='lines', name='Exponential Smoother')
    fig.add_scatter(x=dates, y=preds, mode='lines', name='Forecasts')

    return (
        fig,
        'Mean Absolute Error of the Fits: {}'.format(mae),
        'Mean Absolute Percentage Error of the Fits: {}'.format(mape),
    )
def run_holts(train, validate, target_variable, exponential, smoothing_level=.1, smoothing_slope=.1):
    """Fit a Holt model with fixed smoothing parameters and predict the validation span.

    Args:
        train: DataFrame holding the training series in column ``target_variable``.
        validate: DataFrame whose first and last index labels bound the prediction.
        target_variable: Name of the column to model.
        exponential: Passed to ``Holt(exponential=...)``.
        smoothing_level: Fixed smoothing coefficient for the level.
        smoothing_slope: Fixed smoothing coefficient for the trend.  The
            parameter keeps its historical name for callers; it is forwarded
            to statsmodels as ``smoothing_trend``.

    Returns:
        ``(model, y_pred)``: the fitted results object and its predictions from
        ``validate.index[0]`` to ``validate.index[-1]``.
    """
    holt_model = Holt(train[target_variable], exponential=exponential)
    # `smoothing_slope` is the deprecated spelling of this fit keyword in
    # statsmodels; `smoothing_trend` is the current one.  optimized=False
    # keeps the supplied coefficients as-is instead of estimating them.
    model = holt_model.fit(
        smoothing_level=smoothing_level,
        smoothing_trend=smoothing_slope,
        optimized=False,
    )
    y_pred = model.predict(start=validate.index[0], end=validate.index[-1])
    return model, y_pred
def twocolorball_holt_forecast(df):
    """Forecast the next value of each of the seven ball columns with Holt's method.

    For the columns ``红球1`` .. ``红球6`` and ``蓝球`` a Holt model is fitted with
    randomly drawn smoothing coefficients (each one of 0.1, 0.2, ..., 1.0) and
    the first value of ``predict()`` is truncated to an int.

    Args:
        df: DataFrame containing the seven columns listed above.

    Returns:
        A list of seven ints in column order.  The list is also printed.
    """
    forecasts = []
    for i in range(1, 8):
        column = "红球%d" % i if i < 7 else "蓝球"
        # `smoothing_trend` replaces the deprecated `smoothing_slope` keyword.
        fit_model = Holt(np.asarray(df[column])).fit(
            smoothing_level=random.randint(1, 10) / 10,
            smoothing_trend=random.randint(1, 10) / 10,
            optimized=False)
        predict = fit_model.predict()
        forecasts.append(int(predict[0]))
    print(forecasts)
    return forecasts
def holt_forecast(df):
    """Forecast each ball column with Holt's method and collect the results in a number pool.

    For the columns ``红球1`` .. ``红球6`` and ``蓝球`` a Holt model is fitted with
    randomly drawn smoothing coefficients, the first predicted value is rounded
    to the nearest integer and handed to ``add_number_pool`` together with a
    flag telling whether it belongs to the seventh (``蓝球``) column.

    Args:
        df: DataFrame containing the seven columns listed above.

    Returns:
        Whatever ``add_number_pool`` returned after the seventh column.
    """
    print("==== 逐一对每位数字进行霍尔特预测 ====")
    pool = []
    for i in range(1, 8):
        is_blue = i >= 7
        column = "蓝球" if is_blue else "红球%d" % i
        # `smoothing_trend` replaces the deprecated `smoothing_slope` keyword.
        fit_model = Holt(np.asarray(df[column])).fit(
            smoothing_level=random.randint(1, 10) / 10,
            smoothing_trend=random.randint(1, 10) / 10,
            optimized=False)
        predict = fit_model.predict()
        pool = add_number_pool(pool, int(round(predict[0], 0)), is_blue)
    return pool
def holts(train, validate, yhat_df):
    """Fit a damped Holt model to every column of ``train`` and store its predictions.

    Each column is modelled with ``Holt(exponential=False, damped_trend=True)``
    and fitted with the level and trend smoothing coefficients both set to 0.1
    and ``optimized=True``.

    Args:
        train: DataFrame whose columns are the series to model.
        validate: DataFrame whose first and last index labels bound the prediction.
        yhat_df: DataFrame that receives one prediction column per ``train``
            column; it is modified in place.

    Returns:
        ``yhat_df`` with the predictions rounded to two decimals.
    """
    start, end = validate.index[0], validate.index[-1]
    for col in train.columns:
        # `damped_trend` / `smoothing_trend` are the current statsmodels names
        # for the deprecated `damped` / `smoothing_slope` keywords.
        model = Holt(train[col], exponential=False, damped_trend=True)
        fitted = model.fit(smoothing_level=.1, smoothing_trend=.1, optimized=True)
        yhat_items = fitted.predict(start=start, end=end)
        yhat_df[col] = round(yhat_items, 2)
    return yhat_df
def holt(train, validate, target_var, eval_df):
    """Evaluate Holt's linear trend model on ``target_var`` and record its RMSE.

    Args:
        train: DataFrame holding the training series in column ``target_var``.
        validate: DataFrame whose index defines the prediction span.
        target_var: Name of the column to model.
        eval_df: Evaluation table passed on to the ``append`` helper.

    Returns:
        The value returned by ``append(model_type, target_var, rmse, eval_df)``.
    """
    model_type = "Holt's Linear Trend"
    model = Holt(train[target_var], exponential=False)
    # `smoothing_trend` replaces the deprecated `smoothing_slope` keyword.
    fitted = model.fit(smoothing_level=.1, smoothing_trend=.1, optimized=False)
    temps = fitted.predict(start=validate.index[0], end=validate.index[-1])
    # Build the prediction frame directly on the validation index rather than
    # seeding it with a placeholder string column and overwriting it.
    yhat = pd.DataFrame({target_var: round(temps, 4)}, index=validate.index)
    rmse = plot_and_eval(train, validate, yhat, target_var, model_type)
    eval_df = append(model_type, target_var, rmse, eval_df)
    return eval_df
def time_series_fun_5():
    """Forecast three months of two-hourly counts with Holt's method and plot monthly means.

    Reads ``/temp/time_series_data.csv`` (columns ``date`` and ``count`` plus an
    ``Unnamed: 0`` column that is dropped), fits a Holt model with fixed
    smoothing coefficients, forecasts every two hours for the 90 days after
    the last observation and plots the monthly mean of history + forecast
    against the monthly mean of the history alone.

    Returns:
        None.  The figure is shown with ``plt.show()``.
    """
    # Read the CSV file and drop the unused column.
    df = pd.read_csv("/temp/time_series_data.csv").drop(labels="Unnamed: 0", axis=1)
    df["date"] = pd.to_datetime(df["date"], format="%Y-%m-%d %H:%M:%S")

    # Forecast horizon: every two hours for the three months after the last row.
    last_time = df["date"].iloc[-1]
    forecast_index = pd.date_range(start=last_time + timedelta(hours=2),
                                   end=last_time + timedelta(days=90),
                                   freq="2H")

    # Fit the model on the history with fixed weights (each between 0 and 1).
    # `smoothing_trend` replaces the deprecated `smoothing_slope` keyword.
    fit_model = Holt(np.asarray(df["count"])).fit(
        smoothing_level=0.7, smoothing_trend=0.1, optimized=False)

    # Out-of-sample forecast, one value per future timestamp.
    # `predict(start=0, ...)` would return in-sample fitted values, which must
    # not be labelled with future dates.
    forecast = fit_model.forecast(len(forecast_index))
    forecast_frame = DataFrame({"date": forecast_index, "count": forecast})

    # `DataFrame.append` was removed in pandas 2.0; `pd.concat` is the replacement.
    combined = pd.concat([df, forecast_frame], ignore_index=True)
    combined["count"] = combined["count"].apply(int)

    # Resample to monthly means; only the numeric column is aggregated.
    monthly_history = df.set_index("date")["count"].resample(rule="M").mean()
    monthly_combined = combined.set_index("date")["count"].resample(rule="M").mean()

    # Draw the line chart.
    plt.rcParams['font.sans-serif'] = ['SimHei']
    plt.plot(monthly_combined.index, monthly_combined, label="预测数据", linewidth=2)
    # The history line gets its own legend label; both lines used to be
    # labelled "预测数据" (forecast data).
    plt.plot(monthly_history.index, monthly_history, label="历史数据", linewidth=2)

    # Title and axis labels.
    plt.title("铁路购票预测图")
    plt.xlabel("时间")
    plt.ylabel("每月购票均值")
    plt.legend(loc='upper left')

    plt.show()
# NOTE(review): `predict_log` and `given_set` are not defined in this chunk, so
# the statements up to the first `plt.show()` are presumably the tail of a
# helper whose `def` line is outside the visible source -- confirm their
# indentation against the full file.
predict = np.exp(predict_log)
plt.plot(given_set['Count'], label='given_set')
plt.plot(predict, color='red', label='Predict')
# RMSE = sqrt(mean((prediction - actual)^2)).  sqrt(dot(prediction, actual)) / n
# is not an error metric: it grows with the values themselves, not with the
# distance between them.
rmse_fit = np.sqrt(np.mean((predict - given_set['Count']) ** 2))
plt.title('RMSE: %.4f' % rmse_fit)
plt.show()

ARIMA_predict_diff = results_ARIMA.predict(start='2014-06-25', end='2014-09-25')
check_prediction_diff(ARIMA_predict_diff, valid)

# **SARIMAX**
import statsmodels.api as sm

y_hat_avg = valid.copy()
fit1 = sm.tsa.statespace.SARIMAX(Train.Count, order=(2, 1, 4),
                                 seasonal_order=(0, 1, 1, 7)).fit()
y_hat_avg['SARIMA'] = fit1.predict(start='2014-06-25', end='2014-09-25', dynamic=True)

plt.figure(figsize=(16, 8))
plt.plot(Train['Count'], label='Train')
plt.plot(valid['Count'], label='vaild')
plt.plot(y_hat_avg['SARIMA'], label='SARIMA')
plt.legend(loc='best')
plt.show()

# Validation RMSE of the SARIMA predictions.
rmse = sqrt(mean_squared_error(valid.Count, y_hat_avg.SARIMA))
print(rmse)

# Forecast the test period and attach the per-day prediction to the test frame.
predict = fit1.predict(start='2014-09-26', end='2015-04-26', dynamic=True)
test['prediction'] = predict

# Left-join on the calendar columns, then keep a single `Hour` column taken
# from the right-hand frame and drop the columns that are no longer needed.
merge = pd.merge(test, test_original, on=['day', 'month', 'year'], how='left')
merge['Hour'] = merge['Hour_y']
merge = merge.drop(['year', 'month', 'Datetime', 'Hour_x', 'Hour_y'], axis=1)
# NOTE(review): `predict` and `given_set` are not defined in this chunk, so the
# first statements are presumably the tail of a helper whose `def` line is
# outside the visible source -- confirm their indentation against the full file.
# RMSE = sqrt(mean((prediction - actual)^2)).  sqrt(dot(prediction, actual)) / n
# is not an error metric.
rmse_fit = np.sqrt(np.mean((predict - given_set['Count']) ** 2))
plt.title('RMSE: %.4f' % rmse_fit)
plt.show()

# Predict the values for the validation set.
ARIMA_predict_diff = results_ARIMA.predict(start="2014-06-25", end="2014-09-25")
check_prediction_diff(ARIMA_predict_diff, valid)


# In[133]:

import statsmodels.api as sm

y_hat_avg = valid.copy()
fit1 = sm.tsa.statespace.SARIMAX(Train.Count, order=(2, 1, 4),
                                 seasonal_order=(0, 1, 1, 7)).fit()
y_hat_avg['SARIMA'] = fit1.predict(start="2014-6-25", end="2014-9-25", dynamic=True)

plt.figure(figsize=(16, 8))
plt.plot(Train['Count'], label='Train')
plt.plot(valid['Count'], label='Valid')
plt.plot(y_hat_avg['SARIMA'], label='SARIMA')
plt.legend(loc='best')
plt.show()


# In[134]:

# Validation RMSE of the SARIMA predictions.
rms = sqrt(mean_squared_error(valid.Count, y_hat_avg.SARIMA))
print(rms)
# Holt-Winters is expected to be the best option among the models above
# because of the seasonality in the data.
# The Holt-Winters seasonal method comprises the forecast equation and three
# smoothing equations: one for the level, one for the trend and one for the
# seasonal component, each with its own smoothing parameter.

# Plot the SPP load as daily average: the red line is the training dataset,
# the green line the test dataset and the blue line the forecast.
name = 'method 6'
draw(train, test, predicted6, name)
rms_Method6 = sqrt(mean_squared_error(test['CSWS'], predicted6['CSWS']))
print("rms_Method6:", rms_Method6)

# Method 7: ARIMA (Autoregressive Integrated Moving Average).
# ARIMA models aim to describe the correlations in the data with each other.
predicted7 = test.copy()
fit1 = sm.tsa.statespace.SARIMAX(train['CSWS'], order=(2, 1, 4),
                                 seasonal_order=(0, 1, 1, 7)).fit()
predicted7['CSWS'] = fit1.predict(start="2018-1-1", end="2018-5-31", dynamic=True)

# Plot the SPP load as daily average: the red line is the training dataset,
# the green line the test dataset and the blue line the forecast.
name = 'method 7'
draw(train, test, predicted7, name)
# Score method 7 against its own predictions; the score was previously
# computed from `predicted6`, i.e. it repeated the method 6 result.
rms_Method7 = sqrt(mean_squared_error(test['CSWS'], predicted7['CSWS']))
print("rms_Method7:", rms_Method7)
# NOTE(review): this chunk opens part-way through a call inside a `try:` block
# that sits in a loop; the loop header, the `try:` line and the start of the
# call are outside the visible source, so the indentation of the first lines
# below is a guess -- confirm against the full file.
dynamic=True)
        rms_arimas.append(
            sqrt(mean_squared_error(test.total_cases, y_hat_avg.SARIMA)))
    # NOTE(review): when a candidate fails nothing is appended to `rms_arimas`.
    # If `params` holds every candidate (including failed ones), the zip below
    # pairs scores with the wrong parameters -- verify how `params` is built.
    except:
        continue

# Pair every parameter set with its score and pick the row with the lowest RMSE.
data_tuples = list(zip(params, rms_arimas))
rms = pd.DataFrame(data_tuples, columns=['Parameters', 'RMS value'])
# idxmin() on the one-column frame yields a one-element Series that int() unwraps.
minimum = int(rms[['RMS value']].idxmin())
parameters = params[minimum]

# SARIMA
# Refit with the selected (p, d, q) order; the seasonal order is all zeros.
y_hat_avg = test.copy()
fit1 = sm.tsa.statespace.SARIMAX(train.total_cases,
                                 order=parameters,
                                 seasonal_order=(0, 0, 0, 0),
                                 enforce_stationarity=False,
                                 enforce_invertibility=False).fit()
# Predictions are cast to int, which drops the fractional part.
y_hat_avg['SARIMA'] = fit1.predict(start="2020-06-01",
                                   end="2020-06-05",
                                   dynamic=True).astype(int)

# Plot train, test and forecast on one figure.
plt.figure(figsize=(16, 8))
plt.plot(train['total_cases'], label='Train')
plt.plot(test['total_cases'], label='Test')
plt.plot(y_hat_avg['SARIMA'], label='SARIMA')
plt.title("ARIMA Forecast")
plt.legend(loc='best')
plt.show()

# RMSE of the selected model on the test set.
rms_arima = sqrt(mean_squared_error(test.total_cases, y_hat_avg.SARIMA))
print(rms_arima)
ascending=True, inplace=True, na_position='last') sns.heatmap(delhidata.isnull(), cbar=True) delhidata3.tail() delhidata3.isna().sum() delhidata3.info() delhidata3.set_index(['date'], inplace=True) delhidata3.shape delhidata3.isnull().sum() delhidata3_time = delhidata3.interpolate(method='time') delhidata3_time.plot() from statsmodels.tsa.api import ExponentialSmoothing, SimpleExpSmoothing, Holt hw = Holt(delhidata3_time["pm25"]).fit() hw_pred = hw.predict(start=2100, end=2616) hw_train = hw.predict(start=0, end=2100) hw_rmse_train = np.sqrt( mean_squared_error(hw_train, delhidata3_time["pm25"].iloc[:2101])) hw_rmse_train #52 hw_test = hw.predict(start=2102, end=2616) hw_rmse_test = np.sqrt( mean_squared_error(hw_test, delhidata3_time["pm25"].iloc[2102:])) hw_rmse_test # 43.31605304441928 plt.plot(hw_test, color='red') plt.plot(delhidata3_akima['pm25'].iloc[2103:]) import pickle pickle.dump(hw, open('holts_model.pkl', 'wb'))
# NOTE(review): this chunk opens in the middle of a call whose beginning is
# outside the visible source (presumably the `auto_arima(...)` call that
# creates `autoarimamodel`) -- confirm against the full file.
start_P=0, seasonal=True, d=1, D=1, trace=True, error_action='ignore', suppress_warnings=True, stepwise=True)
# Fit the auto-ARIMA model and forecast one value per test row.
autoarimamodel.fit(train)
y_hat['autoarima'] = autoarimamodel.predict(n_periods=test.shape[0])

# Seasonal ARIMA with fixed orders, predicted dynamically over the test index.
fit1 = sm.tsa.statespace.SARIMAX(train['人数'], order=(2, 1, 4), seasonal_order=(0, 1, 1, 7)).fit()
y_hat['SARIMA'] = fit1.predict(start=list(test.index)[0], end=list(test.index)[-1], dynamic=True)

# The string literal below is disabled code (seasonal decomposition and an ADF
# test) kept as a no-op expression statement.
''' # 分离季节性、趋势性 sm.tsa.seasonal_decompose(train['人数']).plot() result=sm.tsa.stattools.adfuller(train['人数']) plt.show() '''

# NOTE(review): this import comes after `autoarimamodel` is already used
# above, so the name must have been imported earlier as well -- verify.
from pyramid.arima import auto_arima

# Plot train, test and each forecast column over the `y_hat` index.
plt.figure(figsize=(12, 8))
plt.plot(train.index, train['人数'], label='Train')
plt.plot(test.index, test['人数'], label='Test')
plt.plot(y_hat.index, y_hat['naive'], label='Naive Forecast')
plt.plot(y_hat.index, y_hat['avg_forcast'], label='avg_forcast')
plt.plot(y_hat.index, y_hat['moving_avg_forcast'], label='moving_avg_forcast')
# --- Holt-Winters: forecast the test span, save the plot, log the RMSE ---
_horizon = len(test_df)
y_hat_avg['Holt_Winter'] = fit1.forecast(_horizon)

plt.figure()
for _series, _legend in (
        (train_df['Count'], 'Train'),
        (test_df['Count'], 'Test'),
        (y_hat_avg['Holt_Winter'], 'Holt_Winter'),
):
    plt.plot(_series, label=_legend)
plt.legend(loc='best')
_holt_winter_png = output_folder + 'holt_winter.png'
plt.savefig(_holt_winter_png)
plt.close()

_holt_winter_mse = mean_squared_error(test_df['Count'], y_hat_avg['Holt_Winter'])
rms_holt_winter = sqrt(_holt_winter_mse)
logger.debug('holt-winter model root-mean-squared error: %.3f' % rms_holt_winter)

# --- SARIMAX: refit on the training counts and predict over the test index ---
y_hat_avg = test_df.copy()
_sarimax_model = SARIMAX(train_df['Count'], order=(2, 1, 4), seasonal_order=(0, 1, 1, 7))
fit1 = _sarimax_model.fit()
_first_label, _last_label = test_df.index[0], test_df.index[-1]
y_hat_avg['SARIMA'] = fit1.predict(start=_first_label, end=_last_label, dynamic=True)

plt.figure(figsize=(12, 8))
for _series, _legend in (
        (train_df['Count'], 'Train'),
        (test_df['Count'], 'Test'),
        (y_hat_avg['SARIMA'], 'SARIMA'),
):
    plt.plot(_series, label=_legend)
plt.legend(loc='best')
_sarimax_png = output_folder + 'sarimax.png'
plt.savefig(_sarimax_png)
plt.close()

_sarimax_mse = mean_squared_error(test_df['Count'], y_hat_avg['SARIMA'])
rms_sarimax = sqrt(_sarimax_mse)
logger.debug('SARIMAX root-mean-squared error: %.3f' % rms_sarimax)

logger.debug('done')

# Split the elapsed wall-clock time into hours, minutes and seconds.
finish_time = time.time()
_elapsed_total = finish_time - start_time
elapsed_hours, elapsed_remainder = divmod(_elapsed_total, 3600)
elapsed_minutes, elapsed_seconds = divmod(elapsed_remainder, 60)
def get_object(self, queryset=None):
    """Return the ``Modules`` record for the URL slug, annotated with forecast results.

    Every file in ``media`` is deleted first.  If the module is active, each
    method named in the ``'+'``-separated ``methods`` URL kwarg is run on the
    module's container increments: the series is split into train/test, the
    forecast is plotted to a PNG under ``media/`` and its RMSE on the test
    part is collected.  Summary statistics and the best (lowest RMSE) method
    are stored as attributes on the returned object.

    NOTE(review): the nesting below was reconstructed from a source whose
    indentation was lost -- confirm against the original file, in particular
    the extent of the ``if i < 7:`` block near the end.

    Args:
        queryset: Overwritten inside the method before it is read.

    Returns:
        The annotated ``Modules`` instance, or ``None`` when the ``slug``
        kwarg is falsy.
    """
    # Remove every file left in the media directory.
    for i in os.listdir('media'):
        os.remove('media' + '/' + i)
    warnings.filterwarnings("ignore")  # disables warnings
    methods = self.kwargs['methods']
    methods = methods.split('+')
    if self.kwargs['slug']:
        slug = self.kwargs['slug']
        # NOTE(review): if `Modules.DoesNotExist` is raised below, `obj` is
        # never bound and the final `return obj` fails -- verify.
        try:
            obj = Modules.objects.get(m_module=slug)
            obj.no_active = None
            if obj.m_is_active == False:
                obj.no_active = "Модуль неактивен"
            else:
                rms_arr, data_pred, labels_pred, pars = [], [], [], []
                try:
                    # Collect the dates and increments of the module's containers.
                    queryset = Containers.objects.filter(
                        c_module__m_module=slug, c_incr__isnull=False)
                    for item in queryset:
                        labels_pred.append(item.c_date.date())
                        data_pred.append(item.c_incr)
                    dd = np.asarray(data_pred)
                    df = pd.DataFrame(data=dd,
                                      index=pd.to_datetime(labels_pred),
                                      columns=['value'])
                    # The largest analytics period is the forecast horizon:
                    # the last `forecast_period` rows form the test set.
                    max_period = Analitics.objects.filter(
                        a_module__m_module=obj.m_module).aggregate(
                            Max('a_period'))
                    forecast_period = int(max_period['a_period__max'])
                    train = df[0:-forecast_period]
                    test = df[-forecast_period:]
                    # df = df.resample('D').mean()
                    # train = train.resample('D').mean()
                    # test = test.resample('D').mean()
                    y_hat_avg = test.copy()
                    plt.rcParams.update({'font.size': 14})
                    # stationarity check
                    analiz, d_7 = self.stationarity(train.value)
                    # NOTE(review): no figure created below is ever closed
                    # (there is no plt.close() call in this method).
                    for item in methods:
                        start = time.time()
                        # Sentinel score; any real RMSE below it replaces it.
                        rms = 1000000000.0
                        # ===================================================================================
                        if item == 'Наивный подход':
                            # Naive forecast: repeat the last training value.
                            y_hat_avg['naive'] = dd[len(train) - 1]
                            # Compute the root-mean-square error (RMSE)
                            rms = sqrt(
                                mean_squared_error(test.value,
                                                   y_hat_avg.naive))
                            duration = time.time() - start
                            plt.figure(figsize=(16, 10))
                            plt.plot(train.index,
                                     train['value'],
                                     label='Train')
                            plt.plot(test.index,
                                     test['value'],
                                     label='Test')
                            plt.plot(y_hat_avg.index,
                                     y_hat_avg['naive'],
                                     label='Naive Forecast')
                            plt.legend(loc='best')
                            plt.title("Naive Forecast \n (RMSE = " +
                                      str(round(rms, 10)) + ", time = " +
                                      str(round(duration, 3)) + "c)",
                                      fontsize=35,
                                      fontweight='bold')
                            plt.savefig('media/naive_forecast.png')
                            pars.append(None)
                        # ===================================================================================
                        elif item == 'Простое среднее':
                            # Simple average: the mean of the training values.
                            y_hat_avg['avg_forecast'] = train[
                                'value'].mean()
                            duration = time.time() - start
                            plt.figure(figsize=(16, 10))
                            plt.plot(train.index,
                                     train['value'],
                                     label='Train')
                            plt.plot(test.index,
                                     test['value'],
                                     label='Test')
                            plt.plot(y_hat_avg['avg_forecast'],
                                     label='Average Forecast')
                            plt.legend(loc='best')
                            rms = sqrt(
                                mean_squared_error(test.value,
                                                   y_hat_avg.avg_forecast))
                            plt.title("Average Forecast \n (RMSE = " +
                                      str(round(rms, 10)) + ", time = " +
                                      str(round(duration, 3)) + "c)",
                                      fontsize=35,
                                      fontweight='bold')
                            plt.savefig('media/average_forecast.png')
                            pars.append(None)
                        # ===================================================================================
                        elif item == 'Скользящее среднее':
                            # Moving average: the last value of a 48-row
                            # rolling mean over the training values.
                            y_hat_avg['moving_avg_forecast'] = train[
                                'value'].rolling(48).mean().iloc[-1]
                            rms = sqrt(
                                mean_squared_error(
                                    test.value,
                                    y_hat_avg.moving_avg_forecast))
                            duration = time.time() - start
                            plt.figure(figsize=(16, 10))
                            plt.plot(train.index,
                                     train['value'],
                                     label='Train')
                            plt.plot(test.index,
                                     test['value'],
                                     label='Test')
                            plt.plot(y_hat_avg['moving_avg_forecast'],
                                     label='Moving Average Forecast')
                            plt.legend(loc='best')
                            plt.title(
                                "Moving Average Forecast \n (RMSE = " +
                                str(round(rms, 10)) + ", time = " +
                                str(round(duration, 3)) + "c)",
                                fontsize=35,
                                fontweight='bold')
                            plt.savefig('media/mov_avg_forecast.png')
                            pars.append(None)
                        # ===================================================================================
                        elif item == 'Простое экспоненциальное сглаживание':
                            # Grid search over the smoothing level 0.0 .. 0.9.
                            # NOTE(review): `fit2` and `p4` stay unbound if no
                            # candidate scores below the sentinel -- verify.
                            for s_l in np.arange(0, 1, 0.1):
                                fit2_curr = SimpleExpSmoothing(
                                    np.asarray(train['value'])).fit(
                                        smoothing_level=s_l,
                                        optimized=False)
                                y_hat_avg['SES'] = fit2_curr.forecast(
                                    len(test))
                                rms_curr = sqrt(
                                    mean_squared_error(
                                        test.value, y_hat_avg.SES))
                                if (rms_curr < rms):
                                    rms = rms_curr
                                    # NOTE(review): this plot call runs before
                                    # this branch's plt.figure() below, so it
                                    # draws on whichever figure is current at
                                    # that moment -- confirm it is intended.
                                    plt.plot(y_hat_avg['SES'], label='SES')
                                    fit2 = fit2_curr
                                    p4 = {'s_l': round(s_l, 4)}
                            # Re-forecast with the best candidate found.
                            y_hat_avg['SES'] = fit2.forecast(len(test))
                            duration = time.time() - start
                            plt.figure(figsize=(16, 10))
                            plt.plot(train.index,
                                     train['value'],
                                     label='Train')
                            plt.plot(test.index,
                                     test['value'],
                                     label='Test')
                            plt.plot(y_hat_avg['SES'], label='SES')
                            plt.legend(loc='best')
                            plt.title(
                                "Simple Exponential Smoothing \n (RMSE = " +
                                str(round(rms, 10)) + ", time = " +
                                str(round(duration, 3)) + "c)",
                                fontsize=35,
                                fontweight='bold')
                            plt.savefig('media/ses.png')
                            pars.append(p4)
                        # ===================================================================================
                        elif item == 'Метод линейного тренда Холта':
                            # Grid search over the level and trend smoothing
                            # coefficients, each 0.0 .. 0.9.
                            # NOTE(review): `fit1` and `p5` stay unbound if no
                            # candidate scores below the sentinel -- verify.
                            for s_l in np.arange(0, 1, 0.1):
                                for s_s in np.arange(0, 1, 0.1):
                                    fit1_curr = Holt(
                                        np.asarray(train['value'])).fit(
                                            smoothing_level=s_l,
                                            smoothing_trend=s_s)
                                    y_hat_avg[
                                        'Holt_linear'] = fit1_curr.forecast(
                                            len(test))
                                    rms_curr = sqrt(
                                        mean_squared_error(
                                            test.value,
                                            y_hat_avg.Holt_linear))
                                    if (rms_curr < rms):
                                        rms = rms_curr
                                        fit1 = fit1_curr
                                        p5 = {
                                            's_l': round(s_l, 4),
                                            's_s': round(s_s, 4)
                                        }
                            duration = time.time() - start
                            # Re-forecast with the best candidate found.
                            y_hat_avg['Holt_linear'] = fit1.forecast(
                                len(test))
                            plt.figure(figsize=(16, 10))
                            plt.plot(train.index,
                                     train['value'],
                                     label='Train')
                            plt.plot(test.index,
                                     test['value'],
                                     label='Test')
                            plt.plot(y_hat_avg['Holt_linear'],
                                     label='Holt_linear')
                            plt.legend(loc='best')
                            plt.title(
                                "Holt linear trend method \n (RMSE = " +
                                str(round(rms, 10)) + ", time = " +
                                str(round(duration, 3)) + "c)",
                                fontsize=35,
                                fontweight='bold')
                            plt.savefig('media/holt_linear.png')
                            pars.append(p5)
                        # ===================================================================================
                        elif item == 'Метод Холта-Винтерса':
                            # Grid search over trend/seasonal type ('add' or
                            # None) and a seasonal period of 7 or 12.
                            # Candidates that raise are silently skipped.
                            # NOTE(review): `fit1` and `p6` stay unbound if
                            # every candidate fails -- verify.
                            params = ['add', None]
                            for t in params:
                                for s in params:
                                    for s_p in [7, 12]:
                                        try:
                                            fit1_curr = ExponentialSmoothing(
                                                np.asarray(train['value']),
                                                seasonal_periods=s_p,
                                                trend=t,
                                                seasonal=s,
                                            ).fit()
                                            y_hat_avg[
                                                'Holt_Winter'] = fit1_curr.forecast(
                                                    len(test))
                                            rms_curr = sqrt(
                                                mean_squared_error(
                                                    test.value,
                                                    y_hat_avg.Holt_Winter))
                                            if (rms_curr < rms):
                                                rms = rms_curr
                                                fit1 = fit1_curr
                                                p6 = {
                                                    's_p': s_p,
                                                    't': t,
                                                    's': s
                                                }
                                        except:
                                            pass
                            duration = time.time() - start
                            # Re-forecast with the best candidate found.
                            y_hat_avg['Holt_Winter'] = fit1.forecast(
                                len(test))
                            plt.figure(figsize=(16, 10))
                            plt.plot(train.index,
                                     train['value'],
                                     label='Train')
                            plt.plot(test.index,
                                     test['value'],
                                     label='Test')
                            plt.plot(y_hat_avg['Holt_Winter'],
                                     label='Holt_Winter')
                            plt.legend(loc='best')
                            plt.title(" Holt-Winters method \n (RMSE = " +
                                      str(round(rms, 10)) + ", time = " +
                                      str(round(duration, 3)) + "c)",
                                      fontsize=35,
                                      fontweight='bold')
                            plt.savefig('media/holt_winter.png')
                            pars.append(p6)
                        # ===================================================================================
                        elif item == 'SARIMA':
                            # Starts from a fresh copy of the test frame, so
                            # forecast columns added by earlier methods are
                            # dropped from `y_hat_avg`.
                            y_hat_avg = test.copy()
                            # Candidate orders: p, q in 0..3; d taken from the
                            # stationarity check; seasonal D in 0..1; seasonal
                            # period 7 or 12.
                            p = q = range(0, 4)
                            D = range(0, 2)
                            m = [7, 12]
                            pdq = list(itertools.product(p, d_7, q))
                            seasonal_pdq = [
                                (x[0], x[1], x[2], x[3]) for x in list(
                                    itertools.product(p, D, q, m))
                            ]
                            # Candidates that raise are silently skipped.
                            # NOTE(review): `fit1` and `p7` stay unbound if
                            # every candidate fails -- verify.
                            for param in pdq:
                                for param_seasonal in seasonal_pdq:
                                    try:
                                        fit1_curr = sm.tsa.statespace.SARIMAX(
                                            train.value,
                                            order=param,
                                            seasonal_order=param_seasonal,
                                            enforce_stationarity=False,
                                            enforce_invertibility=False
                                        ).fit()
                                        y_hat_avg[
                                            'SARIMA'] = fit1_curr.predict(
                                                start=test.index[0].date(),
                                                end=self.get_today().date(
                                                ),
                                                dynamic=True)
                                        rms_curr = sqrt(
                                            mean_squared_error(
                                                test.value,
                                                y_hat_avg.SARIMA))
                                        if (rms_curr < rms):
                                            rms = rms_curr
                                            fit1 = fit1_curr
                                            p7 = {
                                                'p': param[0],
                                                'd': param[1],
                                                'q': param[2],
                                                'P': param_seasonal[0],
                                                'D': param_seasonal[1],
                                                'Q': param_seasonal[2],
                                                'm': param_seasonal[3]
                                            }
                                    except:
                                        pass
                            duration = time.time() - start
                            # Re-predict with the best candidate found.
                            y_hat_avg['SARIMA'] = fit1.predict(
                                start=test.index[0].date(),
                                end=self.get_today().date(),
                                dynamic=True)
                            plt.figure(figsize=(16, 10))
                            plt.plot(train['value'], label='Train')
                            plt.plot(test['value'], label='Test')
                            plt.plot(y_hat_avg['SARIMA'], label='SARIMA')
                            plt.legend(loc='best')
                            plt.title(" SARIMA method \n (RMSE = " +
                                      str(round(rms, 10)) + ", time = " +
                                      str(round(duration, 3)) + "c)",
                                      fontsize=35,
                                      fontweight='bold')
                            plt.savefig('media/arima.png')
                            pars.append(p7)
                        # ===========================================================================================
                        elif item == 'LSTM':
                            # transform data to be stationary
                            # transform data to be supervised learning
                            if analiz[0] != 'Стационарный':
                                supervised = self.timeseries_to_supervised(
                                    self.difference(data_pred, 1), 1)
                            else:
                                supervised = self.timeseries_to_supervised(
                                    data_pred, 1)
                            supervised_values = supervised.values
                            # split data into train and test-sets
                            train_lstm, test_lstm = supervised_values[
                                0:-len(test)], supervised_values[-len(test
                                                                      ):]
                            # transform the scale of the data
                            scaler, train_scaled, test_scaled = self.scale(
                                train_lstm, test_lstm)
                            # Train five independent models and keep the
                            # predictions of the one with the lowest RMSE.
                            error_scores, pred = list(), list()
                            for r in range(5):
                                # fit the model
                                lstm_model = self.fit_lstm(
                                    train_scaled, 1, 5, 5)
                                # forecast the entire training dataset to build up state for forecasting
                                train_reshaped = train_scaled[:, 0].reshape(
                                    len(train_scaled), 1, 1)
                                lstm_model.predict(train_reshaped,
                                                   batch_size=1)
                                # walk-forward validation on the test data
                                predictions = list()
                                for i in range(len(test_scaled)):
                                    # make one-step forecast
                                    X, y = test_scaled[
                                        i, 0:-1], test_scaled[i, -1]
                                    yhat = self.forecast_lstm(
                                        lstm_model, 1, X)
                                    # invert scaling
                                    yhat = self.invert_scale(
                                        scaler, X, yhat)
                                    if analiz[0] != 'Стационарный':
                                        # invert differencing
                                        yhat = self.inverse_difference(
                                            data_pred, yhat,
                                            len(test_scaled) + 1 - i)
                                    # store forecast
                                    predictions.append(yhat)
                                # report performance
                                rms = sqrt(
                                    mean_squared_error(
                                        test.value, predictions))
                                error_scores.append(rms)
                                pred.append(predictions)
                            rms = np.array(error_scores).min()
                            i_min = error_scores.index(rms)
                            predictions = pred[i_min]
                            duration = time.time() - start
                            plt.figure(figsize=(16, 10))
                            plt.plot(train.index,
                                     train['value'],
                                     label='Train')
                            plt.plot(test.index,
                                     test['value'],
                                     label='Test')
                            plt.plot(test.index, predictions, label='LSTM')
                            plt.legend(loc='best')
                            plt.title("LSTM \n (RMSE = " +
                                      str(round(rms, 10)) + ", time = " +
                                      str(round(duration, 3)) + "c)",
                                      fontsize=35,
                                      fontweight='bold')
                            plt.savefig('media/lstm.png')
                            pars.append(None)
                        # # ===========================================================================================
                        # NOTE(review): a score is recorded for every item,
                        # but `pars` only grows in the branches above; an
                        # unrecognised method name therefore shifts `pars`
                        # relative to `methods` and `rms_arr` -- verify.
                        rms_arr.append(rms)
                    # Attach the raw data and the best-scoring method.
                    obj.data_pred = data_pred
                    i = rms_arr.index(np.array(rms_arr).min())
                    if i < 7:
                        obj.pars = pars[i]
                    obj.method = methods[i]
                    # Summary statistics of the increments.
                    obj.data_max = np.array(data_pred).max()
                    obj.data_min = np.array(data_pred).min()
                    obj.data_mean = np.array(data_pred).mean().round()
                    obj.data_std = np.array(data_pred).std().round()
                    obj.rms_min = round(np.array(rms_arr).min(), 10)
                    obj.analiz = analiz
                # NOTE(review): the only Containers query above is a
                # `.filter(...)`; confirm whether this handler is reachable.
                except Containers.DoesNotExist:
                    pass
        except Modules.DoesNotExist:
            pass
    else:
        obj = None
    return obj
# Compute the root mean squared error of the ARIMA predictions.
print("\n Root mean squared error for ARIMA model\n")
# RMSE = sqrt(mean((prediction - actual)^2)).  sqrt(dot(prediction, actual))
# divided by the number of observations is not an error metric: it measures
# the size of the values, not the distance between them.
ARIMA_rms = np.sqrt(np.mean((ARIMA_predict - valid['Count']) ** 2))
print(ARIMA_rms)

############################################################################################################################################################
# 6. SARIMA model on daily time series
# Extension of ARIMA that also takes seasonality into account.
fit1 = sm.tsa.statespace.SARIMAX(Train.Count, order=(1, 1, 1),
                                 seasonal_order=(1, 1, 1, 7)).fit()

# Predict the validation period from the trained model.
y_hat['SARIMA'] = fit1.predict(start="2014-6-25", end="2014-9-25", dynamic=True)

# Plot train, validation and prediction.
plt.figure(figsize=(16, 8))
plt.plot(Train['Count'], label='Train')
plt.plot(valid['Count'], label='Valid')
plt.plot(y_hat['SARIMA'], label='SARIMAX')
plt.legend(loc='best')
plt.title('SARIMA')
plt.show()

# Compute the root mean squared error of the SARIMA predictions.
print("\n Root mean squared error for SARIMA model on daily time series model\n")
SARIMA_rms = sqrt(mean_squared_error(valid.Count, y_hat.SARIMA))
print(SARIMA_rms)
# NOTE(review): `comb_predict1` and `comb_predict` come from code outside this
# chunk; the statements up to the first `plt.show()` may belong to a block
# that starts above -- confirm their indentation against the full file.
# Add the two components (missing entries count as 0) and undo the log transform.
comb_predict1 = comb_predict1.add(comb_predict, fill_value=0)
comb_predict = np.exp(comb_predict1)

plt.plot(train_validate['Count'], label="Valid")
plt.plot(comb_predict, color='red', label="Predict")
plt.legend(loc='best')
# RMSE = sqrt(mean((prediction - actual)^2)).  sqrt(dot(prediction, actual)) / n
# is not an error metric.
rmse_fit = np.sqrt(np.mean((comb_predict - train_validate['Count']) ** 2))
plt.title('RMSE: %.4f' % rmse_fit)
plt.show()

# SARIMAX takes into account the seasonality of a data series.
import statsmodels.api as sm

y_hat_avg = train_validate.copy()
fit1 = sm.tsa.statespace.SARIMAX(train_set.Count, order=(2, 1, 4),
                                 seasonal_order=(0, 1, 1, 7)).fit()
y_hat_avg['SARIMAX'] = fit1.predict(start='2014-06-25', end='2014-09-22', dynamic=True)

plt.figure(figsize=(16, 8))
plt.plot(train_set['Count'], label='Train')
plt.plot(train_validate['Count'], label='Valid')
plt.plot(y_hat_avg['SARIMAX'], label='SARIMA')
plt.legend(loc='best')
plt.show()

# Validation RMSE of the SARIMAX predictions; the bare expression displays
# the value when run as a notebook cell.
rms = sqrt(mean_squared_error(train_validate.Count, y_hat_avg['SARIMAX']))
rms
import pandas as pd

# Method 6 - Holt-Winters: additive trend and additive seasonality, period 7.
y_hat_avg = test.copy()
fit1 = ExponentialSmoothing(np.asarray(train['Count']), seasonal_periods=7,
                            trend='add', seasonal='add', ).fit()
y_hat_avg['Holt_Winter'] = fit1.forecast(len(test))
# plt.figure(figsize=(16, 8))
# plt.plot(train['Count'], label='Train')
# plt.plot(test['Count'], label='Test')
# plt.plot(y_hat_avg['Holt_Winter'], label='Holt_Winter')
# plt.legend(loc='best')
# plt.show()

# `DataFrame.append` was removed in pandas 2.0; concatenating a one-row frame
# with ignore_index=True appends the same row.
RMSE = pd.concat(
    [RMSE,
     pd.DataFrame([{"method": 'Holt-Winters Method',
                    "result": sqrt(mean_squared_error(test.Count, y_hat_avg.Holt_Winter))}])],
    ignore_index=True)

# Method 7 - ARIMA (fitted as a seasonal SARIMAX model).
y_hat_avg = test.copy()
fit1 = sm.tsa.statespace.SARIMAX(train.Count, order=(2, 1, 4),
                                 seasonal_order=(0, 1, 1, 7)).fit()
y_hat_avg['SARIMA'] = fit1.predict(start="2013-11-1", end="2013-12-31", dynamic=True)

plt.figure(figsize=(16, 8))
# plt.plot(train['Count'], label='Train')
# plt.plot(test['Count'], label='Test')
plt.plot(y_hat_avg['SARIMA'], label='SARIMA')
plt.legend(loc='best')
plt.show()

RMSE = pd.concat(
    [RMSE,
     pd.DataFrame([{"method": 'ARIMA',
                    "result": sqrt(mean_squared_error(test.Count, y_hat_avg.SARIMA))}])],
    ignore_index=True)
print(RMSE.head(10))