def doHoltsLinear(train_set, test_set, predict_set):
    """Fit Holt's linear-trend model on the training sales and forecast ahead.

    The model is fitted on ``train_set['Sales']`` with fixed smoothing
    parameters, scored against ``test_set.Sales`` and then used to fill
    ``predict_set['FutureValue']``.

    :param train_set: frame-like object with a ``'Sales'`` column.
    :param test_set: frame-like object with an ``index`` and a ``Sales``
        column; its length is the number of steps scored.
    :param predict_set: frame-like object that receives a ``'FutureValue'``
        column (modified in place); its length is the number of steps
        forecast.
    :return: ``(rms, predict_set)``. ``rms`` is the root mean squared error
        on the test rows, or the sentinel ``999999999`` when any step failed.
    """
    print('>Holts Linear')
    try:
        # Empty frame that reuses the test dates as its index.
        y_hat_avg = pd.DataFrame(index=test_set.index.copy())
        # NOTE(review): newer statsmodels releases call this argument
        # `smoothing_trend` - confirm against the installed version.
        fit1 = Holt(np.asarray(train_set['Sales'])).fit(smoothing_level=0.3, smoothing_slope=0.1)
        # Forecast as many steps as there are test rows, then score them.
        y_hat_avg['Sales'] = fit1.forecast(len(test_set))
        rms = sqrt(mean_squared_error(test_set.Sales, y_hat_avg.Sales))
        # Forecast for the prediction frame (starts right after the training data).
        predict_set['FutureValue'] = fit1.forecast(len(predict_set))
        # plotChart(train_set, test_set, y_hat_avg, 'Holt_linear', 'Sales')
    except Exception as exc:
        # Best effort: keep the sentinel so callers can still rank models,
        # but say why it failed and let KeyboardInterrupt/SystemExit through.
        print('>Holts Linear failed: {}: {}'.format(type(exc).__name__, exc))
        rms = 999999999
    return (rms, predict_set)
def test_holt(self):
    """Compare Holt fits on ``self.air_ausair`` with stored reference values.

    Three variants are fitted with level 0.8 and slope 0.2: linear and
    exponential trend (both with ``optimized=False``) and a damped trend.
    """
    series = self.air_ausair
    smoothing = dict(smoothing_level=0.8, smoothing_slope=0.2)

    linear = Holt(series).fit(optimized=False, **smoothing)
    exponential = Holt(series, exponential=True).fit(optimized=False, **smoothing)
    damped = Holt(series, damped=True).fit(**smoothing)

    expected_linear_forecast = [43.76, 45.59, 47.43, 49.27, 51.10]
    expected_slope = [
        3.617628, 3.59006512, 3.33438212, 3.23657639, 2.69263502,
        2.46388914, 2.2229097, 1.95959226, 1.47054601, 1.3604894,
        1.28045881, 1.20355193, 1.88267152, 2.09564416, 1.83655482,
    ]
    expected_fittedfcast = [
        21.8601, 22.032368, 25.48461872, 27.54058587,
        30.28813356, 30.26106173, 31.58122149, 32.599234,
        33.24223906, 32.26755382, 33.07776017, 33.95806605,
        34.77708354, 40.05535303, 43.21586036, 43.75696849,
    ]
    expected_exponential_forecast = [44.60, 47.24, 50.04, 53.01, 56.15]
    expected_damped_forecast = [42.85, 43.81, 44.66, 45.41, 46.06]

    # Linear-trend model: forecasts to 2 decimals, internals to 4.
    assert_almost_equal(linear.forecast(5), expected_linear_forecast, 2)
    assert_almost_equal(linear.slope, expected_slope, 4)
    assert_almost_equal(linear.fittedfcast, expected_fittedfcast, 4)
    # Exponential and damped variants: forecasts only.
    assert_almost_equal(exponential.forecast(5), expected_exponential_forecast, 2)
    assert_almost_equal(damped.forecast(5), expected_damped_forecast, 2)
def model(self, column_name, df, apply_smoothing, smoothing_level=None, smoothing_slope=None):
    """
    Forecast the last ``self.prediction_period`` rows with Holt's double
    exponential smoothing.

    The model is fitted on ``df["train"]`` without its final ``m`` rows
    (``m = self.prediction_period``) and then forecasts ``m`` steps.

    :input column_name : str, name of the column that receives the result
    :input df : dataframe with a "train" column; modified in place
    :input apply_smoothing : bool, when truthy the given smoothing_level /
        smoothing_slope are passed to the fit; otherwise the fit estimates
        them and prints the estimated parameters
    :input smoothing_level : default=None, level parameter passed to the fit
    :input smoothing_slope : default=None, slope parameter passed to the fit

    :returns df : the same dataframe; ``column_name`` holds the "train"
        values for all but the last ``m`` rows and the forecasts for the
        last ``m`` rows
    """
    m = self.prediction_period
    history = df["train"].iloc[:-m]

    if apply_smoothing:
        fit1 = Holt(history, damped=False).fit(smoothing_level=smoothing_level,
                                               smoothing_slope=smoothing_slope,
                                               optimized=True)
    else:
        fit1 = Holt(history, damped=False).fit(optimized=True)
        params = fit1.params
        if params['smoothing_slope'] == 0:
            print('Smoothing Slope is 0')
            # NOTE(review): the refit switches to damped=True although the
            # first fit is undamped - confirm this is intended.
            fit1 = Holt(history, damped=True).fit(smoothing_slope=0.1, optimized=True)
            params = fit1.params
            print('Model is refitted with smoothing slope fixed at 0.1')
        print('====================')
        print(params)
        print('====================')

    y_fore = fit1.forecast(m)

    # Build the whole column in one assignment. The previous chained form
    # (df[col][:-m] = ...) writes to a temporary Series and is not guaranteed
    # to reach df (it does not under pandas copy-on-write).
    df[column_name] = np.concatenate([
        history.to_numpy(dtype=float),
        np.asarray(y_fore, dtype=float),
    ])
    return df
def test_holt(self):
    """Fit three Holt variants on ``self.air_ausair`` and check reference numbers.

    Each check is an ``(actual, desired, decimal)`` triple compared with
    ``assert_almost_equal``.
    """
    alpha_beta = {"smoothing_level": 0.8, "smoothing_slope": 0.2}
    fit1 = Holt(self.air_ausair).fit(optimized=False, **alpha_beta)
    fit2 = Holt(self.air_ausair, exponential=True).fit(optimized=False, **alpha_beta)
    fit3 = Holt(self.air_ausair, damped=True).fit(**alpha_beta)

    checks = (
        # linear trend: 5-step forecast
        (fit1.forecast(5), [43.76, 45.59, 47.43, 49.27, 51.10], 2),
        # linear trend: slope component
        (fit1.slope,
         [3.617628, 3.59006512, 3.33438212, 3.23657639, 2.69263502,
          2.46388914, 2.2229097, 1.95959226, 1.47054601, 1.3604894,
          1.28045881, 1.20355193, 1.88267152, 2.09564416, 1.83655482],
         4),
        # linear trend: fitted values plus one-step forecast
        (fit1.fittedfcast,
         [21.8601, 22.032368, 25.48461872, 27.54058587,
          30.28813356, 30.26106173, 31.58122149, 32.599234,
          33.24223906, 32.26755382, 33.07776017, 33.95806605,
          34.77708354, 40.05535303, 43.21586036, 43.75696849],
         4),
        # exponential trend: 5-step forecast
        (fit2.forecast(5), [44.60, 47.24, 50.04, 53.01, 56.15], 2),
        # damped trend: 5-step forecast
        (fit3.forecast(5), [42.85, 43.81, 44.66, 45.41, 46.06], 2),
    )
    for actual, desired, decimal in checks:
        assert_almost_equal(actual, desired, decimal)
def trend(tr_dict):
    """
    Rolling one-step Holt prediction for every series in ``tr_dict``.

    For each key the first two samples are copied unchanged; every later
    position ``i`` is predicted by a Holt model fitted on ``v[:i]`` only.

    :param tr_dict: mapping of key -> indexable sequence with at least two
        samples
    :return: OrderedDict of key -> numpy array with one value per sample
    """
    trends_dict = OrderedDict()
    for name, series in tr_dict.items():
        # Refit on the growing prefix and keep the first forecast step.
        rolling = [
            Holt(series[:stop]).fit(optimized=True).forecast(2)[0]
            for stop in range(2, len(series))
        ]
        trends_dict[name] = np.array([series[0], series[1]] + rolling)
    return trends_dict
def _holt_daily_mean(frame, date_col, value_cols):
    """Index the value columns by their parsed timestamps and average per day."""
    daily = frame[value_cols].copy()
    stamps = pd.to_datetime(frame[date_col], format='%Y/%m/%d %H:%M')
    daily.index = pd.DatetimeIndex(stamps, name='Timestamp')
    return daily.resample('D').mean()


def Holtmethod(paramsList=['pollution.csv', '0.93','pm', 'humidity', 'date'], specialParams=['0.3','0.1']):
    """Forecast each value column of a CSV with Holt's method and save the result.

    The CSV is split by row fraction into a training and a test part, both
    are averaged per day, one Holt model per value column is fitted on the
    training days, and the test days are forecast. The RMSE of every column
    is printed and the combined table is written to ``result.csv``.

    :param paramsList: ``[csv_path, train_fraction, value_col, ..., date_col]``.
        Dates are parsed with the format ``%Y/%m/%d %H:%M``.
    :param specialParams: ``[smoothing_level, smoothing_slope]``; each entry
        is converted with ``float``.
    :return: None. The output is the file ``result.csv``.
    """
    path = paramsList[0]
    trainRows = float(paramsList[1])
    saveto = 'result.csv'
    value_cols = list(paramsList[2:-1])
    date_col = paramsList[-1]
    smoothing_level = float(specialParams[0])
    smoothing_slope = float(specialParams[1])

    df = pd.read_csv(path, usecols=paramsList[2:])
    split = int(df.shape[0] * trainRows)
    # The test part starts at `split` itself; the former `split + 1` silently
    # dropped one row from both parts.
    train = _holt_daily_mean(df.iloc[:split], date_col, value_cols)
    test = _holt_daily_mean(df.iloc[split:], date_col, value_cols)

    # Everything above is generic and could be reused for other models.
    y_hat = test.copy()
    for col in value_cols:
        fit = Holt(np.asarray(train[col])).fit(smoothing_level=smoothing_level,
                                               smoothing_slope=smoothing_slope)
        y_hat[col] = np.round(fit.forecast(len(test)), 2)
        rms = sqrt(mean_squared_error(test[col], y_hat[col]))
        print(rms)
    y_hat['time'] = test.index

    history = train.copy()
    history['time'] = train.index

    # Stack training days on top of forecast days; the trailing 'time' column
    # is written under the date column's header.
    combined = np.concatenate((np.array(history), np.array(y_hat)), axis=0)
    s = pd.DataFrame(combined, columns=paramsList[2:])

    # Blank the value columns of the leading rows in one positional write
    # (the former chained `s[col][a:b] = ""` may not write back to `s`).
    # NOTE(review): the row count is derived from the train fraction, not
    # from len(history) - confirm which one is intended.
    blank_rows = int(len(s) * trainRows)
    s.iloc[:blank_rows, :len(value_cols)] = ""
    s.to_csv(saveto, index=False, header=True, float_format='%.2f')
def pop_sim(init_data, num_increments):
    """Extend every county's population series with Holt forecasts.

    :param init_data: mapping of county key -> dict whose keys are increment
        numbers stored as strings and whose values are populations. The
        dicts are assumed to be in chronological insertion order
        (NOTE(review): confirm with the caller).
    :param num_increments: number of future increments to append per county.
    :return: ``init_data`` itself, extended in place. New entries are keyed
        by the following increment numbers (as strings); forecasts that are
        not positive are stored as 0.
    """
    data = init_data
    for key, county in init_data.items():
        population = pd.Series(county)
        # https://www.statsmodels.org/stable/examples/notebooks/generated/exponential_smoothing.html#Holt's-Method
        fit1 = Holt(np.asarray(population)).fit(smoothing_level=0.7, smoothing_slope=0.3)
        future_pop = fit1.forecast(num_increments)
        # Compare the keys numerically: as strings, '9' sorts after '10',
        # which made the new entries overwrite existing increments.
        last_inc = max(int(inc) for inc in county)
        for inc, value in enumerate(future_pop, start=last_inc + 1):
            # round negative population values to 0
            county[str(inc)] = value if value > 0 else 0
    return data
def valueForecast(file):
    """
    Forecast the next five values for every user with Holt's method.

    :param file: Excel file read with ``pandas.read_excel``. The first
        column identifies the user, the second holds the date and the last
        one the value to forecast (original description: user, date,
        electricity usage).
    :return: DataFrame with one row per user and the columns ``DATA_DATE``
        (today's date), ``DATA_DATE1`` .. ``DATA_DATE5`` (the five forecast
        steps) and ``USRE`` (the user id).
    :raises ValueError: if the file contains no rows.
    """
    logging.debug('开始运行')
    data = pd.read_excel(file)
    if data.shape[0] == 0:
        raise ValueError('相关性原始数据不存在')

    forecast_cols = [
        'DATA_DATE1', 'DATA_DATE2', 'DATA_DATE3', 'DATA_DATE4', 'DATA_DATE5'
    ]
    # Compare user ids as strings; order of first appearance keeps the
    # output row order stable between runs.
    user_ids = data.iloc[:, 0].astype(str)
    today = datetime.date.today()

    # Forecast per user; collect the one-row frames and concatenate once
    # (DataFrame.append was removed in pandas 2.0).
    frames = []
    for user in pd.unique(user_ids):
        subdata = data.loc[user_ids == user]
        # Values of the last column with missing entries dropped.
        df_values = subdata.iloc[:, -1].dropna().to_numpy()
        model = Holt(
            endog=df_values,
            initialization_method='estimated',
        ).fit()
        pre = model.forecast(steps=5)
        print(f'数据的预测 {pre}')
        res2 = pd.DataFrame(pre).T
        res2.columns = forecast_cols
        res2['DATA_DATE'] = today
        # NOTE(review): 'USRE' looks like a typo for 'USER'; kept because
        # consumers may rely on the column name.
        res2['USRE'] = user
        print(f'RES2 {res2}')
        frames.append(res2)

    result_pre = pd.concat(frames, ignore_index=True)
    result_pre = result_pre[['DATA_DATE', *forecast_cols, 'USRE']]
    print(result_pre)
    return result_pre
def calculate_time_serie(data, time_serie_type, trend_seasonal, period, forecast):
    """Fit the requested time-series model and forecast ``forecast`` steps.

    :param data: the series to fit.
    :param time_serie_type: one of ``'simpsmoothing'``, ``'holt'``,
        ``'holt_winters'`` or ``'arima'``.
    :param trend_seasonal: only used for ``'holt_winters'``: ``'add'`` for
        additive or ``'mult'`` for multiplicative trend and seasonality.
    :param period: seasonal period, only used for ``'holt_winters'``.
    :param forecast: number of steps to forecast; converted with ``int``.
    :return: ``(in_sample_values, forecast_values)``, or ``None`` when
        ``time_serie_type`` is not one of the supported names.
    :raises ValueError: for ``'holt_winters'`` with an unsupported
        ``trend_seasonal``.
    """
    if time_serie_type == 'simpsmoothing':
        data_simp_exp = SimpleExpSmoothing(data).fit()
        proyeccion = data_simp_exp.forecast(int(forecast))
        return data_simp_exp.fittedvalues, proyeccion

    if time_serie_type == 'holt':
        data_holt = Holt(data).fit()
        proyeccion = data_holt.forecast(int(forecast))
        return data_holt.fittedvalues, proyeccion

    if time_serie_type == 'holt_winters':
        print(trend_seasonal)
        if trend_seasonal == 'add':
            print('periodo', period)
            component = 'add'
        elif trend_seasonal == 'mult':
            component = 'mul'
        else:
            # Previously this fell through and failed later with an
            # UnboundLocalError on `data_holtwinters`.
            raise ValueError(
                "trend_seasonal must be 'add' or 'mult', got {!r}".format(trend_seasonal))
        data_holtwinters = ExponentialSmoothing(
            data, trend=component, seasonal=component,
            seasonal_periods=period).fit(use_boxcox=True)
        if trend_seasonal == 'add':
            print(data_holtwinters.fittedvalues)
        proyeccion = data_holtwinters.forecast(int(forecast))
        return data_holtwinters.fittedvalues, proyeccion

    if time_serie_type == 'arima':
        arima = pmdarima.auto_arima(data, seasonal=False, error_action='ignore',
                                    suppress_warnings=True)
        proyeccion, int_conf = arima.predict(n_periods=int(forecast), return_conf_int=True)
        prediccion = arima.predict_in_sample()
        print('pro', proyeccion)
        print('pre', prediccion)
        return prediccion, proyeccion

    # Unknown model name: keep the historical behaviour of returning None.
    return None
# Plot the remaining simple-exponential-smoothing forecasts and fitted values,
# each model in its own colour.
ses_forecast_2.plot(c=COLORS[2], legend=True, label=r'$\alpha=0.5$')
ses_2.fittedvalues.plot(c=COLORS[2])
ses_forecast_3.plot(c=COLORS[3], legend=True, label=r'$\alpha={0:.4f}$'.format(alpha))
ses_3.fittedvalues.plot(c=COLORS[3])
plt.show()
# NOTE(review): the original remark here read "For some reason from colors in
# legend"; its meaning is unclear - presumably it concerns the legend colours.

# Holt's variants: each model is fitted on goog_train and forecasts
# test_length steps.

# Holt's model with linear trend
hs_1 = Holt(goog_train).fit()
hs_forecast_1 = hs_1.forecast(test_length)
# Holt's model with exponential trend
hs_2 = Holt(goog_train, exponential=True).fit()
# equivalent to ExponentialSmoothing(goog_train, trend='mul').fit()
hs_forecast_2 = hs_2.forecast(test_length)
# Holt's model with a damped, non-exponential (exponential=False) trend;
# the damping parameter is fixed at 0.99
hs_3 = Holt(goog_train, exponential=False, damped=True).fit(damping_slope=0.99)
hs_forecast_3 = hs_3.forecast(test_length)
# Start the comparison chart with the actual series.
goog.plot(color=COLORS[0], title="Holt's Smoothing models", label='Actual', legend=True)
# Forecast 24 steps with the simple exponential smoothing model and plot the
# original series, the forecast and the fitted values.
ses_fcast = ses_model.forecast(24)
airlines.Passengers.plot(label='Original', legend=True)
ses_fcast.plot(label='Predicted', legend=True)
ses_model.fittedvalues.plot(label='Fitted', legend=True)
def MAPE(org, pred):
    """Return the mean absolute percentage error of ``pred`` against ``org``.

    The error is divided by ``org``, so zero values in ``org`` are not
    handled specially.
    """
    t = (np.abs(org - pred) * 100) / org
    return np.mean(t)
ses_mape = MAPE(test.Passengers, ses_fcast)
# --> Holt's smoothing: linear, exponential and damped trend, each fitted on
# the training passengers and forecast 24 steps ahead.
holt_model_lin = Holt(train.Passengers).fit()
holt_fcast_lin = holt_model_lin.forecast(24)
holt_model_exp = Holt(train.Passengers, exponential=True).fit()
holt_fcast_exp = holt_model_exp.forecast(24)
holt_model_dam = Holt(train.Passengers, damped=True).fit()
holt_fcast_dam = holt_model_dam.forecast(24)
# Plot the original series, the linear model's fitted values and all three
# forecasts.
airlines.Passengers.plot(label='Original', legend=True)
holt_model_lin.fittedvalues.plot(label='Holt Fitted', legend=True)
holt_fcast_lin.plot(label='Linear Predicted', legend=True)
holt_fcast_exp.plot(label='Exponential Predicted', legend=True)
holt_fcast_dam.plot(label='Damped Predicted', legend=True)
# Score every Holt variant against the test passengers.
holt_lin_mape = MAPE(test.Passengers, holt_fcast_lin)
holt_exp_mape = MAPE(test.Passengers, holt_fcast_exp)
holt_dam_mape = MAPE(test.Passengers, holt_fcast_dam)
def get_holt(data):
    """Return a damped-trend Holt forecast for ``data`` as a plain list.

    The model uses estimated initial values and fixed smoothing parameters
    (level 0.8, trend 0.2) and forecasts ``DAYS_TO_PREDICT`` steps.
    """
    holt = Holt(data, damped_trend=True, initialization_method="estimated")
    fitted = holt.fit(smoothing_level=0.8, smoothing_trend=0.2)
    return fitted.forecast(DAYS_TO_PREDICT).tolist()
Trend: Additive BIC 106.395 Seasonal: None AICC 94.421 Seasonal Periods: None Date: Fri, 26 Mar 2021 Box-Cox: False Time: 00:23:58 Box-Cox Coeff.: None ============================================================================== coeff code optimized ------------------------------------------------------------------------------ smoothing_level 0.4354824 alpha True smoothing_trend 1.8914e-12 beta True initial_level 25.574165 l.0 True initial_trend 0.0144988 b.0 True ------------------------------------------------------------------------------'''
# (The text above is the tail of a string literal opened earlier in the file;
# it appears to hold a pasted model summary.)

# Forecast 19 steps ahead with the fitted Holt model.
births_pred1 = births_holt.forecast(steps=19)
print(births_pred1)
# Plot the actual series and the forecast.
plt.plot(births)
plt.plot(births_pred1)
plt.legend(['Actual', 'Forecast - Holt'], bbox_to_anchor=(1, 1), loc=2)
plt.show()
# Model with triple exponential smoothing: additive trend and additive
# seasonality with a seasonal period of 12.
from statsmodels.tsa.holtwinters import ExponentialSmoothing
births_es = ExponentialSmoothing(births, seasonal_periods=12, trend='add', seasonal='add').fit()
births_es.summary()
# Show the fitted parameters and pull out the two smoothing factors.
print(results.params)
best_alpha = results.params['smoothing_level']
best_beta = results.params['smoothing_slope']
print('最优水平平滑因子:\n', best_alpha)
print('最优趋势平滑因子:\n', best_beta)
# The rest (evaluation, visualisation, ...) works the same way as before.
# 6. Apply the model
# 1) Predict historical (in-sample) values
pred = results.predict(start='2015-02-01', end='2015-02-07')
print(pred)
# 2) Predict future values (rolling forecast)
pred = results.forecast(5)
print(pred)
# 3) Save the model
fname = 'out.pkl'
results.save(fname)
# 4) Load the model
# NOTE(review): this unpickles the file; only load files you trust.
from statsmodels.iolib.smpickle import load_pickle
results = load_pickle(fname)
# 5) Use the reloaded model
print(results.params)
pred = results.forecast(3)
print(pred)
fit3 = SimpleExpSmoothing(data).fit() # plot l3, = plt.plot(list(fit3.fittedvalues) + list(fit3.forecast(5)), marker='o') l4, = plt.plot(data1, marker='o') plt.legend(handles = [l1, l2, l3, l4], labels = ['a=0.2', 'a=0.6', 'auto', 'data'], loc = 'best', prop={'size': 7}) plt.show() """ #二次指数平滑 # Holt’s Method fit1 = Holt(data).fit(smoothing_level=0.8, smoothing_slope=0.2, optimized=False) l1, = plt.plot(list(fit1.fittedvalues) + list(fit1.forecast(5)), marker='^') fit2 = Holt(data, exponential=True).fit(smoothing_level=0.8, smoothing_slope=0.2, optimized=False) l2, = plt.plot(list(fit2.fittedvalues) + list(fit2.forecast(5)), marker='.') fit3 = Holt(data, damped=True).fit(smoothing_level=0.8, smoothing_slope=0.2) l3, = plt.plot(list(fit3.fittedvalues) + list(fit3.forecast(5)), marker='.') l4, = plt.plot(data1, marker='.') plt.legend(handles=[l1, l2, l3, l4], labels=[ "Holt's linear trend", "Exponential trend", "Additive damped trend", 'data' ],
# Hold out the last `validation_size` observations for validation.
train_close_daily = close_daily[:-validation_size]
validation_close_daily = close_daily[-validation_size:]
# Log-transformed copies of the full, training and validation series.
close_daily_log = np.log(close_daily)
train_close_daily_log = np.log(train_close_daily)
validation_close_daily_log = np.log(validation_close_daily)
def sse(x, y):
    """Return the sum of squared errors between ``x`` and ``y``."""
    # sse: sum of squared error
    return np.sum(np.power(x - y, 2))
from statsmodels.tsa.holtwinters import Holt
# Fit Holt's linear method on the untransformed training series.
fit1 = Holt(train_close_daily).fit(optimized = True)
smooth_Holt = fit1.fittedvalues
# Forecast the validation horizon and give it the validation index so it
# lines up with the actual data when plotted.
forecast_set = pd.Series(fit1.forecast(validation_size))
forecast_set.index = validation_close_daily.index
# Figure 1: full series with the fitted values.
plt.figure(figsize = (16,5))
plt.plot(close_daily)
plt.plot(smooth_Holt,linestyle='--')
# Figure 2: the last 50 observations, the last 43 fitted values and the
# forecast.
plt.figure(figsize = (16,5))
plt.plot(close_daily[-50:])
plt.plot(smooth_Holt[-43:])
plt.plot(forecast_set)
# Recover the validation values by exponentiating the log series.
validation_set = np.exp(close_daily_log[-validation_size:].values)
print("SSE for Holt’s linear method\n",sse(forecast_set,validation_set))
def MAPE(pred, org):
    """Return the mean absolute percentage error of ``pred`` against ``org``.

    The error is divided by ``org``, so zero values in ``org`` are not
    handled specially.
    """
    temp = np.abs(pred - org) * 100 / org
    return np.mean(temp)
# MAPE of the three earlier forecasts against the test sales.
# NOTE(review): fcast1..fcast3 are reassigned by the Holt models below, so
# this dict must be built before that happens.
ses_mape_dict = { 'ses_model1': MAPE(fcast1, test.Sales), 'ses_model2': MAPE(fcast2, test.Sales), 'ses_model3': MAPE(fcast3, test.Sales) }
# Holt's exponential smoothing: linear, exponential and damped trend, each
# forecasting 15 steps.
holt_model1 = Holt(train.Sales).fit(smoothing_level=0.8, smoothing_slope=0.2, optimized=False)
fcast1 = holt_model1.forecast(15)
holt_model2 = Holt(train.Sales, exponential=True).fit(smoothing_level=0.8, smoothing_slope=0.2, optimized=False)
fcast2 = holt_model2.forecast(15)
holt_model3 = Holt(train.Sales, damped=True).fit(smoothing_level=0.8, smoothing_slope=0.2)
fcast3 = holt_model3.forecast(15)
# Plot fitted values and forecasts of each model.
holt_model1.fittedvalues.plot()
fcast1.plot(color='red', legend=True, label='Linear Trend')
holt_model2.fittedvalues.plot()
fcast2.plot(color='blue', legend=True, label='Exponential Trend')
holt_model3.fittedvalues.plot()
# Split by position: the first 1873 rows for training, the last 744 for
# testing.
Train = delhidata3_linear.head(1873)
Test = delhidata3_linear.tail(744)
# Bare expressions: they only display something in an interactive session.
Train
Test
def MAPE(pred, org):
    """Return the mean absolute percentage error of ``pred`` against ``org``.

    The error is divided by ``org``, so zero values in ``org`` are not
    handled specially.
    """
    temp = np.abs((pred - org)) * 100 / org
    return np.mean(temp)
# Three Holt variants, each forecasting 12 steps.
# NOTE(review): the models are fitted on the full delhidata3_linear series,
# not on Train - confirm this is intended.
fit1 = Holt(delhidata3_linear.pm25).fit(smoothing_level=0.8, smoothing_slope=0.2, optimized=False)
fcast1 = fit1.forecast(12).rename("Holt's linear trend")
fit2 = Holt(delhidata3_linear['pm25'], exponential=True).fit(smoothing_level=0.8, smoothing_slope=0.2, optimized=False)
fcast2 = fit2.forecast(12).rename("Exponential trend")
fit3 = Holt(delhidata3_linear['pm25'], damped=True).fit(smoothing_level=0.8, smoothing_slope=0.2)
fcast3 = fit3.forecast(12).rename("Additive damped trend")
# Plot fitted values and forecasts; every series is drawn in blue.
fit1.fittedvalues.plot(marker="o", color='blue')
fcast1.plot(color='blue', marker="o", legend=True)
fit2.fittedvalues.plot(marker="o", color='blue')
fcast2.plot(color='blue', marker="o", legend=True)
fit3.fittedvalues.plot(marker="o", color='blue')