def evaluate_holt_model(X):
    """
    Evaluate a Holt model with walk-forward (expanding window) validation.

    The first 75% of the observations seed the training history. For each
    remaining observation a fresh Holt model is fitted on the history, its
    one-step-ahead forecast is recorded, and the true observation is then
    appended to the history.

    :param X: list or series containing all historical data
    :return: mse (error metric) and the fitted model from the last step
    :raises ValueError: if X leaves no observation to hold out
    """
    # Work on plain positional values: slicing a pandas Series keeps its
    # original labels, so indexing the slice with 0, 1, ... would be a label
    # lookup rather than a positional one.
    values = list(X)

    # Prepare training dataset
    train_size = int(len(values) * 0.75)
    history = values[:train_size]
    test = values[train_size:]
    if not test:
        raise ValueError("X is too short to hold out any test observations")

    # Make predictions
    predictions = []
    for observed in test:
        # Refit on everything seen so far, then forecast one step ahead
        model_fit = Holt(history).fit()
        predictions.append(model_fit.forecast()[0])
        # Move forward one time step
        history.append(observed)

    # Calculate out of sample error
    mse = mean_squared_error(test, predictions)
    return mse, model_fit
def multi_output(input1):
    """
    Fit Holt's model to one column of the module-level ``df`` and plot it.

    :param input1: name of the column of ``df`` to model
    :return: tuple of (plotly figure, mean-absolute-error text,
        mean-absolute-percentage-error text)
    """
    series = df[input1]
    model = Holt(series).fit()  # fit the Exponential Smoothing model
    exp_sm = model.fittedvalues  # fitted values of the model

    # calculate the mean absolute error
    mae = np.round(mean_absolute_error(series, exp_sm), decimals=2)

    # calculate the mean absolute percentage error on the strictly positive
    # observations only. A boolean mask pairs each actual value with its own
    # fitted value even when non-positive observations are not all at the
    # start of the series.
    positive = np.asarray(series > 0)
    y_true = np.asarray(series)[positive]  # actual observations
    y_pred = np.asarray(exp_sm)[positive]  # fitted/predicted observations
    mape = np.round(mean_absolute_percentage_error(y_true, y_pred), decimals=2)

    # find out the 7-day forecast
    preds = model.predict(start=len(df), end=len(df) + 6)
    # The seven periods after the last observed date. Dropping the first of
    # eight periods is equivalent to the former ``closed='right'`` and does
    # not depend on a keyword that newer pandas releases no longer accept.
    dates = pd.date_range(df['Date'].iloc[-1], periods=8)[1:]

    # line plot showing the observed/actual datapoints, fitted datapoints and
    # forecasts
    fig = px.line(df, x='Date', y=input1, title='Number of COVID19 cases')
    fig['data'][0]['showlegend'] = True
    fig['data'][0]['name'] = 'Actual Values'
    fig.add_scatter(x=df['Date'], y=exp_sm, mode='lines', name='Exponential Smoother')
    fig.add_scatter(x=dates, y=preds, mode='lines', name='Forecasts')

    return (
        fig,
        'Mean Absolute Error of the Fits: {}'.format(mae),
        'Mean Absolute Percentage Error of the Fits: {}'.format(mape),
    )
def WalkForwardCV_HOLT(param, X):
    """
    Walk-forward cross-validation of Holt's linear model.

    Starting from the midpoint of ``X``, each step fits the model on all
    observations before the split point and scores the one-step-ahead
    forecast against the next observation using the module's ``MAPE`` helper.

    :param param: indexable; ``param[0]`` is the smoothing level and
        ``param[1]`` the smoothing slope
    :param X: sliceable sequence of observations
    :return: (mean of the per-step errors, mean AIC, last fitted model)
    """
    total = len(X)
    errors = []
    aics = []
    for split in range(total // 2, total):
        # Everything before the split trains; the single next point tests
        history = X[:split]
        actual = X[split:split + 1]

        fit1 = Holt(history).fit(smoothing_level=param[0],
                                 smoothing_slope=param[1])

        # Score the next-step forecast and keep the model's AIC
        errors.append(MAPE(fit1.forecast(1), actual))
        aics.append(fit1.aic)

    return np.mean(errors), np.mean(aics), fit1
def date_forecast(date, min_list):
    """
    Extend a yearly series with Holt's linear-trend forecasts, capped at 21 values.

    :param date: DataFrame with ``y_year`` and ``list_value`` columns
    :param min_list: argument forwarded to ``get_zero_list`` to build the
        values placed in front of the series, or ``None`` for no padding
    :return: list of at most 21 values: optional front padding, the observed
        values, then the forecasts
    """
    list_value = date["list_value"].values.tolist()

    # Number of periods to forecast so that observed + forecast reach 21.
    # (Original author's note: forecasts run up to December 2020.)
    steps = 20 - len(date["y_year"]) + 1

    # With 21 or more observations there is nothing to forecast; asking the
    # model for a non-positive number of steps is skipped entirely.
    if steps > 0:
        # Fit a straight-line trend over all available data, as floats
        observed = np.asarray(date["list_value"], dtype="float64")
        fit = Holt(observed).fit(smoothing_level=0.3, smoothing_slope=0.1)
        list_value.extend(fit.forecast(steps))

    # Original author's note: not every series starts in 2002, so the front
    # is padded and the tail is cut off.
    if min_list is not None:
        list_value = get_zero_list(min_list) + list_value

    return list_value[:21]
def holts_linear_trend(input_df, kunag, matnr, smoothing_level=0.3, smoothing_slope=0.1):
    """
    One-step-ahead Holt forecasts over the test part of one weekly series.

    The input is copied, passed through ``remove_negative_rows``,
    ``individual_series`` and ``get_weekly_aggregate``, then split by
    ``splitter``. For each test row the model is refitted on the training
    rows so far, the forecast is stored in that row's ``prediction`` column,
    and the row is appended to the training frame.

    :param input_df: source DataFrame (not modified; a copy is used)
    :param kunag: passed to ``individual_series``
    :param matnr: passed to ``individual_series``
    :param smoothing_level: smoothing level passed to ``Holt.fit``
    :param smoothing_slope: smoothing slope passed to ``Holt.fit``
    :return: (training frame extended with the predicted test rows,
        mean squared error over the appended test rows)
    """
    cleaned = remove_negative_rows(input_df.copy())
    weekly = data_transformation.get_weekly_aggregate(
        individual_series(cleaned, kunag, matnr))
    weekly["date"] = weekly["dt_week"].map(str)
    weekly["date"] = weekly["date"].apply(lambda text: text.replace("-", ""))
    weekly["prediction"] = weekly["quantity"]

    history, holdout = splitter(weekly)

    for position, (_, row) in enumerate(holdout.iterrows()):
        # Re-cast on every pass: appended rows may have changed the dtype
        history["quantity"] = history["quantity"].map(float)
        model = Holt(np.asarray(history["quantity"])).fit(
            smoothing_level=smoothing_level, smoothing_slope=smoothing_slope)
        row["prediction"] = model.forecast(1)[0]
        history = pd.concat([history, pd.DataFrame(row).T]).reset_index(drop=True)
        if position == 0:
            # Position of the first appended test row
            test_index = history.shape[0] - 1

    output_df = history
    test_df = history.iloc[test_index:]
    return output_df, mean_squared_error(test_df["quantity"], test_df["prediction"])
def estimate_holt(df, seriesname, alpha=0.2, slope=0.1, trend="add"):
    """
    Return the two-steps-ahead Holt forecast for one column and print it.

    :param df: mapping/DataFrame holding the series
    :param seriesname: key of the series inside ``df``
    :param alpha: smoothing level
    :param slope: smoothing slope
    :param trend: ``"add"`` (default) for an additive trend; ``"mul"`` or
        ``"multiplicative"`` selects Holt's exponential trend
    :return: the second forecast value
    """
    numbers = np.asarray(df[seriesname], dtype='float')
    # The trend type is a constructor option of Holt. It must not be passed
    # as a third positional argument to fit(): that slot is the damping
    # parameter in older statsmodels and is keyword-only in newer releases.
    model = Holt(numbers, exponential=trend in ("mul", "multiplicative"))
    fit = model.fit(alpha, slope)
    estimate = fit.forecast(2)[-1]
    print("Dollar estimation:", estimate)
    return estimate
def estimate_Holt(array, alpha, slope, sizeestimate):
    """
    Forecast ``sizeestimate`` steps with Holt's method, rounded to 4 decimals.

    :param array: observations to fit
    :param alpha: smoothing level
    :param slope: smoothing slope
    :param sizeestimate: number of steps to forecast
    :return: the forecasts rounded to four decimal places
    """
    model = Holt(array)
    fit = model.fit(smoothing_level=alpha, smoothing_slope=slope)
    forecast = fit.forecast(sizeestimate)
    # Round the whole result at once. For pandas input the forecast is
    # labelled, so indexing it with 0, 1, ... would look up labels rather
    # than positions.
    return forecast.round(4)
def estimate_holt(df, seriesname, alpha=0.2, slope=0.1, trend="add", estimationlength=2):
    """
    Forecast ``estimationlength`` steps ahead for one column with Holt's method.

    :param df: mapping/DataFrame holding the series
    :param seriesname: key of the series inside ``df``
    :param alpha: smoothing level
    :param slope: smoothing slope
    :param trend: ``"add"`` (default) for an additive trend; ``"mul"`` or
        ``"multiplicative"`` selects Holt's exponential trend
    :param estimationlength: number of steps to forecast
    :return: the forecast values
    """
    numbers = np.asarray(df[seriesname], dtype='float')
    # The trend type is a constructor option of Holt. It must not be passed
    # as a third positional argument to fit(): that slot is the damping
    # parameter in older statsmodels and is keyword-only in newer releases.
    model = Holt(numbers, exponential=trend in ("mul", "multiplicative"))
    fit = model.fit(alpha, slope)
    estimate = fit.forecast(estimationlength)
    return estimate

    # NOTE(review): everything below is unreachable and reads ``timeseries``,
    # which is not defined in this function. It looks like the body of a
    # separate stationarity-test helper whose ``def`` line was lost; kept
    # verbatim until that is confirmed.
    # Perform Dickey-Fuller test:
    print("Results of Dickey-Fuller Test:")
    array = np.asarray(timeseries, dtype='float')
    np.nan_to_num(array, copy=False)
    dftest = adfuller(array, autolag='AIC')
    dfoutput = pd.Series(dftest[0:4], index=[
        'Test Statistic', 'p-value', '#Lags Used', 'Number of Observations Used'
    ])
    for key, value in dftest[4].items():
        dfoutput['Critical Value (%s)' % key] = value
    print(dfoutput)
def HOLT(df):
    """
    Fit Holt's linear model per ``id1`` .. ``id24`` column and report the mean RMSE.

    The first 20 rows train, the remaining rows test. Only columns whose
    training sum exceeds 30 are modelled; each is plotted (train, test,
    forecast) and its test RMSE is accumulated. The mean RMSE over the
    modelled columns is printed.

    :param df: DataFrame with columns ``id1`` .. ``id24``
    :return: None
    """
    train = df[0:20]
    test = df[20:]
    rms = 0.0
    num = 0
    for id_idx in range(1, 25):
        column = 'id' + str(id_idx)
        if not train[column].sum() > 30:
            continue
        fit = Holt(train[column]).fit(smoothing_level=0.3,
                                      smoothing_slope=0.1,
                                      optimized=False)
        fcast = fit.forecast(len(test))
        plt.plot(train[column], marker='o', label='train_id' + str(id_idx))
        plt.plot(test[column], marker='o', label='test_id' + str(id_idx))
        plt.plot(fcast, marker='o', label='holt_linear' + str(id_idx))
        plt.legend(loc='best')
        rms += math.sqrt(mean_squared_error(test[column], fcast))
        num += 1

    # Without this guard the mean below divides by zero when no column
    # passes the threshold.
    if num == 0:
        print("No series exceeded the training-sum threshold; nothing to evaluate")
        return

    # NOTE(review): the source layout was lost; showing one combined figure
    # after the loop is an assumption - confirm against the original file.
    plt.show()
    mean_error = rms / num
    print(mean_error)
def holts_linear(input_df, kunag, matnr, n, sl, ss):
    """
    Rolling Holt forecasts over the last ``n`` points of one series, with a plot.

    For ``i = n, n-1, ..., 1`` the data is split with the project's
    ``train_test_split(input_df, kunag, matnr, i)``, a Holt model is fitted
    on the training part, and the last value of its ``len(test1)``-step
    forecast is collected. The collected values are stored in
    ``pred_column`` and compared with the test quantities.

    :param input_df: source DataFrame handed to ``train_test_split``
    :param kunag: passed to ``train_test_split``
    :param matnr: passed to ``train_test_split``
    :param n: split argument for the evaluation window
    :param sl: smoothing level
    :param ss: smoothing slope
    :return: (test frame with ``pred_column``, RMSE, MAE)
    """
    # Use the frame that was passed in; the function previously ignored
    # ``input_df`` and read a module-level ``df`` instead.
    test1 = train_test_split(input_df, kunag, matnr, n)[1]
    y_hat_avg = test1.copy()

    lst = []
    for i in range(n, 0, -1):
        train, _ = train_test_split(input_df, kunag, matnr, i)
        fit1 = Holt(np.asarray(train['quantity'])).fit(smoothing_level=sl,
                                                       smoothing_slope=ss)
        y_hat_avg['Holt_linear'] = fit1.forecast(len(test1))
        lst.append(y_hat_avg['Holt_linear'].iloc[-1])
    y_hat_avg['pred_column'] = lst

    # ``train`` here is the training part of the final split (i == 1)
    plt.figure(figsize=(12, 8))
    plt.plot(train.set_index("date")['quantity'], label='Train', marker='.')
    plt.plot(test1.set_index("date")['quantity'], label='Test', marker='.')
    plt.plot(y_hat_avg.set_index("date")['pred_column'],
             label='Holts linear', marker='.')
    plt.legend(loc='best')
    plt.title("Holts linear")
    plt.show()

    rms = sqrt(mean_squared_error(test1.quantity, y_hat_avg.pred_column))
    mae = mean_absolute_error(test1.quantity, y_hat_avg.pred_column)
    # Drop the scratch column before handing the frame back
    del y_hat_avg['Holt_linear']
    return y_hat_avg, rms, mae
def estimate_Holt(dataframe, name, alpha, slope, sizeestimate):
    """
    Forecast one column with Holt's method, rounded to four decimals.

    :param dataframe: mapping/DataFrame holding the series
    :param name: key of the series inside ``dataframe``
    :param alpha: smoothing level
    :param slope: smoothing slope
    :param sizeestimate: number of steps to forecast
    :return: the forecasts, each rounded to four decimal places
    """
    values = np.asarray(dataframe[name])
    fitted = Holt(values).fit(smoothing_level=alpha, smoothing_slope=slope)
    forecast = fitted.forecast(sizeestimate)
    for position, value in enumerate(forecast):
        forecast[position] = round(value, 4)
    return forecast
def fit(self, df):
    """
    Prepare ``df`` with ``ajust_df`` and fit an exponential-trend Holt model.

    The prepared frame is stored on ``self.df`` and the fitted model, built
    from its ``'y'`` column with fixed smoothing parameters, on
    ``self.model``.

    :param df: raw input handed to ``ajust_df``
    :return: self
    """
    self.df = ajust_df(df)
    target = self.df['y']
    unfitted = Holt(target, exponential=True)
    self.model = unfitted.fit(
        smoothing_level=0.5,
        smoothing_slope=0.05,
        optimized=False,
    )
    return self
def estimate_Holt(dataframe, name, alpha, slope, sizeestimate):
    """
    Fit Holt's method to one column and return rounded forecasts.

    :param dataframe: mapping/DataFrame holding the series
    :param name: key of the series inside ``dataframe``
    :param alpha: smoothing level
    :param slope: smoothing slope
    :param sizeestimate: number of steps to forecast
    :return: the forecasts, each rounded to four decimal places
    """
    # Holt is given a plain array, so the column is converted first
    observations = np.asarray(dataframe[name])
    result = Holt(observations).fit(smoothing_level=alpha, smoothing_slope=slope)
    forecast = result.forecast(sizeestimate)
    # Overwrite every entry in place with its rounded value
    forecast[:] = [round(step, 4) for step in forecast]
    return forecast
def holt():
    """
    Plot fitted values and 100-step forecasts for three Holt variants.

    The module-level ``origin_series`` is fitted with a linear trend, an
    exponential trend and an additive damped trend; each variant's fitted
    values and forecast are drawn in their own colour on the current axes.
    """
    forecast_steps = 100

    linear = Holt(origin_series).fit(
        smoothing_level=0.8, smoothing_slope=0.2, optimized=False)
    exponential = Holt(origin_series, exponential=True).fit(
        smoothing_level=0.8, smoothing_slope=0.2, optimized=False)
    damped = Holt(origin_series, damped=True).fit(
        smoothing_level=0.8, smoothing_slope=0.2, damping_slope=0.8)

    variants = (
        (linear, "Holt's linear trend", 'blue'),
        (exponential, "Exponential trend", 'red'),
        (damped, "Additive damped trend", 'green'),
    )
    projections = [
        fitted.forecast(forecast_steps).rename(label)
        for fitted, label, _ in variants
    ]

    for (fitted, _, colour), projection in zip(variants, projections):
        fitted.fittedvalues.plot(marker="o", color=colour)
        projection.plot(color=colour, marker="o", legend=True)

    plt.show()
def run_holts(train, validate, target_variable, exponential, smoothing_level=.1, smoothing_slope=.1):
    """
    Fit a Holt model on ``train`` and predict over the span of ``validate``.

    :param train: frame holding the training series
    :param validate: frame whose first and last index labels bound the prediction
    :param target_variable: column of ``train`` to model
    :param exponential: passed to ``Holt`` as its ``exponential`` option
    :param smoothing_level: fixed smoothing level
    :param smoothing_slope: fixed smoothing slope
    :return: (fitted model, predictions)
    """
    fitted = Holt(train[target_variable], exponential=exponential).fit(
        smoothing_level=smoothing_level,
        smoothing_slope=smoothing_slope,
        optimized=False,
    )
    first_label = validate.index[0]
    last_label = validate.index[-1]
    y_pred = fitted.predict(start=first_label, end=last_label)
    return fitted, y_pred
def next_two_weeks_Holt(df):
    """
    Forecast 14 steps ahead for every column of ``df`` with Holt's method.

    :param df: DataFrame with a ``Date`` column; every other column is
        modelled separately with fixed smoothing parameters
    :return: DataFrame with one column of 14 forecasts per input column
    """
    # Only Holt is needed here; the matplotlib import that used to sit next
    # to it was unused and made plotting a hard dependency of this function.
    from statsmodels.tsa.api import Holt

    indexed = df.set_index('Date')
    final = pd.DataFrame()
    for var in indexed.columns:
        model = Holt(indexed[var]).fit(smoothing_level=.3,
                                       smoothing_slope=.1,
                                       optimized=False)
        final[var] = pd.Series(model.forecast(14))
    return final
def DEF_damping_f(self, df, alpha, beta):
    """
    Append a one-step damped exponential-trend Holt forecast to ``self.df_forecast``.

    The model is fitted on ``df['Actual']`` with fixed smoothing parameters.
    The record's attributes come from ``generate_attrib(df)``. Failures are
    reported on stdout instead of being raised.

    :param df: frame with an ``Actual`` column
    :param alpha: smoothing level
    :param beta: smoothing slope
    :return: None
    """
    try:
        double_exp = Holt(np.array(df['Actual']), exponential=True, damped=True)
        fit_double_exp = double_exp.fit(smoothing_level=alpha,
                                        smoothing_slope=beta,
                                        optimized=False)
        forecast = fit_double_exp.forecast()[0]
        Cluster, Warehouse, WF, YF = generate_attrib(df)
        self.df_forecast.append({'Cluster': Cluster,
                                 'Warehouse': Warehouse,
                                 'Year': YF,
                                 "Week": WF,
                                 "Forecast": forecast})
        print(f'DEBUG:Forecast:{Cluster}:{Warehouse}:{YF}:{WF}:{forecast}')
    except Exception:
        # Best effort: one failing series must not abort the caller.
        # KeyboardInterrupt and SystemExit are deliberately not caught.
        print("ERROR:FORECAST-DEF_DAMPING")
    return None
def train(self, array_X, array_Y):
    """
    Fit a Holt model on ``array_Y`` using the options stored on the instance.

    Both inputs are remembered on the instance; the unfitted model is kept
    on ``self.model`` and the fit result on ``self.fit``.

    :param array_X: stored as ``self.train_X``; not used for fitting
    :param array_Y: observations to fit, stored as ``self.train_Y``
    :return: the fitted values of the model
    """
    self.train_X, self.train_Y = array_X, array_Y
    self.model = Holt(array_Y, exponential=self.exponential, damped=self.damped)
    fit_options = dict(
        smoothing_level=self.smoothing_level,
        smoothing_slope=self.smoothing_slope,
        damping_slope=self.damping_slope,
        optimized=self.optimized,
    )
    self.fit = self.model.fit(**fit_options)
    return self.fit.fittedvalues
def forecast_holt(ticker, data):
    """
    One-step Holt forecast for a ticker on a 365-lag differenced series.

    The series is differenced with ``difference``, modelled with Holt's
    method, and the forecast is mapped back with ``inverse_difference``.

    :param ticker: ticker symbol; upper-cased before the lookup in ``data``
    :param data: mapping/DataFrame keyed by upper-case ticker
    :return: first element of the inverse-differenced forecast
    """
    symbol = str(ticker).upper()
    X = data[symbol].values
    year = 365
    differenced = difference(X, year)
    fitted = Holt(differenced).fit(smoothing_level=0.3, smoothing_slope=0.1)
    restored = inverse_difference(X, fitted.forecast(), year)
    return restored[0]
def holttm(i):
    """
    Score a Holt forecast on the last 15% of the first column of ``i``.

    The first 85% of the rows (rounded) train the model; the forecast over
    the remaining rows is compared with the actuals by passing the two sums
    to ``calculomape``.

    :param i: DataFrame whose first column holds the series
    :return: whatever ``calculomape`` returns for (sum of actuals, sum of forecasts)
    """
    cutoff = round(len(i) * .85)
    history = np.asarray(i.iloc[:cutoff, 0])
    holdout = i.iloc[cutoff:, 0]
    fitted = Holt(history).fit(smoothing_level=0.3,
                               smoothing_slope=0.13,
                               optimized=False)
    projected = fitted.forecast(len(holdout))
    return calculomape(sum(holdout), sum(projected))
def twocolorball_holt_forecast(df):
    """
    Predict one number per ball column with randomly parameterised Holt models.

    The six red-ball columns and the blue-ball column are each fitted with a
    smoothing level and slope drawn from {0.1, ..., 1.0}; the first value of
    ``predict()`` is truncated to an int.

    :param df: DataFrame holding the seven ball columns
    :return: list of seven ints (also printed)
    """
    columns = ["红球%d" % number for number in range(1, 7)] + ["蓝球"]
    l = []
    for column in columns:
        model = Holt(np.asarray(df[column]))
        # Draw the level first, then the slope
        level = random.randint(1, 10) / 10
        slope = random.randint(1, 10) / 10
        fit_model = model.fit(smoothing_level=level,
                              smoothing_slope=slope,
                              optimized=False)
        l.append(int(fit_model.predict()[0]))
    print(l)
    return l
def ts_holt(train, test, **kwargs):
    """
    Decompose and plot ``train``, fit Holt's model and evaluate on ``test``.

    :param train: training series
    :param test: test series; its length sets the forecast horizon
    :param kwargs: forwarded unchanged to ``Holt.fit``
    :return: the fitted Holt model
    """
    yhat = pd.DataFrame({"actual": test})

    # Diagnostics: seasonal decomposition plot and an ADF run whose result
    # is not used further
    sm.tsa.seasonal_decompose(train).plot()
    sm.tsa.stattools.adfuller(train)
    plt.show()

    fitted = Holt(train).fit(**kwargs)
    yhat["holt_linear"] = fitted.forecast(test.shape[0])
    plot_and_eval(train, test, yhat.holt_linear, test)
    return fitted
def create_Holt_Winters(self, series, param):
    """
    Build and fit a Holt model from a string of parameters.

    ``param`` is stored on ``self.args``; the individual options are then
    read through ``self.get_bool`` and ``self.get_float``.

    :param series: observations to fit
    :param param: parameter string; must be exactly of type ``str``
    :return: the fitted model
    """
    assert type(param) == str
    self.args = param

    # Constructor options are read first, then the fit options, in the same
    # order in which they are passed
    use_exponential = self.get_bool('exponential')
    use_damped = self.get_bool('damped')
    model = Holt(series, exponential=use_exponential, damped=use_damped)

    level = self.get_float('smoothing_level')
    slope = self.get_float('smoothing_slope')
    optimized = self.get_bool('optimized')
    damping = self.get_float('damping_slope')
    return model.fit(smoothing_level=level,
                     smoothing_slope=slope,
                     optimized=optimized,
                     damping_slope=damping)
def predict(self, test_X, test_Y):
    """
    One-step-ahead walk-forward predictions over ``test_Y``.

    For every position ``t`` a new Holt model is fitted on ``self.train_Y``
    followed by the first ``t`` test observations, and its one-step forecast
    is collected.

    :param test_X: not used
    :param test_Y: array of test observations
    :return: float array with one prediction per test observation
    """
    one_step = []
    for seen in range(test_Y.shape[0]):
        series = numpy.hstack((self.train_Y, test_Y[:seen]))
        refit = Holt(series,
                     exponential=self.exponential,
                     damped=self.damped).fit(
            smoothing_level=self.smoothing_level,
            smoothing_slope=self.smoothing_slope,
            damping_slope=self.damping_slope,
            optimized=self.optimized)
        one_step.append(refit.forecast(1)[0])
    return numpy.array(one_step, dtype=float)
def holts(train, validate, yhat_df):
    '''
    Fit a damped additive Holt model per column of ``train`` and store its
    predictions over the index span of ``validate`` in ``yhat_df``.

    Predictions are rounded to two decimals. ``yhat_df`` is modified in
    place and also returned.
    '''
    for col in train.columns:
        fitted = Holt(train[col], exponential=False, damped=True).fit(
            smoothing_level=.1,
            smoothing_slope=.1,
            optimized=True,
        )
        span_start = validate.index[0]
        span_end = validate.index[-1]
        yhat_df[col] = round(fitted.predict(start=span_start, end=span_end), 2)
    return yhat_df
def holt_forecast(df):
    """
    Build a number pool from per-column Holt predictions.

    Each of the six red-ball columns and the blue-ball column is fitted with
    a randomly drawn smoothing level and slope. The first predicted value is
    rounded to an int and merged into the pool through ``add_number_pool``,
    flagged as blue for the seventh column.

    :param df: DataFrame holding the seven ball columns
    :return: the pool returned by the last ``add_number_pool`` call
    """
    print("==== 逐一对每位数字进行霍尔特预测 ====")
    pool = []
    for position in range(1, 8):
        is_blue = position >= 7
        column = "蓝球" if is_blue else "红球%d" % position
        model = Holt(np.asarray(df[column]))
        # Draw the level first, then the slope
        level = random.randint(1, 10) / 10
        slope = random.randint(1, 10) / 10
        predict = model.fit(smoothing_level=level,
                            smoothing_slope=slope,
                            optimized=False).predict()
        pool = add_number_pool(pool, int(round(predict[0], 0)), is_blue)
    return pool
def holts_linear(train, test, column, smoothing, slope):
    """
    Fit Holt's linear model, score it on ``test`` and record the outcome.

    The forecast is passed through ``inverse_transform`` before the RMSE is
    computed. A row (column, model name, RMSE, elapsed seconds) is appended
    to the module-level ``result`` frame and the transformed forecast is
    stored in ``result_plots['Holts_linear']``.

    :param train: frame holding the training series
    :param test: frame holding the test series
    :param column: name of the series column
    :param smoothing: smoothing level
    :param slope: smoothing slope
    :return: None
    """
    scored = test.copy()
    started = time.time()

    fitted = Holt(np.asarray(train[column])).fit(smoothing_level=smoothing,
                                                 smoothing_slope=slope)
    scored['holt_linear'] = fitted.forecast(len(test))
    scored['transformed_values'] = inverse_transform(scored, 'holt_linear')
    rmse_hl = sqrt(mean_squared_error(scored[column],
                                      scored['transformed_values']))

    # Elapsed time covers fitting, forecasting and scoring
    time_hl = time.time() - started
    print('\n Total RMSE of the holt linear model : %.3f ' % rmse_hl)

    result.loc[len(result)] = [column, 'Holts Linear', rmse_hl, time_hl]
    result_plots['Holts_linear'] = scored['transformed_values']
def holtt(i, fc_periods, df):
    """
    Forecast the first column of ``df`` with Holt's method.

    Side effects: the fitted values of a simple exponential smoother
    (level 0.2) are written to ``df['pronostico']``, and the Holt model
    summary is printed.

    :param i: not used
    :param fc_periods: number of periods to forecast
    :param df: DataFrame whose first column holds the series; modified in place
    :return: the Holt forecasts
    """
    values = np.asarray(df.iloc[0:, 0])

    # Store the simple-exponential-smoothing fit alongside the data
    smoothed = SimpleExpSmoothing(values).fit(smoothing_level=.2, optimized=False)
    df['pronostico'] = smoothed.fittedvalues

    fit1 = Holt(values).fit(smoothing_level=0.35,
                            smoothing_slope=0.1,
                            optimized=True)
    print(fit1.summary())
    return fit1.forecast(fc_periods)
def HLM_model(train, test):
    """
    Fit Holt's linear trend model, plot the forecast and run the verification.

    :param train: training series
    :param test: test series; its length sets the forecast horizon
    :return: the fitted model
    """
    # alpha = smoothing_level, beta = smoothing_slope
    fit1 = Holt(train).fit(smoothing_level=0.3, smoothing_slope=0.1)
    fcast = fit1.forecast(len(test))

    plt.figure(figsize=(18, 8))
    curves = (
        (train, 'train data', 'black'),
        (test, 'test data', 'green'),
        (fcast, 'forecast', 'red'),
    )
    for series, label, colour in curves:
        plt.plot(series, label=label, color=colour)
    plt.legend(loc='best')
    plt.title('Load Forecast using HLM trend Method', fontsize=15)
    plt.xlabel('day----->')
    plt.ylabel('Consumption in Mwh')
    plt.show()

    print("Verification of HLM(trend) Forecasting Model")
    modelverification(fit1, fcast, test)
    return fit1
def holt():
    """
    Fit an optimised Holt model to a simulated random walk and plot forecasts.

    200 points are drawn from ``sample_random_walk``; the first 161 are
    fitted, the estimated parameters and SSE are printed as a table, and
    the remaining horizon is forecast and plotted against the realisations.
    """
    N, t = 200, 160
    realisations = pd.Series(list(sample_random_walk(0, 0.1, N)), range(N))
    mod = Holt(realisations[:t + 1]).fit(optimized=True)

    params = [
        'smoothing_level', 'smoothing_slope', 'initial_level', 'initial_slope'
    ]
    results = pd.DataFrame(index=["alpha", "beta", "l_0", "b_0", "SSE"],
                           columns=["Holt's"])
    results["Holt's"] = [mod.params[p] for p in params] + [mod.sse]
    print(results)

    # NOTE(review): the label states alpha = beta = 0.5 although the
    # parameters are optimised above - confirm the intended legend text.
    forecasts = mod.forecast(N - (t + 1)).rename(r'$\alpha=0.5$ and $\beta=0.5$')

    # NaN padding over the fitted range keeps the forecasts aligned with the
    # realisations. pd.concat is used because Series.append is not available
    # in current pandas releases.
    padding = pd.Series(np.nan, range(t + 1))
    plot(realisations, pd.concat([padding, forecasts]))
    plot_components(mod)
    py.show()
plt.show()

# ### Seasonally adjusted data
# Let's look at some seasonally adjusted livestock data. One simple
# exponential smoothing model and four Holt variants are fitted.
# The table below compares the results for exponential versus additive
# trends and damped versus non-damped trends.
#
# Note: ``fit4`` does not let the parameter $\phi$ be optimized, because a
# fixed value of $\phi=0.98$ is provided.

fit1 = SimpleExpSmoothing(livestock2).fit()
fit2 = Holt(livestock2).fit()
fit3 = Holt(livestock2, exponential=True).fit()
fit4 = Holt(livestock2, damped=True).fit(damping_slope=0.98)
fit5 = Holt(livestock2, exponential=True, damped=True).fit()

params = [
    'smoothing_level', 'smoothing_slope', 'damping_slope', 'initial_level',
    'initial_slope'
]

# Table column -> fitted model, in display order
_fits_by_column = {
    'SES': fit1,
    "Holt's": fit2,
    "Exponential": fit3,
    "Additive": fit4,
    "Multiplicative": fit5,
}
results = pd.DataFrame(
    index=[r"$\alpha$", r"$\beta$", r"$\phi$", r"$l_0$", "$b_0$", "SSE"],
    columns=list(_fits_by_column))
for _label, _fit in _fits_by_column.items():
    # One row per parameter, plus the sum of squared errors as the last row
    results[_label] = [_fit.params[p] for p in params] + [_fit.sse]
results