def ses(input_df, kunag, matnr, n, alpha):
    """Walk-forward SES forecast over the last ``n`` points, plotted and scored.

    For every ``i`` from ``n`` down to 1 the data is re-split with
    ``train_test_split(input_df, kunag, matnr, i)``, a simple exponential
    smoothing model with a fixed ``alpha`` is fitted on the training part and
    its forecast is stored as the prediction for one test row.

    Parameters
    ----------
    input_df : DataFrame
        Source data handed to ``train_test_split``.
    kunag, matnr
        Forwarded unchanged to ``train_test_split`` (presumably customer and
        material keys -- confirm against that helper).
    n : int
        Number of trailing points held out and predicted.
    alpha : float
        Fixed smoothing level; no parameter optimisation is performed.

    Returns
    -------
    tuple
        ``(y_hat_avg, rms, mae)``: the test frame with an added
        ``pred_column``, the root-mean-squared error and the mean absolute
        error of the predictions against ``quantity``.
    """
    # The previous version read a module-level ``df`` and ignored ``input_df``.
    test1 = train_test_split(input_df, kunag, matnr, n)[1]
    y_hat_avg = test1.copy()
    horizon = len(test1)

    predictions = []
    for i in range(n, 0, -1):
        train, _ = train_test_split(input_df, kunag, matnr, i)
        fit2 = SimpleExpSmoothing(np.asarray(train['quantity'])).fit(
            smoothing_level=alpha, optimized=False)
        # Keep the last value of the forecast horizon, as before.
        predictions.append(fit2.forecast(horizon)[-1])
    y_hat_avg['pred_column'] = predictions

    # ``train`` is the training part of the final (i == 1) split.
    plt.figure(figsize=(12, 8))
    plt.plot(train.set_index("date")['quantity'], label='Train', marker='.')
    plt.plot(test1.set_index("date")['quantity'], label='Test', marker='.')
    plt.plot(y_hat_avg.set_index("date")['pred_column'], label='SES', marker='.')
    plt.legend(loc='best')
    plt.title("SES")
    plt.show()

    rms = sqrt(mean_squared_error(test1.quantity, y_hat_avg.pred_column))
    mae = mean_absolute_error(test1.quantity, y_hat_avg.pred_column)
    return y_hat_avg, rms, mae
def get_gravity_from_acc(acc):
    """
    Estimate the gravity component of accelerometer data with a low-pass
    (exponential smoothing) filter.

    Parameters
    ----------
    acc : numpy array
        Rows of [time, x, y, z]

    Returns
    --------
    gravity : list of 3 values
        One-step SES forecast for each of the x, y and z columns
    """
    if debug:
        print('get gravity component...')

    # Exponential low-pass filter, see:
    # https://developer.android.com/guide/topics/sensors/sensors_motion
    # https://medium.com/datadriveninvestor/how-to-build-exponential-smoothing-models-using-python-simple-exponential-smoothing-holt-and-da371189e1a1
    # TODO: we don't need to use all rows to get the component
    sample_count = min(len(acc), 1000)

    def smoothed_level(column):
        # Fit SES on the leading samples of one axis and take the next value.
        samples = acc[:sample_count, column]
        return SimpleExpSmoothing(samples).fit().forecast(1)[0]

    # Column 0 is time; columns 1..3 hold the three axes.
    gravity_component = [smoothed_level(column) for column in (1, 2, 3)]

    if debug:
        print_floats(*gravity_component, description="Gravity component:")
    return gravity_component
def ses1(input_df, kunag, matnr, n, l, alpha):
    """Return the cross-validation MAE of walk-forward SES for ``l`` folds.

    Fold ``i`` (0-based) uses ``k = n - i`` and a cross-validation offset of
    ``l - i``.  Within a fold the data is re-split for every ``j`` from ``k``
    down to ``offset + 1`` with ``train_cv_split``; an SES model with fixed
    ``alpha`` is fitted on each training part and its forecast is used as the
    prediction for one cross-validation row.

    Parameters
    ----------
    input_df : DataFrame
        Source data handed to ``train_cv_split``.
    kunag, matnr
        Forwarded unchanged to ``train_cv_split`` (presumably customer and
        material keys -- confirm against that helper).
    n : int
        Starting split size for the first fold.
    l : int
        Number of folds and initial cross-validation offset.
    alpha : float
        Fixed smoothing level; no parameter optimisation is performed.

    Returns
    -------
    list of float
        Mean absolute error of each fold, in fold order.
    """
    # The previous version read a module-level ``df`` and ignored ``input_df``.
    fold_errors = []
    for i in range(l):
        k = n - i
        # Derived per fold instead of decrementing the ``l`` parameter in place.
        cv_offset = l - i

        _, cv1 = train_cv_split(input_df, kunag, matnr, k, cv_offset)
        y_hat_avg = cv1.copy()
        horizon = len(cv1)

        predictions = []
        for j in range(k, cv_offset, -1):
            train, _ = train_cv_split(input_df, kunag, matnr, j, cv_offset)
            fit2 = SimpleExpSmoothing(np.asarray(train['quantity'])).fit(
                smoothing_level=alpha, optimized=False)
            # Keep the last value of the forecast horizon, as before.
            predictions.append(fit2.forecast(horizon)[-1])
        y_hat_avg['pred_column'] = predictions

        fold_errors.append(
            mean_absolute_error(cv1.quantity, y_hat_avg.pred_column))
    return fold_errors
def forecast_ses(og_df):
    """Project the peak of an SES forecast built from daily aggregated counts.

    Returns ``[0, 0]`` when ``og_df`` has at most one row.  Otherwise the data
    is aggregated with ``aggregate_by_day``, an SES model (alpha 0.6) is fitted
    on its ``Count`` column and forecast over the index produced by
    ``create_split``.  The result is ``[date, quantity]`` as strings, taken at
    the maximum of the forecast.

    NOTE(review): a matplotlib figure is created here but neither shown nor
    closed -- confirm whether a caller relies on it.
    """
    if len(og_df) <= 1:
        return [0, 0]

    train = aggregate_by_day(og_df.copy())
    test = train.reindex(create_split(train))
    y_hat_avg = test.copy()

    model = SimpleExpSmoothing(np.asarray(train['Count']))
    fitted = model.fit(smoothing_level=0.6, optimized=False)
    y_hat_avg['SES'] = fitted.forecast(len(test))

    plt.figure(figsize=(16, 8))
    plt.plot(train['Count'], label='Train')
    plt.plot(y_hat_avg['SES'], label='SES')
    plt.legend(loc='best')

    # Index label and value of the largest forecast.
    peak_label = y_hat_avg['SES'].idxmax()
    date_projected = str(peak_label)
    qty_projected = str(y_hat_avg.loc[peak_label, 'SES'])
    return [date_projected, qty_projected]
def simple_exponential_smoothing_statsmodels():
    """Demonstrate statsmodels SES on a sampled series and plot the forecast.

    Draws ``N = 200`` values from ``sample_gaussian_process(20, 5, N)``, fits
    SES with ``alpha = 0.5`` and initial level 20 on the first ``t + 1 = 161``
    points, forecasts the remaining points and plots realisations against the
    forecast (NaN-padded over the training range).
    """
    N, t, alpha, x0 = 200, 160, 0.5, 20
    realisations = pd.Series(sample_gaussian_process(20, 5, N), range(N))
    # NOTE(review): newer statsmodels releases expect ``initial_level`` on the
    # constructor together with initialization_method="known" -- confirm
    # against the installed version.
    mod = SimpleExpSmoothing(realisations[:t + 1]).fit(
        smoothing_level=alpha, initial_level=x0, optimized=False)
    forecasts = mod.forecast(N - (t + 1)).rename(r'$\alpha=0.5$')

    # ``Series.append`` was removed in pandas 2.0; ``pd.concat`` is the
    # supported equivalent and yields the same values and index.
    padding = pd.Series(np.nan, range(t + 1))
    plot(realisations, pd.concat([padding, forecasts]), alpha)
    py.show()
def simple_exponential_smoothing(input_df, kunag, matnr, alpha=0.6):
    """Walk-forward one-step SES predictions for one weekly-aggregated series.

    The series selected by ``individual_series(df, kunag, matnr)`` is
    aggregated weekly and split with ``splitter``.  For each test row an SES
    model with fixed ``alpha`` is fitted on all rows seen so far, its one-step
    forecast is stored in ``prediction`` and the row (with its actual
    ``quantity``) is appended to the training frame.

    Parameters
    ----------
    input_df : DataFrame
        Source data; it is copied, not modified.
    kunag, matnr
        Forwarded unchanged to ``individual_series``.
    alpha : float, default 0.6
        Fixed smoothing level.

    Returns
    -------
    tuple
        ``(output_df, mse)``: the training rows followed by the predicted test
        rows, and the mean squared error over the test rows only.
    """
    df = remove_negative_rows(input_df.copy())
    df_series = individual_series(df, kunag, matnr)
    df_series = data_transformation.get_weekly_aggregate(df_series)
    df_series["date"] = df_series["dt_week"].map(str)
    df_series["date"] = df_series["date"].apply(lambda x: x.replace("-", ""))
    df_series["prediction"] = df_series["quantity"]
    df_series_train, df_series_test = splitter(df_series)

    # Position of the first test row in the combined frame; computed up front
    # so it is defined even when the test split is empty.
    test_index = df_series_train.shape[0]

    for _, row in df_series_test.iterrows():
        # Appending transposed rows turns the column into object dtype, so
        # coerce explicitly before fitting.
        history = np.asarray(df_series_train["quantity"], dtype=float)
        fit2 = SimpleExpSmoothing(history).fit(
            smoothing_level=alpha, optimized=False)
        # ``forecast(1)`` returns a length-1 array; store the scalar.
        row["prediction"] = fit2.forecast(1)[0]
        df_series_train = pd.concat(
            [df_series_train, pd.DataFrame(row).T]).reset_index(drop=True)

    output_df = df_series_train
    test_df = output_df.iloc[test_index:]
    return output_df, mean_squared_error(test_df["quantity"],
                                         test_df["prediction"])
def simpleExpSmoothing(x, y, save_fn):
    """Forecast one step ahead for every series in ``x`` and save the result.

    Each element of ``x`` is fitted with SES (alpha 0.6, no optimisation).
    The collected one-step forecasts are passed, together with ``y``, to
    ``save_fn`` under the name ``'rate_simpleExpSmoothing.txt'``.
    """
    def next_value(series):
        # One-step forecast (length-1 result) for a single series.
        fitted = SimpleExpSmoothing(series).fit(smoothing_level=0.6,
                                                optimized=False)
        return fitted.forecast(1)

    forecasts = [next_value(series) for series in x]
    save_fn('rate_simpleExpSmoothing.txt', np.array(forecasts), y)
def gen_plot_forecast():
    """Build a Plotly figure of observed vs. SES-smoothed nginx reliability.

    Fetches hourly reliability buckets through ``fetchData.elasticSearch``,
    keeps the latest 1000 rows, fits SES with alpha 0.5 and averages a
    5-step forecast.  Fitted values below 0.5 are raised to 0.5 before
    plotting.  The process exits with status 1 if the data contains NaN.

    Returns
    -------
    tuple
        ``(fig, predicted_data, last_bucket, tm)``: the figure, the mean of
        the 5-step forecast, the parsed last bucket and the generation time.
    """
    # NOTE(review): the URL embeds credentials; consider loading them from
    # configuration instead of source.
    es_conn = fetchData.elasticSearch(
        url="https://*****:*****@kf6-stage.ikit.org/es/_search")
    df = es_conn.get_nginx_reliability(interval='1h')
    df = df.sort_values('buckets', ascending=True)

    data_in_window = df.tail(1000)
    rel_data = data_in_window["reliability"].to_numpy()
    date_data = data_in_window["buckets"].to_numpy()
    last_bucket = parse(data_in_window["buckets"].iloc[-1])

    if np.isnan(np.sum(rel_data)):
        print("NaN in data")
        # ``sys.exit`` rather than the interactive ``exit`` helper, which is
        # only installed by the ``site`` module.
        sys.exit(1)

    simple_exp_model = SimpleExpSmoothing(rel_data).fit(smoothing_level=.5)
    predicted_data = np.average(simple_exp_model.forecast(5))
    # Floor the fitted curve at 0.5 (NaN entries are left untouched).
    fitted_values = np.maximum(simple_exp_model.fittedvalues, 0.5)

    fig = go.Figure()
    fig.add_trace(
        go.Scatter(x=date_data, y=rel_data, mode='lines',
                   name="Observed Reliability"))
    fig.add_trace(
        go.Scatter(x=date_data, y=fitted_values, mode='lines',
                   name="Predicted Reliability"))

    # Use one timestamp for both the log line and the return value.
    tm = datetime.datetime.now()
    print(tm)
    sys.stdout.flush()
    return fig, predicted_data, last_bucket, tm
def model(self, column_name, df, apply_smoothing, smoothing_level=None):
    """
    performs predictions using the simple exponential smoothing model approach

    The model is fitted on ``df["train"]`` without its last
    ``self.prediction_period`` rows.  The output column holds the training
    values for those leading rows and the forecast for the trailing ones.

    :input column_name : str, name of column to hold the predicted values
    :input df : dataframe with a "train" column; modified in place
    :input apply_smoothing : bool, when true the given smoothing_level is
                             passed to the fit, otherwise it is left to the
                             optimizer
    :input smoothing_level : float, default=None, alpha parameter of the
                             Simple Exponential Smoothing model
    :returns df : the same dataframe with ``column_name`` added
    """
    m = self.prediction_period
    history = df["train"][:-m]

    # Truthiness instead of ``== True`` / ``== False`` so that any other
    # flag value no longer leaves the fit undefined.
    if apply_smoothing:
        fit1 = SimpleExpSmoothing(history).fit(
            smoothing_level=smoothing_level, optimized=True)
    else:
        fit1 = SimpleExpSmoothing(history).fit(optimized=True)

    # Build the column as one array and assign it once.  The previous
    # chained assignment (``df[col][:-m] = ...``) does not write through
    # under pandas copy-on-write.
    predicted = df["train"].to_numpy(dtype=float, copy=True)
    predicted[-m:] = np.asarray(fit1.forecast(m), dtype=float)
    df[column_name] = predicted
    return df
def evaluate_simp_avg_model(X):
    """
    Evaluate a Simple Exponential Smoothing model with walk-forward validation

    The first 75% of ``X`` seeds the history.  For each remaining observation
    a model (alpha 0.6, no optimisation) is fitted on the history, a one-step
    forecast is recorded and the true observation is appended to the history.

    :param X: list or series containing all historical data
    :return: mse (error metric) and the last fitted model
    """
    train_size = int(len(X) * 0.75)
    train, test = X[0:train_size], X[train_size:]

    # Materialise both parts as plain lists so iteration is positional.
    # ``test[t]`` on a pandas Series is a label lookup and fails because the
    # slice keeps its original index labels.
    history = list(train)
    actuals = list(test)

    predictions = []
    for actual in actuals:
        model_fit = SimpleExpSmoothing(history).fit(smoothing_level=0.6,
                                                    optimized=False)
        predictions.append(model_fit.forecast()[0])
        # Move forward one time step using the observed value.
        history.append(actual)

    # Out-of-sample error.
    mse = mean_squared_error(actuals, predictions)
    return mse, model_fit
def SES_find_smoothng_level(df):
    """Plot SES (alpha 0.1) forecasts for columns ``id1``..``id24``.

    The first 20 rows are used for fitting and the rest for testing.  Only
    columns whose training sum exceeds 25 are fitted, plotted and scored.

    Returns
    -------
    float
        Always ``0.1``: the smoothing level is fixed and the accumulated
        error is diagnostic only.
    """
    train = df[0:20]
    test = df[20:]
    num = 0
    rms = 0.0
    for id_idx in range(1, 25):
        column = 'id' + str(id_idx)
        if train[column].sum() > 25:
            fit = SimpleExpSmoothing(train[column]).fit(
                smoothing_level=0.1, optimized=False)
            fcast = fit.forecast(len(test))
            plt.plot(train[column], marker='o',
                     label='train_id' + str(id_idx))
            plt.plot(test[column], marker='o',
                     label='test_id' + str(id_idx))
            plt.plot(fcast, marker='o', label='SES' + str(id_idx))
            plt.legend(loc='best')
            rms = rms + math.sqrt(mean_squared_error(test[column], fcast))
            num = num + 1
    # NOTE(review): a single show() after the loop, so every qualifying
    # series is drawn on the same axes -- confirm this is the intent.
    plt.show()
    # Guarded: previously this raised ZeroDivisionError when no column
    # passed the threshold.
    mean_error = rms / num if num else float('nan')
    return 0.1
def simple_smoothing(data, company):
    """Fit SES (alpha 0.1) on ``data``, print and plot the fitted values.

    Plots the one-step forecast and the in-sample fitted values in blue under
    the title ``"Trade War Impact - <company>"`` and shows the figure.

    Parameters
    ----------
    data
        Series passed to ``SimpleExpSmoothing``; ``forecast(1).rename(...)``
        and ``.plot(...)`` are called on the results, so presumably a pandas
        Series -- confirm against callers.
    company : str
        Name appended to the plot title.
    """
    fit1 = SimpleExpSmoothing(data).fit(smoothing_level=0.1, optimized=False)
    fcast1 = fit1.forecast(1).rename(r'$\alpha=0.1$')
    # The results object is not subscriptable; ``fittedvalues`` is an
    # attribute.
    print(fit1.fittedvalues)
    fcast1.plot(marker='o', color='blue', legend=True)
    fit1.fittedvalues.plot(marker='o', color='blue')
    pyplot.title("Trade War Impact - " + company)
    pyplot.show()
def simpleExponentialSmoothing(self, train, test, trend):
    """Score an SES model and record it if it beats the current best RMSE.

    Does nothing unless ``trend`` equals ``'no trend'``.  Otherwise SES is
    fitted on ``train`` with default settings, forecast over ``len(test)``
    steps and scored with ``rootMeanSquaredError``.  When the score is lower
    than ``self.rmse``, both ``self.rmse`` and ``self.__model__`` are updated.
    """
    if trend != 'no trend':
        return

    fitted = SimpleExpSmoothing(train).fit()
    candidate_rmse = rootMeanSquaredError(test, fitted.forecast(len(test)))
    if candidate_rmse < self.rmse:
        self.rmse = candidate_rmse
        self.__model__ = 'simpleExponentialSmoothing'
def estimate_SES(dataframe, name, alpha, sizeestimate):
    """Return an SES forecast of ``sizeestimate`` steps, rounded to 4 decimals.

    The column ``dataframe[name]`` is converted to an array and fitted with a
    fixed smoothing level ``alpha`` (no optimisation).
    """
    series = np.asarray(dataframe[name])
    fitted = SimpleExpSmoothing(series).fit(smoothing_level=alpha,
                                            optimized=False)
    forecast = fitted.forecast(sizeestimate)
    # Round each forecast value in place.
    for position, value in enumerate(forecast):
        forecast[position] = round(value, 4)
    return forecast
def twocolorball_ses_forecast(df):
    """Produce one SES-based number for each of the seven ball columns.

    Columns "红球1".."红球6" and "蓝球" are each fitted with SES using a
    random smoothing level drawn from 0.1, 0.2, ..., 1.0.  The first predicted
    value is truncated to an int.  The resulting list is printed and returned.
    """
    picks = []
    for position in range(1, 8):
        if position < 7:
            column = "红球%d" % position
        else:
            column = "蓝球"
        model = SimpleExpSmoothing(np.asarray(df[column]))
        level = random.randint(1, 10) / 10
        fitted = model.fit(smoothing_level=level, optimized=False)
        picks.append(int(fitted.predict()[0]))
    print(picks)
    return picks
def plot_all_graphs(data, company):
    """Plot SES (alpha 1) fitted values and forecast for every frame in ``data``.

    Parameters
    ----------
    data : iterable of DataFrame
        Each frame must provide a ``sentiment`` column.
    company : sequence of str
        Titles, indexed in step with ``data``.
    """
    for position, df in enumerate(data):
        fit1 = SimpleExpSmoothing(df['sentiment']).fit(smoothing_level=1,
                                                       optimized=False)
        # The legend label now matches the smoothing level actually used;
        # it previously claimed alpha = 0.1.
        fcast1 = fit1.forecast(1).rename(r'$\alpha=1$')
        fcast1.plot(marker='o', color='blue', legend=True)
        fit1.fittedvalues.plot(marker='o', color='blue')
        pyplot.title("Trade War Impact - " + company[position])
    # NOTE(review): a single show() after the loop, so all series share one
    # figure and the last title wins -- confirm this is the intent.
    pyplot.show()
def fit_model(self, n_predict):
    """Fit SES on ``self.train`` and store an ``n_predict``-step forecast.

    The forecast is paired with ``self.ds_test`` in a DataFrame with columns
    ``ds`` and ``yhat``, saved on ``self.forecast`` and returned.
    """
    fitted = SimpleExpSmoothing(self.train).fit()
    predicted = fitted.forecast(n_predict)
    self.forecast = pd.DataFrame({"ds": self.ds_test, "yhat": predicted})
    return self.forecast
def ES(data=None, horizon=24, alpha=0.3):
    """
    Build ES model => Train model => Get forecasts.

    :param data: history data.
    :param horizon: Length of forecasts.
    :param alpha: smoothing level passed to the fit.
    :return: forecasts for the next ``horizon`` time steps, as returned by
             statsmodels.
    :raises ValueError: if ``data`` is None.
    """
    if data is None:
        raise ValueError("data must be provided")
    model = SimpleExpSmoothing(data).fit(smoothing_level=alpha)
    # ``predict(start=len(data), end=len(data) + horizon)`` treats ``end`` as
    # inclusive and therefore returned horizon + 1 values.
    return model.forecast(horizon)
def sess(i, fc_periods, df):
    """Fit SES on the first column of ``df`` and forecast ``fc_periods`` steps.

    The in-sample fitted values are written to ``df['pronostico']`` (the
    caller's frame is modified).  ``i`` is accepted but not used.

    Returns the forecast produced by statsmodels.
    """
    sales = np.asarray(df.iloc[0:, 0])
    fitted = SimpleExpSmoothing(sales).fit(smoothing_level=0.2, optimized=True)
    df['pronostico'] = fitted.fittedvalues
    return fitted.forecast(fc_periods)
def sesm(i):
    """Return the MAPE of summed SES forecasts on an 85/15 hold-out split.

    ``i`` is a DataFrame whose first column is the series.  The leading 85%
    of rows are used for fitting and the rest are held out.  The sums of the
    held-out values and of the forecast are passed to ``calculomape``.
    """
    frame = i
    cut = round(len(frame) * .85)
    training = np.asarray(frame.iloc[:cut, 0])
    holdout = frame.iloc[cut:, 0]

    fitted = SimpleExpSmoothing(training).fit(smoothing_level=0.2,
                                              optimized=True)
    predicted = fitted.forecast(len(holdout))

    actual_total = sum(holdout)
    predicted_total = sum(predicted)
    return calculomape(actual_total, predicted_total)
def estimate_SES(dataframe, name, alpha, sizeestimate):
    """Forecast ``sizeestimate`` steps of ``dataframe[name]`` with SES.

    The smoothing level is fixed to ``alpha`` and each forecast value is
    rounded to 4 decimals.
    """
    # SES works on an array, so the column is converted first.
    values = np.asarray(dataframe[name])
    fitted = SimpleExpSmoothing(values).fit(smoothing_level=alpha,
                                            optimized=False)
    # The model assumes neither trend nor seasonality, so the forecasts can
    # all be the same value, i.e. a straight line.
    forecast = fitted.forecast(sizeestimate)
    forecast[:] = [round(value, 4) for value in forecast]
    return forecast
def seasonal_prediction(self):
    """Plot train, test and an SES (alpha 0.6) forecast of the ``Count`` column.

    The model is fitted on ``self.train_y['Count']`` and forecast over
    ``len(self.test_y)`` steps.  The forecast is placed on a copy of
    ``self.test_y`` for plotting.  Nothing is returned.
    """
    from statsmodels.tsa.api import SimpleExpSmoothing

    history = np.asarray(self.train_y['Count'])
    fitted = SimpleExpSmoothing(history).fit(smoothing_level=0.6,
                                             optimized=False)
    y_hat_avg = self.test_y.copy()
    y_hat_avg['SES'] = fitted.forecast(len(self.test_y))

    plt.figure(figsize=(16, 8))
    for series, label in ((self.train_y['Count'], 'Train'),
                          (self.test_y['Count'], 'Test'),
                          (y_hat_avg['SES'], 'SES')):
        plt.plot(series, label=label)
    plt.legend(loc='best')
    plt.show()
def SES(paramsList=['pollution.csv', '0.93','pm', 'humidity', 'date'],specialParams=['0.6']):
    '''
    Fit SES per value column of a CSV file and write train + forecast rows to 'result.csv'.

    paramsList:    [csv path, training fraction, value column(s)..., timestamp column]
    specialParams: [smoothing level (alpha)]

    Guidance for choosing alpha:
    1. When the time series is fairly stable, choose a small alpha, 0.05-0.20.
    2. When the series fluctuates but its long-term trend does not change much, choose a slightly larger alpha, 0.10-0.40.
    3. When the series fluctuates strongly and the long-term trend changes a lot, with a clear upward or downward trend, choose a larger alpha, 0.60-0.80.
    4. When the series is rising or falling and fits an additive model, take a large alpha, 0.60-1.
    '''
    # NOTE(review): the list defaults are shared between calls; they are only read here.
    path = paramsList[0]
    trainRows = float(paramsList[1])
    saveto = 'result.csv'
    df = pd.read_csv(path, usecols=paramsList[2:])
    allRows = df.shape[0]
    es = specialParams[0]
    train = df[0:int(allRows*trainRows)]
    # NOTE(review): the "+1" leaves the row at the split position out of both train and test -- confirm this is intended.
    test = df[int(allRows*trainRows)+1:]
    # Parse the timestamp column, index by it and resample to daily means (full data, then train, then test).
    df['Timestamp'] = pd.to_datetime(df[paramsList[-1]], format='%Y/%m/%d %H:%M')
    df.index = df['Timestamp']
    df = df.resample('D').mean()
    # NOTE(review): train/test are slices of df, so these column assignments may trigger SettingWithCopyWarning.
    train['Timestamp'] = pd.to_datetime(train[paramsList[-1]], format='%Y/%m/%d %H:%M')
    train.index = train['Timestamp']
    train = train.resample('D').mean()
    test['Timestamp'] = pd.to_datetime(test[paramsList[-1]], format='%Y/%m/%d %H:%M')
    test.index = test['Timestamp']
    test = test.resample('D').mean()
    y_hat = test.copy()
    nullArray = train.copy()
    nullArray['time'] = train.index
    # Everything above is generic and reusable ----------------------------
    # Value columns are paramsList[2:-1]; the last entry is the timestamp column.
    for i in range(2,len(paramsList)-1):
        fit = SimpleExpSmoothing(np.asarray(train[paramsList[i]])).fit(smoothing_level=float(es), optimized=False)
        y_hat[paramsList[i]] = fit.forecast(len(test))
        y_hat[paramsList[i]] = round(y_hat[paramsList[i]],2)
        rms = sqrt(mean_squared_error(test[paramsList[i]], y_hat[paramsList[i]]))
        print(rms)
    y_hat['time'] = test.index
    # Stack the training rows on top of the forecast rows.
    yhat_naive = np.array(y_hat)
    nArray = np.array(nullArray)
    newArray = np.concatenate((nArray,yhat_naive),axis=0)
    # NOTE(review): assumes the stacked array has exactly len(paramsList[2:]) columns -- verify against the installed pandas version.
    s = pd.DataFrame(newArray, columns=paramsList[2:])
    for i in range(2,len(paramsList)-1):
        # Blank the value columns over the leading trainRows fraction of the output.
        # NOTE(review): chained assignment; may not write through under pandas copy-on-write.
        s[paramsList[i]][0:int(len(s)*trainRows)] = ""
    s.to_csv(saveto,index=False,header=True,float_format='%.2f')
def SES_f(self, df, a):
    """Append a one-step SES forecast for ``df`` to ``self.df_forecast``.

    The ``Actual`` column is fitted with a fixed smoothing level ``a``.  The
    forecast is stored together with the attributes returned by
    ``generate_attrib(df)``.  Failures are reported on stdout and do not
    propagate (best-effort behaviour is kept).

    Returns
    -------
    None
    """
    try:
        simpleexp = SimpleExpSmoothing(np.asarray(df['Actual']))
        fit_simpleexp = simpleexp.fit(smoothing_level=a, optimized=False)
        forecast = fit_simpleexp.forecast()[0]
        Cluster, Warehouse, WF, YF = generate_attrib(df)
        self.df_forecast.append({'Cluster': Cluster, 'Warehouse': Warehouse,
                                 'Year': YF, "Week": WF,
                                 "Forecast": forecast})
    except Exception as err:
        # ``Exception`` instead of a bare ``except`` so that
        # KeyboardInterrupt / SystemExit are not swallowed; the cause is
        # included so failures can be diagnosed.
        print(f"ERROR:FORECAST-SES:{err!r}")
    else:
        print(f'DEBUG:Forecast:{Cluster}:{Warehouse}:{YF}:{WF}:{forecast}')
    return None
def ewma(data, col, train, test, frequency):
    """Fit SES (alpha 0.6) on ``train[col]``, print the RMSE and save a plot.

    The forecast covers ``len(test)`` steps and is scored against
    ``test[col]`` with ``rmse``.  The figure is written to
    ``<frequency>ses.png``.  ``data`` is accepted but not used.

    NOTE(review): the figure is not closed after saving -- confirm whether a
    caller still needs it.
    """
    history = np.asarray(train[col])
    fitted = SimpleExpSmoothing(history).fit(smoothing_level=0.6,
                                             optimized=False)
    y_hat_avg = test.copy()
    y_hat_avg['SES'] = fitted.forecast(len(test))
    print('Rmse= ', rmse(test[col], y_hat_avg['SES']))

    plt.figure(figsize=(16,8))
    for series, label in ((train[col], 'Train'),
                          (test[col], 'Test'),
                          (y_hat_avg['SES'], 'Exponential Smoothing')):
        plt.plot(series, label=label)
    plt.legend(loc='best')
    plt.savefig(frequency+'ses.png')
def fit_SES(params, df):
    """Fit SES on all but the last ``carbon_monoxide`` value and forecast one step.

    Parameters
    ----------
    params : mapping
        When it contains ``'a'``, that value is used as the smoothing level;
        otherwise the smoothing level is left to the optimizer.
    df : DataFrame
        Must provide a ``carbon_monoxide`` column.

    Returns
    -------
    tuple
        ``(forecast, smoothing_level)``: the one-step forecast and the
        smoothing level read from the fitted model's parameters.
    """
    data = df.carbon_monoxide[:-1]
    model = SimpleExpSmoothing(data, initialization_method="estimated")
    if 'a' in params:
        fit = model.fit(smoothing_level=float(params['a']))
    else:
        fit = model.fit()
    fcast = fit.forecast(1)
    # The forecast is a pandas Series indexed past the end of the data, so
    # ``fcast[0]`` would be a label lookup; take the value by position.
    return fcast.iloc[0], fit.model.params['smoothing_level']
def ses_forecast(df):
    """Build a number pool from per-column SES predictions.

    Columns "红球1".."红球6" and "蓝球" are each fitted with SES using a
    random smoothing level drawn from 0.1, 0.2, ..., 1.0.  The first predicted
    value is rounded to an int and merged into the pool through
    ``add_number_pool``, flagged as blue for the seventh column.
    """
    print("==== 逐一对每位数字进行 SES 预测 ====")
    pool = []
    for position in range(1, 8):
        is_blue = position >= 7
        column = "蓝球" if is_blue else "红球%d" % position
        model = SimpleExpSmoothing(np.asarray(df[column]))
        level = random.randint(1, 10) / 10
        predict = model.fit(smoothing_level=level, optimized=False).predict()
        pool = add_number_pool(pool, int(round(predict[0], 0)), is_blue)
    return pool
def run_expo_forecast_week(df, min_date_dict, last_period_available, period, alpha):
    """Create a one-period-ahead exponential smoothing forecast for every GEU.

    Parameters
    ----------
    df : DataFrame
        Must provide ``geu``, ``period`` and ``demand`` columns.
    min_date_dict : mapping
        First period per GEU; a GEU is only forecast when
        ``last_period_available`` is later than this value.
    last_period_available : int
        Last period to use as history, encoded as year followed by a
        two-digit period number.
    period : str
        ``'month_str'`` (12 periods per year), ``'semester'`` (2) or anything
        else (4).
    alpha : float
        Fixed smoothing level; also used in the output column name.

    Returns
    -------
    DataFrame
        One row per forecast GEU with columns ``geu``, ``period`` and
        ``expo_<alpha>_forecast``; an empty DataFrame when nothing qualifies.
    """
    geus = df['geu'].unique()

    # Number of the last period within a year for the chosen granularity.
    if period == 'month_str':
        last_period_year = '12'
    elif period == 'semester':
        last_period_year = '02'
    else:
        last_period_year = '04'

    # Roll over to period 01 of the next year when the last available period
    # closes a year; otherwise simply move to the next period.
    last_period_string = str(last_period_available)
    if last_period_string[-2:] == last_period_year:
        forecasted_period = int(last_period_string[:4] + "01") + 100
    else:
        forecasted_period = last_period_available + 1

    forecast_column = f'expo_{alpha}_forecast'
    # The period filter does not depend on the GEU, so apply it once.
    history = df[df['period'] <= last_period_available]

    # Collect rows and build the frame once.  Concatenating onto an empty
    # DataFrame inside the loop is quadratic and deprecated in pandas.
    rows = []
    for geu in geus:
        df_geu = history[history['geu'] == geu].reset_index(drop=True)
        # Only GEUs with an earlier first period and at least two
        # observations are forecast.
        if last_period_available > min_date_dict[geu] and len(df_geu) >= 2:
            model = SimpleExpSmoothing(df_geu['demand'].values).fit(
                optimized=False, smoothing_level=alpha)
            rows.append({
                'geu': geu,
                'period': forecasted_period,
                forecast_column: model.forecast(1)[0],
            })

    if not rows:
        return pd.DataFrame()
    return pd.DataFrame(rows)
def simple_exponential_smoothing(self, values, law=6):
    """
    Forecast future points with Simple Exponential Smoothing

    Arguments:
        values: Sequence of (X, Y) pairs; only the Y component is used
        law: Number of future points to predict. Default: 6

    Returns:
        The forecast for the next ``law`` points.
    """
    dependent = [pair[1] for pair in values]
    fitted = SimpleExpSmoothing(dependent).fit()
    return fitted.forecast(law)
def simple_expo_smoothing(train, test, value):
    """Grid-search the SES smoothing level that minimises MAPE on ``test``.

    Every alpha in 0.00, 0.01, ..., 1.00 is fitted on ``train[value]`` and
    scored with ``mean_abs_percentage_error`` against ``test[value]``.

    Returns
    -------
    tuple
        ``(error, optimal_alpha)``: the lowest MAPE found and the alpha that
        produced it.  ``(inf, None)`` is returned when no alpha yields a
        comparable score (for example when every MAPE is NaN).
    """
    y_hat_avg = test.copy()
    history = np.asarray(train[value])
    horizon = len(test)

    # Start from infinity instead of a magic constant, and pre-bind the
    # result so it is defined even if no candidate improves on it.
    error = float('inf')
    optimal_alpha = None
    for alpha in np.linspace(0, 1, 101):
        fit2 = SimpleExpSmoothing(history).fit(smoothing_level=alpha,
                                               optimized=False)
        y_hat_avg['SES'] = fit2.forecast(horizon)
        mape = mean_abs_percentage_error(test[value], y_hat_avg.SES)
        if mape < error:
            error, optimal_alpha = mape, alpha
    return error, optimal_alpha
# Forecast the oil data below with Simple Exponential Smoothing.
ax = oildata.plot()
ax.set_xlabel("Year")
ax.set_ylabel("Oil (millions of tonnes)")
plt.show()
print("Figure 7.1: Oil production in Saudi Arabia from 1996 to 2007.")

# Three variants of simple exponential smoothing:
# 1. fit1 disables the optimizer and fixes alpha = 0.2
# 2. fit2 does the same with alpha = 0.6
# 3. fit3 lets statsmodels find an optimized alpha (the recommended approach)
fit1 = SimpleExpSmoothing(oildata).fit(smoothing_level=0.2, optimized=False)
fit2 = SimpleExpSmoothing(oildata).fit(smoothing_level=0.6, optimized=False)
fit3 = SimpleExpSmoothing(oildata).fit()

# Three-step forecasts, named after the alpha that produced them.
fcast1 = fit1.forecast(3).rename(r'$\alpha=0.2$')
fcast2 = fit2.forecast(3).rename(r'$\alpha=0.6$')
fcast3 = fit3.forecast(3).rename(
    r'$\alpha=%s$' % fit3.model.params['smoothing_level'])

# Observed series in black, then forecast and fitted values per variant.
ax = oildata.plot(marker='o', color='black', figsize=(12, 8))
for _fcast, _fit, _colour in ((fcast1, fit1, 'blue'),
                              (fcast2, fit2, 'red'),
                              (fcast3, fit3, 'green')):
    _fcast.plot(marker='o', ax=ax, color=_colour, legend=True)
    _fit.fittedvalues.plot(marker='o', ax=ax, color=_colour)