def ES(data=None, horizon=24, alpha=0.3):
    """Fit a simple exponential smoothing model and forecast ahead.

    Build ES model => Train model => Get forecasts.

    :param data: history data (sequence of observations).
    :param horizon: Length of forecasts.
    :param alpha: smoothing level handed to ``fit``.
    :return: Forecasts for the next ``horizon`` time steps.
    """
    model = SimpleExpSmoothing(data).fit(smoothing_level=alpha)
    # ``predict`` treats ``end`` as inclusive, so the last requested index is
    # ``len(data) + horizon - 1``.  The previous ``len(data) + horizon``
    # returned one forecast more than the documented horizon.
    first_step = len(data)
    fcasts = model.predict(start=first_step, end=first_step + horizon - 1)
    return fcasts
def twocolorball_ses_forecast(df):
    """Predict one value per ball column with simple exponential smoothing.

    The six red-ball columns are processed first and the blue-ball column
    last.  For each column a model is fitted with a smoothing level drawn at
    random from 0.1, 0.2, ..., 1.0, and the first element of ``predict()`` is
    truncated to an integer.

    :param df: frame providing the columns "红球1" .. "红球6" and "蓝球".
    :return: list of seven integers, in column order; the list is also
        printed.
    """
    column_names = ["红球%d" % number for number in range(1, 7)] + ["蓝球"]
    forecasts = []
    for name in column_names:
        history = np.asarray(df[name])
        model = SimpleExpSmoothing(history)
        fitted = model.fit(
            smoothing_level=random.randint(1, 10) / 10, optimized=False)
        first_value = fitted.predict()[0]
        forecasts.append(int(first_value))
    print(forecasts)
    return forecasts
def ses_forecast(df):
    """Run an SES prediction for every ball position and pool the results.

    Positions 1-6 read the red-ball columns and position 7 reads the
    blue-ball column.  Each column is fitted with a smoothing level drawn at
    random from 0.1, 0.2, ..., 1.0; the first element of ``predict()`` is
    rounded to the nearest integer and handed to ``add_number_pool`` together
    with a flag telling whether it is the blue ball.

    :param df: frame providing the columns "红球1" .. "红球6" and "蓝球".
    :return: the pool as returned by the last ``add_number_pool`` call.
    """
    print("==== 逐一对每位数字进行 SES 预测 ====")
    pool = []
    for position in range(1, 8):
        blue = position >= 7
        column = "蓝球" if blue else "红球%d" % position
        model = SimpleExpSmoothing(np.asarray(df[column]))
        fitted = model.fit(
            smoothing_level=random.randint(1, 10) / 10, optimized=False)
        forecast = fitted.predict()
        number = int(round(forecast[0], 0))
        pool = add_number_pool(pool, number, blue)
    return pool
def time_series_fun_4():
    """Forecast three months of ticket counts with SES and plot monthly means.

    Reads ``/temp/time_series_data.csv`` (uses its ``date`` and ``count``
    columns and drops ``Unnamed: 0``), fits simple exponential smoothing with
    a smoothing level of 0.7, forecasts one value every two hours for the
    90 days following the last observation, and plots the monthly mean of
    history-plus-forecast next to the monthly mean of the history alone.

    :return: None; the figure is shown with ``plt.show()``.
    """
    date_format = "%Y-%m-%d %H:%M:%S"

    # Read the CSV file and drop the unused column.
    df = pd.read_csv("/temp/time_series_data.csv").drop(
        labels="Unnamed: 0", axis=1)
    # Parse the timestamps once so history and forecast share one dtype.
    df["date"] = pd.to_datetime(df["date"], format=date_format)

    # Timestamp of the last observation.
    last_time = df["date"].iloc[-1]

    # Two-hourly grid covering the next three months.
    SES_forecast_start = last_time + timedelta(hours=2)
    SES_forecast_end = last_time + timedelta(days=90)
    datetime_index = pd.date_range(
        start=SES_forecast_start, end=SES_forecast_end, freq="2H")

    # Fit the model on the history with a fixed smoothing level (0 - 1).
    fit_model = SimpleExpSmoothing(np.asarray(df["count"])).fit(
        smoothing_level=0.7, optimized=False)

    # Out-of-sample forecasts, one per grid point.  The previous
    # ``predict(start=0, ...)`` returned in-sample fitted values from the
    # beginning of the history and labelled them with future dates.
    data = fit_model.forecast(len(datetime_index))
    forecast_frame = DataFrame({"date": datetime_index, "count": data})

    # ``DataFrame.append`` no longer exists in pandas 2.x; ``concat`` is the
    # equivalent call.
    SES_forecast_dataFrame = pd.concat(
        [df, forecast_frame], ignore_index=True)
    SES_forecast_dataFrame["count"] = (
        SES_forecast_dataFrame["count"].astype(int))

    # Resample to monthly means.  Only the plotted column is aggregated so
    # the mean is never asked to average the non-numeric ``date`` column.
    history_monthly = (
        df.set_index("date")["count"].resample(rule="M").mean())
    forecast_monthly = (
        SES_forecast_dataFrame.set_index("date")["count"]
        .resample(rule="M").mean())

    # Draw the line chart.
    plt.rcParams['font.sans-serif'] = ['SimHei']
    plt.plot(forecast_monthly.index, forecast_monthly,
             label="预测数据", linewidth=2)
    # The history line used to carry the same "预测数据" (forecast) label,
    # which made the two legend entries indistinguishable.
    plt.plot(history_monthly.index, history_monthly,
             label="历史数据", linewidth=2)

    # Title and axis labels.
    plt.title("铁路购票预测图")
    plt.xlabel("时间")
    plt.ylabel("每月购票均值")
    plt.legend(loc='upper left')

    # Show the figure.
    plt.show()
# NOTE(review): the next two lines look like pasted model-selection output
# (candidate SARIMA orders with their AIC), not Python statements; whatever
# opens them lies before this excerpt.  They are left untouched.
SARIMA(0, 0, 2)x(0, 2, 2, 7)7 - AIC:2756.7590501587865
SARIMA(0, 1, 2)x(2, 2, 2, 7)7 - AIC:2750.7295981905936

# In[ ]:

# Display the last rows of the frame.
df.tail()

# In[ ]:

# Fit a seasonal ARIMA on the ``Amount`` column using order (0, 1, 2) and
# seasonal order (2, 2, 2, 7) -- the candidate with the lower AIC of the two
# listed above.
fit2 = sm.tsa.statespace.SARIMAX(df.Amount, order=(0, 1, 2),seasonal_order=(2, 2, 2, 7)).fit()
# Dynamic prediction for observation numbers 130 through 139 (ten steps).
# NOTE(review): presumably 130 == len(df), making these out-of-sample
# steps -- confirm against the data.
Next_10weeks = fit2.predict(start=130, end=139, dynamic=True)
# Plot the observed series together with the predicted values.
plt.figure(figsize=(8,4))
plt.plot( df['Amount'], label='df')
plt.plot(Next_10weeks, label='SARIMA')
plt.legend(loc='best')
plt.show()

# In[49]:

# ARIMA order is (p, d, q):
#   p - number of periods used by the autoregressive part
#   d - order of integration (differencing)
#   q - number of periods used by the moving-average part
from statsmodels.tsa.arima_model import ARIMA
# Build (but do not yet fit) an ARIMA(3, 2, 0) model on ``train``.
model_arima = ARIMA(train,order=(3, 2, 0))
plt.plot(valid['Count'], label='Valid') plt.plot(y_hat, label='SARIMA') plt.title("p={}, d={}, q={}".format(p, d, q)) plt.legend(loc='best') plt.savefig("results/p={}, d={}, q={}".format(p, d, q)) except: continue # Calculate RMSE fit1 = sm.tsa.statespace.SARIMAX(train.Count, order=(3, 1, 2), seasonal_order=(0, 1, 1, 7), enforce_stationarity=False, enforce_invertibility=False).fit() y_hat = fit1.predict(start=16055, end=18285, dynamic=True) rmse.loc[len(rmse)] = "SARIMAX 312_", sqrt(MSE(valid.Count, y_hat)) # Submission using SARIMAX Model 312 submission = pd.read_csv("data/Sample_Submission_QChS6c3.csv") fit1 = sm.tsa.statespace.SARIMAX(train.Count, enforce_stationarity=False, enforce_invertibility=False, order=(3, 1, 2), seasonal_order=(0, 1, 1, 7)).fit() predict = fit1.predict(start=18286, end=23397, dynamic=True) submission.Count = predict submission['ID'] = test['ID'] # Converting the final submission to csv format submission.to_csv("submissions/5.csv", index=False)
def _rmse(actual, predicted):
    """Return the root-mean-squared error of *predicted* against *actual*."""
    return sqrt(mean_squared_error(actual, predicted))


def TIME_SERIES_ALGO(df, bool_stat):
    """Benchmark a set of forecasting models on the first column of *df*.

    Every model is fitted on the training part returned by
    ``train_test_split`` and scored by RMSE on the test part.  Each score,
    or the exception when a model fails, is recorded through
    ``insert_into_database``.  When the flag returned by
    ``log_transformation`` is true, the models are repeated on the
    transformed frame; forecasts and test values are mapped back with
    ``np.exp`` so that the RMSE is measured on the original scale.

    Models, in order: naive, simple average, moving average (windows
    25..89), simple exponential smoothing, Holt (plain and damped),
    Holt-Winters, a SARIMAX grid search and auto-ARIMA.

    :param df: frame whose first column holds the series to forecast.
    :param bool_stat: unused; kept so existing callers keep working.
    :return: dict mapping a model key to its RMSE.  Moving-average and
        SARIMAX scores are only written to the database.  (The function
        used to build this dict and then discard it.)
    """
    dict_rmse = dict()
    bool_log, df_log = log_transformation(df)
    col = df.columns[0]

    # 1. Naive approach: repeat the most recent training value.
    try:
        train, test = train_test_split(df)
        # ``.ix`` was removed from pandas; ``.iloc[-1]`` selects the last row.
        y_prd = np.asarray([train.iloc[-1].values[0]] * test.shape[0])
        rs_naive = _rmse(test[col].values, y_prd)
        print(rs_naive)
        dict_rmse["naive"] = rs_naive
        insert_into_database("NAIVE", rs_naive, "{}")
        if bool_log:
            # Same model on the log-transformed data.
            train, test = train_test_split(df_log)
            y_prd = np.asarray([train.iloc[-1].values[0]] * test.shape[0])
            y_prd = np.exp(y_prd)
            # The test values are log-scale too; undo the transform so both
            # sides of the RMSE are on the original scale.
            rs_naive_log = _rmse(np.exp(test[col].values), y_prd)
            print(rs_naive_log)
            dict_rmse["naive_log"] = rs_naive_log
            insert_into_database("NAIVE", rs_naive_log, "{}")
    except Exception as e:
        insert_into_database("NAIVE", None, e)
        print(("error in modelling in naive approach,{}".format(e)))

    # 2. Simple average: repeat the mean of the training data.
    try:
        train, test = train_test_split(df)
        mean_forecast = train[col].mean()
        y_prd = np.asarray([mean_forecast] * test.shape[0])
        rs_mean = _rmse(test[col].values, y_prd)
        dict_rmse["simple_avg"] = rs_mean
        insert_into_database("SIMPLE_AVG", rs_mean, "{}")
        if bool_log:
            train, test = train_test_split(df_log)
            mean_forecast = train[col].mean()
            y_prd = np.asarray([mean_forecast] * test.shape[0])
            y_prd = np.exp(y_prd)
            rs_mean = _rmse(np.exp(test[col].values), y_prd)
            dict_rmse["simple_avg_log"] = rs_mean
            insert_into_database("SIMPLE_AVG", rs_mean, "{}")
    except Exception as e:
        insert_into_database("SIMPLE_AVG", None, e)
        print(("error in moving average,{}".format(e)))

    # 3. Moving average (marked "in progress" by the original author).
    try:
        train, test = train_test_split(df)
        for i in range(25, 90):
            # The rolling mean yields one value per row; only the last one
            # is used as the forecast.
            mean_moving = train[col].rolling(i).mean().iloc[-1]
            print(mean_moving)
            y_prd = np.asarray([mean_moving] * test.shape[0])
            rs_moving = _rmse(test[col].values, y_prd)
            insert_into_database("MVG_AVG", rs_moving, "{}")
    except Exception as e:
        insert_into_database("MVG_AVG", None, e)
        print(("error in moving average,{}".format(e)))

    try:
        if bool_log:
            # The split does not depend on the window, so do it once.
            train, test = train_test_split(df_log)
            actual = np.exp(test[col].values)
            for i in range(25, 90):
                mean_moving = train[col].rolling(i).mean().iloc[-1]
                y_prd = np.array([mean_moving] * test.shape[0])
                print(y_prd)
                y_prd = np.exp(y_prd)
                rs_moving_log = _rmse(actual, y_prd)
                insert_into_database("MVG_AVERAGE", rs_moving_log, "{}")
    except Exception as e:
        insert_into_database("MVG_AVERAGE", None, e)
        print(("error in log moving average model, {}".format(e)))

    # 4. Simple exponential smoothing.
    try:
        train, test = train_test_split(df)
        # Fit on the training part only.  Fitting on the whole frame leaked
        # the test rows into the model and forecast past the end of the data.
        fit2 = SimpleExpSmoothing(train[col]).fit(smoothing_level=0.6,
                                                  optimized=False)
        y_prd = fit2.forecast(len(test))
        print(y_prd)
        rs_simple = _rmse(test[col].values, y_prd)
        dict_rmse["simple"] = rs_simple
        insert_into_database("SIMPLE_EXP", rs_simple, "{}")
    except Exception as e:
        print(("error is simple exp without log,{}".format(e)))
        insert_into_database("SIMPLE_EXP", None, e)

    try:
        if bool_log:
            train, test = train_test_split(df_log)
            # The log branch used to fit the untransformed ``df`` and then
            # exponentiate its forecasts.
            fit2 = SimpleExpSmoothing(train[col]).fit(smoothing_level=0.6,
                                                      optimized=False)
            y_prd = fit2.forecast(len(test))
            y_prd = np.exp(y_prd)
            rs_simple = _rmse(np.exp(test[col].values), y_prd)
            dict_rmse["simple_log"] = rs_simple
            insert_into_database("SIMPLE_EXP", rs_simple, "{}")
    except Exception as e:
        insert_into_database("SIMPLE_EXP", None, e)
        print(("simple exponential smoothing log,{}".format(e)))

    # 5. Holt's linear trend method, undamped.
    try:
        train, test = train_test_split(df)
        fit2 = Holt(train[col], exponential=True, damped=False).fit()
        y_prd = fit2.predict(test.index.values[0], test.index.values[-1])
        rs_hotl = _rmse(test[col].values, y_prd)
        dict_rmse["rs_hotl"] = rs_hotl
        insert_into_database("HOLT_LINEAR", rs_hotl, "{}")
        if bool_log:
            # Previously split ``df`` again, so the "log" run exponentiated
            # forecasts of untransformed data.
            train, test = train_test_split(df_log)
            fit2 = Holt(train[col], exponential=True, damped=False).fit()
            y_prd = fit2.predict(test.index.values[0],
                                 test.index.values[-1])
            y_prd = np.exp(y_prd)
            rs_hotl_log = _rmse(np.exp(test[col].values), y_prd)
            dict_rmse["rs_hotl_log"] = rs_hotl_log
            insert_into_database("HOLT_LINEAR", rs_hotl_log, "{}")
    except Exception as e:
        insert_into_database("HOLT_LINEAR", None, e)
        print((
            "error in HOLT linear forecasting in without damped.{}".format(e)))

    # 6. Holt's linear trend method, damped.
    try:
        # Split explicitly instead of reusing whatever the previous section
        # left in ``train``/``test``.
        train, test = train_test_split(df)
        fit2 = Holt(train[col], exponential=True, damped=True).fit()
        y_prd = fit2.predict(test.index.values[0], test.index.values[-1])
        rs_holtld = _rmse(test[col].values, y_prd)
        dict_rmse["rs_holtld"] = rs_holtld
        insert_into_database("HOLT_LINEAR", rs_holtld, "{}")
        if bool_log:
            train, test = train_test_split(df_log)
            fit2 = Holt(train[col], exponential=True, damped=True).fit()
            y_prd = fit2.predict(test.index.values[0],
                                 test.index.values[-1])
            y_prd = np.exp(y_prd)
            rs_holtld_log = _rmse(np.exp(test[col].values), y_prd)
            # Own key: the log score used to overwrite "rs_holtld".
            dict_rmse["rs_holtld_log"] = rs_holtld_log
            insert_into_database("HOLT_LINEAR", rs_holtld_log, "{}")
    except Exception as e:
        print(("error in HOLT linear smoothing damped,{}".format(e)))
        insert_into_database("HOLT_LINEAR", None, e)

    # 7. Holt-Winters.
    try:
        train, test = train_test_split(df)
        # Fit on ``train``; the model used to be fitted on the very test
        # data it was then scored against.
        fit2 = ExponentialSmoothing(train[col], trend="mul", seasonal="mul",
                                    seasonal_periods=12).fit()
        y_prd = fit2.predict(test.index.values[0], test.index.values[-1])
        rs_hlw = _rmse(test[col].values, y_prd)
        print(rs_hlw)
        dict_rmse["rs_hlw"] = rs_hlw
        insert_into_database("HOLT_WINTER", rs_hlw, "{}")
        if bool_log:
            train, test = train_test_split(df_log)
            fit2 = ExponentialSmoothing(train[col], trend="add",
                                        seasonal="add",
                                        seasonal_periods=12).fit()
            y_prd = fit2.predict(test.index.values[0],
                                 test.index.values[-1])
            y_prd = np.exp(y_prd)
            rs_hlw_log = _rmse(np.exp(test[col].values), y_prd)
            print(rs_hlw_log)
            dict_rmse["rs_hlw_log"] = rs_hlw_log
            insert_into_database("HOLT_WINTER", rs_hlw_log, "{}")
    except Exception as e:
        print(("error in HOLT winter forecasting,{}".format(e)))
        insert_into_database("HOLT_WINTER", None, e)

    # 8. ARIMA: the stationarity-driven (p, q) search was already disabled
    # (commented out) and has been dropped here.

    # 9. SARIMAX grid search over orders in {0, 1}^3 and several seasonal
    # periods.  A failing combination is recorded and the search continues.
    try:
        train, test = train_test_split(df)
        p = d = q = list(range(0, 2))
        non_seas = list(itertools.product(p, d, q))
        lis = [1, 3, 6, 12, 24, 56]
        for i in lis:
            sea_so = [(x[0], x[1], x[2], i) for x in non_seas]
            for j in non_seas:
                for k in sea_so:
                    try:
                        model = SARIMAX(train, order=j, seasonal_order=k,
                                        enforce_stationarity=False,
                                        enforce_invertibility=False).fit()
                        y_prd = model.predict(
                            start=test.index.values[0],
                            end=test.index.values[-1])
                        rs = _rmse(test.values, y_prd)
                        print(rs)
                        insert_into_database("SARIMAX", rs, "{}")
                    except Exception as e:
                        print(("error while training the SARIMAX MODELS,{}".
                               format(e)))
                        insert_into_database("SARIMAX", None, e)
    except Exception as e:
        print(("error in seasonal_arima,{}".format(e)))
        insert_into_database("SARIMAX", None, e)

    # 10. Auto-ARIMA.
    try:
        train, test = train_test_split(df)
        model = auto_arima(train, start_p=1, start_q=1, start_P=1,
                           start_Q=1, max_p=5, max_q=5, max_P=5, max_Q=1,
                           d=1, D=1, seasonal=True)
        model = model.fit(train)
        y_prd = model.predict(n_periods=len(test))
        rs = _rmse(test.values, y_prd)
        print("results in auto_Arima", rs)
        dict_rmse["auto_arima"] = rs
        insert_into_database("AUTO_ARIMA", rs, "{}")
    except Exception as e:
        print("error in auto_Arima,{}".format(e))
        # NOTE(review): failures are stored under "Auto_arima" while
        # successes use "AUTO_ARIMA"; left as is because stored rows may
        # already depend on the spelling.
        insert_into_database("Auto_arima", None, e)

    return dict_rmse
def get_pred(self, module):
    """Forecast the increments of *module* and store its planned date.

    Builds a series from the ``c_incr`` values of the module's
    ``Containers`` rows, then either sets ``module.m_plan`` to today (when
    today's fill level is at least 100) or produces ``forecast_period``
    predictions with the method named in ``module.m_method``, converts them
    to a day count with ``self.get_count_days`` and sets ``module.m_plan``
    to today plus that count.  The module is saved in both cases.

    NOTE(review): the nesting below was reconstructed from a copy of the
    source without line breaks; the extent of the two ``for`` loops in the
    LSTM branch in particular should be confirmed against the original file.

    :param module: object providing ``m_module``, ``m_height``,
        ``m_method``, ``params``, ``m_plan`` and ``save()``.
    :return: None.
    """
    labels_pred, data_pred, predictions = [], [], []
    # Build the time series.
    queryset = Containers.objects.filter(
        c_module__m_module=module.m_module, c_incr__isnull=False)
    # Latest row (by ``c_date``) whose ``c_curr`` equals the module height.
    # NOTE(review): the name ``object`` shadows the builtin.
    object = Containers.objects.filter(
        c_module__m_module=module.m_module,
        c_curr=module.m_height).latest('c_date')
    # Whole days elapsed between that row and today.
    days = (self.get_today().date() - object.c_date.date()).days
    for it in queryset:
        labels_pred.append(it.c_date.date())
        data_pred.append(it.c_incr)
    dd = np.asarray(data_pred)
    df = pd.DataFrame(data=dd, index=pd.to_datetime(labels_pred),
                      columns=['value'])
    # df = df.resample('D').mean()  # reduce the number of outliers
    # Today's not-yet-collected row; responds with 404 when there is none.
    obj = get_object_or_404(Containers,
                            c_module__m_module=module.m_module,
                            c_date=self.get_today(),
                            c_is_collected=False)
    fill_level = obj.fill_level
    if fill_level >= 100:
        module.m_plan = self.get_today().date()
    else:
        max_period = Analitics.objects.filter(
            a_module__m_module=module.m_module).aggregate(Max('a_period'))
        # NOTE(review): ``int(None)`` raises TypeError if the aggregate
        # finds no ``Analitics`` rows -- confirm that cannot happen.
        forecast_period = int(max_period['a_period__max']) + 2 - days
        method = str(module.m_method)
        # The literals below are the (Russian) method names stored on the
        # module; they are runtime values and must not be altered.
        if (method == 'Наивный подход'):
            # Naive approach: repeat the last observed value.
            predictions = [dd[len(dd) - 1]] * forecast_period
        elif (method == 'Простое среднее'):
            # Simple average: repeat the mean of the series.
            predictions = [df['value'].mean()] * forecast_period
        elif (method == 'Скользящее среднее'):
            # Moving average: repeat the last 48-point rolling mean.
            predictions = [df['value'].rolling(48).mean().iloc[-1]
                           ] * forecast_period
        elif (method == 'Простое экспоненциальное сглаживание'):
            # Simple exponential smoothing with the stored smoothing level.
            fit = SimpleExpSmoothing(np.asarray(df['value'])).fit(
                smoothing_level=module.params['s_l'], optimized=False)
            predictions = fit.forecast(forecast_period)
        elif (method == 'Метод линейного тренда Холта'):
            # Holt's linear trend method with stored level and slope.
            fit = Holt(np.asarray(df['value'])).fit(
                smoothing_level=module.params['s_l'],
                smoothing_slope=module.params['s_s'])
            predictions = fit.forecast(forecast_period)
        elif (method == 'Метод Холта-Винтерса'):
            # Holt-Winters with stored seasonal period, trend and seasonal
            # component types.
            fit = ExponentialSmoothing(
                np.asarray(df['value']),
                seasonal_periods=module.params['s_p'],
                trend=module.params['t'],
                seasonal=module.params['s'],
            ).fit()
            predictions = fit.forecast(forecast_period)
        elif (method == 'SARIMA'):
            fit = sm.tsa.statespace.SARIMAX(
                df.value,
                order=(module.params['p'], module.params['d'],
                       module.params['q']),
                seasonal_order=(module.params['P'], module.params['D'],
                                module.params['Q'],
                                module.params['m'])).fit()
            # Predict from tomorrow through ``forecast_period`` days ahead.
            s_date = self.get_today() + datetime.timedelta(1)
            e_date = self.get_today() + datetime.timedelta(forecast_period)
            predictions = fit.predict(start=s_date.date(),
                                      end=e_date.date(), dynamic=True)
        elif (method == 'LSTM'):
            analiz, d_7 = self.stationarity(df.value)
            # One pass per forecast step; each pass refits the network and
            # appends its prediction to ``data_pred``.
            for i in range(forecast_period):
                # 'Стационарный' means "stationary": difference the data
                # only when the series was not reported as stationary.
                if analiz[0] != 'Стационарный':
                    data_pred = self.difference(data_pred, 1)
                supervised = self.timeseries_to_supervised(data_pred, 1)
                supervised_values = supervised.values
                # All rows but the last train the model; the last row is
                # the single "test" row.
                train_lstm = supervised_values[0:len(supervised_values) - 1]
                test_lstm = supervised_values[len(supervised_values) - 1:]
                scaler = MinMaxScaler(feature_range=(-1, 1))
                scaler = scaler.fit(train_lstm)
                # transform train
                train = train_lstm.reshape(train_lstm.shape[0],
                                           train_lstm.shape[1])
                train_scaled = scaler.transform(train)
                # fit the model
                lstm_model = self.fit_lstm(train_scaled, 1, 5, 5)
                # forecast the entire training dataset to build up state for forecasting
                train_reshaped = train_scaled[:, 0].reshape(
                    len(train_scaled), 1, 1)
                lstm_model.predict(train_reshaped, batch_size=1)
                # walk-forward validation on the test data
                # transform test
                test = test_lstm.reshape(test_lstm.shape[0],
                                         test_lstm.shape[1])
                test_scaled = scaler.transform(test)
                # NOTE(review): this loop reuses the outer loop variable
                # name ``i``.
                for i in range(len(test_scaled)):
                    # make one-step forecast
                    X, y = test_scaled[i, 0:-1], test_scaled[i, -1]
                    yhat = self.forecast_lstm(lstm_model, 1, X)
                    # invert scaling
                    yhat = self.invert_scale(scaler, X, yhat)
                    if analiz[0] != 'Стационарный':
                        # invert differencing
                        yhat = self.inverse_difference(
                            data_pred, yhat, len(test_scaled) + 1 - i)
                    # store forecast
                    predictions.append(round(yhat))
                    data_pred.append(yhat)
        # Turn the predictions into a number of days and plan that far
        # ahead of today.
        count = self.get_count_days(fill_level, predictions)
        date_plan = self.get_today() + datetime.timedelta(days=count)
        module.m_plan = date_plan.date()
    module.save()
    return