def arima(window_values):
    """Forecast one step ahead from a weighted copy of a 10-point window.

    A deep copy of ``window_values`` is scaled element-wise by a fixed list
    of ten weights, an ARMA(0, 0) model is fitted to the scaled copy, and the
    one-step out-of-sample forecast is returned multiplied by 10.

    :param window_values: indexable, mutable sequence holding at least ten
        numeric values; the caller's object is not modified.
    :return: the one-step forecast multiplied by 10.
    """
    ramp = [0.06, 0.07, 0.08, 0.09, 0.1, 0.1, 0.11, 0.12, 0.13, 0.14]

    # Work on a copy so the in-place scaling never leaks back to the caller.
    scaled = deepcopy(window_values)
    for position, weight in enumerate(ramp):
        scaled[position] *= weight

    fitted = sm.tsa.ARMA(scaled, order=(0, 0)).fit()
    one_step = _arma_predict_out_of_sample(
        fitted.params, 1, fitted.resid,
        fitted.k_ar, fitted.k_ma, fitted.k_trend, fitted.k_exog,
        endog=scaled, exog=None, start=len(scaled))
    return one_step * 10
def arima_predict_out_of_sample(res):
    """Return the one-step-ahead out-of-sample forecast of a fitted model.

    :param res: fitted results object (as returned by the ``fit`` method of
        ``statsmodels.tsa.arima_model.ARMA``/``ARIMA``). It must expose
        ``params``, ``resid``, ``k_ar``, ``k_ma``, ``k_exog``, ``k_trend``
        and ``model.endog``.
    :return: the value produced by ``_arma_predict_out_of_sample`` for a
        single step past the end of the data the model was fitted on.
    """
    # this is the nsteps ahead predictor function (private statsmodels helper)
    from statsmodels.tsa.arima_model import _arma_predict_out_of_sample

    # Forecast from the results the caller passed in. The argument used to be
    # overwritten by a fresh ARMA(3, 2) fit on a module-level ``y``, which made
    # the parameter meaningless and failed whenever ``y`` was not defined.
    endog = res.model.endog

    steps = 1
    return _arma_predict_out_of_sample(
        res.params, steps, res.resid,
        res.k_ar, res.k_ma, res.k_trend, res.k_exog,
        endog=endog, exog=None, start=len(endog))
def arma(data, p, q, n):
    """Grid-search ARMA orders and forecast one step with the best one.

    Every order ``(i, j)`` with ``0 <= i < p`` and ``0 <= j < q`` is fitted.
    Each fit is scored by the sum of squared differences between its dynamic
    prediction starting at index ``n`` and ``data[n:]``. The lowest-scoring
    order is refitted on all of ``data`` and used for a one-step forecast.

    :param data: the series to model.
    :param p: exclusive upper bound of the AR orders to try.
    :param q: exclusive upper bound of the MA orders to try.
    :param n: index at which the dynamic in-sample prediction used for
        scoring starts.
    :return: first element of the one-step out-of-sample forecast.
    :raises IndexError: if no candidate order could be fitted and scored
        (this includes ``p == 0`` or ``q == 0``).
    """
    result = []
    for i in range(p):
        for j in range(q):
            try:
                arma_mod = sm.tsa.ARMA(data, (i, j)).fit()
                arma_predict = arma_mod.predict(n, dynamic=True)
                error = arma_predict - np.array(data[n:])
                error = sum(error ** 2)
            except Exception:
                # Best effort: an order that cannot be fitted or scored is
                # skipped. Only Exception is caught so that Ctrl-C and
                # SystemExit still stop the search.
                continue
            result.append((i, j, error))

    result.sort(key=lambda x: x[2])
    select_p = result[0][0]
    select_q = result[0][1]
    # Single-argument print calls behave the same on Python 2 and Python 3.
    print(result)
    print(select_p)
    print(select_q)

    arma_mod = sm.tsa.ARMA(data, (select_p, select_q)).fit()
    steps = 1
    result = _arma_predict_out_of_sample(
        arma_mod.params, steps, arma_mod.resid,
        arma_mod.k_ar, arma_mod.k_ma, arma_mod.k_trend, arma_mod.k_exog,
        endog=data, exog=None, start=len(data))
    return result[0]
def MA_predict(data, p=2, w=None, step=1):
    """
    Forecast with a (weighted) moving average by feeding the weights to
    ``_arma_predict_out_of_sample`` as AR coefficients.

    :param data: ts data
    :param p: p parameter of MA; capped at ``len(data)``
    :param w: weight of WMA, reversed before use; equal weights ``1 / p``
              are used when it is None
    :param step: predict step
    :return: whatever ``_arma_predict_out_of_sample`` returns for ``step``
             steps
    """
    window = min(len(data), p)

    if w is None:
        coefficients = [1.0 / window] * window
    else:
        coefficients = w[::-1]

    # Positional tail: errors=[0], p=window, q=0, k_trend=0, k_exog=0.
    return _arma_predict_out_of_sample(coefficients, step, [0], window, 0, 0, 0,
                                       endog=data)
def predict_arma_next_days(self, item):
    """Fit ARMA(self.p, self.q) on one column of ``df_train`` and forecast.

    The column ``df_train[item]`` is sorted by index, everything before
    position ``self.fc`` is used as history, and ``self.n_days`` values are
    forecast past the end of that history.

    :param item: key of the column in the module-level ``df_train``.
    :return: tuple ``(history, value_at_fc, forecast)`` where ``history`` is
        the slice ``[0:self.fc]`` of the sorted series and ``value_at_fc`` is
        the sorted series indexed with ``self.fc``.
    """
    ordered = df_train[item].sort_index()

    held_out = ordered[self.fc]      # the real value the forecast is compared with
    history = ordered[0:self.fc]     # everything before it

    fit = ARMA(history, order=(self.p, self.q), freq='D').fit(disp=False)

    # n_days forecasting, starting right after the last history point
    forecast = _arma_predict_out_of_sample(
        fit.params, self.n_days, fit.resid,
        fit.k_ar, fit.k_ma, fit.k_trend, fit.k_exog,
        endog=history, exog=None, start=len(history))

    return history, held_out, forecast
def forecast_transfer(params, step, model_name, endog):
    """Forecast ``endog`` with externally supplied ARMA parameters.

    The residuals, orders and estimation method are taken from the fitted
    results object ``model_name``; the coefficients come from ``params``.

    :param params: parameter vector handed to the forecaster.
    :param step: number of steps to forecast.
    :param model_name: fitted results object exposing ``resid``, ``k_ar``,
        ``k_ma``, ``k_trend``, ``k_exog`` and ``model.method``.
    :param endog: series the forecast continues from.
    :return: the value returned by ``_arma_predict_out_of_sample``.
    """
    # The former ``pred_test = model_name.forecast(...)`` line was dropped: its
    # result was never used and it needed the module-level ``ave_window``.
    return _arma_predict_out_of_sample(
        params, step, model_name.resid,
        model_name.k_ar, model_name.k_ma, model_name.k_trend, model_name.k_exog,
        endog, exog=None, method=model_name.model.method)
def optPredictedValue(self, train_data):
    """
    :description fit an ARMA(15, 5) model on the first 123 points of
                 ``train_data``, forecast the following 60 points and plot
                 the forecast over the full series
    :param train_data: model train data; the plot expects 183 points
    :return: None; ``self.test_data`` receives points 123..182 and
             ``self.pre_result`` receives the forecast
    """
    split, horizon = 123, 60

    self.test_data = train_data[split:split + horizon]

    history = pd.Series(train_data[:split])
    history.index = pd.Index(sm.tsa.datetools.dates_from_range('2001', '2123'))

    # Print the autocorrelation and then the partial autocorrelation so the
    # orders can be judged by eye.
    for diagnostic in (sm.tsa.acf, sm.tsa.pacf):
        print(diagnostic(history, nlags=20))

    fitted = sm.tsa.ARMA(history, (15, 5)).fit(disp=-1, trend="c", solver='powell', method="css")

    self.pre_result = _arma_predict_out_of_sample(
        fitted.params, horizon, fitted.resid,
        fitted.k_ar, fitted.k_ma, fitted.k_trend, fitted.k_exog,
        endog=history, exog=None, method='ols', start=len(history))
    print(self.pre_result)

    # Full series first, then the forecast in red over the held-out range.
    plt.plot(range(183), train_data)
    plt.plot(range(split, 183), self.pre_result, 'red')
    plt.show()
def predict_arma_next_days(self, item, Xy='train'):
    """Fit ARMA(self.p, self.q) on the 100 points before ``self.fc`` and forecast.

    :param item: key of the column to model.
    :param Xy: ``'train'`` reads the column from the module-level
        ``df_train``; any other value reads it from ``df_test``.
    :return: tuple ``(history, value_at_fc, forecast)`` where ``history`` is
        the slice ``[self.fc - 100:self.fc]`` of the index-sorted series,
        ``value_at_fc`` is the sorted series indexed with ``self.fc`` and
        ``forecast`` covers ``self.n_days`` steps.
    """
    source = df_train if Xy == 'train' else df_test
    ordered = source[item].sort_index()   # sorting index Date

    held_out = ordered[self.fc]           # later, it will be stored as output
    window_start = self.fc - 100
    history = ordered[window_start:self.fc]

    # Background reading collected by the original author:
    # http://statsmodels.sourceforge.net/devel/examples/generated/ex_dates.html
    # https://groups.google.com/forum/#!msg/pystatsmodels/_ItLBVpePIY/nBiP3fn4kDkJ
    # https://github.com/statsmodels/statsmodels/issues/1857
    # http://stackoverflow.com/questions/27931571/arma-predict-for-out-of-sample-forecast-does-not-work-with-floating-points
    # https://bicorner.com/2015/11/16/time-series-analysis-using-ipython/
    # http://stackoverflow.com/questions/35593759/python-arima-model-predicted-values-are-shifted
    # http://www.statsmodels.org/dev/examples/notebooks/generated/statespace_sarimax_stata.html
    #
    # freq : str {'B','D','W','M','A', 'Q'}
    #   'B' business day (Mon. - Fri.), 'D' daily, 'W' weekly,
    #   'M' monthly, 'A' annual, 'Q' quarterly
    fit = ARMA(history, order=(self.p, self.q), freq='D').fit(disp=False)

    # n_days forecasting, starting right after the last history point
    forecast = _arma_predict_out_of_sample(
        fit.params, self.n_days, fit.resid,
        fit.k_ar, fit.k_ma, fit.k_trend, fit.k_exog,
        endog=history, exog=None, start=len(history))

    return history, held_out, forecast
def _arma_predict(rw, data, steps, order):
    """Forecast ``steps`` values of ``data`` with the parameters held by ``rw``.

    :param rw: fitted results object exposing ``params``, ``resid``,
        ``k_trend`` and ``k_exog``.
    :param data: series passed to the forecaster as ``endog``.
    :param steps: number of steps to forecast.
    :param order: sequence whose first two entries are the AR and MA orders.
    :return: the value returned by ``_arma_predict_out_of_sample`` (called
        with ``start=0`` and ``method='mle'``).
    """
    coefficients = np.array(rw.params)
    errors = np.array(rw.resid)
    ar_order = order[0]
    ma_order = order[1]
    return _arma_predict_out_of_sample(
        coefficients, steps, errors, ar_order, ma_order,
        rw.k_trend, rw.k_exog, data,
        exog=None, start=0, method='mle')
def predict(self):
    """Return the one-step forecast past the end of ``self.model_info.endog``.

    The parameters, residuals and orders are read from ``self.model``.
    """
    fitted = self.model
    observed = self.model_info.endog
    return _arma_predict_out_of_sample(
        fitted.params, 1, fitted.resid,
        fitted.k_ar, fitted.k_ma, fitted.k_trend, fitted.k_exog,
        endog=observed, exog=None, start=observed.shape[0])
def arma_predict(data, step):
    """Fit an ARMA(2, 1) model to ``data`` and forecast past its end.

    :param data: the series to fit.
    :param step: number of steps to forecast.
    :return: the value returned by ``_arma_predict_out_of_sample``.
    """
    # A no-constant variant, .fit(trend="nc"), was tried earlier and dropped.
    fitted = sm.tsa.ARMA(data, (2, 1)).fit()
    return _arma_predict_out_of_sample(
        fitted.params, step, fitted.resid,
        fitted.k_ar, fitted.k_ma, fitted.k_trend, fitted.k_exog,
        endog=data, exog=None, start=len(data))
def main():
    """Run the three-part CSUSHPISA exercise.

    Q1 prints an augmented Dickey-Fuller test, Q2 shows ACF, PACF and the
    raw series in one figure, Q3 prints a 4-step AR(1) forecast.
    """
    # Q1
    frame = pd.read_excel('C:\\Users\\hpatil\\Desktop\\fredgraph.xls', skiprows=10)
    adf_result = statsmodels.tsa.stattools.adfuller(frame['CSUSHPISA'], maxlag=1)
    print(adf_result)
    # Author's reading: the test statistic is above the 1%, 5% and 10%
    # critical values, so the hypothesis gamma = 0 cannot be rejected and a
    # unit root is possible.

    # Q2
    # The first difference is computed and immediately replaced by the level
    # series, so everything below works on the undifferenced data.
    diff = frame['CSUSHPISA'] - frame['CSUSHPISA'].shift()
    diff = frame['CSUSHPISA']

    fig = plt.figure(figsize=(12, 8))
    tsaplots = statsmodels.graphics.tsaplots
    for position, draw in ((311, tsaplots.plot_acf), (312, tsaplots.plot_pacf)):
        axis = fig.add_subplot(position)
        fig = draw(diff, lags=200, ax=axis)
    fig.add_subplot(313)
    plt.plot(diff)   # drawn on the axes added last
    plt.show()
    # Author's reading of the plots: the ACF decays very slowly and the PACF
    # cuts off at 2, which was taken as ARIMA(1,0,0) = AR(1).

    # Q3
    from statsmodels.tsa.arima_model import _arma_predict_out_of_sample
    values = diff.values
    res = sm.tsa.ARMA(values, (1, 0)).fit()
    steps = 4
    print(_arma_predict_out_of_sample(
        res.params, steps, res.resid,
        res.k_ar, res.k_ma, res.k_trend, res.k_exog,
        endog=values, exog=None, start=len(values)))
def _arima_predict(rw, data, steps, order):
    """Forecast ``steps`` values of an ARIMA(p, d, q) series on its original scale.

    ``data`` is differenced ``d`` times, the ARMA forecast is computed on the
    differenced series, and the result is integrated back using the last
    ``d`` observations of ``data``.

    :param rw: fitted results object exposing ``params``, ``resid``,
        ``k_trend`` and ``k_exog``.
    :param data: the undifferenced series.
    :param steps: number of steps to forecast.
    :param order: ``(p, d, q)``.
    :return: the forecast on the scale of ``data``.
    """
    d = order[1]
    _endog = np.diff(data, n=d)
    forecast = _arma_predict_out_of_sample(
        np.array(rw.params), steps, np.array(rw.resid),
        order[0], order[2], rw.k_trend, rw.k_exog,
        _endog, exog=None, method='css-mle')

    if d == 0:
        # Nothing was differenced, so there is nothing to integrate back.
        # Falling through would also be wrong: ``data[-0:]`` selects the whole
        # series instead of an empty tail, and there are no levels to start
        # the un-differencing from.
        return forecast

    # The last d observations anchor the un-differencing; the first d values
    # of the integrated series are those anchors, not forecasts.
    endog = data[-d:]
    return unintegrate(forecast, unintegrate_levels(endog, d))[d:]
def MA_predict(data, p, w=None, step=1):
    """Forecast with a (weighted) moving average over the last ``p`` points.

    The weights are passed to ``_arma_predict_out_of_sample`` as AR
    coefficients with no MA terms, trend or exogenous regressors.

    :param data: the series to continue.
    :param p: number of trailing points that enter the average.
    :param w: optional weights, reversed before use. When it is None or
        empty, equal weights ``1 / p`` are used.
    :param step: number of steps to forecast.
    :return: the value returned by ``_arma_predict_out_of_sample``.
    """
    # ``w[::-1] or default`` raised TypeError for the default ``w=None``.
    # Testing the length (not truthiness) also keeps numpy arrays usable.
    if w is not None and len(w) > 0:
        weights = w[::-1]
    else:
        weights = [1.0 / p] * p

    residuals = [0]
    q = 0
    k_exog = 0
    k_trend = 0
    return _arma_predict_out_of_sample(weights, step, residuals, p, q,
                                       k_trend, k_exog, endog=data)
def armaPredict(data, ratio):
    """Forecast the tail of every series in ``data`` with ARMA, or naively on failure.

    For each series the leading ``ratio`` share is used for training. The
    ARMA order is chosen by AIC, a 48-step forecast is produced and its first
    value is dropped. If anything in that path raises, a naive forecast is
    used instead: each point is the previous true value plus the previous
    true difference.

    :param data: array indexed as ``data[i, t, 0]`` (series, time, channel).
    :param ratio: share of each series used for training.
    :return: list with one forecast array per series.
    """
    s = time.time()
    # Original plan (not implemented here): difference up to twice first and
    # feed in the most stationary version of the data.
    result = []
    for i in range(len(data)):
        print('loop', i)
        split = int(ratio * len(data[i]))
        try:
            train_data = data[i, :split].ravel()
            order = sm.tsa.arma_order_select_ic(train_data, ic='aic')['aic_min_order']
            # order = (4,2)
            # Build the ARMA model with the selected order.
            model = ARMA(train_data, order=order)
            res = model.fit()
            # This step is debatable: it is unclear what ARMA itself uses to
            # obtain its final result.
            pre = _arma_predict_out_of_sample(
                res.params, 48, res.resid,
                res.k_ar, res.k_ma, res.k_trend, res.k_exog,
                endog=train_data, exog=None, start=len(train_data))
            result.append(pre[1:])
        # Fitting may fail; the original idea was to retry on first
        # differences, otherwise to fall back to a linear rule.
        except Exception:
            # Only Exception is caught so Ctrl-C still interrupts the run.
            # Errors can have many causes; use a simple direct prediction:
            # the next point from the difference at the previous point.
            pre = np.zeros(len(data[i]) - split - 1)
            for j in range(len(pre)):
                index = split + j
                # previous true value + (previous value - the one before it)
                pre[j] = data[i, index, 0] + (data[i, index, 0] - data[i, index - 1, 0])
            result.append(pre)
    print(len(result))
    e = time.time()
    print('arma predict time:', e - s, 's')
    return result
def predict_out_of_sample_ARMA(data, AR=3, MA=0):
    """Fit a no-constant ARMA(AR, MA) model and forecast one step ahead.

    :param data: the series to fit and to continue.
    :param AR: autoregressive order.
    :param MA: moving-average order.
    :return: the one-step value returned by ``_arma_predict_out_of_sample``.
    """
    res = sm.tsa.ARMA(data, order=(AR, MA)).fit(trend="nc")

    steps = 1
    # ``endog`` must be the series the model was fitted on. It used to be an
    # undefined ``y`` while ``start`` was already measured on ``data``.
    return _arma_predict_out_of_sample(
        res.params, steps, res.resid,
        res.k_ar, res.k_ma, res.k_trend, res.k_exog,
        endog=data, exog=None, start=len(data))
def arma(data, p, q, n):
    """Pick the best ARMA order by in-sample error and forecast one step.

    All orders ``(i, j)`` with ``i in range(p)`` and ``j in range(q)`` are
    tried. A candidate's score is the sum of squared differences between its
    dynamic prediction from index ``n`` onwards and ``data[n:]``. The order
    with the smallest score is refitted on ``data`` for the forecast.

    :param data: the series to model.
    :param p: exclusive upper bound of the AR orders to try.
    :param q: exclusive upper bound of the MA orders to try.
    :param n: start index of the dynamic prediction used for scoring.
    :return: first element of the one-step out-of-sample forecast.
    :raises IndexError: if no candidate could be fitted and scored (also
        when ``p`` or ``q`` is 0).
    """
    result = []
    for i in range(p):
        for j in range(q):
            try:
                arma_mod = sm.tsa.ARMA(data, (i, j)).fit()
                arma_predict = arma_mod.predict(n, dynamic=True)
                error = arma_predict - np.array(data[n:])
                error = sum(error ** 2)
            except Exception:
                # Candidates that fail to fit or score are skipped on purpose.
                # A bare ``except`` would also have swallowed Ctrl-C.
                continue
            result.append((i, j, error))

    result.sort(key=lambda x: x[2])
    select_p = result[0][0]
    select_q = result[0][1]
    # Parenthesised single-argument prints work on Python 2 and Python 3.
    print(result)
    print(select_p)
    print(select_q)

    arma_mod = sm.tsa.ARMA(data, (select_p, select_q)).fit()
    steps = 1
    result = _arma_predict_out_of_sample(
        arma_mod.params, steps, arma_mod.resid,
        arma_mod.k_ar, arma_mod.k_ma, arma_mod.k_trend, arma_mod.k_exog,
        endog=data, exog=None, start=len(data))
    return result[0]
def wma(data, p, w=None, step=1):
    """
    Predict from the data series with a weighted moving average.

    :param list data: ts data
    :param int p: p parameter of MA, use the length of data (from right)
    :param list w: weight of WMA; reversed before use. Anything that is not
        a ``list`` (including None) selects equal weights ``1 / p``
    :param int step: predict step
    :return: the predict of wma
    """
    if isinstance(w, list):
        coefficients = w[::-1]
    else:
        coefficients = [1.0 / p] * int(p)

    # Positional tail: errors=[0], p, q=0, k_trend=0, k_exog=0.
    return _arma_predict_out_of_sample(coefficients, step, [0], p, 0, 0, 0,
                                       endog=data)
def main():
    """Run the three-part CSUSHPISA exercise.

    Q1 prints an augmented Dickey-Fuller test, Q2 shows ACF, PACF and the
    raw series in one figure, Q3 prints a 4-step AR(1) forecast.
    """
    # Q1
    frame = pd.read_excel('C:\\Users\\hpatil\\Desktop\\fredgraph.xls', skiprows=10)
    adf_result = statsmodels.tsa.stattools.adfuller(frame['CSUSHPISA'], maxlag=1)
    print(adf_result)
    # Author's reading: the test statistic is above the 1%, 5% and 10%
    # critical values, so the hypothesis gamma = 0 cannot be rejected and a
    # unit root is possible.

    # Q2
    # The first difference is computed and immediately replaced by the level
    # series, so everything below works on the undifferenced data.
    diff = frame['CSUSHPISA'] - frame['CSUSHPISA'].shift()
    diff = frame['CSUSHPISA']

    fig = plt.figure(figsize=(12, 8))
    tsaplots = statsmodels.graphics.tsaplots
    for position, draw in ((311, tsaplots.plot_acf), (312, tsaplots.plot_pacf)):
        axis = fig.add_subplot(position)
        fig = draw(diff, lags=200, ax=axis)
    fig.add_subplot(313)
    plt.plot(diff)   # drawn on the axes added last
    plt.show()
    # Author's reading of the plots: the ACF decays very slowly and the PACF
    # cuts off at 2, which was taken as ARIMA(1,0,0) = AR(1).

    # Q3
    from statsmodels.tsa.arima_model import _arma_predict_out_of_sample
    values = diff.values
    res = sm.tsa.ARMA(values, (1, 0)).fit()
    steps = 4
    print(_arma_predict_out_of_sample(
        res.params, steps, res.resid,
        res.k_ar, res.k_ma, res.k_trend, res.k_exog,
        endog=values, exog=None, start=len(values)))
def forecast_oot():
    """Forecast the out-of-time weeks of every SKU in every sales file.

    For each file in the module-level ``sales_files`` the pickled model
    parameters are loaded from ``result_path`` and the sales are read from
    ``data_path``. For each SKU and each observation window the stored ARMA
    parameters forecast as many weeks as the matching out-of-time window has
    rows. All forecasts of a file are concatenated and written as CSV into
    ``result_path``.

    The function changes the process working directory (``os.chdir``) and
    returns nothing.
    """
    # Observation windows (ForecastWeek from, to), one per out-of-time window.
    test_period = [
        [201636, 201643],
        [201640, 201647],
        [201645, 201652],
        [201649, 201704],
        [201701, 201708]
    ]
    # Out of time period, index-aligned with test_period.
    oot_period = [
        [201648, 201652],
        [201701, 201704],
        [201705, 201708],
        [201709, 201713],
        [201714, 201717]
    ]

    for filename in sales_files:
        # Load the model hyperparameters for file
        model_file_name = rename_file(filename, 'HyperParameters', 'pickle')
        os.chdir(result_path)
        # NOTE(security): unpickling runs arbitrary code from the file; only
        # load model files this project wrote itself.
        # The context manager closes the handle that used to leak.
        with open(model_file_name, 'rb') as model_file:
            model_params = pickle.load(model_file)

        # Read sku-sales data for forecasting
        os.chdir(data_path)
        df = pd.read_csv(filename)
        sku_group = df.groupby('ForecastUnitCode', as_index=False)
        sku_list = sku_group.groups.keys()

        total_predictions = []
        for sku in sku_list:
            df_sku = df[df.ForecastUnitCode.isin([sku])]
            period_index = 0
            print('-----------------------------------------------------')
            print('Result for SKU:', sku)
            for period in test_period:
                x_train = df_sku[
                    (df_sku.ForecastWeek >= period[0]) &
                    (df_sku.ForecastWeek <= period[1])
                ]
                x_train = x_train['Weekly_Volume_Sales'].reset_index(drop=True)
                x_log = transform_data(x_train)
                history = list(x_log)

                # NOTE(review): everything down to the append is assumed to
                # run once per model entry that matches the SKU - confirm.
                for model_param in model_params:
                    if model_param['sku'] == sku:
                        p_order, d_order, q_order = model_param['best_cfg']
                        if d_order > 0:
                            print('Difference SKU %s with order %d' % (sku, d_order))
                            # No second order differencing exists in our model,
                            # hence only 1st order is required
                            history = difference(history)
                        print('week:', period_index)

                        params = model_param['params']
                        residuals = model_param['residuals']
                        p = model_param['p']
                        q = model_param['q']
                        k_exog = model_param['k_exog']
                        k_trend = model_param['k_trend']
                        intercept = model_param['intercept']

                        y_real = df_sku[
                            (df_sku.ForecastWeek >= oot_period[period_index][0]) &
                            (df_sku.ForecastWeek <= oot_period[period_index][1])
                        ].reset_index(drop=True)
                        # Forecast exactly as many weeks as the window has rows.
                        steps = len(y_real)

                        y_predicted_log = _arma_predict_out_of_sample(
                            params, steps, residuals, p, q, k_trend, k_exog,
                            endog=history, exog=None, start=len(history))
                        y_predicted = revert_to_order(y_predicted_log, x_log, d_order)
                        y_pred_series = pd.Series(y_predicted)

                        # Drop the first column, then attach the forecast.
                        y_real.drop(y_real.columns[[0]], axis=1, inplace=True)
                        y_real['Predicted_Weekly_Volume_Sales'] = y_pred_series
                        total_predictions.append(y_real)
                period_index += 1

        # Save predicted sales for respective files
        result_file_name = rename_file(filename, 'Result', 'csv')
        result_df = pd.concat(total_predictions)
        result_df.reset_index(drop=True, inplace=True)
        # Change path to result folder
        os.chdir(result_path)
        result_df.to_csv(result_file_name, sep=',')
        print('Forecasting completed for %s' % filename)
def _forecast_sku(df, models, analysis_windows, category, country):
    """Forecast future weekly volumes per SKU and derive the monthly view.

    :param df: frame with the columns ``sku``, ``forecastWeek`` and
        ``actualVolume``.
    :param models: iterable of dicts with the keys ``sku``, ``best_cfg``,
        ``params``, ``residuals``, ``p``, ``q``, ``k_trend`` and ``k_exog``.
    :param analysis_windows: mapping whose ``'future_forecast'`` entry holds
        ``obs_window_future``, ``future_window``, ``forecast_steps`` and
        ``forecast_select``.
    :param category: passed through to ``_monthly_sku_forecast``.
    :param country: passed through to ``_monthly_sku_forecast``.
    :return: dict with the keys ``'weeklyForecast'`` and ``'monthlyForecast'``.
    """
    # Future forecast at week level
    sku_list = df.groupby('sku', as_index=False).groups.keys()

    future_cfg = analysis_windows['future_forecast']
    obs_window = future_cfg['obs_window_future']
    future_window = future_cfg['future_window']
    horizon = future_cfg['forecast_steps']
    f_select = future_cfg['forecast_select']  # read but not used below

    weekly_frames = []
    for sku in sku_list:
        sku_rows = df[df['sku'].isin([sku])]
        observed = sku_rows[(sku_rows['forecastWeek'] >= obs_window[0]) &
                            (sku_rows['forecastWeek'] <= obs_window[1])]
        observed = observed['actualVolume'].reset_index(drop=True)
        x_log = transform_data(observed)
        obs_mat = list(x_log)

        # NOTE(review): the forecast is assumed to run once per model entry
        # that matches the SKU - confirm.
        for model in models:
            if model['sku'] != sku:
                continue

            _, d_order, _ = model['best_cfg']
            if d_order > 0:
                print('Difference SKU %s with order %d' % (sku, d_order))
                obs_mat = difference(obs_mat)

            # Forecast
            y_pred_log = _arma_predict_out_of_sample(
                params=model['params'], steps=horizon, errors=model['residuals'],
                p=model['p'], q=model['q'],
                k_trend=model['k_trend'], k_exog=model['k_exog'],
                endog=obs_mat, start=len(obs_mat))
            y_pred_series = pd.Series(revert_to_order(y_pred_log, x_log, d_order))

            # Select the data to which we will append the forecast volumes
            y_hat = sku_rows[(sku_rows['forecastWeek'] >= future_window[0]) &
                             (sku_rows['forecastWeek'] <= future_window[1])].reset_index(drop=True)
            y_hat['forecastVolume'] = round(y_pred_series, 0)
            weekly_frames.append(y_hat)

    res_week_forecast = pd.concat(weekly_frames)
    res_week_forecast.reset_index(drop=True, inplace=True)
    res_month_forecast = _monthly_sku_forecast(df=res_week_forecast,
                                               category=category,
                                               country=country)
    return {
        'weeklyForecast': res_week_forecast,
        'monthlyForecast': res_month_forecast
    }
res = sm.tsa.ARMA(data['blue'].iloc[i:end], (2, 0)).fit(trend="nc") # get what you need for predicting one-step ahead params = res.params residuals = res.resid p = res.k_ar q = res.k_ma k_exog = res.k_exog k_trend = res.k_trend steps = 1 pred = _arma_predict_out_of_sample(params, steps, residuals, p, q, k_trend, k_exog, endog=data['blue'].iloc[i:end], exog=None, start=len(data['blue'].iloc[i:end])) out_sample_AR.append(pred) # In[110]: # 1-step ahead prediction for VAR model out_sample_VAR = [] for i in range(0, 112): end = 100 + i model = api.VAR(data.iloc[i:end]) results = model.fit(3)
params['ma.L1.value'] = 1 steps = 2 residuals = arma_mod.resid p = arma_mod.k_ar q = arma_mod.k_ma k_exog = arma_mod.k_exog k_trend = arma_mod.k_trend y = a_pd from statsmodels.tsa.arima_model import _arma_predict_out_of_sample _arma_predict_out_of_sample(params, steps, residuals, p, q, k_trend, k_exog, endog=y, exog=None, start=len(y)) # -------------------------- Example -------------------------- import numpy as np from scipy import stats import pandas as pd import matplotlib.pyplot as plt import statsmodels.api as sm from statsmodels.graphics.api import qqplot
temp = [] for iterm in itemrList: # print "iterm = ",iterm selector = dateframe[iterm] timeSerize = selector[fc - fit_length:fc] # print "timeSerize = ",len(timeSerize) model = ARIMA(timeSerize, order=(p,d,q), freq='D') fitting = model.fit(disp=False) params = fitting.params residuals = fitting.resid p = fitting.k_ar q = fitting.k_ma k_exog = fitting.k_exog k_trend = fitting.k_trend # n_days forecasting forecast = _arma_predict_out_of_sample(params, 1, residuals, p, q, k_trend, k_exog, endog=timeSerize, exog=None, start=len(timeSerize)) # forecast, fcasterr, conf_int = fitting.forecast(steps=1, alpha=.05) real = selector[fc-1:fc] # print "forecast = ",(forecast,type(forecast)) # print "real = ",(real,type(real)) temp.append(float(real)) temp.append(float(forecast)) # print "temp = ",temp x_sample.append(temp) y = dateframe['Close'][fc:fc+1] y_sample.append(float(y)) x_sample = np.array(x_sample) print "x_sample = ",x_sample.shape
def train(filename):
    """
    Trains ARIMA models per material, keeps the one with the least MSE over
    all training periods and saves the selected models to disk.

    The data is read from a fixed CSV path and restricted to one hard-coded
    material. Training, validation and MSE windows come from the module-level
    ``train_period``, ``validation_period`` and ``mse_period``.

    :param filename: currently unused; the model is always written to
        ``model.pickle`` inside ``app_settings['model_path']``.
    :return: None
    """
    begin = 0
    end = 1
    df = pd.read_csv(r'C:\Users\ashok.swarna\bosch_agg.csv')
    df['To_Date'] = pd.to_datetime(df.To_Date, format='%m/%d/%Y')

    material = 'M303.160.117'
    df_1 = df[df['Material'].isin([material])]
    material_group = df_1.groupby('Material', as_index=False)
    material_list = material_group.groups.keys()

    material_best_model = []
    for material in material_list:
        print()
        print(material)
        # Select SKU to train & validate model
        df_sku = df_1[df_1['Material'].isin([material])]
        price = get_unitprice(material, material_values, unit_price)
        period_index = 0
        best_period_models = []
        for tp in train_period:
            print()
            print()
            # Select SKU data from beginning to end of train period
            df_train_period = df_sku[
                (df_sku['To_Date'] >= tp[begin]) &
                (df_sku['To_Date'] <= tp[end])]
            df_for_loss = df_train_period[['agg_closing_stock', 'Total_Issue_quantities']]
            # Select SKU data from beginning to end of in-time validation period
            df_validation_period = df_sku[
                (df_sku['To_Date'] >= validation_period[period_index][begin]) &
                (df_sku['To_Date'] <= validation_period[period_index][end])
            ]
            df_mse_period = df_sku[
                (df_sku['To_Date'] >= mse_period[period_index][begin]) &
                (df_sku['To_Date'] <= mse_period[period_index][end])
            ]
            print('%d train samples for %d period.' % (len(df_train_period), (period_index + 1)))
            print('%d validation samples for %d period.' % (len(df_validation_period), (period_index + 1)))
            print('%d mse samples for %d period.' % (len(df_mse_period), (period_index + 1)))

            # Select sales data for training & validation
            train_sales = df_train_period['Total_Issue_quantities'].reset_index(drop=True)
            validation_sales = df_validation_period['Total_Issue_quantities'].reset_index(drop=True)
            mse_sales = df_mse_period['Total_Issue_quantities'].reset_index(drop=True)
            train_valid_set = (train_sales, validation_sales, mse_sales)

            # Evaluate best model of selected train period
            (best_score, best_cfg, best_params, best_residuals, best_p, best_q,
             best_k_exog, best_k_trend, best_intercept, y_predict_log) = evaluate_models(
                train_valid_set, p_range, d_range, q_range, df_for_loss, price)

            # forecast
            # NOTE(review): this result is never used, the orders are fixed at
            # p=1, q=1 instead of best_p/best_q, and the date column is passed
            # as endog. Kept unchanged; confirm whether it can be removed.
            y_pred_log = _arma_predict_out_of_sample(
                params=best_params, steps=4, errors=best_residuals, p=1, q=1,
                k_trend=best_k_trend, k_exog=best_k_exog, endog=df_sku.To_Date)

            # 'Material' used to reference ``sku``, a name this function never
            # defines; the loop variable is ``material``.
            best_period_model = {'best_cfg': best_cfg, 'mse': best_score,
                                 'Material': material, 'week': (period_index + 1),
                                 'residuals': best_residuals, 'p': best_p, 'q': best_q,
                                 'k_exog': best_k_exog, 'k_trend': best_k_trend,
                                 'params': best_params, 'intercept': best_intercept}
            best_period_models.append(best_period_model)
            period_index += 1

        # Select best model in entire period
        best_model = find_best_model(best_period_models)
        # Add to best models list
        material_best_model.append(best_model)
        print('____________________________________________________________________________________________')
        print('____________________________________________________________________________________________')

    # Save model to disk
    model_path = app_settings['model_path']
    model_file_name = 'model.pickle'
    model_file_path = path.join(model_path, model_file_name)
    # ``sku_best_model`` was never defined; the list built above is saved.
    save_model_to_disk(model_file_path, material_best_model)
    print('Training completed')
pass # get what you need for predicting one-step ahead params = res.params residuals = res.resid p = res.k_ar q = res.k_ma k_exog = res.k_exog k_trend = res.k_trend steps = 1 try: prediction = _arma_predict_out_of_sample(params, steps, residuals, p, q, k_trend, k_exog, endog=dataset, exog=None, start=len(dataset)) except: pass count += 1 print('Point %d prediction = %f' % (j * 100, prediction)) print('Target: %f' % vals[j * 100]) with open(mseFile, 'a+') as rf: rf.writelines( ['%d\t%f\n' % (j * 100, (prediction - vals[j * 100])**2)]) with open(absFile, 'a+') as rf: rf.writelines(['%d\t%f\n' % (j * 100, (prediction - vals[j * 100]))])
def predict(pq_file, train_file):
    """Forecast 60 days of plays per artist and write them to a CSV file.

    ``pq_file`` holds one ``artist_id,p,q`` line per artist. ``train_file``
    holds four lines per artist: the id line first, the comma-separated play
    counts second. For every artist ARMA is tried with the orders ``(p, q)``,
    then ``(p, q - p)``, then ``(0, q)``. If all three fail, the actual values
    of the forecast window are used as the "forecast".

    Output goes to ``./new_mars_tianchi_artist_plays_predict.csv`` and the
    sum of all scores is printed.

    :param pq_file: path of the per-artist order file.
    :param train_file: path of the play-count file.
    :raises IOError: if either input path is not an existing file.
    """
    # Report every missing input, then stop. Continuing used to end in an
    # unrelated UnboundLocalError on the unopened file object.
    inputs_ok = True
    if not os.path.isfile(pq_file):
        print("pq_file error")
        inputs_ok = False
    if not os.path.isfile(train_file):
        print("train_file error")
        inputs_ok = False
    if not inputs_ok:
        raise IOError("pq_file and train_file must both be existing files")

    with open(pq_file, 'r') as pq_fd:
        pq_cont = pq_fd.readlines()
    with open(train_file, 'r') as train_fd:
        train_cont = train_fd.readlines()

    play_data = []
    artist_id = []
    score = []
    for index in range(1, len(train_cont), 4):
        play_data.append(train_cont[index])
        # the id is the line before, minus its last two characters
        artist_id.append(train_cont[index - 1][:-2])
    print(len(play_data))

    artist_pq = {}
    for item in pq_cont:
        oneline_list = item.split(',')
        print(oneline_list)
        # the last field still carries its trailing newline character
        artist_pq[oneline_list[0]] = [int(oneline_list[1]), int(oneline_list[2][:-1])]

    arma_model = []
    pre_data = []
    # Defined up front so the last-resort branch can always use it, even when
    # every fit fails before reaching its own assignment.
    steps = 60
    for one_id, one_train_data in zip(artist_id, play_data):
        p = artist_pq[one_id][0]
        q = artist_pq[one_id][1]
        list_data = [int(value) for value in one_train_data.split(',')]
        dta = pd.Series(list_data[105:183])
        dta.index = pd.Index(sm.tsa.datetools.dates_from_range('2046', '2123'))
        try:
            one_model = sm.tsa.ARMA(dta, (p, q)).fit(disp=-1, trend="c", solver='powell', method="css")
            arma_model.append(one_model)
            p = one_model.k_ar
            q = one_model.k_ma
            pre_result = _arma_predict_out_of_sample(
                one_model.params, steps, one_model.resid, p, q,
                one_model.k_trend, one_model.k_exog,
                endog=dta, exog=None, method='ols', start=len(dta))
            pre_data.append(pre_result)
            one_score = Calculate_score(list_data[123:123 + steps], pre_result)
            if one_score < -100:
                one_score = 0
            score.append(one_score)
        except Exception:
            # Second attempt: lower the MA order by the AR order.
            q = q - p
            try:
                one_model = sm.tsa.ARMA(dta, (p, q)).fit(disp=-1, trend="c", solver='powell', method="css")
                arma_model.append(one_model)
                p = one_model.k_ar
                q = one_model.k_ma
                pre_result = _arma_predict_out_of_sample(
                    one_model.params, steps, one_model.resid, p, q,
                    one_model.k_trend, one_model.k_exog,
                    endog=dta, exog=None, method='ols', start=len(dta))
                pre_data.append(pre_result)
                one_score = Calculate_score(list_data[123:123 + steps], pre_result)
                score.append(one_score)
            except Exception:
                # Third attempt: restore the MA order and drop the AR part.
                q = q + p
                p = 0
                try:
                    one_model = sm.tsa.ARMA(dta, (p, q)).fit(disp=-1, trend="c", solver='powell', method="css")
                    arma_model.append(one_model)
                    p = one_model.k_ar
                    q = one_model.k_ma
                    pre_result = _arma_predict_out_of_sample(
                        one_model.params, steps, one_model.resid, p, q,
                        one_model.k_trend, one_model.k_exog,
                        endog=dta, exog=None, method='ols', start=len(dta))
                    pre_data.append(pre_result)
                    one_score = Calculate_score(list_data[123:123 + steps], pre_result)
                    score.append(one_score)
                except Exception:
                    # Last resort: use the actual values as the forecast.
                    pre_result = list_data[123:123 + steps]
                    p = 0
                    q = 0
                    pre_data.append(pre_result)
                    one_score = Calculate_score(list_data[123:123 + steps], pre_result)
                    score.append(one_score)
                    # NOTE(review): these appends give pre_data and score a
                    # second entry for this artist, so they no longer line up
                    # with artist_id. Kept as found; the nesting level is an
                    # assumption - confirm intent.
                    arma_model.append([])
                    pre_data.append([])
                    score.append(0)
                    print(list_data)
                    print(one_id)

    # The labels are the integers 20150901..20150930 and 20151001..20151030.
    date_list = list(range(20150901, 20150931)) + list(range(20151001, 20151031))
    with open("./new_mars_tianchi_artist_plays_predict.csv", 'w') as result_data_fd:
        for item, art_id in zip(pre_data, artist_id):
            for plays, one_date in zip(item, date_list):
                output = "%s,%s,%s\n" % (art_id, str(int(plays)), one_date)
                result_data_fd.write(output)
    print(sum(score))
def predict(data, Ds, AL, steps):
    """Build three ``steps``-long forecasts for the first column of ``data``.

    1. A single ARMA(3, 0) fit forecasting ``steps`` values.
    2. One ARMA(3, 0) refit per forecast position, each forecasting one
       value into a copy of the data.
    3. A combination of the padded ``Ds`` values and a line fitted to ``AL``.

    Each series starts with the last observation before the forecast window
    and is printed before returning.

    :param data: mapping/frame whose ``keys()[0]`` column is the series.
    :param Ds: values that are padded symmetrically by ``steps``.
    :param AL: values to which ``linreg`` fits a line over their positions.
    :param steps: number of values to forecast.
    :return: tuple ``(arma_t, arma_t_r, summarized_t)``.
    """
    key = data.keys()[0]
    series = data[key]
    V = len(series)
    anchor = V - steps - 1          # last position before the forecast window

    def tail_index():
        # Dates of the anchor point plus the ``steps`` forecast positions.
        return DatetimeIndex([series.index[anchor + i] for i in range(steps + 1)], freq="D")

    # Create N-step prediction using ARMA method on the initial timeseries
    res = sm.tsa.ARMA(series[0:anchor], (3, 0)).fit()
    direct = _arma_predict_out_of_sample(
        res.params, steps, res.resid, res.k_ar, res.k_ma, res.k_trend, res.k_exog,
        endog=series, exog=None, start=V - steps)
    pArma = [series[anchor]]
    pArma.extend(direct)
    arma_t = Series(pArma, index=tail_index())
    print("ARMA: \n",arma_t)

    # Create N-step prediction using recursive ARMA method on the initial timeseries
    pred = deepcopy(data)
    for ss in range(steps, 0, -1):
        res = sm.tsa.ARMA(pred[key][0:(V - 1 - ss)], (3, 0)).fit()
        pred[key][V - ss] = _arma_predict_out_of_sample(
            res.params, 1, res.resid, res.k_ar, res.k_ma, res.k_trend, res.k_exog,
            endog=series, exog=None, start=V - ss)[0]
    rArma = [series[anchor]]
    rArma.extend(pred[key][V - steps:(V + 1)])
    arma_t_r = Series(rArma, index=tail_index())
    print("rARMA: \n",arma_t_r)

    # Create N-step prediction using Summarization Features
    padded = np.pad(Ds, steps, mode='symmetric')
    ext_Ds = list(padded[len(padded) - steps:])   # the last ``steps`` padded values
    _m, _s = stanDev(series)                       # results are not used below
    a, b = linreg(range(len(AL)), AL)
    n_al = len(AL)
    line = [a * x + b for x in range(n_al + steps)]
    blended = [(value + line[n_al - 1 + i]) / 10 for i, value in enumerate(ext_Ds)]
    fcst = [series[anchor]]
    fcst.extend(blended)
    summarized_t = Series(fcst, index=tail_index())
    print("Summarized: \n",summarized_t)

    return (arma_t, arma_t_r, summarized_t)
def model_fit_pred(self, num):
    """
    :description fit one ARMA model per artist and predict ``num`` days of data

    For every artist the play counts are parsed, train/test slices are stored,
    and up to three ARMA orders are tried in turn: ``(p, q)`` taken from the
    ACF/PACF cut-offs, then ``(p, q - p)``, then ``(0, q)``.  The first order
    that fits and forecasts successfully is recorded.  If none does, the
    actual values are stored as the "prediction" with order ``(0, 0)``.

    Side effects: appends to ``self.train_data``, ``self.test_data``,
    ``self.arma_model``, ``self.ar_p_value``, ``self.ma_q_value``,
    ``self.pre_data`` and ``self.score``; prints each artist's data and id;
    writes ``artist_id,p,q`` lines to ``./artist_pq_value.txt``.

    :param num: number of days to forecast (converted with ``int``)
    :return: None
    """
    # Bound once up front: the original only assigned it after a successful
    # fit, so the final fallback could hit an unbound name.
    steps = int(num)

    def _fit_forecast_score(series, order, actual):
        # Fit ARMA(order), forecast `steps` values, score them against `actual`.
        one_model = sm.tsa.ARMA(series, order).fit(
            disp=-1, trend="c", solver='powell', method="css")
        forecast = _arma_predict_out_of_sample(
            one_model.params, steps, one_model.resid,
            one_model.k_ar, one_model.k_ma,
            one_model.k_trend, one_model.k_exog,
            endog=series, exog=None, method='ols', start=len(series))
        return one_model, forecast, self.Calculate_score(actual, forecast)

    with open('./artist_pq_value.txt', 'w') as artist_pq_value_fd:
        for item, artist_id in zip(self.play_data, self.artist_id):
            list_data = [int(value) for value in item.split(',')]
            # divide train data and test data
            self.train_data.append(list_data[45:123])
            self.test_data.append(list_data[123:])
            dta = pd.Series(list_data[105:183])
            dta.index = pd.Index(
                sm.tsa.datetools.dates_from_range('2046', '2123'))
            p = self.get_cut_off_value(sm.tsa.acf(dta, nlags=10))
            q = self.get_cut_off_value(sm.tsa.pacf(dta, nlags=10))
            actual = list_data[123:123 + steps]

            # Same fallback sequence as before: (p, q) -> (p, q - p) -> (0, q).
            candidate_orders = [(p, q), (p, q - p), (0, q)]
            for attempt, order in enumerate(candidate_orders):
                try:
                    one_model, pre_result, one_score = _fit_forecast_score(
                        dta, order, actual)
                except Exception:
                    # Fit or forecast failed for this order; try the next one.
                    continue
                # Only the first attempt clamps very poor scores to zero.
                if attempt == 0 and one_score < -100:
                    one_score = 0
                # Record results only after everything succeeded, so a late
                # failure cannot leave the parallel lists partially updated.
                self.arma_model.append(one_model)
                self.ar_p_value.append(order[0])
                self.ma_q_value.append(order[1])
                self.pre_data.append(pre_result)
                self.score.append(one_score)
                p = one_model.k_ar
                q = one_model.k_ma
                break
            else:
                # No order could be fitted: use the actual values as the
                # prediction and record order (0, 0).
                pre_result = list(actual)
                p = 0
                q = 0
                self.ar_p_value.append(p)
                self.ma_q_value.append(q)
                self.pre_data.append(pre_result)
                self.score.append(self.Calculate_score(actual, pre_result))
                self.arma_model.append([])
                # NOTE(review): the original also appended an empty prediction
                # and a zero score here, giving two pre_data/score entries for
                # this artist.  Kept as-is; confirm whether this is intended.
                self.pre_data.append([])
                self.score.append(0)

            print(list_data)
            print(artist_id)
            artist_pq_value_fd.write("%s,%d,%d\n" % (artist_id, p, q))
# In[663]: sm.stats.diagnostic.acorr_ljungbox(sm.tsa.stattools.acf(arma_mod.resid),lags=20) # We start using the <a href="https://en.wikipedia.org/wiki/Ljung%E2%80%93Box_test" > Ljung–Box test </a>. It is a test for whether any of a group of autocorrelations of a time series are different from zero. It is reported Ljung-Box test has better small sample properties compared to another test which is Box-Pierce statistic. # The probaility p-value (the second array) are all larger than 5%. Means this model is sort of good. # In[664]: size=3900 sample=10 predict_data=np.zeros(size) predict_data[0:len(my_data)] = np.asarray(fitresult[0:len(my_data)]).copy() from statsmodels.tsa.arima_model import _arma_predict_out_of_sample predict_data[len(my_data):len(my_data)+sample]= _arma_predict_out_of_sample(arma_mod.params, sample, arma_mod.resid, 3, 1, arma_mod.k_trend, arma_mod.k_exog, endog=my_data, exog=None, start=len(my_data))*sample # Here, we start to predict the future values based on our fitted model. # In[665]: xlim(3750,3850) ylim(-0.03,0.03) plot(my_data,'r',linewidth=2, label="Original Data") plot(fitresult,'b', linewidth=2, label="ARMA(3,1) Model Fitted") plot(predict_data[0:len(my_data)+sample], 'g',linewidth=2, linestyle='--', label="Predict Values") legend(loc='upper right') # The prediction is done with 10 steps further. <br>
def predict_with_residues(filenames):
    """Forecast out-of-time weekly sales per SKU from stored ARMA parameters.

    For every file the pickled per-SKU model parameters are loaded from
    ``result_path`` and the sales CSV is read from ``data_path``.  For each
    SKU and each test window, a 4-step forecast (5 steps when the
    out-of-time window has more than four rows) is produced with
    ``_arma_predict_out_of_sample``, together with a promo-adjusted variant.
    Both are attached to the out-of-time rows and all rows are written to a
    ``Result`` CSV in ``result_path``.

    Side effects: changes the process working directory (``os.chdir``),
    prints progress, and writes one CSV per input file.

    :param filenames: iterable of CSV file names located in ``data_path``
    :return: None
    """
    # Set of 8 weeks feed into ARIMA for forecasting out of time samples
    test_period = [[201706, 201713], [201710, 201717], [201714, 201721]]
    oot_period = [[201714, 201717], [201718, 201721], [201722, 201726]]

    for filename in filenames:
        # Load the model hyperparameters for file
        model_file_name = rename_file(filename, 'HyperParameters', 'pickle')
        os.chdir(result_path)
        # NOTE: pickle.load can execute arbitrary code; only load parameter
        # files produced by a trusted training run.
        with open(model_file_name, 'rb') as model_file:
            model_params = pickle.load(model_file)

        # Read sku-sales data for forecasting
        os.chdir(data_path)
        df = pd.read_csv(filename)
        sku_group = df.groupby('ForecastUnitCode', as_index=False)
        sku_list = sku_group.groups.keys()
        total_predictions = []

        for sku in sku_list:
            df_sku = df[df.ForecastUnitCode.isin([sku])]
            print('-----------------------------------------------------')
            print('Result for SKU:', sku)

            for period_index, (period, oot) in enumerate(
                    zip(test_period, oot_period)):
                # data set to be fed for forecasting
                x_valid = df_sku[
                    (df_sku.ForecastWeek >= period[0])
                    & (df_sku.ForecastWeek <= period[1])
                ]
                x_valid_sales = x_valid['Weekly_Volume_Sales'].reset_index(
                    drop=True)
                x_log = transform_data(x_valid_sales)
                history = list(x_log)

                for model_param in model_params:
                    if model_param['sku'] != sku:
                        continue

                    p_order, d_order, q_order = model_param['best_cfg']
                    if d_order > 0:
                        print('Difference SKU %s with order %d'
                              % (sku, d_order))
                        # No second order differencing exists in our model,
                        # hence only 1st order is required
                        history = difference(history)
                    print('Period:', period_index + 1)

                    params = model_param['params']
                    residuals = model_param['residuals']
                    p = model_param['p']
                    q = model_param['q']
                    k_exog = model_param['k_exog']
                    k_trend = model_param['k_trend']
                    best_ols_params = model_param['best_ols_params']

                    y_actual = df_sku[
                        (df_sku.ForecastWeek >= oot[0])
                        & (df_sku.ForecastWeek <= oot[1])
                    ].reset_index(drop=True)
                    # Forecast one extra week when the window has a 5th row.
                    steps = 5 if len(y_actual) > 4 else 4

                    y_predicted_log = _arma_predict_out_of_sample(
                        params, steps, residuals, p, q, k_trend, k_exog,
                        endog=history, exog=None, start=len(history))
                    y_pred_promo_log = y_predicted_log.copy()

                    try:
                        # NOTE(review): the entries stored under 'pvalues' are
                        # used as per-column weights here -- confirm they are
                        # coefficients and not p-values.
                        ols_pvalues_dict = best_ols_params['pvalues']
                        if len(ols_pvalues_dict.keys()) > 1:
                            # SKU has promo significance: add the weighted
                            # promo columns of each out-of-time row.
                            promo_sig_values = [
                                sum(row[key] * value
                                    for key, value in ols_pvalues_dict.items()
                                    if key != 'const')
                                for _, row in y_actual.iterrows()
                            ]
                            y_pred_promo_log += promo_sig_values
                    except KeyError:
                        print('Key error %s' % sku)
                        continue

                    print()
                    print('-----------------')
                    print()

                    y_predicted = revert_to_order(
                        y_predicted_log, x_log, d_order)
                    y_actual.drop(y_actual.columns[[0]], axis=1, inplace=True)
                    y_actual['Predicted_Weekly_Volume_Sales'] = pd.Series(
                        y_predicted)

                    y_predicted_promo = revert_to_order(
                        y_pred_promo_log, x_log, d_order)
                    y_actual['Promo_Weekly_Volume_Sales'] = pd.Series(
                        y_predicted_promo)
                    total_predictions.append(y_actual)

        if not total_predictions:
            # pd.concat raises on an empty list; report and move on instead
            # of aborting the remaining files.
            print('No predictions produced for %s' % filename)
            continue

        # Save predicted sales for respective files
        result_file_name = rename_file(filename, 'Result', 'csv')
        result_df = pd.concat(total_predictions)
        result_df.reset_index(drop=True, inplace=True)

        # Change path to result folder
        os.chdir(result_path)
        result_df.to_csv(result_file_name, sep=',')
        print('Forecasting completed for %s' % filename)
# Remaining inputs for the out-of-sample predictor, read from the fitted
# `arima100` results object.
q = arima100.k_ma
k_exog = arima100.k_exog
k_trend = arima100.k_trend
steps = 4  # number of periods to forecast

#
# Obtain the Information Criterion (IC) values
#
# These bare expressions only display a value in an interactive session; in a
# plain script they have no visible effect.
arima100.aic # Akaike Information Criterion (AIC)
arima100.bic # Bayesian Information Criterion (BIC)

# Forecast the evolution of HPI, starting right after the last observation of
# hpi_log.
pred = _arma_predict_out_of_sample(params, steps, residuals, p, q, k_trend, k_exog, endog=hpi_log, exog=None, start=len(hpi_log))

#
# Provide one-, two-, three- and four-month forecasts
#
# np.exp is applied to the forecast before labelling; presumably this maps a
# log-scale series back to levels -- confirm how hpi_log was built.
output = pd.Series(np.exp(pred), index=[ 'One-month Forecast', 'Two-month Forecast', 'Three-month Forecast', 'Four-month Forecast' ])
# NOTE(review): the message mentions three forecasts but four are printed.
print('Results for one month, two-month and three-month forecasts:', output[0:4])
def queryandinsert():
    """Query recent online-user counts and plot an ARMA forecast for Japan.

    This is the main routine invoked by the script entry point.  It connects
    to the GTBU, OMS and remote databases, reads the last two days of online
    counts from the remote database, fits an ARMA(5, 2) model to the log of
    the JP series, prints a 300-step forecast, and writes a plotly HTML
    figure.  It relies on module-level dictionaries, so it should not be
    called from other packages.

    All database connections are closed before the analysis starts, even if
    connecting or fetching fails.

    :return: None
    """
    global gtbuDict  # stores query data from the GTBU database
    global omsDict  # stores query data from the OMS database
    global presisDict
    global counter
    global testingDict

    starttime = datetime.datetime.now()
    print(len(presisDict))
    print("connect to databae!")

    # connect to the database use my own toolkits
    querydbinfoOMS = getdbinfo('OMS')
    querydbnameOMS = "wifi_data"
    querydbinfoGTBU = getdbinfo("GTBU")
    querydbnameGTBU = "ucloudplatform"
    insertdbinfo = getdbinfo('REMOTE')
    insertdbname = 'login_history'

    # print the database information for verification
    # NOTE(review): this prints every entry of the OMS settings, which
    # includes the 'pwd' value used below -- consider masking it.
    for key, value in querydbinfoOMS.iteritems():
        print(key + " : " + str(value))

    # Online counts of the last two days; the only query executed below.
    queryStatementRemote = """ SELECT epochTime,visitcountry,onlinenum FROM t_fordemo WHERE butype =2 AND visitcountry IN ('JP','DE','TR') AND epochTime BETWEEN DATE_SUB(NOW(),INTERVAL 2 DAY) AND NOW() ORDER BY epochTime ASC """

    # The four statements below are defined but not executed in this function.
    # get the online data which will be used to calculate the daily user
    # number (the daily user number is bigger than the max number, and the max
    # number is actually what is used in this scenario)
    queryStatementTraining = """ SELECT t1,t2,DATEDIFF(t2,t1) AS dif,imei,visitcountry FROM ( SELECT DATE(logindatetime) AS t1,DATE(logoutdatetime) AS t2, imei,visitcountry FROM t_usmguserloginlog WHERE visitcountry IN ('JP','DE','TR') ) AS z GROUP BY t1,t2,imei """
    # (output data) get the max online number for each of these countries
    # every day (this record is incomplete due to constant network partitions,
    # so input and output dates must be aligned by day)
    queryStatementOnline =""" SELECT epochTime,visitcountry,MAX(onlinenum) FROM ( SELECT DATE(epochTime) AS epochTime,visitcountry,onlinenum FROM t_fordemo WHERE butype =2 and visitcountry IN ('JP','DE','TR') ) AS z GROUP BY epochTime,visitcountry """
    # (input data) get the order information used to calculate the daily
    # maximum number for each country; this number can be far larger than the
    # real number for some countries
    querystatementOMS = """ SELECT DATE(date_goabroad),DATE(date_repatriate),DATEDIFF(date_repatriate,date_goabroad),imei,package_id FROM tbl_order_basic WHERE imei IS NOT NULL AND (DATE(date_repatriate)) > '2016-01-01' AND DATE(date_goabroad) < DATE(NOW()) ORDER BY date_repatriate ASC """
    querystatementOMSCount = """ SELECT date_goabroad,date_repatriate,DATEDIFF(date_repatriate,date_goabroad),t1.package_id,t3.iso2 FROM tbl_order_basic AS t1 LEFT JOIN tbl_package_countries AS t2 ON t1.package_id = t2.package_id LEFT JOIN tbl_country AS t3 ON t2.country_id = t3.pk_global_id WHERE t1.data_status = 0 AND DATE(date_goabroad) BETWEEN DATE(NOW()) AND DATE_ADD(NOW(),INTERVAL 3 MONTH) OR ( DATE(date_repatriate) >= DATE(NOW()) ) """

    # establish connection to the mysql databases and make sure every
    # connection that was opened is closed again
    querydbGTBU = None
    querydbOMS = None
    insertdb = None
    try:
        querydbGTBU = MySQLdb.connect(user = querydbinfoGTBU['usr'],
                                      passwd = querydbinfoGTBU['pwd'],
                                      host = querydbinfoGTBU['host'],
                                      port = querydbinfoGTBU['port'],
                                      db = querydbnameGTBU)
        querydbOMS = MySQLdb.connect(user = querydbinfoOMS['usr'],
                                     passwd = querydbinfoOMS['pwd'],
                                     host = querydbinfoOMS['host'],
                                     port = querydbinfoOMS['port'],
                                     db = querydbnameOMS)
        insertdb = MySQLdb.connect(user = insertdbinfo['usr'],
                                   passwd = insertdbinfo['pwd'],
                                   host = insertdbinfo['host'],
                                   port = insertdbinfo['port'],
                                   db = insertdbname)
        queryCurGTBU = querydbGTBU.cursor()
        queryCurOMS = querydbOMS.cursor()
        insertCur = insertdb.cursor()

        print("executing query!!! By using generator!!!")
        insertCur.execute(queryStatementRemote)
        # fetchsome is a generator which fetches a fixed number of rows per call
        remoteGenerator = fetchsome(insertCur,100)
        for row in remoteGenerator:
            accumulatOnlineNumber(row,testingDict)
    finally:
        # Close in reverse order of creation; skip connections never opened.
        for connection in (insertdb, querydbOMS, querydbGTBU):
            if connection is not None:
                connection.close()

    # Extract the JP series from the accumulated data.
    onlineList = getTestingList(testingDict)
    countryList = onlineList[1]
    jpIndex = countryList.index('JP')
    datalist = onlineList[2][jpIndex]
    timelist = onlineList[0]
    tsJP = Series(datalist,index = timelist)
    df = DataFrame()
    df['JP'] = tsJP
    print(df.index)
    print(df.columns)
    print(df)

    # Model the log of the series.
    tsJP_log = np.log(tsJP)
    lag_acf = acf(tsJP_log,nlags=200)
    lag_pacf = pacf(tsJP_log,nlags=200,method='ols')
    # model = ARIMA(tsJP_log,order=(2,1,2))
    model = ARMA(tsJP_log,(5,2))
    res = model.fit(disp=-1)
    print("Here is the fit result")
    print(res)

    params = res.params
    residuals = res.resid
    p = res.k_ar
    q = res.k_ma
    k_exog = res.k_exog
    k_trend = res.k_trend
    steps = 300
    newP = _arma_predict_out_of_sample(params, steps, residuals, p, q,
                                       k_trend, k_exog, endog=tsJP_log,
                                       exog=None, start=len(tsJP_log))
    newF,stdF,confiF = res.forecast(steps)

    # Print each result on the log scale, then after np.exp.
    print(newP)
    newP = np.exp(newP)
    print(newP)
    print(" Forecast below!!")
    print(newF)
    newF = np.exp(newF)
    print(newF)
    print(stdF)
    stdF = np.exp(stdF)
    print(stdF)

    x_axis = range(len(lag_acf))
    y_axis = lag_acf
    onlineEWMA=go.Scatter(
        x = x_axis,
        y = y_axis,
        mode = 'lines+markers',
        name = "lag_acf"
    )
    # NOTE(review): x_axis has len(lag_acf) points while newP has `steps`
    # (300) values -- confirm the intended x values for this trace.
    onlinePre=go.Scatter(
        x = x_axis,
        y = newP,
        mode = 'lines+markers',
        name = "predictJP"
    )
    layout = dict(title = 'predicewma',
                  xaxis = dict(title = 'Date'),
                  yaxis = dict(title = 'online Number'),
                  )
    data = [onlineEWMA,onlinePre]
    fig = dict(data=data, layout=layout)
    plot(fig,filename ="/ukl/apache-tomcat-7.0.67/webapps/demoplotly/EWMAprediction.html",auto_open=False)
# These two slices are overwritten below once `y` is rebuilt.
train = y[: 200]
test = y[200 :240]

# Replace `y` with the integers 0..999 in random order. Shuffling the result
# of range() in place requires it to be a list (Python 2).
y = range(1000)
random.shuffle(y)
#for i in range(1, 250):
#    y[i] += y[i - 1]
train = y[: 500]
test = y[500 :700]

# Choose the ARMA order with the lowest AIC on the training slice, then fit
# that model without a trend term.
res = sm.tsa.stattools.arma_order_select_ic(train, ic='aic')
arma_mod = sm.tsa.ARMA(train, order=res.aic_min_order)
arma_res = arma_mod.fit(trend='nc', disp=-1)
#print res.params

# Collect what the out-of-sample predictor needs from the fitted model.
params = arma_res.params
residuals = arma_res.resid
p = arma_res.k_ar
q = arma_res.k_ma
k_exog = arma_res.k_exog
k_trend = arma_res.k_trend
steps = 300  # forecast horizon; matches the 300 values left in y[700:]

# Print the held-back values, then the forecast.
# NOTE(review): `residuals` come from the fit on `train`, while `endog` is
# `test` -- confirm this pairing is intended.
print y[700: ]
print _arma_predict_out_of_sample(params, steps, residuals, p, q, k_trend, k_exog, endog=test, exog=None, start=len(test))
#for y1, y2 in zip(y, y_):
#    print '%f\t%f' % (y1, y2)
timeSerize = selector[fc - fit_length:fc] # print "timeSerize = ",len(timeSerize) model = ARIMA(timeSerize, order=(p, d, q), freq='D') fitting = model.fit(disp=False) params = fitting.params residuals = fitting.resid p = fitting.k_ar q = fitting.k_ma k_exog = fitting.k_exog k_trend = fitting.k_trend # n_days forecasting forecast = _arma_predict_out_of_sample(params, 1, residuals, p, q, k_trend, k_exog, endog=timeSerize, exog=None, start=len(timeSerize)) # forecast, fcasterr, conf_int = fitting.forecast(steps=1, alpha=.05) real = selector[fc - 1:fc] # print "forecast = ",(forecast,type(forecast)) # print "real = ",(real,type(real)) temp.append(float(real)) temp.append(float(forecast)) # print "temp = ",temp x_sample.append(temp) y = dateframe['Close'][fc:fc + 1] y_sample.append(float(y))
# as you need a couple of measurements first. This means that the first predictions can be off a bit.) prediction_residuals_abs = np.zeros(test_length) count = 0 count_pos = 0 #threshold = 0.3 # Make the predictions for the test data, using only the ARMA model generated with the training data for position in range(measures_needed, test_length): #print("Looking at residuals:", prediction_residuals[position-measures_needed:position]) #print("And previous values:", series_test[position-measures_needed:position]) predictions[position] = _arma_predict_out_of_sample( params, 1, prediction_residuals[position - measures_needed:position], p, q, k_trend, k_exog, endog=series_test[position - measures_needed:position], exog=None, start=measures_needed) prediction_residuals[ position] = series_test[position] - predictions[position] resi_abs = np.abs(series_test[position] - predictions[position]) prediction_residuals_abs[position] = resi_abs # We don't throw any alarms for the first max(p, q) predictions, # as the first couple of predictions are typically more off then others if position > 2 * measures_needed and resi_abs > threshold: count += 1 time = series_test[position:position + 1].index #print("Alert on {}: {} positive".format(time, attack_at_time(time)))
# # # # r,q,p = sm.tsa.acf(resid.values.squeeze(), qstat=True) # data = np.c_[range(1,41), r[1:], q, p] # table = pd.DataFrame(data, columns=['lag', "AC", "Q", "Prob(>Q)"]) # # print(table.set_index('lag')) # # predict_sunspots = arma_mod20.forecast(30, alpha=.1) # # print(predict_sunspots) # # # fig, ax = plt.subplots(figsize=(12, 8)) # # ax = dta.ix['2001':].plot(ax=ax) # # fig = arma_mod30.plot_predict(83, 203, alpha=.1, exog=s_dta[80:183], dynamic=False, ax=ax, plot_insample=True) # # fig.show() # get what you need for predicting one-step ahead params = arma_mod30.params residuals = arma_mod30.resid p = arma_mod30.k_ar q = arma_mod30.k_ma k_exog = arma_mod30.k_exog k_trend = arma_mod30.k_trend steps = 20 pre_result = _arma_predict_out_of_sample(params, steps, residuals, p, q, k_trend, k_exog, endog=dta, exog=None, method='ols', start=len(dta)) # print "#################################" print(pre_result) plt.plot(s_dta[123:143]) plt.plot(pre_result,'red') plt.show()