def aa(j, p, d, q):
    """Plot series ``ap_df[j]`` and the predictions of an ARIMA(p, d, q) fit.

    The current figure is cleared through ``pltf.clf()``, the observed series
    is plotted, an ARIMA model with daily frequency is fitted to it and the
    predictions for positions 1 through 255 are plotted as well.

    Args:
        j: key selecting the series inside the module-level ``ap_df``.
        p, d, q: ARIMA order components.
    """
    pltf.clf()
    series = ap_df[j]
    series.plot()
    fitted = ARIMA(series, (p, d, q), freq='D').fit()
    fitted.predict(1, 255).plot()
def forecastARIMA(df, H, P, D, Q):
    """Fit ARIMA(P, D, Q) without a trend term and forecast ``H`` steps ahead.

    Args:
        df: data whose ``values`` are modelled; ``df.index.stop`` is read, so
            the index is presumably a ``RangeIndex`` -- TODO confirm.
        H: number of steps to forecast.
        P, D, Q: ARIMA order components.

    Returns:
        Tuple ``(pred_series, fc_series, th, lower_series, upper_series,
        summary)``. The four series are JSON strings produced with
        ``orient='values'``; ``th`` is the serialized list of forecast
        positions ``len(df) .. len(df) + H - 1``; ``summary`` is a dict with
        the order, the AR followed by MA parameters, and the HTML summary.
    """
    order = (P, D, Q)
    fitted = ARIMA(df.values[:], order=order).fit(trend='nc')

    # Point forecasts and confidence bounds; the standard errors are unused.
    fc, _stderr, confint = fitted.forecast(H)

    first = df.index.stop
    index_of_fc = pd.RangeIndex(start=first, stop=first + H)
    th = dumps(list(range(len(df), len(df) + H)))

    def _to_json(values):
        return pd.Series(values, index=index_of_fc).to_json(orient='values')

    fc_series = _to_json(fc)
    lower_series = _to_json(confint[:, 0])
    upper_series = _to_json(confint[:, 1])

    summary = {
        'order': order,
        'params': list(fitted.arparams) + list(fitted.maparams),
        'summary': fitted.summary().as_html(),
    }

    last = first + H - 1
    try:
        pred = fitted.predict(first, last, typ='linear').tolist()
    except Exception:
        # Retry without ``typ`` when the first call fails for any reason.
        pred = fitted.predict(first, last).tolist()
    pred_series = pd.Series(pred).to_json(orient='values')

    return pred_series, fc_series, th, lower_series, upper_series, summary
def in_sample_prediction(p, q, y_true, train_ratio):
    """Plot dynamic and one-step-ahead ARMA(p, q) predictions for the tail of a series.

    The model is fitted on the complete series. The first
    ``round(train_ratio * len(y_true))`` observations are drawn as the
    "train" span and both kinds of predictions are drawn for the remaining
    positions. Nothing is returned; the figure is shown with ``plt.show()``.

    Args:
        p: autoregressive order.
        q: moving-average order.
        y_true: observations, either a ``pd.Series`` or an array.
        train_ratio: fraction of the series drawn as the train span.
    """
    if isinstance(y_true, pd.Series):
        # Forget the dates and only work with the position of each day; this
        # avoids problems such as a single NaN in the middle of the data.
        y_true = y_true.values

    n_obs = len(y_true)
    t = round(train_ratio * n_obs)

    # The model is fitted on all of the data, not only on the train span.
    fitted = ARIMA(y_true, order=(p, 0, q)).fit()

    x_test = np.arange(t + 1, n_obs + 1)
    dynamic_predictions = fitted.predict(start=t, end=n_obs - 1, dynamic=True)
    one_step_ahead_predictions = fitted.predict(start=t, end=n_obs - 1, dynamic=False)

    curves = (
        (np.arange(1, t + 1), y_true[:t], "Observed (train)"),
        (x_test, y_true[t:], "Test period (truth)"),
        (x_test, dynamic_predictions, "Dynamic pred"),
        (x_test, one_step_ahead_predictions, "1-step pred"),
    )

    plt.figure(figsize=(16, 4))
    for xs, ys, label in curves:
        plt.plot(xs, ys, label=label, marker="o", ms=4)
    plt.legend()
    plt.title(f"[train_ratio={train_ratio}] Resultats de prédiction pour AR={p} MA={q}")
    plt.xlabel("Jour du test")
    plt.xticks()
    plt.show()
def buildARIMA(data, start, end):
    """Fit an ARIMA model with the order chosen by ``definePQ`` and predict.

    Args:
        data: series to model; the result is exponentiated, so the data is
            presumably log-transformed -- TODO confirm against the caller.
        start: first position/label to predict.
        end: last position/label to predict.

    Returns:
        ``np.exp`` of the predictions between ``start`` and ``end``.
    """
    p, c, q = definePQ(data)
    fitted = ARIMA(data, (p, c, q)).fit()
    # ``typ='levels'`` is only passed when a differencing order is in use.
    extra = {} if c == 0 else {'typ': 'levels'}
    return np.exp(fitted.predict(start=start, end=end, **extra))
def calc_predictions(self, error_times: [int]) -> bool:
    """Forecast the test window with an automatically selected ARIMA order.

    The train/test slices of ``self.window`` are re-indexed with the
    datetimes in ``self.index_train`` / ``self.index_test`` and
    ``auto_arima`` searches for a non-seasonal order. When the selected order
    is not all zeros, the forecasts are stored in
    ``self.prediction_values_all`` (re-indexed on the simulation time grid)
    and the entries at ``error_times + train_length()`` are stored as a list
    in ``self.prediction_values``.

    Args:
        error_times: offsets added to ``train_length()`` to pick forecasts;
            the addition suggests a numpy array rather than a plain list --
            TODO confirm.

    Returns:
        True when predictions were stored, False when ``auto_arima`` selected
        the all-zero order.
    """
    n_train = len(self.index_train)
    train = self.window[:n_train - 1]
    train.index = pd.to_datetime(self.index_train)
    test = self.window[n_train:]
    test.index = pd.to_datetime(self.index_test)

    stepwise_fit = auto_arima(train, seasonal=False, start_p=3, start_d=1,
                              start_q=2, trace=True, error_action='ignore',
                              suppress_warnings=False, stepwise=True)

    if not sum(stepwise_fit.order):
        # The original returned ``(False, None)`` here: a non-empty tuple is
        # truthy, so ``if obj.calc_predictions(...)`` treated failure as
        # success. Return the plain bool promised by the annotation.
        return False

    self.order = stepwise_fit.order
    preds, conf_int = stepwise_fit.predict(n_periods=len(test),
                                           return_conf_int=True)
    prediction = pd.Series(preds, index=test.index)

    # NOTE(review): this second fit only feeds diagnostic plots that are
    # currently disabled; its output is not used below.
    model = ARIMA(train, order=stepwise_fit.order).fit(disp=0)
    if stepwise_fit.order[1] == 0:
        data = model.predict(end=3600 + 180)
    else:
        data = model.predict(typ='levels', end=3600 + 180)

    # NOTE(review): ``sampleTime`` is a module-level name while the step uses
    # ``self.sample_time`` -- confirm both are intended.
    index = np.arange(self.pw.userData.train_length() + sampleTime,
                      self.pw.userData.simlength * 60 + 1,
                      self.sample_time)
    self.prediction_values_all = prediction
    self.prediction_values_all.index = index
    self.prediction_values = self.prediction_values_all.loc[
        error_times + self.pw.userData.train_length()]
    self.prediction_values = self.prediction_values.tolist()
    return True
def arima(demand, validation_points):
    """Pick an ARIMA order by cross-validation and refit it on all the data.

    Every combination of ``params.p``, ``params.d`` and ``params.q`` except
    the first one is scored by the mean of the mean-squared errors obtained
    on the splits produced by ``split_data``. The best-scoring order is then
    fitted on the complete ``demand`` series.

    Args:
        demand: the series to model.
        validation_points: passed to ``split_data``; split indices run from 1
            to ``validation_points - 2``.

    Returns:
        list: ``[best_order, best_cv_error, best_fit]``.

    Raises:
        ValueError: if the grid leaves no candidate order to evaluate.
    """
    fit_options = {'solver': 'bfgs', 'transparams': True, 'method': 'mle'}
    pdq = list(itertools.product(params.p, params.d, params.q))

    best_order = None
    best_error = float('Inf')
    for order in pdq:
        # The first combination of the grid is deliberately not evaluated.
        if order == pdq[0]:
            continue

        cv_errors = []
        for split_count in range(1, validation_points - 1):
            demand_train, demand_valid = split_data(
                demand, validation_points, split_count)
            if len(demand_valid) != params.validation_steps:
                break
            try:
                fitted = ARIMA(demand_train, order=order).fit(**fit_options)
                forecast = fitted.predict(
                    start=len(demand_train),
                    end=len(demand_train) + params.validation_steps - 1,
                    typ='levels')
                # Only successful splits contribute; previously a failed
                # split re-appended the error of the preceding split.
                cv_errors.append(mean_squared_error(demand_valid, forecast))
            except Exception:
                traceback.print_exc()

        mean_error = np.nanmean(cv_errors) if cv_errors else float('nan')
        if math.isnan(mean_error):
            mean_error = float('Inf')

        # The first evaluated order always becomes the initial best. (The
        # original tracked this with ``model_count`` but incremented the
        # undefined name ``model``, which raised on the first iteration.)
        if best_order is None or mean_error < best_error:
            best_error = mean_error
            best_order = order

    if best_order is None:
        raise ValueError('no ARIMA order available to evaluate')

    best_fit = ARIMA(demand, order=best_order).fit(**fit_options)
    return [best_order, best_error, best_fit]
def arima(ahead, start_exp, n_samples, labels):
    """Accumulate absolute ARIMA forecast errors over rolling forecast origins.

    For every origin ``test_sample`` in ``range(start_exp, n_samples - ahead)``
    and every row ``j`` of ``labels``, an ARIMA(2, 0, 2) model (falling back
    to ARIMA(1, 0, 0) when that fit fails) is fitted on
    ``labels.iloc[j, :test_sample - 1]`` and its absolute predictions are
    compared with ``labels.iloc[j, test_sample:test_sample + ahead]``.
    Rows whose history sums to zero are predicted as all zeros.

    Args:
        ahead: forecast horizon.
        start_exp: first forecast origin.
        n_samples: number of available time steps.
        labels: DataFrame with one series per row.

    Returns:
        tuple: ``(error, var)`` where ``error`` is the per-horizon absolute
        error summed over all origins and rows, and ``var[h]`` lists, per
        origin, the error at horizon ``h`` summed over rows.
    """
    var = [[] for _ in range(ahead)]
    error = np.zeros(ahead)

    for test_sample in range(start_exp, n_samples - ahead):
        # Start from an array so an empty ``labels`` no longer fails when the
        # per-horizon values are read below.
        err = np.zeros(ahead)
        for j in range(labels.shape[0]):
            ds = labels.iloc[j, :test_sample - 1].reset_index()
            history = ds.iloc[:, 1]
            if sum(history) == 0:
                yhat = [0] * ahead
            else:
                try:
                    fit2 = ARIMA(history.values, (2, 0, 2)).fit()
                except Exception:
                    # Simpler model when the larger one cannot be fitted;
                    # KeyboardInterrupt/SystemExit now propagate.
                    fit2 = ARIMA(history.values, (1, 0, 0)).fit()
                # NOTE(review): this range spans ``ahead - 1`` positions while
                # the truth below has ``ahead`` values -- confirm the
                # intended end index.
                yhat = abs(fit2.predict(start=test_sample,
                                        end=test_sample + ahead - 2))
            y_me = labels.iloc[j, test_sample:test_sample + ahead]
            e = abs(yhat - y_me.values)
            err = err + e
            error += e
        for idx in range(ahead):
            var[idx].append(err[idx])
    return error, var
def arima_(self, train, test, seasonal):
    """Fit the model chosen by ``auto_arima`` and record it if it beats ``self.rmse``.

    ``auto_arima`` selects the orders by AIC. A SARIMAX model is fitted when
    a seasonal order was selected, a plain ARIMA model otherwise. The model
    is scored with ``rootMeanSquaredError`` on the ``len(test)`` positions
    following ``train``; when the score is lower than ``self.rmse``,
    ``self.rmse`` and ``self.__model__`` are updated.

    Args:
        train: training series.
        test: hold-out series that directly follows ``train``.
        seasonal: forwarded to ``auto_arima``.
    """
    selected = auto_arima(train, seasonal=seasonal, information_criterion='aic')
    order = selected.order
    seasonal_order = selected.seasonal_order

    start = len(train)
    end = start + len(test) - 1

    # Only one of the two models is fitted per call. The other keeps an
    # infinite error so that the comparison below is always defined (the
    # original compared a variable that had never been assigned).
    rmse_arima = float('inf')
    rmse_sarima = float('inf')

    if seasonal_order != (0, 0, 0, 0):
        sarima = SARIMAX(train, order=order,
                         seasonal_order=seasonal_order).fit()
        # Was ``sar_forecast.predict``: an undefined name.
        sar_pred = sarima.predict(start=start, end=end, dynamic=False,
                                  typ='levels')
        rmse_sarima = rootMeanSquaredError(test, sar_pred)
        self.__arimaOrder__ = order
        self.__seasonalOrder__ = seasonal_order
    else:
        # Was ``ARIMA(train_data, ...)``: an undefined name.
        arima_model = ARIMA(train, order=order).fit()
        arima_pred = arima_model.predict(start, end, dynamic=False,
                                         typ='levels')
        rmse_arima = rootMeanSquaredError(test, arima_pred)
        self.__arimaOrder__ = order

    if rmse_arima < rmse_sarima:
        if rmse_arima < self.rmse:
            self.rmse = rmse_arima
            self.__model__ = 'arima'
    else:
        if rmse_sarima < self.rmse:
            self.rmse = rmse_sarima
            self.__model__ = 'sarima'
def predict_trend(df_train, start='2020-01-01', end='2020-03-31'):
    """Fit an ARIMA(p, 1, q) model and predict a date range.

    The AR and MA orders are the AIC-optimal ones found by
    ``arma_order_select_ic`` (up to AR 6 / MA 4) on the first difference of
    ``df_train``.

    Args:
        df_train: training series; date strings are passed to ``predict``,
            so it presumably carries a datetime index -- TODO confirm.
        start: first date to predict. Defaults to the previously hard-coded
            ``'2020-01-01'``.
        end: last date to predict. Defaults to the previously hard-coded
            ``'2020-03-31'`` (a three-month window).

    Returns:
        The output of the fitted model's ``predict(start, end)``.
    """
    diff1 = df_train.diff().dropna()
    res = arma_order_select_ic(diff1, max_ar=6, max_ma=4, ic='aic')['aic_min_order']
    arima_mod = ARIMA(df_train, order=(res[0], 1, res[1])).fit()
    return arima_mod.predict(start, end)
class ARIMAModel(StatModels):
    """Grid-searched ARIMA model.

    ``self.train``, ``self.validation``, ``self.start`` and ``self.end`` are
    read by ``validate`` but not set here; presumably the ``StatModels`` base
    class provides them -- TODO confirm.
    """

    def __init__(self, params):
        """Store the candidate lists for p, q and d taken from ``params``."""
        super(ARIMAModel, self).__init__(params)
        self.p_ls = params.p_ls
        self.q_ls = params.q_ls
        self.d_ls = params.d_ls
        self.name = "ARIMA"

    def validate(self):
        """Fit every (p, d, q) candidate and keep the one with the lowest RMSE.

        Each candidate is fitted on ``self.train`` and scored by the
        validation RMSE of its predictions between ``self.start`` and
        ``self.end``. The best fit is stored in ``self.saved_model``;
        ``self.model`` is left holding the last candidate fitted.
        """
        best_value = float("inf")
        for p in self.p_ls:
            for q in self.q_ls:
                for d in self.d_ls:
                    # statsmodels expects the order as (p, d, q); it was
                    # passed as (p, q, d), which swapped the differencing
                    # and moving-average orders.
                    self.model = ARIMA(self.train, order=(p, d, q))
                    self.model = self.model.fit(disp=0)
                    predictions = self.model.predict(start=self.start,
                                                     end=self.end)
                    eval_metric = EvalMetrics(self.validation, predictions)
                    rmse = eval_metric.val_rmse()
                    if rmse < best_value:
                        best_value = rmse
                        self.saved_model = self.model
def get_forecast(org_ts, forecast_periods, orders=(2, 1, 2), seasonal_orders=(0, 1, 1, 48), freq='30min'):
    '''
    Get the forecast data.

    :param org_ts: the original data
    :param forecast_periods: how many points to forecast
    :param orders: the p, d, q values. p and q relate to the ACF and PACF, d
        is the differencing order; it is recommended to first find suitable
        values with auto_arima (get_suitable_orders)
    :param seasonal_orders: same as above; the last element is the period of
        the series
    :param freq: the interval between two consecutive points
    :return: the forecast values
    '''
    fitted = ARIMA(order=orders, seasonal_order=seasonal_orders).fit(y=org_ts)
    values = fitted.predict(n_periods=forecast_periods)

    # The forecast starts one observed step after the last timestamp.
    last_stamp = org_ts.index[-1]
    first_forecast_stamp = last_stamp + (last_stamp - org_ts.index[-2])
    future_index = pd.date_range(first_forecast_stamp,
                                 periods=forecast_periods, freq=freq)
    return pd.Series(values, index=future_index)
def arima_predictions(dataset, pdq, frequency, number_of_predictions):
    """Forecast closing prices with ARIMA on a differenced series.

    The values of ``dataset`` are differenced by ``differenciate_dataset``,
    an ARIMA model of order ``pdq`` is fitted, and each forecast is turned
    back into a price by adding the value ``frequency`` steps back in the
    running history.

    Args:
        dataset: DataFrame of prices with a datetime-like index.
        pdq: ARIMA order.
        frequency: differencing lag.
        number_of_predictions: added to the start position to obtain the
            (inclusive) end position of the prediction.

    Returns:
        DataFrame indexed by ``time`` with a ``Close`` column; the first row
        is the last observed row of ``dataset``.
    """
    raw_values = dataset.values
    differenced = differenciate_dataset(raw_values, frequency)
    fitted = ARIMA(differenced, order=pdq).fit()
    n_diff = len(differenced)
    forecast = fitted.predict(start=n_diff, end=n_diff + number_of_predictions)

    history = raw_values.tolist()
    last_stamp = dataset.index[-1]
    closes = [dataset.iloc[-1].values]
    stamps = [last_stamp]

    offset = 0
    for step in forecast:
        restored = step + history[-frequency]
        history.append(restored)
        closes.append(round(restored[0], 2))
        # isoweekday() == 5 is a Friday: jump three days to skip the weekend.
        if (last_stamp + timedelta(days=offset)).isoweekday() == 5:
            offset += 3
        else:
            offset += 1
        stamps.append(last_stamp + timedelta(days=offset))

    frame = pd.DataFrame(data={'Close': closes, 'time': stamps})
    return frame.set_index('time')
def test(train_set, test_set):
    """Fit ARIMA(1, 1, 1) per store and score its forecasts with R^2.

    For every ``storeId`` in ``train_set`` the ``Inside`` column (indexed by
    ``dateTime``) is modelled and the positions covering that store's rows in
    ``test_set`` are predicted. The concatenated truth/prediction shapes are
    printed, and per-store as well as overall R^2 values are computed.

    Returns:
        None. The computed scores are not returned.
    """
    truth_chunks = []
    pred_chunks = []
    for store_id in train_set['storeId'].unique():
        store_train = train_set[train_set['storeId'] == store_id].iloc[:, :6]
        store_test = test_set[test_set['storeId'] == store_id].iloc[:, :6]
        train_series = pd.Series(store_train['Inside'].values,
                                 store_train['dateTime'].values)
        test_series = pd.Series(store_test['Inside'].values,
                                store_test['dateTime'].values)

        fitted = ARIMA(train_series, order=(1, 1, 1)).fit()
        first = train_series.shape[0]
        forecast = fitted.predict(first, first + test_series.shape[0] - 1,
                                  typ='levels')

        truth_chunks.append(test_series.values)
        pred_chunks.append(forecast.values)

    y_true = np.concatenate(truth_chunks)
    y_pred = np.concatenate(pred_chunks)
    print(y_true.shape)
    print(y_pred.shape)

    df_result = pd.DataFrame({
        'storeId': test_set['storeId'].values,
        'Inside': y_true,
        'Inside_pred': y_pred,
    })

    def _store_r2(store_id):
        rows = df_result[df_result['storeId'] == store_id]
        return store_id, r2_score(rows['Inside'], rows['Inside_pred'])

    r2_list = [_store_r2(store_id) for store_id in df_result['storeId'].unique()]
    df_result2 = pd.DataFrame(r2_list, columns=['storeId', 'r2'])

    # Overall score and summary statistics are evaluated but not used.
    r2_score(df_result['Inside'], df_result['Inside_pred'])
    df_result2.describe()
    return
def arimaModelCheck():
    '''
    Model check: test whether the residuals of an ARIMA(0,1,1) fit are white noise.

    Reads ``data/discdata_processed.xls``, drops the last 5 rows, fits the
    model on one column and runs a Ljung-Box test with 12 lags on the
    prediction errors. The verdict is printed.

    :return: None
    '''
    from statsmodels.tsa.arima_model import ARIMA
    from statsmodels.stats.diagnostic import acorr_ljungbox

    discfile = 'data/discdata_processed.xls'
    lagnum = 12  # number of residual lags

    data = pd.read_excel(discfile, index_col='COLLECTTIME')
    xdata = data.iloc[:len(data) - 5]['CWXT_DB:184:D:\\']

    # Build and fit the ARIMA(0,1,1) model, then predict.
    fitted = ARIMA(xdata, (0, 1, 1)).fit()
    xdata_pred = fitted.predict(typ='levels')

    # Residuals of the prediction.
    pred_error = (xdata_pred - xdata).dropna()

    # White-noise test: a p-value below 0.05 is treated as non-white noise.
    lb, p = acorr_ljungbox(pred_error, lags=lagnum)
    rejected = (p < 0.05).sum()
    if rejected > 0:
        print(u'模型ARIMA(0,1,1)不符合白噪声检验')
    else:
        print(u'模型ARIMA(0,1,1)符合白噪声检验')
def evaluate_prediction_for_stock(stock_close_rtn, split=0.7):
    """Train ARIMA on the head of a series and score it on the tail.

    The close returns are split into a training set (the first ``split``
    fraction, 70% by default) and a test set (the rest). The model is fitted
    on the training set, the test positions are forecast out of sample and
    the RMSE against the test set is computed.

    Args:
        stock_close_rtn: pandas Series of close returns.
        split: fraction of the data used for training.

    Returns:
        tuple: ``(predicted, rmse)``. The fitted model itself is not returned.

    Raises:
        ValueError: if the split leaves no observations to test on.
    """
    total_length = stock_close_rtn.shape[0]
    split_point = int(np.ceil(total_length * split))
    training_set = stock_close_rtn.iloc[:split_point]
    test_set = stock_close_rtn.iloc[split_point:]
    if len(test_set) == 0:
        raise ValueError('split leaves no observations in the test set')

    # Orders are estimated from the first difference of the whole series.
    diff_series = (stock_close_rtn - stock_close_rtn.shift()).dropna()
    p, q = find_params_arima_acf_pacf(diff_series)

    # NOTE(review): the order is passed as (p, q, 0), i.e. ``q`` lands in the
    # differencing slot of (p, d, q). Kept as found -- confirm the intent.
    order = (p, q, 0)
    model = ARIMA(training_set.values, order).fit()

    # Forecast exactly the test positions. The original call,
    # ``predict(end=len(test_set))``, produced in-sample values for the
    # beginning of the training data instead. ``typ='levels'`` is only
    # passed when a differencing order is in use.
    extra = {'typ': 'levels'} if order[1] else {}
    predicted = model.predict(start=split_point, end=total_length - 1, **extra)

    rmse = np.sqrt(mse(test_set.values, predicted))
    return predicted, rmse
def arima_model(data):
    """Fit an ARIMA(1, 0, 0) model and predict the 7 positions after the sample.

    Args:
        data: the series to model.

    Returns:
        list: predictions for positions ``len(data)`` through
        ``len(data) + 6``.
    """
    fitted = ARIMA(data, order=(1, 0, 0)).fit()
    fitted.summary()  # evaluated for parity with the original; result unused
    n_obs = len(data)
    forecast = fitted.predict(start=n_obs, end=n_obs + 6, typ='levels')
    return forecast.tolist()
class ARIMAModel(Model):
    """ARIMA forecaster that selects its own (p, d, q) order.

    ``get_series()``, ``max()``, ``min()`` and ``self._predict`` are used but
    not defined here; presumably the ``Model`` base class provides them --
    TODO confirm.
    """

    def __init__(self):
        Model.__init__(self)
        self._order = None

    def select_order_brute_force(self):
        """Evaluate the AIC on a grid around the BIC-optimal ARMA order.

        The grid holds the single BIC-optimal AR order, ``d = 1`` and the
        single BIC-optimal MA order, so exactly one point is evaluated.

        Returns:
            The grid point returned by ``scipy.optimize.brute``.
        """
        def objfunc(order, endog, exog):
            from statsmodels.tsa.arima_model import ARIMA
            fit = ARIMA(endog, order, exog).fit(full_output=False)
            return fit.aic

        ts = self.get_series()
        bic = arma_order_select_ic(ts).bic_min_order
        grid = (slice(bic[0], bic[0] + 1, 1),
                slice(1, 2, 1),
                slice(bic[1], bic[1] + 1, 1))
        from scipy.optimize import brute
        return brute(objfunc, grid, args=(ts, None), finish=None)

    def select_order(self):
        """Return ``(p, d, q)`` using the lowest differencing order that is stationary.

        ``d`` is 0 if the series is stationary, 1 if its first difference is,
        and 2 otherwise; ``p`` and ``q`` are the BIC-optimal ARMA orders of
        the correspondingly differenced series.
        """
        ts = self.get_series()
        if is_stationary(ts):
            bic = arma_order_select_ic(ts).bic_min_order
            return bic[0], 0, bic[1]

        ts1diff = ts.diff(periods=1).dropna()
        if is_stationary(ts1diff):
            bic = arma_order_select_ic(ts1diff).bic_min_order
            return bic[0], 1, bic[1]

        # d = 2 means differencing twice. ``ts.diff(periods=2)`` (used
        # before) is the lag-2 difference x[t] - x[t-2], a different series.
        ts2diff = ts1diff.diff(periods=1).dropna()
        bic = arma_order_select_ic(ts2diff).bic_min_order
        return bic[0], 2, bic[1]

    def get_fitted_values(self):
        """Return the fitted values of the current model."""
        return self._model.fittedvalues

    def auto(self):
        """Select the order and fit the model on the full series.

        The sampling period is taken from the first two index entries and
        passed to ARIMA as a ``Second`` frequency.
        """
        ts = self.get_series()
        self._period = ts.index[1] - ts.index[0]
        freq = Second(self._period.total_seconds())
        self._order = self.select_order()
        self._model = ARIMA(self.get_series(), order=self._order, freq=freq).fit()

    def predict(self):
        """Predict ``self._predict`` periods past the last fitted timestamp.

        When a differencing order is in use, the prediction is shifted by the
        range (``max() - min()``) of the model.
        """
        start_date = self._model.fittedvalues.index[-1]
        end_date = start_date + self._predict * self._period
        forecast = self._model.predict(start_date.isoformat(), end_date.isoformat())
        if self._order[1] > 0:
            shift = self.max() - self.min()
            forecast += shift
        return forecast
class ARIMAModel(Model):
    """ARIMA forecaster over ``self.ts`` that selects its own (p, d, q) order.

    ``self.ts`` is read but not assigned here; presumably ``Model.__init__``
    stores the series passed to it -- TODO confirm.
    """

    def __init__(self, ts):
        Model.__init__(self, ts)
        self.order = None

    def select_order_brute_force(self):
        """Evaluate the AIC on a grid around the BIC-optimal ARMA order.

        The grid holds the single BIC-optimal AR order (searched up to AR 6 /
        MA 4), ``d = 1`` and the single BIC-optimal MA order, so exactly one
        point is evaluated.

        Returns:
            The grid point returned by ``scipy.optimize.brute``.
        """
        def objfunc(order, endog, exog):
            from statsmodels.tsa.arima_model import ARIMA
            fit = ARIMA(endog, order, exog).fit(full_output=False)
            return fit.aic

        bic = arma_order_select_ic(self.ts, max_ar=6, max_ma=4).bic_min_order
        grid = (slice(bic[0], bic[0] + 1, 1),
                slice(1, 2, 1),
                slice(bic[1], bic[1] + 1, 1))
        from scipy.optimize import brute
        return brute(objfunc, grid, args=(self.ts, None), finish=None)

    def _select_order_impl(self, ic):
        """Return ``(p, d, q)`` using the lowest differencing order that is stationary.

        Args:
            ic: information criterion name forwarded to
                ``_arma_order_selector``.
        """
        if is_stationary(self.ts):
            best = _arma_order_selector(self.ts, ic)
            return best[0], 0, best[1]

        ts1diff = self.ts.diff(periods=1).dropna()
        if is_stationary(ts1diff):
            best = _arma_order_selector(ts1diff, ic)
            return best[0], 1, best[1]

        # d = 2 means differencing twice. ``ts.diff(periods=2)`` (used
        # before) is the lag-2 difference x[t] - x[t-2], a different series.
        ts2diff = ts1diff.diff(periods=1).dropna()
        best = _arma_order_selector(ts2diff, ic)
        return best[0], 2, best[1]

    def select_order(self):
        """Select the order with the BIC."""
        return self._select_order_impl('bic')

    def reselect_order(self):
        """Select the order with the AIC."""
        return self._select_order_impl('aic')

    def auto(self, order=None):
        """Fit the model, selecting the order first unless one is given.

        Args:
            order: optional ``(p, d, q)``; ``select_order()`` is used when
                it is None.
        """
        self.period = self.ts.index[1] - self.ts.index[0]
        self.order = order if order is not None else self.select_order()
        logging.debug('Model order is {}'.format(self.order))
        self.model = ARIMA(self.ts, order=self.order).fit(disp=False, method='css')

    def predict(self, length):
        """Predict ``length`` periods past the last fitted timestamp.

        When a differencing order is in use, the prediction is shifted by the
        absolute gap between the last fitted value and the last observation.
        """
        start_date = self.model.fittedvalues.index[-1]
        end_date = start_date + length * self.period
        forecast = self.model.predict(start_date.isoformat(), end_date.isoformat())
        if self.order[1] > 0:
            shift = abs(self.model.fittedvalues[-1] - self.ts[-1])
            forecast += shift
        return forecast
class ArimaModel:
    """ARIMA (autoregressive integrated moving average) model wrapper.

    Usage order: ``_define()`` builds the model, ``_train()`` fits it and
    keeps the results, ``_predict()`` / ``_forecast()`` query those results.
    """

    def __init__(self, data: pd.DataFrame, p: int, d: int, q: int):
        '''
        Store the data and the (p, d, q) order; nothing is built or fitted yet.
        '''
        self.data = data
        self.p = p
        self.d = d
        self.q = q
        self.model = None   # unfitted ARIMA instance, created by _define()
        self.fitted = None  # fit results, stored by _train()

    def _define(self):
        """
        Build the (unfitted) ARIMA model for the stored data and order.
        """
        self.model = ARIMA(self.data, order=(self.p, self.d, self.q))

    def show_params(self):
        """
        Display p, d, q
        """
        print('ARIMA Model: p={0} d={1} q={2}'.format(self.p, self.d, self.q))

    def _train(self):
        '''
        Fit the model, keep the results on ``self.fitted`` and return them.

        The model is defined first when ``_define()`` has not been called.
        '''
        if self.model is None:
            self._define()
        self.fitted = self.model.fit(disp=-1, method='css', start_ar_lags=13)
        return self.fitted

    def _require_fitted(self):
        """Return the fit results or raise when ``_train()`` has not run."""
        if self.fitted is None:
            raise RuntimeError('ArimaModel must be trained before predicting')
        return self.fitted

    def _predict(self, start, end):
        '''
        Return level predictions between ``start`` and ``end``.

        Previously this queried the unfitted model and dropped the result.
        '''
        return self._require_fitted().predict(start=start, end=end, typ='levels')

    def _forecast(self, pred_len: int):
        '''
        Return the forecast of the fitted model for ``pred_len`` steps.
        '''
        return self._require_fitted().forecast(pred_len)
def ARIMA_50(city, max):
    """
    Fit ARIMA(5, 1, 1) on the rows before ``max`` and predict up to the last date.

    Params:
        city -- time-series dataframe object containing Date and ZHVI columns
        max -- datetime object from index of city representing peak ZHVI

    Returns:
        The fitted model's predictions from ``max`` to the last ``Date`` of
        ``city``.
    """
    before = city[city['Date'] < max]
    # The model has to be fitted before it can predict; the original called
    # ``predict`` on the unfitted model.
    fitted = ARIMA(before, (5, 1, 1)).fit()
    # NOTE(review): the original start argument was the undefined name
    # ``start``; the cut-off ``max`` is used as the first predicted date --
    # confirm this is the intended origin.
    return fitted.predict(max, city['Date'].iloc[-1])
def get_arima_predictions(y, train_subset, order=(1, 0, 0), X=None):
    """In-sample predictions on a training subset, then expanding-window refits.

    A model is first fitted on ``y[train_subset]`` and its in-sample
    predictions are taken. Then, for every position ``i`` after the training
    subset, a model is refitted on ``y[0:i]`` and the last value of a new
    prediction is appended.

    Args:
        y: array of observations.
        train_subset: indices of the initial training observations.
        order: ARIMA ``(p, d, q)`` order.
        X: optional 2-D array of exogenous regressors, one row per entry of
            ``y``.

    Returns:
        Array with the initial in-sample predictions followed by one value
        per later position.
    """
    # ``X == None`` is evaluated element-wise on arrays and fails inside an
    # ``if``; an identity test is what is meant.
    use_exog = X is not None

    if use_exog:
        arima = ARIMA(y[train_subset], order=order, exog=X[train_subset, :]).fit()
        predictions = arima.predict(exog=X[train_subset, :])
    else:
        arima = ARIMA(y[train_subset], order=order).fit()
        predictions = arima.predict()

    for i in range(max(train_subset) + 1, len(y)):
        # NOTE(review): the prediction runs to position ``len(y) + i`` and
        # only its last value is kept -- confirm that this far-out value
        # (rather than the one-step forecast for position ``i``) is intended.
        if use_exog:
            arima = ARIMA(y[0:i], order=order, exog=X[0:i, :]).fit()
            step = arima.predict(0, len(y) + i, exog=X[0:i + 1, :])[-1]
        else:
            arima = ARIMA(y[0:i], order=order).fit()
            step = arima.predict(0, len(y) + i)[-1]
        predictions = np.append(predictions, step)
    return predictions
def arima_forecast(self, history):
    """Predict the 7 positions that follow ``history`` with an ARIMA(7, 0, 0) model.

    Args:
        history: raw history, converted with ``self.make_series``.

    Returns:
        Predictions for positions ``len(series)`` through ``len(series) + 6``.
    """
    series = self.make_series(history)
    fitted = ARIMA(series, order=(7, 0, 0)).fit(disp=False)
    n_obs = len(series)
    return fitted.predict(n_obs, n_obs + 6)
def arima_forecast(self, ts, p, i, q,):
    """Fit ARIMA(p, i, q) on ``ts`` and forecast one step ahead.

    Returns:
        tuple: ``(next_ret, summary)`` where ``next_ret`` is the first
        element of ``forecast(1)`` and ``summary`` is the fit's ``summary2()``.
    """
    fitted = ARIMA(ts, order=(p, i, q)).fit(disp=-1)
    # In-sample predictions are still computed, though nothing reads them.
    fitted.predict()
    one_step = fitted.forecast(1)[0]
    return one_step, fitted.summary2()
def arima_predictions(data, num_future_days=7):
    """Predict future values with ARIMA(3, 1, 0) fitted on the log of the data.

    Args:
        data: positive-valued series (its logarithm is modelled).
        num_future_days: last predicted position is
            ``len(data) + num_future_days``.

    Returns:
        Predictions for positions ``len(data) + 1`` through
        ``len(data) + num_future_days``, mapped back with ``np.exp``.
    """
    n_obs = len(data)
    # Model the log series to damp volatility.
    fitted = ARIMA(np.log(data), order=(3, 1, 0)).fit(disp=0)
    log_forecast = fitted.predict(n_obs + 1, n_obs + num_future_days, typ='levels')
    # Undo the log transform.
    return np.exp(log_forecast)
def recursive_estimation(T1_size, T2=30, steps_ahead=1):
    """Expanding-window IMA(1, 1) forecasts and their RMSPE.

    For ``j`` in ``range(T2)`` an ARIMA(0, 1, 1) model without trend is
    fitted on the first ``T1_size + j`` observations of ``Y[0]`` and its
    ``steps_ahead``-step forecast is appended to the module-level
    ``forecasts`` list. The forecasts are then compared with the last ``T2``
    observations (``Y[0][T - T2:]``).

    Uses the module-level names ``Y``, ``T`` and ``forecasts``.

    Args:
        T1_size: size of the first estimation window.
        T2: number of expanding-window forecasts.
        steps_ahead: forecast horizon; the forecasts line up with
            ``Y[0][T - T2:]`` only for the default of 1, presumably with
            ``T1_size == T - T2`` -- TODO confirm.

    Returns:
        Root mean squared percentage error of the forecasts.
    """
    i = 0
    for j in range(T2):
        train_data = Y[i][:(T1_size + j)]
        results = ARIMA(train_data, order=(0, 1, 1)).fit(trend='nc')
        # Forecast from the fitted results. The original called ``predict``
        # on the unfitted model and passed an observation as its first
        # argument, discarding the fit.
        fc = results.forecast(steps=steps_ahead)[0][-1]
        # NOTE(review): ``forecasts`` is module-level and is not reset here,
        # so it must be empty before each call.
        forecasts.append(fc)

    actual = Y[i][T - T2:]
    # Each error is divided by its own actual value (the original divided
    # every error by the single value Y[i][T - T2]).
    RMSPE = np.sqrt(np.mean(np.square((actual - forecasts) / actual), axis=0))
    return RMSPE
def predstl():
    """Write play-count predictions for artists 0-49 to a CSV file.

    For each artist the log series ``ap_df[j]`` is decomposed twice
    (period 7, then period 30 on what remains), the remainder is modelled
    with ARIMA(2, 0, 1) and predicted for positions 1-244. Both seasonal
    components are extended by 61 values, added to the ARIMA prediction and
    exponentiated. Positions 184-243 are written as
    ``[artist_id, play_num, date]`` rows and echoed to stdout.

    Uses the module-level names ``ap_df``, ``d``, ``artists_rank_to_id`` and
    ``rank_to_date``.
    """
    predict_file_path = "./data/mars_tianchi_artist_plays_predict.csv"
    # The context manager closes the file even if a fit or write fails.
    with open(predict_file_path, 'wb') as fp:
        fpwriter = csv.writer(fp, delimiter=',', quotechar='"',
                              quoting=csv.QUOTE_NONE)
        for j in range(0, 50):
            orig = np.log(ap_df[j])

            # Period-7 seasonal component.
            stl_w = sm.tsa.seasonal_decompose(orig.tolist(), freq=7)
            stl_w_se = stl_w.seasonal
            w_s = stl_w_se[-7:]
            stl_w_rest = orig - stl_w_se

            # Period-30 seasonal component of what is left.
            stl_m = sm.tsa.seasonal_decompose(
                np.nan_to_num(stl_w_rest).tolist(), freq=30)
            stl_m_se = stl_m.seasonal
            m_s = stl_m_se[-30:]

            rest = stl_w_rest - stl_m_se
            rest_s = pd.Series(rest, index=d, name='artist' + str(j) + 'rest')

            order = (2, 0, 1)
            model = ARIMA(rest_s, order, freq='D')
            model = model.fit()
            rest_pred = model.predict(1, 244)
            rest_pred_nda = rest_pred.values

            # Extend both seasonal components by 61 values:
            # 8 * 7 + 5 for the first, 1 + 30 + 30 for the second.
            for i in range(0, 8):
                stl_w_se = np.append(stl_w_se, w_s)
            stl_w_se = np.append(stl_w_se, w_s[:5])
            stl_m_se = np.append(stl_m_se, 0)
            stl_m_se = np.append(stl_m_se, m_s)
            stl_m_se = np.append(stl_m_se, m_s)

            compose_stl = stl_w_se + stl_m_se + rest_pred_nda
            fit_ap = np.exp(compose_stl)

            artist_id = artists_rank_to_id[j]
            for idx in range(184, 244):
                date = rank_to_date[idx]
                play_num = int(math.ceil(fit_ap[idx]))
                if play_num < 0:
                    play_num = 0
                row = [artist_id, play_num, date]
                # Single-argument form prints the same on Python 2 and 3.
                print(row)
                fpwriter.writerow(row)
def arima(df, horizon):
    """Choose an ARIMA order by AIC and estimate its rolling-origin MAPE.

    The differencing order comes from ``adf_test`` (capped at 2). AR and MA
    orders from 0 to 5 are tried and the lowest-AIC fit wins; orders that
    fail to fit are skipped. The chosen order is then refitted on expanding
    windows starting at 70% of the data, each window predicting the next
    ``horizon`` rows; the positive MAPE values are averaged.

    Args:
        df: DataFrame whose first column is the series to model.
        horizon: number of rows predicted per window.

    Returns:
        dict with keys ``'order'``, ``'AIC'`` and ``'MAPE'`` (``'MAPE'``
        stays at 100 when no window produced a positive value).
    """
    col = df.columns[0]
    series = df[col]
    d = min(adf_test(series), 2)

    best_aic = ARIMA(series, order=(0, d, 0)).fit().aic
    details = {'order': (0, d, 0), 'AIC': best_aic, 'MAPE': 100}

    for p in range(0, 6):
        for q in range(0, 6):
            if p == 0 and q == 0:
                continue
            try:
                aic = ARIMA(series, order=(p, d, q)).fit().aic
            except Exception:
                # Best effort: an order that cannot be fitted is skipped.
                # KeyboardInterrupt/SystemExit are no longer swallowed.
                continue
            if aic < best_aic:
                best_aic = aic
                details['order'] = (p, d, q)
                details['AIC'] = aic

    n_records = len(df)
    n_train = int(n_records * 0.7)
    k = 0
    mape = 0
    for l in range(n_train, n_records - horizon + 1):
        try:
            train, test = df.iloc[0:l], df.iloc[l:l + horizon]
            results = ARIMA(train[col], order=details['order']).fit()
            start = len(train)
            end = len(train) + len(test) - 1
            test_predictions = results.predict(start=start, end=end,
                                               dynamic=False, typ='levels')
            x = mean_absolute_percentage_error(test, test_predictions)
            if x > 0:
                mape += x
                k += 1
        except Exception:
            # Best effort: a window that fails is left out of the average.
            continue

    if k > 0:
        details['MAPE'] = mape / k
    return details
def auto_arima(ts):
    """Grid-search ARIMA orders by AIC and predict positions 60 through 71.

    Every ``(p, d, q)`` with components in ``{0, 1, 2}`` is evaluated with
    ``brute``; orders whose fit raises ``ValueError`` are scored ``1e6``.

    Args:
        ts: the series to model.

    Returns:
        Level predictions of the best model for positions 60-71.
    """
    def objfunc(order, x):
        candidate = ARIMA(x, order)
        try:
            fitted = candidate.fit(disp=0)
        except ValueError:
            return 1e6
        else:
            return fitted.aic

    grid = tuple(slice(0, 3, 1) for _ in range(3))
    res = brute(objfunc, grid, args=[ts], finish=None)

    # brute returns floats; ARIMA is given integer orders.
    best_order = [int(component) for component in res]
    best_model = ARIMA(ts, best_order).fit(disp=0)
    return best_model.predict(60, 71, typ='levels')
def predict_stock(stock_close_rtn, n_steps=5, plot=False):
    """Given the close returns of a stock, predict the next ``n_steps`` values.

    Args:
        stock_close_rtn: pandas object holding the close returns.
        n_steps: number of out-of-sample steps to predict.
        plot: when true, fitting output is enabled and the last 10
            observations plus the forecast are plotted.

    Returns:
        Predictions for the ``n_steps`` positions that follow the sample.
    """
    diff_series = (stock_close_rtn - stock_close_rtn.shift()).dropna()
    p, q = find_params_arima(diff_series)

    # NOTE(review): the order is passed as (p, q, 0), i.e. ``q`` lands in the
    # differencing slot of (p, d, q). Kept as found -- confirm the intent.
    order = (p, q, 0)
    model = ARIMA(stock_close_rtn.values, order).fit(disp=plot)

    # Predict the positions after the sample. The original call,
    # ``predict(end=n_steps)``, returned in-sample values for the start of
    # the series. ``typ='levels'`` is only passed when a differencing order
    # is in use.
    n_obs = len(stock_close_rtn)
    extra = {'typ': 'levels'} if order[1] else {}
    predicted = model.predict(start=n_obs, end=n_obs + n_steps - 1, **extra)

    if plot:
        model.plot_predict(n_obs - 10, n_obs + n_steps)
        plt.axhline(y=0, linestyle='--', color='gray')
    return predicted
class ArimaModel():
    """ARIMA model with exogenous regressors behind a fit/predict interface."""

    def __init__(self, order):
        """Remember the ``(p, d, q)`` order used by ``fit``."""
        self.order = order

    def fit(self, Y, X):
        """Fit on the values of ``Y`` with the values of ``X`` as regressors.

        The fit results are stored in ``self.model``.
        """
        estimator = ARIMA(Y.values, order=self.order, exog=X.values, freq=None)
        self.model = estimator.fit(disp=0)

    def predict(self, Y, X, X_test):
        """Predict one value per row of ``X_test``.

        ``Y`` and ``X`` are accepted but not used.

        Returns:
            pd.Series of predictions for positions ``0`` through
            ``len(X_test) - 1``, indexed like ``X_test``.
        """
        last = X_test.shape[0] - 1
        raw = self.model.predict(start=0, end=last, exog=X_test)
        return pd.Series(data=raw, index=X_test.index)
class Arima:
    """ARIMA(3, 1, 0) model over one column of a DataFrame."""

    def __init__(self, df, cfg):
        """Select ``df[cfg['target_feature']]`` and build the unfitted model."""
        target = cfg['target_feature']
        self.series = df[target]
        self.model = ARIMA(self.series, order=(3, 1, 0))

    def fit_model(self):
        """Fit the model, replace ``self.model`` by the results and print the summary."""
        fitted = self.model.fit(disp=0)
        self.model = fitted
        print(fitted.summary())

    def plot_autocorrelation(self):
        """Show the autocorrelation plot of the series."""
        autocorrelation_plot(self.series)
        plt.show()

    def predict_arima(self, series):
        """Return ``self.model.predict(series)``; ``series`` is passed through unchanged."""
        prediction = self.model.predict(series)
        return prediction
def programmer_5():
    """Check whether the residuals of an ARIMA(0,1,1) fit are white noise.

    Reads ``data/discdata_processed.xls``, drops the last 5 rows, fits the
    model on one column and runs a Ljung-Box test with 12 lags on the
    prediction errors. The verdict and the test statistics are printed.
    """
    discfile = "data/discdata_processed.xls"
    lagnum = 12  # number of residual lags

    data = pd.read_excel(discfile, index_col="COLLECTTIME")
    xdata = data.iloc[:len(data) - 5]["CWXT_DB:184:D:\\"]

    # Fit the model, predict, and compute the residuals.
    fitted = ARIMA(xdata, (0, 1, 1)).fit()
    xdata_pred = fitted.predict(typ="levels")
    pred_error = (xdata_pred - xdata).dropna()

    lb, p = acorr_ljungbox(pred_error, lags=lagnum)
    rejected = (p < 0.05).sum()
    if rejected > 0:
        print(u"模型ARIMA(0,1,1)不符合白噪声检验")
    else:
        print(u"模型ARIMA(0,1,1)符合白噪声检验")
    print(lb)
ax = feature_indiv.plot(y='value',use_index=True) if feature == 'mood': ax.set_ylim((0,10)) ax.set_xlim((min(feature_indiv.index),max(feature_indiv.index))) fig = ax.get_figure() plt.show(block=False) plt.close(fig) for individual in indiv_ids: print individual #plot_histogram(individual, 'mood') #plot_series(individual, 'mood') #%% y = get_feature_by_day(feature, current_indiv).values train_subset = range(30) arima = ARIMA(y[train_subset], [1,0,0]).fit() predictions = arima.predict() for i in range(max(train_subset)+1,len(y)): arima = ARIMA(y[0:i], [1,0,0]).fit() predictions = np.append(predictions,arima.predict(0, len(y) + i)[-1]) y = get_feature_by_day(feature, current_indiv) y['preds'] = predictions y.plot() rmse(y['preds'].values,y['value'].values) #%% arima.predict(start = min(y.index), end = 50) #%% from statsmodels.tsa.stattools import acf, pacf def get_feature_by_day(feature, current_indiv): y = get_feature(feature, current_indiv) avg_features = ['mood', 'circumplex.valence', 'circumplex.arousal'] sum_features = [s for s in feature_names if s not in avg_features]
# -*- coding: utf-8 -*- # 模型检验 import pandas as pd # 参数初始化 discfile = '../data/discdata_processed.xls' lagnum = 12 # 残差延迟个数 data = pd.read_excel(discfile, index_col='COLLECTTIME') data = data.iloc[: len(data) - 5] # 不使用最后5个数据 xdata = data['CWXT_DB:184:D:\\'] from statsmodels.tsa.arima_model import ARIMA # 建立ARIMA(0,1,1)模型 arima = ARIMA(xdata, (0, 1, 1)).fit() # 建立并训练模型 xdata_pred = arima.predict(typ='levels') # 预测 print "-------預測模型------------\n", xdata_pred pred_error = (xdata_pred - xdata).dropna() # 计算残差 from statsmodels.stats.diagnostic import acorr_ljungbox # 白噪声检验 lb, p = acorr_ljungbox(pred_error, lags=lagnum) h = (p < 0.05).sum() # p值小于0.05,认为是非白噪声。 if h > 0: print(u'模型ARIMA(0,1,1)不符合白噪声检验') else: print(u'模型ARIMA(0,1,1)符合白噪声检验')