def q3_d():
    """Plot walk-forward one-step forecasts of log(WMT) against the real series.

    Loads the WMT sheet, then for every date in the test window (from
    2016-03-31 up to, but not including, 2020-03-31) refits two models on all
    observations strictly before that date and records their one-step-ahead
    forecast:

    * ARIMA(0, 1, 1)
    * ARIMA(0, 1, 1) with seasonal order (0, 1, 1, 4), labelled "AIRLINE"

    Both forecast paths and the actual log series are shown in one figure.
    """
    print("begin")
    df = get_data("data/HW5_WMT.xlsx", "HW5_WMT")
    df.index = pd.to_datetime(df.index, format='%Y%m%d')

    # The log series is reused for the difference columns and for every refit.
    log_wmt = np.log(df['WMT'])
    df['first_difference'] = log_wmt - log_wmt.shift(1)
    df['season_difference'] = log_wmt - log_wmt.shift(4)

    # Position of the first test observation; everything before it is the
    # initial training sample.
    start = df.index.get_loc('2016-03-31')
    df_test = df.iloc[start:]
    df_test = df_test.head(df_test.index.get_loc('2020-03-31'))
    print(df_test)

    rst_arima_list = []
    rst_airline_list = []
    for step in range(len(df_test)):
        # Expanding window: one more observation becomes available each step.
        train = log_wmt.iloc[:start + step]
        arima_fit = ARIMA(train, order=(0, 1, 1)).fit()  # p=0, d=1, q=1
        airline_fit = ARIMA(train,
                            order=(0, 1, 1),
                            seasonal_order=(0, 1, 1, 4)).fit()
        # forecast() returns a Series here (pandas input), so the single value
        # must be taken by position; a bare [0] is a label lookup on an index
        # that does not contain 0.
        rst_arima_list.append(arima_fit.forecast().iloc[0])
        rst_airline_list.append(airline_fit.forecast().iloc[0])

    plt.plot(df_test.index, rst_arima_list, label='ARIMA Model')
    plt.plot(df_test.index, rst_airline_list, label='AIRLINE Model')
    np.log(df_test['WMT']).plot(label='Reality')
    plt.legend()
    plt.show()
def arimamodel(self, train, test, ar_param, order, ma_param, iterative=True):
    """Forecast ``test`` with an ARIMA(ar_param, order, ma_param) model.

    :param train: Iterable of training observations.
    :param test: Sized iterable of held-out observations.
    :param ar_param: Autoregressive order p.
    :param order: Differencing order d.
    :param ma_param: Moving-average order q.
    :param iterative: If True, refit before every step on the history extended
        with the true observation (walk-forward) and return a list of one-step
        forecasts. If False, fit once on ``train`` and return whatever
        ``predict`` yields for the next ``len(test)`` steps.
    :return: The predictions, one per element of ``test``.
    """
    model_order = (ar_param, order, ma_param)
    history = list(train)
    n_test = len(test)

    if not iterative:
        fitted = ARIMA(history, order=model_order).fit()
        return fitted.predict(start=len(history),
                              end=len(history) + n_test - 1)

    # Map step index -> decile reached. Computed proportionally so the label
    # matches the real progress, and step 0 is never a milestone (the previous
    # int(len / 10) * i formula collapsed to 0 for fewer than 10 test points).
    milestones = {}
    for decile in range(1, 10):
        step = n_test * decile // 10
        if step > 0:
            milestones.setdefault(step, decile)

    preds = []
    # Iterate the values directly so pandas objects with a non-positional
    # index work the same as lists and arrays.
    for i, obs in enumerate(test):
        fitted = ARIMA(history, order=model_order).fit()
        preds.append(fitted.forecast()[0])
        history.append(obs)
        if i in milestones:
            print(
                str(datetime.now()) +
                ': Arima Prediction {}0% Complete: {} out of {}'.format(
                    milestones[i], i, n_test))
    return preds
def q3_e():
    """Plot and score walk-forward one-step forecast errors on log(WMT).

    For every date in the test window (from 2016-03-31 up to, but not
    including, 2020-03-31) two models are refit on all observations strictly
    before that date:

    * ARIMA(0, 1, 1)
    * ARIMA(0, 1, 1) with seasonal order (0, 1, 1, 4), labelled "AIRLINE"

    The forecast errors (actual minus forecast, in logs) are plotted and the
    mean squared error of each model is printed.
    """
    print("begin")
    df = get_data("data/HW5_WMT.xlsx", "HW5_WMT")
    df.index = pd.to_datetime(df.index, format='%Y%m%d')

    # The log series is reused for the difference columns and for every refit.
    log_wmt = np.log(df['WMT'])
    df['first_difference'] = log_wmt - log_wmt.shift(1)
    df['season_difference'] = log_wmt - log_wmt.shift(4)

    # Position of the first test observation; everything before it is the
    # initial training sample.
    start = df.index.get_loc('2016-03-31')
    df_test = df.iloc[start:]
    df_test = df_test.head(df_test.index.get_loc('2020-03-31'))
    print(df_test)

    rst_arima_list = []
    rst_airline_list = []
    for step in range(len(df_test)):
        # Expanding window: one more observation becomes available each step.
        train = log_wmt.iloc[:start + step]
        arima_fit = ARIMA(train, order=(0, 1, 1)).fit()  # p=0, d=1, q=1
        airline_fit = ARIMA(train,
                            order=(0, 1, 1),
                            seasonal_order=(0, 1, 1, 4)).fit()
        # forecast() returns a Series here (pandas input), so the single value
        # must be taken by position; a bare [0] is a label lookup on an index
        # that does not contain 0.
        rst_arima_list.append(arima_fit.forecast().iloc[0])
        rst_airline_list.append(airline_fit.forecast().iloc[0])

    # Forecast errors in log space, aligned positionally with df_test.
    actual_log = np.log(df_test['WMT']).to_numpy()
    rst_arima_error_list = actual_log - np.asarray(rst_arima_list)
    rst_airline_error_list = actual_log - np.asarray(rst_airline_list)

    plt.plot(df_test.index, rst_arima_error_list, label='ARIMA ERROR')
    plt.plot(df_test.index, rst_airline_error_list, label='AIRLINE ERROR')
    plt.legend()
    plt.show()

    arima_mse = np.mean(np.square(rst_arima_error_list))
    airline_mse = np.mean(np.square(rst_airline_error_list))
    print("arima_mse = ", arima_mse)
    print("airline_mse = ", airline_mse)
def get_forecast(self, finance_data):
    """Fit ARIMA(5, 0, 1) on the 'Low' column and forecast one step ahead.

    The model is fitted on the complete series; no hold-out is kept, on the
    assumption that the order was validated beforehand.

    :param finance_data: Object whose ``['Low'].values`` gives the price array.
    :return: Tuple ``(last observed value, one-step forecast)``.
    """
    self._logger.info('Calculating forecast with the given data...')

    lows = finance_data['Low'].values
    fitted = ARIMA(lows, order=(5, 0, 1)).fit()
    next_low = fitted.forecast(steps=1)[0]

    # Last real observation paired with the prediction for the following step.
    return (lows[-1], next_low)
def get_forecast():
    """Fetch finance data, fit ARIMA(5, 0, 1) on 'Low', forecast one step.

    The model is fitted on the complete series; no hold-out is kept, on the
    assumption that the order was validated beforehand.

    :return: Tuple ``(last observed value, one-step forecast)``.
    """
    lows = get_finance_data()['Low'].values
    fitted = ARIMA(lows, order=(5, 0, 1)).fit()
    next_low = fitted.forecast(steps=1)[0]

    # Last real observation paired with the prediction for the following step.
    return (lows[-1], next_low)
def _find_optimal_model(train, val, test, data_props, examples): results = [] for row in train['y_data']: model = auto_arima(row, trace=True) results.append(model.order) ARIMA_potentials = list(dict.fromkeys(results)) val_results = {} for props in ARIMA_potentials: if props not in val_results: val_results[props] = {'val': {}, 'test': {}} for set, X, y in zip(['val', 'test'], [ train['y_data'], np.concatenate((train['y_data'], val['y_data']), axis=1) ], [val['y_data'], test['y_data']]): for i in range(len(X)): mod = ARIMA(X[i], order=props).fit() y_pred = float(mod.forecast()) y_true = float(y[i]) mae = abs(y_pred - y_true) mda = int( np.sign(y_pred) == np.sign(y_true) or np.sign( np.round(y_pred, 4)) == np.sign(np.round(y_true, 4))) mse = (y_pred - y_true)**2 pos = int(np.sign(np.round(y_true, 6))) for err, vale in zip(['mae', 'mda', 'mse', 'pos'], [mae, mda, mse, pos]): if 'mae' not in val_results[props][set]: val_results[props][set] = { 'mae': [], 'mda': [], 'mse': [], 'pos': [] } val_results[props][set][err].append(vale) final_results = {} for props in ARIMA_potentials: if props not in final_results.items(): final_results[props] = {} for set in ['val', 'test']: for err, vals in val_results[props][set].items(): final_results[props][f'{set}_{err}'] = statistics.mean(vals) if props != (0, 0, 0) and (f'{set}_best_score' not in final_results or final_results[f'{set}_best_score'] > final_results[props][f'{set}_{err}']): final_results[f'{set}_best_score'] = final_results[props][ f'{set}_{err}'] final_results[f'{set}_best_param'] = props return final_results
def arima(args):
    """Fit one ARIMA(5, 1, 5) per series and print the MAPE of the forecasts.

    NOTE(review): assumes ``getData`` returns ``(train, test)`` arrays laid
    out as (time, 121), i.e. one column per cell of an 11 x 11 grid - confirm
    against ``getData``.

    :param args: Passed through unchanged to ``getData``.
    """
    grid = 11
    n_series = grid * grid  # 121 series, one per grid cell

    train, test = getData(args)
    train = train.transpose((1, 0))  # -> (series, time) so train[i] is a series
    test = test.transpose((1, 0))
    horizon = test.shape[1]  # forecast exactly as many steps as test has

    res_list = []
    for i in range(n_series):
        model = ARIMA(train[i], order=(5, 1, 5)).fit()
        res_list.append(model.forecast(horizon))

    # Score ALL series, not just the last one fitted: stack to
    # (series, horizon), then bring both arrays to (horizon, grid, grid).
    res = np.array(res_list).transpose((1, 0)).reshape((horizon, grid, grid))
    # res = np.load('./save/arima_res.npy',allow_pickle=True)
    test = test.transpose((1, 0)).reshape((horizon, grid, grid))

    print(res.shape)
    print(test.shape)
    print('ARIMA: ')
    # print('RMSE: {}'.format(RMSE(y_pred=res, y_true=test)))
    print('MAPE: {}'.format(MAPE(y_pred=res, y_true=test)))
def get_arima(data):
    """Fit ARIMA(1, 1, 1) on ``data`` and forecast ``DAYS_TO_PREDICT`` steps.

    :param data: Series passed directly to ``ARIMA``.
    :return: The forecast converted with ``.tolist()``.
    """
    fitted = ARIMA(data, order=(1, 1, 1)).fit()
    return fitted.forecast(DAYS_TO_PREDICT).tolist()
output['value']['Critical Value(10%)'] = t[4]['10%']
print(output)
# ts1 does not pass the ADF test here, but its time-series plot looks fairly
# stationary. A few periods show large variance, so heteroscedasticity is
# suspected.
# The LM test confirms significant heteroscedasticity; the textbook, however,
# judges from experience that an ARIMA model is still applicable.
# Conjecture: the heteroscedasticity comes from a few early periods, since the
# data is old and covers few periods.
# So ARIMA is taken as applicable; with that settled, move on to the
# autocorrelation and partial-autocorrelation plots.
c = acorr_lm(data['ts1'].dropna())
print(c)
# lag_acf = acf(data['ts1'].dropna(), nlags=10,fft=False)
# lag_pacf = pacf(data['ts1'].dropna(), nlags=10, method='ols')
# fig, axes = plt.subplots(1,2, figsize=(20,5))
# plot_acf(data['ts1'].dropna(), lags=10, ax=axes[0])
# plot_pacf(data['ts1'].dropna(), lags=10, ax=axes[1], method='ols')
# plt.show(block=True)
# The sparse-coefficient model given in the textbook is ARIMA((1,4),1,0),
# although ARIMA((1,4),1,1) looks somewhat better to me.
# order_trend=arma_order_select_ic(data['ts1'].dropna())  # may not give the best result because of the heteroscedasticity
# print(order_trend['bic_min_order'])
# Sparse-coefficient approach in Python: after comparing arima(4,1,0) with
# (4,1,1), AIC and BIC favour (4,1,0).
result_trend = ARIMA(data['fertility'],
                     order=(4, 1, 0),
                     enforce_stationarity=False)
# Pin the lag-2 and lag-3 AR coefficients to zero so only lags 1 and 4 are
# estimated; the constraint is active only while fitting inside this context.
with result_trend.fix_params({'ar.L2': 0, 'ar.L3': 0}):
    result_trend = result_trend.fit()
print(result_trend.param_names)
print(result_trend.forecast())
# Download one day of minute bars, hold out the last ~10% and compare a
# one-step ARIMA(5, 0, 1) forecast with the first held-out observation.
df = google.history(
    period='1d',
    interval="1m",
)

# Work on a copy so adding the 'date' column does not write into a slice of df.
low_df = df[['Low']].copy()
low_df['date'] = pd.to_datetime(low_df.index).time
low_df.set_index('date', inplace=True)

X = df.index.values
y = df['Low'].values

# At least one hold-out point: with offset == 0 the slices below would leave
# the training set empty.
offset = max(1, int(0.1 * len(df)))

X_train = X[:-offset]
y_train = y[:-offset]
X_test = X[-offset:]
# Held-out targets are prices (y), not timestamps (X).
y_test = y[-offset:]

plt.plot(range(0, len(y_train)), y_train, label='Train')
plt.show()

model = ARIMA(y_train, order=(5, 0, 1)).fit()
forecast = model.forecast(steps=1)[0]

print('Real data for time 0: %f' % y_train[len(y_train) - 1])
print('Real data for time 1: %f' % y_test[0])
print('pred data for time 1: %f' % forecast)  # this is terrible
class ARIMAModel(ModelStrategy):
    '''
    A class for an Autoregressive Integrated Moving Average Model and the standard operations on it
    '''

    def __init__(self, hparams, log_dir=None):
        '''
        :param hparams: Dict of hyperparameters. 'AUTO_PARAMS' is required;
                        'P', 'D' and 'Q' default to 30, 0 and 0
        :param log_dir: Forwarded to the ModelStrategy constructor
        '''
        univariate = True
        model = None
        name = 'ARIMA'
        self.auto_params = hparams['AUTO_PARAMS']
        self.p = int(hparams.get('P', 30))
        self.d = int(hparams.get('D', 0))
        self.q = int(hparams.get('Q', 0))
        super(ARIMAModel, self).__init__(model,
                                         univariate,
                                         name,
                                         log_dir=log_dir)

    def fit(self, dataset):
        '''
        Fits an ARIMA forecasting model. The passed DataFrame is not modified.
        :param dataset: A Pandas DataFrame with 2 columns: Date and Consumption
        '''
        if dataset.shape[1] != 2:
            raise Exception(
                'Univariate models cannot fit with datasets with more than 1 feature.'
            )
        # Rename on a copy so the caller's frame keeps its column names.
        series = dataset.rename(columns={
            'Date': 'ds',
            'Consumption': 'y'
        }).set_index('ds')
        if self.auto_params:
            # Search around the configured orders (up to twice each value).
            best_model = pmdarima.auto_arima(series,
                                             seasonal=False,
                                             stationary=False,
                                             information_criterion='aic',
                                             max_order=2 * (self.p + self.q),
                                             max_p=2 * self.p,
                                             max_d=2 * self.d,
                                             max_q=2 * self.q,
                                             error_action='ignore')
            order = best_model.order
            print("Best ARIMA params: (p, d, q):", best_model.order)
        else:
            order = (self.p, self.d, self.q)
        self.model = ARIMA(series, order=order).fit()
        print(self.model.summary())
        return

    def evaluate(self, train_set, test_set, save_dir=None, plot=False):
        '''
        Evaluates performance of ARIMA model on test set. The passed
        DataFrames are not modified.
        :param train_set: A Pandas DataFrame with 2 columns: Date and Consumption
        :param test_set: A Pandas DataFrame with 2 columns: Date and Consumption
        :param save_dir: Directory in which to save forecast metrics
        :param plot: Flag indicating whether to plot the forecast evaluation
        :return: Whatever evaluate_forecast returns for the combined frame
        '''
        # Local import: DataFrame.append no longer exists in pandas >= 2.0,
        # so the frames are stacked with pd.concat instead.
        import pandas as pd

        column_map = {'Date': 'ds', 'Consumption': 'y'}
        train_set = train_set.rename(columns=column_map).set_index('ds')
        test_set = test_set.rename(columns=column_map).set_index('ds')
        train_set["model"] = self.model.fittedvalues
        test_set["forecast"] = self.forecast(
            test_set.shape[0])['Consumption'].tolist()

        df_forecast = pd.concat([train_set,
                                 test_set]).rename(columns={'y': 'gt'})
        test_metrics = self.evaluate_forecast(df_forecast,
                                              save_dir=save_dir,
                                              plot=plot)
        return test_metrics

    def forecast(self, days, recent_data=None):
        '''
        Create a forecast for the test set. Note that this is different than obtaining predictions for the test set.
        The model makes a prediction for the provided example, then uses the result for the next prediction.
        Repeat this process for a specified number of days.
        :param days: Number of days into the future to produce a forecast for
        :param recent_data: A factual example for the first prediction (not used by this model)
        :return: A DataFrame with columns Date and Consumption
        '''
        forecast_df = self.model.forecast(steps=days).reset_index(level=0)
        forecast_df.columns = ['Date', 'Consumption']
        return forecast_df

    def save(self, save_dir, scaler_dir=None):
        '''
        Saves the model to disk. Does nothing if no model has been fitted or loaded.
        :param save_dir: Directory in which to save the model
        :param scaler_dir: Not used by this model
        '''
        if self.model is not None:
            model_path = os.path.join(save_dir,
                                      self.name + self.train_date + '.pkl')
            self.model.save(model_path)  # Serialize and save the model object

    def load(self, model_path, scaler_path=None):
        '''
        Loads the model from disk
        :param model_path: Path to saved model; must end in ".pkl"
        :param scaler_path: Not used by this model
        '''
        if os.path.splitext(model_path)[1] != '.pkl':
            raise Exception('Model file path for ' + self.name +
                            ' must have ".pkl" extension.')
        self.model = ARIMAResults.load(model_path)
        return
# invert differenced value
def inverse_difference(history, yhat, interval=1):
    """Add back the observation ``interval`` steps from the end of ``history``.

    :param history: Sequence of values on the original scale.
    :param yhat: A value on the differenced scale.
    :param interval: Lag that was used for differencing.
    :return: ``yhat`` restored to the original scale.
    """
    reference = history[-interval]
    return yhat + reference


# seasonal difference
X = btc['close'].astype(float)
duration = 168
differenced = difference(X, duration)

# fit model
# NOTE(review): fit(disp=0) and indexing forecast(...) with [0] look like the
# legacy statsmodels ARIMA interface - confirm which ARIMA class is imported.
model = ARIMA(differenced, order=order).fit(disp=0)

# multi-step out-of-sample forecast
fcast = model.forecast(steps=fcast_out)[0]

# Walk forward validation
predict = list(X)
hour = 1

# invert the differenced forecast: every restored value is appended so that
# later steps can reference it once the lag reaches into the forecast range
for yhat in fcast:
    inverted = inverse_difference(predict, yhat, duration)
    #print('Minute %d: %f' % (minute, inverted))
    predict.append(inverted)
    hour += 1

from pandas import DataFrame

fcast2 = DataFrame(predict[-fcast_out:])