def core_model(self):
    """Walk-forward evaluate an ARIMA(5, 1, 0) model over ``self.X_test``.

    For each test observation the model is refitted on ``self.X_train``, a
    one-step forecast is recorded, and the true observation is appended to
    ``self.X_train`` (the training list therefore grows in place).  The
    scores returned by ``self.get_mse`` and ``self.get_mae`` are stored on
    ``self.mse`` and ``self.mae`` and printed, and the model fitted in the
    last iteration is saved to ``arima_model.pkl``.

    Raises:
        ValueError: If ``self.X_test`` is empty; no model would be fitted,
            so there would be nothing to score or to save.
    """
    # len() rather than truthiness so array-like test sets are accepted too.
    if len(self.X_test) == 0:
        raise ValueError("X_test is empty: nothing to forecast or evaluate.")

    predicted = []
    model = None
    for observed in self.X_test:
        # Rebuild the model every iteration on the data seen so far.
        model = ARIMA(self.X_train, order=(5, 1, 0)).fit(disp=0)
        # First element of forecast() is the point forecast.
        predicted.append(model.forecast()[0])
        # The true value becomes part of the history for the next step.
        self.X_train.append(observed)

    self.mse = self.get_mse(self.X_test, predicted)
    self.mae = self.get_mae(self.X_test, predicted)

    # Persist the model from the final iteration.
    model.save('arima_model.pkl')

    print("Mean Squared error = {:.4f}".format(self.mse))
    print("Mean Absolute error = {:.4f}".format(self.mae))
# Build the TABL network and print its layer summary.
model = Models.TABL(
    template,
    dropout,
    projection_regularizer,
    projection_constraint,
    attention_regularizer,
    attention_constraint,
)
model.summary()

# Per-class weights handed to fit().
class_weight = {0: 1e6 / 300.0, 1: 1e6 / 400.0, 2: 1e6 / 300.0}

# Training.
# Remove .iloc[2:] for single day.
model.fit(
    X_train,
    lbls_train.iloc[2:],
    batch_size=256,
    epochs=100,
    class_weight=class_weight,
)
model.save('model.h5')
pred = model.predict(X_train)

# Stack train and test rows, keep the trailing len(stock_test) + n_past
# rows, and run them through the already-fitted scaler.
total = pd.concat((stock_train, stock_test), axis=0)
inputs = sc_predict.transform(
    total[len(total) - len(stock_test) - n_past:].values
)

# Append n_past + (n_future - 1) all-zero rows below the scaled inputs.
add = np.zeros((n_past + (n_future - 1), stock.shape[1]))
inputs = np.vstack((inputs, add))

# One window of n_past consecutive rows per end position.
X_test = np.array([
    inputs[i - n_past:i, 0:stock.shape[1]]
    for i in range(n_past, len(inputs) - n_future + 1)
])
最小的p值和q值为:p = 0, q = 0 ''' # 第 4 步--C盘---------模型检验 # 确定模型后,需要检验其残差序列是否是白噪声,若不是,说明,残差中还存在有用的信息,需要修改模型或者进一步提取。 # 若其残差不是白噪声,重新更换p,q的值,重新确定 while 1: p, q = matrix.stack().idxmin() print('最小的p值和q值为:p = %s, q = %s' % (p, q)) lagnum = 12 arima = ARIMA(xdata2, (p, 1, q)).fit() arima.save('arima_BIC.pkl') # 保存模型 xdata_pred = arima.predict(typ='levels') pred_error = (xdata_pred - xdata2).dropna() print('pred_error:\n', pred_error) # 白噪音检验 lbx, px = acorr_ljungbox(pred_error, lags=lagnum) print('pred_error的p值为:', px) h = (px < 0.05).sum() print('h=', h) if h > 0: print('模型ARIMA(%s,1, %s)不符合白噪音检验' % (p, q)) print('在BIC矩阵中去掉[%s,%s]组合,重新进行计算' % (p, q)) matrix.iloc[p, q] = np.nan arimafail = arima
validation_data=(X_test, y_test)) # + colab={"base_uri": "https://localhost:8080/", "height": 291} colab_type="code" id="slgKH-bqcWwe" outputId="e918d0d2-117f-43cf-f50b-e8b5503bcd98" # Check for overfitting, which is when val_loss starts to go up but # loss keeps decreasing or stays constant. plt.plot(history.history['loss'], label='loss') plt.plot(history.history['val_loss'], label='val_loss') plt.title('Learning curve') plt.ylabel('Loss') plt.xlabel('Epochs') plt.legend() plt.show() # + colab={} colab_type="code" id="6IvVquwzKqQD" model.save('model_id_revenue_prediction.h5') # + colab={"base_uri": "https://localhost:8080/", "height": 429} colab_type="code" id="2t24z4WzOSG2" outputId="0282cce6-ef89-417d-8de6-19a40c62a74d" # Get a prediction from our model for our data and plot it against the truth y_pred = model.predict(X) df_pred = pd.DataFrame(index=X.index, data={'predictions': y_pred.ravel()}) df_pred # - type(y_pred) # + colab={"base_uri": "https://localhost:8080/", "height": 479} colab_type="code" id="ppUviuuZO-oQ" outputId="2b858b5a-b3e5-4dad-f358-391e48803eb4" fig, ax = plt.subplots(figsize=(15, 8)) ax.plot(y, label='Truth', marker='o')
class Model:
    """ARIMA model stored in a folder together with a JSON metadata file.

    On construction the folder is created when missing.  If both the model
    file and the metadata file are already present they are loaded;
    otherwise the instance starts without a fitted model.
    """

    # Message shared by every operation that needs a fitted model.
    _NOT_FITTED_MESSAGE = "This model has not been fitted."

    def __init__(self, model_folder_abs_path: str):
        """Create the model folder if needed and load any saved state.

        Args:
            model_folder_abs_path: Folder holding the model and metadata
                files.
        """
        self._model_folder_abs_path: str = model_folder_abs_path
        # exist_ok avoids the race between checking for the folder and
        # creating it.
        os.makedirs(self._model_folder_abs_path, exist_ok=True)

        model_file_abs_path = self._get_model_abs_file_path()
        model_meta_data_file_abs_path = (
            self._get_model_meta_data_abs_file_path())

        if (os.path.exists(model_file_abs_path)
                and os.path.exists(model_meta_data_file_abs_path)):
            self._arima_model: ARIMAResults = ARIMAResults.load(
                model_file_abs_path)
            with open(model_meta_data_file_abs_path,
                      encoding="utf-8") as file_meta_data:
                self._meta_data = json.load(file_meta_data)
        else:
            self._arima_model: ARIMAResults = None
            self._meta_data = None

    def _get_model_abs_file_path(self):
        """Return the path of the serialized model inside the folder."""
        return os.path.join(self._model_folder_abs_path,
                            consts_model.MODEL_FILE_NAME)

    def _get_model_meta_data_abs_file_path(self):
        """Return the path of the JSON metadata file inside the folder."""
        return os.path.join(self._model_folder_abs_path,
                            consts_model.META_DATA_FILE_NAME)

    def _require_fitted(self):
        """Raise ``Exception`` when no fitted model is held."""
        if self._arima_model is None:
            raise Exception(self._NOT_FITTED_MESSAGE)

    @staticmethod
    def _predicted_values(test_results: list):
        """Return the predicted value of every entry of ``test_results``."""
        return [
            test_result[consts_model.TEST_COLUMN_KEY_PREDICTED]
            for test_result in test_results
        ]

    def train(self, train_data: pd.DataFrame):
        """Fit the ARIMA model on ``train_data`` and build its metadata.

        The metadata records the latest index value of ``train_data``
        (formatted with ``consts_model.DEFAULT_DATETIME_FORMAT``) and the
        first component of every converted training element.

        Raises:
            Exception: If this instance already holds a model.
        """
        if self._arima_model is not None:
            raise Exception("This directory contains a trained model.")

        list_train_data = train_pre_processor.convert_to_model_data(
            train_data)
        # Assign only after fit() succeeds, so a failed fit does not leave
        # an unfitted estimator behind that looks like a trained model.
        self._arima_model = ARIMA(
            list_train_data, order=config_model.ORDER_FOR_MODEL).fit(disp=0)
        self._meta_data = {
            consts_model.META_DATA_KEY_DATA_END_DATE:
            train_data.index.max().strftime(
                consts_model.DEFAULT_DATETIME_FORMAT),
            consts_model.META_DATA_KEY_DATA_TRAIN_DATA_SET:
            [label[0] for label in list_train_data],
        }

    def save(self):
        """Write the fitted model and its metadata into the model folder.

        Raises:
            Exception: If the model has not been fitted.
        """
        self._require_fitted()
        self._arima_model.save(self._get_model_abs_file_path())
        with open(self._get_model_meta_data_abs_file_path(), "w",
                  encoding="utf-8") as file_meta_data:
            json.dump(self._meta_data, file_meta_data)

    def test(self, train_data: pd.DataFrame, test_data: pd.DataFrame):
        """Walk forward over ``test_data``, forecasting one step at a time.

        The first forecast comes from the stored model.  After each step
        the expected value is appended to the training list and a new model
        is fitted on it; the stored model itself is not replaced.

        Returns:
            A list with one dict per test row holding its date, predicted
            value and expected value (keys from ``consts_model``).

        Raises:
            Exception: If the model has not been fitted.
        """
        self._require_fitted()
        list_train_data = train_pre_processor.convert_to_model_data(train_data)
        model_for_test: ARIMAResults = self._arima_model
        np_test_data = test_data.values

        test_results = []
        for test_date, test_row in zip(test_data.index, np_test_data):
            predicted_value = model_for_test.forecast()[0]
            expected_value = test_row[0]
            test_results.append({
                consts_model.TEST_COLUMN_KEY_DATE: test_date,
                consts_model.TEST_COLUMN_KEY_PREDICTED: predicted_value,
                consts_model.TEST_COLUMN_KEY_EXPECTED: expected_value,
            })
            # NOTE(review): expected_value is a scalar, while train() indexes
            # every training element with [0] - confirm the element shape
            # convert_to_model_data produces.
            list_train_data.append(expected_value)
            model_for_test = ARIMA(
                list_train_data,
                order=config_model.ORDER_FOR_MODEL).fit(disp=0)
        return test_results

    def get_mean_squared_error(self, test_data: pd.DataFrame,
                               test_results: list):
        """Return ``mean_squared_error`` of the test values vs. predictions."""
        return mean_squared_error(test_data.values,
                                  self._predicted_values(test_results))

    def plot_results(self,
                     test_data: pd.DataFrame,
                     test_results: list,
                     color="red"):
        """Plot the test values and, in ``color``, the predictions."""
        pyplot.plot(test_data.values)
        pyplot.plot(self._predicted_values(test_results), color=color)
        pyplot.show()

    def predict(self, on_date_str: str):
        """Forecast the value for ``on_date_str``.

        Starting from the stored model, one-step forecasts are produced for
        as many iterations as ``utils_dates.diff_days`` reports between the
        requested date and the training end date; each forecast is appended
        to the training series and a new model is fitted on it.

        Args:
            on_date_str: Target date in
                ``consts_model.DEFAULT_DATETIME_FORMAT``.

        Returns:
            The first component of the last forecast.

        Raises:
            Exception: If the model has not been fitted, or if
                ``utils_dates.is_first_date_bigger`` rejects the date
                relative to the training end date.
        """
        self._require_fitted()

        on_date = datetime.strptime(on_date_str,
                                    consts_model.DEFAULT_DATETIME_FORMAT)
        model_train_end_date = datetime.strptime(
            self._meta_data[consts_model.META_DATA_KEY_DATA_END_DATE],
            consts_model.DEFAULT_DATETIME_FORMAT)
        if not utils_dates.is_first_date_bigger(on_date,
                                                model_train_end_date):
            raise Exception(
                "Please provide a date in the future. This model is not used to predict for the data in which it was trained to."
            )

        difference_dates = utils_dates.diff_days(on_date,
                                                 model_train_end_date)
        model_for_predict = self._arima_model
        # Metadata stores plain values; wrap each one the same way the
        # forecasts appended below are shaped.
        model_train_data: list = [
            np.array([label]) for label in self._meta_data[
                consts_model.META_DATA_KEY_DATA_TRAIN_DATA_SET]
        ]
        last_prediction = None
        for _ in range(difference_dates):
            last_prediction = model_for_predict.forecast()[0]
            model_train_data.append(last_prediction)
            model_for_predict = ARIMA(
                model_train_data,
                order=config_model.ORDER_FOR_MODEL).fit(disp=0)
        return last_prediction[0]
try: value = ARIMA(D_data, (p, 1, q)).fit().bic temp.append(value) except: temp.append(None) bic_matrix.append(temp) bic_matrix = pd.DataFrame(bic_matrix) #将其转换成Dataframe 数据结构 p, q = bic_matrix.stack().idxmin() #先使用stack 展平, 然后使用 idxmin 找出最小值的位置 print(u'BIC 最小的p值 和 q 值:%s,%s' % (p, q)) # BIC 最小的p值 和 q 值:0,1 #所以可以建立ARIMA 模型 model = ARIMA(data, (p, 1, q)).fit() model.summary2() #保存模型 model.save('model.pkl') #模型加载 from statsmodels.tsa.arima_model import ARIMAResults loaded = ARIMAResults.load('model.pkl') #预测未来五个单位 predictions = loaded.forecast(5) #预测结果为: pre_result = predictions[0] print(u'预测结果为:', pre_result) #标准误差为: error = predictions[1] print(u'标准误差为:', error) #置信区间为: confidence = predictions[2] print(u'置信区间为:', confidence)
# Fetch and split data
raw_x, raw_y = data_handler.fetch_data()
train_x, test_x = data_handler.split_data(raw_x)
train_y, test_y = data_handler.split_data(raw_y)

# Difference the whole dataset to remove seasonality
history = np.append(train_x, test_x)
differenced = data_handler.remove_seasonality(history, lag=order[0]).values

# sys.argv[0:3] is a list, so it can never equal a string; the mode keyword
# is therefore looked up among those leading arguments instead.
train = sys.argv[0:3]
if 'train' in train:
    # Declare model
    model = ARIMA(differenced[:len(train_x)], order=order)

    # Fit model and save parameters
    model = model.fit(disp=1)
    model.save(path_to_model)
elif 'mape' in train:
    mape_sum = 0
    mape_scores = list()

    # Iterates through the test set, predicting and calculating MAPE
    for seq in range(len(test_y) // output_seq_length):
        # Start with the first test week
        # We append the training set since our values will depend on those values
        # After each sequence is evaluated, its actual values become part of the training data
        model = ARIMA(differenced[:len(train_x) + (seq + 1) * output_seq_length], order=order)

        # Fit model to previous values
        model = model.fit()
        preds = model.forecast(output_seq_length)[0]
        # We convert the np.float64 to integers due to the weird behaviour of matplotlib
activation='tanh', input_shape=(1, 1), return_sequences=False)) else: print('wrong option') model.add(Dropout(0.8)) model.add(Dense(1)) model.add(LeakyReLU()) model.compile(loss='mse', optimizer='adam') model.fit(trainX, trainY, epochs=10, batch_size=10, validation_data=(testX, testY), verbose=1) model.save('./savedModel') #predict trainHat = model.predict(trainX) testHat = model.predict(testX) #invert trainHat = scaler.inverse_transform(trainHat) trainY = scaler.inverse_transform(trainY) testHat = scaler.inverse_transform(testHat) testY = scaler.inverse_transform(testY) #rmse trainScore = math.sqrt(mean_squared_error(trainY[:, 0], trainHat[:, 0])) print('Train: %.2f RMSE' % (trainScore)) testScore = math.sqrt(mean_squared_error(testY[:, 0], testHat[:, 0]))
max_ma=7, ic=['aic', 'bic', 'hqic']) p, q = order.bic_min_order print("p,q") print(p, q) # 建立ARIMA(0, 1, 1)模型 order = (p, 1, q) train_X = diff_1_df[:] arima_model = ARIMA(train_X, order).fit() # 模型报告 # print(arima_model.summary2()) # 保存模型 arima_model.save('./data/arima_model.h5') # # load model arima_model = ARIMAResults.load('./data/arima_model.h5') # 预测未来两天数据 predict_data_02 = arima_model.predict(start=len(train_X), end=len(train_X) + 1, dynamic=False) # 预测历史数据 predict_data = arima_model.predict(dynamic=False) # 逆log差分 # original_series = np.exp(train_X.values[1:] + np.log(dau.values[1:-1])) # predict_series = np.exp(predict_data.values + np.log(dau.values[1:-1]))