def main():
    """Train the LSTM on the dataset chosen by the first CLI argument.

    Recognised selectors are 'daily', 'monthly' and 'hourly'; any other
    value falls back to minute-level data. Saves the trained weights,
    plots the predictions and prints RMSE/MAPE.
    """
    # banner, csv path and extra run() arguments per CLI selector
    datasets = {
        'daily': ('Using daily data...',
                  '../data/household_power_consumption_daily.csv',
                  (10, 50, 1.0)),
        'monthly': ('Using monthly data...',
                    '../data/household_power_consumption_monthly.csv',
                    (30, 5, 1.0)),
        'hourly': ('Using hourly data...',
                   '../data/household_power_consumption_hourly.csv',
                   (30, 50, 1.0)),
    }
    if sys.argv[1] in datasets:
        banner, path_to_dataset, extra_args = datasets[sys.argv[1]]
        print(banner)
        model, y_test, predictions = run(path_to_dataset, *extra_args)
    else:
        print('Using minute data...')
        path_to_dataset = '../data/household_power_consumption.csv'
        model, y_test, predictions = run(path_to_dataset)
    # save for later use
    model.save_weights('../output/lstm.h5', overwrite=True)
    # model.load_weights('../output/lstm.h5')
    graph_utils.plot('lstm', predictions, y_test)
    print('RMSE: %.4f' % metrics.rmse(predictions, y_test))
    print('MAPE: %.4f' % metrics.mape(predictions, y_test))
def input_metrics(targetxml, referencexml):
    """Compare the tuned input variables of a target IDF against a reference.

    Variables from both files are sorted and paired by their ``group``
    attribute; mismatched pairs are skipped. Returns a dict with a per-input
    'pae' mapping plus aggregate rmse/cvrmse/mbe/nmbe/mape over all inputs.
    """
    reference = pull_tune_variables(referencexml)
    target = pull_tune_variables(targetxml, referencexml)
    reference.variables.sort(key=lambda v: v.group)
    target.variables.sort(key=lambda v: v.group)
    targets, references, minima, maxima, keys = [], [], [], [], []
    for ref_var, tgt_var in zip(reference.variables, target.variables):
        # only score pairs that actually line up after sorting
        if ref_var.group != tgt_var.group:
            continue
        keys.append(';'.join([ref_var.idfclass, ref_var.idfobject,
                              ref_var.idffield]))
        targets.append(float(tgt_var.value))
        references.append(float(ref_var.value))
        minima.append(ref_var.minimum)
        maxima.append(ref_var.maximum)
    paes = metrics.pae(targets, references, minima, maxima)
    return {
        'pae': dict(zip(keys, paes)),
        'rmse': {'all inputs': metrics.rmse(targets, references)},
        'cvrmse': {'all inputs': metrics.cvrmse(targets, references)},
        'mbe': {'all inputs': metrics.mbe(targets, references)},
        'nmbe': {'all inputs': metrics.nmbe(targets, references)},
        'mape': {'all inputs': metrics.mape(targets, references)},
    }
def test_HaLRTC():
    """Run HaLRTC tensor completion on the Seattle data set and print
    MAPE/RMSE over the artificially masked-out entries."""
    dense_mat = pd.read_csv('./datasets/Seattle-data-set/mat.csv',
                            index_col=0).values
    rm = pd.read_csv('./datasets/Seattle-data-set/RM_mat.csv',
                     index_col=0).values
    # binary observation mask derived from the random matrix
    mask = np.round(rm + 0.5 - 0.2)
    with_nans = mask.copy()
    with_nans[with_nans == 0] = np.nan
    sparse_mat = np.multiply(with_nans, dense_mat)
    # evaluate only where data exists but was hidden from the model
    held_out = np.where((dense_mat != 0) & (mask == 0))
    # fold the (road x time) matrix into a (road x day x interval) tensor
    sparse_tensor = sparse_mat.reshape([sparse_mat.shape[0], 28, 288])
    recovered = HaLRTC(sparse_tensor, rho=1e-5, epsilon=1e-4,
                       maxiter=200).reshape(dense_mat.shape)
    print("HaLRTC_res2_mape2", mape(dense_mat[held_out], recovered[held_out]))
    print("HaLRTC_res2_rmse2", rmse(dense_mat[held_out], recovered[held_out]))
def test_BTRMF():
    """Run BTRMF matrix factorisation on the Seattle data set and print
    MAPE/RMSE over the artificially masked-out entries."""
    dense_mat = pd.read_csv('./datasets/Seattle-data-set/mat.csv',
                            index_col=0).values
    rm = pd.read_csv('./datasets/Seattle-data-set/RM_mat.csv',
                     index_col=0).values
    # binary observation mask derived from the random matrix
    mask = np.round(rm + 0.5 - 0.2)
    with_nans = mask.copy()
    with_nans[with_nans == 0] = np.nan
    sparse_mat = np.multiply(with_nans, dense_mat)
    # evaluate only where data exists but was hidden from the model
    held_out = np.where((dense_mat != 0) & (mask == 0))
    recovered = BTRMF(sparse_mat, rank=50, time_lags=(1, 2, 288),
                      burn_iter=100, gibbs_iter=20)
    print("BTRMF_res2_mape2", mape(dense_mat[held_out], recovered[held_out]))
    print("BTRMF_res2_rmse2", rmse(dense_mat[held_out], recovered[held_out]))
def test_TRTF():
    """Run TRTF tensor factorisation on the Seattle data set and print
    MAPE/RMSE over the artificially masked-out entries."""
    dense_mat = pd.read_csv('./datasets/Seattle-data-set/mat.csv',
                            index_col=0).values
    rm = pd.read_csv('./datasets/Seattle-data-set/RM_mat.csv',
                     index_col=0).values
    # binary observation mask derived from the random matrix
    mask = np.round(rm + 0.5 - 0.2)
    with_nans = mask.copy()
    with_nans[with_nans == 0] = np.nan
    sparse_mat = np.multiply(with_nans, dense_mat)
    # evaluate only where data exists but was hidden from the model
    held_out = np.where((dense_mat != 0) & (mask == 0))
    # fold the (road x time) matrix into a (road x day x interval) tensor
    sparse_tensor = sparse_mat.reshape([sparse_mat.shape[0], 28, 288])
    recovered = TRTF(sparse_tensor, rank=50, time_lags=(1, 2, 288),
                     maxiter=200).reshape(dense_mat.shape)
    print("TRTF_res2_mape2", mape(dense_mat[held_out], recovered[held_out]))
    print("TRTF_res2_rmse2", rmse(dense_mat[held_out], recovered[held_out]))
def input_metrics(targetxml, referencexml):
    """Compute per-input PAE and aggregate error metrics between the tuned
    variables of a target IDF and its reference.

    Variables are sorted and paired by ``group``; unmatched pairs are
    ignored. Returns ``{'pae': {key: pae}, 'rmse': {...}, ...}`` where the
    aggregate metrics all live under the 'all inputs' key.
    """
    rvars = pull_tune_variables(referencexml)
    tvars = pull_tune_variables(targetxml, referencexml)
    for bundle in (rvars, tvars):
        bundle.variables.sort(key=lambda x: x.group)
    data = {name: [] for name in ('target', 'reference', 'min', 'max', 'key')}
    for r, t in zip(rvars.variables, tvars.variables):
        # only score pairs that line up after sorting
        if r.group != t.group:
            continue
        data['key'].append(';'.join([r.idfclass, r.idfobject, r.idffield]))
        data['target'].append(float(t.value))
        data['reference'].append(float(r.value))
        data['min'].append(r.minimum)
        data['max'].append(r.maximum)
    paes = metrics.pae(data['target'], data['reference'],
                       data['min'], data['max'])
    m = {'pae': {}, 'rmse': {}, 'cvrmse': {}, 'mbe': {}, 'nmbe': {},
         'mape': {}}
    for k, p in zip(data['key'], paes):
        m['pae'][k] = p
    aggregates = {'rmse': metrics.rmse, 'cvrmse': metrics.cvrmse,
                  'mbe': metrics.mbe, 'nmbe': metrics.nmbe,
                  'mape': metrics.mape}
    for name, fn in aggregates.items():
        m[name]['all inputs'] = fn(data['target'], data['reference'])
    return m
def test_TRMF():
    """Run TRMF matrix factorisation on the Seattle data set and print
    MAPE/RMSE over the artificially masked-out entries."""
    dense_mat = pd.read_csv('./datasets/Seattle-data-set/mat.csv',
                            index_col=0).values
    rm = pd.read_csv('./datasets/Seattle-data-set/RM_mat.csv',
                     index_col=0).values
    # binary observation mask derived from the random matrix
    mask = np.round(rm + 0.5 - 0.2)
    with_nans = mask.copy()
    with_nans[with_nans == 0] = np.nan
    sparse_mat = np.multiply(with_nans, dense_mat)
    # evaluate only where data exists but was hidden from the model
    held_out = np.where((dense_mat != 0) & (mask == 0))
    recovered = TRMF(sparse_mat, lambda_w=500, lambda_x=500,
                     lambda_theta=500, eta=0.03,
                     time_lags=(1, 2, 3, 4, 144), maxiter=200)
    print("TRMF_res2_mape2", mape(dense_mat[held_out], recovered[held_out]))
    print("TRMF_res2_rmse2", rmse(dense_mat[held_out], recovered[held_out]))
def main():
    """Run the linear model on minute-level data, plot the predictions and
    print RMSE/MAPE."""
    # minute-level run; hourly/daily parameterisations kept for reference:
    # y_test, predictions = run(50, 1.0)  # hourly
    # y_test, predictions = run(50, 1.0)  # daily
    y_test, predictions = run()
    graph_utils.plot('linear', predictions, y_test)
    rmse_val = metrics.rmse(predictions, y_test)
    mape_val = metrics.mape(predictions, y_test)
    print('RMSE: %.4f' % rmse_val)
    print('MAPE: %.4f' % mape_val)
def output_metrics(estresults, actresults):
    """Per-column error metrics between estimated and actual result sets.

    Returns a dict keyed by metric name ('rmse', 'cvrmse', 'mbe', 'nmbe',
    'mape'), each mapping column name -> value. Columns whose metric
    computation fails (e.g. non-numeric or empty data) are omitted,
    preserving the original best-effort behaviour.
    """
    m = {'rmse': {}, 'cvrmse': {}, 'mbe': {}, 'nmbe': {}, 'mape': {}}
    estres = column_vectors(estresults)
    actres = column_vectors(actresults)
    for col in actres:
        try:
            m['rmse'][col] = metrics.rmse(estres[col], actres[col])
            m['cvrmse'][col] = metrics.cvrmse(estres[col], actres[col])
            m['mbe'][col] = metrics.mbe(estres[col], actres[col])
            m['nmbe'][col] = metrics.nmbe(estres[col], actres[col])
            m['mape'][col] = metrics.mape(estres[col], actres[col])
        except Exception:
            # FIX: narrowed from a bare ``except`` so KeyboardInterrupt and
            # SystemExit are no longer swallowed; a failing column is still
            # skipped rather than aborting the whole report.
            continue
    return m
def main():
    """Train the LSTM on minute-level data, persist the weights, plot the
    predictions and print RMSE/MAPE."""
    # minute-level run; hourly/daily parameterisations kept for reference:
    # model, y_test, predictions = run(30, 50, 1.0)   # hourly
    # model, y_test, predictions = run(100, 50, 1.0)  # daily
    model, y_test, predictions = run()
    # save for later use
    model.save_weights('../output/lstm.h5', overwrite=True)
    # model.load_weights('../output/lstm.h5')
    graph_utils.plot('lstm', predictions, y_test)
    rmse_val = metrics.rmse(predictions, y_test)
    mape_val = metrics.mape(predictions, y_test)
    print('RMSE: %.4f' % rmse_val)
    print('MAPE: %.4f' % mape_val)
def output_metrics(estresults, actresults):
    """Per-column error metrics between estimated and actual result sets.

    Returns ``{metric_name: {column: value}}`` for rmse, cvrmse, mbe, nmbe
    and mape. Columns that raise during computation are skipped on a
    best-effort basis.
    """
    metric_fns = {'rmse': metrics.rmse, 'cvrmse': metrics.cvrmse,
                  'mbe': metrics.mbe, 'nmbe': metrics.nmbe,
                  'mape': metrics.mape}
    m = {name: {} for name in metric_fns}
    estres = column_vectors(estresults)
    actres = column_vectors(actresults)
    for col in actres:
        try:
            for name, fn in metric_fns.items():
                m[name][col] = fn(estres[col], actres[col])
        except Exception:
            # FIX: narrowed from a bare ``except`` so KeyboardInterrupt and
            # SystemExit are no longer swallowed; the failing column is
            # still ignored in the output.
            pass
    return m
def main():
    """Run the linear model on the dataset chosen by the first CLI argument.

    Recognised selectors are 'daily', 'monthly' and 'hourly'; any other
    value falls back to minute-level data. Plots the predictions and
    prints RMSE/MAPE.
    """
    # banner, csv path and extra run() arguments per CLI selector
    datasets = {
        'daily': ('Using daily data...',
                  '../data/household_power_consumption_daily.csv',
                  (50, 1.0)),
        'monthly': ('Using monthly data...',
                    '../data/household_power_consumption_monthly.csv',
                    (5, 1.0)),
        'hourly': ('Using hourly data...',
                   '../data/household_power_consumption_hourly.csv',
                   (50, 1.0)),
    }
    if sys.argv[1] in datasets:
        banner, path_to_dataset, extra_args = datasets[sys.argv[1]]
        print(banner)
        y_test, predictions = run(path_to_dataset, *extra_args)
    else:
        print('Using minute data...')
        path_to_dataset = '../data/household_power_consumption.csv'
        y_test, predictions = run(path_to_dataset)
    graph_utils.plot('linear', predictions, y_test)
    print('RMSE: %.4f' % metrics.rmse(predictions, y_test))
    print('MAPE: %.4f' % metrics.mape(predictions, y_test))
def test_PPCA():
    """Run probabilistic PCA imputation on the Seattle data set and print
    MAPE/RMSE over the artificially masked-out entries."""
    dense_mat = pd.read_csv('./datasets/Seattle-data-set/mat.csv',
                            index_col=0).values
    rm = pd.read_csv('./datasets/Seattle-data-set/RM_mat.csv',
                     index_col=0).values
    # binary observation mask derived from the random matrix
    mask = np.round(rm + 0.5 - 0.2)
    with_nans = mask.copy()
    with_nans[with_nans == 0] = np.nan
    sparse_mat = np.multiply(with_nans, dense_mat)
    # evaluate only where data exists but was hidden from the model
    held_out = np.where((dense_mat != 0) & (mask == 0))
    recovered = PPCA(sparse_mat, 20)
    print("PPCA_res2_mape2", mape(dense_mat[held_out], recovered[held_out]))
    print("PPCA_res2_rmse2", rmse(dense_mat[held_out], recovered[held_out]))
true_x.append(row['timestamp']) true_y.append(row['downstream']) pred_y.append(preds.inferences["multiStepBestPredictions"][1]) pred_x.append(row['timestamp'] + timedelta(minutes=5)) np.savez("pred_data/{}-htm-pred-data".format(fname), true_x=true_x, true_y=true_y, pred_x=pred_x, pred_y=pred_y) np_tx = np.array(true_x)[1:] np_ty = np.array(true_y)[1:] np_py = np.array(pred_y)[:-1] print() print("GEH: ", geh(np_ty, np_py)) print("MAPE: ", mape(np_ty, np_py)) print("RMSE: ", rmse(np_ty, np_py)) print() print("True x:", len(true_x)) print("True y:", len(true_x)) print("Pred y:", len(true_x)) plt.plot(true_x, true_y, 'b-', label='Readings') plt.plot(pred_x, pred_y, 'r-', label='Predictions') plt.legend(prop={'size': 23}) plt.grid(b=True, which='major', color='black', linestyle='-') plt.grid(b=True, which='minor', color='black', linestyle='dotted') df = "%A %d %B, %Y" plt.title("3002: Traffic Flow from {} to {}".format(
def calculate_train_and_forecast_metrics(
        self, train: pd.DataFrame, oos: pd.DataFrame, target_index: int,
        hps: dict, horizon: int,
        mae_rmse_ignore_when_actual_and_pred_are_zero: bool,
        mape_ignore_when_actual_is_zero: bool):
    """Evaluate the network on the training window and on an out-of-sample
    (oos) window of ``horizon`` steps.

    Returns a (train_metrics, forecast_metrics) pair of pandas Series, one
    rounded value per metric name.
    """
    train_dataset = TrainDataset(train_df=train, target_index=target_index,
                                 hyperparams=hps, horizon=horizon)
    train_loader = DataLoader(train_dataset, batch_size=1, num_workers=1)
    inputs, train_actual = next(iter(train_loader))
    inputs = inputs.to(device=self.device)
    self.net = self.net.to(device=self.device)
    train_pred = self.net(inputs.float())
    # assumes batch and channel dims of size 1 — TODO confirm against TrainDataset
    train_actual = train_actual[0, 0, :].cpu().numpy()
    train_pred = train_pred[0, 0, :].cpu().detach().numpy()
    forecast_actual = oos.iloc[:horizon, target_index].values
    # BUG FIX: was ``self.predict(train_df, ...)`` — ``train_df`` is not a
    # name in this scope (the training frame parameter is ``train``), which
    # raised NameError on every call.
    forecast_pred = self.predict(train, target_index, hps, horizon)
    assert (train_actual.shape == train_pred.shape)
    assert (forecast_actual.shape == forecast_pred.shape)
    train_dict = {
        'mae': metrics.mae(train_actual, train_pred,
                           mae_rmse_ignore_when_actual_and_pred_are_zero),
        'rmse': metrics.rmse(train_actual, train_pred,
                             mae_rmse_ignore_when_actual_and_pred_are_zero),
        'mape': metrics.mape(train_actual, train_pred,
                             mape_ignore_when_actual_is_zero),
        'presence_accuracy': metrics.presence_accuracy(train_actual,
                                                       train_pred),
        'peak_accuracy': metrics.peak_accuracy(train_actual, train_pred),
        'total_volume': int(metrics.total_actual_volume(train_actual)),
        'num_timestamps_predicted_on': int(train_pred.shape[0])
    }
    # NOTE(review): key spelling differs from train_dict
    # ('num_time_stamps_predicted_on' vs 'num_timestamps_predicted_on');
    # kept as-is in case downstream consumers rely on it — worth unifying.
    forecast_dict = {
        'mae': metrics.mae(forecast_actual, forecast_pred,
                           mae_rmse_ignore_when_actual_and_pred_are_zero),
        'rmse': metrics.rmse(forecast_actual, forecast_pred,
                             mae_rmse_ignore_when_actual_and_pred_are_zero),
        'mape': metrics.mape(forecast_actual, forecast_pred,
                             mape_ignore_when_actual_is_zero),
        'presence_accuracy': metrics.presence_accuracy(forecast_actual,
                                                       forecast_pred),
        'peak_accuracy': metrics.peak_accuracy(forecast_actual,
                                               forecast_pred),
        'total_volume': int(metrics.total_actual_volume(forecast_actual)),
        'num_time_stamps_predicted_on': int(forecast_pred.shape[0])
    }
    train_metrics = pd.DataFrame.from_dict(
        train_dict, columns=[None], orient='index').iloc[:, 0].round(3)
    forecast_metrics = pd.DataFrame.from_dict(
        forecast_dict, columns=[None], orient='index').iloc[:, 0].round(3)
    return train_metrics, forecast_metrics
# keep only the post-split portion of every prediction series
predictions = {k: np.array(v[split_idx:]) for k, v in predictions.items()}
print()
table = []
# LaTeX-style header row
print(' & '.join(['step', 'geh', 'mape', 'rmse']) + ' \\\\')
for step in steps:
    stepped_vals = flow_values[step:len(predictions)]  # true values
    pred_vals = predictions[step][:-step] + eps        # predicted values
    table.append(OrderedDict([
        ('steps', step),
        ('geh', geh(stepped_vals, pred_vals)),
        ('mape', mape(stepped_vals, pred_vals)),
        ('rmse', rmse(stepped_vals, pred_vals)),
    ]))
print(tabulate.tabulate(table, 'keys', 'latex'))
print("Loading matplotlib")
import matplotlib.pyplot as plt
true_y = []
true_x = []
pred_y = []
print("Predicting data rows: {}".format(data_len - row_count))
progress = pyprind.ProgBar(data_len - row_count, width=50, stream=1)
for row in it:
    progress.update()
# stationarity check and (partial) autocorrelation plots of the
# differenced training series
adfuller(train_set_diff)
acf_pacf(train_set_diff)
# the series is stationary, so optimal parameters can be searched for
# best_params.append(sarima_best_params(train_set, 7, 1, 0, 7, 3))
# build the model
model = sm.tsa.SARIMAX(train_set,
                       order=(best_params[j][0], 1, best_params[j][1]),
                       seasonal_order=(best_params[j][2], 0,
                                       best_params[j][3], 7)).fit(disp=False)
# print model information
print(model.summary())
# check the model residuals for randomness
ljungbox(model.resid)
acf_pacf(model.resid)
# SARIMA forecast for the specific hour
hour_pred = model.forecast(2)[-1]
# append the hourly forecast to the final forecast
total_pred.append(hour_pred)
# score the forecast
# BUG FIX: the results were previously assigned to names ``mse`` and
# ``mape``, shadowing the metric functions themselves (any later call in
# this scope would raise TypeError); renamed to *_val, consistent with the
# sibling forecasting scripts.
nnf_val = nnfmetrics(total_pred, test_set, plan_set)
mse_val = mse(test_set, total_pred)
mape_val = mape(test_set, total_pred)
acc_val = accuracy(test_set, total_pred)
print('Оценка по NNFMETRICS = ', nnf_val)
print('Оценка по MSE = ', mse_val)
print('Оценка по MAPE = ', mape_val)
print('Точность прогноза = ', acc_val)
# draw the results plot
plot_results(pred_date.strftime('%d-%m-%Y'), test_set, total_pred, plan_set)
activation='relu', solver='lbfgs', max_iter=10000, learning_rate='constant') mlp.fit(x_train, y_train) predict = mlp.predict(x_test) top_test = [y_test[i][0] for i in range(len(y_test))] bottom_test = [y_test[i][1] for i in range(len(y_test))] top_pred = [predict[i][0] for i in range(len(predict))] bottom_pred = [predict[i][1] for i in range(len(predict))] mse_high = MSE(top_test, top_pred) mape_high = mape(top_test, top_pred) mse_low = MSE(bottom_test, bottom_pred) mape_low = mape(bottom_test, bottom_pred) # ########################################### FIGURE ############################################################################## plt.figure(1) plt.plot(top_test, label="High -- Test", color='green', linewidth=2) plt.plot(top_pred, label='High -- Prediction', linewidth=1) plt.plot(bottom_test, label="Low -- Test", color='red', linewidth=2) plt.plot(bottom_pred, label='Low -- Prediction', linewidth=1) plt.legend() plt.title("PETR4 High and Low\nPrice predictions") plt.ylabel('Price') plt.xlabel('Days')
def main():
    """Train and evaluate one LSTM per grid cell over the whole dataset,
    printing per-grid and overall rmse/nrmse/mape/ma scores."""
    # get model
    model = lstm()
    adam = Adam(lr=lr)
    model.compile(loss='mse', optimizer=adam,
                  metrics=[metrics.rmse, metrics.mape, metrics.ma])
    all_scores_train = []
    all_scores_test = []
    # model.summary()
    # get data
    data = load_data_()  # nodes x slots
    ts1 = time.time()
    trueY_train = []
    predictY_train = []
    trueY_test = []
    predictY_test = []
    for i in range(len(data)):
        print('grid %d.....' % (i))
        ts = time.time()
        # make dataset: last T * days_test slots are held out for testing
        testslots = T * days_test
        trainx, trainy = makedataset(data[i, :-testslots])
        testx, testy = makedataset(data[i, -testslots:])
        print('trainx shape:', (trainx.shape))
        print('trainy shape:', (trainy.shape))
        print('testx shape:', (testx.shape))
        print('testy shape:', (testy.shape))
        # scaler
        print(trainy, testy)
        mmn = MinMaxScaler(feature_range=(-1, 1))
        trainlen = len(trainy)
        Y = np.concatenate([trainy, testy], axis=0)
        Y = mmn.fit_transform(Y.reshape(-1, 1))
        trainy, testy = Y[:trainlen], Y[trainlen:]
        print(trainy.shape, testy.shape)
        # train (recompile resets the optimiser state for each grid)
        adam = Adam(lr=lr)
        model.compile(loss='mse', optimizer=adam,
                      metrics=[metrics.rmse, metrics.mape, metrics.ma])
        early_stopping = EarlyStopping(monitor='val_rmse', patience=patience,
                                       mode='min')
        history = model.fit(trainx, trainy, epochs=nb_epoch,
                            batch_size=batch_size, validation_split=0.1,
                            callbacks=[early_stopping], verbose=0)
        # evaluate
        predict_y_train = model.predict([trainx], batch_size=batch_size,
                                        verbose=0)[:, 0:1]
        score = model.evaluate(trainx, trainy, batch_size=batch_size,
                               verbose=0)
        # BUG FIX: the nrmse denominators below used ``y_train``/``y_test``,
        # which are not defined anywhere in this function (NameError at
        # runtime); the scaled targets are ``trainy``/``testy``.
        print(
            'Train score: %.6f rmse (norm): %.6f rmse (real): %.6f nrmse : %.6f mape: %.6f ma: %.6f'
            % (score[0], score[1], score[1] * (mmn._max - mmn._min) / 2.,
               score[1] * (mmn._max - mmn._min) / 2.
               / mmn.inverse_transform(np.mean(trainy)), score[2], score[3]))
        predict_y_test = model.predict([testx], batch_size=batch_size,
                                       verbose=0)[:, 0:1]
        score = model.evaluate(testx, testy, batch_size=batch_size, verbose=0)
        print(
            'Test score: %.6f rmse (norm): %.6f rmse (real): %.6f nrmse : %.6f mape: %.6f ma: %.6f'
            % (score[0], score[1], score[1] * (mmn._max - mmn._min) / 2.,
               score[1] * (mmn._max - mmn._min) / 2.
               / mmn.inverse_transform(np.mean(testy)), score[2], score[3]))
        # keep real-scale series for the overall evaluation below
        predictY_train.append(
            mmn.inverse_transform(predict_y_train).reshape(-1).tolist())
        predictY_test.append(
            mmn.inverse_transform(predict_y_test).reshape(-1).tolist())
        trueY_train.append(mmn.inverse_transform(trainy).reshape(-1).tolist())
        trueY_test.append(mmn.inverse_transform(testy).reshape(-1).tolist())
        print("\nestimate on grid%d ,elapsed time (eval): %.3f seconds\n"
              % (i, time.time() - ts))
    print('\n\n')
    evaluate = lambda y1, y2: (metrics.rmse(y1, y2),
                               metrics.rmse(y1, y2) / np.mean(y1),
                               metrics.mape(y1, y2), metrics.ma(y1, y2))
    print('All Train rmse (real): %.6f nrmse : %.6f mape: %.6f ma: %.6f'
          % (evaluate(trueY_train, predictY_train)))
    print('All Test rmse (real): %.6f nrmse : %.6f mape: %.6f ma: %.6f'
          % (evaluate(trueY_test, predictY_test)))
    print('elapsed time: %3f seconds\n' % (time.time() - ts1))
# autocorrelation and partial-autocorrelation plots of the remainder
acf_pacf(remainder)
# parameter search (already performed):
# sarima_best_params(remainder, 24, 0, 0, 4, 3)
# parameters found for 27.11.2019:
best_params = [2, 3, 1, 2]
# build the model
model = sm.tsa.SARIMAX(remainder,
                       order=(best_params[0], 0, best_params[1]),
                       seasonal_order=(best_params[2], 0,
                                       best_params[3], 24)).fit(disp=False)
# print model information
print(model.summary())
# check the model residuals for randomness
ljungbox(model.resid)
acf_pacf(model.resid)
# SARIMA forecast of the remainder component (48 h, keep the last day)
remainder_pred = model.forecast(48)
# final forecast = trend + seasonal + remainder forecast
total_pred = trend_pred + seasonal_pred + remainder_pred[24:]
# score the forecast
nnf_val = nnfmetrics(total_pred, test_set, plan_set)
mse_val = mse(test_set, total_pred)
mape_val = mape(test_set, total_pred)
acc_val = accuracy(test_set, total_pred)
print('Оценка по NNFMETRICS = ', nnf_val)
print('Оценка по MSE = ', mse_val)
print('Оценка по MAPE = ', mape_val)
print('Точность прогноза = ', acc_val)
# draw the results plot
plot_results(pred_date.strftime('%d-%m-%Y'), test_set, total_pred, plan_set)
]] in_row = scaler.fit_transform(scaler.fit_transform(in_row)) pred = model.predict(np.array([in_row])) # flow_val = pred[0][0] pred_y.append(scaler.inverse_transform([0, 0, 0, 0, 0, pred[0][0]])) true_x = true_x[1:] true_y = true_y[1:] pred_y = pred_y[:-1] pred_y = np.array(pred_y, dtype=np.float32) true_y_max = np.copy(true_y) true_y_max[true_y_max == 0] = 1 print( "GEH: ", np.sqrt(2 * np.power(pred_y - true_y_max, 2) / (pred_y + true_y_max)).mean(axis=0)) print("MAPE: ", mape(true_y_max, pred_y)) print("RMSE: ", np.sqrt(((pred_y - true_y_max)**2).mean(axis=0))) import matplotlib.pyplot as plt plt.plot(true_x, true_y, 'b-', label='Readings') plt.plot(true_x, pred_y, 'r-', label='Predictions') df = "%A %d %B, %Y" plt.title("3002: Traffic Flow from {} to {}".format( true_x[0].strftime(df), true_x[-1].strftime(df))) plt.legend() plt.ylabel("Vehicles/ 5 min") plt.xlabel("Time") plt.show()
plan = dataset.plan
# length of the seasonal period (hours in one week)
p = 168
# training window train_set: data from 14.01.2019 to 25.11.2019
train_start_date = datetime.datetime(2019, 1, 14, 0, 0, 0)
train_end_date = datetime.datetime(2019, 11, 25, 23, 0, 0)
train_set = fact[train_start_date:train_end_date]
# test window test_set: data for the forecast day 27.11.2019
pred_date = datetime.date(2019, 11, 27)
pred_end_date = train_end_date + datetime.timedelta(days=2)
pred_start_date = pred_end_date - datetime.timedelta(hours=23)
test_set = fact[pred_start_date:pred_end_date]
# the company plan plan_set for comparison, same forecast day
plan_set = plan[pred_start_date:pred_end_date]
# chosen method and the smoothing constants found for it
method = additiveHoltWinters
best_params = [[0.06846567, 0.00032291, 0.26071966]]
# forecast 48 hours ahead and keep only the last 24 (the target day)
pred = method(best_params[0], train_set, p, 48)[24:]
# score the forecast
nnf_val = nnfmetrics(pred, test_set, plan_set)
mse_val = mse(test_set, pred)
mape_val = mape(test_set, pred)
acc_val = accuracy(test_set, pred)
print('Оценка по NNFMETRICS = ', nnf_val)
print('Оценка по MSE = ', mse_val)
print('Оценка по MAPE = ', mape_val)
print('Точность прогноза = ', acc_val)
# draw the results plot
plot_results(pred_date.strftime('%d-%m-%Y'), test_set, pred, plan_set)
# split the (timestamp, value) arrays into their columns
true_x = true_xy[:, 0]
pred_x = np.reshape(pred_xy[:, 0], (-1, 1))
pred_y = np.reshape(pred_xy[:, 1].astype(dtype=np.float32), (-1, 1))
true_y = true_xy[:, 1].astype(np.float32)
# drop the final reading (no matching prediction) and guard against
# zero readings before the ratio-based metrics below
true_y_max = np.copy(true_y)[:-1]
true_y_max[true_y_max == 0] = 1
np.savez('pred_data/3002-no-reset-on-error-all-sensor',
         true_x=true_x, true_y=true_y, pred_x=pred_x, pred_y=pred_y)
true_y_max = true_y_max.reshape((true_y_max.shape[0], 1))
print("GEH: ", geh(true_y_max, pred_y[:-1]))
print("MAPE: ", mape(true_y_max, pred_y[:-1]))
print("RMSE: ", rmse(true_y_max, pred_y[:-1]))
font = {'size': 30}
import matplotlib
matplotlib.rc('font', **font)
import matplotlib.pyplot as plt
plt.plot(true_x, true_y, 'b-', label='Readings')
plt.plot(pred_x, pred_y, 'r-', label='LSTM-Online Predictions')
df = "%A %d %B, %Y"
plt.title("3002: Traffic Flow from {} to {}".format(
    true_x[0].strftime(df), true_x[-1].strftime(df)))
plt.legend()
sales = data['Sales']
elements = seasonal_decompose(sales, model='multiplicative')
data.plot()
elements.plot()
plt.show()
# error table: one row per metric, one column per smoothing method;
# the moving average has no value for the first 3 periods, hence iloc[3:]
method_series = [
    (data['Sales'].iloc[3:], data['Moving Average'].iloc[3:]),
    (data['Sales'], data['α=0.1']),
    (data['Sales'], data['α=0.5']),
]
rows = [[label] + [fn(actual, fitted) for actual, fitted in method_series]
        for label, fn in (("MSE", mse), ("MAPE", mape), ("LAD", lad))]
print(tabulate(rows,
               headers=["Moving Average", "α=0.1", "α=0.5"],
               tablefmt='fancy_grid', floatfmt='.2f'))
def regression_report(y_true, y_pred):
    """Print a suite of regression error metrics for the given predictions."""
    report = (("MSE", metrics.mse),
              ("RMSE", metrics.rmse),
              ("MAPE", metrics.mape),
              ("MPE", metrics.mpe),
              ("R2", metrics.r2))
    for label, metric_fn in report:
        print(label, metric_fn(y_true, y_pred).item())
def do_model(all_data, steps, run_model=True):
    """Train a stateful two-layer LSTM and evaluate it online on held-out
    data, learning from each test chunk after predicting it.

    Returns (metrics, model), or (None, None) when run_model is False.
    """
    _steps = steps
    print("steps:", _steps)
    if not run_model:
        return None, None
    # shift features/labels by the forecast horizon
    features = all_data[:-_steps]
    labels = all_data[_steps:, -1:]
    tts = train_test_split(features, labels, test_size=0.4)
    X_train = tts[0]
    X_test = tts[1]
    Y_train = tts[2].astype(np.float64)
    Y_test = tts[3].astype(np.float64)
    # BUG FIX: ``optimiser`` is used by model.compile() below but its
    # assignment had been commented out, raising NameError; restore the
    # commented-out default.
    optimiser = 'adam'
    # per-horizon hyper-parameters (from prior tuning)
    if steps == 1:
        hidden_neurons = 332
        loss_function = 'mse'
        batch_size = 128
        dropout = 0.0923
        inner_hidden_neurons = 269
        dropout_inner = 0.2269
    elif steps == 3:
        hidden_neurons = 256
        loss_function = 'mse'
        batch_size = 105
        dropout = 0.0923
        inner_hidden_neurons = 72
        dropout_inner = 0.001
    else:
        hidden_neurons = 332
        loss_function = 'mse'
        batch_size = 105
        dropout = 0.0042
        inner_hidden_neurons = 329
        dropout_inner = 0.1314
    # NOTE: deliberately overrides the tuned batch sizes — the stateful
    # LSTM below is built with batch_input_shape=(1, 1, ...), so batches
    # must be of size 1.
    batch_size = 1
    nb_epochs = 1
    X_train = X_train.reshape((X_train.shape[0], 1, X_train.shape[1]))
    X_test = X_test.reshape(X_test.shape[0], 1, X_test.shape[1])
    print("X train shape:\t", X_train.shape)
    print("X test shape:\t", X_test.shape)
    in_neurons = X_train.shape[2]
    out_neurons = 1
    model = Sequential()
    gpu_cpu = 'cpu'
    best_weight = BestWeight()
    reset_state = ResetStatesCallback()
    model.add(LSTM(output_dim=hidden_neurons, input_dim=in_neurons,
                   batch_input_shape=(1, 1, in_neurons),
                   return_sequences=True, init='uniform',
                   consume_less=gpu_cpu, stateful=True))
    model.add(Dropout(dropout))
    dense_input = inner_hidden_neurons
    model.add(LSTM(output_dim=dense_input, input_dim=hidden_neurons,
                   return_sequences=False, consume_less=gpu_cpu,
                   stateful=True))
    model.add(Dropout(dropout_inner))
    model.add(Activation('relu'))
    model.add(Dense(output_dim=out_neurons, input_dim=dense_input))
    model.add(Activation('relu'))
    model.compile(loss=loss_function, optimizer=optimiser)
    # learning training set
    print("Learning training set")
    for epoch in xrange(nb_epochs):
        mean_tr_loss = []
        print("Epoch {}".format(epoch))
        for x_chunk, y_chunk in chunks(X_train, Y_train, batch_size):
            tr_loss = model.train_on_batch(x_chunk, y_chunk)
            mean_tr_loss.append(tr_loss)
            model.reset_states()
        print("Training Loss: {}".format(np.mean(mean_tr_loss)))
    geh_l = []
    rmse_l = []
    mape_l = []
    training_done = 0
    for x_chunk, y_chunk in chunks(X_test, Y_test, batch_size):
        # predict first, then train online on the observed chunk
        predicted = model.predict_on_batch(x_chunk) + EPS
        model.reset_states()
        model.train_on_batch(x_chunk, y_chunk)
        model.reset_states()
        geh_l.append(geh(y_chunk, predicted))
        rmse_l.append(rmse(y_chunk, predicted))
        mape_l.append(mape(y_chunk, predicted))
    print("Testing RMSE: {} GEH: {} MAPE: {}".format(
        np.mean(rmse_l), np.mean(geh_l), np.mean(mape_l)))
    print()
    metrics = OrderedDict([
        ('online', True),
        ('hidden', hidden_neurons),
        ('steps', _steps),
        ('geh', np.mean(geh_l)),
        ('rmse', np.mean(rmse_l)),
        ('mape', np.mean(mape_l)),
        ('batch_size', batch_size),
        ('dropout', dropout),
        ('extra_layer_dropout', dropout_inner),
        ('extra_layer_neurons', inner_hidden_neurons),
    ])
    return metrics, model