def input_metrics(targetxml, referencexml):
    rvars = pull_tune_variables(referencexml)
    tvars = pull_tune_variables(targetxml, referencexml)
    rvars.variables.sort(key=lambda x: x.group)
    tvars.variables.sort(key=lambda x: x.group)
    data = {'target': [], 'reference': [], 'min': [], 'max': [], 'key': []}
    for r, t in zip(rvars.variables, tvars.variables):
        if r.group == t.group:
            key = ';'.join([r.idfclass, r.idfobject, r.idffield])
            data['target'].append(float(t.value))
            data['reference'].append(float(r.value))
            data['min'].append(r.minimum)
            data['max'].append(r.maximum)
            data['key'].append(key)
    paes = metrics.pae(data['target'], data['reference'], data['min'], data['max'])
    m = {'pae': {}, 'rmse': {}, 'cvrmse': {}, 'mbe': {}, 'nmbe': {}, 'mape': {}}
    for k, p in zip(data['key'], paes):
        m['pae'][k] = p
    m['rmse']['all inputs'] = metrics.rmse(data['target'], data['reference'])
    m['cvrmse']['all inputs'] = metrics.cvrmse(data['target'], data['reference'])
    m['mbe']['all inputs'] = metrics.mbe(data['target'], data['reference'])
    m['nmbe']['all inputs'] = metrics.nmbe(data['target'], data['reference'])
    m['mape']['all inputs'] = metrics.mape(data['target'], data['reference'])
    return m

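# Several snippets in this corpus call a project-local `metrics` module that is
# not shown. A minimal sketch of what those calls assume follows, using the
# standard definitions of RMSE, CV(RMSE), MBE, NMBE, and MAPE; `pae`
# (per-variable absolute error normalized by the tuning range) is inferred from
# its call signature above and is an assumption, not a confirmed implementation.
import numpy as np


def rmse(pred, true):
    pred, true = np.asarray(pred, dtype=float), np.asarray(true, dtype=float)
    return float(np.sqrt(np.mean((pred - true) ** 2)))


def cvrmse(pred, true):
    # Coefficient of variation of the RMSE, normalized by the mean of the truth.
    return rmse(pred, true) / float(np.mean(np.asarray(true, dtype=float)))


def mbe(pred, true):
    # Mean bias error: positive when predictions overshoot on average.
    return float(np.mean(np.asarray(pred, dtype=float) - np.asarray(true, dtype=float)))


def nmbe(pred, true):
    return mbe(pred, true) / float(np.mean(np.asarray(true, dtype=float)))


def mape(pred, true):
    pred, true = np.asarray(pred, dtype=float), np.asarray(true, dtype=float)
    return float(np.mean(np.abs((pred - true) / true)))


def pae(target, reference, minimum, maximum):
    # Hypothetical: absolute error of each input scaled by its allowed range.
    t, r = np.asarray(target, dtype=float), np.asarray(reference, dtype=float)
    lo, hi = np.asarray(minimum, dtype=float), np.asarray(maximum, dtype=float)
    return np.abs(t - r) / (hi - lo)
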
def main():
    if sys.argv[1] == 'daily':
        print('Using daily data...')
        path_to_dataset = '../data/household_power_consumption_daily.csv'
        model, y_test, predictions = run(path_to_dataset, 10, 50, 1.0)
    elif sys.argv[1] == 'monthly':
        print('Using monthly data...')
        path_to_dataset = '../data/household_power_consumption_monthly.csv'
        model, y_test, predictions = run(path_to_dataset, 30, 5, 1.0)
    elif sys.argv[1] == 'hourly':
        print('Using hourly data...')
        path_to_dataset = '../data/household_power_consumption_hourly.csv'
        model, y_test, predictions = run(path_to_dataset, 30, 50, 1.0)
    else:
        print('Using minute data...')
        path_to_dataset = '../data/household_power_consumption.csv'
        model, y_test, predictions = run(path_to_dataset)

    # save for later use
    model.save_weights('../output/lstm.h5', overwrite=True)
    # model.load_weights('../output/lstm.h5')
    graph_utils.plot('lstm', predictions, y_test)
    print('RMSE: %.4f' % metrics.rmse(predictions, y_test))
    print('MAPE: %.4f' % metrics.mape(predictions, y_test))

def test_TRTF():
    dense_mat = pd.read_csv('./datasets/Seattle-data-set/mat.csv', index_col=0)
    rm = pd.read_csv('./datasets/Seattle-data-set/RM_mat.csv', index_col=0)
    dense_mat = dense_mat.values
    rm = rm.values
    binary_mat2 = np.round(rm + 0.5 - 0.2)
    nan_mat2 = binary_mat2.copy()
    nan_mat2[nan_mat2 == 0] = np.nan
    sparse_mat2 = np.multiply(nan_mat2, dense_mat)
    pos2 = np.where((dense_mat != 0) & (binary_mat2 == 0))
    sparse_tensor2 = sparse_mat2.reshape([sparse_mat2.shape[0], 28, 288])
    # sparse_tensor_ori, rank = 30, time_lags = (1, 2, 24),
    # burn_iter = 1100, gibbs_iter = 100
    # TRTF(sparse_tensor_ori, rank=30, time_lags=(1, 2, 24),
    #      lambda_u=500, lambda_v=500, lambda_ar=500,
    #      eta=2e-2, lambda_theta=100, maxiter=1000)
    TRTF_res2 = TRTF(sparse_tensor2, rank=50, time_lags=(1, 2, 288),
                     maxiter=200).reshape(dense_mat.shape)
    TRTF_res2_mape2 = mape(dense_mat[pos2], TRTF_res2[pos2])
    TRTF_res2_rmse2 = rmse(dense_mat[pos2], TRTF_res2[pos2])
    print("TRTF_res2_mape2", TRTF_res2_mape2)
    print("TRTF_res2_rmse2", TRTF_res2_rmse2)

def cross_val_score(model=None, data=None, cv=10, scorer=rmse):
    data = np.array(data)
    print(data.shape)
    chunks = chunk(data, cv)
    score = list()
    for i in range(cv):  # iterate over all cv folds, not a hard-coded 10
        iter_data = list()
        for j in range(len(chunks)):
            if j != i:
                iter_data.extend(chunks[j])
        pred_data = np.array(chunks[i])
        iter_data = np.array(iter_data)
        model.fit(iter_data)
        pred = model.predict(pred_data)
        # use the scorer argument instead of silently ignoring it
        score.append(scorer(pred_data[:, model.formatizer['value']], pred))
        print(score[i])
    return np.mean(score)

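# `chunk` above is an external helper. A plausible stand-in that splits the
# rows of `data` into `cv` nearly equal folds; the name and behavior are
# assumptions inferred from how cross_val_score consumes its output.
import numpy as np


def chunk(data, cv):
    # np.array_split tolerates sizes that do not divide evenly.
    return [fold.tolist() for fold in np.array_split(np.asarray(data), cv)]
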
def test_HaLRTC():
    dense_mat = pd.read_csv('./datasets/Seattle-data-set/mat.csv', index_col=0)
    rm = pd.read_csv('./datasets/Seattle-data-set/RM_mat.csv', index_col=0)
    dense_mat = dense_mat.values
    rm = rm.values
    binary_mat2 = np.round(rm + 0.5 - 0.2)
    nan_mat2 = binary_mat2.copy()
    nan_mat2[nan_mat2 == 0] = np.nan
    sparse_mat2 = np.multiply(nan_mat2, dense_mat)
    pos2 = np.where((dense_mat != 0) & (binary_mat2 == 0))
    sparse_tensor2 = sparse_mat2.reshape([sparse_mat2.shape[0], 28, 288])
    # sparse_tensor_ori, rank = 30, time_lags = (1, 2, 24),
    # burn_iter = 1100, gibbs_iter = 100
    HaLRTC_res2 = HaLRTC(sparse_tensor2, rho=1e-5, epsilon=1e-4,
                         maxiter=200).reshape(dense_mat.shape)
    HaLRTC_res2_mape2 = mape(dense_mat[pos2], HaLRTC_res2[pos2])
    HaLRTC_res2_rmse2 = rmse(dense_mat[pos2], HaLRTC_res2[pos2])
    print("HaLRTC_res2_mape2", HaLRTC_res2_mape2)
    print("HaLRTC_res2_rmse2", HaLRTC_res2_rmse2)

def test_TRMF():
    dense_mat = pd.read_csv('./datasets/Seattle-data-set/mat.csv', index_col=0)
    rm = pd.read_csv('./datasets/Seattle-data-set/RM_mat.csv', index_col=0)
    dense_mat = dense_mat.values
    rm = rm.values
    binary_mat2 = np.round(rm + 0.5 - 0.2)
    nan_mat2 = binary_mat2.copy()
    nan_mat2[nan_mat2 == 0] = np.nan
    sparse_mat2 = np.multiply(nan_mat2, dense_mat)
    pos2 = np.where((dense_mat != 0) & (binary_mat2 == 0))
    # sparse_tensor2 = sparse_mat2.reshape([sparse_mat2.shape[0], 28, 288])
    # def TRMF(sparse_mat, lambda_w=500, lambda_x=500, lambda_theta=500,
    #          eta=0.03, time_lags=(1, 2, 144), maxiter=200)
    TRMF_res2 = TRMF(sparse_mat2, lambda_w=500, lambda_x=500, lambda_theta=500,
                     eta=0.03, time_lags=(1, 2, 3, 4, 144), maxiter=200)
    # print(TRMF_res2)
    # print(dense_mat)
    TRMF_res2_mape2 = mape(dense_mat[pos2], TRMF_res2[pos2])
    TRMF_res2_rmse2 = rmse(dense_mat[pos2], TRMF_res2[pos2])
    print("TRMF_res2_mape2", TRMF_res2_mape2)
    print("TRMF_res2_rmse2", TRMF_res2_rmse2)

def test_BTRMF():
    dense_mat = pd.read_csv('./datasets/Seattle-data-set/mat.csv', index_col=0)
    rm = pd.read_csv('./datasets/Seattle-data-set/RM_mat.csv', index_col=0)
    dense_mat = dense_mat.values
    rm = rm.values
    binary_mat2 = np.round(rm + 0.5 - 0.2)
    nan_mat2 = binary_mat2.copy()
    nan_mat2[nan_mat2 == 0] = np.nan
    sparse_mat2 = np.multiply(nan_mat2, dense_mat)
    pos2 = np.where((dense_mat != 0) & (binary_mat2 == 0))
    # sparse_tensor2 = sparse_mat2.reshape([sparse_mat2.shape[0], 28, 288])
    BTRMF_res2 = BTRMF(sparse_mat2, rank=50, time_lags=(1, 2, 288),
                       burn_iter=100, gibbs_iter=20)
    BTRMF_res2_mape2 = mape(dense_mat[pos2], BTRMF_res2[pos2])
    BTRMF_res2_rmse2 = rmse(dense_mat[pos2], BTRMF_res2[pos2])
    print("BTRMF_res2_mape2", BTRMF_res2_mape2)
    print("BTRMF_res2_rmse2", BTRMF_res2_rmse2)

def val_model(model, criterion):
    dset_sizes = len(val_dataset)
    model.eval()
    running_loss = 0.0
    running_corrects = 0
    cont = 0
    outPre = []
    outLabel = []
    pres_list = []
    labels_list = []
    for data in val_loader:
        inputs, labels, month = data['X']['x'], data['Y'], data['X']['m']
        x = inputs[0]
        labels = labels.type(torch.float).cuda()
        inputs = [x.cuda()]
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        if cont == 0:
            outPre = outputs.data.cpu()
            outLabel = labels.data.cpu()
        else:
            outPre = torch.cat((outPre, outputs.data.cpu()), 0)
            outLabel = torch.cat((outLabel, labels.data.cpu()), 0)
        pres_list += outputs.cpu().numpy().tolist()
        labels_list += labels.data.cpu().numpy().tolist()
        running_loss += loss.item() * outputs.size(0)
        cont += 1
    # this line was commented out in the original but is required below
    labels_arr = np.array(labels_list)
    pre_arr = np.array(pres_list)
    val_score = score(labels_arr, pre_arr)
    val_rmse = rmse(labels_arr, pre_arr)
    return val_score, val_rmse

def crossval_and_predict(self, n_folds: int, df: pd.DataFrame,
                         df_test: pd.DataFrame, feature_col: list,
                         target_col: str, model_params: dict):
    oof = np.zeros((len(df)))
    cv_preds = np.zeros((len(df_test)))
    kfold = KFold(n_splits=n_folds, random_state=self.random_state,
                  shuffle=True)
    for train_idx, valid_idx in kfold.split(df):
        X_train, y_train = df[feature_col].iloc[train_idx], \
            df[target_col].iloc[train_idx]
        X_valid, y_valid = df[feature_col].iloc[valid_idx], \
            df[target_col].iloc[valid_idx]
        model_params['n_estimators'] = 5000
        model_params['learning_rate'] = 1e-2
        model = LGBMRegressor(**model_params)
        model.fit(X_train, y_train,
                  eval_set=[(X_valid, y_valid)],  # expects a list of (X, y) pairs
                  early_stopping_rounds=500,
                  verbose=0)
        oof[valid_idx] = model.predict(X_valid)
        cv_preds += model.predict(df_test[feature_col]) / n_folds
    rmse_score = rmse(df[target_col], oof)
    return rmse_score, cv_preds

def prediction_pipeline(model, X_train, X_test, y_train, y_test):
    """
    This function performs a pipeline for prediction and scoring on the test set.

    param model: Estimator
    param X_train: Train dataframe without target.
    param X_test: Test dataframe without target.
    param y_train: Train target.
    param y_test: Test target.
    return: Predictions and scores
    """
    model.fit(X_train, y_train)
    predict_train = model.predict(X_train)
    predict_test = model.predict(X_test)
    score_train = rmse(y_train, predict_train)
    score_test = rmse(y_test, predict_test)
    return predict_train, predict_test, score_train, score_test

def update(self, test_predictions, val_predictions=None, year=None):
    self.test_predictions = self.test_predictions.append(test_predictions)
    try:
        self.test_metrics[str(year + 2014) + '/' + str(year + 15)] = [
            metrics.crps(test_predictions),
            metrics.nll(test_predictions),
            metrics.mae(test_predictions),
            metrics.rmse(test_predictions),
            metrics.smape(test_predictions),
            metrics.corr(test_predictions),
            np.ma.masked_invalid(metrics.mb_log(test_predictions)).mean(),
            metrics.sdp(test_predictions)
        ]
    except:
        pass
    if year == 3:
        self.test_metrics['Average'] = self.test_metrics.mean(1)
        self.test_metrics['Average'].loc['SDP'] = np.abs(
            self.test_metrics.loc['SDP'].values[-1]).mean()
    try:
        self.val_predictions = self.val_predictions.append(val_predictions)
        self.val_metrics[str(year + 2013) + '/' + str(year + 14)] = [
            metrics.crps(val_predictions),
            metrics.nll(val_predictions),
            metrics.mae(val_predictions),
            metrics.rmse(val_predictions),
            metrics.smape(val_predictions),
            metrics.corr(val_predictions),
            metrics.mb_log(val_predictions).mean(),
            metrics.sdp(val_predictions)
        ]
    except:
        pass
    self.val_metrics['Average'] = self.val_metrics.mean(1)
    self.val_metrics['Average'].loc['SDP'] = np.abs(
        self.val_metrics.loc['SDP'].values[-1]).mean()
    self.test_metrics['Average'] = self.test_metrics.mean(1)
    self.test_metrics['Average'].loc['SDP'] = np.abs(
        self.test_metrics.loc['SDP'].values[-1]).mean()

def crossval_and_predict(self, n_folds: int, df: pd.DataFrame,
                         df_test: pd.DataFrame, feature_col: list,
                         target_col: str, model_params: dict):
    oof = np.zeros((len(df)))
    cv_preds = np.zeros((len(df_test)))
    kfold = KFold(n_splits=n_folds, random_state=self.random_state,
                  shuffle=True)
    for train_idx, valid_idx in kfold.split(df):
        X_train, y_train = df[feature_col].values[train_idx], \
            df[target_col].values[train_idx].reshape(-1, 1)
        X_valid, y_valid = df[feature_col].values[valid_idx], \
            df[target_col].values[valid_idx].reshape(-1, 1)
        X_test = df_test[feature_col].values
        params = self.default_params()
        params['seed'] = self.random_state
        params['n_d'] = model_params['n_d']
        params['n_a'] = model_params['n_d']
        params['gamma'] = model_params['gamma']
        params['momentum'] = model_params['momentum']
        params['n_steps'] = model_params['n_steps']
        params['n_shared'] = model_params['n_shared']
        params['n_independent'] = model_params['n_independent']
        logging.info(
            f'Parameters used for TabNet supervised training: {params}')
        unsupervised_model = TabNetPretrainer(**params)
        unsupervised_model.fit(X_train=X_train,
                               eval_set=[X_valid],
                               pretraining_ratio=0.5,
                               max_epochs=20)
        model = TabNetRegressor(**params)
        model.fit(X_train=X_train,
                  y_train=y_train,
                  eval_set=[(X_valid, y_valid)],
                  eval_name=['valid'],
                  eval_metric=['rmse'],
                  max_epochs=100,
                  patience=10,
                  batch_size=1024,
                  from_unsupervised=unsupervised_model)
        oof[valid_idx] = model.predict(X_valid).squeeze()
        cv_preds += model.predict(X_test).squeeze() / n_folds
        logging.info(
            f'Finished fold with score {rmse(y_valid, oof[valid_idx])}')
    rmse_score = rmse(df[target_col], oof)
    return rmse_score, cv_preds

def main():
    # minute
    y_test, predictions = run()
    # hourly
    # y_test, predictions = run(50, 1.0)
    # daily
    # y_test, predictions = run(50, 1.0)
    graph_utils.plot('linear', predictions, y_test)
    print('RMSE: %.4f' % metrics.rmse(predictions, y_test))
    print('MAPE: %.4f' % metrics.mape(predictions, y_test))

def output_metrics(estresults, actresults):
    m = {'rmse': {}, 'cvrmse': {}, 'mbe': {}, 'nmbe': {}, 'mape': {}}
    estres = column_vectors(estresults)
    actres = column_vectors(actresults)
    for col in actres:
        try:
            m['rmse'][col] = metrics.rmse(estres[col], actres[col])
            m['cvrmse'][col] = metrics.cvrmse(estres[col], actres[col])
            m['mbe'][col] = metrics.mbe(estres[col], actres[col])
            m['nmbe'][col] = metrics.nmbe(estres[col], actres[col])
            m['mape'][col] = metrics.mape(estres[col], actres[col])
        except:
            # If anything crashes here, just ignore the column in the output.
            pass
    return m

def main():
    # minute
    model, y_test, predictions = run()
    # hourly
    # model, y_test, predictions = run(30, 50, 1.0)
    # daily
    # model, y_test, predictions = run(100, 50, 1.0)

    # save for later use
    model.save_weights('../output/lstm.h5', overwrite=True)
    # model.load_weights('../output/lstm.h5')
    graph_utils.plot('lstm', predictions, y_test)
    print('RMSE: %.4f' % metrics.rmse(predictions, y_test))
    print('MAPE: %.4f' % metrics.mape(predictions, y_test))

def _run_base_model_dfm(dfTrain, dfTest, folds, dfm_params):
    fd = FeatureDictionary(dfTrain=dfTrain, dfTest=dfTest,
                           numeric_cols=config.NUMERIC_COLS,
                           ignore_cols=config.IGNORE_COLS)
    data_parser = DataParser(feat_dict=fd)
    Xi_train, Xv_train, y_train = data_parser.parse(df=dfTrain, has_label=True)
    Xi_test, Xv_test, ids_test = data_parser.parse(df=dfTest)

    dfm_params["feature_size"] = fd.feat_dim
    dfm_params["field_size"] = len(Xi_train[0])

    y_train_meta = np.zeros((dfTrain.shape[0], 1), dtype=float)
    y_test_meta = np.zeros((dfTest.shape[0], 1), dtype=float)
    _get = lambda x, l: [x[i] for i in l]
    gini_results_cv = np.zeros(len(folds), dtype=float)
    gini_results_epoch_train = np.zeros((len(folds), dfm_params["epoch"]), dtype=float)
    gini_results_epoch_valid = np.zeros((len(folds), dfm_params["epoch"]), dtype=float)
    for i, (train_idx, valid_idx) in enumerate(folds):
        Xi_train_, Xv_train_, y_train_ = _get(Xi_train, train_idx), _get(Xv_train, train_idx), _get(y_train, train_idx)
        Xi_valid_, Xv_valid_, y_valid_ = _get(Xi_train, valid_idx), _get(Xv_train, valid_idx), _get(y_train, valid_idx)

        dfm = DeepFM(**dfm_params)
        dfm.fit(Xi_train_, Xv_train_, y_train_, Xi_valid_, Xv_valid_, y_valid_)

        y_train_meta[valid_idx, 0] = dfm.predict(Xi_valid_, Xv_valid_)
        y_test_meta[:, 0] += dfm.predict(Xi_test, Xv_test)

        gini_results_cv[i] = rmse(y_valid_, y_train_meta[valid_idx])
        gini_results_epoch_train[i] = dfm.train_result
        gini_results_epoch_valid[i] = dfm.valid_result

    y_test_meta /= float(len(folds))

    # save result
    if dfm_params["use_fm"] and dfm_params["use_deep"]:
        clf_str = "DeepFM"
    elif dfm_params["use_fm"]:
        clf_str = "FM"
    elif dfm_params["use_deep"]:
        clf_str = "DNN"
    print("%s: %.5f (%.5f)" % (clf_str, gini_results_cv.mean(), gini_results_cv.std()))
    filename = "%s_Mean%.5f_Std%.5f.csv" % (clf_str, gini_results_cv.mean(), gini_results_cv.std())
    _make_submission(ids_test, y_test_meta, filename)

    _plot_fig(gini_results_epoch_train, gini_results_epoch_valid, clf_str)

    return y_train_meta, y_test_meta

def main():
    # dataset has format like [user_id, song_id, play_count]
    file = 'train_triplets.txt'
    print("Loading data...")
    load_data(file)
    print("Starting evaluation...")
    calc_neighbours()
    print("Finished evaluations.")
    print_top_songs_for_user(1)
    print("Starting cross validation...")
    print("RMSE result: ", str(rmse(train_set, test_set)))
    print("MAE result: ", str(mae(train_set, test_set)))
    print("NDCG result: ", str(ndcg(train_set, test_set)))

def evaluate(self, observed, estimated, zone, res=0.5):
    '''
    Returns an evaluation between observed and estimated rainfall maps by
    computing the following metrics:
    ['BIAS', 'CORRELATION', 'Nash-Sutcliffe', 'RMSE', 'MAE', 'MEAN_OBS', 'MEAN_EST']

    Inputs:
        observed  - 2D array. Observed rainfall map.
        estimated - 2D array. Estimated rainfall map.
        zone      - (2,2) tuple. Evaluation study zone [km x km]
    Optional:
        res       - scalar. Resolution for comparison, [km].
    Outputs:
        metrics   - dictionary. Statistical metrics:
                    ['bias', 'corr', 'nash', 'rmse', 'mae', 'mean_obs', 'mean_est']
    '''
    # We neglect the area that is not estimated, for comparison purposes.
    estimated[estimated <= -999] = -999
    observed[estimated <= -999] = -999
    ((x0, x1), (y0, y1)) = zone

    # Cut out the zone for evaluation
    t1, t2, t3, t4 = int(y0 / res), int(y1 / res), int(x0 / res), int(x1 / res)
    observed = observed[t1:t2, t3:t4]
    estimated = estimated[t1:t2, t3:t4]

    est = estimated[estimated != -999].flatten()  # '<>' is Python 2 only
    obs = observed[observed != -999].flatten()

    stats = dict()
    stats['bias'] = metrics.bias(obs, est)
    stats['corr'] = metrics.corr(obs, est)
    stats['nash'] = metrics.nash(obs, est)
    stats['rmse'] = metrics.rmse(obs, est)
    stats['mae'] = metrics.mae(obs, est)
    stats['mean_obs'] = metrics.average(obs)
    stats['mean_est'] = metrics.average(est)

    # additional metrics can be added
    ##stats['likelihood'] = metrics.likelihood(obs, est)
    ##stats['mape'] = metrics.mape(obs, est)
    ##stats['mse'] = metrics.mse(obs, est)
    ##stats['mspe'] = metrics.mspe(obs, est)
    ##stats['rmspe'] = metrics.rmspe(obs, est)
    return stats

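# metrics.bias, metrics.corr and metrics.nash above are not shown. Sketches
# using the textbook definitions (additive bias, Pearson correlation,
# Nash-Sutcliffe efficiency); the exact sign and normalization conventions of
# the original module are assumptions.
import numpy as np


def bias(obs, est):
    return float(np.mean(np.asarray(est, dtype=float) - np.asarray(obs, dtype=float)))


def corr(obs, est):
    return float(np.corrcoef(np.asarray(obs, dtype=float),
                             np.asarray(est, dtype=float))[0, 1])


def nash(obs, est):
    obs, est = np.asarray(obs, dtype=float), np.asarray(est, dtype=float)
    # NSE = 1 - sum((obs - est)^2) / sum((obs - mean(obs))^2); 1 is a perfect fit.
    return float(1.0 - np.sum((obs - est) ** 2) / np.sum((obs - obs.mean()) ** 2))
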
def forward(self, u, v, r_matrix):
    u_z, v_z = self.gcl1(self.u_features, self.v_features,
                         range(self.num_users), range(self.num_items),
                         r_matrix)
    u_z, v_z = self.gcl2(u_z, v_z, u, v, r_matrix)

    u_f = torch.relu(self.denseu1(self.u_features_side[u]))
    v_f = torch.relu(self.densev1(self.v_features_side[v]))

    u_h = self.denseu2(F.dropout(torch.cat((u_z, u_f), 1), self.dropout))
    v_h = self.densev2(F.dropout(torch.cat((v_z, v_f), 1), self.dropout))

    output, m_hat = self.bilin_dec(u_h, v_h, u, v)

    r_mx = r_matrix.index_select(1, u).index_select(2, v)
    loss = softmax_cross_entropy(output, r_mx.float())
    rmse_loss = rmse(m_hat, r_mx.float())
    return output, loss, rmse_loss

def lambdas_cross_validate(self, lambdas, fold_ct):
    # Use cross-validation to test the lambdas. Set self.lambda_cv to a
    # series with index (lambda, fold index) and one column, RMSE.
    index = pd.MultiIndex.from_product([lambdas, range(fold_ct)],
                                       names=["lambda", "fold"])
    out = pd.Series(index=index, name="RMSE", dtype=np.float64)
    for lambda_ in lambdas:
        folds = sklearn.model_selection.KFold(fold_ct, shuffle=False) \
            .split(self.features_train, self.incidence_train)
        for (fold_idx, (cv_train_is, cv_test_is)) in enumerate(folds):
            X_train = self.features_train.iloc[cv_train_is]
            y_train = self.incidence_train.iloc[cv_train_is]
            X_test = self.features_train.iloc[cv_test_is]
            y_test = self.incidence_train.iloc[cv_test_is]
            betas = linear_fit(X_train, y_train, self.penalty, lambda_)
            y_predicted = linear_predict(X_test, betas, y_test.index)
            out.loc[lambda_, fold_idx] = metrics.rmse(y_test, y_predicted)
    self.lambda_cv = out
    self.lambda_cv_means = out.groupby(["lambda"]).mean()  # avg across folds

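# linear_fit / linear_predict above are project helpers. If self.penalty is an
# L2 (ridge) penalty, the closed form below is one consistent reading; the
# helper names, argument order, and penalty handling are assumptions.
import numpy as np
import pandas as pd


def linear_fit(X, y, penalty="l2", lambda_=0.0):
    A = np.asarray(X, dtype=float)
    b = np.asarray(y, dtype=float)
    # Ridge solution: betas = (X^T X + lambda * I)^{-1} X^T y
    k = A.shape[1]
    return np.linalg.solve(A.T @ A + lambda_ * np.eye(k), A.T @ b)


def linear_predict(X, betas, index=None):
    return pd.Series(np.asarray(X, dtype=float) @ betas, index=index)
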
def main():
    if sys.argv[1] == 'daily':
        print('Using daily data...')
        path_to_dataset = '../data/household_power_consumption_daily.csv'
        y_test, predictions = run(path_to_dataset, 50, 1.0)
    elif sys.argv[1] == 'monthly':
        print('Using monthly data...')
        path_to_dataset = '../data/household_power_consumption_monthly.csv'
        y_test, predictions = run(path_to_dataset, 5, 1.0)
    elif sys.argv[1] == 'hourly':
        print('Using hourly data...')
        path_to_dataset = '../data/household_power_consumption_hourly.csv'
        y_test, predictions = run(path_to_dataset, 50, 1.0)
    else:
        print('Using minute data...')
        path_to_dataset = '../data/household_power_consumption.csv'
        y_test, predictions = run(path_to_dataset)
    graph_utils.plot('linear', predictions, y_test)
    print('RMSE: %.4f' % metrics.rmse(predictions, y_test))
    print('MAPE: %.4f' % metrics.mape(predictions, y_test))

def test_PPCA():
    dense_mat = pd.read_csv('./datasets/Seattle-data-set/mat.csv', index_col=0)
    rm = pd.read_csv('./datasets/Seattle-data-set/RM_mat.csv', index_col=0)
    dense_mat = dense_mat.values
    rm = rm.values
    binary_mat2 = np.round(rm + 0.5 - 0.2)
    nan_mat2 = binary_mat2.copy()
    nan_mat2[nan_mat2 == 0] = np.nan
    sparse_mat2 = np.multiply(nan_mat2, dense_mat)
    pos2 = np.where((dense_mat != 0) & (binary_mat2 == 0))
    # sparse_tensor2 = sparse_mat2.reshape([sparse_mat2.shape[0], 28, 288])
    PPCA_res2 = PPCA(sparse_mat2, 20)
    PPCA_res2_mape2 = mape(dense_mat[pos2], PPCA_res2[pos2])
    PPCA_res2_rmse2 = rmse(dense_mat[pos2], PPCA_res2[pos2])
    print("PPCA_res2_mape2", PPCA_res2_mape2)
    print("PPCA_res2_rmse2", PPCA_res2_rmse2)

def __call__(self, trial):
    df_train, df_valid = train_test_split(self.df, test_size=0.1,
                                          random_state=self.random_state)
    X_train, y_train = df_train[self.feature_col].values, \
        df_train[self.target_col].values.reshape(-1, 1)
    X_valid, y_valid = df_valid[self.feature_col].values, \
        df_valid[self.target_col].values.reshape(-1, 1)
    logging.info(
        f'Train/valid split: {X_train.shape[0]} for training, '
        f'{X_valid.shape[0]} for validation')
    n_d = trial.suggest_int('n_d', 8, 64)
    params = self.default_params
    params['n_d'] = n_d
    params['n_a'] = n_d
    params['seed'] = self.random_state
    params['n_steps'] = trial.suggest_int('n_steps', 3, 10)
    params['n_shared'] = trial.suggest_int('n_shared', 2, 5)
    params['n_independent'] = trial.suggest_int('n_independent', 2, 5)
    params['momentum'] = trial.suggest_float('momentum', 0.01, 0.4)
    params['gamma'] = trial.suggest_float('gamma', 1.0, 2.0)
    model = TabNetRegressor(**params)
    model.fit(X_train=X_train,
              y_train=y_train,
              eval_set=[(X_valid, y_valid)],
              eval_metric=['rmse'],
              max_epochs=20,
              patience=10,
              batch_size=1024)
    score = rmse(y_valid, model.predict(X_valid).squeeze())
    return score

utilMat = svd(utilMat, k=15)
pred = []  # to store the predicted ratings
for _, row in test.iterrows():
    user = row['userId']
    item = row['movieId']
    if user in user_index:
        u_index = user_index[user]
        if item in item_index:
            i_index = item_index[item]
            pred_rating = utilMat[u_index, i_index]
        else:
            pred_rating = np.mean(utilMat[u_index, :])
    else:
        if item in item_index:
            i_index = item_index[item]
            pred_rating = np.mean(utilMat[:, i_index])
        else:
            pred_rating = np.mean(utilMat[:, :])
    pred.append(pred_rating)
error = rmse(test['rating'], pred)
print(error)
errors.append(error)
del error, pred
print(np.mean(errors))  # was a Python 2 print statement

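# The `svd(utilMat, k=15)` call above reconstructs the utility matrix from its
# top-k singular triplets. A minimal dense version; any mean-filling or other
# preprocessing the original helper does is not reproduced here.
import numpy as np


def svd(mat, k):
    U, s, Vt = np.linalg.svd(np.asarray(mat, dtype=float), full_matrices=False)
    # Keep only the k largest singular values for a rank-k approximation.
    return (U[:, :k] * s[:k]) @ Vt[:k, :]
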
def fit(self, X, y, validation_data=None):
    y = y.reshape(-1, 1)
    start_time = time.time()
    l = y.shape[0]
    train_idx_shuffle = np.arange(l)
    epoch_best_ = 4
    rmsle_best_ = 10.
    cycle_num = 0
    decay_steps = self.params["first_decay_steps"]
    global_step = 0
    global_step_exp = 0
    global_step_total = 0
    snapshot_num = 0
    learning_rate_need_big_jump = False
    total_rmse = 0.
    rmse_decay = 0.9
    for epoch in range(self.params["epoch"]):
        print("epoch: %d" % (epoch + 1))
        np.random.seed(epoch)
        if snapshot_num >= self.params["snapshot_before_restarts"] and self.params["shuffle_with_replacement"]:
            train_idx_shuffle = np.random.choice(np.arange(l), l)
        else:
            np.random.shuffle(train_idx_shuffle)
        batches = self._get_batch_index(train_idx_shuffle, self.params["batch_size_train"])
        for i, idx in enumerate(batches):
            if snapshot_num >= self.params["max_snapshot_num"]:
                break
            if learning_rate_need_big_jump:
                learning_rate = self.params["lr_jump_rate"] * self.params["max_lr_exp"]
                learning_rate_need_big_jump = False
            else:
                learning_rate = self.params["max_lr_exp"]
            lr = _exponential_decay(learning_rate=learning_rate,
                                    global_step=global_step_exp,
                                    decay_steps=decay_steps,  # self.params["num_update_each_epoch"]
                                    decay_rate=self.params["lr_decay_each_epoch_exp"])
            feed_dict = self._get_feed_dict(X, idx, dropout=0.1, training=False)
            feed_dict[self.target] = y[idx]
            feed_dict[self.learning_rate] = lr
            feed_dict[self.training] = True
            rmse_, opt = self.sess.run((self.rmse, self.train_op), feed_dict=feed_dict)
            if self.params["RUNNING_MODE"] != "submission":
                # scaled rmsle = (1/scale_) * (raw rmsle)
                # raw rmsle = scaled rmsle * scale_
                total_rmse = rmse_decay * total_rmse + (1. - rmse_decay) * rmse_ * self.target_scaler.scale_
                self.logger.info("[batch-%d] train-rmsle=%.5f, lr=%.5f [%.1f s]" % (
                    i + 1, total_rmse, lr, time.time() - start_time))
            # save model
            global_step += 1
            global_step_exp += 1
            global_step_total += 1
            if self.params["enable_snapshot_ensemble"]:
                if global_step % decay_steps == 0:
                    cycle_num += 1
                    if cycle_num % self.params["snapshot_every_num_cycle"] == 0:
                        snapshot_num += 1
                        print("snapshot num: %d" % snapshot_num)
                        self._save_state()
                        self.logger.info("[model-%d] cycle num=%d, current lr=%.5f [%.5f]" % (
                            snapshot_num, cycle_num, lr, time.time() - start_time))
                    # reset global_step and first_decay_steps
                    decay_steps = self.params["first_decay_steps"]
                    if self.params["lr_jump_exp"] or snapshot_num >= self.params["snapshot_before_restarts"]:
                        learning_rate_need_big_jump = True
                    if snapshot_num >= self.params["snapshot_before_restarts"]:
                        global_step = 0
                        global_step_exp = 0
                        decay_steps *= self.params["t_mul"]
            if validation_data is not None and global_step_total % self.params["eval_every_num_update"] == 0:
                y_pred = self._predict(validation_data[0])
                y_valid_inv = self.target_scaler.inverse_transform(validation_data[1])
                y_pred_inv = self.target_scaler.inverse_transform(y_pred)
                rmsle = rmse(y_valid_inv, y_pred_inv)
                self.logger.info("[step-%d] train-rmsle=%.5f, valid-rmsle=%.5f, lr=%.5f [%.1f s]" % (
                    global_step_total, total_rmse, rmsle, lr, time.time() - start_time))
                if rmsle < rmsle_best_:
                    rmsle_best_ = rmsle
                    epoch_best_ = epoch + 1
    return rmsle_best_, epoch_best_

os.close(cfile)
runner = eplus.EnergyPlus()
eplus_data = runner.run(candidate_filepath, eplus_weather, eplus_schedule,
                        eplus_params['output_directory'])
if eplus_data is None or user_data is None:
    if eplus_data is None:
        logger.error('evaluator() :: EnergyPlus output is None.')
    elif user_data is None:
        logger.error('evaluator() :: User data is None.')
    fitness = WORST_FITNESS
else:
    ep = utilities.column_vectors(eplus_data)
    ud = utilities.column_vectors(user_data)
    errors = {}
    for key in ud:
        if 'Date/Time' not in key:
            errors[key] = metrics.rmse(ep[key], ud[key])
    if eplus_tune_keys is None or len(eplus_tune_keys) == 0:
        fitness = sum([errors[k] for k in errors if errors[k] is not None])
    else:
        fitness = 0
        for k in eplus_tune_keys:
            k = k.strip()
            try:
                fitness += errors[k]
            except KeyError:
                logger.warning('evaluator() :: Tune key {} does not exist in model output.'.format(k))
            except TypeError:
                logger.warning('evaluator() :: Tune key {} has error value None and is excluded from fitness.'.format(k))
try:
    os.remove(candidate_filepath)
except:
    pass  # cleanup failure is non-fatal; the original snippet is truncated here

if __name__ == "__main__":
    # Matrix of movie ratings
    train_data, test_data = get_movie_matrix()

    # Get all user mean values
    user_mean = get_user_mean(train_data)

    start = time.time()
    prediction_matrix = pd.DataFrame(index=train_data.index)
    for name, data in train_data.items():  # iteritems() was removed in pandas 2.0
        prediction_matrix[name] = main(train_data, name, k)
        # break
    logging.info("Process done in: {0:.2f} seconds".format(time.time() - start))

    inter_columns = np.intersect1d(prediction_matrix.columns.values,
                                   test_data.columns.values)
    small_pred = prediction_matrix[inter_columns].dropna(how='all')
    small_test = test_data.loc[:, inter_columns].dropna(how='all')
    print("Test Matrix\n", small_test)
    print("Predicted Matrix\n", small_pred.loc[small_test.index, :])

    logging.info('\nMetric Calculations RMSE and MAE')
    rmse_value = metrics.rmse(test_data, prediction_matrix)
    print(f'RMSE:\t{rmse_value}')
    mae_value = metrics.mae(test_data, prediction_matrix)
    print(f'MAE:\t{mae_value}')

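# metrics.rmse / metrics.mae above compare a whole prediction matrix against a
# test matrix. A sketch that aligns the two DataFrames on index and columns and
# ignores cells missing in either one; the NaN handling is an assumption.
import numpy as np


def rmse(test_df, pred_df):
    diff = (pred_df - test_df).to_numpy(dtype=float)  # aligned subtraction
    return float(np.sqrt(np.nanmean(diff ** 2)))


def mae(test_df, pred_df):
    diff = (pred_df - test_df).to_numpy(dtype=float)
    return float(np.nanmean(np.abs(diff)))
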
def main():
    # get model
    model = lstm()
    adam = Adam(lr=lr)
    model.compile(loss='mse', optimizer=adam,
                  metrics=[metrics.rmse, metrics.mape, metrics.ma])
    all_scores_train = []
    all_scores_test = []
    # model.summary()

    # get data
    data = load_data_()  # nodes x slots
    ts1 = time.time()
    trueY_train = []
    predictY_train = []
    trueY_test = []
    predictY_test = []
    for i in range(len(data)):
        print('grid %d.....' % (i))
        ts = time.time()

        # make dataset
        testslots = T * days_test
        trainx, trainy = makedataset(data[i, :-testslots])
        testx, testy = makedataset(data[i, -testslots:])
        print('trainx shape:', trainx.shape)
        print('trainy shape:', trainy.shape)
        print('testx shape:', testx.shape)
        print('testy shape:', testy.shape)

        # scale targets
        print(trainy, testy)
        mmn = MinMaxScaler(feature_range=(-1, 1))
        trainlen = len(trainy)
        Y = np.concatenate([trainy, testy], axis=0)
        Y = mmn.fit_transform(Y.reshape(-1, 1))
        trainy, testy = Y[:trainlen], Y[trainlen:]
        print(trainy.shape, testy.shape)

        # train
        adam = Adam(lr=lr)
        model.compile(loss='mse', optimizer=adam,
                      metrics=[metrics.rmse, metrics.mape, metrics.ma])
        early_stopping = EarlyStopping(monitor='val_rmse', patience=patience,
                                       mode='min')
        history = model.fit(trainx, trainy,
                            epochs=nb_epoch,
                            batch_size=batch_size,
                            validation_split=0.1,
                            callbacks=[early_stopping],
                            verbose=0)

        # evaluate (the original referenced undefined y_train/y_test here;
        # trainy/testy are the variables actually in scope)
        predict_y_train = model.predict([trainx], batch_size=batch_size,
                                        verbose=0)[:, 0:1]
        score = model.evaluate(trainx, trainy, batch_size=batch_size, verbose=0)
        print('Train score: %.6f rmse (norm): %.6f rmse (real): %.6f nrmse: %.6f mape: %.6f ma: %.6f' %
              (score[0], score[1], score[1] * (mmn._max - mmn._min) / 2.,
               score[1] * (mmn._max - mmn._min) / 2. / mmn.inverse_transform(np.mean(trainy)),
               score[2], score[3]))

        predict_y_test = model.predict([testx], batch_size=batch_size,
                                       verbose=0)[:, 0:1]
        score = model.evaluate(testx, testy, batch_size=batch_size, verbose=0)
        print('Test score: %.6f rmse (norm): %.6f rmse (real): %.6f nrmse: %.6f mape: %.6f ma: %.6f' %
              (score[0], score[1], score[1] * (mmn._max - mmn._min) / 2.,
               score[1] * (mmn._max - mmn._min) / 2. / mmn.inverse_transform(np.mean(testy)),
               score[2], score[3]))

        predictY_train.append(mmn.inverse_transform(predict_y_train).reshape(-1).tolist())
        predictY_test.append(mmn.inverse_transform(predict_y_test).reshape(-1).tolist())
        trueY_train.append(mmn.inverse_transform(trainy).reshape(-1).tolist())
        trueY_test.append(mmn.inverse_transform(testy).reshape(-1).tolist())
        print("\nestimate on grid %d, elapsed time (eval): %.3f seconds\n" %
              (i, time.time() - ts))

    # all_scores_train = np.asarray(all_scores_train)
    # all_scores_train = np.mean(all_scores_train, axis=0)
    # all_scores_test = np.asarray(all_scores_test)
    # all_scores_test = np.mean(all_scores_test, axis=0)
    print('\n\n')
    evaluate = lambda y1, y2: (metrics.rmse(y1, y2),
                               metrics.rmse(y1, y2) / np.mean(y1),
                               metrics.mape(y1, y2),
                               metrics.ma(y1, y2))
    print('All Train rmse (real): %.6f nrmse: %.6f mape: %.6f ma: %.6f' %
          (evaluate(trueY_train, predictY_train)))
    print('All Test rmse (real): %.6f nrmse: %.6f mape: %.6f ma: %.6f' %
          (evaluate(trueY_test, predictY_test)))
    print('elapsed time: %3f seconds\n' % (time.time() - ts1))

x_test = x_test[-days_test:, :, -30:]
# y_test = y_test[-days_test:, :]
x_train = x_train[-days_train:, :, -30:]
y_train = y_train[-days_train:, :]

model = model_builder(x_train, y_train, args)
model.fit(x_train, y_train)
pred = model.predict(x_test, y_test)
results[str(fold_num + 2013) + '/' + str(fold_num + 14)] = [
    metrics.crps(pred),
    metrics.nll(pred),
    metrics.mae(pred),
    metrics.rmse(pred),
    metrics.smape(pred),
    metrics.corr(pred),
    metrics.mb_log(pred),
    metrics.sdp(pred)
]
tf.keras.backend.clear_session()

results['Average'] = results.mean(1)
results['Average'].loc['SDP'] = np.abs(results.loc['SDP'].values[-1]).mean()

plt.plot(pred.index, pred['True'], color='black')
plt.plot(pred.index, pred['Pred'], color='red')
plt.fill_between(pred.index,
                 pred['Pred'] - pred['Std'],
                 pred['Pred'] + pred['Std'],

def do_model(all_data, steps, run_model=True):
    _steps = steps
    print("steps:", _steps)
    scaler = MinMaxScaler()
    all_data = scaler.fit_transform(all_data)
    if not run_model:
        return None, None, scaler

    features = all_data[:-_steps]
    labels = all_data[_steps:, -1:]
    tts = train_test_split(features, labels, test_size=0.4)
    X_train = tts[0]
    X_test = tts[1]
    Y_train = tts[2].astype(np.float64)
    Y_test = tts[3].astype(np.float64)

    optimiser = 'adam'
    hidden_neurons = 200
    loss_function = 'mse'
    batch_size = 105
    dropout = 0.056
    inner_hidden_neurons = 269
    dropout_inner = 0.22

    X_train = X_train.reshape((X_train.shape[0], 1, X_train.shape[1]))
    X_test = X_test.reshape(X_test.shape[0], 1, X_test.shape[1])
    print("X train shape:\t", X_train.shape)
    print("X test shape:\t", X_test.shape)
    # print("Y train shape:\t", Y_train.shape)
    # print("Y test shape:\t", Y_test.shape)
    # print("Steps:\t", _steps)

    in_neurons = X_train.shape[2]
    out_neurons = 1

    # Keras 1.x API (output_dim/init/consume_less/nb_epoch)
    model = Sequential()
    gpu_cpu = 'cpu'
    best_weight = BestWeight()
    model.add(LSTM(output_dim=hidden_neurons,
                   input_dim=in_neurons,
                   return_sequences=True,
                   init='uniform',
                   consume_less=gpu_cpu))
    model.add(Dropout(dropout))

    dense_input = inner_hidden_neurons
    model.add(LSTM(output_dim=dense_input,
                   input_dim=hidden_neurons,
                   return_sequences=False,
                   consume_less=gpu_cpu))
    model.add(Dropout(dropout_inner))
    model.add(Activation('relu'))

    model.add(Dense(output_dim=out_neurons, input_dim=dense_input))
    model.add(Activation('relu'))

    model.compile(loss=loss_function, optimizer=optimiser)
    history = model.fit(X_train, Y_train,
                        verbose=0,
                        batch_size=batch_size,
                        nb_epoch=30,
                        validation_split=0.3,
                        shuffle=False,
                        callbacks=[best_weight])

    model.set_weights(best_weight.get_best())
    predicted = model.predict(X_test) + EPS
    rmse_val = rmse(Y_test, predicted)
    metrics = OrderedDict([
        # ('hidden', hidden_neurons),
        ('steps', _steps),
        ('geh', geh(Y_test, predicted)),
        ('rmse', rmse_val),
        ('mape', mean_absolute_percentage_error(Y_test, predicted)),
        # ('smape', smape(predicted, _Y_test)),
        # ('median_pe', median_percentage_error(predicted, Y_test)),
        # ('mase', MASE(_Y_train, _Y_test, predicted)),
        # ('mae', mean_absolute_error(y_true=Y_test, y_pred=predicted)),
        # ('batch_size', batch_size),
        # ('optimiser', optimiser),
        # ('dropout', dropout),
        # ('extra_layer_dropout', dropout_inner),
        # ('extra_layer_neurons', inner_hidden_neurons),
        # ('loss function', loss_function),
        # 'history': history.history
    ])
    return metrics, model, scaler

def get_scores(y_true, y_predict):
    return (dice_coef(y_true, y_predict),
            sensitivity(y_true, y_predict),
            specificity(y_true, y_predict),
            MeanSurfaceDistance(y_true, y_predict),
            mutual_information(y_true, y_predict),
            rmse(y_true, y_predict))

# Preprocessing
X = shuffled.iloc[:, :-1].squeeze()
y = (shuffled.iloc[:, -1:]).T.squeeze()
len_estate = len(y)

# Splitting data (cast the split point to an integer row label)
split_point = int(split * len_estate)
X_train, y_train = X.loc[:split_point], y.loc[:split_point]
X_test = X.loc[split_point + 1:].reset_index(drop=True)
y_test = y.loc[split_point + 1:].reset_index(drop=True)

# Learning tree
print("Please wait; this takes time. You can reduce max_depth if it takes too long.")
tree = DecisionTree(criterion="information_gain", max_depth=max_depth)
tree.fit(X_train, y_train)
tree.plot()

# Printing accuracies for different depths
for depth in range(2, max_depth + 1):
    y_hat = tree.predict(X_test, max_depth=depth)
    print("Depth: ", depth)
    print('\tRMSE: ', rmse(y_hat, y_test))
    print('\tMAE: ', mae(y_hat, y_test))

# Decision Tree Regressor from scikit-learn
dt = DecisionTreeRegressor(random_state=0)
dt.fit(X_train, y_train)
y_hat = pd.Series(dt.predict(X_test))
print('Sklearn RMSE: ', rmse(y_hat, y_test))
print('Sklearn MAE: ', mae(y_hat, y_test))

    k: np.array(v[split_idx:]) for k, v in predictions.items()
}

print()
table = []
print(' & '.join(['step', 'geh', 'mape', 'rmse']) + ' \\\\')
for step in steps:
    # true values
    stepped_vals = flow_values[step:len(predictions)]
    # predicted values
    pred_vals = predictions[step][:-step] + eps
    table.append(OrderedDict([
        ('steps', step),
        ('geh', geh(stepped_vals, pred_vals)),
        ('mape', mape(stepped_vals, pred_vals)),
        ('rmse', rmse(stepped_vals, pred_vals)),
    ]))
print(tabulate.tabulate(table, 'keys', 'latex'))

print("Loading matplotlib")
import matplotlib.pyplot as plt

true_y = []
true_x = []
pred_y = []
print("Predicting data rows: {}".format(data_len - row_count))
progress = pyprind.ProgBar(data_len - row_count, width=50, stream=1)
for row in it:
    progress.update()
    preds = model.run(row)

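# geh() above is the GEH statistic commonly used for traffic volumes. The
# standard per-observation form, averaged over the series, is assumed here;
# the original helper may threshold instead of averaging, and volumes are
# assumed positive so the denominator is nonzero.
import numpy as np


def geh(true_vals, pred_vals):
    m = np.asarray(pred_vals, dtype=float)  # modelled
    c = np.asarray(true_vals, dtype=float)  # counted
    # GEH = sqrt(2 * (m - c)^2 / (m + c))
    return float(np.mean(np.sqrt(2.0 * (m - c) ** 2 / (m + c))))
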
def calculate_train_and_forecast_metrics(
        self, train: pd.DataFrame, oos: pd.DataFrame, target_index: int,
        hps: dict, horizon: int,
        mae_rmse_ignore_when_actual_and_pred_are_zero: bool,
        mape_ignore_when_actual_is_zero: bool):
    train_dataset = TrainDataset(train_df=train,
                                 target_index=target_index,
                                 hyperparams=hps,
                                 horizon=horizon)
    train_loader = DataLoader(train_dataset, batch_size=1, num_workers=1)
    inputs, train_actual = next(iter(train_loader))
    inputs = inputs.to(device=self.device)
    self.net = self.net.to(device=self.device)
    train_pred = self.net(inputs.float())
    train_actual = train_actual[0, 0, :].cpu().numpy()
    train_pred = train_pred[0, 0, :].cpu().detach().numpy()
    forecast_actual = oos.iloc[:horizon, target_index].values
    # the original passed an undefined `train_df` here; `train` is the parameter
    forecast_pred = self.predict(train, target_index, hps, horizon)
    assert train_actual.shape == train_pred.shape
    assert forecast_actual.shape == forecast_pred.shape
    train_dict = {
        'mae': metrics.mae(train_actual, train_pred,
                           mae_rmse_ignore_when_actual_and_pred_are_zero),
        'rmse': metrics.rmse(train_actual, train_pred,
                             mae_rmse_ignore_when_actual_and_pred_are_zero),
        'mape': metrics.mape(train_actual, train_pred,
                             mape_ignore_when_actual_is_zero),
        'presence_accuracy': metrics.presence_accuracy(train_actual, train_pred),
        'peak_accuracy': metrics.peak_accuracy(train_actual, train_pred),
        'total_volume': int(metrics.total_actual_volume(train_actual)),
        'num_timestamps_predicted_on': int(train_pred.shape[0])
    }
    forecast_dict = {
        'mae': metrics.mae(forecast_actual, forecast_pred,
                           mae_rmse_ignore_when_actual_and_pred_are_zero),
        'rmse': metrics.rmse(forecast_actual, forecast_pred,
                             mae_rmse_ignore_when_actual_and_pred_are_zero),
        'mape': metrics.mape(forecast_actual, forecast_pred,
                             mape_ignore_when_actual_is_zero),
        'presence_accuracy': metrics.presence_accuracy(forecast_actual, forecast_pred),
        'peak_accuracy': metrics.peak_accuracy(forecast_actual, forecast_pred),
        'total_volume': int(metrics.total_actual_volume(forecast_actual)),
        'num_time_stamps_predicted_on': int(forecast_pred.shape[0])
    }
    train_metrics = pd.DataFrame.from_dict(train_dict, columns=[None],
                                           orient='index').iloc[:, 0].round(3)
    forecast_metrics = pd.DataFrame.from_dict(forecast_dict, columns=[None],
                                              orient='index').iloc[:, 0].round(3)
    return train_metrics, forecast_metrics

print('Items factorization matrix is:')
# item factorization matrix
print(pd.DataFrame(algo.qi))
print()

# predict a single score
# algo.predict(192, 302, 4, verbose=True)

# show predicted score and actual score side by side; randomly sample
# records with probability limit/100000 (# of records in total)
print('Randomly choose 1-10 records to compare side by side:')
limit = 10
for uid, iid, r, timestamp in data.raw_ratings:
    if random.random() > limit / 100000:
        continue
    if limit == 0:
        break
    algo.predict(uid, iid, r, verbose=True)
    limit -= 1
print()

# show rmse
print('RMSE value of all rated scores is:')
predictions = []
for uid, iid, r, timestamp in data.raw_ratings:
    prediction = (r, algo.predict(uid, iid, r, verbose=False))
    predictions.append(prediction)
metrics.rmse(predictions, verbose=True)

import math

import numpy as np
import tensorflow as tf
import tensorflow.keras.backend as K
from tensorflow.keras.layers import Reshape

import metrics

y_true = np.random.rand(1, 55, 74)
# y_pred = np.random.rand(1, 55, 74)
y_true_tensor = tf.constant(y_true, dtype='float32')
# reuses y_true, so each metric should evaluate to ~0
y_pred_tensor = tf.constant(y_true, dtype='float32')
print("y_true_tensor: " + str(y_true_tensor))
print("y_pred_tensor: " + str(y_pred_tensor))

abs_relative_diff = metrics.abs_relative_diff(y_true_tensor, y_pred_tensor)
squared_relative_diff = metrics.squared_relative_diff(y_true_tensor, y_pred_tensor)
rmse = metrics.rmse(y_true_tensor, y_pred_tensor)
rmse_log = metrics.rmse_log(y_true_tensor, y_pred_tensor)
rmse_scale_invariance_log = metrics.rmse_scale_invariance_log(
    y_true_tensor, y_pred_tensor)

print("abs_relative_diff: " + str(abs_relative_diff))
print("squared_relative_diff: " + str(squared_relative_diff))
print("rmse: " + str(rmse))
print("rmse_log: " + str(rmse_log))
print("rmse_scale_invariance_log: " + str(rmse_scale_invariance_log))

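# metrics.rmse in the test above operates on tf tensors and is also passed as a
# Keras metric elsewhere in this corpus. A backend-style definition consistent
# with that usage; the real module may reduce over different axes (assumption).
import tensorflow.keras.backend as K


def rmse(y_true, y_pred):
    return K.sqrt(K.mean(K.square(y_pred - y_true)))
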
def main():
    """
    Main function for MPC scheme with receding horizon.
    """
    conf = utils.parse_config()
    logpath = None
    log = input("Do you wish to log this run? ")

    if log in ["y", "yes", "Yes"]:
        foldername = input("Do you wish to name logfolder? (enter to skip)")
        logpath = utils.create_logs_folder(conf["logpath"], foldername)

    openloop = False

    predictions = conf["predictions"]
    print("Using {} predictions.".format(predictions))

    actions_per_hour = conf["actions_per_hour"]
    horizon = conf["simulation_horizon"]
    simulation_horizon = horizon * actions_per_hour

    start_time = time.time()
    step_time = start_time

    PV, PV_pred, PL, PL_pred, grid_buy, grid_sell = utils.load_data()

    T = conf["prediction_horizon"]
    N = conf["prediction_horizon"] * actions_per_hour

    xk = conf["x_inital"]
    xk_sim = conf["x_inital"]
    x_opt = np.asarray([xk])
    x_sim = np.asarray([xk])
    u0 = np.asarray([])
    u1 = np.asarray([])
    u2 = np.asarray([])
    u3 = np.asarray([])

    solver = OptiSolver(N)

    x, lbx, ubx, lbg, ubg = solver.build_nlp(T, N)

    net_cost_grid = 0
    net_cost_bat = 0
    J = 0

    pv_preds = [PV[0]]
    pl_preds = [PL[0]]

    pv_error = []
    pl_error = []

    plt.figure()

    if predictions in ["arima", "best"]:
        pv_model = Arima("PV", order=(3, 1, 2))
        pl_model = Arima("PL", order=(1, 1, 4), seasonal_order=(0, 0, 0, 0))

    for step in range(simulation_horizon - N):
        # Update NLP parameters
        x[0] = xk
        lbx[0] = xk
        ubx[0] = xk

        PV_true = PV[step:step + N]
        PL_true = PL[step:step + N]

        if predictions == "constant":  # Predicted values equal to measurement
            pv_ref = np.ones(N) * PV[step]
            pl_ref = np.ones(N) * PL[step]
        elif predictions == "arima":  # Estimate using ARIMA
            pv_model.update(PV[step])
            pl_model.update(PL[step])
            pv_ref = pv_model.predict(T)
            pl_ref = pl_model.predict(T)
        elif predictions == "data":
            pv_ref = PV_pred[step:step + N]
            pl_ref = PL_pred[step:step + N]
        elif predictions == "scaled_mean":
            pv_ref = (PV[step] / PV_pred[step]) * PV_pred[step:step + N]
            pl_ref = (PL[step] / PL_pred[step]) * PL_pred[step:step + N]
        elif predictions == "best":
            pv_model.update(PV[step])
            pv_ref = pv_model.predict(T)
            pl_ref = (PL[step] / PL_pred[step]) * PL_pred[step:step + N]
        else:  # Use true predictions
            pv_ref = PV_true
            pl_ref = PL_true

        pv_preds.append(pv_ref[1])
        pl_preds.append(pl_ref[1])

        pv_error.append(metrics.rmse(PV_true[0:4], pv_ref[0:4]))
        pl_error.append(metrics.rmse(PL_true[0:4], pl_ref[0:4]))

        plt.plot(range(step, step + N), pv_ref, c="b")
        plt.plot(range(step, step + N), PV_true, c="r")

        xk_opt, Uk_opt, J_opt = solver.solve_nlp([x, lbx, ubx, lbg, ubg],
                                                 vertcat(pv_ref, pl_ref))
        J += J_opt
        x_opt = np.append(x_opt, xk_opt[1])

        xk_sim, Uk_sim = simulate_SOC(xk_sim, Uk_opt, PV[step], PL[step],
                                      solver.F)
        x_sim = np.append(x_sim, xk_sim)

        if openloop:
            xk = xk_opt[1]  # xk is optimal
        else:
            xk = xk_sim

        uk = [u[0] for u in Uk_opt]
        u0 = np.append(u0, uk[0])
        u1 = np.append(u1, uk[1])
        u2 = np.append(u2, uk[2])
        u3 = np.append(u3, uk[3])

        net_cost_grid += metrics.net_spending_grid(uk, 1.5, actions_per_hour)
        net_cost_bat += metrics.net_cost_battery(
            uk, conf["system"]["battery_cost"], actions_per_hour)

        if step % 50 == 0:
            print("\nFinished iteration step {}. Current step took {}s".format(
                step, np.around(time.time() - step_time, 2)))
            print("xsim {}%, x_opt {}%".format(np.around(xk_sim, 2),
                                               np.around(xk_opt[1], 2)))
            step_time = time.time()

    peak_power = np.around(np.max(u2), 2) * 70
    E_start = conf["x_inital"] * conf["system"]["C_MAX"]
    E_end = xk * conf["system"]["C_MAX"]
    battery_change = np.around(grid_buy * (E_end - E_start), 2)

    print()
    print("Error PV prediction:", np.mean(pv_error))
    print("Error PL prediction:", np.mean(pl_error))

    print("Net spending grid: {} kr".format(np.around(net_cost_grid, 2)))
    print("Peak power cost: {} kr".format(peak_power))
    print("Net spending battery: {} kr".format(np.around(net_cost_bat, 2)))
    print("Grid + battery spending: {} kr".format(
        np.around(net_cost_grid + net_cost_bat, 2)))
    print("Change in battery energy {} kr".format(battery_change))
    print("Total spending:",
          net_cost_grid + net_cost_bat - battery_change + peak_power)

    # Plotting
    u = np.asarray([-u0, u1, u2, -u3])
    u_bat = np.asarray([-u0, u1])
    u_grid = np.asarray([u2, -u3])

    p.plot_control_actions(u, horizon - T, actions_per_hour, logpath)

    p.plot_control_actions(
        u_bat,
        horizon - T,
        actions_per_hour,
        logpath,
        title="Battery Controls",
        legends=["Battery Charge", "Battery Discharge"],
    )

    p.plot_control_actions(
        u_grid,
        horizon - T,
        actions_per_hour,
        logpath,
        title="Grid Controls",
        legends=["Grid Buy", "Grid Sell"],
    )

    p.plot_SOC(x_sim, horizon - T, logpath)

    p.plot_data(
        [x_opt, x_sim],
        logpath=logpath,
        legends=["SOC optimal", "SOC simulated"],
        title="Simulated vs optimal SOC",
    )

    p.plot_data(
        [PV[:simulation_horizon - N], PL[:simulation_horizon - N]],
        logpath=logpath,
        legends=["PV Production", "Load Demands"],
        title="PV Production & Load Demands",
    )

    p.plot_SOC_control_subplots(x_sim, u, horizon - T, logpath=logpath)

    stop = time.time()
    print("\nFinished optimization in {}s".format(np.around(stop - start_time, 2)))

    utils.save_datafile(
        [x_opt, x_sim, u0, u1, u2, u3, PV, PV_pred, PL, PL_pred],
        names=[
            "x_opt", "x_sim", "u0", "u1", "u2", "u3",
            "PV", "PV_pred", "PL", "PL_pred",
        ],
        logpath=logpath,
    )

    print("One-step PV RMSE:", metrics.rmse_predictions(PV, pv_preds))
    print("One-step Load RMSE:", metrics.rmse_predictions(PL, pl_preds))

    if conf["plot_predictions"]:
        p.plot_predictions_subplots(PV, pv_preds, PL, pl_preds, logpath)

    plt.show(block=True)
    plt.ion()
    plt.close("all")