def get_cross_val_score(self, train: pd.DataFrame, normal_cv: bool = False) -> tuple:
    """
    Deliver cross-validated evaluation scores
    :param train: train set
    :param normal_cv: specify whether normal cv can be performed
    :return: tuple of a dictionary with mean and std of cross-validated evaluation scores and the backup model
    """
    # back up the model so that training on the full dataset afterwards is independent of cv training
    backup_model = copy.deepcopy(self.model)
    if train.shape[0] < 80:
        print('Train set too small for Cross Validation')
        return {}, backup_model
    train = train.copy()
    rmse_lst, mape_lst, smape_lst = [], [], []
    prefix = 'ts_'
    splitter = sklearn.model_selection.TimeSeriesSplit(n_splits=3)
    if normal_cv:
        splitter = sklearn.model_selection.ShuffleSplit(n_splits=5, test_size=0.2, random_state=0)
        prefix = 'shuf_'
    for train_index, test_index in splitter.split(train):
        cv_train, cv_test = train.loc[train.index[train_index]], train.loc[train.index[test_index]]
        # ES model with seasonality only works if n_samples is bigger than the seasonal period
        # noinspection PyUnresolvedReferences
        if self.name == 'ExponentialSmoothing' and self.seasonal is not None:
            if cv_train.shape[0] <= self.seasonal_periods:
                print('CV train set too small for seasonality')
                continue
        self.model = copy.deepcopy(backup_model)
        try:
            self.train(train=cv_train)
            predictions = self.predict(test=cv_test, train=cv_train)
            rmse_test, mape_test, smape_test = EvaluationHelper.get_all_eval_vals(
                actual=cv_test[self.target_column], prediction=predictions['Prediction'])
            rmse_lst.append(rmse_test)
            mape_lst.append(mape_test)
            smape_lst.append(smape_test)
        except Exception as exc:
            print(exc)
            continue
    rmse_mean, mape_mean, smape_mean = \
        np.mean(np.asarray(rmse_lst)), np.mean(np.asarray(mape_lst)), np.mean(np.asarray(smape_lst))
    rmse_std, mape_std, smape_std = \
        np.std(np.asarray(rmse_lst)), np.std(np.asarray(mape_lst)), np.std(np.asarray(smape_lst))
    cv_dict = {prefix + 'cv_rmse_mean': rmse_mean, prefix + 'cv_rmse_std': rmse_std,
               prefix + 'cv_mape_mean': mape_mean, prefix + 'cv_mape_std': mape_std,
               prefix + 'cv_smape_mean': smape_mean, prefix + 'cv_smape_std': smape_std}
    for cv_number in range(len(rmse_lst)):
        cv_dict[prefix + 'cv_rmse_' + str(cv_number)] = rmse_lst[cv_number]
        cv_dict[prefix + 'cv_mape_' + str(cv_number)] = mape_lst[cv_number]
        cv_dict[prefix + 'cv_smape_' + str(cv_number)] = smape_lst[cv_number]
    return cv_dict, backup_model
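# EvaluationHelper.get_all_eval_vals is defined elsewhere in the project; as a minimal sketch,
# assuming the standard definitions of the three metrics it returns (RMSE, MAPE in percent,
# sMAPE in percent), standalone versions could look like this. These helpers are hypothetical
# illustrations, not the project's actual implementation.
import numpy as np
import pandas as pd

def rmse_sketch(actual: pd.Series, prediction: pd.Series) -> float:
    # root mean squared error
    return float(np.sqrt(np.mean((np.asarray(actual) - np.asarray(prediction)) ** 2)))

def mape_sketch(actual: pd.Series, prediction: pd.Series) -> float:
    # mean absolute percentage error in percent; undefined if actual contains zeros
    actual, prediction = np.asarray(actual), np.asarray(prediction)
    return float(np.mean(np.abs((actual - prediction) / actual)) * 100)

def smape_sketch(actual: pd.Series, prediction: pd.Series) -> float:
    # symmetric mean absolute percentage error in percent
    actual, prediction = np.asarray(actual), np.asarray(prediction)
    return float(np.mean(2 * np.abs(prediction - actual) / (np.abs(actual) + np.abs(prediction))) * 100)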
def evaluate(self, train: pd.DataFrame, test: pd.DataFrame) -> dict:
    """
    Evaluate model on the test set and deliver a dictionary with evaluation metrics.
    The baseline comparisons and insample evaluation below are currently disabled.
    :param train: train set
    :param test: test set
    :return: dictionary with the test RMSE of the model
    """
    """
    insample_rw, prediction_rw = SimpleBaselines.RandomWalk(one_step_ahead=self.one_step_ahead)\
        .get_insample_prediction(train=train, test=test, target_column=self.target_column)
    insample_seasrw, prediction_seasrw = SimpleBaselines.RandomWalk(one_step_ahead=self.one_step_ahead)\
        .get_insample_prediction(train=train, test=test, target_column=self.target_column,
                                 seasonal_periods=self.seasonal_periods)
    insample_ha, prediction_ha = SimpleBaselines.HistoricalAverage(one_step_ahead=self.one_step_ahead)\
        .get_insample_prediction(train=train, test=test, target_column=self.target_column)
    insample_model = self.insample(train=train)
    """
    prediction_model = self.predict(test=test, train=train)
    """
    rmse_train_rw = EvaluationHelper.rmse(
        actual=train[self.target_column], prediction=insample_rw['Insample'])
    rmse_test_rw = EvaluationHelper.rmse(
        actual=test[self.target_column], prediction=prediction_rw['Prediction'])
    rmse_train_seasrw = EvaluationHelper.rmse(
        actual=train[self.target_column], prediction=insample_seasrw['Insample'])
    rmse_test_seasrw = EvaluationHelper.rmse(
        actual=test[self.target_column], prediction=prediction_seasrw['Prediction'])
    rmse_train_ha = EvaluationHelper.rmse(
        actual=train[self.target_column], prediction=insample_ha['Insample'])
    rmse_test_ha = EvaluationHelper.rmse(
        actual=test[self.target_column], prediction=prediction_ha['Prediction'])
    rmse_train_model = EvaluationHelper.rmse(
        actual=train[self.target_column], prediction=insample_model['Insample'])
    """
    rmse_test_model = EvaluationHelper.rmse(
        actual=test[self.target_column], prediction=prediction_model['Prediction'])
    """
    return {'RMSE_Train_RW': rmse_train_rw, 'RMSE_Test_RW': rmse_test_rw,
            'RMSE_Train_seasRW': rmse_train_seasrw, 'RMSE_Test_seasRW': rmse_test_seasrw,
            'RMSE_Train_HA': rmse_train_ha, 'RMSE_Test_HA': rmse_test_ha,
            'RMSE_Train': rmse_train_model, 'RMSE_Test': rmse_test_model}
    """
    return {'RMSE_Test': rmse_test_model}
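# Hedged usage sketch of the interface above; 'model' and 'data' are hypothetical stand-ins
# for a model object from this project and a loaded dataset, and the 80/20 split is assumed.
import pandas as pd

def evaluate_demo(model, data: pd.DataFrame) -> dict:
    # chronological 80/20 split: time series data must not be shuffled before evaluation
    split_ind = int(0.8 * data.shape[0])
    train, test = data.iloc[:split_ind].copy(), data.iloc[split_ind:].copy()
    model.train(train=train)
    return model.evaluate(train=train, test=test)  # e.g. {'RMSE_Test': ...}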
def get_cross_val_score(self, train: pd.DataFrame) -> tuple:
    """
    Deliver cross-validated evaluation scores
    :param train: train set
    :return: tuple of a dictionary with mean and std of cross-validated RMSE scores and the backup model
    """
    # back up the model so that training on the full dataset afterwards is independent of cv training
    backup_model = copy.deepcopy(self.model)
    if train.shape[0] < 30:
        print('Train set too small for Cross Validation')
        return {}, backup_model
    train = train.copy()
    rmse_lst = []
    splitter = sklearn.model_selection.ShuffleSplit(n_splits=5, test_size=0.2, random_state=0)
    prefix = 'shuf_'
    for train_index, test_index in splitter.split(train):
        cv_train, cv_test = train.loc[train.index[train_index]], train.loc[train.index[test_index]]
        self.model = copy.deepcopy(backup_model)
        try:
            self.train(train=cv_train)
            predictions = self.predict(test=cv_test, train=cv_train, cv_call=True)
            rmse_test = EvaluationHelper.rmse(
                actual=cv_test[self.target_column], prediction=predictions['Prediction'])
            rmse_lst.append(rmse_test)
        except Exception as exc:
            print(exc)
            continue
    rmse_mean = np.mean(np.asarray(rmse_lst))
    rmse_std = np.std(np.asarray(rmse_lst))
    cv_dict = {prefix + 'cv_rmse_mean': rmse_mean, prefix + 'cv_rmse_std': rmse_std}
    for cv_number in range(len(rmse_lst)):
        cv_dict[prefix + 'cv_rmse_' + str(cv_number)] = rmse_lst[cv_number]
    return cv_dict, backup_model
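# A minimal sketch of how the returned (cv_dict, backup_model) tuple might be consumed,
# assuming the caller restores the backup before the final fit on the full train set (which
# is why the model is deep-copied above). Names here are hypothetical.
import pandas as pd

def run_cv_then_full_fit(model, train: pd.DataFrame, results: dict) -> None:
    cv_dict, backup_model = model.get_cross_val_score(train=train)
    results.update(cv_dict)       # e.g. 'shuf_cv_rmse_mean', 'shuf_cv_rmse_std', 'shuf_cv_rmse_0', ...
    model.model = backup_model    # discard the last cv fit
    model.train(train=train)      # final training on the full train set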
end_ind = scenario['end_ind']
slope = scenario['slope']
max_factor = scenario['max_factor']
try:
    # get base prediction for manipulated data
    data_manip = TrainHelper.get_sloped_dataset(
        dataset=data, target_column=target_column, start_ind=start_ind, end_ind=end_ind,
        max_factor=max_factor, slope=slope)
    base_predict = model_sine.predict(train=data_manip[:train_ind], test=data_manip[train_ind:])
    rmse_base = EvaluationHelper.rmse(
        actual=data_manip[train_ind:][target_column], prediction=base_predict['Prediction'])
    if seas_len not in rmse_base_dict:
        rmse_base_dict[seas_len] = [rmse_base]
    else:
        rmse_base_dict[seas_len].append(rmse_base)
    print('Base Prediction: RMSE=' + str(rmse_base))
    # iterate over all parameter combinations
    for param_ind, params in enumerate(params_lst):
        print('### Param ' + str(param_ind + 1) + '/' + str(len(params_lst))
              + ' (Scen ' + str(scen_ind + 1) + '/' + str(len(scenario_lst))
              + ') (SeasLen ' + str(seas_len) + ') ###')
        o_percentage = params['o_percentage'] if 'o_percentage' in params else np.nan
        u_percentage = params['u_percentage'] if 'u_percentage' in params else np.nan
    comparison_partners=True, target_column=target_column, train_ind=int(split_perc * dataset.shape[0]),
    scale_thr=scale_thr, da=da, o_perc=o_percentage, u_perc=u_percentage, thr=threshold,
    rel_thr=rel_thr, rel_coef=rel_coef, under_samp=under_samp, append=append,
    const_hazard=const_hazard, scale_window=scale_window, scale_seasons=scale_seasons,
    cpd=cpd, cf_r=cf_r, cf_order=cf_order, cf_smooth=cf_smooth, cf_thr_perc=cf_thr_perc,
    max_samples=max_samples)
# end_proc = time.process_time_ns() / 1000000
# end_time = time.time_ns() / 1000000
# runtime_ms_dict['Full_pipeline_Runtime_ms'] = end_proc - start_proc
# runtime_ms_dict['Full_pipeline_Time_ms'] = end_time - start_time
actual = test[target_column].copy()
actual.reset_index(drop=True, inplace=True)
pred_evars = predictions_full['Prediction'].copy()
pred_evars.reset_index(drop=True, inplace=True)
rmse_evars = EvaluationHelper.rmse(actual=actual, prediction=pred_evars)
print('RMSE_EVARS-GPR=' + str(rmse_evars))
print('---------- PR 1 ----------')
model_period_retr_1 = ModelsGaussianProcessRegression.GaussianProcessRegression(
    target_column=target_column, seasonal_periods=seasonal_periods, kernel=dict_top_config['kernel'],
    alpha=dict_top_config['alpha'], n_restarts_optimizer=dict_top_config['n_restarts_optimizer'],
    standardize=dict_top_config['standardize'], normalize_y=dict_top_config['normalize_y'],
    one_step_ahead=1)
cross_val_dict = model_period_retr_1.train(train=train, cross_val_call=True)
# start_proc = time.process_time_ns() / 1000000
# start_time = time.time_ns() / 1000000
eval_dict = model_period_retr_1.evaluate(train=train, test=test)
# end_proc = time.process_time_ns() / 1000000
# end_time = time.time_ns() / 1000000
# runtime_ms_dict['PR1_Runtime_ms'] = end_proc - start_proc
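# Why the reset_index calls above matter: pandas aligns Series element-wise by index, so
# subtracting a prediction carrying a fresh 0..n index from a test slice that kept its
# original index yields NaNs rather than an error. A minimal illustration with made-up values:
import pandas as pd

actual_demo = pd.Series([1.0, 2.0], index=[80, 81])
pred_demo = pd.Series([1.1, 1.9], index=[0, 1])
print(actual_demo - pred_demo)                          # all NaN: no common index labels
print(actual_demo.reset_index(drop=True) - pred_demo)   # approx. [-0.1, 0.1] once indices align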