Example #1
 def get_cross_val_score(self, train: pd.DataFrame, normal_cv: bool = False) -> tuple:
     """
     Deliver cross validated evaluation scores
     :param train: train set
     :param normal_cv: specify whether normal cv can be performed
     :return: dictionary with mean and std of cross validated evaluation scores
     """
     # back up the model so that training on the full dataset afterwards is independent of the CV training
     backup_model = copy.deepcopy(self.model)
     if train.shape[0] < 80:
         print('Train set too small for Cross Validation')
         return {}, backup_model
     train = train.copy()
     rmse_lst, mape_lst, smape_lst = [], [], []
     prefix = 'ts_'
     splitter = sklearn.model_selection.TimeSeriesSplit(n_splits=3)
     if normal_cv:
         splitter = sklearn.model_selection.ShuffleSplit(n_splits=5, test_size=0.2, random_state=0)
         prefix = 'shuf_'
     for train_index, test_index in splitter.split(train):
         cv_train, cv_test = train.loc[train.index[train_index]], train.loc[train.index[test_index]]
         # an ES model with seasonality only works if n_samples is larger than the seasonal period
         # noinspection PyUnresolvedReferences
         if self.name == 'ExponentialSmoothing' and self.seasonal is not None:
             if cv_train.shape[0] <= self.seasonal_periods:
                 print('CV train set too small for seasonality')
                 continue
         self.model = copy.deepcopy(backup_model)
         try:
             self.train(train=cv_train)
             predictions = self.predict(test=cv_test, train=cv_train)
             rmse_test, mape_test, smape_test = EvaluationHelper.get_all_eval_vals(
                 actual=cv_test[self.target_column], prediction=predictions['Prediction'])
             rmse_lst.append(rmse_test)
             mape_lst.append(mape_test)
             smape_lst.append(smape_test)
         except Exception as exc:
             print(exc)
             continue
     rmse_mean, mape_mean, smape_mean = \
         np.mean(np.asarray(rmse_lst)), np.mean(np.asarray(mape_lst)), np.mean(np.asarray(smape_lst))
     rmse_std, mape_std, smape_std = \
         np.std(np.asarray(rmse_lst)), np.std(np.asarray(mape_lst)), np.std(np.asarray(smape_lst))
     cv_dict = {prefix + 'cv_rmse_mean': rmse_mean, prefix + 'cv_rmse_std': rmse_std,
                prefix + 'cv_mape_mean': mape_mean, prefix + 'cv_mape_std': mape_std,
                prefix + 'cv_smape_mean': smape_mean, prefix + 'cv_smape_std': smape_std}
     for cv_number in range(len(rmse_lst)):
         cv_dict[prefix + 'cv_rmse_' + str(cv_number)] = rmse_lst[cv_number]
         cv_dict[prefix + 'cv_mape_' + str(cv_number)] = mape_lst[cv_number]
         cv_dict[prefix + 'cv_smape_' + str(cv_number)] = smape_lst[cv_number]
     return cv_dict, backup_model
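
A minimal usage sketch for this method, assuming `model` is an instance of the wrapper class that defines `get_cross_val_score`; the variable names `model` and `train_df` are illustrative, while the metric keys follow the code above:

    # hedged sketch: `model` is assumed to be an instance of the class above
    cv_dict, backup_model = model.get_cross_val_score(train=train_df, normal_cv=False)
    if cv_dict:
        # with normal_cv=False the keys carry the 'ts_' prefix (TimeSeriesSplit)
        print('CV RMSE mean:', cv_dict['ts_cv_rmse_mean'])
        print('CV RMSE std:', cv_dict['ts_cv_rmse_std'])
    # restore the model that was backed up before the CV training
    model.model = backup_model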
Example #2
 def evaluate(self, train: pd.DataFrame, test: pd.DataFrame) -> dict:
     """
     Evaluate model on the test set.
     Deliver dictionary with evaluation metrics.
     :param train: train set
     :param test: test set
     :return: dictionary with evaluation metrics of the model
     """
     # evaluation against the baseline methods and on the train set is currently disabled
     # insample_rw, prediction_rw = SimpleBaselines.RandomWalk(one_step_ahead=self.one_step_ahead)\
     #     .get_insample_prediction(train=train, test=test, target_column=self.target_column)
     # insample_seasrw, prediction_seasrw = SimpleBaselines.RandomWalk(one_step_ahead=self.one_step_ahead)\
     #     .get_insample_prediction(train=train, test=test, target_column=self.target_column,
     #                              seasonal_periods=self.seasonal_periods)
     # insample_ha, prediction_ha = SimpleBaselines.HistoricalAverage(one_step_ahead=self.one_step_ahead)\
     #     .get_insample_prediction(train=train, test=test, target_column=self.target_column)
     # insample_model = self.insample(train=train)
     prediction_model = self.predict(test=test, train=train)
     # rmse_train_rw = EvaluationHelper.rmse(
     #     actual=train[self.target_column], prediction=insample_rw['Insample'])
     # rmse_test_rw = EvaluationHelper.rmse(
     #     actual=test[self.target_column], prediction=prediction_rw['Prediction'])
     # rmse_train_seasrw = EvaluationHelper.rmse(
     #     actual=train[self.target_column], prediction=insample_seasrw['Insample'])
     # rmse_test_seasrw = EvaluationHelper.rmse(
     #     actual=test[self.target_column], prediction=prediction_seasrw['Prediction'])
     # rmse_train_ha = EvaluationHelper.rmse(
     #     actual=train[self.target_column], prediction=insample_ha['Insample'])
     # rmse_test_ha = EvaluationHelper.rmse(
     #     actual=test[self.target_column], prediction=prediction_ha['Prediction'])
     # rmse_train_model = EvaluationHelper.rmse(
     #     actual=train[self.target_column], prediction=insample_model['Insample'])
     rmse_test_model = EvaluationHelper.rmse(
         actual=test[self.target_column],
         prediction=prediction_model['Prediction'])
     # return {'RMSE_Train_RW': rmse_train_rw,
     #         'RMSE_Test_RW': rmse_test_rw,
     #         'RMSE_Train_seasRW': rmse_train_seasrw,
     #         'RMSE_Test_seasRW': rmse_test_seasrw,
     #         'RMSE_Train_HA': rmse_train_ha,
     #         'RMSE_Test_HA': rmse_test_ha,
     #         'RMSE_Train': rmse_train_model,
     #         'RMSE_Test': rmse_test_model}
     return {'RMSE_Test': rmse_test_model}
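
The examples repeatedly call `EvaluationHelper.rmse` and (in example #1) `EvaluationHelper.get_all_eval_vals`. A minimal, self-contained sketch of such a helper, assuming the standard textbook definitions of RMSE, MAPE and sMAPE; the project's actual implementation may differ in details such as percentage scaling:

    import numpy as np
    import pandas as pd

    class EvaluationHelper:
        """Minimal sketch; assumes standard metric definitions."""

        @staticmethod
        def rmse(actual: pd.Series, prediction: pd.Series) -> float:
            a, p = np.asarray(actual, dtype=float), np.asarray(prediction, dtype=float)
            return float(np.sqrt(np.mean((a - p) ** 2)))

        @staticmethod
        def mape(actual: pd.Series, prediction: pd.Series) -> float:
            a, p = np.asarray(actual, dtype=float), np.asarray(prediction, dtype=float)
            return float(np.mean(np.abs((a - p) / a)) * 100)

        @staticmethod
        def smape(actual: pd.Series, prediction: pd.Series) -> float:
            a, p = np.asarray(actual, dtype=float), np.asarray(prediction, dtype=float)
            return float(np.mean(2 * np.abs(p - a) / (np.abs(a) + np.abs(p))) * 100)

        @staticmethod
        def get_all_eval_vals(actual: pd.Series, prediction: pd.Series) -> tuple:
            # returns metrics in the order unpacked in example #1: rmse, mape, smape
            return (EvaluationHelper.rmse(actual, prediction),
                    EvaluationHelper.mape(actual, prediction),
                    EvaluationHelper.smape(actual, prediction))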
Example #3
 def get_cross_val_score(self, train: pd.DataFrame) -> tuple:
     """
     Deliver cross validated evaluation scores
     :param train: train set
     :return: dictionary with mean and std of cross validated evaluation scores
     """
     # back up the model so that training on the full dataset afterwards is independent of the CV training
     backup_model = copy.deepcopy(self.model)
     if train.shape[0] < 30:
         print('Train set too small for Cross Validation')
         return {}, backup_model
     train = train.copy()
     rmse_lst = []
     splitter = sklearn.model_selection.ShuffleSplit(n_splits=5,
                                                     test_size=0.2,
                                                     random_state=0)
     prefix = 'shuf_'
     for train_index, test_index in splitter.split(train):
         cv_train, cv_test = train.loc[train.index[train_index]], train.loc[
             train.index[test_index]]
         self.model = copy.deepcopy(backup_model)
         try:
             self.train(train=cv_train)
             predictions = self.predict(test=cv_test,
                                        train=cv_train,
                                        cv_call=True)
             rmse_test = EvaluationHelper.rmse(
                 actual=cv_test[self.target_column],
                 prediction=predictions['Prediction'])
             rmse_lst.append(rmse_test)
         except Exception as exc:
             print(exc)
             continue
     rmse_mean = np.mean(np.asarray(rmse_lst))
     rmse_std = np.std(np.asarray(rmse_lst))
     cv_dict = {
         prefix + 'cv_rmse_mean': rmse_mean,
         prefix + 'cv_rmse_std': rmse_std
     }
     for cv_number in range(len(rmse_lst)):
         cv_dict[prefix + 'cv_rmse_' + str(cv_number)] = rmse_lst[cv_number]
     return cv_dict, backup_model
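
Examples #1 and #3 differ mainly in the splitter: `TimeSeriesSplit` produces expanding-window folds whose test indices always come after the train indices, while `ShuffleSplit` draws random, possibly overlapping splits. A small demonstration with the configurations used above:

    import numpy as np
    import sklearn.model_selection

    X = np.arange(10).reshape(-1, 1)
    # expanding-window folds: the test fold always lies after the train fold
    for tr, te in sklearn.model_selection.TimeSeriesSplit(n_splits=3).split(X):
        print('ts   train:', tr, 'test:', te)
    # random folds: 20% test size, reproducible via random_state
    for tr, te in sklearn.model_selection.ShuffleSplit(
            n_splits=5, test_size=0.2, random_state=0).split(X):
        print('shuf train:', tr, 'test:', te)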
Example #4
 end_ind = scenario['end_ind']
 slope = scenario['slope']
 max_factor = scenario['max_factor']
 try:
     # get base prediction for manipulated data
     data_manip = TrainHelper.get_sloped_dataset(
         dataset=data,
         target_column=target_column,
         start_ind=start_ind,
         end_ind=end_ind,
         max_factor=max_factor,
         slope=slope)
     base_predict = model_sine.predict(train=data_manip[:train_ind],
                                       test=data_manip[train_ind:])
     rmse_base = EvaluationHelper.rmse(
         actual=data_manip[train_ind:][target_column],
         prediction=base_predict['Prediction'])
     if seas_len not in rmse_base_dict:
         rmse_base_dict[seas_len] = [rmse_base]
     else:
         rmse_base_dict[seas_len].append(rmse_base)
     print('Base Prediction: RMSE=' + str(rmse_base))
     # iterate over all parameter combinations
     for param_ind, params in enumerate(params_lst):
         print('### Param ' + str(param_ind + 1) + '/' +
               str(len(params_lst)) + ' (Scen ' +
               str(scen_ind + 1) + '/' + str(len(scenario_lst)) +
               ') (SeasLen ' + str(seas_len) + ') ###')
          o_percentage = params['o_percentage'] if 'o_percentage' in params else np.nan
          u_percentage = params['u_percentage'] if 'u_percentage' in params else np.nan
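
Example #4 looks up each parameter with a fallback to `np.nan` for combinations that do not define it. One plausible way such a `params_lst` could be built is sklearn's `ParameterGrid`; the keys and value ranges below are purely illustrative assumptions, not taken from the source:

    import numpy as np
    from sklearn.model_selection import ParameterGrid

    # illustrative grid; the real keys and ranges come from the experiment setup
    params_lst = list(ParameterGrid({'o_percentage': [1.1, 1.2],
                                     'u_percentage': [0.9, 0.95]}))
    for params in params_lst:
        o_percentage = params['o_percentage'] if 'o_percentage' in params else np.nan
        u_percentage = params['u_percentage'] if 'u_percentage' in params else np.nan
        print(o_percentage, u_percentage)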
Example #5
                                   comparison_partners=True,
                                   target_column=target_column, train_ind=int(split_perc*dataset.shape[0]),
                                   scale_thr=scale_thr, da=da, o_perc=o_percentage, u_perc=u_percentage, thr=threshold,
                                   rel_thr=rel_thr, rel_coef=rel_coef, under_samp=under_samp,
                                   append=append, const_hazard=const_hazard, scale_window=scale_window,
                                   scale_seasons=scale_seasons, cpd=cpd, cf_r=cf_r, cf_order=cf_order,
                                   cf_smooth=cf_smooth, cf_thr_perc=cf_thr_perc, max_samples=max_samples)
        # end_proc = time.process_time_ns() / 1000000
        # end_time = time.time_ns() / 1000000
        # runtime_ms_dict['Full_pipeline_Runtime_ms'] = end_proc - start_proc
        # runtime_ms_dict['Full_pipeline_Time_ms'] = end_time - start_time
        actual = test[target_column].copy()
        actual.reset_index(drop=True, inplace=True)
        pred_evars = predictions_full['Prediction'].copy()
        pred_evars.reset_index(drop=True, inplace=True)
        rmse_evars = EvaluationHelper.rmse(actual=actual, prediction=pred_evars)
        print('RMSE_EVARS-GPR=' + str(rmse_evars))

        print('---------- PR 1 ----------')
        model_period_retr_1 = ModelsGaussianProcessRegression.GaussianProcessRegression(
            target_column=target_column, seasonal_periods=seasonal_periods, kernel=dict_top_config['kernel'],
            alpha=dict_top_config['alpha'], n_restarts_optimizer=dict_top_config['n_restarts_optimizer'],
            standardize=dict_top_config['standardize'], normalize_y=dict_top_config['normalize_y'],
            one_step_ahead=1)
        cross_val_dict = model_period_retr_1.train(train=train, cross_val_call=True)
        # start_proc = time.process_time_ns() / 1000000
        # start_time = time.time_ns() / 1000000
        eval_dict = model_period_retr_1.evaluate(train=train, test=test)
        # end_proc = time.process_time_ns() / 1000000
        # end_time = time.time_ns() / 1000000
        # runtime_ms_dict['PR1_Runtime_ms'] = end_proc - start_proc
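
The commented-out lines in this example time the pipeline in two ways: `time.process_time_ns()` measures CPU time of the process, while `time.time_ns()` measures wall-clock time; dividing by 1,000,000 converts nanoseconds to milliseconds. A minimal sketch of that pattern (the dictionary keys are illustrative):

    import time

    runtime_ms_dict = {}
    start_proc = time.process_time_ns() / 1000000
    start_time = time.time_ns() / 1000000
    # ... workload to be measured, e.g. training and evaluating a model ...
    end_proc = time.process_time_ns() / 1000000
    end_time = time.time_ns() / 1000000
    runtime_ms_dict['Runtime_ms'] = end_proc - start_proc  # CPU time in ms
    runtime_ms_dict['Time_ms'] = end_time - start_time     # wall-clock time in ms
    print(runtime_ms_dict)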