def test_weighted_root_mean_squared_error(self):
    prediction = array([1, 0, 1, 1])
    target = array([1, 0, 1, 1])
    weight_vector = array([0.5, 0.5, 0.5, 0.5])
    metric = WeightedRootMeanSquaredError(weight_vector)
    value = metric.evaluate(prediction, target)
    # A perfect prediction yields an error of exactly 0.
    self.assertEqual(value, 0)
    # Lower is better for an error metric.
    self.assertTrue(is_better(1, 2, WeightedRootMeanSquaredError))
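# A minimal sketch, assuming the metric API the test above exercises (the
# project's real WeightedRootMeanSquaredError may differ): evaluate() returns
# the weighted RMSE, and greater_is_better is False, so is_better(1, 2, ...)
# holds for this metric. The class name is suffixed to mark it as illustrative.
from numpy import average, sqrt


class WeightedRootMeanSquaredErrorSketch:
    greater_is_better = False

    def __init__(self, weight_vector):
        # Per-sample weights applied to the squared errors.
        self.weight_vector = weight_vector

    def evaluate(self, prediction, target):
        # Weighted mean of the squared errors, then the square root.
        return sqrt(average((prediction - target) ** 2,
                            weights=self.weight_vector))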
def _select_best_learner(self, time_limit=TIME_LIMIT_SECONDS, time_buffer=TIME_BUFFER, verbose=False):
    # Best learner found so far (best validation value under the metric).
    best_learner = None
    # Worst possible starting value, respecting the metric's direction.
    best_validation_value = float(
        '-Inf') if self.metric.greater_is_better else float('Inf')
    # (configuration, validation value) pairs for every configuration run.
    validation_value_list = list()
    # Current time in seconds.
    time_seconds = default_timer
    # Evaluate configurations in random order.
    shuffle(self.configurations)
    # Number of configurations run so far.
    number_of_runs = 0
    # Start of run.
    run_start = time_seconds()
    # Seconds remaining in the time budget.
    time_left = lambda: time_limit - (time_seconds() - run_start)
    # Iterate through all configurations.
    for configuration in tqdm(self.configurations):
        # Create learner from configuration.
        learner = self.model(**configuration)
        # Train learner; scikit-learn models take only inputs and targets,
        # while the project's own learners also take the metric and a
        # verbosity flag.
        if self.__class__.__bases__[0] == EvaluatorSklearn:
            learner.fit(
                get_input_variables(self.training_set).as_matrix(),
                get_target_variable(self.training_set).as_matrix())
        else:
            learner.fit(
                get_input_variables(self.training_set).as_matrix(),
                get_target_variable(self.training_set).as_matrix(),
                self.metric, verbose)
        # Calculate validation value.
        validation_value = self._calculate_value(learner, self.validation_set)
        # Keep the learner if it beats the best validation value so far.
        if is_better(validation_value, best_validation_value, self.metric):
            best_learner = learner
            best_validation_value = validation_value
        # Record configuration and validation value.
        validation_value_list.append((configuration, validation_value))
        # Increase number of runs.
        number_of_runs += 1
        # Seconds remaining in the budget.
        remaining = time_left()
        # Average time one configuration has taken so far, used as the
        # expected duration of the next run.
        expected_run_time = (time_limit - remaining) / number_of_runs
        # Stop when the budget is spent, or when the buffered remaining time
        # no longer fits another run.
        if remaining < 0 or remaining * (1 + time_buffer) < expected_run_time:
            break
    # When all configurations are tested (or time runs out), return the best
    # learner plus the per-configuration validation values.
    return {
        'best_learner': best_learner,
        'validation_value_list': validation_value_list
    }
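# Standalone sketch of the time-budget rule used in _select_best_learner above
# (the names here are illustrative, not project code): stop once the budget is
# spent, or once the buffered remaining time no longer fits the average time a
# configuration has taken so far.
def should_stop(time_limit, elapsed, number_of_runs, time_buffer):
    remaining = time_limit - elapsed
    average_run_time = elapsed / number_of_runs
    return remaining < 0 or remaining * (1 + time_buffer) < average_run_time

# Example: with a 60 s budget, 55 s elapsed over 5 runs (11 s per run on
# average) and a 0.1 buffer, 5 * 1.1 = 5.5 < 11, so the search stops early.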
def evaluate(self, algorithm):
    # In generation 0 there is no champion yet, so the criterion cannot fire.
    if algorithm.current_generation == 0:
        return False
    champion = algorithm.champion
    # Offspring that are better than the current champion.
    superior_solutions = [
        solution for solution in algorithm.population
        if is_better(solution.value, champion.value, algorithm.metric)
    ]
    # If no superior offspring exist, defer to the parent stopping criterion.
    if not superior_solutions:
        return super().evaluate(algorithm)
    # Percentage of the population that improves on the champion.
    percentage_superior_solutions = len(superior_solutions) / len(
        algorithm.population)
    # Stop when too small a share of the population still improves;
    # otherwise defer to the parent criterion.
    if percentage_superior_solutions < self.threshold:
        return True
    return super().evaluate(algorithm)
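# Minimal sketch of the kind of parent criterion the method above defers to.
# MaxGenerationsCriterion is referenced elsewhere in this file; this assumed
# version simply stops after a fixed number of generations.
class MaxGenerationsCriterionSketch:
    def __init__(self, max_generations):
        self.max_generations = max_generations

    def evaluate(self, algorithm):
        # Stop once the configured generation budget is exhausted.
        return algorithm.current_generation >= self.max_generations

# Worked example for the threshold rule above (values assumed): with a
# population of 50, a threshold of 0.1 and 4 superior offspring,
# 4 / 50 = 0.08 < 0.1, so evaluate() returns True and the run stops early.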
def run_nested_cv(self):
    """Runs the benchmark study: N configurations for each method, trained on
    all data and selected on that same data (no validation split)."""
    print('Entering training benchmark for dataset:', self.dataset_name)
    training_data = pd.DataFrame(self.samples.values)
    for outer_cv in range(_OUTER_FOLDS):
        print('\n\tIndex of outer fold:', outer_cv)
        for key in self.models.keys():
            print('\t\tAlgorithm with key:', key)
            if not self.results[key][outer_cv]:
                # Worst possible starting value; classification scores (AUROC)
                # are always maximised, regression follows the metric direction.
                if self.classification:
                    best_training_value = float('-Inf')
                else:
                    best_training_value = float(
                        '-Inf') if self.selection_metric.greater_is_better else float('Inf')
                training_value_list = list()
                for configuration in range(self.models[key]['max_combinations']):
                    print('\n\t\t\tIndex of algorithm configuration:',
                          len(training_value_list))
                    if len(self.models[key]['algorithms']) > 1:
                        # Pick one of the method's algorithm variants at random.
                        option = randint(0, 2)
                        algorithm = self.models[key]['algorithms'][option]
                        config = self.models[key]['configuration_method'](option)
                    else:
                        algorithm = self.models[key]['algorithms'][0]
                        if key.startswith('mlp'):
                            # MLP configurations need a batch size derived from
                            # the training data size (version from 01-25).
                            batch_size = int(training_data.shape[0])
                            config = self.models[key]['configuration_method'](batch_size)
                        else:
                            config = self.models[key]['configuration_method']()
                    # Cap the computational effort of every configuration.
                    if key.startswith('mlp'):
                        config['max_iter'] = DEFAULT_NUMBER_OF_ITERATIONS
                    else:
                        config['stopping_criterion'] = MaxGenerationsCriterion(
                            DEFAULT_NUMBER_OF_ITERATIONS)
                    # Train and test on the same data, by design of this benchmark.
                    results = self._evaluate_algorithm(
                        algorithm=algorithm, configurations=config,
                        training_set=training_data, validation_set=None,
                        testing_set=training_data, metric=self.learning_metric)
                    if self.classification:
                        training_value = results['training_accuracy']
                        print("\t\t\tAUROC training: %.3f" % training_value)
                    else:
                        training_value = results['training_value']
                        print("\t\t\tRMSE training: %.3f" % training_value)
                    # AUROC is maximised; otherwise defer to the selection metric.
                    if self.classification:
                        improved = training_value > best_training_value
                    else:
                        improved = is_better(training_value, best_training_value,
                                             self.selection_metric)
                    if improved:
                        best_algorithm = algorithm
                        best_key = key
                        best_configuration = config
                        best_training_value = training_value
                        self.results[key][outer_cv] = results
                        self.results[key][outer_cv]['best_configuration'] = best_configuration
                        self.results[key][outer_cv]['avg_inner_validation_error'] = best_training_value
                        self.results[key][outer_cv]['avg_inner_training_error'] = best_training_value
                        best_overall_algorithm = best_algorithm
                        best_overall_configuration = best_configuration
                        best_overall_key = best_key
                        self.best_result[outer_cv] = self.results[key][outer_cv]
                        self.best_result[outer_cv]['best_overall_algorithm'] = best_overall_algorithm
                        self.best_result[outer_cv]['best_overall_configuration'] = best_overall_configuration
                        self.best_result[outer_cv]['best_overall_key'] = best_overall_key
                    training_value_list.append((configuration, training_value))
                if self.classification:
                    print("\n\t\tAUROC training: %.3f"
                          % self.results[key][outer_cv]['training_accuracy'])
                else:
                    print("\n\t\tRMSE training: %.3f"
                          % self.results[key][outer_cv]['training_value'])
    # Serialize benchmark.
    benchmark_to_pickle(self)
    print('Leaving training benchmark for dataset:', self.dataset_name)
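# Compressed sketch of the selection rule implemented above (fit_and_score is
# a hypothetical callback standing in for self._evaluate_algorithm): every
# configuration is trained and scored on the same data, and the best training
# score wins; this benchmark deliberately uses no validation split.
def select_on_training_data(configurations, fit_and_score, greater_is_better):
    best_config = None
    best_value = float('-Inf') if greater_is_better else float('Inf')
    for config in configurations:
        value = fit_and_score(config)
        improved = value > best_value if greater_is_better else value < best_value
        if improved:
            best_config, best_value = config, value
    return best_config, best_value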
def _is_better(self, value_1, value_2):
    """Returns whether value_1 is better than value_2, based on the defined metric."""
    return is_better(value_1, value_2, self.metric)
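# Sketch of the module-level helper wrapped above, inferred from its uses in
# this file (the real implementation may differ): for metrics with
# greater_is_better set (e.g. Accuracy), higher values win; for error metrics
# (e.g. RootMeanSquaredError), lower values win.
def is_better_sketch(value_1, value_2, metric):
    if metric.greater_is_better:
        return value_1 > value_2
    return value_1 < value_2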
def run_nested_cv(self):
    """Runs the benchmark study on a nested cross-validation environment."""
    print('Entering run_nested_cv for dataset:', self.dataset_name)
    outer_cv = 0
    outer_folds = self._get_outer_folds(outer_cv)
    for training_outer_index, testing_index in outer_folds.split(
            get_input_variables(self.samples).values,
            get_target_variable(self.samples).values):
        print('\n\tIndex of outer fold:', outer_cv)
        training_outer, testing = pd.DataFrame(
            self.samples.values[training_outer_index]), pd.DataFrame(
                self.samples.values[testing_index])
        # Worst possible starting value, respecting the metric's direction.
        if self.classification:
            best_overall_validation_value = float('-Inf')
        else:
            best_overall_validation_value = float(
                '-Inf') if self.selection_metric.greater_is_better else float('Inf')
        for key in self.models.keys():
            print('\t\tAlgorithm with key:', key)
            if not self.results[key][outer_cv]:
                if self.classification:
                    best_validation_value = float('-Inf')
                else:
                    best_validation_value = float(
                        '-Inf') if self.selection_metric.greater_is_better else float('Inf')
                validation_value_list = list()
                for configuration in range(self.models[key]['max_combinations']):
                    print('\n\t\t\tIndex of algorithm configuration:',
                          len(validation_value_list))
                    if len(self.models[key]['algorithms']) > 1:
                        # Pick one of the method's algorithm variants at random.
                        option = randint(0, 2)
                        algorithm = self.models[key]['algorithms'][option]
                        config = self.models[key]['configuration_method'](option)
                    else:
                        algorithm = self.models[key]['algorithms'][0]
                        if key.startswith('mlp'):
                            # Batch size derived from the inner-fold size
                            # (version from 01-25).
                            batch_size = int(training_outer.shape[0] / _INNER_FOLDS)
                            config = self.models[key]['configuration_method'](batch_size)
                        else:
                            config = self.models[key]['configuration_method']()
                    inner_folds = self._get_inner_folds(outer_cv)
                    tmp_valid_training_values_list = list()
                    for training_inner_index, validation_index in inner_folds.split(
                            get_input_variables(training_outer).values,
                            get_target_variable(training_outer).values):
                        print('\t\t\t\tIndex of inner fold:',
                              len(tmp_valid_training_values_list))
                        training_inner, validation = pd.DataFrame(
                            training_outer.values[training_inner_index]), pd.DataFrame(
                                training_outer.values[validation_index])
                        results = self._evaluate_algorithm(
                            algorithm=algorithm, configurations=config,
                            training_set=training_inner, validation_set=None,
                            testing_set=validation, metric=self.learning_metric)
                        # The inner-fold test results serve as validation values.
                        if self.classification:
                            tmp_valid_training_values_list.append(
                                (results['testing_accuracy'],
                                 results['training_accuracy']))
                        else:
                            tmp_valid_training_values_list.append(
                                (results['testing_value'],
                                 results['training_value']))
                    # Average validation and training values across inner folds.
                    average_validation_value = mean(
                        tmp_valid_training_values_list, axis=0)[0]
                    average_training_value = mean(
                        tmp_valid_training_values_list, axis=0)[1]
                    if self.classification:
                        print("\t\t\tAverage AUROC training vs. validation: %.3f vs. %.3f"
                              % (average_training_value, average_validation_value))
                    else:
                        print("\t\t\tAverage RMSE training vs. validation: %.3f vs. %.3f"
                              % (average_training_value, average_validation_value))
                    # Keep the configuration if it beats the best so far (AUROC
                    # is maximised; otherwise defer to the selection metric).
                    if self.classification:
                        improved = average_validation_value > best_validation_value
                    else:
                        improved = is_better(average_validation_value,
                                             best_validation_value,
                                             self.selection_metric)
                    if improved:
                        best_algorithm = algorithm
                        best_key = key
                        best_configuration = config
                        best_validation_value = average_validation_value
                        best_training_value = average_training_value
                    # Record configuration and validation value.
                    validation_value_list.append(
                        (configuration, average_validation_value))
                # All allowed configurations of this method (key) assessed:
                # retrain the best one on the full outer training set and
                # evaluate it on the outer test set.
                print('\n\t\tEvaluating best configuration in outer fold with index',
                      outer_cv)
                self.results[key][outer_cv] = self._evaluate_algorithm(
                    algorithm=best_algorithm, configurations=best_configuration,
                    training_set=training_outer, validation_set=None,
                    testing_set=testing, metric=self.learning_metric)
                self.results[key][outer_cv]['best_configuration'] = best_configuration
                self.results[key][outer_cv]['avg_inner_validation_error'] = best_validation_value
                self.results[key][outer_cv]['avg_inner_training_error'] = best_training_value
                if self.classification:
                    self.results[key][outer_cv]['avg_inner_validation_accuracy'] = best_validation_value
                    self.results[key][outer_cv]['avg_inner_training_accuracy'] = best_training_value
                if self.classification:
                    print("\n\t\tAUROC training vs. test: %.3f vs. %.3f"
                          % (self.results[key][outer_cv]['training_accuracy'],
                             self.results[key][outer_cv]['testing_accuracy']))
                else:
                    print("\n\t\tRMSE training vs. test: %.3f vs. %.3f"
                          % (self.results[key][outer_cv]['training_value'],
                             self.results[key][outer_cv]['testing_value']))
                # Each method overwrites the fold's best_result unconditionally,
                # so the last key processed wins.
                best_overall_algorithm = best_algorithm
                best_overall_configuration = best_configuration
                best_overall_key = best_key
                self.best_result[outer_cv] = self.results[key][outer_cv]
                self.best_result[outer_cv]['best_overall_algorithm'] = best_overall_algorithm
                self.best_result[outer_cv]['best_overall_configuration'] = best_overall_configuration
                self.best_result[outer_cv]['best_overall_key'] = best_overall_key
        outer_cv += 1
    # Serialize benchmark.
    benchmark_to_pickle(self)
    print('Leaving run_nested_cv for dataset:', self.dataset_name)
def test_root_mean_squared_error(self):
    prediction = array([1, 2, 3])
    target = array([4, 5, 6])
    value = RootMeanSquaredError.evaluate(prediction, target)
    self.assertEqual(value, 3)
    # Lower is better for an error metric.
    self.assertTrue(is_better(1, 2, RootMeanSquaredError))
def test_accuracy(self):
    prediction = array([1, 1, 0, 1])
    target = array([1, 0, 0, 1])
    value = Accuracy.evaluate(prediction, target)
    self.assertEqual(value, 0.75)
    # Higher is better for a score metric.
    self.assertTrue(is_better(2, 1, Accuracy))
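# Sketches of metric classes consistent with the two tests above (assumed,
# not the library source): each exposes a static evaluate() plus the
# greater_is_better flag that is_better() consults.
from numpy import mean, sqrt


class RootMeanSquaredErrorSketch:
    greater_is_better = False

    @staticmethod
    def evaluate(prediction, target):
        # sqrt(mean of squared errors): sqrt(9) = 3 for the test data above.
        return sqrt(mean((prediction - target) ** 2))


class AccuracySketch:
    greater_is_better = True

    @staticmethod
    def evaluate(prediction, target):
        # Fraction of matching labels: 3 of 4 = 0.75 for the test data above.
        return mean(prediction == target)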
def run_nested_cv(self):
    """Runs the benchmark study on a nested cross-validation environment."""
    print('Entering run_nested_cv for dataset:', self.data_set_name)
    outer_cv = 0
    outer_folds = self._get_outer_folds(outer_cv)
    for training_outer_index, testing_index in outer_folds.split(
            get_input_variables(self.samples).values,
            get_target_variable(self.samples).values):
        print('\tIndex of outer fold:', outer_cv)
        training_outer, testing = pd.DataFrame(
            self.samples.values[training_outer_index]), pd.DataFrame(
                self.samples.values[testing_index])
        # Worst possible starting value, respecting the metric's direction.
        if self.classification:
            best_overall_validation_value = float('-Inf')
        else:
            best_overall_validation_value = float(
                '-Inf') if self.metric.greater_is_better else float('Inf')
        for key in self.models.keys():
            print('\t\tAlgorithm with key:', key)
            if not self.results[key][outer_cv]:
                if self.classification:
                    best_validation_value = float('-Inf')
                else:
                    best_validation_value = float(
                        '-Inf') if self.metric.greater_is_better else float('Inf')
                validation_value_list = list()
                for configuration in range(self.models[key]['max_combinations']):
                    print('\t\t\tIndex of algorithm configuration:',
                          len(validation_value_list))
                    if len(self.models[key]['algorithms']) > 1:
                        # Pick one of the method's algorithm variants at random.
                        option = randint(0, 2)
                        algorithm = self.models[key]['algorithms'][option]
                        config = self.models[key]['configuration_method'](option)
                    else:
                        algorithm = self.models[key]['algorithms'][0]
                        if key in ('mlpc_sgd', 'mlpc_adam', 'mlpr_sgd', 'mlpr_adam'):
                            # Batch size derived from the inner-fold size
                            # (version from 01-25).
                            batch_size = int(training_outer.shape[0] / _INNER_FOLDS)
                            config = self.models[key]['configuration_method'](batch_size)
                        else:
                            config = self.models[key]['configuration_method']()
                    inner_folds = self._get_inner_folds(outer_cv)
                    tmp_valid_training_values_list = list()
                    for training_inner_index, validation_index in inner_folds.split(
                            get_input_variables(training_outer).values,
                            get_target_variable(training_outer).values):
                        print('\t\t\t\tIndex of inner fold:',
                              len(tmp_valid_training_values_list))
                        training_inner, validation = pd.DataFrame(
                            training_outer.values[training_inner_index]), pd.DataFrame(
                                training_outer.values[validation_index])
                        results = self._evaluate_algorithm(
                            algorithm=algorithm, configurations=config,
                            training_set=training_inner, validation_set=None,
                            testing_set=validation, metric=self.metric)
                        # The inner-fold test results serve as validation values.
                        if self.classification:
                            tmp_valid_training_values_list.append(
                                (results['testing_accuracy'],
                                 results['training_accuracy']))
                        else:
                            tmp_valid_training_values_list.append(
                                (results['testing_value'],
                                 results['training_value']))
                    # Average validation and training values across inner folds.
                    average_validation_value = mean(
                        tmp_valid_training_values_list, axis=0)[0]
                    average_training_value = mean(
                        tmp_valid_training_values_list, axis=0)[1]
                    # Keep the configuration if it beats the best so far (AUROC
                    # is maximised; otherwise defer to the metric).
                    if self.classification:
                        improved = average_validation_value > best_validation_value
                    else:
                        improved = is_better(average_validation_value,
                                             best_validation_value, self.metric)
                    if improved:
                        best_algorithm = algorithm
                        best_key = key
                        best_configuration = config
                        best_validation_value = average_validation_value
                        best_training_value = average_training_value
                    # Record configuration and validation value.
                    validation_value_list.append(
                        (configuration, average_validation_value))
                # Retrain this method's best configuration on the full outer
                # training set and evaluate it on the outer test set.
                self.results[key][outer_cv] = self._evaluate_algorithm(
                    algorithm=best_algorithm, configurations=best_configuration,
                    training_set=training_outer, validation_set=None,
                    testing_set=testing, metric=self.metric)
                self.results[key][outer_cv]['best_configuration'] = best_configuration
                self.results[key][outer_cv]['avg_inner_validation_error'] = best_validation_value
                self.results[key][outer_cv]['avg_inner_training_error'] = best_training_value
                if self.classification:
                    self.results[key][outer_cv]['avg_inner_validation_accuracy'] = best_validation_value
                    self.results[key][outer_cv]['avg_inner_training_accuracy'] = best_training_value
                # Track the best method across all keys for this outer fold.
                if self.classification:
                    overall_improved = best_validation_value > best_overall_validation_value
                else:
                    overall_improved = is_better(best_validation_value,
                                                 best_overall_validation_value,
                                                 self.metric)
                if overall_improved:
                    best_overall_key = best_key
                    best_overall_algorithm = best_algorithm
                    best_overall_configuration = best_configuration
                    best_overall_validation_value = best_validation_value
        print('\tBest overall configuration found for outer fold with index',
              outer_cv)
        self.best_result[outer_cv] = self._evaluate_algorithm(
            algorithm=best_overall_algorithm,
            configurations=best_overall_configuration,
            training_set=training_outer, validation_set=None,
            testing_set=testing, metric=self.metric)
        self.best_result[outer_cv]['best_overall_algorithm'] = best_overall_algorithm
        self.best_result[outer_cv]['best_overall_configuration'] = best_overall_configuration
        self.best_result[outer_cv]['best_overall_key'] = best_overall_key
        if self.ensembles is not None:
            print('\tCreating ensembles')
            self._run_ensembles(outer_cv,
                                best_overall_algorithm.get_corresponding_algo(),
                                best_overall_configuration, training_outer,
                                testing, self.metric)
        else:
            print('\tNo ensembles to create')
        outer_cv += 1
    # Serialize benchmark.
    benchmark_to_pickle(self)
    print('Leaving run_nested_cv for dataset:', self.data_set_name)