def test_weighted_root_mean_squared_error(self):
     prediction = array([1, 0, 1, 1])
     target = array([1, 0, 1, 1])
     weight_vector = array([0.5, 0.5, 0.5, 0.5])
     metric = WeightedRootMeanSquaredError(weight_vector)
     value = metric.evaluate(prediction, target)
     self.assertEqual(value, 0)
     self.assertTrue(is_better(1, 2, WeightedRootMeanSquaredError))
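A minimal sketch of the comparison the assertion above relies on, assuming is_better simply consults the metric's greater_is_better flag (the same attribute the evaluator below reads); the library's actual implementation may differ:

def is_better(value_1, value_2, metric):
    """Sketch: return True if value_1 is preferable to value_2 under the given metric."""
    if metric.greater_is_better:
        return value_1 > value_2
    return value_1 < value_2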
Example #2
 def _select_best_learner(self,
                          time_limit=TIME_LIMIT_SECONDS,
                          time_buffer=TIME_BUFFER,
                          verbose=False):
     # Best learner found so far (the one with the best validation value).
     best_learner = None
     # Best validation value found so far.
     best_validation_value = float(
         '-Inf') if self.metric.greater_is_better else float('Inf')
     # List of (configuration, validation value) pairs.
     validation_value_list = list()
     # Current time in seconds.
     time_seconds = lambda: default_timer()
     # Random order of configurations.
     shuffle(self.configurations)
     # Number of configurations run.
     number_of_runs = 0
     # Start of run.
     run_start = time_seconds()
     # Time left.
     time_left = lambda: time_limit - (time_seconds() - run_start)
     # Iterate through all configurations.
     for configuration in tqdm(self.configurations):
         # Create learner from configuration.
         learner = self.model(**configuration)
         # Train learner.
         if self.__class__.__bases__[0] == EvaluatorSklearn:
             learner.fit(
                 get_input_variables(self.training_set).as_matrix(),
                 get_target_variable(self.training_set).as_matrix())
         else:
             learner.fit(
                 get_input_variables(self.training_set).as_matrix(),
                 get_target_variable(self.training_set).as_matrix(),
                 self.metric, verbose)
         # Calculate validation value.
         validation_value = self._calculate_value(learner,
                                                  self.validation_set)
         # If this validation value is better than the best so far (according to the metric), record this learner and its value.
         if is_better(validation_value, best_validation_value, self.metric):
             best_learner = learner
             best_validation_value = validation_value
         # Add configuration and validation error to validation error list.
         validation_value_list.append((configuration, validation_value))
         # Increase number of runs.
         number_of_runs += 1
         # Calculate time left.
         run_end = time_left()
         # Calculate time expected for next run.
         run_expected = (time_limit - run_end) / number_of_runs
         # Break if no time is left or the expected duration of the next run exceeds the buffered time left.
         if run_end < 0 or run_end * (1 + time_buffer) < run_expected:
             break
     # Return the best learner found and the list of (configuration, validation value) pairs.
     return {
         'best_learner': best_learner,
         'validation_value_list': validation_value_list
     }
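A hypothetical way to consume the dictionary returned above; evaluator stands in for an instance of the surrounding class and the time limit value is arbitrary:

# 'evaluator' is a hypothetical instance of the class defining _select_best_learner.
selection = evaluator._select_best_learner(time_limit=600, verbose=True)
best_learner = selection['best_learner']
# Inspect every configuration that was tried together with its validation value.
for configuration, validation_value in selection['validation_value_list']:
    print(configuration, validation_value)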
Example #3
    def evaluate(self, algorithm):

        # if current generation is 0, return False (since there exists no champion)
        if algorithm.current_generation == 0:
            return False
        champion = algorithm.champion
        # select the offspring that are better than the current champion
        superior_solutions = [
            solution for solution in algorithm.population
            if is_better(solution.value, champion.value, algorithm.metric)
        ]
        # if no superior offspring exist, defer to the parent stopping criterion
        if not superior_solutions:
            return super().evaluate(algorithm)
        # calculates nr of superior solutions
        # nr_superior_solutions = len(superior_solutions)
        # calculate percentage of superior solutions
        percentage_superior_solutions = len(superior_solutions) / len(
            algorithm.population)
        if percentage_superior_solutions < self.threshold:
            return True
        else:
            return super().evaluate(algorithm)
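A toy illustration of the threshold test above, with made-up numbers: when only 3 of 50 offspring beat the champion, the fraction of superior solutions is 0.06, which falls below a hypothetical threshold of 0.1, so the criterion would signal a stop.

population_size = 50            # hypothetical population size
superior_count = 3              # offspring that beat the champion
threshold = 0.1                 # hypothetical value of self.threshold
percentage_superior_solutions = superior_count / population_size   # 0.06
should_stop = percentage_superior_solutions < threshold            # True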
 def run_nested_cv(self):
     """ runs benchmark study on a nested cross-validation environment """
     
     #=======================================================================
     # print('self.learning_metric =', self.learning_metric)
     # print('self.selection_metric =', self.selection_metric)
     #=======================================================================
     
     """ N configuration for each method, trained on all data, selected from the same data """
     
     print('Entering training benchmark for dataset:', self.dataset_name)
     
     training_data = pd.DataFrame(self.samples.values)
     
     for outer_cv in range(_OUTER_FOLDS):
         
         print('\n\tIndex of outer fold:', outer_cv)
         
         for key in self.models.keys():
             
             print('\t\tAlgorithm with key:', key)
             
             if not self.results[key][outer_cv]:
                 
                 if self.classification:
                     best_training_value = float('-Inf')
                 else:
                     best_training_value = float('-Inf') if self.selection_metric.greater_is_better else float('Inf')
                 
                 training_value_list = list()
                 for configuration in range(self.models[key]['max_combinations']):
                     
                     print('\n\t\t\tIndex of algorithm configuration:', len(training_value_list))
                     
                     if(len(self.models[key]['algorithms'])) > 1:
                         option = randint(0, 2)
                         algorithm = self.models[key]['algorithms'][option]
                         config = self.models[key]['configuration_method'](option)
                     else:
                         algorithm = self.models[key]['algorithms'][0]
                         #===================================================
                         # if (key == 'mlpc_sgd' or key == 'mlpc_adam' or key == 'mlpr_sgd' or key == 'mlpr_adam'):
                         #===================================================
                         if key.startswith('mlp'):
                             # version from 01-22
                             # config = self.models[key]['configuration_method'](self.get_dataset_size(training_outer))
                             # version from 01-25
                             batch_size = int(training_data.shape[0])
                             # batch_size = int(training_outer.shape[0] / _INNER_FOLDS) * 2
                             config = self.models[key]['configuration_method'](batch_size)
                         else:
                             config = self.models[key]['configuration_method']()
                     
                     if key.startswith('mlp'):
                         config['max_iter'] = DEFAULT_NUMBER_OF_ITERATIONS
                     else:
                         config['stopping_criterion'] = MaxGenerationsCriterion(DEFAULT_NUMBER_OF_ITERATIONS)
                     
                     results = self._evaluate_algorithm(algorithm=algorithm, configurations=config,
                                                        training_set=training_data, validation_set=None, testing_set=training_data, metric=self.learning_metric)
                     
                     if self.classification:
                         training_value = results['training_accuracy']
                     else:
                         training_value = results['training_value']
                     
                     if self.classification:
                         print("\t\t\tAUROC training: %.3f" % (training_value))
                     else:
                         print("\t\t\tRMSE training: %.3f" % (training_value))
                     
                     if self.classification:
                         if training_value > best_training_value:
                             #===============================================
                             # print('\n\t\t\t\t\tClassification: %.3f is better than %.3f\n' % (training_value, best_training_value))
                             #===============================================
                             best_algorithm = algorithm
                             best_key = key
                             best_configuration = config
                             best_training_value = training_value
                             
                             self.results[key][outer_cv] = results
                             self.results[key][outer_cv]['best_configuration'] = best_configuration
                             self.results[key][outer_cv]['avg_inner_validation_error'] = best_training_value
                             self.results[key][outer_cv]['avg_inner_training_error'] = best_training_value
                             
                             best_overall_algorithm = best_algorithm
                             best_overall_configuration = best_configuration
                             best_overall_key = best_key
                             
                             self.best_result[outer_cv] = self.results[key][outer_cv]
                             self.best_result[outer_cv]['best_overall_algorithm'] = best_overall_algorithm
                             self.best_result[outer_cv]['best_overall_configuration'] = best_overall_configuration
                             self.best_result[outer_cv]['best_overall_key'] = best_overall_key
                             
                         #===================================================
                         # else:
                         #     print('\n\t\t\t\t\tClassification: %.3f is worse (!) than %.3f\n' % (training_value, best_training_value))
                         #===================================================
                     else:
                         if is_better(training_value, best_training_value, self.selection_metric):
                             #===============================================
                             # print('\n\t\t\t\t\tRegression: %.3f is better than %.3f\n' % (training_value, best_training_value))
                             #===============================================
                             best_algorithm = algorithm
                             best_key = key
                             best_configuration = config
                             best_training_value = training_value
                             
                             self.results[key][outer_cv] = results
                             self.results[key][outer_cv]['best_configuration'] = best_configuration
                             self.results[key][outer_cv]['avg_inner_validation_error'] = best_training_value
                             self.results[key][outer_cv]['avg_inner_training_error'] = best_training_value
                             
                             best_overall_algorithm = best_algorithm
                             best_overall_configuration = best_configuration
                             best_overall_key = best_key
                             
                             self.best_result[outer_cv] = self.results[key][outer_cv]
                             self.best_result[outer_cv]['best_overall_algorithm'] = best_overall_algorithm
                             self.best_result[outer_cv]['best_overall_configuration'] = best_overall_configuration
                             self.best_result[outer_cv]['best_overall_key'] = best_overall_key
                             
                         #===================================================
                         # else:
                         #     print('\n\t\t\t\t\tRegression: %.3f is worse (!) than %.3f\n' % (training_value, best_training_value))
                         #===================================================
                     
                     training_value_list.append((configuration, training_value))
                 
                 if self.classification:
                     print("\n\t\tAUROC training: %.3f" % (self.results[key][outer_cv]['training_accuracy']))
                 else:
                     print("\n\t\tRMSE training: %.3f" % (self.results[key][outer_cv]['training_value']))
     
     # Serialize benchmark 
     benchmark_to_pickle(self)
     
     print('Leaving training benchmark for dataset:', self.dataset_name)
Example #5
 def _is_better(self, value_1, value_2):
     """Returns whether value_1 is better than value_2, based on defined metric."""
     return is_better(value_1, value_2, self.metric)
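A short usage sketch for the wrapper above; self.metric and the surrounding loop variables are assumed to be defined elsewhere in the class and are not part of the original snippet.

# Inside a method of the same class (candidate_value and best_value are hypothetical):
if self._is_better(candidate_value, best_value):
    best_value = candidate_value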
    def run_nested_cv(self):
        """ runs benchmark study on a nested cross-validation environment """

        #=======================================================================
        # print('self.learning_metric =', self.learning_metric)
        # print('self.selection_metric =', self.selection_metric)
        #=======================================================================

        print('Entering run_nested_cv for dataset:', self.dataset_name)

        outer_cv = 0
        outer_folds = self._get_outer_folds(outer_cv)
        for training_outer_index, testing_index in outer_folds.split(
                get_input_variables(self.samples).values,
                get_target_variable(self.samples).values):

            print('\n\tIndex of outer fold:', outer_cv)

            training_outer, testing = pd.DataFrame(
                self.samples.values[training_outer_index]), pd.DataFrame(
                    self.samples.values[testing_index])

            if self.classification:
                best_overall_validation_value = float('-Inf')
            else:
                best_overall_validation_value = float(
                    '-Inf'
                ) if self.selection_metric.greater_is_better else float('Inf')

            for key in self.models.keys():

                print('\t\tAlgorithm with key:', key)

                if not self.results[key][outer_cv]:

                    if self.classification:
                        best_validation_value = float('-Inf')
                    else:
                        best_validation_value = float(
                            '-Inf'
                        ) if self.selection_metric.greater_is_better else float(
                            'Inf')

                    validation_value_list = list()
                    for configuration in range(
                            self.models[key]['max_combinations']):

                        print('\n\t\t\tIndex of algorithm configuration:',
                              len(validation_value_list))

                        if (len(self.models[key]['algorithms'])) > 1:
                            option = randint(0, 2)
                            algorithm = self.models[key]['algorithms'][option]
                            config = self.models[key]['configuration_method'](
                                option)
                        else:
                            algorithm = self.models[key]['algorithms'][0]
                            #===================================================
                            # if (key == 'mlpc_sgd' or key == 'mlpc_adam' or key == 'mlpr_sgd' or key == 'mlpr_adam'):
                            #===================================================
                            if key.startswith('mlp'):
                                # version from 01-22
                                # config = self.models[key]['configuration_method'](self.get_dataset_size(training_outer))
                                # version from 01-25
                                batch_size = int(training_outer.shape[0] /
                                                 _INNER_FOLDS)
                                # batch_size = int(training_outer.shape[0] / _INNER_FOLDS) * 2
                                config = self.models[key][
                                    'configuration_method'](batch_size)
                            else:
                                config = self.models[key][
                                    'configuration_method']()

                        inner_folds = self._get_inner_folds(outer_cv)
                        tmp_valid_training_values_list = list()
                        for training_inner_index, validation_index in inner_folds.split(
                                get_input_variables(training_outer).values,
                                get_target_variable(training_outer).values):

                            print('\t\t\t\tIndex of inner fold:',
                                  len(tmp_valid_training_values_list))

                            training_inner, validation = pd.DataFrame(
                                training_outer.values[training_inner_index]
                            ), pd.DataFrame(
                                training_outer.values[validation_index])

                            results = self._evaluate_algorithm(
                                algorithm=algorithm,
                                configurations=config,
                                training_set=training_inner,
                                validation_set=None,
                                testing_set=validation,
                                metric=self.learning_metric)

                            # print('results[testing_value] =', results['testing_value'], ', results[training_value] =', results['training_value'])

                            if self.classification:
                                tmp_valid_training_values_list.append(
                                    (results['testing_accuracy'],
                                     results['training_accuracy']))
                            else:
                                tmp_valid_training_values_list.append(
                                    (results['testing_value'],
                                     results['training_value']))

                        # Calculate average validation value and check if the current value is better than the best one
                        average_validation_value = mean(
                            tmp_valid_training_values_list, axis=0)[0]
                        average_training_value = mean(
                            tmp_valid_training_values_list, axis=0)[1]

                        if self.classification:
                            print(
                                "\t\t\tAverage AUROC training vs. validation: %.3f vs. %.3f"
                                % (average_training_value,
                                   average_validation_value))
                        else:
                            print(
                                "\t\t\tAverage RMSE training vs. validation: %.3f vs. %.3f"
                                % (average_training_value,
                                   average_validation_value))

                        if self.classification:
                            if average_validation_value > best_validation_value:
                                #===============================================
                                # print('\n\t\t\t\t\tClassification: %.3f is better than %.3f\n' % (average_validation_value, best_validation_value))
                                #===============================================
                                best_algorithm = algorithm
                                best_key = key
                                best_configuration = config
                                best_validation_value = average_validation_value
                                best_training_value = average_training_value
                            #===================================================
                            # else:
                            #     print('\n\t\t\t\t\tClassification: %.3f is worse (!) than %.3f\n' % (average_validation_value, best_validation_value))
                            #===================================================
                        else:
                            if is_better(average_validation_value,
                                         best_validation_value,
                                         self.selection_metric):
                                #===============================================
                                # print('\n\t\t\t\t\tRegression: %.3f is better than %.3f\n' % (average_validation_value, best_validation_value))
                                #===============================================
                                best_algorithm = algorithm
                                best_key = key
                                best_configuration = config
                                best_validation_value = average_validation_value
                                best_training_value = average_training_value
                            #===================================================
                            # else:
                            #     print('\n\t\t\t\t\tRegression: %.3f is worse (!) than %.3f\n' % (average_validation_value, best_validation_value))
                            #===================================================

                        # Add configuration and validation error to validation error list.
                        validation_value_list.append(
                            (configuration, average_validation_value))
                    """ all allowed configurations assessed of a given variant/algorithm/method (key) """
                    print(
                        '\n\t\tEvaluating best configuration in outer fold with index',
                        outer_cv)
                    self.results[key][outer_cv] = self._evaluate_algorithm(
                        algorithm=best_algorithm,
                        configurations=best_configuration,
                        training_set=training_outer,
                        validation_set=None,
                        testing_set=testing,
                        metric=self.learning_metric)
                    self.results[key][outer_cv][
                        'best_configuration'] = best_configuration
                    self.results[key][outer_cv][
                        'avg_inner_validation_error'] = best_validation_value
                    self.results[key][outer_cv][
                        'avg_inner_training_error'] = best_training_value
                    if self.classification:
                        self.results[key][outer_cv][
                            'avg_inner_validation_accuracy'] = best_validation_value
                        self.results[key][outer_cv][
                            'avg_inner_training_accuracy'] = best_training_value

                    if self.classification:
                        print(
                            "\n\t\tAUROC training vs. test: %.3f vs. %.3f" %
                            (self.results[key][outer_cv]['training_accuracy'],
                             self.results[key][outer_cv]['testing_accuracy']))
                        #=======================================================
                        # print("\n\t\tAlgorithm %s, AUROC training vs. test: %.3f vs. %.3f" % (key, self.results[key][outer_cv]['training_accuracy'], self.results[key][outer_cv]['testing_accuracy']))
                        #=======================================================
                    else:
                        print("\n\t\tRMSE training vs. test: %.3f vs. %.3f" %
                              (self.results[key][outer_cv]['training_value'],
                               self.results[key][outer_cv]['testing_value']))
                        #=======================================================
                        # print("\n\t\tAlgorithm %s, RMSE training vs. test: %.3f vs. %.3f" % (key, self.results[key][outer_cv]['training_value'], self.results[key][outer_cv]['testing_value']))
                        #=======================================================

                    best_overall_algorithm = best_algorithm
                    best_overall_configuration = best_configuration
                    best_overall_key = best_key

                    self.best_result[outer_cv] = self.results[key][outer_cv]
                    self.best_result[outer_cv][
                        'best_overall_algorithm'] = best_overall_algorithm
                    self.best_result[outer_cv][
                        'best_overall_configuration'] = best_overall_configuration
                    self.best_result[outer_cv][
                        'best_overall_key'] = best_overall_key

                    # # Serialize benchmark
                    # benchmark_to_pickle(self)

            outer_cv += 1

        # Serialize benchmark
        benchmark_to_pickle(self)

        print('Leaving run_nested_cv for dataset:', self.dataset_name)
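The outer/inner split structure used above can be sketched with scikit-learn's KFold; this is an assumption, since _get_outer_folds and _get_inner_folds are not shown, but the .split(X, y) calls match the KFold interface. Fold counts and data below are placeholders.

import numpy as np
from sklearn.model_selection import KFold

_OUTER_FOLDS, _INNER_FOLDS = 5, 3                   # placeholder constants
X, y = np.random.rand(60, 4), np.random.rand(60)    # placeholder data

outer_folds = KFold(n_splits=_OUTER_FOLDS, shuffle=True, random_state=0)
for training_outer_index, testing_index in outer_folds.split(X, y):
    X_outer, y_outer = X[training_outer_index], y[training_outer_index]
    inner_folds = KFold(n_splits=_INNER_FOLDS, shuffle=True, random_state=0)
    for training_inner_index, validation_index in inner_folds.split(X_outer, y_outer):
        # fit each configuration on the inner training split and score it on the validation split
        pass
    # refit the configuration with the best average validation value on the full
    # outer training split, then evaluate it once on the held-out test fold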
 def test_root_mean_squared_error(self):
     prediction = array([1, 2, 3])
     target = array([4, 5, 6])
     value = RootMeanSquaredError.evaluate(prediction, target)
     self.assertEqual(value, 3)
     self.assertTrue(is_better(1, 2, RootMeanSquaredError))
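The expected value 3 in the assertion above follows directly from the usual RMSE formula; a standalone numpy check (assuming RootMeanSquaredError implements that formula):

import numpy as np

prediction = np.array([1, 2, 3])
target = np.array([4, 5, 6])
# every residual is 3, so sqrt(mean([9, 9, 9])) == 3.0
rmse = np.sqrt(np.mean((prediction - target) ** 2))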
 def test_accuracy(self):
     prediction = array([1, 1, 0, 1])
     target = array([1, 0, 0, 1])
     value = Accuracy.evaluate(prediction, target)
     self.assertEqual(value, 0.75)
     self.assertTrue(is_better(2, 1, Accuracy))
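Likewise, the 0.75 above is simply the fraction of matching labels; a standalone numpy check:

import numpy as np

prediction = np.array([1, 1, 0, 1])
target = np.array([1, 0, 0, 1])
accuracy = np.mean(prediction == target)   # 3 of 4 labels match -> 0.75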
Example #9
    def run_nested_cv(self):
        """ runs benchmark study on a nested cross-validation environment """

        print('Entering run_nested_cv for dataset:', self.data_set_name)

        outer_cv = 0
        outer_folds = self._get_outer_folds(outer_cv)
        for training_outer_index, testing_index in outer_folds.split(
                get_input_variables(self.samples).values,
                get_target_variable(self.samples).values):

            print('\tIndex of outer fold:', outer_cv)

            training_outer, testing = pd.DataFrame(
                self.samples.values[training_outer_index]), pd.DataFrame(
                    self.samples.values[testing_index])

            if self.classification:
                best_overall_validation_value = float('-Inf')
            else:
                best_overall_validation_value = float(
                    '-Inf') if self.metric.greater_is_better else float('Inf')

            for key in self.models.keys():

                print('\t\tAlgorithm with key:', key)

                if not self.results[key][outer_cv]:

                    if self.classification:
                        best_validation_value = float('-Inf')
                    else:
                        best_validation_value = float(
                            '-Inf'
                        ) if self.metric.greater_is_better else float('Inf')

                    validation_value_list = list()
                    for configuration in range(
                            self.models[key]['max_combinations']):

                        print('\t\t\tIndex of algorithm configuration:',
                              len(validation_value_list))

                        if (len(self.models[key]['algorithms'])) > 1:
                            option = randint(0, 2)
                            algorithm = self.models[key]['algorithms'][option]
                            config = self.models[key]['configuration_method'](
                                option)
                        else:
                            algorithm = self.models[key]['algorithms'][0]
                            if (key == 'mlpc_sgd' or key == 'mlpc_adam'
                                    or key == 'mlpr_sgd'
                                    or key == 'mlpr_adam'):
                                # version from 01-22
                                # config = self.models[key]['configuration_method'](self.get_data_set_size(training_outer))
                                # version from 01-25
                                batch_size = int(training_outer.shape[0] /
                                                 _INNER_FOLDS)
                                # batch_size = int(training_outer.shape[0] / _INNER_FOLDS) * 2
                                config = self.models[key][
                                    'configuration_method'](batch_size)
                            else:
                                config = self.models[key][
                                    'configuration_method']()

                        inner_folds = self._get_inner_folds(outer_cv)
                        tmp_valid_training_values_list = list()
                        for training_inner_index, validation_index in inner_folds.split(
                                get_input_variables(training_outer).values,
                                get_target_variable(training_outer).values):

                            print('\t\t\t\tIndex of inner fold:',
                                  len(tmp_valid_training_values_list))

                            training_inner, validation = pd.DataFrame(
                                training_outer.values[training_inner_index]
                            ), pd.DataFrame(
                                training_outer.values[validation_index])

                            results = self._evaluate_algorithm(
                                algorithm=algorithm,
                                configurations=config,
                                training_set=training_inner,
                                validation_set=None,
                                testing_set=validation,
                                metric=self.metric)

                            # print('results[testing_value] =', results['testing_value'], ', results[training_value] =', results['training_value'])

                            if self.classification:
                                tmp_valid_training_values_list.append(
                                    (results['testing_accuracy'],
                                     results['training_accuracy']))
                            else:
                                tmp_valid_training_values_list.append(
                                    (results['testing_value'],
                                     results['training_value']))

                        # Calculate average validation value and check if the current value is better than the best one
                        average_validation_value = mean(
                            tmp_valid_training_values_list, axis=0)[0]
                        average_training_value = mean(
                            tmp_valid_training_values_list, axis=0)[1]
                        if self.classification:
                            if average_validation_value > best_validation_value:
                                best_algorithm = algorithm
                                best_key = key
                                best_configuration = config
                                best_validation_value = average_validation_value
                                best_training_value = average_training_value
                        else:
                            if is_better(average_validation_value,
                                         best_validation_value, self.metric):
                                best_algorithm = algorithm
                                best_key = key
                                best_configuration = config
                                best_validation_value = average_validation_value
                                best_training_value = average_training_value

                        # Add configuration and validation error to validation error list.
                        validation_value_list.append(
                            (configuration, average_validation_value))

                    self.results[key][outer_cv] = self._evaluate_algorithm(
                        algorithm=best_algorithm,
                        configurations=best_configuration,
                        training_set=training_outer,
                        validation_set=None,
                        testing_set=testing,
                        metric=self.metric)

                    self.results[key][outer_cv][
                        'best_configuration'] = best_configuration
                    self.results[key][outer_cv][
                        'avg_inner_validation_error'] = best_validation_value
                    self.results[key][outer_cv][
                        'avg_inner_training_error'] = best_training_value
                    if self.classification:
                        self.results[key][outer_cv][
                            'avg_inner_validation_accuracy'] = best_validation_value
                        self.results[key][outer_cv][
                            'avg_inner_training_accuracy'] = best_training_value

                    # # Serialize benchmark
                    # benchmark_to_pickle(self)

                    if self.classification:
                        if best_validation_value > best_overall_validation_value:
                            best_overall_key = best_key
                            best_overall_algorithm = best_algorithm
                            best_overall_configuration = best_configuration
                            best_overall_validation_value = best_validation_value
                    else:
                        if is_better(best_validation_value,
                                     best_overall_validation_value,
                                     self.metric):
                            best_overall_key = best_key
                            best_overall_algorithm = best_algorithm
                            best_overall_configuration = best_configuration
                            best_overall_validation_value = best_validation_value

            print(
                '\tBest overall configuration found for outer fold with index',
                outer_cv)

            self.best_result[outer_cv] = self._evaluate_algorithm(
                algorithm=best_overall_algorithm,
                configurations=best_overall_configuration,
                training_set=training_outer,
                validation_set=None,
                testing_set=testing,
                metric=self.metric)
            self.best_result[outer_cv][
                'best_overall_algorithm'] = best_overall_algorithm
            self.best_result[outer_cv][
                'best_overall_configuration'] = best_overall_configuration
            self.best_result[outer_cv]['best_overall_key'] = best_overall_key
            if self.ensembles is not None:
                print('\tCreating ensembles')
                self._run_ensembles(
                    outer_cv, best_overall_algorithm.get_corresponding_algo(),
                    best_overall_configuration, training_outer, testing,
                    self.metric)
            else:
                print('\tNo ensembles to create')

            outer_cv += 1

        # Serialize benchmark
        benchmark_to_pickle(self)

        print('Leaving run_nested_cv for dataset:', self.data_set_name)