Ejemplo n.º 1
0
    def test_with_abalone(self):
        """Nested CV (2 outer x 2 inner folds) on the abalone dataset.

        Samples ``N_TEST_RUNS`` configurations restricted to the
        ``extra_trees`` estimator without preprocessing. Every run whose
        fit succeeds must report a finite error below 0.99; afterwards
        exactly ten errors must have been collected and the best of them
        must be below 0.77.
        """
        datasets_root = os.path.join(os.path.dirname(__file__), ".datasets")
        data_manager = CompetitionDataManager("abalone", datasets_root)
        search_space = get_configuration_space(
            data_manager.info,
            include_preprocessors=['no_preprocessing'],
            include_estimators=['extra_trees'])

        collected = []
        for _ in range(N_TEST_RUNS):
            sampled = search_space.sample_configuration()
            # Each evaluator gets its own deep copy of the data manager.
            evaluator = NestedCVEvaluator(copy.deepcopy(data_manager),
                                          sampled,
                                          outer_cv_folds=2,
                                          inner_cv_folds=2)
            if self._fit(evaluator):
                run_error = evaluator.predict()
                self.assertLess(run_error, 0.99)
                self.assertTrue(np.isfinite(run_error))
                collected.append(run_error)
            else:
                # Fit was reported as unsuccessful: skip this configuration.
                # (Bare ``print`` statement kept exactly as in the original.)
                print

        self.assertEqual(10, len(collected))
        # 0.77 is considered a reasonable bound for the best run.
        self.assertLess(min(collected), 0.77)
    def test_with_abalone(self):
        """Check nested cross-validation results on the abalone dataset.

        For each of ``N_TEST_RUNS`` sampled configurations (estimator
        ``extra_trees``, preprocessor ``no_preprocessing``) a
        ``NestedCVEvaluator`` with two inner and two outer folds is fitted.
        Successful runs must produce a finite error smaller than 0.99. In
        total ten errors are expected, the smallest of which must be below
        0.77.
        """
        here = os.path.dirname(__file__)
        manager = CompetitionDataManager("abalone",
                                         os.path.join(here, ".datasets"))
        space = get_configuration_space(
            manager.info,
            include_estimators=['extra_trees'],
            include_preprocessors=['no_preprocessing'],
        )

        run_errors = []
        for _run in range(N_TEST_RUNS):
            candidate = space.sample_configuration()
            manager_copy = copy.deepcopy(manager)
            evaluator = NestedCVEvaluator(
                manager_copy,
                candidate,
                inner_cv_folds=2,
                outer_cv_folds=2,
            )
            fitted = self._fit(evaluator)
            if not fitted:
                # Bare ``print`` statement kept exactly as in the original.
                print
                continue

            current = evaluator.predict()
            self.assertLess(current, 0.99)
            self.assertTrue(np.isfinite(current))
            run_errors.append(current)

        self.assertEqual(10, len(run_errors))
        # The best error being below 0.77 is a reasonable bound.
        self.assertLess(min(run_errors), 0.77)
    def test_evaluate_multiclass_classification(self):
        """Exercise ``NestedCVEvaluator`` on iris as a multiclass task.

        Samples ``N_TEST_RUNS`` configurations (``ridge`` estimator with the
        ``select_rates`` preprocessor), fits a nested-CV evaluator for each
        and checks the reported error, the shapes of the returned
        predictions and the bookkeeping of inner/outer models and indices.
        More than nine configurations must end up with an error below 0.5.
        """
        X_train, Y_train, X_test, Y_test = get_dataset('iris')

        # The first 25 rows of the loaded test split act as validation data,
        # the remaining rows stay as the test set.
        X_valid = X_test[:25, ]
        Y_valid = Y_test[:25, ]
        X_test = X_test[25:, ]
        Y_test = Y_test[25:, ]

        # Minimal stand-in for a data manager: only ``info``, ``data`` and
        # ``feat_type`` are populated. Y_valid / Y_test are not stored in it;
        # they are only used below to check prediction shapes.
        D = Dummy()
        D.info = {'metric': 'acc_metric', 'task': MULTICLASS_CLASSIFICATION,
                  'is_sparse': False, 'target_num': 3}
        D.data = {'X_train': X_train, 'Y_train': Y_train,
                  'X_valid': X_valid, 'X_test': X_test}
        # NOTE(review): the second entry is capitalised ('Numerical');
        # presumably deliberate to cover case-insensitive handling - confirm.
        D.feat_type = ['numerical', 'Numerical', 'numerical', 'numerical']

        configuration_space = get_configuration_space(D.info,
            include_estimators=['ridge'], include_preprocessors=['select_rates'])

        # err[i] stays 0.0 for runs that are skipped because the fit failed.
        err = np.zeros([N_TEST_RUNS])
        num_models_better_than_random = 0
        for i in range(N_TEST_RUNS):
            # Trailing comma: the result is printed on the same line later.
            print "Evaluate configuration: %d; result:" % i,
            configuration = configuration_space.sample_configuration()
            D_ = copy.deepcopy(D)
            # No fold counts are passed; the assertions below expect five
            # outer and five inner folds (presumably the evaluator's
            # defaults - TODO confirm).
            evaluator = NestedCVEvaluator(D_, configuration,
                                          with_predictions=True,
                                          all_scoring_functions=True)

            if not self._fit(evaluator):
                # Terminate the partially printed line and skip this run.
                print
                continue
            e_, Y_optimization_pred, Y_valid_pred, Y_test_pred = \
                evaluator.predict()
            err[i] = e_['acc_metric']
            print err[i], configuration['classifier']
            print e_['outer:bac_metric'], e_['bac_metric']

            # Test the outer CV
            num_targets = len(np.unique(Y_train))
            self.assertTrue(np.isfinite(err[i]))
            self.assertGreaterEqual(err[i], 0.0)
            # Test that five outer models were trained and none is missing
            self.assertEqual(len(evaluator.outer_models), 5)
            self.assertTrue(all([model is not None for model in evaluator.outer_models]))

            # Predictions must have one row per sample and one column per
            # distinct target value found in Y_train.
            self.assertEqual(Y_optimization_pred.shape[0], Y_train.shape[0])
            self.assertEqual(Y_optimization_pred.shape[1], num_targets)
            self.assertEqual(Y_valid_pred.shape[0], Y_valid.shape[0])
            self.assertEqual(Y_valid_pred.shape[1], num_targets)
            self.assertEqual(Y_test_pred.shape[0], Y_test.shape[0])
            self.assertEqual(Y_test_pred.shape[1], num_targets)
            # Test some basic statistics of the predictions
            # (only for runs with an error below 0.5, which are counted as
            # better than random).
            if err[i] < 0.5:
                self.assertTrue(0.3 < Y_valid_pred.mean() < 0.36666)
                self.assertGreaterEqual(Y_valid_pred.std(), 0.1)
                self.assertTrue(0.3 < Y_test_pred.mean() < 0.36666)
                self.assertGreaterEqual(Y_test_pred.std(), 0.1)
                num_models_better_than_random += 1

            # Test the inner CV
            self.assertEqual(len(evaluator.inner_models), 5)
            for fold in range(5):
                self.assertEqual(len(evaluator.inner_models[fold]), 5)
                self.assertTrue(all([model is not None for model
                                     in evaluator.inner_models[fold]]))
                # First element of each index entry must hold at least 75
                # (outer) / 60 (inner) indices - presumably the training
                # part of the split; verify against the evaluator.
                self.assertGreaterEqual(len(evaluator.outer_indices[fold][0]), 75)
                for inner_fold in range(5):
                    self.assertGreaterEqual(len(evaluator.inner_indices[
                        fold][inner_fold][0]), 60)


        # More than nine sampled configurations must beat the 0.5 threshold.
        self.assertGreater(num_models_better_than_random, 9)
Ejemplo n.º 4
0
def _coerce_numeric_params(params):
    """Convert the values of ``params`` in place to ``int`` or ``float``.

    ``int`` is tried first, then ``float``; values that can be converted by
    neither are left untouched. Only conversion errors are caught, so
    ``KeyboardInterrupt`` and ``SystemExit`` are no longer swallowed.
    """
    for key in params:
        value = params[key]
        for cast in (int, float):
            try:
                params[key] = cast(value)
            except (TypeError, ValueError, OverflowError):
                continue
            break


def _print_paramils_result(evaluator, metric):
    """Predict with a fitted evaluator and print the ParamILS result line.

    The line reports the score for ``metric`` as the result and lists every
    score returned by ``evaluator.predict()`` plus the elapsed time since
    ``evaluator.starttime`` as additional run info.
    """
    scores = evaluator.predict()
    duration = time.time() - evaluator.starttime

    score = scores[metric]
    additional_run_info = ";".join(
        ["%s: %s" % (m_, value) for m_, value in scores.items()])
    additional_run_info += ";" + "duration: " + str(duration)

    # Single-argument, parenthesised form: same output under the Python 2
    # print statement and the Python 3 print function.
    print("Result for ParamILS: %s, %f, 1, %f, %d, %s" % (
        "SAT", abs(duration), score, evaluator.seed, additional_run_info))


def main(dataset_info, mode, seed, params, mode_args=None):
    """Evaluate one configuration in the requested operation mode.

    Supported values of ``mode``:

    * ``'holdout'``: fit a ``HoldoutEvaluator``, call ``finish_up()`` and,
      if a ``models_<seed>`` directory exists in the current working
      directory, pickle the fitted model there as ``<num_run>.model``.
    * ``'test'``: fit a ``TestEvaluator`` and print a ParamILS result line.
    * ``'cv'``: fit a ``CVEvaluator`` with ``mode_args['folds']`` folds and
      call ``finish_up()``.
    * ``'partial_cv'``: fit only fold ``mode_args['fold']`` of a
      ``CVEvaluator`` and print a ParamILS result line.
    * ``'nested-cv'``: fit a ``NestedCVEvaluator`` with
      ``mode_args['inner_folds']`` / ``mode_args['outer_folds']`` and call
      ``finish_up()``.

    The original notes describe the intended uses as: CV for the
    Tweakathon, a 1/3 test split to evaluate a configuration, and CV on the
    2/3 train split to optimize hyperparameters before testing on the 1/3
    test split.

    It must by no means be used for the Auto part of the competition!

    Args:
        dataset_info: Forwarded to ``store_and_or_load_data``.
        mode: One of the mode names listed above.
        seed: Seed; converted with ``int(float(seed))``. ``None`` means 1.
        params: Mapping of hyperparameter names to values. Values are
            converted in place to ``int`` or ``float`` where possible.
        mode_args: Mapping with the fold settings required by the
            ``'cv'``, ``'partial_cv'`` and ``'nested-cv'`` modes.

    Raises:
        ValueError: If ``mode`` is not one of the supported modes.
    """
    # The 'test' mode is the only one that does not use a run number.
    if mode != "test":
        num_run = get_new_run_num()

    _coerce_numeric_params(params)

    if seed is not None:
        seed = int(float(seed))
    else:
        seed = 1

    output_dir = os.getcwd()

    D = store_and_or_load_data(dataset_info=dataset_info, outputdir=output_dir)

    cs = get_configuration_space(D.info)
    configuration = configuration_space.Configuration(cs, params)
    metric = D.info['metric']

    # The evaluator is published as a module-level global (presumably so
    # that signal handlers defined elsewhere can reach it - confirm).
    global evaluator
    # Train/test split
    if mode == 'holdout':
        evaluator = HoldoutEvaluator(D,
                                     configuration,
                                     with_predictions=True,
                                     all_scoring_functions=True,
                                     output_y_test=True,
                                     seed=seed,
                                     num_run=num_run)
        evaluator.fit()
        # Install the empty handler for SIGTERM before finishing up.
        signal.signal(signal.SIGTERM, empty_signal_handler)
        evaluator.finish_up()
        model_directory = os.path.join(os.getcwd(), "models_%d" % seed)
        if os.path.exists(model_directory):
            model_filename = os.path.join(model_directory,
                                          "%s.model" % num_run)
            # Protocol -1 is a binary pickle protocol, so the file has to
            # be opened in binary mode.
            with open(model_filename, "wb") as fh:
                pickle.dump(evaluator.model, fh, -1)

    elif mode == 'test':
        evaluator = TestEvaluator(D,
                                  configuration,
                                  all_scoring_functions=True,
                                  seed=seed)
        evaluator.fit()
        _print_paramils_result(evaluator, metric)

    # CV on the whole training set
    elif mode == 'cv':
        evaluator = CVEvaluator(D,
                                configuration,
                                with_predictions=True,
                                all_scoring_functions=True,
                                output_y_test=True,
                                cv_folds=mode_args['folds'],
                                seed=seed,
                                num_run=num_run)
        evaluator.fit()
        signal.signal(signal.SIGTERM, empty_signal_handler)
        evaluator.finish_up()

    elif mode == 'partial_cv':
        evaluator = CVEvaluator(D,
                                configuration,
                                all_scoring_functions=True,
                                cv_folds=mode_args['folds'],
                                seed=seed,
                                num_run=num_run)
        evaluator.partial_fit(mode_args['fold'])
        _print_paramils_result(evaluator, metric)

    elif mode == 'nested-cv':
        evaluator = NestedCVEvaluator(D,
                                      configuration,
                                      with_predictions=True,
                                      inner_cv_folds=mode_args['inner_folds'],
                                      outer_cv_folds=mode_args['outer_folds'],
                                      all_scoring_functions=True,
                                      output_y_test=True,
                                      seed=seed,
                                      num_run=num_run)
        evaluator.fit()
        signal.signal(signal.SIGTERM, empty_signal_handler)
        evaluator.finish_up()

    else:
        raise ValueError("Must choose a legal mode.")
Ejemplo n.º 5
0
    def test_evaluate_multiclass_classification(self):
        """Exercise ``NestedCVEvaluator`` on iris as a multiclass task.

        Samples ``N_TEST_RUNS`` configurations (``ridge`` estimator with the
        ``select_rates`` preprocessor), fits a nested-CV evaluator for each
        and checks the reported error, the shapes of the returned
        predictions and the bookkeeping of inner/outer models and indices.
        More than nine configurations must end up with an error below 0.5.
        """
        X_train, Y_train, X_test, Y_test = get_dataset('iris')

        # The first 25 rows of the loaded test split act as validation data,
        # the remaining rows stay as the test set.
        X_valid = X_test[:25, ]
        Y_valid = Y_test[:25, ]
        X_test = X_test[25:, ]
        Y_test = Y_test[25:, ]

        # Minimal stand-in for a data manager: only ``info``, ``data`` and
        # ``feat_type`` are populated. Y_valid / Y_test are not stored in it;
        # they are only used below to check prediction shapes.
        D = Dummy()
        D.info = {
            'metric': 'acc_metric',
            'task': MULTICLASS_CLASSIFICATION,
            'is_sparse': False,
            'target_num': 3
        }
        D.data = {
            'X_train': X_train,
            'Y_train': Y_train,
            'X_valid': X_valid,
            'X_test': X_test
        }
        # NOTE(review): the second entry is capitalised ('Numerical');
        # presumably deliberate to cover case-insensitive handling - confirm.
        D.feat_type = ['numerical', 'Numerical', 'numerical', 'numerical']

        configuration_space = get_configuration_space(
            D.info,
            include_estimators=['ridge'],
            include_preprocessors=['select_rates'])

        # err[i] stays 0.0 for runs that are skipped because the fit failed.
        err = np.zeros([N_TEST_RUNS])
        num_models_better_than_random = 0
        for i in range(N_TEST_RUNS):
            # Trailing comma: the result is printed on the same line later.
            print "Evaluate configuration: %d; result:" % i,
            configuration = configuration_space.sample_configuration()
            D_ = copy.deepcopy(D)
            # No fold counts are passed; the assertions below expect five
            # outer and five inner folds (presumably the evaluator's
            # defaults - TODO confirm).
            evaluator = NestedCVEvaluator(D_,
                                          configuration,
                                          with_predictions=True,
                                          all_scoring_functions=True)

            if not self._fit(evaluator):
                # Terminate the partially printed line and skip this run.
                print
                continue
            e_, Y_optimization_pred, Y_valid_pred, Y_test_pred = \
                evaluator.predict()
            err[i] = e_['acc_metric']
            print err[i], configuration['classifier']
            print e_['outer:bac_metric'], e_['bac_metric']

            # Test the outer CV
            num_targets = len(np.unique(Y_train))
            self.assertTrue(np.isfinite(err[i]))
            self.assertGreaterEqual(err[i], 0.0)
            # Test that five outer models were trained and none is missing
            self.assertEqual(len(evaluator.outer_models), 5)
            self.assertTrue(
                all([model is not None for model in evaluator.outer_models]))

            # Predictions must have one row per sample and one column per
            # distinct target value found in Y_train.
            self.assertEqual(Y_optimization_pred.shape[0], Y_train.shape[0])
            self.assertEqual(Y_optimization_pred.shape[1], num_targets)
            self.assertEqual(Y_valid_pred.shape[0], Y_valid.shape[0])
            self.assertEqual(Y_valid_pred.shape[1], num_targets)
            self.assertEqual(Y_test_pred.shape[0], Y_test.shape[0])
            self.assertEqual(Y_test_pred.shape[1], num_targets)
            # Test some basic statistics of the predictions
            # (only for runs with an error below 0.5, which are counted as
            # better than random).
            if err[i] < 0.5:
                self.assertTrue(0.3 < Y_valid_pred.mean() < 0.36666)
                self.assertGreaterEqual(Y_valid_pred.std(), 0.1)
                self.assertTrue(0.3 < Y_test_pred.mean() < 0.36666)
                self.assertGreaterEqual(Y_test_pred.std(), 0.1)
                num_models_better_than_random += 1

            # Test the inner CV
            self.assertEqual(len(evaluator.inner_models), 5)
            for fold in range(5):
                self.assertEqual(len(evaluator.inner_models[fold]), 5)
                self.assertTrue(
                    all([
                        model is not None
                        for model in evaluator.inner_models[fold]
                    ]))
                # First element of each index entry must hold at least 75
                # (outer) / 60 (inner) indices - presumably the training
                # part of the split; verify against the evaluator.
                self.assertGreaterEqual(len(evaluator.outer_indices[fold][0]),
                                        75)
                for inner_fold in range(5):
                    self.assertGreaterEqual(
                        len(evaluator.inner_indices[fold][inner_fold][0]), 60)

        # More than nine sampled configurations must beat the 0.5 threshold.
        self.assertGreater(num_models_better_than_random, 9)