def test_evaluate_multiclass_classification(self):
        X_train, Y_train, X_test, Y_test = get_dataset('iris')
        X_valid = X_test[:25,]
        Y_valid = Y_test[:25,]
        X_test = X_test[25:,]
        Y_test = Y_test[25:,]

        D = Dummy()
        D.info = {'metric': 'bac_metric', 'task': MULTICLASS_CLASSIFICATION,
                  'is_sparse': False, 'target_num': 3}
        D.data = {'X_train': X_train, 'Y_train': Y_train,
                  'X_valid': X_valid, 'X_test': X_test}
        D.feat_type = ['numerical', 'Numerical', 'numerical', 'numerical']

        configuration_space = get_configuration_space(D.info,
            include_estimators=['ridge'],
            include_preprocessors=['select_rates'])

        err = np.zeros([N_TEST_RUNS])
        for i in range(N_TEST_RUNS):
            print "Evaluate configuration: %d; result:" % i,
            configuration = configuration_space.sample_configuration()
            D_ = copy.deepcopy(D)
            evaluator = HoldoutEvaluator(D_, configuration)

            if not self._fit(evaluator):
                print
                continue
            err[i] = evaluator.predict()
            print err[i]

            self.assertTrue(np.isfinite(err[i]))
            self.assertGreaterEqual(err[i], 0.0)

        print "Number of times it was worse than random guessing:" + str(np.sum(err > 1))
    def test_5000_classes(self):
        weights = ([0.0002] * 4750) + ([0.0001] * 250)
        X, Y = sklearn.datasets.make_classification(n_samples=10000,
                                                    n_features=20,
                                                    n_classes=5000,
                                                    n_clusters_per_class=1,
                                                    n_informative=15,
                                                    n_redundant=5,
                                                    n_repeated=0,
                                                    weights=weights,
                                                    flip_y=0,
                                                    class_sep=1.0,
                                                    hypercube=True,
                                                    shift=None,
                                                    scale=1.0,
                                                    shuffle=True,
                                                    random_state=1)

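        # With n_samples=10000, the 250 classes weighted 0.0001 end up with a
        # single sample each (the heavier classes get two or more), which is
        # what the bincount assertion below verifies.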
        self.assertEqual(250, np.sum(np.bincount(Y) == 1))
        D = Dummy()
        D.info = {'metric': 'r2_metric', 'task': MULTICLASS_CLASSIFICATION,
                  'is_sparse': False, 'target_num': 1}
        D.data = {'X_train': X, 'Y_train': Y,
                  'X_valid': X, 'X_test': X}
        D.feat_type = ['numerical'] * 5000

        configuration_space = get_configuration_space(D.info,
            include_estimators=['extra_trees'],
            include_preprocessors=['no_preprocessing'])
        configuration = configuration_space.sample_configuration()
        D_ = copy.deepcopy(D)
        evaluator = HoldoutEvaluator(D_, configuration)
        evaluator.fit()
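
    # The tests above rely on a self._fit helper that is not included in this
    # excerpt.  A minimal, hypothetical sketch of such a helper (not the
    # project's actual implementation): fit the evaluator and report whether
    # the run succeeded, so that infeasible sampled configurations are simply
    # skipped instead of failing the test.
    def _fit(self, evaluator):
        try:
            evaluator.fit()
            return True
        except ValueError:
            # some randomly sampled configurations cannot be fit on this data
            return False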
Example #3
    def test_with_abalone(self):
        dataset = "abalone"
        dataset_dir = os.path.join(os.path.dirname(__file__), ".datasets")
        D = CompetitionDataManager(dataset, dataset_dir)
        configuration_space = get_configuration_space(
            D.info,
            include_estimators=['extra_trees'],
            include_preprocessors=['no_preprocessing'])

        errors = []
        for i in range(N_TEST_RUNS):
            configuration = configuration_space.sample_configuration()
            D_ = copy.deepcopy(D)
            evaluator = NestedCVEvaluator(D_,
                                          configuration,
                                          inner_cv_folds=2,
                                          outer_cv_folds=2)
            if not self._fit(evaluator):
                print
                continue
            err = evaluator.predict()
            self.assertLess(err, 0.99)
            self.assertTrue(np.isfinite(err))
            errors.append(err)
        self.assertEqual(10, len(errors))
        # This is a reasonable bound
        self.assertLess(min(errors), 0.77)
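
# The abalone tests read a competition-format dataset from a ".datasets"
# directory next to the test file.  A small, hypothetical guard (standard
# unittest.SkipTest; the directory layout is an assumption) for running the
# suite without that data:
import os
import unittest

if not os.path.isdir(os.path.join(os.path.dirname(__file__), ".datasets")):
    raise unittest.SkipTest("competition test data (.datasets) not available")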
Example #4
    def test_evaluate_multiclass_classification(self):
        X_train, Y_train, X_test, Y_test = get_dataset('iris')

        X_valid = X_test[:25, ]
        Y_valid = Y_test[:25, ]
        X_test = X_test[25:, ]
        Y_test = Y_test[25:, ]

        D = Dummy()
        D.info = {'metric': 'bac_metric', 'task': MULTICLASS_CLASSIFICATION,
                  'is_sparse': False, 'target_num': 3}
        D.data = {'X_train': X_train, 'Y_train': Y_train,
                  'X_valid': X_valid, 'X_test': X_test}
        D.feat_type = ['numerical', 'Numerical', 'numerical', 'numerical']

        configuration_space = get_configuration_space(D.info,
            include_estimators=['ridge'],
            include_preprocessors=['select_rates'])

        err = np.zeros([N_TEST_RUNS])
        num_models_better_than_random = 0
        for i in range(N_TEST_RUNS):
            print "Evaluate configuration: %d; result:" % i,
            configuration = configuration_space.sample_configuration()
            D_ = copy.deepcopy(D)
            evaluator = CVEvaluator(D_, configuration,
                                    with_predictions=True)

            if not self._fit(evaluator):
                print
                continue
            e_, Y_optimization_pred, Y_valid_pred, Y_test_pred = \
                evaluator.predict()
            err[i] = e_
            print err[i], configuration['classifier']

            num_targets = len(np.unique(Y_train))
            self.assertTrue(np.isfinite(err[i]))
            self.assertGreaterEqual(err[i], 0.0)
            # Test that ten models were trained
            self.assertEqual(len(evaluator.models), 10)
            self.assertEqual(Y_optimization_pred.shape[0], Y_train.shape[0])
            self.assertEqual(Y_optimization_pred.shape[1], num_targets)
            self.assertEqual(Y_valid_pred.shape[0], Y_valid.shape[0])
            self.assertEqual(Y_valid_pred.shape[1], num_targets)
            self.assertEqual(Y_test_pred.shape[0], Y_test.shape[0])
            self.assertEqual(Y_test_pred.shape[1], num_targets)
            # Test some basic statistics of the dataset
            if err[i] < 0.5:
                self.assertTrue(0.3 < Y_valid_pred.mean() < 0.36666)
                self.assertGreaterEqual(Y_valid_pred.std(), 0.01)
                self.assertTrue(0.3 < Y_test_pred.mean() < 0.36666)
                self.assertGreaterEqual(Y_test_pred.std(), 0.01)
                num_models_better_than_random += 1
        self.assertGreater(num_models_better_than_random, 5)

    def test_file_output(self):
        output_dir = os.path.join(os.getcwd(), ".test")

        try:
            shutil.rmtree(output_dir)
        except OSError:
            # the output directory may not exist yet
            pass

        X_train, Y_train, X_test, Y_test = get_dataset('iris')
        X_valid = X_test[:25, ]
        Y_valid = Y_test[:25, ]
        X_test = X_test[25:, ]
        Y_test = Y_test[25:, ]

        D = Dummy()
        D.info = {'metric': 'bac_metric', 'task': MULTICLASS_CLASSIFICATION,
                  'is_sparse': False, 'target_num': 3}
        D.data = {'X_train': X_train, 'Y_train': Y_train,
                  'X_valid': X_valid, 'X_test': X_test}
        D.feat_type = ['numerical', 'Numerical', 'numerical', 'numerical']
        D.basename = "test"


        configuration_space = get_configuration_space(D.info)

        while True:
            configuration = configuration_space.sample_configuration()
            evaluator = HoldoutEvaluator(D, configuration,
                                         with_predictions=True,
                                         all_scoring_functions=True,
                                         output_dir=output_dir,
                                         output_y_test=True)

            if not self._fit(evaluator):
                print
                continue
            evaluator.predict()
            evaluator.file_output()

            self.assertTrue(os.path.exists(os.path.join(output_dir,
                                                        "y_optimization.npy")))
            break

    def test_predict_proba_binary_classification(self):
        X_train, Y_train, X_test, Y_test = get_dataset('iris')

        eliminate_class_two = Y_train != 2
        X_train = X_train[eliminate_class_two]
        Y_train = Y_train[eliminate_class_two]

        eliminate_class_two = Y_test != 2
        X_test = X_test[eliminate_class_two]
        Y_test = Y_test[eliminate_class_two]

        X_valid = X_test[:25, ]
        Y_valid = Y_test[:25, ]
        X_test = X_test[25:, ]
        Y_test = Y_test[25:, ]

        class Dummy2(object):
            def predict_proba(self, y, batch_size=200):
                return np.array([[0.1, 0.9], [0.7, 0.3]])

        model = Dummy2()
        task_type = BINARY_CLASSIFICATION

        D = Dummy()
        D.info = {'metric': 'bac_metric', 'task': task_type,
                  'is_sparse': False, 'target_num': 3}
        D.data = {'X_train': X_train, 'Y_train': Y_train,
                  'X_valid': X_valid, 'X_test': X_test}
        D.feat_type = ['numerical', 'Numerical', 'numerical', 'numerical']

        configuration_space = get_configuration_space(
            D.info, include_estimators=['ridge'],
            include_preprocessors=['select_rates'])
        configuration = configuration_space.sample_configuration()

        evaluator = HoldoutEvaluator(D, configuration)
        pred = evaluator.predict_proba(None, model, task_type)
        expected = [[0.9], [0.3]]
        for i in range(len(expected)):
            self.assertEqual(expected[i], pred[i])
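
    # The expected values above show what evaluator.predict_proba does for a
    # binary task: the two-column probability matrix is reduced to the
    # positive-class column.  A self-contained illustration of that reduction
    # (not the evaluator's actual implementation):
    def _illustrate_binary_proba_reduction(self):
        proba = np.array([[0.1, 0.9], [0.7, 0.3]])
        positive = proba[:, 1].reshape((-1, 1))
        self.assertTrue(np.allclose(positive, [[0.9], [0.3]]))
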
    def test_with_abalone(self):
        dataset = "abalone"
        dataset_dir = os.path.join(os.path.dirname(__file__), ".datasets")
        D = CompetitionDataManager(dataset, dataset_dir)
        configuration_space = get_configuration_space(D.info,
            include_estimators=['extra_trees'],
            include_preprocessors=['no_preprocessing'])

        errors = []
        for i in range(N_TEST_RUNS):
            configuration = configuration_space.sample_configuration()
            D_ = copy.deepcopy(D)
            evaluator = CVEvaluator(D_, configuration, cv_folds=5)
            if not self._fit(evaluator):
                print
                continue
            err = evaluator.predict()
            self.assertLess(err, 0.99)
            self.assertTrue(np.isfinite(err))
            errors.append(err)
        self.assertEqual(10, len(errors))
        # This is a reasonable bound
        self.assertLess(min(errors), 0.77)
Example #8
    def _fit(self, D):
        # TODO: check that data and task definition fit together!

        self.metric_ = D.info['metric']
        self.task_ = D.info['task']
        self.target_num_ = D.info['target_num']

        # Set environment variable:
        seed = os.environ.get("AUTOSKLEARN_SEED")
        if seed is not None and int(seed) != self.seed:
            raise ValueError("It seems you have already started an instance "
                             "of AutoSklearn in this thread.")
        else:
            os.environ["AUTOSKLEARN_SEED"] = str(self.seed)

        # == Split dataset and store Data for the ensemble script
        X_train, X_ensemble, Y_train, Y_ensemble = split_data.split_data(
            D.data['X_train'], D.data['Y_train'])

        true_labels_ensemble_filename = os.path.join(self.tmp_dir,
                                                     "true_labels_ensemble.npy")
        true_labels_ensemble_lock = true_labels_ensemble_filename + ".lock"
        with lockfile.LockFile(true_labels_ensemble_lock):
            if not os.path.exists(true_labels_ensemble_filename):
                np.save(true_labels_ensemble_filename, Y_ensemble)

        del X_train, X_ensemble, Y_train, Y_ensemble

        time_needed_to_load_data = self.stopwatch_.wall_elapsed(self.basename_)
        time_left_after_reading = max(0, self.time_left_for_this_task -
                                      time_needed_to_load_data)
        self.logger.info("Remaining time after reading %s %5.2f sec" %
                    (self.basename_, time_left_after_reading))

        self.stopwatch_.stop_task("LoadData")

        # == Calculate metafeatures
        self.stopwatch_.start_task("CalculateMetafeatures")
        categorical = [feat_type.lower() == "categorical"
                       for feat_type in D.feat_type]

        if self.initial_configurations_via_metalearning <= 0:
            ml = None
        elif D.info["task"] in \
                [MULTICLASS_CLASSIFICATION, BINARY_CLASSIFICATION]:
            ml = metalearning.MetaLearning()
            self.logger.debug("Start calculating metafeatures for %s" %
                              self.basename_)
            ml.calculate_metafeatures_with_labels(D.data["X_train"],
                                                  D.data["Y_train"],
                                                  categorical=categorical,
                                                  dataset_name=self.basename_)
        else:
            ml = None
            self.logger.critical("Metafeatures not calculated")
        self.stopwatch_.stop_task("CalculateMetafeatures")
        self.logger.debug("Calculating Metafeatures (categorical attributes) took %5.2f" % self.stopwatch_.wall_elapsed("CalculateMetafeatures"))

        self.stopwatch_.start_task("OneHot")
        D.perform1HotEncoding()
        self.ohe_ = D.encoder_
        self.stopwatch_.stop_task("OneHot")

        # == Pickle the data manager
        self.stopwatch_.start_task("StoreDatamanager")
        data_manager_path = os.path.join(self.tmp_dir,
                                         self.basename_ + "_Manager.pkl")
        data_manager_lockfile = data_manager_path + ".lock"
        with lockfile.LockFile(data_manager_lockfile):
            if not os.path.exists(data_manager_path):
                with open(data_manager_path, 'wb') as fh:
                    pickle.dump(D, fh, protocol=-1)
                self.logger.debug("Pickled Datamanager at %s" %
                                  data_manager_path)
            else:
                self.logger.debug("Data manager already presend at %s" %
                                  data_manager_path)
        self.stopwatch_.stop_task("StoreDatamanager")

        # = Create a searchspace
        self.stopwatch_.start_task("CreateConfigSpace")
        configspace_path = os.path.join(self.tmp_dir, "space.pcs")
        self.configuration_space = paramsklearn.get_configuration_space(
            D.info)

        self.configuration_space_created_hook()

        sp_string = pcs_parser.write(self.configuration_space)
        configuration_space_lockfile = configspace_path + ".lock"
        with lockfile.LockFile(configuration_space_lockfile):
            if not os.path.exists(configspace_path):
                with open(configspace_path, "w") as fh:
                    fh.write(sp_string)
                self.logger.debug("Configuration space written to %s" %
                                  configspace_path)
            else:
                self.logger.debug("Configuration space already present at %s" %
                                  configspace_path)
        self.stopwatch_.stop_task("CreateConfigSpace")

        if ml is None:
            initial_configurations = []
        elif D.info["task"]in \
                [MULTICLASS_CLASSIFICATION, BINARY_CLASSIFICATION]:
            self.stopwatch_.start_task("CalculateMetafeaturesEncoded")
            ml.calculate_metafeatures_encoded_labels(X_train=D.data["X_train"],
                                                     Y_train=D.data["Y_train"],
                                                     categorical=[False] * D.data["X_train"].shape[1],
                                                     dataset_name=self.basename_)
            self.stopwatch_.stop_task("CalculateMetafeaturesEncoded")
            self.logger.debug(
                "Calculating Metafeatures (encoded attributes) took %5.2fsec" %
                self.stopwatch_.wall_elapsed("CalculateMetafeaturesEncoded"))

            self.logger.debug(ml._metafeatures_labels.__repr__(verbosity=2))
            self.logger.debug(ml._metafeatures_encoded_labels.__repr__(verbosity=2))

            self.stopwatch_.start_task("InitialConfigurations")
            try:
                initial_configurations = ml.create_metalearning_string_for_smac_call(
                    self.configuration_space, self.basename_, self.metric_,
                    self.task_, True if D.info['is_sparse'] == 1 else False,
                    self.initial_configurations_via_metalearning, self.metadata_directory)
            except Exception as e:
                import traceback

                self.logger.error(str(e))
                self.logger.error(traceback.format_exc())
                initial_configurations = []

            self.stopwatch_.stop_task("InitialConfigurations")

            self.logger.debug("Initial Configurations: (%d)", len(initial_configurations))
            for initial_configuration in initial_configurations:
                self.logger.debug(initial_configuration)
            self.logger.debug("Looking for initial configurations took %5.2fsec" %
                              self.stopwatch_.wall_elapsed("InitialConfigurations"))
            self.logger.info(
                "Time left for %s after finding initial configurations: %5.2fsec" %
                (self.basename_, self.time_left_for_this_task -
                 self.stopwatch_.wall_elapsed(self.basename_)))
        else:
            initial_configurations = []
            self.logger.critical("Metafeatures encoded not calculated")

        # == Set up a directory where all the trained models will be pickled to
        if self.keep_models:
            self.model_directory_ = os.path.join(self.tmp_dir,
                                                 "models_%d" % self.seed)
            os.mkdir(self.model_directory_)
        self.ensemble_indices_directory_ = os.path.join(self.tmp_dir,
                                                        "ensemble_indices_%d" % self.seed)
        os.mkdir(self.ensemble_indices_directory_)

        # == RUN SMAC
        self.stopwatch_.start_task("runSmac")
        # = Create an empty instance file
        instance_file = os.path.join(self.tmp_dir, "instances.txt")
        instance_file_lock = instance_file + ".lock"
        with lockfile.LockFile(instance_file_lock):
            if not os.path.exists(instance_file):
                with open(instance_file, "w") as fh:
                    fh.write("holdout")
                self.logger.debug("Created instance file %s" % instance_file)
            else:
                self.logger.debug("Instance file already present at %s" % instance_file)

        # = Start SMAC
        time_left_for_smac = max(0, self.time_left_for_this_task - (
            self.stopwatch_.wall_elapsed(self.basename_)))
        self.logger.debug("Start SMAC with %5.2fsec time left" % time_left_for_smac)
        proc_smac, smac_call = \
            submit_process.run_smac(dataset_name=self.basename_,
                                    dataset=data_manager_path,
                                    tmp_dir=self.tmp_dir,
                                    searchspace=configspace_path,
                                    instance_file=instance_file,
                                    limit=time_left_for_smac,
                                    cutoff_time=self.per_run_time_limit,
                                    initial_challengers=initial_configurations,
                                    memory_limit=self.ml_memory_limit,
                                    seed=self.seed)
        self.logger.debug(smac_call)
        self.stopwatch_.stop_task("runSmac")

        # == RUN ensemble builder
        self.stopwatch_.start_task("runEnsemble")
        time_left_for_ensembles = max(0, self.time_left_for_this_task - (
            self.stopwatch_.wall_elapsed(self.basename_)))
        self.logger.debug("Start Ensemble with %5.2fsec time left" % time_left_for_ensembles)
        proc_ensembles = \
            submit_process.run_ensemble_builder(tmp_dir=self.tmp_dir,
                                                dataset_name=self.basename_,
                                                task_type=self.task_,
                                                metric=self.metric_,
                                                limit=time_left_for_ensembles,
                                                output_dir=self.output_dir,
                                                ensemble_size=self.ensemble_size,
                                                ensemble_nbest=self.ensemble_nbest,
                                                seed=self.seed,
                                                ensemble_indices_output_dir=self.ensemble_indices_directory_)
        self.stopwatch_.stop_task("runEnsemble")

        del D

        if self.queue is not None:
            self.queue.put([time_needed_to_load_data, data_manager_path,
                            proc_smac, proc_ensembles])
        else:
            proc_smac.wait()
            proc_ensembles.wait()

        # Delete AutoSklearn environment variable
        del os.environ["AUTOSKLEARN_SEED"]
        return self
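
# _fit above repeats one "write a file once, guarded by a lock file" pattern
# three times (ensemble labels, pickled data manager, configuration space).
# A hypothetical helper expressing that pattern; the name and signature are
# illustrative only, not part of the project:
def _write_once_locked(path, write_fn, logger=None):
    import os
    import lockfile

    with lockfile.LockFile(path + ".lock"):
        if not os.path.exists(path):
            write_fn(path)
            if logger is not None:
                logger.debug("Created %s" % path)
        elif logger is not None:
            logger.debug("%s already present" % path)

# e.g. the instance-file step could then be written as
#     _write_once_locked(instance_file,
#                        lambda p: open(p, "w").write("holdout"))
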
    def test_metalearning(self):
        dataset_name = 'digits'

        initial_challengers = {
            'acc_metric': [
                "--initial-challengers \" "
                "-adaboost:algorithm 'SAMME.R' "
                "-adaboost:learning_rate '0.400363929326' "
                "-adaboost:max_depth '5' "
                "-adaboost:n_estimators '319' "
                "-balancing:strategy 'none' "
                "-classifier 'adaboost' "
                "-imputation:strategy 'most_frequent' "
                "-preprocessor 'no_preprocessing' "
                "-rescaling:strategy 'min/max'\""
            ],
            'auc_metric': [
                "--initial-challengers \" "
                "-adaboost:algorithm 'SAMME.R' "
                "-adaboost:learning_rate '0.966883114819' "
                "-adaboost:max_depth '5' "
                "-adaboost:n_estimators '412' "
                "-balancing:strategy 'weighting' "
                "-classifier 'adaboost' "
                "-imputation:strategy 'median' "
                "-preprocessor 'no_preprocessing' "
                "-rescaling:strategy 'min/max'\""
            ],
            'bac_metric': [
                "--initial-challengers \" "
                "-adaboost:algorithm 'SAMME.R' "
                "-adaboost:learning_rate '0.400363929326' "
                "-adaboost:max_depth '5' "
                "-adaboost:n_estimators '319' "
                "-balancing:strategy 'none' "
                "-classifier 'adaboost' "
                "-imputation:strategy 'most_frequent' "
                "-preprocessor 'no_preprocessing' "
                "-rescaling:strategy 'min/max'\""
            ],
            'f1_metric': [
                "--initial-challengers \" "
                "-adaboost:algorithm 'SAMME.R' "
                "-adaboost:learning_rate '0.966883114819' "
                "-adaboost:max_depth '5' "
                "-adaboost:n_estimators '412' "
                "-balancing:strategy 'weighting' "
                "-classifier 'adaboost' "
                "-imputation:strategy 'median' "
                "-preprocessor 'no_preprocessing' "
                "-rescaling:strategy 'min/max'\""
            ],
            'pac_metric': [
                "--initial-challengers \" "
                "-adaboost:algorithm 'SAMME.R' "
                "-adaboost:learning_rate '0.400363929326' "
                "-adaboost:max_depth '5' "
                "-adaboost:n_estimators '319' "
                "-balancing:strategy 'none' "
                "-classifier 'adaboost' "
                "-imputation:strategy 'most_frequent' "
                "-preprocessor 'no_preprocessing' "
                "-rescaling:strategy 'min/max'\""
            ]
        }

        for metric in initial_challengers:
            configuration_space = get_configuration_space(
                {
                    'metric': metric,
                    'task': MULTICLASS_CLASSIFICATION,
                    'is_sparse': False
                },
                include_preprocessors=['no_preprocessing'])

            X_train, Y_train, X_test, Y_test = get_dataset(dataset_name)
            categorical = [False] * X_train.shape[1]

            ml = MetaLearning()
            ml.calculate_metafeatures_with_labels(X_train, Y_train,
                                                  categorical, dataset_name)
            ml.calculate_metafeatures_encoded_labels(X_train, Y_train,
                                                     categorical, dataset_name)
            initial_configuration_strings_for_smac = \
                ml.create_metalearning_string_for_smac_call(
                    configuration_space, dataset_name, metric,
                    MULTICLASS_CLASSIFICATION, False, 1, None)

            print metric
            self.assertEqual(initial_challengers[metric],
                             initial_configuration_strings_for_smac)
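
# The challenger strings above encode one configuration as alternating
# "-hyperparameter 'value'" tokens inside a quoted block.  A small,
# self-contained sketch (not project code) of turning one such flag string
# into a name -> value dict, roughly the shape of the params dict that main()
# further below coerces to ints/floats (an assumption about how these pieces
# connect):
import shlex

flags = ("-adaboost:algorithm 'SAMME.R' -adaboost:max_depth '5' "
         "-classifier 'adaboost'")
tokens = shlex.split(flags)
params = dict(zip([t.lstrip('-') for t in tokens[0::2]], tokens[1::2]))
# -> {'adaboost:algorithm': 'SAMME.R', 'adaboost:max_depth': '5',
#     'classifier': 'adaboost'}
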
    def test_metalearning(self):
        dataset_name = 'digits'

        initial_challengers = {'acc_metric':
                                   ["--initial-challengers \" "
                                    "-adaboost:algorithm 'SAMME.R' "
                                    "-adaboost:learning_rate '0.400363929326' "
                                    "-adaboost:max_depth '5' "
                                    "-adaboost:n_estimators '319' "
                                    "-balancing:strategy 'none' "
                                    "-classifier 'adaboost' "
                                    "-imputation:strategy 'most_frequent' "
                                    "-preprocessor 'no_preprocessing' "
                                    "-rescaling:strategy 'min/max'\""
                                   ],
                               'auc_metric':
                                    ["--initial-challengers \" "
                                     "-adaboost:algorithm 'SAMME.R' "
                                     "-adaboost:learning_rate '0.966883114819' "
                                     "-adaboost:max_depth '5' "
                                     "-adaboost:n_estimators '412' "
                                     "-balancing:strategy 'weighting' "
                                     "-classifier 'adaboost' "
                                     "-imputation:strategy 'median' "
                                     "-preprocessor 'no_preprocessing' "
                                     "-rescaling:strategy 'min/max'\""
                                    ],
                               'bac_metric':
                                   ["--initial-challengers \" "
                                    "-adaboost:algorithm 'SAMME.R' "
                                    "-adaboost:learning_rate '0.400363929326' "
                                    "-adaboost:max_depth '5' "
                                    "-adaboost:n_estimators '319' "
                                    "-balancing:strategy 'none' "
                                    "-classifier 'adaboost' "
                                    "-imputation:strategy 'most_frequent' "
                                    "-preprocessor 'no_preprocessing' "
                                    "-rescaling:strategy 'min/max'\""
                                   ],
                               'f1_metric':
                                   ["--initial-challengers \" "
                                    "-adaboost:algorithm 'SAMME.R' "
                                    "-adaboost:learning_rate '0.966883114819' "
                                    "-adaboost:max_depth '5' "
                                    "-adaboost:n_estimators '412' "
                                    "-balancing:strategy 'weighting' "
                                    "-classifier 'adaboost' "
                                    "-imputation:strategy 'median' "
                                    "-preprocessor 'no_preprocessing' "
                                    "-rescaling:strategy 'min/max'\""
                                   ],
                               'pac_metric':
                                   ["--initial-challengers \" "
                                    "-adaboost:algorithm 'SAMME.R' "
                                    "-adaboost:learning_rate '0.400363929326' "
                                    "-adaboost:max_depth '5' "
                                    "-adaboost:n_estimators '319' "
                                    "-balancing:strategy 'none' "
                                    "-classifier 'adaboost' "
                                    "-imputation:strategy 'most_frequent' "
                                    "-preprocessor 'no_preprocessing' "
                                    "-rescaling:strategy 'min/max'\""
                                   ]}

        for metric in initial_challengers:
            configuration_space = get_configuration_space(
                {'metric': metric,
                 'task': MULTICLASS_CLASSIFICATION,
                 'is_sparse': False}, include_preprocessors=['no_preprocessing'])

            X_train, Y_train, X_test, Y_test = get_dataset(dataset_name)
            categorical = [False] * X_train.shape[1]

            ml = MetaLearning()
            ml.calculate_metafeatures_with_labels(
                    X_train, Y_train, categorical, dataset_name)
            ml.calculate_metafeatures_encoded_labels(
                    X_train, Y_train, categorical, dataset_name)
            initial_configuration_strings_for_smac = \
                ml.create_metalearning_string_for_smac_call(
                    configuration_space, dataset_name, metric,
                    MULTICLASS_CLASSIFICATION, False, 1, None)

            print metric
            self.assertEqual(initial_challengers[metric],
                             initial_configuration_strings_for_smac)

def main(dataset_info, mode, seed, params, mode_args=None):
    """This command line interface has three different operation modes:

    * CV: useful for the Tweakathon
    * 1/3 test split: useful to evaluate a configuration
    * cv on 2/3 train split: useful to optimize hyperparameters in a training
      mode before testing a configuration on the 1/3 test split.

    It must by no means be used for the Auto part of the competition!
    """
    if mode != "test":
        num_run = get_new_run_num()

    for key in params:
        try:
            params[key] = int(params[key])
        except (TypeError, ValueError):
            try:
                params[key] = float(params[key])
            except (TypeError, ValueError):
                pass

    if seed is not None:
        seed = int(float(seed))
    else:
        seed = 1

    output_dir = os.getcwd()

    D = store_and_or_load_data(dataset_info=dataset_info, outputdir=output_dir)

    cs = get_configuration_space(D.info)
    configuration = configuration_space.Configuration(cs, params)
    metric = D.info['metric']

    global evaluator
    # Train/test split
    if mode == 'holdout':
        evaluator = HoldoutEvaluator(D,
                                     configuration,
                                     with_predictions=True,
                                     all_scoring_functions=True,
                                     output_y_test=True,
                                     seed=seed,
                                     num_run=num_run)
        evaluator.fit()
        signal.signal(15, empty_signal_handler)
        evaluator.finish_up()
        model_directory = os.path.join(os.getcwd(), "models_%d" % seed)
        if os.path.exists(model_directory):
            model_filename = os.path.join(model_directory,
                                          "%s.model" % num_run)
            with open(model_filename, "wb") as fh:
                pickle.dump(evaluator.model, fh, -1)

    elif mode == 'test':
        evaluator = TestEvaluator(D,
                                  configuration,
                                  all_scoring_functions=True,
                                  seed=seed)
        evaluator.fit()
        scores = evaluator.predict()
        duration = time.time() - evaluator.starttime

        score = scores[metric]
        additional_run_info = ";".join(
            ["%s: %s" % (m_, value) for m_, value in scores.items()])
        additional_run_info += ";" + "duration: " + str(duration)

        print "Result for ParamILS: %s, %f, 1, %f, %d, %s" % (
            "SAT", abs(duration), score, evaluator.seed, additional_run_info)

    # CV on the whole training set
    elif mode == 'cv':
        evaluator = CVEvaluator(D,
                                configuration,
                                with_predictions=True,
                                all_scoring_functions=True,
                                output_y_test=True,
                                cv_folds=mode_args['folds'],
                                seed=seed,
                                num_run=num_run)
        evaluator.fit()
        signal.signal(15, empty_signal_handler)
        evaluator.finish_up()

    elif mode == 'partial_cv':
        evaluator = CVEvaluator(D,
                                configuration,
                                all_scoring_functions=True,
                                cv_folds=mode_args['folds'],
                                seed=seed,
                                num_run=num_run)
        evaluator.partial_fit(mode_args['fold'])
        scores = evaluator.predict()
        duration = time.time() - evaluator.starttime

        score = scores[metric]
        additional_run_info = ";".join(
            ["%s: %s" % (m_, value) for m_, value in scores.items()])
        additional_run_info += ";" + "duration: " + str(duration)

        print "Result for ParamILS: %s, %f, 1, %f, %d, %s" % (
            "SAT", abs(duration), score, evaluator.seed, additional_run_info)

    elif mode == 'nested-cv':
        evaluator = NestedCVEvaluator(D,
                                      configuration,
                                      with_predictions=True,
                                      inner_cv_folds=mode_args['inner_folds'],
                                      outer_cv_folds=mode_args['outer_folds'],
                                      all_scoring_functions=True,
                                      output_y_test=True,
                                      seed=seed,
                                      num_run=num_run)
        evaluator.fit()
        signal.signal(15, empty_signal_handler)
        evaluator.finish_up()

    else:
        raise ValueError("Must choose a legal mode.")
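
# The 'test' and 'partial_cv' branches report back to SMAC in the ParamILS
# result-line format: status, runtime, runlength, quality, seed, additional
# run info.  A self-contained sketch of how that line is assembled (the score
# values in the commented call are made up for illustration):
def _format_paramils_result(scores, metric, duration, seed):
    additional_run_info = ";".join(
        ["%s: %s" % (m_, value) for m_, value in scores.items()])
    additional_run_info += ";" + "duration: " + str(duration)
    return "Result for ParamILS: %s, %f, 1, %f, %d, %s" % (
        "SAT", abs(duration), scores[metric], seed, additional_run_info)

# _format_paramils_result({'bac_metric': 0.21, 'acc_metric': 0.18},
#                         'bac_metric', 12.3, 1)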