Example #1
    def test_grid_scores(self):
        output = os.path.join(self.test_dir, '..', '.tmp_grid_scores')
        self._setUp(output)

        cls = AutoSklearnClassifier(time_left_for_this_task=15,
                                    per_run_time_limit=5,
                                    output_folder=output,
                                    tmp_folder=output,
                                    shared_mode=False,
                                    seed=1,
                                    initial_configurations_via_metalearning=0,
                                    ensemble_size=0)
        cls_ = cls.build_automl()
        automl = cls_._automl
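        # Mock the runhistory so grid_scores_ can be computed without
        # actually running an optimization.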
        automl.runhistory_ = unittest.mock.MagicMock()

        RunKey = collections.namedtuple(
            'RunKey', ['config_id', 'instance_id', 'seed'])

        RunValue = collections.namedtuple(
            'RunValue', ['cost', 'time', 'status', 'additional_info'])

        runhistory = dict()
        runhistory[RunKey(1, 1, 1)] = RunValue(1, 1, 1, '')
        automl.runhistory_.data = runhistory
        grid_scores_ = automl.grid_scores_

        self.assertIsInstance(grid_scores_[0], _CVScoreTuple)
        # The runhistory stores losses, so a cost of 1 corresponds to a score of 0
        self.assertEqual(grid_scores_[0].mean_validation_score, 0)
        self.assertEqual(grid_scores_[0].cv_validation_scores, [0])
        self.assertIsInstance(grid_scores_[0].parameters, unittest.mock.MagicMock)

        del automl
        self._tearDown(output)
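
For context, here is a minimal, self-contained sketch of the loss-to-score conversion the assertions above rely on. The helper runhistory_to_grid_scores and the local CVScoreTuple are hypothetical stand-ins, not auto-sklearn's actual implementation; they only illustrate why a stored cost of 1 maps to a mean_validation_score of 0 (score = 1 - loss).

import collections

RunKey = collections.namedtuple('RunKey', ['config_id', 'instance_id', 'seed'])
RunValue = collections.namedtuple(
    'RunValue', ['cost', 'time', 'status', 'additional_info'])
CVScoreTuple = collections.namedtuple(
    'CVScoreTuple',
    ['parameters', 'mean_validation_score', 'cv_validation_scores'])

def runhistory_to_grid_scores(runhistory_data, config_for_id):
    # Scores are derived from stored losses: score = 1 - loss.
    scores = []
    for key, value in runhistory_data.items():
        score = 1 - value.cost
        scores.append(
            CVScoreTuple(config_for_id(key.config_id), score, [score]))
    return scores

data = {RunKey(1, 1, 1): RunValue(1, 1, 1, '')}
print(runhistory_to_grid_scores(data, lambda cid: {'config_id': cid}))
# -> [CVScoreTuple(parameters={'config_id': 1},
#                  mean_validation_score=0, cv_validation_scores=[0])]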
Example #2
    def test_grid_scores(self):
        output = os.path.join(self.test_dir, '..', '.tmp_grid_scores')
        self._setUp(output)

        cls = AutoSklearnClassifier(time_left_for_this_task=15,
                                    per_run_time_limit=15,
                                    output_folder=output,
                                    tmp_folder=output,
                                    shared_mode=False,
                                    seed=1,
                                    initial_configurations_via_metalearning=0,
                                    ensemble_size=0)
        cls_ = cls.build_automl()
        automl = cls_._automl
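        # Mock the whole SMAC process; its runhistory is filled in manually below.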
        automl._proc_smac = mock.MagicMock()

        RunKey = collections.namedtuple(
            'RunKey', ['config_id', 'instance_id', 'seed'])

        RunValue = collections.namedtuple(
            'RunValue', ['cost', 'time', 'status', 'additional_info'])

        runhistory = dict()
        runhistory[RunKey(1, 1, 1)] = RunValue(1, 1, 1, '')
        automl._proc_smac.runhistory.data = runhistory
        grid_scores_ = automl.grid_scores_

        self.assertIsInstance(grid_scores_[0], _CVScoreTuple)
        # The runhistory stores losses, so a cost of 1 corresponds to a score of 0
        self.assertEqual(grid_scores_[0].mean_validation_score, 0)
        self.assertEqual(grid_scores_[0].cv_validation_scores, [0])
        self.assertIsInstance(grid_scores_[0].parameters, mock.MagicMock)

        del automl
        self._tearDown(output)
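
The two examples differ only in which internal attribute is mocked: Example #1 patches automl.runhistory_ directly, while Example #2 patches automl._proc_smac and relies on MagicMock creating the nested runhistory attribute on first access. A short standalone sketch of that behavior:

from unittest import mock

proc_smac = mock.MagicMock()
# Attribute access on a MagicMock creates child mocks on the fly, so nested
# attributes can be assigned without defining any intermediate objects.
proc_smac.runhistory.data = {'key': 'value'}
assert isinstance(proc_smac.runhistory, mock.MagicMock)
assert proc_smac.runhistory.data == {'key': 'value'}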
Example #3
def main(working_directory, time_limit, per_run_time_limit, task_id, seed):
    # Load data and other info.
    X_train, y_train, X_test, y_test, cat = load_task(task_id)

    # Path to the metadata directory. Is there a better way to get this?
    # NOTE: this hard-coded absolute path is machine-specific; the
    # commented-out line below is the portable alternative.
    metadata_directory = "/home/tau/hrakotoa/Code/reproduce/auto-sklearn/auto-sklearn/autosklearn/metalearning/files/"
    # metadata_directory = os.path.dirname(autosklearn.metalearning.files.__file__)

    # Create new metadata directory not containing task_id.
    new_metadata_directory = os.path.abspath(
        os.path.join(working_directory, "metadata_%i" % task_id))

    try:
        os.makedirs(new_metadata_directory)
        remove_dataset(metadata_directory, new_metadata_directory, task_id)
    except FileExistsError:
        pass  # The metadata for this task was already created.

    # We need to get task type, metric, is_sparse_or_dense information to
    # construct the path to the specific metadata directory. For details see
    # get_metalearning_suggestion() in smbo.py.
    TASK_TYPES_TO_STRING = {  # Mimic the same dict in autosklearn.constants
        'binary': 'binary.classification',
        'multiclass': 'multiclass.classification',
    }
    task_type = type_of_target(y_train)
    metadata_for_this_task = os.path.abspath(
        os.path.join(
            working_directory, "metadata_%i/balanced_accuracy_%s_sparse" %
            (task_id, TASK_TYPES_TO_STRING[task_type])))
    # How can we check whether the data is sparse before running?

    configuration_output_dir = os.path.join(working_directory, str(seed))
    tmp_dir = os.path.join(configuration_output_dir, str(task_id))
    try:
        if not os.path.exists(configuration_output_dir):
            os.makedirs(configuration_output_dir)
    except OSError:
        print("Directory {0} already created.".format(configuration_output_dir))

    automl_arguments = {
        'time_left_for_this_task': time_limit,
        'per_run_time_limit': per_run_time_limit,
        'initial_configurations_via_metalearning': 25,
        'ensemble_size': 0,
        'seed': seed,
        'ml_memory_limit': 3072,
        'resampling_strategy': 'holdout',
        'resampling_strategy_arguments': {
            'train_size': 0.67
        },
        'tmp_folder': tmp_dir,
        'delete_tmp_folder_after_terminate': False,
        'disable_evaluator_output': False,
    }

    automl = AutoSklearnClassifier(**automl_arguments)
    # Setting automl._automl._metadata_directory here does not work because
    # automl._automl is not created until fit() is called. Therefore, we
    # manually create automl._automl and set the metadata directory on it.
    automl._automl = automl.build_automl()
    automl._automl._metadata_directory = metadata_for_this_task

    # Fit.
    automl._automl.fit(
        X_train,
        y_train,
        dataset_name=str(task_id),
        X_test=X_test,
        y_test=y_test,
        metric=balanced_accuracy,
    )

    with open(os.path.join(tmp_dir, "score_metalearning.csv"), 'w') as fh:
        T = 0
        fh.write("Time,Train Performance,Test Performance\n")
        # Write the starting point: time 0, train performance 0, test performance 0.
        best_loss = 1
        fh.write("{0},{1},{2}\n".format(T, 0, 0))
        for key, value in automl._automl.runhistory_.data.items():
            t = value.time
            loss = value.cost
            T += t

            if loss < best_loss:
                fh.write("{0},{1},{2}\n".format(
                    T, 1 - loss,
                    1 - value.additional_info.get('test_loss', 1.0)))
                best_loss = loss
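
The final loop implements a simple incumbent trajectory: cumulative wall-clock time is accumulated in T, and a row is written only when a run improves on the best loss seen so far. Below is a standalone sketch of the same logic with hypothetical sample data in place of a real runhistory; the function name incumbent_trajectory is an illustrative invention, not part of auto-sklearn.

import collections

RunValue = collections.namedtuple(
    'RunValue', ['cost', 'time', 'status', 'additional_info'])

def incumbent_trajectory(runhistory_data):
    # Yield (cumulative_time, train_score, test_score) whenever the loss improves.
    elapsed, best_loss = 0, 1
    yield (0, 0, 0)  # starting point
    for value in runhistory_data.values():
        elapsed += value.time
        if value.cost < best_loss:
            best_loss = value.cost
            yield (elapsed, 1 - value.cost,
                   1 - value.additional_info.get('test_loss', 1.0))

data = {
    1: RunValue(0.40, 10.0, 1, {'test_loss': 0.45}),
    2: RunValue(0.50, 5.0, 1, {'test_loss': 0.55}),  # no improvement, skipped
    3: RunValue(0.30, 8.0, 1, {'test_loss': 0.35}),
}
for row in incumbent_trajectory(data):
    print("{0},{1},{2}".format(*row))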