Example 1
    def test_fit_n_jobs(self, _fit_automl_patch, Process_patch,
                        build_automl_patch):
        # Make Process(...) return the patch itself
        Process_patch.return_value = Process_patch

        cls = AutoSklearnEstimator()
        cls.fit()
        self.assertEqual(build_automl_patch.call_count, 1)
        self.assertEqual(len(build_automl_patch.call_args[0]), 0)
        self.assertEqual(
            build_automl_patch.call_args[1],
            {
                'seed': 1,
                'shared_mode': False,
                'ensemble_size': 50,
                'initial_configurations_via_metalearning': 25,
                'output_folder': None,
                'tmp_folder': None
            },
        )
        self.assertEqual(Process_patch.call_count, 0)

        cls = AutoSklearnEstimator(n_jobs=5)
        cls.fit()
        # Plus the one from the first call
        self.assertEqual(build_automl_patch.call_count, 6)
        self.assertEqual(len(cls._automl), 5)
        for i in range(1, 6):
            self.assertEqual(len(build_automl_patch.call_args_list[i][0]), 0)
            self.assertEqual(len(build_automl_patch.call_args_list[i][1]), 7)
            # The seed is a MagicMock, so there is nothing to compare here...
            self.assertIn('seed', build_automl_patch.call_args_list[i][1])
            self.assertEqual(
                build_automl_patch.call_args_list[i][1]['shared_mode'],
                True,
            )
            self.assertEqual(
                build_automl_patch.call_args_list[i][1]['ensemble_size'],
                50 if i == 1 else 0,
            )
            self.assertEqual(
                build_automl_patch.call_args_list[i][1]
                ['initial_configurations_via_metalearning'],
                25 if i == 1 else 0,
            )
            if i > 1:
                self.assertEqual(
                    build_automl_patch.call_args_list[i][1]
                    ['smac_scenario_args']['initial_incumbent'],
                    'RANDOM',
                )

        self.assertEqual(Process_patch.start.call_count, 4)
        for i in range(2, 6):
            process_kwargs = Process_patch.call_args_list[i - 2][1]['kwargs']
            self.assertEqual(len(process_kwargs), 3)
            self.assertFalse(process_kwargs['load_models'])
        self.assertEqual(Process_patch.join.call_count, 4)

        self.assertEqual(_fit_automl_patch.call_count, 1)
        self.assertEqual(len(_fit_automl_patch.call_args[0]), 0)
        self.assertEqual(len(_fit_automl_patch.call_args[1]), 3)
        self.assertTrue(_fit_automl_patch.call_args[1]['load_models'])
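
The three mock arguments in the signature above are normally injected by stacked unittest.mock.patch decorators, which this excerpt does not show. Below is a minimal, self-contained sketch of the ordering rule, using a stand-in class instead of AutoSklearnEstimator since the real patch targets are not part of the excerpt:

import unittest
from unittest import mock


class Widget:
    """Stand-in for AutoSklearnEstimator, only to illustrate patch ordering."""
    def build_automl(self):
        pass

    def _fit_automl(self):
        pass


class PatchOrderingDemo(unittest.TestCase):

    # Decorators are applied bottom-up, so the bottom-most patch arrives as
    # the first mock argument after `self`, matching the argument order
    # (_fit_automl_patch, Process_patch, build_automl_patch) above.
    @mock.patch.object(Widget, 'build_automl')
    @mock.patch('multiprocessing.Process')
    @mock.patch.object(Widget, '_fit_automl')
    def test_argument_order(self, _fit_automl_patch, Process_patch,
                            build_automl_patch):
        self.assertIsInstance(build_automl_patch, mock.MagicMock)
        self.assertIsInstance(Process_patch, mock.MagicMock)
        self.assertIsInstance(_fit_automl_patch, mock.MagicMock)


if __name__ == '__main__':
    unittest.main()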
Example 2
def test_fit_n_jobs_negative(build_automl_patch):
    n_cores = cpu_count()
    cls = AutoSklearnEstimator(n_jobs=-1, ensemble_size=0)
    cls.fit()
    assert cls._n_jobs == n_cores
Example 3
    def test_fit_n_jobs_negative(self, build_automl_patch):
        n_cores = cpu_count()
        cls = AutoSklearnEstimator(n_jobs=-1)
        cls.fit()
        self.assertEqual(len(cls._automl), n_cores)
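
Both variants compare the n_jobs=-1 handling against cpu_count() (presumably imported from multiprocessing) and rely on a build_automl_patch mock; in the pytest-style variant of Example 2, that mock would typically be supplied by a fixture. A minimal sketch of such a fixture, where the patch target is an assumption rather than something taken from the excerpts:

from unittest import mock

import pytest


@pytest.fixture
def build_automl_patch():
    # Assumed patch target; the idea is to stop fit() from constructing a
    # real AutoML backend so that only the n_jobs handling is exercised.
    with mock.patch(
        'autosklearn.estimators.AutoSklearnEstimator.build_automl'
    ) as patched:
        yield patched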
Example 4
def test_leaderboard(tmp_dir: str, estimator_type: Type[AutoSklearnEstimator],
                     dataset_name: str):
    # A comprehensive test takes a substantial amount of time; set this
    # manually if required.
    # Valid range: [0, len(valid_columns) + 1]
    MAX_COMBO_SIZE_FOR_INCLUDE_PARAM = 3
    column_types = AutoSklearnEstimator._leaderboard_columns()

    # Create a dict of all possible param values for each param,
    # with some invalid ones of the incorrect type
    include_combinations = itertools.chain.from_iterable(
        itertools.combinations(column_types['all'], item_count)
        for item_count in range(1, MAX_COMBO_SIZE_FOR_INCLUDE_PARAM))
    valid_params = {
        'detailed': [True, False],
        'ensemble_only': [True, False],
        'top_k': [-10, 0, 1, 10, 'all'],
        'sort_by': [*column_types['all'], 'invalid'],
        'sort_order': ['ascending', 'descending', 'auto', 'invalid', None],
        'include':
        itertools.chain([None, 'invalid', 'type'], include_combinations),
    }

    # Create a generator of all possible combinations of valid_params
    params_generator = iter(
        dict(zip(valid_params.keys(), param_values))
        for param_values in itertools.product(*valid_params.values()))

    X_train, Y_train, _, _ = putil.get_dataset(dataset_name)
    model = estimator_type(time_left_for_this_task=30,
                           per_run_time_limit=5,
                           tmp_folder=tmp_dir,
                           seed=1)
    model.fit(X_train, Y_train)

    for params in params_generator:
        # Convert the include combination from a tuple to a list
        if params['include'] is not None and not isinstance(
                params['include'], str):
            params['include'] = list(params['include'])

        # top_k must be a positive int or 'all'; anything else should raise
        if not (params['top_k'] == 'all' or params['top_k'] > 0):
            with pytest.raises(ValueError):
                model.leaderboard(**params)

        # Invalid sort_by column
        elif params['sort_by'] not in column_types['all']:
            with pytest.raises(ValueError):
                model.leaderboard(**params)

        # Shouldn't accept an invalid sort order
        elif params['sort_order'] not in ['ascending', 'descending', 'auto']:
            with pytest.raises(ValueError):
                model.leaderboard(**params)

        # include is a single str but not a valid column
        elif (isinstance(params['include'], str)
              and params['include'] not in column_types['all']):
            with pytest.raises(ValueError):
                model.leaderboard(**params)

        # Should raise if include is a list containing an invalid column
        elif (isinstance(params['include'], list)
              and len(set(params['include']) - set(column_types['all'])) != 0):
            with pytest.raises(ValueError):
                model.leaderboard(**params)

        # Can't request just model_id, in both the single-str and list case
        elif (params['include'] == 'model_id'
              or params['include'] == ['model_id']):
            with pytest.raises(ValueError):
                model.leaderboard(**params)

        # Otherwise, every valid combination should produce a leaderboard
        else:
            leaderboard = model.leaderboard(**params)

            # top_k should never be less than the number of rows returned;
            # it can, however, be larger
            if isinstance(params['top_k'], int):
                assert params['top_k'] >= len(leaderboard)

            # Check the right columns are present and in the right order
            # The model_id is set as the index, not included in pandas columns
            columns = list(leaderboard.columns)

            def exclude(lst, s):
                return [x for x in lst if x != s]

            if params['include'] is not None:
                # When include is a single str, it should be the only column
                if isinstance(params['include'], str):
                    assert params['include'] in columns and len(columns) == 1
                # When include is a list, the columns should match it (minus model_id)
                else:
                    assert columns == exclude(params['include'], 'model_id')
            elif params['detailed']:
                assert columns == exclude(column_types['detailed'], 'model_id')
            else:
                assert columns == exclude(column_types['simple'], 'model_id')

            # If ensemble_only is set, every listed model should have a
            # positive ensemble weight (only checkable when the
            # 'ensemble_weight' column is present)
            if params['ensemble_only'] and 'ensemble_weight' in columns:
                assert all(leaderboard['ensemble_weight'] > 0)
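
For reference, a call that passes all of the checks above might look like the sketch below; `model` is assumed to be an already fitted AutoSklearnEstimator, and sorting by 'ensemble_weight' is an illustrative assumption rather than something the test prescribes.

# Usage sketch, assuming `model` is a fitted AutoSklearnEstimator.
# Parameter names come from the test above; the concrete values are
# illustrative assumptions.
board = model.leaderboard(
    detailed=True,               # full column set instead of the simple one
    ensemble_only=True,          # only models with a positive ensemble_weight
    top_k='all',                 # or any positive int
    sort_by='ensemble_weight',   # assumed to be a valid leaderboard column
    sort_order='descending',
    include=None,                # or an explicit list of column names
)
print(board.head())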