def test_get_splitter(self):
    """Check the splitter type ``get_splitter`` returns per scenario.

    Each scenario configures the mocked data manager with a task type and
    training labels, builds an ``ExecuteTaFuncWithQueue`` with a
    resampling strategy, and asserts the class of the object returned by
    ``get_splitter``. Scenarios run as sub-tests so that one failing
    combination does not hide the results of the others.
    """
    ta_args = dict(
        backend=BackendMock(),
        autosklearn_seed=1,
        logger=self.logger,
        stats=self.stats,
        memory_limit=3072,
        metric=accuracy,
    )

    two_class_labels = [0, 0, 0, 1, 1, 1]
    # Same labels plus one extra class that occurs only once; used for the
    # "fallback" scenarios below (presumably because a single-member class
    # cannot be stratified -- confirm against get_splitter).
    singleton_class_labels = [0, 0, 0, 1, 1, 1, 2]
    regression_targets = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5]

    # Splitter classes are referenced through the public
    # ``sklearn.model_selection`` namespace rather than the private
    # ``_split`` module.
    # (description, task, Y_train, resampling_strategy, extra kwargs,
    #  expected splitter class)
    scenarios = [
        (
            'holdout, binary classification',
            BINARY_CLASSIFICATION, two_class_labels, 'holdout', {},
            sklearn.model_selection.StratifiedShuffleSplit,
        ),
        (
            'holdout, binary classification, fallback to shuffle split',
            BINARY_CLASSIFICATION, singleton_class_labels, 'holdout', {},
            sklearn.model_selection.ShuffleSplit,
        ),
        (
            'cv, binary classification',
            BINARY_CLASSIFICATION, two_class_labels, 'cv', {'folds': 5},
            sklearn.model_selection.StratifiedKFold,
        ),
        (
            'cv, binary classification, no fallback anticipated',
            BINARY_CLASSIFICATION, singleton_class_labels, 'cv',
            {'folds': 5},
            sklearn.model_selection.StratifiedKFold,
        ),
        (
            'regression, shuffle split',
            REGRESSION, regression_targets, 'holdout', {},
            sklearn.model_selection.ShuffleSplit,
        ),
        (
            'regression cv, KFold',
            REGRESSION, regression_targets, 'cv', {'folds': 5},
            sklearn.model_selection.KFold,
        ),
    ]

    D = unittest.mock.Mock(spec=AbstractDataManager)
    for description, task, y_train, strategy, extra, expected in scenarios:
        with self.subTest(msg=description):
            # Set the mock's state explicitly for every scenario so no
            # case depends on what an earlier one left behind.
            D.data = dict(Y_train=np.array(y_train))
            D.info = dict(task=task)

            ta = ExecuteTaFuncWithQueue(
                resampling_strategy=strategy, **extra, **ta_args
            )
            cv = ta.get_splitter(D)
            self.assertIsInstance(cv, expected)