def test_datasets(self):
    for getter in get_dataset_getters():
        testname = '%s_%s' % (os.path.basename(__file__).replace(
            '.pyc', '').replace('.py', ''), getter.__name__)
        with self.subTest(testname):
            backend_mock = unittest.mock.Mock(spec=Backend)
            backend_mock.get_model_dir.return_value = 'dutirapbdxvltcrpbdlcatepdeau'
            D = getter()
            D_ = copy.deepcopy(D)
            y = D.data['Y_train']
            if len(y.shape) == 2 and y.shape[1] == 1:
                D_.data['Y_train'] = y.flatten()

            metric_lookup = {MULTILABEL_CLASSIFICATION: f1_macro,
                             BINARY_CLASSIFICATION: accuracy,
                             MULTICLASS_CLASSIFICATION: accuracy,
                             REGRESSION: r2}
            queue_ = multiprocessing.Queue()

            evaluator = TestEvaluator(
                D_, backend_mock, queue_,
                metric=metric_lookup[D.info['task']])

            evaluator.fit_predict_and_loss()
            rval = get_last_result(evaluator.queue)
            self.assertEqual(len(rval), 3)
            self.assertTrue(np.isfinite(rval['loss']))
def test_eval_partial_cv(self):
    results = [0.090909090909090939,
               0.047619047619047672,
               0.052631578947368474,
               0.15789473684210531,
               0.0]
    for fold in range(5):
        instance = json.dumps({'task_id': 'data', 'fold': fold})
        eval_partial_cv(
            queue=self.queue,
            config=self.configuration,
            backend=self.backend,
            seed=1,
            num_run=1,
            instance=instance,
            resampling_strategy='partial-cv',
            resampling_strategy_args={'folds': 5},
            all_scoring_functions=False,
            output_y_hat_optimization=True,
            include=None,
            exclude=None,
            disable_file_output=False,
            metric=accuracy,
        )
        rval = get_last_result(self.queue)
        self.assertAlmostEqual(rval['loss'], results[fold])
        self.assertEqual(rval['status'], StatusType.SUCCESS)
def test_eval_partial_cv(self):
    cv = StratifiedKFold(shuffle=True, random_state=1, n_splits=5)
    results = [0.045454545454545414,
               0.095238095238095233,
               0.052631578947368474,
               0.10526315789473684,
               0.0]
    for fold in range(5):
        instance = json.dumps({'task_id': 'data', 'fold': fold})
        eval_partial_cv(queue=self.queue,
                        config=self.configuration,
                        datamanager=self.data,
                        backend=self.backend,
                        seed=1,
                        num_run=1,
                        instance=instance,
                        cv=cv,
                        all_scoring_functions=False,
                        output_y_hat_optimization=True,
                        include=None,
                        exclude=None,
                        disable_file_output=False,
                        metric=accuracy)
        rval = get_last_result(self.queue)
        self.assertAlmostEqual(rval['loss'], results[fold])
        self.assertEqual(rval['status'], StatusType.SUCCESS)
def test_get_results(self):
    backend_mock = unittest.mock.Mock(spec=backend.Backend)
    backend_mock.get_model_dir.return_value = 'dutirapbdxvltcrpbdlcatepdeau'
    queue_ = multiprocessing.Queue()
    for i in range(5):
        queue_.put((i * 1, 1 - (i * 0.2), 0, "", StatusType.SUCCESS))
    result = get_last_result(queue_)
    self.assertEqual(result[0], 4)
    self.assertAlmostEqual(result[1], 0.2)
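# A minimal sketch of the behaviour the test above assumes from the
# `get_last_result` helper: drain the multiprocessing queue and return the
# most recent entry. The stand-in below (note the leading underscore) is
# illustrative only, not the actual implementation used by these tests.
import queue


def _get_last_result_sketch(queue_):
    last = None
    while True:
        try:
            # Keep polling until the queue is exhausted, remembering the
            # most recently retrieved item.
            last = queue_.get(timeout=1)
        except queue.Empty:
            break
    if last is None:
        # Nothing was ever put on the queue; surface that to the caller.
        raise queue.Empty
    return last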
def test_eval_cv_all_loss_functions(self):
    eval_cv(
        queue=self.queue,
        config=self.configuration,
        backend=self.backend,
        seed=1,
        num_run=1,
        resampling_strategy='cv',
        resampling_strategy_args={'folds': 3},
        all_scoring_functions=True,
        output_y_hat_optimization=True,
        include=None,
        exclude=None,
        disable_file_output=False,
        instance=self.dataset_name,
        metric=accuracy,
    )
    rval = get_last_result(self.queue)

    fixture = {'accuracy': 0.06,
               'balanced_accuracy': 0.063508064516129004,
               'f1_macro': 0.063508064516129004,
               'f1_micro': 0.06,
               'f1_weighted': 0.06,
               'log_loss': 1.1299444831535221,
               'pac_score': 0.18306366567302557,
               'precision_macro': 0.063508064516129004,
               'precision_micro': 0.06,
               'precision_weighted': 0.06,
               'recall_macro': 0.063508064516129004,
               'recall_micro': 0.06,
               'recall_weighted': 0.06,
               'num_run': 1}

    additional_run_info = rval['additional_run_info']
    for key, value in fixture.items():
        self.assertAlmostEqual(additional_run_info[key], value, msg=key)
    self.assertIn('duration', additional_run_info)
    self.assertEqual(len(additional_run_info), len(fixture) + 1,
                     msg=sorted(additional_run_info.items()))

    self.assertAlmostEqual(rval['loss'], 0.06)
    self.assertEqual(rval['status'], StatusType.SUCCESS)
def test_eval_holdout_all_loss_functions(self):
    eval_holdout(
        queue=self.queue,
        config=self.configuration,
        backend=self.backend,
        resampling_strategy='holdout',
        resampling_strategy_args=None,
        seed=1,
        num_run=1,
        all_scoring_functions=True,
        output_y_hat_optimization=True,
        include=None,
        exclude=None,
        disable_file_output=False,
        instance=self.dataset_name,
        metric=accuracy,
    )
    rval = get_last_result(self.queue)

    fixture = {'accuracy': 0.030303030303030276,
               'balanced_accuracy': 0.033333333333333326,
               'f1_macro': 0.032036613272311221,
               'f1_micro': 0.030303030303030276,
               'f1_weighted': 0.030441716940572849,
               'log_loss': 1.0634089940876672,
               'pac_score': 0.092288218582651682,
               'precision_macro': 0.02777777777777779,
               'precision_micro': 0.030303030303030276,
               'precision_weighted': 0.027777777777777901,
               'recall_macro': 0.033333333333333326,
               'recall_micro': 0.030303030303030276,
               'recall_weighted': 0.030303030303030276,
               'num_run': 1}

    additional_run_info = rval['additional_run_info']
    for key, value in fixture.items():
        self.assertAlmostEqual(additional_run_info[key], value, msg=key)
    self.assertIn('duration', additional_run_info)
    self.assertEqual(len(additional_run_info), len(fixture) + 1,
                     msg=sorted(additional_run_info.items()))

    self.assertAlmostEqual(rval['loss'], 0.030303030303030276, places=3)
    self.assertEqual(rval['status'], StatusType.SUCCESS)
def test_holdout(self, pipeline_mock):
    D = get_binary_classification_datamanager()
    D.name = 'test'

    pipeline_mock.predict_proba.side_effect = \
        lambda X, batch_size: np.tile([0.6, 0.4], (len(X), 1))
    pipeline_mock.side_effect = lambda **kwargs: pipeline_mock
    pipeline_mock.get_additional_run_info.return_value = None

    output_dir = os.path.join(os.getcwd(), '.test_holdout')

    configuration = unittest.mock.Mock(spec=Configuration)
    backend_api = backend.create(output_dir, output_dir)
    backend_api.load_datamanager = lambda: D
    queue_ = multiprocessing.Queue()

    evaluator = TrainEvaluator(backend_api, queue_,
                               configuration=configuration,
                               resampling_strategy='holdout',
                               resampling_strategy_args={'train_size': 0.66},
                               all_scoring_functions=False,
                               output_y_hat_optimization=True,
                               metric=accuracy,
                               subsample=50)
    evaluator.file_output = unittest.mock.Mock(spec=evaluator.file_output)
    evaluator.file_output.return_value = (None, None)

    evaluator.fit_predict_and_loss()

    rval = get_last_result(evaluator.queue)
    result = rval['loss']
    self.assertEqual(len(rval), 3)
    self.assertRaises(queue.Empty, evaluator.queue.get, timeout=1)

    self.assertEqual(evaluator.file_output.call_count, 1)
    self.assertEqual(result, 0.45833333333333337)
    self.assertEqual(pipeline_mock.fit.call_count, 1)
    # three calls because of the holdout, the validation and the test set
    self.assertEqual(pipeline_mock.predict_proba.call_count, 3)
    self.assertEqual(evaluator.file_output.call_count, 1)
    self.assertEqual(evaluator.file_output.call_args[0][0].shape[0], 24)
    self.assertEqual(evaluator.file_output.call_args[0][1].shape[0],
                     D.data['Y_valid'].shape[0])
    self.assertEqual(evaluator.file_output.call_args[0][2].shape[0],
                     D.data['Y_test'].shape[0])
    self.assertEqual(evaluator.model.fit.call_count, 1)
def test_cv(self, pipeline_mock):
    D = get_binary_classification_datamanager()

    pipeline_mock.predict_proba.side_effect = \
        lambda X, batch_size: np.tile([0.6, 0.4], (len(X), 1))
    pipeline_mock.side_effect = lambda **kwargs: pipeline_mock
    pipeline_mock.get_additional_run_info.return_value = None

    output_dir = os.path.join(os.getcwd(), '.test_cv')

    configuration = unittest.mock.Mock(spec=Configuration)
    backend_api = backend.create(output_dir, output_dir)
    backend_api.load_datamanager = lambda: D
    queue_ = multiprocessing.Queue()

    evaluator = TrainEvaluator(backend_api, queue_,
                               configuration=configuration,
                               resampling_strategy='cv',
                               resampling_strategy_args={'folds': 5},
                               all_scoring_functions=False,
                               output_y_hat_optimization=True,
                               metric=accuracy)
    evaluator.file_output = unittest.mock.Mock(spec=evaluator.file_output)
    evaluator.file_output.return_value = (None, None)

    evaluator.fit_predict_and_loss()

    rval = get_last_result(evaluator.queue)
    result = rval['loss']
    self.assertEqual(len(rval), 3)
    self.assertRaises(queue.Empty, evaluator.queue.get, timeout=1)

    self.assertEqual(evaluator.file_output.call_count, 1)
    self.assertEqual(result, 0.46376811594202894)
    self.assertEqual(pipeline_mock.fit.call_count, 5)
    # fifteen calls: for each of the five folds, one prediction each on the
    # fold's holdout set, the validation set and the test set
    self.assertEqual(pipeline_mock.predict_proba.call_count, 15)
    self.assertEqual(evaluator.file_output.call_args[0][0].shape[0],
                     D.data['Y_train'].shape[0])
    self.assertEqual(evaluator.file_output.call_args[0][1].shape[0],
                     D.data['Y_valid'].shape[0])
    self.assertEqual(evaluator.file_output.call_args[0][2].shape[0],
                     D.data['Y_test'].shape[0])
    # The model prior to fitting is saved; this cannot be tested directly
    # because of the way the mock module is used. Instead, we check that the
    # if-block in which the model assignment happens was entered.
    self.assertTrue(evaluator._added_empty_model)
def test_eval_cv_all_loss_functions(self):
    cv = StratifiedKFold(shuffle=True, random_state=1)
    eval_cv(queue=self.queue,
            config=self.configuration,
            datamanager=self.data,
            backend=self.backend,
            seed=1,
            num_run=1,
            cv=cv,
            all_scoring_functions=True,
            output_y_hat_optimization=True,
            include=None,
            exclude=None,
            disable_file_output=False,
            instance=self.dataset_name,
            metric=accuracy)
    rval = get_last_result(self.queue)

    fixture = {'accuracy': 0.04,
               'balanced_accuracy': 0.042002688172,
               'f1_macro': 0.0423387096774,
               'f1_micro': 0.04,
               'f1_weighted': 0.040020161290,
               'log_loss': 1.11651433976,
               'pac_score': 0.165226664054,
               'precision_macro': 0.0414141414141,
               'precision_micro': 0.04,
               'precision_weighted': 0.0388484848485,
               'recall_macro': 0.042002688172,
               'recall_micro': 0.04,
               'recall_weighted': 0.04,
               'num_run': 1}

    additional_run_info = rval['additional_run_info']
    for key, value in fixture.items():
        self.assertAlmostEqual(additional_run_info[key], value, msg=key)
    self.assertIn('duration', additional_run_info)
    self.assertEqual(len(additional_run_info), len(fixture) + 1,
                     msg=sorted(additional_run_info.items()))

    self.assertAlmostEqual(rval['loss'], 0.040000000000000036)
    self.assertEqual(rval['status'], StatusType.SUCCESS)
def test_eval_holdout_all_loss_functions(self):
    kfold = ShuffleSplit(random_state=1, n_splits=1, test_size=0.33)
    eval_holdout(queue=self.queue,
                 config=self.configuration,
                 datamanager=self.data,
                 backend=self.backend,
                 cv=kfold,
                 seed=1,
                 num_run=1,
                 all_scoring_functions=True,
                 output_y_hat_optimization=True,
                 include=None,
                 exclude=None,
                 disable_file_output=False,
                 instance=self.dataset_name,
                 metric=accuracy)
    rval = get_last_result(self.queue)

    fixture = {'accuracy': 0.0606060606061,
               'balanced_accuracy': 0.0636363636364,
               'f1_macro': 0.0636363636364,
               'f1_micro': 0.0606060606061,
               'f1_weighted': 0.0606060606061,
               'log_loss': 1.14529191037,
               'pac_score': 0.203125867166,
               'precision_macro': 0.0636363636364,
               'precision_micro': 0.0606060606061,
               'precision_weighted': 0.0606060606061,
               'recall_macro': 0.0636363636364,
               'recall_micro': 0.0606060606061,
               'recall_weighted': 0.0606060606061,
               'num_run': 1}

    additional_run_info = rval['additional_run_info']
    for key, value in fixture.items():
        self.assertAlmostEqual(additional_run_info[key], value, msg=key)
    self.assertIn('duration', additional_run_info)
    self.assertEqual(len(additional_run_info), len(fixture) + 1,
                     msg=sorted(additional_run_info.items()))

    self.assertAlmostEqual(rval['loss'], 0.060606060606060552, places=3)
    self.assertEqual(rval['status'], StatusType.SUCCESS)
def test_eval_test(self):
    eval_t(queue=self.queue,
           backend=self.backend,
           config=self.configuration,
           metric=accuracy,
           seed=1,
           num_run=1,
           all_scoring_functions=False,
           output_y_hat_optimization=False,
           include=None,
           exclude=None,
           disable_file_output=False,
           instance=self.dataset_name)
    rval = get_last_result(self.queue)
    self.assertAlmostEqual(rval['loss'], 0.04)
    self.assertEqual(rval['status'], StatusType.SUCCESS)
    self.assertNotIn('bac_metric', rval['additional_run_info'])
def test_eval_test_all_loss_functions(self):
    eval_t(
        queue=self.queue,
        backend=self.backend,
        config=self.configuration,
        metric=accuracy,
        seed=1,
        num_run=1,
        all_scoring_functions=True,
        output_y_hat_optimization=False,
        include=None,
        exclude=None,
        disable_file_output=False,
        instance=self.dataset_name,
    )
    rval = get_last_result(self.queue)

    fixture = {'accuracy': 0.04,
               'balanced_accuracy': 0.0277777777778,
               'f1_macro': 0.0341005967604,
               'f1_micro': 0.04,
               'f1_weighted': 0.0396930946292,
               'log_loss': 1.1352229526638984,
               'pac_score': 0.19574985585209126,
               'precision_macro': 0.037037037037,
               'precision_micro': 0.04,
               'precision_weighted': 0.0355555555556,
               'recall_macro': 0.0277777777778,
               'recall_micro': 0.04,
               'recall_weighted': 0.04,
               'num_run': -1}

    additional_run_info = rval['additional_run_info']
    for key, value in fixture.items():
        self.assertAlmostEqual(additional_run_info[key], value, msg=key)
    self.assertEqual(len(additional_run_info), len(fixture) + 1,
                     msg=sorted(additional_run_info.items()))
    self.assertIn('duration', additional_run_info)

    self.assertAlmostEqual(rval['loss'], 0.04)
    self.assertEqual(rval['status'], StatusType.SUCCESS)
def test_eval_holdout_iterative_fit_no_timeout(self):
    kfold = ShuffleSplit(random_state=1, n_splits=1, test_size=0.33)
    eval_iterative_holdout(queue=self.queue,
                           config=self.configuration,
                           datamanager=self.data,
                           backend=self.backend,
                           cv=kfold,
                           seed=1,
                           num_run=1,
                           all_scoring_functions=False,
                           output_y_hat_optimization=True,
                           include=None,
                           exclude=None,
                           disable_file_output=False,
                           instance=self.dataset_name,
                           metric=accuracy)
    rval = get_last_result(self.queue)
    self.assertAlmostEqual(rval['loss'], 0.060606060606060552)
    self.assertEqual(rval['status'], StatusType.SUCCESS)
def test_eval_holdout_iterative_fit_no_timeout(self):
    eval_iterative_holdout(
        queue=self.queue,
        config=self.configuration,
        backend=self.backend,
        resampling_strategy='holdout',
        resampling_strategy_args=None,
        seed=1,
        num_run=1,
        all_scoring_functions=False,
        output_y_hat_optimization=True,
        include=None,
        exclude=None,
        disable_file_output=False,
        instance=self.dataset_name,
        metric=accuracy,
    )
    rval = get_last_result(self.queue)
    self.assertAlmostEqual(rval['loss'], 0.030303030303030276)
    self.assertEqual(rval['status'], StatusType.SUCCESS)
def test_eval_cv(self):
    cv = StratifiedKFold(shuffle=True, random_state=1)
    eval_cv(queue=self.queue,
            config=self.configuration,
            datamanager=self.data,
            backend=self.backend,
            seed=1,
            num_run=1,
            cv=cv,
            all_scoring_functions=False,
            output_y_hat_optimization=True,
            include=None,
            exclude=None,
            disable_file_output=False,
            instance=self.dataset_name,
            metric=accuracy)
    rval = get_last_result(self.queue)
    self.assertAlmostEqual(rval['loss'], 0.040000000000000036)
    self.assertEqual(rval['status'], StatusType.SUCCESS)
    self.assertNotIn('bac_metric', rval['additional_run_info'])
def test_eval_cv(self):
    eval_cv(
        queue=self.queue,
        config=self.configuration,
        backend=self.backend,
        seed=1,
        num_run=1,
        resampling_strategy='cv',
        resampling_strategy_args={'folds': 3},
        all_scoring_functions=False,
        output_y_hat_optimization=True,
        include=None,
        exclude=None,
        disable_file_output=False,
        instance=self.dataset_name,
        metric=accuracy,
    )
    rval = get_last_result(self.queue)
    self.assertAlmostEqual(rval['loss'], 0.06)
    self.assertEqual(rval['status'], StatusType.SUCCESS)
    self.assertNotIn('bac_metric', rval['additional_run_info'])