def setUp(self): """ Creates a backend mock """ self.ev_path = os.path.join(this_directory, '.tmp_evaluations') if not os.path.exists(self.ev_path): os.mkdir(self.ev_path) dummy_model_files = [ os.path.join(self.ev_path, str(n)) for n in range(100) ] dummy_pred_files = [ os.path.join(self.ev_path, str(n)) for n in range(100, 200) ] backend_mock = unittest.mock.Mock() backend_mock.get_model_dir.return_value = self.ev_path backend_mock.get_model_path.side_effect = dummy_model_files backend_mock.get_prediction_output_path.side_effect = dummy_pred_files D = get_multiclass_classification_datamanager() backend_mock.load_datamanager.return_value = D backend_mock.temporary_directory = tempfile.gettempdir() self.backend_mock = backend_mock self.port = logging.handlers.DEFAULT_TCP_LOGGING_PORT self.working_directory = os.path.join(this_directory, '.tmp_%s' % self.id())
def test_error_unsupported_budget_type(self):
    shutil.rmtree(self.working_directory, ignore_errors=True)
    os.mkdir(self.working_directory)
    queue_mock = unittest.mock.Mock()
    context = BackendContext(
        prefix='autoPyTorch',
        temporary_directory=os.path.join(self.working_directory, 'tmp'),
        output_directory=os.path.join(self.working_directory, 'out'),
        delete_tmp_folder_after_terminate=True,
        delete_output_folder_after_terminate=True,
    )
    with unittest.mock.patch.object(Backend, 'load_datamanager') as load_datamanager_mock:
        load_datamanager_mock.return_value = get_multiclass_classification_datamanager()
        backend = Backend(context, prefix='autoPyTorch')
        try:
            AbstractEvaluator(backend=backend,
                              output_y_hat_optimization=False,
                              queue=queue_mock,
                              pipeline_config={'budget_type': "error",
                                               'error': 0},
                              metric=accuracy,
                              budget=0,
                              configuration=1)
        except Exception as e:
            self.assertIsInstance(e, ValueError)
    shutil.rmtree(self.working_directory, ignore_errors=True)
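def test_error_unsupported_budget_type_with_assertRaises(self):
    # A sketch only, not part of the original suite: the try/except pattern
    # above passes silently if no exception is raised at all, whereas
    # unittest's assertRaises context manager fails the test in that case.
    # It reuses the same fixtures and constructor arguments as
    # test_error_unsupported_budget_type above.
    shutil.rmtree(self.working_directory, ignore_errors=True)
    os.mkdir(self.working_directory)
    queue_mock = unittest.mock.Mock()
    context = BackendContext(
        prefix='autoPyTorch',
        temporary_directory=os.path.join(self.working_directory, 'tmp'),
        output_directory=os.path.join(self.working_directory, 'out'),
        delete_tmp_folder_after_terminate=True,
        delete_output_folder_after_terminate=True,
    )
    with unittest.mock.patch.object(Backend, 'load_datamanager') as load_datamanager_mock:
        load_datamanager_mock.return_value = get_multiclass_classification_datamanager()
        backend = Backend(context, prefix='autoPyTorch')
        # Fails loudly unless a ValueError is actually raised.
        with self.assertRaises(ValueError):
            AbstractEvaluator(backend=backend,
                              output_y_hat_optimization=False,
                              queue=queue_mock,
                              pipeline_config={'budget_type': "error",
                                               'error': 0},
                              metric=accuracy,
                              budget=0,
                              configuration=1)
    shutil.rmtree(self.working_directory, ignore_errors=True)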
def setUp(self):
    self.datamanager = get_multiclass_classification_datamanager()
    self.tmp = os.path.join(os.getcwd(), '.test_evaluation')
    try:
        shutil.rmtree(self.tmp)
    except Exception:
        pass
def setUp(self):
    self.queue = multiprocessing.Queue()
    self.configuration = get_configuration_space(
        {'task': MULTICLASS_CLASSIFICATION,
         'is_sparse': False}).get_default_configuration()
    self.data = get_multiclass_classification_datamanager()
    self.tmp_dir = os.path.join(os.path.dirname(__file__), '.test_cv_functions')
def test_finish_up_model_predicts_NaN(self):
    '''Tests by handing in predictions which contain NaNs'''
    rs = np.random.RandomState(1)
    D = get_multiclass_classification_datamanager()
    backend_api = unittest.mock.Mock()
    backend_api.load_datamanager.return_value = D
    queue_mock = unittest.mock.Mock()

    ae = AbstractEvaluator(backend=backend_api,
                           output_y_hat_optimization=False,
                           queue=queue_mock,
                           metric=accuracy)
    ae.Y_optimization = rs.rand(33, 3)
    predictions_ensemble = rs.rand(33, 3)
    predictions_test = rs.rand(25, 3)
    predictions_valid = rs.rand(25, 3)

    # NaNs in prediction ensemble
    predictions_ensemble[5, 2] = np.NaN
    _, loss, _, additional_run_info = ae.finish_up(0.1, predictions_ensemble,
                                                   predictions_valid,
                                                   predictions_test)
    self.assertEqual(loss, 1.0)
    self.assertEqual(additional_run_info,
                     {'error': 'Model predictions for optimization set '
                               'contains NaNs.'})

    # NaNs in prediction validation
    predictions_ensemble[5, 2] = 0.5
    predictions_valid[5, 2] = np.NaN
    _, loss, _, additional_run_info = ae.finish_up(0.1, predictions_ensemble,
                                                   predictions_valid,
                                                   predictions_test)
    self.assertEqual(loss, 1.0)
    self.assertEqual(additional_run_info,
                     {'error': 'Model predictions for validation set '
                               'contains NaNs.'})

    # NaNs in prediction test
    predictions_valid[5, 2] = 0.5
    predictions_test[5, 2] = np.NaN
    _, loss, _, additional_run_info = ae.finish_up(0.1, predictions_ensemble,
                                                   predictions_valid,
                                                   predictions_test)
    self.assertEqual(loss, 1.0)
    self.assertEqual(additional_run_info,
                     {'error': 'Model predictions for test set contains '
                               'NaNs.'})

    self.assertEqual(backend_api.save_predictions_as_npy.call_count, 0)
def setUp(self):
    self.queue = multiprocessing.Queue()
    self.configuration = get_configuration_space({
        'task': MULTICLASS_CLASSIFICATION,
        'is_sparse': False
    }).get_default_configuration()
    self.data = get_multiclass_classification_datamanager()
    self.tmp_dir = os.path.join(os.path.dirname(__file__), '.test_holdout_functions')
def setUp(self):
    self.queue = multiprocessing.Queue()
    self.configuration = get_configuration_space(
        {'task': MULTICLASS_CLASSIFICATION,
         'is_sparse': False}).get_default_configuration()
    self.data = get_multiclass_classification_datamanager()
    self.tmp_dir = os.path.join(os.path.dirname(__file__), '.test_cv_functions')
    self.backend = unittest.mock.Mock(spec=Backend)
    self.backend.load_datamanager.return_value = self.data
    self.dataset_name = json.dumps({'task_id': 'test'})
def setUp(self):
    self.queue = multiprocessing.Queue()
    self.configuration = get_configuration_space({
        'task': MULTICLASS_CLASSIFICATION,
        'is_sparse': False
    }).get_default_configuration()
    self.data = get_multiclass_classification_datamanager()
    self.tmp_dir = os.path.join(os.path.dirname(__file__), '.test_holdout_functions')
    self.n = len(self.data.data['Y_train'])
    self.y = self.data.data['Y_train'].flatten()
    self.backend = unittest.mock.Mock()
    self.backend.get_model_dir.return_value = 'udiaetzrpduaeirdaetr'
    self.backend.output_directory = 'duapdbaetpdbe'
    self.dataset_name = json.dumps({'task_id': 'test'})
def test_finish_up_model_predicts_NaN(self):
    '''Tests by handing in predictions which contain NaNs'''
    rs = np.random.RandomState(1)
    D = get_multiclass_classification_datamanager()
    output_dir = os.path.join(os.getcwd(), '.test_finish_up_model_predicts_NaN')
    try:
        shutil.rmtree(output_dir)
    except Exception:
        pass

    backend_api = backend.create(output_dir, output_dir)
    ae = AbstractEvaluator(Datamanager=D, backend=backend_api, output_y_test=False)
    ae.Y_optimization = rs.rand(33, 3)
    predictions_ensemble = rs.rand(33, 3)
    predictions_test = rs.rand(25, 3)
    predictions_valid = rs.rand(25, 3)

    # NaNs in prediction ensemble
    predictions_ensemble[5, 2] = np.NaN
    _, loss, _, additional_run_info = ae.finish_up(
        0.1, predictions_ensemble, predictions_valid, predictions_test)
    self.assertEqual(loss, 2.0)
    self.assertEqual(additional_run_info, 'Model predictions for '
                                          'optimization set contains NaNs.')

    # NaNs in prediction validation
    predictions_ensemble[5, 2] = 0.5
    predictions_valid[5, 2] = np.NaN
    _, loss, _, additional_run_info = ae.finish_up(
        0.1, predictions_ensemble, predictions_valid, predictions_test)
    self.assertEqual(loss, 2.0)
    self.assertEqual(additional_run_info, 'Model predictions for '
                                          'validation set contains NaNs.')

    # NaNs in prediction test
    predictions_valid[5, 2] = 0.5
    predictions_test[5, 2] = np.NaN
    _, loss, _, additional_run_info = ae.finish_up(
        0.1, predictions_ensemble, predictions_valid, predictions_test)
    self.assertEqual(loss, 2.0)
    self.assertEqual(additional_run_info, 'Model predictions for '
                                          'test set contains NaNs.')

    self.assertEqual(len(os.listdir(os.path.join(output_dir, '.auto-sklearn'))), 0)
def test_finish_up_model_predicts_NaN(self):
    '''Tests by handing in predictions which contain NaNs'''
    rs = np.random.RandomState(1)
    D = get_multiclass_classification_datamanager()
    output_dir = os.path.join(os.getcwd(), '.test_finish_up_model_predicts_NaN')
    try:
        shutil.rmtree(output_dir)
    except Exception:
        pass

    ae = AbstractEvaluator(Datamanager=D, output_dir=output_dir, output_y_test=False)
    ae.Y_optimization = rs.rand(33, 3)
    predictions_ensemble = rs.rand(33, 3)
    predictions_test = rs.rand(25, 3)
    predictions_valid = rs.rand(25, 3)

    # NaNs in prediction ensemble
    predictions_ensemble[5, 2] = np.NaN
    _, loss, _, additional_run_info = ae.finish_up(
        0.1, predictions_ensemble, predictions_valid, predictions_test)
    self.assertEqual(loss, 2.0)
    self.assertEqual(additional_run_info, 'Model predictions for '
                                          'optimization set contains NaNs.')

    # NaNs in prediction validation
    predictions_ensemble[5, 2] = 0.5
    predictions_valid[5, 2] = np.NaN
    _, loss, _, additional_run_info = ae.finish_up(
        0.1, predictions_ensemble, predictions_valid, predictions_test)
    self.assertEqual(loss, 2.0)
    self.assertEqual(additional_run_info, 'Model predictions for '
                                          'validation set contains NaNs.')

    # NaNs in prediction test
    predictions_valid[5, 2] = 0.5
    predictions_test[5, 2] = np.NaN
    _, loss, _, additional_run_info = ae.finish_up(
        0.1, predictions_ensemble, predictions_valid, predictions_test)
    self.assertEqual(loss, 2.0)
    self.assertEqual(additional_run_info, 'Model predictions for '
                                          'test set contains NaNs.')

    self.assertEqual(len(os.listdir(os.path.join(output_dir, '.auto-sklearn'))), 0)
def setUp(self):
    self.datamanager = get_multiclass_classification_datamanager()
    self.tmp = os.path.join(os.getcwd(), '.test_evaluation')
    self.logger = logging.getLogger()
    scenario_mock = unittest.mock.Mock()
    scenario_mock.wallclock_limit = 10
    scenario_mock.algo_runs_timelimit = 1000
    scenario_mock.ta_run_limit = 100
    self.scenario = scenario_mock
    stats = Stats(scenario_mock)
    stats.start_timing()
    self.stats = stats
    try:
        shutil.rmtree(self.tmp)
    except Exception:
        pass
def setUp(self): """ Creates a backend mock """ self.ev_path = os.path.join(this_directory, '.tmp_evaluations') if not os.path.exists(self.ev_path): os.mkdir(self.ev_path) dummy_model_files = [os.path.join(self.ev_path, str(n)) for n in range(100)] dummy_pred_files = [os.path.join(self.ev_path, str(n)) for n in range(100, 200)] backend_mock = unittest.mock.Mock() backend_mock.get_model_dir.return_value = self.ev_path backend_mock.get_model_path.side_effect = dummy_model_files backend_mock.get_prediction_output_path.side_effect = dummy_pred_files D = get_multiclass_classification_datamanager() backend_mock.load_datamanager.return_value = D self.backend_mock = backend_mock
def test_file_output(self):
    shutil.rmtree(self.working_directory, ignore_errors=True)
    os.mkdir(self.working_directory)
    queue_mock = unittest.mock.Mock()
    context = BackendContext(
        prefix='autoPyTorch',
        temporary_directory=os.path.join(self.working_directory, 'tmp'),
        output_directory=os.path.join(self.working_directory, 'out'),
        delete_tmp_folder_after_terminate=True,
        delete_output_folder_after_terminate=True,
    )
    with unittest.mock.patch.object(Backend, 'load_datamanager') as load_datamanager_mock:
        load_datamanager_mock.return_value = get_multiclass_classification_datamanager()
        backend = Backend(context, prefix='autoPyTorch')

        ae = AbstractEvaluator(backend=backend,
                               output_y_hat_optimization=False,
                               queue=queue_mock,
                               metric=accuracy,
                               budget=0,
                               configuration=1)
        ae.model = sklearn.dummy.DummyClassifier()

        rs = np.random.RandomState()
        ae.Y_optimization = rs.rand(33, 3)
        predictions_ensemble = rs.rand(33, 3)
        predictions_test = rs.rand(25, 3)
        predictions_valid = rs.rand(25, 3)

        ae.file_output(
            Y_optimization_pred=predictions_ensemble,
            Y_valid_pred=predictions_valid,
            Y_test_pred=predictions_test,
        )

        self.assertTrue(os.path.exists(os.path.join(
            self.working_directory, 'tmp', '.autoPyTorch', 'runs', '1_0_1.0')))

    shutil.rmtree(self.working_directory, ignore_errors=True)
def setUp(self):
    self.datamanager = get_multiclass_classification_datamanager()
    self.tmp = os.path.join(os.getcwd(), '.test_evaluation')
    os.mkdir(self.tmp)
    self.logger_port = logging.handlers.DEFAULT_TCP_LOGGING_PORT
    scenario_mock = unittest.mock.Mock()
    scenario_mock.wallclock_limit = 10
    scenario_mock.algo_runs_timelimit = 1000
    scenario_mock.ta_run_limit = 100
    self.scenario = scenario_mock
    stats = Stats(scenario_mock)
    stats.start_timing()
    self.stats = stats
    try:
        shutil.rmtree(self.tmp)
    except Exception:
        pass
def test_disable_file_output(self):
    backend_mock = unittest.mock.Mock()
    queue_mock = unittest.mock.Mock()
    rs = np.random.RandomState(1)
    D = get_multiclass_classification_datamanager()

    ae = AbstractEvaluator(Datamanager=D,
                           backend=backend_mock,
                           queue=queue_mock,
                           output_y_test=False,
                           disable_file_output=True)

    predictions_ensemble = rs.rand(33, 3)
    predictions_test = rs.rand(25, 3)
    predictions_valid = rs.rand(25, 3)

    loss_, additional_run_info_ = ae.file_output(
        predictions_ensemble, predictions_valid, predictions_test)

    self.assertIsNone(loss_)
    self.assertIsNone(additional_run_info_)
    # This function is not guarded by an if statement
    self.assertEqual(backend_mock.save_predictions_as_npy.call_count, 0)
def test_add_additional_components(self):
    shutil.rmtree(self.working_directory, ignore_errors=True)
    os.mkdir(self.working_directory)
    queue_mock = unittest.mock.Mock()
    context = BackendContext(
        temporary_directory=os.path.join(self.working_directory, 'tmp'),
        delete_tmp_folder_after_terminate=True,
    )
    with unittest.mock.patch.object(Backend, 'load_datamanager') as load_datamanager_mock:
        load_datamanager_mock.return_value = get_multiclass_classification_datamanager()
        backend = Backend(context)
        with unittest.mock.patch.object(_addons['classification'], 'add_component') as _:
            # If the components passed via the `additional_components` argument are an
            # empty dict, `add_component` is not called; if there is something in them,
            # `add_component` is called (second case).
            for fixture, case in ((0, dict()), (1, dict(abc='def'))):
                thirdparty_components_patch = unittest.mock.Mock()
                thirdparty_components_patch.components = case
                additional_components = dict(classification=thirdparty_components_patch)
                AbstractEvaluator(
                    backend=backend,
                    output_y_hat_optimization=False,
                    queue=queue_mock,
                    metric=accuracy,
                    port=self.port,
                    additional_components=additional_components,
                )
                self.assertEqual(
                    _addons['classification'].add_component.call_count, fixture)
def load_datamanager(self):
    return get_multiclass_classification_datamanager()
def test_disable_file_output(self, exists_mock):
    backend_mock = unittest.mock.Mock()
    backend_mock.get_model_dir.return_value = 'abc'
    D = get_multiclass_classification_datamanager()
    backend_mock.load_datamanager.return_value = D
    queue_mock = unittest.mock.Mock()
    rs = np.random.RandomState(1)

    ae = AbstractEvaluator(
        backend=backend_mock,
        queue=queue_mock,
        disable_file_output=True,
        metric=accuracy,
    )

    predictions_ensemble = rs.rand(33, 3)
    predictions_test = rs.rand(25, 3)
    predictions_valid = rs.rand(25, 3)

    loss_, additional_run_info_ = ae.file_output(predictions_ensemble,
                                                 predictions_valid,
                                                 predictions_test)
    self.assertIsNone(loss_)
    self.assertEqual(additional_run_info_, {})
    # This function is not guarded by an if statement
    self.assertEqual(backend_mock.save_predictions_as_npy.call_count, 0)
    self.assertEqual(backend_mock.save_model.call_count, 0)

    ae = AbstractEvaluator(
        backend=backend_mock,
        output_y_hat_optimization=False,
        queue=queue_mock,
        disable_file_output=['model'],
        metric=accuracy,
    )
    ae.Y_optimization = predictions_ensemble
    loss_, additional_run_info_ = ae.file_output(predictions_ensemble,
                                                 predictions_valid,
                                                 predictions_test)
    self.assertIsNone(loss_)
    self.assertIsNone(additional_run_info_)
    # This function is not guarded by an if statement
    self.assertEqual(backend_mock.save_predictions_as_npy.call_count, 3)
    self.assertEqual(backend_mock.save_model.call_count, 0)

    ae = AbstractEvaluator(
        backend=backend_mock,
        output_y_hat_optimization=False,
        queue=queue_mock,
        metric=accuracy,
        disable_file_output=['y_optimization'],
    )
    exists_mock.return_value = True
    ae.Y_optimization = predictions_ensemble
    ae.model = 'model'
    loss_, additional_run_info_ = ae.file_output(predictions_ensemble,
                                                 predictions_valid,
                                                 predictions_test)
    self.assertIsNone(loss_)
    self.assertIsNone(additional_run_info_)
    # This function is not guarded by an if statement
    self.assertEqual(backend_mock.save_predictions_as_npy.call_count, 5)
    self.assertEqual(backend_mock.save_model.call_count, 1)
def test_disable_file_output(self, exists_mock):
    backend_mock = unittest.mock.Mock()
    backend_mock.get_model_dir.return_value = 'abc'
    D = get_multiclass_classification_datamanager()
    backend_mock.load_datamanager.return_value = D
    queue_mock = unittest.mock.Mock()
    rs = np.random.RandomState(1)

    ae = AbstractEvaluator(
        backend=backend_mock,
        queue=queue_mock,
        disable_file_output=True,
        metric=accuracy,
    )

    predictions_train = rs.rand(66, 3)
    predictions_ensemble = rs.rand(33, 3)
    predictions_test = rs.rand(25, 3)
    predictions_valid = rs.rand(25, 3)

    loss_, additional_run_info_ = ae.file_output(
        predictions_train,
        predictions_ensemble,
        predictions_valid,
        predictions_test,
    )
    self.assertIsNone(loss_)
    self.assertEqual(additional_run_info_, {})
    # This function is not guarded by an if statement
    self.assertEqual(backend_mock.save_predictions_as_npy.call_count, 0)
    self.assertEqual(backend_mock.save_model.call_count, 0)

    ae = AbstractEvaluator(
        backend=backend_mock,
        output_y_hat_optimization=False,
        queue=queue_mock,
        disable_file_output=['model'],
        metric=accuracy,
    )
    ae.Y_optimization = predictions_ensemble
    loss_, additional_run_info_ = ae.file_output(
        predictions_train,
        predictions_ensemble,
        predictions_valid,
        predictions_test,
    )
    self.assertIsNone(loss_)
    self.assertEqual(additional_run_info_, {})
    # This function is not guarded by an if statement
    self.assertEqual(backend_mock.save_predictions_as_npy.call_count, 3)
    self.assertEqual(backend_mock.save_model.call_count, 0)

    ae = AbstractEvaluator(
        backend=backend_mock,
        output_y_hat_optimization=False,
        queue=queue_mock,
        metric=accuracy,
        disable_file_output=['y_optimization'],
    )
    exists_mock.return_value = True
    ae.Y_optimization = predictions_ensemble
    ae.model = 'model'
    loss_, additional_run_info_ = ae.file_output(
        predictions_train,
        predictions_ensemble,
        predictions_valid,
        predictions_test,
    )
    self.assertIsNone(loss_)
    self.assertEqual(additional_run_info_, {})
    # This function is not guarded by an if statement
    self.assertEqual(backend_mock.save_predictions_as_npy.call_count, 5)
    self.assertEqual(backend_mock.save_model.call_count, 1)
def test_finish_up_model_predicts_NaN(self):
    '''Tests by handing in predictions which contain NaNs'''
    rs = np.random.RandomState(1)
    D = get_multiclass_classification_datamanager()
    backend_api = unittest.mock.Mock()
    backend_api.load_datamanager.return_value = D
    queue_mock = unittest.mock.Mock()

    ae = AbstractEvaluator(backend=backend_api,
                           output_y_hat_optimization=False,
                           queue=queue_mock,
                           metric=accuracy)
    ae.Y_optimization = rs.rand(33, 3)
    predictions_train = rs.rand(66, 3)
    predictions_ensemble = rs.rand(33, 3)
    predictions_test = rs.rand(25, 3)
    predictions_valid = rs.rand(25, 3)

    # NaNs in prediction ensemble
    predictions_ensemble[5, 2] = np.NaN
    _, loss, _, additional_run_info = ae.finish_up(
        loss=0.1,
        train_pred=predictions_train,
        opt_pred=predictions_ensemble,
        valid_pred=predictions_valid,
        test_pred=predictions_test,
        additional_run_info=None,
        final_call=True,
        file_output=True,
    )
    self.assertEqual(loss, 1.0)
    self.assertEqual(additional_run_info,
                     {'error': 'Model predictions for optimization set '
                               'contains NaNs.'})

    # NaNs in prediction validation
    predictions_ensemble[5, 2] = 0.5
    predictions_valid[5, 2] = np.NaN
    _, loss, _, additional_run_info = ae.finish_up(
        loss=0.1,
        train_pred=predictions_train,
        opt_pred=predictions_ensemble,
        valid_pred=predictions_valid,
        test_pred=predictions_test,
        additional_run_info=None,
        final_call=True,
        file_output=True,
    )
    self.assertEqual(loss, 1.0)
    self.assertEqual(additional_run_info,
                     {'error': 'Model predictions for validation set '
                               'contains NaNs.'})

    # NaNs in prediction test
    predictions_valid[5, 2] = 0.5
    predictions_test[5, 2] = np.NaN
    _, loss, _, additional_run_info = ae.finish_up(
        loss=0.1,
        train_pred=predictions_train,
        opt_pred=predictions_ensemble,
        valid_pred=predictions_valid,
        test_pred=predictions_test,
        additional_run_info=None,
        final_call=True,
        file_output=True,
    )
    self.assertEqual(loss, 1.0)
    self.assertEqual(additional_run_info,
                     {'error': 'Model predictions for test set contains '
                               'NaNs.'})

    self.assertEqual(backend_api.save_predictions_as_npy.call_count, 0)
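# The three cases above all expect loss == 1.0 (the worst possible cost for
# the accuracy metric), an 'error' entry in additional_run_info, and no call
# to save_predictions_as_npy. A minimal sketch of the kind of guard implied
# by these assertions (an illustration only, not the actual
# AbstractEvaluator.finish_up implementation; it assumes numpy is available
# as `np`, as elsewhere in these tests):
def _nan_error_for(set_name, predictions):
    # Return the additional_run_info dict the tests above expect when the
    # given prediction array contains NaNs, otherwise None.
    if predictions is not None and np.isnan(predictions).any():
        return {'error': 'Model predictions for %s set contains NaNs.' % set_name}
    return None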