def test_finish_up_model_predicts_NaN(self):
        '''Tests finish_up() by handing in predictions which contain NaNs.'''
        rs = np.random.RandomState(1)
        D = get_multiclass_classification_datamanager()

        backend_api = unittest.mock.Mock()
        backend_api.load_datamanager.return_value = D
        queue_mock = unittest.mock.Mock()
        ae = AbstractEvaluator(backend=backend_api,
                               output_y_hat_optimization=False,
                               queue=queue_mock,
                               metric=accuracy)
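        # Random stand-ins: optimization-set targets and predictions with
        # 33 rows, validation/test predictions with 25 rows, 3 classes each.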
        ae.Y_optimization = rs.rand(33, 3)
        predictions_ensemble = rs.rand(33, 3)
        predictions_test = rs.rand(25, 3)
        predictions_valid = rs.rand(25, 3)

        # NaNs in prediction ensemble
        predictions_ensemble[5, 2] = np.NaN
        _, loss, _, additional_run_info = ae.finish_up(0.1,
                                                       predictions_ensemble,
                                                       predictions_valid,
                                                       predictions_test)
        self.assertEqual(loss, 1.0)
        self.assertEqual(
            additional_run_info, {
                'error': 'Model predictions for optimization set '
                'contains NaNs.'
            })

        # NaNs in prediction validation
        predictions_ensemble[5, 2] = 0.5
        predictions_valid[5, 2] = np.NaN
        _, loss, _, additional_run_info = ae.finish_up(0.1,
                                                       predictions_ensemble,
                                                       predictions_valid,
                                                       predictions_test)
        self.assertEqual(loss, 1.0)
        self.assertEqual(
            additional_run_info, {
                'error': 'Model predictions for validation set '
                'contains NaNs.'
            })

        # NaNs in prediction test
        predictions_valid[5, 2] = 0.5
        predictions_test[5, 2] = np.NaN
        _, loss, _, additional_run_info = ae.finish_up(0.1,
                                                       predictions_ensemble,
                                                       predictions_valid,
                                                       predictions_test)
        self.assertEqual(loss, 1.0)
        self.assertEqual(
            additional_run_info,
            {'error': 'Model predictions for test set contains '
             'NaNs.'})

        self.assertEqual(backend_api.save_predictions_as_npy.call_count, 0)
    def test_add_additional_components(self):
        shutil.rmtree(self.working_directory, ignore_errors=True)
        os.mkdir(self.working_directory)

        queue_mock = unittest.mock.Mock()

        context = BackendContext(
            temporary_directory=os.path.join(self.working_directory, 'tmp'),
            delete_tmp_folder_after_terminate=True,
        )
        with unittest.mock.patch.object(
                Backend, 'load_datamanager') as load_datamanager_mock:
            load_datamanager_mock.return_value = get_multiclass_classification_datamanager()
            backend = Backend(context)

            with unittest.mock.patch.object(_addons['classification'],
                                            'add_component') as _:

                # If `additional_components` holds an empty dict, `add_component`
                # is never called; if it holds something, `add_component` is
                # called (second case).
                for fixture, case in ((0, dict()), (1, dict(abc='def'))):

                    thirdparty_components_patch = unittest.mock.Mock()
                    thirdparty_components_patch.components = case
                    additional_components = dict(
                        classification=thirdparty_components_patch)
                    AbstractEvaluator(
                        backend=backend,
                        output_y_hat_optimization=False,
                        queue=queue_mock,
                        metric=accuracy,
                        port=self.port,
                        additional_components=additional_components,
                    )
                    self.assertEqual(
                        _addons['classification'].add_component.call_count,
                        fixture)

    def test_file_output(self):
        shutil.rmtree(self.working_directory, ignore_errors=True)
        os.mkdir(self.working_directory)

        queue_mock = unittest.mock.Mock()

        context = BackendContext(
            temporary_directory=os.path.join(self.working_directory, 'tmp'),
            output_directory=os.path.join(self.working_directory, 'out'),
            delete_tmp_folder_after_terminate=True,
            delete_output_folder_after_terminate=True,
        )
        with unittest.mock.patch.object(
                Backend, 'load_datamanager') as load_datamanager_mock:
            load_datamanager_mock.return_value = get_multiclass_classification_datamanager()

            backend = Backend(context)

            ae = AbstractEvaluator(
                backend=backend,
                output_y_hat_optimization=False,
                queue=queue_mock,
                metric=accuracy,
                port=self.port,
            )
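            # An (unfitted) DummyClassifier serves as a lightweight model
            # object for file_output to persist.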
            ae.model = sklearn.dummy.DummyClassifier()

            rs = np.random.RandomState()
            ae.Y_optimization = rs.rand(33, 3)
            predictions_ensemble = rs.rand(33, 3)
            predictions_test = rs.rand(25, 3)
            predictions_valid = rs.rand(25, 3)

            ae.file_output(
                Y_optimization_pred=predictions_ensemble,
                Y_valid_pred=predictions_valid,
                Y_test_pred=predictions_test,
            )

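            # file_output should have created the run directory under the
            # temporary directory.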
            self.assertTrue(
                os.path.exists(
                    os.path.join(self.working_directory, 'tmp',
                                 '.auto-sklearn', 'runs', '1_0_None')))

            shutil.rmtree(self.working_directory, ignore_errors=True)
    def test_finish_up_model_predicts_NaN(self):
        '''Tests finish_up() by handing in predictions which contain NaNs.'''
        rs = np.random.RandomState(1)

        queue_mock = unittest.mock.Mock()
        ae = AbstractEvaluator(backend=self.backend_mock,
                               port=self.port,
                               output_y_hat_optimization=False,
                               queue=queue_mock,
                               metric=accuracy)
        ae.Y_optimization = rs.rand(33, 3)
        predictions_ensemble = rs.rand(33, 3)
        predictions_test = rs.rand(25, 3)
        predictions_valid = rs.rand(25, 3)

        # NaNs in prediction ensemble
        predictions_ensemble[5, 2] = np.NaN
        _, loss, _, additional_run_info = ae.finish_up(
            loss=0.1,
            train_loss=0.1,
            opt_pred=predictions_ensemble,
            valid_pred=predictions_valid,
            test_pred=predictions_test,
            additional_run_info=None,
            final_call=True,
            file_output=True,
            status=StatusType.SUCCESS,
        )
        self.assertEqual(loss, 1.0)
        self.assertEqual(
            additional_run_info, {
                'error': 'Model predictions for optimization set '
                'contains NaNs.'
            })

        # NaNs in prediction validation
        predictions_ensemble[5, 2] = 0.5
        predictions_valid[5, 2] = np.NaN
        _, loss, _, additional_run_info = ae.finish_up(
            loss=0.1,
            train_loss=0.1,
            opt_pred=predictions_ensemble,
            valid_pred=predictions_valid,
            test_pred=predictions_test,
            additional_run_info=None,
            final_call=True,
            file_output=True,
            status=StatusType.SUCCESS,
        )
        self.assertEqual(loss, 1.0)
        self.assertEqual(
            additional_run_info, {
                'error': 'Model predictions for validation set '
                'contains NaNs.'
            })

        # NaNs in prediction test
        predictions_valid[5, 2] = 0.5
        predictions_test[5, 2] = np.NaN
        _, loss, _, additional_run_info = ae.finish_up(
            loss=0.1,
            train_loss=0.1,
            opt_pred=predictions_ensemble,
            valid_pred=predictions_valid,
            test_pred=predictions_test,
            additional_run_info=None,
            final_call=True,
            file_output=True,
            status=StatusType.SUCCESS,
        )
        self.assertEqual(loss, 1.0)
        self.assertEqual(
            additional_run_info,
            {'error': 'Model predictions for test set contains '
             'NaNs.'})

        self.assertEqual(self.backend_mock.save_predictions_as_npy.call_count,
                         0)
    def test_disable_file_output(self):
        queue_mock = unittest.mock.Mock()

        rs = np.random.RandomState(1)

        ae = AbstractEvaluator(
            backend=self.backend_mock,
            queue=queue_mock,
            disable_file_output=True,
            metric=accuracy,
            port=self.port,
        )

        predictions_ensemble = rs.rand(33, 3)
        predictions_test = rs.rand(25, 3)
        predictions_valid = rs.rand(25, 3)

        loss_, additional_run_info_ = (ae.file_output(
            predictions_ensemble,
            predictions_valid,
            predictions_test,
        ))

        self.assertIsNone(loss_)
        self.assertEqual(additional_run_info_, {})
        # save_numrun_to_dir is never called because file_output returns early.
        self.assertEqual(self.backend_mock.save_numrun_to_dir.call_count, 0)

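        # Disabling 'model' or 'cv_model' should pass None for only that
        # artifact to save_numrun_to_dir; the prediction arrays are still written.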
        for call_count, disable in enumerate(['model', 'cv_model'], start=1):
            ae = AbstractEvaluator(
                backend=self.backend_mock,
                output_y_hat_optimization=False,
                queue=queue_mock,
                disable_file_output=[disable],
                metric=accuracy,
                port=self.port,
            )
            ae.Y_optimization = predictions_ensemble
            ae.model = unittest.mock.Mock()
            ae.models = [unittest.mock.Mock()]

            loss_, additional_run_info_ = (ae.file_output(
                predictions_ensemble,
                predictions_valid,
                predictions_test,
            ))

            self.assertIsNone(loss_)
            self.assertEqual(additional_run_info_, {})
            self.assertEqual(self.backend_mock.save_numrun_to_dir.call_count,
                             call_count)
            if disable == 'model':
                self.assertIsNone(
                    self.backend_mock.save_numrun_to_dir.call_args_list[-1][1]
                    ['model'])
                self.assertIsNotNone(
                    self.backend_mock.save_numrun_to_dir.call_args_list[-1][1]
                    ['cv_model'])
            else:
                self.assertIsNotNone(
                    self.backend_mock.save_numrun_to_dir.call_args_list[-1][1]
                    ['model'])
                self.assertIsNone(
                    self.backend_mock.save_numrun_to_dir.call_args_list[-1][1]
                    ['cv_model'])
            self.assertIsNotNone(
                self.backend_mock.save_numrun_to_dir.call_args_list[-1][1]
                ['ensemble_predictions'])
            self.assertIsNotNone(
                self.backend_mock.save_numrun_to_dir.call_args_list[-1][1]
                ['valid_predictions'])
            self.assertIsNotNone(
                self.backend_mock.save_numrun_to_dir.call_args_list[-1][1]
                ['test_predictions'])

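        # Disabling 'y_optimization' should pass None for the ensemble
        # (optimization) predictions only; validation and test predictions
        # are still written.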
        ae = AbstractEvaluator(
            backend=self.backend_mock,
            output_y_hat_optimization=False,
            queue=queue_mock,
            metric=accuracy,
            disable_file_output=['y_optimization'],
            port=self.port,
        )
        ae.Y_optimization = predictions_ensemble
        ae.model = 'model'
        ae.models = [unittest.mock.Mock()]

        loss_, additional_run_info_ = (ae.file_output(
            predictions_ensemble,
            predictions_valid,
            predictions_test,
        ))

        self.assertIsNone(loss_)
        self.assertEqual(additional_run_info_, {})

        self.assertIsNone(
            self.backend_mock.save_numrun_to_dir.call_args_list[-1][1]
            ['ensemble_predictions'])
        self.assertIsNotNone(
            self.backend_mock.save_numrun_to_dir.call_args_list[-1][1]
            ['valid_predictions'])
        self.assertIsNotNone(
            self.backend_mock.save_numrun_to_dir.call_args_list[-1][1]
            ['test_predictions'])
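
    # NOTE: the decorator is not part of the captured snippet; judging from
    # `exists_mock.return_value`, `os.path.exists` is presumably patched here.
    @unittest.mock.patch('os.path.exists')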
    def test_disable_file_output(self, exists_mock):
        backend_mock = unittest.mock.Mock()
        backend_mock.get_model_dir.return_value = 'abc'
        D = get_multiclass_classification_datamanager()
        backend_mock.load_datamanager.return_value = D
        queue_mock = unittest.mock.Mock()

        rs = np.random.RandomState(1)

        ae = AbstractEvaluator(
            backend=backend_mock,
            queue=queue_mock,
            disable_file_output=True,
            metric=accuracy,
        )

        predictions_ensemble = rs.rand(33, 3)
        predictions_test = rs.rand(25, 3)
        predictions_valid = rs.rand(25, 3)

        loss_, additional_run_info_ = ae.file_output(predictions_ensemble,
                                                     predictions_valid,
                                                     predictions_test)

        self.assertIsNone(loss_)
        self.assertEqual(additional_run_info_, {})
        # With disable_file_output=True, file_output writes nothing at all.
        self.assertEqual(backend_mock.save_predictions_as_npy.call_count, 0)
        self.assertEqual(backend_mock.save_model.call_count, 0)

        ae = AbstractEvaluator(
            backend=backend_mock,
            output_y_hat_optimization=False,
            queue=queue_mock,
            disable_file_output=['model'],
            metric=accuracy,
        )
        ae.Y_optimization = predictions_ensemble

        loss_, additional_run_info_ = ae.file_output(predictions_ensemble,
                                                     predictions_valid,
                                                     predictions_test)

        self.assertIsNone(loss_)
        self.assertIsNone(additional_run_info_)
        # This function is not guarded by an if statement
        self.assertEqual(backend_mock.save_predictions_as_npy.call_count, 3)
        self.assertEqual(backend_mock.save_model.call_count, 0)

        ae = AbstractEvaluator(
            backend=backend_mock,
            output_y_hat_optimization=False,
            queue=queue_mock,
            metric=accuracy,
            disable_file_output=['y_optimization'],
        )
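        # With the existence check patched to return True, save_model is
        # expected to be called once (asserted below).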
        exists_mock.return_value = True
        ae.Y_optimization = predictions_ensemble
        ae.model = 'model'

        loss_, additional_run_info_ = ae.file_output(predictions_ensemble,
                                                     predictions_valid,
                                                     predictions_test)

        self.assertIsNone(loss_)
        self.assertIsNone(additional_run_info_)
        # This function is not guarded by an if statement
        self.assertEqual(backend_mock.save_predictions_as_npy.call_count, 5)
        self.assertEqual(backend_mock.save_model.call_count, 1)
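
    # NOTE: as above, the decorator is not part of the captured snippet;
    # `exists_mock` suggests `os.path.exists` is patched for this test as well.
    @unittest.mock.patch('os.path.exists')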
    def test_disable_file_output(self, exists_mock):
        backend_mock = unittest.mock.Mock()
        backend_mock.get_model_dir.return_value = 'abc'
        D = get_multiclass_classification_datamanager()
        backend_mock.load_datamanager.return_value = D
        queue_mock = unittest.mock.Mock()

        rs = np.random.RandomState(1)

        ae = AbstractEvaluator(
            backend=backend_mock,
            queue=queue_mock,
            disable_file_output=True,
            metric=accuracy,
        )

        predictions_train = rs.rand(66, 3)
        predictions_ensemble = rs.rand(33, 3)
        predictions_test = rs.rand(25, 3)
        predictions_valid = rs.rand(25, 3)

        loss_, additional_run_info_ = (
            ae.file_output(
                predictions_train,
                predictions_ensemble,
                predictions_valid,
                predictions_test,
            )
        )

        self.assertIsNone(loss_)
        self.assertEqual(additional_run_info_, {})
        # With disable_file_output=True, file_output writes nothing at all.
        self.assertEqual(backend_mock.save_predictions_as_npy.call_count, 0)
        self.assertEqual(backend_mock.save_model.call_count, 0)

        ae = AbstractEvaluator(
            backend=backend_mock,
            output_y_hat_optimization=False,
            queue=queue_mock,
            disable_file_output=['model'],
            metric=accuracy,
        )
        ae.Y_optimization = predictions_ensemble

        loss_, additional_run_info_ = (
            ae.file_output(
                predictions_train,
                predictions_ensemble,
                predictions_valid,
                predictions_test,
            )
        )

        self.assertIsNone(loss_)
        self.assertEqual(additional_run_info_, {})
        # This function is not guarded by an if statement
        self.assertEqual(backend_mock.save_predictions_as_npy.call_count, 3)
        self.assertEqual(backend_mock.save_model.call_count, 0)

        ae = AbstractEvaluator(
            backend=backend_mock,
            output_y_hat_optimization=False,
            queue=queue_mock,
            metric=accuracy,
            disable_file_output=['y_optimization'],
        )
        exists_mock.return_value = True
        ae.Y_optimization = predictions_ensemble
        ae.model = 'model'

        loss_, additional_run_info_ = (
            ae.file_output(
                predictions_train,
                predictions_ensemble,
                predictions_valid,
                predictions_test,
            )
        )

        self.assertIsNone(loss_)
        self.assertEqual(additional_run_info_, {})
        # This function is not guarded by an if statement
        self.assertEqual(backend_mock.save_predictions_as_npy.call_count, 5)
        self.assertEqual(backend_mock.save_model.call_count, 1)

    def test_finish_up_model_predicts_NaN(self):
        '''Tests finish_up() by handing in predictions which contain NaNs.'''
        rs = np.random.RandomState(1)
        D = get_multiclass_classification_datamanager()

        backend_api = unittest.mock.Mock()
        backend_api.load_datamanager.return_value = D
        queue_mock = unittest.mock.Mock()
        ae = AbstractEvaluator(backend=backend_api,
                               output_y_hat_optimization=False,
                               queue=queue_mock, metric=accuracy)
        ae.Y_optimization = rs.rand(33, 3)
        predictions_train = rs.rand(66, 3)
        predictions_ensemble = rs.rand(33, 3)
        predictions_test = rs.rand(25, 3)
        predictions_valid = rs.rand(25, 3)

        # NaNs in prediction ensemble
        predictions_ensemble[5, 2] = np.NaN
        _, loss, _, additional_run_info = ae.finish_up(
            loss=0.1,
            train_pred=predictions_train,
            opt_pred=predictions_ensemble,
            valid_pred=predictions_valid,
            test_pred=predictions_test,
            additional_run_info=None,
            final_call=True,
            file_output=True,
        )
        self.assertEqual(loss, 1.0)
        self.assertEqual(additional_run_info,
                         {'error': 'Model predictions for optimization set '
                                   'contains NaNs.'})

        # NaNs in prediction validation
        predictions_ensemble[5, 2] = 0.5
        predictions_valid[5, 2] = np.NaN
        _, loss, _, additional_run_info = ae.finish_up(
            loss=0.1,
            train_pred=predictions_train,
            opt_pred=predictions_ensemble,
            valid_pred=predictions_valid,
            test_pred=predictions_test,
            additional_run_info=None,
            final_call=True,
            file_output=True,
        )
        self.assertEqual(loss, 1.0)
        self.assertEqual(additional_run_info,
                         {'error': 'Model predictions for validation set '
                                   'contains NaNs.'})

        # NaNs in prediction test
        predictions_valid[5, 2] = 0.5
        predictions_test[5, 2] = np.NaN
        _, loss, _, additional_run_info = ae.finish_up(
            loss=0.1,
            train_pred=predictions_train,
            opt_pred=predictions_ensemble,
            valid_pred=predictions_valid,
            test_pred=predictions_test,
            additional_run_info=None,
            final_call=True,
            file_output=True,
        )
        self.assertEqual(loss, 1.0)
        self.assertEqual(additional_run_info,
                         {'error': 'Model predictions for test set contains '
                                   'NaNs.'})

        self.assertEqual(backend_api.save_predictions_as_npy.call_count, 0)