Example #1
    def setUp(self):
        """
        Creates a backend mock
        """
        self.ev_path = os.path.join(this_directory, '.tmp_evaluations')
        if not os.path.exists(self.ev_path):
            os.mkdir(self.ev_path)
        dummy_model_files = [
            os.path.join(self.ev_path, str(n)) for n in range(100)
        ]
        dummy_pred_files = [
            os.path.join(self.ev_path, str(n)) for n in range(100, 200)
        ]

        backend_mock = unittest.mock.Mock()
        backend_mock.get_model_dir.return_value = self.ev_path
        backend_mock.get_model_path.side_effect = dummy_model_files
        backend_mock.get_prediction_output_path.side_effect = dummy_pred_files
        D = get_multiclass_classification_datamanager()
        backend_mock.load_datamanager.return_value = D
        backend_mock.temporary_directory = tempfile.gettempdir()
        self.backend_mock = backend_mock

        self.port = logging.handlers.DEFAULT_TCP_LOGGING_PORT

        self.working_directory = os.path.join(this_directory,
                                              '.tmp_%s' % self.id())
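
A note on the mock wiring above: assigning a list to `side_effect` makes each successive call to the mocked method return the next element of that list, so every evaluation in the test gets a fresh dummy model path. A minimal self-contained sketch of that behaviour (the paths below are illustrative, not taken from the test):

    import unittest.mock

    backend = unittest.mock.Mock()
    backend.get_model_path.side_effect = ['/tmp/model_0', '/tmp/model_1']
    assert backend.get_model_path() == '/tmp/model_0'  # first call -> first element
    assert backend.get_model_path() == '/tmp/model_1'  # second call -> second element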
Example #2
    def test_error_unsupported_budget_type(self):
        shutil.rmtree(self.working_directory, ignore_errors=True)
        os.mkdir(self.working_directory)

        queue_mock = unittest.mock.Mock()

        context = BackendContext(
            prefix='autoPyTorch',
            temporary_directory=os.path.join(self.working_directory, 'tmp'),
            output_directory=os.path.join(self.working_directory, 'out'),
            delete_tmp_folder_after_terminate=True,
            delete_output_folder_after_terminate=True,
        )
        with unittest.mock.patch.object(
                Backend, 'load_datamanager') as load_datamanager_mock:
            load_datamanager_mock.return_value = (
                get_multiclass_classification_datamanager()
            )

            backend = Backend(context, prefix='autoPyTorch')

            try:
                AbstractEvaluator(backend=backend,
                                  output_y_hat_optimization=False,
                                  queue=queue_mock,
                                  pipeline_config={
                                      'budget_type': "error",
                                      'error': 0
                                  },
                                  metric=accuracy,
                                  budget=0,
                                  configuration=1)
            except Exception as e:
                self.assertIsInstance(e, ValueError)

            shutil.rmtree(self.working_directory, ignore_errors=True)
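
Note that the try/except in this example passes silently when no exception is raised at all; `assertRaises` is the stricter idiom for asserting that an unsupported budget type is rejected. A self-contained sketch of that pattern (the validation function and the accepted budget types here are hypothetical stand-ins, not the evaluator's real code):

    import unittest


    class BudgetTypeCheckSketch(unittest.TestCase):
        def test_rejects_unknown_budget_type(self):
            def make_evaluator(budget_type):
                # Hypothetical stand-in for the evaluator's budget validation.
                if budget_type not in ('epochs', 'runtime'):
                    raise ValueError('Unsupported budget type: %s' % budget_type)

            # The test fails if ValueError is *not* raised.
            with self.assertRaises(ValueError):
                make_evaluator('error')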
Example #3
    def setUp(self):
        self.datamanager = get_multiclass_classification_datamanager()
        self.tmp = os.path.join(os.getcwd(), '.test_evaluation')

        try:
            shutil.rmtree(self.tmp)
        except Exception:
            pass
    def setUp(self):
        self.queue = multiprocessing.Queue()
        self.configuration = get_configuration_space(
            {'task': MULTICLASS_CLASSIFICATION,
             'is_sparse': False}).get_default_configuration()
        self.data = get_multiclass_classification_datamanager()
        self.tmp_dir = os.path.join(os.path.dirname(__file__),
                                    '.test_cv_functions')
    def setUp(self):
        self.datamanager = get_multiclass_classification_datamanager()
        self.tmp = os.path.join(os.getcwd(), '.test_evaluation')

        try:
            shutil.rmtree(self.tmp)
        except Exception:
            pass
    def test_finish_up_model_predicts_NaN(self):
        '''Tests by handing in predictions which contain NaNs'''
        rs = np.random.RandomState(1)
        D = get_multiclass_classification_datamanager()

        backend_api = unittest.mock.Mock()
        backend_api.load_datamanager.return_value = D
        queue_mock = unittest.mock.Mock()
        ae = AbstractEvaluator(backend=backend_api,
                               output_y_hat_optimization=False,
                               queue=queue_mock,
                               metric=accuracy)
        ae.Y_optimization = rs.rand(33, 3)
        predictions_ensemble = rs.rand(33, 3)
        predictions_test = rs.rand(25, 3)
        predictions_valid = rs.rand(25, 3)

        # NaNs in prediction ensemble
        predictions_ensemble[5, 2] = np.NaN
        _, loss, _, additional_run_info = ae.finish_up(0.1,
                                                       predictions_ensemble,
                                                       predictions_valid,
                                                       predictions_test)
        self.assertEqual(loss, 1.0)
        self.assertEqual(
            additional_run_info, {
                'error': 'Model predictions for optimization set '
                'contains NaNs.'
            })

        # NaNs in prediction validation
        predictions_ensemble[5, 2] = 0.5
        predictions_valid[5, 2] = np.NaN
        _, loss, _, additional_run_info = ae.finish_up(0.1,
                                                       predictions_ensemble,
                                                       predictions_valid,
                                                       predictions_test)
        self.assertEqual(loss, 1.0)
        self.assertEqual(
            additional_run_info, {
                'error': 'Model predictions for validation set '
                'contains NaNs.'
            })

        # NaNs in prediction test
        predictions_valid[5, 2] = 0.5
        predictions_test[5, 2] = np.NaN
        _, loss, _, additional_run_info = ae.finish_up(0.1,
                                                       predictions_ensemble,
                                                       predictions_valid,
                                                       predictions_test)
        self.assertEqual(loss, 1.0)
        self.assertEqual(
            additional_run_info,
            {'error': 'Model predictions for test set contains '
             'NaNs.'})

        self.assertEqual(backend_api.save_predictions_as_npy.call_count, 0)
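
The three assertions above all exercise the same guard: a prediction array containing at least one NaN is rejected before anything is written to disk. The check itself typically reduces to something like the following sketch (not the evaluator's actual implementation):

    import numpy as np

    def contains_nan(predictions: np.ndarray) -> bool:
        # True if any entry of the prediction matrix is NaN.
        return bool(np.isnan(predictions).any())

    predictions = np.random.RandomState(1).rand(33, 3)
    assert not contains_nan(predictions)
    predictions[5, 2] = np.nan
    assert contains_nan(predictions)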
Example #7
    def setUp(self):
        self.queue = multiprocessing.Queue()
        self.configuration = get_configuration_space({
            'task': MULTICLASS_CLASSIFICATION,
            'is_sparse': False
        }).get_default_configuration()
        self.data = get_multiclass_classification_datamanager()
        self.tmp_dir = os.path.join(os.path.dirname(__file__),
                                    '.test_holdout_functions')
Example #8
    def setUp(self):
        self.queue = multiprocessing.Queue()
        self.configuration = get_configuration_space(
            {'task': MULTICLASS_CLASSIFICATION,
             'is_sparse': False}).get_default_configuration()
        self.data = get_multiclass_classification_datamanager()
        self.tmp_dir = os.path.join(os.path.dirname(__file__),
                                    '.test_cv_functions')
        self.backend = unittest.mock.Mock(spec=Backend)
        self.backend.load_datamanager.return_value = self.data
        self.dataset_name = json.dumps({'task_id': 'test'})
Example #9
    def setUp(self):
        self.queue = multiprocessing.Queue()
        self.configuration = get_configuration_space({
            'task': MULTICLASS_CLASSIFICATION,
            'is_sparse': False
        }).get_default_configuration()
        self.data = get_multiclass_classification_datamanager()
        self.tmp_dir = os.path.join(os.path.dirname(__file__),
                                    '.test_holdout_functions')
        self.n = len(self.data.data['Y_train'])
        self.y = self.data.data['Y_train'].flatten()
        self.backend = unittest.mock.Mock()
        self.backend.get_model_dir.return_value = 'udiaetzrpduaeirdaetr'
        self.backend.output_directory = 'duapdbaetpdbe'
        self.dataset_name = json.dumps({'task_id': 'test'})
    def test_finish_up_model_predicts_NaN(self):
        '''Tests by handing in predictions which contain NaNs'''
        rs = np.random.RandomState(1)
        D = get_multiclass_classification_datamanager()
        output_dir = os.path.join(
            os.getcwd(), '.test_finish_up_model_predicts_NaN')

        try:
            shutil.rmtree(output_dir)
        except Exception:
            pass

        backend_api = backend.create(output_dir, output_dir)
        ae = AbstractEvaluator(Datamanager=D, backend=backend_api,
                               output_y_test=False)
        ae.Y_optimization = rs.rand(33, 3)
        predictions_ensemble = rs.rand(33, 3)
        predictions_test = rs.rand(25, 3)
        predictions_valid = rs.rand(25, 3)

        # NaNs in prediction ensemble
        predictions_ensemble[5, 2] = np.NaN
        _, loss, _, additional_run_info = ae.finish_up(
            0.1, predictions_ensemble, predictions_valid, predictions_test)
        self.assertEqual(loss, 2.0)
        self.assertEqual(additional_run_info, 'Model predictions for '
                                              'optimization set contains NaNs.')

        # NaNs in prediction validation
        predictions_ensemble[5, 2] = 0.5
        predictions_valid[5, 2] = np.NaN
        _, loss, _, additional_run_info = ae.finish_up(
            0.1, predictions_ensemble, predictions_valid, predictions_test)
        self.assertEqual(loss, 2.0)
        self.assertEqual(additional_run_info, 'Model predictions for '
                                              'validation set contains NaNs.')

        # NaNs in prediction test
        predictions_valid[5, 2] = 0.5
        predictions_test[5, 2] = np.NaN
        _, loss, _, additional_run_info = ae.finish_up(
            0.1, predictions_ensemble, predictions_valid, predictions_test)
        self.assertEqual(loss, 2.0)
        self.assertEqual(additional_run_info, 'Model predictions for '
                                              'test set contains NaNs.')

        self.assertEqual(
            len(os.listdir(os.path.join(output_dir, '.auto-sklearn'))), 0)
    def test_finish_up_model_predicts_NaN(self):
        '''Tests by handing in predictions which contain NaNs'''
        rs = np.random.RandomState(1)
        D = get_multiclass_classification_datamanager()
        output_dir = os.path.join(
            os.getcwd(), '.test_finish_up_model_predicts_NaN')

        try:
            shutil.rmtree(output_dir)
        except Exception:
            pass

        ae = AbstractEvaluator(Datamanager=D, output_dir=output_dir,
                               output_y_test=False)
        ae.Y_optimization = rs.rand(33, 3)
        predictions_ensemble = rs.rand(33, 3)
        predictions_test = rs.rand(25, 3)
        predictions_valid = rs.rand(25, 3)

        # NaNs in prediction ensemble
        predictions_ensemble[5, 2] = np.NaN
        _, loss, _, additional_run_info = ae.finish_up(
            0.1, predictions_ensemble, predictions_valid, predictions_test)
        self.assertEqual(loss, 2.0)
        self.assertEqual(additional_run_info, 'Model predictions for '
                                              'optimization set contains NaNs.')

        # NaNs in prediction validation
        predictions_ensemble[5, 2] = 0.5
        predictions_valid[5, 2] = np.NaN
        _, loss, _, additional_run_info = ae.finish_up(
            0.1, predictions_ensemble, predictions_valid, predictions_test)
        self.assertEqual(loss, 2.0)
        self.assertEqual(additional_run_info, 'Model predictions for '
                                              'validation set contains NaNs.')

        # NaNs in prediction test
        predictions_valid[5, 2] = 0.5
        predictions_test[5, 2] = np.NaN
        _, loss, _, additional_run_info = ae.finish_up(
            0.1, predictions_ensemble, predictions_valid, predictions_test)
        self.assertEqual(loss, 2.0)
        self.assertEqual(additional_run_info, 'Model predictions for '
                                              'test set contains NaNs.')

        self.assertEqual(
            len(os.listdir(os.path.join(output_dir, '.auto-sklearn'))), 0)
    def setUp(self):
        self.datamanager = get_multiclass_classification_datamanager()
        self.tmp = os.path.join(os.getcwd(), '.test_evaluation')
        self.logger = logging.getLogger()
        scenario_mock = unittest.mock.Mock()
        scenario_mock.wallclock_limit = 10
        scenario_mock.algo_runs_timelimit = 1000
        scenario_mock.ta_run_limit = 100
        self.scenario = scenario_mock
        stats = Stats(scenario_mock)
        stats.start_timing()
        self.stats = stats

        try:
            shutil.rmtree(self.tmp)
        except Exception:
            pass
Example #13
    def setUp(self):
        self.datamanager = get_multiclass_classification_datamanager()
        self.tmp = os.path.join(os.getcwd(), '.test_evaluation')
        self.logger = logging.getLogger()
        scenario_mock = unittest.mock.Mock()
        scenario_mock.wallclock_limit = 10
        scenario_mock.algo_runs_timelimit = 1000
        scenario_mock.ta_run_limit = 100
        self.scenario = scenario_mock
        stats = Stats(scenario_mock)
        stats.start_timing()
        self.stats = stats

        try:
            shutil.rmtree(self.tmp)
        except Exception:
            pass
    def setUp(self):
        """
        Creates a backend mock
        """
        self.ev_path = os.path.join(this_directory, '.tmp_evaluations')
        if not os.path.exists(self.ev_path):
            os.mkdir(self.ev_path)
        dummy_model_files = [os.path.join(self.ev_path, str(n)) for n in range(100)]
        dummy_pred_files = [os.path.join(self.ev_path, str(n)) for n in range(100, 200)]

        backend_mock = unittest.mock.Mock()
        backend_mock.get_model_dir.return_value = self.ev_path
        backend_mock.get_model_path.side_effect = dummy_model_files
        backend_mock.get_prediction_output_path.side_effect = dummy_pred_files
        D = get_multiclass_classification_datamanager()
        backend_mock.load_datamanager.return_value = D
        self.backend_mock = backend_mock
Example #15
    def test_file_output(self):
        shutil.rmtree(self.working_directory, ignore_errors=True)
        os.mkdir(self.working_directory)

        queue_mock = unittest.mock.Mock()

        context = BackendContext(
            prefix='autoPyTorch',
            temporary_directory=os.path.join(self.working_directory, 'tmp'),
            output_directory=os.path.join(self.working_directory, 'out'),
            delete_tmp_folder_after_terminate=True,
            delete_output_folder_after_terminate=True,
        )
        with unittest.mock.patch.object(
                Backend, 'load_datamanager') as load_datamanager_mock:
            load_datamanager_mock.return_value = (
                get_multiclass_classification_datamanager()
            )

            backend = Backend(context, prefix='autoPyTorch')

            ae = AbstractEvaluator(backend=backend,
                                   output_y_hat_optimization=False,
                                   queue=queue_mock,
                                   metric=accuracy,
                                   budget=0,
                                   configuration=1)
            ae.model = sklearn.dummy.DummyClassifier()

            rs = np.random.RandomState()
            ae.Y_optimization = rs.rand(33, 3)
            predictions_ensemble = rs.rand(33, 3)
            predictions_test = rs.rand(25, 3)
            predictions_valid = rs.rand(25, 3)

            ae.file_output(
                Y_optimization_pred=predictions_ensemble,
                Y_valid_pred=predictions_valid,
                Y_test_pred=predictions_test,
            )

            self.assertTrue(
                os.path.exists(
                    os.path.join(self.working_directory, 'tmp', '.autoPyTorch',
                                 'runs', '1_0_1.0')))

            shutil.rmtree(self.working_directory, ignore_errors=True)
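
`sklearn.dummy.DummyClassifier` is used above purely as a lightweight stand-in so that `file_output` has a model object to persist. For reference, a standalone illustration of what that stub does (independent of the test above):

    import numpy as np
    import sklearn.dummy

    X = np.random.rand(9, 4)
    y = np.array([0, 0, 0, 0, 1, 1, 1, 2, 2])
    clf = sklearn.dummy.DummyClassifier(strategy='most_frequent')
    clf.fit(X, y)
    # Ignores X entirely and always predicts the most frequent class (0 here).
    assert (clf.predict(X) == 0).all()
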
    def setUp(self):
        self.datamanager = get_multiclass_classification_datamanager()
        self.tmp = os.path.join(os.getcwd(), '.test_evaluation')
        os.mkdir(self.tmp)
        self.logger_port = logging.handlers.DEFAULT_TCP_LOGGING_PORT
        scenario_mock = unittest.mock.Mock()
        scenario_mock.wallclock_limit = 10
        scenario_mock.algo_runs_timelimit = 1000
        scenario_mock.ta_run_limit = 100
        self.scenario = scenario_mock
        stats = Stats(scenario_mock)
        stats.start_timing()
        self.stats = stats

        try:
            shutil.rmtree(self.tmp)
        except Exception:
            pass
    def test_disable_file_output(self):
        backend_mock = unittest.mock.Mock()
        queue_mock = unittest.mock.Mock()

        rs = np.random.RandomState(1)
        D = get_multiclass_classification_datamanager()

        ae = AbstractEvaluator(Datamanager=D, backend=backend_mock, queue=queue_mock,
                               output_y_test=False, disable_file_output=True)

        predictions_ensemble = rs.rand(33, 3)
        predictions_test = rs.rand(25, 3)
        predictions_valid = rs.rand(25, 3)

        loss_, additional_run_info_ = ae.file_output(
            predictions_ensemble, predictions_valid, predictions_test)

        self.assertIsNone(loss_)
        self.assertIsNone(additional_run_info_)
        # This function is not guarded by an if statement
        self.assertEqual(backend_mock.save_predictions_as_npy.call_count, 0)
Example #18
    def test_add_additional_components(self):
        shutil.rmtree(self.working_directory, ignore_errors=True)
        os.mkdir(self.working_directory)

        queue_mock = unittest.mock.Mock()

        context = BackendContext(
            temporary_directory=os.path.join(self.working_directory, 'tmp'),
            delete_tmp_folder_after_terminate=True,
        )
        with unittest.mock.patch.object(
                Backend, 'load_datamanager') as load_datamanager_mock:
            load_datamanager_mock.return_value = (
                get_multiclass_classification_datamanager()
            )
            backend = Backend(context)

            with unittest.mock.patch.object(_addons['classification'],
                                            'add_component') as _:

                # If the components in the argument `additional_components` are an empty
                # dict, there is no call to `add_component`; if there's something in it,
                # `add_component` is called (2nd case).
                for fixture, case in ((0, dict()), (1, dict(abc='def'))):

                    thirdparty_components_patch = unittest.mock.Mock()
                    thirdparty_components_patch.components = case
                    additional_components = dict(
                        classification=thirdparty_components_patch)
                    AbstractEvaluator(
                        backend=backend,
                        output_y_hat_optimization=False,
                        queue=queue_mock,
                        metric=accuracy,
                        port=self.port,
                        additional_components=additional_components,
                    )
                    self.assertEqual(
                        _addons['classification'].add_component.call_count,
                        fixture)
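
The assertion pattern in this example, patching an attribute with `unittest.mock.patch.object` and checking `call_count` against the expected fixture, works on any object. A self-contained sketch (the `Registry` class is illustrative only):

    import unittest.mock

    class Registry:
        def add_component(self, component):
            raise RuntimeError('should never run while patched')

    registry = Registry()
    with unittest.mock.patch.object(Registry, 'add_component') as add_mock:
        for component in ({}, {'abc': 'def'}):
            if component:
                registry.add_component(component)
        # Only the non-empty dict triggered a call.
        assert add_mock.call_count == 1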
Example #19
    def load_datamanager(self):
        return get_multiclass_classification_datamanager()
    def test_disable_file_output(self, exists_mock):
        backend_mock = unittest.mock.Mock()
        backend_mock.get_model_dir.return_value = 'abc'
        D = get_multiclass_classification_datamanager()
        backend_mock.load_datamanager.return_value = D
        queue_mock = unittest.mock.Mock()

        rs = np.random.RandomState(1)

        ae = AbstractEvaluator(
            backend=backend_mock,
            queue=queue_mock,
            disable_file_output=True,
            metric=accuracy,
        )

        predictions_ensemble = rs.rand(33, 3)
        predictions_test = rs.rand(25, 3)
        predictions_valid = rs.rand(25, 3)

        loss_, additional_run_info_ = ae.file_output(predictions_ensemble,
                                                     predictions_valid,
                                                     predictions_test)

        self.assertIsNone(loss_)
        self.assertEqual(additional_run_info_, {})
        # This function is not guarded by an if statement
        self.assertEqual(backend_mock.save_predictions_as_npy.call_count, 0)
        self.assertEqual(backend_mock.save_model.call_count, 0)

        ae = AbstractEvaluator(
            backend=backend_mock,
            output_y_hat_optimization=False,
            queue=queue_mock,
            disable_file_output=['model'],
            metric=accuracy,
        )
        ae.Y_optimization = predictions_ensemble

        loss_, additional_run_info_ = ae.file_output(predictions_ensemble,
                                                     predictions_valid,
                                                     predictions_test)

        self.assertIsNone(loss_)
        self.assertIsNone(additional_run_info_)
        # This function is not guarded by an if statement
        self.assertEqual(backend_mock.save_predictions_as_npy.call_count, 3)
        self.assertEqual(backend_mock.save_model.call_count, 0)

        ae = AbstractEvaluator(
            backend=backend_mock,
            output_y_hat_optimization=False,
            queue=queue_mock,
            metric=accuracy,
            disable_file_output=['y_optimization'],
        )
        exists_mock.return_value = True
        ae.Y_optimization = predictions_ensemble
        ae.model = 'model'

        loss_, additional_run_info_ = ae.file_output(predictions_ensemble,
                                                     predictions_valid,
                                                     predictions_test)

        self.assertIsNone(loss_)
        self.assertIsNone(additional_run_info_)
        # This function is not guarded by an if statement
        self.assertEqual(backend_mock.save_predictions_as_npy.call_count, 5)
        self.assertEqual(backend_mock.save_model.call_count, 1)
    def load_datamanager(self):
        return get_multiclass_classification_datamanager()
    def test_disable_file_output(self, exists_mock):
        backend_mock = unittest.mock.Mock()
        backend_mock.get_model_dir.return_value = 'abc'
        D = get_multiclass_classification_datamanager()
        backend_mock.load_datamanager.return_value = D
        queue_mock = unittest.mock.Mock()

        rs = np.random.RandomState(1)

        ae = AbstractEvaluator(
            backend=backend_mock,
            queue=queue_mock,
            disable_file_output=True,
            metric=accuracy,
        )

        predictions_train = rs.rand(66, 3)
        predictions_ensemble = rs.rand(33, 3)
        predictions_test = rs.rand(25, 3)
        predictions_valid = rs.rand(25, 3)

        loss_, additional_run_info_ = (
            ae.file_output(
                predictions_train,
                predictions_ensemble,
                predictions_valid,
                predictions_test,
            )
        )

        self.assertIsNone(loss_)
        self.assertEqual(additional_run_info_, {})
        # This function is not guarded by an if statement
        self.assertEqual(backend_mock.save_predictions_as_npy.call_count, 0)
        self.assertEqual(backend_mock.save_model.call_count, 0)

        ae = AbstractEvaluator(
            backend=backend_mock,
            output_y_hat_optimization=False,
            queue=queue_mock,
            disable_file_output=['model'],
            metric=accuracy,
        )
        ae.Y_optimization = predictions_ensemble

        loss_, additional_run_info_ = (
            ae.file_output(
                predictions_train,
                predictions_ensemble,
                predictions_valid,
                predictions_test,
            )
        )

        self.assertIsNone(loss_)
        self.assertEqual(additional_run_info_, {})
        # This function is not guarded by an if statement
        self.assertEqual(backend_mock.save_predictions_as_npy.call_count, 3)
        self.assertEqual(backend_mock.save_model.call_count, 0)

        ae = AbstractEvaluator(
            backend=backend_mock,
            output_y_hat_optimization=False,
            queue=queue_mock,
            metric=accuracy,
            disable_file_output=['y_optimization'],
        )
        exists_mock.return_value = True
        ae.Y_optimization = predictions_ensemble
        ae.model = 'model'

        loss_, additional_run_info_ = (
            ae.file_output(
                predictions_train,
                predictions_ensemble,
                predictions_valid,
                predictions_test,
            )
        )

        self.assertIsNone(loss_)
        self.assertEqual(additional_run_info_, {})
        # This function is not guarded by an if statement
        self.assertEqual(backend_mock.save_predictions_as_npy.call_count, 5)
        self.assertEqual(backend_mock.save_model.call_count, 1)
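
The three scenarios above hinge on the same control flow: `disable_file_output=True` makes `file_output` return early without touching the backend, while a list only skips the named artefacts. A sketch of that logic under those assumptions (a hypothetical helper, not the evaluator's real implementation):

    def file_output_sketch(disable_file_output, save_model, save_predictions):
        if disable_file_output is True:
            # Fully disabled: write nothing at all.
            return None, {}
        if 'model' not in disable_file_output:
            save_model()
        if 'y_optimization' not in disable_file_output:
            save_predictions()
        return None, {}

    saved = []
    file_output_sketch(['model'],
                       save_model=lambda: saved.append('model'),
                       save_predictions=lambda: saved.append('predictions'))
    assert saved == ['predictions']  # the model save was skipped
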
    def test_finish_up_model_predicts_NaN(self):
        '''Tests by handing in predictions which contain NaNs'''
        rs = np.random.RandomState(1)
        D = get_multiclass_classification_datamanager()

        backend_api = unittest.mock.Mock()
        backend_api.load_datamanager.return_value = D
        queue_mock = unittest.mock.Mock()
        ae = AbstractEvaluator(backend=backend_api,
                               output_y_hat_optimization=False,
                               queue=queue_mock, metric=accuracy)
        ae.Y_optimization = rs.rand(33, 3)
        predictions_train = rs.rand(66, 3)
        predictions_ensemble = rs.rand(33, 3)
        predictions_test = rs.rand(25, 3)
        predictions_valid = rs.rand(25, 3)

        # NaNs in prediction ensemble
        predictions_ensemble[5, 2] = np.NaN
        _, loss, _, additional_run_info = ae.finish_up(
            loss=0.1,
            train_pred=predictions_train,
            opt_pred=predictions_ensemble,
            valid_pred=predictions_valid,
            test_pred=predictions_test,
            additional_run_info=None,
            final_call=True,
            file_output=True,
        )
        self.assertEqual(loss, 1.0)
        self.assertEqual(additional_run_info,
                         {'error': 'Model predictions for optimization set '
                                   'contains NaNs.'})

        # NaNs in prediction validation
        predictions_ensemble[5, 2] = 0.5
        predictions_valid[5, 2] = np.NaN
        _, loss, _, additional_run_info = ae.finish_up(
            loss=0.1,
            train_pred=predictions_train,
            opt_pred=predictions_ensemble,
            valid_pred=predictions_valid,
            test_pred=predictions_test,
            additional_run_info=None,
            final_call=True,
            file_output=True,
        )
        self.assertEqual(loss, 1.0)
        self.assertEqual(additional_run_info,
                         {'error': 'Model predictions for validation set '
                                   'contains NaNs.'})

        # NaNs in prediction test
        predictions_valid[5, 2] = 0.5
        predictions_test[5, 2] = np.NaN
        _, loss, _, additional_run_info = ae.finish_up(
            loss=0.1,
            train_pred=predictions_train,
            opt_pred=predictions_ensemble,
            valid_pred=predictions_valid,
            test_pred=predictions_test,
            additional_run_info=None,
            final_call=True,
            file_output=True,
        )
        self.assertEqual(loss, 1.0)
        self.assertEqual(additional_run_info,
                         {'error': 'Model predictions for test set contains '
                                   'NaNs.'})

        self.assertEqual(backend_api.save_predictions_as_npy.call_count, 0)