Example #1
0
    def test_do_dummy_prediction(self):
        for name in ['401_bac', '31_bac', 'adult', 'cadata']:
            output = os.path.join(self.test_dir, '..',
                                  '.tmp_test_do_dummy_prediction')
            self._setUp(output)

            dataset = os.path.join(self.test_dir, '..', '.data', name)

            backend_api = backend.create(output, output)
            auto = autosklearn.automl.AutoML(
                backend_api, 20, 5,
                initial_configurations_via_metalearning=25)
            setup_logger()
            auto._logger = get_logger('test_do_dummy_predictions')
            auto._backend._make_internals_directory()
            D = load_data(dataset, backend_api)
            auto._backend.save_datamanager(D)
            auto._do_dummy_prediction(D, 1)

            # Ensure that the dummy predictions are not in the current working
            # directory, but in the output directory (under output)
            self.assertFalse(os.path.exists(os.path.join(os.getcwd(),
                                                         '.auto-sklearn')))
            self.assertTrue(os.path.exists(os.path.join(
                output, '.auto-sklearn', 'predictions_ensemble',
                'predictions_ensemble_1_00001.npy')))

            del auto
            self._tearDown(output)
Example #2
0
    def fit(self, X, y,
            task=MULTICLASS_CLASSIFICATION,
            metric='acc_metric',
            feat_type=None,
            dataset_name=None):
        if dataset_name is None:
            m = hashlib.md5()
            m.update(X.data)
            dataset_name = m.hexdigest()

        self._backend.save_start_time(self._seed)
        self._stopwatch = StopWatch()
        self._dataset_name = dataset_name
        self._stopwatch.start_task(self._dataset_name)

        logger_name = 'AutoML(%d):%s' % (self._seed, dataset_name)
        setup_logger(os.path.join(self._tmp_dir, '%s.log' % str(logger_name)))
        self._logger = get_logger(logger_name)

        if isinstance(metric, str):
            metric = STRING_TO_METRIC[metric]

        loaded_data_manager = XYDataManager(X, y,
                                            task=task,
                                            metric=metric,
                                            feat_type=feat_type,
                                            dataset_name=dataset_name,
                                            encode_labels=False)

        return self._fit(loaded_data_manager)
    def test_do_dummy_prediction(self):
        for name in ['401_bac', '31_bac', 'adult', 'cadata']:
            output = os.path.join(self.test_dir, '..',
                                  '.tmp_test_do_dummy_prediction')
            self._setUp(output)

            dataset = os.path.join(self.test_dir, '..', '.data', name)

            auto = autosklearn.automl.AutoML(
                output, output, 15, 15,
                initial_configurations_via_metalearning=25)
            setup_logger()
            auto._logger = get_logger('test_do_dummy_predictions')
            auto._backend._make_internals_directory()
            D = store_and_or_load_data(dataset, output)
            auto._do_dummy_prediction(D)

            # Assure that the dummy predictions are not in the current working
            # directory, but in the output directory (under output)
            self.assertFalse(os.path.exists(os.path.join(os.getcwd(),
                                                         '.auto-sklearn')))
            self.assertTrue(os.path.exists(os.path.join(output,
                                                        '.auto-sklearn')))

            del auto
            self._tearDown(output)
Example #4
0
 def _get_logger(self, name):
     logger_name = 'AutoML(%d):%s' % (self._seed, name)
     setup_logger(os.path.join(self._backend.temporary_directory,
                               '%s.log' % str(logger_name)),
                  self.logging_config,
                  )
     return get_logger(logger_name)
Example #5
0
    def fit(self, X, y, task=MULTICLASS_CLASSIFICATION, metric="acc_metric", feat_type=None, dataset_name=None):
        if dataset_name is None:
            m = hashlib.md5()
            m.update(X.data)
            dataset_name = m.hexdigest()

        self._backend.save_start_time(self._seed)
        self._stopwatch = StopWatch()
        self._dataset_name = dataset_name
        self._stopwatch.start_task(self._dataset_name)

        logger_name = "AutoML(%d):%s" % (self._seed, dataset_name)
        setup_logger(os.path.join(self._tmp_dir, "%s.log" % str(logger_name)))
        self._logger = get_logger(logger_name)

        if isinstance(metric, str):
            metric = STRING_TO_METRIC[metric]

        if feat_type is not None and len(feat_type) != X.shape[1]:
            raise ValueError(
                "Array feat_type does not have same number of "
                "variables as X has features. %d vs %d." % (len(feat_type), X.shape[1])
            )
        if feat_type is not None and not all([isinstance(f, bool) for f in feat_type]):
            raise ValueError("Array feat_type must only contain bools.")

        loaded_data_manager = XYDataManager(
            X, y, task=task, metric=metric, feat_type=feat_type, dataset_name=dataset_name, encode_labels=False
        )

        return self._fit(loaded_data_manager)
Example #6
0
    def test_do_dummy_prediction(self):
        for name in ['401_bac', '31_bac', 'adult', 'cadata']:
            output = os.path.join(self.test_dir, '..',
                                  '.tmp_test_do_dummy_prediction')
            self._setUp(output)

            dataset = os.path.join(self.test_dir, '..', '.data', name)

            auto = autosklearn.automl.AutoML(
                output,
                output,
                15,
                15,
                initial_configurations_via_metalearning=25)
            setup_logger()
            auto._logger = get_logger('test_do_dummy_predictions')
            auto._backend._make_internals_directory()
            D = store_and_or_load_data(dataset, output)
            auto._do_dummy_prediction(D)

            # Assure that the dummy predictions are not in the current working
            # directory, but in the output directory (under output)
            self.assertFalse(
                os.path.exists(os.path.join(os.getcwd(), '.auto-sklearn')))
            self.assertTrue(
                os.path.exists(os.path.join(output, '.auto-sklearn')))

            del auto
            self._tearDown(output)
Example #7
0
    def test_do_dummy_prediction(self):
        for name in ['401_bac', '31_bac', 'adult', 'cadata']:
            backend_api = self._create_backend('test_do_dummy_prediction')

            dataset = os.path.join(self.test_dir, '..', '.data', name)

            auto = autosklearn.automl.AutoML(
                backend_api, 20, 5,
                initial_configurations_via_metalearning=25)
            setup_logger()
            auto._logger = get_logger('test_do_dummy_predictions')
            auto._backend._make_internals_directory()
            D = load_data(dataset, backend_api)
            auto._backend.save_datamanager(D)
            auto._do_dummy_prediction(D, 1)

            # Ensure that the dummy predictions are not in the current working
            # directory, but in the temporary directory.
            self.assertFalse(os.path.exists(os.path.join(os.getcwd(),
                                                         '.auto-sklearn')))
            self.assertTrue(os.path.exists(os.path.join(
                backend_api.temporary_directory, '.auto-sklearn', 'predictions_ensemble',
                'predictions_ensemble_1_1.npy')))

            del auto
            self._tearDown(backend_api.temporary_directory)
            self._tearDown(backend_api.output_directory)
Example #8
0
    def start_automl(self, parser):
        self._backend.save_start_time(self._seed)
        self._stopwatch = StopWatch()
        datamanager = get_data_manager(namespace=parser)
        self._stopwatch.start_task(datamanager.name)

        logger_name = 'AutoML(%d):%s' % (self._seed, datamanager.name)
        setup_logger(os.path.join(self._tmp_dir, '%s.log' % str(logger_name)))
        self._logger = get_logger(logger_name)

        self._datamanager = datamanager
        self._dataset_name = datamanager.name
        self.start()
Example #9
0
    def start_automl(self, parser):
        self._backend.save_start_time(self._seed)
        self._stopwatch = StopWatch()
        datamanager = get_data_manager(namespace=parser)
        self._stopwatch.start_task(datamanager.name)

        logger_name = 'AutoML(%d):%s' % (self._seed, datamanager.name)
        setup_logger(os.path.join(self._tmp_dir, '%s.log' % str(logger_name)))
        self._logger = get_logger(logger_name)

        self._datamanager = datamanager
        self._dataset_name = datamanager.name
        self.start()
Example #10
0
    def fit(self, X, y,
            task=MULTICLASS_CLASSIFICATION,
            metric='acc_metric',
            feat_type=None,
            dataset_name=None):
        if dataset_name is None:
            m = hashlib.md5()
            m.update(X.data)
            dataset_name = m.hexdigest()

        self._backend.save_start_time(self._seed)
        self._stopwatch = StopWatch()
        self._dataset_name = dataset_name
        self._stopwatch.start_task(self._dataset_name)

        logger_name = 'AutoML(%d):%s' % (self._seed, dataset_name)
        setup_logger(os.path.join(self._tmp_dir, '%s.log' % str(logger_name)))
        self._logger = get_logger(logger_name)

        if isinstance(metric, str):
            metric = STRING_TO_METRIC[metric]

        if feat_type is not None and len(feat_type) != X.shape[1]:
            raise ValueError('Array feat_type does not have same number of '
                             'variables as X has features. %d vs %d.' %
                             (len(feat_type), X.shape[1]))
        if feat_type is not None and not all([isinstance(f, str)
                                              for f in feat_type]):
            raise ValueError('Array feat_type must only contain strings.')
        if feat_type is not None:
            for ft in feat_type:
                if ft.lower() not in ['categorical', 'numerical']:
                    raise ValueError('Only `Categorical` and `Numerical` are '
                                     'valid feature types, you passed `%s`' % ft)

        loaded_data_manager = XYDataManager(X, y,
                                            task=task,
                                            metric=metric,
                                            feat_type=feat_type,
                                            dataset_name=dataset_name,
                                            encode_labels=False)

        return self._fit(loaded_data_manager)
Example #11
0
    def fit_automl_dataset(self, dataset):
        self._stopwatch = StopWatch()
        self._backend.save_start_time(self._seed)

        name = os.path.basename(dataset)
        self._stopwatch.start_task(name)
        self._start_task(self._stopwatch, name)
        self._dataset_name = name

        logger_name = 'AutoML(%d):%s' % (self._seed, name)
        setup_logger(os.path.join(self._tmp_dir, '%s.log' % str(logger_name)))
        self._logger = get_logger(logger_name)

        self._logger.debug('======== Reading and converting data ==========')
        # Encoding the labels will be done after the metafeature calculation!
        loaded_data_manager = CompetitionDataManager(dataset,
                                                     encode_labels=False)
        loaded_data_manager_str = str(loaded_data_manager).split('\n')
        for part in loaded_data_manager_str:
            self._logger.debug(part)

        return self._fit(loaded_data_manager)
Example #12
0
    def fit_automl_dataset(self, dataset):
        self._stopwatch = StopWatch()
        self._backend.save_start_time(self._seed)

        name = os.path.basename(dataset)
        self._stopwatch.start_task(name)
        self._start_task(self._stopwatch, name)
        self._dataset_name = name

        logger_name = 'AutoML(%d):%s' % (self._seed, name)
        setup_logger(os.path.join(self._tmp_dir, '%s.log' % str(logger_name)))
        self._logger = get_logger(logger_name)

        self._logger.debug('======== Reading and converting data ==========')
        # Encoding the labels will be done after the metafeature calculation!
        loaded_data_manager = CompetitionDataManager(
            dataset, encode_labels=False,
            max_memory_in_mb=float(self._ml_memory_limit) / 3)
        loaded_data_manager_str = str(loaded_data_manager).split('\n')
        for part in loaded_data_manager_str:
            self._logger.debug(part)

        return self._fit(loaded_data_manager)
Example #13
0
 def _get_logger(self, name):
     logger_name = 'AutoML(%d):%s' % (self._seed, name)
     setup_logger(os.path.join(self._tmp_dir, '%s.log' % str(logger_name)))
     return get_logger(logger_name)
Example #14
0
 def _get_logger(self, name):
     logger_name = 'AutoML(%d):%s' % (self._seed, name)
     setup_logger(os.path.join(self._tmp_dir, '%s.log' % str(logger_name)))
     return get_logger(logger_name)
Example #15
0
    def test_fail_if_dummy_prediction_fails(self, ta_run_mock):
        backend_api = self._create_backend(
            'test_fail_if_dummy_prediction_fails')

        dataset = os.path.join(self.test_dir, '..', '.data', '401_bac')

        time_for_this_task = 30
        per_run_time = 10
        auto = autosklearn.automl.AutoML(
            backend_api,
            time_for_this_task,
            per_run_time,
            initial_configurations_via_metalearning=25,
        )
        setup_logger()
        auto._logger = get_logger('test_fail_if_dummy_prediction_fails')
        auto._backend._make_internals_directory()
        D = load_data(dataset, backend_api)
        auto._backend.save_datamanager(D)

        # First of all, check that ta.run() is actually called.
        ta_run_mock.return_value = StatusType.SUCCESS, None, None, "test"
        auto._do_dummy_prediction(D, 1)
        ta_run_mock.assert_called_once_with(1, cutoff=time_for_this_task)

        # Case 1. Check that function raises no error when statustype == success.
        # ta.run() returns status, cost, runtime, and additional info.
        ta_run_mock.return_value = StatusType.SUCCESS, None, None, "test"
        raised = False
        try:
            auto._do_dummy_prediction(D, 1)
        except ValueError:
            raised = True
        self.assertFalse(raised, 'Exception raised')

        # Case 2. Check that if statustype returned by ta.run() != success,
        # the function raises error.
        ta_run_mock.return_value = StatusType.CRASHED, None, None, "test"
        self.assertRaisesRegex(
            ValueError,
            'Dummy prediction failed: test',
            auto._do_dummy_prediction,
            D,
            1,
        )
        ta_run_mock.return_value = StatusType.ABORT, None, None, "test"
        self.assertRaisesRegex(
            ValueError,
            'Dummy prediction failed: test',
            auto._do_dummy_prediction,
            D,
            1,
        )
        ta_run_mock.return_value = StatusType.TIMEOUT, None, None, "test"
        self.assertRaisesRegex(
            ValueError,
            'Dummy prediction failed: test',
            auto._do_dummy_prediction,
            D,
            1,
        )
        ta_run_mock.return_value = StatusType.MEMOUT, None, None, "test"
        self.assertRaisesRegex(
            ValueError,
            'Dummy prediction failed: test',
            auto._do_dummy_prediction,
            D,
            1,
        )
        ta_run_mock.return_value = StatusType.CAPPED, None, None, "test"
        self.assertRaisesRegex(
            ValueError,
            'Dummy prediction failed: test',
            auto._do_dummy_prediction,
            D,
            1,
        )

        self._tearDown(backend_api.temporary_directory)
        self._tearDown(backend_api.output_directory)
Example #16
0
 def _get_logger(self, name):
     logger_name = 'AutoML(%d):%s' % (self._seed, name)
     setup_logger(os.path.join(self._backend.temporary_directory, '%s.log' % str(logger_name)))
     return get_logger(logger_name)