def test_configurations(self):
    """Sample 10 random configurations and fit/predict each one.

    A configuration passes if fitting and predicting succeed, or if they
    fail with one of the known, tolerated numerical/feature-selection
    errors; any other exception is printed with its config and re-raised.
    """
    # Cap the address space at ~4GiB so a pathological configuration
    # fails with MemoryError instead of taking down the test machine.
    limit = 4000 * 1024 * 1024
    resource.setrlimit(resource.RLIMIT_AS, (limit, limit))

    cs = ParamSklearnRegressor.get_hyperparameter_search_space()
    print(cs)
    cs.seed(1)

    # Error-message fragments that are accepted as benign failures.
    tolerated_value_errors = (
        "Floating-point under-/overflow occurred at epoch",
        "removed all features",
        "all features are discarded",
    )
    tolerated_runtime_warnings = (
        "invalid value encountered in sqrt",
        "divide by zero encountered in",
        "invalid value encountered in divide",
        "invalid value encountered in true_divide",
    )

    for i in range(10):
        config = cs.sample_configuration()
        config._populate_values()
        # Cap SGD iterations to keep the sampled configurations fast.
        if config['regressor:sgd:n_iter'] is not None:
            config._values['regressor:sgd:n_iter'] = 5

        X_train, Y_train, X_test, Y_test = get_dataset(dataset='boston')
        cls = ParamSklearnRegressor(config, random_state=1)
        print(config)
        try:
            cls.fit(X_train, Y_train)
            X_test_ = X_test.copy()
            predictions = cls.predict(X_test)
            self.assertIsInstance(predictions, np.ndarray)
            # Predict a second time on a copy; both must be ndarrays.
            predicted_probabilities = cls.predict(X_test_)
            self.assertIsInstance(predicted_probabilities, np.ndarray)
        except ValueError as e:
            if any(msg in e.args[0] for msg in tolerated_value_errors):
                continue
            print(config)
            print(traceback.format_exc())
            raise  # bare raise preserves the original traceback
        except RuntimeWarning as e:
            if any(msg in e.args[0] for msg in tolerated_runtime_warnings):
                continue
            print(config)
            print(traceback.format_exc())
            raise
        except UserWarning as e:
            if "FastICA did not converge" in e.args[0]:
                continue
            print(config)
            print(traceback.format_exc())
            raise
        except MemoryError:
            # Hitting the address-space limit above is an accepted outcome.
            continue
def test_default_configuration(self):
    """The default configuration must reproduce a fixed r2 on diabetes.

    Run twice to guard against state leaking between fits; also checks
    that ``score`` agrees with an externally computed r2.
    """
    for _ in range(2):
        search_space = ParamSklearnRegressor.get_hyperparameter_search_space()
        default_config = search_space.get_default_configuration()
        X_train, Y_train, X_test, Y_test = get_dataset(dataset='diabetes')

        regressor = ParamSklearnRegressor(default_config)
        regressor = regressor.fit(X_train, Y_train)
        predictions = regressor.predict(copy.deepcopy(X_test))

        # The lower the worse.
        r2 = sklearn.metrics.r2_score(Y_test, predictions)
        self.assertAlmostEqual(0.41626416529791199, r2)

        # score() must agree with the externally computed r2.
        model_score = regressor.score(copy.deepcopy(X_test), Y_test)
        self.assertEqual(model_score, r2)
def test_predict_batched(self):
    """Batched prediction must match unbatched prediction exactly."""
    search_space = ParamSklearnRegressor.get_hyperparameter_search_space()
    default_config = search_space.get_default_configuration()
    regressor = ParamSklearnRegressor(default_config)

    X_train, Y_train, X_test, Y_test = get_dataset(dataset='boston')
    regressor.fit(X_train, Y_train)

    # Reference prediction without batching.
    reference = regressor.predict(X_test.copy())

    # Wrap the fitted pipeline in a spy so predict() calls can be counted.
    pipeline_spy = mock.Mock(wraps=regressor.pipeline_)
    regressor.pipeline_ = pipeline_spy

    batched = regressor.predict(X_test, batch_size=20)
    self.assertEqual((356,), batched.shape)
    # 356 samples at batch size 20 -> 18 batches.
    self.assertEqual(18, pipeline_spy.predict.call_count)
    assert_array_almost_equal(reference, batched)
def get_model(configuration, seed):
    """Build the model wrapper matching *configuration*.

    A configuration containing 'classifier' yields a ParamSklearnClassifier,
    one containing 'regressor' yields a ParamSklearnRegressor; anything
    else falls through and returns None, as callers expect.
    """
    dispatch = (
        ('classifier', ParamSklearnClassifier),
        ('regressor', ParamSklearnRegressor),
    )
    for key, factory in dispatch:
        if key in configuration:
            return factory(configuration, seed)
def test_repr(self):
    """repr() of a regressor must round-trip through eval()."""
    search_space = ParamSklearnRegressor.get_hyperparameter_search_space()
    default_config = search_space.get_default_configuration()
    representation = repr(ParamSklearnRegressor(default_config))
    # Deliberate eval-of-repr round trip: the repr is expected to be
    # a valid constructor expression for the same class.
    rebuilt = eval(representation)
    self.assertIsInstance(rebuilt, ParamSklearnRegressor)