def test_predict_batched(self): cs = ParamSklearnClassifier.get_hyperparameter_search_space() default = cs.get_default_configuration() cls = ParamSklearnClassifier(default) # Multiclass X_train, Y_train, X_test, Y_test = get_dataset(dataset='digits') cls.fit(X_train, Y_train) X_test_ = X_test.copy() prediction_ = cls.predict(X_test_) cls_predict = mock.Mock(wraps=cls.pipeline_) cls.pipeline_ = cls_predict prediction = cls.predict(X_test, batch_size=20) self.assertEqual((1647,), prediction.shape) self.assertEqual(83, cls_predict.predict.call_count) assert_array_almost_equal(prediction_, prediction) # Multilabel X_train, Y_train, X_test, Y_test = get_dataset(dataset='digits') Y_train = np.array([(y, 26 - y) for y in Y_train]) cls.fit(X_train, Y_train) X_test_ = X_test.copy() prediction_ = cls.predict(X_test_) cls_predict = mock.Mock(wraps=cls.pipeline_) cls.pipeline_ = cls_predict prediction = cls.predict(X_test, batch_size=20) self.assertEqual((1647, 2), prediction.shape) self.assertEqual(83, cls_predict.predict.call_count) assert_array_almost_equal(prediction_, prediction)
def test_predict_batched(self): cs = ParamSklearnClassifier.get_hyperparameter_search_space() default = cs.get_default_configuration() cls = ParamSklearnClassifier(default) # Multiclass X_train, Y_train, X_test, Y_test = get_dataset(dataset='digits') cls.fit(X_train, Y_train) X_test_ = X_test.copy() prediction_ = cls.predict(X_test_) cls_predict = mock.Mock(wraps=cls.pipeline_) cls.pipeline_ = cls_predict prediction = cls.predict(X_test, batch_size=20) self.assertEqual((1647, ), prediction.shape) self.assertEqual(83, cls_predict.predict.call_count) assert_array_almost_equal(prediction_, prediction) # Multilabel X_train, Y_train, X_test, Y_test = get_dataset(dataset='digits') Y_train = np.array([(y, 26 - y) for y in Y_train]) cls.fit(X_train, Y_train) X_test_ = X_test.copy() prediction_ = cls.predict(X_test_) cls_predict = mock.Mock(wraps=cls.pipeline_) cls.pipeline_ = cls_predict prediction = cls.predict(X_test, batch_size=20) self.assertEqual((1647, 2), prediction.shape) self.assertEqual(83, cls_predict.predict.call_count) assert_array_almost_equal(prediction_, prediction)
def test_predict_batched_sparse(self): cs = ParamSklearnClassifier.get_hyperparameter_search_space( dataset_properties={'sparse': True}) config = Configuration( cs, values={ "balancing:strategy": "none", "classifier:__choice__": "random_forest", "imputation:strategy": "mean", "one_hot_encoding:minimum_fraction": 0.01, "one_hot_encoding:use_minimum_fraction": "True", "preprocessor:__choice__": "no_preprocessing", 'classifier:random_forest:bootstrap': 'True', 'classifier:random_forest:criterion': 'gini', 'classifier:random_forest:max_depth': 'None', 'classifier:random_forest:min_samples_split': 2, 'classifier:random_forest:min_samples_leaf': 2, 'classifier:random_forest:max_features': 0.5, 'classifier:random_forest:max_leaf_nodes': 'None', 'classifier:random_forest:n_estimators': 100, 'classifier:random_forest:min_weight_fraction_leaf': 0.0, "rescaling:__choice__": "min/max" }) cls = ParamSklearnClassifier(config) # Multiclass X_train, Y_train, X_test, Y_test = get_dataset(dataset='digits', make_sparse=True) cls.fit(X_train, Y_train) X_test_ = X_test.copy() prediction_ = cls.predict(X_test_) cls_predict = mock.Mock(wraps=cls.pipeline_) cls.pipeline_ = cls_predict prediction = cls.predict(X_test, batch_size=20) self.assertEqual((1647, ), prediction.shape) self.assertEqual(83, cls_predict.predict.call_count) assert_array_almost_equal(prediction_, prediction) # Multilabel X_train, Y_train, X_test, Y_test = get_dataset(dataset='digits', make_sparse=True) Y_train = np.array([(y, 26 - y) for y in Y_train]) cls.fit(X_train, Y_train) X_test_ = X_test.copy() prediction_ = cls.predict(X_test_) cls_predict = mock.Mock(wraps=cls.pipeline_) cls.pipeline_ = cls_predict prediction = cls.predict(X_test, batch_size=20) self.assertEqual((1647, 2), prediction.shape) self.assertEqual(83, cls_predict.predict.call_count) assert_array_almost_equal(prediction_, prediction)
def test_predict_batched_sparse(self): cs = ParamSklearnClassifier.get_hyperparameter_search_space( dataset_properties={'sparse': True}) config = Configuration(cs, values={"balancing:strategy": "none", "classifier:__choice__": "random_forest", "imputation:strategy": "mean", "one_hot_encoding:minimum_fraction": 0.01, "one_hot_encoding:use_minimum_fraction": "True", "preprocessor:__choice__": "no_preprocessing", 'classifier:random_forest:bootstrap': 'True', 'classifier:random_forest:criterion': 'gini', 'classifier:random_forest:max_depth': 'None', 'classifier:random_forest:min_samples_split': 2, 'classifier:random_forest:min_samples_leaf': 2, 'classifier:random_forest:max_features': 0.5, 'classifier:random_forest:max_leaf_nodes': 'None', 'classifier:random_forest:n_estimators': 100, 'classifier:random_forest:min_weight_fraction_leaf': 0.0, "rescaling:__choice__": "min/max"}) cls = ParamSklearnClassifier(config) # Multiclass X_train, Y_train, X_test, Y_test = get_dataset(dataset='digits', make_sparse=True) cls.fit(X_train, Y_train) X_test_ = X_test.copy() prediction_ = cls.predict(X_test_) cls_predict = mock.Mock(wraps=cls.pipeline_) cls.pipeline_ = cls_predict prediction = cls.predict(X_test, batch_size=20) self.assertEqual((1647,), prediction.shape) self.assertEqual(83, cls_predict.predict.call_count) assert_array_almost_equal(prediction_, prediction) # Multilabel X_train, Y_train, X_test, Y_test = get_dataset(dataset='digits', make_sparse=True) Y_train = np.array([(y, 26 - y) for y in Y_train]) cls.fit(X_train, Y_train) X_test_ = X_test.copy() prediction_ = cls.predict(X_test_) cls_predict = mock.Mock(wraps=cls.pipeline_) cls.pipeline_ = cls_predict prediction = cls.predict(X_test, batch_size=20) self.assertEqual((1647, 2), prediction.shape) self.assertEqual(83, cls_predict.predict.call_count) assert_array_almost_equal(prediction_, prediction)