def test_make_prediction_errors_if_asked_for_proba_without_predict_proba_method(
    self, train_iris_dataset: Dataset
):
    """Asking for probabilities from an estimator without `predict_proba` fails.

    A ``LinearRegression`` model is built and trained on
    ``train_iris_dataset``; the subsequent
    ``make_prediction(..., proba=True)`` call is expected to raise
    ``MLToolingError`` with a message naming the missing method.
    """
    model = Model(LinearRegression())
    model.train_estimator(train_iris_dataset)

    # Setup lives outside the raises block on purpose: only the call under
    # test may satisfy the expectation, so an error raised while building or
    # training the model surfaces as a test failure instead of a false pass.
    with pytest.raises(
        MLToolingError, match="LinearRegression does not have a `predict_proba`"
    ):
        model.make_prediction(train_iris_dataset, 5, proba=True)
def test_bayessearch_best_model_is_not_fitted_if_refit_is_not_true(
    self, pipeline_logistic: Pipeline, train_iris_dataset: Dataset
):
    """Predicting with the model returned by ``bayesiansearch(refit=False)`` fails.

    The search is run over the logistic pipeline's penalty parameter with
    ``refit=False``; calling ``make_prediction`` on the returned model must
    raise ``MLToolingError`` complaining that the estimator is not fitted.
    """
    search_space = {"estimator__penalty": ["l1", "l2"]}

    # Only the returned model matters here; the search results are ignored.
    best_model, _ = Model(pipeline_logistic).bayesiansearch(
        train_iris_dataset,
        param_distributions=search_space,
        refit=False,
    )

    with pytest.raises(MLToolingError, match="You haven't fitted the estimator"):
        best_model.make_prediction(data=train_iris_dataset, idx=1)
def test_sql_dataset_raises_exception_when_load_prediction_data_returns_empty(
    self, boston_sqldataset: Type[SQLDataset], regression: Model
):
    """An empty frame from ``load_prediction_data`` makes prediction fail.

    A subclass of the ``boston_sqldataset`` fixture overrides
    ``load_prediction_data`` to return an empty ``DataFrame``; calling
    ``make_prediction`` with it is expected to raise ``DatasetError``.
    """

    # Subclass whose prediction data is always an empty DataFrame,
    # regardless of the arguments it is called with.
    class FailingDataset(boston_sqldataset):
        def load_prediction_data(self, *args, **kwargs):
            return pd.DataFrame()

    empty_dataset = FailingDataset("sqlite:///", schema=None)
    expected_message = "An empty dataset was returned by load_prediction_data"

    with pytest.raises(DatasetError, match=expected_message):
        regression.make_prediction(empty_dataset, 0)
def test_filedataset_raises_exception_when_load_prediction_data_is_empty(
    self, regression: Model, boston_filedataset, boston_csv: pathlib.Path
):
    """An empty frame from a file dataset's prediction loader makes prediction fail.

    A subclass of the ``boston_filedataset`` fixture overrides
    ``load_prediction_data`` to return an empty ``DataFrame``. The dataset
    is built from ``boston_csv`` and split with ``create_train_test``;
    calling ``make_prediction`` with it is expected to raise
    ``DatasetError``.
    """

    # Subclass whose prediction data is always an empty DataFrame,
    # regardless of the arguments it is called with.
    class FailingDataset(boston_filedataset):
        def load_prediction_data(self, *args, **kwargs):
            return pd.DataFrame()

    # The value returned by create_train_test() is what gets passed on.
    empty_dataset = FailingDataset(boston_csv).create_train_test()
    expected_message = "An empty dataset was returned by load_prediction_data"

    with pytest.raises(DatasetError, match=expected_message):
        regression.make_prediction(empty_dataset, 0)
def test_load_prediction_data_works_as_expected(self):
    """Predicting index 5 of the iris demo dataset yields a single class-0 row.

    The model is a ``LogisticRegression`` with a standard-scaling feature
    pipeline, trained on the stratified train/test split of the demo
    dataset. The prediction frame is compared to the expected frame
    without checking dtypes.
    """
    iris = load_demo_dataset("iris")
    iris.create_train_test(stratify=True)

    scaling = Pipeline([("scale", DFStandardScaler())])
    classifier = Model(LogisticRegression(), feature_pipeline=scaling)
    classifier.train_estimator(iris)

    prediction = classifier.make_prediction(iris, 5)

    pd.testing.assert_frame_equal(
        prediction,
        pd.DataFrame({"Prediction": [0]}),
        check_dtype=False,
    )
def test_make_prediction_with_regression_sqldataset_works_as_expected(
    self, boston_sqldataset, loaded_boston_db
):
    """A regression prediction on a SQL dataset is a 1x1 ``Prediction`` frame.

    The dataset is built from the ``boston_sqldataset`` fixture on top of
    ``loaded_boston_db``, split without stratification, and used both to
    train a ``LinearRegression`` model and to predict for index 0.
    """
    boston = boston_sqldataset(loaded_boston_db, schema=None)
    boston.create_train_test(stratify=False)

    regressor = Model(LinearRegression())
    regressor.train_estimator(boston)
    prediction = regressor.make_prediction(boston, 0)

    # Exactly one row with exactly one column, named "Prediction".
    assert prediction.shape == (1, 1)
    assert prediction.columns.tolist() == ["Prediction"]
def test_make_prediction_with_classification_sqldataset_works_as_expected(
    self, iris_sqldataset, loaded_iris_db
):
    """A probability prediction on a SQL dataset has one column per class.

    The dataset is built from the ``iris_sqldataset`` fixture on top of
    ``loaded_iris_db`` and used to train a ``LogisticRegression`` model.
    Predicting index 0 with ``proba=True`` must give a single row with
    the two probability columns checked below.
    """
    iris = iris_sqldataset(loaded_iris_db, schema=None)
    iris.create_train_test()

    classifier = Model(LogisticRegression(solver="lbfgs"))
    classifier.train_estimator(iris)
    probabilities = classifier.make_prediction(iris, 0, proba=True)

    expected_columns = ["Probability Class 0", "Probability Class 1"]
    assert probabilities.shape == (1, 2)
    assert probabilities.columns.tolist() == expected_columns
def test_make_prediction_returns_prediction_if_threshold_is_specified(
    self,
    classifier: Model,
    use_index: bool,
    expected_index: int,
    train_iris_dataset: Dataset,
):
    """``make_prediction`` with a threshold returns a binary prediction frame.

    Predicts for index 5 with ``threshold=0.6`` and checks that the result
    is a two-dimensional ``DataFrame`` of 0/1 values whose rows sum to 0
    and whose index is the one-element range starting at
    ``expected_index``.

    NOTE(review): ``use_index`` and ``expected_index`` are presumably
    supplied by parametrization or fixtures outside this block.
    """
    predictions = classifier.make_prediction(
        train_iris_dataset, 5, threshold=0.6, use_index=use_index
    )

    assert isinstance(predictions, pd.DataFrame)
    assert 2 == predictions.ndim
    assert np.all((predictions == 1) | (predictions == 0))
    assert np.all(np.sum(predictions, axis=1) == 0)

    # Element-wise comparison; asserting on it directly relies on the
    # result holding exactly one row.
    wanted_index = pd.RangeIndex(
        start=expected_index, stop=expected_index + 1, step=1
    )
    assert predictions.index == wanted_index
def test_make_prediction_returns_proba_if_proba_is_true(
    self,
    classifier: Model,
    use_index: bool,
    expected_index: int,
    train_iris_dataset: Dataset,
):
    """``make_prediction`` with ``proba=True`` returns a probability frame.

    Predicts for index 5 and checks that the result is a two-dimensional
    ``DataFrame`` of values in ``[0, 1]`` whose rows sum to 1 (within
    floating-point tolerance) and whose index is the one-element range
    starting at ``expected_index``.

    NOTE(review): ``use_index`` and ``expected_index`` are presumably
    supplied by parametrization or fixtures outside this block.
    """
    results = classifier.make_prediction(
        train_iris_dataset, 5, proba=True, use_index=use_index
    )

    assert isinstance(results, pd.DataFrame)
    assert 2 == results.ndim
    assert np.all((results <= 1) & (results >= 0))

    # Probabilities are floats: compare the row sums to 1 with a tolerance
    # rather than exact equality, which can fail on rounding error alone.
    assert np.allclose(np.sum(results, axis=1), 1)

    # Element-wise comparison; asserting on it directly relies on the
    # result holding exactly one row.
    assert results.index == pd.RangeIndex(
        start=expected_index, stop=expected_index + 1, step=1
    )
def test_make_prediction_errors_when_model_is_not_fitted(self, train_iris_dataset):
    """Predicting with a model that was never trained raises ``MLToolingError``.

    The ``LinearRegression`` model is constructed but ``train_estimator``
    is never called before ``make_prediction``.
    """
    unfitted = Model(LinearRegression())
    expected_message = "You haven't fitted the estimator"

    with pytest.raises(MLToolingError, match=expected_message):
        unfitted.make_prediction(train_iris_dataset, 5)