def test_can_score_estimator_with_no_y_value(self):
    """Train on a dataset without a target, then check splitting is refused.

    The dataset's ``load_training_data`` returns ``None`` in place of y.
    Training must still fit the estimator (column means 2.5 and 5.5), while
    ``create_train_test`` must raise ``DatasetError``.
    """

    def build_features():
        # Same two-column frame is served for both training and prediction.
        return pd.DataFrame({
            "col1": [1, 2, 3, 4],
            "col2": [4, 5, 6, 7],
        })

    class DummyEstimator(BaseEstimator, RegressorMixin):
        """Estimator that stores the per-column mean of x and ignores y."""

        def __init__(self):
            self.average = None

        def fit(self, x, y=None):
            self.average = np.mean(x, axis=0)
            return self

        def predict(self, x):
            return self.average

    class DummyData(Dataset):
        """Dataset whose training data carries no y value."""

        def load_training_data(self):
            return build_features(), None

        def load_prediction_data(self, *args, **kwargs):
            return build_features()

    model = Model(DummyEstimator())
    dataset = DummyData()
    model.train_estimator(dataset)

    expected_means = np.array([2.5, 5.5])
    means_match = np.isclose(model.estimator.average, expected_means)
    assert np.all(means_match)

    with pytest.raises(DatasetError, match="The dataset does not define a y value"):
        dataset.create_train_test()
def test_train_model_followed_by_score_model_returns_correctly(
    self, pipeline_logistic: Pipeline, train_iris_dataset
):
    """Training and then scoring leaves a ``Result`` on ``model.result``."""
    trained = Model(pipeline_logistic)
    trained.train_estimator(train_iris_dataset)
    trained.score_estimator(train_iris_dataset)

    outcome = trained.result
    assert isinstance(outcome, Result)
def test_make_prediction_errors_if_asked_for_proba_without_predict_proba_method(
    self, train_iris_dataset: Dataset
):
    """Requesting probabilities from ``LinearRegression`` raises ``MLToolingError``.

    The model is built and trained first; only the ``make_prediction`` call
    with ``proba=True`` is expected to raise.
    """
    # Setup is kept outside ``pytest.raises`` so that an error raised while
    # constructing or training the model fails the test instead of being
    # mistaken for the expected error from ``make_prediction``.
    model = Model(LinearRegression())
    model.train_estimator(train_iris_dataset)

    with pytest.raises(
        MLToolingError, match="LinearRegression does not have a `predict_proba`"
    ):
        model.make_prediction(train_iris_dataset, 5, proba=True)
def test_save_model_saves_pipeline_correctly(
    self, pipeline_logistic: Pipeline, tmp_path: pathlib.Path, train_iris_dataset
):
    """Saving a trained pipeline model returns a path that exists."""
    pipeline_model = Model(pipeline_logistic)
    pipeline_model.train_estimator(train_iris_dataset)

    storage = FileStorage(tmp_path)
    saved_model_path = pipeline_model.save_estimator(storage)

    assert saved_model_path.exists()
def test_train_model_errors_correctly_when_not_scored(
    self, pipeline_logistic: Pipeline, tmp_path: pathlib.Path, train_iris_dataset
):
    """Training and saving inside ``model.log`` without scoring must raise."""
    unscored = Model(pipeline_logistic)
    expected_error = pytest.raises(
        MLToolingError, match="You haven't scored the estimator"
    )

    # One multi-item ``with`` is equivalent to nesting: the log context is
    # created and entered only after the ``pytest.raises`` context is active.
    with expected_error, unscored.log(str(tmp_path)):
        unscored.train_estimator(train_iris_dataset)
        unscored.save_estimator(FileStorage(tmp_path))
def test_load_prediction_data_works_as_expected(self):
    """Predicting on the demo iris dataset returns a one-row ``Prediction`` frame.

    The model is a ``LogisticRegression`` with a scaling feature pipeline;
    the prediction is compared to a frame holding ``0``, ignoring dtypes.
    """
    iris = load_demo_dataset("iris")
    iris.create_train_test(stratify=True)

    scaling_steps = [("scale", DFStandardScaler())]
    feature_pipeline = Pipeline(scaling_steps)
    classifier = Model(LogisticRegression(), feature_pipeline=feature_pipeline)
    classifier.train_estimator(iris)

    predicted = classifier.make_prediction(iris, 5)

    pd.testing.assert_frame_equal(
        predicted,
        pd.DataFrame({"Prediction": [0]}),
        check_dtype=False,
    )
def test_make_prediction_with_regression_sqldataset_works_as_expected(
    self, boston_sqldataset, loaded_boston_db
):
    """A regression prediction on the SQL dataset is a 1x1 ``Prediction`` frame."""
    sql_data = boston_sqldataset(loaded_boston_db, schema=None)
    sql_data.create_train_test(stratify=False)

    regressor = Model(LinearRegression())
    regressor.train_estimator(sql_data)
    prediction = regressor.make_prediction(sql_data, 0)

    column_names = prediction.columns.tolist()
    assert prediction.shape == (1, 1)
    assert column_names == ["Prediction"]
def test_make_prediction_with_classification_sqldataset_works_as_expected(
    self, iris_sqldataset, loaded_iris_db
):
    """A probability prediction on the SQL dataset is a 1x2 frame of class columns."""
    sql_data = iris_sqldataset(loaded_iris_db, schema=None)
    sql_data.create_train_test()

    classifier = Model(LogisticRegression(solver="lbfgs"))
    classifier.train_estimator(sql_data)
    probabilities = classifier.make_prediction(sql_data, 0, proba=True)

    expected_columns = ["Probability Class 0", "Probability Class 1"]
    assert probabilities.shape == (1, 2)
    assert probabilities.columns.tolist() == expected_columns
def test_train_model_sets_result_to_none(
    self, regression: Model, train_iris_dataset
):
    """Retraining a model that already has a result resets ``result`` to ``None``."""
    result_before = regression.result
    assert result_before is not None

    regression.train_estimator(train_iris_dataset)

    result_after = regression.result
    assert result_after is None