Example #1
0
    def test_save_estimator_with_prod_flag_saves_correctly(
            self, classifier: Model):
        """Check the storage call made by ``save_estimator(..., prod=True)``.

        The storage's ``save`` must be called exactly once with the
        estimator, the name ``"production_model.pkl"`` and ``prod=True``.
        """
        storage_double = MagicMock()

        classifier.save_estimator(storage_double, prod=True)

        save_call = storage_double.save
        save_call.assert_called_once_with(
            classifier.estimator, "production_model.pkl", prod=True)
Example #2
0
 def test_train_model_errors_correctly_when_not_scored(
         self, pipeline_logistic: Pipeline, tmp_path: pathlib.Path,
         train_iris_dataset):
     """Train and save inside a logging context without scoring first.

     The sequence is expected to raise ``MLToolingError`` with a message
     matching "You haven't scored the estimator".
     """
     unscored = Model(pipeline_logistic)
     expected_error = pytest.raises(
         MLToolingError, match="You haven't scored the estimator")

     # The logging context is entered inside the raises-context, so an error
     # raised while opening or closing the log is captured as well.
     with expected_error, unscored.log(str(tmp_path)):
         unscored.train_estimator(train_iris_dataset)
         unscored.save_estimator(FileStorage(tmp_path))
Example #3
0
 def test_can_list_estimators(self, classifier: Model,
                              tmp_path: pathlib.Path):
     """Save the estimator three times and check the listing.

     The files are written through one ``FileStorage`` and listed through a
     second ``FileStorage`` on the same directory. The listing must be
     non-empty and every listed path must exist.
     """
     storage = FileStorage(tmp_path)
     for _ in range(3):
         classifier.save_estimator(storage)

     storage_context = FileStorage(tmp_path)
     # Materialise the listing so it can be checked for emptiness and then
     # iterated, whatever iterable type list_estimators returns.
     filenames_list = list(Model.list_estimators(storage_context))

     # Without this guard the loop below passes vacuously when nothing is
     # listed, hiding a listing that returns no files.
     assert filenames_list, "no estimators were listed after saving"
     for filename in filenames_list:
         assert filename.exists()
Example #4
0
    def test_can_save_with_model(self, classifier: Model,
                                 tmp_path: pathlib.Path):
        """Save twice, each time through a new ``FileStorage`` on the same
        directory, and check that each returned path exists."""
        first_path = classifier.save_estimator(FileStorage(tmp_path))
        assert first_path.exists()

        second_path = classifier.save_estimator(FileStorage(tmp_path))
        assert second_path.exists()
Example #5
0
    def test_save_estimator_uses_default_storage_if_no_storage_is_passed(
            self, tmp_path: pathlib.Path, classifier: Model):
        """Call ``save_estimator()`` with no storage argument.

        After pointing ``config.ESTIMATOR_DIR`` at ``tmp_path``, the default
        storage must list exactly one model, and loading it must give an
        estimator with the same parameters as the one saved.
        """
        classifier.config.ESTIMATOR_DIR = tmp_path
        classifier.save_estimator()

        saved = classifier.config.default_storage.get_list()
        assert len(saved) == 1

        reloaded = Model.load_estimator(saved[0])
        original_params = classifier.estimator.get_params()
        reloaded_params = reloaded.estimator.get_params()
        assert original_params == reloaded_params
Example #6
0
 def test_save_model_saves_pipeline_correctly(
         self,
         pipeline_logistic: Pipeline,
         tmp_path: pathlib.Path,
         train_iris_dataset):
     """Train a pipeline-backed ``Model``, save it to ``tmp_path`` and check
     that the returned path exists."""
     pipeline_model = Model(pipeline_logistic)
     pipeline_model.train_estimator(train_iris_dataset)

     target = FileStorage(tmp_path)
     written_path = pipeline_model.save_estimator(target)
     assert written_path.exists()
Example #7
0
 def test_regression_model_filename_is_generated_correctly(
         self, classifier: Model, tmp_path: pathlib.Path,
         train_iris_dataset):
     """Check that the saved file's stem is the estimator name followed by
     a ``_%Y_%m_%d_%H_%M_%S_%f`` timestamp."""
     written_path = classifier.save_estimator(FileStorage(tmp_path))
     assert written_path.exists()

     # strptime raises ValueError if the stem does not fit the pattern, so
     # parsing it is the actual check here.
     stem = written_path.stem
     stem_pattern = f"{classifier.estimator_name}_%Y_%m_%d_%H_%M_%S_%f"
     assert datetime.datetime.strptime(stem, stem_pattern)
Example #8
0
 def test_can_load_with_model(self, classifier: Model,
                              tmp_path: pathlib.Path):
     """Save once, then load the file through two storages.

     The file is loaded first through the storage that wrote it and then
     through a new ``FileStorage`` on the same directory; both loads must
     return a ``Model``.
     """
     save_storage = FileStorage(tmp_path)
     saved_path = classifier.save_estimator(save_storage)
     assert saved_path.exists()

     reloaded = classifier.load_estimator(saved_path, storage=save_storage)
     assert isinstance(reloaded, Model)

     other_storage = FileStorage(tmp_path)
     reloaded_again = classifier.load_estimator(saved_path,
                                                storage=other_storage)
     assert isinstance(reloaded_again, Model)
Example #9
0
def train_model(year, month, day, graphs=True, clf=None):
    """Score, log and save a model on the AirBnB dataset for a given date.

    Args:
        year: Forwarded to ``AirBnBDataset``.
        month: Forwarded to ``AirBnBDataset``.
        day: Forwarded to ``AirBnBDataset``.
        graphs: When true, the feature-importance, residuals and
            prediction-error plots are saved under ``VISUALIZATIONS``.
        clf: Estimator wrapped by ``Model``. When omitted, a new
            ``RandomForestRegressor()`` is created for this call.

    Returns:
        The result object returned by ``Model.score_estimator``.
    """
    # A default instance in the signature would be created once and shared
    # by every call, so state from one run could leak into the next.
    if clf is None:
        clf = RandomForestRegressor()

    dataset = AirBnBDataset(year=year, month=month, day=day)
    dataset.create_train_test()

    model = Model(clf, feature_pipeline=features)
    result = model.score_estimator(dataset)
    # NOTE(review): N_JOBS is set after scoring, so it cannot have affected
    # the score_estimator call above -- confirm whether that is intended.
    model.config.N_JOBS = 6
    with model.log("randomforest"):
        model.save_estimator()

    if graphs:
        # NOTE(review): the feature-importance plot is written to
        # "confusion_matrix.png"; the name looks like a leftover. It is kept
        # unchanged here in case other code or docs reference this path.
        result.plot.feature_importance()
        plt.savefig(VISUALIZATIONS / "confusion_matrix.png")

        result.plot.residuals()
        plt.savefig(VISUALIZATIONS / "residuals.png")

        result.plot.prediction_error()
        plt.savefig(VISUALIZATIONS / "prediction_error.png")

    return result
Example #10
0
    def test_save_estimator_saves_logging_dir_correctly(
            self, mock_hash: MagicMock, classifier: Model,
            tmp_path: pathlib.Path):
        """Save inside a logging context rooted at ``tmp_path``.

        The saved file must exist, the first ``*.yaml`` file found under
        ``tmp_path`` must have "LogisticRegression" in its path, and
        ``mock_hash`` must have been called exactly once.
        """
        mock_hash.return_value = "1234"

        with classifier.log(str(tmp_path)):
            saved_path = classifier.save_estimator(FileStorage(tmp_path))

        assert saved_path.exists()

        yaml_logs = [str(entry) for entry in tmp_path.rglob("*.yaml")]
        assert "LogisticRegression" in yaml_logs[0]

        mock_hash.assert_called_once()
Example #11
0
    def test_regression_model_can_be_saved(self, classifier: Model,
                                           tmp_path: pathlib.Path,
                                           train_iris_dataset):
        """Score, save and reload the model, then compare parameters.

        The model is saved through one ``FileStorage`` and loaded through
        another on the same directory; the reloaded estimator's parameters
        must equal the original's.
        """
        classifier.score_estimator(train_iris_dataset)
        reader = FileStorage(tmp_path)

        writer = FileStorage(tmp_path)
        written_path = classifier.save_estimator(writer)
        assert written_path.exists()

        reloaded = classifier.load_estimator(written_path, storage=reader)
        reloaded_params = reloaded.estimator.get_params()
        assert reloaded_params == classifier.estimator.get_params()