Example #1
0
    def test_random_forest_regressor(self, compress_model_definition):
        """Import a RandomForestRegressor and compare predictions with Elasticsearch.

        Also checks that importing again under the same model ID with
        ``es_if_exists="fail"`` raises ``ValueError``.

        ``compress_model_definition`` is forwarded to ``MLModel.import_model``
        as ``es_compress_model_definition``.
        """
        # Train model
        features, targets = datasets.make_regression(n_features=5)
        regressor = RandomForestRegressor()
        regressor.fit(features, targets)

        # Serialise the models to Elasticsearch
        feature_names = ["f0", "f1", "f2", "f3", "f4"]
        model_id = "test_random_forest_regressor"

        es_model = MLModel.import_model(
            ES_TEST_CLIENT,
            model_id,
            regressor,
            feature_names,
            es_if_exists="replace",
            es_compress_model_definition=compress_model_definition,
        )
        try:
            # Get some test results
            check_prediction_equality(es_model, regressor,
                                      random_rows(features, 20))

            # The model now exists, so refusing to replace it must fail.
            match = f"Trained machine learning model {model_id} already exists"
            with pytest.raises(ValueError, match=match):
                MLModel.import_model(
                    ES_TEST_CLIENT,
                    model_id,
                    regressor,
                    feature_names,
                    es_if_exists="fail",
                    es_compress_model_definition=compress_model_definition,
                )
        finally:
            # Clean up, even when one of the checks above fails
            es_model.delete_model()
Example #2
0
    def test_predict_single_feature_vector(self):
        """Predict on one flat feature vector and compare with XGBoost's result.

        The Elasticsearch model is given a single row (``[0.1]``) rather than a
        list of rows; the result must agree with ``XGBRegressor.predict`` to
        two decimal places.
        """
        # Train model
        features, targets = datasets.make_regression(n_features=1)
        regressor = XGBRegressor()
        regressor.fit(features, targets)

        # Get some test results
        test_data = [[0.1]]
        test_results = regressor.predict(np.asarray(test_data))

        # Serialise the models to Elasticsearch
        feature_names = ["f0"]
        model_id = "test_xgb_regressor"

        es_model = MLModel.import_model(ES_TEST_CLIENT,
                                        model_id,
                                        regressor,
                                        feature_names,
                                        es_if_exists="replace")
        try:
            # Single feature: pass the row itself, not a list of rows
            es_results = es_model.predict(test_data[0])

            np.testing.assert_almost_equal(test_results, es_results, decimal=2)
        finally:
            # Clean up, even when the comparison above fails
            es_model.delete_model()
Example #3
0
    def test_xgb_regressor(self, compress_model_definition, objective,
                           booster):
        """Import an XGBRegressor and compare predictions with Elasticsearch.

        ``objective`` and ``booster`` are passed straight to ``XGBRegressor``;
        ``compress_model_definition`` is forwarded to ``MLModel.import_model``
        as ``es_compress_model_definition``.
        """
        # Train model
        features, raw_targets = datasets.make_regression(n_features=5)

        # Squash the targets into [0, 1] with a softmax. The same max-shift is
        # applied to the numerator and the denominator, so the values form a
        # proper distribution and np.exp never sees a positive argument
        # (no overflow).
        shifted = np.exp(raw_targets - np.max(raw_targets))
        targets = shifted / np.sum(shifted)

        regressor = XGBRegressor(objective=objective, booster=booster)
        regressor.fit(features, targets)

        # Serialise the models to Elasticsearch
        feature_names = ["f0", "f1", "f2", "f3", "f4"]
        model_id = "test_xgb_regressor"

        es_model = MLModel.import_model(
            ES_TEST_CLIENT,
            model_id,
            regressor,
            feature_names,
            es_if_exists="replace",
            es_compress_model_definition=compress_model_definition,
        )
        try:
            # Get some test results
            check_prediction_equality(es_model, regressor,
                                      random_rows(features, 20))
        finally:
            # Clean up, even when the comparison above fails
            es_model.delete_model()
Example #4
0
    def test_xgb_classifier_objectives_and_booster(self, objective, booster):
        """Import an XGBClassifier for the given objective/booster and compare predictions.

        Objectives whose name starts with ``"multi"`` are trained on a
        three-class data set (after ``skip_if_multiclass_classifition()`` has
        had the chance to skip the test); every other objective uses the
        default data set from ``make_classification``.
        """
        # test both multiple and binary classification
        if objective.startswith("multi"):
            skip_if_multiclass_classifition()
            training_data = datasets.make_classification(n_features=5,
                                                         n_classes=3,
                                                         n_informative=3)
        else:
            training_data = datasets.make_classification(n_features=5)
        features, labels = training_data

        # The classifier is configured identically for both kinds of data.
        classifier = XGBClassifier(booster=booster, objective=objective)

        # Train model
        classifier.fit(features, labels)

        # Serialise the models to Elasticsearch
        feature_names = [
            "feature0", "feature1", "feature2", "feature3", "feature4"
        ]
        model_id = "test_xgb_classifier"

        es_model = MLModel.import_model(ES_TEST_CLIENT,
                                        model_id,
                                        classifier,
                                        feature_names,
                                        es_if_exists="replace")
        try:
            # Get some test results
            check_prediction_equality(es_model, classifier,
                                      random_rows(features, 20))
        finally:
            # Clean up, even when the comparison above fails
            es_model.delete_model()
Example #5
0
    def test_decision_tree_classifier(self, compress_model_definition):
        """Import a DecisionTreeClassifier and compare predictions with Elasticsearch.

        ``compress_model_definition`` is forwarded to ``MLModel.import_model``
        as ``es_compress_model_definition``.
        """
        # Train model
        features, labels = datasets.make_classification(n_features=5)
        classifier = DecisionTreeClassifier()
        classifier.fit(features, labels)

        # Serialise the models to Elasticsearch
        feature_names = ["f0", "f1", "f2", "f3", "f4"]
        model_id = "test_decision_tree_classifier"

        es_model = MLModel.import_model(
            ES_TEST_CLIENT,
            model_id,
            classifier,
            feature_names,
            es_if_exists="replace",
            es_compress_model_definition=compress_model_definition,
        )
        try:
            # Get some test results
            check_prediction_equality(es_model, classifier,
                                      random_rows(features, 20))
        finally:
            # Clean up, even when the comparison above fails
            es_model.delete_model()
Example #6
0
    def test_xgb_classifier(self, compress_model_definition, multi_class):
        """Import a gbtree XGBClassifier and compare predictions with Elasticsearch.

        With ``multi_class`` set, a three-class data set and the
        ``multi:softmax`` objective are used (after
        ``skip_if_multiclass_classifition()`` has had the chance to skip the
        test); otherwise the default data set from ``make_classification`` and
        the classifier's default objective are used.

        ``compress_model_definition`` is forwarded to ``MLModel.import_model``
        as ``es_compress_model_definition``.
        """
        # test both multiple and binary classification
        if multi_class:
            skip_if_multiclass_classifition()
            features, labels = datasets.make_classification(n_features=5,
                                                            n_classes=3,
                                                            n_informative=3)
            classifier = XGBClassifier(booster="gbtree",
                                       objective="multi:softmax")
        else:
            features, labels = datasets.make_classification(n_features=5)
            classifier = XGBClassifier(booster="gbtree")

        # Train model
        classifier.fit(features, labels)

        # Serialise the models to Elasticsearch
        feature_names = ["f0", "f1", "f2", "f3", "f4"]
        model_id = "test_xgb_classifier"

        es_model = MLModel.import_model(
            ES_TEST_CLIENT,
            model_id,
            classifier,
            feature_names,
            es_if_exists="replace",
            es_compress_model_definition=compress_model_definition,
        )
        try:
            # Get some test results
            check_prediction_equality(es_model, classifier,
                                      random_rows(features, 20))
        finally:
            # Clean up, even when the comparison above fails
            es_model.delete_model()
Example #7
0
    def test_es_if_exists_fail(self, compress_model_definition):
        """Importing with ``es_if_exists="fail"`` raises when the model ID is taken.

        ``compress_model_definition`` is forwarded to ``MLModel.import_model``
        as ``es_compress_model_definition``.
        """
        # Train model
        features, targets = datasets.make_regression(n_features=5)
        regressor = RandomForestRegressor()
        regressor.fit(features, targets)

        feature_names = ["f0", "f1", "f2", "f3", "f4"]
        model_id = "test_random_forest_regressor"

        # Make sure the model ID is already taken, instead of relying on some
        # earlier test having left the model behind in Elasticsearch.
        MLModel.import_model(
            ES_TEST_CLIENT,
            model_id,
            regressor,
            feature_names,
            es_if_exists="replace",
            es_compress_model_definition=compress_model_definition,
        )

        # A second import that refuses to replace the model must be rejected.
        match = f"Trained machine learning model {model_id} already exists"
        with pytest.raises(ValueError, match=match):
            MLModel.import_model(
                ES_TEST_CLIENT,
                model_id,
                regressor,
                feature_names,
                es_if_exists="fail",
                es_compress_model_definition=compress_model_definition,
            )
        # NOTE(review): the model is deliberately not deleted here, so the
        # state after the test is the same as before this change; other tests
        # using this model ID may expect it to exist - confirm before adding
        # cleanup.
Example #8
0
    def test_imported_mlmodel_overwrite_true(self, compress_model_definition,
                                             overwrite):
        """Importing with the legacy ``overwrite`` argument emits a DeprecationWarning."""
        expected_warning = (
            "'overwrite' parameter is deprecated, use 'es_if_exists' instead")
        model_id = "test_random_forest_regressor"
        feature_names = ["f0", "f1", "f2", "f3", "f4"]

        # Fit a small regressor to have something to import
        features, targets = datasets.make_regression(n_features=5)
        regressor = RandomForestRegressor()
        regressor.fit(features, targets)

        # Only the deprecated keyword is supplied, never es_if_exists
        import_kwargs = {
            "overwrite": overwrite,
            "es_compress_model_definition": compress_model_definition,
        }
        with pytest.warns(DeprecationWarning, match=expected_warning):
            MLModel.import_model(ES_TEST_CLIENT, model_id, regressor,
                                 feature_names, **import_kwargs)
Example #9
0
    def test_imported_mlmodel_bothparams(self, compress_model_definition,
                                         es_if_exists, overwrite):
        """Passing ``overwrite`` together with ``es_if_exists`` raises ValueError."""
        expected_error = "Using 'overwrite' and 'es_if_exists' together is invalid, use only 'es_if_exists'"
        model_id = "test_random_forest_regressor"
        feature_names = ["f0", "f1", "f2", "f3", "f4"]

        # Fit a small regressor to have something to import
        features, targets = datasets.make_regression(n_features=5)
        regressor = RandomForestRegressor()
        regressor.fit(features, targets)

        # Both the new and the deprecated keyword are supplied at once
        import_kwargs = {
            "es_if_exists": es_if_exists,
            "overwrite": overwrite,
            "es_compress_model_definition": compress_model_definition,
        }
        with pytest.raises(ValueError, match=expected_error):
            MLModel.import_model(ES_TEST_CLIENT, model_id, regressor,
                                 feature_names, **import_kwargs)
Example #10
0
    def test_imported_mlmodel_overwrite_false(self, compress_model_definition,
                                              overwrite):
        """Importing over an existing model with the legacy ``overwrite`` argument.

        The call is expected to emit the ``overwrite`` DeprecationWarning and
        to raise ``ValueError`` because the model ID is already taken.

        ``compress_model_definition`` is forwarded to ``MLModel.import_model``
        as ``es_compress_model_definition``.
        """
        # Train model
        features, targets = datasets.make_regression(n_features=5)
        regressor = RandomForestRegressor()
        regressor.fit(features, targets)

        feature_names = ["f0", "f1", "f2", "f3", "f4"]
        model_id = "test_random_forest_regressor"

        # Make sure the model ID is already taken, instead of relying on some
        # earlier test having left the model behind in Elasticsearch.
        MLModel.import_model(
            ES_TEST_CLIENT,
            model_id,
            regressor,
            feature_names,
            es_if_exists="replace",
            es_compress_model_definition=compress_model_definition,
        )

        match_error = f"Trained machine learning model {model_id} already exists"
        match_warning = (
            "'overwrite' parameter is deprecated, use 'es_if_exists' instead")
        # pytest.raises is the outer context so that the ValueError travelling
        # through pytest.warns is still caught and checked.
        with pytest.raises(ValueError, match=match_error):
            with pytest.warns(DeprecationWarning, match=match_warning):
                MLModel.import_model(
                    ES_TEST_CLIENT,
                    model_id,
                    regressor,
                    feature_names,
                    overwrite=overwrite,
                    es_compress_model_definition=compress_model_definition,
                )
        # NOTE(review): the model is deliberately not deleted here, so the
        # state after the test is the same as before this change; other tests
        # using this model ID may expect it to exist - confirm before adding
        # cleanup.
Example #11
0
    def test_unpack_and_raise_errors_in_ingest_simulate(self, mocker):
        """Errors inside the ``docs`` list of an ingest-simulate response become RuntimeError.

        ``ES_TEST_CLIENT.ingest.simulate`` is patched (via the ``mocker``
        fixture) to return a response whose only document carries an
        ``error`` entry; ``predict`` must raise ``RuntimeError`` with the
        model ID and that error payload.
        """
        # Train model
        features, labels = datasets.make_classification(n_features=5)
        classifier = DecisionTreeClassifier()
        classifier.fit(features, labels)

        # Serialise the models to Elasticsearch
        feature_names = ["f0", "f1", "f2", "f3", "f4"]
        model_id = "test_decision_tree_classifier"
        test_data = [[0.1, 0.2, 0.3, -0.5, 1.0], [1.6, 2.1, -10, 50, -1.0]]

        es_model = MLModel.import_model(
            ES_TEST_CLIENT,
            model_id,
            classifier,
            feature_names,
            es_if_exists="replace",
            es_compress_model_definition=True,
        )
        try:
            # Mock the ingest.simulate API to return an error within {'docs': [...]}
            mock = mocker.patch.object(ES_TEST_CLIENT.ingest, "simulate")
            mock.return_value = {
                "docs": [{
                    "error": {
                        "type":
                        "x_content_parse_exception",
                        "reason":
                        "[1:1052] [inference_model_definition] failed to parse field [trained_model]",
                    }
                }]
            }

            with pytest.raises(RuntimeError) as err:
                es_model.predict(test_data)

            assert repr(err.value) == (
                'RuntimeError("Failed to run prediction for model ID '
                "'test_decision_tree_classifier'\", {'type': 'x_content_parse_exception', "
                "'reason': '[1:1052] [inference_model_definition] failed to parse "
                "field [trained_model]'})")
        finally:
            # Clean up the imported model, as the other tests using this model
            # ID do. NOTE(review): assumes delete_model() does not go through
            # the patched ingest.simulate call - confirm.
            es_model.delete_model()
Example #12
0
    def test_lgbm_classifier_objectives_and_booster(self,
                                                    compress_model_definition,
                                                    objective, booster):
        """Import an LGBMClassifier for the given objective/booster and compare predictions.

        Objectives whose name starts with ``"multi"`` are trained on a
        three-class data set (after ``skip_if_multiclass_classifition()`` has
        had the chance to skip the test); every other objective uses the
        default data set from ``make_classification``.

        ``booster`` is passed to ``LGBMClassifier`` as ``boosting_type`` and
        ``compress_model_definition`` is forwarded to
        ``MLModel.import_model`` as ``es_compress_model_definition``.
        """
        # test both multiple and binary classification
        if objective.startswith("multi"):
            skip_if_multiclass_classifition()
            training_data = datasets.make_classification(n_features=5,
                                                         n_classes=3,
                                                         n_informative=3)
        else:
            training_data = datasets.make_classification(n_features=5)
        features, labels = training_data

        # The classifier is configured identically for both kinds of data.
        classifier = LGBMClassifier(boosting_type=booster,
                                    objective=objective)

        # Train model
        classifier.fit(features, labels)

        # Serialise the models to Elasticsearch
        feature_names = [
            "Column_0", "Column_1", "Column_2", "Column_3", "Column_4"
        ]
        model_id = "test_lgbm_classifier"

        es_model = MLModel.import_model(
            ES_TEST_CLIENT,
            model_id,
            classifier,
            feature_names,
            es_if_exists="replace",
            es_compress_model_definition=compress_model_definition,
        )
        try:
            # Get some test results
            check_prediction_equality(es_model, classifier,
                                      random_rows(features, 20))
        finally:
            # Clean up, even when the comparison above fails
            es_model.delete_model()
Example #13
0
    def test_lgbm_regressor(self, compress_model_definition, objective,
                            booster):
        """Import an LGBMRegressor and compare predictions with Elasticsearch.

        ``booster`` is passed to ``LGBMRegressor`` as ``boosting_type`` and
        ``objective`` as ``objective``; ``compress_model_definition`` is
        forwarded to ``MLModel.import_model`` as
        ``es_compress_model_definition``.
        """
        # Train model
        features, targets = datasets.make_regression(n_features=5)
        if booster == "rf":
            # The "rf" boosting type gets explicit bagging settings.
            # NOTE(review): presumably because LightGBM's random-forest mode
            # requires bagging to be enabled - confirm against LightGBM docs.
            regressor = LGBMRegressor(
                boosting_type=booster,
                objective=objective,
                bagging_fraction=0.5,
                bagging_freq=3,
            )
        else:
            regressor = LGBMRegressor(boosting_type=booster,
                                      objective=objective)
        regressor.fit(features, targets)

        # Serialise the models to Elasticsearch
        feature_names = [
            "Column_0", "Column_1", "Column_2", "Column_3", "Column_4"
        ]
        model_id = "test_lgbm_regressor"

        es_model = MLModel.import_model(
            ES_TEST_CLIENT,
            model_id,
            regressor,
            feature_names,
            es_if_exists="replace",
            es_compress_model_definition=compress_model_definition,
        )
        try:
            # Get some test results
            check_prediction_equality(es_model, regressor,
                                      random_rows(features, 20))
        finally:
            # Clean up, even when the comparison above fails
            es_model.delete_model()