Example no. 1
def tests_boston_xgb_path_already_exists(boston_training_test, model_path):
    x_train, x_test, y_train, _ = boston_training_test
    model = xgb.XGBRegressor()
    fitted_model = model.fit(x_train, y_train)
    cbw.save_model(model_path, fitted_model, zip=False)
    with pytest.raises(cbw.ClearboxWrapperException):
        cbw.save_model(model_path, fitted_model, zip=False)
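These snippets are taken from a shared pytest suite; the import block and helper fixtures live in the surrounding test modules. A minimal set of imports the examples appear to rely on is sketched below (the `clearbox_wrapper` module name behind the `cbw` alias is an assumption, as is sourcing the Keras classes from `tensorflow.keras`):

# Sketch of the shared imports assumed by these test snippets; not the
# original test-module header.
from sys import version_info

import numpy as np
import pandas as pd
import pytest
import torch
import xgboost as xgb
import yaml
from sklearn import linear_model, tree
from sklearn import preprocessing as sk_preprocessing
from tensorflow.keras import Sequential  # or the standalone keras package
from tensorflow.keras.layers import Dense

import clearbox_wrapper as cbw  # assumed package name for the cbw alias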
Example no. 2
def test_boston_xgb_preprocessing_check_model_and_preprocessing_signature(
        preprocessor, boston_training_test, model_path):
    x_train, x_test, y_train, _ = boston_training_test
    x_train_transformed = preprocessor.fit_transform(x_train)

    model = xgb.XGBRegressor()
    fitted_model = model.fit(x_train_transformed, y_train)
    cbw.save_model(
        model_path,
        fitted_model,
        preprocessing=preprocessor,
        input_data=x_train,
        zip=False,
    )

    loaded_model = cbw.load_model(model_path)
    original_model_predictions = fitted_model.predict(x_train_transformed[:5])
    loaded_model_predictions = loaded_model.predict(x_train[:5])
    np.testing.assert_array_equal(original_model_predictions,
                                  loaded_model_predictions)

    mlmodel = cbw.Model.load(model_path)
    preprocessing_input_schema = mlmodel.get_preprocessing_input_schema()
    preprocessing_output_schema = mlmodel.get_preprocessing_output_schema()
    model_input_schema = mlmodel.get_model_input_schema()

    assert _check_schema(x_train, preprocessing_input_schema)
    assert _check_schema(x_train_transformed, preprocessing_output_schema)
    assert _check_schema(x_train_transformed, model_input_schema)
    assert preprocessing_output_schema == model_input_schema
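Fixtures such as `preprocessor`, `model_path` and `boston_training_test` are not shown in these excerpts. A purely illustrative conftest-style sketch of what they might provide (the real fixtures may differ, and `load_boston` only exists in older scikit-learn releases):

# Hypothetical conftest fixtures; illustrative only.
import pandas as pd
import pytest
from sklearn import datasets, preprocessing
from sklearn.model_selection import train_test_split

@pytest.fixture
def preprocessor():
    # Any object exposing fit_transform/transform works as a preprocessor.
    return preprocessing.StandardScaler()

@pytest.fixture
def model_path(tmpdir):
    return str(tmpdir + "/saved_model")

@pytest.fixture
def boston_training_test():
    data = datasets.load_boston()  # removed in scikit-learn >= 1.2
    x = pd.DataFrame(data.data, columns=data.feature_names)
    y = pd.Series(data.target)
    # Returns x_train, x_test, y_train, y_test, matching the unpacking above.
    return train_test_split(x, y, test_size=0.2, random_state=42)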
Example no. 3
def test_iris_xgb_preprocessing_check_model_and_preprocessing_signature(
        preprocessor, iris_data, model_path):
    x, y = iris_data
    x_transformed = preprocessor.fit_transform(x)

    model = xgb.XGBClassifier(use_label_encoder=False)
    fitted_model = model.fit(x_transformed, y)
    cbw.save_model(model_path,
                   fitted_model,
                   preprocessing=preprocessor,
                   input_data=x,
                   zip=False)

    loaded_model = cbw.load_model(model_path)
    original_model_predictions = fitted_model.predict_proba(x_transformed[:5])
    loaded_model_predictions = loaded_model.predict_proba(x[:5])
    np.testing.assert_array_equal(original_model_predictions,
                                  loaded_model_predictions)

    mlmodel = cbw.Model.load(model_path)
    preprocessing_input_schema = mlmodel.get_preprocessing_input_schema()
    preprocessing_output_schema = mlmodel.get_preprocessing_output_schema()
    model_input_schema = mlmodel.get_model_input_schema()

    assert _check_schema(x, preprocessing_input_schema)
    assert _check_schema(x_transformed, preprocessing_output_schema)
    assert _check_schema(x_transformed, model_input_schema)
    assert preprocessing_output_schema == model_input_schema
Example no. 4
def test_boston_xgboost_data_preparation_and_preprocessing(
        preprocessor, add_value_to_column_transformer, boston_training_test,
        model_path):
    x_train, x_test, y_train, _ = boston_training_test
    x_train_prepared = add_value_to_column_transformer(x_train)
    x_train_transformed = preprocessor.fit_transform(x_train_prepared)

    model = xgb.XGBRegressor()
    fitted_model = model.fit(x_train_transformed, y_train)
    cbw.save_model(
        model_path,
        fitted_model,
        preprocessing=preprocessor,
        data_preparation=add_value_to_column_transformer,
        zip=False,
    )

    loaded_model = cbw.load_model(model_path)
    x_test_prepared = add_value_to_column_transformer(x_test)
    x_test_transformed = preprocessor.transform(x_test_prepared)

    original_model_predictions = fitted_model.predict(x_test_transformed)
    loaded_model_predictions = loaded_model.predict(x_test)
    np.testing.assert_array_equal(original_model_predictions,
                                  loaded_model_predictions)
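The `add_value_to_column_transformer` and `drop_column_transformer` fixtures act as plain callables used for data preparation. A hypothetical sketch of what they could look like (which column they touch is not specified in these excerpts):

# Hypothetical data-preparation fixtures; the real implementations may differ.
import pytest

@pytest.fixture
def add_value_to_column_transformer():
    def transformer(dataframe):
        prepared = dataframe.copy()
        prepared[prepared.columns[0]] = prepared[prepared.columns[0]] + 1
        return prepared
    return transformer

@pytest.fixture
def drop_column_transformer():
    def transformer(dataframe):
        return dataframe.drop(columns=[dataframe.columns[-1]])
    return transformer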
Example no. 5
def test_iris_xgboost_conda_env(iris_data, model_path):
    import cloudpickle

    x, y = iris_data
    model = xgb.XGBClassifier(use_label_encoder=False)
    fitted_model = model.fit(x, y)
    cbw.save_model(model_path, fitted_model, zip=False)

    with open(model_path + "/conda.yaml", "r") as f:
        conda_env = yaml.safe_load(f)

    python_version = "{major}.{minor}.{micro}".format(major=version_info.major,
                                                      minor=version_info.minor,
                                                      micro=version_info.micro)
    xgb_version = xgb.__version__
    cloudpickle_version = cloudpickle.__version__

    channels_list = ["defaults", "conda-forge"]
    dependencies = [
        "python={}".format(python_version),
        "pip",
        {
            "pip": [
                "cloudpickle=={}".format(cloudpickle_version),
                "xgboost=={}".format(xgb_version),
            ]
        },
    ]
    assert conda_env["channels"] == channels_list
    assert conda_env["dependencies"] == dependencies
Example no. 6
def tests_iris_xgboost_path_already_exists(iris_data, model_path):
    x, y = iris_data
    model = xgb.XGBClassifier(use_label_encoder=False)
    fitted_model = model.fit(x, y)
    cbw.save_model(model_path, fitted_model, zip=False)
    with pytest.raises(cbw.ClearboxWrapperException):
        cbw.save_model(model_path, fitted_model, zip=False)
def test_boston_keras_path_already_exists(
    sk_function_transformer,
    add_value_to_column_transformer,
    boston_training_test,
    keras_model,
    model_path,
):
    x_train, x_test, y_train, _ = boston_training_test

    x_train_prepared = add_value_to_column_transformer(x_train)
    x_train_transformed = sk_function_transformer.fit_transform(x_train_prepared)

    model = keras_model
    model.fit(x_train_transformed, y_train, epochs=10, batch_size=32)
    cbw.save_model(
        model_path,
        model,
        preprocessing=sk_function_transformer,
        data_preparation=add_value_to_column_transformer,
        zip=False,
    )

    with pytest.raises(cbw.ClearboxWrapperException):
        cbw.save_model(
            model_path,
            model,
            preprocessing=sk_function_transformer,
            data_preparation=add_value_to_column_transformer,
            zip=False,
        )
def test_iris_pytorch_get_prepared_data(
    preprocessor,
    add_value_to_column_transformer,
    iris_training,
    iris_test,
    iris_pytorch_model,
    model_path,
):
    x_train, y_train = iris_training

    x_prepared = add_value_to_column_transformer(x_train)
    x_transformed = preprocessor.fit_transform(x_prepared)

    x_transformed_tensor = torch.Tensor(x_transformed)
    y_train = torch.Tensor(y_train.values)

    def preprocessing_function(x_data):
        x_transformed = preprocessor.transform(x_data)
        return x_transformed

    model = iris_pytorch_model
    iris_pytorch_model_training(model, x_transformed_tensor, y_train)

    cbw.save_model(
        model_path,
        model,
        preprocessing=preprocessing_function,
        data_preparation=add_value_to_column_transformer,
        zip=False,
    )

    loaded_model = cbw.load_model(model_path)
    x_prepared_by_loaded_data_preparation = loaded_model.prepare_data(x_train)
    np.testing.assert_array_equal(x_prepared,
                                  x_prepared_by_loaded_data_preparation)
Example no. 9
def test_adult_sklearn_preprocessing_check_model_and_preprocessing_signature(
    sklearn_model, adult_training, adult_test, tmpdir
):
    x_training, y_training = adult_training
    x_test, y_test = adult_test
    x_preprocessor, y_encoder = x_and_y_preprocessing(x_training)

    x_transformed = x_preprocessor.fit_transform(x_training)
    y_transformed = y_encoder.fit_transform(y_training)

    sklearn_model = tree.DecisionTreeClassifier()
    fitted_model = sklearn_model.fit(x_transformed, y_transformed)
    tmp_model_path = str(tmpdir + "/saved_model")
    cbw.save_model(
        tmp_model_path,
        fitted_model,
        preprocessing=x_preprocessor,
        input_data=x_training,
        zip=False,
    )

    loaded_model = cbw.load_model(tmp_model_path)
    original_model_predictions = fitted_model.predict(x_transformed[:5])
    loaded_model_predictions = loaded_model.predict(x_training[:5])
    np.testing.assert_array_equal(original_model_predictions, loaded_model_predictions)

    mlmodel = cbw.Model.load(tmp_model_path)
    preprocessing_input_schema = mlmodel.get_preprocessing_input_schema()
    preprocessing_output_schema = mlmodel.get_preprocessing_output_schema()
    model_input_schema = mlmodel.get_model_input_schema()

    assert _check_schema(x_training, preprocessing_input_schema)
    assert _check_schema(x_transformed, preprocessing_output_schema)
    assert _check_schema(x_transformed, model_input_schema)
    assert preprocessing_output_schema == model_input_schema
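`x_and_y_preprocessing` is a helper from the Adult test modules that returns a transformer for the features and an encoder for the target. Judging from how its return values are used, a plausible sketch (not the original helper) is:

# Illustrative sketch of x_and_y_preprocessing; the real helper may differ.
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler

def x_and_y_preprocessing(x_dataframe):
    numeric_columns = x_dataframe.select_dtypes(include="number").columns
    categorical_columns = x_dataframe.select_dtypes(exclude="number").columns
    x_preprocessor = ColumnTransformer(
        [
            ("num", StandardScaler(), numeric_columns),
            ("cat", OneHotEncoder(handle_unknown="ignore"), categorical_columns),
        ]
    )
    # The PyTorch tests call .todense() on the transformed y, which suggests a
    # sparse OneHotEncoder; other test modules may use a different y encoder.
    y_encoder = OneHotEncoder(handle_unknown="ignore")
    return x_preprocessor, y_encoder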
def test_boston_keras_data_preparation_and_preprocessing(
    preprocessor,
    add_value_to_column_transformer,
    boston_training_test,
    keras_model,
    model_path,
):
    x_train, x_test, y_train, _ = boston_training_test

    x_train_prepared = add_value_to_column_transformer(x_train)
    x_train_transformed = preprocessor.fit_transform(x_train_prepared)

    model = keras_model
    model.fit(x_train_transformed, y_train, epochs=10, batch_size=32)
    cbw.save_model(
        model_path,
        model,
        preprocessing=preprocessor,
        data_preparation=add_value_to_column_transformer,
        zip=False,
    )

    loaded_model = cbw.load_model(model_path)
    x_test_prepared = add_value_to_column_transformer(x_test)
    x_test_transformed = preprocessor.transform(x_test_prepared)
    original_model_predictions = model.predict(x_test_transformed)
    loaded_model_predictions = loaded_model.predict(x_test)

    np.testing.assert_array_equal(original_model_predictions, loaded_model_predictions)
Example no. 11
def test_adult_sklearn_get_prepared_data(
    adult_training, adult_test, data_preparation, tmpdir
):
    x_training, y_training = adult_training
    x_test, y_test = adult_test

    x_training_prepared = data_preparation(x_training)
    x_preprocessor, y_encoder = x_and_y_preprocessing(x_training_prepared)

    x_transformed = x_preprocessor.fit_transform(x_training_prepared)
    y_transformed = y_encoder.fit_transform(y_training)

    fitted_model = tree.DecisionTreeClassifier().fit(x_transformed, y_transformed)
    tmp_model_path = str(tmpdir + "/saved_model")
    cbw.save_model(
        tmp_model_path,
        fitted_model,
        preprocessing=x_preprocessor,
        data_preparation=data_preparation,
        zip=False,
    )

    loaded_model = cbw.load_model(tmp_model_path)

    x_test_prepared_by_loaded_data_preparation = loaded_model.prepare_data(x_test)
    x_test_prepared = data_preparation(x_test)

    pd.testing.assert_frame_equal(
        x_test_prepared, x_test_prepared_by_loaded_data_preparation
    )
Example no. 12
def test_adult_sklearn_preprocessing_and_data_preparation(
    sklearn_model, adult_training, adult_test, data_preparation, tmpdir
):
    x_training, y_training = adult_training
    x_test, y_test = adult_test

    x_training_prepared = data_preparation(x_training)
    x_preprocessor, y_encoder = x_and_y_preprocessing(x_training_prepared)

    x_transformed = x_preprocessor.fit_transform(x_training_prepared)
    y_transformed = y_encoder.fit_transform(y_training)

    fitted_model = sklearn_model.fit(x_transformed, y_transformed)
    tmp_model_path = str(tmpdir + "/saved_model")
    cbw.save_model(
        tmp_model_path,
        fitted_model,
        preprocessing=x_preprocessor,
        data_preparation=data_preparation,
        zip=False,
    )

    loaded_model = cbw.load_model(tmp_model_path)
    original_model_predictions = fitted_model.predict(
        x_preprocessor.transform(data_preparation(x_test))
    )
    loaded_model_predictions = loaded_model.predict(x_test)
    np.testing.assert_array_equal(original_model_predictions, loaded_model_predictions)
def test_adult_pytorch_path_already_exists(adult_training, adult_test,
                                           model_path):
    x_training, y_training = adult_training
    x_test, _ = adult_test
    x_preprocessor, y_encoder = x_and_y_preprocessing(x_training)

    x_train_transformed = x_preprocessor.fit_transform(x_training)
    x_train_transformed = torch.Tensor(x_train_transformed.todense())

    y_train_transformed = y_encoder.fit_transform(
        y_training.values.reshape(-1, 1))
    y_train_transformed = torch.Tensor(y_train_transformed.todense())

    def preprocessing_function(x_data):
        x_transformed = x_preprocessor.transform(x_data)
        return x_transformed.todense()

    model = AdultModel(x_train_transformed.shape[1])
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model.to(device)
    model.train()
    train(model, x_train_transformed, y_train_transformed)

    cbw.save_model(model_path,
                   model,
                   preprocessing=preprocessing_function,
                   zip=False)

    with pytest.raises(cbw.ClearboxWrapperException):
        cbw.save_model(model_path,
                       model,
                       preprocessing=preprocessing_function,
                       zip=False)
def test_adult_pytorch_preprocessing(adult_training, adult_test, model_path):
    x_training, y_training = adult_training
    x_test, _ = adult_test
    x_preprocessor, y_encoder = x_and_y_preprocessing(x_training)

    x_train_transformed = x_preprocessor.fit_transform(x_training)
    x_train_transformed = torch.Tensor(x_train_transformed.todense())

    y_train_transformed = y_encoder.fit_transform(
        y_training.values.reshape(-1, 1))
    y_train_transformed = torch.Tensor(y_train_transformed.todense())

    def preprocessing_function(x_data):
        x_transformed = x_preprocessor.transform(x_data)
        return x_transformed.todense()

    model = AdultModel(x_train_transformed.shape[1])
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model.to(device)
    model.train()
    train(model, x_train_transformed, y_train_transformed)

    cbw.save_model(model_path,
                   model,
                   preprocessing=preprocessing_function,
                   zip=False)
    loaded_model = cbw.load_model(model_path)

    x_test_transformed = preprocessing_function(x_test)
    x_test_transformed = torch.Tensor(x_test_transformed)
    original_model_predictions = model(x_test_transformed).detach().numpy()
    loaded_model_predictions = loaded_model.predict(x_test)

    np.testing.assert_array_equal(original_model_predictions,
                                  loaded_model_predictions)
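`AdultModel` and `train` (and, analogously, `BostonModel` and the iris PyTorch helpers) are defined in the surrounding PyTorch test modules. A minimal sketch consistent with how they are called above, with the architecture and training loop as assumptions:

# Illustrative sketch of the PyTorch pieces assumed by the Adult tests;
# the real AdultModel architecture and training loop may differ.
import torch
import torch.nn as nn

class AdultModel(nn.Module):
    def __init__(self, input_size):
        super().__init__()
        self.layers = nn.Sequential(
            nn.Linear(input_size, 32),
            nn.ReLU(),
            nn.Linear(32, 2),
            nn.Softmax(dim=1),
        )

    def forward(self, x):
        return self.layers(x)

def train(model, x_train, y_train, epochs=10):
    # y is one-hot encoded in these tests, so a simple MSE loss is assumed.
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
    loss_fn = nn.MSELoss()
    for _ in range(epochs):
        optimizer.zero_grad()
        loss = loss_fn(model(x_train), y_train)
        loss.backward()
        optimizer.step()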
Example no. 15
def test_iris_xgboost_predict_without_preprocessing(preprocessor, iris_data,
                                                    drop_column_transformer,
                                                    model_path):
    x, y = iris_data
    x_prepared = drop_column_transformer(x)
    x_transformed = preprocessor.fit_transform(x_prepared)

    model = xgb.XGBClassifier(use_label_encoder=False)
    fitted_model = model.fit(x_transformed, y)
    cbw.save_model(
        model_path,
        fitted_model,
        preprocessing=preprocessor,
        data_preparation=drop_column_transformer,
        zip=False,
    )

    loaded_model = cbw.load_model(model_path)
    original_model_predictions = fitted_model.predict_proba(x_transformed)
    loaded_model_predictions = loaded_model.predict_proba(x, preprocess=False)
    np.testing.assert_raises(
        AssertionError,
        np.testing.assert_array_equal,
        original_model_predictions,
        loaded_model_predictions,
    )
def test_iris_pytorch_data_preparation_without_preprocessing(
    add_value_to_column_transformer,
    iris_training,
    iris_test,
    iris_pytorch_model,
    model_path,
):
    x_train, y_train = iris_training

    sk_transformer = sk_preprocessing.StandardScaler()
    x_transformed = sk_transformer.fit_transform(x_train)

    x_transformed = torch.Tensor(x_transformed)
    y_train = torch.Tensor(y_train.values)

    def preprocessing_function(x_data):
        x_transformed = sk_transformer.transform(x_data)
        return x_transformed

    model = iris_pytorch_model
    iris_pytorch_model_training(model, x_transformed, y_train)

    with pytest.raises(ValueError):
        cbw.save_model(
            model_path,
            model,
            data_preparation=add_value_to_column_transformer,
            zip=False,
        )
def test_iris_pytorch_preprocessing_with_custom_transformer(
        custom_transformer, iris_training, iris_test, iris_pytorch_model,
        model_path):
    x_train, y_train = iris_training
    x_test, _ = iris_test

    x_transformed = custom_transformer(x_train)
    x_transformed = torch.Tensor(x_transformed.values)
    y_train = torch.Tensor(y_train.values)

    def preprocessing_function(x_data):
        x_transformed = custom_transformer(x_data)
        return x_transformed

    model = iris_pytorch_model
    iris_pytorch_model_training(model, x_transformed, y_train)

    cbw.save_model(model_path,
                   model,
                   preprocessing=preprocessing_function,
                   zip=False)
    loaded_model = cbw.load_model(model_path)

    x_test_transformed = preprocessing_function(x_test)
    x_test_transformed = torch.Tensor(x_test_transformed.values)
    original_model_predictions = model(x_test_transformed).detach().numpy()
    loaded_model_predictions = loaded_model.predict(x_test)

    np.testing.assert_array_equal(original_model_predictions,
                                  loaded_model_predictions)
Example no. 18
def test_iris_sklearn_preprocessing_check_model_and_preprocessing_signature(
    sklearn_model, preprocessor, iris_data, tmpdir
):
    x, y = iris_data
    x_transformed = preprocessor.fit_transform(x)
    fitted_model = sklearn_model.fit(x_transformed, y)
    tmp_model_path = str(tmpdir + "/saved_model")
    cbw.save_model(
        tmp_model_path,
        fitted_model,
        preprocessing=preprocessor,
        input_data=x,
        zip=False,
    )
    loaded_model = cbw.load_model(tmp_model_path)
    original_model_predictions = fitted_model.predict_proba(x_transformed[:5])
    loaded_model_predictions = loaded_model.predict_proba(x[:5])
    np.testing.assert_array_equal(original_model_predictions, loaded_model_predictions)

    mlmodel = cbw.Model.load(tmp_model_path)
    preprocessing_input_schema = mlmodel.get_preprocessing_input_schema()
    preprocessing_output_schema = mlmodel.get_preprocessing_output_schema()
    model_input_schema = mlmodel.get_model_input_schema()

    assert _check_schema(x, preprocessing_input_schema)
    assert _check_schema(x_transformed, preprocessing_output_schema)
    assert _check_schema(x_transformed, model_input_schema)
    assert preprocessing_output_schema == model_input_schema
Example no. 19
def test_boston_pytorch_preprocessing_with_function_transformer(
        sk_function_transformer, boston_training_test, model_path):
    x_train, x_test, y_train, _ = boston_training_test

    x_transformed = sk_function_transformer.fit_transform(x_train)
    x_transformed = torch.Tensor(x_transformed)
    y_train = torch.Tensor(y_train.values)

    def preprocessing_function(x_data):
        x_transformed = sk_function_transformer.transform(x_data)
        return x_transformed

    model = BostonModel()
    model.train()
    train(model, x_transformed, y_train)

    cbw.save_model(model_path,
                   model,
                   preprocessing=preprocessing_function,
                   zip=False)
    loaded_model = cbw.load_model(model_path)

    x_test_transformed = preprocessing_function(x_test)
    x_test_transformed = torch.Tensor(x_test_transformed)
    original_model_predictions = model(x_test_transformed).detach().numpy()
    loaded_model_predictions = loaded_model.predict(x_test)

    np.testing.assert_array_equal(original_model_predictions,
                                  loaded_model_predictions)
Example no. 20
def test_iris_sklearn_conda_env(sklearn_model, iris_data, tmpdir):
    import sklearn
    import cloudpickle

    x, y = iris_data
    fitted_model = sklearn_model.fit(x, y)
    tmp_model_path = str(tmpdir + "/saved_model")
    cbw.save_model(tmp_model_path, fitted_model, zip=False)

    with open(tmp_model_path + "/conda.yaml", "r") as f:
        conda_env = yaml.safe_load(f)

    python_version = "{major}.{minor}.{micro}".format(
        major=version_info.major, minor=version_info.minor, micro=version_info.micro
    )
    sklearn_version = sklearn.__version__
    cloudpickle_version = cloudpickle.__version__

    channels_list = ["defaults", "conda-forge"]
    dependencies = [
        "python={}".format(python_version),
        "pip",
        {
            "pip": [
                "cloudpickle=={}".format(cloudpickle_version),
                "scikit-learn=={}".format(sklearn_version),
            ]
        },
    ]
    assert conda_env["channels"] == channels_list
    assert conda_env["dependencies"] == dependencies
def tests_adult_xgboost_path_already_exists(adult_training, adult_test,
                                            model_path):
    x_training, y_training = adult_training
    x_test, _ = adult_test
    x_preprocessor, y_encoder = x_and_y_preprocessing(x_training)

    x_transformed = x_preprocessor.fit_transform(x_training)
    y_transformed = y_encoder.fit_transform(y_training)

    model = xgb.XGBClassifier(
        colsample_bytree=1.0,
        max_depth=10,
        min_child_weight=0,
        subsample=0.5,
        reg_lambda=100.0,
        n_estimators=20,
        random_state=42,
    )

    fitted_model = model.fit(x_transformed, y_transformed)
    cbw.save_model(model_path,
                   fitted_model,
                   preprocessing=x_preprocessor,
                   zip=False)
    with pytest.raises(cbw.ClearboxWrapperException):
        cbw.save_model(model_path,
                       fitted_model,
                       preprocessing=x_preprocessor,
                       zip=False)
Example no. 22
def test_iris_sklearn_predict_without_preprocessing(
    sklearn_model, preprocessor, iris_data, drop_column_transformer, tmpdir
):
    x, y = iris_data
    data_preparation = drop_column_transformer
    x_prepared = data_preparation(x)
    x_transformed = preprocessor.fit_transform(x_prepared)
    fitted_model = sklearn_model.fit(x_transformed, y)
    tmp_model_path = str(tmpdir + "/saved_model")
    cbw.save_model(
        tmp_model_path,
        fitted_model,
        preprocessing=preprocessor,
        data_preparation=data_preparation,
        zip=False,
    )
    loaded_model = cbw.load_model(tmp_model_path)
    original_model_predictions = fitted_model.predict_proba(x_transformed[:5])
    loaded_model_predictions = loaded_model.predict_proba(x[:5], preprocess=False)
    np.testing.assert_raises(
        AssertionError,
        np.testing.assert_array_equal,
        original_model_predictions,
        loaded_model_predictions,
    )
def test_adult_xgboost_preprocessing(adult_training, adult_test, model_path):
    x_training, y_training = adult_training
    x_test, _ = adult_test
    x_preprocessor, y_encoder = x_and_y_preprocessing(x_training)

    x_transformed = x_preprocessor.fit_transform(x_training)
    y_transformed = y_encoder.fit_transform(y_training)

    model = xgb.XGBClassifier(
        colsample_bytree=1.0,
        max_depth=10,
        min_child_weight=0,
        subsample=0.5,
        reg_lambda=100.0,
        n_estimators=20,
        random_state=42,
    )

    fitted_model = model.fit(x_transformed, y_transformed)
    cbw.save_model(model_path,
                   fitted_model,
                   preprocessing=x_preprocessor,
                   zip=False)
    loaded_model = cbw.load_model(model_path)

    x_test_transformed = x_preprocessor.transform(x_test)
    original_model_predictions = fitted_model.predict_proba(x_test_transformed)
    loaded_model_predictions = loaded_model.predict_proba(x_test)

    np.testing.assert_array_equal(original_model_predictions,
                                  loaded_model_predictions)
def test_iris_keras_conda_env(iris_data, keras_model, model_path):
    import cloudpickle
    import tensorflow

    x, y = iris_data

    model = keras_model
    model.fit(x, y)
    cbw.save_model(model_path, model, zip=False)

    with open(model_path + "/conda.yaml", "r") as f:
        conda_env = yaml.safe_load(f)

    python_version = "{major}.{minor}.{micro}".format(major=version_info.major,
                                                      minor=version_info.minor,
                                                      micro=version_info.micro)
    tf_version = tensorflow.__version__
    cloudpickle_version = cloudpickle.__version__

    channels_list = ["defaults", "conda-forge"]
    dependencies = [
        "python={}".format(python_version),
        "pip",
        {
            "pip": [
                "cloudpickle=={}".format(cloudpickle_version),
                "tensorflow=={}".format(tf_version),
            ]
        },
    ]
    assert conda_env["channels"] == channels_list
    assert conda_env["dependencies"] == dependencies
def test_iris_keras_predict_without_data_preparation(preprocessor,
                                                     drop_column_transformer,
                                                     iris_data, model_path):
    x, y = iris_data
    x_prepared = drop_column_transformer(x)
    x_transformed = preprocessor.fit_transform(x_prepared)

    model = Sequential()
    model.add(Dense(8, input_dim=x_transformed.shape[1], activation="relu"))
    model.add(Dense(3, activation="softmax"))

    model.compile(optimizer="adam",
                  loss="sparse_categorical_crossentropy",
                  metrics=["accuracy"])
    model.fit(x_transformed, y)
    cbw.save_model(
        model_path,
        model,
        preprocessing=preprocessor,
        data_preparation=drop_column_transformer,
        zip=False,
    )

    loaded_model = cbw.load_model(model_path)
    with pytest.raises(ValueError):
        loaded_model.predict(x, prepare_data=False)
def test_iris_keras_get_prepared_and_processed_data(preprocessor,
                                                    drop_column_transformer,
                                                    iris_data, model_path):
    x, y = iris_data
    x_prepared = drop_column_transformer(x)
    x_transformed = preprocessor.fit_transform(x_prepared)

    model = Sequential()
    model.add(Dense(8, input_dim=x_transformed.shape[1], activation="relu"))
    model.add(Dense(3, activation="softmax"))

    model.compile(optimizer="adam",
                  loss="sparse_categorical_crossentropy",
                  metrics=["accuracy"])
    model.fit(x_transformed, y)
    cbw.save_model(
        model_path,
        model,
        preprocessing=preprocessor,
        data_preparation=drop_column_transformer,
        zip=False,
    )

    loaded_model = cbw.load_model(model_path)
    x_prepared_by_loaded_data_preparation = loaded_model.prepare_data(x)
    x_transformed_by_loaded_preprocessing = loaded_model.preprocess_data(
        x_prepared_by_loaded_data_preparation)
    np.testing.assert_array_equal(x_transformed,
                                  x_transformed_by_loaded_preprocessing)
Example no. 27
def tests_iris_sklearn_path_already_exists(iris_data, tmpdir):
    x, y = iris_data
    model = linear_model.LogisticRegression(max_iter=150)
    fitted_model = model.fit(x, y)
    tmp_model_path = str(tmpdir + "/saved_model")
    cbw.save_model(tmp_model_path, fitted_model, zip=False)
    with pytest.raises(cbw.ClearboxWrapperException):
        cbw.save_model(tmp_model_path, fitted_model, zip=False)
def tests_boston_sklearn_path_already_exists(boston_data, tmpdir):
    x, y = boston_data
    sklearn_model = tree.DecisionTreeRegressor()
    fitted_model = sklearn_model.fit(x, y)
    tmp_model_path = str(tmpdir + "/saved_model")
    cbw.save_model(tmp_model_path, fitted_model, zip=False)
    with pytest.raises(cbw.ClearboxWrapperException):
        cbw.save_model(tmp_model_path, fitted_model, zip=False)
def test_adult_xgb_check_model_preprocessing_and_data_preparation_signature(
        adult_training, adult_test, data_preparation, model_path):
    x_training, y_training = adult_training
    x_test, _ = adult_test

    x_training_prepared = data_preparation(x_training)
    x_preprocessor, y_encoder = x_and_y_preprocessing(x_training_prepared)

    x_transformed = x_preprocessor.fit_transform(x_training_prepared)
    y_transformed = y_encoder.fit_transform(y_training)

    model = xgb.XGBClassifier(
        colsample_bytree=1.0,
        max_depth=10,
        min_child_weight=0,
        subsample=0.5,
        reg_lambda=100.0,
        n_estimators=20,
        random_state=42,
    )

    fitted_model = model.fit(x_transformed, y_transformed)
    cbw.save_model(
        model_path,
        fitted_model,
        preprocessing=x_preprocessor,
        data_preparation=data_preparation,
        input_data=x_training,
        zip=False,
    )
    loaded_model = cbw.load_model(model_path)

    x_test_prepared = data_preparation(x_test)
    x_test_transformed = x_preprocessor.transform(x_test_prepared)

    original_model_predictions = fitted_model.predict(x_test_transformed)
    loaded_model_predictions = loaded_model.predict(x_test)

    np.testing.assert_array_equal(original_model_predictions,
                                  loaded_model_predictions)

    mlmodel = cbw.Model.load(model_path)
    data_preparation_input_schema = mlmodel.get_data_preparation_input_schema()
    data_preparation_output_schema = mlmodel.get_data_preparation_output_schema()
    preprocessing_input_schema = mlmodel.get_preprocessing_input_schema()
    preprocessing_output_schema = mlmodel.get_preprocessing_output_schema()
    model_input_schema = mlmodel.get_model_input_schema()

    assert _check_schema(x_training, data_preparation_input_schema)
    assert _check_schema(x_training_prepared, data_preparation_output_schema)
    assert _check_schema(x_training_prepared, preprocessing_input_schema)
    assert _check_schema(x_transformed, preprocessing_output_schema)
    assert _check_schema(x_transformed, model_input_schema)
    assert not _check_schema(x_training, model_input_schema)
    assert data_preparation_output_schema == preprocessing_input_schema
    assert preprocessing_output_schema == model_input_schema
Example no. 30
def test_iris_sklearn_load_preprocessing_without_preprocessing(iris_data, tmpdir):
    x, y = iris_data
    model = linear_model.LogisticRegression(max_iter=150)
    fitted_model = model.fit(x, y)
    tmp_model_path = str(tmpdir + "/saved_model")
    cbw.save_model(tmp_model_path, fitted_model, zip=False)
    loaded_model = cbw.load_model(tmp_model_path)
    with pytest.raises(cbw.ClearboxWrapperException):
        loaded_model.preprocess_data(x)