Example #1
0
def test_BoxCoxTargetTransformer_target_transform():
    """Check that the target transform and its inverse round-trip.

    For each tested ``ll`` value a grid of targets is transformed, mapped
    back, and transformed again.  Neither of the first two results may
    contain missing values, and both round trips must agree with their
    starting point to within ``10**(-10)``.
    """
    tolerance = 10 ** (-10)

    for box_cox_param in (0, 0.1, 0.5, 2):
        transformer = BoxCoxTargetTransformer(Ridge(), ll=box_cox_param)

        # The wrapper has to be reported as a regressor, not a classifier.
        assert not is_classifier(transformer)
        assert is_regressor(transformer)

        # Grid covering negative as well as positive target values.
        targets = np.arange(-100, 100, step=0.1)

        transformed = transformer.target_transform(targets)
        restored = transformer.target_inverse_transform(transformed)
        retransformed = transformer.target_transform(restored)

        # No missing value may appear in either direction.
        for values in (transformed, restored):
            assert not pd.Series(values).isnull().any()

        # inverse(transform(y)) must give back y ...
        restore_gap = np.abs(targets - restored)
        assert np.max(restore_gap) <= tolerance

        # ... and transform(inverse(transform(y))) must equal transform(y).
        retransform_gap = np.abs(transformed - retransformed)
        assert np.max(retransform_gap) <= tolerance
def test_filtered_get_params():
    """Check which parameters ``filtered_get_params`` keeps.

    Expectations encoded by the assertions:

    * a forest built with ``n_estimators=250`` (checked to differ from the
      class default) yields exactly ``{"n_estimators": 250}``;
    * additionally passing ``max_depth=None`` does not add an entry;
    * for a ``BoxCoxTargetTransformer`` the ``"model"`` entry is always kept,
      while ``"ll"`` is dropped for ``ll=0`` and kept for ``ll=1`` (the
      latter is checked to differ from the default ``ll``).
    """
    # --- plain estimator -------------------------------------------------
    classifier = RandomForestClassifier(n_estimators=250)
    default_n_estimators = RandomForestClassifier().get_params()["n_estimators"]
    assert default_n_estimators != 250
    assert filtered_get_params(classifier) == {"n_estimators": 250}

    classifier = RandomForestClassifier(n_estimators=250, max_depth=None)
    assert filtered_get_params(classifier) == {"n_estimators": 250}

    # --- wrapper with ll=0: 'll' is expected to be filtered out -----------
    wrapped = BoxCoxTargetTransformer(
        RandomForestClassifier(n_estimators=250), ll=0)
    kept = filtered_get_params(wrapped)

    assert "ll" not in kept
    assert "model" in kept

    # --- wrapper with ll=1: 'll' is expected to be kept --------------------
    wrapped = BoxCoxTargetTransformer(
        RandomForestClassifier(n_estimators=250), ll=1)
    default_ll = BoxCoxTargetTransformer(
        RandomForestClassifier()).get_params()["ll"]
    assert default_ll != 1
    kept = filtered_get_params(wrapped)

    assert "ll" in kept
    assert kept["ll"] == 1
    assert "model" in kept
Example #3
0
def test_BoxCoxTargetTransformer():
    """Check the shape of ``BoxCoxTargetTransformer`` predictions.

    A ``Ridge`` model wrapped in ``BoxCoxTargetTransformer`` is fitted on
    random features and strictly positive targets for several ``ll`` values.
    Predictions on the training matrix and on a second matrix must be
    one-dimensional and contain one value per row of the matrix they were
    computed from.
    """
    np.random.seed(123)  # fixed seed so the generated data is reproducible
    X = np.random.randn(100, 10)
    # exp() of a normal draw keeps every target strictly positive.
    y = np.exp(np.random.randn(100))

    # Second feature matrix, drawn separately and scaled by 2.
    X2 = np.random.randn(100, 10) * 2

    for ll in (0, 0.1, 0.5, 2):

        bb = BoxCoxTargetTransformer(Ridge(), ll=ll)

        bb.fit(X, y)

        yhat = bb.predict(X)
        yhat2 = bb.predict(X2)

        assert yhat.ndim == 1
        assert yhat.shape[0] == y.shape[0]

        assert yhat2.ndim == 1
        # Predictions on X2 are compared with X2's own row count; comparing
        # with y only worked because both happened to have 100 rows.
        assert yhat2.shape[0] == X2.shape[0]
Example #4
0
def test_approx_cross_validation_BoxCoxTargetTransformer():
    """Check ``approx_cross_validation`` on a ``BoxCoxTargetTransformer``.

    For each tested ``ll`` value the cross-validation is run twice with
    ``cv=10`` and ``return_predict=True``: once with the scoring given as a
    list of scorer names, once with the object built by ``create_scoring``.
    Each run must return a 10-row DataFrame holding the test/train score
    columns plus one-dimensional predictions with one value per target, and
    must leave both the wrapper and its inner model unfitted.  The two runs
    must agree to within ``10**(-5)``.
    """
    np.random.seed(123)
    X = np.random.randn(100, 10)
    y = np.exp(np.random.randn(100))

    def check_cv_output(cv_result, predictions):
        # Shape and content checks shared by both runs.
        assert isinstance(cv_result, pd.DataFrame)
        assert cv_result.shape[0] == 10
        assert "test_neg_mean_squared_error" in cv_result
        assert "train_neg_mean_squared_error" in cv_result

        assert predictions.ndim == 1
        assert predictions.shape[0] == y.shape[0]

    def check_left_unfitted(estimator):
        # Neither the wrapper nor the wrapped model may be usable for
        # prediction after the cross-validation.
        with pytest.raises(NotFittedError):
            estimator.predict(X)

        with pytest.raises(NotFittedError):
            estimator.model.predict(X)

    for ll in (0, 0.1, 0.5, 2):
        bb = BoxCoxTargetTransformer(Ridge(), ll=ll)

        # First run: scoring passed as a list of scorer names.
        cv_by_name, pred_by_name = bb.approx_cross_validation(
            X,
            y,
            scoring=["neg_mean_squared_error"],
            cv=10,
            return_predict=True,
        )
        check_cv_output(cv_by_name, pred_by_name)
        check_left_unfitted(bb)

        # Second run, same estimator: scoring pre-built by create_scoring
        # (presumably a dictionary of scorers).
        prebuilt_scoring = create_scoring(Ridge(), ["neg_mean_squared_error"])
        cv_prebuilt, pred_prebuilt = bb.approx_cross_validation(
            X,
            y,
            scoring=prebuilt_scoring,
            cv=10,
            return_predict=True,
        )
        check_cv_output(cv_prebuilt, pred_prebuilt)
        check_left_unfitted(bb)

        # Both ways of specifying the scoring must give the same numbers.
        test_gap = np.abs(cv_prebuilt["test_neg_mean_squared_error"] -
                          cv_by_name["test_neg_mean_squared_error"]).max()
        assert test_gap <= 10 ** (-5)

        train_gap = np.abs(cv_prebuilt["train_neg_mean_squared_error"] -
                           cv_by_name["train_neg_mean_squared_error"]).max()
        assert train_gap <= 10 ** (-5)

        assert np.max(np.abs(pred_prebuilt - pred_by_name)) <= 10 ** (-5)
def test_param_from_sklearn_model():
    """Check ``param_from_sklearn_model`` and ``sklearn_model_from_param``.

    Covered models: a plain ``RandomForestClassifier``, a
    ``BoxCoxTargetTransformer`` around it (``ll=0`` and ``ll=1``), a
    ``Pipeline``, a ``GraphPipeline`` (without and with ``verbose=True``) and
    a ``GraphPipeline`` containing a ``BoxCoxTargetTransformer``.

    For each model the simplified description is compared with a literal and
    the description is fed back through ``sklearn_model_from_param`` to check
    the class of the rebuilt object.  For every case except the plain
    ``GraphPipeline`` the description is also dumped to JSON.
    """

    def make_forest():
        return RandomForestClassifier(n_estimators=250)

    def forest_description():
        # Expected simplified description of ``make_forest()``.
        return ("RandomForestClassifier", {"n_estimators": 250})

    def assert_rebuilds_same_class(estimator):
        rebuilt = sklearn_model_from_param(param_from_sklearn_model(estimator))
        assert isinstance(rebuilt, estimator.__class__)

    def assert_json_serializable(description):
        dumped = json.dumps(description)  # must not raise
        assert isinstance(dumped, str)

    # ------------------------------------------------------------------
    # Plain RandomForestClassifier
    # ------------------------------------------------------------------
    model = make_forest()
    assert RandomForestClassifier().get_params()["n_estimators"] != 250

    simplified = param_from_sklearn_model(model, simplify_default=True)
    assert simplified == forest_description()

    # Without simplification only the overall structure is checked.
    param = param_from_sklearn_model(model, simplify_default=False)
    assert isinstance(param, tuple)
    assert len(param) == 2
    assert param[0] == "RandomForestClassifier"

    assert_rebuilds_same_class(model)
    assert_json_serializable(param)
    # The round trip is checked a second time after serialising `param`.
    assert_rebuilds_same_class(model)

    # ------------------------------------------------------------------
    # BoxCoxTargetTransformer around a forest, ll=0
    # ------------------------------------------------------------------
    model = BoxCoxTargetTransformer(make_forest(), ll=0)
    param = param_from_sklearn_model(model, simplify_default=True)
    assert param == ("BoxCoxTargetTransformer", {
        "model": forest_description(),
    })

    assert_rebuilds_same_class(model)
    assert_json_serializable(param)

    # ------------------------------------------------------------------
    # BoxCoxTargetTransformer around a forest, ll=1
    # ------------------------------------------------------------------
    model = BoxCoxTargetTransformer(make_forest(), ll=1)
    param = param_from_sklearn_model(model, simplify_default=True)
    assert param == ("BoxCoxTargetTransformer", {
        "ll": 1,
        "model": forest_description(),
    })

    assert_json_serializable(param)
    assert_rebuilds_same_class(model)

    # ------------------------------------------------------------------
    # Pipeline
    # ------------------------------------------------------------------
    model = Pipeline([
        ("enc", NumericalEncoder()),
        ("forest", make_forest()),
    ])
    param = param_from_sklearn_model(model, simplify_default=True)
    assert param == ("Pipeline", {
        "steps": [
            ("enc", ("NumericalEncoder", {})),
            ("forest", forest_description()),
        ],
    })

    assert_rebuilds_same_class(model)
    assert_json_serializable(param)

    # ------------------------------------------------------------------
    # GraphPipeline (no JSON dump is performed for this case)
    # ------------------------------------------------------------------
    model = GraphPipeline(
        models={"enc": NumericalEncoder(), "forest": make_forest()},
        edges=[("enc", "forest")],
    )
    param = param_from_sklearn_model(model, simplify_default=True)
    assert param == ("GraphPipeline", {
        "models": {
            "enc": ("NumericalEncoder", {}),
            "forest": forest_description(),
        },
        "edges": [("enc", "forest")],
    })

    assert_rebuilds_same_class(model)

    # ------------------------------------------------------------------
    # GraphPipeline with verbose=True
    # ------------------------------------------------------------------
    model = GraphPipeline(
        models={"enc": NumericalEncoder(), "forest": make_forest()},
        edges=[("enc", "forest")],
        verbose=True,
    )
    param = param_from_sklearn_model(model, simplify_default=True)
    assert param == ("GraphPipeline", {
        "models": {
            "enc": ("NumericalEncoder", {}),
            "forest": forest_description(),
        },
        "edges": [("enc", "forest")],
        "verbose": True,
    })

    assert_json_serializable(param)

    # Here the rebuilt object itself is inspected: `verbose` must survive.
    rebuilt_verbose = sklearn_model_from_param(param_from_sklearn_model(model))
    assert rebuilt_verbose.verbose is True
    assert isinstance(rebuilt_verbose, model.__class__)

    # ------------------------------------------------------------------
    # GraphPipeline whose final node is a BoxCoxTargetTransformer
    # ------------------------------------------------------------------
    model = GraphPipeline(
        models={
            "enc": NumericalEncoder(),
            "forest": BoxCoxTargetTransformer(make_forest(), ll=1),
        },
        edges=[("enc", "forest")],
    )
    param = param_from_sklearn_model(model, simplify_default=True)
    assert param == ("GraphPipeline", {
        "edges": [("enc", "forest")],
        "models": {
            "enc": ("NumericalEncoder", {}),
            "forest": ("BoxCoxTargetTransformer", {
                "ll": 1,
                "model": forest_description(),
            }),
        },
    })

    assert_rebuilds_same_class(model)
    assert_json_serializable(param)