def test_BoxCoxTargetTransformer_target_transform():
    """Check that the target transform and its inverse round-trip.

    For each tested ``ll`` value, the wrapper around ``Ridge`` must be
    reported as a regressor (and not a classifier), the transformed and
    back-transformed targets must contain no null values, and applying
    transform -> inverse -> transform must reproduce the inputs up to 1e-10.
    """
    tolerance = 10 ** (-10)

    for ll in (0, 0.1, 0.5, 2):
        transformer = BoxCoxTargetTransformer(Ridge(), ll=ll)

        assert is_regressor(transformer)
        assert not is_classifier(transformer)

        # Grid covering negative, near-zero and positive target values.
        target = np.arange(-100, 100, step=0.1)

        forward = transformer.target_transform(target)
        backward = transformer.target_inverse_transform(forward)
        forward_again = transformer.target_transform(backward)

        # Neither direction may produce missing values.
        for values in (forward, backward):
            assert not pd.Series(values).isnull().any()

        # inverse(transform(y)) == y and transform(inverse(my)) == my.
        round_trip_error = np.max(np.abs(target - backward))
        assert round_trip_error <= tolerance

        re_transform_error = np.max(np.abs(forward - forward_again))
        assert re_transform_error <= tolerance
def test_filtered_get_params():
    """Check that ``filtered_get_params`` keeps only non-default parameters.

    Covers a plain ``RandomForestClassifier`` and a
    ``BoxCoxTargetTransformer`` wrapping one, with ``ll`` set once to 0
    (expected to be filtered out) and once to 1 (expected to be kept).
    """
    expected_forest_params = {"n_estimators": 250}

    # Plain estimator: 250 must differ from the default for the test to be
    # meaningful.
    classifier = RandomForestClassifier(n_estimators=250)
    default_n_estimators = RandomForestClassifier().get_params()["n_estimators"]
    assert default_n_estimators != 250
    assert filtered_get_params(classifier) == expected_forest_params

    # Explicitly passing max_depth=None must not add it to the result.
    classifier = RandomForestClassifier(n_estimators=250, max_depth=None)
    assert filtered_get_params(classifier) == expected_forest_params

    # Wrapped estimator with ll=0: 'll' is dropped, 'model' is kept.
    wrapped = BoxCoxTargetTransformer(
        RandomForestClassifier(n_estimators=250), ll=0
    )
    filtered = filtered_get_params(wrapped)
    assert "ll" not in filtered
    assert "model" in filtered

    # Wrapped estimator with ll=1: 'll' is reported.
    wrapped = BoxCoxTargetTransformer(
        RandomForestClassifier(n_estimators=250), ll=1
    )
    default_ll = BoxCoxTargetTransformer(RandomForestClassifier()).get_params()["ll"]
    assert default_ll != 1

    filtered = filtered_get_params(wrapped)
    assert "ll" in filtered
    assert filtered["ll"] == 1
    assert "model" in filtered
def test_BoxCoxTargetTransformer():
    """Check fit/predict output shapes of ``BoxCoxTargetTransformer``.

    A ``Ridge`` model is wrapped for several ``ll`` values, fitted on
    strictly positive targets, and asked to predict both on the training
    features and on a second, rescaled feature matrix. Every prediction
    must be one-dimensional with one value per input row.
    """
    np.random.seed(123)
    X = np.random.randn(100, 10)
    y = np.exp(np.random.randn(100))  # exp() keeps the targets strictly positive

    X2 = np.random.randn(100, 10) * 2

    for ll in (0, 0.1, 0.5, 2):
        bb = BoxCoxTargetTransformer(Ridge(), ll=ll)
        bb.fit(X, y)

        yhat = bb.predict(X)
        yhat2 = bb.predict(X2)

        assert yhat.ndim == 1
        assert yhat.shape[0] == X.shape[0]
        assert yhat.shape[0] == y.shape[0]

        # Predictions on X2 are checked against X2 itself rather than against
        # y, so the assertion stays correct if the fixture sizes ever diverge.
        assert yhat2.ndim == 1
        assert yhat2.shape[0] == X2.shape[0]
def test_approx_cross_validation_BoxCoxTargetTransformer():
    """Check ``approx_cross_validation`` on ``BoxCoxTargetTransformer``.

    For several ``ll`` values the cross-validation is run twice, once with
    the scoring given as a list of scorer names and once with the object
    returned by ``create_scoring``. Both runs must return a 10-row DataFrame
    with train/test score columns plus 1-d predictions, must leave the
    wrapper and its inner model unfitted, and must agree with each other
    up to 1e-5.
    """
    np.random.seed(123)
    X = np.random.randn(100, 10)
    y = np.exp(np.random.randn(100))

    def check_cv_output(wrapper, cv_result, predictions):
        # Shared checks on one (cv_result, predictions) pair.
        assert isinstance(cv_result, pd.DataFrame)
        assert cv_result.shape[0] == 10
        assert "test_neg_mean_squared_error" in cv_result
        assert "train_neg_mean_squared_error" in cv_result

        assert predictions.ndim == 1
        assert predictions.shape[0] == y.shape[0]

        # Neither the wrapper nor the wrapped model may be fitted afterwards.
        with pytest.raises(NotFittedError):
            wrapper.predict(X)

        with pytest.raises(NotFittedError):
            wrapper.model.predict(X)

    tolerance = 10 ** (-5)

    for ll in (0, 0.1, 0.5, 2):
        bb = BoxCoxTargetTransformer(Ridge(), ll=ll)

        # --- Scorer given as a list of strings ---
        cv_res1, yhat1 = bb.approx_cross_validation(
            X,
            y,
            scoring=["neg_mean_squared_error"],
            cv=10,
            return_predict=True,
        )
        check_cv_output(bb, cv_res1, yhat1)

        # --- Scorer given as the object built by create_scoring ---
        scoring = create_scoring(Ridge(), ["neg_mean_squared_error"])
        cv_res2, yhat2 = bb.approx_cross_validation(
            X, y, scoring=scoring, cv=10, return_predict=True
        )
        check_cv_output(bb, cv_res2, yhat2)

        # --- Both ways of specifying the scorer must give the same results ---
        test_gap = np.abs(
            cv_res2["test_neg_mean_squared_error"]
            - cv_res1["test_neg_mean_squared_error"]
        ).max()
        assert test_gap <= tolerance

        train_gap = np.abs(
            cv_res2["train_neg_mean_squared_error"]
            - cv_res1["train_neg_mean_squared_error"]
        ).max()
        assert train_gap <= tolerance

        assert np.max(np.abs(yhat2 - yhat1)) <= tolerance
def test_param_from_sklearn_model():
    """Check ``param_from_sklearn_model`` / ``sklearn_model_from_param``.

    Each scenario builds a model, converts it to its parameter description,
    compares that description with the expected nested ``(name, params)``
    structure, and (where the scenario asks for it) verifies that the
    description is JSON-serializable and that converting it back yields an
    instance of the same class. Scenarios: plain estimator, target
    transformer wrapping an estimator, ``Pipeline``, ``GraphPipeline``
    (with and without ``verbose``), and a ``GraphPipeline`` containing a
    wrapped estimator.
    """

    def assert_round_trip(estimator):
        # model -> description -> model must give back the same class.
        rebuilt = sklearn_model_from_param(param_from_sklearn_model(estimator))
        assert isinstance(rebuilt, estimator.__class__)

    def assert_json_serializable(description):
        # The description must be dumpable to a JSON string.
        dumped = json.dumps(description)
        assert isinstance(dumped, str)

    forest_description = ("RandomForestClassifier", {"n_estimators": 250})
    encoder_description = ("NumericalEncoder", {})

    # ---------------------------------------------------------------- #
    # Simple RandomForest
    # ---------------------------------------------------------------- #
    model = RandomForestClassifier(n_estimators=250)
    assert RandomForestClassifier().get_params()["n_estimators"] != 250

    simplified = param_from_sklearn_model(model, simplify_default=True)
    assert simplified == forest_description

    param = param_from_sklearn_model(model, simplify_default=False)
    assert isinstance(param, tuple)
    assert len(param) == 2
    assert param[0] == "RandomForestClassifier"

    assert_round_trip(model)
    assert_json_serializable(param)
    assert_round_trip(model)

    # ---------------------------------------------------------------- #
    # Composition: BoxCoxTargetTransformer of RandomForestClassifier, ll=0
    # ---------------------------------------------------------------- #
    model = BoxCoxTargetTransformer(
        RandomForestClassifier(n_estimators=250), ll=0
    )
    param = param_from_sklearn_model(model, simplify_default=True)
    assert param == ("BoxCoxTargetTransformer", {"model": forest_description})

    assert_round_trip(model)
    assert_json_serializable(param)

    # ---------------------------------------------------------------- #
    # Composition: BoxCoxTargetTransformer of RandomForestClassifier, ll=1
    # ---------------------------------------------------------------- #
    model = BoxCoxTargetTransformer(
        RandomForestClassifier(n_estimators=250), ll=1
    )
    param = param_from_sklearn_model(model, simplify_default=True)
    assert param == (
        "BoxCoxTargetTransformer",
        {"ll": 1, "model": forest_description},
    )

    assert_json_serializable(param)
    assert_round_trip(model)

    # ---------------------------------------------------------------- #
    # Pipeline
    # ---------------------------------------------------------------- #
    model = Pipeline(
        [
            ("enc", NumericalEncoder()),
            ("forest", RandomForestClassifier(n_estimators=250)),
        ]
    )
    param = param_from_sklearn_model(model, simplify_default=True)
    assert param == (
        "Pipeline",
        {
            "steps": [
                ("enc", encoder_description),
                ("forest", forest_description),
            ]
        },
    )

    assert_round_trip(model)
    assert_json_serializable(param)

    # ---------------------------------------------------------------- #
    # GraphPipeline
    # ---------------------------------------------------------------- #
    model = GraphPipeline(
        models={
            "enc": NumericalEncoder(),
            "forest": RandomForestClassifier(n_estimators=250),
        },
        edges=[("enc", "forest")],
    )
    param = param_from_sklearn_model(model, simplify_default=True)
    assert param == (
        "GraphPipeline",
        {
            "models": {
                "enc": encoder_description,
                "forest": forest_description,
            },
            "edges": [("enc", "forest")],
        },
    )

    assert_round_trip(model)

    # ---------------------------------------------------------------- #
    # GraphPipeline with verbose=True
    # ---------------------------------------------------------------- #
    model = GraphPipeline(
        models={
            "enc": NumericalEncoder(),
            "forest": RandomForestClassifier(n_estimators=250),
        },
        edges=[("enc", "forest")],
        verbose=True,
    )
    param = param_from_sklearn_model(model, simplify_default=True)
    assert param == (
        "GraphPipeline",
        {
            "models": {
                "enc": encoder_description,
                "forest": forest_description,
            },
            "edges": [("enc", "forest")],
            "verbose": True,
        },
    )

    assert_json_serializable(param)

    # The non-default 'verbose' flag must survive the round trip.
    model2 = sklearn_model_from_param(param_from_sklearn_model(model))
    assert model2.verbose is True
    assert isinstance(model2, model.__class__)

    # ---------------------------------------------------------------- #
    # GraphPipeline + composition
    # ---------------------------------------------------------------- #
    model = GraphPipeline(
        models={
            "enc": NumericalEncoder(),
            "forest": BoxCoxTargetTransformer(
                RandomForestClassifier(n_estimators=250), ll=1
            ),
        },
        edges=[("enc", "forest")],
    )
    param = param_from_sklearn_model(model, simplify_default=True)
    assert param == (
        "GraphPipeline",
        {
            "edges": [("enc", "forest")],
            "models": {
                "enc": encoder_description,
                "forest": (
                    "BoxCoxTargetTransformer",
                    {"ll": 1, "model": forest_description},
                ),
            },
        },
    )

    assert_round_trip(model)
    assert_json_serializable(param)