Example #1
# Imports assumed from the lale wrapper this snippet is drawn from.
import pandas as pd
from sklearn.ensemble import BaggingRegressor as SKLModel
from lale.lib.sklearn import FunctionTransformer


class _BaggingRegressorImpl:
    def __init__(
        self,
        base_estimator=None,
        n_estimators=10,
        *,
        max_samples=1.0,
        max_features=1.0,
        bootstrap=True,
        bootstrap_features=False,
        oob_score=False,
        warm_start=False,
        n_jobs=None,
        random_state=None,
        verbose=0,
    ):
        estimator_impl = base_estimator

        self._hyperparams = {
            "base_estimator": estimator_impl,
            "n_estimators": n_estimators,
            "max_samples": max_samples,
            "max_features": max_features,
            "bootstrap": bootstrap,
            "bootstrap_features": bootstrap_features,
            "oob_score": oob_score,
            "warm_start": warm_start,
            "n_jobs": n_jobs,
            "random_state": random_state,
            "verbose": verbose,
        }
        self._wrapped_model = SKLModel(**self._hyperparams)
        # restore the original operator so get_params reports it, not the impl
        self._hyperparams["base_estimator"] = base_estimator

    def get_params(self, deep=True):
        out = self._wrapped_model.get_params(deep=deep)
        # we want to return the lale operator, not the underlying impl
        out["base_estimator"] = self._hyperparams["base_estimator"]
        return out

    def fit(self, X, y, sample_weight=None):
        if isinstance(X, pd.DataFrame):
            # re-wrap the numpy batches that bagging passes to each base
            # estimator as DataFrames with the original column names, so
            # DataFrame-aware estimators keep working
            feature_transformer = FunctionTransformer(
                func=lambda X_prime: pd.DataFrame(X_prime, columns=X.columns),
                inverse_func=None,
                check_inverse=False,
            )
            self._hyperparams["base_estimator"] = (
                feature_transformer >> self._hyperparams["base_estimator"]
            )
            self._wrapped_model = SKLModel(**self._hyperparams)
        self._wrapped_model.fit(X, y, sample_weight)

        return self

    def predict(self, X):
        return self._wrapped_model.predict(X)

    def score(self, X, y, sample_weight=None):
        return self._wrapped_model.score(X, y, sample_weight)
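
A minimal usage sketch for the wrapper above, assuming the hedged imports at the top of the snippet and a scikit-learn version in which `BaggingRegressor` still accepts the `base_estimator` keyword (it was renamed to `estimator` in scikit-learn 1.2):

import numpy as np

X = np.random.RandomState(0).rand(100, 4)
y = X.sum(axis=1)

# base_estimator=None falls back to scikit-learn's default decision tree
impl = _BaggingRegressorImpl(n_estimators=25, random_state=0)
impl.fit(X, y)
print(impl.predict(X[:3]))
print(impl.get_params()["n_estimators"])  # 25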
Example #2
    def test_parameters(self):
        """ Testing parameters of Model class. """
#1.)
        #create instance of PLS model using Model class & creating instance
        #   using SKlearn libary, comparing if the parameters of both instances are equal
        pls_parameters = {"n_components": 20, "scale": False, "max_iter": 200}
        model = Model(algorithm="PlsRegression", parameters=pls_parameters)
        pls_model = PLSRegression(n_components=20, scale=False, max_iter=200)

        for k, v in model.model.get_params().items():
            self.assertIn(k, list(pls_model.get_params()))
#2.)
        rf_parameters = {"n_estimators": 200, "max_depth": 50, "min_samples_split": 10}
        model = Model(algorithm="RandomForest", parameters=rf_parameters)
        rf_model = RandomForestRegressor(n_estimators=200, max_depth=50, min_samples_split=10)

        for k, v in model.model.get_params().items():
            self.assertIn(k, list(rf_model.get_params()))
#3.)
        knn_parameters = {"n_neighbors": 10, "weights": "distance", "algorithm": "ball_tree"}
        model = Model(algorithm="KNN", parameters=knn_parameters)
        knn_model = KNeighborsRegressor(n_neighbors=10, weights="distance", algorithm="ball_tree")

        for k, v in model.model.get_params().items():
            self.assertIn(k, list(knn_model.get_params()))
#4.)
        svr_parameters = {"kernel": "poly", "degree": 5, "coef0": 1}
        model = Model(algorithm="SVR",parameters=svr_parameters)
        svr_model = SVR(kernel='poly', degree=5, coef0=1)

        for k, v in model.model.get_params().items():
            self.assertIn(k, list(svr_model.get_params()))
#5.)
        ada_parameters = {"n_estimators": 150, "learning_rate": 1.2, "loss": "square"}
        model = Model(algorithm="AdaBoost", parameters=ada_parameters)
        ada_model = AdaBoostRegressor(n_estimators=150, learning_rate=1.2, loss="square")

        for k, v in model.model.get_params().items():
            self.assertIn(k, list(ada_model.get_params()))
#6.)
        bagging_parameters = {"n_estimators": 50, "max_samples": 0.5, "max_features": 2}
        model = Model(algorithm="Bagging", parameters=bagging_parameters)
        bagging_model = BaggingRegressor(n_estimators=50, max_samples=0.5, max_features=2)

        for k, v in model.model.get_params().items():
            self.assertIn(k, list(bagging_model.get_params()))
#7.)
        lasso_parameters = {"alpha": 1.5, "max_iter": 500, "tol": 0.004}
        model = Model(algorithm="lasso", parameters=lasso_parameters)
        lasso_model = Lasso(alpha=1.5, max_iter=500, tol=0.004)

        for k, v in model.model.get_params().items():
            self.assertIn(k, list(lasso_model.get_params()))
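
For reference, the property these assertions rely on, as a minimal sketch: `get_params()` returns a dict keyed by constructor-argument names, so two instances of the same estimator class expose identical key sets regardless of the values passed.

from sklearn.linear_model import Lasso

a = Lasso(alpha=1.5, max_iter=500)
b = Lasso()
# key sets match even though the parameter values differ
assert set(a.get_params()) == set(b.get_params())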
Example #3
      f"{mean_absolute_error(target_test, target_predicted):.2f} k$")

# %% [markdown]
# Now, create a `RandomizedSearchCV` instance using the previous model and
# tune the important parameters of the bagging regressor. Find the best
# parameters and check whether you can find a set of parameters that
# improves on the default regressor, still using the mean absolute error
# as the metric.

# ```{tip}
# You can list the bagging regressor's parameters using the `get_params`
# method.
# ```

# %%
for param in bagging.get_params().keys():
    print(param)

# %%
from scipy.stats import randint
from sklearn.model_selection import RandomizedSearchCV

param_grid = {
    "n_estimators": randint(10, 30),
    "max_samples": [0.5, 0.8, 1.0],
    "max_features": [0.5, 0.8, 1.0],
    "base_estimator__max_depth": randint(3, 10),
}
search = RandomizedSearchCV(bagging,
                            param_grid,
                            n_iter=20,
                            # negated MAE, since scikit-learn maximizes scores
                            scoring="neg_mean_absolute_error")
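
# %% [markdown]
# A hedged continuation sketch: fit the search and report the tuned error.
# The names `data_train`, `target_train`, and `data_test` are assumed from
# the earlier, unshown part of this notebook.

# %%
_ = search.fit(data_train, target_train)
print(search.best_params_)

target_predicted = search.predict(data_test)
print("Mean absolute error after tuning: "
      f"{mean_absolute_error(target_test, target_predicted):.2f} k$")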
Example #4
import pandas as pd
import sklearn.utils
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.ensemble import BaggingRegressor


class EnsembleModel(BaseEstimator, TransformerMixin):
    """
    Class used to construct ensemble models with a particular number and type of weak learner (base model). The
    ensemble model is compatible with most scikit-learn regressor models and KerasRegressor models.

    Args:
        model: (str), string name denoting the name of the model type to use as the base model

        n_estimators: (int), the number of base models to include in the ensemble

        kwargs: keyword arguments for the base model parameter names and values

    Methods:
        fit: method that fits the model parameters to the provided training data
            Args:
                X: (pd.DataFrame), dataframe of X features

                y: (pd.Series), series of y target data

            Returns:
                fitted model

        predict: method that evaluates model on new data to give predictions
            Args:
                X: (pd.DataFrame), dataframe of X features

                as_frame: (bool), whether to return data as pandas dataframe (else numpy array)

            Returns:
                series or array of predicted values

        get_params: method to output key model parameters
            Args:
                deep: (bool), determines the extent of information returned, default True

            Returns:
                information on model parameters
    """
    def __init__(self, model, n_estimators, **kwargs):
        super(EnsembleModel, self).__init__()
        try:
            model = dict(sklearn.utils.all_estimators())[model](**kwargs)
        except KeyError:
            # if the name is not in scikit-learn's registry, leave `model` as
            # passed (e.g. an already-constructed KerasRegressor instance)
            print(
                'Could not find designated model type in scikit-learn model library. Note the other supported model '
                'type is the keras.wrappers.scikit_learn.KerasRegressor model')
        self.n_estimators = n_estimators
        self.model = BaggingRegressor(base_estimator=model,
                                      n_estimators=self.n_estimators)
        self.base_estimator_ = model.__class__.__name__

    def fit(self, X, y):
        return self.model.fit(X, y)

    def predict(self, X, as_frame=True):
        if as_frame:
            return pd.DataFrame(self.model.predict(X),
                                columns=['y_pred']).squeeze()
        else:
            return self.model.predict(X).ravel()

    def get_params(self, deep=True):
        return self.model.get_params(deep=deep)
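
A hypothetical usage sketch: the `model` argument must match a class name listed by `sklearn.utils.all_estimators()`, and a scikit-learn version in which `BaggingRegressor` still accepts `base_estimator` (renamed to `estimator` in 1.2) is assumed.

import numpy as np

X = pd.DataFrame(np.random.rand(50, 3), columns=["a", "b", "c"])
y = pd.Series(np.random.rand(50))

# extra keyword arguments (here max_depth) are forwarded to the base model
ensemble = EnsembleModel(model="DecisionTreeRegressor", n_estimators=10, max_depth=3)
ensemble.fit(X, y)
print(ensemble.base_estimator_)  # 'DecisionTreeRegressor'
print(ensemble.predict(X).head())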