class _BaggingRegressorImpl:
    """Wrapper around the scikit-learn bagging regressor (``SKLModel``) that
    keeps the lale operator passed as ``base_estimator`` visible through
    ``get_params`` while delegating the actual work to the wrapped model.
    """

    def __init__(
        self,
        base_estimator=None,
        n_estimators=10,
        *,
        max_samples=1.0,
        max_features=1.0,
        bootstrap=True,
        bootstrap_features=False,
        oob_score=False,
        warm_start=False,
        n_jobs=None,
        random_state=None,
        verbose=0,
    ):
        self._hyperparams = {
            "base_estimator": base_estimator,
            "n_estimators": n_estimators,
            "max_samples": max_samples,
            "max_features": max_features,
            "bootstrap": bootstrap,
            "bootstrap_features": bootstrap_features,
            "oob_score": oob_score,
            "warm_start": warm_start,
            "n_jobs": n_jobs,
            "random_state": random_state,
            "verbose": verbose,
        }
        self._wrapped_model = SKLModel(**self._hyperparams)

    def get_params(self, deep=True):
        """Return the wrapped model's params, but report the lale operator
        (not the underlying impl) as ``base_estimator``."""
        out = self._wrapped_model.get_params(deep=deep)
        out["base_estimator"] = self._hyperparams["base_estimator"]
        return out

    def fit(self, X, y, sample_weight=None):
        """Fit the wrapped bagging model.

        When ``X`` is a pandas DataFrame, the base estimator is prefixed
        with a transformer that restores column names (bagging passes bare
        numpy arrays to its sub-estimators).

        BUGFIX: the pipeline is built on a *local copy* of the hyperparams.
        The previous code assigned the wrapped pipeline back into
        ``self._hyperparams["base_estimator"]``, so a second ``fit`` call
        stacked another FunctionTransformer on top of the first, and
        ``get_params`` stopped returning the caller's original operator.
        """
        if isinstance(X, pd.DataFrame):
            feature_transformer = FunctionTransformer(
                func=lambda X_prime: pd.DataFrame(X_prime, columns=X.columns),
                inverse_func=None,
                check_inverse=False,
            )
            hyperparams = dict(self._hyperparams)  # do not mutate stored params
            hyperparams["base_estimator"] = (
                feature_transformer >> hyperparams["base_estimator"]
            )
            self._wrapped_model = SKLModel(**hyperparams)
        self._wrapped_model.fit(X, y, sample_weight)
        return self

    def predict(self, X):
        """Predict regression targets for ``X`` via the wrapped model."""
        return self._wrapped_model.predict(X)

    def score(self, X, y, sample_weight=None):
        """Return the wrapped model's R^2 score on ``(X, y)``."""
        return self._wrapped_model.score(X, y, sample_weight)
def test_parameters(self):
    """
    Testing parameters of Model class.
    """
    # Table-driven: for each algorithm, build a Model with the given
    # hyperparameters and check that every parameter key exposed by the
    # wrapped model also exists on a reference scikit-learn estimator.
    cases = [
        # 1.) PLS regression
        ("PlsRegression",
         {"n_components": 20, "scale": False, "max_iter": 200},
         PLSRegression(n_components=20, scale="svd", max_iter=200)),
        # 2.) Random forest
        ("RandomForest",
         {"n_estimators": 200, "max_depth": 50, "min_samples_split": 10},
         RandomForestRegressor(n_estimators=200, max_depth=50, min_samples_split=10)),
        # 3.) K nearest neighbours
        ("KNN",
         {"n_neighbors": 10, "weights": "distance", "algorithm": "ball_tree"},
         KNeighborsRegressor(n_neighbors=10, weights='distance', algorithm="kd_tree")),
        # 4.) Support vector regression
        ("SVR",
         {"kernel": "poly", "degree": 5, "coef0": 1},
         SVR(kernel='poly', degree=5, coef0=1)),
        # 5.) AdaBoost
        ("AdaBoost",
         {"n_estimators": 150, "learning_rate": 1.2, "loss": "square"},
         AdaBoostRegressor(n_estimators=150, learning_rate=1.2, loss="square")),
        # 6.) Bagging
        ("Bagging",
         {"n_estimators": 50, "max_samples": 1.5, "max_features": 2},
         BaggingRegressor(n_estimators=50, max_samples=1.5, max_features="square")),
        # 7.) Lasso
        ("lasso",
         {"alpha": 1.5, "max_iter": 500, "tol": 0.004},
         Lasso(alpha=1.5, max_iter=500, tol=0.004)),
    ]
    for algorithm, parameters, reference in cases:
        model = Model(algorithm=algorithm, parameters=parameters)
        reference_keys = list(reference.get_params())
        for key in model.model.get_params():
            self.assertIn(key, reference_keys)
f"{mean_absolute_error(target_test, target_predicted):.2f} k$") # %% [markdown] # Now, create a `RandomizedSearchCV` instance using the previous model and # tune the important parameters of the bagging regressor. Find the best # parameters and check if you are able to find a set of parameters that # improve the default regressor still using the mean absolute error as a # metric. # ```{tip} # You can list the bagging regressor's parameters using the `get_params` # method. # ``` # %% for param in bagging.get_params().keys(): print(param) # %% from scipy.stats import randint from sklearn.model_selection import RandomizedSearchCV param_grid = { "n_estimators": randint(10, 30), "max_samples": [0.5, 0.8, 1.0], "max_features": [0.5, 0.8, 1.0], "base_estimator__max_depth": randint(3, 10), } search = RandomizedSearchCV(bagging, param_grid, n_iter=20,
class EnsembleModel(BaseEstimator, TransformerMixin):
    """
    Class used to construct ensemble models with a particular number and type of
    weak learner (base model). The ensemble model is compatible with most
    scikit-learn regressor models and KerasRegressor models.

    Args:
        model: (str), string name denoting the name of the model type to use as
            the base model
        n_estimators: (int), the number of base models to include in the ensemble
        kwargs: keyword arguments for the base model parameter names and values

    Methods:
        fit: fits the model parameters to the provided training data
        predict: evaluates the model on new data to give predictions
        get_params: outputs key model parameters
    """

    def __init__(self, model, n_estimators, **kwargs):
        super(EnsembleModel, self).__init__()
        try:
            # Look up the estimator class by name and instantiate it.
            model = dict(sklearn.utils.all_estimators())[model](**kwargs)
        except (KeyError, TypeError):
            # Best-effort fallback kept deliberately: a pre-built estimator
            # instance (e.g. a KerasRegressor) raises KeyError here and is
            # used as-is below. Narrowed from a bare ``except`` so genuine
            # programming errors are no longer silently swallowed.
            # FIX: a space was missing between the two concatenated string
            # literals ("model" + "type" printed as "modeltype").
            print(
                'Could not find designated model type in scikit-learn model library. Note the other supported model '
                'type is the keras.wrappers.scikit_learn.KerasRegressor model')
        self.n_estimators = n_estimators
        self.model = BaggingRegressor(base_estimator=model, n_estimators=self.n_estimators)
        self.base_estimator_ = model.__class__.__name__

    def fit(self, X, y):
        """Fit the bagging ensemble to (X, y).

        NOTE(review): returns the fitted BaggingRegressor rather than
        ``self`` (sklearn convention) — kept for backward compatibility.
        """
        return self.model.fit(X, y)

    def predict(self, X, as_frame=True):
        """Predict on X; return a pandas Series/DataFrame when ``as_frame``
        is truthy, otherwise a flat numpy array."""
        if as_frame:
            return pd.DataFrame(self.model.predict(X), columns=['y_pred']).squeeze()
        return self.model.predict(X).ravel()

    def get_params(self, deep=True):
        """Return the wrapped BaggingRegressor's parameters."""
        return self.model.get_params(deep=deep)