def _predict_features_for_meta_models(self, X):
    """Collect the predictions of every base learner into a single frame.

    Parameters
    ----------
    X: pandas.DataFrame
        Input features; handed unchanged to each base learner's ``predict``.

    Returns
    -------
    pandas.DataFrame
        Frame sharing the index of ``X`` with one column per base learner.
        Each column is labelled with the learner's name (as returned by
        ``get_estimator_name``) and holds that learner's prediction.
    """
    learner_names = [get_estimator_name(learner) for learner in self.base_learners]
    features = pd.DataFrame(index=X.index, columns=learner_names)

    for learner, column in zip(self.base_learners, learner_names):
        # The learner's output is reduced via ``.values.squeeze()`` before
        # being written into its column.
        features.loc[:, column] = learner.predict(X).values.squeeze()

    return features
def __init__(
    self,
    best_model,
    cv_results,
    cv_data,
    model_reprs,
    partition,
    X_train,
    y_train,
    frequency,
    horizon,
    country_code_column,
):
    """Store the selection outcome and derive best-model helper attributes.

    Parameters
    ----------
    best_model
        Winning estimator; hashed with ``generate_estimator_hash`` and named
        with ``get_estimator_name``.
    cv_results
        Table with a ``rank_test_score`` column; the first row whose rank
        equals 1 is kept as ``best_model_cv_results``.
    cv_data
        Table with ``split`` and ``y_true`` columns plus a column labelled
        with the best model's hash.
    model_reprs
        Mapping indexed by the estimator hash.
    partition
        Partition description; hashed with ``generate_partition_hash``.
    X_train, y_train, frequency, horizon, country_code_column
        Stored on the instance unchanged.
    """
    # Plain storage of the constructor arguments.
    self.best_model = best_model
    self.cv_results = cv_results
    self.cv_data = cv_data
    self.model_reprs = model_reprs
    self.partition = partition
    self.X_train = X_train
    self.y_train = y_train
    self.frequency = frequency
    self.horizon = horizon
    self.country_code_column = country_code_column

    # Attributes derived from the best model.
    self.best_model_hash = generate_estimator_hash(best_model)
    self.best_model_name = get_estimator_name(best_model).replace("model__", "")
    self.best_model_repr = self.model_reprs[self.best_model_hash]

    # Relabel the best model's column so it can be selected by a fixed name.
    relabelled_cv_data = self.cv_data.rename({self.best_model_hash: "best_model"}, axis=1)
    self.best_model_cv_data = relabelled_cv_data[["split", "y_true", "best_model"]]

    is_top_ranked = self.cv_results["rank_test_score"] == 1
    self.best_model_cv_results = self.cv_results[is_top_ranked].iloc[0]

    self.partition_hash = generate_partition_hash(self.partition)

    # Snapshot of the attribute names set so far; taken before ``_df_plot``
    # is assigned, so neither ``_persist_attrs`` nor ``_df_plot`` is listed.
    self._persist_attrs = sorted(set(self.__dict__) - {"self"})
    self._df_plot = None
def predict(self, X):
    """Compute the ensemble prediction for the rows of ``X``.

    Parameters
    ----------
    X: pandas.DataFrame
        Input passed unchanged to the ``predict`` method of every base learner.

    Returns
    -------
    pandas.DataFrame
        Single-column frame indexed like ``X``. The column is labelled
        ``self.name`` and holds the row-wise result of ``self.ensemble_func``
        over the base learners' predictions, clipped to the interval
        ``[self.clip_predictions_lower, self.clip_predictions_upper]``.
    """
    target_column = self.name
    result = pd.DataFrame(index=X.index, columns=[target_column])

    # One extra column per base learner, labelled with the learner's name.
    for learner in self.base_learners:
        column = get_estimator_name(learner)
        result[column] = learner.predict(X)

    # Aggregate row by row over the base learner columns only.
    base_predictions = result.drop(columns=[target_column])
    result[target_column] = base_predictions.apply(self.ensemble_func, axis=1)

    clipped = result[target_column].clip(
        lower=self.clip_predictions_lower,
        upper=self.clip_predictions_upper,
    )
    result[target_column] = clipped

    return result[[target_column]]
def _check_base_learners_names(models):
    """Check that all base learner models have unique names

    Parameters
    ----------
    models: list
        List of instantiated hcrystalball model wrapper instances

    Raises
    ------
    DuplicatedModelNameError
        If two or more models share the same name, as returned by
        ``get_estimator_name``.
    """
    names = [get_estimator_name(model) for model in models]
    if len(names) != len(set(names)):
        # Adjacent string literals are joined verbatim, so the space between
        # the two sentences has to be written explicitly.
        raise DuplicatedModelNameError(
            "There seems to be duplicates in model names among SimpleEnsemble base learners. "
            "Model names should be unique."
        )
def fit(self, X, y=None):
    """Fit the stacking ensemble model

    When the ensemble has not been fitted yet, or ``fit_meta_model_always``
    is set, the meta model is trained on base learner predictions gathered
    over the splits produced by ``FinerTimeSplit``. The base learners are
    always refitted on the complete ``X`` / ``y`` at the end.

    Parameters
    ----------
    X: pandas.DataFrame
        Input features.

    y: numpy.ndarray
        Target vector; indexed positionally with slices and split indices.

    Returns
    -------
    StackingEnsemble
        A fitted StackingEnsemble instance
    """
    self._check_base_learners_names(self.base_learners)

    meta_model_is_current = self.fitted and not self.fit_meta_model_always
    if not meta_model_is_current:
        cv = FinerTimeSplit(horizon=self.train_horizon, n_splits=self.train_n_splits)

        # The meta model is trained on the last n_splits * horizon rows.
        n_meta_rows = self.train_n_splits * self.train_horizon
        learner_names = [get_estimator_name(learner) for learner in self.base_learners]
        meta_features = pd.DataFrame(index=X.index[-n_meta_rows:], columns=learner_names)
        meta_target = y[-n_meta_rows:]

        # Fill the meta features with base learner predictions, split by split.
        for train_idx, pred_idx in cv.split(X):
            fold_X = X.iloc[train_idx, :]
            holdout_X = X.iloc[pred_idx, :]
            fold_y = y[train_idx]
            self._fit_base_learners(fold_X, fold_y)
            holdout_predictions = self._predict_features_for_meta_models(holdout_X)
            meta_features.loc[holdout_X.index, :] = holdout_predictions

        # Optional horizon features for the meta model.
        if self.horizons_as_features:
            horizon_features = self._create_horizons_as_features(
                cross_results_index=meta_features.index,
                horizon=self.train_horizon,
                n_splits=self.train_n_splits,
            )
            meta_features = pd.concat([meta_features, horizon_features], axis=1)

        # Optional weekday features for the meta model.
        if self.weekdays_as_features:
            weekday_features = self._create_weekdays_as_features(
                cross_results_index=meta_features.index
            )
            meta_features = pd.concat([meta_features, weekday_features], axis=1)

        self._fit_columns = meta_features.columns
        self.meta_model.fit(meta_features.values, meta_target)

    # Fit the base learners on the whole training set
    self._fit_base_learners(X, y)
    self.fitted = True

    return self