def _predict_features_for_meta_models(self, X):
        """Collect the predictions of every base learner in a single frame

        Parameters
        ----------
        X: pandas.DataFrame
            Input features, handed unchanged to each base learner's ``predict``.

        Returns
        -------
        pandas.DataFrame
            Container sharing the index of ``X`` with one column per base learner.
            Each column is named after its base learner (via ``get_estimator_name``)
            and holds that learner's prediction for ``X``.
        """
        learner_names = [
            get_estimator_name(learner) for learner in self.base_learners
        ]
        # Start from an empty frame so every learner gets a slot up front.
        features = pd.DataFrame(index=X.index, columns=learner_names)

        for column, learner in zip(learner_names, self.base_learners):
            # `predict` is expected to return a pandas object; its values are
            # squeezed before being written into the learner's column.
            learner_output = learner.predict(X)
            features.loc[:, column] = learner_output.values.squeeze()

        return features
Ejemplo n.º 2
0
    def __init__(
        self,
        best_model,
        cv_results,
        cv_data,
        model_reprs,
        partition,
        X_train,
        y_train,
        frequency,
        horizon,
        country_code_column,
    ):
        """Store the inputs and derive the attributes describing the best model

        Besides keeping every argument as an attribute of the same name, the
        following attributes are derived:

        - ``best_model_hash``: result of ``generate_estimator_hash(best_model)``
        - ``best_model_cv_data``: ``cv_data`` restricted to the columns ``split``,
          ``y_true`` and the best model's column (renamed to ``best_model``)
        - ``best_model_name``: estimator name with ``"model__"`` removed
        - ``best_model_cv_results``: first row of ``cv_results`` whose
          ``rank_test_score`` equals 1
        - ``best_model_repr``: entry of ``model_reprs`` stored under the hash
        - ``partition_hash``: result of ``generate_partition_hash(partition)``
        """
        # Raw inputs, kept under the same names as the constructor arguments.
        self.best_model, self.cv_results, self.cv_data = best_model, cv_results, cv_data
        self.model_reprs, self.partition = model_reprs, partition
        self.X_train, self.y_train = X_train, y_train
        self.frequency, self.horizon = frequency, horizon
        self.country_code_column = country_code_column

        # Attributes derived from the best model.
        model_hash = generate_estimator_hash(best_model)
        self.best_model_hash = model_hash

        renamed_cv_data = self.cv_data.rename(columns={model_hash: "best_model"})
        self.best_model_cv_data = renamed_cv_data[["split", "y_true", "best_model"]]

        full_name = get_estimator_name(best_model)
        self.best_model_name = full_name.replace("model__", "")

        is_top_ranked = self.cv_results["rank_test_score"] == 1
        self.best_model_cv_results = self.cv_results[is_top_ranked].iloc[0]

        self.best_model_repr = self.model_reprs[model_hash]
        self.partition_hash = generate_partition_hash(self.partition)

        # Snapshot of the attribute names set so far; `_df_plot` is assigned
        # afterwards on purpose so it is not part of this list.
        self._persist_attrs = sorted(set(self.__dict__) - {"self"})
        self._df_plot = None
Ejemplo n.º 3
0
    def predict(self, X):
        """Compute the ensemble prediction for the given input

        Parameters
        ----------
        X: pandas.DataFrame
            Input container, handed unchanged to every base learner's ``predict``.

        Returns
        -------
        pandas.DataFrame
            A single-column DataFrame sharing the index of ``X``. The column is named
            after the ensemble (``self.name``) and contains the row-wise result of
            ``self.ensemble_func`` over the base learner predictions, clipped to
            ``clip_predictions_lower`` / ``clip_predictions_upper``.
        """
        ensemble_column = self.name
        frame = pd.DataFrame(index=X.index, columns=[ensemble_column])

        # One column per base learner, named after the learner.
        for learner in self.base_learners:
            frame[get_estimator_name(learner)] = learner.predict(X)

        # Combine the base learner columns row by row; the (still empty)
        # ensemble column is left out of the combination.
        base_predictions = frame.drop(columns=[ensemble_column])
        frame[ensemble_column] = base_predictions.apply(self.ensemble_func, axis=1)

        # Keep the final prediction inside the configured bounds.
        frame[ensemble_column] = frame[ensemble_column].clip(
            lower=self.clip_predictions_lower,
            upper=self.clip_predictions_upper,
        )
        return frame[[ensemble_column]]
Ejemplo n.º 4
0
    @staticmethod
    def _check_base_learners_names(models):
        """Check if the base learner models have all unique names

        Parameters
        ----------
        models: list
            List of instantiated hcrystalball model wrapper instances

        Raises
        ------
        DuplicatedModelNameError
            If multiple models have the same name, as reported by
            ``get_estimator_name``.
        """
        # Declared static: the function takes no `self`, yet it is invoked on
        # instances, which would otherwise pass the instance as `models`.
        names = [get_estimator_name(model) for model in models]
        # A set drops repeated names, so a length mismatch means duplicates.
        if len(names) != len(set(names)):
            # Adjacent literals are concatenated verbatim, hence the explicit
            # trailing space after the first sentence.
            raise DuplicatedModelNameError(
                "There seems to be duplicates in model names among SimpleEnsemble base learners. "
                "Model names should be unique.")
    def fit(self, X, y=None):
        """Train the meta model and the base learners of the stacking ensemble

        The meta model is trained on out-of-sample base learner predictions
        gathered over the last ``train_n_splits * train_horizon`` observations.
        This happens when the ensemble has not been fitted yet or when
        ``fit_meta_model_always`` is set. Afterwards the base learners are always
        refitted on the complete training data.

        Parameters
        ----------
        X: pandas.DataFrame
            Input features.

        y: numpy.ndarray
            Target vector.

        Returns
        -------
        StackingEnsemble
            A fitted StackingEnsemble instance
        """
        self._check_base_learners_names(self.base_learners)

        needs_meta_fit = (not self.fitted) or self.fit_meta_model_always
        if needs_meta_fit:
            horizon = self.train_horizon
            n_splits = self.train_n_splits
            cv = FinerTimeSplit(horizon=horizon, n_splits=n_splits)

            # The meta model only sees the tail of the data covered by the splits.
            meta_size = n_splits * horizon
            learner_names = [
                get_estimator_name(learner) for learner in self.base_learners
            ]
            meta_features = pd.DataFrame(
                index=X.index[-meta_size:],
                columns=learner_names,
            )
            meta_target = y[-meta_size:]

            # Fill the meta features fold by fold with base learner predictions
            # made by learners fitted on that fold's training part only.
            for train_idx, pred_idx in cv.split(X):
                fold_X_pred = X.iloc[pred_idx, :]
                self._fit_base_learners(X.iloc[train_idx, :], y[train_idx])
                fold_predictions = self._predict_features_for_meta_models(
                    fold_X_pred)
                meta_features.loc[fold_X_pred.index, :] = fold_predictions

            # Optional extra features for the meta model: the forecast horizon ...
            if self.horizons_as_features:
                horizon_features = self._create_horizons_as_features(
                    cross_results_index=meta_features.index,
                    horizon=horizon,
                    n_splits=n_splits,
                )
                meta_features = pd.concat(
                    [meta_features, horizon_features], axis=1)

            # ... and the weekday.
            if self.weekdays_as_features:
                weekday_features = self._create_weekdays_as_features(
                    cross_results_index=meta_features.index)
                meta_features = pd.concat(
                    [meta_features, weekday_features], axis=1)

            # Remember the column layout the meta model was trained with.
            self._fit_columns = meta_features.columns
            self.meta_model.fit(meta_features.values, meta_target)

        # Final fit of the base learners on the whole training set.
        self._fit_base_learners(X, y)
        self.fitted = True

        return self