def test_cv_finertimesplit_split_pandas_container_data(ts_data, expected_error):
    """Check ``FinerTimeSplit.split`` on pandas container data.

    For inputs that should succeed, the splitter must return a generator
    yielding ``n_splits`` (train, test) index pairs, where each test fold
    spans ``horizon`` trailing positions and the train fold covers every
    position before it. For invalid inputs, consuming the generator must
    raise ``expected_error``.
    """
    n_splits = 2
    horizon = 3
    splitter = FinerTimeSplit(n_splits=n_splits, horizon=horizon)

    # Error path: the failure surfaces when the generator is consumed.
    if expected_error is not None:
        with pytest.raises(expected_error):
            _ = list(splitter.split(ts_data))
        return

    splits = splitter.split(ts_data)
    assert isinstance(splits, types.GeneratorType)

    splits = list(splits)
    assert len(splits) == n_splits

    for split_idx, split in enumerate(splits):
        # Train fold size grows by one horizon per split (expanding window).
        n_train = len(ts_data) - (n_splits - split_idx) * horizon
        assert len(split) == 2
        assert len(split[0]) == n_train
        assert len(split[1]) == horizon
        assert np.array_equal(split[0], np.arange(n_train))
        assert np.array_equal(split[1], np.arange(horizon) + n_train)
def test_cv_finertimesplit_split_input_data_types(test_data, expected_error):
    """Check ``FinerTimeSplit.split`` across supported input data types.

    For valid inputs, the splitter must yield ``n_splits`` (train, test)
    index pairs with an expanding train window and ``horizon``-sized test
    folds. For unsupported inputs, consuming the split generator must
    raise ``expected_error``.
    """
    n_splits = 2
    horizon = 3
    splitter = FinerTimeSplit(n_splits=n_splits, horizon=horizon)

    # Error path: the failure surfaces when the generator is consumed.
    if expected_error is not None:
        with pytest.raises(expected_error):
            _ = list(splitter.split(test_data))
        return

    splits = list(splitter.split(test_data))
    assert len(splits) == n_splits

    for split_idx, split in enumerate(splits):
        # Train fold size grows by one horizon per split (expanding window).
        n_train = len(test_data) - (n_splits - split_idx) * horizon
        assert len(split) == 2
        assert len(split[0]) == n_train
        assert len(split[1]) == horizon
        assert np.array_equal(split[0], np.arange(n_train))
        assert np.array_equal(split[1], np.arange(horizon) + n_train)
def fit(self, X, y=None):
    """Fit the stacking ensemble model.

    Trains the base learners and, when required, the meta model. The meta
    model is trained on out-of-sample base-learner predictions produced by
    a ``FinerTimeSplit`` cross-validation over the trailing
    ``train_n_splits * train_horizon`` rows of ``X``. Afterwards the base
    learners are refit on the full training set.

    Parameters
    ----------
    X: pandas.DataFrame
        Input features.
    y: numpy.ndarray
        Target vector.

    Returns
    -------
    StackingEnsemble
        A fitted StackingEnsemble instance
    """
    # Validate base learner names before any fitting work.
    self._check_base_learners_names(self.base_learners)
    # Fit the base learners and the meta_model
    # Meta model is (re)trained on the first fit, or on every fit when
    # fit_meta_model_always is set.
    if (not self.fitted) or self.fit_meta_model_always:
        splitter = FinerTimeSplit(horizon=self.train_horizon, n_splits=self.train_n_splits)
        # Number of trailing rows for which out-of-sample base-learner
        # predictions will be collected as meta-model features.
        n_train_meta = self.train_n_splits * self.train_horizon
        # One meta-feature column per base learner, indexed by the
        # trailing n_train_meta timestamps of X.
        X_meta = pd.DataFrame(
            index=X.index[-n_train_meta:],
            columns=[get_estimator_name(bl) for bl in self.base_learners],
        )
        y_meta = y[-n_train_meta:]
        # Get base learners predictions
        # Each split fits the base learners on the train indices and fills
        # X_meta with their predictions on the held-out indices, so the
        # meta model only ever sees out-of-sample predictions.
        for ind_train, ind_pred in splitter.split(X):
            X_train = X.iloc[ind_train, :]
            X_pred = X.iloc[ind_pred, :]
            y_train = y[ind_train]
            self._fit_base_learners(X_train, y_train)
            X_meta.loc[
                X_pred.index, :] = self._predict_features_for_meta_models(
                    X_pred)
        # Add dummy horizon variable for meta model
        if self.horizons_as_features:
            X_meta = pd.concat(
                [
                    X_meta,
                    self._create_horizons_as_features(
                        cross_results_index=X_meta.index,
                        horizon=self.train_horizon,
                        n_splits=self.train_n_splits,
                    ),
                ],
                axis=1,
            )
        # Optionally append weekday indicator features; presumably derived
        # from the datetime index — confirm against the helper.
        if self.weekdays_as_features:
            X_meta = pd.concat(
                [
                    X_meta,
                    self._create_weekdays_as_features(
                        cross_results_index=X_meta.index)
                ],
                axis=1,
            )
        # Remember the meta-feature column layout so prediction can
        # reproduce the same ordering.
        self._fit_columns = X_meta.columns
        self.meta_model.fit(X_meta.values, y_meta)
    # Fit the base learners on the whole training set
    self._fit_base_learners(X, y)
    self.fitted = True
    return self