def _score_forecasters(forecasters, cv, y):
    """Evaluate all forecasters on y and return the name of the best one."""
    # `check_scoring` and `evaluate` come from the enclosing sktime module.
    scoring = check_scoring(None)
    scoring_name = f"test_{scoring.name}"
    score = None
    best_name = None
    for name, forecaster in forecasters:
        results = evaluate(forecaster, cv, y)
        results = results.mean()
        new_score = float(results[scoring_name])
        # Guard with `is None`, not truthiness: a perfect score of 0.0 is
        # falsy and must not be treated as "no score yet".
        if score is None or new_score < score:
            score = new_score
            best_name = name
    return best_name
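
# A minimal usage sketch for this helper (not part of the original module),
# assuming sktime's load_airline, NaiveForecaster, and ExpandingWindowSplitter
# are importable as below; the candidate list mirrors the (name, forecaster)
# tuples the loop above expects.
from sktime.datasets import load_airline
from sktime.forecasting.model_selection import ExpandingWindowSplitter
from sktime.forecasting.naive import NaiveForecaster

y = load_airline()
candidates = [
    ("naive-last", NaiveForecaster(strategy="last")),
    ("naive-mean", NaiveForecaster(strategy="mean", sp=12)),
]
cv = ExpandingWindowSplitter(initial_window=24, step_length=12, fh=[1, 2, 3])

# Returns the name whose mean test score (lower is better for the default
# metric) is smallest across the CV folds.
best_name = _score_forecasters(candidates, cv, y)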
def _fit(self, y, X=None, fh=None, **fit_params):
    """Fit to training data.

    Parameters
    ----------
    y : pd.Series
        Target time series to which to fit the forecaster.
    fh : int, list or np.array, optional (default=None)
        The forecasting horizon with the steps ahead to predict.
    X : pd.DataFrame, optional (default=None)
        Exogenous variables are ignored.

    Returns
    -------
    self : returns an instance of self.
    """
    cv = check_cv(self.cv)
    scoring = check_scoring(self.scoring)
    scoring_name = f"test_{scoring.name}"
    parallel = Parallel(n_jobs=self.n_jobs, pre_dispatch=self.pre_dispatch)

    def _fit_and_score(params):
        # Clone forecaster.
        forecaster = clone(self.forecaster)

        # Set parameters.
        forecaster.set_params(**params)

        # Evaluate.
        out = evaluate(
            forecaster,
            cv,
            y,
            X,
            strategy=self.strategy,
            scoring=scoring,
            fit_params=fit_params,
        )

        # Filter columns.
        out = out.filter(items=[scoring_name, "fit_time", "pred_time"], axis=1)

        # Aggregate results.
        out = out.mean()
        out = out.add_prefix("mean_")

        # Add parameters to output table.
        out["params"] = params

        return out

    def evaluate_candidates(candidate_params):
        candidate_params = list(candidate_params)

        if self.verbose > 0:
            n_candidates = len(candidate_params)
            n_splits = cv.get_n_splits(y)
            print(  # noqa
                "Fitting {0} folds for each of {1} candidates,"
                " totalling {2} fits".format(
                    n_splits, n_candidates, n_candidates * n_splits
                )
            )

        out = parallel(
            delayed(_fit_and_score)(params) for params in candidate_params
        )

        if len(out) < 1:
            raise ValueError(
                "No fits were performed. "
                "Was the CV iterator empty? "
                "Were there no candidates?"
            )

        return out

    # Run grid-search cross-validation.
    results = self._run_search(evaluate_candidates)

    results = pd.DataFrame(results)

    # Rank results; rank 1 is the best candidate, regardless of whether
    # greater is better for the given scoring.
    results[f"rank_{scoring_name}"] = results.loc[:, f"mean_{scoring_name}"].rank(
        ascending=not scoring.greater_is_better
    )

    self.cv_results_ = results

    # Select best parameters.
    self.best_index_ = results.loc[:, f"rank_{scoring_name}"].argmin()
    self.best_score_ = results.loc[self.best_index_, f"mean_{scoring_name}"]
    self.best_params_ = results.loc[self.best_index_, "params"]
    self.best_forecaster_ = clone(self.forecaster).set_params(**self.best_params_)

    # Refit model with best parameters.
    if self.refit:
        self.best_forecaster_.fit(y, X, fh)

    # Sort values according to rank. Rank 1 is always the best, so sort
    # ascending unconditionally; sorting by `not greater_is_better` here
    # would put the worst candidates first for higher-is-better metrics.
    results = results.sort_values(by=f"rank_{scoring_name}", ascending=True)

    # Select n best forecasters.
    self.n_best_forecasters_ = []
    self.n_best_scores_ = []
    for i in range(self.return_n_best_forecasters):
        params = results["params"].iloc[i]
        rank = results[f"rank_{scoring_name}"].iloc[i]
        rank = str(int(rank))
        forecaster = clone(self.forecaster).set_params(**params)
        # Refit model with best parameters.
        if self.refit:
            forecaster.fit(y, X, fh)
        self.n_best_forecasters_.append((rank, forecaster))
        # Save score.
        score = results[f"mean_{scoring_name}"].iloc[i]
        self.n_best_scores_.append(score)

    return self
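
# How a subclass plugs into `_fit` (a sketch, not the original source):
# `_fit` hands `evaluate_candidates` to `self._run_search`, and a grid-search
# subclass typically just expands its `param_grid` attribute with sklearn's
# ParameterGrid, mirroring sklearn's own GridSearchCV._run_search. The
# `param_grid` attribute is an assumption about the subclass.
from sklearn.model_selection import ParameterGrid

def _run_search(self, evaluate_candidates):
    """Evaluate every combination in self.param_grid (grid-search variant)."""
    return evaluate_candidates(ParameterGrid(self.param_grid))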
def evaluate(
    forecaster,
    cv,
    y,
    X=None,
    strategy="refit",
    scoring=None,
    return_data=False,
):
    """Evaluate forecaster using timeseries cross-validation.

    Parameters
    ----------
    forecaster : sktime.forecaster
        Any forecaster.
    cv : Temporal cross-validation splitter
        Splitter of how to split the data into test data and train data.
    y : pd.Series
        Target time series to which to fit the forecaster.
    X : pd.DataFrame, default=None
        Exogenous variables.
    strategy : {"refit", "update"}
        Must be "refit" or "update". The strategy defines whether the
        `forecaster` is only fitted on the first train window and then
        updated, or always refitted.
    scoring : subclass of sktime.performance_metrics.BaseMetric, default=None
        Used to get a score function that takes y_pred and y_test arguments
        and accepts y_train as keyword argument.
        If None, then uses scoring = MeanAbsolutePercentageError(symmetric=True).
    return_data : bool, default=False
        Returns three additional columns in the DataFrame, by default False.
        The cells of the columns contain each a pd.Series for y_train,
        y_pred, y_test.

    Returns
    -------
    pd.DataFrame
        DataFrame that contains several columns with information regarding
        each refit/update and prediction of the forecaster.

    Examples
    --------
    >>> from sktime.datasets import load_airline
    >>> from sktime.forecasting.model_evaluation import evaluate
    >>> from sktime.forecasting.model_selection import ExpandingWindowSplitter
    >>> from sktime.forecasting.naive import NaiveForecaster
    >>> y = load_airline()
    >>> forecaster = NaiveForecaster(strategy="mean", sp=12)
    >>> cv = ExpandingWindowSplitter(
    ...     initial_window=24,
    ...     step_length=12,
    ...     fh=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12])
    >>> results = evaluate(forecaster=forecaster, y=y, cv=cv)
    """
    _check_strategy(strategy)
    cv = check_cv(cv, enforce_start_with_window=True)
    scoring = check_scoring(scoring)
    y = check_series(
        y,
        enforce_univariate=forecaster.get_tag("scitype:y") == "univariate",
        enforce_multivariate=forecaster.get_tag("scitype:y") == "multivariate",
    )
    X = check_X(X)

    # Define score name.
    score_name = "test_" + scoring.name

    # Collect result rows and build the DataFrame at the end; repeated
    # DataFrame.append is deprecated in recent pandas.
    results = []

    # Map the metric's prediction scitype to the forecaster method to call;
    # dispatching via getattr keeps this explicit instead of using eval.
    pred_type = {
        "pred_quantiles": "predict_quantiles",
        "pred_intervals": "predict_interval",
        "pred_proba": "predict_proba",
        None: "predict",
    }

    # Run temporal cross-validation.
    for i, (train, test) in enumerate(cv.split(y)):
        # split data
        y_train, y_test, X_train, X_test = _split(y, X, train, test, cv.fh)

        # create forecasting horizon
        fh = ForecastingHorizon(y_test.index, is_relative=False)

        # fit/update
        start_fit = time.perf_counter()
        if i == 0 or strategy == "refit":
            forecaster = clone(forecaster)
            forecaster.fit(y_train, X_train, fh=fh)
        else:  # if strategy == "update":
            forecaster.update(y_train, X_train)
        fit_time = time.perf_counter() - start_fit

        # predict
        start_pred = time.perf_counter()

        # Initialize so metric_args is always defined, even when the metric
        # carries a prediction scitype but no `metric_args` attribute.
        metric_args = {}
        if hasattr(scoring, "metric_args"):
            metric_args = scoring.metric_args

        try:
            scitype = scoring.get_tag("scitype:y_pred")
        except ValueError:
            # If no scitype exists then metric is not proba and no args needed.
            scitype = None
            metric_args = {}

        y_pred = getattr(forecaster, pred_type[scitype])(
            fh,
            X_test,
            **metric_args,
        )
        pred_time = time.perf_counter() - start_pred

        # score
        score = scoring(y_test, y_pred, y_train=y_train)

        # save results
        results.append(
            {
                score_name: score,
                "fit_time": fit_time,
                "pred_time": pred_time,
                "len_train_window": len(y_train),
                "cutoff": forecaster.cutoff,
                "y_train": y_train if return_data else np.nan,
                "y_test": y_test if return_data else np.nan,
                "y_pred": y_pred if return_data else np.nan,
            }
        )

    results = pd.DataFrame(results)

    # post-processing of results
    if not return_data:
        results = results.drop(columns=["y_train", "y_test", "y_pred"])
    results["len_train_window"] = results["len_train_window"].astype(int)

    return results
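
# Consuming the returned frame (a sketch, not part of the original module):
# one row per train/test split. The score column name follows
# "test_" + scoring.name, so with the default metric it is assumed to be
# "test_MeanAbsolutePercentageError".
from sktime.datasets import load_airline
from sktime.forecasting.model_selection import ExpandingWindowSplitter
from sktime.forecasting.naive import NaiveForecaster

y = load_airline()
cv = ExpandingWindowSplitter(initial_window=24, step_length=12, fh=[1, 2, 3])
results = evaluate(forecaster=NaiveForecaster(sp=12), cv=cv, y=y)

# Per-fold score, timings, and window metadata.
print(results[["test_MeanAbsolutePercentageError", "fit_time", "len_train_window"]])
print("mean score:", results["test_MeanAbsolutePercentageError"].mean())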
def fit(self, y, X=None, fh=None, **fit_params):
    """Fit to training data.

    Parameters
    ----------
    y : pd.Series
        Target time series to which to fit the forecaster.
    fh : int, list or np.array, optional (default=None)
        The forecasting horizon with the steps ahead to predict.
    X : pd.DataFrame, optional (default=None)
        Exogenous variables are ignored.

    Returns
    -------
    self : returns an instance of self.
    """
    y, X = check_y_X(y, X)

    # validate cross-validator
    cv = check_cv(self.cv)
    base_forecaster = clone(self.forecaster)

    scoring = check_scoring(self.scoring)
    scorers = {scoring.name: scoring}
    refit_metric = scoring.name

    fit_and_score_kwargs = dict(
        scorer=scorers,
        fit_params=fit_params,
        return_train_score=self.return_train_score,
        return_times=True,
        return_parameters=False,
        error_score=self.error_score,
        verbose=self.verbose,
    )

    results = {}
    all_candidate_params = []
    all_out = []

    def evaluate_candidates(candidate_params):
        candidate_params = list(candidate_params)
        n_candidates = len(candidate_params)

        if self.verbose > 0:
            n_splits = cv.get_n_splits(y)
            print(  # noqa
                "Fitting {0} folds for each of {1} candidates,"
                " totalling {2} fits".format(
                    n_splits, n_candidates, n_candidates * n_splits
                )
            )

        out = []
        for parameters in candidate_params:
            r = _fit_and_score(
                clone(base_forecaster),
                cv,
                y,
                X,
                parameters=parameters,
                **fit_and_score_kwargs,
            )
            out.append(r)

        n_splits = cv.get_n_splits(y)

        if len(out) < 1:
            raise ValueError(
                "No fits were performed. "
                "Was the CV iterator empty? "
                "Were there no candidates?"
            )

        all_candidate_params.extend(candidate_params)
        all_out.extend(out)

        nonlocal results
        results = self._format_results(all_candidate_params, scorers, all_out)
        return results

    self._run_search(evaluate_candidates)

    self.best_index_ = results["rank_test_%s" % refit_metric].argmin()
    self.best_score_ = results["mean_test_%s" % refit_metric][self.best_index_]
    self.best_params_ = results["params"][self.best_index_]
    self.best_forecaster_ = clone(base_forecaster).set_params(**self.best_params_)

    if self.refit:
        refit_start_time = time.time()
        self.best_forecaster_.fit(y, X, fh)
        self.refit_time_ = time.time() - refit_start_time

    # Store the only scorer, not as a dict, for single-metric evaluation.
    self.scorer_ = scorers[scoring.name]

    self.cv_results_ = results
    self.n_splits_ = cv.get_n_splits(y)

    self._is_fitted = True
    return self
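
# End-user view of this `fit` (a sketch, assuming it belongs to a
# ForecastingGridSearchCV-style tuner with `forecaster`, `cv`, and
# `param_grid` constructor arguments, as in sktime).
from sktime.datasets import load_airline
from sktime.forecasting.model_selection import (
    ExpandingWindowSplitter,
    ForecastingGridSearchCV,
)
from sktime.forecasting.naive import NaiveForecaster

y = load_airline()
cv = ExpandingWindowSplitter(initial_window=24, step_length=12, fh=[1, 2, 3])
gscv = ForecastingGridSearchCV(
    forecaster=NaiveForecaster(sp=12),
    cv=cv,
    param_grid={"strategy": ["last", "mean", "drift"]},
)
gscv.fit(y)

# Populated by fit: best_params_, best_score_, best_forecaster_, cv_results_.
print(gscv.best_params_, gscv.best_score_)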
def evaluate(
    forecaster,
    cv,
    y,
    X=None,
    strategy="refit",
    scoring=None,
    fit_params=None,
    return_data=False,
):
    """Evaluate forecaster using cross-validation.

    Parameters
    ----------
    forecaster : sktime.forecaster
        Any forecaster.
    y : pd.Series
        Target time series to which to fit the forecaster.
    X : pd.DataFrame, optional (default=None)
        Exogenous variables.
    cv : Temporal cross-validation splitter
        Splitter of how to split the data into test data and train data.
    strategy : str, optional (default="refit")
        Must be "refit" or "update". The strategy defines whether the
        `forecaster` is only fitted on the first train window and then
        updated, or always refitted.
    scoring : object of class MetricFunctionWrapper from
        sktime.performance_metrics, optional. Example: scoring=sMAPE().
        Used to get a score function that takes y_pred and y_test as
        arguments, by default None (if None, uses sMAPE).
    fit_params : dict, optional (default=None)
        Parameters passed to the `fit` call of the forecaster.
    return_data : bool, optional
        Returns three additional columns in the DataFrame, by default False.
        The cells of the columns contain each a pd.Series for y_train,
        y_pred, y_test.

    Returns
    -------
    pd.DataFrame
        DataFrame that contains several columns with information regarding
        each refit/update and prediction of the forecaster.

    Examples
    --------
    >>> from sktime.datasets import load_airline
    >>> from sktime.forecasting.model_evaluation import evaluate
    >>> from sktime.forecasting.model_selection import ExpandingWindowSplitter
    >>> from sktime.forecasting.naive import NaiveForecaster
    >>> y = load_airline()
    >>> forecaster = NaiveForecaster(strategy="mean", sp=12)
    >>> cv = ExpandingWindowSplitter(initial_window=24, step_length=12,
    ...     fh=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12])
    >>> results = evaluate(forecaster=forecaster, y=y, cv=cv)
    """
    _check_strategy(strategy)
    cv = check_cv(cv, enforce_start_with_window=True)
    scoring = check_scoring(scoring)
    y, X = check_y_X(y, X)
    fit_params = {} if fit_params is None else fit_params

    # Define score name.
    score_name = "test_" + scoring.name

    # Collect result rows and build the DataFrame at the end; repeated
    # DataFrame.append is deprecated in recent pandas.
    results = []

    # Run temporal cross-validation.
    for i, (train, test) in enumerate(cv.split(y)):
        # split data
        y_train, y_test, X_train, X_test = _split(y, X, train, test, cv.fh)

        # create forecasting horizon
        fh = ForecastingHorizon(y_test.index, is_relative=False)

        # fit/update
        start_fit = time.time()
        if i == 0 or strategy == "refit":
            forecaster.fit(y_train, X_train, fh=fh, **fit_params)
        else:  # if strategy == "update":
            forecaster.update(y_train, X_train)
        fit_time = time.time() - start_fit

        # predict
        start_pred = time.time()
        y_pred = forecaster.predict(fh, X=X_test)
        pred_time = time.time() - start_pred

        # score
        score = scoring(y_pred, y_test)

        # save results
        results.append(
            {
                score_name: score,
                "fit_time": fit_time,
                "pred_time": pred_time,
                "len_train_window": len(y_train),
                "cutoff": forecaster.cutoff,
                "y_train": y_train if return_data else np.nan,
                "y_test": y_test if return_data else np.nan,
                "y_pred": y_pred if return_data else np.nan,
            }
        )

    results = pd.DataFrame(results)

    # post-processing of results
    if not return_data:
        results = results.drop(columns=["y_train", "y_test", "y_pred"])
    results["len_train_window"] = results["len_train_window"].astype(int)

    return results
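
# The `strategy` switch only changes what happens after the first window
# (a comparison sketch, not part of the original module): "refit" re-trains
# on every expanding window, while "update" fits once on the first window
# and only calls update() afterwards, which is usually cheaper.
from sktime.datasets import load_airline
from sktime.forecasting.model_selection import ExpandingWindowSplitter
from sktime.forecasting.naive import NaiveForecaster

y = load_airline()
cv = ExpandingWindowSplitter(initial_window=24, step_length=12, fh=[1, 2, 3])

res_refit = evaluate(NaiveForecaster(sp=12), cv, y, strategy="refit")
res_update = evaluate(NaiveForecaster(sp=12), cv, y, strategy="update")
print(res_refit["fit_time"].sum(), res_update["fit_time"].sum())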
def evaluate(
    forecaster, cv, y, X=None, strategy="refit", scoring=None, return_data=False
):
    """Evaluate forecaster using cross-validation.

    Parameters
    ----------
    forecaster : sktime.forecaster
        Any forecaster.
    cv : sktime.SlidingWindowSplitter or sktime.ExpandingWindowSplitter
        Splitter of how to split the data into test data and train data.
    y : pd.Series
        Target time series to which to fit the forecaster.
    X : pd.DataFrame, optional (default=None)
        Exogenous variables.
    strategy : str, optional
        Must be "refit" or "update", by default "refit". The strategy defines
        whether the forecaster is only fitted on the first train window and
        then updated, or always refitted.
    scoring : object of class MetricFunctionWrapper from
        sktime.performance_metrics, optional. Example: scoring=sMAPE().
        Used to get a score function that takes y_pred and y_test as
        arguments, by default None (if None, uses sMAPE).
    return_data : bool, optional
        Returns three additional columns in the DataFrame, by default False.
        The cells of the columns contain each a pd.Series for y_train,
        y_pred, y_test.

    Returns
    -------
    pd.DataFrame
        DataFrame that contains several columns with information regarding
        each refit/update and prediction of the forecaster.

    Examples
    --------
    >>> from sktime.datasets import load_airline
    >>> from sktime.performance_metrics.forecasting import evaluate
    >>> from sktime.forecasting.model_selection import ExpandingWindowSplitter
    >>> from sktime.forecasting.naive import NaiveForecaster
    >>> y = load_airline()
    >>> forecaster = NaiveForecaster(strategy="drift", sp=12)
    >>> cv = ExpandingWindowSplitter(
    ...     initial_window=24,
    ...     step_length=12,
    ...     fh=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
    ... )
    >>> results = evaluate(forecaster=forecaster, y=y, cv=cv)
    """
    cv = check_cv(cv)
    y = check_y(y)
    _check_strategies(strategy)
    scoring = check_scoring(scoring)

    n_splits = cv.get_n_splits(y)
    cv.start_with_window = True

    # Collect result rows and build the DataFrame at the end; repeated
    # DataFrame.append is deprecated in recent pandas.
    results = []

    for i, (train, test) in enumerate(tqdm(cv.split(y), total=n_splits)):
        # get initial window, if required
        if i == 0 and cv.initial_window and strategy == "update":
            train, test = cv.split_initial(y)
            # this might have to be directly handled in split_initial()
            test = test[: len(cv.fh)]

        # create train/test data
        y_train = y.iloc[train]
        y_test = y.iloc[test]
        # `if X` on a DataFrame raises an ambiguity error; compare to None.
        X_train = X.iloc[train] if X is not None else None
        X_test = X.iloc[test] if X is not None else None

        # fit/update
        start_fit = time.time()
        if strategy == "refit" or i == 0:
            forecaster.fit(
                y=y_train,
                X=X_train,
                fh=ForecastingHorizon(y_test.index, is_relative=False),
            )
        else:  # strategy == "update" and i != 0:
            forecaster.update(y=y_train, X=X_train)
        fit_time = time.time() - start_fit

        # predict
        start_pred = time.time()
        y_pred = forecaster.predict(
            fh=ForecastingHorizon(y_test.index, is_relative=False), X=X_test
        )
        pred_time = time.time() - start_pred

        # save results
        results.append(
            {
                "test_" + scoring.__class__.__name__: scoring(y_pred, y_test),
                "fit_time": fit_time,
                "pred_time": pred_time,
                "len_train_window": len(y_train),
                "cutoff": forecaster.cutoff,
                "y_train": y_train if return_data else np.nan,
                "y_test": y_test if return_data else np.nan,
                "y_pred": y_pred if return_data else np.nan,
            }
        )

    results = pd.DataFrame(results)

    # post-processing of results
    if not return_data:
        results = results.drop(columns=["y_train", "y_test", "y_pred"])
    results["len_train_window"] = results["len_train_window"].astype(int)

    return results
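
# Passing a custom metric and keeping the per-fold series (a sketch, not part
# of the original module). The sMAPE import path is assumed from the
# docstring's reference to sktime.performance_metrics; adjust for your
# sktime version.
from sktime.datasets import load_airline
from sktime.forecasting.model_selection import ExpandingWindowSplitter
from sktime.forecasting.naive import NaiveForecaster
from sktime.performance_metrics.forecasting import sMAPE  # assumed path

y = load_airline()
cv = ExpandingWindowSplitter(initial_window=24, step_length=12, fh=[1, 2, 3])
results = evaluate(
    NaiveForecaster(strategy="drift"), cv, y, scoring=sMAPE(), return_data=True
)

# With return_data=True, each row also holds that fold's y_train/y_test/y_pred.
first_fold_pred = results["y_pred"].iloc[0]
print(first_fold_pred.head())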