def predict(self, dataset: Dataset, num_samples: Optional[int] = None) -> Iterator[Forecast]:
    """
    Yield one :class:`RotbaumForecast` per time series in ``dataset``.

    Each forecast provides quantile predictions over the prediction
    horizon. The underlying model is quantile-based rather than
    sample-based, so ``num_samples`` has no effect; passing it only
    triggers a one-time warning.
    """
    window_size = self.preprocess_object.context_window_size

    if num_samples:
        log_once(
            "Forecast is not sample based. Ignoring parameter"
            " `num_samples` from predict method."
        )

    for entry in dataset:
        features = self.preprocess_object.make_features(
            entry, starting_index=len(entry["target"]) - window_size
        )
        yield RotbaumForecast(
            self.model_list,
            [features],
            start_date=forecast_start(entry),
            prediction_length=self.prediction_length,
            freq=self.freq,
        )
def test_forecasts(method_name):
    """
    Smoke-test RForecastPredictor end to end on the "constant" dataset:
    check forecast type, freq, length and start date, then verify the
    backtest error metrics stay below TOLERANCE.
    """
    if method_name == "mlp":
        # https://stackoverflow.com/questions/56254321/error-in-ifncol-matrix-rep-argument-is-of-length-zero
        # https://cran.r-project.org/web/packages/neuralnet/index.html
        # published before the bug fix: https://github.com/bips-hb/neuralnet/pull/21
        # The issue is still open on nnfor package: https://github.com/trnnick/nnfor/issues/8
        # TODO: look for a workaround.
        pytest.xfail(
            "MLP currently does not work because "
            "the `neuralnet` package is not yet updated with a known bug fix in ` bips-hb/neuralnet`"
        )

    dataset = datasets.get_dataset("constant")
    train_dataset = dataset.train
    test_dataset = dataset.test
    metadata = dataset.metadata

    freq = metadata.freq
    prediction_length = metadata.prediction_length

    predictor = RForecastPredictor(
        freq=freq,
        prediction_length=prediction_length,
        method_name=method_name,
    )
    predictions = list(predictor.predict(train_dataset))

    # Quantile methods yield QuantileForecast, everything else samples.
    if method_name in QUANTILE_FORECAST_METHODS:
        expected_cls = QuantileForecast
    else:
        expected_cls = SampleForecast

    for data, prediction in zip(train_dataset, predictions):
        assert isinstance(prediction, expected_cls)
        assert prediction.freq == freq
        assert prediction.prediction_length == prediction_length
        assert prediction.start_date == forecast_start(data)

    agg_metrics, item_metrics = backtest_metrics(
        test_dataset=test_dataset,
        predictor=predictor,
        evaluator=Evaluator(),
    )
    for metric_name in ("mean_wQuantileLoss", "NRMSE", "RMSE"):
        assert agg_metrics[metric_name] < TOLERANCE
def test_predictor(predictor_cls, freq: str):
    """
    Check that a naive predictor returns one forecast per series with
    the expected shape and start date; for SeasonalNaivePredictor also
    verify the forecast values against the target's last season.
    """
    predictor = predictor_cls(
        freq=freq,
        prediction_length=PREDICTION_LENGTH,
        season_length=SEASON_LENGTH,
    )
    dataset = list(
        generate_random_dataset(
            num_ts=NUM_TS,
            start_time=START_TIME,
            freq=freq,
            min_length=MIN_LENGTH,
            max_length=MAX_LENGTH,
        )
    )

    forecasts = list(predictor.predict(dataset))

    assert len(dataset) == NUM_TS
    assert len(forecasts) == NUM_TS

    for entry, forecast in zip(dataset, forecasts):
        assert forecast.samples.shape == (1, PREDICTION_LENGTH)
        assert forecast.start_date == forecast_start(entry)

        # Only the seasonal naive method has a closed-form expected
        # result we can check directly.
        if predictor_cls == SeasonalNaivePredictor:
            expected = entry["target"][
                -SEASON_LENGTH : -SEASON_LENGTH + PREDICTION_LENGTH
            ]
            assert np.allclose(forecast.samples[0], expected)
def predict_item(self, item: DataEntry) -> SampleForecast:
    """
    Produce a constant forecast for a single data entry.

    Returns a :class:`SampleForecast` with shape
    ``(num_samples, prediction_length)`` where every value is
    ``self.value``.
    """
    samples_shape = self.num_samples, self.prediction_length
    samples = np.full(samples_shape, self.value)
    return SampleForecast(
        samples=samples,
        start_date=forecast_start(item),
        # Fix: look up the conventional "item_id" key used everywhere
        # else in this file; the previous lookup of "id" never matched
        # a GluonTS data entry and always returned None.
        item_id=item.get("item_id"),
    )
def check_train_test_split(dataset):
    """
    Verify the train/test split of ``dataset``: train item ids are
    unique, and each test series extends at least ``prediction_length``
    periods beyond the end of the matching train series.
    """
    prediction_length = dataset.metadata.prediction_length

    train_end = {}
    for entry in dataset.train:
        item = entry["item_id"]
        assert item not in train_end, f"item {item} is duplicate"
        train_end[item] = forecast_start(entry)

    test_end = {
        entry["item_id"]: forecast_start(entry) for entry in dataset.test
    }

    for item, end in test_end.items():
        if item not in train_end:
            continue
        # Period arithmetic: shift the train end forward by one full
        # prediction horizon.
        expected_end = train_end[item] + prediction_length * train_end[item].freq
        assert end >= expected_end, (
            f"test entry for item {item} ends at {end} < {expected_end}"
        )
def predict_item(self, item: DataEntry) -> Forecast:
    """
    Forecast by replaying the last ``prediction_length`` observed target
    values; every sample path is identical.
    """
    last_window = item["target"][-self.prediction_length:]
    # All sample paths are the same, so a broadcast view over one row
    # avoids materializing num_samples copies.
    samples = np.broadcast_to(
        np.expand_dims(last_window, axis=0),
        shape=(self.num_samples, self.prediction_length),
    )
    return SampleForecast(
        samples=samples,
        start_date=forecast_start(item),
        item_id=item.get(FieldName.ITEM_ID),
    )
def predict_item(self, item: DataEntry) -> SampleForecast:
    """
    Draw Gaussian samples whose mean and standard deviation match the
    recent target history (NaNs ignored).
    """
    history = item["target"]
    if self.context_length is not None:
        history = history[-self.context_length:]

    loc = np.nanmean(history)
    scale = np.nanstd(history)

    # NOTE(review): this uses NumPy's global RNG, so output is only
    # reproducible if the caller seeds np.random beforehand.
    noise = np.random.standard_normal(self.shape)
    return SampleForecast(
        samples=scale * noise + loc,
        start_date=forecast_start(item),
        item_id=item.get(FieldName.ITEM_ID),
    )
def predict_item(self, item: DataEntry) -> SampleForecast:
    """
    Recursive moving-average forecast: each horizon step is the NaN-mean
    of the trailing window, where earlier predicted steps feed back into
    the window.
    """
    history = item["target"].tolist()

    for _ in range(self.prediction_length):
        window = (
            history
            if self.context_length is None
            else history[-self.context_length:]
        )
        history.append(np.nanmean(window))

    # The last prediction_length entries are the forecast; wrap as a
    # single sample path.
    forecast = np.array([history[-self.prediction_length:]])
    return SampleForecast(
        samples=forecast,
        start_date=forecast_start(item),
        item_id=item.get(FieldName.ITEM_ID),
    )
def predict_item(self, item: DataEntry) -> Forecast:
    """
    Forecast one series with the Naive2 method (seasonally adjusted
    naive forecast), returning a single sample path.
    """
    history = item["target"]
    start = forecast_start(item)
    series_id = item.get("item_id", None)

    assert len(history) >= 1, "all time series should have at least one data point"

    point_forecast = naive_2(history, self.prediction_length, self.freq)
    return SampleForecast(
        samples=np.array([point_forecast]),
        start_date=start,
        item_id=series_id,
    )
def predict_item(self, item: DataEntry) -> Forecast:
    """
    Seasonal-naive forecast: repeat the last observed season across the
    prediction horizon, falling back to the series mean when the history
    is shorter than one season.
    """
    target = np.asarray(item["target"], np.float32)
    length = len(target)
    start = forecast_start(item)

    assert length >= 1, "all time series should have at least one data point"

    if length < self.season_length:
        # Not enough history for one full season: flat mean forecast.
        samples = np.full((1, self.prediction_length), target.mean())
    else:
        # Index k of the horizon maps to the same phase of the most
        # recent season, wrapping when the horizon exceeds a season.
        offsets = [
            length - self.season_length + step % self.season_length
            for step in range(self.prediction_length)
        ]
        samples = target[offsets].reshape((1, self.prediction_length))

    return SampleForecast(
        samples=samples,
        start_date=start,
        item_id=item.get("item_id", None),
    )
def predict(
    self,
    dataset: Dataset,
    num_samples: int = 100,
    intervals: Optional[List] = None,
    save_info: bool = False,
    **kwargs,
) -> Iterator[Union[SampleForecast, QuantileForecast]]:
    """
    Run the configured R forecasting method on every series in
    ``dataset``, yielding one forecast per series.

    Parameters
    ----------
    dataset
        Input series; each entry's "target" is forecast independently.
    num_samples
        Number of sample paths requested from R. Ignored for point and
        quantile methods, whose ``output_types`` are overridden below.
    intervals
        Prediction-interval widths (percent) for quantile methods;
        defaults to 0, 10, ..., 90 (i.e. quantiles 0.05 .. 0.95).
    save_info
        If True, attach the captured R console output to the forecast's
        ``info`` dict (sample-based methods only).
    """
    if self.method_name in POINT_FORECAST_METHODS:
        print("Overriding `output_types` to `mean` since"
            f" {self.method_name} is a point forecast method.")
    elif self.method_name in QUANTILE_FORECAST_METHODS:
        print("Overriding `output_types` to `quantiles` since "
            f"{self.method_name} is a quantile forecast method.")
    for data in dataset:
        if self.trunc_length:
            # NOTE(review): this truncation mutates the caller's data
            # entry in place -- confirm that side effect is intended.
            data["target"] = data["target"][-self.trunc_length:]
        # Copy so per-series overrides don't leak into self.params.
        params = self.params.copy()
        params["num_samples"] = num_samples
        if self.method_name in POINT_FORECAST_METHODS:
            params["output_types"] = ["mean"]
        elif self.method_name in QUANTILE_FORECAST_METHODS:
            params["output_types"] = ["quantiles", "mean"]
            if intervals is None:
                # This corresponds to quantiles: 0.05 to 0.95 in steps of
                # 0.05.
                params["intervals"] = list(range(0, 100, 10))
            else:
                params["intervals"] = np.sort(intervals).tolist()
        forecast_dict, console_output = self._run_r_forecast(
            data, params, save_info=save_info)
        if self.method_name in QUANTILE_FORECAST_METHODS:
            quantile_forecasts_dict = forecast_dict["quantiles"]
            yield QuantileForecast(
                forecast_arrays=np.array(
                    list(quantile_forecasts_dict.values())),
                forecast_keys=list(quantile_forecasts_dict.keys()),
                start_date=forecast_start(data),
                item_id=data.get("item_id", None),
            )
        else:
            if self.method_name in POINT_FORECAST_METHODS:
                # Handling special cases outside of R is better, since it
                # is more visible and is easier to change. Repeat mean
                # forecasts `num_samples` times.
                # NOTE(review): forecast_dict["mean"] is presumably a
                # Python list here, so `* num_samples` repeats it rather
                # than scaling values -- confirm the upstream type.
                samples = np.reshape(
                    forecast_dict["mean"] * params["num_samples"],
                    (params["num_samples"], self.prediction_length),
                )
            else:
                samples = np.array(forecast_dict["samples"])
            expected_shape = (
                params["num_samples"],
                self.prediction_length,
            )
            assert (
                samples.shape == expected_shape
            ), f"Expected shape {expected_shape} but found {samples.shape}"
            info = ({
                "console_output": "\n".join(console_output)
            } if save_info else None)
            yield SampleForecast(
                samples,
                forecast_start(data),
                info=info,
                item_id=data.get("item_id", None),
            )