def predict_item(self, item: DataEntry) -> SampleForecast:
    return SampleForecast(
        samples=self.samples,
        start_date=item["start"],
        freq=self.freq,
        item_id=item.get(FieldName.ITEM_ID),
    )
def predict_item(self, item, trained_model): """Compute quantiles using the confidence intervals of autoarima. Args: item (DataEntry): One timeseries. trained_model (STLForecastResults): Trained STL model. Returns: SampleForecast of quantiles. """ target_length = len(item[TIMESERIES_KEYS.TARGET]) start_date = frequency_add(item[TIMESERIES_KEYS.START], target_length) samples = [] for alpha in np.arange(0.02, 1.01, 0.02): predictions = trained_model.get_prediction( start=target_length, end=target_length + self.prediction_length - 1) confidence_intervals = predictions.conf_int(alpha=alpha) samples += [ confidence_intervals["lower"].values, confidence_intervals["upper"].values ] return SampleForecast(samples=np.stack(samples), start_date=start_date, freq=self.freq)
def predict(self, dataset: Dataset, num_samples=None, save_info=False, **kwargs) -> Iterator[SampleForecast]:
    for entry in dataset:
        if isinstance(entry, dict):
            data = entry
        else:
            data = entry.data
        if self.trunc_length:
            data = data[-self.trunc_length:]

        params = self.params.copy()
        if num_samples is not None:
            params['num_samples'] = num_samples

        forecast_dict, console_output = self._run_r_forecast(
            data, params, save_info=save_info)

        forecast_start = pd.Timestamp(data['start'], freq=self.freq) + \
            data['target'].shape[0]

        samples = np.array(forecast_dict['samples'])
        expected_shape = (params['num_samples'], self.prediction_length)
        assert (
            samples.shape == expected_shape
        ), f"Expected shape {expected_shape} but found {samples.shape}"

        info = ({
            'console_output': '\n'.join(console_output)
        } if save_info else None)

        yield SampleForecast(samples, forecast_start, forecast_start.freqstr, info=info)
def __call__(
    self,
    inference_data_loader: InferenceDataLoader,
    prediction_net: BlockType,
    input_names: List[str],
    freq: str,
    output_transform: Optional[OutputTransform],
    num_samples: Optional[int],
    **kwargs,
) -> Iterator[Forecast]:
    for batch in inference_data_loader:
        inputs = [batch[k] for k in input_names]
        outputs = prediction_net(*inputs).asnumpy()
        if output_transform is not None:
            outputs = output_transform(batch, outputs)
        if num_samples:
            num_collected_samples = outputs[0].shape[0]
            collected_samples = [outputs]
            while num_collected_samples < num_samples:
                outputs = prediction_net(*inputs).asnumpy()
                if output_transform is not None:
                    outputs = output_transform(batch, outputs)
                collected_samples.append(outputs)
                num_collected_samples += outputs[0].shape[0]
            outputs = [
                np.concatenate(s)[:num_samples]
                for s in zip(*collected_samples)
            ]
            assert len(outputs[0]) == num_samples
        i = -1
        for i, output in enumerate(outputs):
            yield SampleForecast(
                output,
                start_date=batch["forecast_start"][i],
                freq=freq,
                item_id=batch[FieldName.ITEM_ID][i]
                if FieldName.ITEM_ID in batch else None,
                info=batch["info"][i] if "info" in batch else None,
            )
        assert i + 1 == len(batch["forecast_start"])
def test_when_given_zero_forecasts_when_evaluator_called_then_output_equal_to_gluonts(
        metric_name, deepar_trained):
    model = deepar_trained
    forecast_iter, ts_iter = make_evaluation_predictions(
        dataset=model._to_gluonts_dataset(DUMMY_TS_DATAFRAME),
        predictor=model.gts_predictor,
        num_samples=100,
    )
    fcast_list, ts_list = list(forecast_iter), list(ts_iter)

    zero_forecast_list = []
    for s in fcast_list:
        zero_forecast_list.append(
            SampleForecast(
                samples=np.zeros_like(s.samples),  # noqa
                start_date=s.start_date,
                freq=s.freq,
                item_id=s.item_id,
            ))

    forecast_df = model._gluonts_forecasts_to_data_frame(
        zero_forecast_list, quantile_levels=model.quantile_levels)

    ag_evaluator = TimeSeriesEvaluator(eval_metric=metric_name, prediction_length=2)
    ag_value = ag_evaluator(DUMMY_TS_DATAFRAME, forecast_df)

    gts_evaluator = GluonTSEvaluator()
    gts_results, _ = gts_evaluator(ts_iterator=ts_list,
                                   fcst_iterator=zero_forecast_list)

    assert np.isclose(gts_results[metric_name], ag_value, atol=1e-5)
def predict_item(self, item, trained_model): """Compute quantiles using the confidence intervals of autoarima. Args: item (DataEntry): One timeseries. trained_model (pm.auto_arima): Trained autoarima model. Returns: SampleForecast of quantiles. """ start_date = frequency_add(item[TIMESERIES_KEYS.START], len(item[TIMESERIES_KEYS.TARGET])) prediction_external_features = self._set_prediction_external_features( item) samples = [] for alpha in np.arange(0.02, 1.01, 0.02): confidence_intervals = trained_model.predict( n_periods=self.prediction_length, X=prediction_external_features, return_conf_int=True, alpha=alpha)[1] samples += [confidence_intervals[:, 0], confidence_intervals[:, 1]] return SampleForecast(samples=np.stack(samples), start_date=start_date, freq=self.freq)
def to_sample_forecast(self, num_samples: int = 200) -> SampleForecast:
    return SampleForecast(
        samples=self.distribution.sample(num_samples),
        start_date=self.start_date,
        item_id=self.item_id,
        info=self.info,
    )
def predict_item(self, item: DataEntry) -> SampleForecast: return SampleForecast( samples=self.samples, start_date=item["start"], freq=self.freq, item_id=item["id"] if "id" in item else None, )
def predict(
    self,
    dataset: Dataset,
    num_samples: int = 100,
    save_info: bool = False,
    **kwargs,
) -> Iterator[SampleForecast]:
    for entry in dataset:
        if isinstance(entry, dict):
            data = entry
        else:
            data = entry.data
        if self.trunc_length:
            data = data[-self.trunc_length:]

        params = self.params.copy()
        params["num_samples"] = num_samples

        forecast_dict, console_output = self._run_r_forecast(
            data, params, save_info=save_info)

        samples = np.array(forecast_dict["samples"])
        expected_shape = (params["num_samples"], self.prediction_length)
        assert (
            samples.shape == expected_shape
        ), f"Expected shape {expected_shape} but found {samples.shape}"

        info = ({
            "console_output": "\n".join(console_output)
        } if save_info else None)

        yield SampleForecast(samples, forecast_start(data), self.freq, info=info)
def predict(
    targets: pd.Series,
    prediction_length: int,
    sampling_weights_iterator: Iterator[np.ndarray],
    num_samples: int,
) -> SampleForecast:
    """
    Given the `targets`, generates a `Forecast` containing prediction samples
    for `prediction_length` time points.

    Predictions are generated via weighted sampling where the weights are
    specified in `sampling_weights_iterator`.

    Parameters
    ----------
    targets
        targets to predict
    prediction_length
        prediction length
    sampling_weights_iterator
        iterator over weights used for sampling
    num_samples
        number of samples to set in the :class:`SampleForecast` object

    Returns
    -------
    SampleForecast
        a :class:`SampleForecast` object for the given targets
    """
    # Note that to generate the prediction from the second time step onwards,
    # we need the samples predicted for all the previous time steps in the
    # prediction range.
    # To make this simpler, we replicate the training targets `num_samples` times.
    # samples shape: (num_samples, train_length + prediction_length)
    samples = np.tile(
        A=np.concatenate((targets.values, np.zeros(prediction_length))),
        reps=(num_samples, 1),
    )

    train_length = len(targets)
    for t, sampling_weights in enumerate(sampling_weights_iterator):
        samples_ix = WeightedSampler.sample(sampling_weights, num_samples)
        samples[:, train_length + t] = samples[np.arange(num_samples), samples_ix]

    # Forecast takes as input the prediction range samples, the start date
    # of the prediction range, and the frequency of the time series.
    samples_pred_range = samples[:, train_length:]  # prediction range only
    forecast_start = targets.index[-1] + 1 * targets.index.freq
    return SampleForecast(
        samples=samples_pred_range,
        start_date=forecast_start,
        freq=forecast_start.freqstr,
    )
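# A minimal usage sketch for the weighted-sampling predict() above, with
# hypothetical data. It assumes the behaviour the function itself relies on:
# `targets.index` exposes `.freq` (older pandas DatetimeIndex), and a
# `WeightedSampler` implementation is in scope. Uniform weights serve purely
# as a placeholder; real callers would supply model-derived weights.
import numpy as np
import pandas as pd

train_length, prediction_length, num_samples = 48, 12, 100
targets = pd.Series(
    np.random.rand(train_length),
    index=pd.date_range("2021-01-01", periods=train_length, freq="H"),
)
# One weight vector per prediction step; at step t it must cover the
# train_length + t values already available for resampling.
weights_iter = (
    np.ones(train_length + t) / (train_length + t)
    for t in range(prediction_length)
)
forecast = predict(targets, prediction_length, weights_iter, num_samples)
print(forecast.samples.shape)  # (num_samples, prediction_length)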
def predict(self, dataset: Dataset, **kwargs) -> Iterator[SampleForecast]:
    for x in dataset:
        yield SampleForecast(
            samples=self.samples,
            start_date=x["start"],
            freq=self.freq,
            item_id=x["id"] if "id" in x else None,
        )
def to_sample_forecast(self, num_samples: int = 200) -> SampleForecast:
    return SampleForecast(
        samples=self.distribution.sample((num_samples,)).cpu().numpy(),
        start_date=self.start_date,
        freq=self.freq,
        item_id=self.item_id,
        info=self.info,
    )
def predict_item(self, item: DataEntry) -> SampleForecast:
    samples_shape = self.num_samples, self.prediction_length
    samples = np.full(samples_shape, self.value)
    return SampleForecast(
        samples=samples,
        start_date=forecast_start(item),
        item_id=item.get("id"),
    )
def fcst_iterator(fcst, start_dates, freq):
    """
    :param fcst: list of numpy arrays with the sample paths
    :param start_dates: list of forecast start dates, one per sample path
    :param freq: frequency of the time series
    :return: iterator over :class:`SampleForecast` objects
    """
    for i in range(len(fcst)):
        yield SampleForecast(samples=fcst[i], start_date=start_dates[i], freq=freq)
def predict(self, dataset: Dataset, **kwargs) -> Iterator[SampleForecast]:
    for item in dataset:
        mean = np.mean(item["target"][-self.context_length:])
        yield SampleForecast(
            samples=mean * np.ones(shape=self.shape),
            start_date=item["start"],
            freq=self.freq,
            item_id=item["id"] if "id" in item else None,
        )
def predict(
    self, dataset: Dataset, num_samples: Optional[int] = 1, **kwargs
) -> Iterator[Forecast]:
    if num_samples != 1:
        logging.warning(
            "NBEATSEnsemblePredictor does not support sampling. "
            "Therefore 'num_samples' will be ignored and set to 1."
        )
    iterators = []

    # create the iterators from the predictors
    for predictor in self.predictors:
        iterators.append(predictor.predict(dataset, num_samples=1))

    # we always have to predict for one series in the dataset with
    # all models and return it as a 'SampleForecast' so that it is
    # clear that all these predictions concern the same series
    for item in dataset:
        output = []
        start_date = None

        for iterator in iterators:
            prediction = next(iterator)

            # in order to avoid mypy's complaints
            assert isinstance(prediction, SampleForecast)

            output.append(prediction.samples)

            # get the forecast start date
            if start_date is None:
                start_date = prediction.start_date
        output = np.stack(output, axis=0)

        # aggregating output of different models
        # default according to paper is median,
        # but we can also make use of not aggregating
        if self.aggregation_method == "median":
            output = np.median(output, axis=0)
        elif self.aggregation_method == "mean":
            output = np.mean(output, axis=0)
        else:
            # "none": do not aggregate
            pass

        # in order to avoid mypy's complaints
        assert start_date is not None

        yield SampleForecast(
            output,
            start_date=start_date,
            freq=start_date.freqstr,
            item_id=item[FieldName.ITEM_ID] if FieldName.ITEM_ID in item else None,
            info=item["info"] if "info" in item else None,
        )
def predict(self, dataset: Dataset, **kwargs) -> Iterator[SampleForecast]:
    for entry in dataset:
        data = self._make_prophet_data_entry(entry)
        forecast_samples = self._run_prophet(data)
        yield SampleForecast(
            samples=forecast_samples,
            start_date=data.forecast_start,
            freq=self.freq,
        )
def predict_item(self, item: DataEntry) -> Forecast: past_ts_data = item["target"] forecast_start_time = forecast_start(item) assert (len(past_ts_data) >= 1), "all time series should have at least one data point" prediction = naive_2(past_ts_data, self.prediction_length, self.freq) samples = np.array([prediction]) return SampleForecast(samples, forecast_start_time, self.freq)
def predict_item(self, item: DataEntry) -> Forecast: prediction = item["target"][-self.prediction_length:] samples = np.broadcast_to( array=np.expand_dims(prediction, 0), shape=(self.num_samples, self.prediction_length), ) return SampleForecast( samples=samples, start_date=forecast_start(item), item_id=item.get(FieldName.ITEM_ID), )
def to_forecast(self, ag_output, start_timestamp, item_id=None) -> SampleForecast:
    samples = ag_output.reshape((1, self.prediction_length))
    sample = SampleForecast(
        freq=self.freq,
        start_date=pd.Timestamp(start_timestamp, freq=self.freq),
        item_id=item_id,
        samples=samples,
    )
    return sample
def predict_item(self, item: DataEntry) -> Forecast: prediction = item["target"][-self.prediction_length:] samples = np.broadcast_to( array=np.expand_dims(prediction, 0), shape=(self.num_samples, self.prediction_length), ) return SampleForecast( samples=samples, start_date=item["start"], freq=self.freq, item_id=item["id"] if "id" in item else None, )
def predict(self, dataset: Dataset, **kwargs) -> Iterator[Forecast]: for x in dataset: prediction = x["target"][-self.prediction_length:] samples = np.broadcast_to( array=np.expand_dims(prediction, 0), shape=(self.num_samples, self.prediction_length), ) yield SampleForecast( samples=samples, start_date=x["start"], freq=self.freq, item_id=x["id"] if "id" in x else None, )
def predict_item(self, item: DataEntry) -> SampleForecast: if self.context_length is not None: target = item["target"][-self.context_length:] else: target = item["target"] mean = np.nanmean(target) std = np.nanstd(target) normal = np.random.standard_normal(self.shape) return SampleForecast( samples=std * normal + mean, start_date=forecast_start(item), item_id=item.get(FieldName.ITEM_ID), )
def predict_item(self, item: DataEntry) -> SampleForecast: target = item["target"].tolist() for _ in range(self.prediction_length): if self.context_length is not None: window = target[-self.context_length:] else: window = target target.append(np.nanmean(window)) return SampleForecast( samples=np.array([target[-self.prediction_length:]]), start_date=forecast_start(item), item_id=item.get(FieldName.ITEM_ID), )
def predict_item(self, item: DataEntry) -> SampleForecast: if self.context_length is not None: target = item["target"][-self.context_length :] else: target = item["target"] mean = np.nanmean(target) std = np.nanstd(target) normal = np.random.standard_normal(self.shape) start_date = frequency_add(item["start"], len(item["target"])) return SampleForecast( samples=std * normal + mean, start_date=start_date, freq=self.freq, item_id=item["id"] if "id" in item else None, )
def predict(self, dataset: Dataset, num_samples: int = 100, **kwargs) -> Iterator[SampleForecast]:
    params = self.prophet_params.copy()
    params.update(uncertainty_samples=num_samples)

    for entry in dataset:
        data = self._make_prophet_data_entry(entry)
        forecast_samples = self._run_prophet(data, params)
        yield SampleForecast(
            samples=forecast_samples,
            start_date=data.forecast_start,
        )
def _predict_time_series(self, start_time: pd.Timestamp, target: np.ndarray) -> SampleForecast:
    len_ts = len(target)
    assert (
        len_ts >= 1
    ), "all time series should have at least one data point"

    if len_ts >= self.season_length:
        indices = [
            len_ts - self.season_length + k % self.season_length
            for k in range(self.prediction_length)
        ]
        samples = target[indices].reshape((1, self.prediction_length))
    else:
        samples = np.full(shape=(1, self.prediction_length),
                          fill_value=target.mean())

    forecast_time = start_time + len_ts * start_time.freq
    return SampleForecast(samples, forecast_time, start_time.freqstr)
def predict(self, dataset: Dataset, **kwargs) -> Iterator[SampleForecast]:
    for item in dataset:
        if self.context_length is not None:
            target = item["target"][-self.context_length:]
        else:
            target = item["target"]

        mean = np.nanmean(target)
        std = np.nanstd(target)
        normal = np.random.standard_normal(self.shape)

        # the forecast starts right after the full observed target,
        # not after the (possibly truncated) context window
        start_date = frequency_add(item["start"], len(item["target"]))
        yield SampleForecast(
            samples=std * normal + mean,
            start_date=start_date,
            freq=self.freq,
            item_id=item["id"] if "id" in item else None,
        )
def predict_item(self, item: DataEntry) -> Forecast: target = np.asarray(item["target"], np.float32) len_ts = len(target) forecast_start_time = forecast_start(item) assert (len_ts >= 1), "all time series should have at least one data point" if len_ts >= self.season_length: indices = [ len_ts - self.season_length + k % self.season_length for k in range(self.prediction_length) ] samples = target[indices].reshape((1, self.prediction_length)) else: samples = np.full(shape=(1, self.prediction_length), fill_value=target.mean()) return SampleForecast(samples, forecast_start_time, self.freq)
def _to_forecast(
    self,
    ag_output: np.ndarray,
    start_timestamp: pd.Period,
    item_id=None,
) -> Forecast:
    if self.quantiles_to_predict:
        forecasts = ag_output.transpose()
        return QuantileForecast(
            start_date=start_timestamp,
            item_id=item_id,
            forecast_arrays=forecasts,
            forecast_keys=self.forecast_keys,
        )
    else:
        samples = ag_output.reshape((1, self.prediction_length))
        return SampleForecast(
            start_date=start_timestamp,
            item_id=item_id,
            samples=samples,
        )