def predict_item(self, item: DataEntry) -> Forecast:
    """Forecast one series with the Naive2 method.

    Runs ``naive_2`` on the full observed target and wraps the single
    resulting trajectory in a one-sample :class:`SampleForecast`.
    """
    history = item["target"]
    start = forecast_start(item)
    assert (
        len(history) >= 1
    ), "all time series should have at least one data point"
    point_forecast = naive_2(history, self.prediction_length, self.freq)
    return SampleForecast(np.array([point_forecast]), start, self.freq)
def predict_item(self, item: DataEntry) -> Forecast:
    """Repeat the last ``prediction_length`` observed values as the forecast.

    The identical trajectory is replicated ``num_samples`` times via a
    zero-copy broadcast so downstream code sees a sample-based forecast.
    """
    tail = item["target"][-self.prediction_length:]
    replicated = np.broadcast_to(
        array=np.expand_dims(tail, 0),
        shape=(self.num_samples, self.prediction_length),
    )
    return SampleForecast(
        samples=replicated,
        start_date=item["start"],
        freq=self.freq,
        item_id=item.get(FieldName.ITEM_ID),
    )
def _to_forecast(
    self,
    ag_output: np.ndarray,
    start_timestamp: pd.Timestamp,
    item_id=None,
) -> SampleForecast:
    """Wrap an AutoGluon point forecast in a single-sample SampleForecast.

    Parameters
    ----------
    ag_output
        Point predictions; must contain ``prediction_length`` values.
    start_timestamp
        Timestamp of the first predicted step.
    item_id
        Optional identifier propagated onto the forecast.

    Returns
    -------
    SampleForecast
        A forecast holding exactly one sample path.
    """
    # Fix: the annotation previously claimed ``Iterator[SampleForecast]``
    # although a single forecast object is returned (no iteration involved).
    samples = ag_output.reshape((1, self.prediction_length))
    return SampleForecast(
        freq=self.freq,
        start_date=pd.Timestamp(start_timestamp, freq=self.freq),
        item_id=item_id,
        samples=samples,
    )
def predict(self, dataset: Dataset, **kwargs) -> Iterator[Forecast]:
    """For every series, yield its last ``prediction_length`` values
    replicated ``num_samples`` times as a constant sample forecast."""
    for entry in dataset:
        window = entry["target"][-self.prediction_length:]
        samples = np.broadcast_to(
            array=np.expand_dims(window, 0),
            shape=(self.num_samples, self.prediction_length),
        )
        yield SampleForecast(
            samples=samples,
            start_date=entry["start"],
            freq=self.freq,
            item_id=entry.get("id"),
        )
def predict_item(self, item: DataEntry) -> SampleForecast:
    """Draw i.i.d. Gaussian samples matched to the target window's
    empirical mean and standard deviation (NaNs are ignored)."""
    history = item["target"]
    if self.context_length is not None:
        history = history[-self.context_length:]
    location = np.nanmean(history)
    spread = np.nanstd(history)
    noise = np.random.standard_normal(self.shape)
    return SampleForecast(
        samples=spread * noise + location,
        start_date=forecast_start(item),
        item_id=item.get(FieldName.ITEM_ID),
    )
def predict_item(self, item: DataEntry) -> SampleForecast:
    """Recursive moving-average forecast.

    Each predicted step is the NaN-aware mean of the trailing window,
    which includes previously generated predictions.
    """
    history = item["target"].tolist()
    for _ in range(self.prediction_length):
        window = (
            history
            if self.context_length is None
            else history[-self.context_length:]
        )
        history.append(np.nanmean(window))
    return SampleForecast(
        samples=np.array([history[-self.prediction_length:]]),
        start_date=forecast_start(item),
        item_id=item.get(FieldName.ITEM_ID),
    )
def predict(
    self, dataset: Dataset, num_samples: int = 100, **kwargs
) -> Iterator[SampleForecast]:
    """Run Prophet on every series, drawing ``num_samples`` uncertainty
    sample paths per forecast."""
    run_params = dict(self.prophet_params, uncertainty_samples=num_samples)
    for entry in dataset:
        prophet_entry = self._make_prophet_data_entry(entry)
        yield SampleForecast(
            samples=self._run_prophet(prophet_entry, run_params),
            start_date=prophet_entry.forecast_start,
        )
def predict_item(self, item: DataEntry) -> SampleForecast:
    """Gaussian forecast matched to the empirical mean/std of the target,
    optionally restricted to the trailing ``context_length`` window."""
    target = item["target"]
    if self.context_length is not None:
        target = target[-self.context_length:]
    mean = np.nanmean(target)
    std = np.nanstd(target)
    noise = np.random.standard_normal(self.shape)
    # The forecast begins right after the last observation of the *full*
    # series, regardless of any context truncation above.
    return SampleForecast(
        samples=std * noise + mean,
        start_date=frequency_add(item["start"], len(item["target"])),
        freq=self.freq,
        item_id=item.get("id"),
    )
def _predict_time_series(
    self, start_time: pd.Timestamp, target: np.ndarray
) -> SampleForecast:
    """Seasonal-naive forecast: repeat the last full season of the target,
    falling back to the series mean when fewer than ``season_length``
    points are available."""
    n = len(target)
    assert (
        n >= 1
    ), "all time series should have at least one data point"
    if n >= self.season_length:
        picks = [
            n - self.season_length + step % self.season_length
            for step in range(self.prediction_length)
        ]
        samples = target[picks].reshape((1, self.prediction_length))
    else:
        samples = np.full(
            shape=(1, self.prediction_length), fill_value=target.mean()
        )
    # NOTE(review): relies on the (long-deprecated) ``Timestamp.freq``
    # attribute for offset arithmetic — verify against the pandas version.
    forecast_time = start_time + n * start_time.freq
    return SampleForecast(samples, forecast_time, start_time.freqstr)
def predict_item(self, item: DataEntry) -> Forecast:
    """Seasonal-naive forecast of one entry: repeat the most recent season,
    or the overall mean when the history is shorter than one season."""
    target = np.asarray(item["target"], np.float32)
    n = len(target)
    start = forecast_start(item)
    assert (
        n >= 1
    ), "all time series should have at least one data point"
    if n < self.season_length:
        samples = np.full(
            shape=(1, self.prediction_length), fill_value=target.mean()
        )
    else:
        season_idx = [
            n - self.season_length + k % self.season_length
            for k in range(self.prediction_length)
        ]
        samples = target[season_idx].reshape((1, self.prediction_length))
    return SampleForecast(samples, start, self.freq)
def predict(self, dataset: Dataset, **kwargs) -> Iterator[SampleForecast]:
    """Yield one Gaussian forecast per series.

    Samples are drawn from a normal distribution matched to the NaN-aware
    mean and standard deviation of the (optionally truncated) target.

    Bug fix: the forecast start date is now derived from the length of the
    *full* observed target. Previously ``len(target)`` was taken after the
    ``context_length`` truncation, which shifted the start date backwards
    whenever ``context_length < len(item["target"])`` (compare the
    sibling ``predict_item`` implementation, which uses the full length).
    """
    for item in dataset:
        full_target = item["target"]
        if self.context_length is not None:
            target = full_target[-self.context_length:]
        else:
            target = full_target
        mean = np.nanmean(target)
        std = np.nanstd(target)
        normal = np.random.standard_normal(self.shape)
        # Forecast begins immediately after the entire observed series.
        start_date = frequency_add(item["start"], len(full_target))
        yield SampleForecast(
            samples=std * normal + mean,
            start_date=start_date,
            freq=self.freq,
            item_id=item["id"] if "id" in item else None,
        )
def predict_item(self, item, trained_model):
    """Compute quantiles using the confidence intervals of autoarima.

    Args:
        item (DataEntry): One timeseries.
        trained_model (STLForecastResults): Trained STL model.

    Returns:
        SampleForecast of quantiles.
    """
    target_length = len(item[TIMESERIES_KEYS.TARGET])
    start_date = frequency_add(item[TIMESERIES_KEYS.START], target_length)
    # The point prediction does not depend on alpha, so compute it once
    # instead of once per confidence level (hoisted loop-invariant call).
    predictions = trained_model.get_prediction(
        start=target_length, end=target_length + self.prediction_length - 1
    )
    samples = []
    # alpha sweeps 0.02, 0.04, ..., 1.0; each level contributes its lower
    # and upper confidence bounds as two sample paths.
    for alpha in np.arange(0.02, 1.01, 0.02):
        confidence_intervals = predictions.conf_int(alpha=alpha)
        samples.append(confidence_intervals["lower"].values)
        samples.append(confidence_intervals["upper"].values)
    return SampleForecast(
        samples=np.stack(samples), start_date=start_date, freq=self.freq
    )
def __call__(
    self,
    inference_data_loader: InferenceDataLoader,
    prediction_net,
    input_names: List[str],
    freq: str,
    output_transform: Optional[OutputTransform],
    num_samples: Optional[int],
    **kwargs
) -> Iterator[Forecast]:
    """Run the network over every batch of the loader and yield one
    SampleForecast per series in each batch.

    When ``num_samples`` is truthy, the network is invoked repeatedly on
    the same inputs until at least that many sample paths are collected;
    the concatenated result is then truncated to exactly ``num_samples``.
    """
    for batch in inference_data_loader:
        inputs = [batch[k] for k in input_names]
        outputs = predict_to_numpy(prediction_net, inputs)
        if output_transform is not None:
            outputs = output_transform(batch, outputs)
        if num_samples:
            num_collected_samples = outputs[0].shape[0]
            collected_samples = [outputs]
            # Keep querying the network until enough sample paths are
            # gathered; each call is assumed to produce the same layout.
            while num_collected_samples < num_samples:
                outputs = predict_to_numpy(prediction_net, inputs)
                if output_transform is not None:
                    outputs = output_transform(batch, outputs)
                collected_samples.append(outputs)
                num_collected_samples += outputs[0].shape[0]
            # Concatenate per-series and truncate to exactly num_samples.
            outputs = [
                np.concatenate(s)[:num_samples]
                for s in zip(*collected_samples)
            ]
            assert len(outputs[0]) == num_samples
        i = -1
        for i, output in enumerate(outputs):
            yield SampleForecast(
                output,
                start_date=batch["forecast_start"][i],
                freq=freq,
                item_id=batch[FieldName.ITEM_ID][i]
                if FieldName.ITEM_ID in batch
                else None,
                info=batch["info"][i] if "info" in batch else None,
            )
        # Every series in the batch must have produced one forecast.
        assert i + 1 == len(batch["forecast_start"])
def _to_forecast(
    self,
    ag_output: np.ndarray,
    start_timestamp: pd.Period,
    item_id=None,
) -> Forecast:
    """Convert raw AutoGluon output into a GluonTS forecast object.

    Returns a QuantileForecast when quantile outputs were requested and a
    one-sample SampleForecast otherwise.
    """
    if not self.quantiles_to_predict:
        return SampleForecast(
            start_date=start_timestamp,
            item_id=item_id,
            samples=ag_output.reshape((1, self.prediction_length)),
        )
    return QuantileForecast(
        start_date=start_timestamp,
        item_id=item_id,
        forecast_arrays=ag_output.transpose(),
        forecast_keys=self.forecast_keys,
    )
def predict(
    self,
    dataset: Dataset,
    num_samples: int = 100,
    save_info: bool = False,
    **kwargs,
) -> Iterator[SampleForecast]:
    """Forecast each entry by delegating to the external R forecast
    process and wrapping its samples in a SampleForecast.

    Parameters
    ----------
    dataset
        Entries to forecast (dicts, or objects exposing ``.data``).
    num_samples
        Number of sample paths requested from the R side.
    save_info
        When True, the R console output is attached to each forecast's
        ``info`` field.
    """
    for entry in dataset:
        if isinstance(entry, dict):
            data = entry
        else:
            data = entry.data
        if self.trunc_length:
            # NOTE(review): this slices `data` itself, which only works if
            # `data` supports slicing; a plain dict entry would raise
            # TypeError here. Possibly `data["target"]` was intended —
            # confirm against callers.
            data = data[-self.trunc_length :]
        params = self.params.copy()
        params["num_samples"] = num_samples
        forecast_dict, console_output = self._run_r_forecast(
            data, params, save_info=save_info
        )
        # First timestamp after the observed target (freq-aware addition
        # via the legacy Timestamp-plus-int idiom).
        forecast_start = (
            pd.Timestamp(data["start"], freq=self.freq)
            + data["target"].shape[0]
        )
        samples = np.array(forecast_dict["samples"])
        expected_shape = (params["num_samples"], self.prediction_length)
        assert (
            samples.shape == expected_shape
        ), f"Expected shape {expected_shape} but found {samples.shape}"
        info = (
            {"console_output": "\n".join(console_output)}
            if save_info
            else None
        )
        yield SampleForecast(
            samples, forecast_start, forecast_start.freqstr, info=info
        )
def predict(
    self, dataset: Dataset, num_eval_samples=None, **kwargs
) -> Iterator[SampleForecast]:
    """Forecast every series with Prophet.

    ``num_eval_samples`` overrides the predictor's default number of
    uncertainty samples when given.
    """
    for entry in dataset:
        data = entry if isinstance(entry, dict) else entry.data
        params = self.params.copy()
        if num_eval_samples is None:
            num_eval_samples = self.num_eval_samples
        params['uncertainty_samples'] = num_eval_samples
        forecast = self._run_prophet(data, params)
        samples = forecast['yhat'].T
        forecast_start = pd.Timestamp(
            data['start'], freq=self.freq
        ) + len(data['target'])
        assert samples.shape == (
            num_eval_samples,
            self.prediction_length,
        ), samples.shape
        yield SampleForecast(samples, forecast_start, forecast_start.freqstr)
def predict(
    self,
    dataset: Dataset,
    num_samples: int = 1,
    save_info: bool = False,
    **kwargs,
) -> Iterator[SampleForecast]:
    """Forecast each entry with the Croston method.

    Croston is a point-forecast (non-probabilistic) method, so any
    requested ``num_samples`` other than 1 is coerced to 1 with a warning.
    """
    if num_samples != 1:
        num_samples = 1
        logger.warning(
            "num_samples changed to 1 because Croston is non-probabilistic"
        )
    assert num_samples == 1, "Non Probabilistic Method only supports num_samples=1"
    for entry in dataset:
        if isinstance(entry, dict):
            data = entry
        else:
            data = entry.data
        if self.trunc_length:
            # NOTE(review): slices `data` directly; valid only when `data`
            # supports slicing (it would fail for a dict entry) — confirm.
            data = data[-self.trunc_length:]
        params = self.params.copy()
        params["num_samples"] = num_samples
        forecast_dict = self._run_croston_forecast(data, params)
        samples = np.array(forecast_dict["samples"])
        expected_shape = (params["num_samples"], self.prediction_length)
        assert (
            samples.shape == expected_shape
        ), f"Expected shape {expected_shape} but found {samples.shape}"
        yield SampleForecast(
            samples,
            forecast_start(data),
            self.freq,
            # Fix: use .get so entries without an "item_id" field no longer
            # raise KeyError, matching how the sibling R predictors read it.
            item_id=data.get("item_id"),
        )
def predict_item(self, item: DataEntry) -> SampleForecast:
    """Return the predictor's fixed, precomputed samples for every item,
    anchored at the item's own start timestamp."""
    return SampleForecast(
        start_date=item["start"],
        samples=self.samples,
        item_id=item.get(FieldName.ITEM_ID),
    )
def predict(
    self,
    dataset: Dataset,
    num_samples: int = 100,
    intervals: Optional[List] = None,
    save_info: bool = False,
    **kwargs,
) -> Iterator[Union[SampleForecast, QuantileForecast]]:
    """Forecast every entry with the configured R method.

    Yields QuantileForecast objects for quantile methods and
    SampleForecast objects otherwise. ``intervals`` optionally overrides
    the prediction-interval levels passed to R; ``save_info`` attaches
    the R console output to sample forecasts.
    """
    if self.method_name in POINT_FORECAST_METHODS:
        print("Overriding `output_types` to `mean` since"
              f" {self.method_name} is a point forecast method.")
    elif self.method_name in QUANTILE_FORECAST_METHODS:
        print("Overriding `output_types` to `quantiles` since "
              f"{self.method_name} is a quantile forecast method.")
    for data in dataset:
        if self.trunc_length:
            # NOTE(review): mutates the dataset entry in place; callers
            # re-iterating the dataset would see the truncated target —
            # confirm this is intended.
            data["target"] = data["target"][-self.trunc_length:]
        params = self.params.copy()
        params["num_samples"] = num_samples
        if self.method_name in POINT_FORECAST_METHODS:
            params["output_types"] = ["mean"]
        elif self.method_name in QUANTILE_FORECAST_METHODS:
            params["output_types"] = ["quantiles", "mean"]
            if intervals is None:
                # This corresponds to quantiles: 0.05 to 0.95 in steps of 0.05.
                params["intervals"] = list(range(0, 100, 10))
            else:
                params["intervals"] = np.sort(intervals).tolist()
        forecast_dict, console_output = self._run_r_forecast(
            data, params, save_info=save_info)
        if self.method_name in QUANTILE_FORECAST_METHODS:
            quantile_forecasts_dict = forecast_dict["quantiles"]
            yield QuantileForecast(
                forecast_arrays=np.array(
                    list(quantile_forecasts_dict.values())),
                forecast_keys=list(quantile_forecasts_dict.keys()),
                start_date=forecast_start(data),
                freq=self.freq,
                item_id=data.get("item_id", None),
            )
        else:
            if self.method_name in POINT_FORECAST_METHODS:
                # Handling special cases outside of R is better, since it is more visible and is easier to change.
                # Repeat mean forecasts `num_samples` times.
                # (List repetition duplicates the mean path, then the
                # reshape stacks the copies into one row per sample.)
                samples = np.reshape(
                    forecast_dict["mean"] * params["num_samples"],
                    (params["num_samples"], self.prediction_length),
                )
            else:
                samples = np.array(forecast_dict["samples"])
            expected_shape = (
                params["num_samples"],
                self.prediction_length,
            )
            assert (
                samples.shape == expected_shape
            ), f"Expected shape {expected_shape} but found {samples.shape}"
            info = ({
                "console_output": "\n".join(console_output)
            } if save_info else None)
            yield SampleForecast(
                samples,
                forecast_start(data),
                self.freq,
                info=info,
                item_id=data.get("item_id", None),
            )
# Shared fixtures: the 99 interior percentile levels, a (101, 1) sample
# matrix whose values coincide with those levels, and a fixed start/freq.
QUANTILES = np.arange(1, 100) / 100
SAMPLES = np.arange(101).reshape(101, 1) / 100
START_DATE = pd.Timestamp(2017, 1, 1, 12)
FREQ = "1D"

# One representative instance of each Forecast flavour under test.
FORECASTS = {
    "QuantileForecast": QuantileForecast(
        forecast_arrays=QUANTILES.reshape(-1, 1),
        start_date=START_DATE,
        forecast_keys=np.array(QUANTILES, str),
        freq=FREQ,
    ),
    "SampleForecast": SampleForecast(
        samples=SAMPLES, start_date=START_DATE, freq=FREQ
    ),
    "DistributionForecast": DistributionForecast(
        distribution=Uniform(low=mx.nd.zeros(1), high=mx.nd.ones(1)),
        start_date=START_DATE,
        freq=FREQ,
    ),
}


@pytest.mark.parametrize("name", FORECASTS.keys())
def test_Forecast(name):
    forecast = FORECASTS[name]

    # Percentile label for a fractional level, e.g. 0.5 -> "p50".
    def percentile(value):
        return f"p{int(round(value * 100)):02d}"
def predict( targets: pd.Series, prediction_length: int, sampling_weights_iterator: Iterator[np.ndarray], num_samples: int, item_id: Optional[Any] = None, ) -> SampleForecast: """ Given the `targets`, generates `Forecast` containing prediction samples for `predcition_length` time points. Predictions are generated via weighted sampling where the weights are specified in `sampling_weights_iterator`. Parameters ---------- targets targets to predict prediction_length prediction length sampling_weights_iterator iterator over weights used for sampling num_samples number of samples to set in the :class:`SampleForecast` object item_id item_id to identify the time series Returns ------- SampleForecast a :class:`SampleForecast` object for the given targets """ # Note that to generate prediction from the second time step onwards, # we need the sample predicted for all the previous time steps in the # prediction range. # To make this simpler, we replicate the training targets for # `num_samples` times. # samples shape: (num_samples, train_length + prediction_length) samples = np.tile( A=np.concatenate((targets.values, np.zeros(prediction_length))), reps=(num_samples, 1), ) train_length = len(targets) for t, sampling_weights in enumerate(sampling_weights_iterator): samples_ix = WeightedSampler.sample(sampling_weights, num_samples) samples[:, train_length + t] = samples[np.arange(num_samples), samples_ix] # Forecast takes as input the prediction range samples, the start date # of the prediction range, and the frequency of the time series. samples_pred_range = samples[:, train_length:] # prediction range only forecast_start = targets.index[-1] + 1 * targets.index.freq return SampleForecast( samples=samples_pred_range, start_date=forecast_start, item_id=item_id, )
# Interior sample values double as the quantile levels for the fixtures
# below (SAMPLES is defined elsewhere in this file).
QUANTILES = SAMPLES[1:-1, 0]
START_DATE = pd.Timestamp(2017, 1, 1, 12)
FREQ = '1D'

# One instance of each forecast representation exercised by the test.
FORECASTS = {
    'QuantileForecast': QuantileForecast(
        forecast_arrays=QUANTILES.reshape(-1, 1),
        start_date=START_DATE,
        forecast_keys=QUANTILES.tolist(),
        freq=FREQ,
    ),
    'SampleForecast': SampleForecast(
        samples=SAMPLES.reshape(len(SAMPLES), 1),
        start_date=START_DATE,
        freq=FREQ,
    ),
}


@pytest.mark.parametrize("fcst_cls", FORECASTS.keys())
def test_Forecast(fcst_cls):
    fcst = FORECASTS[fcst_cls]
    num_samples, pred_length = SAMPLES.shape
    # quantiles = [x/float(num_samples-1) for x in range(0, num_samples)]
    for q_value in QUANTILES:
        q_str = str(q_value)
        # Percentile-style label, e.g. 0.05 -> 'p05'.
        quantile_str = 'p{:02d}'.format(int(round(q_value * 100)))
# Fixtures: 99 interior quantile levels, a (101, 1) sample array whose
# values equal those levels, and a Period-based start date (freq "1D").
QUANTILES = np.arange(1, 100) / 100
SAMPLES = np.arange(101).reshape(101, 1) / 100
FREQ = "1D"
START_DATE = pd.Period("2017 01-01 12:00", FREQ)

# One representative instance per Forecast flavour under test.
FORECASTS = {
    "QuantileForecast": QuantileForecast(
        forecast_arrays=QUANTILES.reshape(-1, 1),
        start_date=START_DATE,
        forecast_keys=np.array(QUANTILES, str),
    ),
    "SampleForecast": SampleForecast(
        samples=SAMPLES,
        start_date=START_DATE,
    ),
}


@pytest.mark.parametrize("name", FORECASTS.keys())
def test_Forecast(name):
    forecast = FORECASTS[name]

    # Percentile label for a fractional level, e.g. 0.5 -> "p50".
    def percentile(value):
        return f"p{int(round(value * 100)):02d}"

    num_samples, pred_length = SAMPLES.shape
    for quantile in QUANTILES:
        # Each level is queried in three equivalent spellings.
        test_cases = [quantile, str(quantile), percentile(quantile)]
def __call__(
    self,
    inference_data_loader: InferenceDataLoader,
    prediction_net: BlockType,
    input_names: List[str],
    freq: str,
    output_transform: Optional[OutputTransform],
    num_samples: Optional[int],
    **kwargs,
) -> Iterator[Forecast]:
    """Run the network over the loader and decode its (M, Q) output pairs
    into per-step forecasts, yielding one SampleForecast per series.

    The network output is assumed to carry, per step, a magnitude M and an
    interval Q (Croston-style intermittent-demand encoding). The decoding
    depends on ``self.forecast_type``:
    flat   -> constant rate M/Q (first step) repeated across the horizon;
    exact  -> (Q-1) zeros followed by M, repeated;
    hybrid -> the rate M/Q emitted at every step.
    """
    for batch in inference_data_loader:
        inputs = [batch[k] for k in input_names]
        outputs = prediction_net(*inputs).asnumpy()
        if output_transform is not None:
            outputs = output_transform(batch, outputs)
        if num_samples:
            num_collected_samples = outputs[0].shape[0]
            collected_samples = [outputs]
            # Re-run the network until at least num_samples paths exist.
            while num_collected_samples < num_samples:
                outputs = prediction_net(*inputs).asnumpy()
                if output_transform is not None:
                    outputs = output_transform(batch, outputs)
                collected_samples.append(outputs)
                num_collected_samples += outputs[0].shape[0]
            # Concatenate per-series and truncate to exactly num_samples.
            outputs = [
                np.concatenate(s)[:num_samples]
                for s in zip(*collected_samples)
            ]
            assert len(outputs[0]) == num_samples
        i = -1
        for i, output in enumerate(outputs):
            # M/ Q
            if self.forecast_type == "flat":
                output = output[:, :, 0] / output[:, :, 1]
                # Zero out undefined rates (NaN from 0/0, +inf from x/0).
                mask_nan = np.isnan(output)
                mask_inf = np.isposinf(output)
                output[mask_nan] = 0
                output[mask_inf] = 0
                # Broadcast the first step's rate across the whole horizon.
                output = np.vstack([output[:, 0]] * self.prediction_length).T
            # exact: --> (Q-1) times 0, M, (Q-1) times 0, M, repeat
            # hybrid: --> Q times M/Q, Q times M/Q, repeat
            elif self.forecast_type in ["exact", "hybrid"]:
                output_list = []
                for o in output:
                    pred = []
                    for row in o:
                        m = row[0]
                        # Guard against non-positive intervals.
                        q = max(1, row[1])
                        if self.forecast_type == "hybrid":
                            f = m / q if m != 0 else 0
                        else:
                            f = 0
                        for j in range(int(q - 1)):
                            pred.append(f)
                        # Expression-statement ternary: append M for
                        # "exact", otherwise append the filler value f.
                        pred.append(
                            m
                        ) if self.forecast_type == "exact" else pred.append(
                            f)
                    # Trim the expanded sequence to the forecast horizon.
                    output_list.append(pred[:self.prediction_length])
                output = np.array(output_list)
            else:
                raise NotImplementedError(
                    f"{self.forecast_type} is not a value choice for forecast_type"
                )
            yield SampleForecast(
                output,
                start_date=batch["forecast_start"][i],
                freq=freq,
                item_id=batch[FieldName.ITEM_ID][i]
                if FieldName.ITEM_ID in batch else None,
                info=batch["info"][i] if "info" in batch else None,
            )
        # Every series in the batch must have produced one forecast.
        assert i + 1 == len(batch["forecast_start"])