    def predict_item(self, item, trained_model):
        """Compute quantiles using the confidence intervals of the trained STL model.

        Args:
            item (DataEntry): One timeseries.
            trained_model (STLForecastResults): Trained STL model.

        Returns:
            SampleForecast of quantiles.
        """
        target_length = len(item[TIMESERIES_KEYS.TARGET])
        start_date = frequency_add(item[TIMESERIES_KEYS.START], target_length)

        # The point prediction does not depend on alpha, so compute it once
        # outside the loop; only the confidence intervals vary.
        predictions = trained_model.get_prediction(
            start=target_length,
            end=target_length + self.prediction_length - 1)

        # Each alpha contributes the alpha/2 and 1 - alpha/2 quantiles, so
        # the 50 alphas yield 100 "samples" covering quantiles 0.01 to 0.99.
        samples = []
        for alpha in np.arange(0.02, 1.01, 0.02):
            confidence_intervals = predictions.conf_int(alpha=alpha)
            samples += [
                confidence_intervals["lower"].values,
                confidence_intervals["upper"].values,
            ]

        return SampleForecast(samples=np.stack(samples),
                              start_date=start_date,
                              freq=self.freq)
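
For context, a minimal sketch of how the STLForecastResults object this method expects could be produced with statsmodels; the series, frequency, and ARIMA order below are made-up placeholders:

import numpy as np
import pandas as pd
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.forecasting.stl import STLForecast

# Toy hourly series with daily seasonality (placeholder data).
index = pd.date_range("2021-01-01", periods=500, freq="H")
y = pd.Series(
    np.sin(np.arange(500) * 2 * np.pi / 24) + 0.1 * np.random.randn(500),
    index=index)

# STL removes the seasonal component; ARIMA is fit on the remainder.
trained_model = STLForecast(
    y, ARIMA, model_kwargs={"order": (1, 1, 0)}, period=24).fit()

# Same call pattern as in predict_item above: forecast one day past the
# end of the series, then extract a 90% confidence interval (alpha=0.1).
predictions = trained_model.get_prediction(start=len(y), end=len(y) + 23)
confidence_intervals = predictions.conf_int(alpha=0.1)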
Example #2
    def predict_item(self, item, trained_model):
        """Compute quantiles using the confidence intervals of autoarima.

        Args:
            item (DataEntry): One timeseries.
            trained_model (pm.auto_arima): Trained autoarima model.

        Returns:
            SampleForecast of quantiles.
        """
        start_date = frequency_add(item[TIMESERIES_KEYS.START],
                                   len(item[TIMESERIES_KEYS.TARGET]))

        # Exogenous features aligned with the prediction window (helper not
        # shown); pmdarima accepts X=None when there are no such features.
        prediction_external_features = self._set_prediction_external_features(
            item)

        # As above, each alpha contributes the alpha/2 and 1 - alpha/2
        # quantiles via the confidence interval bounds.
        samples = []
        for alpha in np.arange(0.02, 1.01, 0.02):
            # predict returns (forecast, conf_int) when return_conf_int=True;
            # only the interval bounds are needed here.
            _, confidence_intervals = trained_model.predict(
                n_periods=self.prediction_length,
                X=prediction_external_features,
                return_conf_int=True,
                alpha=alpha)
            samples += [confidence_intervals[:, 0], confidence_intervals[:, 1]]

        return SampleForecast(samples=np.stack(samples),
                              start_date=start_date,
                              freq=self.freq)
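
A usage sketch of the pmdarima call pattern this method relies on; the series is a placeholder and no exogenous features are passed:

import numpy as np
import pmdarima as pm

# Placeholder series: a random walk.
y = np.random.randn(200).cumsum()
trained_model = pm.auto_arima(y, seasonal=False, suppress_warnings=True)

# With return_conf_int=True, predict returns (forecast, conf_int), where
# conf_int has shape (n_periods, 2): lower bounds in column 0 and upper
# bounds in column 1, the same layout indexed in predict_item above.
forecast, confidence_intervals = trained_model.predict(
    n_periods=12, return_conf_int=True, alpha=0.2)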
Example #3
    def predict_item(self, item: DataEntry) -> SampleForecast:
        if self.context_length is not None:
            target = item["target"][-self.context_length :]
        else:
            target = item["target"]

        # Draw i.i.d. Gaussian sample paths whose mean and standard deviation
        # match the (possibly truncated) context window.
        mean = np.nanmean(target)
        std = np.nanstd(target)
        normal = np.random.standard_normal(self.shape)

        start_date = frequency_add(item["start"], len(item["target"]))
        return SampleForecast(
            samples=std * normal + mean,
            start_date=start_date,
            freq=self.freq,
            item_id=item.get(FieldName.ITEM_ID),
        )
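
As a standalone illustration of the sampling step, a minimal sketch assuming self.shape is (num_samples, prediction_length), as in GluonTS's MeanPredictor; that shape is an assumption, not something the snippet states:

import numpy as np

num_samples, prediction_length = 100, 24  # assumed meaning of self.shape
target = np.random.rand(500)              # placeholder context window

mean = np.nanmean(target)
std = np.nanstd(target)
# 100 independent Gaussian sample paths of length 24, all sharing the
# context window's mean and standard deviation.
samples = std * np.random.standard_normal(
    (num_samples, prediction_length)) + mean
assert samples.shape == (num_samples, prediction_length)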
Example #4
    def predict(self, dataset: Dataset, **kwargs) -> Iterator[SampleForecast]:
        for item in dataset:
            if self.context_length is not None:
                target = item["target"][-self.context_length:]
            else:
                target = item["target"]

            mean = np.nanmean(target)
            std = np.nanstd(target)
            normal = np.random.standard_normal(self.shape)

            # The forecast starts after the full series, not after the
            # truncated context window, so use len(item["target"]) here.
            start_date = frequency_add(item["start"], len(item["target"]))

            yield SampleForecast(
                samples=std * normal + mean,
                start_date=start_date,
                freq=self.freq,
                item_id=item.get("id"),
            )
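
A sketch of how the yielded forecasts could be consumed downstream; SampleForecast.quantile is part of the GluonTS forecast API, while the summarize helper itself is hypothetical:

from typing import Iterator

from gluonts.model.forecast import SampleForecast

def summarize(forecasts: Iterator[SampleForecast]):
    """Hypothetical helper: reduce each forecast to p10/p50/p90 bands."""
    for forecast in forecasts:
        yield {
            "item_id": forecast.item_id,
            # Empirical quantiles over the sample axis; each is an
            # array of length prediction_length.
            "p10": forecast.quantile(0.1),
            "p50": forecast.quantile(0.5),
            "p90": forecast.quantile(0.9),
        }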
Example #5
def generate_lstnet_dataset(dataset_path: Path, dataset_name: str):
    ds_info = datasets_info[dataset_name]

    os.makedirs(dataset_path, exist_ok=True)

    with open(dataset_path / "metadata.json", "w") as f:
        f.write(
            json.dumps(
                metadata(
                    cardinality=ds_info.num_series,
                    freq=ds_info.freq,
                    prediction_length=ds_info.prediction_length,
                )))

    train_file = dataset_path / "train" / "data.json"
    test_file = dataset_path / "test" / "data.json"

    time_index = pd.date_range(
        start=ds_info.start_date,
        freq=ds_info.freq,
        periods=ds_info.num_time_steps,
    )

    df = pd.read_csv(ds_info.url, header=None)

    assert df.shape == (
        ds_info.num_time_steps,
        ds_info.num_series,
    ), f"expected num_time_steps/num_series {(ds_info.num_time_steps, ds_info.num_series)} but got {df.shape}"

    timeseries = load_from_pandas(df=df,
                                  time_index=time_index,
                                  agg_freq=ds_info.agg_freq)

    # use the first 80% of the time index for training; training_end is the
    # last date seen during training
    ts_index = timeseries[0].index
    training_end = ts_index[int(len(ts_index) * (8 / 10))]

    train_ts = []
    for cat, ts in enumerate(timeseries):
        sliced_ts = ts[:training_end]
        if len(sliced_ts) > 0:
            train_ts.append(
                to_dict(
                    target_values=sliced_ts.values,
                    start=sliced_ts.index[0],
                    cat=[cat],
                ))

    assert len(train_ts) == ds_info.num_series

    save_to_file(train_file, train_ts)

    # start of each rolling evaluation window, i.e. the time of the first
    # prediction in that window
    prediction_dates = [
        frequency_add(training_end, i * ds_info.prediction_length)
        for i in range(ds_info.rolling_evaluations)
    ]

    test_ts = []
    for prediction_start_date in prediction_dates:
        # The window end only depends on the window start, so compute it
        # once per rolling evaluation rather than once per series.
        prediction_end_date = frequency_add(prediction_start_date,
                                            ds_info.prediction_length)
        for cat, ts in enumerate(timeseries):
            sliced_ts = ts[:prediction_end_date]
            test_ts.append(
                to_dict(
                    target_values=sliced_ts.values,
                    start=sliced_ts.index[0],
                    cat=[cat],
                ))

    assert len(test_ts) == ds_info.num_series * ds_info.rolling_evaluations

    save_to_file(test_file, test_ts)
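
The helpers to_dict and save_to_file are not shown in this example; below is a minimal sketch of plausible implementations, assuming the GluonTS JSON-lines convention of one record per line with target/start/feat_static_cat fields. Both functions here are hypothetical reconstructions, not the original code:

import json
import os

def to_dict(target_values, start, cat):
    # Hypothetical record layout following the GluonTS JSON-lines schema.
    return {
        "target": [float(v) for v in target_values],
        "start": str(start),
        "feat_static_cat": cat,
    }

def save_to_file(path, data):
    # Create the train/ or test/ directory on demand, then write one
    # JSON object per line.
    os.makedirs(path.parent, exist_ok=True)
    with open(path, "w") as f:
        for entry in data:
            f.write(json.dumps(entry) + "\n")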