Example 1
def convert_base(text: str) -> str:
    try:
        return BASE_FREQ_TO_PANDAS_OFFSET[text]
    except KeyError:
        raise GluonTSDataError(
            f'"{text}" is not recognized as a frequency string'
        )
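
A minimal usage sketch of the lookup above; the alias table contents and the local GluonTSDataError stand-in below are illustrative assumptions, not the library's actual definitions.

class GluonTSDataError(Exception):  # local stand-in for the library's exception
    pass

# Hypothetical alias table; the real mapping in the library is larger.
BASE_FREQ_TO_PANDAS_OFFSET = {"H": "H", "D": "D", "W": "W", "M": "M"}

def convert_base(text: str) -> str:
    try:
        return BASE_FREQ_TO_PANDAS_OFFSET[text]
    except KeyError:
        raise GluonTSDataError(
            f'"{text}" is not recognized as a frequency string'
        )

print(convert_base("D"))  # "D"
# convert_base("5X")      # raises GluonTSDataError instead of a bare KeyError
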
Example 2
    def __call__(self, data: DataEntry) -> DataEntry:
        value = data.get(self.name, None)
        if value is not None:
            value = np.asarray(value, dtype=self.dtype)

            if self.req_ndim != value.ndim:
                raise GluonTSDataError(
                    f"Array '{self.name}' has bad shape - expected "
                    f"{self.req_ndim} dimensions, got {value.ndim}.")

            data[self.name] = value

            return data
        elif not self.is_required:
            return data
        else:
            raise GluonTSDataError(
                f"Object is missing a required field `{self.name}`")
Example 3
    def __call__(self, data: DataEntry) -> DataEntry:
        try:
            if self.use_timestamp:
                data[self.name] = pd.Timestamp(data[self.name])
            else:
                data[self.name] = pd.Period(data[self.name], self.freq)
        except (TypeError, ValueError) as e:
            raise GluonTSDataError(
                f'Error "{e}" occurred, when reading field "{self.name}"'
            ) from e

        return data
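
For illustration, the parsing path can be reduced to a small hypothetical helper; use_timestamp and freq correspond to the attributes read above, and GluonTSDataError is again a local stand-in.

import pandas as pd

class GluonTSDataError(Exception):  # local stand-in
    pass

def parse_start(value, freq: str = "D", use_timestamp: bool = False):
    try:
        return pd.Timestamp(value) if use_timestamp else pd.Period(value, freq)
    except (TypeError, ValueError) as e:
        raise GluonTSDataError(
            f'Error "{e}" occurred, when reading field "start"'
        ) from e

print(parse_start("2021-01-01"))                      # Period('2021-01-01', 'D')
print(parse_start("2021-01-01", use_timestamp=True))  # Timestamp('2021-01-01 00:00:00')
# parse_start("not a date")                           # raises GluonTSDataError
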
Example 4
    def __call__(self, data: DataEntry) -> DataEntry:
        try:
            timestamp = ProcessStartField.process(data[self.name], self.freq)
        except (TypeError, ValueError) as e:
            raise GluonTSDataError(
                f'Error "{e}" occurred, when reading field "{self.name}"'
            ) from e

        if timestamp.tz is not None:
            if self.tz_strategy == TimeZoneStrategy.error:
                raise GluonTSDataError(
                    "Timezone information is not supported, "
                    f'but provided in the "{self.name}" field.')
            if self.tz_strategy == TimeZoneStrategy.utc:
                # align timestamp to utc timezone
                timestamp = timestamp.tz_convert("UTC")

            # remove timezone information
            timestamp = timestamp.tz_localize(None)

        data[self.name] = timestamp

        return data
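
The two non-error timezone strategies can be seen directly on a tz-aware pandas Timestamp; this is a behavioural sketch only and does not reproduce the TimeZoneStrategy enum.

import pandas as pd

ts = pd.Timestamp("2021-06-01 12:00", tz="US/Eastern")

# utc strategy: first align the timestamp to UTC, then drop the tz info
print(ts.tz_convert("UTC").tz_localize(None))  # 2021-06-01 16:00:00

# other non-error strategies: drop the tz info, keeping the local wall time
print(ts.tz_localize(None))                    # 2021-06-01 12:00:00
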
Example 5
    def __iter__(self):
        # Basic idea is to split the dataset into roughly equally sized segments
        # with lower and upper bound, where each worker is assigned one segment
        bounds = get_bounds_for_mp_data_loading(len(self))
        if not self.cache or (self.cache and not self._data_cache):
            with self.open(self.path) as jsonl_file:
                for line_number, raw in enumerate(jsonl_file):
                    if not bounds.lower <= line_number < bounds.upper:
                        continue

                    span = Span(path=self.path, line=line_number)
                    try:
                        parsed_line = Line(json.loads(raw), span=span)
                    except ValueError as err:
                        raise GluonTSDataError(
                            f"Could not read json line {line_number}, {raw}"
                        ) from err

                    if self.cache:
                        self._data_cache.append(parsed_line)
                    yield parsed_line
        else:
            yield from self._data_cache
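
A stripped-down sketch of the same loop without worker bounds or caching; iter_jsonl and the local GluonTSDataError class below are assumptions for illustration.

import json

class GluonTSDataError(Exception):  # local stand-in
    pass

def iter_jsonl(lines):
    for line_number, raw in enumerate(lines):
        try:
            parsed = json.loads(raw)
        except ValueError as err:  # json.JSONDecodeError is a ValueError
            raise GluonTSDataError(
                f"Could not read json line {line_number}, {raw}"
            ) from err
        yield parsed

rows = list(iter_jsonl(['{"target": [1, 2, 3]}', '{"target": [4, 5]}']))
print(rows[0]["target"])         # [1, 2, 3]
# list(iter_jsonl(["{broken"]))  # raises GluonTSDataError for line 0
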
Example 6
    def predict_time_series(
        self,
        ts: pd.Series,
        num_samples: int,
        custom_features: Optional[np.ndarray] = None,
        item_id: Optional[Any] = None,
    ) -> SampleForecast:
        """
        Given a training time series, this method generates a `Forecast`
        object containing prediction samples for `prediction_length` time
        points.

        The predictions are generated via weighted sampling, where the weights
        are determined by the `NPTSPredictor` kernel type and feature map.

        Parameters
        ----------
        ts
            training time series object
        num_samples
            number of samples to draw
        custom_features
            custom features (covariates) to use
        item_id
            item_id to identify the time series

        Returns
        -------
        Forecast
            A prediction for the supplied `ts` and `custom_features`.
        """

        if np.all(np.isnan(ts.values[-self.context_length:])):
            raise GluonTSDataError(
                f"The last {self.context_length} positions of the target time "
                f"series are all NaN. Please increase the `context_length` "
                f"parameter of your NPTS model so the last "
                f"{self.context_length} positions of each target contain at "
                f"least one non-NaN value.")

        # Get the features for both training and prediction ranges
        train_features, predict_features = self._get_features(
            ts.index, self.prediction_length, custom_features)

        # Compute weights for sampling for each time step `t` in the
        # prediction range
        sampling_weights_iterator = NPTS.compute_weights(
            train_features=train_features,
            pred_features=predict_features,
            target_isnan_positions=np.argwhere(np.isnan(ts.values)),
            kernel=self.kernel,
            do_exp=self._is_exp_kernel(),
        )

        # Generate forecasts
        forecast = NPTS.predict(
            targets=ts,
            prediction_length=self.prediction_length,
            sampling_weights_iterator=sampling_weights_iterator,
            num_samples=num_samples,
            item_id=item_id,
        )

        return forecast
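
The NaN guard at the top of this method can be checked in isolation; check_context below is a hypothetical extraction of just that check, with a local GluonTSDataError stand-in.

import numpy as np
import pandas as pd

class GluonTSDataError(Exception):  # local stand-in
    pass

def check_context(ts: pd.Series, context_length: int) -> None:
    if np.all(np.isnan(ts.values[-context_length:])):
        raise GluonTSDataError(
            f"The last {context_length} positions of the target time "
            "series are all NaN. Please increase the `context_length` "
            "parameter of your NPTS model so the last "
            f"{context_length} positions of each target contain at "
            "least one non-NaN value."
        )

index = pd.period_range("2021-01-01", periods=5, freq="D")
check_context(pd.Series([1.0, 2.0, np.nan, np.nan, 3.0], index=index), 3)       # passes
# check_context(pd.Series([1.0, 2.0, np.nan, np.nan, np.nan], index=index), 3)  # raises
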
Example 7
def check_loss_finite(val: float) -> None:
    if not np.isfinite(val):
        raise GluonTSDataError(
            "Encountered invalid loss value! Try reducing the learning rate "
            "or try a different likelihood.")