def convert_base(text: str) -> str:
    """Convert a base frequency string to its pandas offset alias.

    Parameters
    ----------
    text
        Base frequency string to look up in ``BASE_FREQ_TO_PANDAS_OFFSET``.

    Returns
    -------
    str
        The pandas offset alias corresponding to ``text``.

    Raises
    ------
    GluonTSDataError
        If ``text`` is not a recognized base frequency.
    """
    try:
        return BASE_FREQ_TO_PANDAS_OFFSET[text]
    except KeyError as err:
        # Chain the KeyError explicitly for easier debugging; this matches
        # the `raise ... from e` style used by the other handlers in this
        # module.
        raise GluonTSDataError(
            f'"{text}" is not recognized as a frequency string'
        ) from err
def __call__(self, data: DataEntry) -> DataEntry:
    """Validate and coerce the array field ``self.name`` in ``data``.

    A present value is converted to a ``self.dtype`` array and checked
    against the expected number of dimensions. A missing value is only
    acceptable when the field is not required.
    """
    field = self.name
    raw = data.get(field)

    if raw is None:
        # Missing field: fine when optional, fatal when required.
        if self.is_required:
            raise GluonTSDataError(
                f"Object is missing a required field `{field}`")
        return data

    arr = np.asarray(raw, dtype=self.dtype)
    if arr.ndim != self.req_ndim:
        raise GluonTSDataError(
            f"Array '{field}' has bad shape - expected "
            f"{self.req_ndim} dimensions, got {arr.ndim}.")
    data[field] = arr
    return data
def __call__(self, data: DataEntry) -> DataEntry:
    """Parse the field ``self.name`` into a pandas time type.

    Produces a ``pd.Timestamp`` when ``self.use_timestamp`` is set,
    otherwise a ``pd.Period`` anchored to ``self.freq``. Parse failures
    are re-raised as ``GluonTSDataError`` with the offending field name.
    """
    field = self.name
    raw = data[field]
    try:
        # Timestamps carry no frequency; Periods are anchored to self.freq.
        parsed = (
            pd.Timestamp(raw)
            if self.use_timestamp
            else pd.Period(raw, self.freq)
        )
    except (TypeError, ValueError) as e:
        raise GluonTSDataError(
            f'Error "{e}" occurred, when reading field "{field}"'
        ) from e
    data[field] = parsed
    return data
def __call__(self, data: DataEntry) -> DataEntry:
    """Parse the start field and apply the configured timezone strategy.

    Timezone-aware timestamps either abort processing
    (``TimeZoneStrategy.error``), or are converted to UTC
    (``TimeZoneStrategy.utc``); in every non-error case the timezone
    information is stripped before the value is written back.
    """
    name = self.name
    try:
        ts = ProcessStartField.process(data[name], self.freq)
    except (TypeError, ValueError) as e:
        raise GluonTSDataError(
            f'Error "{e}" occurred, when reading field "{name}"'
        ) from e

    if ts.tz is not None:
        strategy = self.tz_strategy
        if strategy == TimeZoneStrategy.error:
            raise GluonTSDataError(
                "Timezone information is not supported, "
                f'but provided in the "{name}" field.')
        if strategy == TimeZoneStrategy.utc:
            # align timestamp to utc timezone
            ts = ts.tz_convert("UTC")
        # removes timezone information
        ts = ts.tz_localize(None)

    data[name] = ts
    return data
def __iter__(self):
    """Iterate over this worker's segment of the JSON Lines file.

    The dataset is split into roughly equally sized, contiguous segments
    (lower/upper line bounds), and each data-loading worker is assigned
    one segment. When ``self.cache`` is enabled, parsed lines are stored
    in ``self._data_cache`` on the first pass and served from it on
    subsequent iterations.

    Raises
    ------
    GluonTSDataError
        If a line cannot be parsed as JSON.
    """
    # Serve from the cache when caching is on and it is already populated.
    # (The original condition `not cache or (cache and not _data_cache)`
    # reduces to exactly this guard.)
    if self.cache and self._data_cache:
        yield from self._data_cache
        return

    bounds = get_bounds_for_mp_data_loading(len(self))
    with self.open(self.path) as jsonl_file:
        for line_number, raw in enumerate(jsonl_file):
            # Skip lines outside this worker's assigned segment.
            if not bounds.lower <= line_number < bounds.upper:
                continue
            span = Span(path=self.path, line=line_number)
            # Keep the try body minimal: only json.loads can raise
            # ValueError here, and chaining preserves the parse error.
            try:
                parsed_line = Line(json.loads(raw), span=span)
            except ValueError as err:
                raise GluonTSDataError(
                    f"Could not read json line {line_number}, {raw}"
                ) from err
            if self.cache:
                self._data_cache.append(parsed_line)
            yield parsed_line
def predict_time_series(
    self,
    ts: pd.Series,
    num_samples: int,
    custom_features: Optional[np.ndarray] = None,
    item_id: Optional[Any] = None,
) -> SampleForecast:
    """
    Given a training time series, this method generates `Forecast` object
    containing prediction samples for `prediction_length` time points.

    The predictions are generated via weighted sampling where the weights
    are determined by the `NPTSPredictor` kernel type and feature map.

    Parameters
    ----------
    ts
        training time series object
    num_samples
        number of samples to draw
    custom_features
        custom features (covariates) to use
    item_id
        item_id to identify the time series

    Returns
    -------
    Forecast
        A prediction for the supplied `ts` and `custom_features`.

    Raises
    ------
    GluonTSDataError
        If the last `context_length` values of `ts` are all NaN, leaving
        nothing to sample from.
    """
    if np.all(np.isnan(ts.values[-self.context_length:])):
        raise GluonTSDataError(
            f"The last {self.context_length} positions of the target time "
            f"series are all NaN. Please increase the `context_length` "
            f"parameter of your NPTS model so the last "
            f"{self.context_length} positions of each target contain at "
            f"least one non-NaN value.")

    # Get the features for both training and prediction ranges
    train_features, predict_features = self._get_features(
        ts.index, self.prediction_length, custom_features)

    # Compute weights for sampling for each time step `t` in the
    # prediction range
    sampling_weights_iterator = NPTS.compute_weights(
        train_features=train_features,
        pred_features=predict_features,
        target_isnan_positions=np.argwhere(np.isnan(ts.values)),
        kernel=self.kernel,
        do_exp=self._is_exp_kernel(),
    )

    # Generate forecasts
    forecast = NPTS.predict(
        targets=ts,
        prediction_length=self.prediction_length,
        sampling_weights_iterator=sampling_weights_iterator,
        num_samples=num_samples,
        item_id=item_id,
    )

    return forecast
def check_loss_finite(val: float) -> None:
    """Validate that a loss value is finite.

    Returns silently for finite values; raises ``GluonTSDataError`` when
    the value is NaN or infinite (a common symptom of a too-large
    learning rate or an ill-suited likelihood).
    """
    if np.isfinite(val):
        return
    raise GluonTSDataError(
        "Encountered invalid loss value! Try reducing the learning rate "
        "or try a different likelihood.")