def _load(self) -> Dict[str, Dataset]:
    # The "metadata" channel is not a dataset itself: it only points to the
    # metadata.json file from which we extract the data frequency.
    if "metadata" in self.channels:
        path = self.channels.pop("metadata")
        self.hyperparameters["freq"] = MetaData.parse_file(
            path / "metadata.json"
        ).freq

    file_dataset = partial(FileDataset, freq=self.hyperparameters["freq"])
    list_dataset = partial(ListDataset, freq=self.hyperparameters["freq"])

    # Wrap each remaining channel directory in a FileDataset and, if requested,
    # materialize every dataset in memory as a ListDataset.
    datasets = map_dct_values(file_dataset, self.channels)
    if self._listify_dataset():
        datasets = map_dct_values(list_dataset, datasets)
    return datasets
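# `map_dct_values` is defined elsewhere in the repository; judging from its use
# above, it applies a function to every value of a dictionary while preserving
# the keys. A minimal sketch under that assumption (the type variables here are
# our own, not the original ones):
from typing import Callable, Dict, TypeVar

T = TypeVar("T")
U = TypeVar("U")


def map_dct_values(fn: Callable[[T], U], dct: Dict[str, T]) -> Dict[str, U]:
    """Applies `fn` to every value of `dct`, preserving its keys."""
    return {key: fn(value) for key, value in dct.items()}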
def generate(self) -> None:
    if self.root.exists():
        return
    (self.root / "gluonts").mkdir(parents=True)

    # Download data and move to our own managed directory
    with tempfile.TemporaryDirectory() as directory:
        self._materialize(Path(directory))
        source = Path(directory) / self._gluonts_name

        # Copy and read metadata
        meta_file = self.root / "gluonts" / "metadata.json"
        shutil.copyfile(source / "metadata.json", meta_file)
        meta = MetaData.parse_file(meta_file)

        # Copy the data and apply filters
        filters = self._filters(
            self._prediction_length_multiplier * cast(int, meta.prediction_length)
        )
        read_transform_write(
            self.root / "gluonts" / "train" / "data.json",
            filters=filters
            + [EndOfSeriesCutFilter(cast(int, meta.prediction_length))],
            source=source / "train" / "data.json",
        )
        read_transform_write(
            self.root / "gluonts" / "val" / "data.json",
            filters=filters,
            source=source / "train" / "data.json",
        )
        # Although we increase the prediction length for the filters here, this
        # does not exclude any more data! The time series is only longer by the
        # prediction length...
        read_transform_write(
            self.root / "gluonts" / "test" / "data.json",
            filters=self._filters(
                (self._prediction_length_multiplier + 1)
                * cast(int, meta.prediction_length)
            ),
            source=source / "test" / "data.json",
        )
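# `read_transform_write` and the filter classes live elsewhere in the
# repository. From the call sites above, it streams a JSON-lines dataset from
# `source` through a list of record-level filters and writes the result to
# `target`. The exact filter protocol is an assumption here: we take a filter
# to be a callable that transforms a record, or returns None to drop it.
import json
from pathlib import Path
from typing import Callable, List, Optional


def read_transform_write(
    target: Path,
    filters: List[Callable[[dict], Optional[dict]]],
    source: Path,
) -> None:
    # Ensure the per-split directory (e.g. "train", "val", "test") exists.
    target.parent.mkdir(parents=True, exist_ok=True)
    with source.open("r") as fin, target.open("w") as fout:
        for line in fin:
            record: Optional[dict] = json.loads(line)
            for apply_filter in filters:
                if record is None:
                    break
                record = apply_filter(record)
            if record is not None:
                fout.write(json.dumps(record) + "\n")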
def meta(self) -> MetaData:
    """
    Returns the dataset's metadata.
    """
    return MetaData.parse_file(self.root / "gluonts" / "metadata.json")
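# A minimal usage sketch, assuming a hypothetical concrete configuration class
# `MyDatasetConfig` that supplies `root`, `_materialize`, and the other hooks
# referenced above:
#
#     config = MyDatasetConfig(root=Path.home() / ".datasets" / "my_dataset")
#     config.generate()          # prepares the splits once; no-op if root exists
#     print(config.meta().freq)  # e.g. "H" for an hourly dataset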