def metadata(self) -> MetaData:
    """Build the GluonTS ``MetaData`` describing this synthetic dataset.

    The metadata always carries the sampling frequency, one static
    categorical feature (cardinality = number of time series), one static
    real feature, and the prediction length.  When promotions or holidays
    are enabled, a dynamic real feature is declared as well.

    Returns:
        MetaData: the assembled dataset metadata.
    """
    # Build the shared fields once instead of duplicating the whole
    # MetaData construction in both branches.
    fields = {
        "freq": self.freq,
        "feat_static_cat": [
            {
                "name": "feat_static_cat_000",
                "cardinality": str(self.num_timeseries),
            }
        ],
        "feat_static_real": [{"name": "feat_static_real_000"}],
        "prediction_length": self.prediction_length,
    }
    if self.is_promotions or self.holidays:
        # Promotions/holidays are exposed as a single dynamic real feature.
        fields["feat_dynamic_real"] = [
            BasicFeatureInfo(name=FieldName.FEAT_DYNAMIC_REAL)
        ]
    return MetaData(**fields)
def metadata(self) -> MetaData:
    """Build the GluonTS ``MetaData`` describing this synthetic dataset.

    The metadata always carries the time granularity, one static
    categorical feature (cardinality = number of time series), one static
    real feature, and the prediction length.  When promotions or holidays
    are enabled, a dynamic real feature is declared as well.

    Returns:
        MetaData: the assembled dataset metadata.
    """
    # Build the shared fields once instead of duplicating the whole
    # MetaData construction in both branches.
    fields = {
        "time_granularity": self.time_granularity,
        "feat_static_cat": [
            {
                "name": "feat_static_cat_000",
                "cardinality": str(self.num_timeseries),
            }
        ],
        "feat_static_real": [{"name": "feat_static_real_000"}],
        "prediction_length": self.prediction_length,
    }
    if self.is_promotions or self.holidays:
        # Promotions/holidays are exposed as a single dynamic real feature.
        fields["feat_dynamic_real"] = [
            BasicFeatureInfo(name='feat_dynamic_real')
        ]
    return MetaData(**fields)
def test_recipe_dataset(recipe) -> None:
    """Generate a dataset from ``recipe`` and check its length invariants."""
    dataset = RecipeDataset(
        recipe=recipe,
        metadata=MetaData(
            freq="D",
            feat_static_real=[BasicFeatureInfo(name="feat_static_real_000")],
            feat_static_cat=[
                CategoricalFeatureInfo(name="foo", cardinality=10)
            ],
            feat_dynamic_real=[BasicFeatureInfo(name="binary_causal")],
        ),
        max_train_length=20,
        prediction_length=10,
        num_timeseries=10,
        trim_length_fun=lambda x, **kwargs: np.minimum(
            int(np.random.geometric(1 / (kwargs["train_length"] / 2))),
            kwargs["train_length"],
        ),
    )

    result = dataset.generate()
    train_entries = list(result.train)
    test_entries = list(result.test)
    len_train = np.array([len(entry["target"]) for entry in train_entries])
    len_test = np.array([len(entry["target"]) for entry in test_entries])

    # Each test series must at least cover the prediction horizon ...
    assert np.all(len_test >= 10)
    # ... and must extend the matching train series by the horizon.
    assert np.all(len_test - len_train >= 10)
    assert len(list(result.train)) == 10
def _load(self) -> Dict[str, Dataset]:
    """Load every data channel as a GluonTS dataset, keyed by channel name.

    A ``metadata`` channel, when present, is consumed (removed from the
    channel mapping) and its frequency is stored into the hyperparameters
    before the remaining channels are materialized.
    """
    if "metadata" in self.channels:
        metadata_path = self.channels.pop("metadata")
        self.hyperparameters["freq"] = MetaData.parse_file(
            metadata_path / "metadata.json"
        ).freq

    freq = self.hyperparameters["freq"]
    datasets = map_dct_values(
        partial(FileDataset, freq=freq), self.channels
    )
    # Some configurations require in-memory datasets instead of
    # file-backed ones.
    if self._listify_dataset():
        datasets = map_dct_values(partial(ListDataset, freq=freq), datasets)
    return datasets
def constant_dataset() -> Tuple[DatasetInfo, Dataset, Dataset]:
    """Create a small deterministic dataset of ten constant series.

    Series ``i`` repeats the constant value ``float(i)``: 24 observations
    in the train split, 30 in the test split, with matching static
    features.  Returns the dataset info plus the two splits.
    """
    metadata = MetaData(
        freq='1H',
        feat_static_cat=[
            CategoricalFeatureInfo(
                name='feat_static_cat_000', cardinality='10'
            )
        ],
        feat_static_real=[BasicFeatureInfo(name='feat_static_real_000')],
    )
    start = '2000-01-01 00:00:00'

    def records(length):
        # One record per series; the target is the series index repeated.
        return [
            {
                'item': str(i),
                'start': start,
                'target': [float(i)] * length,
                'feat_static_cat': [i],
                'feat_static_real': [float(i)],
            }
            for i in range(10)
        ]

    train_ds = ListDataset(data_iter=records(24), freq=metadata.freq)
    test_ds = ListDataset(data_iter=records(30), freq=metadata.freq)

    info = DatasetInfo(
        name='constant_dataset',
        metadata=metadata,
        prediction_length=2,
        train_statistics=calculate_dataset_statistics(train_ds),
        test_statistics=calculate_dataset_statistics(test_ds),
    )
    return info, train_ds, test_ds
def constant_dataset() -> Tuple[DatasetInfo, Dataset, Dataset]:
    """Create a small deterministic dataset of ten constant series.

    Series ``i`` repeats the constant value ``float(i)``: 24 observations
    in the train split, 30 in the test split, with matching static
    features.  Returns the dataset info plus the two splits.
    """
    metadata = MetaData(
        freq="1H",
        feat_static_cat=[
            CategoricalFeatureInfo(
                name="feat_static_cat_000", cardinality="10"
            )
        ],
        feat_static_real=[BasicFeatureInfo(name="feat_static_real_000")],
    )
    start_date = "2000-01-01 00:00:00"

    def make_record(index, length):
        # The target is simply the series index repeated ``length`` times.
        return {
            "item": str(index),
            "start": start_date,
            "target": [float(index)] * length,
            "feat_static_cat": [index],
            "feat_static_real": [float(index)],
        }

    train_ds = ListDataset(
        data_iter=[make_record(i, 24) for i in range(10)],
        freq=metadata.freq,
    )
    test_ds = ListDataset(
        data_iter=[make_record(i, 30) for i in range(10)],
        freq=metadata.freq,
    )

    info = DatasetInfo(
        name="constant_dataset",
        metadata=metadata,
        prediction_length=2,
        train_statistics=calculate_dataset_statistics(train_ds),
        test_statistics=calculate_dataset_statistics(test_ds),
    )
    return info, train_ds, test_ds
def constant_dataset() -> Tuple[DatasetInfo, Dataset, Dataset]:
    """Create a small deterministic dataset of ten constant series.

    Series ``i`` repeats the constant value ``float(i)``: 24 observations
    in the train split, 30 in the test split, with matching static
    features.  Field names use the canonical ``FieldName`` constants.
    Returns the dataset info plus the two splits.
    """
    metadata = MetaData(
        freq="1H",
        feat_static_cat=[
            CategoricalFeatureInfo(
                name="feat_static_cat_000", cardinality="10"
            )
        ],
        feat_static_real=[BasicFeatureInfo(name="feat_static_real_000")],
    )
    start_date = "2000-01-01 00:00:00"

    def build_split(length):
        # One record per series; the target repeats the series index.
        records = [
            {
                FieldName.ITEM_ID: str(i),
                FieldName.START: start_date,
                FieldName.TARGET: [float(i)] * length,
                FieldName.FEAT_STATIC_CAT: [i],
                FieldName.FEAT_STATIC_REAL: [float(i)],
            }
            for i in range(10)
        ]
        return ListDataset(data_iter=records, freq=metadata.freq)

    train_ds = build_split(24)
    test_ds = build_split(30)

    info = DatasetInfo(
        name="constant_dataset",
        metadata=metadata,
        prediction_length=2,
        train_statistics=calculate_dataset_statistics(train_ds),
        test_statistics=calculate_dataset_statistics(test_ds),
    )
    return info, train_ds, test_ds
def _load_hyperparameters(path: Path, channels) -> dict:
    """Load hyperparameters from a SageMaker-style JSON file.

    Legacy frequency keys (``time_freq``, ``time_granularity``) are renamed
    to ``freq``; a ``metadata`` channel, when present, overrides ``freq``
    with the value from its ``metadata.json``.

    Args:
        path: location of the hyperparameters JSON file.
        channels: mapping of channel name to channel directory.

    Returns:
        The normalized hyperparameters dictionary.
    """
    with path.open() as json_file:
        hyperparameters = parse_sagemaker_parameters(json.load(json_file))

    # Rename legacy frequency keys.  Use pop so the stale key does not
    # leak through to downstream consumers of the hyperparameters dict.
    for old_freq_name in ['time_freq', 'time_granularity']:
        if old_freq_name in hyperparameters:
            hyperparameters['freq'] = hyperparameters.pop(old_freq_name)

    if "metadata" in channels:
        with (channels["metadata"] / "metadata.json").open() as file:
            metadata = MetaData(**json.load(file))
            hyperparameters.update(freq=metadata.freq)
    return hyperparameters
def test_timeseries_item_serialization() -> None:
    """Round-trip a ``TimeSeriesItem`` through serialization and back."""
    original = TimeSeriesItem(
        item="1",
        start="2014-09-07 00:00:00",
        target=[1, 2],
        feat_static_cat=[1],
    )
    metadata = MetaData(
        freq="1H",
        feat_static_cat=[{"name": "feat_static_cat_000", "cardinality": 1}],
    )

    # Convert to a GluonTS data entry, serialize it, then rebuild the item.
    entry = ProcessDataEntry(freq=metadata.freq)(original.gluontsify(metadata))
    rebuilt = TimeSeriesItem(**serialize_data_entry(entry))

    assert rebuilt == original
def _load_hyperparameters(path: Path, channels) -> dict:
    """Load hyperparameters from a SageMaker-style JSON file.

    Legacy frequency keys (``time_freq``, ``time_granularity``,
    ``frequency``) are renamed to ``freq``; a ``metadata`` channel, when
    present, overrides ``freq`` with the value from its ``metadata.json``.

    Args:
        path: location of the hyperparameters JSON file.
        channels: mapping of channel name to channel directory.

    Returns:
        The normalized hyperparameters dictionary.

    Raises:
        ValueError: if no frequency can be determined from either the
            hyperparameters or a metadata channel.
    """
    with path.open() as json_file:
        hyperparameters = decode_sagemaker_parameters(json.load(json_file))

    # Normalize legacy names for the frequency hyperparameter.
    for old_freq_name in ["time_freq", "time_granularity", "frequency"]:
        if old_freq_name in hyperparameters:
            hyperparameters["freq"] = hyperparameters.pop(old_freq_name)

    if "metadata" in channels:
        with (channels["metadata"] / "metadata.json").open() as file:
            metadata = MetaData(**json.load(file))
            hyperparameters.update(freq=metadata.freq)

    # Raise explicitly instead of `assert`: assertions are stripped when
    # Python runs with -O, which would silently skip this validation.
    if "freq" not in hyperparameters:
        raise ValueError(
            "The 'freq' key not in the loaded hyperparameters dictionary. "
            "Please set the 'freq' as a hyperparameter or provide a metadata "
            "channel which contains 'freq' information.")
    return hyperparameters
def default_synthetic() -> Tuple[DatasetInfo, Dataset, Dataset]:
    """Generate the default synthetic dataset used in tests.

    Builds a recipe with a linear-trend-plus-noise target, one static
    categorical feature, and a category-dependent static real feature,
    then materializes train/test splits with randomly trimmed lengths.
    """
    recipe = [
        (FieldName.TARGET, LinearTrend() + RandomGaussian()),
        (FieldName.FEAT_STATIC_CAT, RandomCat([10])),
        (
            FieldName.FEAT_STATIC_REAL,
            ForEachCat(RandomGaussian(1, (10,)), FieldName.FEAT_STATIC_CAT)
            + RandomGaussian(0.1, (10,)),
        ),
    ]

    # Train length is trimmed by a geometric draw, capped at the full length.
    def trim(x, **kwargs):
        drawn = int(np.random.geometric(1 / (kwargs["train_length"] / 2)))
        return np.minimum(drawn, kwargs["train_length"])

    metadata = MetaData(
        freq="D",
        feat_static_real=[
            BasicFeatureInfo(name=FieldName.FEAT_STATIC_REAL)
        ],
        feat_static_cat=[
            CategoricalFeatureInfo(
                name=FieldName.FEAT_STATIC_CAT, cardinality=10
            )
        ],
        feat_dynamic_real=[
            BasicFeatureInfo(name=FieldName.FEAT_DYNAMIC_REAL)
        ],
    )
    data = RecipeDataset(
        recipe=recipe,
        metadata=metadata,
        max_train_length=20,
        prediction_length=10,
        num_timeseries=10,
        trim_length_fun=trim,
    )

    generated = data.generate()
    assert generated.test is not None
    info = data.dataset_info(generated.train, generated.test)
    return info, generated.train, generated.test
def generate(self) -> None:
    """Materialize the dataset under ``self.root``.

    Does nothing when the target directory already exists.  Otherwise the
    raw GluonTS dataset is downloaded into a temporary directory, its
    metadata copied, and filtered train/val/test splits written out.
    """
    if self.root.exists():
        return
    target = self.root / "gluonts"
    target.mkdir(parents=True)

    # Download data and move to our own managed directory
    with tempfile.TemporaryDirectory() as tmp:
        self._materialize(Path(tmp))
        source = Path(tmp) / self._gluonts_name

        # Copy and read metadata
        meta_file = target / "metadata.json"
        shutil.copyfile(source / "metadata.json", meta_file)
        meta = MetaData.parse_file(meta_file)

        # The prediction horizon drives every filter below; hoist the cast.
        horizon = cast(int, meta.prediction_length)
        filters = self._filters(self._prediction_length_multiplier * horizon)

        # Train split: additionally cut the final horizon off each series.
        read_transform_write(
            target / "train" / "data.json",
            filters=filters + [EndOfSeriesCutFilter(horizon)],
            source=source / "train" / "data.json",
        )
        # Validation split: same filters, but series ends are kept.
        read_transform_write(
            target / "val" / "data.json",
            filters=filters,
            source=source / "train" / "data.json",
        )
        # Although we increase the prediction length for the filters here,
        # this does not exclude any more data! The time series is only
        # longer by the prediction length...
        read_transform_write(
            target / "test" / "data.json",
            filters=self._filters(
                (self._prediction_length_multiplier + 1) * horizon
            ),
            source=source / "test" / "data.json",
        )
def default_synthetic() -> Tuple[DatasetInfo, Dataset, Dataset]:
    """Generate the default synthetic dataset used in tests.

    Builds a recipe with a linear-trend-plus-noise target, one static
    categorical feature, and a category-dependent static real feature,
    then materializes train/test splits with randomly trimmed lengths.
    """
    recipe = [
        ('target', LinearTrend() + RandomGaussian()),
        ('feat_static_cat', RandomCat([10])),
        (
            'feat_static_real',
            ForEachCat(RandomGaussian(1, 10), 'feat_static_cat')
            + RandomGaussian(0.1, 10),
        ),
    ]

    # Train length is trimmed by a geometric draw, capped at the full length.
    def trim(x, **kwargs):
        drawn = int(np.random.geometric(1 / (kwargs['train_length'] / 2)))
        return np.minimum(drawn, kwargs['train_length'])

    metadata = MetaData(
        time_granularity='D',
        feat_static_real=[BasicFeatureInfo(name='feat_static_real')],
        feat_static_cat=[
            CategoricalFeatureInfo(name='feat_static_cat', cardinality=10)
        ],
        feat_dynamic_real=[BasicFeatureInfo(name='feat_dynamic_real')],
    )
    data = RecipeDataset(
        recipe=recipe,
        metadata=metadata,
        max_train_length=20,
        prediction_length=10,
        num_timeseries=10,
        trim_length_fun=trim,
    )

    generated = data.generate()
    assert generated.test is not None
    info = data.dataset_info(generated.train, generated.test)
    return info, generated.train, generated.test
def metadata(self) -> MetaData:
    """Return metadata carrying the time granularity and prediction length."""
    fields = {
        "time_granularity": self.time_granularity,
        "prediction_length": self.prediction_length,
    }
    return MetaData(**fields)
def metadata(self) -> MetaData:
    """Return metadata that records only the dataset's time granularity."""
    granularity = self.time_granularity
    return MetaData(time_granularity=granularity)
def metadata(self) -> MetaData:
    """Return metadata carrying the frequency and prediction length."""
    fields = {
        "freq": self.freq,
        "prediction_length": self.prediction_length,
    }
    return MetaData(**fields)
static_cat=['sku', 'Label', 'Custname']) # Test data include fcast_length which are ground truths. test_data = df2gluonts(processed_df_fill, cat_inverted_idx, fcast_len=0, freq=freq, ts_id=['sku', 'Label', 'Custname'], static_cat=['sku', 'Label', 'Custname']) gluonts_datasets = TrainDatasets( metadata=MetaData( freq=freq, target={'name': 'quantity'}, feat_static_cat=[ # Add 'unknown'. CategoricalFeatureInfo(name=k, cardinality=len(v) + 1) for k, v in cat_inverted_idx.items() ], prediction_length=fcast_length), train=train_data, test=test_data) # %% epochs = 20 metric = [ { "Name": "train:loss", "Regex": r"Epoch\[\d+\] Evaluation metric 'epoch_loss'=(\S+)" },
def meta(self) -> MetaData:
    """
    Returns the dataset's metadata.
    """
    meta_path = self.root / "gluonts" / "metadata.json"
    return MetaData.parse_file(meta_path)