# Common imports assumed by the snippets below (GluonTS 0.x module layout).
# Snippet-specific helpers (create_dataset, df2gluonts, args, processed_root,
# NUM_SERIES, the recipe helpers, ...) come from the snippets' original modules.
import pickle
from pathlib import Path

import numpy as np

from gluonts.dataset.artificial import constant_dataset, default_synthetic
from gluonts.dataset.common import (
    CategoricalFeatureInfo,
    ListDataset,
    MetaData,
    TrainDatasets,
    load_datasets,
    save_datasets,
)
from gluonts.dataset.field_names import FieldName
from gluonts.dataset.multivariate_grouper import MultivariateGrouper
from gluonts.dataset.repository.datasets import get_dataset


def load_multivariate_constant_dataset():
    # constant_dataset() returns a DatasetInfo plus train/test datasets;
    # TrainDatasets needs the .metadata attribute, not the DatasetInfo itself.
    dataset_info, train_ds, test_ds = constant_dataset()
    grouper_train = MultivariateGrouper(max_target_dim=NUM_SERIES)
    grouper_test = MultivariateGrouper(max_target_dim=NUM_SERIES)
    return TrainDatasets(
        metadata=dataset_info.metadata,
        train=grouper_train(train_ds),
        test=grouper_test(test_ds),
    )

def load_multivariate_datasets(path: Path) -> TrainDatasets:
    ds = load_datasets(path / "metadata", path / "train", path / "test")
    # MetaData stores cardinality as a string, so cast before using it as a dim.
    target_dim = int(ds.metadata.feat_static_cat[0].cardinality)
    grouper_train = MultivariateGrouper(max_target_dim=target_dim)
    grouper_test = MultivariateGrouper(max_target_dim=target_dim)
    return TrainDatasets(
        metadata=ds.metadata,
        train=grouper_train(ds.train),
        test=grouper_test(ds.test),
    )

def load_dataset(dataset_name: str, path: Path) -> TrainDatasets:
    dataset = get_dataset(dataset_name, path, regenerate=False)
    # As above, cardinality is stored as a string in MetaData.
    target_dim = int(dataset.metadata.feat_static_cat[0].cardinality)
    grouper_train = MultivariateGrouper(max_target_dim=target_dim)
    grouper_test = MultivariateGrouper(max_target_dim=target_dim)
    return TrainDatasets(
        metadata=dataset.metadata,
        train=grouper_train(dataset.train),
        test=grouper_test(dataset.test),
    )

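# Usage sketch for load_dataset above. The dataset name "exchange_rate_nips"
# and the local "datasets" directory are illustrative assumptions, not part
# of the original code.
multivariate_ds = load_dataset("exchange_rate_nips", Path("datasets"))
entry = next(iter(multivariate_ds.train))
# MultivariateGrouper stacks the univariate series into a single entry whose
# target is 2D: (num_series, num_timesteps).
print(entry["target"].shape)
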
def load_multivariate_constant_dataset():
    dataset_info, train_ds, test_ds = constant_dataset()
    grouper_train = MultivariateGrouper(max_target_dim=10)
    grouper_test = MultivariateGrouper(num_test_dates=1, max_target_dim=10)
    metadata = dataset_info.metadata
    metadata.prediction_length = dataset_info.prediction_length
    return TrainDatasets(
        metadata=metadata,
        train=grouper_train(train_ds),
        test=grouper_test(test_ds),
    )

def apply(self):
    # Normalize every train series and remember its scale, so the matching
    # test series can be normalized with the same factor.
    train_scale = map(self._max_normalize, iter(self.datasets.train))
    unzip_train_scale = list(zip(*train_scale))
    train = ListDataset(unzip_train_scale[0], freq=self.freq)
    scales = unzip_train_scale[1]
    test = None
    if self.datasets.test is not None:
        test_scale = zip(iter(self.datasets.test), scales)
        test = ListDataset(
            map(lambda x: self._max_normalize(x[0], x[1])[0], test_scale),
            freq=self.freq,
        )
    self.datasets = TrainDatasets(self.datasets.metadata, train, test)
    return self

def log1p_tds(dataset: TrainDatasets) -> TrainDatasets:
    """Create a new TrainDatasets with targets log1p-transformed."""
    # Implementation note: currently the only way is to eagerly load all
    # time series into memory and transform them in place.
    train = ListDataset(dataset.train, freq=dataset.metadata.freq)
    log1p(train)
    if dataset.test is not None:
        test = ListDataset(dataset.test, freq=dataset.metadata.freq)
        log1p(test)
    else:
        test = None
    # fmt: off
    return TrainDatasets(
        dataset.metadata.copy(deep=True),  # Note: pydantic deep copy.
        train=train,
        test=test)
    # fmt: on

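# Sketch of the in-place log1p helper that log1p_tds assumes. The helper name
# comes from the call sites above; touching ListDataset.list_data directly is
# an assumption about the GluonTS 0.x ListDataset internals.
def log1p(list_dataset: ListDataset) -> None:
    for entry in list_dataset.list_data:
        entry["target"] = np.log1p(
            np.asarray(entry["target"], dtype=np.float32)
        )
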
def test_max_normalize():
    info, train_ds, test_ds = constant_dataset()
    datasets = TrainDatasets(info.metadata, train_ds, test_ds)
    normalize = MaxNormalize(datasets).apply()
    assert normalize.datasets.metadata == datasets.metadata
    for i, train_data in enumerate(normalize.datasets.train):
        train = train_data["target"]
        if i == 0:
            assert np.all(train == np.zeros(len(train), dtype=np.float32))
        else:
            assert np.all(train == np.ones(len(train), dtype=np.float32))
    assert normalize.datasets.test is not None
    for i, test_data in enumerate(normalize.datasets.test):
        test = test_data["target"]
        if i == 0:
            assert np.all(test == np.zeros(len(test), dtype=np.float32))
        else:
            assert np.all(test == np.ones(len(test), dtype=np.float32))

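# Minimal sketch of the MaxNormalize transform assumed by apply() and
# test_max_normalize above. The _max_normalize signature (entry, scale=None)
# and the divide-by-max behavior are inferred from the call sites, not taken
# from the original implementation.
class MaxNormalize:
    def __init__(self, datasets: TrainDatasets):
        self.datasets = datasets
        self.freq = datasets.metadata.freq

    def _max_normalize(self, entry, scale=None):
        # Divide the target by the series maximum; return the new entry and
        # the scale so that test series can reuse the train scale.
        target = np.asarray(entry["target"], dtype=np.float32)
        if scale is None:
            scale = float(target.max()) or 1.0  # all-zero series keep scale 1
        return dict(entry, target=target / scale), scale


# Attach the apply() shown earlier (a module-level function in this collection).
MaxNormalize.apply = apply
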
def createGluontsDataset(data_name):
    # Fetch all target series plus metadata; target_slice has shape
    # (sample_size, seq_len, 1).
    target_slice, ds_metadata = create_dataset(data_name)
    # Train series drop the last prediction_length steps; test series keep
    # the full length.
    train_ds = ListDataset(
        [
            {
                FieldName.TARGET: target,
                FieldName.START: start,
                FieldName.FORECAST_START: forecast_start,
            }
            for (target, start, forecast_start) in zip(
                target_slice[:, :-ds_metadata['prediction_length']],
                ds_metadata['start'],
                ds_metadata['forecast_start'],
            )
        ],
        freq=ds_metadata['freq'],
        one_dim_target=False,
    )
    test_ds = ListDataset(
        [
            {
                FieldName.TARGET: target,
                FieldName.START: start,
                FieldName.FORECAST_START: forecast_start,
            }
            for (target, start, forecast_start) in zip(
                target_slice,
                ds_metadata['start'],
                ds_metadata['forecast_start'],
            )
        ],
        freq=ds_metadata['freq'],
        one_dim_target=False,
    )
    dataset = TrainDatasets(metadata=ds_metadata, train=train_ds, test=test_ds)
    pkl_name = '{}_{}_{}_{}.pkl'.format(
        '%s_start(%s)_freq(%s)' % (data_name, args.start, args.freq),
        'steps(%d)_slice(%s)_DsSeries(%d)' % (
            args.num_time_steps, args.slice, dataset.metadata['sample_size']),
        'train(%d)' % args.train_length,
        'pred(%d)' % args.pred_length,
    )
    with open(processed_root + pkl_name, 'wb') as fp:
        pickle.dump(dataset, fp)
    print('Current dataset:', data_name,
          'train length %d, prediction length %d' % (args.train_length, args.pred_length),
          'samples per dataset (after slicing):', dataset.metadata['sample_size'])

def generate(self) -> TrainDatasets:
    metadata = self.metadata
    data_it = generate(
        length=self.max_train_length + self.prediction_length,
        recipe=self.recipe,
        start=self.data_start,
    )
    full_length_data = take_as_list(data_it, self.num_timeseries)
    # Test series: full length, with a possibly trimmed front.
    test_data = [
        RecipeDataset.trim_ts_item_front(
            x, self.trim_length_fun(x, train_length=self.max_train_length)
        )
        for x in full_length_data
    ]
    # Train series: the test series minus the last prediction_length steps.
    train_data = [
        RecipeDataset.trim_ts_item_end(x, self.prediction_length)
        for x in test_data
    ]
    return TrainDatasets(
        metadata=metadata,
        train=ListDataset(train_data, metadata.time_granularity),
        test=ListDataset(test_data, metadata.time_granularity),
    )

def generate(self) -> TrainDatasets:
    return TrainDatasets(
        metadata=self.metadata,
        train=ListDataset(self.train, self.time_granularity),
        test=ListDataset(self.test, self.time_granularity),
    )

def generate(self) -> TrainDatasets:
    return TrainDatasets(
        metadata=self.metadata,
        train=ListDataset(self.train, self.freq),
        test=ListDataset(self.test, self.freq),
    )

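# Note: the two generate() variants above differ only in the attribute passed
# to ListDataset as the frequency; time_granularity is the older GluonTS name
# for what was later renamed to freq.
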
def create_multivariate_datasets(data_dir: str) -> None:
    info, train_ds, test_ds = default_synthetic()
    save_datasets(
        TrainDatasets(metadata=info.metadata, train=train_ds, test=test_ds),
        data_dir,
    )

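# Round-trip sketch: save_datasets writes metadata/, train/ and test/ under
# the given directory, and load_datasets reads them back (as in
# load_multivariate_datasets above). The "synthetic_out" directory name is an
# illustrative assumption.
create_multivariate_datasets("synthetic_out")
out = Path("synthetic_out")
restored = load_datasets(out / "metadata", out / "train", out / "test")
print(restored.metadata.freq)
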
    static_cat=['sku', 'Label', 'Custname'])

# Test data includes the fcast_length ground-truth points.
test_data = df2gluonts(
    processed_df_fill,
    cat_inverted_idx,
    fcast_len=0,
    freq=freq,
    ts_id=['sku', 'Label', 'Custname'],
    static_cat=['sku', 'Label', 'Custname'],
)

gluonts_datasets = TrainDatasets(
    metadata=MetaData(
        freq=freq,
        target={'name': 'quantity'},
        feat_static_cat=[
            # Add one extra category for 'unknown'.
            CategoricalFeatureInfo(name=k, cardinality=len(v) + 1)
            for k, v in cat_inverted_idx.items()
        ],
        prediction_length=fcast_length,
    ),
    train=train_data,
    test=test_data,
)

# %%
epochs = 20
metric = [
    {
        "Name": "train:loss",
        "Regex": r"Epoch\[\d+\] Evaluation metric 'epoch_loss'=(\S+)"
    },
    {