Example #1
def load_multivariate_constant_dataset():
    metadata, train_ds, test_ds = constant_dataset()
    grouper_train = MultivariateGrouper(max_target_dim=NUM_SERIES)
    grouper_test = MultivariateGrouper(max_target_dim=NUM_SERIES)
    return TrainDatasets(
        metadata=metadata,
        train=grouper_train(train_ds),
        test=grouper_test(test_ds),
    )
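This loader (and the similar ones below) assumes the usual GluonTS imports; a minimal sketch follows. The module paths and the NUM_SERIES value are assumptions based on the public GluonTS package layout (Example #4 groups the same constant dataset with max_target_dim=10), so verify them against the version these snippets target.

# Import sketch (assumed module paths; check your GluonTS version).
from gluonts.dataset.artificial import constant_dataset
from gluonts.dataset.common import TrainDatasets
from gluonts.dataset.multivariate_grouper import MultivariateGrouper

NUM_SERIES = 10  # assumption, consistent with Example #4 below

datasets = load_multivariate_constant_dataset()
# After grouping, the train split holds a single entry whose "target"
# stacks all series into an array of shape (num_series, length).
entry = next(iter(datasets.train))
print(entry["target"].shape)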
Example #2
def load_multivariate_datasets(path: Path) -> TrainDatasets:
    ds = load_datasets(path / "metadata", path / "train", path / "test")
    target_dim = ds.metadata.feat_static_cat[0].cardinality
    grouper_train = MultivariateGrouper(max_target_dim=target_dim)
    grouper_test = MultivariateGrouper(max_target_dim=target_dim)
    return TrainDatasets(
        metadata=ds.metadata,
        train=grouper_train(ds.train),
        test=grouper_test(ds.test),
    )
def load_dataset(dataset_name: str, path: Path) -> TrainDatasets:
    dataset = get_dataset(dataset_name, path, regenerate=False)
    target_dim = dataset.metadata.feat_static_cat[0].cardinality
    grouper_train = MultivariateGrouper(max_target_dim=target_dim)
    grouper_test = MultivariateGrouper(max_target_dim=target_dim)
    return TrainDatasets(
        metadata=dataset.metadata,
        train=grouper_train(dataset.train),
        test=grouper_test(dataset.test),
    )
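A hedged usage sketch for the repository-based loader above; the dataset name and cache directory are placeholders, and get_dataset materializes the data on first use if it is not already cached.

from pathlib import Path

# Illustrative call; "exchange_rate" and the cache directory are placeholders.
ds = load_dataset("exchange_rate", Path.home() / ".gluonts" / "datasets")
print(ds.metadata.freq, ds.metadata.prediction_length)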
Example #4
def load_multivariate_constant_dataset():
    dataset_info, train_ds, test_ds = constant_dataset()
    grouper_train = MultivariateGrouper(max_target_dim=10)
    grouper_test = MultivariateGrouper(num_test_dates=1, max_target_dim=10)
    metadata = dataset_info.metadata
    metadata.prediction_length = dataset_info.prediction_length
    return TrainDatasets(
        metadata=metadata,
        train=grouper_train(train_ds),
        test=grouper_test(test_ds),
    )
Example #5
    def apply(self):
        train_scale = map(self._max_normalize, iter(self.datasets.train))
        unzip_train_scale = list(zip(*train_scale))
        train = ListDataset(unzip_train_scale[0], freq=self.freq)
        scales = unzip_train_scale[1]
        test = None
        if self.datasets.test is not None:
            test_scale = zip(iter(self.datasets.test), scales)
            test = ListDataset(
                map(lambda x: self._max_normalize(x[0], x[1])[0], test_scale), freq=self.freq,
            )

        self.datasets = TrainDatasets(self.datasets.metadata, train, test)
        return self
def log1p_tds(dataset: TrainDatasets) -> TrainDatasets:
    """Create a new train datasets with targets log-transformed."""
    # Implementation note: currently, the only way is to eagerly load all timeseries in memory, and do the transform.
    train = ListDataset(dataset.train, freq=dataset.metadata.freq)
    log1p(train)

    if dataset.test is not None:
        test = ListDataset(dataset.test, freq=dataset.metadata.freq)
        log1p(test)
    else:
        test = None

    # fmt: off
    return TrainDatasets(
        dataset.metadata.copy(),  # Note: pydantic's deep copy.
        train=train,
        test=test)
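Neither _max_normalize nor log1p is shown in the snippet above. The sketches below are inferred from how they are called and from the assertions in Example #7 (all-zero series stay zero, constant series become ones); the signatures, the all-zero guard, and the in-place mutation are assumptions, not the original implementation.

import numpy as np

def _max_normalize(data: dict, scale: float = None):
    # Presumed behavior: divide the target by its maximum, or by a scale that
    # was computed on the corresponding training series when normalizing test data.
    target = np.asarray(data["target"], dtype=np.float32)
    if scale is None:
        scale = float(target.max()) or 1.0  # guard against an all-zero series
    return dict(data, target=target / scale), scale

def log1p(dataset) -> None:
    # Presumed behavior: log-transform every target in place; this assumes that
    # iterating the ListDataset yields the stored entries rather than copies.
    for entry in dataset:
        entry["target"] = np.log1p(entry["target"])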
Example #7
def test_max_normalize():
    info, train_ds, test_ds = constant_dataset()
    datasets = TrainDatasets(info.metadata, train_ds, test_ds)
    normalize = MaxNormalize(datasets).apply()
    assert normalize.datasets.metadata == datasets.metadata
    for i, train_data in enumerate(normalize.datasets.train):
        train = train_data["target"]
        if i == 0:
            assert np.all(train == np.zeros(len(train), dtype=np.float32))
        else:
            assert np.all(train == np.ones(len(train), dtype=np.float32))

    assert normalize.datasets.test is not None
    for i, test_data in enumerate(normalize.datasets.test):
        test = test_data["target"]
        if i == 0:
            assert np.all(test == np.zeros(len(test), dtype=np.float32))
        else:
            assert np.all(test == np.ones(len(test), dtype=np.float32))
Example #8
def createGluontsDataset(data_name):
    # Fetch all target series and the dataset metadata
    # target_slice has shape (sample_size, seq_len, 1)
    target_slice, ds_metadata = create_dataset(data_name)

    train_ds = ListDataset([{
        FieldName.TARGET: target,
        FieldName.START: start,
        FieldName.FORECAST_START: forecast_start
    } for (target, start, forecast_start
           ) in zip(target_slice[:, :-ds_metadata['prediction_length']],
                    ds_metadata['start'], ds_metadata['forecast_start'])],
                           freq=ds_metadata['freq'],
                           one_dim_target=False)

    test_ds = ListDataset([{
        FieldName.TARGET: target,
        FieldName.START: start,
        FieldName.FORECAST_START: forecast_start
    } for (target, start, forecast_start) in zip(
        target_slice, ds_metadata['start'], ds_metadata['forecast_start'])],
                          freq=ds_metadata['freq'],
                          one_dim_target=False)

    dataset = TrainDatasets(metadata=ds_metadata, train=train_ds, test=test_ds)

    with open(
            processed_root + '{}_{}_{}_{}.pkl'.format(
                '%s_start(%s)_freq(%s)' % (data_name, args.start, args.freq),
                'steps(%d)_slice(%s)_DsSeries(%d)' %
                (args.num_time_steps, args.slice,
                 dataset.metadata['sample_size']),
                'train(%d)' % args.train_length,
                'pred(%d)' % args.pred_length,
            ), 'wb') as fp:
        pickle.dump(dataset, fp)

    print('Current dataset: ', data_name,
          'train length %d , prediction length %d ' % (args.train_length, args.pred_length),
          'samples per dataset (after slicing):', dataset.metadata['sample_size'])
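A matching loader for the pickle written above can stay very small; the path argument is a placeholder and must point at the file produced by createGluontsDataset.

import pickle

def load_pickled_dataset(pickle_path: str) -> TrainDatasets:
    # Illustrative counterpart to createGluontsDataset: restore the whole
    # TrainDatasets object (metadata, train and test splits) from disk.
    with open(pickle_path, 'rb') as fp:
        return pickle.load(fp)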
Example #9
File: _base.py Project: stadlmax/gluon-ts
    def generate(self) -> TrainDatasets:
        metadata = self.metadata
        data_it = generate(
            length=self.max_train_length + self.prediction_length,
            recipe=self.recipe,
            start=self.data_start,
        )
        full_length_data = take_as_list(data_it, self.num_timeseries)

        test_data = [
            RecipeDataset.trim_ts_item_front(
                x, self.trim_length_fun(x, train_length=self.max_train_length))
            for x in full_length_data
        ]
        train_data = [
            RecipeDataset.trim_ts_item_end(x, self.prediction_length)
            for x in test_data
        ]
        return TrainDatasets(
            metadata=metadata,
            train=ListDataset(train_data, metadata.time_granularity),
            test=ListDataset(test_data, metadata.time_granularity),
        )
Example #10
File: _base.py Project: stadlmax/gluon-ts
 def generate(self) -> TrainDatasets:
     return TrainDatasets(
         metadata=self.metadata,
         train=ListDataset(self.train, self.time_granularity),
         test=ListDataset(self.test, self.time_granularity),
     )
Example #11
File: _base.py Project: yx1215/gluon-ts
 def generate(self) -> TrainDatasets:
     return TrainDatasets(
         metadata=self.metadata,
         train=ListDataset(self.train, self.freq),
         test=ListDataset(self.test, self.freq),
     )
def create_multivariate_datasets(data_dir: str) -> None:
    info, train_ds, test_ds = default_synthetic()
    save_datasets(TrainDatasets(metadata=info.metadata, train=train_ds, test=test_ds), data_dir)
    return
Example #13
                        static_cat=['sku', 'Label', 'Custname'])

# Test data includes the fcast_length ground-truth steps.
test_data = df2gluonts(processed_df_fill,
                       cat_inverted_idx,
                       fcast_len=0,
                       freq=freq,
                       ts_id=['sku', 'Label', 'Custname'],
                       static_cat=['sku', 'Label', 'Custname'])

gluonts_datasets = TrainDatasets(
    metadata=MetaData(
        freq=freq,
        target={'name': 'quantity'},
        feat_static_cat=[
            # Add 'unknown'.
            CategoricalFeatureInfo(name=k, cardinality=len(v) + 1)
            for k, v in cat_inverted_idx.items()
        ],
        prediction_length=fcast_length),
    train=train_data,
    test=test_data)
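
# (Hypothetical sanity check, not part of the original script.) Peek at one
# training entry to confirm what df2gluonts produced; the field names below
# are assumptions based on the MetaData defined above.
sample = next(iter(gluonts_datasets.train))
print(sample['start'], len(sample['target']), sample.get('feat_static_cat'))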

# %%
epochs = 20

metric = [
    {
        "Name": "train:loss",
        "Regex": r"Epoch\[\d+\] Evaluation metric 'epoch_loss'=(\S+)"
    },
    {