Beispiel #1
0
def test_chain():
    """Check that ``equals`` / ``clone`` compare transformation chains by content.

    A chain must equal its own clone, must differ from a clone whose
    ``trans`` list was replaced by an empty one, and must differ from a
    chain whose only difference is the ``log_scale`` flag of the age feature.
    """

    def build_chain(log_scale):
        # The three steps are identical between the chains under test;
        # only ``log_scale`` of the age feature is allowed to vary.
        time_step = transform.AddTimeFeatures(
            start_field=FieldName.START,
            target_field=FieldName.TARGET,
            output_field="time_feat",
            time_features=[
                time_feature.DayOfWeek(),
                time_feature.DayOfMonth(),
                time_feature.MonthOfYear(),
            ],
            pred_length=10,
        )
        age_step = transform.AddAgeFeature(
            target_field=FieldName.TARGET,
            output_field="age",
            pred_length=10,
            log_scale=log_scale,
        )
        observed_step = transform.AddObservedValuesIndicator(
            target_field=FieldName.TARGET,
            output_field="observed_values",
        )
        return transform.Chain(trans=[time_step, age_step, observed_step])

    chain = build_chain(log_scale=True)

    assert equals(chain, clone(chain))
    assert not equals(chain, clone(chain, {"trans": []}))

    another_chain = build_chain(log_scale=False)
    assert not equals(chain, another_chain)
Beispiel #2
0
 def create_transformation(self) -> transform.Transformation:
     """Build the transformation chain used by this estimator.

     The chain applies, in order: ``AsNumpyArray`` on the target,
     ``AddTimeFeatures`` driven by ``self.freq``, ``VstackFeatures`` putting
     the time features into ``FEAT_DYNAMIC_REAL``, ``SetFieldIfNotPresent``
     with value ``[0.0]`` for ``FEAT_STATIC_CAT`` followed by
     ``AsNumpyArray`` on that field, and an ``InstanceSplitter`` using
     ``self.context_length`` / ``self.prediction_length`` as past / future
     lengths.
     """
     target_as_array = transform.AsNumpyArray(
         field=FieldName.TARGET, expected_ndim=1
     )
     time_features = transform.AddTimeFeatures(
         start_field=transform.FieldName.START,
         target_field=transform.FieldName.TARGET,
         output_field=transform.FieldName.FEAT_TIME,
         time_features=time_features_from_frequency_str(self.freq),
         pred_length=self.prediction_length,
     )
     stacked_dynamic = transform.VstackFeatures(
         output_field=FieldName.FEAT_DYNAMIC_REAL,
         input_fields=[FieldName.FEAT_TIME],
     )
     # Entries without a static categorical feature get a default value
     # before the field is converted to an array.
     default_static_cat = transform.SetFieldIfNotPresent(
         field=FieldName.FEAT_STATIC_CAT, value=[0.0]
     )
     static_cat_as_array = transform.AsNumpyArray(
         field=FieldName.FEAT_STATIC_CAT, expected_ndim=1
     )
     splitter = transform.InstanceSplitter(
         target_field=transform.FieldName.TARGET,
         is_pad_field=transform.FieldName.IS_PAD,
         start_field=transform.FieldName.START,
         forecast_start_field=transform.FieldName.FORECAST_START,
         train_sampler=ExpectedNumInstanceSampler(num_instances=1),
         past_length=self.context_length,
         future_length=self.prediction_length,
         time_series_fields=[FieldName.FEAT_DYNAMIC_REAL],
     )

     steps = [
         target_as_array,
         time_features,
         stacked_dynamic,
         default_static_cat,
         static_cat_as_array,
         splitter,
     ]
     return transform.Chain(trans=steps)
Beispiel #3
0
def test_ExpectedNumInstanceSampler():
    """Compare the scale histogram of sampled windows with ``2**i * repetition``.

    The splitter chain is run ``repetition`` times over the dataset and the
    non-zero part of every ``past_target`` is added to a ``ScaleHistogram``;
    the resulting bin counts must match the expected dictionary exactly.
    """
    N = 6
    train_length = 2
    pred_length = 1
    repetition = 2

    ds = make_dataset(N, train_length)

    sampler = transform.ExpectedNumInstanceSampler(
        num_instances=4, min_future=pred_length
    )
    splitter = transform.InstanceSplitter(
        target_field=FieldName.TARGET,
        is_pad_field=FieldName.IS_PAD,
        start_field=FieldName.START,
        forecast_start_field=FieldName.FORECAST_START,
        instance_sampler=sampler,
        past_length=train_length,
        future_length=pred_length,
    )
    t = transform.Chain(trans=[splitter])

    assert_serializable(t)

    scale_hist = ScaleHistogram()
    for _ in range(repetition):
        for data in t(iter(ds), is_train=True):
            past = data["past_target"]
            # Zeros are dropped so that padding is not mistaken for data.
            scale_hist.add(past[past > 0])

    expected_values = {i: 2**i * repetition for i in range(1, N)}

    assert expected_values == scale_hist.bin_counts
Beispiel #4
0
 def create_transformation(self) -> transform.Transformation:
     """Build the feature-preparation chain used by this estimator.

     The chain applies, in order: ``AsNumpyArray`` on the target,
     ``AddTimeFeatures`` driven by ``self.freq`` and
     ``self.prediction_length``, ``VstackFeatures`` putting the time
     features into ``FEAT_DYNAMIC_REAL``, and ``SetFieldIfNotPresent`` with
     value ``[0.0]`` for ``FEAT_STATIC_CAT`` followed by ``AsNumpyArray`` on
     that field.
     """
     target_as_array = transform.AsNumpyArray(
         field=FieldName.TARGET, expected_ndim=1
     )
     time_features = transform.AddTimeFeatures(
         start_field=FieldName.START,
         target_field=FieldName.TARGET,
         output_field=FieldName.FEAT_TIME,
         time_features=time_features_from_frequency_str(self.freq),
         pred_length=self.prediction_length,
     )
     stacked_dynamic = transform.VstackFeatures(
         output_field=FieldName.FEAT_DYNAMIC_REAL,
         input_fields=[FieldName.FEAT_TIME],
     )
     # Entries without a static categorical feature get a default value
     # before the field is converted to an array.
     default_static_cat = transform.SetFieldIfNotPresent(
         field=FieldName.FEAT_STATIC_CAT, value=[0.0]
     )
     static_cat_as_array = transform.AsNumpyArray(
         field=FieldName.FEAT_STATIC_CAT, expected_ndim=1
     )

     steps = [
         target_as_array,
         time_features,
         stacked_dynamic,
         default_static_cat,
         static_cat_as_array,
     ]
     return transform.Chain(trans=steps)
Beispiel #5
0
def test_Transformation():
    """Smoke-test a feature + splitting chain on a single constant daily series.

    The chain must be serializable, and iterating it in training mode must
    not raise; every produced entry is printed.
    """
    train_length = 100
    pred_length = 10

    ds = gluonts.dataset.common.ListDataset(
        [{"start": "2012-01-01", "target": [0.2] * train_length}], freq="1D"
    )

    add_time = transform.AddTimeFeatures(
        start_field=transform.FieldName.START,
        target_field=transform.FieldName.TARGET,
        output_field="time_feat",
        time_features=[
            time_feature.DayOfWeek(),
            time_feature.DayOfMonth(),
            time_feature.MonthOfYear(),
        ],
        pred_length=pred_length,
    )
    add_age = transform.AddAgeFeature(
        target_field=transform.FieldName.TARGET,
        output_field="age",
        pred_length=pred_length,
        log_scale=True,
    )
    add_observed = transform.AddObservedValuesIndicator(
        target_field=transform.FieldName.TARGET,
        output_field="observed_values",
    )
    # "age" and "time_feat" are merged into "dynamic_feat"; the inputs are
    # dropped afterwards (drop_inputs=True).
    stack = transform.VstackFeatures(
        output_field="dynamic_feat",
        input_fields=["age", "time_feat"],
        drop_inputs=True,
    )
    split = transform.InstanceSplitter(
        target_field=transform.FieldName.TARGET,
        is_pad_field=transform.FieldName.IS_PAD,
        start_field=transform.FieldName.START,
        forecast_start_field=transform.FieldName.FORECAST_START,
        train_sampler=transform.ExpectedNumInstanceSampler(num_instances=4),
        past_length=train_length,
        future_length=pred_length,
        time_series_fields=["dynamic_feat", "observed_values"],
    )

    t = transform.Chain(
        trans=[add_time, add_age, add_observed, stack, split]
    )

    assert_serializable(t)

    for u in t(iter(ds), is_train=True):
        print(u)
Beispiel #6
0
def test_forking_sequence_with_features(is_train) -> None:
    """Check output shapes of ``ForkingSequenceSplitter`` with feature fields.

    The first transformed entry must expose past target / age / time
    features and future time features with the expected shapes; in training
    mode the future target shape is checked as well.
    """

    def make_dataset(N, train_length):
        # Builds 2 ** N - 1 series; each one is a row of an ``arange``
        # reshaped to (n, train_length).
        n = 2 ** N - 1
        targets = np.arange(n * train_length).reshape((n, train_length))
        entries = [{"start": "2012-01-01", "target": row} for row in targets]
        return ListDataset(entries, freq="D")

    ds = make_dataset(1, 20)

    add_age = transform.AddAgeFeature(
        target_field=FieldName.TARGET,
        output_field=FieldName.FEAT_AGE,
        pred_length=10,
    )
    add_time = transform.AddTimeFeatures(
        start_field=FieldName.START,
        target_field=FieldName.TARGET,
        output_field=FieldName.FEAT_TIME,
        time_features=time_features_from_frequency_str("D"),
        pred_length=10,
    )
    splitter = ForkingSequenceSplitter(
        train_sampler=TSplitSampler(),
        enc_len=5,
        dec_len=3,
        encoder_series_fields=[
            FieldName.FEAT_AGE,
            FieldName.FEAT_TIME,
        ],
        decoder_series_fields=[FieldName.FEAT_TIME],
    )
    trans = transform.Chain(trans=[add_age, add_time, splitter])

    out = trans(iter(ds), is_train=is_train)
    transformed_data = next(iter(out))

    # Checked in this order so the first failing field matches the original
    # assertion sequence.
    expected_shapes = {
        "past_target": (5, 1),
        "past_feat_dynamic_age": (5, 1),
        "past_time_feat": (5, 3),
        "future_time_feat": (5, 3, 3),
    }
    for field, shape in expected_shapes.items():
        assert transformed_data[field].shape == shape

    if is_train:
        assert transformed_data["future_target"].shape == (5, 3)
def test_forking_sequence_splitter() -> None:
    """Check the forked future target and the past age feature values.

    With ``enc_len=5`` and ``dec_len=3`` on a length-20 series, the future
    target must match the hard-coded 5x3 matrix and ``past_age`` must match
    ``log10(2 + t)`` over the encoder window.
    """
    len_ts = 20
    enc_len = 5
    dec_len = 3
    ds = make_dataset(1, len_ts)

    add_age = transform.AddAgeFeature(
        target_field=FieldName.TARGET,
        output_field="age",
        pred_length=dec_len,
    )
    splitter = ForkingSequenceSplitter(
        train_sampler=TSplitSampler(),
        enc_len=enc_len,
        dec_len=dec_len,
        encoder_series_fields=["age"],
    )
    trans = transform.Chain([add_age, splitter])

    transformed_data = next(iter(trans(ds, is_train=True)))

    # Row j holds the three target values following encoder position j.
    future_target = np.array(
        [
            [first, first + 1.0, first + 2.0]
            for first in (13.0, 14.0, 15.0, 16.0, 17.0)
        ]
    )
    target_error = np.linalg.norm(
        future_target - transformed_data["future_target"]
    )
    assert (
        target_error < 1e-5
    ), "the forking sequence target should be computed correctly."

    age = np.log10(2.0 + np.arange(len_ts))
    expected_past_age = age[-(enc_len + dec_len) : -dec_len]
    age_error = np.linalg.norm(
        expected_past_age - transformed_data["past_age"].flatten()
    )
    assert (
        age_error < 1e-5
    ), "the forking sequence past feature should be computed correctly."
Beispiel #8
0
def test_BucketInstanceSampler():
    """Check that bucket sampling yields roughly ``repetition`` hits per bin.

    The splitter chain, driven by a ``BucketInstanceSampler`` built from the
    dataset's scale histogram, is run ``repetition`` times. The non-zero
    part of every ``past_target`` is added to a fresh ``ScaleHistogram``;
    for bins ``1 .. N-1`` the count must be within 30% of ``repetition``.
    """
    N = 6
    train_length = 2
    pred_length = 1
    ds = make_dataset(N, train_length)

    dataset_stats = calculate_dataset_statistics(ds)

    t = transform.Chain(
        trans=[
            transform.InstanceSplitter(
                target_field=transform.FieldName.TARGET,
                is_pad_field=transform.FieldName.IS_PAD,
                start_field=transform.FieldName.START,
                forecast_start_field=transform.FieldName.FORECAST_START,
                train_sampler=transform.BucketInstanceSampler(
                    dataset_stats.scale_histogram
                ),
                past_length=train_length,
                future_length=pred_length,
                pick_incomplete=True,
            )
        ]
    )

    assert_serializable(t)

    scale_hist = ScaleHistogram()

    repetition = 200
    for _ in range(repetition):
        for data in t(iter(ds), is_train=True):
            target_values = data["past_target"]
            # for simplicity, discard values that are zeros to avoid
            # confusion with padding
            target_values = target_values[target_values > 0]
            scale_hist.add(target_values)

    expected_values = {i: repetition for i in range(1, N)}
    found_values = scale_hist.bin_counts

    for i in range(1, N):
        # The tolerance applies to the absolute deviation. The original
        # wrapped the whole comparison in abs(), which only enforced a lower
        # bound and let arbitrarily large counts pass.
        deviation = abs(expected_values[i] - found_values[i])
        assert deviation < expected_values[i] * 0.3
Beispiel #9
0
def test_target_dim_indicator():
    """Check that ``TargetDimIndicator`` writes ``[0, 1, 2, 3]`` for 4 target rows."""
    target = np.array([0, 2, 3, 10]).tolist()

    # Four identical rows -> a multivariate target with four dimensions.
    multi_dim_target = np.array([target] * 4)
    entry = {"start": "2012-01-01", "target": multi_dim_target}
    dataset = gluonts.dataset.common.ListDataset(
        data_iter=[entry],
        freq="1D",
        one_dim_target=False,
    )

    indicator = transform.TargetDimIndicator(
        target_field=FieldName.TARGET,
        field_name="target_dimensions",
    )
    t = transform.Chain(trans=[indicator])

    for data_entry in t(dataset, is_train=True):
        matches = data_entry["target_dimensions"] == np.array([0, 1, 2, 3])
        assert matches.all()
Beispiel #10
0
def test_multi_dim_transformation(is_train):
    """Run a feature + splitting chain on a 2-dimensional target with NaNs.

    Shapes of the split fields are checked, and the observed-values
    indicator and past target are compared against the expected arrays via
    ``assert_padded_array``. The only difference between training and
    inference mode is the expected ``future_target`` shape.
    """
    train_length = 10
    pred_length = 2

    first_dim = np.arange(1, 11, 1).tolist()
    first_dim[-1] = "NaN"

    second_dim = np.arange(11, 21, 1).tolist()
    second_dim[0] = "NaN"

    ds = gluonts.dataset.common.ListDataset(
        data_iter=[{"start": "2012-01-01", "target": [first_dim, second_dim]}],
        freq="1D",
        one_dim_target=False,
    )

    # Looks weird - but this is necessary to assert the nan entries correctly.
    first_dim[-1] = np.nan
    second_dim[0] = np.nan

    add_time = transform.AddTimeFeatures(
        start_field=transform.FieldName.START,
        target_field=transform.FieldName.TARGET,
        output_field="time_feat",
        time_features=[
            time_feature.DayOfWeek(),
            time_feature.DayOfMonth(),
            time_feature.MonthOfYear(),
        ],
        pred_length=pred_length,
    )
    add_age = transform.AddAgeFeature(
        target_field=transform.FieldName.TARGET,
        output_field="age",
        pred_length=pred_length,
        log_scale=True,
    )
    add_observed = transform.AddObservedValuesIndicator(
        target_field=transform.FieldName.TARGET,
        output_field="observed_values",
        convert_nans=False,
    )
    stack = transform.VstackFeatures(
        output_field="dynamic_feat",
        input_fields=["age", "time_feat"],
        drop_inputs=True,
    )
    split = transform.InstanceSplitter(
        target_field=transform.FieldName.TARGET,
        is_pad_field=transform.FieldName.IS_PAD,
        start_field=transform.FieldName.START,
        forecast_start_field=transform.FieldName.FORECAST_START,
        train_sampler=transform.ExpectedNumInstanceSampler(num_instances=4),
        past_length=train_length,
        future_length=pred_length,
        time_series_fields=["dynamic_feat", "observed_values"],
        output_NTC=False,
    )
    t = transform.Chain(
        trans=[add_time, add_age, add_observed, stack, split]
    )

    assert_serializable(t)

    # Training mode yields a future target of length 2, inference mode an
    # empty one; every other expectation is shared. ``bool(...)`` keeps the
    # flag passed to the chain a literal True/False.
    train_mode = bool(is_train)
    future_shape = (2, 2) if train_mode else (2, 0)

    for u in t(iter(ds), is_train=train_mode):
        assert_shape(u["past_target"], (2, 10))
        assert_shape(u["past_dynamic_feat"], (4, 10))
        assert_shape(u["past_observed_values"], (2, 10))
        assert_shape(u["future_target"], future_shape)

        expected_observed = np.array([[1.0] * 9 + [0.0], [0.0] + [1.0] * 9])
        assert_padded_array(
            u["past_observed_values"],
            expected_observed,
            u["past_is_pad"],
        )

        expected_target = np.array([first_dim, second_dim])
        assert_padded_array(
            u["past_target"],
            expected_target,
            u["past_is_pad"],
        )
Beispiel #11
0
def test_cdf_to_gaussian_transformation():
    """Round-trip a 2-dimensional target through the CDF-to-Gaussian transform.

    The dataset mimics ``InstanceSplitter`` output. After applying
    ``CDFtoGaussianTransform``, the transformed past target is fed to
    ``cdf_to_gaussian_forward_transform`` and the result must be close to
    the original target.
    """

    def make_test_data():
        values = [
            0, 0, 0, 0,
            10, 10,
            20, 20,
            30, 30,
            40, 50, 59,
            60, 60,
            70, 80, 90, 100,
        ]
        target = np.array(values).tolist()

        np.random.shuffle(target)

        length = len(target)
        multi_dim_target = np.array([target, target]).transpose()
        past_is_pad = np.array([[0] * length]).transpose()
        past_observed_target = np.array(
            [[1] * length, [1] * length]
        ).transpose()

        # Mimic output from InstanceSplitter
        entry = {
            "start": "2012-01-01",
            "target": multi_dim_target,
            "past_target": multi_dim_target,
            "future_target": multi_dim_target,
            "past_is_pad": past_is_pad,
            f"past_{FieldName.OBSERVED_VALUES}": past_observed_target,
        }
        return gluonts.dataset.common.ListDataset(
            data_iter=[entry],
            freq="1D",
            one_dim_target=False,
        )

    def make_fake_output(u: DataEntry):
        # Two leading singleton axes are prepended to the transformed target.
        with_one_axis = np.expand_dims(u["past_target_cdf"], axis=0)
        return np.expand_dims(with_one_axis, axis=0)

    ds = make_test_data()

    cdf_transform = transform.CDFtoGaussianTransform(
        target_field=FieldName.TARGET,
        observed_values_field=FieldName.OBSERVED_VALUES,
        max_context_length=20,
        target_dim=2,
    )
    t = transform.Chain(trans=[cdf_transform])

    for u in t(iter(ds), is_train=False):

        fake_output = make_fake_output(u)

        # Fake transformation chain output: each field gets a leading axis
        # and is converted to an mxnet NDArray.
        for field in ("past_target_sorted", "slopes", "intercepts"):
            u[field] = mx.nd.array(np.expand_dims(u[field], axis=0))

        back_transformed = transform.cdf_to_gaussian_forward_transform(
            u, fake_output
        )

        # Take the first entry along the two leading axes; the original
        # comment states that all samples/batches are the same.
        back_transformed = back_transformed[0][0]

        original_target = u["target"]

        # Original target and back-transformed target should be the same
        assert np.allclose(original_target, back_transformed)
Beispiel #12
0
def test_forking_sequence_splitter() -> None:
    """Check the forked future target, including an out-of-bounds window.

    First, with ``enc_len=5`` and ``dec_len=3`` on a length-20 series, the
    future target must match the hard-coded 5x3 matrix. Second, with
    ``enc_len=20`` and ``dec_len=20`` (windows as long as the series itself),
    the sum of the future target must equal ``sum(range(20))``.
    """

    def make_dataset(N, train_length):
        # Builds 2 ** N - 1 series; series i is row i of an ``arange``
        # reshaped to (n, train_length).
        n = 2 ** N - 1

        targets = np.arange(n * train_length).reshape((n, train_length))

        return ListDataset(
            [
                {"start": "2012-01-01", "target": targets[i, :]}
                for i in range(n)
            ],
            freq="D",
        )

    ds = make_dataset(1, 20)

    trans = transform.Chain(
        trans=[
            transform.AddAgeFeature(
                target_field=transform.FieldName.TARGET,
                output_field="age",
                pred_length=10,
            ),
            ForkingSequenceSplitter(
                train_sampler=TestSplitSampler(),
                time_series_fields=["age"],
                enc_len=5,
                dec_len=3,
            ),
        ]
    )

    out = trans(iter(ds), is_train=True)
    transformed_data = next(iter(out))

    future_target = np.array(
        [
            [13.0, 14.0, 15.0],
            [14.0, 15.0, 16.0],
            [15.0, 16.0, 17.0],
            [16.0, 17.0, 18.0],
            [17.0, 18.0, 19.0],
        ]
    )

    assert (
        np.linalg.norm(future_target - transformed_data["future_target"])
        < 1e-5
    ), "the forking sequence target should be computed correctly."

    trans_oob = transform.Chain(
        trans=[
            transform.AddAgeFeature(
                target_field=transform.FieldName.TARGET,
                output_field="age",
                pred_length=10,
            ),
            ForkingSequenceSplitter(
                train_sampler=TestSplitSampler(),
                time_series_fields=["age"],
                enc_len=20,
                dec_len=20,
            ),
        ]
    )

    transformed_data_oob = next(iter(trans_oob(iter(ds), is_train=True)))

    # Compare the absolute difference: the signed difference used before
    # would also pass whenever the computed sum was too small (e.g. zero).
    oob_sum_error = abs(
        np.sum(transformed_data_oob["future_target"]) - np.sum(np.arange(20))
    )
    assert (
        oob_sum_error < 1e-5
    ), "the forking sequence target should be computed correctly."