Ejemplo n.º 1
0
 def create_transformation(self) -> Transformation:
     """Assemble the transformation chain used by this estimator.

     The chain applies, in order: ``AsNumpyArray`` on the target field,
     ``AddTimeFeatures`` (features chosen from ``self.freq``, extended by
     ``self.prediction_length``), ``SetFieldIfNotPresent`` giving the static
     categorical field the value ``[0.0]``, ``AsNumpyArray`` on that field,
     and finally an ``InstanceSplitter`` driven by a ``TestSplitSampler``.

     Returns:
         A ``Chain`` wrapping the five transformations above.
     """
     target_to_array = AsNumpyArray(field=FieldName.TARGET, expected_ndim=1)
     add_time_features = AddTimeFeatures(
         start_field=FieldName.START,
         target_field=FieldName.TARGET,
         output_field=FieldName.FEAT_TIME,
         time_features=time_features_from_frequency_str(self.freq),
         pred_length=self.prediction_length,
     )
     default_static_cat = SetFieldIfNotPresent(
         field=FieldName.FEAT_STATIC_CAT, value=[0.0]
     )
     static_cat_to_array = AsNumpyArray(
         field=FieldName.FEAT_STATIC_CAT, expected_ndim=1
     )
     # The splitter carries the time features along with the target and
     # uses context/prediction lengths taken from the estimator itself.
     splitter = transform.InstanceSplitter(
         target_field=transform.FieldName.TARGET,
         is_pad_field=transform.FieldName.IS_PAD,
         start_field=transform.FieldName.START,
         forecast_start_field=transform.FieldName.FORECAST_START,
         train_sampler=TestSplitSampler(),
         time_series_fields=[FieldName.FEAT_TIME],
         past_length=self.context_length,
         future_length=self.prediction_length,
     )
     steps = [
         target_to_array,
         add_time_features,
         default_static_cat,
         static_cat_to_array,
         splitter,
     ]
     return Chain(trans=steps)
Ejemplo n.º 2
0
def test_ExpectedNumInstanceSampler():
    """Check the scale histogram produced with ``ExpectedNumInstanceSampler``.

    A dataset from ``make_dataset(N, train_length)`` is pushed through an
    ``InstanceSplitter`` in training mode ``repetition`` times. The positive
    ``past_target`` values of every emitted instance are added to a
    ``ScaleHistogram``, whose bin counts must equal ``2**i * repetition``
    for each ``i`` in ``1..N-1``.
    """
    N = 6
    train_length = 2
    pred_length = 1
    ds = make_dataset(N, train_length)

    sampler = transform.ExpectedNumInstanceSampler(
        num_instances=4, min_future=pred_length
    )
    splitter = transform.InstanceSplitter(
        target_field=FieldName.TARGET,
        is_pad_field=FieldName.IS_PAD,
        start_field=FieldName.START,
        forecast_start_field=FieldName.FORECAST_START,
        instance_sampler=sampler,
        past_length=train_length,
        future_length=pred_length,
    )
    t = transform.Chain(trans=[splitter])

    assert_serializable(t)

    scale_hist = ScaleHistogram()

    repetition = 2
    for _ in range(repetition):
        for entry in t(iter(ds), is_train=True):
            past = entry["past_target"]
            # Zeros are dropped so they cannot be confused with padding.
            scale_hist.add(past[past > 0])

    expected_values = {k: 2**k * repetition for k in range(1, N)}

    assert expected_values == scale_hist.bin_counts
Ejemplo n.º 3
0
 def create_transformation(self) -> transform.Transformation:
     """Assemble the transformation chain used by this estimator.

     The chain applies, in order: ``AsNumpyArray`` on the target field,
     ``AddTimeFeatures`` (features chosen from ``self.freq``, extended by
     ``self.prediction_length``), ``VstackFeatures`` writing the time
     features into the dynamic-real field, ``SetFieldIfNotPresent`` giving
     the static categorical field the value ``[0.0]``, ``AsNumpyArray`` on
     that field, and an ``InstanceSplitter`` using
     ``ExpectedNumInstanceSampler(num_instances=1)``.

     Returns:
         A ``transform.Chain`` wrapping the six transformations above.
     """
     target_to_array = transform.AsNumpyArray(
         field=FieldName.TARGET, expected_ndim=1
     )
     add_time_features = transform.AddTimeFeatures(
         start_field=FieldName.START,
         target_field=FieldName.TARGET,
         output_field=FieldName.FEAT_TIME,
         time_features=time_features_from_frequency_str(self.freq),
         pred_length=self.prediction_length,
     )
     stack_dynamic = transform.VstackFeatures(
         output_field=FieldName.FEAT_DYNAMIC_REAL,
         input_fields=[FieldName.FEAT_TIME],
     )
     default_static_cat = transform.SetFieldIfNotPresent(
         field=FieldName.FEAT_STATIC_CAT, value=[0.0]
     )
     static_cat_to_array = transform.AsNumpyArray(
         field=FieldName.FEAT_STATIC_CAT, expected_ndim=1
     )
     splitter = transform.InstanceSplitter(
         target_field=FieldName.TARGET,
         is_pad_field=FieldName.IS_PAD,
         start_field=FieldName.START,
         forecast_start_field=FieldName.FORECAST_START,
         train_sampler=ExpectedNumInstanceSampler(num_instances=1),
         past_length=self.context_length,
         future_length=self.prediction_length,
         time_series_fields=[FieldName.FEAT_DYNAMIC_REAL],
     )
     steps = [
         target_to_array,
         add_time_features,
         stack_dynamic,
         default_static_cat,
         static_cat_to_array,
         splitter,
     ]
     return transform.Chain(trans=steps)
Ejemplo n.º 4
0
def test_InstanceSplitter(start, target, lead_time: int, is_train: bool,
                          pick_incomplete: bool):
    """Exercise ``InstanceSplitter`` with a lead time and optional padding.

    The splitter uses ``UniformSplitSampler(p=1.0)`` with a past length of
    100 and a future length of 13. When not training, not picking incomplete
    windows, and the target is shorter than the past length, the transform
    is expected to raise ``AssertionError``. Otherwise the number of emitted
    instances and the lengths of the past/future fields are checked.
    """
    train_length = 100
    pred_length = 13
    t = transform.InstanceSplitter(
        target_field=FieldName.TARGET,
        is_pad_field=FieldName.IS_PAD,
        start_field=FieldName.START,
        forecast_start_field=FieldName.FORECAST_START,
        train_sampler=transform.UniformSplitSampler(p=1.0),
        past_length=train_length,
        future_length=pred_length,
        lead_time=lead_time,
        time_series_fields=["some_time_feature"],
        pick_incomplete=pick_incomplete,
    )

    assert_serializable(t)

    # The extra feature is deliberately 100 steps longer than the target.
    other_feat = np.arange(len(target) + 100)
    data = {
        "start": start,
        "target": target,
        "some_time_feature": other_feat,
        "some_other_col": "ABC",
    }

    too_short_for_inference = (
        not (is_train or pick_incomplete) and len(target) < train_length
    )
    if too_short_for_inference:
        with pytest.raises(AssertionError):
            list(t.flatmap_transform(data, is_train=is_train))
        return

    out = list(t.flatmap_transform(data, is_train=is_train))

    if is_train:
        # Without pick_incomplete, a full past window must fit as well.
        required_history = 0 if pick_incomplete else train_length
        expected_count = max(
            0,
            len(target) - pred_length - lead_time + 1 - required_history,
        )
    else:
        expected_count = 1
    assert len(out) == expected_count

    # In inference mode the future target is expected to be empty.
    expected_future_target_len = pred_length if is_train else 0

    for instance in out:
        assert "target" not in instance
        assert "some_time_feature" not in instance
        assert "some_other_col" in instance

        assert len(instance["past_some_time_feature"]) == train_length
        assert len(instance["past_target"]) == train_length

        assert len(instance["future_target"]) == expected_future_target_len
        assert len(instance["future_some_time_feature"]) == pred_length
Ejemplo n.º 5
0
def test_Transformation():
    """Smoke-test a full feature pipeline on a single constant series.

    Builds a ``ListDataset`` with one daily series of 100 values, chains
    time features, an age feature, an observed-values indicator, feature
    stacking and an ``InstanceSplitter``, checks that the chain is
    serializable, and prints every instance produced in training mode.
    """
    train_length = 100
    series = {"start": "2012-01-01", "target": [0.2] * train_length}
    ds = gluonts.dataset.common.ListDataset([series], freq="1D")

    pred_length = 10

    add_time_features = transform.AddTimeFeatures(
        start_field=transform.FieldName.START,
        target_field=transform.FieldName.TARGET,
        output_field="time_feat",
        time_features=[
            time_feature.DayOfWeek(),
            time_feature.DayOfMonth(),
            time_feature.MonthOfYear(),
        ],
        pred_length=pred_length,
    )
    add_age = transform.AddAgeFeature(
        target_field=transform.FieldName.TARGET,
        output_field="age",
        pred_length=pred_length,
        log_scale=True,
    )
    add_observed = transform.AddObservedValuesIndicator(
        target_field=transform.FieldName.TARGET,
        output_field="observed_values",
    )
    stack_features = transform.VstackFeatures(
        output_field="dynamic_feat",
        input_fields=["age", "time_feat"],
        drop_inputs=True,
    )
    splitter = transform.InstanceSplitter(
        target_field=transform.FieldName.TARGET,
        is_pad_field=transform.FieldName.IS_PAD,
        start_field=transform.FieldName.START,
        forecast_start_field=transform.FieldName.FORECAST_START,
        train_sampler=transform.ExpectedNumInstanceSampler(num_instances=4),
        past_length=train_length,
        future_length=pred_length,
        time_series_fields=["dynamic_feat", "observed_values"],
    )

    t = transform.Chain(
        trans=[
            add_time_features,
            add_age,
            add_observed,
            stack_features,
            splitter,
        ]
    )

    assert_serializable(t)

    for instance in t(iter(ds), is_train=True):
        print(instance)
Ejemplo n.º 6
0
 def _create_instance_splitter(self, mode: str):
     """Build the ``InstanceSplitter`` for this estimator.

     Args:
         mode: Accepted for interface compatibility; it is not read here,
             so a ``TestSplitSampler`` is used whatever value is passed.

     Returns:
         A ``transform.InstanceSplitter`` that carries the time-feature
         field and uses ``self.context_length`` / ``self.prediction_length``
         as its past and future lengths.
     """
     splitter = transform.InstanceSplitter(
         target_field=FieldName.TARGET,
         is_pad_field=FieldName.IS_PAD,
         start_field=FieldName.START,
         forecast_start_field=FieldName.FORECAST_START,
         instance_sampler=TestSplitSampler(),
         time_series_fields=[FieldName.FEAT_TIME],
         past_length=self.context_length,
         future_length=self.prediction_length,
     )
     return splitter
Ejemplo n.º 7
0
def test_InstanceSplitter(start, target, is_train):
    """Exercise ``InstanceSplitter`` with ``pick_incomplete=True``.

    The splitter uses ``UniformSplitSampler(p=1.0)`` with a past length of
    100 and a future length of 13. The test checks how many instances are
    emitted in training and inference mode, which input fields are replaced
    by ``past_``/``future_`` fields, and the lengths of those fields.
    """
    train_length = 100
    pred_length = 13
    t = transform.InstanceSplitter(
        target_field=transform.FieldName.TARGET,
        is_pad_field=transform.FieldName.IS_PAD,
        start_field=transform.FieldName.START,
        forecast_start_field=transform.FieldName.FORECAST_START,
        train_sampler=transform.UniformSplitSampler(p=1.0),
        past_length=train_length,
        future_length=pred_length,
        time_series_fields=["some_time_feature"],
        pick_incomplete=True,
    )

    assert_serializable(t)

    # The extra feature is deliberately 100 steps longer than the target.
    other_feat = np.arange(len(target) + 100)
    data = {
        "start": start,
        "target": target,
        "some_time_feature": other_feat,
        "some_other_col": "ABC",
    }

    out = list(t.flatmap_transform(data, is_train=is_train))

    if is_train:
        expected_count = max(0, len(target) - pred_length + 1)
    else:
        expected_count = 1
    assert len(out) == expected_count

    # In inference mode the future target is expected to be empty.
    expected_future_target_len = pred_length if is_train else 0

    for instance in out:
        assert "target" not in instance
        assert "some_time_feature" not in instance
        assert "some_other_col" in instance

        assert len(instance["past_some_time_feature"]) == train_length
        assert len(instance["past_target"]) == train_length

        assert len(instance["future_target"]) == expected_future_target_len
        assert len(instance["future_some_time_feature"]) == pred_length
Ejemplo n.º 8
0
def test_BucketInstanceSampler():
    """Check that ``BucketInstanceSampler`` yields roughly balanced bins.

    A dataset from ``make_dataset(N, train_length)`` is pushed through an
    ``InstanceSplitter`` whose sampler is built from the dataset's own scale
    histogram. After ``repetition`` passes in training mode, the count in
    each histogram bin ``1..N-1`` must lie within 30% of ``repetition``.
    """
    N = 6
    train_length = 2
    pred_length = 1
    ds = make_dataset(N, train_length)

    dataset_stats = calculate_dataset_statistics(ds)

    t = transform.Chain(
        trans=[
            transform.InstanceSplitter(
                target_field=transform.FieldName.TARGET,
                is_pad_field=transform.FieldName.IS_PAD,
                start_field=transform.FieldName.START,
                forecast_start_field=transform.FieldName.FORECAST_START,
                train_sampler=transform.BucketInstanceSampler(
                    dataset_stats.scale_histogram
                ),
                past_length=train_length,
                future_length=pred_length,
                pick_incomplete=True,
            )
        ]
    )

    assert_serializable(t)

    scale_hist = ScaleHistogram()

    repetition = 200
    for _ in range(repetition):
        for data in t(iter(ds), is_train=True):
            target_values = data["past_target"]
            # for simplicity, discard values that are zeros to avoid confusion with padding
            target_values = target_values[target_values > 0]
            scale_hist.add(target_values)

    expected_values = {i: repetition for i in range(1, N)}
    found_values = scale_hist.bin_counts

    for i in range(1, N):
        # abs() wraps the difference itself, so a bin that is over-represented
        # fails the tolerance check just like one that is under-represented.
        assert (
            abs(expected_values[i] - found_values[i])
            < expected_values[i] * 0.3
        )
Ejemplo n.º 9
0
    def _create_instance_splitter(self, mode: str):
        """Build the ``InstanceSplitter`` matching the requested mode.

        Args:
            mode: One of ``"training"``, ``"validation"`` or ``"test"``.
                Selects ``self.train_sampler``, ``self.validation_sampler``
                or a fresh ``TestSplitSampler`` respectively.

        Returns:
            A ``transform.InstanceSplitter`` that carries the dynamic-real
            feature field and uses ``self.context_length`` /
            ``self.prediction_length`` as its past and future lengths.

        Raises:
            AssertionError: If ``mode`` is not one of the three known values.
        """
        assert mode in ["training", "validation", "test"]

        samplers_by_mode = {
            "training": self.train_sampler,
            "validation": self.validation_sampler,
            "test": TestSplitSampler(),
        }
        chosen_sampler = samplers_by_mode[mode]

        splitter = transform.InstanceSplitter(
            target_field=FieldName.TARGET,
            is_pad_field=FieldName.IS_PAD,
            start_field=FieldName.START,
            forecast_start_field=FieldName.FORECAST_START,
            instance_sampler=chosen_sampler,
            past_length=self.context_length,
            future_length=self.prediction_length,
            time_series_fields=[FieldName.FEAT_DYNAMIC_REAL],
        )
        return splitter
Ejemplo n.º 10
0
def test_instance_splitter():
    """Check ``clone``/``equals`` on an ``InstanceSplitter``.

    A plain clone must compare equal to the original, while a clone whose
    ``instance_sampler`` is replaced by one with a different
    ``num_instances`` must not.
    """
    original_sampler = transform.ExpectedNumInstanceSampler(num_instances=4)
    splitter = transform.InstanceSplitter(
        target_field=FieldName.TARGET,
        is_pad_field=FieldName.IS_PAD,
        start_field=FieldName.START,
        forecast_start_field=FieldName.FORECAST_START,
        instance_sampler=original_sampler,
        past_length=100,
        future_length=10,
        time_series_fields=["dynamic_feat", "observed_values"],
    )

    overrides = {
        "instance_sampler":
        transform.ExpectedNumInstanceSampler(num_instances=5)
    }
    modified_copy = clone(splitter, overrides)
    plain_copy = clone(splitter)

    assert equals(splitter, plain_copy)
    assert not equals(splitter, modified_copy)
Ejemplo n.º 11
0
def test_multi_dim_transformation(is_train):
    """Run the feature pipeline on a two-row target containing NaNs.

    The dataset holds one daily series whose target has two rows of ten
    values; the last value of the first row and the first value of the
    second row are NaN. The chain is checked for serializability, then each
    emitted instance is checked for field shapes and, via
    ``assert_padded_array``, for the observed-values indicator and the past
    target.
    """
    train_length = 10

    first_dim = np.arange(1, 11, 1).tolist()
    first_dim[-1] = "NaN"

    second_dim = np.arange(11, 21, 1).tolist()
    second_dim[0] = "NaN"

    ds = gluonts.dataset.common.ListDataset(
        data_iter=[{"start": "2012-01-01", "target": [first_dim, second_dim]}],
        freq="1D",
        one_dim_target=False,
    )
    pred_length = 2

    # The dataset was built from the string "NaN"; the lists are switched to
    # real NaNs afterwards so they can serve as expected values below.
    first_dim[-1] = np.nan
    second_dim[0] = np.nan

    add_time_features = transform.AddTimeFeatures(
        start_field=transform.FieldName.START,
        target_field=transform.FieldName.TARGET,
        output_field="time_feat",
        time_features=[
            time_feature.DayOfWeek(),
            time_feature.DayOfMonth(),
            time_feature.MonthOfYear(),
        ],
        pred_length=pred_length,
    )
    add_age = transform.AddAgeFeature(
        target_field=transform.FieldName.TARGET,
        output_field="age",
        pred_length=pred_length,
        log_scale=True,
    )
    add_observed = transform.AddObservedValuesIndicator(
        target_field=transform.FieldName.TARGET,
        output_field="observed_values",
        convert_nans=False,
    )
    stack_features = transform.VstackFeatures(
        output_field="dynamic_feat",
        input_fields=["age", "time_feat"],
        drop_inputs=True,
    )
    splitter = transform.InstanceSplitter(
        target_field=transform.FieldName.TARGET,
        is_pad_field=transform.FieldName.IS_PAD,
        start_field=transform.FieldName.START,
        forecast_start_field=transform.FieldName.FORECAST_START,
        train_sampler=transform.ExpectedNumInstanceSampler(num_instances=4),
        past_length=train_length,
        future_length=pred_length,
        time_series_fields=["dynamic_feat", "observed_values"],
        output_NTC=False,
    )

    t = transform.Chain(
        trans=[
            add_time_features,
            add_age,
            add_observed,
            stack_features,
            splitter,
        ]
    )

    assert_serializable(t)

    # Both modes run the same checks; only the mode flag handed to the chain
    # and the expected future-target shape differ.
    training = bool(is_train)
    expected_future_shape = (2, 2) if training else (2, 0)

    for instance in t(iter(ds), is_train=training):
        assert_shape(instance["past_target"], (2, 10))
        assert_shape(instance["past_dynamic_feat"], (4, 10))
        assert_shape(instance["past_observed_values"], (2, 10))
        assert_shape(instance["future_target"], expected_future_shape)

        assert_padded_array(
            instance["past_observed_values"],
            np.array([[1.0] * 9 + [0.0], [0.0] + [1.0] * 9]),
            instance["past_is_pad"],
        )
        assert_padded_array(
            instance["past_target"],
            np.array([first_dim, second_dim]),
            instance["past_is_pad"],
        )