Exemple #1
0
def test_ExpectedNumInstanceSampler():
    """Check the scale histogram of windows drawn by ExpectedNumInstanceSampler.

    A dataset from ``make_dataset`` is passed twice, in training mode,
    through a chain holding a single ``InstanceSplitter``. The strictly
    positive ``past_target`` values of every emitted entry are added to a
    ``ScaleHistogram``, whose bin counts must equal ``2**i * 2`` for each
    bin ``i`` in ``1 .. 5``.
    """
    num_scales = 6
    past_len = 2
    future_len = 1
    dataset = make_dataset(num_scales, past_len)

    sampler = transform.ExpectedNumInstanceSampler(
        num_instances=4, min_future=future_len
    )
    splitter = transform.InstanceSplitter(
        target_field=FieldName.TARGET,
        is_pad_field=FieldName.IS_PAD,
        start_field=FieldName.START,
        forecast_start_field=FieldName.FORECAST_START,
        instance_sampler=sampler,
        past_length=past_len,
        future_length=future_len,
    )
    pipeline = transform.Chain(trans=[splitter])

    assert_serializable(pipeline)

    observed = ScaleHistogram()

    num_passes = 2
    for _ in range(num_passes):
        for entry in pipeline(iter(dataset), is_train=True):
            past = entry["past_target"]
            # Zeros are dropped so they cannot be confused with padding.
            observed.add(past[past > 0])

    expected_counts = {}
    for scale in range(1, num_scales):
        expected_counts[scale] = 2**scale * num_passes

    assert expected_counts == observed.bin_counts
Exemple #2
0
def test_BucketInstanceSampler():
    """Check that BucketInstanceSampler yields about the same count per bin.

    The sampler is built from the scale histogram of the dataset statistics
    and plugged into an ``InstanceSplitter``. After ``repetition`` training
    passes, the strictly positive ``past_target`` values of all emitted
    entries are collected in a ``ScaleHistogram``. Every bin ``1 .. N - 1``
    is expected to hold ``repetition`` entries, within a 30% relative
    tolerance in either direction.
    """
    N = 6
    train_length = 2
    pred_length = 1
    ds = make_dataset(N, train_length)

    dataset_stats = calculate_dataset_statistics(ds)

    t = transform.Chain(
        trans=[
            transform.InstanceSplitter(
                target_field=transform.FieldName.TARGET,
                is_pad_field=transform.FieldName.IS_PAD,
                start_field=transform.FieldName.START,
                forecast_start_field=transform.FieldName.FORECAST_START,
                train_sampler=transform.BucketInstanceSampler(
                    dataset_stats.scale_histogram
                ),
                past_length=train_length,
                future_length=pred_length,
                pick_incomplete=True,
            )
        ]
    )

    assert_serializable(t)

    scale_hist = ScaleHistogram()

    repetition = 200
    for _ in range(repetition):
        for data in t(iter(ds), is_train=True):
            target_values = data["past_target"]
            # for simplicity, discard values that are zeros to avoid
            # confusion with padding
            target_values = target_values[target_values > 0]
            scale_hist.add(target_values)

    expected_values = {i: repetition for i in range(1, N)}
    found_values = scale_hist.bin_counts

    for i in range(1, N):
        # abs() must wrap the difference only. Wrapping the whole comparison
        # turns the check into a one-sided bound on a boolean, which lets
        # arbitrarily large over-counts pass.
        deviation = abs(expected_values[i] - found_values[i])
        assert deviation < expected_values[i] * 0.3, (
            f"bin {i}: expected about {expected_values[i]}, "
            f"found {found_values[i]}"
        )
Exemple #3
0
    def test_dataset_statistics(self) -> None:
        """Compare calculate_dataset_statistics with hand-built statistics.

        Two series of random integer targets plus one series with an empty
        target are passed to ``calculate_dataset_statistics``; the result
        must equal a ``DatasetStatistics`` instance assembled directly from
        the same target array.
        """
        num_series = 2
        series_len = 10

        # Integer targets avoid float conversions that can break equality.
        np.random.seed(0)
        targets = np.random.randint(0, 10, (num_series, series_len))

        scale_histogram = ScaleHistogram()
        for row in targets:
            scale_histogram.add(row)

        # The empty series is added to the histogram as well.
        scale_histogram.add([])

        expected = DatasetStatistics(
            integer_dataset=True,
            num_time_series=num_series + 1,
            num_time_observations=targets.size,
            mean_target_length=series_len * 2 / 3,
            min_target=targets.min(),
            mean_target=targets.mean(),
            mean_abs_target=targets.mean(),
            max_target=targets.max(),
            cats=[{0}, {1, 2}],
            num_dynamic_feat=2,
            num_missing_values=0,
            scale_histogram=scale_histogram,
        )

        # (target, cat) for each series, in dataset order.
        series_specs = [
            (targets[0, :], [0, 1]),
            (targets[1, :], [0, 2]),
            (np.array([]), [0, 2]),
        ]

        # FIXME: the cast below is a hack to make mypy happy
        timeseries = cast(
            Dataset,
            [
                make_time_series(target=tgt, cat=cat, num_dynamic_feat=2)
                for tgt, cat in series_specs
            ],
        )

        found = calculate_dataset_statistics(timeseries)

        assert expected == found
Exemple #4
0
    def test_dataset_statistics(self) -> None:
        """Compare calculate_dataset_statistics with hand-built statistics.

        Two series of random integer targets plus one series with an empty
        target, each carrying static and dynamic features, are passed to
        ``calculate_dataset_statistics``; the result must equal a
        ``DatasetStatistics`` instance assembled directly from the same
        inputs.
        """
        num_time_series = 3
        num_time_observations = 10
        num_feat_dynamic_real = 2
        num_past_feat_dynamic_real = 3
        num_feat_dynamic_cat = 2
        num_missing_values = 0

        # All series except the last (empty) one carry observations.
        num_nonempty = num_time_series - 1

        # Integer targets avoid float conversions that can break equality.
        np.random.seed(0)
        targets = np.random.randint(
            0, 10, (num_nonempty, num_time_observations)
        )

        scale_histogram = ScaleHistogram()
        for row_idx in range(num_nonempty):
            scale_histogram.add(targets[row_idx, :])

        scale_histogram.add([])

        expected = DatasetStatistics(
            integer_dataset=True,
            num_time_series=num_time_series,  # includes empty array
            num_time_observations=targets.size,
            mean_target_length=num_time_observations
            * num_nonempty
            / num_time_series,
            max_target_length=num_time_observations,
            min_target=targets.min(),
            mean_target=targets.mean(),
            mean_abs_target=targets.mean(),
            max_target=targets.max(),
            feat_static_real=[{0.1}, {0.2, 0.3}],
            feat_static_cat=[{1}, {2, 3}],
            num_feat_dynamic_real=num_feat_dynamic_real,
            num_past_feat_dynamic_real=num_past_feat_dynamic_real,
            num_feat_dynamic_cat=num_feat_dynamic_cat,
            num_missing_values=num_missing_values,
            scale_histogram=scale_histogram,
        )

        # Dynamic-feature counts shared by every series.
        dynamic_kwargs = dict(
            num_feat_dynamic_cat=num_feat_dynamic_cat,
            num_feat_dynamic_real=num_feat_dynamic_real,
            num_past_feat_dynamic_real=num_past_feat_dynamic_real,
        )

        # (target, feat_static_cat, feat_static_real) in dataset order.
        series_specs = [
            (targets[0, :], [1, 2], [0.1, 0.2]),
            (targets[1, :], [1, 3], [0.1, 0.3]),
            (np.array([]), [1, 3], [0.1, 0.3]),
        ]

        # FIXME: the cast below is a hack to make mypy happy
        timeseries = cast(
            Dataset,
            [
                make_time_series(
                    target=tgt,
                    feat_static_cat=static_cat,
                    feat_static_real=static_real,
                    **dynamic_kwargs,
                )
                for tgt, static_cat, static_real in series_specs
            ],
        )

        found = calculate_dataset_statistics(timeseries)

        assert expected == found