Beispiel #1
0
    def test_dataset_statistics(self) -> None:
        """Check ``calculate_dataset_statistics`` on a tiny integer dataset.

        The dataset holds two random integer series of equal length plus one
        entry with an empty target. The expected ``DatasetStatistics`` is
        assembled directly from the raw target matrix and compared for
        equality with the computed result.
        """
        num_series = 2
        series_length = 10
        # the entry with the empty target is counted as a time series too
        num_total = num_series + 1

        # integer targets avoid float conversion that can fail comparison
        np.random.seed(0)
        targets = np.random.randint(0, 10, (num_series, series_length))

        # same order as the dataset: filled series first, empty one last
        scale_histogram = ScaleHistogram()
        for row in targets:
            scale_histogram.add(row)
        scale_histogram.add([])

        # targets are drawn from [0, 10), so mean and mean-abs coincide
        target_mean = targets.mean()

        expected = DatasetStatistics(
            integer_dataset=True,
            num_time_series=num_total,
            num_time_observations=targets.size,
            mean_target_length=series_length * num_series / num_total,
            min_target=targets.min(),
            mean_target=target_mean,
            mean_abs_target=target_mean,
            max_target=targets.max(),
            cats=[{0}, {1, 2}],
            num_dynamic_feat=2,
            num_missing_values=0,
            scale_histogram=scale_histogram,
        )

        # (target, static categories) for each dataset entry
        series_inputs = [
            (targets[0, :], [0, 1]),
            (targets[1, :], [0, 2]),
            (np.array([]), [0, 2]),
        ]

        # FIXME: the cast below is a hack to make mypy happy
        timeseries = cast(
            Dataset,
            [
                make_time_series(target=target, cat=cat, num_dynamic_feat=2)
                for target, cat in series_inputs
            ],
        )

        found = calculate_dataset_statistics(timeseries)

        assert expected == found
Beispiel #2
0
    def test_dataset_statistics(self) -> None:
        """Check ``calculate_dataset_statistics`` on a tiny integer dataset.

        Three entries are built with static categorical/real features and
        fixed dynamic feature counts: two with random integer targets and one
        with an empty target. The expected ``DatasetStatistics`` is assembled
        directly from the raw target matrix and compared for equality with
        the computed result.
        """
        total_series = 3  # includes the entry with the empty target
        filled_series = total_series - 1
        obs_per_series = 10

        # the same counts go into the expected statistics and into each entry
        dynamic_feature_counts = {
            "num_feat_dynamic_cat": 2,
            "num_feat_dynamic_real": 2,
            "num_past_feat_dynamic_real": 3,
        }

        # integer targets avoid float conversion that can fail comparison
        np.random.seed(0)
        targets = np.random.randint(0, 10, (filled_series, obs_per_series))

        # same order as the dataset: filled series first, empty one last
        scale_histogram = ScaleHistogram()
        for row in targets:
            scale_histogram.add(row)
        scale_histogram.add([])

        # targets are drawn from [0, 10), so mean and mean-abs coincide
        target_mean = targets.mean()

        expected = DatasetStatistics(
            integer_dataset=True,
            num_time_series=total_series,
            num_time_observations=targets.size,
            mean_target_length=obs_per_series * filled_series / total_series,
            max_target_length=obs_per_series,
            min_target=targets.min(),
            mean_target=target_mean,
            mean_abs_target=target_mean,
            max_target=targets.max(),
            feat_static_real=[{0.1}, {0.2, 0.3}],
            feat_static_cat=[{1}, {2, 3}],
            num_missing_values=0,
            scale_histogram=scale_histogram,
            **dynamic_feature_counts,
        )

        # (target, static categorical, static real) for each dataset entry
        series_inputs = [
            (targets[0, :], [1, 2], [0.1, 0.2]),
            (targets[1, :], [1, 3], [0.1, 0.3]),
            (np.array([]), [1, 3], [0.1, 0.3]),
        ]

        # FIXME: the cast below is a hack to make mypy happy
        timeseries = cast(
            Dataset,
            [
                make_time_series(
                    target=target,
                    feat_static_cat=static_cat,
                    feat_static_real=static_real,
                    **dynamic_feature_counts,
                )
                for target, static_cat, static_real in series_inputs
            ],
        )

        found = calculate_dataset_statistics(timeseries)

        assert expected == found