예제 #1
0
    def test_dataset_histogram(self) -> None:
        """Check the scale histogram counts for constant-valued time series.

        Builds ``2 ** N - 1`` time series of length ``T`` in which series
        ``i`` constantly takes the value ``i``, computes the dataset
        statistics, and asserts that ``bin_counts[i] == 2 ** i`` for every
        ``i`` in ``range(N)``.
        """
        N = 6
        T = 5
        num_series = 2**N - 1

        # Row i is filled with the (float) value i: a column of row indices
        # is broadcast against a matrix of ones.
        row_values = np.arange(num_series).reshape(-1, 1)
        targets = np.ones((num_series, T)) * row_values

        # FIXME: the cast below is a hack to make mypy happy
        timeseries = cast(
            Dataset,
            [make_time_series(target=row) for row in targets],
        )

        found = calculate_dataset_statistics(timeseries)

        bin_counts = found.scale_histogram.bin_counts
        for exponent in range(N):
            assert exponent in bin_counts
            assert bin_counts[exponent] == 2**exponent
예제 #2
0
    def test_dataset_statistics(self) -> None:
        """Compare computed dataset statistics against hand-built ones.

        The dataset consists of two random integer series of length 10 and
        one empty series. The expected ``DatasetStatistics`` is assembled
        manually from the same targets and compared for equality with the
        result of ``calculate_dataset_statistics``.
        """
        num_series = 2
        series_length = 10

        # use integers to avoid float conversion that can fail comparison
        np.random.seed(0)
        targets = np.random.randint(0, 10, (num_series, series_length))

        scale_histogram = ScaleHistogram()
        for row in targets:
            scale_histogram.add(row)
        # The empty third series is added to the histogram as well.
        scale_histogram.add([])

        # All targets are drawn from [0, 10), so the mean and the mean of
        # absolute values coincide.
        targets_mean = targets.mean()

        expected = DatasetStatistics(
            integer_dataset=True,
            num_time_series=num_series + 1,
            num_time_observations=targets.size,
            mean_target_length=series_length * 2 / 3,
            min_target=targets.min(),
            mean_target=targets_mean,
            mean_abs_target=targets_mean,
            max_target=targets.max(),
            feat_static_real=[{0.1}, {0.2, 0.3}],
            feat_static_cat=[{1}, {2, 3}],
            num_feat_dynamic_real=2,
            num_feat_dynamic_cat=2,
            num_missing_values=0,
            scale_histogram=scale_histogram,
        )

        # Every series carries the same number of dynamic features.
        dynamic_counts = dict(num_feat_dynamic_cat=2, num_feat_dynamic_real=2)

        # (target, feat_static_cat, feat_static_real) for each series
        series_specs = [
            (targets[0, :], [1, 2], [0.1, 0.2]),
            (targets[1, :], [1, 3], [0.1, 0.3]),
            (np.array([]), [1, 3], [0.1, 0.3]),
        ]

        # FIXME: the cast below is a hack to make mypy happy
        timeseries = cast(
            Dataset,
            [
                make_time_series(
                    target=series_target,
                    feat_static_cat=static_cat,
                    feat_static_real=static_real,
                    **dynamic_counts,
                )
                for series_target, static_cat, static_real in series_specs
            ],
        )

        found = calculate_dataset_statistics(timeseries)

        assert expected == found
예제 #3
0
 def check_error_message(expected_regex, dataset) -> None:
     """Assert that computing statistics on ``dataset`` raises a matching error.

     ``expected_regex`` is handed to ``assertRaisesRegex`` and is therefore
     interpreted as a regular expression, not as a literal string. ``self``
     is not a parameter; it is taken from the enclosing scope, which is not
     visible in this snippet.
     """
     raises_matching = self.assertRaisesRegex(Exception, expected_regex)
     with raises_matching:
         calculate_dataset_statistics(dataset)
예제 #4
0
    def test_dataset_statistics_exceptions(self) -> None:
        """Check that invalid datasets are rejected with the expected errors.

        Each case passes an invalid dataset to
        ``calculate_dataset_statistics`` and matches the message of the
        raised exception against a regular expression. The final two calls
        verify that datasets whose series agree on the number of
        ``feat_dynamic_cat`` features are processed without raising.
        """

        def check_error_message(expected_regex, dataset) -> None:
            # expected_regex is interpreted as a regex by assertRaisesRegex.
            raises_matching = self.assertRaisesRegex(
                Exception, expected_regex
            )
            with raises_matching:
                calculate_dataset_statistics(dataset)

        dynamic_feats = ("feat_dynamic_cat", "feat_dynamic_real")
        static_feats = ("feat_static_cat", "feat_static_real")

        # empty dataset
        check_error_message("Time series dataset is empty!", [])

        # dataset containing only an empty series
        empty_target = np.random.randint(0, 10, 0)
        check_error_message(
            "Only empty time series found in the dataset!",
            [make_time_series(target=empty_target)],
        )

        # infinite target
        # NOTE(review): this check is disabled; the reason is not visible
        # here.
        # check_error_message(
        #     "Target values have to be finite (e.g., not inf, -inf, "
        #     "or None) and cannot exceed single precision floating "
        #     "point range.",
        #     [make_time_series(target=np.full(20, np.inf))]
        # )

        # different number of feat_dynamic_{cat, real}
        for feat in dynamic_feats:
            count_kwarg = f"num_{feat}"
            # (expected message, feature counts of the two series)
            mismatch_cases = [
                (
                    "Found instances with different number of features in "
                    f"{feat}, found one with 2 and another with 1.",
                    (2, 1),
                ),
                (
                    "Found instances with different number of features in "
                    f"{feat}, found one with 0 and another with 1.",
                    (0, 1),
                ),
                (
                    f"{feat} was found for some instances but not others.",
                    (1, 0),
                ),
            ]
            for message, (first_count, second_count) in mismatch_cases:
                check_error_message(
                    message,
                    [
                        make_time_series(**{count_kwarg: first_count}),
                        make_time_series(**{count_kwarg: second_count}),
                    ],
                )

        # infinite feat_dynamic_{cat,real}
        inf_dynamic_feat = np.full((2, len(target)), np.inf)
        non_finite_message = (
            "Features values have to be finite and cannot exceed single "
            "precision floating point range."
        )
        for feat in dynamic_feats:
            check_error_message(
                non_finite_message,
                [
                    ts(
                        start,
                        target,
                        feat_static_cat=[0, 1],
                        **{feat: inf_dynamic_feat},
                    )
                ],
            )

        # feat_dynamic_{cat, real} different length from target
        for feat in dynamic_feats:
            check_error_message(
                f"Each feature in {feat} has to have the same length as the "
                f"target. Found an instance with {feat} of length 1 and a "
                "target of length 20.",
                [
                    ts(
                        start=start,
                        target=target,
                        feat_static_cat=[0, 1],
                        **{feat: np.ones((1, 1))},
                    )
                ],
            )

        # feat_static_{cat, real} different length
        for feat in static_feats:
            check_error_message(
                f"Not all {feat} vectors have the same length 2 != 1.",
                [
                    ts(start=start, target=target, **{feat: [0, 1]}),
                    ts(start=start, target=target, **{feat: [1]}),
                ],
            )

        # Consistent feature counts (including zero) must not raise.
        for num_features in (2, 0):
            calculate_dataset_statistics(
                # FIXME: the cast below is a hack to make mypy happy
                cast(
                    Dataset,
                    [
                        make_time_series(num_feat_dynamic_cat=num_features)
                        for _ in range(2)
                    ],
                )
            )