def test_dataset_histogram(self) -> None:
    """Check the scale histogram computed for constant-valued series.

    Builds ``2 ** num_bins - 1`` series, where series ``k`` is constant and
    equal to ``k``, and asserts that bin ``b`` of the resulting scale
    histogram holds exactly ``2 ** b`` series for every ``b < num_bins``.
    """
    num_bins = 6
    num_series = 2**num_bins - 1
    length = 5

    # Row k is filled with the constant value k (as floats).
    levels = np.arange(num_series)[:, np.newaxis]
    targets = np.ones((num_series, length)) * levels

    # FIXME: the cast below is a hack to make mypy happy
    timeseries = cast(
        Dataset, [make_time_series(target=row) for row in targets]
    )

    stats = calculate_dataset_statistics(timeseries)
    bin_counts = stats.scale_histogram.bin_counts

    for exponent in range(num_bins):
        assert exponent in bin_counts
        assert bin_counts[exponent] == 2**exponent
def test_dataset_statistics(self) -> None:
    """Compare computed dataset statistics against hand-built expectations.

    The dataset holds two integer series of length ``length`` plus one
    empty series, so the expected mean target length is
    ``length * 2 / 3`` and the expected series count is ``num_series + 1``.
    """
    num_series = 2
    length = 10

    # use integers to avoid float conversion that can fail comparison
    np.random.seed(0)
    targets = np.random.randint(0, 10, (num_series, length))

    expected_histogram = ScaleHistogram()
    for row in targets:
        expected_histogram.add(row)
    # accounts for the empty series appended to the dataset below
    expected_histogram.add([])

    # The targets are drawn from [0, 10), so they are all non-negative and
    # the mean of absolute values coincides with the plain mean.
    target_mean = targets.mean()

    expected = DatasetStatistics(
        integer_dataset=True,
        num_time_series=num_series + 1,
        num_time_observations=targets.size,
        mean_target_length=length * 2 / 3,
        min_target=targets.min(),
        mean_target=target_mean,
        mean_abs_target=target_mean,
        max_target=targets.max(),
        feat_static_real=[{0.1}, {0.2, 0.3}],
        feat_static_cat=[{1}, {2, 3}],
        num_feat_dynamic_real=2,
        num_feat_dynamic_cat=2,
        num_missing_values=0,
        scale_histogram=expected_histogram,
    )

    # (target, feat_static_cat, feat_static_real) for each series
    series_specs = [
        (targets[0, :], [1, 2], [0.1, 0.2]),
        (targets[1, :], [1, 3], [0.1, 0.3]),
        (np.array([]), [1, 3], [0.1, 0.3]),
    ]

    # FIXME: the cast below is a hack to make mypy happy
    timeseries = cast(
        Dataset,
        [
            make_time_series(
                target=series_target,
                feat_static_cat=static_cat,
                feat_static_real=static_real,
                num_feat_dynamic_cat=2,
                num_feat_dynamic_real=2,
            )
            for series_target, static_cat, static_real in series_specs
        ],
    )

    found = calculate_dataset_statistics(timeseries)

    assert expected == found
def check_error_message(expected_regex, dataset) -> None:
    """Assert that computing statistics for ``dataset`` fails as expected.

    The call to ``calculate_dataset_statistics`` must raise an exception
    whose message matches ``expected_regex``, which is interpreted as a
    regular expression. ``self`` is not a parameter; it is taken from the
    enclosing scope.
    """
    expected_failure = self.assertRaisesRegex(Exception, expected_regex)
    with expected_failure:
        calculate_dataset_statistics(dataset)
def test_dataset_statistics_exceptions(self) -> None:
    """Check the errors raised by ``calculate_dataset_statistics``.

    Every malformed dataset below must make the call raise an exception
    whose message matches the given pattern; the two consistent datasets
    at the end must be processed without raising.

    ``ts``, ``start`` and ``target`` are not defined in this method and
    are expected to come from the surrounding module.
    """

    def assert_fails_with(expected_regex, dataset) -> None:
        # The pattern is applied as a regular expression, not compared as
        # a literal string.
        with self.assertRaisesRegex(Exception, expected_regex):
            calculate_dataset_statistics(dataset)

    dynamic_fields = ("feat_dynamic_cat", "feat_dynamic_real")
    static_fields = ("feat_static_cat", "feat_static_real")

    assert_fails_with("Time series dataset is empty!", [])

    only_empty = [make_time_series(target=np.random.randint(0, 10, 0))]
    assert_fails_with(
        "Only empty time series found in the dataset!", only_empty
    )

    # infinite target
    # NOTE(review): this check is disabled; if it is re-enabled, the
    # parentheses in the message need escaping because the message is
    # used as a regular expression.
    # check_error_message(
    #     "Target values have to be finite (e.g., not inf, -inf, "
    #     "or None) and cannot exceed single precision floating "
    #     "point range.",
    #     [make_time_series(target=np.full(20, np.inf))]
    # )

    # different number of feat_dynamic_{cat, real}
    for field in dynamic_fields:
        count_kwarg = f"num_{field}"
        for first, second in ((2, 1), (0, 1)):
            assert_fails_with(
                "Found instances with different number of features in "
                f"{field}, found one with {first} and another with "
                f"{second}.",
                [
                    make_time_series(**{count_kwarg: first}),
                    make_time_series(**{count_kwarg: second}),
                ],
            )
        assert_fails_with(
            f"{field} was found for some instances but not others.",
            [
                make_time_series(**{count_kwarg: 1}),
                make_time_series(**{count_kwarg: 0}),
            ],
        )

    # infinite feat_dynamic_{cat,real}
    inf_dynamic_feat = np.full((2, len(target)), np.inf)
    for field in dynamic_fields:
        assert_fails_with(
            "Features values have to be finite and cannot exceed single "
            "precision floating point range.",
            [
                ts(
                    start,
                    target,
                    **{field: inf_dynamic_feat},
                    feat_static_cat=[0, 1],
                )
            ],
        )

    # feat_dynamic_{cat, real} different length from target
    # (the expected message hard-codes a target length of 20; presumably
    # that is len(target) -- TODO confirm against the module-level value)
    for field in dynamic_fields:
        assert_fails_with(
            f"Each feature in {field} has to have the same length as the "
            f"target. Found an instance with {field} of length 1 and a "
            "target of length 20.",
            [
                ts(
                    start=start,
                    target=target,
                    feat_static_cat=[0, 1],
                    **{field: np.ones((1, 1))},
                )
            ],
        )

    # feat_static_{cat, real} different length
    for field in static_fields:
        assert_fails_with(
            f"Not all {field} vectors have the same length 2 != 1.",
            [
                ts(start=start, target=target, **{field: [0, 1]}),
                ts(start=start, target=target, **{field: [1]}),
            ],
        )

    # Datasets whose instances agree on the number of dynamic categorical
    # features must not raise.
    for count in (2, 0):
        # FIXME: the cast below is a hack to make mypy happy
        consistent = cast(
            Dataset,
            [
                make_time_series(num_feat_dynamic_cat=count)
                for _ in range(2)
            ],
        )
        calculate_dataset_statistics(consistent)