def test_dataset_statistics(self) -> None:
    """Compare calculate_dataset_statistics against hand-built statistics.

    The dataset consists of two random integer-valued series of equal
    length plus one series with an empty target.
    """
    full_series_count = 2
    series_length = 10

    # use integers to avoid float conversion that can fail comparison
    np.random.seed(0)
    targets = np.random.randint(0, 10, (full_series_count, series_length))

    # The histogram sees each non-empty target row first, then the empty one.
    scale_histogram = ScaleHistogram()
    for target_row in targets:
        scale_histogram.add(target_row)
    scale_histogram.add([])

    expected = DatasetStatistics(
        integer_dataset=True,
        # the empty series is counted as a time series as well
        num_time_series=full_series_count + 1,
        num_time_observations=targets.size,
        # two series of full length and one of length zero
        mean_target_length=series_length * 2 / 3,
        min_target=targets.min(),
        mean_target=targets.mean(),
        # all sampled targets are non-negative, so the mean of the
        # absolute values coincides with the plain mean
        mean_abs_target=targets.mean(),
        max_target=targets.max(),
        cats=[{0}, {1, 2}],
        num_dynamic_feat=2,
        num_missing_values=0,
        scale_histogram=scale_histogram,
    )

    # (target, cat) for every series fed into the statistics computation
    series_specs = [
        (targets[0, :], [0, 1]),
        (targets[1, :], [0, 2]),
        (np.array([]), [0, 2]),
    ]

    # FIXME: the cast below is a hack to make mypy happy
    timeseries = cast(
        Dataset,
        [
            make_time_series(target=target, cat=cat, num_dynamic_feat=2)
            for target, cat in series_specs
        ],
    )

    found = calculate_dataset_statistics(timeseries)

    assert expected == found
def test_dataset_statistics(self) -> None:
    """Compare calculate_dataset_statistics against hand-built statistics.

    The dataset consists of two random integer-valued series of equal
    length plus one series with an empty target; every series carries
    static categorical/real features and the same dynamic feature counts.
    """
    series_count = 3  # includes the series with the empty target
    nonempty_count = series_count - 1
    series_length = 10
    missing_value_count = 0

    # Keyword arguments shared by every make_time_series call below.
    dynamic_feat_counts = dict(
        num_feat_dynamic_cat=2,
        num_feat_dynamic_real=2,
        num_past_feat_dynamic_real=3,
    )

    # use integers to avoid float conversion that can fail comparison
    np.random.seed(0)
    targets = np.random.randint(0, 10, (nonempty_count, series_length))

    # The histogram sees each non-empty target row first, then the empty one.
    scale_histogram = ScaleHistogram()
    for target_row in targets:
        scale_histogram.add(target_row)
    scale_histogram.add([])

    expected = DatasetStatistics(
        integer_dataset=True,
        num_time_series=series_count,  # includes empty array
        num_time_observations=targets.size,
        mean_target_length=series_length * nonempty_count / series_count,
        max_target_length=series_length,
        min_target=targets.min(),
        mean_target=targets.mean(),
        # all sampled targets are non-negative, so the mean of the
        # absolute values coincides with the plain mean
        mean_abs_target=targets.mean(),
        max_target=targets.max(),
        feat_static_real=[{0.1}, {0.2, 0.3}],
        feat_static_cat=[{1}, {2, 3}],
        num_feat_dynamic_real=dynamic_feat_counts["num_feat_dynamic_real"],
        num_past_feat_dynamic_real=dynamic_feat_counts[
            "num_past_feat_dynamic_real"
        ],
        num_feat_dynamic_cat=dynamic_feat_counts["num_feat_dynamic_cat"],
        num_missing_values=missing_value_count,
        scale_histogram=scale_histogram,
    )

    # (target, feat_static_cat, feat_static_real) for every series
    series_specs = [
        (targets[0, :], [1, 2], [0.1, 0.2]),
        (targets[1, :], [1, 3], [0.1, 0.3]),
        (np.array([]), [1, 3], [0.1, 0.3]),
    ]

    # FIXME: the cast below is a hack to make mypy happy
    timeseries = cast(
        Dataset,
        [
            make_time_series(
                target=target,
                feat_static_cat=static_cat,
                feat_static_real=static_real,
                **dynamic_feat_counts,
            )
            for target, static_cat, static_real in series_specs
        ],
    )

    found = calculate_dataset_statistics(timeseries)

    assert expected == found