def test_InstanceSplitter(
    start, target, lead_time: int, is_train: bool, pick_incomplete: bool
):
    """Exercise InstanceSplitter over train/predict modes.

    Checks the number of emitted instances, that raw fields are replaced by
    their past_/future_ counterparts, and that each windowed field has the
    configured length. When ``pick_incomplete`` is off and the series is too
    short for a full past window at prediction time, the splitter is expected
    to raise AssertionError.
    """
    past_len = 100
    future_len = 13

    splitter = transform.InstanceSplitter(
        target_field=FieldName.TARGET,
        is_pad_field=FieldName.IS_PAD,
        start_field=FieldName.START,
        forecast_start_field=FieldName.FORECAST_START,
        train_sampler=transform.UniformSplitSampler(p=1.0),
        past_length=past_len,
        future_length=future_len,
        lead_time=lead_time,
        time_series_fields=["some_time_feature"],
        pick_incomplete=pick_incomplete,
    )
    assert_serializable(splitter)

    entry = {
        "start": start,
        "target": target,
        "some_time_feature": np.arange(len(target) + 100),
        "some_other_col": "ABC",
    }

    # Guard clause: at prediction time a too-short history must be rejected
    # when incomplete windows are not allowed.
    if not is_train and not pick_incomplete and len(target) < past_len:
        with pytest.raises(AssertionError):
            list(splitter.flatmap_transform(entry, is_train=is_train))
        return

    outputs = list(splitter.flatmap_transform(entry, is_train=is_train))

    if is_train:
        # Every admissible split point yields one instance; requiring a full
        # past window (pick_incomplete=False) removes train_length of them.
        required_history = 0 if pick_incomplete else past_len
        assert len(outputs) == max(
            0, len(target) - future_len - lead_time + 1 - required_history
        )
    else:
        assert len(outputs) == 1

    for instance in outputs:
        assert "target" not in instance
        assert "some_time_feature" not in instance
        assert "some_other_col" in instance
        assert len(instance["past_some_time_feature"]) == past_len
        assert len(instance["past_target"]) == past_len
        if is_train:
            assert len(instance["future_target"]) == future_len
            assert len(instance["future_some_time_feature"]) == future_len
        else:
            # No future target at prediction time, but future features are
            # still materialized for the forecast horizon.
            assert len(instance["future_target"]) == 0
            assert len(instance["future_some_time_feature"]) == future_len
def test_CanonicalInstanceSplitter(
    start,
    target,
    is_train: bool,
    use_prediction_features: bool,
    allow_target_padding: bool,
):
    """Exercise CanonicalInstanceSplitter with mode-dependent samplers.

    Training uses a uniform sampler with a minimum past; inference uses the
    validation sampler (when padding is allowed) or the test sampler. Verifies
    instance counts, field renaming, and windowed field lengths.
    """
    window_len = 100
    horizon = 13

    # Choose the sampler the same way the original nested conditional did.
    if is_train:
        sampler = transform.UniformSplitSampler(
            p=1.0,
            min_past=window_len,
        )
    elif allow_target_padding:
        sampler = transform.ValidationSplitSampler()
    else:
        sampler = transform.TestSplitSampler()

    splitter = transform.CanonicalInstanceSplitter(
        target_field=FieldName.TARGET,
        is_pad_field=FieldName.IS_PAD,
        start_field=FieldName.START,
        forecast_start_field=FieldName.FORECAST_START,
        instance_sampler=sampler,
        instance_length=window_len,
        prediction_length=horizon,
        time_series_fields=["some_time_feature"],
        allow_target_padding=allow_target_padding,
        use_prediction_features=use_prediction_features,
    )
    assert_serializable(splitter)

    entry = {
        "start": start,
        "target": target,
        "some_time_feature": np.arange(len(target) + 100),
        "some_other_col": "ABC",
    }

    outputs = list(splitter.flatmap_transform(entry, is_train=is_train))

    # NOTE(review): the floor is only consulted on the training branch, so the
    # "and not is_train" clause is effectively dead — kept to match behavior.
    floor = 1 if allow_target_padding and not is_train else 0
    if is_train:
        assert len(outputs) == max(floor, len(target) - window_len + 1)
    else:
        assert len(outputs) == 1

    for instance in outputs:
        assert "target" not in instance
        assert "future_target" not in instance
        assert "some_time_feature" not in instance
        assert "some_other_col" in instance
        assert len(instance["past_some_time_feature"]) == window_len
        assert len(instance["past_target"]) == window_len
        if use_prediction_features and not is_train:
            assert len(instance["future_some_time_feature"]) == horizon
def test_CanonicalInstanceSplitter(
    start, target, is_train, use_prediction_features, allow_target_padding
):
    """Exercise CanonicalInstanceSplitter with a fixed uniform sampler.

    Verifies instance counts, removal of raw fields in favor of the
    past_/future_ views, and window lengths of the emitted fields.

    NOTE(review): this definition reuses the name of an earlier
    test_CanonicalInstanceSplitter in this file and therefore shadows it at
    import time — only this version is collected; confirm which one is meant
    to survive.
    """
    context_len = 100
    forecast_len = 13

    splitter = transform.CanonicalInstanceSplitter(
        target_field=transform.FieldName.TARGET,
        is_pad_field=transform.FieldName.IS_PAD,
        start_field=transform.FieldName.START,
        forecast_start_field=transform.FieldName.FORECAST_START,
        instance_sampler=transform.UniformSplitSampler(p=1.0),
        instance_length=context_len,
        prediction_length=forecast_len,
        time_series_fields=["some_time_feature"],
        allow_target_padding=allow_target_padding,
        use_prediction_features=use_prediction_features,
    )
    assert_serializable(splitter)

    sample = {
        "start": start,
        "target": target,
        "some_time_feature": np.arange(len(target) + 100),
        "some_other_col": "ABC",
    }

    results = list(splitter.flatmap_transform(sample, is_train=is_train))

    expected_floor = 1 if allow_target_padding else 0
    if is_train:
        assert len(results) == max(
            expected_floor, len(target) - context_len + 1
        )
    else:
        assert len(results) == 1

    for item in results:
        # Raw fields must have been consumed by the splitter...
        for consumed in ("target", "future_target", "some_time_feature"):
            assert consumed not in item
        # ...while unrelated columns pass through untouched.
        assert "some_other_col" in item
        assert len(item["past_some_time_feature"]) == context_len
        assert len(item["past_target"]) == context_len
        if use_prediction_features and not is_train:
            assert len(item["future_some_time_feature"]) == forecast_len
def test_InstanceSplitter(start, target, is_train):
    """Exercise InstanceSplitter with pick_incomplete fixed to True.

    Verifies the number of emitted instances and the lengths of the
    past_/future_ windows in training and prediction mode.

    NOTE(review): this definition reuses the name of an earlier, more general
    test_InstanceSplitter (with lead_time / pick_incomplete parameters) in
    this file and shadows it at import time — only this version is collected;
    confirm which one is meant to survive.
    """
    history_len = 100
    horizon = 13

    splitter = transform.InstanceSplitter(
        target_field=transform.FieldName.TARGET,
        is_pad_field=transform.FieldName.IS_PAD,
        start_field=transform.FieldName.START,
        forecast_start_field=transform.FieldName.FORECAST_START,
        train_sampler=transform.UniformSplitSampler(p=1.0),
        past_length=history_len,
        future_length=horizon,
        time_series_fields=["some_time_feature"],
        pick_incomplete=True,
    )
    assert_serializable(splitter)

    sample = {
        "start": start,
        "target": target,
        "some_time_feature": np.arange(len(target) + 100),
        "some_other_col": "ABC",
    }

    results = list(splitter.flatmap_transform(sample, is_train=is_train))

    if is_train:
        # With incomplete windows allowed, every split point that leaves a
        # full horizon produces an instance.
        assert len(results) == max(0, len(target) - horizon + 1)
    else:
        assert len(results) == 1

    for item in results:
        assert "target" not in item
        assert "some_time_feature" not in item
        assert "some_other_col" in item
        assert len(item["past_some_time_feature"]) == history_len
        assert len(item["past_target"]) == history_len
        if is_train:
            assert len(item["future_target"]) == horizon
            assert len(item["future_some_time_feature"]) == horizon
        else:
            # No future target at prediction time, but future features are
            # still materialized for the forecast horizon.
            assert len(item["future_target"]) == 0
            assert len(item["future_some_time_feature"]) == horizon