Beispiel #1
0
def test_ExpectedNumInstanceSampler():
    N = 6
    train_length = 2
    pred_length = 1
    ds = make_dataset(N, train_length)

    t = transform.Chain(trans=[
        transform.InstanceSplitter(
            target_field=FieldName.TARGET,
            is_pad_field=FieldName.IS_PAD,
            start_field=FieldName.START,
            forecast_start_field=FieldName.FORECAST_START,
            train_sampler=transform.ExpectedNumInstanceSampler(
                num_instances=4),
            past_length=train_length,
            future_length=pred_length,
            pick_incomplete=True,
        )
    ])

    scale_hist = ScaleHistogram()

    repetition = 2
    for i in range(repetition):
        for data in t(iter(ds), is_train=True):
            target_values = data["past_target"]
            # for simplicity, discard values that are zeros to avoid confusion with padding
            target_values = target_values[target_values > 0]
            scale_hist.add(target_values)

    expected_values = {i: 2**i * repetition for i in range(1, N)}

    assert expected_values == scale_hist.bin_counts
Beispiel #2
0
def test_Transformation():
    train_length = 100
    ds = ListDataset(
        [{"start": "2012-01-01", "target": [0.2] * train_length}], freq="1D"
    )

    pred_length = 10

    t = transform.Chain(
        trans=[
            transform.AddTimeFeatures(
                start_field=FieldName.START,
                target_field=FieldName.TARGET,
                output_field="time_feat",
                time_features=[
                    time_feature.DayOfWeek(),
                    time_feature.DayOfMonth(),
                    time_feature.MonthOfYear(),
                ],
                pred_length=pred_length,
            ),
            transform.AddAgeFeature(
                target_field=FieldName.TARGET,
                output_field="age",
                pred_length=pred_length,
                log_scale=True,
            ),
            transform.AddObservedValuesIndicator(
                target_field=FieldName.TARGET, output_field="observed_values"
            ),
            transform.VstackFeatures(
                output_field="dynamic_feat",
                input_fields=["age", "time_feat"],
                drop_inputs=True,
            ),
            transform.InstanceSplitter(
                target_field=FieldName.TARGET,
                is_pad_field=FieldName.IS_PAD,
                start_field=FieldName.START,
                forecast_start_field=FieldName.FORECAST_START,
                train_sampler=transform.ExpectedNumInstanceSampler(
                    num_instances=4
                ),
                past_length=train_length,
                future_length=pred_length,
                time_series_fields=["dynamic_feat", "observed_values"],
            ),
        ]
    )

    for u in t(iter(ds), is_train=True):
        print(u)
Beispiel #3
0
def test_multi_dim_transformation(is_train):
    train_length = 10

    first_dim: list = list(np.arange(1, 11, 1))
    first_dim[-1] = "NaN"

    second_dim: list = list(np.arange(11, 21, 1))
    second_dim[0] = "NaN"

    ds = ListDataset(
        data_iter=[{
            "start": "2012-01-01",
            "target": [first_dim, second_dim]
        }],
        freq="1D",
        one_dim_target=False,
    )
    pred_length = 2

    # Looks weird - but this is necessary to assert the nan entries correctly.
    first_dim[-1] = np.nan
    second_dim[0] = np.nan

    t = transform.Chain(trans=[
        transform.AddTimeFeatures(
            start_field=FieldName.START,
            target_field=FieldName.TARGET,
            output_field="time_feat",
            time_features=[
                time_feature.DayOfWeek(),
                time_feature.DayOfMonth(),
                time_feature.MonthOfYear(),
            ],
            pred_length=pred_length,
        ),
        transform.AddAgeFeature(
            target_field=FieldName.TARGET,
            output_field="age",
            pred_length=pred_length,
            log_scale=True,
        ),
        transform.AddObservedValuesIndicator(
            target_field=FieldName.TARGET,
            output_field="observed_values",
            convert_nans=False,
        ),
        transform.VstackFeatures(
            output_field="dynamic_feat",
            input_fields=["age", "time_feat"],
            drop_inputs=True,
        ),
        transform.InstanceSplitter(
            target_field=FieldName.TARGET,
            is_pad_field=FieldName.IS_PAD,
            start_field=FieldName.START,
            forecast_start_field=FieldName.FORECAST_START,
            train_sampler=transform.ExpectedNumInstanceSampler(
                num_instances=4),
            past_length=train_length,
            future_length=pred_length,
            time_series_fields=["dynamic_feat", "observed_values"],
            time_first=False,
        ),
    ])

    if is_train:
        for u in t(iter(ds), is_train=True):
            assert_shape(u["past_target"], (2, 10))
            assert_shape(u["past_dynamic_feat"], (4, 10))
            assert_shape(u["past_observed_values"], (2, 10))
            assert_shape(u["future_target"], (2, 2))

            assert_padded_array(
                u["past_observed_values"],
                np.array([[1.0] * 9 + [0.0], [0.0] + [1.0] * 9]),
                u["past_is_pad"],
            )
            assert_padded_array(
                u["past_target"],
                np.array([first_dim, second_dim]),
                u["past_is_pad"],
            )
    else:
        for u in t(iter(ds), is_train=False):
            assert_shape(u["past_target"], (2, 10))
            assert_shape(u["past_dynamic_feat"], (4, 10))
            assert_shape(u["past_observed_values"], (2, 10))
            assert_shape(u["future_target"], (2, 0))

            assert_padded_array(
                u["past_observed_values"],
                np.array([[1.0] * 9 + [0.0], [0.0] + [1.0] * 9]),
                u["past_is_pad"],
            )
            assert_padded_array(
                u["past_target"],
                np.array([first_dim, second_dim]),
                u["past_is_pad"],
            )