Esempio n. 1
0
def test_delay_feature_transformer_woodwork_custom_overrides_returned_by_components(X_df, fit_transform):
    """Check the logical types reported after delaying a column with an overridden type.

    For each candidate logical type, column ``0`` of ``X_df`` is wrapped in a
    ``ww.DataTable`` with that type forced; candidates for which construction
    raises ``TypeError`` are skipped. The transformer output must be a
    ``ww.DataTable`` whose ``0_delay_1`` column is ``Double`` when the override
    is ``Integer``, ``Double`` or ``Categorical``, and keeps the override
    otherwise.

    ``fit_transform`` selects between a single ``fit_transform`` call and a
    separate ``fit`` followed by ``transform``.
    """
    target = pd.Series([1, 2, 1])
    # Overrides for which the delayed feature column is expected to be Double.
    becomes_double = (Integer, Double, Categorical)

    for override in (Integer, Double, Categorical, Datetime, Boolean):
        try:
            table = ww.DataTable(X_df, logical_types={0: override})
        except TypeError:
            # Construction failed for this type; skip it.
            continue

        delayer = DelayedFeatureTransformer(max_delay=1, gap=11)
        if not fit_transform:
            delayer.fit(table, target)
            result = delayer.transform(table, target)
        else:
            result = delayer.fit_transform(table, target)

        assert isinstance(result, ww.DataTable)

        delayed_type = Double if override in becomes_double else override
        assert result.logical_types == {0: override,
                                        '0_delay_1': delayed_type,
                                        'target_delay_0': Integer,
                                        'target_delay_1': Double}
def test_lagged_feature_extractor_delay_features_delay_target(
        encode_y_as_str, encode_X_as_str, delay_features, delay_target,
        delayed_features_data):
    """Compare the transformer output with hand-built shifted columns.

    The expected frame starts with the ``feature`` column followed by three
    feature delays and four target delays (0 through 3). Feature-delay columns
    are removed when ``delay_features`` is falsy and target columns are removed
    when ``delay_target`` is falsy. The transformer is built with
    ``max_delay=3`` and ``gap=1``.
    """
    X, y = delayed_features_data
    X, X_answer, y, y_answer = encode_X_y_as_strings(X, y, encode_X_as_str,
                                                     encode_y_as_str)

    # The undelayed feature is expected as "Int64" unless X was string-encoded.
    if encode_X_as_str:
        feature_column = X.feature
    else:
        feature_column = X.feature.astype("Int64")

    answer_feature = X_answer.feature
    expected = pd.DataFrame({
        "feature": feature_column,
        "feature_delay_1": answer_feature.shift(1),
        "feature_delay_2": answer_feature.shift(2),
        "feature_delay_3": answer_feature.shift(3),
        "target_delay_0": y_answer.astype("Int64"),
        "target_delay_1": y_answer.shift(1),
        "target_delay_2": y_answer.shift(2),
        "target_delay_3": y_answer.shift(3)
    })

    # Collect every column the current configuration should not produce.
    unwanted = []
    if not delay_features:
        unwanted.extend(name for name in expected.columns if "feature_" in name)
    if not delay_target:
        unwanted.extend(name for name in expected.columns if "target" in name)
    if unwanted:
        expected = expected.drop(columns=unwanted)

    transformer = DelayedFeatureTransformer(max_delay=3,
                                            gap=1,
                                            delay_features=delay_features,
                                            delay_target=delay_target)
    actual = transformer.fit_transform(X, y).to_dataframe()
    assert_frame_equal(expected, actual)
Esempio n. 3
0
def test_target_delay_when_gap_is_0(gap, delayed_features_data):
    """Check the delayed columns for a given ``gap``, with and without features.

    When ``gap`` is 0 the expected frames contain no ``target_delay_0`` column.
    The same transformer instance is fitted first on ``(X, y)`` and then on
    ``(None, y)``.
    """
    X, y = delayed_features_data

    def _expected_for_gap(frame):
        # With gap == 0 the undelayed target column is not expected.
        if gap == 0:
            return frame.drop(columns=["target_delay_0"])
        return frame

    with_features = _expected_for_gap(pd.DataFrame({
        "feature": X.feature.astype("Int64"),
        "feature_delay_1": X.feature.shift(1),
        "target_delay_0": y.astype("Int64"),
        "target_delay_1": y.shift(1),
    }))

    transformer = DelayedFeatureTransformer(max_delay=1, gap=gap)
    actual = transformer.fit_transform(X, y).to_dataframe()
    assert_frame_equal(with_features, actual)

    target_only = _expected_for_gap(pd.DataFrame({
        "target_delay_0": y.astype("Int64"),
        "target_delay_1": y.shift(1),
    }))
    actual = transformer.fit_transform(None, y).to_dataframe()
    assert_frame_equal(target_only, actual)
Esempio n. 4
0
def test_target_delay_when_gap_is_0(gap, delayed_features_data):
    """Check the delayed columns for a given ``gap``, with and without features.

    Two scenarios are run against one transformer instance, in order: fitting
    on ``(X, y)`` and fitting on ``(None, y)``. When ``gap`` is 0 the expected
    frame of each scenario has its ``target_delay_0`` column removed.
    """
    X, y = delayed_features_data

    expected_with_features = pd.DataFrame({"feature": X.feature,
                                           "feature_delay_1": X.feature.shift(1),
                                           "target_delay_0": y,
                                           "target_delay_1": y.shift(1)})
    expected_target_only = pd.DataFrame({"target_delay_0": y,
                                         "target_delay_1": y.shift(1)})

    transformer = DelayedFeatureTransformer(max_delay=1, gap=gap)

    # (features passed to the transformer, frame it should produce)
    scenarios = (
        (X, expected_with_features),
        (None, expected_target_only),
    )
    for features, answer in scenarios:
        if gap == 0:
            answer = answer.drop(columns=["target_delay_0"])
        produced = transformer.fit_transform(features, y)
        pd.testing.assert_frame_equal(produced, answer)
Esempio n. 5
0
def test_lagged_feature_extractor_delay_target(encode_y_as_str, encode_X_as_str, delay_features,
                                               delay_target, delayed_features_data):
    """Check the target-only output when the transformer is fitted without features.

    The transformer (``max_delay=3``, ``gap=1``) is fitted on ``(None, y)``.
    The expected frame holds ``target_delay_0`` through ``target_delay_3`` when
    ``delay_target`` is truthy and is an empty frame otherwise.
    """
    X, y = delayed_features_data
    # Only the (possibly re-encoded) target and its expected form are used here.
    _, _, y, y_answer = encode_X_y_as_strings(X, y, encode_X_as_str, encode_y_as_str)

    if delay_target:
        expected = pd.DataFrame({
            "target_delay_0": y_answer.astype("Int64"),
            "target_delay_1": y_answer.shift(1),
            "target_delay_2": y_answer.shift(2),
            "target_delay_3": y_answer.shift(3),
        })
    else:
        expected = pd.DataFrame()

    transformer = DelayedFeatureTransformer(max_delay=3, gap=1,
                                            delay_features=delay_features, delay_target=delay_target)
    actual = transformer.fit_transform(None, y).to_dataframe()
    assert_frame_equal(expected, actual)