def test_delay_feature_transformer_woodwork_custom_overrides_returned_by_components(X_df, fit_transform):
    """Check the logical types reported by the transformer output for each override.

    For every candidate logical type, column 0 of ``X_df`` is wrapped in a
    woodwork ``DataTable`` with that type forced, pushed through a
    ``DelayedFeatureTransformer(max_delay=1, gap=11)``, and the logical types
    of the resulting table are compared against the expected mapping.

    Args:
        X_df: Data handed to ``ww.DataTable``; its column ``0`` receives the
            overriding logical type.
        fit_transform: When truthy, call ``fit_transform``; otherwise call
            ``fit`` followed by ``transform``.
    """
    target = pd.Series([1, 2, 1])
    # Overrides for which the delayed copy of column 0 is expected to be Double;
    # for every other override the delayed copy is expected to keep the override.
    delayed_as_double = [Integer, Double, Categorical]

    for override in [Integer, Double, Categorical, Datetime, Boolean]:
        try:
            table = ww.DataTable(X_df, logical_types={0: override})
        except TypeError:
            # ww.DataTable raised TypeError for this override on this data;
            # move on to the next candidate type.
            continue

        transformer = DelayedFeatureTransformer(max_delay=1, gap=11)
        if fit_transform:
            result = transformer.fit_transform(table, target)
        else:
            transformer.fit(table, target)
            result = transformer.transform(table, target)

        assert isinstance(result, ww.DataTable)

        delayed_type = Double if override in delayed_as_double else override
        assert result.logical_types == {
            0: override,
            '0_delay_1': delayed_type,
            'target_delay_0': Integer,
            'target_delay_1': Double,
        }
def test_lagged_feature_extractor_delay_features_delay_target(
        encode_y_as_str, encode_X_as_str, delay_features, delay_target, delayed_features_data):
    """Compare ``fit_transform(X, y)`` against hand-built shifted columns.

    The expected frame starts with the ``feature`` column, its shifts by 1-3
    rows, the target cast to ``Int64`` and its shifts by 1-3 rows. Columns are
    then removed according to ``delay_features`` / ``delay_target`` before
    comparing with the transformer output (``max_delay=3, gap=1``).

    Args:
        encode_y_as_str: Forwarded to ``encode_X_y_as_strings``.
        encode_X_as_str: Forwarded to ``encode_X_y_as_strings``; when falsy the
            expected ``feature`` column is cast to ``Int64``.
        delay_features: Passed to the transformer; when falsy, expected columns
            containing ``"feature_"`` are dropped.
        delay_target: Passed to the transformer; when falsy, expected columns
            containing ``"target"`` are dropped.
        delayed_features_data: ``(X, y)`` pair.
    """
    X, y = delayed_features_data
    X, X_answer, y, y_answer = encode_X_y_as_strings(X, y, encode_X_as_str, encode_y_as_str)

    # The undelayed feature column is compared as-is when X was string-encoded,
    # otherwise as Int64.
    feature_column = X.feature if encode_X_as_str else X.feature.astype("Int64")

    expected = pd.DataFrame({
        "feature": feature_column,
        "feature_delay_1": X_answer.feature.shift(1),
        "feature_delay_2": X_answer.feature.shift(2),
        "feature_delay_3": X_answer.feature.shift(3),
        "target_delay_0": y_answer.astype("Int64"),
        "target_delay_1": y_answer.shift(1),
        "target_delay_2": y_answer.shift(2),
        "target_delay_3": y_answer.shift(3),
    })

    # "feature_" (with the underscore) matches only the delayed copies, never
    # the plain "feature" column.
    unwanted = [
        name for name in expected.columns
        if (not delay_features and "feature_" in name)
        or (not delay_target and "target" in name)
    ]
    expected = expected.drop(columns=unwanted)

    transformer = DelayedFeatureTransformer(max_delay=3, gap=1,
                                            delay_features=delay_features,
                                            delay_target=delay_target)
    actual = transformer.fit_transform(X, y).to_dataframe()
    assert_frame_equal(expected, actual)
def test_target_delay_when_gap_is_0(gap, delayed_features_data):
    """Check that ``target_delay_0`` is absent from the output when ``gap`` is 0.

    The transformer (``max_delay=1``) is run once with features and once with
    ``X=None``; in both cases the expected frame omits ``target_delay_0`` only
    when ``gap == 0``.

    Args:
        gap: Gap passed to ``DelayedFeatureTransformer``.
        delayed_features_data: ``(X, y)`` pair.
    """
    # NOTE(review): this file defines another test with this exact name; if both
    # live in the same module, the later definition shadows the earlier one.
    X, y = delayed_features_data

    def _adjust_for_gap(frame):
        # With a zero gap the undelayed target is not expected in the output.
        if gap == 0:
            return frame.drop(columns=["target_delay_0"])
        return frame

    with_features = _adjust_for_gap(pd.DataFrame({
        "feature": X.feature.astype("Int64"),
        "feature_delay_1": X.feature.shift(1),
        "target_delay_0": y.astype("Int64"),
        "target_delay_1": y.shift(1),
    }))
    transformer = DelayedFeatureTransformer(max_delay=1, gap=gap)
    assert_frame_equal(with_features, transformer.fit_transform(X, y).to_dataframe())

    # Same transformer instance, refit on the target alone.
    target_only = _adjust_for_gap(pd.DataFrame({
        "target_delay_0": y.astype("Int64"),
        "target_delay_1": y.shift(1),
    }))
    assert_frame_equal(target_only, transformer.fit_transform(None, y).to_dataframe())
def test_target_delay_when_gap_is_0(gap, delayed_features_data):
    """Check that ``target_delay_0`` is absent from the output when ``gap`` is 0.

    This variant compares the value returned by ``fit_transform`` directly
    with a pandas DataFrame, first with features and then with ``X=None``.

    Args:
        gap: Gap passed to ``DelayedFeatureTransformer``.
        delayed_features_data: ``(X, y)`` pair.
    """
    # NOTE(review): this file defines another test with this exact name; if both
    # live in the same module, the later definition shadows the earlier one.
    X, y = delayed_features_data

    # With a zero gap the undelayed target is not expected in the output.
    columns_to_drop = ["target_delay_0"] if gap == 0 else []

    expected_with_features = pd.DataFrame({
        "feature": X.feature,
        "feature_delay_1": X.feature.shift(1),
        "target_delay_0": y,
        "target_delay_1": y.shift(1),
    }).drop(columns=columns_to_drop)
    transformer = DelayedFeatureTransformer(max_delay=1, gap=gap)
    pd.testing.assert_frame_equal(transformer.fit_transform(X, y), expected_with_features)

    # Same transformer instance, refit on the target alone.
    expected_target_only = pd.DataFrame({
        "target_delay_0": y,
        "target_delay_1": y.shift(1),
    }).drop(columns=columns_to_drop)
    pd.testing.assert_frame_equal(transformer.fit_transform(None, y), expected_target_only)
def test_lagged_feature_extractor_delay_target(encode_y_as_str, encode_X_as_str, delay_features, delay_target,
                                               delayed_features_data):
    """Compare ``fit_transform(None, y)`` against hand-built shifted target columns.

    When ``delay_target`` is truthy the expected frame holds the target cast to
    ``Int64`` plus its shifts by 1-3 rows; otherwise an empty DataFrame is
    expected. The transformer is built with ``max_delay=3, gap=1``.

    Args:
        encode_y_as_str: Forwarded to ``encode_X_y_as_strings``.
        encode_X_as_str: Forwarded to ``encode_X_y_as_strings``.
        delay_features: Passed to the transformer.
        delay_target: Passed to the transformer; selects which expected frame
            is used.
        delayed_features_data: ``(X, y)`` pair.
    """
    X, y = delayed_features_data
    # Only the target halves of the encoded data are used below, because the
    # transformer is fit with X=None.
    _encoded_X, _encoded_X_answer, y, y_answer = encode_X_y_as_strings(
        X, y, encode_X_as_str, encode_y_as_str)

    if delay_target:
        expected = pd.DataFrame({
            "target_delay_0": y_answer.astype("Int64"),
            "target_delay_1": y_answer.shift(1),
            "target_delay_2": y_answer.shift(2),
            "target_delay_3": y_answer.shift(3),
        })
    else:
        expected = pd.DataFrame()

    transformer = DelayedFeatureTransformer(max_delay=3, gap=1,
                                            delay_features=delay_features,
                                            delay_target=delay_target)
    actual = transformer.fit_transform(None, y).to_dataframe()
    assert_frame_equal(expected, actual)