def test_custom_transformer():
    """Check custom_transformer on one column at a time.

    Each call transforms a single column of ``input_df`` and the returned
    frame is compared with a copy of the input in which only that column
    holds the transformed values.
    """
    raw_columns = {
        "feat1": [1, 2, 3],
        "feat2": [math.e, math.e ** 2, math.e ** 3],
        "feat3": [1.5, 2.5, 3.5],
        "target": [1, 4, 9],
    }
    input_df = pd.DataFrame(raw_columns)

    def frame_with(**replaced):
        # Same columns, in the same order, as the input; only the named
        # columns get new values.
        return pd.DataFrame({**raw_columns, **replaced})

    # sqrt applied to "target": 1, 4, 9 -> 1.0, 2.0, 3.0
    _, result, _ = custom_transformer(input_df, ["target"], sqrt)
    assert frame_with(target=[1.0, 2.0, 3.0]).equals(result)

    # squaring applied to "feat1": 1, 2, 3 -> 1, 4, 9
    _, result, _ = custom_transformer(input_df, ["feat1"], lambda x: x ** 2)
    assert frame_with(feat1=[1, 4, 9]).equals(result)

    # ln applied to "feat2" with is_vectorized=True: e, e^2, e^3 -> 1.0, 2.0, 3.0
    _, result, _ = custom_transformer(
        input_df, ["feat2"], ln, is_vectorized=True
    )
    assert frame_with(feat2=[1.0, 2.0, 3.0]).equals(result)

    # floor applied to "feat3" with is_vectorized=True: 1.5, 2.5, 3.5 -> 1.0, 2.0, 3.0
    _, result, _ = custom_transformer(
        input_df, ["feat3"], floor, is_vectorized=True
    )
    assert frame_with(feat3=[1.0, 2.0, 3.0]).equals(result)
def test_custom_transformer():
    """Check custom_transformer with sqrt on "target" and a square on "feat1"."""
    input_df = pd.DataFrame({"feat1": [1, 2, 3], "target": [1, 4, 9]})
    target_roots = [1.0, 2.0, 3.0]

    # sqrt applied to "target": 1, 4, 9 -> 1.0, 2.0, 3.0
    _, result, _ = custom_transformer(input_df, ["target"], sqrt)
    after_sqrt = pd.DataFrame({"feat1": [1, 2, 3], "target": target_roots})
    assert after_sqrt.equals(result)

    # squaring applied to "feat1": 1, 2, 3 -> 1, 4, 9
    # NOTE(review): the expected frame below carries the square-rooted
    # "target" even though this call only names "feat1"; that can only hold
    # if the first call modified input_df in place -- confirm against
    # custom_transformer.
    _, result, _ = custom_transformer(input_df, ["feat1"], lambda x: x ** 2)
    after_square = pd.DataFrame({"feat1": [1, 4, 9], "target": target_roots})
    assert after_square.equals(result)
def test_custom_transformer():
    """Check custom_transformer with and without column-renaming options.

    For each (column, function) case the transformer is called four ways:
    with no naming option, with ``suffix``, with ``prefix`` and with
    ``columns_mapping``. Without a naming option the result must equal the
    input with the column transformed. With a naming option the result must
    equal that same frame plus the untouched input column appended under
    its new name.
    """
    raw_columns = {
        "feat1": [1, 2, 3],
        "feat2": [math.e, math.e ** 2, math.e ** 3],
        "feat3": [1.5, 2.5, 3.5],
        "target": [1, 4, 9],
    }
    input_df = pd.DataFrame(raw_columns)

    def frame_with(**replaced):
        # Same columns, in the same order, as the input; only the named
        # columns get new values.
        return pd.DataFrame({**raw_columns, **replaced})

    # (column, function, extra keyword arguments, expected transformed frame)
    cases = [
        # sqrt of "target"
        ("target", sqrt, {}, frame_with(target=[1.0, 2.0, 3.0])),
        # square of "feat1"
        ("feat1", lambda x: x ** 2, {}, frame_with(feat1=[1, 4, 9])),
        # ln of "feat2"
        ("feat2", ln, {"is_vectorized": True}, frame_with(feat2=[1.0, 2.0, 3.0])),
        # floor of "feat3"
        ("feat3", floor, {"is_vectorized": True}, frame_with(feat3=[1.0, 2.0, 3.0])),
    ]

    for column, func, extra, transformed in cases:
        _, plain, _ = custom_transformer(input_df, [column], func, **extra)
        _, suffixed, _ = custom_transformer(
            input_df, [column], func, suffix="_suffix", **extra
        )
        _, prefixed, _ = custom_transformer(
            input_df, [column], func, prefix="prefix_", **extra
        )
        _, mapped, _ = custom_transformer(
            input_df,
            [column],
            func,
            columns_mapping={column: column + "_raw"},
            **extra
        )

        assert transformed.equals(plain)

        # With a naming option the original values are expected as an extra
        # trailing column next to the transformed one.
        original = input_df[[column]]
        assert pd.concat(
            [transformed, original.copy().add_suffix("_suffix")], axis=1
        ).equals(suffixed)
        assert pd.concat(
            [transformed, original.copy().add_prefix("prefix_")], axis=1
        ).equals(prefixed)
        assert pd.concat(
            [transformed, original.copy().add_suffix("_raw")], axis=1
        ).equals(mapped)