def test_transformer_keep_cols():
    """Check that ``keep_columns=True`` yields two columns for one input column."""
    import pandas as pd

    from foreshadow.concrete import StandardScaler as CustomScaler

    csv_path = get_file_path("data", "boston_housing.csv")
    frame = pd.read_csv(csv_path)

    scaler = CustomScaler(keep_columns=True)
    transformed = scaler.fit_transform(frame[["crim"]])

    # One input column goes in; two columns are expected to come out.
    n_columns = transformed.shape[1]
    assert n_columns == 2
def test_transformer_naming_default():
    """Check that with ``keep_columns=False`` the output column is named "crim"."""
    import pandas as pd

    from foreshadow.concrete import StandardScaler

    csv_path = get_file_path("data", "boston_housing.csv")
    frame = pd.read_csv(csv_path)

    transformed = StandardScaler(keep_columns=False).fit_transform(
        frame[["crim"]]
    )

    first_column = transformed.iloc[:, 0]
    assert first_column.name == "crim"
def test_transformer_wrapper_init():
    """Check that ``name`` and ``keep_columns`` passed at init are stored."""
    from foreshadow.concrete import StandardScaler

    wrapped = StandardScaler(name="test-scaler", keep_columns=True)

    assert wrapped.name == "test-scaler"
    assert wrapped.keep_columns is True
def pick_transformer(X, y=None, **fit_params):
    """Choose a scaler from the first value of the first column of ``X``.

    Args:
        X: Input data supporting ``.iloc`` positional indexing (e.g. a
            pandas DataFrame).
        y: Unused.
        **fit_params: Unused.

    Returns:
        A new ``StandardScaler`` when the first value equals 0, otherwise a
        new ``MinMaxScaler``.

    Raises:
        IndexError: If ``X`` has no rows or no columns.

    """
    # Use a positional lookup: ``series[0]`` is label-based and raises
    # KeyError (or picks the wrong row) when the index does not carry the
    # label 0 at position 0, e.g. after filtering or shuffling rows.
    first_value = X.iloc[:, 0].iloc[0]
    if first_value == 0:
        return StandardScaler()
    return MinMaxScaler()
def test_smarttransformer_function(smart_child):
    """Compare a SmartTransformer subclass against a plain StandardScaler.

    Args:
        smart_child: A subclass of SmartTransformer.

    """
    import numpy as np
    import pandas as pd

    from foreshadow.concrete import StandardScaler

    csv_path = get_file_path("data", "boston_housing.csv")
    frame = pd.read_csv(csv_path)
    columns = ["crim"]

    # Single-step path: fit_transform on both must give equal frames.
    smart_tf = smart_child(cache_manager=CacheManager())
    smart_out = smart_tf.fit_transform(frame[columns])

    reference = StandardScaler()
    reference_out = reference.fit_transform(frame[columns])

    assert smart_out.equals(reference_out)

    # Two-step path: fit followed by transform.
    smart_tf.fit(frame[columns])
    smart_out = smart_tf.transform(frame[columns])

    reference.fit(frame[columns])
    reference_out = reference.transform(frame[columns])

    # TODO, remove when SmartTransformer is no longer wrapped
    # Column names will be different, thus np.allclose() is used
    assert np.allclose(smart_out, reference_out)
def test_transformer_wrapper_empty_input():
    """Check that sklearn's and the wrapped scaler both reject empty input."""
    import numpy as np
    import pandas as pd
    from sklearn.preprocessing import StandardScaler as SklearnScaler

    from foreshadow.concrete import StandardScaler as CustomScaler

    empty_frame = pd.DataFrame({"A": np.array([])})

    # The plain sklearn scaler is exercised first, then the wrapped one;
    # each is expected to raise ValueError when fit on a zero-row frame.
    for scaler_cls in (SklearnScaler, CustomScaler):
        with pytest.raises(ValueError):
            scaler_cls().fit(empty_frame)
def test_transformer_wrapper_function():
    """Check that the wrapped scaler's output values match sklearn's."""
    import numpy as np
    import pandas as pd
    from sklearn.preprocessing import StandardScaler as SklearnScaler

    from foreshadow.concrete import StandardScaler as CustomScaler

    csv_path = get_file_path("data", "boston_housing.csv")
    frame = pd.read_csv(csv_path)
    columns = ["crim"]

    wrapped = CustomScaler()
    plain = SklearnScaler()

    # Two-step path: fit, then transform.
    wrapped.fit(frame[columns])
    plain.fit(frame[columns])

    wrapped_out = wrapped.transform(frame[columns])
    plain_out = plain.transform(frame[columns])

    assert np.array_equal(wrapped_out.values, plain_out)

    # Single-step path: fit_transform.
    wrapped_out = wrapped.fit_transform(frame[columns])
    plain_out = plain.fit_transform(frame[columns])

    assert np.array_equal(wrapped_out.values, plain_out)
def pick_transformer(self, X, y=None, **fit_params):
    """Return a new ``StandardScaler`` regardless of the input.

    Args:
        X: Unused.
        y: Unused.
        **fit_params: Unused.

    Returns:
        A freshly constructed ``StandardScaler``.

    """
    chosen = StandardScaler()
    return chosen