def test_foreshadow_y_preparer(mocker):
    """Fit Foreshadow with a patched ``y_preparer`` and check pinned outputs.

    The ``y_preparer`` attribute on the ``Foreshadow`` class is replaced by a
    ``PropertyMock`` that returns a one-step ``StandardScaler`` pipeline. The
    test then fits on a seeded synthetic regression problem and compares
    ``predict`` and ``score`` against previously recorded values.

    Args:
        mocker: pytest-mock fixture used to patch ``Foreshadow.y_preparer``.
    """
    import numpy as np
    import pandas as pd
    from sklearn.linear_model import LinearRegression
    from sklearn.model_selection import train_test_split
    from sklearn.pipeline import Pipeline
    from sklearn.preprocessing import StandardScaler

    from foreshadow.foreshadow import Foreshadow

    # Seed first: both the target sampling and the split below draw from
    # NumPy's global random state, in that order.
    np.random.seed(0)

    target_pipeline = Pipeline([("yohe", StandardScaler())])
    # The pipeline exposes itself under ``.pipeline``.
    # NOTE(review): presumably Foreshadow reads ``.pipeline`` from the
    # preparer object - confirm against the Foreshadow implementation.
    target_pipeline.pipeline = target_pipeline

    regressor = LinearRegression()

    feature_values = np.array([0] * 50 + [1] * 50).reshape((-1, 1))
    X = pd.DataFrame(feature_values, columns=["col1"])
    target_values = np.random.normal(100, 10, 100).reshape((-1, 1))
    y = pd.DataFrame(target_values, columns=["y"])

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1)

    # Override the class-level ``y_preparer`` so that it yields our pipeline
    # instead of whatever Foreshadow would build by default.
    patched_preparer = mocker.PropertyMock(return_value=target_pipeline)
    mocker.patch.object(Foreshadow, "y_preparer", patched_preparer)

    model = Foreshadow(
        problem_type=ProblemType.REGRESSION, estimator=regressor
    )
    model.fit(X_train, y_train)

    actual_predict = model.predict(X_test)
    actual_score = model.score(X_test, y_test)

    # Only two distinct predictions occur in the pinned output.
    high = 102.19044770619593
    low = 100.05275170774354
    expected_predict = np.array(
        [high, high, high, low, high, high, high, high, low, low]
    ).reshape((-1, 1))
    expected_score = -0.3576910440975052

    assert np.allclose(actual_predict, expected_predict)
    assert np.allclose(actual_score, expected_score)
def test_foreshadow_predict_before_fit():
    """Check that ``predict`` on an unfitted Foreshadow raises ``ValueError``.

    The error message must be exactly ``"Foreshadow has not been fit yet"``.
    """
    import numpy as np
    from sklearn.linear_model import LinearRegression
    from sklearn.model_selection import train_test_split

    from foreshadow.foreshadow import Foreshadow

    np.random.seed(0)

    X = np.arange(200).reshape((-1, 2))
    y = np.random.normal(0, 1, 100).reshape((-1, 1))
    # Only the held-out features are needed; the model is never fitted.
    _, X_test, _, _ = train_test_split(X, y, test_size=0.1)

    unfitted = Foreshadow(
        problem_type=ProblemType.REGRESSION, estimator=LinearRegression()
    )

    with pytest.raises(ValueError) as excinfo:
        unfitted.predict(X_test)

    # Checked after the context manager exits, once the exception is captured.
    assert str(excinfo.value) == "Foreshadow has not been fit yet"
def test_core_foreshadow_example_regression():
    """Run the regression example end to end on the Boston housing data.

    This is a smoke test: it fits ``Foreshadow`` wrapping a
    ``LinearRegression`` and prints the R^2 score on the held-out split. No
    assertion is made on the score itself.
    """
    import numpy as np
    import pandas as pd
    from sklearn.datasets import load_boston
    from sklearn.linear_model import LinearRegression
    from sklearn.metrics import r2_score
    from sklearn.model_selection import train_test_split

    from foreshadow.foreshadow import Foreshadow

    np.random.seed(0)

    # NOTE(review): ``load_boston`` was removed in scikit-learn 1.2 - confirm
    # the scikit-learn version this project pins still provides it.
    dataset = load_boston()
    features = pd.DataFrame(dataset.data, columns=dataset.feature_names)
    target = pd.DataFrame(dataset.target, columns=["target"])

    X_train, X_test, y_train, y_test = train_test_split(
        features, target, test_size=0.2
    )

    model = Foreshadow(
        estimator=LinearRegression(), problem_type=ProblemType.REGRESSION
    )
    model.fit(X_train, y_train)

    predictions = model.predict(X_test)
    score = r2_score(y_test, predictions)
    print("Boston score: %f" % score)
def test_core_foreshadow_example_classification():
    """Run the classification example end to end on the iris data.

    This is a smoke test: it fits ``Foreshadow`` wrapping a
    ``LogisticRegression`` and prints the weighted F1 score on the held-out
    split. No assertion is made on the score itself.
    """
    import numpy as np
    import pandas as pd
    from sklearn.datasets import load_iris
    from sklearn.linear_model import LogisticRegression
    from sklearn.metrics import f1_score
    from sklearn.model_selection import train_test_split

    from foreshadow.foreshadow import Foreshadow

    np.random.seed(0)

    dataset = load_iris()
    features = pd.DataFrame(dataset.data, columns=dataset.feature_names)
    target = pd.DataFrame(dataset.target, columns=["target"])

    X_train, X_test, y_train, y_test = train_test_split(
        features, target, test_size=0.2
    )

    model = Foreshadow(
        estimator=LogisticRegression(),
        problem_type=ProblemType.CLASSIFICATION,
    )
    model.fit(X_train, y_train)

    predictions = model.predict(X_test)
    score = f1_score(y_test, predictions, average="weighted")
    print("Iris score: %f" % score)