def test_regular_df_X_y():
    """Check AutoFeatRegressor on DataFrames built without explicit column names.

    Features and target from ``get_random_data()`` are wrapped in
    ``pd.DataFrame`` for fitting. The fitted model has to reach R^2 >= 0.999
    on both the raw features and the transformed frame, and the first three
    output columns must be named ``"0"``, ``"1"`` and ``"2"``.
    """
    features, target = get_random_data()

    def as_frame():
        # Build a fresh, unnamed-column frame for every call site.
        return pd.DataFrame(features)

    model = AutoFeatRegressor(verbose=1, feateng_steps=3)
    transformed = model.fit_transform(as_frame(), pd.DataFrame(target))

    # Score once with the original data, once with the transformed data.
    r2_original = model.score(as_frame(), target)
    assert r2_original >= 0.999, "R^2 should be 1."
    r2_transformed = model.score(transformed, target)
    assert r2_transformed >= 0.999, "R^2 should be 1."

    leading_columns = list(transformed.columns)[:3]
    assert leading_columns == ["0", "1", "2"], "Wrong column names"
def test_regular_X_y():
    """Check AutoFeatRegressor when the data is passed without DataFrame wrapping.

    The outputs of ``get_random_data()`` (presumably numpy arrays -- confirm
    against that helper) go straight into ``fit_transform``. The fitted model
    has to reach R^2 >= 0.999 on both the raw and the transformed data, and
    the first three output columns must be ``"x000"``, ``"x001"``, ``"x002"``.
    """
    features, target = get_random_data()
    model = AutoFeatRegressor(verbose=1, feateng_steps=3)
    transformed = model.fit_transform(features, target)

    # Raw inputs first, then the engineered feature frame.
    r2_raw = model.score(features, target)
    assert r2_raw >= 0.999, "R^2 should be 1."
    r2_engineered = model.score(transformed, target)
    assert r2_engineered >= 0.999, "R^2 should be 1."

    expected_names = ["x000", "x001", "x002"]
    assert list(transformed.columns)[:3] == expected_names, "Wrong column names"
def test_weird_colnames():
    """Check AutoFeatRegressor with unusual column labels.

    The training frame uses the labels ``"x 1.1"``, ``2`` (an integer) and
    ``"x/3"``. After fitting, the model has to reach R^2 >= 0.999 on an
    identically labelled frame, and the first three output columns must be
    ``"x 1.1"``, ``"2"`` and ``"x/3"``. Scoring a frame whose first label was
    changed to ``"x 11"`` must raise ``ValueError``.
    """
    fitted_labels = ("x 1.1", 2, "x/3")
    mismatched_labels = ("x 11", 2, "x/3")

    features, target = get_random_data()

    def labelled(labels):
        # A fresh frame (and a fresh label list) for every call.
        return pd.DataFrame(features, columns=list(labels))

    model = AutoFeatRegressor(verbose=1, feateng_steps=3)
    transformed = model.fit_transform(labelled(fitted_labels), pd.DataFrame(target))

    r2 = model.score(labelled(fitted_labels), target)
    assert r2 >= 0.999, "R^2 should be 1."
    assert list(transformed.columns)[:3] == ["x 1.1", "2", "x/3"], "Wrong column names"

    # Scoring must fail if the column names differ from those seen in fit.
    rejected = False
    try:
        model.score(labelled(mismatched_labels), target)
    except ValueError:
        rejected = True
    if not rejected:
        raise AssertionError("Should throw error on mismatch column names")
def test_categorical_cols():
    """Check the handling of ``categorical_cols`` in AutoFeatRegressor.

    Three random columns and one integer column ``x4`` (values 4, 5 and 2)
    are generated with a fixed seed. The test then verifies that:

    * naming a categorical column that is absent from the frame (``"x5"``)
      makes ``fit_transform`` raise ``ValueError``;
    * with ``categorical_cols=["x4"]``, both ``fit_transform`` and
      ``transform`` put ``cat_x4_2.0``, ``cat_x4_4.0``, ``cat_x4_5.0`` at
      output positions 3..5 and drop the original ``x4`` column;
    * the fitted model reaches R^2 >= 0.999 on the input frame.
    """
    np.random.seed(15)
    n_rows = 1000
    x1 = np.random.rand(n_rows)
    x2 = np.random.randn(n_rows)
    x3 = np.random.rand(n_rows)
    x4 = np.array([4] * 200 + [5] * 300 + [2] * 500, dtype=int)

    # Terms are summed left to right, in the same order as a single expression.
    linear_term = 15 * x1
    ratio_term = 3 / (x2 - 1 / x3)
    cubic_term = 5 * (x2 + np.log(x1))**3
    target = 2 + linear_term + ratio_term + cubic_term + x4

    X = np.vstack([x1, x2, x3, x4]).T

    def input_frame():
        # Every call site gets its own freshly built frame.
        return pd.DataFrame(X, columns=["x1", "x2", "x3", "x4"])

    def check_encoding(frame):
        # Encoded columns sit right after the three numeric ones; x4 is gone.
        encoded = list(frame.columns)[3:6]
        assert encoded == [
            "cat_x4_2.0", "cat_x4_4.0", "cat_x4_5.0"
        ], "categorical_cols were not transformed correctly"
        assert "x4" not in frame.columns, "categorical_cols weren't deleted from df"

    # A categorical column missing from the frame has to be rejected.
    bad_model = AutoFeatRegressor(verbose=1, categorical_cols=["x4", "x5"], feateng_steps=3)
    rejected = False
    try:
        bad_model.fit_transform(input_frame(), target)
    except ValueError:
        rejected = True
    if not rejected:
        raise AssertionError(
            "categorical_cols not in df should throw an error")

    model = AutoFeatRegressor(verbose=1, categorical_cols=["x4"], feateng_steps=3)
    check_encoding(model.fit_transform(input_frame(), target))
    check_encoding(model.transform(input_frame()))

    r2 = model.score(input_frame(), target)
    assert r2 >= 0.999, "R^2 should be 1."
def test_units():
    """Check AutoFeatRegressor when a ``units`` mapping is supplied.

    Three random columns are generated with a fixed seed. ``x2`` is tagged
    ``"m/sec"`` and ``x3`` is tagged ``"min/mm"``, while ``x1`` has no unit
    entry. After fitting, the model has to reach R^2 >= 0.999 on the input
    frame.
    """
    np.random.seed(15)
    n_rows = 1000
    x1 = np.random.rand(n_rows)
    x2 = np.random.randn(n_rows)
    x3 = np.random.rand(n_rows)

    # Terms are summed left to right, in the same order as a single expression.
    linear_term = 15 * x1
    ratio_term = 3 / (x2 - 1 / x3)
    cubic_term = 5 * (x2 * np.log(x1))**3
    target = 2 + linear_term + ratio_term + cubic_term

    X = np.vstack([x1, x2, x3]).T

    def input_frame():
        # Every call site gets its own freshly built frame.
        return pd.DataFrame(X, columns=["x1", "x2", "x3"])

    model = AutoFeatRegressor(
        verbose=1,
        units={"x2": "m/sec", "x3": "min/mm"},
        feateng_steps=3,
    )
    # Only the fitted state matters here; the transformed frame is discarded.
    model.fit_transform(input_frame(), target)

    r2 = model.score(input_frame(), target)
    assert r2 >= 0.999, "R^2 should be 1."
from autofeat import FeatureSelector, AutoFeatRegressor from sklearn.pipeline import make_pipeline import pickle def main(): # Get the dataset from the users GitHub repository dataset_path = "https://raw.githubusercontent.com/" + os.environ["GITHUB_REPOSITORY"] +"/master/dataset.csv" df = pd.read_csv(dataset_path) print() print(df.describe()) for steps in range(5): np.random.seed(55) print("### AutoFeat with %i feateng_steps" % steps) afreg = AutoFeatRegressor(verbose=1, feateng_steps=steps) df = afreg.fit_transform(df_org, target) r2 = afreg.score(df_org, target) print("## Final R^2: %.4f" % r2) plt.figure() plt.scatter(afreg.predict(df_org), target, s=2); plt.title("%i FE steps (R^2: %.4f; %i new features)" % (steps, r2, len(afreg.new_feat_cols_))) afreg = AutoFeatRegressor(verbose=1, feateng_steps=3) # train on noisy data df = afreg.fit_transform(df_org, target_noisy) # test on real targets print("Final R^2: %.4f" % afreg.score(df, target)) plt.figure() plt.scatter(afreg.predict(df), target, s=2); afreg = AutoFeatRegressor(verbose=1, feateng_steps=3) # train on noisy data df = afreg.fit_transform(df_org, target_very_noisy) # test on real targets