def test_pipeline_gridsearch():
    """Grid-search ``info__columns`` inside a pipeline; expect one result row per candidate."""
    X, y = load_boston(return_X_y=True)

    steps = [
        ("info", InformationFilter(columns=[11, 12])),
        ("model", LinearRegression()),
    ]
    # Each candidate is a different set of column indices handed to the "info" step.
    candidate_columns = [[], [11], [12], [11, 12]]
    search = GridSearchCV(
        estimator=Pipeline(steps),
        param_grid={"info__columns": candidate_columns},
        cv=2,
    )
    search.fit(X, y)

    results = pd.DataFrame(search.cv_results_)
    assert len(results) == len(candidate_columns)
def test_alpha_param2():
    """With ``alpha=0.0`` and named columns, the output equals the frame minus those columns."""
    X, y = load_boston(return_X_y=True)
    feature_names = [
        "crim",
        "zn",
        "indus",
        "chas",
        "nox",
        "rm",
        "age",
        "dis",
        "rad",
        "tax",
        "ptratio",
        "b",
        "lstat",
    ]
    frame = pd.DataFrame(X, columns=feature_names)

    dropped = ["b", "lstat"]
    # Reference result: the original values with the filtered columns simply removed.
    expected = frame.drop(columns=dropped).values
    transformed = InformationFilter(columns=dropped, alpha=0.0).fit_transform(frame)

    assert np.isclose(transformed, expected).all()
def test_output_orthogonal_general_cols():
    """Filtering on any single named column leaves every output column orthogonal to it.

    For each column name in turn, the frame is passed through an
    ``InformationFilter`` configured with that one column, and the dot product
    of every transformed column with the original column must be within
    ``1e-5`` of zero.
    """
    X, y = load_boston(return_X_y=True)
    cols = [
        "crim",
        "zn",
        "indus",
        "chas",
        "nox",
        "rm",
        "age",
        "dis",
        "rad",
        "tax",
        "ptratio",
        "b",
        "lstat",
    ]
    df = pd.DataFrame(X, columns=cols)
    for col in cols:
        X_fair = InformationFilter(columns=col).fit_transform(df)
        # Compare the magnitude: a one-sided `< 1e-5` would let an arbitrarily
        # large negative dot product pass as "orthogonal".
        assert all(abs((c * df[col]).sum()) < 1e-5 for c in X_fair.T)
def test_alpha_param1():
    """With ``alpha=0.0`` and index columns, the output equals the array minus those columns."""
    X, y = load_boston(return_X_y=True)

    dropped = [11, 12]
    # Reference result: the input array with the filtered columns deleted.
    expected = np.delete(X, dropped, axis=1)
    transformed = InformationFilter(columns=dropped, alpha=0.0).fit_transform(X)

    assert np.isclose(transformed, expected).all()
def test_output_orthogonal():
    """Filtering columns 11 and 12 leaves every output column orthogonal to both.

    The dot product of each transformed column with each of the two original
    filtered columns must be within ``1e-5`` of zero.
    """
    X, y = load_boston(return_X_y=True)
    filtered_ids = [11, 12]
    X_fair = InformationFilter(columns=filtered_ids).fit_transform(X)
    for col_id in filtered_ids:
        # Compare the magnitude: a one-sided `< 1e-5` would let an arbitrarily
        # large negative dot product pass as "orthogonal".
        assert all(abs((c * X[:, col_id]).sum()) < 1e-5 for c in X_fair.T)
def test_v_columns_orthogonal():
    """The vectors built by ``_make_v_vectors`` for columns 11 and 12 multiply-and-sum to ~0."""
    X, y = load_boston(return_X_y=True)
    col_ids = [11, 12]
    fitted = InformationFilter(columns=col_ids).fit(X, y)
    vectors = fitted._make_v_vectors(X, col_ids)

    # Multiply entries across each row, then add the rows up; presumably there
    # is one vector per requested column, making this their dot product.
    row_products = vectors.prod(axis=1)
    assert row_products.sum() == pytest.approx(0, abs=1e-5)
def test_estimator_checks(test_fn):
    """Run the supplied check against filters built with no columns and with column 0.

    ``test_fn`` is called with the estimator class name and an instance; it is
    presumably injected by a parametrization or fixture outside this view.
    """
    estimator_name = InformationFilter.__name__
    for columns in ([], [0]):
        test_fn(estimator_name, InformationFilter(columns=columns))