def fit(self, X, y):
    """Decide which columns of ``X`` to drop based on correlation with ``y``.

    A column is marked for removal when the absolute value of its
    ``r_regression`` coefficient is less than or equal to
    ``self.threshold``. The resulting positional indices are stored on
    ``self.cols_to_drop``.

    Parameters
    ----------
    X : array-like
        Feature matrix handed to ``r_regression``; must expose ``shape[1]``
        when ``self.verbose`` is truthy.
    y : array-like
        Target handed to ``r_regression``.

    Returns
    -------
    self
        The fitted instance, so calls can be chained
        (``est.fit(X, y).transform(X)``) as scikit-learn style estimators
        are expected to allow.
    """
    corr_coefs = np.abs(r_regression(X, y))

    # `<=` means a column sitting exactly on the threshold is dropped too.
    # NOTE(review): a NaN coefficient compares False here, so such a column
    # would be kept -- confirm that is the intended behaviour.
    cols_to_drop = np.where(corr_coefs <= self.threshold)[0]

    if self.verbose:
        print(f"Threshold = {self.threshold}")
        print(f"Dropping {len(cols_to_drop)} of {X.shape[1]} columns")
        # Indexing with an integer array: presumably `col_names` is a NumPy
        # array or pandas Index -- a plain list would raise here.
        print(f"Columns dropped: {self.col_names[cols_to_drop]}")

    self.cols_to_drop = cols_to_drop
    return self
def test_r_regression(center):
    """Check `r_regression` on dense and sparse input and against NumPy.

    The coefficients must lie strictly inside (-1, 1), be the same for a
    sparse copy of the data, and agree with the last column of
    `np.corrcoef` to three decimals.
    """
    X, y = make_regression(
        n_samples=2000,
        n_features=20,
        n_informative=5,
        shuffle=False,
        random_state=0,
    )

    dense_coeffs = r_regression(X, y, center=center)
    assert (dense_coeffs > -1).all()
    assert (dense_coeffs < 1).all()

    # The sparse code path has to reproduce the dense result.
    X_sparse = _convert_container(X, "sparse")
    assert_allclose(r_regression(X_sparse, y, center=center), dense_coeffs)

    # Reference values from NumPy: append y as an extra column, then read
    # the correlation of every feature column with that last column.
    stacked = np.hstack((X, y[:, np.newaxis]))
    reference_coeffs = np.corrcoef(stacked, rowvar=False)[:-1, -1]
    assert_array_almost_equal(reference_coeffs, dense_coeffs, decimal=3)
def test_r_regression_force_finite(X, y, expected_corr_coef, force_finite):
    """Check the behaviour of `force_finite` for some corner cases with
    `r_regression`.

    Any `RuntimeWarning` raised during the call is escalated to an error,
    so the test fails if the computation emits one.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/15672
    """
    with warnings.catch_warnings():
        # Escalate RuntimeWarning to an exception for the duration of the call.
        warnings.simplefilter("error", category=RuntimeWarning)
        observed = r_regression(X, y, force_finite=force_finite)

    np.testing.assert_array_almost_equal(observed, expected_corr_coef)
def test_r_regression_force_finite(X, y, expected_corr_coef, force_finite):
    """Check the behaviour of `force_finite` for some corner cases with
    `r_regression`.

    The call must not emit any warning: every warning category is escalated
    to an error while `r_regression` runs.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/15672
    """
    # Imported locally so this function does not rely on a module-level
    # `import warnings` being present.
    import warnings

    # `pytest.warns(None)` was deprecated in pytest 7 and is rejected by
    # pytest 8; turning warnings into errors is the supported way to assert
    # that nothing is emitted.
    with warnings.catch_warnings():
        warnings.simplefilter("error")
        corr_coef = r_regression(X, y, force_finite=force_finite)

    np.testing.assert_array_almost_equal(corr_coef, expected_corr_coef)