Ejemplo n.º 1
0
 def fit(self, X, y):
     """Record the columns whose absolute correlation with ``y`` is too low.

     Computes ``np.abs(r_regression(X, y))`` and stores, in
     ``self.cols_to_drop``, the indices of the columns whose value is less
     than or equal to ``self.threshold``.

     Parameters
     ----------
     X
         Feature data forwarded to ``r_regression``. ``X.shape[1]`` is
         reported as the total column count when ``self.verbose`` is truthy.
     y
         Target forwarded to ``r_regression``.

     Returns
     -------
     self
         The fitted instance, so that calls can be chained.
     """
     corr_coefs = np.abs(r_regression(X, y))
     # np.where returns a tuple of index arrays; [0] is the column indices.
     cols_to_drop = np.where(corr_coefs <= self.threshold)[0]
     if self.verbose:
         print(f"Threshold = {self.threshold}")
         print(f"Dropping {len(cols_to_drop)} of {X.shape[1]} columns")
         # NOTE(review): col_names is indexed with an integer array here, so
         # it presumably is a NumPy array or pandas Index -- confirm.
         print(f"Columns dropped: {self.col_names[cols_to_drop]}")
     self.cols_to_drop = cols_to_drop
     # Return the estimator itself so `fit(...)` can be chained.
     return self
Ejemplo n.º 2
0
def test_r_regression(center):
    """Check `r_regression` on dense and sparse input and against NumPy.

    The coefficients computed on a synthetic regression problem must lie
    strictly between -1 and 1, must be the same for a sparse copy of the
    data, and must agree with `np.corrcoef` to 3 decimals.
    """
    X, y = make_regression(
        n_samples=2000,
        n_features=20,
        n_informative=5,
        shuffle=False,
        random_state=0,
    )

    dense_coefs = r_regression(X, y, center=center)
    # Every coefficient has to fall strictly inside (-1, 1).
    assert np.all(dense_coefs > -1)
    assert np.all(dense_coefs < 1)

    # The sparse code path must reproduce the dense result.
    X_sparse = _convert_container(X, "sparse")
    sparse_coefs = r_regression(X_sparse, y, center=center)
    assert_allclose(sparse_coefs, dense_coefs)

    # Reference values from NumPy: append y as the last column and read the
    # correlation of each feature with it from the last column of the matrix.
    stacked = np.column_stack((X, y))
    reference_coefs = np.corrcoef(stacked, rowvar=False)[:-1, -1]
    assert_array_almost_equal(reference_coefs, dense_coefs, decimal=3)
Ejemplo n.º 3
0
def test_r_regression_force_finite(X, y, expected_corr_coef, force_finite):
    """Exercise `r_regression` with `force_finite` on corner-case inputs.

    Any `RuntimeWarning` emitted during the call is escalated to an error,
    and the returned coefficients are compared with `expected_corr_coef`.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/15672
    """
    with warnings.catch_warnings():
        # Promote RuntimeWarning to an exception so the test fails on it.
        warnings.simplefilter(action="error", category=RuntimeWarning)
        observed = r_regression(X, y, force_finite=force_finite)

    np.testing.assert_array_almost_equal(observed, expected_corr_coef)
Ejemplo n.º 4
0
def test_r_regression_force_finite(X, y, expected_corr_coef, force_finite):
    """Check the behaviour of `force_finite` for some corner cases with `r_regression`.

    The call must not emit any warning, and the returned coefficients are
    compared with `expected_corr_coef`.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/15672
    """
    # Imported locally so this test does not rely on a module-level import.
    import warnings

    # `pytest.warns(None)` was deprecated in pytest 7.0 and removed in 8.0;
    # record warnings with the standard library instead.
    with warnings.catch_warnings(record=True) as records:
        # "always" makes sure no warning is hidden by an existing filter.
        warnings.simplefilter("always")
        corr_coef = r_regression(X, y, force_finite=force_finite)
    assert not [str(w.message) for w in records]
    np.testing.assert_array_almost_equal(corr_coef, expected_corr_coef)