Example 1
def test_errors_alpha_out_of_bounds():
    X, y = datasets.make_cubic(random_state=123)

    sir = SlicedInverseRegression(alpha=10)

    with pytest.raises(ValueError):
        sir.fit(X, y)
Example 2
def test_n_directions_auto_heuristic():
    X, y = datasets.make_exponential(random_state=123)
    sir = SlicedInverseRegression(n_directions='auto').fit(X, y)
    assert sir.n_directions_ == 2

    X_sir = sir.transform(X)
    assert X_sir.shape == (500, 2)
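The 'auto' heuristic infers the number of directions from the eigenvalue
spectrum of the fitted model. The exact rule is internal to sliced; the
sketch below shows one common approach (the 0.9 cutoff is an assumption
for illustration, not sliced's actual threshold).

import numpy as np

def pick_n_directions(eigenvalues, cutoff=0.9):
    # keep the fewest leading directions whose eigenvalues account
    # for at least `cutoff` of the total eigenvalue mass
    explained = np.cumsum(eigenvalues) / np.sum(eigenvalues)
    return int(np.searchsorted(explained, cutoff) + 1)

# e.g. on the fitted model above: pick_n_directions(sir.eigenvalues_)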
Example 3
def sir_pre(data_train_center, data_test_center, y_label):
    """Fit SIR on the centered training data (with zero-variance
    columns removed) and project both splits onto the directions."""
    sir = SlicedInverseRegression(n_directions=9, n_slices=10)
    nonzero_id = np.where(np.std(data_train_center, 0) != 0)[0]
    data_train_center_shrink = data_train_center[:, nonzero_id]
    sir.fit(data_train_center_shrink, y_label)
    data_sir_fit = data_train_center_shrink @ np.transpose(sir.directions_)
    data_sir_test = data_test_center[:, nonzero_id] @ np.transpose(
        sir.directions_)

    return data_sir_fit, data_sir_test
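A hedged usage sketch for sir_pre (the synthetic data and shapes below are
assumptions for illustration; the function expects both splits to already
be centered with the training mean).

import numpy as np

rng = np.random.RandomState(0)
X_train, X_test = rng.randn(200, 12), rng.randn(80, 12)
y_train = rng.randn(200)

# center both splits with the *training* mean before calling sir_pre
mu = X_train.mean(axis=0)
train_sir, test_sir = sir_pre(X_train - mu, X_test - mu, y_train)
print(train_sir.shape, test_sir.shape)  # (200, 9) (80, 9)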
Example 4
def test_all_zero_coefficients_warns_and_does_not_zero_out():
    """To avoid errors a t-test that indicates that all coefficients are
    zero will not zero-out the directions vector.
    """
    X, y = load_breast_cancer(return_X_y=True)
    sir = SlicedInverseRegression(n_directions=2, alpha=0.05)

    with pytest.warns(RuntimeWarning):
        sir.fit(X, y)

    assert np.any(sir.directions_ != 0)
Example 5
def test_classification():
    """SIR is LDA for classification so lets test some predictions."""
    # Data is just 6 separable points in the plane
    X = np.array([[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]],
                 dtype=np.float64)
    y = np.array([1, 1, 1, 0, 0, 0])

    sir = SlicedInverseRegression(n_directions=1, n_slices=2).fit(X, y)
    lda = LinearDiscriminantAnalysis(solver='eigen').fit(X, y)

    y_pred = sir.transform(X) > 0
    np.testing.assert_equal(y, y_pred.ravel())
    np.testing.assert_equal(lda.predict(X), y_pred.ravel())
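Continuing from the objects fitted above, the equivalence can be probed a
bit further (a hedged check, not part of the original test): if SIR
recovers the LDA subspace here, the two direction vectors should be
collinear up to sign and scale.

sir_dir = sir.directions_[0]
lda_dir = lda.scalings_[:, 0]
cos = sir_dir @ lda_dir / (np.linalg.norm(sir_dir) * np.linalg.norm(lda_dir))
print(abs(cos))  # close to 1 if the directions are collinear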
Example 6
def test_matches_athletes():
    """Test that the resutls match the R dr package on a ais dataset.
    """
    X, y = datasets.load_athletes()
    sir = SlicedInverseRegression(n_directions=4, n_slices=11).fit(X, y)

    np.testing.assert_allclose(
        sir.eigenvalues_,
        np.array([0.957661631, 0.245041613, 0.107075941, 0.090413047]))

    expected_directions = np.array(
        [[
            1.50963358e-01, -9.16480522e-01, -1.31538894e-01, -9.33588596e-02,
            4.46783829e-03, -1.88973540e-01, 2.74758965e-01, -5.63123794e-03
        ],
         [
             -5.01785457e-02, -1.94229862e-01, 6.85475076e-01, -4.33408964e-02,
             1.83380846e-04, 3.47565293e-01, -6.05830142e-01, 1.30588502e-02
         ],
         [
             1.08983356e-01, -2.01236965e-01, 7.19975455e-01, 4.64453982e-01,
             4.49759016e-02, 2.94969081e-01, -3.41966152e-01, -8.70270913e-02
         ],
         [
             -2.21020634e-03, -8.97220257e-02, -6.63097774e-01, 2.90838658e-01,
             7.19045566e-02, 3.70563626e-02, 6.78877114e-01, 1.55472144e-02
         ]])

    np.testing.assert_allclose(sir.directions_, expected_directions)
Example 7
def test_single_y_value():
    rng = np.random.RandomState(123)

    X = rng.randn(100, 4)
    y = np.ones(100)

    with pytest.raises(ValueError):
        SlicedInverseRegression().fit(X, y)
Example 8
def test_n_slices_too_big():
    X = np.array([[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]],
                 dtype=np.float64)
    y = np.array([1, 1, 1, 0, 0, 0])

    sir = SlicedInverseRegression(n_directions=1, n_slices=10).fit(X, y)

    assert sir.n_slices_ == 2
Example 9
def test_sparse_coefficient():
    """Test the component-wise t-test works on a simple synthetic dataset.
    """
    rng = np.random.RandomState(123)

    n_samples = 300
    n_features = 6
    X = rng.randn(n_samples, n_features)
    noise = rng.randn(n_samples).reshape(-1, 1)
    beta = np.array([1, 0, 0, -1, 0, 0]).reshape(-1, 1)
    y = np.exp(-0.75 * np.dot(X, beta)) + 0.5 * noise

    sir = SlicedInverseRegression(alpha=0.05)
    sir.fit(X, y.ravel())

    np.testing.assert_array_equal(sir.directions_.ravel() != 0,
                                  beta.ravel() != 0)
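Continuing from the fit above, the support recovered by the t-test can
also be inspected directly (not part of the original test):

# indices of the coefficients the t-test kept; given the beta above,
# these should be features 0 and 3
active = np.flatnonzero(sir.directions_.ravel())
print(active)  # [0 3]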
Example 10
def test_regression():
    """NOTE: subsequent calls may flip the direction of eigenvectors
        (mulitply by -1), so we can only compare absolute values.

        This was not a problem for svds.. investigate if we can get
        deterministic behavior back.
    """
    X, y = datasets.make_cubic(random_state=123)

    for n_dir in range(1, X.shape[1]):
        sir = SlicedInverseRegression(n_directions=n_dir)

        # check the output shape is correct
        X_sir = sir.fit(X, y).transform(X)
        np.testing.assert_equal(X_sir.shape[1], n_dir)

        # should match fit_transform
        X_sir2 = sir.fit_transform(X, y)
        np.testing.assert_allclose(np.abs(X_sir), np.abs(X_sir2))

        # call transform again and check if things are okay
        X_sir = sir.transform(X)
        X_sir2 = sir.fit_transform(X, y)
        np.testing.assert_allclose(np.abs(X_sir), np.abs(X_sir2))

        # there is one true direction it should find
        true_beta = (1 / np.sqrt(2)) * np.hstack((np.ones(2), np.zeros(8)))
        angle = np.dot(true_beta, sir.directions_[0, :])
        np.testing.assert_allclose(np.abs(angle), 1, rtol=1e-1)
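The sign ambiguity noted in the docstring can be removed after fitting by
imposing a sign convention. A minimal sketch of one such convention (not
part of sliced's API):

def fix_signs(directions):
    # flip each direction so its largest-magnitude entry is positive
    idx = np.argmax(np.abs(directions), axis=1)
    signs = np.sign(directions[np.arange(directions.shape[0]), idx])
    return directions * signs[:, np.newaxis]

# e.g. compare fix_signs(sir.directions_) across fits instead of
# comparing absolute values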
Example 11
def test_sparse_coefficient_multiple_dimensions():
    """Perform the t-test on a dataset with two directions.
    """
    rng = np.random.RandomState(123)

    n_samples = 300
    n_features = 15
    beta = np.zeros((2, n_features))
    beta[0, :9] = 1
    beta[1, 9:] = 1
    X = rng.randn(n_samples, n_features)
    y = (np.sign(np.dot(X, beta[0, :])) *
         np.log(np.abs(np.dot(X, beta[1, :]) + 5)))

    sir = SlicedInverseRegression(n_directions=2, alpha=0.05)
    sir.fit(X, y.ravel())

    # the first true direction (beta[0]) is recovered exactly in row 1
    np.testing.assert_array_equal(sir.directions_[1, :].ravel() != 0,
                                  beta[0, :].ravel() != 0)

    # the second direction's support is recovered in row 0, along with
    # a few spurious coefficients
    assert np.all(sir.directions_[0, :].ravel()[9:] != 0)
Example 12
def __init__(self):
    super().__init__()
    self.regressor = SlicedInverseRegression()
Example 13
"""
=================================
Sliced Inverse Regression vs. PCA
=================================

A comparison of the subspace found by sliced inverse regression and
principal component analysis on the Australian athletes dataset.
"""
import matplotlib.pyplot as plt

from sklearn.decomposition import PCA

from sliced.datasets import load_athletes
from sliced import SlicedInverseRegression

X, y = load_athletes()

# fit SIR model
sir = SlicedInverseRegression(n_slices=11).fit(X, y)
X_sir = sir.transform(X)

# fit PCA
pca = PCA(random_state=123).fit(X, y)
X_pca = pca.transform(X)

f, (ax1, ax2) = plt.subplots(1, 2, sharey=True)

ax1.scatter(X_sir[:, 0], y, c=y, cmap='viridis', linewidth=0.5, edgecolor='k')
ax1.set_title('SIR Subspace')
ax1.set_xlabel(r"$X\hat{\beta}_{SIR}$")
ax1.set_ylabel("Lean Body Mass (kg)")

ax2.scatter(X_pca[:, 0], y, c=y, cmap='viridis', linewidth=0.5, edgecolor='k')
ax2.set_title('PCA Subspace')

plt.show()
Example 14
def test_sparse_not_supported():
    X, y = datasets.make_cubic(random_state=123)
    X = sparse.csr_matrix(X)
    with pytest.raises(TypeError):
        SlicedInverseRegression().fit(X, y)
Example 15
"""
=========================
Sliced Inverse Regression
=========================

An example plot of :class:`sliced.sir.SlicedInverseRegression`
"""
import numpy as np
import matplotlib.pyplot as plt

from sliced import SlicedInverseRegression
from sliced import datasets

X, y = datasets.make_cubic(random_state=123)

sir = SlicedInverseRegression()
X_sir = sir.fit_transform(X, y)

# estimate of the first dimension-reducing direction
beta1_hat = sir.directions_[0, :]

# plot data projected onto the first direction
plt.scatter(X_sir[:, 0], y, c=y, cmap='viridis', linewidth=0.5, edgecolor='k')
plt.xlabel(r"$X\hat{\beta_1}$")
plt.ylabel("y")

# annotation showing the direction found
beta_text = "$\\beta_1$ = " + "{0}".format([0.707, 0.707])
plt.annotate(beta_text, xy=(-2, 6.5))
beta1_hat_text = r"$\hat{\beta_1}$ = " + "{0}".format(
    np.round(beta1_hat, 3).tolist()[:2])
plt.annotate(beta1_hat_text, xy=(-2, 6))  # placed just below the true-beta label

plt.show()
Example 16
def test_n_directions_none():
    X, y = datasets.make_cubic(random_state=123)
    sir = SlicedInverseRegression(n_directions=None).fit(X, y)
    np.testing.assert_equal(sir.n_directions_, X.shape[1])
Example 17
"""
=======================
Binary Targets with SIR
=======================

Sliced Inverse Regression is able to find a one-dimensional subspace
that separates the two classes in the famous breast cancer dataset.
"""
import matplotlib.pyplot as plt

from sklearn.datasets import load_breast_cancer
from sliced import SlicedInverseRegression

X, y = load_breast_cancer(return_X_y=True)

sir = SlicedInverseRegression(n_directions=2).fit(X, y)
X_sir = sir.transform(X)

plt.scatter(X_sir[:, 0], X_sir[:, 1], c=y, alpha=0.8, edgecolor='k')
plt.xlabel(r"$X\hat{\beta}_{1}$")
plt.ylabel(r"$X\hat{\beta}_{2}$")
plt.title("Breast Cancer Data")

plt.show()
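With a binary target there are only two slices, and the covariance of the
slice means then has rank at most one, so only the first SIR direction can
carry real information. A quick way to see this on the fitted model above:

# the second eigenvalue should be negligible next to the first
print(sir.eigenvalues_)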
Example 18
def test_zero_variance_features():
    """Raise an informative error message when features of zero variance."""
    X, y = load_digits(return_X_y=True)

    with pytest.raises(linalg.LinAlgError):
        SlicedInverseRegression(n_directions='auto').fit(X, y)
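A common workaround (a sketch, not part of sliced itself) is to drop the
constant features before fitting, e.g. with scikit-learn's VarianceThreshold:

from sklearn.feature_selection import VarianceThreshold

# the default threshold of 0.0 removes exactly the zero-variance features;
# depending on conditioning, a stricter threshold may still be needed
X_reduced = VarianceThreshold().fit_transform(X)
SlicedInverseRegression(n_directions='auto').fit(X_reduced, y)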