Example #1
0
def test_n_directions_auto_heuristic():
    """``n_directions='auto'`` should settle on two directions here.

    Also checks that the transformed data has shape ``(500, 2)``.
    """
    features, target = datasets.make_exponential(random_state=123)

    unfitted = SlicedAverageVarianceEstimation(n_directions='auto')
    fitted = unfitted.fit(features, target)
    assert fitted.n_directions_ == 2

    projected = fitted.transform(features)
    assert projected.shape == (500, 2)
Example #2
0
def test_n_slices_too_big():
    """Asking for 10 slices on a two-valued target gives ``n_slices_ == 2``."""
    y = np.array([1, 1, 1, 0, 0, 0])
    X = np.array(
        [[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]],
        dtype=np.float64,
    )

    estimator = SlicedAverageVarianceEstimation(n_slices=10, n_directions=1)
    estimator.fit(X, y)

    assert estimator.n_slices_ == 2
Example #3
0
def test_single_y_value():
    """Fitting against a constant target must raise ``ValueError``."""
    random_state = np.random.RandomState(123)
    features = random_state.randn(100, 4)

    # every response is identical, so there is only a single y value
    constant_target = np.ones(100)

    with pytest.raises(ValueError):
        SlicedAverageVarianceEstimation().fit(features, constant_target)
Example #4
0
def test_cubic():
    """The first fitted direction on the cubic data matches the true one.

    The comparison uses the absolute dot product, so the sign of the
    estimated direction does not matter.
    """
    X, y = datasets.make_cubic(random_state=123)
    fitted = SlicedAverageVarianceEstimation().fit(X, y)

    # unit vector with equal weight on the first two of ten coordinates
    support = np.hstack((np.ones(2), np.zeros(8)))
    true_beta = (1 / np.sqrt(2)) * support

    first_direction = fitted.directions_[0, :]
    alignment = np.abs(np.dot(true_beta, first_direction))
    np.testing.assert_allclose(alignment, 1, rtol=1e-1)
Example #5
0
def test_regression():
    """Check output shape, fit/transform agreement and the first direction.

    NOTE: subsequent calls may flip the direction of eigenvectors
    (multiply by -1), so we can only compare absolute values.

    This was not a problem for svds.. investigate if we can get
    deterministic behavior back.
    """
    X, y = datasets.make_quadratic(random_state=123)
    n_features = X.shape[1]

    # the reference direction is the same for every n_dir, so build it once
    true_beta = (1 / np.sqrt(2)) * np.hstack((np.ones(2), np.zeros(8)))

    for n_dir in range(1, n_features):
        save = SlicedAverageVarianceEstimation(n_directions=n_dir)

        # the projection has one column per requested direction
        fit_then_transform = save.fit(X, y).transform(X)
        np.testing.assert_equal(fit_then_transform.shape[1], n_dir)

        # fit followed by transform should match fit_transform (up to sign)
        fit_transform_once = save.fit_transform(X, y)
        np.testing.assert_allclose(
            np.abs(fit_then_transform), np.abs(fit_transform_once))

        # transform again, refit, and check the two still agree (up to sign)
        transform_again = save.transform(X)
        fit_transform_again = save.fit_transform(X, y)
        np.testing.assert_allclose(
            np.abs(transform_again), np.abs(fit_transform_again))

        # the first estimated direction should line up with the true one
        first_direction = save.directions_[0, :]
        alignment = np.abs(np.dot(true_beta, first_direction))
        np.testing.assert_allclose(alignment, 1, rtol=1e-1)
Example #6
0
def test_matches_swiss_banknote():
    """Check SAVE on the banknote data against R ``dr`` package results.

    Both the eigenvalues and the four fitted directions are compared with
    hard-coded reference values.
    """
    expected_eigenvalues = np.array(
        [0.87239404, 0.42288351, 0.12792117, 0.03771284])
    expected_directions = np.array([
        [0.03082069, 0.20309393, -0.25314643,
         -0.58931337, -0.56801632, 0.47306135],
        [-0.2841728, -0.05472057, -0.15731808,
         0.50606843, 0.33404888, 0.72374622],
        [0.09905744, -0.88896348, 0.42252244,
         -0.00162151, -0.09222179, -0.11357311],
        [0.75251819, -0.26448055, 0.59669025,
         0.03982343, -0.018666, 0.07611073],
    ])

    X, y = datasets.load_banknote()
    fitted = SlicedAverageVarianceEstimation(n_directions=4).fit(X, y)

    np.testing.assert_allclose(fitted.eigenvalues_, expected_eigenvalues)
    np.testing.assert_allclose(
        fitted.directions_, expected_directions, atol=1e-8)
Example #7
0
"""
====================
Clustering with SAVE
====================

Sliced Average Variance Estimation is able to find three distinct clusters
in a dataset used to classify counterfeit Swiss banknotes.
"""
import matplotlib.pyplot as plt

from sliced.datasets import load_banknote
from sliced import SlicedAverageVarianceEstimation

# Load the banknote data through the package's own loader.
X, y = load_banknote()

# Fit SAVE requesting two directions and two slices, then project the data.
save = SlicedAverageVarianceEstimation(n_directions=2, n_slices=2)
X_save = save.fit_transform(X, y)

# Plot the first projected coordinate against the second, coloured by y.
plt.scatter(X_save[:, 0], X_save[:, 1], c=y, alpha=0.8, edgecolor='k')

# Raw strings keep the LaTeX backslashes literal: "\h" in a normal string
# literal is an invalid escape sequence that newer Python versions warn about.
plt.xlabel(r"$X\hat{\beta}_{1}$")
plt.ylabel(r"$X\hat{\beta}_{2}$")
plt.title("Swiss Banknote Data")

plt.show()
Example #8
0
def test_sparse_not_supported():
    """Fitting on a CSR sparse matrix must raise ``TypeError``."""
    dense_X, y = datasets.make_cubic(random_state=123)
    sparse_X = sparse.csr_matrix(dense_X)

    with pytest.raises(TypeError):
        SlicedAverageVarianceEstimation().fit(sparse_X, y)
Example #9
0
def test_n_directions_none():
    """``n_directions=None`` should yield one direction per feature column."""
    X, y = datasets.make_cubic(random_state=123)
    n_features = X.shape[1]

    save = SlicedAverageVarianceEstimation(n_directions=None).fit(X, y)
    np.testing.assert_equal(save.n_directions_, n_features)
Example #10
0
def test_zero_variance_features():
    """Fitting on data with zero-variance features raises ``LinAlgError``.

    The digits data is used as the input; presumably it contains constant
    feature columns, which is what the test name refers to.
    """
    X, y = load_digits(return_X_y=True)

    # fit() is expected to raise, so its return value is never bound
    with pytest.raises(linalg.LinAlgError):
        SlicedAverageVarianceEstimation(n_directions='auto').fit(X, y)
Example #11
0
"""
==================================
Sliced Average Variance Estimation
==================================

An example plot of :class:`sliced.save.SlicedAverageVarianceEstimation`
"""
import numpy as np
import matplotlib.pyplot as plt

from sliced import SlicedAverageVarianceEstimation
from sliced import datasets

# Generate the quadratic example data with a fixed seed.
X, y = datasets.make_quadratic(random_state=123)

# Fit SAVE with its default settings and project the data.
save = SlicedAverageVarianceEstimation()
X_save = save.fit_transform(X, y)

# estimate of the first dimension reducing direction
beta1_hat = save.directions_[0, :]

# Plot the first projected coordinate against the response.
plt.scatter(X_save[:, 0], y, c=y, cmap='viridis', linewidth=0.5, edgecolor='k')

# Raw strings keep the LaTeX backslashes literal: "\h" in a normal string
# literal is an invalid escape sequence that newer Python versions warn about.
plt.xlabel(r"$X\hat{\beta_1}$")
plt.ylabel("y")

# annotation showing the reference direction next to the one found
beta_text = r"$\beta_1$ = " + "{0}".format([0.707, 0.707])
plt.annotate(beta_text, xy=(-1, 2))

# only the first two rounded coefficients of the estimate are displayed
beta1_hat_text = r"$\hat{\beta_1}$ = " + "{0}".format(
    np.round(beta1_hat, 3).tolist()[:2])
plt.annotate(beta1_hat_text, xy=(-1, 1.75))