Example No. 1
    def __init__(self):
        # load the iris bunch once and keep both the features and targets
        iris = datasets.load_iris()
        self.x = iris['data']
        self.y = iris['target']

        indices = []
Example No. 2
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm
from scipy.cluster.vq import kmeans2
from sklearn import datasets
from sklearn.metrics import pairwise_distances, silhouette_score


def main():
    # load the iris feature matrix
    data = datasets.load_iris()
    X = data['data']

    # cluster with k-means, then compute silhouette values
    K = 8
    C, cIDX = kmeans2(X, K)
    print(cIDX)
    D = pairwise_distances(X, metric='euclidean')
    # silhouette_coefficient is a project-local helper (defined elsewhere
    # in the source); silhouette_score is the library's mean score
    s = silhouette_coefficient(cIDX, D)
    silhouette = silhouette_score(D, cIDX, metric='precomputed')
    # plot: sort samples by cluster, and within each cluster by
    # decreasing silhouette value
    order = np.lexsort((-s, cIDX))
    indices = [np.flatnonzero(cIDX[order] == k) for k in range(K)]
    ytick = [(np.max(ind) + np.min(ind)) / 2 for ind in indices]
    ytickLabels = ["%d" % x for x in range(K)]
    cmap = cm.jet(np.linspace(0, 1, K)).tolist()
    clr = [cmap[i] for i in cIDX[order]]

    fig = plt.figure()
    ax = fig.add_subplot(111)
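    # draw one horizontal bar per sample, colored by its cluster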
    ax.barh(range(X.shape[0]), s[order], height=1.0,
            edgecolor='none', color=clr)
    ax.set_ylim(ax.get_ylim()[::-1])
    plt.yticks(ytick, ytickLabels)
    plt.xlabel('Silhouette Value')
    plt.ylabel('Cluster')
    plt.show()
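
# not in the original snippet: run the example when executed as a script
if __name__ == '__main__':
    main()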
Example No. 3
from scikits.learn import datasets, manifold, neighbors, pipeline


def test_pipeline():
    # check that Isomap works fine as a transformer in a Pipeline
    iris = datasets.load_iris()
    clf = pipeline.Pipeline(
        [('isomap', manifold.Isomap()),
         ('neighbors_clf', neighbors.NeighborsClassifier())])
    clf.fit(iris.data, iris.target)
    assert clf.score(iris.data, iris.target) > .7
Example No. 4
def test_pipeline():
    # check that LocallyLinearEmbedding works fine as a Pipeline
    from scikits.learn import pipeline, datasets, manifold, neighbors
    iris = datasets.load_iris()
    clf = pipeline.Pipeline(
        [('filter', manifold.LocallyLinearEmbedding()),
         ('clf', neighbors.NeighborsClassifier())])
    clf.fit(iris.data, iris.target)
    assert clf.score(iris.data, iris.target) > .7
Example No. 5
from numpy.testing import assert_array_almost_equal
from scikits.learn import datasets, svm


def test_SVC_iris():
    """Test the sparse SVC with the iris dataset"""
    iris = datasets.load_iris()
    sp_clf = svm.sparse.SVC(kernel='linear').fit(iris.data, iris.target)
    clf = svm.SVC(kernel='linear').fit(iris.data, iris.target)

    # the sparse and dense fits must agree on every learned attribute
    assert_array_almost_equal(clf.support_, sp_clf.support_.todense())
    assert_array_almost_equal(clf.dual_coef_, sp_clf.dual_coef_.todense())
    assert_array_almost_equal(clf.coef_, sp_clf.coef_.todense())
    assert_array_almost_equal(clf.predict(iris.data), sp_clf.predict(iris.data))
Example No. 6
from numpy.testing import assert_array_almost_equal, assert_equal
from scikits.learn import datasets, svm


def test_LinearSVC_iris():
    """Test the sparse LinearSVC with the iris dataset"""
    iris = datasets.load_iris()
    sp_clf = svm.sparse.LinearSVC().fit(iris.data, iris.target)
    clf = svm.LinearSVC().fit(iris.data, iris.target)

    assert_array_almost_equal(clf.label_, sp_clf.label_)
    assert_equal(clf.fit_intercept, sp_clf.fit_intercept)

    assert_array_almost_equal(clf.raw_coef_, sp_clf.raw_coef_, decimal=1)
    assert_array_almost_equal(clf.predict(iris.data), sp_clf.predict(iris.data))
Example No. 7
import numpy as np
from numpy.testing import assert_array_equal, assert_array_almost_equal, \
                          assert_almost_equal
from scikits.learn import datasets, svm

# X, Y, T and true_result are toy arrays defined at module level in the
# original test file (elided from this excerpt)


def test_precomputed():
    """
    SVC with a precomputed kernel.

    We test it with a toy dataset and with iris.
    """
    clf = svm.SVC(kernel='precomputed')
    # Gram matrix of the training data (we use just a linear kernel)
    K = np.dot(X, np.array(X).T)
    clf.fit(K, Y)
    # KT is the Gram matrix between the test points and the training data
    KT = np.dot(T, np.array(X).T)
    pred = clf.predict(KT)

    assert_array_equal(clf.dual_coef_, [[0.25, -.25]])
    assert_array_equal(clf.intercept_, [0])
    assert_array_almost_equal(clf.support_, [[2], [4]])
    assert_array_equal(pred, true_result)

    # same as before, but using a callable function instead of the kernel
    # matrix. kernel is just a linear kernel

    kfunc = lambda x, y: np.dot(x, y.T)
    clf = svm.SVC(kernel=kfunc)
    clf.fit(X, Y)
    pred = clf.predict(T)

    assert_array_equal(clf.dual_coef_, [[0.25, -.25]])
    assert_array_equal(clf.intercept_, [0])
    assert_array_almost_equal(clf.support_, [[2], [4]])
    assert_array_equal(pred, true_result)

    # test a precomputed kernel with the iris dataset
    clf = svm.SVC(kernel='precomputed')
    iris = datasets.load_iris()
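    # linear-kernel Gram matrix of the full iris data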
    K = np.dot(iris.data, iris.data.T)
    clf.fit(K, iris.target)
    pred = clf.predict(K)
    assert_almost_equal(np.mean(pred == iris.target), .99, decimal=2)

    clf = svm.SVC(kernel=kfunc)
    clf.fit(iris.data, iris.target)
    pred = clf.predict(iris.data)
    assert_almost_equal(np.mean(pred == iris.target), .99, decimal=2)
Example No. 8
import numpy as np
from numpy.testing import assert_array_almost_equal
from scikits.learn import svm

# X is the toy training set defined at module level in the original test
# file (elided from this excerpt)


def test_probability():
    """
    Predict probabilities using SVC

    This uses cross validation, so we use a slightly bigger testing set.
    """
    from scikits.learn import datasets
    iris = datasets.load_iris()

    clf = svm.SVC(probability=True)
    clf.fit(iris.data, iris.target)

    # predict on a simple dataset
    T = [[0, 0, 0, 0],
         [2, 2, 2, 2]]
    assert_array_almost_equal(clf.predict_proba(T),
                              [[0.993, 0.003, 0.002],
                               [0.740, 0.223, 0.035]],
                              decimal=2)

    # make sure probabilities sum to one
    pprob = clf.predict_proba(X)
    assert_array_almost_equal(pprob.sum(axis=1),
                              np.ones(len(X)))
Example No. 9
import random
import numpy as np
from scikits.learn import svm, datasets
from scikits.learn.metrics import roc, auc, precision_recall, \
            confusion_matrix, zero_one, explained_variance, \
            mean_square_error

from numpy.testing import assert_array_equal, \
                          assert_array_almost_equal, \
                          assert_equal, assert_almost_equal

# import some data to play with
iris = datasets.load_iris()
X = iris.data
y = iris.target
# keep only classes 0 and 1 to get a binary problem
X, y = X[y != 2], y[y != 2]
n_samples, n_features = X.shape

# shuffle the samples with a fixed seed for reproducibility
p = list(range(n_samples))
random.seed(0)
random.shuffle(p)
X, y = X[p], y[p]
half = int(n_samples / 2)

# Add noisy features to make the problem harder
np.random.seed(0)
X = np.c_[X, np.random.randn(n_samples, 200 * n_features)]

# Run classifier: train on the first half, evaluate on the second half
classifier = svm.SVC(kernel='linear', probability=True)
probas_ = classifier.fit(X[:half], y[:half]).predict_proba(X[half:])
y_ = classifier.predict(X[half:])
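
# Not part of the original snippet: a hedged sketch of the natural next
# step, scoring the held-out half with the metrics imported above; this
# assumes the era `roc` helper matches the later roc_curve(y_true, y_score)
# signature and returns (fpr, tpr, thresholds).
fpr, tpr, thresholds = roc(y[half:], probas_[:, 1])
roc_auc = auc(fpr, tpr)
print('Area under the ROC curve: %0.2f' % roc_auc)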
Example No. 10
"""
print __doc__

from scipy import linalg
import numpy as np
import pylab as pl
import matplotlib as mpl

from scikits.learn.lda import LDA
from scikits.learn.qda import QDA

################################################################################
# load sample dataset
from scikits.learn.datasets import load_iris

iris = load_iris()
X = iris.data[:, :2]  # take only the first two features
y = iris.target
# drop class 0 and relabel so the problem is binary with labels {0, 1}
X = X[y > 0]
y = y[y > 0]
y -= 1
target_names = iris.target_names[1:]

################################################################################
# LDA
lda = LDA()
y_pred = lda.fit(X, y, store_covariance=True).predict(X)

# QDA
qda = QDA()
y_pred = qda.fit(X, y, store_covariances=True).predict(X)
Example No. 11
# Author: Alexandre Gramfort <*****@*****.**>
#         Gael Varoquaux <*****@*****.**>
#         Virgile Fritsch <*****@*****.**>
#
# License: BSD Style.

from numpy.testing import assert_almost_equal, assert_array_almost_equal

from .. import empirical_covariance, EmpiricalCovariance, \
    ShrunkCovariance, shrunk_covariance, LedoitWolf, ledoit_wolf, OAS, oas

import numpy as np
from scikits.learn import datasets

X = datasets.load_iris().data
n_samples, n_features = X.shape

def test_covariance():
    """Test the Covariance module on a simple dataset."""
    # test covariance fit from data
    cov = EmpiricalCovariance()
    cov.fit(X)
    assert_array_almost_equal(empirical_covariance(X), cov.covariance_, 4)
    assert_almost_equal(cov.error_norm(empirical_covariance(X)), 0)
    assert_almost_equal(
        cov.error_norm(empirical_covariance(X), norm='spectral'), 0)

    # test with n_features = 1
    X_1d = X[:, 0]
Example No. 12
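# excerpt from an ellipse-plotting helper: gmm, n, color and the axes
# handle h are defined by surrounding code elided from this excerpt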
        v, w = np.linalg.eigh(gmm.covars[n][:2, :2])
        u = w[0] / np.linalg.norm(w[0])
        angle = np.arctan(u[1] / u[0])
        angle = 180 * angle / np.pi  # convert to degrees
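        # scale the ellipse axes up so they are visible on the plot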
        v *= 9
        ell = mpl.patches.Ellipse(gmm.means[n, :2],
                                  v[0],
                                  v[1],
                                  180 + angle,
                                  color=color)
        ell.set_clip_box(h.bbox)
        ell.set_alpha(0.5)
        h.add_artist(ell)


iris = datasets.load_iris()

# Break up the dataset into non-overlapping training (75%) and testing
# (25%) sets.
skf = StratifiedKFold(iris.target, k=4)
# Only take the first fold.
train_index, test_index = next(iter(skf))

X_train = iris.data[train_index]
y_train = iris.target[train_index]
X_test = iris.data[test_index]
y_test = iris.target[test_index]

n_classes = len(np.unique(y_train))

# Try GMMs using different types of covariances.
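# Not part of the original excerpt: a hedged sketch of how this comment
# might continue, assuming the era constructor GMM(n_states=..., cvtype=...)
# from scikits.learn.mixture.
from scikits.learn.mixture import GMM

classifiers = dict((cvtype, GMM(n_states=n_classes, cvtype=cvtype))
                   for cvtype in ['spherical', 'diag', 'tied', 'full'])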
Example No. 13
# Author: Alexandre Gramfort <*****@*****.**>
#         Gael Varoquaux <*****@*****.**>
#
# License: BSD Style.

from numpy.testing import assert_almost_equal, assert_array_almost_equal

from .. import LedoitWolf, Covariance, ShrunkCovariance

import numpy as np
from scikits.learn import datasets

X = datasets.load_iris().data
n_samples = X.shape[0]


def test_Covariance():
    """
    Test Covariance on a simple dataset.
    """
    cov = Covariance()
    cov.fit(X)
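    # the fitted estimate should match the (uncentered) second-moment matrix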
    assert_array_almost_equal(np.dot(X.T, X) / n_samples, cov.covariance_, 4)


def test_LedoitWolf():
    """
    Test LedoitWolf on a simple dataset.
    """
    lw = LedoitWolf()
    lw.fit(X)