def __init__(self):
    # Load the iris dataset once and keep the features and targets.
    iris = datasets.load_iris()
    self.x = iris['data']
    self.y = iris['target']
    self.indices = []
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm
from scipy.cluster.vq import kmeans2
from sklearn import datasets
from sklearn.metrics import pairwise_distances, silhouette_score


def main():
    # load Iris dataset
    data = datasets.load_iris()
    X = data['data']

    # cluster and compute silhouette scores
    K = 8
    C, cIDX = kmeans2(X, K)
    print cIDX
    D = pairwise_distances(X, metric='euclidean')
    s = silhouette_coefficient(cIDX, D)  # helper assumed defined elsewhere
    silhouette = silhouette_score(D, cIDX, metric='precomputed')

    # plot per-sample silhouette values, grouped by cluster
    order = np.lexsort((-s, cIDX))
    indices = [np.flatnonzero(cIDX[order] == k) for k in range(K)]
    ytick = [(np.max(ind) + np.min(ind)) / 2 for ind in indices]
    ytickLabels = ["%d" % x for x in range(K)]
    cmap = cm.jet(np.linspace(0, 1, K)).tolist()
    clr = [cmap[i] for i in cIDX[order]]

    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.barh(range(X.shape[0]), s[order], height=1.0,
            edgecolor='none', color=clr)
    ax.set_ylim(ax.get_ylim()[::-1])
    plt.yticks(ytick, ytickLabels)
    plt.xlabel('Silhouette Value')
    plt.ylabel('Cluster')
    plt.show()
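# The snippet above leans on a silhouette_coefficient helper defined
# elsewhere. A minimal sketch of the same per-sample computation with the
# modern sklearn API, where silhouette_samples plays that role (the K-means
# call and K=8 mirror the example above):
import numpy as np
from scipy.cluster.vq import kmeans2
from sklearn import datasets
from sklearn.metrics import silhouette_samples, silhouette_score

X = datasets.load_iris()['data']
np.random.seed(0)
_, labels = kmeans2(X, 8, minit='points')
s = silhouette_samples(X, labels)       # one value per sample, in [-1, 1]
print(silhouette_score(X, labels))      # the mean of those per-sample values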
def test_pipeline():
    # check that Isomap works fine as a transformer in a Pipeline
    from scikits.learn import pipeline, datasets, manifold, neighbors
    iris = datasets.load_iris()
    clf = pipeline.Pipeline(
        [('isomap', manifold.Isomap()),
         ('neighbors_clf', neighbors.NeighborsClassifier())])
    clf.fit(iris.data, iris.target)
    # assert_lower(a, b) asserts a < b
    assert_lower(.7, clf.score(iris.data, iris.target))
def test_pipeline():
    # check that LocallyLinearEmbedding works fine as a Pipeline
    from scikits.learn import pipeline, datasets, manifold, neighbors
    iris = datasets.load_iris()
    clf = pipeline.Pipeline(
        [('filter', manifold.LocallyLinearEmbedding()),
         ('clf', neighbors.NeighborsClassifier())])
    clf.fit(iris.data, iris.target)
    assert clf.score(iris.data, iris.target) > .7
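# Both pipeline tests above exercise the same idea: a manifold learner is a
# transformer, so it can feed a classifier inside a Pipeline. A minimal sketch
# with the modern sklearn API, where KNeighborsClassifier replaces the old
# NeighborsClassifier name:
from sklearn import datasets
from sklearn.manifold import Isomap
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import Pipeline

iris = datasets.load_iris()
clf = Pipeline([('isomap', Isomap(n_components=2)),
                ('knn', KNeighborsClassifier())])
clf.fit(iris.data, iris.target)
assert clf.score(iris.data, iris.target) > .7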
def test_SVC_iris():
    """Test the sparse SVC with the iris dataset"""
    iris = datasets.load_iris()
    sp_clf = sparse.svm.SVC(kernel='linear').fit(iris.data, iris.target)
    clf = svm.SVC(kernel='linear').fit(iris.data, iris.target)

    assert_array_almost_equal(clf.support_, sp_clf.support_.todense())
    assert_array_almost_equal(clf.dual_coef_, sp_clf.dual_coef_.todense())
    assert_array_almost_equal(clf.coef_, sp_clf.coef_.todense())
    assert_array_almost_equal(clf.predict(iris.data),
                              sp_clf.predict(iris.data))
def test_LinearSVC_iris():
    """Test the sparse LinearSVC with the iris dataset"""
    iris = datasets.load_iris()
    sp_clf = svm.sparse.LinearSVC().fit(iris.data, iris.target)
    clf = svm.LinearSVC().fit(iris.data, iris.target)

    assert_array_almost_equal(clf.label_, sp_clf.label_)
    assert_equal(clf.fit_intercept, sp_clf.fit_intercept)
    assert_array_almost_equal(clf.raw_coef_, sp_clf.raw_coef_, decimal=1)
    assert_array_almost_equal(clf.predict(iris.data),
                              sp_clf.predict(iris.data))
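# In the modern sklearn API the separate sparse.* estimators are gone: the
# same SVC/LinearSVC classes accept scipy.sparse input directly. A minimal
# sketch of the dense-vs-sparse equivalence check the two tests above perform:
import numpy as np
import scipy.sparse as sp
from sklearn import datasets, svm

iris = datasets.load_iris()
clf = svm.SVC(kernel='linear').fit(iris.data, iris.target)
sp_clf = svm.SVC(kernel='linear').fit(sp.csr_matrix(iris.data), iris.target)
assert np.array_equal(clf.predict(iris.data),
                      sp_clf.predict(sp.csr_matrix(iris.data)))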
def test_precomputed():
    """
    SVC with a precomputed kernel.

    We test it with a toy dataset and with iris.
    """
    clf = svm.SVC(kernel='precomputed')
    # we use just a linear kernel
    K = np.dot(X, np.array(X).T)
    clf.fit(K, Y)
    # KT is the Gram matrix of the test data against the training data
    KT = np.dot(T, np.array(X).T)
    pred = clf.predict(KT)

    assert_array_equal(clf.dual_coef_, [[0.25, -.25]])
    assert_array_equal(clf.intercept_, [0])
    assert_array_almost_equal(clf.support_, [[2], [4]])
    assert_array_equal(pred, true_result)

    # same as before, but using a callable function instead of the kernel
    # matrix. kernel is just a linear kernel
    kfunc = lambda x, y: np.dot(x, y.T)
    clf = svm.SVC(kernel=kfunc)
    clf.fit(X, Y)
    pred = clf.predict(T)

    assert_array_equal(clf.dual_coef_, [[0.25, -.25]])
    assert_array_equal(clf.intercept_, [0])
    assert_array_almost_equal(clf.support_, [[2], [4]])
    assert_array_equal(pred, true_result)

    # test a precomputed kernel with the iris dataset
    clf = svm.SVC(kernel='precomputed')
    iris = datasets.load_iris()
    K = np.dot(iris.data, iris.data.T)
    clf.fit(K, iris.target)
    pred = clf.predict(K)
    assert_almost_equal(np.mean(pred == iris.target), .99, decimal=2)

    # same thing with a kernel callable
    clf = svm.SVC(kernel=kfunc)
    clf.fit(iris.data, iris.target)
    pred = clf.predict(iris.data)
    assert_almost_equal(np.mean(pred == iris.target), .99, decimal=2)
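# A minimal sketch of the precomputed-kernel contract with the modern sklearn
# API: passing the Gram matrix X.dot(X.T) with kernel='precomputed' should
# give the same predictions as kernel='linear' on the raw data.
import numpy as np
from sklearn import datasets, svm

iris = datasets.load_iris()
G = np.dot(iris.data, iris.data.T)          # train-vs-train Gram matrix
pre = svm.SVC(kernel='precomputed').fit(G, iris.target)
lin = svm.SVC(kernel='linear').fit(iris.data, iris.target)
assert np.array_equal(pre.predict(G), lin.predict(iris.data))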
def test_probability():
    """
    Predict probabilities using SVC

    This uses cross validation, so we use a slightly bigger testing set.
    """
    from scikits.learn import datasets
    iris = datasets.load_iris()

    clf = svm.SVC(probability=True)
    clf.fit(iris.data, iris.target)

    # predict on a simple dataset
    T = [[0, 0, 0, 0],
         [2, 2, 2, 2]]
    assert_array_almost_equal(clf.predict_proba(T),
                              [[0.993, 0.003, 0.002],
                               [0.740, 0.223, 0.035]],
                              decimal=2)

    # make sure probabilities sum to one
    pprob = clf.predict_proba(iris.data)
    assert_array_almost_equal(pprob.sum(axis=1), np.ones(len(iris.data)))
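# probability=True fits a Platt-style calibration on top of the SVM scores via
# internal cross-validation, so predict_proba is calibrated separately from
# the decision function. A minimal sketch with the modern sklearn API; the
# sklearn docs warn that the argmax of predict_proba can occasionally disagree
# with predict for exactly this reason:
import numpy as np
from sklearn import datasets, svm

iris = datasets.load_iris()
clf = svm.SVC(probability=True, random_state=0).fit(iris.data, iris.target)
proba = clf.predict_proba(iris.data)
assert np.allclose(proba.sum(axis=1), 1.0)   # rows are valid distributions
agree = np.mean(clf.classes_[proba.argmax(axis=1)] == clf.predict(iris.data))
print(agree)                                 # usually close to 1.0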
import random

import numpy as np
from scikits.learn import svm, datasets
from scikits.learn.metrics import roc, auc, precision_recall, \
    confusion_matrix, zero_one, explained_variance, \
    mean_square_error
from numpy.testing import assert_array_equal, \
    assert_array_almost_equal, \
    assert_equal, assert_almost_equal

# import some data to play with
iris = datasets.load_iris()
X = iris.data
y = iris.target

# keep only two classes to get a binary problem
X, y = X[y != 2], y[y != 2]
n_samples, n_features = X.shape

# shuffle the samples
p = range(n_samples)
random.seed(0)
random.shuffle(p)
X, y = X[p], y[p]
half = int(n_samples / 2)

# Add noisy features
np.random.seed(0)
X = np.c_[X, np.random.randn(n_samples, 200 * n_features)]

# Run classifier
classifier = svm.SVC(kernel='linear', probability=True)
probas_ = classifier.fit(X[:half], y[:half]).predict_proba(X[half:])
y_ = classifier.predict(X[half:])
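# With the predicted probabilities in hand, the next step in this setup is the
# ROC curve and its AUC. A minimal sketch of that computation with the modern
# sklearn API (roc_curve/auc replace the old roc/auc names), reusing probas_
# and the held-out labels from above:
from sklearn.metrics import roc_curve, auc

fpr, tpr, thresholds = roc_curve(y[half:], probas_[:, 1])
roc_auc = auc(fpr, tpr)
print("Area under the ROC curve: %f" % roc_auc)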
""" print __doc__ from scipy import linalg import numpy as np import pylab as pl import matplotlib as mpl from scikits.learn.lda import LDA from scikits.learn.qda import QDA ################################################################################ # load sample dataset from scikits.learn.datasets import load_iris iris = load_iris() X = iris.data[:,:2] # Take only 2 dimensions y = iris.target X = X[y > 0] y = y[y > 0] y -= 1 target_names = iris.target_names[1:] ################################################################################ # LDA lda = LDA() y_pred = lda.fit(X, y, store_covariance=True).predict(X) # QDA qda = QDA() y_pred = qda.fit(X, y, store_covariances=True).predict(X)
# Author: Alexandre Gramfort <*****@*****.**>
#         Gael Varoquaux <*****@*****.**>
#         Virgile Fritsch <*****@*****.**>
#
# License: BSD Style.

from numpy.testing import assert_almost_equal, assert_array_almost_equal

from .. import empirical_covariance, EmpiricalCovariance, \
    ShrunkCovariance, shrunk_covariance, LedoitWolf, ledoit_wolf, OAS, oas

import numpy as np
from scikits.learn import datasets

X = datasets.load_iris().data
n_samples, n_features = X.shape


def test_covariance():
    """Tests Covariance module on a simple dataset.
    """
    # test covariance fit from data
    cov = EmpiricalCovariance()
    cov.fit(X)
    assert_array_almost_equal(empirical_covariance(X), cov.covariance_, 4)
    assert_almost_equal(cov.error_norm(empirical_covariance(X)), 0)
    assert_almost_equal(
        cov.error_norm(empirical_covariance(X), norm='spectral'), 0)

    # test with n_features = 1
    X_1d = X[:, 0]
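# A minimal sketch of the same estimators through the modern sklearn API,
# comparing the empirical covariance with its Ledoit-Wolf shrunk counterpart
# on the iris features:
from sklearn.covariance import EmpiricalCovariance, LedoitWolf
from sklearn.datasets import load_iris

X = load_iris().data
emp = EmpiricalCovariance().fit(X)
lw = LedoitWolf().fit(X)
print(lw.shrinkage_)                    # shrinkage intensity in [0, 1]
print(emp.error_norm(lw.covariance_))   # distance between the two estimates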
# Body of the ellipse-drawing loop from the original example: n indexes the
# mixture component, color is its plotting colour, and h is the target axes.
v, w = np.linalg.eigh(gmm.covars[n][:2, :2])
u = w[0] / np.linalg.norm(w[0])
angle = np.arctan(u[1] / u[0])
angle = 180 * angle / np.pi  # convert to degrees
v *= 9
ell = mpl.patches.Ellipse(gmm.means[n, :2], v[0], v[1],
                          180 + angle, color=color)
ell.set_clip_box(h.bbox)
ell.set_alpha(0.5)
h.add_artist(ell)

iris = datasets.load_iris()

# Break up the dataset into non-overlapping training (75%) and testing
# (25%) sets.
skf = StratifiedKFold(iris.target, k=4)
# Only take the first fold.
train_index, test_index = skf.__iter__().next()

X_train = iris.data[train_index]
y_train = iris.target[train_index]
X_test = iris.data[test_index]
y_test = iris.target[test_index]

n_classes = len(np.unique(y_train))

# Try GMMs using different types of covariances.
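# A minimal sketch of that covariance-type comparison with the modern sklearn
# API, where GMM became GaussianMixture and the fold iteration is explicit;
# each component is initialised at a class mean so component i tracks class i,
# as the original example does:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.mixture import GaussianMixture
from sklearn.model_selection import StratifiedKFold

iris = load_iris()
train_idx, test_idx = next(
    StratifiedKFold(n_splits=4).split(iris.data, iris.target))
X_train, y_train = iris.data[train_idx], iris.target[train_idx]
X_test, y_test = iris.data[test_idx], iris.target[test_idx]

means = np.array([X_train[y_train == i].mean(axis=0) for i in range(3)])
for cov_type in ('spherical', 'diag', 'tied', 'full'):
    gmm = GaussianMixture(n_components=3, covariance_type=cov_type,
                          means_init=means, random_state=0)
    gmm.fit(X_train)
    print(cov_type, np.mean(gmm.predict(X_test) == y_test))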
# Author: Alexandre Gramfort <*****@*****.**>
#         Gael Varoquaux <*****@*****.**>
#
# License: BSD Style.

from numpy.testing import assert_almost_equal, assert_array_almost_equal

from .. import LedoitWolf, Covariance, ShrunkCovariance

import numpy as np
from scikits.learn import datasets

X = datasets.load_iris().data
n_samples = X.shape[0]


def test_Covariance():
    """
    Test Covariance on a simple dataset.
    """
    cov = Covariance()
    cov.fit(X)
    assert_array_almost_equal(np.dot(X.T, X) / n_samples, cov.covariance_, 4)


def test_LedoitWolf():
    """
    Test LedoitWolf on a simple dataset.
    """
    lw = LedoitWolf()
    lw.fit(X)