def test_compute_embedding(check_asserts=True):
    """Embed the 8x8 digits dataset in 2D with an SDAEmbedder and check
    that the learned embedding preserves local neighborhoods better than
    a random projection baseline.

    Parameters
    ----------
    check_asserts : bool, optional (default=True)
        When False, run the full pipeline without verifying the expected
        scores.  (The original implementation accepted but ignored this
        flag; it is now honored, with the default preserving the old
        behavior.)
    """
    np.random.seed(0)
    random.seed(0)

    # sample data from the digits 8x8 pixels dataset
    digits = datasets.load_digits()
    data = digits.data
    n_samples, n_features = data.shape
    low_dim = 2

    # baseline score using a random 2D projection
    projection = random_project(data, target_dim=2, rng=np.random)
    score = local_match(data, projection, query_size=50, ratio=0.1, seed=0)
    if check_asserts:
        assert_almost_equal(score, 0.12, 2)

    # compute an embedding of the data
    embedder = SDAEmbedder((n_features, 40, 15, low_dim), noise=0.1,
                           reconstruction_penalty=0.0, embedding_penalty=1,
                           sparsity_penalty=0.0, learning_rate=0.1, seed=0)
    embedder.pre_train(data, epochs=500, batch_size=5)
    code = embedder.encode(data)
    if check_asserts:
        assert_equal(code.shape, (n_samples, low_dim))

    # compare nearest neighbors: the learned embedding should match the
    # original space much better than the random baseline
    score = local_match(data, code, query_size=50, ratio=0.1, seed=0)
    if check_asserts:
        assert_almost_equal(score, 0.33, 1)
""" # Authors: Fabian Pedregosa <*****@*****.**> # Olivier Grisel <*****@*****.**> # Mathieu Blondel <*****@*****.**> # License: BSD, (C) INRIA 2011 print __doc__ import numpy as np import pylab as pl from matplotlib import offsetbox from scikits.learn.utils.fixes import qr_economic from scikits.learn import manifold, datasets, decomposition, lda digits = datasets.load_digits(n_class=6) X = digits.data y = digits.target n_samples, n_features = X.shape # ---------------------------------------------------------------------- # Random 2D projection using a random unitary matrix print "Computing random projection" rng = np.random.RandomState(42) Q, _ = qr_economic(rng.normal(size=(n_features, 2))) X_projected = np.dot(Q.T, X.T).T # ---------------------------------------------------------------------- # Projection on to the first 2 principal components
as categorical boolean arrays of shape (n_sample, n_unique_labels) and measure
the Pearson correlation as an additional measure of the clustering quality.
"""
print __doc__

from time import time
import numpy as np

from scikits.learn.cluster import KMeans
from scikits.learn.datasets import load_digits
from scikits.learn.pca import PCA
from scikits.learn.preprocessing import scale

# Fixed seed so repeated runs of the benchmark are comparable.
np.random.seed(42)

# Standardize the 64 pixel features to zero mean / unit variance.
digits = load_digits()
data = scale(digits.data)

n_samples, n_features = data.shape
# Number of clusters k = number of distinct digit classes.
n_digits = len(np.unique(digits.target))

print "n_digits: %d" % n_digits
print "n_features: %d" % n_features
print "n_samples: %d" % n_samples
print

# Benchmark k-means on the raw (scaled) features with k-means++ seeding.
print "Raw k-means with k-means++ init..."
t0 = time()
km = KMeans(init='k-means++', k=n_digits, n_init=10).fit(data)
print "done in %0.3fs" % (time() - t0)
print "inertia: %f" % km.inertia_
""" # Authors: Fabian Pedregosa <*****@*****.**> # Olivier Grisel <*****@*****.**> # Mathieu Blondel <*****@*****.**> # License: BSD, (C) INRIA 2011 print __doc__ import numpy as np import pylab as pl from matplotlib import offsetbox from scikits.learn.utils.fixes import qr_economic from scikits.learn import manifold, datasets, decomposition, lda digits = datasets.load_digits(n_class=6) X = digits.data y = digits.target n_samples, n_features = X.shape #---------------------------------------------------------------------- # Random 2D projection using a random unitary matrix print "Computing random projection" rng = np.random.RandomState(42) Q, _ = qr_economic(rng.normal(size=(n_features, 2))) X_projected = np.dot(Q.T, X.T).T #---------------------------------------------------------------------- # Projection on to the first 2 principal components print "Computing PCA projection"
A recursive feature elimination is performed prior to SVM classification. """ import numpy as np from scikits.learn.svm import SVC from scikits.learn.cross_val import StratifiedKFold, GridSearchCV from scikits.learn import datasets from scikits.learn.feature_selection.univariate_selection import\ UnivariateFilter,SelectKBest,f_classif from scikits.learn.rfe import RFE ################################################################################ # Loading the Digits dataset digits = datasets.load_digits() # To apply an classifier on this data, we need to flatten the image, to # turn the data in a (samples, feature) matrix: n_samples = len(digits.images) X = digits.images.reshape((n_samples, -1)) y = digits.target ################################################################################ # Create the RFE object and compute a cross-validated score, compared to an # unvariate feature selection <<<<<<< HEAD rfe = RFE(estimator = SVC(kernel="linear",C=1), n_features = 10, percentage = 0.1) anova_filter = UnivariateFilter(SelectKBest(k=10), f_classif)
import pylab as pl
import numpy as np

from scikits.learn import datasets, decomposition

# Load the digits and corrupt them with a little Gaussian noise.
digits = datasets.load_digits()
digits.data += .2 * np.random.normal(size=digits.data.shape)

# Fit FastICA on the transposed data matrix (samples as columns) and map
# the components back to row orientation.
estimator = decomposition.FastICA(n_components=10)
components = estimator.fit(digits.data.T).transform(digits.data.T).T

# Display the first eight independent components as 8x8 images.
for index in range(8):
    pl.subplot(2, 4, index + 1)
    pl.imshow(components[index].reshape(8, 8),
              cmap=pl.cm.gray_r, interpolation='nearest')
    # pl.axis('off')
pl.show()
def main(argv): digits = datasets.load_digits() # generate samples for dataset num_samples = len(digits.images) # train svm with all data from scikits.learn import svm svc = svm.SVC(probability=True) svc.fit(digits.data[:num_samples * 3.0 / 4], digits.target[:num_samples * 3.0 / 4]) #probs_ = svc.predict_proba(digits.data) preds = svc.predict(digits.data[num_samples * 3.0 / 4:]) print "precision/ recall for SVM with 0.75 pct data" prec = precision(digits.target, preds) rec = recall(digits.target, preds) #cm = confusion_matrix(digits.target, preds) print prec, rec # train svm with 50% data svc_50 = svm.SVC(probability=True) svc_50.fit(digits.data[:num_samples / 2], digits.target[:num_samples / 2]) #probs_ = svc_50.predict_proba(digits.data[num_samples/2:]) preds = svc.predict(digits.data[num_samples / 2:]) print "precision / recall for SVM with 0.50 pct data" prec = precision(digits.target, preds) rec = recall(digits.target, preds) #cm = confusion_matrix(digits.target, preds) print prec, rec # train label propagation with 20% data lp_50 = label_propagation.LabelPropagation() ma = [] for t in digits.target: b = [0 for i in xrange(10)] b[t] = 1 ma.append(b) dts = np.matrix(ma) lp_50.fit(digits.data, dts[:num_samples * 0.2]) preds = [] for y in lp_50.Y[num_samples * 0.2:]: preds.append(np.argmax(y)) preds = np.array(preds) print "precision / recall for Label propagation with all data & 0.2 pct labels" prec = precision(digits.target[num_samples * 0.2:], preds) rec = recall(digits.target[num_samples * 0.2:], preds) print prec, rec ls = label_propagation.LabelSpreading() ls.fit(digits.data, dts[:num_samples * 0.2]) preds = [] for y in ls.Y[num_samples * 0.2:]: preds.append(np.argmax(y)) preds = np.array(preds) print "precision / recall for Label Spreading with all data & 0.2 pct labels" prec = precision(digits.target[num_samples * 0.2:], preds) rec = recall(digits.target[num_samples * 0.2:], preds) print prec, rec
def main(argv): digits = datasets.load_digits() # generate samples for dataset num_samples = len(digits.images) # train svm with all data from scikits.learn import svm svc = svm.SVC(probability=True) svc.fit(digits.data[:num_samples * 3.0/4], digits.target[:num_samples * 3.0 / 4]) #probs_ = svc.predict_proba(digits.data) preds = svc.predict(digits.data[num_samples * 3.0/4:]) print "precision/ recall for SVM with 0.75 pct data" prec = precision(digits.target, preds) rec = recall(digits.target, preds) #cm = confusion_matrix(digits.target, preds) print prec, rec # train svm with 50% data svc_50 = svm.SVC(probability=True) svc_50.fit(digits.data[:num_samples/2], digits.target[:num_samples/2]) #probs_ = svc_50.predict_proba(digits.data[num_samples/2:]) preds = svc.predict(digits.data[num_samples/2:]) print "precision / recall for SVM with 0.50 pct data" prec = precision(digits.target, preds) rec = recall(digits.target, preds) #cm = confusion_matrix(digits.target, preds) print prec, rec # train label propagation with 20% data lp_50 = label_propagation.LabelPropagation() ma = [] for t in digits.target: b = [0 for i in xrange(10)] b[t] = 1 ma.append(b) dts = np.matrix(ma) lp_50.fit(digits.data,dts[:num_samples*0.2]) preds = [] for y in lp_50.Y[num_samples*0.2:]: preds.append(np.argmax(y)) preds = np.array(preds) print "precision / recall for Label propagation with all data & 0.2 pct labels" prec = precision(digits.target[num_samples*0.2:], preds) rec = recall(digits.target[num_samples*0.2:], preds) print prec, rec ls = label_propagation.LabelSpreading() ls.fit(digits.data,dts[:num_samples*0.2]) preds = [] for y in ls.Y[num_samples*0.2:]: preds.append(np.argmax(y)) preds = np.array(preds) print "precision / recall for Label Spreading with all data & 0.2 pct labels" prec = precision(digits.target[num_samples*0.2:], preds) rec = recall(digits.target[num_samples*0.2:], preds) print prec, rec