Example #1
# NOTE: imports reconstructed so the snippet runs stand-alone. SDAEmbedder,
# random_project and local_match are assumed to come from the embedding
# module under test; they are not part of the scikits.learn release.
import random

import numpy as np
from numpy.testing import assert_almost_equal, assert_equal

from scikits.learn import datasets


def test_compute_embedding(check_asserts=True):
    np.random.seed(0)
    random.seed(0)

    # sample data from the digits 8x8 pixels dataset
    digits = datasets.load_digits()
    data = digits.data
    n_samples, n_features = data.shape
    low_dim = 2

    # baseline score using a random 2D projection
    projection = random_project(data, target_dim=2, rng=np.random)
    score = local_match(data, projection, query_size=50, ratio=0.1, seed=0)
    assert_almost_equal(score, 0.12, 2)

    # compute an embedding of the data
    embedder = SDAEmbedder(
        (n_features, 40, 15, low_dim),
        noise=0.1,
        reconstruction_penalty=0.0,
        embedding_penalty=1,
        sparsity_penalty=0.0,
        learning_rate=0.1,
        seed=0,
    )
    embedder.pre_train(data, epochs=500, batch_size=5)

    code = embedder.encode(data)
    assert_equal(code.shape, (n_samples, low_dim))

    # compare nearest neighbors
    score = local_match(data, code, query_size=50, ratio=0.1, seed=0)

    assert_almost_equal(score, 0.33, 1)
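
# A minimal sketch (an assumption, not the project's actual helper) of what a
# local_match-style score can measure: the mean fraction of each point's
# nearest neighbors that the low-dimensional code preserves.
def knn_overlap(X, Y, n_neighbors=10):
    def neighbors(A):
        # brute-force pairwise squared distances
        d = ((A[:, None, :] - A[None, :, :]) ** 2).sum(-1)
        np.fill_diagonal(d, np.inf)  # ignore self-matches
        return np.argsort(d, axis=1)[:, :n_neighbors]
    nx, ny = neighbors(np.asarray(X)), neighbors(np.asarray(Y))
    shared = [len(np.intersect1d(a, b)) for a, b in zip(nx, ny)]
    return np.mean(shared) / float(n_neighbors)
# e.g. knn_overlap(data, code) should roughly track the local_match scores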
"""

# Authors: Fabian Pedregosa <*****@*****.**>
#          Olivier Grisel <*****@*****.**>
#          Mathieu Blondel <*****@*****.**>
# License: BSD, (C) INRIA 2011

print __doc__

import numpy as np
import pylab as pl
from matplotlib import offsetbox
from scikits.learn.utils.fixes import qr_economic
from scikits.learn import manifold, datasets, decomposition, lda

digits = datasets.load_digits(n_class=6)
X = digits.data
y = digits.target
n_samples, n_features = X.shape

# ----------------------------------------------------------------------
# Random 2D projection using a random unitary matrix
print "Computing random projection"
rng = np.random.RandomState(42)
Q, _ = qr_economic(rng.normal(size=(n_features, 2)))
X_projected = np.dot(Q.T, X.T).T
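
# sanity check (added): the economic QR of a Gaussian matrix has orthonormal
# columns, so the rows of Q.T form an orthonormal basis of a random 2D plane
assert np.allclose(np.dot(Q.T, Q), np.eye(2))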


# ----------------------------------------------------------------------
# Projection onto the first 2 principal components
print "Computing PCA projection"
Example #3
as categorical boolean arrays of shape (n_samples, n_unique_labels) and measure
the Pearson correlation as an additional measure of the clustering quality.
"""
print __doc__

from time import time
import numpy as np

from scikits.learn.cluster import KMeans
from scikits.learn.datasets import load_digits
from scikits.learn.pca import PCA
from scikits.learn.preprocessing import scale

np.random.seed(42)

digits = load_digits()
data = scale(digits.data)

n_samples, n_features = data.shape
n_digits = len(np.unique(digits.target))

print "n_digits: %d" % n_digits
print "n_features: %d" % n_features
print "n_samples: %d" % n_samples
print

print "Raw k-means with k-means++ init..."
t0 = time()
km = KMeans(init='k-means++', k=n_digits, n_init=10).fit(data)
print "done in %0.3fs" % (time() - t0)
print "inertia: %f" % km.inertia_
"""

# Authors: Fabian Pedregosa <*****@*****.**>
#          Olivier Grisel <*****@*****.**>
#          Mathieu Blondel <*****@*****.**>
# License: BSD, (C) INRIA 2011

print __doc__

import numpy as np
import pylab as pl
from matplotlib import offsetbox
from scikits.learn.utils.fixes import qr_economic
from scikits.learn import manifold, datasets, decomposition, lda

digits = datasets.load_digits(n_class=6)
X = digits.data
y = digits.target
n_samples, n_features = X.shape

#----------------------------------------------------------------------
# Random 2D projection using a random unitary matrix
print "Computing random projection"
rng = np.random.RandomState(42)
Q, _ = qr_economic(rng.normal(size=(n_features, 2)))
X_projected = np.dot(Q.T, X.T).T

#----------------------------------------------------------------------
# Projection on to the first 2 principal components

print "Computing PCA projection"
A recursive feature elimination is performed prior to SVM classification.
"""

import numpy as np
from scikits.learn.svm import SVC
from scikits.learn.cross_val import StratifiedKFold, GridSearchCV
from scikits.learn import datasets
from scikits.learn.feature_selection.univariate_selection import (
    UnivariateFilter, SelectKBest, f_classif)
from scikits.learn.rfe import RFE



################################################################################
# Loading the Digits dataset
digits = datasets.load_digits()

# To apply a classifier on this data, we need to flatten the images, to
# turn the data into a (samples, features) matrix:
n_samples = len(digits.images)
X = digits.images.reshape((n_samples, -1))
y = digits.target

################################################################################
# Create the RFE object and compute a cross-validated score, compared to a
# univariate feature selection

rfe = RFE(estimator=SVC(kernel="linear", C=1), n_features=10, percentage=0.1)
anova_filter = UnivariateFilter(SelectKBest(k=10), f_classif)
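
# A minimal sketch of the RFE idea itself (a hypothetical helper, not the
# scikits.learn class): repeatedly fit a linear SVM, rank the surviving
# features by the magnitude of their weights, and drop the weakest one until
# only n_features remain.
def rfe_sketch(X, y, n_features=10):
    active = np.arange(X.shape[1])
    while len(active) > n_features:
        clf = SVC(kernel="linear", C=1).fit(X[:, active], y)
        weights = np.abs(clf.coef_).sum(axis=0)  # aggregate over class pairs
        active = np.delete(active, np.argmin(weights))
    return active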
from scikits.learn import datasets, decomposition
import pylab as pl
import numpy as np

digits = datasets.load_digits()

digits.data += .2 * np.random.normal(size=digits.data.shape)
ica = decomposition.FastICA(n_components=10)
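# fitting on the transposed (features x samples) data makes each extracted
# component a 64-vector that can be displayed as an 8x8 image below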
tt = ica.fit(digits.data.T).transform(digits.data.T).T

for i in range(8):
    pl.subplot(2, 4, 1 + i)
    pl.imshow(tt[i].reshape(8, 8), cmap=pl.cm.gray_r, interpolation='nearest')
#    pl.axis('off')
pl.show()
Example #7
# NOTE: imports reconstructed so the snippet runs stand-alone; the
# label_propagation module and the precision/recall helpers are assumed to
# match the era's experimental scikits.learn API.
import numpy as np
from scikits.learn import datasets, label_propagation
from scikits.learn.metrics import precision, recall


def main(argv):
    digits = datasets.load_digits()

    # total number of samples in the dataset
    num_samples = len(digits.images)
    n_train = num_samples * 3 // 4  # integer index; the float slice was a bug

    # train an SVM on the first 75% of the data
    from scikits.learn import svm
    svc = svm.SVC(probability=True)
    svc.fit(digits.data[:n_train], digits.target[:n_train])

    #probs_ = svc.predict_proba(digits.data)
    preds = svc.predict(digits.data[n_train:])

    print "precision / recall for SVM trained on 75% of the data"
    # score against the held-out targets, not the full target array
    prec = precision(digits.target[n_train:], preds)
    rec = recall(digits.target[n_train:], preds)
    #cm = confusion_matrix(digits.target, preds)
    print prec, rec

    # train an SVM on the first 50% of the data
    svc_50 = svm.SVC(probability=True)
    svc_50.fit(digits.data[:num_samples // 2], digits.target[:num_samples // 2])

    #probs_ = svc_50.predict_proba(digits.data[num_samples // 2:])
    # bug fix: predict with svc_50, not the 75% model
    preds = svc_50.predict(digits.data[num_samples // 2:])

    print "precision / recall for SVM trained on 50% of the data"
    prec = precision(digits.target[num_samples // 2:], preds)
    rec = recall(digits.target[num_samples // 2:], preds)
    #cm = confusion_matrix(digits.target, preds)
    print prec, rec

    # train label propagation with 20% of the labels
    n_labeled = int(num_samples * 0.2)
    lp_20 = label_propagation.LabelPropagation()
    # encode the targets as one-hot rows
    ma = []
    for t in digits.target:
        b = [0 for i in xrange(10)]
        b[t] = 1
        ma.append(b)
    dts = np.matrix(ma)
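    # equivalent vectorized construction (a sketch, not in the original):
    # indexing an identity matrix by the targets yields the same one-hot rows
    # dts = np.matrix(np.eye(10)[digits.target])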

    lp_20.fit(digits.data, dts[:n_labeled])

    # predicted labels for the unlabeled portion come from the propagated Y
    preds = []
    for y in lp_20.Y[n_labeled:]:
        preds.append(np.argmax(y))
    preds = np.array(preds)

    print "precision / recall for label propagation with all data & 20% labels"
    prec = precision(digits.target[n_labeled:], preds)
    rec = recall(digits.target[n_labeled:], preds)
    print prec, rec

    ls = label_propagation.LabelSpreading()

    ls.fit(digits.data, dts[:n_labeled])

    preds = []
    for y in ls.Y[n_labeled:]:
        preds.append(np.argmax(y))
    preds = np.array(preds)

    print "precision / recall for label spreading with all data & 20% labels"
    prec = precision(digits.target[n_labeled:], preds)
    rec = recall(digits.target[n_labeled:], preds)
    print prec, rec
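
# The listing defines main() but never calls it; a conventional entry point
# (not in the original) would be:
if __name__ == "__main__":
    import sys
    main(sys.argv)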