Example No. 1
import numpy as np
import milk.supervised
from milk.supervised.gridsearch import gridminimise

def test_gridminimise():
    from milksets.wine import load
    features, labels = load()
    # With a single candidate value for C, gridminimise can only return it
    x = gridminimise(milk.supervised.svm_simple(kernel=np.dot, C=2.),
                     features[::2], labels[::2] == 0, {'C': (0.5, )})
    cval, = x
    assert cval == ('C', .5)
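gridminimise returns (name, value) pairs for the best settings it found. A minimal sketch of feeding those back into a fresh learner; it assumes the learner exposes set_option, the same interface milk's grid search machinery drives:

# Hedged sketch: re-train with the parameters gridminimise selected.
import numpy as np
import milk.supervised
from milk.supervised.gridsearch import gridminimise
from milksets.wine import load

features, labels = load()
learner = milk.supervised.svm_simple(kernel=np.dot, C=2.)
for name, value in gridminimise(learner, features, labels == 0,
                                {'C': (0.5, 1.0, 2.0)}):
    learner.set_option(name, value)  # assumed learner interface
model = learner.train(features, labels == 0)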
Example No. 2
import numpy as np
import milk.supervised.classifier
import milk.supervised.multi_view
import milk.supervised.svm
from milk.supervised.featureselection import feature_selection_simple

def test_multi_view():
    from milksets.wine import load
    features, labels = load()
    # Split the data into three "views" by decimating at different offsets
    features0 = features[::10]
    features1 = features[1::10]
    features2 = features[2::10]
    labels0 = labels[::10]
    labels1 = labels[1::10]
    labels2 = labels[2::10]

    # The wine data is sorted by class, so the decimated label vectors agree
    assert np.all(labels0 == labels1)
    assert np.all(labels1 == labels2)
    labels = labels0
    # list() so the zipped views survive repeated iteration under Python 3
    train_features = list(zip(features0, features1, features2))
    test_features = list(zip(features[3::10], features[4::10], features[5::10]))
    base = milk.supervised.classifier.ctransforms(
        feature_selection_simple(),
        milk.supervised.svm.svm_raw(C=128,
                                    kernel=milk.supervised.svm.rbf_kernel(4.)),
        milk.supervised.svm.svm_sigmoidal_correction())
    classifier = milk.supervised.multi_view.multi_view_classifier(
        [base, base, base])
    model = classifier.train(train_features, labels == 0)
    assert ([model.apply(f)
             for f in test_features] == (labels == 0)).mean() > .9
Example No. 3
import numpy as np
import milk
from milksets.wine import load
from milk.supervised import defaultclassifier

def test_default_small():
    features, labels = load()
    # Keep both full classes plus only six examples of class 2
    selected = np.concatenate([np.where(labels < 2)[0],
                               np.where(labels == 2)[0][:6]])
    features = features[selected]
    labels = labels[selected]
    learner = defaultclassifier('fast')
    # In version 0.3.8, the line below raised an error
    milk.nfoldcrossvalidation(features, labels, classifier=learner)
Example No. 4
import milk.supervised

def test_defaultclassifier():
    from milksets import wine
    features, labels = wine.load()
    C = milk.supervised.defaultclassifier()
    model = C.train(features, labels)
    labelset = set(labels)
    # Every prediction should be one of the training labels
    for f in features:
        assert model.apply(f) in labelset
Example No. 5
import milk
from milk.supervised.gridsearch import gridminimise
# fast_classifier is a small helper learner defined alongside these tests
# in milk's test suite

def test_gridminimise_return():
    from milksets.wine import load
    features, labels = load()
    learner = fast_classifier()
    gridminimise(learner, features, labels, {'ignore': [0]})
    _, error = gridminimise(learner, features, labels, {'ignore': [0]},
                            return_value=True, nfolds=5)
    cmat, _ = milk.nfoldcrossvalidation(features, labels, learner=learner, nfolds=5)
    # gridminimise's reported error must match the cross-validation error:
    # everything off the confusion-matrix diagonal
    assert error == cmat.sum() - cmat.trace()
Example No. 6
import numpy as np
import milk.unsupervised
from milk.unsupervised.kmeans import assign_centroids

def test_assign_cids():
    from milksets.wine import load
    features, _ = load()
    assigns, centroids = milk.unsupervised.kmeans(features, 3, R=2, max_iters=10)
    # Re-assigning each point to its nearest centroid must reproduce the
    # assignments kmeans returned
    assert np.all(assign_centroids(features, centroids) == assigns)
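assign_centroids reduces to nearest-centroid assignment; a minimal NumPy sketch of that operation (an illustrative reimplementation, not milk's code):

import numpy as np

def nearest_centroid(features, centroids):
    # Squared Euclidean distance from each point to each centroid,
    # then the index of the closest centroid per point
    dists = ((features[:, None, :] - centroids[None, :, :]) ** 2).sum(axis=2)
    return dists.argmin(axis=1)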
Example No. 7
import numpy as np
import milk.supervised.tree

def test_tree():
    from milksets import wine
    features, labels = wine.load()
    # Binary problem: keep only classes 0 and 1
    selected = (labels < 2)
    features = features[selected]
    labels = labels[selected]
    C = milk.supervised.tree.tree_classifier()
    model = C.train(features, labels)
    # Training accuracy should beat chance
    assert (np.array([model.apply(f) for f in features]) == labels).mean() > .5
Example No. 8
import milk.supervised
# error_learner is a helper defined alongside this test in milk's test suite

def test_with_error():
    from milksets.wine import load
    features, labels = load()
    learner = error_learner()
    G = milk.supervised.gridsearch(
        learner,
        params={'error': list(range(3)), 'error2': list(range(5))})
    G.train(features, labels)
Example No. 9
# select_n_best and rank_corr come from milk's feature selection code;
# rank_corr scores each feature, select_n_best keeps the top n

def test_select_n():
    from milksets.wine import load

    features, labels = load()
    for n in (1, 2, 4, 8):
        select = select_n_best(n, rank_corr)
        model = select.train(features, labels)
        f = model.apply(features[3])
        assert len(f) == n
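Behind select_n_best is a plain rank-and-slice pattern; a hedged sketch with a generic per-feature score array (illustrative only, not milk's implementation):

import numpy as np

def select_top_n(features, scores, n):
    # Keep the n columns whose scores are highest (one score per feature)
    idx = np.argsort(scores)[::-1][:n]
    return features[:, idx]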
Example No. 10
from milk import nfoldcrossvalidation
# fast_classifier is a small helper learner from milk's test suite

def test_nfoldcrossvalidation_simple_list():
    from milksets import wine
    features, labels = wine.load()
    features = features[::2]
    labels = labels[::2]

    # nfoldcrossvalidation should also accept plain Python lists
    cmat, clabels = nfoldcrossvalidation(list(features), list(labels),
                                         classifier=fast_classifier())
    assert cmat.shape == (3, 3)
    assert len(clabels) == 3
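The returned confusion matrix makes the summary quantities used elsewhere in these examples one-liners (the accuracy form appears in the random-forest demo below, the error count in the gridminimise example above):

accuracy = cmat.trace() / float(cmat.sum())  # diagonal = correct predictions
errors = cmat.sum() - cmat.trace()           # off-diagonal = misclassifications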
Example No. 11
import numpy as np
from milk.supervised import randomforest

def test_rf():
    from milksets import wine
    features, labels = wine.load()
    features = features[labels < 2]
    labels = labels[labels < 2]
    learner = randomforest.rf_learner()
    # Train on every fifth example, test on everything
    model = learner.train(features[::5], labels[::5])
    test = [model.apply(f) for f in features]
    assert np.mean(labels == test) > .7
Example No. 12
import numpy as np
import milk.unsupervised
from milk.unsupervised.kmeans import assign_centroids

def test_non_contiguous_fmatrix():
    from milksets.wine import load
    features, _ = load()
    # Slicing columns yields a non-contiguous feature matrix
    features = features[:, ::2]
    assigns, centroids = milk.unsupervised.kmeans(features, 3, R=2, max_iters=10)
    assert np.all(assign_centroids(features, centroids) == assigns)

    # Also exercise a non-float dtype
    features = features.astype(np.int32)
    assigns, centroids = milk.unsupervised.kmeans(features, 3, R=2, max_iters=10)
    assert np.all(assign_centroids(features, centroids) == assigns)
Example No. 13
import numpy as np
import milk.supervised.adaboost
import milk.supervised.tree

def test_learner():
    from milksets import wine
    learner = milk.supervised.adaboost.boost_learner(
        milk.supervised.tree.stump_learner())
    features, labels = wine.load()
    features = features[labels < 2]
    labels = labels[labels < 2] == 0
    labels = labels.astype(int)
    model = learner.train(features[::2], labels[::2])
    # A list comprehension (not map) so np.array sees a sequence in Python 3
    train_out = np.array([model.apply(f) for f in features])
    assert (train_out == labels).mean() > .9
Example No. 14
import milk.supervised.tree

def test_split_subsample():
    import random
    from milksets import wine
    features, labels = wine.load()
    labels = labels.astype(int)

    seen = set()
    for _ in range(20):
        random.seed(2)
        idx, s = milk.supervised.tree._split(
            features[::10], labels[::10], None,
            milk.supervised.tree.information_gain, 2, random)
        seen.add(idx)
    # With a fixed seed, the subsampled split should be (nearly) deterministic
    assert len(seen) <= 2
Example No. 15
import milk.supervised
import milk.supervised.svm

def test_gridsearch():
    from milksets import wine
    features, labels = wine.load()
    selected = (labels < 2)
    features = features[selected]
    labels = labels[selected]

    G = milk.supervised.gridsearch(
        milk.supervised.svm.svm_raw(),
        params={'C': [.01, .1, 1., 10.],
                'kernel': [milk.supervised.svm.rbf_kernel(0.1),
                           milk.supervised.svm.rbf_kernel(1.)]})
    model = G.train(features, labels)
    reslabels = [model.apply(f) for f in features]
    assert len(reslabels) == len(features)
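Conceptually, gridsearch cross-validates every combination of the parameter grid and trains on the best one. A hedged sketch of that loop (an illustration of the idea, not milk's implementation; score stands in for a cross-validation run returning a number to maximise):

import itertools

def exhaustive_grid(params, score):
    # Try every combination of parameter values, keep the best-scoring one
    names = list(params)
    best, best_score = None, float('-inf')
    for values in itertools.product(*(params[n] for n in names)):
        candidate = dict(zip(names, values))
        s = score(candidate)
        if s > best_score:
            best, best_score = candidate, s
    return best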
Example No. 16
import numpy as np
# rank_corr and its reference implementation slow_rank_corr come from
# milk's feature selection code and its test suite

def test_compare_rank_corr():
    from milksets.wine import load
    features, labels = load()
    r0 = rank_corr(features, labels)
    r1 = slow_rank_corr(features, labels)
    # The fast implementation must agree with the slow reference
    assert np.allclose(r0, r1)
Example No. 17
from pylab import *
import numpy as np

from milksets.wine import load
import milk.supervised
import milk.unsupervised.pca
import milk.supervised.svm

# Load the wine data and keep only the first two classes (binary problem)
features, labels = load()
features = features[labels < 2]
labels = labels[labels < 2]

# Project to the first two principal components so we can plot in 2D
features, _ = milk.unsupervised.pca(features)
features = features[:, :2]

# Linear SVM: recover the weight vector from the support vectors
learner = milk.supervised.svm.svm_raw(kernel=np.dot, C=12)
model = learner.train(features, labels)
w = np.dot(model.svs.T, model.Yw)
b = model.b

# Decision boundary: w[0]*x + w[1]*y = b
x = np.linspace(-.5, .1, 100)
y = -w[0] / w[1] * x + b / w[1]
plot(features[labels == 1][:, 0], features[labels == 1][:, 1], 'bx')
plot(features[labels == 0][:, 0], features[labels == 0][:, 1], 'ro')
plot(x, y)
savefig('svm-demo-points.pdf')

clf()

# Same data with an RBF kernel: evaluate the model over a grid
learner = milk.supervised.svm.svm_raw(
    kernel=milk.supervised.svm.rbf_kernel(1.), C=12)
model = learner.train(features, labels)
Y, X = (np.mgrid[:101, :101] - 50) / 12.5
values = [model.apply((y, x)) for y, x in zip(Y.ravel(), X.ravel())]
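The script stops right after filling values. A plausible completion that reshapes them back onto the grid and saves a picture; the reshape follows from the code above, while the plotting call and output filename are assumptions about the original demo's intent:

values = np.array(values).reshape(Y.shape)
# The grid spans (0..100 - 50) / 12.5, i.e. -4 to 4 on both axes
imshow(values, origin='lower', extent=(-4, 4, -4, 4))
savefig('svm-demo-rbf.pdf')  # assumed filename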
Example No. 18
import milk.supervised.svm
# _slow_f is a straightforward reference implementation defined alongside
# this test in milk's test suite

def test_fisher_approx():
    from milksets import wine
    features, labels = wine.load()
    f = milk.supervised.svm.sigma_value_fisher(features, labels)
    for sigma in (2.**-4, 2., 16., 32.):
        # The fast Fisher-value computation must match the slow reference
        assert abs(f(sigma) - _slow_f(features, labels, sigma)) < 1e-6
Example No. 19
import milk.supervised.featureselection

def test_sda():
    from milksets import wine
    features, labels = wine.load()
    selected = milk.supervised.featureselection.sda(features, labels)
    # Selected entries must be valid feature indices (sanity bound)
    for sel in selected:
        assert sel <= features.shape[1]
Example No. 20
import milk

def test_extra_arg():
    from milksets.wine import load
    features, labels = load()
    learner = milk.defaultlearner()
    # train() should tolerate unknown extra keyword arguments
    model = learner.train(features[::2], labels[::2], extra_arg=5)
    assert model.apply(features[1]) < 12.
Example No. 21
from milk.supervised import randomforest
from milk.supervised.multi import one_against_one
import milk.nfoldcrossvalidation
import milk.unsupervised

import pylab
from milksets import wine

# Load the 'wine' dataset
features, labels = wine.load()
# Random forest learner
rf_learner = randomforest.rf_learner()
# rf is a binary learner, so we transform it into a multi-class classifier
learner = one_against_one(rf_learner)

# Cross-validate with this learner and return predictions on left-out elements
cmat, names, preds = milk.nfoldcrossvalidation(
    features, labels, classifier=learner, return_predictions=1)

print('cross-validation accuracy:', cmat.trace() / float(cmat.sum()))

# Dimensionality reduction for display (renamed so the projection is not
# shadowed by the loop variable below)
projected, v = milk.unsupervised.pca(features)
colors = "rgb"  # colour encodes the predicted class
marks = "xo"    # marker encodes whether the prediction was correct
for (y, x), p, r in zip(projected[:, :2], preds, labels):
    c = colors[p]
    m = marks[p == r]
    pylab.plot(y, x, c + m)
pylab.show()