def test_gridminimise():
    """Grid minimisation over a single-point grid must return that point."""
    from milksets.wine import load
    features, labels = load()
    learner = milk.supervised.svm_simple(kernel=np.dot, C=2.)
    binary_labels = (labels[::2] == 0)
    result = gridminimise(learner, features[::2], binary_labels, {'C': (0.5,)})
    [chosen] = result
    assert chosen == ('C', .5)
def test_multi_view():
    """Multi-view classification over three interleaved wine slices.

    Bug fix: under Python 3 ``zip`` returns a one-shot iterator, so the
    training/testing feature tuples must be materialised as lists before
    being handed to the classifier (which may iterate them more than once).
    """
    from milksets.wine import load
    features, labels = load()
    features0 = features[::10]
    features1 = features[1::10]
    features2 = features[2::10]
    labels0 = labels[::10]
    labels1 = labels[1::10]
    labels2 = labels[2::10]
    # The three interleaved label slices must agree for the views to align.
    assert np.all(labels0 == labels1)
    assert np.all(labels1 == labels2)
    labels = labels0
    train_features = list(zip(features0, features1, features2))
    test_features = list(zip(features[3::10], features[4::10], features[5::10]))
    base = milk.supervised.classifier.ctransforms(
        feature_selection_simple(),
        milk.supervised.svm.svm_raw(C=128,
                                    kernel=milk.supervised.svm.rbf_kernel(4.)),
        milk.supervised.svm.svm_sigmoidal_correction())
    classifier = milk.supervised.multi_view.multi_view_classifier(
        [base, base, base])
    model = classifier.train(train_features, labels == 0)
    # Held-out accuracy on the remaining interleaved slices should be high.
    assert ([model.apply(f) for f in test_features] == (labels == 0)).mean() > .9
def test_default_small():
    """Regression test for milk 0.3.8: cross-validating the 'fast' default
    classifier on a tiny, unbalanced subset used to raise an error."""
    features, labels = load()
    keep = np.concatenate([
        np.where(labels < 2)[0],
        np.where(labels == 2)[0][:6],
    ])
    features, labels = features[keep], labels[keep]
    learner = defaultclassifier('fast')
    milk.nfoldcrossvalidation(features, labels, classifier=learner)
def test_defaultclassifier():
    """The default classifier must only predict labels seen at training."""
    from milksets import wine
    features, labels = wine.load()
    classif = milk.supervised.defaultclassifier()
    model = classif.train(features, labels)
    known_labels = set(labels)
    for feat in features:
        assert model.apply(feat) in known_labels
def test_defaultclassifier():
    """Predictions on the training data stay within the training label set."""
    from milksets import wine
    features, labels = wine.load()
    model = milk.supervised.defaultclassifier().train(features, labels)
    valid = set(labels)
    for example in features:
        prediction = model.apply(example)
        assert prediction in valid
def test_gridminimise_return():
    """gridminimise's returned error equals the nfold cross-validation error."""
    from milksets.wine import load
    features, labels = load()
    learner = fast_classifier()
    gridminimise(learner, features, labels, {'ignore': [0]})
    _, err = gridminimise(learner, features, labels, {'ignore': [0]},
                          return_value=True, nfolds=5)
    cmat, _ = milk.nfoldcrossvalidation(features, labels,
                                        learner=learner, nfolds=5)
    assert err == cmat.sum() - cmat.trace()
def test_assign_cids():
    """assign_centroids must reproduce the assignments kmeans returned."""
    from milksets.wine import load
    features, _ = load()
    assignments, centroids = milk.unsupervised.kmeans(
        features, 3, R=2, max_iters=10)
    recomputed = assign_centroids(features, centroids)
    assert np.all(recomputed == assignments)
def test_tree():
    """A decision tree should beat chance on a two-class wine subset."""
    from milksets import wine
    features, labels = wine.load()
    mask = labels < 2
    features, labels = features[mask], labels[mask]
    learner = milk.supervised.tree.tree_classifier()
    model = learner.train(features, labels)
    predictions = np.array([model.apply(f) for f in features])
    assert (predictions == labels).mean() > .5
def test_with_error():
    """gridsearch should tolerate a learner that raises for some settings."""
    from milksets.wine import load
    features, labels = load()
    learner = error_learner()
    grid = milk.supervised.gridsearch(
        error_learner(),
        params={'error': range(3), 'error2': range(5)})
    grid.train(features, labels)
def test_select_n():
    """select_n_best must yield exactly n features for several values of n."""
    from milksets.wine import load
    features, labels = load()
    for count in (1, 2, 4, 8):
        selector = select_n_best(count, rank_corr)
        model = selector.train(features, labels)
        reduced = model.apply(features[3])
        assert len(reduced) == count
def test_nfoldcrossvalidation_simple_list():
    """nfoldcrossvalidation must accept plain Python lists as inputs."""
    from milksets import wine
    features, labels = wine.load()
    features, labels = features[::2], labels[::2]
    cmat, clabels = nfoldcrossvalidation(
        list(features), list(labels), classifier=fast_classifier())
    assert cmat.shape == (3, 3)
    assert len(clabels) == 3
def test_tree():
    """Tree classifier accuracy on classes 0/1 of wine must exceed chance."""
    from milksets import wine
    features, labels = wine.load()
    two_class = (labels < 2)
    features = features[two_class]
    labels = labels[two_class]
    model = milk.supervised.tree.tree_classifier().train(features, labels)
    correct = np.array([model.apply(f) for f in features]) == labels
    assert correct.mean() > .5
def test_select_n():
    """Feature selection with select_n_best returns n-dimensional outputs."""
    from milksets.wine import load
    features, labels = load()
    for wanted in (1, 2, 4, 8):
        model = select_n_best(wanted, rank_corr).train(features, labels)
        assert len(model.apply(features[3])) == wanted
def test_rf():
    """A random forest trained on a fifth of the data should top 70% accuracy."""
    from milksets import wine
    features, labels = wine.load()
    binary = labels < 2
    features, labels = features[binary], labels[binary]
    learner = randomforest.rf_learner()
    model = learner.train(features[::5], labels[::5])
    predicted = [model.apply(f) for f in features]
    assert np.mean(labels == predicted) > .7
def test_with_error():
    """Grid search must survive parameter settings that make training fail."""
    from milksets.wine import load
    features, labels = load()
    learner = error_learner()
    param_space = {'error': list(range(3)), 'error2': list(range(5))}
    searcher = milk.supervised.gridsearch(error_learner(), params=param_space)
    searcher.train(features, labels)
def test_non_contiguous_fmatrix():
    """kmeans and assign_centroids must agree on non-contiguous inputs,
    both in the original dtype and after conversion to int32."""
    from milksets.wine import load
    features, _ = load()
    features = features[:, ::2]  # strided view: not C-contiguous
    assigns, centroids = milk.unsupervised.kmeans(
        features, 3, R=2, max_iters=10)
    assert np.all(assign_centroids(features, centroids) == assigns)
    features = features.astype(np.int32)
    assigns, centroids = milk.unsupervised.kmeans(
        features, 3, R=2, max_iters=10)
    assert np.all(assign_centroids(features, centroids) == assigns)
def test_learner():
    """AdaBoost over tree stumps should fit the binary wine problem well.

    Bug fix: under Python 3 ``map`` returns an iterator and
    ``np.array(map(...))`` builds a useless 0-d object array; use a list
    comprehension instead.
    """
    from milksets import wine
    learner = milk.supervised.adaboost.boost_learner(milk.supervised.tree.stump_learner())
    features, labels = wine.load()
    features = features[labels < 2]
    labels = labels[labels < 2] == 0
    labels = labels.astype(int)
    model = learner.train(features[::2], labels[::2])
    train_out = np.array([model.apply(f) for f in features])
    assert (train_out == labels).mean() > .9
def test_default_small():
    """milk 0.3.8 regression: cross-validating the 'fast' classifier on a
    subset with only six examples of class 2 used to raise an error."""
    features, labels = load()
    few_class2 = np.where(labels == 2)[0][:6]
    subset = np.concatenate([np.where(labels < 2)[0], few_class2])
    milk.nfoldcrossvalidation(features[subset], labels[subset],
                              classifier=defaultclassifier('fast'))
def test_learner():
    """Boosted stumps: more than 90% training accuracy on binary wine.

    Bug fix: ``np.array(map(...))`` no longer works under Python 3 (``map``
    is lazy); build the prediction array from a list comprehension.
    """
    from milksets import wine
    learner = milk.supervised.adaboost.boost_learner(
        milk.supervised.tree.stump_learner())
    features, labels = wine.load()
    features = features[labels < 2]
    labels = labels[labels < 2] == 0
    labels = labels.astype(int)
    model = learner.train(features[::2], labels[::2])
    train_out = np.array([model.apply(f) for f in features])
    assert (train_out == labels).mean() > .9
def test_split_subsample():
    """With a fixed seed, subsampled splits pick from very few features.

    Bug fix: ``xrange`` does not exist in Python 3; use ``range``. The loop
    variable is also renamed to ``_`` so it no longer shadows the split
    index returned by ``_split``.
    """
    import random
    from milksets import wine
    features, labels = wine.load()
    seen = set()
    for _ in range(20):
        random.seed(2)
        i, s = milk.supervised.tree._split(
            features[::10], labels[::10], None,
            milk.supervised.tree.information_gain, 2, random)
        seen.add(i)
    assert len(seen) <= 2
def test_split_subsample():
    """Seeded subsampled splitting is near-deterministic in which feature
    it picks.

    Bug fixes: ``xrange`` was removed in Python 3 (use ``range``) and the
    ``np.int`` alias was removed in NumPy 1.24 (plain ``int`` is the
    documented replacement).
    """
    import random
    from milksets import wine
    features, labels = wine.load()
    labels = labels.astype(int)
    seen = set()
    for _ in range(20):
        random.seed(2)
        i, s = milk.supervised.tree._split(
            features[::10], labels[::10], None,
            milk.supervised.tree.information_gain, 2, random)
        seen.add(i)
    assert len(seen) <= 2
def test_gridsearch():
    """Grid search over SVM parameters returns a usable trained model."""
    from milksets import wine
    features, labels = wine.load()
    binary = (labels < 2)
    features, labels = features[binary], labels[binary]
    kernels = [milk.supervised.svm.rbf_kernel(0.1),
               milk.supervised.svm.rbf_kernel(1.)]
    searcher = milk.supervised.gridsearch(
        milk.supervised.svm.svm_raw(),
        params={'C': [.01, .1, 1., 10.], 'kernel': kernels})
    model = searcher.train(features, labels)
    predictions = [model.apply(f) for f in features]
    assert len(predictions) == len(features)
def test_gridminimise_return():
    """The error reported by gridminimise matches off-diagonal mass of the
    nfold cross-validation confusion matrix."""
    from milksets.wine import load
    features, labels = load()
    learner = fast_classifier()
    gridminimise(learner, features, labels, {'ignore': [0]})
    _, gm_error = gridminimise(learner, features, labels, {'ignore': [0]},
                               return_value=True, nfolds=5)
    cmat, _ = milk.nfoldcrossvalidation(features, labels, learner=learner,
                                        nfolds=5)
    off_diagonal = cmat.sum() - cmat.trace()
    assert gm_error == off_diagonal
def test_gridsearch():
    """An SVM tuned by grid search should produce one prediction per sample."""
    from milksets import wine
    features, labels = wine.load()
    keep = labels < 2
    features = features[keep]
    labels = labels[keep]
    grid = {
        'C': [.01, .1, 1., 10.],
        'kernel': [milk.supervised.svm.rbf_kernel(0.1),
                   milk.supervised.svm.rbf_kernel(1.)],
    }
    searcher = milk.supervised.gridsearch(milk.supervised.svm.svm_raw(),
                                          params=grid)
    model = searcher.train(features, labels)
    assert len([model.apply(f) for f in features]) == len(features)
def test_multi_view():
    """A three-view classifier over interleaved wine slices scores > 90%."""
    from milksets.wine import load
    features, labels = load()
    view0 = features[::10]
    view1 = features[1::10]
    view2 = features[2::10]
    labels0 = labels[::10]
    labels1 = labels[1::10]
    labels2 = labels[2::10]
    # The interleaved slices must carry identical labels.
    assert np.all(labels0 == labels1)
    assert np.all(labels1 == labels2)
    labels = labels0
    train_features = list(zip(view0, view1, view2))
    test_features = list(zip(features[3::10], features[4::10], features[5::10]))
    base = milk.supervised.classifier.ctransforms(
        feature_selection_simple(),
        milk.supervised.svm.svm_raw(C=128,
                                    kernel=milk.supervised.svm.rbf_kernel(4.)),
        milk.supervised.svm.svm_sigmoidal_correction(),
    )
    classifier = milk.supervised.multi_view.multi_view_classifier(
        [base, base, base])
    model = classifier.train(train_features, labels == 0)
    predictions = [model.apply(f) for f in test_features]
    assert (predictions == (labels == 0)).mean() > .9
def test_gridminimise():
    """A one-point grid must minimise to that single parameter setting."""
    from milksets.wine import load
    features, labels = load()
    result = gridminimise(
        milk.supervised.svm_simple(kernel=np.dot, C=2.),
        features[::2],
        labels[::2] == 0,
        {'C': (0.5,)})
    (setting,) = result
    assert setting == ('C', .5)
def test_compare_rank_corr():
    """Fast and slow rank-correlation implementations must agree."""
    from milksets.wine import load
    features, labels = load()
    fast = rank_corr(features, labels)
    slow = slow_rank_corr(features, labels)
    assert np.allclose(fast, slow)
from pylab import *
import numpy as np
from milksets.wine import load
import milk.supervised
import milk.unsupervised.pca
import milk.supervised.svm

# Demo script: train SVMs on the first two PCA components of the wine
# dataset (classes 0 and 1 only) and plot the linear decision boundary.
features, labels = load()
features = features[labels < 2]
labels = labels[labels < 2]
# Project onto the top principal components so the data is 2-D plottable.
features, _ = milk.unsupervised.pca(features)
features = features[:, :2]
learner = milk.supervised.svm.svm_raw(kernel=np.dot, C=12)
model = learner.train(features, labels)
# Recover the primal weight vector from the support vectors and their
# weighted labels (model.svs / model.Yw), plus the bias term.
w = np.dot(model.svs.T, model.Yw)
b = model.b
x = np.linspace(-.5, .1, 100)
# Line satisfying w[0]*x + w[1]*y == b, i.e. the decision boundary.
y = -w[0] / w[1] * x + b / w[1]
plot(features[labels == 1][:, 0], features[labels == 1][:, 1], 'bx')
plot(features[labels == 0][:, 0], features[labels == 0][:, 1], 'ro')
plot(x, y)
savefig('svm-demo-points.pdf')
clf()
# Second model: same data, RBF kernel; evaluate it over a 101x101 grid
# (grid coordinates span roughly [-4, 4] after the shift/scale below).
learner = milk.supervised.svm.svm_raw(
    kernel=milk.supervised.svm.rbf_kernel(1.), C=12)
model = learner.train(features, labels)
Y, X = (np.mgrid[:101, :101] - 50) / 12.5
# NOTE(review): this rebinds the earlier plotting arrays Y/X/values are
# only computed here; any further plotting presumably follows this chunk.
values = [model.apply((y, x)) for y, x in zip(Y.ravel(), X.ravel())]
def test_fisher_approx():
    """sigma_value_fisher matches the slow reference within 1e-6."""
    from milksets import wine
    features, labels = wine.load()
    fisher = milk.supervised.svm.sigma_value_fisher(features, labels)
    for sigma in (2.**-4, 2., 16., 32.):
        reference = _slow_f(features, labels, sigma)
        assert abs(fisher(sigma) - reference) < 1e-6
def test_sda():
    """Every index selected by SDA must be a valid feature index."""
    from milksets import wine
    features, labels = wine.load()
    chosen = milk.supervised.featureselection.sda(features, labels)
    for idx in chosen:
        assert idx <= features.shape[1]
def test_extra_arg():
    """Training must tolerate unexpected keyword arguments."""
    from milksets.wine import load
    features, labels = load()
    model = milk.defaultlearner().train(features[::2], labels[::2],
                                        extra_arg=5)
    assert model.apply(features[1]) < 12.
def test_sda():
    """SDA feature selection only returns in-range feature indices."""
    from milksets import wine
    features, labels = wine.load()
    n_features = features.shape[1]
    for selected_index in milk.supervised.featureselection.sda(features, labels):
        assert selected_index <= n_features
def test_extra_arg():
    """defaultlearner.train silently accepts extra keyword arguments."""
    from milksets.wine import load
    features, labels = load()
    learner = milk.defaultlearner()
    half_features = features[::2]
    half_labels = labels[::2]
    model = learner.train(half_features, half_labels, extra_arg=5)
    prediction = model.apply(features[1])
    assert prediction < 12.
def test_compare_rank_corr():
    """rank_corr is a drop-in replacement for slow_rank_corr."""
    from milksets.wine import load
    features, labels = load()
    assert np.allclose(rank_corr(features, labels),
                       slow_rank_corr(features, labels))
def test_assign_cids():
    """Assignments recomputed from the centroids equal the kmeans output."""
    from milksets.wine import load
    features, _ = load()
    cids, centroids = milk.unsupervised.kmeans(features, 3, R=2, max_iters=10)
    assert np.all(assign_centroids(features, centroids) == cids)
from milk.supervised import randomforest
from milk.supervised.multi import one_against_one
import milk.nfoldcrossvalidation
import milk.unsupervised
import pylab
from milksets import wine

# Demo script: cross-validate a multi-class random forest on the wine
# dataset and scatter-plot the predictions in PCA space.

# Load 'wine' dataset
features, labels = wine.load()
# random forest learner
rf_learner = randomforest.rf_learner()
# rf is a binary learner, so we transform it into a multi-class classifier
learner = one_against_one(rf_learner)
# cross validate with this learner and return predictions on left-out elements
cmat,names, preds = milk.nfoldcrossvalidation(features, labels, classifier=learner, return_predictions=1)
print('cross-validation accuracy:', cmat.trace()/float(cmat.sum()))
# dimensionality reduction for display
x,v = milk.unsupervised.pca(features)
colors = "rgb"  # colour encodes the predicted class
marks = "xo"  # marker encodes whether the prediction was correct
# NOTE(review): the loop target (y, x) shadows the PCA result `x` above;
# harmless here because `x[:, :2]` is evaluated once before iteration.
for (y,x),p,r in zip(x[:,:2], preds, labels):
    c = colors[p]
    m = marks[p == r]
    pylab.plot(y,x,c+m)
pylab.show()
from pylab import *
import numpy as np
from milksets.wine import load
import milk.supervised
import milk.unsupervised.pca
import milk.supervised.svm

# Demo script: linear SVM on the top-2 PCA components of the wine dataset
# (classes 0 and 1 only), plotting the points and the decision boundary.
features, labels = load()
features = features[labels < 2]
labels = labels[labels < 2]
# Reduce to two dimensions so the dataset can be drawn in the plane.
features,_ = milk.unsupervised.pca(features)
features = features[:,:2]
learner = milk.supervised.svm.svm_raw(kernel=np.dot, C=12)
model = learner.train(features, labels)
# Primal weight vector from support vectors and their weighted labels.
w = np.dot(model.svs.T, model.Yw)
b = model.b
x = np.linspace(-.5, .1, 100)
# Points on the decision boundary: w[0]*x + w[1]*y == b.
y = -w[0]/w[1]*x + b/w[1]
plot(features[labels == 1][:,0], features[labels == 1][:,1], 'bx')
plot(features[labels == 0][:,0], features[labels == 0][:,1], 'ro')
plot(x,y)
savefig('svm-demo-points.pdf')
clf()