def test_LinearSVC():
    """
    Test basic routines using LinearSVC.

    Fits LinearSVC on the module-level (X, Y) data with several
    penalty / loss / dual combinations and checks each time that the
    predictions on T equal true_result.
    """
    clf = svm.LinearSVC().fit(X, Y)

    # by default should have intercept
    assert clf.fit_intercept

    assert_array_equal(clf.predict(T), true_result)
    assert_array_almost_equal(clf.intercept_, [0], decimal=3)

    # the same with l1 penalty
    clf = svm.LinearSVC(penalty='l1', dual=False).fit(X, Y)
    assert_array_equal(clf.predict(T), true_result)

    # l2 penalty with dual formulation
    clf = svm.LinearSVC(penalty='l2', dual=True).fit(X, Y)
    assert_array_equal(clf.predict(T), true_result)

    # l2 penalty, l1 loss
    clf = svm.LinearSVC(penalty='l2', loss='l1', dual=True).fit(X, Y)
    assert_array_equal(clf.predict(T), true_result)

    # test also decision function
    dec = clf.decision_function(T).ravel()
    # Use the builtin int: the np.int alias was deprecated in NumPy 1.20 and
    # removed in 1.24.  A positive score becomes 2, anything else becomes 1.
    res = (dec > 0).astype(int) + 1
    assert_array_equal(res, true_result)
def test_liblinear_predict():
    """
    Sanity-check LinearSVC.predict against a computation done in Python.

    The linear scores are rebuilt from coef_ and intercept_, and the labels
    derived from them must equal what predict returns.
    """
    # multi-class case: the expected label is the arg-max score column
    multiclass = svm.LinearSVC().fit(iris.data, iris.target)
    scores = np.dot(iris.data, multiclass.coef_.T) + multiclass.intercept_
    assert_array_equal(multiclass.predict(iris.data), scores.argmax(axis=1))

    # binary-class case: the expected label is 1 where the score is positive
    samples = [[2, 1], [3, 1], [1, 3], [2, 3]]
    labels = [0, 0, 1, 1]
    binary = svm.LinearSVC().fit(samples, labels)
    flat_weights = np.ravel(binary.coef_)
    scores = np.dot(samples, flat_weights) + binary.intercept_
    expected = (scores > 0).astype(int)
    assert_array_equal(binary.predict(samples), expected)
def _build_classifier(classifier_type, kwargs):
    """Return an unfitted classifier for classifier_type, built with kwargs."""
    if classifier_type == 'liblinear':
        return sklearn_svm.LinearSVC(**kwargs)
    elif classifier_type == 'libSVM':
        return sklearn_svm.SVC(**kwargs)
    elif classifier_type == 'LRL':
        return LogisticRegression(**kwargs)
    elif classifier_type == 'MCC':
        return CorrelationClassifier(**kwargs)
    elif classifier_type.startswith('svm.'):
        # 'svm.<Name>' selects the attribute <Name> of sklearn_svm
        ct = classifier_type.split('.')[-1]
        return getattr(sklearn_svm, ct)(**kwargs)
    elif classifier_type.startswith('linear_model.'):
        # 'linear_model.<Name>' selects the attribute <Name> of
        # sklearn_linear_model
        ct = classifier_type.split('.')[-1]
        return getattr(sklearn_linear_model, ct)(**kwargs)
    raise ValueError("Unknown classifier_type: %r" % (classifier_type,))


def train_liblinear_classifier_core(trainXy, classifier_type="liblinear",
                                    trace_normalize=False, **kwargs):
    """ Classifier training using SVMs (or another supported classifier)

    Input:
    trainXy = training data handed to normalize(); after normalization it
              is unpacked as (train_features, train_labels)
    classifier_type = one of 'liblinear', 'libSVM', 'LRL', 'MCC',
                      'svm.<Name>' or 'linear_model.<Name>'
    trace_normalize = forwarded unchanged to normalize()
    **kwargs = forwarded unchanged to the classifier constructor
               (presumably parameters such as C or eps)

    Output:
    (clf, train_mean, train_std, trace) = the fitted classifier followed by
    the three extra values returned by normalize()

    Raises:
    ValueError if classifier_type is not one of the supported values
    """
    # Build the classifier first so an unsupported classifier_type fails
    # right away with a clear error, instead of leaving clf unbound after
    # the normalization work has already been done.
    clf = _build_classifier(classifier_type, kwargs)

    # do normalization
    (train_features, train_labels), train_mean, train_std, trace = normalize(
        [trainXy], trace_normalize=trace_normalize)

    clf.fit(train_features, train_labels)
    return clf, train_mean, train_std, trace
def test_coef_and_intercept_SVC_vs_LinearSVC():
    """
    Check that a linear-kernel SVC and a LinearSVC (l2 penalty, l1 loss,
    dual formulation) end up with the same coef_ and intercept_.
    """
    kernel_model = svm.SVC(kernel='linear', C=1)
    kernel_model.fit(X, Y)

    liblinear_model = svm.LinearSVC(C=1, penalty='l2', loss='l1', dual=True)
    liblinear_model.fit(X, Y)

    # shapes first, then the values up to 5 decimals
    assert_array_equal(liblinear_model.coef_.shape, kernel_model.coef_.shape)
    assert_array_almost_equal(liblinear_model.coef_, kernel_model.coef_,
                              decimal=5)
    assert_array_almost_equal(liblinear_model.intercept_,
                              kernel_model.intercept_, decimal=5)
def test_LinearSVC_iris():
    """
    Check that LinearSVC predicts iris plausibly and that predict agrees
    with the arg-max of decision_function.
    """
    model = svm.LinearSVC().fit(iris.data, iris.target)

    # more than 80% of the training samples must be labelled correctly
    hits = model.predict(iris.data) == iris.target
    assert np.mean(hits) > 0.8

    # the predicted label must be the column with the highest score
    scores = model.decision_function(iris.data)
    best_column = np.argmax(scores, 1)
    assert_array_equal(best_column, model.predict(iris.data))
def test_LinearSVC():
    """
    Exercise LinearSVC with its default settings and with several
    penalty / loss / dual combinations.
    """
    default_clf = svm.LinearSVC().fit(X, Y)
    assert_array_equal(default_clf.predict(T), true_result)
    assert_array_almost_equal(default_clf.intercept_, [0], decimal=5)

    # every variant must reproduce the expected predictions on T
    variants = (
        # l1 penalty (primal formulation)
        {'penalty': 'l1', 'dual': False},
        # l2 penalty with dual formulation
        {'penalty': 'l2', 'dual': True},
        # l2 penalty, l1 loss, dual formulation
        {'penalty': 'l2', 'loss': 'l1', 'dual': True},
    )
    for params in variants:
        fitted = svm.LinearSVC(**params).fit(X, Y)
        assert_array_equal(fitted.predict(T), true_result)
def svm_method(entries, lin_or_poly):
    """
    Fit and return an SVM trained on entries.

    Each entry supplies its feature vector as .v and its class as .c.
    A truthy lin_or_poly selects LinearSVC; otherwise an SVC with a
    polynomial kernel is used.
    """
    # one pass over entries, reading .v and then .c of each item
    pairs = [(entry.v, entry.c) for entry in entries]
    feature_vectors = [vector for vector, _ in pairs]
    classes = [label for _, label in pairs]

    model = svm.LinearSVC() if lin_or_poly else svm.SVC(kernel='poly')
    model.fit(feature_vectors, classes)
    return model
def test_LinearSVC_iris():
    """Compare the sparse LinearSVC with the dense one on the iris dataset"""
    sparse_model = svm.sparse.LinearSVC().fit(iris.data, iris.target)

    # iris.data is converted with todense() for the dense estimator
    dense_data = iris.data.todense()
    dense_model = svm.LinearSVC().fit(dense_data, iris.target)

    assert_array_almost_equal(dense_model.label_, sparse_model.label_)
    assert_equal(dense_model.fit_intercept, sparse_model.fit_intercept)

    assert_array_almost_equal(dense_model.raw_coef_, sparse_model.raw_coef_,
                              decimal=1)
    dense_prediction = dense_model.predict(dense_data)
    assert_array_almost_equal(dense_prediction,
                              sparse_model.predict(iris.data))

    # check decision_function
    sparse_scores = sparse_model.decision_function(iris.data)
    best_column = np.argmax(sparse_scores, 1)
    assert_array_almost_equal(best_column, dense_model.predict(dense_data))
def test_weight():
    """
    Check that class weights influence the fitted classifiers.
    """
    weighted_svc = svm.SVC()
    # class 1 gets a small weight ...
    weighted_svc.fit(X, Y, {1: 0.1})
    # ... so every one of the 6 predictions is expected to be class 2
    assert_array_almost_equal(weighted_svc.predict(X), [2] * 6)

    X_, y_ = test_dataset_classif(n_samples=200, n_features=100,
                                  param=[5, 1], seed=0)
    # first 180 samples are used for fitting, the remainder for checking
    train_X, train_y = X_[: 180], y_[: 180]
    held_out_X, held_out_y = X_[180:], y_[180:]

    estimators = (linear_model.LogisticRegression(),
                  svm.LinearSVC(),
                  svm.SVC())
    for estimator in estimators:
        estimator.fit(train_X, train_y, class_weight={0: 5})
        predicted = estimator.predict(held_out_X)
        n_correct = np.sum(predicted == held_out_y)
        assert n_correct >= 11
def test_LinearSVC():
    """
    Check that the dense and the sparse LinearSVC agree with each other.
    """
    dense_model = svm.LinearSVC().fit(X, Y)
    sparse_model = svm.sparse.LinearSVC().fit(X, Y)

    assert sparse_model.fit_intercept

    # same raw coefficients (4 decimals) and same predictions on X
    assert_array_almost_equal(dense_model.raw_coef_, sparse_model.raw_coef_,
                              decimal=4)
    assert_array_almost_equal(dense_model.predict(X), sparse_model.predict(X))

    # refit both estimators on the second data set and compare again
    dense_model.fit(X2, Y2)
    sparse_model.fit(X2, Y2)
    assert_array_almost_equal(dense_model.raw_coef_, sparse_model.raw_coef_,
                              decimal=4)
def test_auto_weight():
    """Check that class_weight='auto' does not hurt on imbalanced data"""
    from scikits.learn.linear_model import LogisticRegression
    from scikits.learn.svm.base import _get_class_weight

    # Keep only the first two iris features, then drop every second sample
    # whose target is greater than 1 to make the classes imbalanced.
    X, y = iris.data[:, :2], iris.target
    dropped = np.where(y > 1)[0][::2]
    unbalanced = np.delete(np.arange(y.size), dropped)

    # the largest automatic weight must sit at index 2
    auto_weights = _get_class_weight('auto', y[unbalanced])[0]
    assert np.argmax(auto_weights) == 2

    estimators = (svm.SVC(kernel='linear'),
                  svm.LinearSVC(),
                  LogisticRegression())
    for estimator in estimators:
        # the f1 score with class_weight='auto' must be at least as good
        # as the one obtained with an empty class_weight
        estimator.fit(X[unbalanced], y[unbalanced], class_weight={})
        y_pred = estimator.predict(X)

        estimator.fit(X[unbalanced], y[unbalanced], class_weight='auto')
        y_pred_balanced = estimator.predict(X)

        plain_score = metrics.f1_score(y, y_pred)
        balanced_score = metrics.f1_score(y, y_pred_balanced)
        assert plain_score <= balanced_score
def test_bad_input():
    """
    Check that deficient input raises ValueError and that non-contiguous
    arrays are still fitted correctly.
    """
    # impossible value of C
    assert_raises(ValueError, svm.SVC(C=-1).fit, X, Y)

    # impossible value of nu
    nu_clf = svm.NuSVC(nu=0.0)
    assert_raises(ValueError, nu_clf.fit, X, Y)

    # wrong dimensions for labels: one label fewer than there are samples
    short_labels = Y[:-1]
    assert_raises(ValueError, nu_clf.fit, X, short_labels)

    # Test with arrays that are non-contiguous.
    estimators = (svm.SVC(), svm.LinearSVC(),
                  svm.sparse.SVC(), svm.sparse.LinearSVC())
    for estimator in estimators:
        fortran_X = np.asfortranarray(X)
        assert not fortran_X.flags['C_CONTIGUOUS']

        # take the last column of a two-column copy of Y
        tiled = np.ascontiguousarray(np.tile(Y, (2, 1)).T)
        strided_y = tiled[:, -1]
        assert not strided_y.flags['F_CONTIGUOUS']
        assert not strided_y.flags['C_CONTIGUOUS']

        estimator.fit(fortran_X, strided_y)
        assert_array_equal(estimator.predict(T), true_result)

    # precomputed kernel: fitting on X itself must raise
    precomputed = svm.SVC(kernel='precomputed')
    assert_raises(ValueError, precomputed.fit, X, Y)

    # precomputed kernel fitted on dot(X, X.T): predicting on X must raise
    Xt = np.array(X).T
    precomputed = svm.SVC(kernel='precomputed')
    precomputed.fit(np.dot(X, Xt), Y)
    assert_raises(ValueError, precomputed.predict, X)

    # default SVC fitted on X: predicting on the transposed data must raise
    default_clf = svm.SVC()
    default_clf.fit(X, Y)
    assert_raises(ValueError, default_clf.predict, Xt)
from scikits.learn import svm, datasets # import some data to play with iris = datasets.load_iris() X = iris.data[:, :2] # we only take the first two features. We could # avoid this ugly slicing by using a two-dim dataset Y = iris.target h=.02 # step size in the mesh # we create an instance of SVM and fit out data. We do not scale our # data since we want to plot the support vectors svc = svm.SVC(kernel='linear').fit(X, Y) rbf_svc = svm.SVC(kernel='poly').fit(X, Y) nu_svc = svm.NuSVC(kernel='linear').fit(X,Y) lin_svc = svm.LinearSVC().fit(X, Y) # create a mesh to plot in x_min, x_max = X[:,0].min()-1, X[:,0].max()+1 y_min, y_max = X[:,1].min()-1, X[:,1].max()+1 xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h)) # title for the plots titles = ['SVC with linear kernel', 'SVC with polynomial (degree 3) kernel', 'NuSVC with linear kernel', 'LinearSVC (linear kernel)'] pl.set_cmap(pl.cm.Paired)
def __init__(self, value):
    """
    Store value on the instance as self._value.

    No type check is performed.  The isinstance test that used to guard
    this assignment ended in ``or True``, so it accepted every value and
    its failure branch could never run; it only cost the construction of
    two throw-away estimators per call.  That dead check is removed and
    the accept-everything behaviour is kept as it was.
    """
    self._value = value
def validate(self, value):
    """
    Check that value is an svm.SVC or svm.LinearSVC instance.

    Returns None when the value is acceptable.

    Raises AssertionError("SVMModelField: Improper type.") otherwise.
    """
    # Test against the classes themselves instead of building two
    # estimators on every call just to read their types.
    if not isinstance(value, (svm.SVC, svm.LinearSVC)):
        # Raised explicitly rather than through `assert False`, so the
        # check is not stripped when Python runs with -O; the exception
        # type seen by callers stays AssertionError.
        raise AssertionError("SVMModelField: Improper type.")
def _make_model(self):
    """Build a LinearSVC from self._param and store it as self._m."""
    from scikits.learn import svm

    # dict() copies self._param before it is expanded into keyword
    # arguments for the estimator
    self._m = svm.LinearSVC(**dict(self._param))
from scikits.learn import datasets, svm
import pylab as pl

# Fit a LinearSVC without intercept on the digits dataset.
digits = datasets.load_digits()
clf = svm.LinearSVC(fit_intercept=False)
clf.fit(digits.data, digits.target)

# Show the first four rows of coef_ as 8x8 images on the top row of a
# 2x4 subplot grid.
for i in range(4):
    weights_image = clf.coef_[i].reshape(8, 8)
    pl.subplot(2, 4, 1 + i)
    pl.imshow(weights_image, cmap=pl.cm.gray_r, interpolation='nearest')
    pl.axis('off')

pl.show()