Example #1
def test_LinearSVC():
    """
    Test basic routines using LinearSVC
    """
    clf = svm.LinearSVC().fit(X, Y)

    # by default should have intercept
    assert clf.fit_intercept

    assert_array_equal(clf.predict(T), true_result)
    assert_array_almost_equal(clf.intercept_, [0], decimal=3)

    # the same with l1 penalty
    clf = svm.LinearSVC(penalty='l1', dual=False).fit(X, Y)
    assert_array_equal(clf.predict(T), true_result)

    # l2 penalty with dual formulation
    clf = svm.LinearSVC(penalty='l2', dual=True).fit(X, Y)
    assert_array_equal(clf.predict(T), true_result)

    # l2 penalty, l1 loss
    clf = svm.LinearSVC(penalty='l2', loss='l1', dual=True).fit(X, Y)
    assert_array_equal(clf.predict(T), true_result)

    # test also decision function
    dec = clf.decision_function(T).ravel()
    res = (dec > 0).astype(int) + 1
    assert_array_equal(res, true_result)
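These test snippets come from a module where the toy fixtures X, Y, T, true_result and the assert helpers are defined once at the top. A minimal sketch of plausible definitions so the snippets can run standalone (the exact values are an assumption, chosen so a linear separator reproduces true_result):

import numpy as np
from numpy.testing import assert_array_equal, assert_array_almost_equal
from scikits.learn import svm

# hypothetical toy fixtures: two linearly separable classes in 2-D
X = [[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]]
Y = [1, 1, 1, 2, 2, 2]
# test points and the labels a linear separator should assign them
T = [[-1, -1], [2, 2], [3, 2]]
true_result = [1, 2, 2]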
Example #2
def test_liblinear_predict():
    """
    Test liblinear predict

    Sanity check: test that predict as implemented in python
    returns the same results as liblinear's

    """
    # multi-class case
    clf = svm.LinearSVC().fit(iris.data, iris.target)
    weights = clf.coef_.T
    bias = clf.intercept_
    H = np.dot(iris.data, weights) + bias
    assert_array_equal(clf.predict(iris.data), H.argmax(axis=1))

    # binary-class case
    X = [[2, 1],
         [3, 1],
         [1, 3],
         [2, 3]]
    y = [0, 0, 1, 1]

    clf = svm.LinearSVC().fit(X, y)
    weights = np.ravel(clf.coef_)
    bias = clf.intercept_
    H = np.dot(X, weights) + bias
    assert_array_equal(clf.predict(X), (H > 0).astype(int))
Example #3
def train_liblinear_classifier_core(trainXy,
                                    classifier_type="liblinear",
                                    trace_normalize=False,
                                    **kwargs):
    """ Classifier training using SVMs

    Input:
    train_features = training features (both positive and negative)
    train_labels = corresponding label vector
    svm_eps = eps of svm
    svm_C = C parameter of svm
    classifier_type = liblinear or libsvm"""

    #do normalization
    (train_features, train_labels), train_mean, train_std, trace = normalize(
        [trainXy], trace_normalize=trace_normalize)
    if classifier_type == 'liblinear':
        clf = sklearn_svm.LinearSVC(**kwargs)
    elif classifier_type == 'libSVM':
        clf = sklearn_svm.SVC(**kwargs)
    elif classifier_type == 'LRL':
        clf = LogisticRegression(**kwargs)
    elif classifier_type == 'MCC':
        clf = CorrelationClassifier(**kwargs)
    elif classifier_type.startswith('svm.'):
        ct = classifier_type.split('.')[-1]
        clf = getattr(sklearn_svm, ct)(**kwargs)
    elif classifier_type.startswith('linear_model.'):
        ct = classifier_type.split('.')[-1]
        clf = getattr(sklearn_linear_model, ct)(**kwargs)
    else:
        raise ValueError('unknown classifier_type: %r' % classifier_type)

    clf.fit(train_features, train_labels)

    return clf, train_mean, train_std, trace
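The normalize helper above is defined elsewhere in this project; the sketch below shows one plausible implementation, assuming it standardizes features column-wise and optionally scales rows when trace_normalize is set (the signature is taken from the call site, everything else is an assumption):

import numpy as np

def normalize(Xy_list, trace_normalize=False):
    # hypothetical re-implementation matching the call site above
    (features, labels), = Xy_list
    features = np.asarray(features, dtype=float)
    train_mean = features.mean(axis=0)
    train_std = features.std(axis=0)
    train_std[train_std == 0] = 1.0          # avoid division by zero
    features = (features - train_mean) / train_std
    trace = None
    if trace_normalize:
        # scale samples by their mean norm ("trace" here is an
        # assumption about what the original helper records)
        trace = np.sqrt((features ** 2).sum(axis=1)).mean()
        features = features / trace
    return (features, labels), train_mean, train_std, trace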
Example #4
def test_coef_and_intercept_SVC_vs_LinearSVC():
    """
    Test that SVC and LinearSVC return the same coef_ and intercept_
    """
    svc = svm.SVC(kernel='linear', C=1).fit(X, Y)
    linsvc = svm.LinearSVC(C=1, penalty='l2', loss='l1', dual=True).fit(X, Y)

    assert_array_equal(linsvc.coef_.shape, svc.coef_.shape)
    assert_array_almost_equal(linsvc.coef_, svc.coef_, decimal=5)
    assert_array_almost_equal(linsvc.intercept_, svc.intercept_, decimal=5)
Example #5
def test_LinearSVC_iris():
    """
    Test that LinearSVC gives plausible predictions on the iris dataset
    """
    clf = svm.LinearSVC().fit(iris.data, iris.target)
    assert np.mean(clf.predict(iris.data) == iris.target) > 0.8

    dec = clf.decision_function(iris.data)
    pred = np.argmax(dec, 1)
    assert_array_equal(pred, clf.predict(iris.data))
Example #6
def test_LinearSVC():
    """
    Test basic routines using LinearSVC
    """
    clf = svm.LinearSVC().fit(X, Y)

    assert_array_equal(clf.predict(T), true_result)
    assert_array_almost_equal(clf.intercept_, [0], decimal=5)

    # the same with l1 penalty
    clf = svm.LinearSVC(penalty='l1', dual=False).fit(X, Y)
    assert_array_equal(clf.predict(T), true_result)

    # l2 penalty with dual formulation
    clf = svm.LinearSVC(penalty='l2', dual=True).fit(X, Y)
    assert_array_equal(clf.predict(T), true_result)

    # l2 penalty, l1 loss
    clf = svm.LinearSVC(penalty='l2', loss='l1', dual=True).fit(X, Y)
    assert_array_equal(clf.predict(T), true_result)
Example #7
def svm_method(entries, lin_or_poly):
    """Fit a linear SVM if lin_or_poly is true, else a polynomial-kernel SVC."""
    feature_vectors = []
    classes = []
    for e in entries:
        feature_vectors.append(e.v)
        classes.append(e.c)

    if lin_or_poly:
        curr_svm = svm.LinearSVC()
    else:
        curr_svm = svm.SVC(kernel='poly')

    curr_svm.fit(feature_vectors, classes)
    return curr_svm
Example #8
def test_LinearSVC_iris():
    """Test the sparse LinearSVC with the iris dataset"""

    sp_clf = svm.sparse.LinearSVC().fit(iris.data, iris.target)
    clf = svm.LinearSVC().fit(iris.data.todense(), iris.target)

    assert_array_almost_equal(clf.label_, sp_clf.label_)
    assert_equal(clf.fit_intercept, sp_clf.fit_intercept)

    assert_array_almost_equal(clf.raw_coef_, sp_clf.raw_coef_, decimal=1)
    assert_array_almost_equal(clf.predict(iris.data.todense()),
                              sp_clf.predict(iris.data))

    # check decision_function
    pred = np.argmax(sp_clf.decision_function(iris.data), 1)
    assert_array_almost_equal(pred, clf.predict(iris.data.todense()))
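In this sparse test, iris.data is stored as a scipy.sparse matrix (hence the .todense() calls when fitting the dense estimator). A minimal sketch of how such a fixture might be set up, assuming the test module converts the standard iris data to CSR (the conversion itself is an assumption, not part of the snippet):

import scipy.sparse as sp
from scikits.learn import datasets

iris = datasets.load_iris()
# store the features sparsely so svm.sparse.LinearSVC can consume them
iris.data = sp.csr_matrix(iris.data)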
Example #9
def test_weight():
    """
    Test class weights
    """
    clf = svm.SVC()
    # we give a small weight to class 1
    clf.fit(X, Y, {1: 0.1})
    # so all predicted values belong to class 2
    assert_array_almost_equal(clf.predict(X), [2] * 6)

    X_, y_ = test_dataset_classif(n_samples=200, n_features=100, param=[5, 1],
                                  seed=0)
    for clf in (linear_model.LogisticRegression(), svm.LinearSVC(), svm.SVC()):
        clf.fit(X_[: 180], y_[: 180], class_weight={0: 5})
        y_pred = clf.predict(X_[180:])
        assert np.sum(y_pred == y_[180:]) >= 11
Example #10
def test_LinearSVC():
    """
    Similar to test_SVC
    """
    clf = svm.LinearSVC().fit(X, Y)
    sp_clf = svm.sparse.LinearSVC().fit(X, Y)

    assert sp_clf.fit_intercept

    assert_array_almost_equal(clf.raw_coef_, sp_clf.raw_coef_, decimal=4)

    assert_array_almost_equal(clf.predict(X), sp_clf.predict(X))

    clf.fit(X2, Y2)
    sp_clf.fit(X2, Y2)

    assert_array_almost_equal(clf.raw_coef_, sp_clf.raw_coef_, decimal=4)
Example #11
def test_auto_weight():
    """Test class weights for imbalanced data"""
    from scikits.learn.linear_model import LogisticRegression
    # we take as dataset the two-dimensional projection of iris so
    # that it is not separable, and remove half of the samples from
    # class 2
    from scikits.learn.svm.base import _get_class_weight
    X, y = iris.data[:, :2], iris.target
    unbalanced = np.delete(np.arange(y.size), np.where(y > 1)[0][::2])

    assert np.argmax(_get_class_weight('auto', y[unbalanced])[0]) == 2

    for clf in (svm.SVC(kernel='linear'), svm.LinearSVC(), LogisticRegression()):
        # check that the score is better when class_weight='auto' is set
        y_pred = clf.fit(X[unbalanced], y[unbalanced],
                         class_weight={}).predict(X)
        y_pred_balanced = clf.fit(X[unbalanced], y[unbalanced],
                                  class_weight='auto').predict(X)
        assert metrics.f1_score(y, y_pred) <= metrics.f1_score(y, y_pred_balanced)
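The 'auto' mode of _get_class_weight weights classes inversely to their frequency, which is why the under-sampled class gets the largest weight in the assert above. A minimal sketch of that heuristic (a hypothetical helper; the library's exact formula may differ):

import numpy as np

def auto_class_weight(y):
    # weight each class inversely to its frequency, so rare classes
    # count more during training
    classes, counts = np.unique(y, return_counts=True)
    weights = len(y) / (len(classes) * counts.astype(float))
    return dict(zip(classes, weights))

Here auto_class_weight(y[unbalanced]) would assign class 2 the largest weight, matching the argmax assert above.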
Example #12
def test_bad_input():
    """
    Test that it gives proper exception on deficient input
    """
    # impossible value of C
    assert_raises(ValueError, svm.SVC(C=-1).fit, X, Y)

    # impossible value of nu
    clf = svm.NuSVC(nu=0.0)
    assert_raises(ValueError, clf.fit, X, Y)

    Y2 = Y[:-1]  # wrong dimensions for labels
    assert_raises(ValueError, clf.fit, X, Y2)

    # Test with arrays that are non-contiguous.
    for clf in (svm.SVC(), svm.LinearSVC(), svm.sparse.SVC(),
                svm.sparse.LinearSVC()):
        Xf = np.asfortranarray(X)
        assert not Xf.flags['C_CONTIGUOUS']
        yf = np.ascontiguousarray(np.tile(Y, (2, 1)).T)
        yf = yf[:, -1]
        assert not yf.flags['F_CONTIGUOUS']
        assert not yf.flags['C_CONTIGUOUS']
        clf.fit(Xf, yf)
        assert_array_equal(clf.predict(T), true_result)

    # error for precomputed kernels
    clf = svm.SVC(kernel='precomputed')
    assert_raises(ValueError, clf.fit, X, Y)

    Xt = np.array(X).T
    clf = svm.SVC(kernel='precomputed')
    clf.fit(np.dot(X, Xt), Y)
    assert_raises(ValueError, clf.predict, X)

    clf = svm.SVC()
    clf.fit(X, Y)
    assert_raises(ValueError, clf.predict, Xt)
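The precomputed-kernel cases above fail because, with kernel='precomputed', fit expects an n_train x n_train Gram matrix and predict expects the kernel values between the test samples and the training samples, not raw features. A small usage sketch of the correct calls, reusing the toy X, Y, T fixtures (the shapes are the only requirement):

import numpy as np
from scikits.learn import svm

Xa = np.array(X)
clf = svm.SVC(kernel='precomputed')
clf.fit(np.dot(Xa, Xa.T), Y)            # n_train x n_train Gram matrix
K_test = np.dot(np.array(T), Xa.T)      # n_test x n_train kernel values
pred = clf.predict(K_test)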
Example #13
import numpy as np
import pylab as pl
from scikits.learn import svm, datasets

# import some data to play with
iris = datasets.load_iris()
X = iris.data[:, :2] # we only take the first two features. We could
                     # avoid this ugly slicing by using a two-dim dataset
Y = iris.target

h = .02  # step size in the mesh

# we create an instance of SVM and fit our data. We do not scale our
# data since we want to plot the support vectors
svc      = svm.SVC(kernel='linear').fit(X, Y)
poly_svc = svm.SVC(kernel='poly').fit(X, Y)
nu_svc   = svm.NuSVC(kernel='linear').fit(X, Y)
lin_svc  = svm.LinearSVC().fit(X, Y)

# create a mesh to plot in
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                     np.arange(y_min, y_max, h))

# title for the plots
titles = ['SVC with linear kernel',
          'SVC with polynomial (degree 3) kernel',
          'NuSVC with linear kernel',
          'LinearSVC (linear kernel)']


pl.set_cmap(pl.cm.Paired)
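The snippet stops after setting up the mesh, titles, and colormap; the plotting loop it builds toward is not shown. A minimal sketch of that loop, assuming the usual contour-plot pattern (the loop body is an assumption; svc, poly_svc, nu_svc, lin_svc, titles, xx, yy, X, Y come from above):

for i, clf in enumerate((svc, poly_svc, nu_svc, lin_svc)):
    pl.subplot(2, 2, i + 1)
    # classify every point of the mesh and draw the decision regions
    Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)
    pl.contourf(xx, yy, Z)
    # overlay the training points
    pl.scatter(X[:, 0], X[:, 1], c=Y)
    pl.title(titles[i])
pl.show()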
Example #14
 def __init__(self, value):
     if isinstance(value, (svm.SVC, svm.LinearSVC)):
         self._value = value
     else:
         assert False, "Not a valid scikits.learn SVM type."
 def validate(self, value):
     if isinstance(value, (svm.SVC, svm.LinearSVC)):
         pass
     else:
         assert False, "SVMModelField: Improper type."
Example #16
 def _make_model(self):
     from scikits.learn import svm
     kw = dict(self._param)
     self._m = svm.LinearSVC(**kw)
Example #17
from scikits.learn import datasets, svm
import pylab as pl

digits = datasets.load_digits()
clf = svm.LinearSVC(fit_intercept=False)
clf.fit(digits.data, digits.target)

# show the weight vector of each of the first four classes as an 8x8 image
for i in range(4):
    pl.subplot(2, 4, 1 + i)
    pl.imshow(clf.coef_[i].reshape(8, 8),
              cmap=pl.cm.gray_r,
              interpolation='nearest')
    pl.axis('off')
pl.show()