def test_svr(): # Test Support Vector Regression diabetes = datasets.load_diabetes() for clf in (svm.NuSVR(kernel='linear', nu=.4, C=1.0), svm.NuSVR(kernel='linear', nu=.4, C=10.), svm.SVR(kernel='linear', C=10.), svm.LinearSVR(C=10.), svm.LinearSVR(C=10.)): clf.fit(diabetes.data, diabetes.target) assert clf.score(diabetes.data, diabetes.target) > 0.02 # non-regression test; previously, BaseLibSVM would check that # len(np.unique(y)) < 2, which must only be done for SVC svm.SVR().fit(diabetes.data, np.ones(len(diabetes.data))) svm.LinearSVR().fit(diabetes.data, np.ones(len(diabetes.data)))
def test_sparse_fit_support_vectors_empty(): # Regression test for #14893 X_train = sparse.csr_matrix([[0, 1, 0, 0], [0, 0, 0, 1], [0, 0, 1, 0], [0, 0, 0, 1]]) y_train = np.array([0.04, 0.04, 0.10, 0.16]) model = svm.SVR(kernel='linear') model.fit(X_train, y_train) assert not model.support_vectors_.data.size assert not model.dual_coef_.data.size
def test_svr_errors(): X = [[0.0], [1.0]] y = [0.0, 0.5] # Bad kernel clf = svm.SVR(kernel=lambda x, y: np.array([[1.0]])) clf.fit(X, y) with pytest.raises(ValueError): clf.predict(X)
def test_svr_predict(): # Test SVR's decision_function # Sanity check, test that predict implemented in python # returns the same as the one in libsvm X = iris.data y = iris.target # linear kernel reg = svm.SVR(kernel='linear', C=0.1).fit(X, y) dec = np.dot(X, reg.coef_.T) + reg.intercept_ assert_array_almost_equal(dec.ravel(), reg.predict(X).ravel()) # rbf kernel reg = svm.SVR(kernel='rbf', gamma=1).fit(X, y) rbfs = rbf_kernel(X, reg.support_vectors_, gamma=reg.gamma) dec = np.dot(rbfs, reg.dual_coef_.T) + reg.intercept_ assert_array_almost_equal(dec.ravel(), reg.predict(X).ravel())
def test_linearsvr(): # check that SVR(kernel='linear') and LinearSVC() give # comparable results diabetes = datasets.load_diabetes() lsvr = svm.LinearSVR(C=1e3).fit(diabetes.data, diabetes.target) score1 = lsvr.score(diabetes.data, diabetes.target) svr = svm.SVR(kernel='linear', C=1e3).fit(diabetes.data, diabetes.target) score2 = svr.score(diabetes.data, diabetes.target) assert_allclose(np.linalg.norm(lsvr.coef_), np.linalg.norm(svr.coef_), 1, 0.0001) assert_almost_equal(score1, score2, 2)
def test_immutable_coef_property(): # Check that primal coef modification are not silently ignored svms = [ svm.SVC(kernel='linear').fit(iris.data, iris.target), svm.NuSVC(kernel='linear').fit(iris.data, iris.target), svm.SVR(kernel='linear').fit(iris.data, iris.target), svm.NuSVR(kernel='linear').fit(iris.data, iris.target), svm.OneClassSVM(kernel='linear').fit(iris.data), ] for clf in svms: with pytest.raises(AttributeError): clf.__setattr__('coef_', np.arange(3)) with pytest.raises((RuntimeError, ValueError)): clf.coef_.__setitem__((0, 0), 0)
def test_svr_coef_sign(): # Test that SVR(kernel="linear") has coef_ with the right sign. # Non-regression test for #2933. X = np.random.RandomState(21).randn(10, 3) y = np.random.RandomState(12).randn(10) for svr in [ svm.SVR(kernel='linear'), svm.NuSVR(kernel='linear'), svm.LinearSVR() ]: svr.fit(X, y) assert_array_almost_equal( svr.predict(X), np.dot(X, svr.coef_.ravel()) + svr.intercept_)
def test_n_support_oneclass_svr(): # Make n_support is correct for oneclass and SVR (used to be # non-initialized) # this is a non regression test for issue #14774 X = np.array([[0], [0.44], [0.45], [0.46], [1]]) clf = svm.OneClassSVM() assert not hasattr(clf, 'n_support_') clf.fit(X) assert clf.n_support_ == clf.support_vectors_.shape[0] assert clf.n_support_.size == 1 assert clf.n_support_ == 3 y = np.arange(X.shape[0]) reg = svm.SVR().fit(X, y) assert reg.n_support_ == reg.support_vectors_.shape[0] assert reg.n_support_.size == 1 assert reg.n_support_ == 4
def test_ovr_single_label_predict_proba(): base_clf = MultinomialNB(alpha=1) X, Y = iris.data, iris.target X_train, Y_train = X[:80], Y[:80] X_test = X[80:] clf = OneVsRestClassifier(base_clf).fit(X_train, Y_train) # Decision function only estimator. decision_only = OneVsRestClassifier(svm.SVR()).fit(X_train, Y_train) assert not hasattr(decision_only, 'predict_proba') Y_pred = clf.predict(X_test) Y_proba = clf.predict_proba(X_test) assert_almost_equal(Y_proba.sum(axis=1), 1.0) # predict assigns a label if the probability that the # sample has the label is greater than 0.5. pred = np.array([l.argmax() for l in Y_proba]) assert not (pred - Y_pred).any()
def test_ovr_multilabel_predict_proba(): base_clf = MultinomialNB(alpha=1) for au in (False, True): X, Y = datasets.make_multilabel_classification(n_samples=100, n_features=20, n_classes=5, n_labels=3, length=50, allow_unlabeled=au, random_state=0) X_train, Y_train = X[:80], Y[:80] X_test = X[80:] clf = OneVsRestClassifier(base_clf).fit(X_train, Y_train) # Decision function only estimator. decision_only = OneVsRestClassifier(svm.SVR()).fit(X_train, Y_train) assert not hasattr(decision_only, 'predict_proba') # Estimator with predict_proba disabled, depending on parameters. decision_only = OneVsRestClassifier(svm.SVC(probability=False)) assert not hasattr(decision_only, 'predict_proba') decision_only.fit(X_train, Y_train) assert not hasattr(decision_only, 'predict_proba') assert hasattr(decision_only, 'decision_function') # Estimator which can get predict_proba enabled after fitting gs = GridSearchCV(svm.SVC(probability=False), param_grid={'probability': [True]}) proba_after_fit = OneVsRestClassifier(gs) assert not hasattr(proba_after_fit, 'predict_proba') proba_after_fit.fit(X_train, Y_train) assert hasattr(proba_after_fit, 'predict_proba') Y_pred = clf.predict(X_test) Y_proba = clf.predict_proba(X_test) # predict assigns a label if the probability that the # sample has the label is greater than 0.5. pred = Y_proba > .5 assert_array_equal(pred, Y_pred)
y_pred = estimator.decision_function([[-1., 1.]]) assert y_pred == pytest.approx(0) # give more weights to opposed samples sample_weight = [10., .1, .1, .1, .1, 10] estimator.fit(X, Y, sample_weight=sample_weight) y_pred = estimator.decision_function([[-1., 1.]]) assert y_pred < 0 sample_weight = [1., .1, 10., 10., .1, .1] estimator.fit(X, Y, sample_weight=sample_weight) y_pred = estimator.decision_function([[-1., 1.]]) assert y_pred > 0 @pytest.mark.parametrize("estimator", [svm.SVR(C=1e-2), svm.NuSVR(C=1e-2)]) def test_svm_regressor_sided_sample_weight(estimator): # similar test to test_svm_classifier_sided_sample_weight but for # SVM regressors X = [[-2, 0], [-1, -1], [0, -2], [0, 2], [1, 1], [2, 0]] estimator.set_params(kernel='linear') # check that with unit weights, a sample is supposed to be predicted on # the boundary sample_weight = [1] * 6 estimator.fit(X, Y, sample_weight=sample_weight) y_pred = estimator.predict([[-1., 1.]]) assert y_pred == pytest.approx(1.5) # give more weights to opposed samples sample_weight = [10., .1, .1, .1, .1, 10]