def test_calibration_multiclass(): """Test calibration for multiclass """ # test multi-class setting with classifier that implements # only decision function clf = LinearSVC() X, y_idx = make_blobs(n_samples=100, n_features=2, random_state=42, centers=3, cluster_std=3.0) # Use categorical labels to check that CalibratedClassifierCV supports # them correctly target_names = np.array(['a', 'b', 'c']) y = target_names[y_idx] X_train, y_train = X[::2], y[::2] X_test, y_test = X[1::2], y[1::2] clf.fit(X_train, y_train) for method in ['isotonic', 'sigmoid']: cal_clf = CalibratedClassifierCV(clf, method=method, cv=2) cal_clf.fit(X_train, y_train) probas = cal_clf.predict_proba(X_test) assert_array_almost_equal(np.sum(probas, axis=1), np.ones(len(X_test))) # Check that log-loss of calibrated classifier is smaller than # log-loss of naively turned OvR decision function to probabilities # via softmax def softmax(y_pred): e = np.exp(-y_pred) return e / e.sum(axis=1).reshape(-1, 1) uncalibrated_log_loss = \ log_loss(y_test, softmax(clf.decision_function(X_test))) calibrated_log_loss = log_loss(y_test, probas) assert uncalibrated_log_loss >= calibrated_log_loss # Test that calibration of a multiclass classifier decreases log-loss # for RandomForestClassifier X, y = make_blobs(n_samples=100, n_features=2, random_state=42, cluster_std=3.0) X_train, y_train = X[::2], y[::2] X_test, y_test = X[1::2], y[1::2] clf = RandomForestClassifier(n_estimators=10, random_state=42) clf.fit(X_train, y_train) clf_probs = clf.predict_proba(X_test) loss = log_loss(y_test, clf_probs) for method in ['isotonic', 'sigmoid']: cal_clf = CalibratedClassifierCV(clf, method=method, cv=3) cal_clf.fit(X_train, y_train) cal_clf_probs = cal_clf.predict_proba(X_test) cal_loss = log_loss(y_test, cal_clf_probs) assert loss > cal_loss
""" import numpy as np import matplotlib.pyplot as plt from mrex.datasets import make_blobs from mrex.svm import LinearSVC X, y = make_blobs(n_samples=40, centers=2, random_state=0) plt.figure(figsize=(10, 5)) for i, C in enumerate([1, 100]): # "hinge" is the standard SVM loss clf = LinearSVC(C=C, loss="hinge", random_state=42).fit(X, y) # obtain the support vectors through the decision function decision_function = clf.decision_function(X) # we can also calculate the decision function manually # decision_function = np.dot(X, clf.coef_[0]) + clf.intercept_[0] support_vector_indices = np.where((2 * y - 1) * decision_function <= 1)[0] support_vectors = X[support_vector_indices] plt.subplot(1, 2, i + 1) plt.scatter(X[:, 0], X[:, 1], c=y, s=30, cmap=plt.cm.Paired) ax = plt.gca() xlim = ax.get_xlim() ylim = ax.get_ylim() xx, yy = np.meshgrid(np.linspace(xlim[0], xlim[1], 50), np.linspace(ylim[0], ylim[1], 50)) Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()]) Z = Z.reshape(xx.shape) plt.contour(xx,