Example no. 1
import pandas as pd
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import (accuracy_score, confusion_matrix, f1_score,
                             ConfusionMatrixDisplay, RocCurveDisplay)


def binary_classifier_quality(model, X_test, Y_test):
    """
    Print quality metrics for a binary classifier.
    If `model` is a fitted GridSearchCV, its cross-validation results are
    printed and its best estimator is used for prediction.
    """
    if isinstance(model, GridSearchCV):
        result = pd.DataFrame(
            {k: model.cv_results_[k]
             for k in ['params', 'mean_test_score',
                       'std_test_score', 'rank_test_score']}
        )
        print(result)
        print()
        print(f"best params: {model.best_params_}")
        print(f"best score: {model.best_score_:f}")
        print()

    Y_hat = model.predict(X_test)
    print("Confusion matrix (true x pred):")
    print(confusion_matrix(Y_test, Y_hat))
    # sensitivity = TP / (TP + FN), specificity = TN / (TN + FP);
    # both assume 0/1 integer labels and predictions
    print("Sensitivity: {:f}".format(sum(Y_hat[Y_test == 1]) / sum(Y_test)))
    print("Specificity: {:f}".format(sum(1 - Y_hat[Y_test == 0]) / sum(Y_test == 0)))
    print("Accuracy score on test data: {:f}".format(accuracy_score(Y_test, Y_hat)))
    print("F1 score on test data: {:f}".format(f1_score(Y_test, Y_hat)))

    ConfusionMatrixDisplay.from_predictions(Y_test, Y_hat)
    RocCurveDisplay.from_estimator(model, X_test, Y_test)
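
A minimal usage sketch for the helper above; the synthetic data and the LogisticRegression grid are illustrative assumptions, not part of the original snippet:

from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

X, y = make_classification(n_samples=500, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# any fitted binary classifier works; passing a GridSearchCV additionally
# triggers the cross-validation report at the top of the helper
grid = GridSearchCV(LogisticRegression(max_iter=1000), {"C": [0.1, 1.0, 10.0]})
grid.fit(X_train, y_train)
binary_classifier_quality(grid, X_test, y_test)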
Example no. 2
def test_confusion_matrix_with_unknown_labels(pyplot, constructor_name):
    """Check that when labels=None, the unique values in `y_pred` and `y_true`
    will be used.
    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/pull/18405
    """
    n_classes = 5
    X, y = make_classification(
        n_samples=100, n_informative=5, n_classes=n_classes, random_state=0
    )
    classifier = SVC().fit(X, y)
    y_pred = classifier.predict(X)
    # shift the labels so that `y_true` contains a class not seen during
    # fitting and not present in `classifier.classes_`
    y = y + 1

    # safeguard for the binary if/else construction
    assert constructor_name in ("from_estimator", "from_predictions")

    common_kwargs = {"labels": None}
    if constructor_name == "from_estimator":
        disp = ConfusionMatrixDisplay.from_estimator(
            classifier, X, y, **common_kwargs
        )
    else:
        disp = ConfusionMatrixDisplay.from_predictions(
            y, y_pred, **common_kwargs
        )

    display_labels = [tick.get_text() for tick in disp.ax_.get_xticklabels()]
    expected_labels = [str(i) for i in range(n_classes + 1)]
    assert_array_equal(expected_labels, display_labels)
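
The behaviour under test can be reproduced directly with confusion_matrix; a small sketch with arbitrary values:

import numpy as np
from sklearn.metrics import confusion_matrix

y_true = np.array([1, 2, 3])
y_pred = np.array([0, 1, 2])
# with labels=None the axes cover the union {0, 1, 2, 3} of both arrays
print(confusion_matrix(y_true, y_pred).shape)  # (4, 4)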
Example no. 3
def test_confusion_matrix_display_invalid_option(pyplot, constructor_name):
    """Check the error raised when an invalid parameter value is passed."""
    X, y = make_classification(n_samples=100,
                               n_informative=5,
                               n_classes=5,
                               random_state=0)
    classifier = SVC().fit(X, y)
    y_pred = classifier.predict(X)

    # safeguard for the binary if/else construction
    assert constructor_name in ("from_estimator", "from_predictions")
    extra_params = {"normalize": "invalid"}

    err_msg = r"normalize must be one of \{'true', 'pred', 'all', None\}"
    with pytest.raises(ValueError, match=err_msg):
        if constructor_name == "from_estimator":
            ConfusionMatrixDisplay.from_estimator(classifier, X, y,
                                                  **extra_params)
        else:
            ConfusionMatrixDisplay.from_predictions(y, y_pred, **extra_params)
Example no. 4
def test_confusion_matrix_display_validation(pyplot):
    """Check that we raise the proper error when validating parameters."""
    X, y = make_classification(n_samples=100,
                               n_informative=5,
                               n_classes=5,
                               random_state=0)

    regressor = SVR().fit(X, y)
    y_pred_regressor = regressor.predict(X)
    y_pred_classifier = SVC().fit(X, y).predict(X)

    err_msg = "ConfusionMatrixDisplay.from_estimator only supports classifiers"
    with pytest.raises(ValueError, match=err_msg):
        ConfusionMatrixDisplay.from_estimator(regressor, X, y)

    err_msg = "Mix type of y not allowed, got types"
    with pytest.raises(ValueError, match=err_msg):
        # Force `y_true` to be seen as a regression problem
        ConfusionMatrixDisplay.from_predictions(y + 0.5, y_pred_classifier)
    with pytest.raises(ValueError, match=err_msg):
        ConfusionMatrixDisplay.from_predictions(y, y_pred_regressor)

    err_msg = "Found input variables with inconsistent numbers of samples"
    with pytest.raises(ValueError, match=err_msg):
        ConfusionMatrixDisplay.from_predictions(y, y_pred_classifier[::2])
Example no. 5
def test_confusion_matrix_display(pyplot, constructor_name):
    """Check the behaviour of the default constructor without using the class
    methods."""
    n_classes = 5
    X, y = make_classification(
        n_samples=100, n_informative=5, n_classes=n_classes, random_state=0
    )
    classifier = SVC().fit(X, y)
    y_pred = classifier.predict(X)

    # safeguard for the binary if/else construction
    assert constructor_name in ("from_estimator", "from_predictions")

    cm = confusion_matrix(y, y_pred)
    common_kwargs = {
        "normalize": None,
        "include_values": True,
        "cmap": "viridis",
        "xticks_rotation": 45.0,
    }
    if constructor_name == "from_estimator":
        disp = ConfusionMatrixDisplay.from_estimator(
            classifier, X, y, **common_kwargs
        )
    else:
        disp = ConfusionMatrixDisplay.from_predictions(
            y, y_pred, **common_kwargs
        )

    assert_allclose(disp.confusion_matrix, cm)
    assert disp.text_.shape == (n_classes, n_classes)

    rotations = [tick.get_rotation() for tick in disp.ax_.get_xticklabels()]
    assert_allclose(rotations, 45.0)

    image_data = disp.im_.get_array().data
    assert_allclose(image_data, cm)

    disp.plot(cmap="plasma")
    assert disp.im_.get_cmap().name == "plasma"

    disp.plot(include_values=False)
    assert disp.text_ is None

    disp.plot(xticks_rotation=90.0)
    rotations = [tick.get_rotation() for tick in disp.ax_.get_xticklabels()]
    assert_allclose(rotations, 90.0)

    disp.plot(values_format="e")
    expected_text = np.array([format(v, "e") for v in cm.ravel(order="C")])
    text_text = np.array([t.get_text() for t in disp.text_.ravel(order="C")])
    assert_array_equal(expected_text, text_text)
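
The same display can also be built without the class methods, directly from a precomputed matrix; a short sketch with arbitrary values:

import numpy as np
from sklearn.metrics import ConfusionMatrixDisplay

cm = np.array([[5, 2],
               [1, 7]])
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=["neg", "pos"])
disp.plot(cmap="viridis", values_format="d")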
Example no. 6
import numpy
import matplotlib.pyplot as plt
from sklearn.model_selection import KFold
from sklearn.metrics import classification_report, ConfusionMatrixDisplay


def runClassifier(clf, X, y):
    """Run 10-fold cross-validation and report pooled out-of-fold results."""
    kf = KFold(10)

    predictions = numpy.array([])
    targets = numpy.array([])

    for train, test in kf.split(X):
        X_train = X[train]
        X_test = X[test]
        y_train = y[train]
        y_test = y[test]

        # fit() re-initializes the estimator, so the same object can be
        # safely refitted on every fold
        thisFoldClf = clf.fit(X_train, y_train)
        predictions = numpy.append(predictions, thisFoldClf.predict(X_test))
        targets = numpy.append(targets, y_test)

    print(classification_report(targets, predictions))

    # normalize='true' shows each cell as a fraction of its true class
    ConfusionMatrixDisplay.from_predictions(targets,
                                            predictions,
                                            normalize='true')
    plt.show()
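
A usage sketch; the iris data and the DecisionTreeClassifier are assumptions chosen only to exercise the helper:

from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier

X, y = load_iris(return_X_y=True)
# pools the out-of-fold predictions from all 10 folds into one report
runClassifier(DecisionTreeClassifier(random_state=0), X, y)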
Example no. 7
def test_confusion_matrix_display_custom_labels(
    pyplot, constructor_name, with_labels, with_display_labels
):
    """Check the resulting plot when labels are given."""
    n_classes = 5
    X, y = make_classification(
        n_samples=100, n_informative=5, n_classes=n_classes, random_state=0
    )
    classifier = SVC().fit(X, y)
    y_pred = classifier.predict(X)

    # safeguard for the binary if/else construction
    assert constructor_name in ("from_estimator", "from_predictions")

    ax = pyplot.gca()
    labels = [2, 1, 0, 3, 4] if with_labels else None
    display_labels = ["b", "d", "a", "e", "f"] if with_display_labels else None

    cm = confusion_matrix(y, y_pred, labels=labels)
    common_kwargs = {
        "ax": ax,
        "display_labels": display_labels,
        "labels": labels,
    }
    if constructor_name == "from_estimator":
        disp = ConfusionMatrixDisplay.from_estimator(
            classifier, X, y, **common_kwargs
        )
    else:
        disp = ConfusionMatrixDisplay.from_predictions(
            y, y_pred, **common_kwargs
        )
    assert_allclose(disp.confusion_matrix, cm)

    if with_display_labels:
        expected_display_labels = display_labels
    elif with_labels:
        expected_display_labels = labels
    else:
        expected_display_labels = list(range(n_classes))

    expected_display_labels_str = [str(name)
                                   for name in expected_display_labels]

    x_ticks = [tick.get_text() for tick in disp.ax_.get_xticklabels()]
    y_ticks = [tick.get_text() for tick in disp.ax_.get_yticklabels()]

    assert_array_equal(disp.display_labels, expected_display_labels)
    assert_array_equal(x_ticks, expected_display_labels_str)
    assert_array_equal(y_ticks, expected_display_labels_str)
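
The distinction exercised above, in short: `labels` selects and orders the classes used to compute the matrix, while `display_labels` only renames the ticks. A toy sketch with made-up values:

from sklearn.metrics import ConfusionMatrixDisplay

ConfusionMatrixDisplay.from_predictions(
    [0, 1, 2], [0, 2, 1],
    labels=[2, 1, 0],                       # matrix rows/cols in this order
    display_labels=["high", "mid", "low"],  # tick text only
)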
Example no. 8
from typing import Dict, Optional, Union

import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import ConfusionMatrixDisplay

import utils  # project-internal helper module


def plot_confusion_matrix(labels: np.ndarray,
                          predictions: np.ndarray,
                          class_label_names: Optional[Dict[Union[str, int],
                                                           Union[str,
                                                                 int]]] = None,
                          normalize: Optional[str] = None,
                          title_fontsize: Optional[int] = 12,
                          x_label_fontsize: Optional[int] = 12,
                          y_label_fontsize: Optional[int] = 12,
                          heatmap_color: Optional[str] = 'Greens') -> None:
    """Plots a confusion matrix for a multiclass classification model.

    Args:
      labels: An array of true multiclass labels.
      predictions: An array of predicted multiclass labels.
      class_label_names: Dictionary mapping class labels to target names.
        Both class labels and target names can be either 'int' or 'str'.
        E.g. {0: 'low_value', 1: 'mid_value', 2: 'high_value'}.
      normalize: Whether to normalize the counts in the matrix; one of
        'true', 'pred', 'all' or None.
      title_fontsize: Font size of the figure title.
      x_label_fontsize: Font size of the x axis labels.
      y_label_fontsize: Font size of the y axis labels.
      heatmap_color: Colormap of the heatmap plot.

    Returns:
      None. Displays a heatmap of the confusion matrix.
    """
    utils.assert_label_and_prediction_length_match(labels, predictions)

    if class_label_names is None:
        class_labels = sorted(set(labels))
        target_names = [str(label) for label in class_labels]
    else:
        class_labels = list(class_label_names.keys())
        target_names = list(class_label_names.values())

    # pass `class_labels` explicitly so the matrix rows and columns line up
    # with `target_names`
    plot = ConfusionMatrixDisplay.from_predictions(y_true=labels,
                                                   y_pred=predictions,
                                                   labels=class_labels,
                                                   display_labels=target_names,
                                                   normalize=normalize,
                                                   include_values=True,
                                                   cmap=heatmap_color)
    plot.ax_.set_title('Confusion matrix', fontsize=title_fontsize)
    plot.ax_.set_xlabel('Predicted label', fontsize=x_label_fontsize)
    plot.ax_.set_ylabel('Actual label', fontsize=y_label_fontsize)
    plt.show()
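
A usage sketch, assuming the project's utils helper is importable; the labels and the mapping are arbitrary:

labels = np.array([0, 1, 2, 2, 1, 0])
predictions = np.array([0, 2, 2, 2, 1, 0])
plot_confusion_matrix(labels, predictions,
                      class_label_names={0: 'low', 1: 'mid', 2: 'high'},
                      normalize='true')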
Example no. 9
def test_confusion_matrix_display_plotting(
    pyplot, constructor_name, normalize, include_values,
):
    """Check the overall plotting rendering."""
    n_classes = 5
    X, y = make_classification(
        n_samples=100, n_informative=5, n_classes=n_classes, random_state=0
    )
    classifier = SVC().fit(X, y)
    y_pred = classifier.predict(X)

    # safeguard for the binary if/else construction
    assert constructor_name in ("from_estimator", "from_predictions")

    ax = pyplot.gca()
    cmap = "plasma"

    cm = confusion_matrix(y, y_pred)
    common_kwargs = {
        "normalize": normalize,
        "cmap": cmap,
        "ax": ax,
        "include_values": include_values,
    }
    if constructor_name == "from_estimator":
        disp = ConfusionMatrixDisplay.from_estimator(
            classifier, X, y, **common_kwargs
        )
    else:
        disp = ConfusionMatrixDisplay.from_predictions(
            y, y_pred, **common_kwargs
        )

    assert disp.ax_ == ax

    if normalize == "true":
        cm = cm / cm.sum(axis=1, keepdims=True)
    elif normalize == "pred":
        cm = cm / cm.sum(axis=0, keepdims=True)
    elif normalize == "all":
        cm = cm / cm.sum()

    assert_allclose(disp.confusion_matrix, cm)
    import matplotlib as mpl

    assert isinstance(disp.im_, mpl.image.AxesImage)
    assert disp.im_.get_cmap().name == cmap
    assert isinstance(disp.ax_, pyplot.Axes)
    assert isinstance(disp.figure_, pyplot.Figure)

    assert disp.ax_.get_ylabel() == "True label"
    assert disp.ax_.get_xlabel() == "Predicted label"

    x_ticks = [tick.get_text() for tick in disp.ax_.get_xticklabels()]
    y_ticks = [tick.get_text() for tick in disp.ax_.get_yticklabels()]

    expected_display_labels = list(range(n_classes))

    expected_display_labels_str = [
        str(name) for name in expected_display_labels
    ]

    assert_array_equal(disp.display_labels, expected_display_labels)
    assert_array_equal(x_ticks, expected_display_labels_str)
    assert_array_equal(y_ticks, expected_display_labels_str)

    image_data = disp.im_.get_array().data
    assert_allclose(image_data, cm)

    if include_values:
        assert disp.text_.shape == (n_classes, n_classes)
        fmt = ".2g"
        expected_text = np.array([format(v, fmt) for v in cm.ravel(order="C")])
        text_text = np.array(
            [t.get_text() for t in disp.text_.ravel(order="C")]
        )
        assert_array_equal(expected_text, text_text)
    else:
        assert disp.text_ is None
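
The three normalization branches above are plain row, column, or grand-total divisions; a worked two-class example:

import numpy as np

cm = np.array([[8, 2],
               [1, 9]])
print(cm / cm.sum(axis=1, keepdims=True))  # normalize="true":  [[0.8, 0.2], [0.1, 0.9]]
print(cm / cm.sum(axis=0, keepdims=True))  # normalize="pred":  [[8/9, 2/11], [1/9, 9/11]]
print(cm / cm.sum())                       # normalize="all":   [[0.4, 0.1], [0.05, 0.45]]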
Example no. 10
predicted_labels = lp_model.transduction_[unlabeled_set]
true_labels = y[unlabeled_set]

print("Label Spreading model: %d labeled & %d unlabeled points (%d total)" %
      (n_labeled_points, n_total_samples - n_labeled_points, n_total_samples))

# %%
# Classification report
print(classification_report(true_labels, predicted_labels))

# %%
# Confusion matrix
from sklearn.metrics import ConfusionMatrixDisplay

ConfusionMatrixDisplay.from_predictions(true_labels,
                                        predicted_labels,
                                        labels=lp_model.classes_)

# %%
# Plot the most uncertain predictions
# -----------------------------------
#
# Here, we will pick and show the 10 most uncertain predictions.
from scipy import stats

# stats.entropy computes the per-sample entropy of the predicted label
# distributions (one distribution per column after transposing);
# higher entropy means a more uncertain prediction
pred_entropies = stats.entropy(lp_model.label_distributions_.T)

# %%
# Pick the top 10 most uncertain labels
uncertainty_index = np.argsort(pred_entropies)[-10:]
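
The "show" step itself is not included above; a sketch of how the ten most uncertain digits might be rendered, assuming the digits `images` array from the surrounding example is available:

import matplotlib.pyplot as plt

f = plt.figure(figsize=(7, 5))
for index, image_index in enumerate(uncertainty_index):
    image = images[image_index]
    sub = f.add_subplot(2, 5, index + 1)
    sub.imshow(image, cmap=plt.cm.gray_r)
    sub.set_title("pred: %i\ntrue: %i" % (
        lp_model.transduction_[image_index], y[image_index]))
    sub.axis("off")
plt.show()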
Example no. 11
from sklearn.linear_model import RidgeClassifier

clf = RidgeClassifier(tol=1e-2, solver="sparse_cg")
clf.fit(X_train, y_train)
pred = clf.predict(X_test)

# %%
# We plot the confusion matrix of this classifier to see whether there is a
# pattern in the classification errors.

import matplotlib.pyplot as plt
from sklearn.metrics import ConfusionMatrixDisplay

fig, ax = plt.subplots(figsize=(10, 5))
ConfusionMatrixDisplay.from_predictions(y_test, pred, ax=ax)
ax.xaxis.set_ticklabels(target_names)
ax.yaxis.set_ticklabels(target_names)
_ = ax.set_title(
    f"Confusion Matrix for {clf.__class__.__name__}\non the original documents"
)

# %%
# The confusion matrix highlights that documents of the `alt.atheism` class
# are often confused with documents of the `talk.religion.misc` class and
# vice versa, which is expected since the two topics are semantically related.
#
# We also observe that some documents of the `sci.space` class can be misclassified as
# `comp.graphics` while the converse is much rarer. A manual inspection of those
# badly classified documents would be required to get some insights on this
# asymmetry. It could be the case that the vocabulary of the space topic is
# more specific than the vocabulary of the computer graphics topic.