Example #1
0
def test_classification_report_imbalanced_multiclass_with_digits():
    """Check the text report with ``digits=5`` and with default settings.

    The first comparison passes explicit labels, class names and
    ``digits=5``; the second relies on label detection and the default
    number of digits. Both reports go through ``_format_report`` before
    being compared with the expected text.
    """
    iris_data = datasets.load_iris()
    truth, predicted, _ = make_prediction(dataset=iris_data, binary=False)
    class_names = iris_data.target_names

    # Report with class names: one expected row per class, five digits each.
    named_rows = [
        "pre rec spe f1 geo iba sup",
        "setosa 0.82609 0.79167 0.92157 0.80851 0.85415 0.72010 24",
        "versicolor 0.33333 0.09677 0.86364 0.15000 0.28910 0.07717 31",
        "virginica 0.41860 0.90000 0.54545 0.57143 0.70065 0.50831 20",
        "avg / total 0.51375 0.53333 0.79733 0.47310 0.57966 0.39788 75",
    ]
    named_report = classification_report_imbalanced(
        truth,
        predicted,
        labels=np.arange(len(class_names)),
        target_names=class_names,
        digits=5,
    )
    assert _format_report(named_report) == " ".join(named_rows)

    # Report with label detection: rows are keyed by the raw labels.
    detected_rows = [
        "pre rec spe f1 geo iba sup",
        "0 0.83 0.79 0.92 0.81 0.85 0.72 24",
        "1 0.33 0.10 0.86 0.15 0.29 0.08 31",
        "2 0.42 0.90 0.55 0.57 0.70 0.51 20",
        "avg / total 0.51 0.53 0.80 0.47 0.58 0.40 75",
    ]
    detected_report = classification_report_imbalanced(truth, predicted)
    assert _format_report(detected_report) == " ".join(detected_rows)
Example #2
0
def test_classification_report_imbalanced_multiclass():
    """Check the text report on iris with class names and with raw labels.

    The first comparison passes explicit labels and class names; the second
    lets the report detect the labels itself. Both reports go through
    ``_format_report`` before being compared with the expected text.
    """
    iris_data = datasets.load_iris()
    truth, predicted, _ = make_prediction(dataset=iris_data, binary=False)
    class_names = iris_data.target_names

    # Report with class names: header row, one row per class, then averages.
    named_rows = [
        "pre rec spe f1 geo iba sup",
        "setosa 0.83 0.79 0.92 0.81 0.85 0.72 24",
        "versicolor 0.33 0.10 0.86 0.15 0.29 0.08 31",
        "virginica 0.42 0.90 0.55 0.57 0.70 0.51 20",
        "avg / total 0.51 0.53 0.80 0.47 0.58 0.40 75",
    ]
    named_report = classification_report_imbalanced(
        truth,
        predicted,
        labels=np.arange(len(class_names)),
        target_names=class_names,
    )
    assert _format_report(named_report) == " ".join(named_rows)

    # Report with label detection: rows are keyed by the raw labels.
    detected_rows = [
        "pre rec spe f1 geo iba sup",
        "0 0.83 0.79 0.92 0.81 0.85 0.72 24",
        "1 0.33 0.10 0.86 0.15 0.29 0.08 31",
        "2 0.42 0.90 0.55 0.57 0.70 0.51 20",
        "avg / total 0.51 0.53 0.80 0.47 0.58 0.40 75",
    ]
    detected_report = classification_report_imbalanced(truth, predicted)
    assert _format_report(detected_report) == " ".join(detected_rows)
Example #3
0
def test_classification_report_imbalanced_dict():
    """Check the key layout of the report returned with ``output_dict=True``.

    Only the keys are verified: the top-level keys of the report and the
    keys of the entry stored under ``0``. Metric values are not inspected.
    """
    # Top level: one entry per class index plus the aggregated entries.
    per_class_keys = {0, 1, 2}
    summary_keys = {
        "avg_pre",
        "avg_rec",
        "avg_spe",
        "avg_f1",
        "avg_geo",
        "avg_iba",
        "total_support",
    }
    expected_outer_keys = per_class_keys | summary_keys
    # Keys expected inside a single per-class entry.
    expected_inner_keys = {"pre", "rec", "spe", "f1", "geo", "iba", "sup"}

    iris_data = datasets.load_iris()
    truth, predicted, _ = make_prediction(dataset=iris_data, binary=False)
    class_names = iris_data.target_names

    report = classification_report_imbalanced(
        truth,
        predicted,
        labels=np.arange(len(class_names)),
        target_names=class_names,
        output_dict=True,
    )

    assert set(report.keys()) == expected_outer_keys
    assert set(report[0].keys()) == expected_inner_keys
Example #4
0
def test_classification_report_imbalanced_multiclass_with_string_label():
    """Check the text report when the labels are strings.

    The integer targets returned by ``make_prediction`` are mapped to colour
    names. The report is checked once with those names as they are and once
    with ``target_names`` overriding them.
    """
    y_true, y_pred, _ = make_prediction(binary=False)

    # Index the name array with the integer targets to get string labels.
    colour_names = np.array(["blue", "green", "red"])
    y_true = colour_names[y_true]
    y_pred = colour_names[y_pred]

    # Without target_names the rows carry the string labels themselves.
    colour_rows = [
        "pre rec spe f1 geo iba sup",
        "blue 0.83 0.79 0.92 0.81 0.85 0.72 24",
        "green 0.33 0.10 0.86 0.15 0.29 0.08 31",
        "red 0.42 0.90 0.55 0.57 0.70 0.51 20",
        "avg / total 0.51 0.53 0.80 0.47 0.58 0.40 75",
    ]
    colour_report = classification_report_imbalanced(y_true, y_pred)
    assert _format_report(colour_report) == " ".join(colour_rows)

    # With target_names the rows carry the supplied display names instead.
    renamed_rows = [
        "pre rec spe f1 geo iba sup",
        "a 0.83 0.79 0.92 0.81 0.85 0.72 24",
        "b 0.33 0.10 0.86 0.15 0.29 0.08 31",
        "c 0.42 0.90 0.55 0.57 0.70 0.51 20",
        "avg / total 0.51 0.53 0.80 0.47 0.58 0.40 75",
    ]
    renamed_report = classification_report_imbalanced(
        y_true, y_pred, target_names=["a", "b", "c"]
    )
    assert _format_report(renamed_report) == " ".join(renamed_rows)
Example #5
0
def test_classification_report_imbalanced_multiclass_with_unicode_label():
    """Check the text report when the labels contain a non-ASCII character.

    The integer targets returned by ``make_prediction`` are mapped to names
    ending in the cent sign (``\\xa2``) before the report is generated.
    """
    truth_idx, pred_idx, _ = make_prediction(binary=False)

    # Index the name array with the integer targets to get unicode labels.
    unicode_names = np.array(["blue\xa2", "green\xa2", "red\xa2"])
    y_true = unicode_names[truth_idx]
    y_pred = unicode_names[pred_idx]

    expected_rows = [
        "pre rec spe f1 geo iba sup",
        "blue¢ 0.83 0.79 0.92 0.81 0.85 0.72 24",
        "green¢ 0.33 0.10 0.86 0.15 0.29 0.08 31",
        "red¢ 0.42 0.90 0.55 0.57 0.70 0.51 20",
        "avg / total 0.51 0.53 0.80 0.47 0.58 0.40 75",
    ]
    report = classification_report_imbalanced(y_true, y_pred)
    assert _format_report(report) == " ".join(expected_rows)
Example #6
0
# Build a synthetic two-class dataset; class weights are 0.1 and 0.9.
X, y = datasets.make_classification(
    n_samples=5000,
    n_features=20,
    n_informative=10,
    n_redundant=1,
    n_classes=2,
    n_clusters_per_class=4,
    weights=[0.1, 0.9],
    class_sep=2,
    flip_y=0,
    random_state=RANDOM_STATE,
)

# Chain SMOTE and a linear SVM into a single pipeline.
pipeline = pl.make_pipeline(
    os.SMOTE(random_state=RANDOM_STATE),
    LinearSVC(random_state=RANDOM_STATE),
)

# Hold out part of the data for evaluation.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=RANDOM_STATE
)

# Fit the whole pipeline (balancing + classifier) on the training part.
pipeline.fit(X_train, y_train)

# Predict the held-out samples with the fitted pipeline.
y_pred_bal = pipeline.predict(X_test)

# Print the imbalanced classification report for the held-out predictions.
print(classification_report_imbalanced(y_test, y_pred_bal))