def test_classification_report_imbalanced_multiclass_with_digits():
    """Compare the multiclass iris report text with ``digits=5`` and by default."""
    iris = datasets.load_iris()
    y_true, y_pred, _ = make_prediction(dataset=iris, binary=False)

    # Explicit labels and class names, five digits requested.
    # One entry per report row; rows are joined with a single space.
    rows_with_names = [
        "pre rec spe f1 geo iba sup",
        "setosa 0.82609 0.79167 0.92157 0.80851 0.85415 0.72010 24",
        "versicolor 0.33333 0.09677 0.86364 0.15000 0.28910 0.07717 31",
        "virginica 0.41860 0.90000 0.54545 0.57143 0.70065 0.50831 20",
        "avg / total 0.51375 0.53333 0.79733 0.47310 0.57966 0.39788 75",
    ]
    class_ids = np.arange(len(iris.target_names))
    named_report = classification_report_imbalanced(
        y_true,
        y_pred,
        labels=class_ids,
        target_names=iris.target_names,
        digits=5,
    )
    assert _format_report(named_report) == " ".join(rows_with_names)

    # No labels, names or digits passed: rows are identified by 0, 1, 2.
    rows_with_ids = [
        "pre rec spe f1 geo iba sup",
        "0 0.83 0.79 0.92 0.81 0.85 0.72 24",
        "1 0.33 0.10 0.86 0.15 0.29 0.08 31",
        "2 0.42 0.90 0.55 0.57 0.70 0.51 20",
        "avg / total 0.51 0.53 0.80 0.47 0.58 0.40 75",
    ]
    detected_report = classification_report_imbalanced(y_true, y_pred)
    assert _format_report(detected_report) == " ".join(rows_with_ids)
def test_classification_report_imbalanced_multiclass():
    """Compare the multiclass iris report text with and without class names."""
    iris = datasets.load_iris()
    y_true, y_pred, _ = make_prediction(dataset=iris, binary=False)

    # Explicit labels and class names.
    # One entry per report row; rows are joined with a single space.
    rows_with_names = [
        "pre rec spe f1 geo iba sup",
        "setosa 0.83 0.79 0.92 0.81 0.85 0.72 24",
        "versicolor 0.33 0.10 0.86 0.15 0.29 0.08 31",
        "virginica 0.42 0.90 0.55 0.57 0.70 0.51 20",
        "avg / total 0.51 0.53 0.80 0.47 0.58 0.40 75",
    ]
    class_ids = np.arange(len(iris.target_names))
    named_report = classification_report_imbalanced(
        y_true,
        y_pred,
        labels=class_ids,
        target_names=iris.target_names,
    )
    assert _format_report(named_report) == " ".join(rows_with_names)

    # No labels or names passed: rows are identified by 0, 1, 2.
    rows_with_ids = [
        "pre rec spe f1 geo iba sup",
        "0 0.83 0.79 0.92 0.81 0.85 0.72 24",
        "1 0.33 0.10 0.86 0.15 0.29 0.08 31",
        "2 0.42 0.90 0.55 0.57 0.70 0.51 20",
        "avg / total 0.51 0.53 0.80 0.47 0.58 0.40 75",
    ]
    detected_report = classification_report_imbalanced(y_true, y_pred)
    assert _format_report(detected_report) == " ".join(rows_with_ids)
def test_classification_report_imbalanced_dict():
    """Check the key sets of the report returned with ``output_dict=True``."""
    iris = datasets.load_iris()
    y_true, y_pred, _ = make_prediction(dataset=iris, binary=False)

    class_ids = np.arange(len(iris.target_names))
    report = classification_report_imbalanced(
        y_true,
        y_pred,
        labels=class_ids,
        target_names=iris.target_names,
        output_dict=True,
    )

    # Collect both key sets before asserting, so a missing entry for class 0
    # surfaces as a lookup error ahead of any comparison.
    top_level = set(report)
    per_class = set(report[0])

    summary_keys = {
        "avg_pre",
        "avg_rec",
        "avg_spe",
        "avg_f1",
        "avg_geo",
        "avg_iba",
        "total_support",
    }
    metric_keys = {"pre", "rec", "spe", "f1", "geo", "iba", "sup"}

    assert top_level == {0, 1, 2} | summary_keys
    assert per_class == metric_keys
def test_classification_report_imbalanced_multiclass_with_string_label():
    """Compare the report text when targets are strings, with and without names."""
    y_true, y_pred, _ = make_prediction(binary=False)

    # Map the integer predictions onto string labels.
    colours = np.array(["blue", "green", "red"])
    y_true = colours[y_true]
    y_pred = colours[y_pred]

    # Rows are identified by the string labels themselves.
    rows_with_colours = [
        "pre rec spe f1 geo iba sup",
        "blue 0.83 0.79 0.92 0.81 0.85 0.72 24",
        "green 0.33 0.10 0.86 0.15 0.29 0.08 31",
        "red 0.42 0.90 0.55 0.57 0.70 0.51 20",
        "avg / total 0.51 0.53 0.80 0.47 0.58 0.40 75",
    ]
    report = classification_report_imbalanced(y_true, y_pred)
    assert _format_report(report) == " ".join(rows_with_colours)

    # Explicit target names replace the labels in the row headers.
    rows_with_aliases = [
        "pre rec spe f1 geo iba sup",
        "a 0.83 0.79 0.92 0.81 0.85 0.72 24",
        "b 0.33 0.10 0.86 0.15 0.29 0.08 31",
        "c 0.42 0.90 0.55 0.57 0.70 0.51 20",
        "avg / total 0.51 0.53 0.80 0.47 0.58 0.40 75",
    ]
    report = classification_report_imbalanced(
        y_true, y_pred, target_names=["a", "b", "c"]
    )
    assert _format_report(report) == " ".join(rows_with_aliases)
def test_classification_report_imbalanced_multiclass_with_unicode_label():
    """Compare the report text when the string labels contain a non-ASCII char."""
    y_true, y_pred, _ = make_prediction(binary=False)

    # Each label ends with U+00A2 (cent sign).
    labels = np.array(["blue\xa2", "green\xa2", "red\xa2"])
    y_true, y_pred = labels[y_true], labels[y_pred]

    # One entry per report row; rows are joined with a single space.
    expected_rows = [
        "pre rec spe f1 geo iba sup",
        "blue¢ 0.83 0.79 0.92 0.81 0.85 0.72 24",
        "green¢ 0.33 0.10 0.86 0.15 0.29 0.08 31",
        "red¢ 0.42 0.90 0.55 0.57 0.70 0.51 20",
        "avg / total 0.51 0.53 0.80 0.47 0.58 0.40 75",
    ]
    report = classification_report_imbalanced(y_true, y_pred)
    assert _format_report(report) == " ".join(expected_rows)
# Build a synthetic two-class dataset with class weights 0.1 / 0.9.
X, y = datasets.make_classification(
    n_samples=5000,
    n_features=20,
    n_informative=10,
    n_redundant=1,
    n_classes=2,
    n_clusters_per_class=4,
    weights=[0.1, 0.9],
    class_sep=2,
    flip_y=0,
    random_state=RANDOM_STATE,
)

# Hold out a test split.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=RANDOM_STATE
)

# SMOTE followed by a linear SVM, chained in a single pipeline.
pipeline = pl.make_pipeline(
    os.SMOTE(random_state=RANDOM_STATE),
    LinearSVC(random_state=RANDOM_STATE),
)

# Fit on the training split, then predict the held-out samples.
pipeline.fit(X_train, y_train)
y_pred_bal = pipeline.predict(X_test)

# Print the imbalanced classification report for the held-out predictions.
report = classification_report_imbalanced(y_test, y_pred_bal)
print(report)