def test_roc_precalculated_t4():
    """Check ROC AUC against precalculated data that contains ties

    The resulting plot should look like Fig. 2 in Mason & Graham (2002).
    A naive implementation that disregards ties gives an incorrect
    result in this situation.
    """
    labeled_scores = [
        (1, 100.0), (1, 100.0), (1, 100.0), (1, 100.0), (1, 80.0),
        (0, 80.0), (0, 80.0), (1, 60.0), (0, 40.0), (0, 20.0),
        (1, 0.0), (0, 0.0), (0, 0.0), (0, 0.0), (0, 0.0),
    ]
    # Transpose the pairs into one tuple of first elements and one tuple
    # of second elements, which become the two positional arguments.
    labels, scores = zip(*labeled_scores)
    curve = RocCurve.from_labels(labels, scores)
    assert_almost_equal(curve.auc_score(), 0.839, 3)
def test_roc_simulated():
    # Compare three ways of computing the area under the Receiver
    # Operating Characteristic (ROC) curve on simulated predictions;
    # all of them must agree to three decimal places.
    for _ in range(10):
        truth, scores = simulate_predictions(1000, seed=random_seed())
        curve = RocCurve.from_labels(truth, scores)

        # Reference 1: area computed from the curve's own FPR/TPR points.
        area_from_curve = _auc(curve.fprs, curve.tprs)
        # Reference 2: the sklearn-based implementation.
        area_from_sklearn = auc_sklearn(truth, scores)
        # Value under test.
        area_under_test = roc_auc_score(truth, scores)

        assert_almost_equal(area_from_curve, area_under_test, 3)
        assert_almost_equal(area_from_sklearn, area_under_test, 3)
def test_sample_perverse():
    """Perverse cases are those with 0.0 < AUL < 0.5

    Expects AUL of about 0.1111 from both the label-based and the
    cluster-based scorer, and a ROC AUC of about 0.0.
    """
    grouping = [[1], [0, 0]]

    label_args = clusters_to_labels(grouping)
    aul_via_labels = aul_score_from_labels(*label_args)
    aul_via_clusters = aul_score_from_clusters(grouping)
    assert_almost_equal(aul_via_labels, 0.1111, 4)
    assert_almost_equal(aul_via_clusters, 0.1111, 4)

    curve = RocCurve.from_clusters(grouping)
    assert_almost_equal(curve.auc_score(), 0.0, 4)
def test_sample_bad():
    """A bad clustering should receive a poor score

    Expects AUL of about 0.5 from both the label-based and the
    cluster-based scorer, and a ROC AUC of about 0.6667.
    """
    grouping = [[1, 1, 0, 0], [0]]

    label_args = clusters_to_labels(grouping)
    aul_via_labels = aul_score_from_labels(*label_args)
    aul_via_clusters = aul_score_from_clusters(grouping)
    assert_almost_equal(aul_via_labels, 0.5, 4)
    assert_almost_equal(aul_via_clusters, 0.5, 4)

    curve = RocCurve.from_clusters(grouping)
    assert_almost_equal(curve.auc_score(), 0.6667, 4)
def test_sample_neg_class1():
    """Like ``test_sample_perfect`` but with a negative of class 1

    Expects AUL of about 0.8690 from both the label-based and the
    cluster-based scorer, and a ROC AUC of about 0.9167.
    """
    grouping = [[1, 1, 1, 1, 1], [1], [0]]

    label_args = clusters_to_labels(grouping)
    aul_via_labels = aul_score_from_labels(*label_args)
    aul_via_clusters = aul_score_from_clusters(grouping)
    assert_almost_equal(aul_via_labels, 0.8690, 4)
    assert_almost_equal(aul_via_clusters, 0.8690, 4)

    curve = RocCurve.from_clusters(grouping)
    assert_almost_equal(curve.auc_score(), 0.9167, 4)
def test_sample_cluster0_c0():
    """Like ``test_sample_perfect`` but with a cluster of class 0

    Expects AUL of about 0.6667 from both the label-based and the
    cluster-based scorer, and a ROC AUC of about 1.0.
    """
    grouping = [[1, 1, 1, 1], [0, 0], [0]]

    label_args = clusters_to_labels(grouping)
    aul_via_labels = aul_score_from_labels(*label_args)
    aul_via_clusters = aul_score_from_clusters(grouping)
    assert_almost_equal(aul_via_labels, 0.6667, 4)
    assert_almost_equal(aul_via_clusters, 0.6667, 4)

    curve = RocCurve.from_clusters(grouping)
    assert_almost_equal(curve.auc_score(), 1.0, 4)
def test_sample_cluster0_nh():
    """Like ``test_sample_perfect`` but cluster 0 is not homogeneous

    Expects AUL of about 0.8 from both the label-based and the
    cluster-based scorer, and a ROC AUC of about 0.8333.
    """
    grouping = [[1, 1, 1, 1, 0], [0], [0]]

    label_args = clusters_to_labels(grouping)
    aul_via_labels = aul_score_from_labels(*label_args)
    aul_via_clusters = aul_score_from_clusters(grouping)
    assert_almost_equal(aul_via_labels, 0.8, 4)
    assert_almost_equal(aul_via_clusters, 0.8, 4)

    curve = RocCurve.from_clusters(grouping)
    assert_almost_equal(curve.auc_score(), 0.8333, 4)
def test_sample_perfect():
    """A perfect clustering

    Expects AUL of about 1.0 from both the label-based and the
    cluster-based scorer, and a ROC AUC of about 1.0.
    """
    grouping = [[1, 1, 1, 1, 1], [0], [0]]

    label_args = clusters_to_labels(grouping)
    aul_via_labels = aul_score_from_labels(*label_args)
    aul_via_clusters = aul_score_from_clusters(grouping)
    assert_almost_equal(aul_via_labels, 1.0, 4)
    assert_almost_equal(aul_via_clusters, 1.0, 4)

    curve = RocCurve.from_clusters(grouping)
    assert_almost_equal(curve.auc_score(), 1.0, 4)
def add_ranking_metrics(args, clusters, pairs):
    """Append metrics based on ROC and Lift curves to ``pairs``

    The set of metrics to compute is read from ``utils.METRICS``. For
    every requested metric a ``(name, value)`` tuple is appended to
    ``pairs``; nothing is returned.

    :param args: not read by this function
    :param clusters: iterable of clusters, each an iterable of points
    :param pairs: object with ``append`` that collects the results
    """
    requested = utils.METRICS

    # ROC-curve metrics share one curve, built only if any is requested.
    roc_requested = set(utils.ROC_METRICS).intersection(requested)
    if roc_requested:
        from lsh_hdc.ranking import RocCurve
        curve = RocCurve.from_clusters(
            clusters, is_class_pos=class_is_positive)
        if 'roc_auc' in requested:
            pairs.append(('roc_auc', curve.auc_score()))
        if 'roc_max_info' in requested:
            pairs.append(('roc_max_info', curve.max_informedness()))

    # Lift-curve metrics work on per-cluster lists of positive flags.
    # NOTE(review): ``clusters`` is iterated again here after being passed
    # to ``RocCurve.from_clusters`` above -- presumably callers pass a
    # re-iterable container rather than a one-shot iterator; confirm.
    lift_requested = set(utils.LIFT_METRICS).intersection(requested)
    if lift_requested:
        from lsh_hdc.ranking import aul_score_from_clusters as aul_score
        positive_flags = (
            [class_is_positive(member) for member in group]
            for group in clusters
        )
        if 'aul_score' in requested:
            pairs.append(('aul_score', aul_score(positive_flags)))
def test_roc_precalculated_t2():
    """Check ROC AUC against precalculated data

    The resulting plot should look like Fig. 1 in Mason & Graham (2002).
    """
    labeled_scores = [
        (1, 98.4), (1, 95.2), (1, 94.4), (0, 92.8), (1, 83.2),
        (1, 81.6), (1, 58.4), (0, 57.6), (0, 28.0), (0, 13.6),
        (1, 3.2), (0, 2.4), (0, 1.6), (0, 0.8), (0, 0.0),
    ]
    # Transpose the pairs into one tuple of first elements and one tuple
    # of second elements, which become the two positional arguments.
    labels, scores = zip(*labeled_scores)
    curve = RocCurve.from_labels(labels, scores)
    assert_almost_equal(curve.auc_score(), 0.875, 3)