def test_sample_bad():
    """A bad clustering should receive a low score."""
    clusters = [[1, 1, 0, 0], [0]]
    # Both AUL entry points must agree on the same partition.
    score_from_labels = aul_score_from_labels(*clusters_to_labels(clusters))
    score_from_clusters = aul_score_from_clusters(clusters)
    assert_almost_equal(score_from_labels, 0.5, 4)
    assert_almost_equal(score_from_clusters, 0.5, 4)
    roc = RocCurve.from_clusters(clusters)
    assert_almost_equal(roc.auc_score(), 0.6667, 4)
def test_sample_perverse():
    """Perverse cases land strictly between 0.0 and 0.5 on AUL."""
    clusters = [[1], [0, 0]]
    # Both AUL entry points must agree on the same partition.
    score_from_labels = aul_score_from_labels(*clusters_to_labels(clusters))
    score_from_clusters = aul_score_from_clusters(clusters)
    assert_almost_equal(score_from_labels, 0.1111, 4)
    assert_almost_equal(score_from_clusters, 0.1111, 4)
    roc = RocCurve.from_clusters(clusters)
    assert_almost_equal(roc.auc_score(), 0.0, 4)
def test_sample_neg_class1():
    """Like ``test_sample_perfect`` but with a negative of class 1."""
    clusters = [[1, 1, 1, 1, 1], [1], [0]]
    # Both AUL entry points must agree on the same partition.
    score_from_labels = aul_score_from_labels(*clusters_to_labels(clusters))
    score_from_clusters = aul_score_from_clusters(clusters)
    assert_almost_equal(score_from_labels, 0.8690, 4)
    assert_almost_equal(score_from_clusters, 0.8690, 4)
    roc = RocCurve.from_clusters(clusters)
    assert_almost_equal(roc.auc_score(), 0.9167, 4)
def test_sample_cluster0_c0():
    """Like ``test_sample_perfect`` but with a cluster of class 0."""
    clusters = [[1, 1, 1, 1], [0, 0], [0]]
    # Both AUL entry points must agree on the same partition.
    score_from_labels = aul_score_from_labels(*clusters_to_labels(clusters))
    score_from_clusters = aul_score_from_clusters(clusters)
    assert_almost_equal(score_from_labels, 0.6667, 4)
    assert_almost_equal(score_from_clusters, 0.6667, 4)
    roc = RocCurve.from_clusters(clusters)
    assert_almost_equal(roc.auc_score(), 1.0, 4)
def test_sample_cluster0_nh():
    """Like ``test_sample_perfect`` but cluster 0 is not homogeneous."""
    clusters = [[1, 1, 1, 1, 0], [0], [0]]
    # Both AUL entry points must agree on the same partition.
    score_from_labels = aul_score_from_labels(*clusters_to_labels(clusters))
    score_from_clusters = aul_score_from_clusters(clusters)
    assert_almost_equal(score_from_labels, 0.8, 4)
    assert_almost_equal(score_from_clusters, 0.8, 4)
    roc = RocCurve.from_clusters(clusters)
    assert_almost_equal(roc.auc_score(), 0.8333, 4)
def test_sample_perfect():
    """A perfect clustering scores 1.0 on both AUL and ROC AUC."""
    clusters = [[1, 1, 1, 1, 1], [0], [0]]
    # Both AUL entry points must agree on the same partition.
    score_from_labels = aul_score_from_labels(*clusters_to_labels(clusters))
    score_from_clusters = aul_score_from_clusters(clusters)
    assert_almost_equal(score_from_labels, 1.0, 4)
    assert_almost_equal(score_from_clusters, 1.0, 4)
    roc = RocCurve.from_clusters(clusters)
    assert_almost_equal(roc.auc_score(), 1.0, 4)
def add_ranking_metrics(args, clusters, pairs):
    """Append ROC- and Lift-curve based metrics to ``pairs``.

    Each requested metric is appended to ``pairs`` as a
    ``(metric_name, value)`` tuple (mutates ``pairs`` in place).

    NOTE(review): the ``args`` parameter is currently unused — the metric
    list is taken from ``utils.METRICS``. Possibly ``args.metrics`` was
    intended; confirm against callers before changing.
    """
    requested = utils.METRICS

    # ROC-based metrics share a single RocCurve instance.
    if set(utils.ROC_METRICS) & set(requested):
        from lsh_hdc.ranking import RocCurve
        curve = RocCurve.from_clusters(
            clusters, is_class_pos=class_is_positive)
        if 'roc_auc' in requested:
            pairs.append(('roc_auc', curve.auc_score()))
        if 'roc_max_info' in requested:
            pairs.append(('roc_max_info', curve.max_informedness()))

    # Lift-based metrics operate on clusters mapped to boolean labels.
    if set(utils.LIFT_METRICS) & set(requested):
        from lsh_hdc.ranking import aul_score_from_clusters as aul_score
        if 'aul_score' in requested:
            bool_clusters = (
                [class_is_positive(point) for point in cluster]
                for cluster in clusters
            )
            pairs.append(('aul_score', aul_score(bool_clusters)))