Example #1
0
def test_roc_precalculated_t4():
    """Check the ROC AUC against a precalculated value when scores are tied.

    Several observations below share the same score (100.0, 80.0 and 0.0),
    including ties between class-1 and class-0 observations. The resulting
    plot should look like Fig. 2 in Mason & Graham (2002); an implementation
    that ignores ties would give an incorrect result for this data.
    """
    # Each entry is (class label, score).
    observations = [
        (1, 100.0),
        (1, 100.0),
        (1, 100.0),
        (1, 100.0),
        (1, 80.0),
        (0, 80.0),
        (0, 80.0),
        (1, 60.0),
        (0, 40.0),
        (0, 20.0),
        (1, 0.0),
        (0, 0.0),
        (0, 0.0),
        (0, 0.0),
        (0, 0.0),
    ]
    # Transpose the pairs into one tuple of labels and one tuple of scores.
    labels, scores = zip(*observations)
    curve = RocCurve.from_labels(labels, scores)
    assert_almost_equal(curve.auc_score(), 0.839, 3)
Example #2
0
def test_roc_simulated():
    """Cross-check ``roc_auc_score`` on simulated predictions.

    For each of several randomly seeded samples, the value returned by
    ``roc_auc_score`` must agree (to 3 decimal places) with both ``_auc``
    applied to the ``fprs``/``tprs`` of a ``RocCurve`` built from the same
    data and with ``auc_sklearn``.
    """
    n_trials = 10
    n_samples = 1000
    for _ in range(n_trials):
        labels, scores = simulate_predictions(n_samples, seed=random_seed())
        curve = RocCurve.from_labels(labels, scores)
        reference_aucs = (
            _auc(curve.fprs, curve.tprs),
            auc_sklearn(labels, scores),
        )
        auc_actual = roc_auc_score(labels, scores)
        for auc_expected in reference_aucs:
            assert_almost_equal(auc_expected, auc_actual, 3)
Example #3
0
def test_sample_perverse():
    """Perverse case: the AUL score falls in the range 0.0 < AUL < 0.5.

    The only class-1 point sits alone while the two class-0 points share a
    cluster. Both AUL entry points must agree on the score, and the ROC AUC
    for the same clustering is expected to be 0.0.
    """
    clusters = [[1], [0, 0]]
    expected_aul = 0.1111
    expected_auc = 0.0

    aul_via_labels = aul_score_from_labels(*clusters_to_labels(clusters))
    aul_via_clusters = aul_score_from_clusters(clusters)
    for aul in (aul_via_labels, aul_via_clusters):
        assert_almost_equal(aul, expected_aul, 4)

    curve = RocCurve.from_clusters(clusters)
    assert_almost_equal(curve.auc_score(), expected_auc, 4)
Example #4
0
def test_sample_bad():
    """A bad clustering should receive a poor score.

    The large cluster mixes two class-1 and two class-0 points, and the
    remaining class-0 point is on its own. Both AUL entry points must agree
    on the score.
    """
    clusters = [[1, 1, 0, 0], [0]]
    expected_aul = 0.5
    expected_auc = 0.6667

    aul_via_labels = aul_score_from_labels(*clusters_to_labels(clusters))
    aul_via_clusters = aul_score_from_clusters(clusters)
    for aul in (aul_via_labels, aul_via_clusters):
        assert_almost_equal(aul, expected_aul, 4)

    curve = RocCurve.from_clusters(clusters)
    assert_almost_equal(curve.auc_score(), expected_auc, 4)
Example #5
0
def test_sample_neg_class1():
    """Like ``test_sample_perfect``, but with a negative of class 1.

    Here one class-1 point is left alone in its own single-member cluster
    instead of a class-0 point. Both AUL entry points must agree on the
    score.
    """
    clusters = [[1, 1, 1, 1, 1], [1], [0]]
    expected_aul = 0.8690
    expected_auc = 0.9167

    aul_via_labels = aul_score_from_labels(*clusters_to_labels(clusters))
    aul_via_clusters = aul_score_from_clusters(clusters)
    for aul in (aul_via_labels, aul_via_clusters):
        assert_almost_equal(aul, expected_aul, 4)

    curve = RocCurve.from_clusters(clusters)
    assert_almost_equal(curve.auc_score(), expected_auc, 4)
Example #6
0
def test_sample_cluster0_c0():
    """Like ``test_sample_perfect``, but with a cluster made of class 0.

    Two class-0 points are grouped into a cluster of their own. Both AUL
    entry points must agree on the score, while the ROC AUC is still
    expected to be 1.0.
    """
    clusters = [[1, 1, 1, 1], [0, 0], [0]]
    expected_aul = 0.6667
    expected_auc = 1.0

    aul_via_labels = aul_score_from_labels(*clusters_to_labels(clusters))
    aul_via_clusters = aul_score_from_clusters(clusters)
    for aul in (aul_via_labels, aul_via_clusters):
        assert_almost_equal(aul, expected_aul, 4)

    curve = RocCurve.from_clusters(clusters)
    assert_almost_equal(curve.auc_score(), expected_auc, 4)
Example #7
0
def test_sample_cluster0_nh():
    """Like ``test_sample_perfect``, but cluster 0 is not homogeneous.

    The first cluster contains a class-0 point alongside its class-1
    points. Both AUL entry points must agree on the score.
    """
    clusters = [[1, 1, 1, 1, 0], [0], [0]]
    expected_aul = 0.8
    expected_auc = 0.8333

    aul_via_labels = aul_score_from_labels(*clusters_to_labels(clusters))
    aul_via_clusters = aul_score_from_clusters(clusters)
    for aul in (aul_via_labels, aul_via_clusters):
        assert_almost_equal(aul, expected_aul, 4)

    curve = RocCurve.from_clusters(clusters)
    assert_almost_equal(curve.auc_score(), expected_auc, 4)
Example #8
0
def test_sample_perfect():
    """A perfect clustering scores 1.0 on both AUL and ROC AUC.

    All class-1 points share one cluster and every class-0 point is alone
    in a single-member cluster. Both AUL entry points must agree on the
    score.
    """
    clusters = [[1, 1, 1, 1, 1], [0], [0]]
    expected_aul = 1.0
    expected_auc = 1.0

    aul_via_labels = aul_score_from_labels(*clusters_to_labels(clusters))
    aul_via_clusters = aul_score_from_clusters(clusters)
    for aul in (aul_via_labels, aul_via_clusters):
        assert_almost_equal(aul, expected_aul, 4)

    curve = RocCurve.from_clusters(clusters)
    assert_almost_equal(curve.auc_score(), expected_auc, 4)
Example #9
0
def add_ranking_metrics(args, clusters, pairs):
    """Append ROC- and Lift-curve based scores to ``pairs``.

    The metrics to consider are read from ``utils.METRICS``; ``args`` is
    accepted but not read by this function. Each computed score is appended
    to ``pairs`` in place as a ``(metric_name, value)`` tuple, and nothing
    is returned.

    NOTE(review): ``clusters`` is iterated by both branches below. If a
    caller passes a one-shot iterator, the ROC branch may exhaust it before
    the Lift branch runs -- confirm that callers pass a re-iterable
    collection.
    """
    selected = utils.METRICS

    # ROC-based metrics: build the curve only if at least one is selected.
    if set(utils.ROC_METRICS).intersection(selected):
        from lsh_hdc.ranking import RocCurve
        roc = RocCurve.from_clusters(clusters, is_class_pos=class_is_positive)
        if 'roc_auc' in selected:
            pairs.append(('roc_auc', roc.auc_score()))
        if 'roc_max_info' in selected:
            pairs.append(('roc_max_info', roc.max_informedness()))

    # Lift-based metrics.
    if set(utils.LIFT_METRICS).intersection(selected):
        from lsh_hdc.ranking import aul_score_from_clusters as aul_score
        # Lazily map every cluster to a list holding the result of
        # ``class_is_positive`` for each of its points.
        positivity_by_cluster = (
            [class_is_positive(member) for member in group]
            for group in clusters
        )
        if 'aul_score' in selected:
            pairs.append(('aul_score', aul_score(positivity_by_cluster)))
Example #10
0
def test_roc_precalculated_t2():
    """Check the ROC AUC against a precalculated value.

    Every observation below has a distinct score. The resulting plot should
    look like Fig. 1 in Mason & Graham (2002).
    """
    # Each entry is (class label, score).
    observations = [
        (1, 98.4),
        (1, 95.2),
        (1, 94.4),
        (0, 92.8),
        (1, 83.2),
        (1, 81.6),
        (1, 58.4),
        (0, 57.6),
        (0, 28.0),
        (0, 13.6),
        (1, 3.2),
        (0, 2.4),
        (0, 1.6),
        (0, 0.8),
        (0, 0.0),
    ]
    # Transpose the pairs into one tuple of labels and one tuple of scores.
    labels, scores = zip(*observations)
    curve = RocCurve.from_labels(labels, scores)
    assert_almost_equal(curve.auc_score(), 0.875, 3)