def test_roc_precalculated_t4():
    """Check ROC AUC against precalculated data that contains tied scores

    The plot should look like Fig. 2 in Mason & Graham (2002).
    An implementation that disregards ties gives an incorrect result
    on this input.
    """
    # (label, score) pairs; the scores 100.0, 80.0 and 0.0 are repeated
    scored_samples = [
        (1, 100.0), (1, 100.0), (1, 100.0), (1, 100.0),
        (1, 80.0), (0, 80.0), (0, 80.0),
        (1, 60.0),
        (0, 40.0),
        (0, 20.0),
        (1, 0.0), (0, 0.0), (0, 0.0), (0, 0.0), (0, 0.0),
    ]
    labels, scores = zip(*scored_samples)
    curve = RocCurve.from_labels(labels, scores)
    assert_almost_equal(curve.auc_score(), 0.839, 3)
def test_roc_simulated():
    # Test Area under Receiver Operating Characteristic (ROC) curve:
    # on ten simulated data sets, ``roc_auc_score`` has to agree with both
    # ``_auc`` evaluated on the curve's FPR/TPR points and ``auc_sklearn``.
    n_trials = 10
    for _trial in range(n_trials):
        seed = random_seed()
        truth, scores = simulate_predictions(1000, seed=seed)
        curve = RocCurve.from_labels(truth, scores)
        references = (
            _auc(curve.fprs, curve.tprs),
            auc_sklearn(truth, scores),
        )
        observed = roc_auc_score(truth, scores)
        for reference in references:
            assert_almost_equal(reference, observed, 3)
def test_sample_perverse():
    """Perverse cases are those with 0.0 < AUL < 0.5
    """
    clusters = [[1], [0, 0]]

    # AUL must come out the same via the label-based and cluster-based APIs
    label_args = clusters_to_labels(clusters)
    aul_values = (
        aul_score_from_labels(*label_args),
        aul_score_from_clusters(clusters),
    )
    for aul in aul_values:
        assert_almost_equal(aul, 0.1111, 4)

    roc = RocCurve.from_clusters(clusters)
    assert_almost_equal(roc.auc_score(), 0.0, 4)
def test_sample_bad():
    """A bad clustering is expected to receive a poor score
    """
    clusters = [[1, 1, 0, 0], [0]]

    # AUL must come out the same via the label-based and cluster-based APIs
    label_args = clusters_to_labels(clusters)
    aul_values = (
        aul_score_from_labels(*label_args),
        aul_score_from_clusters(clusters),
    )
    for aul in aul_values:
        assert_almost_equal(aul, 0.5, 4)

    roc = RocCurve.from_clusters(clusters)
    assert_almost_equal(roc.auc_score(), 0.6667, 4)
def test_sample_neg_class1():
    """Like ``test_sample_perfect``, but one singleton cluster is of class 1
    """
    clusters = [[1, 1, 1, 1, 1], [1], [0]]

    # AUL must come out the same via the label-based and cluster-based APIs
    label_args = clusters_to_labels(clusters)
    aul_values = (
        aul_score_from_labels(*label_args),
        aul_score_from_clusters(clusters),
    )
    for aul in aul_values:
        assert_almost_equal(aul, 0.8690, 4)

    roc = RocCurve.from_clusters(clusters)
    assert_almost_equal(roc.auc_score(), 0.9167, 4)
def test_sample_cluster0_c0():
    """Like ``test_sample_perfect``, but with a two-item cluster of class 0
    """
    clusters = [[1, 1, 1, 1], [0, 0], [0]]

    # AUL must come out the same via the label-based and cluster-based APIs
    label_args = clusters_to_labels(clusters)
    aul_values = (
        aul_score_from_labels(*label_args),
        aul_score_from_clusters(clusters),
    )
    for aul in aul_values:
        assert_almost_equal(aul, 0.6667, 4)

    roc = RocCurve.from_clusters(clusters)
    assert_almost_equal(roc.auc_score(), 1.0, 4)
def test_sample_cluster0_nh():
    """Like ``test_sample_perfect``, but the first cluster is not homogeneous
    """
    clusters = [[1, 1, 1, 1, 0], [0], [0]]

    # AUL must come out the same via the label-based and cluster-based APIs
    label_args = clusters_to_labels(clusters)
    aul_values = (
        aul_score_from_labels(*label_args),
        aul_score_from_clusters(clusters),
    )
    for aul in aul_values:
        assert_almost_equal(aul, 0.8, 4)

    roc = RocCurve.from_clusters(clusters)
    assert_almost_equal(roc.auc_score(), 0.8333, 4)
def test_sample_perfect():
    """A perfect clustering scores 1.0 on both AUL and ROC AUC
    """
    clusters = [[1, 1, 1, 1, 1], [0], [0]]

    # AUL must come out the same via the label-based and cluster-based APIs
    label_args = clusters_to_labels(clusters)
    aul_values = (
        aul_score_from_labels(*label_args),
        aul_score_from_clusters(clusters),
    )
    for aul in aul_values:
        assert_almost_equal(aul, 1.0, 4)

    roc = RocCurve.from_clusters(clusters)
    assert_almost_equal(roc.auc_score(), 1.0, 4)
def test_roc_precalculated_t2():
    """Check ROC AUC against precalculated data

    The plot should look like Fig. 1 in Mason & Graham (2002)
    """
    # (label, score) pairs listed by decreasing score
    scored_samples = [
        (1, 98.4), (1, 95.2), (1, 94.4),
        (0, 92.8),
        (1, 83.2), (1, 81.6), (1, 58.4),
        (0, 57.6), (0, 28.0), (0, 13.6),
        (1, 3.2),
        (0, 2.4), (0, 1.6), (0, 0.8), (0, 0.0),
    ]
    labels, scores = zip(*scored_samples)
    curve = RocCurve.from_labels(labels, scores)
    assert_almost_equal(curve.auc_score(), 0.875, 3)