Example #1
0
def test_roc_curve_hard(setup):
    # roc_curve for hard decisions
    y_true, pred, probas_pred = make_prediction(binary=True)

    # always predict one
    trivial_pred = np.ones(y_true.shape)
    fpr, tpr, thresholds = roc_curve(y_true, trivial_pred)
    roc_auc = auc(fpr, tpr).fetch()
    np.testing.assert_array_almost_equal(roc_auc, 0.50, decimal=2)
    assert fpr.shape == tpr.shape
    assert fpr.shape == thresholds.shape

    # always predict zero
    trivial_pred = np.zeros(y_true.shape)
    fpr, tpr, thresholds = roc_curve(y_true, trivial_pred)
    roc_auc = auc(fpr, tpr).fetch()
    np.testing.assert_array_almost_equal(roc_auc, 0.50, decimal=2)
    assert fpr.shape == tpr.shape
    assert fpr.shape == thresholds.shape

    # hard decisions
    fpr, tpr, thresholds = roc_curve(y_true, pred)
    roc_auc = auc(fpr, tpr).fetch()
    np.testing.assert_array_almost_equal(roc_auc, 0.78, decimal=2)
    assert fpr.shape == tpr.shape
    assert fpr.shape == thresholds.shape
Example #2
0
def test_binary_clf_curve_multiclass_error(setup):
    rng = check_random_state(404)
    y_true = rng.randint(0, 3, size=10)
    y_pred = rng.rand(10)
    msg = "multiclass format is not supported"

    with pytest.raises(ValueError, match=msg):
        roc_curve(y_true, y_pred)
Example #3
0
def test_roc_curve_drop_intermediate(setup):
    # Test that drop_intermediate drops the correct thresholds
    y_true = [0, 0, 0, 0, 1, 1]
    y_score = [0., 0.2, 0.5, 0.6, 0.7, 1.0]
    tpr, fpr, thresholds = roc_curve(y_true, y_score, drop_intermediate=True)
    np.testing.assert_array_almost_equal(thresholds.fetch(), [2., 1., 0.7, 0.])

    # Test dropping thresholds with repeating scores
    y_true = [0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1]
    y_score = [0., 0.1, 0.6, 0.6, 0.7, 0.8, 0.9, 0.6, 0.7, 0.8, 0.9, 0.9, 1.0]
    tpr, fpr, thresholds = roc_curve(y_true, y_score, drop_intermediate=True)
    np.testing.assert_array_almost_equal(thresholds.fetch(),
                                         [2.0, 1.0, 0.9, 0.7, 0.6, 0.])
    def testRocCurve(self):
        import numpy as np
        import pandas as pd
        import mars.dataframe as md
        from mars.learn.metrics import roc_curve, auc
        from sklearn.metrics import roc_curve as sklearn_roc_curve, auc as sklearn_auc

        client = self.odps.create_mars_cluster(1, 4, 8, name=str(uuid.uuid4()))
        try:
            rs = np.random.RandomState(0)
            raw = pd.DataFrame({
                'a': rs.randint(0, 10, (10, )),
                'b': rs.rand(10)
            })

            df = md.DataFrame(raw)
            y = df['a'].to_tensor().astype('int')
            pred = df['b'].to_tensor().astype('float')
            fpr, tpr, thresholds = roc_curve(y, pred, pos_label=2)
            m = auc(fpr, tpr)

            sk_fpr, sk_tpr, sk_threshod = sklearn_roc_curve(
                raw['a'].to_numpy().astype('int'),
                raw['b'].to_numpy().astype('float'),
                pos_label=2)
            expect_m = sklearn_auc(sk_fpr, sk_tpr)
            self.assertAlmostEqual(m.fetch(), expect_m)
        finally:
            client.stop_server()
Example #5
0
    def testRocCurveAuc(self):
        service_ep = 'http://127.0.0.1:' + self.web_port
        timeout = 120 if 'CI' in os.environ else -1
        with new_session(service_ep) as sess:
            run_kwargs = {'timeout': timeout}

            rs = np.random.RandomState(0)
            raw = pd.DataFrame({
                'a': rs.randint(0, 10, (10, )),
                'b': rs.rand(10)
            })

            df = md.DataFrame(raw)
            y = df['a'].to_tensor().astype('int')
            pred = df['b'].to_tensor().astype('float')
            fpr, tpr, thresholds = roc_curve(y,
                                             pred,
                                             pos_label=2,
                                             session=sess,
                                             run_kwargs=run_kwargs)
            m = auc(fpr, tpr, session=sess, run_kwargs=run_kwargs)

            sk_fpr, sk_tpr, sk_threshod = sklearn_roc_curve(
                raw['a'].to_numpy().astype('int'),
                raw['b'].to_numpy().astype('float'),
                pos_label=2)
            expect_m = sklearn_auc(sk_fpr, sk_tpr)
            self.assertAlmostEqual(m.fetch(session=sess), expect_m)
Example #6
0
def test_roc_curve_confidence(setup):
    # roc_curve for confidence scores
    y_true, _, probas_pred = make_prediction(binary=True)

    fpr, tpr, thresholds = roc_curve(y_true, probas_pred - 0.5)
    roc_auc = auc(fpr, tpr).fetch()
    np.testing.assert_array_almost_equal(roc_auc, 0.90, decimal=2)
    assert fpr.shape == tpr.shape
    assert fpr.shape == thresholds.shape
Example #7
0
def test_roc_curve_fpr_tpr_increasing(setup):
    # Ensure that fpr and tpr returned by roc_curve are increasing.
    # Construct an edge case with float y_score and sample_weight
    # when some adjacent values of fpr and tpr are actually the same.
    y_true = [0, 0, 1, 1, 1]
    y_score = [0.1, 0.7, 0.3, 0.4, 0.5]
    sample_weight = np.repeat(0.2, 5)
    fpr, tpr, _ = roc_curve(y_true, y_score, sample_weight=sample_weight)
    assert ((mt.diff(fpr) < 0).sum() == 0).to_numpy()
    assert ((mt.diff(tpr) < 0).sum() == 0).to_numpy()
Example #8
0
def test_roc_curve_end_points(setup):
    # Make sure that roc_curve returns a curve start at 0 and ending and
    # 1 even in corner cases
    rng = np.random.RandomState(0)
    y_true = np.array([0] * 50 + [1] * 50)
    y_pred = rng.randint(3, size=100)
    fpr, tpr, thr = roc_curve(y_true, y_pred, drop_intermediate=True).fetch()
    assert fpr[0] == 0
    assert fpr[-1] == 1
    assert fpr.shape == tpr.shape
    assert fpr.shape == thr.shape
Example #9
0
def test_roc_curve(setup):
    for drop in [True, False]:
        # Test Area under Receiver Operating Characteristic (ROC) curve
        y_true, _, probas_pred = make_prediction(binary=True)
        expected_auc = _auc(y_true, probas_pred)

        fpr, tpr, thresholds = roc_curve(y_true, probas_pred,
                                         drop_intermediate=drop).execute().fetch()
        roc_auc = auc(fpr, tpr).to_numpy()
        np.testing.assert_array_almost_equal(roc_auc, expected_auc, decimal=2)
        np.testing.assert_almost_equal(roc_auc, roc_auc_score(y_true, probas_pred))
        assert fpr.shape == tpr.shape
        assert fpr.shape == thresholds.shape
Example #10
0
def test_dataframe_roc_curve_auc(setup):
    rs = np.random.RandomState(0)
    raw = pd.DataFrame({'a': rs.randint(0, 10, (10, )), 'b': rs.rand(10)})

    df = md.DataFrame(raw)
    y = df['a'].to_tensor().astype('int')
    pred = df['b'].to_tensor().astype('float')
    fpr, tpr, thresholds = roc_curve(y, pred, pos_label=2)
    m = auc(fpr, tpr)

    sk_fpr, sk_tpr, sk_threshod = sklearn_roc_curve(
        raw['a'].to_numpy().astype('int'),
        raw['b'].to_numpy().astype('float'),
        pos_label=2)
    expect_m = sklearn_auc(sk_fpr, sk_tpr)
    assert pytest.approx(m.fetch()) == expect_m
Example #11
0
def test_roc_returns_consistency(setup):
    # Test whether the returned threshold matches up with tpr
    # make small toy dataset
    y_true, _, probas_pred = make_prediction(binary=True)
    fpr, tpr, thresholds = roc_curve(y_true, probas_pred).fetch()

    # use the given thresholds to determine the tpr
    tpr_correct = []
    for t in thresholds:
        tp = np.sum((probas_pred >= t) & y_true)
        p = np.sum(y_true)
        tpr_correct.append(1.0 * tp / p)

    # compare tpr and tpr_correct to see if the thresholds' order was correct
    np.testing.assert_array_almost_equal(tpr, tpr_correct, decimal=2)
    assert fpr.shape == tpr.shape
    assert fpr.shape == thresholds.shape
Example #12
0
    def testLearnInLocalCluster(self, *_):
        from mars.learn.neighbors import NearestNeighbors
        from sklearn.neighbors import NearestNeighbors as SkNearestNeighbors
        from mars.learn.metrics import roc_curve, auc
        from sklearn.metrics import roc_curve as sklearn_roc_curve, auc as sklearn_auc

        with new_cluster(scheduler_n_process=2,
                         worker_n_process=3,
                         shared_memory='20M') as cluster:
            rs = np.random.RandomState(0)
            raw_X = rs.rand(10, 5)
            raw_Y = rs.rand(8, 5)

            X = mt.tensor(raw_X, chunk_size=7)
            Y = mt.tensor(raw_Y, chunk_size=(5, 3))
            nn = NearestNeighbors(n_neighbors=3)
            nn.fit(X)

            ret = nn.kneighbors(Y, session=cluster.session)

            snn = SkNearestNeighbors(n_neighbors=3)
            snn.fit(raw_X)
            expected = snn.kneighbors(raw_Y)

            result = [r.fetch() for r in ret]
            np.testing.assert_almost_equal(result[0], expected[0])
            np.testing.assert_almost_equal(result[1], expected[1])

            rs = np.random.RandomState(0)
            raw = pd.DataFrame({
                'a': rs.randint(0, 10, (10, )),
                'b': rs.rand(10)
            })

            df = md.DataFrame(raw)
            y = df['a'].to_tensor().astype('int')
            pred = df['b'].to_tensor().astype('float')
            fpr, tpr, thresholds = roc_curve(y, pred, pos_label=2)
            m = auc(fpr, tpr)

            sk_fpr, sk_tpr, sk_threshod = sklearn_roc_curve(
                raw['a'].to_numpy().astype('int'),
                raw['b'].to_numpy().astype('float'),
                pos_label=2)
            expect_m = sklearn_auc(sk_fpr, sk_tpr)
            self.assertAlmostEqual(m.fetch(), expect_m)
Example #13
0
def test_roc_curve_multi(setup):
    # roc_curve not applicable for multi-class problems
    y_true, _, probas_pred = make_prediction(binary=False)

    with pytest.raises(ValueError):
        roc_curve(y_true, probas_pred)
Example #14
0
    def testRocCurveMulti(self):
        # roc_curve not applicable for multi-class problems
        y_true, _, probas_pred = make_prediction(binary=False)

        with self.assertRaises(ValueError):
            roc_curve(y_true, probas_pred)