예제 #1
0
def test_spec_group_cols():
    """Run a rec-list analysis with explicitly specified group columns.

    The analysis is constructed with ``group_cols=['data', 'user']`` while the
    recommendation frame also carries an extra ``wombat`` column that is not
    named as a group column.  User ``a`` is recommended exactly their three
    truth items; user ``b`` gets one hit (item 5) out of two recommendations
    and three truth items, so precision is 1/2 and recall is 1/3.
    """
    analysis = topn.RecListAnalysis(group_cols=['data', 'user'])
    for metric in (topn.precision, topn.recall, topn.ndcg):
        analysis.add_metric(metric)

    rec_frame = pd.DataFrame({
        'data': 'a',
        'user': ['a', 'a', 'a', 'b', 'b'],
        'item': [2, 3, 1, 4, 5],
        'rank': [1, 2, 3, 1, 2],
        # Random noise column; it must not end up in the result index.
        'wombat': np.random.randn(5)
    })
    truth_frame = pd.DataFrame({
        'user': ['a', 'a', 'a', 'b', 'b', 'b'],
        'item': [1, 2, 3, 1, 5, 6],
        'rating': [3.0, 5.0, 4.0, 3.0, 5.0, 4.0]
    })

    results = analysis.compute(rec_frame, truth_frame)
    print(results)

    # One row per (data, user) pair, indexed by exactly those two columns.
    result_index = results.index
    assert len(results) == 2
    assert result_index.nlevels == 2
    assert result_index.names == ['data', 'user']
    assert all(result_index.levels[0] == 'a')
    assert all(result_index.levels[1] == ['a', 'b'])
    assert all(results.reset_index()['user'] == ['a', 'b'])

    # User b: item 4 is not in the truth (gain 0), item 5 is rated 5.0;
    # the ideal ordering of b's truth ratings is 5, 4, 3.
    expected_b_ndcg = _dcg([0.0, 5.0]) / _dcg([5, 4, 3])
    assert results['ndcg'].values == approx([1.0, expected_b_ndcg])
    assert results['precision'].values == approx([1.0, 1 / 2])
    assert results['recall'].values == approx([1.0, 1 / 3])
def test_run_two():
    """Run a default rec-list analysis over two users with a progress hook.

    No group columns are given to ``RecListAnalysis``; the test expects the
    result to be indexed by ``data`` and ``user`` anyway.  The ``progress``
    callable passed to ``compute`` checks that it is handed something of
    length 2 and returns it unchanged.
    """
    analysis = topn.RecListAnalysis()
    for metric in (topn.precision, topn.recall, topn.ndcg):
        analysis.add_metric(metric)

    rec_frame = pd.DataFrame({
        'data': 'a',
        'user': ['a', 'a', 'a', 'b', 'b'],
        'item': [2, 3, 1, 4, 5],
        'rank': [1, 2, 3, 1, 2]
    })
    truth_frame = pd.DataFrame({
        'user': ['a', 'a', 'a', 'b', 'b', 'b'],
        'item': [1, 2, 3, 1, 5, 6],
        'rating': [3.0, 5.0, 4.0, 3.0, 5.0, 4.0]
    })

    def check_progress(wrapped):
        # Pass-through wrapper that verifies the size of what it receives.
        assert len(wrapped) == 2
        return wrapped

    results = analysis.compute(rec_frame, truth_frame, progress=check_progress)
    print(results)

    # One row per (data, user) pair.
    result_index = results.index
    assert len(results) == 2
    assert result_index.nlevels == 2
    assert result_index.names == ['data', 'user']
    assert all(result_index.levels[0] == 'a')
    assert all(result_index.levels[1] == ['a', 'b'])
    assert all(results.reset_index()['user'] == ['a', 'b'])

    # User b: item 4 is not in the truth (gain 0), item 5 is rated 5.0;
    # the ideal ordering of b's truth ratings is 5, 4, 3.
    expected_b_ndcg = _dcg([0.0, 5.0]) / _dcg([5, 4, 3])
    assert results['ndcg'].values == approx([1.0, expected_b_ndcg])
    assert results['precision'].values == approx([1.0, 1 / 2])
    assert results['recall'].values == approx([1.0, 1 / 3])
예제 #3
0
def test_dcg_empty():
    """The DCG of an empty score array should be zero."""
    no_scores = np.array([])
    assert lm._dcg(no_scores) == approx(0)
예제 #4
0
def test_dcg_mult():
    """Several nonzero scores should each be discounted by their position."""
    # Two leading scores: the expected value is their plain sum.
    leading_pair = np.array([np.e, np.pi])
    assert lm._dcg(leading_pair) == approx(np.e + np.pi)

    # Second score pushed to the fourth position: expected discount log2(4).
    spread_out = np.array([np.e, 0, 0, np.pi])
    assert lm._dcg(spread_out) == approx(np.e + np.pi / np.log2(4))
예제 #5
0
def test_dcg_single():
    """A lone nonzero score should be discounted according to its position."""
    third_place = 0.5 / np.log2(3)
    cases = [
        ([0.5], 0.5),
        ([0, 0.5], 0.5),
        ([0, 0, 0.5], third_place),
        # A trailing zero after the score must not change the result.
        ([0, 0, 0.5, 0], third_place),
    ]
    for scores, expected in cases:
        assert lm._dcg(np.array(scores)) == approx(expected)
예제 #6
0
def test_dcg_zeros():
    """An all-zero score vector should have a DCG of zero."""
    all_zero = np.zeros(10)
    assert lm._dcg(all_zero) == approx(0)
예제 #7
0
def test_ndcg_wrong():
    """nDCG of a list whose relevant items are not in ideal order.

    The recommended items 1 and 2 carry ratings 3.0 and 5.0, whereas the ideal
    ordering of the truth ratings is 5.0, 4.0, 3.0.  The expected value is the
    DCG of the recommended gains divided by the DCG of the ideal ordering.
    """
    recs = pd.DataFrame({'item': [1, 2]})
    truth = pd.DataFrame({'item': [1, 2, 3], 'rating': [3.0, 5.0, 4.0]})
    truth = truth.set_index('item')

    # Divide the two DCG values; do not divide the gain list by the ideal DCG
    # inside the call, which would depend on `_dcg` returning a NumPy scalar.
    expected = _dcg([3.0, 5.0]) / _dcg([5.0, 4.0, 3.0])
    assert ndcg(recs, truth) == approx(expected)
예제 #8
0
def test_dcg_series():
    """The DCG function should accept a pandas Series of scores."""
    scores = pd.Series([np.e, 0, 0, np.pi])
    expected = np.e + np.pi / np.log2(4)
    assert _dcg(scores) == approx(expected)
예제 #9
0
def test_dcg_nan():
    """A NaN score should count as 0 in the DCG."""
    with_nan = np.array([np.nan, 0.5])
    assert _dcg(with_nan) == approx(0.5)