예제 #1
0
def test_cascade_first_stage_has_no_score_mask():
    """A one-stage cascade must not put SCORE_MASK into its predictions.

    Builds a single-stage cascade with a cutoff of 5, runs ``predict`` on the
    training fixture, and checks that ``Cascade.SCORE_MASK`` does not occur
    in the first ranker's ``predict`` values.
    """
    features, _labels, query_ids = fixtures.train_data()
    model = factories.cascade(num_stages=1, cutoffs=[5])
    # Hold a reference to the only stage; its ``predict`` attribute is
    # inspected once the cascade has run.
    first_stage = model.rankers[0]

    model.predict(features, query_ids)

    mask_present = Cascade.SCORE_MASK in first_stage.predict
    assert not mask_present
예제 #2
0
def cascade(num_stages, cutoffs, score_type='independent'):
    """Build a ``Cascade`` from fixture data with its boosters created.

    Args:
        num_stages: Forwarded to ``fixtures.cascade_config``.
        cutoffs: Forwarded to ``fixtures.cascade_config``.
        score_type: Stored under the ``'score_type'`` key of the config
            before the cascade is constructed.

    Returns:
        The ``Cascade`` instance after ``create_boosters`` has been called
        with the training and validation fixtures.
    """
    train_X, train_y, train_qid = fixtures.train_data()
    valid_X, valid_y, valid_qid = fixtures.valid_data()

    settings = fixtures.cascade_config(num_stages=num_stages, cutoffs=cutoffs)
    settings['score_type'] = score_type

    model = Cascade(settings, fixtures.feature_costs())
    model.create_boosters(
        train_X, train_y, train_qid,
        valid_X, valid_y, valid_qid,
    )
    return model
예제 #3
0
def test_cascade_first_stage_applies_cutoff():
    """The first stage's kappa is constant over the first offset span.

    After one booster update on a single-stage cascade (cutoff 2), running
    ``predict`` should leave ``kappa[a:b]`` equal to a fixed reference value
    for every position, where ``(a, b)`` is the first pair yielded by
    ``group_offsets``.
    """
    # Reference value the test pins kappa to; presumably the score at the
    # cutoff position for this fixture -- confirm against the fixture data.
    reference_kappa = 0.01948363

    features, _labels, query_ids = fixtures.train_data()
    model = factories.cascade(num_stages=1, cutoffs=[2])
    first_stage = model.rankers[0]
    first_stage.booster.update()
    start, stop = next(group_offsets(query_ids))

    model.predict(features, query_ids)

    # Keep the ``int * list`` operand order so list repetition is used.
    span = stop - start
    wanted = span * [reference_kappa]
    np.testing.assert_almost_equal(first_stage.kappa[start:stop], wanted)
예제 #4
0
def test_cascade_score_mask_does_not_appear_in_first_stage():
    """SCORE_MASK must not occur in the first stage of a two-stage cascade.

    Builds a two-stage cascade (cutoffs 4 and 2), updates it once, runs
    ``predict`` with ``is_train=True``, and checks that
    ``Cascade.SCORE_MASK`` is absent from the first ranker's ``predict``
    values.
    """
    X, _, qid = fixtures.train_data()
    cascade = factories.cascade(num_stages=2, cutoffs=[4, 2])
    cascade.update()
    # Only the first stage is inspected; the unused second-stage ranker and
    # group-offset locals were dropped.
    ranker_one = cascade.rankers[0]

    cascade.predict(X, qid, is_train=True)

    assert Cascade.SCORE_MASK not in ranker_one.predict
예제 #5
0
def test_cascade_second_stage_applies_cutoff():
    """The second stage's kappa equals its second-highest prediction.

    For a two-stage cascade with cutoffs ``[4, 2]``, after one update and a
    ``predict`` call, ``kappa[a:b]`` of the second ranker is expected to be
    the second-largest value of that ranker's ``predict[a:b]`` at every
    position, where ``(a, b)`` is the first pair yielded by
    ``group_offsets``.
    """
    features, _labels, query_ids = fixtures.train_data()
    model = factories.cascade(num_stages=2, cutoffs=[4, 2])
    model.update()
    second_stage = model.rankers[1]
    start, stop = next(group_offsets(query_ids))

    model.predict(features, query_ids)

    descending = sorted(second_stage.predict[start:stop], reverse=True)
    # Index 1 is the second-best score -- presumably the position implied by
    # the second-stage cutoff of 2.
    cutoff_score = descending[1]
    wanted = (stop - start) * [cutoff_score]
    np.testing.assert_almost_equal(second_stage.kappa[start:stop], wanted)
예제 #6
0
def test_cascade_second_stage_applies_mask():
    """Each stage's mask over the first offset span matches a fixed pattern.

    For a two-stage cascade with cutoffs ``[4, 2]``, after one update and a
    ``predict`` call, the first ranker's ``mask[a:b]`` is expected to be
    ``[1, 0, 1, 1, 1]`` and the second ranker's ``[0, 0, 1, 1, 0]``, where
    ``(a, b)`` is the first pair yielded by ``group_offsets``.
    """
    features, _labels, query_ids = fixtures.train_data()
    model = factories.cascade(num_stages=2, cutoffs=[4, 2])
    model.update()
    first_stage = model.rankers[0]
    second_stage = model.rankers[1]
    start, stop = next(group_offsets(query_ids))

    model.predict(features, query_ids)

    # Checked in stage order: first stage, then second stage.
    expectations = (
        (first_stage, [1, 0, 1, 1, 1]),
        (second_stage, [0, 0, 1, 1, 0]),
    )
    for stage, wanted_mask in expectations:
        np.testing.assert_almost_equal(stage.mask[start:stop], wanted_mask)
예제 #7
0
def test_cascade_uses_score_mask():
    """As per the previous implementation, always use SCORE_MASK in predict.

    The mask is expected regardless of whether we are doing training or
    inference: for both ``is_train=True`` and ``is_train=False``,
    ``Cascade.SCORE_MASK`` must occur in the second ranker's ``predict``
    values after the cascade's ``predict`` call.
    """
    X, _, qid = fixtures.train_data()
    cascade = factories.cascade(num_stages=2, cutoffs=[4, 2])
    cascade.update()
    # Only the second stage is inspected; the unused first-stage ranker and
    # group-offset locals were dropped.
    ranker_two = cascade.rankers[1]

    for is_train in (True, False):
        cascade.predict(X, qid, is_train=is_train)

        assert Cascade.SCORE_MASK in ranker_two.predict