def test_cascade_first_stage_applies_cutoff():
    """Check the kappa recorded by a single-stage cascade with cutoff 2.

    After the lone ranker's booster is updated once and the cascade has
    predicted over the training fixture, every entry of the ranker's
    ``kappa`` inside the first query group must match the pinned value.
    """
    X, _, qid = fixtures.train_data()
    cascade = factories.cascade(num_stages=1, cutoffs=[2])
    first_stage = cascade.rankers[0]
    first_stage.booster.update()

    # Only the first query group yielded by group_offsets is inspected.
    start, stop = next(group_offsets(qid))

    cascade.predict(X, qid)

    # One identical kappa entry is expected per document in the group.
    group_size = stop - start
    expected_kappa = group_size * [0.01948363]
    np.testing.assert_almost_equal(
        first_stage.kappa[start:stop], expected_kappa
    )
def test_cascade_score_mask_does_not_appear_in_first_stage():
    """Check that first-stage predictions never contain ``Cascade.SCORE_MASK``.

    A two-stage cascade (cutoffs 4 and 2) is updated once and then run over
    the training fixture with ``is_train=True``. The membership check covers
    the first ranker's entire ``predict`` container.
    """
    X, _, qid = fixtures.train_data()
    cascade = factories.cascade(num_stages=2, cutoffs=[4, 2])
    cascade.update()
    ranker_one = cascade.rankers[0]

    cascade.predict(X, qid, is_train=True)

    # The assertion spans all of ranker_one.predict rather than a single
    # query group, so no group offsets (or second ranker) are needed here.
    assert Cascade.SCORE_MASK not in ranker_one.predict
def test_cascade_second_stage_applies_cutoff():
    """Check the kappa recorded by the second stage of a two-stage cascade.

    With cutoffs ``[4, 2]``, every ``kappa`` entry of the second ranker
    inside the first query group must equal the second-highest value found
    in that ranker's ``predict`` slice for the same group.
    """
    X, _, qid = fixtures.train_data()
    cascade = factories.cascade(num_stages=2, cutoffs=[4, 2])
    cascade.update()
    second_stage = cascade.rankers[1]

    # Only the first query group yielded by group_offsets is inspected.
    start, stop = next(group_offsets(qid))

    cascade.predict(X, qid)

    # Order the group's second-stage scores from highest to lowest.
    descending = list(second_stage.predict[start:stop])
    descending.sort(reverse=True)

    # Index 1 is the second-best score, matching the stage cutoff of 2.
    group_size = stop - start
    expected_kappa = group_size * [descending[1]]
    np.testing.assert_almost_equal(
        second_stage.kappa[start:stop], expected_kappa
    )
def test_cascade_second_stage_applies_mask():
    """Check the per-stage masks of a two-stage cascade on the first group.

    With cutoffs ``[4, 2]`` the first ranker's mask over the first query
    group must be ``[1, 0, 1, 1, 1]`` and the second ranker's mask must be
    ``[0, 0, 1, 1, 0]``.
    """
    X, _, qid = fixtures.train_data()
    cascade = factories.cascade(num_stages=2, cutoffs=[4, 2])
    cascade.update()
    first_stage, second_stage = cascade.rankers[0], cascade.rankers[1]

    # Only the first query group yielded by group_offsets is inspected.
    start, stop = next(group_offsets(qid))

    cascade.predict(X, qid)

    first_stage_mask = [1, 0, 1, 1, 1]
    np.testing.assert_almost_equal(
        first_stage.mask[start:stop], first_stage_mask
    )

    second_stage_mask = [0, 0, 1, 1, 0]
    np.testing.assert_almost_equal(
        second_stage.mask[start:stop], second_stage_mask
    )
def test_cascade_uses_score_mask():
    """Check that ``Cascade.SCORE_MASK`` shows up in second-stage predictions.

    As per the previous implementation, the SCORE_MASK is always used during
    predict regardless of whether we are doing training or inference, so the
    same assertion is made for both ``is_train=True`` and ``is_train=False``.
    """
    X, _, qid = fixtures.train_data()
    cascade = factories.cascade(num_stages=2, cutoffs=[4, 2])
    cascade.update()
    ranker_two = cascade.rankers[1]

    # The membership check spans all of ranker_two.predict, so neither the
    # first ranker nor per-group offsets are needed.
    for is_train in (True, False):
        cascade.predict(X, qid, is_train=is_train)
        assert Cascade.SCORE_MASK in ranker_two.predict, (
            f"SCORE_MASK missing from second stage with is_train={is_train}"
        )
def test_cascade_computed_kappa_when_training():
    """Check kappa from ``ranker_apply_cutoff`` in training mode.

    A single five-document query group is scored with a cutoff of 2 and a
    previous mask that excludes index 2. Every ``kappa`` entry for the group
    must be 0.5, and the array returned by ``ranker_apply_cutoff`` must be a
    different object from ``ranker.predict``.
    """
    qid = np.array([1, 1, 1, 1, 1])
    start, stop = next(group_offsets(qid))

    cascade = factories.dummy_cascade()
    ranker = factories.ranker()
    ranker.cutoff = 2

    prev_mask = [1, 1, 0, 1, 1]
    raw_scores = np.array([0.1, 1.0, -0.03, 0.5, 0.25])

    # The ranker's own predictions carry the SCORE_MASK wherever the
    # previous mask is zero (index 2 here).
    ranker.predict = np.copy(raw_scores)
    ranker.predict[2] = Cascade.SCORE_MASK

    returned_scores = cascade.ranker_apply_cutoff(
        ranker, raw_scores, prev_mask, qid, is_train=True
    )

    expected_kappa = [0.5] * 5
    np.testing.assert_almost_equal(ranker.kappa[start:stop], expected_kappa)
    assert returned_scores is not ranker.predict