def test_cascade_first_stage_has_no_score_mask():
    """A single-stage cascade leaves SCORE_MASK out of its predictions.

    Builds a one-stage cascade with a cutoff of 5, runs ``predict`` over the
    training fixture, and checks that ``Cascade.SCORE_MASK`` does not occur
    among the first ranker's ``predict`` values.
    """
    features, _, query_ids = fixtures.train_data()
    model = factories.cascade(num_stages=1, cutoffs=[5])
    first_stage = model.rankers[0]

    model.predict(features, query_ids)

    assert Cascade.SCORE_MASK not in first_stage.predict
def cascade(num_stages, cutoffs, score_type='independent'):
    """Build a ``Cascade`` from fixture data with its boosters created.

    Args:
        num_stages: Forwarded to ``fixtures.cascade_config``.
        cutoffs: Forwarded to ``fixtures.cascade_config``.
        score_type: Stored under ``config['score_type']`` before the
            cascade is constructed.

    Returns:
        The ``Cascade`` instance after ``create_boosters`` has been called
        with the training and validation fixtures.
    """
    X_train, y_train, qid_train = fixtures.train_data()
    X_valid, y_valid, qid_valid = fixtures.valid_data()

    settings = fixtures.cascade_config(num_stages=num_stages, cutoffs=cutoffs)
    settings['score_type'] = score_type

    model = Cascade(settings, fixtures.feature_costs())
    model.create_boosters(
        X_train, y_train, qid_train,
        X_valid, y_valid, qid_valid,
    )
    return model
def test_cascade_first_stage_applies_cutoff():
    """First-stage ``kappa`` is constant across the first query group.

    After one booster update and a ``predict`` call on a single-stage
    cascade with cutoff 2, every ``kappa`` entry in the first query group is
    expected to equal the pinned value below (presumably the score at the
    cutoff position for the fixture data -- re-derive it if fixtures change).
    """
    features, _, query_ids = fixtures.train_data()
    model = factories.cascade(num_stages=1, cutoffs=[2])
    first_stage = model.rankers[0]
    first_stage.booster.update()

    # Only the first query group is inspected.
    start, stop = next(group_offsets(query_ids))

    model.predict(features, query_ids)

    group_size = stop - start
    np.testing.assert_almost_equal(
        first_stage.kappa[start:stop],
        [0.01948363] * group_size,
    )
def test_cascade_score_mask_does_not_appear_in_first_stage():
    """In a two-stage cascade the first stage never sees SCORE_MASK.

    Builds a two-stage cascade (cutoffs 4 and 2), performs one update, runs
    ``predict`` in training mode, and checks that ``Cascade.SCORE_MASK`` is
    absent from the first ranker's ``predict`` values.
    """
    X, _, qid = fixtures.train_data()
    cascade = factories.cascade(num_stages=2, cutoffs=[4, 2])
    cascade.update()
    ranker_one = cascade.rankers[0]

    cascade.predict(X, qid, is_train=True)

    assert Cascade.SCORE_MASK not in ranker_one.predict
def test_cascade_second_stage_applies_cutoff():
    """Second-stage ``kappa`` equals the group's second-highest prediction.

    With cutoffs ``[4, 2]``, after one update and a ``predict`` call, every
    ``kappa`` entry of the second ranker within the first query group is
    expected to match the second-largest value of that ranker's ``predict``
    slice for the same group.
    """
    features, _, query_ids = fixtures.train_data()
    model = factories.cascade(num_stages=2, cutoffs=[4, 2])
    model.update()
    second_stage = model.rankers[1]

    # Only the first query group is inspected.
    start, stop = next(group_offsets(query_ids))

    model.predict(features, query_ids)

    descending = sorted(second_stage.predict[start:stop], reverse=True)
    threshold = descending[1]  # value at rank 2, i.e. the second-stage cutoff
    group_size = stop - start
    np.testing.assert_almost_equal(
        second_stage.kappa[start:stop],
        [threshold] * group_size,
    )
def test_cascade_second_stage_applies_mask():
    """Each stage's ``mask`` matches the pinned pattern for the first group.

    With cutoffs ``[4, 2]``, after one update and a ``predict`` call, the
    first ranker's mask over the first query group is expected to be
    ``[1, 0, 1, 1, 1]`` and the second ranker's ``[0, 0, 1, 1, 0]``.
    """
    features, _, query_ids = fixtures.train_data()
    model = factories.cascade(num_stages=2, cutoffs=[4, 2])
    model.update()
    first_stage, second_stage = model.rankers[0], model.rankers[1]

    # Only the first query group is inspected.
    start, stop = next(group_offsets(query_ids))

    model.predict(features, query_ids)

    expectations = (
        (first_stage, [1, 0, 1, 1, 1]),
        (second_stage, [0, 0, 1, 1, 0]),
    )
    for stage, expected_mask in expectations:
        np.testing.assert_almost_equal(stage.mask[start:stop], expected_mask)
def test_cascade_uses_score_mask():
    """SCORE_MASK shows up in second-stage predictions in both modes.

    As per the previous implementation, ``predict`` always applies
    ``Cascade.SCORE_MASK``, regardless of whether it is called for training
    or for inference. The check is therefore repeated for ``is_train=True``
    and ``is_train=False``.
    """
    X, _, qid = fixtures.train_data()
    cascade = factories.cascade(num_stages=2, cutoffs=[4, 2])
    cascade.update()
    ranker_two = cascade.rankers[1]

    for is_train in [True, False]:
        cascade.predict(X, qid, is_train=is_train)
        # Asserted per mode so a regression in either one is caught.
        assert Cascade.SCORE_MASK in ranker_two.predict