def test_cascade_compute_grads_last_stage():
    """Gradients computed for the last stage equal the weights passed in."""
    cascade = factories.cascade(num_stages=2, cutoffs=[4, 2])
    stage_weights = np.array([1., 0.5, 0.25])

    grads = cascade.compute_grads(1, cascade.last(), stage_weights)

    np.testing.assert_almost_equal([1., 0.5, 0.25], grads)
def test_cascade_first_stage_has_no_score_mask():
    """After predict on a one-stage cascade, SCORE_MASK is not among the scores."""
    X, _labels, qid = fixtures.train_data()
    cascade = factories.cascade(num_stages=1, cutoffs=[5])
    only_stage = cascade.rankers[0]

    cascade.predict(X, qid)

    scores = only_stage.predict
    assert Cascade.SCORE_MASK not in scores
def test_cascade_second_stage_score_unkown_type():
    """Scoring the second stage with an unrecognised score type raises RuntimeError."""
    cascade = factories.cascade(num_stages=2, cutoffs=[2, 4])
    first_stage, second_stage = cascade.rankers[0], cascade.rankers[1]

    with pytest.raises(RuntimeError):
        # Bypass set_score_type and assign the bogus value directly.
        cascade.score_type = 'unkown-foo-bar-baz'
        cascade.ranker_score(second_stage, first_stage)
def test_cascade_last_ranker_indicator_is_zero():
    """The last ranker's indicator score and derivative are zero everywhere.

    The check is element-wise on purpose: comparing ``0.`` with
    ``ndarray.all()`` only tests a single boolean and therefore passes as
    soon as *any* element is zero, which is weaker than what this test
    is meant to guarantee.
    """
    cascade = factories.cascade(num_stages=1, cutoffs=[1])
    ranker = cascade.last()
    ranker.predict = np.array([1., 2., 3.])
    ranker.kappa = np.array([1., 1., 1.])

    cascade.ranker_indicator()

    # A scalar `desired` is broadcast, so every entry must equal zero.
    np.testing.assert_array_equal(ranker.indicator_score, 0.)
    np.testing.assert_array_equal(ranker.indicator_derivative, 0.)
def test_cascade_first_stage_score_any_type():
    """For every score type, scoring the first stage leaves estimate as predict.

    Each non-alias ``ScoreType`` member is applied in turn; after
    ``ranker_score`` the first ranker's ``estimate`` must be the very same
    object as its ``predict``.
    """
    cascade = factories.cascade(num_stages=1, cutoffs=[4])

    for key, score_type in ScoreType.__members__.items():
        # Only canonical names are exercised; alias names are skipped.
        if score_type.name == key:
            cascade.set_score_type(key)
            first_stage = cascade.rankers[0]
            cascade.ranker_score(first_stage)
            assert first_stage.predict is first_stage.estimate
def test_cascade_first_stage_applies_cutoff():
    """First-stage kappa over the first group equals the expected cutoff score.

    The booster is updated once before predicting; every kappa entry in the
    first range yielded by ``group_offsets(qid)`` must then match the pinned
    reference value.
    """
    X, _labels, qid = fixtures.train_data()
    cascade = factories.cascade(num_stages=1, cutoffs=[2])
    first_stage = cascade.rankers[0]
    first_stage.booster.update()
    start, stop = next(group_offsets(qid))

    cascade.predict(X, qid)

    reference = (stop - start) * [0.01948363]
    np.testing.assert_almost_equal(first_stage.kappa[start:stop], reference)
def test_cascade_score_mask_does_not_appear_in_first_stage():
    """First-stage scores of a two-stage cascade never contain SCORE_MASK.

    The cascade is updated once and then run through ``predict`` in training
    mode before the first ranker's scores are inspected.
    """
    X, _, qid = fixtures.train_data()
    cascade = factories.cascade(num_stages=2, cutoffs=[4, 2])
    cascade.update()
    ranker_one = cascade.rankers[0]

    cascade.predict(X, qid, is_train=True)

    assert Cascade.SCORE_MASK not in ranker_one.predict
def test_cascade_second_stage_applies_cutoff():
    """Second-stage kappa over the first group equals its second-highest score.

    The group is the first range yielded by ``group_offsets(qid)``; the
    second-highest score is taken from the second ranker's own predictions
    within that range.
    """
    X, _labels, qid = fixtures.train_data()
    cascade = factories.cascade(num_stages=2, cutoffs=[4, 2])
    cascade.update()
    second_stage = cascade.rankers[1]
    start, stop = next(group_offsets(qid))

    cascade.predict(X, qid)

    ranked = sorted(second_stage.predict[start:stop], reverse=True)
    reference = (stop - start) * [ranked[1]]
    np.testing.assert_almost_equal(second_stage.kappa[start:stop], reference)
def test_cascade_collect_weights_independent_scoring():
    """With 'independent' scoring each stage collects exactly its own weights."""
    cascade = factories.cascade(num_stages=2, cutoffs=[4, 2])
    cascade.set_score_type('independent')

    first_stage = cascade.rankers[0]
    first_stage.weights = np.array([1., 2., 3.])
    second_stage = cascade.rankers[1]
    second_stage.weights = np.array([3., 2., 1.])

    collected = cascade.collect_weights(0, first_stage)
    np.testing.assert_almost_equal([1., 2., 3.], collected)

    collected = cascade.collect_weights(1, second_stage)
    np.testing.assert_almost_equal([3., 2., 1.], collected)
def test_cascade_second_stage_score_weak_type():
    """With 'weak' scoring the second stage gets a separate, combined estimate.

    After ``ranker_score`` the second ranker's ``estimate`` must be a
    different object from its ``predict`` and hold the pinned values below.
    """
    cascade = factories.cascade(num_stages=2, cutoffs=[4, 2])
    cascade.set_score_type('weak')

    first_stage = cascade.rankers[0]
    first_stage.mask = np.array([1, 1, 1, 1, 0])
    first_stage.estimate = np.array([4., 3., 2., 1., 0.])

    second_stage = cascade.rankers[1]
    second_stage.predict = np.array([3., 5., 1., 5., 5.])

    cascade.ranker_score(second_stage, first_stage)

    assert second_stage.predict is not second_stage.estimate
    reference = np.array([4., 5., 2., 5., 0.])
    np.testing.assert_almost_equal(second_stage.estimate, reference)
def test_cascade_second_stage_applies_mask():
    """Both stages expose the expected masks for the first group after predict.

    The group is the first range yielded by ``group_offsets(qid)``.
    """
    X, _labels, qid = fixtures.train_data()
    cascade = factories.cascade(num_stages=2, cutoffs=[4, 2])
    cascade.update()
    first_stage, second_stage = cascade.rankers[0], cascade.rankers[1]
    start, stop = next(group_offsets(qid))

    cascade.predict(X, qid)

    np.testing.assert_almost_equal(first_stage.mask[start:stop], [1, 0, 1, 1, 1])
    np.testing.assert_almost_equal(second_stage.mask[start:stop], [0, 0, 1, 1, 0])
def test_cascade_collect_weights_full_scoring():
    """With 'full' scoring stage 0 collects both stages' weights summed.

    The expected stage-0 values equal the element-wise sum of the two
    weight vectors set below; stage 1 collects only its own weights.
    """
    cascade = factories.cascade(num_stages=2, cutoffs=[4, 2])
    cascade.set_score_type('full')

    first_stage = cascade.rankers[0]
    first_stage.weights = np.array([0.75, 0.5, 0.2])
    second_stage = cascade.rankers[1]
    second_stage.weights = np.array([0.75, 0.3, 0.1])

    collected = cascade.collect_weights(0, first_stage)
    np.testing.assert_almost_equal([1.5, 0.8, 0.3], collected)

    collected = cascade.collect_weights(1, second_stage)
    np.testing.assert_almost_equal([0.75, 0.3, 0.1], collected)
def test_cascade_uses_score_mask():
    """SCORE_MASK shows up in second-stage scores for training and inference.

    As per the previous implementation, ``predict`` always applies the
    SCORE_MASK, regardless of whether ``is_train`` is True or False.
    """
    X, _, qid = fixtures.train_data()
    cascade = factories.cascade(num_stages=2, cutoffs=[4, 2])
    cascade.update()
    ranker_two = cascade.rankers[1]

    for is_train in [True, False]:
        cascade.predict(X, qid, is_train=is_train)
        assert Cascade.SCORE_MASK in ranker_two.predict
def test_cascade_compute_grads():
    """compute_grads for stage 0 yields [EPSILON, 0.5, 1.0] to four decimals.

    Both rankers are given hand-written weights, estimates and indicator
    values so the result depends only on ``compute_grads`` itself.
    """
    cascade = factories.cascade(num_stages=2, cutoffs=[4, 2])

    first_stage = cascade.rankers[0]
    first_stage.weights = np.array([0.5, 0.5, 0.5])
    first_stage.estimate = np.array([-0.5, 0.2, 1.])
    first_stage.indicator_score = np.array([0.5, 0.5, 0.5])
    first_stage.indicator_derivative = np.array([0.25, 0., 0.5])

    second_stage = cascade.rankers[1]
    second_stage.weights = np.array([0.7, 0.7, 0.7])
    second_stage.estimate = np.array([0.25, 0.6, 0.3])
    second_stage.indicator_score = np.array([0.5, 0.5, 0.5])
    second_stage.indicator_derivative = np.array([0., 0., 0.])

    grads = cascade.compute_grads(0, first_stage, first_stage.weights)

    reference = [Cascade.EPSILON, 0.5, 1.0]
    np.testing.assert_almost_equal(reference, grads, decimal=4)
def test_cascade_collect_weights_weak_scoring():
    """With 'weak' scoring, collected weights match the pinned per-stage values.

    In this fixture the second stage's ``predict`` and ``estimate`` differ
    only at index 0; there the expected stage-1 weight is 0 and the expected
    stage-0 weight is the sum of both stages' weights.
    """
    cascade = factories.cascade(num_stages=2, cutoffs=[4, 2])
    cascade.set_score_type('weak')

    first_stage = cascade.rankers[0]
    first_stage.weights = np.array([0.7, 0.5, 0.2])
    first_stage.predict = np.array([0.1, 0.2, 0.5])
    first_stage.estimate = np.array([0.1, 0.2, 0.5])

    second_stage = cascade.rankers[1]
    second_stage.weights = np.array([0.3, 0.3, 0.1])
    second_stage.predict = np.array([-0.1, 0.3, 0.7])
    second_stage.estimate = np.array([0.1, 0.3, 0.7])

    collected = cascade.collect_weights(0, first_stage)
    np.testing.assert_almost_equal([1.0, 0.5, 0.2], collected)

    collected = cascade.collect_weights(1, second_stage)
    np.testing.assert_almost_equal([0.0, 0.3, 0.1], collected)
def test_cascade_set_unkown_score_type():
    """set_score_type raises KeyError for a name that is not a known score type."""
    cascade = factories.cascade(num_stages=1, cutoffs=[4])
    bogus_name = 'unkown-foo-bar-baz'

    with pytest.raises(KeyError):
        cascade.set_score_type(bogus_name)