def test_cb_explore_action_change(self):
    learner = VowpalArgsLearner("--cb_explore 3", VowpalMediatorMocked())
    learner.predict(None, [1, 2, 3])

    # Changing the action set without `adf` is unsupported and should raise.
    with self.assertRaises(Exception) as e:
        learner.predict(None, [4, 5, 6])

    self.assertTrue("`adf`" in str(e.exception))
def test_learn_cb(self):
    vw      = VowpalMediatorMocked()
    learner = VowpalArgsLearner("--cb_explore", vw)

    learner.predict(None, ['yes', 'no'])
    learner.learn(None, 'no', .5, 0.2, ['yes', 'no'])

    # A single (non-adf) example is created whose label carries the chosen
    # action's one-based index, the negated reward as cost, and the probability.
    self.assertIsInstance(vw._learn_calls[0], VowpalEaxmpleMock)
    self.assertEqual({'x': None}, vw._learn_calls[0].ns)
    self.assertEqual("2:-0.5:0.2", vw._learn_calls[0].label)
def test_learn_cb_adf(self):
    vw      = VowpalMediatorMocked()
    learner = VowpalArgsLearner("--cb_explore_adf", vw)

    learner.predict(None, ['yes', 'no'])
    learner.learn(None, 'yes', 1, 0.2, ['yes', 'no'])

    # In adf mode one example is created per action; only the chosen
    # action's example carries a "cost:probability" label.
    self.assertEqual(2, len(vw._learn_calls[0]))

    self.assertEqual({'x': None}, vw._learn_calls[0][0].ns[0])
    self.assertEqual({'a': 'yes'}, vw._learn_calls[0][0].ns[1])
    self.assertEqual("1:-1:0.2", vw._learn_calls[0][0].label)

    self.assertEqual({'x': None}, vw._learn_calls[0][1].ns[0])
    self.assertEqual({'a': 'no'}, vw._learn_calls[0][1].ns[1])
    self.assertEqual(None, vw._learn_calls[0][1].label)
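# The "index:cost:probability" strings asserted above follow VW's contextual
# bandit label format: the one-based index of the chosen action, the negated
# reward as a cost, and the probability with which the action was chosen.
# A minimal sketch of that formatting (the helper name `cb_label` is
# illustrative only, not part of the library):
def cb_label(actions, chosen, reward, prob):
    return f"{actions.index(chosen) + 1}:{-reward}:{prob}"

# cb_label(['yes', 'no'], 'no', .5, 0.2) -> "2:-0.5:0.2"
# cb_label(['yes', 'no'], 'yes', 1, 0.2) -> "1:-1:0.2"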
def test_flatten_tuples(self):
    vw      = VowpalMediatorMocked()
    learner = VowpalArgsLearner("--cb_explore", vw)

    learner.predict([(0, 0, 1)], ['yes', 'no'])
    learner.learn({'l': (0, 0, 1), 'j': 1}, 'no', .5, 0.2, ['yes', 'no'])

    self.assertIsInstance(vw._learn_calls[0], VowpalEaxmpleMock)

    # A bare tuple context is flattened to a list, and tuple-valued dict
    # features are expanded into indexed keys ('l_0', 'l_1', 'l_2').
    self.assertEqual({'x': [0, 0, 1]}, vw._predict_calls[0].ns)
    self.assertEqual(None, vw._predict_calls[0].label)

    self.assertEqual({'x': {'l_0': 0, 'l_1': 0, 'l_2': 1, 'j': 1}}, vw._learn_calls[0].ns)
    self.assertEqual("2:-0.5:0.2", vw._learn_calls[0].label)
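# The flattening asserted above can be illustrated with a small standalone
# helper (`flatten_features` is a hypothetical name, not the library's own):
# tuple values in a feature dict are expanded into indexed keys while scalar
# values pass through unchanged.
def flatten_features(features):
    flat = {}
    for key, value in features.items():
        if isinstance(value, tuple):
            flat.update({f"{key}_{i}": v for i, v in enumerate(value)})
        else:
            flat[key] = value
    return flat

# flatten_features({'l': (0, 0, 1), 'j': 1}) -> {'l_0': 0, 'l_1': 0, 'l_2': 1, 'j': 1}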
def test_cb_adf_learning(self):
    learner = VowpalArgsLearner()

    n_actions  = 3
    n_features = 10
    n_examples = 2000

    rng = CobaRandom(11111)

    contexts = [rng.randoms(n_features) for _ in range(n_examples)]

    # Baseline: average reward of the unlearned (uniform) policy.
    pre_learn_rewards = []
    for context in contexts[:int(.9 * n_examples)]:
        actions = [rng.randoms(n_features) for _ in range(n_actions)]
        rewards = [sum([a * c for a, c in zip(action, context)]) for action in actions]
        rewards = [int(r == max(rewards)) for r in rewards]
        pre_learn_rewards.append(rng.choice(rewards, learner.predict(context, actions)[0]))

    # Train on the first 90% of contexts.
    for context in contexts[:int(.9 * n_examples)]:
        actions = [rng.randoms(n_features) for _ in range(n_actions)]
        rewards = [sum([a * c for a, c in zip(action, context)]) for action in actions]
        rewards = [int(r == max(rewards)) for r in rewards]

        probs, info = learner.predict(context, actions)
        choice      = rng.choice(list(range(3)), probs)

        learner.learn(context, actions[choice], rewards[choice], probs[choice], info)

    # Evaluate on the held-out 10% of contexts.
    post_learn_rewards = []
    for context in contexts[int(.9 * n_examples):]:
        actions = [rng.randoms(n_features) for _ in range(n_actions)]
        rewards = [sum([a * c for a, c in zip(action, context)]) for action in actions]
        rewards = [int(r == max(rewards)) for r in rewards]
        post_learn_rewards.append(rng.choice(rewards, learner.predict(context, actions)[0]))

    average_pre_learn_reward  = sum(pre_learn_rewards) / len(pre_learn_rewards)
    average_post_learn_reward = sum(post_learn_rewards) / len(post_learn_rewards)

    self.assertAlmostEqual(.33, average_pre_learn_reward , places=2)
    self.assertAlmostEqual(.78, average_post_learn_reward, places=2)
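# Sanity check for the ~.33 pre-learning baseline asserted above: exactly one
# of the three synthetic actions gets reward 1 (the one best aligned with the
# context), so a uniform random policy earns 1/3 in expectation. A minimal
# sketch of that arithmetic, independent of the learner (`_uniform_policy_baseline`
# is an illustrative helper, not part of the test suite):
def _uniform_policy_baseline():
    rewards = [0, 1, 0]    # one-hot reward vector, as constructed in the test
    uniform = [1 / 3] * 3  # action probabilities of an unlearned uniform policy
    return sum(r * p for r, p in zip(rewards, uniform))  # == 1/3, i.e. ~.33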