def test_create_args(self):
    """Check `_create_format([1, 2, 3])` for learners built from a raw argument string.

    Per the expectations below, `--cb_explore_adf` strings come back unchanged,
    while a `--cb_explore` flag (with or without a number after it) comes back
    as `--cb_explore 3`.
    """
    # (argument string handed to the constructor, string expected back)
    cases = [
        (
            "--cb_explore_adf --interactions sa --ignore_linear s --bag 2 --random_seed 1",
            "--cb_explore_adf --interactions sa --ignore_linear s --bag 2 --random_seed 1",
        ),
        (
            "--cb_explore 10 --interactions sa --ignore_linear s --bag 2 --random_seed 1",
            "--cb_explore 3 --interactions sa --ignore_linear s --bag 2 --random_seed 1",
        ),
        (
            "--cb_explore --interactions sa --ignore_linear s --bag 2 --random_seed 1",
            "--cb_explore 3 --interactions sa --ignore_linear s --bag 2 --random_seed 1",
        ),
        (
            "--cb_explore_adf --interactions sa --ignore_linear s --bag 2",
            "--cb_explore_adf --interactions sa --ignore_linear s --bag 2",
        ),
    ]

    for given_args, wanted in cases:
        produced = VowpalLearner(given_args)._create_format([1, 2, 3])
        self.assertEqual(produced, wanted)
def test_predict_epsilon_dict_context_adf(self):
    """A fresh epsilon learner given a dict context is expected to return 0.25 for each of four actions."""
    dict_context = {1: 10.2, 2: 3.5}
    four_actions = [1, 2, 3, 4]

    learner = VowpalLearner(epsilon=0.05, adf=True, seed=20)
    probabilities = learner.predict(1, dict_context, four_actions)  #type: ignore

    self.assertEqual([0.25, 0.25, 0.25, 0.25], probabilities)
class Batched:
    """Wrap a `VowpalLearner` so that it only learns from delayed, batch-averaged rewards.

    Every `choose` call is remembered. Once `delay` remembered interactions
    have received their reward via `learn`, they are replayed to the wrapped
    learner in groups of `batchsize`, each interaction in a group being
    credited with the sum of the group's rewards divided by `batchsize`.
    """

    def __init__(self, delay: int, batchsize: int, epsilon: float, seed: int, flags: str):
        """Build the wrapped learner and remember the batching configuration.

        Args:
            delay: Number of rewarded interactions to collect before replaying them.
            batchsize: Number of interactions that share one averaged reward.
            epsilon: Forwarded to `VowpalLearner` and reported in `params`.
            seed: Forwarded to `VowpalLearner` and reported in `params`.
            flags: Forwarded to `VowpalLearner` and reported in `params`.

        Raises:
            AssertionError: If `delay` is not a multiple of `batchsize`.
        """
        self.learner = VowpalLearner(seed=seed, epsilon=epsilon, flags=flags)
        self.batchsize = batchsize
        self.delay = delay
        self.epsilon = epsilon
        self.seed = seed
        self.flags = flags

        # key -> {'context': ..., and after `learn`: 'action': ..., 'reward': ...}
        self.mem: Dict[int, Dict[str, Any]] = {}

        # Raised explicitly (rather than via a bare `assert`) so the check is
        # not stripped under `python -O`; the exception type is unchanged.
        if self.delay % self.batchsize != 0:
            raise AssertionError("delay must be a multiple of batchsize")

    @property
    def family(self) -> str:
        """The name of the learner family."""
        return "Batched CB"

    @property
    def params(self) -> Dict[str, Any]:
        """The configuration values this learner was created with."""
        return {
            'e': self.epsilon,
            'seed': self.seed,
            'batchsize': self.batchsize,
            'delay': self.delay,
            'flags': self.flags
        }

    def choose(self, key: int, context: Hashable, actions: Sequence[Hashable]) -> int:
        """Choose which action index to take.

        The context is remembered under `key` so it can be replayed to the
        wrapped learner later; the choice itself is delegated to that learner.
        """
        self.mem[key] = {'context': context}
        return self.learner.choose(key, context, actions)

    def learn(self, key: int, context: Hashable, action: Hashable, reward: float) -> None:
        """Learn about the result of an action that was taken in a context.

        The action and reward are stored next to the context remembered by
        `choose` (the `context` argument itself is not used). Nothing is
        passed to the wrapped learner until `delay` interactions have been
        rewarded.

        Raises:
            KeyError: If `choose` was never called with `key`.
        """
        entry = self.mem[key]
        entry['action'] = action
        entry['reward'] = reward

        # Only interactions that already have a reward can be replayed. Entries
        # that were chosen but not yet rewarded are neither counted nor dropped,
        # so several outstanding `choose` calls no longer break the flush.
        rewarded = [(k, v) for k, v in self.mem.items() if 'reward' in v]

        if len(rewarded) < self.delay:
            return

        # Replay complete batches in insertion order; a trailing partial batch
        # (not expected, since delay is a multiple of batchsize) is not replayed.
        last_start = len(rewarded) - self.batchsize
        for start in range(0, last_start + 1, self.batchsize):
            batch = rewarded[start:start + self.batchsize]
            shared_reward = sum(v['reward'] for _, v in batch) / self.batchsize

            for k, v in batch:
                self.learner.learn(k, v['context'], v['action'], shared_reward)

        for k, _ in rewarded:
            del self.mem[k]
def __init__(self, delay: int, batchsize: int, epsilon: float, seed: int, flags: str):
    """Build the wrapped `VowpalLearner` and remember the batching configuration.

    Args:
        delay: Stored on the instance; must be a multiple of `batchsize`.
        batchsize: Stored on the instance.
        epsilon: Forwarded to `VowpalLearner` and stored on the instance.
        seed: Forwarded to `VowpalLearner` and stored on the instance.
        flags: Forwarded to `VowpalLearner` and stored on the instance.

    Raises:
        AssertionError: If `delay` is not a multiple of `batchsize`.
    """
    self.learner = VowpalLearner(seed=seed, epsilon=epsilon, flags=flags)
    self.batchsize = batchsize
    self.delay = delay
    self.epsilon = epsilon
    self.seed = seed
    self.flags = flags
    self.mem = {}

    # Raised explicitly (rather than via a bare `assert`) so the check is not
    # stripped under `python -O`; the exception type is unchanged.
    if self.delay % self.batchsize != 0:
        raise AssertionError("delay must be a multiple of batchsize")
def test_predict_epsilon_not_adf_args_error_2(self):
    """A `--cb_explore` learner that predicted for four actions must reject a three-action call.

    The first prediction is made with four actions and checked against the
    expected epsilon-greedy probabilities. A second prediction with only
    three actions is then expected to raise, with an error message that
    mentions `--cb_explore_adf`.
    """
    learner = VowpalLearner("--cb_explore --epsilon 0.75 --random_seed 20")

    self.assertEqual(
        [0.25 + 0.25 * 0.75, 0.25 * 0.75, 0.25 * 0.75, 0.25 * 0.75],
        learner.predict(1, None, [1, 2, 3, 4]))

    # Only the call under test belongs inside assertRaises: wrapping an
    # assertEqual here would let its AssertionError (raised when predict
    # returns instead of failing) satisfy `assertRaises(Exception)`.
    with self.assertRaises(Exception) as e:
        learner.predict(1, None, [1, 2, 3])

    self.assertIn("--cb_explore_adf", str(e.exception))
def test_create_cover(self):
    """Check `_create_format([1, 2, 3])` for `cover=2` learners with default, explicit and `None` seeds."""
    # (constructor keyword arguments, string expected back)
    cases = [
        (
            {'cover': 2},
            "--cb_explore 3 --interactions ssa --interactions sa --ignore_linear s --cover 2 --random_seed 1",
        ),
        (
            {'cover': 2, 'seed': 10},
            "--cb_explore 3 --interactions ssa --interactions sa --ignore_linear s --cover 2 --random_seed 10",
        ),
        (
            {'cover': 2, 'seed': None},
            "--cb_explore 3 --interactions ssa --interactions sa --ignore_linear s --cover 2",
        ),
    ]

    for ctor_kwargs, wanted in cases:
        produced = VowpalLearner(**ctor_kwargs)._create_format([1, 2, 3])
        self.assertEqual(produced, wanted)
def test_learn_dense_cb_explore_adf_2(self):
    """`_learn_format` with first argument True, tuple actions and a tuple context.

    The expected text has a `shared |s` line for the context, one `|a` line
    per action, and the `0:-0.25:0.33` label on the line of the last action.
    """
    actions = [(1, 0, 0), (0, 1, 0), (0, 0, 1)]
    context = (1, 2, 3)
    wanted = "shared |s 0:1 1:2 2:3\n|a 0:1\n|a 1:1\n0:-0.25:0.33 |a 2:1"

    produced = VowpalLearner._learn_format(True, 0.33, actions, context, (0, 0, 1), 0.25)

    self.assertEqual(produced, wanted)
def test_learn_sparse_cb_explore_2(self):
    """`_learn_format` with first argument False and a context given as a pair of tuples.

    The expected text is a single line: the `1:-0.5:0.33` label followed by
    the `|s` features `1:3 5:4`.
    """
    actions = [(1, 0, 0), (0, 1, 0), (0, 0, 1)]
    context = ((1, 5), (3, 4))
    wanted = "1:-0.5:0.33 |s 1:3 5:4"

    produced = VowpalLearner._learn_format(False, 0.33, actions, context, (1, 0, 0), .5)

    self.assertEqual(produced, wanted)
def test_predict_dense_cb_explore_adf(self):
    """`_predict_format` with first argument True, a tuple context and tuple actions.

    The expected text has a `shared |s` line followed by one `|a` line per action.
    """
    context = (1, 2, 3)
    actions = [(1, 0, 0), (0, 1, 0), (0, 0, 1)]
    wanted = "shared |s 0:1 1:2 2:3\n|a 0:1\n|a 1:1\n|a 2:1"

    produced = VowpalLearner._predict_format(True, context, actions)

    self.assertEqual(produced, wanted)
def test_create_softmax(self):
    """Check `_create_format([1, 2, 3])` for `softmax=0.5` learners with default, explicit and `None` seeds."""
    # (constructor keyword arguments, string expected back)
    cases = [
        (
            {'softmax': 0.5},
            "--cb_explore_adf --interactions ssa --interactions sa --ignore_linear s --softmax --lambda 0.5 --random_seed 1",
        ),
        (
            {'softmax': 0.5, 'seed': 10},
            "--cb_explore_adf --interactions ssa --interactions sa --ignore_linear s --softmax --lambda 0.5 --random_seed 10",
        ),
        (
            {'softmax': 0.5, 'seed': None},
            "--cb_explore_adf --interactions ssa --interactions sa --ignore_linear s --softmax --lambda 0.5",
        ),
    ]

    for ctor_kwargs, wanted in cases:
        produced = VowpalLearner(**ctor_kwargs)._create_format([1, 2, 3])
        self.assertEqual(produced, wanted)
def test_learn_sparse_cb_explore_adf_1(self):
    """`_learn_format` with first argument True, string actions and a dict context.

    The expected text has a `shared |s` line built from the dict, one `|a`
    line per action, and the `0:-0.33:0.11` label on the line of action "C".
    """
    actions = ["A", "B", "C"]
    context = {"D": 1, "E": 2}
    wanted = "shared |s D:1 E:2\n|a A:1\n|a B:1\n0:-0.33:0.11 |a C:1"

    produced = VowpalLearner._learn_format(True, 0.11, actions, context, "C", 0.33)

    self.assertEqual(produced, wanted)
def test_learn_dense_cb_explore_2(self):
    """`_learn_format` with first argument False, a tuple context and the first action chosen.

    The expected text is a single line: the `1:-0.5:0.33` label followed by
    the `|s` features `0:1 1:2 2:3`.
    """
    actions = [(1, 0, 0), (0, 1, 0), (0, 0, 1)]
    context = (1, 2, 3)
    wanted = "1:-0.5:0.33 |s 0:1 1:2 2:3"

    produced = VowpalLearner._learn_format(False, 0.33, actions, context, (1, 0, 0), .5)

    self.assertEqual(produced, wanted)
def test_predict_sparse_cb_explore_adf_2(self):
    """`_predict_format` with first argument True, a dict context and string actions.

    The expected text has a `shared |s` line built from the dict followed by
    one `|a` line per action.
    """
    context = {"D": 1, "C": 2}
    actions = ["A", "B", "C"]
    wanted = "shared |s D:1 C:2\n|a A:1\n|a B:1\n|a C:1"

    produced = VowpalLearner._predict_format(True, context, actions)

    self.assertEqual(produced, wanted)
def test_predict_sparse_cb_explore_1(self):
    """`_predict_format` with first argument False and a dict context with mixed str/int keys.

    The expected text is the single `|s` line `a:1 3:2`; the actions do not appear in it.
    """
    context = {'a': 1, 3: 2}
    actions = ["A", "B", "C"]
    wanted = "|s a:1 3:2"

    produced = VowpalLearner._predict_format(False, context, actions)

    self.assertEqual(produced, wanted)
def test_learn_dense_cb_explore_1(self):
    """`_learn_format` with first argument False, a tuple context and the second action chosen.

    The expected text is a single line: the `2:-1:0.25` label followed by
    the `|s` features `0:1 1:2 2:3`.
    """
    actions = [(1, 0, 0), (0, 1, 0), (0, 0, 1)]
    context = (1, 2, 3)
    wanted = "2:-1:0.25 |s 0:1 1:2 2:3"

    produced = VowpalLearner._learn_format(False, 0.25, actions, context, (0, 1, 0), 1)

    self.assertEqual(produced, wanted)
def test_learn_sparse_cb_explore_1(self):
    """`_learn_format` with first argument False and a dict context.

    The expected text is a single line: the `1:-0.5:0.33` label followed by
    the `|s` features `A:1 B:2`.
    """
    actions = [(1, 0, 0), (0, 1, 0), (0, 0, 1)]
    context = {"A": 1, "B": 2}
    wanted = "1:-0.5:0.33 |s A:1 B:2"

    produced = VowpalLearner._learn_format(False, 0.33, actions, context, (1, 0, 0), .5)

    self.assertEqual(produced, wanted)
def test_create_bag(self):
    """Check `_create_format([1, 2, 3])` for `bag=2` learners.

    Covers the default seed, an explicit seed, `seed=None`, and `adf=False`
    (for which `--cb_explore 3` is expected instead of `--cb_explore_adf`).
    """
    # (constructor keyword arguments, string expected back)
    cases = [
        (
            {'bag': 2},
            "--cb_explore_adf --interactions ssa --interactions sa --ignore_linear s --bag 2 --random_seed 1",
        ),
        (
            {'bag': 2, 'seed': 10},
            "--cb_explore_adf --interactions ssa --interactions sa --ignore_linear s --bag 2 --random_seed 10",
        ),
        (
            {'bag': 2, 'seed': None},
            "--cb_explore_adf --interactions ssa --interactions sa --ignore_linear s --bag 2",
        ),
        (
            {'bag': 2, 'adf': False},
            "--cb_explore 3 --interactions ssa --interactions sa --ignore_linear s --bag 2 --random_seed 1",
        ),
    ]

    for ctor_kwargs, wanted in cases:
        produced = VowpalLearner(**ctor_kwargs)._create_format([1, 2, 3])
        self.assertEqual(produced, wanted)
def test_create_epsilon(self):
    """Check `_create_format([1, 2, 3])` for `epsilon=0.1` learners.

    Covers the default seed, an explicit seed, `seed=None`, and `adf=False`
    (for which `--cb_explore 3` is expected instead of `--cb_explore_adf`).
    """
    # (constructor keyword arguments, string expected back)
    cases = [
        (
            {'epsilon': 0.1},
            "--cb_explore_adf --interactions ssa --interactions sa --ignore_linear s --epsilon 0.1 --random_seed 1",
        ),
        (
            {'epsilon': 0.1, 'seed': 10},
            "--cb_explore_adf --interactions ssa --interactions sa --ignore_linear s --epsilon 0.1 --random_seed 10",
        ),
        (
            {'epsilon': 0.1, 'seed': None},
            "--cb_explore_adf --interactions ssa --interactions sa --ignore_linear s --epsilon 0.1",
        ),
        (
            {'epsilon': 0.1, 'adf': False},
            "--cb_explore 3 --interactions ssa --interactions sa --ignore_linear s --epsilon 0.1 --random_seed 1",
        ),
    ]

    for ctor_kwargs, wanted in cases:
        produced = VowpalLearner(**ctor_kwargs)._create_format([1, 2, 3])
        self.assertEqual(produced, wanted)
#LambdaSimulation(2000, no_contexts, actions, random_rewards_1, seed=10), #not CB since reward is independent of context #LambdaSimulation(2000, no_contexts, actions, random_rewards_2, seed=10), #not CB since reward is independent of context #LambdaSimulation(2000, no_contexts, actions, random_rewards_3, seed=10), #not CB since reward is independent of context LambdaSimulation(2000, contexts, actions, linear_plus_random_rewards_1, seed=10), LambdaSimulation(2000, contexts, actions, linear_plus_random_rewards_2, seed=10), LambdaSimulation(2000, contexts, actions, polynomial_reward_1, seed=10), ] #define a benchmark: this benchmark replays the simulation 15 times benchmark = Benchmark(simulations, batch_size=1, seeds=list(range(5))) #create the learner factories learner_factories = [ RandomLearner(seed=10), EpsilonLearner(epsilon=0.025, seed=10), UcbTunedLearner(seed=10), VowpalLearner(epsilon=0.025, seed=10), VowpalLearner(epsilon=0.025, is_adf=False, seed=10), VowpalLearner(bag=5, seed=10), ] benchmark.evaluate(learner_factories).standard_plot()
def test_predict_dense_cb_explore(self):
    """`_predict_format` with first argument False and a tuple context.

    The expected text is the single `|s` line `0:1 1:2 2:3`; the actions do not appear in it.
    """
    context = (1, 2, 3)
    actions = ["A", "B", "C"]
    wanted = "|s 0:1 1:2 2:3"

    produced = VowpalLearner._predict_format(False, context, actions)

    self.assertEqual(produced, wanted)
def baseLearner():
    """Return a new `VowpalLearner` configured with seed 10, epsilon 0.1 and the `--coin` flag."""
    # Imported here, as in the original, so the dependency is only loaded on call.
    from coba.learners import VowpalLearner

    settings = {'seed': 10, 'epsilon': 0.1, 'flags': '--coin'}
    return VowpalLearner(**settings)
def test_predict_epsilon_not_adf(self):
    """A fresh `epsilon=0.75`, `is_adf=False` learner is expected to favour the first of four actions.

    Every action is expected to get 0.25 * 0.75, and the first action an extra 0.25.
    """
    shared_part = 0.25 * 0.75
    wanted = [0.25 + shared_part, shared_part, shared_part, shared_part]

    learner = VowpalLearner(epsilon=0.75, is_adf=False, seed=30)
    probabilities = learner.predict(1, None, [1, 2, 3, 4])

    self.assertEqual(wanted, probabilities)
def test_predict_sparse_cb_explore_2(self):
    """`_predict_format` with first argument False and a context given as a pair of tuples.

    The expected text is the single `|s` line `1:2 5:2 7:3`, i.e. the two
    tuples paired up element by element.
    """
    context = ((1, 5, 7), (2, 2, 3))
    actions = ["A", "B", "C"]
    wanted = "|s 1:2 5:2 7:3"

    produced = VowpalLearner._predict_format(False, context, actions)

    self.assertEqual(produced, wanted)
def test_predict_cover_not_adf(self):
    """A fresh `cover=5` learner is expected to put all probability on the first of four actions."""
    wanted = [1, 0, 0, 0]

    learner = VowpalLearner(cover=5, seed=30)
    probabilities = learner.predict(1, None, [1, 2, 3, 4])

    self.assertEqual(wanted, probabilities)
def test_predict_epsilon_adf(self):
    """A fresh `epsilon=0.05`, `is_adf=True` learner is expected to return 0.25 for each of four actions."""
    wanted = [0.25] * 4

    learner = VowpalLearner(epsilon=0.05, is_adf=True, seed=20)
    probabilities = learner.predict(1, None, [1, 2, 3, 4])

    self.assertEqual(wanted, probabilities)
def test_predict_bag_not_adf(self):
    """A fresh `bag=5`, `is_adf=False` learner is expected to put all probability on the first of four actions."""
    wanted = [1, 0, 0, 0]

    learner = VowpalLearner(bag=5, is_adf=False, seed=30)
    probabilities = learner.predict(1, None, [1, 2, 3, 4])

    self.assertEqual(wanted, probabilities)
def test_predict_bag_adf(self):
    """A fresh `bag=5`, `is_adf=True` learner is expected to return 0.25 for each of four actions."""
    wanted = [0.25] * 4

    learner = VowpalLearner(bag=5, is_adf=True, seed=30)
    probabilities = learner.predict(1, None, [1, 2, 3, 4])

    self.assertEqual(wanted, probabilities)
"""
Example script: build a Benchmark that matches the bandit bakeoff paper,
evaluate a handful of learners on it and plot the result.

The benchmark is read from ./examples/benchmark_short.json and evaluation
is logged to ./examples/bakeoff.log.

Running this script requires the matplotlib and vowpalwabbit packages.
"""

from coba.learners import (
    CorralLearner,
    EpsilonLearner,
    RandomLearner,
    UcbTunedLearner,
    VowpalLearner,
)
from coba.benchmarks import Benchmark

if __name__ == '__main__':
    benchmark = Benchmark.from_file("./examples/benchmark_short.json")

    # Stand-alone learners, created in the order they are evaluated.
    learners = [
        RandomLearner(),
        EpsilonLearner(epsilon=0.025),
        UcbTunedLearner(),
        VowpalLearner(bag=5, seed=10),
    ]

    # A Corral learner built over its own bagging and UCB base learners.
    learners.append(
        CorralLearner(
            [VowpalLearner(bag=5, seed=10), UcbTunedLearner()],
            eta=.075,
            T=40000,
            seed=10,
        )
    )

    result = benchmark.evaluate(learners, './examples/bakeoff.log', seed=10)
    result.standard_plot()
def test_predict_sparse_cb_explore_adf_3(self):
    """`_predict_format` with first argument True and a context given as a pair of tuples.

    The expected text has the `shared |s` line `1:1 3:2` (the two tuples
    paired up element by element) followed by one `|a` line per action.
    """
    context = ((1, 3), (1, 2))
    actions = ["A", "B", "C"]
    wanted = "shared |s 1:1 3:2\n|a A:1\n|a B:1\n|a C:1"

    produced = VowpalLearner._predict_format(True, context, actions)

    self.assertEqual(produced, wanted)
def test_learn_sparse_cb_explore_adf_2(self):
    """`_learn_format` with first argument True, string actions and a context given as a pair of tuples.

    The expected text has the `shared |s` line `3:2 5:3`, one `|a` line per
    action, and the `0:-0.33:0.11` label on the line of action "C".
    """
    actions = ["A", "B", "C"]
    context = ((3, 5), (2, 3))
    wanted = "shared |s 3:2 5:3\n|a A:1\n|a B:1\n0:-0.33:0.11 |a C:1"

    produced = VowpalLearner._learn_format(True, 0.11, actions, context, "C", 0.33)

    self.assertEqual(produced, wanted)