Ejemplo n.º 1
0
    def test_create_args(self):
        """Check ``_create_format`` when the learner is built from a raw flag string.

        Every case pairs the flag string handed to ``VowpalLearner`` with the
        string that ``_create_format([1, 2, 3])`` is expected to return.
        """
        cases = [
            # ADF flags are expected to come back unchanged.
            ("--cb_explore_adf --interactions sa --ignore_linear s --bag 2 --random_seed 1",
             "--cb_explore_adf --interactions sa --ignore_linear s --bag 2 --random_seed 1"),
            # An explicit count after --cb_explore is expected to be replaced by 3.
            ("--cb_explore 10 --interactions sa --ignore_linear s --bag 2 --random_seed 1",
             "--cb_explore 3 --interactions sa --ignore_linear s --bag 2 --random_seed 1"),
            # A bare --cb_explore is expected to gain a count of 3.
            ("--cb_explore --interactions sa --ignore_linear s --bag 2 --random_seed 1",
             "--cb_explore 3 --interactions sa --ignore_linear s --bag 2 --random_seed 1"),
            # No seed flag supplied, none expected in the result.
            ("--cb_explore_adf --interactions sa --ignore_linear s --bag 2",
             "--cb_explore_adf --interactions sa --ignore_linear s --bag 2"),
        ]

        for flags, expected in cases:
            actual = VowpalLearner(flags)._create_format([1, 2, 3])
            self.assertEqual(actual, expected)
Ejemplo n.º 2
0
    def test_predict_epsilon_dict_context_adf(self):
        """An epsilon ADF learner given a dict context is expected to predict 0.25 for each of four actions."""
        learner = VowpalLearner(epsilon=0.05, adf=True, seed=20)

        expected = [0.25, 0.25, 0.25, 0.25]
        context = {1: 10.2, 2: 3.5}
        actions = [1, 2, 3, 4]

        predicted = learner.predict(1, context, actions)  #type: ignore

        self.assertEqual(expected, predicted)
Ejemplo n.º 3
0
class Batched:
    """Wrap a ``VowpalLearner`` so that it is updated in delayed batches.

    ``choose`` and ``learn`` buffer each interaction in ``mem``. Once ``mem``
    holds at least ``delay`` interactions they are replayed to the wrapped
    learner in groups of ``batchsize``; every interaction in a group is learned
    with the group's summed reward divided by ``batchsize``.
    """

    def __init__(self, delay: int, batchsize: int, epsilon: float, seed: int,
                 flags: str):
        """Store the configuration and build the wrapped learner.

        ``delay`` must be a multiple of ``batchsize``; this is checked with an
        ``assert``.
        """
        self.delay, self.batchsize = delay, batchsize
        self.epsilon, self.seed, self.flags = epsilon, seed, flags
        self.mem = {}
        self.learner = VowpalLearner(seed=seed, epsilon=epsilon, flags=flags)

        assert self.delay % self.batchsize == 0

    @property
    def family(self) -> str:
        """Name of the learner family."""
        return "Batched CB"

    @property
    def params(self) -> Dict[str, Any]:
        """Configuration values of this learner, keyed by short name."""
        return dict(e=self.epsilon,
                    seed=self.seed,
                    batchsize=self.batchsize,
                    delay=self.delay,
                    flags=self.flags)

    def choose(self, key: int, context: Hashable,
               actions: Sequence[Hashable]) -> int:
        """Choose which action index to take."""
        # Remember the context so it can be replayed when the batch is learned.
        self.mem[key] = {'context': context}

        return self.learner.choose(key, context, actions)

    def learn(self, key: int, context: Hashable, action: Hashable,
              reward: float) -> None:
        """Learn about the result of an action that was taken in a context."""
        entry = self.mem[key]
        entry['action'] = action
        entry['reward'] = reward

        # Keep buffering until `delay` interactions have been collected.
        if len(self.mem) < self.delay:
            return

        reward_total = 0
        batch = []
        for stored_key, stored in self.mem.items():
            reward_total += stored['reward']
            batch.append((stored_key, stored))

            if len(batch) % self.batchsize != 0:
                continue

            # Every member of a full batch is learned with the same shared reward.
            shared_reward = reward_total / self.batchsize
            for member_key, member in batch:
                self.learner.learn(member_key, member['context'],
                                   member['action'], shared_reward)
            reward_total = 0
            batch = []

        self.mem = {}
Ejemplo n.º 4
0
    def __init__(self, delay: int, batchsize: int, epsilon: float, seed: int, flags: str):
        """Store the configuration and build the wrapped ``VowpalLearner``.

        ``delay`` must be a multiple of ``batchsize``; this is checked with an
        ``assert``.
        """
        self.delay, self.batchsize = delay, batchsize
        self.epsilon, self.seed, self.flags = epsilon, seed, flags
        self.mem = {}
        self.learner = VowpalLearner(seed=seed, epsilon=epsilon, flags=flags)

        assert self.delay % self.batchsize == 0
Ejemplo n.º 5
0
    def test_predict_epsilon_not_adf_args_error_2(self):
        """A ``--cb_explore`` learner is expected to reject a call with a different action count.

        The first ``predict`` call uses four actions and is expected to return
        the epsilon-greedy probabilities. The second call uses three actions
        and is expected to raise an exception whose message mentions
        ``--cb_explore_adf``.
        """
        learner = VowpalLearner("--cb_explore --epsilon 0.75 --random_seed 20")
        self.assertEqual(
            [0.25 + 0.25 * 0.75, 0.25 * 0.75, 0.25 * 0.75, 0.25 * 0.75],
            learner.predict(1, None, [1, 2, 3, 4]))

        # Call predict bare inside the context manager: if it were wrapped in
        # assertEqual, an AssertionError from a wrong (non-raising) result would
        # itself satisfy assertRaises(Exception).
        with self.assertRaises(Exception) as e:
            learner.predict(1, None, [1, 2, 3])

        self.assertIn("--cb_explore_adf", str(e.exception))
Ejemplo n.º 6
0
    def test_create_cover(self):
        """Check ``_create_format`` for a cover learner with default, explicit and no seed."""
        cases = [
            (dict(cover=2),
             "--cb_explore 3 --interactions ssa --interactions sa --ignore_linear s --cover 2 --random_seed 1"),
            (dict(cover=2, seed=10),
             "--cb_explore 3 --interactions ssa --interactions sa --ignore_linear s --cover 2 --random_seed 10"),
            (dict(cover=2, seed=None),
             "--cb_explore 3 --interactions ssa --interactions sa --ignore_linear s --cover 2"),
        ]

        for kwargs, expected in cases:
            actual = VowpalLearner(**kwargs)._create_format([1, 2, 3])
            self.assertEqual(actual, expected)
Ejemplo n.º 7
0
    def test_learn_dense_cb_explore_adf_2(self):
        """Check the ``_learn_format`` output for a dense tuple context with tuple actions (first argument True)."""
        actions = [(1, 0, 0), (0, 1, 0), (0, 0, 1)]
        context = (1, 2, 3)
        taken = (0, 0, 1)
        expected = "shared |s 0:1 1:2 2:3\n|a 0:1\n|a 1:1\n0:-0.25:0.33 |a 2:1"

        actual = VowpalLearner._learn_format(True, 0.33, actions, context, taken, 0.25)

        self.assertEqual(actual, expected)
Ejemplo n.º 8
0
    def test_learn_sparse_cb_explore_2(self):
        """Check the ``_learn_format`` output for a context given as a pair of tuples (first argument False)."""
        actions = [(1, 0, 0), (0, 1, 0), (0, 0, 1)]
        context = ((1, 5), (3, 4))
        taken = (1, 0, 0)
        expected = "1:-0.5:0.33 |s 1:3 5:4"

        actual = VowpalLearner._learn_format(False, 0.33, actions, context, taken, .5)

        self.assertEqual(actual, expected)
Ejemplo n.º 9
0
    def test_predict_dense_cb_explore_adf(self):
        """Check the ``_predict_format`` output for a dense tuple context with tuple actions (first argument True)."""
        context = (1, 2, 3)
        actions = [(1, 0, 0), (0, 1, 0), (0, 0, 1)]
        expected = "shared |s 0:1 1:2 2:3\n|a 0:1\n|a 1:1\n|a 2:1"

        actual = VowpalLearner._predict_format(True, context, actions)

        self.assertEqual(actual, expected)
Ejemplo n.º 10
0
    def test_create_softmax(self):
        """Check ``_create_format`` for a softmax learner with default, explicit and no seed."""
        cases = [
            (dict(softmax=0.5),
             "--cb_explore_adf --interactions ssa --interactions sa --ignore_linear s --softmax --lambda 0.5 --random_seed 1"),
            (dict(softmax=0.5, seed=10),
             "--cb_explore_adf --interactions ssa --interactions sa --ignore_linear s --softmax --lambda 0.5 --random_seed 10"),
            (dict(softmax=0.5, seed=None),
             "--cb_explore_adf --interactions ssa --interactions sa --ignore_linear s --softmax --lambda 0.5"),
        ]

        for kwargs, expected in cases:
            actual = VowpalLearner(**kwargs)._create_format([1, 2, 3])
            self.assertEqual(actual, expected)
Ejemplo n.º 11
0
    def test_learn_sparse_cb_explore_adf_1(self):
        """Check the ``_learn_format`` output for a dict context with string actions (first argument True)."""
        actions = ["A", "B", "C"]
        context = {"D": 1, "E": 2}
        expected = "shared |s D:1 E:2\n|a A:1\n|a B:1\n0:-0.33:0.11 |a C:1"

        actual = VowpalLearner._learn_format(True, 0.11, actions, context, "C", 0.33)

        self.assertEqual(actual, expected)
Ejemplo n.º 12
0
    def test_learn_dense_cb_explore_2(self):
        """Check the ``_learn_format`` output for a dense tuple context when the first listed action is taken."""
        actions = [(1, 0, 0), (0, 1, 0), (0, 0, 1)]
        context = (1, 2, 3)
        taken = (1, 0, 0)
        expected = "1:-0.5:0.33 |s 0:1 1:2 2:3"

        actual = VowpalLearner._learn_format(False, 0.33, actions, context, taken, .5)

        self.assertEqual(actual, expected)
Ejemplo n.º 13
0
    def test_predict_sparse_cb_explore_adf_2(self):
        """Check the ``_predict_format`` output for a dict context with string actions (first argument True)."""
        context = {"D": 1, "C": 2}
        actions = ["A", "B", "C"]
        expected = "shared |s D:1 C:2\n|a A:1\n|a B:1\n|a C:1"

        actual = VowpalLearner._predict_format(True, context, actions)

        self.assertEqual(actual, expected)
Ejemplo n.º 14
0
    def test_predict_sparse_cb_explore_1(self):
        """Check the ``_predict_format`` output for a dict context with mixed str/int keys (first argument False)."""
        context = {'a': 1, 3: 2}
        actions = ["A", "B", "C"]
        expected = "|s a:1 3:2"

        actual = VowpalLearner._predict_format(False, context, actions)

        self.assertEqual(actual, expected)
Ejemplo n.º 15
0
    def test_learn_dense_cb_explore_1(self):
        """Check the ``_learn_format`` output for a dense tuple context when the second listed action is taken."""
        actions = [(1, 0, 0), (0, 1, 0), (0, 0, 1)]
        context = (1, 2, 3)
        taken = (0, 1, 0)
        expected = "2:-1:0.25 |s 0:1 1:2 2:3"

        actual = VowpalLearner._learn_format(False, 0.25, actions, context, taken, 1)

        self.assertEqual(actual, expected)
Ejemplo n.º 16
0
    def test_learn_sparse_cb_explore_1(self):
        """Check the ``_learn_format`` output for a dict context with tuple actions (first argument False)."""
        actions = [(1, 0, 0), (0, 1, 0), (0, 0, 1)]
        context = {"A": 1, "B": 2}
        taken = (1, 0, 0)
        expected = "1:-0.5:0.33 |s A:1 B:2"

        actual = VowpalLearner._learn_format(False, 0.33, actions, context, taken, .5)

        self.assertEqual(actual, expected)
Ejemplo n.º 17
0
    def test_create_bag(self):
        """Check ``_create_format`` for a bag learner across seed settings and with ``adf=False``."""
        cases = [
            (dict(bag=2),
             "--cb_explore_adf --interactions ssa --interactions sa --ignore_linear s --bag 2 --random_seed 1"),
            (dict(bag=2, seed=10),
             "--cb_explore_adf --interactions ssa --interactions sa --ignore_linear s --bag 2 --random_seed 10"),
            (dict(bag=2, seed=None),
             "--cb_explore_adf --interactions ssa --interactions sa --ignore_linear s --bag 2"),
            (dict(bag=2, adf=False),
             "--cb_explore 3 --interactions ssa --interactions sa --ignore_linear s --bag 2 --random_seed 1"),
        ]

        for kwargs, expected in cases:
            actual = VowpalLearner(**kwargs)._create_format([1, 2, 3])
            self.assertEqual(actual, expected)
Ejemplo n.º 18
0
    def test_create_epsilon(self):
        """Check ``_create_format`` for an epsilon learner across seed settings and with ``adf=False``."""
        cases = [
            (dict(epsilon=0.1),
             "--cb_explore_adf --interactions ssa --interactions sa --ignore_linear s --epsilon 0.1 --random_seed 1"),
            (dict(epsilon=0.1, seed=10),
             "--cb_explore_adf --interactions ssa --interactions sa --ignore_linear s --epsilon 0.1 --random_seed 10"),
            (dict(epsilon=0.1, seed=None),
             "--cb_explore_adf --interactions ssa --interactions sa --ignore_linear s --epsilon 0.1"),
            (dict(epsilon=0.1, adf=False),
             "--cb_explore 3 --interactions ssa --interactions sa --ignore_linear s --epsilon 0.1 --random_seed 1"),
        ]

        for kwargs, expected in cases:
            actual = VowpalLearner(**kwargs)._create_format([1, 2, 3])
            self.assertEqual(actual, expected)
Ejemplo n.º 19
0
        #LambdaSimulation(2000, no_contexts, actions, random_rewards_1, seed=10), #not CB since reward is independent of context
        #LambdaSimulation(2000, no_contexts, actions, random_rewards_2, seed=10), #not CB since reward is independent of context
        #LambdaSimulation(2000, no_contexts, actions, random_rewards_3, seed=10), #not CB since reward is independent of context
        LambdaSimulation(2000,
                         contexts,
                         actions,
                         linear_plus_random_rewards_1,
                         seed=10),
        LambdaSimulation(2000,
                         contexts,
                         actions,
                         linear_plus_random_rewards_2,
                         seed=10),
        LambdaSimulation(2000, contexts, actions, polynomial_reward_1,
                         seed=10),
    ]

    #define a benchmark: this benchmark replays the simulation 15 times
    benchmark = Benchmark(simulations, batch_size=1, seeds=list(range(5)))

    #create the learner factories
    learner_factories = [
        RandomLearner(seed=10),
        EpsilonLearner(epsilon=0.025, seed=10),
        UcbTunedLearner(seed=10),
        VowpalLearner(epsilon=0.025, seed=10),
        VowpalLearner(epsilon=0.025, is_adf=False, seed=10),
        VowpalLearner(bag=5, seed=10),
    ]

    benchmark.evaluate(learner_factories).standard_plot()
Ejemplo n.º 20
0
    def test_predict_dense_cb_explore(self):
        """Check the ``_predict_format`` output for a dense tuple context (first argument False)."""
        context = (1, 2, 3)
        actions = ["A", "B", "C"]
        expected = "|s 0:1 1:2 2:3"

        actual = VowpalLearner._predict_format(False, context, actions)

        self.assertEqual(actual, expected)
Ejemplo n.º 21
0
def baseLearner():
    """Return a ``VowpalLearner`` built with seed 10, epsilon 0.1 and the ``--coin`` flag."""
    # The import happens at call time, not when this module is loaded.
    from coba.learners import VowpalLearner

    settings = {'seed': 10, 'epsilon': 0.1, 'flags': '--coin'}
    return VowpalLearner(**settings)
Ejemplo n.º 22
0
    def test_predict_epsilon_not_adf(self):
        """A non-ADF epsilon learner is expected to favour the first of four actions on its first prediction."""
        learner = VowpalLearner(epsilon=0.75, is_adf=False, seed=30)

        # 0.25 * 0.75 for every action, plus an extra 0.25 on the first one.
        explore_share = 0.25 * 0.75
        expected = [0.25 + explore_share, explore_share, explore_share, explore_share]

        self.assertEqual(expected, learner.predict(1, None, [1, 2, 3, 4]))
Ejemplo n.º 23
0
    def test_predict_sparse_cb_explore_2(self):
        """Check the ``_predict_format`` output for a context given as a pair of tuples (first argument False)."""
        context = ((1, 5, 7), (2, 2, 3))
        actions = ["A", "B", "C"]
        expected = "|s 1:2 5:2 7:3"

        actual = VowpalLearner._predict_format(False, context, actions)

        self.assertEqual(actual, expected)
Ejemplo n.º 24
0
    def test_predict_cover_not_adf(self):
        """A cover learner is expected to put all probability on the first of four actions."""
        learner = VowpalLearner(cover=5, seed=30)

        expected = [1, 0, 0, 0]
        predicted = learner.predict(1, None, [1, 2, 3, 4])

        self.assertEqual(expected, predicted)
Ejemplo n.º 25
0
    def test_predict_epsilon_adf(self):
        """An ADF epsilon learner is expected to predict 0.25 for each of four actions."""
        learner = VowpalLearner(epsilon=0.05, is_adf=True, seed=20)

        expected = [0.25, 0.25, 0.25, 0.25]
        predicted = learner.predict(1, None, [1, 2, 3, 4])

        self.assertEqual(expected, predicted)
Ejemplo n.º 26
0
    def test_predict_bag_not_adf(self):
        """A non-ADF bag learner is expected to put all probability on the first of four actions."""
        learner = VowpalLearner(bag=5, is_adf=False, seed=30)

        expected = [1, 0, 0, 0]
        predicted = learner.predict(1, None, [1, 2, 3, 4])

        self.assertEqual(expected, predicted)
Ejemplo n.º 27
0
    def test_predict_bag_adf(self):
        """An ADF bag learner is expected to predict 0.25 for each of four actions."""
        learner = VowpalLearner(bag=5, is_adf=True, seed=30)

        expected = [0.25, 0.25, 0.25, 0.25]
        predicted = learner.predict(1, None, [1, 2, 3, 4])

        self.assertEqual(expected, predicted)
Ejemplo n.º 28
0
"""
This is an example script that creates a Benchmark that matches the bandit bakeoff paper.
This script requires that the matplotlib and vowpalwabbit packages be installed.
"""

from coba.learners import RandomLearner, EpsilonLearner, VowpalLearner, UcbTunedLearner, CorralLearner
from coba.benchmarks import Benchmark

if __name__ == '__main__':
    # Load the benchmark definition from the JSON file in the examples folder.
    benchmark = Benchmark.from_file("./examples/benchmark_short.json")

    # Build the learners one at a time, in the order they are evaluated.
    learners = []
    learners.append(RandomLearner())
    learners.append(EpsilonLearner(epsilon=0.025))
    learners.append(UcbTunedLearner())
    learners.append(VowpalLearner(bag=5, seed=10))
    # The Corral learner wraps its own bag learner and UCB-tuned learner.
    learners.append(
        CorralLearner([VowpalLearner(bag=5, seed=10), UcbTunedLearner()],
                      eta=.075,
                      T=40000,
                      seed=10))

    # Evaluate the learners (passing the log path and seed), then plot the result.
    result = benchmark.evaluate(learners, './examples/bakeoff.log', seed=10)
    result.standard_plot()
Ejemplo n.º 29
0
    def test_predict_sparse_cb_explore_adf_3(self):
        """Check the ``_predict_format`` output for a pair-of-tuples context with string actions (first argument True)."""
        context = ((1, 3), (1, 2))
        actions = ["A", "B", "C"]
        expected = "shared |s 1:1 3:2\n|a A:1\n|a B:1\n|a C:1"

        actual = VowpalLearner._predict_format(True, context, actions)

        self.assertEqual(actual, expected)
Ejemplo n.º 30
0
    def test_learn_sparse_cb_explore_adf_2(self):
        """Check the ``_learn_format`` output for a pair-of-tuples context with string actions (first argument True)."""
        actions = ["A", "B", "C"]
        context = ((3, 5), (2, 3))
        expected = "shared |s 3:2 5:3\n|a A:1\n|a B:1\n0:-0.33:0.11 |a C:1"

        actual = VowpalLearner._learn_format(True, 0.11, actions, context, "C", 0.33)

        self.assertEqual(actual, expected)