コード例 #1
0
    def test_create_args(self):
        """Check ``_create_format`` for learners built from a raw argument string.

        Each case pairs the argument string handed to ``VowpalLearner`` with the
        string expected back for three actions. The ``--cb_explore_adf`` strings
        are expected unchanged, while ``--cb_explore`` (with or without a count)
        is expected to come back as ``--cb_explore 3``.
        """
        cases = (
            ("--cb_explore_adf --interactions sa --ignore_linear s --bag 2 --random_seed 1",
             "--cb_explore_adf --interactions sa --ignore_linear s --bag 2 --random_seed 1"),
            ("--cb_explore 10 --interactions sa --ignore_linear s --bag 2 --random_seed 1",
             "--cb_explore 3 --interactions sa --ignore_linear s --bag 2 --random_seed 1"),
            ("--cb_explore --interactions sa --ignore_linear s --bag 2 --random_seed 1",
             "--cb_explore 3 --interactions sa --ignore_linear s --bag 2 --random_seed 1"),
            ("--cb_explore_adf --interactions sa --ignore_linear s --bag 2",
             "--cb_explore_adf --interactions sa --ignore_linear s --bag 2"),
        )

        # The cases run in the original order and stop at the first mismatch.
        for given_args, expected in cases:
            actual = VowpalLearner(given_args)._create_format([1, 2, 3])
            self.assertEqual(actual, expected)
コード例 #2
0
    def test_predict_epsilon_dict_context_adf(self):
        """A fresh epsilon learner given a dict context predicts 0.25 for each of 4 actions."""
        learner = VowpalLearner(epsilon=0.05, adf=True, seed=20)

        dict_context = {1: 10.2, 2: 3.5}
        actions = [1, 2, 3, 4]
        expected = [0.25, 0.25, 0.25, 0.25]

        probabilities = learner.predict(1, dict_context, actions)  #type: ignore

        self.assertEqual(expected, probabilities)
コード例 #3
0
class Batched:
    """Wrap a ``VowpalLearner`` so feedback is delayed and averaged in batches.

    ``choose`` stores the context of every interaction under its key.
    ``learn`` stores the action and reward for its key and, once ``delay``
    interactions have received a reward, forwards them to the wrapped learner
    in groups of ``batchsize``. Every interaction in a group is given the
    group's summed reward divided by ``batchsize``.
    """

    def __init__(self, delay: int, batchsize: int, epsilon: float, seed: int,
                 flags: str):
        """Create the wrapped learner and an empty interaction memory.

        Args:
            delay: Number of rewarded interactions to collect before learning.
            batchsize: Number of interactions whose rewards are averaged together.
            epsilon: Passed to ``VowpalLearner`` and reported in ``params``.
            seed: Passed to ``VowpalLearner`` and reported in ``params``.
            flags: Passed to ``VowpalLearner`` and reported in ``params``.

        Raises:
            AssertionError: If ``delay`` is not a multiple of ``batchsize``.
        """
        self.learner = VowpalLearner(seed=seed, epsilon=epsilon, flags=flags)
        self.batchsize = batchsize
        self.delay = delay
        self.epsilon = epsilon
        self.seed = seed
        self.flags = flags
        # key -> {'context': ..., 'action': ..., 'reward': ...}; the last two
        # entries only exist once learn() has been called for that key.
        self.mem = {}

        # A delay that is a whole number of batches means a flush never leaves
        # a partial batch behind in the ordinary choose/learn alternation.
        assert self.delay % self.batchsize == 0

    @property
    def family(self) -> str:
        """Return the display name of this learner family."""
        return "Batched CB"

    @property
    def params(self) -> Dict[str, Any]:
        """Return the settings this learner was constructed with."""
        return {
            'e': self.epsilon,
            'seed': self.seed,
            'batchsize': self.batchsize,
            'delay': self.delay,
            'flags': self.flags
        }

    def choose(self, key: int, context: Hashable,
               actions: Sequence[Hashable]) -> int:
        """Choose which action index to take."""

        # Remember the context so it can be replayed to the wrapped learner
        # once the delayed reward is flushed.
        self.mem[key] = {'context': context}

        return self.learner.choose(key, context, actions)

    def learn(self, key: int, context: Hashable, action: Hashable,
              reward: float) -> None:
        """Learn about the result of an action that was taken in a context.

        The feedback is stored and only forwarded to the wrapped learner once
        ``delay`` interactions have received a reward. Interactions that were
        chosen but not yet rewarded stay in memory and are never forwarded
        early, and rewarded interactions that do not fill a whole batch are
        kept for the next flush.
        """

        # setdefault keeps the context stored by choose(); the context given
        # here is only used when choose() was never called for this key.
        entry = self.mem.setdefault(key, {'context': context})
        entry['action'] = action
        entry['reward'] = reward

        # Only interactions that already have a reward can be learned from;
        # counting pending ones would make the flush read a missing 'reward'.
        completed = [(k, v) for k, v in self.mem.items() if 'reward' in v]

        if len(completed) < self.delay:
            return

        flushable = len(completed) - len(completed) % self.batchsize

        for start in range(0, flushable, self.batchsize):
            batch = completed[start:start + self.batchsize]
            mean_reward = sum(v['reward'] for _, v in batch) / self.batchsize

            for k, v in batch:
                self.learner.learn(k, v['context'], v['action'], mean_reward)
                del self.mem[k]
コード例 #4
0
    def __init__(self, delay: int, batchsize: int, epsilon: float, seed: int, flags: str):
        """Build the wrapped ``VowpalLearner`` and record the batching settings.

        Raises ``AssertionError`` when ``delay`` is not a multiple of ``batchsize``.
        """
        # The wrapped learner is constructed before anything else is stored.
        self.learner = VowpalLearner(seed=seed, epsilon=epsilon, flags=flags)

        self.batchsize, self.delay = batchsize, delay
        self.epsilon, self.seed, self.flags = epsilon, seed, flags

        # Starts empty; nothing in this constructor fills it.
        self.mem = {}

        assert self.delay % self.batchsize == 0
コード例 #5
0
    def test_predict_epsilon_not_adf_args_error_2(self):
        """A ``--cb_explore`` learner must reject a later call with a different action count.

        The first prediction uses four actions. The second call passes only
        three and is expected to raise an exception whose message mentions
        ``--cb_explore_adf``.
        """
        learner = VowpalLearner("--cb_explore --epsilon 0.75 --random_seed 20")
        self.assertEqual(
            [0.25 + 0.25 * 0.75, 0.25 * 0.75, 0.25 * 0.75, 0.25 * 0.75],
            learner.predict(1, None, [1, 2, 3, 4]))

        # Only the call that is expected to fail belongs inside assertRaises:
        # an assertEqual in here would raise AssertionError, which is itself an
        # Exception and would hide the case where predict() does not raise.
        with self.assertRaises(Exception) as e:
            learner.predict(1, None, [1, 2, 3])

        self.assertIn("--cb_explore_adf", str(e.exception))
コード例 #6
0
    def test_create_cover(self):
        """Check the argument string built for ``cover=2`` with default, explicit and no seed."""
        base = "--cb_explore 3 --interactions ssa --interactions sa --ignore_linear s --cover 2"

        cases = (
            ({"cover": 2}, base + " --random_seed 1"),
            ({"cover": 2, "seed": 10}, base + " --random_seed 10"),
            # seed=None is expected to drop the --random_seed flag entirely.
            ({"cover": 2, "seed": None}, base),
        )

        for kwargs, expected in cases:
            actual = VowpalLearner(**kwargs)._create_format([1, 2, 3])
            self.assertEqual(actual, expected)
コード例 #7
0
    def test_learn_dense_cb_explore_adf_2(self):
        """Tuple context with the first flag True: a shared line plus one line per action.

        The line of the last action is expected to carry the label ``0:-0.25:0.33``.
        """
        one_hot_actions = [(1, 0, 0), (0, 1, 0), (0, 0, 1)]
        expected = "\n".join([
            "shared |s 0:1 1:2 2:3",
            "|a 0:1",
            "|a 1:1",
            "0:-0.25:0.33 |a 2:1",
        ])

        actual = VowpalLearner._learn_format(True, 0.33, one_hot_actions,
                                             (1, 2, 3), (0, 0, 1), 0.25)

        self.assertEqual(actual, expected)
コード例 #8
0
    def test_learn_sparse_cb_explore_2(self):
        """Pair-of-tuples context with the first flag False: one ``|s`` line labelled ``1:-0.5:0.33``."""
        one_hot_actions = [(1, 0, 0), (0, 1, 0), (0, 0, 1)]
        # Two parallel tuples; the expected output pairs them up as 1:3 and 5:4.
        paired_context = ((1, 5), (3, 4))
        expected = "1:-0.5:0.33 |s 1:3 5:4"

        actual = VowpalLearner._learn_format(False, 0.33, one_hot_actions,
                                             paired_context, (1, 0, 0), 0.5)

        self.assertEqual(actual, expected)
コード例 #9
0
    def test_predict_dense_cb_explore_adf(self):
        """Tuple context with the first flag True: a shared line followed by one ``|a`` line per action."""
        one_hot_actions = [(1, 0, 0), (0, 1, 0), (0, 0, 1)]
        expected = "\n".join([
            "shared |s 0:1 1:2 2:3",
            "|a 0:1",
            "|a 1:1",
            "|a 2:1",
        ])

        actual = VowpalLearner._predict_format(True, (1, 2, 3), one_hot_actions)

        self.assertEqual(actual, expected)
コード例 #10
0
    def test_create_softmax(self):
        """Check the argument string built for ``softmax=0.5`` with default, explicit and no seed."""
        base = ("--cb_explore_adf --interactions ssa --interactions sa "
                "--ignore_linear s --softmax --lambda 0.5")

        cases = (
            ({"softmax": 0.5}, base + " --random_seed 1"),
            ({"softmax": 0.5, "seed": 10}, base + " --random_seed 10"),
            # seed=None is expected to drop the --random_seed flag entirely.
            ({"softmax": 0.5, "seed": None}, base),
        )

        for kwargs, expected in cases:
            actual = VowpalLearner(**kwargs)._create_format([1, 2, 3])
            self.assertEqual(actual, expected)
コード例 #11
0
    def test_learn_sparse_cb_explore_adf_1(self):
        """Dict context with the first flag True: action ``"C"`` gets the label ``0:-0.33:0.11``."""
        named_actions = ["A", "B", "C"]
        dict_context = {"D": 1, "E": 2}
        expected = "\n".join([
            "shared |s D:1 E:2",
            "|a A:1",
            "|a B:1",
            "0:-0.33:0.11 |a C:1",
        ])

        actual = VowpalLearner._learn_format(True, 0.11, named_actions,
                                             dict_context, "C", 0.33)

        self.assertEqual(actual, expected)
コード例 #12
0
    def test_learn_dense_cb_explore_2(self):
        """Tuple context with the first flag False: one ``|s`` line labelled ``1:-0.5:0.33``."""
        one_hot_actions = [(1, 0, 0), (0, 1, 0), (0, 0, 1)]
        expected = "1:-0.5:0.33 |s 0:1 1:2 2:3"

        # (1, 0, 0) is the first listed action; the expected label starts with 1.
        actual = VowpalLearner._learn_format(False, 0.33, one_hot_actions,
                                             (1, 2, 3), (1, 0, 0), 0.5)

        self.assertEqual(actual, expected)
コード例 #13
0
    def test_predict_sparse_cb_explore_adf_2(self):
        """Dict context with the first flag True: a shared line followed by one ``|a`` line per action."""
        dict_context = {"D": 1, "C": 2}
        named_actions = ["A", "B", "C"]
        expected = "\n".join([
            "shared |s D:1 C:2",
            "|a A:1",
            "|a B:1",
            "|a C:1",
        ])

        actual = VowpalLearner._predict_format(True, dict_context, named_actions)

        self.assertEqual(actual, expected)
コード例 #14
0
    def test_predict_sparse_cb_explore_1(self):
        """Dict context with mixed key types and the first flag False: a single ``|s`` line."""
        # The keys deliberately mix a string and an integer.
        dict_context = {'a': 1, 3: 2}
        named_actions = ["A", "B", "C"]
        expected = "|s a:1 3:2"

        actual = VowpalLearner._predict_format(False, dict_context, named_actions)

        self.assertEqual(actual, expected)
コード例 #15
0
    def test_learn_dense_cb_explore_1(self):
        """Tuple context with the first flag False: one ``|s`` line labelled ``2:-1:0.25``."""
        one_hot_actions = [(1, 0, 0), (0, 1, 0), (0, 0, 1)]
        expected = "2:-1:0.25 |s 0:1 1:2 2:3"

        # (0, 1, 0) is the second listed action; the expected label starts with 2.
        actual = VowpalLearner._learn_format(False, 0.25, one_hot_actions,
                                             (1, 2, 3), (0, 1, 0), 1)

        self.assertEqual(actual, expected)
コード例 #16
0
    def test_learn_sparse_cb_explore_1(self):
        """Dict context with the first flag False: one ``|s`` line labelled ``1:-0.5:0.33``."""
        one_hot_actions = [(1, 0, 0), (0, 1, 0), (0, 0, 1)]
        dict_context = {"A": 1, "B": 2}
        expected = "1:-0.5:0.33 |s A:1 B:2"

        actual = VowpalLearner._learn_format(False, 0.33, one_hot_actions,
                                             dict_context, (1, 0, 0), 0.5)

        self.assertEqual(actual, expected)
コード例 #17
0
    def test_create_bag(self):
        """Check the argument string built for ``bag=2`` across seed and ``adf`` settings."""
        shared_tail = "--interactions ssa --interactions sa --ignore_linear s --bag 2"
        adf_base = "--cb_explore_adf " + shared_tail

        cases = (
            ({"bag": 2}, adf_base + " --random_seed 1"),
            ({"bag": 2, "seed": 10}, adf_base + " --random_seed 10"),
            # seed=None is expected to drop the --random_seed flag entirely.
            ({"bag": 2, "seed": None}, adf_base),
            # adf=False is expected to switch to --cb_explore with the action count.
            ({"bag": 2, "adf": False}, "--cb_explore 3 " + shared_tail + " --random_seed 1"),
        )

        for kwargs, expected in cases:
            actual = VowpalLearner(**kwargs)._create_format([1, 2, 3])
            self.assertEqual(actual, expected)
コード例 #18
0
    def test_create_epsilon(self):
        """Check the argument string built for ``epsilon=0.1`` across seed and ``adf`` settings."""
        shared_tail = "--interactions ssa --interactions sa --ignore_linear s --epsilon 0.1"
        adf_base = "--cb_explore_adf " + shared_tail

        cases = (
            ({"epsilon": 0.1}, adf_base + " --random_seed 1"),
            ({"epsilon": 0.1, "seed": 10}, adf_base + " --random_seed 10"),
            # seed=None is expected to drop the --random_seed flag entirely.
            ({"epsilon": 0.1, "seed": None}, adf_base),
            # adf=False is expected to switch to --cb_explore with the action count.
            ({"epsilon": 0.1, "adf": False}, "--cb_explore 3 " + shared_tail + " --random_seed 1"),
        )

        for kwargs, expected in cases:
            actual = VowpalLearner(**kwargs)._create_format([1, 2, 3])
            self.assertEqual(actual, expected)
コード例 #19
0
        #LambdaSimulation(2000, no_contexts, actions, random_rewards_1, seed=10), #not CB since reward is independent of context
        #LambdaSimulation(2000, no_contexts, actions, random_rewards_2, seed=10), #not CB since reward is independent of context
        #LambdaSimulation(2000, no_contexts, actions, random_rewards_3, seed=10), #not CB since reward is independent of context
        LambdaSimulation(2000,
                         contexts,
                         actions,
                         linear_plus_random_rewards_1,
                         seed=10),
        LambdaSimulation(2000,
                         contexts,
                         actions,
                         linear_plus_random_rewards_2,
                         seed=10),
        LambdaSimulation(2000, contexts, actions, polynomial_reward_1,
                         seed=10),
    ]

    #define a benchmark: this benchmark replays the simulation 15 times
    benchmark = Benchmark(simulations, batch_size=1, seeds=list(range(5)))

    #create the learner factories
    learner_factories = [
        RandomLearner(seed=10),
        EpsilonLearner(epsilon=0.025, seed=10),
        UcbTunedLearner(seed=10),
        VowpalLearner(epsilon=0.025, seed=10),
        VowpalLearner(epsilon=0.025, is_adf=False, seed=10),
        VowpalLearner(bag=5, seed=10),
    ]

    benchmark.evaluate(learner_factories).standard_plot()
コード例 #20
0
    def test_predict_dense_cb_explore(self):
        """Tuple context with the first flag False: a single ``|s`` line of index:value pairs."""
        named_actions = ["A", "B", "C"]
        expected = "|s 0:1 1:2 2:3"

        actual = VowpalLearner._predict_format(False, (1, 2, 3), named_actions)

        self.assertEqual(actual, expected)
コード例 #21
0
def baseLearner():
    """Return a new ``VowpalLearner`` with seed 10, epsilon 0.1 and the ``--coin`` flag."""
    # Imported inside the function, so coba is only needed when this is called.
    from coba.learners import VowpalLearner

    settings = {'seed': 10, 'epsilon': 0.1, 'flags': '--coin'}
    return VowpalLearner(**settings)
コード例 #22
0
    def test_predict_epsilon_not_adf(self):
        """A fresh non-ADF learner with epsilon 0.75 predicts 0.4375 for the first action and 0.1875 for the rest."""
        learner = VowpalLearner(epsilon=0.75, is_adf=False, seed=30)

        # Each of the four actions gets 0.25 * 0.75; the first gets 0.25 on top.
        share = 0.25 * 0.75
        expected = [0.25 + share, share, share, share]

        probabilities = learner.predict(1, None, [1, 2, 3, 4])

        self.assertEqual(expected, probabilities)
コード例 #23
0
    def test_predict_sparse_cb_explore_2(self):
        """Pair-of-tuples context with the first flag False: a single ``|s`` line."""
        # Two parallel tuples; the expected output pairs them up as 1:2, 5:2 and 7:3.
        paired_context = ((1, 5, 7), (2, 2, 3))
        named_actions = ["A", "B", "C"]
        expected = "|s 1:2 5:2 7:3"

        actual = VowpalLearner._predict_format(False, paired_context, named_actions)

        self.assertEqual(actual, expected)
コード例 #24
0
    def test_predict_cover_not_adf(self):
        """A fresh ``cover=5`` learner puts all probability on the first of four actions."""
        learner = VowpalLearner(cover=5, seed=30)
        expected = [1, 0, 0, 0]

        probabilities = learner.predict(1, None, [1, 2, 3, 4])

        self.assertEqual(expected, probabilities)
コード例 #25
0
    def test_predict_epsilon_adf(self):
        """A fresh ADF epsilon learner predicts 0.25 for each of four actions."""
        learner = VowpalLearner(epsilon=0.05, is_adf=True, seed=20)
        expected = [0.25, 0.25, 0.25, 0.25]

        probabilities = learner.predict(1, None, [1, 2, 3, 4])

        self.assertEqual(expected, probabilities)
コード例 #26
0
    def test_predict_bag_not_adf(self):
        """A fresh non-ADF ``bag=5`` learner puts all probability on the first of four actions."""
        learner = VowpalLearner(bag=5, is_adf=False, seed=30)
        expected = [1, 0, 0, 0]

        probabilities = learner.predict(1, None, [1, 2, 3, 4])

        self.assertEqual(expected, probabilities)
コード例 #27
0
    def test_predict_bag_adf(self):
        """A fresh ADF ``bag=5`` learner predicts 0.25 for each of four actions."""
        learner = VowpalLearner(bag=5, is_adf=True, seed=30)
        expected = [0.25, 0.25, 0.25, 0.25]

        probabilities = learner.predict(1, None, [1, 2, 3, 4])

        self.assertEqual(expected, probabilities)
コード例 #28
0
"""
This is an example script that creates a Benchmark that matches the bandit bakeoff paper.
This script requires that the matplotlib and vowpalwabbit packages be installed.
"""

from coba.learners import RandomLearner, EpsilonLearner, VowpalLearner, UcbTunedLearner, CorralLearner
from coba.benchmarks import Benchmark

if __name__ == '__main__':
    # The benchmark definition is loaded from a JSON file, relative to the
    # directory the script is launched from.
    benchmark = Benchmark.from_file("./examples/benchmark_short.json")

    # Stand-alone learners, constructed in the same order as they are listed.
    learners = [
        RandomLearner(),
        EpsilonLearner(epsilon=0.025),
        UcbTunedLearner(),
        VowpalLearner(bag=5, seed=10),
    ]

    # The corral learner gets its own freshly built base learners rather than
    # sharing the instances already in the list above.
    corral_base_learners = [VowpalLearner(bag=5, seed=10), UcbTunedLearner()]
    learners.append(
        CorralLearner(corral_base_learners, eta=.075, T=40000, seed=10))

    # Evaluate with the given log file path, then plot the result.
    result = benchmark.evaluate(learners, './examples/bakeoff.log', seed=10)
    result.standard_plot()
コード例 #29
0
    def test_predict_sparse_cb_explore_adf_3(self):
        """Pair-of-tuples context with the first flag True: a shared line plus one ``|a`` line per action."""
        # Two parallel tuples; the expected output pairs them up as 1:1 and 3:2.
        paired_context = ((1, 3), (1, 2))
        named_actions = ["A", "B", "C"]
        expected = "\n".join([
            "shared |s 1:1 3:2",
            "|a A:1",
            "|a B:1",
            "|a C:1",
        ])

        actual = VowpalLearner._predict_format(True, paired_context, named_actions)

        self.assertEqual(actual, expected)
コード例 #30
0
    def test_learn_sparse_cb_explore_adf_2(self):
        """Pair-of-tuples context with the first flag True: action ``"C"`` gets the label ``0:-0.33:0.11``."""
        named_actions = ["A", "B", "C"]
        # Two parallel tuples; the expected output pairs them up as 3:2 and 5:3.
        paired_context = ((3, 5), (2, 3))
        expected = "\n".join([
            "shared |s 3:2 5:3",
            "|a A:1",
            "|a B:1",
            "0:-0.33:0.11 |a C:1",
        ])

        actual = VowpalLearner._learn_format(True, 0.11, named_actions,
                                             paired_context, "C", 0.33)

        self.assertEqual(actual, expected)