def test_eval_seeds(self):
    """Evaluating a shuffled benchmark with a fixed seed yields deterministic results."""
    simulation = LambdaSimulation(3, lambda i: i, lambda i, c: [0, 1, 2], lambda i, c, a: cast(float, a))

    result = Benchmark([simulation], shuffle=[1, 4]).evaluate([RandomLearner()], seed=1)

    expected_learners = [(0, "random", "random")]
    expected_simulations = [
        (0, "LambdaSimulation", "1", "None", '"LambdaSimulation",{"Shuffle":1}'),
        (1, "LambdaSimulation", "4", "None", '"LambdaSimulation",{"Shuffle":4}'),
    ]
    expected_interactions = [
        (0, 0, 1, 0), (0, 0, 2, 2), (0, 0, 3, 1),
        (1, 0, 1, 0), (1, 0, 2, 2), (1, 0, 3, 1),
    ]

    # Order of rows is not part of the contract, so compare as multisets.
    self.assertCountEqual(result.learners.to_tuples(), expected_learners)
    self.assertCountEqual(result.simulations.to_tuples(), expected_simulations)
    self.assertCountEqual(result.interactions.to_tuples(), expected_interactions)
#LambdaSimulation(2000, no_contexts, actions, random_rewards_1, seed=10), #not CB since reward is independent of context #LambdaSimulation(2000, no_contexts, actions, random_rewards_2, seed=10), #not CB since reward is independent of context #LambdaSimulation(2000, no_contexts, actions, random_rewards_3, seed=10), #not CB since reward is independent of context LambdaSimulation(2000, contexts, actions, linear_plus_random_rewards_1, seed=10), LambdaSimulation(2000, contexts, actions, linear_plus_random_rewards_2, seed=10), LambdaSimulation(2000, contexts, actions, polynomial_reward_1, seed=10), ] #define a benchmark: this benchmark replays the simulation 15 times benchmark = Benchmark(simulations, batch_size=1, seeds=list(range(5))) #create the learner factories learner_factories = [ RandomLearner(seed=10), EpsilonLearner(epsilon=0.025, seed=10), UcbTunedLearner(seed=10), VowpalLearner(epsilon=0.025, seed=10), VowpalLearner(epsilon=0.025, is_adf=False, seed=10), VowpalLearner(bag=5, seed=10), ] benchmark.evaluate(learner_factories).standard_plot()
def test_learn(self):
    """Calling learn with a full interaction should complete without raising."""
    RandomLearner().learn(2, None, 1, 1, 1)
def test_predict(self):
    """A random learner spreads probability mass uniformly across the given actions."""
    learner = RandomLearner()
    probabilities = learner.predict(None, [1, 2, 3, 4])
    self.assertEqual([0.25, 0.25, 0.25, 0.25], probabilities)
def test_params(self):
    """The learner reports its family as "random" in its params."""
    learner = RandomLearner()
    self.assertEqual({"family": "random"}, learner.params)
from coba.experiments import Experiment
from coba.environments import Environments

#NOTE(review): CobaContext, RandomLearner, EpsilonBanditLearner and VowpalEpsilonLearner
#are referenced below but not imported in this visible span — confirm they are imported
#elsewhere in the file.

#this line is required by Python in order to use multi-processing
if __name__ == '__main__':

    # These configuration changes aren't ever required.
    # They are simply here to serve as an example.
    # These can also be set automatically by creating a .coba file your project root.
    CobaContext.cacher.cache_directory = './.coba_cache'
    CobaContext.experiment.processes = 2
    CobaContext.experiment.chunk_by = 'task'

    #First, we define the learners that we want to test
    learners = [
        RandomLearner(),
        EpsilonBanditLearner(),
        VowpalEpsilonLearner(),
    ]

    #Next we create the environments we'd like evaluate against
    environments = Environments.from_linear_synthetic(1000, n_action_features=0).shuffle([0, 1, 2, 3])

    #We then create and evaluate our experiment from our environments and learners
    result = Experiment(environments, learners).evaluate()

    #After evaluating can create a quick summary plot to get a sense of how the learners performed
    #(err='sd' draws standard-deviation error bands)
    result.plot_learners(err='sd')

    #We can also create a plot examining how specific learners did across each shuffle of our environments