def test_transaction_resume_1(self):
    """Resuming from an existing transaction log should skip completed work.

    The second Experiment is given a BrokenLearner but must never invoke it,
    because all of its results are restored from the first experiment's
    transaction log rather than re-evaluated.
    """
    sim             = LambdaSimulation(2, lambda i: i, lambda i, c: [0, 1, 2], lambda i, c, a: cast(float, a))
    working_learner = ModuloLearner()
    broken_learner  = BrokenLearner()

    # try/finally replaces the original try/except/finally whose except clause
    # was a bare re-raise with an unused exception variable — it added nothing.
    # The finally still guarantees the temp log is removed however the test exits.
    try:
        # The first run's result is only needed for its side effect (the log file).
        Experiment([sim], [working_learner], evaluation_task=OnlineOnPolicyEvalTask(False)).evaluate("coba/tests/.temp/transactions.log")
        second_result = Experiment([sim], [broken_learner], evaluation_task=OnlineOnPolicyEvalTask(False)).evaluate("coba/tests/.temp/transactions.log")

        actual_learners     = second_result.learners.to_tuples()
        actual_environments = second_result.environments.to_tuples()
        actual_interactions = second_result.interactions.to_tuples()

        expected_learners     = [(0, "Modulo", "Modulo(p=0)", '0')]
        expected_environments = [(0, 'LambdaSimulation')]
        expected_interactions = [(0, 0, 1, 0), (0, 0, 2, 1)]
    finally:
        if Path('coba/tests/.temp/transactions.log').exists():
            Path('coba/tests/.temp/transactions.log').unlink()

    self.assertCountEqual(actual_learners, expected_learners)
    self.assertCountEqual(actual_environments, expected_environments)
    self.assertCountEqual(actual_interactions, expected_interactions)
def test_process_rewards_info_logs_kwargs_partial(self):
    """Kwargs present on only some interactions appear only in those result rows."""
    learner = RecordingLearner(with_info=True, with_log=True)
    task    = OnlineOnPolicyEvalTask(time=False)

    interactions = [
        SimulatedInteraction(1, [1,2,3], rewards=[7,8,9]),
        SimulatedInteraction(2, [4,5,6], rewards=[4,5,6], letters=['d','e','f']),
        SimulatedInteraction(3, [7,8,9], rewards=[1,2,3], letters=['g','h','i']),
    ]

    task_results = list(task.process(learner, interactions))

    # One predict/learn call per interaction, in order.
    self.assertEqual([(1,[1,2,3]), (2,[4,5,6]), (3,[7,8,9])], learner.predict_calls)
    self.assertEqual([([1,0,0],1), ([0,1,0],2), ([0,0,1],3)], learner.predict_returns)
    self.assertEqual([(1,1,7,1,1), (2,5,5,1,2), (3,9,3,1,3)], learner.learn_calls)

    # The first interaction had no 'letters' kwarg, so its row omits the key.
    self.assertEqual([
        {"rewards":7, 'learn':1, 'predict':1},
        {"rewards":5, 'learn':2, 'predict':2, 'letters':'e'},
        {"rewards":3, 'learn':3, 'predict':3, 'letters':'i'},
    ], task_results)
def test_time(self):
    """With time=True the task reports (near-zero) predict/learn timings."""
    learner      = RecordingLearner()
    task         = OnlineOnPolicyEvalTask(time=True)
    interactions = [SimulatedInteraction(1, [1,2,3], rewards=[7,8,9])]

    first_row = list(task.process(learner, interactions))[0]

    # Timings for a trivial learner should round to zero at 2 decimal places.
    self.assertAlmostEqual(0, first_row["predict_time"], places=2)
    self.assertAlmostEqual(0, first_row["learn_time"], places=2)
def __init__(self,
    environments: Sequence[Environment],
    learners: Sequence[Learner],
    learner_task: LearnerTask = SimpleLearnerTask(),
    environment_task: EnvironmentTask = SimpleEnvironmentTask(),
    evaluation_task: EvaluationTask = OnlineOnPolicyEvalTask()) -> None:
    """Create an Experiment from its constituent parts.

    Args:
        environments: The environments to evaluate against.
        learners: The learners to evaluate.
        learner_task: Task that produces a description of a learner.
        environment_task: Task that produces a description of an environment.
        evaluation_task: Task that evaluates a learner on an environment.
    """
    self._environments = environments
    self._learners     = learners

    self._learner_task     = learner_task
    self._environment_task = environment_task
    self._evaluation_task  = evaluation_task

    # Execution knobs; left as None here and presumably configured later — TODO confirm.
    self._processes        : Optional[int] = None
    self._maxchunksperchild: Optional[int] = None
    self._chunk_by         : Optional[str] = None
def test_process_sparse_rewards_no_info_no_logs_no_kwargs(self):
    """Sparse (dict) contexts and actions flow through predict/learn unchanged."""
    learner = RecordingLearner(with_info=False, with_log=False)
    task    = OnlineOnPolicyEvalTask(time=False)

    interactions = [
        SimulatedInteraction({'c':1}, [{'a':1},{'a':2}], rewards=[7,8]),
        SimulatedInteraction({'c':2}, [{'a':4},{'a':5}], rewards=[4,5]),
    ]

    task_results = list(task.process(learner, interactions))

    self.assertEqual([({'c':1},[{'a':1},{'a':2}]), ({'c':2},[{'a':4},{'a':5}])], learner.predict_calls)
    self.assertEqual([[1,0],[0,1]], learner.predict_returns)
    self.assertEqual([({'c':1},{'a':1},7,1,None), ({'c':2},{'a':5},5,1,None)], learner.learn_calls)
    self.assertEqual([{"rewards":7}, {"rewards":5}], task_results)
def test_process_reveals_rewards_no_info_no_logs_no_kwargs(self):
    """When reveals are supplied the learner sees reveals while rows keep both keys."""
    learner = RecordingLearner(with_info=False, with_log=False)
    task    = OnlineOnPolicyEvalTask(time=False)

    interactions = [
        SimulatedInteraction(1, [1,2,3], reveals=[7,8,9], rewards=[1,3,5]),
        SimulatedInteraction(2, [4,5,6], reveals=[4,5,6], rewards=[2,4,6]),
        SimulatedInteraction(3, [7,8,9], reveals=[1,2,3], rewards=[3,5,7]),
    ]

    task_results = list(task.process(learner, interactions))

    self.assertEqual([(1,[1,2,3]), (2,[4,5,6]), (3,[7,8,9])], learner.predict_calls)
    self.assertEqual([[1,0,0],[0,1,0],[0,0,1]], learner.predict_returns)
    # Learn receives the revealed value, not the reward.
    self.assertEqual([(1,1,7,1,None), (2,5,5,1,None), (3,9,3,1,None)], learner.learn_calls)
    self.assertEqual([
        {"reveals":7, "rewards":1},
        {"reveals":5, "rewards":4},
        {"reveals":3, "rewards":7},
    ], task_results)
def test_no_params(self):
    """Learners/environments that expose no params still produce well-formed rows."""
    experiment = Experiment([NoParamsEnvironment()], [NoParamsLearner()], evaluation_task=OnlineOnPolicyEvalTask(False))
    result     = experiment.evaluate()

    self.assertCountEqual(result.learners.to_tuples(), [(0, 'NoParamsLearner', 'NoParamsLearner')])
    self.assertCountEqual(result.environments.to_tuples(), [(0, 'NoParamsEnvironment')])
    self.assertCountEqual(result.interactions.to_tuples(), [(0, 0, 1, 0), (0, 0, 2, 1)])
def test_sim(self):
    """A single simulation with a single learner yields one row per interaction."""
    simulation = LambdaSimulation(2, lambda i: i, lambda i, c: [0, 1, 2], lambda i, c, a: cast(float, a))
    result     = Experiment([simulation], [ModuloLearner()], evaluation_task=OnlineOnPolicyEvalTask(False)).evaluate()

    self.assertCountEqual(result.learners.to_tuples(), [(0, "Modulo", "Modulo(p=0)", '0')])
    self.assertCountEqual(result.environments.to_tuples(), [(0, 'LambdaSimulation')])
    self.assertCountEqual(result.interactions.to_tuples(), [(0, 0, 1, 0), (0, 0, 2, 1)])
def test_learners(self):
    """Two learners on one environment produce a result row per learner/interaction pair."""
    sim      = LambdaSimulation(2, lambda i: i, lambda i, c: [0, 1, 2], lambda i, c, a: cast(float, a))
    learner1 = ModuloLearner("0") #type: ignore
    learner2 = ModuloLearner("1") #type: ignore

    experiment    = Experiment([sim], [learner1, learner2], evaluation_task=OnlineOnPolicyEvalTask(False))
    actual_result = experiment.evaluate()

    # CONSISTENCY FIX: use the public learners/environments accessors, as the
    # sibling tests do, instead of reaching into private _learners/_environments.
    actual_learners     = actual_result.learners.to_tuples()
    actual_environments = actual_result.environments.to_tuples()
    actual_interactions = actual_result.interactions.to_tuples()

    expected_learners     = [(0, "Modulo", "Modulo(p=0)", '0'), (1, "Modulo", "Modulo(p=1)", '1')]
    expected_environments = [(0, 'LambdaSimulation')]
    expected_interactions = [(0, 0, 1, 0), (0, 0, 2, 1), (0, 1, 1, 0), (0, 1, 2, 1)]

    self.assertCountEqual(actual_learners, expected_learners)
    self.assertCountEqual(actual_environments, expected_environments)
    self.assertCountEqual(actual_interactions, expected_interactions)
def test_ignore_raise(self):
    """A learner that raises should be logged and skipped, not abort the experiment."""
    CobaContext.logger = IndentLogger(ListSink())

    sim1 = LambdaSimulation(2, lambda i: i, lambda i, c: [0, 1, 2], lambda i, c, a: cast(float, a))
    sim2 = LambdaSimulation(3, lambda i: i, lambda i, c: [3, 4, 5], lambda i, c, a: cast(float, a))

    experiment = Experiment([sim1, sim2], [ModuloLearner(), BrokenLearner()], evaluation_task=OnlineOnPolicyEvalTask(False))
    result     = experiment.evaluate()

    actual_learners     = result.learners.to_tuples()
    actual_environments = result.environments.to_tuples()
    actual_interactions = result.interactions.to_tuples()

    expected_learners     = [(0, "Modulo", "Modulo(p=0)", '0'), (1, "Broken", "Broken", float('nan'))]
    expected_environments = [(0, 'LambdaSimulation'), (1, 'LambdaSimulation')]
    expected_interactions = [(0, 0, 1, 0), (0, 0, 2, 1), (1, 0, 1, 3), (1, 0, 2, 4), (1, 0, 3, 5)]

    self.assertIsInstance(CobaContext.logger, IndentLogger)
    # The broken learner fails on both environments, so two exception log lines.
    self.assertEqual(2, sum(int("Unexpected exception:" in item) for item in CobaContext.logger.sink.items))

    self.assertCountEqual(actual_learners[0], expected_learners[0])
    self.assertCountEqual(actual_learners[1][:3], expected_learners[1][:3])
    # BUG FIX: the original asserted isnan on the test's own expected constant
    # (trivially true); the NaN check must target the actual result instead.
    self.assertTrue(math.isnan(actual_learners[1][3]))
    self.assertCountEqual(actual_environments, expected_environments)
    self.assertCountEqual(actual_interactions, expected_interactions)