def test_process_rewards_info_logs_kwargs_partial(self):
    task         = OnlineOnPolicyEvalTask(time=False)
    learner      = RecordingLearner(with_info=True, with_log=True)
    interactions = [
        SimulatedInteraction(1,[1,2,3],rewards=[7,8,9]),
        SimulatedInteraction(2,[4,5,6],rewards=[4,5,6],letters=['d','e','f']),
        SimulatedInteraction(3,[7,8,9],rewards=[1,2,3],letters=['g','h','i']),
    ]

    task_results = list(task.process(learner, interactions))

    #each expected learn call is a (context, action, reward, probability, info) tuple
    expected_predict_calls   = [(1,[1,2,3]),(2,[4,5,6]),(3,[7,8,9])]
    expected_predict_returns = [([1,0,0],1),([0,1,0],2),([0,0,1],3)]
    expected_learn_calls     = [(1,1,7,1,1),(2,5,5,1,2),(3,9,3,1,3)]
    expected_task_results    = [
        {"rewards":7,'learn':1,'predict':1},
        {"rewards":5,'learn':2,'predict':2,'letters':'e'},
        {"rewards":3,'learn':3,'predict':3,'letters':'i'}
    ]

    self.assertEqual(expected_predict_calls  , learner.predict_calls)
    self.assertEqual(expected_predict_returns, learner.predict_returns)
    self.assertEqual(expected_learn_calls    , learner.learn_calls)
    self.assertEqual(expected_task_results   , task_results)
def test_lazy_initialization(self):
    self.has_been_lazy_initialized = False
    outer_self = self

    class LazyErrorList(collections.abc.Sequence):
        def __init__(self, items) -> None:
            self._items = items

        def __len__(self) -> int:
            return len(self._items)

        def __getitem__(self, index):
            outer_self.has_been_lazy_initialized = True
            return self._items[index]

    context = LazyErrorList([1])
    actions = [LazyErrorList([1])]*2

    interaction = SimulatedInteraction(context, actions, rewards=[0,1])
    self.assertFalse(self.has_been_lazy_initialized)

    interaction.context
    self.assertTrue(self.has_been_lazy_initialized)

    self.has_been_lazy_initialized = False
    interaction.actions
    self.assertTrue(self.has_been_lazy_initialized)
def test_interaction_context_performance(self):
    interaction = SimulatedInteraction([1,2,3]*100, (1,2,3), rewards=(4,5,6))

    time = timeit.timeit(lambda: interaction.context, number=10000)

    #best observed was 0.0015
    self.assertLess(time, .015)
def test_time(self):
    task         = OnlineOnPolicyEvalTask(time=True)
    learner      = RecordingLearner()
    interactions = [SimulatedInteraction(1,[1,2,3],rewards=[7,8,9])]

    task_results = list(task.process(learner, interactions))

    self.assertAlmostEqual(0, task_results[0]["predict_time"], places=2)
    self.assertAlmostEqual(0, task_results[0]["learn_time"  ], places=2)
def test_process_sparse_rewards_no_info_no_logs_no_kwargs(self):
    task         = OnlineOnPolicyEvalTask(time=False)
    learner      = RecordingLearner(with_info=False, with_log=False)
    interactions = [
        SimulatedInteraction({'c':1},[{'a':1},{'a':2}],rewards=[7,8]),
        SimulatedInteraction({'c':2},[{'a':4},{'a':5}],rewards=[4,5]),
    ]

    task_results = list(task.process(learner, interactions))

    expected_predict_calls   = [({'c':1},[{'a':1},{'a':2}]),({'c':2},[{'a':4},{'a':5}])]
    expected_predict_returns = [[1,0],[0,1]]
    expected_learn_calls     = [({'c':1},{'a':1},7,1,None),({'c':2},{'a':5},5,1,None)]
    expected_task_results    = [{"rewards":7},{"rewards":5}]

    self.assertEqual(expected_predict_calls  , learner.predict_calls)
    self.assertEqual(expected_predict_returns, learner.predict_returns)
    self.assertEqual(expected_learn_calls    , learner.learn_calls)
    self.assertEqual(expected_task_results   , task_results)
def test_process_reveals_rewards_no_info_no_logs_no_kwargs(self):
    task         = OnlineOnPolicyEvalTask(time=False)
    learner      = RecordingLearner(with_info=False, with_log=False)
    interactions = [
        SimulatedInteraction(1,[1,2,3],reveals=[7,8,9],rewards=[1,3,5]),
        SimulatedInteraction(2,[4,5,6],reveals=[4,5,6],rewards=[2,4,6]),
        SimulatedInteraction(3,[7,8,9],reveals=[1,2,3],rewards=[3,5,7]),
    ]

    task_results = list(task.process(learner, interactions))

    expected_predict_calls   = [(1,[1,2,3]),(2,[4,5,6]),(3,[7,8,9])]
    expected_predict_returns = [[1,0,0],[0,1,0],[0,0,1]]
    expected_learn_calls     = [(1,1,7,1,None),(2,5,5,1,None),(3,9,3,1,None)]
    expected_task_results    = [{"reveals":7,"rewards":1},{"reveals":5,"rewards":4},{"reveals":3,"rewards":7}]

    self.assertEqual(expected_predict_calls  , learner.predict_calls)
    self.assertEqual(expected_predict_returns, learner.predict_returns)
    self.assertEqual(expected_learn_calls    , learner.learn_calls)
    self.assertEqual(expected_task_results   , task_results)
def test_sim_source(self):
    expected_env = MemorySimulation(params={}, interactions=[SimulatedInteraction(1, [1,2], rewards=[2,3])])
    actual_env   = SerializedSimulation(expected_env)

    self.assertEqual(expected_env.params, actual_env.params)
    self.assertEqual(len(list(expected_env.read())), len(list(actual_env.read())))

    for e_interaction, a_interaction in zip(expected_env.read(), actual_env.read()):
        self.assertEqual(e_interaction.context, a_interaction.context)
        self.assertEqual(e_interaction.actions, a_interaction.actions)
        self.assertEqual(e_interaction.kwargs , a_interaction.kwargs)
def test_process_reward_no_actions_no_probability_no_info_no_logs(self):
    task         = OnlineWarmStartEvalTask(time=False)
    learner      = RecordingLearner(with_info=False, with_log=False)
    interactions = [
        LoggedInteraction(1, 2, reward=3),
        LoggedInteraction(2, 3, reward=4),
        LoggedInteraction(3, 4, reward=5),
        SimulatedInteraction(None,[1,2,3],rewards=[7,8,9]),
        SimulatedInteraction(None,[4,5,6],rewards=[4,5,6]),
        SimulatedInteraction(None,[7,8,9],rewards=[1,2,3]),
    ]

    task_results = list(task.process(learner, interactions))

    expected_predict_calls   = [(None,[1,2,3]),(None,[4,5,6]),(None,[7,8,9])]
    expected_predict_returns = [[1,0,0],[0,1,0],[0,0,1]]
    expected_learn_calls     = [
        (1,2,3,None,None),(2,3,4,None,None),(3,4,5,None,None),
        (None,1,7,1,None),(None,5,5,1,None),(None,9,3,1,None)
    ]
    expected_task_results    = [{},{},{},{"rewards":7},{"rewards":5},{"rewards":3}]

    self.assertEqual(expected_predict_calls  , learner.predict_calls)
    self.assertEqual(expected_predict_returns, learner.predict_returns)
    self.assertEqual(expected_learn_calls    , learner.learn_calls)
    self.assertEqual(expected_task_results   , task_results)
def test_sim_write_read_with_params_and_none_context(self):
    sink         = ListSink()
    expected_env = MemorySimulation(params={'a':1}, interactions=[SimulatedInteraction(None, [1,2], rewards=[2,3])])

    SerializedSimulation(expected_env).write(sink)
    actual_env = SerializedSimulation(ListSource(sink.items))

    self.assertEqual(expected_env.params, actual_env.params)
    self.assertEqual(len(list(expected_env.read())), len(list(actual_env.read())))

    for e_interaction, a_interaction in zip(expected_env.read(), actual_env.read()):
        self.assertEqual(e_interaction.context, a_interaction.context)
        self.assertEqual(e_interaction.actions, a_interaction.actions)
        self.assertEqual(e_interaction.kwargs , a_interaction.kwargs)
def test_reveals_results(self):
    interaction = SimulatedInteraction((1,2), (1,2,3), reveals=[(1,2),(3,4),(5,6)], rewards=[4,5,6])

    self.assertEqual((1,2), interaction.context)
    self.assertCountEqual((1,2,3), interaction.actions)
    self.assertEqual({"reveals":[(1,2),(3,4),(5,6)], "rewards":[4,5,6]}, interaction.kwargs)
def test_custom_rewards(self):
    interaction = SimulatedInteraction((1,2), (1,2,3), rewards=[4,5,6])

    self.assertEqual((1,2), interaction.context)
    self.assertCountEqual((1,2,3), interaction.actions)
    self.assertEqual({"rewards":[4,5,6]}, interaction.kwargs)
def test_actions_correct_3(self) -> None:
    self.assertSequenceEqual([(1,2),(3,4)], SimulatedInteraction(None, [(1,2),(3,4)], rewards=[1,2]).actions)
def test_actions_correct_2(self) -> None:
    self.assertSequenceEqual(["A","B"], SimulatedInteraction(None, ["A","B"], rewards=[1,2]).actions)
def read(self) -> Iterable[SimulatedInteraction]:
    yield SimulatedInteraction(self._reads, [0,1], rewards=[0,1])
    self._reads += 1
def test_context_str(self):
    interaction = SimulatedInteraction("A", (1,2,3), rewards=(4,5,6))
    self.assertEqual("A", interaction.context)
def test_interactions(self):
    input_interactions = [
        SimulatedInteraction(1, [1,2,3], rewards=[0,1,2]),
        SimulatedInteraction(2, [4,5,6], rewards=[2,3,4])
    ]
    simulation   = MemorySimulation(input_interactions)
    interactions = list(simulation.read())

    #read() should yield the interactions the simulation was constructed with, in order
    self.assertEqual(input_interactions[0], interactions[0])
    self.assertEqual(input_interactions[1], interactions[1])
def test_context_none(self):
    interaction = SimulatedInteraction(None, (1,2,3), rewards=(4,5,6))
    self.assertEqual(None, interaction.context)
def test_context_dense_2(self):
    interaction = SimulatedInteraction((1,2,3,(0,0,1)), (1,2,3), rewards=(4,5,6))
    self.assertEqual((1,2,3,(0,0,1)), interaction.context)
def filter(self, interactions: Iterable[SimulatedInteraction]) -> Iterable[SimulatedInteraction]:
    for interaction in interactions:
        yield SimulatedInteraction((interaction.context, self._count), interaction.actions, **interaction.kwargs)
        self._count += 1
def test_context_sparse_dict(self):
    interaction = SimulatedInteraction({1:0}, (1,2,3), rewards=(4,5,6))
    self.assertEqual({1:0}, interaction.context)