Code Example #1
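Verifies that OnlineOnPolicyEvalTask.process records every predict and learn call on the learner and that each result row carries the chosen reward, the learner's logged values, and any extra interaction kwargs (here, letters, which only two of the three interactions supply).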
    def test_process_rewards_info_logs_kwargs_partial(self):

        task         = OnlineOnPolicyEvalTask(time=False)
        learner      = RecordingLearner(with_info=True, with_log=True)
        interactions = [
            SimulatedInteraction(1,[1,2,3],rewards=[7,8,9]),
            SimulatedInteraction(2,[4,5,6],rewards=[4,5,6],letters=['d','e','f']),
            SimulatedInteraction(3,[7,8,9],rewards=[1,2,3],letters=['g','h','i']),
        ]

        task_results = list(task.process(learner, interactions))

        expected_predict_calls   = [(1,[1,2,3]),(2,[4,5,6]),(3,[7,8,9])]
        expected_predict_returns = [([1,0,0],1),([0,1,0],2),([0,0,1],3)]
        expected_learn_calls     = [(1,1,7,1,1),(2,5,5,1,2),(3,9,3,1,3)]
        expected_task_results    = [
            {"rewards":7,'learn':1,'predict':1},
            {"rewards":5,'learn':2,'predict':2,'letters':'e'},
            {"rewards":3,'learn':3,'predict':3,'letters':'i'}
        ]

        self.assertEqual(expected_predict_calls, learner.predict_calls)
        self.assertEqual(expected_predict_returns, learner.predict_returns)
        self.assertEqual(expected_learn_calls, learner.learn_calls)
        self.assertEqual(expected_task_results, task_results)
Code Example #2
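Confirms that constructing a SimulatedInteraction does not read its context or actions: the flag-setting lazy sequences are only touched once interaction.context or interaction.actions is accessed.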
    def test_lazy_initialization(self):

        self.has_been_lazy_initialized = False
        outer_self = self

        class LazyErrorList(collections.abc.Sequence):
            
            def __init__(self, items) -> None:
                self._items = items

            def __len__(self) -> int:
                return len(self._items)

            def __getitem__(self, index):
                outer_self.has_been_lazy_initialized = True
                return self._items[index]

        context = LazyErrorList([1])
        actions = [LazyErrorList([1])]*2

        interaction = SimulatedInteraction(context,actions, rewards=[0,1])
        self.assertFalse(self.has_been_lazy_initialized)
        
        interaction.context # accessing context reads the lazy sequence
        self.assertTrue(self.has_been_lazy_initialized)

        self.has_been_lazy_initialized = False
        interaction.actions # accessing actions reads the lazy sequences
        self.assertTrue(self.has_been_lazy_initialized)
Code Example #3
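Times 10,000 accesses of interaction.context on a 300-element dense context and asserts the total stays under 0.015 seconds.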
    def test_interaction_context_performance(self):

        interaction = SimulatedInteraction([1, 2, 3] * 100, (1, 2, 3),
                                           rewards=(4, 5, 6))

        time = timeit.timeit(lambda: interaction.context, number=10000)

        # best observed was 0.0015; assert with a 10x margin to tolerate slower machines
        self.assertLess(time, .015)
Code Example #4
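Checks that with time=True each result row includes predict_time and learn_time, both approximately zero for the in-memory RecordingLearner.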
    def test_time(self):

        task         = OnlineOnPolicyEvalTask(time=True)
        learner      = RecordingLearner()
        interactions = [SimulatedInteraction(1,[1,2,3],rewards=[7,8,9])]

        task_results = list(task.process(learner, interactions))

        self.assertAlmostEqual(0, task_results[0]["predict_time"], places=2)
        self.assertAlmostEqual(0, task_results[0]["learn_time"  ], places=2)
Code Example #5
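Exercises process with sparse dict contexts and actions, asserting the learner receives the dicts unchanged and each result row contains only the reward.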
    def test_process_sparse_rewards_no_info_no_logs_no_kwargs(self):

        task         = OnlineOnPolicyEvalTask(time=False)
        learner      = RecordingLearner(with_info=False, with_log=False)
        interactions = [
            SimulatedInteraction({'c':1},[{'a':1},{'a':2}],rewards=[7,8]),
            SimulatedInteraction({'c':2},[{'a':4},{'a':5}],rewards=[4,5]),
        ]

        task_results = list(task.process(learner, interactions))

        expected_predict_calls   = [({'c':1},[{'a':1},{'a':2}]),({'c':2},[{'a':4},{'a':5}])]
        expected_predict_returns = [[1,0],[0,1]]
        expected_learn_calls     = [({'c':1},{'a':1},7,1,None),({'c':2},{'a':5},5,1,None)]
        expected_task_results    = [{"rewards":7},{"rewards":5}]

        self.assertEqual(expected_predict_calls, learner.predict_calls)
        self.assertEqual(expected_predict_returns, learner.predict_returns)
        self.assertEqual(expected_learn_calls, learner.learn_calls)
        self.assertEqual(expected_task_results, task_results)
Code Example #6
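Covers interactions that supply both reveals and rewards: the learner is trained on the revealed value of the chosen action while each result row reports both the reveal and the reward.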
    def test_process_reveals_rewards_no_info_no_logs_no_kwargs(self):

        task         = OnlineOnPolicyEvalTask(time=False)
        learner      = RecordingLearner(with_info=False, with_log=False)
        interactions = [
            SimulatedInteraction(1,[1,2,3],reveals=[7,8,9],rewards=[1,3,5]),
            SimulatedInteraction(2,[4,5,6],reveals=[4,5,6],rewards=[2,4,6]),
            SimulatedInteraction(3,[7,8,9],reveals=[1,2,3],rewards=[3,5,7]),
        ]

        task_results = list(task.process(learner, interactions))

        expected_predict_calls   = [(1,[1,2,3]),(2,[4,5,6]),(3,[7,8,9])]
        expected_predict_returns = [[1,0,0],[0,1,0],[0,0,1]]
        expected_learn_calls     = [(1,1,7,1,None),(2,5,5,1,None),(3,9,3,1,None)]
        expected_task_results    = [{"reveals":7,"rewards":1},{"reveals":5,"rewards":4}, {"reveals":3,"rewards":7}]

        self.assertEqual(expected_predict_calls, learner.predict_calls)
        self.assertEqual(expected_predict_returns, learner.predict_returns)
        self.assertEqual(expected_learn_calls, learner.learn_calls)
        self.assertEqual(expected_task_results, task_results)
Code Example #7
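Checks that a SerializedSimulation wrapping an environment preserves its params and yields interactions whose context, actions, and kwargs match the original.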
    def test_sim_source(self):
        expected_env = MemorySimulation(
            params={},
            interactions=[SimulatedInteraction(1, [1, 2], rewards=[2, 3])])
        actual_env = SerializedSimulation(expected_env)

        self.assertEqual(expected_env.params, actual_env.params)
        self.assertEqual(len(list(expected_env.read())),
                         len(list(actual_env.read())))
        for e_interaction, a_interaction in zip(expected_env.read(),
                                                actual_env.read()):
            self.assertEqual(e_interaction.context, a_interaction.context)
            self.assertEqual(e_interaction.actions, a_interaction.actions)
            self.assertEqual(e_interaction.kwargs, a_interaction.kwargs)
Code Example #8
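Runs OnlineWarmStartEvalTask over three logged interactions followed by three simulated ones: the logged interactions produce learn calls (with None probability and info) but no predict calls, and they yield empty result rows.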
    def test_process_reward_no_actions_no_probability_no_info_no_logs(self):
        task         = OnlineWarmStartEvalTask(time=False)
        learner      = RecordingLearner(with_info=False, with_log=False)
        interactions = [
            LoggedInteraction(1, 2, reward=3),
            LoggedInteraction(2, 3, reward=4),
            LoggedInteraction(3, 4, reward=5),
            SimulatedInteraction(None,[1,2,3],rewards=[7,8,9]),
            SimulatedInteraction(None,[4,5,6],rewards=[4,5,6]),
            SimulatedInteraction(None,[7,8,9],rewards=[1,2,3]),
        ]

        task_results = list(task.process(learner, interactions))

        expected_predict_calls   = [(None,[1,2,3]),(None,[4,5,6]),(None,[7,8,9])]
        expected_predict_returns = [[1,0,0],[0,1,0],[0,0,1]]
        expected_learn_calls     = [(1,2,3,None,None),(2,3,4,None,None),(3,4,5,None,None),(None,1,7,1,None),(None,5,5,1,None),(None,9,3,1,None)]
        expected_task_results    = [{},{},{},{"rewards":7},{"rewards":5},{"rewards":3}]

        self.assertEqual(expected_predict_calls, learner.predict_calls)
        self.assertEqual(expected_predict_returns, learner.predict_returns)
        self.assertEqual(expected_learn_calls, learner.learn_calls)
        self.assertEqual(expected_task_results, task_results)
Code Example #9
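Writes an environment with params and a None context to a sink via SerializedSimulation, reads it back from the sink, and checks the round trip preserves params and every interaction's context, actions, and kwargs.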
    def test_sim_write_read_with_params_and_none_context(self):
        sink = ListSink()

        expected_env = MemorySimulation(
            params={'a': 1},
            interactions=[SimulatedInteraction(None, [1, 2], rewards=[2, 3])])
        SerializedSimulation(expected_env).write(sink)
        actual_env = SerializedSimulation(ListSource(sink.items))

        self.assertEqual(expected_env.params, actual_env.params)
        self.assertEqual(len(list(expected_env.read())),
                         len(list(actual_env.read())))
        for e_interaction, a_interaction in zip(expected_env.read(),
                                                actual_env.read()):
            self.assertEqual(e_interaction.context, a_interaction.context)
            self.assertEqual(e_interaction.actions, a_interaction.actions)
            self.assertEqual(e_interaction.kwargs, a_interaction.kwargs)
Code Example #10
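Checks that reveals and rewards passed to SimulatedInteraction both end up in interaction.kwargs.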
    def test_reveals_results(self):
        interaction = SimulatedInteraction((1,2), (1,2,3), reveals=[(1,2),(3,4),(5,6)],rewards=[4,5,6])

        self.assertEqual((1,2), interaction.context)
        self.assertCountEqual((1,2,3), interaction.actions)
        self.assertEqual({"reveals":[(1,2),(3,4),(5,6)], "rewards":[4,5,6]}, interaction.kwargs)
Code Example #11
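Checks that a rewards-only SimulatedInteraction exposes the rewards through interaction.kwargs.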
    def test_custom_rewards(self):
        interaction = SimulatedInteraction((1,2), (1,2,3), rewards=[4,5,6])

        self.assertEqual((1,2), interaction.context)
        self.assertCountEqual((1,2,3), interaction.actions)
        self.assertEqual({"rewards":[4,5,6] }, interaction.kwargs)
Code Example #12
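Asserts that tuple-valued actions pass through SimulatedInteraction unchanged.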
    def test_actions_correct_3(self) -> None:
        self.assertSequenceEqual([(1,2), (3,4)], SimulatedInteraction(None, [(1,2), (3,4)], rewards=[1,2]).actions)
Code Example #13
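Asserts that string actions pass through SimulatedInteraction unchanged.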
    def test_actions_correct_2(self) -> None:
        self.assertSequenceEqual(["A","B"], SimulatedInteraction(None, ["A","B"], rewards=[1,2]).actions)
Code Example #14
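A read implementation that yields a single interaction whose context is the current read count; the counter advances after the yield, i.e., only once the generator is resumed.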
    def read(self) -> Iterable[SimulatedInteraction]:
        yield SimulatedInteraction(self._reads, [0,1], rewards=[0,1])
        self._reads += 1
Code Example #15
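Checks that a string context is stored as-is.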
    def test_context_str(self):
        interaction = SimulatedInteraction("A", (1,2,3), rewards=(4,5,6))

        self.assertEqual("A", interaction.context)
Code Example #16
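Checks that MemorySimulation.read returns the interactions the simulation was constructed with.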
    def test_interactions(self):
        simulation   = MemorySimulation([SimulatedInteraction(1, [1,2,3], rewards=[0,1,2]), SimulatedInteraction(2, [4,5,6], rewards=[2,3,4])])
        interactions = list(simulation.read())

        self.assertEqual(2, len(interactions))
        self.assertEqual(1, interactions[0].context)
        self.assertCountEqual([1,2,3], interactions[0].actions)
        self.assertEqual(2, interactions[1].context)
        self.assertCountEqual([4,5,6], interactions[1].actions)
Code Example #17
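Checks that a None context is preserved.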
    def test_context_none(self):
        interaction = SimulatedInteraction(None, (1,2,3), rewards=(4,5,6))

        self.assertIsNone(interaction.context)
Code Example #18
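Checks that a dense tuple context containing a nested tuple is stored as-is.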
    def test_context_dense_2(self):
        interaction = SimulatedInteraction((1,2,3,(0,0,1)), (1,2,3), rewards=(4,5,6))

        self.assertEqual((1,2,3,(0,0,1)), interaction.context)
Code Example #19
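A filter that pairs each interaction's context with a pass counter; note that _count is incremented outside the for loop, i.e., once per complete pass rather than once per interaction.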
    def filter(self, interactions: Iterable[SimulatedInteraction]) -> Iterable[SimulatedInteraction]:
        for interaction in interactions:
            yield SimulatedInteraction((interaction.context, self._count), interaction.actions, **interaction.kwargs)

        self._count += 1
Code Example #20
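Checks that a sparse dict context is stored as-is.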
    def test_context_sparse_dict(self):
        interaction = SimulatedInteraction({1:0}, (1,2,3), rewards=(4,5,6))

        self.assertEqual({1:0}, interaction.context)