def test_works(self):
        """Check Cumulative.get_advantages against a hand-written result.

        Five three-element reward sequences are used. The expected list holds,
        for each sequence, the sum of its rewards repeated three times
        (presumably once per step -- confirm against generate_episodes).
        """
        reward_sequences = [
            [0, 0, 0],
            [0, 1, 2],
            [2, 1, 0],
            [1, 1, 1],
            [0, 0, 0],
        ]
        expected = [0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0]

        cumulative = Cumulative()
        batch = generate_episodes(reward_sequences)

        assert cumulative.get_advantages(batch) == expected
# Example #2
# 0
    def test_works(self, discount, rewards, expected_advantages):
        """Check RewardToGo.get_advantages for one (discount, rewards) case.

        The arguments are presumably injected by a parametrize decorator that
        is not visible in this excerpt.
        """
        reward_to_go = RewardToGo(discount=discount)
        batch = generate_episodes(rewards)

        # Compare the computed advantages directly with the supplied expectation.
        assert reward_to_go.get_advantages(batch) == expected_advantages
    def test_update(self):
        """Smoke-test Gae.update using a mocked value function.

        There is no assertion: the test passes as long as ``update`` does not
        raise.
        """
        # Give the mock __enter__/__exit__ so it can be used in a ``with``
        # statement; entering yields a (Mock, None) pair and __exit__ returns
        # None.
        critic = Mock()
        critic.__enter__ = Mock(return_value=(Mock(), None))
        critic.__exit__ = Mock(return_value=None)

        reward_sequences = [[0, 0, 0], [0, 1, 2], [2, 1, 0], [1, 1, 1]]

        gae = Gae(value_function=critic, gamma=1, lambduh=1)
        batch = generate_episodes(reward_sequences)

        gae.update(batch)
    def test_works(self, gamma, lambduh, rewards, expected_advantages):
        """Check Gae.get_advantages for one (gamma, lambduh, rewards) case.

        The value function is a mock. The arguments are presumably injected by
        a parametrize decorator that is not visible in this excerpt.
        """
        critic = Mock()
        critic.__enter__ = Mock(return_value=(Mock(), None))
        critic.__exit__ = Mock(return_value=None)
        # An iterable side_effect makes each get_values call return the next
        # element of the rewards array.
        critic.get_values = Mock(side_effect=np.asarray(rewards))

        gae = Gae(value_function=critic, gamma=gamma, lambduh=lambduh)
        batch = generate_episodes(rewards)

        assert gae.get_advantages(batch) == expected_advantages
    def test_update(self):
        """Smoke-test Cumulative.update: it must accept episodes without raising."""
        reward_sequences = [[0, 0, 0], [0, 1, 2], [2, 1, 0], [1, 1, 1]]

        cumulative = Cumulative()
        batch = generate_episodes(reward_sequences)

        # No assertion on purpose; only the absence of an exception is checked.
        cumulative.update(batch)
# Example #6
# 0
    def test_update(self):
        """Smoke-test RewardToGo.update with discount=1: it must not raise."""
        reward_sequences = [[0, 0, 0], [0, 1, 2], [2, 1, 0], [1, 1, 1]]

        reward_to_go = RewardToGo(discount=1)
        batch = generate_episodes(reward_sequences)

        # No assertion on purpose; only the absence of an exception is checked.
        reward_to_go.update(batch)
    def test_update(self):
        """AdvantageFunctionImpl.update is expected to raise NotImplementedError."""
        impl = AdvantageFunctionImpl()
        single_reward_batch = generate_episodes([[0]])

        with pytest.raises(NotImplementedError):
            impl.update(single_reward_batch)