def test_works(self):
    """Check Cumulative advantages for five three-step episodes.

    The expected flat list carries, at every step, the sum of the
    rewards of the episode that step belongs to (0, 3, 3, 3 and 0).
    """
    estimator = Cumulative()
    rewards_per_episode = [
        [0, 0, 0],
        [0, 1, 2],
        [2, 1, 0],
        [1, 1, 1],
        [0, 0, 0],
    ]
    batch = generate_episodes(rewards_per_episode)

    result = estimator.get_advantages(batch)

    # Three steps at 0, nine steps at 3 (three episodes), three steps at 0.
    expected = [0] * 3 + [3] * 9 + [0] * 3
    assert result == expected
def test_works(self, discount, rewards, expected_advantages):
    """Check RewardToGo advantages against the supplied expectation.

    Args:
        discount: Discount passed to the ``RewardToGo`` constructor.
        rewards: Reward data handed to ``generate_episodes``.
        expected_advantages: Value the computed advantages must equal.

    NOTE(review): the arguments are presumably injected by a parametrize
    decorator outside this view -- confirm.
    """
    estimator = RewardToGo(discount=discount)
    batch = generate_episodes(rewards)

    result = estimator.get_advantages(batch)

    assert result == expected_advantages
def test_update(self):
    """Smoke-test ``Gae.update`` with a mocked value function.

    The test makes no assertions; it passes as long as ``update`` does
    not raise.
    """
    # The mock is wired up as a context manager: entering it yields a
    # ``(Mock(), None)`` pair and leaving it returns ``None``.
    entered_value = (Mock(), None)
    critic = Mock()
    critic.__enter__ = Mock(return_value=entered_value)
    critic.__exit__ = Mock(return_value=None)

    estimator = Gae(value_function=critic, gamma=1, lambduh=1)
    rewards_per_episode = [[0, 0, 0], [0, 1, 2], [2, 1, 0], [1, 1, 1]]
    batch = generate_episodes(rewards_per_episode)

    estimator.update(batch)
def test_works(self, gamma, lambduh, rewards, expected_advantages):
    """Check Gae advantages against the supplied expectation.

    Args:
        gamma: ``gamma`` passed to the ``Gae`` constructor.
        lambduh: ``lambduh`` passed to the ``Gae`` constructor.
        rewards: Reward data; used both to build the episodes and as the
            sequence of values returned by the mocked ``get_values``.
        expected_advantages: Value the computed advantages must equal.

    NOTE(review): the arguments are presumably injected by a parametrize
    decorator outside this view -- confirm.
    """
    # The mock is wired up as a context manager: entering it yields a
    # ``(Mock(), None)`` pair and leaving it returns ``None``.
    entered_value = (Mock(), None)
    critic = Mock()
    critic.__enter__ = Mock(return_value=entered_value)
    critic.__exit__ = Mock(return_value=None)
    # An iterable side_effect makes each successive ``get_values`` call
    # return the next item of the reward array.
    critic.get_values = Mock(side_effect=np.asarray(rewards))

    estimator = Gae(value_function=critic, gamma=gamma, lambduh=lambduh)
    batch = generate_episodes(rewards)

    result = estimator.get_advantages(batch)

    assert result == expected_advantages
def test_update(self):
    """Smoke-test ``Cumulative.update`` on four three-step episodes.

    The test makes no assertions; it passes as long as ``update`` does
    not raise.
    """
    estimator = Cumulative()
    rewards_per_episode = [[0, 0, 0], [0, 1, 2], [2, 1, 0], [1, 1, 1]]
    batch = generate_episodes(rewards_per_episode)

    estimator.update(batch)
def test_update(self):
    """Smoke-test ``RewardToGo.update`` on four three-step episodes.

    The test makes no assertions; it passes as long as ``update`` does
    not raise.
    """
    estimator = RewardToGo(discount=1)
    rewards_per_episode = [[0, 0, 0], [0, 1, 2], [2, 1, 0], [1, 1, 1]]
    batch = generate_episodes(rewards_per_episode)

    estimator.update(batch)
def test_update(self):
    """Check that ``AdvantageFunctionImpl.update`` raises NotImplementedError."""
    estimator = AdvantageFunctionImpl()
    # Built outside the ``raises`` block so that only ``update`` itself
    # can satisfy the expectation.
    single_step_batch = generate_episodes([[0]])

    with pytest.raises(NotImplementedError):
        estimator.update(single_step_batch)