Python generate_episodes Examples

Programming Language: Python

Namespace/Package Name: tst.utilities

Method/Function: generate_episodes

Examples at hotexamples.com: 7

Python generate_episodes - 7 examples found. These are the top-rated real-world Python examples of tst.utilities.generate_episodes, extracted from open-source projects. You can rate the examples to help us improve their quality.

Example #1

Show file

File: cumulative_test.py Project: djjh/reinforcement-learning-labs

    def test_works(self):
        """Check ``Cumulative.get_advantages`` against a hand-written result.

        Each expected entry equals the sum of the three-value list it came
        from (0, 3, 3, 3, 0), repeated once per value in that list.
        """
        reward_rows = [[0, 0, 0], [0, 1, 2], [2, 1, 0], [1, 1, 1], [0, 0, 0]]
        expected = [0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0]

        estimator = Cumulative()
        batch = generate_episodes(reward_rows)

        assert estimator.get_advantages(batch) == expected

Example #2

Show file

    def test_works(self, discount, rewards, expected_advantages):
        """Check ``RewardToGo.get_advantages`` for one supplied case.

        ``discount``, ``rewards`` and ``expected_advantages`` are supplied
        by the caller -- presumably a pytest parametrization that is not
        visible here; confirm against the decorator.
        """
        estimator = RewardToGo(discount=discount)
        batch = generate_episodes(rewards)

        actual = estimator.get_advantages(batch)

        assert actual == expected_advantages

Example #3

Show file

File: gae_test.py Project: djjh/reinforcement-learning-labs

    def test_update(self):
        """Smoke-test ``Gae.update`` with a mocked value function.

        There is no assertion; the test passes as long as ``update`` does
        not raise.
        """
        # A plain Mock does not set up the context-manager hooks by itself,
        # so __enter__/__exit__ are attached explicitly.
        critic = Mock()
        critic.__enter__ = Mock(return_value=(Mock(), None))
        critic.__exit__ = Mock(return_value=None)

        estimator = Gae(value_function=critic, gamma=1, lambduh=1)
        reward_rows = [[0, 0, 0], [0, 1, 2], [2, 1, 0], [1, 1, 1]]
        batch = generate_episodes(reward_rows)

        estimator.update(batch)

Example #4

Show file

File: gae_test.py Project: djjh/reinforcement-learning-labs

    def test_works(self, gamma, lambduh, rewards, expected_advantages):
        """Check ``Gae.get_advantages`` for one supplied case.

        The value function is a mock whose ``get_values`` yields successive
        elements of ``np.asarray(rewards)`` on successive calls.
        ``gamma``, ``lambduh``, ``rewards`` and ``expected_advantages`` are
        supplied by the caller -- presumably a pytest parametrization that
        is not visible here; confirm against the decorator.
        """
        # A plain Mock does not set up the context-manager hooks by itself,
        # so __enter__/__exit__ are attached explicitly.
        critic = Mock()
        critic.__enter__ = Mock(return_value=(Mock(), None))
        critic.__exit__ = Mock(return_value=None)
        # An iterable side_effect makes each call return the next item.
        critic.get_values = Mock(side_effect=np.asarray(rewards))

        estimator = Gae(value_function=critic, gamma=gamma, lambduh=lambduh)
        batch = generate_episodes(rewards)

        actual = estimator.get_advantages(batch)

        assert actual == expected_advantages

Example #5

Show file

File: cumulative_test.py Project: djjh/reinforcement-learning-labs

    def test_update(self):
        """Smoke-test ``Cumulative.update``.

        There is no assertion; the test passes as long as ``update`` does
        not raise.
        """
        reward_rows = [[0, 0, 0], [0, 1, 2], [2, 1, 0], [1, 1, 1]]

        estimator = Cumulative()
        batch = generate_episodes(reward_rows)

        estimator.update(batch)

Example #6

Show file

    def test_update(self):
        """Smoke-test ``RewardToGo.update`` with ``discount=1``.

        There is no assertion; the test passes as long as ``update`` does
        not raise.
        """
        reward_rows = [[0, 0, 0], [0, 1, 2], [2, 1, 0], [1, 1, 1]]

        estimator = RewardToGo(discount=1)
        batch = generate_episodes(reward_rows)

        estimator.update(batch)

Example #7

Show file

File: advantage_function_test.py Project: djjh/reinforcement-learning-labs

    def test_update(self):
        """Expect ``AdvantageFunctionImpl.update`` to raise NotImplementedError."""
        estimator = AdvantageFunctionImpl()
        batch = generate_episodes([[0]])

        # Only the update call sits inside the raises block, so an error
        # raised during setup above cannot satisfy the expectation.
        with pytest.raises(NotImplementedError):
            estimator.update(batch)