Example #1
0
 def test_turnenv(self, mocker):
     """ should return the agent selected through agents_order and turn if env is a TurnEnv. """
     # TurnEnv.turn is mocked so that it always answers 2.
     mocker.patch('learnrl.envs.TurnEnv.turn', return_value=2)
     turn_playground = Playground(TurnEnv(), self.agents)
     turn_playground.agents_order = [3, 2, 1, 0, 4]
     # Position 2 of the custom order holds the agent id 1.
     _agent, selected_id = turn_playground._get_next_agent('observation')
     check.equal(selected_id, 1)
Example #2
0
 def test_test(self, mocker):
     """ test should forward learn=False and render=True to run. """
     # run is replaced by a mock so only the keyword arguments it receives are inspected.
     mocker.patch('learnrl.playground.Playground.run')
     tested_playground = Playground(self.env, self.agents)
     tested_playground.test(10)
     _run_args, run_kwargs = tested_playground.run.call_args
     check.is_false(run_kwargs.get('learn'))
     check.is_true(run_kwargs.get('render'))
Example #3
0
class TestPlaygroundAgentOrder:
    """ Playground.set_agents_order """
    @pytest.fixture(autouse=True)
    def setup_playground(self):
        """Create an environment, five agents and a default playground for each test."""
        self.env = Env()
        self.n_agents = 5
        self.agents = [Agent() for _ in range(self.n_agents)]
        self.playground = Playground(self.env, self.agents)

    def test_default(self):
        """ should have a correct default agent order. """
        expected_order = list(range(self.n_agents))
        # The trailing space keeps the two message fragments from being glued together.
        check.equal(
            self.playground.agents_order, expected_order,
            f"Default agents_order should be {expected_order} "
            f"but was {self.playground.agents_order}")

    def test_custom_at_init(self):
        """ should be able to have custom order at initialization. """
        custom_order = [4, 3, 1, 2, 0]
        playground = Playground(self.env,
                                self.agents,
                                agents_order=custom_order)
        check.equal(
            playground.agents_order, custom_order,
            f"Custom agents_order should be {custom_order} "
            f"but was {playground.agents_order}")

    def test_custom_after_init(self):
        """ should be able to set custom order after initialization. """
        new_custom_order = [3, 4, 1, 2, 0]
        self.playground.set_agents_order(new_custom_order)

        check.equal(
            self.playground.agents_order, new_custom_order,
            f"Custom agents_order should be {new_custom_order} "
            f"but was {self.playground.agents_order}")

    def test_not_enought_indexes(self):
        """ should raise ValueError if not enough indexes in custom order. """
        # Only four indexes are given for the five agents of the fixture.
        with pytest.raises(ValueError, match=r"Not every agents*"):
            Playground(self.env, self.agents, agents_order=[4, 3, 1, 2])

    def test_missing_indexes(self):
        """ should raise ValueError if missing indexes in custom order. """
        # Five indexes are given, but 6 is used where 3 is expected.
        with pytest.raises(ValueError, match=r".*not taking every index*"):
            Playground(self.env, self.agents, agents_order=[4, 6, 1, 2, 0])
Example #4
0
    def setup_playground(self, mocker):
        """Mock the environment, the agents and the playground helpers used during a step."""

        # Values returned by the mocked environment step.
        self.observation = 'observation'
        self.next_observation = 'next_observation'
        self.reward = 1.2
        self.handled_reward = 1.7
        self.done = False
        self.info = {'env_info': 'env_info'}

        step_output = (self.next_observation, self.reward, self.done,
                       self.info)
        mocker.patch('gym.Env.render')
        mocker.patch('gym.Env.step', return_value=step_output)
        self.env = Env()

        # Agents neither remember nor learn for real and always pick the same action.
        mocker.patch('learnrl.agent.Agent.remember')
        mocker.patch('learnrl.agent.Agent.learn')
        self.action = 3
        mocker.patch('learnrl.agent.Agent.act', return_value=self.action)
        self.n_agents = 5
        self.agents = [Agent() for _ in range(self.n_agents)]

        # The next agent to play is always the first one.
        self.agent_id = 0
        playing_agent = self.agents[self.agent_id]
        mocker.patch('learnrl.playground.Playground._get_next_agent',
                     return_value=(playing_agent, self.agent_id))

        def fake_call_handlers(cls, reward, done, experience, reward_handler,
                               done_handler, logs):
            """Stand-in for Playground._call_handlers writing the handled reward."""
            experience['reward'] = self.handled_reward
            logs['handled_reward'] = self.handled_reward

        mocker.patch('learnrl.playground.Playground._call_handlers',
                     fake_call_handlers)
        self.playground = Playground(self.env, self.agents)

        # One empty experience per agent, each one being a distinct dictionary.
        experience_fields = ('observation', 'action', 'reward', 'done', 'info')
        self.previous = [
            dict.fromkeys(experience_fields) for _ in range(self.n_agents)
        ]
Example #5
0
 def test_custom_at_init(self):
     """ should be able to have custom order at initialization. """
     custom_order = [4, 3, 1, 2, 0]
     playground = Playground(self.env,
                             self.agents,
                             agents_order=custom_order)
     # The trailing space keeps the two message fragments from being glued together.
     check.equal(
         playground.agents_order, custom_order,
         f"Custom agents_order should be {custom_order} "
         f"but was {playground.agents_order}")
Example #6
0
class TestPlaygroundBuildCallbacks:
    """ Playground._build_callbacks """
    @pytest.fixture(autouse=True)
    def setup_playground(self):
        """Create the environment, five agents and the playground under test."""
        self.n_agents = 5
        self.env = Env()
        self.agents = [Agent() for _ in range(self.n_agents)]
        self.playground = Playground(self.env, self.agents)

    def test_build_callback(self, mocker):
        """ should build a CallbackList from given callbacks and logger
        and set their params and playground. """
        # Both setters are mocked so that their call arguments can be inspected.
        mocker.patch('learnrl.callbacks.callback.Callback.set_params')
        mocker.patch('learnrl.callbacks.callback.Callback.set_playground')

        user_callbacks = [Callback(), Callback()]
        logger_callback = Callback()
        expected_params = {'param123': 123}
        built_list = self.playground._build_callbacks(
            user_callbacks, logger_callback, expected_params)

        check.is_instance(built_list, CallbackList)
        for built_callback in built_list.callbacks:
            params_args, _ = built_callback.set_params.call_args
            check.equal(params_args[0], expected_params)
            playground_args, _ = built_callback.set_playground.call_args
            check.equal(playground_args[0], self.playground)
        check.is_in(logger_callback, built_list.callbacks)

    def test_builb_callback_no_logger(self):
        """ should work if logger is None. """
        expected_params = {'param123': 123}
        user_callbacks = [Callback(), Callback()]
        built_list = self.playground._build_callbacks(
            user_callbacks, None, expected_params)

        check.is_instance(built_list, CallbackList)
        # Nothing is mocked here: the attributes of the callbacks are read directly.
        for built_callback in built_list.callbacks:
            check.equal(built_callback.params, expected_params)
            check.equal(built_callback.playground, self.playground)
Example #7
0
 def test_agent_typeerror(self):
     """ should raise a TypeError if any agent isn't a subclass of learnrl.Agent. """
     # The second entry is a plain string instead of an Agent instance.
     invalid_agents = [Agent(), 'agent']
     with pytest.raises(TypeError, match=r"agent.*learnrl.Agent"):
         Playground(self.env, invalid_agents)
Example #8
0
 def test_no_turnenv(self):
     """ should return the first agent if env is not a TurnEnv. """
     plain_playground = Playground(Env(), self.agents)
     _agent, selected_id = plain_playground._get_next_agent('observation')
     check.equal(selected_id, 0)
Example #9
0
 def test_missing_indexes(self):
     """ should raise ValueError if missing indexes in custom order. """
     # The order contains 6 and lacks 3.
     invalid_order = [4, 6, 1, 2, 0]
     with pytest.raises(ValueError, match=r".*not taking every index*"):
         Playground(self.env, self.agents, agents_order=invalid_order)
Example #10
0
 def test_not_enought_indexes(self):
     """ should raise ValueError if not enough indexes in custom order. """
     # Only four indexes are listed and 0 is absent.
     short_order = [4, 3, 1, 2]
     with pytest.raises(ValueError, match=r"Not every agents*"):
         Playground(self.env, self.agents, agents_order=short_order)
Example #11
0
 def test_fit_warn_render(self, mocker):
     """ fit should warn a RuntimeWarning if render=True. """
     # run is mocked: only the warning emitted by fit matters here.
     mocker.patch('learnrl.playground.Playground.run')
     fitted_playground = Playground(self.env, self.agents)
     expected_warning = pytest.warns(RuntimeWarning,
                                     match=r".*computation speed.*")
     with expected_warning:
         fitted_playground.fit(10, render=True)
Example #12
0
class TestPlaygroundRunStep:
    """Playground._run_step"""
    @pytest.fixture(autouse=True)
    def setup_playground(self, mocker):
        """Mock the environment, the agents and the playground helpers used during a step."""

        # Values returned by the mocked environment step.
        self.observation = 'observation'
        self.next_observation = 'next_observation'
        self.reward = 1.2
        self.handled_reward = 1.7
        self.done = False
        self.info = {'env_info': 'env_info'}

        mocker.patch('gym.Env.render')
        mocker.patch('gym.Env.step',
                     return_value=(self.next_observation, self.reward,
                                   self.done, self.info))
        self.env = Env()

        # Agents neither remember nor learn for real and always pick the same action.
        mocker.patch('learnrl.agent.Agent.remember')
        mocker.patch('learnrl.agent.Agent.learn')
        self.action = 3
        mocker.patch('learnrl.agent.Agent.act', return_value=self.action)
        self.n_agents = 5
        self.agents = [Agent() for _ in range(self.n_agents)]

        # The next agent to play is always the first one.
        self.agent_id = 0
        mocker.patch('learnrl.playground.Playground._get_next_agent',
                     return_value=(self.agents[self.agent_id], self.agent_id))

        def handler_mocker(cls, reward, done, experience, reward_handler,
                           done_handler, logs):
            """Stand-in for Playground._call_handlers writing the handled reward."""
            experience['reward'] = self.handled_reward
            logs['handled_reward'] = self.handled_reward

        mocker.patch('learnrl.playground.Playground._call_handlers',
                     handler_mocker)
        self.playground = Playground(self.env, self.agents)

        # One empty experience per agent.
        self.previous = [{
            'observation': None,
            'action': None,
            'reward': None,
            'done': None,
            'info': None
        } for _ in range(self.n_agents)]

    def test_run_step(self):
        """ should update the observation, done and logs correctly. """
        logs = {}
        observation, done = self.playground._run_step(
            self.observation,
            self.previous,
            logs=logs,
        )
        check.equal(observation, self.next_observation)
        check.equal(done, self.done)

        # Every logged value is compared with the fixture attribute of the same name.
        for log_name in [
                'reward', 'handled_reward', 'observation', 'next_observation',
                'info', 'done', 'agent_id', 'action'
        ]:
            expected = getattr(self, log_name)
            check.equal(logs[log_name], expected)

    def test_render_not_done(self, mocker):
        """ should render at each step beginning if not done. """
        render_mode = 'render_mode'
        self.playground._run_step(self.observation,
                                  self.previous,
                                  logs={},
                                  render=True,
                                  render_mode=render_mode)

        check.equal(len(self.env.render.call_args_list), 1)
        render_args, _ = self.env.render.call_args
        check.equal(render_args[0], render_mode)

    def test_render_done(self, mocker):
        """ should render at step beginning and end if done. """
        render_mode = 'render_mode'
        # Override the mocked step so that the episode ends on this step.
        self.env.step.return_value = (self.next_observation, self.reward, True,
                                      self.info)
        self.playground._run_step(self.observation,
                                  self.previous,
                                  logs={},
                                  render=True,
                                  render_mode=render_mode)

        check.equal(len(self.env.render.call_args_list), 2)
        render_args, _ = self.env.render.call_args_list[-1]
        check.equal(render_args[0], render_mode)

    def test_learn_without_prev_not_done(self, mocker):
        """ should store experience without learn or remember for first experience. """
        previous = self.previous
        self.playground._run_step(
            self.observation,
            previous,
            logs={},
            learn=True,
        )
        check.equal(
            previous[self.agent_id], {
                name: getattr(self, name)
                for name in
                ['observation', 'action', 'reward', 'done', 'info']
            })
        check.is_false(Agent.learn.called)
        check.is_false(Agent.remember.called)

    def test_learn_not_done(self, mocker):
        """ should call learn and remember once at the beginning of the step if not done. """
        logs = {}
        previous = self.previous
        # Give the playing agent a complete previous experience.
        previous[0] = {
            name: getattr(self, name)
            for name in ['observation', 'action', 'reward', 'done', 'info']
        }
        self.playground._run_step(
            self.observation,
            previous,
            logs=logs,
            learn=True,
        )

        check.equal(len(Agent.learn.call_args_list), 1)
        check.equal(len(Agent.remember.call_args_list), 1)
        check.is_in('agent_0', logs)

    def test_learn_done(self, mocker):
        """ should call learn and remember once if the step is done,
        even without previous experience. """
        logs = {}
        previous = self.previous
        # Override the mocked step so that the episode ends on this step.
        self.env.step.return_value = (self.next_observation, self.reward, True,
                                      self.info)
        self.playground._run_step(
            self.observation,
            previous,
            logs=logs,
            learn=True,
        )

        check.equal(len(Agent.learn.call_args_list), 1)
        check.equal(len(Agent.remember.call_args_list), 1)
        check.is_in('agent_0', logs)
Example #13
0
 def test_turnenv_indexerror(self, mocker):
     """ should raise ValueError if turn result is out of agent_order. """
     # TurnEnv.turn is mocked so that it always answers 10.
     mocker.patch('learnrl.envs.TurnEnv.turn', return_value=10)
     turn_playground = Playground(TurnEnv(), self.agents)
     expected_error = pytest.raises(ValueError, match=r'Not enough agents.*')
     with expected_error:
         turn_playground._get_next_agent('observation')
Example #14
0
 def test_single_agent_argument(self):
     """ should transform a single agent in a list containing itself. """
     lone_agent = Agent()
     wrapped_agents = Playground(self.env, lone_agent).agents
     check.equal(wrapped_agents, [lone_agent])
Example #15
0
class TestPlaygroundReset:
    """ Playground._reset """
    @pytest.fixture(autouse=True)
    def setup_playground(self):
        """Create the environment, five agents and the playground under test."""
        self.n_agents = 5
        self.env = Env()
        self.agents = [Agent() for _ in range(self.n_agents)]
        self.playground = Playground(self.env, self.agents)

    def _check_fresh_state(self, reset_output):
        """Check that the output of _reset describes the very start of an episode."""
        observation, step, done, previous = reset_output
        check.equal(observation, 'obs')
        check.equal(step, 0)
        check.equal(done, False)
        # One empty experience is expected per agent.
        experience_fields = ('observation', 'action', 'reward', 'done', 'info')
        expected_previous = [
            dict.fromkeys(experience_fields) for _ in self.agents
        ]
        check.equal(previous, expected_previous)

    def test_no_handlers(self, mocker):
        """ should reset the environment with no handlers. """
        mocker.patch('gym.Env.reset', lambda self: 'obs')

        self._check_fresh_state(self.playground._reset(None, None))

    def test_callable_handlers(self, mocker):
        """ should reset the environment with callable handlers. """
        def reward_handler(**kwargs):
            return 2 * kwargs.get('reward')

        def done_handler(**kwargs):
            return not kwargs.get('done')

        mocker.patch('gym.Env.reset', lambda self: 'obs')

        reset_output = self.playground._reset(reward_handler, done_handler)
        self._check_fresh_state(reset_output)

    def test_with_handlers(self, mocker):
        """ should reset the environment and handlers with true handlers. """
        mocker.patch('gym.Env.reset', lambda self: 'obs')

        # The reset methods are mocked so that their calls can be detected.
        mocker.patch('learnrl.playground.RewardHandler.reset')
        reward_handler = RewardHandler()

        mocker.patch('learnrl.playground.DoneHandler.reset')
        done_handler = DoneHandler()

        reset_output = self.playground._reset(reward_handler, done_handler)
        self._check_fresh_state(reset_output)

        check.is_true(reward_handler.reset.called)
        check.is_true(done_handler.reset.called)
Example #16
0
 def test_test_warn_render(self, mocker):
     """ test should warn a UserWarning if render=False. """
     # run is mocked: only the warning emitted by test matters here.
     mocker.patch('learnrl.playground.Playground.run')
     tested_playground = Playground(self.env, self.agents)
     expected_warning = pytest.warns(UserWarning, match=r".*render=True.*")
     with expected_warning:
         tested_playground.test(10, render=False, verbose=0)
Example #17
0
 def test_test_warn_learn(self, mocker):
     """ test should warn a UserWarning if learn=True. """
     # run is mocked: only the warning emitted by test matters here.
     mocker.patch('learnrl.playground.Playground.run')
     tested_playground = Playground(self.env, self.agents)
     expected_warning = pytest.warns(UserWarning, match=r".*not act greedy.*")
     with expected_warning:
         tested_playground.test(10, learn=True)
Example #18
0
    def test_run(self, mocker):
        """ run should call callbacks at the right time and in the right order. """

        # Ten identical episodes of ten steps each; only the last step of an episode is done.
        episode_outputs = [(f'obs_{i+1}', False) for i in range(9)]
        episode_outputs.append((f'obs_{9}', True))
        steps_outputs = episode_outputs * 10
        # Reversed so that pop() hands the outputs back in chronological order.
        steps_outputs.reverse()

        def dummy_run_step(*args, **kwargs):
            return steps_outputs.pop()

        class RegisterCallback(Callback):
            """Dummy Callback recording one token per hook called."""
            def __init__(self):
                super().__init__()
                self.stored_key = ""

            def _store(self, token):
                """Append the token of a hook to the recorded key."""
                self.stored_key += token

            def on_run_begin(self, logs=None):
                self._store("|-")

            def on_episodes_cycle_begin(self, episode, logs=None):
                self._store("[")

            def on_episode_begin(self, episode, logs=None):
                self._store("(")

            def on_steps_cycle_begin(self, step, logs=None):
                self._store("<")

            def on_step_begin(self, step, logs=None):
                self._store(",")

            def on_step_end(self, step, logs=None):
                self._store(".")

            def on_steps_cycle_end(self, step, logs=None):
                self._store(">")

            def on_episode_end(self, episode, logs=None):
                self._store(")")

            def on_episodes_cycle_end(self, episode, logs=None):
                self._store("]")

            def on_run_end(self, logs=None):
                self._store("-|")

        # Playground internals are replaced so that only the callback scheduling is exercised.
        mocker.patch('learnrl.playground.Playground._get_episodes_cycle_len',
                     return_value=3)
        mocker.patch('learnrl.playground.Playground._reset', lambda *args:
                     ('obs_0', 0, False, {}))
        mocker.patch('learnrl.playground.Playground._build_callbacks',
                     lambda self, callbacks, logger, params: callbacks[0])
        mocker.patch('learnrl.playground.Playground._run_step', dummy_run_step)

        recorder = RegisterCallback()
        Playground(self.env, self.agents).run(episodes=10,
                                              callbacks=[recorder],
                                              steps_cycle_len=3)

        # Ten steps per episode appear as steps cycles of 3, 3, 3 and 1 steps.
        episode_key = "(<,.,.,.><,.,.,.><,.,.,.><,.>)"
        # Ten episodes appear as episodes cycles of 3, 3, 3 and 1 episodes.
        cycles_key = "".join(
            "[" + episode_key * n_episodes + "]"
            for n_episodes in (3, 3, 3, 1))
        expected_key = "|-" + cycles_key + "-|"
        check.equal(recorder.stored_key, expected_key)
Example #19
0
 def setup_playground(self):
     """Create the environment, five agents and the playground used by the tests."""
     self.n_agents = 5
     self.env = Env()
     self.agents = [Agent() for _ in range(self.n_agents)]
     self.playground = Playground(self.env, self.agents)
Example #20
0
 def test_env_typeerror(self):
     """ should raise a TypeError if the environment isn't a subclass of gym. """
     # A plain string is passed where an environment is expected.
     not_an_env = "env"
     with pytest.raises(TypeError, match=r"environement.*gym.Env"):
         Playground(not_an_env, self.agents)
Example #21
0
 def test_fit_warn_learn(self, mocker):
     """ fit should warn a UserWarning if learn=False. """
     # run is mocked: only the warning emitted by fit matters here.
     mocker.patch('learnrl.playground.Playground.run')
     fitted_playground = Playground(self.env, self.agents)
     expected_warning = pytest.warns(UserWarning,
                                     match=r".*agents will not improve.*")
     with expected_warning:
         fitted_playground.fit(10, learn=False)