Esempio n. 1
0
def test_add_step():
    """Each ``add_steps`` call appends the given record to ``current_steps``."""
    logger = Logger()
    records = [Record(1.0) for _ in range(4)]

    for expected_count, record in enumerate(records, start=1):
        logger.add_steps(record)
        # The step list grows by one and its tail is the record just added.
        assert expected_count == len(logger.current_steps)
        assert record == logger.current_steps[-1]
Esempio n. 2
0
def test_record_init():
    """``Record`` rejects the invalid samples with TypeError and stores the valid ones."""
    rejected_values = [None, "deeee", [], {}, object()]
    accepted_values = [1, 1.0, 0.0, 0, 125, 125.125]

    for bad_value in rejected_values:
        with pytest.raises(TypeError):
            Record(value=bad_value)

    for good_value in accepted_values:
        # The constructor argument must be readable back through ``value``.
        assert Record(value=good_value).value == good_value
Esempio n. 3
0
    def do_step(self, observation, learn=True, logger=None, render=True):
        """
        Run a single agent/environment interaction step.

        The agent is asked for an action for ``observation``, the environment
        is stepped with that action, and the resulting transition is
        optionally passed to the agent and recorded in ``logger``.

        :param observation: current observation handed to the agent
        :param learn: when truthy, ``self.agent.learn`` is called with the transition
        :param logger: when truthy, receives ``Record(reward)`` through ``add_steps``
        :param render: when truthy, ``self.render()`` is called before acting
        :type render: bool
        :return: tuple ``(next_observation, done, reward)``
        """
        if render:
            self.render()

        chosen_action = self.agent.get_action(observation=observation)
        transition = self.environment.step(chosen_action)
        # The fourth element returned by the environment is not used here.
        next_observation, reward, done, _info = transition

        if learn:
            self.agent.learn(observation, chosen_action, reward, next_observation, done)

        if logger:
            logger.add_steps(Record(reward))

        # Note the order: ``done`` comes before ``reward`` in the result.
        return next_observation, done, reward
Esempio n. 4
0
def test_max_records():
    """``Record.max_records`` returns the largest value, 0 for ``[]``, TypeError on bad input."""
    # Raw values for each group of records; int/float kinds are significant.
    value_rows = [
        [1, 1, 1, 1],
        [2, 2, 2, 2],
        [1.0, 1.0, 1.0, 1.0],
        [2.0, 2.0, 2.0, 2.0],
        [1, 2, 3, 4],
        [-1, 1],
        [-10, -15, -20, -15],
    ]
    record_groups = [[Record(v) for v in row] for row in value_rows]
    expected_maxima = [1, 2, 1.0, 2.0, 4, 1, -10]

    for group, expected in zip(record_groups, expected_maxima):
        assert expected == Record.max_records(group)

    # Inputs that are not lists of Record objects must be rejected.
    invalid_inputs = ["dsdzs", ["dzdqzdq"], [1548, 1548], 1254]
    for bad_input in invalid_inputs:
        with pytest.raises(TypeError):
            Record.max_records(bad_input)

    # An empty list is expected to yield 0.
    assert 0 == Record.max_records([])
Esempio n. 5
0
def test_write_log():
    """Smoke test: call ``Logger.write_log`` once per episode; nothing is asserted."""
    # Raw values for each episode; int/float kinds are significant.
    value_rows = [
        [1, 1, 1, 1],
        [2, 2, 2, 2],
        [1.0, 1.0, 1.0, 1.0],
        [2.0, 2.0, 2.0, 2.0],
        [1, 2, 3, 4],
        [-1, 1],
        [-10, -15, -20, -15],
    ]
    episodes = [[Record(v) for v in row] for row in value_rows]

    for episode_index, episode_records in enumerate(episodes):
        Logger.write_log("./runs", episode_records, episode_index)
Esempio n. 6
0
def test_log_episode():
    """``Logger.log_episode`` is expected to record four scalars per episode."""
    summary_writer = FakeSummaryWriter()
    # Raw values for each episode; int/float kinds are significant.
    value_rows = [
        [1, 1, 1, 1],
        [2, 2, 2, 2],
        [1.0, 1.0, 1.0, 1.0],
        [2.0, 2.0, 2.0, 2.0],
        [1, 2, 3, 4],
        [-1, 1],
        [-10, -15, -20, -15],
    ]
    episodes = [[Record(v) for v in row] for row in value_rows]

    for episode_index, episode_records in enumerate(episodes):
        Logger.log_episode(summary_writer, episode_records, episode_index)
        # Four recorded calls per episode, accumulated across iterations.
        assert (episode_index + 1) * 4 == len(summary_writer.add_scalar_call)
        # The third element of the latest recorded call is the episode index.
        assert episode_index == summary_writer.add_scalar_call[-1][2]
Esempio n. 7
0
def test_evaluate():
    """``Logger.evaluate`` must leave ``logger.episodes`` empty."""
    logger = Logger()
    # Raw values for each batch of steps; int/float kinds are significant.
    value_rows = [
        [1, 1, 1, 1],
        [2, 2, 2, 2],
        [1.0, 1.0, 1.0, 1.0],
        [2.0, 2.0, 2.0, 2.0],
        [1, 2, 3, 4],
        [-1, 1],
        [-10, -15, -20, -15],
    ]
    step_batches = [[Record(v) for v in row] for row in value_rows]

    for batch in step_batches:
        logger.current_steps = batch
        logger.evaluate()
        assert 0 == len(logger.episodes)
Esempio n. 8
0
def test_add_episode():
    """Each ``add_episode`` call appends the given episode to ``logger.episodes``."""
    logger = Logger()
    # Raw values for each episode; int/float kinds are significant.
    value_rows = [
        [1, 1, 1, 1],
        [2, 2, 2, 2],
        [1.0, 1.0, 1.0, 1.0],
        [2.0, 2.0, 2.0, 2.0],
        [1, 2, 3, 4],
        [-1, 1],
        [-10, -15, -20, -15],
    ]
    episodes = [[Record(v) for v in row] for row in value_rows]

    for expected_count, episode in enumerate(episodes, start=1):
        logger.add_episode(episode)
        # The episode list grows by one and its tail is the episode just added.
        assert expected_count == len(logger.episodes)
        assert episode == logger.episodes[-1]
Esempio n. 9
0
from blobrl import Trainer, Record
from blobrl.agents import CategoricalDQN, DQN, DoubleDQN

import gym

if __name__ == "__main__":
    # Train each agent type on CartPole in rounds of 50 episodes (at most 100
    # rounds), stopping early once the best logged episode sum exceeds 200.
    for agent in (CategoricalDQN, DQN, DoubleDQN):
        env = gym.make("CartPole-v1")
        # NOTE(review): this instance is built but never used; the Trainer
        # below receives the agent class itself. Confirm which one is intended.
        a = agent(env.observation_space, env.action_space)
        trainer = Trainer(environment=env, agent=agent)

        for i in range(100):
            trainer.train(max_episode=50, render=False, nb_evaluation=0)
            # Highest summed value over every episode the trainer's logger holds.
            m = max(Record.sum_records(e) for e in trainer.logger.episodes)
            print(agent.__name__, i, m)
            if m > 200:
                break

        print("####### ", agent.__name__, i, m, " #######")