Example No. 1
def test_interpretation_plot_q_dqn_returns():
    data = MDPDataBunch.from_env('maze-random-5x5-v0', render='human')
    model = DQN(data)
    learn = AgentLearner(data, model)
    learn.fit(5)
    interp = AgentInterpretationAlpha(learn)
    interp.plot_heatmapped_episode(2)
Example No. 2
def test_MarkovDecisionProcessDataBunch_init_no_valid():
    error_msg = 'state space is %s but should be %s'
    error_msg2 = "the datasets in the dataloader seem to be different from the databunch's datasets"

    max_steps = 50
    # Create the training itemlist; this test deliberately has no validation set
    train_list = MDPDataset(gym.make('CartPole-v1'), max_steps=max_steps)

    env_databunch = MDPDataBunch.create(train_list, num_workers=0)
    env_databunch.valid_dl = None
    epochs = 3

    assert max_steps == len(train_list)
    assert max_steps == len(env_databunch.train_dl)
    assert env_databunch.valid_dl is None

    for epoch in range(epochs):
        print(f'epoch {epoch}')
        for element in env_databunch.train_dl:
            env_databunch.train_ds.actions = env_databunch.train_ds.env.action_space.sample()
            current_s, actual_s = element.shape[1], train_list.env.observation_space.shape[0]
            print(f'state {element} action {env_databunch.train_dl.dl.dataset.actions}')
            assert current_s == actual_s, error_msg % (current_s, actual_s)
            assert np.equal(env_databunch.train_dl.dl.dataset.actions,
                            env_databunch.train_ds.actions), error_msg2
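The loop above drives the environment by writing sampled actions into the dataset each step. For context, here is a minimal sketch of the raw gym rollout this approximates, assuming the classic pre-0.26 gym API used throughout these examples; it is illustrative, not the library's implementation.

import gym

# Step the env with externally chosen actions, resetting on episode end,
# capped at max_steps -- roughly what iterating an MDPDataset does.
# Assumes the pre-0.26 gym API (reset() -> obs, step() -> 4-tuple).
env = gym.make('CartPole-v1')
max_steps = 50

state = env.reset()
for step in range(max_steps):
    action = env.action_space.sample()  # stand-in for dataset.actions
    state, reward, done, info = env.step(action)
    if done:
        state = env.reset()  # start a new episode within the same epoch
env.close()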
Example No. 3
def test_individual_env():
    msg = "the datasets in the dataloader seem to be different from the databunch's datasets"

    max_steps = 50

    env = 'CarRacing-v0'
    print(f'Testing {env}')
    mdp_databunch = MDPDataBunch.from_env(env,
                                          max_steps=max_steps,
                                          num_workers=0)
    epochs = 1

    assert max_steps == len(mdp_databunch.train_dl)
    assert max_steps == len(mdp_databunch.valid_dl)

    for epoch in range(epochs):
        for _ in mdp_databunch.train_dl:
            mdp_databunch.train_ds.actions = mdp_databunch.train_ds.get_random_action()
            # print(f'state {element.shape} action {mdp_databunch.train_dl.dl.dataset.actions}')
            assert np.all(np.equal(mdp_databunch.train_dl.dl.dataset.actions,
                                   mdp_databunch.train_ds.actions)), msg

        for _ in mdp_databunch.valid_dl:
            mdp_databunch.valid_ds.actions = mdp_databunch.valid_ds.get_random_action()
            # print(f'state {element.shape} action {mdp_databunch.valid_dl.dl.dataset.actions}')
            assert np.all(np.equal(mdp_databunch.valid_dl.dl.dataset.actions,
                                   mdp_databunch.valid_ds.actions)), msg
Example No. 4
def test_interpretation_plot_sequence():
    data = MDPDataBunch.from_env('maze-random-5x5-v0',
                                 render='human',
                                 max_steps=1000)
    model = DQN(data)
    learn = AgentLearner(data, model)

    epochs = 20

    callbacks = learn.model.callbacks  # type: Collection[LearnerCallback]
    [c.on_train_begin(learn=learn, n_epochs=epochs) for c in callbacks]
    for epoch in range(epochs):
        [c.on_epoch_begin(epoch=epoch) for c in callbacks]
        learn.model.train()
        for element in learn.data.train_dl:
            learn.data.train_ds.actions = learn.predict(element)
            [c.on_step_end(learn=learn) for c in callbacks]
        interp = AgentInterpretationAlpha(learn, ds_type=DatasetType.Train)
        interp.plot_heatmapped_episode(epoch)

        [c.on_epoch_end() for c in callbacks]
    [c.on_train_end() for c in callbacks]
Example No. 5
def test_interpretation_heatmap():
    data = MDPDataBunch.from_env('maze-random-5x5-v0', render='human')
    model = DQN(data)
    learn = AgentLearner(data, model)

    epochs = 10

    callbacks = learn.model.callbacks  # type: Collection[LearnerCallback]
    [c.on_train_begin(learn=learn, n_epochs=epochs) for c in callbacks]
    for epoch in range(epochs):
        [c.on_epoch_begin(epoch=epoch) for c in callbacks]
        learn.model.train()
        for element in learn.data.train_dl:
            learn.data.train_ds.actions = learn.predict(element)
            [c.on_step_end(learn=learn) for c in callbacks]
        [c.on_epoch_end() for c in callbacks]

        # For now we are going to avoid executing learner_callbacks here.
        learn.model.eval()
        for element in learn.data.valid_dl:
            learn.data.valid_ds.actions = learn.predict(element)

        # Plot every epoch; add a modulus check here to plot less frequently.
        interp = AgentInterpretationAlpha(learn)
        interp.plot_heatmapped_episode(epoch)
    [c.on_train_end() for c in callbacks]
Example No. 6
def test_fit_function_dqn():
    data = MDPDataBunch.from_env('maze-random-5x5-v0',
                                 render='human',
                                 max_steps=1000)
    model = DQN(data, memory=PriorityExperienceReplay(1000))
    learn = AgentLearner(data, model)
    learn.fit(5)
Example No. 7
def test_priority_experience_replay():
    data = MDPDataBunch.from_env('maze-random-5x5-v0',
                                 render='human',
                                 max_steps=1000)
    model = FixedTargetDQN(data, memory=PriorityExperienceReplay(1000))
    learn = AgentLearner(data, model)
    learn.fit(5)
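Prioritized replay samples transitions in proportion to their TD error rather than uniformly. Below is a minimal illustrative sketch of that idea, not the library's PriorityExperienceReplay: every name here is an assumption, and a production version would use a sum-tree for O(log n) sampling plus importance-sampling weights.

import numpy as np

class NaivePrioritizedReplay:
    # Proportional prioritization: P(i) ~ (|td_error_i| + eps) ** alpha.
    def __init__(self, capacity, alpha=0.6, eps=1e-6):
        self.capacity, self.alpha, self.eps = capacity, alpha, eps
        self.buffer, self.priorities = [], []

    def push(self, transition, td_error=1.0):
        if len(self.buffer) >= self.capacity:  # evict the oldest transition
            self.buffer.pop(0)
            self.priorities.pop(0)
        self.buffer.append(transition)
        self.priorities.append((abs(td_error) + self.eps) ** self.alpha)

    def sample(self, batch_size):
        probs = np.array(self.priorities)
        probs /= probs.sum()
        idxs = np.random.choice(len(self.buffer), batch_size, p=probs)
        return [self.buffer[i] for i in idxs], idxs

    def update_priorities(self, idxs, td_errors):
        # Refresh priorities after the learner recomputes TD errors.
        for i, err in zip(idxs, td_errors):
            self.priorities[i] = (abs(err) + self.eps) ** self.alpha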
Example No. 8
def test_double_dueling_dqn_model_maze():
    data = MDPDataBunch.from_env('maze-random-5x5-v0',
                                 render='human',
                                 max_steps=1000)
    model = DoubleDuelingDQN(data)
    learn = AgentLearner(data, model)

    learn.fit(5)
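A dueling network decomposes Q(s, a) into a state-value stream V(s) and an advantage stream A(s, a), recombined as Q = V + A - mean(A); the "double" part lives in the target computation, where the online net picks the argmax action and the target net evaluates it. The PyTorch module below is an illustrative sketch of the dueling head only, not DoubleDuelingDQN's actual architecture, and the layer sizes are assumptions.

import torch.nn as nn

class DuelingHead(nn.Module):
    # Q(s, a) = V(s) + A(s, a) - mean_a A(s, a)
    def __init__(self, n_features, n_actions, hidden=64):
        super().__init__()
        self.value = nn.Sequential(nn.Linear(n_features, hidden), nn.ReLU(),
                                   nn.Linear(hidden, 1))
        self.advantage = nn.Sequential(nn.Linear(n_features, hidden), nn.ReLU(),
                                       nn.Linear(hidden, n_actions))

    def forward(self, x):
        v, a = self.value(x), self.advantage(x)
        # Subtracting the mean advantage keeps V and A identifiable.
        return v + a - a.mean(dim=1, keepdim=True)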
Example No. 9
def test_basic_dqn_model_maze():
    data = MDPDataBunch.from_env('maze-random-5x5-v0',
                                 render='human',
                                 max_steps=200)
    model = DQN(data)
    learn = AgentLearner(data, model)

    learn.fit(5)
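The quantity a basic DQN regresses toward is the one-step TD target y = r + gamma * max_a' Q(s', a'). The helper below is a minimal sketch of that computation; the function name and signature are ours, not the library's.

import torch

def dqn_targets(rewards, next_q_values, dones, gamma=0.99):
    # rewards: (B,), next_q_values: (B, n_actions), dones: (B,) bool.
    max_next_q = next_q_values.max(dim=1).values
    # Terminal transitions bootstrap nothing: y = r when done.
    return rewards + gamma * max_next_q * (~dones).float()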
Example No. 10
def test_fixed_target_dqn_model_maze():
    print('\n')
    data = MDPDataBunch.from_env('maze-random-5x5-v0',
                                 render='human',
                                 max_steps=1000)
    model = FixedTargetDQN(data)
    learn = AgentLearner(data, model)

    learn.fit(5)
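A fixed-target DQN keeps a frozen copy of the Q-network and computes bootstrap targets against it, refreshing the copy periodically so the targets stay stable between syncs. The sketch below assumes copy_over_frequency means "sync every N episodes"; the library may define it differently, and the networks here are stand-ins.

import copy
import torch.nn as nn

online_net = nn.Linear(4, 2)  # stand-in for the real Q-network
target_net = copy.deepcopy(online_net)

copy_over_frequency = 4
for episode in range(20):
    # ... train online_net for one episode, computing TD targets with target_net ...
    if episode % copy_over_frequency == 0:
        target_net.load_state_dict(online_net.state_dict())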
Example No. 11
def test_interpretation_plot_q_ddpg_returns():
    data = MDPDataBunch.from_env('Pendulum-v0', render='human')
    # data = MDPDataBunch.from_env('MountainCarContinuous-v0', render='human')
    model = DDPG(data, batch=8)
    learn = AgentLearner(data, model)

    learn.fit(5)
    interp = AgentInterpretationAlpha(learn)
    interp.plot_heatmapped_episode(2)
Example No. 12
def test_epsilon():
    data = MDPDataBunch.from_env('maze-random-5x5-v0',
                                 render='human',
                                 max_steps=100,
                                 add_valid=False)
    model = FixedTargetDQN(data,
                           batch_size=64,
                           max_episodes=100,
                           copy_over_frequency=4)
    learn = AgentLearner(data, model)

    learn.fit(20)
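test_epsilon exercises the exploration schedule. For reference, a typical epsilon-greedy anneal looks like the sketch below; the decay form and constants are illustrative, not FixedTargetDQN's defaults.

import numpy as np

def select_action(q_values, eps):
    # Random action with probability eps, otherwise greedy.
    if np.random.random() < eps:
        return np.random.randint(len(q_values))
    return int(np.argmax(q_values))

eps, eps_end, decay = 1.0, 0.02, 0.995
for episode in range(100):
    # ... run one episode, calling select_action(q_values, eps) each step ...
    eps = max(eps_end, eps * decay)  # anneal toward the exploration floor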
Example No. 13
def test_envs_all(env):
    msg = "the datasets in the dataloader seem to be different from the databunch's datasets"

    max_steps = 50
    print(f'Testing {env}')
    mdp_databunch = MDPDataBunch.from_env(env,
                                          max_steps=max_steps,
                                          num_workers=0)
    if mdp_databunch is None:
        print(f"Env {env} is probably Mujoco... add the imports and try it on your own if you want; "
              f"we don't like proprietary engines like this. If you have any issues, feel free to make a PR!")
        return
    epochs = 1

    assert max_steps == len(mdp_databunch.train_dl)
    assert max_steps == len(mdp_databunch.valid_dl)

    a_s, s_s = mdp_databunch.get_action_state_size()
    assert a_s is not None
    assert s_s is not None
    a_s.sample()
    s_s.sample()

    for epoch in range(epochs):
        for _ in mdp_databunch.train_dl:
            mdp_databunch.train_ds.actions = mdp_databunch.train_ds.get_random_action()
            # print(f'state {element} action {mdp_databunch.train_dl.dl.dataset.actions}')
            assert np.all(np.equal(mdp_databunch.train_dl.dl.dataset.actions,
                                   mdp_databunch.train_ds.actions)), msg

        for _ in mdp_databunch.valid_dl:
            mdp_databunch.valid_ds.actions = mdp_databunch.valid_ds.get_random_action()
            # print(f'state {element} action {mdp_databunch.valid_dl.dl.dataset.actions}')
            assert np.all(np.equal(mdp_databunch.valid_dl.dl.dataset.actions,
                                   mdp_databunch.valid_ds.actions)), msg
Example No. 14
def test_ddpg():
    data = MDPDataBunch.from_env('Pendulum-v0', render='human')
    # data = MDPDataBunch.from_env('MountainCarContinuous-v0', render='human')
    model = DDPG(data, batch=8)
    learn = AgentLearner(data, model)
    learn.fit(450)
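DDPG keeps slowly tracking target copies of both the actor and the critic, updated as theta_target <- tau * theta + (1 - tau) * theta_target after every training step. The sketch below illustrates that soft update with an assumed tau; it is not the library's DDPG internals, and the actor here is a stand-in.

import copy
import torch
import torch.nn as nn

actor = nn.Linear(3, 1)  # stand-in: Pendulum-v0 has a 3-dim state, 1-dim action
target_actor = copy.deepcopy(actor)
tau = 0.005  # assumed value; small tau means the target trails slowly

def soft_update(target, online, tau):
    with torch.no_grad():
        for t_param, param in zip(target.parameters(), online.parameters()):
            t_param.mul_(1.0 - tau).add_(tau * param)

soft_update(target_actor, actor, tau)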
Example No. 15
def test_fit_function_ddpg():
    data = MDPDataBunch.from_env('Pendulum-v0', render='human', max_steps=1000)
    model = DDPG(data, memory=PriorityExperienceReplay(1000))
    learn = AgentLearner(data, model)
    learn.fit(5)