# Tests and helpers for fast_rl DQN agents. The import paths below are
# assumed from the fast_rl (fast-reinforcement-learning) project layout and
# fastai v1; adjust them to your installed version.
import math
import os
from functools import partial

import gym
import numpy as np
import pytest
import torch
from torch import optim

from fastai.basic_train import load_learner

from fast_rl.agents.dqn import create_dqn_model, dqn_learner
from fast_rl.agents.dqn_models import DQNModule, FixedTargetDQNModule
from fast_rl.core.agent_core import ExperienceReplay, GreedyEpsilon
from fast_rl.core.data_block import (FEED_TYPE_IMAGE, FEED_TYPE_STATE,
                                     MDPDataBunch, ResolutionWrapper)
from fast_rl.core.metrics import EpsilonMetric, RewardMetric


def test_to_pickle():
    data = MDPDataBunch.from_env('CartPole-v0',
                                 render='rgb_array',
                                 bs=5,
                                 max_steps=20,
                                 add_valid=False)
    model = create_dqn_model(data,
                             FixedTargetDQNModule,
                             opt=torch.optim.RMSprop)
    memory = ExperienceReplay(memory_size=1000, reduce_ram=True)
    exploration_method = GreedyEpsilon(epsilon_start=1,
                                       epsilon_end=0.1,
                                       decay=0.001)
    learner = dqn_learner(data=data,
                          model=model,
                          memory=memory,
                          exploration_method=exploration_method)
    learner.fit(2)

    assert len(data.x.info) == 2
    assert 0 in data.x.info
    assert 1 in data.x.info

    data.to_pickle('./data/test_to_pickle')
    assert os.path.exists('./data/test_to_pickle_CartPole-v0')
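
# GreedyEpsilon anneals exploration from epsilon_start down to epsilon_end as
# steps accumulate. A minimal sketch of an exponential schedule of the kind it
# implements (the exact formula is an assumption, not taken from fast_rl):
def _epsilon_at(step, start=1.0, end=0.1, decay=0.001):
    return end + (start - end) * math.exp(-decay * step)
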
def test_resolution_wrapper():
    data = MDPDataBunch.from_env('CartPole-v0',
                                 render='rgb_array',
                                 bs=5,
                                 max_steps=10,
                                 add_valid=False,
                                 memory_management_strategy='k_top',
                                 k=1,
                                 feed_type=FEED_TYPE_IMAGE,
                                 res_wrap=partial(ResolutionWrapper,
                                                  w_step=2,
                                                  h_step=2))
    model = create_dqn_model(data,
                             DQNModule,
                             opt=torch.optim.RMSprop,
                             lr=0.1,
                             channels=[32, 32, 32],
                             ks=[5, 5, 5],
                             stride=[2, 2, 2])
    memory = ExperienceReplay(memory_size=1000, reduce_ram=True)
    exploration_method = GreedyEpsilon(epsilon_start=1,
                                       epsilon_end=0.1,
                                       decay=0.001)
    learner = dqn_learner(data=data,
                          model=model,
                          memory=memory,
                          exploration_method=exploration_method,
                          callback_fns=[RewardMetric, EpsilonMetric])
    learner.fit(2)
    temp = gym.make('CartPole-v0')
    temp.reset()
    original_shape = temp.render(mode='rgb_array').shape
    assert data.env.render(mode='rgb_array').shape == (original_shape[0] // 2,
                                                       original_shape[1] // 2,
                                                       3)
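
# ResolutionWrapper with w_step=2, h_step=2 keeps every second pixel, which is
# why the assertion above expects the rendered frame at half the original
# height and width. A plain-numpy equivalent of that downsampling (the
# 400x600x3 frame size matches CartPole's default render, used illustratively):
def _downsample_demo():
    frame = np.zeros((400, 600, 3), dtype=np.uint8)
    halved = frame[::2, ::2, :]
    assert halved.shape == (200, 300, 3)
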
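# memory_strategy and k arrive via pytest parametrization; the grid below is
# an assumed sketch built from the strategies used elsewhere in these tests.
@pytest.mark.parametrize(('memory_strategy', 'k'),
                         [('k_top', 1), ('k_partitions_top', 3)])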
def test_dataset_memory_manager(memory_strategy, k):
    data = MDPDataBunch.from_env('CartPole-v0',
                                 render='rgb_array',
                                 bs=5,
                                 max_steps=20,
                                 add_valid=False,
                                 memory_management_strategy=memory_strategy,
                                 k=k)
    model = create_dqn_model(data, DQNModule, opt=torch.optim.RMSprop, lr=0.1)
    memory = ExperienceReplay(memory_size=1000, reduce_ram=True)
    exploration_method = GreedyEpsilon(epsilon_start=1,
                                       epsilon_end=0.1,
                                       decay=0.001)
    learner = dqn_learner(data=data,
                          model=model,
                          memory=memory,
                          exploration_method=exploration_method,
                          callback_fns=[RewardMetric, EpsilonMetric])
    learner.fit(10)

    data_info = {
        episode: data.train_ds.x.info[episode]
        for episode in data.train_ds.x.info if episode != -1
    }
    full_episodes = [
        episode for episode in data_info if not data_info[episode][1]
    ]

    assert len(full_episodes) == k, 'There should be exactly k full episodes.'
    if 'top' in memory_strategy and 'both' not in memory_strategy:
        # The highest-reward episode should be among the fully kept episodes.
        assert np.argmax([info[0] for info in data_info.values()]) in full_episodes
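
# Note on the assertions above: each data_info entry is treated as a
# (reward, cleaned) pair, where index 0 is compared as the episode reward and
# index 1 as a flag marking episodes whose step data was purged by the memory
# manager (assumed semantics, inferred from the test).
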
def trained_learner(model_cls,
                    env,
                    s_format,
                    experience,
                    bs,
                    layers,
                    memory_size=1000000,
                    decay=0.001,
                    copy_over_frequency=300,
                    lr=None,
                    epochs=450,
                    **kwargs):
    if lr is None: lr = [0.001, 0.00025]
    memory = experience(memory_size=memory_size, reduce_ram=True)
    explore = GreedyEpsilon(epsilon_start=1, epsilon_end=0.1, decay=decay)
    if isinstance(lr, list): lr = lr[0] if model_cls == DQNModule else lr[1]
    data = MDPDataBunch.from_env(env,
                                 render='human',
                                 bs=bs,
                                 add_valid=False,
                                 keep_env_open=False,
                                 feed_type=s_format,
                                 memory_management_strategy='k_partitions_top',
                                 k=3,
                                 **kwargs)
    if model_cls == DQNModule:
        model = create_dqn_model(data=data,
                                 base_arch=model_cls,
                                 lr=lr,
                                 layers=layers,
                                 opt=optim.RMSprop)
    else:
        model = create_dqn_model(data=data,
                                 base_arch=model_cls,
                                 lr=lr,
                                 layers=layers)
    learn = dqn_learner(data,
                        model,
                        memory=memory,
                        exploration_method=explore,
                        copy_over_frequency=copy_over_frequency,
                        callback_fns=[RewardMetric, EpsilonMetric])
    learn.fit(epochs)
    return learn
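
# Hypothetical usage of the trained_learner helper above (argument values are
# illustrative, not taken from the original test suite):
# learn = trained_learner(DQNModule, 'CartPole-v0', FEED_TYPE_STATE,
#                         ExperienceReplay, bs=64, layers=[64, 64], epochs=10)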
def test_dqn_create_dqn_model(model_cls, s_format, env):
    data = MDPDataBunch.from_env(env,
                                 render='rgb_array',
                                 bs=32,
                                 add_valid=False,
                                 feed_type=s_format)
    model = create_dqn_model(data, model_cls)
    model.eval()
    model(data.state.s)

    assert config_env_expectations[env]['action_shape'] == (
        1, data.action.n_possible_values.item())
    if s_format == FEED_TYPE_STATE:
        assert config_env_expectations[env][
            'state_shape'] == data.state.s.shape
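
# config_env_expectations is a test-local mapping of per-environment expected
# shapes; a minimal sketch of its assumed structure (CartPole-v0 has two
# discrete actions, so action_shape is (1, 2); state_shape is illustrative):
config_env_expectations = {
    'CartPole-v0': {
        'action_shape': (1, 2),
        'state_shape': torch.Size([1, 4]),
    },
}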
def test_metrics_reward_init():
    data = MDPDataBunch.from_env('CartPole-v0',
                                 render='rgb_array',
                                 bs=5,
                                 max_steps=20)
    model = create_dqn_model(data, DQNModule, opt=torch.optim.RMSprop)
    memory = ExperienceReplay(memory_size=1000, reduce_ram=True)
    exploration_method = GreedyEpsilon(epsilon_start=1,
                                       epsilon_end=0.1,
                                       decay=0.001)
    learner = dqn_learner(data=data,
                          model=model,
                          memory=memory,
                          exploration_method=exploration_method,
                          callback_fns=[RewardMetric])
    learner.fit(2)
def test_from_pickle():
    data = MDPDataBunch.from_pickle('./data/test_to_pickle_CartPole-v0')
    model = create_dqn_model(data,
                             FixedTargetDQNModule,
                             opt=torch.optim.RMSprop)
    memory = ExperienceReplay(memory_size=1000, reduce_ram=True)
    exploration_method = GreedyEpsilon(epsilon_start=1,
                                       epsilon_end=0.1,
                                       decay=0.001)
    learner = dqn_learner(data=data,
                          model=model,
                          memory=memory,
                          exploration_method=exploration_method)
    learner.fit(2)

    assert len(data.x.info) == 4
    assert 0 in data.x.info
    assert 3 in data.x.info
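
# As with test_dataset_memory_manager above, model_cls, s_format, mem and env
# are presumably supplied through pytest.mark.parametrize.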
def test_dqn_dqn_learner(model_cls, s_format, mem, env):
    data = MDPDataBunch.from_env(env,
                                 render='rgb_array',
                                 bs=32,
                                 add_valid=False,
                                 keep_env_open=False,
                                 feed_type=s_format)
    model = create_dqn_model(data, model_cls)
    memory = mem(memory_size=1000, reduce_ram=True)
    exploration_method = GreedyEpsilon(epsilon_start=1,
                                       epsilon_end=0.1,
                                       decay=0.001)
    dqn_learner(data=data,
                model=model,
                memory=memory,
                exploration_method=exploration_method)

    assert config_env_expectations[env]['action_shape'] == (
        1, data.action.n_possible_values.item())
    if s_format == FEED_TYPE_STATE:
        assert config_env_expectations[env][
            'state_shape'] == data.state.s.shape
def test_export_learner():
    data = MDPDataBunch.from_env('CartPole-v0',
                                 render='rgb_array',
                                 bs=5,
                                 max_steps=20,
                                 add_valid=False)
    model = create_dqn_model(data,
                             FixedTargetDQNModule,
                             opt=torch.optim.RMSprop)
    memory = ExperienceReplay(memory_size=1000, reduce_ram=True)
    exploration_method = GreedyEpsilon(epsilon_start=1,
                                       epsilon_end=0.1,
                                       decay=0.001)
    learner = dqn_learner(data=data,
                          model=model,
                          memory=memory,
                          exploration_method=exploration_method)
    learner.fit(2)

    learner.export('test_export.pkl')  # optionally pass pickle_data=True
    learner = load_learner(learner.path, 'test_export.pkl')
    learner.fit(2)
def test_databunch_to_pickle():
    data = MDPDataBunch.from_env('CartPole-v0',
                                 render='rgb_array',
                                 bs=5,
                                 max_steps=20,
                                 add_valid=False,
                                 memory_management_strategy='k_partitions_top',
                                 k=3)
    model = create_dqn_model(data, DQNModule, opt=torch.optim.RMSprop, lr=0.1)
    memory = ExperienceReplay(memory_size=1000, reduce_ram=True)
    exploration_method = GreedyEpsilon(epsilon_start=1,
                                       epsilon_end=0.1,
                                       decay=0.001)
    learner = dqn_learner(data=data,
                          model=model,
                          memory=memory,
                          exploration_method=exploration_method,
                          callback_fns=[RewardMetric, EpsilonMetric])
    learner.fit(10)
    data.to_pickle('./data/cartpole_10_epoch')
    MDPDataBunch.from_pickle(env_name='CartPole-v0',
                             path='./data/cartpole_10_epoch')
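    # As in test_to_pickle above, the pickled bunch is expected to land at a
    # path derived from the environment name (assumed naming convention):
    # assert os.path.exists('./data/cartpole_10_epoch_CartPole-v0')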