# NOTE: assumed imports -- module paths follow the fast_rl package layout and may need adjusting.
import os
from functools import partial

import gym
import numpy as np
import pytest
import torch
from torch import optim
from fastai.basic_train import load_learner

from fast_rl.agents.dqn import create_dqn_model, dqn_learner
from fast_rl.agents.dqn_models import DQNModule, FixedTargetDQNModule
from fast_rl.core.agent_core import ExperienceReplay, GreedyEpsilon
from fast_rl.core.data_block import MDPDataBunch, FEED_TYPE_IMAGE, FEED_TYPE_STATE, ResolutionWrapper
from fast_rl.core.metrics import RewardMetric, EpsilonMetric


def test_to_pickle():
    data = MDPDataBunch.from_env('CartPole-v0', render='rgb_array', bs=5, max_steps=20, add_valid=False)
    model = create_dqn_model(data, FixedTargetDQNModule, opt=torch.optim.RMSprop)
    memory = ExperienceReplay(memory_size=1000, reduce_ram=True)
    exploration_method = GreedyEpsilon(epsilon_start=1, epsilon_end=0.1, decay=0.001)
    learner = dqn_learner(data=data, model=model, memory=memory, exploration_method=exploration_method)
    learner.fit(2)

    # Two epochs should leave exactly two episodes (0 and 1) in the info dict.
    assert len(data.x.info) == 2
    assert 0 in data.x.info
    assert 1 in data.x.info

    data.to_pickle('./data/test_to_pickle')
    assert os.path.exists('./data/test_to_pickle_CartPole-v0')

def test_resolution_wrapper():
    data = MDPDataBunch.from_env('CartPole-v0', render='rgb_array', bs=5, max_steps=10, add_valid=False,
                                 memory_management_strategy='k_top', k=1, feed_type=FEED_TYPE_IMAGE,
                                 res_wrap=partial(ResolutionWrapper, w_step=2, h_step=2))
    model = create_dqn_model(data, DQNModule, opt=torch.optim.RMSprop, lr=0.1, channels=[32, 32, 32],
                             ks=[5, 5, 5], stride=[2, 2, 2])
    memory = ExperienceReplay(memory_size=1000, reduce_ram=True)
    exploration_method = GreedyEpsilon(epsilon_start=1, epsilon_end=0.1, decay=0.001)
    learner = dqn_learner(data=data, model=model, memory=memory, exploration_method=exploration_method,
                          callback_fns=[RewardMetric, EpsilonMetric])
    learner.fit(2)

    # The wrapper samples every other pixel (w_step=2, h_step=2), so both spatial dims
    # of the wrapped env's render should be half those of an unwrapped env.
    temp = gym.make('CartPole-v0')
    temp.reset()
    original_shape = temp.render(mode='rgb_array').shape
    assert data.env.render(mode='rgb_array').shape == (original_shape[0] // 2, original_shape[1] // 2, 3)

# Assumed parametrization: the original decorator was not preserved, and these values are illustrative.
@pytest.mark.parametrize("memory_strategy,k", [('k_top', 1), ('k_partitions_top', 3)])
def test_dataset_memory_manager(memory_strategy, k):
    data = MDPDataBunch.from_env('CartPole-v0', render='rgb_array', bs=5, max_steps=20, add_valid=False,
                                 memory_management_strategy=memory_strategy, k=k)
    model = create_dqn_model(data, DQNModule, opt=torch.optim.RMSprop, lr=0.1)
    memory = ExperienceReplay(memory_size=1000, reduce_ram=True)
    exploration_method = GreedyEpsilon(epsilon_start=1, epsilon_end=0.1, decay=0.001)
    learner = dqn_learner(data=data, model=model, memory=memory, exploration_method=exploration_method,
                          callback_fns=[RewardMetric, EpsilonMetric])
    learner.fit(10)

    # Episode -1 is a sentinel entry; for each real episode, info holds a tuple whose
    # second element flags whether the memory manager cleaned that episode out.
    data_info = {episode: data.train_ds.x.info[episode]
                 for episode in data.train_ds.x.info if episode != -1}
    full_episodes = [episode for episode in data_info if not data_info[episode][1]]

    assert len(full_episodes) == k, 'There should be exactly k fully kept episodes, but there are not.'
    # 'top' strategies (other than the 'both' variants) must keep the highest-scoring episode intact.
    if 'top' in memory_strategy and 'both' not in memory_strategy:
        assert np.argmax([_[0] for _ in data_info.values()]) in full_episodes

def trained_learner(model_cls, env, s_format, experience, bs, layers, memory_size=1000000, decay=0.001,
                    copy_over_frequency=300, lr=None, epochs=450, **kwargs):
    if lr is None:
        lr = [0.001, 0.00025]
    memory = experience(memory_size=memory_size, reduce_ram=True)
    explore = GreedyEpsilon(epsilon_start=1, epsilon_end=0.1, decay=decay)
    # A list lr carries one rate per architecture: the first for DQNModule, the second for everything else.
    if isinstance(lr, list):
        lr = lr[0] if model_cls == DQNModule else lr[1]
    data = MDPDataBunch.from_env(env, render='human', bs=bs, add_valid=False, keep_env_open=False,
                                 feed_type=s_format, memory_management_strategy='k_partitions_top', k=3,
                                 **kwargs)
    # Only the plain DQN takes an explicit optimizer here; the other variants use their defaults.
    if model_cls == DQNModule:
        model = create_dqn_model(data=data, base_arch=model_cls, lr=lr, layers=layers, opt=optim.RMSprop)
    else:
        model = create_dqn_model(data=data, base_arch=model_cls, lr=lr, layers=layers)
    learn = dqn_learner(data, model, memory=memory, exploration_method=explore,
                        copy_over_frequency=copy_over_frequency, callback_fns=[RewardMetric, EpsilonMetric])
    learn.fit(epochs)
    return learn

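# A minimal sketch of how trained_learner might be invoked (illustrative only: the model,
# memory class, and hyperparameters below are assumptions, not values taken from this suite):
#
#   learn = trained_learner(DQNModule, 'CartPole-v0', FEED_TYPE_STATE, ExperienceReplay,
#                           bs=32, layers=[64, 64], epochs=10)
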
# Assumed parametrization: the original decorator was not preserved, and these values are illustrative.
@pytest.mark.parametrize("model_cls,s_format,env", [(DQNModule, FEED_TYPE_STATE, 'CartPole-v0')])
def test_dqn_create_dqn_model(model_cls, s_format, env):
    data = MDPDataBunch.from_env(env, render='rgb_array', bs=32, add_valid=False, feed_type=s_format)
    model = create_dqn_model(data, model_cls)
    model.eval()
    model(data.state.s)

    # config_env_expectations (defined elsewhere in the suite) maps env names to expected shapes.
    assert config_env_expectations[env]['action_shape'] == (1, data.action.n_possible_values.item())
    if s_format == FEED_TYPE_STATE:
        assert config_env_expectations[env]['state_shape'] == data.state.s.shape

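# For reference, config_env_expectations is assumed to look roughly like this
# (illustrative only; CartPole-v0 has 2 discrete actions and a 4-float state, and bs=32):
#
#   config_env_expectations = {
#       'CartPole-v0': {'action_shape': (1, 2), 'state_shape': torch.Size([32, 4])},
#   }
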
def test_metrics_reward_init():
    data = MDPDataBunch.from_env('CartPole-v0', render='rgb_array', bs=5, max_steps=20)
    model = create_dqn_model(data, DQNModule, opt=torch.optim.RMSprop)
    memory = ExperienceReplay(memory_size=1000, reduce_ram=True)
    exploration_method = GreedyEpsilon(epsilon_start=1, epsilon_end=0.1, decay=0.001)
    learner = dqn_learner(data=data, model=model, memory=memory, exploration_method=exploration_method,
                          callback_fns=[RewardMetric])
    learner.fit(2)

def test_from_pickle():
    data = MDPDataBunch.from_pickle('./data/test_to_pickle_CartPole-v0')
    model = create_dqn_model(data, FixedTargetDQNModule, opt=torch.optim.RMSprop)
    memory = ExperienceReplay(memory_size=1000, reduce_ram=True)
    exploration_method = GreedyEpsilon(epsilon_start=1, epsilon_end=0.1, decay=0.001)
    learner = dqn_learner(data=data, model=model, memory=memory, exploration_method=exploration_method)
    learner.fit(2)

    # The pickled bunch already holds episodes 0 and 1; two more epochs append episodes 2 and 3.
    assert len(data.x.info) == 4
    assert 0 in data.x.info
    assert 3 in data.x.info

# Assumed parametrization: the original decorator was not preserved, and these values are illustrative.
@pytest.mark.parametrize("model_cls,s_format,mem,env",
                         [(DQNModule, FEED_TYPE_STATE, ExperienceReplay, 'CartPole-v0')])
def test_dqn_dqn_learner(model_cls, s_format, mem, env):
    data = MDPDataBunch.from_env(env, render='rgb_array', bs=32, add_valid=False, keep_env_open=False,
                                 feed_type=s_format)
    model = create_dqn_model(data, model_cls)
    memory = mem(memory_size=1000, reduce_ram=True)
    exploration_method = GreedyEpsilon(epsilon_start=1, epsilon_end=0.1, decay=0.001)
    dqn_learner(data=data, model=model, memory=memory, exploration_method=exploration_method)

    assert config_env_expectations[env]['action_shape'] == (1, data.action.n_possible_values.item())
    if s_format == FEED_TYPE_STATE:
        assert config_env_expectations[env]['state_shape'] == data.state.s.shape

def test_export_learner():
    data = MDPDataBunch.from_env('CartPole-v0', render='rgb_array', bs=5, max_steps=20, add_valid=False)
    model = create_dqn_model(data, FixedTargetDQNModule, opt=torch.optim.RMSprop)
    memory = ExperienceReplay(memory_size=1000, reduce_ram=True)
    exploration_method = GreedyEpsilon(epsilon_start=1, epsilon_end=0.1, decay=0.001)
    learner = dqn_learner(data=data, model=model, memory=memory, exploration_method=exploration_method)
    learner.fit(2)

    # An exported learner should round-trip through load_learner and keep training.
    learner.export('test_export.pkl')
    learner = load_learner(learner.path, 'test_export.pkl')
    learner.fit(2)

def test_databunch_to_pickle():
    data = MDPDataBunch.from_env('CartPole-v0', render='rgb_array', bs=5, max_steps=20, add_valid=False,
                                 memory_management_strategy='k_partitions_top', k=3)
    model = create_dqn_model(data, DQNModule, opt=torch.optim.RMSprop, lr=0.1)
    memory = ExperienceReplay(memory_size=1000, reduce_ram=True)
    exploration_method = GreedyEpsilon(epsilon_start=1, epsilon_end=0.1, decay=0.001)
    learner = dqn_learner(data=data, model=model, memory=memory, exploration_method=exploration_method,
                          callback_fns=[RewardMetric, EpsilonMetric])
    learner.fit(10)

    data.to_pickle('./data/cartpole_10_epoch')
    MDPDataBunch.from_pickle(env_name='CartPole-v0', path='./data/cartpole_10_epoch')