def test_ddpg_create_ddpg_model(model_cls, s_format, env):
    """Smoke-test building a DDPG model from an env databunch and running one forward pass.

    Builds the databunch, constructs the model, switches it to eval mode, feeds the
    current state through it, then validates shapes and closes the env.
    """
    bunch = MDPDataBunch.from_env(env, render='rgb_array', bs=32, add_valid=False,
                                  feed_type=s_format)
    ddpg_model = create_ddpg_model(bunch, model_cls)
    ddpg_model.eval()
    # Forward pass on the current state tensor — confirms the model accepts the feed shape.
    ddpg_model(bunch.state.s.float())
    check_shape(env, bunch, s_format)
    bunch.close()
def test_ddpg_ddpglearner(model_cls, s_format, mem, env):
    """Smoke-test wiring a DDPG learner together: databunch + model + memory + exploration.

    Only constructs the learner (no training) and then validates shapes and closes the env.
    """
    bunch = MDPDataBunch.from_env(env, render='rgb_array', bs=32, add_valid=False,
                                  feed_type=s_format)
    ddpg_model = create_ddpg_model(bunch, model_cls)
    replay_buffer = mem(memory_size=1000, reduce_ram=True)
    # Exploration noise sized to the action tensor produced by the env.
    noise = OrnsteinUhlenbeck(size=bunch.action.taken_action.shape,
                              epsilon_start=1, epsilon_end=0.1, decay=0.001)
    ddpg_learner(data=bunch, model=ddpg_model, memory=replay_buffer,
                 exploration_method=noise)
    check_shape(env, bunch, s_format)
    bunch.close()
def test_ddpg_models_mountain_car_continuous(model_cls, s_format, experience):
    """Train DDPG agents on MountainCarContinuous-v0 and persist interpretation artifacts.

    Runs 5 independent training sessions. For each session it builds a fresh
    databunch/model/learner, trains for 450 epochs, plots per-episode rewards into a
    grouped interpretation, pickles the group for the docs, and writes run GIFs.
    """
    group_interp = GroupAgentInterpretation()
    for i in range(5):
        print('\n')
        data = MDPDataBunch.from_env(
            'MountainCarContinuous-v0', render='rgb_array', bs=40, add_valid=False,
            keep_env_open=False, feed_type=s_format,
            memory_management_strategy='k_partitions_top', k=3,
            res_wrap=partial(ResolutionWrapper, w_step=2, h_step=2))
        # Exploration noise sized to the continuous action tensor.
        exploration_method = OrnsteinUhlenbeck(
            size=data.action.taken_action.shape, epsilon_start=1, epsilon_end=0.1,
            decay=0.0001)
        memory = experience(memory_size=1000000, reduce_ram=True)
        model = create_ddpg_model(data=data, base_arch=model_cls)
        learner = ddpg_learner(data=data, model=model, memory=memory,
                               exploration_method=exploration_method,
                               callback_fns=[RewardMetric, EpsilonMetric])
        learner.fit(450)

        # Tag artifacts with the experience class and feed type used for this run.
        meta = f'{experience.__name__}_{"FEED_TYPE_STATE" if s_format == FEED_TYPE_STATE else "FEED_TYPE_IMAGE"}'
        interp = AgentInterpretation(learner, ds_type=DatasetType.Train)
        interp.plot_rewards(cumulative=True, per_episode=True, group_name=meta)
        group_interp.add_interpretation(interp)
        group_interp.to_pickle(
            f'../docs_src/data/mountaincarcontinuous_{model.name.lower()}/',
            f'{model.name.lower()}_{meta}')
        # Fix: a list comprehension was being used purely for its side effects
        # (discarding the built list); a plain loop is the idiomatic form.
        for g in interp.generate_gif():
            g.write('../res/run_gifs/mountaincarcontinuous')
        data.close()
        # Explicitly release the large objects before the next session to keep peak RAM down.
        del learner
        del model
        del data
def trained_learner(model_cls, env, s_format, experience, bs=64, layers=None, render='rgb_array',
                    memory_size=1000000, decay=0.0001, lr=None, actor_lr=None, epochs=450,
                    opt=torch.optim.RMSprop, **kwargs):
    """Build a DDPG learner for `env`, train it for `epochs`, and return the fitted learner.

    `lr` / `actor_lr` default to 1e-3 / 1e-4 when not supplied; extra keyword
    arguments are forwarded to `MDPDataBunch.from_env`.
    """
    # Resolve learning-rate defaults up front.
    critic_lr = ifnone(lr, 1e-3)
    policy_lr = ifnone(actor_lr, 1e-4)

    bunch = MDPDataBunch.from_env(env, render=render, bs=bs, add_valid=False,
                                  keep_env_open=False, feed_type=s_format,
                                  memory_management_strategy='k_partitions_top', k=3, **kwargs)
    noise = OrnsteinUhlenbeck(size=bunch.action.taken_action.shape,
                              epsilon_start=1, epsilon_end=0.1, decay=decay)
    replay_buffer = experience(memory_size=memory_size, reduce_ram=True)
    ddpg_model = create_ddpg_model(data=bunch, base_arch=model_cls, lr=critic_lr,
                                   actor_lr=policy_lr, layers=layers, opt=opt)
    learner = ddpg_learner(data=bunch, model=ddpg_model, memory=replay_buffer,
                           exploration_method=noise,
                           callback_fns=[RewardMetric, EpsilonMetric])
    learner.fit(epochs)
    return learner