def main():
    # Atari example (commented out); uncomment this block instead of the
    # CartPole setup below to train on BeamRider.
    # env = NormalizedGymEnv('BeamRiderNoFrameskip-v4', is_atari=True)
    # agent = DDQNAgent(
    #     env=env,
    #     discount_factor=0.99,
    #     policy_network_params={'dueling': True},
    #     double_q=True,
    #     prioritized_replay=True,
    #     max_timesteps=int(10e6),
    #     seed=0)
    # agent.train()

    env = NormalizedGymEnv('CartPole-v0')
    net = partial(mlp, hidden_units=[64])
    agent = DDQNAgent(
        env=env,
        policy_network=net,
        policy_network_params={},
        policy_learning_rate=1e-3,
        max_timesteps=100000,
        buffer_size=50000,
        checkpoint_freq=1000,
        exploration_fraction=0.3,
        exploration_final_eps=0.05,
        prioritized_replay=True,
        double_q=True,
        train_freq=1,
        target_network_update_freq=500,
        learning_start_timestep=1000,
        seed=65)
    agent.train()
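# The example scripts are presumably run directly; a minimal sketch of the
# usual entry-point guard, assuming main() is defined as above and the file
# does not already end with one:
if __name__ == '__main__':
    main()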
def test_continuous_action_space_norm():
    env = NormalizedGymEnv('MountainCarContinuous-v0')
    env.reset()
    env.step([0.1])
    env.close()
    assert GymEnv.env_action_space_is_discrete(env) is False
    assert GymEnv.get_env_action_space_dim(env) == 1
def test_discrete_action_space_norm():
    env = NormalizedGymEnv('CartPole-v0')
    env.reset()
    env.step(1)
    env.close()
    assert GymEnv.env_action_space_is_discrete(env) is True
    assert GymEnv.get_env_action_space_dim(env) == 2
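# Context for the two tests above, using the plain gym API rather than yarlp's
# wrappers: CartPole-v0 exposes a Discrete(2) action space, while
# MountainCarContinuous-v0 exposes a Box action space of shape (1,). That is
# the distinction env_action_space_is_discrete / get_env_action_space_dim
# capture. A minimal illustrative sketch (not part of the test suite),
# assuming a standard gym installation:
def _gym_action_space_example():
    import gym
    from gym import spaces
    discrete_env = gym.make('CartPole-v0')
    continuous_env = gym.make('MountainCarContinuous-v0')
    assert isinstance(discrete_env.action_space, spaces.Discrete)
    assert discrete_env.action_space.n == 2
    assert isinstance(continuous_env.action_space, spaces.Box)
    assert continuous_env.action_space.shape == (1,)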
def _get_env(self, job_dir):
    env_name = self._spec_dict['env']['name']
    kwargs = {
        k: v for k, v in self._spec_dict['env'].items()
        if k != 'name' and k != 'is_parallel'
    }
    kwargs['video'] = self._video
    if not self._spec_dict['env'].get('is_parallel', False):
        env = NormalizedGymEnv(
            env_name, log_dir=job_dir, force_reset=True, **kwargs)
    else:
        env = ParallelEnvs(
            env_name, log_dir=job_dir, force_reset=True, **kwargs)
    if 'timestep_limit' in self._spec_dict['env']:
        env.spec.timestep_limit = self._spec_dict['env']['timestep_limit']
    return env
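# For illustration only: a hypothetical spec dict that _get_env above could
# consume. The keys 'name', 'is_parallel', and 'timestep_limit' are the ones
# the method reads explicitly; any other keys under 'env' (e.g. the
# normalize_obs shown here, which is an assumption) are forwarded to the env
# constructor as keyword arguments.
# _spec_dict = {
#     'env': {
#         'name': 'CartPole-v1',
#         'is_parallel': False,
#         'timestep_limit': 200,
#         'normalize_obs': True,
#     }
# }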
def main():
    env = NormalizedGymEnv(
        # 'MountainCarContinuous-v0'
        'CartPole-v1'
        # 'Acrobot-v1'
        # 'Pendulum-v0'
        # 'HalfCheetah-v1'
    )
    agent = CEMAgent(
        env,
        discount_factor=0.99,
        n_weight_samples=100,
        init_var=1.,
        best_pct=0.2,
        policy_network=None,
        policy_network_params={},
        model_file_path=None,
        min_std=1e-6,
        init_std=1.0,
        adaptive_std=False,
        seed=5)
    agent.train(100)
def main():
    env = NormalizedGymEnv(
        'MountainCarContinuous-v0',
        # 'Walker2d-v1',
        # 'Swimmer-v1',
        # 'CartPole-v1',
        # 'Acrobot-v1',
        # 'Pendulum-v0',
        # 'PongNoFrameskip-v4',
        # normalize_obs=True,
        # is_atari=True
    )
    print(env.action_space)
    agent = TRPOAgent(
        env,
        policy_network=mlp,
        seed=123,
        baseline_train_iters=5,
        baseline_model_learning_rate=1e-3,
        baseline_network=mlp)
    agent.train(max_timesteps=1000000)
def main():
    env = NormalizedGymEnv(
        'MountainCarContinuous-v0'
        # 'CartPole-v1'
        # 'Walker2d-v1'
        # 'Acrobot-v1'
        # 'Pendulum-v0'
        # 'HalfCheetah-v1'
        # normalize_obs=True
    )
    print(env.action_space)
    agent = REINFORCEAgent(
        env=env,
        discount_factor=0.99,
        policy_network=mlp,
        policy_learning_rate=0.01,
        entropy_weight=0,
        baseline_train_iters=5,
        baseline_model_learning_rate=1e-3,
        baseline_network=mlp,
        # baseline_network=None,
        seed=5)
    agent.train(10000, n_steps=1024)
""" Regression tests for the REINFORCE agent on OpenAI gym environments """ import pytest import numpy as np import shutil from yarlp.utils.env_utils import NormalizedGymEnv from yarlp.agent.ddqn_agent import DDQNAgent env = NormalizedGymEnv('PongNoFrameskip-v4', is_atari=True) def test_ddqn(): agent = DDQNAgent(env, max_timesteps=10, learning_start_timestep=1, train_freq=5, batch_size=1) agent.train() def test_seed(): agent = DDQNAgent(env, seed=143, max_timesteps=2) agent.train() ob, *_ = agent.replay_buffer.sample(1) agent = DDQNAgent(env, seed=143, max_timesteps=2) agent.train() ob2, *_ = agent.replay_buffer.sample(1)
def test_norm_reward():
    env = NormalizedGymEnv(
        'MountainCarContinuous-v0', normalize_rewards=True)
    env.reset()
    [env.step([0.01]) for _ in range(env.spec.timestep_limit + 1)]