def main():
    """Entry point: train PPO on a continuous-control task.

    Builds the PPO hyper-parameter configuration, constructs the
    vectorized environment, the Gaussian policy network and the logger,
    then hands everything to ``PPOAgent`` and runs training to
    completion.
    """
    # --- configuration -------------------------------------------------
    config = Config()
    config.algo = 'ppo'
    config.max_steps = int(2e6)
    config.num_envs = 1
    config.optimizer = 'RMSprop'
    config.lr = 0.0003
    config.discount = 0.99
    config.use_gae = True
    config.gae_lambda = 0.95
    config.use_grad_clip = True
    config.max_grad_norm = 0.5
    config.rollout_length = 2048
    config.value_loss_coef = 0.5
    config.entropy_coef = 0
    config.ppo_epoch = 10
    config.ppo_clip_param = 0.2
    config.num_mini_batch = 32
    config.use_gpu = True
    config.num_frame_stack = 1
    # Consistency fix: pull `game` and `seed` from the command line via
    # config.update(args) — the same pattern every other entry point in
    # this project uses — instead of assigning args.game / args.seed by
    # hand, and set a per-run tag so logs/checkpoints land in a
    # run-specific directory like the sibling launchers.
    config.update(args)
    config.tag = '%s-%s-%d' % (config.game, config.algo, config.seed)
    config.after_set()
    print(config)

    # --- environment / model / logger ----------------------------------
    env = make_vec_envs(config.game, num_envs=config.num_envs,
                        seed=config.seed,
                        num_frame_stack=config.num_frame_stack)
    model = GaussianPolicy(env.observation_space.shape[0],
                           action_dim=get_action_dim(env.action_space)).to(config.device)
    logger = Logger(SummaryWriter(config.save_path), config.num_echo_episodes)

    # --- train ---------------------------------------------------------
    agent = PPOAgent(config, env, model, logger)
    agent.run()
def main():
    """Entry point: train SAC on a continuous-control task.

    Assembles the default SAC hyper-parameters (overridable from the
    command line via ``args``), builds separate training and evaluation
    environments, the online and target networks, and a
    TensorBoard-backed logger, then runs the ``SACAgent`` loop.
    """
    # Default hyper-parameters for SAC.
    config = Config()
    config.game = 'HalfCheetah-v2'
    config.algo = 'sac'
    config.max_steps = int(1e6)
    config.num_envs = 1
    config.optimizer = 'Adam'
    config.lr = 0.001
    config.discount = 0.99
    config.replay_size = int(1e6)
    config.replay_batch = 100
    config.replay_on_gpu = True
    config.warmup_steps = 10000
    config.soft_update_rate = 0.005
    config.sac_alpha = 0.2
    config.automatic_alpha = False
    config.intermediate_eval = True
    config.eval_interval = int(1e4)
    config.use_gpu = True
    config.seed = 0

    # Command-line values (game / seed) take precedence over the defaults.
    config.update(args)
    config.tag = '%s-%s-%d' % (config.game, config.algo, config.seed)
    config.after_set()
    print(config)

    # One vectorized env for training, a separate single env for evaluation.
    train_env = make_vec_envs(config.game, num_envs=config.num_envs, seed=config.seed)
    eval_env = make_vec_envs(config.game, num_envs=1, seed=config.seed)

    # Online and target networks share the same architecture/dimensions.
    obs_dim = train_env.observation_space.shape[0]
    act_dim = get_action_dim(train_env.action_space)
    online_net = ConStoSGADCLinearNet(input_dim=obs_dim, action_dim=act_dim).to(config.device)
    target_net = ConStoSGADCLinearNet(input_dim=obs_dim, action_dim=act_dim).to(config.device)

    logger = Logger(SummaryWriter(config.save_path), config.num_echo_episodes)

    # Hand everything to the agent and start training.
    agent = SACAgent(config, train_env, eval_env, online_net, target_net, logger)
    agent.run()
def main():
    """Entry point: train DQN on an Atari game.

    Sets up the DQN hyper-parameters (command line can override game and
    seed), creates frame-stacked training and evaluation environments,
    the online and target Q-networks, and a logger, then delegates to
    ``DQNAgent``.
    """
    # Default hyper-parameters for Atari DQN.
    config = Config()
    config.game = 'BreakoutNoFrameskip-v4'
    config.algo = 'dqn'
    config.max_steps = int(1e7)
    config.num_envs = 4
    config.optimizer = 'RMSprop'
    config.lr = 0.00025
    config.discount = 0.99
    config.use_grad_clip = True
    config.max_grad_norm = 5
    config.replay_size = int(1e5)
    config.replay_batch = 32
    config.replay_on_gpu = False
    config.exploration_threshold_start = 1
    config.exploration_threshold_end = 0.01
    config.exploration_steps = int(1e6)
    config.target_update_interval = int(1e4)
    config.learning_start = int(5e4)
    config.intermediate_eval = True
    config.eval_interval = int(1e5)
    config.use_gpu = True
    config.num_frame_stack = 4
    config.seed = 0
    config.log_episodes_avg_window = 10000

    # Command-line values (game / seed) take precedence over the defaults.
    config.update(args)
    config.tag = '%s-%s-%d' % (config.game, config.algo, config.seed)
    config.after_set()
    print(config)

    # Frame-stacked vectorized env for training plus a single eval env.
    train_env = make_vec_envs(config.game, num_envs=config.num_envs,
                              seed=config.seed,
                              num_frame_stack=config.num_frame_stack)
    eval_env = make_vec_envs(config.game, num_envs=1, seed=config.seed,
                             num_frame_stack=config.num_frame_stack)

    # Online and target Q-networks share the same conv architecture.
    in_channels = train_env.observation_space.shape[0]
    n_actions = get_action_dim(train_env.action_space)
    online_net = CatQConvNet(input_channels=in_channels, action_dim=n_actions).to(config.device)
    target_net = CatQConvNet(input_channels=in_channels, action_dim=n_actions).to(config.device)

    logger = Logger(SummaryWriter(config.save_path), config.num_echo_episodes,
                    config.log_episodes_avg_window)

    # Hand everything to the agent and start training.
    agent = DQNAgent(config, train_env, eval_env, online_net, target_net, logger)
    agent.run()
def main():
    """Entry point: train PPO on an Atari game.

    Builds the PPO hyper-parameter configuration (command line can
    override game and seed), a frame-stacked vectorized environment, the
    actor-critic conv network and a logger, then runs ``PPOAgent``.
    """
    # Default hyper-parameters for Atari PPO.
    config = Config()
    config.game = 'BreakoutNoFrameskip-v4'
    config.algo = 'ppo'
    config.max_steps = int(2e7)
    config.num_envs = 8
    config.optimizer = 'Adam'
    config.lr = 0.00025
    config.discount = 0.99
    config.use_gae = True
    config.gae_lambda = 0.95
    config.use_grad_clip = True
    config.max_grad_norm = 0.5
    config.rollout_length = 128
    config.value_loss_coef = 0.5
    config.entropy_coef = 0.01
    config.ppo_epoch = 4
    config.ppo_clip_param = 0.1
    config.num_mini_batch = 4
    config.use_gpu = True
    config.num_frame_stack = 4
    config.seed = 1

    # Command-line values (game / seed) take precedence over the defaults.
    config.update(args)
    config.tag = '%s-%s-%d' % (config.game, config.algo, config.seed)
    config.after_set()
    print(config)

    # Frame-stacked vectorized training environment.
    train_env = make_vec_envs(config.game, num_envs=config.num_envs,
                              seed=config.seed,
                              num_frame_stack=config.num_frame_stack)

    # Shared actor-critic conv network.
    policy_net = CatACConvNet(input_channels=train_env.observation_space.shape[0],
                              action_dim=get_action_dim(train_env.action_space)).to(config.device)

    logger = Logger(SummaryWriter(config.save_path), config.num_echo_episodes)

    # Hand everything to the agent and start training.
    agent = PPOAgent(config, train_env, policy_net, logger)
    agent.run()