def main():
    # create config
    config = Config()
    config.game = args.game
    config.algo = 'ppo'
    config.max_steps = int(2e6)
    config.num_envs = 1
    config.optimizer = 'RMSprop'
    config.lr = 0.0003
    config.discount = 0.99
    config.use_gae = True
    config.gae_lambda = 0.95
    config.use_grad_clip = True
    config.max_grad_norm = 0.5
    config.rollout_length = 2048
    config.value_loss_coef = 0.5
    config.entropy_coef = 0
    config.ppo_epoch = 10
    config.ppo_clip_param = 0.2
    config.num_mini_batch = 32
    config.use_gpu = True
    config.seed = args.seed
    config.num_frame_stack = 1
    config.after_set()
    print(config)

    # prepare env, model and logger
    env = make_vec_envs(config.game, num_envs=config.num_envs, seed=config.seed,
                        num_frame_stack=config.num_frame_stack)
    model = GaussianSeparatedPolicy(env.observation_space.shape[0],
                                    action_dim=get_action_dim(env.action_space)).to(config.device)
    logger = Logger(SummaryWriter(config.save_path), config.num_echo_episodes)

    # create agent and run
    agent = PPOAgent(config, env, model, logger)
    agent.run()
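# The script above reads args.game and args.seed but never defines `args`; a minimal
# entry point is sketched below, assuming --game and --seed command-line flags.
# The flag names and defaults are assumptions for illustration, not taken from the
# original source.
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser(description='PPO on continuous-control tasks')
    parser.add_argument('--game', type=str, default='HalfCheetah-v2',
                        help='environment id (hypothetical default)')
    parser.add_argument('--seed', type=int, default=0, help='random seed')
    args = parser.parse_args()
    main()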
def main():
    # create config with basic parameters for a2c
    config = Config()
    config.game = 'BreakoutNoFrameskip-v4'
    config.algo = 'a2c'
    config.max_steps = int(2e7)
    config.num_envs = 16
    config.optimizer = 'RMSprop'
    config.lr = 0.0001
    config.discount = 0.99
    config.use_gae = True
    config.gae_lambda = 0.95
    config.use_grad_clip = True
    config.max_grad_norm = 5
    config.rollout_length = 5
    config.value_loss_coef = 0.5
    config.entropy_coef = 0.01
    config.use_gpu = True
    config.num_frame_stack = 4
    config.seed = 1
    # update config with argparse object (pass game and seed from command line)
    config.update(args)
    config.tag = '%s-%s-%d' % (config.game, config.algo, config.seed)
    config.after_set()
    print(config)

    # prepare env, model and logger
    env = make_vec_envs(config.game, num_envs=config.num_envs, seed=config.seed,
                        num_frame_stack=config.num_frame_stack)
    model = CatACConvNet(input_channels=env.observation_space.shape[0],
                         action_dim=get_action_dim(env.action_space)).to(config.device)
    logger = Logger(SummaryWriter(config.save_path), config.num_echo_episodes)

    # create agent and run
    agent = A2CAgent(config, env, model, logger)
    agent.run()
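# Both the PPO and A2C configs above enable generalized advantage estimation via
# use_gae / gae_lambda. The helper below is an illustrative, self-contained sketch
# of the standard GAE recursion; the function name and array shapes are assumptions
# and are not taken from this repo's agent code.
import numpy as np

def compute_gae(rewards, values, last_value, dones, discount=0.99, gae_lambda=0.95):
    """Return advantages for one rollout of length T (single environment)."""
    T = len(rewards)
    advantages = np.zeros(T, dtype=np.float32)
    gae = 0.0
    next_value = last_value  # bootstrap value V(s_T)
    for t in reversed(range(T)):
        not_done = 1.0 - float(dones[t])
        # TD residual: r_t + gamma * V(s_{t+1}) - V(s_t)
        delta = rewards[t] + discount * next_value * not_done - values[t]
        # exponentially-weighted sum of residuals
        gae = delta + discount * gae_lambda * not_done * gae
        advantages[t] = gae
        next_value = values[t]
    return advantages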
def main():
    # create config
    config = Config()
    config.game = 'BreakoutNoFrameskip-v4'
    config.algo = 'dqn'
    config.max_steps = int(1e7)
    config.num_envs = 4
    config.optimizer = 'RMSprop'
    config.lr = 0.00025
    config.discount = 0.99
    config.use_grad_clip = True
    config.max_grad_norm = 5
    config.replay_size = int(1e5)
    config.replay_batch = 32
    config.replay_on_gpu = False
    config.exploration_threshold_start = 1
    config.exploration_threshold_end = 0.01
    config.exploration_steps = int(1e6)
    config.target_update_interval = int(1e4)
    config.learning_start = int(5e4)
    config.intermediate_eval = True
    config.eval_interval = int(1e5)
    config.use_gpu = True
    config.num_frame_stack = 4
    config.seed = 0
    config.log_episodes_avg_window = 10000
    # update config with argparse object (pass game and seed from command line)
    config.update(args)
    config.tag = '%s-%s-%d' % (config.game, config.algo, config.seed)
    config.after_set()
    print(config)

    # prepare env, model and logger
    env = make_vec_envs(config.game, num_envs=config.num_envs, seed=config.seed,
                        num_frame_stack=config.num_frame_stack)
    eval_env = make_vec_envs(config.game, num_envs=1, seed=config.seed,
                             num_frame_stack=config.num_frame_stack)
    model = CatQConvNet(input_channels=env.observation_space.shape[0],
                        action_dim=get_action_dim(env.action_space)).to(config.device)
    target_model = CatQConvNet(input_channels=env.observation_space.shape[0],
                               action_dim=get_action_dim(env.action_space)).to(config.device)
    logger = Logger(SummaryWriter(config.save_path), config.num_echo_episodes,
                    config.log_episodes_avg_window)

    # create agent and run
    agent = DQNAgent(config, env, eval_env, model, target_model, logger)
    agent.run()
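# The DQN config above defines a linear exploration schedule through
# exploration_threshold_start / exploration_threshold_end / exploration_steps.
# The helper below only sketches how such a schedule is typically evaluated;
# it is not taken from DQNAgent itself.
def epsilon_at(step, start=1.0, end=0.01, exploration_steps=int(1e6)):
    """Linearly anneal epsilon from `start` to `end` over `exploration_steps`."""
    fraction = min(step / exploration_steps, 1.0)
    return start + fraction * (end - start)

# e.g. epsilon_at(0) == 1.0, epsilon_at(5e5) ~= 0.505, epsilon_at(2e6) == 0.01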
def main():
    # create config with basic parameters for td3
    config = Config()
    config.game = 'HalfCheetah-v2'
    config.algo = 'td3'
    config.max_steps = int(1e6)
    config.num_envs = 1
    config.optimizer = 'Adam'
    config.lr = 0.001
    config.discount = 0.99
    config.replay_size = int(1e6)
    config.replay_batch = 100
    config.replay_on_gpu = True
    config.warmup_steps = 10000
    config.action_noise = 0.1
    config.target_noise = 0.2
    config.target_noise_clip = 0.5
    config.policy_delay = 2
    config.soft_update_rate = 0.005
    config.intermediate_eval = True
    config.eval_interval = int(1e4)
    config.use_gpu = True
    config.seed = 0
    # update config with argparse object (pass game and seed from command line)
    config.update(args)
    config.tag = '%s-%s-%d' % (config.game, config.algo, config.seed)
    config.after_set()
    print(config)

    # prepare env, model and logger
    env = make_vec_envs(config.game, num_envs=config.num_envs, seed=config.seed)
    eval_env = make_vec_envs(config.game, num_envs=1, seed=config.seed)
    model = ConDetADCLinearNet(input_dim=env.observation_space.shape[0],
                               action_dim=get_action_dim(env.action_space)).to(config.device)
    target_model = ConDetADCLinearNet(input_dim=env.observation_space.shape[0],
                                      action_dim=get_action_dim(env.action_space)).to(config.device)
    logger = Logger(SummaryWriter(config.save_path), config.num_echo_episodes)

    # create agent and run
    agent = TD3Agent(config, env, eval_env, model, target_model, logger)
    agent.run()
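# TD3 couples delayed policy updates (policy_delay) with Polyak averaging of the
# target network controlled by soft_update_rate. The snippet below sketches the
# usual soft-update step in plain PyTorch; it is illustrative and not copied from
# TD3Agent.
import torch

@torch.no_grad()
def soft_update(target_model, model, tau=0.005):
    """target <- tau * online + (1 - tau) * target, applied parameter-wise."""
    for target_param, param in zip(target_model.parameters(), model.parameters()):
        target_param.mul_(1.0 - tau).add_(tau * param)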
def main():
    # create config
    config = Config()
    config.game = 'starpilot'
    config.algo = 'ppo'
    config.max_steps = int(1e8)
    config.num_envs = 256
    config.optimizer = 'Adam'
    config.lr = 0.0005
    config.discount = 0.999
    config.use_gae = True
    config.gae_lambda = 0.95
    config.use_grad_clip = True
    config.max_grad_norm = 0.5
    config.rollout_length = 256
    config.value_loss_coef = 0.5
    config.entropy_coef = 0.01
    config.ppo_epoch = 3
    config.ppo_clip_param = 0.2
    config.num_mini_batch = 8
    config.mini_batch_size = 2048
    config.use_gpu = True
    config.num_frame_stack = 1
    # update config with argparse object
    config.update(args)
    config.tag = '%s-%s' % (config.game, config.algo)
    if args.tag:
        config.tag += '-' + args.tag
    config.after_set()
    print(config)

    # prepare env, model and logger
    env = make_vec_envs_procgen(env_name=config.game, num_envs=config.num_envs,
                                start_level=config.start_level, num_levels=config.num_levels,
                                distribution_mode=config.distribution_mode,
                                num_frame_stack=config.num_frame_stack)
    Model = SeparateImpalaCNN if args.separate_actor_critic else ImpalaCNN
    model = Model(input_channels=env.observation_space.shape[0],
                  action_dim=get_action_dim(env.action_space)).to(config.device)
    logger = Logger(SummaryWriter(config.save_path), config.num_echo_episodes)

    # create agent and run
    agent = PPOAgent(config, env, model, logger)
    agent.run()
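# ppo_clip_param in the configs above controls the clipped surrogate objective.
# The function below sketches that loss for a batch of log-probabilities and
# advantages; the tensor names and the sign convention (returning a loss to
# minimize) are assumptions for illustration, not this repo's PPOAgent code.
import torch

def ppo_policy_loss(new_log_probs, old_log_probs, advantages, clip_param=0.2):
    """Clipped PPO surrogate: -E[min(r * A, clip(r, 1-eps, 1+eps) * A)]."""
    ratio = torch.exp(new_log_probs - old_log_probs)
    unclipped = ratio * advantages
    clipped = torch.clamp(ratio, 1.0 - clip_param, 1.0 + clip_param) * advantages
    return -torch.min(unclipped, clipped).mean()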
def main():
    # prepare env
    encoder = None
    if args.use_encoder:
        encoder = Encoder(latent_size=args.latent_size)
        weights = torch.load(args.encoder_path, map_location=torch.device('cpu'))
        for k in list(weights.keys()):
            if k not in encoder.state_dict().keys():
                del weights[k]
        encoder.load_state_dict(weights)
    carla_env = gym.make('carla-v0', params=params)
    env = VecGymCarla(carla_env, args.action_repeat, encoder)

    # prepare config
    config = Config()
    config.game = 'weather%d' % args.weather
    config.algo = 'ppo'
    config.max_steps = args.max_steps
    config.num_envs = 1
    config.optimizer = 'Adam'
    config.lr = args.lr
    config.discount = 0.99
    config.use_gae = True
    config.gae_lambda = 0.95
    config.use_grad_clip = True
    config.max_grad_norm = 0.5
    config.rollout_length = 128
    config.value_loss_coef = 1
    config.entropy_coef = 0.01
    config.ppo_epoch = 4
    config.ppo_clip_param = 0.2
    config.num_mini_batch = 4
    config.use_gpu = True
    config.save_interval = 10000
    config.memory_on_gpu = True
    config.after_set()
    print(config)

    # prepare model
    if args.use_encoder:
        Model = CarlaLatentPolicy
        input_dim = args.latent_size + 1  # 16+1 in paper
    else:
        Model = CarlaImgPolicy
        input_dim = args.latent_size + 1  # 128+1 in paper (16 is too small)
    model = Model(input_dim, 2).to(config.device)

    # create ppo agent and run
    logger = Logger(SummaryWriter(config.save_path), config.num_echo_episodes)
    agent = PPOAgent(config, env, model, logger)
    agent.run()
    torch.save(model.state_dict(), args.model_save_path)
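# The weights written to args.model_save_path can later be restored into a freshly
# constructed policy for evaluation. This is a generic PyTorch loading sketch that
# reuses CarlaLatentPolicy and torch from the script above; the helper name, the
# default latent_size, and the device argument are assumptions, not part of the
# original script.
def load_policy(model_path, latent_size=16, device='cpu'):
    model = CarlaLatentPolicy(latent_size + 1, 2)  # same input_dim convention as above
    model.load_state_dict(torch.load(model_path, map_location=device))
    model.eval()
    return model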