def main():
    # create config
    config = Config()
    config.game = args.game
    config.algo = 'ppo'
    config.max_steps = int(2e6)
    config.num_envs = 1
    config.optimizer = 'RMSprop'
    config.lr = 0.0003
    config.discount = 0.99
    config.use_gae = True
    config.gae_lambda = 0.95
    config.use_grad_clip = True
    config.max_grad_norm = 0.5
    config.rollout_length = 2048
    config.value_loss_coef = 0.5
    config.entropy_coef = 0
    config.ppo_epoch = 10
    config.ppo_clip_param = 0.2
    config.num_mini_batch = 32
    config.use_gpu = True
    config.seed = args.seed
    config.num_frame_stack = 1
    config.after_set()
    print(config)

    # prepare env, model and logger
    env = make_vec_envs(config.game,
                        num_envs=config.num_envs,
                        seed=config.seed,
                        num_frame_stack=config.num_frame_stack)
    model = GaussianSeparatedPolicy(env.observation_space.shape[0],
                                    action_dim=get_action_dim(
                                        env.action_space)).to(config.device)
    logger = Logger(SummaryWriter(config.save_path), config.num_echo_episodes)

    # create agent and run
    agent = PPOAgent(config, env, model, logger)
    agent.run()
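
Each of these scripts references a module-level `args` namespace (e.g. `args.game`, `args.seed`) that is parsed before `main()` runs. A minimal sketch of how it might be built; the flag names and defaults here are assumptions, not part of the original code:

import argparse

# Hypothetical command-line parser; the scripts only show that `args`
# exposes attributes such as `game`, `seed` and `tag`.
parser = argparse.ArgumentParser()
parser.add_argument('--game', type=str, default='HalfCheetah-v2')
parser.add_argument('--seed', type=int, default=0)
parser.add_argument('--tag', type=str, default='')
args = parser.parse_args()

if __name__ == '__main__':
    main()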
Example 2
def main():
    # create config with basic parameters for a2c
    config = Config()
    config.game = 'BreakoutNoFrameskip-v4'
    config.algo = 'a2c'
    config.max_steps = int(2e7)
    config.num_envs = 16
    config.optimizer = 'RMSprop'
    config.lr = 0.0001
    config.discount = 0.99
    config.use_gae = True
    config.gae_lambda = 0.95
    config.use_grad_clip = True
    config.max_grad_norm = 5
    config.rollout_length = 5
    config.value_loss_coef = 0.5
    config.entropy_coef = 0.01
    config.use_gpu = True
    config.num_frame_stack = 4
    config.seed = 1

    # update config with argparse object (pass game and seed from command line)
    config.update(args)
    config.tag = '%s-%s-%d' % (config.game, config.algo, config.seed)
    config.after_set()
    print(config)

    # prepare env, model and logger
    env = make_vec_envs(config.game,
                        num_envs=config.num_envs,
                        seed=config.seed,
                        num_frame_stack=config.num_frame_stack)
    model = CatACConvNet(input_channels=env.observation_space.shape[0],
                         action_dim=get_action_dim(env.action_space)).to(
                             config.device)
    logger = Logger(SummaryWriter(config.save_path), config.num_echo_episodes)

    # create agent and run
    agent = A2CAgent(config, env, model, logger)
    agent.run()
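
`config.update(args)` lets command-line values override the defaults set in `main()`. A plausible sketch of such a helper, assuming it simply copies attributes from the argparse namespace (the real Config.update may differ):

class Config:
    ...
    def update(self, args):
        # Copy every attribute present on the argparse namespace onto the
        # config, so values such as --game and --seed override the
        # defaults assigned in main().
        for key, value in vars(args).items():
            if value is not None:
                setattr(self, key, value)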
Example 3
def main():
    # create config
    config = Config()
    config.game = 'BreakoutNoFrameskip-v4'
    config.algo = 'dqn'
    config.max_steps = int(1e7)
    config.num_envs = 4
    config.optimizer = 'RMSprop'
    config.lr = 0.00025
    config.discount = 0.99
    config.use_grad_clip = True
    config.max_grad_norm = 5
    config.replay_size = int(1e5)
    config.replay_batch = 32
    config.replay_on_gpu = False
    config.exploration_threshold_start = 1
    config.exploration_threshold_end = 0.01
    config.exploration_steps = int(1e6)
    config.target_update_interval = int(1e4)
    config.learning_start = int(5e4)
    config.intermediate_eval = True
    config.eval_interval = int(1e5)
    config.use_gpu = True
    config.num_frame_stack = 4
    config.seed = 0
    config.log_episodes_avg_window = 10000

    # update config with argparse object (pass game and seed from command line)
    config.update(args)
    config.tag = '%s-%s-%d' % (config.game, config.algo, config.seed)
    config.after_set()
    print(config)

    # prepare env, model and logger
    env = make_vec_envs(config.game,
                        num_envs=config.num_envs,
                        seed=config.seed,
                        num_frame_stack=config.num_frame_stack)
    eval_env = make_vec_envs(config.game,
                             num_envs=1,
                             seed=config.seed,
                             num_frame_stack=config.num_frame_stack)
    model = CatQConvNet(
        input_channels=env.observation_space.shape[0],
        action_dim=get_action_dim(env.action_space)).to(config.device)
    target_model = CatQConvNet(
        input_channels=env.observation_space.shape[0],
        action_dim=get_action_dim(env.action_space)).to(config.device)
    logger = Logger(SummaryWriter(config.save_path), config.num_echo_episodes,
                    config.log_episodes_avg_window)

    # create agent and run
    agent = DQNAgent(config, env, eval_env, model, target_model, logger)
    agent.run()
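
The three exploration_* fields suggest a linearly annealed epsilon-greedy schedule. A generic sketch of that interpretation, not necessarily the DQNAgent's exact code:

def exploration_threshold(config, step):
    # Anneal epsilon linearly from exploration_threshold_start to
    # exploration_threshold_end over the first exploration_steps steps,
    # then hold it at the final value.
    fraction = min(step / config.exploration_steps, 1.0)
    start = config.exploration_threshold_start
    end = config.exploration_threshold_end
    return start + fraction * (end - start)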
Example 4
def main():
    # create config with basic parameters for td3
    config = Config()
    config.game = 'HalfCheetah-v2'
    config.algo = 'td3'
    config.max_steps = int(1e6)
    config.num_envs = 1
    config.optimizer = 'Adam'
    config.lr = 0.001
    config.discount = 0.99
    config.replay_size = int(1e6)
    config.replay_batch = 100
    config.replay_on_gpu = True
    config.warmup_steps = 10000
    config.action_noise = 0.1
    config.target_noise = 0.2
    config.target_noise_clip = 0.5
    config.policy_delay = 2
    config.soft_update_rate = 0.005
    config.intermediate_eval = True
    config.eval_interval = int(1e4)
    config.use_gpu = True
    config.seed = 0

    # update config with argparse object (pass game and seed from command line)
    config.update(args)
    config.tag = '%s-%s-%d' % (config.game, config.algo, config.seed)
    config.after_set()
    print(config)

    # prepare env, model and logger
    env = make_vec_envs(config.game, num_envs=config.num_envs, seed=config.seed)
    eval_env = make_vec_envs(config.game, num_envs=1, seed=config.seed)
    model = ConDetADCLinearNet(
        input_dim=env.observation_space.shape[0],
        action_dim=get_action_dim(env.action_space)).to(config.device)
    target_model = ConDetADCLinearNet(
        input_dim=env.observation_space.shape[0],
        action_dim=get_action_dim(env.action_space)).to(config.device)
    logger = Logger(SummaryWriter(config.save_path), config.num_echo_episodes)

    # create agent and run
    agent = TD3Agent(config, env, eval_env, model, target_model, logger)
    agent.run()
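
config.soft_update_rate controls how quickly the target network tracks the online network. A standard polyak-averaging sketch of that update (the TD3Agent's own implementation may differ):

import torch

def soft_update(target_model, model, tau):
    # target <- tau * online + (1 - tau) * target, applied parameter-wise.
    with torch.no_grad():
        for target_param, param in zip(target_model.parameters(),
                                       model.parameters()):
            target_param.mul_(1.0 - tau).add_(param, alpha=tau)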
Example 5
def main():
    # create config
    config = Config()
    config.game = 'starpilot'
    config.algo = 'ppo'
    config.max_steps = int(1e8)
    config.num_envs = 256
    config.optimizer = 'Adam'
    config.lr = 0.0005
    config.discount = 0.999
    config.use_gae = True
    config.gae_lambda = 0.95
    config.use_grad_clip = True
    config.max_grad_norm = 0.5
    config.rollout_length = 256
    config.value_loss_coef = 0.5
    config.entropy_coef = 0.01
    config.ppo_epoch = 3
    config.ppo_clip_param = 0.2
    config.num_mini_batch = 8
    config.mini_batch_size = 2048
    config.use_gpu = True
    config.num_frame_stack = 1

    # update config with argparse object
    config.update(args)
    config.tag = '%s-%s' % (config.game, config.algo)
    if args.tag:
        config.tag += '-' + args.tag
    config.after_set()
    print(config)

    # prepare env, model and logger
    env = make_vec_envs_procgen(env_name=config.game,
                                num_envs=config.num_envs,
                                start_level=config.start_level,
                                num_levels=config.num_levels,
                                distribution_mode=config.distribution_mode,
                                num_frame_stack=config.num_frame_stack)
    Model = SeparateImpalaCNN if args.separate_actor_critic else ImpalaCNN
    model = Model(input_channels=env.observation_space.shape[0],
                  action_dim=get_action_dim(env.action_space)).to(
                      config.device)
    logger = Logger(SummaryWriter(config.save_path), config.num_echo_episodes)

    # create agent and run
    agent = PPOAgent(config, env, model, logger)
    agent.run()
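
use_gae, gae_lambda and discount configure Generalized Advantage Estimation over each rollout. A sketch of the standard recursion these fields control, not the PPOAgent's internal code:

import numpy as np

def compute_gae(rewards, values, dones, last_value, discount, gae_lambda):
    # delta_t = r_t + gamma * V(s_{t+1}) * (1 - done_t) - V(s_t)
    # A_t     = delta_t + gamma * lambda * (1 - done_t) * A_{t+1}
    T = len(rewards)
    advantages = np.zeros(T, dtype=np.float32)
    gae = 0.0
    for t in reversed(range(T)):
        next_value = last_value if t == T - 1 else values[t + 1]
        delta = rewards[t] + discount * next_value * (1.0 - dones[t]) - values[t]
        gae = delta + discount * gae_lambda * (1.0 - dones[t]) * gae
        advantages[t] = gae
    return advantages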
Example 6
def main():
    # prepare env
    encoder = None
    if args.use_encoder:
        encoder = Encoder(latent_size=args.latent_size)
        weights = torch.load(args.encoder_path,
                             map_location=torch.device('cpu'))
        for k in list(weights.keys()):
            if k not in encoder.state_dict().keys():
                del weights[k]
        encoder.load_state_dict(weights)

    carla_env = gym.make('carla-v0', params=params)
    env = VecGymCarla(carla_env, args.action_repeat, encoder)

    # prepare config
    config = Config()
    config.game = 'weather%d' % args.weather
    config.algo = 'ppo'
    config.max_steps = args.max_steps
    config.num_envs = 1
    config.optimizer = 'Adam'
    config.lr = args.lr
    config.discount = 0.99
    config.use_gae = True
    config.gae_lambda = 0.95
    config.use_grad_clip = True
    config.max_grad_norm = 0.5
    config.rollout_length = 128
    config.value_loss_coef = 1
    config.entropy_coef = 0.01
    config.ppo_epoch = 4
    config.ppo_clip_param = 0.2
    config.num_mini_batch = 4
    config.use_gpu = True
    config.save_interval = 10000
    config.memory_on_gpu = True
    config.after_set()
    print(config)

    # prepare model
    if args.use_encoder:
        Model = CarlaLatentPolicy
        input_dim = args.latent_size + 1  # 16+1 in paper
    else:
        Model = CarlaImgPolicy
        input_dim = args.latent_size + 1  # 128+1 in paper (16 is too small)
    model = Model(input_dim, 2).to(config.device)

    # prepare logger
    logger = Logger(SummaryWriter(config.save_path), config.num_echo_episodes)

    # create agent and run
    agent = PPOAgent(config, env, model, logger)
    agent.run()
    torch.save(model.state_dict(), args.model_save_path)
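
ppo_clip_param sets the clipping range of the surrogate objective optimized over ppo_epoch passes per rollout. A standard clipped-loss sketch for reference, not necessarily the PPOAgent's exact implementation:

import torch

def ppo_policy_loss(new_log_probs, old_log_probs, advantages, clip_param):
    # L = -E[min(r_t * A_t, clip(r_t, 1 - eps, 1 + eps) * A_t)],
    # where r_t = exp(new_log_prob - old_log_prob).
    ratio = torch.exp(new_log_probs - old_log_probs)
    surr1 = ratio * advantages
    surr2 = torch.clamp(ratio, 1.0 - clip_param, 1.0 + clip_param) * advantages
    return -torch.min(surr1, surr2).mean()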