Code Example #1
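Each snippet on this page presumes an argparse preamble and RLCodebase imports that the listing trims away. A minimal sketch of that preamble for this example follows; the import paths and argument defaults are assumptions for illustration, not taken from the repository:

import argparse
from torch.utils.tensorboard import SummaryWriter
# Assumed import paths; check the RLCodebase repository for the exact modules.
from rlcodebase import (Config, Logger, PPOAgent, GaussianPolicy,
                        make_vec_envs, get_action_dim)

parser = argparse.ArgumentParser()
parser.add_argument('--game', default='HalfCheetah-v2')  # hypothetical default
parser.add_argument('--seed', default=0, type=int)
args = parser.parse_args()
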
def main():
    # create config
    config = Config()
    config.game = args.game
    config.algo = 'ppo'
    config.max_steps = int(2e6)
    config.num_envs = 1
    config.optimizer = 'RMSprop'
    config.lr = 0.0003
    config.discount = 0.99
    config.use_gae = True
    config.gae_lambda = 0.95
    config.use_grad_clip = True
    config.max_grad_norm = 0.5
    config.rollout_length = 2048
    config.value_loss_coef = 0.5
    config.entropy_coef = 0
    config.ppo_epoch = 10
    config.ppo_clip_param = 0.2
    config.num_mini_batch = 32
    config.use_gpu = True
    config.seed = args.seed
    config.num_frame_stack = 1
    config.after_set()
    print(config)

    # prepare env, model and logger
    env = make_vec_envs(config.game,
                        num_envs=config.num_envs,
                        seed=config.seed,
                        num_frame_stack=config.num_frame_stack)
    model = GaussianPolicy(env.observation_space.shape[0],
                           action_dim=get_action_dim(env.action_space)).to(config.device)
    logger = Logger(SummaryWriter(config.save_path), config.num_echo_episodes)

    # create agent and run
    agent = PPOAgent(config, env, model, logger)
    agent.run()
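
For reference, ppo_clip_param is the ε of the standard PPO clipped surrogate objective (Schulman et al., 2017), which each of the ppo_epoch optimization passes applies to the collected rollout:

L^{\mathrm{CLIP}}(\theta) = \mathbb{E}_t\big[\min\big(r_t(\theta)\,\hat{A}_t,\ \operatorname{clip}(r_t(\theta),\,1-\epsilon,\,1+\epsilon)\,\hat{A}_t\big)\big],
\qquad r_t(\theta) = \frac{\pi_\theta(a_t \mid s_t)}{\pi_{\theta_{\mathrm{old}}}(a_t \mid s_t)}
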
Code Example #2
File: example_sac.py  Project: qiming-zou/RLCodebase
def main():
    # create config with basic parameters for SAC
    config = Config()
    config.game = 'HalfCheetah-v2'
    config.algo = 'sac'
    config.max_steps = int(1e6)
    config.num_envs = 1
    config.optimizer = 'Adam'
    config.lr = 0.001
    config.discount = 0.99
    config.replay_size = int(1e6)
    config.replay_batch = 100
    config.replay_on_gpu = True
    config.warmup_steps = 10000
    config.soft_update_rate = 0.005
    config.sac_alpha = 0.2
    config.automatic_alpha = False
    config.intermediate_eval = True
    config.eval_interval = int(1e4)
    config.use_gpu = True
    config.seed = 0

    # update config with argparse object (pass game and seed from command line)
    config.update(args)
    config.tag = '%s-%s-%d' % (config.game, config.algo, config.seed)
    config.after_set()
    print(config)

    # prepare env, model and logger
    env = make_vec_envs(config.game,
                        num_envs=config.num_envs,
                        seed=config.seed)
    eval_env = make_vec_envs(config.game, num_envs=1, seed=config.seed)
    model = ConStoSGADCLinearNet(input_dim=env.observation_space.shape[0],
                                 action_dim=get_action_dim(env.action_space)).to(config.device)
    target_model = ConStoSGADCLinearNet(input_dim=env.observation_space.shape[0],
                                        action_dim=get_action_dim(env.action_space)).to(config.device)
    logger = Logger(SummaryWriter(config.save_path), config.num_echo_episodes)

    # create agent and run
    agent = SACAgent(config, env, eval_env, model, target_model, logger)
    agent.run()
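
The soft_update_rate above is SAC's Polyak coefficient τ: after each gradient step the target network tracks the online network as θ_target ← τθ + (1 − τ)θ_target. A minimal sketch of that update, assuming torch nn.Module models; this is illustrative only, not RLCodebase's actual code:

def soft_update(target_model, model, tau=0.005):
    # Blend online parameters into the target: theta' <- tau*theta + (1-tau)*theta'
    for t_param, param in zip(target_model.parameters(), model.parameters()):
        t_param.data.copy_(tau * param.data + (1.0 - tau) * t_param.data)
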
Code Example #3
def main():
    # create config
    config = Config()
    config.game = 'BreakoutNoFrameskip-v4'
    config.algo = 'dqn'
    config.max_steps = int(1e7)
    config.num_envs = 4
    config.optimizer = 'RMSprop'
    config.lr = 0.00025
    config.discount = 0.99
    config.use_grad_clip = True
    config.max_grad_norm = 5
    config.replay_size = int(1e5)
    config.replay_batch = 32
    config.replay_on_gpu = False
    config.exploration_threshold_start = 1
    config.exploration_threshold_end = 0.01
    config.exploration_steps = int(1e6)
    config.target_update_interval = int(1e4)
    config.learning_start = int(5e4)
    config.intermediate_eval = True
    config.eval_interval = int(1e5)
    config.use_gpu = True
    config.num_frame_stack = 4
    config.seed = 0
    config.log_episodes_avg_window = 10000

    # update config with argparse object (pass game and seed from command line)
    config.update(args)
    config.tag = '%s-%s-%d' % (config.game, config.algo, config.seed)
    config.after_set()
    print(config)

    # prepare env, model and logger
    env = make_vec_envs(config.game,
                        num_envs=config.num_envs,
                        seed=config.seed,
                        num_frame_stack=config.num_frame_stack)
    eval_env = make_vec_envs(config.game,
                             num_envs=1,
                             seed=config.seed,
                             num_frame_stack=config.num_frame_stack)
    model = CatQConvNet(input_channels=env.observation_space.shape[0],
                        action_dim=get_action_dim(env.action_space)).to(config.device)
    target_model = CatQConvNet(input_channels=env.observation_space.shape[0],
                               action_dim=get_action_dim(env.action_space)).to(config.device)
    logger = Logger(SummaryWriter(config.save_path), config.num_echo_episodes,
                    config.log_episodes_avg_window)

    # create agent and run
    agent = DQNAgent(config, env, eval_env, model, target_model, logger)
    agent.run()
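
The three exploration_* settings describe an epsilon-greedy schedule: epsilon starts at 1, reaches 0.01 after 1e6 steps, and presumably stays there. A sketch of the linear decay those names suggest (the schedule's shape is an assumption, not the library's code):

def epsilon(step, start=1.0, end=0.01, decay_steps=int(1e6)):
    # Linear interpolation from `start` to `end`, clamped after decay_steps.
    frac = min(step / decay_steps, 1.0)
    return start + frac * (end - start)
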
Code Example #4
def main():
    # create config
    config = Config()
    config.game = 'BreakoutNoFrameskip-v4'
    config.algo = 'ppo'
    config.max_steps = int(2e7)
    config.num_envs = 8
    config.optimizer = 'Adam'
    config.lr = 0.00025
    config.discount = 0.99
    config.use_gae = True
    config.gae_lambda = 0.95
    config.use_grad_clip = True
    config.max_grad_norm = 0.5
    config.rollout_length = 128
    config.value_loss_coef = 0.5
    config.entropy_coef = 0.01
    config.ppo_epoch = 4
    config.ppo_clip_param = 0.1
    config.num_mini_batch = 4
    config.use_gpu = True
    config.num_frame_stack = 4
    config.seed = 1

    # update config with argparse object (pass game and seed from command line)
    config.update(args)
    config.tag = '%s-%s-%d' % (config.game, config.algo, config.seed)
    config.after_set()
    print(config)

    # prepare env, model and logger
    env = make_vec_envs(config.game,
                        num_envs=config.num_envs,
                        seed=config.seed,
                        num_frame_stack=config.num_frame_stack)
    model = CatACConvNet(input_channels=env.observation_space.shape[0],
                         action_dim=get_action_dim(env.action_space)).to(config.device)
    logger = Logger(SummaryWriter(config.save_path), config.num_echo_episodes)

    # create agent and run
    agent = PPOAgent(config, env, model, logger)
    agent.run()
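
Both PPO examples set use_gae = True; with discount = γ and gae_lambda = λ, the advantages fed into the clipped objective shown under Code Example #1 are the standard generalized advantage estimator (Schulman et al., 2016):

\hat{A}_t = \sum_{l \ge 0} (\gamma\lambda)^l \,\delta_{t+l},
\qquad \delta_t = r_t + \gamma V(s_{t+1}) - V(s_t)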