Code example #1
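This first listing sets up a plain DQN training run for a cooking task. All settings live in one hyperparameter dictionary; the script then builds the Q-network (CookModel), an MSE loss and an RMSProp optimizer from the project's tb training library, wraps them in a tb.DQNAgent pointed at params/cook_dqn.json, and passes the agent together with a CookingTask to a tb.RLTrainer that trains epoch by epoch (load_first=True suggests it resumes from previously saved parameters). The imports for tb and the model and task classes are assumed to exist in the surrounding project; they are not shown in the listing.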
def train_dqn():
    hyperparams = {
        'batch_size': 32,
        'init_explore_len': 50000,
        # 'init_explore_len': 50,
        'learning_rate': 0.00025,
        # 'grad_momentum': 0.0,
        'grad_decay': 0.95,
        'grad_epsilon': 0.01,
        # 'grad_norm_clip': 5,
        'epsilon': (1.0, 0.1, 1000000),
        'frame_skip': 10,
        'num_recent_feats': 25,
        'steps_per_episode': 150,
        'reward_discount': 0.99,
        'show_screen': True,
        'target_update_freq': 10000,
        'display_freq': 25,
        'updates_per_iter': 1,
        'update_freq': 4,
        'frames_per_epoch': 100000,
        # 'frames_per_epoch': 250,
        'frames_per_eval': 50000,
        # 'screen_resize': (110, 84),
        'experience_replay_len': 1000000,
        # 'cache_size': int(2e4),
        'state_len': 4,
        'joint_vel': 0.5,
        # 'num_frames': 10000000,
        # 'save_freq': 100000,
        # 'eval_freq': 10,
        'num_epochs': 200,  # 1e7 frames
        'eval_epsilon': 0.05,
        'num_recent_episodes': 100,
        'num_recent_steps': 10000
    }
    q_model = CookModel(hyperparams)
    loss = tb.MSE(hyperparams)
    optim = tb.RMSPropOptim(hyperparams)
    # q_trainer = tb.Trainer(q_model, hyperparams, loss, optim, evaluator)
    agent = tb.DQNAgent(hyperparams, q_model, optim, loss,
                        'params/cook_dqn.json')
    task = CookingTask(hyperparams)
    trainer = tb.RLTrainer(hyperparams, agent, task, load_first=True)
    trainer.train_by_epoch()
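
A minimal way to run the function above is sketched below; the __main__ guard is an illustrative addition and is not part of the original listing.

if __name__ == '__main__':
    # Kick off the DQN training run defined in this listing.
    train_dqn()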
Code example #2
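The second listing follows the same structure but trains a tb.SNDQNAgent on Atari Breakout: a BreakoutModel Q-network, MSE loss, RMSProp optimizer, an AtariTask built from data/roms/breakout.bin, and a tb.RLTrainer started from scratch (load_first=False). Compared with the first example it anneals epsilon over 4,000,000 steps instead of 1,000,000 (assuming the tuple is start value, end value, anneal length), uses a frame skip of 4, leaves init_explore_len and the replay-buffer size commented out, and adds a tmax setting, presumably specific to the SNDQN agent.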
def train_dqn():
    hyperparams = {
        'batch_size': 32,
        # 'init_explore_len': 500,
        # 'init_explore_len': 50,
        'learning_rate': 0.0005,
        # 'grad_momentum': 0.0,
        'grad_decay': 0.95,
        'grad_epsilon': 0.01,
        # 'grad_norm_clip': 5,
        'epsilon': (1.0, 0.1, 4000000),
        'frame_skip': 4,
        'reward_discount': 0.99,
        'show_screen': False,
        # 'target_update_freq': 10000,
        'display_freq': 100,
        'updates_per_iter': 50000,
        'update_freq': 4,
        'frames_per_epoch': 100000,
        # 'frames_per_epoch': 250,
        'frames_per_eval': 25000,
        # 'screen_resize': (110, 84),
        # 'experience_replay_len': 1000000
        # 'cache_size': int(2e4),
        'state_len': 4,
        # 'num_frames': 10000000,
        # 'save_freq': 100000,
        # 'eval_freq': 10,
        'num_epochs': 400,  # 1e7 frames
        'eval_epsilon': 0.05,
        'num_recent_episodes': 100,
        'tmax': 5,
        'num_recent_steps': 10000
    }
    q_model = BreakoutModel(hyperparams)
    loss = tb.MSE(hyperparams)
    optim = tb.RMSPropOptim(hyperparams)
    # q_trainer = tb.Trainer(q_model, hyperparams, loss, optim, evaluator)
    agent = tb.SNDQNAgent(hyperparams, q_model, optim, loss,
                          'params/breakout_sndqn_l0005.json')
    task = AtariTask(hyperparams, 'data/roms/breakout.bin')
    trainer = tb.RLTrainer(hyperparams, agent, task, load_first=False)
    trainer.train_by_epoch()
Code example #3
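The third listing trains a model-based tb.RDRLAgent on Breakout. Instead of a single Q-network it builds four components, an ActionModel, a StateModel, a RewardModel, and a ValueModel, each paired with its own RMSProp optimizer; the state, reward, and value models also get an MSE loss. All of them, plus the parameter path params/breakout_rdrl.json, are handed to the agent, which is then trained on the same AtariTask through tb.RLTrainer.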
def train_rdrl():
    hyperparams = {'batch_size': 32,
                   'init_explore_len': 500000,
                   'num_mega_updates': 100000,
                   # 'init_model_train': 500000,
                   # 'init_explore_len': 50,
                   'learning_rate': 0.05,
                   # 'grad_momentum': 0.0,
                   'grad_decay': 0.95,
                   'grad_epsilon': 0.01,
                   # 'grad_norm_clip': 5,
                   'epsilon': (1.0, 0.1, 1000000),
                   'frame_skip': 4,
                   'reward_discount': 0.99,
                   'display_freq': 100,
                   'updates_per_model_iter': 1,
                   'updates_per_iter': 1,
                   # 'trains_per_action_train': 500,
                   'train_freq': 16,
                   'action_train_freq': 16,
                   # 'action_train_freq': 10000,
                   'frames_per_epoch': 100000,
                   # 'frames_per_epoch': 250,
                   'frames_per_eval': 50000,
                   # 'screen_resize': (110, 84),
                   'experience_replay_len': 4000000,
                   'update_target_freq': 20000,
                   # 'cache_size': int(2e4),
                   'state_len': 1,
                   # 'num_frames': 10000000,
                   # 'save_freq': 100000,
                   # 'eval_freq': 10,
                   'num_epochs': 200,  # 1e7 frames
                   'show_screen': False,
                   'rollout_length': 4,
                   'value_rollout_length': 4,
                   'eval_epsilon': 0.05,
                   'action_train_scale': 5,
                   'num_recent_episodes': 100,
                   'num_recent_steps': 10000}
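    # Four learned components, each with its own RMSProp optimizer; the state,
    # reward, and value models additionally use an MSE loss.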
    action_model = ActionModel(hyperparams)
    action_optim = tb.RMSPropOptim(hyperparams)

    state_model = StateModel(hyperparams)
    state_optim = tb.RMSPropOptim(hyperparams)
    state_loss = tb.MSE(hyperparams)

    reward_model = RewardModel(hyperparams)
    reward_optim = tb.RMSPropOptim(hyperparams)
    reward_loss = tb.MSE(hyperparams)

    value_model = ValueModel(hyperparams)
    value_optim = tb.RMSPropOptim(hyperparams)
    value_loss = tb.MSE(hyperparams)

    # q_trainer = tb.Trainer(q_model, hyperparams, loss, optim, evaluator)
    agent = tb.RDRLAgent(hyperparams,
                         action_model, action_optim,
                         state_model, state_loss, state_optim,
                         reward_model, reward_loss, reward_optim,
                         value_model, value_loss, value_optim,
                         'params/breakout_rdrl.json')
    task = AtariTask(hyperparams, 'data/roms/breakout.bin')
    trainer = tb.RLTrainer(hyperparams, agent, task)
    trainer.train_by_epoch()