def train_dqn():
    hyperparams = {
        'batch_size': 32,
        'learning_rate': (0.5, 0.001, 4000000),
        'grad_decay': 0.99,
        'grad_epsilon': 0.01,
        'epsilon': [(1, 0.1, 4000000, 0.4),
                    (1, 0.01, 4000000, 0.3),
                    (1, 0.5, 4000000, 0.3)],
        # 'epsilon': (1, 0.1, 4000000),
        'frame_skip': 4,
        'reward_discount': 0.99,
        'show_screen': False,
        'display_freq': 100,
        # 'updates_per_iter': 1000,
        'updates_per_iter': 40000,
        # 'init_frames': 20000,
        # 'init_frames': 200000,
        # 'init_updates': 20000,
        # 'init_updates': 100000,
        'num_threads': 16,
        # 'update_freq': 4,
        # 'frames_per_epoch': 5000,
        'frames_per_epoch': 100000,
        'episodes_per_eval': 32,
        # 'episodes_per_eval': 16,
        'state_len': 4,
        'num_epochs': 400,
        'eval_freq': 1,
        'eval_epsilon': 0.05,
        'num_recent_episodes': 100,
        # 'tmax': 5,
        'num_recent_steps': 10000
    }

    q_model = BreakoutModel(hyperparams)
    loss = tb.MSE(hyperparams)
    optim = tb.RMSPropVarOptim(hyperparams)
    # q_trainer = tb.Trainer(q_model, hyperparams, loss, optim, evaluator)
    agent = tb.AsyncSIDQNAgent(hyperparams, q_model, optim, loss,
                               'params/breakout_async_sidqn_fix.json')
    task = AsyncAtariTask(hyperparams, 'data/roms/breakout.bin')
    trainer = tb.AsyncSleepTrainer(hyperparams, agent, task, load_first=False)
    trainer.train_by_epoch()
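
# The list-valued 'epsilon' entry above is most naturally read as a per-worker
# exploration schedule (as in asynchronous DQN variants): each tuple is assumed to be
# (start, end, anneal_steps, sampling_probability), and each of the 16 threads draws
# one schedule at startup. The helpers below are a minimal sketch of that
# interpretation; they are illustrative and not part of the tb library.
import random

def sample_epsilon_schedule(schedules):
    """Pick one (start, end, anneal_steps, prob) schedule, weighted by prob."""
    r, acc = random.random(), 0.0
    for start, end, anneal_steps, prob in schedules:
        acc += prob
        if r <= acc:
            return start, end, anneal_steps
    return schedules[-1][:3]

def epsilon_at(step, start, end, anneal_steps):
    """Linearly anneal epsilon from start to end over anneal_steps, then hold at end."""
    frac = min(step / float(anneal_steps), 1.0)
    return start + frac * (end - start)

# Example: one draw per worker thread, matching the 16-thread config above.
# schedules = [(1, 0.1, 4000000, 0.4), (1, 0.01, 4000000, 0.3), (1, 0.5, 4000000, 0.3)]
# start, end, steps = sample_epsilon_schedule(schedules)
# epsilon_at(2000000, start, end, steps)  # halfway through the anneal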
def train_dqn():
    hyperparams = {
        'batch_size': 32,
        'init_explore_len': 50000,
        # 'init_explore_len': 50,
        'learning_rate': 0.00025,
        # 'grad_momentum': 0.0,
        'grad_decay': 0.95,
        'grad_epsilon': 0.01,
        # 'grad_norm_clip': 5,
        'epsilon': (1.0, 0.1, 1000000),
        'frame_skip': 10,
        'num_recent_feats': 25,
        'steps_per_episode': 150,
        'reward_discount': 0.99,
        'show_screen': True,
        'target_update_freq': 10000,
        'display_freq': 25,
        'updates_per_iter': 1,
        'update_freq': 4,
        'frames_per_epoch': 100000,
        # 'frames_per_epoch': 250,
        'frames_per_eval': 50000,
        # 'screen_resize': (110, 84),
        'experience_replay_len': 1000000,
        # 'cache_size': int(2e4),
        'state_len': 4,
        'joint_vel': 0.5,
        # 'num_frames': 10000000,
        # 'save_freq': 100000,
        # 'eval_freq': 10,
        'num_epochs': 200,  # 200 * 100000 = 2e7 frames
        'eval_epsilon': 0.05,
        'num_recent_episodes': 100,
        'num_recent_steps': 10000
    }

    q_model = CookModel(hyperparams)
    loss = tb.MSE(hyperparams)
    optim = tb.RMSPropOptim(hyperparams)
    # q_trainer = tb.Trainer(q_model, hyperparams, loss, optim, evaluator)
    agent = tb.DQNAgent(hyperparams, q_model, optim, loss, 'params/cook_dqn.json')
    task = CookingTask(hyperparams)
    trainer = tb.RLTrainer(hyperparams, agent, task, load_first=True)
    trainer.train_by_epoch()
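
# For orientation: the 'init_explore_len', 'update_freq', and 'target_update_freq'
# values above follow the usual DQN recipe -- act every frame, take a gradient step
# every 'update_freq' frames once the replay buffer holds 'init_explore_len'
# transitions, and sync the target network every 'target_update_freq' gradient
# steps. The loop below is only a schematic sketch of that cadence; the env/agent
# method names are placeholders, not the tb.DQNAgent / tb.RLTrainer API.
def dqn_loop_sketch(env, agent, replay, hp):
    """Schematic DQN control loop illustrating the update cadences above."""
    state = env.reset()
    grad_steps = 0
    for frame in range(hp['num_epochs'] * hp['frames_per_epoch']):
        action = agent.act(state)                      # epsilon-greedy on the online net
        next_state, reward, done = env.step(action)
        replay.add((state, action, reward, next_state, done))
        state = env.reset() if done else next_state

        warmed_up = len(replay) >= hp['init_explore_len']
        if warmed_up and frame % hp['update_freq'] == 0:
            agent.learn(replay.sample(hp['batch_size']))   # one TD/RMSProp step
            grad_steps += 1
            if grad_steps % hp['target_update_freq'] == 0:
                agent.sync_target()                        # copy online weights -> target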
def train_dqn():
    hyperparams = {
        'batch_size': 32,
        # 'init_explore_len': 500,
        # 'init_explore_len': 50,
        'learning_rate': 0.0005,
        # 'grad_momentum': 0.0,
        'grad_decay': 0.95,
        'grad_epsilon': 0.01,
        # 'grad_norm_clip': 5,
        'epsilon': (1.0, 0.1, 4000000),
        'frame_skip': 4,
        'reward_discount': 0.99,
        'show_screen': False,
        # 'target_update_freq': 10000,
        'display_freq': 100,
        'updates_per_iter': 50000,
        'update_freq': 4,
        'frames_per_epoch': 100000,
        # 'frames_per_epoch': 250,
        'frames_per_eval': 25000,
        # 'screen_resize': (110, 84),
        # 'experience_replay_len': 1000000,
        # 'cache_size': int(2e4),
        'state_len': 4,
        # 'num_frames': 10000000,
        # 'save_freq': 100000,
        # 'eval_freq': 10,
        'num_epochs': 400,  # 400 * 100000 = 4e7 frames
        'eval_epsilon': 0.05,
        'num_recent_episodes': 100,
        'tmax': 5,
        'num_recent_steps': 10000
    }

    q_model = BreakoutModel(hyperparams)
    loss = tb.MSE(hyperparams)
    optim = tb.RMSPropOptim(hyperparams)
    # q_trainer = tb.Trainer(q_model, hyperparams, loss, optim, evaluator)
    agent = tb.SNDQNAgent(hyperparams, q_model, optim, loss,
                          'params/breakout_sndqn_l0005.json')
    task = AtariTask(hyperparams, 'data/roms/breakout.bin')
    trainer = tb.RLTrainer(hyperparams, agent, task, load_first=False)
    trainer.train_by_epoch()
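
# The 'frame_skip': 4 / 'state_len': 4 pair above is the standard Atari
# preprocessing: each chosen action is repeated for 4 emulator frames, and the
# network input is a stack of the last 4 observed frames. A minimal
# frame-stacking buffer under that assumption (not the tb implementation):
from collections import deque

import numpy as np

class FrameStack:
    """Keep the last `state_len` frames and expose them as one stacked state."""

    def __init__(self, state_len, frame_shape):
        self.frames = deque([np.zeros(frame_shape, dtype=np.float32)] * state_len,
                            maxlen=state_len)

    def push(self, frame):
        self.frames.append(np.asarray(frame, dtype=np.float32))

    def state(self):
        # Shape (state_len, H, W), oldest frame first.
        return np.stack(self.frames, axis=0)

# Usage: with 'state_len': 4 and 84x84 screens, states are (4, 84, 84) arrays.
# stack = FrameStack(4, (84, 84))
# stack.push(screen); stack.state().shape  # (4, 84, 84)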
def train_rdrl():
    hyperparams = {
        'batch_size': 32,
        'init_explore_len': 500000,
        'num_mega_updates': 100000,
        # 'init_model_train': 500000,
        # 'init_explore_len': 50,
        'learning_rate': 0.05,
        # 'grad_momentum': 0.0,
        'grad_decay': 0.95,
        'grad_epsilon': 0.01,
        # 'grad_norm_clip': 5,
        'epsilon': (1.0, 0.1, 1000000),
        'frame_skip': 4,
        'reward_discount': 0.99,
        'display_freq': 100,
        'updates_per_model_iter': 1,
        'updates_per_iter': 1,
        # 'trains_per_action_train': 500,
        'train_freq': 16,
        'action_train_freq': 16,
        # 'action_train_freq': 10000,
        'frames_per_epoch': 100000,
        # 'frames_per_epoch': 250,
        'frames_per_eval': 50000,
        # 'screen_resize': (110, 84),
        'experience_replay_len': 4000000,
        'update_target_freq': 20000,
        # 'cache_size': int(2e4),
        'state_len': 1,
        # 'num_frames': 10000000,
        # 'save_freq': 100000,
        # 'eval_freq': 10,
        'num_epochs': 200,  # 200 * 100000 = 2e7 frames
        'show_screen': False,
        'rollout_length': 4,
        'value_rollout_length': 4,
        'eval_epsilon': 0.05,
        'action_train_scale': 5,
        'num_recent_episodes': 100,
        'num_recent_steps': 10000
    }

    # Separate models and optimisers for the action, state, reward, and value networks.
    action_model = ActionModel(hyperparams)
    action_optim = tb.RMSPropOptim(hyperparams)
    state_model = StateModel(hyperparams)
    state_optim = tb.RMSPropOptim(hyperparams)
    state_loss = tb.MSE(hyperparams)
    reward_model = RewardModel(hyperparams)
    reward_optim = tb.RMSPropOptim(hyperparams)
    reward_loss = tb.MSE(hyperparams)
    value_model = ValueModel(hyperparams)
    value_optim = tb.RMSPropOptim(hyperparams)
    value_loss = tb.MSE(hyperparams)

    # q_trainer = tb.Trainer(q_model, hyperparams, loss, optim, evaluator)
    agent = tb.RDRLAgent(hyperparams,
                         action_model, action_optim,
                         state_model, state_loss, state_optim,
                         reward_model, reward_loss, reward_optim,
                         value_model, value_loss, value_optim,
                         'params/breakout_rdrl.json')
    task = AtariTask(hyperparams, 'data/roms/breakout.bin')
    trainer = tb.RLTrainer(hyperparams, agent, task)
    trainer.train_by_epoch()
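
# The separate state, reward, and value models plus 'rollout_length': 4 suggest
# the RDRL agent forms multi-step targets by unrolling its learned dynamics:
# predict reward and next state for rollout_length imagined steps, then bootstrap
# from the value model. The function below is only a hedged, pure-Python sketch of
# that target computation, with callables standing in for the learned models; it is
# not the tb.RDRLAgent implementation.
def rollout_value_sketch(state, policy, state_model, reward_model, value_model,
                         rollout_length, discount):
    """n-step return over imagined transitions: discounted sum of predicted
    rewards plus a discounted bootstrap from the learned value model."""
    total, scale = 0.0, 1.0
    for _ in range(rollout_length):
        action = policy(state)
        total += scale * reward_model(state, action)
        state = state_model(state, action)
        scale *= discount
    return total + scale * value_model(state)

# Toy usage with scalar "states" and dummy models (illustrative only):
# rollout_value_sketch(0.0,
#                      policy=lambda s: 0,
#                      state_model=lambda s, a: s + 1.0,
#                      reward_model=lambda s, a: 1.0,
#                      value_model=lambda s: 10.0,
#                      rollout_length=4, discount=0.99)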