strategy_name = 'pdddq_rbp_half' memory = PrioritizedReplayMemory(memory_size) policy_net = DuelingDQN_RBP(w, h, t, len(_game.actions)).to(device).half() target_net = DuelingDQN_RBP(w, h, t, len(_game.actions)).to(device).half() optimizer = Adam(policy_net.parameters(), lr=1e-5) summary_writer = SummaryWriter( 'runs/%s-%s-%s' % (_game.name, strategy_name, datetime.now().isoformat())) model = LearningModel(memory=memory, policy_net=policy_net, target_net=target_net, input_dtype=torch.half, optimizer=optimizer, strategy_name=strategy_name, game_name=_game.name, device=device, status=TrainingStatus(summary_writer)) print('Model prepared') # %% if load_checkpoint(model): print('Resuming') else: print('Starting fresh') # %% train(model, create_game,
def create_game():
    """Return a new ``Pong30Min`` game built from the module-level ``w`` and ``h``."""
    return Pong30Min(w, h)


# Use the first CUDA device when one is available, otherwise run on the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print('Using device %s' % device)

# A throwaway game instance is opened only to read the action list and the
# game name needed to size and label the model.
# NOTE(review): the extent of this ``with`` body is reconstructed; confirm the
# game is meant to be closed before training starts.
with create_game() as _game:
    memory = SimpleReplayMemory(memory_size)
    policy_net = DQN(w, h, t, len(_game.actions)).to(device)
    model = LearningModel(
        memory=memory,
        policy_net=policy_net,
        target_net=DQN(w, h, t, len(_game.actions)).to(device),
        optimizer=Adam(policy_net.parameters(), lr=1e-5),
        game_name=_game.name,
        strategy_name='30min',
        device=device,
    )
print('Model prepared')

# %%
# ``load_checkpoint`` reports through its truthiness whether a checkpoint was
# picked up; the message mirrors that.
print('Resuming' if load_checkpoint(model) else 'Starting fresh')

# %%
train(
    model,
    create_game,
    hyperparams,
    500000 // hyperparams.game_steps_per_epoch,
    validation_episodes=0,
    save_every=25,
    example_every=0,
)
print('Using device %s' % device)


def create_game():
    """Return a new ``Pong`` game built from the module-level ``w``, ``h`` and ``t``."""
    return Pong(w, h, t)


# A throwaway game instance is opened only to read the action list and the
# game name needed to size and label the model.
# NOTE(review): the extent of this ``with`` body is reconstructed; confirm the
# game is meant to be closed before training starts.
with create_game() as _game:
    # Alternative replay buffer left by the author:
    # memory = PrioritizedReplayMemory(memory_size)
    memory = SimpleReplayMemory(memory_size)
    policy_net = DuelingDQN(w, h, t, len(_game.actions)).to(device)
    target_net = DuelingDQN(w, h, t, len(_game.actions)).to(device)
    model = LearningModel(
        memory=memory,
        policy_net=policy_net,
        target_net=target_net,
        optimizer=Adam(policy_net.parameters(), lr=1e-4),
        strategy_name='mpdddq_d',
        game_name=_game.name,
        device=device,
    )
print('Model prepared')

# %%
# ``load_checkpoint`` reports through its truthiness whether a checkpoint was
# picked up; the message mirrors that.
print('Resuming' if load_checkpoint(model) else 'Starting fresh')

# %%
# Both the run length and the save interval are scaled down by ``episode_factor``.
train(
    model,
    create_game,
    hyperparams,
    500 // episode_factor,
    save_every=25 // episode_factor,
)
save_checkpoint(model)
def train_with(device: torch.device, steps_to_train: int, game_steps_per_step: int, prio_memory: bool):
    """Train a ``DQN_RBP`` model with the chosen replay memory, save it and validate it.

    Args:
        device: Device that both the policy and the target network are moved to.
        steps_to_train: Floor-divided by ``game_steps_per_epoch`` to obtain the
            fourth positional argument of ``train``.
        game_steps_per_step: Forwarded to the hyperparameters; also multiplied
            by 32 to obtain the batch size.
        prio_memory: ``True`` uses ``PrioritizedReplayMemory``, ``False`` uses
            ``SimpleReplayMemory``. Also selects the ``prm``/``srm`` suffix of
            the strategy name.
    """
    episode_factor = 5
    side = 84  # used for both the width and the height argument of the network
    t = 4
    capacity = 50000
    samples_per_game_step = 32

    hyperparams = TrainingHyperparameters(
        gamma=0.99,
        beta=linear_increase(0.01 * episode_factor),
        exploration_rate=linear_decay(0.008 * episode_factor, max_value=1., min_value=0.01),
        batch_size=game_steps_per_step * samples_per_game_step,
        game_steps_per_step=game_steps_per_step,
        copy_to_target_every=1000,
        game_steps_per_epoch=1000 * episode_factor,
        multi_step_n=4,
        warmup_rounds=500,
        init_memory_steps=1000,
        parallel_game_processes=2,
        max_batches_prefetch=10,
        states_on_device=True,
    )

    memory_tag = 'prm' if prio_memory else 'srm'
    strategy_name = 'floaton-steps%d-%s' % (game_steps_per_step, memory_tag)
    memory_cls = PrioritizedReplayMemory if prio_memory else SimpleReplayMemory

    # A throwaway game instance is opened only to read the action list and the
    # game name needed to size and label the model.
    # NOTE(review): the extent of this ``with`` body is reconstructed; confirm
    # the game is meant to be closed before training starts.
    with create_game() as _game:
        replay = memory_cls(capacity)
        action_count = len(_game.actions)
        online_net = DQN_RBP(side, side, t, action_count).to(device)
        frozen_net = DQN_RBP(side, side, t, action_count).to(device)
        adam = Adam(online_net.parameters(), lr=1e-4)
        run_dir = 'runs/%s-%s-%s' % (_game.name, strategy_name, datetime.now().isoformat())
        writer = SummaryWriter(run_dir)
        model = LearningModel(
            memory=replay,
            policy_net=online_net,
            target_net=frozen_net,
            input_dtype=torch.float,
            optimizer=adam,
            strategy_name=strategy_name,
            game_name=_game.name,
            device=device,
            status=TrainingStatus(writer),
        )
    print('%s: Model prepared' % strategy_name)

    # Training phase; ``save_every=0`` is passed and the checkpoint is written
    # explicitly once ``train`` returns.
    train(model, create_game, hyperparams,
          steps_to_train // hyperparams.game_steps_per_epoch,
          save_every=0)
    save_checkpoint(model)

    # Validation runs on its own freshly created game.
    with create_game() as game:
        print('Running validation of', strategy_name)
        print_validation(model, game, 5)
    print('%s completed' % strategy_name)
# Use the first CUDA device when one is available, otherwise run on the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print('Using device %s' % device)

# A throwaway game instance is opened only to read the action list and the
# game name needed to size and label the model.
# NOTE(review): the extent of this ``with`` body is reconstructed; confirm the
# game is meant to be closed before training starts.
with create_game() as _game:
    strategy_name = 'pdddq_rbp'
    memory = PrioritizedReplayMemory(memory_size)
    policy_net = DuelingDQN_RBP(w, h, t, len(_game.actions)).to(device)
    target_net = DuelingDQN_RBP(w, h, t, len(_game.actions)).to(device)
    # The run directory is labelled with game name, strategy name and the
    # current timestamp.
    summary_writer = SummaryWriter(
        'runs/%s-%s-%s' % (_game.name, strategy_name, datetime.now().isoformat())
    )
    model = LearningModel(
        memory=memory,
        policy_net=policy_net,
        target_net=target_net,
        optimizer=Adam(policy_net.parameters(), lr=1e-4),
        strategy_name=strategy_name,
        game_name=_game.name,
        device=device,
        status=TrainingStatus(summary_writer),
    )
print('Model prepared')

# %%
# ``load_checkpoint`` reports through its truthiness whether a checkpoint was
# picked up; the message mirrors that.
print('Resuming' if load_checkpoint(model) else 'Starting fresh')

# %%
train(
    model,
    create_game,
    hyperparams,
    steps_to_train // hyperparams.game_steps_per_epoch,
    save_every=25 // episode_factor,
)