save_path=model_path) if load_pretrained_network: doomguy.load_model() if train_network: for episode in range(episodes): print('Episode', episode) doom.new_game() done = False step = 0 while not done: state = doom.get_current_state() action_index = doomguy.act(state) next_state, reward, done = doom.step(action_index) doomguy.remember(state, action_index, reward, next_state, done) step += 1 loss = doomguy.train() doomguy.reset_memory() print('Total steps: {}, loss was: {}'.format(step, loss)) if show_results: doom = VizdoomWrapper(config_path=config_path, reward_table=reward_table, frame_resolution=resolution, show_mode=True, frame_stack=4) for episode in range(3):
# NOTE(review): newlines/indentation were stripped from this chunk (it
# arrived as one collapsed physical line); the structure below — including
# returning the fused inline comments to their own lines — is reconstructed
# and should be checked against the original file.
doomguy.load_model()

for epoch in range(epochs):
    print('\nEpoch {}\n-------'.format(epoch + 1))
    print('\nTraining...')
    doom.new_game()
    # Per-epoch bookkeeping; prev_variables is initialized here but not
    # used within this chunk — presumably consumed further down.
    train_scores = []
    train_losses = []
    prev_variables = []

    # tqdm progress bar over the fixed number of training steps per epoch.
    tqdm_bar = tqdm.trange(training_episodes_per_epoch, leave=False)
    for episode in tqdm_bar:
        # Get state, action, reward, done and next state
        state = doom.get_current_state()
        best_action_index = doomguy.act(state)
        next_state, reward, done = doom.step(best_action_index)

        # Save to memory
        doomguy.remember(state, best_action_index, reward, next_state, done)

        # Replay from memory
        training_loss = doomguy.replay(replay_batch_size)
        train_losses.append(training_loss)

        # Store results on game end
        if done:
            score = doom.get_total_reward()
            train_scores.append(score)
            doom.new_game()

    # Summarize the epoch's scores; the statistics computed from this
    # array lie beyond this chunk.
    if len(train_scores) > 0:
        train_scores = np.array(train_scores)