# Replay a single episode with the current q_table and capture one
# pcolormesh frame of the world per step, then wrap the frames in an
# ArtistAnimation.
fig = plt.figure(figsize=(20, 20))

# Fresh agent and world for the replay.
bilbo = QLearningAgent(PLAYER_CHAR)
mondo = World(WORLD_DIM, bilbo=bilbo, obstacle=True)

rewards = 0
epoch = 0
game_ended = False

# The first frame shows the world before any move has been made.
env = mondo.create_env(d)
anim = [(plt.pcolormesh(env, cmap='CMRmap'),)]

while epoch < MAX_EPOCH and not game_ended:
    epoch += 1

    # 0 is passed where the training loop passes epsilon -- presumably this
    # turns exploration off so the learned policy is followed (TODO confirm).
    action = bilbo.get_action(0, q_table, possible_moves)

    # bilbo.move(...) hands back a callable; invoking it performs the step.
    perform_move = bilbo.move(inverse_possible_moves[action])
    perform_move()

    game_ended = bilbo.game_ended()
    reward = bilbo.reward()
    rewards += reward

    # ArtistAnimation expects each frame as a sequence of artists, hence the
    # one-element tuple.
    env = mondo.create_env(d)
    mesh = plt.pcolormesh(env, cmap='CMRmap')
    anim.append((mesh,))

im_ani = animation.ArtistAnimation(
    fig,
    anim,
    interval=60,
    repeat_delay=1000,
    blit=False,
)
# The writer's frame rate is set to the number of steps the replay took.
writer = animation.FFMpegWriter(fps=epoch)
for ep in range(TOT_EPISODES): #recreate the environment bilbo = QLearningAgent(PLAYER_CHAR) mondo = World(WORLD_DIM, bilbo=bilbo, obstacle=True) np.random.seed() game_ended = False epoch = 0 tot_reward = 0 #if ep % 10 == 0: #a = plt.imshow(render_world(mondo.world,WORLD_DIM,q_table,ep), animated=True) #policy.append((a,)) while not game_ended and epoch < MAX_EPOCH: #the near it gets to the dragon the more random the movement epoch += 1 epsilon_fear = bilbo.fear(epsilon) action = bilbo.get_action(epsilon, q_table, possible_moves) current_state = bilbo.get_current_state() #treasure_gone = bilbo.treasure_gone() old_q_val = q_table[current_state][action] bilbo.move(inverse_possible_moves[action])() new_state = bilbo.get_current_state() #treasure_gone = bilbo.treasure_gone() game_ended = bilbo.game_ended() reward = bilbo.reward() if reward == -DRAGON_PENALTY: new_q_val = reward loss += 1 elif reward == TREASURE_REWARD: