def visualize(self):
    """Render the maze scene: red border edges, the agent (blue), the
    goal cell (green), and every wall cell (red), all slightly below
    the grid plane (z = -0.1)."""
    n_rows, n_cols = self.maze.shape
    depth = -0.1
    agent = self.agent
    # The four border edges of the grid, as (name, x0, y0, x1, y1).
    edges = [
        ('e1', 0, 0, n_rows, 0),
        ('e2', 0, 0, 0, n_cols),
        ('e3', 0, n_cols, n_rows, n_cols),
        ('e4', n_rows, 0, n_rows, n_cols),
    ]
    for name, xa, ya, xb, yb in edges:
        plot_line_seg(xa, ya, depth, xb, yb, depth, name, size=0.2, color='red')
    # Agent at its current cell midpoint; goal at the bottom-right cell.
    plot_3d(*get_midpoint_for_loc(agent.i, agent.j), depth, 'agent', color='blue', size=1)
    plot_3d(*get_midpoint_for_loc(n_rows - 1, n_cols - 1), depth, 'goal', color='green', size=1)
    # Every cell equal to WALL is drawn at its cell center (+0.5 offset).
    wall_x, wall_y = np.where(self.maze == WALL)
    plot_3d(wall_x + 0.5, wall_y + 0.5, [depth] * len(wall_x), 'wall', color='red', size=1)
def visualize3d(self):
    """Render the environment: red border edges, the agent (blue), the
    goal cell (green), and all obstacle cells, slightly above the grid
    plane (z = 0.1).

    Fix: the goal marker was hard-coded at cell (3, 3), which only
    matches a 4x4 grid; the sibling visualize() derives it from the grid
    shape as (nr - 1, nc - 1), and main() builds a size-8 maze. The goal
    is now derived from self.env.shape so the drawing stays consistent
    with the actual goal cell for any grid size.
    """
    nr, nc = self.env.shape
    z = 0.1
    a = self.mousy
    # Four border edges of the grid.
    plot_line_seg(0, 0, z, nr, 0, z, 'e1', size=0.2, color='red')
    plot_line_seg(0, 0, z, 0, nc, z, 'e2', size=0.2, color='red')
    plot_line_seg(0, nc, z, nr, nc, z, 'e3', size=0.2, color='red')
    plot_line_seg(nr, 0, z, nr, nc, z, 'e4', size=0.2, color='red')
    # Agent at its current cell midpoint; goal at the bottom-right cell
    # (was hard-coded to (3, 3) — see docstring).
    plot_3d(*get_midpoint_for_loc(a.i, a.j), z, 'mousy', color='blue', size=1)
    plot_3d(*get_midpoint_for_loc(nr - 1, nc - 1), z, 'goal', color='green', size=1)
    # Cells with value -1 are obstacles; draw them at their centers.
    xarr, yarr = np.where(self.env == -1)
    plot_3d(xarr + 0.5, yarr + 0.5, [z] * len(xarr), 'obstacles', size=1.0)
def visualize(self):
    """Draw the environment below the grid plane (z = -0.1): red border
    edges, the agent (blue), the goal at the bottom-right cell (green),
    and all obstacle cells (value -1)."""
    n_rows, n_cols = self.env.shape
    depth = -0.1
    agent = self.mousy
    # Border of the grid as four labelled red segments.
    border = (
        ('e1', 0, 0, n_rows, 0),
        ('e2', 0, 0, 0, n_cols),
        ('e3', 0, n_cols, n_rows, n_cols),
        ('e4', n_rows, 0, n_rows, n_cols),
    )
    for name, xa, ya, xb, yb in border:
        plot_line_seg(xa, ya, depth, xb, yb, depth, name, size=0.2, color='red')
    # Agent and goal markers at their cell midpoints.
    plot_3d(*get_midpoint_for_loc(agent.i, agent.j), depth, 'mousy', color='blue', size=1)
    plot_3d(*get_midpoint_for_loc(n_rows - 1, n_cols - 1), depth, 'goal', color='green', size=1)
    # Obstacle cells (env == -1), each drawn at its center (+0.5 offset).
    xarr, yarr = np.where(self.env == -1)
    plot_3d(xarr + 0.5, yarr + 0.5, [depth] * len(xarr), 'obstacles', size=1.0)
def main():
    """Train a tabular Q-learning agent on an 8x8 maze, then replay the
    greedy policy while drawing the traversed path.

    Flow: (1) interactively pick a maze (press 'n' to regenerate),
    (2) run `max_episodes` training episodes with an annealed
    exploration/exploitation split, (3) replay greedily and plot the path.

    NOTE(review): this source arrived with its line structure collapsed;
    the loop nesting below is a faithful reconstruction but should be
    confirmed against the original file.
    """
    size = 8
    # One state per cell, four actions (bottom/top/left/right).
    q = QLearning(size**2, 4)
    # Maze selection loop: regenerate until the user accepts one
    # (any input other than 'n'/'N' accepts the current maze).
    go_ahead = False
    while not go_ahead:
        m = make_test_maze(size)
        m.visualize3d()
        conti = input()
        if conti.lower() == 'n':
            continue
        go_ahead = True
    max_episodes = 400
    switch_episodes = 200
    for i in range(max_episodes):
        m.reset()
        print(i, end=" ")
        final_score = 0
        while not m.has_won():
            # EXPLORATION VS EXPLOITATION
            ### Exploration
            # Pure exploration for the first `switch_episodes` episodes;
            # afterwards explore with a probability annealed by
            # anneal_probability() — semantics of its 0.5 argument are
            # defined elsewhere, TODO confirm.
            if random.random() > anneal_probability(
                    i, max_episodes, switch_episodes, 0.5) or i < switch_episodes:
                # list me all the moves possible for the agent
                moves = m.compute_possible_moves()
                # shuffle the moves
                random.shuffle(moves)
                # move: tuple, move_idx: bottom(0), top(1), left (2), right(3)
                move, move_idx = moves[0]
            ### Exploitation
            else:
                # Greedy action from the Q-table; note this does not check
                # whether the chosen action is actually legal here —
                # presumably do_a_move() handles illegal moves, verify.
                moves = m.all_actions
                s = m.state_for_agent(m.mousy)
                move_idx = np.argmax(q.q[s])
                move = moves[move_idx]
            # Standard Q-learning transition: (s_t, a_t, r_t, s_{t+1}).
            at = move_idx
            st = m.state_for_agent(m.mousy)
            score = m.do_a_move(move)
            final_score += score
            rt = score
            # print(score)
            # lm = m.mousy()  #last action of the agent
            st1 = m.state_for_agent(m.mousy)
            # m.visualize3d()
            # time.sleep(0.001)
            q.update(st, at, rt, st1)
        # Small pause between episodes (presumably for the visualizer).
        time.sleep(0.01)
        print(f"Finished episode with final score of {final_score} ")
    # Training done: dump the Q-table, then replay the greedy policy.
    print(q.q)
    m.reset()
    m.visualize3d()
    agents = []
    while not m.has_won():
        time.sleep(0.5)
        # Pick the state of the agent
        s = m.state_for_agent(m.mousy)
        # Select the action that has the highest action value
        a_idx = np.argmax(q.q[s])
        # NOTE(review): if do_a_move() mutates m.mousy in place rather than
        # replacing it, every entry here aliases the same object and the
        # path segments below would be degenerate — verify.
        agents.append(m.mousy)
        # Tell our agent to make a move
        m.do_a_move(m.all_actions[a_idx])
        m.visualize3d()
    # Draw the traversed path as segments between consecutive agent
    # positions, re-rendering after each one.
    for ii, (a1, a2) in enumerate(zip(agents, agents[1:])):
        plot_line_seg(a1.i + 0.5, a1.j + 0.5, 0, a2.i + 0.5, a2.j + 0.5, 0,
                      f'path{ii}', size=0.1)
        m.visualize3d()
        time.sleep(0.3)
    m.reset()