def showDomain(self, a=0, s=None):
    if s is None:
        s = self.state
    # Draw the environment
    if self.domain_fig is None:
        self.agent_fig = plt.figure("Domain")
        self.domain_fig = plt.imshow(
            self.map,
            cmap='GridWorld',
            interpolation='nearest',
            vmin=0,
            vmax=5)
        plt.xticks(np.arange(self.COLS), fontsize=FONTSIZE)
        plt.yticks(np.arange(self.ROWS), fontsize=FONTSIZE)
        # plt.tight_layout()
        self.agent_fig = plt.gca().plot(
            s[1], s[0], 'kd', markersize=20.0 - self.COLS)
        plt.show()
    # Remove the previous agent marker and redraw it at the current state.
    self.agent_fig.pop(0).remove()
    self.agent_fig = plt.figure("Domain")  # make "Domain" the current figure
    # mapcopy = copy(self.map)
    # mapcopy[s[0], s[1]] = self.AGENT
    # self.domain_fig.set_data(mapcopy)
    # Instead of '>' you can use 'D', 'o'
    self.agent_fig = plt.gca().plot(
        s[1], s[0], 'k>', markersize=20.0 - self.COLS)
    plt.figure("Domain").canvas.draw()
    plt.figure("Domain").canvas.flush_events()
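# --- Illustrative sketch (not part of the original class) --------------------
# showDomain() above follows a standard matplotlib "update without blocking"
# pattern: keep the imshow background, remove the previous agent marker,
# replot it, then force the canvas to redraw and flush GUI events. The helper
# below is a minimal, self-contained demo of that pattern; the figure name,
# grid size, and marker style are arbitrary assumptions, not values taken
# from the class above.
def _agent_marker_redraw_demo(steps=5):
    import numpy as np
    import matplotlib.pyplot as plt
    plt.ion()  # interactive mode so drawing does not block
    fig = plt.figure("Redraw demo")
    plt.imshow(np.zeros((4, 5)), cmap='Greys', interpolation='nearest')
    marker = plt.gca().plot(0, 0, 'k>', markersize=15)  # list of Line2D
    for col in range(steps):
        marker.pop(0).remove()                          # drop the old marker
        marker = plt.gca().plot(col, 0, 'k>', markersize=15)
        fig.canvas.draw()
        fig.canvas.flush_events()                       # let the GUI process the update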
def showLearning(self, representation):
    if self.valueFunction_fig is None:
        plt.figure("Value Function")
        self.valueFunction_fig = plt.imshow(
            self.map,
            cmap='ValueFunction',
            interpolation='nearest',
            vmin=self.MIN_RETURN,
            vmax=self.MAX_RETURN)
        plt.xticks(np.arange(self.COLS), fontsize=12)
        plt.yticks(np.arange(self.ROWS), fontsize=12)

        # Create quivers for each action. 4 in total.
        X = np.arange(self.ROWS) - self.SHIFT
        Y = np.arange(self.COLS)
        X, Y = np.meshgrid(X, Y)
        DX = DY = np.ones(X.shape)
        C = np.zeros(X.shape)
        C[0, 0] = 1  # make sure C contains both 0 and 1
        # Length of arrow / width of box. Less than 0.5 because each arrow is
        # offset; 0.4 looks nice but could be better/auto-generated.
        arrow_ratio = 0.4
        Max_Ratio_ArrowHead_to_ArrowLength = 0.25
        ARROW_WIDTH = 0.5 * Max_Ratio_ArrowHead_to_ArrowLength / 5.0
        # old_div comes from past.utils (Python 2/3 compatibility).
        self.upArrows_fig = plt.quiver(
            Y, X, DY, DX, C,
            units='y',
            cmap='Actions',
            scale_units="height",
            scale=old_div(self.ROWS, arrow_ratio),
            width=-1 * ARROW_WIDTH)
        self.upArrows_fig.set_clim(vmin=0, vmax=1)

        X = np.arange(self.ROWS) + self.SHIFT
        Y = np.arange(self.COLS)
        X, Y = np.meshgrid(X, Y)
        self.downArrows_fig = plt.quiver(
            Y, X, DY, DX, C,
            units='y',
            cmap='Actions',
            scale_units="height",
            scale=old_div(self.ROWS, arrow_ratio),
            width=-1 * ARROW_WIDTH)
        self.downArrows_fig.set_clim(vmin=0, vmax=1)

        X = np.arange(self.ROWS)
        Y = np.arange(self.COLS) - self.SHIFT
        X, Y = np.meshgrid(X, Y)
        self.leftArrows_fig = plt.quiver(
            Y, X, DY, DX, C,
            units='x',
            cmap='Actions',
            scale_units="width",
            scale=old_div(self.COLS, arrow_ratio),
            width=ARROW_WIDTH)
        self.leftArrows_fig.set_clim(vmin=0, vmax=1)

        X = np.arange(self.ROWS)
        Y = np.arange(self.COLS) + self.SHIFT
        X, Y = np.meshgrid(X, Y)
        self.rightArrows_fig = plt.quiver(
            Y, X, DY, DX, C,
            units='x',
            cmap='Actions',
            scale_units="width",
            scale=old_div(self.COLS, arrow_ratio),
            width=ARROW_WIDTH)
        self.rightArrows_fig.set_clim(vmin=0, vmax=1)
        plt.show()

    plt.figure("Value Function")
    V = np.zeros((self.ROWS, self.COLS))
    # Boolean 3-dimensional array; the third dimension indexes the action.
    # This mask marks, for each cell, which actions are available.
    Mask = np.ones(
        (self.COLS, self.ROWS, self.actions_num), dtype='bool')
    arrowSize = np.zeros(
        (self.COLS, self.ROWS, self.actions_num), dtype='float')
    # 0 = suboptimal action, 1 = optimal action
    arrowColors = np.zeros(
        (self.COLS, self.ROWS, self.actions_num), dtype='uint8')
    for r in range(self.ROWS):
        for c in range(self.COLS):
            if self.map[r, c] == self.BLOCKED:
                V[r, c] = self.BLOCK_REWARD
            if self.map[r, c] == self.GOAL:
                V[r, c] = self.MAX_RETURN
            if self.map[r, c] == self.PIT:
                V[r, c] = self.MIN_RETURN
            if self.map[r, c] == self.EMPTY or self.map[r, c] == self.START:
                s = np.array([r, c])
                As = self.possibleActions(s)
                terminal = self.isTerminal(s)
                Qs = representation.Qs(s, terminal)
                bestA = representation.bestActions(s, terminal, As)
                V[r, c] = max(Qs[As])
                Mask[c, r, As] = False
                arrowColors[c, r, bestA] = 1
                for i in range(len(As)):
                    a = As[i]
                    # Index Qs by action id, consistent with Qs[As] above.
                    Q = Qs[a]
                    value = linearMap(
                        Q, self.MIN_RETURN, self.MAX_RETURN, 0, 1)
                    arrowSize[c, r, a] = value

    # # Write the value function to a txt file:
    # with open('GridWorld_Flag3_Value.txt', 'a') as outfile:
    #     outfile.write('\n' + str(np.round(V, decimals=2)) + '\n' * 2)

    # Show Value Function
    self.valueFunction_fig.set_data(V)

    # Show Policy Up Arrows
    DX = arrowSize[:, :, 0]
    DY = np.zeros(DX.shape)  # match the (COLS, ROWS) layout of the masks
    DX = np.ma.masked_array(DX, mask=Mask[:, :, 0])
    DY = np.ma.masked_array(DY, mask=Mask[:, :, 0])
    C = np.ma.masked_array(arrowColors[:, :, 0], mask=Mask[:, :, 0])
    self.upArrows_fig.set_UVC(DY, DX, C)
    # Show Policy Down Arrows
    DX = -arrowSize[:, :, 1]
    DY = np.zeros(DX.shape)
    DX = np.ma.masked_array(DX, mask=Mask[:, :, 1])
    DY = np.ma.masked_array(DY, mask=Mask[:, :, 1])
    C = np.ma.masked_array(arrowColors[:, :, 1], mask=Mask[:, :, 1])
    self.downArrows_fig.set_UVC(DY, DX, C)
    # Show Policy Left Arrows
    DY = -arrowSize[:, :, 2]
    DX = np.zeros(DY.shape)
    DX = np.ma.masked_array(DX, mask=Mask[:, :, 2])
    DY = np.ma.masked_array(DY, mask=Mask[:, :, 2])
    C = np.ma.masked_array(arrowColors[:, :, 2], mask=Mask[:, :, 2])
    self.leftArrows_fig.set_UVC(DY, DX, C)
    # Show Policy Right Arrows
    DY = arrowSize[:, :, 3]
    DX = np.zeros(DY.shape)
    DX = np.ma.masked_array(DX, mask=Mask[:, :, 3])
    DY = np.ma.masked_array(DY, mask=Mask[:, :, 3])
    C = np.ma.masked_array(arrowColors[:, :, 3], mask=Mask[:, :, 3])
    self.rightArrows_fig.set_UVC(DY, DX, C)
    plt.draw()
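# --- Illustrative sketch (not part of the original class) --------------------
# The arrow lengths above rely on linearMap, which is imported from the
# project's utility module in the full file. A plausible stand-in with the
# semantics this code depends on -- rescale a Q-value from
# [MIN_RETURN, MAX_RETURN] into [0, 1], clamped to that range -- is sketched
# below. The name and the clamping behaviour are assumptions, not the
# project's exact implementation.
def _linear_map_sketch(x, lo, hi, new_lo=0.0, new_hi=1.0):
    """Map x linearly from [lo, hi] onto [new_lo, new_hi], clamping the result."""
    if hi == lo:
        return new_lo
    t = (x - lo) / float(hi - lo)
    t = min(max(t, 0.0), 1.0)  # clamp so arrow lengths stay in [0, 1]
    return new_lo + t * (new_hi - new_lo)

# Example: _linear_map_sketch(-0.5, -1.0, 1.0) -> 0.25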