def showDomain(self, a=0, s=None):
    """Render the grid map and mark the agent's cell on the "Domain" figure.

    :param a: Not used by this method.
    :param s: State to display; ``s[0]`` is plotted on the y axis and
        ``s[1]`` on the x axis. ``self.state`` is used when ``None``.
    """
    state = self.state if s is None else s
    agent_y, agent_x = state[0], state[1]
    # The marker shrinks as the grid gets wider.
    marker_size = 20.0 - self.COLS

    if self.domain_fig is None:
        # First call: create the figure, the map image and the axis ticks,
        # and plot an initial marker so there is something to remove below.
        self.agent_fig = plt.figure("Domain")
        image_options = dict(cmap='GridWorld', interpolation='nearest',
                             vmin=0, vmax=5)
        self.domain_fig = plt.imshow(self.map, **image_options)
        plt.xticks(np.arange(self.COLS), fontsize=FONTSIZE)
        plt.yticks(np.arange(self.ROWS), fontsize=FONTSIZE)
        axes = plt.gca()
        self.agent_fig = axes.plot(agent_x, agent_y, 'kd',
                                   markersize=marker_size)
        plt.show()

    # Discard the previously drawn marker.
    self.agent_fig.pop(0).remove()
    # Make "Domain" the current figure before plotting the new marker.
    self.agent_fig = plt.figure("Domain")
    # Other marker shapes such as 'D' or 'o' work in place of '>'.
    self.agent_fig = plt.gca().plot(agent_x, agent_y, 'k>',
                                    markersize=marker_size)
    plt.draw()
def showDomain(self, a=0):
    """Draw the current block configuration as an image.

    ``self.state[k]`` is read as the support of block ``k``: a block whose
    entry equals its own index is painted in row 0 (the table); otherwise it
    is painted one row above the block it names.

    :param a: Not used by this method.
    """
    state = self.state
    n_blocks = self.blocks
    # 0 is the background value (white, per the original note), so block k
    # is painted with the value k + 1.
    canvas = np.zeros((n_blocks, n_blocks), 'uint8')

    pending = np.arange(n_blocks)
    while pending.size:
        block, pending = pending[0], pending[1:]
        support = state[block]
        if support == block:
            # The block rests directly on the table.
            canvas[0, block] = block + 1
            continue
        # Look for the supporting block among the cells painted so far.
        rows, cols = findElemArray2D(support + 1, canvas)
        if len(rows):
            canvas[rows + 1, cols] = block + 1
        else:
            # Support not painted yet: retry this block after the others.
            pending = np.append(pending, block)

    if self.domain_fig is not None:
        # Figure already exists: just swap in the new image data.
        self.domain_fig.set_data(canvas)
        plt.draw()
        return

    # First call: create the image; origin='lower' puts row 0 at the bottom.
    self.domain_fig = plt.imshow(canvas, cmap='BlocksWorld', origin='lower',
                                 interpolation='nearest')
    plt.xticks(np.arange(n_blocks), fontsize=FONTSIZE)
    plt.yticks(np.arange(n_blocks), fontsize=FONTSIZE)
    plt.axis('off')
    plt.show()
def showDomain(self, a=0):
    """Draw the board and mark the cell addressed by action ``a``.

    :param a: Action id; converted by ``id2vec`` into a pair that is plotted
        as (y, x) on the ``BOARD_SIZE`` x ``BOARD_SIZE`` board.
    """
    size = self.BOARD_SIZE
    board = self.state

    if self.domain_fig is None:
        # First call: create the axes, the board image and an initial
        # marker so that the removal below has something to remove.
        axes = plt.subplot(111)
        self.move_fig = axes
        board = board.reshape((size, size))
        self.domain_fig = plt.imshow(board, cmap='FlipBoard',
                                     interpolation='nearest', vmin=0, vmax=1)
        plt.xticks(np.arange(size), fontsize=FONTSIZE)
        plt.yticks(np.arange(size), fontsize=FONTSIZE)
        move_y, move_x = id2vec(a, [size, size])
        self.move_fig = axes.plot(move_x, move_y, 'kx', markersize=30.0)
        plt.show()

    move_y, move_x = id2vec(a, [size, size])
    # Replace the previous move marker with one at the new position.
    self.move_fig.pop(0).remove()
    self.move_fig = plt.plot(move_x, move_y, 'kx', markersize=30.0)
    # Push the current board contents into the existing image.
    self.domain_fig.set_data(board.reshape((size, size)))
    plt.draw()
def showDomain(self, a=0, s=None):
    """Show the grid map with a marker on the agent's position.

    :param a: Not used by this method.
    :param s: State to draw; ``s[1]`` is the x coordinate and ``s[0]`` the
        y coordinate of the marker. Defaults to ``self.state``.
    """
    if s is None:
        s = self.state
    marker_y, marker_x = s[0], s[1]
    size = 20.0 - self.COLS  # smaller markers on wider grids

    def place_agent(fmt):
        # Plot the agent on the current axes and return the line artists.
        return plt.gca().plot(marker_x, marker_y, fmt, markersize=size)

    needs_setup = self.domain_fig is None
    if needs_setup:
        # Build the "Domain" figure once: map image, ticks, initial marker.
        self.agent_fig = plt.figure("Domain")
        self.domain_fig = plt.imshow(self.map,
                                     cmap='GridWorld',
                                     interpolation='nearest',
                                     vmin=0,
                                     vmax=5)
        for set_ticks, count in ((plt.xticks, self.COLS),
                                 (plt.yticks, self.ROWS)):
            set_ticks(np.arange(count), fontsize=FONTSIZE)
        self.agent_fig = place_agent('kd')
        plt.show()

    # Remove the old marker, re-select the "Domain" figure and redraw.
    old_marker = self.agent_fig.pop(0)
    old_marker.remove()
    self.agent_fig = plt.figure("Domain")
    # 'D' or 'o' are alternative marker shapes to '>'.
    self.agent_fig = place_agent('k>')
    plt.draw()
def showDomain(self, a=0):
    """Display the board state and an 'x' marker for action ``a``.

    :param a: Action id, translated by ``id2vec`` into the marker's
        (y, x) position on the square board.
    """
    n = self.BOARD_SIZE
    square = (n, n)
    cells = self.state

    def locate_move():
        # Translate the action id into the marker's (y, x) position.
        return id2vec(a, [n, n])

    if self.domain_fig is None:
        # One-time setup of axes, board image, ticks and a first marker.
        self.move_fig = plt.subplot(111)
        cells = cells.reshape(square)
        self.domain_fig = plt.imshow(cells,
                                     cmap='FlipBoard',
                                     interpolation='nearest',
                                     vmin=0,
                                     vmax=1)
        tick_setters = (plt.xticks, plt.yticks)
        for set_ticks in tick_setters:
            set_ticks(np.arange(n), fontsize=FONTSIZE)
        y, x = locate_move()
        self.move_fig = self.move_fig.plot(x, y, 'kx', markersize=30.0)
        plt.show()

    y, x = locate_move()
    # Drop the stale marker, then draw the marker for the current action.
    stale = self.move_fig.pop(0)
    stale.remove()
    self.move_fig = plt.plot(x, y, 'kx', markersize=30.0)
    cells = cells.reshape(square)
    self.domain_fig.set_data(cells)
    plt.draw()
def showDomain(self, a):
    """Draw the map and overlay the current ally and intruder positions.

    The state vector is split into consecutive pairs: the first
    ``NUMBER_OF_AGENTS`` pairs are plotted as allies, the remaining pairs as
    intruders. In each pair, element 0 is plotted on the y axis and element
    1 on the x axis.

    :param a: Not used by this method.
    """
    s = self.state

    if self.domain_fig is None:
        # First call: create the map image and the axis ticks.
        self.domain_fig = plt.imshow(self.map, cmap='IntruderMonitoring',
                                     interpolation='nearest', vmin=0, vmax=3)
        plt.xticks(np.arange(self.COLS), fontsize=FONTSIZE)
        plt.yticks(np.arange(self.ROWS), fontsize=FONTSIZE)
        plt.show()

    if self.ally_fig is not None:
        # Remove the markers drawn by the previous call.
        self.ally_fig.pop(0).remove()
        self.intruder_fig.pop(0).remove()

    split = self.NUMBER_OF_AGENTS * 2
    s_ally = s[0:split].reshape((-1, 2))
    s_intruder = s[split:].reshape((-1, 2))

    marker_style = dict(markersize=30.0, alpha=.7,
                        markeredgecolor='k', markeredgewidth=2)
    self.ally_fig = plt.plot(s_ally[:, 1], s_ally[:, 0], 'bo',
                             **marker_style)
    # Marker-only format string: the colour comes from the ``color`` keyword
    # alone. The previous 'g>' also named green, which the keyword overrode
    # and which newer matplotlib reports as a redundant definition.
    self.intruder_fig = plt.plot(s_intruder[:, 1], s_intruder[:, 0], '>',
                                 color='gray', **marker_style)
    plt.draw()
def showDomain(self, a=0):
    """Paint the block stacks into an image on the "Domain" figure.

    For each block ``k``, ``self.state[k]`` names what it sits on: its own
    index means row 0 (the table), any other value means the row directly
    above that block.

    :param a: Not used by this method.
    """
    placement = self.state
    count = self.blocks
    # Blocks are stored as index + 1 because 0 is the background value
    # (white, per the original note).
    grid = np.zeros((count, count), 'uint8')

    queue = np.arange(count)
    while len(queue) > 0:
        current = queue[0]
        below = placement[current]
        queue = queue[1:]
        on_table = below == current
        if on_table:
            grid[0, current] = current + 1
        else:
            # Has the block underneath been painted yet?
            found_rows, found_cols = findElemArray2D(below + 1, grid)
            if len(found_rows):
                grid[found_rows + 1, found_cols] = current + 1
            else:
                # Not yet: move this block to the back of the queue.
                queue = np.concatenate((queue, [current]))

    if self.domain_fig is None:
        # First call: create the figure and the image artist.
        plt.figure("Domain")
        self.domain_fig = plt.imshow(grid,
                                     cmap='BlocksWorld',
                                     origin='lower',
                                     interpolation='nearest')
        plt.xticks(np.arange(count), fontsize=FONTSIZE)
        plt.yticks(np.arange(count), fontsize=FONTSIZE)
        plt.axis('off')
        plt.show()
    else:
        # Later calls: update the image and redraw the "Domain" canvas.
        self.domain_fig.set_data(grid)
        canvas = plt.figure("Domain").canvas
        canvas.draw()
        canvas.flush_events()
def showLearning(self, representation):
    """Refresh the "Value Function" and "Policy" figures.

    The (x, xDot) plane is sampled on a regular grid with
    ``X_discretization`` points along x and ``XDot_discretization`` points
    along xDot. For each sample the maximum Q-value and the representation's
    best action are written into two images.

    :param representation: Object providing ``Qs(s, terminal)`` and
        ``bestAction(s, terminal, actions)``.
    """
    # Rows follow xDot and columns follow x, matching the [row, col]
    # indexing in the sampling loop below. The arrays used to be allocated
    # as (X_discretization, XDot_discretization), which only worked when
    # both discretizations were equal.
    grid_shape = (self.XDot_discretization, self.X_discretization)
    pi = np.zeros(grid_shape, 'uint8')
    V = np.zeros(grid_shape)

    if self.valueFunction_fig is None:
        # First call: create both figures with placeholder images.
        self.valueFunction_fig = plt.figure("Value Function")
        self.valueFunction_im = plt.imshow(
            V, cmap='ValueFunction', interpolation='nearest',
            origin='lower', vmin=self.MIN_RETURN, vmax=self.MAX_RETURN)
        plt.xticks(self.xTicks, self.xTicksLabels, fontsize=12)
        plt.yticks(self.yTicks, self.yTicksLabels, fontsize=12)
        plt.xlabel(r"$x$")
        plt.ylabel(r"$\dot x$")

        self.policy_fig = plt.figure("Policy")
        self.policy_im = plt.imshow(
            pi, cmap='MountainCarActions', interpolation='nearest',
            origin='lower', vmin=0, vmax=self.actions_num)
        plt.xticks(self.xTicks, self.xTicksLabels, fontsize=12)
        plt.yticks(self.yTicks, self.yTicksLabels, fontsize=12)
        plt.xlabel(r"$x$")
        plt.ylabel(r"$\dot x$")
        plt.show()

    x_samples = np.linspace(self.XMIN, self.XMAX, self.X_discretization)
    xdot_samples = np.linspace(self.XDOTMIN, self.XDOTMAX,
                               self.XDot_discretization)
    for row, xDot in enumerate(xdot_samples):
        for col, x in enumerate(x_samples):
            s = [x, xDot]
            Qs = representation.Qs(s, False)
            As = self.possibleActions()
            pi[row, col] = representation.bestAction(s, False, As)
            V[row, col] = max(Qs)

    self.valueFunction_im.set_data(V)
    self.policy_im.set_data(pi)
    # Select each figure in turn so that plt.draw() redraws it.
    self.valueFunction_fig = plt.figure("Value Function")
    plt.draw()
    self.policy_fig = plt.figure("Policy")
    plt.draw()
def showDomain(self, a):
    """Draw the map and the ally/intruder markers on the "Domain" figure.

    The state vector is split into consecutive pairs: the first
    ``NUMBER_OF_AGENTS`` pairs are plotted as allies, the remaining pairs as
    intruders. In each pair, element 0 is plotted on the y axis and element
    1 on the x axis.

    :param a: Not used by this method.
    """
    s = self.state

    # Select "Domain" before any drawing so the markers below cannot end up
    # on another figure that was made current between calls.
    domain_figure = plt.figure("Domain")

    if self.domain_fig is None:
        # First call: create the map image and the axis ticks.
        self.domain_fig = plt.imshow(
            self.map, cmap='IntruderMonitoring', interpolation='nearest',
            vmin=0, vmax=3)
        plt.xticks(np.arange(self.COLS), fontsize=FONTSIZE)
        plt.yticks(np.arange(self.ROWS), fontsize=FONTSIZE)
        plt.show()

    if self.ally_fig is not None:
        # Remove the markers drawn by the previous call.
        self.ally_fig.pop(0).remove()
        self.intruder_fig.pop(0).remove()

    split = self.NUMBER_OF_AGENTS * 2
    s_ally = s[0:split].reshape((-1, 2))
    s_intruder = s[split:].reshape((-1, 2))

    marker_style = dict(markersize=30.0, alpha=.7,
                        markeredgecolor='k', markeredgewidth=2)
    self.ally_fig = plt.plot(s_ally[:, 1], s_ally[:, 0], 'bo',
                             **marker_style)
    # Marker-only format string: the colour comes from the ``color`` keyword
    # alone. The previous 'g>' also named green, which the keyword overrode
    # and which newer matplotlib reports as a redundant definition.
    self.intruder_fig = plt.plot(s_intruder[:, 1], s_intruder[:, 0], '>',
                                 color='gray', **marker_style)

    domain_figure.canvas.draw()
    domain_figure.canvas.flush_events()
def gridworld_showlearning(self, representation):
    """Display the value function of ``self.actual_domain`` as a heat map.

    :param representation: Passed through to ``self.get_value_function``,
        whose result becomes the image data.
    """
    grid = self.actual_domain
    figure_name = "Value Function"

    if self.valueFunction_fig is None:
        # First call: create the figure, the image artist and the ticks.
        plt.figure(figure_name)
        image_options = dict(cmap='ValueFunction', interpolation='nearest',
                             vmin=grid.MIN_RETURN, vmax=grid.MAX_RETURN)
        self.valueFunction_fig = plt.imshow(grid.map, **image_options)
        plt.xticks(np.arange(grid.COLS), fontsize=12)
        plt.yticks(np.arange(grid.ROWS), fontsize=12)
        plt.show()

    # Select the figure, then swap in the freshly computed values.
    plt.figure(figure_name)
    values = self.get_value_function(representation)
    self.valueFunction_fig.set_data(values)
    plt.draw()
def showLearning(self, representation):
    """Show the learned value function and per-action arrows on the grid.

    On the first call the "Value Function" figure is created with a heat map
    and four quiver layers (up, down, left, right). Every call then
    recomputes each cell's value plus the arrow lengths and colours, and
    pushes them into those artists.

    :param representation: Object providing ``Qs(s, terminal)`` (indexed by
        action id) and ``bestActions(s, terminal, actions)``.
    """
    if self.valueFunction_fig is None:
        plt.figure("Value Function")
        self.valueFunction_fig = plt.imshow(
            self.map, cmap='ValueFunction', interpolation='nearest',
            vmin=self.MIN_RETURN, vmax=self.MAX_RETURN)
        plt.xticks(np.arange(self.COLS), fontsize=12)
        plt.yticks(np.arange(self.ROWS), fontsize=12)

        # Length of arrow / width of box. Less than 0.5 because each arrow
        # is offset; 0.4 looks nice but could be auto-generated.
        arrow_ratio = 0.4
        Max_Ratio_ArrowHead_to_ArrowLength = 0.25
        ARROW_WIDTH = 0.5 * Max_Ratio_ArrowHead_to_ArrowLength / 5.0

        def make_quiver(row_shift, col_shift, vertical):
            # Build one quiver layer on a (COLS, ROWS) grid, offset from the
            # cell centres by the given shifts.
            X = np.arange(self.ROWS) + row_shift
            Y = np.arange(self.COLS) + col_shift
            X, Y = np.meshgrid(X, Y)
            placeholder = np.ones(X.shape)
            C = np.zeros(X.shape)
            C[0, 0] = 1  # make sure C contains both 0 and 1
            if vertical:
                options = dict(units='y', scale_units="height",
                               scale=self.ROWS / arrow_ratio,
                               width=-1 * ARROW_WIDTH)
            else:
                options = dict(units='x', scale_units="width",
                               scale=self.COLS / arrow_ratio,
                               width=ARROW_WIDTH)
            quiver = plt.quiver(Y, X, placeholder, placeholder, C,
                                cmap='Actions', **options)
            quiver.set_clim(vmin=0, vmax=1)
            return quiver

        # Create quivers for each action. 4 in total.
        self.upArrows_fig = make_quiver(-self.SHIFT, 0, True)
        self.downArrows_fig = make_quiver(self.SHIFT, 0, True)
        self.leftArrows_fig = make_quiver(0, -self.SHIFT, False)
        self.rightArrows_fig = make_quiver(0, self.SHIFT, False)
        plt.show()

    plt.figure("Value Function")
    V = np.zeros((self.ROWS, self.COLS))
    # Per-cell, per-action arrays laid out as (column, row, action) to match
    # the quiver grids created above.
    layout = (self.COLS, self.ROWS, self.actions_num)
    # True hides the arrow; cleared below for the actions possible in a cell.
    Mask = np.ones(layout, dtype='bool')
    arrowSize = np.zeros(layout, dtype='float')
    # 0 = suboptimal action, 1 = optimal action
    arrowColors = np.zeros(layout, dtype='uint8')

    for r in range(self.ROWS):
        for c in range(self.COLS):
            cell = self.map[r, c]
            if cell == self.BLOCKED:
                V[r, c] = 0
            if cell == self.GOAL:
                V[r, c] = self.MAX_RETURN
            if cell == self.PIT:
                V[r, c] = self.MIN_RETURN
            if cell == self.EMPTY or cell == self.START:
                s = np.array([r, c])
                As = self.possibleActions(s)
                terminal = self.isTerminal(s)
                Qs = representation.Qs(s, terminal)
                bestA = representation.bestActions(s, terminal, As)
                V[r, c] = max(Qs[As])
                Mask[c, r, As] = False
                arrowColors[c, r, bestA] = 1
                for action in As:
                    # Qs is indexed by action id (as in max(Qs[As]) above),
                    # so look up Qs[action] rather than the position of the
                    # action inside As.
                    arrowSize[c, r, action] = linearMap(
                        Qs[action], self.MIN_RETURN, self.MAX_RETURN, 0, 1)

    # Show Value Function
    self.valueFunction_fig.set_data(V)

    # Show Policy arrows: (quiver, action id, sign, vertical?).
    arrow_layers = (
        (self.upArrows_fig, 0, 1, True),
        (self.downArrows_fig, 1, -1, True),
        (self.leftArrows_fig, 2, -1, False),
        (self.rightArrows_fig, 3, 1, False),
    )
    for quiver, action, sign, vertical in arrow_layers:
        hidden = Mask[:, :, action]
        length = np.ma.masked_array(sign * arrowSize[:, :, action],
                                    mask=hidden)
        # Zero component with the same (COLS, ROWS) shape as the mask.
        still = np.ma.masked_array(np.zeros(hidden.shape), mask=hidden)
        colors = np.ma.masked_array(arrowColors[:, :, action], mask=hidden)
        if vertical:
            quiver.set_UVC(still, length, colors)
        else:
            quiver.set_UVC(length, still, colors)
    plt.draw()
def showLearning(self, representation):
    """Show the learned value function and per-action arrows on the grid.

    On the first call the "Value Function" figure is created with a heat map
    and four quiver layers (up, down, left, right). Every call then
    recomputes each cell's value plus the arrow lengths and colours, and
    pushes them into those artists.

    :param representation: Object providing ``Qs(s, terminal)`` (indexed by
        action id) and ``bestActions(s, terminal, actions)``.
    """
    if self.valueFunction_fig is None:
        plt.figure("Value Function")
        self.valueFunction_fig = plt.imshow(
            self.map,
            cmap='ValueFunction',
            interpolation='nearest',
            vmin=self.MIN_RETURN,
            vmax=self.MAX_RETURN)
        plt.xticks(np.arange(self.COLS), fontsize=12)
        plt.yticks(np.arange(self.ROWS), fontsize=12)

        # Length of arrow / width of box. Less than 0.5 because each arrow
        # is offset; 0.4 looks nice but could be auto-generated.
        arrow_ratio = 0.4
        Max_Ratio_ArrowHead_to_ArrowLength = 0.25
        ARROW_WIDTH = 0.5 * Max_Ratio_ArrowHead_to_ArrowLength / 5.0

        def make_quiver(row_shift, col_shift, vertical):
            # Build one quiver layer on a (COLS, ROWS) grid, offset from the
            # cell centres by the given shifts.
            X = np.arange(self.ROWS) + row_shift
            Y = np.arange(self.COLS) + col_shift
            X, Y = np.meshgrid(X, Y)
            placeholder = np.ones(X.shape)
            C = np.zeros(X.shape)
            C[0, 0] = 1  # make sure C contains both 0 and 1
            if vertical:
                options = dict(units='y', scale_units="height",
                               scale=self.ROWS / arrow_ratio,
                               width=-1 * ARROW_WIDTH)
            else:
                options = dict(units='x', scale_units="width",
                               scale=self.COLS / arrow_ratio,
                               width=ARROW_WIDTH)
            quiver = plt.quiver(Y, X, placeholder, placeholder, C,
                                cmap='Actions', **options)
            quiver.set_clim(vmin=0, vmax=1)
            return quiver

        # Create quivers for each action. 4 in total.
        self.upArrows_fig = make_quiver(-self.SHIFT, 0, True)
        self.downArrows_fig = make_quiver(self.SHIFT, 0, True)
        self.leftArrows_fig = make_quiver(0, -self.SHIFT, False)
        self.rightArrows_fig = make_quiver(0, self.SHIFT, False)
        plt.show()

    plt.figure("Value Function")
    V = np.zeros((self.ROWS, self.COLS))
    # Per-cell, per-action arrays laid out as (column, row, action) to match
    # the quiver grids created above.
    layout = (self.COLS, self.ROWS, self.actions_num)
    # True hides the arrow; cleared below for the actions possible in a cell.
    Mask = np.ones(layout, dtype='bool')
    arrowSize = np.zeros(layout, dtype='float')
    # 0 = suboptimal action, 1 = optimal action
    arrowColors = np.zeros(layout, dtype='uint8')

    for r in range(self.ROWS):
        for c in range(self.COLS):
            cell = self.map[r, c]
            if cell == self.BLOCKED:
                V[r, c] = 0
            if cell == self.GOAL:
                V[r, c] = self.MAX_RETURN
            if cell == self.PIT:
                V[r, c] = self.MIN_RETURN
            if cell == self.EMPTY or cell == self.START:
                s = np.array([r, c])
                As = self.possibleActions(s)
                terminal = self.isTerminal(s)
                Qs = representation.Qs(s, terminal)
                bestA = representation.bestActions(s, terminal, As)
                V[r, c] = max(Qs[As])
                Mask[c, r, As] = False
                arrowColors[c, r, bestA] = 1
                for action in As:
                    # Qs is indexed by action id (as in max(Qs[As]) above),
                    # so look up Qs[action] rather than the position of the
                    # action inside As.
                    arrowSize[c, r, action] = linearMap(
                        Qs[action], self.MIN_RETURN, self.MAX_RETURN, 0, 1)

    # Show Value Function
    self.valueFunction_fig.set_data(V)

    # Show Policy arrows: (quiver, action id, sign, vertical?).
    arrow_layers = (
        (self.upArrows_fig, 0, 1, True),
        (self.downArrows_fig, 1, -1, True),
        (self.leftArrows_fig, 2, -1, False),
        (self.rightArrows_fig, 3, 1, False),
    )
    for quiver, action, sign, vertical in arrow_layers:
        hidden = Mask[:, :, action]
        length = np.ma.masked_array(sign * arrowSize[:, :, action],
                                    mask=hidden)
        # Zero component with the same (COLS, ROWS) shape as the mask.
        still = np.ma.masked_array(np.zeros(hidden.shape), mask=hidden)
        colors = np.ma.masked_array(arrowColors[:, :, action], mask=hidden)
        if vertical:
            quiver.set_UVC(still, length, colors)
        else:
            quiver.set_UVC(length, still, colors)
    plt.draw()