import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches


def showLearning(self, representation):
    allStates = np.arange(0, self.chainSize)
    X = np.arange(self.chainSize) * 2.0 / 10.0 - self.SHIFT
    Y = np.ones(self.chainSize) * self.Y
    DY = np.zeros(self.chainSize)
    DX = np.zeros(self.chainSize)
    C = np.zeros(self.chainSize)

    if self.value_function_fig is None:
        self.value_function_fig = plt.subplot(3, 1, 2)
        V = [representation.V(s, False, self.possibleActions(s=s))
             for s in allStates]
        # Note the comma below, since a tuple of line objects is returned
        self.V_approx_line, = self.value_function_fig.plot(
            allStates, V, 'r-', linewidth=3)
        self.V_star_line = self.value_function_fig.plot(
            allStates, self.V_star, 'b--', linewidth=3)
        # Maximum value function is the sum of all possible rewards
        plt.ylim([0, self.GOAL_REWARD * (len(self.GOAL_STATES) + 1)])

        self.policy_fig = plt.subplot(3, 1, 3)
        self.policy_fig.set_xlim(0, self.chainSize * 2 / 10.0)
        self.policy_fig.set_ylim(0, 2)
        self.arrows = plt.quiver(X, Y, DX, DY, C, cmap='fiftyChainActions',
                                 units='x', width=0.05, scale=.008, alpha=.8)
        # headwidth=.05, headlength=.03, headaxislength=.02
        self.policy_fig.xaxis.set_visible(False)
        self.policy_fig.yaxis.set_visible(False)

    V = [representation.V(s, False, self.possibleActions(s=s))
         for s in allStates]
    pi = [representation.bestAction(s, False, self.possibleActions(s=s))
          for s in allStates]
    # pi = [self.optimal_policy[s] for s in allStates]

    # Arrows point left or right depending on the greedy action (0 or 1)
    DX = [(2 * a - 1) * self.SHIFT * .1 for a in pi]

    self.V_approx_line.set_ydata(V)
    self.arrows.set_UVC(DX, DY, pi)
    plt.draw()
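# --- Hedged sketch, not part of the original source --------------------------
# showLearning only touches two methods on `representation`:
# V(s, terminal, possible_actions) and bestAction(s, terminal, possible_actions).
# A minimal stand-in satisfying that interface might look like this; the
# class name and the tabular storage are assumptions for illustration only.
class TabularRepresentationStub:
    def __init__(self, values, policy):
        self.values = values  # value estimate per state, e.g. a numpy array
        self.policy = policy  # greedy action id per state (0 or 1 here)

    def V(self, s, terminal, possible_actions):
        # terminal / possible_actions are accepted only to match the call site
        return self.values[s]

    def bestAction(self, s, terminal, possible_actions):
        return self.policy[s]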
def showDomain(self, a=0):
    s = self.state
    # Draw the environment
    if self.circles is None:
        self.domain_fig = plt.subplot(3, 1, 1)
        plt.figure(1, (self.chainSize * 2 / 10.0, 2))
        self.domain_fig.set_xlim(0, self.chainSize * 2 / 10.0)
        self.domain_fig.set_ylim(0, 2)
        # Make the last one double circle
        self.domain_fig.add_patch(
            mpatches.Circle((1 / 5.0 + 2 / 10.0 * (self.chainSize - 1),
                             self.Y), self.RADIUS * 1.1, fc="w"))
        self.domain_fig.xaxis.set_visible(False)
        self.domain_fig.yaxis.set_visible(False)
        self.circles = [mpatches.Circle((1 / 5.0 + 2 / 10.0 * i, self.Y),
                                        self.RADIUS, fc="w")
                        for i in range(self.chainSize)]
        for i in range(self.chainSize):
            self.domain_fig.add_patch(self.circles[i])
        plt.show()

    for p in self.circles:
        p.set_facecolor('w')
    for p in self.GOAL_STATES:
        self.circles[p].set_facecolor('g')
    self.circles[s].set_facecolor('k')
    plt.draw()
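# --- Hedged sketch, not part of the original source --------------------------
# The layout arithmetic above places circle i at x = 1/5 + i/5 on an axis
# that is chainSize/5 wide. A standalone reproduction of the row of circles,
# with assumed example values for the chain size, radius, and height:
def _sketch_chain_layout(chain_size=10, radius=0.05, y=1.0):
    import matplotlib.pyplot as plt
    import matplotlib.patches as mpatches

    ax = plt.subplot(1, 1, 1)
    ax.set_xlim(0, chain_size * 2 / 10.0)
    ax.set_ylim(0, 2)
    for i in range(chain_size):
        # same spacing as showDomain: centers 2/10 apart, starting at 1/5
        ax.add_patch(
            mpatches.Circle((1 / 5.0 + 2 / 10.0 * i, y), radius, fc="w"))
    plt.show()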
# id2vec and FONTSIZE are helpers expected from the package's Tools module
def showDomain(self, a=0):
    s = self.state
    # Draw the environment
    if self.domain_fig is None:
        self.move_fig = plt.subplot(111)
        s = s.reshape((self.BOARD_SIZE, self.BOARD_SIZE))
        self.domain_fig = plt.imshow(s, cmap='FlipBoard',
                                     interpolation='nearest', vmin=0, vmax=1)
        plt.xticks(np.arange(self.BOARD_SIZE), fontsize=FONTSIZE)
        plt.yticks(np.arange(self.BOARD_SIZE), fontsize=FONTSIZE)
        # plt.tight_layout()
        a_row, a_col = id2vec(a, [self.BOARD_SIZE, self.BOARD_SIZE])
        # move_fig now holds the list of marker line objects returned by plot
        self.move_fig = self.move_fig.plot(a_col, a_row, 'kx',
                                           markersize=30.0)
        plt.show()

    a_row, a_col = id2vec(a, [self.BOARD_SIZE, self.BOARD_SIZE])
    # Remove the previous action marker before drawing the new one
    self.move_fig.pop(0).remove()
    # Instead of 'x' you can use 'D' or 'o' as the marker
    self.move_fig = plt.plot(a_col, a_row, 'kx', markersize=30.0)
    s = s.reshape((self.BOARD_SIZE, self.BOARD_SIZE))
    self.domain_fig.set_data(s)
    plt.draw()
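# --- Hedged sketch, not part of the original source --------------------------
# id2vec unflattens the action id into (row, col) coordinates on the board.
# Assuming the usual row-major convention, it is equivalent to divmod for a
# square board; the helper name below is hypothetical.
def _id2vec_sketch(a, lims):
    row, col = divmod(a, lims[1])
    return row, col

# e.g. on a 4x4 board, action 6 would mark row 1, column 2:
# _id2vec_sketch(6, [4, 4]) == (1, 2)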