def showLearning(self, representation):
    """Plot the greedy policy and the value function of ``representation``.

    The state space is sampled on a regular grid spanning
    ``[XMIN, XMAX]`` (``X_discretization`` samples) by
    ``[XDOTMIN, XDOTMAX]`` (``XDot_discretization`` samples). For every grid
    point the representation is queried for its Q-values and best action;
    the maximum Q-value goes into the "Value Function" figure and the best
    action into the "Policy" figure. Both figures are created on the first
    call and only have their image data refreshed afterwards.

    Args:
        representation: object exposing ``Qs(s, flag)`` and
            ``bestAction(s, flag, actions)``. ``False`` is passed for
            ``flag`` (presumably a "terminal state" indicator -- confirm
            against the representation API).
    """
    # Rows of an image are drawn along the vertical axis (labelled x-dot)
    # and columns along the horizontal axis (labelled x), so the arrays must
    # be (XDot_discretization, X_discretization). The previous
    # (X, XDot) allocation raised IndexError whenever the two
    # discretizations differed.
    grid_shape = (self.XDot_discretization, self.X_discretization)
    pi = np.zeros(grid_shape, 'uint8')
    V = np.zeros(grid_shape)

    if self.valueFunction_fig is None:
        # First call: create both figures with placeholder (all-zero) data.
        self.valueFunction_fig = plt.figure("Value Function")
        self.valueFunction_im = plt.imshow(
            V,
            cmap='ValueFunction',
            interpolation='nearest',
            origin='lower',
            vmin=self.MIN_RETURN,
            vmax=self.MAX_RETURN)
        plt.xticks(self.xTicks, self.xTicksLabels, fontsize=12)
        plt.yticks(self.yTicks, self.yTicksLabels, fontsize=12)
        plt.xlabel(r"$x$")
        plt.ylabel(r"$\dot x$")

        self.policy_fig = plt.figure("Policy")
        self.policy_im = plt.imshow(
            pi,
            cmap='MountainCarActions',
            interpolation='nearest',
            origin='lower',
            vmin=0,
            vmax=self.actions_num)
        plt.xticks(self.xTicks, self.xTicksLabels, fontsize=12)
        plt.yticks(self.yTicks, self.yTicksLabels, fontsize=12)
        plt.xlabel(r"$x$")
        plt.ylabel(r"$\dot x$")
        plt.show()

    # The x samples do not depend on the row, so build them once.
    x_samples = np.linspace(self.XMIN, self.XMAX, self.X_discretization)
    xdot_samples = np.linspace(
        self.XDOTMIN, self.XDOTMAX, self.XDot_discretization)

    for row, xDot in enumerate(xdot_samples):
        for col, x in enumerate(x_samples):
            s = np.array([x, xDot])
            Qs = representation.Qs(s, False)
            As = self.possibleActions()
            pi[row, col] = representation.bestAction(s, False, As)
            V[row, col] = max(Qs)

    self.valueFunction_im.set_data(V)
    self.policy_im.set_data(pi)

    # Re-select each figure by name so that plt.draw() refreshes it.
    self.valueFunction_fig = plt.figure("Value Function")
    plt.draw()
    self.policy_fig = plt.figure("Policy")
    plt.draw()
def showDomain(self, a):
    """Draw the car on the hill plus an arrow showing the acceleration.

    The static scene (hill, goal marker, car line) is built the first time
    this is called, i.e. while ``self.domain_fig`` is ``None``. Every call
    then repositions the car according to the position stored in
    ``self.state`` and replaces the action arrow.

    Args:
        a: index into ``self.actions``. A positive action value draws an
            arrow from the front of the car along the slope; a negative one
            draws an arrow from the back of the car in the opposite
            direction; zero draws no arrow.
    """
    # Parts of this code were adapted from Jose Antonio Martin H.
    # <*****@*****.**> online source code
    pos, _ = self.state  # only the position is needed for drawing

    if self.domain_fig is None:
        # One-time setup of the figure and its static artists.
        self.domain_fig = plt.figure("Mountain Car Domain")
        axes = plt.gca()

        # Hill profile y = sin(3x), filled down to a little below its minimum.
        hill_x = np.linspace(self.XMIN, self.XMAX, 1000)
        hill_y = np.sin(3 * hill_x)
        floor_y = hill_y.min() - self.CAR_HEIGHT * 2
        ceiling_y = hill_y.max() + self.CAR_HEIGHT * 2
        axes.fill_between(hill_x, floor_y, hill_y, color='g')
        plt.xlim([self.XMIN - .2, self.XMAX])
        plt.ylim([floor_y, ceiling_y])

        # The car is a thick line segment whose end points are set below.
        self.car = lines.Line2D([], [], linewidth=20, color='b', alpha=.8)
        axes.add_line(self.car)

        # Goal marker placed on the hill surface.
        plt.plot(self.GOAL, np.sin(3 * self.GOAL), 'yd', markersize=10.0)
        plt.axis('off')
        axes.set_aspect('1')

    # Select the domain figure so the final plt.draw() targets it.
    self.domain_fig = plt.figure("Mountain Car Domain")

    # Orientation of the car: angle of the tangent to y = sin(3x) at pos.
    slope = np.arctan(3 * np.cos(3 * pos))
    cos_slope = np.cos(slope)
    sin_slope = np.sin(slope)

    # Half of the car length projected onto each axis.
    half_dx = self.CAR_WIDTH * cos_slope / 2.
    half_dy = self.CAR_WIDTH * sin_slope / 2.

    centre_y = np.sin(3 * pos)
    back_x, front_x = pos - half_dx, pos + half_dx
    back_y, front_y = centre_y - half_dy, centre_y + half_dy
    self.car.set_data([back_x, front_x], [back_y, front_y])

    # Drop the arrow from the previous call before drawing a new one.
    if self.actionArrow is not None:
        self.actionArrow.remove()
        self.actionArrow = None

    acceleration = self.actions[a]
    if acceleration > 0:
        # Arrow starting at the front of the car, pointing along the slope.
        self.actionArrow = fromAtoB(
            front_x,
            front_y,
            front_x + self.ARROW_LENGTH * cos_slope,
            front_y + self.ARROW_LENGTH * sin_slope,
            'k', "arc3,rad=0", 0, 0, 'simple')
    elif acceleration < 0:
        # Arrow starting at the back of the car, pointing the opposite way.
        self.actionArrow = fromAtoB(
            back_x,
            back_y,
            back_x - self.ARROW_LENGTH * cos_slope,
            back_y - self.ARROW_LENGTH * sin_slope,
            'r', "arc3,rad=0", 0, 0, 'simple')

    plt.draw()