def _plot_valfun(self, VMat):
    """
    Show ``VMat`` as an image in the "Value Function" figure and redraw it.

    The figure and image are created lazily: whenever
    ``self.valueFunction_fig`` or ``self.valueFunction_img`` is ``None`` a
    new figure, subplot and image are built. On every call the image data
    is replaced, the color scale is re-normalized to the current min/max of
    ``VMat``, and the figure canvas is redrawn.

    :param VMat: matrix of values to display; must provide ``min()`` and
        ``max()`` and be accepted by ``imshow``.

    Nothing is returned; the handles are kept on ``self``.
    """
    figure_ready = (self.valueFunction_fig is not None
                    and self.valueFunction_img is not None)
    if not figure_ready:
        # Initial color limits come from the first matrix we are given.
        upper, lower = VMat.max(), VMat.min()
        self.valueFunction_fig = pl.figure("Value Function")
        axes = self.valueFunction_fig.add_subplot(111)
        # 'ValueFunction' is looked up by name; presumably a colormap
        # registered elsewhere in the project -- TODO confirm.
        image_options = dict(
            cmap='ValueFunction',
            interpolation='nearest',
            origin='lower',
            vmin=lower,
            vmax=upper,
        )
        self.valueFunction_img = axes.imshow(VMat, **image_options)
        pl.xticks(self.xTicks, self.xTicksLabels, fontsize=12)
        # No y ticks/label here; the original note says they are not needed
        # because axes are shared on the subplot.
        pl.xlabel(r"$\theta$ (degree)")
        pl.title('Value Function')

    # Rescale colors to the matrix being shown now, not the first one.
    rescaled = colors.Normalize(vmin=VMat.min(), vmax=VMat.max())
    image = self.valueFunction_img
    image.set_data(VMat)
    image.set_norm(rescaled)
    self.valueFunction_fig.canvas.draw()
def _plot_policy(self, piMat):
    """
    Show ``piMat`` as an image in the "Policy" figure and redraw it.

    The figure and image are created lazily: whenever ``self.policy_fig``
    or ``self.policy_img`` is ``None`` a new figure, subplot and image are
    built, with the color scale fixed to ``[0, self.actions_num]``. On
    every call the image data is replaced and the figure canvas is redrawn.

    :param piMat: matrix of policy values to display; passed unchanged to
        ``imshow`` / ``set_data``.

    Nothing is returned; the handles are kept on ``self``.
    """
    figure_ready = (self.policy_fig is not None
                    and self.policy_img is not None)
    if not figure_ready:
        self.policy_fig = pl.figure("Policy")
        axes = self.policy_fig.add_subplot(111)
        # 'InvertedPendulumActions' is looked up by name; presumably a
        # colormap registered elsewhere in the project -- TODO confirm.
        image_options = dict(
            cmap='InvertedPendulumActions',
            interpolation='nearest',
            origin='lower',
            vmin=0,
            vmax=self.actions_num,
        )
        self.policy_img = axes.imshow(piMat, **image_options)
        pl.xticks(self.xTicks, self.xTicksLabels, fontsize=12)
        pl.yticks(self.yTicks, self.yTicksLabels, fontsize=12)
        pl.xlabel(r"$\theta$ (degree)")
        pl.ylabel(r"$\dot{\theta}$ (degree/sec)")
        pl.title('Policy')

    # Refresh path: swap in the new matrix and repaint.
    image = self.policy_img
    image.set_data(piMat)
    self.policy_fig.canvas.draw()
def _plot_state(self, fourDimState, a):
    """
    Draw the cartpole for one state and mark the applied force with an arrow.

    When any of the figure, pendulum-arm, cart-box or cart-blob handles on
    ``self`` is ``None``, the "Domain" figure and its artists are built
    first. Every call then moves the cart and pendulum to the given state,
    replaces the action arrow and redraws the canvas.

    :param fourDimState: Four-dimensional cartpole state
        (``theta, thetaDot, x, xDot``); read here through
        ``StateIndex.X`` and ``StateIndex.THETA``.
    :param a: index into ``self.AVAIL_FORCE`` selecting the force action on
        the cart. The arrow shows that nominal force only (not including
        noise!); no arrow is drawn for a zero force.
    """
    scene_ready = (self.domain_fig is not None
                   and self.pendulumArm is not None
                   and self.cartBox is not None
                   and self.cartBlob is not None)
    if not scene_ready:
        # One-time construction of the figure and its artists.
        self.domain_fig = pl.figure("Domain")
        self.domain_ax = self.domain_fig.add_axes(
            [0, 0, 1, 1], frameon=True, aspect=1.)

        self.pendulumArm = lines.Line2D(
            [], [], linewidth=self.PEND_WIDTH, color='black')
        # Both rectangles start at x=0 and are vertically centered on the
        # pivot height; their x position is updated on every call below.
        box_origin = [
            0, self.PENDULUM_PIVOT_Y - old_div(self.RECT_HEIGHT, 2.0)]
        self.cartBox = mpatches.Rectangle(
            box_origin, self.RECT_WIDTH, self.RECT_HEIGHT, alpha=.4)
        blob_origin = [
            0, self.PENDULUM_PIVOT_Y - old_div(self.BLOB_WIDTH, 2.0)]
        self.cartBlob = mpatches.Rectangle(
            blob_origin, self.BLOB_WIDTH, self.BLOB_WIDTH, alpha=.4)

        self.domain_ax.add_patch(self.cartBox)
        self.domain_ax.add_line(self.pendulumArm)
        self.domain_ax.add_patch(self.cartBlob)

        # Hatched ground outline.
        ground = mpatches.PathPatch(
            mpath.Path(self.GROUND_VERTS), hatch="//")
        self.domain_ax.add_patch(ground)

        # Empty text placeholders; this method never fills them in.
        self.timeText = self.domain_ax.text(
            self.POSITION_LIMITS[1], self.LENGTH, "")
        self.rewardText = self.domain_ax.text(
            self.POSITION_LIMITS[0], self.LENGTH, "")

        # Leave room for the pendulum to swing without being clipped.
        margin = self.LENGTH + 0.5
        left_limit = self.POSITION_LIMITS[0]
        right_limit = self.POSITION_LIMITS[1]
        if (left_limit < -100 * self.LENGTH
                or right_limit > 100 * self.LENGTH):
            # Huge position limits: cap the view width so the cart stays
            # visible.
            x_bounds = (-margin, margin)
        else:
            x_bounds = (left_limit - margin, right_limit + margin)
        self.domain_ax.set_xlim(*x_bounds)
        self.domain_ax.set_ylim(-margin, margin)

        pl.show()

    force = self.AVAIL_FORCE[a]
    cart_x = fourDimState[StateIndex.X]
    angle = fourDimState[StateIndex.THETA]

    # The sin/cos usage puts the bob straight above the pivot at angle 0.
    bob_x = cart_x + self.LENGTH * np.sin(angle)
    bob_y = self.PENDULUM_PIVOT_Y + self.LENGTH * np.cos(angle)

    if self.DEBUG:
        print('Pendulum Position: ', bob_x, bob_y)

    # Move the arm and center both cart rectangles on the cart position.
    self.pendulumArm.set_data(
        [cart_x, bob_x], [self.PENDULUM_PIVOT_Y, bob_y])
    half_cart = old_div(self.RECT_WIDTH, 2.0)
    self.cartBox.set_x(cart_x - half_cart)
    self.cartBlob.set_x(cart_x - old_div(self.BLOB_WIDTH, 2.0))

    # Drop the arrow from the previous call before drawing a new one.
    if self.actionArrow is not None:
        self.actionArrow.remove()
        self.actionArrow = None

    if force != 0:
        if force > 0:
            # Rightward force: black arrow ending at the cart's left edge.
            tail_x = cart_x - self.ACTION_ARROW_LENGTH - half_cart
            tip_x = cart_x - half_cart
            arrow_color = 'k'
        else:
            # Leftward force: red arrow ending at the cart's right edge.
            tail_x = cart_x + self.ACTION_ARROW_LENGTH + half_cart
            tip_x = cart_x + half_cart
            arrow_color = 'r'
        # NOTE(review): presumably the 2nd and 4th arguments are the
        # arrow's y-coordinates; they are hard-coded to 0, which lines up
        # with the cart only if PENDULUM_PIVOT_Y is 0 -- confirm.
        self.actionArrow = fromAtoB(
            tail_x, 0,
            tip_x, 0,
            arrow_color, "arc3,rad=0", 0, 0, 'simple', ax=self.domain_ax
        )

    self.domain_fig.canvas.draw()