def _plot_valfun(self, VMat, xlim=None, ylim=None):
    """
    Draw ``VMat`` as an image in the figure named "Value Function".

    :param VMat: 2-D array of values to display; it must provide ``min()``
        and ``max()`` (e.g. a numpy array), which set the colour range.
    :param xlim: optional pair ``(low, high)`` giving the horizontal extent
        of the image.
    :param ylim: optional pair ``(low, high)`` giving the vertical extent
        of the image. ``xlim`` and ``ylim`` are only honoured when both are
        given; otherwise the image spans the unit square.

    :returns: handle to the image created by ``imshow``; the same handle is
        stored in ``self.valueFunction_fig``.
    """
    plt.figure("Value Function")
    plt.title('Value Function')

    if xlim is not None and ylim is not None:
        extent = [xlim[0], xlim[1], ylim[0], ylim[1]]
    else:
        extent = [0, 1, 0, 1]

    # Scale the colours to the range of the data being shown.
    norm = colors.Normalize(vmin=VMat.min(), vmax=VMat.max())

    # NOTE(review): 'ValueFunction' is not a built-in matplotlib colormap;
    # presumably it is registered elsewhere in the project -- confirm.
    self.valueFunction_fig = plt.imshow(
        VMat,
        cmap='ValueFunction',
        interpolation='nearest',
        origin='lower',
        extent=extent,
        norm=norm)
    plt.draw()
    return self.valueFunction_fig
def show_one_variable(variable, vrange, map1="9x9-2PathR1.txt"):
    """
    Evaluate ``param_ranges`` for ``variable`` over ``vrange`` twice -- once
    with its default map and once with ``cur_map=map1`` -- and plot both
    result series against ``vrange``.

    Plotting is best-effort: if drawing fails (e.g. no display is
    available) a warning is issued and the computed data is still returned.

    :param variable: name of the parameter being varied; also used in the
        plot title.
    :param vrange: sequence of values for ``variable``; used as the x axis.
    :param map1: value passed to ``param_ranges`` as ``cur_map`` for the
        second series.

    :returns: tuple ``(ctrl, y)`` holding the two ``param_ranges`` results.
    """
    import warnings

    ctrl = param_ranges(variable, vrange)
    y = param_ranges(variable, vrange, cur_map=map1)

    try:
        plt.title(variable + " vs AUC")
        plt.ioff()
        # Label both series so that the legend has entries to show.
        plt.plot(vrange, ctrl, label="control")
        plt.plot(vrange, y, label=str(map1))
        plt.legend()
        plt.show()
    except Exception as exc:
        # Only ordinary errors are tolerated here; KeyboardInterrupt and
        # SystemExit propagate instead of being swallowed by a
        # ``return`` inside ``finally``.
        warnings.warn("plotting %s vs AUC failed: %s" % (variable, exc))

    return ctrl, y
def showLearning(self, representation):
    """
    Plot the greedy policy and the value function of ``representation`` on
    a 2-D grid over ``theta`` and ``thetaDot``.

    The state passed to the representation has four components
    ``[theta, thetaDot, x, xDot]``; the last two are held fixed at
    ``xSlice`` and ``xDotSlice`` (both 0), so only that slice is shown.

    :param representation: object providing ``Qs(s, terminal)``, which
        returns the array of Q-values of all actions at state ``s``.
    """
    xSlice = 0.  # value of x assumed when plotting V and pi
    xDotSlice = 0.  # value of xDot assumed when plotting V and pi

    (thetas, theta_dots) = self._setup_learning(representation)

    # Rows index thetaDot, columns index theta.
    pi = np.zeros((len(theta_dots), len(thetas)), 'uint8')
    V = np.zeros((len(theta_dots), len(thetas)))

    for row, thetaDot in enumerate(theta_dots):
        for col, theta in enumerate(thetas):
            s = np.array([theta, thetaDot, xSlice, xDotSlice])
            terminal = self.isTerminal(s)
            # Array of Q-function evaluated at all possible actions at
            # state s
            Qs = representation.Qs(s, terminal)
            # Array of all possible actions at state s
            As = self.possibleActions(s=s)
            best_q = Qs.max()
            # If multiple optimal actions, pick one randomly
            a = np.random.choice(As[Qs == best_q])
            # pi holds an optimal action, V the Q-value under that action
            pi[row, col] = a
            V[row, col] = best_q

    self._plot_policy(pi)
    plt.title("Policy (Slice at x=0, xDot=0)")
    self._plot_valfun(V)
    plt.title("Value Function (Slice at x=0, xDot=0)")

    # Use the same pyplot alias as the rest of this method.
    plt.draw()
def _plot_policy(self, piMat, title="Policy", var="policy_fig", xlim=None, ylim=None):
    """
    Draw the policy matrix ``piMat`` as a colour-coded image in the figure
    named ``title``.

    On the first call (no image stored under the attribute ``var`` yet) the
    image, its discrete colormap and a colorbar are created; later calls
    only replace the image data.

    :param piMat: 2-D array of action indices to display.
    :param title: name of the figure and title of the plot.
    :param var: name of the attribute on ``self`` under which the image
        handle is cached.
    :param xlim: optional pair ``(low, high)`` for the horizontal extent.
    :param ylim: optional pair ``(low, high)`` for the vertical extent.
        ``xlim`` and ``ylim`` are only used when the image is first created
        and only when both are given; otherwise the unit square is used.

    :returns: handle to the image showing the policy (also stored on
        ``self`` under ``var``).
    """
    handle = getattr(self, var, None)

    if handle is None:
        plt.figure(title)

        # Start from the jet colormap and force its first entry to grey.
        cmap = plt.cm.jet
        cmaplist = [cmap(i) for i in range(cmap.N)]
        cmaplist[0] = (.5, .5, .5, 1.0)
        cmap = cmap.from_list('Custom cmap', cmaplist, cmap.N)

        # One colour bin per action.
        bounds = np.linspace(0, self.actions_num, self.actions_num + 1)
        norm = mpl.colors.BoundaryNorm(bounds, cmap.N)

        if xlim is not None and ylim is not None:
            extent = [xlim[0], xlim[1], ylim[0], ylim[1]]
        else:
            extent = [0, 1, 0, 1]

        handle = plt.imshow(
            piMat,
            interpolation='nearest',
            origin='lower',
            cmap=cmap,
            norm=norm,
            extent=extent)
        # setattr (rather than writing to __dict__) keeps the store
        # symmetric with the getattr lookup above.
        setattr(self, var, handle)

        plt.title(title)
        plt.colorbar()

    plt.figure(title)
    handle.set_data(piMat)
    plt.draw()
    return handle
def _plot_policy(self, piMat, title="Policy", var="policy_fig", xlim=None, ylim=None):
    """
    Show the policy matrix ``piMat`` as an image in the figure ``title``.

    The image handle is cached on ``self`` under the attribute named by
    ``var``. If no handle is cached yet, the figure, a discrete colormap
    (jet with a grey first entry, one bin per action), the image and a
    colorbar are created. In every case the image data is then set to
    ``piMat`` and the figure is redrawn.

    :param piMat: 2-D array of action indices to display.
    :param title: figure name and plot title.
    :param var: attribute name on ``self`` used to cache the image handle.
    :param xlim: optional ``(low, high)`` horizontal extent.
    :param ylim: optional ``(low, high)`` vertical extent. Both limits must
        be supplied to take effect, and they are only applied when the
        image is first created; the default extent is ``[0, 1, 0, 1]``.

    :returns: handle to the policy image (the object cached under ``var``).
    """
    image = getattr(self, var, None)

    if image is None:
        plt.figure(title)

        # Build the colormap: every jet colour, with entry 0 forced to grey.
        jet = plt.cm.jet
        colour_list = [jet(i) for i in range(jet.N)]
        colour_list[0] = (.5, .5, .5, 1.0)
        cmap = jet.from_list('Custom cmap', colour_list, jet.N)

        # Bin edges 0..actions_num so that each action gets its own colour.
        bounds = np.linspace(0, self.actions_num, self.actions_num + 1)
        norm = mpl.colors.BoundaryNorm(bounds, cmap.N)

        have_limits = xlim is not None and ylim is not None
        extent = [xlim[0], xlim[1], ylim[0], ylim[1]] if have_limits \
            else [0, 1, 0, 1]

        image = plt.imshow(
            piMat,
            interpolation='nearest',
            origin='lower',
            cmap=cmap,
            norm=norm,
            extent=extent)
        # Cache via setattr so the later getattr lookup finds it however
        # the attribute is declared on the class.
        setattr(self, var, image)

        plt.title(title)
        plt.colorbar()

    # Make the policy figure current before updating and redrawing it.
    plt.figure(title)
    image.set_data(piMat)
    plt.draw()
    return image