Ejemplo n.º 1
0
    def _plot_valfun(self, VMat, xlim=None, ylim=None):
        """
        Draw ``VMat`` as an image in the figure named "Value Function".

        The image artist is cached on ``self.valueFunction_fig`` and reused
        on later calls, so repeated calls update a single image instead of
        stacking a new one on the axes every time.

        :param VMat: 2-D array of values; must provide ``min()`` and
            ``max()`` (used to rescale the colours on every call).
        :param xlim: optional ``(min, max)`` pair for the horizontal extent.
        :param ylim: optional ``(min, max)`` pair for the vertical extent.
            Both limits must be given to take effect; otherwise the extent
            defaults to the unit square ``[0, 1, 0, 1]``.
        :returns: handle to the image artist (the same object stored on
            ``self.valueFunction_fig``).
        """
        fig = plt.figure("Value Function")
        plt.title('Value Function')
        if xlim is not None and ylim is not None:
            extent = [xlim[0], xlim[1], ylim[0], ylim[1]]
        else:
            extent = [0, 1, 0, 1]

        image = getattr(self, "valueFunction_fig", None)
        # A cached artist is only usable while its axes still belong to this
        # figure; the figure may have been closed or cleared in the meantime.
        if image is None or image.axes not in fig.axes:
            # NOTE(review): 'ValueFunction' is not a stock matplotlib
            # colormap; presumably it is registered elsewhere - confirm.
            image = plt.imshow(
                VMat,
                cmap='ValueFunction',
                interpolation='nearest',
                origin='lower',
                extent=extent)
            self.valueFunction_fig = image
        else:
            image.set_data(VMat)
            image.set_extent(extent)

        # Rescale the colour mapping to the range of the current data.
        image.set_norm(colors.Normalize(vmin=VMat.min(), vmax=VMat.max()))
        plt.draw()
        return image
Ejemplo n.º 2
0
def show_one_variable(variable, vrange, map1="9x9-2PathR1.txt"):
    """
    Evaluate ``param_ranges`` over ``vrange`` twice and plot both curves.

    The first evaluation calls ``param_ranges(variable, vrange)`` without a
    map argument; the second passes ``cur_map=map1``. Plotting is
    best-effort: if it fails, a warning is issued and the computed results
    are still returned. Unlike a ``return`` inside ``finally``, this does not
    swallow ``KeyboardInterrupt`` / ``SystemExit``.

    :param variable: name of the varied parameter; also used in the title.
    :param vrange: values of the parameter, used as the x axis.
    :param map1: value forwarded as ``cur_map`` for the second curve.
    :returns: tuple ``(ctrl, y)`` of the two ``param_ranges`` results.
    """
    import warnings

    x = vrange
    ctrl = param_ranges(variable, vrange)
    y = param_ranges(variable, vrange, cur_map=map1)
    try:
        plt.title(variable + " vs AUC")
        plt.ioff()
        # Label the curves so that plt.legend() has entries to show.
        plt.plot(x, ctrl, label="ctrl")
        plt.plot(x, y, label=str(map1))
        plt.legend()
        plt.show()
    except Exception as exc:
        # Keep the results usable even when no display is available.
        warnings.warn("plotting %r failed: %s" % (variable, exc),
                      RuntimeWarning)
    return ctrl, y
Ejemplo n.º 3
0
    def showLearning(self, representation):
        """
        Plot the greedy policy and the value function of ``representation``.

        ``xSlice`` and ``xDotSlice`` - the value of ``x`` and ``xDot``
        respectively, associated with the plotted value function and policy
        (which are each 2-D grids across ``theta`` and ``thetaDot``).
        Only this single slice of the 4-component state is shown.

        :param representation: object providing ``Qs(s, terminal)``; it is
            also handed to ``self._setup_learning`` to obtain the grid axes.
        """
        xSlice = 0.  # value of x assumed when plotting V and pi
        xDotSlice = 0.  # value of xDot assumed when plotting V and pi

        (thetas, theta_dots) = self._setup_learning(representation)

        pi = np.zeros((len(theta_dots), len(thetas)), 'uint8')
        V = np.zeros((len(theta_dots), len(thetas)))

        for row, thetaDot in enumerate(theta_dots):
            for col, theta in enumerate(thetas):
                s = np.array([theta, thetaDot, xSlice, xDotSlice])
                terminal = self.isTerminal(s)
                # Array of Q-function evaluated at all possible actions at
                # state s
                Qs = representation.Qs(s, terminal)
                # Array of all possible actions at state s
                As = self.possibleActions(s=s)
                # Value of the state under an optimal action
                best_q = Qs.max()
                # If multiple optimal actions, pick one randomly
                pi[row, col] = np.random.choice(As[Qs == best_q])
                V[row, col] = best_q

        # Derive the titles from the slice actually plotted so they cannot
        # drift from xSlice / xDotSlice.
        slice_desc = "(Slice at x=%g, xDot=%g)" % (xSlice, xDotSlice)

        self._plot_policy(pi)
        plt.title("Policy " + slice_desc)
        self._plot_valfun(V)
        plt.title("Value Function " + slice_desc)

        plt.draw()
Ejemplo n.º 4
0
    def showLearning(self, representation):
        """
        Plot the greedy policy and the value function of ``representation``.

        ``xSlice`` and ``xDotSlice`` - the value of ``x`` and ``xDot``
        respectively, associated with the plotted value function and policy
        (which are each 2-D grids across ``theta`` and ``thetaDot``).
        Only this single slice of the 4-component state is shown.

        :param representation: object providing ``Qs(s, terminal)``; it is
            also handed to ``self._setup_learning`` to obtain the grid axes.
        """
        xSlice = 0.  # value of x assumed when plotting V and pi
        xDotSlice = 0.  # value of xDot assumed when plotting V and pi

        (thetas, theta_dots) = self._setup_learning(representation)

        pi = np.zeros((len(theta_dots), len(thetas)), 'uint8')
        V = np.zeros((len(theta_dots), len(thetas)))

        for row, thetaDot in enumerate(theta_dots):
            for col, theta in enumerate(thetas):
                s = np.array([theta, thetaDot, xSlice, xDotSlice])
                terminal = self.isTerminal(s)
                # Array of Q-function evaluated at all possible actions at
                # state s
                Qs = representation.Qs(s, terminal)
                # Array of all possible actions at state s
                As = self.possibleActions(s=s)
                # Value of the state under an optimal action
                best_q = Qs.max()
                # If multiple optimal actions, pick one randomly
                pi[row, col] = np.random.choice(As[Qs == best_q])
                V[row, col] = best_q

        # Derive the titles from the slice actually plotted so they cannot
        # drift from xSlice / xDotSlice.
        slice_desc = "(Slice at x=%g, xDot=%g)" % (xSlice, xDotSlice)

        self._plot_policy(pi)
        plt.title("Policy " + slice_desc)
        self._plot_valfun(V)
        plt.title("Value Function " + slice_desc)

        plt.draw()
Ejemplo n.º 5
0
    def _plot_policy(self,
                     piMat,
                     title="Policy",
                     var="policy_fig",
                     xlim=None,
                     ylim=None):
        """
        Draw ``piMat`` as an image in the figure named ``title``.

        On the first call (attribute ``var`` missing or ``None``) the image
        is created with a discrete colormap derived from ``jet`` and stored
        on ``self`` under the attribute name ``var``; later calls only
        replace the image data.

        :param piMat: 2-D array of action indices to display.
        :param title: figure name and axes title.
        :param var: name of the attribute on ``self`` that caches the image.
        :param xlim: optional ``(min, max)`` pair for the horizontal extent.
        :param ylim: optional ``(min, max)`` pair for the vertical extent.
            The limits are only applied when the image is first created, and
            only if both are given; otherwise the extent is ``[0, 1, 0, 1]``.
        :returns: handle to the image artist stored under ``var``.
        """
        if getattr(self, var, None) is None:
            plt.figure(title)
            # Start from the jet colormap and force its first entry to grey.
            base_cmap = plt.cm.jet
            cmaplist = [base_cmap(i) for i in range(base_cmap.N)]
            cmaplist[0] = (.5, .5, .5, 1.0)
            cmap = base_cmap.from_list('Custom cmap', cmaplist, base_cmap.N)

            # One colour bin per integer step between 0 and actions_num.
            bounds = np.linspace(0, self.actions_num, self.actions_num + 1)
            norm = mpl.colors.BoundaryNorm(bounds, cmap.N)
            if xlim is not None and ylim is not None:
                extent = [xlim[0], xlim[1], ylim[0], ylim[1]]
            else:
                extent = [0, 1, 0, 1]
            # setattr mirrors the getattr lookup above (instead of writing
            # to self.__dict__ directly).
            setattr(self, var, plt.imshow(piMat,
                                          interpolation='nearest',
                                          origin='lower',
                                          cmap=cmap,
                                          norm=norm,
                                          extent=extent))
            plt.title(title)

            plt.colorbar()
        # Make the policy figure current before updating and redrawing it.
        plt.figure(title)
        image = getattr(self, var)
        image.set_data(piMat)
        plt.draw()
        return image
Ejemplo n.º 6
0
    def _plot_policy(self, piMat, title="Policy",
                     var="policy_fig", xlim=None, ylim=None):
        """
        Draw ``piMat`` as an image in the figure named ``title``.

        On the first call (attribute ``var`` missing or ``None``) the image
        is created with a discrete colormap derived from ``jet`` and stored
        on ``self`` under the attribute name ``var``; later calls only
        replace the image data.

        :param piMat: 2-D array of action indices to display.
        :param title: figure name and axes title.
        :param var: name of the attribute on ``self`` that caches the image.
        :param xlim: optional ``(min, max)`` pair for the horizontal extent.
        :param ylim: optional ``(min, max)`` pair for the vertical extent.
            The limits are only applied when the image is first created, and
            only if both are given; otherwise the extent is ``[0, 1, 0, 1]``.
        :returns: handle to the image artist stored under ``var``.
        """
        if getattr(self, var, None) is None:
            plt.figure(title)
            # Start from the jet colormap and force its first entry to grey.
            base_cmap = plt.cm.jet
            cmaplist = [base_cmap(i) for i in range(base_cmap.N)]
            cmaplist[0] = (.5, .5, .5, 1.0)
            cmap = base_cmap.from_list('Custom cmap', cmaplist, base_cmap.N)

            # One colour bin per integer step between 0 and actions_num.
            bounds = np.linspace(0, self.actions_num, self.actions_num + 1)
            norm = mpl.colors.BoundaryNorm(bounds, cmap.N)
            if xlim is not None and ylim is not None:
                extent = [xlim[0], xlim[1], ylim[0], ylim[1]]
            else:
                extent = [0, 1, 0, 1]
            # setattr mirrors the getattr lookup above (instead of writing
            # to self.__dict__ directly).
            setattr(self, var, plt.imshow(
                piMat,
                interpolation='nearest',
                origin='lower',
                cmap=cmap,
                norm=norm,
                extent=extent))
            plt.title(title)

            plt.colorbar()
        # Make the policy figure current before updating and redrawing it.
        plt.figure(title)
        image = getattr(self, var)
        image.set_data(piMat)
        plt.draw()
        return image