Example #1
0
    def showDomain(self, a):
        s = self.state
        # Plot the car
        x, y, speed, heading = s
        car_xmin = x - self.REAR_WHEEL_RELATIVE_LOC
        car_ymin = y - self.CAR_WIDTH / 2.
        if self.domain_fig is None:  # Need to initialize the figure
            self.domain_fig = plt.figure()
            # Goal
            plt.gca().add_patch(
                plt.Circle(self.GOAL,
                           radius=self.GOAL_RADIUS,
                           color='g',
                           alpha=.4))
            plt.xlim([self.XMIN, self.XMAX])
            plt.ylim([self.YMIN, self.YMAX])
            plt.gca().set_aspect('1')
        # Car
        if self.car_fig is not None:
            plt.gca().patches.remove(self.car_fig)

        self.car_fig = mpatches.Rectangle([car_xmin, car_ymin],
                                          self.CAR_LENGTH,
                                          self.CAR_WIDTH,
                                          alpha=.4)
        rotation = mpl.transforms.Affine2D().rotate_deg_around(
            x, y, heading * 180 / np.pi) + plt.gca().transData
        self.car_fig.set_transform(rotation)
        plt.gca().add_patch(self.car_fig)

        plt.draw()
Example #2
0
 def plot(self, y="return", x="learning_steps", save=False):
     """Plots the performance of the experiment
     This function has only limited capabilities.
     For more advanced plotting of results consider
     :py:class:`Tools.Merger.Merger`.
     """
     labels = rlpy.Tools.results.default_labels
     performance_fig = plt.figure("Performance")
     res = self.result
     plt.plot(res[x], res[y], '-bo', lw=3, markersize=10)
     plt.xlim(0, res[x][-1] * 1.01)
     y_arr = np.array(res[y])
     m = y_arr.min()
     M = y_arr.max()
     delta = M - m
     if delta > 0:
         plt.ylim(m - .1 * delta - .1, M + .1 * delta + .1)
     xlabel = labels[x] if x in labels else x
     ylabel = labels[y] if y in labels else y
     plt.xlabel(xlabel, fontsize=16)
     plt.ylabel(ylabel, fontsize=16)
     if save:
         path = os.path.join(
             self.full_path,
             "{:3}-performance.pdf".format(self.exp_id))
         performance_fig.savefig(path, transparent=True, pad_inches=.1)
     plt.ioff()
     plt.show()
Example #3
0
File: RCCar.py Project: okkhoy/rlpy
    def showDomain(self, a):
        s = self.state
        # Plot the car
        x, y, speed, heading = s
        car_xmin = x - self.REAR_WHEEL_RELATIVE_LOC
        car_ymin = y - old_div(self.CAR_WIDTH, 2.)
        if self.domain_fig is None:  # Need to initialize the figure
            self.domain_fig = plt.figure()
            # Goal
            plt.gca(
            ).add_patch(
                plt.Circle(
                    self.GOAL,
                    radius=self.GOAL_RADIUS,
                    color='g',
                    alpha=.4))
            plt.xlim([self.XMIN, self.XMAX])
            plt.ylim([self.YMIN, self.YMAX])
            plt.gca().set_aspect('1')
        # Car
        if self.car_fig is not None:
            plt.gca().patches.remove(self.car_fig)

        self.car_fig = mpatches.Rectangle(
            [car_xmin,
             car_ymin],
            self.CAR_LENGTH,
            self.CAR_WIDTH,
            alpha=.4)
        rotation = mpl.transforms.Affine2D().rotate_deg_around(
            x, y, heading * 180 / np.pi) + plt.gca().transData
        self.car_fig.set_transform(rotation)
        plt.gca().add_patch(self.car_fig)

        plt.draw()
 def plot(self, y="return", x="learning_steps", save=False):
     """Plots the performance of the experiment
     This function has only limited capabilities.
     For more advanced plotting of results consider
     :py:class:`Tools.Merger.Merger`.
     """
     labels = rlpy.Tools.results.default_labels
     performance_fig = plt.figure("Performance")
     res = self.result
     plt.plot(res[x], res[y], '-bo', lw=3, markersize=10)
     plt.xlim(0, res[x][-1] * 1.01)
     y_arr = np.array(res[y])
     m = y_arr.min()
     M = y_arr.max()
     delta = M - m
     if delta > 0:
         plt.ylim(m - .1 * delta - .1, M + .1 * delta + .1)
     xlabel = labels[x] if x in labels else x
     ylabel = labels[y] if y in labels else y
     plt.xlabel(xlabel, fontsize=16)
     plt.ylabel(ylabel, fontsize=16)
     if save:
         path = os.path.join(self.full_path,
                             "{:3}-performance.pdf".format(self.exp_id))
         performance_fig.savefig(path, transparent=True, pad_inches=.1)
     plt.ioff()
     plt.show()
Example #5
0
 def showDomain(self, a=None):
     if a is not None:
         a = self.actions[a]
     T = np.empty((self.d, 2))
     T[:, 0] = np.cos(self.theta)
     T[:, 1] = np.sin(self.theta)
     R = np.dot(self.P, T)
     R1 = R - .5 * self.lengths[:, None] * T
     R2 = R + .5 * self.lengths[:, None] * T
     Rx = np.hstack([R1[:, 0], R2[:, 0]]) + self.pos_cm[0]
     Ry = np.hstack([R1[:, 1], R2[:, 1]]) + self.pos_cm[1]
     print(Rx)
     print(Ry)
     f = plt.figure("Swimmer Domain")
     if not hasattr(self, "swimmer_lines"):
         plt.plot(0., 0., "ro")
         self.swimmer_lines = plt.plot(Rx, Ry)[0]
         self.action_text = plt.text(-2, -8, str(a))
         plt.xlim(-5, 15)
         plt.ylim(-10, 10)
     else:
         self.swimmer_lines.set_data(Rx, Ry)
         self.action_text.set_text(str(a))
     plt.figure("Swimmer Domain").canvas.draw()
     plt.figure("Swimmer Domain").canvas.flush_events()
Example #6
0
    def showLearning(self, representation):
        allStates = np.arange(0, self.chainSize)
        X = np.arange(self.chainSize) * 2.0 / 10.0 - self.SHIFT
        Y = np.ones(self.chainSize) * self.Y
        DY = np.zeros(self.chainSize)
        DX = np.zeros(self.chainSize)
        C = np.zeros(self.chainSize)

        if self.value_function_fig is None:
            self.value_function_fig = plt.subplot(3, 1, 2)
            self.V_star_line = self.value_function_fig.plot(
                allStates,
                self.V_star)
            V = [representation.V(s, False, self.possibleActions(s=s))
                 for s in allStates]

            # Note the comma below, since a tuple of line objects is returned
            self.V_approx_line, = self.value_function_fig.plot(
                allStates, V, 'r-', linewidth=3)
            self.V_star_line = self.value_function_fig.plot(
                allStates,
                self.V_star,
                'b--',
                linewidth=3)
            # Maximum value function is sum of all possible rewards
            plt.ylim([0, self.GOAL_REWARD * (len(self.GOAL_STATES) + 1)])

            self.policy_fig = plt.subplot(3, 1, 3)
            self.policy_fig.set_xlim(0, self.chainSize * 2 / 10.0)
            self.policy_fig.set_ylim(0, 2)
            self.arrows = plt.quiver(
                X,
                Y,
                DX,
                DY,
                C,
                cmap='fiftyChainActions',
                units='x',
                width=0.05,
                scale=.008,
                alpha=.8)  # headwidth=.05, headlength = .03, headaxislength = .02)
            self.policy_fig.xaxis.set_visible(False)
            self.policy_fig.yaxis.set_visible(False)

        V = [representation.V(s, False, self.possibleActions(s=s))
             for s in allStates]
        pi = [representation.bestAction(s, False, self.possibleActions(s=s))
              for s in allStates]
        #pi  = [self.optimal_policy[s] for s in allStates]

        DX = [(2 * a - 1) * self.SHIFT * .1 for a in pi]

        self.V_approx_line.set_ydata(V)
        self.arrows.set_UVC(DX, DY, pi)
        plt.draw()
Example #7
0
    def showDomain(self, a):
        if self.gcf is None:
            self.gcf = plt.gcf()

        s = self.state
        # Plot the car
        x, y, speed, heading = s
        car_xmin = x - self.REAR_WHEEL_RELATIVE_LOC
        car_ymin = y - self.CAR_WIDTH / 2.
        if self.domain_fig is None:  # Need to initialize the figure
            self.domain_fig = plt.figure()
            # Goal
            plt.gca(
            ).add_patch(
                plt.Circle(
                    self.GOAL,
                    radius=self.GOAL_RADIUS,
                    color='g',
                    alpha=.4))
            plt.xlim([self.XMIN, self.XMAX])
            plt.ylim([self.YMIN, self.YMAX])
            plt.gca().set_aspect('1')
        # Car
        if self.car_fig is not None:
            plt.gca().patches.remove(self.car_fig)

        if self.slips:            
            slip_x, slip_y = zip(*self.slips)
            try:
                line = plt.axes().lines[0]
                if len(line.get_xdata()) != len(slip_x): # if plot has discrepancy from data
                    line.set_xdata(slip_x)
                    line.set_ydata(slip_y)
            except IndexError:
                plt.plot(slip_x, slip_y, 'x', color='b')

        self.car_fig = mpatches.Rectangle(
            [car_xmin,
             car_ymin],
            self.CAR_LENGTH,
            self.CAR_WIDTH,
            alpha=.4)
        rotation = mpl.transforms.Affine2D().rotate_deg_around(
            x, y, heading * 180 / np.pi) + plt.gca().transData
        self.car_fig.set_transform(rotation)
        plt.gca().add_patch(self.car_fig)

        plt.draw()
        # self.gcf.canvas.draw()
        plt.pause(0.001)
Example #8
0
    def showStateDistribution(self, all_state_list, colors):

        ## HACKY - WILL NEED TO CLEAN UP
        if self.gcf is None:
            self.gcf = plt.gcf()
        plt.gca().add_patch(
            plt.Circle(self.GOAL, radius=self.GOAL_RADIUS, color='g',
                       alpha=.4))
        plt.xlim([self.XMIN, self.XMAX])
        plt.ylim([self.YMIN, self.YMAX])
        plt.gca().set_aspect('1')
        for state_list, color in zip(all_state_list, colors):
            for state in state_list:
                plt.gca().add_patch(
                    plt.Circle(state[:2], radius=0.01, color=color, alpha=.4))

        plt.draw()
        self.gcf.canvas.draw()
Example #9
0
    def plot_trials(self,
                    y="eps_return",
                    x="learning_steps",
                    average=10,
                    save=False):
        """Plots the performance of the experiment
        This function has only limited capabilities.
        For more advanced plotting of results consider
        :py:class:`Tools.Merger.Merger`.
        """
        def movingaverage(interval, window_size):
            window = np.ones(int(window_size)) / float(window_size)
            return np.convolve(interval, window, 'same')

        labels = rlpy.Tools.results.default_labels
        performance_fig = plt.figure("Performance")
        trials = self.trials
        y_arr = np.array(trials[y])
        if average:
            assert type(average) is int, "Filter length is not an integer!"
            y_arr = movingaverage(y_arr, average)
        plt.plot(trials[x], y_arr, '-bo', lw=3, markersize=10)
        plt.xlim(0, trials[x][-1] * 1.01)
        m = y_arr.min()
        M = y_arr.max()
        delta = M - m
        if delta > 0:
            plt.ylim(m - .1 * delta - .1, M + .1 * delta + .1)
        xlabel = labels[x] if x in labels else x
        ylabel = labels[y] if y in labels else y
        plt.xlabel(xlabel, fontsize=16)
        plt.ylabel(ylabel, fontsize=16)
        if save:
            path = os.path.join(self.full_path,
                                "{:3}-trials.pdf".format(self.exp_id))
            performance_fig.savefig(path, transparent=True, pad_inches=.1)
        plt.ioff()
        plt.show()
Example #10
0
    def showDomain(self, a):
        s = self.state
        # Plot the car and an arrow indicating the direction of accelaration
        # Parts of this code was adopted from Jose Antonio Martin H.
        # <*****@*****.**> online source code
        pos, vel = s
        if self.domain_fig is None:  # Need to initialize the figure
            self.domain_fig = plt.figure("Mountain Car Domain")
            # plot mountain
            mountain_x = np.linspace(self.XMIN, self.XMAX, 1000)
            mountain_y = np.sin(3 * mountain_x)
            plt.gca(
            ).fill_between(mountain_x,
                           min(mountain_y) - self.CAR_HEIGHT * 2,
                           mountain_y,
                           color='g')
            plt.xlim([self.XMIN - .2, self.XMAX])
            plt.ylim(
                [min(mountain_y) - self.CAR_HEIGHT * 2,
                 max(mountain_y) + self.CAR_HEIGHT * 2])
            # plot car
            self.car = lines.Line2D([], [], linewidth=20, color='b', alpha=.8)
            plt.gca().add_line(self.car)
            # Goal
            plt.plot(self.GOAL, np.sin(3 * self.GOAL), 'yd', markersize=10.0)
            plt.axis('off')
            plt.gca().set_aspect('1')
        self.domain_fig = plt.figure("Mountain Car Domain")
        #pos = 0
        #a = 0
        car_middle_x = pos
        car_middle_y = np.sin(3 * pos)
        slope = np.arctan(3 * np.cos(3 * pos))
        car_back_x = car_middle_x - self.CAR_WIDTH * np.cos(slope) / 2.
        car_front_x = car_middle_x + self.CAR_WIDTH * np.cos(slope) / 2.
        car_back_y = car_middle_y - self.CAR_WIDTH * np.sin(slope) / 2.
        car_front_y = car_middle_y + self.CAR_WIDTH * np.sin(slope) / 2.
        self.car.set_data([car_back_x, car_front_x], [car_back_y, car_front_y])
        # wheels
        # plott(x(1)-0.05,sin(3*(x(1)-0.05))+0.06,'ok','markersize',12,'MarkerFaceColor',[.5 .5 .5]);
        # plot(x(1)+0.05,sin(3*(x(1)+0.05))+0.06,'ok','markersize',12,'MarkerFaceColor',[.5 .5 .5]);
        # Arrows
        if self.actionArrow is not None:
            self.actionArrow.remove()
            self.actionArrow = None

        if self.actions[a] > 0:
            self.actionArrow = fromAtoB(
                car_front_x, car_front_y,
                car_front_x + self.ARROW_LENGTH *
                np.cos(slope), car_front_y +
                self.ARROW_LENGTH * np.sin(slope),
                #car_front_x + self.CAR_WIDTH*cos(slope)/2., car_front_y + self.CAR_WIDTH*sin(slope)/2.+self.CAR_HEIGHT,
                'k', "arc3,rad=0",
                0, 0, 'simple'
            )
        if self.actions[a] < 0:
            self.actionArrow = fromAtoB(
                car_back_x, car_back_y,
                car_back_x - self.ARROW_LENGTH *
                np.cos(slope), car_back_y -
                self.ARROW_LENGTH * np.sin(slope),
                #car_front_x + self.CAR_WIDTH*cos(slope)/2., car_front_y + self.CAR_WIDTH*sin(slope)/2.+self.CAR_HEIGHT,
                'r', "arc3,rad=0",
                0, 0, 'simple'
            )
        plt.draw()
Example #11
0
    def showLearning(self, representation):
        allStates = np.arange(0, self.chainSize)
        X = np.arange(self.chainSize) * 2.0 / 10.0 - self.SHIFT
        Y = np.ones(self.chainSize) * self.Y
        DY = np.zeros(self.chainSize)
        DX = np.zeros(self.chainSize)
        C = np.zeros(self.chainSize)

        if self.value_function_fig is None:
            self.value_function_fig = plt.subplot(3, 1, 2)
            self.V_star_line = self.value_function_fig.plot(
                allStates, self.V_star)
            V = [
                representation.V(s, False, self.possibleActions(s=s))
                for s in allStates
            ]

            # Note the comma below, since a tuple of line objects is returned
            self.V_approx_line, = self.value_function_fig.plot(allStates,
                                                               V,
                                                               'r-',
                                                               linewidth=3)
            self.V_star_line = self.value_function_fig.plot(allStates,
                                                            self.V_star,
                                                            'b--',
                                                            linewidth=3)
            # Maximum value function is sum of all possible rewards
            plt.ylim([0, self.GOAL_REWARD * (len(self.GOAL_STATES) + 1)])

            self.policy_fig = plt.subplot(3, 1, 3)
            self.policy_fig.set_xlim(0, self.chainSize * 2 / 10.0)
            self.policy_fig.set_ylim(0, 2)
            self.arrows = plt.quiver(
                X,
                Y,
                DX,
                DY,
                C,
                cmap='fiftyChainActions',
                units='x',
                width=0.05,
                scale=.008,
                alpha=.8
            )  # headwidth=.05, headlength = .03, headaxislength = .02)
            self.policy_fig.xaxis.set_visible(False)
            self.policy_fig.yaxis.set_visible(False)

        V = [
            representation.V(s, False, self.possibleActions(s=s))
            for s in allStates
        ]
        pi = [
            representation.bestAction(s, False, self.possibleActions(s=s))
            for s in allStates
        ]
        #pi  = [self.optimal_policy[s] for s in allStates]

        DX = [(2 * a - 1) * self.SHIFT * .1 for a in pi]

        self.V_approx_line.set_ydata(V)
        self.arrows.set_UVC(DX, DY, pi)
        plt.draw()