Code example #1
0
File: FiftyChain.py — Project: MLDL/rlpy
    def showLearning(self, representation):
        """Plot the learned value function and greedy policy for the chain.

        On the first call, creates two subplots: the middle panel holds the
        optimal value function ``self.V_star`` (dashed blue) and the
        representation's current estimate (red); the bottom panel shows the
        greedy action per state as quiver arrows. Subsequent calls only
        update the estimate line and the arrows.

        :param representation: object providing
            ``V(s, terminal, possible_actions)`` and
            ``bestAction(s, terminal, possible_actions)``.
        """
        allStates = np.arange(0, self.chainSize)
        # Anchor positions and components for the policy quiver arrows.
        X = np.arange(self.chainSize) * 2.0 / 10.0 - self.SHIFT
        Y = np.ones(self.chainSize) * self.Y
        DY = np.zeros(self.chainSize)
        DX = np.zeros(self.chainSize)
        C = np.zeros(self.chainSize)

        if self.value_function_fig is None:
            # First call: build the value-function and policy subplots.
            self.value_function_fig = plt.subplot(3, 1, 2)
            V = [representation.V(s, False, self.possibleActions(s=s))
                 for s in allStates]

            # Note the comma below, since a tuple of line objects is returned
            self.V_approx_line, = self.value_function_fig.plot(
                allStates, V, 'r-', linewidth=3)
            # BUGFIX: the original plotted V_star twice (first unstyled, then
            # dashed) and never removed the first line, leaving a stray curve
            # on the axes. Plot it exactly once, styled.
            self.V_star_line = self.value_function_fig.plot(
                allStates,
                self.V_star,
                'b--',
                linewidth=3)
            # Maximum value function is sum of all possible rewards
            plt.ylim([0, self.GOAL_REWARD * (len(self.GOAL_STATES) + 1)])

            self.policy_fig = plt.subplot(3, 1, 3)
            self.policy_fig.set_xlim(0, self.chainSize * 2 / 10.0)
            self.policy_fig.set_ylim(0, 2)
            self.arrows = plt.quiver(
                X,
                Y,
                DX,
                DY,
                C,
                cmap='fiftyChainActions',
                units='x',
                width=0.05,
                scale=.008,
                alpha=.8)  # headwidth=.05, headlength = .03, headaxislength = .02)
            self.policy_fig.xaxis.set_visible(False)
            self.policy_fig.yaxis.set_visible(False)

        # Refresh value estimates and the greedy action for every state.
        V = [representation.V(s, False, self.possibleActions(s=s))
             for s in allStates]
        pi = [representation.bestAction(s, False, self.possibleActions(s=s))
              for s in allStates]

        # Map action {0, 1} to a left/right arrow x-component.
        DX = [(2 * a - 1) * self.SHIFT * .1 for a in pi]

        self.V_approx_line.set_ydata(V)
        self.arrows.set_UVC(DX, DY, pi)
        plt.draw()
Code example #2
0
File: FiftyChain.py — Project: MLDL/rlpy
 def showDomain(self, a=0):
     """Render the chain domain: one circle per state, white by default,
     green for goal states, black for the current state.

     :param a: action taken (unused here; kept for the domain interface).
     """
     s = self.state
     # Draw the environment (figure is created lazily on the first call)
     if self.circles is None:
         self.domain_fig = plt.subplot(3, 1, 1)
         plt.figure(1, (self.chainSize * 2 / 10.0, 2))
         self.domain_fig.set_xlim(0, self.chainSize * 2 / 10.0)
         self.domain_fig.set_ylim(0, 2)
         self.domain_fig.add_patch(
             mpatches.Circle((1 / 5.0 + 2 / 10.0 * (self.chainSize - 1),
                              self.Y),
                             self.RADIUS * 1.1,
                             fc="w"))  # Make the last one double circle
         self.domain_fig.xaxis.set_visible(False)
         self.domain_fig.yaxis.set_visible(False)
         self.circles = [mpatches.Circle((1 / 5.0 + 2 / 10.0 * i, self.Y), self.RADIUS, fc="w")
                         for i in range(self.chainSize)]
         for i in range(self.chainSize):
             self.domain_fig.add_patch(self.circles[i])
         # BUGFIX: show once after all patches are added; the original
         # called plt.show() inside the loop, once per circle.
         plt.show()
     # Repaint: everything white, goals green, current state black.
     for p in self.circles:
         p.set_facecolor('w')
     for p in self.GOAL_STATES:
         self.circles[p].set_facecolor('g')
     self.circles[s].set_facecolor('k')
     plt.draw()
Code example #3
0
File: FlipBoard.py — Project: smcgregor/rlpy
 def showDomain(self, a=0):
     """Draw the board as an image and mark the acted cell with an 'x'.

     :param a: flat action id, decoded via ``id2vec`` into (row, col).
     """
     board = self.state
     shape = (self.BOARD_SIZE, self.BOARD_SIZE)
     if self.domain_fig is None:
         # First call: build the axes, the board image, and the marker.
         self.move_fig = plt.subplot(111)
         board = board.reshape(shape)
         self.domain_fig = plt.imshow(board,
                                      cmap='FlipBoard',
                                      interpolation='nearest',
                                      vmin=0,
                                      vmax=1)
         plt.xticks(np.arange(self.BOARD_SIZE), fontsize=FONTSIZE)
         plt.yticks(np.arange(self.BOARD_SIZE), fontsize=FONTSIZE)
         # pl.tight_layout()
         row, col = id2vec(a, [self.BOARD_SIZE, self.BOARD_SIZE])
         self.move_fig = self.move_fig.plot(col,
                                            row,
                                            'kx',
                                            markersize=30.0)
         plt.show()
     row, col = id2vec(a, [self.BOARD_SIZE, self.BOARD_SIZE])
     # Remove the stale action marker, then draw the new one.
     # Instead of '>' you can use 'D', 'o'
     self.move_fig.pop(0).remove()
     self.move_fig = plt.plot(col, row, 'kx', markersize=30.0)
     self.domain_fig.set_data(board.reshape(shape))
     plt.draw()
Code example #4
0
 def showDomain(self, a=0):
     """Render the chain domain in the "Domain" figure: white circles for
     states, green for goal states, black for the current state.

     :param a: action taken (unused here; kept for the domain interface).
     """
     s = self.state
     # Draw the environment (figure is created lazily on the first call)
     if self.circles is None:
         plt.figure("Domain")
         self.domain_fig = plt.subplot(3, 1, 1)
         plt.figure(1, (self.chainSize * 2 / 10.0, 2))
         self.domain_fig.set_xlim(0, self.chainSize * 2 / 10.0)
         self.domain_fig.set_ylim(0, 2)
         # old_div(1, 5.0) is ordinary float division (5.0 is a float),
         # so it is written directly as 1 / 5.0.
         self.domain_fig.add_patch(
             mpatches.Circle((1 / 5.0 + 2 / 10.0 *
                              (self.chainSize - 1), self.Y),
                             self.RADIUS * 1.1,
                             fc="w"))  # Make the last one double circle
         self.domain_fig.xaxis.set_visible(False)
         self.domain_fig.yaxis.set_visible(False)
         self.circles = [
             mpatches.Circle((1 / 5.0 + 2 / 10.0 * i, self.Y),
                             self.RADIUS,
                             fc="w") for i in range(self.chainSize)
         ]
         for i in range(self.chainSize):
             self.domain_fig.add_patch(self.circles[i])
         # BUGFIX: show once after all patches are added; the original
         # called plt.show() inside the loop, once per circle.
         plt.show()
     # Repaint: everything white, goals green, current state black.
     for p in self.circles:
         p.set_facecolor('w')
     for p in self.GOAL_STATES:
         self.circles[p].set_facecolor('g')
     self.circles[s].set_facecolor('k')
     plt.figure("Domain").canvas.draw()
     plt.figure("Domain").canvas.flush_events()
Code example #5
0
 def showDomain(self, a=0):
     """Display the current board state and overlay an 'x' marker on the
     cell addressed by action ``a`` (decoded via ``id2vec``)."""
     state_vec = self.state
     n = self.BOARD_SIZE
     if self.domain_fig is None:
         # First call: set up the axes, image, and initial action marker.
         self.move_fig = plt.subplot(111)
         state_vec = state_vec.reshape((n, n))
         self.domain_fig = plt.imshow(state_vec,
                                      cmap='FlipBoard',
                                      interpolation='nearest',
                                      vmin=0,
                                      vmax=1)
         plt.xticks(np.arange(n), fontsize=FONTSIZE)
         plt.yticks(np.arange(n), fontsize=FONTSIZE)
         # pl.tight_layout()
         r, c = id2vec(a, [n, n])
         self.move_fig = self.move_fig.plot(c,
                                            r,
                                            'kx',
                                            markersize=30.0)
         plt.show()
     r, c = id2vec(a, [n, n])
     # Drop the previous marker, then plot the new one.
     # Instead of '>' you can use 'D', 'o'
     self.move_fig.pop(0).remove()
     self.move_fig = plt.plot(c, r, 'kx', markersize=30.0)
     state_vec = state_vec.reshape((n, n))
     self.domain_fig.set_data(state_vec)
     plt.draw()
Code example #6
0
    def showLearning(self, representation):
        """Plot the learned value function and greedy policy for the chain.

        On the first call, creates two subplots: the middle panel holds the
        optimal value function ``self.V_star`` (dashed blue) and the
        representation's current estimate (red); the bottom panel shows the
        greedy action per state as quiver arrows. Subsequent calls only
        update the estimate line and the arrows.

        :param representation: object providing
            ``V(s, terminal, possible_actions)`` and
            ``bestAction(s, terminal, possible_actions)``.
        """
        allStates = np.arange(0, self.chainSize)
        # Anchor positions and components for the policy quiver arrows.
        X = np.arange(self.chainSize) * 2.0 / 10.0 - self.SHIFT
        Y = np.ones(self.chainSize) * self.Y
        DY = np.zeros(self.chainSize)
        DX = np.zeros(self.chainSize)
        C = np.zeros(self.chainSize)

        if self.value_function_fig is None:
            # First call: build the value-function and policy subplots.
            self.value_function_fig = plt.subplot(3, 1, 2)
            V = [
                representation.V(s, False, self.possibleActions(s=s))
                for s in allStates
            ]

            # Note the comma below, since a tuple of line objects is returned
            self.V_approx_line, = self.value_function_fig.plot(allStates,
                                                               V,
                                                               'r-',
                                                               linewidth=3)
            # BUGFIX: the original plotted V_star twice (first unstyled, then
            # dashed) and never removed the first line, leaving a stray curve
            # on the axes. Plot it exactly once, styled.
            self.V_star_line = self.value_function_fig.plot(allStates,
                                                            self.V_star,
                                                            'b--',
                                                            linewidth=3)
            # Maximum value function is sum of all possible rewards
            plt.ylim([0, self.GOAL_REWARD * (len(self.GOAL_STATES) + 1)])

            self.policy_fig = plt.subplot(3, 1, 3)
            self.policy_fig.set_xlim(0, self.chainSize * 2 / 10.0)
            self.policy_fig.set_ylim(0, 2)
            self.arrows = plt.quiver(
                X,
                Y,
                DX,
                DY,
                C,
                cmap='fiftyChainActions',
                units='x',
                width=0.05,
                scale=.008,
                alpha=.8
            )  # headwidth=.05, headlength = .03, headaxislength = .02)
            self.policy_fig.xaxis.set_visible(False)
            self.policy_fig.yaxis.set_visible(False)

        # Refresh value estimates and the greedy action for every state.
        V = [
            representation.V(s, False, self.possibleActions(s=s))
            for s in allStates
        ]
        pi = [
            representation.bestAction(s, False, self.possibleActions(s=s))
            for s in allStates
        ]

        # Map action {0, 1} to a left/right arrow x-component.
        DX = [(2 * a - 1) * self.SHIFT * .1 for a in pi]

        self.V_approx_line.set_ydata(V)
        self.arrows.set_UVC(DX, DY, pi)
        plt.draw()