Exemple #1
0
    def showDomain(self, a=0, s=None):
        if s is None:
            s = self.state

        # Draw the environment
        if self.domain_fig is None:
            self.agent_fig = plt.figure("Domain")
            self.domain_fig = plt.imshow(self.map,
                                         cmap='GridWorld',
                                         interpolation='nearest',
                                         vmin=0,
                                         vmax=5)
            plt.xticks(np.arange(self.COLS), fontsize=FONTSIZE)
            plt.yticks(np.arange(self.ROWS), fontsize=FONTSIZE)
            # pl.tight_layout()
            self.agent_fig = plt.gca().plot(s[1],
                                            s[0],
                                            'kd',
                                            markersize=20.0 - self.COLS)
            plt.show()
        self.agent_fig.pop(0).remove()
        self.agent_fig = plt.figure("Domain")
        #mapcopy = copy(self.map)
        #mapcopy[s[0],s[1]] = self.AGENT
        # self.domain_fig.set_data(mapcopy)
        # Instead of '>' you can use 'D', 'o'
        self.agent_fig = plt.gca().plot(s[1],
                                        s[0],
                                        'k>',
                                        markersize=20.0 - self.COLS)
        plt.figure("Domain").canvas.draw()
        plt.figure("Domain").canvas.flush_events()
Exemple #2
0
    def showLearning(self, representation):
        if self.valueFunction_fig is None:
            plt.figure("Value Function")
            self.valueFunction_fig = plt.imshow(self.map,
                                                cmap='ValueFunction',
                                                interpolation='nearest',
                                                vmin=self.MIN_RETURN,
                                                vmax=self.MAX_RETURN)
            plt.xticks(np.arange(self.COLS), fontsize=12)
            plt.yticks(np.arange(self.ROWS), fontsize=12)
            # Create quivers for each action. 4 in total
            X = np.arange(self.ROWS) - self.SHIFT
            Y = np.arange(self.COLS)
            X, Y = np.meshgrid(X, Y)
            DX = DY = np.ones(X.shape)
            C = np.zeros(X.shape)
            C[0, 0] = 1  # Making sure C has both 0 and 1
            # length of arrow/width of bax. Less then 0.5 because each arrow is
            # offset, 0.4 looks nice but could be better/auto generated
            arrow_ratio = 0.4
            Max_Ratio_ArrowHead_to_ArrowLength = 0.25
            ARROW_WIDTH = 0.5 * Max_Ratio_ArrowHead_to_ArrowLength / 5.0
            self.upArrows_fig = plt.quiver(Y,
                                           X,
                                           DY,
                                           DX,
                                           C,
                                           units='y',
                                           cmap='Actions',
                                           scale_units="height",
                                           scale=old_div(
                                               self.ROWS, arrow_ratio),
                                           width=-1 * ARROW_WIDTH)
            self.upArrows_fig.set_clim(vmin=0, vmax=1)
            X = np.arange(self.ROWS) + self.SHIFT
            Y = np.arange(self.COLS)
            X, Y = np.meshgrid(X, Y)
            self.downArrows_fig = plt.quiver(Y,
                                             X,
                                             DY,
                                             DX,
                                             C,
                                             units='y',
                                             cmap='Actions',
                                             scale_units="height",
                                             scale=old_div(
                                                 self.ROWS, arrow_ratio),
                                             width=-1 * ARROW_WIDTH)
            self.downArrows_fig.set_clim(vmin=0, vmax=1)
            X = np.arange(self.ROWS)
            Y = np.arange(self.COLS) - self.SHIFT
            X, Y = np.meshgrid(X, Y)
            self.leftArrows_fig = plt.quiver(Y,
                                             X,
                                             DY,
                                             DX,
                                             C,
                                             units='x',
                                             cmap='Actions',
                                             scale_units="width",
                                             scale=old_div(
                                                 self.COLS, arrow_ratio),
                                             width=ARROW_WIDTH)
            self.leftArrows_fig.set_clim(vmin=0, vmax=1)
            X = np.arange(self.ROWS)
            Y = np.arange(self.COLS) + self.SHIFT
            X, Y = np.meshgrid(X, Y)
            self.rightArrows_fig = plt.quiver(Y,
                                              X,
                                              DY,
                                              DX,
                                              C,
                                              units='x',
                                              cmap='Actions',
                                              scale_units="width",
                                              scale=old_div(
                                                  self.COLS, arrow_ratio),
                                              width=ARROW_WIDTH)
            self.rightArrows_fig.set_clim(vmin=0, vmax=1)
            plt.show()
        plt.figure("Value Function")
        V = np.zeros((self.ROWS, self.COLS))
        # Boolean 3 dimensional array. The third array highlights the action.
        # Thie mask is used to see in which cells what actions should exist
        Mask = np.ones((self.COLS, self.ROWS, self.actions_num), dtype='bool')
        arrowSize = np.zeros((self.COLS, self.ROWS, self.actions_num),
                             dtype='float')
        # 0 = suboptimal action, 1 = optimal action
        arrowColors = np.zeros((self.COLS, self.ROWS, self.actions_num),
                               dtype='uint8')
        for r in range(self.ROWS):
            for c in range(self.COLS):
                if self.map[r, c] == self.BLOCKED:
                    V[r, c] = self.BLOCK_REWARD
                if self.map[r, c] == self.GOAL:
                    V[r, c] = self.MAX_RETURN
                if self.map[r, c] == self.PIT:
                    V[r, c] = self.MIN_RETURN
                if self.map[r, c] == self.EMPTY or self.map[r,
                                                            c] == self.START:
                    s = np.array([r, c])
                    As = self.possibleActions(s)
                    terminal = self.isTerminal(s)
                    Qs = representation.Qs(s, terminal)
                    bestA = representation.bestActions(s, terminal, As)
                    V[r, c] = max(Qs[As])
                    Mask[c, r, As] = False
                    arrowColors[c, r, bestA] = 1

                    for i in range(len(As)):
                        a = As[i]
                        Q = Qs[i]
                        value = linearMap(Q, self.MIN_RETURN, self.MAX_RETURN,
                                          0, 1)
                        arrowSize[c, r, a] = value


#        # write value function to txt file
#        with open('GridWorld_Flag3_Value.txt', 'a') as outfile:
#            outfile.write('\n' + str(np.round(V,decimals=2)) + '\n'*2)

# Show Value Function
        self.valueFunction_fig.set_data(V)
        # Show Policy Up Arrows
        DX = arrowSize[:, :, 0]
        DY = np.zeros((self.ROWS, self.COLS))
        DX = np.ma.masked_array(DX, mask=Mask[:, :, 0])
        DY = np.ma.masked_array(DY, mask=Mask[:, :, 0])
        C = np.ma.masked_array(arrowColors[:, :, 0], mask=Mask[:, :, 0])
        self.upArrows_fig.set_UVC(DY, DX, C)
        # Show Policy Down Arrows
        DX = -arrowSize[:, :, 1]
        DY = np.zeros((self.ROWS, self.COLS))
        DX = np.ma.masked_array(DX, mask=Mask[:, :, 1])
        DY = np.ma.masked_array(DY, mask=Mask[:, :, 1])
        C = np.ma.masked_array(arrowColors[:, :, 1], mask=Mask[:, :, 1])
        self.downArrows_fig.set_UVC(DY, DX, C)
        # Show Policy Left Arrows
        DX = np.zeros((self.ROWS, self.COLS))
        DY = -arrowSize[:, :, 2]
        DX = np.ma.masked_array(DX, mask=Mask[:, :, 2])
        DY = np.ma.masked_array(DY, mask=Mask[:, :, 2])
        C = np.ma.masked_array(arrowColors[:, :, 2], mask=Mask[:, :, 2])
        self.leftArrows_fig.set_UVC(DY, DX, C)
        # Show Policy Right Arrows
        DX = np.zeros((self.ROWS, self.COLS))
        DY = arrowSize[:, :, 3]
        DX = np.ma.masked_array(DX, mask=Mask[:, :, 3])
        DY = np.ma.masked_array(DY, mask=Mask[:, :, 3])
        C = np.ma.masked_array(arrowColors[:, :, 3], mask=Mask[:, :, 3])
        self.rightArrows_fig.set_UVC(DY, DX, C)
        plt.draw()