Beispiel #1
0
 def assign_interpol_controller(self):
     """ controller from optimal actions """
     
     self.b_u0 = interpol2D( self.X[0] , self.X[1] , self.u0_policy , bbox=[None, None, None, None], kx=1, ky=1,)
     self.b_u1 = interpol2D( self.X[0] , self.X[1] , self.u1_policy , bbox=[None, None, None, None], kx=1, ky=1,)
     
     self.DS.ctl = self.feedback_law_interpol
    def assign_interpol_controller(self):
        """ controller from optimal actions """

        # Compute grid of u
        self.u_policy_grid = [None]
        self.u_policy_grid[0] = np.zeros(self.dDS.xgriddim, dtype=float)

        # For all state nodes
        for node in range(self.dDS.nodes_n):

            i = self.dDS.nodes_index[node, 0]
            j = self.dDS.nodes_index[node, 1]

            if (self.action_policy[i, j] == -1):
                self.u_policy_grid[0][i, j] = 0

            else:
                self.u_policy_grid[0][i, j] = self.dDS.actions_input[
                    self.action_policy[i, j], 0]

        # Compute Interpol function
        self.x2u0 = interpol2D(
            self.dDS.xd[0],
            self.dDS.xd[1],
            self.u_policy_grid[0],
            bbox=[None, None, None, None],
            kx=1,
            ky=1,
        )

        # Asign Controller
        self.DS.ctl = self.ctl_interpol
    def assign_interpol_controller(self):
        """ controller from optimal actions """
        
        # Compute grid of u
        self.u_policy_grid    = [ None ]
        self.u_policy_grid[0] = np.zeros( self.dDS.xgriddim , dtype = float )
        
        # For all state nodes        
        for node in range( self.dDS.nodes_n ):  
            
                i = self.dDS.nodes_index[ node , 0 ]
                j = self.dDS.nodes_index[ node , 1 ]
                
                if ( self.action_policy[i,j] == -1 ):
                    self.u_policy_grid[0][i,j] = 0 
                    
                else:
                    self.u_policy_grid[0][i,j] = self.dDS.actions_input[ self.action_policy[i,j] , 0 ]
        

        # Compute Interpol function
        self.x2u0 = interpol2D( self.dDS.xd[0] , self.dDS.xd[1] , self.u_policy_grid[0] , bbox=[None, None, None, None], kx=1, ky=1,)
        
        # Asign Controller
        self.DS.ctl = self.ctl_interpol
Beispiel #4
0
    def assign_interpol_controller(self):
        """ controller from optimal actions """

        # Compute grid of u
        self.u_policy_grid = []

        # for all inputs
        for k in range(self.sys.m):
            self.u_policy_grid.append(
                np.zeros(self.grid_sys.xgriddim, dtype=float))

        # For all state nodes
        for node in range(self.grid_sys.nodes_n):

            # use tuple to get dynamic list of indices
            indices = tuple(self.grid_sys.nodes_index[node, i]
                            for i in range(self.n_dim))

            # If no action is good
            if (self.action_policy[indices] == -1):

                # for all inputs
                for k in range(self.sys.m):
                    self.u_policy_grid[k][indices] = 0

            else:
                # for all inputs
                for k in range(self.sys.m):
                    self.u_policy_grid[k][
                        indices] = self.grid_sys.actions_input[
                            self.action_policy[indices], k]

        # Compute Interpol function
        self.interpol_functions = []

        # for all inputs
        for k in range(self.sys.m):
            if self.n_dim == 2:
                self.interpol_functions.append(
                    interpol2D(
                        self.grid_sys.xd[0],
                        self.grid_sys.xd[1],
                        self.u_policy_grid[k],
                        bbox=[None, None, None, None],
                        kx=1,
                        ky=1,
                    ))
            elif self.n_dim == 3:
                self.interpol_functions.append(
                    rgi([
                        self.grid_sys.xd[0], self.grid_sys.xd[1],
                        self.grid_sys.xd[2]
                    ], self.u_policy_grid[k]))
            else:
                points = tuple(self.grid_sys.xd[i] for i in range(self.n_dim))
                self.interpol_functions.append(
                    rgi(points, self.u_policy_grid[k], method='linear'))

        # Asign Controller
        self.ctl.vi_law = self.vi_law
Beispiel #5
0
    def assign_interpol_controller(self):
        """ controller from optimal actions """

        # Compute grid of u
        self.u_policy_grid = []

        # for all inputs
        for k in range(self.sys.m):
            self.u_policy_grid.append(
                np.zeros(self.grid_sys.xgriddim, dtype=float))

        # For all state nodes
        for node in range(self.grid_sys.nodes_n):

            i = self.grid_sys.nodes_index[node, 0]
            j = self.grid_sys.nodes_index[node, 1]

            # If no action is good
            if (self.action_policy[i, j] == -1):

                # for all inputs
                for k in range(self.sys.m):
                    self.u_policy_grid[k][i, j] = 0

            else:
                # for all inputs
                for k in range(self.sys.m):
                    self.u_policy_grid[k][i, j] = self.grid_sys.actions_input[
                        self.action_policy[i, j], k]

        # Compute Interpol function
        self.x2u_interpol_functions = []

        # for all inputs
        for k in range(self.sys.m):
            self.x2u_interpol_functions.append(
                interpol2D(
                    self.grid_sys.xd[0],
                    self.grid_sys.xd[1],
                    self.u_policy_grid[k],
                    bbox=[None, None, None, None],
                    kx=1,
                    ky=1,
                ))

        # Asign Controller
        self.ctl.vi_law = self.vi_law
    def assign_interpol_controller(self):
        """ controller from optimal actions """
        
        # Compute grid of u
        self.u_policy_grid    = []
        
        # for all inputs
        for k in range(self.sys.m):
            self.u_policy_grid.append( np.zeros( self.grid_sys.xgriddim , dtype = float ) )
        
        # For all state nodes        
        for node in range( self.grid_sys.nodes_n ):  
            
                i = self.grid_sys.nodes_index[ node , 0 ]
                j = self.grid_sys.nodes_index[ node , 1 ]
                
                # If no action is good
                if ( self.action_policy[i,j] == -1 ):
                    
                    # for all inputs
                    for k in range(self.sys.m):
                        self.u_policy_grid[k][i,j] = 0 
                    
                else:
                    # for all inputs
                    for k in range(self.sys.m):
                        self.u_policy_grid[k][i,j] = self.grid_sys.actions_input[ self.action_policy[i,j] , k ]
        

        # Compute Interpol function
        self.x2u_interpol_functions = []
        
        # for all inputs
        for k in range(self.sys.m):
            self.x2u_interpol_functions.append(
                    interpol2D( self.grid_sys.xd[0] , 
                                self.grid_sys.xd[1] , 
                                self.u_policy_grid[k] , 
                                bbox=[None, None, None, None], 
                                kx=1, ky=1,) )
        
        # Asign Controller
        self.ctl.vi_law = self.vi_law
Beispiel #7
0
    def Qlearn(self,i,j,k):
        """  """
        J_interpol = interpol2D( self.X[0] , self.X[1] , self.J , bbox=[None, None, None, None], kx=1, ky=1,)
        
        x      = np.array([ self.X[0][i]  ,  self.X[1][j] ])
        u      = self.U[k]        
        x_next = self.X_next[i,j,k]
        
        x_ok = self.X_ok[i,j,k]
        u_ok = self.U_ok[i,j,k]
        
        if self.DS.m ==1:
            u = np.array( [u] )
        
        # New Q sample

        if x_ok and u_ok:
            J_next = J_interpol( x_next[0] , x_next[1] )
            Q_sample = self.g( x , u ) + J_next[0,0]
        else:
            Q_sample = self.INF
        
        # Q update
        error          = Q_sample      - self.Q[i,j,k]
        self.Q[i,j,k]  = self.Q[i,j,k] + self.alpha * error
        
        # J and Policy update        
        Q_list                  = self.Q[i,j,:]        
        self.J[i,j]             = Q_list.min()
        self.action_policy[i,j] = Q_list.argmin()
        self.u0_policy[i,j]     = self.U[ self.action_policy[i,j] ]
                
        # Impossible situation
        if self.J[i,j] > (self.INF-1) :
            self.action_policy[i,j]      = -1
            self.u0_policy[i,j]          =  0
Beispiel #8
0
 def compute_step(self):
     """ One step of value iteration """
     
     # Get interpolation of current cost space
     J_interpol = interpol2D( self.X[0] , self.X[1] , self.J , bbox=[None, None, None, None], kx=1, ky=1,)
     
     for i in range(self.Nx0):
         for j in range(self.Nx1):
             
             # Actual state vector
             x = np.array([ self.X[0][i]  ,  self.X[1][j] ])
             #print x
             
             # One steps costs
             C = np.zeros( self.Nu0 * 2 ) 
             
             for k in range( self.Nu0 * 2 ):
                 
                 # Current u vector to test
                 u = self.U[k]                    
                 
                 # Compute possibles futur states
                 x_next = self.X_next[i,j,k]  #x_next = self.DS.fc( x , u ) * self.dt + x
                                     
                 # validity of the options
                 #x_ok = self.DS.isavalidstate(x_next)
                 #u_ok = self.DS.isavalidinput(x,u)
                 x_ok = self.X_ok[i,j,k]
                 u_ok = self.U_ok[i,j,k]
                 
                 # If the current option is allowable
                 if x_ok and u_ok:
                     
                     J_next = J_interpol( x_next[0] , x_next[1] )
                     
                     # Cost-to-go of a given action
                     C[k] = self.g( x , u ) + J_next[0,0]
                     
                 else:
                     # Not allowable states or inputs/states combinations
                     C[k] = self.INF
                     
                 #print x,u,x_next,C[k]
                     
             self.Jnew[i,j]          = C.min()
             self.action_policy[i,j] = C.argmin()
             self.u0_policy[i,j]     = self.U[ self.action_policy[i,j] ]
             
             # Impossible situation
             if self.Jnew[i,j] > (self.INF-1) :
                 self.action_policy[i,j]      = -1
                 self.u0_policy[i,j]          =  0
             
     
     delta = self.J - self.Jnew
     j_max     = self.Jnew.max()
     delta_max = delta.max()
     delta_min = delta.min()
     print('Max:',j_max,'Delta max:',delta_max, 'Delta min:',delta_min)
     
     self.J = self.Jnew.copy()
    def compute_step(self):
        """ One step of value iteration """
        
        # Get interpolation of current cost space
        J_interpol = interpol2D( self.grid_sys.xd[0] , self.grid_sys.xd[1] , self.J , bbox=[None, None, None, None], kx=1, ky=1,)
        
        # For all state nodes        
        for node in range( self.grid_sys.nodes_n ):  
            
                x = self.grid_sys.nodes_state[ node , : ]
                
                i = self.grid_sys.nodes_index[ node , 0 ]
                j = self.grid_sys.nodes_index[ node , 1 ]

                # One steps costs - Q values
                Q = np.zeros( self.grid_sys.actions_n  ) 
                
                # For all control actions
                for action in range( self.grid_sys.actions_n ):
                    
                    u = self.grid_sys.actions_input[ action , : ]
                    
                    # Compute next state and validity of the action                    
                    
                    if self.uselookuptable:
                        
                        x_next        = self.grid_sys.x_next[node,action,:]
                        action_isok   = self.grid_sys.action_isok[node,action]
                        
                    else:
                        
                        x_next        = self.sys.f( x , u ) * self.dt + x
                        x_ok          = self.sys.isavalidstate(x_next)
                        u_ok          = self.sys.isavalidinput(x,u)
                        action_isok   = ( u_ok & x_ok )
                    
                    # If the current option is allowable
                    if action_isok:
                        
                        J_next = J_interpol( x_next[0] , x_next[1] )
                        
                        # Cost-to-go of a given action
                        Q[action] = self.cf.g( x , u ) + J_next[0,0]
                        
                    else:
                        # Not allowable states or inputs/states combinations
                        Q[action] = self.cf.INF
                        
                        
                self.Jnew[i,j]          = Q.min()
                self.action_policy[i,j] = Q.argmin()
                
                # Impossible situation ( unaceptable situation for any control actions )
                if self.Jnew[i,j] > (self.cf.INF-1) :
                    self.action_policy[i,j]      = -1
        
        
        # Convergence check        
        delta = self.J - self.Jnew
        j_max     = self.Jnew.max()
        delta_max = delta.max()
        delta_min = delta.min()
        print('Max:',j_max,'Delta max:',delta_max, 'Delta min:',delta_min)
        
        self.J = self.Jnew.copy()
Beispiel #10
0
    def compute_step(self):
        """ One step of value iteration """

        # Get interpolation of current cost space
        J_interpol = interpol2D(
            self.grid_sys.xd[0],
            self.grid_sys.xd[1],
            self.J,
            bbox=[None, None, None, None],
            kx=1,
            ky=1,
        )

        # For all state nodes
        for node in range(self.grid_sys.nodes_n):

            x = self.grid_sys.nodes_state[node, :]

            i = self.grid_sys.nodes_index[node, 0]
            j = self.grid_sys.nodes_index[node, 1]

            # One steps costs - Q values
            Q = np.zeros(self.grid_sys.actions_n)

            # For all control actions
            for action in range(self.grid_sys.actions_n):

                u = self.grid_sys.actions_input[action, :]

                # Compute next state and validity of the action

                if self.uselookuptable:

                    x_next = self.grid_sys.x_next[node, action, :]
                    action_isok = self.grid_sys.action_isok[node, action]

                else:

                    x_next = self.sys.f(x, u) * self.dt + x
                    x_ok = self.sys.isavalidstate(x_next)
                    u_ok = self.sys.isavalidinput(x, u)
                    action_isok = (u_ok & x_ok)

                # If the current option is allowable
                if action_isok:

                    J_next = J_interpol(x_next[0], x_next[1])

                    # Cost-to-go of a given action
                    Q[action] = self.cf.g(x, u) + J_next[0, 0]

                else:
                    # Not allowable states or inputs/states combinations
                    Q[action] = self.cf.INF

            self.Jnew[i, j] = Q.min()
            self.action_policy[i, j] = Q.argmin()

            # Impossible situation ( unaceptable situation for any control actions )
            if self.Jnew[i, j] > (self.cf.INF - 1):
                self.action_policy[i, j] = -1

        # Convergence check
        delta = self.J - self.Jnew
        j_max = self.Jnew.max()
        delta_max = delta.max()
        delta_min = delta.min()
        print('Max:', j_max, 'Delta max:', delta_max, 'Delta min:', delta_min)

        self.J = self.Jnew.copy()
Beispiel #11
0
    def compute_step(self):
        """ One step of value iteration """

        # Get interpolation of current cost space
        if self.n_dim == 2:
            J_interpol = interpol2D(self.grid_sys.xd[0],
                                    self.grid_sys.xd[1],
                                    self.J,
                                    bbox=[None, None, None, None],
                                    kx=1,
                                    ky=1)
        elif self.n_dim == 3:
            # call function for random shape
            J_interpol = rgi([
                self.grid_sys.xd[0], self.grid_sys.xd[1], self.grid_sys.xd[2]
            ], self.J)
        else:
            points = tuple(self.grid_sys.xd[i] for i in range(self.n_dim))
            J_interpol = rgi(points, self.J)

        # For all state nodes
        for node in range(self.grid_sys.nodes_n):

            x = self.grid_sys.nodes_state[node, :]

            # use tuple to get dynamic list of indices
            indices = tuple(self.grid_sys.nodes_index[node, i]
                            for i in range(self.n_dim))

            # One steps costs - Q values
            Q = np.zeros(self.grid_sys.actions_n)

            # For all control actions
            for action in range(self.grid_sys.actions_n):

                u = self.grid_sys.actions_input[action, :]

                # Compute next state and validity of the action

                if self.uselookuptable:

                    x_next = self.grid_sys.x_next[node, action, :]
                    action_isok = self.grid_sys.action_isok[node, action]

                else:

                    x_next = self.sys.f(x, u) * self.grid_sys.dt + x
                    x_ok = self.sys.isavalidstate(x_next)
                    u_ok = self.sys.isavalidinput(x, u)
                    action_isok = (u_ok & x_ok)

                # If the current option is allowable
                if action_isok:
                    if self.n_dim == 2:
                        J_next = J_interpol(x_next[0], x_next[1])
                    elif self.n_dim == 3:
                        J_next = J_interpol([x_next[0], x_next[1], x_next[2]])
                    else:
                        J_next = J_interpol(
                            [x_next[0], x_next[1], x_next[2], x_next[3]])

                    # Cost-to-go of a given action
                    y = self.sys.h(x, u, 0)
                    if self.n_dim == 2:
                        Q[action] = (self.cf.g(x, u, y, 0) * self.grid_sys.dt +
                                     J_next[0, 0])
                    else:
                        Q[action] = (self.cf.g(x, u, y, 0) * self.grid_sys.dt +
                                     J_next)

                else:
                    # Not allowable states or inputs/states combinations
                    Q[action] = self.cf.INF

            self.Jnew[indices] = Q.min()
            self.action_policy[indices] = Q.argmin()

            # Impossible situation ( unaceptable situation for any control actions )
            if self.Jnew[indices] > (self.cf.INF - 1):
                self.action_policy[indices] = -1

        # Convergence check
        delta = self.J - self.Jnew
        j_max = self.Jnew.max()
        delta_max = delta.max()
        delta_min = delta.min()
        print('Max:', j_max, 'Delta max:', delta_max, 'Delta min:', delta_min)

        self.J = self.Jnew.copy()

        # TODO: Combine deltas? Check if delta_min or max changes
        return delta_min