def assign_interpol_controller(self): """ controller from optimal actions """ self.b_u0 = interpol2D( self.X[0] , self.X[1] , self.u0_policy , bbox=[None, None, None, None], kx=1, ky=1,) self.b_u1 = interpol2D( self.X[0] , self.X[1] , self.u1_policy , bbox=[None, None, None, None], kx=1, ky=1,) self.DS.ctl = self.feedback_law_interpol
def assign_interpol_controller(self): """ controller from optimal actions """ # Compute grid of u self.u_policy_grid = [None] self.u_policy_grid[0] = np.zeros(self.dDS.xgriddim, dtype=float) # For all state nodes for node in range(self.dDS.nodes_n): i = self.dDS.nodes_index[node, 0] j = self.dDS.nodes_index[node, 1] if (self.action_policy[i, j] == -1): self.u_policy_grid[0][i, j] = 0 else: self.u_policy_grid[0][i, j] = self.dDS.actions_input[ self.action_policy[i, j], 0] # Compute Interpol function self.x2u0 = interpol2D( self.dDS.xd[0], self.dDS.xd[1], self.u_policy_grid[0], bbox=[None, None, None, None], kx=1, ky=1, ) # Asign Controller self.DS.ctl = self.ctl_interpol
def assign_interpol_controller(self): """ controller from optimal actions """ # Compute grid of u self.u_policy_grid = [ None ] self.u_policy_grid[0] = np.zeros( self.dDS.xgriddim , dtype = float ) # For all state nodes for node in range( self.dDS.nodes_n ): i = self.dDS.nodes_index[ node , 0 ] j = self.dDS.nodes_index[ node , 1 ] if ( self.action_policy[i,j] == -1 ): self.u_policy_grid[0][i,j] = 0 else: self.u_policy_grid[0][i,j] = self.dDS.actions_input[ self.action_policy[i,j] , 0 ] # Compute Interpol function self.x2u0 = interpol2D( self.dDS.xd[0] , self.dDS.xd[1] , self.u_policy_grid[0] , bbox=[None, None, None, None], kx=1, ky=1,) # Asign Controller self.DS.ctl = self.ctl_interpol
def assign_interpol_controller(self): """ controller from optimal actions """ # Compute grid of u self.u_policy_grid = [] # for all inputs for k in range(self.sys.m): self.u_policy_grid.append( np.zeros(self.grid_sys.xgriddim, dtype=float)) # For all state nodes for node in range(self.grid_sys.nodes_n): # use tuple to get dynamic list of indices indices = tuple(self.grid_sys.nodes_index[node, i] for i in range(self.n_dim)) # If no action is good if (self.action_policy[indices] == -1): # for all inputs for k in range(self.sys.m): self.u_policy_grid[k][indices] = 0 else: # for all inputs for k in range(self.sys.m): self.u_policy_grid[k][ indices] = self.grid_sys.actions_input[ self.action_policy[indices], k] # Compute Interpol function self.interpol_functions = [] # for all inputs for k in range(self.sys.m): if self.n_dim == 2: self.interpol_functions.append( interpol2D( self.grid_sys.xd[0], self.grid_sys.xd[1], self.u_policy_grid[k], bbox=[None, None, None, None], kx=1, ky=1, )) elif self.n_dim == 3: self.interpol_functions.append( rgi([ self.grid_sys.xd[0], self.grid_sys.xd[1], self.grid_sys.xd[2] ], self.u_policy_grid[k])) else: points = tuple(self.grid_sys.xd[i] for i in range(self.n_dim)) self.interpol_functions.append( rgi(points, self.u_policy_grid[k], method='linear')) # Asign Controller self.ctl.vi_law = self.vi_law
def assign_interpol_controller(self): """ controller from optimal actions """ # Compute grid of u self.u_policy_grid = [] # for all inputs for k in range(self.sys.m): self.u_policy_grid.append( np.zeros(self.grid_sys.xgriddim, dtype=float)) # For all state nodes for node in range(self.grid_sys.nodes_n): i = self.grid_sys.nodes_index[node, 0] j = self.grid_sys.nodes_index[node, 1] # If no action is good if (self.action_policy[i, j] == -1): # for all inputs for k in range(self.sys.m): self.u_policy_grid[k][i, j] = 0 else: # for all inputs for k in range(self.sys.m): self.u_policy_grid[k][i, j] = self.grid_sys.actions_input[ self.action_policy[i, j], k] # Compute Interpol function self.x2u_interpol_functions = [] # for all inputs for k in range(self.sys.m): self.x2u_interpol_functions.append( interpol2D( self.grid_sys.xd[0], self.grid_sys.xd[1], self.u_policy_grid[k], bbox=[None, None, None, None], kx=1, ky=1, )) # Asign Controller self.ctl.vi_law = self.vi_law
def assign_interpol_controller(self): """ controller from optimal actions """ # Compute grid of u self.u_policy_grid = [] # for all inputs for k in range(self.sys.m): self.u_policy_grid.append( np.zeros( self.grid_sys.xgriddim , dtype = float ) ) # For all state nodes for node in range( self.grid_sys.nodes_n ): i = self.grid_sys.nodes_index[ node , 0 ] j = self.grid_sys.nodes_index[ node , 1 ] # If no action is good if ( self.action_policy[i,j] == -1 ): # for all inputs for k in range(self.sys.m): self.u_policy_grid[k][i,j] = 0 else: # for all inputs for k in range(self.sys.m): self.u_policy_grid[k][i,j] = self.grid_sys.actions_input[ self.action_policy[i,j] , k ] # Compute Interpol function self.x2u_interpol_functions = [] # for all inputs for k in range(self.sys.m): self.x2u_interpol_functions.append( interpol2D( self.grid_sys.xd[0] , self.grid_sys.xd[1] , self.u_policy_grid[k] , bbox=[None, None, None, None], kx=1, ky=1,) ) # Asign Controller self.ctl.vi_law = self.vi_law
def Qlearn(self,i,j,k): """ """ J_interpol = interpol2D( self.X[0] , self.X[1] , self.J , bbox=[None, None, None, None], kx=1, ky=1,) x = np.array([ self.X[0][i] , self.X[1][j] ]) u = self.U[k] x_next = self.X_next[i,j,k] x_ok = self.X_ok[i,j,k] u_ok = self.U_ok[i,j,k] if self.DS.m ==1: u = np.array( [u] ) # New Q sample if x_ok and u_ok: J_next = J_interpol( x_next[0] , x_next[1] ) Q_sample = self.g( x , u ) + J_next[0,0] else: Q_sample = self.INF # Q update error = Q_sample - self.Q[i,j,k] self.Q[i,j,k] = self.Q[i,j,k] + self.alpha * error # J and Policy update Q_list = self.Q[i,j,:] self.J[i,j] = Q_list.min() self.action_policy[i,j] = Q_list.argmin() self.u0_policy[i,j] = self.U[ self.action_policy[i,j] ] # Impossible situation if self.J[i,j] > (self.INF-1) : self.action_policy[i,j] = -1 self.u0_policy[i,j] = 0
def compute_step(self): """ One step of value iteration """ # Get interpolation of current cost space J_interpol = interpol2D( self.X[0] , self.X[1] , self.J , bbox=[None, None, None, None], kx=1, ky=1,) for i in range(self.Nx0): for j in range(self.Nx1): # Actual state vector x = np.array([ self.X[0][i] , self.X[1][j] ]) #print x # One steps costs C = np.zeros( self.Nu0 * 2 ) for k in range( self.Nu0 * 2 ): # Current u vector to test u = self.U[k] # Compute possibles futur states x_next = self.X_next[i,j,k] #x_next = self.DS.fc( x , u ) * self.dt + x # validity of the options #x_ok = self.DS.isavalidstate(x_next) #u_ok = self.DS.isavalidinput(x,u) x_ok = self.X_ok[i,j,k] u_ok = self.U_ok[i,j,k] # If the current option is allowable if x_ok and u_ok: J_next = J_interpol( x_next[0] , x_next[1] ) # Cost-to-go of a given action C[k] = self.g( x , u ) + J_next[0,0] else: # Not allowable states or inputs/states combinations C[k] = self.INF #print x,u,x_next,C[k] self.Jnew[i,j] = C.min() self.action_policy[i,j] = C.argmin() self.u0_policy[i,j] = self.U[ self.action_policy[i,j] ] # Impossible situation if self.Jnew[i,j] > (self.INF-1) : self.action_policy[i,j] = -1 self.u0_policy[i,j] = 0 delta = self.J - self.Jnew j_max = self.Jnew.max() delta_max = delta.max() delta_min = delta.min() print('Max:',j_max,'Delta max:',delta_max, 'Delta min:',delta_min) self.J = self.Jnew.copy()
def compute_step(self): """ One step of value iteration """ # Get interpolation of current cost space J_interpol = interpol2D( self.grid_sys.xd[0] , self.grid_sys.xd[1] , self.J , bbox=[None, None, None, None], kx=1, ky=1,) # For all state nodes for node in range( self.grid_sys.nodes_n ): x = self.grid_sys.nodes_state[ node , : ] i = self.grid_sys.nodes_index[ node , 0 ] j = self.grid_sys.nodes_index[ node , 1 ] # One steps costs - Q values Q = np.zeros( self.grid_sys.actions_n ) # For all control actions for action in range( self.grid_sys.actions_n ): u = self.grid_sys.actions_input[ action , : ] # Compute next state and validity of the action if self.uselookuptable: x_next = self.grid_sys.x_next[node,action,:] action_isok = self.grid_sys.action_isok[node,action] else: x_next = self.sys.f( x , u ) * self.dt + x x_ok = self.sys.isavalidstate(x_next) u_ok = self.sys.isavalidinput(x,u) action_isok = ( u_ok & x_ok ) # If the current option is allowable if action_isok: J_next = J_interpol( x_next[0] , x_next[1] ) # Cost-to-go of a given action Q[action] = self.cf.g( x , u ) + J_next[0,0] else: # Not allowable states or inputs/states combinations Q[action] = self.cf.INF self.Jnew[i,j] = Q.min() self.action_policy[i,j] = Q.argmin() # Impossible situation ( unaceptable situation for any control actions ) if self.Jnew[i,j] > (self.cf.INF-1) : self.action_policy[i,j] = -1 # Convergence check delta = self.J - self.Jnew j_max = self.Jnew.max() delta_max = delta.max() delta_min = delta.min() print('Max:',j_max,'Delta max:',delta_max, 'Delta min:',delta_min) self.J = self.Jnew.copy()
def compute_step(self): """ One step of value iteration """ # Get interpolation of current cost space J_interpol = interpol2D( self.grid_sys.xd[0], self.grid_sys.xd[1], self.J, bbox=[None, None, None, None], kx=1, ky=1, ) # For all state nodes for node in range(self.grid_sys.nodes_n): x = self.grid_sys.nodes_state[node, :] i = self.grid_sys.nodes_index[node, 0] j = self.grid_sys.nodes_index[node, 1] # One steps costs - Q values Q = np.zeros(self.grid_sys.actions_n) # For all control actions for action in range(self.grid_sys.actions_n): u = self.grid_sys.actions_input[action, :] # Compute next state and validity of the action if self.uselookuptable: x_next = self.grid_sys.x_next[node, action, :] action_isok = self.grid_sys.action_isok[node, action] else: x_next = self.sys.f(x, u) * self.dt + x x_ok = self.sys.isavalidstate(x_next) u_ok = self.sys.isavalidinput(x, u) action_isok = (u_ok & x_ok) # If the current option is allowable if action_isok: J_next = J_interpol(x_next[0], x_next[1]) # Cost-to-go of a given action Q[action] = self.cf.g(x, u) + J_next[0, 0] else: # Not allowable states or inputs/states combinations Q[action] = self.cf.INF self.Jnew[i, j] = Q.min() self.action_policy[i, j] = Q.argmin() # Impossible situation ( unaceptable situation for any control actions ) if self.Jnew[i, j] > (self.cf.INF - 1): self.action_policy[i, j] = -1 # Convergence check delta = self.J - self.Jnew j_max = self.Jnew.max() delta_max = delta.max() delta_min = delta.min() print('Max:', j_max, 'Delta max:', delta_max, 'Delta min:', delta_min) self.J = self.Jnew.copy()
def compute_step(self): """ One step of value iteration """ # Get interpolation of current cost space if self.n_dim == 2: J_interpol = interpol2D(self.grid_sys.xd[0], self.grid_sys.xd[1], self.J, bbox=[None, None, None, None], kx=1, ky=1) elif self.n_dim == 3: # call function for random shape J_interpol = rgi([ self.grid_sys.xd[0], self.grid_sys.xd[1], self.grid_sys.xd[2] ], self.J) else: points = tuple(self.grid_sys.xd[i] for i in range(self.n_dim)) J_interpol = rgi(points, self.J) # For all state nodes for node in range(self.grid_sys.nodes_n): x = self.grid_sys.nodes_state[node, :] # use tuple to get dynamic list of indices indices = tuple(self.grid_sys.nodes_index[node, i] for i in range(self.n_dim)) # One steps costs - Q values Q = np.zeros(self.grid_sys.actions_n) # For all control actions for action in range(self.grid_sys.actions_n): u = self.grid_sys.actions_input[action, :] # Compute next state and validity of the action if self.uselookuptable: x_next = self.grid_sys.x_next[node, action, :] action_isok = self.grid_sys.action_isok[node, action] else: x_next = self.sys.f(x, u) * self.grid_sys.dt + x x_ok = self.sys.isavalidstate(x_next) u_ok = self.sys.isavalidinput(x, u) action_isok = (u_ok & x_ok) # If the current option is allowable if action_isok: if self.n_dim == 2: J_next = J_interpol(x_next[0], x_next[1]) elif self.n_dim == 3: J_next = J_interpol([x_next[0], x_next[1], x_next[2]]) else: J_next = J_interpol( [x_next[0], x_next[1], x_next[2], x_next[3]]) # Cost-to-go of a given action y = self.sys.h(x, u, 0) if self.n_dim == 2: Q[action] = (self.cf.g(x, u, y, 0) * self.grid_sys.dt + J_next[0, 0]) else: Q[action] = (self.cf.g(x, u, y, 0) * self.grid_sys.dt + J_next) else: # Not allowable states or inputs/states combinations Q[action] = self.cf.INF self.Jnew[indices] = Q.min() self.action_policy[indices] = Q.argmin() # Impossible situation ( unaceptable situation for any control actions ) if self.Jnew[indices] > (self.cf.INF - 1): self.action_policy[indices] = -1 # Convergence check delta = self.J - self.Jnew j_max = self.Jnew.max() delta_max = delta.max() delta_min = delta.min() print('Max:', j_max, 'Delta max:', delta_max, 'Delta min:', delta_min) self.J = self.Jnew.copy() # TODO: Combine deltas? Check if delta_min or max changes return delta_min