def ilqr_pend(dt=0.1):
    x = T.dscalar("x")          # Position.
    x_dot = T.dscalar("x_dot")  # Velocity.
    u = T.dscalar("u")          # Force.

    # x_dot_dot = l * (x_dot - x**2) - u
    x_dot_dot = -T.sin(x) - u   # Pendulum acceleration (T.sin keeps the expression symbolic).

    f = T.stack([
        x + x_dot * dt,
        x_dot + x_dot_dot * dt,
    ])

    x_inputs = [x, x_dot]  # State vector.
    u_inputs = [u]         # Control vector.

    dynamics = AutoDiffDynamics(f, x_inputs, u_inputs)
    return dynamics
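# A minimal usage sketch (not from the original script) showing how the pendulum
# dynamics above can be paired with the QRCost and iLQR classes used elsewhere in
# this file (imports omitted here, as elsewhere). The goal state, weights, horizon,
# and initial state below are illustrative assumptions.
pend_dynamics = ilqr_pend(dt=0.1)
Q_pend = np.eye(pend_dynamics.state_size)         # Weight on angle and velocity error.
R_pend = 0.1 * np.eye(pend_dynamics.action_size)  # Weight on control effort.
pend_cost = QRCost(Q_pend, R_pend, x_goal=np.array([np.pi, 0.0]))  # Assumed upright goal.
N_pend = 100                                      # Assumed horizon length.
x0_pend = np.array([0.0, 0.0])                    # Assumed start: hanging at rest.
us_init_pend = np.random.uniform(-1, 1, (N_pend, pend_dynamics.action_size))
ilqr_pendulum = iLQR(pend_dynamics, pend_cost, N_pend)
xs_pend, us_pend = ilqr_pendulum.fit(x0_pend, us_init_pend)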
def run_IterLinQuadReg_matrix(self, A, B, C, dist_info_sharing='AM', us_init=None):
    x_input, u_input = self.state_inputs()

    # Collapse a batch of state-space matrices into a single (A, B, C) model.
    if np.ndim(A) != 2:
        if dist_info_sharing == 'GM':
            # Element-wise geometric mean across the batch.
            A = gmean(A, axis=0)
            B = gmean(B, axis=0)
            C = gmean(C, axis=0)
            print(A.shape, 'A', B.shape, 'B', C.shape, 'C')
        elif dist_info_sharing == 'AM':
            # Arithmetic mean across the batch.
            A = np.sum(A, axis=0) / A.shape[0]
            B = np.sum(B, axis=0, keepdims=True) / B.shape[0]
            B = B.T
            C = np.sum(C, axis=0) / C.shape[0]
        else:
            pass

    f = self.next_states_matrix(x_input, u_input, A, B, C)
    dynamics = AutoDiffDynamics(f, x_input, u_input)
    x_goal = self.augment_state(self.x_goal)

    if self.Q_terminal.all() is None:
        cost = QRCost(self.Q, self.R)
    else:
        cost = QRCost(self.Q, self.R, Q_terminal=self.Q_terminal, x_goal=x_goal)

    x0 = self.augment_state(self.x0)
    if us_init is None:
        us_init = np.random.uniform(-1, 1, (self.N, dynamics.action_size))

    ilqr = iLQR(dynamics, cost, self.N)
    xs, us = ilqr.fit(x0, us_init, on_iteration=self.on_iteration)
    return xs, us
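# Illustrative check (assumed toy values, not from the original code) of the two
# dist_info_sharing modes above: a stack of per-rollout A matrices is collapsed either
# by arithmetic mean ('AM') or by element-wise geometric mean ('GM', which assumes
# positive entries).
A_stack = np.array([[[1.0, 0.1], [0.2, 1.0]],
                    [[1.2, 0.3], [0.1, 0.8]]])
A_am = np.sum(A_stack, axis=0) / A_stack.shape[0]  # Arithmetic mean over the batch axis.
A_gm = gmean(A_stack, axis=0)                      # Geometric mean over the batch axis.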
def run_IterLinQuadReg(self, us_init=None):
    x_input, u_input = self.state_inputs()
    x_dot_dot, theta_dot_dot, theta_prime = self.accel(x_input, u_input, self.xd)
    f = self.next_states(x_input, x_dot_dot, theta_dot_dot, theta_prime)
    dynamics = AutoDiffDynamics(f, x_input, u_input, hessians=False)
    x_goal = self.augment_state(self.x_goal)

    if self.Q_terminal.all() is None:
        cost = QRCost(self.Q, self.R)
    else:
        cost = QRCost(self.Q, self.R, Q_terminal=self.Q_terminal, x_goal=x_goal)

    x0 = self.augment_state(self.x0)
    if us_init is None:
        us_init = np.random.uniform(-1, 1, (self.N, dynamics.action_size))

    ilqr = iLQR(dynamics, cost, self.N, hessians=False)
    xs, us = ilqr.fit(x0, us_init, on_iteration=self.on_iteration, n_iterations=1000)
    # print(ilqr._K, 'this is capital K')
    return xs, us, ilqr._k, ilqr._K
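# A hedged sketch (not part of the class above) of how the nominal trajectory and gains
# returned by run_IterLinQuadReg could be replayed as a time-varying linear feedback
# policy u_t = us[t] + k[t] + K[t] (x_t - xs[t]). `simulate_step` is a hypothetical
# one-step simulator of the real plant.
def rollout_with_feedback(xs, us, k, K, x0, simulate_step):
    x = x0
    trajectory = [x]
    for t in range(len(us)):
        u = us[t] + k[t] + K[t].dot(x - xs[t])  # Local linear feedback around the nominal.
        x = simulate_step(x, u)                 # Hypothetical plant/simulator step.
        trajectory.append(x)
    return np.array(trajectory)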
# Acceleration.
def acceleration(x_dot, u):
    x_dot_dot = x_dot * (1 - alpha * dt / m) + u * dt / m
    return x_dot_dot


# Discrete dynamics model definition.
f = T.stack([
    x_inputs[0] + x_inputs[2] * dt,
    x_inputs[1] + x_inputs[3] * dt,
    x_inputs[2] + acceleration(x_inputs[2], u_inputs[0]) * dt,
    x_inputs[3] + acceleration(x_inputs[3], u_inputs[1]) * dt,
])

dynamics = AutoDiffDynamics(f, x_inputs, u_inputs)

# The vehicles are initialized at $(0, 0)$ and $(10, 10)$ with velocities $(0, -5)$
# and $(5, 0)$ respectively.

# In[8]:

N = 200  # Number of time steps in trajectory.
x0 = np.array([10.0, 10.0, 5.0, 3.0])  # Initial state.

x_traj = get_traj(10.0, 5.0, 5.0, 0.0, 2.0, 0.01)
y_traj = get_traj(10.0, 5.0, 3.0, 0.0, 2.0, 0.01)

x_path = []
u_path = []
for i in range(N):
    x_path.append([
        x_traj["q"][i][0],
        y_traj["q"][i][0],
        x_traj["qd"][i][0],
f = T.stack([
    x0_v + (x1_v * dt),
    x1_v + (((U0 / (T0**2)) * alpha_v * u2 + (U0 / T0) * beta_v * u1 + gamma_v * U0 * u0 - phi_v * (X0 / T0) * x1_v - xi_v * X0 * x0_v) / (X0 / (T0**2))) * dt,
    # x2_v + ,
    x0_h + (x1_h * dt),
    x1_h + (((U0 / (T0**2)) * alpha_h * u2 + (U0 / T0) * beta_h * u1 + gamma_h * U0 * u0 - phi_h * (X0 / T0) * x1_h - xi_h * X0 * x0_h) / (X0 / (T0**2))) * dt,
    # x2_h + ,
    u0 + (u1 * dt),
    u1 + (u2 * dt)
])

dynamics = AutoDiffDynamics(f, x_inputs, u_inputs)
# dynamics = FiniteDiffDynamics(f, 6, 1)
# dynamics = BatchAutoDiffDynamics(f, state_size, action_size)

# Q = np.eye(dynamics.state_size)  # State error.
# cost = transpose(x) * Q * x + transpose(u) * R * u
Q = np.zeros((dynamics.state_size, dynamics.state_size))
Q[0, 0] = 1  # xExp^2
Q[2, 2] = 1  # xExp^2
# Q[4, 4] = 0.1

# One of the essential features of LQR is that Q should be a symmetric positive
# semidefinite matrix and R should be a positive definite matrix.
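# A minimal sketch (not part of the original script) of how these conditions on Q and R
# can be checked numerically before constructing the cost; `R` is assumed to be the
# control-weight matrix defined nearby.
def check_lqr_weights(Q, R, tol=1e-9):
    # Q must be symmetric positive semidefinite; R must be symmetric positive definite.
    assert np.allclose(Q, Q.T), "Q is not symmetric"
    assert np.allclose(R, R.T), "R is not symmetric"
    assert np.all(np.linalg.eigvalsh(Q) >= -tol), "Q is not positive semidefinite"
    assert np.all(np.linalg.eigvalsh(R) > tol), "R is not positive definite"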
def run_IterLinQuadReg(self, us_init, A, B, C, epsilon):
    x_input = self.state_inputs()
    x_dot_dot, theta_dot_dot, theta_prime = self.accel(x_input, self.xd)
    f = self.next_states(x_input, x_dot_dot, theta_dot_dot, theta_prime)
    dynamics = AutoDiffDynamics(f, x_input[:5], [x_input[5]])
    x_goal = self.augment_state(self.x_goal)

    if self.Q_terminal.all() is None:
        cost = QRCost_GPS(self.Q, self.R, state_size=5, action_size=1)
    else:
        cost = QRCost_GPS(self.Q, self.R, Q_terminal=self.Q_terminal, x_goal=x_goal,
                          state_size=5, action_size=1)

    x0 = self.augment_state(self.x0)
    us_local = np.random.uniform(-1, 1, (self.N, dynamics.action_size))
    ilqr = iLQR_GPS(dynamics, cost, self.N, A, B, C)
    xs, mean_us, cov_us = ilqr.fit_GPS(x0, us_init, us_local, cov_method='MEM',
                                       on_iteration=self.on_iteration)
    return xs, mean_us, cov_us


# '''
# next_states_matrix: works the same as next_states, except that it takes the A, B
#                     state-space matrices as input.
# '''
# def next_states_matrix(self, X, U, A, B, C):
#     theta = T.arctan2(X[2], X[3])
#     next_theta = theta + X[4] * self.dt
#     f = T.stack([
#         X[0] * A[0][0] + X[1] * A[0][1] + X[2] * A[0][2] + X[3] * A[0][3] + X[4] * A[0][4] + U[0] * A[0][5] + B[0][0],
#         X[0] * A[1][0] + X[1] * A[1][1] + X[2] * A[1][2] + X[3] * A[1][3] + X[4] * A[1][4] + U[0] * A[1][5] + B[1][0],
#         T.sin(next_theta),
#         T.cos(next_theta),
#         X[0] * A[4][0] + X[1] * A[4][1] + X[2] * A[4][2] + X[3] * A[4][3] + X[4] * A[4][4] + U[0] * A[4][5] + B[4][0]
#     ])
#     return f

# '''
# run_IterLinQuadReg_matrix: runs iLQR when we are given the A, B, C state-space matrices.
#                            The evolution of the state over time is governed by
#                            X(k+1) = A * [X(k) U(k)].T + B.
# '''
# def run_IterLinQuadReg_matrix(self, A, B, C, us_init=None):
#     x_input, u_input = self.state_inputs()
#     f = self.next_states_matrix(x_input, u_input, A, B, C)
#     dynamics = AutoDiffDynamics(f, x_input, u_input)
#     x_goal = self.augment_state(self.x_goal)
#     if self.Q_terminal.all() is None:
#         cost = QRCost(self.Q, self.R)
#     else:
#         cost = QRCost(self.Q, self.R, Q_terminal=self.Q_terminal, x_goal=x_goal)
#     x0 = self.augment_state(self.x0)
#     if us_init is None:
#         us_init = np.random.uniform(-1, 1, (self.N, dynamics.action_size))
#     ilqr = iLQR(dynamics, cost, self.N)
#     xs, us = ilqr.fit(x0, us_init, on_iteration=self.on_iteration)
#     return xs, us

# '''
# control_pattern: modifies the control actions based on the user input.
#     1 - Normal: based on the correction/mixing parameter gamma, generates the control
#         (gamma controls how much noise we want).
#     2 - MissingValue: based on the given percentage, sets that many values to zero
#         (implicitly the "Normal" generated control is used).
#     3 - Shuffle: shuffles the entire "Normal" generated control sequence.
#     4 - TimeDelay: takes the "Normal" generated control and shifts it by 1 index, i.e. one unit time delay.
#     5 - Extreme: sets gamma to zero and generates the control from noise only.
# '''
# def control_pattern(self, u, pattern, mean, var, gamma, percent):
#     if pattern == 'Normal':
#         u = gamma * u + (1 - gamma) * np.random.normal(mean, var, u.shape)
#     elif pattern == 'MissingValue':
#         n = int(u.shape[0] * percent * 0.01)
#         index = np.random.randint(0, u.shape[0], n)
#         u = gamma * u + (1 - gamma) * np.random.normal(mean, var, u.shape)
#         u[index, :] = 0
#     elif pattern == 'Shuffle':
#         u = gamma * u + (1 - gamma) * np.random.normal(mean, var, u.shape)
#         np.random.shuffle(u)
#     elif pattern == 'TimeDelay':
#         u = gamma * u + (1 - gamma) * np.random.normal(mean, var, u.shape)
#         u = np.roll(u, 1, axis=0)
#         u[0, :] = 0
#     elif pattern == 'Extreme':
#         u = np.random.normal(mean, var, u.shape)
#     return u

# '''
# noise_traj_generator: generates trajectories from an initial condition and a noisy control
#                       sequence, obtained by doing a noise-free iLQR roll-out and then adding
#                       noise to it. Noise is mixed into the control in a specific way: the
#                       parameter gamma sets what fraction of the original control sequence is
#                       kept versus the fraction of Gaussian noise added.
#                       Parameters: 1 - mean: Gaussian mean, 2 - var: Gaussian variance.
#                       Takes deaugmented states as input: the initial point and the noise-free
#                       control sequence. Based on the control pattern, it also re-modifies the
#                       entire control sequence (see control_pattern above).
# '''
# def noise_traj_generator(self, x, u, pattern, mean, var, gamma, percent):
#     u = self.control_pattern(u, pattern, mean, var, gamma, percent)
#     x_new = self.augment_state(x)
#     x_new = x_new.reshape(1, x_new.shape[0])
#     for i in range(self.N):
#         temp = (u[i][0] + self.xd[0] * self.xd[2] * x_new[-1][4]**2 * x_new[-1][2]) / (self.xd[1] + self.xd[0])
#         num = self.xd[3] * x_new[-1][2] - x_new[-1][3] * temp
#         den = self.xd[2] * (4/3 - (self.xd[0] * x_new[-1][3]**2) / (self.xd[1] + self.xd[0]))
#         ang_acc = num / den
#         lin_acc = temp - (self.xd[0] * self.xd[2] * ang_acc * x_new[-1][3]) / (self.xd[1] + self.xd[0])
#         theta = np.arctan2(x_new[-1][2], x_new[-1][3])
#         next_theta = theta + x_new[-1][4] * self.dt
#         temp_1 = x_new[-1][0] + x_new[-1][1] * self.dt
#         temp_2 = x_new[-1][1] + lin_acc * self.dt
#         temp_3 = np.sin(next_theta)
#         temp_4 = np.cos(next_theta)
#         temp_5 = x_new[-1][4] + ang_acc * self.dt
#         x_new = np.concatenate((x_new, [[temp_1, temp_2, temp_3, temp_4, temp_5]]), axis=0)
#     return x_new, u

# '''
# eval_traj_cost: calculates the trajectory cost based on the Q, R and Q_terminal cost matrices.
#                 Takes deaugmented states as input: the entire trajectory of states, the goal,
#                 and the control actions.
# '''
# def eval_traj_cost(self, x, y, u):
#     x = self.augment_state(x)
#     y = self.augment_state(y)
#     J = 0
#     if self.Q_terminal.all() is None:
#         for i in range(self.N + 1):
#             J = J + np.matmul((x[i, :] - y), np.matmul(self.Q, (x[i, :] - y).T)) + np.matmul((u[i] - u[-1]).T, np.matmul(self.R, u[i] - u[-1]))
#         J = J + np.matmul((x[-1] - y), np.matmul(self.Q, (x[-1] - y).T))
#     else:
#         for i in range(self.N):
#             J = J + np.matmul((x[i, :] - y), np.matmul(self.Q, (x[i, :] - y).T)) + np.matmul((u[i] - u[-1]).T, np.matmul(self.R, u[i] - u[-1]))
#         J = J + np.matmul((x[-1] - y), np.matmul(self.Q_terminal, (x[-1] - y).T))
#     return J

# '''
# gen_rollouts: generates the specified number of rollouts and outputs the states, control
#               actions and cost of all rollouts. The variance of the Gaussian noise is drawn
#               from a Unif(0, var_range) uniform distribution.
# '''
# def gen_rollouts(self, x_initial, x_goal, u, n_rollouts, pattern='Normal', pattern_rand=False, var_range=10, gamma=0.2, percent=20):
#     x_rollout = []
#     u_rollout = []
#     x_gmm = []
#     cost = []
#     local_policy = self.control_pattern(u, 'Normal', 0, np.random.uniform(0, 5, 1), 0.2, 20)
#     for i in range(n_rollouts):
#         if pattern_rand == True:
#             pattern_seq = np.array(['Normal', 'MissingValue', 'Shuffle', 'TimeDelay', 'Extreme'])
#             pattern = pattern_seq[np.random.randint(0, 5, 1)][0]
#         x_new, u_new = self.noise_traj_generator(x_initial, local_policy, pattern, 0, np.random.uniform(0, var_range, 1), gamma, percent)
#         x_new_temp = self.deaugment_state(x_new)
#         cost.append(self.eval_traj_cost(x_new_temp, x_goal, u_new))
#         x_rollout.append(x_new)
#         u_rollout.append(u_new)
#         temp = np.append(x_new[:-1, :], u_new, axis=1)
#         temp = np.append(temp, x_new[1:, :], axis=1)
#         x_gmm.append(temp)
#     x_rollout = np.array(x_rollout).reshape(len(x_rollout) * len(x_rollout[0]), len(x_rollout[0][0, :]))
#     u_rollout = np.array(u_rollout).reshape(len(u_rollout) * len(u_rollout[0]), len(u_rollout[0][0, :]))
#     x_gmm = np.array(x_gmm).reshape(len(x_gmm) * len(x_gmm[0]), len(x_gmm[0][0, :]))
#     cost = np.array(cost).reshape(n_rollouts, 1)
#     return x_rollout, u_rollout, local_policy, x_gmm, cost
Ft_temp = u_squash[1]

f = T.stack([
    x_inputs[0] + x_inputs[2] * dt,
    x_inputs[1] + x_inputs[3] * dt,
    x_inputs[2] + dt * (Fl_temp * length_torque_constant + ((a + b) * mboom * m_end + a * rho**2 + (mboom * m_end + rho**2) * l_temp) * psi_temp**2) / (mboom * m_end + rho**2),
    x_inputs[3] + dt * (mboom * Ft_temp * gear_ratio - 2 * ((a + b) * mboom * m_end + a * rho**2 + (mboom * m_end + rho**2) * l_temp) * p_temp * psi_temp) / (Jc * mboom + mboom * m_end * (a + b)**2 + (a * rho)**2 + l_temp * (2 * (a + b) * mboom * m_end + 2 * a * rho**2 + (mboom * m_end + rho**2) * l_temp))
])

dynamics = AutoDiffDynamics(f, x_inputs, u_inputs, hessians=True)

# %% Generate Initial Trajectories
X0, U0 = genTrajectory(x0, xe)
Xe, Ue, Te = extension(xe[0], xf[0], grasping_velocity)

N = U0.shape[0]
t = np.arange(N + 1) * dt

l_og = X0[:, 0]
theta_og = X0[:, 1]
x_og = (l_og + a) * np.cos(theta_og)
y_og = (l_og + a) * np.sin(theta_og)
Fl_og = np.append(U0[:, 0], 0)
Ft_og = np.append(U0[:, 1], 0)

# %% Generate Costs
Q = np.diag([275, 275, 275, 275])
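# A hedged sketch (illustrative weights, not the original tuning) of how the cost above
# could be completed and the boom trajectory refined with iLQR, warm-started from the
# generated initial controls U0. The control weight, the zero goal state, and the use of
# X0[0] as the initial state are assumptions.
R_guess = 1e-3 * np.eye(dynamics.action_size)  # Assumed control-effort weight.
cost_guess = QRCost(Q, R_guess, x_goal=np.zeros(dynamics.state_size))  # Placeholder goal.
ilqr_boom = iLQR(dynamics, cost_guess, N, hessians=True)
xs_boom, us_boom = ilqr_boom.fit(X0[0], U0)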