Example #1
def ilqr_pend(dt=0.1):
    x = T.dscalar("x")  # Position.
    x_dot = T.dscalar("x_dot")  # Velocity.
    u = T.dscalar("u")  # Force.

    # x_dot_dot = l * (x_dot - x**2) - u
    x_dot_dot = -T.sin(x) - u  # Theano sin keeps the expression symbolic for autodiff.

    f = T.stack([
        x + (x_dot) * dt,
        x_dot + x_dot_dot * dt,
    ])

    x_inputs = [x, x_dot]  # State vector.
    u_inputs = [u]  # Control vector.

    dynamics = AutoDiffDynamics(f, x_inputs, u_inputs)
    return dynamics
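The dynamics returned by ilqr_pend can be paired with a quadratic cost and optimized with iLQR; a minimal usage sketch, with illustrative weights, horizon, and initial state that are not part of the original example:

# Minimal usage sketch (illustrative values).
dynamics = ilqr_pend(dt=0.1)
Q = np.eye(dynamics.state_size)  # Weight on the state error.
R = 0.1 * np.eye(dynamics.action_size)  # Weight on the control effort.
cost = QRCost(Q, R)
N = 100  # Number of time steps in the trajectory.
x0 = np.array([np.pi, 0.0])  # Illustrative initial angle and angular velocity.
us_init = np.random.uniform(-1, 1, (N, dynamics.action_size))
ilqr = iLQR(dynamics, cost, N)
xs, us = ilqr.fit(x0, us_init)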
Example #2
 def run_IterLinQuadReg_matrix(self,
                               A,
                               B,
                               C,
                               dist_info_sharing='AM',
                               us_init=None):
     x_input, u_input = self.state_inputs()
     if np.ndim(A) != 2:
         if dist_info_sharing == 'GM':
             A = gmean(A, axis=0)
             B = gmean(B, axis=0)
             C = gmean(C, axis=0)
             print(A.shape, 'A', B.shape, 'B', C.shape, 'C')
         elif dist_info_sharing == 'AM':
             A = np.sum(A, axis=0) / A.shape[0]
             B = np.sum(B, axis=0, keepdims=True) / B.shape[0]
             B = B.T
             C = np.sum(C, axis=0) / C.shape[0]
     else:
         pass
     f = self.next_states_matrix(x_input, u_input, A, B, C)
     dynamics = AutoDiffDynamics(f, x_input, u_input)
     x_goal = self.augment_state(self.x_goal)
     if self.Q_terminal is None:
         cost = QRCost(self.Q, self.R)
     else:
         cost = QRCost(self.Q,
                       self.R,
                       Q_terminal=self.Q_terminal,
                       x_goal=x_goal)
     x0 = self.augment_state(self.x0)
     if us_init is None:
         us_init = np.random.uniform(-1, 1, (self.N, dynamics.action_size))
     ilqr = iLQR(dynamics, cost, self.N)
     xs, us = ilqr.fit(x0, us_init, on_iteration=self.on_iteration)
     return xs, us
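When A, B, C arrive as stacks of local models (np.ndim(A) != 2), run_IterLinQuadReg_matrix collapses them into a single model by averaging over the first axis, either geometrically ('GM') or arithmetically ('AM'). A minimal sketch of that reduction with illustrative shapes, assuming gmean here is scipy.stats.gmean:

import numpy as np
from scipy.stats import gmean

A_stack = np.abs(np.random.randn(5, 4, 4)) + 0.1  # Five local A matrices (kept positive for gmean).
A_am = np.sum(A_stack, axis=0) / A_stack.shape[0]  # Arithmetic mean ('AM').
A_gm = gmean(A_stack, axis=0)  # Geometric mean ('GM').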
Example #3
 def run_IterLinQuadReg(self, us_init=None):
     x_input, u_input = self.state_inputs()
     x_dot_dot, theta_dot_dot, theta_prime = self.accel(
         x_input, u_input, self.xd)
     f = self.next_states(x_input, x_dot_dot, theta_dot_dot, theta_prime)
     dynamics = AutoDiffDynamics(f, x_input, u_input, hessians=False)
     x_goal = self.augment_state(self.x_goal)
     if self.Q_terminal is None:
         cost = QRCost(self.Q, self.R)
     else:
         cost = QRCost(self.Q,
                       self.R,
                       Q_terminal=self.Q_terminal,
                       x_goal=x_goal)
     x0 = self.augment_state(self.x0)
     if us_init is None:
         us_init = np.random.uniform(-1, 1, (self.N, dynamics.action_size))
     ilqr = iLQR(dynamics, cost, self.N, hessians=False)
     xs, us = ilqr.fit(x0,
                       us_init,
                       on_iteration=self.on_iteration,
                       n_iterations=1000)
     # print(ilqr._K,'this is capital K')
     return xs, us, ilqr._k, ilqr._K
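Besides the optimized trajectory, run_IterLinQuadReg returns the iLQR feedforward gains (ilqr._k) and feedback gains (ilqr._K). Below is a minimal sketch of replaying the solution with the usual time-varying feedback law; obj (an instance of the surrounding class) and step_plant (the true one-step dynamics) are illustrative placeholders:

# Hedged sketch: track the optimized trajectory with the returned gains.
xs, us, k, K = obj.run_IterLinQuadReg()
x = xs[0]
for t in range(len(us)):
    u = us[t] + k[t] + K[t].dot(x - xs[t])  # Feedforward plus feedback correction.
    x = step_plant(x, u)  # Hypothetical plant step.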
Example #4
# Acceleration.
def acceleration(x_dot, u):
    x_dot_dot = x_dot * (1 - alpha * dt / m) + u * dt / m
    return x_dot_dot


# Discrete dynamics model definition.
f = T.stack([
    x_inputs[0] + x_inputs[2] * dt,
    x_inputs[1] + x_inputs[3] * dt,
    x_inputs[2] + acceleration(x_inputs[2], u_inputs[0]) * dt,
    x_inputs[3] + acceleration(x_inputs[3], u_inputs[1]) * dt,
])

dynamics = AutoDiffDynamics(f, x_inputs, u_inputs)

# The vehicles are initialized at $(0, 0)$ and $(10, 10)$ with velocities $(0, -5)$ and $(5, 0)$ respectively.


N = 200  # Number of time steps in trajectory.
x0 = np.array([10.0, 10.0, 5.0, 3.0])  # Initial state.

x_traj = get_traj(10.0, 5.0, 5.0, 0.0, 2.0, 0.01)
y_traj = get_traj(10.0, 5.0, 3.0, 0.0, 2.0, 0.01)
x_path = []
u_path = []
for i in range(N):
    x_path.append([
        x_traj["q"][i][0], y_traj["q"][i][0], x_traj["qd"][i][0],
Example #5
f = T.stack([
    x0_v + (x1_v * dt),
    x1_v + (((U0 / (T0**2)) * alpha_v * u2 +
             (U0 / T0) * beta_v * u1 + gamma_v * U0 * u0 - phi_v *
             (X0 / T0) * x1_v - xi_v * X0 * x0_v) / (X0 / (T0**2))) * dt,
    # x2_v + ,
    x0_h + (x1_h * dt),
    x1_h + (((U0 / (T0**2)) * alpha_h * u2 +
             (U0 / T0) * beta_h * u1 + gamma_h * U0 * u0 - phi_h *
             (X0 / T0) * x1_h - xi_h * X0 * x0_h) / (X0 / (T0**2))) * dt,
    # x2_h + ,
    u0 + (u1 * dt),
    u1 + (u2 * dt)
])

dynamics = AutoDiffDynamics(f, x_inputs, u_inputs)
# dynamics = FiniteDiffDynamics(f, 6, 1)
# dynamics = BatchAutoDiffDynamics(f, state_size, action_size)

# Q = np.eye(dynamics.state_size)  # State error.

# cost = transpose(x) * Q * x + transpose(u) * R * u

Q = np.zeros((dynamics.state_size, dynamics.state_size))

Q[0, 0] = 1  # xExp^2
Q[2, 2] = 1  # xExp^2

# Q[4,4] = 0.1
# One of the essential features of LQR is that Q should be a symmetric
# positive semidefinite matrix and R should be a positive definite matrix.
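That requirement can be checked numerically before the cost is built; a minimal sketch using NumPy eigenvalues (the helper name is illustrative):

def check_lqr_weights(Q, R, tol=1e-9):
    # Q must be symmetric positive semidefinite; R must be symmetric positive definite.
    q_ok = np.allclose(Q, Q.T) and np.all(np.linalg.eigvalsh(Q) >= -tol)
    r_ok = np.allclose(R, R.T) and np.all(np.linalg.eigvalsh(R) > tol)
    return q_ok, r_ok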
Example #6
    def run_IterLinQuadReg(self, us_init, A, B, C, epsilon):
        x_input = self.state_inputs()
        x_dot_dot, theta_dot_dot, theta_prime = self.accel(x_input, self.xd)
        f = self.next_states(x_input, x_dot_dot, theta_dot_dot, theta_prime)
        dynamics = AutoDiffDynamics(f, x_input[:5], [x_input[5]])
        x_goal = self.augment_state(self.x_goal)
        if self.Q_terminal is None:
            cost = QRCost_GPS(self.Q, self.R, state_size=5, action_size=1)
        else:
            cost = QRCost_GPS(self.Q,
                              self.R,
                              Q_terminal=self.Q_terminal,
                              x_goal=x_goal,
                              state_size=5,
                              action_size=1)
        x0 = self.augment_state(self.x0)
        us_local = np.random.uniform(-1, 1, (self.N, dynamics.action_size))
        ilqr = iLQR_GPS(dynamics, cost, self.N, A, B, C)
        xs, mean_us, cov_us = ilqr.fit_GPS(x0,
                                           us_init,
                                           us_local,
                                           cov_method='MEM',
                                           on_iteration=self.on_iteration)
        return xs, mean_us, cov_us


#     '''
#     next_states_matrix: works the same as next_states, except that it takes the A, B state-space matrices as input.
#     '''
#     def next_states_matrix(self, X, U, A, B, C):
#         theta = T.arctan2(X[2], X[3])
#         next_theta = theta + X[4] * self.dt
#         f = T.stack([
#             X[0] * A[0][0] + X[1] * A[0][1] + X[2] * A[0][2] + X[3] * A[0][3] + X[4] * A[0][4] + U[0] * A[0][5] + B[0][0],
#             X[0] * A[1][0] + X[1] * A[1][1] + X[2] * A[1][2] + X[3] * A[1][3] + X[4] * A[1][4] + U[0] * A[1][5] + B[1][0],
#             T.sin(next_theta),
#             T.cos(next_theta),
#             X[0] * A[4][0] + X[1] * A[4][1] + X[2] * A[4][2] + X[3] * A[4][3] + X[4] * A[4][4] + U[0] * A[4][5] + B[4][0]
#         ])
#         return f

#     '''
#     run_IterLinQuadReg_matrix: runs iLQR when the A, B, C state-space matrices are given.
#                         The state evolves over time as X(k+1) = A * [X(k) U(k)].T + B.
#     '''
#     def run_IterLinQuadReg_matrix(self, A, B, C, us_init=None):
#         x_input, u_input = self.state_inputs()
#         f = self.next_states_matrix(x_input, u_input, A, B, C)
#         dynamics = AutoDiffDynamics(f, x_input, u_input)
#         x_goal = self.augment_state(self.x_goal)
#         if self.Q_terminal.all() == None:
#             cost = QRCost(self.Q, self.R)
#         else:
#             cost = QRCost(self.Q, self.R, Q_terminal=self.Q_terminal, x_goal=x_goal)
#         x0 = self.augment_state(self.x0)
#         if us_init == None:
#             us_init = np.random.uniform(-1, 1, (self.N, dynamics.action_size))
#         ilqr = iLQR(dynamics, cost, self.N)
#         xs, us = ilqr.fit(x0, us_init, on_iteration=self.on_iteration)
#         return xs, us

#     '''
#     control_pattern: modifies the control actions based on the user input.
#                         1 - Normal: generates the control from the correction/mixing parameter gamma
#                                 (gamma controls how much noise we want).
#                         2 - MissingValue: based on the given percentage, sets that many values to zero
#                                 (implicitly, the "Normal" generated control is used).
#                         3 - Shuffle: shuffles the entire "Normal" generated control sequence.
#                         4 - TimeDelay: takes the "Normal" generated control and shifts it by one index, i.e. one unit of time delay.
#                         5 - Extreme: sets gamma to zero and generates the control from noise only.
#     '''
#     def control_pattern(self, u, pattern, mean, var, gamma, percent):
#         if pattern == 'Normal':
#             u = gamma * u + (1 - gamma) * np.random.normal(mean, var, u.shape)
#         elif pattern == 'MissingValue':
#             n = int(u.shape[0] * percent * 0.01)
#             index = np.random.randint(0, u.shape[0], n)
#             u = gamma * u + (1 - gamma) * np.random.normal(mean, var, u.shape)
#             u[index, :] = 0
#         elif pattern == 'Shuffle':
#             u = gamma * u + (1 - gamma) * np.random.normal(mean, var, u.shape)
#             np.random.shuffle(u)
#         elif pattern == 'TimeDelay':
#             u = gamma * u + (1 - gamma) * np.random.normal(mean, var, u.shape)
#             u = np.roll(u, 1, axis=0)
#             u[0, :] = 0
#         elif pattern == 'Extreme':
#             u = np.random.normal(mean, var, u.shape)
#         return u
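#     A usage sketch for control_pattern (illustrative values; u is an (N, action_size) control sequence):
#         u_noisy   = self.control_pattern(u, 'Normal',       mean=0, var=1.0, gamma=0.8, percent=0)
#         u_missing = self.control_pattern(u, 'MissingValue', mean=0, var=1.0, gamma=0.8, percent=20)
#         u_delayed = self.control_pattern(u, 'TimeDelay',    mean=0, var=1.0, gamma=0.8, percent=0)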

#     '''
#     noise_traj_generator: generates trajectories from an initial condition and a noisy control action,
#                             obtained by running a noise-free iLQR roll-out and then adding noise to it.
#                             Noise is added to the control action via the parameter gamma, which sets what fraction
#                             of the original control sequence is mixed with Gaussian noise.
#                             Parameters: 1 - Mean (mean): Gaussian mean, and 2 - Variance (var): Gaussian variance.
#                             Takes deaugmented states as input: the initial point and the perfect control action.
#                             Based on the control pattern, the entire control sequence is modified again (see control_pattern above).
#     '''
#     def noise_traj_generator(self, x, u, pattern, mean, var, gamma, percent):
#         u = self.control_pattern(u, pattern, mean, var, gamma, percent)
#         x_new = self.augment_state(x)
#         x_new = x_new.reshape(1,x_new.shape[0])
#         for i in range(self.N):
#             temp = (u[i][0] + self.xd[0] * self.xd[2] * x_new[-1][4]**2 * x_new[-1][2])/(self.xd[1] + self.xd[0])
#             num = self.xd[3] * x_new[-1][2] - x_new[-1][3] * temp
#             den = self.xd[2] * (4/3 - (self.xd[0] * x_new[-1][3]**2)/(self.xd[1] + self.xd[0]))
#             ang_acc = num/den
#             lin_acc = temp - (self.xd[0] * self.xd[2] * ang_acc * x_new[-1][3])/(self.xd[1] + self.xd[0])
#             theta = np.arctan2(x_new[-1][2], x_new[-1][3])
#             next_theta = theta + x_new[-1][4] * self.dt
#             temp_1 = x_new[-1][0] + x_new[-1][1] * self.dt
#             temp_2 = x_new[-1][1] + lin_acc * self.dt
#             temp_3 = np.sin(next_theta)
#             temp_4 = np.cos(next_theta)
#             temp_5 = x_new[-1][4] + ang_acc * self.dt
#             x_new = np.concatenate((x_new, [[temp_1, temp_2, temp_3, temp_4, temp_5]]), axis=0)
#         return x_new, u

#     '''
#     eval_traj_cost: calculates the trajectory cost based on the Q, R and Q_terminal cost matrices.
#                         Takes the deaugmented states as input: the entire state trajectory, the goal state,
#                         and the control actions.
#     '''
#     def eval_traj_cost(self, x, y, u):
#         x = self.augment_state(x)
#         y = self.augment_state(y)
#         J = 0
#         if self.Q_terminal.all() == None:
#             for i in range(self.N+1):
#                 J = J + np.matmul((x[i,:]-y), np.matmul(self.Q, (x[i,:]-y).T)) + np.matmul((u[i]-u[-1]).T, np.matmul(self.R, u[i]-u[-1]))
#             J = J + np.matmul((x[-1]-y), np.matmul(self.Q, (x[-1]-y).T))
#         else:
#             for i in range(self.N):
#                 J = J + np.matmul((x[i,:]-y), np.matmul(self.Q, (x[i,:]-y).T)) + np.matmul((u[i]-u[-1]).T, np.matmul(self.R, u[i]-u[-1]))
#             J = J + np.matmul((x[-1]-y), np.matmul(self.Q_terminal, (x[-1]-y).T))
#         return J

#     '''
#     gen_rollouts: generates the specified number of rollouts and outputs the states, control actions and cost of all rollouts.
#                     The variance of the Gaussian noise is drawn from a Unif(0, var_range) uniform distribution.
#     '''
#     def gen_rollouts(self, x_initial, x_goal, u, n_rollouts, pattern='Normal', pattern_rand=False, var_range=10, gamma=0.2, percent=20):
#         x_rollout = []
#         u_rollout = []
#         x_gmm = []
#         cost = []
#         local_policy = self.control_pattern(u, 'Normal', 0, np.random.uniform(0, 5, 1), 0.2, 20)
#         for i in range(n_rollouts):
#             if pattern_rand == True:
#                 pattern_seq = np.array(['Normal', 'MissingValue', 'Shuffle', 'TimeDelay', 'Extreme'])
#                 pattern = pattern_seq[np.random.randint(0, 5, 1)][0]
#             x_new, u_new = self.noise_traj_generator(x_initial, local_policy, pattern, 0, np.random.uniform(0, var_range, 1), gamma, percent)
#             x_new_temp = self.deaugment_state(x_new)
#             cost.append(self.eval_traj_cost(x_new_temp, x_goal, u_new))
#             x_rollout.append(x_new)
#             u_rollout.append(u_new)
#             temp = np.append(x_new[:-1,:], u_new, axis=1)
#             temp = np.append(temp, x_new[1:,:], axis=1)
#             x_gmm.append(temp)
#         x_rollout = np.array(x_rollout).reshape(len(x_rollout)*len(x_rollout[0]), len(x_rollout[0][0,:]))
#         u_rollout = np.array(u_rollout).reshape(len(u_rollout)*len(u_rollout[0]), len(u_rollout[0][0,:]))
#         x_gmm = np.array(x_gmm).reshape(len(x_gmm)*len(x_gmm[0]), len(x_gmm[0][0,:]))
#         cost = np.array(cost).reshape(n_rollouts, 1)
#         return x_rollout, u_rollout, local_policy, x_gmm, cost
Example #7
Ft_temp = u_squash[1]
f = T.stack([
    x_inputs[0] + x_inputs[2] * dt, x_inputs[1] + x_inputs[3] * dt,
    x_inputs[2] + dt * (Fl_temp * length_torque_constant +
                        ((a + b) * mboom * m_end + a * rho**2 +
                         (mboom * m_end + rho**2) * l_temp) * psi_temp**2) /
    (mboom * m_end + rho**2), x_inputs[3] + dt *
    (mboom * Ft_temp * gear_ratio - 2 *
     ((a + b) * mboom * m_end + a * rho**2 +
      (mboom * m_end + rho**2) * l_temp) * p_temp * psi_temp) /
    (Jc * mboom + mboom * m_end * (a + b)**2 + (a * rho)**2 + l_temp *
     (2 * (a + b) * mboom * m_end + 2 * a * rho**2 +
      (mboom * m_end + rho**2) * l_temp))
])

dynamics = AutoDiffDynamics(f, x_inputs, u_inputs, hessians=True)

# %% Generate Initial Trajectories
X0, U0 = genTrajectory(x0, xe)
Xe, Ue, Te = extension(xe[0], xf[0], grasping_velocity)
N = U0.shape[0]
t = np.arange(N + 1) * dt
l_og = X0[:, 0]
theta_og = X0[:, 1]
x_og = (l_og + a) * np.cos(theta_og)
y_og = (l_og + a) * np.sin(theta_og)
Fl_og = np.append(U0[:, 0], 0)
Ft_og = np.append(U0[:, 1], 0)

# %% Generate Costs
Q = np.diag([275, 275, 275, 275])