def __init__(self, env, x_eps=1e-6, u_eps=1e-6):
    """Inits MujocoFiniteDiffDynamicsBase with a MujocoEnv.

    :param env (MujocoEnv): a Gym MuJoCo environment. Must not be wrapped.
    :param x_eps (float): increment to use for finite differences on state.
    :param u_eps (float): increment to use for finite differences on control.
    """
    MujocoFiniteDiff.__init__(self, env)
    FiniteDiffDynamics.__init__(self, self._mujoco_f, self.state_size,
                                self.action_size, x_eps=x_eps, u_eps=u_eps)
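# Illustrative only: a minimal sketch of constructing this dynamics wrapper,
# assuming a concrete subclass is exposed (the name MujocoFiniteDiffDynamics
# below is hypothetical) and that the state is the concatenated qpos/qvel of
# the unwrapped Gym MuJoCo environment.
import gym
import numpy as np

env = gym.make("InvertedPendulum-v2").unwrapped  # must not be wrapped
dynamics = MujocoFiniteDiffDynamics(env, x_eps=1e-6, u_eps=1e-6)

x = np.concatenate([env.sim.data.qpos, env.sim.data.qvel])  # assumed state layout
u = env.action_space.sample()
x_next = dynamics.f(x, u, 0)  # one-step prediction via FiniteDiffDynamics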
def setup(self):
    J_hist = []

    def on_iteration(iteration_count, xs, us, J_opt, accepted, converged):
        J_hist.append(J_opt)
        info = "converged" if converged else ("accepted" if accepted else "failed")
        print("iteration", iteration_count, info, J_opt)

    def f(x, us, i):
        # Squash the raw control through tanh so it stays within [-8, 8].
        max_bounds = 8.0
        min_bounds = -8.0
        diff = (max_bounds - min_bounds) / 2.0
        mean = (max_bounds + min_bounds) / 2.0
        us = diff * np.tanh(us) + mean
        y = Model.runge_integrator(self._model.get_rbdl_model(), x, 0.01, us)
        return np.array(y)

    dynamics = FiniteDiffDynamics(f, 12, 6)

    x_path = []
    u_path = []
    count = 0
    N = self.runner.get_length()
    while count < N:
        count += 1
        self.runner.step()
        u_path.append(self.runner.ddx.flatten().tolist())
        self.x.append(self.runner.x.flatten())
        x = self.runner.x.flatten().tolist() + self.runner.dx.flatten().tolist()
        x_path.append(x)

    u_path = u_path[:-1]
    expSigma = self.runner.get_expSigma()
    size = expSigma[0].shape[0]
    # One cost matrix per time step. Use a comprehension so each entry is a
    # distinct array; multiplying a one-element list by len(expSigma) would
    # alias the same array at every index.
    Q = [np.zeros((size * 2, size * 2)) for _ in range(len(expSigma))]
    for ii in range(len(expSigma) - 2, -1, -1):
        Q[ii][:size, :size] = np.linalg.pinv(expSigma[ii])

    x0 = x_path[0]
    x_path = np.array(x_path)
    us_path = np.array(u_path)
    us_init = np.random.uniform(-1, 1, (N - 1, dynamics.action_size))
    R = 0.1 * np.eye(dynamics.action_size)

    cost = PathQsRCost(Q, R, x_path=x_path, u_path=us_path)
    ilqr = iLQR(dynamics, cost, N - 1)
    xs, self.us = ilqr.fit(x0, us_init, on_iteration=on_iteration)

    # Refit with a heavier control penalty, tracking the first solution's controls.
    R = 0.5 * np.eye(dynamics.action_size)
    cost2 = PathQsRCost(Q, R, x_path=x_path, u_path=self.us)
    ilqr2 = iLQR(dynamics, cost2, N - 1)
    self.cntrl = RecedingHorizonController(x0, ilqr2)
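# Illustrative only: a sketch of stepping the RecedingHorizonController built
# in setup() above. It assumes the ilqr library's controller API, where
# control() is a generator that re-solves from the current state and yields
# (xs, us) chunks and set_state() updates the planning state; the plant
# interface send_to_robot is hypothetical.
def run_receding_horizon(controller, us_warm_start, send_to_robot, n_steps=10):
    plan = controller.control(us_warm_start, step_size=1)
    for _, (xs_chunk, us_chunk) in zip(range(n_steps), plan):
        u = us_chunk[0]                    # apply the first control of the chunk
        x_measured = send_to_robot(u)      # hypothetical: returns the measured state
        controller.set_state(x_measured)   # re-plan from the measured state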
def setup(self):
    J_hist = []

    def on_iteration(iteration_count, xs, us, J_opt, accepted, converged):
        J_hist.append(J_opt)
        info = "converged" if converged else ("accepted" if accepted else "failed")
        print("iteration", iteration_count, info, J_opt)

    def f(x, u, i):
        y = Model.runge_integrator(self._model.get_rbdl_model(), x, 0.01, u)
        return np.array(y)

    dynamics = FiniteDiffDynamics(f, 12, 6)

    x_path = []
    u_path = []
    count = 0
    N = self.runner.get_length()
    while count < N:
        count += 1
        self.runner.step()
        u_path.append(self.runner.ddx.flatten().tolist())
        self.x.append(self.runner.x.flatten())
        x = self.runner.x.flatten().tolist() + self.runner.dx.flatten().tolist()
        x_path.append(x)

    u_path = u_path[:-1]
    expSigma = self.runner.get_expSigma()
    size = expSigma[0].shape[0]
    # One cost matrix per time step; use a comprehension so each entry is a
    # distinct array rather than N references to the same one.
    Q = [np.zeros((size * 2, size * 2)) for _ in range(len(expSigma))]
    for ii in range(len(expSigma) - 2, -1, -1):
        Q[ii][:size, :size] = np.linalg.pinv(expSigma[ii])

    x0 = x_path[0]
    x_path = np.array(x_path)
    u_path = np.array(u_path)

    # Heavier control penalty on the last three actuators.
    R = 5.0e-4 * np.eye(dynamics.action_size)
    R[3, 3] = 3.0e-3
    R[4, 4] = 3.0e-3
    R[5, 5] = 3.0e-3

    cost2 = PathQsRCost(Q, R, x_path=x_path, u_path=u_path)

    # Random initial action path.
    us_init = np.random.uniform(-1, 1, (N - 1, dynamics.action_size))

    ilqr = iLQR(dynamics, cost2, N - 1)
    self.xs, self.us = ilqr.fit(x0, us_init, on_iteration=on_iteration)
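# Illustrative only: a small matplotlib sketch of how the cost history
# collected by on_iteration() could be inspected after fitting, to check that
# the iLQR solve actually converged. J_hist stands for the list filled in setup().
import matplotlib.pyplot as plt

def plot_cost_history(J_hist):
    plt.plot(J_hist, marker="o")
    plt.xlabel("iLQR iteration")
    plt.ylabel("total trajectory cost J")
    plt.title("iLQR convergence")
    plt.show()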
def control_ilqr(N=1000):
    pend = Pendulum()
    dynamics = FiniteDiffDynamics(pend.f, pend.state_size, pend.action_size)

    # The coefficients weigh how much your state error is worth to you vs
    # the size of your controls. You can favor a solution that uses smaller
    # controls by increasing R's coefficient.
    Q = 100 * np.eye(pend.state_size)
    R = 50 * np.eye(pend.action_size)
    Q_terminal = np.diag((1000, 1000, 1000))
    cost = QRCost(Q, R, Q_terminal=Q_terminal, x_goal=pend.x_goal)

    x0 = pend.initial_state  # Initial state.
    us_init = np.random.uniform(-2, 2, (N, 1))  # Random initial action path.

    ilqr = iLQR(dynamics, cost, N)
    xs, us = ilqr.fit(x0, us_init)
    pend.close()
    return us
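# Illustrative only: replaying the open-loop control sequence returned by
# control_ilqr() on a Gym pendulum. The environment id and the assumption that
# Pendulum wraps Pendulum-v0's 3-dimensional observation are guesses, not
# confirmed by the code above.
import gym
import numpy as np

def replay(us, env_id="Pendulum-v0"):
    env = gym.make(env_id)
    env.reset()
    total_reward = 0.0
    for u in us:
        _, reward, done, _ = env.step(np.atleast_1d(u))
        total_reward += reward
        if done:
            break
    env.close()
    return total_reward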
def control_ilqr():
    cart = MountainCar()
    dynamics = FiniteDiffDynamics(cart.f, cart.state_size, cart.action_size)

    # The coefficients weigh how much your state error is worth to you vs
    # the size of your controls. You can favor a solution that uses smaller
    # controls by increasing R's coefficient.
    Q = 500 * np.eye(cart.state_size)
    R = 100 * np.eye(cart.action_size)
    Q_terminal = 1000 * np.eye(cart.state_size)
    cost = QRCost(Q, R, Q_terminal=Q_terminal, x_goal=cart.x_goal)
    # cost = BatchAutoDiffCost(cart.cost_function, cart.state_size, cart.action_size)

    N = 1000  # Number of time steps in the trajectory.
    x0 = cart.initial_state  # Initial state.
    us_init = np.random.uniform(-1, 1, (N, 1))  # Random initial action path.

    ilqr = iLQR(dynamics, cost, N)
    xs, us = ilqr.fit(x0, us_init)
    cart.close()
    return us
    return np.array(y)

def f2(x, u, i):
    y = model.runge_integrator(my_model, x, 0.01, u)
    return np.array(y)

dynamics = FiniteDiffDynamics(f, 12, 6)

file_name = "/home/jack/backup/leg1.pickle"
# file_name = "/home/jack/catkin_ws/src/ambf_walker/Train/gotozero.pickle"
runner = TPGMMRunner.TPGMMRunner(file_name)

x_path = []
u_path = []
count = 0
N = runner.get_length()
while count < N:
    count += 1
    runner.step()
def f(x, u, i):
    """Batched implementation of the dynamics model.

    Args:
        x: State vector [*, state_size].
        u: Control vector [*, action_size].
        i: Current time step [*, 1].

    Returns:
        Next state vector [*, state_size].
    """
    x_ = x[..., 0]
    x_dot = x[..., 1]
    F = u[..., 0]

    # Acceleration.
    x_dot_dot = x_dot * (1 - alpha * dt / m) + F * dt / m

    # Discrete dynamics model definition.
    return np.array([
        x_ + x_dot * dt,
        x_dot + x_dot_dot * dt,
    ])

# Wrap the dynamics. FiniteDiffDynamics approximates the Jacobians numerically
# at call time, so there is no compilation step or startup cost here.
dynamics = FiniteDiffDynamics(f, state_size, action_size)
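# Illustrative only: a quick check of the finite-difference Jacobians the
# wrapper exposes through the ilqr Dynamics interface (f, f_x, f_u). The
# values for dt, alpha and m are placeholders standing in for whatever the
# surrounding script defines.
dt, alpha, m = 0.05, 0.1, 1.0
x_test = np.array([0.0, 1.0])  # position, velocity
u_test = np.array([0.5])       # applied force

x_next = dynamics.f(x_test, u_test, 0)
A = dynamics.f_x(x_test, u_test, 0)  # df/dx, shape (state_size, state_size)
B = dynamics.f_u(x_test, u_test, 0)  # df/du, shape (state_size, action_size)
print(x_next, A, B)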
# T.dscalar("Rhip_dot"), #9 # T.dscalar("Rknee_dot"), #10 # T.dscalar("Rankle_dot"), #11 # ] # # u_inputs = [ # T.dscalar("Lhip_ddot"), # T.dscalar("Lknee_ddot"), # T.dscalar("Lankle_ddot"), # T.dscalar("Rhip_ddot"), # T.dscalar("Rknee_ddot"), # T.dscalar("Rankle_ddot"), # ] # dynamics = FiniteDiffDynamics(f, 12, 6) curr_x = np.array([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]) curr_u = np.array([5.0, 2.0, 1.0, 5.0, 2.0, 1.0]) print(dynamics.f(curr_x, curr_u, 0)) x0 = [0.0, 0.0, -0.349, 0.0, 0.0, -0.349, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] hip = model.get_traj(0.0, -0.3, 0.0, 0.0, tf, dt) knee = model.get_traj(0.0, 0.20, 0.0, 0., tf, dt) ankle = model.get_traj(-0.349, -0.1, 0.0, 0.0, tf, dt) x_path = [] u_path = [] N = int(tf / dt) for i in range(N): x_path.append([