Example #1
    def __init__(self, env, x_eps=1e-6, u_eps=1e-6):
        """Inits MujocoFiniteDiffDynamicsBase with a MujocoEnv.

        :param env (MujocoEnv): a Gym MuJoCo environment. Must not be wrapped.
        :param x_eps (float): increment to use for finite difference on state.
        :param u_eps (float): increment to use for finite difference on control.
        """
        MujocoFiniteDiff.__init__(self, env)
        FiniteDiffDynamics.__init__(self,
                                    self._mujoco_f,
                                    self.state_size,
                                    self.action_size,
                                    x_eps=x_eps,
                                    u_eps=u_eps)
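
For reference, x_eps and u_eps above are the step sizes used to approximate
the dynamics Jacobians numerically. A minimal, self-contained sketch of that
idea with a toy dynamics function (not the MuJoCo wrapper itself):

import numpy as np

def fd_jacobian(f, x, u, i, x_eps=1e-6):
    """Approximate df/dx with central differences, one column per state."""
    n = x.shape[0]
    J = np.zeros((f(x, u, i).shape[0], n))
    for k in range(n):
        step = np.zeros(n)
        step[k] = x_eps
        J[:, k] = (f(x + step, u, i) - f(x - step, u, i)) / (2.0 * x_eps)
    return J

# Toy dynamics, just to exercise the helper; the exact Jacobian is [[1, 0], [1, 1]].
f = lambda x, u, i: x + np.array([u[0], x[0]])
print(fd_jacobian(f, np.array([1.0, 2.0]), np.array([0.5]), 0))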
Example #2

    def setup(self):

        J_hist = []

        def on_iteration(iteration_count, xs, us, J_opt, accepted, converged):
            J_hist.append(J_opt)
            info = "converged" if converged else ("accepted" if accepted else "failed")
            print("iteration", iteration_count, info, J_opt)

        def f(x, u, i):
            # Squash the optimizer's unbounded controls into
            # [min_bounds, max_bounds] with tanh before integrating.
            max_bounds = 8.0
            min_bounds = -8.0
            diff = (max_bounds - min_bounds) / 2.0
            mean = (max_bounds + min_bounds) / 2.0
            u = diff * np.tanh(u) + mean
            y = Model.runge_integrator(self._model.get_rbdl_model(), x, 0.01, u)
            return np.array(y)

        dynamics = FiniteDiffDynamics(f, 12, 6)

        x_path = []
        u_path = []
        count = 0
        N = self.runner.get_length()
        while count < self.runner.get_length():
            count += 1
            self.runner.step()
            u_path.append(self.runner.ddx.flatten().tolist())
            self.x.append(self.runner.x.flatten())
            x = self.runner.x.flatten().tolist() + self.runner.dx.flatten().tolist()
            x_path.append(x)

        u_path = u_path[:-1]
        expSigma = self.runner.get_expSigma()
        size = expSigma[0].shape[0]
        # One independent Q per time step. A list comprehension is needed here:
        # `[np.zeros(...)] * n` would alias the same array n times, so every
        # in-place write below would overwrite all entries at once.
        Q = [np.zeros((size * 2, size * 2)) for _ in range(len(expSigma))]
        for ii in range(len(expSigma) - 2, -1, -1):
            Q[ii][:size, :size] = np.linalg.pinv(expSigma[ii])

        x0 = x_path[0]
        x_path = np.array(x_path)
        us_path = np.array(u_path)

        us_init = np.random.uniform(-1, 1, (N - 1, dynamics.action_size))
        R = 0.1 * np.eye(dynamics.action_size)

        cost = PathQsRCost(Q, R, x_path=x_path, u_path=us_path)
        ilqr = iLQR(dynamics, cost, N - 1)

        xs, self.us = ilqr.fit(x0, us_init, on_iteration=on_iteration)

        R = 0.5 * np.eye(dynamics.action_size)

        cost2 = PathQsRCost(Q, R, x_path=x_path, u_path=self.us)

        ilqr2 = iLQR(dynamics, cost2, N - 1)

        self.cntrl = RecedingHorizonController(x0, ilqr2)
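
The tanh transform inside f above is what keeps the optimizer's unbounded
control variables inside the actuator limits. A standalone sketch of just
that squashing step, using the same +/-8 bounds as the code:

import numpy as np

min_bounds, max_bounds = -8.0, 8.0
diff = (max_bounds - min_bounds) / 2.0
mean = (max_bounds + min_bounds) / 2.0

raw = np.array([-100.0, -1.0, 0.0, 1.0, 100.0])  # unbounded optimizer output
squashed = diff * np.tanh(raw) + mean  # always stays inside (-8, 8)
print(squashed)  # ~[-8.0, -6.09, 0.0, 6.09, 8.0]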
Example #3

    def setup(self):

        J_hist = []

        def on_iteration(iteration_count, xs, us, J_opt, accepted, converged):
            J_hist.append(J_opt)
            info = "converged" if converged else ("accepted" if accepted else "failed")
            print("iteration", iteration_count, info, J_opt)

        def f(x, u, i):
            y = Model.runge_integrator(self._model.get_rbdl_model(), x, 0.01, u)

            return np.array(y)

        dynamics = FiniteDiffDynamics(f, 12, 6)


        x_path = []
        u_path = []
        count = 0
        N = self.runner.get_length()
        while count < self.runner.get_length():
            count += 1
            self.runner.step()
            u_path.append(self.runner.ddx.flatten().tolist())
            self.x.append(self.runner.x.flatten())
            x = self.runner.x.flatten().tolist() + self.runner.dx.flatten().tolist()
            x_path.append(x)

        u_path = u_path[:-1]
        expSigma = self.runner.get_expSigma()
        size = expSigma[0].shape[0]
        # One independent Q per time step (a list comprehension avoids the
        # `[arr] * n` pitfall, which would alias the same array n times).
        Q = [np.zeros((size * 2, size * 2)) for _ in range(len(expSigma))]
        for ii in range(len(expSigma) - 2, -1, -1):
            Q[ii][:size, :size] = np.linalg.pinv(expSigma[ii])

        x0 = x_path[0]
        x_path = np.array(x_path)
        u_path = np.array(u_path)

        # Control cost: cheap overall, with a heavier penalty on the last
        # three actuators.
        R = 5.0e-4 * np.eye(dynamics.action_size)
        R[3, 3] = 3.0e-3
        R[4, 4] = 3.0e-3
        R[5, 5] = 3.0e-3

        cost2 = PathQsRCost(Q, R, x_path=x_path, u_path=u_path)

        # Random initial action path.
        us_init = np.random.uniform(-1, 1, (N - 1, dynamics.action_size))

        J_hist = []
        ilqr = iLQR(dynamics, cost2, N-1)
        self.xs, self.us = ilqr.fit(x0, us_init, on_iteration=on_iteration)
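
PathQsRCost is project-specific, but its role here is a time-varying
quadratic tracking cost. A hypothetical minimal version of such a cost
(the function name and layout are assumptions, not the project's class):

import numpy as np

def path_tracking_cost(xs, us, Q_list, R, x_ref, u_ref):
    """Sum of (x - x_ref)' Q_i (x - x_ref) + (u - u_ref)' R (u - u_ref)."""
    J = 0.0
    for i in range(len(us)):
        dx = xs[i] - x_ref[i]
        du = us[i] - u_ref[i]
        J += dx @ Q_list[i] @ dx + du @ R @ du
    return J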
Example #4
def control_ilqr(N=1000):
    pend = Pendulum()
    dynamics = FiniteDiffDynamics(pend.f, pend.state_size, pend.action_size)

    # The coefficients weigh how much your state error is worth to you vs
    # the size of your controls. You can favor a solution that uses smaller
    # controls by increasing R's coefficient.
    Q = 100 * np.eye(pend.state_size)
    R = 50 * np.eye(pend.action_size)

    Q_terminal = np.diag((1000, 1000, 1000))
    cost = QRCost(Q, R, Q_terminal=Q_terminal, x_goal=pend.x_goal)

    x0 = pend.initial_state  # Initial state.
    us_init = np.random.uniform(-2, 2, (N, 1))  # Random initial action path.

    ilqr = iLQR(dynamics, cost, N)
    xs, us = ilqr.fit(x0, us_init)
    pend.close()

    return us
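
Once fit returns, the open-loop controls can be replayed through the model
to inspect the trajectory they produce. A sketch, assuming only the
dynamics.f(x, u, i) interface used throughout these examples:

import numpy as np

def rollout(dynamics, x0, us):
    """Roll a control sequence through the dynamics, collecting states."""
    xs = [np.asarray(x0)]
    for i, u in enumerate(us):
        xs.append(dynamics.f(xs[-1], u, i))
    return np.array(xs)

# e.g. with a dynamics object built as above:
# xs = rollout(dynamics, pend.initial_state, control_ilqr(N=500))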
Example #5
def control_ilqr():
    cart = MountainCar()
    dynamics = FiniteDiffDynamics(cart.f, cart.state_size, cart.action_size)

    # The coefficients weigh how much your state error is worth to you vs
    # the size of your controls. You can favor a solution that uses smaller
    # controls by increasing R's coefficient.
    Q = 500 * np.eye(cart.state_size)
    R = 100 * np.eye(cart.action_size)

    Q_terminal = 1000 * np.eye(cart.state_size)
    cost = QRCost(Q, R, Q_terminal=Q_terminal, x_goal=cart.x_goal)
    # cost = BatchAutoDiffCost(cart.cost_function, cart.state_size, cart.action_size)

    N = 1000  # Number of time-steps in trajectory.
    x0 = cart.initial_state  # Initial state.
    us_init = np.random.uniform(-1, 1, (N, 1)) # Random initial action path.

    ilqr = iLQR(dynamics, cost, N)
    xs, us = ilqr.fit(x0, us_init)
    cart.close()

    return us
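
The Q-versus-R trade-off described in the comment can be seen in a one-step
scalar version of the same cost: minimizing q*(x + u)**2 + r*u**2 over u
gives u = -q*x / (q + r), so the optimal control shrinks as R grows. A toy
check:

def one_step_u(x, q, r):
    # argmin over u of q*(x + u)**2 + r*u**2
    return -q * x / (q + r)

for r in (1.0, 100.0, 10000.0):
    print(r, one_step_u(1.0, q=500.0, r=r))  # magnitude drops as r grows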
Example #6
def f(x, u, i):

    y = model.runge_integrator(my_model, x, 0.01, u)

    return np.array(y)


def f2(x, u, i):

    y = model.runge_integrator(my_model, x, 0.01, u)

    return np.array(y)


dynamics = FiniteDiffDynamics(f, 12, 6)

file_name = "/home/jack/backup/leg1.pickle"
#file_name = "/home/jack/catkin_ws/src/ambf_walker/Train/gotozero.pickle"
runner = TPGMMRunner.TPGMMRunner(file_name)

x_path = []
u_path = []

#us_init = np.random.uniform(-1, 1, (N-1, dynamics.action_size))
count = 0
N = runner.get_length()
# print(N)
while count < runner.get_length():
    count += 1
    runner.step()
Example #7

def f(x, u, i):
    """Batched implementation of the dynamics model.

    Args:
        x: State vector [*, state_size].
        u: Control vector [*, action_size].
        i: Current time step [*, 1].

    Returns:
        Next state vector [*, state_size].
    """
    x_ = x[..., 0]
    x_dot = x[..., 1]
    F = u[..., 0]

    # Acceleration.

    x_dot_dot = x_dot * (1 - alpha * dt / m) + F * dt / m

    # Discrete dynamics model definition.
    return np.array([
        x_ + x_dot * dt,
        x_dot + x_dot_dot * dt,
    ])


# Wrap the dynamics.
# NOTE: unlike an autodiff wrapper, FiniteDiffDynamics has no up-front
# compilation cost; instead it approximates the Jacobians numerically
# with extra calls to f every time a derivative is requested.
dynamics = FiniteDiffDynamics(f, state_size, action_size)
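
Besides f, the wrapped object also exposes the numerically approximated
Jacobians. A brief sketch, assuming the ilqr package's Dynamics interface
with f_x and f_u (which FiniteDiffDynamics provides):

x = np.zeros(state_size)
u = np.zeros(action_size)
A = dynamics.f_x(x, u, 0)  # df/dx via finite differences
B = dynamics.f_u(x, u, 0)  # df/du via finite differences
print(A.shape, B.shape)    # (state_size, state_size), (state_size, action_size)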
Example #8

dynamics = FiniteDiffDynamics(f, 12, 6)

curr_x = np.array([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0])
curr_u = np.array([5.0, 2.0, 1.0, 5.0, 2.0, 1.0])
print(dynamics.f(curr_x, curr_u, 0))
x0 = [0.0, 0.0, -0.349, 0.0, 0.0, -0.349, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
hip = model.get_traj(0.0, -0.3, 0.0, 0.0, tf, dt)
knee = model.get_traj(0.0, 0.20, 0.0, 0., tf, dt)
ankle = model.get_traj(-0.349, -0.1, 0.0, 0.0, tf, dt)

x_path = []
u_path = []
N = int(tf / dt)

for i in range(N):
    x_path.append([