def simulate_with_actions(state, N, action_schedule):
    quad_dev = Quad(state)
    trajectory = [state.copy()]
    action_schedule[:, 2] += M * G  # add gravity compensation to the thrust column (note: modifies the schedule in place)
    for i in range(N-1):
        action = action_schedule[i]
        state, _ = quad_dev.step(state, action)
        trajectory.append(state.copy())
    return np.array(trajectory)  # shape (N, state_dim)
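# Hypothetical usage sketch (not from the original script; assumes the globals
# M, G and the Quad class above): roll out an open-loop schedule of hover
# commands and inspect the resulting trajectory.
def _demo_simulate_with_actions(init_state, N=100):
    hover_schedule = np.zeros((N - 1, 3))  # columns assumed: roll, pitch, thrust offset
    traj = simulate_with_actions(init_state, N, hover_schedule)
    return traj  # shape (N, 6): states [x, y, z, vx, vy, vz]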
def main():

    simulation_time = 20

    ## prepare the trajectory (course) to track
    idx = np.arange(0, 4 * simulation_time, dl)
    course_pos = curve_pos(idx)
    course_vel = curve_vel(idx)
    speed = 0.5 # m/s
    course_vel = course_vel / np.linalg.norm(course_vel, axis=0) * (speed + np.random.rand())  # rescale every velocity to a common speed (plus a small random offset), keeping its direction
    course_vel[:, 0] = 0  # the first tangent may have zero norm, so zero it out to avoid NaN from the division above
    course_vel[:, -1] = 0  # stop at the end of the course
    course = np.concatenate((course_pos, course_vel), axis=0) # desired state with dim 6, shape [dim, timeslot]
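    # (curve_pos / curve_vel are assumed to map the parameter array to 3xN
    #  position and tangent arrays; a hypothetical stand-in for quick tests
    #  could be a helix, e.g.
    #      curve_pos = lambda s: np.vstack([np.cos(s), np.sin(s), 0.1 * s])
    #      curve_vel = lambda s: np.vstack([-np.sin(s), np.cos(s), 0.1 * np.ones_like(s)])
    #  the actual course used here is defined elsewhere in the project.)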

    # init state and target state
    init_state = course[:, 0] + np.array([1., 2., 0., 0., 0., 0.])  # alternative offset: np.array([-1., 1., 0., 0., 0., 0.])
    state = init_state
    nearest, _ = calc_nearest_index(state, course, 0)
    target_states = course[:, nearest]

    # quadrotor object
    quad = Quad(init_state)

    # dimension
    nu = 4
    nx = init_state.shape[0] # 6

    # controller parameter settings for both LQR and linear MPC
    u = np.zeros(nu)
    horizon = 20  # prediction and control horizon for MPC
    Q = np.array([20. , 20.,  20., 8, 8, 8])
    QN = Q
    # Q = np.array([8. , 8.,  8., 0.8, 0.8, 0.8])
    # QN = np.array([10., 10., 10., 1.0, 1.0, 1.0])
    R = np.array([6., 6., 6.])  # control cost on the 3-dim (roll, pitch, thrust) command used by MPC/LQR
    Ad, Bd = linearized_model(state)  # a linear time-invariant model, linearized about the equilibrium (hover) point, is used throughout
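    # For reference only (an illustrative assumption, not the project's exact
    # matrices): a hover linearization of this 6-state model typically has the
    # double-integrator structure
    #     A = [[0, I3], [0, 0]],   B = [[0], [accel_map]]
    # where accel_map sends the 3-dim (roll, pitch, thrust) command to linear
    # accelerations, and Ad ~ I + dt*A, Bd ~ dt*B under forward-Euler
    # discretization.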
    
    # record state
    labels = ['x', 'y', 'z', 'vx', 'vy', 'vz']  # convenient for plotting
    state_control = {x: [] for x in labels}
    target_chosen = {x: [] for x in labels}
    for ii, lbl in enumerate(labels):
        target_chosen[lbl].append(target_states[ii])
    err = []  # tracking error

    # visualization
    fig = plt.figure()
    ax = plt.axes(projection='3d')

    # control loop
    timestamp = 0.0
    while timestamp <= simulation_time:
        print("timestamp", timestamp)
        if TRACKING:
            target_states, nearest, chosen_idxs = calc_ref_trajectory(state, course, nearest, horizon)
        else:
            target_states = np.array([0, 0, 10, 0, 0, 0])

        if CONTROLLER=='MPC':
            mpc_policy = mpc(state, target_states, Ad, Bd, horizon, Q, QN, R, TRACKING)
            nominal_action, actions, nominal_state = mpc_policy.solve()
            action = nominal_action  # optional feedback correction: - np.dot(K, (target_states - nominal_state))
            roll_r, pitch_r, thrust_r = action
            u[0] = - pitch_r
            u[1] = - roll_r
            u[2] = 0.0
            u[3] = thrust_r + M * G  # feed-forward gravity compensation

        elif CONTROLLER=='LQR':
            lqr_policy = FiniteHorizonLQR(Ad, Bd, Q, R, QN, horizon=horizon)  # instantiate an LQR controller
            lqr_policy.set_target_state(target_states)  ## set the tracking target for the LQR
            lqr_policy.sat_val = np.array([PI/8., PI/8., 0.75])  # saturation limits on the control channels
            K, ustar = lqr_policy(state)
            
            u[0] = - ustar[1]
            u[1] = - ustar[0]
            u[2] = 0.0
            u[3] = ustar[2] + M * G
            print("K", K)

        elif CONTROLLER=='PID':
            # compute control errors (effectively a PD law: position and velocity errors, no integral term)
            x, y, z, dx, dy, dz = state
            x_r, y_r, z_r, dx_r, dy_r, dz_r = target_states
            ex = x - x_r
            ey = y - y_r
            ez = z - z_r
            dex = dx - dx_r
            dey = dy - dy_r
            dez = dz - dz_r
            xi = 1.2   # damping ratio
            wn = 3.0   # natural frequency [rad/s]
            
            Kp = - wn * wn
            Kd = - 2 * wn * xi
            Kxp = 1.2 * Kp
            Kxd = 1.2 * Kd
            Kyp = Kp
            Kyd = Kd
            Kzp = 0.8 * Kp
            Kzd = 0.8 * Kd
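            # Gain rationale: per axis the error dynamics are approximately a
            # double integrator, e_ddot = u. With u = Kp*e + Kd*de, choosing
            # Kp = -wn**2 and Kd = -2*xi*wn gives the closed-loop characteristic
            # polynomial s**2 + 2*xi*wn*s + wn**2 = 0, i.e. natural frequency wn
            # and damping ratio xi (xi = 1.2 is slightly overdamped). The 1.2x
            # and 0.8x factors merely stiffen or soften individual axes.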
            
            pitch_r = Kxp * ex + Kxd * dex
            roll_r = Kyp * ey + Kyd * dey
            thrust_r = (Kzp * ez + Kzd * dez + 9.8) * 0.44
            
            u[0] = pitch_r
            u[1] = roll_r
            u[2] = 0.
            u[3] = thrust_r + M * G



        next_state = quad.step(state, u)
        state = next_state

        timestamp = timestamp + dt


        if TRACKING:
            err.append(state[:3] - target_states[:3, -1])
        else:
            err.append(state[:3] - target_states[:3])
        
        # record
        for ii, lbl in enumerate(labels):
            state_control[lbl].append(state[ii])
            if TRACKING:
                target_chosen[lbl].append(target_states[ii, -1])
            else:
                target_chosen[lbl].append(target_states[ii])

        ## plot
        if True:  # alternatively: if timestamp > dt
            # plot in real time
            plt.cla()
            x = np.append(init_state[0], state_control['x'])
            y = np.append(init_state[1], state_control['y'])
            z = np.append(init_state[2], state_control['z'])
            final_idx = len(x) - 1
            ax.plot3D(x, y, z, label='Quadcopter flight trajectory')
            ax.plot3D([init_state[0]], [init_state[1]], [init_state[2]], label='Initial position', color='blue', marker='x')
            ax.plot3D([x[final_idx]], [y[final_idx]], [z[final_idx]], label='Final position', color='green', marker='o')
            if TRACKING:
                plt_len = chosen_idxs[-1] + 100
                ax.plot3D(course[0, :plt_len], course[1, :plt_len], course[2, :plt_len])
                # ax.scatter3D([course[0, chosen_idxs[0]]], [course[1, chosen_idxs[0]]], [course[2, chosen_idxs[0]]], color='red', label='chosen target flight trajectory',
                #              marker='o')
                for i in range(horizon):
                    ax.scatter3D([course[0, chosen_idxs[i]]], [course[1, chosen_idxs[i]]], [course[2, chosen_idxs[i]]], color='red', marker='o')

            ax.scatter3D([target_states[0]], [target_states[1]], [target_states[2]], color='red', marker='o')
            plt.legend()
            plt.pause(0.01)
    err = np.stack(err)
    
    plt.figure()
    plt.plot(err[:,0], label='x')
    plt.plot(err[:,1], label='y')
    plt.plot(err[:,2], label='z')
    plt.legend()
    plt.show()
# Example #3
def main():

    quad = Quad()  ### instantiate a quadcopter

    ### the quad's timing parameters are used to construct
    ### the Koopman operator and the adjoint differential equation
    koopman_operator = KoopmanOperator(quad.time_step)
    adjoint = Adjoint(quad.time_step)

    task = Task()  ### this creates the task

    simulation_time = 1000
    horizon = 20  ### time horizon
    sat_val = 6.0  ### saturation value
    control_reg = np.diag([1.] * 4)  ### control regularization
    inv_control_reg = np.linalg.inv(control_reg)  ### precompute this
    default_action = lambda x: np.random.uniform(-0.1, 0.1, size=(4,))  ### fallback in case the LQR control returns NaN

    ### initialize the state
    #_R = np.diag([1.0, 1.0, 1.0])
    _R = euler2mat(np.random.uniform(-1., 1., size=(3, )))
    _p = np.array([0., 0., 0.])
    _g = RpToTrans(_R, _p).ravel()
    _twist = np.random.uniform(-1., 1., size=(6,))  # optionally scaled by 2.0
    state = np.r_[_g, _twist]
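    # RpToTrans(_R, _p) packs rotation and position into a 4x4 homogeneous
    # transform, so the flattened pose _g has 16 entries and the full state
    # is 16 + 6 = 22 dimensional (pose + body twist).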

    target_orientation = np.array([0., 0., -9.81])

    err = np.zeros(simulation_time)
    for t in range(simulation_time):

        #### measure state and transform through koopman observables
        m_state = get_measurement(state)

        t_state = koopman_operator.transform_state(m_state)
        err[t] = np.linalg.norm(m_state[:3] - target_orientation) + np.linalg.norm(m_state[3:])

        Kx, Ku = koopman_operator.get_linearization()  ### grab the linear matrices
        lqr_policy = FiniteHorizonLQR(Kx, Ku, task.Q, task.R, task.Qf, horizon=horizon)  ### instantiate an LQR controller
        lqr_policy.set_target_state(task.target_expanded_state)  ## set the target state in the Koopman observable space
        lqr_policy.sat_val = sat_val  ### set the saturation value

        ### forward simulate the Koopman dynamical system (here fdx, fdu are just Kx, Ku in a list)
        trajectory, fdx, fdu, action_schedule = koopman_operator.simulate(t_state, horizon, policy=lqr_policy)
        ldx, ldu = task.get_linearization_from_trajectory(trajectory, action_schedule)
        mudx = lqr_policy.get_linearization_from_trajectory(trajectory)

        rhof = task.mdx(trajectory[-1])  ### get terminal condition for adjoint
        rho = adjoint.simulate_adjoint(rhof, ldx, ldu, fdx, fdu, mudx, horizon)

        ustar = -np.dot(inv_control_reg, fdu[0].T.dot(rho[0])) + lqr_policy(t_state)
        ustar = np.clip(ustar, -sat_val, sat_val)  ### saturate control

        if np.isnan(ustar).any():
            ustar = default_action(None)

        ### advance the quad subject to ustar
        next_state = quad.step(state, ustar)
        ### update the koopman operator from data
        koopman_operator.compute_operator_from_data(get_measurement(state), ustar, get_measurement(next_state))
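        ### (compute_operator_from_data presumably does an EDMD-with-control
        ###  style least-squares refit: stack z = [psi(x); u] over the data
        ###  seen so far and solve min || psi(x') - [Kx, Ku] z ||^2; the exact
        ###  update lives inside KoopmanOperator.)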

        state = next_state  ### update the simulator state
        ### decay the information-gain weight over time
        task.inf_weight = 100.0 * (0.99 ** t)
        if t % 100 == 0:
            print('time : {}, pose : {}, {}'.format(t * quad.time_step, get_measurement(state), ustar))

    t = [i * quad.time_step for i in range(simulation_time)]
    plt.plot(t, err)
    plt.xlabel('t')
    plt.ylabel('tracking error')
    plt.show()