Code Example #1
def apply_controller(plant, params, H, policy=None):
    '''
    Starts the plant and applies the current policy to the plant for a duration specified by H (in seconds).

    @param plant is a class that controls our robot (simulation)
    @param params is a dictionary with some useful values
    @param H Horizon for applying controller (in seconds)
    @param policy is a function pointer to the code that implements your
            control solution. It will be called as u = policy( state )
    '''

    # start robot
    x_t, t = plant.get_plant_state()
    if plant.noise is not None:
        # randomize state
        Sx_t = np.zeros((x_t.shape[0], x_t.shape[0]))
        L_noise = np.linalg.cholesky(plant.noise)
        x_t = x_t + np.random.randn(x_t.shape[0]).dot(L_noise)

    sum_of_error = 0
    H_steps = int(np.ceil(H / plant.dt))
    for i in range(H_steps):
        # convert input angle dimensions to complex representation
        x_t_ = gTrig_np(x_t[None, :], params['angle_dims']).flatten()
        #  get command from policy (this should be fast, or at least account for delays in processing):
        u_t = policy(x_t_)

        #  send command to robot:
        plant.apply_control(u_t)
        plant.step()
        x_t, t = plant.get_plant_state()
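        # Euclidean distance from the pole tip to the upright goal position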
        l = plant.params['l']
        st = np.sin(x_t_[3])
        ct = np.cos(x_t_[3])
        goal = np.array([0, l])
        end = np.array([x_t[0] + l * st, -l * ct])
        dist = np.sqrt((goal[0] - end[0]) * (goal[0] - end[0]) +
                       (goal[1] - end[1]) * (goal[1] - end[1]))
        sum_of_error = sum_of_error + dist

        if plant.noise is not None:
            # randomize state
            x_t = x_t + np.random.randn(x_t.shape[0]).dot(L_noise)

        if plant.done:
            break

    print("Error this episode was: ", sum_of_error)

    # stop robot
    plant.stop()
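
The function above only needs a callable that maps a state vector to a control vector. Below is a minimal usage sketch; the Cartpole constructor, the contents of params, and the zero-force policy are illustrative assumptions, not taken from the original project:

import numpy as np

def zero_policy(state):
    # hypothetical placeholder policy: always apply zero force
    return np.zeros(1)

# assumed setup; the real project builds the plant and params elsewhere
params = {'angle_dims': [3]}                    # assumed: index of the pole angle
plant = Cartpole(dt=0.05, params={'l': 0.5})    # assumed constructor and parameters

apply_controller(plant, params, H=4.0, policy=zero_policy)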
Code Example #2
File: ilqr_cartpole_learn.py    Project: harwiltz/ilqr
def apply_controller(plant,
                     agent_swingup,
                     agent_stable,
                     params,
                     H,
                     horizon=None):
    x_t, t = plant.get_plant_state()
    for i in range(H):
        x_t_ = gTrig_np(x_t[None, :], params['angle_dims']).flatten()
        if (horizon is None) or i < horizon:
            u_t = agent_swingup.policy(x_t, i)
        else:
            if (horizon is not None) and (i == horizon):
                print("Reached end of control sequence")
                print("Final state : {}".format(x_t))
                input("Press ENTER to continue...")
            u_t = agent_stable.policy(x_t, i)
        plant.apply_control(u_t)
        plant.step()
        x_t, t = plant.get_plant_state()
        if plant.done:
            break
    plant.stop()
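
This variant swaps controllers mid-episode: agent_swingup drives the pole up for the first horizon steps, then agent_stable takes over. Each agent only needs a policy(state, step) method. The sketch below shows that minimal interface with constant-force stand-ins; these are illustrative assumptions, not the project's iLQR agent classes:

import numpy as np

class ConstantAgent:
    # hypothetical stand-in agent exposing the policy(state, step) interface
    def __init__(self, force):
        self.force = force

    def policy(self, x, i):
        # ignore the state and step index and always return the same control
        return np.array([self.force])

# assumed usage; the real project constructs iLQR agents instead
apply_controller(plant, ConstantAgent(1.0), ConstantAgent(0.0),
                 params, H=200, horizon=100)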
Code Example #3
def apply_controller(plant, params, H, policy=None):
    '''
    Starts the plant and applies the current policy to the plant for a duration specified by H (in seconds).

    @param plant is a class that controls our robot (simulation)
    @param params is a dictionary with some useful values 
    @param H Horizon for applying controller (in seconds)
    @param policy is a function pointer to the code that implements your 
            control solution. It will be called as u = policy( state )
    '''

    # start robot
    x_t, t = plant.get_plant_state()

    if plant.noise is not None:
        # randomize state
        Sx_t = np.zeros((x_t.shape[0], x_t.shape[0]))
        L_noise = np.linalg.cholesky(plant.noise)
        x_t = x_t + np.random.randn(x_t.shape[0]).dot(L_noise)

    sum_of_error = 0
    integral_pos = 0
    integral_angle = 0
    error_pos = 0
    error_angle = 0
    error_prev_pos = 0
    error_prev_angle = 0
    angle = []
    position = []
    #x_t_prev=gTrig_np(x_t[None,:], params['angle_dims']).flatten()
    H_steps = int(np.ceil(H / plant.dt))
    for i in range(H_steps):

        # convert input angle dimensions to complex representation
        x_t_ = gTrig_np(x_t[None, :], params['angle_dims']).flatten()
        #print(x_t_[4])
        #  get command from policy (this should be fast, or at least account for delays in processing):

        #u_t,integral_pos,integral_angle,error_pos,error_angle,error_prev_pos,error_prev_angle = policy(x_t_,integral_pos,integral_angle,error_pos,error_angle,error_prev_pos,error_prev_angle)
        u_t = LQR(plant, x_t_)
        error_prev_pos = error_pos
        error_prev_angle = error_angle
        angle.append(x_t_[3])
        position.append(x_t_[0])

        #  send command to robot:
        plant.apply_control(u_t)
        plant.step()
        x_t, t = plant.get_plant_state()
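        # squared distance from the pole tip to the upright goal;
        # x_t_[3] and x_t_[4] hold sin/cos of the pole angle after gTrig_np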
        l = plant.params['l']
        err = np.array([0, l]) - np.array([x_t[0] + l * x_t_[3], -l * x_t_[4]])
        dist = np.dot(err, err)
        sum_of_error = sum_of_error + dist

        if plant.noise is not None:
            # randomize state
            x_t = x_t + np.random.randn(x_t.shape[0]).dot(L_noise)

        if plant.done:
            break
    plt.plot(position)
    plt.ylim(-1, 1)
    plt.title("Plot of position values with respect to steps")
    plt.ylabel("Position")
    plt.xlabel("Steps")

    #plt.plot(position)
    plt.show()
    print("Error this episode was: %f" % sum_of_error)

    # stop robot
    plant.stop()
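
The LQR(plant, x_t_) call above is defined elsewhere in that project. As a rough illustration of what such a helper typically does for a cart-pole linearized about the upright equilibrium, here is a hedged sketch using SciPy's continuous-time Riccati solver; the A and B matrices, physical constants, and cost weights are assumptions, not the project's values:

import numpy as np
from scipy.linalg import solve_continuous_are

def lqr_gain(A, B, Q, R):
    # solve the continuous-time algebraic Riccati equation and
    # return the state-feedback gain K = R^-1 B^T P
    P = solve_continuous_are(A, B, Q, R)
    return np.linalg.solve(R, B.T.dot(P))

# assumed cart-pole linearization about the upright equilibrium
# state ordering: [cart position, cart velocity, pole angle, pole angular velocity]
m_c, m_p, l, g = 0.5, 0.5, 0.5, 9.81
A = np.array([[0, 1, 0, 0],
              [0, 0, m_p * g / m_c, 0],
              [0, 0, 0, 1],
              [0, 0, (m_c + m_p) * g / (m_c * l), 0]])
B = np.array([[0], [1 / m_c], [0], [1 / (m_c * l)]])
Q = np.diag([1.0, 1.0, 10.0, 1.0])   # assumed state cost weights
R = np.array([[0.1]])                # assumed control cost weight

K = lqr_gain(A, B, Q, R)
# u = -K.dot(x) would then stabilize the system near the upright state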