예제 #1
0
def run_ilqr_controller(env, render_flag, sim_env, tN):
    """Run a receding-horizon iLQR control loop on *env* until terminal.

    At every step the controller re-plans a length-``tN`` control sequence
    with ``ilqr.calc_ilqr_input`` against ``sim_env`` and applies only the
    first action of that plan to the real ``env``.

    Returns:
        (q, dq, u, total_reward, num_steps, total_cost) — visited joint
        positions and velocities, applied controls, accumulated reward,
        number of steps taken, and the concatenated per-replan cost history.
    """
    env.reset()
    if render_flag:
        env.render()
        time.sleep(0.01)

    total_reward, num_steps = 0, 0
    q, dq = [env.q], [env.dq]
    u, total_cost = [], []

    done = False
    while not done:
        # Re-plan from the current state; keep the full cost history.
        plan, cost = ilqr.calc_ilqr_input(env, sim_env, tN)
        total_cost = total_cost + cost

        first_action = plan[0]
        u.append(np.copy(first_action))
        _, reward, done, _ = env._step(first_action)
        q.append(np.copy(env.q))
        dq.append(np.copy(env.dq))
        if render_flag:
            env.render()
            time.sleep(0.01)

        total_reward += reward
        num_steps += 1
    return q, dq, u, total_reward, num_steps, total_cost
예제 #2
0
File: iLQR.py  Project: hainow/DRL3
def control_ilqr(env_name="TwoLinkArm-v0"):
    """Plan a full iLQR trajectory for *env_name*, plot diagnostics, then replay it."""
    env = gym.make(env_name)
    sim_env = gym.make(env_name)
    title = "iLQR: " + env_name

    # One long optimization run over a 100-step horizon.
    U, X, costs, rewards = ilqr.calc_ilqr_input(env, sim_env, tN=100, max_iter=1000000)

    # Diagnostic plots of the optimization and the resulting trajectory.
    plot_costs_ilqr(costs, title)
    plot_rewards_ilqr(rewards, title)
    plot_states_and_control_ilqr(X, U, title)

    print("\nShowing optimal trajectory")
    final_reward = show_optimal_trajectory(env, U)
    print("Total Reward for optimal trajectory: {}".format(final_reward))
예제 #3
0
import matplotlib.patches as mpatches

# Build a real and a planning (simulation) copy of the two-link arm.
env_name = 'TwoLinkArm-v1'
env = gym.make(env_name)
sim_env = gym.make(env_name)
initial_state = env.reset()

total_cost = 0
num_steps = 0
tN = 50          # planning horizon (steps)
max_iter = 100   # iLQR iteration budget

# Plan one open-loop trajectory from the current state.
x0 = copy.copy(env.state)
X, U, cost, list_of_costs = calc_ilqr_input(env,
                                            sim_env,
                                            tN=tN,
                                            max_iter=max_iter,
                                            x0=x0)

R = []  # per-step cost history of the rollout

# Replay the planned controls on the real environment.
for i in range(tN):
    x_next, cost_i, is_terminal, debug_info = env.step(U[i])
    env.render()

    total_cost += cost_i
    R.append(cost_i)

    # Fix: the original printed the running total *before* this step and
    # labelled it "reward"; report the step cost and the updated total.
    print("Control u = {}, step cost = {}, total cost = {}".format(
        str(U[i]), cost_i, total_cost))

    if is_terminal:
        break
예제 #4
0
# Debug dump of the running-cost derivatives (l_uu, l_ux are computed
# earlier in the file — not visible in this chunk).
print(l_uu)
print(l_ux.shape)
print(l_ux)

print('check final cost.....................')
# Disabled debug check of the terminal-cost derivatives.  The original
# "commented" this out with stray triple-quoted strings, which (a) left
# fl/flx/flxx undefined while still printing them (NameError) and
# (b) opened an unterminated ''' literal that swallowed the rest of the
# file (SyntaxError).  Real comments are used instead.
# fl, flx, flxx = cost_final(env, X[100])
# print(fl)
# print(flx.shape)
# print(flx)
# print(flxx.shape)
# print(flxx)

# Plan an open-loop control sequence and evaluate it in simulation.
optimal_U = calc_ilqr_input(env, sim_env, tN=100, max_iter=1000)

X, cost_list, inter_cost_list, accu_inter_cost_list, inter_cost_sum, final_cost, cost_sum = simulate(
    env, x0, optimal_U)

# Replay the plan on the environment and log states/controls to file.
for i in range(len(optimal_U)):
    time.sleep(0.2)  # slow the replay down enough to watch
    x, reward, done, info = env._step(optimal_U[i])
    f_q.write(str(x) + '\n')
    f_u.write(str(optimal_U[i]) + '\n')
f_q.close()
f_u.close()  # fix: the original closed only f_q and leaked f_u