예제 #1
0
def run_ilqr_controller(env, render_flag, sim_env, tN):
    """Run a receding-horizon iLQR control loop on *env* until terminal.

    At every step the controller re-plans a length-``tN`` control sequence
    with ``ilqr.calc_ilqr_input`` against ``sim_env`` and applies only the
    first action of that plan to the real ``env``.

    Returns:
        (q, dq, u, total_reward, num_steps, total_cost) — visited joint
        positions and velocities, applied controls, accumulated reward,
        number of steps taken, and the concatenated per-replan cost history.
    """
    env.reset()
    if render_flag:
        env.render()
        time.sleep(0.01)

    total_reward, num_steps = 0, 0
    q, dq = [env.q], [env.dq]
    u, total_cost = [], []

    done = False
    while not done:
        # Re-plan from the current state; keep the full cost history.
        plan, cost = ilqr.calc_ilqr_input(env, sim_env, tN)
        total_cost = total_cost + cost

        first_action = plan[0]
        u.append(np.copy(first_action))
        _, reward, done, _ = env._step(first_action)
        q.append(np.copy(env.q))
        dq.append(np.copy(env.dq))
        if render_flag:
            env.render()
            time.sleep(0.01)

        total_reward += reward
        num_steps += 1
    return q, dq, u, total_reward, num_steps, total_cost
예제 #2
0
File: iLQR.py  Project: hainow/DRL3
def control_ilqr(env_name="TwoLinkArm-v0"):
    """Plan a full iLQR trajectory for *env_name*, plot diagnostics, then replay it."""
    env = gym.make(env_name)
    sim_env = gym.make(env_name)
    title = "iLQR: " + env_name

    # One long optimization run over a 100-step horizon.
    U, X, costs, rewards = ilqr.calc_ilqr_input(env, sim_env, tN=100, max_iter=1000000)

    # Diagnostic plots of the optimization and the resulting trajectory.
    plot_costs_ilqr(costs, title)
    plot_rewards_ilqr(rewards, title)
    plot_states_and_control_ilqr(X, U, title)

    print("\nShowing optimal trajectory")
    final_reward = show_optimal_trajectory(env, U)
    print("Total Reward for optimal trajectory: {}".format(final_reward))
예제 #3
0
import matplotlib.patches as mpatches

# Build a real and a planning (simulation) copy of the two-link arm.
env_name = 'TwoLinkArm-v1'
env = gym.make(env_name)
sim_env = gym.make(env_name)
initial_state = env.reset()

total_cost = 0
num_steps = 0
tN = 50          # planning horizon (steps)
max_iter = 100   # iLQR iteration budget

# Plan one open-loop trajectory from the current state.
x0 = copy.copy(env.state)
X, U, cost, list_of_costs = calc_ilqr_input(env,
                                            sim_env,
                                            tN=tN,
                                            max_iter=max_iter,
                                            x0=x0)

R = []  # per-step cost history of the rollout

# Replay the planned controls on the real environment.
for i in range(tN):
    x_next, cost_i, is_terminal, debug_info = env.step(U[i])
    env.render()

    total_cost += cost_i
    R.append(cost_i)

    # Fix: the original printed the running total *before* this step and
    # labelled it "reward"; report the step cost and the updated total.
    print("Control u = {}, step cost = {}, total cost = {}".format(
        str(U[i]), cost_i, total_cost))

    if is_terminal:
        break
예제 #4
0
# Debug dump of the running-cost derivatives (l_uu, l_ux are computed
# earlier in the file — not visible in this chunk).
print(l_uu)
print(l_ux.shape)
print(l_ux)

print('check final cost.....................')
# Disabled debug check of the terminal-cost derivatives.  The original
# "commented" this out with stray triple-quoted strings, which (a) left
# fl/flx/flxx undefined while still printing them (NameError) and
# (b) opened an unterminated ''' literal that swallowed the rest of the
# file (SyntaxError).  Real comments are used instead.
# fl, flx, flxx = cost_final(env, X[100])
# print(fl)
# print(flx.shape)
# print(flx)
# print(flxx.shape)
# print(flxx)

# Plan an open-loop control sequence and evaluate it in simulation.
optimal_U = calc_ilqr_input(env, sim_env, tN=100, max_iter=1000)

X, cost_list, inter_cost_list, accu_inter_cost_list, inter_cost_sum, final_cost, cost_sum = simulate(
    env, x0, optimal_U)

# Replay the plan on the environment and log states/controls to file.
for i in range(len(optimal_U)):
    time.sleep(0.2)  # slow the replay down enough to watch
    x, reward, done, info = env._step(optimal_U[i])
    f_q.write(str(x) + '\n')
    f_u.write(str(optimal_U[i]) + '\n')
f_q.close()
f_u.close()  # fix: the original closed only f_q and leaked f_u