# --- 2-state / 2-input linear system: SUSD vs. policy gradient vs. LQR ---
nb_states = 2
nb_actions = 2

# White-box model wraps the known dynamics; the black-box model is a learned
# predictor restored from disk (load_model returns a success flag).
WB_model = WB_Model(dt)
BB_model = BB_Model(nb_states, nb_actions)
loaded = BB_model.load_model(path='linear_2D_')
if loaded:
    print("2 Input Linear model loaded!")

# create the SUSD learner (plus a policy-gradient baseline) from the same
# cost weights and initial gain guess
Q = np.eye(2)
R = np.eye(2)
K0 = np.array([[-10., -10.],[-10., -10.]])  # initial feedback-gain guess
N_trajectories = 10
N_agents = 25
T_rollout = 10
S = SUSD(WB_model, Q, R, K0.copy(), N_agents, T_rollout)
PG = Policy_Gradient(WB_model, Q, R, K0.copy(), N_trajectories, T_rollout)

# compute the optimal K from the true (A, B) via continuous-time LQR
A, B = WB_model.system()
K_lqr, _, _ = control.lqr(A, B, Q, R)

# get optimal K cost: roll the LQR controller forward from x0
C_lqr = 0
x = x0.copy()
Nsteps = int(T_rollout/dt)
for t in range(Nsteps):
    # compute input (note lqr returns K for u = -Kx)
    u = np.dot(-K_lqr, x)
    # compute cost
    # NOTE(review): the cost accumulation and state update are not visible in
    # this excerpt -- the loop body continues past the end of this chunk.
# Sweep the number of SUSD rollout agents (values in `n`, defined earlier) and
# record each run's cost trace so convergence can be compared across counts.
for idn, N_rollout in enumerate(n):
    # define simulation length
    Nsteps = 500
    # define starting state
    x0 = np.array([-8, 9])
    # load model predictor
    nb_states = 2
    nb_actions = 1
    wb_model = WB_Model(dt)
    # create the SUSD learner
    Q = np.eye(2)
    R = np.array([1])
    K0 = np.array([0.1, 0.1])  # initial feedback-gain guess
    T_rollout = 20
    S = SUSD(wb_model, Q, R, K0, N_rollout, T_rollout, alpha=0.35, term_len=100)
    # estimate optimal K using SUSD
    converged, iters = S.search(x0, max_iter=100)
    K = S.K.reshape((nb_actions, nb_states))
    print("SUSD Search took", iters, "iterations")
    print("Estimated K:")
    print(K)
    # keep this run's cost buffer so all agent counts can be plotted together
    z_list[idn] = S.z_buf
cost_plot(z_list, n)
plt.show()
# --- 4-state / 2-input system: SUSD search vs. discrete-time LQR reference ---
nb_states = 4
nb_actions = 2
WB_model = WB_Model(dt)

# create the SUSD learner
Q = np.eye(4)
R = np.eye(2)
K0 = np.array([[-2., -2., -2., -2.], [-2., -2., -2., -2.]])  # initial gain guess
N_agents = 30
T_rollout = 10
S = SUSD(WB_model, Q, R, K0, N_agents, T_rollout, dt=dt, alpha=0.1, term_len=200)

# compute the optimal K: wrap (A, B) in a state-space object with unit sample
# time so control.lqr solves the discrete-time problem.
# Renamed local from `sys` to avoid shadowing the stdlib `sys` module.
A, B = WB_model.system()
plant = control.StateSpace(A, B, np.eye(4), np.zeros((4, 2)), 1)
K_lqr, _, _ = control.lqr(plant, Q, R)

# estimate optimal K using SUSD
converged, iters = S.search(x0, r=0.001, max_iter=100000)
K = S.K.reshape((nb_actions, nb_states))
print("SUSD Search took", iters, "iterations")
# Shared LQR weights and initial gain for the learner comparison
# (Q and WB_model are defined earlier in the file).
R = np.array([1])
K0 = np.array([-10., -10.])  # initial feedback-gain guess
N_points = 10
N_trajectories = 100
N_agents = 5
T_rollout = 10

# three learners sharing the same model, cost weights, and starting gain;
# 0.01 * np.eye(2) is the random-search sampling covariance
RS = Random_Search(WB_model, Q, R, K0.copy(), N_points, T_rollout, 0.01 * np.eye(2), dt=dt)
S = SUSD(WB_model, Q, R, K0.copy(), N_agents, T_rollout, dt=dt, alpha=0.9)
PG = Policy_Gradient(WB_model, Q, R, K0.copy(), N_trajectories, T_rollout, dt=dt, alpha=9E-1)

# compute the optimal K as the ground-truth reference
A, B = WB_model.system()
K_lqr, _, _ = control.lqr(A, B, Q, R)
print("LQR Gains:", -K_lqr)
# estimate optimal K using FD
# Learner setup: random search, SUSD, and policy gradient share the same
# model, cost weights (Q, R), and initial gain K0 defined earlier in the file.
N_agents = 30
T_rollout = 10
# 0.5 * np.eye(12) is the random-search sampling covariance
# (12 presumably equals the number of gain entries -- TODO confirm vs. K0)
RS = Random_Search(WB_model, Q, R, K0.copy(), N_points, T_rollout, 0.5 * np.eye(12), dt=dt)
S = SUSD(WB_model, Q, R, K0.copy(), N_agents, T_rollout, dt=dt, alpha=0.1, term_len=100)
PG = Policy_Gradient(WB_model, Q, R, K0.copy(), N_trajectories, T_rollout, dt=dt, alpha=0.8)
# compute the optimal K
A, B = WB_model.system()
# --- 2-state / 1-input system: SUSD search vs. continuous-time LQR ---
# load model predictor; check the success flag like the other experiments do
# instead of silently ignoring it
nb_states = 2
nb_actions = 1
model = BB_Model(nb_states, nb_actions)
loaded = model.load_model(path='linear_said_')
if not loaded:
    print("Warning: black-box model 'linear_said_' failed to load")
WB_model = WB_Model(dt)

# create the SUSD learner
Q = np.eye(2)
R = np.array([1])
K0 = np.array([-10., -10.])  # initial feedback-gain guess
N_rollout = 3
T_rollout = 30
S = SUSD(WB_model, Q, R, K0, N_rollout, T_rollout)

# compute the optimal K from the true (A, B)
A, B = WB_model.system()
K_lqr, _, _ = control.lqr(A, B, Q, R)

# estimate optimal K using SUSD
converged, iters = S.search(x0)
K = S.K.reshape((nb_actions, nb_states))
print("SUSD Search took", iters, "iterations")
print("Estimated K:", K)
print("LQR K:", -K_lqr)

# simulate the trajectory (columns are per-timestep states)
x = np.zeros([nb_states, Nsteps])
# --- 2-state / 2-input system: SUSD searches on the learned (black-box)
# model, while the LQR reference uses the white-box dynamics ---
# load model predictor
nb_states = 2
nb_actions = 2
bb_model = BB_Model(nb_states, nb_actions)
loaded = bb_model.load_model(path='linear_2D_')
if loaded:
    print("2 Input Linear model loaded!")
WB_model = WB_Model(dt)

# create the SUSD learner on the black-box model
Q = np.eye(2)
R = np.eye(2)
K0 = np.array([[-1., -1.], [-1., -1.]])  # initial feedback-gain guess
N_agents = 10
T_rollout = 15
S = SUSD(bb_model, Q, R, K0, N_agents, T_rollout)

# compute the optimal K from the true (A, B)
A, B = WB_model.system()
K_lqr, _, _ = control.lqr(A, B, Q, R)

# estimate optimal K using SUSD
converged, iters = S.search(x0)
K = S.K.reshape((nb_actions, nb_states))
print("SUSD Search took", iters, "iterations")
print("Estimated K:", K)
print("LQR K:", -K_lqr)

# simulate the trajectory (columns are per-timestep states)
x = np.zeros([nb_states, Nsteps])