#w1 = fb._w1(x)
#print("w1: ", w1)

#w2 = fb._w2(x)
#print("w2: ", w2)

#mu = fb.feedback(x, v)
#nu = fb.noisy_feedback(x, v)
#print("mean u = ", mu)
#print("noisy u = ", nu)

#lp = fb.log_prob(u, x, v)
#print("log prob of [1, 1] is ", lp)

# Generate v, y desired and check that we can match with no model mismatch.
from reinforce import Reinforce
r = Reinforce(1, 1, 1, 1, 100, dyn, None, fb)
current_x = np.array([[0.1], [0.0], [0.1], [0.0]])
vs = r._generate_v()
y_desireds = r._generate_y(current_x, vs)

ys = []
print("current x: ", current_x)
for v, y_desired in zip(vs, y_desireds):
    u = dyn._f_q(current_x) + dyn._M_q(current_x) @ v
    current_x = dyn.integrate(current_x, u, dt=0.001)
    ys.append(dyn.observation(current_x))

print("ys: ", ys)
print("y_desireds:", y_desireds)
コード例 #2
0
reference[0,:]=np.pi #*np.sin(np.linspace(0,T*time_step,T))
reference[2,:]=np.pi #*np.cos(np.linspace(0,T*time_step,T))

learned_path=np.zeros((4,T+1))
learned_controls_path=np.zeros((2,T))

nominal_path=np.zeros((4,T+1))
nominal_controls_path=np.zeros((2,T))
true_path=np.zeros((4,T+1))
true_controls_path=np.zeros((2,T))
x0=0.0*np.ones((4,1))

if linear_fb:
    x=x0.copy()
    for t in range(T):
        y=dyn.observation(x)
        ydot=dyn.observation_dot(x)

        desired_linear_system_state=np.zeros((4,1))
        desired_linear_system_state[0,0]=y[0,0]
        desired_linear_system_state[1,0]=ydot[0,0]
        desired_linear_system_state[2,0]=y[1,0]
        desired_linear_system_state[3,0]=ydot[1,0]
        #desired_linear_system_state=dyn.wrap_angles(desired_linear_system_state)
        ref=np.reshape(reference[:,t],(4,1))
        #np.concatenate([y,ydot],axis=0)

        diff=dyn.observation_delta(ref,desired_linear_system_state)
        # diff=np.zeros((4,1))
        # ref1 = ref[0]
        # des1 = desired_linear_system_state[0]