Example #1
0
 def solve(self):
     """Run ``GSC.mdp`` on this instance's problem data and keep every result.

     ``GSC.mdp`` is called with ``G``, ``R``, ``RT``, ``L``, ``d``, ``x0`` and
     ``gamma`` read from ``self``. Its six return values are stored, in
     order, as ``un_Q``, ``un_x``, ``phi_Q``, ``phi_x``, ``bf_Q`` and ``bf_x``
     on ``self``.

     NOTE(review): the ``*_Q`` / ``*_x`` pairs look like a policy and its
     state trajectory for three solver variants -- confirm against
     ``GSC.mdp``.
     """
     solution = GSC.mdp(
         self.G, self.R, self.RT, self.L, self.d, self.x0, self.gamma
     )
     # Exactly six values are expected; any other count raises on unpacking.
     (
         self.un_Q,
         self.un_x,
         self.phi_Q,
         self.phi_x,
         self.bf_Q,
         self.bf_x,
     ) = solution
     print("scmdp policy solved")
Example #2
0
 def solve(self):
     """Run ``GSC.mdp`` and keep only the ``bf`` policy on the instance.

     ``GSC.mdp`` is called with ``G``, ``R``, ``RT``, ``L``, ``d``, ``x0`` and
     ``gamma`` read from ``self``. Of its six return values only the fifth
     is stored (as ``self.bf_Q``); the fifth and sixth are printed.
     """
     outputs = GSC.mdp(
         self.G, self.R, self.RT, self.L, self.d, self.x0, self.gamma
     )
     # The first four results are not used here; unpacking all six still
     # enforces that exactly six values came back.
     _un_Q, _un_x, _phi_Q, _phi_x, bf_policy, bf_trajectory = outputs
     self.bf_Q = bf_policy
     print("Resulted bf policy:")
     print(self.bf_Q)
     print(bf_trajectory)
Example #3
0
# Third slice of G: every entry is 0.5.
G[2, :, :] = np.full((2, 2), 0.5)

# Rewards: the same R0 table is copied into each of the T-1 leading slices.
R0 = np.array([[1, 1, 1], [10, 10, 10]])
R = np.zeros((T - 1, n, A))
for i in range(T - 1):
    R[i] = R0
RT = np.array([[1], [30]])

# Constraint data: L is zero except L[0,0]=1 and L[1,1]=5; d is all ones.
# (An earlier variant used L = np.eye(n) and d = [[1], [0.4]].)
L = np.zeros((m, n))
L[0, 0] = 1
L[1, 1] = 5
d = np.zeros((m, 1))
d[0, 0] = 1
d[1, 0] = 1

x0 = np.array([[1], [0]])
gamma = 0.99

un_Q, un_x, phi_Q, phi_x, bf_Q, bf_x = GSC.mdp(G, R, RT, L, d, x0, gamma)
print(np.shape(phi_x))

# d repeated across T columns; shared by all three residuals below.
# NOTE(review): residual = d - L.x, so a non-negative minimum presumably
# means L.x <= d holds everywhere -- confirm against GSC.mdp's contract.
d_over_time = np.dot(d, np.ones((1, T)))
res_un = d_over_time - np.dot(L, un_x)
res_phi = d_over_time - np.dot(L, phi_x)
res_bf = d_over_time - np.dot(L, bf_x)
print(np.amin(res_un))
print(np.amin(res_phi))
print(np.amin(res_bf))
# For debugging, np.dot(L, un_x), np.dot(L, phi_x) and np.dot(L, bf_x) can be
# printed directly.