Esempio n. 1
# Remaining transition-probability entries for the 7-state chain.
# NOTE(review): Pl is created before this chunk; only these entries are
# filled in here. Each tuple is (state, action, next_state, probability).
for state, action, nxt, prob in [
    (5, 0, 5, 0.1),
    (6, 0, 6, 1),
    (0, 1, 0, 1),
    (1, 1, 1, 0),
    (1, 1, 0, 1),
    (2, 1, 1, 1),
    (3, 1, 2, 1),
    (4, 1, 3, 1),
    (5, 1, 4, 1),
    (6, 1, 5, 1),
]:
    Pl[state, action, nxt] = prob

# Reward of 1 in states 0 and 6 (for every action), 0 elsewhere.
Rl = np.zeros((7, 2))
Rl[[0, 6], :] = 1

# States 0 and 6 are marked absorbing.
absorv = np.zeros((7, 1))
absorv[[0, 6]] = 1

# 7 states, 2 actions, discount 0.9.
fmdp = RL.finiteMDP(7, 2, 0.9, Pl, Rl, absorv)

# Long exploratory run to collect traces, then estimate Q from them and
# compare against the reference stored in Q1.npz.
J, traj = fmdp.runPolicy(10000, 3, poltype="exploration")  # choose this value
data = np.load("Q1.npz")
Qr = fmdp.traces2Q(traj)

q_error = np.sqrt(((data['Q1'] - Qr) ** 2).sum())
if q_error < 1:
    print("Aproximação de Q dentro do previsto. OK\n")
else:
    print("Aproximação de Q fora do previsto. FAILED\n")

# Short greedy (exploitation) run driven by the learned Q; the resulting
# trajectory must match the stored reference trajectory.
J, traj = fmdp.runPolicy(3, 3, poltype="exploitation", polpar=Qr)
traj_error = np.sqrt(((data['traj2'] - traj) ** 2).sum())
if traj_error < 1:
    print("Trajectória óptima. OK\n")
else:
    print("Trajectória não óptima. FAILED\n")
    
Esempio n. 2
# Transition matrices for actions 1-3 of a 4-state MDP.
# NOTE(review): Pl is created (and action 0 filled) before this chunk.
for action, matrix in (
    (1, [[0, 0, 1, 0],
         [0, 0, 0, 1],
         [0, 0, 1, 0],
         [0, 0, 0, 1]]),
    (2, [[1, 0, 0, 0],
         [1, 0, 0, 0],
         [0, 0, 1, 0],
         [0, 0, 1, 0]]),
    (3, [[0, 1, 0, 0],
         [0, 1, 0, 0],
         [0, 0, 0, 1],
         [0, 0, 0, 1]]),
):
    Pl[:, action, :] = np.array(matrix)

# Per state/action rewards: mostly -1, with 0 on selected pairs.
Rl = np.array([
    [-1, -1, -1, 0],
    [-1, 0, -1, -1],
    [-1, -1, -1, 0],
    [-1, 0, -1, 0],
])

# Only the last state (index 3) is absorbing.
absorv = np.zeros((4, 1))
absorv[3] = 1

# 4 states, 4 actions, discount 0.9.
fmdp = RL.finiteMDP(4, 4, 0.9, Pl, Rl, absorv)

# Exploratory run on the 4-state MDP, then fit Q from the collected traces
# and compare it against the reference stored in Q2.npz.
J, traj = fmdp.runPolicy(3000, 0, poltype="exploration")
data = np.load("Q2.npz")
Qr = fmdp.traces2Q(traj)

q_dist = np.sqrt(np.sum((data['Q1'] - Qr) ** 2))
if q_dist < 1:
    print("Aproximação de Q dentro do previsto. OK\n")
else:
    print("Aproximação de Q fora do previsto. FAILED\n")

J, traj = fmdp.runPolicy(3, 1, poltype="exploitation", polpar=Qr)
result = np.sqrt(sum(sum((data['traj2'] - traj)**2)))
if result < 1:
    print("Trajectória óptima. OK\n")
else: