# NOTE(review): the statements below are collapsed onto a single physical
# line (original line breaks and indentation are lost), so as written
# everything after the first '#' is a comment.  The nesting described here
# is inferred from the statement order -- confirm against the original file.
#
# Purpose: build a transition table T keyed by (s1, a) for every pair in
# it.product(MS, L.A).  Each entry maps a successor s to the uniform
# probability p = 1.0 / len(S2), where S2 is obtained from rob.around(...).
# T is wrapped with mdp.mdp(MS, L.A, T), combined with L through
# pomdp.pomdp().initProduct(L, M), and the observation set P.O is set to
# list(func.powerset(single_obs)).  P.Z is then initialised to an empty dict.
#
# NOTE(review) on the TOFIX marker: assuming `it` is itertools, each extra
# pass of `S2 = list(it.product(S2, ...))` wraps the previous tuples again
# (((p0, p1), p2), ...), and `s = (s2[0], s2[1])` only reads the first two
# components.  This looks correct only while the loop runs at most once
# (NROB <= 3) -- confirm against how the states in MS are represented.
#
# The trailing `for (l,m) in P.S:` loop has no body in this view (only
# commented-out code follows), so the construction of P.Z is not shown here.
T = {} for s1,a in it.product(MS,L.A): # TOFIX if NROB == 2: S2 = rob.around(s1,DIM) else: # NROB > 2 S2 = rob.around(s1[0],DIM) for i in range(1,NROB-1): S2 = list(it.product(S2,rob.around(s1[i],DIM))) p = 1.0 / float(len(S2)) T[(s1,a)] = {} for s2 in S2: s = (s2[0],s2[1]) T[(s1,a)][s] = p M = mdp.mdp(MS,L.A,T) # ==== POMDP partially observable ==== P = pomdp.pomdp() P.initProduct(L,M) # single observation single_obs = ['n','s','e','w','h'] # collective observation P.O = list(func.powerset(single_obs)) # inexpensive state space construction P.Z = {} for (l,m) in P.S: # print l,m # if NROB > 2:
# Deterministic transition table: (state, action) -> {successor: 1}.
# Each row below is (source state, action, successor state).
#   * from 'tl' / 'tr', action 'l' keeps the current state, while 'ol' and
#     'or' move to 'win' or 'lose';
#   * 'win' and 'lose' are absorbing: every action leaves them unchanged.
MT = {
    (source, action): {target: 1}
    for source, action, target in (
        ('tl', 'l', 'tl'),
        ('tl', 'ol', 'lose'),
        ('tl', 'or', 'win'),
        ('tr', 'l', 'tr'),
        ('tr', 'ol', 'win'),
        ('tr', 'or', 'lose'),
        ('win', 'l', 'win'),
        ('win', 'ol', 'win'),
        ('win', 'or', 'win'),
        ('lose', 'l', 'lose'),
        ('lose', 'ol', 'lose'),
        ('lose', 'or', 'lose'),
    )
}
M = mdp.mdp(MS, L.A, MT)

# ==== POMDP partially observable ====
# Combine L and the MDP M into the POMDP P via initProduct.
P = pomdp.pomdp()
P.initProduct(L, M)

# observations
P.O = ['hl', 'hr']

# observation function: (0, index of the named state) -> {observation: prob}.
# Each row below is (state name, P('hl'), P('hr')).  The leading 0 in the key
# is presumably the state of L -- TODO confirm against pomdp.initProduct.
P.Z = {
    (0, P.M.inv_names[state_name]): {'hl': prob_hl, 'hr': prob_hr}
    for state_name, prob_hl, prob_hr in (
        ('tl', 0.85, 0.15),
        ('tr', 0.15, 0.85),
        ('win', 0.5, 0.5),
        ('lose', 0.5, 0.5),
    )
}
# NOTE(review): the statements below are collapsed onto a single physical
# line (original line breaks and indentation are lost), so as written
# everything after the first '#' is a comment.  The nesting described here
# is inferred from the statement order -- confirm against the original file.
#
# Purpose: build a transition table T keyed by (s1, a) for every pair in
# it.product(MS, L.A).  Each entry maps a successor s to the uniform
# probability p = 1.0 / len(S2), where S2 is obtained from rob.around(...).
# T is wrapped with mdp.mdp(MS, L.A, T) and combined with L through
# pomdp.pomdp().initProduct(L, M), printing progress markers along the way
# (Python 2 print statements).  The observation set P.O is ['some', 'none']
# and P.Z is initialised to an empty dict.
#
# NOTE(review) on the first TOFIX marker: assuming `it` is itertools, each
# extra pass of `S2 = list(it.product(S2, ...))` wraps the previous tuples
# again (((p0, p1), p2), ...), and `s = (s2[0], s2[1])` only reads the first
# two components.  This looks correct only while the loop runs at most once
# (NROB <= 3) -- confirm against how the states in MS are represented.
#
# The trailing `for (l,m) in P.S:` loop is cut off right after
# `if NROB > 2:`, so the construction of P.Z is not shown in this view.
T = {} for s1,a in it.product(MS,L.A): # TOFIX if NROB == 2: S2 = rob.around(s1,DIM) else: # NROB > 2 S2 = rob.around(s1[0],DIM) for i in range(1,NROB-1): S2 = list(it.product(S2,rob.around(s1[i],DIM))) p = 1.0 / float(len(S2)) T[(s1,a)] = {} for s2 in S2: s = (s2[0],s2[1]) T[(s1,a)][s] = p M = mdp.mdp(MS,L.A,T) # ==== POMDP partially observable ==== print "-> POMDP" print "--> INIT PRODUCT POMDP" P = pomdp.pomdp() P.initProduct(L,M) print "--> OBSERVATION FUNCTION" P.O = ['some','none'] # inexpensive state space construction P.Z = {} for (l,m) in P.S: #print l,m if NROB > 2: # TOFIX