#sim((run, Prob, P))
print "-> RUN "+str(run)
# count history
#ch = np.zeros(MAXITER+1, dtype=INTTYPE)
# controller initial state
ci = L.names[random.choice(L.S)]
# environment initial state (not observable)
ei = M.names[random.choice(M.S)]
# data tracking; ch is assumed to be allocated before the run loop
ch[run,0] = func.counter(ci, ei, DIM, COUNTER)
# uniform initial belief over the product states compatible with ci
init = { s for s in P.S if P.L.names[s[0]] == ci }
distr = { s: 1./len(init) for s in init }
# initialize belief state
b = belief.belief(P, P.Z, init=distr)
# first observation
o = func.weighted_choice(P.Z[(L.inv_names[ci], M.inv_names[ei])])
# belief update
b.update(obs=o)
if PRINT:
    print 'b(s):', b.d[(L.inv_names[ci], M.inv_names[ei])] \
        if (L.inv_names[ci], M.inv_names[ei]) in b.d else 0
rob.print_grid(ci, ei, DIM, pause=PAUSE)
for it in range(MAXITER):
    # scheduled choice
    a = None
    if SCHEDULER == 0:
        a = sched.beliefScheduler(b, P, Prob, sched.avgMinVal, verbose=PRINT)
    elif SCHEDULER == 1:
        a = sched.beliefScheduler(b, P, Prob, sched.maxMinVal, verbose=PRINT)
    elif SCHEDULER == 2:
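
# ----------------------------------------------------------------------------
# Illustrative sketch (not part of the original script): func.weighted_choice
# is assumed to sample one key from a dict such as P.Z[...], which maps each
# possible observation to its probability. A minimal stand-alone version of
# such a helper could look like this; the actual implementation in func may
# differ.
import random

def weighted_choice(distribution):
    """Sample a key of {outcome: probability} proportionally to its weight."""
    total = sum(distribution.values())
    r = random.uniform(0, total)
    acc = 0.0
    for outcome, weight in distribution.items():
        acc += weight
        if r <= acc:
            return outcome
    # guard against floating-point round-off: return the last outcome seen
    return outcome
# ----------------------------------------------------------------------------
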
Prob = imp.importProb(PRISMFILENAME, NPAR)
for run in range(RUNS):
    #sim((run, Prob, P))
    print "-> RUN "+str(run)
    # controller initial state
    ci = L.names[random.choice(L.S)]
    # environment initial state (not observable)
    ei = 'tl' #M.names[random.choice([M.inv_names['tl'],M.inv_names['tr']])]
    init = { s for s in P.S if P.L.names[s[0]] == ci }
    # fixed initial belief: probability 0.5 on each of the 'tl' and 'tr' environment states
    distr = {(0, M.inv_names['tl']): 0.5, (0, M.inv_names['tr']): 0.5}
    # initialize belief state
    b = belief.belief(P, P.Z, init=distr)
    # first observation
    o = func.weighted_choice(P.Z[(L.inv_names[ci], M.inv_names[ei])])
    # belief update
    print 'init: ', b.d
    b.update(obs=o)
    if PRINT:
        print 'b(s):', b.d[(L.inv_names[ci], M.inv_names[ei])] \
            if (L.inv_names[ci], M.inv_names[ei]) in b.d else 0
    for it in range(MAXITER):
        print 'belief: ', b.d
        # scheduled choice
        a = None
        if SCHEDULER == 0:
            a = sched.beliefScheduler(b, P, Prob, sched.avgMinVal, verbose=PRINT)
        elif SCHEDULER == 1:
            a = sched.beliefScheduler(b, P, Prob, sched.maxMinVal, verbose=PRINT)
# one-step greedy lookahead over the current belief: score every action
maximum = -1
act_max = None
for a in L.A:
    ba = b.returnUpdate(act=a)
    val = sched.avgMinScheduler(ba, Prob, P)
    #val = rob.opt(ba,Prob,P)
    if round(val, 10) == round(maximum, 10):
        # same score as the current best: keep it as an alternative
        act_max.add(a)
        print 'EQ act:', a, ' - val:', val
    elif val > maximum:
        # strictly better score: restart the set of best actions
        maximum = val
        act_max = {a}
        print 'OK act:', a, ' - val:', val
    else:
        print 'NO act:', a, ' - val:', val
# break ties uniformly at random among the best actions
a = random.choice(list(act_max))
# apply action
ci = rob.step(ci, a, DIM)
ei = (rob.step(ei[0], random.choice(L.A), DIM),
      rob.step(ei[1], random.choice(L.A), DIM))
# extract observation
o = func.weighted_choice(P.Z[(L.inv_names[ci], M.inv_names[ei])])
# belief update
b.update(act=a, obs=o)
print 'act:', a
print 'obs:', o
print 'b(s):', b.d[(L.inv_names[ci], M.inv_names[ei])] \
    if (L.inv_names[ci], M.inv_names[ei]) in b.d else 0
rob.print_grid(ci, ei, DIM, pause=PAUSE)
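
# ----------------------------------------------------------------------------
# Illustrative sketch (not the repository's belief module): the textbook POMDP
# belief update that b.update(act=a, obs=o) is assumed to perform, namely
# b'(s') proportional to Z(s', o) * sum_s T(s, a, s') * b(s), followed by
# normalization. T and Z are hypothetical dicts here: T[(s, a)] maps successor
# states to probabilities and Z[s] maps observations to probabilities; the
# actual belief class may store these differently.
def belief_update(b, a, o, T, Z):
    """Return the updated belief dict after taking action a and observing o."""
    new_b = {}
    for s, p in b.items():
        for s_next, p_trans in T.get((s, a), {}).items():
            p_obs = Z.get(s_next, {}).get(o, 0.0)
            if p_obs > 0.0:
                new_b[s_next] = new_b.get(s_next, 0.0) + p * p_trans * p_obs
    total = sum(new_b.values())
    return {s: p / total for s, p in new_b.items()} if total > 0 else new_b
# ----------------------------------------------------------------------------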