def __init__(self, pomdp, prior):
    """Build a fully-observable MDP over extended states of a POMDP.

    An extended state is a pair (state name, observation name).  The
    transition probability to (s2, o2) under action a factorizes as
    Pr(s2 | s1, a) * Pr(o2 | s2); pairs with zero mass are simply
    omitted from the sparse transition map.

    :param pomdp: partially observable model exposing ``states``,
        ``observations``, ``actions`` (id -> name maps), the inverse
        maps ``inv_states`` / ``inv_observations`` / ``inv_actions``,
        and the sparse ``transitionFunction`` / ``observationFunction``
    :param prior: initial belief distribution, stored as-is
    """
    self.pomdp = pomdp
    # initial belief
    self.prior = prior
    # extended states: every (state name, observation name) pair
    S = set(it.product(pomdp.states.values(), pomdp.observations.values()))
    # materialize the action list once; reused for the product and the
    # final MarkovDecisionProcess constructor call
    A = list(pomdp.actions.values())
    # sparse transition function T[((s1,o1), a)][(s2,o2)] = probability
    T = dict()
    for (s1, o1), a in it.product(S, A):
        # hoist the (s1, a)-invariant lookup out of the inner loop over
        # all successor pairs (was recomputed for every (s2, o2))
        trans = pomdp.transitionFunction[(pomdp.inv_states[s1],
                                          pomdp.inv_actions[a])]
        for (s2, o2) in S:
            s2_id = pomdp.inv_states[s2]
            o2_id = pomdp.inv_observations[o2]
            # keep only reachable successors with an emittable observation
            if s2_id in trans and o2_id in pomdp.observationFunction[s2_id]:
                # Pr(s2 | s1, a) * Pr(o2 | s2)
                T.setdefault(((s1, o1), a), dict())[(s2, o2)] = \
                    trans[s2_id] * pomdp.observationFunction[s2_id][o2_id]
    # NOTE(review): removed the leftover debug `print 'S:',...` that dumped
    # the whole state space and transition table to stdout on every build
    # (it was also Python-2-only syntax).
    MarkovDecisionProcess.__init__(self, S, A, T)
def __init__(self, pomdp, horizon, prior):
    """
    Generation of the belief-space MDP starting from a prior distribution
    and limited to a fixed horizon H.  Level 0 is the root; level H is
    included in the generation as a special fringe level without outgoing
    transitions.

    :param pomdp: partially observable model
    :param horizon: generation horizon (number of BFS levels)
    :param prior: initial belief distribution (the root belief state)
    """
    self.pomdp = pomdp
    # initial state (root belief)
    self.root = prior
    # fringe: beliefs of the last generated level (no outgoing transitions)
    self.fringe = set()
    # belief states generated so far
    B = set()
    # belief transition function T[(b, action_name)][b'] = Pr(b' | b, a)
    T = dict()
    # belief-state space generation using a BFS; Q holds the current
    # level, nxtQ collects the next level
    Q = Queue()
    nxtQ = Queue()
    Q.put(prior)
    level = 0
    while not Q.empty() and level < horizon:
        current = Q.get()
        B.add(current)
        for act in pomdp.actions:
            for obs in pomdp.observations:
                # candidate successor belief after (act, obs)
                nxt = self.beliefUpdate(pomdp, current, act, obs)
                # find-the-copy function: returns the first element of
                # lst that .equals(el), or None if no copy exists
                find = lambda lst, el : \
                    reduce(lambda a, b: a if a != None else b, \
                           map(lambda x : x if el.equals(x) else None, lst))
                # search for the same belief state among everything seen:
                # visited beliefs, the current level, and the next level
                same = find(list(B) + list(Q.queue) + list(nxtQ.queue), nxt)
                # do not add the state if already visited before
                if same == None:
                    nxtQ.put(nxt)
                else:
                    # keep the reference to the canonical copy so the
                    # transition mass accumulates on one object
                    nxt = same
                # add the probability mass to the transition function:
                # compute Pr(o | b, a) by marginalizing over the support
                # of the current and successor beliefs
                pr = 0.0
                for i1, p1 in current.distr.iteritems():
                    sub_pr = 0.0
                    for i2, p2 in nxt.distr.iteritems():
                        if i2 in pomdp.transitionFunction[(i1, act)] and \
                           obs in pomdp.observationFunction[i2]:
                            sub_pr += pomdp.transitionFunction[(i1, act)][i2] \
                                      * pomdp.observationFunction[i2][obs]
                    pr += p1 * sub_pr
                # do not add null mass probabilities
                if pr > 0.0:
                    if (current, pomdp.actions[act]) not in T:
                        # if the (b, a) entry does not exist, create it
                        T[(current, pomdp.actions[act])] = {}
                    if nxt not in T[(current, pomdp.actions[act])]:
                        # if the (b, a)(b') entry does not exist,
                        # create it as the probability
                        T[(current, pomdp.actions[act])][nxt] = pr
                    else:
                        # if the (b, a)(b') entry already exists,
                        # increment the probability (several observations
                        # may lead to the same canonical belief)
                        T[(current, pomdp.actions[act])][nxt] += pr
        # if Q is empty the level is done: promote nxtQ to the next level
        if Q.empty():
            Q = nxtQ
            nxtQ = Queue()
            level += 1
    # add the last level from Q: these beliefs become the fringe and get
    # no outgoing transitions
    for b in Q.queue:
        B.add(b)
        self.fringe.add(b)
    MarkovDecisionProcess.__init__(self, B, pomdp.actions.values(), T)