def __init__(self, pomdp, prior):
    """Build a fully-observable MDP over extended states of a POMDP.

    An extended state is a pair (state name, observation name).  The
    transition probability to (s2, o2) under action a factorizes as
    Pr(s2 | s1, a) * Pr(o2 | s2); pairs with zero mass are simply
    omitted from the sparse transition map.

    :param pomdp: partially observable model exposing ``states``,
        ``observations``, ``actions`` (id -> name maps), the inverse
        maps ``inv_states`` / ``inv_observations`` / ``inv_actions``,
        and the sparse ``transitionFunction`` / ``observationFunction``
    :param prior: initial belief distribution, stored as-is
    """
    self.pomdp = pomdp
    # initial belief
    self.prior = prior
    # extended states: every (state name, observation name) pair
    S = set(it.product(pomdp.states.values(), pomdp.observations.values()))
    # materialize the action list once; reused for the product and the
    # final MarkovDecisionProcess constructor call
    A = list(pomdp.actions.values())
    # sparse transition function T[((s1,o1), a)][(s2,o2)] = probability
    T = dict()
    for (s1, o1), a in it.product(S, A):
        # hoist the (s1, a)-invariant lookup out of the inner loop over
        # all successor pairs (was recomputed for every (s2, o2))
        trans = pomdp.transitionFunction[(pomdp.inv_states[s1],
                                          pomdp.inv_actions[a])]
        for (s2, o2) in S:
            s2_id = pomdp.inv_states[s2]
            o2_id = pomdp.inv_observations[o2]
            # keep only reachable successors with an emittable observation
            if s2_id in trans and o2_id in pomdp.observationFunction[s2_id]:
                # Pr(s2 | s1, a) * Pr(o2 | s2)
                T.setdefault(((s1, o1), a), dict())[(s2, o2)] = \
                    trans[s2_id] * pomdp.observationFunction[s2_id][o2_id]
    # NOTE(review): removed the leftover debug `print 'S:',...` that dumped
    # the whole state space and transition table to stdout on every build
    # (it was also Python-2-only syntax).
    MarkovDecisionProcess.__init__(self, S, A, T)
def __init__(self, pomdp, horizon, prior):
    """
    Generation of the belief-space MDP starting from a prior distribution
    and limited to a fixed horizon H.  Level 0 is the root; level H is
    included in the generation as a special fringe level without outgoing
    transitions.

    :param pomdp: partially observable model
    :param horizon: generation horizon (number of BFS levels)
    :param prior: initial belief distribution (the root belief state)
    """
    self.pomdp = pomdp
    # initial state (root belief)
    self.root = prior
    # fringe: beliefs of the last generated level (no outgoing transitions)
    self.fringe = set()
    # belief states generated so far
    B = set()
    # belief transition function T[(b, action_name)][b'] = Pr(b' | b, a)
    T = dict()
    # belief-state space generation using a BFS; Q holds the current
    # level, nxtQ collects the next level
    Q = Queue()
    nxtQ = Queue()
    Q.put(prior)
    level = 0
    while not Q.empty() and level < horizon:
        current = Q.get()
        B.add(current)
        for act in pomdp.actions:
            for obs in pomdp.observations:
                # candidate successor belief after (act, obs)
                nxt = self.beliefUpdate(pomdp, current, act, obs)
                # find-the-copy function: returns the first element of
                # lst that .equals(el), or None if no copy exists
                find = lambda lst, el : \
                    reduce(lambda a, b: a if a != None else b, \
                           map(lambda x : x if el.equals(x) else None, lst))
                # search for the same belief state among everything seen:
                # visited beliefs, the current level, and the next level
                same = find(list(B) + list(Q.queue) + list(nxtQ.queue), nxt)
                # do not add the state if already visited before
                if same == None:
                    nxtQ.put(nxt)
                else:
                    # keep the reference to the canonical copy so the
                    # transition mass accumulates on one object
                    nxt = same
                # add the probability mass to the transition function:
                # compute Pr(o | b, a) by marginalizing over the support
                # of the current and successor beliefs
                pr = 0.0
                for i1, p1 in current.distr.iteritems():
                    sub_pr = 0.0
                    for i2, p2 in nxt.distr.iteritems():
                        if i2 in pomdp.transitionFunction[(i1, act)] and \
                           obs in pomdp.observationFunction[i2]:
                            sub_pr += pomdp.transitionFunction[(i1, act)][i2] \
                                      * pomdp.observationFunction[i2][obs]
                    pr += p1 * sub_pr
                # do not add null mass probabilities
                if pr > 0.0:
                    if (current, pomdp.actions[act]) not in T:
                        # if the (b, a) entry does not exist, create it
                        T[(current, pomdp.actions[act])] = {}
                    if nxt not in T[(current, pomdp.actions[act])]:
                        # if the (b, a)(b') entry does not exist,
                        # create it as the probability
                        T[(current, pomdp.actions[act])][nxt] = pr
                    else:
                        # if the (b, a)(b') entry already exists,
                        # increment the probability (several observations
                        # may lead to the same canonical belief)
                        T[(current, pomdp.actions[act])][nxt] += pr
        # if Q is empty the level is done: promote nxtQ to the next level
        if Q.empty():
            Q = nxtQ
            nxtQ = Queue()
            level += 1
    # add the last level from Q: these beliefs become the fringe and get
    # no outgoing transitions
    for b in Q.queue:
        B.add(b)
        self.fringe.add(b)
    MarkovDecisionProcess.__init__(self, B, pomdp.actions.values(), T)