def __init__(self, policy):
		"""Pre-compute a state-value table by value iteration and store ``policy``.

		Every state reported by ``World.allStates()`` starts at value 0 and is
		repeatedly replaced by the score of its best move until the largest
		change in a full sweep is at most 0.00001.  A move that ends on
		position (0,0) scores 10; any other move scores the discounted
		(0.9) expected value of the states yielded by ``nextPreyStates()``.

		Sets ``self.value`` (state -> value), ``self.discount``,
		``self.bottomPolicy`` (the ``policy`` argument, stored untouched) and
		the empty lists ``self.actionList`` and ``self.allList``.
		"""
		# world object, (starting state is trivial)
		world = World((0,0),(1,1))
		discount = 0.9
		value = {}
		for state in world.allStates():
			value[state] = 0
		delta = 1
		# delta is a maximum of absolute differences, so it is never negative
		while delta > 0.00001:
			delta = 0
			for state in world.allStates():
				world.setState(state)
				old = value[state]
				# we can set the minimum to 0 since we know every value will be 0 or positive
				curMax = 0
				for move in world.moveList():
					# Score each move from a fresh copy of `state` with the move
					# actually applied, exactly as isOptimal() does.  Without
					# this, nextPreyStates() was evaluated on the unmoved world
					# and every non-capturing move received the same score.
					world.setState(state)
					world.move(move)
					if world.position == (0,0):
						probSum = 10
					else:
						probSum = 0
						for nextState,prob in world.nextPreyStates():
							probSum += prob*discount*value[nextState]
					curMax = max(curMax,probSum)
				# in-place update: later states in this sweep see the new value
				value[state] = curMax
				delta = max(delta,abs(old - curMax))
		# pin the (0,0) state to the same score a move onto (0,0) receives
		value[(0,0)] = 10
		self.value 		  = value
		self.actionList   = []
		self.allList  = []
		self.bottomPolicy = policy
		self.discount     = discount
	def isOptimal(self,state, move):
		"""Return True when ``move`` scores more than 97% of the best move in ``state``.

		Each move from ``World.moveList()`` is applied to a world reset to
		``state`` and scored: 10 if the resulting position is (0,0), otherwise
		the discounted expected ``self.value`` over ``nextPreyStates()``.

		If ``move`` is not among the listed moves its score stays 0.  When
		every move scores 0 the ratio is undefined; in that case all moves tie
		and the result is True exactly when ``move`` was one of the listed
		moves (previously this raised ZeroDivisionError).
		"""
		world    = World((0,0),(1,1))
		ourMove  = 0
		bestMove = 0
		moveSeen = False
		for nmove in world.moveList():
			# world.move() mutates the world, so reset before every candidate
			world.setState(state)
			world.move(nmove)
			if world.position == (0,0):
				probSum = 10
			else:
				probSum = 0
				for nextState,prob in world.nextPreyStates():
					probSum += prob*self.discount*self.value[nextState]
			bestMove = max(bestMove,probSum)
			if nmove == move:
				ourMove  = probSum
				moveSeen = True
		if bestMove == 0:
			# all candidates scored 0: avoid 0/0 and treat any listed move as a tie for best
			return moveSeen
		return ourMove/bestMove > 0.97