def __init__(self, policy, discount=0.9, tolerance=0.00001):
    """Build the state-value table by value iteration and store it.

    Every state reported by ``World.allStates()`` is swept repeatedly,
    with values updated in place, until the largest change seen during
    one sweep is no greater than ``tolerance``.

    Args:
        policy: Stored unchanged as ``self.bottomPolicy``; it is not used
            while computing the values.
        discount: Factor applied to successor values. Also stored as
            ``self.discount``. Defaults to 0.9.
        tolerance: Convergence threshold on the largest per-sweep change.
            Must be positive. Defaults to 0.00001.

    Raises:
        ValueError: If ``tolerance`` is not positive (the sweep loop
            could otherwise never terminate).
    """
    if tolerance <= 0:
        raise ValueError("tolerance must be positive")

    # Value credited to a move whose resulting position is (0, 0).
    goalReward = 10

    # World object; its starting state is irrelevant because every state
    # is set explicitly before it is evaluated.
    world = World((0, 0), (1, 1))
    value = dict.fromkeys(world.allStates(), 0)

    while True:
        delta = 0
        for state in world.allStates():
            world.setState(state)
            old = value[state]
            # Every value is 0 or positive, so 0 is a safe floor for the max.
            best = 0
            for move in world.moveList():
                if world.posAfterMove(move) == (0, 0):
                    # Reaching (0, 0) is terminal: no successor values added.
                    expected = goalReward
                else:
                    # NOTE(review): the world is not advanced by `move`
                    # before nextPreyStates() is queried, whereas isOptimal
                    # calls world.move() first -- confirm that
                    # nextPreyStates() really accounts for the move here.
                    expected = 0
                    for nextState, prob in world.nextPreyStates():
                        expected += prob * discount * value[nextState]
                best = max(best, expected)
            value[state] = best
            delta = max(delta, abs(old - best))
        # Checking after the sweep guarantees at least one full pass.
        if delta <= tolerance:
            break

    # The (0, 0) state itself is pinned to the goal reward.
    value[(0, 0)] = goalReward

    self.value = value
    self.actionList = []
    self.allList = []
    self.bottomPolicy = policy
    self.discount = discount
def isOptimal(self, state, move):
    """Return whether ``move`` is (nearly) the best move in ``state``.

    Each move from ``World.moveList()`` is scored using ``self.value``
    and ``self.discount``: 10 if the world's position after the move is
    (0, 0), otherwise the discounted, probability-weighted value of the
    states returned by ``nextPreyStates()``.

    Args:
        state: State passed to ``World.setState`` before trying each move.
        move: The move to judge; compared with ``==`` against the entries
            of ``World.moveList()``.

    Returns:
        True if the score of ``move`` is more than 97% of the best score.
        If every move scores 0 the ratio is undefined and True is
        returned, since no move is better than any other. A ``move`` that
        is not in the move list scores 0.
    """
    world = World((0, 0), (1, 1))
    ourMove = 0
    bestMove = 0
    for nmove in world.moveList():
        # Reset to the queried state so each move is tried from scratch.
        world.setState(state)
        world.move(nmove)
        if world.position == (0, 0):
            # Reaching (0, 0) is terminal: no successor values are added.
            probSum = 10
        else:
            probSum = 0
            for nextState, prob in world.nextPreyStates():
                probSum += prob * self.discount * self.value[nextState]
        bestMove = max(bestMove, probSum)
        if nmove == move:
            ourMove = probSum

    if bestMove == 0:
        # All moves tie at 0; dividing would raise ZeroDivisionError.
        return True
    return ourMove / bestMove > 0.97