def nextMoveOne(self, state, pacmanAction):
    '''
    Single-sample environment step, for Q-Learning use.

    Rather than enumerating every possible successor state, compute just
    one: apply pacman's action, then advance the ghosts with their
    configured movement policy.

    ARGS:
        state: current game state index, numbering from 0 to num_states-1
        pacmanAction: action for pacman to take, numbering from 0 to 3
            for the four directions

    RETURN:
        nextState: next state returned by the environment after taking
            the action
        reward: reward at the next state
        done: whether the next state is an end state
    '''
    px, py, gxs, gys = self.state2coord(state)

    # Pacman's position after applying the chosen action.
    px_next, py_next = self.move(px, py, pacmanAction)

    # Ghost positions after one step of the configured ghost policy.
    gxs_next, gys_next = ghost_move(px, py, gxs, gys,
                                    self.num_ghosts, self.grid,
                                    self.ghost_type)

    if self.moveThrough(px_next, py_next, px, py,
                        gxs_next, gys_next, gxs, gys):
        # Pacman and a ghost swapped cells: keep the ghosts at their old
        # coordinates so pacman and ghost overlap, and end with a loss.
        nextState = self.coord2state(px_next, py_next, gxs, gys)
        reward = self.loseReward
        done = True
    else:
        nextState = self.coord2state(px_next, py_next, gxs_next, gys_next)
        reward, done = self.calculate_reward(px_next, py_next,
                                             gxs_next, gys_next)

    return nextState, reward, done
def nextMove(self, currState, action):
    '''
    Returns all possible next states given action, and the reward given
    next states.

    ARGS:
        currState: Current game state, ranges from 0 to numStates-1.
        action: Action to perform.

    RETURN:
        probs: List of transition probabilities, one per next state.
        nextStates: List of possible next states given current state and
            action. Largely depends on potential ghost movements.
        rewards: List of rewards for each potential next state.
        done: List of booleans, whether each next state ends the game.

    All four lists are empty when currState is out of bounds or already
    a terminal state.
    '''
    nextStates = []  # possible next states
    rewards = []     # rewards for each next state
    done = []        # whether game is done for each next state
    probs = []       # transition probs for each next state

    # With a chasing ghost its move is deterministic, so only the random
    # ghost's 4 actions branch; otherwise all ghosts branch independently.
    if 'Chase' in self.ghost_type:
        defaultProb = 1 / 4
    else:
        defaultProb = 1 / math.pow(4, self.num_ghosts)

    # Get currState coordinates
    pacmanLocX, pacmanLocY, ghostLocX, ghostLocY = self.state2coord(
        currState)

    # Check whether current state w/in bounds.
    # NOTE(review): grid[y][x] == True presumably marks a wall cell —
    # confirm against the grid's construction.
    if (self.grid[pacmanLocY][pacmanLocX] == True):
        return probs, nextStates, rewards, done

    # Check whether current state is in done state: eaten by ghost or
    # eaten pellet — terminal states have no successors.
    _, currStateDone = self.calculate_reward(pacmanLocX, pacmanLocY,
                                             ghostLocX, ghostLocY)
    if (currStateDone):
        return probs, nextStates, rewards, done

    # Get pacman location after performing action
    pacmanLocX_next, pacmanLocY_next = self.move(pacmanLocX, pacmanLocY,
                                                 action)

    ## Get all possible ghost states
    # Get next location from chasing ghost.
    # Index 1 is treated as the chasing ghost here (ghost 0 random,
    # ghost 1 chase) — matches the ['Random', 'Chase'] policy passed in.
    t_x, t_y = ghost_move(pacmanLocX, pacmanLocY, ghostLocX, ghostLocY,
                          self.num_ghosts, self.grid, ['Random', 'Chase'])
    chaseLocX, chaseLocY = t_x[1], t_y[1]

    # Recover the chase ghost's action from its coordinate delta.
    # NOTE(review): Y increasing maps to UP here — confirm against the
    # coordinate convention used by move()/evalGhostAction().
    if (chaseLocX > ghostLocX[1]):
        chaseGhostAction = RIGHT
    elif (chaseLocX < ghostLocX[1]):
        chaseGhostAction = LEFT
    elif (chaseLocY > ghostLocY[1]):
        chaseGhostAction = UP
    else:
        chaseGhostAction = DOWN

    # Enumerate the random ghost's 4 possible actions; the chase ghost's
    # action is fixed. (Needs to be hardcoded: <numGhosts> nested loops.)
    for i in range(4):
        # Get ghost next locations given the pair of ghost actions.
        ghostLocX_next, ghostLocY_next = self.evalGhostAction(
            ghostLocX, ghostLocY, [i, chaseGhostAction])
        # if pacman not moving through ghost
        if (not self.moveThrough(pacmanLocX_next,
                                 pacmanLocY_next, pacmanLocX, pacmanLocY,
                                 ghostLocX_next, ghostLocY_next,
                                 ghostLocX, ghostLocY)):
            state = self.coord2state(pacmanLocX_next, pacmanLocY_next,
                                     ghostLocX_next, ghostLocY_next)
            nextStates.append(state)
            reward, doneStatus = self.calculate_reward(
                pacmanLocX_next, pacmanLocY_next, ghostLocX_next,
                ghostLocY_next)
            rewards.append(reward)
            done.append(doneStatus)
        else:
            # if pacman moving through ghost, set pacman as new state and
            # keep old ghost states so pacman and ghost overlaps and the
            # game ends
            state = self.coord2state(pacmanLocX_next, pacmanLocY_next,
                                     ghostLocX, ghostLocY)
            nextStates.append(state)
            rewards.append(self.loseReward)
            done.append(True)
        probs.append(defaultProb)

    return probs, nextStates, rewards, done