Example #1
    def nextMoveOne(self, state, pacmanAction):
        '''
        Compute a single successor state, for Q-learning use.

        Unlike the full transition enumeration, this advances the ghost(s)
        exactly once via ghost_move instead of looping over every possible
        ghost move.

        ARGS:
            state: current game state, numbering from 0 to num_states-1
            pacmanAction: action for pacman to take, numbering from 0 to 3
                          for the four directions
        RETURN:
            nextState: next state returned by environment after taking the action
            reward: reward at next state
            done: whether next state is an end state
        '''
        px, py, gx, gy = self.state2coord(state)
        # Pacman's position after applying the requested action.
        px_new, py_new = self.move(px, py, pacmanAction)
        # Ghost(s) respond based on pacman's CURRENT (pre-move) position.
        gx_new, gy_new = ghost_move(px, py, gx, gy, self.num_ghosts,
                                    self.grid, self.ghost_type)
        if self.moveThrough(px_new, py_new, px, py, gx_new, gy_new, gx, gy):
            # Pacman and a ghost swapped cells: keep the OLD ghost position
            # so the two overlap in the encoded state and the game ends as
            # a loss.
            return (self.coord2state(px_new, py_new, gx, gy),
                    self.loseReward, True)
        nextState = self.coord2state(px_new, py_new, gx_new, gy_new)
        reward, done = self.calculate_reward(px_new, py_new, gx_new, gy_new)
        return nextState, reward, done
Example #2
    def nextMove(self, currState, action):
        '''
        Enumerate every possible successor of taking `action` in `currState`,
        together with its transition probability, reward and terminal flag.

        The randomly-moving ghost contributes up to four alternatives (one
        per direction); the chasing ghost's move is deterministic and shared
        across all of them.

        ARGS:
            currState: Current game state, ranges from 0 to numStates-1.
            action: Action to perform.
        RETURN:
            probs: Transition probability for each possible next state.
            nextStates: List of possible next states given current state and
                        action. Largely depends on potential ghost movements.
            rewards: List of rewards for each potential next state.
            done: Whether the game is over in each potential next state.
            All four lists are empty when currState is out of bounds or
            already terminal.
        '''
        nextStates = []  # possible next states
        rewards = []  # rewards for each next state
        done = []  # whether game is done for each next state
        probs = []  # transition probs for each next state
        # With a chasing ghost only the random ghost's 4 moves branch (1/4
        # each); otherwise all ghosts move randomly (4^num_ghosts joint
        # moves). NOTE(review): the loop below only emits 4 alternatives,
        # so for num_ghosts > 2 these probabilities would not sum to 1 --
        # presumably num_ghosts <= 2 here; verify against the constructor.
        if 'Chase' in self.ghost_type:
            defaultProb = 1 / 4
        else:
            defaultProb = 1 / math.pow(4, self.num_ghosts)

        # Decode currState into pacman and ghost coordinates.
        pacmanLocX, pacmanLocY, ghostLocX, ghostLocY = self.state2coord(
            currState)

        # Skip invalid cells: a True grid entry presumably marks a
        # wall/out-of-bounds cell -- confirm against grid construction.
        if (self.grid[pacmanLocY][pacmanLocX] == True):
            return probs, nextStates, rewards, done
        # Check whether current state is in done state: eaten by ghost or eaten pellet
        _, currStateDone = self.calculate_reward(pacmanLocX, pacmanLocY,
                                                 ghostLocX, ghostLocY)
        if (currStateDone):
            return probs, nextStates, rewards, done
        # Get pacman location after performing action
        pacmanLocX_next, pacmanLocY_next = self.move(pacmanLocX, pacmanLocY,
                                                     action)

        ## Get all possible ghost states
        # Next location of the chasing ghost. NOTE(review): the ghost types
        # are hardcoded to ['Random', 'Chase'] here while defaultProb above
        # uses self.ghost_type -- confirm this is intentional. Index 1 is
        # taken to be the chaser (index 0 the random ghost).
        t_x, t_y = ghost_move(pacmanLocX, pacmanLocY, ghostLocX, ghostLocY,
                              self.num_ghosts, self.grid, ['Random', 'Chase'])
        chaseLocX, chaseLocY = t_x[1], t_y[1]
        # Recover the chaser's action from its displacement. Assumes y
        # increases upward (UP when chaseLocY > ghostLocY[1]) -- TODO
        # confirm coordinate convention. A stationary chaser falls through
        # to DOWN.
        if (chaseLocX > ghostLocX[1]):
            chaseGhostAction = RIGHT
        elif (chaseLocX < ghostLocX[1]):
            chaseGhostAction = LEFT
        elif (chaseLocY > ghostLocY[1]):
            chaseGhostAction = UP
        else:
            chaseGhostAction = DOWN

        # Branch on the random ghost's four candidate actions; the chaser's
        # action is fixed to chaseGhostAction in every branch.
        for i in range(4):  # need to be hardcoded, <numGhosts> number of loops
            # Get ghost next location given action
            ghostLocX_next, ghostLocY_next = self.evalGhostAction(
                ghostLocX, ghostLocY, [i, chaseGhostAction])
            # if pacman not moving through ghost
            if (not self.moveThrough(pacmanLocX_next, pacmanLocY_next,
                                     pacmanLocX, pacmanLocY, ghostLocX_next,
                                     ghostLocY_next, ghostLocX, ghostLocY)):
                state = self.coord2state(pacmanLocX_next, pacmanLocY_next,
                                         ghostLocX_next, ghostLocY_next)
                nextStates.append(state)
                reward, doneStatus = self.calculate_reward(
                    pacmanLocX_next, pacmanLocY_next, ghostLocX_next,
                    ghostLocY_next)
                rewards.append(reward)
                done.append(doneStatus)
            else:  # if pacman moving through ghost, set pacman as new state and keep old ghost states so pacman and ghost overlaps and game ends
                state = self.coord2state(pacmanLocX_next, pacmanLocY_next,
                                         ghostLocX, ghostLocY)
                nextStates.append(state)
                rewards.append(self.loseReward)
                done.append(True)
            probs.append(defaultProb)

        return probs, nextStates, rewards, done