Example #1
    def getAction(self, state):
        pqueue = []  #creating a priority queue
        leaves = []
        root = state
        legal = state.getLegalPacmanActions()
        successor = [(state.generatePacmanSuccessor(action), action) for action in legal]
        for i in successor:  # push the successors of the root node into the queue
            if i[0] is None:  # call budget already exhausted
                continue
            node = {}
            node["parent"] = state
            node["action"] = i[1]
            node["state"] = i[0]
            node["depth"] = 1
            node["score"] = scoreEvaluation(root) - scoreEvaluation(node["state"])
            node["cost"] = node["depth"] + node["score"]
            pqueue.append(node)

        while pqueue:
            pqueue = self.sort_nodes(pqueue)
            node = pqueue.pop(0)
            curr_state = node["state"]
            act = node["action"]
            depth = node["depth"]

            if curr_state is not None:
                legal = curr_state.getLegalPacmanActions()
                successor = []  # collect children outside the loop so they accumulate
                s = None
                for action in legal:
                    s = curr_state.generatePacmanSuccessor(action)
                    if s is not None:
                        successor.append((s, act))

                # win/lose states and call-limit exhaustion (s is None) count as leaves
                if curr_state.isWin() or curr_state.isLose() or s is None:
                    leaves.append(node)

                else:

                    for child in successor:
                        if (child[0] is not None):
                            sub_node = {}
                            sub_node["state"] = child[0]
                            sub_node["action"] = act
                            sub_node["parent"] = node
                            sub_node["depth"] = depth + 1
                            sub_node["score"] = scoreEvaluation(root) - scoreEvaluation(child[0])
                            sub_node["cost"] = sub_node["depth"] + sub_node["score"]
                            pqueue.append(sub_node)

        # return the first action leading to the best-scoring leaf
        max_score = -99999999
        result = Directions.STOP  # fallback if no leaf was recorded
        for value in leaves:
            leaf_score = scoreEvaluation(value["state"])
            if leaf_score > max_score:
                max_score = leaf_score
                result = value["action"]
        return result
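
This agent relies on a sort_nodes helper that the listing does not show. A minimal sketch of what it presumably does, ordering the frontier by ascending cost so the cheapest node is popped first (the method name and the node-dict layout are taken from the code above):

    def sort_nodes(self, nodes):
        # order the frontier by ascending A* cost (g + h); cheapest node ends up at index 0
        return sorted(nodes, key=lambda n: n["cost"])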


Example #2
class RandomSequenceAgent(Agent):
    def getAction(self, state):
        frontier = []
        stateMetadata = {}

        legal = state.getLegalPacmanActions()
        successors = [(state.generatePacmanSuccessor(action), action) for action in legal]
        for successor in successors:
            if successor[0] is None:  # call budget already exhausted
                continue
            stateMetadata[successor[0]] = successor[1]
            frontier.append((successor[0], scoreEvaluation(successor[0])))

        while frontier:
            node = frontier[-1][0]  # frontier acts as a LIFO stack
            frontier = frontier[:-1]  # pop the same element that was just read

            if node.isWin():
                return stateMetadata[node]

            elif node.isLose():
                continue

            legal = node.getLegalPacmanActions()
            successors = [node.generatePacmanSuccessor(action) for action in legal]

            for successor in successors:
                if successor is None:
                    # call budget exhausted: fall back to the best state seen so far
                    for frontierNode in list(frontier):  # copy, since we remove while iterating
                        if frontierNode[0].isWin():
                            return stateMetadata[frontierNode[0]]  # metadata is keyed by state
                        elif frontierNode[0].isLose():
                            frontier.remove(frontierNode)
                    if not frontier:
                        return Directions.STOP
                    return stateMetadata[max(frontier, key=lambda x: x[1])[0]]
                stateMetadata[successor] = stateMetadata[node]
                frontier.append((successor, scoreEvaluation(successor)))
        return Directions.STOP
Example #3
    def getAction(self, state):
        # TODO: write A* Algorithm instead of returning Directions.STOP
        legal = state.getLegalPacmanActions()
        depthCoefficient = 1  # set a coefficient to modify
        overtime = False
        checkingList = []

        for i in legal:
            childState = state.generatePacmanSuccessor(i)
            if childState is None:  # call budget already exhausted
                continue
            cost = 1 * depthCoefficient - (scoreEvaluation(childState) -
                                           scoreEvaluation(state))
            # reuse childState instead of generating the successor a second time
            checkingList.append((childState, i, cost, 1))
            # tuple in list: (state, first step's direction, cost, depth)

        for i in checkingList:
            if i[0].isWin():
                return i[1]

        while True:
            if not checkingList:
                return Directions.STOP
            minCost = min(i[2] for i in checkingList)
            bestNodes = [
                idx for idx, i in enumerate(checkingList) if i[2] == minCost
            ]
            choosenNode = checkingList.pop(random.choice(bestNodes))
            legal = choosenNode[0].getLegalPacmanActions()
            for i in legal:
                childState = choosenNode[0].generatePacmanSuccessor(i)
                if childState is None:
                    overtime = True
                    break
                elif childState.isWin():
                    return choosenNode[1]
                elif not childState.isLose():
                    cost = (1 + choosenNode[3]) * depthCoefficient - \
                           (scoreEvaluation(childState) - scoreEvaluation(state))
                    checkingList.append(
                        (childState, choosenNode[1], cost, choosenNode[3] + 1))
                    # if next state is lose, never check it or evaluate it
            if overtime:
                break

        if not checkingList:  # everything was expanded before the budget ran out
            return Directions.STOP
        minCost = min(i[2] for i in checkingList)
        bestNodes = [
            idx for idx, i in enumerate(checkingList) if i[2] == minCost
        ]
        return checkingList[random.choice(bestNodes)][1]
Example #4
    def getAction(self, state):
        # TODO: write DFS Algorithm instead of returning Directions.STOP

        fringe = Stack()  # instance of Stack class

        bestScore = [
        ]  # to store the node with the maximum score and the associated action
        score = []  # list storing the score for all the fringe nodes
        terminal = []  # to store state leading to none state
        winState = []  # store all the win states

        legal = state.getLegalPacmanActions()

        for actions in legal:
            ns = state.generatePacmanSuccessor(actions)
            fringe.push((ns, actions))  # initial states pushed onto the stack

        while not fringe.isEmpty():
            # loop runs until the Stack is empty
            newState, action = fringe.pop()  # pop nodes in LIFO manner
            if newState is None:  # successor generation returns None once the call budget is spent
                continue
            if newState.isWin():
                # if the state is a win state, append it to the winState list
                winState.append((newState, action))
            elif newState.isLose():
                continue  # if the state is a lose state, skip it
            else:
                # otherwise explore the paths from this node
                legal1 = newState.getLegalPacmanActions()
                for ele in legal1:
                    ns1 = newState.generatePacmanSuccessor(ele)
                    if ns1 is not None:
                        # if the call doesn't return None, push the child onto the Stack
                        fringe.push((ns1, action))
                    else:
                        # if the call returns None, record the parent as a terminal node
                        terminal.append((newState, action))

        if winState:  # if there are win states, score those
            for state, action in winState:
                score.append((scoreEvaluation(state), action))
        else:  # otherwise score the terminal states instead
            for state, action in terminal:
                score.append((scoreEvaluation(state), action))

        if not score:
            return Directions.STOP
        # the highest-scoring node and its associated action
        bestScore = max(score, key=lambda x: x[0])
        return bestScore[1]  # return the direction
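
Example #4 assumes a Stack helper with push, pop, and isEmpty, in the style of the util module that ships with the Pacman projects; a minimal sketch:

class Stack:
    # LIFO container used as the DFS fringe
    def __init__(self):
        self.list = []

    def push(self, item):
        self.list.append(item)

    def pop(self):
        return self.list.pop()

    def isEmpty(self):
        return len(self.list) == 0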
Example #5
    def getAction(self, state):
        # stores the first action for all the nodes, ex: {<gameStateInstance> : <action>}
        base_action = {}
        max_score = float("-inf")  # allow negative evaluation scores to register
        max_state = state

        open_list = priorityQueue()

        open_list.insert(([state], 0))

        while (not open_list.isEmpty()):
            current_value = open_list.pop()
            current_path = current_value[0]
            last_visited_state = current_path[-1]

            # g(x) = depth, root node is depth 0, depth for each successor will be length of it's parent path
            g_cost = len(current_path)

            if last_visited_state.isWin():
                return Directions.STOP

            # get all legal actions for pacman
            legal = last_visited_state.getLegalPacmanActions()

            # get successor states for each action
            for action in legal:
                successor = last_visited_state.generatePacmanSuccessor(action)
                if successor is None:
                    break
                if successor.isLose():
                    continue
                new_path = current_path + [successor]

                new_score = scoreEvaluation(successor)

                h_cost = - (scoreEvaluation(successor) - scoreEvaluation(state))

                total_cost = g_cost + h_cost
                new_data = (new_path, total_cost)

                open_list.insert(new_data)

                # base action for this successor is inherited from its parent;
                # the root itself has no entry, so fall back to the current action
                try:
                    base_action[successor] = base_action[last_visited_state]
                except KeyError:
                    base_action[successor] = action

                if new_score > max_score:
                    max_score = new_score
                    max_state = successor

        return base_action.get(max_state, Directions.STOP)  # STOP if no successor improved the score
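
The priorityQueue class used here is not part of the listing. A minimal sketch consistent with how it is called above, where insert takes a (path, cost) tuple and pop returns the entry with the lowest cost (the tie-breaking counter is an addition, needed because paths of game states are not orderable):

import heapq

class priorityQueue:
    # min-heap keyed on the cost stored in entry[1]
    def __init__(self):
        self.heap = []
        self.count = 0  # tie-breaker so payloads never get compared directly

    def insert(self, entry):
        heapq.heappush(self.heap, (entry[1], self.count, entry))
        self.count += 1

    def pop(self):
        return heapq.heappop(self.heap)[2]

    def isEmpty(self):
        return len(self.heap) == 0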
Example #6
    def getAction(self, state):
        # TODO: write BFS Algorithm instead of returning Directions.STOP
        queue = []  #creating a queue
        leaves = []

        legal = state.getLegalPacmanActions()
        successor = [(state.generatePacmanSuccessor(action), action) for action in legal]
        for i in successor:
            node = {}
            node["parent"] = state  # the root state is the parent of the first layer
            node["action"] = i[1]
            node["state"] = i[0]
            queue.append(node)

        while queue:
            node = queue.pop(0)
            curr_state = node["state"]
            parent_action = node["action"]

            if curr_state is not None:
                legal = curr_state.getLegalPacmanActions()
                successor = []
                instance = None
                for action in legal:
                    instance = curr_state.generatePacmanSuccessor(action)
                    if instance is not None:
                        successor.append((instance, parent_action))

                # win/lose states and call-limit exhaustion (instance is None) count as leaves
                if curr_state.isWin() or curr_state.isLose() or instance is None:
                    leaves.append(node)
                else:
                    for child in successor:
                        sub_node = {}
                        sub_node["state"] = child[0]
                        sub_node["action"] = child[1]
                        sub_node["parent"] = node
                        queue.append(sub_node)


        max_score = -9999
        result = Directions.STOP  # fallback if no leaf was recorded
        for value in leaves:
            if scoreEvaluation(value["state"]) > max_score:
                max_score = scoreEvaluation(value["state"])
                result = value["action"]
        return result
Example #7
    def defaultPolicy(self, state):
        # MCTS rollout: play up to five random moves and evaluate the resulting state
        rollout = 0
        while rollout < 5:
            if state.isWin() or state.isLose():
                return scoreEvaluation(state)
            legal = state.getLegalPacmanActions()
            if legal:
                random_action = random.choice(legal)
                state = state.generatePacmanSuccessor(random_action)
                if state is None:  # call budget exhausted
                    self.flag = False
                    return 0
            rollout = rollout + 1
        return scoreEvaluation(state)
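
defaultPolicy is the rollout phase of MCTS; the listing omits the selection and backpropagation phases it is presumably paired with. A hypothetical backup step, assuming tree nodes carry visits, total_reward, and parent attributes (none of these names appear in the original):

    def backup(self, node, reward):
        # propagate the rollout reward from a leaf back up to the root
        while node is not None:
            node.visits += 1
            node.total_reward += reward
            node = node.parent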
Example #8
    def getAction(self, state):
        nonetype_flag = 0                   # used to set the flag when None type is returned from getPacmanSuccessor
        successors = []
        legal = state.getLegalPacmanActions()
        for action in legal:
            successors.append((state.generatePacmanSuccessor(action),action))
        while successors:
            if nonetype_flag == 1:
                break  # call budget exhausted; stop expanding
            path, action = successors.pop(-1)
            if path is None:  # an initial successor may already be None
                continue
            if path.isWin():
                return action
            legal = path.getLegalPacmanActions()
            for next_action in legal:
                next_successor = path.generatePacmanSuccessor(next_action)
                if next_successor is None:
                    nonetype_flag = 1
                    break
                successors.append((next_successor, action))

        # If not reaching a terminal state, return the action leading to the node with
        #the best score and no children based on the heuristic function (scoreEvaluation)
        if successors:
            scored = [(scoreEvaluation(s), action) for s, action in successors]
            bestScore = max(scored)[0]
            for pair in scored:
                if pair[0] == bestScore:
                    # return the first action that achieves the best score
                    return pair[1]
        return Directions.STOP
Example #9
    def getAction(self, state):
        fringe = []  # This contains nodes to be expanded
        leaf_node = []  # This list contains all the leaf nodes
        explored = []
        directions_from_start_state = state.getLegalPacmanActions()  # initial directions
        initial_successors = [
            (state.generatePacmanSuccessor(direction), direction)
            for direction in directions_from_start_state
        ]  # list of initial successors
        for node in initial_successors:  # iterating over initial successors
            fringe.append(node)  # append to fringe to be explored further

        while fringe:  # loop till fringe is not empty
            current_node = fringe.pop()
            explored.append(current_node[0])
            if current_node[0] is None:
                continue
            legal = current_node[0].getLegalPacmanActions()
            successors = []
            if legal:  # only expand when there is at least one legal action
                for action in legal:
                    current_state = current_node[0].generatePacmanSuccessor(action)
                    explored.append(current_state)
                    if current_state is not None:
                        # append the successor together with the first action on its path
                        successors.append((current_state, current_node[1]))

            if not successors or current_node[0].isWin() or current_node[0].isLose():
                leaf_node.append(current_node)

            else:
                for succ_node in successors:
                    fringe.append(succ_node)

        # get best choice
        max_score = -10000000
        result = Directions.STOP  # fallback if no usable leaf was found
        for node in leaf_node:
            if node[0] is not None and node[1] is not None and scoreEvaluation(node[0]) > max_score:
                max_score = scoreEvaluation(node[0])
                result = node[1]
        return result
Example #10
    def getAction(self, state):
        
        # TODO: write A* Algorithm instead of returning Directions.STOP
        root_scoreEvaluation = scoreEvaluation(state)
        nonetype_flag = 0                   # used to set the flag when None type is returned from getPacmanSuccessor
        successors = []
        legal = state.getLegalPacmanActions()
        depth = 1
        for action in legal:
            initial_successor = state.generatePacmanSuccessor(action)
            if initial_successor is None:  # call budget already exhausted
                continue
            cost = depth - (scoreEvaluation(initial_successor) - root_scoreEvaluation)
            successors.append((cost, initial_successor, action, depth))
        while successors:
            if nonetype_flag == 1:
                break  # call budget exhausted; stop expanding
            # pop the lowest-cost node; compare on cost only, not on the state objects
            cost, path, action, depth = successors.pop(
                successors.index(min(successors, key=lambda t: t[0])))
            if path.isWin():
                print "Reached win state"
                return action
            legal = path.getLegalPacmanActions()
            for next_action in legal:
                next_successor = path.generatePacmanSuccessor(next_action)
                if next_successor is None:
                    nonetype_flag = 1
                    break
                # evaluate the child, not its parent, in the heuristic term
                cost = (depth + 1) - (scoreEvaluation(next_successor) - root_scoreEvaluation)
                successors.append((cost, next_successor, action, depth + 1))

        # If not reaching a terminal state, return the action leading to the node with
        #the best score and no children based on the heuristic function (scoreEvaluation)
        if successors:
            scored = [(scoreEvaluation(s), action) for cost, s, action, depth in successors]
            bestScore = max(scored)[0]
            for pair in scored:
                if pair[0] == bestScore:
                    # return the first action that achieves the best score
                    return pair[1]
        return Directions.STOP
Example #11
    def getAction(self, state):
        none_flag = 0  # handles None type
        node_depth = 1  # used to find the depth of each node
        legal = state.getLegalPacmanActions()  # gets all legal actions
        root_score_eval = scoreEvaluation(state)
        fringe = []  #list of successors
        for action in legal:
            current_state = state.generatePacmanSuccessor(action)
            if current_state is None:  # call budget already exhausted
                continue
            cost = node_depth - (scoreEvaluation(current_state) -
                                 root_score_eval)  # A* heuristic
            fringe.append((cost, current_state, action,
                           node_depth))  # append to the successor list
        while fringe:  # iterate over successors
            if none_flag == 1:
                break
            fringe.sort(key=lambda t: t[0])  # compare on cost only, not on the state objects
            cost, cur_state, action, node_depth = fringe.pop(0)
            if (cur_state.isWin()):
                return action
            legal = cur_state.getLegalPacmanActions()
            if legal:
                for next_action in legal:
                    child_node = cur_state.generatePacmanSuccessor(next_action)
                    if child_node is None:
                        none_flag = 1
                        break
                    cost = (node_depth + 1) - (scoreEvaluation(child_node) -
                                               root_score_eval)
                    fringe.append((cost, child_node, action, node_depth + 1))

        bestAction_pseudo = Directions.STOP
        if fringe:
            scored = [(scoreEvaluation(s), n_depth, action)
                      for cost, s, action, n_depth in fringe]
            bestScore = max(scored)[0]
            new_scored = [(score, n_depth, action)
                          for score, n_depth, action in scored
                          if score == bestScore]
            # among the best-scoring nodes, prefer the shallowest one
            bestAction_pseudo = min(new_scored, key=lambda item: item[1])[2]

        return bestAction_pseudo
Example #12
    def getAction(self, state):
        # get all legal actions for pacman
        legal = state.getLegalPacmanActions()
        # get all the successor states for these actions (agent index 0 is Pacman)
        successors = [(state.generateSuccessor(0, action), action) for action in legal]
        # evaluate the successor states using the scoreEvaluation heuristic
        scored = [(scoreEvaluation(state), action) for state, action in successors]
        # get the best score
        bestScore = max(scored)[0]
        # get all actions that lead to the highest score
        bestActions = [pair[1] for pair in scored if pair[0] == bestScore]
        # return a random action from the list of the best actions
        return random.choice(bestActions)
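
Every example on this page calls a scoreEvaluation heuristic that none of them defines. In the Pacman framework these snippets target, it is typically a thin wrapper around the current game score; a minimal sketch under that assumption:

def scoreEvaluation(state):
    # heuristic value of a state: just the game score it has accumulated
    return state.getScore()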
Example #13
    def score(self, chromosomes, state):
        # play the chromosome's action sequence forward and evaluate the end state
        curr_state = state
        sflag = True
        for gene in chromosomes:
            if curr_state:
                if not (curr_state.isWin() or curr_state.isLose()):
                    curr_state = curr_state.generatePacmanSuccessor(gene)
                else:
                    break
        if curr_state is None:  # call budget exhausted mid-sequence
            sflag = False
            return -9999, sflag
        return scoreEvaluation(curr_state), sflag
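
A hypothetical call site for score, assuming chromosomes are fixed-length lists of actions drawn from the state's possible actions (as in Example #25 below):

    chromosome = [random.choice(state.getAllPossibleActions()) for _ in range(5)]
    fitness, ok = self.score(chromosome, state)
    if not ok:
        pass  # the successor budget ran out; fitness is the -9999 sentinel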
Example #14
    def getAction(self, state):
        # TODO: write DFS Algorithm instead of returning Directions.STOP
        legal = state.getLegalPacmanActions()
        extendedNote = []  # explored node
        checkingStack = []  # frontier node
        overtime = False

        for i in legal:
            child = state.generatePacmanSuccessor(i)
            if child is not None:  # skip if the call budget is already exhausted
                checkingStack.append((child, i))
            # tuple in stack: (state, first step's direction)

        while True:
            if not checkingStack:  # if the checking stack is empty, go to evaluation
                break
            topElement = checkingStack.pop()  # check the top of the stack
            legal = topElement[0].getLegalPacmanActions()
            # extend top element
            for i in legal:
                childState = topElement[0].generatePacmanSuccessor(i)
                if childState is None:
                    overtime = True
                    break
                elif childState.isWin():
                    # if next state is win, put it into evaluation list but not check its future state
                    extendedNote.append((childState, topElement[1]))
                elif not childState.isLose():
                    # if next state is not lose, put it into checking stack for future
                    checkingStack.append((childState, topElement[1]))
                    # if next state is lose, never check it or evaluate it
            # record for evaluation
            extendedNote.append(topElement)
            if overtime:
                break

        extendedNote.extend(checkingStack)  # evaluate over all known states

        scored = []
        for i in extendedNote:
            scored.append((scoreEvaluation(i[0]), i[1]))

        if not scored:
            return Directions.STOP
        bestScore = max(scored)[0]
        bestActions = [pair[1] for pair in scored if pair[0] == bestScore]
        return random.choice(bestActions)
Example #15
    def getAction(self, state):
        # TODO: write BFS Algorithm instead of returning Directions.STOP
        legal = state.getLegalPacmanActions()
        lastLayerState = [(state.generatePacmanSuccessor(action), action)
                          for action in legal]
        # drop successors that came back as None (call budget exhausted)
        lastLayerState = [i for i in lastLayerState if i[0] is not None]
        overtime = False

        for i in lastLayerState:
            if i[0].isWin():
                return i[1]

        while True:
            tempState = []  # current layer (frontier) nodes
            for i in lastLayerState:  # extend every node of the last layer into the current layer
                legal = i[0].getLegalPacmanActions()
                for j in legal:
                    nextState = i[0].generatePacmanSuccessor(j)
                    if nextState is not None:  # if not timed out
                        if nextState.isLose():  # ignore lose states
                            continue
                        elif nextState.isWin():
                            return i[1]
                        else:
                            tempState.append((nextState, i[1]))  # otherwise extend it
                    else:
                        overtime = True
                        break
            if overtime or not tempState:  # stop on timeout or when no layer follows
                break
            lastLayerState = tempState  # keep only the last explored layer

        scored = []

        for i in lastLayerState:  # on timeout, evaluate the nodes of the last layer
            scored.append((scoreEvaluation(i[0]), i[1]))

        if not scored:
            return Directions.STOP
        bestScore = max(scored)[0]
        bestActions = [pair[1] for pair in scored if pair[0] == bestScore]
        return random.choice(bestActions)
Example #16
    def getAction(self, state):
        # queue initialised with base state
        queue = [state]

        # stores the first action to reach specific nodes, ex: {<gameStateInstance> : <action>}
        base_action = {}

        max_score = float("-inf")  # allow negative evaluation scores to register
        max_state = state

        while(queue):
            current_state = queue.pop(0)
            if current_state.isWin():
                return Directions.STOP

            # get all legal actions for pacman
            legal = current_state.getLegalPacmanActions()

            # get successor states for each action
            for action in legal:
                successor = current_state.generatePacmanSuccessor(action)
                if successor is None:
                    break
                if successor.isLose():
                    continue

                # append successor state in queue
                queue.append(successor)
                score = scoreEvaluation(successor)

                # base action for this successor is inherited from its parent;
                # the root has no entry, so fall back to the current action
                try:
                    base_action[successor] = base_action[current_state]
                except KeyError:
                    base_action[successor] = action

                if score > max_score:
                    max_score = score
                    max_state = successor

        return base_action.get(max_state, Directions.STOP)  # STOP if no successor was recorded
Example #17
    def getAction(self, state):
        # TODO: write BFS Algorithm instead of returning Directions.STOP
        frontier = []  # FIFO queue to store the states
        frontier.append(state)  # add state to the front of the list
        explored = []  # keeps track of the visited states
        track = {}  # maps states with the actions needed to get there
        maxState = None  #state representing maxscore so far
        maxScore = 0  #maximum score discovered so far

        #loops till the queue is empty
        while frontier:
            tempNode = frontier.pop(0)  # pop an element from the front of the list
            if tempNode in explored or tempNode.isWin() or tempNode.isLose():
                continue
            explored.append(tempNode)  # marks state as visited
            legalactions = tempNode.getLegalPacmanActions()  # possible actions
            for action in legalactions:  #iterates through each neighbour
                current = tempNode.generatePacmanSuccessor(action)
                if current is None:
                    break
                if tempNode == state:
                    track[current] = action  # record the initial action that leads to this state
                else:
                    track[current] = track[tempNode]  # inherit the initial action from the parent
                frontier.append(current)
                score = scoreEvaluation(current)
                if score > maxScore:  # track the best score so far and its corresponding state
                    maxScore = score
                    maxState = current
        return track[maxState] if maxState is not None else Directions.STOP
Example #18
    def getAction(self, state):
        # TODO: write A* Algorithm instead of returning Directions.STOP
        # class that groups a state with its associated cost
        class Node:
            def __init__(self, s, p):
                self.currentstate = s
                self.priority = p

        startNode = Node(state, 0)
        frontier = []  #Queue
        frontier.append(startNode)
        #dictionary that maps state with the action to be taken to reach that state
        startAction = {}
        new = {startNode: None}
        startAction.update(new)
        depth = 0  #counts the depth
        #best cost so far to reach a particular state
        costsofar = {}
        costsofar.update(new)
        # best cost encountered so far and the state that achieved it
        bestsofar = 9999
        beststate = None
        #loops till the queue is empty
        while frontier:
            # sort the list by cost so the plain list behaves as a priority queue
            frontier.sort(key=lambda n: n.priority)
            tempNode = frontier.pop(0)  # current Node
            if tempNode.currentstate.isWin() or tempNode.currentstate.isLose():
                continue
            # get possible actions
            legalactions = tempNode.currentstate.getLegalPacmanActions()
            depth = depth + 1
            #loops for all actions possible
            for action in legalactions:
                current = tempNode.currentstate.generatePacmanSuccessor(action)  # neighbour node
                if current is None:
                    break
                # heuristic function to calculate the cost
                cost = depth - (scoreEvaluation(current) -
                                scoreEvaluation(tempNode.currentstate))
                if current not in costsofar or costsofar[current] > cost:
                    costsofar[current] = cost
                    if depth == 1:
                        new = {current: action}
                        startAction.update(new)
                    else:
                        new = {current: startAction[tempNode.currentstate]}
                        startAction.update(new)
                    nodestate = Node(current, cost)
                    frontier.append(nodestate)
                    if cost < bestsofar:
                        bestsofar = cost
                        beststate = current

        return startAction[beststate] if beststate is not None else Directions.STOP
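
Re-sorting the whole frontier before every pop costs O(n log n) per iteration; the standard-library heapq module gives O(log n) pushes and pops instead. A minimal, Pacman-independent sketch of the pattern, with a counter to break ties because Node objects are not orderable:

import heapq
import itertools

counter = itertools.count()
frontier = []
for priority, payload in [(3, "b"), (1, "a"), (2, "c")]:
    # entries are (priority, tie-breaker, payload); heapq keeps the minimum on top
    heapq.heappush(frontier, (priority, next(counter), payload))
print(heapq.heappop(frontier)[2])  # -> "a", the lowest-priority entry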
Example #19
    def getAction(self, state):
        # TODO: write A* Algorithm instead of returning Directions.STOP

        fringe = Queue()  # instance of Queue class
        score = []  # list storing the score for all the fringe nodes
        bestScore = ()  # will hold the maximum score and the associated action
        depth = 1  # initial depth of the first layer of nodes
        winState = []  # store all the win states
        terminal = []  # store states whose successor came back as None

        legal = state.getLegalPacmanActions()

        for actions in legal:
            ns = state.generatePacmanSuccessor(actions)
            if ns is None:  # call budget already exhausted
                continue
            # calculate the evaluation function for the initial states
            cost = depth - (scoreEvaluation(ns) - scoreEvaluation(state))
            fringe.push((ns, actions, cost, depth))  # push states onto the queue

        while not fringe.isEmpty():  # loop while the queue has elements
            # sort ascending by cost, then reverse so the lowest-cost node sits
            # where this Queue implementation pops from
            fringe.getList().sort(key=lambda x: x[2])
            fringe.getList().reverse()
            newState, action, c, d = fringe.pop()  # pop the lowest-cost node

            if newState.isWin():
                winState.append((newState, action))
            elif newState.isLose():
                continue
            else:
                legal1 = newState.getLegalPacmanActions()

                for ele in legal1:
                    ns1 = newState.generatePacmanSuccessor(ele)
                    if ns1 is not None:
                        cost = (d + 1) - (
                            scoreEvaluation(ns1) - scoreEvaluation(state)
                        )  #calculate the cost(evaluation function) of the nodes and push on to the queue
                        fringe.push((ns1, action, cost, (d + 1)))

                    else:
                        terminal.append(
                            (newState, action)
                        )  #if node returns none for the next successor, append it to the 'terminal' list

        if winState:  # if there are win states, score those
            for state, action in winState:
                score.append((scoreEvaluation(state), action))
        else:  # otherwise score the terminal nodes and append them to 'score'
            for state, action in terminal:
                score.append((scoreEvaluation(state), action))

        if not score:
            return Directions.STOP
        bestScore = max(score, key=lambda x: x[0])  # the highest score among all the nodes
        return bestScore[1]  # return the direction
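
The Queue class and its getList accessor are also not shown. A minimal sketch in the style of the util module from the Pacman projects, plus the getList accessor this code relies on; with this layout (push inserts at the front, pop removes from the end), the sort-then-reverse above does leave the cheapest node at the position pop removes:

class Queue:
    # FIFO container whose backing list the caller may reorder in place
    def __init__(self):
        self.list = []

    def push(self, item):
        self.list.insert(0, item)

    def pop(self):
        return self.list.pop()

    def isEmpty(self):
        return len(self.list) == 0

    def getList(self):
        return self.list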
Example #20
    def getAction(self, state):
        # TODO: write A* Algorithm instead of returning Directions.STOP
        '''
        Declaring and Initializing primary variables and data structures.
        '''
        node_stack = []
        leaf_nodes = []
        nodes = {}
        nodes["state"] = state
        nodes["action"] = None
        nodes["ancestor"] = None
        nodes["g(x)"] = None
        nodes["h(x)"] = None
        nodes["total_cost"] = None
        '''
        Getting root state and legal actions and successor based on it.
        '''
        original_state = state
        legal = state.getLegalPacmanActions()
        successor = [(state.generatePacmanSuccessor(action), action)
                     for action in legal]

        for element in successor:
            if element[0] is None:  # call budget already exhausted
                continue
            temp_nodes = {}
            temp_nodes["state"] = element[0]
            temp_nodes["action"] = element[1]
            temp_nodes["ancestor"] = state
            temp_nodes["g(x)"] = 1
            temp_nodes["h(x)"] = scoreEvaluation(
                original_state) - scoreEvaluation(temp_nodes["state"])
            # f(x) = g(x) + h(x), matching the expansion step below
            temp_nodes["total_cost"] = temp_nodes["g(x)"] + temp_nodes["h(x)"]
            node_stack.append(temp_nodes)
        '''
        Loop that iterates through the list as Queue finds path using A*.
        '''
        while node_stack:
            node_stack = sorted(node_stack, key=lambda k: k['total_cost'])
            current_node = node_stack.pop(0)
            i_state = current_node["state"]
            i_action = current_node["action"]

            legal = i_state.getLegalPacmanActions()
            successor = [(i_state.generatePacmanSuccessor(action),
                          i_action) for action in legal]

            # keep only successors whose generation did not return None
            refined_successor = [
                element for element in successor if None not in element
            ]

            if i_state.isWin() or i_state.isLose() or not refined_successor:
                leaf_nodes.append(current_node)
            else:
                for successor_child in refined_successor:
                    if (successor_child[0] is not None):
                        temp_nodes = {}
                        temp_nodes["state"] = successor_child[0]
                        temp_nodes["action"] = successor_child[1]
                        temp_nodes["ancestor"] = current_node
                        temp_nodes["g(x)"] = current_node["g(x)"] + 1
                        temp_nodes["h(x)"] = scoreEvaluation(
                            original_state) - scoreEvaluation(
                                successor_child[0])
                        temp_nodes["total_cost"] = temp_nodes[
                            "g(x)"] + temp_nodes["h(x)"]
                        node_stack.append(temp_nodes)
        '''
        Returns the action with highest score.
        '''
        if not leaf_nodes:
            return Directions.STOP
        node_t = max(leaf_nodes, key=lambda p: scoreEvaluation(p["state"]))
        return node_t["action"]
Example #21
    def getAction(self, state):
        # TODO: write BFS Algorithm instead of returning Directions.STOP
        '''
        Declaring and Initializing primary variables and data structures.
        '''
        node_stack = []
        leaf_nodes = []
        nodes = {}
        nodes["state"] = state
        nodes["action"] = None
        nodes["ancestor"] = None
        '''
        Getting root state and legal actions and successor based on it.
        '''
        legal = state.getLegalPacmanActions()
        random.shuffle(legal)
        successor = [(state.generatePacmanSuccessor(action), action)
                     for action in legal]

        for element in successor:
            temp_nodes = {}
            temp_nodes["state"] = element[0]
            temp_nodes["action"] = element[1]
            temp_nodes["ancestor"] = nodes
            node_stack.append(temp_nodes)
        '''
        Loop that iterates through the list as Queue finds path using BFS.
        '''
        while node_stack:
            current_node = node_stack.pop(0)
            i_state = current_node["state"]
            i_action = current_node["action"]

            if i_state is not None:
                legal = i_state.getLegalPacmanActions()
                #random.shuffle(legal)
                successor = [(i_state.generatePacmanSuccessor(action),
                              i_action) for action in legal]

                # keep only successors whose generation did not return None
                refined_successor = [
                    element for element in successor if None not in element
                ]

                if i_state.isWin() or i_state.isLose() or not refined_successor:
                    leaf_nodes.append(current_node)
                else:
                    for successor_child in refined_successor:
                        temp_nodes = {}
                        temp_nodes["state"] = successor_child[0]
                        temp_nodes["action"] = successor_child[1]
                        temp_nodes["ancestor"] = current_node
                        # enqueue children for further expansion instead of
                        # treating them as leaves
                        node_stack.append(temp_nodes)
        '''
        Returns the action with highest score.
        '''
        max_score = float("-inf")
        if leaf_nodes is not None:
            for j in leaf_nodes:
                current_score = scoreEvaluation(j["state"])
                if (current_score >= max_score):
                    max_score = current_score
                    final_action = j["action"]
            return final_action
Example #22
    def getAction(self, state):
        # stores states, depth and action of leaf nodes
        queue1 = []
        # stores all the parent and child relations traversed
        predecessor = []
        # stores all the states and their respective actions traversed
        all_actions = []
        # initialize queue with root state
        queue1.append((state, 0, ""))
        none_flag = False
        while queue1:
            temp1 = queue1.pop(0)
            depth = temp1[1]
            current_state = temp1[0]
            legal = current_state.getLegalPacmanActions()
            for action in legal:
                successor = current_state.generatePacmanSuccessor(action)
                if successor is None:
                    none_flag = True
                    break
                if successor.isLose():
                    continue
                if successor.isWin():
                    # if the goal is reached from the root itself, the winning
                    # action is simply the current one
                    if current_state == state:
                        return action
                    # otherwise backtrace to the child of the root and return its action
                    Parent = current_state
                    Child = None
                    while Parent != state:
                        for i in range(len(predecessor)):
                            if predecessor[i][0] == Parent:
                                Child = predecessor[i][0]
                                Parent = predecessor[i][1]
                    # finding the action of the best state
                    bestAction = [
                        pair[1] for pair in all_actions if pair[0] == Child
                    ]
                    return bestAction[0]
                else:
                    queue1.append((successor, depth + 1, action))
                    predecessor.append((successor, current_state))
                    all_actions.append((successor, action))
            if none_flag:
                break
        if not queue1:  # queue exhausted without running out of calls
            return Directions.STOP
        scores = [(scoreEvaluation(current_state), depth, current_state)
                  for current_state, depth, action in queue1]
        # finding the best scores based on score evaluation
        bestScore = max(scores, key=lambda item: item[0])[0]
        bestScores = [(scoreEvaluation(current_state), depth, current_state)
                      for current_state, depth, action in queue1
                      if scoreEvaluation(current_state) == bestScore]
        # choosing the best state: the shallowest among all the best scores
        bestState = min(bestScores, key=lambda item: item[1])[2]
        # backtracking till we get the child of the root node
        Parent = bestState
        Child = None
        while Parent != state:
            for i in range(len(predecessor)):
                if predecessor[i][0] == Parent:
                    Child = predecessor[i][0]
                    Parent = predecessor[i][1]
        # finding the action of the best state
        bestAction = [pair[1] for pair in all_actions if pair[0] == Child]
        return bestAction[0]
Example #23
    def getAction(self, state):
        # TODO: write A* Algorithm instead of returning Directions.STOP
        
        # Queue that stores the game states
        state_queue = []
        # Depth
        depth = 0
        # Cost function
        cost = 0
        # List that stores the leaf nodes
        leaf_list = []
        # List that stores the win states
        win_state_list = []

        # Get legal pacman actions
        legal = state.getLegalPacmanActions()
        # Increment depth
        depth = depth + 1
        
        for action in legal:
            # Generate successor
            successor = state.generatePacmanSuccessor(action)
            # Skip if the call budget is already exhausted
            if successor is None:
                continue
            # Calculate cost of the node
            cost = depth - (scoreEvaluation(successor) - scoreEvaluation(state))
            # Append successor, action, cost of the node, and the depth of the node to the queue
            state_queue.append((successor, action, cost, depth))

        # Sort the queue in the increasing order of the cost function
        state_queue.sort(key=lambda tuples: tuples[2])
        
        while state_queue:
            
            # Pop the first element of the queue
            next_state, action, cost, depth = state_queue.pop(0)
            
            # Check for a win state; record it and skip expanding it further
            if next_state.isWin():
                win_state_list.append((next_state, action))
                continue
            
            # Get legal pacman actions
            legal = next_state.getLegalPacmanActions()
            # Increment depth
            depth = depth + 1

            for next_action in legal:
                # Generate successor
                child = next_state.generatePacmanSuccessor(next_action)
                # If the successor is None, record the parent as a leaf at its own depth
                if child is None:
                    next_cost = (depth - 1) - (scoreEvaluation(next_state) - scoreEvaluation(state))
                    leaf_list.append((next_state, action, next_cost, depth - 1))
                # If the successor is a win state, record the state and the action
                elif child.isWin():
                    win_state_list.append((child, action))
                # Else, append state, action, cost, and depth of the node to the queue
                else:
                    next_cost = depth - (scoreEvaluation(child) - scoreEvaluation(next_state))
                    state_queue.append((child, action, next_cost, depth))

            # Sort the queue in the increasing order of the cost function
            state_queue.sort(key=lambda tuples: tuples[2])
            
        
        # Among the win states, return the action leading to the one with the best score
        if win_state_list:
            max_score = float("-inf")
            bestAction = win_state_list[0][1]
            for win_pair in win_state_list:
                if scoreEvaluation(win_pair[0]) > max_score:
                    max_score = scoreEvaluation(win_pair[0])
                    bestAction = win_pair[1]
            return bestAction

        # Otherwise sort the leaves in increasing order of cost and take the cheapest
        if not leaf_list:
            return Directions.STOP
        leaf_list.sort(key=lambda tuples: tuples[2])
        return leaf_list[0][1]
Example #24
    def getAction(self, state):
        # TODO: write BFS Algorithm instead of returning Directions.STOP
        
        # Queue that stores the game states
        state_queue = []
        # List that stores the leaf nodes
        leaf_list = []
        # List that stores the win states
        win_state_list = []
        
        # Get legal pacman actions
        legal = state.getLegalPacmanActions()
        # Generate successors
        successors = [(state.generatePacmanSuccessor(action), action) for action in legal]
        # Append successors to the Queue
        state_queue.extend(successors)
        
        
        while state_queue:
            # Pop first element of the Queue
            next_state, action = state_queue.pop(0)
            
            # Check for a win state; record it and skip expanding it further
            if next_state.isWin():
                win_state_list.append((next_state, action))
                continue
        
            # Get legal pacman actions
            legal = next_state.getLegalPacmanActions()
            # Generate successors
            next_successors = [(next_state.generatePacmanSuccessor(next_action), next_action) for next_action in legal]
            
            for succ in next_successors:  # avoid shadowing the built-in 'tuple'
                # If the successor is None, record the parent state and action as a leaf
                if succ[0] is None:
                    leaf_list.append((next_state, action))
                # If the successor is a win state, record it in the win list
                elif succ[0].isWin():
                    win_state_list.append((succ[0], action))
                # Else, append state and action to the queue
                else:
                    state_queue.append((succ[0], action))


        # Among the win states, return the action leading to the one with the best score
        if win_state_list:
            max_score = float("-inf")
            bestAction = win_state_list[0][1]
            for win_pair in win_state_list:
                if scoreEvaluation(win_pair[0]) > max_score:
                    max_score = scoreEvaluation(win_pair[0])  # score the state, not the whole list
                    bestAction = win_pair[1]
            return bestAction

        # Otherwise evaluate the leaf nodes and return the action leading to the best score
        if not leaf_list:
            return Directions.STOP
        scored = [(scoreEvaluation(s), action) for s, action in leaf_list]
        bestScore = max(scored)[0]
        for pair in scored:
            if pair[0] == bestScore:
                return pair[1]
Example #25
    def getAction(self, state):
        # TODO: write Genetic Algorithm instead of returning Directions.STOP
        self.flag = True
        possible = state.getAllPossibleActions()  # use a name that doesn't shadow the built-in all()
        for i in range(8):
            for j in range(5):
                self.total_actions[i][j] = random.choice(possible)
        #print self.total_actions

        #calculate score for each sequence
        score = []

        for i in range(8):
            cur_state = state  # replay each sequence from the root state
            for j in range(5):
                if cur_state is None:  # call budget exhausted mid-sequence
                    break
                if cur_state.isWin():
                    return self.total_actions[i][0]
                elif cur_state.isLose():
                    break
                else:
                    cur_state = cur_state.generatePacmanSuccessor(self.total_actions[i][j])
            if cur_state is not None:
                score.append(scoreEvaluation(cur_state))
            else:
                score.append(-9999)  # sentinel for sequences cut off by the budget

        #print "Before",self.total_actions

        while self.flag:
            i = 0
            chrom = []
            for s in score:
                parent = {}
                parent["actions"] = list(self.total_actions[i])
                parent["score"] = s
                parent["id"] = i
                chrom.append(parent)
                i = i + 1

            chrom.sort(key=lambda x: x["score"])

            #print chrom

            # rank 1 is the worst chromosome, rank 8 the best (list is sorted ascending)
            for i in range(8):
                chrom[i]["rank"] = i + 1

            #############################################
            # find children
            random_test = random.random()  # uniform in [0, 1) so the 0.7 crossover test is meaningful
            next_generation = []
            for i in range(0,4):
                parent1 = self.selectParent(chrom)
                #print parent1
                parent2 = self.selectParent(chrom)
                if random_test < 0.7:
                    child1 = self.crossover(parent1["actions"], parent2["actions"])
                    child2 = self.crossover(parent1["actions"], parent2["actions"])
                    next_generation.append(list(child1))
                    next_generation.append(list(child2))

                else:
                    #print parent1
                    child1 = list(parent1["actions"])
                    child2 = list(parent2["actions"])
                    next_generation.append(child1)
                    next_generation.append(child2)

            ###############################################
            #print next_generation
            new_generation = []
            for i in range(8):
                random_check = random.random()  # fresh draw per child for the 10% mutation chance
                if random_check < 0.1:
                    new_gen = self.mutate(next_generation[i],state)
                else:
                    new_gen = next_generation[i]
                    #print new_gen
                new_generation.append(new_gen)

            #print new_generation
            ###scoring
            scores = []
            for i in range(8):
                cur_state = state  # replay each child sequence from the root state
                for j in range(5):
                    if cur_state:
                        if cur_state.isWin():
                            return new_generation[i][0]
                        elif cur_state.isLose():
                            break
                        else:
                            cur_state = cur_state.generatePacmanSuccessor(new_generation[i][j])
                    else:  # cur_state is None: call budget exhausted
                        self.flag = False
                        break
                if cur_state:
                    scores.append(scoreEvaluation(cur_state))
                else:
                    scores.append(-9999)  # sentinel for sequences cut off by the budget
            #print score

            for i in range(8):
                for j in range(5):
                    self.total_actions[i][j] = new_generation[i][j]

            for i in range(len(scores)):
                score[i] = scores[i]

        i = 0
        chrome = []
        for s in scores:
            children = {}
            children["actions"] = list(new_generation[i])
            children["score"] = s
            children["id"] = i
            chrome.append(children)
            i = i + 1

        chrome.sort(key=lambda x: x["score"])

        # the list is sorted ascending, so the last entry is the fittest chromosome
        best = chrome[-1]
        return best["actions"][0]
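
selectParent, crossover, and mutate are helper methods this listing does not include. A sketch of plausible implementations, assuming rank-proportional selection over the "rank" field set above, single-point crossover, and single-gene mutation; all three are assumptions, not the original author's code:

    def selectParent(self, chrom):
        # rank-proportional roulette wheel: rank 8 is drawn 8x as often as rank 1
        pick = random.randint(1, sum(c["rank"] for c in chrom))
        running = 0
        for c in chrom:
            running += c["rank"]
            if pick <= running:
                return c

    def crossover(self, actions1, actions2):
        # single-point crossover of two action sequences
        point = random.randint(1, len(actions1) - 1)
        return actions1[:point] + actions2[point:]

    def mutate(self, actions, state):
        # replace one random gene with a random possible action
        mutated = list(actions)
        mutated[random.randint(0, len(mutated) - 1)] = random.choice(
            state.getAllPossibleActions())
        return mutated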
Example #26
    def fOfXEvaluation(self, state, stateMetadata, root):
        # A* f(x): the depth stored in stateMetadata[state][1] is g(x), and the
        # score gain over the root is the (negated) heuristic h(x)
        return int(stateMetadata[state][1]) - (scoreEvaluation(state) - scoreEvaluation(root))
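
A hypothetical call site, assuming stateMetadata maps each state to an (action, depth) tuple as the indexing above suggests; the frontier entry with the smallest f(x) would be expanded next:

    best = min(frontier, key=lambda s: self.fOfXEvaluation(s, stateMetadata, root))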