def getAction(self, state): pqueue = [] #creating a priority queue leaves = [] root = state legal = state.getLegalPacmanActions() successor = [(state.generatePacmanSuccessor(action), action) for action in legal] for i in successor: #Push the successors of parent node into the queue node = {} node["parent"] = state node["action"] = i[1] node["state"] = i[0] node["depth"] = 1 node["score"] = scoreEvaluation(root) - scoreEvaluation(node["state"]) node["cost"] = node["depth"] + node["score"] pqueue.append(node) while pqueue: pqueue = self.sort_nodes(pqueue) node = pqueue.pop(0) curr_state = node["state"] act = node["action"] depth = node["depth"] if curr_state is not None: legal = curr_state.getLegalPacmanActions() if legal: for action in legal: successor = [] s = curr_state.generatePacmanSuccessor(action) if s is not None: successor.append((s, act)) if (curr_state.isWin()) or (curr_state.isLose()) or (s is None): leaves.append(node) else: for child in successor: if (child[0] is not None): sub_node = {} sub_node["state"] = child[0] sub_node["action"] = act sub_node["parent"] = node sub_node["depth"] = depth + 1 sub_node["score"] = scoreEvaluation(root) - scoreEvaluation(child[0]) sub_node["cost"] = sub_node["depth"] + sub_node["score"] pqueue.append(sub_node) max_score = -99999999 if leaves: for value in leaves: leaf_score = scoreEvaluation(value["state"]) if (leaf_score > max_score): max_score = leaf_score result = value["action"] return result class RandomSequenceAgent(Agent):
def getAction(self, state):
    """Stack-based (LIFO) search over Pacman states.

    stateMetadata maps each generated state to the FIRST root action on its
    path, so any returned entry is a legal move from `state`. When the
    successor budget is exhausted (generatePacmanSuccessor returns None),
    falls back to the best-scoring non-losing frontier node.
    """
    frontier = []       # LIFO stack of (state, heuristic score)
    stateMetadata = {}  # state -> initial root action leading to it
    legal = state.getLegalPacmanActions()
    successors = [(state.generatePacmanSuccessor(action), action) for action in legal]
    for successor in successors:
        stateMetadata[successor[0]] = successor[1]
        frontier.append((successor[0], scoreEvaluation(successor[0])))
    while frontier:
        # BUG FIX: the original examined frontier[0][0] but trimmed the LAST
        # element (frontier[:-1]), so the examined node was never removed and
        # the removed node was never examined. Pop the top to match the
        # declared LIFO discipline.
        node = frontier.pop()[0]
        if node.isWin():
            return stateMetadata[node]
        elif node.isLose():
            continue
        legal = node.getLegalPacmanActions()
        successors = [node.generatePacmanSuccessor(action) for action in legal]
        for successor in successors:
            if successor is None:
                # Successor budget exhausted: salvage the best frontier node.
                for frontierNode in frontier:
                    if frontierNode[0].isWin():
                        # BUG FIX: stateMetadata is keyed by the state, not by
                        # the (state, score) tuple the original looked up.
                        return stateMetadata[frontierNode[0]]
                # BUG FIX: prune losing states without calling list.remove()
                # while iterating the same list (skips elements).
                frontier = [entry for entry in frontier if not entry[0].isLose()]
                return stateMetadata[max(frontier, key=lambda x: x[1])[0]]
            stateMetadata[successor] = stateMetadata[node]
            frontier.append((successor, scoreEvaluation(successor)))
    return Directions.STOP
def getAction(self, state):
    """A*-style search: expands the minimum-cost frontier node until the
    successor budget runs out, then returns the first-step direction of a
    random minimum-cost frontier entry.

    Frontier tuples are (state, first-step direction, cost, depth), with
    cost = depth * depthCoefficient - (child score - root score).
    """
    legal = state.getLegalPacmanActions()
    depthCoefficient = 1  # weight on the depth (g) term; tweakable
    overtime = False      # set when generatePacmanSuccessor returns None
    checkingList = []     # frontier: (state, first-step direction, cost, depth)
    for i in legal:
        # NOTE(review): generatePacmanSuccessor is called twice per action
        # here, burning the limited call budget twice as fast.
        childState = state.generatePacmanSuccessor(i)
        cost = 1 * depthCoefficient - (scoreEvaluation(childState) - scoreEvaluation(state))
        checkingList.append((state.generatePacmanSuccessor(i), i, cost, 1))
        # tuple in list: (state, firststep's direction, cost, depth)
    # Immediate win at depth 1 short-circuits the search.
    for i in checkingList:
        if i[0].isWin():
            return i[1]
    while (True):
        if not len(checkingList):
            return Directions.STOP
        # Linear scan for the minimum cost on the frontier.
        minCost = (checkingList[0])[2]
        for i in checkingList:
            if i[2] < minCost:
                minCost = i[2]
        # minCost = min(checkingList)[2]
        bestNodes = [
            checkingList.index(i) for i in checkingList if i[2] == minCost
        ]
        if not len(bestNodes):
            # NOTE(review): unreachable (minCost comes from the list), and a
            # bare `return` would yield None rather than a direction.
            return
        # Tie-break among equally cheap nodes at random.
        choosenNode = checkingList.pop(random.choice(bestNodes))
        legal = choosenNode[0].getLegalPacmanActions()
        for i in legal:
            childState = choosenNode[0].generatePacmanSuccessor(i)
            if childState is None:
                overtime = True  # out of successor calls; stop searching
                break
            elif childState.isWin():
                return choosenNode[1]
            elif not childState.isLose():
                cost = (1 + choosenNode[3]) * depthCoefficient - \
                    (scoreEvaluation(childState) - scoreEvaluation(state))
                checkingList.append(
                    (childState, choosenNode[1], cost, choosenNode[3] + 1))
            # if next state is lose, never check it or evaluate it
        if overtime:
            break
    # Final pass: pick a random minimum-cost frontier node's first action.
    # minCost = min(checkingList)[2]
    minCost = (checkingList[0])[2]
    for i in checkingList:
        if i[2] <= minCost:
            minCost = i[2]
    bestNodes = [
        checkingList.index(i) for i in checkingList if i[2] == minCost
    ]
    return (checkingList[(random.choice(bestNodes))])[1]
def getAction(self, state):
    """DFS over Pacman states using a Stack fringe.

    Collects win states and "terminal" states (whose expansion hit the
    successor budget), then returns the action of the best-scoring win state,
    or of the best terminal state when no win was found.
    """
    fringe = Stack()  # instance of Stack class
    bestScore = [
    ]  # to store the node with the maximum score and the associated action
    score = []  # list storing the score for all the fringe nodes
    terminal = []  # states whose expansion returned None (budget exhausted)
    winState = []  # store all the win states
    legal = state.getLegalPacmanActions()
    for actions in legal:
        ns = state.generatePacmanSuccessor(actions)
        fringe.push((ns, actions))  # Initial states pushed on to the stack
    while not fringe.isEmpty():  # Loop runs until Stack has nodes
        newState, action = fringe.pop()  # pop the nodes in LIFO (stack) order
        if newState.isWin():
            winState.append(
                (newState, action)
            )  # if the state is a win state, append it to the winState list
        elif newState.isLose():
            continue  # if the state is a lose state, continue
        else:
            legal1 = newState.getLegalPacmanActions(
            )  # else explore paths for all other nodes
            for ele in legal1:
                ns1 = newState.generatePacmanSuccessor(ele)
                if ns1 != None:
                    fringe.push(
                        (ns1, action)
                    )  # if the state doesn't return None, push it to the Stack
                else:
                    terminal.append(
                        (newState, action)
                    )  # if the state returns None, add it to the terminal
    if len(
            winState
    ) != 0:  # if winState has elements, score the win states
        for state, action in winState:
            score.append((scoreEvaluation(state), action))
    else:  # otherwise score the terminal (budget-exhausted) states
        for state, action in terminal:
            score.append((scoreEvaluation(state), action))
    # NOTE(review): max() raises on an empty `score` list — confirm at least
    # one terminal/win state is always collected before the fringe empties.
    bestScore = max(
        score, key=lambda x: x[0]
    )  # highest-scoring node and its associated action
    return bestScore[1]  # return the direction
def getAction(self, state):
    """A*-flavoured search over whole paths.

    Each queue entry is ([root, ..., leaf] path, g+h cost) where g is the path
    length and h = -(leaf score - root score). Tracks the best-scoring state
    seen and returns the first action on its path; base_action maps each
    generated state to that first root action.
    """
    # stores the first action for all the nodes, ex: {<gameStateInstance> : <action>}
    base_action = {}
    # BUG FIX: 0 as the initial best ignored all-negative evaluations, which
    # left max_state as the root and made the final lookup raise KeyError.
    max_score = float('-inf')
    max_state = state
    open_list = priorityQueue()
    # (the original also built an unused `closed_list` priorityQueue — removed)
    open_list.insert(([state], 0))
    while (not open_list.isEmpty()):
        current_value = open_list.pop()
        current_path = current_value[0]
        last_visited_state = current_path[-1]
        # g(x) = depth; root node is depth 0, each successor's g is the
        # length of its parent path
        g_cost = len(current_path)
        if last_visited_state.isWin():
            return Directions.STOP
        # get all legal actions for pacman
        legal = last_visited_state.getLegalPacmanActions()
        # get successor states for each action
        for action in legal:
            successor = last_visited_state.generatePacmanSuccessor(action)
            if successor is None:
                break  # successor budget exhausted; stop expanding
            if successor.isLose():
                continue
            new_path = current_path + [successor]
            new_score = scoreEvaluation(successor)
            h_cost = -(scoreEvaluation(successor) - scoreEvaluation(state))
            total_cost = g_cost + h_cost
            open_list.insert((new_path, total_cost))
            # First action on the path: inherit the parent's, else this action.
            # (BUG FIX: replaces a bare `except:` that swallowed every error.)
            base_action[successor] = base_action.get(last_visited_state, action)
            if new_score > max_score:
                max_score = new_score
                max_state = successor
    if max_state is state:
        # No successor was ever generated; there is no base action to return.
        return Directions.STOP
    return base_action[max_state]
def getAction(self, state):
    """BFS over Pacman states using dict nodes on a FIFO queue.

    Leaf nodes (terminal, or whose expansion hit the successor budget) are
    collected and the root action of the best-scoring leaf is returned.
    """
    queue = []  # FIFO queue of node dicts
    leaves = []  # terminal nodes to evaluate at the end
    legal = state.getLegalPacmanActions()
    if legal:
        successor = [(state.generatePacmanSuccessor(action), action)
                     for action in legal]
        for i in successor:
            node = {}
            # NOTE(review): this assigns the (still-empty) dict to itself,
            # not the root — harmless only because "parent" is never read.
            node["parent"] = node
            node["action"] = i[1]  # root action reaching this node
            node["state"] = i[0]
            queue.append(node)
    while queue:
        node = queue.pop(0)  # FIFO pop
        curr_state = node["state"]
        parent_action = node["action"]
        if curr_state is not None:
            legal = curr_state.getLegalPacmanActions()
            if legal is not None:
                successor = []
                for action in legal:
                    instance = curr_state.generatePacmanSuccessor(action)
                    if (instance is not None):
                        successor.append((instance, parent_action))
                # NOTE(review): `instance` here is only the LAST action's
                # successor; an earlier None is ignored.
                if (curr_state.isWin() or curr_state.isLose()
                        or instance is None):
                    leaves.append(node)
                else:
                    for child in successor:
                        sub_node = {}
                        sub_node["state"] = child[0]
                        sub_node["action"] = child[1]  # propagate root action
                        sub_node["parent"] = node
                        queue.append(sub_node)
    # Pick the leaf with the highest evaluation score.
    max_score = -9999
    if leaves:
        for value in leaves:
            if ((scoreEvaluation(value["state"]) > max_score)):
                max_score = scoreEvaluation(value["state"])
                result = value["action"]
    # NOTE(review): `result` is unbound when `leaves` is empty.
    return result
def defaultPolicy(self, state):
    """Random rollout of up to 5 moves starting from `state` (MCTS default
    policy).

    Returns scoreEvaluation of the state where the rollout stops. If the
    successor budget runs out mid-rollout, clears self.flag and returns 0.
    """
    rollout = 0
    while rollout < 5:
        if state.isWin() or state.isLose():
            return scoreEvaluation(state)
        legal = state.getLegalPacmanActions()
        if not legal:
            # BUG FIX: the original looped forever here — with no legal
            # action, neither `state` nor `rollout` ever changed. Treat a
            # move-less state as terminal and score it.
            break
        random_action = legal[random.randint(0, len(legal) - 1)]
        state = state.generatePacmanSuccessor(random_action)
        if state is None:
            # Out of generatePacmanSuccessor calls: signal caller and abort.
            self.flag = False
            return 0
        rollout = rollout + 1
    return scoreEvaluation(state)
def getAction(self, state): nonetype_flag = 0 # used to set the flag when None type is returned from getPacmanSuccessor successors = [] legal = state.getLegalPacmanActions() for action in legal: successors.append((state.generatePacmanSuccessor(action),action)) while(successors): if (nonetype_flag == 1): break #break while loop path, action = successors.pop(-1) if (path.isWin()): return action legal = path.getLegalPacmanActions() for next_action in legal: next_successor = path.generatePacmanSuccessor(next_action) if (next_successor == None): nonetype_flag = 1 break successors.append((next_successor,action)) # If not reaching a terminal state, return the action leading to the node with #the best score and no children based on the heuristic function (scoreEvaluation) if (successors): scored = [(scoreEvaluation(state), action) for state, action in successors] bestScore = max(scored)[0] for pair in scored: if pair[0] == bestScore: # returning the 1st best action bestActions = pair[1] break return bestActions return Directions.STOP
def getAction(self, state): fringe = [] # This contains nodes to be expanded leaf_node = [] # This list contains all the leaf nodes explored = [] directions_from_start_state = state.getLegalPacmanActions( ) #initial directions initial_successors = [ (state.generatePacmanSuccessor(direction), direction) for direction in directions_from_start_state ] # list of initial successors for node in initial_successors: # iterating over initial successors fringe.append(node) # append to fringe to be explored further while fringe: # loop till fringe is not empty current_node = fringe.pop() explored.append(current_node[0]) if current_node[0] == None: continue legal = current_node[0].getLegalPacmanActions() successors = [] if legal: # if only action is legal for action in legal: current_state = current_node[0].generatePacmanSuccessor( action) explored.append(current_state) if current_state != None and action is not None: # sometimes action is equating to None successors.append( (current_state, current_node[1]) ) # appending the successor and the action taken to get to the suv if len(successors) == 0 or current_node[0].isWin( ) or current_node[0].isLose(): leaf_node.append(current_node) else: for succ_node in successors: fringe.append(succ_node) # get best choice max_score = -10000000 if leaf_node: for node in leaf_node: if node[0] != None and node[1] != None and scoreEvaluation( node[0]) > max_score: max_score = scoreEvaluation(node[0]) result = node[1] return result
def getAction(self, state): # TODO: write A* Algorithm instead of returning Directions.STOP root_scoreEvaluation = scoreEvaluation(state) nonetype_flag = 0 # used to set the flag when None type is returned from getPacmanSuccessor successors = [] legal = state.getLegalPacmanActions() depth = 1 for action in legal: initial_successor = state.generatePacmanSuccessor(action) cost = depth - (scoreEvaluation(initial_successor) - root_scoreEvaluation) successors.append((cost,initial_successor , action, depth)) while(successors): if (nonetype_flag == 1): break #break while loop #successors.sort() #successors.reverse() cost, path, action, depth = successors.pop(successors.index(min(successors))) if (path.isWin()): print "Reached win state" return action legal = path.getLegalPacmanActions() for next_action in legal: next_successor = path.generatePacmanSuccessor(next_action) if (next_successor == None): nonetype_flag = 1 break cost = (depth+1) - (scoreEvaluation(path) - root_scoreEvaluation) successors.append((cost, next_successor, action, depth + 1)) # If not reaching a terminal state, return the action leading to the node with #the best score and no children based on the heuristic function (scoreEvaluation) if(successors): scored = [(scoreEvaluation(state), action) for cost, state, action, depth in successors] bestScore = max(scored)[0] for pair in scored: if pair[0] == bestScore: # returning the 1st best action bestActions = pair[1] break return bestActions return Directions.STOP '''
def getAction(self, state): none_flag = 0 # handles None type node_depth = 1 # used to find the depth of each node legal = state.getLegalPacmanActions() # gets all legal actions root_score_eval = scoreEvaluation(state) fringe = [] #list of successors for action in legal: current_state = state.generatePacmanSuccessor(action) cost = node_depth - (scoreEvaluation(current_state) - root_score_eval) # A-star hueristic fringe.append((cost, current_state, action, node_depth)) # append into successor list while (fringe): #iterate over successor if (none_flag == 1): break fringe.sort() cost, cur_state, action, node_depth = fringe.pop(0) if (cur_state.isWin()): return action legal = cur_state.getLegalPacmanActions() if legal: for next_action in legal: child_node = cur_state.generatePacmanSuccessor(next_action) if (child_node == None): none_flag = 1 break cost = (node_depth + 1) - (scoreEvaluation(child_node) - root_score_eval) fringe.append((cost, child_node, action, node_depth + 1)) bestAction_pseudo = Directions.STOP scored = [(scoreEvaluation(state), n_depth, action) for cost, state, action, n_depth in fringe] bestScore = max(scored)[0] new_scored = [(score, n_depth, action) for score, n_depth, action in scored if score == bestScore] if (new_scored != None): bestAction_pseudo = min(new_scored, key=lambda item: item[1])[2] return bestAction_pseudo
def getAction(self, state):
    """Greedy one-ply agent.

    Scores every immediate successor with the scoreEvaluation heuristic and
    returns a uniformly random action among those tied for the best score.
    """
    # Pair each legal move with the heuristic score of the state it produces.
    scored = []
    for move in state.getLegalPacmanActions():
        child = state.generateSuccessor(0, move)
        scored.append((scoreEvaluation(child), move))
    # Highest score present (tuples compare score-first).
    top = max(scored)[0]
    # Every move achieving that score is an equally good candidate.
    candidates = [move for value, move in scored if value == top]
    return random.choice(candidates)
def score(self, chromosomes, state):
    """Fitness of an action sequence: simulate `chromosomes` from `state`.

    Returns (scoreEvaluation(final state), True) on a completed simulation,
    or (-9999, False) when the successor budget ran out (state became None).
    Stops early at a win/lose state.
    """
    current = state
    for gene in chromosomes:
        if not current:
            # Budget already exhausted; skip the remaining genes.
            continue
        if current.isWin() or current.isLose():
            break  # terminal state reached before the sequence ended
        current = current.generatePacmanSuccessor(gene)
    if current is None:
        return -9999, False
    return scoreEvaluation(current), True
def getAction(self, state):
    """DFS over Pacman states with a list stack.

    Expanded nodes (and, on timeout, the remaining stack) are all scored with
    scoreEvaluation; returns a random action among those reaching the best
    score.
    """
    legal = state.getLegalPacmanActions()
    extendedNote = []  # explored nodes: (state, first-step direction)
    checkingStack = []  # frontier nodes (LIFO)
    overtime = False    # set when the successor budget runs out
    for i in legal:
        checkingStack.append((state.generatePacmanSuccessor(i), i))
        # tuple in stack: (state, firststep's direction)
    while (True):
        if not len(checkingStack
                   ):  # if no element in checking stack, go to evaluation
            break
        topElement = checkingStack.pop()  # check the top of the stack
        legal = topElement[0].getLegalPacmanActions()
        # extend top element
        for i in legal:
            childState = topElement[0].generatePacmanSuccessor(i)
            if childState is None:
                overtime = True
                break
            elif childState.isWin():
                # if next state is win, put it into evaluation list but not
                # check its future state
                extendedNote.append((childState, topElement[1]))
            elif not childState.isLose():
                # if next state is not lose, put it into checking stack for
                # future expansion
                checkingStack.append((childState, topElement[1]))
            # if next state is lose, never check it or evaluate it
        # record for evaluation
        extendedNote.append(topElement)
        if overtime:
            break
    extendedNote.extend(
        checkingStack)  # for evaluation, count on all known states
    scored = []
    for i in extendedNote:
        scored.append((scoreEvaluation(i[0]), i[1]))
    bestScore = max(scored)[0]
    bestActions = [pair[1] for pair in scored if pair[0] == bestScore]
    return random.choice(bestActions)
def getAction(self, state):
    """Layer-by-layer BFS over Pacman states.

    Expands one full depth layer at a time; on timeout (successor budget
    exhausted) evaluates only the LAST complete layer and returns a random
    action among the best-scoring nodes in it.
    """
    legal = state.getLegalPacmanActions()
    lastLayerState = [(state.generatePacmanSuccessor(action), action)
                      for action in legal]
    overtime = False  # set when the successor budget runs out
    # Immediate win at depth 1 short-circuits the search.
    for i in lastLayerState:
        if i[0].isWin():
            return i[1]
    while (True):
        tempState = []  # current layer nodes, frontier nodes
        for i in lastLayerState:
            # for loop to extend all nodes in last layer and store in
            # current layer
            legal = i[0].getLegalPacmanActions()
            for j in legal:
                nextState = i[0].generatePacmanSuccessor(j)
                if nextState is not None:  # if not timeout
                    if nextState.isLose(
                    ):  # if child node is lose state, ignore
                        continue
                    elif nextState.isWin():
                        return i[1]
                    else:
                        tempState.append(
                            (nextState, i[1]))  # if not, extend it
                else:
                    overtime = True
                    break
        if overtime:
            break
        else:
            lastLayerState = tempState  # explored node (only last layer)
    # When timed out, score the nodes of the last complete layer.
    scored = []
    for i in lastLayerState:  # when timeout, check nodes in last layer
        scored.append((scoreEvaluation(i[0]), i[1]))
    bestScore = max(scored)[0]
    bestActions = [pair[1] for pair in scored if pair[0] == bestScore]
    return random.choice(bestActions)
def getAction(self, state):
    """BFS over Pacman states, tracking the best-scoring state seen.

    base_action maps every generated state to the FIRST action taken from the
    root on its path; returns that action for the best-scoring state.
    """
    # queue initialised with base state
    queue = [state]
    # stores the first action to reach specific nodes, ex: {<gameStateInstance> : <action>}
    base_action = {}
    # BUG FIX: 0 as the initial best ignored all-negative evaluations, which
    # left max_state as the root and made the final lookup raise KeyError.
    max_score = float('-inf')
    max_state = state
    while (queue):
        current_state = queue.pop(0)  # FIFO pop
        if current_state.isWin():
            return Directions.STOP
        # get all legal actions for pacman
        legal = current_state.getLegalPacmanActions()
        # get successor states for each action
        for action in legal:
            successor = current_state.generatePacmanSuccessor(action)
            if successor is None:
                break  # successor budget exhausted; stop expanding this node
            if successor.isLose():
                continue
            # append successor state in queue
            queue.append(successor)
            score = scoreEvaluation(successor)
            # First action on the path: inherit the parent's, else this one.
            # (BUG FIX: replaces a bare `except:` that swallowed every error.)
            base_action[successor] = base_action.get(current_state, action)
            if score > max_score:
                max_score = score
                max_state = successor
    if max_state is state:
        # No successor was ever generated; there is no base action to return.
        return Directions.STOP
    return base_action[max_state]
def getAction(self, state):
    """BFS over Pacman states.

    track maps each discovered state to the FIRST root action on its path;
    returns that action for the best-scoring state discovered before the
    successor budget runs out.
    """
    frontier = [state]  # FIFO queue of states to explore
    # visited states; a set gives O(1) membership (states are hashable —
    # they are already used as dict keys in `track` below)
    explored = set()
    track = {}  # maps states with the root action needed to get there
    maxState = None  # state representing max score so far
    # BUG FIX: 0 as the initial best ignored all-negative evaluations.
    maxScore = float('-inf')
    # loops till the queue is empty
    while frontier:
        tempNode = frontier.pop(0)  # pops an element from the front
        if tempNode in explored or tempNode.isWin() or tempNode.isLose():
            continue
        explored.add(tempNode)  # marks state as visited
        legalactions = tempNode.getLegalPacmanActions()  # possible actions
        for action in legalactions:  # iterates through each neighbour
            current = tempNode.generatePacmanSuccessor(action)
            if current is None:
                break  # successor budget exhausted
            if tempNode == state:
                # depth-1 node: remember the initial action itself
                track[current] = action
            else:
                # deeper node: inherit the parent's initial action
                track[current] = track[tempNode]
            frontier.append(current)
            score = scoreEvaluation(current)
            if score > maxScore:
                # BUG FIX: the original assigned to `maxstate` (lowercase)
                # while declaring `maxState`, raising NameError whenever no
                # score exceeded the initial 0.
                maxScore = score
                maxState = current
    if maxState is None:
        # Nothing was ever scored; fail safe.
        return Directions.STOP
    return track[maxState]
def getAction(self, state):
    """A* over Pacman states.

    Each frontier Node carries a state and its cost
    f = depth - (child score - parent score); the cheapest node is expanded
    first and the root action of the overall cheapest state is returned.
    NOTE(review): `depth` counts expansions, not tree depth — preserved from
    the original; confirm against the assignment spec.
    """

    # class that groups a state with its associated cost
    class Node:
        def __init__(self, s, p):
            self.currentstate = s
            self.priority = p

    startNode = Node(state, 0)
    frontier = [startNode]  # priority queue, re-sorted each iteration
    # dictionary that maps a state to the ROOT action taken to reach it
    startAction = {startNode: None}
    depth = 0  # counts the expansions (used as the g term)
    # best cost found so far for each state
    costsofar = {startNode: None}
    # best cost encountered so far overall, and the state achieving it
    bestsofar = 9999
    beststate = None
    # loops till the queue is empty
    while frontier:
        if depth != 0:
            # BUG FIX: the original hand-rolled bubble sort wrote
            # frontier[k-1] = frontier[k] AFTER overwriting frontier[k],
            # duplicating one element instead of swapping — the queue was
            # never actually ordered. Sort by priority instead.
            frontier.sort(key=lambda node: node.priority)
        tempNode = frontier.pop(0)  # cheapest node
        if tempNode.currentstate.isWin() or tempNode.currentstate.isLose():
            continue
        # get possible actions
        legalactions = tempNode.currentstate.getLegalPacmanActions()
        depth = depth + 1
        # loops for all actions possible
        for action in legalactions:
            current = tempNode.currentstate.generatePacmanSuccessor(action)
            if current is None:
                break  # successor budget exhausted
            # heuristic: penalise depth, reward score improvement
            cost = depth - (scoreEvaluation(current) -
                            scoreEvaluation(tempNode.currentstate))
            if current not in costsofar or costsofar[current] > cost:
                costsofar[current] = cost
                if depth == 1:
                    # depth-1 node: the root action is this action itself
                    startAction[current] = action
                else:
                    # deeper node: inherit the parent's root action
                    startAction[current] = startAction[tempNode.currentstate]
                frontier.append(Node(current, cost))
                if cost < bestsofar:
                    bestsofar = cost
                    beststate = current
    if beststate is None:
        # BUG FIX: the original raised NameError here when no successor was
        # ever generated; fail safe instead.
        return Directions.STOP
    return startAction[beststate]
def getAction(self, state):
    """A* over Pacman states using a Queue kept sorted by cost.

    Frontier tuples are (state, root action, cost, depth) with
    cost = depth - (node score - root score). Win states and "terminal"
    states (whose expansion hit the successor budget) are collected; the
    action of the best-scoring win state is returned, else that of the best
    terminal state.
    """
    fringe = Queue()  # instance of Queue class
    score = []  # list storing the score for all the fringe nodes
    bestScore = (
    )  # to store the node with the maximum score and the associated action
    depth = 1  # Initialising initial depth for the node that is 1
    winState = []  # store all the win states
    terminal = []  # states whose expansion returned None (budget exhausted)
    legal = state.getLegalPacmanActions()
    for actions in legal:
        ns = state.generatePacmanSuccessor(actions)
        cost = depth - (
            scoreEvaluation(ns) - scoreEvaluation(state)
        )  # Calculating the evaluation function for the initial states
        fringe.push(
            (ns, actions, cost, depth))  # push states on to the queue
    while not fringe.isEmpty():  # Loop until queue has elements
        # NOTE(review): sort+reverse before a pop-from-end yields the
        # LOWEST-cost node — presumably Queue.pop takes the list tail;
        # confirm against the Queue implementation.
        fringe.getList().sort(key=lambda x: x[
            2])  # sorting the queue in ascending order based on 'cost'
        fringe.getList().reverse(
        )  # reversing the list so that the node with the lowest cost is popped for exploration
        newState, action, c, d = fringe.pop()  # pop the cheapest node
        if newState.isWin():
            winState.append((newState, action))
        elif newState.isLose():
            continue
        else:
            legal1 = newState.getLegalPacmanActions()
            for ele in legal1:
                ns1 = newState.generatePacmanSuccessor(ele)
                if ns1 != None:
                    cost = (d + 1) - (
                        scoreEvaluation(ns1) - scoreEvaluation(state)
                    )  # calculate the cost(evaluation function) of the nodes and push on to the queue
                    fringe.push((ns1, action, cost, (d + 1)))
                else:
                    terminal.append(
                        (newState, action)
                    )  # if node returns none for the next successor, append it to the 'terminal' list
    if len(winState
           ) != 0:  # if winState has nodes, append them to the 'score' list
        for state, action in winState:
            score.append((scoreEvaluation(state), action))
    else:  # else calculate score for terminal nodes and append to 'score'
        for state, action in terminal:
            score.append((scoreEvaluation(state), action))
    bestScore = max(score, key=lambda x: x[0]
                    )  # calculate the highest score among all the nodes
    return bestScore[1]  # return the direction
def getAction(self, state): # TODO: write A* Algorithm instead of returning Directions.STOP ''' Declaring and Initializing primary variables and data structures. ''' node_stack = [] leaf_nodes = [] nodes = {} nodes["state"] = state nodes["action"] = None nodes["ancestor"] = None nodes["g(x)"] = None nodes["h(x)"] = None nodes["total_cost"] = None ''' Getting root state and legal actions and successor based on it. ''' original_state = state legal = state.getLegalPacmanActions() successor = [(state.generatePacmanSuccessor(action), action) for action in legal] for element in successor: temp_nodes = {} temp_nodes["state"] = element[0] temp_nodes["action"] = element[1] temp_nodes["ancestor"] = state temp_nodes["g(x)"] = 1 temp_nodes["h(x)"] = scoreEvaluation( original_state) - scoreEvaluation(temp_nodes["state"]) temp_nodes["total_cost"] = temp_nodes["g(x)"] - temp_nodes["h(x)"] node_stack.append(temp_nodes) ''' Loop that iterates through the list as Queue finds path using A*. ''' while node_stack: node_stack = sorted(node_stack, key=lambda k: k['total_cost']) current_node = node_stack.pop(0) i_state = current_node["state"] i_action = current_node["action"] legal = i_state.getLegalPacmanActions() if legal: successor = [(i_state.generatePacmanSuccessor(action), i_action) for action in legal] refined_successor = [ element for element in successor if None not in element ] if (i_state.isWin()) or (i_state.isLose()) or (refined_successor is None): leaf_nodes.append(current_node) else: for successor_child in refined_successor: if (successor_child[0] is not None): temp_nodes = {} temp_nodes["state"] = successor_child[0] temp_nodes["action"] = successor_child[1] temp_nodes["ancestor"] = current_node temp_nodes["g(x)"] = current_node["g(x)"] + 1 temp_nodes["h(x)"] = scoreEvaluation( original_state) - scoreEvaluation( successor_child[0]) temp_nodes["total_cost"] = temp_nodes[ "g(x)"] + temp_nodes["h(x)"] node_stack.append(temp_nodes) ''' Returns the action with highest score. 
''' node_t = max(leaf_nodes, key=lambda p: scoreEvaluation(p["state"])) return node_t["action"] print(node_t["action"]) '''
def getAction(self, state):
    """Search over Pacman states with dict nodes (labeled DFS in the TODO).

    NOTE(review): successor children are appended to `leaf_nodes`, never back
    onto `node_stack`, so only two plies are ever examined — confirm whether
    this one-step lookahead is intentional.
    """
    '''
    Declaring and Initializing primary variables and data structures.
    '''
    node_stack = []  # frontier of node dicts
    leaf_nodes = []  # nodes evaluated at the end
    nodes = {}       # root node record (used as every child's ancestor)
    nodes["state"] = state
    nodes["action"] = None
    nodes["ancestor"] = None
    '''
    Getting root state and legal actions and successor based on it.
    '''
    legal = state.getLegalPacmanActions()
    random.shuffle(legal)  # randomise tie-breaking among root actions
    successor = [(state.generatePacmanSuccessor(action), action)
                 for action in legal]
    for element in successor:
        temp_nodes = {}
        temp_nodes["state"] = element[0]
        temp_nodes["action"] = element[1]  # root action reaching this node
        temp_nodes["ancestor"] = nodes
        node_stack.append(temp_nodes)
    '''
    Loop that iterates through the list and collects leaf nodes.
    '''
    while node_stack:
        current_node = node_stack.pop(0)
        i_state = current_node["state"]
        i_action = current_node["action"]
        if (i_state is not None):
            legal = i_state.getLegalPacmanActions()
            #random.shuffle(legal)
            if legal:
                successor = [(i_state.generatePacmanSuccessor(action),
                              i_action) for action in legal]
                refined_successor = [
                    element for element in successor if None not in element
                ]
                # NOTE(review): `refined_successor is None` is never true for
                # a list; an empty list is presumably what was meant.
                if (i_state.isWin()) or (i_state.isLose()) or (
                        refined_successor is None):
                    leaf_nodes.append(current_node)
                else:
                    for successor_child in refined_successor:
                        temp_nodes = {}
                        temp_nodes["state"] = successor_child[0]
                        temp_nodes["action"] = successor_child[1]
                        temp_nodes["ancestor"] = nodes
                        leaf_nodes.append(temp_nodes)
    '''
    Returns the action with highest score.
    '''
    max_score = float("-inf")
    if leaf_nodes is not None:
        for j in leaf_nodes:
            current_score = scoreEvaluation(j["state"])
            if (current_score >= max_score):
                max_score = current_score
                final_action = j["action"]
    # NOTE(review): `final_action` is unbound when leaf_nodes is empty.
    return final_action
def getAction(self, state):
    """BFS over Pacman states with explicit parent/action bookkeeping.

    On finding a win state (or after the successor budget runs out, using the
    best-scoring, shallowest frontier state), walks the `predecessor` chain
    back to a depth-1 child of the root and returns its recorded action.
    """
    # stores states , depth and action of frontier nodes
    queue1 = []
    # stores all the parent and child relations traversed: (child, parent)
    predecessor = []
    # stores all the states and their respective actions traversed
    all_actions = []
    # initialize queue with root state
    queue1.append((state, 0, ""))
    none_flag = False  # set when the successor budget runs out
    while queue1:
        temp1 = queue1.pop(0)  # FIFO pop
        depth = temp1[1]
        current_state = temp1[0]
        legal = current_state.getLegalPacmanActions()
        for action in legal:
            successor = current_state.generatePacmanSuccessor(action)
            if (successor == None):
                none_flag = True
                break
            if (successor.isLose()):
                continue
            if (successor.isWin()):
                # If reached goal backtrace and return best action.
                # NOTE(review): the backtrace starts from current_state (the
                # PARENT of the win state), not the win state itself — for a
                # depth-1 win, Child stays None; confirm intent.
                Parent = current_state
                Child = None
                while Parent != state:
                    for i in range(0, int(predecessor.__len__())):
                        if (predecessor[i][0] == Parent):
                            Child = predecessor[i][0]
                            Parent = predecessor[i][1]
                # Finding action of best state
                bestAction = [
                    pair[1] for pair in all_actions if pair[0] == Child
                ]
                return bestAction[0]
            else:
                queue1.append((successor, depth + 1, action))
                predecessor.append((successor, current_state))
                all_actions.append((successor, action))
        if (none_flag):
            break
    # Score everything still on the frontier once the budget is spent.
    scores = [(scoreEvaluation(current_state), depth, current_state)
              for current_state, depth, action in queue1]
    # Finding best scores based on score evaluation
    bestScore = max(scores, key=lambda item: item[0])[0]
    bestScores = [(scoreEvaluation(current_state), depth, current_state)
                  for current_state, depth, action in queue1
                  if scoreEvaluation(current_state) == bestScore]
    # Choosing best state based on shallowest depth among all the bestscores
    bestState = min(bestScores, key=lambda item: item[1])[2]
    # Backtracking till we get child of root node
    Parent = bestState
    Child = None
    while Parent != state:
        for i in range(0, int(predecessor.__len__())):
            if (predecessor[i][0] == Parent):
                Child = predecessor[i][0]
                Parent = predecessor[i][1]
    # Finding action of best state
    bestAction = [pair[1] for pair in all_actions if pair[0] == Child]
    return bestAction[0]
def getAction(self, state):
    """A* over Pacman states using a list queue kept sorted by cost.

    Queue tuples are (state, root action, cost, depth) with
    cost = depth - (child score - parent score). Win states are collected,
    and nodes whose expansion hit the successor budget become leaves; returns
    the best win action, else the cheapest leaf's action.
    """
    # Queue that stores the game states
    state_queue = []
    # Depth (shared counter — see NOTE below)
    depth = 0
    # Cost function
    cost = 0
    # List that stores the leaf nodes
    leaf_list = []
    # List that stores the win states
    win_state_list = []
    # Get legal pacman actions
    legal = state.getLegalPacmanActions()
    # Increment depth
    depth = depth + 1
    for action in legal:
        # Generate successors
        successor = state.generatePacmanSuccessor(action)
        # Calculate cost of the node
        cost = depth - (scoreEvaluation(successor) - scoreEvaluation(state))
        # Append successor, action, cost of the node, and the depth of the
        # node to the queue
        state_queue.append((successor, action, cost, depth))
    # Sort the queue in the increasing order of the cost function
    state_queue.sort(key=lambda tuples: tuples[2])
    while state_queue:
        # Pop the first (cheapest) element of the queue
        next_state, action, cost, depth = state_queue.pop(0)
        # Check for win state and append it and the action to the win list
        if next_state.isWin():
            win_state_list.append((next_state, action))
        # Get legal pacman actions
        legal = next_state.getLegalPacmanActions()
        # Increment depth
        # NOTE(review): `depth` is the popped node's depth, mutated in place
        # (and decremented again on a None child below) — the bookkeeping is
        # fragile; confirm children really get parent depth + 1.
        depth = depth + 1
        for next_action in legal:
            # Generate successor
            child = next_state.generatePacmanSuccessor(next_action)
            # If successor is None (budget spent), record the parent as leaf
            if child == None:
                depth = depth - 1
                next_cost = depth - (scoreEvaluation(next_state) -
                                     scoreEvaluation(state))
                leaf_list.append((next_state, action, next_cost, depth))
            # If successor is a Win state, append it to the win list
            elif child.isWin():
                win_state_list.append((child, action))
            # Else, append state, action, cost, and depth to the queue
            else:
                next_cost = depth - (scoreEvaluation(child) -
                                     scoreEvaluation(next_state))
                next_successors = (child, action, next_cost, depth)
                state_queue.append(next_successors)
                # Sort the queue in the increasing order of the cost function
                state_queue.sort(key=lambda tuples: tuples[2])
        # Sort the queue in the increasing order of the cost function
        state_queue.sort(key=lambda tuples: tuples[2])
    # For all the win states, return the action leading to the win state with
    # the best score (the `while` acts as an `if`: it returns on first pass)
    while win_state_list:
        max_score = 0
        for win_pair in win_state_list:
            if scoreEvaluation(win_pair[0]) > max_score:
                max_score = scoreEvaluation(win_pair[0])
                bestAction = win_pair[1]
        return bestAction
    # Sort the list of leaves in increasing order of cost of the node
    leaf_list.sort(key=lambda tuples: tuples[2])
    # Return best action (cheapest leaf)
    bestAction = leaf_list[0][1]
    return bestAction
def getAction(self, state): # TODO: write BFS Algorithm instead of returning Directions.STOP # Queue that stores the game states state_queue = [] # List that stores the leaf nodes leaf_list = [] # List that stores the win states win_state_list = [] # Get legal pacman actions legal = state.getLegalPacmanActions() # Generate successors successors = [(state.generatePacmanSuccessor(action), action) for action in legal] # Append successors to the Queue state_queue.extend(successors) while state_queue: # Pop first element of the Queue next_state, action = state_queue.pop(0) # Check for win state and append it and the action to the list of win state if next_state.isWin(): win_state_list.append((next_state, action)) # Get legal pacman actions legal = next_state.getLegalPacmanActions() # Generate successors next_successors = [(next_state.generatePacmanSuccessor(next_action), next_action) for next_action in legal] for tuple in next_successors: # Check if successor is None, append the parent state and action to the list of leaves if tuple[0] == None: leaf_list.append((next_state, action)) # Check if successor is a Win state, append the state to the list of Win states elif tuple[0].isWin(): win_state_list.append((tuple[0], action)) # Else, append state and action to the original Queue else: state_queue.append((tuple[0], action)) # For all the win states, return the action leading to the win state with the best score while win_state_list: max_score = 0 for win_pair in win_state_list: if scoreEvaluation(win_pair[0]) > max_score: max_score = scoreEvaluation(win_state_list) bestAction = win_pair[1] return bestAction # For all the leaf nodes, evaluate the score and return the action leading to the win state with the best score scored = [(scoreEvaluation(state), action) for state, action in leaf_list] bestScore = max(scored)[0] print bestScore for tuple in scored: if tuple[0] == bestScore: bestAction = tuple[1] break print bestAction return bestAction
def getAction(self, state):
    """Genetic algorithm over fixed-length action sequences.

    Evolves a population of 8 chromosomes (5 actions each) stored in
    self.total_actions -- rank-based parent selection, 70% crossover,
    10% per-chromosome mutation -- until generatePacmanSuccessor runs out
    of calls, then returns the first action of the best chromosome.
    """
    possible = state.getAllPossibleActions()

    # Seed the population with uniformly random actions.
    for i in range(8):
        for j in range(5):
            self.total_actions[i][j] = possible[random.randint(0, len(possible) - 1)]

    self.flag = True  # cleared when the successor-call budget is exhausted

    # Score the initial population.
    # BUG FIX: the rollout state is reset to the root for every chromosome;
    # the original carried cur_state over from one chromosome to the next,
    # so later fitness values were cumulative and meaningless.
    score = []
    for i in range(8):
        cur_state = state
        for j in range(5):
            if cur_state.isWin():
                return self.total_actions[i][0]
            if cur_state.isLose():
                break
            nxt = cur_state.generatePacmanSuccessor(self.total_actions[i][j])
            if nxt is None:  # budget exhausted mid-rollout
                self.flag = False
                break
            cur_state = nxt
        score.append(scoreEvaluation(cur_state))

    # Rank the initial population ascending by score (best is last).
    chrom = [{"actions": list(self.total_actions[i]), "score": score[i], "id": i}
             for i in range(8)]
    chrom.sort(key=lambda c: c["score"])
    for i in range(8):
        chrom[i]["rank"] = i + 1
    best = chrom[-1]

    while self.flag:
        # --- Selection + crossover ------------------------------------
        # BUG FIX: probability tests use random.random() (uniform float in
        # [0, 1)), not random.randint(0, 1), which had made the intended
        # 70% crossover / 10% mutation rates effectively 50/50 -- and the
        # coin is flipped per pair/chromosome, not once per generation.
        next_generation = []
        for _ in range(4):
            parent1 = self.selectParent(chrom)
            parent2 = self.selectParent(chrom)
            if random.random() < 0.7:
                next_generation.append(list(self.crossover(parent1["actions"], parent2["actions"])))
                next_generation.append(list(self.crossover(parent1["actions"], parent2["actions"])))
            else:
                next_generation.append(list(parent1["actions"]))
                next_generation.append(list(parent2["actions"]))

        # --- Mutation -------------------------------------------------
        new_generation = []
        for i in range(8):
            if random.random() < 0.1:
                new_generation.append(self.mutate(next_generation[i], state))
            else:
                new_generation.append(next_generation[i])

        # --- Scoring (same per-chromosome rollout as above) -----------
        scores = []
        for i in range(8):
            cur_state = state
            for j in range(5):
                if cur_state.isWin():
                    return new_generation[i][0]
                if cur_state.isLose():
                    break
                nxt = cur_state.generatePacmanSuccessor(new_generation[i][j])
                if nxt is None:
                    self.flag = False
                    break
                cur_state = nxt
            scores.append(scoreEvaluation(cur_state))
            if not self.flag:
                break

        # Copy the evaluated chromosomes back into the persistent population.
        for i in range(len(scores)):
            for j in range(5):
                self.total_actions[i][j] = new_generation[i][j]
            score[i] = scores[i]

        # Re-rank on the freshly evaluated chromosomes.
        # BUG FIX: the original built `chrome` here but then returned from
        # the stale `chrom` (the previous generation's ranking).
        chrom = [{"actions": list(new_generation[i]), "score": scores[i], "id": i}
                 for i in range(len(scores))]
        chrom.sort(key=lambda c: c["score"])
        for i in range(len(chrom)):
            chrom[i]["rank"] = i + 1
        if chrom:
            best = chrom[-1]

    return best["actions"][0]
def fOfXEvaluation(self,state,stateMetadata,root): return int(stateMetadata[state][1]) -(scoreEvaluation(state) - scoreEvaluation(root));