def depthFirstSearch(problem):
    """Search the deepest nodes in the search tree first (graph search).

    Returns a list of actions that reaches the goal, or an empty list if
    the fringe is exhausted without reaching a goal state.

    Fixes vs. original: `dict.has_key` (removed in Python 3) replaced with
    the `in` operator, `print` statement replaced with the `print()` call
    form (valid in both Python 2 and 3), and `while(False == ...)`
    replaced with the idiomatic `while not ...`.
    """
    from util import Stack
    fringe = Stack()
    closed = {}       # states already expanded (graph-search closed set)
    moves = []
    # Seed the fringe with the start state's successors; each fringe entry
    # carries the full action sequence that reaches it.
    for successor in problem.getSuccessors(problem.getStartState()):
        node = FringeState(successor[0])
        node.moves.append(Directions.convToDirection(successor[1]))
        fringe.push(node)
    closed[problem.getStartState()] = True
    while not fringe.isEmpty():
        current_node = fringe.pop()
        if current_node.pos in closed:
            continue
        if problem.isGoalState(current_node.pos):
            print("Goal reached!")
            moves = current_node.moves
            break
        for successor in problem.getSuccessors(current_node.pos):
            node = FringeState(successor[0])
            # Deep-copy so sibling nodes don't share the same moves list.
            node.moves = copy.deepcopy(current_node.moves)
            node.moves.append(Directions.convToDirection(successor[1]))
            fringe.push(node)
        closed[current_node.pos] = True
    return moves
def update(self, transitionsBatch):
    """Update the Q-Values from the given batch of transitions.

    :param transitionsBatch: list of tuples
        (qState, action, reward, nextQState, isTerminal, nextStateLegalActions)
    :return: whatever the underlying Keras-style model returns from
        `train_on_batch` (typically the training loss).

    Fix vs. original: the bare `except:` around `list.remove` swallowed
    every exception; narrowed to `except ValueError`, the only exception
    `remove` raises when the element is absent.
    """
    trainingBatchQStates = []
    trainingBatchTargetQValues = []
    # Convert raw states to q-states and compute the target Q-values
    # (Bellman update) for each transition in the batch.
    for aQState, anAction, aReward, aNextQState, isTerminal, nextStateLegalActions in transitionsBatch:
        actionsQValues = self.model.model.predict(np.array([aQState]))[0]
        targetQValues = actionsQValues.copy()
        if isTerminal:
            # Terminal state: no future reward to bootstrap from.
            updatedQValueForAction = aReward
        else:
            nextActionsQValues = self.model.model.predict(
                np.array([aNextQState]))[0]
            nextStateLegalActionsIndices = [
                Directions.getIndex(action) for action in nextStateLegalActions
            ]
            # Index 4 is presumably STOP — excluded from the max; confirm
            # against Directions.getIndex.
            try:
                nextStateLegalActionsIndices.remove(4)
            except ValueError:
                pass  # STOP not among the legal actions — nothing to drop
            nextStateLegalActionsQValues = np.array(
                nextActionsQValues)[nextStateLegalActionsIndices]
            maxNextActionQValue = max(nextStateLegalActionsQValues)
            updatedQValueForAction = (
                aReward + self.trainingRoom.discount * maxNextActionQValue)
        targetQValues[Directions.getIndex(anAction)] = updatedQValueForAction
        trainingBatchQStates.append(aQState)
        trainingBatchTargetQValues.append(targetQValues)
    return self.model.model.train_on_batch(
        x=np.array(trainingBatchQStates),
        y=np.array(trainingBatchTargetQValues))
def getFeatures(self, state, action):
    """Build a feature vector: per-direction danger flags followed by
    per-direction legality flags.

    NOTE: the `action` parameter is not used here — the danger flags are
    computed for every non-STOP direction, not for the given action.
    """
    from pacmanAgents import CarefulGreedyAgent
    dangerous = CarefulGreedyAgent()._getAction(state)[1]
    nonStopDirections = [d for d in Directions.asList() if d != Directions.STOP]
    dangerFlags = np.array([d in dangerous for d in nonStopDirections]).astype(float)
    legalityFlags = getLegalActions(state)
    return np.concatenate((dangerFlags, legalityFlags)).astype(dtype=float)
def getAction(self, rawState, epsilon):
    """Epsilon-greedy action selection over tabular/approximate Q-values.

    With probability `epsilon` a random legal action is returned
    (exploration); otherwise the legal action with the highest Q-value
    (exploitation).

    Fix vs. original: `legalActions.remove(Directions.STOP)` raised
    ValueError whenever STOP was not legal; now guarded with a membership
    check, consistent with the module's `getLegalActions` helper.
    """
    legalActions = rawState.getLegalActions()
    if Directions.STOP in legalActions:
        legalActions.remove(Directions.STOP)
    if util.flipCoin(epsilon):
        return random.choice(legalActions)
    qValues = [(Directions.getIndex(action), self.getQValue(rawState, action))
               for action in legalActions]
    qValues = sorted(qValues, key=lambda x: x[1], reverse=True)
    # Walk from best to worst; return the first legal action.
    for index, qValue in qValues:
        action = Directions.fromIndex(index)
        if action in legalActions:
            return action
def aStarSearch(problem, heuristic=nullHeuristic):
    """Search the node that has the lowest combined cost and heuristic first.

    Returns a list of actions reaching the goal, or an empty list if no
    goal is found. Fringe priority is g(n) + h(n).

    Fixes vs. original: `dict.has_key` (removed in Python 3) replaced with
    `in`, `print` statement replaced with the `print()` call form, and
    `while(False == ...)` replaced with `while not ...`.
    """
    from util import PriorityQueue
    fringe = PriorityQueue()
    closed = {}       # states already expanded (graph-search closed set)
    moves = []
    for successor in problem.getSuccessors(problem.getStartState()):
        node = FringeState(successor[0])
        node.moves.append(Directions.convToDirection(successor[1]))
        node.cost = successor[2]
        fringe.push(node, node.cost + heuristic(node.pos, problem))
    closed[problem.getStartState()] = True
    while not fringe.isEmpty():
        current_node = fringe.pop()
        if current_node.pos in closed:
            continue
        if problem.isGoalState(current_node.pos):
            print("Goal reached!")
            moves = current_node.moves
            break
        for successor in problem.getSuccessors(current_node.pos):
            node = FringeState(successor[0])
            # Deep-copy so sibling nodes don't share the same moves list.
            node.moves = copy.deepcopy(current_node.moves)
            node.moves.append(Directions.convToDirection(successor[1]))
            node.cost = successor[2] + current_node.cost   # accumulated g(n)
            fringe.push(node, node.cost + heuristic(node.pos, problem))
        closed[current_node.pos] = True
    return moves
def uniformCostSearch(problem):
    """Search the node of least total cost first (Dijkstra-style).

    Returns a list of actions reaching the goal, or an empty list if no
    goal is found. Fringe priority is the accumulated path cost g(n).

    Fixes vs. original: `dict.has_key` (removed in Python 3) replaced with
    `in`, `print` statement replaced with the `print()` call form, and
    `while(False == ...)` replaced with `while not ...`.
    """
    from util import PriorityQueue
    fringe = PriorityQueue()
    closed = {}       # states already expanded (graph-search closed set)
    moves = []
    for successor in problem.getSuccessors(problem.getStartState()):
        node = FringeState(successor[0])
        node.moves.append(Directions.convToDirection(successor[1]))
        node.cost = successor[2]
        fringe.push(node, node.cost)
    closed[problem.getStartState()] = True
    while not fringe.isEmpty():
        current_node = fringe.pop()
        if current_node.pos in closed:
            continue
        if problem.isGoalState(current_node.pos):
            print("Goal reached!")
            moves = current_node.moves
            break
        for successor in problem.getSuccessors(current_node.pos):
            node = FringeState(successor[0])
            # Deep-copy so sibling nodes don't share the same moves list.
            node.moves = copy.deepcopy(current_node.moves)
            node.moves.append(Directions.convToDirection(successor[1]))
            node.cost = successor[2] + current_node.cost   # accumulated g(n)
            fringe.push(node, node.cost)
        closed[current_node.pos] = True
    return moves
def breadthFirstSearch(problem):
    """Search the shallowest nodes in the search tree first (graph search).

    Returns a list of actions reaching the goal, or an empty list if the
    fringe is exhausted without reaching a goal state.

    Fixes vs. original: `dict.has_key` (removed in Python 3) replaced with
    `in`, `print` statement replaced with the `print()` call form, and
    `while(False == ...)` replaced with `while not ...`.
    """
    from util import Queue
    fringe = Queue()
    closed = {}       # states already expanded (graph-search closed set)
    moves = []
    for successor in problem.getSuccessors(problem.getStartState()):
        node = FringeState(successor[0])
        node.moves.append(Directions.convToDirection(successor[1]))
        fringe.push(node)
    closed[problem.getStartState()] = True
    while not fringe.isEmpty():
        current_node = fringe.pop()
        if current_node.pos in closed:
            continue
        if problem.isGoalState(current_node.pos):
            print("Goal reached!")
            moves = current_node.moves
            break
        for successor in problem.getSuccessors(current_node.pos):
            node = FringeState(successor[0])
            # Deep-copy so sibling nodes don't share the same moves list.
            node.moves = copy.deepcopy(current_node.moves)
            node.moves.append(Directions.convToDirection(successor[1]))
            fringe.push(node)
        closed[current_node.pos] = True
    return moves
def getAction(self, rawState, epsilon):
    """Epsilon-greedy action selection from the neural-network Q-values.

    With probability `epsilon` a random legal action is returned
    (exploration); otherwise the network's Q-values are ranked and the
    best legal action is returned (exploitation).

    Fix vs. original: `legalActions.remove(Directions.STOP)` raised
    ValueError whenever STOP was not legal; now guarded with a membership
    check, consistent with the module's `getLegalActions` helper.
    """
    legalActions = rawState.getLegalActions()
    if Directions.STOP in legalActions:
        legalActions.remove(Directions.STOP)
    qState = self.trainingRoom.featuresExtractor.getFeatures(rawState, None)
    if util.flipCoin(epsilon):
        return random.choice(legalActions)
    # (actionIndex, qValue) pairs for every output of the network.
    qValues = list(enumerate(self.model.model.predict(np.array([qState]))[0]))
    qValues = sorted(qValues, key=lambda x: x[1], reverse=True)
    # Walk from best to worst; return the first action that is legal.
    for index, qValue in qValues:
        action = Directions.fromIndex(index)
        if action in legalActions:
            return action
def before_turn(self): board = self.board # 1. get all the empty slots and choose randomly between them # 2. choose randomly(0.8) between putting 2 or 4 there emptySlots = [(x,y) for x in range(4) for y in range(4) if board[x][y] == None] numberToPutInSlot = weighted_choice() x,y = choose_uni_from_seq(emptySlots) self.create_new_tile(x, y, numberToPutInSlot) # check that there left some moves here, or it's a game over noMoreMoves = True for move in Directions.generator(): if self.is_legal_turn(move): noMoreMoves = False if noMoreMoves: self.display_score_and_exit() if self.agent: self.after(AFTER_FOR_NEW_TURN, self.update_turn, self.agent.getAction(self.board)) self.ignoreKeys = False
def getLegalActions(state):
    """Return a length-4 boolean numpy array: entry i is True iff the
    direction with index i is legal in `state` (STOP excluded)."""
    actions = state.getLegalActions()
    if Directions.STOP in actions:
        actions.remove(Directions.STOP)
    flags = [Directions.fromIndex(index) in actions for index in range(4)]
    return np.array(flags)
def getGhostDirections(state):
    """Return each ghost's heading as a direction index scaled to [0, 1]
    (index divided by 4.0), as a numpy array."""
    indices = [Directions.getIndex(ghost.getDirection())
               for ghost in state.getGhostStates()]
    return np.array(indices) / 4.0
def getNextStatesOfMyTurn(board):
    '''Given a state (a board), yield (move, nextState) tuples for every
    legal move from that board.'''
    for move in Directions.generator():
        if not Miniboard.isLegalAction(board, move):
            continue
        yield (move, Miniboard.calculateNextBoardUsing(move, board))
def test_legal_turn(self):
    """Smoke test: build a fixed board and print the legality of every
    direction from it."""
    self.create_from_list([0,0,0,0,2,4,2,4])
    self.debug_board()
    for direction in Directions.generator():
        print(direction, self.is_legal_turn(direction))
def willBeInCorridor(self, gameState, action):
    """Return 1.0 if taking `action` leads into a corridor — i.e. the
    successor position has exactly 3 legal actions and both `action` and
    its reverse are among them — else 0.0.

    Fix vs. original: removed the unused local `myPos`
    (successor.getAgentPosition(self.index) was computed but never read).

    NOTE(review): in the stock Berkeley Pacman code `Directions.REVERSE`
    is a dict (subscripted, not called); here it is invoked as
    `Directions.REVERSE(action)` — confirm this project defines it as a
    callable.
    """
    successor = self.getSuccessor(gameState, action)
    legalActionsAtSuccessor = successor.getLegalActions(self.index)
    willBeInCorridor = (
        len(legalActionsAtSuccessor) == 3
        and action in legalActionsAtSuccessor
        and Directions.REVERSE(action) in legalActionsAtSuccessor)
    return float(willBeInCorridor)
def remember(self, state, action, reward, nextState):
    """Store a transition in the replay memory, keyed by the state's hash
    concatenated with the action's direction index."""
    from game import Directions
    key = "{}{}".format(state.__hash__(), Directions.getIndex(action))
    self.replayMemory[key] = (state, action, reward, nextState)