def genmove(self, moves, state, color):
    """Return the candidate move with the highest average simulated reward.

    Each sample asks ``self._choose_action()`` for a move, plays it for
    ``color`` on a deep copy of ``state``, scores that copy with
    ``mysimulate(color)`` and folds the reward into the move's running
    average.  One initial sample plus ``self.numSimulations``
    (``len(moves) * 100``) further samples are drawn.

    Side effects: overwrites ``self.numSimulations``, ``self.moves``,
    ``self.count``, ``self.avg_rewards``, ``self.preAction`` and
    ``self.bestMove``, and increments ``self.time`` once per sample
    (``self.time`` must already exist).  Moves are only ever played on
    deep copies of ``state``.

    Args:
        moves: candidate moves; used as dictionary keys and passed to
            ``move_to_coord``.
        state: board object providing ``endOfGame()``, ``size``,
            ``play_move_gomoku()`` and ``mysimulate()``.
        color: the colour to play and simulate for.

    Returns:
        ``moves[0]`` immediately when there is exactly one candidate (only
        ``self.numSimulations`` is updated in that case); otherwise the
        move whose average reward is highest after all samples.
    """
    assert not state.endOfGame()
    move_count = len(moves)
    self.numSimulations = move_count * 100
    if move_count == 1:
        return moves[0]

    # agent init
    self.moves = moves
    self.count = dict.fromkeys(moves, 0)
    self.avg_rewards = dict.fromkeys(moves, 0)

    def sample_once():
        # Draw one action, simulate it on a copy and update its statistics.
        action = self._choose_action()
        self.preAction = action
        self.count[action] += 1
        self.time += 1
        coord = move_to_coord(action, state.size)
        point = coord_to_point(coord[0], coord[1], state.size)
        trial_board = copy.deepcopy(state)
        trial_board.play_move_gomoku(point, color)
        reward = trial_board.mysimulate(color)
        # Incremental mean: avg += (reward - avg) / n
        self.avg_rewards[action] += (
            (reward - self.avg_rewards[action]) / self.count[action])
        return action

    # agent start
    sample_once()
    highest_reward = max(self.avg_rewards.values())
    for candidate in self.avg_rewards:
        # On ties the last candidate with the highest average wins.
        if self.avg_rewards[candidate] == highest_reward:
            self.bestMove = candidate

    # agent step
    for _ in range(self.numSimulations):
        action = sample_once()
        incumbent = self.bestMove
        if action == incumbent:
            # The incumbent's own average just changed and may have fallen
            # below another move's; rescan so bestMove never goes stale.
            # The incumbent is kept on ties.
            challenger = max(self.avg_rewards, key=self.avg_rewards.get)
            if self.avg_rewards[challenger] > self.avg_rewards[incumbent]:
                self.bestMove = challenger
        elif self.avg_rewards[action] > self.avg_rewards[incumbent]:
            self.bestMove = action
    return self.bestMove
def play_move(board, move, color):
    """Play ``move`` for ``color`` on ``board``.

    A string move is first converted with ``move_to_coord`` and
    ``coord_to_point`` (both given ``board.size``); any other value is
    passed to ``board.play_move_gomoku`` unchanged.  The result of
    ``play_move_gomoku`` is discarded, so this always returns ``None``.
    """
    target = move
    if isinstance(move, str):
        row_col = move_to_coord(move, board.size)
        target = coord_to_point(row_col[0], row_col[1], board.size)
    board.play_move_gomoku(target, color)
def undo(board, move):
    """Clear ``move`` from ``board`` and hand the turn to the other player.

    A string move is converted with ``move_to_coord`` and
    ``coord_to_point`` (both given ``board.size``); any other value is used
    directly as the index into ``board.board``.  That cell is set to
    ``EMPTY`` and ``board.current_player`` is replaced by
    ``GoBoardUtil.opponent(board.current_player)``.
    """
    if isinstance(move, str):
        row_col = move_to_coord(move, board.size)
        target = coord_to_point(row_col[0], row_col[1], board.size)
    else:
        target = move
    board.board[target] = EMPTY
    board.current_player = GoBoardUtil.opponent(board.current_player)
def simulate(self, state, move, color):
    """Score ``move`` by running ``self.numSimulations`` simulations after it.

    The move is converted with ``move_to_coord`` / ``coord_to_point`` and
    played on ``state`` itself for ``color``.  After every call to
    ``state.simulate()`` the state is rewound to the move number reached
    right after that move, and the move is undone with ``state.undoMove()``
    before returning.

    Returns:
        ``(BLACK wins + 0.5 * EMPTY results) / self.numSimulations``, or
        one minus that value when ``state.current_player == WHITE`` after
        the move has been undone.

    Raises:
        AssertionError: if the tallied results do not sum to
            ``self.numSimulations`` or the move number differs after the
            simulations.
        ZeroDivisionError: if ``self.numSimulations`` is 0.
    """
    # convert the move to the index point and play it
    row_col = move_to_coord(move, state.size)
    target = coord_to_point(row_col[0], row_col[1], state.size)
    state.play_move_gomoku(target, color)
    start_move_nr = state.moveNumber()

    # tally[w] counts simulations whose reported winner is w
    tally = [0, 0, 0]
    for _ in range(self.numSimulations):
        winner, _unused = state.simulate()
        tally[winner] += 1
        state.resetToMoveNumber(start_move_nr)
    assert sum(tally) == self.numSimulations
    assert start_move_nr == state.moveNumber()
    state.undoMove()

    black_score = (tally[BLACK] + 0.5 * tally[EMPTY]) / self.numSimulations
    if state.current_player == WHITE:
        return 1 - black_score
    return black_score