def genmove(self, moves, state, color):
    """Choose one of ``moves`` for ``color`` by sampling simulated rewards.

    Each sample asks ``self._choose_action()`` for a move, plays it on a
    deep copy of ``state``, obtains a reward from ``mysimulate(color)`` and
    folds that reward into the move's running average.  One initial sample
    is followed by ``len(moves) * 100`` further samples.

    Side effects: overwrites ``self.numSimulations``, ``self.moves``,
    ``self.count``, ``self.avg_rewards``, ``self.preAction`` and
    ``self.bestMove``, and increments ``self.time`` once per sample
    (``self.time`` must already exist).  ``state`` itself is not played on.

    Args:
        moves: candidate moves; each is passed to ``move_to_coord``.
        state: current board; must not be at end of game.
        color: colour to move, forwarded to the board methods.

    Returns:
        ``moves[0]`` when there is exactly one candidate, otherwise the
        move with the highest average reward after sampling.
    """
    assert not state.endOfGame()
    moveNr = len(moves)
    self.numSimulations = moveNr * 100
    if moveNr == 1:
        return moves[0]

    # agent init
    self.moves = moves
    self.count = dict.fromkeys(moves, 0)
    self.avg_rewards = dict.fromkeys(moves, 0)

    def sample_once():
        """Sample one action and update its running average reward."""
        action = self._choose_action()
        self.preAction = action
        self.count[action] += 1
        self.time += 1
        coord = move_to_coord(action, state.size)
        point = coord_to_point(coord[0], coord[1], state.size)
        copy_board = copy.deepcopy(state)
        copy_board.play_move_gomoku(point, color)
        reward = copy_board.mysimulate(color)
        # Incremental mean: avg += (x - avg) / n
        self.avg_rewards[action] += (
            (reward - self.avg_rewards[action]) / self.count[action])
        return action

    # agent start
    sample_once()
    highest_reward = max(self.avg_rewards.values())
    for move in self.avg_rewards:
        # On ties the last move in dict order wins.
        if self.avg_rewards[move] == highest_reward:
            self.bestMove = move

    # agent step
    for _ in range(self.numSimulations):
        action = sample_once()
        if action == self.bestMove:
            # The incumbent's own average just changed and may have fallen
            # below another move's; rescan so bestMove never goes stale.
            challenger = max(self.avg_rewards, key=self.avg_rewards.get)
            if self.avg_rewards[challenger] > self.avg_rewards[self.bestMove]:
                self.bestMove = challenger
        elif self.avg_rewards[action] > self.avg_rewards[self.bestMove]:
            self.bestMove = action
    return self.bestMove
def play_move(board, move, color):
    """Play ``move`` for ``color`` on ``board``.

    A string ``move`` is first translated with ``move_to_coord`` and
    ``coord_to_point`` (both using ``board.size``); any other value is
    passed to ``board.play_move_gomoku`` unchanged.
    """
    point = move
    if isinstance(move, str):
        coord = move_to_coord(move, board.size)
        point = coord_to_point(coord[0], coord[1], board.size)
    board.play_move_gomoku(point, color)
def undo(board, move):
    """Take ``move`` back off ``board``.

    The point is set to ``EMPTY`` in ``board.board`` and
    ``board.current_player`` is switched to its opponent.  A string
    ``move`` is translated with ``move_to_coord`` / ``coord_to_point``
    first; any other value is used directly as the index.
    """
    point = move
    if isinstance(move, str):
        coord = move_to_coord(move, board.size)
        point = coord_to_point(coord[0], coord[1], board.size)
    board.board[point] = EMPTY
    board.current_player = GoBoardUtil.opponent(board.current_player)
def simulate(self, state, move, color):
    """Score ``move`` by repeated playouts from the resulting position.

    Plays ``move`` for ``color`` on ``state``, runs ``self.numSimulations``
    calls to ``state.simulate()`` (resetting to the post-move position
    after each), then undoes the move.

    Returns:
        ``(BLACK wins + 0.5 * EMPTY results) / numSimulations``, replaced by
        its complement to 1 when ``state.current_player`` is ``WHITE``
        after the undo.  EMPTY results are presumably draws -- confirm.
    """
    coord = move_to_coord(move, state.size)
    point = coord_to_point(coord[0], coord[1], state.size)
    state.play_move_gomoku(point, color)
    start_move_number = state.moveNumber()

    # One counter per winner code returned by state.simulate().
    tally = [0, 0, 0]
    for _ in range(self.numSimulations):
        winner, _ignored = state.simulate()
        tally[winner] += 1
        state.resetToMoveNumber(start_move_number)

    assert sum(tally) == self.numSimulations
    assert start_move_number == state.moveNumber()
    state.undoMove()

    black_score = (tally[BLACK] + 0.5 * tally[EMPTY]) / self.numSimulations
    return 1 - black_score if state.current_player == WHITE else black_score
def playSingleGame(alternative=False):
    """Play one game between the two configured engines.

    Spawns ``player1`` and ``player2`` (module-level script paths) plus an
    observer engine, relays each generated move to the other player and
    the observer, mirrors it on a local 7x7 board that is printed after
    every move, and asks the observer for the result after each move.

    Args:
        alternative: when true, ``player2`` plays Black and ``player1``
            plays White; otherwise ``player1`` is Black.

    Returns:
        ``(result, numTimeout)``.  ``result`` is 1 when Black wins, 2 when
        White wins and 0 for a draw; a resign or timeout counts as a loss
        for the side that was to move.  ``numTimeout`` is never
        incremented here, so it is always 0.
    """
    board_size = 7
    black_script, white_script = (
        (player2, player1) if alternative else (player1, player2))
    outcomes = {'black': 1, 'white': 2, 'draw': 0}

    children = []
    try:
        p1 = pexpect.spawn('python3 ' + black_script, timeout=timeout + 1)
        children.append(p1)
        p2 = pexpect.spawn('python3 ' + white_script, timeout=timeout + 1)
        children.append(p2)
        ob = pexpect.spawn('python3 flat_mc_player/Gomoku3.py')
        children.append(ob)
        setupPlayer(p1)
        setupPlayer(p2)

        result = None
        numTimeout = 0
        # Pre-set so the final print works even if the very first move is
        # a resign or timeout and the observer was never queried.
        status = 'unknown'
        board = simple_board.SimpleGoBoard(board_size)
        black_to_move = True
        while True:
            if black_to_move:
                mover, waiter, colour, stone, forfeit = p1, p2, 'b', 1, 2
            else:
                mover, waiter, colour, stone, forfeit = p2, p1, 'w', 2, 1

            move = getMove(mover, colour)
            assert move != 'pass'
            if move in ('resign', 'timeout'):
                result = forfeit
                break

            playMove(waiter, colour, move)
            playMove(ob, colour, move)
            coord = gtp_connection.move_to_coord(move, board_size)
            point = board.pt(coord[0], coord[1])
            board.play_move_gomoku(point, stone)

            black_to_move = not black_to_move
            print(point)
            print(board_util.GoBoardUtil.get_twoD_board(board))

            ob.sendline('gogui-rules_final_result')
            ob.expect(['= black', '= white', '= draw', '= unknown'])
            # Strip the leading "= " of the GTP response.
            status = ob.after.decode("utf-8")[2:]
            if status in outcomes:
                result = outcomes[status]
                break
            assert status == 'unknown'

        # NOTE(review): original layout was lost; status is printed once
        # here after the game ends -- confirm this placement.
        print(status)
        return result, numTimeout
    finally:
        # Always terminate the engines so repeated games do not leak
        # child processes and ptys.
        for child in children:
            child.close(force=True)