예제 #1
0
    def genmove(self, moves, state, color):
        """Pick a move for ``color`` by sampling playouts over ``moves``.

        Each sample asks ``self._choose_action()`` for a move, plays it on a
        deep copy of ``state``, scores it with ``mysimulate(color)`` and folds
        the reward into that move's running average.  One seeding sample is
        followed by ``len(moves) * 100`` further samples.

        Args:
            moves: candidate moves in the string form accepted by
                ``move_to_coord``.
            state: current board; it is never mutated, only deep-copied.
            color: the player to move.

        Returns:
            The single candidate when there is only one, otherwise the move
            with the highest average reward after all samples.

        Side effects:
            Overwrites ``self.numSimulations``, ``self.moves``, ``self.count``,
            ``self.avg_rewards``, ``self.preAction`` and ``self.bestMove``, and
            advances ``self.time`` once per sample.
        """
        assert not state.endOfGame()
        moveNr = len(moves)
        self.numSimulations = moveNr * 100
        if moveNr == 1:
            # Nothing to choose between; skip the sampling entirely.
            return moves[0]

        # agent init: fresh visit counts and running averages for this search
        self.moves = moves
        self.count = dict.fromkeys(moves, 0)
        self.avg_rewards = dict.fromkeys(moves, 0)

        # agent start: a single sample seeds the statistics
        self._run_playout(state, color)

        # Initial best move: when several moves tie for the highest average,
        # the last one in insertion order wins (no early exit on purpose).
        highest_reward = max(self.avg_rewards.values())
        for move in self.avg_rewards:
            if self.avg_rewards[move] == highest_reward:
                self.bestMove = move

        # agent step
        for _ in range(self.numSimulations):
            self._run_playout(state, color)
            # A sample can also *lower* the current best move's average, so
            # comparing only the sampled move against the best is not enough.
            self._refresh_best_move()

        return self.bestMove

    def _run_playout(self, state, color):
        """Sample one move, simulate it on a copy of ``state``, update its average."""
        self.preAction = self._choose_action()
        self.count[self.preAction] += 1
        self.time += 1
        coord = move_to_coord(self.preAction, state.size)
        point = coord_to_point(coord[0], coord[1], state.size)
        copy_board = copy.deepcopy(state)
        copy_board.play_move_gomoku(point, color)
        reward = copy_board.mysimulate(color)
        # Incremental mean: avg += (reward - avg) / n
        self.avg_rewards[self.preAction] += (
            (reward - self.avg_rewards[self.preAction]) /
            self.count[self.preAction])

    def _refresh_best_move(self):
        """Point ``self.bestMove`` at the highest average; the incumbent keeps ties."""
        best = self.bestMove
        best_avg = self.avg_rewards[best]
        for move, avg in self.avg_rewards.items():
            if avg > best_avg:
                best, best_avg = move, avg
        self.bestMove = best
예제 #2
0
def play_move(board, move, color):
    """Play ``move`` for ``color`` on ``board``.

    ``move`` may be given as a string, in which case it is converted to a
    board point with ``move_to_coord`` / ``coord_to_point`` using
    ``board.size``; any other value is passed to the board unchanged.
    """
    target = move
    if isinstance(move, str):
        # Text form: resolve it to the point index the board expects.
        coord = move_to_coord(move, board.size)
        target = coord_to_point(coord[0], coord[1], board.size)
    board.play_move_gomoku(target, color)
예제 #3
0
def undo(board,move):
    """Take ``move`` back: empty its point and switch ``board.current_player``.

    ``move`` may be a string (converted to a point with ``move_to_coord`` /
    ``coord_to_point`` using ``board.size``) or a value used directly as the
    index into ``board.board``.
    """
    target = move
    if isinstance(move, str):
        coord = move_to_coord(move, board.size)
        target = coord_to_point(coord[0], coord[1], board.size)
    # Both input forms end the same way: clear the point, then flip the turn.
    board.board[target] = EMPTY
    board.current_player = GoBoardUtil.opponent(board.current_player)
예제 #4
0
 def simulate(self, state, move, color):
     """Estimate the value of ``move`` for ``color`` by repeated playouts.

     The move is played on ``state`` itself, ``self.numSimulations`` playouts
     are run with ``state.simulate()`` (rewinding to the post-move position
     after each one), and the move is undone again before returning.

     Returns:
         ``(BLACK wins + 0.5 * EMPTY results) / numSimulations``, replaced by
         one minus that value when ``state.current_player`` is WHITE once the
         move has been undone.  (EMPTY presumably marks a draw -- confirm
         against ``state.simulate``.)
     """
     # Convert the text move to the board's point index and play it.
     coord = move_to_coord(move, state.size)
     state.play_move_gomoku(
         coord_to_point(coord[0], coord[1], state.size), color)
     moveNr = state.moveNumber()

     # Tally of playout outcomes, indexed by the winner value.
     outcome_counts = [0, 0, 0]
     for _playout in range(self.numSimulations):
         winner, _unused = state.simulate()
         outcome_counts[winner] += 1
         # Rewind to the position right after ``move`` for the next playout.
         state.resetToMoveNumber(moveNr)
     assert sum(outcome_counts) == self.numSimulations
     assert moveNr == state.moveNumber()
     state.undoMove()

     black_score = (
         outcome_counts[BLACK] + 0.5 * outcome_counts[EMPTY]
     ) / self.numSimulations
     # The tally is from Black's point of view; flip it when White is to move.
     return 1 - black_score if state.current_player == WHITE else black_score
예제 #5
0
def playSingleGame(alternative=False):
    """Play one 7x7 Gomoku game between the two configured player programs.

    Two player processes are spawned with pexpect from the module-level
    ``player1`` / ``player2`` commands, plus an observer process
    (``flat_mc_player/Gomoku3.py``).  Every move is forwarded to the opponent
    and the observer, and the observer is asked for
    ``gogui-rules_final_result`` after each move.  The game is also mirrored
    on a local ``SimpleGoBoard``, which is printed after every move.

    Args:
        alternative: when true, ``player2`` plays Black and ``player1`` plays
            White; otherwise ``player1`` plays Black.

    Returns:
        ``(result, numTimeout)``.  ``result`` is by colour, not by program:
        1 when Black wins or White resigns/times out, 2 when White wins or
        Black resigns/times out, 0 for a draw.  ``numTimeout`` is always 0.
    """
    board_size = 7

    # Black is always spawned first; ``alternative`` only swaps the programs.
    if alternative:
        black_cmd, white_cmd = player2, player1
    else:
        black_cmd, white_cmd = player1, player2
    p1 = pexpect.spawn('python3 ' + black_cmd, timeout=timeout + 1)
    p2 = pexpect.spawn('python3 ' + white_cmd, timeout=timeout + 1)

    ob = pexpect.spawn('python3 flat_mc_player/Gomoku3.py')
    setupPlayer(p1)
    setupPlayer(p2)
    result = None
    # NOTE(review): never incremented, even when a player times out -- confirm
    # whether callers expect a real timeout count here.
    numTimeout = 0
    # Pre-set so the final print works when the game ends by resignation or
    # timeout before the observer has ever been queried.
    status = 'unknown'

    board = simple_board.SimpleGoBoard(board_size)

    # Per turn: (mover, opponent, GTP colour, board colour code,
    #            result when the mover resigns or times out)
    turns = (
        (p1, p2, 'b', 1, 2),
        (p2, p1, 'w', 2, 1),
    )
    final_results = {'black': 1, 'white': 2, 'draw': 0}
    sw = 0

    while True:
        mover, opponent, gtp_color, stone, forfeit_result = turns[sw]
        move = getMove(mover, gtp_color)
        assert move != 'pass'
        if move in ('resign', 'timeout'):
            result = forfeit_result
            break
        playMove(opponent, gtp_color, move)
        playMove(ob, gtp_color, move)

        # Mirror the move on the local board, which works with point indices.
        coord = gtp_connection.move_to_coord(move, board_size)
        point = board.pt(coord[0], coord[1])
        board.play_move_gomoku(point, stone)

        sw = 1 - sw
        print(point)
        print(board_util.GoBoardUtil.get_twoD_board(board))

        ob.sendline('gogui-rules_final_result')
        ob.expect(['= black', '= white', '= draw', '= unknown'])
        # Drop the leading "= " of the GTP response.
        status = ob.after.decode("utf-8")[2:]

        if status in final_results:
            result = final_results[status]
            break
        assert status == 'unknown'
    print(status)

    return result, numTimeout