Esempio n. 1
0
    def genmove(self, moves, state, color):
        """Pick a move for ``color`` by sampling playouts of candidate moves.

        Every sample asks ``self._choose_action()`` for a candidate, plays it
        on a deep copy of ``state``, scores the resulting position with
        ``mysimulate(color)`` and folds that reward into the candidate's
        running average.  One initial sample is followed by ``len(moves) * 100``
        further samples.

        Side effects: overwrites ``self.numSimulations``, ``self.moves``,
        ``self.count``, ``self.avg_rewards``, ``self.preAction`` and
        ``self.bestMove``, and increments ``self.time`` once per sample
        (``self.time`` must already exist).

        Args:
            moves: sequence of candidate moves; they are used as the keys of
                the count / average-reward tables.
            state: current position.  It is never played on directly, each
                sample works on ``copy.deepcopy(state)``.
            color: side to move, forwarded to ``play_move_gomoku`` and
                ``mysimulate``.

        Returns:
            ``moves[0]`` when there is exactly one candidate, otherwise the
            candidate holding the highest average reward once sampling ends.
        """
        assert not state.endOfGame()
        moveNr = len(moves)
        self.numSimulations = moveNr * 100
        if moveNr == 1:
            # Nothing to compare against; skip the sampling entirely.
            return moves[0]

        # agent init
        self.moves = moves
        self.count = dict.fromkeys(moves, 0)
        self.avg_rewards = dict.fromkeys(moves, 0)

        def sample_once():
            """Simulate one chosen candidate and update its running average."""
            self.preAction = self._choose_action()
            action = self.preAction
            self.count[action] += 1
            self.time += 1
            coord = move_to_coord(action, state.size)
            point = coord_to_point(coord[0], coord[1], state.size)
            copy_board = copy.deepcopy(state)
            copy_board.play_move_gomoku(point, color)
            reward = copy_board.mysimulate(color)
            # Incremental mean: avg += (reward - avg) / n
            self.avg_rewards[action] += (
                (reward - self.avg_rewards[action]) / self.count[action])
            return action

        # agent start
        sample_once()
        highest_reward = max(self.avg_rewards.values())
        for move, avg in self.avg_rewards.items():
            if avg == highest_reward:
                # No break: among equal averages the last candidate wins.
                self.bestMove = move

        # agent step
        for _ in range(self.numSimulations):
            action = sample_once()
            best_avg = self.avg_rewards[self.bestMove]
            if self.avg_rewards[action] > best_avg:
                self.bestMove = action
            elif action == self.bestMove:
                # The leader's own average just changed and may have dropped
                # below another candidate; without this re-check a stale
                # leader could be returned.  Ties keep the current leader.
                leader = max(self.avg_rewards, key=self.avg_rewards.get)
                if self.avg_rewards[leader] > best_avg:
                    self.bestMove = leader

        return self.bestMove
Esempio n. 2
0
def play_move(board, move, color):
    """Play ``move`` for ``color`` on ``board``.

    ``move`` may be given either as a string, which is translated with
    ``move_to_coord`` / ``coord_to_point`` using ``board.size``, or as any
    other value, which is handed to ``board.play_move_gomoku`` unchanged.
    The result of ``play_move_gomoku`` is discarded; nothing is returned.
    """
    target = move
    if isinstance(move, str):
        # Textual move -> coordinate pair -> board point.
        coord = move_to_coord(move, board.size)
        target = coord_to_point(coord[0], coord[1], board.size)
    board.play_move_gomoku(target, color)
Esempio n. 3
0
def undo(board, move):
    """Take ``move`` back off ``board``.

    The point addressed by ``move`` is set to ``EMPTY`` in ``board.board`` and
    ``board.current_player`` is switched to its opponent.  Only these two
    fields are touched here.

    ``move`` may be a string, which is translated with ``move_to_coord`` /
    ``coord_to_point`` using ``board.size``, or any other value, which is
    used directly as the index into ``board.board``.
    """
    if isinstance(move, str):
        coord = move_to_coord(move, board.size)
        target = coord_to_point(coord[0], coord[1], board.size)
    else:
        target = move

    board.board[target] = EMPTY
    board.current_player = GoBoardUtil.opponent(board.current_player)
Esempio n. 4
0
 def simulate(self, state, move, color):
     """Score ``move`` by running ``self.numSimulations`` playouts after it.

     ``move`` is played on ``state`` for ``color``, the playouts are run from
     that position (rewinding with ``resetToMoveNumber`` after each one), and
     the move is taken back with ``undoMove`` before returning, so ``state``
     is handed back at the move number it came in with.

     Returns:
         ``(BLACK wins + 0.5 * EMPTY results) / numSimulations``, replaced by
         one minus that value when ``state.current_player`` is ``WHITE``
         after the move has been undone.
     """
     # Translate the move into its board point and play it.
     coord = move_to_coord(move, state.size)
     point = coord_to_point(coord[0], coord[1], state.size)
     state.play_move_gomoku(point, color)
     start_move_number = state.moveNumber()

     # One tally slot per possible playout result; the result indexes it.
     tallies = [0, 0, 0]
     for _playout in range(self.numSimulations):
         result = state.simulate()
         winner, _unused = result
         tallies[winner] += 1
         # Rewind to the position right after ``move``.
         state.resetToMoveNumber(start_move_number)

     assert sum(tallies) == self.numSimulations
     assert start_move_number == state.moveNumber()
     state.undoMove()

     black_score = (
         (tallies[BLACK] + 0.5 * tallies[EMPTY]) / self.numSimulations)
     if state.current_player == WHITE:
         return 1 - black_score
     return black_score