Example 1
def run(states, sensible_moves, currentPlayer, lastMove):
    # Number of consecutive pieces needed to win, and the board size.
    n = 5
    width, height = 8, 8
    board = Board(width=width, height=height, n_in_row=n)
    board.init_board()

    # Restore the position handed in by the caller.
    board.states = states
    board.availables = sensible_moves
    board.current_player = currentPlayer
    board.last_move = lastMove

    # Policy-value network; policy_param (the trained weights) is
    # expected to exist as a module-level global.
    best_policy = PolicyValueNetNumpy(width, height, policy_param)
    # MCTS player guided by the network's policy_value_fn.
    mcts_player = MCTSPlayer(best_policy.policy_value_fn,
                             c_puct=5,
                             n_playout=400)

    # Run the tree search and return the chosen move.
    nextmove = mcts_player.get_action(board)

    return nextmove
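
The snippet references Board, PolicyValueNetNumpy, MCTSPlayer, and a policy_param global that it never defines. Below is a minimal sketch of the surrounding setup, assuming the module layout and pickled model file of the AlphaZero_Gomoku project (game, mcts_alphaZero, policy_value_net_numpy, best_policy_8_8_5.model); the sample position and keyword names are made up for illustration and are not part of the original.

import pickle

# Assumed module layout (AlphaZero_Gomoku-style); adjust to your project.
from game import Board
from mcts_alphaZero import MCTSPlayer
from policy_value_net_numpy import PolicyValueNetNumpy

# run() reads policy_param as a module-level global: the trained network
# weights, here unpickled from a saved model file (assumed file name).
with open('best_policy_8_8_5.model', 'rb') as f:
    policy_param = pickle.load(f)  # may need encoding='bytes' for old pickles

# Example invocation: states maps move index -> player (1 or 2),
# sensible_moves lists the still-empty positions on the 8x8 board,
# and lastMove is the move index the opponent just played.
states = {36: 1, 28: 2}
sensible_moves = [m for m in range(8 * 8) if m not in states]
nextmove = run(states, sensible_moves, currentPlayer=1, lastMove=28)
print(nextmove)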
Example 2
def run(states, sensible_moves, currentPlayer, lastMove):
    # Number of consecutive pieces needed to win
    n = 5
    # Board width and height
    width, height = 8, 8
    board = Board(width=width, height=height, n_in_row=n)
    board.init_board()

    board.states = states
    board.availables = sensible_moves
    board.current_player = currentPlayer
    board.last_move = lastMove
    # Policy-value network
    best_policy = PolicyValueNetNumpy(width, height, policy_param)
    # Monte Carlo tree search player guided by the network
    mcts_player = MCTSPlayer(best_policy.policy_value_fn,
                             c_puct=5,
                             n_playout=400)

    # Get the next move to play from the Monte Carlo tree search
    nextmove = mcts_player.get_action(board)

    return nextmove
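
In both examples, c_puct=5 sets the exploration constant in the PUCT selection rule: larger values make the search lean more on the network's prior probabilities before exploiting accumulated visit statistics. n_playout=400 is the number of simulations run from the current position before get_action commits to a move; raising it generally improves playing strength at the cost of response time.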