Example No. 1
import numpy as np

def mcts(board, player, root, network):
    """
    board: np.array((19, 19, 3)), channels-last
    take board, player turn (0 for white, 1 for black) and root node
    return next move, updated board, policy vector, next root and a boolean for game status
    """

    # parameter: number of tree searches per move
    trials = 6

    # build tree
    for _ in range(trials):
        search(root, board, player, network)

    # build the full 361-entry policy: occupied squares stay zero,
    # the root's values fill the empty ones (sign-flipped for black)
    p = np.ones(361) - (board[:, :, 0] + board[:, :, 1]).flatten()
    p[p == 1] = root.get_mcts() * (1 - 2 * player)

    # get the chosen move
    n = root.get_best_move(player)

    # update board
    x, y = get_pos_on_board(board, n)
    put_on_board(board, (x, y), player, 1)

    # expand the chosen child if it is still unexplored
    child = root.get_child(n)
    if child.leaf():
        expand(child, board, player ^ 1, network)

    # get game status
    _, e = evaluate(board, player, (x, y))

    return ((x, y), board, p, child, e)
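
Both mcts examples lean on two board helpers that are not shown. A minimal sketch of what they might look like, assuming a channels-last (19, 19, 3) board with one channel per player and node policies indexed over the empty squares in row-major order; the names match the calls above, but the bodies are guesses:

import numpy as np

def get_pos_on_board(board, n):
    """Coordinates of the n-th empty intersection, row-major.
    Node policies only cover empty squares, hence the board argument."""
    occupied = (board[:, :, 0] + board[:, :, 1]).flatten()
    empty = np.flatnonzero(occupied == 0)
    return divmod(int(empty[n]), 19)

def put_on_board(board, pos, player, value):
    """Place (value=1) or remove (value=0) a stone for player at pos."""
    x, y = pos
    board[x, y, player] = value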
Example No. 2
import numpy as np

def select(node, board, player):
    """
    return chosen child, updated board, new coordinates and next player
    """
    # choose the next node: white (0) maximizes, black (1) minimizes,
    # hence the sign flip on the scores
    n = np.argmax(node.get_policy() * (1 - 2 * player))
    child = node.get_child(n)
    child.add_frequency()  # count the visit
    # get coordinates of next move, and update board
    x, y = get_pos_on_board(board, n)
    put_on_board(board, (x, y), player, 1)
    return child, board, (x, y), player ^ 1
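
select also assumes a tree Node that carries per-child statistics. A bare-bones sketch consistent with the methods called across these examples; the fields and the running-mean backup are assumptions, and the extra accessors used elsewhere (get_mcts, get_best_move, get_max_frequency_move) would sit on the same class:

class Node:
    def __init__(self):
        self.children = []     # one child per empty square, filled by expand
        self.policy = None     # per-child selection scores, set by expand
        self.frequency = 0     # visit count
        self.value = 0.0       # running mean of backed-up values

    def leaf(self):
        return self.policy is None   # not expanded yet

    def get_policy(self):
        return self.policy

    def get_child(self, n):
        return self.children[n]

    def add_frequency(self):
        self.frequency += 1

    def score(self, value):
        # back-propagation step: fold the new result into the running mean
        self.value += (value - self.value) / max(self.frequency, 1)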
Example No. 3
def search(node, board, player, network):
    """
    node: object Node
    board: np.array(3,19,19)
    player: 0 for white, 1 for black
    do actions on a level of deepness
    """
    child, board, pos, next_player = select(node, board, player)
    # evaluate or keep searching
    if child.leaf():
        value = evaluate(board, player, pos)
        # if not a winning move
        if not value:
            value = expand(child, board, next_player, network)
    else:
        value = search(child, board, next_player, network)
    # undo the move on the shared board and back-propagate the value
    put_on_board(board, pos, player, 0)
    child.score(value)
    return value
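
expand is referenced throughout but never defined in these excerpts. A plausible sketch, assuming a Keras-style network.predict that returns a (policy, value) pair for a one-position batch; the masking mirrors the policy construction in the mcts examples, and the whole interface is a guess:

import numpy as np

def expand(node, board, player, network):
    """Query the network at a leaf: attach per-move scores over the
    empty squares, create the child slots, and return the value estimate."""
    # player is kept for signature parity; a real implementation might
    # encode the side to move in the board's third channel
    policy, value = network.predict(board[np.newaxis])
    empty = (board[:, :, 0] + board[:, :, 1]).flatten() == 0
    node.policy = policy.flatten()[empty]   # scores for empty squares only
    node.children = [Node() for _ in range(int(empty.sum()))]
    return float(np.squeeze(value))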
Example No. 4
import sys

def human_turn(board, node, player, net):
    print("Your turn, Human")

    e = 0
    while not e:
        try:
            x = int(input("x: "))
            y = int(input("y: "))
            e = 1
        except ValueError:
            print("wrong format, integers required")
            q = input("quit? (y/n): ")
            if q == "y":
                sys.exit(0)

    pos = (x, y)

    put_on_board(board, pos, player, 1)

    node = update_turn(board, player ^ 1, node, net, pos)  # advance the search tree past this move
    _, r = evaluate(board, player, pos)

    return node, r
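
For context, a usage sketch of how human_turn could alternate with the MCTS player in a game loop; the empty-board setup, the turn convention, and the unpacking of mcts (as in Example No. 1) are all assumptions:

import numpy as np

def play(net):
    board = np.zeros((19, 19, 3))
    root = Node()
    player, over = 0, 0
    while not over:
        if player == 0:
            root, over = human_turn(board, root, player, net)
        else:
            _, board, _, root, over = mcts(board, player, root, net)
        player ^= 1
    print("game over")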
Example No. 5
import numpy as np

def mcts(board, player, root, network):
    """
    board: np.array((19, 19, 3)), channels-last
    take board, player turn (0 for white, 1 for black) and root node
    return next move, updated board, policy vector, next root and game status
    """
    # parameter: number of tree searches per move
    trials = 6

    # build tree
    for _ in range(trials):
        search(root, board, player, network)

    # build the full 361-entry policy: occupied squares stay zero,
    # the root's values fill the empty ones (sign-flipped for black)
    p = np.ones(361) - (board[:, :, 0] + board[:, :, 1]).flatten()
    p[p == 1] = root.get_policy() * (1 - 2 * player)

    # pick the most-visited child, get its coordinates, and update board
    n = root.get_max_frequency_move()
    x, y = get_pos_on_board(board, n)
    put_on_board(board, (x, y), player, 1)

    return ((x, y), board, p, root.get_child(n),
            evaluate(board, player, (x, y)))
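
This variant picks the most-visited child rather than the best-scoring one, the usual choice for the final move in AlphaZero-style search. As a closing usage sketch, it could drive self-play data collection roughly as below, unpacking the return as in Example No. 1; the replay-tuple format is an assumption:

import numpy as np

def self_play(net):
    board = np.zeros((19, 19, 3))
    root, player, over = Node(), 0, 0
    history = []
    while not over:
        _, board, p, root, over = mcts(board, player, root, net)
        history.append((board.copy(), p, player))  # outcome labels added post-game
        player ^= 1
    return history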