Code example #1
def play_against_network(evaluator, opponent_evaluator, color, conf):
    # evaluators[0] for black player, evaluators[1] for white player
    evaluators = [evaluator, opponent_evaluator]
    if color == WHITE:
        evaluators[0], evaluators[1] = evaluators[1], evaluators[0]

    # create search trees for both players
    roots = [None, None]
    for i in range(2):
        roots[i] = TreeNode(None, None, evaluators[i], conf)

    # black player goes first (0 for black, 1 for white)
    player = 0

    previous_action = None
    t = 0
    while t < conf.MAX_GAME_LENGTH:
        # perform MCTS
        for _ in range(conf.NUM_SIMULATIONS):
            tree_search(roots[player], evaluators[player], conf)

        # calculate the distribution of action selection
        # temperature tau -> 0
        m = max(roots[player].n)
        p = [0 if x < m else 1 for x in roots[player].n]
        s = sum(p)
        pi = np.array([x / s for x in p], dtype=np.float32)

        # choose an action
        action = np.random.choice(conf.NUM_ACTIONS, p=pi)

        # take the action
        for i in range(2):
            if roots[i].children[action] is None:
                roots[i].children[action] = \
                    TreeNode(roots[i], action, evaluators[i], conf)
            roots[i] = roots[i].children[action]

            # release memory
            roots[i].parent.children = None

        t += 1

        # switch to the other player
        player = 1 - player

        # game terminates when both players pass
        if previous_action is not None \
                and previous_action == conf.PASS \
                and action == conf.PASS:
            break
        previous_action = action

    score_black, score_white = roots[0].go.score()

    return (score_black > score_white) == (color == BLACK)
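
The boolean return value (True exactly when the first evaluator's side wins) makes the function convenient for match statistics. A minimal usage sketch; the helper name win_rate and the alternating-color scheme are assumptions, not part of the snippet above:

def win_rate(evaluator, opponent_evaluator, num_games, conf):
    # play num_games games, alternating colors so neither side always
    # moves first, and return the fraction won by evaluator
    wins = 0
    for i in range(num_games):
        color = BLACK if i % 2 == 0 else WHITE
        if play_against_network(evaluator, opponent_evaluator, color, conf):
            wins += 1
    return wins / num_games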
Code example #2
def self_play(evaluator, resign_threshold, conf):
    examples = []

    allow_resign = resign_threshold > -1.0 \
        and np.random.rand() >= conf.RESIGN_SAMPLE_RATE
    resign_value_history = None if allow_resign else []
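    # (when resignation is disabled for this game, the per-move
    # resignation values are recorded instead; presumably, as in
    # AlphaGo Zero, these fully played-out games serve to calibrate
    # resign_threshold)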

    # result undecided
    result = 0.0

    # create a search tree
    root = TreeNode(None, None, evaluator, conf)

    previous_action = None
    t = 0
    while t < conf.MAX_GAME_LENGTH:
        # perform MCTS
        for _ in range(conf.NUM_SIMULATIONS):
            tree_search(root, evaluator, conf)

        # we follow AlphaGo's method to calculate the resignation value
        # notice that children with n = 0 are skipped by setting their
        # value to be -1.0 (w / n > -1.0 for children with n > 0)
        resign_value = max(
            map(lambda w, n: -1.0 if n == 0 else w / n, root.w, root.n))
        if not allow_resign:
            resign_value_history.append([resign_value, root.go.turn])
        elif -1.0 < resign_value <= resign_threshold:
            result = 1.0 if root.go.turn == WHITE else -1.0
            break

        # calculate the distribution of action selection
        # notice that illegal actions always have zero probability as
        # long as NUM_SIMULATIONS > 0
        if t < conf.EXPLORATION_TIME:
            # temperature tau = 1
            s = sum(root.n)
            pi = [x / s for x in root.n]
        else:
            # temperature tau -> 0
            m = max(root.n)
            p = [0 if x < m else 1 for x in root.n]
            s = sum(p)
            pi = [x / s for x in p]

        # save position, distribution of action selection and turn
        examples.append([
            extract_feature(root, conf),
            np.array(pi, dtype=np.float32),
            np.array([root.go.turn], dtype=np.float32),
        ])

        # choose an action
        action = np.random.choice(conf.NUM_ACTIONS, p=pi)

        # take the action
        root = root.children[action]

        # release memory
        root.parent.children = None

        t += 1

        # game terminates when both players pass
        if previous_action is not None \
                and previous_action == conf.PASS \
                and action == conf.PASS:
            break
        previous_action = action

    # calculate the scores if the result is undecided
    if result == 0.0:
        score_black, score_white = root.go.score()
        result = 1.0 if score_black > score_white else -1.0

    # update the game winner from the perspective of each player
    for i in range(len(examples)):
        examples[i][2] *= result

    return examples, resign_value_history, result
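
The two temperature branches above implement the AlphaGo Zero move-selection rule pi(a) ∝ n(a)^(1/tau): tau = 1 samples proportionally to visit counts during the first EXPLORATION_TIME moves, and tau → 0 degenerates to a uniform choice among the most-visited actions. A standalone illustration with made-up visit counts:

# made-up visit counts for four actions
n = [3, 5, 2, 0]

# tau = 1: sample proportionally to visit counts
pi_tau_1 = [x / sum(n) for x in n]    # [0.3, 0.5, 0.2, 0.0]

# tau -> 0: uniform over the most-visited actions
m = max(n)
p = [0 if x < m else 1 for x in n]
pi_tau_0 = [x / sum(p) for x in p]    # [0.0, 1.0, 0.0, 0.0]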
Code example #3
def play_against_human(model_file, human_plays_black):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    model = torch.load(model_file)
    conf = model['conf']

    # load the network
    network = ZetaGoNetwork(conf)
    network.load_state_dict(model['best_network'])
    network.to(device)

    # create an evaluator
    evaluator = DefaultEvaluator(network, device)

    # create a search tree
    root = TreeNode(None, None, evaluator, conf)

    gui = GUI(conf)

    human_turn = human_plays_black
    previous_action = None
    while True:
        if human_turn:
            # wait for human player's action
            action = gui.wait_for_action(root.go)
        else:
            # calculate computer's action
            gui.update_text('Computer is thinking...')

            # perform MCTS
            for _ in range(conf.NUM_SIMULATIONS):
                tree_search(root, evaluator, conf)

            # calculate the distribution of action selection
            # temperature tau -> 0
            m = max(root.n)
            p = [0 if x < m else 1 for x in root.n]
            s = sum(p)
            pi = np.array([x / s for x in p], dtype=np.float32)

            # choose an action
            action = np.random.choice(conf.NUM_ACTIONS, p=pi)

        # take the action
        if root.children[action] is None:
            root.children[action] = \
                TreeNode(root, action, evaluator, conf)
        root = root.children[action]

        # release memory
        root.parent.children = None

        # update GUI
        gui.update_go(root.go)
        gui.update_text('Computer passes' if action == conf.PASS else '')

        # game terminates when both players pass
        if previous_action is not None \
                and previous_action == conf.PASS \
                and action == conf.PASS:
            black_score, white_score = root.go.score()
            winner = 'BLACK' if black_score > white_score else 'WHITE'
            gui.update_text(f'{winner} wins, {black_score} : {white_score}')
            gui.freeze()

        previous_action = action
        human_turn = not human_turn
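
A usage sketch for this entry point; the checkpoint path below is illustrative only:

if __name__ == '__main__':
    # the human plays black against the loaded network
    play_against_human('checkpoints/best_model.pt', human_plays_black=True)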
Code example #4
File: play.py  Project: xiao-liu/zeta-go-legacy
def mutual_play(network_black, network_white, device, conf):
    # create evaluators for both players
    evaluator_black = DefaultEvaluator(network_black, device)
    evaluator_white = DefaultEvaluator(network_white, device)

    # create search trees for both players, each backed by its own
    # evaluator (matching the TreeNode(parent, action, evaluator, conf)
    # signature used in the rest of this function)
    root_black = TreeNode(None, None, evaluator_black, conf)
    root_white = TreeNode(None, None, evaluator_white, conf)

    # black player goes first
    root = root_black
    evaluator = evaluator_black

    previous_action = None
    t = 0
    while t < conf.MAX_GAME_LENGTH:
        # the player to move performs MCTS with its own tree and network
        for _ in range(conf.NUM_SIMULATIONS):
            tree_search(root, evaluator, conf)

        # calculate the distribution of action selection
        # temperature tau -> 0
        m = max(root.n)
        p = [0 if x < m else 1 for x in root.n]
        s = sum(p)
        pi = np.array([x / s for x in p], dtype=np.float32)

        # choose an action
        action = np.random.choice(conf.NUM_ACTIONS, p=pi)

        # take the action
        if root_black.children[action] is None:
            root_black.children[action] = \
                TreeNode(root_black, action, evaluator_black, conf)
        root_black = root_black.children[action]
        if root_white.children[action] is None:
            root_white.children[action] = \
                TreeNode(root_white, action, evaluator_white, conf)
        root_white = root_white.children[action]

        # release memory
        root_black.parent.children = None
        root_white.parent.children = None

        # switch to the other search tree
        if root.go.turn == BLACK:
            root = root_white
            evaluator = evaluator_white
        else:
            root = root_black
            evaluator = evaluator_black

        t += 1

        # game terminates when both players pass
        if previous_action is not None \
                and previous_action == conf.PASS \
                and action == conf.PASS:
            break
        previous_action = action

    score_black, score_white = root.go.score()

    return score_black > score_white
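
Since mutual_play returns True exactly when black wins, a caller that gates a candidate network against the current best has to swap the argument order when alternating colors. A sketch; the helper name and the 55% threshold (borrowed from AlphaGo Zero's evaluation step, not from this project) are assumptions:

def candidate_passes_gate(candidate, best, device, conf, num_games=20):
    wins = 0
    for i in range(num_games):
        if i % 2 == 0:
            # candidate plays black, so a black win is a candidate win
            wins += 1 if mutual_play(candidate, best, device, conf) else 0
        else:
            # candidate plays white, so a black loss is a candidate win
            wins += 0 if mutual_play(best, candidate, device, conf) else 1
    # require a 55% win rate before promoting the candidate
    return wins / num_games >= 0.55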