Example #1
    def play(self):
        """Function to play a game vs the AI."""
        print("Start Human vs AI\n")

        mcts = MonteCarloTreeSearch(self.net)
        game = self.game.clone()  # Create a fresh clone for each game.
        game_over = False
        value = 0
        node = TreeNode()

        print("Enter your move in the form: row, column. Eg: 1,1")
        go_first = input("Do you want to go first: y/n?")

        if go_first.lower().strip() == 'y':
            print("You play as X")
            human_value = 1

            game.print_board()
        else:
            print("You play as O")
            human_value = -1

        # Keep playing until the game is in a terminal state.
        while not game_over:
            # If it is the human's turn, read a move from input;
            # otherwise run MCTS simulations to get the best child node.
            if game.current_player == human_value:
                action = input("Enter your move: ")
                if isinstance(action, str):
                    action = [int(n, 10) for n in action.split(",")]
                    action = (1, action[0], action[1])

                best_child = TreeNode()
                best_child.action = action
            else:
                best_child = mcts.search(game, node,
                                         CFG.temp_final)

            action = best_child.action
            game.play_action(action)  # Play the child node's action.

            game.print_board()

            game_over, value = game.check_game_over(game.current_player)

            best_child.parent = None
            node = best_child  # Make the child node the root node.

        if value == human_value * game.current_player:
            print("You won!")
        elif value == -human_value * game.current_player:
            print("You lost.")
        else:
            print("Draw Match")
        print("\n")
Example #2
def play_against_network(evaluator, opponent_evaluator, color, conf):
    # evaluators[0] for black player, evaluators[1] for white player
    evaluators = [evaluator, opponent_evaluator]
    if color == WHITE:
        evaluators[0], evaluators[1] = evaluators[1], evaluators[0]

    # create search trees for both players
    roots = [None, None]
    for i in range(2):
        roots[i] = TreeNode(None, None, evaluators[i], conf)

    # black player goes first (0 for black, 1 for white)
    player = 0

    previous_action = None
    t = 0
    while t < conf.MAX_GAME_LENGTH:
        # perform MCTS
        for _ in range(conf.NUM_SIMULATIONS):
            tree_search(roots[player], evaluators[player], conf)

        # calculate the distribution of action selection
        # temperature tau -> 0
        m = max(roots[player].n)
        p = [0 if x < m else 1 for x in roots[player].n]
        s = sum(p)
        pi = np.array([x / s for x in p], dtype=np.float32)

        # choose an action
        action = np.random.choice(conf.NUM_ACTIONS, p=pi)

        # take the action
        for i in range(2):
            if roots[i].children[action] is None:
                roots[i].children[action] = \
                    TreeNode(roots[i], action, evaluators[i], conf)
            roots[i] = roots[i].children[action]

            # release memory
            roots[i].parent.children = None

        t += 1

        # switch to the other player
        player = 1 - player

        # game terminates when both players pass
        if previous_action is not None \
                and previous_action == conf.PASS \
                and action == conf.PASS:
            break
        previous_action = action

    score_black, score_white = roots[0].go.score()

    return (score_black > score_white) == (color == BLACK)
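
The visit-count-to-policy lines above (m, p, s, pi) implement a temperature-zero policy: children whose visit count is below the maximum get probability 0, and any tied maxima share the mass uniformly, so np.random.choice effectively picks an argmax with random tie-breaking. A small self-contained sketch of the same computation (illustrative only):

import numpy as np

def tau_zero_policy(visit_counts):
    """Greedy (temperature -> 0) policy: uniform over the most-visited
    actions, zero everywhere else."""
    n = np.asarray(visit_counts, dtype=np.float32)
    p = (n == n.max()).astype(np.float32)
    return p / p.sum()

print(tau_zero_policy([3, 7, 7, 0]))  # [0.  0.5 0.5 0. ]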
Example #3
    def play(self):
        datas, node = [], TreeNode()
        mc = MonteCarloTreeSearch(self.net)
        move_count = 0

        while True:
            if move_count < TEMPTRIG:
                pi, next_node = mc.search(self.board, node, temperature=1)
            else:
                pi, next_node = mc.search(self.board, node)

            datas.append([self.board.gen_state(), pi, self.board.c_player])

            self.board.move(next_node.action)
            next_node.parent = None
            node = next_node

            if self.board.is_draw():
                reward = 0.
                break

            if self.board.is_game_over():
                reward = 1.
                break

            self.board.trigger()
            move_count += 1

        datas = np.asarray(datas)
        # Compute the winner mask before writing: the first assignment below
        # overwrites the stored player values, so a second comparison against
        # c_player would otherwise see the new values instead of the originals.
        winner_mask = datas[:, 2] == self.board.c_player
        datas[:, 2][winner_mask] = reward
        datas[:, 2][~winner_mask] = -reward

        return datas
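
The relabelling at the end of Example #3 is order-sensitive, which is why the mask is computed before either assignment: once the first write replaces the stored player values with `reward`, a second comparison against `c_player` would see the new values and could flip the winner's rows as well. A tiny demonstration of the pitfall (hypothetical values):

import numpy as np

col = np.array([-1, 1, -1], dtype=np.float32)  # player to move when each state was stored
winner, reward = -1, 1.0

# Sequential relabelling: the first write changes the values the second
# comparison inspects, so every row ends up labelled as a loss.
bad = col.copy()
bad[bad == winner] = reward
bad[bad != winner] = -reward
print(bad)   # [-1. -1. -1.]

# Precomputing the mask keeps the two writes independent.
good = col.copy()
mask = good == winner
good[mask] = reward
good[~mask] = -reward
print(good)  # [ 1. -1.  1.]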
Example #4
    def play_game(self, game, training_data):
        """Loop for each self-play game.

        Runs MCTS for each game state and plays a move based on the MCTS output.
        Stops when the game is over and prints out a winner.

        Args:
            game: An object containing the game state.
            training_data: A list to store self play states, pis and vs.
        """
        mcts = MonteCarloTreeSearch(self.net)

        game_over = False
        value = 0
        self_play_data = []
        count = 0

        node = TreeNode()

        # Keep playing until the game is in a terminal state.
        while not game_over:
            # MCTS simulations to get the best child node.
            if count < CFG.temp_thresh:
                best_child, prob_vector = mcts.search(game, node,
                                                      CFG.temp_init)
            else:
                best_child, prob_vector = mcts.search(game, node,
                                                      CFG.temp_final)

            # Store state, prob and v for training.
            if best_child is not None:
                self_play_data.append(
                    [deepcopy(game.state),
                     deepcopy(prob_vector), 0])

                action = best_child.action
                game.play_action(action)  # Play the child node's action.
                count += 1
                # print('Next player is', game.current_player)

                game_over, value = game.check_game_over(game.current_player)

                best_child.parent = None
                node = best_child  # Make the child node the root node.
            else:
                self_play_data.append(
                    [deepcopy(game.state),
                     deepcopy(prob_vector), 0])
                game.current_player *= -1
                # print('NO ACTION TAKEN, Next player is', game.current_player)

        # Update v as the value of the game result.
        print('FINAL SCORES ARE ', game.score)
        for game_state in self_play_data:
            value = -value
            game_state[2] = value
            self.augment_data(game_state, training_data, game.row, game.column)
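
The closing loop in this example (and in Example #7 below) back-fills the value target by flipping the sign of the terminal result once per stored position, which assumes the two players strictly alternate in the recorded data. Written out as a standalone helper (hypothetical name, sketch only, not taken from the source):

def backfill_values(self_play_data, terminal_value):
    """Assign the game outcome to each stored [state, pi, v] record,
    flipping the sign every ply so each label is from the perspective of
    the player to move at that position (assumes alternating turns)."""
    v = terminal_value
    for record in self_play_data:
        v = -v
        record[2] = v
    return self_play_data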
Example #5
    def evaluate(self):
        """Play self-play games between the two networks and record game stats.

        Returns:
            Wins and losses count from the perspective of the current network.
        """
        wins = 0
        losses = 0

        # Self-play loop
        for i in range(self.num_eval_games):
            print("Start Evaluation Self-Play Game:", i, "\n")

            game = self.game.clone()  # Create a fresh clone for each game.
            game_over = False
            value = 0
            node = TreeNode()

#             player = game.current_player

            # Keep playing until the game is in a terminal state.
            while not game_over:
                # MCTS simulations to get the best child node.
                # If player_to_eval is 1 play using the current network
                # Else play using the evaluation network.
#                 if game.current_player == 1:
                best_child = self.current_mcts.search(game, node,
                                                          self.temp_final)
#                 else:
#                     best_child = self.eval_mcts.search(game, node,
#                                                        self.temp_final)

                action = best_child.action
                
                game.play_action(action)  # Play the child node's action.

                game_over, value = game.check_game_over()

                best_child.parent = None
                node = best_child  # Make the child node the root node.
            
            game.print_board()
            final_score = game.evaluate()
            print('Score : ', final_score,
                  ' (% of best score possible : ',
                  np.round(final_score * 100 / game.maxScore, 2), '%)')

            if value == 1:
                print("win")
                wins += 1
            elif value == -1:
                print("loss")
                losses += 1
            else:
                print("draw")
            print("\n")

        return wins, losses
Example #6
    def go(self):
        print("One rule:\r\n Move piece form 'x,y' \r\n eg 1,3\r\n")
        print("-" * 60)
        print("Ready Go")

        mc = MonteCarloTreeSearch(self.net, 1000)
        node = TreeNode()
        board = Board()

        while True:
            if board.c_player == BLACK:
                action = input(f"Your piece is 'O' and move: ")
                action = [int(n, 10) for n in action.split(",")]
                action = action[0] * board.size + action[1]
                next_node = TreeNode(action=action)
            else:
                _, next_node = mc.search(board, node)

            board.move(next_node.action)
            board.show()

            next_node.parent = None
            node = next_node

            if board.is_draw():
                print("-" * 28 + "Draw" + "-" * 28)
                return

            if board.is_game_over():
                if board.c_player == BLACK:
                    print("-" * 28 + "Win" + "-" * 28)
                else:
                    print("-" * 28 + "Loss" + "-" * 28)
                return

            board.trigger()
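
The human-input branch above flattens the `row,column` pair into the single action index the search tree uses (`row * board.size + column`). A minimal sketch of the mapping and its inverse (assuming a square board of side `size`):

def to_flat_index(row, col, size):
    """Map a (row, col) pair to the flat action index used by the tree."""
    return row * size + col

def to_row_col(index, size):
    """Inverse mapping, convenient for printing a chosen action."""
    return divmod(index, size)

print(to_flat_index(1, 3, 15))  # 18 on a hypothetical 15x15 board
print(to_row_col(18, 15))       # (1, 3)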
Example #7
    def play_game(self, game, training_data):
        """Loop for each self-play game.

        Runs MCTS for each game state and plays a move based on the MCTS output.
        Stops when the game is over and prints out a winner.

        Args:
            game: An object containing the game state.
            training_data: A list to store self play states, pis and vs.
        """
        mcts = MonteCarloTreeSearch(self.net)

        game_over = False
        value = 0
        self_play_data = []
        count = 0

        node = TreeNode()

        # Keep playing until the game is in a terminal state.
        while not game_over:
            # MCTS simulations to get the best child node.
            if count < self.temp_thresh:
                best_child = mcts.search(game, node, self.temp_init)
            else:
                best_child = mcts.search(game, node, self.temp_final)

            # Store state, prob and v for training.
            self_play_data.append([
                deepcopy(game.state['state']),
                deepcopy(best_child.parent.child_psas), 0
            ])

            action = best_child.action
            game.play_action(action)  # Play the child node's action.
            count += 1
            ''' TO BE COMPLETED !! '''
            game_over, value = game.check_game_over()

            best_child.parent = None
            node = best_child  # Make the child node the root node.

        # Update v as the value of the game result.
        for game_state in self_play_data:
            value = -value
            game_state[2] = value
            self.augment_data(game_state, training_data, game.row, game.column)
Example #8
    def evaluate(self, result):
        self.net.eval()
        self.evl_net.eval()

        if random.randint(0, 1) == 1:
            players = {
                BLACK: (MonteCarloTreeSearch(self.net), "net"),
                WHITE: (MonteCarloTreeSearch(self.evl_net), "eval"),
            }
        else:
            players = {
                WHITE: (MonteCarloTreeSearch(self.net), "net"),
                BLACK: (MonteCarloTreeSearch(self.evl_net), "eval"),
            }
        node = TreeNode()

        while True:
            _, next_node = players[self.board.c_player][0].search(
                self.board, node)

            self.board.move(next_node.action)

            if self.board.is_draw():
                result[0] += 1
                return

            if self.board.is_game_over():
                if players[self.board.c_player][1] == "net":
                    result[1] += 1
                else:
                    result[2] += 1
                return

            self.board.trigger()

            next_node.parent = None
            node = next_node
Example #9
def play_against_human(model_file, human_plays_black):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    model = torch.load(model_file)
    conf = model['conf']

    # load the network
    network = ZetaGoNetwork(conf)
    network.load_state_dict(model['best_network'])
    network.to(device)

    # create an evaluator
    evaluator = DefaultEvaluator(network, device)

    # create a search tree
    root = TreeNode(None, None, evaluator, conf)

    gui = GUI(conf)

    human_turn = human_plays_black
    previous_action = None
    while True:
        if human_turn:
            # wait for human player's action
            action = gui.wait_for_action(root.go)
        else:
            # calculate computer's action
            gui.update_text('Computer is thinking...')

            # perform MCTS
            for _ in range(conf.NUM_SIMULATIONS):
                tree_search(root, evaluator, conf)

            # calculate the distribution of action selection
            # temperature tau -> 0
            m = max(root.n)
            p = [0 if x < m else 1 for x in root.n]
            s = sum(p)
            pi = np.array([x / s for x in p], dtype=np.float32)

            # choose an action
            action = np.random.choice(conf.NUM_ACTIONS, p=pi)

        # take the action
        if root.children[action] is None:
            root.children[action] = \
                TreeNode(root, action, evaluator, conf)
        root = root.children[action]

        # release memory
        root.parent.children = None

        # update GUI
        gui.update_go(root.go)
        gui.update_text('Computer passes' if action == conf.PASS else '')

        # game terminates when both players pass
        if previous_action is not None \
                and previous_action == conf.PASS \
                and action == conf.PASS:
            black_score, white_score = root.go.score()
            winner = 'BLACK' if black_score > white_score else 'WHITE'
            gui.update_text(f'{winner} wins, {black_score} : {white_score}')
            gui.freeze()

        previous_action = action
        human_turn = not human_turn
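
Every loop in these examples re-roots the tree with the same two-step pattern: descend into the chosen child, then clear the old root's children. Dropping that list removes the only references to the sibling subtrees, so they can be garbage-collected, while the new root's `parent` link still points at the (now childless) old node. A sketch of the pattern as a helper (hypothetical `make_child` factory standing in for `TreeNode(root, action, evaluator, conf)`):

def advance_root(root, action, make_child):
    """Re-root the search tree on `action` and release the rest of the
    old tree by clearing the old root's children."""
    if root.children[action] is None:
        root.children[action] = make_child(root, action)
    new_root = root.children[action]
    new_root.parent.children = None  # drop sibling subtrees so they can be freed
    return new_root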
Example #10
def self_play(evaluator, resign_threshold, conf):
    examples = []

    allow_resign = resign_threshold > -1.0 \
        and np.random.rand() >= conf.RESIGN_SAMPLE_RATE
    resign_value_history = None if allow_resign else []

    # result undecided
    result = 0.0

    # create a search tree
    root = TreeNode(None, None, evaluator, conf)

    previous_action = None
    t = 0
    while t < conf.MAX_GAME_LENGTH:
        # perform MCTS
        for _ in range(conf.NUM_SIMULATIONS):
            tree_search(root, evaluator, conf)

        # we follow AlphaGo's method to calculate the resignation value
        # notice that children with n = 0 are skipped by setting their
        # value to be -1.0 (w / n > -1.0 for children with n > 0)
        resign_value = max(
            map(lambda w, n: -1.0 if n == 0 else w / n, root.w, root.n))
        if not allow_resign:
            resign_value_history.append([resign_value, root.go.turn])
        elif -1.0 < resign_value <= resign_threshold:
            result = 1.0 if root.go.turn == WHITE else -1.0
            break

        # calculate the distribution of action selection
        # notice that illegal actions always have zero probability as
        # long as NUM_SIMULATIONS > 0
        if t < conf.EXPLORATION_TIME:
            # temperature tau = 1
            s = sum(root.n)
            pi = [x / s for x in root.n]
        else:
            # temperature tau -> 0
            m = max(root.n)
            p = [0 if x < m else 1 for x in root.n]
            s = sum(p)
            pi = [x / s for x in p]

        # save position, distribution of action selection and turn
        examples.append([
            extract_feature(root, conf),
            np.array(pi, dtype=np.float32),
            np.array([root.go.turn], dtype=np.float32),
        ])

        # choose an action
        action = np.random.choice(conf.NUM_ACTIONS, p=pi)

        # take the action
        root = root.children[action]

        # release memory
        root.parent.children = None

        t += 1

        # game terminates when both players pass
        if previous_action is not None \
                and previous_action == conf.PASS \
                and action == conf.PASS:
            break
        previous_action = action

    # calculate the scores if the result is undecided
    if result == 0.0:
        score_black, score_white = root.go.score()
        result = 1.0 if score_black > score_white else -1.0

    # record the game result from the perspective of each player
    for i in range(len(examples)):
        examples[i][2] *= result

    return examples, resign_value_history, result
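
The resignation check above takes the best mean action value Q = w / n over the root's children, pinning unvisited children (n == 0) to -1.0 so they never win the max (the source notes that w / n > -1.0 whenever n > 0); the built-in `map` is called with two iterables, pairing `root.w` and `root.n` element-wise. The same computation written as an explicit loop (sketch, assuming parallel lists of per-child value sums and visit counts):

def best_child_value(w, n):
    """Best Q = w_i / n_i over the children; unvisited children are
    treated as -1.0 so they are never selected by the max."""
    best = -1.0
    for w_i, n_i in zip(w, n):
        if n_i > 0:
            best = max(best, w_i / n_i)
    return best

print(best_child_value([0.0, 3.0, -2.0], [0, 4, 8]))  # 0.75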
Example #11
    def play(self):

        mcts = MonteCarloTreeSearch(self.net)
        game = deepcopy(self.game)
        game_over = False
        value = 0
        node = TreeNode()
        valid = 0
        # self.game.colorBoard()
        game.print_board()

        while not game_over:

            if game.current_player == self.human_player:
                valid = False
                while not valid:
                    piece, refpt, rot, flip = self.get_input(game)
                    piece.create(0, (refpt[0], refpt[1]))

                    # flip == 0 means no flip; otherwise flip horizontally.
                    f = 'None' if flip == 0 else 'h'

                    piece.flip(f)
                    piece.rotate(90 * rot)

                    valid = game.valid_move(piece.points, self.human_player)

                    if not valid:
                        print('You selected an illegal move, please reselect')
                        # print('attempting', piece.points)
                        # print('corners are ', game.corners[self.human_player])

                    if piece.ID not in ['I5', 'I4', 'I3', 'I2']:
                        encoding = (refpt[0] * 14 +
                                    refpt[1]) * 91 + piece.shift + (
                                        rot // 90) * 2 + flip
                    else:
                        encoding = (refpt[0] * 14 +
                                    refpt[1]) * 91 + piece.shift + (
                                        rot // 90) * 1 + flip

                best_child = TreeNode()
                best_child.action = encoding
                print('CHOICE WAS MADE BY A HUMAN TO PLAY', piece.ID, '@',
                      refpt)

            else:
                best_child = mcts.search(game, node, CFG.temp_final)

            action = best_child.action
            game.play_action(action)

            game.print_board()
            # game.colorBoard()

            game_over, value = game.check_game_over(game.current_player)

            best_child.parent = None
            node = best_child

        if value == self.human_player * game.current_player:
            print("You won!")
        elif value == -self.human_player * game.current_player:
            print("You lost.")
        else:
            print("Draw Match")
Example #12
"""

from mcts import encode_position, TreeNode
from board import Position, make_board, empty_board
from net import GobangModel

p0 = Position(make_board(empty_board), 'a', 0, -1)
p0.show()
x = encode_position(p0)
print x[10:15, 0, 0]
print x[10:15, 0, 1]
print x[10:15, 0, 2]

p1 = p0.move(18)
y = encode_position(p1)
print y[10:15, 0, 0]
print y[10:15, 0, 1]
print y[10:15, 0, 2]

p2 = p1.move(37)
z = encode_position(p2)
print z[10:15, 0, 0]
print z[10:15, 0, 1]
print z[10:15, 0, 2]

net = GobangModel
t0 = TreeNode(net, p0)
print list(p0.moves())

p3 = p0.move(190)
print p3
Example #13
def mutual_play(network_black, network_white, device, conf):
    # create evaluators for both players
    evaluator_black = DefaultEvaluator(network_black, device)
    evaluator_white = DefaultEvaluator(network_white, device)

    # create search trees for both players
    root_black = TreeNode(None, None, evaluator_black, conf)
    root_white = TreeNode(None, None, evaluator_white, conf)

    # black player goes first
    root = root_black
    evaluator = evaluator_black

    previous_action = None
    t = 0
    while t < conf.MAX_GAME_LENGTH:
        # the player to move runs MCTS using its own evaluator
        for _ in range(conf.NUM_SIMULATIONS):
            tree_search(root, evaluator, conf)

        # calculate the distribution of action selection
        # temperature tau -> 0
        m = max(root.n)
        p = [0 if x < m else 1 for x in root.n]
        s = sum(p)
        pi = np.array([x / s for x in p], dtype=np.float32)

        # choose an action
        action = np.random.choice(conf.NUM_ACTIONS, p=pi)

        # take the action
        if root_black.children[action] is None:
            root_black.children[action] = \
                TreeNode(root_black, action, evaluator_black, conf)
        root_black = root_black.children[action]
        if root_white.children[action] is None:
            root_white.children[action] = \
                TreeNode(root_white, action, evaluator_white, conf)
        root_white = root_white.children[action]

        # release memory
        root_black.parent.children = None
        root_white.parent.children = None

        # switch to the other search tree
        if root.go.turn == BLACK:
            root = root_white
            evaluator = evaluator_white
        else:
            root = root_black
            evaluator = evaluator_black

        t += 1

        # game terminates when both players pass
        if previous_action is not None \
                and previous_action == conf.PASS \
                and action == conf.PASS:
            break
        previous_action = action

    score_black, score_white = root.go.score()

    return score_black > score_white