Beispiel #1
0
 def test_make_move_invalid_not_in_board(self):
     """A move with a negative row and a valid column is rejected on an empty 9x9 board.

     make_move is expected to return (False, board), i.e. the failure flag
     together with a board equal to the one passed in.
     """
     off_board_move = (-1, 1)
     start_board = GoBoard(board_dimension=9,
                           player=1,
                           board_grid=None,
                           game_history=None)
     outcome = self.utils.make_move(start_board, off_board_move)
     self.assertEqual(outcome, (False, start_board))
Beispiel #2
0
    def test_make_move_valid_move_pass(self):
        """Passing with (-1, -1) is accepted: stones stay put and the turn changes.

        The expected board has the same grid, the opposite player to move and
        one extra history entry (-1, -1, -1) recording white's pass.
        """
        pass_move = (-1, -1)
        grid = [[0, 0, 0, 0], [-1, 0, 0, 0], [1, 0, 0, 0], [0, 1, 0, 0]]
        history_before = [(1, 2, 0), (-1, 1, 0), (1, 3, 1)]
        board_before = GoBoard(board_dimension=4,
                               player=-1,
                               board_grid=grid,
                               game_history=history_before)

        # The expected board deliberately reuses the very same grid object.
        history_after = history_before + [(-1, -1, -1)]
        board_after = GoBoard(board_dimension=4,
                              player=1,
                              board_grid=grid,
                              game_history=history_after)

        outcome = self.utils.make_move(board_before, pass_move)
        self.assertEqual(outcome, (True, board_after))
Beispiel #3
0
    def generate_fake_data(self, training_data_num):
        """Build random boards labelled with simple stone-count statistics.

        For every sample the player to move is drawn from {-1, 1} and each
        cell of a board_dimension x board_dimension grid is drawn from
        {-1, 0, 1} (white, empty, black).

        Args:
            training_data_num: the number of fake training samples to generate
        Returns:
            A tuple of three numpy arrays with one row per sample:
            - the boards, each converted with self.convert_to_resnet_input
            - one-hot vectors of length board_dimension**2 + 1 whose hot index
              is the total number of stones on the board
            - single-element rows holding 1.0 if the player to move has more
              stones than the opponent, -1.0 if fewer, 0.0 if equal
        """
        dim = self.board_dimension
        cell_values = [-1, 0, 1]  #white empty black
        one_hot_length = dim * dim + 1

        network_inputs = []
        stone_total_labels = []
        majority_labels = []

        for _ in range(training_data_num):
            # The player is drawn before the grid cells, which are drawn row by row.
            player = random.choice([-1, 1])
            grid = [[random.choice(cell_values) for _col in range(dim)]
                    for _row in range(dim)]

            cells = [cell for row in grid for cell in row]
            black_total = cells.count(1)
            white_total = cells.count(-1)

            network_inputs.append(
                self.convert_to_resnet_input(GoBoard(dim, player, grid)))

            one_hot = [0] * one_hot_length
            one_hot[black_total + white_total] = 1
            stone_total_labels.append(one_hot)

            # Black's lead seen from the side to move: positive means the
            # current player owns more stones, negative means fewer.
            lead = (black_total - white_total) * player
            majority_labels.append([float((lead > 0) - (lead < 0))])

        return (np.array(network_inputs), np.array(stone_total_labels),
                np.array(majority_labels))
Beispiel #4
0
 def test_make_move_invalid_move_into_an_eye(self):
     """White may not play at (3, 0), a corner point enclosed by black stones.

     Both on-board neighbours of (3, 0), namely (2, 0) and (3, 1), hold
     black stones, so make_move is expected to return (False, board).
     """
     eye_move = (3, 0)
     grid = [[0, 0, 0, 0], [-1, 0, 0, 0], [1, 0, 0, 0], [0, 1, 0, 0]]
     history = [(1, 2, 0), (-1, 1, 0), (1, 3, 1)]
     white_to_play = GoBoard(board_dimension=4,
                             player=-1,
                             board_grid=grid,
                             game_history=history)
     outcome = self.utils.make_move(white_to_play, eye_move)
     self.assertEqual(outcome, (False, white_to_play))
Beispiel #5
0
def ai_vs_mcts(nn_batch, ai_simulation_num, mcts_simulation_num, game_num):
    """Play AlphaGo Zero against plain MCTS (uniform heuristic) and count wins.

    AlphaGo Zero always plays black; the MCTS opponent, driven by a
    UniformPredictionNet, plays white. A game stops when
    utils.is_game_finished reports it is over or when the move history grows
    beyond BOARD_DIM**2 * 2 entries.

    Args:
        nn_batch: the batch number for the version of ResNet used, restored
            from "../models/batch_<nn_batch>"
        ai_simulation_num: simulation number used by AlphaGo Zero
        mcts_simulation_num: simulation number used by the MCTS opponent
        game_num: number of games played
    Returns:
        A tuple (count_nn_winning, count_mcts_winning) with the number of
        games won by AlphaGo Zero and by MCTS. Games whose winning margin is
        not positive are counted for neither side.
    """
    uniform_net = UniformPredictionNet(path_to_model='/',
                                       board_dimension=BOARD_DIM)
    utils = GoUtils()
    count_nn_winning = 0
    count_mcts_winning = 0
    alphago0 = AlphaGoZero(model_path="../models/batch_" + str(nn_batch),
                           restored=True)
    # Safety cap so that a game which never finishes is still terminated.
    max_history_length = BOARD_DIM**2 * 2

    for i in range(game_num):
        print()
        print("game number ", i)
        game_over = False
        board = GoBoard(board_dimension=BOARD_DIM, player=PLAYER_BLACK)
        while not game_over:
            #AlphaGo with MCTS plays black
            if board.player == PLAYER_BLACK:
                print("AlphaGo Zero plays")
                # Use the AI's own simulation budget, not the opponent's.
                move = alphago0.play_with_mcts(
                    board, simulation_number=ai_simulation_num)
            else:
                print("MCTS plays")
                mcts_play_instance = MCTS(
                    board,
                    uniform_net,
                    utils,
                    simluation_number=mcts_simulation_num)
                move = mcts_play_instance.run_simulations_without_noise()

            print("\t move is", move)

            _, board = utils.make_move(board=board, move=move)

            if (utils.is_game_finished(board)
                    or len(board.game_history) > max_history_length):
                game_over = True
                winner, winning_by_points = utils.evaluate_winner(
                    board.board_grid)
                if winning_by_points > 0:
                    if winner == 1:
                        count_nn_winning += 1
                    elif winner == -1:
                        count_mcts_winning += 1
                print("winner is ", winner)
                print("winning by points", winning_by_points)
                print(board)

    return count_nn_winning, count_mcts_winning
Beispiel #6
0
 def test_make_move_invalid_on_another_stone_no_capture(self):
     """White may not play at (0, 1), which already holds a black stone.

     make_move is expected to return (False, board).
     """
     occupied_point = (0, 1)
     grid = [[0, 1, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]]
     history = [(1, 0, 1)]
     white_to_play = GoBoard(board_dimension=4,
                             player=-1,
                             board_grid=grid,
                             game_history=history)
     outcome = self.utils.make_move(white_to_play, occupied_point)
     self.assertEqual(outcome, (False, white_to_play))
Beispiel #7
0
 def test_is_invalid_move_because_of_ko4_not_ko_corner(self):
     """Black playing (0, 1) near the corner is not flagged as a ko violation.

     The point is not fully surrounded by opponent stones: its neighbour
     (0, 2) is empty.
     """
     candidate = (0, 1)
     grid = [[-1, 0, 0, 0], [1, -1, 0, 0], [0, 0, 0, 0], [1, 0, 0, 0]]
     history = [(1, 3, 0), (-1, 0, 0), (1, 1, 0), (-1, 1, 1)]
     black_to_play = GoBoard(board_dimension=4,
                             player=1,
                             board_grid=grid,
                             game_history=history)
     is_ko = GoUtils._is_invalid_move_because_of_ko(black_to_play, candidate)
     self.assertFalse(is_ko)
Beispiel #8
0
    def test_make_move_valid_move_capture_stone_4(self):
        """White playing (3, 2) captures the black stone at (2, 2).

        Before the move the black stone at (2, 2) has white neighbours at
        (1, 2), (2, 1) and (2, 3); (3, 2) is its only empty neighbour. The
        expected board has (2, 2) emptied, a white stone on (3, 2), black to
        move and the move appended to the history.
        """
        capturing_move = (3, 2)
        grid_before = [[0, 0, 0, 0], [0, 1, -1, 0], [1, -1, 1, -1],
                       [0, 0, 0, 0]]
        history_before = [(1, 2, 0), (-1, 2, 1), (1, 1, 1), (-1, 1, 2),
                          (1, 2, 2), (-1, 2, 3), (1, -1, -1)]
        board_before = GoBoard(board_dimension=4,
                               player=-1,
                               board_grid=grid_before,
                               game_history=history_before)

        grid_after = [[0, 0, 0, 0], [0, 1, -1, 0], [1, -1, 0, -1],
                      [0, 0, -1, 0]]
        history_after = history_before + [(-1, 3, 2)]
        board_after = GoBoard(board_dimension=4,
                              player=1,
                              board_grid=grid_after,
                              game_history=history_after)

        outcome = self.utils.make_move(board_before, capturing_move)
        self.assertEqual(outcome, (True, board_after))
Beispiel #9
0
 def test_is_invalid_move_because_of_ko3_ko_center(self):
     """Black playing (2, 2) in the centre is flagged as a ko violation.

     The last history entry is white's stone at (2, 1), and every neighbour
     of (2, 2) is a white stone.
     """
     candidate = (2, 2)
     grid = [[0, 0, 0, 0], [0, 1, -1, 0], [1, -1, 0, -1],
             [0, 1, -1, 0]]
     history = [(1, 2, 0), (-1, 2, 3), (1, 1, 1), (-1, 1, 2),
                (1, 2, 2), (-1, 3, 2), (1, 3, 1), (-1, 2, 1)]
     black_to_play = GoBoard(board_dimension=4,
                             player=1,
                             board_grid=grid,
                             game_history=history)
     is_ko = GoUtils._is_invalid_move_because_of_ko(black_to_play, candidate)
     self.assertTrue(is_ko)
Beispiel #10
0
def ai_vs_random(nn_batch, ai_simulation_num, game_num):
    """Play AlphaGo Zero against a random player and count wins.

    AlphaGo Zero always plays black. The random player (white) picks
    uniformly among the moves to which UniformPredictionNet.predict assigns a
    positive probability. A game stops when utils.is_game_finished reports it
    is over or when the move history grows beyond BOARD_DIM**2 * 2 entries.

    Args:
        nn_batch: the batch number for the version of ResNet used, restored
            from "../models/batch_<nn_batch>"
        ai_simulation_num: simulation number used by AlphaGo Zero
        game_num: number of games played
    Returns:
        A tuple (count_nn_winning, count_random_winning) with the number of
        games won by AlphaGo Zero and by the random player. Games whose
        winning margin is not positive are counted for neither side.
    """
    uniform_net = UniformPredictionNet(path_to_model='/',
                                       board_dimension=BOARD_DIM)
    utils = GoUtils()
    count_nn_winning = 0
    count_random_winning = 0
    alphago0 = AlphaGoZero(model_path="../models/batch_" + str(nn_batch),
                           restored=True)
    # Safety cap so that a game which never finishes is still terminated.
    max_history_length = BOARD_DIM**2 * 2

    for i in range(game_num):
        print()
        print("game number ", i)
        game_over = False
        board = GoBoard(board_dimension=BOARD_DIM, player=PLAYER_BLACK)
        while not game_over:
            #AlphaGo with MCTS plays black
            if board.player == PLAYER_BLACK:
                print("AlphaGo Zero plays")
                # ai_simulation_num is the only simulation budget this
                # function receives; it belongs to the AlphaGo Zero player.
                move = alphago0.play_with_mcts(
                    board, simulation_number=ai_simulation_num)
            else:
                print("Random plays")
                p, _ = uniform_net.predict(board)
                candidate_moves = [
                    candidate for candidate, prob in p.items() if prob > 0
                ]
                move = random.choice(candidate_moves)

            print("\t move is", move)

            _, board = utils.make_move(board=board, move=move)

            if (utils.is_game_finished(board)
                    or len(board.game_history) > max_history_length):
                game_over = True
                winner, winning_by_points = utils.evaluate_winner(
                    board.board_grid)
                if winning_by_points > 0:
                    if winner == 1:
                        count_nn_winning += 1
                    elif winner == -1:
                        count_random_winning += 1
                print("winner is ", winner)
                print("winning by points", winning_by_points)
                print(board)

    return count_nn_winning, count_random_winning
Beispiel #11
0
 def test_is_invalid_move_because_of_ko5_not_ko_center(self):
     """Black playing (1, 2) is not flagged as a ko violation.

     Scenario from the test author: the move captures two adjacent groups.
     """
     candidate = (1, 2)
     grid = [[0, 1, -1, 1], [1, -1, 0, -1], [0, 1, -1, 0],
             [0, 0, 0, 0]]
     history = [(1, 1, 0), (-1, 0, 2), (1, 0, 1), (-1, 2, 2),
                (1, 2, 1), (-1, 1, 3), (1, 0, 3), (-1, 1, 1)]
     black_to_play = GoBoard(board_dimension=4,
                             player=1,
                             board_grid=grid,
                             game_history=history)
     is_ko = GoUtils._is_invalid_move_because_of_ko(black_to_play, candidate)
     self.assertFalse(is_ko)
Beispiel #12
0
 def test_is_invalid_move_because_of_ko7_not_ko_center(self):
     """Black playing (2, 2) is not flagged as ko when the last moves were passes.

     The history ends with the entries (1, -1, -1) and (-1, -1, -1), so the
     most recent move did not place a stone next to (2, 2).
     """
     candidate = (2, 2)
     grid = [[0, 0, 0, 0], [0, 1, -1, 0], [1, -1, 0, -1],
             [0, 1, -1, 0]]
     history = [(1, 1, 1), (-1, 1, 2), (1, 2, 2), (-1, 2, 3),
                (1, 3, 1), (-1, 3, 2), (1, 2, 0), (-1, 2, 1),
                (1, -1, -1), (-1, -1, -1)]
     black_to_play = GoBoard(board_dimension=4,
                             player=1,
                             board_grid=grid,
                             game_history=history)
     is_ko = GoUtils._is_invalid_move_because_of_ko(black_to_play, candidate)
     self.assertFalse(is_ko)
Beispiel #13
0
 def test_is_invalid_move_because_of_ko6_not_ko_center(self):
     """White playing (2, 1) on a 5x5 board is not flagged as a ko violation.

     Scenario from the test author: the move captures two connected stones.
     """
     candidate = (2, 1)
     grid = [[0, 0, 0, 0, 0], [0, 1, -1, -1, 0], [1, 0, 1, 1, -1],
             [0, 1, -1, -1, 0], [0, 0, 0, 0, 0]]
     history = [(1, 1, 1), (-1, 1, 2), (1, 2, 2), (-1, 1, 3),
                (1, 2, 3), (-1, 2, 4), (1, -1, -1), (-1, 3, 3),
                (1, 3, 1), (-1, 3, 2), (1, 2, 0)]
     white_to_play = GoBoard(board_dimension=5,
                             player=-1,
                             board_grid=grid,
                             game_history=history)
     is_ko = GoUtils._is_invalid_move_because_of_ko(white_to_play, candidate)
     self.assertFalse(is_ko)
Beispiel #14
0
    def __init__(self):
        """Create a fresh board, initialise pygame and open the game window.

        The window is GAME_WIDTH x GAME_HIGHT, captioned 'Go', and black is
        the first player to move on the new board.
        """
        self.go_board = GoBoard(board_dimension=BOARD_DIM, player=PLAYER_BLACK)

        # pygame and its font module are initialised before the display is created.
        pygame.init()
        pygame.font.init()
        window_size = (GAME_WIDTH, GAME_HIGHT)
        display_flags = pygame.HWSURFACE | pygame.DOUBLEBUF
        self._display_surf = pygame.display.set_mode(window_size, display_flags)

        pygame.display.set_caption('Go')

        self.utils = GoUtils()

        # Game state flags.
        self._running = True
        self._playing = False
        self._win = False
        self.game_over = False

        # Input / turn bookkeeping; [-1, -1] is the initial "last position".
        self.lastPosition = [-1, -1]
        self.pass_button_clicked = False
        self.passed_once = False
Beispiel #15
0
class GoBoardTest(unittest.TestCase):
    """Ad-hoc check of GoBoard.generate_augmented_boards.

    The class defines no test methods; when the class body is executed it
    prints the augmented boards of a small 4x4 position, repeated for two
    history entries.
    """
    board_grid = [[0, 1, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]]
    game_history = [(1, 0, 1)]
    board1 = GoBoard(
        board_dimension=4,
        player=-1,
        board_grid=np.array(board_grid),
        game_history=game_history,
    )

    # The same board object stands in for two consecutive history entries.
    history_boards = [board1] * 2

    # Flatten the augmentations of every history board into a single array.
    print(
        np.array([
            augmented for past_board in history_boards
            for augmented in past_board.generate_augmented_boards()
        ]))
Beispiel #16
0
    def train_nn(self, training_game_number, simulation_number):
        """Train the resnet by self play using MCTS, with experience replay.

        The samples of every self-play game are appended to a replay bucket.
        Whenever the bucket holds more than BUCKET_SIZE samples, the oldest
        ones are dropped so that exactly BUCKET_SIZE remain, and BATCH_SIZE
        samples are drawn from it without replacement and passed to
        self.nn.train. No training happens before the bucket has overflowed
        for the first time.

        Args:
            training_game_number: number of self play games
            simulation_number: number of simulations used in MCTS
        Returns:
            Nothing. On every 10th batch, self.nn.train additionally receives
            the path self.model_path + '/batch_<batch_num>' (presumably the
            location where the model is saved - confirm against nn.train).
        Notes:
            Training 2000 games, total distinct board number seen = 2000 * 50 = 100,000
            After each game, 2000 boards are sampled. Each board is used 2/25*25000/50 = 40 times.
            Fake dataset also had 100,000 data seen (achieved 96% test accuracy on 50 test boards for counting)
        """

        #Batch and bucket size used for testing
        # BATCH_SIZE = 60
        # BUCKET_SIZE = 100

        BATCH_SIZE = 2000
        BUCKET_SIZE = 25000  # bucket size used in experience replay
        BLACK = 1  # black goes first
        batch_num = 0

        def extend_bucket(bucket, new_samples):
            """Return bucket with new_samples appended along the first axis."""
            new_samples = np.asarray(new_samples)
            if len(bucket) == 0:
                # An empty 1-D bucket cannot be appended to along axis 0 when
                # the samples are multi-dimensional, so the first game's
                # samples simply become the bucket (added once, not twice).
                return new_samples
            return np.append(bucket, new_samples, axis=0)

        bucket_training_boards = np.empty(0)
        bucket_training_labels_p = np.empty(0)
        bucket_training_labels_v = np.empty(0)

        with self.sess.as_default():
            for game_num in prog_bar(range(training_game_number)):
                print("training game:", game_num + 1)
                board = GoBoard(self.nn.board_dimension,
                                BLACK,
                                board_grid=[],
                                game_history=None)

                play = SelfPlay(board,
                                self.nn,
                                self.utils,
                                simluation_number=simulation_number)
                (training_boards, training_labels_p,
                 training_labels_v) = play.play_till_finish()

                # Fill the bucket with current game's boards
                bucket_training_boards = extend_bucket(
                    bucket_training_boards, training_boards)
                bucket_training_labels_p = extend_bucket(
                    bucket_training_labels_p, training_labels_p)
                bucket_training_labels_v = extend_bucket(
                    bucket_training_labels_v, training_labels_v)

                # Training only starts once the bucket has overflowed.
                overflow = len(bucket_training_labels_v) - BUCKET_SIZE
                if overflow <= 0:
                    continue

                # Remove the oldest samples from the front of the bucket
                bucket_training_boards = bucket_training_boards[overflow:]
                bucket_training_labels_p = bucket_training_labels_p[overflow:]
                bucket_training_labels_v = bucket_training_labels_v[overflow:]

                # Take BATCH_SIZE number of random elements from the bucket and train
                batch_indices = np.random.choice(BUCKET_SIZE,
                                                 BATCH_SIZE,
                                                 replace=False)
                batch_training_boards = np.take(bucket_training_boards,
                                                batch_indices,
                                                axis=0)
                batch_training_labels_p = np.take(bucket_training_labels_p,
                                                  batch_indices,
                                                  axis=0)
                batch_training_labels_v = np.take(bucket_training_labels_v,
                                                  batch_indices,
                                                  axis=0)
                batch_num += 1
                if batch_num % 10 == 0:  #Save every 10 batches
                    model_path = self.model_path + '/batch_' + str(batch_num)
                    self.nn.train(batch_training_boards,
                                  batch_training_labels_p,
                                  batch_training_labels_v, model_path)
                else:
                    print("batch number", batch_num)
                    self.nn.train(batch_training_boards,
                                  batch_training_labels_p,
                                  batch_training_labels_v)
Beispiel #17
0
 def start(self):
     """Begin a new game: fresh board with black to move, play state reset."""
     self._playing = True
     self._win = False
     # [-1, -1] is the same "last position" value used for a board with no move yet.
     self.lastPosition = [-1, -1]
     self.go_board = GoBoard(player=PLAYER_BLACK, board_dimension=BOARD_DIM)