def __init__(self):
    """Initialise pygame, open the game window and reset the UI state."""
    # Game model: a fresh board with black to move.
    self.go_board = GoBoard(board_dimension=BOARD_DIM, player=PLAYER_BLACK)

    # Window and rendering surface.
    pygame.init()
    pygame.font.init()
    window_size = (GAME_WIDTH, GAME_HIGHT)
    display_flags = pygame.HWSURFACE | pygame.DOUBLEBUF
    self._display_surf = pygame.display.set_mode(window_size, display_flags)
    pygame.display.set_caption('Go')

    # Rule helper used to apply moves and score the board.
    self.utils = GoUtils()

    # Main-loop / game-state flags.
    self._running = True
    self._playing = self._win = False
    # [-1, -1] is the "no stone placed yet" marker.
    self.lastPosition = [-1, -1]
    self.pass_button_clicked = self.passed_once = self.game_over = False
def test_make_move_valid_move_pass(self):
    """Passing (-1, -1) is accepted: same grid, turn handed to the opponent."""
    grid = [[0, 0, 0, 0], [-1, 0, 0, 0], [1, 0, 0, 0], [0, 1, 0, 0]]
    history = [(1, 2, 0), (-1, 1, 0), (1, 3, 1)]
    pass_move = (-1, -1)
    start_board = GoBoard(board_dimension=4,
                          player=-1,
                          board_grid=grid,
                          game_history=history)
    # The expected board reuses the same grid object and records the pass
    # as an extra (-1, -1, -1) history entry.
    expected_board = GoBoard(board_dimension=4,
                             player=1,
                             board_grid=grid,
                             game_history=history + [(-1, -1, -1)])
    outcome = self.utils.make_move(start_board, pass_move)
    self.assertEqual(outcome, (True, expected_board))
def test_make_move_invalid_not_in_board(self):
    """A move with a negative row is rejected and the board is returned as is."""
    off_board_move = (-1, 1)
    # No grid or history supplied: GoBoard falls back to its own defaults.
    start_board = GoBoard(board_dimension=9,
                          player=1,
                          board_grid=None,
                          game_history=None)
    outcome = self.utils.make_move(start_board, off_board_move)
    self.assertEqual(outcome, (False, start_board))
def generate_fake_data(self, training_data_num):
    """Create random boards labelled with simple stone-count statistics.

    Args:
        training_data_num: how many random samples to generate.

    Returns:
        A 3-tuple of numpy arrays, one row per sample:
          * the result of ``self.convert_to_resnet_input`` for the board,
          * a one-hot vector of length ``board_dimension ** 2 + 1`` whose
            hot index is the total number of stones on the board,
          * ``[v]`` where ``v`` is 1.0 if the player to move has more
            stones than the opponent, -1.0 if fewer and 0.0 if equal.
    """
    size = self.board_dimension
    cell_values = [-1, 0, 1]  # white, empty, black

    inputs = []
    count_one_hots = []
    majority_labels = []

    for _ in range(training_data_num):
        to_move = random.choice([-1, 1])
        # Rows are filled left to right, top to bottom.
        grid = [[random.choice(cell_values) for _col in range(size)]
                for _row in range(size)]

        whites = sum(row.count(-1) for row in grid)
        blacks = sum(row.count(1) for row in grid)

        sample_board = GoBoard(size, to_move, grid)
        inputs.append(self.convert_to_resnet_input(sample_board))

        one_hot = [0] * (size * size + 1)
        one_hot[blacks + whites] = 1
        count_one_hots.append(one_hot)

        # Positive when the side to move is ahead, negative when behind.
        lead = to_move * (blacks - whites)
        majority_labels.append([float((lead > 0) - (lead < 0))])

    return (np.array(inputs), np.array(count_one_hots),
            np.array(majority_labels))
def test_make_move_invalid_on_another_stone_no_capture(self):
    """Playing on an occupied point is rejected and the board is unchanged."""
    grid = [[0, 1, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]]
    history = [(1, 0, 1)]
    occupied_point = (0, 1)  # already holds the stone recorded in history
    start_board = GoBoard(board_dimension=4,
                          player=-1,
                          board_grid=grid,
                          game_history=history)
    outcome = self.utils.make_move(start_board, occupied_point)
    self.assertEqual(outcome, (False, start_board))
def test_make_move_invalid_move_into_an_eye(self):
    """Player -1 may not play at (3, 0), whose neighbours both hold 1-stones."""
    grid = [[0, 0, 0, 0], [-1, 0, 0, 0], [1, 0, 0, 0], [0, 1, 0, 0]]
    history = [(1, 2, 0), (-1, 1, 0), (1, 3, 1)]
    eye_point = (3, 0)
    start_board = GoBoard(board_dimension=4,
                          player=-1,
                          board_grid=grid,
                          game_history=history)
    outcome = self.utils.make_move(start_board, eye_point)
    self.assertEqual(outcome, (False, start_board))
def ai_vs_mcts(nn_batch, ai_simulation_num, mcts_simulation_num, game_num):
    """Play AlphaGo Zero (black) against plain MCTS (white) and tally the wins.

    The MCTS opponent is guided by ``UniformPredictionNet`` instead of a
    trained network.

    Args:
        nn_batch: batch number of the saved ResNet to load; the model is
            restored from ``../models/batch_<nn_batch>``.
        ai_simulation_num: number of simulations AlphaGo Zero runs per move.
        mcts_simulation_num: number of simulations the plain MCTS opponent
            runs per move.
        game_num: number of games to play.

    Returns:
        ``(count_nn_winning, count_mcts_winning)``: the number of games won
        by AlphaGo Zero and by MCTS. Games whose winning margin is not
        positive are counted for neither side.
    """
    uniform_net = UniformPredictionNet(path_to_model='/',
                                       board_dimension=BOARD_DIM)
    utils = GoUtils()
    count_nn_winning = 0
    count_mcts_winning = 0
    alphago0 = AlphaGoZero(model_path="../models/batch_" + str(nn_batch),
                           restored=True)
    # Safety cap so a game that never reaches a finished state still ends.
    max_history_length = BOARD_DIM**2 * 2

    for i in range(game_num):
        print()
        print("game number ", i)
        game_over = False
        board = GoBoard(board_dimension=BOARD_DIM, player=PLAYER_BLACK)
        while not game_over:
            # AlphaGo with MCTS plays black
            if board.player == PLAYER_BLACK:
                print("AlphaGo Zero plays")
                # Fixed: the AI previously ran with mcts_simulation_num,
                # leaving ai_simulation_num unused.
                move = alphago0.play_with_mcts(
                    board, simulation_number=ai_simulation_num)
            else:
                print("MCTS plays")
                # NOTE: 'simluation_number' (sic) is the keyword this file
                # uses when constructing MCTS/SelfPlay objects.
                mcts_play_instance = MCTS(
                    board,
                    uniform_net,
                    utils,
                    simluation_number=mcts_simulation_num)
                move = mcts_play_instance.run_simulations_without_noise()
            print("\t move is", move)

            _, board = utils.make_move(board=board, move=move)

            if (utils.is_game_finished(board)
                    or len(board.game_history) > max_history_length):
                game_over = True
                winner, winning_by_points = utils.evaluate_winner(
                    board.board_grid)
                if winning_by_points > 0:
                    if winner == 1:
                        count_nn_winning += 1
                    elif winner == -1:
                        count_mcts_winning += 1
                print("winner is ", winner)
                print("winning by points", winning_by_points)
                print(board)

    return count_nn_winning, count_mcts_winning
def test_make_move_valid_move_capture_stone_4(self):
    """Playing (3, 2) as -1 is accepted and empties the point (2, 2)."""
    grid_before = [[0, 0, 0, 0], [0, 1, -1, 0], [1, -1, 1, -1], [0, 0, 0, 0]]
    history_before = [(1, 2, 0), (-1, 2, 1), (1, 1, 1), (-1, 1, 2),
                      (1, 2, 2), (-1, 2, 3), (1, -1, -1)]
    capturing_move = (3, 2)
    start_board = GoBoard(board_dimension=4,
                          player=-1,
                          board_grid=grid_before,
                          game_history=history_before)

    grid_after = [[0, 0, 0, 0], [0, 1, -1, 0], [1, -1, 0, -1], [0, 0, -1, 0]]
    history_after = history_before + [(-1, 3, 2)]
    expected_board = GoBoard(board_dimension=4,
                             player=1,
                             board_grid=grid_after,
                             game_history=history_after)

    outcome = self.utils.make_move(start_board, capturing_move)
    self.assertEqual(outcome, (True, expected_board))
def test_is_invalid_move_because_of_ko4_not_ko_corner(self):
    """Near the corner: (0, 1) is not reported as a ko violation."""
    # The candidate point is not fully surrounded by the opponent's stones.
    grid = [[-1, 0, 0, 0], [1, -1, 0, 0], [0, 0, 0, 0], [1, 0, 0, 0]]
    history = [(1, 3, 0), (-1, 0, 0), (1, 1, 0), (-1, 1, 1)]
    candidate = (0, 1)
    position = GoBoard(board_dimension=4,
                       player=1,
                       board_grid=grid,
                       game_history=history)
    is_ko = GoUtils._is_invalid_move_because_of_ko(position, candidate)
    self.assertFalse(is_ko)
def test_is_invalid_move_because_of_ko3_ko_center(self):
    """Centre of the board: (2, 2) is reported as a ko violation."""
    grid = [[0, 0, 0, 0], [0, 1, -1, 0], [1, -1, 0, -1], [0, 1, -1, 0]]
    history = [(1, 2, 0), (-1, 2, 3), (1, 1, 1), (-1, 1, 2), (1, 2, 2),
               (-1, 3, 2), (1, 3, 1), (-1, 2, 1)]
    candidate = (2, 2)
    position = GoBoard(board_dimension=4,
                       player=1,
                       board_grid=grid,
                       game_history=history)
    is_ko = GoUtils._is_invalid_move_because_of_ko(position, candidate)
    self.assertTrue(is_ko)
def ai_vs_random(nn_batch, ai_simulation_num, game_num):
    """Play AlphaGo Zero (black) against a random player (white).

    The random player picks uniformly among the moves to which
    ``UniformPredictionNet.predict`` assigns a positive probability.

    Args:
        nn_batch: batch number of the saved ResNet to load; the model is
            restored from ``../models/batch_<nn_batch>``.
        ai_simulation_num: number of simulations AlphaGo Zero runs per move.
        game_num: number of games to play.

    Returns:
        ``(count_nn_winning, count_random_winning)``: the number of games
        won by AlphaGo Zero and by the random player. Games whose winning
        margin is not positive are counted for neither side.
    """
    uniform_net = UniformPredictionNet(path_to_model='/',
                                       board_dimension=BOARD_DIM)
    utils = GoUtils()
    count_nn_winning = 0
    count_random_winning = 0
    alphago0 = AlphaGoZero(model_path="../models/batch_" + str(nn_batch),
                           restored=True)
    # Safety cap so a game that never reaches a finished state still ends.
    max_history_length = BOARD_DIM**2 * 2

    for i in range(game_num):
        print()
        print("game number ", i)
        game_over = False
        board = GoBoard(board_dimension=BOARD_DIM, player=PLAYER_BLACK)
        while not game_over:
            # AlphaGo with MCTS plays black
            if board.player == PLAYER_BLACK:
                print("AlphaGo Zero plays")
                # Fixed: this used to read `mcts_simulation_num`, which is
                # not a parameter of this function (NameError at runtime).
                move = alphago0.play_with_mcts(
                    board, simulation_number=ai_simulation_num)
            else:
                print("Random plays")
                p, _ = uniform_net.predict(board)
                candidates = [m for m, prob in p.items() if prob > 0]
                move = random.choice(candidates)
            print("\t move is", move)

            _, board = utils.make_move(board=board, move=move)

            if (utils.is_game_finished(board)
                    or len(board.game_history) > max_history_length):
                game_over = True
                winner, winning_by_points = utils.evaluate_winner(
                    board.board_grid)
                if winning_by_points > 0:
                    if winner == 1:
                        count_nn_winning += 1
                    elif winner == -1:
                        count_random_winning += 1
                print("winner is ", winner)
                print("winning by points", winning_by_points)
                print(board)

    return count_nn_winning, count_random_winning
def test_is_invalid_move_because_of_ko5_not_ko_center(self):
    """(1, 2) is not reported as a ko violation in this centre position."""
    # The candidate move captures two adjacent groups.
    grid = [[0, 1, -1, 1], [1, -1, 0, -1], [0, 1, -1, 0], [0, 0, 0, 0]]
    history = [(1, 1, 0), (-1, 0, 2), (1, 0, 1), (-1, 2, 2), (1, 2, 1),
               (-1, 1, 3), (1, 0, 3), (-1, 1, 1)]
    candidate = (1, 2)
    position = GoBoard(board_dimension=4,
                       player=1,
                       board_grid=grid,
                       game_history=history)
    is_ko = GoUtils._is_invalid_move_because_of_ko(position, candidate)
    self.assertFalse(is_ko)
def test_is_invalid_move_because_of_ko7_not_ko_center(self):
    """(2, 2) is not a ko violation when the last moves were not the capture."""
    # The stone that would be left without liberties next to (2, 2) was not
    # played in the last move: the history ends with two (-1, -1) entries.
    grid = [[0, 0, 0, 0], [0, 1, -1, 0], [1, -1, 0, -1], [0, 1, -1, 0]]
    history = [(1, 1, 1), (-1, 1, 2), (1, 2, 2), (-1, 2, 3), (1, 3, 1),
               (-1, 3, 2), (1, 2, 0), (-1, 2, 1), (1, -1, -1), (-1, -1, -1)]
    candidate = (2, 2)
    position = GoBoard(board_dimension=4,
                       player=1,
                       board_grid=grid,
                       game_history=history)
    is_ko = GoUtils._is_invalid_move_because_of_ko(position, candidate)
    self.assertFalse(is_ko)
def test_is_invalid_move_because_of_ko6_not_ko_center(self):
    """On a 5x5 board, (2, 1) is not reported as a ko violation."""
    # The candidate move captures two stones that are connected to each other.
    grid = [[0, 0, 0, 0, 0], [0, 1, -1, -1, 0], [1, 0, 1, 1, -1],
            [0, 1, -1, -1, 0], [0, 0, 0, 0, 0]]
    history = [(1, 1, 1), (-1, 1, 2), (1, 2, 2), (-1, 1, 3), (1, 2, 3),
               (-1, 2, 4), (1, -1, -1), (-1, 3, 3), (1, 3, 1), (-1, 3, 2),
               (1, 2, 0)]
    candidate = (2, 1)
    position = GoBoard(board_dimension=5,
                       player=-1,
                       board_grid=grid,
                       game_history=history)
    is_ko = GoUtils._is_invalid_move_because_of_ko(position, candidate)
    self.assertFalse(is_ko)
class GoBoardTest(unittest.TestCase):
    """Ad-hoc visual check of ``GoBoard.generate_augmented_boards``.

    NOTE(review): this class defines no ``test_*`` methods. Everything below
    is class-body code, so it runs once when the class is defined (i.e. at
    import time) and only prints; nothing is asserted.
    """
    # 4x4 grid containing a single stone of value 1 at row 0, column 1.
    board_grid = [[0, 1, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]]
    # One recorded move; presumably (player, row, col) -- TODO confirm.
    game_history = [(1, 0, 1)]
    board1 = GoBoard(board_dimension=4,
                     player=-1,
                     board_grid=np.array(board_grid),
                     game_history=game_history)
    # for augmented_board in board.generate_augmented_boards():
    #     print(augmented_board)
    #     print()
    # Use the same board twice to mimic a short list of history boards.
    history_boards = [board1, board1]
    # Flatten the augmentations of every history board into one array
    # and print it.
    print(
        np.array([
            augment_board for history_board in history_boards
            for augment_board in history_board.generate_augmented_boards()
        ]))
def train_nn(self, training_game_number, simulation_number):
    """Train the ResNet by MCTS self-play with experience replay.

    After every self-play game the produced samples are pushed into a
    FIFO replay bucket (capped at ``BUCKET_SIZE`` rows); a random batch is
    then drawn from the bucket without replacement and passed to
    ``self.nn.train``.

    Args:
        training_game_number: number of self-play games to run.
        simulation_number: number of simulations MCTS uses per move.

    Returns:
        Nothing. Every 10th batch is trained with a model path of
        ``<self.model_path>/batch_<batch_num>`` passed to ``self.nn.train``
        (presumably where the checkpoint is written -- confirm in the
        network class).

    Notes:
        Training 2000 games, total distinct board number seen
        = 2000 * 50 = 100,000.
        After each game, 2000 boards are sampled. Each board is used
        2/25*25000/50 = 40 times.
        Fake dataset also had 100,000 data seen (achieved 96% test accuracy
        on 50 test boards for counting).
    """
    # Batch and bucket size used for testing
    # BATCH_SIZE = 60
    # BUCKET_SIZE = 100
    BATCH_SIZE = 2000
    BUCKET_SIZE = 25000  # bucket size used in experience replay
    BLACK = 1  # black goes first

    def _push(bucket, fresh):
        """Append ``fresh`` to ``bucket``, keeping the newest BUCKET_SIZE rows."""
        if bucket is None or len(bucket) == 0:
            # Fixed: the first game's samples used to be assigned to the
            # bucket and then appended again, so they were stored twice.
            merged = np.asarray(fresh)
        else:
            merged = np.append(bucket, fresh, axis=0)
        # Drop the oldest rows from the front once the bucket is over
        # capacity; a no-op while it is still filling up.
        return merged[-BUCKET_SIZE:]

    batch_num = 0
    bucket_training_boards = None
    bucket_training_labels_p = None
    bucket_training_labels_v = None

    with self.sess.as_default():
        for game_num in prog_bar(range(training_game_number)):
            print("training game:", game_num + 1)
            board = GoBoard(self.nn.board_dimension,
                            BLACK,
                            board_grid=[],
                            game_history=None)
            # NOTE: 'simluation_number' (sic) is the keyword SelfPlay is
            # called with in this file.
            play = SelfPlay(board,
                            self.nn,
                            self.utils,
                            simluation_number=simulation_number)
            (training_boards, training_labels_p,
             training_labels_v) = play.play_till_finish()

            # Fill the bucket with the current game's samples.
            bucket_training_boards = _push(bucket_training_boards,
                                           training_boards)
            bucket_training_labels_p = _push(bucket_training_labels_p,
                                             training_labels_p)
            bucket_training_labels_v = _push(bucket_training_labels_v,
                                             training_labels_v)

            bucket_len = len(bucket_training_labels_v)
            if bucket_len == 0:
                # Nothing collected yet, so there is nothing to train on.
                continue

            # Fixed: indices used to be drawn from range(BUCKET_SIZE) even
            # when the bucket held fewer rows, which makes np.take raise an
            # out-of-bounds IndexError. Sample from the rows that actually
            # exist, capped at BATCH_SIZE.
            sample_size = min(BATCH_SIZE, bucket_len)
            batch_indices = np.random.choice(bucket_len,
                                             sample_size,
                                             replace=False)
            batch_training_boards = np.take(bucket_training_boards,
                                            batch_indices,
                                            axis=0)
            batch_training_labels_p = np.take(bucket_training_labels_p,
                                              batch_indices,
                                              axis=0)
            batch_training_labels_v = np.take(bucket_training_labels_v,
                                              batch_indices,
                                              axis=0)

            batch_num += 1
            if batch_num % 10 == 0:
                # Save every 10 batches
                model_path = self.model_path + '/batch_' + str(batch_num)
                self.nn.train(batch_training_boards,
                              batch_training_labels_p,
                              batch_training_labels_v, model_path)
            else:
                print("batch number", batch_num)
                self.nn.train(batch_training_boards,
                              batch_training_labels_p,
                              batch_training_labels_v)
class Go:
    """Pygame front end for a two-player game of Go.

    The window shows the board, a "Pass" button (bottom left) and a
    "Start"/"Surrender" button (bottom right). Moves are applied through
    ``GoUtils.make_move``; two consecutive passes end the game and the
    result of ``GoUtils.evaluate_winner`` is displayed.
    """

    def __init__(self):
        """Initialise pygame, open the window and reset the UI state."""
        self.go_board = GoBoard(board_dimension=BOARD_DIM, player=PLAYER_BLACK)
        pygame.init()
        pygame.font.init()
        self._display_surf = pygame.display.set_mode(
            (GAME_WIDTH, GAME_HIGHT), pygame.HWSURFACE | pygame.DOUBLEBUF)
        pygame.display.set_caption('Go')
        self.utils = GoUtils()
        self._running = True
        self._playing = False
        self._win = False
        # [-1, -1] means "no stone to highlight".
        self.lastPosition = [-1, -1]
        self.pass_button_clicked = False
        self.passed_once = False
        self.game_over = False

    def on_event(self, event):
        """Handle one pygame event (quit, button clicks, board clicks)."""
        if event.type == pygame.QUIT:
            self._running = False

        pos = pygame.mouse.get_pos()

        if (self._playing and event.type == pygame.MOUSEBUTTONDOWN
                and self.mouse_in_pass_button(pos)):
            # Highlight the pass button while it is held down.
            self.pass_button_clicked = True
        elif event.type == pygame.MOUSEBUTTONUP:
            # Always drop the highlight on release, even when the cursor
            # left the button; otherwise it stays highlighted.
            self.pass_button_clicked = False
            if self.mouse_in_botton(pos):
                if not self._playing:
                    self.start()
                else:
                    self.surrender()
                    # Flip so the displayed colour is the opponent's, who
                    # is announced with "Wins!".
                    self.go_board.flip_player()
            elif self._playing and self.mouse_in_pass_button(pos):
                _, self.go_board = self.utils.make_move(board=self.go_board,
                                                        move=PASS)
                if not self.passed_once:
                    self.passed_once = True
                else:
                    # Double Pass Game Over
                    print("Game Over!")
                    self.game_over = True
                    self.print_winner()
            elif self._playing:
                # Map the pixel position to the nearest grid intersection.
                c = (pos[0] - PADDING + WIDTH // 2) // (WIDTH + MARGIN)
                r = (pos[1] - PADDING + WIDTH // 2) // (WIDTH + MARGIN)
                if 0 <= r < BOARD_DIM and 0 <= c < BOARD_DIM:
                    moved, self.go_board = self.utils.make_move(
                        board=self.go_board, move=(r, c))
                    if moved:
                        # Only an accepted move breaks a pass sequence; a
                        # rejected click leaves the board untouched.
                        self.passed_once = False
                    self.print_winner()
                    self.lastPosition = self.go_board.get_last_position()

    def on_render(self):
        """Draw stones, overlays and buttons, then refresh the display."""
        self.render_go_piece()
        self.render_last_position()
        self.render_game_info()
        self.render_button()
        self.render_pass_button()
        pygame.display.update()

    def on_cleanup(self):
        """Shut pygame down."""
        pygame.quit()

    def on_execute(self):
        """Run the main loop until the window is closed."""
        while self._running:
            self.go_board_init()
            for event in pygame.event.get():
                self.on_event(event)
            self.on_render()
        self.on_cleanup()

    def start(self):
        """Begin a new game on an empty board with black to move."""
        self._playing = True
        self.lastPosition = [-1, -1]
        self.go_board = GoBoard(board_dimension=BOARD_DIM, player=PLAYER_BLACK)
        self._win = False
        # Clear state left over from the previous game; otherwise the old
        # result stays on screen and a single pass could end the new game.
        self.pass_button_clicked = False
        self.passed_once = False
        self.game_over = False

    def surrender(self):
        """Stop the current game and flag that a win should be shown."""
        self._playing = False
        self._win = True

    def go_board_init(self):
        """Paint the background and the empty board grid."""
        self._display_surf.fill(YELLOW)
        # Draw black background rect for game area
        pygame.draw.rect(self._display_surf, BLACK,
                         [PADDING, PADDING, BOARD, BOARD])
        # Draw the grid: yellow cells on black leave the black lines visible.
        step = MARGIN + WIDTH
        for row in range(BOARD_DIM - 1):
            for column in range(BOARD_DIM - 1):
                pygame.draw.rect(self._display_surf, YELLOW, [
                    step * column + MARGIN + PADDING,
                    step * row + MARGIN + PADDING, WIDTH, WIDTH
                ])

    def _in_button(self, pos, center_x):
        """Return True if ``pos`` is inside the button centred at ``center_x``."""
        return (center_x - 50 <= pos[0] <= center_x + 50
                and GAME_HIGHT - 50 <= pos[1] <= GAME_HIGHT - 20)

    def _draw_button(self, color, label, center_x):
        """Draw a 100x30 button with a white label, centred at ``center_x``."""
        pygame.draw.rect(self._display_surf, color,
                         (center_x - 50, GAME_HIGHT - 50, 100, 30))
        info_font = pygame.font.SysFont('Helvetica', 16)
        text = info_font.render(label, True, WHITE)
        textRect = text.get_rect()
        textRect.centerx = center_x
        textRect.centery = GAME_HIGHT - 35
        self._display_surf.blit(text, textRect)

    def mouse_in_botton(self, pos):
        """ Check if mouse is in the start/surrender button and return a boolean value """
        return self._in_button(pos, GAME_WIDTH // 4 * 3)

    def mouse_in_pass_button(self, pos):
        """ Check if mouse is in the pass button and return a boolean value """
        return self._in_button(pos, GAME_WIDTH // 4)

    def render_button(self):
        """Draw the green "Start" or red "Surrender" button."""
        color = GREEN if not self._playing else RED
        info = "Start" if not self._playing else "Surrender"
        self._draw_button(color, info, GAME_WIDTH // 4 * 3)

    def render_pass_button(self):
        """Draw the "Pass" button, highlighted while it is pressed."""
        color = GREEN if not self.pass_button_clicked else YELLOW
        self._draw_button(color, "Pass", GAME_WIDTH // 4)

    def render_game_info(self):
        """Show whose turn it is, or who won and by how many points."""
        if not self.game_over:
            # current player color
            color = BLACK if self.go_board.player == PLAYER_BLACK else WHITE
            info = "Wins!" if self._win else "Your Turn"
        else:
            # Fixed: the winner id was previously passed to pygame as the
            # circle colour; translate it to a real colour first.
            winner, win_by_points = self.retrieve_winner()
            color = BLACK if winner == PLAYER_BLACK else WHITE
            info = "wins by " + str(win_by_points) + " points."

        center = (GAME_WIDTH // 2 - 60, BOARD + 60)
        radius = 12
        pygame.draw.circle(self._display_surf, color, center, radius, 0)

        info_font = pygame.font.SysFont('Helvetica', 16)
        text = info_font.render(info, True, BLACK)
        textRect = text.get_rect()
        textRect.centerx = self._display_surf.get_rect().centerx + 20
        textRect.centery = center[1]
        self._display_surf.blit(text, textRect)

    def render_go_piece(self):
        """ Render the Go stones on the board according to self.go_board """
        step = MARGIN + WIDTH
        for r in range(BOARD_DIM):
            for c in range(BOARD_DIM):
                stone = self.go_board.board_grid[r][c]
                if stone == EMPTY:
                    continue
                center = (step * c + MARGIN + PADDING,
                          step * r + MARGIN + PADDING)
                color = BLACK if stone == PLAYER_BLACK else WHITE
                pygame.draw.circle(self._display_surf, color, center,
                                   WIDTH // 2 - MARGIN, 0)

    def render_last_position(self):
        """ Render a red rectangle around the last position """
        row, col = self.lastPosition[0], self.lastPosition[1]
        # Fixed: the check used `> 0`, so a stone on row 0 or column 0 was
        # never highlighted. Negative coordinates mean "nothing to mark".
        if row >= 0 and col >= 0:
            step = MARGIN + WIDTH
            pygame.draw.rect(self._display_surf, RED,
                             (step * col - step // 2 + PADDING,
                              step * row - step // 2 + PADDING, step, step),
                             1)

    def print_winner(self):
        """Print the current leader and margin to stdout."""
        winner, winning_by_points = self.utils.evaluate_winner(
            self.go_board.board_grid)
        if winner == PLAYER_BLACK:
            print("Black wins by " + str(winning_by_points))
        else:
            print("White wins by " + str(winning_by_points))

    def retrieve_winner(self):
        """Return ``(winner, winning_by_points)`` for the current board."""
        return self.utils.evaluate_winner(self.go_board.board_grid)
def start(self):
    """Begin a new game on an empty board with black to move."""
    self._playing = True
    # [-1, -1] means "no stone to highlight".
    self.lastPosition = [-1, -1]
    self.go_board = GoBoard(board_dimension=BOARD_DIM, player=PLAYER_BLACK)
    self._win = False
    # Clear state left over from the previous game; otherwise the old
    # result stays on screen and a single pass could end the new game.
    self.pass_button_clicked = False
    self.passed_once = False
    self.game_over = False