def test_model_evaluation(self):
    """Check that `simulate` batches leaf positions and feeds them to the model.

    A closure-based DummyModel asserts that the two boards handed to
    predict_on_batch are exactly the two single-stone positions built below
    (black at (0, 0) and black at (1, 0)) — i.e. the two children of the
    fixture tree expanded in one batch of size mcts_batch_size=2.
    """
    tree = self.tree
    board = self.board
    size = conf['SIZE']
    # Expected leaf positions: board after black's first move at each child.
    test_board1, player = game_init()
    make_play(0, 0, test_board1)
    test_board2, player = game_init()
    make_play(1, 0, test_board2)

    class DummyModel(object):
        # NOTE: `_` takes the instance slot; `self` inside refers to the
        # enclosing TestCase via closure, so we can assert from in here.
        def predict_on_batch(_, X):
            size = conf['SIZE']
            board1 = X[0].reshape(1, size, size, 17)
            board2 = X[1].reshape(1, size, size, 17)
            self.assertTrue(np.array_equal(board1, test_board1))
            self.assertTrue(np.array_equal(board2, test_board2))
            batch_size = X.shape[0]
            # Uniform-zero policy except index 0; constant value 1.
            policy = np.zeros((batch_size, size * size + 1), dtype=np.float32)
            policy[:,0] = 1
            value = np.zeros((batch_size, 1), dtype=np.float32)
            value[:] = 1
            return policy, value

    model = DummyModel()
    simulate(tree, board, model, mcts_batch_size=2, original_player=1)
def test_get_liberties(self):
    """Exercise get_liberties(x, y, board, color) on two positions.

    NOTE(review): the calls below query both empty and occupied points, so
    get_liberties appears to report the liberties the given colour('s group)
    would have at (x, y) — confirm against its implementation.
    """
    board, player = game_init()
    make_play(0, 0, board)  # black
    make_play(1, 0, board)  # white
    make_play(8, 9, board)  # black random
    make_play(2, 1, board)  # white
    make_play(8, 8, board)  # black random pos
    make_play(3, 0, board)  # white
    make_play(2, 0, board)  # black
    # ○ ● . ● . .
    # . . ● . . .
    # . . . . . .
    # Black at (2, 0) would be dead on arrival: zero liberties.
    tmp = get_liberties(2, 0, board, 1)
    self.assertEqual(len(tmp), 0)
    # White at (2, 0) joins the surrounding white stones: four liberties.
    tmp = get_liberties(2, 0, board, -1)
    self.assertEqual(len(tmp), 4)

    board, player = game_init()
    make_play(2, 1, board)  # white
    make_play(2, 0, board)  # black
    make_play(3, 1, board)  # white
    make_play(1, 1, board)  # black
    make_play(4, 1, board, -1)  # white
    make_play(2, 2, board, -1)  # white
    # . . ○ . . .
    # . ○ ● ● ○ .
    # . . ○ . . .
    # . . . . . .
    # The two-stone group containing (2, 1)/(3, 1) has two liberties.
    tmp = get_liberties(2, 1, board, 1)
    self.assertEqual(len(tmp), 2)
    tmp = get_liberties(3, 1, board, 1)
    self.assertEqual(len(tmp), 2)
def test_model_evaluation(self):
    """Run async_simulate over the fixture tree/board with a dummy model.

    Builds the two reference single-stone boards the dummy model is expected
    to be queried with (black at (0, 0) and black at (1, 0)).
    """
    # Reference positions after black's first move at each child index.
    expected_first, _ = game_init()
    make_play(0, 0, expected_first)
    expected_second, _ = game_init()
    make_play(1, 0, expected_second)

    dummy = MCTSDummyModel()
    # Two units of energy -> both leaves are evaluated in one pass.
    async_simulate(self.tree, self.board, dummy, energy=2, original_player=1)
def test_model_engine_resign(self):
    """A ModelEngine with resign=1 must resign on its very first genmove.

    The resign move is reported as coordinate (0, SIZE + 1) == (0, 10) on a
    9x9 board.
    """
    board, player = game_init()
    model = DummyModel()
    # Always resign
    self.engine = ModelEngine(model, mcts_simulations=8, board=board, resign=1)
    # BUG FIX: the original unpacking bound the name `policy` twice
    # (x, y, policy, value, board, player, policy). Only x and y are used,
    # so discard the remaining five return values explicitly.
    x, y, _, _, _, _, _ = self.engine.genmove('B')
    self.assertEqual( (x, y), (0, 10))
def test_play(self):
    """Play a two-move self-play game and validate the recorded game data.

    With komi, white wins a two-move game, so every black move record gets a
    value target of -1 and every white record +1. Player encoding observed
    here: 1 == black, 0 == white; `winner` 0 means white won (see the
    winner mapping in play_game).
    """
    model = DummyModel()
    mcts_simulations = 8  # mcts batch size is 8 and we need at least one batch
    game_data = play_game(model, model, mcts_simulations, conf['STOP_EXPLORATION'], self_play=True, num_moves=2)
    winner = game_data['winner']
    test_board1, player = game_init()
    board = game_data['moves'][0]['board']
    self.assertTrue(np.array_equal(board, test_board1))  # First board is empty
    self.assertEqual(winner, 0)  # White should win with 5.5 komi after 2 moves
    # Even-indexed moves belong to black, who lost.
    for move, move_data in enumerate(game_data['moves'][::2]):
        # Black player lost
        value_target = 1 if winner == move_data['player'] else -1
        self.assertEqual(move_data['player'], 1)
        self.assertEqual(value_target, -1)
    # Odd-indexed moves belong to white, who won.
    for move, move_data in enumerate(game_data['moves'][1::2]):
        # White player won
        value_target = 1 if winner == move_data['player'] else -1
        self.assertEqual(move_data['player'], 0)
        self.assertEqual(value_target, 1)
def test_model_learning(self):
    """A trained model should value a board full of black stones correctly.

    Builds a board where black occupies every other intersection (white
    always passes), then checks the value head from all four combinations of
    board colour / player-to-move. The last feature plane (index -1)
    encodes the player to move; flipping it swaps the perspective.
    """
    model = self.model
    board, player = game_init()
    for i in range(SIZE):
        for j in range(SIZE):
            if (i + j) % 2 == 0:
                make_play(i, j, board)
                make_play(0, SIZE, board)  # White does not play playing
    # Black board, black to play
    policies, values = model.predict_on_batch(board)
    self.assertGreater(values[0][0], 0.9)
    # White board, white to play
    board[:, :, :, -1] = 0
    policies, values = model.predict_on_batch(board)
    self.assertGreater(values[0][0], 0.9)
    board[:, :, :, -1] = 1
    make_play(0, SIZE, board)  # black passes
    # Black board, white to play
    policies, values = model.predict_on_batch(board)
    self.assertLess(values[0][0], -0.9)
    board[:, :, :, -1] = 1
    # White board, black to play
    policies, values = model.predict_on_batch(board)
    self.assertLess(values[0][0], -0.9)
def test_simulation_can_recover_from_sucide_move_white(self):
    """MCTS must override a policy whose top move is suicide.

    White fills the whole board except one random intersection (black always
    passes). The raw policy is doctored so the suicidal fill-in move has the
    highest prior; select_play with temperature=0 must still end up choosing
    pass because deeper simulations reveal the better value.
    """
    model = self.model
    board, player = game_init()
    # Randomly chosen intersection left empty.
    x = randrange(SIZE)
    y = randrange(SIZE)
    for i in range(SIZE):
        for j in range(SIZE):
            make_play(0, SIZE, board)  # Black does not play playing
            if i == x and j == y:
                make_play(0, SIZE, board)  # pass on one intersection
            else:
                make_play(i, j, board)
    make_play(0, SIZE, board)  # Black does not play playing
    policies, values = model.predict_on_batch(board)
    policy = policies[0]
    # Swap the pass probability onto the suicidal board move.
    policy[y * SIZE + x], policy[SIZE * SIZE] = policy[SIZE * SIZE], policy[y * SIZE + x]  # Make best move sucide
    mask = legal_moves(board)
    policy = ma.masked_array(policy, mask=mask)
    self.assertEqual(np.argmax(policy), y * SIZE + x)  # Best option in policy is sucide
    tree = new_tree(policy, board)
    chosen_play = select_play(policy, board, mcts_simulations=128, mcts_tree=tree, temperature=0, model=model)
    # First simulation chooses pass, second simulation chooses sucide (p is still higher),
    # then going deeper it chooses pass again (value is higher)
    self.assertEqual(chosen_play, SIZE * SIZE)  # Pass move is best option
def test_legal_moves_not_ko(self):
    """Capturing TWO stones does not create a ko: recapture stays legal.

    Board planes observed here: [0]/[1] are current stone planes, [2]/[3]
    are the previous-position planes used for ko detection; mask True marks
    an illegal move.
    """
    board, player = game_init()
    make_play(0, 0, board)  # black
    make_play(1, 0, board)  # white
    make_play(1, 1, board)  # black
    make_play(2, 0, board)  # white
    make_play(2, 1, board)  # black
    make_play(8, 8, board)  # white random pos
    # ○ ● ● . . .
    # . ○ ○ . . .
    # . . . . . .
    make_play(3, 0, board)  # black captures_first
    # ○ . . ○ . .
    # . ○ ○ . . .
    # . . . . . .
    mask = legal_moves(board)
    self.assertEqual(board[0][0][1][0], 0)  # white stone 1
    self.assertEqual(board[0][0][1][1], 0)  # was taken
    self.assertEqual(board[0][0][2][0], 0)  # white stone 2
    self.assertEqual(board[0][0][2][1], 0)  # was taken
    self.assertEqual(board[0][0][1][2], 1)  # white stone 1 was here
    self.assertEqual(board[0][0][1][3], 0)  # black stone was not here
    self.assertEqual(board[0][0][2][2], 1)  # white stone 2 was here
    self.assertEqual(board[0][0][2][3], 0)  # black stone was not here
    # Neither captured point is a ko point: both moves remain legal.
    self.assertEqual(mask[1], False)
    self.assertEqual(mask[2], False)
def test_model_saving_after_training(self):
    """Training changes the live model, but a saved snapshot is unaffected.

    Saves the freshly built model to test.h5, trains it (which also renames
    it model_1 -> model_2), then checks the in-memory predictions changed
    while the reloaded snapshot still reproduces the original predictions.
    """
    init_directories()
    model_name = "model_1"
    model = build_model(model_name)
    self.assertEqual(model.name, 'model_1')
    board, player = game_init()
    policies, values = model.predict(board)
    # Remove a stale snapshot from a previous run if present.
    # (Narrowed from a bare `except:`, which also swallowed
    # KeyboardInterrupt/SystemExit and any unrelated bug.)
    try:
        os.remove('test.h5')
    except OSError:
        pass
    model.save('test.h5')
    self_play(model, n_games=2, mcts_simulations=32)
    train(model, game_model_name=model.name, epochs=2)
    self.assertEqual(model.name, 'model_2')
    # Weights changed in place by training...
    policies2, values2 = model.predict(board)
    self.assertFalse(np.array_equal(values, values2))
    self.assertFalse(np.array_equal(policies, policies2))
    # ...but the snapshot on disk still matches the pre-training outputs.
    model3 = load_model('test.h5', custom_objects={'loss': loss})
    policies3, values3 = model3.predict(board)
    self.assertTrue(np.array_equal(values, values3))
    self.assertTrue(np.array_equal(policies, policies3))
    os.remove('test.h5')
def __init__(self):
    """Set up the GTP front end: zero komi, empty board, best saved model."""
    self._komi = 0
    self.board, self.player = game_init()
    best = load_best_model()
    self.sejong_engine = SejongGoEngine(best, conf['MCTS_SIMULATIONS'], self.board)
    print("GTP engine ready")
def test_full_board_capture(self):
    """A single white stone can capture black's whole-board group.

    Black fills all but the last two points (white passing), white takes one
    corner, black fills the other — leaving the giant black group with the
    corner as its single liberty — and white's capture at that corner clears
    the entire board except the capturing stone.
    """
    size = conf['SIZE']
    board, player = game_init()
    for i in range(size * size - 2):
        x, y = index2coord(i)
        make_play(x, y, board)  # black
        make_play(0, size, board)  # white pass
    # ○ ○ ○ ○ ○ ○
    # ○ ○ ○ ○ ○ ○
    # ○ ○ ○ ○ ○ ○
    # ○ ○ ○ ○ . .
    make_play(0, size, board)  # black pass
    make_play(size - 1, size - 1, board)  # white corner
    # ○ ○ ○ ○ ○ ○
    # ○ ○ ○ ○ ○ ○
    # ○ ○ ○ ○ ○ ○
    # ○ ○ ○ ○ . ●
    for i in range(size * size - 2):
        x, y = index2coord(i)
        self.assertEqual(board[0][y][x][0], 1)  # black stone i
        self.assertEqual(board[0][y][x][1], 0)  # black stone i
    self.assertEqual(board[0][size - 1][size - 1][0], 0)  # white stone
    self.assertEqual(board[0][size - 1][size - 1][1], 1)  # white stone
    self.assertEqual(board[0][size - 1][size - 2][0], 0)  # empty
    self.assertEqual(board[0][size - 1][size - 2][1], 0)  # empty
    make_play(size - 2, size - 1, board)  # black
    # ○ ○ ○ ○ ○ ○
    # ○ ○ ○ ○ ○ ○
    # ○ ○ ○ ○ ○ ○
    # ○ ○ ○ ○ ○ .
    # Black's move captured the lone white corner stone.
    for i in range(size * size - 1):
        x, y = index2coord(i)
        self.assertEqual(board[0][y][x][0], 0)  # black stone i
        self.assertEqual(board[0][y][x][1], 1)  # black stone i (it's white's turn)
    self.assertEqual(board[0][size - 1][size - 1][0], 0)  # empty
    self.assertEqual(board[0][size - 1][size - 1][1], 0)  # empty
    make_play(size - 1, size - 1, board)  # white
    # . . . . . .
    # . . . . . .
    # . . . . . .
    # . . . . . ●
    # White's corner stone captures the entire black group.
    for i in range(size * size - 1):
        x, y = index2coord(i)
        self.assertEqual(board[0][y][x][0], 0)  # empty
        self.assertEqual(board[0][y][x][1], 0)  # empty
    self.assertEqual(board[0][size - 1][size - 1][0], 0)  # white
    self.assertEqual(board[0][size - 1][size - 1][1], 1)  # white
def play_game_kgs(self, game_file):
    """Replay a KGS SGF game file into a list of training move records.

    Each record holds a board snapshot, a one-hot policy target for the move
    actually played, and a value target (+1 if the mover won, else -1).
    Returns None when the SGF board size does not match conf['SIZE'].
    """
    with open(game_file, "rb") as f:
        game = sgf.Sgf_game.from_bytes(f.read())
    winner = game.get_winner()
    board_size = game.get_size()
    root_node = game.get_root()
    b_player = root_node.get("PB")
    w_player = root_node.get("PW")
    SIZE = conf['SIZE']
    if board_size != SIZE:
        print("GAME SIZE IS NOT EXPECTED ", board_size)
        return
    board, _ = game_init()
    moves = []
    # Apply handicap stones ("AB" property) if present. sgfmill's node.get
    # raises KeyError when the property is absent — catch only that.
    # (The original bare `except: pass` also hid genuine failures inside
    # setupHandicap; those now propagate.)
    try:
        handicap = root_node.get("AB")  # Get handicap
    except KeyError:
        handicap = None
    if handicap is not None and len(handicap) > 0:
        board = self.setupHandicap(board, handicap)
    for move_n, node in enumerate(game.get_main_sequence()):
        player, move = node.get_move()
        if player is None:
            continue  # non-move node (setup/comment), skip
        if move is None:
            index = -1
            move = (board_size, board_size)  # pass move
        else:
            index = coord2index(move[0], move[1])
        # One-hot policy over SIZE*SIZE points plus a trailing pass slot;
        # index -1 addresses the pass slot.
        policy_target = np.zeros(board_size * board_size + 1, dtype=float)
        policy_target[index] = 1.0
        value = -1.0
        if winner == player:
            value = 1.0
        move_data = {
            'board': np.copy(board),
            'policy': policy_target,
            'value': value,
            'move': (move[0], move[1]),
            'move_n': move_n,
            'player': player
        }
        moves.append(move_data)
        board, _ = make_play(move[0], move[1], board, color=(1 if player == "b" else -1))
    return moves
def test_legal_moves_suicide2(self):
    """Black at (2, 0), surrounded with nothing to capture, must be masked."""
    board, _ = game_init()
    # (x, y, color) — color None lets make_play use the side to move.
    script = [
        (3, 0, None),   # black = 1, col, row
        (1, 0, None),   # white
        (1, 1, None),   # black
        (2, 1, None),   # white
        (3, 1, -1),     # white
        (4, 0, -1),     # white
    ]
    for x, y, color in script:
        if color is None:
            make_play(x, y, board)
        else:
            make_play(x, y, board, color)
    # . ● . ○ ● .
    # . ○ ● ● . .
    # . . . . . .
    mask = legal_moves(board)
    self.assertEqual(mask[2], True)  # suicide move should be illegal
def test_legal_moves_suicide(self):
    """A black play at (2, 0) with no capture available is suicide: masked."""
    board, _ = game_init()
    # Alternating moves, black first; (8, 8) is black filler far away.
    for x, y in [(0, 1), (1, 0), (1, 1), (2, 1), (8, 8), (3, 0)]:
        make_play(x, y, board)
    # . ● . ● . .
    # ○ ○ ● . . .
    # . . . . . .
    mask = legal_moves(board)
    self.assertEqual(mask[2], True)  # suicide move should be illegal
def __init__(self, size=9, komi=7.5):
    """Create a fresh game: empty board, black to move, no captures yet."""
    self.board, to_move = game_init()
    self.current_player = to_move
    self.size = size
    self.komi = komi
    self.ko = None
    self.handicaps = []
    self.history = []
    # Captured-stone counters, one per colour.
    self.num_black_prisoners = 0
    self.num_white_prisoners = 0
    self.is_end_of_game = False
    # Each pass move by a player subtracts a point
    self.passes_white = 0
    self.passes_black = 0
def test_legal_moves_not_suicide(self):
    """Playing at (2, 0) captures the white stone at (1, 0), so it is legal."""
    board, _ = game_init()
    # Alternating moves, black first; (8, 8) is black filler far away.
    for x, y in [(0, 0), (1, 0), (1, 1), (2, 1), (8, 8), (3, 0)]:
        make_play(x, y, board)
    # ○ ● . ● . .
    # . ○ ● . . .
    # . . . . . .
    mask = legal_moves(board)
    self.assertEqual(mask[2], False)  # not a suicide when capture other stones
def setUp(self):
    """Board with four black stones (white always passing) plus a policy
    that puts weight on each played point and -1 on the pass slot."""
    self.size = conf['SIZE']
    self.board, _ = game_init()
    self.policy = np.zeros((1, self.size * self.size + 1), dtype=np.float32)
    for col, row in ((1, 1), (1, 2), (1, 3), (2, 3)):
        make_play(col, row, self.board)      # black plays
        make_play(0, self.size, self.board)  # white passes
        self.policy[0, col + row * self.size] = 1
    self.policy[0, self.size * self.size] = -1  # Pass move
def main():
    """Botzone driver loop: read a JSON request, answer with the engine's move.

    The first exchange decides colour: request (-2, -2) means we move first
    (black); otherwise the opponent's move is applied and we play the other
    colour. Coordinates are 1-based on the wire, 0-based internally; a pass
    is reported as (-1, -1). The loop exits when input stops parsing as JSON.
    """
    global start_time, time_limit
    start_time = datetime.now()
    time_limit = 11.6  # generous budget for the first move (model load etc.)
    raw = Utils.json_input()
    request = raw['requests']
    model = load_best_model()
    board, player = game_init()
    # (-2, -2) is the "you are black, move first" sentinel.
    if not (request['x'] == -2 and request['y'] == -2):
        board, player = make_play(request['x'] - 1, request['y'] - 1, board)
    if player == 1:
        color = 'B'
    else:
        color = 'W'
    engine = ModelEngine(model, conf['MCTS_SIMULATIONS'], board)
    x, y, _, _, _, _, _ = engine.genmove(color)
    response = {}
    response['x'], response['y'] = x + 1, y + 1
    # y == SIZE encodes a pass internally; the protocol wants (-1, -1).
    if y == conf['SIZE']:
        response['x'], response['y'] = -1, -1
    Utils.json_output({'response': response})
    print(">>>BOTZONE_REQUEST_KEEP_RUNNING<<<")
    time_limit = 3.6  # tighter budget for subsequent moves
    while True:
        try:
            start_time = datetime.now()
            raw = Utils.json_input()
            request = raw['requests']
            # (-1, -1) from the opponent is a pass — nothing to replay.
            if not (request['x'] == -1 and request['y'] == -1):
                # NOTE(review): 'B' is hardcoded as the opponent's colour
                # here, but when we play black the opponent is white —
                # confirm whether engine.play actually uses this argument.
                engine.play('B', request['x'] - 1, request['y'] - 1, update_tree=True)
            x, y, _, _, _, _, _ = engine.genmove(color)
            response = {}
            response['x'], response['y'] = x + 1, y + 1
            if y == conf['SIZE']:
                response['x'], response['y'] = -1, -1
            Utils.json_output({'response': response})
            print(">>>BOTZONE_REQUEST_KEEP_RUNNING<<<")
        except json.JSONDecodeError:
            break  # no more (valid) requests: shut down
def test_bug(self):
    """Regression test: white's play at (6, 7) must capture both black groups.

    White owns the whole board except two black groups and the empty point
    (6, 7). Playing there removes both adjacent black groups at once.
    """
    # ● ● ● ● ● ● ● ● ●
    # ● ● ● ● ● ● ● ● ●
    # ● ● ● ● ● ● ● ● ●
    # ● ● ● ● ● ● ● ● ●
    # ● ● ● ● ● ● ● ● ●
    # ● ● ● ● ● ● ● ● ●
    # ● ● ● ● ● ○ ○ ● ●
    # ● ● ● ● ● ● . ● ●
    # ● ● ● ● ● ● ○ ○ ○
    size = conf['SIZE']
    board, player = game_init()
    for i in range(size*size):
        x, y = index2coord(i)
        if (x, y) in [(5, 6), (6, 6), (6, 8), (7, 8), (8, 8)]:
            make_play(x, y, board)  # black
            make_play(0, size, board)  # white pass
        elif (x, y) in [(6, 7)]:
            make_play(0, size, board)  # black pass
            make_play(0, size, board)  # white pass
        else:
            make_play(0, size, board)  # black pass
            make_play(x, y, board)  # white
    make_play(0, size, board)  # black pass
    make_play(6, 7, board)  # white — the capturing move
    # ● ● ● ● ● ● ● ● ●
    # ● ● ● ● ● ● ● ● ●
    # ● ● ● ● ● ● ● ● ●
    # ● ● ● ● ● ● ● ● ●
    # ● ● ● ● ● ● ● ● ●
    # ● ● ● ● ● ● ● ● ●
    # ● ● ● ● ● . . ● ●
    # ● ● ● ● ● ● ● ● ●
    # ● ● ● ● ● ● . . .
    for i in range(size*size - 1):
        x, y = index2coord(i)
        if (x, y) in [(5, 6), (6, 6), (6, 8), (7, 8), (8, 8)]:
            self.assertEqual(board[0][y][x][0], 0)  # empty
            self.assertEqual(board[0][y][x][1], 0)  # empty
        else:
            self.assertEqual(board[0][y][x][0], 0)  # white
            self.assertEqual(board[0][y][x][1], 1)  # white
def test_model_saving(self):
    """Saving and reloading a model must reproduce its predictions exactly."""
    init_directories()
    model_name = "model_1"
    model = build_model(model_name)
    board, player = game_init()
    policies, values = model.predict(board)
    # Remove a stale snapshot from a previous run if present.
    # (Narrowed from a bare `except:`, which also swallowed
    # KeyboardInterrupt/SystemExit and any unrelated bug.)
    try:
        os.remove('test.h5')
    except OSError:
        pass
    model.save('test.h5')
    model2 = load_model('test.h5', custom_objects={'loss': loss})
    policies2, values2 = model2.predict(board)
    self.assertTrue(np.array_equal(values, values2))
    self.assertTrue(np.array_equal(policies, policies2))
    os.remove('test.h5')
def test_learned_to_pass_black(self):
    """On an almost-full black board, the model's top move must be pass."""
    net = self.model
    board, _ = game_init()
    # One randomly chosen intersection is left empty.
    skip_x, skip_y = randrange(SIZE), randrange(SIZE)
    for col in range(SIZE):
        for row in range(SIZE):
            if col == skip_x and row == skip_y:
                make_play(0, SIZE, board)  # pass on one intersection
            else:
                make_play(col, row, board)  # black plays
            make_play(0, SIZE, board)  # white always passes
    policies, values = net.predict_on_batch(board)
    self.assertEqual(np.argmax(policies[0]), SIZE * SIZE)  # Pass move is best option
def test_self_sucide(self):
    """A suicidal black play removes the played stone, not the white ones."""
    board, _ = game_init()
    # Alternating moves, black first; (8, 9) and (8, 8) are black filler.
    for x, y in [(0, 0), (1, 0), (8, 9), (2, 1), (8, 8), (3, 0)]:
        make_play(x, y, board)
    # ○ ● . ● . .
    # . . ● . . .
    # . . . . . .
    make_play(2, 0, board)  # black plays the suicide at (2, 0)
    self.assertEqual(board[0][0][1][0], 1)  # white stone
    self.assertEqual(board[0][0][1][1], 0)  # was not taken
    self.assertEqual(board[0][0][2][0], 0)  # black stone
    self.assertEqual(board[0][0][2][1], 0)  # was taken
def setUp(self):
    """Build a two-child MCTS tree fixture, an empty board and a dummy model.

    Symmetries are trimmed to the identity only so runs are reproducible.
    """
    # Remove the symmetries for reproductibility
    import symmetry
    symmetry.SYMMETRIES = symmetry.SYMMETRIES[0:1]

    def leaf(prior):
        # Fresh, unexpanded child node with prior probability `prior`.
        return {
            'count': 0,
            'p': prior,
            'value': 0,
            'mean_value': 0,
            'move': 2,
            'subtree': {},
        }

    root = {
        'count': 0,
        'mean_value': 0,
        'value': 0,
        'parent': None,
        'move': 1,
        'subtree': {0: leaf(1), 1: leaf(0)},
    }
    for child in root['subtree'].values():
        child['parent'] = root

    self.board, _ = game_init()
    self.model = DummyModel()
    self.tree = root
def test_legal_moves_ko(self):
    """After black captures at (2, 0), immediate recapture at (1, 0) is ko."""
    board, _ = game_init()
    # Alternating moves, black first; (8, 8) is black filler far away.
    for x, y in [(0, 0), (1, 0), (1, 1), (2, 1), (8, 8), (3, 0)]:
        make_play(x, y, board)
    # ○ ● . ● . .
    # . ○ ● . . .
    # . . . . . .
    make_play(2, 0, board)  # black captures the white stone at (1, 0)
    # ○ . ○ ● . .
    # . ○ ● . . .
    # . . . . . .
    mask = legal_moves(board)
    self.assertEqual(board[0][0][1][0], 0)  # white stone
    self.assertEqual(board[0][0][1][1], 0)  # was taken
    self.assertEqual(board[0][0][1][2], 1)  # white stone was here
    self.assertEqual(board[0][0][1][3], 0)  # black stone was not here
    self.assertEqual(mask[1], True)  # the ko point is masked as illegal
def setUp(self):
    """Two-child async-MCTS tree fixture (with virtual-loss fields), empty
    board, dummy model, and the simulation worker pool.

    Symmetries are trimmed to the identity only so runs are reproducible.
    """
    # Remove the symmetries for reproductibility
    import symmetry
    symmetry.SYMMETRIES = symmetry.SYMMETRIES[0:1]

    def leaf(prior):
        # Unexpanded child carrying a virtual-loss counter for async MCTS.
        return {
            'count': 0,
            'p': prior,
            'value': 0,
            'mean_value': 0,
            'virtual_loss': 0,
            'subtree': {},
        }

    root = {
        'count': 0,
        'mean_value': 0,
        'value': 0,
        'parent': None,
        'virtual_loss': 0,
        'subtree': {0: leaf(1), 1: leaf(0)},
    }
    for child in root['subtree'].values():
        child['parent'] = root

    self.board, _ = game_init()
    self.model = DummyModel()
    self.tree = root
    init_simulation_workers()
def test_simulation_can_recover_from_sucide_move_white(self):
    """MCTS (Tree-object API) must override a policy whose top move is suicide.

    White is given two eyes; if the model already prefers pass, the policy is
    doctored so the suicidal move at index 0 becomes the top prior. MCTS with
    temperature=0 must still choose pass.
    """
    model = self.model
    board, player = game_init()
    give_two_eyes(board, 'W')
    policies, values = model.predict_on_batch(board)
    policy = policies[0]
    if np.argmax(policy) == PASS:
        # Swap pass probability onto the suicidal point at index 0.
        policy[0], policy[PASS] = policy[PASS], policy[0]  # Make best move sucide
        mask = legal_moves(board)
        policy = ma.masked_array(policy, mask=mask)
        self.assertEqual(np.argmax(policy), 0)  # Best option in policy is sucide
    else:
        # Model did not rank pass first; the doctoring precondition failed.
        print("Warning, policy is not great")
    tree = Tree()
    tree.new_tree(policy, board, move=2)
    chosen_play = select_play(policy, board, mcts_simulations=128, mcts_tree=tree.tree, temperature=0, model=model)
    # First simulation chooses pass, second simulation chooses sucide (p is still higher),
    # then going deeper it chooses pass again (value is higher)
    self.assertEqual(chosen_play, PASS)  # Pass move is best option
def clear_board(self):
    """GTP `clear_board`: reset to a fresh empty board.

    Returns the empty string expected as a successful GTP response body.
    """
    self.board, self.player = game_init()
    return ""
def __init__(self, model, logfile):
    """Fresh board, start the engine with `model`, remember the log target.

    NOTE(review): `self.logfile` is assigned after start_engine(); confirm
    start_engine does not itself need the log file.
    """
    self.board, self.player = game_init()
    self.start_engine(model)
    self.logfile = logfile
def play_game(model1, model2, mcts_simulations, stop_exploration, self_play=False, num_moves=None, resign_model1=None, resign_model2=None):
    """Play one game between model1 and model2 and return its record.

    Colours are assigned by choose_first_player. Exploration temperature
    drops to 0 at move `stop_exploration`. The game ends on double pass,
    resignation (when a resign threshold is set), or after `num_moves`
    (default: 2 * SIZE * SIZE) moves.

    Returns a dict with the per-move training records, model names, the
    winner (1 = model-as-black convention via winner_result: 1 -> 1,
    -1 -> 0, draw -> None), the result string and the resign settings.
    """
    board, player = game_init()
    moves = []
    current_model, other_model = choose_first_player(model1, model2)
    mcts_tree, other_mcts = None, None
    last_value = None
    value = None
    model1_isblack = current_model == model1
    skipped_last = False
    temperature = 1
    # FIX: `start` was assigned twice in the original; keep a single timestamp.
    start = datetime.datetime.now()
    end_reason = "PLAYED ALL MOVES"
    if num_moves is None:
        num_moves = SIZE * SIZE * 2
    for move_n in range(num_moves):
        last_value = value
        if move_n == stop_exploration:
            temperature = 0
        policies, values = current_model.predict_on_batch(board)
        policy = policies[0]
        value = values[0]
        resign = resign_model1 if current_model == model1 else resign_model2
        if resign and value <= resign:
            end_reason = "resign"
            break
        # Start of the game mcts_tree is None, but it can be {} if we selected a play that mcts never checked
        if not mcts_tree or not mcts_tree['subtree']:
            mcts_tree = new_tree(policy, board, add_noise=self_play)
            if self_play:
                other_mcts = mcts_tree
        index = select_play(policy, board, mcts_simulations, mcts_tree, temperature, current_model)
        x, y = index2coord(index)
        # Policy target for training: visit-derived priors from the tree.
        policy_target = np.zeros(SIZE * SIZE + 1)
        for _index, d in mcts_tree['subtree'].items():
            policy_target[_index] = d['p']
        move_data = {
            'board': np.copy(board),
            'policy': policy_target,
            'value': value,
            'move': (x, y),
            'move_n': move_n,
            'player': player,
        }
        moves.append(move_data)
        if skipped_last and y == SIZE:
            end_reason = "BOTH_PASSED"
            break
        skipped_last = y == SIZE
        # Update trees
        if not self_play:
            # Update other only if we are not in self_play
            if other_mcts and index in other_mcts['subtree']:
                other_mcts = other_mcts['subtree'][index]
                other_mcts['parent'] = None  # Cut the tree
            else:
                # FIX: the original else-branch duplicated the then-branch and
                # so raised KeyError/TypeError exactly when the move was never
                # expanded in the other player's tree (or the tree was None).
                # Drop the stale tree; it is rebuilt at the top of the loop
                # once it becomes mcts_tree.
                other_mcts = None
        mcts_tree = mcts_tree['subtree'][index]
        mcts_tree['parent'] = None  # Cut the tree
        # Swap players
        board, player = make_play(x, y, board)
        current_model, other_model = other_model, current_model
        mcts_tree, other_mcts = other_mcts, mcts_tree
        if conf['SHOW_EACH_MOVE']:
            # Inverted here because we already swapped players
            color = "W" if player == 1 else "B"
            print("%s(%s,%s)" % (color, x, y))
            print("")
            show_board(board)
            print("")
    winner, black_points, white_points = get_winner(board)
    player_string = {1: "B", 0: "D", -1: "W"}
    if end_reason == "resign":
        winner_string = "%s+R" % (player_string[player])
    else:
        winner_string = "%s+%s" % (player_string[winner], abs(black_points - white_points))
    winner_result = {1: 1, -1: 0, 0: None}
    if winner == 0:
        winner_model = None
    else:
        winner_model = model1 if (winner == 1) == model1_isblack else model2
    if model1_isblack:
        modelB, modelW = model1, model2
    else:
        modelW, modelB = model1, model2
    if player == 0:  # black played last
        bvalue, wvalue = value, last_value
    else:
        bvalue, wvalue = last_value, value
    if conf['SHOW_END_GAME']:
        print("")
        print("B:%s, W:%s" % (modelB.name, modelW.name))
        print("Bvalue:%s, Wvalue:%s" % (bvalue, wvalue))
        show_board(board)
    print("Game played (%s: %s) : %s" % (winner_string, end_reason, datetime.datetime.now() - start))
    game_data = {
        'moves': moves,
        'modelB_name': modelB.name,
        'modelW_name': modelW.name,
        'winner': winner_result[winner],
        # FIX: a drawn game left winner_model as None; `.name` then crashed.
        'winner_model': winner_model.name if winner_model is not None else None,
        'result': winner_string,
        'resign_model1': resign_model1,
        'resign_model2': resign_model2,
    }
    return game_data
def __init__(self, model):
    """Fresh empty board, then start the underlying engine with `model`."""
    self.board, self.player = game_init()
    self.start_engine(model)