def test_model_evaluation(self):
    """Run one batched ``simulate`` call and verify the boards sent to the model.

    The stub model asserts that the first two boards of the batch equal a
    fresh game with a single play at (0, 0) and at (1, 0) respectively.
    """
    tree = self.tree
    board = self.board
    size = conf['SIZE']

    expected_first, player = game_init()
    make_play(0, 0, expected_first)
    expected_second, player = game_init()
    make_play(1, 0, expected_second)

    class DummyModel(object):
        """Stub exposing only ``predict_on_batch``."""

        def predict_on_batch(_, X):
            # ``self`` below is the enclosing TestCase, captured by closure.
            n = conf['SIZE']
            seen = [X[row].reshape(1, n, n, 17) for row in (0, 1)]
            self.assertTrue(np.array_equal(seen[0], expected_first))
            self.assertTrue(np.array_equal(seen[1], expected_second))

            rows = X.shape[0]
            policy = np.zeros((rows, n * n + 1), dtype=np.float32)
            policy[:, 0] = 1  # all probability on move index 0
            value = np.ones((rows, 1), dtype=np.float32)
            return policy, value

    simulate(tree, board, DummyModel(), mcts_batch_size=2, original_player=1)
def do_move(self, action, color):
    """Play ``action`` on ``self.board`` unless it is RESIGN or ``"pass"``.

    When ``color`` is ``None`` it falls back to ``self.current_player``.
    """
    if color is None:
        color = self.current_player

    is_board_move = action != RESIGN and action != "pass"
    if not is_board_move:
        return

    # index2coord also covers the pass action: a skip yields y == SIZE.
    x, y = index2coord(action)
    make_play(x, y, self.board, color)
def test_model_evaluation(self):
    """Drive ``async_simulate`` from the fixture tree using ``MCTSDummyModel``."""
    tree = self.tree
    board = self.board
    size = conf['SIZE']

    # Two reference boards, each a fresh game with a single play.
    reference_boards = []
    for x, y in ((0, 0), (1, 0)):
        fresh_board, player = game_init()
        make_play(x, y, fresh_board)
        reference_boards.append(fresh_board)
    test_board1, test_board2 = reference_boards

    model = MCTSDummyModel()
    async_simulate(tree, board, model, energy=2, original_player=1)
def test_simulation_can_recover_from_sucide_move_white(self):
    """Check that the search picks the pass move when the top policy move is a suicide.

    Black only passes while every intersection except one random point is
    played; the policy entries of that point and of the pass move are then
    swapped before running ``select_play``.
    """
    model = self.model
    board, player = game_init()
    x = randrange(SIZE)
    y = randrange(SIZE)
    hole = (x, y)
    for i in range(SIZE):
        for j in range(SIZE):
            make_play(0, SIZE, board)  # Black passes instead of playing
            if (i, j) == hole:
                make_play(0, SIZE, board)  # pass, leaving this intersection empty
            else:
                make_play(i, j, board)
    make_play(0, SIZE, board)  # Black passes once more

    policies, values = model.predict_on_batch(board)
    policy = policies[0]
    suicide_index = y * SIZE + x
    pass_index = SIZE * SIZE
    # Swap the two entries so that the suicide move becomes the best move.
    policy[suicide_index], policy[pass_index] = policy[pass_index], policy[suicide_index]

    mask = legal_moves(board)
    policy = ma.masked_array(policy, mask=mask)
    self.assertEqual(np.argmax(policy), suicide_index)  # best policy option is the suicide

    tree = new_tree(policy, board)
    chosen_play = select_play(policy, board, mcts_simulations=128, mcts_tree=tree,
                              temperature=0, model=model)

    # First simulation chooses pass, second simulation chooses suicide (p is still
    # higher), then going deeper it chooses pass again (value is higher).
    self.assertEqual(chosen_play, pass_index)  # pass move is the best option
def setUp(self):
    """Build the shared fixture: a board with four black stones and a matching policy.

    Sets ``self.board``, ``self.size`` and ``self.policy``. The policy holds 1
    at every played point and -1 at the pass index.
    """
    size = conf['SIZE']
    board, player = game_init()
    policy = np.zeros((1, size * size + 1), dtype=np.float32)
    self.board = board
    self.size = size
    self.policy = policy

    black_stones = [(1, 1), (1, 2), (1, 3), (2, 3)]
    for x, y in black_stones:
        make_play(x, y, self.board)  # black
        make_play(0, size, self.board)  # white pass
        self.policy[0, x + y * size] = 1
    self.policy[0, size * size] = -1  # Pass move
def play(self, color, x, y, update_tree=True):
    """Play (x, y) on the engine board and return ``(board, player)``.

    ``color`` is accepted but not used by this implementation. When
    ``update_tree`` is true the move index is also forwarded to
    ``self.tree.play``.
    """
    move_index = coord2index(x, y)
    if update_tree:
        self.tree.play(move_index)

    next_board, next_player = make_play(x, y, self.board)
    self.board = next_board
    self.player = next_player
    self.move += 1
    return self.board, self.player
def give_two_eyes(board, color):
    """Fill ``board`` with stones of ``color`` except for two eye points.

    The other color only passes. The intersections (0, 0) and (2, 0) are
    left empty as the two eyes.

    Args:
        board: board array, modified through ``make_play``.
        color: ``'B'`` or ``'W'``.

    Raises:
        ValueError: if ``color`` is not exactly ``'B'`` or ``'W'``.
    """
    # Exact membership: a substring test (``color in 'BW'``) would accept
    # '' and 'BW', and would raise TypeError for non-string values.
    if color not in ('B', 'W'):
        raise ValueError("Invalid color")

    eyes = [[0, 0], [2, 0]]  # The 2 eyes
    for i in range(SIZE):
        for j in range(SIZE):
            if color == 'W':
                make_play(0, SIZE, board)  # Black pass
            if [i, j] in eyes:
                make_play(0, SIZE, board)  # pass on the two eye intersections
            else:
                make_play(i, j, board)
            if color == 'B':
                make_play(0, SIZE, board)  # White pass
    if color == 'W':
        make_play(0, SIZE, board)  # Black last pass
def play(self, color, x, y, update_tree=True):
    """Play (x, y) for ``color`` and return ``(board, player)``.

    When ``update_tree`` is true, ``self.mcts_tree`` is advanced to the child
    for this move (detached from its parent), or reset to ``None`` if the
    move is not present in the current tree.
    """
    index = coord2index(x, y)
    if update_tree:
        tree = self.mcts_tree
        if tree and index in tree['subtree']:
            self.mcts_tree = tree['subtree'][index]
            self.mcts_tree['parent'] = None  # Cut the tree
        else:
            self.mcts_tree = None

    played = make_play(x, y, self.board, color)
    self.board, self.player = played
    self.move += 1
    return self.board, self.player
def play_game_kgs(self, game_file):
    """Replay an SGF game file and return one training record per move.

    Args:
        game_file: path of the SGF file to read.

    Returns:
        A list of dicts with keys ``board`` (copy of the position before the
        move), ``policy`` (one-hot target, last entry is the pass move),
        ``value`` (1.0 if the mover is the game winner, else -1.0), ``move``,
        ``move_n`` and ``player``. Returns ``None`` when the game size differs
        from ``conf['SIZE']``.
    """
    with open(game_file, "rb") as f:
        game = sgf.Sgf_game.from_bytes(f.read())

    winner = game.get_winner()
    board_size = game.get_size()
    root_node = game.get_root()

    expected_size = conf['SIZE']
    if board_size != expected_size:
        print("GAME SIZE IS NOT EXPECTED ", board_size)
        return

    board, _ = game_init()
    moves = []

    try:
        handicap = root_node.get("AB")  # Get handicap
        if len(handicap) > 0:
            board = self.setupHandicap(board, handicap)
    except Exception:
        # Best effort: games without (or with unusable) handicap stones are
        # replayed from the empty board. Unlike a bare ``except`` this lets
        # KeyboardInterrupt and SystemExit propagate.
        pass

    for move_n, node in enumerate(game.get_main_sequence()):
        player, move = node.get_move()
        if player is None:
            continue

        if move is None:
            index = -1  # last policy entry is the pass move
            move = (board_size, board_size)  # pass move
        else:
            index = coord2index(move[0], move[1])

        policy_target = np.zeros(board_size * board_size + 1, dtype=float)
        policy_target[index] = 1.0
        value = 1.0 if winner == player else -1.0

        moves.append({
            'board': np.copy(board),
            'policy': policy_target,
            'value': value,
            'move': (move[0], move[1]),
            'move_n': move_n,
            'player': player,
        })
        board, _ = make_play(move[0], move[1], board, color=(1 if player == "b" else -1))

    return moves
def test_model_learning(self):
    """Check the sign of the predicted value on a checkerboard position.

    The value is expected above 0.9 for the first two evaluations and below
    -0.9 for the last two, after one more pass has been played.
    """
    model = self.model
    board, player = game_init()
    for i in range(SIZE):
        for j in range(SIZE):
            if (i + j) % 2 != 0:
                continue
            make_play(i, j, board)
            make_play(0, SIZE, board)  # White passes instead of playing

    def predicted_value():
        # ``board`` is only mutated in place below, so the closure sees updates.
        policies, values = model.predict_on_batch(board)
        return values[0][0]

    # Black board, black to play
    self.assertGreater(predicted_value(), 0.9)

    # White board, white to play
    board[:, :, :, -1] = 0
    self.assertGreater(predicted_value(), 0.9)

    board[:, :, :, -1] = 1
    make_play(0, SIZE, board)  # black passes

    # Black board, white to play
    self.assertLess(predicted_value(), -0.9)

    board[:, :, :, -1] = 1
    # White board, black to play
    self.assertLess(predicted_value(), -0.9)
def basic_tasks2(node, board, moves, model_indicator, original_player, process_id):
    """Replay ``moves``, expand ``node`` with a prediction and queue the result.

    The node statistics (``count``, ``value``, ``mean_value``) are updated
    once, then ``(node, moves)`` is put on
    ``simulation_result_queue[process_id]``.
    """
    for move_index in moves:
        x, y = index2coord(move_index)
        board, _ = make_play(x, y, board)

    # Ask the model for the policy and value of the resulting position.
    policy, value = put_predict_request(model_indicator, board)

    # Expand the node.
    node['subtree'] = new_subtree(policy, board, node)

    # Flip the sign when the position is seen from the other player's side.
    if board[0, 0, 0, -1] == original_player:
        v = value
    else:
        v = -value

    node['count'] += 1
    node['value'] += v
    node['mean_value'] = node['value'] / float(node['count'])

    result_queue = simulation_result_queue[process_id]
    result_queue.put((node, moves))
def board_worker(input_tuple):
    """Build the board reached by following the best path below one action.

    Args:
        input_tuple: ``(dic, board)`` where ``dic`` has the keys ``'action'``
            and ``'node'``. If the node is already expanded, ``dic['node']``
            is replaced by the leaf reached by following ``top_one_action``.

    Returns:
        ``board[0]`` after the moves were played, or ``None`` if anything
        failed (the failure is reported, not raised).
    """
    try:
        dic, board = input_tuple
        action = dic['action']
        if dic['node']['subtree'] != {}:
            # Already expanded: play the action, then walk down to the leaf.
            tmp_node = dic['node']
            x, y = index2coord(action)
            board, _ = make_play(x, y, board)
            while tmp_node['subtree'] != {}:
                tmp_d = top_one_action(tmp_node['subtree'])
                tmp_node = tmp_d['node']
                # The leaf is where the caller starts its update from.
                dic['node'] = tmp_node
                x, y = index2coord(tmp_d['action'])
                make_play(x, y, board)
        else:
            x, y = index2coord(action)
            make_play(x, y, board)
        return board[0]
    except Exception:
        # Keep the historical stdout marker, but also emit the traceback so
        # the failure can actually be diagnosed.
        import traceback
        print("EXCEPTION IN BOARD WORKER!!!!!!")
        traceback.print_exc()
        return None
def main():
    """Botzone entry point: answer the first request, then loop in keep-running mode.

    Sets the module globals ``start_time`` and ``time_limit`` before each
    move generation. The loop ends when reading a request raises
    ``json.JSONDecodeError``.
    """
    global start_time, time_limit

    start_time = datetime.now()
    time_limit = 11.6

    raw = Utils.json_input()
    request = raw['requests']
    model = load_best_model()
    board, player = game_init()

    opponent_moved = not (request['x'] == -2 and request['y'] == -2)
    if opponent_moved:
        board, player = make_play(request['x'] - 1, request['y'] - 1, board)

    color = 'B' if player == 1 else 'W'
    engine = ModelEngine(model, conf['MCTS_SIMULATIONS'], board)

    def answer():
        # Generate our move and send it back using 1-based coordinates;
        # a pass (y == SIZE) is reported as (-1, -1).
        x, y, _, _, _, _, _ = engine.genmove(color)
        if y == conf['SIZE']:
            response = {'x': -1, 'y': -1}
        else:
            response = {'x': x + 1, 'y': y + 1}
        Utils.json_output({'response': response})
        print(">>>BOTZONE_REQUEST_KEEP_RUNNING<<<")

    answer()

    time_limit = 3.6
    while True:
        try:
            start_time = datetime.now()
            raw = Utils.json_input()
            request = raw['requests']
            opponent_passed = request['x'] == -1 and request['y'] == -1
            if not opponent_passed:
                # NOTE(review): the opponent color is always passed as 'B',
                # whatever `color` is - confirm this is intended.
                engine.play('B', request['x'] - 1, request['y'] - 1, update_tree=True)
            answer()
        except json.JSONDecodeError:
            break
def test_learned_to_pass_black(self):
    """Check that the model's best move is the pass move on a nearly full board.

    Every intersection except one random point is played while White only
    passes.
    """
    model = self.model
    board, player = game_init()
    x = randrange(SIZE)
    y = randrange(SIZE)
    for i in range(SIZE):
        for j in range(SIZE):
            is_hole = i == x and j == y
            if is_hole:
                make_play(0, SIZE, board)  # pass, leaving this intersection empty
            else:
                make_play(i, j, board)
            make_play(0, SIZE, board)  # White passes instead of playing

    policies, values = model.predict_on_batch(board)
    best_move = np.argmax(policies[0])
    self.assertEqual(best_move, SIZE * SIZE)  # Pass move is best option
def basic_tasks(node, board, move, model_indicator, original_player):
    """Run one simulation from ``node``: descend, expand, back-propagate.

    Follows ``top_one_action`` down to a leaf, replays the collected moves on
    ``board``, expands the leaf with the predicted policy and adds the
    (sign-adjusted) value to every node up to the root.

    Returns:
        The topmost node reached while walking up the ``parent`` links.
    """
    path = [move]
    while node['subtree'] != {}:
        best = top_one_action(node['subtree'])
        node = best['node']
        path.append(best['action'])

    # Rebuild the board at the leaf.
    for move_index in path:
        x, y = index2coord(move_index)
        board, _ = make_play(x, y, board)

    policy, value = put_predict_request(model_indicator, board)
    node['subtree'] = new_subtree(policy, board, node)

    # Back-propagation; the sign flips when seen from the other player.
    if board[0, 0, 0, -1] == original_player:
        v = value
    else:
        v = -value

    while True:
        node['count'] += 1
        node['value'] += v
        node['mean_value'] = node['value'] / float(node['count'])
        if not node['parent']:
            break
        node = node['parent']
    return node
def test_model_evaluation_other_nested(self):
    """Run ``simulate`` on a hand-built tree whose second child is already expanded.

    The stub model asserts that the batch holds a board with a play at
    (0, 0), then a board with plays at (1, 0) and (2, 0).
    """
    def blank_node(p, move):
        return {'count': 0, 'p': p, 'value': 0, 'mean_value': 0,
                'move': move, 'subtree': {}}

    expanded_child = blank_node(0, 2)
    expanded_child['subtree'] = {0: blank_node(0, 3), 2: blank_node(1, 3)}
    tree = {
        'count': 0,
        'mean_value': 0,
        'value': 0,
        'parent': None,
        'move': 1,
        'subtree': {0: blank_node(1, 2), 1: expanded_child},
    }
    # Wire up the parent links.
    for child in tree['subtree'].values():
        child['parent'] = tree
    for grandchild in expanded_child['subtree'].values():
        grandchild['parent'] = expanded_child

    board = self.board

    expected_first, player = game_init()
    make_play(0, 0, expected_first)
    expected_second, player = game_init()
    make_play(1, 0, expected_second)
    make_play(2, 0, expected_second)

    class DummyModel(object):
        """Stub exposing only ``predict_on_batch``."""

        def predict_on_batch(_, X):
            # ``self`` below is the enclosing TestCase, captured by closure.
            n = conf['SIZE']
            seen = [X[row].reshape(1, n, n, 17) for row in (0, 1)]
            self.assertTrue(np.array_equal(seen[0], expected_first))
            self.assertTrue(np.array_equal(seen[1], expected_second))

            rows = X.shape[0]
            policy = np.zeros((rows, n * n + 1), dtype=np.float32)
            policy[:, 0] = 1
            value = np.ones((rows, 1), dtype=np.float32)
            return policy, value

    simulate(tree, board, DummyModel(), mcts_batch_size=2, original_player=1)
def test_bug(self):
    """Regression test: a white play at (6, 7) captures five black stones.

    Black holds five points, (6, 7) is empty and white holds everything else.
    After white plays (6, 7), the five black points must be empty and every
    other checked point white.
    """
    # ● ● ● ● ● ● ● ● ●
    # ● ● ● ● ● ● ● ● ●
    # ● ● ● ● ● ● ● ● ●
    # ● ● ● ● ● ● ● ● ●
    # ● ● ● ● ● ● ● ● ●
    # ● ● ● ● ● ● ● ● ●
    # ● ● ● ● ● ○ ○ ● ●
    # ● ● ● ● ● ● . ● ●
    # ● ● ● ● ● ● ○ ○ ○
    size = conf['SIZE']
    board, player = game_init()
    black_points = [(5, 6), (6, 6), (6, 8), (7, 8), (8, 8)]
    empty_points = [(6, 7)]

    for i in range(size * size):
        x, y = index2coord(i)
        point = (x, y)
        if point in black_points:
            make_play(x, y, board)  # black
            make_play(0, size, board)  # white pass
        elif point in empty_points:
            make_play(0, size, board)  # black pass
            make_play(0, size, board)  # white pass
        else:
            make_play(0, size, board)  # black pass
            make_play(x, y, board)  # white

    make_play(0, size, board)  # black pass
    make_play(6, 7, board)  # white

    # ● ● ● ● ● ● ● ● ●
    # ● ● ● ● ● ● ● ● ●
    # ● ● ● ● ● ● ● ● ●
    # ● ● ● ● ● ● ● ● ●
    # ● ● ● ● ● ● ● ● ●
    # ● ● ● ● ● ● ● ● ●
    # ● ● ● ● ● . . ● ●
    # ● ● ● ● ● ● ● ● ●
    # ● ● ● ● ● ● . . .
    for i in range(size * size - 1):
        x, y = index2coord(i)
        # (plane 0, plane 1) is (0, 0) for an empty point and (0, 1) for white.
        expected = (0, 0) if (x, y) in black_points else (0, 1)
        self.assertEqual(board[0][y][x][0], expected[0])
        self.assertEqual(board[0][y][x][1], expected[1])
def test_legal_moves_not_suicide(self):
    """A move that captures stones must not be masked as a suicide."""
    board, player = game_init()
    opening = [
        (0, 0),  # black
        (1, 0),  # white
        (1, 1),  # black
        (2, 1),  # white
        (8, 8),  # black random pos
        (3, 0),  # white
    ]
    for x, y in opening:
        make_play(x, y, board)
    # ○ ● . ● . .
    # . ○ ● . . .
    # . . . . . .
    mask = legal_moves(board)
    self.assertEqual(mask[2], False)  # not a suicide when it captures other stones
def play_game(model1, model2, mcts_simulations, stop_exploration, self_play=False, num_moves=None,
              resign_model1=None, resign_model2=None):
    """Play one game between ``model1`` and ``model2`` and return its record.

    Args:
        model1, model2: models exposing ``predict_on_batch`` and ``name``.
        mcts_simulations: simulation budget forwarded to ``select_play``.
        stop_exploration: move number at which the temperature drops to 0.
        self_play: when true both sides share one search tree and noise is
            added to new trees.
        num_moves: maximum number of moves (defaults to ``SIZE * SIZE * 2``).
        resign_model1, resign_model2: value thresholds at or below which the
            corresponding model resigns (a falsy threshold disables resign).

    Returns:
        A dict with the keys ``moves``, ``modelB_name``, ``modelW_name``,
        ``winner``, ``winner_model``, ``result``, ``resign_model1`` and
        ``resign_model2``. On a draw ``winner`` and ``winner_model`` are
        ``None``.
    """
    board, player = game_init()
    moves = []

    current_model, other_model = choose_first_player(model1, model2)
    mcts_tree, other_mcts = None, None

    last_value = None
    value = None

    model1_isblack = current_model == model1

    skipped_last = False
    temperature = 1
    start = datetime.datetime.now()
    end_reason = "PLAYED ALL MOVES"
    if num_moves is None:
        num_moves = SIZE * SIZE * 2

    for move_n in range(num_moves):
        last_value = value
        if move_n == stop_exploration:
            temperature = 0
        policies, values = current_model.predict_on_batch(board)
        policy = policies[0]
        value = values[0]
        resign = resign_model1 if current_model == model1 else resign_model2
        if resign and value <= resign:
            end_reason = "resign"
            break

        # Start of the game mcts_tree is None, but it can be {} if we selected
        # a play that mcts never checked
        if not mcts_tree or not mcts_tree['subtree']:
            mcts_tree = new_tree(policy, board, add_noise=self_play)
            if self_play:
                other_mcts = mcts_tree

        index = select_play(policy, board, mcts_simulations, mcts_tree, temperature, current_model)
        x, y = index2coord(index)

        policy_target = np.zeros(SIZE * SIZE + 1)
        for _index, d in mcts_tree['subtree'].items():
            policy_target[_index] = d['p']

        move_data = {
            'board': np.copy(board),
            'policy': policy_target,
            'value': value,
            'move': (x, y),
            'move_n': move_n,
            'player': player,
        }
        moves.append(move_data)

        if skipped_last and y == SIZE:
            end_reason = "BOTH_PASSED"
            break
        skipped_last = y == SIZE

        # Update trees
        if not self_play:
            # Update other only if we are not in self_play
            if other_mcts and index in other_mcts['subtree']:
                other_mcts = other_mcts['subtree'][index]
                other_mcts['parent'] = None  # Cut the tree
        else:
            other_mcts = other_mcts['subtree'][index]
            other_mcts['parent'] = None  # Cut the tree
        mcts_tree = mcts_tree['subtree'][index]
        mcts_tree['parent'] = None  # Cut the tree

        # Swap players
        board, player = make_play(x, y, board)
        current_model, other_model = other_model, current_model
        mcts_tree, other_mcts = other_mcts, mcts_tree

        if conf['SHOW_EACH_MOVE']:
            # Inverted here because we already swapped players
            color = "W" if player == 1 else "B"
            print("%s(%s,%s)" % (color, x, y))
            print("")
            show_board(board)
            print("")

    winner, black_points, white_points = get_winner(board)
    player_string = {1: "B", 0: "D", -1: "W"}
    if end_reason == "resign":
        # NOTE(review): `player` is the side that was about to move when the
        # resign triggered - confirm it is the side to report as "+R".
        winner_string = "%s+R" % (player_string[player])
    else:
        winner_string = "%s+%s" % (player_string[winner], abs(black_points - white_points))
    winner_result = {1: 1, -1: 0, 0: None}

    if winner == 0:
        winner_model = None
    else:
        winner_model = model1 if (winner == 1) == model1_isblack else model2

    if model1_isblack:
        modelB, modelW = model1, model2
    else:
        modelW, modelB = model1, model2

    if player == 0:
        # black played last
        bvalue, wvalue = value, last_value
    else:
        bvalue, wvalue = last_value, value

    if conf['SHOW_END_GAME']:
        print("")
        print("B:%s, W:%s" % (modelB.name, modelW.name))
        print("Bvalue:%s, Wvalue:%s" % (bvalue, wvalue))
        show_board(board)
        print("Game played (%s: %s) : %s" % (winner_string, end_reason, datetime.datetime.now() - start))

    game_data = {
        'moves': moves,
        'modelB_name': modelB.name,
        'modelW_name': modelW.name,
        'winner': winner_result[winner],
        # A draw has no winning model; reading `.name` on None used to raise.
        'winner_model': winner_model.name if winner_model is not None else None,
        'result': winner_string,
        'resign_model1': resign_model1,
        'resign_model2': resign_model2,
    }
    return game_data
def test_self_sucide(self):
    """A suicidal black play at (2, 0) is removed and the white stone at (1, 0) stays."""
    board, player = game_init()
    setup = [
        (0, 0),  # black
        (1, 0),  # white
        (8, 9),  # black random
        (2, 1),  # white
        (8, 8),  # black random pos
        (3, 0),  # white
    ]
    for x, y in setup:
        make_play(x, y, board)
    # ○ ● . ● . .
    # . . ● . . .
    # . . . . . .
    make_play(2, 0, board)  # black suicides

    white_point = board[0][0][1]
    self.assertEqual(white_point[0], 1)  # white stone
    self.assertEqual(white_point[1], 0)  # was not taken

    black_point = board[0][0][2]
    self.assertEqual(black_point[0], 0)  # black stone
    self.assertEqual(black_point[1], 0)  # was taken
def test_model_evaluation_nested(self):
    """Run ``async_simulate`` on a hand-built tree whose first child is expanded."""
    def blank_node(p):
        return {'count': 0, 'p': p, 'value': 0, 'mean_value': 0,
                'virtual_loss': 0, 'subtree': {}}

    expanded_child = blank_node(1)
    expanded_child['subtree'] = {
        1: blank_node(1),  # <----- This will be checked first
        2: blank_node(0),  # <----- This will be checked second
    }
    tree = {
        'count': 0,
        'mean_value': 0,
        'value': 0,
        'parent': None,
        'virtual_loss': 0,
        'subtree': {0: expanded_child, 1: blank_node(0)},
    }
    # Wire up the parent links.
    expanded_child['parent'] = tree
    for grandchild in expanded_child['subtree'].values():
        grandchild['parent'] = expanded_child
    tree['subtree'][1]['parent'] = tree

    board = self.board
    size = conf['SIZE']

    test_board1, player = game_init()
    make_play(0, 0, test_board1)
    make_play(1, 0, test_board1)

    test_board2, player = game_init()
    make_play(0, 0, test_board2)
    make_play(2, 0, test_board2)

    # Remove the symmetries for reproducibility
    model = MCTSDummyModel()
    async_simulate(tree, board, model, energy=4, original_player=1)
def test_model_evaluation_other_nested(self):
    """Run ``async_simulate`` on a hand-built tree whose second child is expanded."""
    def blank_node(p):
        return {'count': 0, 'p': p, 'value': 0, 'virtual_loss': 0,
                'mean_value': 0, 'subtree': {}}

    expanded_child = blank_node(0)
    expanded_child['subtree'] = {0: blank_node(0), 2: blank_node(1)}
    tree = {
        'count': 0,
        'mean_value': 0,
        'value': 0,
        'virtual_loss': 0,
        'parent': None,
        'subtree': {0: blank_node(1), 1: expanded_child},
    }
    # Wire up the parent links.
    for child in tree['subtree'].values():
        child['parent'] = tree
    for grandchild in expanded_child['subtree'].values():
        grandchild['parent'] = expanded_child

    board = self.board
    size = conf['SIZE']

    test_board1, player = game_init()
    make_play(0, 0, test_board1)

    test_board2, player = game_init()
    make_play(1, 0, test_board2)
    make_play(2, 0, test_board2)

    model = MCTSDummyModel()
    async_simulate(tree, board, model, energy=2, original_player=1)
def test_legal_moves_suicide3(self):
    """``legal_moves`` must mask index 10 in this position."""
    board, player = game_init()
    alternating = [
        (1, 2),  # black
        (2, 0),  # white
        (3, 1),  # black
        (3, 0),  # white
    ]
    for x, y in alternating:
        make_play(x, y, board)

    # Extra white stones, played with an explicit color.
    for x, y in [(1, 1), (4, 1), (2, 2), (3, 2)]:
        make_play(x, y, board, -1)  # white
    # . . ● ● . .
    # . ● . ○ ● .
    # . ○ ● ● . .
    # . . . . . .
    mask = legal_moves(board)
    self.assertEqual(mask[10], True)  # suicide move should be illegal
def test_legal_moves_suicide(self):
    """``legal_moves`` must mask the suicide move at index 2."""
    board, player = game_init()
    opening = [
        (0, 1),  # black
        (1, 0),  # white
        (1, 1),  # black
        (2, 1),  # white
        (8, 8),  # black random pos
        (3, 0),  # white
    ]
    for x, y in opening:
        make_play(x, y, board)
    # . ● . ● . .
    # ○ ○ ● . . .
    # . . . . . .
    mask = legal_moves(board)
    self.assertEqual(mask[2], True)  # suicide move should be illegal
def test_legal_moves_ko(self):
    """After black captures a single stone, the point (1, 0) must be masked (ko)."""
    board, player = game_init()
    opening = [
        (0, 0),  # black
        (1, 0),  # white
        (1, 1),  # black
        (2, 1),  # white
        (8, 8),  # black random pos
        (3, 0),  # white
    ]
    for x, y in opening:
        make_play(x, y, board)
    # ○ ● . ● . .
    # . ○ ● . . .
    # . . . . . .
    make_play(2, 0, board)  # black captures first
    # ○ . ○ ● . .
    # . ○ ● . . .
    # . . . . . .
    mask = legal_moves(board)

    captured_point = board[0][0][1]
    self.assertEqual(captured_point[0], 0)  # white stone
    self.assertEqual(captured_point[1], 0)  # was taken
    self.assertEqual(captured_point[2], 1)  # white stone was here
    self.assertEqual(captured_point[3], 0)  # black stone was not here
    self.assertEqual(mask[1], True)
def test_legal_moves_not_ko(self):
    """Capturing two stones is not a ko: both freed points stay legal."""
    board, player = game_init()
    opening = [
        (0, 0),  # black
        (1, 0),  # white
        (1, 1),  # black
        (2, 0),  # white
        (2, 1),  # black
        (8, 8),  # white random pos
    ]
    for x, y in opening:
        make_play(x, y, board)
    # ○ ● ● . . .
    # . ○ ○ . . .
    # . . . . . .
    make_play(3, 0, board)  # black captures first
    # ○ . . ○ . .
    # . ○ ○ . . .
    # . . . . . .
    mask = legal_moves(board)

    captured_columns = (1, 2)  # x of white stone 1 and white stone 2 on row 0
    for x in captured_columns:
        self.assertEqual(board[0][0][x][0], 0)  # white stone
        self.assertEqual(board[0][0][x][1], 0)  # was taken
    for x in captured_columns:
        self.assertEqual(board[0][0][x][2], 1)  # white stone was here
        self.assertEqual(board[0][0][x][3], 0)  # black stone was not here
    for x in captured_columns:
        self.assertEqual(mask[x], False)
def simulate(node, board, model, mcts_batch_size, original_player):
    """Run one batched MCTS simulation step below ``node``.

    If the best action leads to an expanded node, the move is played on
    ``board`` (in place) and the search recurses. Otherwise the top actions
    are evaluated in one batch: each leaf gets a new subtree and its value
    is added to every node up the ``parent`` chain.

    Args:
        node: tree node whose ``'subtree'`` maps actions to child nodes.
        board: board array for the position at ``node``.
        model: model passed to ``random_symmetry_predict``.
        mcts_batch_size: number of actions requested from ``top_n_actions``.
        original_player: player whose point of view values are stored in.
    """
    node_subtree = node['subtree']
    max_actions = top_n_actions(node_subtree, mcts_batch_size)
    selected_action = max_actions[0]['action']
    selected_node = node_subtree[selected_action]

    if selected_node['subtree'] != {}:
        # Not a leaf yet: go one level deeper.
        x, y = index2coord(selected_action)
        make_play(x, y, board)
        simulate(selected_node, board, model, mcts_batch_size, original_player)
        return

    # This is a leaf. Size the batch by the actions actually returned, so no
    # all-zero padding boards are sent to the model when fewer than
    # mcts_batch_size actions are available.
    boards = np.zeros((len(max_actions), SIZE, SIZE, 17), dtype=np.float32)
    for i, dic in enumerate(max_actions):
        action = dic['action']
        tmp_board = np.copy(board)
        if dic['node']['subtree'] != {}:
            # already expanded
            tmp_node = dic['node']
            x, y = index2coord(action)
            tmp_board, _ = make_play(x, y, tmp_board)
            while tmp_node['subtree'] != {}:
                tmp_max_actions = top_n_actions(tmp_node['subtree'], mcts_batch_size)
                tmp_d = tmp_max_actions[0]
                tmp_node = tmp_d['node']
                # The node for this action is the leaf, this is where the
                # update will start, working up the tree
                dic['node'] = tmp_node
                x, y = index2coord(tmp_d['action'])
                make_play(x, y, tmp_board)
        else:
            x, y = index2coord(action)
            make_play(x, y, tmp_board)
        boards[i] = tmp_board

    # The random symmetry will change boards, so copy them beforehand
    presymmetry_boards = np.copy(boards)
    policies, values = random_symmetry_predict(model, boards)

    for policy, v, leaf_board, action in zip(policies, values, presymmetry_boards, max_actions):
        # reshape from [n, n, 17] to [1, n, n, 17]
        leaf_board = leaf_board.reshape([1] + list(leaf_board.shape))
        player = leaf_board[0, 0, 0, -1]
        # Inverse value if we're looking from other player perspective
        value = v[0] if player == original_player else -v[0]

        leaf_node = action['node']
        # The new children hang below the leaf itself; using `node` as their
        # parent would make later back-propagation skip the leaf.
        leaf_node['subtree'] = new_subtree(policy, leaf_board, leaf_node)

        current_node = leaf_node
        while True:
            current_node['count'] += 1
            current_node['value'] += value
            current_node['mean_value'] = current_node['value'] / float(current_node['count'])
            if current_node['parent']:
                current_node = current_node['parent']
            else:
                break
def test_full_board_capture(self):
    """Exercise whole-board captures in the bottom-right corner.

    Black fills all but the last two points, white takes the corner, black
    captures it, then white recaptures the whole black group.
    """
    size = conf['SIZE']
    last = size - 1
    board, player = game_init()

    def check_point(x, y, plane0, plane1):
        self.assertEqual(board[0][y][x][0], plane0)
        self.assertEqual(board[0][y][x][1], plane1)

    for i in range(size * size - 2):
        x, y = index2coord(i)
        make_play(x, y, board)  # black
        make_play(0, size, board)  # white pass
    # ○ ○ ○ ○ ○ ○
    # ○ ○ ○ ○ ○ ○
    # ○ ○ ○ ○ ○ ○
    # ○ ○ ○ ○ . .
    make_play(0, size, board)  # black pass
    make_play(size - 1, size - 1, board)  # white corner
    # ○ ○ ○ ○ ○ ○
    # ○ ○ ○ ○ ○ ○
    # ○ ○ ○ ○ ○ ○
    # ○ ○ ○ ○ . ●
    for i in range(size * size - 2):
        x, y = index2coord(i)
        check_point(x, y, 1, 0)  # black stone i
    check_point(last, last, 0, 1)  # white stone
    check_point(last - 1, last, 0, 0)  # empty

    make_play(size - 2, size - 1, board)  # black
    # ○ ○ ○ ○ ○ ○
    # ○ ○ ○ ○ ○ ○
    # ○ ○ ○ ○ ○ ○
    # ○ ○ ○ ○ ○ .
    for i in range(size * size - 1):
        x, y = index2coord(i)
        check_point(x, y, 0, 1)  # black stone i (it's white's turn)
    check_point(last, last, 0, 0)  # empty

    make_play(size - 1, size - 1, board)  # white
    # . . . . . .
    # . . . . . .
    # . . . . . .
    # . . . . . ●
    for i in range(size * size - 1):
        x, y = index2coord(i)
        check_point(x, y, 0, 0)  # empty
    check_point(last, last, 0, 1)  # white
def _backpropagate(leaf_node, value):
    """Add ``value`` to ``leaf_node`` and to every ancestor reachable via ``'parent'``."""
    current = leaf_node
    while True:
        current['count'] += 1
        current['value'] += value
        current['mean_value'] = current['value'] / float(current['count'])
        parent = current['parent']
        if not parent:
            break
        current = parent


def simulate(node, board, model, mcts_batch_size, original_player):
    """Run one batched MCTS simulation step below ``node``.

    If the best action leads to an expanded node, the move is played on
    ``board`` (in place) and the search recurses. Otherwise the top actions
    are evaluated in one batch, each leaf receives a new subtree, and its
    value is back-propagated. Subtrees are built through the process pool
    when ``conf['THREAD_SIMULATION']`` is set, inline otherwise.
    """
    node_subtree = node['subtree']
    max_actions = top_n_actions(node_subtree, mcts_batch_size)
    selected_action = max_actions[0]['action']
    selected_node = node_subtree[selected_action]

    if selected_node['subtree'] != {}:
        # Not a leaf yet: go one level deeper.
        x, y = index2coord(selected_action)
        make_play(x, y, board)
        simulate(selected_node, board, model, mcts_batch_size, original_player)
        return

    # Boards are always built inline here (the process-pool variant of this
    # step was disabled in the original code).
    boards = np.zeros((len(max_actions), SIZE, SIZE, 17), dtype=np.float32)
    for i, dic in enumerate(max_actions):
        leaf_board = np.copy(board)
        walker = dic['node']
        x, y = index2coord(dic['action'])
        if walker['subtree'] != {}:
            # already expanded
            leaf_board, _ = make_play(x, y, leaf_board)
            while walker['subtree'] != {}:
                best = top_one_action(walker['subtree'])
                walker = best['node']
                # The node for this action is the leaf, this is where the
                # update will start, working up the tree
                dic['node'] = walker
                x, y = index2coord(best['action'])
                make_play(x, y, leaf_board)
        else:
            make_play(x, y, leaf_board)
        boards[i] = leaf_board

    # The random symmetry will change boards, so copy them beforehand
    presymmetry_boards = np.copy(boards)
    policies, values = random_symmetry_predict(model, boards)

    if conf['THREAD_SIMULATION']:
        from simulation_workers import subtree_worker, process_pool
        jobs = list(zip(policies, presymmetry_boards))
        subtree_array = process_pool.map(subtree_worker, jobs)
        for subtree, leaf_board, v, action in zip(subtree_array, presymmetry_boards,
                                                  values, max_actions):
            player = leaf_board[0, 0, -1]
            value = v[0] if player == original_player else -v[0]
            leaf_node = action['node']
            for child in subtree.values():
                child['parent'] = leaf_node
            leaf_node['subtree'] = subtree
            _backpropagate(leaf_node, value)
    else:
        for policy, v, leaf_board, action in zip(policies, values, presymmetry_boards,
                                                 max_actions):
            # reshape from [n, n, 17] to [1, n, n, 17]
            batched = leaf_board.reshape([1] + list(leaf_board.shape))
            player = batched[0, 0, 0, -1]
            # Inverse value if we're looking from other player perspective
            value = v[0] if player == original_player else -v[0]
            leaf_node = action['node']
            leaf_node['subtree'] = new_subtree(policy, batched, leaf_node)
            _backpropagate(leaf_node, value)
def test_get_liberties(self):
    """Check the number of liberties reported by ``get_liberties`` in two positions."""
    board, player = game_init()
    first_position = [
        (0, 0),  # black
        (1, 0),  # white
        (8, 9),  # black random
        (2, 1),  # white
        (8, 8),  # black random pos
        (3, 0),  # white
        (2, 0),  # black
    ]
    for x, y in first_position:
        make_play(x, y, board)
    # ○ ● . ● . .
    # . . ● . . .
    # . . . . . .
    liberties = get_liberties(2, 0, board, 1)
    self.assertEqual(len(liberties), 0)
    liberties = get_liberties(2, 0, board, -1)
    self.assertEqual(len(liberties), 4)

    board, player = game_init()
    second_position = [
        (2, 1),  # white
        (2, 0),  # black
        (3, 1),  # white
        (1, 1),  # black
    ]
    for x, y in second_position:
        make_play(x, y, board)
    # Extra white stones, played with an explicit color.
    for x, y in [(4, 1), (2, 2)]:
        make_play(x, y, board, -1)  # white
    # . . ○ . . .
    # . ○ ● ● ○ .
    # . . ○ . . .
    # . . . . . .
    for x, y in [(2, 1), (3, 1)]:
        liberties = get_liberties(x, y, board, 1)
        self.assertEqual(len(liberties), 2)