def simulate(node, board, model, mcts_batch_size, original_player):
    """Run one batched MCTS simulation below ``node``.

    The best child of ``node`` (first entry returned by ``top_n_actions``)
    decides what happens:

    * If that child is already expanded, its move is played on ``board`` and
      the simulation recurses into it.
    * Otherwise up to ``mcts_batch_size`` top actions are evaluated in a
      single model call.  For each of them the corresponding leaf is
      expanded with ``new_subtree`` and the predicted value is backed up
      through the ``'parent'`` links.

    Args:
        node: Tree node dict; this function reads/writes the keys
            ``'subtree'``, ``'count'``, ``'value'``, ``'mean_value'`` and
            ``'parent'``.
        board: Current board array.  It is passed to ``make_play`` directly
            on the recursive path, so callers should hand in a copy they
            own.  # NOTE(review): assumes make_play mutates in place - confirm
        model: Model forwarded to ``random_symmetry_predict``.
        mcts_batch_size: Maximum number of candidate actions evaluated per
            model call.
        original_player: Player indicator the backed-up values are
            expressed for; values seen from the other player are negated.

    Returns:
        None.  The tree is updated in place.
    """
    node_subtree = node['subtree']
    max_actions = top_n_actions(node_subtree, mcts_batch_size)
    selected_action = max_actions[0]['action']
    selected_node = node_subtree[selected_action]

    if selected_node['subtree'] != {}:
        # Best child already expanded: play its move and keep descending.
        x, y = index2coord(selected_action)
        make_play(x, y, board)
        simulate(selected_node, board, model, mcts_batch_size, original_player)
        return

    # Best child is a leaf: build one board per candidate action.  The batch
    # is sized by the number of candidates actually returned so that no
    # all-zero filler boards are sent to the model.
    boards = np.zeros((len(max_actions), SIZE, SIZE, 17), dtype=np.float32)
    for i, dic in enumerate(max_actions):
        action = dic['action']
        tmp_board = np.copy(board)
        x, y = index2coord(action)
        if dic['node']['subtree'] != {}:
            # This candidate is already expanded: follow the best action at
            # every level until an unexpanded node is reached.
            tmp_node = dic['node']
            tmp_board, _ = make_play(x, y, tmp_board)
            while tmp_node['subtree'] != {}:
                tmp_d = top_n_actions(tmp_node['subtree'], mcts_batch_size)[0]
                tmp_node = tmp_d['node']
                # The node for this action is the leaf, this is where the
                # update will start, working up the tree.
                dic['node'] = tmp_node
                x, y = index2coord(tmp_d['action'])
                make_play(x, y, tmp_board)
        else:
            make_play(x, y, tmp_board)
        boards[i] = tmp_board

    # The random symmetry changes ``boards``, so copy them beforehand.
    presymmetry_boards = np.copy(boards)
    policies, values = random_symmetry_predict(model, boards)

    for policy, v, leaf_board, dic in zip(policies, values,
                                          presymmetry_boards, max_actions):
        # Reshape from [n, n, 17] to [1, n, n, 17].
        leaf_board = leaf_board.reshape([1] + list(leaf_board.shape))
        player = leaf_board[0, 0, 0, -1]
        # Inverse value if we're looking from the other player's perspective.
        value = v[0] if player == original_player else -v[0]

        leaf_node = dic['node']
        # The new children hang from the leaf being expanded, so the leaf
        # (not the node this call started from) must be their parent;
        # otherwise later back-ups starting at those children skip the leaf.
        leaf_node['subtree'] = new_subtree(policy, leaf_board, leaf_node)

        # Back up the value from the leaf to the root.
        current_node = leaf_node
        while True:
            current_node['count'] += 1
            current_node['value'] += value
            current_node['mean_value'] = (
                current_node['value'] / float(current_node['count']))
            parent = current_node['parent']
            if not parent:
                break
            current_node = parent
def predict_with_latest_model(self, board, is_symmetry=False):
    """Predict on ``board`` with ``self.latest_model``.

    Args:
        board: Input batch handed to the model unchanged.
        is_symmetry: When truthy, the prediction is routed through
            ``random_symmetry_predict``; otherwise the model's own
            ``predict_on_batch`` is called.

    Returns:
        Whatever the selected prediction call returns.
    """
    latest = self.latest_model
    if not is_symmetry:
        return latest.predict_on_batch(board)
    return random_symmetry_predict(latest, board)
def _backup_value(leaf_node, value):
    """Add ``value`` to ``leaf_node`` and every ancestor via ``'parent'``."""
    current_node = leaf_node
    while True:
        current_node['count'] += 1
        current_node['value'] += value
        current_node['mean_value'] = (
            current_node['value'] / float(current_node['count']))
        parent = current_node['parent']
        if not parent:
            break
        current_node = parent


def simulate(node, board, model, mcts_batch_size, original_player):
    """Run one batched MCTS simulation below ``node``.

    The best child of ``node`` (first entry returned by ``top_n_actions``)
    decides what happens:

    * If that child is already expanded, its move is played on ``board`` and
      the simulation recurses into it.
    * Otherwise the top candidate actions are evaluated in one model call,
      each corresponding leaf is expanded, and the predicted value is
      backed up through the ``'parent'`` links.  When
      ``conf['THREAD_SIMULATION']`` is truthy the subtrees are built by
      ``simulation_workers.process_pool``; otherwise ``new_subtree`` is
      called inline.

    Args:
        node: Tree node dict; this function reads/writes the keys
            ``'subtree'``, ``'count'``, ``'value'``, ``'mean_value'`` and
            ``'parent'``.
        board: Current board array.  It is passed to ``make_play`` directly
            on the recursive path, so callers should hand in a copy they
            own.  # NOTE(review): assumes make_play mutates in place - confirm
        model: Model forwarded to ``random_symmetry_predict``.
        mcts_batch_size: Maximum number of candidate actions evaluated per
            model call.
        original_player: Player indicator the backed-up values are
            expressed for; values seen from the other player are negated.

    Returns:
        None.  The tree is updated in place.
    """
    node_subtree = node['subtree']
    max_actions = top_n_actions(node_subtree, mcts_batch_size)
    selected_action = max_actions[0]['action']
    selected_node = node_subtree[selected_action]

    if selected_node['subtree'] != {}:
        # Best child already expanded: play its move and keep descending.
        x, y = index2coord(selected_action)
        make_play(x, y, board)
        simulate(selected_node, board, model, mcts_batch_size, original_player)
        return

    # Best child is a leaf: build one board per candidate action.
    boards = np.zeros((len(max_actions), SIZE, SIZE, 17), dtype=np.float32)
    for i, dic in enumerate(max_actions):
        tmp_board = np.copy(board)
        x, y = index2coord(dic['action'])
        if dic['node']['subtree'] != {}:
            # This candidate is already expanded: follow the best action at
            # every level until an unexpanded node is reached.
            tmp_node = dic['node']
            tmp_board, _ = make_play(x, y, tmp_board)
            while tmp_node['subtree'] != {}:
                tmp_d = top_one_action(tmp_node['subtree'])
                tmp_node = tmp_d['node']
                # The node for this action is the leaf, this is where the
                # update will start, working up the tree.
                dic['node'] = tmp_node
                x, y = index2coord(tmp_d['action'])
                make_play(x, y, tmp_board)
        else:
            make_play(x, y, tmp_board)
        boards[i] = tmp_board

    # The random symmetry changes ``boards``, so copy them beforehand.
    presymmetry_boards = np.copy(boards)
    policies, values = random_symmetry_predict(model, boards)

    if conf['THREAD_SIMULATION']:
        from simulation_workers import subtree_worker, process_pool
        subtree_array = process_pool.map(
            subtree_worker, list(zip(policies, presymmetry_boards)))
        for subtree, leaf_board, v, dic in zip(
                subtree_array, presymmetry_boards, values, max_actions):
            # Boards are still [n, n, 17] here (no batch axis added).
            player = leaf_board[0, 0, -1]
            # Inverse value if we're looking from the other player's
            # perspective.
            value = v[0] if player == original_player else -v[0]
            leaf_node = dic['node']
            # Subtrees built by the pool have no parent link yet; attach
            # them to the leaf they expand.
            for child in subtree.values():
                child['parent'] = leaf_node
            leaf_node['subtree'] = subtree
            _backup_value(leaf_node, value)
    else:
        for policy, v, leaf_board, dic in zip(
                policies, values, presymmetry_boards, max_actions):
            # Reshape from [n, n, 17] to [1, n, n, 17].
            leaf_board = leaf_board.reshape([1] + list(leaf_board.shape))
            player = leaf_board[0, 0, 0, -1]
            # Inverse value if we're looking from the other player's
            # perspective.
            value = v[0] if player == original_player else -v[0]
            leaf_node = dic['node']
            leaf_node['subtree'] = new_subtree(policy, leaf_board, leaf_node)
            _backup_value(leaf_node, value)
def run(self):
    """Serve prediction requests from ``board_queue`` until told to stop.

    After ``self.load_model()`` the worker loops forever:

    1. Up to ``conf['PREDICTING_BATCH_SIZE']`` requests are drained from
       ``board_queue``.  Each request is a 4-tuple
       ``(board, indicator, a, response_now)`` where ``indicator`` is one of
       ``"BEST"``, ``"LATEST"``, ``"BEST_SYM"``, ``"LATEST_SYM"``,
       ``"BEST_NAME"`` or ``"LATEST_NAME"`` and ``a`` is an object with a
       ``send`` method that receives the answer.
    2. Boards are grouped per indicator, each group is predicted in one
       batch, and ``(policy, value)`` is sent back to every requester.
       ``*_NAME`` requests are answered with the model's ``name``.

    A request whose ``board``, ``indicator`` and ``a`` are all ``None`` shuts
    the worker down.  Any exception escaping the loop is printed and ends
    the worker.
    """
    board_keys = ("BEST_SYM", "LATEST_SYM", "BEST", "LATEST")
    name_keys = ("BEST_NAME", "LATEST_NAME")
    # Prediction call per board indicator, in the order the batches are
    # answered.  The lambdas look the models up lazily, so a model is only
    # touched when it actually has pending requests.
    predictors = (
        ("BEST", lambda b: self.best_model.predict_on_batch(b)),
        ("LATEST", lambda b: self.latest_model.predict_on_batch(b)),
        ("BEST_SYM", lambda b: random_symmetry_predict(self.best_model, b)),
        # LATEST_SYM must be answered by the latest model, not the best one.
        ("LATEST_SYM",
         lambda b: random_symmetry_predict(self.latest_model, b)),
    )
    try:
        self.load_model()
        while True:
            if board_queue.empty():
                time.sleep(0.1)

            # Boards are collected in plain lists and stacked once per
            # batch; testing an ndarray against ``[]`` is an elementwise
            # comparison, not an emptiness check.
            boards = {key: [] for key in board_keys}
            pipes = {key: [] for key in board_keys + name_keys}

            n = conf['PREDICTING_BATCH_SIZE']
            while n > 0 and not board_queue.empty():
                try:
                    board, indicator, a, response_now = \
                        board_queue.get_nowait()
                    if a is None and indicator is None and board is None:
                        print("SHUTING DONW PREDICTING WORKER!!!")
                        return
                    pipes[indicator].append(a)
                    if indicator not in boards:
                        # Name requests carry no board; answer right away.
                        break
                    boards[indicator].append(board)
                    if response_now:
                        break
                    n = n - 1
                except Exception:
                    # Best effort: the queue may have been emptied between
                    # empty() and get_nowait(), or the request may be
                    # malformed; skip it and keep serving.
                    pass

            for key, predict in predictors:
                if not boards[key]:
                    continue
                p, v = predict(np.vstack(boards[key]))
                for index, a in enumerate(pipes[key]):
                    a.send((p[index], v[index][0]))

            if pipes["LATEST_NAME"]:
                name = self.latest_model.name
                for a in pipes["LATEST_NAME"]:
                    a.send(name)

            if pipes["BEST_NAME"]:
                name = self.best_model.name
                for a in pipes["BEST_NAME"]:
                    a.send(name)
    except Exception as e:
        print("PREDICTING QUEUE WORKER EXCEPTION !!!")
        print(e)