예제 #1
0
def simulate(node, board, model, mcts_batch_size, original_player):
    """Run one batched MCTS simulation starting at `node`.

    The best-ranked action under `node` is followed recursively until the
    selected child has an empty subtree. At that point every candidate
    returned by `top_n_actions` is played out to a leaf, all resulting boards
    are evaluated with a single `random_symmetry_predict` call, each leaf is
    expanded with `new_subtree`, and the predicted value is added to the leaf
    and to every ancestor reachable through its 'parent' links.

    Args:
        node: Tree node dict; its 'subtree' maps actions to child node dicts
            carrying 'subtree', 'count', 'value', 'mean_value' and 'parent'.
        board: Current board. The descent relies on `make_play` updating it
            in place, so the caller's array is modified.
        model: Model handed to `random_symmetry_predict`.
        mcts_batch_size: Number of candidate actions requested from
            `top_n_actions` and number of rows in the prediction batch.
        original_player: Player whose point of view values are expressed in;
            values of boards seen from the other player are negated.

    Returns:
        None. The tree is updated in place.
    """
    node_subtree = node['subtree']
    max_actions = top_n_actions(node_subtree, mcts_batch_size)
    selected_action = max_actions[0]['action']
    selected_node = node_subtree[selected_action]

    if selected_node['subtree'] != {}:
        # Not at the frontier yet: play the best action and keep descending.
        x, y = index2coord(selected_action)
        make_play(x, y, board)
        simulate(selected_node, board, model, mcts_batch_size, original_player)
        return

    # The best child is a leaf: evaluate all candidate actions in one batch.
    boards = np.zeros((mcts_batch_size, SIZE, SIZE, 17), dtype=np.float32)
    for i, dic in enumerate(max_actions):
        action = dic['action']
        tmp_board = np.copy(board)
        x, y = index2coord(action)
        if dic['node']['subtree'] != {}:
            # Already expanded: follow the best action at each level until a
            # leaf is reached, replaying the moves on the board copy.
            tmp_node = dic['node']
            tmp_board, _ = make_play(x, y, tmp_board)
            while tmp_node['subtree'] != {}:
                tmp_max_actions = top_n_actions(tmp_node['subtree'],
                                                mcts_batch_size)
                tmp_d = tmp_max_actions[0]
                tmp_node = tmp_d['node']
                # The node for this action is the leaf, this is where the
                # update will start, working up the tree
                dic['node'] = tmp_node
                x, y = index2coord(tmp_d['action'])
                make_play(x, y, tmp_board)
        else:
            make_play(x, y, tmp_board)
        boards[i] = tmp_board

    # The random symmetry will change boards, so copy them beforehand
    presymmetry_boards = np.copy(boards)

    policies, values = random_symmetry_predict(model, boards)

    for policy, v, leaf_board, dic in zip(policies, values,
                                          presymmetry_boards, max_actions):
        # new_subtree receives the board with a leading batch axis of size 1.
        leaf_board = leaf_board.reshape([1] + list(leaf_board.shape))
        player = leaf_board[0, 0, 0, -1]
        # Inverse value if we're looking from other player perspective
        value = v[0] if player == original_player else -v[0]

        leaf_node = dic['node']
        # The new children must hang off the leaf being expanded (not off
        # `node`), otherwise a later backup from them would skip the leaf and
        # every node between it and `node`.
        leaf_node['subtree'] = new_subtree(policy, leaf_board, leaf_node)

        # Back the value up from the leaf to the root.
        current_node = leaf_node
        while True:
            current_node['count'] += 1
            current_node['value'] += value
            current_node['mean_value'] = current_node['value'] / float(
                current_node['count'])
            if current_node['parent']:
                current_node = current_node['parent']
            else:
                break
예제 #2
0
 def predict_with_latest_model(self, board, is_symmetry=False):
     """Predict on `board` with `self.latest_model`.

     When `is_symmetry` is truthy the prediction goes through
     `random_symmetry_predict`; otherwise the model's `predict_on_batch`
     is called directly. The result of whichever call is made is returned
     unchanged.
     """
     if not is_symmetry:
         return self.latest_model.predict_on_batch(board)
     return random_symmetry_predict(self.latest_model, board)
예제 #3
0
def _backup_value(leaf_node, value):
    """Add `value` to `leaf_node` and to every ancestor reached via 'parent'.

    Each visited node gets its 'count' incremented, `value` added to its
    'value', and 'mean_value' recomputed. The walk stops at the first node
    whose 'parent' is falsy.
    """
    current_node = leaf_node
    while True:
        current_node['count'] += 1
        current_node['value'] += value
        current_node['mean_value'] = current_node['value'] / float(
            current_node['count'])
        if not current_node['parent']:
            break
        current_node = current_node['parent']


def _board_at_leaf(dic, board):
    """Return a copy of `board` played forward to the leaf below `dic`.

    `dic` is one entry of the candidate list (keys 'action' and 'node').
    If its node is already expanded, the best action is followed at each
    level until an unexpanded node is found, and `dic['node']` is re-pointed
    to that leaf so the value backup starts there. `board` is not modified.
    """
    tmp_board = np.copy(board)
    x, y = index2coord(dic['action'])
    if dic['node']['subtree'] == {}:
        # Unexpanded candidate: a single move reaches the leaf.
        make_play(x, y, tmp_board)
        return tmp_board

    # Already expanded: replay the best line down to a leaf.
    tmp_node = dic['node']
    tmp_board, _ = make_play(x, y, tmp_board)
    while tmp_node['subtree'] != {}:
        tmp_d = top_one_action(tmp_node['subtree'])
        tmp_node = tmp_d['node']
        # The node for this action is the leaf, this is where the
        # update will start, working up the tree
        dic['node'] = tmp_node
        x, y = index2coord(tmp_d['action'])
        make_play(x, y, tmp_board)
    return tmp_board


def simulate(node, board, model, mcts_batch_size, original_player):
    """Run one batched MCTS simulation starting at `node`.

    The best-ranked action under `node` is followed recursively until the
    selected child has an empty subtree. Then every candidate returned by
    `top_n_actions` is played out to a leaf, all leaf boards are evaluated
    with one `random_symmetry_predict` call, each leaf is expanded, and its
    value is backed up through the 'parent' links.

    Leaf expansion uses `simulation_workers.process_pool` when
    `conf['THREAD_SIMULATION']` is truthy and `new_subtree` otherwise.

    Args:
        node: Tree node dict; its 'subtree' maps actions to child node dicts.
        board: Current board. The descent relies on `make_play` updating it
            in place, so the caller's array is modified.
        model: Model handed to `random_symmetry_predict`.
        mcts_batch_size: Number of candidate actions requested from
            `top_n_actions`.
        original_player: Player whose point of view values are expressed in;
            values of boards seen from the other player are negated.

    Returns:
        None. The tree is updated in place.
    """
    node_subtree = node['subtree']
    max_actions = top_n_actions(node_subtree, mcts_batch_size)
    selected_action = max_actions[0]['action']
    selected_node = node_subtree[selected_action]

    if selected_node['subtree'] != {}:
        # Not at the frontier yet: play the best action and keep descending.
        x, y = index2coord(selected_action)
        make_play(x, y, board)
        simulate(selected_node, board, model, mcts_batch_size, original_player)
        return

    # NOTE: boards are built serially; a process-pool variant of this step
    # used to sit here behind a hard-coded `if False:` and never ran.
    boards = np.zeros((len(max_actions), SIZE, SIZE, 17), dtype=np.float32)
    for i, dic in enumerate(max_actions):
        boards[i] = _board_at_leaf(dic, board)

    # The random symmetry will change boards, so copy them beforehand
    presymmetry_boards = np.copy(boards)
    policies, values = random_symmetry_predict(model, boards)

    if conf['THREAD_SIMULATION']:
        from simulation_workers import subtree_worker, process_pool
        subtree_array = process_pool.map(
            subtree_worker, list(zip(policies, presymmetry_boards)))

        for subtree, leaf_board, v, dic in zip(subtree_array,
                                               presymmetry_boards, values,
                                               max_actions):
            player = leaf_board[0, 0, -1]
            # Inverse value if we're looking from other player perspective
            value = v[0] if player == original_player else -v[0]

            leaf_node = dic['node']
            # Subtrees come back from the pool without a usable parent link,
            # so attach every child to the leaf it expands.
            for child in subtree.values():
                child['parent'] = leaf_node
            leaf_node['subtree'] = subtree

            _backup_value(leaf_node, value)
    else:
        for policy, v, leaf_board, dic in zip(policies, values,
                                              presymmetry_boards,
                                              max_actions):
            # reshape from [n, n, 17] to [1, n, n, 17]
            leaf_board = leaf_board.reshape([1] + list(leaf_board.shape))

            player = leaf_board[0, 0, 0, -1]
            # Inverse value if we're looking from other player perspective
            value = v[0] if player == original_player else -v[0]

            leaf_node = dic['node']
            leaf_node['subtree'] = new_subtree(policy, leaf_board, leaf_node)

            _backup_value(leaf_node, value)
예제 #4
0
    def run(self):
        """Serve prediction requests from `board_queue` until told to stop.

        After `self.load_model()`, the worker loops forever:

        1. Sleeps 0.1 s whenever the queue is empty.
        2. Drains up to `conf['PREDICTING_BATCH_SIZE']` queue items of the
           form `(board, indicator, a, response_now)`, grouping boards per
           indicator. `a` is the reply channel (its `send` method is used).
           Draining stops early on a `*_NAME` request or when
           `response_now` is truthy.
        3. Runs one prediction per non-empty group and sends each requester
           `(policy, value)`; `*_NAME` requesters receive the model's name.

        An item whose `board`, `indicator` and `a` are all None shuts the
        worker down. Any exception escaping the loop is printed and ends the
        worker.
        """
        # Indicator -> prediction call, in the order replies are sent. The
        # lambdas look the model up at call time, so a model attribute is
        # only touched when a request actually needs it.
        predictors = (
            ("BEST",
             lambda boards: self.best_model.predict_on_batch(boards)),
            ("LATEST",
             lambda boards: self.latest_model.predict_on_batch(boards)),
            ("BEST_SYM",
             lambda boards: random_symmetry_predict(self.best_model, boards)),
            # LATEST_SYM requests must be answered by the latest model.
            ("LATEST_SYM",
             lambda boards: random_symmetry_predict(self.latest_model,
                                                    boards)),
        )
        try:
            self.load_model()
            while True:
                if board_queue.empty():
                    time.sleep(0.1)
                    continue
                root = {"BEST_SYM":{'board':[], 'a':[]},
                        "LATEST_SYM":{'board':[], 'a':[]},
                        "BEST":{'board':[], 'a':[]},
                        "LATEST":{'board':[], 'a':[]},
                        "BEST_NAME" : {'a':[]},
                        "LATEST_NAME": {'a': []}
                        }
                n = conf['PREDICTING_BATCH_SIZE']
                while n > 0 and not board_queue.empty():
                    try:
                        item = board_queue.get_nowait()
                    except Exception:
                        # The queue reported an item but none could be
                        # fetched; re-check the queue instead of failing.
                        continue
                    try:
                        board, indicator, a, response_now = item
                        if a is None and indicator is None and board is None:
                            print("SHUTING DONW PREDICTING WORKER!!!")
                            # K.clear_session()
                            return
                        slot = root[indicator]
                        current_boards = slot.get('board')
                        if current_boards is None:
                            # *_NAME request: no board to batch, answer now.
                            slot['a'].append(a)
                            break
                        # Use len() rather than `== []`: once a board has
                        # been stored the slot holds an ndarray, and
                        # comparing an ndarray with a list is not a reliable
                        # emptiness test.
                        if len(current_boards) == 0:
                            slot['board'] = board
                        else:
                            slot['board'] = np.vstack((current_boards, board))
                        # Register the reply channel only once its board is
                        # stored so replies stay aligned with predictions.
                        slot['a'].append(a)
                        if response_now:
                            break
                        n = n - 1
                    except Exception as e:
                        # Best effort: drop the bad request, keep serving.
                        print("PREDICTING QUEUE WORKER SKIPPED A REQUEST: %r"
                              % (e,))

                for indicator, predict in predictors:
                    slot = root[indicator]
                    if len(slot['board']) == 0:
                        continue
                    p, v = predict(slot['board'])
                    for a, policy, value in zip(slot['a'], p, v):
                        a.send((policy, value[0]))
                if root["LATEST_NAME"]['a']:
                    name = self.latest_model.name
                    for a in root["LATEST_NAME"]['a']:
                        a.send(name)
                if root["BEST_NAME"]['a']:
                    name = self.best_model.name
                    for a in root["BEST_NAME"]['a']:
                        a.send(name)

        except Exception as e:
            print("PREDICTING QUEUE WORKER EXCEPTION !!!")
            print(e)