Esempio n. 1
0
    def expand_action_on_node(self, node: MCTSTreeNode, action, env: Env):
        """Apply ``action`` to ``env`` and hang the resulting child under ``node``.

        Args:
            node: Tree node being expanded; it becomes the child's parent.
            action: Action passed to ``env.step`` and used both as the
                child's key and as its index in ``node.children``.
            env: Environment that is stepped by this call.

        Returns:
            The newly created child node.

        Note:
            A child already stored under ``action`` is replaced.
        """
        # Step first: the child stores the legal actions available *after*
        # the move, and whether that move ended the game.
        state_after, _ = env.step(action, False)
        child = MCTSTreeNode(
            key=action,
            action=action,
            legal_actions=state_after['legal_actions'],
            game_over=env.is_over(),
            parent=node,
        )
        node.children[action] = child
        return child
Esempio n. 2
0
    def default_policy(self, node: MCTSTreeNode, env: Env):
        """Play random legal actions until the game ends and return a payoff.

        Args:
            node: Node the rollout starts from; its ``legal_actions`` supply
                the first move.
            env: Environment that is stepped until ``env.is_over()`` is true.

        Returns:
            The first entry of ``env.get_payoffs()`` once the game is over.
        """
        # A node that is already terminal may have no legal actions, and
        # sampling from an empty population raises ValueError. Return the
        # payoff directly, as the multi-player rollout does.
        if env.is_over():
            return env.get_payoffs()[0]

        action = random.sample(node.legal_actions, 1)[0]
        while not env.is_over():
            next_state, _ = env.step(action, False)
            # Only pick a follow-up move while the game is still running.
            if not env.is_over():
                action = random.sample(next_state['legal_actions'], 1)[0]

        return env.get_payoffs()[0]
Esempio n. 3
0
    def tree_policy(self, root_node: MPMCTSTreeNode, env: Env):
        """Pick the next node below ``root_node`` and advance ``env`` to it.

        If ``get_untried_action`` reports an untried action for the current
        player, that action is expanded. Otherwise the child returned by
        ``get_max_UCB_child_node`` is followed.

        Args:
            root_node: Node whose children are expanded or selected from.
            env: Environment; it is stepped exactly once by this call.

        Returns:
            The expanded or selected child node.
        """
        current_player = env.get_player_id()
        candidate = self.get_untried_action(root_node, current_player)

        if candidate is not None:
            # Expansion also executes the action on the environment.
            return self.expand_action_on_node(root_node, candidate, env)

        # Nothing left to try: follow the child with the best UCB value.
        best_child = self.get_max_UCB_child_node(root_node, CP_VAL,
                                                 current_player)
        state_after, mover = env.step(best_child.action, False)
        # Refresh the legal actions recorded for the player who moves next.
        best_child.legal_actions[mover] = state_after['legal_actions']
        return best_child
Esempio n. 4
0
    def expand_action_on_node(self, node: MPMCTSTreeNode, action, env: Env):
        """Step ``env`` with ``action`` and return the matching child of ``node``.

        An existing child under ``action`` is reused and only its legal
        actions for the next player are refreshed. Otherwise a new child is
        created and registered in ``node.children``.

        Args:
            node: Parent node being expanded.
            action: Action passed to ``env.step``; also the child's key.
            env: Environment that is stepped by this call; its
                ``player_num`` sizes the per-player legal-action list.

        Returns:
            The reused or newly created child node.
        """
        # Move the environment forward before touching the tree.
        state_after, mover = env.step(action, False)

        if action in node.children:
            # The action was expanded before: reuse its node.
            child = node.children[action]
            child.legal_actions[mover] = state_after['legal_actions']
            return child

        # One independent list per player; only the next mover's is known.
        per_player_actions = [[] for _ in range(env.player_num)]
        per_player_actions[mover] = state_after['legal_actions']
        child = MPMCTSTreeNode(
            key=action,
            action=action,
            legal_actions=per_player_actions,
            parent=node,
            player_num=env.player_num,
        )
        node.children[action] = child
        return child
Esempio n. 5
0
    def default_policy(self, node: MPMCTSTreeNode, env: Env):
        """Roll the game out with the per-player DRQN agents.

        Each move is chosen by ``self.drqn_agents[<player>].eval_step`` for
        the player about to act, until ``env.is_over()`` is true.

        Args:
            node: Node the rollout starts from (not read by this method).
            env: Environment that is stepped until the game is over.

        Returns:
            The value of ``env.get_payoffs()`` at the end of the game.
        """
        # Already terminal: nothing to simulate.
        if env.is_over():
            return env.get_payoffs()

        acting_player = env.get_player_id()
        observation = env.get_state(acting_player)
        chosen, _ = self.drqn_agents[acting_player].eval_step(observation)

        while not env.is_over():
            observation, acting_player = env.step(chosen, False)
            # Only ask an agent for a move while the game is still running.
            if not env.is_over():
                agent = self.drqn_agents[acting_player]
                chosen, _ = agent.eval_step(observation)

        return env.get_payoffs()