Example #1
0
    def default_policy(self, node: MCTSTreeNode, env: Env):
        """Roll the game out to its end using uniformly random moves.

        The first move is drawn from ``node.legal_actions``; every later
        move is drawn from the ``'legal_actions'`` entry of the state
        returned by ``env.step``.

        Args:
            node: Tree node whose ``legal_actions`` seed the rollout.
            env: Environment to play on. It is advanced in place until
                ``env.is_over()`` reports the end of the game.

        Returns:
            The first entry of ``env.get_payoffs()`` once the game is over
            (presumably the payoff of player 0 -- confirm against the
            environment in use).
        """
        # A finished game needs no rollout. Checking first also avoids
        # sampling from a terminal node's (possibly empty) action list,
        # which would make random.sample raise ValueError.
        if env.is_over():
            return env.get_payoffs()[0]

        action = random.sample(node.legal_actions, 1)[0]
        while True:
            # Step forward; only the returned state is needed here.
            next_state, _ = env.step(action, False)
            if env.is_over():
                break
            action = random.sample(next_state['legal_actions'], 1)[0]

        # Game over.
        return env.get_payoffs()[0]
Example #2
0
    def default_policy(self, node: MPMCTSTreeNode, env: Env):
        """Play the game to its end, letting each player's agent pick moves.

        On every turn the agent stored at ``self.drqn_agents[player]`` is
        asked for a move through ``eval_step`` on that player's state, and
        the move is applied with ``env.step``.

        Args:
            node: Not used by this rollout; kept for the shared signature.
            env: Environment to play on. It is advanced in place until
                ``env.is_over()`` is true.

        Returns:
            Whatever ``env.get_payoffs()`` returns once the game is over.
        """
        if env.is_over():
            return env.get_payoffs()

        current_player = env.get_player_id()
        observation = env.get_state(current_player)
        while True:
            # Only the chosen action is used; the second value returned by
            # eval_step is discarded.
            chosen_action, _ = self.drqn_agents[current_player].eval_step(
                observation)
            observation, current_player = env.step(chosen_action, False)
            if env.is_over():
                break

        # Game over.
        return env.get_payoffs()
Example #3
0
    def expand_action_on_node(self, node: MCTSTreeNode, action, env: Env):
        """Apply ``action`` on ``env`` and attach the resulting child node.

        Args:
            node: Parent node that receives the new child.
            action: Action to execute; also used as the child's key and as
                its index in ``node.children``.
            env: Environment the action is executed on (advanced in place).

        Returns:
            The newly created ``MCTSTreeNode``, already registered under
            ``node.children[action]``.
        """
        # Step the environment so the legal actions available after this
        # action are known.
        state_after, _ = env.step(action, False)
        actions_after = state_after['legal_actions']
        finished = env.is_over()

        child = MCTSTreeNode(
            key=action,
            action=action,
            legal_actions=actions_after,
            game_over=finished,
            parent=node,
        )
        node.children[action] = child
        return child
Example #4
0
    def tree_policy(self, root_node: MCTSTreeNode, env: Env):
        """Pick the child of ``root_node`` to explore and advance ``env`` to it.

        If ``self.get_untried_action`` yields an action, that action is
        expanded into a new child via ``self.expand_action_on_node``.
        Otherwise the child returned by ``self.get_max_UCB_child_node`` is
        selected, its action is executed on ``env``, and its
        ``legal_actions`` / ``game_over`` fields are refreshed from the
        resulting environment state.

        Args:
            root_node: Node whose children are considered.
            env: Environment advanced in place by exactly one step.

        Returns:
            The expanded or selected child node.
        """
        candidate = self.get_untried_action(root_node)
        if candidate is not None:
            # Execute the untried action on env and create its node.
            return self.expand_action_on_node(root_node, candidate, env)

        # Every action has been tried: take the child with the max UCB value.
        best_child = self.get_max_UCB_child_node(root_node, CP_VAL)
        # Step the environment and refresh the child from the new state.
        state_after, _ = env.step(best_child.action, False)
        best_child.legal_actions = state_after['legal_actions']
        best_child.game_over = env.is_over()
        return best_child