def expand_action_on_node(self, node: MCTSTreeNode, action, env: Env):
    """Apply ``action`` to ``env`` and attach the resulting child to ``node``.

    The environment is advanced by one step so that the legal actions
    available after ``action`` can be stored on the new child.

    Args:
        node: Tree node being expanded; the child is registered in
            ``node.children`` under ``action``.
        action: Action to execute; it is also used as the child's key.
        env: Environment to step. It is mutated by this call.

    Returns:
        The newly created ``MCTSTreeNode``.
    """
    # Step first: the child records the legal actions of the post-action
    # state and whether the game ended on this move.
    state_after, _ = env.step(action, False)
    child = MCTSTreeNode(
        key=action,
        action=action,
        legal_actions=state_after['legal_actions'],
        game_over=env.is_over(),
        parent=node,
    )
    node.children[action] = child
    return child
def default_policy(self, node: MCTSTreeNode, env: Env):
    """Play uniformly random moves from ``node`` until the game ends.

    Args:
        node: Node the rollout starts from; its ``legal_actions`` supply
            the candidates for the first move.
        env: Environment positioned at ``node``. It is stepped in place
            until ``env.is_over()`` is true.

    Returns:
        The first entry of ``env.get_payoffs()`` once the game is over.
    """
    # Terminal guard: when the game has already ended there is no move to
    # pick, and sampling from an empty ``legal_actions`` would raise
    # ValueError. Return the payoff directly instead.
    if env.is_over():
        return env.get_payoffs()[0]

    action = random.sample(node.legal_actions, 1)[0]
    while True:
        # Step forward with the chosen action.
        next_state, _ = env.step(action, False)
        if env.is_over():
            break
        action = random.sample(next_state['legal_actions'], 1)[0]

    # Game over.
    return env.get_payoffs()[0]
def tree_policy(self, root_node: MPMCTSTreeNode, env: Env):
    """Pick the child of ``root_node`` to descend into, expanding if possible.

    If the current player still has an untried action at ``root_node``,
    that action is expanded. Otherwise the child returned by
    ``get_max_UCB_child_node`` is chosen and the environment is stepped
    with that child's action.

    Args:
        root_node: Node whose children are being considered.
        env: Environment positioned at ``root_node``; it is advanced by
            the selected action.

    Returns:
        The expanded or selected child node.
    """
    current_player = env.get_player_id()
    candidate = self.get_untried_action(root_node, current_player)

    if candidate is not None:
        # Expansion: the helper receives the env and executes the action on it.
        return self.expand_action_on_node(root_node, candidate, env)

    # Selection: take the max-UCB child (CP_VAL is presumably the
    # exploration constant -- confirm against its definition).
    best_child = self.get_max_UCB_child_node(root_node, CP_VAL, current_player)

    # Step the env and record the legal actions of whoever moves next.
    state_after, mover = env.step(best_child.action, False)
    best_child.legal_actions[mover] = state_after['legal_actions']
    return best_child
def expand_action_on_node(self, node: MPMCTSTreeNode, action, env: Env):
    """Step ``env`` with ``action`` and return the matching child of ``node``.

    If ``node`` already has a child for ``action``, that child is reused
    and only the legal actions of the next player are refreshed.
    Otherwise a new child is created with one legal-action slot per
    player and registered under ``action``.

    Args:
        node: Parent node being expanded.
        action: Action to execute; also the key in ``node.children``.
        env: Environment to step. It is mutated by this call.

    Returns:
        The existing or newly created child node.
    """
    # Move forward.
    state_after, mover = env.step(action, False)
    moves = state_after['legal_actions']

    if action in node.children:
        # Action already has a child: refresh the next player's slot only.
        child = node.children[action]
        child.legal_actions[mover] = moves
        return child

    # One empty slot per player; only the next player's slot is known here.
    per_player_moves = [[] for _ in range(env.player_num)]
    per_player_moves[mover] = moves
    child = MPMCTSTreeNode(
        key=action,
        action=action,
        legal_actions=per_player_moves,
        parent=node,
        player_num=env.player_num,
    )
    node.children[action] = child
    return child
def default_policy(self, node: MPMCTSTreeNode, env: Env):
    """Roll the game out to the end, letting each player's agent choose.

    Each move is chosen by ``self.drqn_agents[player].eval_step(state)``
    for the player whose turn it is.

    Args:
        node: Node the rollout starts from (not read by this method).
        env: Environment to play out; it is stepped in place until
            ``env.is_over()`` is true.

    Returns:
        ``env.get_payoffs()`` for the finished game.
    """
    if env.is_over():
        return env.get_payoffs()

    agents = self.drqn_agents
    mover = env.get_player_id()
    action, _ = agents[mover].eval_step(env.get_state(mover))

    while not env.is_over():
        # Step forward with the action picked for the current mover.
        observation, mover = env.step(action, False)
        if env.is_over():
            # Terminal transition: nothing left to choose; the loop
            # condition ends the rollout.
            continue
        action, _ = agents[mover].eval_step(observation)

    # Game over.
    return env.get_payoffs()