def default_policy(self, node: MCTSTreeNode, env: Env):
    """Roll the game out with uniformly random legal actions.

    Starting from the current state of ``env``, repeatedly pick a random
    legal action and step the environment until ``env.is_over()`` is true.

    Args:
        node: Tree node whose ``legal_actions`` are used for the first
            move only; later moves use the ``'legal_actions'`` entry of
            the state returned by ``env.step``.
        env: Game environment. It is advanced in place until the game
            is over.

    Returns:
        The first entry of ``env.get_payoffs()`` once the game is over
        (presumably player 0's payoff -- confirm against the env).
    """
    legal_actions = node.legal_actions
    # Test for a finished game *before* sampling: when the rollout starts on
    # a terminal state the action list may be empty, and random.sample()
    # raises ValueError on an empty population.
    while not env.is_over():
        action = random.sample(legal_actions, 1)[0]
        # The second argument is passed exactly as before (presumably the
        # env's "raw action" flag -- confirm against Env.step).
        next_state, _ = env.step(action, False)
        if not env.is_over():
            # Only read the next legal actions while the game is running.
            legal_actions = next_state['legal_actions']
    return env.get_payoffs()[0]
def default_policy(self, node: MPMCTSTreeNode, env: Env):
    """Roll the game out by letting the per-player agents choose every move.

    If the game is already over, the payoffs are returned immediately.
    Otherwise the agent stored in ``self.drqn_agents`` for the player to
    act is asked for an action via ``eval_step``, the environment is
    stepped, and this repeats until ``env.is_over()`` is true.

    Args:
        node: Tree node the rollout starts from (not read by this method).
        env: Game environment. It is advanced in place until the game
            is over.

    Returns:
        Whatever ``env.get_payoffs()`` returns once the game is over
        (the full payoff collection, not a single entry).
    """
    if env.is_over():
        return env.get_payoffs()

    # Seed the loop with the player to act and that player's view of the game.
    acting_player = env.get_player_id()
    observation = env.get_state(acting_player)

    while not env.is_over():
        # eval_step returns a pair; only the chosen action is needed here.
        action, _ = self.drqn_agents[acting_player].eval_step(observation)
        # step() hands back the next state together with the id of the
        # player who acts on it.
        observation, acting_player = env.step(action, False)

    return env.get_payoffs()
def expand_action_on_node(self, node: MCTSTreeNode, action, env: Env):
    """Apply ``action`` to ``env`` and attach the resulting child to ``node``.

    Args:
        node: Parent tree node; the new child is stored in
            ``node.children`` under ``action``.
        action: Action to play. It is also used as the child's key.
        env: Game environment. It is advanced in place by one step.

    Returns:
        The newly created ``MCTSTreeNode`` child.
    """
    # Step first so the child records the legal actions that are available
    # after this action has been played.
    state_after_move, _ = env.step(action, False)
    legal_after_move = state_after_move['legal_actions']

    child = MCTSTreeNode(
        key=action,
        action=action,
        legal_actions=legal_after_move,
        game_over=env.is_over(),
        parent=node,
    )
    node.children[action] = child
    return child
def tree_policy(self, root_node: MCTSTreeNode, env: Env):
    """Pick the child of ``root_node`` to descend into and advance ``env``.

    If ``self.get_untried_action`` reports an untried action, that action
    is expanded into a new child via ``self.expand_action_on_node``.
    Otherwise the child returned by ``self.get_max_UCB_child_node`` is
    chosen, its action is played on ``env``, and the child's
    ``legal_actions`` and ``game_over`` are refreshed from the result.

    Args:
        root_node: Node whose children are being expanded or selected.
        env: Game environment. It is advanced in place by one step in
            either branch.

    Returns:
        The expanded or selected child node.
    """
    candidate = self.get_untried_action(root_node)
    if candidate is not None:
        # Expansion: the helper steps the env with this action itself.
        return self.expand_action_on_node(root_node, candidate, env)

    # Selection: every action has been tried, so take the child with the
    # highest UCB value (CP_VAL is presumably the exploration constant --
    # confirm where it is defined).
    best_child = self.get_max_UCB_child_node(root_node, CP_VAL)

    # Play the selected action and refresh the child's cached state info
    # from the env.
    state_after_move, _ = env.step(best_child.action, False)
    best_child.legal_actions = state_after_move['legal_actions']
    best_child.game_over = env.is_over()
    return best_child