def default_policy(self, node: MPMCTSTreeNode, env: Env):
    """Roll the game out to its end with uniformly random moves.

    If ``env`` already reports the game as over, its payoffs are returned
    immediately. Otherwise the first move is drawn from
    ``node.legal_actions`` for the player ``env`` reports as current, and
    every later move is drawn from the ``'legal_actions'`` entry of the
    state returned by ``env.step``.

    Args:
        node: Tree node supplying the legal actions for the first move,
            indexed by player id.
        env: Environment to advance. It is stepped in place until
            ``env.is_over()`` is true.

    Returns:
        Whatever ``env.get_payoffs()`` returns once the game is over.
    """
    if env.is_over():
        return env.get_payoffs()

    current_player = env.get_player_id()
    first_move_options = node.legal_actions[current_player]
    chosen = random.sample(first_move_options, 1)[0]

    while not env.is_over():
        # NOTE(review): the second positional argument is presumably the
        # env's "raw action" flag -- confirm against Env.step.
        observation, _ = env.step(chosen, False)
        if env.is_over():
            # Terminal state reached: fall back to the loop condition.
            continue
        chosen = random.sample(observation['legal_actions'], 1)[0]

    return env.get_payoffs()
def default_policy(self, node: MPMCTSTreeNode, env: Env):
    """Roll the game out to its end, letting the DRQN agents pick moves.

    If ``env`` already reports the game as over, its payoffs are returned
    immediately. Otherwise each move is the first element of the pair
    returned by ``eval_step`` on the acting player's entry in
    ``self.drqn_agents``; the second element is discarded.

    Args:
        node: Not read by this implementation.
        env: Environment to advance. It is stepped in place until
            ``env.is_over()`` is true.

    Returns:
        Whatever ``env.get_payoffs()`` returns once the game is over.
    """
    if env.is_over():
        return env.get_payoffs()

    acting_player = env.get_player_id()
    observation = env.get_state(acting_player)
    chosen, _ = self.drqn_agents[acting_player].eval_step(observation)

    while not env.is_over():
        # NOTE(review): the second positional argument is presumably the
        # env's "raw action" flag -- confirm against Env.step.
        observation, acting_player = env.step(chosen, False)
        if env.is_over():
            # Terminal state reached: fall back to the loop condition.
            continue
        chosen, _ = self.drqn_agents[acting_player].eval_step(observation)

    return env.get_payoffs()