def default_policy(self, node: MCTSTreeNode, env: Env):
    """Play the game out from ``node`` with randomly chosen actions.

    The first action is drawn from ``node.legal_actions``; every following
    action is drawn from the ``'legal_actions'`` entry of the state returned
    by ``env.step``. ``env`` is advanced in place until ``env.is_over()``.

    Args:
        node: Tree node whose ``legal_actions`` seed the rollout.
        env: Environment to roll out; it is mutated by this call.

    Returns:
        The first entry of ``env.get_payoffs()`` after the game has ended.
    """
    # A finished game has nothing left to play. Returning here also avoids
    # sampling from a terminal node's legal_actions, which raises ValueError
    # when that collection is empty.
    if env.is_over():
        return env.get_payoffs()[0]

    action = random.sample(node.legal_actions, 1)[0]
    while not env.is_over():
        # Step forward; the id of the next player is not needed here.
        next_state, _ = env.step(action, False)
        if not env.is_over():
            action = random.sample(next_state['legal_actions'], 1)[0]

    # Game over.
    return env.get_payoffs()[0]
def expand_action_on_node(self, node: MCTSTreeNode, action, env: Env):
    """Apply ``action`` to ``env`` and attach the resulting child to ``node``.

    Args:
        node: Parent node that receives the new child.
        action: Action to play; also used as the child's key in
            ``node.children``.
        env: Environment to step; it is mutated by this call.

    Returns:
        The newly created child node.
    """
    # Step first: the child stores the legal actions available after
    # ``action`` has been played.
    state_after, _ = env.step(action, False)
    child = MCTSTreeNode(
        key=action,
        action=action,
        legal_actions=state_after['legal_actions'],
        game_over=env.is_over(),
        parent=node,
    )
    node.children[action] = child
    return child
def tree_policy(self, root_node: MPMCTSTreeNode, env: Env):
    """Pick the child of ``root_node`` to descend into, expanding if possible.

    Args:
        root_node: Node to choose a child from.
        env: Environment positioned at ``root_node``; it is advanced by the
            chosen action.

    Returns:
        The expanded or selected child node.
    """
    current_player = env.get_player_id()
    candidate = self.get_untried_action(root_node, current_player)

    # Expansion: an untried action exists, so execute it on the env.
    if candidate is not None:
        return self.expand_action_on_node(root_node, candidate, env)

    # Selection: take the child with the highest UCB value, then step the
    # env with its action and record the legal actions that follow.
    best_child = self.get_max_UCB_child_node(root_node, CP_VAL, current_player)
    state_after, following_player = env.step(best_child.action, False)
    best_child.legal_actions[following_player] = state_after['legal_actions']
    return best_child
def restore_other_player_cards(self, env: Env):
    """Write the hands stored on ``self`` back into the other players.

    For each of the three seats except ``env.get_player_id()``, the
    player's ``_current_hand`` and ``singles`` are taken from
    ``self.other_hands`` / ``self.other_singles`` and the judger's
    ``playable_cards`` entry for that seat is recomputed from the hand.

    Args:
        env: Environment whose players and judger are updated in place.
    """
    # NOTE(review): the skipped seat is the env's current player at the time
    # of this call; presumably that is the same seat that was skipped when
    # the snapshot was taken -- confirm against the caller.
    cur_player_id = env.get_player_id()

    for player_id in range(3):
        if player_id == cur_player_id:
            continue
        player = env.game.players[player_id]
        # Give the player its own list so that later in-place changes to the
        # hand cannot alter the saved snapshot.
        player._current_hand = list(self.other_hands[player_id])
        player.singles = self.other_singles[player_id]
        # Recompute the playable cards from the restored hand.
        cardstr = cards2str(player._current_hand)
        env.game.judger.playable_cards[player_id] = (
            env.game.judger.playable_cards_from_hand(cardstr))
def save_other_player_cards(self, env: Env):
    """Snapshot the hands of every player except the current one.

    Results are stored on ``self.other_hands`` (deep copies of each
    ``_current_hand``) and ``self.other_singles`` (each player's
    ``singles``), both indexed by player id. The current player's slots
    are left as empty lists.

    Args:
        env: Environment to read the players from.
    """
    acting_player = env.get_player_id()

    self.other_hands = [[] for _ in range(3)]    # hands
    self.other_singles = [[] for _ in range(3)]  # de-duplicated cards

    for seat in range(3):
        if seat == acting_player:
            continue
        opponent = env.game.players[seat]
        self.other_hands[seat] = copy.deepcopy(opponent._current_hand)
        self.other_singles[seat] = opponent.singles
def shuffle_other_player_cards(self, env: Env):
    """Randomly redistribute the other two players' cards between them.

    The hands of every seat except ``env.get_player_id()`` are deep-copied,
    pooled and randomly permuted. Each of those players then receives as
    many cards as it held before, sorted with ``doudizhu_sort_card``. The
    player's ``singles`` and the judger's ``playable_cards`` entry are
    rebuilt from the new hand.

    Args:
        env: Environment whose players and judger are updated in place.
    """
    cur_player_id = env.get_player_id()
    other_player_ids = [i for i in range(3) if i != cur_player_id]

    # Pool deep copies of the other hands, remembering each hand's size.
    hand_sizes = {}
    merged_hands = []
    for player_id in other_player_ids:
        hand = copy.deepcopy(env.game.players[player_id]._current_hand)
        hand_sizes[player_id] = len(hand)
        merged_hands.extend(hand)

    # A full-length sample without replacement is a random permutation.
    shuffled_hands = random.sample(merged_hands, len(merged_hands))

    idx_start = 0
    for player_id in other_player_ids:
        # Take this player's share of the shuffled pool.
        idx_end = idx_start + hand_sizes[player_id]
        hand = shuffled_hands[idx_start:idx_end]
        idx_start = idx_end
        # Sort the new hand.
        hand.sort(key=functools.cmp_to_key(doudizhu_sort_card))

        # Write the data into the env: the hand, the de-duplicated card
        # string (first occurrence of each character kept, in order) and the
        # recomputed playable cards.
        cardstr = cards2str(hand)
        player = env.game.players[player_id]
        player._current_hand = hand
        player.singles = "".join(OrderedDict.fromkeys(cardstr))
        env.game.judger.playable_cards[player_id] = (
            env.game.judger.playable_cards_from_hand(cardstr))
def default_policy(self, node: MPMCTSTreeNode, env: Env):
    """Play the game out to the end using the agents in ``self.drqn_agents``.

    Each move is chosen by ``self.drqn_agents[player].eval_step(state)``
    for the player whose turn it is. ``env`` is advanced in place.

    Args:
        node: Not read by this method.
        env: Environment to roll out; it is mutated by this call.

    Returns:
        ``env.get_payoffs()`` once the game is over (returned immediately if
        it already is).
    """
    if env.is_over():
        return env.get_payoffs()

    acting_player = env.get_player_id()
    observation = env.get_state(acting_player)
    while True:
        action, _ = self.drqn_agents[acting_player].eval_step(observation)
        # Step forward; the result says who moves next and what they see.
        observation, acting_player = env.step(action, False)
        if env.is_over():
            break

    # Game over.
    return env.get_payoffs()
def expand_action_on_node(self, node: MPMCTSTreeNode, action, env: Env):
    """Play ``action`` on ``env`` and return the matching child of ``node``.

    If ``node`` already has a child for ``action``, that child's
    ``legal_actions`` entry for the next player is refreshed. Otherwise a
    new child is created and stored in ``node.children``.

    Args:
        node: Parent node.
        action: Action to play; also the key in ``node.children``.
        env: Environment to step; it is mutated by this call.

    Returns:
        The existing or newly created child node for ``action``.
    """
    # Move forward by one action.
    state_after, following_player = env.step(action, False)
    upcoming_actions = state_after['legal_actions']

    # The action was expanded before: only update the stored legal actions.
    if action in node.children:
        child = node.children[action]
        child.legal_actions[following_player] = upcoming_actions
        return child

    # First visit: one legal-action slot per player, only the next player's
    # slot is filled in.
    per_player_actions = [[] for _ in range(env.player_num)]
    per_player_actions[following_player] = upcoming_actions
    child = MPMCTSTreeNode(
        key=action,
        action=action,
        legal_actions=per_player_actions,
        parent=node,
        player_num=env.player_num,
    )
    node.children[action] = child
    return child