def mcts(payload, computation_budget=1000):
    """Run a Monte Carlo tree search for the acting player and return a move.

    Args:
        payload: Mapping that provides ``'role_id'`` (seat of the acting
            player), ``'hand_card'`` (one hand per seat, indexed by seat id)
            and ``'last_taken'`` (forwarded unchanged to ``get_last_move``).
        computation_budget: Number of select / simulate / backup iterations
            run from the root. Defaults to 1000, the value that used to be
            hard-coded here.

    Returns:
        The ``action`` stored on the state of the child that
        ``get_bestchild_`` picks below the root.
    """
    role_id = payload['role_id']
    # ``next_id`` / ``next_next_id`` index straight into the payload, whereas
    # ``my_id`` and ``last_p`` are the payload ids shifted by +2 (mod 3).
    # NOTE(review): presumably the search uses its own seat numbering —
    # confirm against State / get_last_move.
    my_id = (role_id + 2) % 3
    next_id = (role_id + 1) % 3
    next_next_id = (role_id + 2) % 3

    def hand_of(seat):
        # sorted() builds a new list, so the caller's payload is not
        # reordered as a side effect of running the search.
        ordered = sorted(payload['hand_card'][seat])
        return card_list_to_dict(
            card_to_list(change_card_form_reversal(ordered)))

    my_card = hand_of(role_id)
    next_card = hand_of(next_id)
    next_next_card = hand_of(next_next_id)

    last_move_, last_p_ = get_last_move(
        role_id, next_id, next_next_id, payload['last_taken'])
    last_move = change_card_form_reversal(last_move_)
    last_p = (last_p_ + 2) % 3

    root = Node(None, None)
    moves_num = len(get_moves(my_card, last_move))
    root.set_state(State(my_id, my_card, next_card, next_next_card,
                         last_move, -1, moves_num, None, last_p))

    for _ in range(computation_budget):
        expand_node = tree_policy(root, my_id)
        reward = default_policy(expand_node, my_id)
        backup(expand_node, reward)

    return get_bestchild_(root).get_state().action
def get_next_state_with_random_choice(self, untried_move):
    """Play one move from this state and build the state the next seat sees.

    The seats rotate: the next player becomes "me", the player after next
    becomes "next", and the current player becomes "next-next".

    Args:
        untried_move: Move to play, as a mapping from rank key to count, or
            ``None`` to play a uniformly random legal move.

    Returns:
        A new ``State`` described from the following player's point of view.
    """
    legal_moves = get_moves(self.my_card, self.last_move)
    n_legal = len(legal_moves)

    # A random legal move is always drawn first, even when the caller
    # supplied a move that replaces it right away.
    pick = np.random.choice(n_legal)
    chosen = legal_moves[pick].copy()
    if untried_move is not None:
        chosen = untried_move

    # Re-draw for as long as the move satisfies is_buchu() while this player
    # also owns the last move on the table.
    # NOTE(review): is_buchu presumably means "pass"; if every legal move
    # satisfies it this loop never terminates — confirm get_moves rules
    # that out.
    while self.is_buchu(chosen) and self.last_pid == self.my_id:
        pick = np.random.choice(n_legal)
        chosen = legal_moves[pick].copy()

    # Flatten the chosen move into a list of ranks and take those cards out
    # of the current player's hand, which becomes the "next-next" hand.
    rank_keys = [str(rank) for rank in range(3, 14)]
    rank_keys += ['1', '2', '14', '15']
    played = []
    leftover = self.my_card.copy()
    for key in rank_keys:
        played.extend([int(key)] * chosen.get(key, 0))
        leftover[key] -= chosen.get(key, 0)

    following_id = (self.my_id + 1) % 3
    following_card = self.next_card.copy()
    after_following_card = self.next_next_card.copy()

    # The current player wins once no cards are left in their hand;
    # -1 means the game goes on.
    still_holding = any(count != 0 for count in leftover.values())
    winner = -1 if still_holding else self.my_id

    table_move = played.copy()
    table_owner = self.my_id
    if len(played) == 0:
        # Nothing was played: the move on the table and its owner carry
        # over to the following player ...
        table_owner = self.last_pid
        table_move = self.last_move.copy()
        # ... unless that move belongs to the following player, who then
        # starts with an empty table.
        if self.last_pid == following_id:
            table_move = []

    following_moves_num = len(get_moves(following_card, table_move))
    return State(following_id, following_card, after_following_card,
                 leftover, table_move, winner, following_moves_num,
                 played, table_owner)
def expand(self):
    """Attach and return a child node for one action that was not tried yet.

    On the first call the state's untried-action list is filled from
    ``get_moves``. One untried action is then drawn at random, applied to
    the state, and removed from the list.

    Returns:
        The newly created child ``Node``.
    """
    state = self.state
    if state.try_flag == 0:
        # First expansion of this node: register every legal move once.
        for candidate in get_moves(state.my_card, state.last_move):
            state.init_untried_actions(candidate)
        state.try_flag = 1

    pending = state.untried_actions
    n_pending = len(pending)
    idx = np.random.choice(n_pending)
    action = pending[idx].copy()
    # Re-draw while the action satisfies is_buchu() and this player also
    # owns the last move on the table.
    # NOTE(review): if the only pending action satisfies is_buchu() in that
    # situation this loop cannot exit — confirm callers prevent that.
    while state.is_buchu(action) and state.last_pid == state.my_id:
        idx = np.random.choice(n_pending)
        action = pending[idx].copy()

    child_state = self.get_state().get_next_state_with_random_choice(action)
    del pending[idx]

    child = Node(self, child_state)
    self.add_child(child)
    return child
def choose(self, state):
    """Pick a move: rule-based while all hands are large, MCTS afterwards.

    Args:
        state: Game state; only ``state.last_move`` (per-card counts of the
            move to beat) is read, and only by the rule-based branch.

    Returns:
        ``(move, None)``. ``move`` comes from the rule-based scorer while
        every player holds more than 7 cards, otherwise from the tree search.
    """
    cards_left = [sum(p.get_hand_card()) for p in self.game.players]
    if min(cards_left) > 7:
        return self._choose_rule_based(state, cards_left)
    return self._choose_mcts()


def _choose_rule_based(self, state, cards_left):
    """Score every decomposition of the hand and play the best-valued action."""
    hand_card = self.get_hand_card()
    # The decomposer and the engine use different card encodings:
    # 1 -> A, B -> *, R -> $
    trans_hand_card = [
        card_list[i] for i in range(15) for _ in range(hand_card[i])
    ]
    # Move played by the previous player, in the decomposer's encoding.
    last_move = [
        card_list[i] for i in range(15) for _ in range(state.last_move[i])
    ]
    combs, fine_mask = Decomposer().get_combinations(trans_hand_card,
                                                     last_move)

    # The penalty for every additional hand depends on the smallest number
    # of cards an opponent still holds.
    # NOTE(review): player 0 is treated as playing against players 1 and 2,
    # everyone else as playing against player 0 — presumably landlord vs.
    # farmers; confirm.
    if self.player_id == 0:
        min_oppo_cards = min(cards_left[1], cards_left[2])
    else:
        min_oppo_cards = cards_left[0]
    round_penalty = 15 - 12 * min_oppo_cards / 20

    best_move = None
    max_value = -np.inf
    for i, comb in enumerate(combs):
        # Total value of the hand under this decomposition.
        total_value = sum(cards_value[x] for x in comb)
        # NOTE(review): this indexes action_space by the position j inside
        # the combination, not by the action id comb[j]. That looks
        # unintended, but the behaviour is kept as-is until confirmed.
        small_num = sum(
            1 for j in range(1, len(comb))
            if action_space[j][0] not in ("2", "R", "B")
        )
        total_value -= small_num * round_penalty

        # Special case: the whole hand goes out in one action (optionally
        # preceded by the pass entry 0).
        single_hand = len(comb) == 1 or (len(comb) == 2 and comb[0] == 0)

        for j, action in enumerate(comb):
            if action == 0 and min_oppo_cards > 4:
                # Value of passing.
                if total_value > max_value:
                    max_value = total_value
                    best_move = 0
            elif action > 0 and (
                    # The dtype of fine_mask is not visible here, so the
                    # explicit comparison is kept.
                    fine_mask is None or fine_mask[i, j] == True):  # noqa: E712
                # Value of playing this action.
                if single_hand:
                    max_value = np.inf
                    best_move = comb[-1]
                move_value = total_value - cards_value[action] + round_penalty
                if move_value > max_value:
                    max_value = move_value
                    best_move = action

    if best_move is None:
        best_move = 0

    best_cards = action_space[best_move]
    move = [best_cards.count(x) for x in card_list]

    # Output: player i [hand] // [played cards]
    print("Player {}".format(self.player_id), ' ',
          Card.visual_card(hand_card), end=' // ')
    print(Card.visual_card(move), "From RuleBasedModel")
    return move, None


def _choose_mcts(self):
    """Re-use or rebuild the search tree, run the search, and return its move."""
    cards_out = self.game.cards_out
    length = len(cards_out)
    my_id = self.player_id

    # Try to walk the existing tree down by the two most recent moves.
    # Without at least two recorded moves the indices below would wrap
    # around or raise, so the tree is rebuilt instead.
    matched = 0
    if self.new_game is False and length >= 2:
        for entry in (cards_out[length - 2], cards_out[length - 1]):
            played = self.list_to_card(entry[1])
            child = next(
                (c for c in self.current_node.get_children()
                 if self.compare(c.state.action, played)),
                None)
            if child is None:
                break
            self.current_node = child
            matched += 1

    if matched != 2:
        # The current position is not in the tree: start from a fresh root.
        self.new_game = False
        self.current_node = Node(None, None)
        next_id = (my_id + 1) % 3
        next_next_id = (my_id + 2) % 3
        my_card = self.card_list_to_dict(self.get_hand_card())
        next_card = self.card_list_to_dict(
            self.game.players[next_id].get_hand_card())
        next_next_card = self.card_list_to_dict(
            self.game.players[next_next_id].get_hand_card())
        last_move = self.trans_card(Card.visual_card(self.game.last_move))
        last_p = self.game.last_pid
        moves_num = len(get_moves(my_card, last_move))
        self.current_node.set_state(
            State(my_id, my_card, next_card, next_next_card, last_move,
                  -1, moves_num, None, last_p))

    # Search.
    computation_budget = 2000
    for _ in range(computation_budget):
        expand_node = tree_policy(self.current_node, my_id)
        reward = default_policy(expand_node, my_id)
        backup(expand_node, reward)

    best_next_node = get_bestchild(self.current_node, my_id)
    # Keep the chosen child as the starting point for the next call.
    self.current_node = best_next_node
    new_move = self.card_to_list(best_next_node.get_state().action)

    counts = self.get_hand_card()
    hand_card = []
    for i, n in enumerate(Card.all_card_name):
        hand_card.extend([n] * counts[i])
    print("Player {}".format(self.player_id), ' ', hand_card, end=' // ')
    print(Card.visual_card(new_move), "From MctsModel")
    return new_move, None
def choose(self, state):
    """Choose a move by Monte Carlo tree search, re-using the tree if possible.

    Args:
        state: Not read by this method; the position is taken from
            ``self.game`` instead.

    Returns:
        ``(new_move, None)`` where ``new_move`` is ``self.card_to_list``
        applied to the action of the best child found by the search.
    """
    cards_out = self.game.cards_out
    length = len(cards_out)
    my_id = self.player_id

    # Try to walk the existing tree down by the two most recent moves.
    matched = 0
    if length > 2:
        for entry in (cards_out[length - 2], cards_out[length - 1]):
            played = self.list_to_card(entry[1])
            child = next(
                (c for c in self.current_node.get_children()
                 if self.compare(c.state.action, played)),
                None)
            if child is None:
                break
            self.current_node = child
            matched += 1

    if matched != 2:
        # The current position is not in the tree: start from a fresh root.
        self.current_node = Node(None, None)
        next_id = (my_id + 1) % 3
        next_next_id = (my_id + 2) % 3
        my_card = self.card_list_to_dict(self.get_hand_card())
        next_card = self.card_list_to_dict(
            self.game.players[next_id].get_hand_card())
        next_next_card = self.card_list_to_dict(
            self.game.players[next_next_id].get_hand_card())
        last_move = self.trans_card(Card.visual_card(self.game.last_move))
        last_p = self.game.last_pid
        moves_num = len(get_moves(my_card, last_move))
        self.current_node.set_state(
            State(my_id, my_card, next_card, next_next_card, last_move,
                  -1, moves_num, None, last_p))

    # Search.
    computation_budget = 2000
    for _ in range(computation_budget):
        expand_node = tree_policy(self.current_node, my_id)
        reward = default_policy(expand_node, my_id)
        backup(expand_node, reward)

    best_next_node = get_bestchild_(self.current_node)
    # Keep the chosen child as the starting point for the next call.
    self.current_node = best_next_node
    new_move = self.card_to_list(best_next_node.get_state().action)

    counts = self.get_hand_card()
    hand_card = []
    for i, n in enumerate(Card.all_card_name):
        hand_card.extend([n] * counts[i])
    print("Player {}".format(self.player_id), ' ', hand_card, end=' // ')
    print(Card.visual_card(new_move))
    return new_move, None