def choose(self, state):
    """Choose a move by scoring every decomposition of the current hand.

    ``Decomposer`` splits the hand into candidate combinations (lists of
    indices into ``action_space``). Each combination is valued with
    ``cards_value`` minus a per-hand penalty, and the best-scoring legal
    action of any combination (or a pass, action 0) is played.

    Args:
        state: game state; only ``state.last_move`` (per-card counts of the
            move to respond to) is read.

    Returns:
        ``(move, None)``, where ``move`` holds one count per entry of
        ``card_list``.
    """
    hand_card = self.get_hand_card()

    # The decomposer and the engine use different card encodings
    # (1 -> A, B -> *, R -> $), so both count vectors are expanded through
    # card_list.
    trans_hand_card = [
        card_list[k] for k in range(15) for _ in range(hand_card[k])
    ]
    last_move = [
        card_list[k] for k in range(15) for _ in range(state.last_move[k])
    ]

    combs, fine_mask = Decomposer().get_combinations(trans_hand_card, last_move)

    # The penalty for every extra hand grows as the opponent closest to
    # finishing holds fewer cards.
    remaining = [sum(p.get_hand_card()) for p in self.game.players]
    if self.player_id == 0:
        min_oppo_crads = min(remaining[1], remaining[2])
    else:
        min_oppo_crads = remaining[0]
    round_penalty = 15 - 12 * min_oppo_crads / 20

    best_move = None
    max_value = -np.inf
    for row, comb in enumerate(combs):
        hands = len(comb)

        # NOTE(review): action_space is indexed by the position inside the
        # combination, not by the action id comb[pos] - confirm that this is
        # intended.
        small_num = sum(
            1 for pos in range(1, hands)
            if action_space[pos][0] not in ["2", "R", "B"]
        )
        total_value = sum(cards_value[a] for a in comb)
        total_value -= small_num * round_penalty

        for pos in range(hands):
            action = comb[pos]

            if action == 0 and min_oppo_crads > 4:
                # Passing keeps the whole combination in hand.
                if total_value > max_value:
                    max_value = total_value
                    best_move = 0
                continue

            playable = action > 0 and (
                fine_mask is None or fine_mask[row, pos] == True
            )
            if not playable:
                continue

            # Special case: the combination is a single hand (optionally
            # preceded by the pass entry), so playing it empties the hand.
            if hands == 1 or (hands == 2 and comb[0] == 0):
                max_value = np.inf
                best_move = comb[-1]

            move_value = total_value - cards_value[action] + round_penalty
            if move_value > max_value:
                max_value = move_value
                best_move = action

    if best_move is None:
        best_move = 0

    # Convert the chosen action back into engine card counts.
    best_cards = action_space[best_move]
    move = [best_cards.count(name) for name in card_list]

    # Log line: player i [hand] // [move]
    print("Player {}".format(self.player_id), ' ',
          Card.visual_card(hand_card), end=' // ')
    print(Card.visual_card(move))
    return move, None
def choose(self, state):
    """Choose a move with the CNN policy network (landlord seat only).

    The game information is encoded with ``generate_game_state`` and fed to
    the network. The first of its top-10 predicted labels that matches
    exactly one currently legal move is played.

    Args:
        state: unused; the network input is rebuilt from ``self`` and
            ``self.game``.

    Returns:
        ``(move, None)``. ``move`` is ``[]`` (pass) when none of the top-10
        predictions is legal, and for seats 1 and 2, whose models are not
        loaded yet.

    Raises:
        ValueError: if ``self.player_id`` is not 0, 1 or 2.
    """
    current_handcards = self.get_hand_card()
    series_cards_out = self.game.cards_out
    player_id = self.player_id
    public_np = self.get_public_card()

    # Network input; described by the original author as a 15*4 boolean
    # numpy array.
    net_input = generate_game_state(current_handcards, public_np,
                                    series_cards_out, player_id)

    if player_id == 0:
        # Model trained on landlord games.
        valid_moves = self.get_moves()

        # NOTE(review): the network is rebuilt and its weights re-read from
        # disk on every call, and it is never switched to eval mode -
        # confirm whether either is intended.
        net = resnet.resnetpokernet(num_classes=13707).to(device)
        net.load_state_dict(torch.load(args.model_path)["state_dict"])

        batch_state = np.expand_dims(net_input, axis=0)
        batch_tensor = torch.from_numpy(batch_state).float().to(device)
        outputs = net(batch_tensor)
        _, pred = torch.topk(outputs, 10)

        if not isinstance(valid_moves, np.ndarray):
            valid_moves = np.array(valid_moves)

        for rank in range(10):
            _, list_idx = label_int2str(pred[0][rank].item())
            find_res = findByRow(valid_moves, np.array(list_idx))
            if len(find_res) != 1:
                continue

            move = valid_moves[find_res]

            # Read the hand once instead of once per card name.
            counts = self.get_hand_card()
            hand_card = []
            for idx, name in enumerate(Card.all_card_name):
                hand_card.extend([name] * counts[idx])

            # Log line: player i [hand] // [move]
            print("Player {}".format(self.player_id), ' ', hand_card, ' // ',
                  Card.visual_card(move))
            return move, None

        # None of the top-10 predictions is a legal move: pass.
        return [], None

    if player_id in (1, 2):
        # The landlord_down / landlord_up models are not wired in yet, so
        # these seats pass. Return the same (move, None) pair as every other
        # path so callers can always unpack the result.
        return [], None

    # The original called .format() without an argument, which raised
    # IndexError before the ValueError could be constructed.
    raise ValueError(
        'player_id should be among 0, 1, 2. Got: {}'.format(player_id))
def choose(self, state):
    """Play a legal move picked uniformly at random.

    Args:
        state: unused.

    Returns:
        ``(move, None)``, where ``move`` is one entry of
        ``self.get_moves()``.
    """
    valid_moves = self.get_moves()

    # Read the hand once; the original re-fetched it for every card name.
    counts = self.get_hand_card()
    hand_card = []
    for idx, name in enumerate(Card.all_card_name):
        hand_card.extend([name] * counts[idx])

    move = valid_moves[np.random.choice(len(valid_moves))]

    # Log line: player i [hand] // [move]
    print("Player {}".format(self.player_id), ' ', hand_card, end=' // ')
    print(Card.visual_card(move))
    return move, None
def choose(self, state):
    """Let the DQN model pick an action from ``self.move_list``.

    Args:
        state: game state, converted with ``state_to_tensor`` before it is
            handed to ``self.model.choose_action``.

    Returns:
        ``(res, None)``, where ``res`` is whatever ``choose_action``
        returned.
    """
    move_list = self.move_list
    res = self.model.choose_action(state_to_tensor(state), move_list)

    # Only log when a 'test' entry exists in the parent of this file's
    # directory.
    test_dir = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'test')
    if os.path.exists(test_dir):
        # Read the hand once; the original re-fetched it for every card name.
        counts = self.get_hand_card()
        hand_card = []
        for idx, name in enumerate(Card.all_card_name):
            hand_card.extend([name] * counts[idx])

        # Log line: player i [hand] // [move]
        print("DQN Player {}".format(self.player_id), ' ', hand_card, ' // ',
              Card.visual_card(res))
    return res, None
def test_is_quads():
    """Check is_quads on a hand with four 14s and on one with only three.

    Four 14s alongside 10, 4, 10 must give ``(14, 10)``; three 14s must give
    ``False``.
    """
    four_of_a_kind = [
        Card(14, "s"),
        Card(14, "h"),
        Card(14, "d"),
        Card(14, "c"),
        Card(10, "h"),
        Card(4, "h"),
        Card(10, "h"),
    ]
    three_of_a_kind = [
        Card(14, "s"),
        Card(13, "h"),
        Card(14, "d"),
        Card(14, "c"),
        Card(10, "h"),
        Card(4, "h"),
        Card(2, "h"),
    ]
    cases = (
        (four_of_a_kind, (14, 10)),
        (three_of_a_kind, False),
    )
    for hand, expected in cases:
        assert is_quads(hand) == expected
def test_is_straight_flush():
    """Check is_straight_flush on a near miss and on a real straight flush.

    14-13-12-11-10 with the 14 in spades must give ``False``; the same ranks
    all in hearts must give ``(1, 14)``.
    """
    broken_by_suit = [
        Card(14, "s"),
        Card(13, "h"),
        Card(12, "h"),
        Card(11, "h"),
        Card(10, "h"),
        Card(4, "h"),
        Card(2, "h"),
    ]
    all_hearts_run = [
        Card(14, "h"),
        Card(13, "h"),
        Card(12, "h"),
        Card(11, "h"),
        Card(10, "h"),
        Card(4, "s"),
        Card(2, "s"),
    ]
    cases = (
        (broken_by_suit, False),
        (all_hearts_run, (1, 14)),
    )
    for hand, expected in cases:
        assert is_straight_flush(hand) == expected
def test_is_flush_various():
    """Check is_flush with five hearts (expects 10) and with four (False)."""
    five_hearts = [
        Card(10, "h"),
        Card(10, "h"),
        Card(9, "h"),
        Card(5, "h"),
        Card(9, "h"),
        Card(4, "s"),
        Card(2, "s"),
    ]
    four_hearts = [
        Card(10, "h"),
        Card(10, "h"),
        Card(9, "h"),
        Card(5, "h"),
        Card(9, "s"),
        Card(4, "s"),
        Card(2, "s"),
    ]
    cases = (
        (five_hearts, 10),
        (four_hearts, False),
    )
    for hand, expected in cases:
        assert is_flush(hand) == expected
def ranks_to_hand(ranks):
    """Return a list with one hearts ``Card`` per rank, in the given order."""
    hand = []
    for value in ranks:
        hand.append(Card(value, "h"))
    return hand
def test_is_straight_not():
    """A hand whose longest run of consecutive ranks is 10-9-8 is no straight."""
    ranks = [14, 12, 10, 9, 8, 6, 2]
    # Build the all-hearts hand with the shared helper rather than repeating
    # its comprehension and rebinding one name from ranks to cards.
    assert not is_straight(ranks_to_hand(ranks))
def test_is_straight():
    """Seven consecutive ranks, 14 down to 8, must be reported as a straight."""
    ranks = [14, 13, 12, 11, 10, 9, 8]
    # Build the all-hearts hand with the shared helper rather than repeating
    # its comprehension and rebinding one name from ranks to cards.
    assert is_straight(ranks_to_hand(ranks))
def choose(self, state):
    """Choose a move by tree search starting from ``self.current_node``.

    The node reached by the two most recent entries of
    ``self.game.cards_out`` is looked up among the children of the current
    node. If it cannot be found, a fresh root is built from the players'
    hands. The tree is then grown for a fixed number of
    ``tree_policy`` / ``default_policy`` / ``backup`` iterations and the
    action of the best child is played.

    Args:
        state: unused; everything is read from ``self`` and ``self.game``.

    Returns:
        ``(new_move, None)``, where ``new_move`` is the chosen action
        converted with ``self.card_to_list``.
    """
    cards_out = self.game.cards_out
    length = len(cards_out)

    # 0: nothing matched, 1: first recent move matched, 2: both matched and
    # self.current_node now points at the node for the present position.
    flag = 0
    if length > 2:
        # The two most recent moves in the play history.
        out1 = self.list_to_card(cards_out[length - 2][1])
        out2 = self.list_to_card(cards_out[length - 1][1])
        for child in self.current_node.get_children():
            if self.compare(child.state.action, out1):
                self.current_node = child
                flag = 1
                break
        if flag == 1:
            for child in self.current_node.get_children():
                if self.compare(child.state.action, out2):
                    self.current_node = child
                    flag = 2
                    break

    my_id = self.player_id
    if flag != 2:
        # Could not follow the history through the tree: start a new root.
        self.current_node = Node(None, None)
        next_id = (my_id + 1) % 3
        next_next_id = (my_id + 2) % 3
        my_card = self.card_list_to_dict(self.get_hand_card())
        next_card = self.card_list_to_dict(
            self.game.players[next_id].get_hand_card())
        next_next_card = self.card_list_to_dict(
            self.game.players[next_next_id].get_hand_card())
        last_move = self.trans_card(Card.visual_card(self.game.last_move))
        last_p = self.game.last_pid
        moves_num = len(get_moves(my_card, last_move))
        root_state = State(my_id, my_card, next_card, next_next_card,
                           last_move, -1, moves_num, None, last_p)
        self.current_node.set_state(root_state)

    # Search with a fixed iteration budget.
    computation_budget = 2000
    for _ in range(computation_budget):
        expand_node = tree_policy(self.current_node, my_id)
        reward = default_policy(expand_node, my_id)
        backup(expand_node, reward)

    best_next_node = get_bestchild_(self.current_node)
    move = best_next_node.get_state().action
    # Keep the chosen child so the next call can continue from it.
    self.current_node = best_next_node
    new_move = self.card_to_list(move)

    # Read the hand once; the original re-fetched it for every card name.
    counts = self.get_hand_card()
    hand_card = []
    for idx, name in enumerate(Card.all_card_name):
        hand_card.extend([name] * counts[idx])

    # Log line: player i [hand] // [move]
    print("Player {}".format(self.player_id), ' ', hand_card, end=' // ')
    print(Card.visual_card(new_move))
    return new_move, None
def choose(self, state):
    """Choose a move: heuristic scoring first, tree search once hands shrink.

    While every player still holds more than 7 cards, the hand is decomposed
    with ``Decomposer`` and the best-scoring legal action is played.
    Otherwise the tree rooted at ``self.current_node`` is grown for a fixed
    number of ``tree_policy`` / ``default_policy`` / ``backup`` iterations
    and the action of the best child is played.

    Args:
        state: game state; only ``state.last_move`` is read, and only on the
            heuristic path.

    Returns:
        ``(move, None)``, where ``move`` is a list of per-card counts.
    """
    # Cards left per player; computed once and shared by both uses below.
    left_crads = [sum(p.get_hand_card()) for p in self.game.players]

    if min(left_crads) > 7:
        hand_card = self.get_hand_card()

        # The decomposer and the engine use different card encodings
        # (1 -> A, B -> *, R -> $), so both count vectors are expanded
        # through card_list.
        trans_hand_card = [
            card_list[k] for k in range(15) for _ in range(hand_card[k])
        ]
        last_move = [
            card_list[k] for k in range(15)
            for _ in range(state.last_move[k])
        ]

        combs, fine_mask = Decomposer().get_combinations(
            trans_hand_card, last_move)

        # The penalty for every extra hand grows as the opponent closest to
        # finishing holds fewer cards.
        if self.player_id == 0:
            min_oppo_crads = min(left_crads[1], left_crads[2])
        else:
            min_oppo_crads = left_crads[0]
        round_penalty = 15 - 12 * min_oppo_crads / 20

        best_move = None
        max_value = -np.inf
        for row, comb in enumerate(combs):
            hands = len(comb)

            # NOTE(review): action_space is indexed by the position inside
            # the combination, not by the action id comb[pos] - confirm that
            # this is intended.
            small_num = sum(
                1 for pos in range(1, hands)
                if action_space[pos][0] not in ["2", "R", "B"]
            )
            total_value = sum(cards_value[a] for a in comb)
            total_value -= small_num * round_penalty

            for pos in range(hands):
                action = comb[pos]

                if action == 0 and min_oppo_crads > 4:
                    # Passing keeps the whole combination in hand.
                    if total_value > max_value:
                        max_value = total_value
                        best_move = 0
                    continue

                playable = action > 0 and (
                    fine_mask is None or fine_mask[row, pos] == True
                )
                if not playable:
                    continue

                # Special case: the combination is a single hand (optionally
                # preceded by the pass entry).
                if hands == 1 or (hands == 2 and comb[0] == 0):
                    max_value = np.inf
                    best_move = comb[-1]

                move_value = total_value - cards_value[action] + round_penalty
                if move_value > max_value:
                    max_value = move_value
                    best_move = action

        if best_move is None:
            best_move = 0

        # Convert the chosen action back into engine card counts.
        best_cards = action_space[best_move]
        move = [best_cards.count(name) for name in card_list]

        # Log line: player i [hand] // [move]
        print("Player {}".format(self.player_id), ' ',
              Card.visual_card(hand_card), end=' // ')
        print(Card.visual_card(move), "From RuleBasedModel")
        return move, None

    # ---- tree search path ----
    cards_out = self.game.cards_out
    length = len(cards_out)

    # 0: nothing matched, 1: first recent move matched, 2: both matched and
    # self.current_node now points at the node for the present position.
    flag = 0
    if self.new_game is False:
        # The two most recent moves in the play history.
        out1 = self.list_to_card(cards_out[length - 2][1])
        out2 = self.list_to_card(cards_out[length - 1][1])
        for child in self.current_node.get_children():
            if self.compare(child.state.action, out1):
                self.current_node = child
                flag = 1
                break
        if flag == 1:
            for child in self.current_node.get_children():
                if self.compare(child.state.action, out2):
                    self.current_node = child
                    flag = 2
                    break

    my_id = self.player_id
    if flag != 2:
        # Could not follow the history through the tree: start a new root.
        self.new_game = False
        self.current_node = Node(None, None)
        next_id = (my_id + 1) % 3
        next_next_id = (my_id + 2) % 3
        my_card = self.card_list_to_dict(self.get_hand_card())
        next_card = self.card_list_to_dict(
            self.game.players[next_id].get_hand_card())
        next_next_card = self.card_list_to_dict(
            self.game.players[next_next_id].get_hand_card())
        last_move = self.trans_card(Card.visual_card(self.game.last_move))
        last_p = self.game.last_pid
        moves_num = len(get_moves(my_card, last_move))
        # Named root_state so the `state` parameter is not rebound.
        root_state = State(my_id, my_card, next_card, next_next_card,
                           last_move, -1, moves_num, None, last_p)
        self.current_node.set_state(root_state)

    # Search with a fixed iteration budget.
    computation_budget = 2000
    for _ in range(computation_budget):
        expand_node = tree_policy(self.current_node, my_id)
        reward = default_policy(expand_node, my_id)
        backup(expand_node, reward)

    best_next_node = get_bestchild(self.current_node, my_id)
    move = best_next_node.get_state().action
    # Keep the chosen child so the next call can continue from it.
    self.current_node = best_next_node
    new_move = self.card_to_list(move)

    # Read the hand once; the original re-fetched it for every card name.
    counts = self.get_hand_card()
    hand_card = []
    for idx, name in enumerate(Card.all_card_name):
        hand_card.extend([name] * counts[idx])

    # Log line: player i [hand] // [move]
    print("Player {}".format(self.player_id), ' ', hand_card, end=' // ')
    print(Card.visual_card(new_move), "From MctsModel")
    return new_move, None
def choose(self, state):
    """Choose a move with the scoring heuristic plus hand-written rules.

    ``Decomposer`` splits the hand into candidate combinations (lists of
    indices into ``action_space``). Each combination is valued with
    ``cards_value`` minus a per-hand penalty. A few special rules can return
    or fix the move early; otherwise the best-scoring legal action (or a
    pass, action 0) is played.

    Args:
        state: game state; ``state.last_move``, ``state.last_pid`` and
            ``state.other_hand`` are read.

    Returns:
        ``(move, None)``, where ``move`` is a list of 15 per-card counts.
    """
    hand_card = self.get_hand_card()

    # The decomposer and the engine use different card encodings
    # (1 -> A, B -> *, R -> $), so both count vectors are expanded through
    # card_list.
    trans_hand_card = [
        card_list[k] for k in range(15) for _ in range(hand_card[k])
    ]
    last_move = [
        card_list[k] for k in range(15) for _ in range(state.last_move[k])
    ]

    D = Decomposer()
    combs, fine_mask = D.get_combinations(trans_hand_card, last_move)

    # The per-hand penalty falls linearly with the card count of the
    # opponent closest to finishing.
    left_crads = [sum(p.get_hand_card()) for p in self.game.players]
    min_oppo_crads = min(
        left_crads[1], left_crads[2]) if self.player_id == 0 else left_crads[0]
    round_penalty = 17 - 12 * min_oppo_crads / 20

    if not last_move:
        # Nothing to beat. The adjustments are applied in the original
        # order so the floating-point result is unchanged.
        if self.player_id == 0:  # landlord
            round_penalty += 7
        elif self.player_id == 1:  # seat after the landlord
            round_penalty += 5
        else:  # seat before the landlord
            round_penalty += 3
        if self.player_id == 2:
            round_penalty += 5
        if self.player_id == 1:
            round_penalty -= 8

    best_move = None
    max_value = -np.inf
    for i in range(len(combs)):
        # Value of the whole combination.
        total_value = sum([cards_value[x] for x in combs[i]])

        # Every hand in the combination is penalised except as many as there
        # are cards in the last three slots of hand_card (presumably 2 and
        # the two jokers - confirm against card_list).
        small_num = hand_card[-1] + hand_card[-2] + hand_card[-3]
        small_num = (len(combs[i]) - small_num)
        total_value -= small_num * round_penalty

        # Rule: a rocket plus one other hand - play the rocket immediately.
        # NOTE(review): value 12 is assumed to identify the rocket in
        # cards_value - confirm.
        if len(combs[i]) == 3 and combs[i][0] == 0 or len(combs[i]) == 2:
            if cards_value[combs[i][-1]] == 12 or cards_value[
                    combs[i][-2]] == 12:
                print('*****rule 火箭直接走')
                return [0] * 13 + [1, 1], None

        # Rule: player 2 has a single card left and we lead - play our
        # lowest single so the teammate can finish.
        if self.player_id == 1 and sum(
                self.game.players[2].get_hand_card()) == 1 and not last_move:
            # Distinct loop names: the original reused `i` and `j` here and
            # clobbered the index of the enclosing loop over combs.
            for card_idx, card_count in enumerate(hand_card):
                if card_count != 0:
                    tem = [0] * 15
                    tem[card_idx] = 1
                    print('******rule 下家农民手里只有一张牌,送队友走')
                    return tem, None

        # Rule: as player 2 with exactly two hands left, beat a big move if
        # one of our hands is itself in dapai and legal.
        if self.player_id == 2 and len(combs[i]) == 3 and combs[i][0] == 0:
            if action_space[combs[i][1]] in dapai and (
                    fine_mask is None or fine_mask[i, 1] == True):
                print('******rule 队友出大牌能走就压')
                best_move = combs[i][1]
                break
            elif action_space[combs[i][2]] in dapai and (
                    fine_mask is None or fine_mask[i, 2] == True):
                print('******rule 队友出大牌能走就压')
                best_move = combs[i][2]
                break

        # Rule: the teammate (player 1) played a big move and we cannot
        # finish - pass.
        if self.player_id == 2 and state.last_pid == 1 and sorted(
                last_move) in dapai:
            print('******rule 队友出大牌走不了就不压')
            best_move = 0
            break

        for j in range(0, len(combs[i])):
            # Score of passing.
            if combs[i][j] == 0 and min_oppo_crads > 8:
                if total_value > max_value:
                    max_value = total_value
                    best_move = 0
            # Score of playing a legal, non-pass action.
            elif combs[i][j] > 0 and (fine_mask is None
                                      or fine_mask[i, j] == True):
                # Special case: the combination is a single hand.
                if len(combs[i]) == 1 or len(
                        combs[i]) == 2 and combs[i][0] == 0:
                    max_value = np.inf
                    best_move = combs[i][-1]
                    break

                move_value = total_value - cards_value[
                    combs[i][j]] + round_penalty

                # Holding the current top card plus one other hand: strongly
                # prefer the top card.
                # NOTE(review): `<= 15` presumably limits this to
                # single-card actions - confirm.
                if len(combs[i]) == 3 and combs[i][0] == 0 or len(
                        combs[i]) == 2:
                    if combs[i][j] > maxcard(
                            state.other_hand) and combs[i][j] <= 15:
                        move_value += 100

                # The landlord has one card left: avoid actions that do not
                # exceed the opponents' highest card.
                if self.player_id != 0 and sum(
                        self.game.players[0].get_hand_card()) == 1:
                    if combs[i][j] <= maxcard(state.other_hand):
                        move_value -= 100

                # A farmer has one card left: same avoidance for the
                # landlord. The original parenthesised this as
                # `(a == 1 or b) == 1`, which gave the same result only by
                # accident.
                if self.player_id == 0 and (
                        sum(self.game.players[1].get_hand_card()) == 1
                        or sum(self.game.players[2].get_hand_card()) == 1):
                    if combs[i][j] <= maxcard(state.other_hand):
                        move_value -= 100

                if move_value > max_value:
                    max_value = move_value
                    best_move = combs[i][j]

    if best_move is None:
        best_move = 0

    # Convert the chosen action back into engine card counts.
    best_cards = action_space[best_move]
    move = [best_cards.count(x) for x in card_list]

    # Log line: player i [hand] // [move]
    print("Player {}".format(self.player_id), ' ',
          Card.visual_card(hand_card), end=' // ')
    print(Card.visual_card(move))
    return move, None