def update_model(self, experiences):
    """
    Run one Q-learning training step on a batch of experiences.

    For each experience the bootstrap target
    ``reward + gamma * max_a Q(next_info_feat, a)`` is computed by evaluating
    ``self.q`` over every action available in the next state. The network is
    then trained on all (info_feat, action_feat, target) triples with a single
    run of ``self.train_op``.

    :param experiences: iterable of objects exposing ``reward``, ``info_feat``,
        ``action_feat``, ``next_info_feat`` and ``next_available_action_feats``
    :return: None
    """
    reward_plus_gamma_q = []
    info_feats = []
    action_feats = []

    for experience in experiences:
        next_action_feats = list(experience.next_available_action_feats)
        if next_action_feats:
            # The same next-state feature is paired with every candidate action.
            next_info_feats = [experience.next_info_feat] * len(next_action_feats)
            next_q = self.sess.run(
                self.q,
                feed_dict={
                    self.info_feats: next_info_feats,
                    self.action_feats: next_action_feats,
                })
            target = experience.reward + self.gamma * np.max(next_q)
        else:
            # No action is available in the next state, so there is nothing to
            # bootstrap from (np.max would raise on an empty array).
            target = experience.reward

        reward_plus_gamma_q.append(target)
        info_feats.append(experience.info_feat)
        action_feats.append(experience.action_feat)

    if not info_feats:
        # An empty batch carries no training signal; skip the optimizer step.
        return

    self.sess.run(
        (self.train_op, self.loss, self.q),
        feed_dict={
            self.info_feats: info_feats,
            self.action_feats: action_feats,
            self.reward_plus_gamma_q: reward_plus_gamma_q,
        })
def compete_silent(cls, env, players):
    '''
    Play a fixed series of games without per-game output and average the scores.

    A random chance player is appended to a copy of ``players`` (the caller's
    list is not modified). Ten games are played; after each one the final scores
    of the normal players are accumulated, and the averages are returned.

    :param env: The game environment
    :param players: The normal players (without the chance player)
    :return: scores for the players
    '''
    total_count = 10
    num_normal_players = len(players)
    total_scores = [0] * num_normal_players
    players = players + [roomai.games.common.RandomPlayerChance()]

    def broadcast(current_infos):
        # Hand every participant (chance player included) its own info.
        for seat, player in enumerate(players):
            player.receive_info(current_infos[seat])

    for count in range(total_count):
        # NOTE(review): chips and dealer_id are drawn over len(players), which
        # here includes the chance player - confirm that this is intended.
        chips = [(1000 + int(random.random() * 200)) for _ in players]
        dealer_id = int(random.random() * len(players))
        big_blind_bet = 50

        infos, public, persons, private, action_history = env.init({
            "chips": chips,
            "param_num_normal_players": num_normal_players,
            "dealer_id": dealer_id,
            "big_blind_bet": big_blind_bet
        })
        broadcast(infos)

        while public[-1].is_terminal == False:
            turn = public[-1].turn
            action = players[turn].take_action()
            infos, public, persons, private, _ = env.forward(action)
            broadcast(infos)

        # The terminal infos are delivered once more after the loop ends.
        broadcast(infos)
        final_scores = public[-1].scores
        for seat in range(num_normal_players):
            total_scores[seat] += final_scores[seat]

        played = count + 1
        if played % 500 == 0:
            tmp_scores = [score / played for score in total_scores]
            roomai.get_logger().info(
                "TexasHoldem completes %d competitions, scores=%s" %
                (played, ",".join([str(i) for i in tmp_scores])))

    divisor = 1.0 * total_count
    for seat in range(len(total_scores)):
        total_scores[seat] /= divisor
    return total_scores
def compete(cls, env, players):
    """
    Play 1000 games between the players and return their average scores.

    Every game starts with randomly drawn chips (1000-1199 per player) and a
    random dealer. A progress line is logged after every 500 games.

    Args:
        env: the game environment; must provide ``init(params)`` and
            ``forward(action)``, each returning
            ``(infos, public_state, person_states, private_state)``
        players: the players; each must provide ``receive_info(info)`` and
            ``take_action()``

    Returns:
        list with the score of every player averaged over all games
    """
    num_players = len(players)
    total_scores = [0 for _ in range(num_players)]
    total_count = 1000

    for count in range(total_count):
        chips = [(1000 + int(random.random() * 200)) for _ in range(num_players)]
        dealer_id = int(random.random() * num_players)
        big_blind_bet = 50

        infos, public, persons, private = env.init({
            "chips": chips,
            "num_players": num_players,
            "dealer_id": dealer_id,
            "big_blind_bet": big_blind_bet
        })
        # range (not the Python-2-only xrange) keeps this runnable on Python 3.
        for i in range(num_players):
            players[i].receive_info(infos[i])

        while not public.is_terminal:
            turn = public.turn
            action = players[turn].take_action()
            infos, public, persons, private = env.forward(action)
            for i in range(num_players):
                players[i].receive_info(infos[i])

        # Deliver the terminal infos once more and collect the final scores.
        for i in range(num_players):
            players[i].receive_info(infos[i])
            total_scores[i] += public.scores[i]

        if (count + 1) % 500 == 0:
            tmp_scores = [score / (count + 1) for score in total_scores]
            roomai.get_logger().info(
                "TexasHoldem completes %d competitions, scores=%s" %
                (count + 1, ",".join([str(i) for i in tmp_scores])))

    for i in range(len(total_scores)):
        total_scores[i] /= 1.0 * total_count
    return total_scores
def init(self, params=None):
    '''
    Initialize the Bang game environment with the initialization params.

    The initialization is a dict with only one option:
    param_num_normal_players: how many players are in the game, the option
    must be in {2, 4, 5}, default 5.
    An example of the initialization param is {"param_num_normal_players":2}

    :param params: the initialization params (None means "use the defaults")
    :return: infos, public_state_history, person_states_history, private_state_history, playerid_action_history
    :raises ValueError: if param_num_normal_players is not 2, 4 or 5
    '''
    if params is None:
        # A fresh dict per call instead of a shared mutable default argument.
        params = dict()
    logger = roomai.get_logger()

    ############ public state and private state ##########
    public_state = BangStatePublic()
    private_state = BangStatePrivate()
    self.__public_state_history__.append(public_state)
    self.__private_state_history__.append(private_state)

    if "param_num_normal_players" in params:
        public_state.__param_num_normal_players__ = params["param_num_normal_players"]
    else:
        public_state.__param_num_normal_players__ = 5

    if public_state.param_num_normal_players not in [2, 4, 5]:
        logger.fatal("The number of normal players must be in [2,4,5]")
        raise ValueError("The number of normal players must be in [2,4,5]")

    public_state.__public_player_infos__ = [
        PublicPlayerInfo()
        for i in range(public_state.__param_num_normal_players__)
    ]
    for player_info in public_state.__public_player_infos__:
        player_info.__num_hand_cards__ = 0
        player_info.__character_card__ = None
        player_info.__equipment_cards__ = []

    # The chance player (id == number of normal players) acts first.
    public_state.__phase_info__ = PhaseInfo()
    public_state.__phase_info__.__playid__ = public_state.__param_num_normal_players__
    public_state.__phase_info__.__phase__ = PhaseInfo.ChancePlay
    public_state.__turn__ = public_state.__param_num_normal_players__

    ########### person states #########
    # One state per normal player plus one for the chance player. The original
    # code subscripted the state objects (person_states[i][0]) and never pushed
    # the chance player's state to the history; both are corrected here.
    # NOTE(review): assumes __person_states_history__ already holds one list
    # per participant including the chance player - confirm against the base env.
    person_states = [
        BangStatePerson()
        for i in range(public_state.param_num_normal_players + 1)
    ]
    for i, person_state in enumerate(person_states):
        self.__person_states_history__[i].append(person_state)
        person_state.__id__ = i
        person_state.__hand_cards__ = []
        person_state.__role__ = None
        person_state.__available_actions__ = dict()

    chance_state = person_states[public_state.__param_num_normal_players__]
    chance_state.__available_actions__ = self.available_actions()

    return (self.__gen_infos__(),
            self.__public_state_history__,
            self.__person_states_history__,
            self.__private_state_history__,
            self.__playerid_action_history__)
def sampling(probs):
    """
    Sample an index from a discrete distribution.

    A uniform number r in [0, 1) is drawn and the first index whose cumulative
    probability exceeds r is returned. If the probabilities sum to r or less
    (e.g. because of rounding, or because they do not sum to 1), a warning is
    logged and the last index is returned.

    :param probs: sequence of probabilities
    :return: the sampled index; ``len(probs) - 1`` on fall-through (this is -1
        when ``probs`` is empty)
    """
    r = random.random()
    cumulative = 0
    for index, prob in enumerate(probs):
        cumulative += prob
        if cumulative > r:
            return index

    # The logger is only needed on this rare path. ``warning`` replaces the
    # deprecated ``warn`` alias, and the message now states the condition that
    # actually occurred (the cumulative sum never exceeded r).
    roomai.get_logger().warning(
        "Sampling probs(%s) with r = %f occurs sum(probs) <= r",
        ",".join([str(p) for p in probs]), r)
    return len(probs) - 1
def lookup(cls, key):
    '''
    Look up a PokerCard with the specified key.

    :param key: The specified key
    :return: The PokerCard with the specified key
    :raises ValueError: if the key is not present in AllPokerCards
    '''
    if key not in AllPokerCards:
        # (key,) keeps the formatting correct even when key is itself a tuple.
        message = "key (%s) is not a valid poker card key" % (key,)
        roomai.get_logger().fatal(message)
        raise ValueError(message)
    return AllPokerCards[key]
def gen_info_feat(self, info):
    """
    Encode an info object as a (num_point, num_suit, info_dim) count tensor.

    Channel layout, relative to a base of 0 when ``public_state.stage == 0``
    and 4 otherwise:
      base + 0: cards in the current player's hand
      base + 1: cards played by the current player
      base + 2: cards played by the previous player
      base + 3: cards played by the next player
    Player ids are taken modulo 3; records with person id 3 are only reported
    through a debug log message.

    :param info: the info holding ``person_state`` and ``public_state``
    :return: the feature tensor as a numpy array
    """
    logger = roomai.get_logger()
    own_cards = info.person_state.hand_cards
    feat = np.zeros((self.num_point, self.num_suit, self.info_dim))

    me = info.person_state.id
    before_me = (me + 3 - 1) % 3
    after_me = (me + 1) % 3

    # Both stages share one layout; only the first channel index differs.
    base = 0 if info.public_state.stage == 0 else 4

    for card in own_cards:
        feat[card.point_rank, card.suit_rank, base] += 1

    for record in info.public_state.action_history:
        actor = record[0]
        played = record[1]
        for card in played.cards:
            if actor == me:
                feat[card.point_rank, card.suit_rank, base + 1] += 1
            elif actor == before_me:
                feat[card.point_rank, card.suit_rank, base + 2] += 1
            elif actor == after_me:
                feat[card.point_rank, card.suit_rank, base + 3] += 1
            elif actor == 3:
                logger.debug(
                    "SevenKingModel finds the chance player-action pair in public_state.action_history"
                )

    return feat
def init(self, params=None):
    '''
    Initialize the Bang game environment with the initialization params.

    The only option read from the dict is
    param_num_normal_players: how many players are in the game, the option
    must be in {2, 4, 5}, default 5.
    An example of the initialization param is {"param_num_normal_players":2}

    :param params: the initialization params (None means "use the defaults")
    :return: infos, public_state, person_states, private_state
    :raises ValueError: if param_num_normal_players is not 2, 4 or 5
    '''
    if params is None:
        # A fresh dict per call instead of a shared mutable default argument.
        params = dict()
    logger = roomai.get_logger()

    public_state = BangStatePublic()
    private_state = BangStatePrivate()

    if "param_num_normal_players" in params:
        public_state.__param_num_normal_players__ = params["param_num_normal_players"]
    else:
        public_state.__param_num_normal_players__ = 5

    if public_state.param_num_normal_players not in [2, 4, 5]:
        logger.fatal("The number of normal players must be in [2,4,5]")
        raise ValueError("The number of normal players must be in [2,4,5]")

    public_state.__public_person_info__ = [
        PublicPersonInfo()
        for i in range(public_state.__param_num_normal_players__)
    ]
    for person_info in public_state.__public_person_info__:
        person_info.__num_hand_cards__ = 0
        person_info.__charactor_card__ = None
        person_info.__equipment_cards__ = []

    # One state per normal player plus one for the chance player.
    person_states = [
        BangStatePerson()
        for i in range(public_state.param_num_normal_players + 1)
    ]

    self.__public_state_history__.append(public_state)
    self.__private_state_history__.append(private_state)
    for i in range(public_state.param_num_normal_players):
        self.__person_states_history__[i].append(person_states[i])
        self.__person_states_history__[i][0].__id__ = i
        self.__person_states_history__[i][0].__hand_cards__ = []
        self.__person_states_history__[i][0].__role__ = ""

    # The chance player's state must be in the history before its available
    # actions are written; the original indexed an entry it never appended.
    # NOTE(review): assumes __person_states_history__ already holds one list
    # per participant including the chance player - confirm against the base env.
    chance_id = public_state.__param_num_normal_players__
    self.__person_states_history__[chance_id].append(person_states[chance_id])
    self.__person_states_history__[chance_id][0].__available_actions__ = self.available_actions()

    # The original discarded this result although the docstring promised it.
    infos = self.__gen_infos__()
    return infos, public_state, person_states, private_state
def lookup(cls, key):
    '''
    Look up a Card with the specified key.

    :param key: The specified key
    :return: The Card with the specified key
    :raises ValueError: if the key is not present in AllPlayingCardsDict
    '''
    if key not in AllPlayingCardsDict:
        # (key,) keeps the formatting correct even when key is itself a tuple.
        message = "key (%s) is not a valid poker normalcard key" % (key,)
        roomai.get_logger().fatal(message)
        raise ValueError(message)
    return AllPlayingCardsDict[key]
def lookup(self, key):
    '''
    Look up the BangActionChance registered under the given key.

    :param key: the key of the chance action; must be a str
    :return: the entry of AllBangActionChancesDict stored under the key
    :raises TypeError: if the key is not a str (this includes None)
    :raises ValueError: if the key is not present in AllBangActionChancesDict
    '''
    logger = roomai.get_logger()

    # None is not a str, so one isinstance test covers both rejected cases.
    if not isinstance(key, str):
        type_message = "In the constructor BangActionChance.lookup(key), the key must be a str"
        logger.fatal(type_message)
        raise TypeError(type_message)

    if key in AllBangActionChancesDict:
        return AllBangActionChancesDict[key]

    value_message = "In the constructor BangActionChance.lookup(key), the key must be the key of CharacterCard, RoleCard or PlayingCard"
    logger.fatal(value_message)
    raise ValueError(value_message)
def __init__(self, key):
    '''
    Build a card-type action from its key.

    Only the part of the key before the first "_" is interpreted here: it is
    resolved against AllPlayingCardsDict. An unknown card key is reported
    through the logger (info level) and the card is left as None.

    :param key: the action key; must support ``split("_")``
    '''
    logger = roomai.get_logger()

    # Every field starts from its "empty" value.
    self.__type__ = BangActionType.card
    self.__key__ = None
    self.__skill__ = None
    self.__seen_cards__ = set()
    self.__choosen_cards__ = set()
    self.__card__ = None
    self.__card_targets__ = []
    self.__other__ = None
    self.__other_targets__ = []

    card_key = key.split("_")[0]
    if card_key not in AllPlayingCardsDict:
        logger.info("%s is invalid action key, since the cardkey %s is invalid"%(key, card_key))
    else:
        self.__card__ = AllPlayingCardsDict[card_key]
def __action_chance__(self, action):
    '''
    Apply a chance action, i.e. deal one card.

    With n normal players (all person states except the last one), the first
    2 * n dealt cards go to the players' hands, two consecutive cards per
    player. The next five cards are stored as the private keep cards. Any
    further chance action is reported through the logger. In every case the
    card is recorded in the private state's used cards.

    :param action: the chance action; its ``card`` attribute is the dealt card
    '''
    person_states = [history[-1] for history in self.__person_states_history__]
    private_state = self.__private_state_history__[-1]

    card = action.card
    num_dealt = len(private_state.all_used_cards)
    num_hand_cards = (len(person_states) - 1) * 2

    if num_dealt < num_hand_cards:
        # Integer floor division: cards 0,1 -> player 0, cards 2,3 -> player 1, ...
        person_states[num_dealt // 2].__hand_cards__.append(card)
    elif num_dealt < num_hand_cards + 5:
        private_state.__keep_cards__.append(card)
    else:
        # NOTE(review): the invalid case is only logged and the card is still
        # recorded below - confirm whether this should raise instead.
        roomai.get_logger().fatal("the chance action in the invalid condition")

    private_state.__all_used_cards__.append(card)
def __init__(self, key):
    '''
    Build a chance action from a card key.

    The key is tried against the playing cards, then the character cards, then
    the role cards; the first dictionary containing it decides the action type
    and which ``lookup`` resolves the card.

    :param key: the key of a PlayingCard, CharacterCard or RoleCard
    :raises TypeError: if the key belongs to none of the three dictionaries
    '''
    logger = roomai.get_logger()
    self.__is_public__ = False

    if key in AllPlayingCardsDict:
        chance_type = BangActionChanceType.playingcard
        resolve = PlayingCard.lookup
    elif key in AllCharacterCardsDict:
        chance_type = BangActionChanceType.charactercard
        resolve = CharacterCard.lookup
    elif key in AllRoleCardsDict:
        chance_type = BangActionChanceType.rolecard
        resolve = RoleCard.lookup
    else:
        message = "In the constructor BangActionChance(card), the parameter card must be NormalCard, CharacterCard or RoleCard"
        logger.fatal(message)
        raise TypeError(message)

    self.__type__ = chance_type
    self.__card__ = resolve(key)
    self.__key__ = key
def __init__(self, card):
    '''
    Build a chance action that wraps the given card.

    The action type is derived from the card's class, tested in the order
    PlayingCard, CharacterCard, RoleCard; the action key is the card's key.

    :param card: a PlayingCard, CharacterCard or RoleCard instance
    :raises TypeError: if the card is an instance of none of the three classes
    '''
    logger = roomai.get_logger()
    self.__is_public__ = False

    if isinstance(card, PlayingCard):
        chance_type = BangActionChanceType.playingcard
    elif isinstance(card, CharacterCard):
        chance_type = BangActionChanceType.charactercard
    elif isinstance(card, RoleCard):
        chance_type = BangActionChanceType.rolecard
    else:
        message = "In the constructor BangActionChance(card), the parameter card must be NormalCard, CharacterCard or RoleCard"
        logger.fatal(message)
        raise TypeError(message)

    self.__type__ = chance_type
    self.__card__ = card
    self.__key__ = card.key
def __init__(self, role):
    '''
    Build a role card for the given role name.

    :param role: the role name; must be a str and one of
        RoleCardNames.sheriff, RoleCardNames.deputy_sheriff,
        RoleCardNames.outlaw, RoleCardNames.renegade
    :raises TypeError: if the role is not a str, or is not one of the four
        role names (the exception type of the second case is kept for
        backward compatibility)
    '''
    # The original test was inverted: it rejected exactly the str values.
    if not isinstance(role, str):
        message = "In the constructor RoleCard(rolecard), the rolecard must be a str."
        roomai.get_logger().fatal(message)
        raise TypeError(message)

    valid_roles = (RoleCardNames.sheriff, RoleCardNames.deputy_sheriff,
                   RoleCardNames.outlaw, RoleCardNames.renegade)
    if role not in valid_roles:
        message = ("In the constructor RoleCard(rolecard), the rolecard must be one of [%s,%s,%s,%s]"
                   % valid_roles)
        roomai.get_logger().fatal(message)
        raise TypeError(message)

    self.__name1__ = role
#!/bin/python #coding=utf8 import roomai import roomai.common logger = roomai.get_logger() ######################################################################### Basic Concepts ##################################################### class AbstractPublicState(object): ''' The abstract class of the public state. The information in the public state is public to every player ''' def __init__(self): self.__turn__ = None self.__action_history__ = [] self.__is_terminal__ = False self.__scores__ = None def __get_turn__(self): return self.__turn__ turn = property(__get_turn__, doc = "The players[turn] is expected to take an action.") def __get_action_history__(self): return tuple(self.__action_history__) action_history = property(__get_action_history__, doc = "The action_history so far. For example, action_history = [(0, roomai.kuhn.KuhnAction.lookup(\"check\"),(1,roomai.kuhn.KuhnAction.lookup(\"bet\")]") ''' def __get_previous_id__(self): return self.__previous_id__ previous_id = property(__get_previous_id__,doc = "The players[previous_id] took an action in the previous epoch. In the first epoch, previous_id is None")
def lookup(cls, key):
    '''
    Look up the role card registered under the given key.

    :param key: the key of the role card
    :return: the entry of AllRoleCardsDict stored under the key
    :raises TypeError: if the key is not present in AllRoleCardsDict
    '''
    logger = roomai.get_logger()
    if key in AllRoleCardsDict:
        return AllRoleCardsDict[key]

    message = "%s is not valid rolecard key" % (key)
    logger.fatal(message)
    raise TypeError(message)
def init(self, params=None):
    '''
    Initialize the TexasHoldem game environment with the initialization params.

    The initialization is a dict with some options
    1) allcards: the order of all poker cards appearing (used as given; a
       shuffled full deck without kings is used when absent)
    2) record_history: whether to record all history states. if you need call
       the backward function, please set it to True. default False
    3) num_players: how many players are in the game, default 3
    4) dealer_id: the player id of the dealer, default random
    5) chips: the initialization chips, default [1000,1000,...]
    6) big_blind_bet: the number of chips for the big blind bet, default 10
    An example of the initialization param is {"num_players":2,"record_history":True}

    :param params: the initialization params (None means "use the defaults")
    :return: infos, public_state, person_states, private_state
    '''
    if params is None:
        # A fresh dict per call instead of a shared mutable default argument.
        params = dict()
    self.logger = roomai.get_logger()
    config = self.__params__

    config["num_players"] = params.get("num_players", 3)

    if "dealer_id" in params:
        config["dealer_id"] = params["dealer_id"]
    else:
        config["dealer_id"] = int(random.random() * config["num_players"])

    if "chips" in params:
        config["chips"] = params["chips"]
    else:
        config["chips"] = [1000 for i in range(config["num_players"])]

    config["big_blind_bet"] = params.get("big_blind_bet", 10)

    if "allcards" in params:
        config["allcards"] = [c.__deepcopy__() for c in params["allcards"]]
    else:
        config["allcards"] = list(
            roomai.common.AllPokerCards_Without_King.values())
        random.shuffle(config["allcards"])

    config["record_history"] = params.get("record_history", False)

    self.__check_initialization_configuration__(self)

    num_players = config["num_players"]
    big_blind_bet = config["big_blind_bet"]
    # NOTE(review): "/" yields a float small blind on Python 3 (5.0 for a big
    # blind of 10) - confirm whether integer chips are expected here.
    small_blind_bet = big_blind_bet / 2

    ## public info
    small = (config["dealer_id"] + 1) % num_players
    big = (config["dealer_id"] + 2) % num_players

    self.public_state = TexasHoldemPublicState()
    pu = self.public_state
    pu.__num_players__ = num_players
    pu.__dealer_id__ = config["dealer_id"]
    pu.__big_blind_bet__ = big_blind_bet
    pu.__raise_account__ = big_blind_bet

    pu.__is_fold__ = [False for i in range(num_players)]
    pu.__num_fold__ = 0
    pu.__is_allin__ = [False for i in range(num_players)]
    pu.__num_allin__ = 0
    pu.__is_needed_to_action__ = [True for i in range(num_players)]
    pu.__num_needed_to_action__ = pu.num_players

    pu.__bets__ = [0 for i in range(num_players)]
    # Copy the chips: the blinds are deducted in place below, and the caller's
    # list (also kept in self.__params__) must not be modified by that.
    pu.__chips__ = list(config["chips"])
    pu.__stage__ = StageSpace.firstStage
    pu.__turn__ = (big + 1) % pu.num_players
    pu.__public_cards__ = []

    pu.__previous_id__ = None
    pu.__previous_action__ = None

    # Post the big blind; a player who cannot cover it goes all-in.
    if pu.chips[big] > big_blind_bet:
        pu.__chips__[big] -= big_blind_bet
        pu.__bets__[big] += big_blind_bet
    else:
        pu.__bets__[big] = pu.chips[big]
        pu.__chips__[big] = 0
        pu.__is_allin__[big] = True
        pu.__num_allin__ += 1
    pu.__max_bet_sofar__ = pu.bets[big]

    # Post the small blind (half the big blind) under the same rule.
    if pu.chips[small] > small_blind_bet:
        pu.__chips__[small] -= small_blind_bet
        pu.__bets__[small] += small_blind_bet
    else:
        pu.__bets__[small] = pu.chips[small]
        pu.__chips__[small] = 0
        pu.__is_allin__[small] = True
        pu.__num_allin__ += 1

    pu.__is_terminal__ = False
    pu.__scores__ = [0 for i in range(num_players)]

    # private info: the five cards following the players' hand cards
    self.private_state = TexasHoldemPrivateState()
    pr = self.private_state
    pr.__keep_cards__ = config["allcards"][num_players * 2:num_players * 2 + 5]

    ## person info: player i holds cards 2*i and 2*i + 1
    self.person_states = [TexasHoldemPersonState() for i in range(num_players)]
    pes = self.person_states
    for i in range(num_players):
        pes[i].__id__ = i
        pes[i].__hand_cards__ = config["allcards"][i * 2:(i + 1) * 2]
    pes[pu.turn].__available_actions__ = self.available_actions(
        pu, pes[pu.turn])

    self.__gen_history__()
    infos = self.__gen_infos__()

    if self.logger.level <= logging.DEBUG:
        self.logger.debug(
            "TexasHoldemEnv.init: num_players = %d, dealer_id = %d, chip = %d, big_blind_bet = %d" % (
                pu.num_players,
                pu.dealer_id,
                pu.chips[0],
                pu.big_blind_bet))

    return infos, pu, pes, pr
def available_actions(self):
    '''
    Generate all valid actions given the public state and the person state.

    The checks run in this order and the first matching one returns:
    1. a character card is still missing: the unused character cards
    2. some normal player has no role card: the role cards still to be dealt
    3. a response is pending on the response stack: the matching responses

    NOTE(review): when none of the cases returns, the function falls off the
    end and returns None, and the "deal cards" dict built in between is never
    returned - confirm the intended behavior.

    :return: all valid actions (dict from action key to action)
    :raises ValueError: if the number of normal players is not 2, 4 or 5
    '''
    logger = roomai.get_logger()
    public_state = self.__public_state_history__[-1]

    ###################################### chance action #################################
    ## charactercard
    person_infos = public_state.__public_person_infos__
    if person_infos[-1].__character_card__ is None:
        available_actions = dict()
        used_character_keys = set()
        for person_info in person_infos:
            if person_info.__character_card__ is not None:
                used_character_keys.add(person_info.__character_card__.key)
        for key in AllCharacterCardsDict:
            if key not in used_character_keys:
                available_actions[key] = BangActionChance.lookup(key)
        return available_actions

    ## rolecard
    num_players = public_state.param_num_normal_players
    for i in range(num_players):
        # NOTE(review): "__person_states_history__[i].person_states[-1]" is kept
        # as written; elsewhere a player's history is indexed directly - confirm.
        if self.__person_states_history__[i].person_states[-1].__role_card__ is None:
            available_actions = dict()

            # A list, not a set: the same role can be dealt more than once
            # (two outlaws) and every dealt card has to be counted.
            dealt_role_keys = []
            # The original iterated over range(len(<int>)), a TypeError.
            for j in range(num_players):
                role_card = self.__person_states_history__[j].person_states[-1].__role_card__
                if role_card is not None:
                    dealt_role_keys.append(role_card.key)

            num_sheriff = 0
            num_deputy_sheriff = 0
            num_renegade = 0
            num_outlaw = 0
            configured_players = public_state.__param_num_normal_players__
            if configured_players == 2:
                return available_actions
            elif configured_players == 4:
                num_sheriff = 1
                num_renegade = 1
                num_outlaw = 2
            elif configured_players == 5:
                num_sheriff = 1
                num_deputy_sheriff = 1
                num_renegade = 1
                num_outlaw = 2
            else:
                logger.fatal("param_num_normal_players not in [2,4,5]")
                raise ValueError("param_num_normal_players not in [2,4,5]")

            for key in dealt_role_keys:
                if key == CardRole.RoleCardNames.sheriff:
                    num_sheriff = num_sheriff - 1
                if key == CardRole.RoleCardNames.deputy_sheriff:
                    num_deputy_sheriff = num_deputy_sheriff - 1
                if key == CardRole.RoleCardNames.renegade:
                    num_renegade = num_renegade - 1
                if key == CardRole.RoleCardNames.outlaw:
                    num_outlaw = num_outlaw - 1

            if num_sheriff > 0:
                available_actions[CardRole.RoleCardNames.sheriff] = BangActionChance.lookup(CardRole.RoleCardNames.sheriff)
            if num_deputy_sheriff > 0:
                available_actions[CardRole.RoleCardNames.deputy_sheriff] = BangActionChance.lookup(CardRole.RoleCardNames.deputy_sheriff)
            if num_renegade > 0:
                available_actions[CardRole.RoleCardNames.renegade] = BangActionChance.lookup(CardRole.RoleCardNames.renegade)
            if num_outlaw > 0:
                available_actions[CardRole.RoleCardNames.outlaw] = BangActionChance.lookup(CardRole.RoleCardNames.outlaw)
            return available_actions

    ## deal cards
    # NOTE(review): ".deck" is read from the history container itself rather
    # than from its latest entry, and the resulting dict is not used afterwards;
    # kept as written - confirm.
    available_actions = dict()
    for card in self.__private_state_history__.deck:
        available_actions[card.key] = card

    ####################################### action ####################################
    responses = dict()
    if len(public_state.response_infos_stack) > 0:
        # NOTE(review): "__person_states_history__[-1]" is the last participant's
        # history, yet it is indexed by player id below; kept as written - confirm.
        person_states = self.__person_states_history__[-1]
        private_state = self.__private_state_history__[-1]
        pending = public_state.response_infos_stack[-1]
        subject = pending.subject
        target = pending.object  # local renamed so the builtin "object" is not shadowed
        reason = pending.reason

        if reason == ResponseInfo.UseIndian:
            for card in person_states[subject].hand_cards:
                if card.name == PlayingCardNames.Bang:
                    responses[card.name] = BangAction.lookup(card.name)
            responses[OtherActionNames.giveup] = BangAction.lookup(OtherActionNames.giveup)
            return responses

        elif reason == ResponseInfo.UseCatling:
            for card in person_states[subject].hand_cards:
                if card.name == PlayingCardNames.Miss:
                    responses[card.name] = BangAction.lookup(card.name)
            responses[OtherActionNames.giveup] = BangAction.lookup(OtherActionNames.giveup)
            return responses

        elif reason == ResponseInfo.ToDead:
            for card in person_states[subject].hand_cards:
                if card.name == PlayingCardNames.Beer:
                    responses[card.name+"_%d"%(target)] = BangAction.lookup(card.name+"_%d"%(target))
            responses[OtherActionNames.giveup] = BangAction.lookup(OtherActionNames.giveup)
            return responses

        elif reason == ResponseInfo.Shuffle:
            for card in private_state.shuffle_deck:
                responses[card.key] = BangActionChance.lookup(card.key)
            return responses
def init(self, params=None):
    '''
    Initialize the TexasHoldem game environment with the initialization params.

    The initialization is a dict with some options
    1. param_num_normal_players: how many players are in the game, default 3
    2. param_start_turn: stored on the public state, default random
    3. param_dealer_id: the player id of the dealer, default random
    4. param_init_chips: the initialization chips, default [1000,1000,...]
    5. param_big_blind_bet: the number of chips for the big blind bet, default 10
    An example of the initialization param is {"param_num_normal_players":2}

    After initialization it is the chance player's turn
    (turn == param_num_normal_players) and no card has been dealt yet.

    :param params: the initialization params (None means "use the defaults")
    :return: infos, public_state_history, person_states_history, private_state_history, playerid_action_history
    :raises ValueError: if the number of initial chips differs from the number
        of players, or if there are more than 6 players
    '''
    if params is None:
        # A fresh dict per call instead of a shared mutable default argument.
        params = dict()
    logger = roomai.get_logger()

    public_state = TexasHoldemStatePublic()
    self.__public_state_history__.append(public_state)

    if "param_num_normal_players" in params:
        public_state.__param_num_normal_players__ = params["param_num_normal_players"]
    else:
        public_state.__param_num_normal_players__ = 3

    if "param_start_turn" in params:
        public_state.__param_start_turn__ = params["param_start_turn"]
    else:
        public_state.__param_start_turn__ = int(
            random.random() * public_state.param_num_normal_players)

    if "param_dealer_id" in params:
        public_state.__param_dealer_id__ = params["param_dealer_id"]
    else:
        public_state.__param_dealer_id__ = int(
            random.random() * public_state.param_num_normal_players)

    if "param_init_chips" in params:
        public_state.__param_init_chips__ = params["param_init_chips"]
    else:
        public_state.__param_init_chips__ = [
            1000 for i in range(public_state.param_num_normal_players)
        ]

    if "param_big_blind_bet" in params:
        public_state.__param_big_blind_bet__ = params["param_big_blind_bet"]
    else:
        public_state.__param_big_blind_bet__ = 10

    num_players = public_state.param_num_normal_players

    ## check initialization config
    if len(public_state.param_init_chips) != num_players:
        # The original formatted "public_state.num_normal_players", a name used
        # nowhere else in this function; param_num_normal_players is read instead.
        raise ValueError(
            "len(env.param_initialization_chips) %d != param_num_normal_players %d"
            % (len(public_state.param_init_chips), num_players))

    if num_players > 6:
        raise ValueError(
            "The maximum of the number of players is 6. Now, the number of players = %d"
            % (num_players))

    ## public info
    small = (public_state.param_dealer_id + 1) % num_players
    big = (public_state.param_dealer_id + 2) % num_players

    pu = public_state
    pu.__is_fold__ = [False for i in range(num_players)]
    pu.__num_fold__ = 0
    pu.__is_allin__ = [False for i in range(num_players)]
    pu.__num_allin__ = 0
    pu.__is_needed_to_action__ = [True for i in range(num_players)]
    pu.__num_needed_to_action__ = pu.param_num_normal_players

    pu.__bets__ = [0 for i in range(num_players)]
    # A copy, so posting the blinds does not modify param_init_chips.
    pu.__chips__ = list(public_state.param_init_chips)
    pu.__stage__ = Stage.firstStage
    # The chance player (id == number of normal players) acts first.
    pu.__turn__ = pu.param_num_normal_players
    pu.__public_cards__ = []

    pu.__previous_id__ = None
    pu.__previous_action__ = None

    # Post the big blind; a player who cannot cover it goes all-in.
    big_blind_bet = public_state.param_big_blind_bet
    if pu.chips[big] > big_blind_bet:
        pu.__chips__[big] -= big_blind_bet
        pu.__bets__[big] += big_blind_bet
    else:
        pu.__bets__[big] = pu.chips[big]
        pu.__chips__[big] = 0
        pu.__is_allin__[big] = True
        pu.__num_allin__ += 1
    pu.__max_bet_sofar__ = pu.bets[big]
    pu.__raise_account__ = big_blind_bet

    # Post the small blind (half the big blind) under the same rule.
    # NOTE(review): "/" yields a float small blind on Python 3 - confirm
    # whether integer chips are expected here.
    small_blind_bet = big_blind_bet / 2
    if pu.chips[small] > small_blind_bet:
        pu.__chips__[small] -= small_blind_bet
        pu.__bets__[small] += small_blind_bet
    else:
        pu.__bets__[small] = pu.chips[small]
        pu.__chips__[small] = 0
        pu.__is_allin__[small] = True
        pu.__num_allin__ += 1

    pu.__is_terminal__ = False
    pu.__scores__ = [0 for i in range(num_players)]

    # private info: the keep cards start empty
    pr = TexasHoldemStatePrivate()
    self.__private_state_history__.append(pr)
    pr.__keep_cards__ = []

    ## person info: one history per normal player plus one for the chance player
    self.__person_states_history__ = [[] for i in range(num_players + 1)]
    for i in range(num_players + 1):
        self.__person_states_history__[i].append(TexasHoldemStatePerson())
        self.__person_states_history__[i][0].__id__ = i
        self.__person_states_history__[i][0].__hand_cards__ = []

    self.__person_states_history__[pu.turn][0].__available_actions__ = self.available_actions()

    infos = self.__gen_infos__()

    if logger.level <= logging.DEBUG:
        logger.debug(
            "TexasHoldemEnv.init: param_num_normal_players = %d, param_dealer_id = %d, param_initialization_chip = %d, param_big_blind_bet = %d" % (
                pu.param_num_normal_players,
                pu.param_dealer_id,
                pu.param_init_chips[0],
                pu.param_big_blind_bet))

    return (infos,
            self.__public_state_history__,
            self.__person_states_history__,
            self.__private_state_history__,
            self.__playerid_action_history__)
def forward(self, action):
    '''
    The TexasHoldem game environment steps with the action taken by the current player.

    The latest public, person and private states are deep-copied, the copies
    are appended to the histories, and the action is applied to the copies.
    A chance action deals one card; a normal action (Fold, Check, Call, Raise,
    AllIn) updates the betting state, after which the game ends with computed
    scores, enters the next stage, or passes the turn to the next player.

    :param action: The action taken by the current player
    :return: infos, public_state_history, person_states_history, private_state_history, playerid_action_history
    :raises ValueError: if the action is not available to the current player
        (the histories are left untouched in that case)
    :raises Exception: if action.option is not one of the five known options
    '''
    logger = roomai.get_logger()

    # Validate against the current state BEFORE anything is appended to the
    # histories, so that a rejected action does not leave a bogus step behind.
    current_public = self.__public_state_history__[-1]
    current_person = self.__person_states_history__[current_public.turn][-1]
    if action.key not in current_person.available_actions:
        logger.critical("action=%s is invalid" % (action.key))
        raise ValueError("action=%s is invalid" % (action.key))

    pu = current_public.__deepcopy__()
    pes = [
        self.__person_states_history__[i][-1].__deepcopy__()
        for i in range(len(self.__person_states_history__))
    ]
    pr = self.__private_state_history__[-1].__deepcopy__()
    self.__public_state_history__.append(pu)
    for i in range(len(pes)):
        self.__person_states_history__[i].append(pes[i])
    self.__private_state_history__.append(pr)

    # The acting player's options are consumed; they are regenerated below for
    # whoever acts next.
    pes[pu.turn].__available_actions__ = dict()
    self.__playerid_action_history__.append(
        roomai.games.common.ActionRecord(pu.turn, action))

    if isinstance(action, TexasHoldemActionChance) == True:
        self.__action_chance__(action)
        # Two hand cards per normal player plus five keep cards have been
        # dealt: hand the turn to the player after the big blind.
        if len(pr.all_used_cards) == (len(pes) - 1) * 2 + 5:
            pu.__turn__ = (pu.param_dealer_id + 2 + 1) % pu.param_num_normal_players
        pes[pu.turn].__available_actions__ = self.available_actions()
        infos = self.__gen_infos__()
        return (infos,
                self.__public_state_history__,
                self.__person_states_history__,
                self.__private_state_history__,
                self.__playerid_action_history__)

    if action.option == TexasHoldemAction.Fold:
        self.__action_fold__(action)
    elif action.option == TexasHoldemAction.Check:
        self.__action_check__(action)
    elif action.option == TexasHoldemAction.Call:
        self.__action_call__(action)
    elif action.option == TexasHoldemAction.Raise:
        self.__action_raise__(action)
    elif action.option == TexasHoldemAction.AllIn:
        self.__action_allin__(action)
    else:
        raise Exception(
            "action.option(%s) not in [Fold, Check, Call, Raise, AllIn]" %
            (action.option))

    pu.__previous_id__ = pu.turn
    pu.__previous_action__ = action
    pu.__is_terminal__ = False
    pu.__scores__ = [0 for i in range(pu.param_num_normal_players)]

    # computing_score
    if TexasHoldemEnv.__is_compute_scores__(pu):
        ## need showdown
        pu.__public_cards__ = pr.keep_cards[0:5]
        pu.__is_terminal__ = True
        pu.__scores__ = self.__compute_scores__()

    # enter into the next stage
    elif TexasHoldemEnv.__is_nextround__(pu):
        add_cards = []
        if pu.stage == Stage.firstStage:
            add_cards = pr.keep_cards[0:3]
        if pu.stage == Stage.secondStage:
            add_cards = [pr.keep_cards[3]]
        if pu.stage == Stage.thirdStage:
            add_cards = [pr.keep_cards[4]]

        pu.__public_cards__.extend(add_cards)
        pu.__stage__ = pu.stage + 1

        # Everyone who has neither folded nor gone all-in must act again.
        pu.__num_needed_to_action__ = 0
        pu.__is_needed_to_action__ = [
            False for i in range(pu.param_num_normal_players)
        ]
        for i in range(pu.param_num_normal_players):
            if pu.__is_fold__[i] != True and pu.__is_allin__[i] != True:
                pu.__is_needed_to_action__[i] = True
                pu.__num_needed_to_action__ += 1

        # The search for the next player starts from the dealer.
        pu.__turn__ = pu.param_dealer_id
        pu.__turn__ = self.__next_player__(pu)
        pes[pu.turn].__available_actions__ = self.available_actions()

    ##normal
    else:
        pu.__turn__ = self.__next_player__(pu)
        pes[pu.turn].__available_actions__ = self.available_actions()

    if logger.level <= logging.DEBUG:
        logger.debug(
            "TexasHoldemEnv.forward: num_fold+num_allin = %d+%d = %d, action = %s, stage = %d" % (
                pu.num_fold,
                pu.num_allin,
                pu.num_fold + pu.num_allin,
                action.key,
                pu.stage))

    infos = self.__gen_infos__()
    return (infos,
            self.__public_state_history__,
            self.__person_states_history__,
            self.__private_state_history__,
            self.__playerid_action_history__)
def forward(self, action):
    """
    Step the Bang game environment with the action taken by the current player.

    Deep copies of the latest private, public and person states are appended
    to their histories, then the action is applied to those copies:

    * A ``BangActionChance`` is dispatched on ``action.type``: deal a
      character card, deal a role card, or deal a playing card (recycling
      the discard pile once the deck is empty). Each of these hands the turn
      to ``(turn - 1) % param_num_normal_players``.
    * Any other action is only processed when a response is pending on
      ``response_infos_stack`` and the pending action is an Indian or Catling
      card: the player either gives up (losing one hp) or discards the
      required card (Bang for Indian, Miss for Catling) and passes the turn on.

    NOTE(review): ``__character_card`` has no trailing underscores, so inside
    a class body Python mangles it to ``_<ClassName>__character_card``;
    confirm that this is the attribute the public person info really defines.

    NOTE(review): an earlier docstring also listed playerid_action_history in
    the result, but the code has only ever returned the four items below.

    :param action: the action taken by the current player
    :returns: infos, public_state_history, person_states_history, private_state_history
    :raises Exception: if the action is not a legal response to a pending Indian or Catling card
    """
    logger = roomai.get_logger()

    private_state = copy.deepcopy(self.__private_state_history__[-1])
    public_state = copy.deepcopy(self.__public_state_history__[-1])
    person_states = [
        copy.deepcopy(self.__person_states_history__[i][-1])
        for i in range(public_state.param_num_normal_players)
    ]
    # The acting player's options are consumed by this step.
    person_states[public_state.turn].__available_actions__ = dict()

    self.__public_state_history__.append(public_state)
    self.__private_state_history__.append(private_state)
    for i in range(len(person_states)):
        self.__person_states_history__[i].append(person_states[i])

    def snapshot():
        # Built lazily so that the infos reflect every mutation made so far.
        return (self.__gen_infos__(), self.__public_state_history__,
                self.__person_states_history__,
                self.__private_state_history__)

    def finish_chance_step():
        public_state.__turn__ = (public_state.turn - 1) % public_state.param_num_normal_players
        return snapshot()

    if isinstance(action, BangActionChance):
        chance_type = BangActionChance.BangActionChanceType
        # Bound before the turn changes: the player whose options are sampled.
        dealer = person_states[public_state.turn]

        if action.type == chance_type.charactercard:
            # Chance player deals character cards: fill the first empty slot.
            dealer.__available_actions__ = self.available_actions()
            for i in range(len(public_state.__public_person_infos__)):
                if public_state.__public_person_infos__[i].__character_card is None:
                    # list(...keys()) so that choice() receives a sequence.
                    public_state.__public_person_infos__[i].__character_card = \
                        dealer.__available_actions__[choice(list(dealer.__available_actions__.keys()))]
                    return finish_chance_step()
            # Every player already has a character.
            return finish_chance_step()

        if action.type == chance_type.rolecard:
            # Chance player deals role cards: fill the first empty slot.
            dealer.__available_actions__ = self.available_actions()
            for i in range(public_state.param_num_normal_players):
                if person_states[i].__role__ is None:
                    person_states[i].__role__ = \
                        dealer.__available_actions__[choice(list(dealer.__available_actions__.keys()))]
                    if person_states[i].__role__ == CardRole.RoleCard(CardRole.RoleCardNames.sheriff):
                        public_state.__sheriff_id__ = i
                    return finish_chance_step()
            # Every player already has a role.
            return finish_chance_step()

        if action.type == chance_type.playingcard:
            # Chance player deals a playing card.
            dealer.__available_actions__ = self.available_actions()
            private_state.__deal_cards__.append(action)
            private_state.__deck__.pop(action.key)
            if len(private_state.__deck__) == 0:
                # No card left: the discard pile becomes the new deck.
                private_state.__deck__ = public_state.__discard_pile__[:]
                public_state.__discard_pile__ = []
            return finish_chance_step()

    elif len(public_state.response_infos_stack) > 0:
        response_action = public_state.response_infos_stack[-1].action

        # (pending card, card that answers it, label used in the error text)
        pending = None
        if isinstance(response_action, BangAction) \
                and response_action.type == BangActionType.card:
            if response_action.card.name == PlayingCardNames.Indian:
                pending = (PlayingCardNames.Bang, "Indian")
            elif response_action.card.name == PlayingCardNames.Catling:
                pending = (PlayingCardNames.Miss, "Catling")

        if pending is not None:
            answer_card, label = pending
            if action.type == BangActionType.other and action.other == OtherActionNames.giveup:
                # Declining to answer costs one hit point.
                person_states[public_state.turn].__hp__ -= 1
            elif action.type == BangActionType.card and action.card.name == answer_card:
                # The answering card leaves the responder's hand and the
                # turn moves on to the next player.
                person_states[public_state.turn].__hand_cards__.remove(action.card)
                public_state.__turn__ = (public_state.turn + 1) % public_state.param_num_normal_players
            else:
                message = "BangEnv generates %s action for responding %s" % (action.key, label)
                logger.fatal(message)
                raise Exception(message)

    # Every path that does not raise hands back the documented state tuple.
    return snapshot()
def lookup(cls, key):
    """
    Return the entry stored under ``key`` in ``AllCharacterCardsDict``.

    :param key: the key to look up
    :return: ``AllCharacterCardsDict[key]``
    :raises ValueError: if ``key`` is not present in ``AllCharacterCardsDict``
    """
    logger = roomai.get_logger()
    if key not in AllCharacterCardsDict:
        # (key,) keeps the formatting correct even when key is itself a tuple.
        message = "key (%s) is not a valid character key" % (key,)
        logger.fatal(message)
        raise ValueError(message)
    return AllCharacterCardsDict[key]
def init(self, params=None):
    """
    Set up a new TexasHoldem game: deal the cards, post the blinds and build
    the public, private and per-player states.

    Args:
        params: Optional dict of settings. Recognized keys, with the value
            used when a key is absent:

            * "num_players": 3
            * "dealer_id": a random seat in ``[0, num_players)``
            * "chips": 1000 for every player (a supplied list is copied, so
              posting the blinds does not modify the caller's list)
            * "big_blind_bet": 10
            * "allcards": a shuffled deck of ``roomai.common.PokerCard(i, j)``
              for ``i`` in 0..12 and ``j`` in 0..3 (supplied cards are
              deep-copied and used in the given order)
            * "record_history": False

    Returns:
        A tuple ``(infos, public_state, person_states, private_state)``.
    """
    if params is None:
        params = dict()

    self.logger = roomai.get_logger()

    self.num_players = params.get("num_players", 3)
    # Membership tests (not .get) so that no random number is drawn when the
    # caller supplies the value.
    if "dealer_id" in params:
        self.dealer_id = params["dealer_id"]
    else:
        self.dealer_id = int(random.random() * self.num_players)
    if "chips" in params:
        self.chips = list(params["chips"])
    else:
        self.chips = [1000 for _ in range(self.num_players)]
    self.big_blind_bet = params.get("big_blind_bet", 10)
    if "allcards" in params:
        self.allcards = [c.__deepcopy__() for c in params["allcards"]]
    else:
        self.allcards = [
            roomai.common.PokerCard(i, j)
            for i in range(13)
            for j in range(4)
        ]
        random.shuffle(self.allcards)
    self.record_history = params.get("record_history", False)

    self.check_initialization_configuration(self)

    # Two hand cards per player from the top of the deck, then five kept cards.
    hand_cards = [
        self.allcards[i * 2:(i + 1) * 2] for i in range(self.num_players)
    ]
    keep_cards = self.allcards[self.num_players * 2:self.num_players * 2 + 5]

    ## public info
    small = (self.dealer_id + 1) % self.num_players
    big = (self.dealer_id + 2) % self.num_players

    state = TexasHoldemPublicState()
    self.public_state = state
    state.num_players = self.num_players
    state.dealer_id = self.dealer_id
    state.big_blind_bet = self.big_blind_bet
    state.raise_account = self.big_blind_bet
    state.is_fold = [False for _ in range(self.num_players)]
    state.num_quit = 0
    state.is_allin = [False for _ in range(self.num_players)]
    state.num_allin = 0
    state.is_needed_to_action = [True for _ in range(self.num_players)]
    state.num_needed_to_action = state.num_players
    state.bets = [0 for _ in range(self.num_players)]
    state.chips = self.chips
    state.stage = StageSpace.firstStage
    # The first player to act is the seat after the big blind.
    state.turn = (big + 1) % state.num_players
    state.public_cards = []
    state.previous_id = None
    state.previous_action = None

    def post_blind(seat, amount):
        # A player who cannot cover more than the blind puts in everything
        # and is marked all-in.
        if state.chips[seat] > amount:
            state.chips[seat] -= amount
            state.bets[seat] += amount
        else:
            state.bets[seat] = state.chips[seat]
            state.chips[seat] = 0
            state.is_allin[seat] = True
            state.num_allin += 1

    post_blind(big, self.big_blind_bet)
    state.max_bet_sofar = state.bets[big]
    # Floor division keeps the small blind an integer on Python 2 and 3.
    post_blind(small, self.big_blind_bet // 2)

    state.is_terminal = False
    state.scores = None

    # private info
    self.private_state = TexasHoldemPrivateState()
    self.private_state.hand_cards = [
        [card.__deepcopy__() for card in hand_cards[i]]
        for i in range(self.num_players)
    ]
    self.private_state.keep_cards = [
        card.__deepcopy__() for card in keep_cards
    ]

    ## person info
    self.person_states = [
        TexasHoldemPersonState() for _ in range(self.num_players)
    ]
    for i in range(self.num_players):
        self.person_states[i].id = i
        self.person_states[i].hand_cards = [
            card.__deepcopy__() for card in hand_cards[i]
        ]
    # Only the player whose turn it is receives available actions.
    first_to_act = self.person_states[self.public_state.turn]
    first_to_act.available_actions = self.available_actions(
        self.public_state, first_to_act)

    self.__gen_history__()
    infos = self.__gen_infos__()

    if self.logger.level <= logging.DEBUG:
        self.logger.debug(
            "TexasHoldemEnv.init: num_players = %d, dealer_id = %d, chip = %d, big_blind_bet = %d" % (
                self.public_state.num_players,
                self.public_state.dealer_id,
                self.public_state.chips[0],
                self.public_state.big_blind_bet,
            ))

    return infos, self.public_state, self.person_states, self.private_state