예제 #1
0
    def take_action(self):
        """Sample an action from the current strategy distribution.

        ``self.get_strategies(self.states, self.available_actions)`` is read as
        a mapping from a state key to a weight.  A key is drawn by walking the
        cumulative weights until they exceed a uniform random number.

        The action key is rebuilt from the last two ``_``-separated fields of
        the drawn state key (only the final character of the first field is
        kept) and resolved with ``SevenKingAction.lookup``.  When nothing is
        drawn, or the drawn key ends with ``_``, a uniformly random entry of
        ``self.available_actions`` is returned instead.

        Returns:
            The chosen action object.
        """
        cur_strategies = self.get_strategies(self.states,
                                             self.available_actions)

        new_state = ''
        val = random.random()
        total = 0
        for key in cur_strategies:
            total += cur_strategies[key]
            if total > 0 and val < total:
                new_state = key
                # Stop at the first key whose cumulative weight exceeds val;
                # without this every later key also matches and the last key
                # would always be selected.
                break

        if new_state != '' and new_state[-1] != '_':
            fields = new_state.split("_")
            action1 = fields[-2]
            action2 = fields[-1]
            if len(action1) > 1:
                action1 = action1[-1]
            return SevenKingAction.lookup(action1 + '_' + action2)

        # Fallback: uniform choice among the available actions.
        idx = int(random.random() * len(self.available_actions))
        return list(self.available_actions.values())[idx]
예제 #2
0
 def take_action(self):
     """Choose an action, preferring the lowest card among "p_0" actions.

     If the empty key ``""`` is among the available actions, a fresh
     ``SevenKingAction("")`` is returned.  Otherwise the available actions
     whose pattern name is ``"p_0"`` are scanned for the smallest
     ``hand_card[0]`` (by ``SevenKingPokerCard.compare``) and the action
     looked up from that card's key is returned.  When no such action
     exists, the first available action is returned.

     NOTE(review): the scan only considers pattern "p_0" and reads
     ``hand_card``; confirm that this is the intended filter.
     """
     actions = self.available_actions
     if "" in actions:
         return SevenKingAction("")

     lowest = None
     for candidate in actions.values():
         if candidate.pattern[0] != "p_0":
             continue
         card = candidate.hand_card[0]
         if lowest is None or SevenKingPokerCard.compare(card, lowest) < 0:
             lowest = card

     if lowest is not None:
         return SevenKingAction.lookup(lowest.key)
     return list(actions.values())[0]
예제 #3
0
    def testScores1(self):
        """
        After a default ``init`` the player to move is not offered the
        empty-key action, and the environment reports the empty action as
        invalid for that player.
        """
        env = SevenKingEnv()
        infos, public_state, person_states, private_state = env.init()

        offered = infos[public_state.turn].person_state.available_actions
        self.assertTrue("" not in offered)

        empty_action = SevenKingAction.lookup("")
        is_valid = env.is_action_valid(empty_action, public_state,
                                       person_states[public_state.turn])
        self.assertFalse(is_valid)
예제 #4
0
    def take_action(self):
        """Return the first available action whose key is not empty.

        Returns:
            The first action (in iteration order of ``self.available_actions``)
            with a non-empty key, or ``SevenKingAction.lookup("")`` when every
            available action has an empty key or none is available.
        """
        non_empty = (act for act in self.available_actions.values()
                     if act.key != "")
        first = next(non_empty, None)
        if first is None:
            return SevenKingAction.lookup("")
        return first
예제 #5
0
    def take_action(self):
        """Sample an action from a regret-matching strategy predicted by the RNN.

        For every key of ``self.available_actions`` the encoded action history
        (``input_trans`` applied to ``self.action_history``) followed by the
        encoded candidate is fed to ``self.rnn_model``; the sequence is
        left-padded with zero vectors, or truncated to its most recent
        entries, so that it has exactly ``TIME_STEPS`` rows.  The model
        output ``[-1][0][0]`` is used as the candidate's regret.

        Positive regrets are normalised into probabilities (uniform when no
        regret is positive) and one key is drawn from that distribution.

        Returns:
            ``SevenKingAction.lookup(key)`` for the drawn key, or a uniformly
            random available action when no non-empty key was drawn.
        """
        model = self.rnn_model
        time_steps = model.TIME_STEPS
        input_size = model.INPUT_SIZE

        history = [input_trans(action) for action in self.action_history]

        regret_list = dict()
        for action in self.available_actions:
            # Build a fresh sequence per candidate so that one candidate
            # never leaks into the history seen by the next one.
            sequence = history + [input_trans(action)]
            missing = time_steps - len(sequence)
            if missing > 0:
                padding = [[0] * input_size for _ in range(missing)]
                sequence = padding + sequence
            else:
                # Keep the most recent TIME_STEPS entries, candidate included,
                # so the reshape below always receives full windows.
                sequence = sequence[-time_steps:]
            regret_list[action] = model.sess.run(
                model.output,
                feed_dict={
                    model.xs:
                    np.array(sequence).reshape(-1, time_steps, input_size)
                })

        # Regret matching: only positive regrets contribute.
        positive = dict()
        normalizing_sum = 0
        for key in regret_list:
            positive[key] = max(regret_list[key][-1][0][0], 0)
            normalizing_sum += positive[key]

        cur_strategies = dict()
        for key in positive:
            if normalizing_sum > 0:
                cur_strategies[key] = positive[key] / normalizing_sum
            else:
                cur_strategies[key] = 1.0 / len(self.available_actions)

        val = random.random()
        total = 0
        new_state = ''
        for key in cur_strategies:
            total += cur_strategies[key]
            if total > 0 and val < total:
                new_state = key
                # First key whose cumulative probability exceeds val wins;
                # continuing would always end on the last key.
                break

        # NOTE(review): a drawn empty key is indistinguishable from "nothing
        # drawn" and falls through to the uniform fallback, as before.
        if new_state != '':
            return SevenKingAction.lookup(new_state)

        idx = int(random.random() * len(self.available_actions))
        return list(self.available_actions.values())[idx]
예제 #6
0
    def take_action(self):
        """Pick the available action with the largest ``pattern[1]``.

        Starts from ``SevenKingAction.lookup("")`` with a best value of 0.
        An action with a strictly larger ``pattern[1]`` replaces the current
        best.  On a tie, an action whose pattern name is not ``'p_0'``
        replaces the best only if its last card compares greater than the
        best's last card (``SevenKingPokerCard.compare``).

        Returns:
            The selected action.
        """
        best = SevenKingAction.lookup("")
        best_size = 0
        for candidate in self.available_actions.values():
            size = candidate.pattern[1]
            if size > best_size:
                best_size = size
                best = candidate
                continue
            if size != best_size or candidate.pattern[0] == 'p_0':
                continue
            # Tie on size: the higher last card wins.
            if SevenKingPokerCard.compare(candidate.cards[-1],
                                          best.cards[-1]) > 0:
                best = candidate
        return best
예제 #7
0
    def testEnv(self):
        """
        Initialise an environment with ``{"num_players":2}``, check that two
        infos come back, print the mover's turn and available actions, then
        step the environment with an action built from the mover's first
        hand card.
        """
        env = SevenKingEnv()

        infos, public_state, person_states, private_state = env.init({"num_players":2})
        assert len(infos) == 2

        turn = public_state.turn
        mover = person_states[turn]
        self.show_hand_card(mover.hand_cards)
        print (turn)
        print ("available_actions=", mover.available_actions.keys())
        print ("available_actions_v=", mover.available_actions.values())

        first_card = person_states[turn].hand_cards[0]
        action = SevenKingAction("%s" % (first_card.key))
        infos, public_state, person_states, private_state = env.forward(action)
예제 #8
0
    def forward(self, action):
        '''
        Advance the SevenKing game by one step with the current player's action.

        The action is validated, recorded in the action history and applied
        (a "p_0" action marks the player as folded; any other action removes
        the played cards, refills the hand to five cards in stage 0, and
        becomes the new license action).  Afterwards the next turn is chosen:
        terminal state, switch from stage 0 to stage 1, start of a new round,
        or simply the next seat.

        :param action: the action taken by the player whose turn it is
        :return: infos, public_state, person_states, private_state
        :raises ValueError: if ``is_action_valid`` returns False for the action
        '''
        public  = self.public_state
        private = self.private_state
        persons = self.person_states
        turn    = public.turn
        actor   = persons[turn]

        if self.is_action_valid(action, public, actor) == False:
            raise  ValueError("The (%s) is an invalid action " % (action.key))

        actor.__available_actions__ = dict()
        public.__action_history__.append((turn, action))

        def open_round(next_turn):
            # Hand the lead to next_turn: folds cleared, nothing to beat.
            public.__turn__           = next_turn
            public.__num_fold__       = 0
            public.__is_fold__        = [False for _ in range(public.num_normal_players)]
            public.__license_action__ = SevenKingAction.lookup("")
            persons[next_turn].__available_actions__ = SevenKingEnv.available_actions(public, persons[next_turn])

        ## apply the action
        if action.pattern[0] == "p_0":
            public.__is_fold__[turn]    = True
            public.__num_fold__        += 1
            actor.__available_actions__ = dict()
        else:
            actor.__del_cards__(action.cards)
            if public.stage == 0:
                # stage 0: draw from the kept cards until the hand holds five again
                drawn = [private.__keep_cards__.pop() for _ in range(5 - len(actor.hand_cards))]
                actor.__add_cards__(drawn)
            elif public.stage == 1:
                public.__num_hand_cards__[turn] = len(actor.hand_cards)
            public.__license_action__ = action

        ## terminal: in stage 1 the acting player has no cards left
        if public.stage == 1 and len(actor.hand_cards) == 0:
            public.__is_terminal__    = True
            public.__scores__         = self.__compute_scores__()
            public.__turn__           = None
            public.__license_action__ = SevenKingAction.lookup("")

        ## stage 0 to 1: fewer than five kept cards remain
        elif len(private.keep_cards) < 5 and public.stage == 0:
            new_turn, min_card = self.__choose_player_with_lowest_card__()
            open_round(new_turn)
            # keep only the actions whose key contains the lowest card's key
            for key in list(persons[new_turn].available_actions.keys()):
                if min_card.key not in key:
                    del persons[new_turn].__available_actions__[key]
            public.__stage__ = 1

        ## round next: everybody but one player has folded
        elif public.num_fold + 1 == public.num_normal_players:
            open_round(self.__choose_player_with_nofold__())

        ## otherwise the next seat moves
        else:
            new_turn = (turn + 1) % public.num_normal_players
            public.__turn__ = new_turn
            persons[new_turn].__available_actions__ = SevenKingEnv.available_actions(public, persons[new_turn])

        self.__gen_history__()
        infos = self.__gen_infos__()
        return infos, self.public_state, self.person_states, self.private_state
예제 #9
0
    def available_actions(cls, public_state, person_state):
        """Enumerate the actions a player may take in the given public state.

        When the license action is missing or has pattern "p_0" (nothing to
        beat), every non-"p_0" pattern is allowed.  Otherwise only the
        license action's own pattern and the "p_0" action are allowed, and a
        candidate qualifies only if its highest-index card compares greater
        than the license action's last card.

        Args:
            public_state: provides ``license_action``.
            person_state: provides ``hand_cards`` and
                ``__gen_pointrank2cards__()`` (cards grouped by point rank).

        Returns:
            dict mapping action key to the action from
            ``SevenKingAction.lookup``.

        Raises:
            ValueError: if an allowed pattern is not one of p_0 .. p_4.
        """
        import itertools

        license_action = public_state.license_action
        if license_action is None:
            license_action = SevenKingAction("")
        license_pattern = license_action.pattern
        free_lead = license_pattern[0] == "p_0"
        # Card every candidate must beat; unused on a free lead.
        license_card = None if free_lead else license_action.cards[-1]

        patterns = set()
        if free_lead:
            for p in AllSevenKingPatterns.values():
                if p[0] != "p_0":
                    patterns.add(p)
        else:
            patterns.add(license_pattern)
            patterns.add(AllSevenKingPatterns["p_0"])

        hand_cards = person_state.hand_cards
        available = dict()

        def beats_license(card):
            return free_lead or SevenKingPokerCard.compare(card, license_card) > 0

        def add_same_rank_actions(size):
            # Add every `size`-card combination of equal-rank cards whose
            # highest-index card beats the license card.
            point2cards = person_state.__gen_pointrank2cards__()
            for p in point2cards:
                cards = point2cards[p]
                count = len(cards)
                # Mirror the limits of the previous hand-written enumeration:
                # four-card actions use the first four cards of a rank, and
                # pairs/triples are built only from ranks holding at most four
                # cards (presumably a rank never holds more; verify).
                if size == 4:
                    count = min(count, 4)
                if count < size or count > 4:
                    continue
                for top in range(size - 1, count):
                    if not beats_license(cards[top]):
                        continue
                    for lower in itertools.combinations(range(top), size - 1):
                        key = ",".join(cards[i].key for i in lower + (top,))
                        available[key] = SevenKingAction.lookup(key)

        same_rank_sizes = {"p_2": 2, "p_3": 3, "p_4": 4}

        for pattern in patterns:
            name = pattern[0]

            # Not enough cards in hand to form this pattern at all.
            if len(hand_cards) < pattern[1]:
                continue

            if name == "p_0":
                available[""] = SevenKingAction.lookup("")
            elif name == "p_1":
                for c in hand_cards:
                    if beats_license(c):
                        available[c.key] = SevenKingAction.lookup(c.key)
            elif name in same_rank_sizes:
                add_same_rank_actions(same_rank_sizes[name])
            else:
                raise ValueError("The %s pattern is invalid" % (pattern[0]))

        return available
예제 #10
0
    def init(self, params = dict()):
        '''
        Set up a fresh SevenKing game from the given options.

        Recognised keys of ``params``:
        1) backward_enable: stored in the environment parameters, default False
        2) num_normal_players: number of players that are dealt cards, default 3
        Example: {"num_normal_players":2,"backward_enable":True}

        A shuffled copy of all SevenKing cards becomes the kept-card pile,
        five cards are dealt to each normal player, and the player returned
        by ``__choose_player_with_lowest_card__`` moves first.

        :param params: dict of initialization options
        :return: infos, public_state, person_states, private_state
        '''
        settings = self.__params__
        settings["num_normal_players"] = params.get("num_normal_players", 3)
        settings["backward_enable"]    = params.get("backward_enable", False)
        num_players = settings["num_normal_players"]

        public  = SevenKingPublicState()
        private = SevenKingPrivateState()
        # one person state more than there are normal players
        persons = [SevenKingPersonState() for _ in range(num_players + 1)]
        self.public_state  = public
        self.private_state = private
        self.person_states = persons

        self.public_state_history  = []
        self.private_state_history = []
        self.person_states_history = []

        ## private_state: shuffled deck, then five cards per normal player
        deck = [card.__deepcopy__() for card in AllSevenKingPokerCards.values()]
        random.shuffle(deck)
        private.__keep_cards__ = deck

        for seat in range(num_players):
            dealt = [private.__keep_cards__.pop() for _ in range(5)]
            persons[seat].__add_cards__(dealt)

        ## public_state
        first_turn, _ = self.__choose_player_with_lowest_card__()
        public.__turn__               = first_turn
        public.__is_terminal__        = False
        public.__scores__             = []
        public.__license_action__     = SevenKingAction.lookup("")
        public.__stage__              = 0

        public.__num_normal_players__ = num_players
        public.__num_keep_cards__     = len(private.keep_cards)
        public.__num_hand_cards__     = [len(person.hand_cards) for person in persons]
        public.__is_fold__            = [False for _ in range(public.num_normal_players)]
        public.__num_fold__           = 0

        ## person_state: ids for everyone, actions only for the first mover
        for seat, person in enumerate(persons):
            person.__id__ = seat
            if seat == public.turn:
                person.__available_actions__ = SevenKingEnv.available_actions(public, person)

        self.__gen_history__()
        infos = self.__gen_infos__()
        return infos, self.public_state, self.person_states, self.private_state
예제 #11
0
    def init(self, params=dict()):
        """Set up a fresh SevenKing game from the given options.

        Args:
            params: dict of options.  Recognised keys:
                "num_players": number of players, default 3.
                "allcards": cards to use as the deck (copied, not shuffled);
                    by default all SevenKing cards are copied and shuffled.
                "record_history": stored on the environment, default False.

        Returns:
            infos, public_state, person_states, private_state
        """
        self.num_players = params.get("num_players", 3)

        if "allcards" in params:
            deck = [card.__deepcopy__() for card in params["allcards"]]
        else:
            deck = [
                card.__deepcopy__()
                for card in AllSevenKingPokerCards.values()
            ]
            random.shuffle(deck)

        self.record_history = params.get("record_history", False)

        public = SevenKingPublicState()
        private = SevenKingPrivateState()
        persons = [SevenKingPersonState() for _ in range(self.num_players)]
        self.public_state = public
        self.private_state = private
        self.person_states = persons

        self.public_state_history = []
        self.private_state_history = []
        self.person_states_history = []

        ## private_state: the deck, from which five cards go to each player
        private._SevenKingPrivateState__keep_cards = deck

        for seat in range(self.num_players):
            dealt = [
                private._SevenKingPrivateState__keep_cards.pop()
                for _ in range(5)
            ]
            persons[seat]._SevenKingPersonState__add_cards(dealt)

        ## public_state
        first_turn, _ = self.choose_player_with_lowest_card()
        public.turn = first_turn
        public.is_terminal = False
        public.scores = []
        public.previous_id = None
        public.previous_action = None
        public._SevenKingPublicState__license_action = SevenKingAction.lookup(
            "")
        public._SevenKingPublicState__stage = 0

        public._SevenKingPublicState__num_players = self.num_players
        public._SevenKingPublicState__num_keep_cards = len(private.keep_cards)
        public._SevenKingPublicState__num_hand_cards = [
            len(person.hand_cards) for person in persons
        ]
        public._SevenKingPublicState__is_fold = [
            False for _ in range(public.num_players)
        ]
        public._SevenKingPublicState__num_fold = 0

        ## person_state: ids for everyone, actions only for the first mover
        for seat, person in enumerate(persons):
            person.id = seat
            if seat == public.turn:
                person.available_actions = SevenKingEnv.available_actions(
                    public, person)

        self.__gen_history__()
        infos = self.__gen_infos__()
        return infos, self.public_state, self.person_states, self.private_state
예제 #12
0
    def forward(self, action):
        """Apply the current player's action and advance the game one step.

        A "p_0" action marks the player as folded.  Any other action removes
        the played cards from the hand (refilling it to five cards from the
        kept cards in stage 0, or updating the public hand-card count in
        stage 1) and becomes the new license action.  Then the next turn is
        decided: terminal state, switch from stage 0 to stage 1, start of a
        new round, or the next seat.

        Args:
            action: the action played by the player whose turn it is.

        Returns:
            infos, public_state, person_states, private_state

        Raises:
            ValueError: if ``is_action_valid`` rejects the action.
        """
        pu = self.public_state
        pr = self.private_state
        pes = self.person_states
        turn = pu.turn

        if not self.is_action_valid(action, pu, pes[turn]):
            raise ValueError("The (%s) is an invalid action " % (action.key))

        is_fold = action.pattern[0] == "p_0"

        ## the action plays its role
        if is_fold:
            pu._SevenKingPublicState__is_fold[turn] = True
            pu._SevenKingPublicState__num_fold += 1
        else:
            pes[turn]._SevenKingPersonState__del_cards(action.cards)

            if pu.stage == 0:
                # Stage 0: draw from the kept cards until the hand holds five.
                tmp = []
                for _ in range(5 - len(pes[turn].hand_cards)):
                    tmp.append(pr._SevenKingPrivateState__keep_cards.pop())
                pes[turn]._SevenKingPersonState__add_cards(tmp)
            elif pu.stage == 1:
                pu._SevenKingPublicState__num_hand_cards[turn] = len(
                    pes[turn]._SevenKingPersonState__hand_cards)

        # The acting player has moved and has nothing left to choose from.
        pes[turn].available_actions = dict()

        pu.previous_id = turn
        pu.previous_action = action
        if not is_fold:
            pu._SevenKingPublicState__license_action = action

        ## terminal: in stage 1 the acting player has no cards left
        if pu.stage == 1 and len(pes[turn].hand_cards) == 0:
            pu.is_terminal = True
            pu.scores = self.compute_scores()
            pu.turn = None
            pu._SevenKingPublicState__license_action = SevenKingAction.lookup(
                "")

        ## stage 0 to 1: fewer than five kept cards remain
        elif len(pr.keep_cards) < 5 and pu.stage == 0:
            new_turn, min_card = self.choose_player_with_lowest_card()
            pu.turn = new_turn
            pu._SevenKingPublicState__num_fold = 0
            pu._SevenKingPublicState__is_fold = [
                False for _ in range(pu.num_players)
            ]
            pu._SevenKingPublicState__license_action = SevenKingAction.lookup(
                "")
            pes[new_turn].available_actions = SevenKingEnv.available_actions(
                pu, pes[new_turn])
            # Keep only actions whose key contains the lowest card's key.
            # Iterate over a snapshot of the keys: deleting entries while
            # iterating the live key view raises RuntimeError on Python 3.
            for key in list(pes[new_turn].available_actions.keys()):
                if min_card.key not in key:
                    del pes[new_turn].available_actions[key]
            pu._SevenKingPublicState__stage = 1

        ## round next: everybody but one player has folded
        elif pu.num_fold + 1 == pu.num_players:
            new_turn = self.choose_player_with_nofold()
            pu.turn = new_turn
            pu._SevenKingPublicState__num_fold = 0
            pu._SevenKingPublicState__is_fold = [
                False for _ in range(pu.num_players)
            ]
            pu._SevenKingPublicState__license_action = SevenKingAction.lookup(
                "")
            pes[new_turn].available_actions = SevenKingEnv.available_actions(
                pu, pes[new_turn])

        ## otherwise the next seat moves
        else:
            new_turn = (turn + 1) % pu.num_players
            pu.turn = new_turn
            pes[new_turn].available_actions = SevenKingEnv.available_actions(
                pu, pes[new_turn])

        self.__gen_history__()
        infos = self.__gen_infos__()
        return infos, self.public_state, self.person_states, self.private_state