Ejemplo n.º 1
0
    def step(self, action):
        """
        Apply one bidding action and hand the auction over to the next bidder.

        Actions 0-34 are contract bids and must be strictly higher than the
        current ``max_bid``; action 35 is PASS. Seats that are not listed in
        ``bidding_seats`` are skipped, and every skipped seat is counted as a
        pass. The auction is over once ``n_pass`` reaches 3 or the highest bid
        (34) has been made; the final contract is then scored through
        ``Deal.score`` and the episode is marked as done.

        :param action: bid action, a value in [0, 35] where 35 means PASS
        :return: (state, reward, done, info); state is the tuple
            (hand of the next bidder, bidding history), reward stays 0 until
            the auction ends, and info carries the next "turn" and "max_bid"
        :raises Exception: if the episode is already done, the action is out
            of range, the bid does not exceed the current maximum, or the
            auction ends while ``max_bid`` is still negative
        """
        pass_action = 35
        top_bid = 34

        if self.done:
            raise Exception("No more actions can be taken")
        if action > pass_action or action < 0:
            raise Exception("illegal action")

        if action != pass_action:
            # A contract bid has to outrank the best bid made so far.
            if action <= self.max_bid:
                raise Exception("illegal bidding.")
            self.bidding_history[action] = 1
            self.bidding_history[-1] = 0  # a fresh bid resets the PASS slot
            self.max_bid = action
            self.n_pass = 0

            # Remember which seat of this group named the strain first, and
            # which group made the latest bid.
            bid_strain = convert_action2strain(action)
            side = Seat2Group[self.turn]
            side_declarers = self.strain_declarer[side]
            if side_declarers.get(bid_strain, '') == '':
                side_declarers[bid_strain] = self.turn
            self.group_declarer = side
        else:
            self.bidding_history[action] = 1
            self.n_pass += 1

        # Move clockwise to the next participating seat; every seat that does
        # not take part in the bidding counts as one more pass.
        seat_count = len(Seat)
        self.turn = (self.turn + 1) % seat_count
        while self.turn not in self.bidding_seats:
            self.turn = (self.turn + 1) % seat_count
            self.n_pass += 1

        # The returned state belongs to the player who bids next.
        next_hand = self.one_hot_deal[self.turn]
        payoff = 0
        if self.n_pass >= 3 or self.max_bid == top_bid:
            if self.max_bid < 0:
                raise Exception("illegal bidding")
            final_strain = convert_action2strain(self.max_bid)
            final_level = convert_action2level(self.max_bid)
            # The declarer is the first seat of the last-bidding group that
            # named the final strain.
            first_declarer = self.strain_declarer[self.group_declarer][final_strain]
            # NOTE(review): per the original comment, averaging over the
            # tries now happens inside Deal.score - confirm in deal.py.
            payoff = Deal.score(
                dealer=self.deal,
                level=final_level,
                strain=final_strain,
                declarer=first_declarer,
                tries=self.nmc,
                mode=self.score_mode,
            )
            self.done = True

        observation = (next_hand, self.bidding_history)
        details = {"turn": Seat[self.turn], "max_bid": self.max_bid}
        if self.debug:
            log_state(observation, payoff, self.done, details)
        return observation, payoff, self.done, details
Ejemplo n.º 2
0
from deal import Deal
import time
# Benchmark: time one scoring run of a fixed contract on a partly pre-dealt
# board. perf_counter() is a monotonic, high-resolution clock, so the
# measurement cannot be skewed by system clock adjustments. The timer starts
# after the imports, so module import time is not included.
start = time.perf_counter()

# Strain 0: "C", 1: "D", 2: "H", 3: "S", 4: "N"
# Suit 0: "S", 1: "H", 2: "D", 3: "C"
# NOTE(review): key 0 is presumably seat 0 and cards 0-12 presumably one full
# suit - confirm against the card encoding used by Deal.prepare.
predeal = {0: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]}
deal1 = Deal.prepare(predeal)
# Score a level-5 contract in strain 0 with declarer 3 over 100 tries; the
# returned score is discarded because only the elapsed time is reported.
Deal.score(dealer=deal1, level=5, strain=0, declarer=3, tries=100)

print("%.2f seconds elapsed" % (time.perf_counter() - start))