def step(self, action):
    """Apply one bidding action and pass the turn to the next participating seat.

    :param action: bid action index in ``[0, 35]``. ``35`` is PASS; any other
        value is a bid and must be strictly greater than ``self.max_bid``.
    :return: ``(state, reward, done, info)``. ``state`` is
        ``(hand, bidding_history)`` for the seat that bids next, ``reward`` is
        ``0`` until the bidding ends and the result of ``Deal.score`` once it
        does, ``done`` is ``self.done``, and ``info`` holds the next
        ``"turn"`` and the current ``"max_bid"``.
    :raises Exception: if the episode is already done, the action is out of
        range or not higher than the current maximum bid, no seat listed in
        ``self.bidding_seats`` can take the turn, or the bidding ends while
        ``self.max_bid`` is still negative.
    """
    if self.done:
        raise Exception("No more actions can be taken")

    if action < 0 or action > 35:
        raise Exception("illegal action")

    if action == 35:  # PASS
        self.bidding_history[action] = 1
        self.n_pass += 1
    else:
        if action <= self.max_bid:
            raise Exception("illegal bidding.")
        self.bidding_history[action] = 1
        self.bidding_history[-1] = 0  # reset PASS
        self.n_pass = 0
        self.max_bid = action

        strain = convert_action2strain(action)
        group = Seat2Group[self.turn]
        # Only the first seat of a group to bid a strain is recorded for it.
        if self.strain_declarer[group].get(strain, '') == '':
            self.strain_declarer[group][strain] = self.turn  # which seat
        self.group_declarer = group  # which group made the latest bid

    # Advance to the next seat, skipping seats that do not take part in the
    # bidding; every skipped seat is counted as a pass. The scan is bounded
    # to one full lap so an empty/invalid ``bidding_seats`` raises instead of
    # spinning forever.
    n_seats = len(Seat)
    self.turn = (self.turn + 1) % n_seats
    for _ in range(n_seats):
        if self.turn in self.bidding_seats:
            break
        self.turn = (self.turn + 1) % n_seats
        self.n_pass += 1
    else:
        raise Exception("no participating seat to bid")

    # The returned state belongs to the player who bids next.
    hand = self.one_hot_deal[self.turn]
    reward = 0

    # Bidding ends after three passes, or as soon as action 34 is bid: it is
    # the highest non-PASS action, so no later bid could exceed it.
    if self.n_pass >= 3 or self.max_bid == 34:
        if self.max_bid < 0:
            raise Exception("illegal bidding")
        # Extract the strain and level of the final contract.
        strain = convert_action2strain(self.max_bid)
        level = convert_action2level(self.max_bid)
        # The declarer is the first seat of the last-bidding group that bid
        # this strain.
        declarer = self.strain_declarer[self.group_declarer][strain]
        # NOTE: an earlier single-threaded variant averaged Deal.score_st
        # with np.mean here; per the original comment that averaging now
        # happens inside Deal.score.
        reward = Deal.score(dealer=self.deal, level=level, strain=strain,
                            declarer=declarer, tries=self.nmc,
                            mode=self.score_mode)
        self.done = True

    state = (hand, self.bidding_history)
    info = {"turn": Seat[self.turn], "max_bid": self.max_bid}
    if self.debug:
        log_state(state, reward, self.done, info)
    return state, reward, self.done, info
import time

from deal import Deal

# Small timing script: prepare one deal with a fixed predeal, score a single
# contract with 100 tries, and report the wall-clock time taken.
start = time.time()

# Strain 0: "C", 1: "D", 2: "H", 3: "S", 4: "N"
# Suit 0: "S", 1: "H", 2: "D", 3: "C"
# Key 0 is given the thirteen values 0..12.
# NOTE(review): presumably key 0 is a seat and the values are card indices --
# confirm against Deal.prepare.
predeal = {0: list(range(13))}
deal1 = Deal.prepare(predeal)

# The score itself is discarded; only the elapsed time is reported.
Deal.score(dealer=deal1, level=5, strain=0, declarer=3, tries=100)

print("%.2f seconds elapsed" % (time.time() - start))