def time_pypokerengine(hole, community, num_sim):
    """Time one PyPokerEngine win-rate estimate for a two-player game.

    Args:
        hole: Card ids of the hole cards (converted with ``Card.from_id``).
        community: Card ids of the community cards (same conversion).
        num_sim: Number of simulations handed to the estimator.

    Returns:
        Elapsed seconds spent inside ``estimate_hole_card_win_rate``; the
        card conversion is done before the timer starts.
    """
    hole = [Card.from_id(c) for c in hole]
    community = [Card.from_id(c) for c in community]
    # time.clock() was removed in Python 3.8. Use perf_counter() and only
    # fall back to clock() on interpreters that predate it.
    timer = time.perf_counter if hasattr(time, "perf_counter") else time.clock
    start = timer()
    estimate_hole_card_win_rate(num_sim, 2, hole, community)
    return timer() - start
def sim_nature(nodes):
    """Play the dealer's part of one hand and update the node tree.

    ``nodes[0][0]`` is the root. ``nodes[1]`` holds the candidates after the
    hole cards are dealt, and ``nodes[2]``..``nodes[4]`` the candidates after
    each community deal (3, 1 and 1 cards). At every stage a win rate is
    estimated for two players, ``get_node`` picks the candidate for that win
    rate, and the previous node is told about it through ``adjust_for``.
    """
    root = nodes[0][0]
    deck = gen_deck()
    deck.shuffle()

    # Hole cards: move from the root to the matching first-level node.
    hole = deck.draw_cards(2)
    win_rate = estimate_hole_card_win_rate(
        nb_simulation=N_TESTS, nb_player=2, hole_card=hole)
    current = get_node(win_rate, nodes[1])
    root.adjust_for(current)

    # Community cards: three for the first deal, then one at a time.
    board = []
    for level, n_cards in enumerate((3, 1, 1), start=2):
        board = board + deck.draw_cards(n_cards)
        win_rate = estimate_hole_card_win_rate(
            nb_simulation=N_TESTS,
            nb_player=2,
            hole_card=hole,
            community_card=board)
        successor = get_node(win_rate, nodes[level])
        current.adjust_for(successor)
        current = successor
def receive_round_result_message(self, winners, hand_info, round_state):
    """Learn from a showdown, then reset the recorded action sequence.

    When the round ended in a showdown and ``action_histories`` has at least
    four entries, every revealed hand that differs from ``self.hole_card`` is
    treated as the opponent's hand. For street indices 3, 2 and 1 (river,
    turn, flop) the opponent's win probability is computed with the community
    cards visible on that street, the matching action-tree node's history
    cell is updated, and ``update_call_raise_EHS`` is called. After all three
    streets ``update_opp_model`` runs.

    An ``AttributeError`` raised during the update is caught and the node
    name being processed is printed. The name is pre-initialised so the
    handler can always print it, even if the very first lookup fails.

    Args:
        winners: Unused.
        hand_info: Iterable of dicts exposing ``['hand']['card']``.
        round_state: Round state dict; ``street``, ``action_histories`` and
            ``community_card`` are read here.
    """
    # get opp card from here
    if (round_state['street'] == "showdown"
            and len(round_state['action_histories']) >= 4):
        for info in hand_info:
            opp_card = info['hand']['card']
            if opp_card == self.hole_card:
                continue
            # Work on a copy: cards are popped off to step back a street.
            community_card = copy.deepcopy(round_state['community_card'])
            update_node_action_sequence = None
            try:
                for street_index in (3, 2, 1):
                    update_node_action_sequence = (
                        self.leaf_node_sequence_at_street(
                            round_state, street_index))
                    if street_index == 1:
                        # The flop uses the lookup table, not a simulation.
                        pro_win_opp = self.lookupProb(opp_card,
                                                      community_card)
                    else:
                        pro_win_opp = estimate_hole_card_win_rate(
                            nb_simulation=100,
                            nb_player=2,
                            hole_card=gen_cards(opp_card),
                            community_card=gen_cards(community_card))
                    node = self.action_tree.search_node_by_name(
                        update_node_action_sequence)
                    node.history_cell.update_action_frequency_cell(
                        pro_win_opp)
                    self.update_call_raise_EHS(round_state, street_index,
                                               pro_win_opp)
                    if street_index > 1:
                        # Drop the last card to get the previous street.
                        community_card.pop()
                self.update_opp_model()
            except AttributeError:
                print(update_node_action_sequence)
    # reset variable here
    self.action_sequence = ""
def get_win_rate(self):
    """Estimate the win rate of ``self.hand`` at four stages and store them.

    ``self.win_rate`` becomes a list of four estimates, each from 1000
    simulations with ``self.n`` players: first with no community cards, then
    with ``self.commit[:3]``, ``self.commit[:4]`` and ``self.commit[:]``.
    """
    n_sim = 1000
    # Stage 0: no community cards, passed as a plain empty list.
    rates = [
        estimate_hole_card_win_rate(n_sim, self.n, gen_cards(self.hand), [])
    ]
    # NOTE(review): each slice is wrapped in one extra list before being
    # handed to gen_cards, exactly as before. Confirm gen_cards really
    # expects a nested list here.
    for upto in (3, 4, None):
        rates.append(
            estimate_hole_card_win_rate(n_sim, self.n,
                                        gen_cards(self.hand),
                                        gen_cards([self.commit[:upto]])))
    self.win_rate = rates
def receive_street_start_message(self, street, round_state):
    """Refresh ``self.estimatedStrength`` at the start of a street.

    The estimate is a two-player win rate from 50 simulations, using
    ``self.hole_card`` and the community cards in ``round_state``.

    Args:
        street: Current moment of the game. Per the original notes:
            preflop (before any card is opened), flop (after the first 3
            cards), turn (after the 4th card), river (after the 5th card).
        round_state: Current state of the round. Only ``community_card`` is
            read here. The original notes list these keys (not verified
            here): dealer_btn, big_blind_pos, small_blind_pos, round_count,
            next_player, small_blind_amount, action_histories (dict of
            per-street action lists), street, seats (who sits where and
            their money), community_card, pot.
    """
    self.estimatedStrength = estimate_hole_card_win_rate(
        nb_simulation=50,
        nb_player=2,
        hole_card=gen_cards(self.hole_card),
        community_card=gen_cards(round_state["community_card"]))
def calculate_broad_win_rate(hole_card, community_card, rpt_times):
    """Estimate the win rate of a hand against a single opponent.

    Args:
        hole_card: Hole cards in the form accepted by ``gen_cards``.
        community_card: Community cards in the same form.
        rpt_times: Number of simulations to run.

    Returns:
        The value returned by ``estimate_hole_card_win_rate``.
    """
    hole = gen_cards(hole_card)
    board = gen_cards(community_card)
    # Only the two-player situation is considered.
    return estimate_hole_card_win_rate(nb_simulation=rpt_times,
                                       nb_player=2,
                                       hole_card=hole,
                                       community_card=board)
def gather_informations(self, hole_card, round_state, valid_actions=None):
    """Build the feature vector describing the current decision point.

    Side effects:
        ``self.latest_ehs`` is set to the hand strength. ``self.pot_odds`` is
        updated only when ``valid_actions`` is supplied; with the default
        ``None`` it is left untouched instead of failing on ``None[1]``.

    Args:
        hole_card: Hole cards in the form accepted by ``gen_cards``.
        round_state: Round state dict; ``community_card``, ``street``,
            ``pot`` and ``seats`` are read.
        valid_actions: Optional action list; index 1 must carry an
            ``amount`` and index 2 an ``amount['min']``.

    Returns:
        ``[hand_strength, pots, <street one-hot values>, player_stack,
        *other_stacks]`` where stacks are divided by ``self.start_stack``.
    """
    hand_strength = estimate_hole_card_win_rate(
        nb_simulation=1000,
        nb_player=self.nb_players,
        hole_card=gen_cards(hole_card),
        community_card=gen_cards(
            round_state['community_card'])) / self.nb_players

    # One-hot over the four betting streets; the current street is set to 1.
    street = {
        'preflop': 0,
        'flop': 0,
        'turn': 0,
        'river': 0,
        round_state['street']: 1
    }
    pots = sum([round_state['pot']['main']['amount']] +
               [pot['amount'] for pot in round_state['pot']['side']])

    player_stack = [
        player['stack'] for player in round_state['seats']
        if player['uuid'] == self.uuid
    ][0] / self.start_stack
    other_stacks = [
        player['stack'] / self.start_stack
        for player in round_state['seats'] if player['uuid'] != self.uuid
    ]

    if valid_actions is not None:
        call_amount = valid_actions[1]['amount']
        # When calling is free, price the odds against the minimum raise.
        call_amount_in_live = (call_amount if call_amount > 0 else
                               valid_actions[2]['amount']['min'])
        self.pot_odds = pots / call_amount_in_live

    self.latest_ehs = hand_strength
    return [
        hand_strength, pots, *street.values(), player_stack, *other_stacks
    ]
def estimate_winrate(self, card_representations,
                     community_card_representations):
    """Return a five-player win-rate estimate from 1000 simulations.

    Both arguments are forwarded unchanged as the estimator's ``hole_card``
    and ``community_card``.
    """
    simulation_settings = {'nb_simulation': 1000, 'nb_player': 5}
    return estimate_hole_card_win_rate(
        hole_card=card_representations,
        community_card=community_card_representations,
        **simulation_settings)
def estimate_and_round_wr():
    """Return the configured win-rate bucket closest to the current hand.

    Closure: reads ``p``, ``gamestate``, ``self`` and ``wr_cache`` from the
    enclosing scope. Estimates are memoised in ``self.cache`` under a key
    built from the sorted hole and board card strings. On a cache miss the
    win rate is estimated (1000 simulations, 2 players) and stored, and the
    whole cache is dumped to ``wr_cache`` whenever ``self.num_new`` is 0,
    which happens once per 1000 new entries.
    """
    # Each card's string form is reversed before use.
    # NOTE(review): presumably this converts to the order gen_cards expects.
    hole_s = sorted(str(c)[::-1] for c in p._hole)
    board_s = sorted(str(c)[::-1] for c in gamestate.game._board)
    str_r = f"{hole_s}|{board_s}"

    if str_r not in self.cache:
        self.cache[str_r] = estimate_hole_card_win_rate(
            1000, 2, gen_cards(hole_s), gen_cards(board_s))
        if self.num_new == 0:
            with open(wr_cache, 'wb') as f:
                dump(self.cache, f)
        self.num_new = (self.num_new + 1) % 1000
    wr = self.cache[str_r]

    # Gets wr, rounds to nearest clump; the first clump wins ties.
    clumps = self.config['wrs']
    closest = clumps[0]
    for candidate in clumps:
        if abs(candidate - wr) < abs(closest - wr):
            closest = candidate
    return closest
def declare_action(self, valid_actions, hole_card, round_state):
    """Choose an action after the flop from a quick win-rate estimate.

    Preflop decisions are delegated to the parent class. Afterwards the win
    probability is estimated with 10 simulations against every seat.

    * Raise when ``p_win`` exceeds ``self.raise_pwin`` and a random draw
      scaled by table size agrees. The amount is twice the minimum raise,
      capped at the maximum. If raising is unavailable (the raise limits are
      -1) the bot calls instead of submitting an invalid raise.
    * Call when ``p_win`` exceeds ``self.check_pwin`` or calling is free.
    * Fold otherwise.

    Args:
        valid_actions: ``[fold_action_info, call_action_info,
            raise_action_info]``.
        hole_card: Hole cards in the form accepted by ``gen_cards``.
        round_state: Round state dict; ``street``, ``seats`` and
            ``community_card`` are read.

    Returns:
        ``(action, amount)``.
    """
    if round_state['street'] == 'preflop':
        return super().declare_action(valid_actions, hole_card, round_state)

    nb_player = len(round_state['seats'])
    p_win = estimate_hole_card_win_rate(
        nb_simulation=10,
        nb_player=nb_player,
        hole_card=gen_cards(hole_card),
        community_card=gen_cards(round_state['community_card']))

    call_action_info = valid_actions[1]
    raise_action_info = valid_actions[2]
    raise_limits = raise_action_info["amount"]

    # The random draw is only made when the win rate already qualifies.
    wants_raise = (p_win > self.raise_pwin
                   and random.random() * 2 < nb_player * p_win)
    if wants_raise and raise_limits['max'] != -1:
        # Never exceed the legal maximum.
        amount = min(raise_limits['min'] * 2, raise_limits['max'])
        return raise_action_info["action"], amount
    if (wants_raise or p_win > self.check_pwin
            or call_action_info['amount'] == 0):
        return call_action_info["action"], call_action_info["amount"]
    return "fold", 0
def declare_action(self, valid_actions, hole_card, round_state):
    """Pick fold, call or minimum raise from a heads-up win-rate estimate.

    Below 0.5 the bot folds with amount 0. Above 0.6 it raises the minimum
    when a raise is offered. In every other case it calls.

    Returns:
        ``(action, amount)``; this is what is sent to the poker engine.
    """
    win_rate = estimate_hole_card_win_rate(
        nb_simulation=NB_SIMULATION,
        nb_player=2,
        hole_card=gen_cards(hole_card),
        community_card=gen_cards(round_state['community_card']))
    raise_offered = any(item['action'] == 'raise' for item in valid_actions)

    if not win_rate >= 1.0 / 2:
        chosen = valid_actions[0]  # fetch FOLD action info
        return chosen['action'], 0
    if win_rate > 0.6 and raise_offered:
        chosen = valid_actions[2]
        return chosen['action'], chosen['amount']['min']
    chosen = valid_actions[1]
    return chosen['action'], chosen['amount']
def declare_action(self, valid_actions, hole_card, round_state):
    """Raise, call or fold by comparing the win rate with model output.

    The win rate (1000 simulations, ``self.nb_player`` players) is compared
    with the largest entry of the model's probability list, scaled by
    ``self.raise_prob`` or ``self.call_prob``.

    Returns:
        ``(action, bet)`` with ``bet`` converted to ``int``.
    """
    # NOTE(review): the return value is bound but never read; the values are
    # taken from self.output instead. Confirm model_dict populates
    # self.output, or whether `output` was meant to be used below.
    output = self.model_dict(round_state)
    model_output = self.output
    prob_list = model_output['probability_list']
    stack = model_output['stack']
    max_of_list = max(prob_list)

    win_rate = estimate_hole_card_win_rate(
        nb_simulation=1000,
        nb_player=self.nb_player,
        hole_card=gen_cards(hole_card),
        community_card=gen_cards(round_state['community_card']))

    # Make sure bot is able to raise by valid amount, then select within
    # range.
    if (win_rate >= max_of_list * self.raise_prob
            and valid_actions[2]['amount']['max'] != -1):
        action = valid_actions[2]  # fetch raise action info
        limits = action['amount']
        upper = limits['min'] * (limits['min'] * self.raise_percent)
        bet = int(random.uniform(limits['min'], upper))
        if bet > limits['max']:
            bet = int(limits['max'])
    elif (win_rate >= max_of_list * self.call_prob
          and stack >= valid_actions[1]['amount']):
        action = valid_actions[1]  # fetch call action info
        bet = int(action['amount'])
    else:
        action = valid_actions[0]  # fetch FOLD action info
        bet = int(action['amount'])
    return action['action'], bet
def declare_action(self, valid_actions, hole_card, round_state):
    """Fold weak hands, otherwise explore the game tree for call vs. raise.

    The win rate comes from PyPokerEngine using the hole cards and, after
    the preflop, the community cards. The community cards are passed as the
    estimator's ``community_card`` argument. Concatenating them into the
    ``hole_card`` argument makes the simulation deal extra board cards on
    top of the real ones and skews the estimate.

    The fold cutoff for a street is ``sigmoid(street_value - 2)``, built from
    the members of ``PokerConstants.Street()``.

    Returns:
        The fold action name when the win rate is under the cutoff and a
        fold is offered; otherwise whatever ``explore_game_tree`` returns.
    """
    # PyPokerEngine lists the actions as fold, call, raise; pad with None
    # when fewer than three are offered.
    action_names = [info['action'] for info in valid_actions]
    action_names += [None] * (3 - len(valid_actions))
    fold_action, second_action, third_action = action_names
    current_round = round_state['street']

    def sigmoid_function(x):
        # Increasing function: tends to 0 for very negative x and to 1 for
        # very positive x.
        return exp(x) / (exp(x) + 1)

    # Weights for deciding whether to fold, keyed by street name.
    weights = {
        member[0]: sigmoid_function(member[1] - 2)
        for member in getmembers(PokerConstants.Street())[0:6]
    }

    community_card = round_state['community_card']
    win_rate = estimate_hole_card_win_rate(
        RaisedPlayer.INFORMATION['number_of_simulations'],
        RaisedPlayer.INFORMATION['number_of_players'],
        gen_cards(hole_card),
        gen_cards(community_card))

    if win_rate < weights[current_round.upper()] and fold_action is not None:
        return fold_action
    # NOTE(review): the list keeps the order this method has always passed
    # (second then third entry of valid_actions). Confirm that
    # explore_game_tree expects this order.
    remaining_actions = [second_action, third_action]
    return self.explore_game_tree(hole_card, round_state, remaining_actions)
def declare_action(
        self, valid_actions: List[Dict[str, Union[int, str]]],
        hole_card: List[str],
        round_state: Dict[str, Union[int, str, List, Dict]]
) -> Tuple[Union[int, str], Union[int, str]]:
    """
    Define what action the player should execute.

    The win rate is estimated against every seat at the table. At or above
    the fair share ``1 / nb_player`` the player raises a random amount
    between the minimum and maximum raise. From 0.1 upwards it calls, and
    below that it folds. When raising is unavailable (a negative raise
    limit) the player calls instead of submitting an invalid raise.

    The entries of ``valid_actions`` are never modified; the chosen raise
    amount is returned, not written back into the caller's dictionary.

    :param valid_actions: List of dictionary containing valid actions the
        player can execute.
    :param hole_card: Cards in possession of the player encoded as a list of
        strings.
    :param round_state: Dictionary containing relevant information and
        history of the game.
    :return: action: str specifying action type. amount: int action argument.
    """
    nb_player = len(round_state['seats'])
    community_card = round_state['community_card']
    win_rate = estimate_hole_card_win_rate(
        nb_simulation=NB_SIMULATION,
        nb_player=nb_player,
        hole_card=gen_cards(hole_card),
        community_card=gen_cards(community_card))

    call_action = valid_actions[1]
    if win_rate >= 1.0 / nb_player:
        raise_action = valid_actions[2]
        limits = raise_action["amount"]
        if limits["min"] < 0 or limits["max"] < 0:
            # Raising is not possible right now.
            return call_action['action'], call_action['amount']
        amount = random.randint(limits["min"],
                                max(limits["min"], limits["max"]))
        return raise_action['action'], amount
    if win_rate >= .1:
        return call_action['action'], call_action['amount']
    fold_action = valid_actions[0]
    return fold_action['action'], fold_action['amount']
def re_calculate_probability(self):
    """Recompute ``self.winning_probability`` for the current street.

    Preflop, the probability is derived from the stored expected-value table
    by reversing ``2 * Pr(win) = (expected value per bet) + 1``. On later
    streets it is a heads-up estimate from ``self.NB_SIMULATION``
    simulations.
    """
    if self.street != self.STREET_ZERO_CARD:
        # When not in PREFLOP.
        self.winning_probability = estimate_hole_card_win_rate(
            nb_simulation=self.NB_SIMULATION,
            nb_player=2,
            hole_card=gen_cards(list(self.hole_card)),
            community_card=gen_cards(list(self.community_card)))
        return

    first_card, second_card = self.hole_card[0], self.hole_card[1]
    # Index 0 of a card string is compared for shape, index 1 for number.
    first_number, second_number = first_card[1], second_card[1]
    if self.CARD_NUM_DICT[first_number] > self.CARD_NUM_DICT[second_number]:
        lower_card_number, higher_card_number = second_number, first_number
    else:
        lower_card_number, higher_card_number = first_number, second_number
    is_same_shape = first_card[0] == second_card[0]

    table = self.PREFLOP_EXPECTED_VALUE
    self.preflop_expected_value = (
        table[is_same_shape][lower_card_number][higher_card_number])
    self.winning_probability = (self.preflop_expected_value + 1) / 2
def declare_action(self, valid_actions, hole_card, round_state):
    """Choose an action from a weighted sum of five normalised features.

    The features are the round number, the money difference, the street, a
    fresh random integer in [0, 10] and a heads-up win-rate estimate. They
    are weighted by ``self.coeff[0..4]``. A score above 0.33 selects the
    third valid action when three are offered, a score above -0.33 selects
    the second, and anything else selects the first.

    Side effects:
        Updates ``self.rng`` and ``self.win_rate``.

    Returns:
        The chosen action's name only.
    """
    self.rng = random.randint(0, 10)
    self.win_rate = estimate_hole_card_win_rate(
        nb_simulation=NB_SIMULATION,
        nb_player=2,
        hole_card=gen_cards(hole_card),
        community_card=gen_cards(round_state['community_card']))

    features = (
        self.normalize(self.curr_round, 0, 1000),
        self.normalize(self.curr_money_diff, -10000, 10000),
        self.normalize(self.curr_street, 1, 4),
        self.normalize(self.rng, 0, 10),
        self.normalize(self.win_rate, 0, 1),
    )
    act = 0
    for weight, feature in zip(self.coeff, features):
        act += weight * feature

    if act > 0.33 and len(valid_actions) == 3:
        choice = 2
    elif act > -0.33:
        choice = 1
    else:
        choice = 0
    return valid_actions[choice]['action']
def declare_action(self, valid_actions, hole_card, round_state):
    """Choose an action and amount from a weighted sum of raw features.

    The features are the round number, the money difference, the street, a
    fresh random integer in [0, 10] and the win-rate estimate for
    ``self.nb_player`` players. They are weighted by ``self.coeff[0..4]``
    without normalisation.

    Side effects:
        Updates ``self.rng`` and ``self.win_rate``.

    Returns:
        ``(action, amount)``: the third valid action at its minimum amount
        when the score exceeds 2 and three actions are offered, the second
        valid action at its amount when the score exceeds 1, otherwise the
        first valid action with amount 0.
    """
    self.rng = random.randint(0, 10)
    self.win_rate = estimate_hole_card_win_rate(
        nb_simulation=NB_SIMULATION,
        nb_player=self.nb_player,
        hole_card=gen_cards(hole_card),
        community_card=gen_cards(round_state['community_card']))

    features = (self.curr_round, self.curr_money_diff, self.curr_street,
                self.rng, self.win_rate)
    act = 0
    for weight, feature in zip(self.coeff, features):
        act += weight * feature

    if act > 2 and len(valid_actions) == 3:
        chosen = valid_actions[2]
        return chosen['action'], chosen["amount"]["min"]
    if act > 1:
        chosen = valid_actions[1]
        return chosen['action'], chosen["amount"]
    return valid_actions[0]['action'], 0
def declare_action(self, valid_actions, hole_card, round_state):
    """Decide through a noisy linear score over five game features.

    The score combines the win rate (estimated against the seats that have
    not folded), the call-to-max-raise ratio, the pot size, the street index
    and the maximum raise, plus Gaussian noise with variance 10.

    * score > 100: raise ``min(pot, 4 * min_raise)``
    * score > 80: raise ``2 * min_raise``
    * score > 40: call
    * otherwise: fold

    The bands are contiguous, so a score exactly on a threshold falls into
    the band below it and is not folded. Raise amounts are clamped to the
    legal ``[min, max]`` range. When raising is unavailable (max raise of
    -1) a raise-worthy score results in a call.

    Returns:
        ``(action, amount)``.
    """
    fold_action_info = valid_actions[0]
    call_action_info = valid_actions[1]
    raise_action_info = valid_actions[2]

    call_amount = call_action_info["amount"]
    raise_amount_max = raise_action_info["amount"]["max"]
    raise_amount_min = raise_action_info["amount"]["min"]
    pot_size = round_state["pot"]["main"]["amount"]
    community_card = round_state['community_card']

    street_nums = {
        "preflop": 0,
        "flop": 1,
        "turn": 2,
        "river": 3,
    }
    street_num = street_nums[round_state["street"]]

    # Only players still in the hand count as opponents.
    nb_player = self.nb_player
    for seat in round_state["seats"]:
        if seat["state"] == "folded":
            nb_player -= 1

    win_rate = estimate_hole_card_win_rate(
        nb_simulation=NB_SIMULATION,
        nb_player=nb_player,
        hole_card=gen_cards(hole_card),
        community_card=gen_cards(community_card)
    )

    params = [
        norm(win_rate),
        norm(max(call_amount, 30) / abs(raise_amount_max)),
        norm(pot_size / 13500),
        street_num,
        norm(abs(raise_amount_max) / 13500)
    ]
    noise = sps.norm.rvs(size=1, scale=10 ** .5)[0]
    decision_function = (100 * params[0] + -86.2 * params[1]
                         + 104.8 * params[2] + 27.5 * params[3]
                         + 54.9 * params[4] + noise)

    can_raise = raise_amount_max != -1
    if decision_function > 80 and can_raise:
        if decision_function > 100:
            wanted = min(pot_size, 4 * raise_amount_min)
        else:
            wanted = 2 * raise_amount_min
        # Keep the raise inside the legal range.
        amount = max(raise_amount_min, min(wanted, raise_amount_max))
        return raise_action_info["action"], amount
    if decision_function > 40:
        return call_action_info["action"], call_action_info["amount"]
    return fold_action_info["action"], fold_action_info["amount"]
def declare_action(self, valid_actions, hole_card, round_state):
    """Fold, call or raise from the estimated win rate.

    Below 0.35 the bot folds. Above 0.7 it takes the third valid action when
    a raise is offered. In all remaining cases it takes the second valid
    action.

    Returns:
        The action name only; this is what is sent to the poker engine.
    """
    win_rate = estimate_hole_card_win_rate(
        nb_simulation=NB_SIMULATION,
        nb_player=self.num_players,
        hole_card=gen_cards(hole_card),
        community_card=gen_cards(round_state['community_card']))

    offered = [item['action'] for item in valid_actions]
    can_call = 'call' in offered  # computed but not consulted below
    can_raise = 'raise' in offered

    if not win_rate >= 0.35:
        return "fold"
    if win_rate > 0.7 and can_raise:
        return valid_actions[2]['action']
    return valid_actions[1]['action']
def declare_action(self, valid_actions, hole_card, round_state):
    """Score each of three actions linearly and play the best one offered.

    Action ``i`` is scored with ``self.coeff[i * 5 : i * 5 + 5]`` applied to
    five normalised features: round number, money difference, street, a
    fresh random integer in [0, 10] and a heads-up win-rate estimate. With
    only two valid actions the best of the first two scores is used.

    Side effects:
        Updates ``self.rng`` and ``self.win_rate``; prints the chosen action.

    Returns:
        The chosen action's name only.
    """
    self.rng = random.randint(0, 10)
    community_card = round_state['community_card']
    # A preflop lookup in self.rate, keyed by the sorted hole cards, was
    # sketched here earlier but is disabled; the estimate is always used.
    self.win_rate = estimate_hole_card_win_rate(
        nb_simulation=NB_SIMULATION,
        nb_player=2,
        hole_card=gen_cards(hole_card),
        community_card=gen_cards(community_card))

    features = (
        self.normalize(self.curr_round, 0, 1000),
        self.normalize(self.curr_money_diff, -10000, 10000),
        self.normalize(self.curr_street, 1, 4),
        self.normalize(self.rng, 0, 10),
        self.normalize(self.win_rate, 0, 1),
    )
    act = [0, 0, 0]
    for i in range(3):
        for j, feature in enumerate(features):
            act[i] += self.coeff[i * 5 + j] * feature

    if len(valid_actions) == 3:
        best = max(act)
    else:  # len = 2
        best = max(act[:2])
    action = valid_actions[act.index(best)]
    print(action['action'])
    return action['action']
def declare_action(self, valid_actions, hole_card, round_state):
    """Play from the win rate and the last action seen on this street.

    Preflop the bot always takes the second valid action. Later it estimates
    a heads-up win rate with 200 simulations and:

    * takes the third action when three are offered and the win rate is at
      least 0.8, or at least 0.7 when the previous action was a CALL;
    * takes the second action unless the previous action counts as a RAISE
      and the win rate is under 0.3;
    * otherwise takes the first action.

    Any previous action other than ``'CALL'`` is treated as ``'RAISE'``.

    Returns:
        The chosen action's name only.
    """
    street = round_state["street"]
    if street == "preflop":
        return valid_actions[1]['action']

    history = round_state["action_histories"][street]
    if not history:
        last_move = None
    else:
        last_move = 'CALL' if history[-1]["action"] == 'CALL' else 'RAISE'

    win_rate = estimate_hole_card_win_rate(
        nb_simulation=200,
        nb_player=2,
        hole_card=gen_cards(hole_card),
        community_card=gen_cards(round_state['community_card']))

    strong = win_rate >= 0.8 or (win_rate >= 0.7 and last_move == 'CALL')
    if strong and len(valid_actions) == 3:
        return valid_actions[2]['action']
    if last_move != 'RAISE' or win_rate >= 0.3:
        return valid_actions[1]['action']
    return valid_actions[0]['action']
def declare_action(self, valid_actions, hole_card, round_state):
    """Pick an action by win-rate band (1000 simulations, two players).

    Above 0.75 with three actions offered the third action is taken, above
    0.25 the second, otherwise the first.

    Returns:
        The action name only; this is what is sent to the poker engine.
    """
    win_rate = estimate_hole_card_win_rate(
        1000, 2, gen_cards(hole_card),
        gen_cards(round_state['community_card']))

    if win_rate > 0.75 and len(valid_actions) == 3:
        choice = 2
    elif win_rate > 0.25:
        choice = 1
    else:
        choice = 0
    return valid_actions[choice]["action"]
def declare_action(self, valid_actions, hole_card, round_state):
    """Raise with very strong hands, call marginal ones before the river.

    * Win rate >= 0.8: raise the midpoint of the min/max raise when a raise
      is offered, otherwise call.
    * Win rate >= ``self.risk / self.nb_player`` while fewer or more than
      five community cards are out: call.
    * Otherwise fold.

    Returns:
        ``(action, amount)``; the midpoint raise amount is a float.
    """
    win_rate = estimate_hole_card_win_rate(
        nb_simulation=self.sims,
        nb_player=self.nb_player,
        hole_card=gen_cards(hole_card),
        community_card=gen_cards(round_state['community_card']))
    raise_offered = any(item['action'] == 'raise' for item in valid_actions)

    if win_rate >= 0.8:
        if raise_offered:
            chosen = valid_actions[2]
            limits = chosen['amount']
            return chosen['action'], (limits['min'] + limits['max']) / 2.0
        chosen = valid_actions[1]
        return chosen['action'], chosen['amount']

    if (win_rate >= self.risk / self.nb_player
            and len(round_state['community_card']) != 5):
        chosen = valid_actions[1]  # fetch CALL action info
    else:
        chosen = valid_actions[0]  # fetch FOLD action info
    return chosen['action'], chosen['amount']
def declare_action(self, valid_actions, hole_card, round_state):
    """Tight-aggressive policy driven by win rate relative to fair share.

    ``edge`` is the estimated win rate divided by ``1 / self.nb_player``.

    * edge >= 1.3 and a raise is possible: raise a random amount in
      ``[min_bet, max_bet)``. When that range is empty (``min_bet`` equals
      ``max_bet``), or the limits are not usable by ``randrange``, the
      minimum raise is used instead of an invalid bet of -1.
    * edge > 1.18 (including a raise-worthy hand that cannot raise): call.
    * otherwise: fold.

    Side effects:
        Stores the main pot in ``self.pot_size`` and prints the hand and
        win rate.

    Returns:
        ``(action, bet)``.
    """
    self.pot_size = round_state['pot']['main']['amount']
    community_card = round_state['community_card']
    win_rate = estimate_hole_card_win_rate(
        nb_simulation=NB_SIMULATION,
        nb_player=self.nb_player,
        hole_card=gen_cards(hole_card),
        community_card=gen_cards(community_card))

    ex_win_rate = 1 / self.nb_player
    min_bet = valid_actions[2]['amount']['min']
    max_bet = valid_actions[2]['amount']['max']
    edge = win_rate / ex_win_rate

    # Negative limits mean raising is not available at the moment.
    if edge >= 1.3 and min_bet >= 0:
        action = valid_actions[2]  # fetch raise action info
        try:
            bet = random.randrange(min_bet, max_bet)
        except (ValueError, TypeError):
            # Empty range (min == max) or non-integer limits.
            bet = min_bet
    elif edge > 1.18:
        action = valid_actions[1]  # fetch CALL action info
        bet = action['amount']
    else:
        action = valid_actions[0]  # fetch FOLD action info
        bet = action['amount']
    print("Tight Aggressive: ", hole_card, "; Prob: ", win_rate)
    return action['action'], bet
def declare_action(self, valid_actions, hole_card, round_state):
    """Pick an action by win-rate thirds and report how long it took.

    At 0.66 or more with three actions offered the third action is taken, at
    0.33 or more the second, otherwise the first. The elapsed wall-clock
    time of the decision is printed.

    Returns:
        The chosen action's name only.
    """
    start = time.time()
    win_rate = estimate_hole_card_win_rate(
        nb_simulation=NB_SIMULATION,
        nb_player=2,
        hole_card=gen_cards(hole_card),
        community_card=gen_cards(round_state['community_card']))

    if win_rate >= 0.66 and len(valid_actions) == 3:
        choice = 2
    elif win_rate >= 0.33:
        choice = 1  # fetch CALL action info
    else:
        choice = 0  # fetch FOLD action info
    action = valid_actions[choice]

    end = time.time()
    print("\n Time taken to try 100 simulations: %.4f seconds" %
          (end - start))
    return action['action']
def declare_action(self, valid_actions, hole_card, round_state):
    """Raise on almost any hand, otherwise call, otherwise fold.

    The heads-up win probability is estimated with 300 simulations. The bot
    raises when it exceeds 0.23 and a raise is offered, calls when a call is
    offered, and folds as the last resort. Pot size and bet sizes play no
    part in the decision.

    Returns:
        The action name only; this is what is sent to the poker engine.
    """
    wining_probability = estimate_hole_card_win_rate(
        300, 2, gen_cards(hole_card),
        gen_cards(round_state['community_card']))

    offered = {entry["action"] for entry in valid_actions}
    if wining_probability > 0.23 and "raise" in offered:
        return 'raise'
    if "call" in offered:
        return 'call'
    return 'fold'
def win_rate_calc(df, NB_SIMULATION):
    """Add a ``win_rate`` column computed row by row and return ``df``.

    Each row must provide ``nb_player``, ``hole_card`` and
    ``community_card``. The frame is modified in place.

    Args:
        df: Frame supporting ``apply(..., axis=1)`` and column assignment.
        NB_SIMULATION: Number of simulations per row.
    """

    def _row_win_rate(row):
        return estimate_hole_card_win_rate(
            nb_simulation=NB_SIMULATION,
            nb_player=row['nb_player'],
            hole_card=gen_cards(row['hole_card']),
            community_card=gen_cards(row['community_card']))

    df['win_rate'] = df.apply(_row_win_rate, axis=1)
    return df
def declare_action(self, valid_actions, hole_card, round_state):
    """Select an action with the policy network, epsilon-greedily.

    A five-element state vector is built: own stack over ten big blinds, the
    main pot, the call amount and the raise amount in big blinds, and the
    win rate divided by the number of participating players (at least 2).
    ``self.select_action`` scores it. With probability ``self.epsilon`` the
    chosen action is replaced by a uniformly random one of four.

    Side effects:
        Sets ``self.big_blind_amount``; increments ``self.random_action`` or
        ``self.real_action``; appends to ``self.entropies``,
        ``self.values``, ``self.log_probs`` and ``self.rewards``.

    Returns:
        ``(action, amount)`` for fold, call, minimum raise or maximum raise.

    Raises:
        Exception: if the selected action matches none of the four codes.
    """
    community_card = round_state['community_card']

    stack = 0
    current_players = 0
    for seat in round_state['seats']:
        if seat['uuid'] == self.uuid:
            stack = seat['stack']
        if seat['state'] == 'participating':
            current_players += 1
    current_players = max(current_players, 2)

    win_rate = estimate_hole_card_win_rate(
        nb_simulation=NB_SIMULATION,
        nb_player=current_players,
        hole_card=gen_cards(hole_card),
        community_card=gen_cards(community_card)
    )

    bank = round_state['pot']['main']['amount']
    big_blind = 2*round_state['small_blind_amount']
    self.big_blind_amount = big_blind

    fold_info, call_info, raise_info = (valid_actions[0], valid_actions[1],
                                        valid_actions[2])
    # Use the call amount when no positive minimum raise is offered.
    if raise_info['amount']['min'] > 0:
        raise_amount = raise_info['amount']['min'] / big_blind
    else:
        raise_amount = call_info['amount'] / big_blind

    state = np.array([
        stack/(10*big_blind),
        bank/big_blind,
        call_info['amount']/big_blind,
        raise_amount,
        win_rate / current_players,
    ])
    action, log_prob, value, entropy = self.select_action(
        torch.from_numpy(state).float())

    if np.random.random() < self.epsilon:
        action = np.random.randint(4)
        self.random_action += 1
    else:
        action = action.cpu().numpy()
        self.real_action += 1

    self.entropies.append(entropy)
    self.values.append(value)
    self.log_probs.append(log_prob)
    self.rewards.append(0)

    street = round_state['street']
    if self.FOLD == action:
        if PRINT:
            print("{} fold".format(street))
        return fold_info['action'], fold_info['amount']
    elif self.CALL == action:
        if PRINT:
            print("{} call {}".format(street, call_info['amount']))
        return call_info['action'], call_info['amount']
    elif self.MIN_RAISE == action:
        if PRINT:
            print("{} raise {}".format(street, raise_info['amount']['min']))
        return raise_info['action'], raise_info['amount']['min']
    elif self.MAX_RAISE == action:
        if PRINT:
            print("{} allin {}".format(street, raise_info['amount']['max']))
        return raise_info['action'], raise_info['amount']['max']
    else:
        raise Exception("Invalid action [ %s ] is set" % action)
def declare_action(self, valid_actions, hole_card, round_state):
    """Always raise when three actions are offered, otherwise call.

    The decision depends only on how many actions are available, so no
    win-rate simulation is run.

    Args:
        valid_actions: List of action dicts; the third entry is taken when
            three are offered, the second otherwise.
        hole_card: Unused.
        round_state: Unused.

    Returns:
        The chosen action's name only.
    """
    raise_offered = len(valid_actions) == 3
    return valid_actions[2 if raise_offered else 1]["action"]
def cards_to_scaled_scalar_by_simulation(round_state, hole_card,
                                         simulation_num):
    """Return the simulated win rate as a one-element list.

    The estimate is made against every seat in ``round_state["seats"]``
    using ``simulation_num`` simulations.
    """
    win_rate = estimate_hole_card_win_rate(
        simulation_num,
        len(round_state["seats"]),
        gen_cards(hole_card),
        gen_cards(round_state["community_card"]))
    return [win_rate]
def declare_action(self, valid_actions, hole_card, round_state):
    """Call when the win rate reaches the fair share, otherwise fold.

    The fair share is ``1 / self.nb_player``.

    Returns:
        ``(action, amount)`` taken from the chosen valid-action entry.
    """
    win_rate = estimate_hole_card_win_rate(
        nb_simulation=NB_SIMULATION,
        nb_player=self.nb_player,
        hole_card=gen_cards(hole_card),
        community_card=gen_cards(round_state['community_card'])
    )
    fair_share = 1.0 / self.nb_player
    # Index 1 is the CALL action info, index 0 the FOLD action info.
    chosen = valid_actions[1] if win_rate >= fair_share else valid_actions[0]
    return chosen['action'], chosen['amount']