def get_action_policy(player_infos, community_infos, community_cards, env, _round, n_seats, state, policy, villain):
    """Build the legal/safe action set for the player currently to act.

    The learner (seat 0) samples an action index from `policy(state)` and
    passes it to `holdem.safe_actions`; any other seat either plays the
    always-call bot ("CallChump") or its rule-based `choose_action`.

    Parameters:
        player_infos:    per-seat tuples (stack, hand_rank, played_this_round,
                         betting, lastsidepot).
        community_infos: table-level observation; [-3] is the seat to act,
                         [-1] is the amount to call.
        community_cards: board cards for the current street.
        env:             poker environment exposing `_player_dict`.
        _round:          street name ("Preflop", "Flop", ...).
        n_seats:         number of seats at the table.
        state:           observation fed to `policy` for the learner.
        policy:          callable returning an action-probability vector.
        villain:         opponent identifier; "CallChump" selects the call bot.

    Returns:
        The action structure produced by `holdem.safe_actions` /
        `utilities.safe_actions_call_bot`.
    """
    player_actions = None
    current_player = community_infos[-3]
    player_object = env._player_dict[current_player]
    to_call = community_infos[-1]
    # BUG FIX: `current_player is 2` compared identity, not value — it only
    # worked via CPython's small-int interning and raises SyntaxWarning on 3.8+.
    # NOTE(review): seat 2 reads its info from index current_player - 1;
    # confirm this offset matches player_infos' layout.
    info_index = current_player - 1 if current_player == 2 else current_player
    stack, hand_rank, played_this_round, betting, lastsidepot = player_infos[info_index]
    player_object.he.set_community_cards(community_cards, _round)
    # BUG FIX: `_round is not "Preflop"` compared string identity, which is
    # interning-dependent — replaced with value inequality.
    if _round != "Preflop":  # preflop already evaluated
        player_object.he.evaluate(_round)
    range_structure = utilities.fill_range_structure(_round, player_object)
    utilities.assign_evals_player(player_object, _round, env)
    if current_player == 0:  # learner move
        probs = policy(state)
        choice = np.random.choice(np.arange(len(probs)), p=probs)
        best_nonlearning_action = player_object.choose_action(_round, range_structure, env)  # Doesn't use
        player_actions = holdem.safe_actions(to_call, community_infos, villain_choice=None, n_seats=n_seats, choice=choice, player_o=player_object, best_nonlearning_action=best_nonlearning_action)
    else:  # bot move
        if villain == "CallChump":
            player_actions = utilities.safe_actions_call_bot(community_infos, villain_choice=None, n_seats=n_seats)
        else:
            villain_choice = player_object.choose_action(_round, range_structure, env)
            player_actions = holdem.safe_actions(to_call, community_infos, villain_choice, n_seats=n_seats, choice=None, player_o=player_object)
    return player_actions
def get_action_policy(player_infos, community_infos, community_cards, env, _round, n_seats, state, policy, villain=None):
    """Build the safe action set for the player to act, then veto check/fold
    when this player is behind the table's highest raise level.

    BUG FIX: this redefinition shadows the earlier 9-parameter
    `get_action_policy` but dropped the `villain` parameter while still
    reading `villain` in the bot branch (NameError), and 9-argument callers
    would raise TypeError. `villain` is restored as a keyword parameter with
    a default of None, which keeps existing 8-argument callers working.

    Parameters mirror the 9-parameter variant; `villain == "CallChump"`
    selects the always-call bot for non-learner seats.

    Returns:
        The (possibly restricted) action structure from
        `holdem.safe_actions` / `utilities.safe_actions_call_bot`.
    """
    player_actions = None
    current_player = community_infos[-3]
    player_object = env._player_dict[current_player]
    to_call = community_infos[-1]
    # BUG FIX: literal identity comparisons (`is 2`, `is not "Preflop"`)
    # replaced with value comparisons — identity on ints/strings is an
    # interning accident, not a guarantee.
    # NOTE(review): seat 2 reads its info from index current_player - 1;
    # confirm this offset matches player_infos' layout.
    info_index = current_player - 1 if current_player == 2 else current_player
    stack, hand_rank, played_this_round, betting, lastsidepot = player_infos[info_index]
    player_object.he.set_community_cards(community_cards, _round)
    if _round != "Preflop":  # preflop already evaluated
        player_object.he.evaluate(_round)
    range_structure = utilities.fill_range_structure(_round, player_object)
    utilities.assign_evals_player(player_object, _round, env)
    if current_player == 0:  # learner move
        probs = policy(state)
        choice = np.random.choice(np.arange(len(probs)), p=probs)
        best_nonlearning_action = player_object.choose_action(_round, range_structure, env)  # Doesn't use
        player_actions = holdem.safe_actions(to_call, community_infos, villain_choice=None, n_seats=n_seats, choice=choice, player_o=player_object, best_nonlearning_action=best_nonlearning_action)
    else:  # bot move
        if villain == "CallChump":
            player_actions = utilities.safe_actions_call_bot(community_infos, villain_choice=None, n_seats=n_seats)
        else:
            villain_choice = player_object.choose_action(_round, range_structure, env)
            player_actions = holdem.safe_actions(to_call, community_infos, villain_choice, n_seats=n_seats, choice=None, player_o=player_object)
    # Count how many raise-level entries belong to this seat.
    # (was: `for p, v in env.level_raises.items()` with v unused — iterate keys only)
    seat = player_object.get_seat()
    this_lr = sum(raiser == seat for raiser in env.level_raises)
    # Call highest_in_LR once instead of twice; index 1 is the leading seat,
    # index 0 its raise count.
    # BUG FIX: `is not` on seat values replaced with `!=` (identity on ints
    # is interning-dependent).
    highest = env.highest_in_LR()
    if highest[1] != seat and highest[0] > this_lr:
        # Behind the highest raiser: ban the first two action slots.
        # NOTE(review): `prohibit_action` is not defined in this view —
        # confirm it exists at module level.
        prohibit_action(player_actions, current_player, ban=[0, 0])
    return player_actions
def act(self, state, player_infos, community_infos, community_cards, env, _round, n_seats, state_set, policy):
    """Epsilon-greedy action selection for the learner.

    With probability `self.epsilon`, delegate to `get_action_policy`
    (exploration); otherwise predict Q-values with `self.model` and map the
    argmax index (0=call/check, 1=raise, 2=fold) to a `choice` fed into
    `holdem.safe_actions`.

    Returns:
        The action structure produced by `get_action_policy` or
        `holdem.safe_actions`.
    """
    if np.random.rand() <= self.epsilon:
        # NOTE(review): `villain` is not a parameter or local here — this
        # branch raises NameError unless `villain` exists as a module-level
        # name; confirm the intended source (e.g. an attribute on self/env).
        # Note also that `state_set` is passed where get_action_policy
        # expects its `state` argument — verify that is intentional.
        action = get_action_policy(player_infos, community_infos, community_cards, env, _round, n_seats, state_set, policy, villain)
        return action
    act_values = self.model.predict(
        state
    )  # if not acting according to safe_strategy, predict reward value based on current state
    predicted_action = np.argmax(act_values[0])
    # Re-evaluate the learner's hand for the current street before acting.
    env.learner_bot.he.set_community_cards(community_cards, _round)
    range_structure = utilities.fill_range_structure(
        _round, env.learner_bot)
    utilities.assign_evals_player(env.learner_bot, _round, env)
    # Map the predicted action index onto the choice encoding used by
    # holdem.safe_actions: 1 = call/check, (2, amount) = raise, 3 = fold.
    choice = None
    if predicted_action == 0:
        choice = 1
    elif predicted_action == 1:
        # Raise sizing: amount to call plus the big blind, minus what the
        # villain has already bet this street.
        total_bet = env._tocall + env._bigblind - env.villain.currentbet
        choice = (2, total_bet)
    elif predicted_action == 2:
        choice = 3
    predicted_action = holdem.safe_actions(community_infos[-1], community_infos, villain_choice=None, n_seats=n_seats, choice=choice, player_o=env.learner_bot)
    return predicted_action  # pick the action that will give the highest reward (i.e., go left or right?)
def get_guest_action(self):
    """Translate the pending guest action into a safe action set for seat p2.

    Reads `self.guest_action`, normalizes it via `self.parse_action`, and
    wraps it with `holdem.safe_actions` using the current table state.
    """
    parsed = self.parse_action(self.guest_action)
    # NOTE(review): `env.n_seats` references a bare `env`, not `self.env` —
    # presumably a module-level environment object; confirm this is intended.
    return holdem.safe_actions(self.community_infos[-1], self.community_infos, parsed, n_seats=env.n_seats, choice=None, player_o=self.p2)
def play_out_hand(env, n_seats):
    """Play one hand to completion using the baseline safe-action policy.

    Resets `env`, renders the table, then steps the environment with
    `holdem.safe_actions` output until the hand reaches a terminal state.
    """
    # reset environment, gather relevant observations
    (player_states, (community_infos, community_cards)) = env.reset()
    (player_infos, player_hands) = zip(*player_states)
    # display the table, cards and all
    env.render(mode='human')
    terminal = False
    while not terminal:
        # play safe actions, check when noone else has raised, call when raised.
        # NOTE(review): every other call site in this file passes the amount
        # to call (community_infos[-1]) as the first positional argument to
        # holdem.safe_actions; this call omits it and will likely misbind
        # arguments or raise TypeError — confirm against the current
        # holdem.safe_actions signature (this may be a stale call from an
        # older API).
        actions = holdem.safe_actions(community_infos, n_seats=n_seats)
        (player_states, (community_infos, community_cards)), rews, terminal, info = env.step(actions)
        env.render(mode='human')