Example #1
import numpy as np

import holdem
import utilities


def get_action_policy(player_infos, community_infos, community_cards, env,
                      _round, n_seats, state, policy, villain):
    player_actions = None
    current_player = community_infos[-3]

    player_object = env._player_dict[current_player]
    to_call = community_infos[-1]
    stack, hand_rank, played_this_round, betting, lastsidepot = (
        player_infos[current_player - 1] if current_player == 2
        else player_infos[current_player])
    player_object.he.set_community_cards(community_cards, _round)

    if _round != "Preflop":  # preflop was already evaluated
        player_object.he.evaluate(_round)
    range_structure = utilities.fill_range_structure(_round, player_object)
    utilities.assign_evals_player(player_object, _round, env)

    if current_player == 0:  # learner move
        probs = policy(state)
        choice = np.random.choice(np.arange(len(probs)), p=probs)
        # Rule-based suggestion; computed for reference, not acted on directly.
        best_nonlearning_action = player_object.choose_action(
            _round, range_structure, env)
        player_actions = holdem.safe_actions(
            to_call,
            community_infos,
            villain_choice=None,
            n_seats=n_seats,
            choice=choice,
            player_o=player_object,
            best_nonlearning_action=best_nonlearning_action)
    else:  # bot move
        if villain == "CallChump":
            player_actions = utilities.safe_actions_call_bot(
                community_infos, villain_choice=None, n_seats=n_seats)
        else:
            villain_choice = player_object.choose_action(
                _round, range_structure, env)
            player_actions = holdem.safe_actions(
                to_call,
                community_infos,
                villain_choice,
                n_seats=n_seats,
                choice=None,
                player_o=player_object)

    return player_actions
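A minimal sketch of how a caller might drive this function, assuming the gym-style reset/step loop shown in Example #5; `_round`, `state`, and `policy` tracking are elided, and all names here are illustrative:

(player_states, (community_infos, community_cards)) = env.reset()
(player_infos, player_hands) = zip(*player_states)
terminal = False
while not terminal:
    # `_round`, `state`, and `policy` are assumed to be maintained by the caller.
    actions = get_action_policy(player_infos, community_infos, community_cards,
                                env, _round, env.n_seats, state, policy,
                                villain="CallChump")
    (player_states, (community_infos, community_cards)), rews, terminal, info = env.step(actions)
    (player_infos, player_hands) = zip(*player_states)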
Example #2
import numpy as np

import holdem
import utilities


def get_action_policy(player_infos, community_infos, community_cards, env,
                      _round, n_seats, state, policy, villain):
    player_actions = None
    current_player = community_infos[-3]
    player_object = env._player_dict[current_player]
    to_call = community_infos[-1]
    stack, hand_rank, played_this_round, betting, lastsidepot = (
        player_infos[current_player - 1] if current_player == 2
        else player_infos[current_player])
    player_object.he.set_community_cards(community_cards, _round)

    if _round != "Preflop":  # preflop was already evaluated
        player_object.he.evaluate(_round)
    range_structure = utilities.fill_range_structure(_round, player_object)
    utilities.assign_evals_player(player_object, _round, env)

    if current_player == 0:  # learner move
        probs = policy(state)
        choice = np.random.choice(np.arange(len(probs)), p=probs)
        # Rule-based suggestion; computed for reference, not acted on directly.
        best_nonlearning_action = player_object.choose_action(
            _round, range_structure, env)
        player_actions = holdem.safe_actions(
            to_call,
            community_infos,
            villain_choice=None,
            n_seats=n_seats,
            choice=choice,
            player_o=player_object,
            best_nonlearning_action=best_nonlearning_action)
    else:  # bot move
        if villain == "CallChump":
            player_actions = utilities.safe_actions_call_bot(
                community_infos, villain_choice=None, n_seats=n_seats)
        else:
            villain_choice = player_object.choose_action(
                _round, range_structure, env)
            player_actions = holdem.safe_actions(
                to_call,
                community_infos,
                villain_choice,
                n_seats=n_seats,
                choice=None,
                player_o=player_object)

    # Count the level_raises entries recorded for this seat; if another seat
    # holds the highest raise level, bar this seat from the banned action.
    this_lr = sum(p == player_object.get_seat()
                  for p, v in env.level_raises.items())
    highest_lr, highest_seat = env.highest_in_LR()
    if highest_seat != player_object.get_seat() and highest_lr > this_lr:
        prohibit_action(player_actions, current_player, ban=[0, 0])
    return player_actions
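`prohibit_action` is not shown in this example. A purely hypothetical sketch of what it could look like, assuming `player_actions` is indexable by seat and reading the encoding visible in Example #3 (1 = call, 2 = raise, 3 = fold, suggesting 0 = check):

def prohibit_action(player_actions, current_player, ban):
    # Hypothetical: if the seat's pending action matches the banned encoding,
    # downgrade it to a fold so an under-raised seat cannot slip past the
    # highest raiser.
    if list(player_actions[current_player]) == ban:
        player_actions[current_player] = [3, 0]  # 3 = fold (assumed)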
Example #3
import numpy as np

import holdem
import utilities


def act(self, state, player_infos, community_infos, community_cards, env,
        _round, n_seats, state_set, policy):
    # Explore: with probability epsilon, act via the safe rule-based policy.
    # NOTE: assumes `villain` is defined in the enclosing scope.
    if np.random.rand() <= self.epsilon:
        action = get_action_policy(player_infos, community_infos,
                                   community_cards, env, _round, n_seats,
                                   state_set, policy, villain)
        return action
    # Exploit: predict the value of each action from the current state.
    act_values = self.model.predict(state)
    predicted_action = np.argmax(act_values[0])
    env.learner_bot.he.set_community_cards(community_cards, _round)
    range_structure = utilities.fill_range_structure(_round, env.learner_bot)
    utilities.assign_evals_player(env.learner_bot, _round, env)
    # Map the network's output index onto the environment's action encoding,
    # which appears to be 1 = call, (2, amount) = raise, 3 = fold.
    choice = None
    if predicted_action == 0:
        choice = 1
    elif predicted_action == 1:
        total_bet = env._tocall + env._bigblind - env.villain.currentbet
        choice = (2, total_bet)
    elif predicted_action == 2:
        choice = 3
    predicted_action = holdem.safe_actions(community_infos[-1],
                                           community_infos,
                                           villain_choice=None,
                                           n_seats=n_seats,
                                           choice=choice,
                                           player_o=env.learner_bot)
    return predicted_action  # act on the highest predicted reward
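The `self.epsilon` check is standard epsilon-greedy exploration: explore via the safe rule-based policy with probability epsilon, otherwise exploit the model's value estimates. A typical companion is a decay step after each training episode, sketched here with hypothetical attribute names:

# Hypothetical decay step, run once per training episode.
if self.epsilon > self.epsilon_min:
    self.epsilon *= self.epsilon_decay  # e.g. 0.995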
Example #4
def get_guest_action(self):
    # Translate the guest's raw input into the numeric action encoding,
    # then wrap it in a full per-seat action list via holdem.safe_actions.
    action = self.guest_action
    action = self.parse_action(action)
    # NOTE: assumes `env` is defined in the enclosing scope.
    player_actions = holdem.safe_actions(self.community_infos[-1],
                                         self.community_infos,
                                         action,
                                         n_seats=env.n_seats,
                                         choice=None,
                                         player_o=self.p2)
    return player_actions
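`parse_action` is not shown. A plausible sketch, assuming the guest's input arrives as a string and reusing the encoding from Example #3 (1 = call, (2, amount) = raise, 3 = fold); entirely hypothetical:

def parse_action(self, action):
    # Hypothetical mapping from a guest's string input to the numeric
    # encoding that holdem.safe_actions appears to expect.
    if action == 'call':
        return 1
    if action.startswith('raise'):
        return (2, int(action.split()[1]))  # e.g. "raise 100"
    return 3  # fold by default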
Example #5
import holdem


def play_out_hand(env, n_seats):
  # reset the environment and gather the relevant observations
  (player_states, (community_infos, community_cards)) = env.reset()
  (player_infos, player_hands) = zip(*player_states)

  # display the table, cards and all
  env.render(mode='human')

  terminal = False
  while not terminal:
    # play safe actions: check when no one else has raised, call when raised
    actions = holdem.safe_actions(community_infos, n_seats=n_seats)
    (player_states, (community_infos, community_cards)), rews, terminal, info = env.step(actions)
    env.render(mode='human')
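A minimal driver for `play_out_hand`, assuming the `holdem` package registers the `TexasHoldem-v1` environment with gym and exposes `add_player`; seat numbers and stack sizes are illustrative:

import gym
import holdem

env = gym.make('TexasHoldem-v1')  # assumed registration name
env.add_player(0, stack=2000)     # seat 0
env.add_player(1, stack=2000)     # seat 1
play_out_hand(env, env.n_seats)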