import logging
import pickle

import cppimport
import numpy as np
import pandas as pd

# Project-local dependencies assumed importable from this repository:
# HoldemTable, EquityPlayer, RandomPlayer, KeyPressAgent, PlayerForTest,
# sample_cards, make_one_hot_encoders, preprocess_data_state.


def generate_data(n_samples=1000, max_runs=1000, write_data_dir=None, write_data_suffix=None):
    """Generate equity-labelled training data with the compiled Monte Carlo calculator."""
    print("Generating data")
    cpp_calculator = cppimport.imp("tools.montecarlo_cpp.pymontecarlo")
    get_equity = cpp_calculator.montecarlo

    table = HoldemTable()
    rank_enc, suit_enc = make_one_hot_encoders()

    x_data = []
    y_data = []
    for _ in range(n_samples):
        # Create a fresh deck for every sample
        table._create_card_deck()

        # Sample hole cards; the opponent's cards are drawn as well so they
        # cannot reappear on the board (assumes sample_cards removes the
        # drawn cards from the deck)
        p1_cards = sample_cards(table.deck, 2)
        p2_cards = sample_cards(table.deck, 2)

        # Sample table cards for a random stage:
        # preflop (0), flop (3), turn (4) or river (5)
        stage_card_nums = [0, 3, 4, 5]
        num_table_samples = np.random.choice(stage_card_nums)
        table_cards = sample_cards(table.deck, num_table_samples)

        equity = get_equity(set(p1_cards), set(table_cards), 2, max_runs)
        encoded_state = preprocess_data_state(p1_cards, table_cards, rank_enc, suit_enc)
        x_data.append(encoded_state)
        y_data.append(equity)

    x_data = np.array(x_data)
    y_data = np.array(y_data)

    if write_data_dir and write_data_suffix:
        x_data_path = write_data_dir + 'X_' + write_data_suffix
        y_data_path = write_data_dir + 'Y_' + write_data_suffix
        with open(x_data_path, 'wb') as handle:
            pickle.dump(x_data, handle, protocol=pickle.HIGHEST_PROTOCOL)
        with open(y_data_path, 'wb') as handle:
            pickle.dump(y_data, handle, protocol=pickle.HIGHEST_PROTOCOL)

    return x_data, y_data
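# Example (sketch): build a small dataset and persist it to disk. The 'data/'
# directory and 'train.pkl' suffix are hypothetical names; note that
# generate_data() joins directory and suffix by plain string concatenation,
# so the trailing slash on the directory is required.
def demo_generate_data():
    x_data, y_data = generate_data(n_samples=100, max_runs=500,
                                   write_data_dir='data/', write_data_suffix='train.pkl')
    print(f"Features: {x_data.shape}, labels: {y_data.shape}")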
def sample_scenario():
    """Sample random hole cards and a random board for a single scenario."""
    table = HoldemTable()

    # Create a fresh deck
    table._create_card_deck()

    # Sample hole cards; the opponent's cards are drawn as well so they
    # cannot reappear on the board
    p1_cards = sample_cards(table.deck, 2)
    p2_cards = sample_cards(table.deck, 2)

    # Sample table cards for a random stage:
    # preflop (0), flop (3), turn (4) or river (5)
    stage_card_nums = [0, 3, 4, 5]
    num_table_samples = np.random.choice(stage_card_nums)
    cards_on_table = sample_cards(table.deck, num_table_samples)

    my_cards = set(p1_cards)
    cards_on_table = set(cards_on_table)
    return my_cards, cards_on_table
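# Example (sketch): estimate the equity of one sampled scenario with the same
# compiled Monte Carlo backend used in generate_data(). The argument order
# (my_cards, table_cards, n_players, n_runs) mirrors the get_equity call above.
def demo_scenario_equity(max_runs=1000):
    calculator = cppimport.imp("tools.montecarlo_cpp.pymontecarlo")
    my_cards, cards_on_table = sample_scenario()
    equity = calculator.montecarlo(my_cards, cards_on_table, 2, max_runs)
    print(f"Equity of {my_cards} on {cards_on_table}: {equity:.2f}")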
def _create_env(n_players):
    """Create a test environment with the given number of players."""
    env = HoldemTable()
    for _ in range(n_players):
        player = PlayerForTest()
        env.add_player(player)
    env.reset()
    return env
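# Example (sketch): a minimal pytest-style check that the helper seats the
# requested number of players. The 'players' attribute of HoldemTable is an
# assumption here, not a confirmed part of its interface.
def test_create_env_seats_players():
    env = _create_env(3)
    assert len(env.players) == 3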
class Runner:
    """Orchestrates poker games between different agent types."""

    def __init__(self, render, num_episodes):
        """Initialize the runner."""
        self.winner_in_episodes = []
        self.render = render
        self.env = None
        self.num_episodes = num_episodes
        self.log = logging.getLogger(__name__)

    def run_episode(self):
        """Run a single episode until the environment reports it is done."""
        self.env.reset()
        while True:
            if self.render:
                self.env.render()
            _, _, done, _ = self.env.step()
            if done:
                break

    def random_agents(self):
        """Create an environment with 6 random players."""
        num_of_plrs = 6
        self.env = HoldemTable(num_of_players=num_of_plrs, initial_stacks=500)
        for _ in range(num_of_plrs):
            player = RandomPlayer(500)
            self.env.add_player(player)
        self.run_episode()

    def key_press_agents(self):
        """Create an environment with 6 key press agents."""
        num_of_plrs = 6
        self.env = HoldemTable(num_of_players=num_of_plrs, initial_stacks=500)
        for _ in range(num_of_plrs):
            player = KeyPressAgent(500)
            self.env.add_player(player)
        self.run_episode()

    def equity_vs_random(self):
        """Create 6 players, 4 of them equity based, 2 of them random."""
        self.env = HoldemTable(num_of_players=6, initial_stacks=500)
        self.env.add_player(EquityPlayer(name='equity/50/50', min_call_equity=.5, min_bet_equity=.5))
        self.env.add_player(EquityPlayer(name='equity/50/80', min_call_equity=.5, min_bet_equity=.8))
        self.env.add_player(EquityPlayer(name='equity/70/70', min_call_equity=.7, min_bet_equity=.7))
        self.env.add_player(EquityPlayer(name='equity/20/30', min_call_equity=.2, min_bet_equity=.3))
        self.env.add_player(RandomPlayer())
        self.env.add_player(RandomPlayer())

        for _ in range(self.num_episodes):
            self.run_episode()
            self.winner_in_episodes.append(self.env.winner_ix)

        league_table = pd.Series(self.winner_in_episodes).value_counts()
        best_player = league_table.index[0]
        print(league_table)
        print(f"Best Player: {best_player}")

    def equity_self_improvement(self, improvement_rounds):
        """Create 6 equity players and, after each round, move every player's
        thresholds halfway towards those of the best-performing player."""
        calling = [.1, .2, .3, .4, .5, .6]
        betting = [.2, .3, .4, .5, .6, .7]

        for improvement_round in range(improvement_rounds):
            self.env = HoldemTable(num_of_players=6, initial_stacks=100)
            for i in range(6):
                self.env.add_player(EquityPlayer(name=f'Equity/{calling[i]}/{betting[i]}',
                                                 min_call_equity=calling[i],
                                                 min_bet_equity=betting[i]))

            for _ in range(self.num_episodes):
                self.run_episode()
                self.winner_in_episodes.append(self.env.winner_ix)

            league_table = pd.Series(self.winner_in_episodes).value_counts()
            best_player = int(league_table.index[0])
            print(league_table)
            print(f"Best Player: {best_player}")

            # Self-improve: average every player's thresholds with the winner's.
            self.log.info(f"Self improvement round {improvement_round}")
            for i in range(6):
                calling[i] = np.mean([calling[i], calling[best_player]])
                self.log.info(f"New calling for player {i} is {calling[i]}")
                betting[i] = np.mean([betting[i], betting[best_player]])
                self.log.info(f"New betting for player {i} is {betting[i]}")
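# Example (sketch): drive the runner directly. Runner's constructor takes
# render and num_episodes as shown above; the values here are illustrative.
if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)
    runner = Runner(render=False, num_episodes=20)
    runner.equity_vs_random()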