Ejemplo n.º 1
0
    def equity_self_improvement(self, improvement_rounds):
        """Iteratively self-improve 6 equity-based players.

        Each round plays ``self.num_episodes`` episodes, finds the player
        that won most often, and moves every player's call/bet equity
        thresholds halfway towards the winner's thresholds.

        Args:
            improvement_rounds (int): number of improvement iterations.
        """
        calling = [.1, .2, .3, .4, .5, .6]
        betting = [.2, .3, .4, .5, .6, .7]
        num_players = 6

        for improvement_round in range(improvement_rounds):
            # BUG FIX: the table was sized for 5 players although 6 are added.
            self.env = HoldemTable(num_of_players=num_players, initial_stacks=100)
            for i in range(num_players):
                self.env.add_player(EquityPlayer(name=f'Equity/{calling[i]}/{betting[i]}',
                                                 min_call_equity=calling[i],
                                                 min_bet_equity=betting[i]))

            # BUG FIX: rank only this round's winners so earlier rounds cannot
            # bias the choice of the current best player.
            round_winners = []
            for _ in range(self.num_episodes):
                self.run_episode()
                round_winners.append(self.env.winner_ix)
            self.winner_in_episodes.extend(round_winners)  # keep full history

            league_table = pd.Series(round_winners).value_counts()
            best_player = int(league_table.index[0])
            print(league_table)
            print(f"Best Player: {best_player}")

            # Self improve: average each threshold with the best player's.
            self.log.info(f"Self improvement round {improvement_round}")
            for i in range(num_players):
                calling[i] = np.mean([calling[i], calling[best_player]])
                self.log.info(f"New calling for player {i} is {calling[i]}")
                betting[i] = np.mean([betting[i], betting[best_player]])
                self.log.info(f"New betting for player {i} is {betting[i]}")
Ejemplo n.º 2
0
    def random_agents(self):
        """Set up a table of 6 random players and play a single episode."""
        player_count = 6
        self.env = HoldemTable(num_of_players=player_count, initial_stacks=500)
        for _ in range(player_count):
            self.env.add_player(RandomPlayer(500))

        self.run_episode()
Ejemplo n.º 3
0
    def key_press_agents(self):
        """Set up a table of 6 key-press agents and play a single episode."""
        player_count = 6
        self.env = HoldemTable(num_of_players=player_count, initial_stacks=500)
        for _ in range(player_count):
            self.env.add_player(KeyPressAgent(500))

        self.run_episode()
Ejemplo n.º 4
0
def generate_data(n_samples=1000,
                  max_runs=1000,
                  write_data_dir=None,
                  write_data_suffix=None):
    """Generate (encoded state, equity) training pairs.

    For each sample, a fresh deck is created, hero and opponent hole cards
    plus a random-stage board are drawn, and the hero's equity is estimated
    with the C++ Monte-Carlo calculator.

    Args:
        n_samples (int): number of (state, equity) pairs to generate.
        max_runs (int): Monte-Carlo iterations per equity estimate.
        write_data_dir (str | None): directory prefix for pickled output.
        write_data_suffix (str | None): filename suffix for pickled output.

    Returns:
        tuple: ``(x_data, y_data)`` as numpy arrays of encoded states and
        the corresponding equities.
    """
    print("Generating data")
    cpp_calculator = cppimport.imp("tools.montecarlo_cpp.pymontecarlo")
    get_equity = cpp_calculator.montecarlo  # removed redundant alias chain

    table = HoldemTable()
    rank_enc, suit_enc = make_one_hot_encoders()
    x_data = []
    y_data = []
    # Number of board cards per stage: preflop, flop, turn, river.
    stage_card_nums = [0, 3, 4, 5]
    for _ in range(n_samples):
        # Create a fresh full deck for every sample.
        table._create_card_deck()

        # Sampling removes cards from the deck, so the opponent's (otherwise
        # unused) hole cards cannot reappear among the table cards.
        p1_cards = sample_cards(table.deck, 2)
        sample_cards(table.deck, 2)  # burn opponent hole cards

        num_table_samples = np.random.choice(stage_card_nums)
        table_cards = sample_cards(table.deck, num_table_samples)

        equity = get_equity(set(p1_cards), set(table_cards), 2, max_runs)

        encoded_state = preprocess_data_state(p1_cards, table_cards, rank_enc,
                                              suit_enc)

        x_data.append(encoded_state)
        y_data.append(equity)

    x_data = np.array(x_data)
    y_data = np.array(y_data)

    if write_data_dir and write_data_suffix:
        x_data_path = write_data_dir + 'X_' + write_data_suffix
        y_data_path = write_data_dir + 'Y_' + write_data_suffix

        with open(x_data_path, 'wb') as handle:
            pickle.dump(x_data, handle, protocol=pickle.HIGHEST_PROTOCOL)

        with open(y_data_path, 'wb') as handle:
            pickle.dump(y_data, handle, protocol=pickle.HIGHEST_PROTOCOL)

    return x_data, y_data
Ejemplo n.º 5
0
    def equity_vs_random(self):
        """Create 6 players, 4 of them equity based, 2 of them random.

        Plays ``self.num_episodes`` episodes and prints a league table of
        winner indices plus the overall best player.
        """
        # BUG FIX: the table was sized for 5 players although 6 are added below.
        self.env = HoldemTable(num_of_players=6, initial_stacks=500)
        # NOTE(review): the negative min_bet_equity thresholds look suspicious
        # (a bet threshold below zero is presumably always met) — confirm
        # against EquityPlayer before relying on these rankings.
        self.env.add_player(EquityPlayer(name='equity/50/50', min_call_equity=.5, min_bet_equity=-.5))
        self.env.add_player(EquityPlayer(name='equity/50/80', min_call_equity=.8, min_bet_equity=-.8))
        self.env.add_player(EquityPlayer(name='equity/70/70', min_call_equity=.7, min_bet_equity=-.7))
        self.env.add_player(EquityPlayer(name='equity/20/30', min_call_equity=.2, min_bet_equity=-.3))
        self.env.add_player(RandomPlayer())
        self.env.add_player(RandomPlayer())

        for _ in range(self.num_episodes):
            self.run_episode()
            self.winner_in_episodes.append(self.env.winner_ix)

        # value_counts sorts by frequency, so index[0] is the most frequent winner.
        league_table = pd.Series(self.winner_in_episodes).value_counts()
        best_player = league_table.index[0]

        print(league_table)
        print(f"Best Player: {best_player}")
Ejemplo n.º 6
0
def sample_scenario():
    """Sample random hole cards and a random-stage board from a fresh deck.

    Returns:
        tuple[set, set]: hero's hole cards and the cards on the table.
    """
    table = HoldemTable()
    table._create_card_deck()

    # Drawing removes cards from the deck, so later draws cannot collide
    # with earlier ones.
    p1_cards = sample_cards(table.deck, 2)
    p2_cards = sample_cards(table.deck, 2)  # reserved from the deck, not returned

    # 0 / 3 / 4 / 5 board cards correspond to preflop, flop, turn, river.
    num_table_samples = np.random.choice([0, 3, 4, 5])
    cards_on_table = sample_cards(table.deck, num_table_samples)

    return set(p1_cards), set(cards_on_table)
Ejemplo n.º 7
0
def _create_env(n_players):
    """Return a freshly reset HoldemTable populated with ``n_players`` test players."""
    env = HoldemTable()
    for _ in range(n_players):
        env.add_player(PlayerForTest())
    env.reset()
    return env
Ejemplo n.º 8
0
class Runner:
    """Orchestrate poker episodes for different agent line-ups.

    Holds a single environment at a time in ``self.env`` and records the
    winner index of every episode played in ``self.winner_in_episodes``.
    """

    def __init__(self, render, num_episodes):
        """Initialize the runner.

        Args:
            render (bool): whether to render the table while stepping.
            num_episodes (int): episodes to play per evaluation run.
        """
        self.winner_in_episodes = []  # winner index of every episode ever played
        self.render = render
        self.env = None  # created lazily by the agent-setup methods below
        self.num_episodes = num_episodes
        self.log = logging.getLogger(__name__)

    def run_episode(self):
        """Run one episode, stepping the environment until it reports done."""
        self.env.reset()
        while True:
            if self.render:
                self.env.render()
            # step() appears to follow the gym convention
            # (observation, reward, done, info) — TODO confirm.
            _, _, done, _ = self.env.step()

            if done:
                break

    def random_agents(self):
        """Create an environment with 6 random players and play one episode."""
        num_of_plrs = 6
        self.env = HoldemTable(num_of_players=num_of_plrs, initial_stacks=500)
        for _ in range(num_of_plrs):
            self.env.add_player(RandomPlayer(500))

        self.run_episode()

    def key_press_agents(self):
        """Create an environment with 6 key press agents and play one episode."""
        num_of_plrs = 6
        self.env = HoldemTable(num_of_players=num_of_plrs, initial_stacks=500)
        for _ in range(num_of_plrs):
            self.env.add_player(KeyPressAgent(500))

        self.run_episode()

    def equity_vs_random(self):
        """Create 6 players, 4 of them equity based, 2 of them random."""
        # BUG FIX: the table was sized for 5 players although 6 are added below.
        self.env = HoldemTable(num_of_players=6, initial_stacks=500)
        # NOTE(review): the negative min_bet_equity thresholds look suspicious
        # (a bet threshold below zero is presumably always met) — confirm
        # against EquityPlayer before relying on these rankings.
        self.env.add_player(EquityPlayer(name='equity/50/50', min_call_equity=.5, min_bet_equity=-.5))
        self.env.add_player(EquityPlayer(name='equity/50/80', min_call_equity=.8, min_bet_equity=-.8))
        self.env.add_player(EquityPlayer(name='equity/70/70', min_call_equity=.7, min_bet_equity=-.7))
        self.env.add_player(EquityPlayer(name='equity/20/30', min_call_equity=.2, min_bet_equity=-.3))
        self.env.add_player(RandomPlayer())
        self.env.add_player(RandomPlayer())

        for _ in range(self.num_episodes):
            self.run_episode()
            self.winner_in_episodes.append(self.env.winner_ix)

        # value_counts sorts by frequency, so index[0] is the most frequent winner.
        league_table = pd.Series(self.winner_in_episodes).value_counts()
        best_player = league_table.index[0]

        print(league_table)
        print(f"Best Player: {best_player}")

    def equity_self_improvement(self, improvement_rounds):
        """Iteratively self-improve 6 equity-based players.

        Each round plays ``self.num_episodes`` episodes, finds the player
        that won most often, and moves every player's call/bet equity
        thresholds halfway towards the winner's thresholds.

        Args:
            improvement_rounds (int): number of improvement iterations.
        """
        calling = [.1, .2, .3, .4, .5, .6]
        betting = [.2, .3, .4, .5, .6, .7]
        num_players = 6

        for improvement_round in range(improvement_rounds):
            # BUG FIX: the table was sized for 5 players although 6 are added.
            self.env = HoldemTable(num_of_players=num_players, initial_stacks=100)
            for i in range(num_players):
                self.env.add_player(EquityPlayer(name=f'Equity/{calling[i]}/{betting[i]}',
                                                 min_call_equity=calling[i],
                                                 min_bet_equity=betting[i]))

            # BUG FIX: rank only this round's winners so earlier rounds cannot
            # bias the choice of the current best player.
            round_winners = []
            for _ in range(self.num_episodes):
                self.run_episode()
                round_winners.append(self.env.winner_ix)
            self.winner_in_episodes.extend(round_winners)  # keep full history

            league_table = pd.Series(round_winners).value_counts()
            best_player = int(league_table.index[0])
            print(league_table)
            print(f"Best Player: {best_player}")

            # Self improve: average each threshold with the best player's.
            self.log.info(f"Self improvement round {improvement_round}")
            for i in range(num_players):
                calling[i] = np.mean([calling[i], calling[best_player]])
                self.log.info(f"New calling for player {i} is {calling[i]}")
                betting[i] = np.mean([betting[i], betting[best_player]])
                self.log.info(f"New betting for player {i} is {betting[i]}")