Ejemplo n.º 1
0
 def testStreetIncrement(self):
     """Check street progression from a TURN start and from a PREFLOP start.

     The first hand opens on the turn and is played bet/call twice; the
     second opens preflop and tracks positions, stacks and pot to the end.
     """

     def latest(game, history, field):
         # `field` column of the newest row in the state history.
         return history[:, -1][:, game.state_mapping[field]]

     config = copy.deepcopy(self.env_params)
     config['starting_street'] = pdt.Street.TURN
     config['pot'] = 1
     env = Poker(config)
     state, _, done, _, _ = env.reset()
     # Directly after the reset the last two board entries must still be 0.
     for slot in (-2, -1):
         assert env.board[slot] == 0
     state, _, done, _, _ = env.step(ACTION_BET)
     state, _, done, _, _ = env.step(ACTION_CALL)
     assert env.street == pdt.Street.RIVER
     assert env.board[-2] != 0
     state, _, done, _, _ = env.step(ACTION_BET)
     state, _, done, _, _ = env.step(ACTION_CALL)
     assert done == True

     # Second hand: rebuild the env with a PREFLOP start and an empty pot.
     del env
     config['starting_street'] = pdt.Street.PREFLOP
     config['pot'] = 0
     env = Poker(config)
     state, _, done, _, _ = env.reset()
     state, _, done, _, _ = env.step(ACTION_CALL)
     assert latest(env, state, 'hero_position') == pdt.Position.BB
     assert latest(env, state, 'last_position') == pdt.Position.SB
     assert env.pot == 2
     state, _, done, _, _ = env.step(ACTION_RAISE)
     assert env.players['BB'].stack == 2
     assert env.players['SB'].stack == 4
     assert env.pot == 4
     assert latest(env, state, 'hero_position') == pdt.Position.SB
     assert latest(env, state, 'last_position') == pdt.Position.BB
     state, _, done, _, _ = env.step(ACTION_CALL)
     assert latest(env, state, 'hero_position') == pdt.Position.BB
     assert latest(env, state, 'player2_position') == pdt.Position.SB
     assert latest(env, state, 'last_position') == pdt.Position.BTN
     assert latest(env, state,
                   'last_aggressive_position') == pdt.Position.BB
     assert env.street == pdt.Street.FLOP

     # Two checks are expected to move the hand on by one street each time.
     for next_street in (pdt.Street.TURN, pdt.Street.RIVER):
         state, _, done, _, _ = env.step(ACTION_CHECK)
         state, _, done, _, _ = env.step(ACTION_CHECK)
         assert env.street == next_street
     state, _, done, _, _ = env.step(ACTION_CHECK)
     state, _, done, _, _ = env.step(ACTION_CHECK)
     assert done == True
Ejemplo n.º 2
0
 def testEnvCategoryMapping(self):
     """Check `convert_to_category` at several points of a two-player hand.

     Plays a hand with stacksize 50 from preflop, then repeats the opening
     checks with stacksize 3. Only element [0] of each result is compared.
     """
     net = pdt.NetworkActions

     def expect_categories(game, cases):
         # Each case is (network action, betsize, expected category).
         for action, amount, category in cases:
             assert game.convert_to_category(action, amount)[0] == category

     config = copy.deepcopy(self.env_params)
     config['stacksize'] = 50
     config['n_players'] = 2
     config['starting_street'] = pdt.Street.PREFLOP
     config['pot'] = 0
     env = Poker(config)
     state, obs, done, mask, betsize_mask = env.reset()
     expect_categories(env, [
         (net.RAISE, 3, 4),
         (net.RAISE, 2, 3),
         (net.CALL, 0.5, 2),
         (net.CHECK, 0, 0),
     ])
     state, obs, done, mask, betsize_mask = env.step(ACTION_RAISE)
     expect_categories(env, [
         (net.RAISE, 9, 4),
         (net.RAISE, 5, 3),
         (net.CALL, 2, 2),
         (net.CHECK, 0, 0),
     ])
     state, obs, done, mask, betsize_mask = env.step(ACTION_CALL)
     expect_categories(env, [
         (net.BET, 6, 4),
         (net.BET, 3, 3),
         (net.FOLD, 0, 1),
     ])
     state, obs, done, mask, betsize_mask = env.step(ACTION_BET)
     expect_categories(env, [
         (net.RAISE, 24, 4),
         (net.RAISE, 12, 3),
         (net.CALL, 6, 2),
         (net.FOLD, 0, 1),
     ])
     state, obs, done, mask, betsize_mask = env.step(ACTION_RAISE)
     expect_categories(env, [
         (net.RAISE, 47, 4),
         (net.RAISE, 42, 3),
         (net.CALL, 18, 2),
         (net.FOLD, 0, 1),
     ])

     # Same opening spot again, this time with a stack of only 3.
     del env
     config['stacksize'] = 3
     config['n_players'] = 2
     config['starting_street'] = pdt.Street.PREFLOP
     config['pot'] = 0
     env = Poker(config)
     state, obs, done, mask, betsize_mask = env.reset()
     expect_categories(env, [(net.RAISE, 3, 4)])
     print('check', env.convert_to_category(pdt.NetworkActions.RAISE, 2)[0])
     expect_categories(env, [
         (net.RAISE, 2, 3),
         (net.CALL, 0, 2),
         (net.FOLD, 0, 1),
     ])
Ejemplo n.º 3
0
 def preflopTests(self):
     """Preflop checks: facing an SB call, then an SB min-raise.

     NOTE(review): the name lacks the `test` prefix, so default unittest
     discovery would presumably not run it - confirm whether that is meant.
     """

     def latest(game, history, field):
         # `field` column of the newest row in the state history.
         return history[:, -1][:, game.state_mapping[field]]

     config = copy.deepcopy(self.env_params)
     config['stacksize'] = 5
     config['n_players'] = 2
     config['starting_street'] = pdt.Street.PREFLOP
     config['pot'] = 0

     # Scenario 1: the first action is a call.
     env = Poker(config)
     state, _, _, mask, _ = env.reset()
     assert np.array_equal(mask, np.array([0, 1, 1, 0, 1]))
     state, _, _, mask, _ = env.step(ACTION_CALL)
     for seat in ('player2_stacksize', 'player1_stacksize'):
         assert latest(env, state, seat) == 4
     assert np.array_equal(mask, np.array([1, 0, 0, 0, 1]))

     # Scenario 2: a fresh env where the first action is a min-raise.
     del env
     env = Poker(config)
     state, _, _, mask, _ = env.reset()
     state, _, _, mask, _ = env.step(ACTION_MIN_RAISE)
     assert latest(env, state, 'player2_stacksize') == 3
     assert latest(env, state, 'player1_stacksize') == 4
Ejemplo n.º 4
0
 def testCritic(self):
     """Build a critic for the default env and check its 'value' output type."""
     env = Poker(copy.deepcopy(self.env_params))
     n_actions, n_betsizes, n_states = (env.action_space,
                                        env.betsize_space,
                                        env.state_space)
     critic = OmahaObsQCritic(152, n_states, n_actions, n_betsizes,
                              self.network_params)
     _, obs, _, _, _ = env.reset()
     # The critic is fed the observation returned by reset().
     prediction = critic(obs)
     assert isinstance(prediction['value'], torch.Tensor)
Ejemplo n.º 5
0
 def testMasks(self):
     """Follow state, action mask and betsize mask through two preflop raises."""

     def latest(field):
         # `field` column of the newest state row; reads the current
         # `state` and `env` of the enclosing test at call time.
         return state[:, -1][:, env.state_mapping[field]]

     config = copy.deepcopy(self.env_params)
     config['stacksize'] = 5
     config['n_players'] = 2
     config['starting_street'] = pdt.Street.PREFLOP
     config['pot'] = 0
     env = Poker(config)

     # Initial spot after reset.
     state, _, _, mask, betsize_mask = env.reset()
     assert latest('pot') == 1.5
     assert latest('player1_stacksize') == 4.5
     assert latest('player1_position') == pdt.Position.SB
     assert latest('player2_stacksize') == 4
     assert latest('player2_position') == pdt.Position.BB
     assert latest('street') == pdt.Street.PREFLOP
     assert env.current_player == 'SB'
     assert np.array_equal(betsize_mask, np.array([1, 1]))
     assert np.array_equal(mask, np.array([0, 1, 1, 0, 1]))

     # After the first raise.
     state, _, _, mask, betsize_mask = env.step(ACTION_RAISE)
     assert env.current_player == 'BB'
     assert latest('pot') == 4
     assert latest('player1_stacksize') == 4
     assert latest('player1_position') == pdt.Position.BB
     assert latest('player2_stacksize') == 2
     assert latest('player2_position') == pdt.Position.SB
     assert latest('street') == pdt.Street.PREFLOP
     assert np.array_equal(mask, np.array([0, 1, 1, 0, 1]))
     assert np.array_equal(betsize_mask, np.array([1, 0]))

     # After the second raise.
     state, _, _, mask, betsize_mask = env.step(ACTION_RAISE)
     assert latest('pot') == 8
     assert latest('player1_stacksize') == 2
     assert latest('player1_position') == pdt.Position.SB
     assert latest('player2_stacksize') == 0
     assert latest('player2_position') == pdt.Position.BB
     assert latest('street') == pdt.Street.PREFLOP
     assert np.array_equal(mask, np.array([0, 1, 1, 0, 0]))
     assert np.array_equal(betsize_mask, np.array([0, 0]))
Ejemplo n.º 6
0
 def testStreetInitialization(self):
     """With a RIVER start, player1 and hero are both expected to be the BB."""
     config = copy.deepcopy(self.env_params)
     config['stacksize'] = 50
     config['n_players'] = 2
     config['starting_street'] = pdt.Street.RIVER
     config['pot'] = 1
     env = Poker(config)
     state, _, _, _, _ = env.reset()
     for field in ('player1_position', 'hero_position'):
         assert state[:, -1][:, env.state_mapping[field]] == pdt.Position.BB
Ejemplo n.º 7
0
 def testActor(self):
     """Run the actor on the reset state and after a bet; check output types."""
     env = Poker(copy.deepcopy(self.env_params))
     n_actions, n_betsizes, n_states = (env.action_space,
                                        env.betsize_space,
                                        env.state_space)
     actor = OmahaActor(152, n_states, n_actions, n_betsizes,
                        self.network_params)
     state, _, _, mask, betsize_mask = env.reset()
     # The first forward pass is made only for its side of the exercise;
     # its result is replaced by the second one below.
     output = actor(state, mask, betsize_mask)
     state, _, _, mask, betsize_mask = env.step(ACTION_BET)
     output = actor(state, mask, betsize_mask)
     for key in ('action_probs', 'action_prob'):
         assert isinstance(output[key], torch.Tensor)
Ejemplo n.º 8
0
 def testReset(self):
     """Check everything `reset()` returns and sets up for a RIVER start."""

     def cell(field):
         # `field` of the newest row of the single batch entry.
         return state[0, -1, env.state_mapping[field]]

     defaults = self.env_params
     config = copy.deepcopy(defaults)
     config['starting_street'] = pdt.Street.RIVER
     env = Poker(config)
     state, obs, done, mask, betsize_mask = env.reset()

     # Shapes of the returned histories.
     assert state.ndim == 3
     assert obs.ndim == 3
     assert state.shape == (1, 1, STATE_SHAPE)
     assert obs.shape == (1, 1, OBS_SHAPE)

     # Contents of the first (and only) state row.
     assert state[0, 0, env.state_mapping['street']] == pdt.Street.RIVER
     assert cell('hero_position') == pdt.Position.BB
     assert cell('hero_stacksize') == defaults['stacksize']
     assert cell('player1_position') == pdt.Position.BB
     assert cell('player1_stacksize') == defaults['stacksize']
     assert cell('player1_street_total') == 0
     assert cell('player2_position') == pdt.Position.SB
     assert cell('player2_stacksize') == defaults['stacksize']
     assert cell('player2_street_total') == 0
     assert cell('last_action') == pdt.Action.UNOPENED
     assert cell('last_aggressive_action') == pdt.Action.UNOPENED
     assert cell('last_betsize') == 0
     assert cell('last_position') == pdt.Position.BTN
     assert cell('amount_to_call') == 0
     assert cell('pot_odds') == 0

     # Env bookkeeping and masks.
     assert env.players_remaining == 2
     assert done == False
     assert np.array_equal(mask, np.array([1., 0., 0., 1., 0.]))
     assert np.array_equal(betsize_mask, np.array([1., 1.]))

     # Cards: every hand is full and the deck is short by all dealt cards.
     for seat in ('SB', 'BB'):
         assert len(
             env.players.players[seat].hand) == defaults['cards_per_player']
     board_cards = pdt.Globals.INITIALIZE_BOARD_CARDS[
         config['starting_street']]
     assert len(env.deck) == 52 - (
         defaults['cards_per_player'] * defaults['n_players'] + board_cards)
Ejemplo n.º 9
0
 def testTies(self):
     """Rig board and hands, play bet/call, and expect both stacks at 5.5."""
     config = copy.deepcopy(self.env_params)
     config['starting_street'] = pdt.Street.RIVER
     config['stacksize'] = 5
     config['pot'] = 1
     env = Poker(config)
     _, _, done, _, _ = env.reset()
     # Overwrite the dealt cards so the outcome is fixed.
     env.board = [14, 0, 13, 1, 12, 2, 2, 2, 2, 3]
     env.players['SB'].hand = [[11, 3], [10, 3], [3, 2], [3, 3]]
     env.players['BB'].hand = [[11, 2], [10, 2], [4, 0], [4, 3]]
     _, _, done, _, _ = env.step(ACTION_BET)
     _, _, done, _, _ = env.step(ACTION_CALL)
     assert done == True
     for seat in ('SB', 'BB'):
         assert env.players[seat].stack == 5.5
Ejemplo n.º 10
0
    def testCheckBetFold(self):
        """River hand: check, bet, then fold; verify state before and after."""

        def latest(field):
            # `field` column of the newest state row; reads the current
            # `state` and `env` of the enclosing test at call time.
            return state[:, -1][:, env.state_mapping[field]]

        config = copy.deepcopy(self.env_params)
        config['starting_street'] = pdt.Street.RIVER
        config['stacksize'] = 5
        config['pot'] = 1
        env = Poker(config)
        state, obs, done, mask, betsize_mask = env.reset()
        state, obs, done, mask, betsize_mask = env.step(ACTION_CHECK)
        state, obs, done, mask, betsize_mask = env.step(ACTION_BET)

        # Two steps after reset the histories hold three rows.
        assert state.ndim == 3
        assert obs.ndim == 3
        assert state.shape == (1, 3, STATE_SHAPE)
        assert obs.shape == (1, 3, OBS_SHAPE)

        # Player objects and pot after the bet.
        assert env.players['SB'].stack == 4
        assert env.players['BB'].stack == 5
        assert env.players['SB'].street_total == 1
        assert env.players['BB'].street_total == 0
        assert env.pot == 2

        # Newest state row.
        assert latest('street') == pdt.Street.RIVER
        assert latest('hero_position') == pdt.Position.BB
        assert latest('last_position') == pdt.Position.SB
        assert latest('last_action') == pdt.Action.BET
        assert latest('last_betsize') == 1
        assert latest('hero_stacksize') == config['stacksize']
        assert latest('player2_stacksize') == config['stacksize'] - 1
        assert latest('amount_to_call') == 1
        self.assertAlmostEqual(latest('pot_odds')[0], 0.333, places=2)
        assert done == False
        assert np.array_equal(mask, np.array([0., 1., 1., 0., 1.]))
        assert np.array_equal(betsize_mask, np.array([1., 1.]))

        # The fold ends the hand.
        state, obs, done, mask, betsize_mask = env.step(ACTION_FOLD)
        assert done == True
        assert env.players['SB'].stack == 6
        assert env.players['BB'].stack == 5
        assert env.players['BB'].status == Status.FOLDED
Ejemplo n.º 11
0
 def testBlindInitialization(self):
     """After a PREFLOP reset the blinds must already be taken from stacks."""
     config = copy.deepcopy(self.env_params)
     config['starting_street'] = pdt.Street.PREFLOP
     config['pot'] = 0
     env = Poker(config)
     state, _, done, _, _ = env.reset()

     small_blind, big_blind = env.players['SB'], env.players['BB']
     assert small_blind.stack == 4.5
     assert big_blind.stack == 4.
     assert small_blind.street_total == 0.5
     assert big_blind.street_total == 1.

     mapping = env.state_mapping
     assert state[0, -1, mapping['blind']] == pdt.Blind.POSTED
     assert state[:, -1][:, mapping['hero_position']] == pdt.Position.SB
     assert state[:, -1][:, mapping['last_position']] == pdt.Position.BB
     assert done == False
Ejemplo n.º 12
0
 def betsizingTests(self):
     """Check `return_potlimit_betsize` before and after a preflop call.

     NOTE(review): the name lacks the `test` prefix, so default unittest
     discovery would presumably not run it - confirm whether that is meant.
     """
     config = copy.deepcopy(self.env_params)
     config['stacksize'] = 5
     config['n_players'] = 2
     config['starting_street'] = pdt.Street.PREFLOP
     config['pot'] = 0
     env = Poker(config)
     state, obs, done, mask, betsize_mask = env.reset()

     # (action, betsize_category, expected betsize) right after reset.
     for action, category, expected in ((4, 0, 1.5), (4, 1, 2.5),
                                        (2, 0, 0.5)):
         betsize = env.return_potlimit_betsize(action=action,
                                               betsize_category=category)
         assert betsize == expected

     state, obs, done, mask, betsize_mask = env.step(ACTION_CALL)

     # Same lookup for action 4 once the call has been made.
     for category, expected in ((0, 1), (1, 2)):
         betsize = env.return_potlimit_betsize(action=4,
                                               betsize_category=category)
         assert betsize == expected
Ejemplo n.º 13
0
 def additionalTests(self):
     """Raise/call preflop, then bet/call; expect empty stacks and a done hand.

     NOTE(review): the name lacks the `test` prefix, so default unittest
     discovery would presumably not run it - confirm whether that is meant.
     """

     def latest(field):
         # `field` column of the newest state row; reads the current
         # `state` and `env` of the enclosing test at call time.
         return state[:, -1][:, env.state_mapping[field]]

     config = copy.deepcopy(self.env_params)
     config['stacksize'] = 5
     config['n_players'] = 2
     config['starting_street'] = pdt.Street.PREFLOP
     config['pot'] = 0
     env = Poker(config)
     state, _, done, _, _ = env.reset()
     state, _, done, _, _ = env.step(ACTION_RAISE)
     assert latest('player2_stacksize') == 2
     state, _, done, _, _ = env.step(ACTION_CALL)
     assert latest('player1_stacksize') == 2
     assert latest('street') == 1
     state, _, done, _, _ = env.step(ACTION_BET)
     state, _, done, _, _ = env.step(ACTION_CALL)
     for seat in ('player1_stacksize', 'player2_stacksize'):
         assert latest(seat) == 0
     assert latest('street') == 3
     assert done == True
Ejemplo n.º 14
0
    def testCheckCheck(self):
        """River hand: two checks in a row end the hand."""

        def second_row(field):
            # Reads row index 1 of the history, which holds two rows at the
            # point where this helper is used.
            return state[:, 1][:, env.state_mapping[field]]

        config = copy.deepcopy(self.env_params)
        config['starting_street'] = pdt.Street.RIVER
        config['stacksize'] = 5
        config['pot'] = 1
        env = Poker(config)
        state, obs, done, mask, betsize_mask = env.reset()
        state, obs, done, mask, betsize_mask = env.step(ACTION_CHECK)

        # One step after reset the histories hold two rows.
        assert state.ndim == 3
        assert obs.ndim == 3
        assert state.shape == (1, 2, STATE_SHAPE)
        assert obs.shape == (1, 2, OBS_SHAPE)

        # State row written by the first check.
        assert second_row('street') == config['starting_street']
        assert second_row('hero_position') == pdt.Position.SB
        assert second_row('player2_position') == pdt.Position.BB
        assert second_row('last_position') == pdt.Position.BB
        assert second_row('last_action') == pdt.Action.CHECK
        assert second_row('hero_stacksize') == config['stacksize']
        assert second_row('player2_stacksize') == config['stacksize']
        assert second_row('amount_to_call') == 0
        assert second_row('pot_odds') == 0
        assert done == False
        assert np.array_equal(mask, np.array([1., 0., 0., 1., 0.]))
        assert np.array_equal(betsize_mask, np.array([1., 1.]))

        # The second check finishes the hand.
        state, obs, done, mask, betsize_mask = env.step(ACTION_CHECK)
        assert done == True
        assert env.players['SB'].stack == 6
        assert env.players['BB'].stack == 5
Ejemplo n.º 15
0
 def testBetRestrictions(self):
     """Bet, raise, min-raise on the river; then only fold/call may remain."""
     config = copy.deepcopy(self.env_params)
     config['starting_street'] = pdt.Street.RIVER
     config['stacksize'] = 5
     config['pot'] = 1
     env = Poker(config)
     state, _, done, mask, betsize_mask = env.reset()
     for action in (ACTION_BET, ACTION_RAISE, ACTION_MIN_RAISE):
         state, _, done, mask, betsize_mask = env.step(action)

     # Stacks and statuses once the min-raise has gone in.
     assert env.players['SB'].stack == 1
     assert env.players['SB'].status == Status.ACTIVE
     assert env.players['BB'].stack == 0
     assert env.players['BB'].status == Status.ALLIN
     assert state[0, -1, env.state_mapping['blind']] == pdt.Blind.NO_BLIND
     assert np.array_equal(mask, np.array([0., 1., 1., 0., 0.]))
     assert np.array_equal(betsize_mask, np.array([0., 0.]))

     # The call closes the hand.
     state, _, done, mask, betsize_mask = env.step(ACTION_CALL)
     assert done == True
     assert env.players['SB'].stack == 11
     assert env.players['BB'].stack == 0
Ejemplo n.º 16
0
 def testAllin(self):
     """Three-player hand: raise, fold, all-in raise, call; check the payout."""
     config = copy.deepcopy(self.env_params)
     config['n_players'] = 3
     config['starting_street'] = pdt.Street.PREFLOP
     config['pot'] = 0
     env = Poker(config)
     _, _, done, _, _ = env.reset()

     # First raise.
     _, _, done, _, _ = env.step(ACTION_RAISE)
     assert env.players['BTN'].stack == 1.5
     assert env.players['BTN'].street_total == 3.5

     # Fold.
     _, _, done, _, _ = env.step(ACTION_FOLD)
     assert env.players['SB'].status == Status.FOLDED

     # Second raise puts the BB all in.
     _, _, done, _, _ = env.step(ACTION_RAISE)
     assert env.players['BB'].stack == 0
     assert env.players['BB'].street_total == 5
     assert env.players['BB'].status == Status.ALLIN

     # The call ends the hand on the river.
     _, _, done, _, _ = env.step(ACTION_CALL)
     assert env.players['BB'].stack == 10.5
     assert env.players['SB'].stack == 4.5
     assert env.players['BTN'].stack == 0
     assert env.players['BTN'].street_total == 0
     assert env.street == pdt.Street.RIVER
     assert done == True
Ejemplo n.º 17
0
class API(object):
    def __init__(self):
        """Connect to the database, build the Omaha env and load the models.

        Everything is created eagerly: the Mongo connection, a two-player
        `Poker` environment starting on the flop, and an actor/critic pair
        whose weights are read from the paths held by `Config`.
        `load_model` raises ValueError if a weight file is missing.
        """
        # Maps each of the two seats to the other one.
        self.increment_position = {'SB': 'BB', 'BB': 'SB'}
        self.seed = 1458
        self.connect()
        self.game_object = pdt.Globals.GameTypeDict[pdt.GameTypes.OMAHAHI]
        self.config = Config()

        rules = self.game_object.rule_params
        state_defaults = self.game_object.state_params
        self.env_params = {
            'game': pdt.GameTypes.OMAHAHI,
            'betsizes': rules['betsizes'],
            'bet_type': rules['bettype'],
            'n_players': 2,
            'pot': 1,
            # Fixed value; game_object.state_params['stacksize'] is not used.
            'stacksize': 10,
            'cards_per_player': state_defaults['cards_per_player'],
            # Fixed value; game_object.starting_street is not used.
            'starting_street': pdt.Street.FLOP,
            'global_mapping': self.config.global_mapping,
            'state_mapping': self.config.state_mapping,
            'obs_mapping': self.config.obs_mapping,
            'shuffle': True
        }
        self.env = Poker(self.env_params)
        self.network_params = self.instantiate_network_params()

        # Actor and critic are constructed from the same argument list.
        model_args = (self.seed, self.env.state_space, self.env.action_space,
                      self.env.betsize_space, self.network_params)
        self.actor = OmahaActor(*model_args)
        self.critic = OmahaObsQCritic(*model_args)
        self.load_model(self.actor, self.config.production_actor)
        self.load_model(self.critic, self.config.production_critic)

        self.player = {'name': None, 'position': 'BB'}
        self.reset_trajectories()

    def reset_trajectories(self):
        self.trajectories = defaultdict(lambda: [])
        self.trajectory = defaultdict(
            lambda: {
                'states': [],
                'obs': [],
                'betsize_masks': [],
                'action_masks': [],
                'actions': [],
                'action_category': [],
                'action_probs': [],
                'action_prob': [],
                'betsize': [],
                'rewards': [],
                'value': []
            })

    def instantiate_network_params(self):
        device = 'cpu'
        network_params = copy.deepcopy(self.config.network_params)
        network_params['maxlen'] = 10
        network_params['device'] = device
        return network_params

    def load_model(self, model, path):
        if os.path.isfile(path):
            model.load_state_dict(load(path, map_location=D('cpu')))
            set_grad_enabled(False)
        else:
            raise ValueError('File does not exist')

    def connect(self):
        """Create a MongoClient for localhost:27017 and keep its `baseline` db."""
        self.db = MongoClient('localhost', 27017, maxPoolSize=10000).baseline

    def update_player_name(self, name: str):
        """updates player name"""
        self.player['name'] = name

    def update_player_position(self, position):
        self.player['position'] = position

    def insert_model_outputs(self, model_outputs, action_mask):
        outputs_json = {
            'action': model_outputs['action'],
            'action_category': model_outputs['action_category'],
            'betsize': model_outputs['betsize'],
            'action_prob':
            model_outputs['action_prob'].detach().numpy().tolist(),
            'action_probs':
            model_outputs['action_probs'].detach().numpy().tolist(),
            'value': model_outputs['value'].detach().numpy().tolist(),
            'action_mask': action_mask.tolist(),
            'player': self.player['name']
        }
        self.db['bot_data'].insert_one(outputs_json)

    def insert_into_db(self, training_data: dict):
        """
        stores player data in the player_stats collection.
        takes trajectories and inserts them into db for data analysis and learning.
        """
        stats_json = {
            'game': self.env.game,
            'player': self.player['name'],
            'reward': training_data[self.player['position']][0]['rewards'][0],
            'position': self.player['position'],
        }
        self.db['player_stats'].insert_one(stats_json)
        keys = training_data.keys()
        positions = [position for position in keys if position in ['SB', 'BB']]
        for position in positions:
            for i, poker_round in enumerate(training_data[position]):
                states = poker_round['states']
                observations = poker_round['obs']
                actions = poker_round['actions']
                action_prob = poker_round['action_prob']
                action_probs = poker_round['action_probs']
                action_categories = poker_round['action_category']
                betsize_masks = poker_round['betsize_masks']
                action_masks = poker_round['action_masks']
                rewards = poker_round['rewards']
                betsizes = poker_round['betsize']
                values = poker_round['value']
                assert (isinstance(rewards, list))
                assert (isinstance(actions, list))
                assert (isinstance(action_prob, list))
                assert (isinstance(action_probs, list))
                assert (isinstance(states, list))
                assert (isinstance(values, list))
                for step, state in enumerate(states):
                    state_json = {
                        'game': self.env.game,
                        'player': self.player['name'],
                        'poker_round': step,
                        'state': state.tolist(),
                        'action_probs': action_probs[step].tolist(),
                        'action_prob': action_prob[step].tolist(),
                        'action': actions[step],
                        'action_category': action_categories[step],
                        'betsize_mask': betsize_masks[step].tolist(),
                        'action_mask': action_masks[step].tolist(),
                        'betsize': betsizes[step],
                        'reward': rewards[step],
                        'value': values[step].tolist()
                    }
                    self.db['game_data'].insert_one(state_json)

    def return_model_outputs(self):
        query = {'player': self.player['name']}
        player_data = self.db['bot_data'].find(query).sort('_id', -1)
        action_probs = []
        values = []
        action_mask = []
        for result in player_data:
            action_probs.append(np.array(result['action_probs']))
            values.append(np.array(result['value']))
            action_mask.append(np.array(result['action_mask']))
            break
        if action_probs:
            action_probs = action_probs[0]
            values = values[0]
            action_mask = action_mask[0]
            if np.sum(action_probs) > 0:
                action_probs *= action_mask
                action_probs /= np.sum(action_probs)
            # scale values
            if np.max(np.abs(values)) > 0:
                values *= action_mask

                values /= self.env_params['stacksize'] + self.env_params['pot']
            model_outputs = {
                'action_probs': action_probs.tolist(),
                'q_values': [values.tolist()]
            }
        else:
            model_outputs = {
                'action_probs': [0] * self.env.action_space,
                'q_values': [0] * self.env.action_space
            }
        print(model_outputs)
        print(action_mask)
        return model_outputs

    def return_player_stats(self):
        """Returns dict of current player stats against the bot."""
        query = {'player': self.player['name']}
        # projection ={'reward':1,'hand_num':1,'_id':0}
        player_data = self.db['player_stats'].find(query)
        total_hands = self.db['player_stats'].count_documents(query)
        results = []
        position_results = {'SB': 0, 'BB': 0}
        # total_hands = 0
        for result in player_data:
            results.append(result['reward'])
            position_results[result['position']] += result['reward']
        bb_per_hand = sum(results) / total_hands if total_hands > 0 else 0
        sb_bb_per_hand = position_results[
            'SB'] / total_hands if total_hands > 0 else 0
        bb_bb_per_hand = position_results[
            'BB'] / total_hands if total_hands > 0 else 0
        player_stats = {
            'results': sum(results),
            'bb_per_hand': round(bb_per_hand, 2),
            'total_hands': total_hands,
            'SB': round(sb_bb_per_hand, 2),
            'BB': round(bb_bb_per_hand, 2),
        }
        return player_stats

    def parse_env_outputs(self, state, action_mask, betsize_mask, done):
        """Bundle the current game snapshot into a JSON string for the frontend.

        The newest row of ``state`` (``state[:, -1]``) supplies the pot, board,
        street, blind flag and the last / last-aggressive action fields.
        Stacks, positions, hole cards, street totals and hand ranks are read
        from the player objects in ``self.env.players``: "hero" is the seat in
        ``self.player['position']`` and "villain" is the seat that
        ``self.increment_position`` maps it to.

        NOTE(review): per the original author, ``state`` can be the dummy last
        state, in which case the hero mappings are reversed -- confirm against
        the environment.

        Args:
            state: State history; must support ``state[:, -1]`` and
                ``.tolist()``.
            action_mask: Action mask; serialised with ``.tolist()``.
            betsize_mask: Betsize mask; serialised with ``.tolist()``.
            done: Passed through unchanged under the ``'done'`` key.

        Returns:
            A JSON string with two top-level keys: ``'state'`` and
            ``'outcome'``.
        """
        mapping = self.env.state_mapping
        latest = state[:, -1]

        def field(key):
            # First batch entry of the newest row for the mapped feature.
            return latest[:, mapping[key]][0]

        hero_seat = self.player['position']
        hero = self.env.players[hero_seat]
        villain = self.env.players[self.increment_position[hero_seat]]

        state_object = {
            'history': state.tolist(),
            'betsizes': self.env.betsizes.tolist(),
            'mapping': mapping,
            'current_player':
            pdt.Globals.POSITION_MAPPING[self.env.current_player],
            'hero_stack': hero.stack,
            'hero_position': pdt.Globals.POSITION_MAPPING[hero.position],
            'hero_cards': flatten(hero.hand),
            'hero_street_total': hero.street_total,
            'pot': float(field('pot')),
            'board_cards': field('board').tolist(),
            'villain_stack': villain.stack,
            'villain_position':
            pdt.Globals.POSITION_MAPPING[villain.position],
            'villain_cards': flatten(villain.hand),
            'villain_street_total': villain.street_total,
            'last_action': int(field('last_action')),
            'last_betsize': float(field('last_betsize')),
            'last_position': int(field('last_position')),
            'last_aggressive_action': int(field('last_aggressive_action')),
            'last_aggressive_betsize': float(field('last_aggressive_betsize')),
            'last_aggressive_position':
            int(field('last_aggressive_position')),
            'done': done,
            'action_mask': action_mask.tolist(),
            'betsize_mask': betsize_mask.tolist(),
            'street': int(field('street')),
            'blind': bool(field('blind')),
        }
        # Rewards are each player's stack change relative to the starting stack.
        outcome_object = {
            'player1_reward': hero.stack - self.env.starting_stack,
            'player1_hand': flatten(hero.hand),
            'player2_reward': villain.stack - self.env.starting_stack,
            'player2_hand': flatten(villain.hand),
            'player1_handrank': hero.handrank,
            'player2_handrank': villain.handrank,
        }
        return json.dumps({'state': state_object, 'outcome': outcome_object})

    def store_state(self, state, obs, action_mask, betsize_mask):
        """Record the state and both masks for the player currently to act.

        Shallow copies (``copy.copy``) of ``state``, ``action_mask`` and
        ``betsize_mask`` are appended to that player's entry in
        ``self.trajectory``. ``obs`` is accepted but not stored.
        """
        record = self.trajectory[self.env.current_player]
        snapshots = (
            ('states', state),
            ('action_masks', action_mask),
            ('betsize_masks', betsize_mask),
        )
        for key, value in snapshots:
            record[key].append(copy.copy(value))

    def store_actions(self, actor_outputs):
        """Append one decision to the trajectory of the player currently to act.

        Args:
            actor_outputs: Mapping that must contain the keys ``'action'``,
                ``'action_category'``, ``'action_prob'``, ``'action_probs'``,
                ``'betsize'`` and ``'value'``; each value is appended (not
                copied) to the list stored under the same key.
        """
        record = self.trajectory[self.env.current_player]
        recorded_keys = (
            'action',
            'action_category',
            'action_prob',
            'action_probs',
            'betsize',
            'value',
        )
        for key in recorded_keys:
            record[key].append(actor_outputs[key])

    def query_bot(self, state, obs, action_mask, betsize_mask, done):
        """Let the model act until it is the human's turn or the hand is done.

        Each iteration asks ``self.actor`` for a decision, attaches the
        critic's ``'value'`` to it, records it, and steps the environment.
        The resulting state is stored only while the hand is still running.

        Returns:
            The latest ``(state, obs, done, action_mask, betsize_mask)``;
            the inputs are returned untouched if no bot action was needed.
        """
        while True:
            human_to_act = self.env.current_player == self.player['position']
            if human_to_act or done:
                break
            decision = self.actor(state, action_mask, betsize_mask)
            decision['value'] = self.critic(obs)['value']
            self.insert_model_outputs(decision, action_mask)
            self.store_actions(decision)
            step_result = self.env.step(decision)
            state, obs, done, action_mask, betsize_mask = step_result
            if not done:
                self.store_state(state, obs, action_mask, betsize_mask)
        return state, obs, done, action_mask, betsize_mask

    def reset(self):
        """Start a new hand and return the frontend payload for the human's turn.

        Clears the per-hand trajectories, moves the human to the seat given by
        ``self.increment_position``, resets the environment and stores the
        first state. If the bot is first to act (and the hand is not already
        done) it plays via ``query_bot`` before the payload is built.

        Returns:
            Whatever ``parse_env_outputs`` returns for the resulting state.
        """
        assert self.player['name'] is not None
        assert isinstance(self.player['position'], str)
        self.reset_trajectories()
        next_seat = self.increment_position[self.player['position']]
        self.update_player_position(next_seat)

        state, obs, done, action_mask, betsize_mask = self.env.reset()
        self.store_state(state, obs, action_mask, betsize_mask)

        bot_acts_first = self.env.current_player != self.player['position']
        if bot_acts_first and not done:
            bot_result = self.query_bot(state, obs, action_mask, betsize_mask,
                                        done)
            state, obs, done, action_mask, betsize_mask = bot_result

        # Control must be with the human before anything is sent back.
        assert self.env.current_player == self.player['position']
        return self.parse_env_outputs(state, action_mask, betsize_mask, done)

    def step(self, action: str, betsize: float):
        """Apply the human's action, let the bot respond, and return the payload.

        ``action`` is translated through ``pdt.Globals.SERVER_ACTION_DICT`` and,
        together with ``betsize`` (a string is converted with ``float``), mapped
        to a flat action category by ``self.env.convert_to_category``. The
        human's move is recorded with zero-filled placeholders for the
        probability and value entries. When the hand ends, every stored
        trajectory gets its reward column filled in and is written to the
        database.

        Returns:
            Whatever ``parse_env_outputs`` returns for the resulting state.
        """
        assert self.player['name'] is not None
        assert isinstance(self.player['position'], str)
        if isinstance(betsize, str):
            betsize = float(betsize)

        action_type = pdt.Globals.SERVER_ACTION_DICT[action]
        category_pair = self.env.convert_to_category(action_type, betsize)
        flat_action_category, betsize_category = category_pair
        assert isinstance(flat_action_category, int)

        # The human has no model outputs, so those slots hold zeros.
        flat_size = self.env.action_space + self.env.betsize_space - 2
        player_outputs = {
            'action': flat_action_category,
            'action_category': action_type,
            'betsize': betsize_category,
            'action_prob': np.array([0]),
            'action_probs': np.zeros(flat_size),
            'value': np.zeros(flat_size),
        }
        self.store_actions(player_outputs)
        step_result = self.env.step(player_outputs)
        state, obs, done, action_mask, betsize_mask = step_result

        if not done:
            self.store_state(state, obs, action_mask, betsize_mask)
            if self.env.current_player != self.player['position']:
                bot_result = self.query_bot(state, obs, action_mask,
                                            betsize_mask, done)
                state, obs, done, action_mask, betsize_mask = bot_result

        # Checked again because the bot's reply may have ended the hand.
        if done:
            rewards = self.env.player_rewards()
            for position, hand_record in self.trajectory.items():
                n_decisions = len(hand_record['betsize_masks'])
                hand_record['rewards'] = [rewards[position]] * n_decisions
                self.trajectories[position].append(hand_record)
            self.insert_into_db(self.trajectories)
        return self.parse_env_outputs(state, action_mask, betsize_mask, done)

    @property
    def current_player(self):
        """Return ``self.player``, the record holding the human's 'name' and 'position'.

        Note that this is not ``self.env.current_player`` (the seat whose turn
        it is in the environment).
        """
        return self.player
Ejemplo n.º 18
0
 # times = []
 # for i,val in enumerate([1,2,5,10,25,50]):
 #     print(f'Generating {val} samples')
 #     tic = time.time()
 #     training_params['generate_epochs'] = val
 #     train_dual(rank,env,actor,critic,target_actor,target_critic,training_params,learning_params,network_params,validation_params)
 #     toc = time.time()
 #     print(f'{val} samples took {toc-tic} seconds')
 #     times.append(toc-tic)
 #     mongo = MongoDB()
 #     mongo.clean_db()
 #     mongo.close()
 # plt.scatter([1,2,5,10,25,50],[0.26,0.73,1.75,3,7.9,14.5])
 # plt.savefig(f'generate_times.png',bbox_inches='tight')
 # Time the selected workload with a wall clock and run it under the profiler.
 # NOTE(review): `profiler` is presumably torch.autograd.profiler -- confirm
 # against the imports of the original file.
 tic = time.time()
 with profiler.profile(record_shapes=True) as prof:
     # Dispatch on the `function` command-line argument.
     if args.function == 'train':
         train_dual(rank,env,actor,critic,target_actor,target_critic,training_params,learning_params,network_params,validation_params)
     elif args.function == 'learn':
         dual_learning_update(actor,critic,target_actor,target_critic,learning_params)
     elif args.function == 'generate':
         generate_trajectories(env,actor,critic,training_params,rank=0)
     else:
         # Fallback: play 100 hands with the actor choosing every action,
         # inside torch.no_grad().
         with torch.no_grad():
             for i in range(100):
                 state,obs,done,action_mask,betsize_mask = env.reset()
                 while not done:
                     actor_outputs = actor(state,action_mask,betsize_mask)
                     state,obs,done,action_mask,betsize_mask = env.step(actor_outputs)
 # Report the elapsed wall-clock time, then print the profiler object.
 print(f'Computation took {time.time() - tic} seconds')
 print(prof)
Ejemplo n.º 19
0
 def testThreePlayers(self):
     """Play three scripted three-player hands from preflop and check bookkeeping.

     Hand 1: BTN raises, SB folds, BB calls, then both players check until
     the hand is done.
     Hand 2: two calls and a check preflop, then everyone checks until the
     hand is done.
     Hand 3: BTN raises and the other two players fold.
     """
     params = copy.deepcopy(self.env_params)
     seats = ('SB', 'BB', 'BTN')

     def new_env():
         # Every hand uses the same three-player preflop configuration.
         params['n_players'] = 3
         params['starting_street'] = pdt.Street.PREFLOP
         params['pot'] = 0
         return Poker(params)

     def latest(field):
         # Mapped column(s) of the newest row of the state history.
         return state[:, -1][:, env.state_mapping[field]]

     def expect(attribute, *wanted_values):
         # Check one player attribute for SB, BB and BTN, in that order.
         for seat, wanted in zip(seats, wanted_values):
             assert getattr(env.players[seat], attribute) == wanted

     # ---- Hand 1: raise / fold / call, then checked down heads-up ----
     env = new_env()
     state, obs, done, mask, betsize_mask = env.reset()
     opening_layout = (
         ('hero_position', pdt.Position.BTN),
         ('player1_position', pdt.Position.BTN),
         ('player2_position', pdt.Position.SB),
         ('player3_position', pdt.Position.BB),
     )
     for field, position in opening_layout:
         assert latest(field) == position
     assert env.street == pdt.Street.PREFLOP
     assert env.players.num_active_players == 3

     state, obs, done, mask, betsize_mask = env.step(ACTION_RAISE)
     expect('stack', 4.5, 4., 1.5)
     expect('street_total', 0.5, 1., 3.5)

     state, obs, done, mask, betsize_mask = env.step(ACTION_FOLD)
     expect('status', Status.FOLDED, Status.ACTIVE, Status.ACTIVE)

     state, obs, done, mask, betsize_mask = env.step(ACTION_CALL)
     expect('stack', 4.5, 1.5, 1.5)
     expect('street_total', 0., 0., 0.)
     assert latest('pot') == 7.5
     assert env.pot == 7.5
     assert env.street == pdt.Street.FLOP
     assert env.players.num_active_players == 2
     assert latest('hero_position') == pdt.Position.BB

     # Two checks close each street while two players remain.
     for next_street in (pdt.Street.TURN, pdt.Street.RIVER):
         for _ in range(2):
             state, obs, done, mask, betsize_mask = env.step(ACTION_CHECK)
         assert env.street == next_street
     for _ in range(2):
         state, obs, done, mask, betsize_mask = env.step(ACTION_CHECK)
     assert done == True
     expect('stack', 4.5, 9, 1.5)
     del env

     # ---- Hand 2: limped pot, checked down three-way ----
     env = new_env()
     state, obs, done, mask, betsize_mask = env.reset()
     assert latest('hero_position') == pdt.Position.BTN
     state, obs, done, mask, betsize_mask = env.step(ACTION_CALL)
     assert latest('hero_position') == pdt.Position.SB
     assert env.players['SB'].street_total == 0.5
     state, obs, done, mask, betsize_mask = env.step(ACTION_CALL)
     assert env.players['SB'].street_total == 1
     assert latest('hero_position') == pdt.Position.BB
     state, obs, done, mask, betsize_mask = env.step(ACTION_CHECK)
     assert env.street == pdt.Street.FLOP
     assert env.pot == 3

     # Three checks close each street while three players remain.
     for next_street in (pdt.Street.TURN, pdt.Street.RIVER):
         for _ in range(3):
             state, obs, done, mask, betsize_mask = env.step(ACTION_CHECK)
         assert env.street == next_street
     for _ in range(3):
         state, obs, done, mask, betsize_mask = env.step(ACTION_CHECK)
     assert done == True
     expect('stack', 7, 4, 4.)
     del env

     # ---- Hand 3: raise and two folds ----
     env = new_env()
     state, obs, done, mask, betsize_mask = env.reset()
     for action in (ACTION_RAISE, ACTION_FOLD, ACTION_FOLD):
         state, obs, done, mask, betsize_mask = env.step(action)
     assert done == True
     expect('stack', 4.5, 4, 6.5)
Ejemplo n.º 20
0
 def testBetLimits(self):
     params = copy.deepcopy(self.env_params)
     # Limit
     params['bet_type'] = pdt.LimitTypes.LIMIT
     params['n_players'] = 3
     params['starting_street'] = pdt.Street.PREFLOP
     params['pot'] = 0
     env = Poker(params)
     state, obs, done, mask, betsize_mask = env.reset()
     assert state[:, -1][:, env.state_mapping['pot']] == 1.5
     assert state[:, -1][:, env.
                         state_mapping['hero_position']] == pdt.Position.BTN
     state, obs, done, mask, betsize_mask = env.step(ACTION_RAISE)
     assert state[:, -1][:, env.state_mapping['pot']] == 3.5
     assert env.players['BTN'].stack == 3
     assert env.players['BB'].stack == 4
     assert env.players['SB'].stack == 4.5
     assert env.players['SB'].street_total == 0.5
     assert state[:,
                  -1][:,
                      env.state_mapping['hero_position']] == pdt.Position.SB
     assert state[:, -1][:,
                         env.state_mapping['last_aggressive_betsize']] == 2
     assert env.street == pdt.Street.PREFLOP
     state, obs, done, mask, betsize_mask = env.step(ACTION_RAISE)
     assert env.players['BTN'].stack == 3
     assert env.players['BB'].stack == 4
     assert env.players['SB'].stack == 2
     assert env.players['SB'].street_total == 3.
     assert state[:,
                  -1][:,
                      env.state_mapping['hero_position']] == pdt.Position.BB
     assert state[:,
                  -1][:,
                      env.state_mapping['last_aggressive_betsize']] == 2.5
     assert state[:, -1][:, env.state_mapping['pot']] == 6
     assert env.street == pdt.Street.PREFLOP
     state, obs, done, mask, betsize_mask = env.step(ACTION_CALL)
     assert state[:, -1][:, env.state_mapping['pot']] == 8
     assert state[:, -1][:, env.
                         state_mapping['hero_position']] == pdt.Position.BTN
     assert env.street == pdt.Street.PREFLOP
     state, obs, done, mask, betsize_mask = env.step(ACTION_CALL)
     assert state[:, -1][:, env.state_mapping['pot']] == 9
     assert state[:,
                  -1][:,
                      env.state_mapping['hero_position']] == pdt.Position.SB
     assert env.street == pdt.Street.FLOP
     del env
     params['bet_type'] = pdt.LimitTypes.POT_LIMIT
     params['n_players'] = 3
     params['starting_street'] = pdt.Street.PREFLOP
     params['pot'] = 0
     params['stacksize'] = 100
     env = Poker(params)
     state, obs, done, mask, betsize_mask = env.reset()
     assert state[:, -1][:, env.
                         state_mapping['hero_position']] == pdt.Position.BTN
     state, obs, done, mask, betsize_mask = env.step(ACTION_RAISE)
     assert state[:,
                  -1][:,
                      env.state_mapping['hero_position']] == pdt.Position.SB
     assert env.players['BTN'].stack == 96.5
     assert env.players['BTN'].street_total == 3.5
     state, obs, done, mask, betsize_mask = env.step(ACTION_RAISE)
     assert env.players['SB'].stack == 88.5
     assert env.players['SB'].street_total == 11.5
     assert state[:, -1][:,
                         env.state_mapping['last_aggressive_betsize']] == 11
     assert state[:, -1][:, env.state_mapping['pot']] == 16
     state, obs, done, mask, betsize_mask = env.step(ACTION_FOLD)
     state, obs, done, mask, betsize_mask = env.step(ACTION_RAISE)
     assert env.players['BTN'].stack == 64.5
     assert env.players['BTN'].street_total == 35.5
     assert state[:, -1][:,
                         env.state_mapping['last_aggressive_betsize']] == 32
     assert state[:, -1][:, env.state_mapping['pot']] == 48
     del env
     params['bet_type'] = pdt.LimitTypes.POT_LIMIT
     params['n_players'] = 3
     params['starting_street'] = pdt.Street.PREFLOP
     params['pot'] = 0
     params['stacksize'] = 100
     env = Poker(params)
     state, obs, done, mask, betsize_mask = env.reset()
     state, obs, done, mask, betsize_mask = env.step(ACTION_CALL)
     state, obs, done, mask, betsize_mask = env.step(ACTION_RAISE)
     assert env.players['SB'].stack == 96
     assert env.players['SB'].street_total == 4
     state, obs, done, mask, betsize_mask = env.step(ACTION_CALL)
     state, obs, done, mask, betsize_mask = env.step(ACTION_CALL)
     assert env.street == pdt.Street.FLOP
     assert state[:, -1][:, env.state_mapping['pot']] == 12
     state, obs, done, mask, betsize_mask = env.step(ACTION_BET)
     assert state[:, -1][:, env.state_mapping['pot']] == 24
     assert env.players['SB'].stack == 84
     assert env.players['SB'].street_total == 12
     state, obs, done, mask, betsize_mask = env.step(ACTION_RAISE)
     assert env.players['BB'].stack == 48
     assert env.players['BB'].street_total == 48
     assert state[:, -1][:, env.state_mapping['pot']] == 72