Example #1
    def test_pairs_with_dash(self):
        assert Range("22-33").combos == DEUCE_COMBOS + THREE_COMBOS
Example #2
    def test_pairs_simple(self):
        """Test if pairs get all the combos."""
        assert Range("22").combos == DEUCE_COMBOS
Example #3
    def test_pairs_multiple(self):
        assert Range("22 33").combos == DEUCE_COMBOS + THREE_COMBOS
Example #4
    def test_empty_range(self):
        assert Range().hands == tuple()
        assert Range().combos == tuple()

        assert Range("").hands == tuple()
        assert Range("").combos == tuple()
Example #5
    def test_offsuit_and_suited_with_dash_reversed_is_the_same(self):
        assert Range("J8-J4").hands == Range("J4-J8").hands
Example #6
    def test_suited_plus(self):
        assert Range("KJs+").hands == (Hand("KJs"), Hand("KQs"))
Example #7
    def test_offsuit_plus(self):
        assert Range("KJo+").hands == (Hand("KJo"), Hand("KQo"))
Example #8
    def __init__(self,
                 sess,
                 scope,
                 action_num=4,
                 state_shape=None,
                 hidden_layers_sizes=None,
                 reservoir_buffer_capacity=int(1e6),
                 anticipatory_param=0.1,
                 batch_size=256,
                 train_every=1,
                 rl_learning_rate=0.1,
                 sl_learning_rate=0.005,
                 min_buffer_size_to_learn=1000,
                 q_replay_memory_size=30000,
                 q_replay_memory_init_size=1000,
                 q_update_target_estimator_every=1000,
                 q_discount_factor=0.99,
                 q_epsilon_start=0.06,
                 q_epsilon_end=0,
                 q_epsilon_decay_steps=int(1e6),
                 q_batch_size=256,
                 q_train_every=1,
                 q_mlp_layers=None,
                 evaluate_with='average_policy'):
        ''' Initialize the NFSP agent.

        Args:
            sess (tf.Session): TensorFlow session object.
            scope (string): The name scope of NFSPAgent.
            action_num (int): The number of actions.
            state_shape (list): The shape of the state space.
            hidden_layers_sizes (list): The hidden layer sizes of the
              average-policy network.
            reservoir_buffer_capacity (int): The capacity of the reservoir buffer
              for the average policy.
            anticipatory_param (float): The hyper-parameter that balances the RL
              policy against the average policy.
            batch_size (int): The batch size for training the average policy.
            train_every (int): Train the SL policy every X steps.
            rl_learning_rate (float): The learning rate of the RL agent.
            sl_learning_rate (float): The learning rate of the average policy.
            min_buffer_size_to_learn (int): The minimum buffer size required before
              the average policy starts learning.
            q_replay_memory_size (int): The replay memory size of the inner DQN agent.
            q_replay_memory_init_size (int): The initial replay memory size of the
              inner DQN agent.
            q_update_target_estimator_every (int): How often the inner DQN agent
              updates its target network.
            q_discount_factor (float): The discount factor of the inner DQN agent.
            q_epsilon_start (float): The starting epsilon of the inner DQN agent.
            q_epsilon_end (float): The final epsilon of the inner DQN agent.
            q_epsilon_decay_steps (int): The epsilon decay steps of the inner DQN agent.
            q_batch_size (int): The batch size of the inner DQN agent.
            q_train_every (int): Train the inner DQN agent every X steps.
            q_mlp_layers (list): The MLP layer sizes of the inner DQN agent.
            evaluate_with (string): Either 'best_response' or 'average_policy'.
        '''
        self.use_raw = False
        self._sess = sess
        self._scope = scope
        self._action_num = action_num
        self._state_shape = state_shape
        self._layer_sizes = hidden_layers_sizes
        self._batch_size = batch_size
        self._train_every = train_every
        self._sl_learning_rate = sl_learning_rate
        self._anticipatory_param = anticipatory_param
        self._min_buffer_size_to_learn = min_buffer_size_to_learn

        self._reservoir_buffer = ReservoirBuffer(reservoir_buffer_capacity)
        self._prev_timestep = None
        self._prev_action = None
        self.evaluate_with = evaluate_with

        # Rank index (0-12) to rank character lookup, ace first.
        self.d = {
            0: 'A',
            1: '2',
            2: '3',
            3: '4',
            4: '5',
            5: '6',
            6: '7',
            7: '8',
            8: '9',
            9: 'T',
            10: 'J',
            11: 'Q',
            12: 'K'
        }
        # Suit index to suit character lookup.
        self.s = {0: 's', 1: 'h', 2: 'd', 3: 'c'}
        # Rank character to numeric value (2-14, ace high).
        self.c2n = {
            '2': 2,
            '3': 3,
            '4': 4,
            '5': 5,
            '6': 6,
            '7': 7,
            '8': 8,
            '9': 9,
            'T': 10,
            'J': 11,
            'Q': 12,
            'K': 13,
            'A': 14
        }
        # Preflop hand range parsed with the Range class (the agent's "late" range).
        self.late_range = Range(
            '22+, A2s+, K2s+, Q2s+, J2s+, J8, T9, 98, 87, 76s, 65s, 54s, 98s+, K9+, Q8+, J7+, T6s+, A9+'
        )

        # Total timesteps
        self.total_t = 0

        # Step counter to keep track of learning.
        self._step_counter = 0

        with tf.variable_scope(scope):
            # Inner RL agent
            self._rl_agent = DQNAgent(
                sess, scope + '_dqn', q_replay_memory_size,
                q_replay_memory_init_size, q_update_target_estimator_every,
                q_discount_factor, q_epsilon_start, q_epsilon_end,
                q_epsilon_decay_steps, q_batch_size, action_num, state_shape,
                q_train_every, q_mlp_layers, rl_learning_rate)

            with tf.variable_scope('sl'):
                # Build supervised model
                self._build_model()

        self.sample_episode_policy()
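
The constructor above wires a supervised average-policy network and an inner DQN agent into one NFSP agent driven by a TensorFlow 1.x session. A hedged instantiation sketch; the class name NFSPAgent and its import path are assumptions taken from the docstring, and the state shape and layer sizes are placeholders rather than values from the source:

    import tensorflow as tf

    from nfsp_agent import NFSPAgent  # hypothetical module path for the class above

    with tf.Session() as sess:
        agent = NFSPAgent(
            sess=sess,
            scope='nfsp',
            action_num=4,                    # number of discrete actions
            state_shape=[52],                # placeholder state-encoding size
            hidden_layers_sizes=[512, 512],  # average-policy MLP sizes (assumed)
            q_mlp_layers=[512, 512],         # inner DQN MLP sizes (assumed)
            evaluate_with='average_policy',
        )
        # TF1-style initialization once the agent has built its graph.
        sess.run(tf.global_variables_initializer())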