def test_pairs_with_dash(self):
    assert Range("22-33").combos == DEUCE_COMBOS + THREE_COMBOS

def test_pairs_simple(self):
    """Test if pairs get all the combos."""
    assert Range("22").combos == DEUCE_COMBOS

def test_pairs_multiple(self):
    assert Range("22 33").combos == DEUCE_COMBOS + THREE_COMBOS

def test_empty_range(self):
    assert Range().hands == tuple()
    assert Range().combos == tuple()
    assert Range("").hands == tuple()
    assert Range("").combos == tuple()

def test_offsuit_and_suited_with_dash_reversed_is_the_same(self):
    assert Range("J8-J4").hands == Range("J4-J8").hands

def test_suited_plus(self):
    assert Range("KJs+").hands == (Hand("KJs"), Hand("KQs"))

def test_offsuit_plus(self):
    assert Range("KJo+").hands == (Hand("KJo"), Hand("KQo"))
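# The two plus-notation cases above follow the same pattern, so they could
# also be expressed as one parametrized test. A minimal sketch, assuming
# pytest and the same Range/Hand objects used by the tests above; it is
# written as a module-level function (not a method) for brevity:
import pytest

@pytest.mark.parametrize("notation, expected", [
    ("KJs+", (Hand("KJs"), Hand("KQs"))),
    ("KJo+", (Hand("KJo"), Hand("KQo"))),
])
def test_plus_notation_parametrized(notation, expected):
    assert Range(notation).hands == expected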
def __init__(self,
             sess,
             scope,
             action_num=4,
             state_shape=None,
             hidden_layers_sizes=None,
             reservoir_buffer_capacity=int(1e6),
             anticipatory_param=0.1,
             batch_size=256,
             train_every=1,
             rl_learning_rate=0.1,
             sl_learning_rate=0.005,
             min_buffer_size_to_learn=1000,
             q_replay_memory_size=30000,
             q_replay_memory_init_size=1000,
             q_update_target_estimator_every=1000,
             q_discount_factor=0.99,
             q_epsilon_start=0.06,
             q_epsilon_end=0,
             q_epsilon_decay_steps=int(1e6),
             q_batch_size=256,
             q_train_every=1,
             q_mlp_layers=None,
             evaluate_with='average_policy'):
    ''' Initialize the NFSP agent.

    Args:
        sess (tf.Session): Tensorflow session object.
        scope (string): The name scope of NFSPAgent.
        action_num (int): The number of actions.
        state_shape (list): The shape of the state space.
        hidden_layers_sizes (list): The hidden layer sizes of the average policy network.
        reservoir_buffer_capacity (int): The size of the buffer for the average policy.
        anticipatory_param (float): The hyper-parameter that balances between the RL and the average policy.
        batch_size (int): The batch size for training the average policy.
        train_every (int): Train the SL policy every X steps.
        rl_learning_rate (float): The learning rate of the RL agent.
        sl_learning_rate (float): The learning rate of the average policy.
        min_buffer_size_to_learn (int): The minimum buffer size to learn for the average policy.
        q_replay_memory_size (int): The memory size of the inner DQN agent.
        q_replay_memory_init_size (int): The initial memory size of the inner DQN agent.
        q_update_target_estimator_every (int): The frequency of updating the target network of the inner DQN agent.
        q_discount_factor (float): The discount factor of the inner DQN agent.
        q_epsilon_start (float): The starting epsilon of the inner DQN agent.
        q_epsilon_end (float): The ending epsilon of the inner DQN agent.
        q_epsilon_decay_steps (int): The decay steps of the inner DQN agent.
        q_batch_size (int): The batch size of the inner DQN agent.
        q_train_every (int): Train the inner DQN agent every X steps.
        q_mlp_layers (list): The layer sizes of the inner DQN agent.
        evaluate_with (string): Either 'best_response' or 'average_policy'.
    '''
    self.use_raw = False
    self._sess = sess
    self._scope = scope
    self._action_num = action_num
    self._state_shape = state_shape
    self._layer_sizes = hidden_layers_sizes
    self._batch_size = batch_size
    self._train_every = train_every
    self._sl_learning_rate = sl_learning_rate
    self._anticipatory_param = anticipatory_param
    self._min_buffer_size_to_learn = min_buffer_size_to_learn

    self._reservoir_buffer = ReservoirBuffer(reservoir_buffer_capacity)
    self._prev_timestep = None
    self._prev_action = None
    self.evaluate_with = evaluate_with

    # Index-to-rank lookup for decoding cards (ace first, then deuce through king).
    self.d = {0: 'A', 1: '2', 2: '3', 3: '4', 4: '5', 5: '6', 6: '7',
              7: '8', 8: '9', 9: 'T', 10: 'J', 11: 'Q', 12: 'K'}
    # Index-to-suit lookup.
    self.s = {0: 's', 1: 'h', 2: 'd', 3: 'c'}
    # Rank-character-to-numeric-value lookup (deuce low, ace high).
    self.c2n = {'2': 2, '3': 3, '4': 4, '5': 5, '6': 6, '7': 7, '8': 8,
                '9': 9, 'T': 10, 'J': 11, 'Q': 12, 'K': 13, 'A': 14}
    # Hand range considered playable from late position.
    self.late_range = Range(
        '22+, A2s+, K2s+, Q2s+, J2s+, J8, T9, 98, 87, 76s, 65s, 54s, '
        '98s+, K9+, Q8+, J7+, T6s+, A9+')

    # Total timesteps
    self.total_t = 0

    # Step counter to keep track of learning.
    self._step_counter = 0

    with tf.variable_scope(scope):
        # Inner RL agent
        self._rl_agent = DQNAgent(sess, scope + '_dqn', q_replay_memory_size,
                                  q_replay_memory_init_size,
                                  q_update_target_estimator_every,
                                  q_discount_factor, q_epsilon_start,
                                  q_epsilon_end, q_epsilon_decay_steps,
                                  q_batch_size, action_num, state_shape,
                                  q_train_every, q_mlp_layers,
                                  rl_learning_rate)

        with tf.variable_scope('sl'):
            # Build the supervised (average policy) model
            self._build_model()

    self.sample_episode_policy()
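# The constructor's final call, sample_episode_policy(), selects which policy
# drives the next episode via the anticipatory mechanism described for
# anticipatory_param above. A minimal sketch of that selection, assuming a
# MODE enum and NumPy; the actual method may differ in detail:
import enum

import numpy as np

class MODE(enum.Enum):
    best_response = enum.auto()
    average_policy = enum.auto()

def sample_episode_policy(self):
    ''' With probability anticipatory_param, act via the inner RL (DQN)
    agent (best response); otherwise follow the supervised average policy. '''
    if np.random.rand() < self._anticipatory_param:
        self._mode = MODE.best_response
    else:
        self._mode = MODE.average_policy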