def test_logistic_epsilon_abnormal_epsilon():
    """Check that out-of-range epsilon values are rejected.

    Constructing ``LogisticEpsilonGreedy`` with ``epsilon=1.3`` or
    ``epsilon=-0.3`` is expected to raise ``ValueError``.
    """
    # One value above and one value below the accepted range, in that order.
    for bad_epsilon in (1.3, -0.3):
        with pytest.raises(ValueError):
            LogisticEpsilonGreedy(n_actions=2, dim=2, epsilon=bad_epsilon)
def test_logistic_epsilon_normal_epsilon():
    """Check the epsilon attribute for default and explicit construction.

    The default epsilon must lie in the closed interval [0, 1], and an
    explicitly supplied ``epsilon=0.5`` must be stored unchanged.
    """
    # Default construction: only the range of epsilon is checked.
    default_policy = LogisticEpsilonGreedy(n_actions=2, dim=2)
    assert 0 <= default_policy.epsilon <= 1

    # Explicit construction: the exact value must round-trip.
    explicit_policy = LogisticEpsilonGreedy(n_actions=2, dim=2, epsilon=0.5)
    assert explicit_policy.epsilon == 0.5
def test_logistic_epsilon_select_action_exploration():
    """Check that a policy with ``epsilon=1.0`` does not pick a constant action.

    After four parameter updates, ``select_action`` is called ``trial_num``
    times on the same context. The first entries of the returned selections
    are summed, and the total must lie strictly between 0 and ``trial_num``.
    """
    trial_num = 50
    policy = LogisticEpsilonGreedy(n_actions=2, dim=2, epsilon=1.0)
    # A single context row of shape (1, 2).
    context = np.array([[1.0, 1.0]])

    # (action, reward) feedback, applied in this exact order.
    feedback = (
        (0, 1.0),
        (0, 1.0),
        (1, 1.0),
        (1, 0.0),
    )
    for action, reward in feedback:
        policy.update_params(action=action, reward=reward, context=context)

    picks = []
    for _ in range(trial_num):
        picks.append(policy.select_action(context=context))

    # Element-wise total of all selections; only the first slot is inspected.
    total = sum(picks)
    assert 0 < total[0] < trial_num
def test_logistic_base_exception():
    """Check that invalid constructor arguments raise the expected errors.

    Each entry pairs the expected exception type with the keyword arguments
    passed to ``LogisticEpsilonGreedy``. Entries are tried in order, and each
    construction must raise the paired exception.
    """
    invalid_configs = (
        # invalid dim
        (ValueError, {"n_actions": 2, "dim": -3}),
        (ValueError, {"n_actions": 2, "dim": 0}),
        (TypeError, {"n_actions": 2, "dim": "3"}),
        # invalid n_actions
        (ValueError, {"n_actions": -3, "dim": 2}),
        (ValueError, {"n_actions": 1, "dim": 2}),
        (TypeError, {"n_actions": "2", "dim": 2}),
        # invalid len_list
        (ValueError, {"n_actions": 2, "dim": 2, "len_list": -3}),
        (ValueError, {"n_actions": 2, "dim": 2, "len_list": 0}),
        (TypeError, {"n_actions": 2, "dim": 2, "len_list": "3"}),
        # invalid batch_size
        (ValueError, {"n_actions": 2, "dim": 2, "batch_size": -2}),
        (ValueError, {"n_actions": 2, "dim": 2, "batch_size": 0}),
        (TypeError, {"n_actions": 2, "dim": 2, "batch_size": "10"}),
        # invalid relationship between n_actions and len_list
        (ValueError, {"n_actions": 5, "len_list": 10, "dim": 2}),
        (ValueError, {"n_actions": 2, "len_list": 3, "dim": 2}),
    )
    for expected_error, kwargs in invalid_configs:
        with pytest.raises(expected_error):
            LogisticEpsilonGreedy(**kwargs)
def test_logistic_epsilon_each_action_model():
    """Check that the policy holds one logistic model per action.

    For a policy built with three actions, every entry of
    ``policy.model_list`` at indices ``0 .. n_actions - 1`` must be a
    ``MiniBatchLogisticRegression`` instance.
    """
    n_actions = 3
    policy = LogisticEpsilonGreedy(n_actions=n_actions, dim=2, epsilon=0.5)
    # Index explicitly so a model_list shorter than n_actions still errors.
    assert all(
        isinstance(policy.model_list[idx], MiniBatchLogisticRegression)
        for idx in range(n_actions)
    )