Esempio n. 1
0
def test_logistic_epsilon_abnormal_epsilon():
    """Epsilon values 1.3 and -0.3 must each be rejected with ValueError."""
    # One value above and one below the range accepted by the constructor.
    for bad_epsilon in (1.3, -0.3):
        with pytest.raises(ValueError):
            LogisticEpsilonGreedy(n_actions=2, dim=2, epsilon=bad_epsilon)
Esempio n. 2
0
def test_logistic_epsilon_normal_epsilon():
    """Check the stored ``epsilon`` for a default and an explicit setting."""
    # No epsilon supplied: whatever the default is, it must lie in [0, 1].
    default_policy = LogisticEpsilonGreedy(n_actions=2, dim=2)
    default_epsilon = default_policy.epsilon
    assert 0 <= default_epsilon <= 1

    # An explicitly supplied epsilon must be stored unchanged.
    explicit_policy = LogisticEpsilonGreedy(n_actions=2, dim=2, epsilon=0.5)
    assert explicit_policy.epsilon == 0.5
Esempio n. 3
0
def test_logistic_epsilon_select_action_exploration():
    """With epsilon=1.0, repeated selections must not all agree.

    After four parameter updates, ``select_action`` is called 50 times and
    the first entry of the summed results must lie strictly between 0 and
    the number of trials.
    """
    n_trials = 50
    policy = LogisticEpsilonGreedy(n_actions=2, dim=2, epsilon=1.0)
    context = np.array([1.0, 1.0]).reshape(1, -1)

    # (action, reward) observations fed to the policy, in order.
    observations = ((0, 1.0), (0, 1.0), (1, 1.0), (1, 0.0))
    for action, reward in observations:
        policy.update_params(action=action, reward=reward, context=context)

    draws = []
    for _ in range(n_trials):
        draws.append(policy.select_action(context=context))

    # NOTE(review): assumes select_action returns an indexable, summable
    # array-like whose first entry is the chosen action - confirm.
    first_entry_total = sum(draws)[0]
    assert 0 < first_entry_total < n_trials
Esempio n. 4
0
def test_logistic_base_exception():
    """Constructor must reject each invalid argument set with the listed error.

    Every entry pairs the expected exception type with the keyword arguments
    passed to ``LogisticEpsilonGreedy``; entries are checked in order.
    """
    rejected_cases = [
        # invalid dim
        (ValueError, dict(n_actions=2, dim=-3)),
        (ValueError, dict(n_actions=2, dim=0)),
        (TypeError, dict(n_actions=2, dim="3")),
        # invalid n_actions
        (ValueError, dict(n_actions=-3, dim=2)),
        (ValueError, dict(n_actions=1, dim=2)),
        (TypeError, dict(n_actions="2", dim=2)),
        # invalid len_list
        (ValueError, dict(n_actions=2, dim=2, len_list=-3)),
        (ValueError, dict(n_actions=2, dim=2, len_list=0)),
        (TypeError, dict(n_actions=2, dim=2, len_list="3")),
        # invalid batch_size
        (ValueError, dict(n_actions=2, dim=2, batch_size=-2)),
        (ValueError, dict(n_actions=2, dim=2, batch_size=0)),
        (TypeError, dict(n_actions=2, dim=2, batch_size="10")),
        # invalid relationship between n_actions and len_list
        (ValueError, dict(n_actions=5, len_list=10, dim=2)),
        (ValueError, dict(n_actions=2, len_list=3, dim=2)),
    ]

    for expected_error, ctor_kwargs in rejected_cases:
        with pytest.raises(expected_error):
            LogisticEpsilonGreedy(**ctor_kwargs)
Esempio n. 5
0
def test_logistic_epsilon_each_action_model():
    """The first ``n_actions`` entries of ``model_list`` are MiniBatchLogisticRegression."""
    n_actions = 3
    policy = LogisticEpsilonGreedy(n_actions=n_actions, dim=2, epsilon=0.5)
    # Index explicitly so exactly the entries 0..n_actions-1 are inspected.
    assert all(
        isinstance(policy.model_list[action_idx], MiniBatchLogisticRegression)
        for action_idx in range(n_actions)
    )