Example #1
0
def test_timestep_contextual_bernoulli_bandit(fix_random):
    """The weekly periodicity context should gate conversions per timestep."""
    sure_thing = single_interaction_step.Bandit(conversion_rate=1)
    dud = single_interaction_step.Bandit(conversion_rate=0)

    weekly = single_interaction_step.Periodicity(
        single_interaction_step.weekly_periodicity([0, 0.1, 2, 0, 0.1, 2, 0]))
    env = single_interaction_step.TimestepContextualBernoulliBandits(
        bandits=[sure_thing, dud], step_contexts=[weekly])

    # The always-converting bandit follows the periodicity multipliers:
    # under the fixed seed, only the third timestep converts.
    for expect_hit in (False, False, True):
        observation, reward, done, info = env.step(selected_bandit=0)
        assert bool(observation) == expect_hit
        assert reward == (1.0 if expect_hit else 0.0)

    # The never-converting bandit stays flat regardless of context.
    for _ in range(3):
        observation, reward, done, info = env.step(selected_bandit=1)
        assert not observation
        assert reward == 0.0
Example #2
0
def test_heirarchical_static_bernoulli_bandits(action, expected_observation,
                                               expected_reward):
    """Each parametrized action should map deterministically through the
    static context to the expected observation and reward."""
    certain = single_interaction_step.Bandit(conversion_rate=1)
    hopeless = single_interaction_step.Bandit(conversion_rate=0)

    env = single_interaction_step.HeirarchicalStaticBernoulliBandits(
        bandits=[certain, hopeless],
        context={"country": {"always": 1.0, "never": 0.0}})

    # A static context must yield the same outcome on every step.
    for _ in range(5):
        observation, reward, done, info = env.step(action=action)
        assert (observation, reward) == (expected_observation, expected_reward)
Example #3
0
def test_basic_discrete_bernoulli_bandit():
    """Degenerate bandits should convert always (rate 1) or never (rate 0)."""
    certain = single_interaction_step.Bandit(conversion_rate=1)
    hopeless = single_interaction_step.Bandit(conversion_rate=0)

    env = single_interaction_step.BasicDiscreteBernoulliBandits(
        bandits=[certain, hopeless])

    # Arm 0 converts every time; arm 1 never does.
    for arm, expect_hit in ((0, True), (1, False)):
        for _ in range(5):
            observation, reward, done, info = env.step(selected_bandit=arm)
            assert bool(observation) == expect_hit
            assert reward == (1.0 if expect_hit else 0.0)
Example #4
0
def test_heirarchical_static_bernoulli_bandits_context_keys():
    """The env should expose each context dimension's values as a list."""
    arm = single_interaction_step.Bandit(conversion_rate=0)
    env = single_interaction_step.HeirarchicalStaticBernoulliBandits(
        bandits=[arm],
        context={"country": {"always": 1, "never": 0}})
    assert env.context_keys == {"country": ["always", "never"]}
Example #5
0
def test_heirarchical_static_bernoulli_bandits_raises_too_few_bandits(action):
    """Stepping with fewer bandits than context values must fail."""
    lone_arm = single_interaction_step.Bandit(conversion_rate=0)
    env = single_interaction_step.HeirarchicalStaticBernoulliBandits(
        bandits=[lone_arm],
        context={"country": {"always": 1, "never": 0}})

    # A single bandit cannot cover the two context values, so the env's
    # internal sanity check is expected to trip.
    with pytest.raises(AssertionError):
        env.step(action)
Example #6
0
def test_bandit_multiplier(fix_random):
    """A large multiplier should push a low-rate bandit into converting."""
    weak_arm = single_interaction_step.Bandit(conversion_rate=0.1)
    # 0.1 scaled by 6 lifts the effective rate enough to convert under
    # the fixed random seed.
    assert weak_arm.action(multiplier=6)
Example #7
0
def test_bandit(fix_random):
    """Under the fixed seed, rates 0.1 and 0.9 land on opposite outcomes."""
    weak_arm = single_interaction_step.Bandit(conversion_rate=0.1)
    strong_arm = single_interaction_step.Bandit(conversion_rate=0.9)

    assert not weak_arm.action()
    assert strong_arm.action()