def test_timestep_contextual_bernoulli_bandit(fix_random):
    """Step a periodicity-aware bandit environment and check each outcome.

    The environment holds a bandit with ``conversion_rate=1`` (index 0) and
    one with ``conversion_rate=0`` (index 1), plus a single weekly
    periodicity step context.  Bandit 0 is pulled three times, then bandit 1
    three times, and the observation/reward of every step is asserted.

    ``fix_random`` is a fixture defined outside this block; presumably it
    pins the random source so the outcomes are deterministic -- confirm.
    """
    converting = single_interaction_step.Bandit(conversion_rate=1)
    non_converting = single_interaction_step.Bandit(conversion_rate=0)
    # NOTE(review): the weekly values look like per-step multipliers applied
    # to the conversion rate -- confirm against Periodicity's implementation.
    weekly = single_interaction_step.weekly_periodicity(
        [0, 0.1, 2, 0, 0.1, 2, 0])
    env = single_interaction_step.TimestepContextualBernoulliBandits(
        bandits=[converting, non_converting],
        step_contexts=[single_interaction_step.Periodicity(weekly)],
    )

    # Expected (observation truthiness, reward) for three pulls of bandit 0.
    first_bandit_outcomes = ((False, 0.0), (False, 0.0), (True, 1.0))
    for converted, expected_reward in first_bandit_outcomes:
        obs, reward, _done, _info = env.step(selected_bandit=0)
        assert bool(obs) is converted
        assert reward == expected_reward

    # The zero-rate bandit must not convert on any of the following steps.
    for _ in range(3):
        obs, reward, _done, _info = env.step(selected_bandit=1)
        assert not obs
        assert reward == 0.0
def test_heirarchical_static_bernoulli_bandits(
    action, expected_observation, expected_reward
):
    """Check repeated steps of the hierarchical static environment.

    Builds an environment with a ``conversion_rate=1`` bandit, a
    ``conversion_rate=0`` bandit and a ``country`` context, then steps it
    five times with ``action`` and asserts that each step returns
    ``expected_observation`` and ``expected_reward``.

    The three arguments are supplied from outside this block (presumably a
    ``pytest.mark.parametrize`` decorator -- not visible here).
    """
    bandits = [
        single_interaction_step.Bandit(conversion_rate=rate)
        for rate in (1, 0)
    ]
    env = single_interaction_step.HeirarchicalStaticBernoulliBandits(
        bandits=bandits,
        context={"country": {"always": 1.0, "never": 0.0}},
    )

    for _ in range(5):
        obs, reward, _done, _info = env.step(action=action)
        assert obs == expected_observation
        assert reward == expected_reward
def test_basic_discrete_bernoulli_bandit():
    """Check that each bandit's conversion rate drives the step outcome.

    Bandit 0 (``conversion_rate=1``) must give a truthy observation and a
    reward of 1.0 on five consecutive steps; bandit 1 (``conversion_rate=0``)
    must then give a falsy observation and a reward of 0.0 on five more.
    """
    env = single_interaction_step.BasicDiscreteBernoulliBandits(
        bandits=[
            single_interaction_step.Bandit(conversion_rate=1),
            single_interaction_step.Bandit(conversion_rate=0),
        ])

    # (bandit index, expected observation truthiness, expected reward)
    scenarios = ((0, True, 1.0), (1, False, 0.0))
    for selected, converted, expected_reward in scenarios:
        for _ in range(5):
            obs, reward, _done, _info = env.step(selected_bandit=selected)
            assert bool(obs) is converted
            assert reward == expected_reward
def test_heirarchical_static_bernoulli_bandits_context_keys():
    """Check that ``context_keys`` lists the value names of each context."""
    context = {"country": {"always": 1, "never": 0}}
    env = single_interaction_step.HeirarchicalStaticBernoulliBandits(
        bandits=[single_interaction_step.Bandit(conversion_rate=0)],
        context=context,
    )

    expected_keys = {"country": ["always", "never"]}
    assert env.context_keys == expected_keys
def test_heirarchical_static_bernoulli_bandits_raises_too_few_bandits(action):
    """Check that stepping a one-bandit environment raises AssertionError.

    ``action`` is supplied from outside this block (presumably a
    parametrization with actions the single bandit cannot serve -- confirm).
    """
    only_bandit = single_interaction_step.Bandit(conversion_rate=0)
    context = {"country": {"always": 1, "never": 0}}
    env = single_interaction_step.HeirarchicalStaticBernoulliBandits(
        bandits=[only_bandit], context=context)

    with pytest.raises(AssertionError):
        env.step(action)
def test_bandit_multiplier(fix_random):
    """Check that a 0.1-rate bandit converts when called with multiplier=6.

    ``fix_random`` is a fixture defined outside this block; presumably it
    pins the random source so the outcome is deterministic -- confirm.
    """
    bandit = single_interaction_step.Bandit(conversion_rate=0.1)
    converted = bandit.action(multiplier=6)
    assert converted
def test_bandit(fix_random):
    """Check that a 0.1-rate bandit misses and a 0.9-rate bandit converts.

    ``fix_random`` is a fixture defined outside this block; presumably it
    pins the random source so both outcomes are deterministic -- confirm.
    """
    unlikely = single_interaction_step.Bandit(conversion_rate=0.1)
    likely = single_interaction_step.Bandit(conversion_rate=0.9)

    # Keep the call order (low rate first): with a pinned random source the
    # sequence of draws may determine each result.
    low_result = unlikely.action()
    assert not low_result
    high_result = likely.action()
    assert high_result