def given_two_treatments_test():
    """Check the order in which a two-treatment SimpleBandit picks treatments.

    The expected sequence is five picks of 'A', five picks of 'B', five
    further picks of 'B' after 'B' has been credited with payouts, and then
    'A' again on the following pick.
    """
    # NOTE(review): a second function with this same name is defined later in
    # this file; the later definition replaces this one at import time, so one
    # of them probably needs renaming for both to run.
    treatments = ['A', 'B']
    bandit = simple_bandit.SimpleBandit(treatments)
    treatments_chosen = []

    # Every pick is recorded: the assertions below count entries in this list.
    # NOTE(review): no payout is logged for these picks or for the third
    # batch -- confirm SimpleBandit does not need log_payout to move on.
    for _ in range(5):
        treatments_chosen.append(bandit.choose_treatment())
    assert treatments_chosen.count(
        'A') == 5, 'Should explore treatment A for the first 5 tries'

    for _ in range(5):
        chosen_treatment = bandit.choose_treatment()
        treatments_chosen.append(chosen_treatment)
        bandit.log_payout(chosen_treatment, 5.00)
    assert treatments_chosen.count(
        'B') == 5, 'Should explore treatment B for the next 5 tries'

    # Counts are cumulative, so five more 'B' picks bring the total to 10.
    for _ in range(5):
        treatments_chosen.append(bandit.choose_treatment())
    assert treatments_chosen.count(
        'B'
    ) == 10, 'Should explore treatment B for the next 5 tries after exploring'

    chosen_treatment = bandit.choose_treatment()
    assert chosen_treatment == 'A', 'Should return to exploring starting with A'
def given_two_treatments_test():
    """Check that a two-treatment SimpleBandit repeats its pick pattern.

    The bandit is driven in batches of five picks. The expected pattern is
    five 'A', five 'B', five more 'B', and then the same three batches
    again; the final batch checks that 'A' is picked once more. Counts in
    the assertions are cumulative over the whole run.
    """
    # NOTE(review): an earlier function in this file has the same name; this
    # definition replaces it at import time.
    treatments = ['A', 'B']
    bandit = simple_bandit.SimpleBandit(treatments)
    treatments_chosen = []

    def play_batch(payout=None):
        """Make five picks, record each, and log `payout` for it if given."""
        for _ in range(5):
            chosen_treatment = bandit.choose_treatment()
            treatments_chosen.append(chosen_treatment)
            if payout is not None:
                bandit.log_payout(chosen_treatment, payout)

    play_batch(0.00)
    assert_equal(treatments_chosen.count('A'), 5,
                 'Should explore treatment A for the first 5 tries')

    play_batch(5.00)
    assert_equal(treatments_chosen.count('B'), 5,
                 'Should explore treatment B for the second 5 tries')

    # NOTE(review): no payout is logged for this batch (nor for the matching
    # batch further down) -- confirm SimpleBandit does not need log_payout
    # to move on to the next phase.
    play_batch()
    assert_equal(treatments_chosen.count('B'), 10,
                 'Should explore treatment B for the next 5 tries after'
                 ' exploring - "exploiting"')

    # make sure we return to exploring
    play_batch(0.00)
    assert_equal(treatments_chosen.count('A'), 10,
                 'Should explore treatment A for the next 5 tries')

    # make sure the pattern is being followed, i.e., A then B
    play_batch(5.00)
    assert_equal(treatments_chosen.count('B'), 15,
                 'Should explore treatment B for the next 5 tries')

    # ensure the last step in the pattern is followed
    play_batch()
    assert_equal(treatments_chosen.count('B'), 20,
                 'Should explore treatment B for the next 5 tries after'
                 ' exploring - "exploiting"')

    # make sure we return to exploring
    play_batch(0.00)
    assert_equal(treatments_chosen.count('A'), 15,
                 'Should explore treatment A for the next 5 tries')
def run_comparison_test():
    """Compare RPMBandit against SimpleBandit over repeated simulations.

    Calls `run_bandit_sim` 300 times for each bandit type, pairs the results
    by position, and asserts that the RPM result is the greater one in more
    than 80% of the pairs.
    """
    trials = 300
    simple_bandit_results = np.array([
        run_bandit_sim(simple_bandit.SimpleBandit(['A', 'B']))
        for _ in range(trials)
    ])
    rpm_bandit_results = np.array([
        run_bandit_sim(rpm_bandit.RPMBandit(['A', 'B']))
        for _ in range(trials)
    ])
    rpm_better_count = sum(
        rpm_result > simple_result
        for rpm_result, simple_result
        in zip(rpm_bandit_results, simple_bandit_results))
    # float() keeps this a true division even where `/` on ints truncates.
    assert rpm_better_count / float(trials) > .8, 'The RPM bandit should be better at least 80% of the time.'
def given_two_treatments_and_no_payoffs_test():
    """With no payouts logged, the first pick is the first listed treatment."""
    options = ['A', 'B']
    first_pick = simple_bandit.SimpleBandit(options).choose_treatment()
    expected = options[0]
    assert first_pick == expected, 'Should choose the first treatment to start'
def given_a_single_treatment_test():
    """A bandit built with one treatment picks that treatment."""
    only_option = 'A'
    pick = simple_bandit.SimpleBandit([only_option]).choose_treatment()
    assert pick == only_option, 'Should choose the only available option.'