Exemple #1
0
            p[arm].append(p_min)
            p_int.append(p_min)

        # check if there is at-least one arm with p>0.05, then pull,
        # otherwise pull arm with highest ucb
        p_arm = [i for i in range(num_arms) if p_int[i] > 0.05]
        print(arm_sort_ucb)
        print(p_int)
        if p_arm:
            # we only pull the arm if we know that the difference is
            # not statistically significant
            for arm in arm_sort_ucb:
                if p_int[arm] > 0.05:
                    bandit.pull_arm(arm)
                    break
        else:
            print("pulled")
            bandit.pull_arm(arm_sort_ucb[0])

    return bandit


# Script entry point: build a four-arm bandit and run ucb_peek on it once.
if __name__ == '__main__':
    # Define bandit
    num_arms = 4
    # num_obs comes from outside this block and is used as the round budget.
    num_rounds = num_obs
    # Keep only the first num_arms entries of trt_dist_list.
    trt_dist_lis = trt_dist_list[:num_arms]
    ucb_bandit = Bandit(name='ucb_peek',
                        num_arms=num_arms,
                        trt_dist_list=trt_dist_lis)
    # The value returned by ucb_peek is not captured here.
    ucb_peek(bandit=ucb_bandit, num_rounds=num_rounds, num_arms=num_arms)
Exemple #2
0
 def __init__(self, args):
     """Initialise via Bandit.__init__ and store the success probability.

     After the base initialiser runs, element 0 of ``self.args`` is
     copied to ``self.p``.

     NOTE(review): ``self.args`` is presumably set by Bandit.__init__
     from ``args`` -- confirm against the Bandit class.
     """
     Bandit.__init__(self, args)
     # Success probability
     self.p = self.args[0]
 # Simulation set-up: ten arms whose means are 0.0, 0.1, ..., 0.9.
 num_arms = 10
 arm_means = np.arange(0.0, 1.0, 0.1)
 # One variance value of 1 for every entry in arm_means.
 arm_vars = [1 for i in range(len(arm_means))]
 # NOTE(review): create_distributions_custom is defined outside this
 # fragment; presumably it builds one reward distribution per arm.
 dist_list = create_distributions_custom(arm_means, arm_vars)
 # Switches that turn the individual analysis steps on and off
 run_simulations = True
 run_plots = True
 all_grps = True
 seperate = False
 agg_type = "avg"
 if run_simulations:
     # Now we call all the algorithms that can simulate various allocation
     # procedures
     # calling ucb naive bandit algorithm
     ucb_vanilla_bandit = Bandit(name='ucb_van',
                         num_arms=num_arms,
                         trt_dist_list=dist_list)
     ucb_peek_bandit = Bandit(name='ucb_peek',
                         num_arms=num_arms,
                         trt_dist_list=dist_list)
     # N (the number of rounds) is defined outside this fragment.
     ucb_vanilla = ucb_naive(bandit=ucb_vanilla_bandit, num_rounds=N,
                             num_arms=num_arms)
     ucb_p = ucb_peek(bandit=ucb_peek_bandit, num_rounds=N, num_arms=num_arms)
     # Pull the per-round arm and reward histories off each returned bandit.
     ucb_v_group, ucb_v_outcome = ucb_vanilla.arm_tracker, \
                               ucb_vanilla.reward_tracker
     ucb_p_group, ucb_p_outcome = ucb_p.arm_tracker, \
                                  ucb_p.reward_tracker
     df = {"ucb_v_group":ucb_v_group, "ucb_v_outcome":ucb_v_outcome,
           "ucb_p_group":ucb_p_group, "ucb_p_outcome":ucb_p_outcome}
     # orient='index' makes each key a row; the transpose below turns the
     # keys into columns.
     df = pd.DataFrame.from_dict(df, orient='index')
     df = df.transpose()
                bandit.pull_arm(arm)
            else:
                pass
        # calculate ucb and lcb
        ucb = ucb_value_naive(num_arms, num_rounds, bandit.arm_pull_tracker,
                              bandit.avg_reward_tracker)
        lcb = lcb_value_naive(num_arms, num_rounds, bandit.arm_pull_tracker,
                              bandit.avg_reward_tracker)

        # Deactivate all arms that satisfy condition ucb(a) < lcb(a')
        for arm in range(num_arms):
            # find max_lcb of all other arms
            lcb_other = [lcb[i] for i in range(len(lcb)) if i != arm]
            # if this condition then deactivate arm
            if ucb[arm] < np.amax(lcb_other):
                active[arm] = 0

    return bandit


# Script entry point: build a four-arm bandit and run successive_elimination
# on it once.
if __name__ == '__main__':
    # Define bandit
    num_arms_ep = 4
    # num_obs comes from outside this block and is used as the round budget.
    num_rounds = num_obs
    # Keep only the first num_arms_ep entries of trt_dist_list.
    trt_dist_lis_ep = trt_dist_list[:num_arms_ep]
    elim_bandit = Bandit(name='successive_elimination',
                         num_arms=num_arms_ep,
                         trt_dist_list=trt_dist_lis_ep)
    # The value returned by successive_elimination is not captured here.
    successive_elimination(bandit=elim_bandit,
                           num_rounds=num_rounds,
                           num_arms=num_arms_ep)
Exemple #5
0
def epsilon_greedy(epsilon, bandit, num_rounds, num_arms):
    """Run the epsilon-greedy allocation on ``bandit`` for ``num_rounds`` pulls.

    Each round draws a uniform number in [0, 1). If the draw is below
    ``epsilon`` the round explores: one arm other than
    ``bandit.max_reward_arm`` is chosen uniformly at random and pulled.
    Otherwise the round exploits and pulls ``bandit.max_reward_arm``.

    When no alternative arm exists (for example ``num_arms == 1``), an
    exploration round pulls ``bandit.max_reward_arm`` instead of failing
    on an empty candidate list.

    Args:
        epsilon: Exploration threshold compared against the random draw.
        bandit: Object exposing ``pull_arm(arm)`` and ``max_reward_arm``.
        num_rounds: Number of pulls to perform.
        num_arms: Number of arms; arms are indexed ``0 .. num_arms - 1``.

    Returns:
        The same ``bandit`` object after all pulls have been made.
    """
    for _ in range(num_rounds):
        if random.random() < epsilon:
            # Explore: candidates are all arms except the current best one.
            best_arm = bandit.max_reward_arm
            other_arms = [arm for arm in range(num_arms) if arm != best_arm]
            if other_arms:
                bandit.pull_arm(np.random.choice(other_arms))
            else:
                # Nothing else to explore; np.random.choice would raise
                # ValueError on an empty list, so pull the only option.
                bandit.pull_arm(best_arm)
        else:
            # Exploit: pull the arm with the highest reward so far.
            bandit.pull_arm(bandit.max_reward_arm)
    return bandit


# Script entry point: build a four-arm bandit and run epsilon_greedy on it
# once with epsilon = 0.5.
if __name__ == '__main__':
    # Define bandit
    num_arms_ep = 4
    epsilon = 0.5
    # num_obs comes from outside this block and is used as the round budget.
    num_rounds = num_obs
    # Keep only the first num_arms_ep entries of trt_dist_list.
    trt_dist_lis_ep = trt_dist_list[:num_arms_ep]
    epsilon_bandit = Bandit(name='epsilon_greedy',
                            num_arms=num_arms_ep,
                            trt_dist_list=trt_dist_lis_ep)
    # epsilon_greedy returns the bandit; the return value is not captured.
    epsilon_greedy(epsilon=epsilon, bandit=epsilon_bandit,
                  num_rounds=num_rounds, num_arms=num_arms_ep)
from bandits.MAB_VAR.ab_testing import ab_testing
from bandits.MAB_VAR.peek_ab import peeking_ab_testing
from bandits.MAB_VAR.vanilla_mix import vanilla_mixed_UCB
from bandits.algorithms.ucb_naive import ucb_naive
from bandits.bandit import Bandit
from bandits.utils import create_distributions_vanilla
from bandits.MAB_VAR.stats import overall_stats


# Script entry point: run four allocation procedures on the same generated
# outcomes and print the summary statistics of the first one.
if __name__ == '__main__':
    
    num_arms = 10
    # The same outcome lists are fed to every procedure below.
    outcome_lis_of_lis = create_distributions_vanilla(num_arms)
    # Each procedure returns a (group, outcome) pair.
    ab_group, ab_outcome = ab_testing(outcome_lis_of_lis, post_allocation=True)
    peek_group, peek_outcome = peeking_ab_testing(outcome_lis_of_lis,
                                                  post_allocation=True)
    mix_group, mix_outcome = vanilla_mixed_UCB(outcome_lis_of_lis)
    ucb_bandit = Bandit(name='ucb_naive', num_arms=num_arms,
                        trt_dist_list=outcome_lis_of_lis)
    # The round budget is the length of the first outcome list.
    ucb_bandit = ucb_naive(bandit=ucb_bandit,
                           num_rounds=len(outcome_lis_of_lis[0]),
                           num_arms=num_arms)
    ucb_group, ucb_outcome = ucb_bandit.arm_tracker, ucb_bandit.reward_tracker
    # overall_stats is applied to each outcome sequence in turn.
    ab, peek, ucb, mix = map(overall_stats, [ab_outcome, peek_outcome,
                                             ucb_outcome, mix_outcome])
    # Only the A/B-testing statistics are printed; the rest are unused here.
    print(ab)

Exemple #7
0
 def __init__(self, args):
     """Initialise via Bandit.__init__ and store ``mean`` and ``sigma``.

     After the base initialiser runs, elements 0 and 1 of ``self.args``
     are copied to ``self.mean`` and ``self.sigma`` respectively.

     NOTE(review): ``self.args`` is presumably set by Bandit.__init__
     from ``args`` -- confirm against the Bandit class.
     """
     Bandit.__init__(self, args)
     self.mean = self.args[0]
     self.sigma = self.args[1]
Exemple #8
0
def ucb_naive(bandit, num_rounds, num_arms):
    """Run the naive UCB allocation on ``bandit``.

    Every arm ``0 .. num_arms - 1`` is pulled once first. Each of the
    remaining ``num_rounds - num_arms`` rounds then recomputes the upper
    confidence bounds with ``ucb_value_naive`` and pulls the arm whose
    bound is largest.

    Args:
        bandit: Object exposing ``pull_arm(arm)``, ``arm_pull_tracker``
            and ``avg_reward_tracker``.
        num_rounds: Total number of rounds, including the initial pulls.
        num_arms: Number of arms.

    Returns:
        The same ``bandit`` object after all pulls have been made.
    """
    # Warm-up phase: one pull per arm.
    for initial_arm in range(num_arms):
        bandit.pull_arm(initial_arm)

    # Rounds left once the warm-up pulls are accounted for.
    remaining = num_rounds - num_arms
    step = 0
    while step < remaining:
        # Upper confidence bound of every arm given the current history.
        bounds = ucb_value_naive(num_arms, remaining,
                                 bandit.arm_pull_tracker,
                                 bandit.avg_reward_tracker)
        # Pull the arm with the largest bound.
        bandit.pull_arm(np.argmax(bounds))
        step += 1
    return bandit


# Script entry point: build a four-arm bandit and run ucb_naive on it once.
if __name__ == '__main__':
    # Define bandit
    num_arms = 4
    # num_obs comes from outside this block and is used as the round budget.
    num_rounds = num_obs
    # Keep only the first num_arms entries of trt_dist_list.
    trt_dist_lis = trt_dist_list[:num_arms]
    ucb_bandit = Bandit(name='ucb_naive',
                        num_arms=num_arms,
                        trt_dist_list=trt_dist_lis)
    # ucb_naive returns the bandit; the return value is not captured here.
    ucb_naive(bandit=ucb_bandit, num_rounds=num_rounds, num_arms=num_arms)
Exemple #9
0
def compare_bandits(bandit_list, num_rounds):
    """Collect the histories of several bandits in one DataFrame.

    The frame has a column ``x`` holding the round numbers
    ``1 .. num_rounds`` and, for every bandit, the columns
    ``<name>_reward`` and ``<name>_arm`` filled from its
    ``reward_tracker`` and ``arm_tracker``.

    Args:
        bandit_list: Iterable of objects exposing ``name``,
            ``reward_tracker`` and ``arm_tracker``.
        num_rounds: Number of rows in the resulting frame.

    Returns:
        The assembled ``pandas.DataFrame``.
    """
    round_numbers = list(range(1, num_rounds + 1))
    table = pd.DataFrame(round_numbers, columns=['x'])
    for bandit in bandit_list:
        reward_column = bandit.name + "_reward"
        table[reward_column] = bandit.reward_tracker
        arm_column = bandit.name + "_arm"
        table[arm_column] = bandit.arm_tracker
    return table


# Script entry point: run every allocation algorithm on its own fresh
# two-arm bandit for 10000 rounds.
if __name__ == '__main__':
    num_arms = 2
    num_rounds = 10000
    # Keep only the first num_arms entries of trt_dist_list.
    trt_dist_lis = trt_dist_list[:num_arms]
    # Each algorithm gets a newly constructed Bandit; Bandit is called
    # positionally with (name, number of arms, distribution list).
    always_explore_bandit = always_explore(
        Bandit("always_explore", num_arms, trt_dist_lis), num_rounds, num_arms)
    explore_percentage = 10
    explore_first_bandit = explore_first(
        Bandit("explore_first", num_arms, trt_dist_lis), num_rounds,
        explore_percentage, num_arms)
    epsilon = 0.3
    epsilon_greedy_bandit = epsilon_greedy(
        epsilon, Bandit("epsilon_greedy", num_arms, trt_dist_lis), num_rounds,
        num_arms)
    print(epsilon_greedy_bandit.name)
    successive_elimination_bandit = successive_elimination(
        Bandit("successive_elimination", num_arms, trt_dist_lis), num_rounds,
        num_arms)
    ucb_naive_bandit = ucb_naive(Bandit("ucb_naive", num_arms, trt_dist_lis),
                                 num_rounds, num_arms)
    # Only two of the five bandits above are placed in this list.
    bandit_list = [always_explore_bandit, ucb_naive_bandit]
Exemple #10
0
from bandits.bandit import Bandit
from bandits.distributions import trt_dist_list, num_obs
import numpy as np
import random


def always_explore(bandit, num_rounds, num_arms):
    """Pull every arm in turn, cycling through all arms repeatedly.

    The number of full passes over the arms is
    ``int(num_rounds / num_arms)``; rounds left over after the last full
    pass are not used.

    Args:
        bandit: Object exposing ``pull_arm(arm)``.
        num_rounds: Total round budget.
        num_arms: Number of arms; arms are indexed ``0 .. num_arms - 1``.

    Returns:
        The same ``bandit`` object after all pulls have been made.
    """
    full_passes = int(num_rounds / num_arms)
    completed = 0
    while completed < full_passes:
        # One pass: pull each arm exactly once, in index order.
        for arm_index in range(num_arms):
            bandit.pull_arm(arm_index)
        completed += 1
    return bandit


# Script entry point: build a four-arm bandit and run always_explore on it
# once.
if __name__ == '__main__':
    # Define bandit
    num_arms_ex = 4
    # num_obs is used as the round budget.
    num_rounds = num_obs
    # Keep only the first num_arms_ex entries of trt_dist_list.
    trt_dist_lis_ex = trt_dist_list[:num_arms_ex]
    always_explore_bandit = Bandit(name='always_explore',
                                   num_arms=num_arms_ex,
                                   trt_dist_list=trt_dist_lis_ex)
    # always_explore returns the bandit; the return value is not captured.
    always_explore(bandit=always_explore_bandit,
                   num_rounds=num_rounds,
                   num_arms=num_arms_ex)
    """Function that reproduces the steps involved in explore_first
    algorithm"""
    num_explore = int((num_rounds * explore_percentage) / 100)
    num_explore_each_arm = int(num_explore / num_arms)
    num_exploit = num_rounds - num_explore

    # Explore all arms first:
    for round in range(num_explore_each_arm):
        for arm in range(num_arms):
            bandit.pull_arm(arm)

    # Exploit max reward arm
    max_arm = bandit.max_reward_arm
    for round in range(num_exploit):
        bandit.pull_arm(max_arm)
    return bandit


# Script entry point: build a four-arm bandit and run explore_first on it
# once with explore_percentage = 10.
if __name__ == '__main__':
    # Define bandit
    num_arms_ex = 4
    explore_percentage = 10
    # num_obs comes from outside this block and is used as the round budget.
    num_rounds = num_obs
    # Keep only the first num_arms_ex entries of trt_dist_list.
    trt_dist_lis_ex = trt_dist_list[:num_arms_ex]
    explore_bandit = Bandit(name='explore_first',
                            num_arms=num_arms_ex,
                            trt_dist_list=trt_dist_lis_ex)
    # The value returned by explore_first is not captured here.
    explore_first(bandit=explore_bandit,
                  num_rounds=num_rounds,
                  explore_percentage=explore_percentage,
                  num_arms=num_arms_ex)