# NOTE(review): tail of ucb_peek() -- the `def` line and the code that
# builds `p`, `p_int`, `p_min`, `arm_sort_ucb` and `arm` sit outside this
# chunk, so indentation here is relative to the (unseen) function body.
p[arm].append(p_min)
p_int.append(p_min)
# check if there is at-least one arm with p>0.05, then pull,
# otherwise pull arm with highest ucb
p_arm = [i for i in range(num_arms) if p_int[i] > 0.05]
# NOTE(review): debug prints left in the allocation loop -- consider removing.
print(arm_sort_ucb)
print(p_int)
if p_arm:
    # we only pull the arm if we know that the difference is
    # not statistically significant
    # Walk arms in arm_sort_ucb order and pull the first one whose
    # p-value clears 0.05 (guaranteed to exist since p_arm is non-empty,
    # assuming arm_sort_ucb covers all arms -- confirm upstream).
    for arm in arm_sort_ucb:
        if p_int[arm] > 0.05:
            bandit.pull_arm(arm)
            break
else:
    # No arm clears the p>0.05 threshold: fall back to arm_sort_ucb[0],
    # presumably the arm with the highest UCB -- confirm the sort order
    # upstream.  The "pulled" print looks like a debugging leftover.
    print("pulled")
    bandit.pull_arm(arm_sort_ucb[0])
return bandit


if __name__ == '__main__':
    # Define bandit: small smoke run over the first 4 treatment
    # distributions for num_obs rounds.
    num_arms = 4
    num_rounds = num_obs
    trt_dist_lis = trt_dist_list[:num_arms]
    ucb_bandit = Bandit(name='ucb_peek', num_arms=num_arms,
                        trt_dist_list=trt_dist_lis)
    ucb_peek(bandit=ucb_bandit, num_rounds=num_rounds, num_arms=num_arms)
def __init__(self, args): Bandit.__init__(self, args) # 成功概率 self.p = self.args[0]
# NOTE(review): module-level chunk of a simulation driver; `N`, `np`, `pd`,
# `Bandit`, `create_distributions_custom`, `ucb_naive` and `ucb_peek` are
# defined/imported outside this chunk.
num_arms = 10
# Arm means 0.0, 0.1, ..., 0.9, each with unit variance.
arm_means = np.arange(0.0, 1.0, 0.1)
arm_vars = [1 for i in range(len(arm_means))]
dist_list = create_distributions_custom(arm_means, arm_vars)
# Switches to turn on and off for various analysis
run_simulations = True
run_plots = True
all_grps = True
# NOTE(review): "seperate" is a typo for "separate"; left unchanged since
# the name may be referenced elsewhere in the file.
seperate = False
agg_type = "avg"
if run_simulations:
    # Now we call all the algorithms that can simulate various allocation
    # procedures
    # calling ucb naive bandit algorithm
    ucb_vanilla_bandit = Bandit(name='ucb_van', num_arms=num_arms,
                                trt_dist_list=dist_list)
    ucb_peek_bandit = Bandit(name='ucb_peek', num_arms=num_arms,
                             trt_dist_list=dist_list)
    ucb_vanilla = ucb_naive(bandit=ucb_vanilla_bandit, num_rounds=N,
                            num_arms=num_arms)
    ucb_p = ucb_peek(bandit=ucb_peek_bandit, num_rounds=N,
                     num_arms=num_arms)
    # Per-round arm choices ("group") and observed rewards ("outcome")
    # for each procedure.
    ucb_v_group, ucb_v_outcome = ucb_vanilla.arm_tracker, \
        ucb_vanilla.reward_tracker
    ucb_p_group, ucb_p_outcome = ucb_p.arm_tracker, \
        ucb_p.reward_tracker
    df = {"ucb_v_group":ucb_v_group, "ucb_v_outcome":ucb_v_outcome,
          "ucb_p_group":ucb_p_group, "ucb_p_outcome":ucb_p_outcome}
    # Build from orient='index' then transpose, presumably to tolerate
    # tracker lists of unequal length (missing cells become NaN).
    df = pd.DataFrame.from_dict(df, orient='index')
    df = df.transpose()
# NOTE(review): tail of successive_elimination() -- the `def` line, the
# round/arm loops and the conditional (presumably `if active[arm]:`) that
# guards this pull are outside this chunk.
bandit.pull_arm(arm)
else:
    # Arms already eliminated from `active` are simply skipped.
    pass
# calculate ucb and lcb
# Upper/lower confidence bounds per arm from pull counts and running
# average rewards.
ucb = ucb_value_naive(num_arms, num_rounds,
                      bandit.arm_pull_tracker,
                      bandit.avg_reward_tracker)
lcb = lcb_value_naive(num_arms, num_rounds,
                      bandit.arm_pull_tracker,
                      bandit.avg_reward_tracker)
# Deactivate all arms that satisfy condition ucb(a) < lcb(a')
for arm in range(num_arms):
    # find max_lcb of all other arms
    lcb_other = [lcb[i] for i in range(len(lcb)) if i != arm]
    # if this condition then deactivate arm
    # (arm's best case is worse than some other arm's worst case)
    if ucb[arm] < np.amax(lcb_other):
        active[arm] = 0
return bandit


if __name__ == '__main__':
    # Define bandit: smoke run over the first 4 treatment distributions.
    num_arms_ep = 4
    num_rounds = num_obs
    trt_dist_lis_ep = trt_dist_list[:num_arms_ep]
    elim_bandit = Bandit(name='successive_elimination', num_arms=num_arms_ep,
                         trt_dist_list=trt_dist_lis_ep)
    successive_elimination(bandit=elim_bandit, num_rounds=num_rounds,
                           num_arms=num_arms_ep)
def epsilon_greedy(epsilon, bandit, num_rounds, num_arms):
    """Run the epsilon-greedy allocation scheme on *bandit*.

    Each round a uniform draw decides the action: with probability
    *epsilon* explore by pulling a uniformly random arm other than the
    current best, otherwise exploit the arm with the highest observed
    reward so far.  Returns the same *bandit* after num_rounds pulls.
    """
    for _ in range(num_rounds):
        exploring = random.random() < epsilon
        if not exploring:
            # Exploit: stick with the best arm seen so far.
            bandit.pull_arm(bandit.max_reward_arm)
            continue
        # Explore: pick uniformly among the non-best arms.
        candidates = [a for a in range(num_arms)
                      if a != bandit.max_reward_arm]
        bandit.pull_arm(np.random.choice(candidates))
    return bandit


if __name__ == '__main__':
    # Smoke run: 4 arms, 50/50 explore-exploit split, num_obs rounds.
    num_arms_ep = 4
    epsilon = 0.5
    num_rounds = num_obs
    trt_dist_lis_ep = trt_dist_list[:num_arms_ep]
    epsilon_bandit = Bandit(name='epsilon_greedy', num_arms=num_arms_ep,
                            trt_dist_list=trt_dist_lis_ep)
    epsilon_greedy(epsilon=epsilon, bandit=epsilon_bandit,
                   num_rounds=num_rounds, num_arms=num_arms_ep)
"""Compare A/B-testing and bandit allocation procedures on one shared set
of simulated outcome distributions."""
from bandits.MAB_VAR.ab_testing import ab_testing
from bandits.MAB_VAR.peek_ab import peeking_ab_testing
from bandits.MAB_VAR.vanilla_mix import vanilla_mixed_UCB
from bandits.algorithms.ucb_naive import ucb_naive
from bandits.bandit import Bandit
from bandits.utils import create_distributions_vanilla
from bandits.MAB_VAR.stats import overall_stats

if __name__ == '__main__':
    num_arms = 10
    # One simulated outcome list per arm, shared by every procedure so
    # they are compared on identical data.
    outcome_lis_of_lis = create_distributions_vanilla(num_arms)
    # Fixed-allocation A/B test.
    ab_group, ab_outcome = ab_testing(outcome_lis_of_lis,
                                      post_allocation=True)
    # A/B test variant with peeking at interim results.
    peek_group, peek_outcome = peeking_ab_testing(outcome_lis_of_lis,
                                                  post_allocation=True)
    # Mixed vanilla/UCB allocation.
    mix_group, mix_outcome = vanilla_mixed_UCB(outcome_lis_of_lis)
    # Plain UCB run for as many rounds as one arm has observations.
    ucb_bandit = Bandit(name='ucb_naive', num_arms=num_arms,
                        trt_dist_list=outcome_lis_of_lis)
    ucb_bandit = ucb_naive(bandit=ucb_bandit,
                           num_rounds=len(outcome_lis_of_lis[0]),
                           num_arms=num_arms)
    ucb_group, ucb_outcome = ucb_bandit.arm_tracker, ucb_bandit.reward_tracker
    # Summarise each procedure's reward stream with the same statistic.
    ab, peek, ucb, mix = map(overall_stats,
                             [ab_outcome, peek_outcome, ucb_outcome,
                              mix_outcome])
    # NOTE(review): only the A/B summary is printed; peek/ucb/mix are
    # computed but never reported -- presumably a debugging leftover.
    print(ab)
def __init__(self, args):
    """Initialise the arm from its parameter list *args*.

    args[0] is taken as the arm's mean and args[1] as its sigma
    (standard deviation); common setup is delegated to the Bandit
    base class, which is assumed to store *args* as ``self.args``.
    """
    Bandit.__init__(self, args)
    # Unpack the two distribution parameters recorded by the base class.
    self.mean, self.sigma = self.args[0], self.args[1]
def ucb_naive(bandit, num_rounds, num_arms):
    """Allocate *num_rounds* pulls with the naive UCB rule.

    Warm-up phase: every arm is pulled exactly once.  For each of the
    remaining rounds the UCB index of every arm is recomputed from the
    pull counts and running average rewards, and the arm with the
    largest index is pulled.  Returns the same *bandit* object.
    """
    # Warm-up: try each arm once so every index is defined.
    for initial_arm in range(num_arms):
        bandit.pull_arm(initial_arm)
    remaining = num_rounds - num_arms
    for _ in range(remaining):
        scores = ucb_value_naive(num_arms, remaining,
                                 bandit.arm_pull_tracker,
                                 bandit.avg_reward_tracker)
        # Greedily play the arm whose upper confidence bound is largest.
        bandit.pull_arm(np.argmax(scores))
    return bandit


if __name__ == '__main__':
    # Smoke run over the first 4 treatment distributions.
    num_arms = 4
    num_rounds = num_obs
    trt_dist_lis = trt_dist_list[:num_arms]
    ucb_bandit = Bandit(name='ucb_naive', num_arms=num_arms,
                        trt_dist_list=trt_dist_lis)
    ucb_naive(bandit=ucb_bandit, num_rounds=num_rounds, num_arms=num_arms)
def compare_bandits(bandit_list, num_rounds):
    """Tabulate the reward and arm histories of several bandits.

    Returns a DataFrame with a round counter column 'x' (1..num_rounds)
    plus, for every bandit b in *bandit_list*, the columns
    '<b.name>_reward' and '<b.name>_arm' holding its per-round reward
    and chosen-arm trackers.
    """
    table = pd.DataFrame([r for r in range(1, num_rounds + 1)],
                         columns=['x'])
    for bandit in bandit_list:
        table[bandit.name + "_reward"] = bandit.reward_tracker
        table[bandit.name + "_arm"] = bandit.arm_tracker
    # Plotting was prototyped here with plotnine/ggplot but is disabled.
    return table


if __name__ == '__main__':
    # Run every strategy on the same two-arm problem and compare.
    num_arms = 2
    num_rounds = 10000
    trt_dist_lis = trt_dist_list[:num_arms]
    always_explore_bandit = always_explore(
        Bandit("always_explore", num_arms, trt_dist_lis),
        num_rounds, num_arms)
    explore_percentage = 10
    explore_first_bandit = explore_first(
        Bandit("explore_first", num_arms, trt_dist_lis),
        num_rounds, explore_percentage, num_arms)
    epsilon = 0.3
    epsilon_greedy_bandit = epsilon_greedy(
        epsilon, Bandit("epsilon_greedy", num_arms, trt_dist_lis),
        num_rounds, num_arms)
    print(epsilon_greedy_bandit.name)
    successive_elimination_bandit = successive_elimination(
        Bandit("successive_elimination", num_arms, trt_dist_lis),
        num_rounds, num_arms)
    ucb_naive_bandit = ucb_naive(
        Bandit("ucb_naive", num_arms, trt_dist_lis),
        num_rounds, num_arms)
    bandit_list = [always_explore_bandit, ucb_naive_bandit]
from bandits.bandit import Bandit
from bandits.distributions import trt_dist_list, num_obs
import numpy as np
import random


def always_explore(bandit, num_rounds, num_arms):
    """Pure-exploration baseline: cycle through every arm in order.

    The round budget is split evenly across the arms, so each arm is
    pulled floor(num_rounds / num_arms) times; any remainder rounds are
    left unused.  Returns the same *bandit* object.
    """
    sweeps = int(num_rounds / num_arms)
    for _ in range(sweeps):
        # One full sweep pulls every arm exactly once, in index order.
        for arm in range(num_arms):
            bandit.pull_arm(arm)
    return bandit


if __name__ == '__main__':
    # Smoke run over the first 4 treatment distributions.
    num_arms_ex = 4
    num_rounds = num_obs
    trt_dist_lis_ex = trt_dist_list[:num_arms_ex]
    always_explore_bandit = Bandit(name='always_explore',
                                   num_arms=num_arms_ex,
                                   trt_dist_list=trt_dist_lis_ex)
    always_explore(bandit=always_explore_bandit, num_rounds=num_rounds,
                   num_arms=num_arms_ex)
"""Function that reproduces the steps involved in explore_first algorithm""" num_explore = int((num_rounds * explore_percentage) / 100) num_explore_each_arm = int(num_explore / num_arms) num_exploit = num_rounds - num_explore # Explore all arms first: for round in range(num_explore_each_arm): for arm in range(num_arms): bandit.pull_arm(arm) # Exploit max reward arm max_arm = bandit.max_reward_arm for round in range(num_exploit): bandit.pull_arm(max_arm) return bandit if __name__ == '__main__': # Define bandit num_arms_ex = 4 explore_percentage = 10 num_rounds = num_obs trt_dist_lis_ex = trt_dist_list[:num_arms_ex] explore_bandit = Bandit(name='explore_first', num_arms=num_arms_ex, trt_dist_list=trt_dist_lis_ex) explore_first(bandit=explore_bandit, num_rounds=num_rounds, explore_percentage=explore_percentage, num_arms=num_arms_ex)