import numpy as np

# Module names below are inferred from how they are used in this file;
# adjust the import paths to match the repo's actual layout.
import beta_bernoulli
import generate_single_bandit
import ppd
import reorder_samples_in_rewards
import run_effect_size_simulations
import thompson_policy
from forced_actions import forced_actions  # assumed import location


def run_simulations_uniform_random(num_sims, prob_per_arm, step_sizes,
                                   outfile_directory, forceActions=0):
    '''
    Runs num_sims bandit simulations with several different sample sizes
    (those in the list step_sizes). Samples uniformly at random (Thompson
    sampling with epsilon = 1.0).
    '''
    for i in range(num_sims):
        for num_steps in step_sizes:
            if forceActions != 0:
                print("Forcing actions:", forceActions)
                forced = run_effect_size_simulations.make_forced_actions(
                    len(prob_per_arm), num_steps, forceActions)
            else:
                forced = forced_actions()
            cur_reward_file = get_rewards_filename(outfile_directory, num_steps, i)
            generate_single_bandit.generate_file(np.array(prob_per_arm),
                                                 num_steps, cur_reward_file)
            cur_output_file = get_output_filename(outfile_directory, num_steps, i)
            # Uniform Beta(1, 1) priors on every arm.
            models = [beta_bernoulli.BetaBern(success=1, failure=1)
                      for _ in range(len(prob_per_arm))]
            thompson_policy.calculate_thompson_single_bandit(
                cur_reward_file,
                num_actions=len(prob_per_arm),
                dest=cur_output_file,
                models=models,
                action_mode=thompson_policy.ActionSelectionMode.prob_is_best,
                epsilon=1.0,
                relearn=True,
                forced=forced)
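
# Example usage (a sketch; the arm probabilities, step sizes, and output
# directory below are illustrative assumptions, not values from this repo):
#
#     run_simulations_uniform_random(num_sims=5,
#                                    prob_per_arm=[0.5, 0.6],
#                                    step_sizes=[100, 500],
#                                    outfile_directory='sims_uniform/')
#
# Each (num_steps, sim) pair writes one reward file and one output file,
# named via get_rewards_filename and get_output_filename.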

def run_simulations(num_sims, prob_per_arm, step_sizes, outfile_directory,
                    successPrior=1, failurePrior=1, softmax_beta=None,
                    reordering_fn=None, forceActions=0, batch_size=1,
                    burn_in_size=1):
    '''
    Runs num_sims bandit simulations with several different sample sizes
    (those in the list step_sizes). Bandit uses Thompson sampling with
    Beta-Bernoulli models.
    '''
    for i in range(num_sims):
        for num_steps in step_sizes:
            if forceActions != 0:
                forced = run_effect_size_simulations.make_forced_actions(
                    len(prob_per_arm), num_steps, forceActions)
            else:
                forced = forced_actions()
            cur_reward_file = get_rewards_filename(outfile_directory, num_steps, i)
            generate_single_bandit.generate_file(np.array(prob_per_arm),
                                                 num_steps, cur_reward_file)
            if softmax_beta is not None:
                # Reorder rewards by quartile before running the bandit.
                reordered_reward_file = get_reordered_rewards_filename(
                    outfile_directory, num_steps, i)
                reorder_samples_in_rewards.reorder_rewards_by_quartile(
                    cur_reward_file, reordered_reward_file, reordering_fn,
                    softmax_beta)
            else:
                reordered_reward_file = cur_reward_file
            cur_output_file = get_output_filename(outfile_directory, num_steps, i)
            models = [beta_bernoulli.BetaBern(success=successPrior,
                                              failure=failurePrior)
                      for _ in range(len(prob_per_arm))]
            # With random_dur=0 and random_start=0 there is no uniform-random
            # phase, so this runs Thompson sampling for the whole horizon.
            thompson_policy.old_two_phase_random_thompson_policy(
                reordered_reward_file,
                num_actions=len(prob_per_arm),
                dest=cur_output_file,
                random_dur=0,
                models=models,
                random_start=0,
                action_mode=thompson_policy.ActionSelectionMode.prob_is_best,
                relearn=True,
                forced=forced,
                batch_size=batch_size,
                burn_in_size=burn_in_size)
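
# Example usage with reward reordering (a sketch; the softmax_beta value is
# an assumption, and my_reordering_fn stands for a hypothetical callable
# accepted by reorder_samples_in_rewards.reorder_rewards_by_quartile):
#
#     run_simulations(num_sims=5,
#                     prob_per_arm=[0.5, 0.6],
#                     step_sizes=[100, 500],
#                     outfile_directory='sims_thompson/',
#                     softmax_beta=2.0,
#                     reordering_fn=my_reordering_fn)
#
# Leaving softmax_beta=None skips the reordering step entirely.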

# Renamed from run_simulations to avoid redefining the Thompson-sampling
# variant above; this version runs ppd's epsilon policy instead.
def run_simulations_ppd(num_sims, prob_per_arm, step_sizes, outfile_directory,
                        successPrior=1, failurePrior=1, softmax_beta=None,
                        reordering_fn=None, forceActions=0, batch_size=1,
                        burn_in_size=1, c=0.1, resample=True):
    '''
    Runs num_sims bandit simulations with several different sample sizes
    (those in the list step_sizes). Bandit uses
    ppd.calculate_epsilon_single_bandit with exploration parameter c.
    Note: batch_size and burn_in_size are accepted for interface parity with
    run_simulations but are not used by this policy.
    '''
    for i in range(num_sims):
        for num_steps in step_sizes:
            if forceActions != 0:
                forced = run_effect_size_simulations.make_forced_actions(
                    len(prob_per_arm), num_steps, forceActions)
            else:
                forced = forced_actions()
            cur_reward_file = get_rewards_filename(outfile_directory, num_steps, i)
            generate_single_bandit.generate_file(np.array(prob_per_arm),
                                                 num_steps, cur_reward_file)
            if softmax_beta is not None:
                # Reorder rewards by quartile before running the bandit.
                reordered_reward_file = get_reordered_rewards_filename(
                    outfile_directory, num_steps, i)
                reorder_samples_in_rewards.reorder_rewards_by_quartile(
                    cur_reward_file, reordered_reward_file, reordering_fn,
                    softmax_beta)
            else:
                reordered_reward_file = cur_reward_file
            cur_output_file = get_output_filename(outfile_directory, num_steps, i)
            # If no models are passed, the policy falls back to greedy
            # action selection.
            # thresh values tried previously (unused here): 0.03 and 0.1;
            # for a small effect size es = 0.1, the arms are 0.55 - 0.45 = 0.10 apart.
            models = [beta_bernoulli.BetaBern(success=successPrior,
                                              failure=failurePrior)
                      for _ in range(len(prob_per_arm))]
            ppd.calculate_epsilon_single_bandit(
                reordered_reward_file,
                models=models,
                num_actions=len(prob_per_arm),
                dest=cur_output_file,
                forced=forced,
                c=c,
                resample=resample)
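
# Example usage (a sketch; all values are illustrative assumptions, with the
# arms 0.10 apart to match the small-effect note above):
#
#     run_simulations_ppd(num_sims=5,
#                         prob_per_arm=[0.45, 0.55],
#                         step_sizes=[100, 500],
#                         outfile_directory='sims_ppd/',
#                         c=0.1,
#                         resample=True)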

def run_simulations_uniform_random_binary(num_sims, prob_per_arm,
                                          steps_before_switch,
                                          steps_after_switch,
                                          outfile_directory,
                                          forceActions=0,
                                          switch_to_best_if_nonsignificant=True):
    '''
    Runs num_sims bandit simulations of steps_before_switch +
    steps_after_switch steps each. Samples uniformly at random for
    steps_before_switch steps, then switches to a fixed policy.
    '''
    num_steps = steps_before_switch + steps_after_switch
    for i in range(num_sims):
        if forceActions != 0:
            print("Forcing actions:", forceActions)
            forced = run_effect_size_simulations.make_forced_actions(
                len(prob_per_arm), num_steps, forceActions)
        else:
            forced = forced_actions()
        cur_reward_file = get_rewards_filename(outfile_directory, num_steps, i)
        generate_single_bandit.generate_file(np.array(prob_per_arm),
                                             num_steps, cur_reward_file)
        cur_output_file = get_output_filename(outfile_directory, num_steps, i)
        # Uniform Beta(1, 1) priors on every arm.
        models = [beta_bernoulli.BetaBern(success=1, failure=1)
                  for _ in range(len(prob_per_arm))]
        thompson_policy.calculate_thompson_switch_to_fixed_policy(
            cur_reward_file,
            num_actions=len(prob_per_arm),
            dest=cur_output_file,
            num_actions_before_switch=steps_before_switch,
            models=models,
            action_mode=thompson_policy.ActionSelectionMode.prob_is_best,
            epsilon=1.0,
            switch_to_best_if_nonsignificant=switch_to_best_if_nonsignificant,
            forced=forced)
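
# Example usage (a sketch with assumed values): sample uniformly at random
# for the first 100 steps, then switch to a fixed policy for the remaining
# 400 steps of each 500-step simulation:
#
#     run_simulations_uniform_random_binary(num_sims=5,
#                                           prob_per_arm=[0.5, 0.6],
#                                           steps_before_switch=100,
#                                           steps_after_switch=400,
#                                           outfile_directory='sims_switch/')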