def run_simulations(num_sims, mean_list, variance, step_sizes, outfile_directory,
                    softmax_beta=None, reordering_fn=None, prior_mean=0,
                    forceActions=0):
    '''
    Runs num_sims bandit simulations for each sample size in step_sizes.
    Bandit uses the thompson_ng sampling policy.

    For every (simulation index, num_steps) pair this generates a rewards
    file, optionally reorders it, and then runs
    thompson_ng_policy.calculate_thompson_single_bandit on the result.

    Parameters:
        num_sims: number of simulations to run per sample size.
        mean_list: per-arm reward means; its length is the number of arms.
        variance: either a single value applied to every arm, or any object
            with __len__ that is passed through unchanged as the per-arm
            variances.
        step_sizes: iterable of sample sizes (number of steps) to simulate.
        outfile_directory: passed to the get_*_filename helpers to build the
            rewards / reordered rewards / output file names.
        softmax_beta: if not None, rewards are reordered with
            reorder_rewards_by_quartile before the bandit is run.
        reordering_fn: forwarded to reorder_rewards_by_quartile; only used
            when softmax_beta is not None.
        prior_mean: mu of the NGNormal prior created for each arm.
        forceActions: if non-zero, forced actions are built with
            make_forced_actions; otherwise a default forced_actions() object
            is used.
    '''
    num_actions = len(mean_list)

    # The variance normalization does not depend on the loop variables, so do
    # it once. A scalar (no __len__) is expanded to one entry per arm; anything
    # else is assumed to already hold per-arm variances.
    if hasattr(variance, '__len__'):
        variances = variance
    else:
        variances = [variance] * num_actions

    for i in range(num_sims):
        for num_steps in step_sizes:
            if forceActions != 0:
                print("Forcing actions:", forceActions)
                forced = make_forced_actions(num_actions, num_steps, forceActions)
            else:
                forced = forced_actions()

            cur_reward_file = get_rewards_filename(outfile_directory, num_steps, i)
            generate_single_bandit.generate_normal_distribution_file(
                mean_list, variances, num_steps, cur_reward_file)

            if softmax_beta is not None:
                # reorder rewards
                reordered_reward_file = get_reordered_rewards_filename(
                    outfile_directory, num_steps, i)
                reorder_samples_in_rewards.reorder_rewards_by_quartile(
                    cur_reward_file, reordered_reward_file,
                    reordering_fn, softmax_beta)
            else:
                reordered_reward_file = cur_reward_file

            cur_output_file = get_output_filename(outfile_directory, num_steps, i)

            # Fresh priors for every run so no state carries over between
            # simulations.
            models = [
                ng_normal.NGNormal(mu=prior_mean, k=1, alpha=1, beta=1)
                for _ in range(num_actions)
            ]
            thompson_ng_policy.calculate_thompson_single_bandit(
                reordered_reward_file,
                num_actions=num_actions,
                dest=cur_output_file,
                models=models,
                action_mode=thompson_ng_policy.ActionSelectionMode.prob_is_best,
                relearn=True,
                forced=forced)
def run_simulations_uniform_random(num_sims, mean_list, variance,
                                   steps_before_switch, steps_after_switch,
                                   outfile_directory, forceActions=0,
                                   switch_to_best_if_nonsignificant=True,
                                   prior_mean=0):
    '''
    Runs num_sims bandit simulations, each with
    steps_before_switch + steps_after_switch total steps.
    Samples uniformly at random (epsilon=1.0 is passed to
    calculate_thompson_switch_to_fixed_policy) for the first
    steps_before_switch steps; what happens after the switch is decided by
    that policy function.

    Parameters:
        num_sims: number of simulations to run.
        mean_list: per-arm reward means; its length is the number of arms.
        variance: either a single value applied to every arm, or any object
            with __len__ that is passed through unchanged as the per-arm
            variances.
        steps_before_switch: passed as num_actions_before_switch, and used as
            the step count when building forced actions.
        steps_after_switch: added to steps_before_switch to get the total
            number of rewards generated per simulation.
        outfile_directory: passed to the get_*_filename helpers to build the
            rewards / output file names.
        forceActions: if non-zero, forced actions are built with
            make_forced_actions; otherwise a default forced_actions() object
            is used.
        switch_to_best_if_nonsignificant: forwarded unchanged to
            calculate_thompson_switch_to_fixed_policy.
        prior_mean: mu of the NGNormal prior created for each arm
            (defaults to 0, the value this function previously hard-coded).
    '''
    num_actions = len(mean_list)
    total_steps = steps_before_switch + steps_after_switch

    # The variance normalization does not depend on the loop variable, so do
    # it once. A scalar (no __len__) is expanded to one entry per arm; anything
    # else is assumed to already hold per-arm variances.
    if hasattr(variance, '__len__'):
        variances = variance
    else:
        variances = [variance] * num_actions

    for i in range(num_sims):
        if forceActions != 0:
            print("Forcing actions:", forceActions)
            # Forced actions only cover the pre-switch phase.
            forced = make_forced_actions(num_actions, steps_before_switch,
                                         forceActions)
        else:
            forced = forced_actions()

        cur_reward_file = get_rewards_filename(outfile_directory, total_steps, i)
        generate_single_bandit.generate_normal_distribution_file(
            mean_list, variances, total_steps, cur_reward_file)

        cur_output_file = get_output_filename(outfile_directory, total_steps, i)

        # Fresh priors for every run so no state carries over between
        # simulations.
        models = [
            ng_normal.NGNormal(mu=prior_mean, k=1, alpha=1, beta=1)
            for _ in range(num_actions)
        ]
        thompson_ng_policy.calculate_thompson_switch_to_fixed_policy(
            cur_reward_file,
            num_actions=num_actions,
            dest=cur_output_file,
            num_actions_before_switch=steps_before_switch,
            models=models,
            switch_to_best_if_nonsignificant=switch_to_best_if_nonsignificant,
            epsilon=1.0,
            action_mode=thompson_ng_policy.ActionSelectionMode.prob_is_best,
            forced=forced)