def run_simulations_uniform_random(num_sims, prob_per_arm, step_sizes, outfile_directory, forceActions=0):
    '''
    Runs num_sims bandit simulations with several different sample sizes
    (those in the list step_sizes). Samples uniformly at random: epsilon=1.0
    makes the Thompson policy ignore its posterior when choosing actions.
    '''
    for i in range(num_sims):
        for num_steps in step_sizes:
            if forceActions != 0:
                print("Forcing actions:", forceActions)
                forced = run_effect_size_simulations.make_forced_actions(
                    len(prob_per_arm), num_steps, forceActions)
            else:
                forced = forced_actions()
            cur_reward_file = get_rewards_filename(outfile_directory, num_steps, i)
            generate_single_bandit.generate_file(np.array(prob_per_arm), num_steps, cur_reward_file)
            cur_output_file = get_output_filename(outfile_directory, num_steps, i)
            # Uniform Beta(1, 1) priors for every arm.
            models = [beta_bernoulli.BetaBern(success=1, failure=1)
                      for _ in range(len(prob_per_arm))]
            thompson_policy.calculate_thompson_single_bandit(
                cur_reward_file,
                num_actions=len(prob_per_arm),
                dest=cur_output_file,
                models=models,
                action_mode=thompson_policy.ActionSelectionMode.prob_is_best,
                epsilon=1.0,
                relearn=True,
                forced=forced)
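
# Usage sketch (illustrative, not from the original source): the arm
# probabilities, horizons, and output directory below are hypothetical values.
#
#   run_simulations_uniform_random(num_sims=500,
#                                  prob_per_arm=[0.5, 0.5],
#                                  step_sizes=[100, 200, 400],
#                                  outfile_directory='results/uniform/')
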
def run_simulations(num_sims, prob_per_arm, step_sizes, outfile_directory,
                    successPrior=1, failurePrior=1, softmax_beta=None,
                    reordering_fn=None, forceActions=0, batch_size=1, burn_in_size=1):
    '''
    Runs num_sims bandit simulations with several different sample sizes
    (those in the list step_sizes). Bandit uses the Thompson sampling policy
    with Beta-Bernoulli models.
    '''
    for i in range(num_sims):
        for num_steps in step_sizes:
            if forceActions != 0:
                # print("Forcing actions:", forceActions)
                forced = run_effect_size_simulations.make_forced_actions(
                    len(prob_per_arm), num_steps, forceActions)
            else:
                forced = forced_actions()
            cur_reward_file = get_rewards_filename(outfile_directory, num_steps, i)
            generate_single_bandit.generate_file(np.array(prob_per_arm), num_steps, cur_reward_file)
            if softmax_beta is not None:
                # Reorder rewards by quartile before running the bandit.
                reordered_reward_file = get_reordered_rewards_filename(outfile_directory, num_steps, i)
                reorder_samples_in_rewards.reorder_rewards_by_quartile(
                    cur_reward_file, reordered_reward_file, reordering_fn, softmax_beta)
            else:
                reordered_reward_file = cur_reward_file
            cur_output_file = get_output_filename(outfile_directory, num_steps, i)
            models = [beta_bernoulli.BetaBern(success=successPrior, failure=failurePrior)
                      for _ in range(len(prob_per_arm))]
            # Previously called thompson_policy.calculate_thompson_single_bandit here;
            # random_dur=0 and random_start=0 make the two-phase policy behave as
            # plain Thompson sampling (no uniform-random phase).
            thompson_policy.old_two_phase_random_thompson_policy(
                reordered_reward_file,
                num_actions=len(prob_per_arm),
                dest=cur_output_file,
                random_dur=0,
                models=models,
                random_start=0,
                action_mode=thompson_policy.ActionSelectionMode.prob_is_best,
                relearn=True,
                forced=forced,
                batch_size=batch_size,
                burn_in_size=burn_in_size)
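
# Usage sketch (hypothetical values): Thompson sampling with informative
# Beta(2, 2) priors and batched posterior updates every 4 steps.
#
#   run_simulations(num_sims=500, prob_per_arm=[0.6, 0.4],
#                   step_sizes=[100, 200], outfile_directory='results/ts/',
#                   successPrior=2, failurePrior=2, batch_size=4)
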
def run_simulations(num_sims, prob_per_arm, step_sizes, outfile_directory,
                    successPrior=1, failurePrior=1, softmax_beta=None,
                    reordering_fn=None, forceActions=0, batch_size=1, burn_in_size=1,
                    random_dur=0, random_start=0, mode='', epsilon=0.1, resample=True):
    '''
    Runs num_sims bandit simulations with several different sample sizes
    (those in the list step_sizes). Bandit uses the Thompson sampling policy,
    run in-memory ("fast mode") rather than through reward files on disk.
    '''
    csv_output_file_names = []
    sim_results_dfs_list = []
    for num_steps in step_sizes:
        sim_results = []
        for i in range(num_sims):
            if forceActions != 0:
                forced = run_effect_size_simulations.make_forced_actions(
                    len(prob_per_arm), num_steps, forceActions)
            else:
                forced = forced_actions()
            if softmax_beta is not None:
                raise ValueError("softmax_beta is not supported in fast mode.")
            if mode == 'uniform':
                # Uniform priors plus a random phase spanning the whole run
                # gives uniform-random sampling throughout.
                models = [beta_bernoulli.BetaBern(success=1, failure=1)
                          for _ in range(len(prob_per_arm))]
                random_dur = num_steps
            else:
                models = [beta_bernoulli.BetaBern(success=successPrior, failure=failurePrior)
                          for _ in range(len(prob_per_arm))]
            sim_result, column_names, _ = thompson_policy.two_phase_random_thompson_policy(
                prob_per_arm=prob_per_arm,
                users_count=num_steps,
                random_dur=random_dur,
                models=models,
                random_start=random_start,
                action_mode=thompson_policy.ActionSelectionMode.prob_is_best,
                relearn=True,
                forced=forced,
                batch_size=batch_size,
                epsilon=epsilon,
                decreasing_epsilon=1)
            sim_results.extend(sim_result)
        sim_results_df = pd.DataFrame(sim_results, columns=column_names)
        # Index repeats 0..num_steps-1 once per simulation so rows can be
        # grouped by step across simulations.
        sim_results_df.index = [idx for idx in range(num_steps)] * num_sims
        sim_results_dfs_list.append(sim_results_df)
        cur_output_file = get_output_filename(outfile_directory, num_steps, None, mode)
        csv_output_file_names.append(cur_output_file)
    return sim_results_dfs_list, csv_output_file_names
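
# Usage sketch (hypothetical values): fast-mode Thompson sampling; passing
# mode='uniform' would instead sample uniformly for the whole horizon.
#
#   dfs, csv_names = run_simulations(num_sims=500, prob_per_arm=[0.6, 0.4],
#                                    step_sizes=[100],
#                                    outfile_directory='results/fast/',
#                                    epsilon=0.1)
#   for df, name in zip(dfs, csv_names):
#       df.to_csv(name)
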
def run_simulations_empirical_rewards(num_sims, reward_file, experiment_id,
                                      reward_header, is_cost, outfile_directory,
                                      successPrior=1, failurePrior=1,
                                      forceActions=0, shuffle_data=False):
    '''
    Runs num_sims bandit simulations on empirical rewards read from
    reward_file (one reward list per arm); the horizon is the total number
    of empirical rewards. Bandit uses the Thompson sampling policy.
    '''
    num_actions = 2
    max_steps = -1
    means = []
    variance = []
    for i in range(num_sims):
        arm_1_rewards, arm_2_rewards = get_assistments_rewards.read_assistments_rewards(
            reward_file, reward_header, experiment_id, is_cost)
        if shuffle_data:
            random.shuffle(arm_1_rewards)
            random.shuffle(arm_2_rewards)
        max_steps = len(arm_1_rewards) + len(arm_2_rewards)
        means = [np.mean(arm_1_rewards), np.mean(arm_2_rewards)]
        variance = [np.var(arm_1_rewards), np.var(arm_2_rewards)]
        if forceActions != 0:
            print("Forcing actions:", forceActions)
            forced = run_effect_size_simulations.make_forced_actions(
                num_actions, max_steps, forceActions)
        else:
            forced = forced_actions()
        cur_output_file = get_output_filename(outfile_directory, max_steps, i)
        models = [beta_bernoulli.BetaBern(success=successPrior, failure=failurePrior)
                  for _ in range(num_actions)]
        thompson_policy.calculate_thompson_single_bandit_empirical_params(
            arm_1_rewards,
            arm_2_rewards,
            num_actions=num_actions,
            dest=cur_output_file,
            models=models,
            action_mode=thompson_policy.ActionSelectionMode.prob_is_best,
            relearn=True,
            forced=forced)
    return max_steps, means, variance
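
# Usage sketch (hypothetical file name and headers): replaying empirical
# rewards from an ASSISTments export, shuffled independently per simulation.
#
#   max_steps, means, variance = run_simulations_empirical_rewards(
#       num_sims=100, reward_file='assistments_rewards.csv',
#       experiment_id='exp1', reward_header='reward', is_cost=False,
#       outfile_directory='results/empirical/', shuffle_data=True)
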
def run_simulations(num_sims, prob_per_arm, step_sizes, outfile_directory,
                    successPrior=1, failurePrior=1, softmax_beta=None,
                    reordering_fn=None, forceActions=0, batch_size=1,
                    burn_in_size=1, c=0.1, resample=True):
    '''
    Runs num_sims bandit simulations with several different sample sizes
    (those in the list step_sizes). Bandit uses the epsilon policy from ppd
    (greedy if no models are passed).
    '''
    for i in range(num_sims):
        for num_steps in step_sizes:
            if forceActions != 0:
                # print("Forcing actions:", forceActions)
                forced = run_effect_size_simulations.make_forced_actions(
                    len(prob_per_arm), num_steps, forceActions)
            else:
                forced = forced_actions()
            cur_reward_file = get_rewards_filename(outfile_directory, num_steps, i)
            generate_single_bandit.generate_file(np.array(prob_per_arm), num_steps, cur_reward_file)
            if softmax_beta is not None:
                # Reorder rewards by quartile before running the bandit.
                reordered_reward_file = get_reordered_rewards_filename(
                    outfile_directory, num_steps, i)
                reorder_samples_in_rewards.reorder_rewards_by_quartile(
                    cur_reward_file, reordered_reward_file, reordering_fn, softmax_beta)
            else:
                reordered_reward_file = cur_reward_file
            cur_output_file = get_output_filename(outfile_directory, num_steps, i)
            models = [beta_bernoulli.BetaBern(success=successPrior, failure=failurePrior)
                      for _ in range(len(prob_per_arm))]
            # If no models are passed, the policy will be greedy.
            # thresh = 0.03
            # thresh = 0.1  # for small effect, es = 0.1, 0.55 - 0.45 = 0.10
            ppd.calculate_epsilon_single_bandit(
                reordered_reward_file,
                models=models,
                num_actions=len(prob_per_arm),
                dest=cur_output_file,
                forced=forced,
                c=c,
                resample=resample)
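
# Usage sketch (hypothetical values): the PPD epsilon bandit with c=0.1,
# matching the small-effect setting (0.55 vs. 0.45) noted in the comments above.
#
#   run_simulations(num_sims=500, prob_per_arm=[0.55, 0.45],
#                   step_sizes=[88], outfile_directory='results/ppd/',
#                   c=0.1, resample=True)
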
def run_simulations_uniform_random_binary(num_sims, prob_per_arm,
                                          steps_before_switch, steps_after_switch,
                                          outfile_directory, forceActions=0,
                                          switch_to_best_if_nonsignificant=True):
    '''
    Runs num_sims bandit simulations that sample uniformly at random for
    steps_before_switch steps, then switch to a fixed policy for
    steps_after_switch steps.
    '''
    num_steps = steps_before_switch + steps_after_switch
    for i in range(num_sims):
        if forceActions != 0:
            print("Forcing actions:", forceActions)
            forced = run_effect_size_simulations.make_forced_actions(
                len(prob_per_arm), num_steps, forceActions)
        else:
            forced = forced_actions()
        cur_reward_file = get_rewards_filename(outfile_directory, num_steps, i)
        generate_single_bandit.generate_file(np.array(prob_per_arm), num_steps, cur_reward_file)
        cur_output_file = get_output_filename(outfile_directory, num_steps, i)
        models = [beta_bernoulli.BetaBern(success=1, failure=1)
                  for _ in range(len(prob_per_arm))]
        thompson_policy.calculate_thompson_switch_to_fixed_policy(
            cur_reward_file,
            num_actions=len(prob_per_arm),
            dest=cur_output_file,
            num_actions_before_switch=steps_before_switch,
            models=models,
            action_mode=thompson_policy.ActionSelectionMode.prob_is_best,
            epsilon=1.0,
            switch_to_best_if_nonsignificant=switch_to_best_if_nonsignificant,
            forced=forced)
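
# Usage sketch (hypothetical values): sample uniformly for the first 50 steps,
# then commit to a fixed arm for the remaining 50.
#
#   run_simulations_uniform_random_binary(
#       num_sims=500, prob_per_arm=[0.6, 0.4],
#       steps_before_switch=50, steps_after_switch=50,
#       outfile_directory='results/switch/',
#       switch_to_best_if_nonsignificant=True)
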
def test_all_in_directory(num_sims, step_sizes, outfile_directory, outfile_prefix,
                          prior, num_actions=2, config={}):
    assert num_actions == 2
    if config.get(read_config.NUM_STEP_SIZES_HEADER, len(step_sizes)) < len(step_sizes):
        step_sizes = step_sizes[:config.get(read_config.NUM_STEP_SIZES_HEADER)]
    print('Config:', config)
    rows = []
    all_stats_file = config.get(read_config.PRINT_ALL_STATS_FILE_HEADER)
    # Dummy context manager that yields None, so the with-statement below works
    # whether or not an all-stats file was configured.
    none_context = contextmanager(lambda: iter([None]))()
    # Open for writing: csv.writer writes per-simulation stats into this file.
    with (open(all_stats_file, 'w', newline='') if all_stats_file is not None
          else none_context) as stats_outfile:
        if stats_outfile is not None:
            csvwriter = csv.writer(stats_outfile)
        else:
            csvwriter = None
        for i in range(num_sims):
            for num_steps in step_sizes:
                forced = run_effect_size_simulations.make_forced_actions(
                    num_actions, num_steps, config[read_config.FORCE_ACTIONS_HEADER])
                if config[read_config.BINARY_REWARDS_HEADER]:
                    actions_infile = run_effect_size_simulations_beta.get_output_filename(
                        outfile_directory, num_steps, i)
                else:
                    actions_infile = run_effect_size_simulations.get_output_filename(
                        outfile_directory, num_steps, i)
                if debug:
                    print("processing file:", actions_infile)
                outfile_row = make_outfile_row(actions_infile, config, num_steps, prior,
                                               csv_writer_all_stats=csvwriter,
                                               forced_actions=forced)
                print("processing completed:", actions_infile)
                outfile_row[SIM_NUMBER_HEADER] = i
                rows.append(outfile_row)
    dataframe_headers = [NUM_STEPS_HEADER, SIM_NUMBER_HEADER, PRIOR_MEAN_HEADER,
                         PVALUE_HEADER, ACTUAL_STAT_HEADER]
    for action in range(num_actions):
        dataframe_headers.append(NUM_SAMPLES_BY_ACTION_HEADER.format(action + 1))
    df = pd.DataFrame.from_records(rows, columns=dataframe_headers)
    df[IS_BINARY_HEADER] = config[read_config.BINARY_REWARDS_HEADER]
    df[NUM_PERMUTATIONS_OUT_HEADER] = config[NUM_PERMUTATIONS_HEADER]
    if debug:
        print("writing to", outfile_prefix + PERMUTATION_TEST_OUT_FILE_SUFFIX)
    df.to_pickle(outfile_prefix + PERMUTATION_TEST_OUT_FILE_SUFFIX)
    if WRITE_CSV:
        df.to_csv(outfile_prefix + PERMUTATION_TEST_OUT_FILE_CSV_SUFFIX)
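
# Usage sketch (hypothetical config keys and values): running the permutation
# tests over previously generated action files. The config dict must supply at
# least the headers referenced above; the exact values here are illustrative.
#
#   config = {read_config.BINARY_REWARDS_HEADER: True,
#             read_config.FORCE_ACTIONS_HEADER: 0,
#             NUM_PERMUTATIONS_HEADER: 1000}
#   test_all_in_directory(num_sims=500, step_sizes=[100],
#                         outfile_directory='results/ts/',
#                         outfile_prefix='results/ts/perm_',
#                         prior=(1, 1), config=config)
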
def run_simulations(num_sims, prob_per_arm, step_sizes, outfile_directory,
                    successPrior=1, failurePrior=1, softmax_beta=None,
                    reordering_fn=None, forceActions=0, batch_size=1, burn_in_size=1,
                    random_dur=0, random_start=0, mode='', c=0.1, resample=True,
                    ns_stop=0):
    '''
    Runs num_sims bandit simulations with several different sample sizes
    (those in the list step_sizes). Bandit uses the PPD variant of the
    two-phase random/Thompson policy, run in-memory ("fast mode").
    '''
    csv_output_file_names = []
    sim_results_dfs_list = []
    for num_steps in step_sizes:
        sim_results = []
        for i in range(num_sims):
            if forceActions != 0:
                forced = run_effect_size_simulations.make_forced_actions(
                    len(prob_per_arm), num_steps, forceActions)
            else:
                forced = forced_actions()
            if softmax_beta is not None:
                raise ValueError("softmax_beta is not supported in fast mode.")
            if mode == 'uniform':
                models = [beta_bernoulli.BetaBern(success=1, failure=1)
                          for _ in range(len(prob_per_arm))]
                random_dur = num_steps
            else:
                models = [beta_bernoulli.BetaBern(success=successPrior, failure=failurePrior)
                          for _ in range(len(prob_per_arm))]
            sim_result, column_names, _ = thompson_policy.ppd_two_phase_random_thompson_policy(
                prob_per_arm=prob_per_arm,
                users_count=num_steps,
                random_dur=random_dur,
                models=models,
                random_start=random_start,
                action_mode='Greedy',
                relearn=True,
                forced=forced,
                batch_size=batch_size,
                c=c,
                resample=resample,
                ns_stop=ns_stop)
            # TODO: IPW could be computed here from sim_result (the in-memory
            # equivalent of the old actions file) via calculate_ipw_by_step_size;
            # not used yet.
            sim_results.extend(sim_result)
        sim_results_df = pd.DataFrame(sim_results, columns=column_names)
        sim_results_df.index = [idx for idx in range(num_steps)] * num_sims
        sim_results_dfs_list.append(sim_results_df)
        cur_output_file = get_output_filename(outfile_directory, num_steps, None, mode)
        csv_output_file_names.append(cur_output_file)
    return sim_results_dfs_list, csv_output_file_names
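
# Usage sketch (hypothetical values): PPD fast mode with a uniform-random
# burn-in of 20 steps before switching to the greedy PPD policy.
#
#   dfs, csv_names = run_simulations(num_sims=500, prob_per_arm=[0.6, 0.4],
#                                    step_sizes=[100],
#                                    outfile_directory='results/ppd_fast/',
#                                    random_dur=20, c=0.1, ns_stop=0)
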