def run_simulations_uniform_random(num_sims, prob_per_arm, step_sizes, outfile_directory, forceActions = 0):
    '''
    Runs num_sims bandit simulations for each sample size in the list step_sizes.
    Samples arms uniformly at random (Thompson sampling with epsilon = 1.0 and uniform Beta(1, 1) priors).
    '''

    for i in range(num_sims):
        for num_steps in step_sizes:
            if forceActions != 0:
                print("Forcing actions:", forceActions)
                forced = run_effect_size_simulations.make_forced_actions(len(prob_per_arm), num_steps, forceActions)
            else:
                forced = forced_actions()
            cur_reward_file = get_rewards_filename(outfile_directory, num_steps, i)
            generate_single_bandit.generate_file(np.array(prob_per_arm),
                                                 num_steps,        
                                                 cur_reward_file)
            cur_output_file = get_output_filename(outfile_directory, num_steps, i)
            models = [beta_bernoulli.BetaBern(success=1, failure=1) for _ in range(len(prob_per_arm))]
            thompson_policy.calculate_thompson_single_bandit(
                cur_reward_file,
                num_actions=len(prob_per_arm),
                dest=cur_output_file,
                models=models,
                action_mode=thompson_policy.ActionSelectionMode.prob_is_best,
                epsilon=1.0,
                relearn=True,
                forced=forced)
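# Usage sketch for run_simulations_uniform_random (a minimal example; the argument values and
# the 'sim_output' directory are hypothetical, and the directory is assumed to already exist):
def _demo_run_simulations_uniform_random():
    run_simulations_uniform_random(num_sims=5,
                                   prob_per_arm=[0.5, 0.6],
                                   step_sizes=[100, 500],
                                   outfile_directory='sim_output',
                                   forceActions=0)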
def run_simulations(num_sims, prob_per_arm, step_sizes, outfile_directory,
    successPrior = 1, failurePrior = 1, softmax_beta = None,
    reordering_fn = None, forceActions = 0, batch_size = 1, burn_in_size = 1):
    '''
    Runs num_sims bandit simulations for each sample size in the list step_sizes.
    Bandit uses Thompson sampling (thompson_policy.old_two_phase_random_thompson_policy with no random phase)
    over Beta-Bernoulli models; rewards can optionally be reordered by quartile via softmax_beta and reordering_fn.
    '''

    for i in range(num_sims):
        for num_steps in step_sizes:
            if forceActions != 0:
                forced = run_effect_size_simulations.make_forced_actions(len(prob_per_arm), num_steps, forceActions)
            else:
                forced = forced_actions()
            cur_reward_file = get_rewards_filename(outfile_directory, num_steps, i)
            generate_single_bandit.generate_file(np.array(prob_per_arm),
                                                 num_steps,        
                                                 cur_reward_file)
            if softmax_beta is not None:
                # reorder rewards
                reordered_reward_file = get_reordered_rewards_filename(outfile_directory, num_steps, i)
                reorder_samples_in_rewards.reorder_rewards_by_quartile(cur_reward_file, 
                                                                       reordered_reward_file, 
                                                                       reordering_fn, 
                                                                       softmax_beta)
            else:
                reordered_reward_file = cur_reward_file
            cur_output_file = get_output_filename(outfile_directory, num_steps, i)
            models = [beta_bernoulli.BetaBern(success=successPrior, failure=failurePrior) for _ in range(len(prob_per_arm))]


            # Previously this called thompson_policy.calculate_thompson_single_bandit on
            # reordered_reward_file; it was replaced by the two-phase policy call below
            # (with random_dur=0 and random_start=0, i.e. no random phase).
            thompson_policy.old_two_phase_random_thompson_policy(
                reordered_reward_file,
                num_actions=len(prob_per_arm),
                dest=cur_output_file,
                random_dur=0,
                models=models,
                random_start=0,
                action_mode=thompson_policy.ActionSelectionMode.prob_is_best,
                relearn=True,
                forced=forced,
                batch_size=batch_size,
                burn_in_size=burn_in_size)
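# Usage sketch for the file-based run_simulations above (hypothetical values; note that this
# module defines run_simulations several times, so the call below assumes this definition is
# the one in scope, e.g. when the snippet is used on its own):
def _demo_run_simulations_file_based():
    run_simulations(num_sims=5,
                    prob_per_arm=[0.5, 0.6],
                    step_sizes=[100, 500],
                    outfile_directory='sim_output',
                    successPrior=1,
                    failurePrior=1,
                    batch_size=1,
                    burn_in_size=1)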
def run_simulations(num_sims, prob_per_arm, step_sizes, outfile_directory,
    successPrior = 1, failurePrior = 1, softmax_beta = None,
    reordering_fn = None, forceActions = 0, batch_size = 1, burn_in_size = 1,
    random_dur=0, random_start=0, mode='', epsilon = 0.1, resample = True):
    '''
    Runs num_sims bandit simulations for each sample size in the list step_sizes (fast mode: no reward
    files are written, so softmax_beta reordering is not supported). Bandit uses
    thompson_policy.two_phase_random_thompson_policy; mode='uniform' uses Beta(1, 1) priors and sets the
    random phase to cover the whole run. Returns a list of per-step-size result DataFrames and the
    matching output file names.
    '''
    csv_output_file_names = []
    sim_results_dfs_list = []

    for num_steps in step_sizes:
        sim_results = []
        for i in range(num_sims):
            if forceActions != 0:
                forced = run_effect_size_simulations.make_forced_actions(len(prob_per_arm), num_steps, forceActions)
            else:
                forced = forced_actions()

            if softmax_beta is not None:
                # reorder rewards
                raise ValueError("softmax_beta is not supported in fast mode.")

            if mode=='uniform':
                models = [beta_bernoulli.BetaBern(success=1, failure=1) for _ in range(len(prob_per_arm))]
                random_dur = num_steps
            else:
                models = [beta_bernoulli.BetaBern(success=successPrior, failure=failurePrior) for _ in range(len(prob_per_arm))]


            sim_result, column_names,_ = \
                thompson_policy.two_phase_random_thompson_policy(
                            prob_per_arm=prob_per_arm,
                            users_count=num_steps,
                            random_dur=random_dur,
                            models=models,
                            random_start=random_start,
                            action_mode=thompson_policy.ActionSelectionMode.prob_is_best,
                            relearn=True,
                            forced = forced,
                            batch_size = batch_size, epsilon=epsilon,
                            decreasing_epsilon=1)

            sim_results.extend(sim_result)

        sim_results_df = pd.DataFrame(sim_results, columns=column_names)
        sim_results_df.index = list(range(num_steps)) * num_sims
        sim_results_dfs_list.append(sim_results_df)

        cur_output_file = get_output_filename(outfile_directory, num_steps, None, mode)
        csv_output_file_names.append(cur_output_file)

    return sim_results_dfs_list, csv_output_file_names
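# Usage sketch for the fast-mode run_simulations above (hypothetical values; assumes this
# definition is the one in scope). The returned DataFrames are written to the suggested CSV paths:
def _demo_run_simulations_fast_mode():
    dfs, csv_names = run_simulations(num_sims=5,
                                     prob_per_arm=[0.5, 0.6],
                                     step_sizes=[100, 500],
                                     outfile_directory='sim_output',
                                     mode='uniform',
                                     epsilon=0.1,
                                     batch_size=1)
    for df, csv_name in zip(dfs, csv_names):
        df.to_csv(csv_name)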
# Example 4
def run_simulations_empirical_rewards(num_sims,
                                      reward_file,
                                      experiment_id,
                                      reward_header,
                                      is_cost,
                                      outfile_directory,
                                      successPrior=1,
                                      failurePrior=1,
                                      forceActions=0,
                                      shuffle_data=False):
    '''
    Runs num_sims bandit simulations over empirical two-arm rewards read from reward_file.
    Bandit uses Thompson sampling with Beta-Bernoulli models.
    Returns the total number of steps and the per-arm reward means and variances.
    '''
    num_actions = 2
    max_steps = -1
    means = []
    variance = []
    for i in range(num_sims):
        arm_1_rewards, arm_2_rewards = get_assistments_rewards.read_assistments_rewards(
            reward_file, reward_header, experiment_id, is_cost)
        if shuffle_data:
            random.shuffle(arm_1_rewards)
            random.shuffle(arm_2_rewards)
        max_steps = len(arm_1_rewards) + len(arm_2_rewards)
        means = [np.mean(arm_1_rewards), np.mean(arm_2_rewards)]
        variance = [np.var(arm_1_rewards), np.var(arm_2_rewards)]
        if forceActions != 0:
            print("Forcing actions:", forceActions)
            forced = run_effect_size_simulations.make_forced_actions(
                num_actions,
                len(arm_1_rewards) + len(arm_2_rewards), forceActions)
        else:
            forced = forced_actions()

        cur_output_file = get_output_filename(
            outfile_directory,
            len(arm_1_rewards) + len(arm_2_rewards), i)
        models = [
            beta_bernoulli.BetaBern(success=successPrior, failure=failurePrior)
            for _ in range(num_actions)
        ]
        thompson_policy.calculate_thompson_single_bandit_empirical_params(
            arm_1_rewards,
            arm_2_rewards,
            num_actions=num_actions,
            dest=cur_output_file,
            models=models,
            action_mode=thompson_policy.ActionSelectionMode.prob_is_best,
            relearn=True,
            forced=forced)
    return max_steps, means, variance
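# Usage sketch for run_simulations_empirical_rewards (the reward file name, experiment id, and
# reward column header are hypothetical; the file must be readable by get_assistments_rewards):
def _demo_run_simulations_empirical_rewards():
    max_steps, means, variances = run_simulations_empirical_rewards(
        num_sims=5,
        reward_file='empirical_rewards.csv',
        experiment_id='experiment_1',
        reward_header='reward',
        is_cost=False,
        outfile_directory='sim_output',
        shuffle_data=True)
    print('steps per simulation:', max_steps, 'arm means:', means, 'arm variances:', variances)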
# Example 5
def run_simulations(num_sims, prob_per_arm, step_sizes, outfile_directory,
    successPrior = 1, failurePrior = 1, softmax_beta = None,
    reordering_fn = None, forceActions = 0, batch_size = 1, burn_in_size = 1,
    c = 0.1, resample = True):
    '''
    Runs num_sims bandit simulations for each sample size in the list step_sizes.
    Bandit uses the epsilon policy in ppd.calculate_epsilon_single_bandit with Beta-Bernoulli models
    and exploration parameter c (greedy if no models are passed).
    '''

    for i in range(num_sims):
        for num_steps in step_sizes:
            if forceActions != 0:
                forced = run_effect_size_simulations.make_forced_actions(
                    len(prob_per_arm), num_steps, forceActions)
            else:
                forced = forced_actions()
            cur_reward_file = get_rewards_filename(outfile_directory,
                                                   num_steps, i)
            generate_single_bandit.generate_file(np.array(prob_per_arm),
                                                 num_steps, cur_reward_file)
            if softmax_beta is not None:
                # reorder rewards
                reordered_reward_file = get_reordered_rewards_filename(
                    outfile_directory, num_steps, i)
                reorder_samples_in_rewards.reorder_rewards_by_quartile(
                    cur_reward_file, reordered_reward_file, reordering_fn,
                    softmax_beta)
            else:
                reordered_reward_file = cur_reward_file
            cur_output_file = get_output_filename(outfile_directory, num_steps,
                                                  i)
            models = [
                beta_bernoulli.BetaBern(success=successPrior,
                                        failure=failurePrior)
                for _ in range(len(prob_per_arm))
            ]

            # If no model is passed, the policy falls back to greedy action selection.
            # Previously used thresholds: thresh = 0.03, or thresh = 0.1 for a small effect
            # size (es = 0.1, i.e. 0.55 - 0.45 = 0.10).
            ppd.calculate_epsilon_single_bandit(reordered_reward_file,
                                                models=models,
                                                num_actions=len(prob_per_arm),
                                                dest=cur_output_file,
                                                forced=forced,
                                                c=c,
                                                resample=resample)
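# Usage sketch for the ppd/epsilon run_simulations above (hypothetical values; assumes this
# definition is the one in scope):
def _demo_run_simulations_ppd():
    run_simulations(num_sims=5,
                    prob_per_arm=[0.5, 0.6],
                    step_sizes=[100, 500],
                    outfile_directory='sim_output',
                    c=0.1,
                    resample=True)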
def run_simulations_uniform_random_binary(
        num_sims,
        prob_per_arm,
        steps_before_switch,
        steps_after_switch,
        outfile_directory,
        forceActions=0,
        switch_to_best_if_nonsignificant=True):
    '''
    Runs num_sims bandit simulations of length steps_before_switch + steps_after_switch.
    Samples uniformly at random (epsilon = 1.0) for steps_before_switch steps, then switches to a fixed
    policy for the remaining steps (thompson_policy.calculate_thompson_switch_to_fixed_policy);
    switch_to_best_if_nonsignificant controls the post-switch arm choice when no arm is significantly better.
    '''
    num_steps = steps_before_switch + steps_after_switch

    for i in range(num_sims):
        if forceActions != 0:
            print("Forcing actions:", forceActions)
            forced = run_effect_size_simulations.make_forced_actions(
                len(prob_per_arm), num_steps, forceActions)
        else:
            forced = forced_actions()

        cur_reward_file = get_rewards_filename(outfile_directory, num_steps, i)
        generate_single_bandit.generate_file(np.array(prob_per_arm), num_steps,
                                             cur_reward_file)
        cur_output_file = get_output_filename(outfile_directory, num_steps, i)
        models = [
            beta_bernoulli.BetaBern(success=1, failure=1)
            for _ in range(len(prob_per_arm))
        ]
        thompson_policy.calculate_thompson_switch_to_fixed_policy(
            cur_reward_file,
            num_actions=len(prob_per_arm),
            dest=cur_output_file,
            num_actions_before_switch=steps_before_switch,
            models=models,
            action_mode=thompson_policy.ActionSelectionMode.prob_is_best,
            epsilon=1.0,
            switch_to_best_if_nonsignificant=switch_to_best_if_nonsignificant,
            forced=forced)
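# Usage sketch for run_simulations_uniform_random_binary (hypothetical values): 200 uniformly
# random steps followed by 300 steps under the switched-to fixed policy:
def _demo_run_simulations_uniform_random_binary():
    run_simulations_uniform_random_binary(num_sims=5,
                                          prob_per_arm=[0.5, 0.6],
                                          steps_before_switch=200,
                                          steps_after_switch=300,
                                          outfile_directory='sim_output',
                                          switch_to_best_if_nonsignificant=True)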
def test_all_in_directory(num_sims,
                          step_sizes,
                          outfile_directory,
                          outfile_prefix,
                          prior,
                          num_actions=2,
                          config={}):
    assert num_actions == 2
    if config.get(read_config.NUM_STEP_SIZES_HEADER,
                  len(step_sizes)) < len(step_sizes):
        step_sizes = step_sizes[:config.get(read_config.NUM_STEP_SIZES_HEADER)]

    if debug or True:
        print('Config:', config)

    rows = []
    all_stats_file = config.get(read_config.PRINT_ALL_STATS_FILE_HEADER)
    # Stand-in for contextlib.nullcontext(): yields None when no stats file is configured.
    none_context = contextmanager(lambda: iter([None]))()
    # Open the all-stats file for writing so per-simulation stats can be streamed out as CSV.
    with (open(all_stats_file, 'w', newline='')
          if all_stats_file is not None else none_context) as stats_outfile:
        if stats_outfile is not None:
            csvwriter = csv.writer(stats_outfile)
        else:
            csvwriter = None
        for i in range(num_sims):
            for num_steps in step_sizes:
                forced = run_effect_size_simulations.make_forced_actions(
                    num_actions, num_steps,
                    config[read_config.FORCE_ACTIONS_HEADER])

                if config[read_config.BINARY_REWARDS_HEADER]:
                    actions_infile = run_effect_size_simulations_beta.get_output_filename(
                        outfile_directory, num_steps, i)
                else:
                    actions_infile = run_effect_size_simulations.get_output_filename(
                        outfile_directory, num_steps, i)

                if debug:
                    print("processing file:", actions_infile)
                outfile_row = make_outfile_row(actions_infile,
                                               config,
                                               num_steps,
                                               prior,
                                               csv_writer_all_stats=csvwriter,
                                               forced_actions=forced)
                if debug or True:
                    print("processing completed:", actions_infile)

                outfile_row[SIM_NUMBER_HEADER] = i
                rows.append(outfile_row)
    dataframe_headers = [
        NUM_STEPS_HEADER, SIM_NUMBER_HEADER, PRIOR_MEAN_HEADER, PVALUE_HEADER,
        ACTUAL_STAT_HEADER
    ]
    for action in range(num_actions):
        dataframe_headers.append(
            NUM_SAMPLES_BY_ACTION_HEADER.format(action + 1))
    df = pd.DataFrame.from_records(rows, columns=dataframe_headers)
    df[IS_BINARY_HEADER] = config[read_config.BINARY_REWARDS_HEADER]
    df[NUM_PERMUTATIONS_OUT_HEADER] = config[NUM_PERMUTATIONS_HEADER]
    if debug:
        print("writing to", outfile_prefix + PERMUTATION_TEST_OUT_FILE_SUFFIX)
    df.to_pickle(outfile_prefix + PERMUTATION_TEST_OUT_FILE_SUFFIX)
    if WRITE_CSV:
        df.to_csv(outfile_prefix + PERMUTATION_TEST_OUT_FILE_CSV_SUFFIX)
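# Usage sketch for test_all_in_directory (hypothetical config and prior values; only the config
# keys the function reads are shown, and the per-simulation action files are assumed to have been
# written to outfile_directory already):
def _demo_test_all_in_directory():
    config = {
        read_config.FORCE_ACTIONS_HEADER: 0,
        read_config.BINARY_REWARDS_HEADER: True,
        NUM_PERMUTATIONS_HEADER: 1000,
    }
    test_all_in_directory(num_sims=5,
                          step_sizes=[100, 500],
                          outfile_directory='sim_output',
                          outfile_prefix='sim_output/permutation_test',
                          prior=[1, 1],
                          num_actions=2,
                          config=config)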
def run_simulations(num_sims, prob_per_arm, step_sizes, outfile_directory,
    successPrior = 1, failurePrior = 1, softmax_beta = None,
    reordering_fn = None, forceActions = 0, batch_size = 1, burn_in_size = 1,
    random_dur=0, random_start=0, mode='', c = 0.1, resample = True, ns_stop = 0):
    '''
    Runs num_sims bandit simulations for each sample size in the list step_sizes (fast mode: no reward
    files are written, so softmax_beta reordering is not supported). Bandit uses
    thompson_policy.ppd_two_phase_random_thompson_policy with greedy action selection and parameters
    c, resample, and ns_stop; mode='uniform' uses Beta(1, 1) priors and sets the random phase to cover
    the whole run. Returns a list of per-step-size result DataFrames and the matching output file names.
    '''
    csv_output_file_names = []
    sim_results_dfs_list = []

    for num_steps in step_sizes:
        sim_results = []
        for i in range(num_sims):
            if forceActions != 0:
                forced = run_effect_size_simulations.make_forced_actions(len(prob_per_arm), num_steps, forceActions)
            else:
                forced = forced_actions()

            if softmax_beta is not None:
                # reorder rewards
                raise ValueError("softmax_beta is not supported in fast mode.")

            if mode=='uniform':
                models = [beta_bernoulli.BetaBern(success=1, failure=1) for _ in range(len(prob_per_arm))]
                random_dur = num_steps
            else:
                models = [beta_bernoulli.BetaBern(success=successPrior, failure=failurePrior) for _ in range(len(prob_per_arm))]


            
            sim_result, column_names,_ = \
                thompson_policy.ppd_two_phase_random_thompson_policy(
                            prob_per_arm=prob_per_arm,
                            users_count=num_steps,
                            random_dur=random_dur,
                            models=models,
                            random_start=random_start,
                            action_mode='Greedy',
                            relearn=True,
                            forced = forced,
                            batch_size = batch_size, c=c, resample = resample, ns_stop = ns_stop)

            # TODO: compute IPW here? This would be the equivalent of the old actions file (actions_df):
            #   sim_result_df = pd.DataFrame(sim_result, columns=column_names)  # not used yet
            #   calculate_ipw_by_step_size(actions_root=sim_result_df, num_samples=1000, num_actions=2,
            #                              cached_probs={}, prior=prior, binary_rewards=is_binary,
            #                              config=config, n=n, num_sims=num_sims, batch_size=bs)
            sim_results.extend(sim_result)

        sim_results_df = pd.DataFrame(sim_results, columns=column_names)
        sim_results_df.index = list(range(num_steps)) * num_sims
        sim_results_dfs_list.append(sim_results_df)

        cur_output_file = get_output_filename(outfile_directory, num_steps, None, mode)
        csv_output_file_names.append(cur_output_file)

    return sim_results_dfs_list, csv_output_file_names
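# Usage sketch for the ppd fast-mode run_simulations above (hypothetical values; this is the last
# definition of run_simulations, so it is the one in scope when the whole file is loaded as a module):
def _demo_run_simulations_ppd_fast_mode():
    dfs, csv_names = run_simulations(num_sims=5,
                                     prob_per_arm=[0.5, 0.6],
                                     step_sizes=[100, 500],
                                     outfile_directory='sim_output',
                                     c=0.1,
                                     resample=True,
                                     ns_stop=0)
    for df, csv_name in zip(dfs, csv_names):
        df.to_csv(csv_name)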