Example #1
def empirical_main():
    # Assumes sys.argv[1] == 'empirical'
    recalculate_bandits = True
    num_arms = 2

    num_sims = int(sys.argv[2])

    reward_file = sys.argv[3]
    experiment_id = sys.argv[4]
    reward_header = sys.argv[5]

    if sys.argv[6] == "use_cost":
        is_cost = True
    else:
        is_cost = False

    outfile_directory = sys.argv[7]

    priorProportionOnSuccess = float(sys.argv[8])

    forceActions = 0

    shuffle_data = False
    if len(sys.argv) > 9:
        shuffle_data = sys.argv[9] == 'True'

    bandit_type = "Thompson"
    bandit_type_prefix = 'BB'

    prior_params = None
    if recalculate_bandits:
        # Make sure the prior sums to 2, mirroring the successes/failures of a uniform prior
        prior_params = [
            priorProportionOnSuccess * 2, 2 - priorProportionOnSuccess * 2
        ]
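        # For example, priorProportionOnSuccess = 0.7 gives prior_params = [1.4, 0.6],
        # which keeps the same total pseudo-count of 2 as a uniform Beta(1, 1) prior.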
        print("Prior params: ", prior_params)

        max_steps, prob_per_arm, variance = run_simulations_empirical_rewards(
            num_sims, reward_file, experiment_id, reward_header, is_cost,
            outfile_directory, prior_params[0], prior_params[1], forceActions,
            shuffle_data)

    outfile_prefix = outfile_directory + bandit_type_prefix + experiment_id + reward_header
    effect_size = 0
    step_sizes = [max_steps]
    df = calculate_statistics_from_sims(outfile_directory, num_sims,
                                        step_sizes, effect_size, DESIRED_ALPHA)
    df.to_pickle(outfile_prefix + 'Df.pkl')
    df_by_trial = calculate_by_trial_statistics_from_sims(
        outfile_directory, num_sims, step_sizes, effect_size, DESIRED_ALPHA)

    df_by_trial.to_pickle(outfile_prefix + 'DfByTrial.pkl')
    # Print various stats
    summary_text = effect_size_sim_output_viz.print_output_stats(
        df, prob_per_arm, False, prior_params=prior_params, reordering_info=0)
    with open(outfile_prefix + 'SummaryText.txt', 'w', newline='') as outf:
        outf.write(summary_text)
    overall_stats_df = effect_size_sim_output_viz.make_overall_stats_df(
        df, prob_per_arm, False, effect_size)
    overall_stats_df.to_pickle(outfile_prefix + 'OverallStatsDf.pkl')

    # Make histogram
    hist_figure = effect_size_sim_output_viz.make_hist_of_trials(df)
    hist_figure.savefig(outfile_prefix + 'HistOfConditionProportions.pdf',
                        bbox_inches='tight')

    # Make line plot
    test_stat_figure = effect_size_sim_output_viz.make_by_trial_graph_of_column(
        df_by_trial, 'stat')
    test_stat_figure.savefig(outfile_prefix + 'TestStatOverTime.pdf',
                             bbox_inches='tight')

    pvalue_figure = effect_size_sim_output_viz.make_by_trial_graph_of_column(
        df_by_trial, 'pvalue')
    pvalue_figure.savefig(outfile_prefix + 'PValueOverTime.pdf',
                          bbox_inches='tight')

    # Plot power
    power_figure = effect_size_sim_output_viz.plot_power_by_steps(
        df_by_trial, DESIRED_ALPHA, DESIRED_POWER)
    power_figure.savefig(outfile_prefix + 'PowerOverTime.pdf',
                         bbox_inches='tight')

    #Plot reward
    reward_figure = effect_size_sim_output_viz.make_by_trial_graph_of_column(
        df_by_trial, 'total_reward')
    reward_figure = effect_size_sim_output_viz.add_expected_reward_to_figure(
        reward_figure, prob_per_arm, step_sizes)
    reward_figure.savefig(outfile_prefix + 'RewardOverTime.pdf',
                          bbox_inches='tight')

    # Plot arm statistics
    arm_df_by_trial = create_arm_stats_by_step(outfile_directory, num_sims,
                                               step_sizes[-1], num_arms)
    arm_stats_figure = effect_size_sim_output_viz.make_by_trial_arm_statistics(
        arm_df_by_trial, num_arms)
    arm_stats_figure.savefig(outfile_prefix + 'ArmStats.pdf',
                             bbox_inches='tight')
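
The argument layout above is easiest to see in a concrete call. A minimal invocation sketch follows; the script name and file paths are hypothetical placeholders, and it assumes empirical_main() is the function defined above.

import sys

# Hypothetical argv layout for empirical_main(); names and values are placeholders.
sys.argv = [
    "run_effect_size_simulations.py",  # argv[0]: script name (assumed)
    "empirical",     # argv[1]: selects the empirical-rewards mode
    "500",           # argv[2]: num_sims
    "rewards.csv",   # argv[3]: reward_file
    "exp1",          # argv[4]: experiment_id
    "reward",        # argv[5]: reward_header (column to treat as the reward)
    "use_reward",    # argv[6]: anything other than "use_cost" means is_cost = False
    "results/",      # argv[7]: outfile_directory
    "0.5",           # argv[8]: priorProportionOnSuccess (prior pseudo-counts sum to 2)
    "True",          # argv[9]: optional; shuffle_data
]
empirical_main()
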
def mainNormalReward():
    print("Running normal")

    recalculate_bandits = True
    mean1, mu1 = get_mean_and_prior_from_string(sys.argv[1])
    mean2, mu2 = get_mean_and_prior_from_string(sys.argv[2])
    if mu1 != mu2 and mu2 != 0:
        print(
            "Error: different priors on the arms aren't implemented for normal bandits."
        )
        exit()

    means = [mean1, mean2]
    if mean1 == mean2:
        # effect size must be 0 - interpret third argument as the variance to use for the arms
        variance = float(sys.argv[3])
        # n, for basing number of steps off of, also has to be set
        n = int(sys.argv[7])
        # equal arm means indicates there's no effect
        effect_size = 0
    elif len(sys.argv) > 8 and sys.argv[8].startswith("fixedVariance"):
        # We're running a simulation based on an existing experiment, so we want to set the variance
        # manually
        variance = [float(num) for num in sys.argv[3].split(",")]
        # n, for basing number of steps off of, also has to be set
        n = int(sys.argv[7])
        # equal arm means indicates there's no effect
        effect_size = 0  #TODO: fix!
    else:
        effect_size = float(sys.argv[3])
        variance = get_var_from_effect_size(mean1, mean2, effect_size)
        nobs1 = statsmodels.stats.power.tt_ind_solve_power(
            effect_size, None, DESIRED_ALPHA, DESIRED_POWER, 1)
        n = math.ceil(nobs1)
    num_sims = int(sys.argv[4])
    outfile_directory = sys.argv[5]
    bandit_type = "Thompson"
    bandit_type_prefix = 'NG'
    if len(sys.argv) > 6:
        bandit_type = sys.argv[6]
    if bandit_type == "uniform":
        bandit_type_prefix = "NU"  # Normal rewards, uniform policy

    if len(sys.argv) > 8 and sys.argv[8].startswith("forceActions"):
        FORCE_ACTIONS = True
        num_to_force = float(sys.argv[8].split(",")[1])
    else:
        num_to_force = 0
    reorder_rewards = False
    softmax_beta = None
    num_samples_before_switch = -1
    if len(sys.argv) > 8 and sys.argv[8].startswith("numSamples:"):
        num_samples_array = sys.argv[8].split(":")[1:]
        num_samples_before_switch = int(num_samples_array[0])
        num_samples_after_switch = int(num_samples_array[1])

    if len(sys.argv) > 9 and sys.argv[9].startswith("switchIfNonSig:"):
        switch_to_best_if_nonsignificant = sys.argv[9].split(
            ":")[1].lower() == "true"
    else:
        switch_to_best_if_nonsignificant = False

    num_arms = 2

    # n here is half of what's required for .8 power (the number in one condition)
    step_sizes_before_switch = [int(round(0.5 * n)), n, 2 * n, 4 * n]
    if len(sys.argv) > 10 and sys.argv[10].startswith("multiplier:"):
        multiplier = int(sys.argv[10].split(":")[1])
        print("multiplier:", multiplier)
    else:
        multiplier = 5
    step_sizes = [(multiplier + 1) * step_size
                  for step_size in step_sizes_before_switch]
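    # For example, n = 88 with the default multiplier of 5 gives
    # step_sizes_before_switch = [44, 88, 176, 352] and step_sizes = [264, 528, 1056, 2112].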

    if recalculate_bandits:
        if bandit_type == "uniform":
            if num_samples_before_switch > 0:
                step_sizes = [
                    num_samples_before_switch + num_samples_after_switch
                ]
                run_simulations_uniform_random(
                    num_sims,
                    means,
                    variance,
                    num_samples_before_switch,
                    num_samples_after_switch,
                    outfile_directory,
                    forceActions=num_to_force,
                    switch_to_best_if_nonsignificant=
                    switch_to_best_if_nonsignificant)
            else:
                for num_steps in step_sizes_before_switch:
                    run_simulations_uniform_random(
                        num_sims,
                        means,
                        variance,
                        num_steps,
                        num_steps * multiplier,
                        outfile_directory,
                        forceActions=num_to_force,
                        switch_to_best_if_nonsignificant=
                        switch_to_best_if_nonsignificant)
        else:
            if reorder_rewards:
                print(
                    "Error: reward reordering not implemented for run switch to best simulations"
                )
            else:
                if num_samples_before_switch > 0:
                    step_sizes = [
                        num_samples_before_switch + num_samples_after_switch
                    ]
                run_simulations(num_sims,
                                means,
                                variance,
                                step_sizes,
                                outfile_directory,
                                prior_mean=mu1,
                                forceActions=num_to_force)

    outfile_prefix = outfile_directory + bandit_type_prefix + str(effect_size)
    if effect_size == 0:
        # Then include the n and the arm variance in the prefix
        outfile_prefix += "N" + str(n) + "Var" + str(variance)
    print("step_sizes:", step_sizes)
    df = calculate_statistics_from_sims(outfile_directory, num_sims,
                                        step_sizes, effect_size,
                                        switch_to_best_if_nonsignificant,
                                        step_sizes_before_switch,
                                        DESIRED_ALPHA)
    df.to_pickle(outfile_prefix + 'Df.pkl')
    df_by_trial = calculate_by_trial_statistics_from_sims(
        outfile_directory, num_sims, step_sizes, effect_size, DESIRED_ALPHA)
    df_by_trial.to_pickle(outfile_prefix + 'DfByTrial.pkl')

    # Print various stats
    summary_text = effect_size_sim_output_viz.print_output_stats(
        df,
        means + [variance],
        True,
        effect_size,
        reordering_info=softmax_beta)

    with open(outfile_prefix + 'SummaryText.txt', 'w', newline='') as outf:
        outf.write(summary_text)
    overall_stats_df = effect_size_sim_output_viz.make_overall_stats_df(
        df, means + [variance], True, effect_size)
    overall_stats_df.to_pickle(outfile_prefix + 'OverallStatsDf.pkl')
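
A sketch of how mainNormalReward() consumes its arguments, for reference. The script name is hypothetical, and the exact string format for the mean/prior arguments is defined by get_mean_and_prior_from_string (not shown), so those values are left as placeholders rather than runnable inputs.

import sys

# Hypothetical argv layout for mainNormalReward(); placeholder values throughout.
sys.argv = [
    "run_effect_size_simulations.py",  # argv[0]: script name (assumed)
    "<mean1-with-prior>",   # argv[1]: parsed by get_mean_and_prior_from_string
    "<mean2-with-prior>",   # argv[2]: parsed by get_mean_and_prior_from_string
    "0.3",                  # argv[3]: effect size (or the variance(s) for equal means / fixedVariance)
    "500",                  # argv[4]: num_sims
    "results/",             # argv[5]: outfile_directory
    "Thompson",             # argv[6]: bandit_type ("uniform" switches to the NU prefix)
    "88",                   # argv[7]: n; only read in the equal-means / fixedVariance branches
    "numSamples:100:400",   # argv[8]: optional; also accepts "forceActions,<k>" or "fixedVariance..."
    "switchIfNonSig:true",  # argv[9]: optional
    "multiplier:5",         # argv[10]: optional
]
mainNormalReward()
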
Example #3
def main():
    recalculate_bandits = True

    #batch_size = 1.0

    num_sims = int(sys.argv[2])
    outfile_directory = sys.argv[3]
    # burn-in and batch sizes are encoded in the directory name as "...=<burn_in>-<batch>"
    burn_in_str, batch_str = outfile_directory.split("=")[-1].split('-')[:2]
    burn_in_size, batch_size = int(burn_in_str), int(batch_str)
    print("burn_in_size, batch_size", burn_in_size, batch_size)
    num_arms = 2
    # if sys.argv[1] has a comma, just use the result as probability per arm
    if "," in sys.argv[1]:
        if sys.argv[1].count(",") == 1:
            # specifying probability per arm but not effect size
            prob_per_arm = [float(armProb) for armProb in sys.argv[1].split(",")]
            effect_size = 0  # Note: This will be wrong if arm probs aren't equal!
        else:
            # specifying probability per arm as first two arguments, and then effect size
            numeric_arguments = [float(armProb) for armProb in sys.argv[1].split(",")]
            prob_per_arm = numeric_arguments[:2]  # first two are arm probabilities
            effect_size = numeric_arguments[2]  # final is effect size
        # We also need to specify n in this case for deciding on step sizes
        n = int(sys.argv[6])
    else:
        # We just need effect size for this calculation
        effect_size = float(sys.argv[1].split("-")[0])
        center = float(sys.argv[1].split("-")[1])
        prob_per_arm = get_prob_per_arm_from_effect_size(effect_size, center)
        # Assumes we have two arms
        nobs_total = smp.GofChisquarePower().solve_power(
            effect_size,
            n_bins=(2 - 1) * (2 - 1) + 1,
            alpha=DESIRED_ALPHA,
            power=DESIRED_POWER)
        #         print("Calculated nobs for effect size:", nobs_total)
        n = math.ceil(nobs_total)
        print("center", center)
    #step_sizes = [math.ceil(n/2), n, 2*n] # These differ from the version for normal because in normal, n represented size for one cond rather than overall size
    # These differ from the version for normal because in normal, n represented size for one cond rather than overall size
    step_sizes = [math.ceil(n / 2), n, 2 * n, 4 * n]

    print("prob_per_arm", prob_per_arm)
    if len(sys.argv) > 7 and sys.argv[7].startswith("forceActions"):
        run_effect_size_simulations.FORCE_ACTIONS = True
        num_to_force = float(sys.argv[7].split(",")[1])
    else:
        num_to_force = 0

    bandit_type = "Thompson"
    bandit_type_prefix = 'BB'
    if len(sys.argv) > 4:
        bandit_type = sys.argv[4]
    if bandit_type == "uniform":
        bandit_type_prefix = "BU"  # Bernoulli rewards, uniform policy

    reorder_rewards = False
    softmax_beta = None
    reordering_fn = None
    if len(sys.argv) > 7 and not sys.argv[7].startswith("forceActions"):
        # softmax beta for how to reorder rewards
        reorder_rewards = True
        softmax_beta = float(sys.argv[7])
        reordering_fn = reorder_samples_in_rewards.order_by_named_column(
            'Action1OracleActualReward')
        if len(sys.argv) > 8:
            reordering_fn_specifier = sys.argv[8]
            reordering_fn = reorder_samples_in_rewards.get_reordering_fn(
                reordering_fn_specifier)

    prior_params = None
    if recalculate_bandits:

        if bandit_type == "uniform":
            run_simulations_uniform_random(num_sims,
                                           prob_per_arm,
                                           step_sizes,
                                           outfile_directory,
                                           forceActions=num_to_force)
        else:
            if len(sys.argv) > 5:
                if sys.argv[5] == "armsHigh":
                    # Arms should be higher than the prior
                    priorProportionOnSuccess = min(
                        prob_per_arm) * PRIOR_PROPORTION_DIFFERENCE
                elif sys.argv[5] == "armsLow":
                    # Arms should be lower than the prior
                    priorProportionOnSuccess = 1 - (
                        1 - max(prob_per_arm)) * PRIOR_PROPORTION_DIFFERENCE
                else:
                    # Prior should be uniform (in between arms)
                    priorProportionOnSuccess = .5
                # Make sure the prior sums to 2, mirroring the successes/failures of a uniform prior
                prior_params = [
                    priorProportionOnSuccess * 2,
                    2 - priorProportionOnSuccess * 2
                ]
                print("Prior params: ", prior_params)

                run_simulations(num_sims,
                                prob_per_arm,
                                step_sizes,
                                outfile_directory,
                                prior_params[0],
                                prior_params[1],
                                softmax_beta=softmax_beta,
                                reordering_fn=reordering_fn,
                                forceActions=num_to_force,
                                batch_size=batch_size,
                                burn_in_size=burn_in_size)
            else:
                run_simulations(num_sims,
                                prob_per_arm,
                                step_sizes,
                                outfile_directory,
                                forceActions=num_to_force,
                                batch_size=batch_size,
                                burn_in_size=burn_in_size)

    outfile_prefix = outfile_directory + bandit_type_prefix + str(effect_size)
    if effect_size == 0:
        # Then include the n  in the prefix
        outfile_prefix += "N" + str(n)

    df = calculate_statistics_from_sims(outfile_directory, num_sims,
                                        step_sizes, effect_size, DESIRED_ALPHA)
    df.to_pickle(outfile_prefix + 'Df.pkl')
    df_by_trial = calculate_by_trial_statistics_from_sims(
        outfile_directory, num_sims, step_sizes, effect_size, DESIRED_ALPHA)

    df_by_trial.to_pickle(outfile_prefix + 'DfByTrial.pkl')
    # Print various stats
    summary_text = effect_size_sim_output_viz.print_output_stats(
        df,
        prob_per_arm,
        False,
        prior_params=prior_params,
        reordering_info=softmax_beta)
    with open(outfile_prefix + 'SummaryText.txt', 'w', newline='') as outf:
        outf.write(summary_text)
    overall_stats_df = effect_size_sim_output_viz.make_overall_stats_df(
        df, prob_per_arm, False, effect_size)
    overall_stats_df.to_pickle(outfile_prefix + 'OverallStatsDf.pkl')

    # Make histogram
    hist_figure = effect_size_sim_output_viz.make_hist_of_trials(df)
    hist_figure.savefig(outfile_prefix + 'HistOfConditionProportions.pdf',
                        bbox_inches='tight')

    # Make line plot
    test_stat_figure = effect_size_sim_output_viz.make_by_trial_graph_of_column(
        df_by_trial, 'stat')
    test_stat_figure.savefig(outfile_prefix + 'TestStatOverTime.pdf',
                             bbox_inches='tight')

    pvalue_figure = effect_size_sim_output_viz.make_by_trial_graph_of_column(
        df_by_trial, 'pvalue')
    pvalue_figure.savefig(outfile_prefix + 'PValueOverTime.pdf',
                          bbox_inches='tight')

    # Plot power
    power_figure = effect_size_sim_output_viz.plot_power_by_steps(
        df_by_trial, DESIRED_ALPHA, DESIRED_POWER)
    power_figure.savefig(outfile_prefix + 'PowerOverTime.pdf',
                         bbox_inches='tight')

    #Plot reward
    reward_figure = effect_size_sim_output_viz.make_by_trial_graph_of_column(
        df_by_trial, 'total_reward')
    reward_figure = effect_size_sim_output_viz.add_expected_reward_to_figure(
        reward_figure, prob_per_arm, step_sizes)
    reward_figure.savefig(outfile_prefix + 'RewardOverTime.pdf',
                          bbox_inches='tight')

    # Plot arm statistics
    arm_df_by_trial = create_arm_stats_by_step(outfile_directory, num_sims,
                                               step_sizes[-1], num_arms)
    arm_stats_figure = effect_size_sim_output_viz.make_by_trial_arm_statistics(
        arm_df_by_trial, num_arms)
    arm_stats_figure.savefig(outfile_prefix + 'ArmStats.pdf',
                             bbox_inches='tight')
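
A sketch of the command-line contract for this main(). Note that the burn-in and batch sizes are parsed out of the output directory name itself, so the directory below is a placeholder chosen to match that "...=<burn_in>-<batch>" convention; the script name is also hypothetical.

import sys

# Hypothetical argv layout for the Bernoulli-reward main(); placeholder values throughout.
sys.argv = [
    "run_effect_size_simulations_beta.py",  # argv[0]: script name (assumed)
    "0.3-0.5",         # argv[1]: "<effect_size>-<center>", or "p1,p2" / "p1,p2,es" per-arm probabilities
    "500",             # argv[2]: num_sims
    "results/bb=1-4",  # argv[3]: outfile_directory; yields burn_in_size=1, batch_size=4
    "Thompson",        # argv[4]: bandit_type ("uniform" -> BU prefix)
    "armsHigh",        # argv[5]: prior placement ("armsHigh", "armsLow", anything else = uniform)
    # argv[6]: n, required only when argv[1] lists per-arm probabilities
    # argv[7]: "forceActions,<k>", or a softmax beta for reward reordering
    # argv[8]: reordering function specifier (only with a softmax beta)
]
main()
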
def main():
    recalculate_bandits = True
    mean1, mu1 = get_mean_and_prior_from_string(sys.argv[1])
    mean2, mu2 = get_mean_and_prior_from_string(sys.argv[2])
    if mu1 != mu2 and mu2 != 0:
        print("Error: different priors on the arms aren't implemented for normal bandits.")
        exit()
        
    means = [mean1, mean2]
    if mean1 == mean2:
        # effect size must be 0 - interpret third argument as the variance to use for the arms
        variance = float(sys.argv[3])
        # n, for basing number of steps off of, also has to be set
        n = int(sys.argv[7])
        # equal arm means indicates there's no effect
        effect_size = 0
    elif len(sys.argv) > 8 and sys.argv[8].startswith("fixedVariance"):
        # We're running a simulation based on an existing experiment, so we want to set the variance
        # manually
        variance = [float(num) for num in sys.argv[3].split(",")]
        # n, for basing number of steps off of, also has to be set
        n = int(sys.argv[7])
        # equal arm means indicates there's no effect
        effect_size = 0 #TODO: fix!
    else:
        effect_size = float(sys.argv[3])
        variance = get_var_from_effect_size(mean1, mean2, effect_size)
        nobs1 = statsmodels.stats.power.tt_ind_solve_power(effect_size, None, DESIRED_ALPHA, DESIRED_POWER, 1)
        n = math.ceil(nobs1)
    num_sims = int(sys.argv[4])
    outfile_directory = sys.argv[5]
    bandit_type = "Thompson"
    bandit_type_prefix = 'NG'
    if len(sys.argv) > 6:
        bandit_type = sys.argv[6]
    if bandit_type == "uniform":
        bandit_type_prefix = "NU"# Normal rewards, uniform policy
    
    if len(sys.argv) > 8 and sys.argv[8].startswith("forceActions"):
        FORCE_ACTIONS = True
        num_to_force = float(sys.argv[8].split(",")[1])
    else:
        num_to_force = 0
    reorder_rewards = False
    softmax_beta = None
    if len(sys.argv) > 8 and not sys.argv[8].startswith("forceActions") and not sys.argv[8].startswith("fixedVariance"):
        # softmax beta for how to reorder rewards
        reorder_rewards = True
        softmax_beta = float(sys.argv[8])
        reordering_fn = reorder_samples_in_rewards.order_by_named_column('Action1OracleActualReward')
        if len(sys.argv) > 9:
            reordering_fn_specifier = sys.argv[9]
            reordering_fn = reorder_samples_in_rewards.get_reordering_fn(reordering_fn_specifier)

    num_arms = 2

    step_sizes = [n, 2*n, 4*n, 8*n]

    if recalculate_bandits:
            
        if bandit_type == "uniform":
            run_simulations_uniform_random(num_sims, means, variance, step_sizes, outfile_directory, forceActions = num_to_force)
        else:
            if reorder_rewards:
                run_simulations(num_sims, means, variance, step_sizes, outfile_directory, softmax_beta, reordering_fn, prior_mean = mu1, forceActions = num_to_force)
            else:
                run_simulations(num_sims, means, variance, step_sizes, outfile_directory, prior_mean = mu1, forceActions = num_to_force)

    outfile_prefix = outfile_directory + bandit_type_prefix + str(effect_size)
    if effect_size == 0:
        # Then include the n and the arm variance in the prefix
        outfile_prefix += "N" + str(n) + "Var" + str(variance)
    df = calculate_statistics_from_sims(outfile_directory, num_sims, step_sizes, effect_size, DESIRED_ALPHA)
    df.to_pickle(outfile_prefix + 'Df.pkl')
    df_by_trial = calculate_by_trial_statistics_from_sims(outfile_directory, num_sims, step_sizes, effect_size, DESIRED_ALPHA)
    df_by_trial.to_pickle(outfile_prefix + 'DfByTrial.pkl')

    # Print various stats
    summary_text = effect_size_sim_output_viz.print_output_stats(df, means + [variance], True, effect_size, reordering_info = softmax_beta)
 
    with open(outfile_prefix + 'SummaryText.txt', 'w', newline='') as outf:
        outf.write(summary_text)
    overall_stats_df = effect_size_sim_output_viz.make_overall_stats_df(df, means + [variance], True, effect_size)
    overall_stats_df.to_pickle(outfile_prefix + 'OverallStatsDf.pkl')
         
    # Make histogram
    hist_figure = effect_size_sim_output_viz.make_hist_of_trials(df)
    hist_figure.savefig(outfile_prefix + 'HistOfConditionProportions.pdf', bbox_inches='tight')
 
    # Make line plot
    test_stat_figure = effect_size_sim_output_viz.make_by_trial_graph_of_column(df_by_trial, 'stat')
    test_stat_figure.savefig(outfile_prefix + 'TestStatOverTime.pdf', bbox_inches='tight')
     
    pvalue_figure = effect_size_sim_output_viz.make_by_trial_graph_of_column(df_by_trial, 'pvalue')
    pvalue_figure.savefig(outfile_prefix + 'PValueOverTime.pdf', bbox_inches='tight')
     
    # Plot power
    power_figure = effect_size_sim_output_viz.plot_power_by_steps(df_by_trial, DESIRED_ALPHA, DESIRED_POWER)
    power_figure.savefig(outfile_prefix + 'PowerOverTime.pdf', bbox_inches='tight')
     
    #Plot reward
    reward_figure = effect_size_sim_output_viz.make_by_trial_graph_of_column(df_by_trial, 'total_reward')
    reward_figure = effect_size_sim_output_viz.add_expected_reward_to_figure(reward_figure, means, step_sizes)
    reward_figure.savefig(outfile_prefix + 'RewardOverTime.pdf', bbox_inches='tight')
     
     
    # Plot arm statistics
    arm_df_by_trial = create_arm_stats_by_step(outfile_directory, num_sims, step_sizes[-1], num_arms)
    arm_stats_figure = effect_size_sim_output_viz.make_by_trial_arm_statistics(arm_df_by_trial, num_arms)
    arm_stats_figure.savefig(outfile_prefix + 'ArmStats.pdf', bbox_inches='tight')
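
This second main() targets normal rewards without the batch/burn-in machinery; a minimal invocation sketch, with the same caveat that the mean/prior argument format comes from get_mean_and_prior_from_string and is only indicated by placeholders here (script name assumed).

import sys

# Hypothetical argv layout for the normal-reward main(); placeholder values throughout.
sys.argv = [
    "run_effect_size_simulations_normal.py",  # argv[0]: script name (assumed)
    "<mean1-with-prior>",  # argv[1]: parsed by get_mean_and_prior_from_string
    "<mean2-with-prior>",  # argv[2]: parsed by get_mean_and_prior_from_string
    "0.3",                 # argv[3]: effect size (or the variance(s) for equal means / fixedVariance)
    "500",                 # argv[4]: num_sims
    "results/",            # argv[5]: outfile_directory
    "Thompson",            # argv[6]: bandit_type ("uniform" -> NU prefix)
    "88",                  # argv[7]: n; only read when argv[3] is interpreted as a variance
    # argv[8]: "forceActions,<k>", "fixedVariance...", or a softmax beta for reward reordering
    # argv[9]: reordering function specifier (only with a softmax beta)
]
main()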