Ejemplo n.º 1
0
    # Pick the prediction / ground-truth file pair for the evaluation key
    # `eval`, load both files, and print one LaTeX table row summarizing them.
    #returns, fcounts = helper.parse_avefcount_array('../../policies/' + args.env_name +'_' + eval + '_fcounts.txt')
    #return_dist = np.dot(W,fcounts)
    write_directory = args.write_dir
    if eval == "map":
        # The environment name appears twice in the resulting file name: once
        # as the prefix and once inside `eval_fname`.
        #write_directory = '../../bayesian_dropout/'
        eval_fname = "_scratch_cluster_dsbrown_tflogs_mcmc_" + args.env_name + "_64_all_checkpoints_43000_"
        pred_filename = write_directory + args.env_name + "_" + eval_fname + "pred.txt"
        true_filename = write_directory + args.env_name + "_" + eval_fname + "true.txt"
    elif eval == "mean":
        # Same layout as the "map" case, but with "_mean_" in the tag.
        #write_directory = '../../bayesian_dropout/'
        eval_fname = "_scratch_cluster_dsbrown_tflogs_mcmc_" + args.env_name + "_64_all_mean_checkpoints_43000_"
        pred_filename = write_directory + args.env_name + "_" + eval_fname + "pred.txt"
        true_filename = write_directory + args.env_name + "_" + eval_fname + "true.txt"
    elif eval == "noop":
        #write_directory = '../../bayesian_dropout/'
        pred_filename = write_directory + args.env_name + "_no_op_pred.txt"
        true_filename = write_directory + args.env_name + "_no_op_true.txt"
    else:
        # Any other key is used verbatim in the file name.
        # NOTE(review): there is no "_" between `eval` and "pred.txt" /
        # "true.txt" here, unlike the branches above -- confirm this matches
        # the names the files were written with.
        #write_directory = '../../bayesian_dropout/'
        pred_filename = write_directory + args.env_name + "_" + eval + "pred.txt"
        true_filename = write_directory + args.env_name + "_" + eval + "true.txt"

    # Presumably each file holds one numeric value per line (newline is the
    # delimiter) -- TODO confirm against the code that writes these files.
    return_dist = genfromtxt(pred_filename, delimiter='\n')
    returns = genfromtxt(true_filename, delimiter='\n')
    #print(returns)

    # Row fields: display name, mean of the predicted values,
    # helper.worst_percentile(..., 0.05) of the predicted values, mean of the
    # loaded `returns`, and minimum of the loaded `returns`.
    print("{} & {:.1f} & {:.1f} & {:.1f} & {:.0f}  \\\\".format(
        name_transform[eval], np.mean(return_dist),
        helper.worst_percentile(return_dist, 0.05), np.mean(returns),
        np.min(returns)))
# Print a LaTeX table with one row per evaluated policy. Each row is built
# from the policy's feature counts (L1-normalized) projected onto every
# weight sample in W, plus the returns stored alongside the feature counts.
fcount_list = []
return_dist_list = []
# The backslash in front of "hline" is escaped explicitly: "\h" is not a valid
# escape sequence and newer Python versions emit a warning for it. The printed
# text is unchanged.
print(" policy & mean & 0.05-VaR & ave length & gt & min gt \\\\ \\hline")
# NOTE: the loop variable keeps the name `eval` (shadowing the builtin)
# because code outside this block may read it after the loop.
for eval in eval_policies:
    returns, fcounts = helper.parse_avefcount_array('../../policies/' +
                                                    args.env_name + '_' +
                                                    eval + '_fcounts.txt')
    # Scale the feature counts to unit L1 norm before projecting them onto W.
    fcounts_l1 = fcounts / np.sum(np.abs(fcounts))
    return_dist = np.dot(W, fcounts_l1)

    # The "ave length" column prints the last entry of the raw feature counts
    # (presumably the average episode length -- confirm against the writer).
    print("{} & {:.2E} & {:.2E} & {:.2E} & {:.1f} & {:.0f}  \\\\".format(
        name_transform[eval], np.mean(return_dist),
        helper.worst_percentile(return_dist, 0.05), fcounts[-1],
        np.mean(returns), np.min(returns)))

if args.env_name == "breakout":
    # Breakout additionally gets a row for the no-op policy, computed the
    # same way as the rows above.
    returns, fcounts = helper.parse_avefcount_array(
        '../../policies/breakout_noop_fcounts.txt')
    fcounts_l1 = fcounts / np.sum(np.abs(fcounts))
    return_dist = np.dot(W, fcounts_l1)

    print("{} & {:.2E} & {:.2E} & {:.2E} & {:.1f} & {:.0f}  \\\\".format(
        "no-op", np.mean(return_dist),
        helper.worst_percentile(return_dist, 0.05), fcounts[-1],
        np.mean(returns), np.min(returns)))
# Map checkpoint identifiers to the labels shown in the results table. Enduro
# uses a different set of checkpoints, so both the policy list and the label
# map are replaced for that environment.
name_transform = {'00025':'policy A', '00325':'policy B', '00800':'policy C', '01450':'policy D', 'mean':'mean', 'map':'MAP', 'noop': 'no-op'}
if args.env_name == "enduro":
    eval_policies =['03125', '03425', '03900', '04875', 'mean', 'map']
    name_transform = {'03125':'policy A', '03425':'policy B', '03900':'policy C', '04875':'policy D', 'mean':'mean', 'map':'MAP', 'noop': 'no-op'}

# Per-policy results collected while the table is printed.
gt_return_list = []
fcount_list = []
return_dist_list = []
# The backslash in front of "hline" is escaped explicitly: "\h" is not a valid
# escape sequence and newer Python versions emit a warning for it. The printed
# text is unchanged.
print(" policy & mean & " +  str(alpha) + "-VaR & ave length & gt & min gt \\\\ \\hline")
# NOTE: the loop variable keeps the name `eval` (shadowing the builtin)
# because code outside this block may read it after the loop.
for eval in eval_policies:
    returns, fcounts = helper.parse_avefcount_array('../../policies/' + args.env_name +'_' + eval + '_fcounts_onehot.txt')
    # Project the (unnormalized) feature counts onto every weight sample in W.
    return_dist = np.dot(W,fcounts)

    # The "ave length" column prints the sum of the feature counts.
    print("{} & {:.1f} & {:.1f} & {:.1f} & {:.1f} & {:.0f}  \\\\".format(name_transform[eval], np.mean(return_dist), helper.worst_percentile(return_dist, alpha), np.sum(fcounts), np.mean(returns), np.min(returns)))

    gt_return_list.append(returns)
    fcount_list.append(fcounts)
    return_dist_list.append(return_dist)

if args.env_name == "breakout":
    #I realized that I need to rerun the noop code for the full features. I keep overwriting it.
    # Evaluate the no-op policy. This reads the plain "_fcounts.txt" file,
    # not the "_fcounts_onehot.txt" variant used in the loop above.
    returns, fcounts = helper.parse_avefcount_array('../../policies/breakout_noop_fcounts.txt')

    # No normalization is applied here; the raw feature counts are projected.
    return_dist = np.dot(W,fcounts)
    # NOTE(review): only return_dist_list receives the no-op entry, so it ends
    # up one element longer than gt_return_list and fcount_list -- confirm
    # that later consumers expect this.
    return_dist_list.append(return_dist)
    print("{} & {:.1f} & {:.1f} & {:.1f} & {:.1f} & {:.0f} \\\\".format("no-op", np.mean(return_dist), helper.worst_percentile(return_dist, alpha), np.sum(fcounts), np.mean(returns), np.min(returns)))
# Sweep the risk level alpha and, for each value, record the mean of `returns`
# for the evaluation file whose worst-percentile value is largest.
#
# Every evaluation file is parsed exactly once and its projection onto W is
# computed exactly once. Neither depends on alpha, so repeating that work for
# each alpha (and again for the winning file) only re-read the same files.
# This assumes the files do not change while the script runs.
rl_policy_dir = '/scratch/cluster/dsbrown/rl_policies/'
parsed_evals = []
eval_return_dists = []
for f in eval_files:
    avefcounts, returns, lengths, fcounts = helper.parse_fcount_policy_eval(rl_policy_dir + f)
    parsed_evals.append((avefcounts, returns, lengths, fcounts))
    eval_return_dists.append(np.dot(W, avefcounts))
    print(f)
    print(np.mean(returns), np.mean(lengths))

alphas = np.linspace(0,1.0,21)
best_returns = []
for alpha in alphas:
    print(alpha)
    # Worst-percentile value of each file's projected values at this alpha.
    alpha_vars = []
    for return_dist in eval_return_dists:
        var = helper.worst_percentile(return_dist, alpha)
        alpha_vars.append(var)
    # Find the best file under this alpha (np.argmax raises ValueError when
    # eval_files is empty).
    print(alpha_vars)
    print(len(alpha_vars))
    best_indx = np.argmax(alpha_vars)
    print("best index", best_indx)
    print("best file", eval_files[best_indx])
    avefcounts, returns, lengths, fcounts = parsed_evals[best_indx]
    best_returns.append(np.mean(returns))
import matplotlib.pylab as pylab
params = {'legend.fontsize': 'xx-large',
                 # 'figure.figsize': (6, 5),
                          'axes.labelsize': 'xx-large',
                                   'axes.titlesize':'xx-large',
                                            'xtick.labelsize':'xx-large',
Ejemplo n.º 5
0
print("alpha:", args.alpha)

print(args.env_name)
#read in the weights as a 2-d array and the feature counts of the policy
W, likelihood = helper.get_weightchain_array(args.mcmc_file, burn=5000, skip=10, return_likelihood=True, preburn_until_accept=True)

# Per-policy results collected while the table is printed.
gt_return_list = []
fcount_list = []
return_dist_list = []
# Header for the table rows printed below. It lists exactly the five fields
# each row contains; the previous header also named "map" and "best-case"
# columns that the rows do not print, which misaligned the table.
print("policy & posterior ave & worst-case & ground truth & ave length\\\\")
eval_policies = ['human_good.txt', 'human_everyother.txt', 'human_holdnfire.txt', 'human_shootbarriers.txt', 'human_flee.txt', 'human_hide.txt', 'human_miss.txt', 'human_suicidal.txt']
# NOTE: the loop variable keeps the name `eval` (shadowing the builtin)
# because code outside this block may read it after the loop.
for eval in eval_policies:
    fcounts, returns, lengths, all_fcounts = helper.parse_fcount_policy_eval('/home/dsbrown/Code/deep-bayesian-irl/spaceinvaders_eval_policies/' + eval)
    # Project the policy's feature counts onto every weight sample in W.
    return_dist = np.dot(W,fcounts)

    # Row fields: file name, mean of the projected values, their
    # worst-percentile value at args.alpha, mean of `returns`, mean of
    # `lengths`.
    print("{} & {:.1f} & {:.1f} & {:.1f} & {:.1f} \\\\".format(eval, np.mean(return_dist),helper.worst_percentile(return_dist, args.alpha), np.mean(returns), np.mean(lengths)))

    gt_return_list.append(returns)
    fcount_list.append(fcounts)
    return_dist_list.append(return_dist)
# Per-policy results collected while the table is printed.
gt_return_list = []
fcount_list = []
return_dist_list = []
# Header for the table rows printed below. It lists exactly the five fields
# each row contains; the previous header also named "map" and "best-case"
# columns that the rows do not print, which misaligned the table.
print(
    "policy & posterior ave & worst-case & ground truth & ave length\\\\"
)
eval_policies = [
    'beamrider_rl_fcounts_2000.txt', 'beamrider_brex_fcounts_2000.txt',
    'beamrider_reward_hack_fcount_2000.txt'
]
# Alternative evaluation set:
#eval_policies = ['beamrider_rl_fcounts_30000.txt', 'beamrider_brex_fcounts_30000.txt', 'beamrider_reward_hack_fcount_30000.txt']
# NOTE: the loop variable keeps the name `eval` (shadowing the builtin)
# because code outside this block may read it after the loop.
for eval in eval_policies:
    fcounts, returns, lengths, all_fcounts = helper.parse_fcount_policy_eval(
        '/home/dsbrown/Code/deep-bayesian-irl/beamrider_eval_policies/' + eval)
    # Project the policy's feature counts onto every weight sample in W.
    return_dist = np.dot(W, fcounts)
    # Value under `map_weights`; computed here but not part of the printed
    # row.
    map_return = np.dot(map_weights, fcounts)[0]

    # Row fields: file name, mean of the projected values, their
    # worst-percentile value at args.alpha, mean of `returns`, mean of
    # `lengths`.
    print("{} & {:.1f} & {:.1f} & {:.1f} & {:.1f} \\\\".format(
        eval, np.mean(return_dist),
        helper.worst_percentile(return_dist, args.alpha), np.mean(returns),
        np.mean(lengths)))

    gt_return_list.append(returns)
    fcount_list.append(fcounts)
    return_dist_list.append(return_dist)
Ejemplo n.º 7
0
    # Parse the stored evaluation results for file `f`, then print the file
    # name followed by the mean of `returns` and the mean of `lengths`.
    avefcounts, returns, lengths, fcounts = helper.parse_fcount_policy_eval(
        '/home/dsbrown/Code/deep-bayesian-irl/rl_policies/' + f)
    print(f)
    print(np.mean(returns), np.mean(lengths))

#input()
# Sweep the risk level alpha and, for each value, record the mean of `returns`
# for the evaluation file whose worst-percentile value is largest.
alphas = np.linspace(0, 1.0, 11)
best_returns = []

# Parse every evaluation file and project it onto W once, up front. Neither
# step depends on alpha, so repeating them for each alpha (and again for the
# winning file) only re-read the same files. This assumes the files do not
# change while the script runs.
rl_policy_dir = '/home/dsbrown/Code/deep-bayesian-irl/rl_policies/'
parsed_evals = []
eval_return_dists = []
for f in eval_files:
    avefcounts, returns, lengths, fcounts = helper.parse_fcount_policy_eval(
        rl_policy_dir + f)
    parsed_evals.append((avefcounts, returns, lengths, fcounts))
    eval_return_dists.append(np.dot(W, avefcounts))

for alpha in alphas:
    print(alpha)
    # Worst-percentile value of each file's projected values at this alpha.
    alpha_vars = []
    for return_dist in eval_return_dists:
        var = helper.worst_percentile(return_dist, alpha)
        alpha_vars.append(var)
    # Find the best file under this alpha (np.argmax raises ValueError when
    # eval_files is empty).
    print(alpha_vars)
    print(len(alpha_vars))
    best_indx = np.argmax(alpha_vars)
    print("best index", best_indx)
    print("best file", eval_files[best_indx])
    avefcounts, returns, lengths, fcounts = parsed_evals[best_indx]
    best_returns.append(np.mean(returns))
import matplotlib.pylab as pylab
params = {
    'legend.fontsize': 'xx-large',
    # 'figure.figsize': (6, 5),