#read in the weights as a 2-d array and the feature counts of the policy
W = helper.get_weightchain_array("../../mcmc_data/" + args.env_name + "_0.txt")
print(np.mean(W, axis=0))

eval_policies = ['00025', '00325', '00800', '01450']
if args.env_name == "enduro":
    eval_policies = ['03125', '03425', '03900', '04875']

gt_return_list = []
fcount_list = []
return_dist_list = []
print(" policy & mean & 0.01-VaR & ave length & gt & min & stdev")
for eval in eval_policies:
    #print("-"*20)
    #print("eval", eval)
    returns, fcounts = helper.parse_avefcount_array(
        '../../policies/' + args.env_name + '_' + eval + '_fcounts_100.txt')
    # one return per MCMC weight sample: the posterior distribution over returns
    return_dist = np.dot(W, fcounts)
    # fcounts[-1] holds the average episode length
    print("{} & {:.1f} & {:.1f} & {:.1f} & {:.1f} & {:.0f} & {:.0f} \\\\".format(
        eval, np.mean(return_dist), helper.worst_percentile(return_dist, 0.01),
        fcounts[-1], np.mean(returns), np.min(returns), np.std(returns)))
    gt_return_list.append(returns)
    fcount_list.append(fcounts)
    return_dist_list.append(return_dist)

plt.figure(0)
plt.hist(return_dist_list, 30)
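# helper.worst_percentile is used throughout these scripts but not shown here.
# A minimal sketch of what it could compute is below: the alpha-quantile of the
# sorted posterior returns (the alpha-VaR). The _sketch name and the exact
# indexing are assumptions; the real implementation in helper.py may differ.
def worst_percentile_sketch(return_dist, alpha):
    """Return the alpha-worst-case return (alpha-VaR) of a return distribution."""
    sorted_returns = np.sort(return_dist)
    cutoff_index = int(alpha * len(sorted_returns))
    return sorted_returns[cutoff_index]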
#read in the weights as a 2-d array and the feature counts of the policy
W, log_lik = helper.get_weightchain_array(args.mcmc_file, return_likelihood=True)
print(np.mean(W, axis=0))

eval_policies = ['00025', '00325', '00800', '01450', 'mean', 'map']
name_transform = {'00025': 'policy A', '00325': 'policy B', '00800': 'policy C',
                  '01450': 'policy D', 'mean': 'mean', 'map': 'MAP', 'noop': 'no-op'}
if args.env_name == "enduro":
    eval_policies = ['03125', '03425', '03900', '04875', 'mean', 'map']
    name_transform = {'03125': 'policy A', '03425': 'policy B', '03900': 'policy C',
                      '04875': 'policy D', 'mean': 'mean', 'map': 'MAP', 'noop': 'no-op'}

gt_return_list = []
fcount_list = []
return_dist_list = []
print(" policy & mean & " + str(alpha) + "-VaR & ave length & gt & min gt \\\\ \\hline")
for eval in eval_policies:
    #print("-"*20)
    #print("eval", name_transform[eval])
    returns, fcounts = helper.parse_avefcount_array(
        '../../policies/' + args.env_name + '_' + eval
        + '_fcounts_onehot_truncated_terminal' + str(args.no_term) + '.txt')
    #print(fcounts)
    return_dist = np.dot(W, fcounts)
    # for one-hot features, the summed feature counts equal the average episode length
    print("{} & {:.1f} & {:.1f} & {:.1f} & {:.1f} & {:.0f} \\\\".format(
        name_transform[eval], np.mean(return_dist),
        helper.worst_percentile(return_dist, alpha), np.sum(fcounts),
        np.mean(returns), np.min(returns)))
    gt_return_list.append(returns)
    fcount_list.append(fcounts)
    return_dist_list.append(return_dist)

if args.env_name == "breakout":
    # NOTE: the no-op rollouts need to be rerun for the full feature set; the
    # fcounts file keeps getting overwritten.
    # evaluate the no-op policy
    #returns, fcounts = helper.parse_avefcount_array('../../policies/breakout_noop_fcounts.txt')
    returns = np.array([0, 0])
    if args.no_term:
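# helper.get_weightchain_array is called above with return_likelihood=True. A
# minimal sketch of that signature, assuming the chain file stores one
# comma-separated weight vector per line with the log likelihood in the last
# column; the actual layout read by helper.py may differ (e.g. burn-in rows or
# a separate likelihood file).
def get_weightchain_array_sketch(filename, return_likelihood=False):
    chain = np.loadtxt(filename, delimiter=',')
    if return_likelihood:
        # split the weight columns from the trailing log-likelihood column
        return chain[:, :-1], chain[:, -1]
    return chain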
import argparse

import matplotlib.pyplot as plt
import numpy as np

import helper

params = {'legend.fontsize': 'x-large',
          'figure.figsize': (5, 4),
          'axes.labelsize': 'x-large',
          'axes.titlesize': 'x-large',
          'xtick.labelsize': 'x-large',
          'ytick.labelsize': 'x-large'}
plt.rcParams.update(params)
#plt.style.use('seaborn-deep')

parser = argparse.ArgumentParser(description=None)
#parser.add_argument('--env_name', help="name of the environment, e.g. 'breakout'")
args = parser.parse_args()

eval_games = ['beamrider', 'breakout', 'enduro', 'seaquest', 'spaceinvaders']
gt_return_list = []
fcount_list = []
return_dist_list = []
for game in eval_games:
    print(game)
    for eval in ["mean", "map"]:
        print(eval)
        #print("eval", eval)
        returns, fcounts = helper.parse_avefcount_array(
            '../../policies/' + game + '_' + eval + '_fcounts.txt')
        # worst and average ground-truth game score over the evaluation rollouts
        print("{:.0f} & {:.1f}".format(np.min(returns), np.mean(returns)))
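# A minimal sketch of helper.parse_avefcount_array, assuming the fcounts file
# stores the per-rollout ground-truth returns on its first line and the
# averaged feature counts on its second, both comma-separated. The real file
# format parsed by helper.py may differ.
def parse_avefcount_array_sketch(filename):
    with open(filename) as f:
        lines = f.readlines()
    returns = np.array([float(r) for r in lines[0].split(',')])
    fcounts = np.array([float(c) for c in lines[1].split(',')])
    return returns, fcounts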
#read in the weights as a 2-d array and the feature counts of the policy
W = helper.get_weightchain_array("../../mcmc_data/" + args.env_name + "_gt_chain.txt")
print(np.mean(W, axis=0))

eval_policies = ['00025', '00325', '00800', '01450', 'mean', 'map']
name_transform = {'00025': 'policy A', '00325': 'policy B', '00800': 'policy C',
                  '01450': 'policy D', 'mean': 'mean', 'map': 'MAP', 'noop': 'no-op'}
if args.env_name == "enduro":
    eval_policies = ['03125', '03425', '03900', '04875', 'mean', 'map']
    name_transform = {'03125': 'policy A', '03425': 'policy B', '03900': 'policy C',
                      '04875': 'policy D', 'mean': 'mean', 'map': 'MAP', 'noop': 'no-op'}

gt_return_list = []
fcount_list = []
return_dist_list = []
print(" policy & mean & " + str(alpha) + "-VaR & ave length & gt & min gt \\\\ \\hline")
for eval in eval_policies:
    #print("-"*20)
    #print("eval", eval)
    returns, fcounts = helper.parse_avefcount_array(
        '../../policies/' + args.env_name + '_' + eval + '_fcounts_gt.txt')
    return_dist = np.dot(W, fcounts)
    print("{} & {:.1f} & {:.1f} & {:.1f} & {:.1f} & {:.0f} \\\\".format(
        name_transform[eval], np.mean(return_dist),
        helper.worst_percentile(return_dist, alpha), fcounts[-1],
        np.mean(returns), np.min(returns)))
    gt_return_list.append(returns)
    fcount_list.append(fcounts)
    return_dist_list.append(return_dist)

if args.env_name == "breakout":
    # evaluate the no-op policy
    returns, fcounts = helper.parse_avefcount_array('../../policies/breakout_noop_fcounts.txt')
    # posterior return distribution for the no-op policy
    return_dist = np.dot(W, fcounts)
    print("{} & {:.1f} & {:.1f} & {:.1f} & {:.1f} & {:.0f} ".format(
        "no-op", np.mean(return_dist), helper.worst_percentile(return_dist, alpha),
        fcounts[-1], np.mean(returns), np.min(returns)))
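# Worked example of the evaluation loop above: each row of W is one posterior
# sample of the reward weights, so W.dot(fcounts) yields one return estimate
# per sample, i.e. a posterior distribution over that policy's return. The
# numbers below are purely illustrative.
def _return_dist_demo():
    W_demo = np.array([[0.5, 0.5],   # three posterior weight samples,
                       [0.8, 0.2],   # two reward features each
                       [0.1, 0.9]])
    fcounts_demo = np.array([10.0, 2.0])  # expected feature counts of a policy
    dist = np.dot(W_demo, fcounts_demo)   # -> array([6. , 8.4, 2.8])
    print(np.mean(dist), np.min(dist))    # posterior mean and worst sampled return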
plt.show()

eval_policies = ['00025', '00325', '00800', '01450', 'mean', 'map']
name_transform = {'00025': 'policy A', '00325': 'policy B', '00800': 'policy C',
                  '01450': 'policy D', 'mean': 'mean', 'map': 'MAP', 'noop': 'no-op'}
if args.env_name == "enduro":
    eval_policies = ['03125', '03425', '03900', '04875', 'mean', 'map']
    name_transform = {'03125': 'policy A', '03425': 'policy B', '03900': 'policy C',
                      '04875': 'policy D', 'mean': 'mean', 'map': 'MAP', 'noop': 'no-op'}

gt_return_list = []
fcount_list = []
return_dist_list = []
print(" policy & mean & " + str(args.alpha) + "-VaR & mu+10*VaR & ave length & gt & min gt \\\\ \\hline")
for eval in eval_policies:
    #print("-"*20)
    #print("eval", eval)
    returns, fcounts = helper.parse_avefcount_array(
        '../../policies/' + args.env_name + '_' + eval + args.identifier
        + '.params_stripped.params_fcounts_auxiliary.txt')
    #print("num rollouts", len(returns))
    return_dist = np.dot(W, fcounts)
    # ave length is not tracked for these fcounts, so print 0 as a placeholder
    print("{} & {:.1f} & {:.1f} & {:.1f} & {:.1f} & {:.1f} & {:.0f} \\\\".format(
        name_transform[eval], np.mean(return_dist),
        helper.worst_percentile(return_dist, args.alpha),
        10 * helper.worst_percentile(return_dist, args.alpha) + np.mean(return_dist),
        0, np.mean(returns), np.min(returns)))
    gt_return_list.append(returns)
    fcount_list.append(fcounts)
    return_dist_list.append(return_dist)

if args.env_name == "breakout" and args.noop:
    # evaluate the no-op policy
    returns, fcounts = helper.parse_avefcount_array(
        '../../policies/breakout_no-op' + args.identifier
        + '.params_stripped.params_fcounts_auxiliary.txt')
    noop_returns = returns
    # posterior return distribution for the no-op policy
    return_dist = np.dot(W, fcounts)
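# The mu+10*VaR column above folds the posterior mean and the alpha-VaR into a
# single risk-sensitive score. A small generalization with the VaR weight as a
# parameter (lam=10 reproduces the column above; the function name and
# signature are illustrative, not part of helper.py):
def risk_sensitive_score(return_dist, alpha, lam=10.0):
    """Posterior mean return plus lam times the alpha-worst-case return."""
    return np.mean(return_dist) + lam * helper.worst_percentile(return_dist, alpha)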