# Build one LaTeX results table (mean return, alpha-VaR, episode length,
# ground-truth return) for a fixed set of evaluation policies, scored
# against an MCMC chain of reward weights.
parser = argparse.ArgumentParser(description=None)
parser.add_argument('--env_name', help="name of the environment, e.g. 'breakout'")
parser.add_argument('--mcmc_file', help="name of mcmc file for chain")
parser.add_argument('--no_term', action='store_true')
parser.add_argument('--alpha', type=float, help="value of alpha-VaR, e.g. alpha = 0.05")
args = parser.parse_args()
print(args)
alpha = args.alpha
print(args.env_name)

# read in the weights as a 2-d array and the feature counts of the policy
W, log_lik = helper.get_weightchain_array(args.mcmc_file, return_likelihood=True)
print(np.mean(W, axis=0))

# Checkpoint ids of the evaluated policies plus the mean/MAP reward policies,
# mapped to human-readable table labels.
eval_policies = ['00025', '00325', '00800', '01450', 'mean', 'map']
name_transform = {'00025': 'policy A', '00325': 'policy B', '00800': 'policy C',
                  '01450': 'policy D', 'mean': 'mean', 'map': 'MAP', 'noop': 'no-op'}
if args.env_name == "enduro":
    # Enduro uses a different (later) set of training checkpoints.
    eval_policies = ['03125', '03425', '03900', '04875', 'mean', 'map']
    name_transform = {'03125': 'policy A', '03425': 'policy B', '03900': 'policy C',
                      '04875': 'policy D', 'mean': 'mean', 'map': 'MAP', 'noop': 'no-op'}

gt_return_list = []
fcount_list = []
return_dist_list = []
# FIX: '\h' is an invalid escape sequence (SyntaxWarning on modern Python);
# escape the backslash. The printed text ('... \\ \hline') is unchanged.
print(" policy & mean & " + str(alpha) + "-VaR & ave length & gt & min gt \\\\ \\hline")
for eval in eval_policies:
    # NOTE(review): 'eval' shadows the builtin; kept because the loop body
    # continues past this chunk and later code may reference it by this name.
    returns, fcounts = helper.parse_avefcount_array('../../policies/' + args.env_name + '_'
                                                    + eval + '_fcounts_onehot_truncated_terminal'
                                                    + str(args.no_term) + '.txt')
    #print(fcounts)
# NOTE(review): this chunk starts mid-statement — the opening of the `params`
# dict (font-size overrides fed to plt.rcParams below) is outside this view,
# so the first lines here are the tail of that dict literal.
        'axes.labelsize': 'x-large',
        'axes.titlesize': 'x-large',
        'xtick.labelsize': 'x-large',
        'ytick.labelsize': 'x-large'
}
plt.rcParams.update(params)
#plt.style.use('seaborn-deep')

parser = argparse.ArgumentParser(description=None)
parser.add_argument('--env_name', help="name of the environment, e.g. 'breakout'")
args = parser.parse_args()

#read in the weights as a 2-d array and the feature counts of the policy
W = helper.get_weightchain_array("../../mcmc_data/" + args.env_name + "_0.txt")
print(np.mean(W, axis=0))

# Checkpoint ids of the policies to evaluate; enduro uses later checkpoints.
eval_policies = ['00025', '00325', '00800', '01450']
if args.env_name == "enduro":
    eval_policies = ['03125', '03425', '03900', '04875']

gt_return_list = []
fcount_list = []
return_dist_list = []
print(" policy & mean & 0.05-VaR & ave length & min & stdev")
for eval in eval_policies:  # NOTE(review): 'eval' shadows the builtin
    #print("-"*20)
    #print("eval", eval)
    returns, fcounts = helper.parse_avefcount_array('../../policies/' + args.env_name + '_' + eval + '_fcounts_100.txt')
    # Posterior return distribution: one expected return per sampled weight
    # vector. The loop body continues past this chunk.
    return_dist = np.dot(W, fcounts)
# NOTE(review): this chunk starts mid-script — the ArgumentParser construction
# and the --env_name/--mcmc_file arguments are above this view.
parser.add_argument('--alpha', type=float, help="value of alpha-VaR, e.g. alpha = 0.05")
parser.add_argument('--identifier', help="keyword to find correct fcount files")
parser.add_argument('--plot', action='store_true')
parser.add_argument('--noop', action='store_true')
args = parser.parse_args()
print("alpha:", args.alpha)
print(args.env_name)

#read in the weights as a 2-d array and the feature counts of the policy
# burn/skip thin the chain; preburn_until_accept presumably drops leading
# proposals that were never accepted — TODO confirm against helper.
W, likelihood = helper.get_weightchain_array(args.mcmc_file, burn=5000, skip=20, return_likelihood=True, preburn_until_accept=True)
print("make sure that I actually accepted a value")
# Count of samples with -inf likelihood (never-accepted states).
print(np.sum(likelihood == -float('inf')))
# Keep only weight samples whose likelihood is finite.
W = W[likelihood != -float('inf')]
#print(np.mean(W, axis=0))

eval_policies = ['00025', '00325', '00800', '01450', 'mean', 'map', 'no-op']
# Human-readable labels for the table rows.
name_transform = {
    '00025': 'A',
    '00325': 'B',
    '00800': 'C',
    '01450': 'D',
    'mean': 'Mean',
    'map': 'MAP',
# NOTE(review): chunk ends mid-dict — the remaining entries (presumably the
# 'no-op' key) are outside this view.
import numpy as np
import helper
import argparse

# Score a single policy's feature counts against an MCMC reward-weight chain
# and print one LaTeX table row: posterior mean return, alpha-VaR, ground
# truth return, and average episode length.
parser = argparse.ArgumentParser(description=None)
parser.add_argument('--env_name', help="name of the environment, e.g. 'breakout'")
# FIX: the original call was missing its closing parenthesis — a syntax error
# that made the whole script unrunnable.
parser.add_argument('--eval_fcounts', help='file with policy fcounts')
parser.add_argument('--mcmc_file', help="name of mcmc file for chain")
parser.add_argument('--alpha', type=float, help="value of alpha-VaR, e.g. alpha = 0.05")
args = parser.parse_args()
print(args.env_name)

# read in the weights as a 2-d array and the feature counts of the policy
W = helper.get_weightchain_array(args.mcmc_file)
print(np.mean(W, axis=0))

print("mean & 0.05-VaR & gt & ave length")
fcounts, returns, lengths, all_fcounts = helper.parse_fcount_policy_eval(args.eval_fcounts)
# Posterior return distribution: one expected return per sampled weight vector.
return_dist = np.dot(W, fcounts)
# worst_percentile presumably returns the alpha-quantile of the worst
# returns (the alpha-VaR) — TODO confirm against helper.
print("{:.1f} & {:.1f} & {:.1f} & {:.1f} \\\\".format(np.mean(return_dist),
                                                      helper.worst_percentile(return_dist, args.alpha),
                                                      np.mean(returns),
                                                      np.mean(lengths)))
import numpy as np
import helper
import argparse

#Create tables for NeurIPS workshop paper but with normalized feature counts to see if that helps!
#I'm going to try the L1 and L2 norms
parser = argparse.ArgumentParser(description=None)
parser.add_argument('--env_name', help="name of the environment, e.g. 'breakout'")
args = parser.parse_args()
print(args.env_name)

#read in the weights as a 2-d array and the feature counts of the policy
W = helper.get_weightchain_array("../../mcmc_data/" + args.env_name + "_0.txt")  #these are all normalized
#print(np.mean(W, axis=0))

# Checkpoint ids plus mean/MAP reward policies; mapped to table labels.
eval_policies = ['00025', '00325', '00800', '01450', 'mean', 'map']
name_transform = {
    '00025': 'policy A',
    '00325': 'policy B',
    '00800': 'policy C',
    '01450': 'policy D',
    'mean': 'mean',
    'map': 'MAP',
    'noop': 'no-op'
}
if args.env_name == "enduro":
    # Enduro uses later training checkpoints.
    eval_policies = ['03125', '03425', '03900', '04875', 'mean', 'map']
    name_transform = {
        '03125': 'policy A',
# NOTE(review): chunk ends mid-dict — the rest of the enduro name_transform
# entries are outside this view.
import numpy as np
import helper
import argparse

# Build a LaTeX results table for one-hot feature-count policies, scored
# against the environment's one-hot MCMC reward chain.
parser = argparse.ArgumentParser(description=None)
parser.add_argument('--env_name', help="name of the environment, e.g. 'breakout'")
alpha = 0.01  # hard-coded alpha-VaR level (this variant has no CLI flag for it)
args = parser.parse_args()
print(args.env_name)

# read in the weights as a 2-d array and the feature counts of the policy
W, log_lik = helper.get_weightchain_array("../../mcmc_data/" + args.env_name + "_onehot_chain.txt", return_likelihood=True)
print(np.mean(W, axis=0))

# Checkpoint ids plus mean/MAP reward policies, mapped to table labels.
eval_policies = ['00025', '00325', '00800', '01450', 'mean', 'map']
name_transform = {'00025': 'policy A', '00325': 'policy B', '00800': 'policy C',
                  '01450': 'policy D', 'mean': 'mean', 'map': 'MAP', 'noop': 'no-op'}
if args.env_name == "enduro":
    # Enduro uses a different (later) set of training checkpoints.
    eval_policies = ['03125', '03425', '03900', '04875', 'mean', 'map']
    name_transform = {'03125': 'policy A', '03425': 'policy B', '03900': 'policy C',
                      '04875': 'policy D', 'mean': 'mean', 'map': 'MAP', 'noop': 'no-op'}

gt_return_list = []
fcount_list = []
return_dist_list = []
# FIX: '\h' is an invalid escape sequence (SyntaxWarning on modern Python);
# escape the backslash. The printed text ('... \\ \hline') is unchanged.
print(" policy & mean & " + str(alpha) + "-VaR & ave length & gt & min gt \\\\ \\hline")
for eval in eval_policies:
    # NOTE(review): 'eval' shadows the builtin; kept because the loop body
    # continues past this chunk and later code may reference it by this name.
    returns, fcounts = helper.parse_avefcount_array('../../policies/' + args.env_name + '_' + eval + '_fcounts_onehot.txt')
    #print(fcounts)
    # Posterior return distribution: one expected return per sampled weight vector.
    return_dist = np.dot(W, fcounts)
# Scan the shared policy-evaluation directory for files matching the
# requested environment, then print each file's mean ground-truth return and
# mean episode length.
parser = argparse.ArgumentParser(description=None)
parser.add_argument('--env_name', help="name of the environment, e.g. 'breakout'")
parser.add_argument('--mcmc_file', help="name of mcmc file for chain")
parser.add_argument('--alpha', type=float, help="value of alpha-VaR, e.g. alpha = 0.05")
parser.add_argument('--plot', action='store_true')
parser.add_argument('--noop', action='store_true')
args = parser.parse_args()
print("alpha:", args.alpha)
print(args.env_name)

# Posterior over reward weights: burn-in of 2000 samples, thinned every 50th.
W = helper.get_weightchain_array(args.mcmc_file, burn=2000, skip=50)
#print(np.mean(W, axis=0))

# Collect (and echo) every evaluation file whose name mentions the env.
eval_files = []
for file in os.listdir('/scratch/cluster/dsbrown/rl_policies'):
    if args.env_name not in file:
        continue
    print(file)
    eval_files.append(file)

# Report per-file ground-truth statistics.
for f in eval_files:
    avefcounts, returns, lengths, fcounts = helper.parse_fcount_policy_eval('/scratch/cluster/dsbrown/rl_policies/' + f)
    print(f)
    print(np.mean(returns), np.mean(lengths))
    #input()

# Grid of alpha levels, presumably for a VaR sweep further down the script.
alphas = np.linspace(0, 1.0, 21)