# Command-line options for this script.
parser = argparse.ArgumentParser(description=None)
parser.add_argument(
    '--env_name',
    help="name of the environment, e.g. 'breakout'",
)
parser.add_argument(
    '--mcmc_file',
    help="name of mcmc file for chain",
)
# Boolean switch: True when --no_term is passed, False otherwise.
parser.add_argument('--no_term', action='store_true')
parser.add_argument(
    '--alpha',
    type=float,
    help="value of alpha-VaR, e.g. alpha = 0.05",
)

args = parser.parse_args()
# Echo the full parsed namespace before doing any work.
print(args)
# Module-level shorthand for the requested alpha.
alpha = args.alpha

print(args.env_name)
# Read in the MCMC weight chain (used below as a 2-d array); with
# return_likelihood=True the helper also returns a second value, kept here
# as log_lik.
W, log_lik = helper.get_weightchain_array(args.mcmc_file, return_likelihood=True)
print(np.mean(W, axis=0))

# Policy identifiers as they appear in the fcount file names, and the labels
# to show for them. Enduro uses a different set of identifiers.
eval_policies = ['00025', '00325', '00800', '01450', 'mean', 'map']
name_transform = {'00025': 'policy A', '00325': 'policy B', '00800': 'policy C', '01450': 'policy D', 'mean': 'mean', 'map': 'MAP', 'noop': 'no-op'}
if args.env_name == "enduro":
    eval_policies = ['03125', '03425', '03900', '04875', 'mean', 'map']
    name_transform = {'03125': 'policy A', '03425': 'policy B', '03900': 'policy C', '04875': 'policy D', 'mean': 'mean', 'map': 'MAP', 'noop': 'no-op'}

gt_return_list = []
fcount_list = []
return_dist_list = []

# LaTeX table header. The backslash before "hline" is escaped explicitly:
# "\h" is not a valid escape sequence and triggers a warning on modern
# Python, while the emitted text ("\\ \hline") is unchanged.
print(" policy & mean & " + str(alpha) + "-VaR & ave length & gt & min gt \\\\ \\hline")

# NOTE(review): the loop variable shadows the builtin `eval`; the name is kept
# because code outside this excerpt may rely on it.
for eval in eval_policies:
    # The file name embeds the environment, the policy identifier and the
    # string form of the --no_term flag.
    returns, fcounts = helper.parse_avefcount_array(
        '../../policies/' + args.env_name + '_' + eval
        + '_fcounts_onehot_truncated_terminal' + str(args.no_term) + '.txt')
Example #2
0
    'axes.labelsize': 'x-large',
    'axes.titlesize': 'x-large',
    'xtick.labelsize': 'x-large',
    'ytick.labelsize': 'x-large'
}
plt.rcParams.update(params)
#plt.style.use('seaborn-deep')

# Command-line interface: the environment name is the only option.
parser = argparse.ArgumentParser(description=None)
parser.add_argument('--env_name', help="name of the environment, e.g. 'breakout'")
args = parser.parse_args()

# Load the weight chain stored under ../../mcmc_data/ for this environment
# and print its mean along axis 0.
W = helper.get_weightchain_array(
    "".join(["../../mcmc_data/", args.env_name, "_0.txt"]))
print(np.mean(W, axis=0))

# Policy identifiers used in the fcount file names; enduro has its own set.
eval_policies = (['03125', '03425', '03900', '04875']
                 if args.env_name == "enduro"
                 else ['00025', '00325', '00800', '01450'])

gt_return_list, fcount_list, return_dist_list = [], [], []

print(" policy & mean & 0.05-VaR & ave length & min & stdev")
for eval in eval_policies:
    fcount_path = "".join(
        ['../../policies/', args.env_name, '_', eval, '_fcounts_100.txt'])
    returns, fcounts = helper.parse_avefcount_array(fcount_path)
    # Product of the weight chain with this policy's feature counts.
    return_dist = np.dot(W, fcounts)
# Remaining command-line options (the parser itself is created earlier).
parser.add_argument(
    '--alpha', type=float, help="value of alpha-VaR, e.g. alpha = 0.05")
parser.add_argument(
    '--identifier', help="keyword to find correct fcount files")
# Two plain boolean switches.
for flag in ('--plot', '--noop'):
    parser.add_argument(flag, action='store_true')
args = parser.parse_args()

print("alpha:", args.alpha)
print(args.env_name)

# Read in the weight chain together with its likelihood values, using the
# burn / skip / pre-burn settings below.
chain_options = dict(
    burn=5000,
    skip=20,
    return_likelihood=True,
    preburn_until_accept=True,
)
W, likelihood = helper.get_weightchain_array(args.mcmc_file, **chain_options)

# Report how many entries have a likelihood of -inf, then drop them from W.
print("make sure that I actually accepted a value")
neg_inf = float('-inf')
print(np.sum(likelihood == neg_inf))
W = W[likelihood != neg_inf]

eval_policies = ['00025', '00325', '00800', '01450', 'mean', 'map', 'no-op']
name_transform = {
    '00025': 'A',
    '00325': 'B',
    '00800': 'C',
    '01450': 'D',
    'mean': 'Mean',
    'map': 'MAP',
Example #4
0
import numpy as np
import helper
import argparse


# Command-line interface for evaluating a single policy fcount file against
# an MCMC weight chain.
parser = argparse.ArgumentParser(description=None)
parser.add_argument('--env_name', help="name of the environment, e.g. 'breakout'")
# The closing parenthesis was missing on this call, which made the whole
# script a SyntaxError.
parser.add_argument('--eval_fcounts', help='file with policy fcounts')
parser.add_argument('--mcmc_file', help="name of mcmc file for chain")
parser.add_argument('--alpha', type=float, help="value of alpha-VaR, e.g. alpha = 0.05")

args = parser.parse_args()
print(args.env_name)

# Read in the weights as a 2-d array and print their mean along axis 0.
W = helper.get_weightchain_array(args.mcmc_file)
print(np.mean(W, axis=0))

# Label the VaR column with the alpha that is actually used below, rather
# than a hard-coded 0.05 (the output is identical when --alpha 0.05).
print("mean & " + str(args.alpha) + "-VaR & gt & ave length")

# Feature counts of the evaluated policy plus its returns and lengths.
fcounts, returns, lengths, all_fcounts = helper.parse_fcount_policy_eval(args.eval_fcounts)
return_dist = np.dot(W, fcounts)

# One LaTeX table row: mean of return_dist, its alpha worst percentile, mean
# of the parsed returns and mean of the parsed lengths.
print("{:.1f} & {:.1f} & {:.1f} & {:.1f} \\\\".format(
    np.mean(return_dist),
    helper.worst_percentile(return_dist, args.alpha),
    np.mean(returns),
    np.mean(lengths)))
import numpy as np
import helper
import argparse

#Create tables for NeurIPS workshop paper but with normalized feature counts to see if that helps!
#I'm going to try the L1 and L2 norms

# Command-line interface: the environment name is the only option.
parser = argparse.ArgumentParser(description=None)
parser.add_argument('--env_name', help="name of the environment, e.g. 'breakout'")
args = parser.parse_args()
print(args.env_name)

# Read in the weight chain for this environment
# (original author's note: these are all normalized).
W = helper.get_weightchain_array(
    "".join(["../../mcmc_data/", args.env_name, "_0.txt"]))

# Policy identifiers used in the fcount file names.
eval_policies = ['00025', '00325', '00800', '01450', 'mean', 'map']

# Display names: the first four identifiers become 'policy A'..'policy D'
# (zip stops after the four letters), followed by the fixed labels.
name_transform = {
    code: 'policy ' + letter for code, letter in zip(eval_policies, 'ABCD')
}
name_transform.update({'mean': 'mean', 'map': 'MAP', 'noop': 'no-op'})
if args.env_name == "enduro":
    eval_policies = ['03125', '03425', '03900', '04875', 'mean', 'map']
    name_transform = {
        '03125': 'policy A',
import numpy as np
import helper
import argparse


# Command-line interface: the environment name is the only option.
parser = argparse.ArgumentParser(description=None)
parser.add_argument('--env_name', help="name of the environment, e.g. 'breakout'")

# alpha is fixed in this script rather than read from the command line.
alpha = 0.01

args = parser.parse_args()
print(args.env_name)

# Read in the one-hot MCMC weight chain (used below as a 2-d array); with
# return_likelihood=True the helper also returns a second value, kept here
# as log_lik.
W, log_lik = helper.get_weightchain_array("../../mcmc_data/" + args.env_name + "_onehot_chain.txt", return_likelihood=True)
print(np.mean(W, axis=0))

# Policy identifiers as they appear in the fcount file names, and the labels
# to show for them. Enduro uses a different set of identifiers.
eval_policies = ['00025', '00325', '00800', '01450', 'mean', 'map']
name_transform = {'00025': 'policy A', '00325': 'policy B', '00800': 'policy C', '01450': 'policy D', 'mean': 'mean', 'map': 'MAP', 'noop': 'no-op'}
if args.env_name == "enduro":
    eval_policies = ['03125', '03425', '03900', '04875', 'mean', 'map']
    name_transform = {'03125': 'policy A', '03425': 'policy B', '03900': 'policy C', '04875': 'policy D', 'mean': 'mean', 'map': 'MAP', 'noop': 'no-op'}

gt_return_list = []
fcount_list = []
return_dist_list = []

# LaTeX table header. The backslash before "hline" is escaped explicitly:
# "\h" is not a valid escape sequence and triggers a warning on modern
# Python, while the emitted text ("\\ \hline") is unchanged.
print(" policy & mean & " + str(alpha) + "-VaR & ave length & gt & min gt \\\\ \\hline")

# NOTE(review): the loop variable shadows the builtin `eval`; the name is kept
# because code outside this excerpt may rely on it.
for eval in eval_policies:
    returns, fcounts = helper.parse_avefcount_array('../../policies/' + args.env_name + '_' + eval + '_fcounts_onehot.txt')
    # Product of the weight chain with this policy's feature counts.
    return_dist = np.dot(W, fcounts)

# Command-line options for this script.
parser = argparse.ArgumentParser(description=None)
parser.add_argument(
    '--env_name', help="name of the environment, e.g. 'breakout'")
parser.add_argument(
    '--mcmc_file', help="name of mcmc file for chain")
parser.add_argument(
    '--alpha', type=float, help="value of alpha-VaR, e.g. alpha = 0.05")
parser.add_argument('--plot', action='store_true')
parser.add_argument('--noop', action='store_true')
args = parser.parse_args()

print("alpha:", args.alpha)
print(args.env_name)

# Read in the weight chain with the burn / skip settings below.
W = helper.get_weightchain_array(args.mcmc_file, burn=2000, skip=50)

# Collect every entry of the policy directory whose name contains the
# environment name, echoing each match.
policy_dir = '/scratch/cluster/dsbrown/rl_policies'
eval_files = [name for name in os.listdir(policy_dir) if args.env_name in name]
for file in eval_files:
    print(file)

# For each matching file, parse it and print the mean return and mean length.
for f in eval_files:
    avefcounts, returns, lengths, fcounts = helper.parse_fcount_policy_eval(
        policy_dir + '/' + f)
    print(f)
    print(np.mean(returns), np.mean(lengths))

# 21 evenly spaced alpha values from 0 to 1.0 inclusive.
alphas = np.linspace(0, 1.0, 21)