def main(args):
    """Run tabular value iteration on two seeded ``GridWorldEnv`` instances.

    For each environment this builds an experiment directory string under
    ``<cwd>/data/part1/``, configures the logger with it, writes the run
    parameters to ``params.json`` and trains a ``ValueIteration`` instance.

    Args:
        args: Namespace-like object usable with ``vars()`` that provides
            ``render``, ``policy_type`` and ``temperature``. It is mutated:
            an ``env`` entry holding the current environment name is added
            (and overwritten on every loop iteration).
    """
    render = args.render
    if not render:
        # Pick the Agg backend before pyplot is imported anywhere below.
        import matplotlib
        matplotlib.use('Agg')
        import matplotlib.pyplot as plt  # noqa: F401
    from utils.utils import TabularPolicy, TabularValueFun
    from part1.tabular_value_iteration import ValueIteration
    from envs import Grid1DEnv, GridWorldEnv  # noqa: F401

    envs = [GridWorldEnv(seed=0), GridWorldEnv(seed=1)]

    for env in envs:
        # NOTE(review): plain instances have no `__name__`; presumably the
        # env classes set this attribute themselves -- confirm.
        env_name = env.__name__
        exp_dir = os.getcwd() + '/data/part1/%s/policy_type%s_temperature%s/' % (
            env_name, args.policy_type, args.temperature)
        # NOTE(review): assumes logger.configure creates exp_dir; otherwise
        # opening params.json below fails -- confirm.
        logger.configure(dir=exp_dir, format_strs=['stdout', 'log', 'csv'])

        # vars() returns the namespace's own dict, so this also updates args.
        args_dict = vars(args)
        args_dict['env'] = env_name
        # Context manager guarantees the file is flushed and closed before
        # the (potentially long) training run starts.
        with open(exp_dir + '/params.json', 'w') as params_file:
            json.dump(args_dict, params_file, indent=2, sort_keys=True)

        policy = TabularPolicy(env)
        value_fun = TabularValueFun(env)
        algo = ValueIteration(env,
                              value_fun,
                              policy,
                              policy_type=args.policy_type,
                              render=render,
                              temperature=args.temperature)
        algo.train()
def main(args):
    """Run value iteration on discretized continuous-control environments.

    Iterates over ``DoubleIntegratorEnv``, ``MountainCarEnv``,
    ``CartPoleEnv`` and ``SwingUpEnv``. Each one is wrapped in
    ``Discretize``, paired with either a tabular or a look-ahead policy,
    and trained with ``ValueIteration``. Run parameters are written to
    ``params.json`` inside a per-environment directory under
    ``<cwd>/data/part2_d/``.

    Args:
        args: Namespace-like object usable with ``vars()`` that provides
            ``render``, ``policy_type``, ``mode``, ``horizon`` and
            ``max_iter``. It is mutated: an ``env`` entry holding the
            current environment class name is added.

    Raises:
        NotImplementedError: If ``args.policy_type`` is neither
            ``'tabular'`` nor ``'look_ahead'``.
    """
    render = args.render
    if not render:
        # Pick the Agg backend before pyplot is imported anywhere below.
        import matplotlib
        matplotlib.use('Agg')
        import matplotlib.pyplot as plt  # noqa: F401
    from envs import DoubleIntegratorEnv, MountainCarEnv, CartPoleEnv, SwingUpEnv
    from utils.utils import TabularPolicy, TabularValueFun
    from part1.tabular_value_iteration import ValueIteration
    from part2.look_ahead_policy import LookAheadPolicy
    from part2.discretize import Discretize

    envs = [
        DoubleIntegratorEnv(),
        MountainCarEnv(),
        CartPoleEnv(),
        SwingUpEnv(),
    ]

    for env in envs:
        env_name = env.__class__.__name__
        # MountainCarEnv gets a finer grid than the other environments.
        state_discretization = 51 if env_name == 'MountainCarEnv' else 21

        exp_dir = os.getcwd() + '/data/part2_d/%s/policy_type%s_mode%s_horizon%s/' % (
            env_name, args.policy_type, args.mode, args.horizon)
        # NOTE(review): assumes logger.configure creates exp_dir; otherwise
        # opening params.json below fails -- confirm.
        logger.configure(dir=exp_dir, format_strs=['stdout', 'log', 'csv'])

        # vars() returns the namespace's own dict, so this also updates args.
        args_dict = vars(args)
        args_dict['env'] = env_name
        # Context manager guarantees the file is flushed and closed before
        # the (potentially long) training run starts.
        with open(exp_dir + '/params.json', 'w') as params_file:
            json.dump(args_dict, params_file, indent=2, sort_keys=True)

        # From here on `env` is the discretized wrapper, not the raw env.
        env = Discretize(env,
                         state_discretization=state_discretization,
                         mode=args.mode)
        value_fun = TabularValueFun(env)
        if args.policy_type == 'tabular':
            policy = TabularPolicy(env)
        elif args.policy_type == 'look_ahead':
            policy = LookAheadPolicy(env, value_fun, args.horizon)
        else:
            raise NotImplementedError

        algo = ValueIteration(env,
                              value_fun,
                              policy,
                              render=render,
                              max_itr=args.max_iter,
                              num_rollouts=1,
                              render_itr=5,
                              log_itr=5)
        algo.train()
def main(args):
    """Run tabular value iteration on an ASRS environment and save the result.

    Builds an ``ASRSEnv`` driven by a ``StaticOrderProcess``, wraps it in
    ``TabularEnv``, trains ``ValueIteration`` on it and finally calls
    ``value_fun.save`` with ``<exp_dir>/value_fun.npy``. Run parameters are
    written to ``params.json`` in the experiment directory under
    ``<cwd>/data/version1/``.

    Args:
        args: Namespace-like object usable with ``vars()`` that provides
            ``render``, ``policy_type``, ``temperature``, ``num_rollouts``
            and the string-valued fields ``storage_shape``, ``dist_param``,
            ``exit_coord`` and ``logger_level``, which are evaluated as
            Python expressions. It is mutated: an ``env`` entry holding the
            environment name is added.

    Raises:
        AssertionError: If ``dist_param`` is not ``None`` and its length
            differs from the product of ``storage_shape``.
    """
    render = args.render
    if not render:
        # Pick the Agg backend before pyplot is imported anywhere below.
        import matplotlib
        matplotlib.use('Agg')
        import matplotlib.pyplot as plt  # noqa: F401
    from utils.utils import TabularPolicy
    from utils.value_function import TabularValueFun
    from algos.tabular_value_iteration import ValueIteration
    from envs import ASRSEnv, TabularEnv, ProbDistEnv, DynamicProbEnv, StaticOrderProcess, SeasonalOrderProcess  # noqa: F401

    # SECURITY(review): eval() executes arbitrary code from the argument
    # strings. Acceptable only while args come from a trusted command line;
    # consider ast.literal_eval for the literal-valued fields.
    # Each string is evaluated once here instead of at every use site.
    storage_shape = eval(args.storage_shape)
    dist_param = eval(args.dist_param)

    num_products = np.array(storage_shape).prod()
    assert (dist_param is None) or (num_products == len(dist_param)), \
        'storage_shape should be consistent with dist_param length'

    op = StaticOrderProcess(num_products=num_products, dist_param=dist_param)
    base_env = ASRSEnv(storage_shape,
                       order_process=op,
                       origin_coord=eval(args.exit_coord))
    env = TabularEnv(base_env)

    # NOTE(review): plain instances have no `__name__`; presumably
    # TabularEnv sets this attribute itself -- confirm.
    env_name = env.__name__
    exp_dir = os.getcwd() + '/data/version1/%s/policy_type%s_temperature%s_envsize_%s/' % (
        env_name, args.policy_type, args.temperature, num_products)
    # NOTE(review): assumes logger.configure creates exp_dir; otherwise
    # opening params.json below fails -- confirm.
    logger.configure(dir=exp_dir,
                     format_strs=['stdout', 'log', 'csv'],
                     level=eval(args.logger_level))

    # vars() returns the namespace's own dict, so this also updates args.
    args_dict = vars(args)
    args_dict['env'] = env_name
    # Context manager guarantees the file is flushed and closed before the
    # (potentially long) training run starts.
    with open(exp_dir + '/params.json', 'w') as params_file:
        json.dump(args_dict, params_file, indent=2, sort_keys=True)

    policy = TabularPolicy(env)
    value_fun = TabularValueFun(env)
    algo = ValueIteration(env,
                          value_fun,
                          policy,
                          policy_type=args.policy_type,
                          render=render,
                          temperature=args.temperature,
                          num_rollouts=args.num_rollouts)
    algo.train()
    value_fun.save(f'{exp_dir}/value_fun.npy')
def main(args):
    """Run value iteration on discretized ``CartPoleEnv`` and ``SwingUpEnv``.

    Each environment is wrapped in ``Discretize`` using the requested
    resolution and mode, then trained with a tabular policy and value
    function. Run parameters are written to ``params.json`` inside a
    per-environment directory under ``<cwd>/data/part4/``.

    Args:
        args: Namespace-like object usable with ``vars()`` that provides
            ``render``, ``mode``, ``state_discretization`` and ``max_iter``.
            It is mutated: an ``env`` entry holding the current environment
            class name is added.
    """
    render = args.render
    if not render:
        # Pick the Agg backend before pyplot is imported anywhere below.
        import matplotlib
        matplotlib.use('Agg')
        import matplotlib.pyplot as plt  # noqa: F401
    from envs import CartPoleEnv, SwingUpEnv
    from utils.utils import TabularPolicy, TabularValueFun
    from part1.tabular_value_iteration import ValueIteration
    from part4.discretize import Discretize

    envs = [CartPoleEnv(), SwingUpEnv()]

    for env in envs:
        env_name = env.__class__.__name__
        exp_dir = os.getcwd() + '/data/part4/%s/mode%s_state_discretization%s/' % (
            env_name, args.mode, str(args.state_discretization))
        # NOTE(review): assumes logger.configure creates exp_dir; otherwise
        # opening params.json below fails -- confirm.
        logger.configure(dir=exp_dir, format_strs=['stdout', 'log', 'csv'])

        # vars() returns the namespace's own dict, so this also updates args.
        args_dict = vars(args)
        args_dict['env'] = env_name
        # Context manager guarantees the file is flushed and closed before
        # the (potentially long) training run starts.
        with open(exp_dir + '/params.json', 'w') as params_file:
            json.dump(args_dict, params_file, indent=2, sort_keys=True)

        # From here on `env` is the discretized wrapper, not the raw env.
        env = Discretize(env,
                         state_discretization=args.state_discretization,
                         mode=args.mode)
        value_fun = TabularValueFun(env)
        policy = TabularPolicy(env)

        algo = ValueIteration(env,
                              value_fun,
                              policy,
                              render=render,
                              max_itr=args.max_iter,
                              num_rollouts=1,
                              render_itr=5,
                              log_itr=5)
        algo.train()