def common_arg_parser():
    parser = arg_parser()
    parser.add_argument('--env', help='environment ID', type=str, default='Hopper-v1')
    parser.add_argument('--seed', help='RNG seed', type=int, default=0)
    parser.add_argument('--alg', help='Algorithm', type=str, default='bgail')
    parser.add_argument('--reward_scale', help='Reward scale factor. Default: 1.0', type=float, default=1.0)
    parser.add_argument('--save_path', help='Path to save trained model to', type=str, default='./outputs')
    parser.add_argument('--load_path', help='Path to load trained model for evaluation', type=str, default=None)
    # store_true makes this a proper boolean flag; with only `default=False`,
    # argparse would store whatever string was passed on the command line
    parser.add_argument('--render', help='Whether to display the simulation or not',
                        default=False, action='store_true')
    return parser
def network_arg_parser():
    parser = arg_parser()
    parser.add_argument('--value_network', '--value-network', type=str, default=None,
                        choices=[None, 'copy', 'shared'],
                        help='value network: None (none), a copy of the policy network, or shared weights')
    # argparse's `type=bool` is a bug: bool('False') is True. Since these all
    # default to False, store_true flags give the intended behavior (see the
    # str2bool helper sketched below for flags that must accept an explicit value).
    parser.add_argument('--normalize_observations', '--normalize-observations',
                        action='store_true', default=False,
                        help='whether to normalize observations')
    parser.add_argument('--estimate_q', '--estimate-q', action='store_true', default=False,
                        help='whether the policy should estimate q instead of v')
    parser.add_argument('--num_layers', '--num-layers', type=int, default=2)
    parser.add_argument('--num_hidden', '--num-hidden', type=int, default=64)
    parser.add_argument('--layer_norm', '--layer-norm', action='store_true', default=False)
    return parser
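# argparse's `type=bool` is a well-known pitfall: bool('False') is True, since
# any non-empty string is truthy. For flags that need an explicit
# `--flag True` / `--flag False` syntax, a converter like the one below works;
# this is a sketch (the same pattern as the str2bool defined inside a later
# argparser() in this file), not part of the original module.
import argparse

def str2bool(v):
    """Parse a command-line string into a real boolean."""
    if isinstance(v, bool):
        return v
    if v.lower() in ('yes', 'true', 't', 'y', '1'):
        return True
    if v.lower() in ('no', 'false', 'f', 'n', '0'):
        return False
    raise argparse.ArgumentTypeError('Boolean value expected.')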
def mujoco_arg_parser():
    """
    Create an argparse.ArgumentParser for run_mujoco.py.
    """
    import ast
    import os
    parser = arg_parser()
    parser.add_argument('--env', help='environment ID', type=str, default='InvertedPendulum-v2')
    parser.add_argument('--seed', help='RNG seed', type=int, default=0)
    parser.add_argument('--num_timesteps', type=int, default=int(1e6))
    parser.add_argument('--play', default=False, action='store_true')
    parser.add_argument('--clipped_type', default='kl2clip', type=str)
    # ast.literal_eval lets these flags accept Python literals (float, bool, None)
    parser.add_argument('--use_tabular', default=False, type=ast.literal_eval)
    parser.add_argument('--cliprange', default=0.2, type=ast.literal_eval)
    parser.add_argument('--delta_kl', default=None, type=float)
    parser.add_argument('--lr', default=3e-4, type=float)
    # TODO: change the root directory
    root_dir_default = '/tmp/baselines'
    if not os.path.exists(root_dir_default):
        tools.mkdir(root_dir_default)
    parser.add_argument('--root_dir', default=root_dir_default, type=str)
    parser.add_argument('--sub_dir', default=None, type=str)
    parser.add_argument('--policy_type', default='MlpPolicy', type=str)
    parser.add_argument('--force_write', default=1, type=int)
    return parser
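# A short illustration (not from the source) of why `type=ast.literal_eval` is
# used for flags like --cliprange and --use_tabular above: a single flag can
# then accept any Python literal from the command line.
import ast

assert ast.literal_eval('0.2') == 0.2      # float
assert ast.literal_eval('False') is False  # real bool (note the capital F)
assert ast.literal_eval('None') is None    # None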
def argparser():
    """
    Create an argparse.ArgumentParser.
    """
    parser = arg_parser()
    parser.add_argument('--env', help='environment ID', type=str, default='CartPole-v0')
    parser.add_argument('--seed', help='RNG seed', type=int, default=0)
    parser.add_argument('--num-timesteps', type=int, default=int(1e6))
    return parser
def neyboy_arg_parser():
    """
    Create an argparse.ArgumentParser for run_neyboy.py.
    """
    parser = arg_parser()
    parser.add_argument('--env', help='environment ID', default='neyboy-v0')
    parser.add_argument('--seed', help='RNG seed', type=int, default=0)
    parser.add_argument('--num-timesteps', type=int, default=int(10e6))
    return parser
def argparser():
    """
    Create an argparse.ArgumentParser.
    """
    def str2bool(v):
        if v.lower() in ('yes', 'true', 't', 'y', '1'):
            return True
        elif v.lower() in ('no', 'false', 'f', 'n', '0'):
            return False
        else:
            raise argparse.ArgumentTypeError('Boolean value expected.')

    parser = arg_parser()
    parser.add_argument('--env-id', help='environment ID', type=str, default='RoboschoolReacher-v1')
    parser.add_argument('--seed', help='RNG seed', type=int, default=0)
    parser.add_argument('--num-timesteps', type=int, default=int(0.5e5))
    parser.add_argument('--timesteps_per_episode', type=int, default=10000)
    parser.add_argument('--n_policy', help='Number of policies to execute', type=int, default=1)
    parser.add_argument('--filepath', type=str, default='/tmp/')
    parser.add_argument('--visualize', help='Load and visualize experiment?', type=str2bool, default=False)
    parser.add_argument('--retrace', help='Use retrace?', type=str2bool, default=False)
    parser.add_argument('--trpo', help='Use TRPO instead of COPOS?', type=str2bool, default=False)
    parser.add_argument('--entropy_bonus', help='Entropy bonus factor', type=float, default=0.0)
    parser.add_argument('--epsilon', help='Epsilon', type=float, default=0.01)
    parser.add_argument('--beta', help='Beta', type=float, default=0.01)
    parser.add_argument('--compatible', help='Use compatible policy?', type=str2bool, default=True)
    return parser
def common_arg_parser():
    """
    Create an argparse.ArgumentParser for run_mujoco.py.
    """
    parser = arg_parser()
    parser.add_argument('--env', help='environment ID', type=str, default='Reacher-v2')
    parser.add_argument('--env_type',
                        help='type of environment, used when the environment type cannot be '
                             'automatically determined',
                        type=str, default='custom')
    parser.add_argument('--seed', help='RNG seed', type=int, default=0)
    parser.add_argument('--alg', help='Algorithm', type=str, default='ppo2')
    parser.add_argument('--num_timesteps', type=float, default=1e6)
    parser.add_argument('--network',
                        help='network type (mlp, cnn, lstm, cnn_lstm, conv_only)',
                        choices=['mlp', 'cnn', 'lstm', 'cnn_lstm', 'conv_only'], default='mlp')
    parser.add_argument('--gamestate',
                        help='game state to load (so far only used in retro games)', default=None)
    parser.add_argument('--num_env',
                        help='Number of environment copies being run in parallel. When not specified, '
                             'set to the number of CPUs for Atari, and to 1 for Mujoco',
                        default=None, type=int)
    parser.add_argument('--reward_scale', help='Reward scale factor. Default: 1.0', default=1.0, type=float)
    parser.add_argument('--save_path', help='Path to save trained model to', default=None, type=str)
    parser.add_argument('--save_video_interval', help='Save video every x steps (0 = disabled)',
                        default=1, type=int)
    parser.add_argument('--save_video_length', help='Length of recorded video. Default: 200',
                        default=200, type=int)
    parser.add_argument('--play', default=False, action='store_true')
    return parser
def control_arg_parser():
    """
    Create an argparse.ArgumentParser for run_box2d.py.
    """
    parser = arg_parser()
    parser.add_argument('--log_dir', type=str, default='../logs')
    parser.add_argument('--env', help='environment ID', type=str, default='LunarLanderContinuousPOMDP-v0')
    # parser.add_argument('--net_size', help='Network size', default=[64, 64], type=str2list)
    # parser.add_argument('--filter_size', help='Define filter size for modified CNN policy', default=[16, 2], type=str2list)
    parser.add_argument('--hist_len', help='History Length', type=int, default=8)
    # parser.add_argument('--block_high', help='Define the height of the shelter area; should be greater than 1/2',
    #                     default=5/8, type=frac2float)
    parser.add_argument('--block_high',
                        help='Define the height of the shelter area; should be greater than 1/2',
                        default=3 / 4, type=frac2float)
    parser.add_argument('--nsteps', help='timesteps each iteration', type=int, default=2048)
    parser.add_argument('--hid_size', help='number of neurons for each hidden layer', type=int, default=32)
    # parser.add_argument('--batch_size', help='batch size', type=int, default=32)
    parser.add_argument('--method', help='method', type=str, default='trpo-new-evaluation')
    parser.add_argument('--seed', help='RNG seed', type=int, default=0)
    parser.add_argument('--num-timesteps', type=int, default=int(1e6))
    parser.add_argument('--give_state', help='0:False, 1:True', type=int, default=1)
    # parser.add_argument('--train', help='train', default=False, type=str2bool)
    # parser.add_argument('--render', help='render', default=False, type=str2bool)
    # parser.add_argument('--load_path', default=None)
    # parser.add_argument('--checkpoint', help='Use saved checkpoint?', default=False, type=str2bool)
    # parser.add_argument('--iters', help='Iterations so far (to produce videos)', default=0)
    # parser.add_argument('--use_entr', help='Use dynamic entropy regularization term?', default=False, type=str2bool)
    return parser
def argparser():
    """
    Create an argparse.ArgumentParser.
    """
    parser = arg_parser()
    parser.add_argument('--env', help='environment ID', type=str, default='MountainCarContinuous-v0')
    parser.add_argument('--seed', help='RNG seed', type=int, default=0)
    parser.add_argument('--num-timesteps', type=int, default=int(0.5e5))
    parser.add_argument('--timesteps_per_episode', type=int, default=10000)
    parser.add_argument('--n_policy', help='Number of policies to execute', type=int, default=1)
    parser.add_argument('--filepath', type=str, default='/tmp/')
    # `type=bool` would treat any non-empty string (including 'False') as True;
    # since all defaults are False, store_true flags give the intended behavior
    parser.add_argument('--visualize', help='Load and visualize experiment?', action='store_true', default=False)
    parser.add_argument('--retrace', help='Use retrace?', action='store_true', default=False)
    parser.add_argument('--trpo', help='Use TRPO instead of COPOS?', action='store_true', default=False)
    return parser
def arg_parser_of_interest():
    import ast
    parser = arg_parser()
    parser.add_argument('--process_id', help='Process ID (among all hyperparameter combinations)',
                        type=int, default=0)
    parser.add_argument('--alg', help='Algorithm', type=str, default='bgail')
    parser.add_argument('--env', help='environment ID', type=str, default='Hopper-v1')
    parser.add_argument('--num_expert_trajs', help='Number of expert trajectories for training',
                        default=25, type=int)
    parser.add_argument('--d_step', help='Number of classifier update steps for each iteration',
                        default=5, type=int)
    parser.add_argument('--num_particles', help='Number of SVGD or Ensemble classifiers',
                        default=5, type=int)
    parser.add_argument('--timesteps_per_batch', help='Minimum batch size for each iteration',
                        default=1000, type=int)
    parser.add_argument('--seed', help='RNG seed', type=int, default=0)
    parser.add_argument('--save_path', help='Path to save trained model to', default='./outputs', type=str)
    # without a type, `--use_classifier_logsumexp False` would store the truthy
    # string 'False'; literal_eval parses it into a real boolean
    parser.add_argument('--use_classifier_logsumexp', help='Use classifier logsumexp or not',
                        default=True, type=ast.literal_eval)
    return parser
def main():
    parser = arg_parser()
    parser.add_argument('--flags', '-f',
                        help='flags cfg file (will load checkpoint in save dir if found)',
                        default=None)
    args = parser.parse_args()
    flags = RogueAcerFlags.from_cfg(args.flags) if args.flags else RogueAcerFlags()

    RogueEnv.register(flags)
    logger.configure(flags.log_dir)

    env = make_rogue_env(num_env=flags.num_env, seed=flags.seed)
    set_global_seeds(flags.seed)

    policy_fn = models.get(flags.policy)
    learn(policy_fn, env, flags)
    env.close()
def main():
    parser = arg_parser()
    parser.add_argument('--policy', help='Policy architecture', choices=['cnn', 'lstm', 'lnlstm'], default='cnn')
    parser.add_argument('--env', help='environment ID', default='BreakoutNoFrameskip-v4')
    parser.add_argument('--seed', help='RNG seed', type=int, default=0)
    parser.add_argument('--N_itr', type=int, default=int(2e4))
    parser.add_argument('--lr', type=float, default=7e-4)
    parser.add_argument('--lrschedule', help='Learning rate schedule', choices=['constant', 'linear'], default='constant')
    parser.add_argument('--save_interval', help='model save frequency', type=int, default=1000)
    parser.add_argument('--alg', help='training algorithm', choices=['a2c', 'ppo2'], default='a2c')
    args = parser.parse_args()

    log_path = f"./Data/{args.alg}_{args.policy}_{args.env}lr{args.lr}seed{args.seed}"
    # log_path = "./Data/a2cTest/"
    logger.configure(dir=log_path)
    train(args.env, N_itr=args.N_itr, seed=args.seed, policy=args.policy, lr=args.lr,
          lrschedule=args.lrschedule, num_env=16, log_path=log_path,
          save_interval=args.save_interval, alg=args.alg)
def atari_arg_parser():
    """
    Create an argparse.ArgumentParser for run_atari.py.
    """
    import ast
    import os
    parser = arg_parser()
    parser.add_argument('--env', help='environment ID', default='BreakoutNoFrameskip-v4')
    parser.add_argument('--seed', help='RNG seed', type=int, default=0)
    parser.add_argument('--num-timesteps', type=int, default=int(10e6))
    parser.add_argument('--clipped_type', default='kl2clip', type=str)
    parser.add_argument('--use_tabular', default=False, type=ast.literal_eval)
    parser.add_argument('--cliprange', default=0.1, type=ast.literal_eval)
    parser.add_argument('--delta_kl', default=0.001, type=float)
    root_dir_default = '/tmp/baselines'
    if not os.path.exists(root_dir_default):
        tools.mkdir(root_dir_default)
    parser.add_argument('--root_dir', default=root_dir_default, type=str)
    parser.add_argument('--sub_dir', default=None, type=str)
    parser.add_argument('--force_write', default=1, type=int)
    return parser
def main():
    parser = arg_parser()
    parser.add_argument('--env', help='environment ID',
                        default='MiniGrid-MultiRoom-N2-S4-v0')  # MiniGrid-MultiRoom-N4-v0
    parser.add_argument('--seed', help='RNG seed', type=int, default=1)
    parser.add_argument('--num-timesteps', type=int, default=int(30e6))
    parser.add_argument('--policy', help='Policy architecture', choices=['cnn', 'lstm', 'lnlstm'], default='cnn')
    parser.add_argument('--lrschedule', help='Learning rate schedule', choices=['constant', 'linear'], default='constant')
    parser.add_argument('--sil-update', type=int, default=4, help='Number of updates per iteration')
    parser.add_argument('--sil-beta', type=float, default=0.1, help='Beta for weighted IS')
    parser.add_argument('--log', default='./log')
    parser.add_argument('--save_name', default='MultiRoomN2S4_a2c', help='Path for saved model')
    args = parser.parse_args()
    logger.configure(dir=args.log)
    train(args.env, args.save_name, num_timesteps=args.num_timesteps, seed=args.seed,
          policy=args.policy, lrschedule=args.lrschedule, sil_update=args.sil_update,
          sil_beta=args.sil_beta, num_env=16)
def main():
    parser = arg_parser()
    parser.add_argument('--env', help='environment ID', default='BreakoutNoFrameskip-v4')
    parser.add_argument('--seed', help='RNG seed', type=int, default=0)
    parser.add_argument('--num-timesteps', type=int, default=int(1e4))
    parser.add_argument('--policy', help='Policy architecture', choices=['lstm', 'qmdp'], default='qmdp')
    parser.add_argument('--lrschedule', help='Learning rate schedule', choices=['constant', 'linear'], default='constant')
    args = parser.parse_args()
    logger.configure(dir="./Data/a2cTest/")
    train(args.env, num_timesteps=args.num_timesteps, seed=args.seed,
          policy=args.policy, lrschedule=args.lrschedule, num_env=16)
def main():
    parser = arg_parser()
    parser.add_argument('--env', help='environment ID', default='carnivalRam20-v0')
    parser.add_argument('--seed', help='RNG seed', type=int, default=0)
    parser.add_argument('--N_itr', type=int, default=int(2e4))
    parser.add_argument('--policy', help='Policy architecture',
                        choices=['lstm16', 'lstm2', 'qmdp', 'qmdp_relu', 'qmdp_split', 'qmdp_k1',
                                 'qmdp_shallow', 'qmdp_dc', 'qmdp_svn'],
                        default='qmdp')
    parser.add_argument('--lr', type=float, default=7e-4)
    parser.add_argument('--lrschedule', help='Learning rate schedule', choices=['constant', 'linear'], default='constant')
    parser.add_argument('--save_interval', help='model save frequency', type=int, default=1000)
    parser.add_argument('--alg', help='training algorithm', choices=['a2c', 'ppo2'], default='a2c')
    args = parser.parse_args()

    log_path = f"./Data/{args.alg}_{args.policy}_{args.env}lr{args.lr}seed{args.seed}_ID_12345/"
    # log_path = "./Data/a2cTest/"
    logger.configure(dir=log_path)
    train(args.env, N_itr=args.N_itr, seed=args.seed, policy=args.policy, lr=args.lr,
          lrschedule=args.lrschedule, num_env=16, log_path=log_path,
          save_interval=args.save_interval, alg=args.alg)
def ppo_arg_parser():
    parser = arg_parser()
    parser.add_argument('--nsteps', type=int, default=2048,
                        help='number of steps of the vectorized environment per update '
                             '(i.e. batch size is nsteps * nenv, where nenv is the number '
                             'of environment copies simulated in parallel)')
    parser.add_argument('--ent-coef', '--ent_coef', type=float, default=0.0, help='entropy coefficient')
    parser.add_argument('--lr', type=float, default=3e-4, help='learning rate')
    parser.add_argument('--vf-coef', '--vf_coef', type=float, default=0.5, help='value fn loss coefficient')
    parser.add_argument('--max-grad-norm', '--max_grad_norm', type=float, default=0.5,
                        help='grad norm clipping scalar')
    parser.add_argument('--gamma', type=float, default=0.99, help='discount factor')
    parser.add_argument('--lam', type=float, default=0.95, help='advantage estimation discounting factor')
    parser.add_argument('--log-interval', '--log_interval', type=int, default=10, help='logging interval')
    parser.add_argument('--nminibatches', type=int, default=4, help='number of training minibatches per update')
    parser.add_argument('--noptepochs', type=int, default=4, help='number of training epochs per update')
    parser.add_argument('--cliprange', type=float, default=0.2,
                        help='clipping range, or a schedule function [0, 1] -> R+ '
                             'where 1 is the beginning of training')
    parser.add_argument('--save-interval', '--save_interval', type=int, default=0,
                        help='number of timesteps between saving events')
    parser.add_argument('--load-path', '--load_path', type=str, default=None, help='path to load model from')
    return parser
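# A minimal sketch (hypothetical wiring, not from the source) of how a common
# parser and an algorithm-specific parser such as ppo_arg_parser() could be
# combined: parse the shared flags first with parse_known_args(), then hand
# the leftover flags to the PPO parser.
def parse_all(argv=None):
    common = common_arg_parser()
    args, remaining = common.parse_known_args(argv)
    ppo_args = ppo_arg_parser().parse_args(remaining)
    return args, ppo_args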
def video(flags=RogueAcerFlags(), checkpoint_path=None, record_dir=None):
    try:
        RogueEnv.register(flags)
    except gym.error.Error:
        # an error is raised if Rogue was already registered
        pass
    options = AgentOptions(gui=True, gui_timer_ms=50, userinterface='curses')
    agent = ACER_Agent(options, flags=flags, checkpoint_path=checkpoint_path)
    if record_dir:
        agent = RecordingWrapper(agent, record_dir=record_dir)
    agent.run()


if __name__ == '__main__':
    parser = arg_parser()
    parser.add_argument('--flags', '-f', help='flags cfg file', default=None)
    parser.add_argument('--record_dir', '-r',
                        help='directory where to record frames on file (leave blank to avoid recording)',
                        default='')
    parser.add_argument('--checkpoint_path', '-c',
                        help="checkpoint file to load (without extension); "
                             "N.B.: if you don't provide one, an untrained model will be used")
    args = parser.parse_args()
    flags = RogueAcerFlags.from_cfg(args.flags) if args.flags else RogueAcerFlags()
    video(flags=flags, checkpoint_path=args.checkpoint_path, record_dir=args.record_dir)
def arg_parser_common():
    """
    Create an argparse.ArgumentParser for run_mujoco.py.
    """
    import ast
    import json
    parser = arg_parser()
    parser.add_argument('--env', help='environment ID', type=str, default='Walker2d-v2')
    parser.add_argument('--isatari', default=False, action='store_true')
    # parser.add_argument('--env', help='environment ID', type=str, default='AtlantisNoFrameskip')  # TODO: tmp
    # parser.add_argument('--isatari', default=True, action='store_true')
    parser.add_argument('--seed', help='RNG seed', type=int, default=0)
    parser.add_argument('--alg', help='You can run the following algorithms: pporb, trppo, trpporb, trulyppo',
                        default='trulyppo', type=str)
    parser.add_argument('--cliptype', default='', type=str)  # wasserstein_wassersteinrollback_constant, kl_klrollback_constant_withratio
    parser.add_argument('--clipargs', default=dict(), type=json.loads)

    # The priority of the default args is defined by the order in which they appear:
    # explicit input args have the highest priority, then the cliptype-specific entries,
    # then the env-specific entries, then the env-type-wide defaults.
    args_default_all = \
        dict(__envtype=dict(
            mujoco=dict(
                policy_type='MlpPolicyExt',
                n_steps=1024,
                n_envs=2,
                n_minibatches=32,
                n_opt_epochs=10,
                lr=3e-4,
                coef_entropy=0,
                eval_interval=1,
                num_timesteps=int(1e6),
                save_interval=10,
                logstd=0,
                __env=dict(
                    Humanoid=dict(
                        n_envs=64,
                        n_minibatches=64,
                        num_timesteps=int(20e6),
                    ),
                    HalfCheetah=dict(
                        logstd=-1.34
                    )
                ),
                __cliptype=dict(
                    ratio=dict(clipargs=dict(cliprange=0.2)),
                    ratio_rollback=dict(
                        clipargs=dict(cliprange=0.2, slope_rollback=-0.3),
                        __env=dict(
                            Humanoid=dict(
                                logstd=-1.34657,
                                clipargs=dict(cliprange=0.2, slope_rollback=-0.02)
                            )
                        )
                    ),
                    ratio_strict=dict(clipargs=dict(cliprange=0.2)),
                    ratio_rollback_constant=dict(clipargs=dict(cliprange=0.2, slope_rollback=-0.3)),
                    a2c=dict(clipargs=dict(cliprange=0.1)),
                    kl=dict(
                        clipargs=dict(klrange=0.035, cliprange=0.2),
                        __env=dict(
                            Humanoid=dict(
                                logstd=-0.5,
                                clipargs=dict(klrange=0.05, cliprange=0.2)
                            )
                        )
                    ),
                    kl_strict=dict(clipargs=dict(klrange=0.025, cliprange=0.2)),
                    kl_ratiorollback=dict(clipargs=dict(klrange=0.03, slope_rollback=-0.05, cliprange=0.2)),
                    kl_klrollback_constant=dict(clipargs=dict(klrange=0.03, slope_rollback=-0.4, cliprange=0.2)),
                    kl_klrollback_constant_withratio=dict(
                        # the common args
                        clipargs=dict(klrange=0.03, slope_rollback=-5, slope_likelihood=1, cliprange=0.2),
                        # the args for a specific env
                        __env=dict(
                            Humanoid=dict(
                                logstd=-0.5,
                                clipargs=dict(klrange=0.05, slope_rollback=-0.4, slope_likelihood=0, cliprange=0.2)
                            )
                        )
                    ),
                    kl_klrollback=dict(clipargs=dict(klrange=0.03, slope_rollback=-0.1, cliprange=0.2)),
                    # klrange is used for kl2clip and may be None; if None, it is adjusted by cliprange.
                    # cliprange is used for the value clip and may be None; if None, it is adjusted by klrange.
                    kl2clip=dict(
                        clipargs=dict(klrange=None, adjusttype='base_clip_upper', cliprange=0.2,
                                      kl2clip_opttype='tabular', adaptive_range=''),
                        __env=dict(
                            Humanoid=dict(
                                logstd=-1.34657359,
                                clipargs=dict(klrange=0.03, slope_rollback=-5, slope_likelihood=1.)
                            )
                        )
                    ),
                    kl2clip_rollback=dict(
                        clipargs=dict(klrange=None, adjusttype='base_clip_upper', cliprange=0.2,
                                      kl2clip_opttype='tabular', adaptive_range='', slope_rollback=-0.3)
                    ),
                    adaptivekl=dict(clipargs=dict(klrange=0.01, cliprange=0.2)),
                    adaptiverange_advantage=dict(clipargs=dict(cliprange_min=0, cliprange_max=0.4, cliprange=0.2)),
                    wasserstein=dict(clipargs=dict(range=0.05, cliprange=0.2)),
                    wasserstein_rollback_constant=dict(clipargs=dict(range=0.05, slope_rollback=-0.4, cliprange=0.2)),
                )
            ),
            atari=dict(
                policy_type='CnnPolicy',
                n_steps=128,
                n_envs=8,
                n_minibatches=4,
                n_opt_epochs=4,
                lr=2.5e-4,
                coef_entropy=0.01,
                eval_interval=0,
                num_timesteps=int(1e7),
                save_interval=400,
                logstd=0,
                __cliptype=dict(
                    ratio=dict(clipargs=dict(cliprange=0.1)),
                    ratio_rollback=dict(clipargs=dict(cliprange=0.1, slope_rollback=-0.01)),
                    a2c=dict(clipargs=dict(cliprange=0.1)),
                    kl=dict(
                        clipargs=dict(klrange=0.001, cliprange=0.1, decay_threshold=0.),
                        coef_entropy=0
                    ),
                    kl_ratiorollback=dict(
                        clipargs=dict(klrange=0.001, slope_rollback=-0.05, cliprange=0.1, decay_threshold=0.)),
                    kl_klrollback_constant=dict(
                        clipargs=dict(klrange=0.001, slope_rollback=-0.05, cliprange=0.1, decay_threshold=0.)),
                    kl_klrollback_constant_withratio=dict(
                        clipargs=dict(klrange=0.0008, slope_rollback=-20, slope_likelihood=1,
                                      cliprange=0.1, decay_threshold=0.),
                        coef_entropy=0,
                    ),
                    totalvariation=dict(clipargs=dict(range=0.02, cliprange=0.1, decay_threshold=0.)),
                    totalvariation_rollback_constant=dict(
                        clipargs=dict(range=0.02, slope_rollback=-0.05, cliprange=0.1, decay_threshold=0.)
                    ),
                    kl2clip=dict(
                        clipargs=dict(klrange=0.001, cliprange=0.1, kl2clip_opttype='tabular', adaptive_range=''),
                        coef_entropy=0
                    ),
                    adaptivekl=dict(
                        clipargs=dict(klrange=0.01, cliprange=0.1)
                    ),
                )
            )
        ))

    parser.add_argument('--lam', default=0.95, type=float)
    parser.add_argument('--lr', default=None, type=float)
    parser.add_argument('--policy_type', default=None, type=str)
    parser.add_argument('--log_dir_mode', default='finish_then_exit_else_overwrite', type=str)  # overwrite, finish_then_exit_else_overwrite
    parser.add_argument('--name_group', default=None, type=str)
    parser.add_argument('--keys_group', default=[], type=ast.literal_eval)
    # architecture of the network
    parser.add_argument('--policy_variance_state_dependent', default=False, type=ast.literal_eval)
    parser.add_argument('--hidden_sizes', default=64, type=ast.literal_eval)
    parser.add_argument('--num_layers', default=2, type=ast.literal_eval)
    parser.add_argument('--num_sharing_layers', default=0, type=int)
    parser.add_argument('--ac_fn', default='tanh', type=str)
    parser.add_argument('--coef_predict_task', default=0, type=float)
    parser.add_argument('--reward_scale', default=1., type=float)
    parser.add_argument('--lam_decay', default=False, type=ast.literal_eval)
    # ----- Please keep the defaults of the following args as None; their values differ across tasks
    parser.add_argument('--coef_entropy', default=None, type=float)
    parser.add_argument('--n_envs', default=None, type=int)
    parser.add_argument('--n_steps', default=None, type=int)
    parser.add_argument('--n_minibatches', default=None, type=int)
    parser.add_argument('--n_opt_epochs', default=None, type=int)
    parser.add_argument('--logstd', default=None, type=float)
    parser.add_argument('--log_interval', default=1, type=int)
    parser.add_argument('--n_eval_epsiodes', default=1, type=int)
    parser.add_argument('--num_timesteps', type=int, default=None)
    parser.add_argument('--eval_interval', type=int, default=None)
    parser.add_argument('--save_interval', default=None, type=int)
    parser.add_argument('--save_debug', default=False, action='store_true')
    # parser.add_argument('--debug_halfcheetah', default=0, type=int)
    parser.add_argument('--is_multiprocess', default=0, type=ast.literal_eval)
    return parser, args_default_all
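# A minimal sketch (hypothetical helper, not part of the source) of how the
# layered defaults in args_default_all above could be flattened, following the
# stated priority: per-cliptype entries (and their per-env sub-entries)
# override per-env entries, which override the env-type-wide defaults;
# explicit command-line input would override all of these.
import copy

def resolve_defaults(args_default_all, envtype, env, cliptype):
    base = copy.deepcopy(args_default_all['__envtype'][envtype])
    env_over = base.pop('__env', {}).get(env, {})
    clip_over = base.pop('__cliptype', {}).get(cliptype, {})
    clip_env_over = clip_over.pop('__env', {}).get(env, {})
    resolved = base                 # env-type-wide defaults
    resolved.update(env_over)       # per-env overrides
    resolved.update(clip_over)      # per-cliptype overrides
    resolved.update(clip_env_over)  # per-cliptype, per-env overrides
    return resolved

# e.g. resolve_defaults(args_default_all, 'mujoco', 'Humanoid', 'kl') would yield
# n_envs=64, logstd=-0.5, clipargs=dict(klrange=0.05, cliprange=0.2), ...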
def common_arg_parser():
    """
    Create an argparse.ArgumentParser.
    """
    parser = arg_parser()
    parser.add_argument('--env', help='environment ID', type=str, default='Reacher-v2')
    parser.add_argument('--env_type',
                        help='type of environment, used when the environment type cannot be '
                             'automatically determined',
                        type=str)
    parser.add_argument('--seed', help='RNG seed', type=int, default=None)
    parser.add_argument('--alg', help='Algorithm', type=str, default='ppo2')
    parser.add_argument('--num_timesteps', type=float, default=1e6)
    parser.add_argument('--network', help='network type (mlp, cnn, lstm, cnn_lstm, conv_only)', default=None)
    parser.add_argument('--gamestate', help='game state to load (so far only used in retro games)', default=None)
    parser.add_argument('--num_env',
                        help='Number of environment copies being run in parallel. When not specified, '
                             'set to the number of CPUs for Atari, and to 1 for Mujoco',
                        default=None, type=int)
    parser.add_argument('--reward_scale', help='Reward scale factor. Default: 1.0', default=1.0, type=float)
    parser.add_argument('--save_path', help='Path to save trained model to', default=None, type=str)
    parser.add_argument('--save_video_interval', help='Save video every x steps (0 = disabled)', default=0, type=int)
    parser.add_argument('--save_video_length', help='Length of recorded video. Default: 200', default=200, type=int)
    parser.add_argument('--log_path', help='Directory to save learning curve data.', default=None, type=str)
    parser.add_argument('--play', default=False, action='store_true')
    # RM-related arguments (reward machines)
    parser.add_argument('--use_rs', help='Use reward shaping', action='store_true', default=False)
    parser.add_argument('--use_crm', help='Use counterfactual experience', action='store_true', default=False)
    parser.add_argument('--gamma', help='Discount factor', type=float, default=0.9)
    parser.add_argument('--rs_gamma', help='Discount factor used for reward shaping', type=float, default=0.9)
    parser.add_argument('--r_min', help='R-min reward used for training option policies in hrm',
                        type=float, default=0.0)
    parser.add_argument('--r_max', help='R-max reward used for training option policies in hrm',
                        type=float, default=1.0)
    parser.add_argument('--use_self_loops', help='Add option policies for self-loops in the RMs',
                        action='store_true', default=False)
    return parser
def arg_parser_common():
    """
    Create an argparse.ArgumentParser for run_mujoco.py.
    """
    import ast
    import json
    parser = arg_parser()
    parser.add_argument('--env', help='environment ID', type=str, default='InvertedPendulum-v2')
    parser.add_argument('--is_atari', default=False, action='store_true')
    # parser.add_argument('--env', help='environment ID', type=str, default='PongNoFrameskip')
    # parser.add_argument('--is_atari', default=True, action='store_true')
    parser.add_argument('--seed', help='RNG seed', type=int, default=0)
    parser.add_argument('--cliptype', default='kl2clip', type=str)  # wasserstein_wassersteinrollback_constant
    # parser.add_argument('--cliprange', default=0.2, type=float)
    # import demjson
    parser.add_argument('--clipargs', default=dict(), type=json.loads)

    clipargs_default_all = {
        MUJOCO: dict(
            ratio=dict(cliprange=0.2),
            ratio_rollback=dict(cliprange=0.2, slope_rollback=-0.3),
            ratio_strict=dict(cliprange=0.2),
            ratio_rollback_constant=dict(cliprange=0.2, slope_rollback=-0.3),
            a2c=dict(cliprange=0.1),
            wasserstein=dict(range=0.05, cliprange=0.2),
            wasserstein_wassersteinrollback_constant=dict(range=0.05, slope_rollback=-0.4, cliprange=0.2),
            kl=dict(klrange=0.03, cliprange=0.2),
            kl_strict=dict(klrange=0.025, cliprange=0.2),
            kl_ratiorollback=dict(klrange=0.03, slope_rollback=-0.05, cliprange=0.2),
            kl_klrollback_constant=dict(klrange=0.03, slope_rollback=-0.1, cliprange=0.2),
            kl_klrollback=dict(klrange=0.03, slope_rollback=-0.1, cliprange=0.2),
            # adjusttype: base_clip_lower, base_clip_upper
            # kl2clip = dict(klrange=0.03, adjusttype='origin', cliprange=0.2, kl2clip_opttype='tabular', adaptive_range=''),
            kl2clip=dict(klrange=None, adjusttype='base_clip_upper', cliprange=0.2,
                         kl2clip_opttype='tabular', adaptive_range=''),
            kl2clip_rollback=dict(klrange=None, adjusttype='base_clip_upper', cliprange=0.2,
                                  kl2clip_opttype='tabular', adaptive_range='', slope_rollback=-0.3),
            # kl2clip = dict(klrange=None, adjusttype='base_clip_lower', cliprange=0.2)
            # kl2clip = dict(klrange=None, adjusttype='base_clip_upper', cliprange=0.2, kl2clip_opttype='tabular'),  # nn
            # klrange is used for kl2clip and may be None; if None, it is adjusted by cliprange.
            # cliprange is used for the value clip and may be None; if None, it is adjusted by klrange.
            adaptivekl=dict(klrange=0.01, cliprange=0.2),
            adaptiverange_advantage=dict(cliprange_min=0, cliprange_max=0.4, cliprange=0.2)),
        ATARI: dict(
            # TODO: !!! Please modify the parameters here
            ratio=dict(cliprange=0.1),
            ratio_rollback=dict(cliprange=0.1, slope_rollback=-0.3),
            ratio_strict=dict(cliprange=0.1),
            ratio_rollback_constant=dict(cliprange=0.1, slope_rollback=-0.3),
            a2c=dict(cliprange=0.1),
            kl=dict(klrange=0.03, cliprange=0.1),
            kl_strict=dict(klrange=0.025, cliprange=0.1),
            kl_ratiorollback=dict(klrange=0.03, slope_rollback=-0.05, cliprange=0.1),
            kl_klrollback_constant=dict(klrange=0.03, slope_rollback=-0.1, cliprange=0.1),
            kl_klrollback=dict(klrange=0.03, slope_rollback=-0.1, cliprange=0.1),
            # kl2clip = dict(klrange=0.03, adjusttype='origin', cliprange=0.2)
            # kl2clip = dict(klrange=None, adjusttype='base_clip_lower', cliprange=0.2)
            kl2clip=dict(klrange=0.001, cliprange=0.1, kl2clip_opttype='tabular', adaptive_range=''),
            # klrange is used for kl2clip and may be None; if None, it is adjusted by cliprange.
            # cliprange is used for the value clip and may be None; if None, it is adjusted by klrange.
            adaptivekl=dict(klrange=0.01, cliprange=0.1))
    }

    # parser.add_argument('--cliptype', default='origin', type=str)
    # parser.add_argument('--slope', default=0, type=float)
    # parser.add_argument('--cliprange', default=0.2, type=ast.literal_eval)
    # parser.add_argument('--delta_kl', default=None, type=ast.literal_eval)
    parser.add_argument('--lam', default=0.95, type=float)
    parser.add_argument('--lr', default=None, type=float)
    parser.add_argument('--policy_type', default=None, type=str)
    parser.add_argument('--log_dir_mode', default='finish_then_exit_else_overwrite', type=str)  # overwrite, finish_then_exit_else_overwrite
    parser.add_argument('--name_group', default='tmp', type=str)
    parser.add_argument('--keys_group', default=['cliptype', 'clipargs'], type=ast.literal_eval)
    # architecture of the network
    parser.add_argument('--policy_variance_state_dependent', default=False, type=ast.literal_eval)
    parser.add_argument('--hidden_sizes', default=64, type=ast.literal_eval)
    parser.add_argument('--num_layers', default=2, type=ast.literal_eval)
    parser.add_argument('--num_sharing_layers', default=0, type=int)
    parser.add_argument('--ac_fn', default='tanh', type=str)
    # parser.add_argument('--explore', default=0, type=int)
    # parser.add_argument('--explore_timesteps', default=0, type=int)
    # parser.add_argument('--explore_additive_threshold', default=None, type=float)
    # parser.add_argument('--explore_additive_rate', default=0, type=float)
    parser.add_argument('--coef_predict_task', default=0, type=float)
    parser.add_argument('--reward_scale', default=1., type=float)
    parser.add_argument('--lam_decay', default=False, type=ast.literal_eval)
    # ----- Please keep the defaults of the following args as None; their values differ across tasks
    parser.add_argument('--coef_entropy', default=None, type=float)
    parser.add_argument('--n_envs', default=None, type=int)
    parser.add_argument('--n_steps', default=None, type=int)
    parser.add_argument('--n_minibatches', default=None, type=int)
    parser.add_argument('--n_opt_epochs', default=None, type=int)
    parser.add_argument('--logstd', default=None, type=float)
    parser.add_argument('--log_interval', default=1, type=int)
    parser.add_argument('--n_eval_epsiodes', default=1, type=int)
    parser.add_argument('--num_timesteps', type=int, default=None)
    parser.add_argument('--eval_interval', type=int, default=None)
    parser.add_argument('--save_interval', default=None, type=int)
    parser.add_argument('--save_debug', default=False, action='store_true')

    args_default_all = {
        # MUJOCO
        MUJOCO: dict(
            policy_type=dict(_default='MlpPolicyExt'),
            n_steps=dict(_default=1024),
            n_envs=dict(Humanoid=64, _default=2),
            n_minibatches=dict(Humanoid=64, _default=32),
            n_opt_epochs=dict(_default=10),
            lr=dict(_default=3e-4),
            coef_entropy=dict(_default=0),
            eval_interval=dict(_default=1),
            num_timesteps=dict(Humanoid=int(20e6), _default=int(1e6)),
            save_interval=dict(_default=10),
            logstd=dict(HalfCheetah=-1.34, Humanoid=-1.34657, _default=0),
        ),
        # ATARI
        ATARI: dict(
            policy_type=dict(_default='CnnPolicy'),
            n_steps=dict(_default=128),
            n_envs=dict(_default=8),
            n_minibatches=dict(_default=4),
            n_opt_epochs=dict(_default=4),
            lr=dict(_default=2.5e-4),
            coef_entropy=dict(_default=0),  # TODO: tmp for kl2clip
            eval_interval=dict(_default=-1),
            num_timesteps=dict(_default=int(1e7)),
            save_interval=dict(_default=400),
        )
    }
    # parser.add_argument('--debug_halfcheetah', default=0, type=int)
    parser.add_argument('--is_multiprocess', default=0, type=ast.literal_eval)
    return parser, clipargs_default_all, args_default_all
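# A minimal sketch (hypothetical helper, not part of the source) of how the
# per-task tables in args_default_all above could be resolved: each
# hyperparameter maps an env name to a value, with '_default' as the fallback.
def resolve_arg(table, env):
    return table.get(env, table['_default'])

# e.g. resolve_arg(args_default_all[MUJOCO]['n_envs'], 'Humanoid') -> 64
#      resolve_arg(args_default_all[MUJOCO]['n_envs'], 'Hopper')   -> 2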