def parse_args():
    """
    parse the arguments for DDPG training

    :return: (dict) the arguments
    """
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--config', type=str, default='cfg/train_no_teachers.yaml')
    parser.add_argument('--log-base-dir', type=str, default=None)
    parser.add_argument('--log-dir', type=str, default=None)
    parser.add_argument('--experiment-name', type=str, default=None)
    parser.add_argument('--load-path', type=str, default=None)
    parser.add_argument('--learner-type', type=str, choices=['DDPG', 'Q'], default=None)
    # Can optionally overwrite some params from config
    parser.add_argument('--env-id', type=str, default=None)
    parser.add_argument('--teacher_behavior_policy', type=str, default=None)
    parser.add_argument('--seed', help='RNG seed', type=int, default=None)
    parser.add_argument('--batch-size', type=int, default=None)  # per MPI worker
    parser.add_argument('--reward-scale', type=float, default=None)
    parser.add_argument('--noise-type', type=str, default=None)
    parser.add_argument('--feature-extraction', type=str, default=None)
    parser.add_argument('--nb-train-steps', type=int, default=None)  # per epoch cycle and MPI worker
    parser.add_argument('--nb-rollout-steps', type=int, default=None)  # per epoch cycle and MPI worker
    parser.add_argument('--num-timesteps', type=int, default=None)
    parser.add_argument('--nb-eval-steps', type=int, default=None)  # per epoch cycle and MPI worker
    parser.add_argument('--log-interval', type=int, default=None)  # per epoch cycle and MPI worker
    # parser.add_argument('--demo-path', type=str, default='')
    parser.add_argument('--load-from', type=str,
                        help='load the saved model and optimizer at the beginning')
    boolean_flag(parser, 'do_eval', default=None)
    boolean_flag(parser, 'render-eval', default=None)
    boolean_flag(parser, 'render', default=None)
    boolean_flag(parser, 'save_exps', default=None)
    args = parser.parse_args()
    dict_args = vars(args)
    return dict_args
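# The parser above defaults most options to None so that the YAML file named by
# --config can provide the base values and only flags the user actually passed
# override them. Below is a minimal sketch of that merge, assuming a flat YAML
# config; the helper name `merge_config_and_args` is illustrative and not part
# of this codebase.
import yaml


def merge_config_and_args(dict_args):
    """Load the YAML config and overlay any explicitly passed CLI values."""
    with open(dict_args['config']) as file_handler:
        config = yaml.safe_load(file_handler)
    for key, value in dict_args.items():
        # a CLI value of None means "keep the config value"
        if value is not None:
            config[key] = value
    return config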
def argsparser():
    """
    make an argument parser for the evaluation of GAIL

    :return: (ArgumentParser)
    """
    parser = argparse.ArgumentParser('Do evaluation')
    parser.add_argument('--seed', type=int, default=0)
    parser.add_argument('--policy_hidden_size', type=int, default=100)
    parser.add_argument('--env', type=str, choices=['Hopper', 'Walker2d', 'HalfCheetah',
                                                    'Humanoid', 'HumanoidStandup'])
    boolean_flag(parser, 'stochastic_policy', default=False,
                 help_msg='use stochastic/deterministic policy to evaluate')
    return parser.parse_args()
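# All of these parsers rely on a `boolean_flag` helper. Below is a minimal
# sketch of such a helper, consistent with the `help_msg` keyword used above;
# the paired --no-<name> switch mirrors the baselines-style implementation and
# is an assumption about this codebase.
def boolean_flag(parser, name, default=False, help_msg=None):
    """Add a boolean --<name> / --no-<name> flag pair to an argparse parser."""
    dest = name.replace('-', '_')
    parser.add_argument('--' + name, action='store_true', default=default, dest=dest, help=help_msg)
    parser.add_argument('--no-' + name, action='store_false', dest=dest)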
def argsparser():
    """
    make a behavior cloning argument parser

    :return: (ArgumentParser)
    """
    parser = argparse.ArgumentParser("Tensorflow Implementation of Behavior Cloning")
    parser.add_argument('--env_id', help='environment ID', default='Hopper-v1')
    parser.add_argument('--seed', help='RNG seed', type=int, default=0)
    parser.add_argument('--expert_path', type=str, default='data/deterministic.trpo.Hopper.0.00.npz')
    parser.add_argument('--checkpoint_dir', help='the directory to save model', default='checkpoint')
    parser.add_argument('--log_dir', help='the directory to save log file', default='log')
    # Mujoco Dataset Configuration
    parser.add_argument('--traj_limitation', type=int, default=-1)
    # Network Configuration (Using MLP Policy)
    parser.add_argument('--policy_hidden_size', type=int, default=100)
    # for evaluation
    boolean_flag(parser, 'stochastic_policy', default=False,
                 help_msg='use stochastic/deterministic policy to evaluate')
    boolean_flag(parser, 'save_sample', default=False,
                 help_msg='save the trajectories or not')
    parser.add_argument('--BC_max_iter', help='Max iteration for training BC',
                        type=int, default=int(1e5))
    return parser.parse_args()
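# Note on the int(1e5) cast above: argparse applies `type` only to values
# parsed from the command line, never to `default`, so `type=int, default=1e5`
# would silently leave a float in the namespace. A quick self-contained check:
import argparse

_parser = argparse.ArgumentParser()
_parser.add_argument('--BC_max_iter', type=int, default=int(1e5))
assert isinstance(_parser.parse_args([]).BC_max_iter, int)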
def parse_args():
    """
    parse the arguments for TD3/DDPG training

    :return: (dict) the arguments
    """
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--env_type', type=str, default="mujoco")
    parser.add_argument('--env-id', type=str, default='Ant-v2')
    parser.add_argument('--agent', type=str, default='TD3')
    # boolean_flag(parser, 'render-eval', default=False)
    boolean_flag(parser, 'layer-norm', default=False)
    boolean_flag(parser, 'evaluation', default=True)
    parser.add_argument('--seed', help='RNG seed', type=int, default=int(time.time()))
    parser.add_argument('--comment', help='name to show in the process list (htop)',
                        type=str, default="show_name_in_htop")
    parser.add_argument('--gamma', type=float, default=0.99)
    # plus 10 to make one more evaluation after the final training step
    parser.add_argument('--num-timesteps', type=int, default=int(1e6) + 10)
    parser.add_argument('--max_steps', type=int, default=1000)  # truncate episodes after this many steps
    parser.add_argument('--delay-step', type=int, default=0)
    args = parser.parse_args()
    dict_args = vars(args)
    return dict_args
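# Usage sketch: because parse_args() returns vars(args), hyphenated flags such
# as --env-id appear under underscored keys ('env_id'), and the dict can be
# splatted straight into a training entry point. `run_experiment` is a
# hypothetical name, not a function from this codebase.
def run_experiment(env_id, agent, seed, gamma, num_timesteps, **kwargs):
    print('training', agent, 'on', env_id, 'with seed', seed)


if __name__ == '__main__':
    run_experiment(**parse_args())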
def argsparser():
    """
    get an argument parser for training GAIL on MuJoCo

    :return: (ArgumentParser)
    """
    parser = argparse.ArgumentParser("Tensorflow Implementation of GAIL")
    parser.add_argument('--env_id', help='environment ID', default='Hopper-v2')
    parser.add_argument('--seed', help='RNG seed', type=int, default=0)
    parser.add_argument('--expert_path', type=str, default='data/deterministic.trpo.Hopper.0.00.npz')
    parser.add_argument('--checkpoint_dir', help='the directory to save model', default='checkpoint')
    parser.add_argument('--log_dir', help='the directory to save log file', default='log')
    parser.add_argument('--load_model_path', help='if provided, load the model', type=str, default=None)
    # Task
    parser.add_argument('--task', type=str, choices=['train', 'evaluate', 'sample'], default='train')
    # for evaluation
    boolean_flag(parser, 'stochastic_policy', default=False,
                 help_msg='use stochastic/deterministic policy to evaluate')
    boolean_flag(parser, 'save_sample', default=False,
                 help_msg='save the trajectories or not')
    # Mujoco Dataset Configuration
    parser.add_argument('--traj_limitation', type=int, default=-1)
    # Optimization Configuration
    parser.add_argument('--g_step', help='number of steps to train policy in each epoch',
                        type=int, default=3)
    parser.add_argument('--d_step', help='number of steps to train discriminator in each epoch',
                        type=int, default=1)
    # Network Configuration (Using MLP Policy)
    parser.add_argument('--policy_hidden_size', type=int, default=100)
    parser.add_argument('--adversary_hidden_size', type=int, default=100)
    # Algorithms Configuration
    # GAIL currently supports TRPO only
    parser.add_argument('--algo', type=str, default='trpo')
    parser.add_argument('--max_kl', type=float, default=0.01)
    parser.add_argument('--policy_entcoeff', help='entropy coefficient of policy',
                        type=float, default=0)
    parser.add_argument('--adversary_entcoeff', help='entropy coefficient of discriminator',
                        type=float, default=1e-3)
    # Training Configuration
    parser.add_argument('--save_per_iter', help='save model every xx iterations', type=int, default=100)
    parser.add_argument('--num_timesteps', help='total number of training timesteps',
                        type=int, default=int(5e6))
    # Behavior Cloning
    boolean_flag(parser, 'pretrained', default=False, help_msg='Use BC to pretrain')
    parser.add_argument('--bc_max_iter', help='Max iteration for training BC',
                        type=int, default=int(1e4))
    return parser.parse_args()
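# Sketch of how the --task choices above might be dispatched; the train,
# evaluate, and sample helpers are hypothetical names standing in for this
# codebase's actual entry points.
def main(args):
    if args.task == 'train':
        train(args)     # hypothetical training loop
    elif args.task == 'evaluate':
        evaluate(args)  # hypothetical policy evaluation
    elif args.task == 'sample':
        sample(args)    # hypothetical trajectory sampling
    else:
        raise ValueError('unknown task: {}'.format(args.task))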
def parse_args():
    """
    parse the arguments for DDPG training

    :return: (dict) the arguments
    """
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--env-id', type=str, default='HalfCheetah-v1')
    boolean_flag(parser, 'render-eval', default=False)
    boolean_flag(parser, 'layer-norm', default=True)
    boolean_flag(parser, 'render', default=False)
    boolean_flag(parser, 'normalize-returns', default=False)
    boolean_flag(parser, 'normalize-observations', default=True)
    parser.add_argument('--seed', help='RNG seed', type=int, default=0)
    parser.add_argument('--critic-l2-reg', type=float, default=1e-2)
    parser.add_argument('--batch-size', type=int, default=64)  # per MPI worker
    parser.add_argument('--actor-lr', type=float, default=1e-4)
    parser.add_argument('--critic-lr', type=float, default=1e-3)
    boolean_flag(parser, 'enable-popart', default=False)
    parser.add_argument('--gamma', type=float, default=0.99)
    parser.add_argument('--reward-scale', type=float, default=1.)
    parser.add_argument('--clip-norm', type=float, default=None)
    parser.add_argument('--nb-train-steps', type=int, default=50)  # per epoch cycle and MPI worker
    parser.add_argument('--nb-eval-steps', type=int, default=100)  # per epoch cycle and MPI worker
    parser.add_argument('--nb-rollout-steps', type=int, default=100)  # per epoch cycle and MPI worker
    # choices are adaptive-param_xx, ou_xx, normal_xx, none
    parser.add_argument('--noise-type', type=str, default='adaptive-param_0.2')
    boolean_flag(parser, 'evaluation', default=False)
    args = parser.parse_args()
    dict_args = vars(args)
    return dict_args
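# Sketch of how a --noise-type string such as 'adaptive-param_0.2' or
# 'ou_0.3,normal_0.1' is typically split into noise objects. The class names
# follow the baselines convention, and the import path assumes a
# stable-baselines-style layout; both are assumptions about this codebase.
import numpy as np
from stable_baselines.ddpg.noise import (AdaptiveParamNoiseSpec, NormalActionNoise,
                                         OrnsteinUhlenbeckActionNoise)


def parse_noise_type(noise_type, nb_actions):
    """Build (param_noise, action_noise) from a comma-separated spec string."""
    param_noise, action_noise = None, None
    for current in noise_type.split(','):
        name = current.strip()
        if name == 'none':
            continue
        if name.startswith('adaptive-param'):
            _, stddev = name.split('_')
            param_noise = AdaptiveParamNoiseSpec(initial_stddev=float(stddev),
                                                 desired_action_stddev=float(stddev))
        elif name.startswith('normal'):
            _, stddev = name.split('_')
            action_noise = NormalActionNoise(mu=np.zeros(nb_actions),
                                             sigma=float(stddev) * np.ones(nb_actions))
        elif name.startswith('ou'):
            _, stddev = name.split('_')
            action_noise = OrnsteinUhlenbeckActionNoise(mu=np.zeros(nb_actions),
                                                        sigma=float(stddev) * np.ones(nb_actions))
        else:
            raise RuntimeError('unknown noise type: {}'.format(name))
    return param_noise, action_noise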