Example #1
def parse_args():
    """
    parse the arguments for DDPG training

    :return: (dict) the arguments
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    parser.add_argument('--config',
                        type=str,
                        default='cfg/train_no_teachers.yaml')
    parser.add_argument('--log-base-dir', type=str, default=None)
    parser.add_argument('--log-dir', type=str, default=None)
    parser.add_argument('--experiment-name', type=str, default=None)
    parser.add_argument('--load-path', type=str, default=None)
    parser.add_argument('--learner-type',
                        type=str,
                        choices=['DDPG', 'Q'],
                        default=None)

    # Can optionally overwrite some params from config
    parser.add_argument('--env-id', type=str, default=None)
    parser.add_argument('--teacher_behavior_policy', type=str, default=None)
    parser.add_argument('--seed', help='RNG seed', type=int, default=None)
    parser.add_argument('--batch-size', type=int,
                        default=None)  # per MPI worker
    parser.add_argument('--reward-scale', type=float, default=None)
    parser.add_argument('--noise-type', type=str, default=None)
    parser.add_argument('--feature-extraction', type=str, default=None)
    parser.add_argument('--nb-train-steps', type=int,
                        default=None)  # per epoch cycle and MPI worker
    parser.add_argument('--nb-rollout-steps', type=int,
                        default=None)  # per epoch cycle and MPI worker
    parser.add_argument('--num-timesteps', type=int, default=None)
    parser.add_argument('--nb-eval-steps', type=int,
                        default=None)  # per epoch cycle and MPI worker
    parser.add_argument('--log-interval', type=int,
                        default=None)  # per epoch cycle and MPI worker

    #parser.add_argument('--demo-path', type=str, default='')

    parser.add_argument(
        "--load-from",
        type=str,
        help="load the saved model and optimizer at the beginning")

    boolean_flag(parser, 'do_eval', default=None)
    boolean_flag(parser, 'render-eval', default=None)
    boolean_flag(parser, 'render', default=None)
    boolean_flag(parser, 'save_exps', default=None)

    args = parser.parse_args()
    dict_args = vars(args)
    return dict_args
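
Every snippet on this page calls a boolean_flag helper instead of defining raw store_true arguments. The sketch below shows the idiom as implemented in the misc_util module of the baselines/stable-baselines family; the exact signature is an assumption here, so check your own project's helper, since forks differ slightly.

import argparse

def boolean_flag(parser, name, default=False, help_msg=None):
    """Add paired --<name>/--no-<name> flags that set a single boolean dest."""
    dest = name.replace('-', '_')
    parser.add_argument('--' + name, action='store_true',
                        default=default, dest=dest, help=help_msg)
    parser.add_argument('--no-' + name, action='store_false', dest=dest)

With this helper, boolean_flag(parser, 'render-eval', default=False) exposes both --render-eval and --no-render-eval on the command line and stores the result under args.render_eval.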
Example #2
def argsparser():
    """
    make an argument parser for evaluating GAIL

    :return: (ArgumentParser)
    """
    parser = argparse.ArgumentParser('Do evaluation')
    parser.add_argument('--seed', type=int, default=0)
    parser.add_argument('--policy_hidden_size', type=int, default=100)
    parser.add_argument('--env',
                        type=str,
                        choices=[
                            'Hopper', 'Walker2d', 'HalfCheetah', 'Humanoid',
                            'HumanoidStandup'
                        ])
    boolean_flag(parser,
                 'stochastic_policy',
                 default=False,
                 help_msg='use stochastic/deterministic policy to evaluate')
    return parser.parse_args()
Example #3
def argsparser():
    """
    make a behavior cloning argument parser

    :return: (ArgumentParser)
    """
    parser = argparse.ArgumentParser(
        "Tensorflow Implementation of Behavior Cloning")
    parser.add_argument('--env_id', help='environment ID', default='Hopper-v1')
    parser.add_argument('--seed', help='RNG seed', type=int, default=0)
    parser.add_argument('--expert_path',
                        type=str,
                        default='data/deterministic.trpo.Hopper.0.00.npz')
    parser.add_argument('--checkpoint_dir',
                        help='the directory to save model',
                        default='checkpoint')
    parser.add_argument('--log_dir',
                        help='the directory to save log file',
                        default='log')
    #  Mujoco Dataset Configuration
    parser.add_argument('--traj_limitation', type=int, default=-1)
    # Network Configuration (Using MLP Policy)
    parser.add_argument('--policy_hidden_size', type=int, default=100)
    # for evaluation
    boolean_flag(parser,
                 'stochastic_policy',
                 default=False,
                 help_msg='use stochastic/deterministic policy to evaluate')
    boolean_flag(parser,
                 'save_sample',
                 default=False,
                 help_msg='save the trajectories or not')
    parser.add_argument('--BC_max_iter',
                        help='Max iteration for training BC',
                        type=int,
                        default=int(1e5))  # int(...) so the default matches type=int
    return parser.parse_args()
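
One detail worth calling out: argparse applies type only to values read from the command line, never to the default, which is why the 1e5-style defaults above are wrapped in int(...). A minimal standalone demonstration of the pitfall:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--BC_max_iter', type=int, default=1e5)

# type converts command-line strings only; the default is left untouched
print(type(parser.parse_args([]).BC_max_iter))                           # <class 'float'>
print(type(parser.parse_args(['--BC_max_iter', '100000']).BC_max_iter))  # <class 'int'>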
Example #4
File: run_util.py Project: MouseHu/gem
def parse_args():
    """
    parse the arguments for DDPG training

    :return: (dict) the arguments
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--env_type', type=str, default="mujoco")
    parser.add_argument('--env-id', type=str, default='Ant-v2')
    parser.add_argument('--agent', type=str, default='TD3')
    # boolean_flag(parser, 'render-eval', default=False)
    boolean_flag(parser, 'layer-norm', default=False)
    boolean_flag(parser, 'evaluation', default=True)

    parser.add_argument('--seed',
                        help='RNG seed',
                        type=int,
                        default=int(time.time()))
    parser.add_argument('--comment',
                        help='to show name',
                        type=str,
                        default="show_name_in_htop")

    parser.add_argument('--gamma', type=float, default=0.99)

    parser.add_argument('--num-timesteps', type=int, default=int(1e6) +
                        10)  # plus 10 to make one more evaluation
    parser.add_argument('--max_steps', type=int,
                        default=1000)  # truncate steps for ddq

    parser.add_argument('--delay-step', type=int, default=0)

    args = parser.parse_args()
    dict_args = vars(args)
    return dict_args
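
The vars(args) conversion at the end of these parse_args variants returns a plain dict, which can be splatted directly into a training entry point. A hypothetical consumer (the train function below is illustrative, not the repository's actual API):

def train(env_id, seed, gamma, num_timesteps, **extra):
    # illustrative entry point; parameter names mirror the parser's dests,
    # with hyphens already converted to underscores by argparse
    print(f"training {env_id} (seed={seed}, gamma={gamma}) "
          f"for {num_timesteps} steps; unused args: {sorted(extra)}")

if __name__ == '__main__':
    train(**parse_args())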
Example #5
def argsparser():
    """
    get an argument parser for training GAIL on MuJoCo

    :return: (ArgumentParser)
    """
    parser = argparse.ArgumentParser("Tensorflow Implementation of GAIL")
    parser.add_argument('--env_id', help='environment ID', default='Hopper-v2')
    parser.add_argument('--seed', help='RNG seed', type=int, default=0)
    parser.add_argument('--expert_path', type=str, default='data/deterministic.trpo.Hopper.0.00.npz')
    parser.add_argument('--checkpoint_dir', help='the directory to save model', default='checkpoint')
    parser.add_argument('--log_dir', help='the directory to save log file', default='log')
    parser.add_argument('--load_model_path', help='if provided, load the model', type=str, default=None)
    # Task
    parser.add_argument('--task', type=str, choices=['train', 'evaluate', 'sample'], default='train')
    # for evaluation
    boolean_flag(parser, 'stochastic_policy', default=False, help_msg='use stochastic/deterministic policy to evaluate')
    boolean_flag(parser, 'save_sample', default=False, help_msg='save the trajectories or not')
    #  Mujoco Dataset Configuration
    parser.add_argument('--traj_limitation', type=int, default=-1)
    # Optimization Configuration
    parser.add_argument('--g_step', help='number of steps to train policy in each epoch', type=int, default=3)
    parser.add_argument('--d_step', help='number of steps to train discriminator in each epoch', type=int, default=1)
    # Network Configuration (Using MLP Policy)
    parser.add_argument('--policy_hidden_size', type=int, default=100)
    parser.add_argument('--adversary_hidden_size', type=int, default=100)

    # Algorithms Configuration
    # ---------------------------------- GAIL currently supports only trpo -----------------------------------------
    parser.add_argument('--algo', type=str, default='trpo')
    # ------------------------------------------------------------------------------------------------

    parser.add_argument('--max_kl', type=float, default=0.01)
    parser.add_argument('--policy_entcoeff', help='entropy coefficient of policy', type=float, default=0)
    parser.add_argument('--adversary_entcoeff', help='entropy coefficient of discriminator', type=float, default=1e-3)
    # Training Configuration
    parser.add_argument('--save_per_iter', help='save model every xx iterations', type=int, default=100)
    parser.add_argument('--num_timesteps', help='total number of training timesteps', type=int, default=int(5e6))

    # Behavior Cloning
    boolean_flag(parser, 'pretrained', default=False, help_msg='Use BC to pretrain')
    parser.add_argument('--bc_max_iter', help='Max iteration for training BC', type=int, default=int(1e4))
    return parser.parse_args()
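
The --task choice above selects between three run modes. A minimal dispatch sketch (the handlers are placeholders for whatever the surrounding script defines, not GAIL's real entry points):

def run(args):
    # placeholder handlers; substitute the script's real train/evaluate/sample logic
    handlers = {
        'train': lambda: print(f'training on {args.env_id}'),
        'evaluate': lambda: print(f'evaluating on {args.env_id}'),
        'sample': lambda: print(f'sampling trajectories on {args.env_id}'),
    }
    handlers[args.task]()  # choices=... guarantees the key exists

if __name__ == '__main__':
    run(argsparser())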
Example #6
def parse_args():
    """
    parse the arguments for DDPG training

    :return: (dict) the arguments
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    parser.add_argument('--env-id', type=str, default='HalfCheetah-v1')
    boolean_flag(parser, 'render-eval', default=False)
    boolean_flag(parser, 'layer-norm', default=True)
    boolean_flag(parser, 'render', default=False)
    boolean_flag(parser, 'normalize-returns', default=False)
    boolean_flag(parser, 'normalize-observations', default=True)
    parser.add_argument('--seed', help='RNG seed', type=int, default=0)
    parser.add_argument('--critic-l2-reg', type=float, default=1e-2)
    parser.add_argument('--batch-size', type=int, default=64)  # per MPI worker
    parser.add_argument('--actor-lr', type=float, default=1e-4)
    parser.add_argument('--critic-lr', type=float, default=1e-3)
    boolean_flag(parser, 'enable-popart', default=False)
    parser.add_argument('--gamma', type=float, default=0.99)
    parser.add_argument('--reward-scale', type=float, default=1.)
    parser.add_argument('--clip-norm', type=float, default=None)
    parser.add_argument('--nb-train-steps', type=int,
                        default=50)  # per epoch cycle and MPI worker
    parser.add_argument('--nb-eval-steps', type=int,
                        default=100)  # per epoch cycle and MPI worker
    parser.add_argument('--nb-rollout-steps', type=int,
                        default=100)  # per epoch cycle and MPI worker
    # choices are adaptive-param_xx, ou_xx, normal_xx, none
    parser.add_argument('--noise-type', type=str, default='adaptive-param_0.2')
    boolean_flag(parser, 'evaluation', default=False)
    args = parser.parse_args()
    dict_args = vars(args)
    return dict_args
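
As the comment above --noise-type notes, the string encodes both the noise family and its standard deviation. A sketch of how such a spec can be decoded, loosely modeled on the baselines DDPG runner (it returns a plain description, not the library's actual noise objects, and ignores the comma-separated multi-spec case):

def parse_noise_spec(noise_type):
    # e.g. 'adaptive-param_0.2', 'ou_0.3', 'normal_0.1', or 'none'
    name, _, stddev = noise_type.partition('_')
    if name == 'none':
        return None
    if name in ('adaptive-param', 'normal', 'ou'):
        return {'kind': name, 'stddev': float(stddev)}
    raise ValueError(f'unknown noise type: {noise_type!r}')

print(parse_noise_spec('adaptive-param_0.2'))  # {'kind': 'adaptive-param', 'stddev': 0.2}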