Example #1
class G:
    # log_dir = "http://54.71.92.65:8081"
    log_dir = os.path.realpath(f'../../../../../ins-runs/')
    # log_dir = "/tmp/maml_torch"
    log_prefix = f'{now:%Y-%m-%d}/supervised-maml-debug'
    seed = 19024
    alpha = 0.001
    beta = 0.001
    debug = False
    n_epochs = 80000
    task_batch_n = 25
    k_shot = 10
    n_gradient_steps = 5
    test_grad_steps = Proto(
        [0, 1, 2, 3, 4, 5],
        help=
        'run test_fn when the grad_ind matches the element inside this list.')
    test_interval = Proto(
        5, help="The frequency at which we run the test function `test_fn`")
    save_interval = Proto(
        100,
        dtype=int,
        help="interval (of epochs) to save the network weights.")
    test_mode = BoolFlag(True,
                         help="boolean flag for test mode. True by default.")
Example #2
class Params:
    data_dir = Proto("/tmp/logging-server", help="The directory for saving the logs")
    port = Proto(8081, help="port for the logging server")
    host = Proto("127.0.0.1", help="IP address for running the server. Default only allows localhost from making "
                                   "requests. If you want to allow all ip, set this to '0.0.0.0'.")
    workers = Proto(1, help="Number of workers to run in parallel")
    debug = BoolFlag(False, help='boolean flag for printing out debug traces')
Example #3
class ServerArgs:
    host = Proto(
        "",
        help=
        "use 0.0.0.0 if you want external clients to be able to access this.")
    port = Proto(8081, help="the port")
    workers = Proto(1, help="the number of worker processes")
    debug = False
Example #4
class DEBUG:
    """To debug:
    Set debug_params = 1,
    set debug_apply_gradient = 1.
    Then the gradient ratios between the worker and the meta runner should be printed out, and they should be 1.
    Otherwise, the runner model is diverging from the meta network.
    """
    no_weight_reset = Proto(0, help="flag to turn off caching and resetting of the weights")
    no_task_resample = Proto(0, help="bypass task re-sampling")
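
The docstring above describes the intended check: with both debug flags set, the ratios between the worker's gradients and the meta runner's gradients are printed and should all be 1. A rough, hypothetical sketch of that ratio check follows; print_grad_ratios, worker_grads, and meta_grads are illustrative names and are not part of the original code base.

def print_grad_ratios(worker_grads, meta_grads, eps=1e-12):
    # Print the element-wise ratio between the worker's gradients and the
    # meta runner's gradients; ratios near 1.0 mean the two stay in sync.
    for i, (w, m) in enumerate(zip(worker_grads, meta_grads)):
        ratio = w / m if abs(m) > eps else float("nan")
        print(f"grad[{i}] worker/meta ratio: {ratio:.4f}")

# Toy usage: identical gradients give ratios of exactly 1.0.
print_grad_ratios([0.5, -1.2, 0.03], [0.5, -1.2, 0.03])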
Example #5
class G:
    term_reward_threshold = -8000.0
    run_mode = "maml"  # type:  "Choose between maml and e_maml. Switches the loss function used for training"
    # env_name = 'HalfCheetah-v2'  # type:  "Name of the task environment"
    env_name = 'HalfCheetahGoalDir-v0'  # type:  "Name of the task environment"
    start_seed = Proto(0, help="seed for initialization of each game")
    render = False
    n_cpu = multiprocessing.cpu_count()  # type: "number of threads used"
    # Note: (E_)MAML Training Parameters
    n_tasks = Proto(20,
                    help="40 for locomotion, 20 for 2D navigation ref:cbfinn")
    n_graphs = Proto(
        1,
        help=
        "number of parallel graphs for multi-device parallelism. Hard coded to 1 atm."
    )
    n_grad_steps = 1  # type:  "number of gradient descent steps for the worker." #TODO change back to 1
    eval_grad_steps = Proto(
        list(range(n_grad_steps + 1)),
        help="the gradient steps at which we evaluate the policy. Used "
        "to make pretty plots.")
    n_epochs = 800  # type:  "Number of epochs"
    # 40k per task (action, state) tuples, or 20k (per task) if you have 10/20 meta tasks
    n_parallel_envs = 40  # type:  "Number of parallel envs in minibatch. The SubprocVecEnv batch_size."
    batch_timesteps = 100  # type:  "max_steps for each episode, used to set env._max_steps parameter"
    env_max_timesteps = Proto(
        0,
        help=
        "max_steps for each episode, used to set env._max_steps parameter. 0 to use "
        "gym default.")
    single_sampling = 0  # type:  "flag for running a single sampling step. 1 ON, 0 OFF"
    baseline = Proto('linear', help="using the critic as the baseline")
    meta_sgd = Proto(
        False, help="NOT YET IMPLEMENTED. Learn a gradient for each parameter")
    # Note: MAML Options
    first_order = Proto(
        True,
        help=
        "Whether to stop gradient calculation during meta-gradient calculation"
    )
    alpha = 0.05  # type:  "worker learning rate. use 0.1 for first step, 0.05 afterward ref:cbfinn"
    beta = 0.01  # type:  "meta learning rate"
    inner_alg = "VPG"  # type:  '"PPO" or "VPG", "rl_algs.VPG" or "rl_algs.PPO" for rl_algs baselines'
    inner_optimizer = "SGD"  # type:  '"Adam" or "SGD"'
    meta_alg = "PPO"  # type:  "PPO or TRPO, TRPO is not yet implemented."
    meta_optimizer = "Adam"  # type:  '"Adam" or "SGD"'
    activation = "tanh"
    hidden_size = 64  # type: "hidden size for the MLP policy"
    # Model options
    normalize_env = False  # type: "normalize the environment"
    vf_coef = 0.5  # type:  "loss weighting coefficient for the value function loss, with the VPG loss being 1.0"
    ent_coef = 0.01  # type:  "PPO entropy coefficient"
    max_grad_norm = 1.0  # type:  "PPO maximum gradient norm"
    clip_range = 0.2  # type:  "PPO clip_range parameter"
    # GAE runner options
    gamma = 0.99  # type:  "GAE gamma"
    lam = 0.95  # type:  "GAE lambda"
    # Grid World config parameters
    change_colors = 0  # type:  "shuffle colors of the board game"
    change_dynamics = 0  # type:  'shuffle control actions (up down, left right) of the game'
class Config:
    """
    Your ICLR best paper project

    -- Ge
    """
    seed = Proto(10, help="random seed for the environment")
Example #7
class AppServerArgs:
    """
    Configuration Arguments for the Sanic App that serves
    the static web-application.

    [Usage]

    To launch the web-app client, do

    python -m ml_dash.app port=3001 host=0.0.0.0 workers=4 debug=True
    """
    host = Proto("", help="use 0.0.0.0 if you want external clients to be able to access this.")
    port = Proto(3001, help="the port")
    workers = Proto(1, help="the number of worker processes")
    debug = False
    access_log = True
Example #8
class ServerArgs:
    host = Proto(
        "",
        help=
        "use 0.0.0.0 if you want external clients to be able to access this.")
    port = 8082
    workers = 1
    debug = False
Example #9
class Args:
    """
    ML-Dash
    -------

    This module contains `ml_dash.server`, the visualization backend, and `ml_dash.app`, a
    static server hosting the web application.

    Usage
    -----

        python -m ml_dash.server --port 8090 --host 0.0.0.0 --workers 10

    """
    logdir = Proto(os.path.realpath("."), help="the root directory for all of the logs")
Example #10
class G:
    plot_interval = 10
    log_dir = os.path.realpath('./outputs')
    log_prefix = f"{now:%Y-%m-%d}/maml_torch/local-debug"
    seed = 0
    # model parameters
    input_n = 1
    output_n = 1
    optimizer = 'SGD'  # currently not used. Hard coded inner optimizer.
    meta_optimizer = 'Adam'
    # maml parameters
    npts = Proto(100, help="the number of datapoints in the generated dataset")
    n_epochs = 8000
    task_batch_n = 40
    k_shot = 5
    n_gradient_steps = 1
    test_grad_steps = [1]
    save_interval = 100
    test_interval = 1
    alpha = 0.001
    beta = 1e-3
Example #11
class G:
    plot_interval = 10
    SHOW_10_GRAD = True
    log_dir = os.path.realpath('./outputs')
    log_prefix = f"maml_torch/local-debug"
    seed = 0
    # model parameters
    input_n = 1
    output_n = 1
    optimizer = 'SGD'
    meta_optimizer = 'Adam'
    # maml parameters
    npts = Proto(100, help="the number of datapoints in the generated dataset")
    n_epochs = 20000
    task_batch_n = 40
    k_shot = 10
    n_gradient_steps = 3
    # aws run configs
    mode = "ssh"
    use_gpu = False
    docker_image = f"ufoym/deepo{':cpu' if not use_gpu else ''}"
    instance_type = "p2.xlarge" if use_gpu else "c4.large"
Example #12
class G(ParamsProto):
    """Supervised MAML in tensorflow"""
    npts = Proto(100, help="number of points to sample from distribution")
    num_epochs = Proto(70000, help="number of epochs to train")
    num_tasks = Proto(10, help="number of tasks in the inner loop")
    num_grad_steps = Proto(
        1, help="number of gradient descent steps in the inner loop")
    num_points_sampled = Proto(10, help="effectively the k-shot")
    eval_grad_steps = Proto(
        [0, 1, 10], help="the grad steps evaluated with full sample")
    fix_amp = Proto(
        False,
        help=
        "controls the sampling, fix the amplitude of the sample distribution if True"
    )
    render = BoolFlag(False, help="turn on the rendering")
    no_dump = BoolFlag(
        True,
        help=
        "turn off the data dump. By default dump when no flag is present.")
Example #13
class Params:
    data_dir = Proto("/tmp/logging-server", help="The directory for saving the logs")
    port = Proto(8081, help="port for the logging server")
    debug = BoolFlag(False, help='boolean flag for printing out debug traces')
Example #14
class G:
    # Termination conditions
    term_loss_threshold = 100
    term_reward_threshold = -8000.0

    run_mode = "maml"  # type:  "Choose between maml and e_maml. Switches the loss function used for training"
    e_maml_lambda = Proto(1.0, help="The scaling factor for the E-MAML term")
    # env_name = 'HalfCheetah-v2'  # type:  "Name of the task environment"
    env_name = 'HalfCheetahGoalDir-v0'  # type:  "Name of the task environment"
    start_seed = Proto(0, help="seed for initialization of each game")
    render = False
    n_cpu = multiprocessing.cpu_count() * 2  # type: "number of threads used"

    # (E_)MAML Training Parameters
    n_tasks = Proto(20, help="40 for locomotion, 20 for 2D navigation ref:cbfinn")
    n_graphs = Proto(1, help="number of parallel graphs for multi-device parallelism. Hard coded to 1 atm.")
    n_grad_steps = 5  # type:  "number of gradient descent steps for the worker." #TODO change back to 1
    meta_n_grad_steps = Proto(1, help="number of gradient descent steps for the meta algorithm.")
    reuse_meta_optimizer = Proto(True, help="Whether to use the same AdamW optimizer for all "
                                            "meta gradient steps. MUCH FASTER to initialize with [True].")
    eval_grad_steps = Proto(list(range(n_grad_steps + 1)),
                            help="the gradient steps at which we evaluate the policy. Used to make pretty plots.")

    bias_dim = Proto(20, help="the input bias variable dimension that breaks the input symmetry")
    # 40k per task (action, state) tuples, or 20k (per task) if you have 10/20 meta tasks
    n_parallel_envs = 40  # type:  "Number of parallel envs in minibatch. The SubprocVecEnv batch_size."
    batch_timesteps = 100  # type:  "max_steps for each episode, used to set env._max_steps parameter"

    epoch_init = Proto(0, help="the epoch to start with.")
    n_epochs = 800  # type:  "Number of epochs"
    eval_interval = Proto(None, help="epoch interval for evaluation.")
    eval_num_envs = Proto(n_parallel_envs, help="default to same as sampling envs")
    eval_timesteps = Proto(50, help="batch size for the evaluation RL runs")

    record_movie_interval = 500
    start_movie_after_epoch = 700
    render_num_envs = Proto(10, help="keep small b/c rendering is slow")
    movie_timesteps = 100  # type: "now runs in batch mode"
    start_checkpoint_after_epoch = Proto(200, help="epoch at which start saving checkpoints.")
    checkpoint_interval = Proto(None, help="the frequency for saving checkpoints on the policy")
    load_from_checkpoint = Proto(None, help="the path to the checkpoint file (saved by logger) to be loaded at the"
                                            " beginning of the training session. Also includes the learned loss, "
                                            "and learned learning rates if available.")

    # RL sampling settings
    reset_on_start = Proto(False, help="reset the environment at the beginning of each episode. "
                                       "Do NOT use this when using SubProcessVecEnv")

    # behavior cloning
    mask_meta_bc_data = Proto(False, help='masking the state space for one-shot imitation baseline')
    # bc_eval_timesteps = Proto(100, help="number of timesteps for evaluation")
    episode_subsample = Proto(1, help='the subsampling ratio for episodic training dataset. Active under episode mode')
    sample_limit = Proto(None, help='the number of timesteps used in the behavior cloning algorithm.')
    k_fold = Proto(5, help='the k-fold cross validation')

    env_max_timesteps = Proto(0, help="max_steps for each episode, used to set env._max_steps parameter. 0 to use "
                                      "gym default.")
    single_sampling = 0  # type:  "flag for running a single sampling step. 1 ON, 0 OFF"
    baseline = Proto('linear', help="using the critic as the baseline")
    use_gae = Proto(True, help="flag to turn GAE on and off")
    # GAE runner options
    gamma = Proto(0.995, help="GAE gamma")
    lam = Proto(0.97, help="GAE lambda")
    # Imperfect Demonstration Options
    # imperfect_demo = Proto(None, help='flag to turn on the systematic noise for the imperfect demonstration')
    # demo_offset_abs = Proto(None, help='size of the systematic offset to the goal position in expert demo')
    # demo_noise_scale = Proto(None, help='scale of the noise added to the goal position in expert demo')

    # MAML Options
    first_order = Proto(False, help="Whether to stop gradient calculation during meta-gradient calculation")
    alpha = 0.05  # type:  "worker learning rate. use 0.1 for first step, 0.05 afterward ref:cbfinn"
    meta_sgd = Proto(None, help='One of [None, True, "full"]. When full learns alpha same shape as tensors.')
    beta = 0.01  # type:  "meta learning rate"
    inner_alg = "VPG"  # type:  '"PPO" or "VPG", "rl_algs.VPG" or "rl_algs.PPO" for rl_algs baselines'
    learned_loss_type = None
    inner_optimizer = "SGD"  # type:  '"AdamW", "Adam", or "SGD"'
    meta_alg = "PPO"  # type:  "PPO or TRPO, TRPO is not yet implemented."
    meta_optimizer = "AdamW"  # type:  '"AdamW", "Adam" or "SGD"'
    activation = "tanh"
    n_layers = 4  # type: "the number of hidden layers for the policy network. Sometimes bigger is better"
    hidden_size = 64  # type: "hidden size for the MLP policy"

    # Model options
    use_k_index = Proto(False, help="whether to wrap k_index around the environment. Helps for the value baseline")
    normalize_env = False  # type: "normalize the environment"
    vf_coef = 0.5  # type:  "loss weighting coefficient for the value function loss, with the VPG loss being 1.0"
    ent_coef = 0.01  # type:  "PPO entropy coefficient"
    inner_max_grad_norm = 1.0  # type:  "PPO maximum gradient norm"
    meta_max_grad_norm = 1.0  # type:  "PPO maximum gradient norm"
    inner_max_grad_clip = Proto(None, help="maximum gradient clip")
    meta_max_grad_clip = Proto(None, help="maximum gradient clip")
    clip_range = Proto(0.2, help="PPO clip_range parameter")

    # policy parameters
    init_logstd = Proto(0, help="initial log standard deviation of the gaussian policy")
    control_variance = Proto(False, help='flag for fixing the variance of the policy for the inner worker. Helps '
                                         'prevent inner adaptation from gaining too much from reducing variance.')
    fix_meta_variance = Proto(False, help="flag for fixing the meta runner's variance.")
    std_l2_coef = Proto(0, help="the regularization coefficient for the standard deviation")

    # Grid World config parameters
    change_colors = 0  # type:  "shuffle colors of the board game"
    change_dynamics = 0  # type:  'shuffle control actions (up down, left right) of the game'
Example #15
class G:
    some_arg = Proto(0, aliases=['-s'])
Example #16
class G_2:
    a = 23
    b = 29
    c = Proto(default=31, help="this is working")
    d = Proto(default=None, help="this is working")
    e = True
Example #17
class G(ParamsProto):
    """some parameter proto"""
    n = 1
    npts = Proto(100, help="number of points to sample from distribution")
    ok = True
Example #18
class Args:
    logdir = Proto(os.path.realpath("."),
                   help="the root directory for all of the logs")
Example #19
class Args:
    run_dir = Proto(
        None, help="path to the data.pkl file saved by the maml_torch.py")
    test_n_steps = [1, 5]
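
All of the examples above follow the same declaration pattern: plain class attributes hold simple defaults, while Proto(...) and BoolFlag(...) wrap a default together with a help string and optional dtype or aliases. The following is a minimal, hypothetical sketch of such a wrapper, intended only to illustrate the pattern; ProtoSketch and DemoArgs are made-up names, and this is not how params_proto is actually implemented.

class ProtoSketch:
    # A Proto-like default holder: stores the default plus CLI metadata.
    def __init__(self, default=None, help=None, dtype=None, aliases=None):
        self.default = default                # default value
        self.help = help                      # help text for a CLI parser
        self.dtype = dtype or type(default)   # optional explicit type
        self.aliases = aliases or []          # optional short flags

    def __get__(self, obj, owner=None):
        # Reading the attribute on the class yields the plain default value.
        return self.default


class DemoArgs:
    port = ProtoSketch(8081, help="port for the logging server")
    debug = False


assert DemoArgs.port == 8081   # reads like an ordinary class attribute
assert DemoArgs.debug is False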