def ppo_params_parser(**kwargs):
    """Register the PPO hyperparameters and check ``kwargs`` against them.

    Each hyperparameter is added to a fresh ``params_parser()`` through
    ``add_num_param`` with ``lb``/``ub`` (presumably lower/upper bounds --
    confirm against ``params_parser``), a default value and a dtype.

    Args:
        **kwargs: Forwarded unchanged to ``check_params``.

    Returns:
        Whatever ``check_params(**kwargs)`` returns (presumably the checked,
        default-filled parameters -- confirm against ``params_parser``).
    """
    # NOTE: "architecture", "lr", "batch_size", "gamma", "activ_fcn",
    # "lrschedule" and "max_grad_norm" are not registered by this parser.
    # All entries below are registered unconditionally, i.e. independently of
    # any "architecture" value passed in kwargs.
    #
    # Columns: (name, lb, ub, default, dtype). Row order is registration order.
    hyperparameters = (
        ("nenvs", 1, 30, 3, int),
        ("vf_coeff", 1e-2, 1., 0.2, float),
        ("ent_coeff", 1e-12, 1., 1e-7, float),
        ("units_shared_layer1", 1, 700, 64, int),
        ("units_shared_layer2", 1, 700, 64, int),
        ("units_policy_layer", 1, 700, 64, int),
        ("nminibatches", 1, 500, 1, int),
        ("noptepochs", 1, 500, 1, int),
        ("lam", 0, 1., 0.95, float),
        ("nsteps", 1, 500, 32, int),
        ("cliprange", 0., 1., 0.2, float),
    )

    parser = params_parser()
    for name, lower, upper, default_value, value_type in hyperparameters:
        parser.add_num_param(name,
                             lb=lower,
                             ub=upper,
                             default=default_value,
                             dtype=value_type)

    return parser.check_params(**kwargs)
Exemple #2
0
def dqn_params_parser(**kwargs):
    """Register the DQN hyperparameters and check ``kwargs`` against them.

    Each hyperparameter is added to a fresh ``params_parser()`` through
    ``add_num_param`` with ``lb``/``ub`` (presumably lower/upper bounds --
    confirm against ``params_parser``), a default value and a dtype.

    Args:
        **kwargs: Forwarded unchanged to ``check_params``.

    Returns:
        Whatever ``check_params(**kwargs)`` returns (presumably the checked,
        default-filled parameters -- confirm against ``params_parser``).
    """
    # NOTE: "architecture", "gamma", "lr", "lrschedule", "batch_size" and
    # "max_grad_norm" are not registered by this parser.
    #
    # (name, keyword arguments) pairs; list order is registration order.
    hyperparameters = [
        ("epsilon", dict(lb=0.01, ub=1., default=0.50, dtype=float)),
        ("epsilon_decay", dict(lb=0.01, ub=1., default=0.995, dtype=float)),
        ("tau", dict(lb=0.1, ub=1., default=0.99, dtype=float)),
        ("trace_length", dict(lb=1, ub=100, default=8, dtype=int)),
        # The upper bound is passed as the float 1e6 although dtype is int.
        ("buffer_size", dict(lb=1, ub=1e6, default=4000, dtype=int)),
        ("units_layer1", dict(lb=1, ub=700, default=64, dtype=int)),
        ("units_layer2", dict(lb=1, ub=700, default=64, dtype=int)),
        ("units_layer3", dict(lb=1, ub=700, default=64, dtype=int)),
        ("update_interval", dict(lb=1, ub=1000, default=5, dtype=int)),
    ]

    parser = params_parser()
    for name, settings in hyperparameters:
        parser.add_num_param(name, **settings)

    return parser.check_params(**kwargs)
Exemple #3
0
def ppo_params_parser(**kwargs):
    """Register the PPO hyperparameters and check ``kwargs`` against them.

    Each hyperparameter is added to a fresh ``params_parser()`` through
    ``add_num_param`` with ``lb``/``ub`` (presumably lower/upper bounds --
    confirm against ``params_parser``), a default value and a dtype.

    Args:
        **kwargs: Forwarded unchanged to ``check_params``.

    Returns:
        Whatever ``check_params(**kwargs)`` returns (presumably the checked,
        default-filled parameters -- confirm against ``params_parser``).
    """
    parser = params_parser()
    register = parser.add_num_param  # short alias; same bound method

    # Number of environments and loss coefficients.
    register("nenvs", lb=1, ub=30, default=3, dtype=int)
    register("vf_coeff", lb=1e-2, ub=1., default=0.2, dtype=float)
    register("ent_coeff", lb=1e-12, ub=1., default=1e-7, dtype=float)

    # Layer widths.
    register("units_shared_layer1", lb=1, ub=700, default=64, dtype=int)
    register("units_shared_layer2", lb=1, ub=700, default=64, dtype=int)
    register("units_policy_layer", lb=1, ub=700, default=64, dtype=int)

    # Remaining optimisation / rollout settings.
    register("nminibatches", lb=1, ub=500, default=1, dtype=int)
    register("noptepochs", lb=1, ub=500, default=1, dtype=int)
    register("lam", lb=0, ub=1., default=0.95, dtype=float)
    register("nsteps", lb=1, ub=500, default=32, dtype=int)
    register("cliprange", lb=0., ub=1., default=0.2, dtype=float)

    return parser.check_params(**kwargs)