def ppo_params_parser(**kwargs):
    """Register the PPO hyper-parameters and check ``kwargs`` against them.

    A fresh ``params_parser()`` object receives one ``add_num_param`` entry
    per tunable listed below, after which the keyword arguments are handed
    to its ``check_params`` method.

    Args:
        **kwargs: Hyper-parameter overrides, forwarded unchanged to
            ``check_params``.

    Returns:
        Whatever ``check_params(**kwargs)`` returns.
    """
    # Tunables that are intentionally NOT registered here (their entries are
    # disabled): architecture, lr, batch_size, gamma, activ_fcn, lrschedule,
    # max_grad_norm.
    numeric_specs = (
        # (name, lb, ub, default, dtype)
        ("nenvs", 1, 30, 3, int),
        ("vf_coeff", 1e-2, 1., 0.2, float),
        ("ent_coeff", 1e-12, 1., 1e-7, float),
        ("units_shared_layer1", 1, 700, 64, int),
        ("units_shared_layer2", 1, 700, 64, int),
        ("units_policy_layer", 1, 700, 64, int),
        # A guard on ``kwargs.get("architecture")`` used to precede the next
        # entry; it is disabled, so these are always registered.
        ("nminibatches", 1, 500, 1, int),
        ("noptepochs", 1, 500, 1, int),
        ("lam", 0, 1., 0.95, float),
        ("nsteps", 1, 500, 32, int),
        ("cliprange", 0., 1., 0.2, float),
    )

    registry = params_parser()
    for name, lower, upper, default, dtype in numeric_specs:
        registry.add_num_param(name, lb=lower, ub=upper, default=default, dtype=dtype)
    return registry.check_params(**kwargs)
def dqn_params_parser(**kwargs):
    """Register the DQN hyper-parameters and check ``kwargs`` against them.

    A fresh ``params_parser()`` object receives one ``add_num_param`` entry
    per tunable listed below, after which the keyword arguments are handed
    to its ``check_params`` method.

    Args:
        **kwargs: Hyper-parameter overrides, forwarded unchanged to
            ``check_params``.

    Returns:
        Whatever ``check_params(**kwargs)`` returns.
    """
    # Tunables that are intentionally NOT registered here (their entries are
    # disabled): architecture, gamma, lr, lrschedule, batch_size,
    # max_grad_norm.
    numeric_specs = (
        # (name, lb, ub, default, dtype)
        ("epsilon", 0.01, 1., 0.50, float),
        ("epsilon_decay", 0.01, 1., 0.995, float),
        ("tau", 0.1, 1., 0.99, float),
        ("trace_length", 1, 100, 8, int),
        # The upper bound is deliberately left as the float literal 1e6.
        ("buffer_size", 1, 1e6, 4000, int),
        ("units_layer1", 1, 700, 64, int),
        ("units_layer2", 1, 700, 64, int),
        ("units_layer3", 1, 700, 64, int),
        ("update_interval", 1, 1000, 5, int),
    )

    registry = params_parser()
    for name, lower, upper, default, dtype in numeric_specs:
        registry.add_num_param(name, lb=lower, ub=upper, default=default, dtype=dtype)
    return registry.check_params(**kwargs)
def ppo_params_parser(**kwargs):
    """Build the PPO parameter set and run ``kwargs`` through its check.

    Every row of the table below becomes one ``add_num_param`` call on a new
    ``params_parser()`` object; ``kwargs`` is then passed to that object's
    ``check_params`` method.

    Args:
        **kwargs: Hyper-parameter overrides, forwarded unchanged to
            ``check_params``.

    Returns:
        Whatever ``check_params(**kwargs)`` returns.
    """
    numeric_specs = (
        # (name, lb, ub, default, dtype)
        ("nenvs", 1, 30, 3, int),
        ("vf_coeff", 1e-2, 1., 0.2, float),
        ("ent_coeff", 1e-12, 1., 1e-7, float),
        ("units_shared_layer1", 1, 700, 64, int),
        ("units_shared_layer2", 1, 700, 64, int),
        ("units_policy_layer", 1, 700, 64, int),
        ("nminibatches", 1, 500, 1, int),
        ("noptepochs", 1, 500, 1, int),
        ("lam", 0, 1., 0.95, float),
        ("nsteps", 1, 500, 32, int),
        ("cliprange", 0., 1., 0.2, float),
    )

    registry = params_parser()
    for name, lower, upper, default, dtype in numeric_specs:
        registry.add_num_param(name, lb=lower, ub=upper, default=default, dtype=dtype)
    return registry.check_params(**kwargs)