def __init__(self,
             env,
             actor_critic_fn=mlp_actor_critic,
             ac_kwargs=None,
             seed=0,
             render=False,
             # Logging:
             save_path=None,
             exp_name=None,
             save_freq=1,
             _init_model=True):
    """Set up a CPO training wrapper around a deep-copied environment.

    Args:
        env: environment instance; captured via ``copy.deepcopy`` so later
            mutation of the caller's env does not affect training
            (assumes ``env`` is deepcopy-able -- TODO confirm for native /
            non-picklable envs).
        actor_critic_fn: network builder used to construct the policy and
            value functions.
        ac_kwargs: extra keyword args for ``actor_critic_fn``
            (default: empty dict).
        seed: RNG seed, also forwarded to the logger kwargs.
        render: whether rollouts should render.
        save_path: ``data_dir`` for the experiment logger.
        exp_name: experiment name for the logger.
        save_freq: epochs between checkpoint saves.
        _init_model: when False, skip building ``env_fn`` and the agent
            (presumably used when restoring a saved model -- verify against
            the loading code path).
    """
    # Fixed: `ac_kwargs=dict()` was a mutable default shared across calls;
    # use a None sentinel and create a fresh dict per call instead.
    if ac_kwargs is None:
        ac_kwargs = dict()
    # TF session / policy handles are populated later by model setup.
    self.sess, self.pi, self.mu, self.x_ph = None, None, None, None
    if _init_model:
        # NOTE(review): the original source was whitespace-mangled; the
        # extent of this suite is inferred as the three statements below.
        self.env_fn = lambda: copy.deepcopy(env)
        # All penalty-related switches are irrelevant for CPO itself.
        cpo_kwargs = dict(
            reward_penalized=False,  # Irrelevant in CPO
            objective_penalized=False,  # Irrelevant in CPO
            learn_penalty=False,  # Irrelevant in CPO
            penalty_param_loss=False  # Irrelevant in CPO
        )
        self.agent = CPOAgent(**cpo_kwargs)
    self.actor_critic_fn = actor_critic_fn
    self.ac_kwargs = ac_kwargs
    self.seed = seed
    self.render = render
    self.logger = None
    self.logger_kwargs = setup_logger_kwargs(exp_name=exp_name,
                                             seed=seed,
                                             data_dir=save_path)
    self.logger_kwargs["output_fname"] = "log.csv"
    self.save_freq = save_freq
def main(robot='doggo', task='goal1', algo='hrl', seed=1, exp_name="test",
         cpu=1):
    """Launch one safe-RL training run on a Safety Gym robot/task combo.

    Args:
        robot: one of 'point', 'car', 'doggo' (case-insensitive).
        task: one of the goal/button/push tasks (case-insensitive).
        algo: algorithm name, resolved to a function on ``safe_rl``.
        seed: RNG seed, also used by the logger.
        exp_name: suffix appended to the auto-generated experiment name.
        cpu: number of MPI processes to fork.
    """
    # Verify experiment
    robot_list = ['point', 'car', 'doggo']
    task_list = ['goal1', 'goal2', 'button1', 'button2', 'push1', 'push2']
    algo_list = [
        'ppo', 'ppo_lagrangian', 'trpo', 'trpo_lagrangian', 'cpo', 'hrl'
    ]
    algo = algo.lower()
    task = task.capitalize()
    robot = robot.capitalize()
    assert algo in algo_list, "Invalid algo"
    assert task.lower() in task_list, "Invalid task"
    assert robot.lower() in robot_list, "Invalid robot"

    # Hyperparameters
    exp_name = algo + '_' + robot + task + exp_name
    if robot == 'Doggo':
        num_steps = 1e8
        steps_per_epoch = 60000
    else:
        # Short debug budget (1e5 steps); earlier runs used 1e7 / 30000.
        num_steps = 1e5
        steps_per_epoch = 30000
    epochs = int(num_steps / steps_per_epoch)
    save_freq = 50
    target_kl = 0.01
    cost_lim = 25

    # Fork for parallelizing
    mpi_fork(cpu)

    # Prepare Logger
    # NOTE(review): exp_name is always non-empty after the concatenation
    # above, so this fallback never fires; kept for parity with siblings.
    exp_name = exp_name or (algo + '_' + robot.lower() + task.lower())
    logger_kwargs = setup_logger_kwargs(exp_name, seed)

    # Algo and Env
    # Fixed: attribute lookup instead of eval() on a string. `algo` is
    # validated against algo_list above, but eval is needlessly dangerous.
    algo = getattr(safe_rl, algo)
    print("algo", algo)
    env_name = 'Safexp-' + robot + task + '-v0'
    algo(env_fn=lambda: gym.make(env_name),
         ac_kwargs=dict(hidden_sizes=(256, 256), ),
         epochs=epochs,
         steps_per_epoch=steps_per_epoch,
         save_freq=save_freq,
         target_kl=target_kl,
         cost_lim=cost_lim,
         seed=seed,
         logger_kwargs=logger_kwargs)
def main(robot, task, algo, seed, exp_name, cpu):
    """Train a safe-RL baseline on a Safety Gym robot/task combination.

    Args:
        robot: one of 'point', 'car', 'doggo' (case-insensitive).
        task: one of the goal/button/push tasks (case-insensitive).
        algo: algorithm name, resolved to a function on ``safe_rl``.
        seed: RNG seed, also used by the logger.
        exp_name: optional experiment name; auto-generated when falsy.
        cpu: number of MPI processes to fork.
    """
    # Verify experiment
    robot_list = ['point', 'car', 'doggo']
    task_list = ['goal1', 'goal2', 'button1', 'button2', 'push1', 'push2']
    algo_list = ['ppo', 'ppo_lagrangian', 'trpo', 'trpo_lagrangian', 'cpo']
    algo = algo.lower()
    task = task.capitalize()
    robot = robot.capitalize()
    assert algo in algo_list, "Invalid algo"
    assert task.lower() in task_list, "Invalid task"
    assert robot.lower() in robot_list, "Invalid robot"

    # Hyperparameters
    exp_name = algo + '_' + robot + task
    if robot == 'Doggo':
        num_steps = 1e8
        steps_per_epoch = 60000
    else:
        # 6e6 steps at 100k steps/epoch -> 60 epochs.
        # (Original upstream defaults were 1e7 steps / 30000 per epoch.)
        num_steps = 6000000
        steps_per_epoch = 100000
    epochs = int(num_steps / steps_per_epoch)
    print('\n\nNum steps', num_steps, ', epochs', epochs,
          ', steps per epoch', steps_per_epoch, '\n')
    save_freq = 50
    target_kl = 0.01
    cost_lim = 25

    # Fork for parallelizing
    mpi_fork(cpu)

    # Prepare Logger
    # NOTE(review): exp_name is always non-empty after the concatenation
    # above, so this fallback never fires; kept for parity with siblings.
    exp_name = exp_name or (algo + '_' + robot.lower() + task.lower())
    logger_kwargs = setup_logger_kwargs(exp_name, seed)

    # Algo and Env
    # Fixed: attribute lookup instead of eval() on a string. `algo` is
    # validated against algo_list above, but eval is needlessly dangerous.
    algo = getattr(safe_rl, algo)
    env_name = 'Safexp-' + robot + task + '-v0'
    algo(
        env_fn=lambda: gym.make(env_name),
        ac_kwargs=dict(hidden_sizes=(256, 256), ),
        epochs=epochs,
        steps_per_epoch=steps_per_epoch,
        save_freq=save_freq,
        target_kl=target_kl,
        cost_lim=cost_lim,
        seed=seed,
        logger_kwargs=logger_kwargs,
    )
def main(robot, task, algo, seed, exp_name, cpu):
    """Train a safe-RL baseline on a Safety Gym task or the custom
    'safety' environment.

    Args:
        robot: one of 'point', 'car', 'doggo' (case-insensitive).
        task: a goal/button/push task, or 'safety' for the custom
            SafetyPointGoal1 config-module environment.
        algo: algorithm name, resolved from this module's globals.
        seed: RNG seed, also used by the logger.
        exp_name: optional experiment name; auto-generated when falsy.
        cpu: number of MPI processes to fork.
    """
    # Verify experiment
    robot_list = ['point', 'car', 'doggo']
    task_list = [
        'goal1', 'goal2', 'button1', 'button2', 'push1', 'push2', 'safety'
    ]
    algo_list = ['ppo', 'ppo_lagrangian', 'trpo', 'trpo_lagrangian', 'cpo']
    algo = algo.lower()
    task = task.capitalize()
    robot = robot.capitalize()
    assert algo in algo_list, "Invalid algo"
    assert task.lower() in task_list, "Invalid task"
    assert robot.lower() in robot_list, "Invalid robot"

    # Hyperparameters
    exp_name = algo + '_' + robot + task
    if robot == 'Doggo':
        num_steps = 1e8
        steps_per_epoch = 60000
    else:
        num_steps = 3e6
        steps_per_epoch = 30000
    epochs = int(num_steps / steps_per_epoch)
    save_freq = 50
    target_kl = 0.01
    cost_lim = 25

    # Fork for parallelizing
    mpi_fork(cpu)

    # Prepare Logger
    # NOTE(review): exp_name is always non-empty here, so this fallback
    # never fires; kept for parity with sibling scripts.
    exp_name = exp_name or (algo + '_' + robot.lower() + task.lower())
    logger_kwargs = setup_logger_kwargs(exp_name, seed)

    # Algo and Env
    # Fixed: explicit globals lookup instead of eval() on a string; `algo`
    # is validated against algo_list above. (Raises KeyError rather than
    # NameError if the function is missing from this module.)
    algo = globals()[algo]
    if task == 'Safety':
        # Custom env is built from a config module rather than gym.make.
        env_config = safety_point_goal_1.SafetyPointGoal1ConfigModule()
        getter_fn = lambda: env_config.get_env()
    else:
        env_name = 'Safexp-' + robot + task + '-v0'
        getter_fn = lambda: gym.make(env_name)
    algo(env_fn=getter_fn,
         ac_kwargs=dict(hidden_sizes=(256, 256), ),
         epochs=epochs,
         steps_per_epoch=steps_per_epoch,
         save_freq=save_freq,
         target_kl=target_kl,
         cost_lim=cost_lim,
         seed=seed,
         logger_kwargs=logger_kwargs)
def main(env, alg, seed, exp_name, cpu):
    """Train a safe-RL baseline on an arbitrary gym environment by id.

    Args:
        env: gym environment id passed straight to ``gym.make``.
        alg: algorithm name, resolved to a function on ``safe_rl``.
        seed: RNG seed, also used by the logger.
        exp_name: optional experiment name; defaults to the env id.
        cpu: number of MPI processes to fork.
    """
    # Verify experiment (robot/task validation from the Safety Gym variant
    # intentionally dropped: any gym id is accepted here).
    algo_list = ['ppo', 'ppo_lagrangian', 'trpo', 'trpo_lagrangian', 'cpo']
    algo = alg.lower()
    assert algo in algo_list, "Invalid algo"

    # Hyperparameters
    num_steps = 1e7
    steps_per_epoch = 30000
    epochs = int(num_steps / steps_per_epoch)
    save_freq = 50
    target_kl = 0.01
    cost_lim = 50

    # Fork for parallelizing
    mpi_fork(cpu)

    # Prepare Logger
    env_name = env
    exp_name = exp_name or env_name
    logger_kwargs = setup_logger_kwargs(exp_name, seed, data_dir='/var/tmp/')

    # Algo and Env
    # Fixed: attribute lookup instead of eval() on a string. `algo` is
    # validated against algo_list above, but eval is needlessly dangerous.
    algo = getattr(safe_rl, algo)
    algo(env_fn=lambda: gym.make(env_name),
         ac_kwargs=dict(hidden_sizes=(64, 64), ),
         epochs=epochs,
         steps_per_epoch=steps_per_epoch,
         save_freq=save_freq,
         target_kl=target_kl,
         cost_lim=cost_lim,
         seed=seed,
         logger_kwargs=logger_kwargs)
def main(robot, task, seed, exp_name, cpu):
    """Train SAC on a Safety Gym robot/task combination.

    Args:
        robot: one of 'point', 'car', 'doggo' (case-insensitive).
        task: one of the goal/button/push tasks (case-insensitive).
        seed: RNG seed, also used by the logger.
        exp_name: optional experiment name; auto-generated when falsy.
        cpu: number of MPI processes to fork.
    """
    # Verify experiment
    robot_list = ['point', 'car', 'doggo']
    task_list = ['goal1', 'goal2', 'button1', 'button2', 'push1', 'push2']
    task = task.capitalize()
    robot = robot.capitalize()
    assert task.lower() in task_list, "Invalid task"
    assert robot.lower() in robot_list, "Invalid robot"
    algo = 'sac'  # this entry point is hard-wired to SAC

    # Hyperparameters
    exp_name = algo + '_' + robot + task
    if robot == 'Doggo':
        num_steps = 1e8
        steps_per_epoch = 60000
    else:
        num_steps = 1e7
        steps_per_epoch = 30000
    epochs = int(num_steps / steps_per_epoch)
    save_freq = 50
    entropy_constraint = -1.
    cost_lim = 25

    # Fork for parallelizing
    mpi_fork(cpu)

    # Prepare Logger
    # NOTE(review): exp_name is always non-empty here, so this fallback
    # never fires; kept for parity with sibling scripts.
    exp_name = exp_name or (algo + '_' + robot.lower() + task.lower())
    logger_kwargs = setup_logger_kwargs(exp_name, seed)

    # Algo and Env
    # Fixed: attribute lookup instead of eval() on a string.
    algo = getattr(safe_rl, algo)
    env_name = 'Safexp-' + robot + task + '-v0'
    algo(env_fn=lambda: gym.make(env_name),
         ac_kwargs=dict(hidden_sizes=(256, 256), ),
         epochs=epochs,
         steps_per_epoch=steps_per_epoch,
         save_freq=save_freq,
         entropy_constraint=entropy_constraint,
         cost_lim=cost_lim,
         seed=seed,
         logger_kwargs=logger_kwargs)
# CLI switches for the penalty-shaping variants of the policy-opt agents.
parser.add_argument('--objective_penalized', action='store_true')
parser.add_argument('--learn_penalty', action='store_true')
parser.add_argument('--penalty_param_loss', action='store_true')
# Entropy regularization coefficient (0 disables it).
parser.add_argument('--entreg', type=float, default=0.)
args = parser.parse_args()

# Safety Gym is optional: importing it registers the constrained envs.
# NOTE(review): bare `except` also swallows unrelated failures (e.g. a
# broken install) -- consider narrowing to `except ImportError`.
try:
    import safety_gym
except:
    print('Make sure to install Safety Gym to use constrained RL environments.')

mpi_fork(args.cpu)  # run parallel code with mpi

# Prepare logger
from safe_rl.utils.run_utils import setup_logger_kwargs
logger_kwargs = setup_logger_kwargs(args.exp_name, args.seed)

# Prepare agent
agent_kwargs = dict(reward_penalized=args.reward_penalized,
                    objective_penalized=args.objective_penalized,
                    learn_penalty=args.learn_penalty,
                    penalty_param_loss=args.penalty_param_loss)
# Dispatch on the requested agent type.
# NOTE(review): an unrecognized --agent value leaves `agent` unbound and
# the call below raises NameError.
if args.agent=='ppo':
    agent = PPOAgent(**agent_kwargs)
elif args.agent=='trpo':
    agent = TRPOAgent(**agent_kwargs)
elif args.agent=='cpo':
    agent = CPOAgent(**agent_kwargs)

# NOTE(review): this call is truncated here -- its remaining keyword
# arguments continue beyond the visible chunk.
run_polopt_agent(lambda : gym.make(args.env), agent=agent,
def main(robot, task, algo, seed, exp_name, cpu):
    """Train a safe-RL algorithm on the custom pointmass environment.

    NOTE(review): besides its own parameters, this function reads and
    writes the module-level ``args`` namespace (args.seed, args.algo,
    args.obstacle_type, args.cost_lim, args.log_name) -- confirm ``args``
    is parsed before main() is called.

    Args:
        robot: one of 'point', 'car', 'doggo' (case-insensitive);
            only used for validation and exp-name construction.
        task: one of the goal/button/push tasks (case-insensitive).
        algo: algorithm name, resolved to a function on ``safe_rl``.
        seed: RNG seed, also used by the logger.
        exp_name: unused after validation; logger name is just the algo.
        cpu: number of MPI processes to fork.
    """
    # Verify experiment
    robot_list = ['point', 'car', 'doggo']
    task_list = ['goal1', 'goal2', 'button1', 'button2', 'push1', 'push2']
    algo_list = ['ppo', 'ppo_lagrangian', 'trpo', 'trpo_lagrangian', 'cpo']
    algo = algo.lower()
    task = task.capitalize()
    robot = robot.capitalize()
    assert algo in algo_list, "Invalid algo"
    assert task.lower() in task_list, "Invalid task"
    assert robot.lower() in robot_list, "Invalid robot"

    # Hyperparameters (effectively unbounded step budget).
    exp_name = algo + '_' + robot + task
    if robot == 'Doggo':
        num_steps = 1e10
        steps_per_epoch = 60000
    else:
        num_steps = 1e10
        steps_per_epoch = 30000
    epochs = int(num_steps / steps_per_epoch)
    save_freq = 50
    target_kl = 0.01

    # Fork for parallelizing
    mpi_fork(cpu)

    # Prepare Logger
    exp_name = algo
    logger_kwargs = setup_logger_kwargs(exp_name, seed)
    args.log_name = \
        "seed::" + str(args.seed) + "_algo::" + args.algo + "_task::" + \
        str(args.obstacle_type) + "_cost_lim::" + str(args.cost_lim)

    # Algo and Env
    # Keep the string name before rebinding: `prefix` below expects it.
    algo_name = algo
    # Fixed: attribute lookup instead of eval() on a string.
    algo = getattr(safe_rl, algo)
    import gym_env  # registers "pointmass-v0" with gym

    # Setup pointmass
    env = gym.make("pointmass-v0", args=args)
    lam = 0.95
    cost_lam = 0.95
    pi_lr = 0.001
    algo(env_fn=lambda: env,
         ac_kwargs=dict(hidden_sizes=(16, 16), ),
         epochs=epochs,
         steps_per_epoch=steps_per_epoch,
         save_freq=save_freq,
         target_kl=target_kl,
         cost_lim=args.cost_lim,
         seed=seed,
         logger_kwargs=logger_kwargs,
         # Fixed: was `prefix=algo`, which (after the rebinding above)
         # passed the algorithm *function* instead of its name.
         prefix=algo_name,
         lam=lam,
         cost_lam=cost_lam,
         max_ep_len=1000,
         pi_lr=pi_lr,
         args=args)
def main(robot, task, algo, seed, exp_name, n_envs, visual_obs, safety_checks):
    """Launch a (possibly vectorized) safe-RL run on a Safety Gym env.

    Steps-per-epoch and the PPO/VF iteration counts are scaled down by a
    fixed subsampling factor relative to the upstream defaults.
    """
    # Normalize and validate the experiment selection.
    valid_robots = ["point", "car", "doggo"]
    valid_tasks = ["goal1", "goal2", "button1", "button2", "push1", "push2"]
    valid_algos = ["ppo", "ppo_lagrangian", "trpo", "trpo_lagrangian", "cpo"]
    algo, task, robot = algo.lower(), task.capitalize(), robot.capitalize()
    assert algo in valid_algos, "Invalid algo"
    assert task.lower() in valid_tasks, "Invalid task"
    assert robot.lower() in valid_robots, "Invalid robot"

    # Subsampling factor applied to iteration counts / epoch length.
    subsample = 5
    pi_iters = int(80 / subsample)
    vf_iters = int(80 / subsample)

    # Hyperparameters (Doggo gets the long budget; others are subsampled).
    if robot == "Doggo":
        num_steps, steps_per_epoch = 1e8, 60000
    else:
        num_steps, steps_per_epoch = 1e7, int(30000 / subsample)
    epochs = int(num_steps / steps_per_epoch)
    save_freq = 50
    target_kl = 0.01
    cost_lim = 25

    # Logger setup: fall back to an auto-generated name when none given.
    exp_name = exp_name or (algo + "_" + robot.lower() + task.lower())
    logger_kwargs = setup_logger_kwargs(exp_name, seed)

    # pi_iters is only meaningful for the PPO-family entry points.
    extra_kwargs = {}
    if algo.startswith("ppo"):
        extra_kwargs["pi_iters"] = pi_iters

    # Resolve the algorithm function and build the env id.
    algo = getattr(safe_rl, algo)
    env_name = f"Safexp-{robot}{task}-v0"
    logged_hyperparams = {"pi_iters": pi_iters}

    algo(
        env_fn=lambda: gym.make(env_name),
        ac_kwargs=dict(hidden_sizes=(256, 256), ),
        epochs=epochs,
        steps_per_epoch=steps_per_epoch,
        save_freq=save_freq,
        target_kl=target_kl,
        cost_lim=cost_lim,
        seed=seed,
        logger_kwargs=logger_kwargs,
        env_name=env_name,
        visual_obs=visual_obs,
        safety_checks=safety_checks,
        vf_iters=vf_iters,
        log_params=logged_hyperparams,
        n_envs=n_envs,
        **extra_kwargs,
    )
def main(robot, task, algo, seed, exp_name, cpu, wrapper):
    """Train a safe-RL baseline, optionally through the SafetyGymEnv wrapper.

    Args:
        robot: one of 'point', 'car' (case-insensitive; 'doggo' passes
            validation but falls into the Point hyperparameter branch).
        task: one of the goal/button/push tasks (case-insensitive).
        algo: algorithm name, resolved to a function on ``safe_rl``.
        seed: RNG seed, also used by the logger and the wrapped env.
        exp_name: optional experiment name; auto-generated when falsy.
        cpu: number of MPI processes to fork.
        wrapper: when truthy, build the env via SafetyGymEnv instead of
            ``gym.make``.
    """
    # Verify experiment
    robot_list = ['point', 'car', 'doggo']
    task_list = ['goal1', 'goal2', 'button1', 'button2', 'push1', 'push2']
    algo_list = ['ppo', 'ppo_lagrangian', 'trpo', 'trpo_lagrangian', 'cpo']
    algo = algo.lower()
    task = task.capitalize()
    robot = robot.capitalize()
    assert algo in algo_list, "Invalid algo"
    assert task.lower() in task_list, "Invalid task"
    assert robot.lower() in robot_list, "Invalid robot"

    # Hyperparameters (per-robot episode length and env config).
    exp_name = algo + '_' + robot + task
    if robot == 'Car':
        num_steps = 1e7
        steps_per_epoch = 30000
        max_ep_len = 150
        env_config = DEFAULT_ENV_CONFIG_C
    else:  # Point
        num_steps = 1e7
        steps_per_epoch = 30000
        max_ep_len = 300
        env_config = DEFAULT_ENV_CONFIG_P
    epochs = int(num_steps / steps_per_epoch)
    save_freq = 10
    target_kl = 0.01
    cost_lim = 5

    # Fork for parallelizing
    mpi_fork(cpu)

    # Prepare Logger
    # NOTE(review): exp_name is always non-empty here, so this fallback
    # never fires; kept for parity with sibling scripts.
    exp_name = exp_name or (algo + '_' + robot.lower() + task.lower())
    logger_kwargs = setup_logger_kwargs(exp_name, seed)

    # Algo and Env
    # Fixed: attribute lookup instead of eval() on a string.
    algo = getattr(safe_rl, algo)
    env_name = 'Safexp-' + robot + task + '-v0'
    if wrapper:
        # Task string like 'Goal1' is split into name ('Goal') + level (1).
        env_fn = lambda: SafetyGymEnv(robot=robot,
                                      task=task[:-1],
                                      level=int(task[-1]),
                                      seed=seed,
                                      config=env_config)
    else:
        env_fn = lambda: gym.make(env_name)
    algo(env_fn=env_fn,
         ac_kwargs=dict(hidden_sizes=(256, 256), ),
         epochs=epochs,
         max_ep_len=max_ep_len,
         steps_per_epoch=steps_per_epoch,
         save_freq=save_freq,
         target_kl=target_kl,
         cost_lim=cost_lim,
         seed=seed,
         logger_kwargs=logger_kwargs)
def main(robot, task, algo, seed, exp_name, cpu, constraint, use_aug,
         dense_coeff):
    """Train a safe-RL baseline with an optional extra constraint wrapper.

    Args:
        robot: one of 'point', 'car', 'doggo' (case-insensitive).
        task: one of the goal/button/push tasks (case-insensitive).
        algo: algorithm name, resolved to a function on ``safe_rl``.
        seed: RNG seed, also used by the logger.
        exp_name: optional experiment name; auto-generated when falsy and
            used as the results directory under ../tests.
        cpu: number of MPI processes to fork.
        constraint: name of an extra constraint to wrap the env with, or
            None for the plain env.
        use_aug: when True, augment observations with constraint state.
        dense_coeff: dense-reward coefficient; > 0 enables dense shaping.
    """
    # Verify experiment
    robot_list = ['point', 'car', 'doggo']
    task_list = ['goal1', 'goal2', 'button1', 'button2', 'push1', 'push2']
    algo_list = ['ppo', 'ppo_lagrangian', 'trpo', 'trpo_lagrangian', 'cpo']
    algo = algo.lower()
    task = task.capitalize()
    robot = robot.capitalize()
    assert algo in algo_list, "Invalid algo"
    assert task.lower() in task_list, "Invalid task"
    assert robot.lower() in robot_list, "Invalid robot"

    # Hyperparameters
    if robot == 'Doggo':
        num_steps = 1e8
        steps_per_epoch = 60000
    else:
        num_steps = 1e7
        steps_per_epoch = 30000
    epochs = int(num_steps / steps_per_epoch)
    save_freq = 50
    target_kl = 0.01
    cost_lim = 25

    # Fork for parallelizing
    mpi_fork(cpu)

    # Prepare Logger (results land under ../tests/<exp_name>).
    exp_name = exp_name or (algo + '_' + robot.lower() + task.lower())
    logger_kwargs = setup_logger_kwargs(exp_name,
                                        seed,
                                        data_dir=str(
                                            pathlib.Path('../tests',
                                                         exp_name)),
                                        datestamp=False)

    # Algo and Env
    # Fixed: attribute lookup instead of eval() on a string.
    algo = getattr(safe_rl, algo)
    env_name = 'Safexp-' + robot + task + '-v0'

    def env_fn():
        """Build the (optionally constraint-wrapped) flattened env."""
        env = gym.make(env_name)
        # Fixed idiom: compare against None with `is not`.
        if constraint is not None:
            if use_aug:
                augmentation_type = 'constraint_state_concat'
            else:
                augmentation_type = 'None'
            use_dense = dense_coeff > 0.
            env = ConstraintEnv(
                env,
                [get_constraint(constraint)(False, use_dense, dense_coeff)],
                augmentation_type=augmentation_type,
                log_dir='../tests/' + exp_name)
        fcenv = FlattenObservation(env)
        return fcenv

    algo(env_fn=env_fn,
         ac_kwargs=dict(hidden_sizes=(256, 256), ),
         epochs=epochs,
         steps_per_epoch=steps_per_epoch,
         save_freq=save_freq,
         target_kl=target_kl,
         cost_lim=cost_lim,
         seed=seed,
         logger_kwargs=logger_kwargs)
    # Marker file signalling the run finished.
    (pathlib.Path('../tests') / exp_name / 'final.txt').touch()