Example #1
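Constructor excerpt from a runner class built around CPO: it wraps the environment in a deep-copying factory, instantiates a CPOAgent (the four penalty flags are irrelevant for CPO and left False, as the comments note), and prepares logger kwargs that write to log.csv. With _init_model=False, only the placeholder attributes are set.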
    def __init__(
            self,
            env,
            actor_critic_fn=mlp_actor_critic,
            ac_kwargs=dict(),
            seed=0,
            render=False,
            # Logging:
            save_path=None,
            exp_name=None,
            save_freq=1,
            _init_model=True):
        self.sess, self.pi, self.mu, self.x_ph = None, None, None, None  # filled in later (e.g. during model build or restore)

        if _init_model:
            self.env_fn = lambda: copy.deepcopy(env)
            cpo_kwargs = dict(
                reward_penalized=False,  # Irrelevant in CPO
                objective_penalized=False,  # Irrelevant in CPO
                learn_penalty=False,  # Irrelevant in CPO
                penalty_param_loss=False  # Irrelevant in CPO
            )
            self.agent = CPOAgent(**cpo_kwargs)
            self.actor_critic_fn = actor_critic_fn
            self.ac_kwargs = ac_kwargs
            self.seed = seed
            self.render = render

            self.logger = None
            self.logger_kwargs = setup_logger_kwargs(exp_name=exp_name,
                                                     seed=seed,
                                                     data_dir=save_path)
            self.logger_kwargs["output_fname"] = "log.csv"
            self.save_freq = save_freq
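A hedged instantiation sketch for the class this __init__ belongs to; the class name CPORunner is a placeholder (the excerpt does not show it), and the Safety Gym environment id is just an example:

# Hypothetical usage; only the __init__ signature above is from the excerpt.
import gym
env = gym.make('Safexp-PointGoal1-v0')
runner = CPORunner(env,
                   seed=0,
                   save_path='./data',
                   exp_name='cpo_pointgoal1',
                   save_freq=1)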
Example #2

def main(robot='doggo',
         task='goal1',
         algo='hrl',
         seed=1,
         exp_name="test",
         cpu=1):

    # Verify experiment
    robot_list = ['point', 'car', 'doggo']
    task_list = ['goal1', 'goal2', 'button1', 'button2', 'push1', 'push2']
    algo_list = [
        'ppo', 'ppo_lagrangian', 'trpo', 'trpo_lagrangian', 'cpo', 'hrl'
    ]

    algo = algo.lower()
    task = task.capitalize()
    robot = robot.capitalize()
    assert algo in algo_list, "Invalid algo"
    assert task.lower() in task_list, "Invalid task"
    assert robot.lower() in robot_list, "Invalid robot"

    # Hyperparameters
    exp_name = algo + '_' + robot + task + exp_name
    if robot == 'Doggo':
        num_steps = 1e8
        steps_per_epoch = 60000
    else:
        num_steps = 1e5  # original: 1e7
        steps_per_epoch = 30000

    epochs = int(num_steps / steps_per_epoch)
    save_freq = 50
    target_kl = 0.01
    cost_lim = 25

    # Fork for parallelizing
    mpi_fork(cpu)

    # Prepare Logger
    exp_name = exp_name or (algo + '_' + robot.lower() + task.lower())
    logger_kwargs = setup_logger_kwargs(exp_name, seed)

    # Algo and Env
    algo = eval('safe_rl.' + algo)
    print("algo", algo)
    env_name = 'Safexp-' + robot + task + '-v0'

    algo(env_fn=lambda: gym.make(env_name),
         ac_kwargs=dict(hidden_sizes=(256, 256), ),
         epochs=epochs,
         steps_per_epoch=steps_per_epoch,
         save_freq=save_freq,
         target_kl=target_kl,
         cost_lim=cost_lim,
         seed=seed,
         logger_kwargs=logger_kwargs)
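For context, a minimal sketch of the CLI wiring such a runner is usually launched with; the flag names below simply mirror main()'s keyword arguments and are an assumption, not part of the excerpt:

if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser()
    # Flags mirror main()'s defaults (assumed, not from the excerpt).
    parser.add_argument('--robot', type=str, default='doggo')
    parser.add_argument('--task', type=str, default='goal1')
    parser.add_argument('--algo', type=str, default='hrl')
    parser.add_argument('--seed', type=int, default=1)
    parser.add_argument('--exp_name', type=str, default='test')
    parser.add_argument('--cpu', type=int, default=1)
    args = parser.parse_args()
    main(args.robot, args.task, args.algo, args.seed, args.exp_name, args.cpu)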
Example #3
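Runner variant with a shortened training budget for non-Doggo robots (6M steps at 100k steps per epoch) that prints the schedule before launching; a commented-out penalty_lr override is left in the algo call.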
def main(robot, task, algo, seed, exp_name, cpu):
    # Verify experiment
    robot_list = ['point', 'car', 'doggo']
    task_list = ['goal1', 'goal2', 'button1', 'button2', 'push1', 'push2']
    algo_list = ['ppo', 'ppo_lagrangian', 'trpo', 'trpo_lagrangian', 'cpo']

    algo = algo.lower()
    task = task.capitalize()
    robot = robot.capitalize()
    assert algo in algo_list, "Invalid algo"
    assert task.lower() in task_list, "Invalid task"
    assert robot.lower() in robot_list, "Invalid robot"

    # Hyperparameters
    exp_name = algo + '_' + robot + task
    if robot == 'Doggo':
        num_steps = 1e8
        steps_per_epoch = 60000
    else:
        num_steps = 6000000  # at 30k steps/epoch: 1.5M => 50 epochs, 3M => 100, 6M => 200
        steps_per_epoch = 100000  # original: 30000
    epochs = int(num_steps / steps_per_epoch)

    # original settings: num_steps = 1e7 (10,000,000), steps_per_epoch = 30000

    print('\n\nNum steps', num_steps, ', epochs', epochs, ', steps per epoch',
          steps_per_epoch, '\n')
    save_freq = 50
    target_kl = 0.01
    cost_lim = 25

    # Fork for parallelizing
    mpi_fork(cpu)

    # Prepare Logger
    exp_name = exp_name or (algo + '_' + robot.lower() + task.lower())
    logger_kwargs = setup_logger_kwargs(exp_name, seed)

    # Algo and Env
    algo = eval('safe_rl.' + algo)
    env_name = 'Safexp-' + robot + task + '-v0'

    algo(
        env_fn=lambda: gym.make(env_name),
        ac_kwargs=dict(hidden_sizes=(256, 256), ),
        epochs=epochs,
        steps_per_epoch=steps_per_epoch,
        save_freq=save_freq,
        target_kl=target_kl,
        cost_lim=cost_lim,
        seed=seed,
        logger_kwargs=logger_kwargs,
        #penalty_lr=100000
    )
Example #4
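Adds a custom 'safety' task to the task list: when selected, the environment is produced by safety_point_goal_1.SafetyPointGoal1ConfigModule().get_env() instead of gym.make. Note the algorithm is resolved with a bare eval(algo) here, not eval('safe_rl.' + algo).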
def main(robot, task, algo, seed, exp_name, cpu):

    # Verify experiment
    robot_list = ['point', 'car', 'doggo']
    task_list = [
        'goal1', 'goal2', 'button1', 'button2', 'push1', 'push2', 'safety'
    ]
    algo_list = ['ppo', 'ppo_lagrangian', 'trpo', 'trpo_lagrangian', 'cpo']

    algo = algo.lower()
    task = task.capitalize()
    robot = robot.capitalize()
    assert algo in algo_list, "Invalid algo"
    assert task.lower() in task_list, "Invalid task"
    assert robot.lower() in robot_list, "Invalid robot"

    # Hyperparameters
    exp_name = algo + '_' + robot + task
    if robot == 'Doggo':
        num_steps = 1e8
        steps_per_epoch = 60000
    else:
        num_steps = 3e6
        steps_per_epoch = 30000
    epochs = int(num_steps / steps_per_epoch)
    save_freq = 50
    target_kl = 0.01
    cost_lim = 25

    # Fork for parallelizing
    mpi_fork(cpu)

    # Prepare Logger
    exp_name = exp_name or (algo + '_' + robot.lower() + task.lower())
    logger_kwargs = setup_logger_kwargs(exp_name, seed)

    # Algo and Env
    algo = eval(algo)
    if task == 'Safety':
        env_config = safety_point_goal_1.SafetyPointGoal1ConfigModule()
        getter_fn = lambda: env_config.get_env()
    else:
        env_name = 'Safexp-' + robot + task + '-v0'
        getter_fn = lambda: gym.make(env_name)

    algo(env_fn=getter_fn,
         ac_kwargs=dict(hidden_sizes=(256, 256), ),
         epochs=epochs,
         steps_per_epoch=steps_per_epoch,
         save_freq=save_freq,
         target_kl=target_kl,
         cost_lim=cost_lim,
         seed=seed,
         logger_kwargs=logger_kwargs)
Example #5
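Generic variant that takes an arbitrary Gym environment id rather than a robot/task pair (the robot/task validation is commented out), with smaller (64, 64) networks, cost_lim = 50, and logs written under /var/tmp/.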
def main(env, alg, seed, exp_name, cpu):

    # Verify experiment
    # robot_list = ['point', 'car', 'doggo']
    # task_list = ['goal1', 'goal2', 'button1', 'button2', 'push1', 'push2']
    algo_list = ['ppo', 'ppo_lagrangian', 'trpo', 'trpo_lagrangian', 'cpo']

    algo = alg.lower()
    # task = task.capitalize()
    # robot = robot.capitalize()
    assert algo in algo_list, "Invalid algo"
    # assert task.lower() in task_list, "Invalid task"
    # assert robot.lower() in robot_list, "Invalid robot"

    # Hyperparameters
    # exp_name = algo + '_' + robot + task
    # if robot=='Doggo':
    #     num_steps = 1e8
    #     steps_per_epoch = 60000
    # else:
    num_steps = 1e7
    steps_per_epoch = 30000
    epochs = int(num_steps / steps_per_epoch)
    save_freq = 50
    target_kl = 0.01
    cost_lim = 50

    # Fork for parallelizing
    mpi_fork(cpu)

    # Prepare Logger
    env_name = env
    exp_name = exp_name or env_name
    logger_kwargs = setup_logger_kwargs(exp_name, seed, data_dir='/var/tmp/')

    # Algo and Env
    algo = eval('safe_rl.' + algo)
    # env_name = 'Safexp-'+robot+task+'-v0'

    algo(env_fn=lambda: gym.make(env_name),
         ac_kwargs=dict(hidden_sizes=(64, 64), ),
         epochs=epochs,
         steps_per_epoch=steps_per_epoch,
         save_freq=save_freq,
         target_kl=target_kl,
         cost_lim=cost_lim,
         seed=seed,
         logger_kwargs=logger_kwargs)
Example #6
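SAC runner: the algorithm is hard-coded to 'sac', and an entropy_constraint of -1.0 takes the place of the target_kl used by the policy-optimization variants.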
def main(robot, task, seed, exp_name, cpu):

    # Verify experiment
    robot_list = ['point', 'car', 'doggo']
    task_list = ['goal1', 'goal2', 'button1', 'button2', 'push1', 'push2']

    task = task.capitalize()
    robot = robot.capitalize()
    assert task.lower() in task_list, "Invalid task"
    assert robot.lower() in robot_list, "Invalid robot"

    algo = 'sac'

    # Hyperparameters
    exp_name = algo + '_' + robot + task
    if robot == 'Doggo':
        num_steps = 1e8
        steps_per_epoch = 60000
    else:
        num_steps = 1e7
        steps_per_epoch = 30000
    epochs = int(num_steps / steps_per_epoch)
    save_freq = 50
    entropy_constraint = -1.
    cost_lim = 25

    # Fork for parallelizing
    mpi_fork(cpu)

    # Prepare Logger
    exp_name = exp_name or (algo + '_' + robot.lower() + task.lower())
    logger_kwargs = setup_logger_kwargs(exp_name, seed)

    # Algo and Env
    algo = eval('safe_rl.' + algo)
    env_name = 'Safexp-' + robot + task + '-v0'

    algo(env_fn=lambda: gym.make(env_name),
         ac_kwargs=dict(hidden_sizes=(256, 256), ),
         epochs=epochs,
         steps_per_epoch=steps_per_epoch,
         save_freq=save_freq,
         entropy_constraint=entropy_constraint,
         cost_lim=cost_lim,
         seed=seed,
         logger_kwargs=logger_kwargs)
Example #7
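Excerpt from an argparse-driven script that selects a PPO, TRPO, or CPO agent from the parsed flags, builds the shared penalty kwargs, and hands the environment factory and agent to run_polopt_agent (the call is truncated in this excerpt).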
    parser.add_argument('--objective_penalized', action='store_true')
    parser.add_argument('--learn_penalty', action='store_true')
    parser.add_argument('--penalty_param_loss', action='store_true')
    parser.add_argument('--entreg', type=float, default=0.)
    args = parser.parse_args()

    try:
        import safety_gym
    except ImportError:
        print('Make sure to install Safety Gym to use constrained RL environments.')

    mpi_fork(args.cpu)  # run parallel code with mpi

    # Prepare logger
    from safe_rl.utils.run_utils import setup_logger_kwargs
    logger_kwargs = setup_logger_kwargs(args.exp_name, args.seed)

    # Prepare agent
    agent_kwargs = dict(reward_penalized=args.reward_penalized,
                        objective_penalized=args.objective_penalized,
                        learn_penalty=args.learn_penalty,
                        penalty_param_loss=args.penalty_param_loss)
    if args.agent == 'ppo':
        agent = PPOAgent(**agent_kwargs)
    elif args.agent == 'trpo':
        agent = TRPOAgent(**agent_kwargs)
    elif args.agent == 'cpo':
        agent = CPOAgent(**agent_kwargs)

    run_polopt_agent(lambda : gym.make(args.env),
                     agent=agent,
Example #8
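Point-mass experiment: it resolves the algorithm from safe_rl but trains on a local 'pointmass-v0' environment (from a gym_env package) for up to 1e10 steps, passing extra GAE lambdas and a policy learning rate. It also resets exp_name to just the algorithm name and reads a module-level args namespace (see the note below).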
def main(robot, task, algo, seed, exp_name, cpu):

    # Verify experiment
    robot_list = ['point', 'car', 'doggo']
    task_list = ['goal1', 'goal2', 'button1', 'button2', 'push1', 'push2']
    algo_list = ['ppo', 'ppo_lagrangian', 'trpo', 'trpo_lagrangian', 'cpo']

    algo = algo.lower()
    task = task.capitalize()
    robot = robot.capitalize()
    assert algo in algo_list, "Invalid algo"
    assert task.lower() in task_list, "Invalid task"
    assert robot.lower() in robot_list, "Invalid robot"

    # Hyperparameters
    exp_name = algo + '_' + robot + task
    if robot == 'Doggo':
        num_steps = 1e10
        steps_per_epoch = 60000
    else:
        num_steps = 1e10
        steps_per_epoch = 30000
    epochs = int(num_steps / steps_per_epoch)
    save_freq = 50
    target_kl = 0.01

    # Fork for parallelizing
    mpi_fork(cpu)

    # Prepare Logger
    exp_name = algo
    logger_kwargs = setup_logger_kwargs(exp_name, seed)

    # if not os.path.exists("./log"):
    #     os.makedirs("./log")
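    # NOTE: `args` is not a parameter of main(); this excerpt assumes an
    # argparse Namespace called `args` exists at module scope (it also
    # supplies cost_lim and obstacle_type below).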
    args.log_name = \
        "seed::" + str(args.seed) + "_algo::" + args.algo + "_task::" + str(args.obstacle_type) + \
        "_cost_lim::" + str(args.cost_lim)
    # custom_log = set_log(args)

    # Algo and Env
    algo = eval('safe_rl.' + algo)

    # env = gym.make("Pendulum-v0")
    # env._max_episode_steps = 64
    # env = PendulumCostWrapper(env)

    import gym_env
    # Setup pointmass
    env = gym.make("pointmass-v0", args=args)
    lam = 0.95
    cost_lam = 0.95
    pi_lr = 0.001

    algo(env_fn=lambda: env,
         ac_kwargs=dict(hidden_sizes=(16, 16), ),
         epochs=epochs,
         steps_per_epoch=steps_per_epoch,
         save_freq=save_freq,
         target_kl=target_kl,
         cost_lim=args.cost_lim,
         seed=seed,
         logger_kwargs=logger_kwargs,
         prefix=algo,  # note: algo was rebound above, so this is the safe_rl function, not the name string
         lam=lam,
         cost_lam=cost_lam,
         max_ep_len=1000,
         pi_lr=pi_lr,
         args=args)
Example #9
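Variant for vectorized and visual-observation training: pi_iters, vf_iters, and steps_per_epoch are scaled down by a factor k = 5, the algorithm is resolved with getattr(safe_rl, algo), and n_envs, visual_obs, and safety_checks are forwarded to the algorithm.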
def main(robot, task, algo, seed, exp_name, n_envs, visual_obs, safety_checks):

    # Verify experiment
    robot_list = ["point", "car", "doggo"]
    task_list = ["goal1", "goal2", "button1", "button2", "push1", "push2"]
    algo_list = ["ppo", "ppo_lagrangian", "trpo", "trpo_lagrangian", "cpo"]

    algo = algo.lower()
    task = task.capitalize()
    robot = robot.capitalize()
    assert algo in algo_list, "Invalid algo"
    assert task.lower() in task_list, "Invalid task"
    assert robot.lower() in robot_list, "Invalid robot"

    k = 5
    pi_iters = int(80 / k)
    vf_iters = int(80 / k)

    # Hyperparameters
    if robot == "Doggo":
        num_steps = 1e8
        steps_per_epoch = 60000
    else:
        num_steps = 1e7
        steps_per_epoch = int(30000 / k)
    epochs = int(num_steps / steps_per_epoch)
    save_freq = 50
    target_kl = 0.01
    cost_lim = 25

    # Prepare Logger
    exp_name = exp_name or (algo + "_" + robot.lower() + task.lower())
    logger_kwargs = setup_logger_kwargs(exp_name, seed)

    kwargs = {}
    if algo.startswith("ppo"):
        kwargs["pi_iters"] = pi_iters

    # Algo and Env
    algo = getattr(safe_rl, algo)
    env_name = "Safexp-" + robot + task + "-v0"

    log_params = {"pi_iters": pi_iters}
    algo(
        env_fn=lambda: gym.make(env_name),
        ac_kwargs=dict(hidden_sizes=(256, 256), ),
        epochs=epochs,
        steps_per_epoch=steps_per_epoch,
        save_freq=save_freq,
        target_kl=target_kl,
        cost_lim=cost_lim,
        seed=seed,
        logger_kwargs=logger_kwargs,
        env_name=env_name,
        visual_obs=visual_obs,
        safety_checks=safety_checks,
        vf_iters=vf_iters,
        log_params=log_params,
        n_envs=n_envs,
        **kwargs,
    )
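Unlike the other examples, which use eval('safe_rl.' + algo), this one resolves the algorithm with getattr. A short illustration of why that is preferable:

# getattr performs a plain attribute lookup and fails loudly on a bad name,
# whereas eval would execute an arbitrary expression string.
import safe_rl
algo_fn = getattr(safe_rl, 'trpo_lagrangian')  # AttributeError on a typo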
Example #10
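Adds an optional SafetyGymEnv wrapper: when wrapper is set, the task string is split into its name and level, and per-robot episode lengths and env configs (DEFAULT_ENV_CONFIG_C/P) apply; cost_lim is 5 and checkpoints are saved every 10 epochs.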
def main(robot, task, algo, seed, exp_name, cpu, wrapper):

    # Verify experiment
    robot_list = ['point', 'car', 'doggo']
    task_list = ['goal1', 'goal2', 'button1', 'button2', 'push1', 'push2']
    algo_list = ['ppo', 'ppo_lagrangian', 'trpo', 'trpo_lagrangian', 'cpo']

    algo = algo.lower()
    task = task.capitalize()
    robot = robot.capitalize()
    assert algo in algo_list, "Invalid algo"
    assert task.lower() in task_list, "Invalid task"
    assert robot.lower() in robot_list, "Invalid robot"

    # Hyperparameters
    exp_name = algo + '_' + robot + task
    if robot == 'Car':
        num_steps = 1e7
        steps_per_epoch = 30000
        max_ep_len = 150
        env_config = DEFAULT_ENV_CONFIG_C
    else:  #Point
        num_steps = 1e7
        steps_per_epoch = 30000
        max_ep_len = 300
        env_config = DEFAULT_ENV_CONFIG_P

    epochs = int(num_steps / steps_per_epoch)
    save_freq = 10
    target_kl = 0.01
    cost_lim = 5

    # Fork for parallelizing
    mpi_fork(cpu)

    # Prepare Logger
    exp_name = exp_name or (algo + '_' + robot.lower() + task.lower())
    logger_kwargs = setup_logger_kwargs(exp_name, seed)

    # Algo and Env
    algo = eval('safe_rl.' + algo)
    env_name = 'Safexp-' + robot + task + '-v0'

    if not wrapper:
        env_fn = lambda: gym.make(env_name)
    else:
        env_fn = lambda: SafetyGymEnv(robot=robot,
                                      task=task[:-1],
                                      level=int(task[-1]),
                                      seed=seed,
                                      config=env_config)

    algo(env_fn=env_fn,
         ac_kwargs=dict(hidden_sizes=(256, 256), ),
         epochs=epochs,
         max_ep_len=max_ep_len,
         steps_per_epoch=steps_per_epoch,
         save_freq=save_freq,
         target_kl=target_kl,
         cost_lim=cost_lim,
         seed=seed,
         logger_kwargs=logger_kwargs)
Example #11
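Wraps the environment in a ConstraintEnv, optionally concatenating constraint state to the observation and adding a dense reward term, then flattens observations; logs go under ../tests/<exp_name> and a final.txt marker is touched when training finishes.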
def main(robot, task, algo, seed, exp_name, cpu, constraint, use_aug,
         dense_coeff):

    # Verify experiment
    robot_list = ['point', 'car', 'doggo']
    task_list = ['goal1', 'goal2', 'button1', 'button2', 'push1', 'push2']
    algo_list = ['ppo', 'ppo_lagrangian', 'trpo', 'trpo_lagrangian', 'cpo']

    algo = algo.lower()
    task = task.capitalize()
    robot = robot.capitalize()
    assert algo in algo_list, "Invalid algo"
    assert task.lower() in task_list, "Invalid task"
    assert robot.lower() in robot_list, "Invalid robot"

    # Hyperparameters
    #exp_name = algo + '_' + robot + task
    if robot == 'Doggo':
        num_steps = 1e8
        steps_per_epoch = 60000
    else:
        num_steps = 1e7
        steps_per_epoch = 30000
    epochs = int(num_steps / steps_per_epoch)
    save_freq = 50
    target_kl = 0.01
    cost_lim = 25

    # Fork for parallelizing
    mpi_fork(cpu)

    # Prepare Logger
    exp_name = exp_name or (algo + '_' + robot.lower() + task.lower())
    logger_kwargs = setup_logger_kwargs(exp_name,
                                        seed,
                                        data_dir=str(
                                            pathlib.Path('../tests',
                                                         exp_name)),
                                        datestamp=False)

    # Algo and Env
    algo = eval('safe_rl.' + algo)
    env_name = 'Safexp-' + robot + task + '-v0'

    def env_fn():
        env = gym.make(env_name)
        if constraint is not None:
            if use_aug:
                augmentation_type = 'constraint_state_concat'
            else:
                augmentation_type = 'None'
            use_dense = dense_coeff > 0.
            env = ConstraintEnv(
                env,
                [get_constraint(constraint)(False, use_dense, dense_coeff)],
                augmentation_type=augmentation_type,
                log_dir='../tests/' + exp_name)
        fcenv = FlattenObservation(env)
        return fcenv

    algo(env_fn=env_fn,
         ac_kwargs=dict(hidden_sizes=(256, 256), ),
         epochs=epochs,
         steps_per_epoch=steps_per_epoch,
         save_freq=save_freq,
         target_kl=target_kl,
         cost_lim=cost_lim,
         seed=seed,
         logger_kwargs=logger_kwargs)
    (pathlib.Path('../tests') / exp_name / 'final.txt').touch()
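A hedged smoke test one could run on such a wrapped factory before training; it assumes the classic Gym reset/step API used throughout these examples, and that Safety Gym reports the constraint signal as info['cost']:

# Sketch: sanity-check the wrapped environment factory inside main().
env = env_fn()
obs = env.reset()
for _ in range(10):
    obs, reward, done, info = env.step(env.action_space.sample())
    cost = info.get('cost', 0.0)  # constraint cost from Safety Gym
    if done:
        obs = env.reset()
env.close()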