Example #1
def train(env_id, num_iters, seed, n=1, success_reward=1000, save_path='model/new_model'):

    U.make_session(num_cpu=4).__enter__()
    set_global_seeds(seed)

    env = gym.make(env_id)
    # env.update_adversary(n)
    env = Monitor(env, log_dir, allow_early_resets=True)
    env.seed(seed)

    test_env = gym.make(env_id)
    test_env.seed(seed)
    gym.logger.setLevel(logging.WARN)
    # debug not working
    # num_cpu=4
    # env=SubprocVecEnv([make_env(env_id,i,seed) for i in range(num_cpu)])

    rew = PPO_RARL.learn(env, test_env, policy_fn,
                         timesteps_per_batch=2048,
                         clip_param=0.2, entcoeff=0.0,
                         optim_epochs=10, optim_stepsize=3e-4, optim_batchsize=64,
                         gamma=0.99, lam=0.95, schedule='constant', success_reward=success_reward,
                         save_path=save_path, max_iters=num_iters, callback=plot_callback
                         )

    env.close()

    return rew
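For context, a rough usage sketch of the function above; the environment id, seed, and reward threshold are illustrative assumptions, and `log_dir`, `policy_fn`, and `plot_callback` are expected to exist at module scope as in the original project.

# Hypothetical invocation -- 'Hopper-v2', the seed, and success_reward are assumptions.
if __name__ == '__main__':
    final_reward = train('Hopper-v2', num_iters=500, seed=0,
                         success_reward=3000, save_path='model/hopper_rarl')
    print('final reward:', final_reward)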
Example #2
def make_mujoco_env(env_id, seed):
    """
    Create a wrapped, monitored gym.Env for MuJoCo.
    """
    set_global_seeds(seed)
    env = gym.make(env_id)
    env = Monitor(env, logger.get_dir())
    env.seed(seed)
    return env
Example #3
def make_mujoco_env(env_id, seed):
    """
    Create a wrapped, monitored gym.Env for MuJoCo.
    """
    set_global_seeds(seed)
    env = gym.make(env_id)
    env = Monitor(env, logger.get_dir())
    env.seed(seed)
    return env
Example #4
def make_mujoco_env(env_id, seed):
    """
    Create a wrapped, monitored gym.Env for MuJoCo.
    """
    rank = MPI.COMM_WORLD.Get_rank()
    set_global_seeds(seed + 10000 * rank)
    env = gym.make(env_id)
    env = Monitor(env, os.path.join(logger.get_dir(), str(rank)))
    env.seed(seed)
    return env
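The `seed + 10000 * rank` offset gives every MPI worker a distinct global seed, so parallel workers collect different trajectories while each run stays reproducible. A minimal sketch of the same idea (the ranks and offset are just the values used above):

# Per-worker seed offsets, mirroring set_global_seeds(seed + 10000 * rank) above.
def worker_seed(base_seed, rank, offset=10000):
    # Each rank derives a distinct, reproducible seed from the shared base seed.
    return base_seed + offset * rank

seeds = [worker_seed(42, rank) for rank in range(4)]  # [42, 10042, 20042, 30042]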
Example #5
def make_gym_control_env(env_id, seed):
    """
    Added by Yiming (29/5/2018)
    Create a wrapped, monitored gym.Env for Simple Control Problems.
    """
    set_global_seeds(seed)
    env = gym.make(env_id)
    env = Monitor(env, logger.get_dir(), allow_early_resets=True)
    env.seed(seed)
    return env
Example #6
def make_mujoco_env(env_id, seed):
    """
    Create a wrapped, monitored gym.Env for MuJoCo.
    """
    rank = MPI.COMM_WORLD.Get_rank()
    set_global_seeds(seed + 10000 * rank)
    env = gym.make(env_id)
    env = Monitor(env, os.path.join(logger.get_dir(), str(rank)))
    env.seed(seed)
    return env
Example #7
def make_halide_env(env_id, seed):
    """
    Create a wrapped, monitored gym.Env for Halide.
    """
    logger.log('random_seed: %s' % seed)
    set_global_seeds(seed)
    env = gym.make(env_id)
    env = Monitor(env,
                  logger.get_dir(),
                  info_keywords=('best_exec', 'best_schedule'))
    env.seed(seed)
    return env
Example #8
def make_pybullet_env(env_id, seed):
    """
    Added by Yiming (29/5/2018)
    Create a wrapped, monitored gym.Env for PyBullet.
    """
    set_global_seeds(seed)
    # pybullet.connect(None)
    env = gym.make(env_id)
    env = Monitor(env, logger.get_dir(), allow_early_resets=True)
    # env = InvertedDoublePendulumBulletEnv()
    env.seed(seed)
    return env
Example #9
def make_env(env_id, seed, train=True, logger_dir=None, reward_scale=1.0):
    """
    Create a wrapped, monitored gym.Env for safety.
    """
    env = gym.make(env_id, **{"train": train})
    env = Monitor(env, logger_dir, allow_early_resets=True, info_keywords=("s",))
    env.seed(seed)
    if isinstance(env.action_space, gym.spaces.Box):
        env = ClipActionsWrapper(env)
    if reward_scale != 1.0:
        from baselines.common.retro_wrappers import RewardScaler
        env = RewardScaler(env, reward_scale)
    return env
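Example #9 clips continuous actions before they reach the environment. If `ClipActionsWrapper` is unavailable in your copy of baselines, an equivalent wrapper is only a few lines; this is a hedged sketch, not necessarily the baselines implementation:

import gym
import numpy as np

class ClipActionsWrapper(gym.ActionWrapper):
    """Clip actions to the bounds of a Box action space before stepping the env."""
    def action(self, action):
        return np.clip(action, self.action_space.low, self.action_space.high)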
Example #10
def make_program_env(env_id, seed, hier=True, curiosity=True, visualize=True, model='LSTM'):
    """
    Create a wrapped, monitored gym.Env for custom environment
    """
    set_global_seeds(seed)
    env = gym.make(env_id)
    env.set_curiosity(curiosity, model)
    env.set_hier(hier)
    env.set_visualize(visualize)

    env = Monitor(env, logger.get_dir())
    env.seed(seed)
    return env
Example #11
def make_mujoco_env(env_id, seed, reward_scale=1.0):
    """
    Create a wrapped, monitored gym.Env for MuJoCo.
    """
    rank = MPI.COMM_WORLD.Get_rank()
    myseed = seed + 1000 * rank if seed is not None else None
    set_global_seeds(myseed)
    env = gym.make(env_id)
    env = Monitor(env, os.path.join(logger.get_dir(), str(rank)), allow_early_resets=True)
    env.seed(seed)
    if reward_scale != 1.0:
        from baselines.common.retro_wrappers import RewardScaler
        env = RewardScaler(env, reward_scale)
    return env
Example #12
def make_mujoco_env(env_id, seed):
    """
    Create a wrapped, monitored gym.Env for MuJoCo.
    
    :param env_id: (str) the environment ID
    :param seed: (int) the inital seed for RNG
    :return: (Gym Environment) The mujoco environment
    """
    rank = MPI.COMM_WORLD.Get_rank()
    set_global_seeds(seed + 10000 * rank)
    env = gym.make(env_id)
    env = Monitor(env, os.path.join(logger.get_dir(), str(rank)))
    env.seed(seed)
    return env
Example #13
def make_mujoco_env(env_id, seed, reward_scale=1.0):
    """
    Create a wrapped, monitored gym.Env for MuJoCo.
    """
    rank = MPI.COMM_WORLD.Get_rank()
    myseed = seed + 1000 * rank if seed is not None else None
    set_global_seeds(myseed)
    env = gym.make(env_id)
    logger_path = None if logger.get_dir() is None else os.path.join(logger.get_dir(), str(rank))
    env = Monitor(env, logger_path, allow_early_resets=True)
    env.seed(seed)
    if reward_scale != 1.0:
        from baselines.common.retro_wrappers import RewardScaler
        env = RewardScaler(env, reward_scale)
    return env
Example #14
def make_env(env_id, seed, train=True, logger_dir=None, reward_scale=1.0, mpi_rank=0, subrank=0):
    """
    Create a wrapped, monitored gym.Env for safety.
    """
    env = gym.make(env_id, **{"train": train})
    env.seed(seed + subrank if seed is not None else None)
    env = Monitor(env, 
                  logger_dir and os.path.join(logger_dir, str(mpi_rank) + '.' + str(subrank)),
                  allow_early_resets=True)

    env = ClipActionsWrapper(env)
    if reward_scale != 1.0:
        from baselines.common.retro_wrappers import RewardScaler
        env = RewardScaler(env, reward_scale)
    return env
Example #15
def make_control_env(env_id, seed, hist_len, block_high, version0, give_state):
    """
    Create a wrapped, monitored gym.Env for control tasks.
    """
    set_global_seeds(seed)
    if env_id == 'LunarLanderContinuousPOMDP-v0':
        newenv(hist_len=hist_len,
               block_high=block_high,
               version0=version0,
               give_state=give_state)
    env = gym.make(env_id)
    env = Monitor(env,
                  logger.get_dir(),
                  allow_early_resets=True,
                  version0=version0)
    env.seed(seed)
    return env
Example #16
def make_mujoco_env(env_id, seed, soc, psi):
    """
    Create a wrapped, monitored gym.Env for MuJoCo.
    """

    set_global_seeds(seed)
    env = gym.make(env_id)
    from os.path import expanduser
    import os
    current_run = str(env_id) + "_soc" + str(soc) + "_psi" + str(
        psi) + "_seed" + str(seed)
    directory = expanduser(
        "~") + "/baselines/baselines/ppo1/results/" + current_run
    if not os.path.exists(directory):
        os.makedirs(directory)

    env = Monitor(env, directory)
    # pdb.set_trace()
    env.seed(seed)
    return env
Example #17
def train(env_id, num_iters, seed, success_reward, save_path, q):
    U.make_session(num_cpu=1).__enter__()
    set_global_seeds(seed)
    env = gym.make(env_id)
    env = Monitor(env, log_dir, allow_early_resets=True)
    test_env = gym.make(env_id)

    env = bench.Monitor(env, logger.get_dir() and
        osp.join(logger.get_dir(), "monitor.json"))
    env.seed(seed)
    test_env.seed(seed)
    gym.logger.setLevel(logging.WARN)
    rew = PPO.learn_with_human(env, test_env, policy_fn,
                    max_iters=num_iters,
                    timesteps_per_batch=2048,
                    clip_param=0.2, entcoeff=0.0,
                    optim_epochs=10, optim_stepsize=3e-4, optim_batchsize=64,
                    gamma=0.99, lam=0.95, schedule='constant', success_reward=success_reward,
                    save_path=save_path, callback=plot_callback, data_queue=q,
                    )
    env.close()
    return rew
Example #18
def make_env(env_id,
             seed,
             train=True,
             logger_dir=None,
             mpi_rank=0,
             subrank=0,
             reward_scale=1.0):
    """
    Create a wrapped, monitored gym.Env for safety.
    """
    # `penalty` is not among this function's parameters; it is assumed to come from the enclosing scope.
    env = gym.make(env_id, **{"train": train, "penalty": penalty})
    env = Monitor(env,
                  logger_dir
                  and os.path.join(logger_dir,
                                   str(mpi_rank) + '.' + str(subrank)),
                  allow_early_resets=True)
    env.seed(seed)
    if isinstance(env.action_space, gym.spaces.Box):
        env = ClipActionsWrapper(env)
    if reward_scale != 1.0:
        from baselines.common.retro_wrappers import RewardScaler
        env = RewardScaler(env, reward_scale)
    return env
Example #19
import os
import sys

import gym
import gym_gvgai
from ZeldaEnv import ZeldaEnv
from model import GAIL
from dataset.dataset import ExpertDataset
from baselines.bench import Monitor
from baselines import logger
sys.path.append("/home/chang/gail/nnrunner/a2c_gvgai")
import nnrunner.a2c_gvgai.env as gvgai_env

# Load the expert dataset
dataset = ExpertDataset(expert_path='dataset/expert_zelda.npz', traj_limitation=-1, verbose=1)
# env = gvgai_env.make_gvgai_env("Pendulum-v0", 2, 571846)
# env = gvgai_env.make_gvgai_env("gvgai-zelda-lvl0-v0", 2, 571846)
env = Monitor(gym.make("gvgai-zelda-lvl0-v0"), logger.get_dir() and os.path.join(logger.get_dir(), "monitor.json"))
env = ZeldaEnv(env)
env.seed(571846) 
level = "/home/chang/gail/levels/1_level.txt"
env.unwrapped._setLevel(level)
try:
    # model = GAIL("CnnPolicy", 'Pendulum-v0', dataset, verbose=1)
    model = GAIL("CnnPolicy", env, dataset, verbose=1, tensorboard_log="./gail_log")

    model.pretrain(dataset, n_epochs=1e4)
    model.save("bc_zelda")
    # Note: in practice, you need to train for 1M steps to have a working policy
    model.learn(total_timesteps=100000000)
    model.save("gail_zelda")

finally:
    pass
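If the local `GAIL` class mirrors the stable-baselines API (an assumption; only `pretrain`, `learn`, and `save` are exercised above), rolling out the saved policy might look like this sketch:

# Hedged sketch: assumes GAIL.load and model.predict behave as in stable-baselines.
model = GAIL.load("gail_zelda")
obs = env.reset()
for _ in range(1000):
    action, _states = model.predict(obs)
    obs, reward, done, info = env.step(action)
    if done:
        obs = env.reset()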