def train(env_id, num_iters, seed, n=1, success_reward=1000, save_path='model/new_model'):
    """Train a PPO_RARL policy on `env_id` and return the learning result.

    Creates a monitored training env and a separate evaluation env, seeds both
    identically, runs PPO_RARL.learn, then closes the training env.
    """
    U.make_session(num_cpu=4).__enter__()
    set_global_seeds(seed)

    train_env = gym.make(env_id)
    # train_env.update_adversary(n)  # adversary count `n` is currently unused
    train_env = Monitor(train_env, log_dir, allow_early_resets=True)
    train_env.seed(seed)

    eval_env = gym.make(env_id)
    eval_env.seed(seed)

    gym.logger.setLevel(logging.WARN)

    result = PPO_RARL.learn(
        train_env, eval_env, policy_fn,
        timesteps_per_batch=2048,
        clip_param=0.2,
        entcoeff=0.0,
        optim_epochs=10,
        optim_stepsize=3e-4,
        optim_batchsize=64,
        gamma=0.99,
        lam=0.95,
        schedule='constant',
        success_reward=success_reward,
        save_path=save_path,
        max_iters=num_iters,
        callback=plot_callback,
    )
    train_env.close()
    return result
def make_mujoco_env(env_id, seed):
    """Create a wrapped, monitored gym.Env for MuJoCo."""
    set_global_seeds(seed)
    monitored = Monitor(gym.make(env_id), logger.get_dir())
    monitored.seed(seed)
    return monitored
def make_mujoco_env(env_id, seed):
    """Build a monitored MuJoCo gym.Env, seeded deterministically."""
    set_global_seeds(seed)
    env = gym.make(env_id)
    wrapped = Monitor(env, logger.get_dir())
    wrapped.seed(seed)
    return wrapped
def make_mujoco_env(env_id, seed):
    """Create a wrapped, monitored gym.Env for MuJoCo (one log file per MPI rank)."""
    rank = MPI.COMM_WORLD.Get_rank()
    # Offset the global seed per rank so parallel workers draw different streams.
    set_global_seeds(seed + 10000 * rank)
    env = Monitor(gym.make(env_id), os.path.join(logger.get_dir(), str(rank)))
    env.seed(seed)
    return env
def make_gym_control_env(env_id, seed):
    """
    Added by Yiming (29/5/2018)
    Create a wrapped, monitored gym.Env for Simple Control Problems.
    """
    set_global_seeds(seed)
    wrapped = Monitor(gym.make(env_id), logger.get_dir(), allow_early_resets=True)
    wrapped.seed(seed)
    return wrapped
def make_mujoco_env(env_id, seed):
    """Monitored MuJoCo env; each MPI rank logs under its own subpath."""
    rank = MPI.COMM_WORLD.Get_rank()
    set_global_seeds(seed + 10000 * rank)
    env = gym.make(env_id)
    monitor_path = os.path.join(logger.get_dir(), str(rank))
    env = Monitor(env, monitor_path)
    env.seed(seed)
    return env
def make_halide_env(env_id, seed):
    """Create a wrapped, monitored gym.Env for Halide."""
    logger.log('random_seed: %s' % seed)
    set_global_seeds(seed)
    # Record the best execution time / schedule found so far with episode stats.
    extra_keys = ('best_exec', 'best_schedule')
    wrapped = Monitor(gym.make(env_id), logger.get_dir(), info_keywords=extra_keys)
    wrapped.seed(seed)
    return wrapped
def make_pybullet_env(env_id, seed):
    """
    Added by Yiming (29/5/2018)
    Create a wrapped, monitored gym.Env backed by PyBullet.
    """
    set_global_seeds(seed)
    bullet_env = gym.make(env_id)
    bullet_env = Monitor(bullet_env, logger.get_dir(), allow_early_resets=True)
    bullet_env.seed(seed)
    return bullet_env
def make_env(env_id, seed, train=True, logger_dir=None, reward_scale=1.0):
    """Create a wrapped, monitored gym.Env for safety."""
    wrapped = gym.make(env_id, train=train)
    # tuple("s") in the original is just ("s",): log info key "s" per episode.
    wrapped = Monitor(wrapped, logger_dir, allow_early_resets=True, info_keywords=("s",))
    wrapped.seed(seed)
    # Clip continuous actions to the action-space bounds.
    if isinstance(wrapped.action_space, gym.spaces.Box):
        wrapped = ClipActionsWrapper(wrapped)
    if reward_scale != 1.0:
        from baselines.common.retro_wrappers import RewardScaler
        wrapped = RewardScaler(wrapped, reward_scale)
    return wrapped
def make_program_env(env_id, seed, hier=True, curiosity=True, visualize=True, model='LSTM'):
    """Create a wrapped, monitored gym.Env for the custom program environment."""
    set_global_seeds(seed)
    program_env = gym.make(env_id)
    # Configure the env before wrapping so Monitor observes the final behavior.
    program_env.set_curiosity(curiosity, model)
    program_env.set_hier(hier)
    program_env.set_visualize(visualize)
    program_env = Monitor(program_env, logger.get_dir())
    program_env.seed(seed)
    return program_env
def make_mujoco_env(env_id, seed, reward_scale=1.0):
    """Create a wrapped, monitored gym.Env for MuJoCo, with optional reward scaling."""
    rank = MPI.COMM_WORLD.Get_rank()
    # Per-rank seed offset; None propagates through untouched.
    if seed is not None:
        worker_seed = seed + 1000 * rank
    else:
        worker_seed = None
    set_global_seeds(worker_seed)
    env = gym.make(env_id)
    env = Monitor(env, os.path.join(logger.get_dir(), str(rank)), allow_early_resets=True)
    env.seed(seed)
    if reward_scale != 1.0:
        from baselines.common.retro_wrappers import RewardScaler
        env = RewardScaler(env, reward_scale)
    return env
def make_mujoco_env(env_id, seed):
    """
    Create a wrapped, monitored gym.Env for MuJoCo.

    :param env_id: (str) the environment ID
    :param seed: (int) the initial seed for RNG
    :return: (Gym Environment) The mujoco environment
    """
    rank = MPI.COMM_WORLD.Get_rank()
    set_global_seeds(seed + 10000 * rank)
    monitored = Monitor(gym.make(env_id), os.path.join(logger.get_dir(), str(rank)))
    monitored.seed(seed)
    return monitored
def make_mujoco_env(env_id, seed, reward_scale=1.0):
    """Create a wrapped, monitored gym.Env for MuJoCo; tolerates a missing logger dir."""
    rank = MPI.COMM_WORLD.Get_rank()
    worker_seed = seed + 1000 * rank if seed is not None else None
    set_global_seeds(worker_seed)
    env = gym.make(env_id)
    # Without a logger dir, Monitor writes no file at all.
    base_dir = logger.get_dir()
    if base_dir is None:
        monitor_path = None
    else:
        monitor_path = os.path.join(base_dir, str(rank))
    env = Monitor(env, monitor_path, allow_early_resets=True)
    env.seed(seed)
    if reward_scale != 1.0:
        from baselines.common.retro_wrappers import RewardScaler
        env = RewardScaler(env, reward_scale)
    return env
def make_env(env_id, seed, train=True, logger_dir=None, reward_scale=1.0, mpi_rank=0, subrank=0):
    """
    Create a wrapped, monitored gym.Env for safety.

    Each (mpi_rank, subrank) worker gets its own monitor file and a
    subrank-offset seed so parallel workers are decorrelated.
    """
    env = gym.make(env_id, **{"train": train})
    # Seed exactly once, offset by subrank (None propagates through untouched).
    # BUGFIX: a second env.seed(seed) call after Monitor was removed — it
    # re-seeded every worker identically, clobbering the subrank offset.
    env.seed(seed + subrank if seed is not None else None)
    env = Monitor(env, logger_dir and os.path.join(logger_dir, str(mpi_rank) + '.' + str(subrank)), allow_early_resets=True)
    # Clip continuous actions only; consistent with the sibling make_env helpers.
    if isinstance(env.action_space, gym.spaces.Box):
        env = ClipActionsWrapper(env)
    if reward_scale != 1.0:
        from baselines.common.retro_wrappers import RewardScaler
        env = RewardScaler(env, reward_scale)
    return env
def make_control_env(env_id, seed, hist_len, block_high, version0, give_state):
    """Create a wrapped, monitored gym.Env for MuJoCo."""
    set_global_seeds(seed)
    if env_id == 'LunarLanderContinuousPOMDP-v0':
        # Re-register the POMDP variant with the requested configuration.
        newenv(hist_len=hist_len, block_high=block_high, version0=version0, give_state=give_state)
    control_env = gym.make(env_id)
    # NOTE: `version0` is a nonstandard Monitor kwarg — this must be a
    # project-local Monitor, not the stock baselines one.
    control_env = Monitor(control_env, logger.get_dir(), allow_early_resets=True, version0=version0)
    control_env.seed(seed)
    return control_env
def make_mujoco_env(env_id, seed, soc, psi):
    """Create a wrapped, monitored gym.Env for MuJoCo, logging to a per-run results dir."""
    set_global_seeds(seed)
    env = gym.make(env_id)
    import os
    # Encode the full run configuration in the directory name.
    run_name = str(env_id) + "_soc" + str(soc) + "_psi" + str(psi) + "_seed" + str(seed)
    results_dir = os.path.expanduser("~") + "/baselines/baselines/ppo1/results/" + run_name
    if not os.path.exists(results_dir):
        os.makedirs(results_dir)
    env = Monitor(env, results_dir)
    env.seed(seed)
    return env
def train(env_id, num_iters, seed, success_reward, save_path, q):
    """Train a policy with human-in-the-loop PPO and return the learning result.

    Builds a monitored training env and a separate test env, seeds both, runs
    PPO.learn_with_human (streaming data through queue `q`), then closes the env.
    """
    U.make_session(num_cpu=1).__enter__()
    set_global_seeds(seed)
    env = gym.make(env_id)
    # BUGFIX: the env was previously wrapped in Monitor twice — once to
    # log_dir and again via bench.Monitor to monitor.json — duplicating
    # episode logging and hiding allow_early_resets behind the outer wrapper.
    # Keep the single early-reset-capable wrap, matching the sibling train().
    env = Monitor(env, log_dir, allow_early_resets=True)
    env.seed(seed)
    test_env = gym.make(env_id)
    test_env.seed(seed)
    gym.logger.setLevel(logging.WARN)
    rew = PPO.learn_with_human(
        env, test_env, policy_fn,
        max_iters=num_iters,
        timesteps_per_batch=2048,
        clip_param=0.2,
        entcoeff=0.0,
        optim_epochs=10,
        optim_stepsize=3e-4,
        optim_batchsize=64,
        gamma=0.99,
        lam=0.95,
        schedule='constant',
        success_reward=success_reward,
        save_path=save_path,
        callback=plot_callback,
        data_queue=q,
    )
    env.close()
    return rew
def make_env(env_id, seed, train=True, logger_dir=None, mpi_rank=0, subrank=0, reward_scale=1.0):
    """
    Create a wrapped, monitored gym.Env for safety.
    """
    # NOTE(review): `penalty` is not a parameter of this function and is not
    # defined anywhere in this block — it must resolve from module scope,
    # otherwise this raises NameError. Confirm it exists at module level, or
    # add it as an explicit keyword argument with the intended default.
    env = gym.make(env_id, **{"train": train, "penalty": penalty})
    # Per-worker monitor file named "<mpi_rank>.<subrank>"; when logger_dir is
    # None the `and` short-circuits to None and Monitor writes no file.
    env = Monitor(env, logger_dir and os.path.join(logger_dir, str(mpi_rank) + '.' + str(subrank)), allow_early_resets=True)
    env.seed(seed)
    # Clip continuous actions to the action-space bounds (Box spaces only).
    if isinstance(env.action_space, gym.spaces.Box):
        env = ClipActionsWrapper(env)
    if reward_scale != 1.0:
        from baselines.common.retro_wrappers import RewardScaler
        env = RewardScaler(env, reward_scale)
    return env
# Top-level script: pretrain (behavior cloning) and train GAIL on GVGAI Zelda.
import gym_gvgai
from ZeldaEnv import ZeldaEnv
from model import GAIL
from dataset.dataset import ExpertDataset
from baselines.bench import Monitor
from baselines import logger

# NOTE(review): relies on `sys`, `os`, and `gym` being imported earlier in the
# file — they are not visible in this chunk.
sys.path.append("/home/chang/gail/nnrunner/a2c_gvgai")
import nnrunner.a2c_gvgai.env as gvgai_env

# Load the expert dataset
dataset = ExpertDataset(expert_path='dataset/expert_zelda.npz', traj_limitation=-1, verbose=1)

# env = gvgai_env.make_gvgai_env("Pendulum-v0", 2, 571846)
# env = gvgai_env.make_gvgai_env("gvgai-zelda-lvl0-v0", 2, 571846)
# Monitor writes monitor.json only when a logger dir is configured
# (`logger.get_dir() and ...` short-circuits to None otherwise).
env = Monitor(gym.make("gvgai-zelda-lvl0-v0"), logger.get_dir() and os.path.join(logger.get_dir(), "monitor.json"))
env = ZeldaEnv(env)
env.seed(571846)

# Force a specific level layout via the (private) GVGAI level-setting hook.
level = "/home/chang/gail/levels/1_level.txt"
env.unwrapped._setLevel(level)

try:
    # model = GAIL("CnnPolicy", 'Pendulum-v0', dataset, verbose=1)
    model = GAIL("CnnPolicy", env, dataset, verbose=1, tensorboard_log="./gail_log")
    # Behavior-cloning pretraining on the expert data, then save the BC policy.
    model.pretrain(dataset, n_epochs=1e4)
    model.save("bc_zelda")
    # Note: in practice, you need to train for 1M steps to have a working policy
    model.learn(total_timesteps=100000000)
    model.save("gail_zelda")
finally:
    pass