def create_env(env_id, output_path, seed=0):
    """Create a monitored gym environment, seeded per MPI rank.

    :param env_id: (str) the gym environment ID
    :param seed: (int) base RNG seed; offset per MPI worker
    :param output_path: (str) directory for the Monitor log files
    :return: the wrapped, seeded environment
    """
    worker_rank = MPI.COMM_WORLD.Get_rank()
    # Offset the global seed so each MPI worker draws a distinct random stream.
    set_global_seeds(seed + 10000 * worker_rank)
    environment = gym.make(env_id)
    monitor_file = os.path.join(output_path, str(worker_rank))
    environment = Monitor(environment, monitor_file, allow_early_resets=True)
    environment.seed(seed)
    return environment
def _init():
    """Build one monitored environment for this worker (closure).

    Uses `env_id`, `rank`, `seed` and the process/config variables from the
    enclosing scope.
    """
    # NOTE(review): cf_id appears to be 1-based, hence rank + 1 — confirm.
    base_env = gym.make(
        env_id,
        cf_id=rank + 1,
        n_obstacles=n_obstacles,
        gazebo=gazebo,
        gazebo_process=gazebo_process,
        cf_process=cf_process,
    )
    monitored = Monitor(base_env, log_dir)
    monitored.seed(seed + rank)
    return monitored
def _init():
    """Create an Atari env, optionally wrapped in a Monitor (closure).

    Uses `env_id`, `useMonitor`, `log_dir`, `seed` and `rank` from the
    enclosing scope.
    """
    # NOTE(review): wrap_boxworld / wrap_deepmind post-processing was
    # previously disabled here; the env is returned unwrapped apart from
    # the optional Monitor.
    inner = make_atari(env_id)
    if useMonitor:
        inner = Monitor(inner, log_dir + str(rank), allow_early_resets=True)
    inner.seed(seed + rank)
    return inner
def make_mujoco_env(env_id, seed, allow_early_resets=True):
    """Create a wrapped, monitored gym.Env for MuJoCo.

    :param env_id: (str) the environment ID
    :param seed: (int) the initial seed for RNG
    :param allow_early_resets: (bool) allows early reset of the environment
    :return: (Gym Environment) The mujoco environment
    """
    # Offset by rank so parallel MPI workers get distinct seed streams.
    set_global_seeds(seed + 10000 * mpi_rank_or_zero())
    monitored = Monitor(
        gym.make(env_id),
        os.path.join(logger.get_dir(), '0'),
        allow_early_resets=allow_early_resets,
    )
    monitored.seed(seed)
    return monitored
def _init():
    """Build a football environment for this worker, optionally monitored.

    Closure over `env_name`, `rep`, `log_dir`, `FLAGS`, `useMonitor`,
    `seed` and `rank` from the enclosing scope.
    """
    # Score/episode video dumps are only enabled on the seed-0 worker.
    inner = football_env.create_environment(
        env_name=env_name,
        stacked=True,
        render='pixel' in rep,
        logdir=log_dir,
        enable_goal_videos=FLAGS.dump_scores and (seed == 0),
        enable_full_episode_videos=FLAGS.dump_full_episodes and (seed == 0),
        dump_frequency=50 if FLAGS.render and seed == 0 else 0,
    )
    if useMonitor:
        inner = Monitor(inner, log_dir + str(rank), allow_early_resets=True)
    inner.seed(seed + rank)
    return inner
def _init():
    """Seed the global RNGs for this worker and build a monitored env.

    Closure over `sb_version`, `seed`, `rank`, `env_kwargs`, `env_name`,
    `algo_name` and `log_dir` from the enclosing scope.
    """
    if sb_version == "sb3":
        set_random_seed(seed + rank)
    else:
        set_global_seeds(seed + rank)
    # The seed=0 passed here is deliberate: it is overridden by the
    # env.seed(seed + rank) call just before returning.
    inner = make_custom_env(seed=0, sb_version=sb_version,
                            env_kwargs=env_kwargs, env_name=env_name,
                            algo_name=algo_name)
    monitor_path = os.path.join(log_dir, str(rank)) if log_dir is not None else None
    inner = Monitor(inner, monitor_path, info_keywords=())
    inner.seed(seed + rank)
    return inner
from stable_baselines.bench import Monitor from model import BnnPolicy NUM_TIMESTEPS = 5_000_000 EVAL_FREQ = 100_000 EVAL_EPISODES = 100 NUM_TRIALS = 10 for n in range(4, NUM_TRIALS + 1): LOGDIR = f"exp/expert/ppo-bnn-mujoco/{n}" logger.configure(folder=LOGDIR) env = gym.make("SlimeVolley-v0") env = Monitor(env, LOGDIR, allow_early_resets=True) env.seed(n) model = PPO1(BnnPolicy, env, timesteps_per_actorbatch=4096, clip_param=0.2, entcoeff=0.0, optim_epochs=10, optim_stepsize=3e-4, optim_batchsize=64, gamma=0.99, lam=0.95, schedule='linear', verbose=2) eval_callback = EvalCallback(env,
def _init():
    """Make the target env, wrap it in a Monitor, and seed it per rank.

    Closure over `env_id`, `path`, `seed` and `rank` from the enclosing
    scope.
    """
    inner = gym.make(env_id)
    inner = Monitor(inner, filename=path)
    inner.seed(seed + rank)
    return inner
def _init():
    """Create one monitored Atari env for this worker (closure).

    Closure over `env_id`, `log_dir`, `seed` and `rank` from the
    enclosing scope.
    """
    atari_env = make_atari(env_id)
    # One Monitor log file per worker, suffixed with the rank.
    atari_env = Monitor(atari_env, log_dir + str(rank), allow_early_resets=True)
    atari_env.seed(seed + rank)
    return atari_env
def _init():
    """Build the obstacle env with fixed kwargs and wrap it in a Monitor.

    Closure over `env_id`, `log_dir`, `seed` and `rank` from the
    enclosing scope.
    """
    # NOTE(review): n_obstacles/avoidance_method are hard-coded here —
    # confirm these are the intended defaults for every worker.
    inner = gym.make(env_id, n_obstacles=1, avoidance_method='Heuristic')
    inner = Monitor(inner, log_dir)
    inner.seed(seed + rank)
    return inner
def _init():
    """Create a monitored env seeded with the base seed (closure).

    Closure over `env_id`, `log_dir` and `seed` from the enclosing scope.
    """
    inner = gym.make(env_id)
    inner = Monitor(inner, log_dir)
    # NOTE(review): the seed is NOT offset by a worker rank here — confirm
    # this is intentional if multiple workers share this factory.
    inner.seed(seed)
    return inner