Beispiel #1
0
def create_env(env_id, output_path, seed=0):
    """Build a monitored gym environment for the current MPI process.

    The global RNGs are seeded with ``seed + 10000 * rank`` (``rank`` being
    this process' rank in ``MPI.COMM_WORLD``), whereas the environment itself
    is seeded with the unmodified ``seed``.

    :param env_id: identifier handed to ``gym.make``
    :param output_path: base path; the monitor target is ``output_path/<rank>``
    :param seed: base seed (defaults to 0)
    :return: the ``Monitor``-wrapped environment
    """
    mpi_rank = MPI.COMM_WORLD.Get_rank()
    set_global_seeds(seed + 10000 * mpi_rank)

    raw_env = gym.make(env_id)
    monitor_target = os.path.join(output_path, str(mpi_rank))
    monitored = Monitor(raw_env, monitor_target, allow_early_resets=True)

    # The env seed deliberately carries no rank offset, unlike the global seed.
    monitored.seed(seed)
    return monitored
Beispiel #2
0
 def _init():
     """Create, monitor and seed one environment instance.

     Every input (``env_id``, ``rank``, ``seed``, ``log_dir`` and the values
     forwarded to ``gym.make``) is read from the enclosing scope.

     :return: the ``Monitor``-wrapped environment, seeded with ``seed + rank``
     """
     make_kwargs = dict(
         cf_id=rank + 1,  # 1-based id derived from the 0-based rank
         n_obstacles=n_obstacles,
         gazebo=gazebo,
         gazebo_process=gazebo_process,
         cf_process=cf_process,
     )
     monitored = Monitor(gym.make(env_id, **make_kwargs), log_dir)
     monitored.seed(seed + rank)
     return monitored
Beispiel #3
0
 def _init():
     """Create the ``make_atari`` environment, optionally monitored, and seed it.

     ``env_id``, ``useMonitor``, ``log_dir``, ``rank`` and ``seed`` are read
     from the enclosing scope.

     :return: the environment (``Monitor``-wrapped when ``useMonitor`` is
         truthy), seeded with ``seed + rank``
     """
     base_env = make_atari(env_id)
     # No wrap_boxworld / wrap_deepmind wrapper is applied here.
     result = (
         Monitor(base_env, log_dir + str(rank), allow_early_resets=True)
         if useMonitor
         else base_env
     )
     result.seed(seed + rank)
     return result
Beispiel #4
0
def make_mujoco_env(env_id, seed, allow_early_resets=True):
    """
    Build a gym environment, wrap it in a ``Monitor`` and seed it.

    The global RNGs are seeded with ``seed + 10000 * mpi_rank_or_zero()``;
    the environment itself is seeded with the plain ``seed``. The monitor
    target is the ``'0'`` entry under ``logger.get_dir()``.

    :param env_id: (str) the environment ID
    :param seed: (int) the initial seed for RNG
    :param allow_early_resets: (bool) forwarded to ``Monitor``
    :return: (Gym Environment) the monitored environment
    """
    global_seed = seed + 10000 * mpi_rank_or_zero()
    set_global_seeds(global_seed)

    raw_env = gym.make(env_id)
    monitor_target = os.path.join(logger.get_dir(), '0')
    monitored = Monitor(raw_env, monitor_target,
                        allow_early_resets=allow_early_resets)
    monitored.seed(seed)
    return monitored
Beispiel #5
0
 def _init():
     """Create the football environment, optionally monitored, and seed it.

     ``env_name``, ``rep``, ``log_dir``, ``FLAGS``, ``useMonitor``, ``rank``
     and ``seed`` are read from the enclosing scope. Video dumping options
     are only enabled when ``seed`` equals 0.

     :return: the environment (``Monitor``-wrapped when ``useMonitor`` is
         truthy), seeded with ``seed + rank``
     """
     is_seed_zero = seed == 0
     wants_pixels = 'pixel' in rep

     # `rewards` is not passed; the FLAGS.reward_experiment argument is disabled.
     result = football_env.create_environment(
         env_name=env_name,
         stacked=True,
         render=wants_pixels,
         logdir=log_dir,
         enable_goal_videos=FLAGS.dump_scores and is_seed_zero,
         enable_full_episode_videos=FLAGS.dump_full_episodes and is_seed_zero,
         dump_frequency=50 if (FLAGS.render and is_seed_zero) else 0,
     )
     if useMonitor:
         monitor_target = log_dir + str(rank)
         result = Monitor(result, monitor_target, allow_early_resets=True)
     result.seed(seed + rank)
     return result
Beispiel #6
0
    def _init():
        """Seed the global RNGs, then build, monitor and seed one environment.

        ``sb_version``, ``seed``, ``rank``, ``env_kwargs``, ``env_name``,
        ``algo_name`` and ``log_dir`` are read from the enclosing scope.

        :return: the ``Monitor``-wrapped environment, seeded with ``seed + rank``
        """
        worker_seed = seed + rank

        # "sb3" uses set_random_seed; any other version uses set_global_seeds.
        seed_globals = set_random_seed if sb_version == "sb3" else set_global_seeds
        seed_globals(worker_seed)

        # seed=0 is only a placeholder: the env is re-seeded right before returning.
        base_env = make_custom_env(
            seed=0,
            sb_version=sb_version,
            env_kwargs=env_kwargs,
            env_name=env_name,
            algo_name=algo_name,
        )

        # No monitor file is requested when log_dir is None.
        monitor_file = None if log_dir is None else os.path.join(log_dir, str(rank))
        monitored = Monitor(base_env, monitor_file, info_keywords=())
        monitored.seed(worker_seed)
        return monitored
Beispiel #7
0
from stable_baselines.bench import Monitor

from model import BnnPolicy

# Experiment constants. Only NUM_TRIALS is used in the visible part of this
# script; the others are presumably consumed further down — TODO confirm.
NUM_TIMESTEPS = 5_000_000
EVAL_FREQ = 100_000
EVAL_EPISODES = 100
NUM_TRIALS = 10

# Run one independent trial per index n, from 4 through NUM_TRIALS inclusive.
# NOTE(review): the loop starts at 4, not 1 — presumably earlier trials were
# already completed; confirm before re-running.
for n in range(4, NUM_TRIALS + 1):
    # Each trial logs into its own folder keyed by the trial index.
    # NOTE(review): the path says "mujoco" but the env below is SlimeVolley-v0.
    LOGDIR = f"exp/expert/ppo-bnn-mujoco/{n}"
    logger.configure(folder=LOGDIR)

    # Monitored environment, seeded with the trial index.
    env = gym.make("SlimeVolley-v0")
    env = Monitor(env, LOGDIR, allow_early_resets=True)
    env.seed(n)

    # PPO1 learner using the project's BnnPolicy with fixed hyperparameters.
    model = PPO1(BnnPolicy,
                 env,
                 timesteps_per_actorbatch=4096,
                 clip_param=0.2,
                 entcoeff=0.0,
                 optim_epochs=10,
                 optim_stepsize=3e-4,
                 optim_batchsize=64,
                 gamma=0.99,
                 lam=0.95,
                 schedule='linear',
                 verbose=2)

    eval_callback = EvalCallback(env,
Beispiel #8
0
 def _init():
     """Create one ``gym`` environment, wrap it in a ``Monitor`` and seed it.

     ``env_id``, ``path``, ``seed`` and ``rank`` are read from the enclosing
     scope.

     :return: the ``Monitor``-wrapped environment, seeded with ``seed + rank``
     """
     monitored = Monitor(gym.make(env_id), filename=path)
     monitored.seed(seed + rank)
     return monitored
Beispiel #9
0
 def _init():
     """Create the ``make_atari`` environment, monitor it and seed it.

     ``env_id``, ``log_dir``, ``rank`` and ``seed`` are read from the
     enclosing scope. The monitor target is ``log_dir`` with the rank
     appended directly (no path separator is inserted).

     :return: the ``Monitor``-wrapped environment, seeded with ``seed + rank``
     """
     base_env = make_atari(env_id)
     monitor_target = log_dir + str(rank)
     monitored = Monitor(base_env, monitor_target, allow_early_resets=True)
     monitored.seed(seed + rank)
     return monitored
 def _init():
     """Create a single-obstacle environment, monitor it and seed it.

     The environment is made with ``n_obstacles=1`` and
     ``avoidance_method='Heuristic'``; ``env_id``, ``log_dir``, ``seed`` and
     ``rank`` are read from the enclosing scope.

     :return: the ``Monitor``-wrapped environment, seeded with ``seed + rank``
     """
     make_kwargs = {'n_obstacles': 1, 'avoidance_method': 'Heuristic'}
     monitored = Monitor(gym.make(env_id, **make_kwargs), log_dir)
     monitored.seed(seed + rank)
     return monitored
Beispiel #11
0
 def _init():
     """Create one ``gym`` environment, wrap it in a ``Monitor`` and seed it.

     ``env_id``, ``log_dir`` and ``seed`` are read from the enclosing scope.
     The seed is applied as-is, without any per-worker offset.

     :return: the ``Monitor``-wrapped environment, seeded with ``seed``
     """
     monitored = Monitor(gym.make(env_id), log_dir)
     monitored.seed(seed)
     return monitored