Esempio n. 1
0
File: eval.py Progetto: phymucs/EDHR
def eval_model(model: ActorCritic,
               env: ShmemVecEnv,
               history_size: int,
               emb_size: int,
               device: str,
               num_ep=100):
    """Run ``model`` in ``env`` until ``num_ep`` episode returns were collected.

    The model is switched to eval mode (and left in it). At every step the
    model is called with the current observation and a rolling history
    buffer ``obs_emb`` of shape ``(env.num_envs, history_size, 1, 84, 84)``,
    and an action is sampled from the first element of its output.

    Args:
        model: Policy called as ``model(obs, obs_emb)``; its first output
            must expose ``.sample()``.
        env: Vectorised environment whose ``reset``/``step`` return tensors.
        history_size: Number of slots kept in the history buffer.
        emb_size: Unused by this function; kept for interface compatibility.
        device: Device the observations and the history buffer live on.
        num_ep: Number of finished episodes to collect; must be positive.

    Returns:
        A tensor with the ``num_ep`` episode returns read from
        ``info['episode']['r']``.

    Raises:
        ValueError: If ``num_ep`` is not positive (the loop could otherwise
            never terminate).
    """
    if num_ep <= 0:
        raise ValueError(
            "num_ep must be a positive integer, got {!r}".format(num_ep))

    model.eval()

    obs_emb = torch.zeros(env.num_envs,
                          history_size,
                          1,
                          84,
                          84,
                          device=device,
                          dtype=torch.uint8)
    obs = env.reset().to(device=device)
    # Seed the newest history slot with the last slice of the observation
    # along dim 1 (presumably the most recent frame -- TODO confirm).
    obs_emb[:, -1] = obs[:, -1:]

    ep_reward = []
    while True:
        with torch.no_grad():
            a = model(obs, obs_emb)[0].sample().unsqueeze(1)
        obs, _, terms, infos = env.step(a)
        obs = obs.to(device=device)

        # Shift the history one slot towards the front. Source and
        # destination are overlapping views of the same storage, so the
        # source is cloned first to make the result independent of the
        # order in which the copy kernel visits elements.
        obs_emb[:, :-1].copy_(obs_emb[:, 1:].clone())
        # Zero the history of environments whose episode just terminated.
        # NOTE(review): the broadcast assumes `terms` has shape
        # (num_envs, 1) -- TODO confirm against the env wrapper.
        obs_emb *= (~terms)[..., None, None, None].to(device=device,
                                                      dtype=torch.uint8)
        obs_emb[:, -1] = obs[:, -1:]

        for info in infos:
            # Only infos carrying an 'episode' entry report a finished
            # episode's return.
            if 'episode' in info:
                ep_reward.append(info['episode']['r'])
                if len(ep_reward) == num_ep:
                    return torch.tensor(ep_reward)
Esempio n. 2
0
def make_vec_envs(env_name,
                  seed,
                  num_processes,
                  gamma,
                  log_dir,
                  device,
                  allow_early_resets,
                  num_frame_stack=None):
    """Build ``num_processes`` environments and wrap them for PyTorch use.

    One ``make_env`` factory is created per process index. More than one
    factory is run through ``ShmemVecEnv`` (fork context), a single one
    through ``DummyVecEnv``. One-dimensional observation spaces get a
    ``VecNormalize`` wrapper whose ``ob_rms`` is then reset to ``None``.
    The result is wrapped in ``VecPyTorch`` and, if ``num_frame_stack`` is
    given or the observation space is three-dimensional (default stack 4),
    in ``VecPyTorchFrameStack``.
    """
    env_fns = [
        make_env(env_name, seed, rank, log_dir, allow_early_resets)
        for rank in range(num_processes)
    ]

    vec_env = (ShmemVecEnv(env_fns, context='fork')
               if len(env_fns) > 1 else DummyVecEnv(env_fns))

    if len(vec_env.observation_space.shape) == 1:
        # Without a discount factor, return normalisation is switched off.
        norm_kwargs = {'ret': False} if gamma is None else {'gamma': gamma}
        vec_env = VecNormalize(vec_env, **norm_kwargs)
        # NOTE(review): clearing ob_rms presumably disables observation
        # normalisation -- confirm against the VecNormalize in use.
        vec_env.ob_rms = None

    vec_env = VecPyTorch(vec_env, device)

    # An explicit stack size wins; image-like (3-D) observations default to 4.
    stack = num_frame_stack
    if stack is None and len(vec_env.observation_space.shape) == 3:
        stack = 4
    if stack is not None:
        vec_env = VecPyTorchFrameStack(vec_env, stack, device)

    return vec_env
Esempio n. 3
0
def make_vec_envs(
    env_name,
    seed,
    num_processes,
    gamma,
    log_dir,
    device,
    allow_early_resets,
    custom_gym,
    navi=False,
    num_frame_stack=None,
    coeff_reward_run=1,
    coeff_reward_stable=0,
    coeff_reward_ctrl=0,
    enjoy=False,
):
    """Build and wrap ``num_processes`` environments, logging each wrapper.

    Every wrapping step prints a line naming the wrapper being applied.
    More than one environment uses ``ShmemVecEnv`` (fork context), a single
    one uses ``DummyVecEnvPPO``. One-dimensional observation spaces are
    wrapped in ``VecNormalize``; everything is wrapped in ``VecPyTorch``;
    frames are stacked when ``num_frame_stack`` is given, or by 4 when
    ``navi`` is false and the observation space is three-dimensional.
    """
    print(f"=== Making {num_processes} parallel envs with {num_frame_stack} stacked frames")

    # Keyword options forwarded unchanged to every per-process factory.
    # TODO: add coefficients
    env_options = {
        "navi": navi,
        "coeff_reward_run": coeff_reward_run,
        "coeff_reward_stable": coeff_reward_stable,
        "coeff_reward_ctrl": coeff_reward_ctrl,
        "enjoy": enjoy,
    }
    env_fns = [
        make_env(env_name, seed, rank, log_dir, allow_early_resets,
                 custom_gym, **env_options)
        for rank in range(num_processes)
    ]

    if len(env_fns) > 1:
        print("ENV: ShmemVecEnv")
        vec_env = ShmemVecEnv(env_fns, context="fork")
    else:
        print("ENV: DummyVecEnv")
        vec_env = DummyVecEnvPPO(env_fns)

    if len(vec_env.observation_space.shape) == 1:
        if gamma is None:
            print("ENV: VecNormalize, ret = False")
            norm_kwargs = {"ret": False}
        else:
            print(f"ENV: VecNormalize, gamma = {gamma}")
            norm_kwargs = {"gamma": gamma}
        vec_env = VecNormalize(vec_env, **norm_kwargs)

    print("ENV: VecPyTorch")
    vec_env = VecPyTorch(vec_env, device)

    # Explicit stack size wins; otherwise non-navi 3-D observations get 4.
    stack = num_frame_stack
    if stack is None and not navi and len(vec_env.observation_space.shape) == 3:
        stack = 4
    if stack is not None:
        print(f"ENV: VecPyTorchFrameStack, stack: {stack}")
        vec_env = VecPyTorchFrameStack(vec_env, stack, device)

    return vec_env
Esempio n. 4
0
def vectorize_env(envs,
                  state_normalize=True,
                  device=None,
                  train=True,
                  gamma=0.99,
                  ob_rms=None):
    """Turn a list of environment factories into one vectorised environment.

    Args:
        envs: List of plain functions, each creating one environment.
        state_normalize: Wrap the result in ``VecNormalize`` when true.
        device: If not ``None``, additionally wrap in ``VecPyTorch`` on
            this device.
        train: In training mode ``VecNormalize`` is built with ``gamma``;
            otherwise it is built with ``ret=False``, put in eval mode and
            given the supplied ``ob_rms``.
        gamma: Discount factor passed to ``VecNormalize`` when training.
        ob_rms: Observation statistics to install when not training;
            required in that case.

    Returns:
        The wrapped vectorised environment.

    Raises:
        AssertionError: If ``envs`` is not a list of functions, or if
            normalised evaluation is requested without ``ob_rms``.
    """
    assert isinstance(envs, list), "envs must be a list of functions"
    for env_fn in envs:
        assert isfunction(env_fn), "every entry of envs must be a function"

    # Validate before any environment is constructed, so a missing ob_rms
    # cannot leave already-started worker processes behind.
    if state_normalize and not train:
        assert ob_rms is not None, "ob_rms is required when train is False"

    if len(envs) == 1:
        envs = DummyVecEnv(envs)
    else:
        envs = ShmemVecEnv(envs, context='fork')

    if state_normalize:
        if train:
            envs = VecNormalize(envs, gamma=gamma)
        else:
            envs = VecNormalize(envs, ret=False)
            envs.eval()
            envs.ob_rms = ob_rms

    if device is not None:
        envs = VecPyTorch(envs, device)

    return envs
Esempio n. 5
0
def make_vec_envs(env_name,
                  seed,
                  num_parallel,
                  device,
                  datadirs,
                  config,
                  R_base=(None, None)):
    """Create ``num_parallel`` environments and wrap them in ``VecPyTorch``.

    Each factory comes from ``make_env`` and receives the same ``datadirs``,
    ``config`` and ``R_base``. More than one factory runs in
    ``ShmemVecEnv`` (fork context); a single one in ``DummyVecEnv``.
    """
    # The loop index is used as the environment id.
    env_fns = [
        make_env(env_name, seed, env_id, datadirs, config, R_base)
        for env_id in range(num_parallel)
    ]

    backend = (ShmemVecEnv(env_fns, context='fork')
               if len(env_fns) > 1 else DummyVecEnv(env_fns))

    return VecPyTorch(backend, device)
Esempio n. 6
0
def make_vec_envs(env_name,
                  seed,
                  num_processes,
                  gamma,
                  log_dir,
                  device,
                  ):
    """Create ``num_processes`` environments wrapped in ``VecPyTorch``.

    More than one environment runs in ``ShmemVecEnv`` (fork context); a
    single one in ``DummyVecEnv``. No ``VecNormalize`` wrapper is applied
    here, so ``gamma`` is accepted but not used.
    """
    env_fns = [
        make_env(env_name, seed, rank, log_dir)
        for rank in range(num_processes)
    ]

    backend = (ShmemVecEnv(env_fns, context='fork')
               if len(env_fns) > 1 else DummyVecEnv(env_fns))

    return VecPyTorch(backend, device)
Esempio n. 7
0
def make_vec_envs(env_id,
                  seed,
                  num_processes,
                  gamma,
                  log_dir=None,
                  device=torch.device('cpu'),
                  obs_keys=None,
                  allow_early_resets=False,
                  save_video=False,
                  num_frame_stack=None,
                  max_steps=None,
                  evaluating=False):
    """Build ``num_processes`` environments and wrap them for PyTorch use.

    One-dimensional observation spaces are wrapped in ``VecNormalize``; when
    ``evaluating`` is true that wrapper is put in eval mode. The result is
    wrapped in ``VecPyTorch`` and, if ``num_frame_stack`` is given or the
    observation space is three-dimensional (default stack 4), in
    ``VecPyTorchFrameStack``.
    """
    env_fns = [
        make_env(env_id, seed, rank, log_dir, obs_keys, allow_early_resets,
                 save_video, max_steps=max_steps)
        for rank in range(num_processes)
    ]

    vec_env = (ShmemVecEnv(env_fns, context='fork')
               if len(env_fns) > 1 else DummyVecEnv(env_fns))

    if len(vec_env.observation_space.shape) == 1:
        norm_kwargs = {'ret': False} if gamma is None else {'gamma': gamma}
        vec_env = VecNormalize(vec_env, **norm_kwargs)

        # The network is frozen during evaluation; online normalisation
        # would make observations drift away from what it was trained on.
        if evaluating:
            vec_env.eval()

    vec_env = VecPyTorch(vec_env, device)

    # Explicit stack size wins; 3-D observations default to a stack of 4.
    stack = num_frame_stack
    if stack is None and len(vec_env.observation_space.shape) == 3:
        stack = 4
    if stack is not None:
        vec_env = VecPyTorchFrameStack(vec_env, stack, device)

    return vec_env
Esempio n. 8
0
def make_vec_envs(env_name,
                  seed,
                  num_processes,
                  gamma,
                  log_dir,
                  device,
                  allow_early_resets,
                  num_frame_stack=None,
                  coin_run_level=0,
                  coin_run_seed=-1,
                  difficulty=False):
    """Build vectorised environments, with a dedicated path for CoinRun.

    For the CoinRun family (``'CoinRun'``, ``'CoinRun-Platforms'``,
    ``'Random-Mazes'``) the global ``Coinrun_Config`` is populated and the
    environments come from ``coinrun_utils.make_general_env``. Every other
    name goes through ``make_env`` and the usual wrapper chain
    (Shmem/Dummy vec env, ``VecNormalize`` for 1-D observations,
    ``VecPyTorch``, optional frame stacking).
    """
    # CoinRun environments need to be treated differently.
    game_types = {
        'CoinRun': 'standard',
        'CoinRun-Platforms': 'platform',
        'Random-Mazes': 'maze'
    }

    if env_name in game_types:
        setup_utils.setup_and_load(use_cmd_line_args=False)
        Coinrun_Config.GAME_TYPE = game_types[env_name]
        Coinrun_Config.NUM_LEVELS = coin_run_level
        # With SET_SEED = -1 this seed is not used and level seeds are drawn
        # from [0, NUM_LEVELS). SET_SEED = -1 with NUM_LEVELS = 500 trains
        # on the same levels as the paper.
        Coinrun_Config.SET_SEED = coin_run_seed
        Coinrun_Config.NUM_ENVS = num_processes
        Coinrun_Config.HIGH_DIFFICULTY = difficulty

        coinrun_env = coinrun_utils.make_general_env(num_processes)
        coinrun_env.spec = Coinrun_Config.GAME_TYPE
        coinrun_env = CoinRunVecPyTorch(coinrun_env, device)
        return add_final_pytorch_wrappers(coinrun_env)

    env_fns = [
        make_env(env_name, seed, rank, log_dir, allow_early_resets)
        for rank in range(num_processes)
    ]

    vec_env = (ShmemVecEnv(env_fns, context='fork')
               if len(env_fns) > 1 else DummyVecEnv(env_fns))

    if len(vec_env.observation_space.shape) == 1:
        norm_kwargs = {'ret': False} if gamma is None else {'gamma': gamma}
        vec_env = VecNormalize(vec_env, **norm_kwargs)

    vec_env = VecPyTorch(vec_env, device)

    # Explicit stack size wins; 3-D observations default to a stack of 4.
    stack = num_frame_stack
    if stack is None and len(vec_env.observation_space.shape) == 3:
        stack = 4
    if stack is not None:
        vec_env = VecPyTorchFrameStack(vec_env, stack, device)

    return vec_env
Esempio n. 9
0
def make_vec_envs(fns_to_make_envs, device):
    """Vectorise the given environment factories and wrap them for PyTorch.

    More than one factory runs in ``ShmemVecEnv`` with the ``"spawn"``
    context; a single one in ``DummyVecEnv``. The number of factories is
    passed to ``VecPyTorch`` as its third argument.
    """
    n_fns = len(fns_to_make_envs)
    backend = (ShmemVecEnv(fns_to_make_envs, context="spawn")
               if n_fns > 1 else DummyVecEnv(fns_to_make_envs))
    return VecPyTorch(backend, device, n_fns)
Esempio n. 10
0
def make_vec_envs(env_name, num_envs, seed=1, num_frame_stack=1):
    """Create ``num_envs`` environments and stack ``num_frame_stack`` frames.

    More than one environment runs in ``ShmemVecEnv`` (fork context); a
    single one in ``DummyVecEnv``. The result is always wrapped in
    ``VecFrameStack``.
    """
    env_fns = [make_env(env_name, seed, rank) for rank in range(num_envs)]

    backend = (ShmemVecEnv(env_fns, context='fork')
               if len(env_fns) > 1 else DummyVecEnv(env_fns))

    return VecFrameStack(backend, num_frame_stack)
Esempio n. 11
0
def make_vec_house3d_envs(num_processes,
                          log_dir,
                          device,
                          allow_early_resets):
    """Create vectorised House3D environments wrapped in ``VecHouse3DEnv``.

    After vectorisation the observation shape is overridden with a flat
    ``(3200,)`` vector and the observation space with a matching ``Box`` of
    ``uint8`` values in ``[0, 1]``.
    """
    env_fns = [
        make_house3d_env(rank, log_dir, allow_early_resets)
        for rank in range(num_processes)
    ]

    vec_env = ShmemVecEnv(env_fns) if len(env_fns) > 1 else DummyVecEnv(env_fns)

    # Replace the advertised observation shape/space with the flat layout.
    vec_env.observation_shape = (3200,)
    vec_env.observation_space = Box(0,
                                    1,
                                    shape=vec_env.observation_shape,
                                    dtype=np.uint8)

    return VecHouse3DEnv(vec_env, device)
Esempio n. 12
0
def PrepareParallelEnv(env_id, seed, num_processes, gamma, log_dir, device,
                       allow_early_resets):
    """Create ``num_processes`` MuJoCo environments, normalised and on ``device``.

    More than one environment runs in ``ShmemVecEnv`` (fork context); a
    single one in ``DummyVecEnv``. The result is wrapped in
    ``VecNormalize(gamma=gamma)`` and then in ``VecPyTorch``.
    """
    env_fns = [
        PrepareMujocoEnv(env_id, seed, rank, log_dir, allow_early_resets)
        for rank in range(num_processes)
    ]

    backend = (ShmemVecEnv(env_fns, context='fork')
               if len(env_fns) > 1 else DummyVecEnv(env_fns))

    normalized = VecNormalize(backend, gamma=gamma)
    return VecPyTorch(normalized, device)
Esempio n. 13
0
def make_vec_envs(env_name, seed, num_processes, gamma, log_dir, device,
                  allow_early_resets, num_frame_stack=None, rank=0,
                  signature='', max_steps=None, env_group_spec=None):
    """Make vectorised environments for parallelised experience sampling.

    ``env_group_spec`` is indexed as ``(number of groups, group size)``.
    Without a spec, or with a single group, ``num_processes`` factories are
    created with consecutive indices starting at ``rank * num_processes``.
    With several groups, each group gets ``group size`` factories, and the
    seed and index offset both advance by the group size from one group to
    the next.
    """
    # Environments are flagged heterogeneous unless the spec's group size
    # equals the number of processes.
    heterogeneous_envs = (env_group_spec is None
                          or env_group_spec[1] != num_processes)
    base_index = rank * num_processes

    if env_group_spec is None or env_group_spec[0] == 1:
        # No grouping of environment processes for each agent.
        env_fns = [
            make_env(env_name, seed + num_processes * rank, base_index + proc,
                     log_dir, allow_early_resets, signature, max_steps,
                     heterogeneous=heterogeneous_envs)
            for proc in range(num_processes)
        ]
    else:
        # Environments are grouped so that they differ even for the same
        # learner.
        env_fns = []
        offset = 0
        group_seed = seed
        for _ in range(env_group_spec[0]):
            group_fns = [
                make_env(env_name, group_seed + num_processes * rank,
                         base_index + offset + member, log_dir,
                         allow_early_resets, signature, max_steps,
                         heterogeneous=False)
                for member in range(env_group_spec[1])
            ]
            env_fns += group_fns
            group_seed += env_group_spec[1]
            offset += env_group_spec[1]

    # A dummy wrapper suffices when no parallelisation is required.
    vec_env = (ShmemVecEnv(env_fns, context='fork')
               if len(env_fns) > 1 else DummyVecEnv(env_fns))

    if len(vec_env.observation_space.shape) == 1:
        norm_kwargs = {'ret': False} if gamma is None else {'gamma': gamma}
        vec_env = VecNormalize(vec_env, **norm_kwargs)

    # Make the environments compatible with the PyTorch agents.
    vec_env = VecPyTorch(vec_env, device)

    # Frame stacking for visual environments: an explicit size wins,
    # otherwise 3-D observations default to a stack of 4.
    stack = num_frame_stack
    if stack is None and len(vec_env.observation_space.shape) == 3:
        stack = 4
    if stack is not None:
        vec_env = VecPyTorchFrameStack(vec_env, stack, device)

    return vec_env
def make_vec_envs(args, device="cpu"):
    """Create ``args.num_processes`` environments wrapped in ``VecPyTorch``.

    All environment settings are read from ``args`` and forwarded to
    ``make_env``. More than one environment runs in ``ShmemVecEnv`` (fork
    context); a single one in ``DummyVecEnv``.
    """
    env_fns = [
        make_env(args.env_name, args.seed, rank, args.log_dir,
                 args.default_ind, args.num_envs, args.basepath)
        for rank in range(args.num_processes)
    ]

    backend = (ShmemVecEnv(env_fns, context='fork')
               if len(env_fns) > 1 else DummyVecEnv(env_fns))

    return VecPyTorch(backend, device)
Esempio n. 15
0
def make_vec_envs(args,
                  seed,
                  num_processes,
                  gamma,
                  device):
    """Create ``num_processes`` environments wrapped in ``VecPyTorch``.

    More than one environment runs in ``ShmemVecEnv`` (fork context); a
    single one in ``DummyVecEnv``. ``gamma`` is accepted but not used.
    """
    env_fns = [make_env(args, seed, rank) for rank in range(num_processes)]

    backend = (ShmemVecEnv(env_fns, context='fork')
               if len(env_fns) > 1 else DummyVecEnv(env_fns))

    return VecPyTorch(backend, device)
Esempio n. 16
0
def make_vec_random_env(num_envs: int, mk_config: Union[MkConfig,
                                                        Dict]) -> VectorEnv:
    """Return ``num_envs`` copies of ``make_env(mk_config=...)`` as one vec env.

    A single environment uses ``DummyVecEnv``; any other count uses
    ``ShmemVecEnv`` with its default arguments.
    """
    # Imported lazily in case `baselines` is not installed.
    # TODO: Use the "native" vectorized envs from gym rather than those from
    # baselines. The only loss would be the ability to render the envs,
    # which gym did not support at the time of writing; a gym fork adding
    # rendering support would be one way around that.
    from baselines.common.vec_env.dummy_vec_env import DummyVecEnv
    from baselines.common.vec_env.shmem_vec_env import ShmemVecEnv

    # Every slot shares the same factory.
    env_fns = [partial(make_env, mk_config=mk_config)] * num_envs

    vec_cls = DummyVecEnv if num_envs == 1 else ShmemVecEnv
    return vec_cls(env_fns)
Esempio n. 17
0
def make_vec_envs_custom(constants, device, env_lambda):
    """Vectorise ``constants["num_processes"]`` copies of ``env_lambda``.

    More than one copy runs in ``ShmemVecEnv`` (fork context); a single one
    in ``DummyVecEnv``. The result is wrapped in ``VecPyTorch`` on
    ``device``.
    """
    # The same factory is reused for every process slot.
    env_fns = [env_lambda] * constants["num_processes"]

    # Worker processes are only needed for more than one environment.
    backend = (ShmemVecEnv(env_fns, context='fork')
               if len(env_fns) > 1 else DummyVecEnv(env_fns))

    return VecPyTorch(backend, device)
Esempio n. 18
0
def make_vec_envs(env_name,
                  seed,
                  num_processes,
                  gamma,
                  log_dir,
                  device,
                  allow_early_resets,
                  num_frame_stack=None,
                  video_dir=None,
                  max_episode_steps=None,
                  info_keywords=None,
                  makeEnvFunc=None,
                  normalizeOb=True,
                  normalizeReturns=True):
    """Build ``num_processes`` environments and wrap them for PyTorch use.

    One-dimensional observation spaces are wrapped in ``VecNormalize`` with
    ``ob=normalizeOb`` and ``ret=normalizeReturns``; ``gamma`` is forwarded
    only when it is not ``None``. The result is wrapped in ``VecPyTorch``
    and, if ``num_frame_stack`` is given or the observation space is
    three-dimensional (default stack 4), in ``VecPyTorchFrameStack``.
    """
    env_fns = [
        make_env(env_name, seed, rank, log_dir, allow_early_resets,
                 video_dir, max_episode_steps,
                 info_keywords=info_keywords, makeEnvFunc=makeEnvFunc)
        for rank in range(num_processes)
    ]

    vec_env = (ShmemVecEnv(env_fns, context='fork')
               if len(env_fns) > 1 else DummyVecEnv(env_fns))

    if len(vec_env.observation_space.shape) == 1:
        norm_kwargs = {'ob': normalizeOb, 'ret': normalizeReturns}
        if gamma is not None:
            norm_kwargs['gamma'] = gamma
        vec_env = VecNormalize(vec_env, **norm_kwargs)

    vec_env = VecPyTorch(vec_env, device)

    # Explicit stack size wins; 3-D observations default to a stack of 4.
    stack = num_frame_stack
    if stack is None and len(vec_env.observation_space.shape) == 3:
        stack = 4
    if stack is not None:
        vec_env = VecPyTorchFrameStack(vec_env, stack, device)

    return vec_env
Esempio n. 19
0
def make_vec_envs(name, num, seed=0, max_ep_len=100000):
    """Create ``num`` Atari ``{name}NoFrameskip-v4`` environments.

    Each worker seeds its environment with ``seed + rank``, wraps it in
    ``bench.Monitor`` and then in ``wrap_deepmind`` (``episode_life=True``,
    ``clip_rewards=False``). The workers always run in ``ShmemVecEnv``
    (fork context) and the result is wrapped in ``VecTorch``.
    """
    def _factory_for(rank):
        def _build():
            atari = make_atari(f"{name}NoFrameskip-v4",
                               max_episode_steps=max_ep_len)
            atari.seed(seed + rank)
            monitored = bench.Monitor(atari, None)
            return wrap_deepmind(monitored,
                                 episode_life=True,
                                 clip_rewards=False)

        return _build

    factories = [_factory_for(rank) for rank in range(num)]
    workers = ShmemVecEnv(factories, context="fork")
    return VecTorch(workers)
Esempio n. 20
0
def make_vec_envs(env_name, seed, num_processes, log_dir, device,
                  allow_early_resets):
    """Create ``num_processes`` environments with a fixed frame stack of 4.

    More than one environment runs in ``ShmemVecEnv`` (fork context); a
    single one in ``DummyVecEnv``. The result is wrapped in ``VecPyTorch``
    and then always in ``VecPyTorchFrameStack`` with 4 frames.
    """
    env_fns = [
        make_env(env_name, seed, rank, log_dir, allow_early_resets)
        for rank in range(num_processes)
    ]

    backend = (ShmemVecEnv(env_fns, context='fork')
               if len(env_fns) > 1 else DummyVecEnv(env_fns))

    torch_env = VecPyTorch(backend, device)

    # Four stacked frames, as used for Atari.
    return VecPyTorchFrameStack(torch_env, 4, device)
Esempio n. 21
0
def make_vec_envs(args,
                  device,
                  allow_early_resets,
                  env_data=None):
    """Create ``args.num_processes`` environments wrapped in ``VecPyTorch``.

    ``args`` itself, together with its ``env_name``, ``seed`` and
    ``log_dir``, is forwarded to ``make_env`` along with ``env_data``. More
    than one environment runs in ``ShmemVecEnv`` (fork context); a single
    one in ``DummyVecEnv``.
    """
    env_fns = [
        make_env(args.env_name, args, args.seed, rank, args.log_dir,
                 allow_early_resets, env_data)
        for rank in range(args.num_processes)
    ]

    backend = (ShmemVecEnv(env_fns, context='fork')
               if len(env_fns) > 1 else DummyVecEnv(env_fns))

    return VecPyTorch(backend, device)
Esempio n. 22
0
def make_vec_envs(args, allow_early_resets, seed_change=0):
    """Create ``args.num_processes`` normalised environments on ``args.device``.

    Each factory is seeded with ``args.seed + seed_change``. More than one
    environment runs in ``ShmemVecEnv`` (fork context); a single one in
    ``DummyVecEnv``. The result is wrapped in ``VecNormalize`` with its
    default arguments and then in ``VecPyTorch``.
    """
    env_fns = [
        make_env(args.env_name, args.seed + seed_change, rank, args.log_dir,
                 allow_early_resets, args.n_reactive)
        for rank in range(0, args.num_processes)
    ]

    backend = (ShmemVecEnv(env_fns, context='fork')
               if len(env_fns) > 1 else DummyVecEnv(env_fns))

    normalized = VecNormalize(backend)
    return VecPyTorch(normalized, args.device)
Esempio n. 23
0
def make_vec_envs(env_name,
                  num_processes,
                  seed=0,
                  device='cpu',
                  allow_early_resets=False,
                  num_frame_stack=1):
    """Create ``num_processes`` environments wrapped in ``VecPyTorch``.

    More than one environment runs in ``ShmemVecEnv`` (fork context); a
    single one in ``DummyVecEnv``. ``VecPyTorchFrameStack`` is applied only
    when ``num_frame_stack`` differs from 1.
    """
    env_fns = [
        make_env(env_name, seed, rank, allow_early_resets)
        for rank in range(num_processes)
    ]

    if len(env_fns) > 1:
        backend = ShmemVecEnv(env_fns, context='fork')
    else:
        backend = DummyVecEnv(env_fns)

    torch_env = VecPyTorch(backend, device)

    # A stack of one frame is the unstacked environment itself.
    if num_frame_stack == 1:
        return torch_env
    return VecPyTorchFrameStack(torch_env, num_frame_stack, device)
Esempio n. 24
0
def make_otc_env(args):
    """Create ``args.num_processes`` environments via ``make_env_all_params``.

    Exactly one process uses ``DummyVecEnv``; any other count uses
    ``ShmemVecEnv`` with its default arguments.
    """
    def _factory_for(rank):
        # Bind the rank now; the environment is only built when called.
        def _build():
            return make_env_all_params(rank, args)

        return _build

    factories = [_factory_for(rank) for rank in range(args.num_processes)]

    vec_cls = DummyVecEnv if args.num_processes == 1 else ShmemVecEnv
    return vec_cls(factories)
Esempio n. 25
0
def make_vec_envs(env_name,
                  seed,
                  num_processes,
                  gamma,
                  log_dir,
                  device,
                  allow_early_resets,
                  num_frame_stack = None):
    """Build ``num_processes`` environments and wrap them for PyTorch use.

    With more than one environment, a throwaway environment is created with
    the settings from the module-level ``config`` solely to read its
    observation and action spaces, which are handed to ``ShmemVecEnv``
    explicitly. That probe environment is closed before the workers are
    forked. One-dimensional observation spaces are wrapped in
    ``VecNormalize``; the result is wrapped in ``VecPyTorch`` and, if
    ``num_frame_stack`` is given or the observation space is
    three-dimensional (default stack 4), in ``VecPyTorchFrameStack``.
    """
    envs = [
        make_env(env_name, seed, i, log_dir, allow_early_resets)
        for i in range(num_processes)
    ]

    if len(envs) > 1:
        # The spaces are passed to ShmemVecEnv explicitly, so a probe
        # environment is built here only to read them.
        env = gym.make(env_name,
                       _adjust_ratio=config.adjust_ratio, image_folder=config.image_folder,
                       adjust=config.adjust, enable_give_up=config.give_up, enable_rotation=config.enable_rotation,
                       box_set=config.box_size_set, container_size=config.container_size, test = False,
                       data_name = None)
        try:
            spaces = [env.observation_space, env.action_space]
        finally:
            # Release the probe before forking so its resources are neither
            # leaked nor inherited by the worker processes.
            env.close()
        envs = ShmemVecEnv(envs, spaces, context='fork')
    else:
        envs = DummyVecEnv(envs)

    if len(envs.observation_space.shape) == 1:
        if gamma is None:
            envs = VecNormalize(envs, ret=False)
        else:
            # Both observation and return normalisation are switched off
            # here; only gamma is forwarded.
            envs = VecNormalize(envs, gamma = gamma, ob = False, ret = False)
    envs = VecPyTorch(envs, device)
    if num_frame_stack is not None:
        envs = VecPyTorchFrameStack(envs, num_frame_stack, device)
    elif len(envs.observation_space.shape) == 3:
        envs = VecPyTorchFrameStack(envs, 4, device)
    return envs
Esempio n. 26
0
def make_vec_envs(env_name,
                  seed,
                  num_processes,
                  log_dir,
                  device,
                  allow_early_resets,
                  max_episode_steps=None,
                  args=None,
                  train=True):
    """Make vectorized environments wrapped in ``VecPyTorch``.

    All arguments except ``num_processes`` and ``device`` are forwarded to
    ``make_env`` by keyword.

    :param env_name: str - name of the environment
    :param seed: int - random seed of the environment
    :param num_processes: int - number of parallel environments
    :param log_dir: str - path to the log directory
    :param device: str - 'cuda' or 'cpu'
    :param allow_early_resets: bool - set to True if TimeLimitMask is
        applied to the environments
    :param max_episode_steps: int - maximum number of actions in one episode
    :param args: ArgsParser - used to specify environment arguments
    :param train: bool - whether the environments are created for training
        or for evaluation; when training, all environments share the same
        random seed to repeat the input sequence, otherwise the random seed
        is diversified
    """
    env_fns = [
        make_env(env_id=env_name,
                 seed=seed,
                 rank=rank,
                 log_dir=log_dir,
                 allow_early_resets=allow_early_resets,
                 max_episode_steps=max_episode_steps,
                 args=args,
                 train=train)
        for rank in range(num_processes)
    ]

    backend = (ShmemVecEnv(env_fns, context='fork')
               if len(env_fns) > 1 else DummyVecEnv(env_fns))

    return VecPyTorch(backend, device)
Esempio n. 27
0
def make_vec_envs(env_name, seed, num_processes, log_dir):
    """
        Creates Atari environments.

        Args:
            env_name (string): Name of the environment.
            seed (int): Random seed for the environment.
            num_processes (int): Number of parallel environments.
            log_dir (string): Directory to save logs for visualize.ipynb.

        Returns:
            Parallel Atari environments, always run through ShmemVecEnv
            (fork context) with 4 stacked frames.

    """
    factories = [
        make_env(env_name, seed, rank, log_dir)
        for rank in range(num_processes)
    ]
    workers = ShmemVecEnv(factories, context='fork')
    return VecFrameStack(workers, 4)
Esempio n. 28
0
def make_vec_envs(name, num, seed=0):
    """Create ``num`` gym environments named ``name``, wrapped in ``VecPyTorch``.

    Each worker first builds the environment with ``gym.make``. If it turns
    out to be an Atari environment it is rebuilt with ``make_atari`` (at
    most 10000 steps per episode) and, after monitoring, wrapped with
    ``wrap_deepmind(frame_stack=True)``. Every environment is seeded with
    ``seed + rank`` and wrapped in ``bench.Monitor``. Exactly one
    environment uses ``DummyVecEnv``; any other count uses ``ShmemVecEnv``
    (fork context).
    """
    def _factory_for(rank):
        def _build():
            probe = gym.make(name)
            is_atari = hasattr(gym.envs, 'atari') and isinstance(
                probe.unwrapped, gym.envs.atari.atari_env.AtariEnv)

            if not is_atari:
                probe.seed(seed + rank)
                return bench.Monitor(probe, None)

            atari = make_atari(name, max_episode_steps=10000)
            atari.seed(seed + rank)
            monitored = bench.Monitor(atari, None)
            return wrap_deepmind(monitored, frame_stack=True)

        return _build

    factories = [_factory_for(rank) for rank in range(num)]
    if num == 1:
        backend = DummyVecEnv(factories)
    else:
        backend = ShmemVecEnv(factories, context='fork')
    return VecPyTorch(backend)
def make_vec_envs(env_def,
                  level_path,
                  seed,
                  num_processes,
                  gamma,
                  log_dir,
                  device,
                  allow_early_resets,
                  num_frame_stack=None):
    """Create ``num_processes`` environments as a bare vectorised environment.

    More than one environment runs in ``ShmemVecEnv`` (fork context); a
    single one in ``DummyVecEnv``. No further wrappers are applied, so
    ``gamma``, ``device`` and ``num_frame_stack`` are accepted but not used.
    """
    env_fns = [
        make_env(env_def, level_path, seed, rank, log_dir, allow_early_resets)
        for rank in range(num_processes)
    ]

    if len(env_fns) > 1:
        return ShmemVecEnv(env_fns, context='fork')
    return DummyVecEnv(env_fns)
Esempio n. 30
0
def make_vec_envs(env_name,
                  seed,
                  num_processes,
                  gamma,
                  log_dir,
                  device,
                  allow_early_resets,
                  num_frame_stack=None,
                  state=None,
                  reward_experiment=None,
                  dump_scores=None,
                  dump_full_episodes=None,
                  render=None):
    """Build ``num_processes`` environments and wrap them for PyTorch use.

    One-dimensional observation spaces are wrapped in ``VecNormalize``. The
    result is wrapped in ``VecPyTorch`` and then in
    ``VecPyTorchFrameStack``: with ``num_frame_stack`` frames when given,
    otherwise with a single frame for three-dimensional observation spaces.
    """
    env_fns = [
        make_env(env_name, seed, rank, log_dir, allow_early_resets, state,
                 reward_experiment, dump_scores, dump_full_episodes, render)
        for rank in range(num_processes)
    ]

    vec_env = (ShmemVecEnv(env_fns, context='fork')
               if len(env_fns) > 1 else DummyVecEnv(env_fns))

    if len(vec_env.observation_space.shape) == 1:
        norm_kwargs = {'ret': False} if gamma is None else {'gamma': gamma}
        vec_env = VecNormalize(vec_env, **norm_kwargs)

    vec_env = VecPyTorch(vec_env, device)

    # Explicit stack size wins; 3-D observations default to a stack of 1.
    stack = num_frame_stack
    if stack is None and len(vec_env.observation_space.shape) == 3:
        stack = 1
    if stack is not None:
        vec_env = VecPyTorchFrameStack(vec_env, stack, device)

    return vec_env