예제 #1
0
def make_vec_envs(
    env_name,
    seed,
    num_processes,
    gamma,
    log_dir,
    device,
    allow_early_resets,
    training=True,
    num_frame_stack=None,
    red=False,
):
    """Build a vectorized environment wrapped for PyTorch.

    One ``make_env(env_name, seed, i, log_dir, allow_early_resets)`` result is
    created per process index ``i``. They are combined with ``ShmemVecEnv``
    when there is more than one, otherwise with ``DummyVecEnv``. The result is
    then optionally wrapped in ``VecNormalize``, always wrapped in
    ``VecPyTorch``, and optionally wrapped in ``VecPyTorchFrameStack``.

    Args:
        env_name: Environment identifier forwarded to ``make_env``. A name
            starting with ``'CarRacing'`` selects reward-only normalization
            when the observation space is not 1-D.
        seed: Seed forwarded to ``make_env``.
        num_processes: Number of environments to create.
        gamma: Forwarded to ``VecNormalize`` for 1-D observation spaces.
            When ``None``, ``ret=False`` is passed instead.
        log_dir: Forwarded to ``make_env``.
        device: Forwarded to ``VecPyTorch`` and ``VecPyTorchFrameStack``.
        allow_early_resets: Forwarded to ``make_env``.
        training: When false, ``eval()`` is called on the ``VecNormalize``
            wrapper (presumably to stop updating its running statistics;
            confirm against the wrapper's implementation). For CarRacing it
            is also passed as ``ret``.
        num_frame_stack: Number of frames to stack. When ``None``, a stack of
            4 is applied only if the wrapped observation space is 3-D.
        red: When true, ``ob=False`` is passed to ``VecNormalize`` for 1-D
            observation spaces.

    Returns:
        The wrapped vectorized environment (a ``VecPyTorch`` instance,
        possibly wrapped in ``VecPyTorchFrameStack``).
    """
    env_fns = [
        make_env(env_name, seed, rank, log_dir, allow_early_resets)
        for rank in range(num_processes)
    ]

    if len(env_fns) > 1:
        envs = ShmemVecEnv(env_fns, context='fork')
    else:
        envs = DummyVecEnv(env_fns)

    normalized = False
    if len(envs.observation_space.shape) == 1:
        # Don't filter observations if RED
        obfilt = not red
        if gamma is None:
            envs = VecNormalize(envs, ob=obfilt, ret=False)
        else:
            envs = VecNormalize(envs, ob=obfilt, gamma=gamma)
        normalized = True
    elif env_name.startswith('CarRacing'):
        # Car Racing: leave observations alone, use a normalizer for rewards
        envs = VecNormalize(envs,
                            ob=False,
                            ret=training,
                            clipob=1e10,
                            cliprew=1.0)
        normalized = True

    # Only a VecNormalize wrapper is switched to eval mode; other
    # environments are left untouched.
    if normalized and not training:
        envs.eval()

    envs = VecPyTorch(envs, device)

    if num_frame_stack is not None:
        envs = VecPyTorchFrameStack(envs, num_frame_stack, device)
    elif len(envs.observation_space.shape) == 3:
        # Default stack depth for 3-D observation spaces
        envs = VecPyTorchFrameStack(envs, 4, device)

    return envs
예제 #2
0
def make_vec_envs(env_id,
                  seed,
                  num_processes,
                  gamma,
                  log_dir=None,
                  device=torch.device('cpu'),
                  obs_keys=None,
                  allow_early_resets=False,
                  save_video=False,
                  num_frame_stack=None,
                  max_steps=None,
                  evaluating=False):
    """Create ``num_processes`` environments and wrap them for PyTorch.

    Each environment comes from ``make_env``. More than one is combined with
    ``ShmemVecEnv``, otherwise ``DummyVecEnv`` is used. A 1-D observation
    space gets a ``VecNormalize`` wrapper, which is given ``ret=False`` when
    ``gamma`` is ``None`` and ``gamma=gamma`` otherwise. The result is wrapped
    in ``VecPyTorch`` and then frame-stacked: ``num_frame_stack`` frames when
    given, or 4 frames when the wrapped observation space is 3-D.

    Args:
        env_id, seed, log_dir, obs_keys, allow_early_resets, save_video,
            max_steps: Forwarded to ``make_env``.
        num_processes: Number of environments to create.
        gamma: Forwarded to ``VecNormalize``; see above.
        device: Forwarded to ``VecPyTorch`` and ``VecPyTorchFrameStack``.
        num_frame_stack: Explicit frame-stack depth, or ``None``.
        evaluating: When true, ``eval()`` is called on the ``VecNormalize``
            wrapper.

    Returns:
        The wrapped vectorized environment.
    """
    env_fns = []
    for rank in range(num_processes):
        env_fns.append(
            make_env(env_id, seed, rank, log_dir, obs_keys,
                     allow_early_resets, save_video, max_steps=max_steps))

    multi_process = len(env_fns) > 1
    envs = (ShmemVecEnv(env_fns, context='fork')
            if multi_process else DummyVecEnv(env_fns))

    obs_rank = len(envs.observation_space.shape)
    if obs_rank == 1:
        norm_kwargs = {'ret': False} if gamma is None else {'gamma': gamma}
        envs = VecNormalize(envs, **norm_kwargs)

        # The network is frozen during evaluation, so online normalization
        # would drift the observations away from what it was trained on.
        if evaluating:
            envs.eval()

    envs = VecPyTorch(envs, device)

    # Resolve the stack depth first; the observation space is inspected only
    # when no explicit depth was requested.
    stack_depth = num_frame_stack
    if stack_depth is None and len(envs.observation_space.shape) == 3:
        stack_depth = 4
    if stack_depth is not None:
        envs = VecPyTorchFrameStack(envs, stack_depth, device)

    return envs