import os

import gym
from gym.wrappers import Monitor
from stable_baselines3.common.atari_wrappers import AtariWrapper

# Imports above are reconstructed from the usage below; ChannelFirst is a project-specific
# observation wrapper rather than a library import (a sketch of it follows this function).


def get_env_wrapper(env_id, record_video=False):
    """
        Ultimately it's not very clear why are SB3's wrappers and OpenAI gym's copy/pasted code for the most part.
        It seems that OpenAI gym doesn't have reward clipping which is necessary for Atari.

        I'm using SB3 because it's actively maintained compared to OpenAI's gym and it has reward clipping by default.

    """
    monitor_dump_dir = os.path.join(os.path.dirname(__file__), os.pardir,
                                    'gym_monitor')

    # This is necessary because AtariWrapper skips 4 frames by default, so we can't have additional skipping
    # through the environment itself - hence the NoFrameskip requirement
    assert 'NoFrameskip' in env_id, f'Expected a NoFrameskip environment, got {env_id}'

    # The only additional thing needed, on top of AtariWrapper, is to convert observations to channel-first
    # shape, since PyTorch models expect CHW images (see the ChannelFirst sketch after this function)
    env_wrapped = Monitor(ChannelFirst(AtariWrapper(gym.make(env_id))),
                          monitor_dump_dir,
                          force=True,
                          video_callable=lambda episode: record_video)

    return env_wrapped
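
ChannelFirst above is not part of gym or SB3, so the function will not run without it. A minimal
sketch of what such an observation wrapper might look like (the project's actual implementation
may differ):

import numpy as np
import gym
from gym import spaces


class ChannelFirst(gym.ObservationWrapper):
    """Transpose HxWxC image observations into CxHxW so they match PyTorch's conv layers."""

    def __init__(self, env):
        super().__init__(env)
        old_space = env.observation_space
        new_shape = (old_space.shape[2], old_space.shape[0], old_space.shape[1])
        self.observation_space = spaces.Box(low=0, high=255, shape=new_shape, dtype=old_space.dtype)

    def observation(self, observation):
        return np.moveaxis(observation, 2, 0)  # HWC -> CHW
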
Example 2
def atari_wrapper(env: gym.Env) -> gym.Env:
    # wrapper_kwargs is captured from the enclosing scope; this helper is a nested closure
    # in its original context rather than a standalone function
    env = AtariWrapper(env, **wrapper_kwargs)
    return env
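
The helper above depends on a wrapper_kwargs dict defined in its enclosing scope. A hedged
sketch of how such a closure is typically built and handed to a vectorized env (the
make_atari_vec_env name and its signature are illustrative, not from the original source):

import gym
from stable_baselines3.common.atari_wrappers import AtariWrapper
from stable_baselines3.common.vec_env import DummyVecEnv


def make_atari_vec_env(env_id, n_envs=1, wrapper_kwargs=None):
    # Builds the same kind of closure over wrapper_kwargs as the snippet above.
    wrapper_kwargs = wrapper_kwargs or {}

    def atari_wrapper(env: gym.Env) -> gym.Env:
        return AtariWrapper(env, **wrapper_kwargs)

    def make_env():
        return atari_wrapper(gym.make(env_id))

    return DummyVecEnv([make_env for _ in range(n_envs)])


# Example usage: keep SB3's default reward clipping but don't end episodes on life loss.
vec_env = make_atari_vec_env("PongNoFrameskip-v4", wrapper_kwargs=dict(terminal_on_life_loss=False))
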
Example 3
    def env_fn():
        env = gym.make("SpaceInvadersNoFrameskip-v4")
        env = AtariWrapper(env)
        return env
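
    # env_fn is the zero-argument thunk form that DummyVecEnv expects; the code below builds
    # its env inline instead, so this helper is presumably used elsewhere in the original script.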
    device = get_device()
    print(f"Using {device} device.")

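    # Seed every source of randomness (Python, NumPy, PyTorch, and SB3's set_random_seed helper)
    # and force deterministic cuDNN kernels so runs are reproducible, at some cost in speed.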
    seed = args.seed
    random.seed(seed)
    np.random.seed(seed)
    th.manual_seed(seed)
    th.backends.cudnn.deterministic = True
    th.backends.cudnn.benchmark = False
    set_random_seed(seed)

    env_id = args.env
    if 'NoFrameskip' not in env_id:
        raise ValueError(f"env {env_id} is not a NoFrameskip Atari env")
    env = gym.make(env_id)
    env = AtariWrapper(env)
    env = DummyVecEnv([lambda: env])
    env = VecFrameStack(env, n_stack=4)
    print(f"Created env with obs.shape = {env.reset().shape}.")
    train = RewardData(env_id, train=True)
    test = RewardData(env_id, train=False)
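    # RewardData is project-specific and not shown here; the training-loop sketch at the end of
    # this example assumes it yields (observation, reward) pairs.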
    
    train_loader = th.utils.data.DataLoader(train, batch_size=20, shuffle=True, num_workers=0)
    test_loader = th.utils.data.DataLoader(test, batch_size=20, shuffle=False, num_workers=0)
    
    reward_model = AtariRewardModel(env, device)
    optimizer = th.optim.Adam(reward_model.parameters())
    loss_fn = th.nn.MSELoss(reduction="sum")
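
    # The lines above only construct the model, optimizer and loss. A minimal training-loop
    # sketch follows; it is not from the original source and assumes RewardData batches are
    # (obs, reward) pairs with obs already shaped for AtariRewardModel.
    for epoch in range(10):
        reward_model.train()
        for obs, reward in train_loader:
            obs, reward = obs.to(device), reward.to(device).float()
            pred = reward_model(obs).squeeze(-1)  # assuming the model outputs one scalar per observation
            loss = loss_fn(pred, reward)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # Evaluate on the held-out split without tracking gradients.
        reward_model.eval()
        test_loss = 0.0
        with th.no_grad():
            for obs, reward in test_loader:
                obs, reward = obs.to(device), reward.to(device).float()
                test_loss += loss_fn(reward_model(obs).squeeze(-1), reward).item()
        print(f"epoch {epoch}: mean test loss {test_loss / len(test):.4f}")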