def thunk():
    """Create, wrap, and seed one environment instance.

    Builds the base env, applies ImgObsWrapper and episode-statistics
    recording, optionally attaches a video Monitor to the first
    (idx == 0) environment, and seeds the env plus both of its spaces.

    NOTE(review): relies on gym_id, seed, idx, args, and
    experiment_name being captured from an enclosing scope —
    presumably a make_env(...) factory whose header is above this
    chunk; confirm against the full file.
    """
    wrapped = gym.make(gym_id)
    wrapped = ImgObsWrapper(wrapped)
    wrapped = gym.wrappers.RecordEpisodeStatistics(wrapped)
    # Only the first vectorized worker records video, to avoid one
    # video directory per sub-environment.
    if args.capture_video and idx == 0:
        wrapped = Monitor(wrapped, f'videos/{experiment_name}')
    # Seed the env itself and both spaces for reproducible rollouts.
    for seedable in (wrapped, wrapped.action_space, wrapped.observation_space):
        seedable.seed(seed)
    return wrapped
#env = gym.wrappers.RecordEpisodeStatistics(env) # records episode reward in `info['episode']['r']` if args.capture_video: env = Monitor(env, f'videos/{experiment_name}') #env = wrap_deepmind( # env, # clip_rewards=True, # frame_stack=True, # scale=False, #) random.seed(args.seed) np.random.seed(args.seed) torch.manual_seed(args.seed) torch.backends.cudnn.deterministic = args.torch_deterministic env.seed(args.seed) env.action_space.seed(args.seed) env.observation_space.seed(args.seed) # respect the default timelimit assert isinstance(env.action_space, Discrete), "only discrete action space is supported" # modified from https://github.com/seungeunrho/minimalRL/blob/master/dqn.py# class ReplayBuffer(): def __init__(self, buffer_limit): self.buffer = collections.deque(maxlen=buffer_limit) def put(self, transition): self.buffer.append(transition)