def _generate_real_env_with_kwargs(self, kwargs, env_name):
    # generate environment class
    # todo: make generic (e.g. check if class is existent in bandit.py or gridworld.py)
    if env_name == "Bandit":
        env = TimeLimit(BanditFixedPermutedGaussian())
    elif env_name == "EmptyRoom22":
        env = TimeLimit(EmptyRoom22())
    elif env_name == "EmptyRoom23":
        env = TimeLimit(EmptyRoom23())
    elif env_name == "EmptyRoom33":
        env = TimeLimit(EmptyRoom33())
    elif env_name == "WallRoom":
        env = TimeLimit(WallRoom())
    elif env_name == "HoleRoom":
        env = TimeLimit(HoleRoom())
    elif env_name == "HoleRoomLarge":
        env = TimeLimit(HoleRoomLarge())
    elif env_name == "HoleRoomLargeShifted":
        env = TimeLimit(HoleRoomLargeShifted())
    elif env_name == "Cliff":
        env = TimeLimit(Cliff())
    else:
        env = gym.make(env_name)

    # copy the remaining kwargs onto the environment as attributes
    for key, value in kwargs.items():
        setattr(env, key, value)

    # for episode termination
    env._max_episode_steps = int(kwargs["max_steps"])

    # for model save/load
    env.kwargs = kwargs

    return env
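# Hypothetical usage sketch (not part of the original source): the kwargs dict must
# contain "max_steps", since the method reads it directly to set the episode limit;
# the other key shown here is a made-up example of an attribute copied onto the env.
#
#     env = self._generate_real_env_with_kwargs(
#         kwargs={"max_steps": 50, "reward_std": 0.1},
#         env_name="EmptyRoom33",
#     )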
# seeding for reproducibility
random.seed(args.seed)
np.random.seed(args.seed)
torch.manual_seed(args.seed)
torch.backends.cudnn.deterministic = args.torch_deterministic
env.seed(args.seed)
env.action_space.seed(args.seed)
env.observation_space.seed(args.seed)

input_shape, preprocess_obs_fn = preprocess_obs_space(env.observation_space, device)
output_shape = preprocess_ac_space(env.action_space)

# respect the default timelimit
if int(args.episode_length):
    if not isinstance(env, TimeLimit):
        env = TimeLimit(env, int(args.episode_length))
    else:
        env._max_episode_steps = int(args.episode_length)
else:
    args.episode_length = env._max_episode_steps if isinstance(env, TimeLimit) else 200

if args.capture_video:
    env = Monitor(env, f'videos/{experiment_name}')

assert isinstance(env.action_space, Box), "only continuous action space is supported"

# ALGO LOGIC: initialize agent here:
class Policy(nn.Module):
    def __init__(self):
        super(Policy, self).__init__()
        self.fc1 = nn.Linear(input_shape, 120)
        self.fc2 = nn.Linear(120, 84)