Example no. 1
0
    def _generate_real_env_with_kwargs(self, kwargs, env_name):
        """Instantiate a real environment by name and attach kwargs to it.

        Known custom environments are wrapped in TimeLimit; any other name
        is forwarded to gym.make. Every kwarg is copied onto the env as an
        attribute, and ``kwargs["max_steps"]`` sets the episode step limit.
        """
        # todo: make generic (e.g. check if class is existent in bandit.py or gridworld.py)
        custom_env_classes = {
            "Bandit": BanditFixedPermutedGaussian,
            "EmptyRoom22": EmptyRoom22,
            "EmptyRoom23": EmptyRoom23,
            "EmptyRoom33": EmptyRoom33,
            "WallRoom": WallRoom,
            "HoleRoom": HoleRoom,
            "HoleRoomLarge": HoleRoomLarge,
            "HoleRoomLargeShifted": HoleRoomLargeShifted,
            "Cliff": Cliff,
        }
        env_cls = custom_env_classes.get(env_name)
        if env_cls is not None:
            env = TimeLimit(env_cls())
        else:
            # Unknown name: defer to the gym registry.
            env = gym.make(env_name)

        # Expose every kwarg as an attribute on the env instance.
        for attr_name, attr_value in kwargs.items():
            setattr(env, attr_name, attr_value)

        # for episode termination
        env._max_episode_steps = int(kwargs["max_steps"])
        # for model save/load
        env.kwargs = kwargs

        return env
Example no. 2
0
# Seed every RNG source (Python, NumPy, torch) so runs are reproducible
# for a given --seed.
random.seed(args.seed)
np.random.seed(args.seed)
torch.manual_seed(args.seed)
# Force deterministic cuDNN kernels when requested (trades speed for
# bitwise-reproducible GPU results).
torch.backends.cudnn.deterministic = args.torch_deterministic
# NOTE(review): env.seed() is the pre-Gym-0.26 seeding API — confirm the
# pinned gym version supports it.
env.seed(args.seed)
env.action_space.seed(args.seed)
env.observation_space.seed(args.seed)
# Derive the network input size and an observation-preprocessing function
# from the observation space (project helpers defined elsewhere).
input_shape, preprocess_obs_fn = preprocess_obs_space(env.observation_space,
                                                      device)
output_shape = preprocess_ac_space(env.action_space)
# respect the default timelimit
if int(args.episode_length):
    # A non-zero --episode-length overrides (or adds) the TimeLimit wrapper.
    if not isinstance(env, TimeLimit):
        env = TimeLimit(env, int(args.episode_length))
    else:
        env._max_episode_steps = int(args.episode_length)
else:
    # No explicit length: inherit the env's own limit, falling back to 200.
    args.episode_length = env._max_episode_steps if isinstance(
        env, TimeLimit) else 200
if args.capture_video:
    # Record rollout videos under videos/<experiment_name>.
    env = Monitor(env, f'videos/{experiment_name}')
assert isinstance(env.action_space,
                  Box), "only continuous action space is supported"


# ALGO LOGIC: initialize agent here:
class Policy(nn.Module):
    def __init__(self):
        super(Policy, self).__init__()
        self.fc1 = nn.Linear(input_shape, 120)
        self.fc2 = nn.Linear(120, 84)