    return env


env = setup_env(FourRooms(goal_pos=tasks.pop(0)))
env.unwrapped.max_steps = 1000000
obs = env.reset()
n_states = env.observation_space
n_actions = env.action_space.n + 1

# Set up loggers
# TODO: use RLlog
loglevel = 20
logger = ProjectLogger(level=loglevel, printing=False)
plotter = PlotterOneHot(env)
db = redis.StrictRedis(port=6379)
logger.critical(env)

# Define a network shared across options' policies and terminations,
# as well as the critic
net = NatureConvBody(in_channels=3)
params = [net.parameters()]

# Create options
rng = np.random.RandomState(1338)
n_options = 8
options, options_params = create_options(n_options, net.feature_dim, env.action_space.n)

# Define a policy over options
actor = EgreedyPolicy(ε=0.02, rng=rng, options=options, loglevel=20)
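For reference, a minimal sketch of what an ε-greedy policy over options does at decision time: with probability ε it picks an option uniformly at random, otherwise it takes the option with the highest estimated value in the current state. The names `q_omega` and `select_option` are illustrative only and are not the project's `EgreedyPolicy` API.

import numpy as np

def select_option(q_omega, state, epsilon, rng):
    """Epsilon-greedy choice over option values q_omega[state, option]."""
    if rng.random_sample() < epsilon:
        return rng.randint(q_omega.shape[1])   # explore: uniformly random option
    return int(np.argmax(q_omega[state]))      # exploit: highest-valued option

# Example with hypothetical shapes: a 16x16 grid flattened to states, 8 options
rng = np.random.RandomState(1338)
q_omega = np.zeros((16 * 16, 8))
o = select_option(q_omega, state=0, epsilon=0.02, rng=rng)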
from hrl.project_logger import ProjectLogger
from hrl.utils import cache
from hrl.visualization.plotter_one_hot import PlotterOneHot

"""
Evaluate the benefits of planning with options.
"""

SAVEPATH = Path(f'{EXPERIMENT_DIR}/SMDP_planning')

if __name__ == '__main__':
    # Create environment
    env = FullyObsWrapper(FourRooms(goal_pos=(15, 15)))

    # Create loggers
    LOGLEVEL = 20
    logger = ProjectLogger(level=LOGLEVEL, printing=False)
    logger.critical(env)
    plotter = PlotterOneHot(env=env)

    SAVEPATH /= env.unwrapped.__class__.__name__
    SAVEPATH.mkdir(parents=True, exist_ok=True)

    # Create hard-coded options
    options = [
        HallwayOption(o, env.observation_space.shape[::-1])
        for o in sorted(HallwayOption.hallway_options)
    ]
    options += [
        PrimitiveOption(o, env.observation_space.shape[::-1])
        for o in sorted(PrimitiveOption.primitive_options)
    ]

    logger.info('Learning option models')
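The script presumably goes on to learn a model for each hard-coded option and then plans with those models. As a hedged illustration of what "learning option models" typically means in SMDP planning, the tabular sketch below uses intra-option model learning in the style of Sutton, Precup & Singh (1999) together with the corresponding SMDP backup; the array layout and function names are assumptions for illustration, not this project's implementation.

import numpy as np

def update_option_model(R, P, s, o, r, s_next, beta, gamma=0.99, alpha=0.1):
    """One intra-option update of the option reward model R[s, o] and the
    discounted termination model P[s, o, x] after observing (s, r, s_next),
    where beta is option o's termination probability in s_next."""
    cont = gamma * (1.0 - beta)        # option keeps running in s_next
    term = gamma * beta                # option terminates in s_next
    R[s, o] += alpha * (r + cont * R[s_next, o] - R[s, o])
    P[s, o] += alpha * (cont * P[s_next, o] - P[s, o])
    P[s, o, s_next] += alpha * term


def smdp_backup(V, R, P, s):
    """SMDP Bellman backup: each option acts like a temporally extended action."""
    return np.max(R[s] + P[s] @ V)

Planning then amounts to sweeping smdp_backup over all states until the value function converges, which is where the hallway options pay off: a single backup through an option's model can propagate value across an entire room rather than one primitive step at a time.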