Example #1
File: dqn_oc.py  Project: konichuvak/hrl
    return env  # tail of setup_env(...); the function body is omitted in this excerpt


env = setup_env(FourRooms(goal_pos=tasks.pop(0)))
env.unwrapped.max_steps = 1000000  # effectively disable the episode step limit
obs = env.reset()
n_states = env.observation_space  # note: the observation Space object, not a count
n_actions = env.action_space.n + 1

# Set up loggers
# TODO: use RLlog
loglevel = 20  # 20 == logging.INFO
logger = ProjectLogger(level=loglevel, printing=False)
plotter = PlotterOneHot(env)
db = redis.StrictRedis(port=6379)  # local Redis server on the default port
logger.critical(env)

# Define a network shared across options' policies and terminations,
# as well as the critic
net = NatureConvBody(in_channels=3)  # Nature-DQN conv torso over 3-channel input
params = [net.parameters()]

# Create options
rng = np.random.RandomState(1338)  # fixed seed for reproducibility
n_options = 8
options, options_params = create_options(n_options, net.feature_dim,
                                         env.action_space.n)

# Define a policy over options (ε-greedy: random option with probability 0.02)
actor = EgreedyPolicy(ε=0.02, rng=rng, options=options, loglevel=20)
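
The create_options factory and EgreedyPolicy are project code not shown on this
page. As a rough orientation only, here is a minimal sketch of what such a
factory might build in an option-critic setup: each option owns an intra-option
policy head and a termination head on top of the shared conv body's features.
The class name OptionHeads and the exact head shapes are assumptions for
illustration, not the project's actual implementation.

import torch
import torch.nn as nn


class OptionHeads(nn.Module):
    """One option's intra-option policy and termination over shared features."""

    def __init__(self, feature_dim: int, n_actions: int):
        super().__init__()
        self.policy = nn.Linear(feature_dim, n_actions)  # intra-option policy logits
        self.termination = nn.Linear(feature_dim, 1)     # β(s): P(terminate | s)

    def forward(self, features: torch.Tensor):
        action_logits = self.policy(features)            # soft-maxed by the caller
        beta = torch.sigmoid(self.termination(features))
        return action_logits, beta


def create_options_sketch(n_options, feature_dim, n_actions):
    # One (policy, termination) head pair per option; the conv body is shared.
    options = [OptionHeads(feature_dim, n_actions) for _ in range(n_options)]
    params = [o.parameters() for o in options]
    return options, params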
 
Example #2
File: planning.py  Project: konichuvak/hrl
from pathlib import Path  # Path is used below; other imports are truncated in this excerpt

# FullyObsWrapper, FourRooms, HallwayOption, PrimitiveOption and EXPERIMENT_DIR
# come from imports not shown on this page
from hrl.project_logger import ProjectLogger
from hrl.utils import cache
from hrl.visualization.plotter_one_hot import PlotterOneHot

""" Evaluate the benefits of planning with options. """

SAVEPATH = Path(f'{EXPERIMENT_DIR}/SMDP_planning')

if __name__ == '__main__':

    # Create environment
    env = FullyObsWrapper(FourRooms(goal_pos=(15, 15)))

    # Create loggers
    LOGLEVEL = 20  # 20 == logging.INFO
    logger = ProjectLogger(level=LOGLEVEL, printing=False)
    logger.critical(env)
    plotter = PlotterOneHot(env=env)
    SAVEPATH /= env.unwrapped.__class__.__name__  # nest results under the env class name
    SAVEPATH.mkdir(parents=True, exist_ok=True)

    # Create hard-coded options
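    # HallwayOption moves the agent to one of the FourRooms hallway cells, as in
    # Sutton, Precup & Singh (1999); PrimitiveOption wraps a single primitive
    # action as a one-step option, so planning can mix both kinds of behaviour.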
    options = [
        HallwayOption(o, env.observation_space.shape[::-1])
        for o in sorted(HallwayOption.hallway_options)
    ]
    options += [
        PrimitiveOption(o, env.observation_space.shape[::-1])
        for o in sorted(PrimitiveOption.primitive_options)
    ]

    logger.info('Learning option models')
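
"Learning option models" refers to estimating, for every option o, its reward
model R(s, o) and a discounted transition model P(s' | s, o); once those are in
hand, planning reduces to SMDP value iteration in the style of Sutton, Precup &
Singh (1999): V(s) ← max_o [ R(s, o) + Σ_{s'} P(s' | s, o) V(s') ], where the
γ^k discount for an option's duration k is folded into P. A tabular sketch,
assuming hypothetical precomputed model arrays R and P (this is not the
project's code):

import numpy as np


def smdp_value_iteration(R, P, tol=1e-8, max_iters=10_000):
    """R[o, s]: expected discounted reward of running option o from state s.
    P[o, s, s2]: discounted transition model E[γ^k · 1{s_k = s2}]."""
    n_options, n_states = R.shape
    V = np.zeros(n_states)
    for _ in range(max_iters):
        # Backup: Q(s, o) = R(s, o) + Σ_{s'} P(s' | s, o) V(s')
        Q = R + np.einsum('osn,n->os', P, V)
        V_new = Q.max(axis=0)
        if np.max(np.abs(V_new - V)) < tol:
            break
        V = V_new
    return V, Q.argmax(axis=0)  # state values and the greedy option per state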