def setup(exp, single_threaded):
    """Build the objects needed to run one experiment.

    Args:
        exp: Mapping describing the experiment. The keys read here are
            ``'config'`` (keyword arguments for ``Config``), ``'env_id'``
            (passed to ``lab.make``) and ``'policy'`` (a mapping with a
            ``'type'`` attribute name looked up on the ``policies`` module
            and an ``'args'`` mapping of extra keyword arguments).
        single_threaded: Forwarded to ``make_session`` as a keyword argument.

    Returns:
        The tuple ``(config, env, sess, policy)``.
    """
    # ``lab`` and the sibling modules are imported at call time, not at
    # module import time; the logger setup is undone before the siblings load.
    import lab
    lab.undo_logger_setup()
    from . import policies, tf_util

    config = Config(**exp['config'])
    env = lab.make(exp['env_id'])
    sess = make_session(single_threaded=single_threaded)

    # Resolve the policy class by name, then instantiate it on the env spaces.
    policy_spec = exp['policy']
    policy_cls = getattr(policies, policy_spec['type'])
    policy = policy_cls(
        env.observation_space,
        env.action_space,
        **policy_spec['args']
    )

    tf_util.initialize()
    return config, env, sess, policy
def test_smoke(env_id):
    """Smoke-test one environment.

    Verifies that the environment identified by ``env_id`` can be created,
    configured when its metadata requires it, reset, and rolled out without
    raising.
    """
    lab.undo_logger_setup()
    logging.getLogger().setLevel(logging.INFO)

    raw_env = lab.make(env_id)
    if raw_env.metadata.get('configure.required', False):
        force_latest = os.environ.get('FORCE_LATEST_INSTITUTE_DOCKER_RUNTIMES')
        if not force_latest:
            # Default path: configure with a single remote.
            raw_env.configure(remotes=1)
        else:
            # Used to test institute-envs in CI.
            configure_with_latest_docker_runtime_tag(raw_env)

    env = wrappers.Unvectorize(raw_env)

    env.reset()
    # Exercise a rollout capped at 60 * 30 timesteps.
    _rollout(env, timestep_limit=60 * 30)
def test_nice_vnc_semantics_match(spec, matcher, wrapper):
    """Check that the VNC-backed and raw environments behave identically.

    The environment built from ``spec`` (wrapped with ``wrapper`` and
    unvectorized) is driven side by side with the raw environment named by
    ``spec._kwargs['lab_core_id']``. ``matcher`` is handed to the ``reset``
    and ``rollout`` helpers at every stage.
    """
    lab.undo_logger_setup()
    logging.getLogger().setLevel(logging.INFO)

    spaces.seed(0)

    # Build the VNC side: make, configure if required, wrap, unvectorize.
    remote_env = spec.make()
    if remote_env.metadata.get('configure.required', False):
        remote_env.configure(remotes=1)
    remote_env = wrappers.Unvectorize(wrapper(remote_env))

    # Build the raw side from the core id recorded on the spec.
    raw_env = lab.make(spec._kwargs['lab_core_id'])

    raw_env.seed(0)
    remote_env.seed(0)

    # Initial reset observations.
    reset(matcher, raw_env, remote_env, stage='initial reset')

    # A full rollout.
    rollout(matcher, raw_env, remote_env, timestep_limit=50, stage='50 steps')

    # Reset to begin a new episode.
    reset(matcher, raw_env, remote_env, stage='reset to new episode')

    # A single step into the new episode.
    rollout(matcher, raw_env, remote_env, timestep_limit=1,
            stage='1 step in new episode')

    # Both environments can be reseeded and then reset and stepped.
    raw_env.seed(1)
    remote_env.seed(1)
    reset(matcher, raw_env, remote_env, 'reseeded reset')
    rollout(matcher, raw_env, remote_env, timestep_limit=1,
            stage='reseeded step')
# Script: report the mean and standard deviation of the episode return
# obtained by sampling random actions, for a fixed list of environments.
# Each output row has the form ``name & mean \pm std``.
import logging

import numpy as np

import lab
from lab import spaces, envs

lab.undo_logger_setup()
logging.getLogger('lab.core').addHandler(logging.NullHandler())

# Number of random-policy episodes run per environment.
num_trials = 50

# A single parenthesised argument makes these print calls valid, with the
# same output, on both Python 2 and Python 3.
print('Name & Random policy performance')

names = [
    'CartPole-v0',
    'Acrobot-v0',
    'MountainCar-v0',
    'Reacher-v1',
    'HalfCheetah-v1',
    'Hopper-v1',
    'Walker2d-v1',
    'Ant-v1',
    'Humanoid-v1',
]

for env_name in names:
    env = envs.make(env_name)

    returns = []
    # ``range`` replaces the Python-2-only ``xrange``; the iteration counts
    # are unchanged.
    for _ in range(num_trials):
        env.reset()
        ret = 0.
        # Step with sampled actions until the episode ends or the
        # environment's timestep limit is reached.
        for _ in range(env.spec.timestep_limit):
            _, r, done, _ = env.step(env.action_space.sample())
            ret += r
            if done:
                break
        returns.append(ret)

    # The backslash is escaped explicitly: '\p' is not a valid escape
    # sequence, and the resulting runtime string is identical.
    print('{} & {} \\pm {}'.format(env_name, np.mean(returns), np.std(returns)))