def test_doom_goal_env(self):
    env = make_doom_env(doom_env_by_name('doom_maze_goal'))
    self.assertIsNotNone(env)

    obs = env.reset()
    self.assertIsInstance(obs, dict)

    obs, reward, done, info = env.step(0)
    self.assertIsInstance(obs, dict)

    env.close()
def test_normalize(self):
    env = make_doom_env(doom_env_by_name(TEST_ENV_NAME))
    obs_space = main_observation_space(env)

    env.reset()
    obs = [env.step(0)[0] for _ in range(10)]
    env.close()

    self.assertTrue(np.all(obs_space.low == 0))
    self.assertTrue(np.all(obs_space.high == 255))
    self.assertEqual(obs_space.dtype, np.uint8)
    self.assertFalse(is_normalized(obs_space))

    tf.reset_default_graph()
    ph_obs = placeholder_from_space(obs_space)
    obs_tensor = tf_normalize(ph_obs, obs_space)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        normalized_obs = sess.run(obs_tensor, feed_dict={ph_obs: obs})

        self.assertEqual(normalized_obs.dtype, np.float32)
        self.assertLessEqual(normalized_obs.max(), 1.0)
        self.assertGreaterEqual(normalized_obs.min(), -1.0)

    tf.reset_default_graph()
    gc.collect()
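# A minimal sketch of the scaling tf_normalize is assumed to perform here
# (illustrative only; the actual implementation lives in the algorithms utils):
#
#   normalized = tf.cast(ph_obs, tf.float32) / 127.5 - 1.0
#
# which maps uint8 observations in [0, 255] into float32 values in [-1, 1],
# consistent with the dtype and range assertions in the test above.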
def test_run_loop(self):
    env = TimeLimitWrapper(make_doom_env(doom_env_by_name(TEST_ENV_NAME), mode='test'), 50, 0)

    def make_env_func():
        return env

    agent = AgentRandom(make_env_func, {}, close_env=False)
    run_policy_loop(agent, env, 1, 200)
def skipped_test_resnet(self):
    # channels-first input shape; must match the 84x84x3 training batches below
    shape = (3, 84, 84)

    with tf.variable_scope('reach'):
        resnet = ResnetBuilder.build_resnet_18(shape, 2)

        adam = tf.train.AdamOptimizer(learning_rate=1e-4, name='loco_opt')
        resnet.compile(loss='categorical_crossentropy', optimizer=adam, metrics=['accuracy'])

    num_data = 1000
    half_data = num_data // 2
    iterations = 3
    num_epochs_per_iteration = 5
    epoch = 0

    for i in range(iterations):
        log.info('Iteration %d...', i)

        # first half of the batch is "red" (label [1, 0]), second half is "blue" ([0, 1]);
        # the green channel is random noise in both classes
        x = np.empty([num_data, 84, 84, 3], dtype=np.float32)
        y = np.empty([num_data, 2], dtype=np.int32)
        for j in range(half_data):
            x[j, :, :, 0] = 1.0
            x[j, :, :, 1] = random.random()
            x[j, :, :, 2] = 0.0
            y[j] = [1, 0]

            x[half_data + j, :, :, 0] = 0.0
            x[half_data + j, :, :, 1] = random.random()
            x[half_data + j, :, :, 2] = 1.0
            y[half_data + j] = [0, 1]

        train_until = epoch + num_epochs_per_iteration
        resnet.fit(x, y, batch_size=64, epochs=train_until, verbose=1, initial_epoch=epoch)
        epoch += num_epochs_per_iteration

        # evaluate on freshly generated data with the same class structure
        x = np.empty([num_data, 84, 84, 3], dtype=np.float32)
        x[:half_data, :, :, 0] = 1.0
        x[:half_data, :, :, 1] = random.random()
        x[:half_data, :, :, 2] = 0.0
        x[half_data:, :, :, 0] = 0.0
        x[half_data:, :, :, 1] = random.random()
        x[half_data:, :, :, 2] = 1.0

        result = resnet.predict(x, verbose=1, batch_size=1024)
        log.info('result %r', result)

    env = make_doom_env(doom_env_by_name(TEST_ENV_NAME))
    env.reset()
    env.close()
def create_env(env, **kwargs):
    """Expected names are: doom_maze, atari_montezuma, etc."""
    if env.startswith('doom_'):
        from utils.envs.doom.doom_utils import make_doom_env, doom_env_by_name
        return make_doom_env(doom_env_by_name(env), **kwargs)
    elif env.startswith('atari_'):
        from utils.envs.atari.atari_utils import make_atari_env, atari_env_by_name
        return make_atari_env(atari_env_by_name(env), **kwargs)
    elif env.startswith('dmlab_'):
        from utils.envs.dmlab.dmlab_utils import make_dmlab_env, dmlab_env_by_name
        return make_dmlab_env(dmlab_env_by_name(env), **kwargs)
    else:
        raise Exception('Unsupported env {0}'.format(env))
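# Usage sketch for create_env ('doom_maze' is one of the env names used in the
# tests in this repo; any extra kwargs are forwarded to the underlying factory):
#
#   env = create_env('doom_maze')
#   obs = env.reset()
#   env.close()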
def test_locomotion(self):
    g = tf.Graph()

    env = make_doom_env(doom_env_by_name(TEST_ENV_NAME))
    args, params = parse_args_tmax(AgentTMAX.Params, argv=[])

    with g.as_default():
        locomotion_net = LocomotionNetwork(env, params)

    obs = env.reset()

    with tf.Session(graph=g) as sess:
        sess.run(tf.global_variables_initializer())
        action = locomotion_net.navigate(sess, [obs], [obs], [obs])[0]
        self.assertGreaterEqual(action, 0)
        self.assertLess(action, env.action_space.n)

    env.close()
    g.finalize()
def test_performance(self):
    params = AgentPPO.Params('test_performance')
    params.ppo_epochs = 2
    params.rollout = 16
    env = make_doom_env(doom_env_by_name(TEST_ENV_NAME))

    observation_shape = env.observation_space.shape
    experience_size = params.num_envs * params.rollout

    # generate random data
    data = AttrDict()
    data.obs = np.random.normal(size=(experience_size,) + observation_shape)
    data.act = np.random.randint(0, 3, size=[experience_size])
    data.old_prob = np.random.uniform(0, 1, size=[experience_size])
    data.adv = np.random.normal(size=[experience_size])
    data.ret = np.random.normal(size=[experience_size])

    self.train_feed_dict(env, data, params, use_gpu=False)
    self.train_feed_dict(env, data, params, use_gpu=True)
    self.train_dataset(env, data, params, use_gpu=False)
    self.train_dataset(env, data, params, use_gpu=True)

    env.close()
def make_env():
    return make_doom_env(doom_env_by_name(TEST_ENV_NAME))
def make_env_func():
    env = make_doom_env(doom_env_by_name(TEST_ENV_NAME))
    return env
def test_unwrap(self):
    env = make_doom_env(doom_env_by_name(TEST_ENV_NAME))
    unwrapped = unwrap_env(env)
    # the innermost env should not be a wrapper of any kind
    self.assertNotIsInstance(unwrapped, gym.core.Wrapper)
    env.close()
import gym
import numpy as np

from unittest import TestCase

from gym import spaces

from algorithms.agent import AgentRandom
from algorithms.utils.algo_utils import EPS
from algorithms.utils.env_wrappers import NormalizeWrapper, StackFramesWrapper, unwrap_env, ResizeWrapper, \
    SkipAndStackFramesWrapper, TimeLimitWrapper, RemainingTimeWrapper
from algorithms.multi_env import MultiEnv
from utils.envs.doom.doom_utils import make_doom_env, DOOM_W, DOOM_H, doom_env_by_name

TEST_ENV_NAME = 'doom_maze'
TEST_ENV = doom_env_by_name(TEST_ENV_NAME).env_id
TEST_LOWDIM_ENV = 'CartPole-v0'


class TestWrappers(TestCase):
    def test_normalize(self):
        def make_env_func():
            return gym.make(TEST_LOWDIM_ENV)

        env = make_env_func()
        self.assertEqual(len(env.observation_space.shape), 1)

        def check_range(test, o):
            # every component of the observation should lie within the space bounds
            for i in range(len(o)):
                test.assertLessEqual(o[i], env.observation_space.high[i])
                test.assertGreaterEqual(o[i], env.observation_space.low[i])
def make_env():
    return make_doom_env(doom_env_by_name('doom_maze'))
def make_env():
    return make_doom_env(doom_env_by_name('doom_textured_very_sparse'))