Example #1
    def test_doom_goal_env(self):
        env = make_doom_env(doom_env_by_name('doom_maze_goal'))
        self.assertIsNotNone(env)
        obs = env.reset()
        self.assertIsInstance(obs, dict)
        obs, reward, done, info = env.step(0)
        self.assertIsInstance(obs, dict)
Example #2
    def test_normalize(self):
        env = make_doom_env(doom_env_by_name(TEST_ENV_NAME))
        obs_space = main_observation_space(env)

        env.reset()
        obs = [env.step(0)[0] for _ in range(10)]

        self.assertTrue(np.all(obs_space.low == 0))
        self.assertTrue(np.all(obs_space.high == 255))
        self.assertEqual(obs_space.dtype, np.uint8)

        self.assertFalse(is_normalized(obs_space))

        tf.reset_default_graph()

        ph_obs = placeholder_from_space(obs_space)
        obs_tensor = tf_normalize(ph_obs, obs_space)

        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            normalized_obs = sess.run(obs_tensor, feed_dict={ph_obs: obs})

            self.assertEqual(normalized_obs.dtype, np.float32)
            self.assertLessEqual(normalized_obs.max(), 1.0)
            self.assertGreaterEqual(normalized_obs.min(), -1.0)

        tf.reset_default_graph()
        gc.collect()
Example #3
    def test_run_loop(self):
        env = TimeLimitWrapper(
            make_doom_env(doom_env_by_name(TEST_ENV_NAME), mode='test'), 50, 0)

        def make_env_func():
            return env

        agent = AgentRandom(make_env_func, {}, close_env=False)
        run_policy_loop(agent, env, 1, 200)
Example #4
    def skipped_test_resnet(self):
        # NOTE: this shape does not match the 84x84x3 batches generated below;
        # the commented-out (3, 84, 84) is the one consistent with that data.
        shape = (3 * 3, 160, 120)
        # shape = (3, 84, 84)

        with tf.variable_scope('reach'):
            resnet = ResnetBuilder.build_resnet_18(shape, 2)
            adam = tf.train.AdamOptimizer(learning_rate=1e-4, name='loco_opt')
            resnet.compile(loss='categorical_crossentropy',
                           optimizer=adam,
                           metrics=['accuracy'])

        num_data = 1000
        half_data = num_data // 2
        iterations = 3
        num_epochs_per_iteration = 5
        epoch = 0

        for i in range(iterations):
            log.info('Iteration %d...', i)

            x = np.empty([num_data, 84, 84, 3], dtype=np.float32)
            y = np.empty([num_data, 2], dtype=np.int32)

            for j in range(half_data):
                x[j, :, :, 0] = 1.0
                x[j, :, :, 1] = random.random()
                x[j, :, :, 2] = 0.0
                y[j] = [1, 0]

                x[half_data + j, :, :, 0] = 0.0
                x[half_data + j, :, :, 1] = random.random()
                x[half_data + j, :, :, 2] = 1.0
                y[half_data + j] = [0, 1]

            train_until = epoch + num_epochs_per_iteration
            resnet.fit(x,
                       y,
                       batch_size=64,
                       epochs=train_until,
                       verbose=1,
                       initial_epoch=epoch)
            epoch += num_epochs_per_iteration

        x = np.empty([num_data, 84, 84, 3], dtype=np.float32)
        x[:half_data, :, :, 0] = 1.0
        x[:half_data, :, :, 1] = random.random()
        x[:half_data, :, :, 2] = 0.0
        x[half_data:, :, :, 0] = 0.0
        x[half_data:, :, :, 1] = random.random()
        x[half_data:, :, :, 2] = 1.0

        result = resnet.predict(x, verbose=1, batch_size=1024)
        log.info('result %r', result)

        env = make_doom_env(doom_env_by_name(TEST_ENV_NAME))
        obs = env.reset()
        env.close()
Example #5
def create_env(env, **kwargs):
    """Expected names are: doom_maze, atari_montezuma, etc."""

    if env.startswith('doom_'):
        from utils.envs.doom.doom_utils import make_doom_env, doom_env_by_name
        return make_doom_env(doom_env_by_name(env), **kwargs)
    elif env.startswith('atari_'):
        from utils.envs.atari.atari_utils import make_atari_env, atari_env_by_name
        return make_atari_env(atari_env_by_name(env), **kwargs)
    elif env.startswith('dmlab_'):
        from utils.envs.dmlab.dmlab_utils import make_dmlab_env, dmlab_env_by_name
        return make_dmlab_env(dmlab_env_by_name(env), **kwargs)
    else:
        raise Exception('Unsupported env {0}'.format(env))
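A minimal usage sketch of the create_env dispatcher above; the 'doom_maze' name and the mode='test' keyword are taken from other snippets on this page and are assumptions here, not documented defaults:

# Hypothetical usage: the env name prefix selects the backend, and any extra
# kwargs are forwarded to the matching make_*_env helper.
env = create_env('doom_maze', mode='test')
obs = env.reset()
obs, reward, done, info = env.step(env.action_space.sample())
env.close()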
Example #6
    def test_locomotion(self):
        g = tf.Graph()

        env = make_doom_env(doom_env_by_name(TEST_ENV_NAME))
        args, params = parse_args_tmax(AgentTMAX.Params, argv=[])

        with g.as_default():
            locomotion_net = LocomotionNetwork(env, params)

        obs = env.reset()

        with tf.Session(graph=g) as sess:
            sess.run(tf.global_variables_initializer())

            action = locomotion_net.navigate(sess, [obs], [obs], [obs])[0]
            self.assertGreaterEqual(action, 0)
            self.assertLess(action, env.action_space.n)

        env.close()

        g.finalize()
Example #7
    def test_performance(self):
        params = AgentPPO.Params('test_performance')
        params.ppo_epochs = 2
        params.rollout = 16
        env = make_doom_env(doom_env_by_name(TEST_ENV_NAME))

        observation_shape = env.observation_space.shape
        experience_size = params.num_envs * params.rollout

        # generate random data
        data = AttrDict()
        data.obs = np.random.normal(size=(experience_size, ) +
                                    observation_shape)
        data.act = np.random.randint(0, 3, size=[experience_size])
        data.old_prob = np.random.uniform(0, 1, size=[experience_size])
        data.adv = np.random.normal(size=[experience_size])
        data.ret = np.random.normal(size=[experience_size])

        self.train_feed_dict(env, data, params, use_gpu=False)
        self.train_feed_dict(env, data, params, use_gpu=True)
        self.train_dataset(env, data, params, use_gpu=False)
        self.train_dataset(env, data, params, use_gpu=True)

        env.close()
Example #8
def make_env():
    return make_doom_env(doom_env_by_name(TEST_ENV_NAME))


def make_env_func():
    env = make_doom_env(doom_env_by_name(TEST_ENV_NAME))
    return env
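Zero-argument factories like these are what the agent constructors on this page take instead of a ready-made env (compare Example #3). A sketch under that assumption, reusing AgentRandom and run_policy_loop from that example:

# Sketch mirroring Example #3: the agent receives the factory,
# while the policy loop runs on a concrete env instance.
env = make_env_func()
agent = AgentRandom(lambda: env, {}, close_env=False)
run_policy_loop(agent, env, 1, 200)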
Example #10
    def test_unwrap(self):
        env = make_doom_env(doom_env_by_name(TEST_ENV_NAME))
        unwrapped = unwrap_env(env)
        self.assertIsNot(type(unwrapped), gym.core.Wrapper)
Example #11
import gym
import numpy as np

from unittest import TestCase

from gym import spaces

from algorithms.agent import AgentRandom
from algorithms.utils.algo_utils import EPS
from algorithms.utils.env_wrappers import NormalizeWrapper, StackFramesWrapper, unwrap_env, ResizeWrapper, \
    SkipAndStackFramesWrapper, TimeLimitWrapper, RemainingTimeWrapper
from algorithms.multi_env import MultiEnv
from utils.envs.doom.doom_utils import make_doom_env, DOOM_W, DOOM_H, doom_env_by_name

TEST_ENV_NAME = 'doom_maze'
TEST_ENV = doom_env_by_name(TEST_ENV_NAME).env_id
TEST_LOWDIM_ENV = 'CartPole-v0'


class TestWrappers(TestCase):
    def test_normalize(self):
        def make_env_func():
            return gym.make(TEST_LOWDIM_ENV)

        env = make_env_func()

        self.assertEqual(len(env.observation_space.shape), 1)

        def check_range(test, o):
            for i in range(len(o)):
                test.assertLessEqual(o[i], env.observation_space.high[i])
Example #12
def make_env():
    return make_doom_env(doom_env_by_name('doom_maze'))
Example #13
def make_env():
    return make_doom_env(doom_env_by_name('doom_textured_very_sparse'))