Exemple #1
0
 def test_monitor_stats_recorder_video_recorder_prefix(self):
     """Check that a Monitor `prefix` is applied to stats and video files.

     Wraps a `FakeImageEnv` in a `Monitor` with video enabled, steps it 25
     times (resetting whenever an episode ends), then verifies that the
     stats file is named `<prefix>.<monitor_ext>` and that every file in
     the `videos` folder starts with the prefix. Finally checks the state
     of both tools after `close()`.
     """
     prefix = 'eval'
     env = FakeImageEnv(max_steps=10)
     with tempfile.TemporaryDirectory() as tempdir:
         root_dir = os.path.join(
             tempdir,
             'test_monitor_stats_recorder_video_recorder_prefix',
             'monitor/')
         env = ub_envs.Monitor(env,
                               root_dir=root_dir,
                               prefix=prefix,
                               video=True,
                               video_kwargs=None)
         self.assertEqual(2, len(env.tools))
         self.assertIsInstance(env.tools[0], ub_envs.StatsRecorder)
         self.assertIsInstance(env.tools[1], ub_envs.VideoRecorder)
         env.reset()
         for _ in range(25):
             _, _, done, _ = env.step(env.action_space.sample())
             if done:
                 env.reset()
         # the stats file name is built from the prefix and the extension
         csv_path = os.path.join(
             root_dir, prefix + '.' + ub_envs.StatsRecorder.monitor_ext)
         self.assertTrue(os.path.isfile(csv_path), csv_path)
         video_path = os.path.join(root_dir, 'videos')
         for filename in os.listdir(video_path):
             self.assertTrue(filename.startswith(prefix), filename)
         env.close()
         self.assertTrue(env.tools[0].closed)
         self.assertFalse(env.tools[1]._enabled)
         self.assertTrue(env.tools[1]._recorder.closed)
Exemple #2
0
    def test_monitor_w_video_recorder_cubic(self):
        """Check the default video recording schedule of the Monitor.

        With `video_kwargs=None` the recorder is expected to record only
        episodes 1 and 8 out of the first 10 (presumably a cubic schedule,
        as the test name suggests), producing two files per recorded
        episode.
        """
        env = FakeImageEnv(max_steps=10)
        with tempfile.TemporaryDirectory() as tempdir:
            root_dir = os.path.join(
                tempdir, 'test_monitor_w_video_recorder_cubic', 'monitor/')
            # video_kwargs=None: use the recorder's default schedule
            env = ub_envs.Monitor(env,
                                  root_dir=root_dir,
                                  video=True,
                                  video_kwargs=None)
            self.assertEqual(2, len(env.tools))
            self.assertIsInstance(env.tools[0], ub_envs.StatsRecorder)
            self.assertIsInstance(env.tools[1], ub_envs.VideoRecorder)
            self.assertIsNotNone(env.tools[1].stats)

            for ep in range(10):
                env.reset()
                # episodes are counted from 1; only 1 and 8 are recorded
                self.assertEqual(ep + 1 in [1, 8], env.tools[1].need_record)
                for _ in range(20):
                    _, _, done, _ = env.step(env.action_space.sample())
                    if done:
                        break
            env.close()
            video_path = os.path.join(root_dir, 'videos')
            records = os.listdir(video_path)
            self.assertEqual(2 * 2, len(records))  # json + mp4
            # second close, kept from the original test (presumably checks
            # that close() can safely be called twice)
            env.close()
            self.assertTrue(env.tools[0].closed)
            self.assertFalse(env.tools[1]._enabled)
            self.assertTrue(env.tools[1]._recorder.closed)
Exemple #3
0
 def test_video_recorder_init_exception(self):
     """Check that Monitor rejects an invalid video `interval`.

     Building a `Monitor` with `video_kwargs=dict(interval='hello')` is
     expected to raise `RuntimeError`.
     """
     env = FakeImageEnv(max_steps=10)
     with tempfile.TemporaryDirectory() as tempdir:
         root_dir = os.path.join(
             tempdir, 'test_video_recorder_init_exception', 'monitor/')
         # interval not callable
         invalid_kwargs = dict(interval='hello')
         with self.assertRaises(RuntimeError):
             ub_envs.Monitor(env,
                             root_dir=root_dir,
                             video=True,
                             video_kwargs=invalid_kwargs)
         # construction raised, so `env` is still the bare FakeImageEnv
         env.close()
 def test_dqn_call_predict(self):
     """Check output shapes of DQN `__call__` and `predict`.

     The same checks are run for a default model and for a model built
     with `dueling=True`: calling the model on a batch of observations
     must return actions and values of shape `(batch_size,)`, and
     `predict` on a single observation must return a scalar action.
     """
     envs = [FakeImageEnv() for _ in range(3)]
     env = ub_vec.VecEnv(envs)
     model = dqn_model.DQN(env)
     batch_size = 3
     obs_space = env.observation_space
     obs = np.asarray([obs_space.sample() for _ in range(batch_size)])

     def check_output_shapes(dqn):
         # test call
         act, val = dqn(obs, proc_obs=True)
         self.assertArrayEqual((batch_size, ), act.shape)
         self.assertArrayEqual((batch_size, ), val.shape)
         # test predict (single observation -> scalar action)
         act = dqn.predict(obs_space.sample())
         self.assertArrayEqual([], act.shape)

     check_output_shapes(model)
     # test dueling
     check_output_shapes(dqn_model.DQN(env, dueling=True))
 def test_dqn_run(self):
     """Check the replay buffer state after `DQN.run`.

     Runs the model for more samples than the buffer can hold and verifies
     the buffer length, its ready/full flags and the shapes of the stored
     arrays.
     """
     n_envs = 3
     buffer_size = 90
     env = ub_vec.VecEnv([FakeImageEnv() for _ in range(n_envs)])
     model = dqn_model.DQN(env,
                           buffer_size=buffer_size,
                           warmup_steps=50)
     obs_shape = env.observation_space.shape
     act_shape = env.action_space.shape
     # number of buffer slots, each slot holding one step of every env
     n_slots = buffer_size // n_envs
     model.run(100)
     buf = model.buffer
     self.assertEqual(n_slots * n_envs, len(buf))
     self.assertTrue(buf.ready_for_sample)
     self.assertTrue(buf.isfull)
     # test buffer contents
     expected_shapes = (
         ('obs', (n_slots, n_envs, *obs_shape)),
         ('act', (n_slots, n_envs, *act_shape)),
         ('rew', (n_slots, n_envs)),
         ('done', (n_slots, n_envs)),
     )
     for key, shape in expected_shapes:
         self.assertArrayEqual(shape, buf.data[key].shape)
 def test_dqn_reg_loss(self):
     """Check that the DQN regularization loss is a non-NaN scalar."""
     n_envs = 3
     env = ub_vec.VecEnv([FakeImageEnv() for _ in range(n_envs)])
     model = dqn_model.DQN(env)
     agent_vars = model.agent.trainable_variables
     loss = model.reg_loss(agent_vars)
     # an empty shape means the loss is a scalar
     self.assertArrayEqual([], loss.shape)
     self.assertFalse(np.all(np.isnan(loss)))
Exemple #7
0
    def test_monitor_disabled_video_recorder(self):
        """Check that the Monitor keeps running after its video tool is closed.

        The VideoRecorder tool is closed before any episode starts; the
        environment is then run for 10 episodes and closed.
        """
        env = FakeImageEnv(max_steps=10)
        with tempfile.TemporaryDirectory() as tempdir:
            root_dir = os.path.join(
                tempdir, 'test_monitor_disabled_video_recorder', 'monitor/')
            # always record
            env = ub_envs.Monitor(env,
                                  root_dir=root_dir,
                                  video=True,
                                  video_kwargs=dict(interval=1))
            self.assertEqual(2, len(env.tools))
            stats_tool = env.tools[0]
            video_tool = env.tools[1]
            self.assertTrue(isinstance(stats_tool, ub_envs.StatsRecorder))
            self.assertTrue(isinstance(video_tool, ub_envs.VideoRecorder))
            self.assertTrue(video_tool.stats is not None)
            # _VideoRecorder will not be created
            video_tool.close()

            for _episode in range(10):
                env.reset()
                self.assertTrue(video_tool.need_record)
                for _step in range(20):
                    _, _, done, _ = env.step(env.action_space.sample())
                    if done:
                        break
            env.close()
            self.assertTrue(stats_tool.closed)
            self.assertFalse(video_tool._enabled)
 def test_dqn_save_load(self):
     """Check that a saved DQN model is restored faithfully by `DQN.load`.

     After a short run and one training step the model is saved and
     loaded again. The test compares configs, trainable variables, and --
     after training both models on the same batches -- losses, TD errors,
     trainable variables and optimizer variables.
     """
     n_envs = 3
     env = ub_vec.VecEnv([FakeImageEnv() for _ in range(n_envs)])
     env.seed(1)
     ub_utils.set_seed(1)
     model = dqn_model.DQN(env, warmup_steps=5)
     n_samples = 10
     batch_size = 10
     model.run(n_samples)

     def draw_batch():
         # sample a batch, then attach the 'obs' of the sampler's relative
         # index 1 as its next observations
         drawn = model.sampler(batch_size)
         drawn['next_obs'] = model.sampler.rel[1]['obs']
         return drawn

     # train for some steps
     ub_utils.set_seed(2)
     model._train_model(draw_batch())
     with tempfile.TemporaryDirectory() as save_path:
         # save & load model
         model.save(save_path)
         loaded_model = dqn_model.DQN.load(save_path)
     # check model setup
     for part in (loaded_model.agent,
                  loaded_model.buffer,
                  loaded_model.optimizer):
         self.assertTrue(part is not None)
     # check that the config is correctly restored
     model_config = model.get_config()
     loaded_config = loaded_model.get_config()
     self.assertEqual(set(model_config.keys()), set(loaded_config.keys()))
     for key, value in model_config.items():
         self.assertEqual(value, loaded_config[key], key)
     # check that all network variables are correctly restored
     self.assertVariables(model.trainable_variables,
                          loaded_model.trainable_variables)
     # test optimizers: train both models on the same batches
     batches = [draw_batch() for _ in range(3)]
     ub_utils.set_seed(1)
     for batch in batches:
         losses1, td1 = model._train_model(batch)
     for batch in batches:
         losses2, td2 = loaded_model._train_model(batch)
     # check that the losses match
     self.assertEqual(set(losses1.keys()), set(losses2.keys()))
     for key in losses1:
         self.assertEqual(losses1[key], losses2[key])
     self.assertAllClose(td1, td2)
     # check that the variables are the same
     self.assertVariables(model.trainable_variables,
                          loaded_model.trainable_variables)
     # check that the optimizer params are the same
     self.assertVariables(model.optimizer.variables(),
                          loaded_model.optimizer.variables())
 def test_ppo_reset_spaces_conflict(self):
     """Check that `PPO.set_env` rejects an env with different spaces.

     The model is built on `FakeContinuousEnv` instances; switching it to
     `FakeImageEnv` instances must raise `RuntimeError`.
     """
     n_envs = 4
     continuous_env = ub_vec.VecEnv(
         [FakeContinuousEnv() for _ in range(n_envs)])
     model = ppo_model.PPO(continuous_env)
     image_env = ub_vec.VecEnv([FakeImageEnv() for _ in range(n_envs)])
     # space conflict
     with self.assertRaises(RuntimeError):
         model.set_env(image_env)
 def test_dqn_delayed_setup(self):
     """Check that a DQN built without an env can be set up later.

     A model created with `None` as env has no spaces and no agent; after
     `set_env` and `setup` the spaces are set and the expected number of
     trainable variables exists.
     """
     model = dqn_model.DQN(None)
     for attr in ('observation_space', 'action_space', 'agent'):
         self.assertTrue(getattr(model, attr) is None)
     env = ub_vec.VecEnv([FakeImageEnv() for _ in range(3)])
     model.set_env(env)
     model.setup()
     for attr in ('observation_space', 'action_space'):
         self.assertTrue(getattr(model, attr) is not None)
     expected_n_vars = (8 + 2) * 2
     self.assertEqual(expected_n_vars, len(model.trainable_variables))
Exemple #11
0
 def test_video_recorder_invalid_frame(self):
     """Check how the video recorder reacts to invalid frames.

     Encoding a float32 frame must mark the recorder as broken, and
     feeding the encoder a non-array or a `(64, 32, 3)` uint8 frame must
     raise `RuntimeError`.
     """
     env = FakeImageEnv(max_steps=10)
     with tempfile.TemporaryDirectory() as tempdir:
         root_dir = os.path.join(
             tempdir, 'test_video_recorder_invalid_frame', 'monitor/')
         # always record
         env = ub_envs.Monitor(env,
                               root_dir=root_dir,
                               video=True,
                               video_kwargs=dict(interval=1))
         self.assertEqual(2, len(env.tools))
         stats_tool = env.tools[0]
         video_tool = env.tools[1]
         self.assertTrue(isinstance(stats_tool, ub_envs.StatsRecorder))
         self.assertTrue(isinstance(video_tool, ub_envs.VideoRecorder))
         self.assertTrue(video_tool.stats is not None)
         env.reset()
         # start video encoder
         env.step(env.action_space.sample())
         # test invalid frame
         float_frame = np.random.normal(size=(64, 64, 3)).astype(np.float32)
         print('\n>>> Test invalid frame type np.float32')
         video_tool._recorder._encode_image_frame(float_frame)
         print('<<<\n')
         self.assertTrue(video_tool._recorder.broken)
         with self.assertRaises(RuntimeError):
             # Not a np.ndarray
             video_tool._recorder.encoder.capture_frame('foobar')
         with self.assertRaises(RuntimeError):
             video_tool._recorder.encoder.capture_frame(
                 np.zeros((64, 32, 3), dtype=np.uint8))
         # the stats recorder and the video recorder are both still
         # open/enabled at this point
         self.assertFalse(stats_tool.closed)
         self.assertTrue(video_tool._enabled)
         self.assertFalse(video_tool._recorder.closed)
 def test_dqn_setup_image_obs(self):
     """Check DQN setup on image observations, with and without dueling."""
     n_envs = 3
     env = ub_vec.VecEnv([FakeImageEnv() for _ in range(n_envs)])
     model = dqn_model.DQN(env)
     self.assertEqual(n_envs, model.n_envs)
     self.assertTrue(model.observation_space is not None)
     self.assertTrue(model.action_space is not None)
     # nature_cnn + value
     plain_n_vars = (8 + 2) * 2
     self.assertEqual(plain_n_vars, len(model.trainable_variables))
     # test dueling
     dueling_model = dqn_model.DQN(env, dueling=True)
     # nature_cnn + value(dueling)
     dueling_n_vars = (8 + 4) * 2
     self.assertEqual(dueling_n_vars,
                      len(dueling_model.trainable_variables))
Exemple #13
0
    def test_monitor_w_video_recorder(self):
        """Check that a Monitor with `interval=1` records every episode.

        Runs 10 episodes and expects 20 files in the `videos` folder (one
        metadata json and one mp4 per episode), then inspects the keys of
        the first metadata file.
        """
        env = FakeImageEnv(max_steps=10)
        with tempfile.TemporaryDirectory() as tempdir:
            root_dir = os.path.join(
                tempdir, 'test_monitor_w_video_recorder', 'monitor/')
            # always record
            env = ub_envs.Monitor(env,
                                  root_dir=root_dir,
                                  video=True,
                                  video_kwargs=dict(interval=1))
            self.assertEqual(2, len(env.tools))
            self.assertIsInstance(env.tools[0], ub_envs.StatsRecorder)
            self.assertIsInstance(env.tools[1], ub_envs.VideoRecorder)
            self.assertIsNotNone(env.tools[1].stats)

            for _episode in range(10):
                env.reset()
                self.assertTrue(env.tools[1].need_record)
                for _step in range(20):
                    _, _, done, _ = env.step(env.action_space.sample())
                    if done:
                        break
            env.close()
            video_path = os.path.join(root_dir, 'videos')
            files = sorted(os.listdir(video_path))
            self.assertEqual(10 * 2, len(files))  # json + mp4
            for filename in files:
                self.assertTrue(
                    filename.endswith(('metadata.json', 'video.mp4')),
                    filename)
            # second close, kept from the original test (presumably checks
            # that close() can safely be called twice)
            env.close()
            self.assertTrue(env.tools[0].closed)
            self.assertFalse(env.tools[1]._enabled)
            self.assertTrue(env.tools[1]._recorder.closed)
            json_files = [name for name in files if name.endswith('.json')]
            with open(os.path.join(video_path, json_files[0]), 'r') as fp:
                meta = json.load(fp)
            for key in ('episode_info', 'video_info',
                        'encoder_version', 'content_type'):
                self.assertIn(key, meta)
 def test_dqn_train(self, huber):
     """Check the training counters after `DQN.train`.

     Args:
         huber: value forwarded to the `huber` argument of `DQN`
             (supplied by the caller; the parameterization is not
             visible in this block).
     """
     n_envs = 3
     env = ub_vec.VecEnv([FakeImageEnv() for _ in range(n_envs)])
     model = dqn_model.DQN(env, huber=huber)
     n_samples, batch_size = 10, 10
     n_gradsteps, n_subepochs = 4, 1
     target_update = 2
     model.run(n_samples)
     model.train(batch_size, n_subepochs, n_gradsteps, target_update)
     # each subepoch performs `n_gradsteps` gradient steps
     self.assertEqual(n_subepochs * n_gradsteps, model.num_gradsteps)
     self.assertEqual(n_subepochs, model.num_subepochs)
 def test_dqn_prioritized(self, huber):
     """Check that prioritized replay weights are updated by training.

     After `run`, the first `n_samples * n_envs` entries of the sampler's
     weight tree are expected to be all ones; after one training step
     they must have changed and stay non-negative.

     Args:
         huber: value forwarded to the `huber` argument of `DQN`
             (supplied by the caller; the parameterization is not
             visible in this block).
     """
     n_envs = 3
     ub_utils.set_seed(1)
     envs = [FakeImageEnv() for _ in range(n_envs)]
     env = ub_vec.VecEnv(envs)
     # forward `huber` so that each parameterization really exercises a
     # different loss, as the sibling parameterized DQN tests do
     model = dqn_model.DQN(env,
                           huber=huber,
                           prioritized=True,
                           warmup_steps=5)
     self.assertIsInstance(model.prio_beta, ub_sche.Scheduler)
     self.assertIsInstance(model.sampler, ub_data.PriorSampler)
     n_samples = 10
     batch_size = 10
     model.run(n_samples)
     n_stored = n_samples * n_envs
     res = model.sampler._weight_tree[:n_stored]
     exp = np.ones_like(res, dtype=np.float32)
     self.assertArrayEqual(exp, res)
     model._train_step(batch_size)
     res = model.sampler._weight_tree[:n_stored]
     self.assertArrayNotEqual(exp, res)
     self.assertTrue(np.all(res >= 0.0))
 def test_ppo_setup_image_obs(self):
     """Check PPO setup on image observations for three network layouts.

     Covers the default layout, `share_net=True`, and a forced MLP with
     three hidden layers, comparing the number of trainable variables.
     """
     n_envs = 3
     env = ub_vec.VecEnv([FakeImageEnv() for _ in range(n_envs)])
     model = ppo_model.PPO(env)
     self.assertEqual(n_envs, model.n_envs)
     self.assertTrue(model.observation_space is not None)
     self.assertTrue(model.action_space is not None)
     # nature_cnn + nature_cnn + policy + value
     self.assertEqual(8 + 8 + 2 + 2, len(model.trainable_variables))
     # test share net
     shared_model = ppo_model.PPO(env, share_net=True)
     # nature_cnn + policy + value
     self.assertEqual(8 + 2 + 2, len(shared_model.trainable_variables))
     # test force mlp
     mlp_model = ppo_model.PPO(env,
                               share_net=False,
                               force_mlp=True,
                               mlp_units=[64, 64, 64])
     # mlp(3) + mlp(3) + policy + value
     self.assertEqual(6 + 6 + 2 + 2, len(mlp_model.trainable_variables))
 def test_dqn_train_model(self, huber):
     """Check the outputs of a single `DQN._train_model` call.

     Every returned loss must be a scalar without NaN, and the TD errors
     must have shape `(batch_size,)`, contain no NaN and be non-negative.

     Args:
         huber: value forwarded to the `huber` argument of `DQN`
             (supplied by the caller; the parameterization is not
             visible in this block).
     """
     n_envs = 3
     envs = [FakeImageEnv() for _ in range(n_envs)]
     env = ub_vec.VecEnv(envs)
     model = dqn_model.DQN(env, huber=huber)
     n_samples = 10
     batch_size = 2
     model.run(n_samples)
     samp = model.sampler
     batch = samp.sample(batch_size)
     batch['next_obs'] = samp.rel[1]['obs']
     losses, td = model._train_model(batch)
     for key, loss in losses.items():
         self.assertArrayEqual([], loss.shape)
         self.assertFalse(np.any(np.isnan(loss)), key)
     self.assertArrayEqual((batch_size, ), td.shape)
     # `any`, not `all`: a single NaN entry must already fail the test
     self.assertFalse(np.any(np.isnan(td)))
     self.assertTrue(np.all(td >= 0))
Exemple #18
0
    def test_monitor_wo_video_recorder_exception(self):
        """Check that stepping a Monitor that was never reset raises.

        The environment is stepped up to 20 times without any call to
        `reset()`; a `RuntimeError` is expected somewhere in that loop.
        """
        env = FakeImageEnv(max_steps=10)
        with tempfile.TemporaryDirectory() as tempdir:
            root_dir = os.path.join(
                tempdir,
                'test_monitor_wo_video_recorder_exception',
                'monitor/')

            env = ub_envs.Monitor(env, root_dir=root_dir, video=False)
            # test exception: need reset
            with self.assertRaises(RuntimeError):
                for _ in range(20):
                    env.step(env.action_space.sample())
            env.close()
            stats_tool = env.tools[0]
            self.assertTrue(stats_tool.closed)
 def test_dqn_td_error(self, huber):
     """Check shapes and NaN-freeness of `DQN.td_error` and `DQN.td_loss`.

     Args:
         huber: value forwarded to the `huber` argument of `DQN`
             (supplied by the caller; the parameterization is not
             visible in this block).
     """
     n_envs = 3
     env = ub_vec.VecEnv([FakeImageEnv() for _ in range(n_envs)])
     model = dqn_model.DQN(env, huber=huber)
     # build a batch of size one by adding a leading axis to each sample
     obs = np.expand_dims(env.observation_space.sample(), axis=0)
     next_obs = np.expand_dims(env.observation_space.sample(), axis=0)
     act = np.asarray([env.action_space.sample()], dtype=np.int64)
     done = np.asarray([False], dtype=np.bool_)
     rew = np.asarray([1.0], dtype=np.float32)
     # test td error
     td = model.td_error(obs, act, done, rew, next_obs)
     self.assertArrayEqual((1, ), td.shape)
     self.assertFalse(np.all(np.isnan(td)))
     # test td loss
     loss = model.td_loss(td)
     self.assertArrayEqual([], loss.shape)
     self.assertFalse(np.all(np.isnan(loss)))
 def test_ppo_gae(self):
     """Compare PPO advantages against the `legacy_gae` reference.

     Samples are first collected with `collect` and passed to
     `legacy_gae`; the env is then re-seeded, `run` is called, and the
     'adv' entry of the buffer must be close to the reference result.
     """
     n_envs = 2
     gamma = 0.99
     lam = 0.95
     n_samples = 20
     env = ub_vec.VecEnv(
         [FakeImageEnv(max_steps=10) for _ in range(n_envs)])
     env.seed(1)
     ub_utils.set_seed(1)
     model = ppo_model.PPO(env, gamma=gamma, gae_lambda=lam)
     model.collect(n_samples)
     exp_gae = legacy_gae(rew=model.buffer.data['rew'],
                          val=model.buffer.data['val'],
                          done=model.buffer.data['done'],
                          gamma=gamma,
                          lam=lam)
     # re-seed the env before the second rollout
     env.seed(1)
     model.run(n_samples)
     self.assertAllClose(exp_gae, model.buffer.data['adv'])
 def test_dqn_sample_nstep_batch(self):
     """Check the batch built by `DQN._sample_nstep_batch` for 2 steps.

     Observations and actions must equal those at the sampler's relative
     index 0, the reward must equal `rew[0] + gamma * rew[1]`, and the
     next observation must equal the observation at relative index 2.
     """
     ub_utils.set_seed(1)
     n_envs = 3
     gamma = 0.99
     env = ub_vec.VecEnv([FakeImageEnv() for _ in range(n_envs)])
     model = dqn_model.DQN(env, multi_step=2, gamma=gamma)
     model.run(10)
     samp = model.sampler
     batch = model._sample_nstep_batch(1)
     first = samp.rel[0]
     for key in ('obs', 'act'):
         self.assertArrayEqual(first[key], batch[key])
     self.assertArrayNotEqual(first['rew'], batch['rew'])
     second = samp.rel[1]
     # depends on random seed
     expected_rew = first['rew'] + gamma * second['rew']
     self.assertAllClose(expected_rew, batch['rew'], atol=1e-6)
     third = samp.rel[2]
     self.assertArrayEqual(third['obs'], batch['next_obs'])
Exemple #22
0
    def test_monitor_wo_video_recorder(self):
        """Check the stats file written by a Monitor without video.

        Verifies that the csv file exists right after construction, that
        extra tools can be added, and that after 25 steps plus an early
        reset the csv contains a header, a column line and three episode
        rows.
        """
        env = FakeImageEnv(max_steps=10)
        with tempfile.TemporaryDirectory() as tempdir:
            root_dir = os.path.join(
                tempdir, 'test_monitor_wo_video_recorder', 'monitor/')

            env = ub_envs.Monitor(env, root_dir=root_dir, video=False)
            self.assertEqual(1, len(env.tools))
            self.assertTrue(isinstance(env.tools[0], ub_envs.StatsRecorder))
            self.assertTrue(env.stats is not None)
            # assert csv created
            csv_path = os.path.join(root_dir,
                                    ub_envs.StatsRecorder.monitor_ext)
            self.assertTrue(os.path.isfile(csv_path))
            # test add tools
            extra_tools = [NoopMonitorTool(), NoopMonitorTool()]
            env.add_tools(extra_tools)
            self.assertEqual(3, len(env.tools))
            env.reset()
            for _ in range(25):
                _, _, done, _ = env.step(env.action_space.sample())
                if done:
                    env.reset()
            env.reset()  # early reset
            with open(csv_path, 'r') as csv_file:
                lines = csv_file.readlines()
            expected_fragments = (
                '"env_id": "FakeEnv"',
                'rewards,length,walltime',
                '55,10',
                '55,10',
                '15,5',  # early reset episode
            )
            self.assertEqual(5, len(lines))
            for fragment, line in zip(expected_fragments, lines):
                self.assertTrue(fragment in line)
            env.close()
            self.assertTrue(env.tools[0].closed)
Exemple #23
0
    def test_monitor_non_empty_folder(self):
        """Check that a Monitor refuses to reuse a non-empty folder.

        A first Monitor records 10 episodes into `root_dir`. Creating a
        second Monitor on the same folder with `force=False` must raise
        `RuntimeError`.
        """
        env = FakeImageEnv(max_steps=10)
        with tempfile.TemporaryDirectory() as tempdir:
            root_dir = os.path.join(
                tempdir, 'test_monitor_non_empty_folder', 'monitor/')
            # always record
            env = ub_envs.Monitor(env,
                                  root_dir=root_dir,
                                  video=True,
                                  video_kwargs=dict(interval=1))
            self.assertEqual(2, len(env.tools))
            stats_tool = env.tools[0]
            video_tool = env.tools[1]
            self.assertTrue(isinstance(stats_tool, ub_envs.StatsRecorder))
            self.assertTrue(isinstance(video_tool, ub_envs.VideoRecorder))
            self.assertTrue(video_tool.stats is not None)

            for _episode in range(10):
                env.reset()
                self.assertTrue(video_tool.need_record)
                for _step in range(20):
                    _, _, done, _ = env.step(env.action_space.sample())
                    if done:
                        break
            env.close()
            video_path = os.path.join(root_dir, 'videos')
            records = sorted(os.listdir(video_path))
            self.assertEqual(10 * 2, len(records))  # json + mp4
            env.close()
            self.assertTrue(stats_tool.closed)
            self.assertFalse(video_tool._enabled)
            self.assertTrue(video_tool._recorder.closed)
            # writing a monitor into a non-empty folder with force=False
            # raises a RuntimeError
            env = FakeImageEnv(max_steps=10)
            strict_kwargs = dict(interval=1, force=False)
            with self.assertRaises(RuntimeError):
                ub_envs.Monitor(env,
                                root_dir=root_dir,
                                video=True,
                                video_kwargs=strict_kwargs)
            env.close()