def test_monitor_stats_recorder_video_recorder_prefix(self):
    """Check that the stats csv and the video files carry the given prefix."""
    prefix = 'eval'
    env = FakeImageEnv(max_steps=10)
    with tempfile.TemporaryDirectory() as tempdir:
        root_dir = os.path.join(
            tempdir,
            'test_monitor_stats_recorder_video_recorder_prefix',
            'monitor/')
        env = ub_envs.Monitor(env, root_dir=root_dir, prefix=prefix,
                              video=True, video_kwargs=None)
        self.assertEqual(2, len(env.tools))
        stats_tool, video_tool = env.tools
        self.assertIsInstance(stats_tool, ub_envs.StatsRecorder)
        self.assertIsInstance(video_tool, ub_envs.VideoRecorder)

        # Roll the environment, restarting whenever an episode ends.
        env.reset()
        for _ in range(25):
            _, _, done, _ = env.step(env.action_space.sample())
            if done:
                env.reset()

        # The stats file is named "<prefix>.<monitor_ext>".
        csv_name = prefix + '.' + ub_envs.StatsRecorder.monitor_ext
        csv_path = os.path.join(root_dir, csv_name)
        self.assertTrue(os.path.isfile(csv_path))

        # Every file in the video folder must start with the prefix.
        video_path = os.path.join(root_dir, 'videos')
        for filename in os.listdir(video_path):
            self.assertTrue(filename.startswith(prefix))

        env.close()
        self.assertTrue(stats_tool.closed)
        self.assertFalse(video_tool._enabled)
        self.assertTrue(video_tool._recorder.closed)
def test_monitor_w_video_recorder_cubic(self):
    """Check the recording schedule used when no video kwargs are given.

    With ``video_kwargs=None`` the test expects ``need_record`` to be
    true only for episodes 1 and 8 (out of 10), which leaves two
    recordings on disk (one json and one mp4 each).
    """
    env = FakeImageEnv(max_steps=10)
    with tempfile.TemporaryDirectory() as tempdir:
        root_dir = os.path.join(
            tempdir, 'test_monitor_w_video_recorder_cubic', 'monitor/')
        # No video kwargs: rely on the recorder's default schedule.
        env = ub_envs.Monitor(env, root_dir=root_dir, video=True,
                              video_kwargs=None)
        self.assertEqual(2, len(env.tools))
        stats_tool, video_tool = env.tools
        self.assertIsInstance(stats_tool, ub_envs.StatsRecorder)
        self.assertIsInstance(video_tool, ub_envs.VideoRecorder)
        self.assertIsNotNone(video_tool.stats)

        recorded_episodes = [1, 8]
        for episode in range(1, 11):
            env.reset()
            self.assertEqual(episode in recorded_episodes,
                             video_tool.need_record)
            for _ in range(20):
                _, _, done, _ = env.step(env.action_space.sample())
                if done:
                    break
        env.close()

        video_path = os.path.join(root_dir, 'videos')
        records = sorted(os.listdir(video_path))
        self.assertEqual(2 * 2, len(records))  # json + mp4

        # Second close, as in the original test.
        env.close()
        self.assertTrue(stats_tool.closed)
        self.assertFalse(video_tool._enabled)
        self.assertTrue(video_tool._recorder.closed)
def test_video_recorder_init_exception(self):
    """Check that Monitor raises RuntimeError for a non-callable interval."""
    env = FakeImageEnv(max_steps=10)
    with tempfile.TemporaryDirectory() as tempdir:
        root_dir = os.path.join(
            tempdir, 'test_video_recorder_init_exception', 'monitor/')
        # interval not callable
        bad_video_kwargs = dict(interval='hello')
        with self.assertRaises(RuntimeError):
            env = ub_envs.Monitor(env, root_dir=root_dir, video=True,
                                  video_kwargs=bad_video_kwargs)
        # The constructor raised, so `env` is still the unwrapped env.
        env.close()
def test_dqn_call_predict(self):
    """Check output shapes of DQN ``__call__`` and ``predict``.

    The same checks are run for the default model and for a model built
    with ``dueling=True``.
    """
    envs = [FakeImageEnv() for _ in range(3)]
    env = ub_vec.VecEnv(envs)
    model = dqn_model.DQN(env)
    batch_size = 3
    obs_space = env.observation_space
    act_space = env.action_space
    act_dims = act_space.n
    obs = np.asarray([obs_space.sample() for _ in range(batch_size)])

    def check_shapes(net):
        # test call
        act, val = net(obs, proc_obs=True)
        self.assertArrayEqual((batch_size, ), act.shape)
        self.assertArrayEqual((batch_size, ), val.shape)
        # test predict
        single_act = net.predict(obs_space.sample())
        self.assertArrayEqual([], single_act.shape)

    check_shapes(model)
    # test dueling
    model = dqn_model.DQN(env, dueling=True)
    check_shapes(model)
def test_dqn_run(self):
    """Check that ``run`` fills the replay buffer with the expected shapes."""
    n_envs = 3
    warmup_steps = 50
    buffer_size = 90
    envs = [FakeImageEnv() for _ in range(n_envs)]
    env = ub_vec.VecEnv(envs)
    model = dqn_model.DQN(env, buffer_size=buffer_size,
                          warmup_steps=warmup_steps)
    obs_shape = env.observation_space.shape
    act_shape = env.action_space.shape
    n_samples = 100
    n_slots = buffer_size // n_envs

    model.run(n_samples)

    buf = model.buffer
    self.assertEqual(n_slots * n_envs, len(buf))
    self.assertTrue(buf.ready_for_sample)
    self.assertTrue(buf.isfull)

    # test buffer contents
    expected_shapes = {
        'obs': (n_slots, n_envs, *obs_shape),
        'act': (n_slots, n_envs, *act_shape),
        'rew': (n_slots, n_envs),
        'done': (n_slots, n_envs),
    }
    for field, shape in expected_shapes.items():
        self.assertArrayEqual(shape, buf.data[field].shape)
def test_dqn_reg_loss(self):
    """Check that the DQN regularization loss is a scalar and not NaN."""
    n_envs = 3
    env = ub_vec.VecEnv([FakeImageEnv() for _ in range(n_envs)])
    model = dqn_model.DQN(env)
    train_vars = model.agent.trainable_variables
    loss = model.reg_loss(train_vars)
    self.assertArrayEqual([], loss.shape)
    loss_is_nan = np.all(np.isnan(loss))
    self.assertFalse(loss_is_nan)
def test_monitor_disabled_video_recorder(self):
    """Check that the monitor keeps running after the video tool is closed."""
    env = FakeImageEnv(max_steps=10)
    with tempfile.TemporaryDirectory() as tempdir:
        root_dir = os.path.join(
            tempdir, 'test_monitor_disabled_video_recorder', 'monitor/')
        # always record
        env = ub_envs.Monitor(env, root_dir=root_dir, video=True,
                              video_kwargs=dict(interval=1))
        self.assertEqual(2, len(env.tools))
        stats_tool, video_tool = env.tools
        self.assertIsInstance(stats_tool, ub_envs.StatsRecorder)
        self.assertIsInstance(video_tool, ub_envs.VideoRecorder)
        self.assertIsNotNone(video_tool.stats)

        # Close the video tool before any episode starts
        # (_VideoRecorder will not be created).
        video_tool.close()

        for _ in range(10):
            env.reset()
            self.assertTrue(video_tool.need_record)
            for _ in range(20):
                _, _, done, _ = env.step(env.action_space.sample())
                if done:
                    break

        env.close()
        self.assertTrue(stats_tool.closed)
        self.assertFalse(video_tool._enabled)
def test_dqn_save_load(self):
    """Check that a saved DQN is restored with identical state.

    Compares config, network variables and, after training both models
    on the same batches, the losses, TD errors and optimizer variables.
    Statement order is kept as in the original since results depend on
    the seeds and on the sampler state.
    """
    n_envs = 3
    envs = [FakeImageEnv() for _ in range(n_envs)]
    env = ub_vec.VecEnv(envs)
    env.seed(1)
    ub_utils.set_seed(1)
    model = dqn_model.DQN(env, warmup_steps=5)
    n_samples = 10
    batch_size = 10
    model.run(n_samples)

    def draw_batch():
        # Sample a batch and attach the following step's observations.
        sampled = model.sampler(batch_size)
        sampled['next_obs'] = model.sampler.rel[1]['obs']
        return sampled

    # train for some steps
    ub_utils.set_seed(2)
    model._train_model(draw_batch())

    with tempfile.TemporaryDirectory() as tempdir:
        save_path = tempdir
        # save & load model
        model.save(save_path)
        loaded_model = dqn_model.DQN.load(save_path)

        # check model setup
        self.assertIsNotNone(loaded_model.agent)
        self.assertIsNotNone(loaded_model.buffer)
        self.assertIsNotNone(loaded_model.optimizer)

        # check if config is correctly restored
        model_config = model.get_config()
        loaded_config = loaded_model.get_config()
        self.assertEqual(set(model_config.keys()),
                         set(loaded_config.keys()))
        for key, value in model_config.items():
            self.assertEqual(value, loaded_config[key], key)

        # check if all network variables are correctly restored
        self.assertVariables(model.trainable_variables,
                             loaded_model.trainable_variables)

        # test optimizers: train both models on the same batches
        batches = []
        for _ in range(3):
            batches.append(draw_batch())
        ub_utils.set_seed(1)
        for batch in batches:
            losses1, td1 = model._train_model(batch)
        for batch in batches:
            losses2, td2 = loaded_model._train_model(batch)

        # check if losses match
        self.assertEqual(set(losses1.keys()), set(losses2.keys()))
        for key in losses1.keys():
            self.assertEqual(losses1[key], losses2[key])
        self.assertAllClose(td1, td2)

        # check if vars are same
        self.assertVariables(model.trainable_variables,
                             loaded_model.trainable_variables)
        # check if params of the optimizers are same
        self.assertVariables(model.optimizer.variables(),
                             loaded_model.optimizer.variables())
def test_ppo_reset_spaces_conflict(self):
    """Check that ``set_env`` raises RuntimeError on a space conflict."""
    n_envs = 4
    continuous_env = ub_vec.VecEnv(
        [FakeContinuousEnv() for _ in range(n_envs)])
    model = ppo_model.PPO(continuous_env)

    image_env = ub_vec.VecEnv([FakeImageEnv() for _ in range(n_envs)])
    with self.assertRaises(RuntimeError):
        # space conflict
        model.set_env(image_env)
def test_dqn_delayed_setup(self):
    """Check that a DQN built without an env can be set up later."""
    model = dqn_model.DQN(None)
    # Nothing is configured until an env is attached.
    self.assertIsNone(model.observation_space)
    self.assertIsNone(model.action_space)
    self.assertIsNone(model.agent)

    env = ub_vec.VecEnv([FakeImageEnv() for _ in range(3)])
    model.set_env(env)
    model.setup()

    self.assertIsNotNone(model.observation_space)
    self.assertIsNotNone(model.action_space)
    n_expected_vars = (8 + 2) * 2
    self.assertEqual(n_expected_vars, len(model.trainable_variables))
def test_video_recorder_invalid_frame(self):
    """Check how the video recorder reacts to invalid frames.

    A float32 frame is expected to mark the recorder as broken, and the
    encoder is expected to raise RuntimeError for a non-array frame and
    for a (64, 32, 3) frame.
    """
    env = FakeImageEnv(max_steps=10)
    with tempfile.TemporaryDirectory() as tempdir:
        root_dir = os.path.join(
            tempdir, 'test_video_recorder_invalid_frame', 'monitor/')
        # always record
        env = ub_envs.Monitor(env, root_dir=root_dir, video=True,
                              video_kwargs=dict(interval=1))
        self.assertEqual(2, len(env.tools))
        stats_tool, video_tool = env.tools
        self.assertIsInstance(stats_tool, ub_envs.StatsRecorder)
        self.assertIsInstance(video_tool, ub_envs.VideoRecorder)
        self.assertIsNotNone(video_tool.stats)

        env.reset()
        # start video encoder
        env.step(env.action_space.sample())

        # test invalid frame
        print('\n>>> Test invalid frame type np.float32')
        float_frame = np.random.normal(size=(64, 64, 3)).astype(np.float32)
        video_tool._recorder._encode_image_frame(float_frame)
        print('<<<\n')
        self.assertTrue(video_tool._recorder.broken)

        with self.assertRaises(RuntimeError):
            # Not a np.ndarray
            video_tool._recorder.encoder.capture_frame('foobar')
        with self.assertRaises(RuntimeError):
            video_tool._recorder.encoder.capture_frame(
                np.zeros((64, 32, 3), dtype=np.uint8))

        # The assertions below expect both tools to still be open
        # after the failures above.
        self.assertFalse(stats_tool.closed)
        self.assertTrue(video_tool._enabled)
        self.assertFalse(video_tool._recorder.closed)
def test_dqn_setup_image_obs(self):
    """Check DQN setup on image observations, with and without dueling."""
    env = ub_vec.VecEnv([FakeImageEnv() for _ in range(3)])

    model = dqn_model.DQN(env)
    self.assertEqual(3, model.n_envs)
    self.assertIsNotNone(model.observation_space)
    self.assertIsNotNone(model.action_space)
    # nature_cnn + value
    plain_var_count = (8 + 2) * 2
    self.assertEqual(plain_var_count, len(model.trainable_variables))

    # test dueling
    model = dqn_model.DQN(env, dueling=True)
    # nature_cnn + value(dueling)
    dueling_var_count = (8 + 4) * 2
    self.assertEqual(dueling_var_count, len(model.trainable_variables))
def test_monitor_w_video_recorder(self):
    """Check that every episode is recorded when ``interval=1``.

    Ten episodes must produce twenty files (one metadata json and one
    mp4 per episode), and the metadata json must contain the expected
    top-level keys.
    """
    env = FakeImageEnv(max_steps=10)
    with tempfile.TemporaryDirectory() as tempdir:
        root_dir = os.path.join(tempdir, 'test_monitor_w_video_recorder')
        root_dir = os.path.join(root_dir, 'monitor/')
        # always record
        env = ub_envs.Monitor(env, root_dir=root_dir, video=True,
                              video_kwargs=dict(interval=1))
        self.assertEqual(2, len(env.tools))
        self.assertIsInstance(env.tools[0], ub_envs.StatsRecorder)
        self.assertIsInstance(env.tools[1], ub_envs.VideoRecorder)
        self.assertIsNotNone(env.tools[1].stats)

        for ep in range(10):
            env.reset()
            self.assertTrue(env.tools[1].need_record)
            for step in range(20):
                obs, rew, done, info = env.step(env.action_space.sample())
                if done:
                    break
        env.close()

        video_path = os.path.join(root_dir, 'videos')
        files = sorted(os.listdir(video_path))
        self.assertEqual(10 * 2, len(files))  # json + mp4
        for filename in files:
            self.assertTrue(
                filename.endswith(('metadata.json', 'video.mp4')),
                filename)

        # Second close, as in the original test.
        env.close()
        self.assertTrue(env.tools[0].closed)
        self.assertFalse(env.tools[1]._enabled)
        self.assertTrue(env.tools[1]._recorder.closed)

        # Inspect the first metadata file. Fail with a clear message
        # (instead of an IndexError) if none was written.
        json_files = [f for f in files if f.endswith('.json')]
        self.assertTrue(json_files, 'no metadata json file was written')
        with open(os.path.join(video_path, json_files[0]), 'r') as f:
            meta = json.load(f)
        for key in ('episode_info', 'video_info',
                    'encoder_version', 'content_type'):
            self.assertIn(key, meta)
def test_dqn_train(self, huber):
    """Check the gradient-step and sub-epoch counters after ``train``."""
    n_envs = 3
    env = ub_vec.VecEnv([FakeImageEnv() for _ in range(n_envs)])
    model = dqn_model.DQN(env, huber=huber)

    n_samples = 10
    batch_size = 10
    n_gradsteps = 4
    n_subepochs = 1
    target_update = 2
    # One sub-epoch performs `n_gradsteps` gradient steps.
    exp_gradsteps = n_subepochs * n_gradsteps

    model.run(n_samples)
    model.train(batch_size, n_subepochs, n_gradsteps, target_update)

    self.assertEqual(exp_gradsteps, model.num_gradsteps)
    self.assertEqual(n_subepochs, model.num_subepochs)
def test_dqn_prioritized(self, huber):
    """Check that a training step updates the prioritized-replay weights.

    Args:
        huber: forwarded to ``DQN(huber=...)``. The original test accepted
            this parameter but never used it, so every parameterization
            exercised the same configuration.
    """
    n_envs = 3
    ub_utils.set_seed(1)
    envs = [FakeImageEnv() for _ in range(n_envs)]
    env = ub_vec.VecEnv(envs)
    model = dqn_model.DQN(env, prioritized=True, warmup_steps=5,
                          huber=huber)
    self.assertIsInstance(model.prio_beta, ub_sche.Scheduler)
    self.assertIsInstance(model.sampler, ub_data.PriorSampler)

    n_samples = 10
    batch_size = 10
    model.run(n_samples)

    # Before training, the first n_samples * n_envs weights are all ones.
    res = model.sampler._weight_tree[:n_samples * n_envs]
    exp = np.ones_like(res, dtype=np.float32)
    self.assertArrayEqual(exp, res)

    # One training step must change the weights and keep them non-negative.
    model._train_step(batch_size)
    res = model.sampler._weight_tree[:n_samples * n_envs]
    self.assertArrayNotEqual(exp, res)
    self.assertTrue(np.all(res >= 0.0))
def test_ppo_setup_image_obs(self):
    """Check PPO trainable-variable counts for three network layouts."""
    env = ub_vec.VecEnv([FakeImageEnv() for _ in range(3)])

    model = ppo_model.PPO(env)
    self.assertEqual(3, model.n_envs)
    self.assertIsNotNone(model.observation_space)
    self.assertIsNotNone(model.action_space)
    # nature_cnn + nature_cnn + policy + value
    separate_net_vars = 8 + 8 + 2 + 2
    self.assertEqual(separate_net_vars, len(model.trainable_variables))

    # test share net
    model = ppo_model.PPO(env, share_net=True)
    # nature_cnn + policy + value
    shared_net_vars = 8 + 2 + 2
    self.assertEqual(shared_net_vars, len(model.trainable_variables))

    # test force mlp
    model = ppo_model.PPO(env, share_net=False, force_mlp=True,
                          mlp_units=[64, 64, 64])
    # mlp(3) + mlp(3) + policy + value
    mlp_net_vars = 6 + 6 + 2 + 2
    self.assertEqual(mlp_net_vars, len(model.trainable_variables))
def test_dqn_train_model(self, huber):
    """Check losses and TD errors returned by ``_train_model``.

    Every loss must be a non-NaN scalar. The TD error must have one
    entry per sample, contain no NaN at all, and be non-negative.

    Args:
        huber: forwarded to ``DQN(huber=...)``.
    """
    n_envs = 3
    envs = [FakeImageEnv() for _ in range(n_envs)]
    env = ub_vec.VecEnv(envs)
    model = dqn_model.DQN(env, huber=huber)
    n_samples = 10
    batch_size = 2
    model.run(n_samples)

    samp = model.sampler
    batch = samp.sample(batch_size)
    batch['next_obs'] = samp.rel[1]['obs']
    losses, td = model._train_model(batch)

    for loss in losses.values():
        self.assertArrayEqual([], loss.shape)
        self.assertFalse(np.any(np.isnan(loss)))

    self.assertArrayEqual((batch_size, ), td.shape)
    # `np.any`, not `np.all`: with more than one sample, `np.all` would
    # let a partially-NaN TD error pass.
    self.assertFalse(np.any(np.isnan(td)))
    self.assertTrue(np.all(td >= 0))
def test_monitor_wo_video_recorder_exception(self):
    """Check that stepping a monitored env without a reset raises."""
    env = FakeImageEnv(max_steps=10)
    with tempfile.TemporaryDirectory() as tempdir:
        root_dir = os.path.join(
            tempdir,
            'test_monitor_wo_video_recorder_exception',
            'monitor/')
        env = ub_envs.Monitor(env, root_dir=root_dir, video=False)
        with self.assertRaises(RuntimeError):
            # test exception: need reset
            for _ in range(20):
                env.step(env.action_space.sample())
        env.close()
        stats_tool = env.tools[0]
        self.assertTrue(stats_tool.closed)
def test_dqn_td_error(self, huber):
    """Check shapes and NaN-freeness of ``td_error`` and ``td_loss``."""
    n_envs = 3
    env = ub_vec.VecEnv([FakeImageEnv() for _ in range(n_envs)])
    model = dqn_model.DQN(env, huber=huber)

    # Build a single-sample transition with a leading batch axis.
    obs = env.observation_space.sample()
    obs = obs.reshape(1, *obs.shape)
    next_obs = env.observation_space.sample()
    next_obs = next_obs.reshape(1, *next_obs.shape)
    sampled_act = env.action_space.sample()
    act = np.asarray([sampled_act], dtype=np.int64)
    done = np.asarray([False], dtype=np.bool_)
    rew = np.asarray([1.0], dtype=np.float32)

    # test td error
    td = model.td_error(obs, act, done, rew, next_obs)
    self.assertArrayEqual((1, ), td.shape)
    td_is_nan = np.all(np.isnan(td))
    self.assertFalse(td_is_nan)

    # test td loss
    loss = model.td_loss(td)
    self.assertArrayEqual([], loss.shape)
    loss_is_nan = np.all(np.isnan(loss))
    self.assertFalse(loss_is_nan)
def test_ppo_gae(self):
    """Compare the advantages stored by PPO with ``legacy_gae``."""
    n_envs = 2
    gamma = 0.99
    lam = 0.95
    envs = [FakeImageEnv(max_steps=10) for _ in range(n_envs)]
    env = ub_vec.VecEnv(envs)
    env.seed(1)
    ub_utils.set_seed(1)
    n_samples = 20
    model = ppo_model.PPO(env, gamma=gamma, gae_lambda=lam)

    # Reference advantages, computed from the collected rollout.
    model.collect(n_samples)
    rollout = model.buffer.data
    exp_gae = legacy_gae(rew=rollout['rew'],
                         val=rollout['val'],
                         done=rollout['done'],
                         gamma=gamma,
                         lam=lam)

    # Advantages written to the buffer by `run`, after re-seeding the env.
    env.seed(1)
    model.run(n_samples)
    gae = model.buffer.data['adv']
    self.assertAllClose(exp_gae, gae)
def test_dqn_sample_nstep_batch(self):
    """Check the n-step batch against the sampler's relative batches.

    With ``multi_step=2`` the n-step reward must equal
    ``rew[0] + gamma * rew[1]`` and ``next_obs`` must be the observation
    two steps ahead.
    """
    ub_utils.set_seed(1)
    n_envs = 3
    gamma = 0.99
    multi_step = 2
    env = ub_vec.VecEnv([FakeImageEnv() for _ in range(n_envs)])
    model = dqn_model.DQN(env, multi_step=multi_step, gamma=gamma)
    n_samples = 10
    batch_size = 1
    model.run(n_samples)

    samp = model.sampler
    batch = model._sample_nstep_batch(batch_size)

    # Step 0: observation and action are unchanged, the reward is not.
    orig_batch = samp.rel[0]
    self.assertArrayEqual(orig_batch['obs'], batch['obs'])
    self.assertArrayEqual(orig_batch['act'], batch['act'])
    self.assertArrayNotEqual(orig_batch['rew'], batch['rew'])

    # Step 1: discounted reward is added (depends on random seed).
    next_batch = samp.rel[1]
    exp_rew = orig_batch['rew'] + gamma * next_batch['rew']
    self.assertAllClose(exp_rew, batch['rew'], atol=1e-6)

    # Step 2: provides the next observation.
    nnext_batch = samp.rel[2]
    self.assertArrayEqual(nnext_batch['obs'], batch['next_obs'])
def test_monitor_wo_video_recorder(self):
    """Check the stats csv written by a monitor without video recording."""
    env = FakeImageEnv(max_steps=10)
    with tempfile.TemporaryDirectory() as tempdir:
        root_dir = os.path.join(
            tempdir, 'test_monitor_wo_video_recorder', 'monitor/')
        env = ub_envs.Monitor(env, root_dir=root_dir, video=False)
        self.assertEqual(1, len(env.tools))
        self.assertIsInstance(env.tools[0], ub_envs.StatsRecorder)
        self.assertIsNotNone(env.stats)

        # assert csv created
        csv_path = os.path.join(root_dir,
                                ub_envs.StatsRecorder.monitor_ext)
        self.assertTrue(os.path.isfile(csv_path))

        # test add tools
        env.add_tools([NoopMonitorTool(), NoopMonitorTool()])
        self.assertEqual(3, len(env.tools))

        env.reset()
        for _ in range(25):
            _, _, done, _ = env.step(env.action_space.sample())
            if done:
                env.reset()
        env.reset()  # early reset

        with open(csv_path, 'r') as f:
            lines = f.readlines()
        self.assertEqual(5, len(lines))
        expected_fragments = (
            '"env_id": "FakeEnv"',
            'rewards,length,walltime',
            '55,10',
            '55,10',
            '15,5',  # early reset episode
        )
        for fragment, line in zip(expected_fragments, lines):
            self.assertTrue(fragment in line)

        env.close()
        self.assertTrue(env.tools[0].closed)
def test_monitor_non_empty_folder(self):
    """Check that reusing a non-empty folder with ``force=False`` raises."""
    env = FakeImageEnv(max_steps=10)
    with tempfile.TemporaryDirectory() as tempdir:
        root_dir = os.path.join(
            tempdir, 'test_monitor_non_empty_folder', 'monitor/')
        # always record
        env = ub_envs.Monitor(env, root_dir=root_dir, video=True,
                              video_kwargs=dict(interval=1))
        self.assertEqual(2, len(env.tools))
        stats_tool, video_tool = env.tools
        self.assertIsInstance(stats_tool, ub_envs.StatsRecorder)
        self.assertIsInstance(video_tool, ub_envs.VideoRecorder)
        self.assertIsNotNone(video_tool.stats)

        for _ in range(10):
            env.reset()
            self.assertTrue(video_tool.need_record)
            for _ in range(20):
                _, _, done, _ = env.step(env.action_space.sample())
                if done:
                    break
        env.close()

        video_path = os.path.join(root_dir, 'videos')
        records = sorted(os.listdir(video_path))
        self.assertEqual(10 * 2, len(records))  # json + mp4

        # Second close, as in the original test.
        env.close()
        self.assertTrue(stats_tool.closed)
        self.assertFalse(video_tool._enabled)
        self.assertTrue(video_tool._recorder.closed)

        # writing monitor to a non empty file with force=False raises
        # a RuntimeError
        env = FakeImageEnv(max_steps=10)
        no_force_kwargs = dict(interval=1, force=False)
        with self.assertRaises(RuntimeError):
            ub_envs.Monitor(env, root_dir=root_dir, video=True,
                            video_kwargs=no_force_kwargs)
        env.close()