def test_dqn_call_predict(self):
    """Check output shapes of calling a DQN model and of ``predict``.

    The same checks run twice: once on a model built with default
    arguments and once on a model built with ``dueling=True``.
    """
    env = ub_vec.VecEnv([FakeImageEnv() for _ in range(3)])
    model = dqn_model.DQN(env)
    batch_size = 3
    obs_space = env.observation_space
    act_space = env.action_space
    # Not used by the assertions below; kept so the attribute is still read.
    act_dims = act_space.n
    obs = np.asarray([obs_space.sample() for _ in range(batch_size)])

    def check_output_shapes(dqn):
        # Batched call: one action and one value per observation.
        actions, values = dqn(obs, proc_obs=True)
        self.assertArrayEqual((batch_size, ), actions.shape)
        self.assertArrayEqual((batch_size, ), values.shape)
        # Predict on a single observation: the action has an empty shape.
        action = dqn.predict(obs_space.sample())
        self.assertArrayEqual([], action.shape)

    check_output_shapes(model)
    # Same checks on the dueling variant.
    check_output_shapes(dqn_model.DQN(env, dueling=True))
def test_dqn_setup_image_obs(self):
    """Check that a DQN built on image envs is set up on construction.

    Verifies the env count, that both spaces are populated, and the number
    of trainable variables for the default and the dueling model.
    """
    env = ub_vec.VecEnv([FakeImageEnv() for _ in range(3)])

    plain = dqn_model.DQN(env)
    self.assertEqual(3, plain.n_envs)
    self.assertIsNotNone(plain.observation_space)
    self.assertIsNotNone(plain.action_space)
    # nature_cnn (8) + value (2)
    # NOTE(review): the factor 2 presumably counts a second (target)
    # network -- confirm against the model definition.
    self.assertEqual((8 + 2) * 2, len(plain.trainable_variables))

    dueling = dqn_model.DQN(env, dueling=True)
    # nature_cnn (8) + dueling value (4)
    self.assertEqual((8 + 4) * 2, len(dueling.trainable_variables))
def test_dqn_run(self):
    """Check the replay buffer state after ``run`` collects samples.

    ``run(100)`` is called with a buffer size of 90 and 3 envs. The buffer
    is then expected to be full, ready for sampling, and to hold arrays
    shaped ``(n_slots, n_envs, ...)``.
    """
    n_envs = 3
    buffer_size = 90
    warmup_steps = 50
    n_samples = 100
    n_slots = buffer_size // n_envs

    env = ub_vec.VecEnv([FakeImageEnv() for _ in range(n_envs)])
    model = dqn_model.DQN(env, buffer_size=buffer_size,
                          warmup_steps=warmup_steps)
    obs_shape = env.observation_space.shape
    act_shape = env.action_space.shape

    model.run(n_samples)

    buf = model.buffer
    self.assertEqual(n_slots * n_envs, len(buf))
    self.assertTrue(buf.ready_for_sample)
    self.assertTrue(buf.isfull)

    # Expected shape of every stored field, checked in this order.
    expected_shapes = {
        'obs': (n_slots, n_envs, *obs_shape),
        'act': (n_slots, n_envs, *act_shape),
        'rew': (n_slots, n_envs),
        'done': (n_slots, n_envs),
    }
    for field, shape in expected_shapes.items():
        self.assertArrayEqual(shape, buf.data[field].shape)
def test_dqn_reg_loss(self):
    """Check that ``reg_loss`` on the agent's variables is a non-NaN scalar."""
    n_envs = 3
    env = ub_vec.VecEnv([FakeImageEnv() for _ in range(n_envs)])
    model = dqn_model.DQN(env)

    agent_vars = model.agent.trainable_variables
    reg = model.reg_loss(agent_vars)

    self.assertArrayEqual([], reg.shape)
    self.assertFalse(np.all(np.isnan(reg)))
def test_dqn_save_load(self):
    """Check that a saved DQN model is restored faithfully by ``load``.

    After a short run and one training step the model is saved to a
    temporary directory and loaded back. The test then compares config,
    trainable variables and, after training both models on the same
    batches, the losses, TD errors, variables and optimizer variables.
    """
    n_envs = 3
    env = ub_vec.VecEnv([FakeImageEnv() for _ in range(n_envs)])
    env.seed(1)
    ub_utils.set_seed(1)
    model = dqn_model.DQN(env, warmup_steps=5)
    n_samples = 10
    batch_size = 10
    model.run(n_samples)

    def draw_batch():
        # Sample a batch and attach the next observations (relative index 1).
        sampled = model.sampler(batch_size)
        sampled['next_obs'] = model.sampler.rel[1]['obs']
        return sampled

    # train for some steps
    ub_utils.set_seed(2)
    model._train_model(draw_batch())

    with tempfile.TemporaryDirectory() as save_path:
        # save & load model
        model.save(save_path)
        loaded_model = dqn_model.DQN.load(save_path)

        # check model setup
        self.assertIsNotNone(loaded_model.agent)
        self.assertIsNotNone(loaded_model.buffer)
        self.assertIsNotNone(loaded_model.optimizer)

        # check if config is correctly restored
        model_config = model.get_config()
        loaded_config = loaded_model.get_config()
        self.assertEqual(set(model_config.keys()),
                         set(loaded_config.keys()))
        for key, value in model_config.items():
            self.assertEqual(value, loaded_config[key], key)

        # check if all network variables are correctly restored
        self.assertVariables(model.trainable_variables,
                             loaded_model.trainable_variables)

        # test optimizers: train both models on the same three batches
        batches = [draw_batch() for _ in range(3)]
        ub_utils.set_seed(1)
        for batch in batches:
            losses1, td1 = model._train_model(batch)
        for batch in batches:
            losses2, td2 = loaded_model._train_model(batch)

        # check that the losses of the last batch match
        self.assertEqual(set(losses1.keys()), set(losses2.keys()))
        for key, value in losses1.items():
            self.assertEqual(value, losses2[key])
        self.assertAllClose(td1, td2)

        # check if vars are same
        self.assertVariables(model.trainable_variables,
                             loaded_model.trainable_variables)
        # check if params of the optimizers are same
        self.assertVariables(model.optimizer.variables(),
                             loaded_model.optimizer.variables())
def test_dqn_delayed_setup(self):
    """Check that a DQN built without an env can be set up afterwards.

    A model constructed with ``None`` has no spaces and no agent. After
    ``set_env`` and ``setup`` the spaces are populated and the expected
    number of trainable variables exists.
    """
    model = dqn_model.DQN(None)
    self.assertIsNone(model.observation_space)
    self.assertIsNone(model.action_space)
    self.assertIsNone(model.agent)

    env = ub_vec.VecEnv([FakeImageEnv() for _ in range(3)])
    model.set_env(env)
    model.setup()

    self.assertIsNotNone(model.observation_space)
    self.assertIsNotNone(model.action_space)
    # Same variable count as the non-dueling model built directly on an env.
    self.assertEqual((8 + 2) * 2, len(model.trainable_variables))
def test_dqn_train(self, huber):
    """Check the step counters after one ``train`` call.

    Args:
        huber: forwarded to ``DQN(..., huber=huber)``.
    """
    n_envs = 3
    n_samples = 10
    batch_size = 10
    n_subepochs = 1
    n_gradsteps = 4
    target_update = 2

    env = ub_vec.VecEnv([FakeImageEnv() for _ in range(n_envs)])
    model = dqn_model.DQN(env, huber=huber)

    model.run(n_samples)
    model.train(batch_size, n_subepochs, n_gradsteps, target_update)

    # Gradient steps accumulate once per grad step in every subepoch.
    self.assertEqual(n_subepochs * n_gradsteps, model.num_gradsteps)
    self.assertEqual(n_subepochs, model.num_subepochs)
def test_dqn_prioritized(self, huber):
    """Check that prioritized replay updates sample weights when training.

    With ``prioritized=True`` the model must expose a ``Scheduler`` as
    ``prio_beta`` and a ``PriorSampler`` as ``sampler``. After ``run`` the
    first ``n_samples * n_envs`` entries of the sampler's weight tree are
    all ones; after one ``_train_step`` they must differ from ones and
    stay non-negative.

    Args:
        huber: forwarded to ``DQN(..., huber=huber)``. The original test
            accepted this parameter but never used it, so every
            parameterization exercised the same configuration.
    """
    n_envs = 3
    ub_utils.set_seed(1)
    envs = [FakeImageEnv() for _ in range(n_envs)]
    env = ub_vec.VecEnv(envs)
    model = dqn_model.DQN(env, prioritized=True, warmup_steps=5,
                          huber=huber)
    self.assertIsInstance(model.prio_beta, ub_sche.Scheduler)
    self.assertIsInstance(model.sampler, ub_data.PriorSampler)

    n_samples = 10
    batch_size = 10
    model.run(n_samples)

    # Weights of the freshly collected entries, before any training.
    n_entries = n_samples * n_envs
    res = model.sampler._weight_tree[:n_entries]
    exp = np.ones_like(res, dtype=np.float32)
    self.assertArrayEqual(exp, res)

    # One training step must change the weights.
    model._train_step(batch_size)
    res = model.sampler._weight_tree[:n_entries]
    self.assertArrayNotEqual(exp, res)
    self.assertTrue(np.all(res >= 0.0))
def test_dqn_train_model(self, huber):
    """Check the outputs of one ``_train_model`` call on a sampled batch.

    Every returned loss must be a non-NaN scalar. The returned TD error
    must have shape ``(batch_size,)``, not be all-NaN, and be non-negative.

    Args:
        huber: forwarded to ``DQN(..., huber=huber)``.
    """
    n_envs = 3
    n_samples = 10
    batch_size = 2

    env = ub_vec.VecEnv([FakeImageEnv() for _ in range(n_envs)])
    model = dqn_model.DQN(env, huber=huber)
    model.run(n_samples)

    sampler = model.sampler
    batch = sampler.sample(batch_size)
    # Next observations come from relative index 1 of the sampled entries.
    batch['next_obs'] = sampler.rel[1]['obs']
    losses, td = model._train_model(batch)

    for loss in losses.values():
        self.assertArrayEqual([], loss.shape)
        self.assertFalse(np.all(np.isnan(loss)))
    self.assertArrayEqual((batch_size, ), td.shape)
    self.assertFalse(np.all(np.isnan(td)))
    self.assertTrue(np.all(td >= 0))
def test_dqn_td_error(self, huber):
    """Check shapes and NaN-freeness of ``td_error`` and ``td_loss``.

    A single transition (batch of one) is built from samples of the env's
    spaces with ``done=False`` and ``rew=1.0``.

    Args:
        huber: forwarded to ``DQN(..., huber=huber)``.
    """
    n_envs = 3
    env = ub_vec.VecEnv([FakeImageEnv() for _ in range(n_envs)])
    model = dqn_model.DQN(env, huber=huber)

    # Build a batch of one transition; a leading batch axis is added.
    obs = np.expand_dims(env.observation_space.sample(), axis=0)
    next_obs = np.expand_dims(env.observation_space.sample(), axis=0)
    act = np.asarray([env.action_space.sample()], dtype=np.int64)
    done = np.asarray([False], dtype=np.bool_)
    rew = np.asarray([1.0], dtype=np.float32)

    # test td error
    td = model.td_error(obs, act, done, rew, next_obs)
    self.assertArrayEqual((1, ), td.shape)
    self.assertFalse(np.all(np.isnan(td)))

    # test td loss
    td_loss = model.td_loss(td)
    self.assertArrayEqual([], td_loss.shape)
    self.assertFalse(np.all(np.isnan(td_loss)))
def test_dqn_sample_nstep_batch(self):
    """Check the n-step batch produced by ``_sample_nstep_batch``.

    With ``multi_step=2`` the batch must keep the sampled ``obs`` and
    ``act``, carry the reward ``rew[0] + gamma * rew[1]``, and take
    ``next_obs`` from the entry two steps ahead (``rel[2]``).
    """
    ub_utils.set_seed(1)
    n_envs = 3
    gamma = 0.99
    multi_step = 2
    env = ub_vec.VecEnv([FakeImageEnv() for _ in range(n_envs)])
    model = dqn_model.DQN(env, multi_step=multi_step, gamma=gamma)
    n_samples = 10
    batch_size = 1
    model.run(n_samples)

    sampler = model.sampler
    nstep = model._sample_nstep_batch(batch_size)

    # Step 0: the originally sampled transitions.
    step0 = sampler.rel[0]
    self.assertArrayEqual(step0['obs'], nstep['obs'])
    self.assertArrayEqual(step0['act'], nstep['act'])
    self.assertArrayNotEqual(step0['rew'], nstep['rew'])

    # Step 1: discounted reward accumulation (depends on random seed).
    step1 = sampler.rel[1]
    expected_rew = step0['rew'] + gamma * step1['rew']
    self.assertAllClose(expected_rew, nstep['rew'], atol=1e-6)

    # Step 2: the observation that follows the two-step return.
    step2 = sampler.rel[2]
    self.assertArrayEqual(step2['obs'], nstep['next_obs'])