def test_dqn_call_predict(self):
     """Check the output shapes of calling a DQN and of ``DQN.predict``.

     The same checks are run on a model built with default arguments and
     on one built with ``dueling=True``.
     """
     batch_size = 3
     env = ub_vec.VecEnv([FakeImageEnv() for _ in range(3)])
     obs_space = env.observation_space

     def check_output_shapes(model, obs):
         # calling the model on a batch yields one action and one value
         # per observation
         act, val = model(obs, proc_obs=True)
         self.assertArrayEqual((batch_size, ), act.shape)
         self.assertArrayEqual((batch_size, ), val.shape)
         # predicting on a single, un-batched observation yields a scalar
         act = model.predict(obs_space.sample())
         self.assertArrayEqual([], act.shape)

     model = dqn_model.DQN(env)
     obs = np.asarray([obs_space.sample() for _ in range(batch_size)])
     check_output_shapes(model, obs)
     # test dueling
     check_output_shapes(dqn_model.DQN(env, dueling=True), obs)
 def test_dqn_setup_image_obs(self):
     """Check what a DQN exposes right after construction on image envs.

     Verifies the env count, that both spaces are set, and the number of
     trainable variables with and without ``dueling=True``.
     """
     env = ub_vec.VecEnv([FakeImageEnv() for _ in range(3)])

     plain = dqn_model.DQN(env)
     self.assertEqual(3, plain.n_envs)
     self.assertIsNotNone(plain.observation_space)
     self.assertIsNotNone(plain.action_space)
     # nature_cnn + value
     # NOTE(review): the factor of 2 presumably counts two copies of the
     # network (e.g. online and target) -- confirm against the model
     n_plain_vars = len(plain.trainable_variables)
     self.assertEqual((8 + 2) * 2, n_plain_vars)

     # test dueling
     dueling = dqn_model.DQN(env, dueling=True)
     # nature_cnn + value(dueling)
     n_dueling_vars = len(dueling.trainable_variables)
     self.assertEqual((8 + 4) * 2, n_dueling_vars)
 def test_dqn_run(self):
     """Run the model for more samples than the buffer holds.

     Afterwards the replay buffer must report itself full and ready for
     sampling, and every stored field must have the expected shape.
     """
     n_envs, warmup_steps, buffer_size = 3, 50, 90
     env = ub_vec.VecEnv([FakeImageEnv() for _ in range(n_envs)])
     model = dqn_model.DQN(
         env, buffer_size=buffer_size, warmup_steps=warmup_steps)
     # expected leading dimensions of every buffer field
     n_slots = buffer_size // n_envs
     expected_shapes = {
         'obs': (n_slots, n_envs, *env.observation_space.shape),
         'act': (n_slots, n_envs, *env.action_space.shape),
         'rew': (n_slots, n_envs),
         'done': (n_slots, n_envs),
     }

     model.run(100)

     buf = model.buffer
     self.assertEqual(n_slots * n_envs, len(buf))
     self.assertTrue(buf.ready_for_sample)
     self.assertTrue(buf.isfull)
     # test buffer contents
     for field, shape in expected_shapes.items():
         self.assertArrayEqual(shape, buf.data[field].shape)
 def test_dqn_reg_loss(self):
     """Check that ``DQN.reg_loss`` returns a scalar that is not NaN."""
     env = ub_vec.VecEnv([FakeImageEnv() for _ in range(3)])
     model = dqn_model.DQN(env)
     agent_vars = model.agent.trainable_variables
     reg = model.reg_loss(agent_vars)
     # an empty shape means the loss is a scalar
     self.assertArrayEqual([], reg.shape)
     is_nan = np.all(np.isnan(reg))
     self.assertFalse(is_nan)
 def test_dqn_save_load(self):
     """Round-trip a trained DQN through ``save`` / ``DQN.load``.

     Checks that the loaded model has an agent, buffer and optimizer, the
     same config and the same network variables as the original. Both
     models are then trained on the same batches and must produce matching
     losses, TD errors, network variables and optimizer variables.
     """
     n_envs = 3
     envs = [FakeImageEnv() for _ in range(n_envs)]
     env = ub_vec.VecEnv(envs)
     env.seed(1)
     ub_utils.set_seed(1)
     model = dqn_model.DQN(env, warmup_steps=5)
     n_samples = 10
     batch_size = 10
     model.run(n_samples)
     # run one training call on a sampled batch before saving
     ub_utils.set_seed(2)
     batch = model.sampler(batch_size)
     # NOTE(review): rel[1] presumably refers to the transitions one step
     # after the sampled ones -- confirm against the sampler implementation
     batch['next_obs'] = model.sampler.rel[1]['obs']
     model._train_model(batch)
     with tempfile.TemporaryDirectory() as tempdir:
         save_path = tempdir
         # save & load model
         model.save(save_path)
         loaded_model = dqn_model.DQN.load(save_path)
     # the temporary directory has been cleaned up at this point; everything
     # below works on the in-memory loaded_model only
     # check model setup
     self.assertTrue(loaded_model.agent is not None)
     self.assertTrue(loaded_model.buffer is not None)
     self.assertTrue(loaded_model.optimizer is not None)
     # check if config is correctly restored
     model_config = model.get_config()
     loaded_config = loaded_model.get_config()
     self.assertEqual(set(model_config.keys()), set(loaded_config.keys()))
     for key in model_config:
         # the key is passed as the failure message to identify the mismatch
         self.assertEqual(model_config[key], loaded_config[key], key)
     # check if all network variables are correctly restored
     self.assertVariables(model.trainable_variables,
                          loaded_model.trainable_variables)
     # check the optimizer state: train both models on identical batches
     # and compare the results
     # all batches are drawn from the original model's sampler
     batches = []
     for i in range(3):
         batch = model.sampler(batch_size)
         batch['next_obs'] = model.sampler.rel[1]['obs']
         batches.append(batch)
     # NOTE(review): the seed is set once here and not reset before
     # loaded_model trains; this assumes _train_model draws no random
     # numbers -- confirm
     ub_utils.set_seed(1)
     # only the results of the last batch are kept for comparison
     for batch in batches:
         losses1, td1 = model._train_model(batch)
     for batch in batches:
         losses2, td2 = loaded_model._train_model(batch)
     # check that the losses match
     self.assertEqual(set(losses1.keys()), set(losses2.keys()))
     for key in losses1.keys():
         self.assertEqual(losses1[key], losses2[key])
     self.assertAllClose(td1, td2)
     # check if vars are same
     self.assertVariables(model.trainable_variables,
                          loaded_model.trainable_variables)
     # check if params of the optimizers are same
     self.assertVariables(model.optimizer.variables(),
                          loaded_model.optimizer.variables())
 def test_dqn_delayed_setup(self):
     """Build a DQN without an env, then attach one and call ``setup``.

     Before the env is attached the spaces and the agent must be ``None``;
     afterwards the spaces are set and the variable count is as expected.
     """
     model = dqn_model.DQN(None)
     for attr in ('observation_space', 'action_space', 'agent'):
         self.assertIsNone(getattr(model, attr))

     env = ub_vec.VecEnv([FakeImageEnv() for _ in range(3)])
     model.set_env(env)
     model.setup()

     self.assertIsNotNone(model.observation_space)
     self.assertIsNotNone(model.action_space)
     n_vars = len(model.trainable_variables)
     self.assertEqual((8 + 2) * 2, n_vars)
 def test_dqn_train(self, huber):
     """Run ``DQN.train`` once and check the step counters it leaves behind.

     Args:
         huber: forwarded to ``DQN(huber=...)``.
     """
     env = ub_vec.VecEnv([FakeImageEnv() for _ in range(3)])
     model = dqn_model.DQN(env, huber=huber)
     batch_size, n_subepochs, n_gradsteps, target_update = 10, 1, 4, 2

     # collect samples first, then train on them
     model.run(10)
     model.train(batch_size, n_subepochs, n_gradsteps, target_update)

     # the gradient-step counter is expected to be subepochs * gradsteps
     self.assertEqual(n_subepochs * n_gradsteps, model.num_gradsteps)
     self.assertEqual(n_subepochs, model.num_subepochs)
 def test_dqn_prioritized(self, huber):
     """Check that a prioritized DQN updates sample weights when training.

     Args:
         huber: forwarded to ``DQN(huber=...)``.
     """
     n_envs = 3
     n_samples = 10
     batch_size = 10
     ub_utils.set_seed(1)
     env = ub_vec.VecEnv([FakeImageEnv() for _ in range(n_envs)])
     # forward `huber` to the model; without it the parameter is unused
     # and every parameterization exercises the same configuration
     model = dqn_model.DQN(env,
                           prioritized=True,
                           warmup_steps=5,
                           huber=huber)
     self.assertIsInstance(model.prio_beta, ub_sche.Scheduler)
     self.assertIsInstance(model.sampler, ub_data.PriorSampler)
     model.run(n_samples)
     n_stored = n_samples * n_envs
     # right after collection every stored sample is expected to have
     # weight 1
     res = model.sampler._weight_tree[:n_stored]
     exp = np.ones_like(res, dtype=np.float32)
     self.assertArrayEqual(exp, res)
     # after one train step the weights must differ from the initial ones
     # and stay non-negative
     model._train_step(batch_size)
     res = model.sampler._weight_tree[:n_stored]
     self.assertArrayNotEqual(exp, res)
     self.assertTrue(np.all(res >= 0.0))
 def test_dqn_train_model(self, huber):
     """Check the losses and TD errors returned by ``DQN._train_model``.

     Every loss must be a scalar without NaN; the TD error must have one
     non-negative, non-NaN entry per sample in the batch.

     Args:
         huber: forwarded to ``DQN(huber=...)``.
     """
     n_envs = 3
     envs = [FakeImageEnv() for _ in range(n_envs)]
     env = ub_vec.VecEnv(envs)
     model = dqn_model.DQN(env, huber=huber)
     n_samples = 10
     batch_size = 2
     model.run(n_samples)
     samp = model.sampler
     batch = samp.sample(batch_size)
     # NOTE(review): rel[1] presumably refers to the transitions one step
     # after the sampled ones -- confirm against the sampler implementation
     batch['next_obs'] = samp.rel[1]['obs']
     losses, td = model._train_model(batch)
     for loss in losses.values():
         self.assertArrayEqual([], loss.shape)
         self.assertFalse(np.any(np.isnan(loss)))
     self.assertArrayEqual((batch_size, ), td.shape)
     # use `any`, not `all`: a single NaN entry must already fail the test
     self.assertFalse(np.any(np.isnan(td)))
     self.assertTrue(np.all(td >= 0))
 def test_dqn_td_error(self, huber):
     """Check ``DQN.td_error`` and ``DQN.td_loss`` on a single transition.

     Args:
         huber: forwarded to ``DQN(huber=...)``.
     """
     env = ub_vec.VecEnv([FakeImageEnv() for _ in range(3)])
     model = dqn_model.DQN(env, huber=huber)

     def with_batch_axis(sample):
         # prepend a batch axis of size 1
         return sample.reshape((1, *sample.shape))

     # build a batch containing exactly one transition
     obs = with_batch_axis(env.observation_space.sample())
     next_obs = with_batch_axis(env.observation_space.sample())
     act = np.asarray([env.action_space.sample()], dtype=np.int64)
     rew = np.asarray([1.0], dtype=np.float32)
     done = np.asarray([False], dtype=np.bool_)

     # test td error
     td = model.td_error(obs, act, done, rew, next_obs)
     self.assertArrayEqual((1, ), td.shape)
     td_is_nan = np.all(np.isnan(td))
     self.assertFalse(td_is_nan)

     # test td loss
     loss = model.td_loss(td)
     self.assertArrayEqual([], loss.shape)
     loss_is_nan = np.all(np.isnan(loss))
     self.assertFalse(loss_is_nan)
 def test_dqn_sample_nstep_batch(self):
     """Check the batch built by ``DQN._sample_nstep_batch`` for 2 steps.

     Observations and actions must equal those of the sampled step, the
     reward must be the discounted sum over two steps, and ``next_obs``
     must be the observation two steps later.
     """
     ub_utils.set_seed(1)
     gamma, multi_step = 0.99, 2
     env = ub_vec.VecEnv([FakeImageEnv() for _ in range(3)])
     model = dqn_model.DQN(env, multi_step=multi_step, gamma=gamma)
     model.run(10)
     sampler = model.sampler
     nstep = model._sample_nstep_batch(1)

     # obs and act are taken unchanged from the sampled step
     step0 = sampler.rel[0]
     self.assertArrayEqual(step0['obs'], nstep['obs'])
     self.assertArrayEqual(step0['act'], nstep['act'])
     self.assertArrayNotEqual(step0['rew'], nstep['rew'])

     # depends on random seed
     # NOTE(review): presumably the plain discounted sum only holds when
     # no episode ends inside the window -- confirm
     step1 = sampler.rel[1]
     expected_rew = step0['rew'] + gamma * step1['rew']
     self.assertAllClose(expected_rew, nstep['rew'], atol=1e-6)

     # next_obs is the observation two steps after the sampled one
     step2 = sampler.rel[2]
     self.assertArrayEqual(step2['obs'], nstep['next_obs'])