def testDQNCartPole(self):
    """Smoke-test `train_eval.train_eval` with its default environment.

    Runs a single training iteration in a fresh temporary directory and
    checks that the `loss` attribute of the returned value is positive.
    """
    # NOTE(review): no env_name is passed, so the environment is whatever
    # train_eval defaults to (presumably CartPole, per the test name).
    output_dir = self.get_temp_dir()
    result = train_eval.train_eval(
        output_dir,
        initial_collect_steps=10,
        num_eval_episodes=1,
        num_iterations=1,
    )
    self.assertGreater(result.loss, 0.0)
def testRNNDQNMaskedCartPole(self):
    """Smoke-test `train_eval.train_eval` on 'MaskedCartPole-v0'.

    Runs a single training iteration with `train_sequence_length=2` in a
    fresh temporary directory and checks that the `loss` attribute of the
    returned value is positive.
    """
    # NOTE(review): presumably train_sequence_length > 1 is what selects the
    # RNN variant named in the test -- confirm against train_eval.
    output_dir = self.get_temp_dir()
    result = train_eval.train_eval(
        output_dir,
        env_name='MaskedCartPole-v0',
        train_sequence_length=2,
        num_iterations=1,
        num_eval_episodes=1,
        initial_collect_steps=10,
    )
    self.assertGreater(result.loss, 0.0)
def testDQNCartPole(self):
    """Smoke-test `train_eval.train_eval` with its default environment.

    The test is skipped unless TensorFlow is executing eagerly. Otherwise
    it runs a single training iteration in a fresh temporary directory and
    checks that the `loss` attribute of the returned value is positive.
    """
    eager_mode = tf.executing_eagerly()
    if not eager_mode:
        self.skipTest('Binary is eager-only.')

    # NOTE(review): no env_name is passed, so the environment is whatever
    # train_eval defaults to (presumably CartPole, per the test name).
    output_dir = self.get_temp_dir()
    result = train_eval.train_eval(
        output_dir,
        initial_collect_steps=10,
        num_eval_episodes=1,
        num_iterations=1,
    )
    self.assertGreater(result.loss, 0.0)
def testRNNDQNMaskedCartPole(self):
    """Smoke-test `train_eval.train_eval` on 'MaskedCartPole-v0'.

    The test is skipped unless TensorFlow is executing eagerly. Otherwise
    it runs a single training iteration with `train_sequence_length=2` in a
    fresh temporary directory and checks that the `loss` attribute of the
    returned value is positive.
    """
    eager_mode = tf.executing_eagerly()
    if not eager_mode:
        self.skipTest('Binary is eager-only.')

    # NOTE(review): presumably train_sequence_length > 1 is what selects the
    # RNN variant named in the test -- confirm against train_eval.
    output_dir = self.get_temp_dir()
    result = train_eval.train_eval(
        output_dir,
        env_name='MaskedCartPole-v0',
        train_sequence_length=2,
        num_iterations=1,
        num_eval_episodes=1,
        initial_collect_steps=10,
    )
    self.assertGreater(result.loss, 0.0)