def test_weights_update(self):
    """Check that trainable parameters get updated after one iteration."""
    # Reset models.
    self.model.load_state_dict(self.initial_model_dict)
    self.actor_model.load_state_dict(self.initial_actor_model_dict)

    polybeast.learn(*self.learn_args)

    model_state_dict = self.model.state_dict(keep_vars=True)
    actor_model_state_dict = self.actor_model.state_dict(keep_vars=True)
    for key, initial_tensor in self.initial_model_dict.items():
        model_tensor = model_state_dict[key]
        actor_model_tensor = actor_model_state_dict[key]
        # Assert that the gradient is not zero for the learner.
        self.assertGreater(torch.norm(model_tensor.grad), 0.0)
        # Assert that the actor has no gradient.
        # Note that even though actor model tensors have no gradient, they
        # have requires_grad == True. No gradients are ever calculated for
        # these tensors because the inference function in polybeast.py
        # (which performs forward passes with the actor_model) runs under
        # the torch.no_grad context manager.
        self.assertIsNone(actor_model_tensor.grad)
        # Assert that the weights are updated in the expected way: manually
        # perform a gradient descent step and check that it matches the
        # updated parameters (up to floating point error).
        expected_tensor = (
            initial_tensor.detach().numpy() - self.lr * model_tensor.grad.numpy()
        )
        np.testing.assert_almost_equal(
            model_tensor.detach().numpy(), expected_tensor
        )
        np.testing.assert_almost_equal(
            actor_model_tensor.detach().numpy(), expected_tensor
        )
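# The manual-SGD check above can be reproduced in isolation. A minimal,
# self-contained sketch (standalone illustration, not part of the suite);
# it assumes the optimizer is plain SGD with no momentum or weight decay,
# which is exactly what the update w_new = w - lr * grad corresponds to.
def _sketch_manual_sgd_step_check(lr=0.1):
    w = torch.nn.Parameter(torch.ones(3))
    optimizer = torch.optim.SGD([w], lr=lr)
    loss = (w ** 2).sum()
    loss.backward()  # grad of sum(w^2) w.r.t. w is 2 * w
    expected = w.detach().numpy() - lr * w.grad.numpy()
    optimizer.step()  # one vanilla SGD step: w <- w - lr * grad
    np.testing.assert_almost_equal(w.detach().numpy(), expected)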
def test_non_zero_loss(self):
    """Check that the loss is not zero after one iteration."""
    # Reset models.
    self.model.load_state_dict(self.initial_model_dict)
    self.actor_model.load_state_dict(self.initial_actor_model_dict)

    polybeast.learn(*self.learn_args)

    self.assertNotEqual(self.stats["total_loss"], 0.0)
    self.assertNotEqual(self.stats["pg_loss"], 0.0)
    self.assertNotEqual(self.stats["baseline_loss"], 0.0)
    self.assertNotEqual(self.stats["entropy_loss"], 0.0)
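# The stats asserted above are produced by polybeast.learn(). In an
# IMPALA-style learner the three components typically sum to the optimized
# total; a hedged sketch of a stronger consistency check one could add
# (the assumption that baseline_loss and entropy_loss already include their
# cost coefficients is ours, not confirmed by this file):
def _sketch_loss_composition_check(stats):
    expected_total = (
        stats["pg_loss"] + stats["baseline_loss"] + stats["entropy_loss"]
    )
    np.testing.assert_almost_equal(stats["total_loss"], expected_total)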
def test_parameters_copied_to_actor_model(self):
    """Check that the learner model copies the parameters to the actor model."""
    # Reset models.
    self.model.load_state_dict(self.initial_model_dict)
    self.actor_model.load_state_dict(self.initial_actor_model_dict)

    polybeast.learn(*self.learn_args)

    np.testing.assert_equal(
        _state_dict_to_numpy(self.actor_model.state_dict()),
        _state_dict_to_numpy(self.model.state_dict()),
    )
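# _state_dict_to_numpy is a module-level helper defined elsewhere in this
# file. A plausible sketch of what it does (the actual implementation may
# differ): convert every tensor to a NumPy array so np.testing.assert_equal
# can compare the two state dicts entry by entry.
def _sketch_state_dict_to_numpy(state_dict):
    return {key: value.detach().cpu().numpy() for key, value in state_dict.items()}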
def test_gradients_update(self):
    """Check that gradients get updated after one iteration."""
    # Reset models.
    self.model.load_state_dict(self.initial_model_dict)
    self.actor_model.load_state_dict(self.initial_actor_model_dict)

    # There should be no calculated gradient yet.
    for p in self.model.parameters():
        self.assertIsNone(p.grad)
    for p in self.actor_model.parameters():
        self.assertIsNone(p.grad)

    polybeast.learn(*self.learn_args)

    # Check that every parameter of the learner model has a gradient, and
    # that there is at least some non-zero gradient for each set of
    # parameters.
    for p in self.model.parameters():
        self.assertIsNotNone(p.grad)
        self.assertFalse(torch.equal(p.grad, torch.zeros_like(p.grad)))

    # Check that the actor model has no gradients associated with it.
    for p in self.actor_model.parameters():
        self.assertIsNone(p.grad)
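# Why the actor parameters never acquire a .grad: as noted in
# test_weights_update, the inference path runs forward passes under
# torch.no_grad(), so no autograd graph is recorded and backward() is never
# called on actor outputs. A minimal standalone demonstration:
def _sketch_no_grad_leaves_grad_none():
    layer = torch.nn.Linear(4, 2)
    x = torch.randn(1, 4)
    with torch.no_grad():
        out = layer(x)  # no graph is built inside this context
    assert out.requires_grad is False
    assert all(p.grad is None for p in layer.parameters())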