Code Example #1
    def test_weights_update(self):
        """Check that trainable parameters get updated after one iteration."""
        # Reset models.
        self.model.load_state_dict(self.initial_model_dict)
        self.actor_model.load_state_dict(self.initial_actor_model_dict)

        polybeast.learn(*self.learn_args)

        model_state_dict = self.model.state_dict(keep_vars=True)
        actor_model_state_dict = self.actor_model.state_dict(keep_vars=True)
        for key, initial_tensor in self.initial_model_dict.items():
            model_tensor = model_state_dict[key]
            actor_model_tensor = actor_model_state_dict[key]
            # Assert that the gradient is not zero for the learner.
            self.assertGreater(torch.norm(model_tensor.grad), 0.0)
            # Assert actor has no gradient.
            # Note that even though actor model tensors have no gradient,
            # they have requires_grad == True. No gradients are ever calculated
            # for these tensors because the inference function in polybeast.py
            # (that performs forward passes with the actor_model) runs inside the
            # torch.no_grad context manager.
            self.assertIsNone(actor_model_tensor.grad)
            # Assert that the weights are updated in the expected way.
            # We manually perform a gradient descent step,
            # and check that they are the same as the calculated ones
            # (ignoring floating point errors).
            expected_tensor = (initial_tensor.detach().numpy() -
                               self.lr * model_tensor.grad.numpy())
            np.testing.assert_almost_equal(model_tensor.detach().numpy(),
                                           expected_tensor)
            np.testing.assert_almost_equal(actor_model_tensor.detach().numpy(),
                                           expected_tensor)
Code Example #2
    def test_non_zero_loss(self):
        """Check that the loss is not zero after one iteration."""
        # Reset models.
        self.model.load_state_dict(self.initial_model_dict)
        self.actor_model.load_state_dict(self.initial_actor_model_dict)

        polybeast.learn(*self.learn_args)

        self.assertNotEqual(self.stats["total_loss"], 0.0)
        self.assertNotEqual(self.stats["pg_loss"], 0.0)
        self.assertNotEqual(self.stats["baseline_loss"], 0.0)
        self.assertNotEqual(self.stats["entropy_loss"], 0.0)
Code Example #3
    def test_parameters_copied_to_actor_model(self):
        """Check that the learner model copies the parameters to the actor model."""
        # Reset models.
        self.model.load_state_dict(self.initial_model_dict)
        self.actor_model.load_state_dict(self.initial_actor_model_dict)

        polybeast.learn(*self.learn_args)

        np.testing.assert_equal(
            _state_dict_to_numpy(self.actor_model.state_dict()),
            _state_dict_to_numpy(self.model.state_dict()),
        )
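
This test relies on a _state_dict_to_numpy helper that is not shown in the excerpt. A minimal implementation consistent with how it is used here (an assumption, not necessarily the exact helper in the test file) simply converts every tensor in a state dict to a numpy array:

def _state_dict_to_numpy(state_dict):
    """Convert each tensor in a state dict to a numpy array, so that two
    models can be compared key by key with np.testing.assert_equal."""
    return {key: value.detach().cpu().numpy() for key, value in state_dict.items()}
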
Code Example #4
    def test_gradients_update(self):
        """Check that gradients get updated after one iteration."""
        # Reset models.
        self.model.load_state_dict(self.initial_model_dict)
        self.actor_model.load_state_dict(self.initial_actor_model_dict)

        # There should be no calculated gradient yet.
        for p in self.model.parameters():
            self.assertIsNone(p.grad)
        for p in self.actor_model.parameters():
            self.assertIsNone(p.grad)

        polybeast.learn(*self.learn_args)

        # Check that every parameter for the learner model has a gradient, and that
        # there is at least some non-zero gradient for each set of parameters.
        for p in self.model.parameters():
            self.assertIsNotNone(p.grad)
            self.assertFalse(torch.equal(p.grad, torch.zeros_like(p.grad)))

        # Check that the actor model has no gradients associated with it.
        for p in self.actor_model.parameters():
            self.assertIsNone(p.grad)
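
The precondition checked before learn() (every .grad is None) holds because load_state_dict only copies parameter values; it neither creates nor clears gradient buffers, and only a backward pass creates them. A small standalone sketch of that lifecycle, again with a toy module rather than polybeast code:

import torch
import torch.nn as nn

model = nn.Linear(4, 2)

# Freshly constructed parameters have no gradient buffers at all.
assert all(p.grad is None for p in model.parameters())

# A backward pass creates and fills them.
model(torch.randn(8, 4)).sum().backward()
assert all(p.grad is not None for p in model.parameters())

# load_state_dict copies values but leaves .grad untouched, so resetting the
# weights alone would not restore the "no gradient yet" state.
model.load_state_dict({k: v.clone() for k, v in model.state_dict().items()})
assert all(p.grad is not None for p in model.parameters())
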