def test_sqrt_hessian_sampled_squared_approximates_hessian(
    problem: DerivativesTestProblem,
    subsampling: Union[List[int], None],
    mc_samples: int = 1000000,
    chunks: int = 10,
) -> None:
    """Check that the MC-sampled Hessian square root reproduces the input Hessian.

    The input Hessian that BackPACK reconstructs from its MC-sampled square
    root is compared with the input Hessian obtained via autograd, using
    loosened tolerances.

    Args:
        problem: Test case.
        subsampling: Indices of active samples.
        mc_samples: Number of MC samples. Defaults to 1000000.
        chunks: Number of sequential passes in which the MC samples are
            processed.
    """
    problem.set_up()
    skip_subsampling_conflict(problem, subsampling)

    sampled_hessian = BackpackDerivatives(problem).input_hessian_via_sqrt_hessian(
        mc_samples=mc_samples, chunks=chunks, subsampling=subsampling
    )
    exact_hessian = AutogradDerivatives(problem).input_hessian(
        subsampling=subsampling
    )

    # Loosened tolerances for the comparison against the sampled estimate.
    tolerances = {"rtol": 1e-2, "atol": 7e-3}
    check_sizes_and_values(exact_hessian, sampled_hessian, **tolerances)
    problem.tear_down()
def test_ea_jac_t_mat_jac_prod(problem: DerivativesTestProblem, request) -> None:
    """Compare BackPACK's KFRA backpropagation with the autograd version.

    H_in → 1/N ∑ₙ Jₙ^T H_out Jₙ

    Notes:
        - `Dropout` cannot be tested, as the `autograd` implementation does a
          forward pass over each sample, while the `backpack` implementation
          requires only one forward pass over the batched data. This leads to
          different outputs, as `Dropout` is not deterministic.

    Args:
        problem: Test case.
        request: PyTest request, used to get test id.
    """
    skip_adaptive_avg_pool3d_cuda(request)

    problem.set_up()
    # Number of output features per sample (batch axis excluded).
    num_outputs = problem.output_shape[1:].numel()
    mat = rand(num_outputs, num_outputs).to(problem.device)

    result_backpack = BackpackDerivatives(problem).ea_jac_t_mat_jac_prod(mat)
    result_autograd = AutogradDerivatives(problem).ea_jac_t_mat_jac_prod(mat)

    check_sizes_and_values(result_autograd, result_backpack)
    problem.tear_down()
def test_for_loop_replace() -> None:
    """Use ``retain_graph`` to replace an outer for-loop.

    This test is based on issue #220 opened by Romain3Ch216. Per-component
    individual gradients of a tensor-valued output are computed with BackPACK
    using a loop over components only, and compared with autograd results
    obtained from a loop over both samples and components.
    """
    manual_seed(0)
    B, M, h = 5, 3, 2

    x = randn(B, h)
    fc = extend(Linear(h, M))
    A = fc(x)

    # Reference: one autograd call per (sample, component) pair.
    grad_autograd = zeros(B, M, *fc.weight.shape)
    for b in range(B):
        for m in range(M):
            with backpack(retain_graph=True):
                (weight_grad,) = autograd.grad(A[b, m], fc.weight, retain_graph=True)
            grad_autograd[b, m] = weight_grad

    # BackPACK: one backward pass per component yields all samples at once.
    grad_backpack = zeros(B, M, *fc.weight.shape)
    for i in range(M):
        component = A[:, i]
        with backpack(BatchGrad(), retain_graph=True):
            component.backward(ones_like(component), retain_graph=True)
        grad_backpack[:, i] = fc.weight.grad_batch

    check_sizes_and_values(grad_backpack, grad_autograd)
def test_ggn_mc(
    problem: ExtensionsTestProblem, subsampling: Union[List[int], None]
) -> None:
    """Check BackPACK's MC-approximated GGN against the exact GGN from autograd.

    Args:
        problem: Test case with small network whose GGN can be evaluated.
        subsampling: Indices of active samples. ``None`` uses the full
            mini-batch.
    """
    skip_large_parameters(problem)
    skip_subsampling_conflict(problem, subsampling)

    exact_ggn = AutogradExtensions(problem).ggn(subsampling=subsampling)
    sampled_ggn = BackpackExtensions(problem).ggn_mc(
        150000, chunks=15, subsampling=subsampling
    )

    # Compare normalized entries ∈ [-1; 1] (easier to tune atol).
    scale = max(exact_ggn.abs().max(), sampled_ggn.abs().max())
    # NOTE: The GGN can be exactly zero; e.g. if a ReLU after all parameters
    # zeroes its input, its Jacobian is thus zero and will cancel the
    # backpropagated GGN. Normalization is skipped in that case.
    if not isclose(scale, 0):
        exact_ggn = exact_ggn / scale
        sampled_ggn = sampled_ggn / scale

    check_sizes_and_values(exact_ggn, sampled_ggn, atol=5e-3, rtol=5e-3)
def test_jac_t_mat_prod(
    problem: DerivativesTestProblem,
    subsampling: Union[None, List[int]],
    request,
    V: int = 3,
) -> None:
    """Compare BackPACK's transposed Jacobian-matrix product with autograd.

    Args:
        problem: Problem for derivative test.
        subsampling: Indices of active samples.
        request: Pytest request, used for getting id.
        V: Number of vectorized transposed Jacobian-vector products.
            Default: ``3``.
    """
    skip_adaptive_avg_pool3d_cuda(request)

    problem.set_up()
    skip_batch_norm_train_mode_with_subsampling(problem, subsampling)
    skip_subsampling_conflict(problem, subsampling)

    mat = rand_mat_like_output(V, problem, subsampling=subsampling)

    result_backpack = BackpackDerivatives(problem).jac_t_mat_prod(
        mat, subsampling=subsampling
    )
    result_autograd = AutogradDerivatives(problem).jac_t_mat_prod(
        mat, subsampling=subsampling
    )

    check_sizes_and_values(result_autograd, result_backpack)
    problem.tear_down()
def check_equivalence(self) -> None:
    """Verify that an equivalent module reproduces the stored output.

    The stride and kernel size reported by ``get_avg_pool_parameters`` for
    ``self.module`` are used to build an equivalent module, which is then
    applied to ``self.input``. Sizes and values of the result are compared
    with ``self.output``.
    """
    derivatives = self._get_derivatives()
    # The third returned parameter is not needed to build the equivalent module.
    stride, kernel_size, _ = derivatives.get_avg_pool_parameters(self.module)

    equivalent_module: Module = self._make_module_equivalent(stride, kernel_size)
    equivalent_output: Tensor = equivalent_module(self.input)

    check_sizes_and_values(self.output, equivalent_output)
def test_make_hessian_mat_prod(problem: DerivativesTestProblem) -> None:
    """Compare BackPACK's ``hessian_mat_prod`` with the autograd version.

    A random matrix with leading dimension 4 and trailing dimensions
    ``problem.input_shape`` is multiplied by the Hessian in both
    implementations, and the results are compared.

    Args:
        problem: Test problem.
    """
    problem.set_up()
    mat = rand(4, *problem.input_shape, device=problem.device)

    autograd_res = AutogradDerivatives(problem).hessian_mat_prod(mat)
    backpack_res = BackpackDerivatives(problem).hessian_mat_prod(mat)

    check_sizes_and_values(backpack_res, autograd_res)
    # Release what set_up created, as the other tests that set up inline do.
    problem.tear_down()
def test_batch_l2_grad_hook(problem):
    """Check squared ℓ₂ norms of individual gradients from an extension hook.

    The values BackPACK computes through the extension hook are compared with
    the autograd reference.

    Args:
        problem (ExtensionsTestProblem): Problem for extension test.
    """
    problem.set_up()

    hook_result = BackpackExtensions(problem).batch_l2_grad_extension_hook()
    reference = AutogradExtensions(problem).batch_l2_grad()

    check_sizes_and_values(reference, hook_result)
    problem.tear_down()
def test_diag_ggn_batch(problem):
    """Check the individual diagonal of the Generalized Gauss-Newton/Fisher.

    BackPACK's exact per-sample GGN diagonal is compared with the autograd
    reference.

    Args:
        problem (ExtensionsTestProblem): Problem for extension test.
    """
    problem.set_up()

    result_backpack = BackpackExtensions(problem).diag_ggn_exact_batch()
    result_autograd = AutogradExtensions(problem).diag_ggn_batch()

    check_sizes_and_values(result_autograd, result_backpack)
    problem.tear_down()
def test_sum_grad_squared(problem):
    """Check the sum of squared individual gradients.

    BackPACK's ``sgs`` result is compared with the autograd reference.

    Args:
        problem (ExtensionsTestProblem): Problem for extension test.
    """
    problem.set_up()

    result_backpack = BackpackExtensions(problem).sgs()
    result_autograd = AutogradExtensions(problem).sgs()

    check_sizes_and_values(result_autograd, result_backpack)
    problem.tear_down()
def test_batch_grad(problem):
    """Check individual gradients.

    BackPACK's ``batch_grad`` result is compared with the autograd reference.

    Args:
        problem (ExtensionsTestProblem): Problem for extension test.
    """
    problem.set_up()

    result_backpack = BackpackExtensions(problem).batch_grad()
    result_autograd = AutogradExtensions(problem).batch_grad()

    check_sizes_and_values(result_autograd, result_backpack)
    problem.tear_down()
def test_sum_grad_squared_hook(problem):
    """Check the individual gradient second moment from an extension hook.

    The values BackPACK computes through the extension hook are compared with
    the autograd ``sgs`` reference.

    Args:
        problem (ExtensionsTestProblem): Problem for extension test.
    """
    problem.set_up()

    hook_result = BackpackExtensions(problem).sgs_extension_hook()
    reference = AutogradExtensions(problem).sgs()

    check_sizes_and_values(reference, hook_result)
    problem.tear_down()
def test_diag_h_batch(problem):
    """Check the per-sample diagonal of the Hessian.

    BackPACK's ``diag_h_batch`` result is compared with the autograd
    reference.

    Args:
        problem (ExtensionsTestProblem): Problem for extension test.
    """
    problem.set_up()

    result_backpack = BackpackExtensions(problem).diag_h_batch()
    result_autograd = AutogradExtensions(problem).diag_h_batch()

    check_sizes_and_values(result_autograd, result_backpack)
    problem.tear_down()
def test_sum_hessian(problem):
    """Check the summed Hessian.

    BackPACK's ``sum_hessian`` result is compared with the autograd reference.

    Args:
        problem (DerivativesProblem): Problem for derivative test.
    """
    problem.set_up()

    result_backpack = BackpackDerivatives(problem).sum_hessian()
    result_autograd = AutogradDerivatives(problem).sum_hessian()

    check_sizes_and_values(result_autograd, result_backpack)
    problem.tear_down()
def test_variance(problem: ExtensionsTestProblem) -> None:
    """Check the variance of individual gradients.

    BackPACK's ``variance`` result is compared with the autograd reference
    using a relative tolerance of ``5e-5``.

    Args:
        problem: Test case.
    """
    problem.set_up()

    result_backpack = BackpackExtensions(problem).variance()
    result_autograd = AutogradExtensions(problem).variance()

    check_sizes_and_values(result_autograd, result_backpack, rtol=5e-5)
    problem.tear_down()
def test_bias_jac_mat_prod(problem: DerivativesTestProblem, V: int = 3) -> None:
    """Check the Jacobian-matrix product w.r.t. the bias.

    A random matrix with leading dimension ``V`` and the shape of the module's
    bias is multiplied by the bias Jacobian in BackPACK and in autograd, and
    the results are compared.

    Args:
        problem: Test case.
        V: Number of vectorized Jacobian-vector products. Default: ``3``.
    """
    problem.set_up()

    bias_shape = problem.module.bias.shape
    mat = rand(V, *bias_shape).to(problem.device)

    result_backpack = BackpackDerivatives(problem).bias_jac_mat_prod(mat)
    result_autograd = AutogradDerivatives(problem).bias_jac_mat_prod(mat)

    check_sizes_and_values(result_autograd, result_backpack)
    problem.tear_down()
def test_diag_ggn(problem, request):
    """Check the diagonal of the generalized Gauss-Newton.

    BackPACK's ``diag_ggn`` result is compared with the autograd reference.

    Args:
        problem (ExtensionsTestProblem): Problem for extension test.
        request: Pytest request, passed to ``skip_adaptive_avg_pool3d_cuda``.
    """
    skip_adaptive_avg_pool3d_cuda(request)

    problem.set_up()

    result_backpack = BackpackExtensions(problem).diag_ggn()
    result_autograd = AutogradExtensions(problem).diag_ggn()

    check_sizes_and_values(result_autograd, result_backpack)
    problem.tear_down()
def test_jac_t_mat_prod(problem, V=3):
    """Check the transposed Jacobian-matrix product.

    A random matrix with leading dimension ``V`` and trailing dimensions
    ``problem.output_shape`` is multiplied by the transposed Jacobian in
    BackPACK and in autograd, and the results are compared.

    Args:
        problem (DerivativesProblem): Problem for derivative test.
        V (int): Number of vectorized transposed Jacobian-vector products.
    """
    problem.set_up()

    mat_shape = (V, *problem.output_shape)
    mat = torch.rand(*mat_shape).to(problem.device)

    result_backpack = BackpackDerivatives(problem).jac_t_mat_prod(mat)
    result_autograd = AutogradDerivatives(problem).jac_t_mat_prod(mat)

    check_sizes_and_values(result_autograd, result_backpack)
    problem.tear_down()
def test_batch_grad(
    problem: ExtensionsTestProblem, subsampling: Union[List[int], None]
) -> None:
    """Check individual gradients, optionally for a sub-set of samples.

    BackPACK's ``batch_grad`` result is compared with the autograd reference.

    Args:
        problem: Test case.
        subsampling: Indices of active samples.
    """
    skip_if_subsampling_conflict(problem, subsampling)

    result_backpack = BackpackExtensions(problem).batch_grad(subsampling)
    result_autograd = AutogradExtensions(problem).batch_grad(subsampling)

    check_sizes_and_values(result_autograd, result_backpack)
def test_ggn_exact(
    problem: ExtensionsTestProblem, subsampling: Union[List[int], None]
) -> None:
    """Check the exact GGN from BackPACK's matrix square root against autograd.

    Args:
        problem: Test case with small network whose GGN can be evaluated.
        subsampling: Indices of active samples. ``None`` uses the full
            mini-batch.
    """
    skip_large_parameters(problem)
    skip_subsampling_conflict(problem, subsampling)

    ggn_autograd = AutogradExtensions(problem).ggn(subsampling=subsampling)
    ggn_backpack = BackpackExtensions(problem).ggn(subsampling=subsampling)

    check_sizes_and_values(ggn_autograd, ggn_backpack)
def test_diag_ggn_mc_batch_light(problem):
    """Check the MC-approximated individual GGN diagonal (light version).

    The MC approximation of the individual diagonal of the Generalized
    Gauss-Newton/Fisher, using few MC samples, is compared with BackPACK's
    exact per-sample GGN diagonal.

    Args:
        problem (ExtensionsTestProblem): Problem for extension test.
    """
    problem.set_up()

    exact_diag = BackpackExtensions(problem).diag_ggn_exact_batch()
    sampled_diag = BackpackExtensions(problem).diag_ggn_mc_batch(5000)

    check_sizes_and_values(
        exact_diag, sampled_diag, atol=MC_ATOL, rtol=MC_LIGHT_RTOL
    )
    problem.tear_down()
def test_kfac_should_approx_ggn_montecarlo(problem: ExtensionsTestProblem):
    """Check that for batch_size = 1, the K-FAC is the same as the GGN.

    Should be true for linear layers and in the limit of infinite mc_samples.
    The K-FAC factors returned by BackPACK are converted to matrices with
    ``kfacs_to_mat`` and compared with the GGN blocks from autograd.

    Args:
        problem: Test case.
    """
    problem.set_up()

    ggn_blocks = AutogradExtensions(problem).ggn_blocks()

    kfac_factors = BackpackExtensions(problem).kfac_chunk(300000)
    kfac_matrices = list(map(kfacs_to_mat, kfac_factors))

    check_sizes_and_values(ggn_blocks, kfac_matrices, atol=5e-3, rtol=5e-3)
    problem.tear_down()
def test_sqrt_hessian_sampled_squared_approximates_hessian(
    problem, mc_samples=100000
):
    """Check the MC-sampled sqrt decomposition of the input Hessian.

    Compares the Hessian to its reconstruction from the individual Hessian
    MC-sampled sqrt, using loosened tolerances.

    Args:
        problem (DerivativesProblem): Problem for derivative test.
        mc_samples (int): Number of MC samples passed to BackPACK.
    """
    problem.set_up()

    sampled_hessian = BackpackDerivatives(problem).input_hessian_via_sqrt_hessian(
        mc_samples=mc_samples
    )
    exact_hessian = AutogradDerivatives(problem).input_hessian()

    # Loosened tolerances for the comparison against the sampled estimate.
    tolerances = {"rtol": 1e-2, "atol": 2e-2}
    check_sizes_and_values(exact_hessian, sampled_hessian, **tolerances)
    problem.tear_down()
def test_bias_jac_t_mat_prod(problem, sum_batch, V=3):
    """Check the transposed Jacobian-matrix product w.r.t. the bias.

    A random matrix with leading dimension ``V`` and trailing dimensions
    ``problem.output_shape`` is multiplied by the transposed bias Jacobian in
    BackPACK and in autograd, and the results are compared.

    Args:
        problem (DerivativesProblem): Problem for derivative test.
        sum_batch (bool): Sum results over the batch dimension.
        V (int): Number of vectorized transposed Jacobian-vector products.
    """
    problem.set_up()

    mat_shape = (V, *problem.output_shape)
    mat = torch.rand(*mat_shape).to(problem.device)

    result_backpack = BackpackDerivatives(problem).bias_jac_t_mat_prod(
        mat, sum_batch
    )
    result_autograd = AutogradDerivatives(problem).bias_jac_t_mat_prod(
        mat, sum_batch
    )

    check_sizes_and_values(result_autograd, result_backpack)
    problem.tear_down()
def test_sqrt_hessian_squared_equals_hessian(problem):
    """Test the sqrt decomposition of the input Hessian.

    Compares the Hessian to its reconstruction from the individual Hessian
    sqrt.

    Args:
        problem (DerivativesProblem): Problem for derivative test.
    """
    problem.set_up()

    backpack_res = BackpackDerivatives(problem).input_hessian_via_sqrt_hessian()
    autograd_res = AutogradDerivatives(problem).input_hessian()

    check_sizes_and_values(autograd_res, backpack_res)
    problem.tear_down()
def test_weight_jac_t_mat_prod(problem, sum_batch, save_memory, V=3):
    """Check the transposed Jacobian-matrix product w.r.t. the weights.

    A random matrix with leading dimension ``V`` and trailing dimensions
    ``problem.output_shape`` is multiplied by the transposed weight Jacobian
    in BackPACK (inside the ``weight_jac_t_save_memory`` context) and in
    autograd, and the results are compared.

    Args:
        problem (DerivativesProblem): Problem for derivative test.
        sum_batch (bool): Sum results over the batch dimension.
        save_memory (bool): Use Owkin implementation to save memory.
        V (int): Number of vectorized transposed Jacobian-vector products.
    """
    problem.set_up()

    mat_shape = (V, *problem.output_shape)
    mat = torch.rand(*mat_shape).to(problem.device)

    with weight_jac_t_save_memory(save_memory):
        result_backpack = BackpackDerivatives(problem).weight_jac_t_mat_prod(
            mat, sum_batch
        )
    result_autograd = AutogradDerivatives(problem).weight_jac_t_mat_prod(
        mat, sum_batch
    )

    check_sizes_and_values(result_autograd, result_backpack)
    problem.tear_down()
def test_diag_ggn_mc_batch(problem):
    """Check the MC-approximated individual GGN diagonal (slow version).

    The MC approximation of the individual diagonal of the Gauss-Newton, using
    more samples processed in chunks, is compared with BackPACK's exact
    per-sample GGN diagonal.

    Args:
        problem (ExtensionsTestProblem): Problem for extension test.
    """
    problem.set_up()

    exact_diag = BackpackExtensions(problem).diag_ggn_exact_batch()
    sampled_diag = BackpackExtensions(problem).diag_ggn_mc_batch_chunk(
        300000, chunks=30
    )

    check_sizes_and_values(exact_diag, sampled_diag, atol=MC_ATOL, rtol=MC_RTOL)
    problem.tear_down()
def test_sqrt_hessian_squared_equals_hessian(
    problem: DerivativesTestProblem, subsampling: Union[List[int], None]
) -> None:
    """Check the sqrt decomposition of the input Hessian.

    Compares the Hessian to its reconstruction from the individual Hessian
    sqrt.

    Args:
        problem: Test case.
        subsampling: Indices of active samples.
    """
    problem.set_up()
    skip_subsampling_conflict(problem, subsampling)

    reconstructed = BackpackDerivatives(problem).input_hessian_via_sqrt_hessian(
        subsampling=subsampling
    )
    reference = AutogradDerivatives(problem).input_hessian(subsampling=subsampling)

    check_sizes_and_values(reference, reconstructed)
    problem.tear_down()
def test_ea_jac_t_mat_jac_prod(problem):
    """Test KFRA backpropagation.

    H_in → 1/N ∑ₙ Jₙ^T H_out Jₙ

    Notes:
        - `Dropout` cannot be tested, as the `autograd` implementation does a
          forward pass over each sample, while the `backpack` implementation
          requires only one forward pass over the batched data. This leads to
          different outputs, as `Dropout` is not deterministic.

    Args:
        problem (DerivativesProblem): Problem for derivative test.
    """
    problem.set_up()

    # Number of output features per sample (batch axis excluded) as a plain
    # Python int. `torch.Size.numel()` also yields 1 for an empty shape, where
    # `torch.prod(torch.tensor(()))` would give a float tensor that is not a
    # valid size argument for `torch.rand`.
    out_features = torch.Size(problem.output_shape[1:]).numel()
    mat = torch.rand(out_features, out_features).to(problem.device)

    backpack_res = BackpackDerivatives(problem).ea_jac_t_mat_jac_prod(mat)
    autograd_res = AutogradDerivatives(problem).ea_jac_t_mat_jac_prod(mat)

    check_sizes_and_values(autograd_res, backpack_res)
    problem.tear_down()
def test_param_mjp(
    problem: DerivativesTestProblem,
    sum_batch: bool,
    subsampling: Union[List[int], None],
    request,
) -> None:
    """Test all parameter derivatives.

    For every named parameter of ``problem.module``, BackPACK's ``param_mjp``
    is compared with the autograd version. When the test id contains
    ``"Conv"``, the BackPACK computation is run once with
    ``save_memory=True`` and once with ``save_memory=False``.

    Args:
        problem: Test problem.
        sum_batch: Whether to sum along batch axis.
        subsampling: Subsampling indices.
        request: Pytest request, used to read the test id.
    """
    skip_subsampling_conflict(problem, subsampling)

    test_save_memory: bool = "Conv" in request.node.callspec.id
    V = 3
    # ``None`` stands for "do not touch the save_memory setting".
    save_memory_modes = [True, False] if test_save_memory else [None]

    for param_str, _ in problem.module.named_parameters():
        print(f"testing derivative wrt {param_str}")

        for save_memory in save_memory_modes:
            if test_save_memory:
                print(f"testing with save_memory={save_memory}")

            mat = rand_mat_like_output(V, problem, subsampling=subsampling)

            if test_save_memory:
                context = weight_jac_t_save_memory(save_memory=save_memory)
            else:
                context = nullcontext()

            # Only the BackPACK computation runs under the save_memory setting.
            with context:
                backpack_res = BackpackDerivatives(problem).param_mjp(
                    param_str, mat, sum_batch, subsampling=subsampling
                )
            autograd_res = AutogradDerivatives(problem).param_mjp(
                param_str, mat, sum_batch, subsampling=subsampling
            )

            check_sizes_and_values(autograd_res, backpack_res)