def memory_after_forward(device, context=None):
    """Measure the memory still in use after one forward pass of an extended model.

    A dummy problem with MNIST dimensions (batch of 256 single-channel 28x28
    inputs, 10 classes) is pushed through an extended ``Flatten -> Linear``
    model and an extended ``CrossEntropyLoss``.

    Args:
        device: Device on which data, model and loss function are placed.
        context: Callable that returns a context manager; the forward pass is
            executed inside it. ``nullcontext`` is used if ``None``.

    Returns:
        Difference between ``pytorch_current_memory_usage()`` after the forward
        pass and its value at function entry.
    """
    baseline = pytorch_current_memory_usage()

    torch.manual_seed(0)

    # dummy data with MNIST dimensions
    batch_size = 256
    inputs = torch.rand(batch_size, 1, 28, 28).to(device)
    targets = classification_targets((batch_size,), 10).to(device)

    network = torch.nn.Sequential(
        torch.nn.Flatten(),
        torch.nn.Linear(784, 10),
    ).to(device)
    model = extend(network)
    lossfunc = extend(torch.nn.CrossEntropyLoss().to(device))

    forward_context = nullcontext if context is None else context
    with forward_context():
        # The loss is deliberately not bound to a name, so only what the
        # modules themselves retain survives until the measurement below.
        lossfunc(model(inputs), targets)

    return pytorch_current_memory_usage() - baseline
def make_simple_act_setting(act_cls: Type[Module], bias: bool) -> dict:
    """Build the test case dictionary of a small CNN containing an activation.

    The network is ``Conv2d -> activation -> Flatten -> Linear``. The parameters
    of the final linear layer are made non-differentiable to save run time.

    Args:
        act_cls: Class of the activation function; instantiated without arguments.
        bias: Whether the convolution uses a bias term.

    Returns:
        Dictionary with the factories ``input_fn``, ``module_fn``,
        ``loss_function_fn`` and ``target_fn``, plus the string ``id_prefix``.
    """

    def _build_network() -> Sequential:
        # The linear head is created first, then the convolution, so that
        # parameter initialization draws random numbers in a fixed order.
        head = Linear(72, 5)
        set_requires_grad(head, False)
        layers = [Conv2d(3, 2, 2, bias=bias), act_cls(), Flatten(), head]
        return Sequential(*layers)

    return {
        "input_fn": lambda: rand(3, 3, 7, 7),
        "module_fn": _build_network,
        "loss_function_fn": lambda: CrossEntropyLoss(),
        "target_fn": lambda: classification_targets((3,), 5),
        "id_prefix": "automated-simple-cnn-act",
    }
def make_simple_act_setting(act_cls, bias):
    """Build the test case dictionary of a small CNN for a given activation.

    The network is ``Conv2d -> activation -> Flatten -> Linear``. The settings
    are used to test ``test.extensions``.

    Args:
        act_cls: Class of the activation function; instantiated without arguments.
        bias: Whether the convolution uses a bias term.

    Returns:
        Dictionary with the factories ``input_fn``, ``module_fn``,
        ``loss_function_fn`` and ``target_fn``, plus the string ``id_prefix``.
    """
    nn = torch.nn

    def build_network():
        # Layers are instantiated front to back, as parameter initialization
        # consumes random numbers.
        conv = nn.Conv2d(3, 2, 2, bias=bias)
        activation = act_cls()
        flatten = nn.Flatten()
        head = nn.Linear(72, 5)
        return nn.Sequential(conv, activation, flatten, head)

    return {
        "input_fn": lambda: torch.rand(3, 3, 7, 7),
        "module_fn": build_network,
        "loss_function_fn": lambda: nn.CrossEntropyLoss(),
        "target_fn": lambda: classification_targets((3,), 5),
        "id_prefix": "automated-simple-cnn-act",
    }
def test_network_diag_ggn(model_and_input):
    """Check that ``DiagGGNExact`` runs on a converted model and spot-check values.

    Some models are too big for a full comparison with PyTorch, so this test
    only verifies that BackPACK runs without errors, that the forward pass of
    the extended model matches the original model, and that a small number of
    ``DiagGGNExact`` entries agree with the autograd reference.

    Args:
        model_and_input: Tuple of the model, its input and the loss function.

    Raises:
        NotImplementedError: if loss_fn is not MSELoss or CrossEntropyLoss
    """
    model_original, x, loss_fn = model_and_input
    model_original = model_original.eval()
    reference_output = model_original(x)
    out_shape = reference_output.shape

    if isinstance(loss_fn, MSELoss):
        y = regression_targets(out_shape)
    elif isinstance(loss_fn, CrossEntropyLoss):
        # axis 1 of the output holds the classes, all other axes carry targets
        target_shape = (out_shape[0], *out_shape[2:])
        y = classification_targets(target_shape, out_shape[1])
    else:
        raise NotImplementedError(f"test cannot handle loss_fn = {type(loss_fn)}")

    num_params = sum(
        param.numel() for param in model_original.parameters() if param.requires_grad
    )
    num_to_compare = 10
    idx_to_compare = linspace(0, num_params - 1, num_to_compare, dtype=int32)

    # the reference is computed before the model is handed to ``extend``
    reference_entries = autograd_diag_ggn_exact(
        x, y, model_original, loss_fn, idx=idx_to_compare
    )

    model_extended = extend(model_original, use_converter=True, debug=True)
    output = model_extended(x)
    assert allclose(output, reference_output)

    loss = extend(loss_fn)(output, y)
    with backpack(DiagGGNExact()):
        loss.backward()

    flattened = [
        param.diag_ggn_exact.flatten()
        for param in model_extended.parameters()
        if param.requires_grad
    ]
    diag_ggn_exact_vector = cat(flattened)

    for position, reference in zip(idx_to_compare, reference_entries):
        assert allclose(reference, diag_ggn_exact_vector[position], atol=1e-5)
def make_simple_pooling_setting(
    input_size: Tuple[int],
    conv_cls: Type[Module],
    pool_cls: Type[Module],
    pool_params: Tuple[Any],
) -> dict:
    """Create CNN with convolution and pooling layer as test case dictionary.

    The network is ``conv -> ReLU -> pool -> Flatten -> Linear`` with 5 output
    classes. Make parameters of final linear layer non-differentiable to save
    run time.

    The batch size of the targets and the number of input channels of the
    convolution are read from ``input_size``, so inputs other than
    ``[3, 3, ...]`` yield a consistent test case as well.

    Args:
        input_size: Input shape ``[N, C_in, ...]``.
        conv_cls: Class of convolution layer. Instantiated as
            ``conv_cls(C_in, 2, 2)``.
        pool_cls: Class of pooling layer.
        pool_params: Pooling hyperparameters, passed positionally to ``pool_cls``.

    Returns:
        Dictionary representation of the test case.
    """
    batch_size, in_channels = input_size[0], input_size[1]

    def _make_cnn(
        conv_cls: Type[Module],
        output_size: int,
        conv_params: Tuple[Any],
        pool_cls: Type[Module],
        pool_params: Tuple[Any],
    ) -> Sequential:
        linear = Linear(output_size, 5)
        set_requires_grad(linear, False)
        return Sequential(
            conv_cls(*conv_params), ReLU(), pool_cls(*pool_params), Flatten(), linear
        )

    conv_params = (in_channels, 2, 2)

    # A throw-away input is pushed through conv + pool once to size the linear
    # layer (presumably ``_get_output_dim`` returns the number of features per
    # sample -- confirm against its definition).
    dummy_input = rand(input_size)
    output_dim = _get_output_dim(
        Sequential(conv_cls(*conv_params), pool_cls(*pool_params)), dummy_input
    )

    dict_setting = {
        "input_fn": lambda: rand(input_size),
        "module_fn": lambda: _make_cnn(
            conv_cls, output_dim, conv_params, pool_cls, pool_params
        ),
        "loss_function_fn": lambda: CrossEntropyLoss(reduction="sum"),
        # one target per sample of the input batch
        "target_fn": lambda: classification_targets((batch_size,), 5),
        "id_prefix": "automated-simple-cnn",
    }

    return dict_setting
def make_simple_pooling_setting(input_size, conv_class, pool_cls, pool_params):
    """Create a CNN with convolution and pooling layer as test case dictionary.

    The network is ``conv -> ReLU -> pool -> Flatten -> Linear`` with 5 output
    classes. The settings are used to test ``test.extensions``.

    The batch size of the targets and the number of input channels of the
    convolution are read from ``input_size``, so inputs other than
    ``(3, 3, ...)`` yield a consistent test case as well.

    Args:
        input_size: Input shape ``(N, C_in, ...)``.
        conv_class: Class of the convolution layer. Instantiated as
            ``conv_class(C_in, 2, 2)``.
        pool_cls: Class of the pooling layer.
        pool_params: Pooling hyperparameters, passed positionally to ``pool_cls``.

    Returns:
        Dictionary with the factories ``input_fn``, ``module_fn``,
        ``loss_function_fn`` and ``target_fn``, plus the string ``id_prefix``.
    """
    batch_size, in_channels = input_size[0], input_size[1]
    conv_params = (in_channels, 2, 2)
    num_classes = 5

    def make_cnn(num_features):
        """Assemble the network; ``num_features`` sizes the linear layer."""
        return torch.nn.Sequential(
            conv_class(*conv_params),
            torch.nn.ReLU(),
            pool_cls(*pool_params),
            torch.nn.Flatten(),
            torch.nn.Linear(num_features, num_classes),
        )

    def features_per_sample(sample_input):
        """Return the number of elements per sample after conv and pooling."""
        conv_output = conv_class(*conv_params)(sample_input)
        pooled = pool_cls(*pool_params)(conv_output)
        return pooled.numel() // pooled.shape[0]

    # a throw-away input is pushed through conv + pool once to size the linear layer
    dummy_input = torch.rand(input_size)
    output_size = features_per_sample(dummy_input)

    dict_setting = {
        "input_fn": lambda: torch.rand(input_size),
        "module_fn": lambda: make_cnn(output_size),
        "loss_function_fn": lambda: torch.nn.CrossEntropyLoss(reduction="sum"),
        # one target per sample of the input batch
        "target_fn": lambda: classification_targets((batch_size,), num_classes),
        "id_prefix": "automated-simple-cnn",
    }

    return dict_setting
def problem(device, request) -> Tuple[Module, Tensor, str]:
    """Provide an extended model together with the loss of one forward pass.

    Depending on ``request.param``, the model is either a nested ``Sequential``
    or a custom container module; both consist of two bias-free linear layers.

    Args:
        device: available device
        request: pytest request whose ``param`` selects the problem

    Yields:
        model, loss and problem_string

    Raises:
        NotImplementedError: if the problem_string is unknown
    """
    problem_string = request.param
    manual_seed(0)

    batch_size = 2
    inputs = rand(batch_size, 4).to(device)
    targets = classification_targets((batch_size,), 2).to(device)

    if problem_string == NESTED_SEQUENTIAL:
        first = Linear(4, 3, bias=False)
        second = Linear(3, 2, bias=False)
        model = Sequential(first, Sequential(second))
    elif problem_string == CUSTOM_CONTAINER:

        class _MyCustomModule(Module):
            def __init__(self):
                super().__init__()
                self.linear1 = Linear(4, 3, bias=False)
                self.linear2 = Linear(3, 2, bias=False)

            def forward(self, x):
                return self.linear2(self.linear1(x))

        model = _MyCustomModule()
    else:
        raise NotImplementedError(
            f"problem={problem_string} but no test setting for this."
        )

    model = extend(model.to(device))
    lossfunc = extend(CrossEntropyLoss(reduction="mean").to(device))
    loss = lossfunc(model(inputs), targets)

    yield model, loss, problem_string
def memory_leak(device, context=None):
    """Reproduce a memory leak caused by forward passes with non-freed IO.

    An extended ``Flatten -> Linear`` model is evaluated repeatedly on a dummy
    problem with MNIST dimensions. Every step performs a forward and backward
    pass, followed by a second forward pass inside ``context``. After each step
    the growth in memory since function entry is compared with a threshold.

    Args:
        device: Device on which data, model and loss function are placed.
        context: Callable that returns a context manager for the second forward
            pass. ``nullcontext`` is used if ``None``.

    Raises:
        RuntimeError: if a memory leak is detected (the allocated memory exceeds
            the specified threshold).
    """
    memory_init = pytorch_current_memory_usage()

    torch.manual_seed(0)

    # dummy data with MNIST dimensions
    batch_size = 256
    inputs = torch.rand(batch_size, 1, 28, 28).to(device)
    targets = classification_targets((batch_size,), 10).to(device)

    network = torch.nn.Sequential(
        torch.nn.Flatten(),
        torch.nn.Linear(784, 10),
    ).to(device)
    model = extend(network)

    # tolerated growth before the run counts as a memory leak
    steps = 50
    threshold_mb = 1
    threshold = threshold_mb * 2**20

    # ``context`` itself is rebound because the error message reports it
    context = nullcontext if context is None else context

    for _ in range(steps):
        lossfunc = extend(torch.nn.CrossEntropyLoss().to(device))

        loss = lossfunc(model(inputs), targets)
        loss.backward()

        with context():
            loss = lossfunc(model(inputs), targets)  # this is what kills it

        grown_by = pytorch_current_memory_usage() - memory_init
        if grown_by > threshold:
            raise RuntimeError(
                f"Memory leak detected: context={context}, device={device}"
            )
def set_up(device):
    """Build an extended nested sequential model and the loss of a forward pass.

    Args:
        device: Device on which the data and the model are placed.

    Returns:
        Tuple of the extended model and the loss obtained from one forward pass
        on random inputs with batch size 2.
    """
    torch.manual_seed(0)

    batch_size = 2
    inputs = torch.rand(batch_size, 4).to(device)
    targets = classification_targets((batch_size,), 2).to(device)

    # layers are created outer first, as initialization consumes random numbers
    outer_linear = torch.nn.Linear(4, 3, bias=False)
    inner_linear = torch.nn.Linear(3, 2, bias=False)
    network = torch.nn.Sequential(
        outer_linear,
        torch.nn.Sequential(inner_linear),
    ).to(device)
    model = extend(network)

    lossfunc = extend(torch.nn.CrossEntropyLoss(reduction="mean"))
    loss = lossfunc(model(inputs), targets)

    return model, loss
"target_fn": lambda: regression_targets((8, 3 * 5)), }, { "input_fn": lambda: rand(4, 3, 5), "module_fn": lambda: Sequential( LSTM(input_size=5, hidden_size=4, batch_first=True), ReduceTuple(index=0), Flatten(), ), "loss_function_fn": lambda: CrossEntropyLoss(), "target_fn": lambda: classification_targets((4, ), 4 * 3), }, { "input_fn": lambda: rand(8, 5, 6), "module_fn": lambda: Sequential( RNN(input_size=6, hidden_size=3, batch_first=True), ReduceTuple(index=0), Linear(3, 3), Permute(0, 2, 1), ), "loss_function_fn": lambda: CrossEntropyLoss(), "target_fn": lambda: classification_targets((8, 5), 3),
import torch from test.core.derivatives.utils import classification_targets, regression_targets FIRSTORDER_SETTINGS = [] ############################################################################### # examples # ############################################################################### example = { "input_fn": lambda: torch.rand(3, 10), "module_fn": lambda: torch.nn.Sequential(torch.nn.Linear(10, 5)), "loss_function_fn": lambda: torch.nn.CrossEntropyLoss(reduction="sum"), "target_fn": lambda: classification_targets((3,), 5), "device": [torch.device("cpu")], "seed": 0, "id_prefix": "example", } FIRSTORDER_SETTINGS.append(example) ############################################################################### # test setting: Linear Layers # ############################################################################### FIRSTORDER_SETTINGS += [ # classification { "input_fn": lambda: torch.rand(3, 10), "module_fn": lambda: torch.nn.Sequential(
""" from test.core.derivatives.utils import classification_targets, regression_targets import torch LOSS_SETTINGS = [] ############################################################################### # examples # ############################################################################### example = { "module_fn": lambda: torch.nn.CrossEntropyLoss(reduction="mean"), "input_fn": lambda: torch.rand(size=(2, 4)), "target_fn": lambda: classification_targets(size=(2, ), num_classes=2), "device": [torch.device("cpu")], # optional "seed": 0, # optional "id_prefix": "loss-example", # optional } LOSS_SETTINGS.append(example) LOSS_SETTINGS += [ { "module_fn": lambda: torch.nn.CrossEntropyLoss(reduction="mean"), "input_fn": lambda: torch.rand(size=(2, 4)), "target_fn": lambda: classification_targets(size=(2, ), num_classes=2), }, { "module_fn": lambda: torch.nn.CrossEntropyLoss(reduction="sum"), "input_fn": lambda: torch.rand(size=(8, 4)),
lambda: Sequential( Linear(10, 5), ReLU(), # skip connection Parallel( Identity(), Linear(5, 5), ), # end of skip connection Sigmoid(), Linear(5, 4), ), "loss_function_fn": lambda: CrossEntropyLoss(), "target_fn": lambda: classification_targets((3, ), 4), "id_prefix": "branching-linear", }, { "input_fn": lambda: rand(3, 10), "module_fn": lambda: Sequential( Linear(10, 5), ReLU(), # skip connection Parallel( ScaleModule(weight=3.0), Linear(5, 5), ),
) from backpack.custom_module.pad import Pad from backpack.custom_module.slicing import Slicing SECONDORDER_SETTINGS = [] ############################################################################### # examples # ############################################################################### example = { "input_fn": lambda: rand(3, 10), "module_fn": lambda: Sequential(Linear(10, 5)), "loss_function_fn": lambda: CrossEntropyLoss(), "target_fn": lambda: classification_targets((3, ), 5), "device": [device("cpu")], "seed": 0, "id_prefix": "example", } SECONDORDER_SETTINGS.append(example) SECONDORDER_SETTINGS += [ # classification { "input_fn": lambda: rand(3, 10), "module_fn": lambda: Sequential(Linear(10, 7), Linear(7, 5)), "loss_function_fn": lambda: CrossEntropyLoss(reduction="mean"), "target_fn": lambda: classification_targets((3, ), 5), }, {
from backpack import convert_module_to_backpack from backpack.custom_module.permute import Permute from backpack.custom_module.reduce_tuple import ReduceTuple FIRSTORDER_SETTINGS = [] ############################################################################### # examples # ############################################################################### example = { "input_fn": lambda: rand(3, 10), "module_fn": lambda: Sequential(Linear(10, 5)), "loss_function_fn": lambda: CrossEntropyLoss(reduction="sum"), "target_fn": lambda: classification_targets((3, ), 5), "device": [device("cpu")], "seed": 0, "id_prefix": "example", } FIRSTORDER_SETTINGS.append(example) ############################################################################### # test setting: Linear Layers # ############################################################################### FIRSTORDER_SETTINGS += [ # classification { "input_fn": lambda: rand(3, 10), "module_fn": lambda: Sequential(Linear(10, 7), Linear(7, 5)),
def dummy_cross_entropy(N=5):
    """Evaluate an extended cross-entropy loss on random two-class predictions.

    Args:
        N: Number of samples. Default: ``5``.

    Returns:
        Result of calling an extended ``CrossEntropyLoss`` on random predictions
        of shape ``(N, 2)`` that require gradients and targets drawn with
        ``classification_targets``.
    """
    predictions = torch.rand((N, 2))
    predictions.requires_grad = True
    targets = classification_targets((N,), 2)

    criterion = extend(CrossEntropyLoss())
    return criterion(predictions, targets)