Example #1
def memory_after_forward(device, context=None):
    """Return memory consumed by the forward pass of an extended model."""
    memory_init = pytorch_current_memory_usage()

    torch.manual_seed(0)

    # MNIST dummy
    B = 256
    X = torch.rand(B, 1, 28, 28).to(device)
    y = classification_targets((B, ), 10).to(device)

    model = torch.nn.Sequential(
        torch.nn.Flatten(),
        torch.nn.Linear(784, 10),
    ).to(device)
    model = extend(model)

    lossfunc = torch.nn.CrossEntropyLoss().to(device)
    lossfunc = extend(lossfunc)

    if context is None:
        context = nullcontext

    with context():
        lossfunc(model(X), y)

    return pytorch_current_memory_usage() - memory_init
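
A minimal usage sketch (an assumption, not part of the original test): compare the memory footprint of a plain forward pass with one run inside a BackPACK extension context, assuming the usual `from backpack import backpack, extensions` imports.

device = torch.device("cpu")
plain = memory_after_forward(device)
with_batch_grad = memory_after_forward(
    device, context=lambda: backpack(extensions.BatchGrad())
)
# Both values are in bytes, as reported by pytorch_current_memory_usage.
print(plain, with_batch_grad)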
Example #2
def setup(device):
    """Load MNIST batch, create extended CNN and loss function. Load to device.

    Args:
        device (torch.device): Device that all objects are transferred to.

    Returns:
        inputs, labels, model, loss function
    """
    X, y = load_one_batch_mnist(batch_size=64)
    X, y = X.to(device), y.to(device)

    model = extend(
        Sequential(
            Conv2d(1, 128, 3, padding=1),
            ReLU(),
            MaxPool2d(3, stride=2),
            Conv2d(128, 256, 3, padding=1),
            ReLU(),
            MaxPool2d(3, padding=1, stride=2),
            Conv2d(256, 64, 3, padding=1),
            ReLU(),
            MaxPool2d(3, stride=2),
            Conv2d(64, 32, 3, padding=1),
            ReLU(),
            MaxPool2d(3, stride=2),
            Flatten(),
            Linear(32, 10),
        ).to(device)
    )

    lossfunc = extend(CrossEntropyLoss().to(device))

    return X, y, model, lossfunc
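
A hedged usage sketch for `setup` (not from the original source; it assumes `backpack` and `extensions` are imported from the backpack package): run a first-order BackPACK extension on the returned objects.

X, y, model, lossfunc = setup("cpu")
loss = lossfunc(model(X), y)
with backpack(extensions.BatchGrad()):
    loss.backward()
# After the backward pass, every parameter carries per-sample gradients.
print([p.grad_batch.shape for p in model.parameters()])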
Example #3
def backpack_individual_gradients(X, y, model, loss_func):
    """Individual gradients with BackPACK.

    Args:
        X (torch.Tensor): Mini-batch of shape `(N, *)`
        y (torch.Tensor): Labels for `X`
        model (torch.nn.Module): Model for forward pass
        loss_func (torch.nn.Module): Loss function for model prediction

    Returns:
        [torch.Tensor]: Individual gradients for samples in the mini-batch
            with respect to the model parameters. Arranged in the same order
            as `model.parameters()`.
    """
    model = extend(model)
    loss_func = extend(loss_func)

    loss = loss_func(model(X), y)

    with backpack(extensions.BatchGrad()):
        loss.backward()

    individual_gradients = [p.grad_batch for p in model.parameters()]

    return individual_gradients
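
A minimal usage sketch (assumed data and model, not taken from the original source; assumes torch is imported): per-sample gradients of a small linear classifier on random MNIST-shaped inputs.

model = torch.nn.Sequential(torch.nn.Flatten(), torch.nn.Linear(784, 10))
loss_func = torch.nn.CrossEntropyLoss()
X = torch.rand(8, 1, 28, 28)
y = torch.randint(0, 10, (8,))

grads = backpack_individual_gradients(X, y, model, loss_func)
# Each entry has the batch size 8 as its leading dimension.
print([g.shape for g in grads])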
Example #4
    def __init__(self,
                 obs_shape,
                 num_actions,
                 base_kwargs=None,
                 extra_kwargs=None):
        super(Policy, self).__init__()
        self.use_backpack = extra_kwargs['use_backpack']
        self.recurrent_hidden_state_size = 1
        num_outputs = num_actions
        hidden_size = 512
        conv_init_ = lambda m: init(
            m, nn.init.orthogonal_, lambda x: nn.init.constant_(x, 0),
            nn.init.calculate_gain('relu'))
        lin_init_ = lambda m: init(
            m, nn.init.orthogonal_, lambda x: nn.init.constant_(x, 0))
        self.model = nn.Sequential(
            conv_init_(nn.Conv2d(obs_shape[0], 32, 8, stride=4)), nn.ReLU(),
            conv_init_(nn.Conv2d(32, 64, 4, stride=2)), nn.ReLU(),
            conv_init_(nn.Conv2d(64, 32, 3, stride=1)), nn.ReLU(), Flatten(),
            conv_init_(nn.Linear(32 * 7 * 7, hidden_size)), nn.ReLU(),
            lin_init_(nn.Linear(hidden_size, num_outputs)))
        if self.use_backpack:
            extend(self.model)

        self.model.train()
Example #5
    def __init__(self, arg_dict):
        BaseAlg.__init__(self, arg_dict)
        #! changed the user and item dimensions
        self.learner = extend(NeuMF(user_dim=100, item_dim=100, mf_dim=32, mlp_dim = [32, 16, 8], lr=1e-3).cuda())
        self.lossfunc = extend(torch.nn.BCELoss())

        #! changed the user embedding filename
        self.path = './Dataset/delicious_100.dat'
        self.user_feature = []
        with open(self.path, 'r') as f:
            for line in f:
                if line.strip():
                    self.user_feature.append(torch.from_numpy(np.genfromtxt(io.StringIO(line), delimiter=" ")).to(dtype=torch.float).cuda())
                else:
                    self.user_feature.append(None)
        print(len(self.user_feature))
        self.data = DataLoader()
        self.cnt = 0
        self.batch = 100

        torch.set_num_threads(8)
        torch.set_num_interop_threads(8)

        self.lamdba = 1
        self.nu = 1
        self.U = self.lamdba * torch.ones((self.learner.total_param), dtype=torch.float).cuda()
        self.U1 = torch.zeros((self.learner.total_param), dtype=torch.float).cuda()
        self.g = None
        self.reg = None
        self.t1 = time.time()
Example #6
def test_convolutions_stride_issue_30(params):
    """
    https://github.com/f-dangel/backpack/issues/30

    The gradient for the convolution is wrong when `stride` is not a multiple of
    `D + 2*padding - dilation*(kernel-1) - 1`.
    """
    torch.manual_seed(0)

    mod = torch.nn.Conv2d(
        in_channels=params["C_in"],
        out_channels=params["C_out"],
        kernel_size=params["K"],
        stride=params["S"],
        padding=params["pad"],
        dilation=params["dil"],
    )
    backpack.extend(mod)
    x = torch.randn(size=(params["N"], params["C_in"], params["W"], params["H"]))

    with backpack.backpack(backpack.extensions.BatchGrad()):
        loss = torch.sum(mod(x))
        loss.backward()

        for p in mod.parameters():
            assert torch.allclose(p.grad, p.grad_batch.sum(0), rtol=1e-04, atol=1e-04)
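
A hypothetical parameter set for the regression test above; the dictionary keys mirror what the test reads, but the concrete values are illustrative assumptions rather than the original pytest fixture.

params = {"N": 2, "C_in": 3, "C_out": 4, "K": 3, "S": 2, "pad": 1, "dil": 1, "W": 8, "H": 8}
test_convolutions_stride_issue_30(params)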
Example #7
def test_compute_hessians(network, dataset, data_type, network_type, x, y):
    network.train()

    batch_size, num_iterations = get_batch_type(data_type)
    batch_sampler = BatchSampler(dataset, num_iterations,
                                 batch_size)  # train by iteration, not epoch
    data_loader = DataLoader(dataset,
                             batch_sampler=batch_sampler,
                             num_workers=4)

    network_seq = get_seq_network(network_type)
    network_seq = copy_network(network, network_seq)

    criterion = nn.CrossEntropyLoss()
    criterion = extend(criterion)
    network_seq = extend(network_seq).cuda()

    hessians = None
    x = x.cuda()
    y = y.cuda()
    if data_type == 'mnist':
        x = x.view(len(x), -1)

    out = network_seq(x)
    loss = criterion(out, y)

    with backpack(DiagHessian()):
        loss.backward()

    hessians = get_hessians(network_seq, hessians)

    return hessians, x, y
Example #8
def Diag_second_order(model, train_loader, prec0=10, device='cpu'):
    """Estimate a diagonal Laplace approximation for the last linear layer.

    Uses BackPACK's DiagHessian to obtain per-batch diagonal Hessians of the
    weight and bias, adds the prior variance ``1 / prec0``, and averages over
    batches. Returns the posterior means and the resulting diagonal factors.
    """
    W = list(model.parameters())[-2]
    b = list(model.parameters())[-1]
    m, n = W.shape
    print("n: {} inputs to linear layer with m: {} classes".format(n, m))
    lossfunc = torch.nn.CrossEntropyLoss()

    var0 = 1 / prec0

    extend(lossfunc, debug=False)
    extend(model.linear, debug=False)

    with backpack(DiagHessian()):

        max_len = len(train_loader)
        weights_cov = torch.zeros(max_len, m, n, device=device)
        biases_cov = torch.zeros(max_len, m, device=device)

        for batch_idx, (x, y) in enumerate(train_loader):

            if device == 'cuda':
                x, y = x.cuda(), y.cuda()

            model.zero_grad()
            lossfunc(model(x), y).backward()

            with torch.no_grad():
                # Hessian of weight
                W_ = W.diag_h
                b_ = b.diag_h

                # add_prior: since it will be flattened later, we can simply add the prior like this
                W_ += var0 * torch.ones(W_.size(), device=device)
                b_ += var0 * torch.ones(b_.size(), device=device)

            weights_cov[batch_idx] = W_
            biases_cov[batch_idx] = b_

            print("Batch: {}/{}".format(batch_idx, max_len))

        print(len(weights_cov))
        C_W = torch.mean(weights_cov, dim=0)
        C_b = torch.mean(biases_cov, dim=0)

    # Predictive distribution
    with torch.no_grad():
        M_W_post = W.t()
        M_b_post = b

        C_W_post = C_W
        C_b_post = C_b

    print("M_W_post size: ", M_W_post.size())
    print("M_b_post size: ", M_b_post.size())
    print("C_W_post size: ", C_W_post.size())
    print("C_b_post size: ", C_b_post.size())

    return (M_W_post, M_b_post, C_W_post, C_b_post)
def KFLP_second_order(model, batch_size, train_loader, var0=10, device='cpu'):
    """Estimate a Kronecker-factored Laplace approximation for the last layer.

    Accumulates BackPACK KFAC factors of the weight and bias over the training
    set with an exponential moving average, adds the prior precision
    ``1 / var0``, and returns the posterior means together with the inverted
    Kronecker factors.
    """
    W = list(model.parameters())[-2]
    b = list(model.parameters())[-1]
    m, n = W.shape
    lossfunc = torch.nn.CrossEntropyLoss()

    tau = 1 / var0

    extend(lossfunc, debug=False)
    extend(model.fc, debug=False)

    with backpack(KFAC()):
        U = torch.zeros(m, m, device=device)
        V = torch.zeros(n, n, device=device)
        B = torch.zeros(m, m, device=device)

        max_len = int(np.ceil(len(train_loader.dataset) / batch_size))
        for batch_idx, (x, y) in enumerate(train_loader):

            if device == 'cuda':
                x, y = x.cuda(), y.cuda()

            model.zero_grad()
            lossfunc(model(x), y).backward()

            with torch.no_grad():
                # Hessian of weight
                U_, V_ = W.kfac
                B_ = b.kfac[0]

                U_ = np.sqrt(batch_size) * U_ + np.sqrt(tau) * torch.eye(
                    m, device=device)
                V_ = np.sqrt(batch_size) * V_ + np.sqrt(tau) * torch.eye(
                    n, device=device)
                B_ = batch_size * B_ + tau * torch.eye(m, device=device)

                rho = min(1 - 1 / (batch_idx + 1), 0.95)

                U = rho * U + (1 - rho) * U_
                V = rho * V + (1 - rho) * V_
                B = rho * B + (1 - rho) * B_

            print("Batch: {}/{}".format(batch_idx, max_len))

    # Predictive distribution
    with torch.no_grad():
        M_W_post = W.t()
        M_b_post = b

        # Covariances for Laplace
        U_post = torch.inverse(V)  # Interchanged since W is transposed
        V_post = torch.inverse(U)
        B_post = torch.inverse(B)

    return (M_W_post, M_b_post, U_post, V_post, B_post)
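
A hedged usage sketch for `KFLP_second_order` above (assumptions: a classifier that exposes its last linear layer as `model.fc`, and a standard training DataLoader; neither is defined in this snippet). `Diag_second_order` is called analogously for a model with a `model.linear` attribute.

M_W, M_b, U_post, V_post, B_post = KFLP_second_order(
    model, batch_size=128, train_loader=train_loader, var0=10, device='cuda'
)
# M_W, M_b are the Laplace posterior means; U_post, V_post (and B_post for the
# bias) are the inverted Kronecker curvature factors used as its covariance.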
Example #10
    def __init__(
        self,
        tproblem,
        logpath,
        track_interval=1,
        quantities=None,
        plot=True,
        plot_schedule=None,
        secondary_screen=False,
    ):
        """Initialize the Cockpit.

        Args:
            tproblem (deepobs.pytorch.testproblem): A DeepOBS testproblem.
                Alternatively, it could also be a general PyTorch net.
            logpath (str): Path to the log file.
            track_interval (int, optional): Tracking rate.
                Defaults to 1, meaning every iteration is tracked.
            quantities (list, optional): List of quantities (classes or instances)
                that should be tracked. Defaults to None, which would use all
                implemented ones.
            plot (bool, optional): Whether results should be plotted.
            plot_schedule (callable): Function that maps an iteration to a boolean
                which determines if a plot should be created and tracked data output
                should be written.
            secondary_screen (bool): Whether to plot other experimental quantities
                on a secondary screen.
        """
        # Store all parameters as attributes
        self.tproblem = tproblem
        self.logpath = logpath
        self.track_interval = track_interval
        self.quantities = quantities

        self.create_graph = False
        self.output = defaultdict(dict)

        # Collect quantities
        self.quantities = self._collect_quantities(quantities, track_interval)

        # Extend testproblem
        if isinstance(tproblem, TestProblem):
            extend_with_access_unreduced_loss(tproblem, detach=True)
        else:
            model, lossfunc = tproblem
            extend(model)
            extend(lossfunc)

        # Prepare logpath
        self._prepare_logpath(logpath)

        # Create a Cockpit Plotter instance
        self._plot_schedule = plot_schedule
        self._enable_plotting = plot
        if self._enable_plotting:
            self.cockpit_plotter = CockpitPlotter(
                self.logpath, secondary_screen=secondary_screen)
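
A hedged construction sketch for the Cockpit above (illustrative names, not from the original source; it assumes the `(model, lossfunc)` tuple path rather than a DeepOBS test problem, and that torch is imported).

model = torch.nn.Sequential(torch.nn.Flatten(), torch.nn.Linear(784, 10))
lossfunc = torch.nn.CrossEntropyLoss()
cockpit = Cockpit(
    (model, lossfunc), logpath="./logs/run", track_interval=10, plot=False
)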
Example #11
def test_network_diag_ggn(model_and_input):
    """Test whether the given module can compute diag_ggn.

    This test is placed here, because some models are too big to run with PyTorch.
    Thus, a full diag_ggn comparison with PyTorch is impossible.
    This test just checks whether it runs on BackPACK without errors.
    Additionally, it checks that the forward pass output matches that of the original model.
    Finally, a small number of elements of DiagGGN are compared.

    Args:
        model_and_input: module to test

    Raises:
        NotImplementedError: if loss_fn is not MSELoss or CrossEntropyLoss
    """
    model_original, x, loss_fn = model_and_input
    model_original = model_original.eval()
    output_compare = model_original(x)
    if isinstance(loss_fn, MSELoss):
        y = regression_targets(output_compare.shape)
    elif isinstance(loss_fn, CrossEntropyLoss):
        y = classification_targets(
            (output_compare.shape[0], *output_compare.shape[2:]),
            output_compare.shape[1],
        )
    else:
        raise NotImplementedError(
            f"test cannot handle loss_fn = {type(loss_fn)}")

    num_params = sum(p.numel() for p in model_original.parameters()
                     if p.requires_grad)
    num_to_compare = 10
    idx_to_compare = linspace(0, num_params - 1, num_to_compare, dtype=int32)
    diag_ggn_exact_to_compare = autograd_diag_ggn_exact(x,
                                                        y,
                                                        model_original,
                                                        loss_fn,
                                                        idx=idx_to_compare)

    model_extended = extend(model_original, use_converter=True, debug=True)
    output = model_extended(x)

    assert allclose(output, output_compare)

    loss = extend(loss_fn)(output, y)

    with backpack(DiagGGNExact()):
        loss.backward()

    diag_ggn_exact_vector = cat([
        p.diag_ggn_exact.flatten() for p in model_extended.parameters()
        if p.requires_grad
    ])

    for idx, element in zip(idx_to_compare, diag_ggn_exact_to_compare):
        assert allclose(element, diag_ggn_exact_vector[idx], atol=1e-5)
Example #12
def get_posterior(model, train_loader, var0, mnist=False, batch_size=128):
    W = list(model.parameters())[-2]
    b = list(model.parameters())[-1]
    m, n = W.shape
    lossfunc = torch.nn.CrossEntropyLoss()

    tau = 1/var0

    extend(lossfunc, debug=False)
    extend(model.linear if not mnist else model.fc2, debug=False)

    with backpack(KFAC()):
        U, V = torch.zeros(m, m, device='cuda'), torch.zeros(n, n, device='cuda')
        B = torch.zeros(m, m, device='cuda')

        # for i, (x, y) in tqdm(enumerate(train_loader)):
        for i, (x, y) in enumerate(train_loader):
            x, y = x.cuda(), y.cuda()

            model.zero_grad()
            lossfunc(model(x), y).backward()

            with torch.no_grad():
                # Hessian of weight
                U_, V_ = W.kfac
                B_ = b.kfac[0]

                # U_ = sqrt(batch_size)*U_ + sqrt(tau)*torch.eye(m, device='cuda')
                # V_ = sqrt(batch_size)*V_ + sqrt(tau)*torch.eye(n, device='cuda')
                # B_ = batch_size*B_ + tau*torch.eye(m, device='cuda')

                rho = min(1-1/(i+1), 0.95)

                U = rho*U + (1-rho)*U_
                V = rho*V + (1-rho)*V_
                B = rho*B + (1-rho)*B_


    # Predictive distribution
    with torch.no_grad():
        M_W_post = W.t()
        M_b_post = b

        # Add priors
        n_data = len(train_loader.dataset)
        U = sqrt(n_data)*U + sqrt(tau)*torch.eye(m, device='cuda')
        V = sqrt(n_data)*V + sqrt(tau)*torch.eye(n, device='cuda')
        B = n_data*B + tau*torch.eye(m, device='cuda')

        # Covariances for Laplace
        U_post = torch.inverse(V)  # Interchanged since W is transposed
        V_post = torch.inverse(U)
        B_post = torch.inverse(B)

    return M_W_post, M_b_post, U_post, V_post, B_post
Example #13
    def _preprocess(self, tproblem, backpack_debug):
        """Make model and loss function BackPACKable."""
        extend(tproblem.net, debug=backpack_debug)
        tproblem._old_loss = tproblem.loss_function

        def hotfix_lossfunc(reduction="mean"):
            return extend(tproblem._old_loss(reduction=reduction),
                          debug=backpack_debug)

        tproblem.loss_function = hotfix_lossfunc
        return tproblem
Example #14
def data_prep_cifar10_small(use_sigmoid=False):
    model = extend(net_cifar10_3c3d_small(use_sigmoid)).to(device)
    lossfunc = extend(nn.CrossEntropyLoss())

    dataset = datasets.CIFAR10(
        './data',
        train=True,
        download=True,
        transform=cifar_transform
    )

    return model, lossfunc, make_loader_for_dataset(dataset)
Example #15
def make_small_linear_classification_problem():
    Ds = [32, 16, 4]
    model = torch.nn.Sequential(
        extend(torch.nn.Linear(Ds[0], Ds[1])),
        extend(torch.nn.Sigmoid()),
        extend(torch.nn.Linear(Ds[1], Ds[2])),
    )
    N = 32
    X = torch.randn(size=(N, Ds[0]))
    Y = torch.randint(high=Ds[-1], size=(N, ))
    lossfunc = extend(torch.nn.CrossEntropyLoss())
    return TestProblem(X, Y, model, lossfunc)
Example #16
def dummy_forward_pass_conv():
    N, C, H, W = 2, 3, 4, 4
    X = torch.randn(N, C, H, W)
    Y = torch.randint(high=5, size=(N,))
    conv = Conv2d(3, 2, 2)
    lin = Linear(18, 5)
    model = extend(Sequential(conv, Flatten(), lin))
    loss = extend(CrossEntropyLoss())

    def forward():
        return loss(model(X), Y)

    return forward, (conv.weight, lin.weight), (conv.bias, lin.bias)
Example #17
def test_extension_hook_executes_on_custom_module():
    """Cockpit's extension hook is only skipped for known containers like Sequential.

    It will thus execute on custom containers and lead to crashes whenever a quantity
    that uses extension hooks is used.
    """
    manual_seed(0)
    N, D_in, D_out = 2, 3, 1

    # NOTE Inheriting from Sequential passes
    class CustomModule(Module):
        """Custom container that is not skipped by the extension hook."""
        def __init__(self):
            super().__init__()
            self.linear = Linear(D_in, D_out)
            self.relu = ReLU()

        def forward(self, x: Tensor) -> Tensor:
            return self.relu(self.linear(x))

    uses_extension_hook = GradHist1d(linear(interval=1))
    config = [uses_extension_hook]

    model = extend(CustomModule())
    cockpit = Cockpit(model.parameters(), quantities=config)

    opt = SGD(model.parameters(), lr=0.1)

    loss_fn = extend(MSELoss(reduction="mean"))
    individual_loss_fn = MSELoss(reduction="none")

    global_step = 0
    inputs, labels = rand(N, D_in), rand(N, D_out)

    # forward pass
    outputs = model(inputs)
    loss = loss_fn(outputs, labels)
    losses = individual_loss_fn(outputs, labels)

    # backward pass
    with cockpit(
            global_step,
            info={
                "batch_size": N,
                "individual_losses": losses,
                "loss": loss,
                "optimizer": opt,
            },
    ):
        loss.backward(create_graph=cockpit.create_graph(global_step))
Example #18
def test_no_io():
    """Check IO is not tracked."""
    torch.manual_seed(0)

    input = torch.rand(3, 5)
    module = torch.nn.Linear(5, 2)
    extend(module)

    with disable():
        module(input)
        assert not hasattr(module, "input0")
        assert not hasattr(module, "output")

    module(input)
    assert hasattr(module, "input0")
    assert hasattr(module, "output")
Example #19
    def __init__(self, X, Y, model, lossfunc, device=DEVICE_CPU):
        """
        A traditional machine learning test problem, loss(model(X), Y)

        X: [N x D_X]
        Y: [N x D_Y]
        model: [N x D_X] -> [N x D_out]
        loss: [N x D_out] x [N x D_Y] -> scalar
        """
        self.X = X
        self.Y = Y
        self.model = extend(model)
        self.lossfunc = extend(lossfunc)
        self.device = device
        self.to(device)
        self.N = self.X.shape[0]
def convlayer2(conv_cls, settings):
    return extend(
        conv_cls(in_channels=settings["in_features"][0],
                 out_channels=settings["out_channels"],
                 kernel_size=settings["kernel_size"],
                 padding=settings["padding"],
                 bias=settings["bias"]))
Example #21
def test_for_loop_replace() -> None:
    """Application of retain_graph: replace an outer for-loop.

    This test is based on issue #220 opened by Romain3Ch216.
    It computes per-component individual gradients of a tensor-valued output
    with a for loop over components, rather than over samples and components.
    """
    manual_seed(0)
    B = 5
    M = 3
    h = 2

    x = randn(B, h)
    fc = extend(Linear(h, M))
    A = fc(x)

    grad_autograd = zeros(B, M, *fc.weight.shape)
    for b in range(B):
        for m in range(M):
            with backpack(retain_graph=True):
                grads = autograd.grad(A[b, m], fc.weight, retain_graph=True)
            grad_autograd[b, m] = grads[0]

    grad_backpack = zeros(B, M, *fc.weight.shape)
    for i in range(M):
        with backpack(BatchGrad(), retain_graph=True):
            A[:, i].backward(ones_like(A[:, i]), retain_graph=True)
        grad_backpack[:, i] = fc.weight.grad_batch

    check_sizes_and_values(grad_backpack, grad_autograd)
Example #22
def train_model_for_label(ar, label):
  img_shape = (ar.channels, ar.img_size, ar.img_size)
  device = pt.device("cuda" if pt.cuda.is_available() else "cpu")
  gen = Generator(ar.latent_dim, img_shape).to(device)  # Initialize generator and discriminator
  dis = Discriminator(img_shape).to(device)
  if ar.dp_noise > 0.:
    dis = extend(dis)

  dataloader, n_data = get_single_label_dataloader(ar.batch_size, label, ar.data_key)

  # Optimizers
  gen_opt = pt.optim.RMSprop(gen.parameters(), lr=ar.lr)
  dis_opt = pt.optim.RMSprop(dis.parameters(), lr=ar.lr)

  batches_done = 0
  for epoch in range(ar.n_epochs):
    for idx, (real_imgs, _) in enumerate(dataloader):
      train_gen = batches_done % ar.n_critic == 0
      is_final_batch = epoch + 1 == ar.n_epochs and idx + 1 == len(dataloader)

      log_vals = train_batch(real_imgs, device, dis_opt, gen_opt, dis, gen, ar.clip_value, train_gen,
                             ar.dp_clip, ar.dp_noise)

      log_progress(log_vals, batches_done, len(dataloader), epoch, ar, label, is_final_batch)
      batches_done += 1
  if ar.synth_data:
    make_synth_data(gen, n_data, device, ar.log_name, label)
def convlayer():
    return extend(
        torch.nn.Conv2d(in_channels=TEST_SETTINGS["in_features"][0],
                        out_channels=TEST_SETTINGS["out_channels"],
                        kernel_size=TEST_SETTINGS["kernel_size"],
                        padding=TEST_SETTINGS["padding"],
                        bias=TEST_SETTINGS["bias"]))
Example #24
def problem(device, request) -> Tuple[Module, Tensor, str]:
    """Return extended nested sequential with loss from a forward pass.

    Args:
        device: available device
        request: pytest request

    Yields:
        model, loss and problem_string

    Raises:
        NotImplementedError: if the problem_string is unknown
    """
    problem_string = request.param
    manual_seed(0)

    B = 2
    X = rand(B, 4).to(device)
    y = classification_targets((B, ), 2).to(device)

    if problem_string == NESTED_SEQUENTIAL:
        model = Sequential(
            Linear(4, 3, bias=False),
            Sequential(Linear(3, 2, bias=False), ),
        )
    elif problem_string == CUSTOM_CONTAINER:

        class _MyCustomModule(Module):
            def __init__(self):
                super().__init__()
                self.linear1 = Linear(4, 3, bias=False)
                self.linear2 = Linear(3, 2, bias=False)

            def forward(self, x):
                x = self.linear1(x)
                x = self.linear2(x)
                return x

        model = _MyCustomModule()
    else:
        raise NotImplementedError(
            f"problem={problem_string} but no test setting for this.")

    model = extend(model.to(device))
    lossfunc = extend(CrossEntropyLoss(reduction="mean").to(device))
    loss = lossfunc(model(X), y)
    yield model, loss, problem_string
Example #25
def backpack_ea_jac_t_mat_jac_prod(layer, input, mat):
    layer = extend(layer)
    derivative = derivative_from_layer(layer)

    # forward pass to initialize backpack buffers
    _ = layer(input)

    return derivative.ea_jac_t_mat_jac_prod(layer, None, None, mat)
def make_classification_problem(pooling_cls):
    model = torch.nn.Sequential(convlayer(), pooling(pooling_cls), Flatten())

    Y = torch.randint(high=X.shape[1], size=(model(X).shape[0], ))

    lossfunc = extend(torch.nn.CrossEntropyLoss())

    return TestProblem(X, Y, model, lossfunc)
Example #27
def backpack_sum_hessian(layer, input, targets):
    layer = extend(layer)
    derivative = derivative_from_layer(layer)

    # forward pass to initialize backpack buffers
    _ = layer(input, targets)

    sum_hessian = derivative.sum_hessian(layer, None, None)
    return sum_hessian
def data():
    N = 5
    Ds = [20, 10, 3]

    X = randn(N, Ds[0])
    Y = randint(high=Ds[-1], size=(N, ))

    manual_seed(0)
    model1 = Sequential(
        extend(Linear(Ds[0], Ds[1])), extend(Linear(Ds[1], Ds[2])))

    manual_seed(0)
    model2 = Sequential(
        extend(LinearConcat(Ds[0], Ds[1])), extend(LinearConcat(Ds[1], Ds[2])))

    loss = CrossEntropyLoss()

    return X, Y, model1, model2, loss
def convlayer2(join_params):
    conv_cls = Conv2dConcat if join_params else Conv2d
    return extend(
        conv_cls(
            in_channels=TEST_SETTINGS["in_features"][0],
            out_channels=TEST_SETTINGS["out_channels"],
            kernel_size=TEST_SETTINGS["kernel_size"],
            padding=TEST_SETTINGS["padding"],
            bias=TEST_SETTINGS["bias"]))
Example #30
def make_regression_problem(pooling_cls):
    model = torch.nn.Sequential(convlayer(), pooling(pooling_cls),
                                torch.nn.Flatten(), linearlayer())

    Y = torch.randn(size=(model(X).shape[0], 1))

    lossfunc = extend(torch.nn.MSELoss())

    return TestProblem(X, Y, model, lossfunc)