Example #1
    def hvp_fn(x):

        # x arrives as a flat vector (e.g. from a CG solver); it is the
        # "v" in the Hessian-vector product.
        x_tensor = torch.tensor(x, requires_grad=False)
        if gpu >= 0:
            x_tensor = x_tensor.cuda()

        params, names = make_functional(model)
        # Make params regular Tensors instead of nn.Parameter
        params = tuple(p.detach().requires_grad_() for p in params)
        flat_params = parameters_to_vector(params)

        hvp = torch.zeros_like(flat_params)

        # Average the per-batch Hessian-vector products over the training set
        for x_train, y_train in train_loader:

            if gpu >= 0:
                x_train, y_train = x_train.cuda(), y_train.cuda()

            def f(flat_params_):
                split_params = tensor_to_tuple(flat_params_, params)
                load_weights(model, names, split_params)
                out = model(x_train)
                loss = calc_loss(out, y_train)
                return loss

            batch_hvp = vhp(f, flat_params, x_tensor, strict=True)[1]

            hvp += batch_hvp / float(len(train_loader))

        with torch.no_grad():
            # Restore the model's original nn.Parameters, then apply damping:
            # hvp_fn(x) returns (H + damp * I) @ x, so the damping term uses
            # the input vector (the original `v_flat` was undefined here)
            load_weights(model, names, params, as_params=True)
            damped_hvp = hvp + damp * x_tensor

        return damped_hvp.cpu().numpy()
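These snippets all lean on a small set of helpers (`make_functional`, `load_weights`, `tensor_to_tuple`) that turn an `nn.Module` into a pure function of a flat parameter vector, so that `torch.autograd.functional.vhp` and `hessian` can differentiate through it. Their implementations are not shown here; the following is a minimal sketch of what they could look like, with the signatures inferred from the call sites above (an assumption, not the original code).

import torch

def make_functional(model):
    """Detach all parameters from `model`, returning them with their names."""
    params = tuple(model.parameters())
    names = [name for name, _ in model.named_parameters()]
    # Remove the nn.Parameter attributes so plain tensors can be loaded back
    for name in names:
        _del_nested_attr(model, name.split("."))
    return params, names

def load_weights(model, names, params, as_params=False):
    """Attach `params` back onto `model` under `names`."""
    for name, p in zip(names, params):
        value = torch.nn.Parameter(p) if as_params else p
        _set_nested_attr(model, name.split("."), value)

def tensor_to_tuple(vec, params):
    """Split a flat vector into tensors shaped like `params`."""
    pointer, split = 0, []
    for p in params:
        n = p.numel()
        split.append(vec[pointer:pointer + n].view_as(p))
        pointer += n
    return tuple(split)

def _del_nested_attr(obj, names):
    if len(names) == 1:
        delattr(obj, names[0])
    else:
        _del_nested_attr(getattr(obj, names[0]), names[1:])

def _set_nested_attr(obj, names, value):
    if len(names) == 1:
        setattr(obj, names[0], value)
    else:
        _set_nested_attr(getattr(obj, names[0]), names[1:], value)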
def s_test(x_test, y_test, model, i, samples_loader, gpu=-1, damp=0.01, scale=25.0):
    """s_test can be precomputed for each test point of interest, and then
    multiplied with grad_z to get the desired value for each training point.
    Here, stochastic estimation (the LiSSA recursion) is used to calculate
    s_test, the inverse-Hessian-vector product H^{-1} grad L(x_test, y_test).

    Arguments:
        x_test: torch tensor, test data points, such as test images
        y_test: torch tensor, labels for x_test
        model: torch NN, model used to evaluate the dataset
        i: int, index of the test sample (used in the progress bar)
        samples_loader: torch DataLoader, can load the training dataset
        gpu: int, GPU id to use if >= 0; -1 means run on the CPU
        damp: float, dampening factor
        scale: float, scaling factor

    Returns:
        h_estimate: list of torch tensors, s_test"""

    # The initial IHVP estimate is v = grad_z itself
    v = grad_z(x_test, y_test, model, gpu)
    h_estimate = v

    params, names = make_functional(model)
    # Make params regular Tensors instead of nn.Parameter
    params = tuple(p.detach().requires_grad_() for p in params)

    # TODO: Dynamically set the recursion depth so that iterations stop once h_estimate stabilises
    progress_bar = tqdm(samples_loader, desc=f"IHVP sample {i}")
    # `step` avoids shadowing the test-sample index `i` used in the desc above
    for step, (x_train, y_train) in enumerate(progress_bar):

        if gpu >= 0:
            x_train, y_train = x_train.cuda(), y_train.cuda()

        def f(*new_params):
            load_weights(model, names, new_params)
            out = model(x_train)
            loss = model.loss(out, y_train)
            return loss

        hv = vhp(f, params, tuple(h_estimate), strict=True)[1]

        # Recursively calculate h_estimate
        with torch.no_grad():
            h_estimate = [
                _v + (1 - damp) * _h_e - _hv / scale
                for _v, _h_e, _hv in zip(v, h_estimate, hv)
            ]

            if step % 100 == 0:
                norm = sum([h_.norm() for h_ in h_estimate])
                progress_bar.set_postfix({"est_norm": norm.item()})

    with torch.no_grad():
        # Restore the model's original nn.Parameters
        load_weights(model, names, params, as_params=True)

    return h_estimate
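For reference, the update above is the standard LiSSA recursion: at its fixed point, h = v + (1 - damp) * h - H h / scale, which rearranges to h = scale * (H + damp * scale * I)^{-1} v. The estimate therefore approximates the damped inverse-Hessian-vector product up to the `scale` factor, provided the recursion converges (all Hessian eigenvalues below `scale`). A minimal usage sketch, assuming a trained `model`, a test point, and a training `DataLoader` are in scope:

# Hypothetical usage of s_test
h_est = s_test(x_test, y_test, model, i=0, samples_loader=train_loader,
               gpu=-1, damp=0.01, scale=25.0)
# Divide out the scale factor to recover the (damped) IHVP, per the note above
ihvp = [h / 25.0 for h in h_est]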
def s_test_sample_exact(model, x_test, y_test, train_loader, gpu=-1):

    grads = grad_z(x_test, y_test, model, gpu=gpu)
    flat_grads = parameters_to_vector(grads)

    def make_loss_f(model, params, names, x, y):
        # Build a scalar loss as a function of the flat parameter vector only,
        # so torch.autograd.functional.hessian can differentiate through it
        def f(flat_params_):
            split_params = tensor_to_tuple(flat_params_, params)
            load_weights(model, names, split_params)
            out = model(x)
            loss = model.loss(out, y)
            return loss

        return f

    # Make model functional
    params, names = make_functional(model)
    # Make params regular Tensors instead of nn.Parameter
    params = tuple(p.detach().requires_grad_() for p in params)
    flat_params = parameters_to_vector(params)

    h = torch.zeros([flat_params.shape[0], flat_params.shape[0]])
    if gpu >= 0:
        h = h.cuda()
    # Accumulate the exact Hessian, averaged over training batches
    for x_train, y_train in train_loader:
        if gpu >= 0:
            x_train, y_train = x_train.cuda(), y_train.cuda()
        f = make_loss_f(model, params, names, x_train, y_train)
        batch_h = hessian(f, flat_params, strict=True)
        with torch.no_grad():
            h += batch_h / float(len(train_loader))
    # Symmetrise to counter small numerical asymmetries
    h = (h + h.transpose(0, 1)) / 2
    with torch.no_grad():
        load_weights(model, names, params, as_params=True)
        inv_h = torch.inverse(h)
        print("Inverse Hessian")
        print(inv_h)
        real_ihvp = inv_h @ flat_grads
    return tensor_to_tuple(real_ihvp, params)
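Since `s_test_sample_exact` builds and inverts the full Hessian, it only scales to small models, but it makes a useful ground truth for the stochastic estimate. A minimal sanity check, assuming the functions above are in scope (hypothetical, not part of the original):

# Compare the LiSSA estimate against the exact IHVP
exact = parameters_to_vector(
    s_test_sample_exact(model, x_test, y_test, train_loader))
estimate = parameters_to_vector(
    s_test(x_test, y_test, model, 0, train_loader, damp=0.0, scale=25.0))
estimate = estimate / 25.0  # undo the LiSSA scale factor

rel_err = torch.norm(exact - estimate) / torch.norm(exact)
print(f"relative error: {rel_err.item():.4f}")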
Example #4
    @classmethod
    def setUpClass(cls) -> None:
        pl.seed_everything(0)

        cls.n_features = 10

        cls.n_params = 2 * cls.n_features

        cls.model = LinearRegression(cls.n_features)

        gpus = 1 if torch.cuda.is_available() else 0
        trainer = pl.Trainer(gpus=gpus, max_epochs=10)
        # trainer.fit(cls.model)

        print(tuple(cls.model.parameters()))
        use_sklearn = True
        if use_sklearn:
            train_dataset = DummyDataset(cls.n_features)
            clf = SklearnLR()
            clf.fit(train_dataset.data, train_dataset.targets)

            with torch.no_grad():
                cls.model.linear.weight = torch.nn.Parameter(
                    torch.tensor([clf.coef_], dtype=torch.float))
                cls.model.linear.bias = torch.nn.Parameter(
                    torch.tensor([clf.intercept_], dtype=torch.float))

        cls.train_loader = cls.model.train_dataloader(batch_size=40000)

        # Setup test point data
        cls.test_idx = 8
        cls.x_test = torch.tensor(cls.model.test_set.data[[cls.test_idx]],
                                  dtype=torch.float)
        cls.y_test = torch.tensor(cls.model.test_set.targets[[cls.test_idx]],
                                  dtype=torch.float)

        # Compute estimated IHVP
        cls.gpu = 1 if torch.cuda.is_available() else -1

        # Compute and flatten the gradient
        grads = grad_z(cls.x_test, cls.y_test, cls.model, gpu=cls.gpu)
        flat_grads = parameters_to_vector(grads)

        print("Grads:")
        print(flat_grads)

        # Make model functional
        params, names = make_functional(cls.model)
        # Make params regular Tensors instead of nn.Parameter
        params = tuple(p.detach().requires_grad_() for p in params)
        flat_params = parameters_to_vector(params)

        # Initialize Hessian
        h = torch.zeros([flat_params.shape[0], flat_params.shape[0]])
        if cls.gpu >= 0:
            h = h.cuda()

        # Accumulate the exact Hessian over the training set
        for x_train, y_train in cls.train_loader:

            if cls.gpu >= 0:
                x_train, y_train = x_train.cuda(), y_train.cuda()

            def f(flat_params_):
                split_params = tensor_to_tuple(flat_params_, params)
                load_weights(cls.model, names, split_params)
                out = cls.model(x_train)
                loss = calc_loss(out, y_train)
                return loss

            batch_h = hessian(f, flat_params, strict=True)

            with torch.no_grad():
                h += batch_h / float(len(cls.train_loader))

        print("Hessian:")
        print(h)

        complete_x_train = cls.train_loader.dataset.data

        # Closed-form Hessian of the MSE loss w.r.t. the weights: 2 X^T X / n
        real_hessian = 2 * complete_x_train.T @ complete_x_train / complete_x_train.shape[0]

        print(real_hessian)

        # Compare against the weight block (first n_features rows/columns)
        # of the autograd Hessian
        print(np.linalg.norm(real_hessian - h.cpu().numpy()[:10, :10]))

        np.save("hessian_pytorch.npy", h.cpu().numpy())

        # Turn the model back into a regular nn.Module

        with torch.no_grad():
            load_weights(cls.model, names, params, as_params=True)
            inv_h = torch.inverse(h)
            print("Inverse Hessian")
            print(inv_h)
            cls.real_ihvp = inv_h @ flat_grads

        print("Real IHVP")
        print(cls.real_ihvp)
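The closed-form check above relies on `calc_loss` being plain mean-squared error: for L(w, b) = mean((X w + b - y)^2), the weight block of the Hessian is exactly 2 X^T X / n. `calc_loss` itself is not shown in the snippet; a minimal sketch consistent with that check (an assumption, not the original code):

import torch.nn.functional as F

def calc_loss(out, y):
    # Plain MSE; its Hessian w.r.t. the weights is 2 X^T X / n, matching
    # the closed-form comparison above. (Assumed implementation.)
    return F.mse_loss(out, y, reduction="mean")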
Example #5
    @classmethod
    def setUpClass(cls) -> None:
        pl.seed_everything(0)

        cls.n_features = 10
        cls.n_classes = 3

        cls.n_params = cls.n_classes * cls.n_features + cls.n_features

        cls.wd = wd = 1e-2  # weight decay; matches sklearn's C = 1 / (n * wd)
        cls.model = LogisticRegression(cls.n_classes,
                                       cls.n_features,
                                       wd=cls.wd)

        gpus = 1 if torch.cuda.is_available() else 0

        trainer = pl.Trainer(gpus=gpus, max_epochs=10)
        # trainer.fit(cls.model)

        use_sklearn = True
        if use_sklearn:
            cls.train_dataset = cls.model.training_set
            multi_class = "multinomial" if cls.model.n_classes != 2 else "auto"
            clf = SklearnLogReg(C=1 / len(cls.train_dataset) / wd,
                                tol=1e-8,
                                max_iter=1000,
                                multi_class=multi_class)

            clf.fit(cls.train_dataset.data, cls.train_dataset.targets)

            with torch.no_grad():
                cls.model.linear.weight = torch.nn.Parameter(
                    torch.tensor(clf.coef_, dtype=torch.float))
                cls.model.linear.bias = torch.nn.Parameter(
                    torch.tensor(clf.intercept_, dtype=torch.float))

        # Setup test point data
        cls.test_idx = 5
        cls.x_test = torch.tensor(cls.model.test_set.data[[cls.test_idx]],
                                  dtype=torch.float)
        cls.y_test = torch.tensor(cls.model.test_set.targets[[cls.test_idx]],
                                  dtype=torch.long)

        # Compute estimated IHVP
        cls.gpu = 1 if torch.cuda.is_available() else -1

        if cls.gpu >= 0:
            cls.model = cls.model.cuda()
            cls.x_test = cls.x_test.cuda()
            cls.y_test = cls.y_test.cuda()

        cls.train_loader = cls.model.train_dataloader(batch_size=40000)
        # Compute and flatten the gradient
        grads = grad_z(cls.x_test, cls.y_test, cls.model, gpu=cls.gpu)
        flat_grads = parameters_to_vector(grads)

        print("Grads:")
        print(flat_grads)

        # Make model functional
        params, names = make_functional(cls.model)
        # Make params regular Tensors instead of nn.Parameter
        params = tuple(p.detach().requires_grad_() for p in params)
        flat_params = parameters_to_vector(params)

        # Initialize Hessian
        h = torch.zeros([flat_params.shape[0], flat_params.shape[0]])
        if cls.gpu >= 0:
            h = h.cuda()

        # Accumulate the exact Hessian over the training set
        for x_train, y_train in cls.train_loader:

            if cls.gpu >= 0:
                x_train, y_train = x_train.cuda(), y_train.cuda()

            f = make_loss_f(cls.model, params, names, x_train, y_train, wd=wd)

            batch_h = hessian(f, flat_params, strict=True)

            with torch.no_grad():
                h += batch_h / float(len(cls.train_loader))

        # Symmetrise to counter small numerical asymmetries
        h = (h + h.transpose(0, 1)) / 2
        print("Hessian:")
        print(h)

        np.save("hessian_pytorch.npy", h.cpu().numpy())
        from numpy import linalg as LA

        # h is symmetric, so eigvalsh returns real eigenvalues directly
        ei = LA.eigvalsh(h.cpu().numpy())
        print("ei =", ei)
        print("max, min eigenvalues =", ei.max(), ei.min())
        assert ei.min() > 0, "Error: non-positive eigenvalues"

        # Turn the model back into a regular nn.Module

        with torch.no_grad():
            load_weights(cls.model, names, params, as_params=True)
            inv_h = torch.inverse(h)
            print("Inverse Hessian")
            print(inv_h)
            cls.real_ihvp = inv_h @ flat_grads

        print("Real IHVP")
        print(cls.real_ihvp)
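Unlike Example #4, this snippet calls a module-level `make_loss_f` with a `wd` argument whose definition is not shown. It presumably mirrors the nested helper from the exact-IHVP example, plus an L2 penalty so the Hessian matches the sklearn regularisation (C = 1 / (n * wd)). A plausible sketch; the penalty term in particular is an assumption:

import torch

def make_loss_f(model, params, names, x, y, wd=0.0):
    # Assumed implementation: same closure-over-flat-parameters pattern as
    # in s_test_sample_exact, with an added L2 penalty (weight decay)
    def f(flat_params_):
        split_params = tensor_to_tuple(flat_params_, params)
        load_weights(model, names, split_params)
        out = model(x)
        loss = model.loss(out, y)
        # L2 regularisation; sklearn does not penalise the bias, so the
        # real helper may exclude it from this sum (assumption)
        loss = loss + 0.5 * wd * torch.sum(flat_params_ ** 2)
        return loss

    return f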