Example #1
File: ilqr.py Project: clthegoat/GT-Mb-Mf
def test_grad():

    #try with a model
    class Network(nn.Module):
        def __init__(self):
            super(Network, self).__init__()
            self.linear2 = nn.Linear(2, 1)
            self.linear2.weight.data.fill_(0.0)
            self.linear2.weight.data[0, 0] = 1.
            self.linear2.weight.data[0, 1] = 1.

        def forward(self, x):
            pax_predict = self.linear2(x)
            #print(self.linear2.weight.data)

            return pax_predict

    f_t = Network()

    def fun(x):
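        # numpy-in / numpy-out wrapper around the torch model, for numdifftools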

        t_x = torch.from_numpy(x).float()

        f_x = f_t(t_x).detach().numpy()
        #print(f_x.shape)
        return f_x

    ##1d square
    #torch
    time_start = time.perf_counter()
    model = Network()
    x = torch.ones((5, 2))
    print([af.jacobian(model, x[i, :]) for i in range(x.shape[0])])
    print([af.hessian(model, x[i, :]) for i in range(x.shape[0])])
    time_e = time.perf_counter() - time_start
    print(time_e)

    #numerical
    time_start = time.perf_counter()
    model = Network()
    x = np.ones((5, 2))
    df = nd.Gradient(fun)
    H = nd.Hessian(fun)
    print(list(map(df, x.tolist())))
    print(list(map(H, x.tolist())))
    time_e = time.perf_counter() - time_start
    print(time_e)

    #from mpc
    time_start = time.perf_counter()
    model = Network()
    x = np.ones((5, 2))
    print(grad(model, x))
    x = torch.ones((5, 2))
    print([af.hessian(model, x[i, :]) for i in range(x.shape[0])])
    time_e = time.perf_counter() - time_start
    print(time_e)
Example #2
def Newton_for_Nesterov(x, func, epoch=100, h=0.001):

    n = x.shape[0]

    f_line = []
    for i in range(epoch):

        #         print(i, end='\r')

        f_line.append(func(x))
        #         print(f_line[-1])

        jac = jacobian(func, x.view(n, 1))
        hes = hessian(func, x.view(n, 1)).sum()

        #         print('jac: {}'.format(jac))
        #         print('hes: {}'.format(hes))
        #         print('x: {}'.format(x))

        h = jac / hes
        h = h.view(n)
        #         print(h)

        x -= h

    f_line.append(func(x))

    return x, f_line
Example #3
    def test_hessian_vector_valued_postprocessing(self, dev_name, diff_method,
                                                  mocker, tol):
        """Test hessian calculation of a vector valued QNode with post-processing"""
        if diff_method not in {"parameter-shift", "backprop"}:
            pytest.skip("Test only supports parameter-shift or backprop")

        dev = qml.device(dev_name, wires=1)

        @qnode(dev, diff_method=diff_method, interface="torch")
        def circuit(x):
            qml.RX(x[0], wires=0)
            qml.RY(x[1], wires=0)
            return [qml.expval(qml.PauliZ(0)), qml.expval(qml.PauliZ(0))]

        x = torch.tensor([0.76, -0.87],
                         requires_grad=True,
                         dtype=torch.float64)

        def cost_fn(x):
            return x @ circuit(x)

        a, b = x.detach().numpy()

        res = cost_fn(x)
        expected_res = np.array(
            [a, b]) @ [np.cos(a) * np.cos(b),
                       np.cos(a) * np.cos(b)]
        assert np.allclose(res.detach(), expected_res, atol=tol, rtol=0)

        res.backward()

        g = x.grad
        expected_g = [
            np.cos(b) * (np.cos(a) - (a + b) * np.sin(a)),
            np.cos(a) * (np.cos(b) - (a + b) * np.sin(b)),
        ]
        assert np.allclose(g.detach(), expected_g, atol=tol, rtol=0)

        spy = mocker.spy(JacobianTape, "hessian")
        hess = hessian(cost_fn, x)

        if diff_method == "backprop":
            spy.assert_not_called()
        elif diff_method == "parameter-shift":
            spy.assert_called_once()

        expected_hess = [
            [
                -(np.cos(b) * ((a + b) * np.cos(a) + 2 * np.sin(a))),
                -(np.cos(b) * np.sin(a)) + (-np.cos(a) +
                                            (a + b) * np.sin(a)) * np.sin(b),
            ],
            [
                -(np.cos(b) * np.sin(a)) + (-np.cos(a) +
                                            (a + b) * np.sin(a)) * np.sin(b),
                -(np.cos(a) * ((a + b) * np.cos(b) + 2 * np.sin(b))),
            ],
        ]

        assert np.allclose(hess.detach(), expected_hess, atol=tol, rtol=0)
Example #4
    def test_hessian(self, dev_name, diff_method, mocker, tol):
        """Test hessian calculation of a scalar valued QNode"""
        if diff_method not in {"parameter-shift", "backprop"}:
            pytest.skip("Test only supports parameter-shift or backprop")

        dev = qml.device(dev_name, wires=1)

        @qnode(dev, diff_method=diff_method, interface="torch")
        def circuit(x):
            qml.RY(x[0], wires=0)
            qml.RX(x[1], wires=0)
            return qml.expval(qml.PauliZ(0))

        x = torch.tensor([1.0, 2.0], requires_grad=True)
        res = circuit(x)

        res.backward()
        g = x.grad

        spy = mocker.spy(JacobianTape, "hessian")
        hess = hessian(circuit, x)
        spy.assert_called_once()

        a, b = x.detach().numpy()

        expected_res = np.cos(a) * np.cos(b)
        assert np.allclose(res.detach(), expected_res, atol=tol, rtol=0)

        expected_g = [-np.sin(a) * np.cos(b), -np.cos(a) * np.sin(b)]
        assert np.allclose(g.detach(), expected_g, atol=tol, rtol=0)

        expected_hess = [[-np.cos(a) * np.cos(b),
                          np.sin(a) * np.sin(b)],
                         [np.sin(a) * np.sin(b), -np.cos(a) * np.cos(b)]]
        assert np.allclose(hess.detach(), expected_hess, atol=tol, rtol=0)
Example #5
def Newton_for_Newton(x, func, epoch=100, h=1):

    f_line = []
    for i in range(epoch):

        #         print(i, end='\r')

        f_line.append(func(x))
        #         print(f_line[-1])

        jac = jacobian(func, x)
        hes = hessian(func, x).sum()

        #         print('jac: {}'.format(jac))
        #         print('hes: {}'.format(hes))
        #         print('x: {}'.format(x))
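        # if the undamped Newton step would make x negative, shrink the step size h
        # and retry (give up once h drops below 1e-3)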
        if x - jac / hes < 0:
            #             print('neg : {}'.format(x - jac / hes))
            if h < 1e-3:
                break
            h *= 0.1
            continue

        x -= h * jac / hes

    f_line.append(func(x))

    return x, f_line
Example #6
    def hess(self, x):
        x = self.x_encode(x)
        hess_x = hessian(self.model.obj, x).numpy()
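        # the Hessian over the (m, n) grid has shape (m, n, m, n); keep only the
        # interior points and flatten it to a square matrix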
        hess_x = hess_x[1:-1, 1:-1, 1:-1, 1:-1]
        length = (self.m - 2) * (self.n - 2)
        hess_x = hess_x.reshape(length, length)
        assert np.linalg.norm(hess_x - hess_x.T) < 1e-5, np.linalg.norm(hess_x - hess_x.T)

        return hess_x
Example #7
    def vcov(self):
        from torch.autograd.functional import hessian
        bias, weight = torch.tensor(self.bias), torch.tensor(self.weight)
        h = hessian(self.log_lik, (bias, weight))
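        # h is a 2x2 nested tuple of Hessian blocks over (bias, weight); assemble
        # the blocks into the observed Fisher information matrix before inverting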
        fisher_obs = -torch.cat([torch.cat([h[0][0],h[0][1].squeeze(dim = 2)], dim = 1),
                                torch.cat([h[1][0].squeeze(dim =0).squeeze(dim =1),
                                           h[1][1].squeeze()], dim = 1)],
                               dim = 0)

        vcov = torch.inverse(fisher_obs)/self.n_sample
        return vcov
Example #8
    def get_hess(self, input_var):
        assert 'shapes' in dir(
            self), 'You must first call get input to define the tensor shapes.'
        input_var_ = torch.tensor(
            input_var, dtype=self.precision, device=self.device)

        def func(inp):
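            # split the flat vector back into the original tensor shapes, then
            # evaluate the scalar objective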
            return self._eval_func(self._unconcat(inp, self.shapes))

        hess = hessian(func, input_var_, vectorize=False)

        return hess.cpu().detach().numpy().astype(np.float64)
Example #9
    def call_oracle(self,x):
        
        if type(x) != torch.Tensor:
            try:
                x = torch.tensor(x, dtype=torch.double,
                                 requires_grad=self.requires_grad)
            except Exception:
                raise Exception('Optimization variable must be a PyTorch tensor '
                                'or something that can be cast to one, such as a '
                                'numpy array or list.')
        
        assert len(x.shape) == 1
        
        if (not x.requires_grad) and (self.requires_grad):
            raise Exception('Need to enable gradients on optimization variable.')
        
        # Zero the gradient if x has one
        if x.requires_grad and (x.grad is not None):
            x.grad.zero_()
        
        self.x = x
        self.fx = self.obj_func(self.x)
        
        if self.fx.dim() != 0:
            raise Exception('Objective function must output a scalar value')
        
        # Uses auto differentiation to get subgradient

        if self.requires_grad:
            self.fx.backward()
        
        if self.oracle_output == 'f':
            return self.oracle_f()
        elif self.oracle_output == 'df':
            return self.oracle_df()
        elif self.oracle_output == 'both':
            return {'f'  : self.oracle_f(),
                    'df' : self.oracle_df(),
                    }
        elif self.oracle_output == 'hess+':
            assert type(x) == torch.Tensor
            return {'f'  : self.oracle_f(),
                    'df' : self.oracle_df(),
                    'd2f': np.nan_to_num(hessian(self.obj_func, self.x).data.numpy(), nan=1e16)
                    }
Example #10
File: ilqr.py Project: clthegoat/GT-Mb-Mf
    def linearize(self):
        # linearize dynamics
        XU_t = torch.from_numpy(self.XU[:, :, 0]).float()
        #print(XU_t.size())
        # apply num method
        #self.F = grad(self.dyn_f, self.XU[:,:,0]).reshape((self.T, self.n, self.n+self.m)) #T*n*n+m
        self.F = compute_jacobian(self.dyn_f, XU_t, (self.T, self.n))

        self.F = np.concatenate([self.F[i, :, i, :]
                                 for i in range(self.T)]).reshape(
                                     (self.T, self.n, self.n + self.m))
        #print(self.F)
        self.f = self.dyn_f(XU_t).detach().numpy()
        self.f = self.f.reshape((self.T, -1, 1))
        #print(self.f)

        # linearize cost
        # TODO: can we further speedup with high accuracy?
        self.C = [self.hessian_torch(XU_t[i, :]) for i in range(self.T)]
        # self.C = Parallel(n_jobs=4,backend="threading")(delayed(self.hessian_torch)(XU_t[i,:]) for i in range(self.T))
        # self.C = self.map(self.hessian_torch, [XU_t[i,:] for i in range(self.T)])
        self.C = np.asarray(self.C)

        #self.C = torch_hessian(self.cost_f, XU_t)
        #try numerical
        #self.C = hess(self.cost_f, self.XU[:,:,0])
        #self.c = grad(self.cost_f, self.XU[:,:,0])

        self.c = compute_jacobian(self.cost_f, XU_t, (self.T, 1))
        self.c = np.concatenate([self.c[i, :, i, :] for i in range(self.T)])
        #print(self.c.shape)
        #debug
        self.c = self.c.reshape((self.T, -1, 1))

        # linearize val
        X_T = torch.from_numpy(self.X[self.T, :, 0]).float()
        #print(X_T.size())
        self.V_T = af.hessian(self.val_f, X_T).detach().numpy()
        self.v_T = compute_jacobian(self.val_f, X_T.view(1, -1), (1, ))
        #self.v_T = grad(self.val_f,self.X[self.T,:,:].reshape((1,-1)))
        #debug
        self.v_T = self.v_T.reshape((-1, 1))
Example #11
def s_test_sample_exact(model, x_test, y_test, train_loader, gpu=-1):

    grads = grad_z(x_test, y_test, model, gpu=gpu)
    flat_grads = parameters_to_vector(grads)
    def make_loss_f(model, params, names, x, y):
        def f(flat_params_):
            split_params = tensor_to_tuple(flat_params_, params)
            load_weights(model, names, split_params)
            out = model(x)
            loss = model.loss(out, y)
            return loss
        return f
    # Make model functional
    params, names = make_functional(model)
    # Make params regular Tensors instead of nn.Parameter
    params = tuple(p.detach().requires_grad_() for p in params)
    flat_params = parameters_to_vector(params)

    h = torch.zeros([flat_params.shape[0], flat_params.shape[0]])
    if gpu >= 0:
        h = h.cuda()
    # Compute real IHVP
    for x_train, y_train in train_loader:
        if gpu >= 0:
            x_train, y_train = x_train.cuda(), y_train.cuda()
        f = make_loss_f(model, params, names, x_train, y_train)
        batch_h = hessian(f, flat_params, strict=True)
        with torch.no_grad():
            h += batch_h / float(len(train_loader))
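    # symmetrize the accumulated Hessian to remove numerical asymmetry before inverting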
    h = (h + h.transpose(0,1))/2
    with torch.no_grad():
        load_weights(model, names, params, as_params=True)
        inv_h = torch.inverse(h)
        print("Inverse Hessian")
        print(inv_h)
        real_ihvp = inv_h @ flat_grads
    return tensor_to_tuple(real_ihvp, params)
Example #12
 def grad_xy(self, x, y):
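     # hessian over the tuple (x, y) returns a nested tuple of blocks;
     # [0][1] is the mixed block d^2 k / (dx dy), and its trace is returned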
     return torch.trace(hessian(self.kernel, (x, y))[0][1])
Example #13
 def hessian_fwdrev(model, inp, strict=None):
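     # vectorized Hessian using forward-mode AD for the outer Jacobian
     # (forward-over-reverse)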
     return functional.hessian(model,
                               inp,
                               strict=False,
                               vectorize=True,
                               outer_jacobian_strategy="forward-mode")
Example #14
import numpy as np
import torch
from torch import nn, optim
from torch.autograd import grad
from torch.autograd.functional import hessian, jacobian

### scalar function ####


def scalar_func(x):
    return x ** 2 + x


print(jacobian(scalar_func, torch.ones(1), create_graph=True))  # f'(x) = 2x + 1
print(hessian(scalar_func, torch.ones(1), create_graph=True))  # f''(x) = 2

### vector quadratic function ####
def vector_func(x):
    H = torch.FloatTensor([[1.0, -1.0], [-1.0, 2.0]])
    g = torch.FloatTensor([3.0, 1.0])
    return 0.5 * x.t().matmul(H).matmul(x) + g.t().matmul(x)


### neural network function ####
simple_model = nn.Sequential(*[nn.Linear(4, 2), nn.Softplus(), nn.Linear(2, 1)])

x = torch.ones(4)
x.requires_grad = True
y = simple_model(x)

# print(grad(y, x, retain_graph=True, create_graph=True))
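# vector_func above is defined but never exercised; as an illustrative check
# (not part of the original snippet), its Jacobian at x = 0 recovers g and its
# Hessian is the constant matrix H:
x0 = torch.zeros(2)
print(jacobian(vector_func, x0))  # equals g = [3., 1.]
print(hessian(vector_func, x0))   # equals H = [[1., -1.], [-1., 2.]]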
Example #15
import torch
from torch.autograd.functional import hessian


def pow_reducer(x):
    return x.pow(3).sum() + torch.norm(x)


inputs = torch.rand(2, 3)  #.flatten()
# print(torch.randn(2, 3, dtype=torch.float32))
print(hessian(pow_reducer, inputs))
y = hessian(pow_reducer, inputs)
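# for a (2, 3) input the Hessian has shape (2, 3, 2, 3); flatten it to 6x6 and
# check that it is symmetric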
x = hessian(pow_reducer, inputs).reshape(6, 6)
print(x)
print(torch.norm(x - x.T))
Example #16
import torch
import time
import numpy as np
import torch.autograd.functional as F

num_params = 40010
k = torch.tensor(np.load('./tests/utils/numpy_params/function_2_param_k.npy'),
                 requires_grad=True,
                 dtype=torch.float)
torch.set_num_threads(1)


def make_func(k):
    return (torch.sin(k) + torch.cos(k) + torch.pow(k, 2)).sum()


start_time_pytorch = time.time()

output = F.hessian(make_func, k.data)

end_time_pytorch = time.time()
runtime = (end_time_pytorch - start_time_pytorch)
print(str(runtime))

output = output.data.numpy()
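# the objective is a sum of elementwise terms, so its Hessian is diagonal;
# print the diagonal entries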
for i in range(num_params):
    print(output[i][i])
Example #17
    def setUpClass(cls) -> None:
        pl.seed_everything(0)

        cls.n_features = 10

        cls.n_params = 2 * cls.n_features

        cls.model = LinearRegression(cls.n_features)

        gpus = 1 if torch.cuda.is_available() else 0
        trainer = pl.Trainer(gpus=gpus, max_epochs=10)
        # trainer.fit(cls.model)

        print(tuple(cls.model.parameters()))
        use_sklearn = True
        if use_sklearn:
            train_dataset = DummyDataset(cls.n_features)
            clf = SklearnLR()
            clf.fit(train_dataset.data, train_dataset.targets)

            with torch.no_grad():
                cls.model.linear.weight = torch.nn.Parameter(
                    torch.tensor([clf.coef_], dtype=torch.float))
                cls.model.linear.bias = torch.nn.Parameter(
                    torch.tensor([clf.intercept_], dtype=torch.float))

        cls.train_loader = cls.model.train_dataloader(batch_size=40000)

        # Setup test point data
        cls.test_idx = 8
        cls.x_test = torch.tensor([cls.model.test_set.data[[cls.test_idx]]],
                                  dtype=torch.float)
        cls.y_test = torch.tensor([cls.model.test_set.targets[[cls.test_idx]]],
                                  dtype=torch.float)

        # Compute estimated IVHP
        cls.gpu = 1 if torch.cuda.is_available() else -1

        # Compute and flatten grad
        grads = grad_z(cls.x_test, cls.y_test, cls.model, gpu=cls.gpu)
        flat_grads = parameters_to_vector(grads)

        print("Grads:")
        print(flat_grads)

        # Make model functional
        params, names = make_functional(cls.model)
        # Make params regular Tensors instead of nn.Parameter
        params = tuple(p.detach().requires_grad_() for p in params)
        flat_params = parameters_to_vector(params)

        # Initialize Hessian
        h = torch.zeros([flat_params.shape[0], flat_params.shape[0]])

        # Compute real IHVP
        for x_train, y_train in cls.train_loader:

            if cls.gpu >= 0:
                x_train, y_train = x_train.cuda(), y_train.cuda()

            def f(flat_params_):
                split_params = tensor_to_tuple(flat_params_, params)
                load_weights(cls.model, names, split_params)
                out = cls.model(x_train)
                loss = calc_loss(out, y_train)
                return loss

            batch_h = hessian(f, flat_params, strict=True)

            with torch.no_grad():
                h += batch_h / float(len(cls.train_loader))

        print("Hessian:")
        print(h)

        complete_x_train = cls.train_loader.dataset.data

        real_hessian = complete_x_train.T @ complete_x_train / complete_x_train.shape[
            0] * 2
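        # analytic Hessian of a mean-squared-error loss for linear regression,
        # (2/n) X^T X, restricted to the weight block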

        print(real_hessian)

        print(np.linalg.norm(real_hessian - h.cpu().numpy()[:10, :10]))

        np.save("hessian_pytorch.npy", h.cpu().numpy())

        # Make the model back `nn`

        with torch.no_grad():
            load_weights(cls.model, names, params, as_params=True)
            inv_h = torch.inverse(h)
            print("Inverse Hessian")
            print(inv_h)
            cls.real_ihvp = inv_h @ flat_grads

        print("Real IHVP")
        print(cls.real_ihvp)
Example #18
    def setUpClass(cls) -> None:
        pl.seed_everything(0)

        cls.n_features = 10
        cls.n_classes = 3

        cls.n_params = cls.n_classes * cls.n_features + cls.n_features

        cls.wd = wd = 1e-2  # weight decay=1/(nC)
        cls.model = LogisticRegression(cls.n_classes,
                                       cls.n_features,
                                       wd=cls.wd)

        gpus = 1 if torch.cuda.is_available() else 0

        trainer = pl.Trainer(gpus=gpus, max_epochs=10)
        # trainer.fit(self.model)

        use_sklearn = True
        if use_sklearn:
            cls.train_dataset = cls.model.training_set  #DummyDataset(cls.n_features, cls.n_classes)
            multi_class = "multinomial" if cls.model.n_classes != 2 else "auto"
            clf = SklearnLogReg(C=1 / len(cls.train_dataset) / wd,
                                tol=1e-8,
                                max_iter=1000,
                                multi_class=multi_class)

            clf.fit(cls.train_dataset.data, cls.train_dataset.targets)

            with torch.no_grad():
                cls.model.linear.weight = torch.nn.Parameter(
                    torch.tensor(clf.coef_, dtype=torch.float))
                cls.model.linear.bias = torch.nn.Parameter(
                    torch.tensor(clf.intercept_, dtype=torch.float))

        # Setup test point data
        cls.test_idx = 5
        cls.x_test = torch.tensor(cls.model.test_set.data[[cls.test_idx]],
                                  dtype=torch.float)
        cls.y_test = torch.tensor(cls.model.test_set.targets[[cls.test_idx]],
                                  dtype=torch.long)

        # Compute estimated IVHP
        cls.gpu = 1 if torch.cuda.is_available() else -1

        if cls.gpu >= 0:
            cls.model = cls.model.cuda()
            cls.x_test = cls.x_test.cuda()
            cls.y_test = cls.y_test.cuda()

        cls.train_loader = cls.model.train_dataloader(batch_size=40000)
        # Compute and flatten grad
        grads = grad_z(cls.x_test, cls.y_test, cls.model, gpu=cls.gpu)
        flat_grads = parameters_to_vector(grads)

        print("Grads:")
        print(flat_grads)

        # Make model functional
        params, names = make_functional(cls.model)
        # Make params regular Tensors instead of nn.Parameter
        params = tuple(p.detach().requires_grad_() for p in params)
        flat_params = parameters_to_vector(params)

        # Initialize Hessian
        h = torch.zeros([flat_params.shape[0], flat_params.shape[0]])
        if cls.gpu == 1:
            h = h.cuda()

        # Compute real IHVP
        for x_train, y_train in cls.train_loader:

            if cls.gpu >= 0:
                x_train, y_train = x_train.cuda(), y_train.cuda()

            f = make_loss_f(cls.model, params, names, x_train, y_train, wd=wd)

            batch_h = hessian(f, flat_params, strict=True)

            with torch.no_grad():
                h += batch_h / float(len(cls.train_loader))

        h = (h + h.transpose(0, 1)) / 2
        print("Hessian:")
        print(h)

        np.save("hessian_pytorch.npy", h.cpu().numpy())
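        # sanity check: with weight decay the Hessian should be positive definite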
        from numpy import linalg as LA
        ei = LA.eig(h.cpu().numpy())[0]
        print('ei=', ei)
        print("max,min eigen value=", ei.max(), ei.min())
        assert ei.min() > 0, "Error: Non-positive Eigenvalues"

        # Make the model back `nn`

        with torch.no_grad():
            load_weights(cls.model, names, params, as_params=True)
            inv_h = torch.inverse(h)
            print("Inverse Hessian")
            print(inv_h)
            cls.real_ihvp = inv_h @ flat_grads

        print("Real IHVP")
        print(cls.real_ihvp)
Example #19
  def H(x, A):
    def f(x):
      return qform(x,A)

    hval = agf.hessian(f, x)
    return hval
Example #20
import torch
from torch.autograd.functional import hessian

def pow_addr_reducer(x, y):
    return (2 * x.pow(2) + 3 * y.pow(2)).sum()

inputs = (torch.FloatTensor(1), torch.FloatTensor(1))
print(hessian(pow_addr_reducer, inputs))
Example #21
 def hessian_(x, lambda_):
     if le_cons.type() == LINEAR and obj.type() == LINEAR:
         return 0.0
     else:
         return hessian(lambda x: l(x, lambda_), x)
Example #22
    z.backward()
    print("descent!!!")
    return z
opt.step(closure)

### Computing the Hessian Matrix
`torch.autograd.functional` provides a `hessian` function that computes the Hessian matrix of a scalar-valued function, which means researchers can implement Newton's method themselves.

from torch.autograd.functional import hessian
x = torch.tensor([1, 2, 3],
                 dtype = torch.float,
                 requires_grad=True)
def g(x):
    z = (x ** 3).sum()
    return z
print(hessian(g, x))
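As a minimal sketch of the Newton's method mentioned above (an illustrative addition, assuming `g` and `x` are defined as in the preceding snippet), each iteration solves the linear system formed by the Hessian and the gradient:

from torch.autograd.functional import jacobian

for _ in range(20):
    grad_g = jacobian(g, x)   # gradient of g, shape (3,)
    hess_g = hessian(g, x)    # Hessian of g, here diag(6 * x)
    with torch.no_grad():
        x -= torch.linalg.solve(hess_g, grad_g)  # Newton step toward grad_g = 0
print(x)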



## Empirical Examples and Exercises

### Exercise

# set seed
torch.manual_seed(246437)

# write a function to generate data
from torch.distributions import Bernoulli
def generate_data(n_sample,
                  weight,
                  bias = 0,
Example #23
 def hessian_revrev(model, inp, strict=None):
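     # vectorized Hessian using the default reverse-over-reverse strategy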
     return functional.hessian(model, inp, strict=False, vectorize=True)
Example #24
File: ilqr.py Project: clthegoat/GT-Mb-Mf
 def hessian_torch(self, XU):
     return af.hessian(self.cost_f, XU).detach().numpy()