def test_jacobian(transform):
    x = generate_data(transform)
    try:
        y = transform(x)
        actual = transform.log_abs_det_jacobian(x, y)
    except NotImplementedError:
        pytest.skip('Not implemented.')
    # Test shape
    target_shape = x.shape[:x.dim() - transform.domain.event_dim]
    assert actual.shape == target_shape

    # Expand if required
    transform = reshape_transform(transform, x.shape)
    ndims = len(x.shape)
    event_dim = ndims - transform.domain.event_dim
    x_ = x.view((-1,) + x.shape[event_dim:])
    n = x_.shape[0]
    # Reshape to squash batch dims to a single batch dim
    transform = reshape_transform(transform, x_.shape)

    # 1. Transforms with unit jacobian
    if isinstance(transform, ReshapeTransform) or isinstance(
            transform.inv, ReshapeTransform):
        expected = x.new_zeros(x.shape[x.dim() - transform.domain.event_dim])
    # 2. Transforms with 0 off-diagonal elements
    elif transform.domain.event_dim == 0:
        jac = jacobian(transform, x_)
        # assert off-diagonal elements are zero
        assert torch.allclose(jac, jac.diagonal().diag_embed())
        expected = jac.diagonal().abs().log().reshape(x.shape)
    # 3. Transforms with non-0 off-diagonal elements
    else:
        if isinstance(transform, CorrCholeskyTransform):
            jac = jacobian(
                lambda x: tril_matrix_to_vec(transform(x), diag=-1), x_)
        elif isinstance(transform.inv, CorrCholeskyTransform):
            jac = jacobian(
                lambda x: transform(vec_to_tril_matrix(x, diag=-1)),
                tril_matrix_to_vec(x_, diag=-1))
        elif isinstance(transform, StickBreakingTransform):
            jac = jacobian(lambda x: transform(x)[..., :-1], x_)
        else:
            jac = jacobian(transform, x_)

        # Note that jacobian will have shape (batch_dims, y_event_dims, batch_dims, x_event_dims).
        # However, batches are independent, so this can be converted into a
        # (batch_dims, y_event_dims, x_event_dims) tensor after reshaping the event dims
        # (see above) to give a batched square matrix whose determinant can be computed.
        gather_idx_shape = list(jac.shape)
        gather_idx_shape[-2] = 1
        gather_idxs = torch.arange(n).reshape(
            (n,) + (1,) * (len(jac.shape) - 1)).expand(gather_idx_shape)
        jac = jac.gather(-2, gather_idxs).squeeze(-2)
        out_ndims = jac.shape[-2]
        jac = jac[..., :out_ndims]  # Remove extra zero-valued dims (for inverse stick-breaking).
        expected = torch.slogdet(jac).logabsdet

    assert torch.allclose(actual, expected, atol=1e-5)
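# Standalone illustration of the gather trick above (hedged sketch, not part
# of the original test): for n independent maps, functional.jacobian returns
# shape (n, d_out, n, d_in); selecting the block diagonal over the two batch
# axes yields the (n, d_out, d_in) batch of square Jacobians.
def _extract_batch_diagonal(jac):
    n = jac.shape[0]
    idx = torch.arange(n).reshape((n,) + (1,) * (jac.ndim - 1))
    idx = idx.expand(list(jac.shape[:-2]) + [1, jac.shape[-1]])
    return jac.gather(-2, idx).squeeze(-2)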
def test_hessian_ragged(self, dev_name, diff_method, mocker, tol):
    """Test hessian calculation of a ragged QNode"""
    if diff_method not in {"parameter-shift", "backprop"}:
        pytest.skip("Test only supports parameter-shift or backprop")

    dev = qml.device(dev_name, wires=2)

    @qnode(dev, diff_method=diff_method, interface="torch")
    def circuit(x):
        qml.RY(x[0], wires=0)
        qml.RX(x[1], wires=0)
        qml.RY(x[0], wires=1)
        qml.RX(x[1], wires=1)
        return qml.expval(qml.PauliZ(0)), qml.probs(wires=1)

    x = torch.tensor([1.0, 2.0], requires_grad=True)
    res = circuit(x)

    jac_fn = lambda x: jacobian(circuit, x, create_graph=True)
    g = jac_fn(x)

    spy = mocker.spy(JacobianTape, "hessian")
    hess = jacobian(jac_fn, x)
    spy.assert_called_once()

    a, b = x.detach().numpy()

    expected_res = [
        np.cos(a) * np.cos(b),
        0.5 + 0.5 * np.cos(a) * np.cos(b),
        0.5 - 0.5 * np.cos(a) * np.cos(b),
    ]
    assert np.allclose(res.detach(), expected_res, atol=tol, rtol=0)

    expected_g = [
        [-np.sin(a) * np.cos(b), -np.cos(a) * np.sin(b)],
        [-0.5 * np.sin(a) * np.cos(b), -0.5 * np.cos(a) * np.sin(b)],
        [0.5 * np.sin(a) * np.cos(b), 0.5 * np.cos(a) * np.sin(b)],
    ]
    assert np.allclose(g.detach(), expected_g, atol=tol, rtol=0)

    expected_hess = [
        [
            [-np.cos(a) * np.cos(b), np.sin(a) * np.sin(b)],
            [np.sin(a) * np.sin(b), -np.cos(a) * np.cos(b)],
        ],
        [
            [-0.5 * np.cos(a) * np.cos(b), 0.5 * np.sin(a) * np.sin(b)],
            [0.5 * np.sin(a) * np.sin(b), -0.5 * np.cos(a) * np.cos(b)],
        ],
        [
            [0.5 * np.cos(a) * np.cos(b), -0.5 * np.sin(a) * np.sin(b)],
            [-0.5 * np.sin(a) * np.sin(b), 0.5 * np.cos(a) * np.cos(b)],
        ],
    ]
    assert np.allclose(hess.detach(), expected_hess, atol=tol, rtol=0)
def logdetexp(self, x, u):
    # NOTE: very expensive -- builds one full Jacobian per sample.
    if len(u.shape) == 1:
        return torch.det(AF.jacobian(lambda v: self.exp(x, v), u))
    else:
        jacobians = [
            AF.jacobian(lambda v: self.exp(x[i], v), u[i])
            for i in range(u.shape[0])
        ]
        return torch.det(torch.stack(jacobians))
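# A hedged batched alternative (assumes torch >= 2.0 with torch.func
# available); `exp_fn` is a stand-in for the bound self.exp above. vmap over
# the batch avoids the Python loop while producing the same per-sample
# determinants (note: like the original, no log is taken despite the name).
def logdetexp_batched(exp_fn, x, u):
    from torch.func import vmap, jacrev
    jac = vmap(lambda xi, ui: jacrev(lambda v: exp_fn(xi, v))(ui))(x, u)
    return torch.det(jac)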
def Newton_for_Newton(x, func, epoch=100, h=1):
    f_line = []
    for i in range(epoch):
        f_line.append(func(x))
        jac = jacobian(func, x)
        hes = hessian(func, x).sum()
        # Shrink the step while a full step would leave the feasible region.
        if x - jac / hes < 0:
            if h < 1e-3:
                break
            h *= 0.1
            continue
        x -= h * jac / hes
        f_line.append(func(x))
    return x, f_line
def Nesterov_3_qv(x, epoch=100, epoch_N_G=100, h_N_G=0.001):
    L = func(x)
    f_line = []
    for i in range(epoch):
        f = func(x)
        F = function(x)
        jac = jacobian(function, x)[:, 0]
        x_k = torch.zeros_like(x)
        x_k.copy_(x)
        x_k = x_k.view(3, 1)
        # Model function for the inner (Nesterov) subproblem.
        func_Nes = lambda y: (1 / (2 * f) * (f**2 + (
            (F + jac.mm(y - x_k))**2).sum()) + L / 2 * ((y - x_k)**2).sum())
        x, _ = Newton_for_Nesterov(x, func_Nes, epoch=epoch_N_G, h=h_N_G)
        print(x, end='\r')
        f_line.append(f)
    return x, f_line
def ke_hessian(self, q, qdot, create_graph=True):
    """
    Compute Hessian of kinetic energy wrt qdot

    Args:
        q (torch.tensor): (*, qdim) generalized coordinates
        qdot (torch.tensor): (*, qdim) generalized velocities
        create_graph (bool): create graph when computing Hessian

    Returns:
        HKEqdqd (torch.tensor): (*, qdim, qdim) kinetic energy Hessian values
    """
    qdims = q.shape
    Hdims = list(qdims) + [self._qdim]
    q = q.reshape(-1, self._qdim)
    qdot = qdot.reshape(-1, self._qdim)
    with torch.enable_grad():
        with temp_require_grad([q, qdot]):

            def lamfun(qdot_):
                KE = self.kinetic_energy(q, qdot_)  # (*, 1)
                JKEq = grad(KE.sum(), [qdot_],
                            create_graph=True)[0]  # (*, qdim)
                return JKEq.sum(0)

            HKEqdqd = jacobian(lamfun, qdot,
                               create_graph=create_graph).transpose(0, 1)
    HKEqdqd = HKEqdqd.reshape(*Hdims)
    return HKEqdqd
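# Added note (observation, not from the original): for a kinetic energy
# quadratic in the velocities, KE = 0.5 * qdot^T M(q) qdot, the Hessian
# computed above is exactly the (batched) mass matrix M(q). Summing over the
# batch inside `lamfun` is safe because batch elements are independent, so
# the cross-batch Jacobian blocks are zero.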
def constant_direction(
    model: nn.Module,
    start: torch.Tensor,
    direction: torch.Tensor,
    projection: Callable[[torch.Tensor, torch.Tensor], torch.Tensor],
    step_size: float = 0.1,
    steps: int = 1000,
    post_processing: Optional[Callable[[torch.Tensor], torch.Tensor]] = None,
) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
    direction = torch.flatten(direction)
    points = [start]
    x = start
    p = torch.exp(model(x.unsqueeze(0)))
    probability, prediction = torch.max(p, dim=-1)
    probabilities = [probability.item()]
    predictions = [prediction.item()]
    for _ in trange(steps):
        # noinspection PyTypeChecker
        j = jacobian(model, x.unsqueeze(0)).squeeze(0)
        with torch.no_grad():
            j = F.normalize(j.reshape(j.shape[0], -1).T, dim=0)
            displacement = projection(j, direction)
            displacement = F.normalize(displacement,
                                       dim=-1).reshape(start.shape)
            x = post_processing(x + step_size * displacement)
            points.append(x.detach())
            p = torch.exp(model(x.unsqueeze(0)))
            probability, prediction = torch.max(p, dim=-1)
            probabilities.append(probability.item())
            predictions.append(prediction.item())
    points = torch.stack(points, dim=0)
    probabilities = torch.tensor(probabilities, device=start.device)
    predictions = torch.tensor(predictions, device=start.device)
    return points, probabilities, predictions
def Newton_for_Nesterov(x, func, epoch=100, h=0.001):
    n = x.shape[0]
    f_line = []
    for i in range(epoch):
        f_line.append(func(x))
        jac = jacobian(func, x.view(n, 1))
        hes = hessian(func, x.view(n, 1)).sum()
        h = jac / hes
        h = h.view(n)
        x -= h
        f_line.append(func(x))
    return x, f_line
def test_grad():
    # try with a model
    class Network(nn.Module):
        def __init__(self):
            super(Network, self).__init__()
            self.linear2 = nn.Linear(2, 1)
            # Edit weights via .data so in-place writes bypass autograd;
            # the 2 -> 1 map computes y = x0 + x1.
            self.linear2.weight.data.fill_(0.0)
            self.linear2.weight.data[0, 0] = 1.0
            self.linear2.weight.data[0, 1] = 1.0

        def forward(self, x):
            return self.linear2(x)

    f_t = Network()

    def fun(x):
        t_x = torch.from_numpy(x).float()
        return f_t(t_x).detach().numpy()

    # 1d square: torch autograd (time.clock was removed in Python 3.8,
    # so perf_counter is used for timing)
    time_start = time.perf_counter()
    model = Network()
    x = torch.ones((5, 2))
    print([af.jacobian(model, x[i, :]) for i in range(x.shape[0])])
    print([af.hessian(model, x[i, :]) for i in range(x.shape[0])])
    print(time.perf_counter() - time_start)

    # numerical (numdifftools)
    time_start = time.perf_counter()
    model = Network()
    x = np.ones((5, 2))
    df = nd.Gradient(fun)
    H = nd.Hessian(fun)
    print(list(map(df, x.tolist())))
    print(list(map(H, x.tolist())))
    print(time.perf_counter() - time_start)

    # from mpc
    time_start = time.perf_counter()
    model = Network()
    x = np.ones((5, 2))
    print(grad(model, x))
    x = torch.ones((5, 2))
    print([af.hessian(model, x[i, :]) for i in range(x.shape[0])])
    print(time.perf_counter() - time_start)
def main_nn(centroid, input_config, robot):
    # Each line in the centroid file stores:
    #   a) the x, y, z co-ordinates of the centroid
    #   b) the span of x, y, z
    #   c) the maximum of (x-span, y-span, z-span)
    # ... so there are 7 values per line.
    batch_size = 8
    cost_list = []
    cost_collision = 0
    gt_cost_list = []
    elapsed_time_list_nn = []
    elapsed_time_list_man = []
    if robot == 'RR':
        model_NN = torch.load('octree_Best_model_rr.th')
    elif robot == 'LR':
        model_NN = torch.load('octree_Best_model_lr.th')
    sumSamples = 0

    print("Input config", input_config)
    print("centroid", centroid)
    input_feat2 = torch.from_numpy(centroid).float()
    input_feat1 = torch.from_numpy(input_config).float()
    print("Input feat 1", input_feat1.shape)
    print("Input feat 2", input_feat2.shape)

    jac_input = jacobian(model_NN,
                         (input_feat1.unsqueeze(0), input_feat2.unsqueeze(0)))
    print("jac_input[0].shape", jac_input[0].shape)
    print("jac_input[1].shape", jac_input[1].shape)
    return jac_input[0].squeeze()
def jac(self, t, y):
    TPY = torch.Tensor(y)
    TPY.requires_grad = True
    jac_ = jacobian(self.TYdot_jac, TPY, create_graph=False)
    return jac_
def get_jac_pt_single(model, single_obs, single_state):
    # obs: h
    # state: h
    _, state_jac = jacobian(model, (
        single_obs.unsqueeze(0),
        single_state.unsqueeze(0).unsqueeze(0)
    ))
    obs_grad, state_grad = state_jac
    # 1 x 1 x h_out x 1 x 1 x h_in
    return obs_grad.squeeze(), state_grad.squeeze()
def backtracking(xk, dx):
    t = 1
    alpha = 0.49
    beta = 0.8
    for i in range(100000):
        # Armijo sufficient-decrease condition.
        if objective(xk + t * dx) <= objective(xk) + alpha * t * jacobian(
                objective, xk).reshape(-1, 1).T @ dx:
            return t
        t *= beta
    return t
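# Hedged usage sketch: `backtracking` closes over a module-level
# `objective`; assuming it is scalar-valued and differentiable (e.g. a
# quadratic), one damped gradient step looks like this. The helper name is
# illustrative, not from the original code.
def _demo_backtracking_step(xk):
    dx = -jacobian(objective, xk).reshape(-1, 1)  # steepest-descent direction
    t = backtracking(xk, dx)                      # Armijo-style step length
    return xk + t * dx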
def prior_score(self, theta_vec):
    sigma2_1 = self.sigma2_1
    sigma2_2 = self.sigma2_2
    sigmas = torch.tensor([sigma2_1, sigma2_2])
    prior_dist = torch.distributions.MultivariateNormal(
        torch.zeros([2]), torch.diag(sigmas))
    return jacobian(prior_dist.log_prob, theta_vec)
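# Hedged sanity check (standalone, not from the original class): for a
# zero-mean Gaussian with diagonal covariance diag(sigmas), the score is
# -theta / sigmas, so the Jacobian of log_prob should match it exactly.
def _check_gaussian_score():
    sigmas = torch.tensor([1.0, 2.0])
    dist = torch.distributions.MultivariateNormal(torch.zeros(2),
                                                  torch.diag(sigmas))
    theta = torch.tensor([0.5, -1.0])
    score = jacobian(dist.log_prob, theta)
    assert torch.allclose(score, -theta / sigmas)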
def PGD_step(xk):
    # GD step
    xk = xk.reshape(-1, 1)
    dx_orig = -jacobian(objective, xk).reshape(-1, 1)
    alphak = backtracking(xk, dx_orig)
    # project
    xk1_hat = xk + alphak * dx_orig
    xpk = projector(xk1_hat)
    dx = xpk - xk
    return dx, xk1_hat
def toeplitz_convmatrix2d(self):
    inputs = torch.ones_like(self.last.bounds[1, :, :, :].flatten())
    reshape_conv = ReshapeConv(
        self.last.bounds[1, :, :, :].shape[2],
        self.last.bounds[1, :, :, :].shape[2] // self.stride[0],
        self.in_channels, self.out_channels, self.conv)
    # Hacky but works: recover the Toeplitz matrix of the (linear)
    # convolution as its Jacobian.
    j = jacobian(reshape_conv, inputs)
    j.requires_grad = False
    return j
def mod_Newton(x, epoch=1000, epoch_N_G=100, h_N_G=0.001):
    L = func(x)
    n = x.shape[0]
    E = torch.eye(n, n, dtype=torch.float32)
    f_line = [func(x)]
    for i in range(epoch):
        F = function(x)
        F_T = F.transpose(0, 1)
        jac = jacobian(function, x)[:, 0]
        jac_T = jac.transpose(0, 1)
        l = torch.tensor(1, dtype=torch.float32)

        def func_Nes(l):
            A = (E * l + jac.mm(jac_T) / L).inverse()
            return l / 2 + (A.mm(F) * F).sum() / 2

        # lambda for the dual problem
        l, line = Newton_for_Newton(l, func_Nes, epoch=epoch_N_G, h=h_N_G)
        B = (E * l + jac.mm(jac_T) / L).inverse()
        h = -1 / L * jac_T.mm(B).mm(F)[:, 0]
        print(x, end='\r')
        x += h
        f_line.append(func(x))
    return x, f_line
def compute_qddot(self, q, qdot, create_graph=False):
    """
    Compute qddot from the Euler-Lagrange equation.

    Args:
        q (torch.tensor): (*, qdim) generalized coordinates
        qdot (torch.tensor): (*, qdim) generalized velocities
        create_graph (bool): create graph for diff through qddot?

    Returns:
        qddot (torch.tensor): (*, qdim) generalized accelerations
    """
    dims = q.shape
    qdim = dims[-1]
    q = q.reshape(-1, qdim)
    qdot = qdot.reshape(-1, qdim)
    F = self.generalized_forces(q, qdot)
    with torch.enable_grad():
        with temp_require_grad([q, qdot]):
            L = self.lagrangian(q, qdot)
            Jq = grad(L.sum(), [q],
                      create_graph=create_graph)[0].unsqueeze(-1)
            Hqdqd = jacobian(
                lambda qd: grad(self.lagrangian(q, qd).sum(), [qd],
                                create_graph=True)[0].sum(0),
                qdot, create_graph=create_graph)
            Hqdqd = Hqdqd.transpose(0, 1)
            Hqqd = jacobian(
                lambda q_: grad(self.lagrangian(q_, qdot).sum(), [qdot],
                                create_graph=True)[0].sum(0),
                q, create_graph=create_graph)
            Hqqd = Hqqd.transpose(0, 1)
            b = (F.unsqueeze(-1) + Jq - Hqqd @ qdot.unsqueeze(-1))
            qddot = torch.solve(b, Hqdqd)[0].squeeze(-1)
    return qddot.reshape(dims)
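# Added note on the linear solve above (observation, not original code):
# the Euler-Lagrange equation d/dt (dL/dqdot) = dL/dq + F expands to
#   H_{qdot,qdot} @ qddot + H_{q,qdot} @ qdot = dL/dq + F,
# so qddot solves H_{qdot,qdot} @ qddot = F + dL/dq - H_{q,qdot} @ qdot.
# torch.solve(b, A) was removed in recent PyTorch; the equivalent modern
# call is torch.linalg.solve(Hqdqd, b).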
def cjald(func, X):
    """cjald = Computes Jacobian Along Last Dimension

    Recursively splits tensor ``X`` along its leading dimensions until we
    are left with a vector, computes the jacobian of this vector under the
    transformation ``func``, then stitches all the results back together
    using ``torch.stack``.
    """
    assert X.ndim >= 1
    if X.ndim == 1:
        return jacobian(func, X)
    else:
        return torch.stack([cjald(func, X[i]) for i in range(X.shape[0])],
                           dim=0)
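# Hedged usage sketch for cjald (shapes are illustrative): with X of shape
# (B, N) and func mapping (N,) -> (M,), the result stacks the B independent
# (M, N) Jacobians into a (B, M, N) tensor.
def _demo_cjald():
    X = torch.randn(4, 3)
    func = lambda v: torch.sin(v) * v.sum()
    J = cjald(func, X)
    assert J.shape == (4, 3, 3)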
def local_data_matrix_trace(model: nn.Module, x: torch.Tensor) -> float:
    training_state = model.training
    if training_state:
        model.eval()
    # noinspection PyTypeChecker
    j = jacobian(model, x.unsqueeze(0)).squeeze(0)
    j = j.reshape(j.size(0), -1)
    with torch.no_grad():
        p = torch.exp(model(x.unsqueeze(0)))
        trace = torch.sum(p * torch.pow(j, 2).sum(dim=1))
    if training_state:
        model.train()
    return trace.item()
def jac(q3):
    q3 = torch.tensor(q3).reshape(*dims)
    with temp_require_grad([q2, q3]):
        tmpfun = lambda q_: self.discrete_euler_lagrange(q1, q2,
                                                         q_).reshape(-1)
        jac = jacobian(tmpfun, q3)
        jac = jac.reshape(jac.shape[0], -1)
    return jac.detach().numpy()
def evaluate(model, sindy_model, dataloader, train, device='cuda:0'):
    for i, data in enumerate(dataloader):
        x, dxdt = data[0].to(device), data[1].to(device)
        x.requires_grad = True
        z, xhat, encoded = model(x)
        B, C, H, W = z.shape
        dzdx = jacobian(lambda x: model(x)[0], x, create_graph=True)
        # Batch elements are independent: keep only the block diagonal.
        a = torch.diagonal(dzdx, offset=0, dim1=0, dim2=4).squeeze()
        a = a.permute(3, 0, 1, 2)
        dzdt = torch.bmm(a.view(B, C, -1), dxdt.view(B, -1, 1))
        dxdz = jacobian(
            lambda z: model.decoder(F.interpolate(z, size=encoded.shape[2:]))[1],
            z, create_graph=True)
        print(dxdz.shape)
        exit(0)

        # SINDy library
        theta_z = SindyLibrary(z.view(B, C),
                               latent_dim=LATENT_DIM,
                               poly_order=POLYORDER,
                               include_sine=INCLUDE_SIN,
                               device=device)
        zdot_hat = sindy_model(theta_z)
        print(x.shape, xhat.shape, zdot_hat.shape, dzdt.shape)
        sindy_weights = sindy_model.weight
        print(sindy_weights.shape)
        # L1 norm of the SINDy coefficients.
        sindy_regularization = torch.linalg.norm(sindy_weights.view(-1), 1)
        exit(0)
def local_data_matrix(model: nn.Module, x: torch.Tensor) -> torch.Tensor:
    training_state = model.training
    if training_state:
        model.eval()
    # noinspection PyTypeChecker
    j = jacobian(model, x.unsqueeze(0)).squeeze(0)
    j = j.reshape(j.size(0), -1)
    with torch.no_grad():
        p = torch.exp(model(x.unsqueeze(0)))
        jacobian_product = torch.bmm(j.unsqueeze(2),
                                     j.unsqueeze(1)).permute(1, 2, 0)
        g_matrix = torch.sum(p * jacobian_product, dim=-1)
    if training_state:
        model.train()
    return g_matrix
def augmented_ode(t, y_and_dydp, p):
    y = y_and_dydp[0:self._n_states]
    dydp = y_and_dydp[self._n_states:].reshape(
        (self._n_states, self._n_params))
    with torch.enable_grad():
        t_ = torch.as_tensor(t, dtype=torch.float)
        y_ = torch.as_tensor(y, dtype=torch.float)
        p_ = torch.as_tensor(p, dtype=torch.float)
        jac_x, _, jac_p = functional.jacobian(
            lambda y, t, p, tch=True: self._rhs(y, t, p, tch),
            (y_, t_, p_))
    d_dydp_dt = np.matmul(jac_x.detach().numpy(),
                          dydp) + jac_p.detach().numpy()
    dydt = self._rhs(y, t, p)
    return np.concatenate((dydt, d_dydp_dt.reshape(-1)))
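# Added note (observation, not original code): this is the forward
# sensitivity system. With S = dy/dp, differentiating dy/dt = f(y, t, p)
# with respect to p gives
#   dS/dt = (df/dy) S + df/dp,
# which is exactly the matmul above; the augmented state stacks y and the
# flattened S so a single ODE solve yields both the trajectory and its
# parameter sensitivities.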
def generate_change_tensor(
        self, preprocessed_image: torch.Tensor) -> torch.Tensor:
    """
    Generates a change tensor by iteratively stepping towards the point of
    minimal distance on the hyperplane that linearly approximates the
    decision boundary.

    Arguments:
    - preprocessed_image (torch.Tensor): normalized and preprocessed image
      with shape [channels, height, width]

    Returns:
    torch.Tensor: tensor to be added to the image to change the prediction
    """
    self.model.classifier.eval()
    with torch.no_grad():
        original_prediction = self.model.classifier(
            preprocessed_image.unsqueeze(0))[0]
        original_prediction_class = torch.argmax(original_prediction)
    perturbated_img = preprocessed_image.clone().detach()
    perturbation = torch.zeros_like(perturbated_img)
    for _ in range(self.max_iter):
        with torch.no_grad():
            perturbated_img = clipped_renormalize(perturbated_img)
            predicted = self.model.classifier(
                perturbated_img.unsqueeze(0))[0]
            predicted_class = torch.argmax(predicted)
            if predicted_class != original_prediction_class:
                return perturbation
        jacobian = agf.jacobian(
            lambda x: self.model.classifier(x.unsqueeze(0))[0],
            perturbated_img)
        with torch.no_grad():
            w = torch.cat([
                jacobian[:predicted_class],
                jacobian[(predicted_class + 1):]
            ]) - jacobian[predicted_class]
            f = torch.cat([
                predicted[:predicted_class],
                predicted[(predicted_class + 1):]
            ]) - predicted[predicted_class]
            l = torch.argmin(
                torch.abs(f) / la.norm(torch.flatten(w, start_dim=1), dim=1))
            r = (torch.abs(f[l]) / la.norm(torch.flatten(w[l]))**2) * w[l]
            perturbation = perturbation + 1.1 * r
            perturbated_img = perturbated_img + 1.1 * r
    return perturbation
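# Added note (observation, not original code): this matches the DeepFool
# update. For each competing class k, |f_k| / ||w_k|| is the distance to the
# linearized boundary; r = |f_l| / ||w_l||^2 * w_l is the minimal step onto
# the closest one, and the 1.1 factor is the usual small overshoot that
# pushes the iterate just past the boundary.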
def fit(self, mean_traj: np.array):
    """Linearize the dynamics along the mean trajectory (perturbed /
    delta dynamics).

    Args:
        mean_traj (np.array): T rows, each stacking [x', x, u]
    """
    T = mean_traj.shape[0]
    self.AB = np.zeros((T, self.x_dim, self.x_dim + self.u_dim))
    self.c = np.zeros((T, self.x_dim))
    self.W = np.zeros((T, self.x_dim, self.x_dim))
    q = self.x_dim // 2
    for t in range(T):
        xu = torch.FloatTensor(mean_traj[t][self.x_dim:]).reshape(1, -1)
        D_qqd_qdd = jacobian(self.prior.model.defunc.m.forward,
                             xu).squeeze()[q:2 * q].detach().numpy()
        f_star = self.prior.model.defunc(
            0, xu).squeeze()[q:2 * q].detach().numpy()
        ident = np.block([
            [np.eye(q), np.eye(q) * self.dt, np.zeros((q, self.u_dim))],
            [np.zeros((q, q)), np.eye(q), np.zeros((q, self.u_dim))],
        ])
        AB_t = np.block([[D_qqd_qdd * self.dt**2],
                         [D_qqd_qdd * self.dt]]) + ident
        c_t = (np.hstack([f_star * self.dt**2, f_star * self.dt])
               - mean_traj[t][:self.x_dim]
               + mean_traj[t][self.x_dim:2 * self.x_dim])
        c_t[:q] += mean_traj[t][self.x_dim + q:self.x_dim + 2 * q] * self.dt
        self.AB[t] = AB_t
        self.c[t] = c_t
        self.W[t] = 0.1 * np.eye(self.x_dim)  # TODO: better noise representation
    return self.AB, self.c, self.W
def path(
    model: nn.Module,
    start: torch.Tensor,
    end: torch.Tensor,
    projection: Callable[[torch.Tensor, torch.Tensor], torch.Tensor],
    step_size: float = 0.1,
    steps: int = 10000,
    threshold: float = 1.0,
    post_processing: Optional[Callable[[torch.Tensor], torch.Tensor]] = None,
) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
    points = [start]
    x = start
    p = torch.exp(model(x.unsqueeze(0)))
    probability, prediction = torch.max(p, dim=-1)
    probabilities = [probability.item()]
    predictions = [prediction.item()]
    distance = torch.norm(end - x)
    print(
        f'Iteration {len(points) - 1:05d} - Distance {distance:.04f} - '
        f'Predicted {predictions[-1]} with probability {probabilities[-1]:0.4f}\r',
        end='',
    )
    while distance > threshold and len(points) < steps + 1:
        # noinspection PyTypeChecker
        j = jacobian(model, x.unsqueeze(0)).squeeze(0)
        with torch.no_grad():
            j = F.normalize(j.reshape(j.shape[0], -1).T, dim=0)
            direction = (end - x).flatten()
            displacement = projection(j, direction)
            displacement = F.normalize(displacement,
                                       dim=-1).reshape(start.shape)
            x = post_processing(x + step_size * displacement)
            points.append(x.detach())
            p = torch.exp(model(x.unsqueeze(0)))
            probability, prediction = torch.max(p, dim=-1)
            probabilities.append(probability.item())
            predictions.append(prediction.item())
            distance = torch.norm(end - x)
        print(
            f'Iteration {len(points) - 1:05d} - Distance {distance:.04f} - '
            f'Predicted {predictions[-1]} with probability {probabilities[-1]:0.4f}\r',
            end='',
        )
    points = torch.stack(points, dim=0)
    probabilities = torch.tensor(probabilities, device=start.device)
    predictions = torch.tensor(predictions, device=start.device)
    return points, probabilities, predictions
def test_loss_jacobian_full_receptive_field(embed_inputs):
    batch_size = 2
    p = model.HParams(
        embed_inputs=embed_inputs,
        n_audio_chans=1,
        n_classes=2,
        dilation_stacks=2,
        n_layers=4,
        sample_length=40,
    ).with_all_chans(10)
    m = model.Wavenet(p)

    # pin down the expected receptive field
    assert p.receptive_field_size() == 32, p.receptive_field_size()
    assert p.sample_length > p.receptive_field_size()

    # all targets are the second class
    y = torch.ones((batch_size, 1, p.sample_length), dtype=torch.long)

    def loss(x):
        logits, _ = m.forward(x)
        losses = F.cross_entropy(logits, y, reduction="none")
        return losses.sum(1)  # N, C, W -> N, W

    # input is N, C, W. output is N, W. jacobian is N, W, N, C, W
    x = torch.rand((batch_size, 1, p.sample_length))
    j = jacobian(loss, x)

    # sum everything else to obtain WxW
    j = j.sum((0, 2, 3))

    # pick the last row of the WxW jacobian. these are the derivatives of each
    # input timestep with respect to the last output timestep. we also chop
    # off the last input timestep, since this cannot have an effect on the
    # last output timestep due to temporal masking.
    receptive_field = j[-1, :-1]

    # the number of nonzero entries matches the receptive field size
    assert receptive_field.ne(0.0).sum() == p.receptive_field_size()

    # and the nonzero entries are exactly the trailing timesteps
    expected = torch.zeros_like(receptive_field)
    expected[-p.receptive_field_size():] = 1
    assert expected.ne(0.0).equal(receptive_field.ne(0.0))
def to_lg_policy(self, xu):
    x_dim = self.x_dim
    u_dim = self.u_dim
    state = xu[:self.x_dim]
    action = xu[self.x_dim:]
    pt_state = torch.FloatTensor(state)
    K = jacobian(self.model, pt_state.reshape(1, -1)).reshape(
        u_dim, x_dim).detach().numpy()
    k = self.model(pt_state.reshape(1, -1)).reshape(
        -1).detach().numpy() - K @ state
    cov = self.pi_cov
    return K, k, cov
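# Added note (observation, not original code): this is a first-order Taylor
# expansion of the policy network around `state`. With K = dpi/dx and
# k = pi(x0) - K @ x0, the linear-Gaussian policy pi(x) ~ N(K x + k, cov)
# matches the network's output and gradient at x0.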
def jac_resid_x(self, model, q, sparse=False, sparse_format=sp.csr_matrix,
                inds=None):
    with temp_require_grad([q]):
        jac_dyn_q_ = jacobian(
            lambda q_: self.residuals(model, q_, inds=inds, flatten=True), q)
    if sparse:
        jac_dyn_q_ = jac_dyn_q_.reshape(jac_dyn_q_.shape[0], -1)
        return sparse_format(jac_dyn_q_.detach().numpy(), dtype=np.float64)
    else:
        return jac_dyn_q_