def lstsq(b, y, alpha=0.01):
    """
    Batched linear least-squares for pytorch with optional L2 (ridge)
    regularization.

    For every batch element the regularized normal equations
    ``(b^T b + alpha * I) x = b^T y`` are solved.

    Parameters
    ----------
    b : shape(L, M, N)
    y : shape(L, M)
    alpha : float
        Constant added to the diagonal of ``b^T b``; ``0`` disables the
        regularization.

    Returns
    -------
    tuple of (coefficients, model, residuals)
        ``coefficients`` has shape (L, N); ``model`` (= ``b @ coefficients``)
        and ``residuals`` (= ``y - model``) have shape (L, M).
    """
    bT = b.transpose(-1, -2)
    AA = torch.bmm(bT, b)
    if alpha != 0:
        # torch.diagonal returns a view, so the in-place add shifts the
        # diagonal of AA itself.
        diag = torch.diagonal(AA, dim1=1, dim2=2)
        diag += alpha
    RHS = torch.bmm(bT, y[:, :, None])

    # torch.gesv no longer exists in current PyTorch; prefer
    # torch.linalg.solve and only fall back to gesv on old installations.
    linalg_solve = getattr(getattr(torch, "linalg", None), "solve", None)
    if linalg_solve is not None:
        X = linalg_solve(AA, RHS)
    else:
        X, _ = torch.gesv(RHS, AA)

    fit = torch.bmm(b, X)[..., 0]
    res = y - fit
    return X[..., 0], fit, res
def backward(ctx, grad_output):
    """Backward pass: map the gradient w.r.t. the output factor to the input.

    Reads the saved factor from ``ctx.saved_variables`` and the ``ctx.upper``
    flag. Returns ``(grad_input, None)``.
    """
    (factor,) = ctx.saved_variables
    grad = grad_output
    if ctx.upper:
        # Work with the lower-triangular convention throughout.
        factor = factor.t()
        grad = grad.t()
    factor_t = factor.t()

    # make sure not to double-count variation, since
    # only half of output matrix is unique
    grad_lower = grad.tril()

    phi_term = Potrf.phi(torch.mm(factor_t, grad_lower))
    # Two successive solves against factor^T.
    inner = torch.gesv(phi_term + phi_term.t(), factor_t)[0]
    outer = torch.gesv(inner.t(), factor_t)[0]
    return Potrf.phi(outer), None
def lsolve(A, B):
    """
    Computes the solution X to AX = B.

    Uses ``torch.linalg.solve`` when the installed PyTorch provides it and
    falls back to the legacy ``torch.gesv`` (absent from current releases)
    otherwise.
    """
    linalg_solve = getattr(getattr(torch, "linalg", None), "solve", None)
    if linalg_solve is not None:
        return linalg_solve(A, B)
    return torch.gesv(B, A)[0]
def predict(self, pred_x, hyper=None, in_optimization=False):
    """Predictive mean and variance at ``pred_x``.

    Args:
        pred_x: 2-D input of size (n_pred, n_dim); every row must have a
            strictly positive norm (asserted below).
        hyper: optional hyper-parameter vector. When given it is applied via
            ``self.cholesky_update`` for this call and the original
            parameters are restored before returning.
        in_optimization: when True, the diagnostic printouts about negative
            intermediate variances are suppressed.

    Returns:
        Tuple ``(pred_mean, pred_var, numerically_stable, zero_pred_var)``.
        ``pred_var`` is clamped to at least 1e-12; ``numerically_stable``
        tells whether every unclamped variance was >= 0 and ``zero_pred_var``
        whether every unclamped variance was <= 0.
    """
    if hyper is not None:
        param_original = self.model.param_to_vec()
        self.cholesky_update(hyper)
    kernel_max = self.model.kernel.forward_on_identical().data[0]
    n_pred, n_dim = pred_x.size()
    # Split each prediction point into its norm and its unit direction.
    pred_x_radius = torch.sqrt(torch.sum(pred_x**2, 1, keepdim=True))
    assert (pred_x_radius.data > 0).all()
    pred_x_sphere = pred_x / pred_x_radius
    # Same direction as pred_x, rescaled to radius sqrt(n_dim).
    satellite = pred_x_sphere * pred_x.size(1)**0.5
    one_radius = Variable(torch.ones(1, 1)).type_as(self.train_x)
    # Kernel blocks between the non-origin training points and (radius-zero
    # point, prediction point, satellite point).
    K_non_ori_radius = self.model.kernel.radius_kernel(
        self.train_x_nonorigin_radius, one_radius * 0)
    K_non_ori_sphere = self.model.kernel.sphere_kernel(
        self.train_x_nonorigin_sphere, pred_x_sphere)
    K_non_ori = K_non_ori_radius.view(-1, 1) * K_non_ori_sphere
    K_non_pre = self.model.kernel(self.train_x_nonorigin, pred_x)
    K_non_sat = self.model.kernel(self.train_x_nonorigin, satellite)
    K_ori_pre_diag = self.model.kernel.radius_kernel(
        pred_x_radius, one_radius * 0)
    K_ori_sat_diag = self.model.kernel.radius_kernel(
        one_radius * 0, one_radius * n_dim**0.5).repeat(n_pred, 1)
    K_sat_pre_diag = self.model.kernel.radius_kernel(
        pred_x_radius, one_radius * n_dim**0.5)
    # All right-hand sides are concatenated column-wise so that one solve
    # against self.cholesky_nonorigin handles them together.
    chol_B = torch.cat([
        K_non_ori, K_non_pre,
        self.mean_vec.index_select(0, self.ind_nonorigin), K_non_sat
    ], 1)
    # NOTE(review): torch.gesv is a general (LU) solve and is not available
    # in recent PyTorch releases — confirm the targeted version.
    chol_solver = torch.gesv(chol_B, self.cholesky_nonorigin)[0]
    # Column slices in the same order as the blocks of chol_B.
    chol_solver_q = chol_solver[:, :n_pred]
    chol_solver_k = chol_solver[:, n_pred:n_pred * 2]
    chol_solver_y = chol_solver[:, n_pred * 2:n_pred * 2 + 1]
    chol_solver_q_bar_0 = chol_solver[:, n_pred * 2 + 1:]
    sol_p_sqr = kernel_max + self.model.likelihood(pred_x).view(
        -1, 1) + self.jitter - (chol_solver_q**2).sum(0).view(-1, 1)
    if not (sol_p_sqr.data >= 0).all():
        # Diagnostics only: report how negative the squared term became.
        if not in_optimization:
            neg_mask = sol_p_sqr.data < 0
            neg_val = sol_p_sqr.data[neg_mask]
            min_neg_val = torch.min(neg_val)
            max_neg_val = torch.max(neg_val)
            kernel_max = self.model.kernel.forward_on_identical().data[0]
            print('p')
            print('negative %d/%d pred_var range %.4E(%.4E) ~ %.4E(%.4E)' %
                  (torch.sum(neg_mask), sol_p_sqr.numel(), min_neg_val,
                   min_neg_val / kernel_max, max_neg_val,
                   max_neg_val / kernel_max))
            print('kernel max %.4E / noise variance %.4E' %
                  (kernel_max,
                   torch.exp(self.model.likelihood.log_noise_var.data)[0]))
            print('jitter %.4E' % self.jitter)
            print('-' * 50)
    # Clamp before the square root so negative values cannot produce NaN.
    sol_p = torch.sqrt(sol_p_sqr.clamp(min=1e-12))
    sol_k_bar = (
        K_ori_pre_diag -
        (chol_solver_q * chol_solver_k).sum(0).view(-1, 1)) / sol_p
    sol_y_bar = (self.mean_vec.index_select(0, self.ind_origin) -
                 torch.mm(chol_solver_q.t(), chol_solver_y)) / sol_p
    sol_q_bar_1 = (
        K_ori_sat_diag -
        (chol_solver_q * chol_solver_q_bar_0).sum(0).view(-1, 1)) / sol_p
    sol_p_bar_sqr = kernel_max + self.model.likelihood(pred_x).view(
        -1, 1) + self.jitter - (chol_solver_q_bar_0**2).sum(0).view(
            -1, 1) - (sol_q_bar_1**2)
    if not (sol_p_bar_sqr.data >= 0).all():
        # Diagnostics only, same report as above for the second term.
        if not in_optimization:
            neg_mask = sol_p_bar_sqr.data < 0
            neg_val = sol_p_bar_sqr.data[neg_mask]
            min_neg_val = torch.min(neg_val)
            max_neg_val = torch.max(neg_val)
            kernel_max = self.model.kernel.forward_on_identical().data[0]
            print('p bar')
            print('negative %d/%d pred_var range %.4E(%.4E) ~ %.4E(%.4E)' %
                  (torch.sum(neg_mask), sol_p_bar_sqr.numel(), min_neg_val,
                   min_neg_val / kernel_max, max_neg_val,
                   max_neg_val / kernel_max))
            print('kernel max %.4E / noise variance %.4E' %
                  (kernel_max,
                   torch.exp(self.model.likelihood.log_noise_var.data)[0]))
            print('jitter %.4E' % self.jitter)
            print('-' * 50)
    sol_p_bar = torch.sqrt(sol_p_bar_sqr.clamp(min=1e-12))
    sol_k_tilde = (K_sat_pre_diag -
                   (chol_solver_q_bar_0 * chol_solver_k).sum(0).view(
                       -1, 1) - sol_k_bar * sol_q_bar_1) / sol_p_bar
    pred_mean = torch.mm(
        chol_solver_k.t(),
        chol_solver_y) + sol_k_bar * sol_y_bar + self.model.mean(pred_x)
    pred_var = self.model.kernel.forward_on_identical() - (
        chol_solver_k**2).sum(0).view(-1, 1) - sol_k_bar**2 - sol_k_tilde**2
    if not (pred_var.data >= 0).all():
        # Diagnostics only, for the final predictive variance.
        if not in_optimization:
            neg_mask = pred_var.data < 0
            neg_val = pred_var.data[neg_mask]
            min_neg_val = torch.min(neg_val)
            max_neg_val = torch.max(neg_val)
            kernel_max = self.model.kernel.forward_on_identical().data[0]
            print('predictive variance')
            print('negative %d/%d pred_var range %.4E(%.4E) ~ %.4E(%.4E)' %
                  (torch.sum(neg_mask), pred_var.numel(), min_neg_val,
                   min_neg_val / kernel_max, max_neg_val,
                   max_neg_val / kernel_max))
            print('kernel max %.4E / noise variance %.4E' %
                  (kernel_max,
                   torch.exp(self.model.likelihood.log_noise_var.data)[0]))
            print('jitter %.4E' % self.jitter)
            print('-' * 50)
    numerically_stable = (pred_var.data >= 0).all()
    zero_pred_var = (pred_var.data <= 0).all()
    if hyper is not None:
        # Undo the temporary hyper-parameter change made at the top.
        self.cholesky_update(param_original)
    return pred_mean, pred_var.clamp(
        min=1e-12), numerically_stable, zero_pred_var
def gesv_wrapper(return_dict, i, *args):
    """Solve a linear system and store the solution in ``return_dict[i]``.

    ``args`` follows the legacy ``torch.gesv`` convention ``(B, A)``: the
    right-hand side first, then the coefficient matrix of ``A X = B``.

    ``torch.gesv`` is absent from current PyTorch releases, so the two-argument
    call is routed to ``torch.linalg.solve`` when available; any other call
    shape is forwarded unchanged to the legacy function.
    """
    linalg_solve = getattr(getattr(torch, "linalg", None), "solve", None)
    if linalg_solve is not None and len(args) == 2:
        rhs, lhs = args
        return_dict[i] = linalg_solve(lhs, rhs)
    else:
        return_dict[i] = torch.gesv(*args)[0]
def get_perspective_transform(src, dst):
    r"""Calculates a perspective transform from four pairs of the corresponding
    points.

    The function calculates the matrix of a perspective transform so that:

    .. math ::

        \begin{bmatrix}
        t_{i}x_{i}^{'} \\
        t_{i}y_{i}^{'} \\
        t_{i} \\
        \end{bmatrix}
        =
        \textbf{map_matrix} \cdot
        \begin{bmatrix}
        x_{i} \\
        y_{i} \\
        1 \\
        \end{bmatrix}

    where

    .. math ::
        dst(i) = (x_{i}^{'},y_{i}^{'}), src(i) = (x_{i}, y_{i}), i = 0,1,2,3

    Args:
        src (Tensor): coordinates of quadrangle vertices in the source image.
        dst (Tensor): coordinates of the corresponding quadrangle vertices in
            the destination image.

    Returns:
        Tensor: the perspective transformation.

    Raises:
        TypeError: if ``src`` or ``dst`` is not a tensor.
        ValueError: if ``src`` is not Bx4x2 or the shapes differ.

    Shape:
        - Input: :math:`(B, 4, 2)` and :math:`(B, 4, 2)`
        - Output: :math:`(B, 3, 3)`
    """
    if not torch.is_tensor(src):
        raise TypeError("Input type is not a torch.Tensor. Got {}".format(
            type(src)))
    if not torch.is_tensor(dst):
        raise TypeError("Input type is not a torch.Tensor. Got {}".format(
            type(dst)))
    # Require exactly three dimensions: the indexing below assumes (B, 4, 2).
    if src.dim() != 3 or not src.shape[-2:] == (4, 2):
        raise ValueError("Inputs must be a Bx4x2 tensor. Got {}".format(
            src.shape))
    # Equal shapes imply equal batch sizes, so no separate batch check is
    # needed afterwards.
    if not src.shape == dst.shape:
        raise ValueError("Inputs must have the same shape. Got {}".format(
            dst.shape))

    def ax(p, q):
        # Row of the system for the x' equation of one correspondence.
        ones = torch.ones_like(p)[..., 0:1]
        zeros = torch.zeros_like(p)[..., 0:1]
        return torch.cat([
            p[:, 0:1], p[:, 1:2], ones, zeros, zeros, zeros,
            -p[:, 0:1] * q[:, 0:1], -p[:, 1:2] * q[:, 0:1]
        ], dim=1)

    def ay(p, q):
        # Row of the system for the y' equation of one correspondence.
        ones = torch.ones_like(p)[..., 0:1]
        zeros = torch.zeros_like(p)[..., 0:1]
        return torch.cat([
            zeros, zeros, zeros, p[:, 0:1], p[:, 1:2], ones,
            -p[:, 0:1] * q[:, 1:2], -p[:, 1:2] * q[:, 1:2]
        ], dim=1)

    # Four correspondences give eight equations for the eight unknown
    # entries of the matrix (the ninth entry is fixed to 1), so the square
    # system is solved exactly.
    rows = []
    for k in range(4):
        rows.append(ax(src[:, k], dst[:, k]))
        rows.append(ay(src[:, k], dst[:, k]))

    # A is Bx8x8
    A = torch.stack(rows, dim=1)

    # b is a Bx8x1, ordered (x'_0, y'_0, x'_1, y'_1, ...)
    b = torch.stack(
        [dst[:, k:k + 1, c] for k in range(4) for c in range(2)], dim=1)

    # solve the system Ax = b; torch.gesv is absent from current PyTorch, so
    # it is only used as a fallback on old installations.
    linalg_solve = getattr(getattr(torch, "linalg", None), "solve", None)
    if linalg_solve is not None:
        X = linalg_solve(A, b)
    else:
        X, _ = torch.gesv(b, A)

    # create variable to return
    batch_size = src.shape[0]
    M = torch.ones(batch_size, 9, device=src.device, dtype=src.dtype)
    M[..., :8] = torch.squeeze(X, dim=-1)
    return M.view(-1, 3, 3)  # Bx3x3
############################## res1 = torch.gels(y, X) print("Solution 1:") print(res1[0]) # Solution 2 print(torch.matmul(torch.transpose(X, 0, 1),X)) print(torch.matmul(torch.transpose(X, 0, 1),y)) ############################## ## How to compute l and r? ## Dimensions: l (2x2); r (2x1) ############################## l = torch.matmul(torch.transpose(X, 0, 1),X) r = torch.matmul(torch.transpose(X, 0, 1),y) res2 = torch.gesv(r,l) print("Solution 2:") print(res2[0]) # Solution 3 ############################## ## What is l and r? ## Dimensions: l (2x2); r (2x1) ############################## l = torch.matmul(torch.transpose(X, 0, 1),X) r = torch.matmul(torch.transpose(X, 0, 1),y) res3 = torch.matmul(torch.inverse(l),r) print("Solution 3:") print(res3)
def solve(matrix1, matrix2):
    """Return the solution ``X`` of ``matrix1 @ X = matrix2``.

    Uses ``torch.linalg.solve`` when the installed PyTorch provides it and
    falls back to the legacy ``torch.gesv`` (absent from current releases)
    otherwise.
    """
    linalg_solve = getattr(getattr(torch, "linalg", None), "solve", None)
    if linalg_solve is not None:
        return linalg_solve(matrix1, matrix2)
    solution, _ = torch.gesv(matrix2, matrix1)
    return solution
# Time the Kinv solve; sync() is called before each clock read.
sync()
start = time.time()
c = Kinv(x, x, b, g, alpha=alpha)
sync()
end = time.time()
print('Timing (KeOps implementation):', round(end - start, 5), 's')

###############################################################################
# Compare with a straightforward PyTorch implementation:
#

sync()
start = time.time()
# Dense matrix: alpha * I plus a Gaussian kernel exp(-|x_i - x_j|^2 / (2 sigma^2)).
K_xx = alpha * torch.eye(N) + torch.exp(-torch.sum(
    (x[:, None, :] - x[None, :, :])**2, dim=2) / (2 * sigma**2))
# NOTE(review): torch.gesv is not available in recent PyTorch releases —
# confirm the targeted version.
c_py = torch.gesv(b, K_xx)[0]
sync()
end = time.time()
print('Timing (PyTorch implementation):', round(end - start, 5), 's')
print("Relative error = ", (torch.norm(c - c_py) / torch.norm(c_py)).item())

# Plot the results next to each other (first 40 rows of each output column):
for i in range(Dv):
    plt.subplot(Dv, 1, i + 1)
    plt.plot(c.cpu().detach().numpy()[:40, i], '-', label='KeOps')
    plt.plot(c_py.cpu().detach().numpy()[:40, i], '--', label='PyTorch')
    plt.legend(loc='lower right')
plt.tight_layout()
plt.show()

###############################################################################
def compute_beta_cuda(self, X, Y):
    """Return the least-squares coefficients of ``Y`` regressed on ``X``.

    Solves the normal equations ``(X^T X) beta = X^T Y``.

    Args:
        X: 2-D design matrix.
        Y: 2-D target matrix with the same number of rows as ``X``.

    Returns:
        The coefficient matrix ``beta``.
    """
    Xt = X.permute(1, 0)
    XtX, XtY = Xt.mm(X), Xt.mm(Y)
    # torch.gesv is absent from current PyTorch; use it only as a fallback
    # on old installations.
    linalg_solve = getattr(getattr(torch, "linalg", None), "solve", None)
    if linalg_solve is not None:
        return linalg_solve(XtX, XtY)
    beta, _ = torch.gesv(XtY, XtX)
    return beta
def dynamics(self, z, u, i):
    """Dynamics model function.

    Args:
        z (Tensor<..., state_size>): State distribution. The last axis is
            read as (_, x_dot, theta1, theta1_dot, theta2, theta2_dot);
            index 0 is not used here.
        u (Tensor<..., action_size>): Action vector(s); only ``u[..., 0]``
            is used.
        i (Tensor<...>): Time index (unused).

    Returns:
        derivatives of current state wrt to time (Tensor<..., state_size>),
        ordered (x_dot, x_dot_dot, theta1_dot, theta1_dot_dot, theta2_dot,
        theta2_dot_dot).
    """
    batched = z.dim() != 1

    def _param(value):
        # Tile a model parameter across the batch when z is batched.
        return value.repeat(z.shape[0]) if batched else value

    mc = _param(self.mc)
    mp1 = _param(self.mp1)
    mp2 = _param(self.mp2)
    l1 = _param(self.l1)
    l2 = _param(self.l2)
    mu = _param(self.mu)
    g = _param(self.g)

    x_dot = z[..., 1]
    theta1 = z[..., 2]
    theta1_dot = z[..., 3]
    theta2 = z[..., 4]
    theta2_dot = z[..., 5]
    dtheta = theta1 - theta2

    F = u[..., 0]

    # Take sin/cos on the tensors directly. Packing them with
    # torch.tensor([...]) fails for batched inputs and detaches the result
    # from the autograd graph.
    sin_theta1, cos_theta1 = theta1.sin(), theta1.cos()
    sin_theta2, cos_theta2 = theta2.sin(), theta2.cos()
    sin_dtheta, cos_dtheta = dtheta.sin(), dtheta.cos()

    a0 = mp2 + 2 * mc
    a1 = mc * l2
    a2 = l1 * theta1_dot**2
    a3 = a1 * theta2_dot**2

    # yapf: disable
    # Each inner stack is one row of the 3x3 system; the outer stack puts
    # rows on the last axis, hence the transpose.
    A = torch.stack([
        torch.stack([
            2 * (mp1 + mp2 + mc),
            -a0 * l1 * cos_theta1,
            -a1 * cos_theta2
        ], dim=-1),
        torch.stack([
            -3 * a0 * cos_theta1,
            (2 * a0 + 2 * mc) * l1,
            3 * a1 * cos_dtheta
        ], dim=-1),
        torch.stack([
            -3 * cos_theta2,
            3 * l1 * cos_dtheta,
            2 * l2
        ], dim=-1),
    ], dim=-1).transpose(-2, -1)
    b = torch.stack([
        torch.stack([
            - 2 * mu * x_dot
            - a0 * a2 * sin_theta1
            - a3 * sin_theta2
        ], dim=-1),
        torch.stack([
            3 * a0 * g * sin_theta1 - 3 * a3 * sin_dtheta
        ], dim=-1),
        torch.stack([
            6*F/(l2*mp2) + 3 * a2 * sin_dtheta + 3 * g * sin_theta2
        ], dim=-1),
    ], dim=-1).transpose(-2, -1)
    # yapf: enable

    # torch.gesv is absent from current PyTorch; use it only as a fallback
    # on old installations.
    linalg_solve = getattr(getattr(torch, "linalg", None), "solve", None)
    if linalg_solve is not None:
        sol = linalg_solve(A, b).transpose(-2, -1)
    else:
        sol = torch.gesv(b, A)[0].transpose(-2, -1)

    # For symplectic integration.
    # reshape (not view) so the result does not depend on the memory layout
    # returned by the solver.
    x_dot_dot = sol[..., 0].reshape(x_dot.shape)
    theta1_dot_dot = sol[..., 1].reshape(theta1_dot.shape)
    theta2_dot_dot = sol[..., 2].reshape(theta2_dot.shape)

    return torch.stack([
        x_dot,
        x_dot_dot,
        theta1_dot,
        theta1_dot_dot,
        theta2_dot,
        theta2_dot_dot,
    ], dim=-1)
def forward(self,
            x_train,
            y_train,
            body_train,
            x_test=None,
            body_test=None,
            classify=False):
    """Fit on the training data and optionally predict at ``x_test``.

    The training covariance is a squared-exponential kernel (with noise on
    the diagonal) weighted by ``body_train``: each column of ``body_train``
    contributes the outer product of that column times the kernel, plus a
    term built from ``sqrt(1 - sum(body_train**2, 1))``.

    Args:
        x_train: (n, p) training inputs.
        y_train: 2-D training targets with n rows.
        body_train: (n, nB) weights per region/body.
        x_test: optional (ntest, p) test inputs.
        body_test: (ntest, nB) weights for the test inputs; required when
            ``x_test`` is given.
        classify: when True the weights are thresholded at 0.5 first.

    Returns:
        ``(c, v)`` when ``x_test`` is None, where ``c`` is the upper Cholesky
        factor of the training covariance and ``v`` solves
        ``covariance @ v = y_train``; otherwise ``(f_test, cov_f)``, the
        predictive mean and per-point variance computed without gradient
        tracking.
    """

    def _solve(lhs, rhs):
        # torch.gesv is absent from current PyTorch; fall back to it only on
        # old installations.
        linalg_solve = getattr(getattr(torch, "linalg", None), "solve", None)
        if linalg_solve is not None:
            return linalg_solve(lhs, rhs)
        return torch.gesv(rhs, lhs)[0]

    def _cholesky_upper(mat):
        # torch.cholesky is deprecated; torch.linalg.cholesky returns the
        # lower factor, whose transpose is the upper factor.
        linalg_chol = getattr(getattr(torch, "linalg", None), "cholesky", None)
        if linalg_chol is not None:
            return linalg_chol(mat).t()
        return torch.cholesky(mat, upper=True)

    def _cholesky_solve_upper(rhs, factor):
        # torch.potrs was renamed to torch.cholesky_solve.
        chol_solve = getattr(torch, "cholesky_solve", None)
        if chol_solve is not None:
            return chol_solve(rhs, factor, upper=True)
        return torch.potrs(rhs, factor, upper=True)

    n = x_train.size(0)
    p = x_train.size(-1)
    d = torch.zeros(n, n)
    nB = body_train.size(1)  # number of regions/bodies
    if classify:  # i.e we are predicting not training
        body_train = body_train > 0.5

    nullB = torch.sqrt(1 - torch.sum(body_train.float().pow(2), 1))

    # Scaled squared distance, accumulated one input dimension at a time.
    for i in range(p):
        d += 0.5 * (x_train[:, i].unsqueeze(1) - x_train[:, i].unsqueeze(0)
                    ).pow(2) / self.lengthscale[i].pow(2)
    kse = self.sigma_f.pow(2) * torch.exp(-d) + self.sigma_n.pow(
        2) * torch.eye(n)
    kyy = nullB.unsqueeze(0) * nullB.unsqueeze(1) * kse
    for i in range(nB):
        kyy += body_train[:, i].float().unsqueeze(
            1) * body_train[:, i].float().unsqueeze(0) * kse
    c = _cholesky_upper(kyy)
    # v is obtained with a general solve rather than from the Cholesky
    # factor, as in the original code.
    v = _solve(kyy, y_train)

    if x_test is None:
        out = (c, v)
    else:
        with torch.no_grad():
            if classify:
                body_test = body_test > 0.5  # make a distinct classifier
            nullB_test = torch.sqrt(1 -
                                    torch.sum(body_test.float().pow(2), 1))
            ntest = x_test.size(0)
            d = torch.zeros(ntest, n)
            for i in range(p):
                d += 0.5 * (x_test[:, i].unsqueeze(1) -
                            x_train[:, i].unsqueeze(0)
                            ).pow(2) / self.lengthscale[i].pow(2)
            kse = self.sigma_f.pow(2) * torch.exp(-d)
            kfy = nullB_test.unsqueeze(1) * nullB.unsqueeze(0) * kse
            for i in range(nB):
                kfy += body_test[:, i].float().unsqueeze(
                    1) * body_train[:, i].float().unsqueeze(0) * kse
            # solve
            f_test = kfy.mm(v)
            tmp = _cholesky_solve_upper(kfy.t(), c)
            tmp = torch.sum(kfy * tmp.t(), dim=1)
            cov_f = self.sigma_f.pow(2) - tmp
        out = (f_test, cov_f)
    return out
global_folder = os.path.join(data_folder, 'global')
for dx in dx_list:
    # Set subject of observations based on the number of features
    for exp_i in range(n_experiments):
        # Set rho
        # rho = rho_vec[exp_i]
        rho = 0.001
        # Consecutive groups of n_sub_exp experiments share one n_centers entry.
        m = n_centers[exp_i // n_sub_exp]
        # Create data: Y = X W + 2 * standard normal noise.
        X = torch.randn(n, dx).type(dtype)
        W = torch.randn(dy, dx).t().type(dtype)
        Y = torch.mm(X, W).type(dtype) + 2 * torch.randn(n, dy).type(dtype)
        # Solving against the identity yields the inverse of X^T X, so
        # X_t_X holds (X^T X)^-1 despite its name.
        # NOTE(review): torch.gesv is not available in recent PyTorch
        # releases — confirm the targeted version.
        X_t_X, LU = torch.gesv(
            torch.eye(dx).type(dtype), torch.mm(X.t(), X))
        # Least-squares estimate of W from the full data set.
        W_full_data = torch.mm(X_t_X, torch.mm(X.t(), Y))
        # print('Plotting')
        # plt.scatter(Y.numpy()[0], torch.mm(X, W).numpy()[0], alpha=0.2, color='b')
        # plt.show()
        global_data = {'X': X, 'W': W, 'Y': Y}
        # Print info
        print('\n[ INFO ] ==== Experiment %d ====' % exp_i)
        print('[ INFO ] Global matrices info:')
        for key, data in global_data.items():
            print('\t\t- Shape of matrix %s: %s' % (key, str(data.shape)))
        print('\t\t- Number of observations: ', n)
def __getitem__(self, index): image_path = self.image_paths[index] # albedo_path = self.albedo_paths[index] # mask_path = self.mask_paths[index] if self.opt.direction == 'BtoA': input_nc = self.opt.output_nc output_nc = self.opt.input_nc else: input_nc = self.opt.input_nc output_nc = self.opt.output_nc content = sio.loadmat(image_path) if self.isTrain: rgb_img = content['imag'] chrom = content['chrom'] mask = content['mask'] mask = resize(mask, [384, 512], 1) mask[mask > 0] = 1 mask = np.mean(mask, axis=2) #mask = skimage.morphology.binary_erosion(mask, square(1)) mask = np.expand_dims(mask, axis=2) mask = np.repeat(mask, 3, axis=2) l1 = content['l1'] l2 = content['l2'] rand_id = random.randint(0, 3) light_id1 = random.randint(0, 8) light_id2 = random.randint(0, 8) arr_id = random.randint(0, 1) img1 = content['im1'] img2 = content['im2'] img1 = np.nan_to_num(img1) img2 = np.nan_to_num(img2) img1[img1 > 1.0] = 1.0 img1[img1 < 0.0] = 0.0 img2[img2 > 1.0] = 1.0 img2[img2 < 0.0] = 0.0 [img1, img2, l1, l2] = self.produceColor(img1, img2, arr_id, light_id1, light_id2, l1, l2) # img2 = self.DA(img2, rand_id) #img1 = img1/2 #img2 = img2/2 chrom = np.nan_to_num(chrom) #rgb_img = np.nan_to_num(rgb_img) #l1 = np.nan_to_num(l1) #l2 = np.nan_to_num(l2) # img1[img1 != img1] = 0.0 # img2[img2 != img2] = 0.0 # chrom[chrom != chrom] = 0.0 # rgb_img[rgb_img != rgb_img] = 0.0 #for i in range(3): # img1[:, :, i] = img1[:, :, i] * l1[i] # img2[:, :, i] = img2[:, :, i] * l2[i] rgb_img = img1 + img2 #rgb_img[rgb_img > 1.0] = 1.0 #rgb_img[rgb_img < 0.0] = 0.0 chrom[chrom > 1.0] = 1.0 chrom[chrom < 0.0] = 0.0 rgb_img = resize(rgb_img, [384, 512], 1) img1 = resize(img1, [384, 512], 1) img2 = resize(img2, [384, 512], 1) chrom = resize(chrom, [384, 512], 1) rgb_img = self.DA(rgb_img, rand_id) chrom = self.DA(chrom, rand_id) mask = self.DA(mask, rand_id) img1 = self.DA(img1, rand_id) img2 = self.DA(img2, rand_id) # if arr_id: # l1 = self.l1Matrix[light_id1]/255 # l2 = 
self.l2Matrix[light_id2]/255 # l1 = np.reshape(l1, (3, -1)) # l2 = np.reshape(l2, (3, -1)) # else: # l1 = self.l2Matrix[light_id2]/255 # l2 = self.l1Matrix[light_id1]/255 # l1 = np.reshape(l1, (3, -1)) # l2 = np.reshape(l2, (3, -1)) # rgb_img = img1 + img2 lightColor = np.concatenate((l1, l2), axis=1) lightColor = torch.from_numpy(lightColor).contiguous().float() rgb_img = torch.from_numpy(np.transpose( rgb_img, (2, 0, 1))).contiguous().float() chrom = torch.from_numpy(np.transpose( chrom, (2, 0, 1))).contiguous().float() mask = torch.from_numpy(np.transpose( mask.astype(float), (2, 0, 1))).contiguous().float() no_albedo_nf = rgb_img / (1e-6 + chrom) sum_albedo = torch.sum(no_albedo_nf, 0, keepdim=True) gamma = no_albedo_nf / (sum_albedo.repeat(3, 1, 1) + 1e-6) gamma = gamma.view(3, -1) lightT = lightColor.t() light = lightColor B = torch.mm(lightT, gamma) A = torch.mm(lightT, light) shadings, _ = torch.gesv(B, A) # shadings[0, :] = (shadings[0:, ] - torch.min(shadings[0, :]))/(torch.max(shadings[0:, ]) - torch.min(shadings[0, :])) # shadings[1, :] = (shadings[1:, ] - torch.min(shadings[1, :]))/(torch.max(shadings[1:, ]) - torch.min(shadings[1, :])) shadings[shadings != shadings] = 0.0 im1 = shadings[0, :].repeat(3, 1).view(3, rgb_img.size(1), rgb_img.size(2)) im2 = shadings[1, :].repeat(3, 1).view(3, rgb_img.size(1), rgb_img.size(2)) im1 = (im1 - torch.min(im1[mask > 0])) / ( torch.max(im1[mask > 0]) - torch.min(im1[mask > 0])) im2 = (im2 - torch.min(im2[mask > 0])) / ( torch.max(im2[mask > 0]) - torch.min(im2[mask > 0])) # remove nan values im1[im1 != im1] = 0.0 im2[im2 != im2] = 0.0 im1[mask == 0] = 0.0 im2[mask == 0] = 0.0 im1[0, :, :] *= lightColor[0, 0] im1[1, :, :] *= lightColor[1, 0] im1[2, :, :] *= lightColor[2, 0] im2[0, :, :] *= lightColor[0, 1] im2[1, :, :] *= lightColor[1, 1] im2[2, :, :] *= lightColor[2, 1] # normalize the data # im1 = torch.mul(shadings[0, :].repeat(3, 1), lightColor[:, 0].view(3, 1).repeat(1, shadings.size(1))) # im2 = 
torch.mul(shadings[1, :].repeat(3, 1), lightColor[:, 1].view(3, 1).repeat(1, shadings.size(1))) # im1 = im1.view(3, rgb_img.size(1), rgb_img.size(2)) # im2 = im2.view(3, rgb_img.size(1), rgb_img.size(2)) im1[im1 > 1] = 1.0 im2[im2 > 1] = 1.0 im1[im1 < 0] = 0.0 im2[im2 < 0] = 0.0 # rgb_img = 2*rgb_img - 1.0 img1 = torch.from_numpy(np.transpose( img1, (2, 0, 1))).contiguous().float() img2 = torch.from_numpy(np.transpose( img2, (2, 0, 1))).contiguous().float() return { 'rgb_img': rgb_img, 'chrom': chrom, 'im1': im1, 'im2': im2, 'A_paths': image_path, 'mask': mask, 'img1': img1, 'img2': img2 } else: rgb_img = content['imag'] #chrom = content['chrom'] #mask = content['chrom'] #im1 = content['im1'] #im2 = content['im2'] rgb_img[rgb_img > 1] = 1 rgb_img[rgb_img < 0] = 0 # rgb_img = 2*rgb_img - 1.0 #img1 = content['im1'] #img2 = content['im2'] rgb_img = torch.from_numpy(np.transpose( rgb_img, (2, 0, 1))).contiguous().float() #chrom = torch.from_numpy(np.transpose(chrom, (2, 0, 1))).contiguous().float() #mask = torch.from_numpy(np.transpose(mask.astype(float), (2, 0, 1))).contiguous().float() #im1 = torch.from_numpy(np.transpose(im1, (2, 0, 1))).contiguous().float() #im2 = torch.from_numpy(np.transpose(im2, (2, 0, 1))).contiguous().float() return {'rgb_img': rgb_img, 'A_paths': image_path}
def solve(A, b):
    """Return a contiguous tensor ``x`` that solves ``A @ x = b``.

    Uses ``torch.linalg.solve`` when the installed PyTorch provides it and
    falls back to the legacy ``torch.gesv`` (absent from current releases)
    otherwise.
    """
    linalg_solve = getattr(getattr(torch, "linalg", None), "solve", None)
    if linalg_solve is not None:
        return linalg_solve(A, b).contiguous()
    return torch.gesv(b, A)[0].contiguous()
def manitest(input_image,
             net,
             mode,
             maxIter=50000,
             lim=None,
             hs=None,
             cuda_on=True,
             stop_when_found=None,
             verbose=True):
    """Grid search over transformation parameters around ``input_image``.

    Starting from the parameter vector returned by ``g.init_param(mode)``,
    nodes of a regular grid (step sizes ``hs``) are visited in order of
    increasing tentative distance. The search stops when ``net`` assigns a
    visited node's transformed image a label different from the input's, when
    all ``stop_when_found`` points have been reached, or after ``maxIter``
    iterations.

    Args:
        input_image: image tensor to transform.
        net: classifier; called on a batch of one image.
        mode: transformation group name, one of the strings handled in
            ``group_chars``.
        maxIter: maximum number of visited nodes.
        lim: optional (dimension_group, 2) array of lower/upper parameter
            bounds; unbounded when None.
        hs: optional per-parameter grid step sizes; defaults to
            ``group_chars(mode)``.
        cuda_on: move the network and tensors to CUDA.
        stop_when_found: optional tensor of parameter vectors; when given,
            the classifier is not evaluated and the search records the
            distance of each of these points instead.
        verbose: print the iteration counter every 100 iterations.

    Returns:
        Tuple ``(manitest_score, manitest_image, fooling_tfm, dist, coords,
        input_label, out_label, point_dists, k)``.
    """

    def list_index(a_list, inds):
        # Pick the entries of a_list at the given indices.
        return [a_list[i] for i in inds]

    def group_chars(mode):
        # Default grid step sizes for each transformation group.
        if mode == 'rotation':
            hs = torch.Tensor([pi / 20])
        elif mode == 'translation':
            hs = torch.Tensor([0.25, 0.25])
        elif mode == 'rotation+scaling':
            hs = torch.Tensor([pi / 20, 0.1])
        elif mode == 'rotation+translation':
            hs = torch.Tensor([pi / 20, 0.25, 0.25])
        elif mode == 'scaling+translation':
            hs = torch.Tensor([0.1, 0.25, 0.25])
        elif mode == 'similarity':
            hs = torch.Tensor([pi / 20, 0.5, 0.5, 0.1])
        else:
            raise NameError('Wrong mode name entered')
        if cuda_on:
            # NOTE(review): Tensor.cuda() returns a copy and the result is
            # discarded here, so hs appears to stay on the CPU — confirm.
            hs.cuda()
        return hs

    def gen_simplices(cur_vec, cur_dim):
        # Recursively enumerate index lists built from the neighbours in
        # n_vec, one grid dimension per recursion level; each complete
        # non-empty list is appended to simpls. Reads i and n_vec from the
        # enclosing scope.
        nonlocal num_simpl
        nonlocal simpls
        if cur_dim == dimension_group + 1:
            if not cur_vec:
                return
            simpls.append(cur_vec)
            num_simpl = num_simpl + 1
            return
        if (n_vec[2 * cur_dim - 2] == i or n_vec[2 * cur_dim - 1] == i):
            # The current node i is a neighbour along this dimension: it
            # must be part of the list.
            cur_vec = cur_vec + [i]
            gen_simplices(cur_vec, cur_dim + 1)
        else:
            # Otherwise try: no vertex, the first neighbour, the second
            # neighbour along this dimension.
            gen_simplices(cur_vec, cur_dim + 1)
            if (n_vec[2 * cur_dim - 2] != -1):
                cur_vec_l = cur_vec + [n_vec[2 * cur_dim - 2]]
                gen_simplices(cur_vec_l, cur_dim + 1)
            if (n_vec[2 * cur_dim - 1] != -1):
                cur_vec_r = cur_vec + [n_vec[2 * cur_dim - 1]]
                gen_simplices(cur_vec_r, cur_dim + 1)

    def check_oob(coord):
        # Despite the name, returns 1 when coord lies INSIDE the limits
        # (with a 1e-8 tolerance) and 0 otherwise.
        inside = 1
        for u in range(len(coord)):
            if (coord[u] > lim[u, 1] + 1e-8 or coord[u] < lim[u, 0] - 1e-8):
                inside = 0
                break
        return inside

    def get_and_create_neighbours(cur_node):
        # For every dimension, link cur_node with its +hs and -hs grid
        # neighbours, creating nodes that do not exist yet.
        nonlocal id_max, coords, dist, visited, neighbours, W, ims
        #1)Generate coordinates of neighbouring nodes
        for l in range(dimension_group):
            #Generate coordinates
            coordsNeighbour1 = coords[cur_node].clone()
            coordsNeighbour1[l] += hs[l]
            coordsNeighbour2 = coords[cur_node].clone()
            coordsNeighbour2[l] -= hs[l]
            if check_oob(coordsNeighbour1):
                #Can we find a similar coordinate?
                dists = (torch.stack(coords, 0) -
                         coordsNeighbour1.repeat(len(coords), 1)).abs().sum(
                             dim=1)
                II1 = (dists < 1e-6).nonzero()
                # NOTE(review): relies on size() being an empty (falsy)
                # torch.Size when nothing matches; with recent PyTorch
                # nonzero() returns a (0, 1) tensor whose size is truthy —
                # confirm against the targeted version.
                if not II1.size():
                    id_max += 1
                    #create node: i) coords, ii)visited, iii)distance
                    coords.append(coordsNeighbour1)
                    dist.append(np.inf)
                    visited.append(0)
                    #Assign the NodeID to IDNeighbours
                    neighbours.append([-1] * 2 * dimension_group)
                    neighbours[cur_node][2 * l] = id_max
                    #Do the reverse
                    neighbours[id_max][2 * l + 1] = cur_node
                    W.append(None)
                    ims.append([])
                else:
                    #Node already exists
                    neighbours[cur_node][2 * l] = II1[0, 0]
                    #Do the reverse
                    neighbours[II1[0, 0]][2 * l + 1] = cur_node
            if check_oob(coordsNeighbour2):
                #Can we find a similar coordinate?
                dists = (torch.stack(coords, 0) -
                         coordsNeighbour2.repeat(len(coords), 1)).abs().sum(
                             dim=1)
                II2 = (dists < 1e-6).nonzero()
                if not II2.size():
                    id_max += 1
                    #create node: i) coords, ii)visited, iii)distance
                    coords.append(coordsNeighbour2)
                    dist.append(np.inf)
                    visited.append(0)
                    #Assign the NodeID to IDNeighbours
                    neighbours.append([-1] * 2 * dimension_group)
                    neighbours[cur_node][2 * l + 1] = id_max
                    #Do the reverse
                    neighbours[id_max][2 * l] = cur_node
                    W.append(None)
                    ims.append([])
                else:
                    #Node already exists
                    neighbours[cur_node][2 * l + 1] = II2[0, 0]
                    #Do the reverse
                    neighbours[II2[0, 0]][2 * l] = cur_node

    def generate_metric(cur_node):
        # Store the transformed image of the node and the matrix J J^T built
        # from the Jacobian returned by g.jacobian.
        nonlocal ims, W
        tau = coords[cur_node]
        tfm = g.para2tfm(tau, mode, 1)
        I = tfm(input_image)
        ims[cur_node] = I
        J = g.jacobian(input_image, I, tfm, mode, 1)
        J_n = J.resize_(J.size()[0], n)
        curW = J_n.mm(J_n.transpose(0, 1))
        W[cur_node] = curW

    def evaluate_classifier(cur_node):
        # Returns True (and records the outputs) when the node's image is
        # classified differently from the input image.
        nonlocal manitest_score, manitest_image, fooling_tfm, out_label
        x = Variable(ims[cur_node].unsqueeze(0))
        output = net(x)
        _, k_I = torch.max(output.data, 1)
        pred_label = k_I[0]
        if pred_label != input_label:
            manitest_score = dist[cur_node] / input_image.norm()
            manitest_image = ims[cur_node]
            fooling_tfm = g.para2tfm(coords[cur_node], mode, 1)
            out_label = pred_label
            return True
        return False

    ###
    e = g.init_param(mode)
    if cuda_on:
        net.cuda()
        input_image = input_image.cuda()
        e = e.cuda()
    dimension_group = e.size()[0]
    # Total number of elements of the input image.
    n = functools.reduce(operator.mul, input_image.size(), 1)
    stop_flag = False
    point_dists = None
    if stop_when_found is not None:
        stop_flag = True
        num_stopping_points = stop_when_found.size()[0]
        point_dists = torch.Tensor(num_stopping_points)
        remaining_points = num_stopping_points
    if hs is None:
        hs = group_chars(mode)
    if lim is None:
        lim = np.zeros((dimension_group, 2))
        lim[:, 0] = -np.inf
        lim[:, 1] = np.inf
    # Per-node state, indexed by node id; node 0 is the starting point e.
    dist = [0.0]
    visited = [0]
    coords = [e]
    ims = [input_image]
    W = [None]
    id_max = 0
    neighbours = [[-1] * 2 * dimension_group]

    #Generate input label
    x = Variable(input_image.unsqueeze(0))
    output = net(x)
    _, k_I = torch.max(output.data, 1)
    input_label = k_I[0]

    #Output Variables
    manitest_score = np.inf
    manitest_image = input_image.clone()
    fooling_tfm = e
    out_label = input_label

    for k in range(maxIter):
        if k % 100 == 0 and verbose:
            print('>> k = {}'.format(k))
        # Pick the unvisited node with the smallest tentative distance.
        tmp_vec = np.array(dist[0:id_max + 1])  #copy the list
        tmp_vec[np.asarray(visited) == 1] = np.inf
        i = np.argmin(tmp_vec)
        visited[i] = 1

        #evaluate the classifier and check if it is fooled
        if stop_flag:
            dists = torch.norm(
                coords[i].repeat(num_stopping_points, 1) - stop_when_found, 2,
                1)
            if dists.min() < 1e-6:
                _, ind = torch.min(dists, 0)
                point_dists[ind[0, 0]] = dist[i]
                remaining_points -= 1
                if remaining_points == 0:
                    break
        elif evaluate_classifier(i):
            break

        get_and_create_neighbours(i)

        for j in neighbours[i]:
            if j == -1:
                continue
            #Consider unknown neighbours only
            if visited[j]:
                continue
            #Look at the neighbours of j (vector of size 2*dimension_group)
            n_vec = neighbours[j]
            num_simpl = 1
            simpls = []
            gen_simplices([], 1)
            if W[j] is None:
                generate_metric(j)
            for j_ in range(num_simpl - 1):
                # Offsets of the list's vertices relative to node j.
                X = torch.stack(list_index(coords,
                                           simpls[j_])) - coords[j].repeat(
                                               len(simpls[j_]), 1)
                if cuda_on:
                    v = torch.cuda.FloatTensor(list_index(
                        dist, simpls[j_])).unsqueeze(1)
                    one_vector = torch.ones(v.size()).cuda()
                else:
                    v = torch.FloatTensor(list_index(dist,
                                                     simpls[j_])).unsqueeze(1)
                    one_vector = torch.ones(v.size())
                M_prime = (X.mm(W[j]).mm(X.transpose(0, 1)))
                # NOTE(review): torch.gesv is not available in recent PyTorch
                # releases, and the bare except below also swallows
                # unrelated errors (including KeyboardInterrupt) — confirm
                # this is intended.
                try:
                    invM_prime_v, _ = torch.gesv(v, M_prime)
                except:
                    invM_prime_v = v * np.inf
                try:
                    invM_prime_1, _ = torch.gesv(one_vector, M_prime)
                except:
                    invM_prime_1 = one_vector * np.inf
                invM_prime_v.transpose_(0, 1)
                # one_vector.squeeze_()
                # v.squeeze_()
                #Solve second order equation
                # dz^2 * one_vector' * invM_prime * one_vector
                # - 2 * dz * one_vector' * invM_prime * v + v' * invM_prime * v - 1
                Delta = (invM_prime_v.sum()
                         )**2 - invM_prime_1.sum() * (invM_prime_v.mm(v) - 1)
                Delta = Delta[0, 0]
                if Delta >= 0:
                    #Compute solution
                    x_c = (invM_prime_v.sum() +
                           np.sqrt(Delta)) / invM_prime_1.sum()
                    #Test that it is not on the border of the simplex
                    te, _ = torch.gesv(x_c - v, M_prime)
                    if te.min() > 0:
                        dist[j] = min(dist[j], x_c)

    return manitest_score, manitest_image, fooling_tfm, dist, coords, input_label, out_label, point_dists, k
def conditional(Xnew,
                X,
                kern,
                f,
                full_cov=False,
                q_sqrt=None,
                whiten=False,
                jitter_level=1e-6):
    """
    Given F, representing the GP at the points X, produce the mean and
    (co-)variance of the GP at the points Xnew.

    Additionally, there may be Gaussian uncertainty about F as represented by
    q_sqrt. In this case `f` represents the mean of the distribution and
    q_sqrt the square-root of the covariance.

    Additionally, the GP may have been centered (whitened) so that
        p(v) = N( 0, I)
        f = L v
    thus
        p(f) = N(0, LL^T) = N(0, K).
    In this case 'f' represents the values taken by v.

    The method can either return the diagonals of the covariance matrix for
    each output or the full covariance matrix (full_cov).

    We assume K independent GPs, represented by the columns of f (and the
    last dimension of q_sqrt).

    :param Xnew: data matrix, size N x D.
    :param X: data points, size M x D.
    :param kern: GPflow kernel.
    :param f: data matrix, M x K, representing the function values at X,
        for K functions.
    :param full_cov: return the full N x N covariance per function instead
        of only its diagonal.
    :param q_sqrt: matrix of standard-deviations or Cholesky matrices,
        size M x K or M x M x K.
    :param whiten: boolean of whether to whiten the representation as
        described above.
    :param jitter_level: constant added to the diagonal of K(X, X) before
        the Cholesky factorization.
    :return: two element tuple with conditional mean (N x K) and variance
        (N x K, or N x N x K when full_cov is set).
    """

    def _solve(lhs, rhs):
        # torch.gesv is absent from current PyTorch; fall back to it only on
        # old installations.
        linalg_solve = getattr(getattr(torch, "linalg", None), "solve", None)
        if linalg_solve is not None:
            return linalg_solve(lhs, rhs)
        return torch.gesv(rhs, lhs)[0]

    def _cholesky_lower(mat):
        # torch.potrf is absent from current PyTorch as well.
        linalg_chol = getattr(getattr(torch, "linalg", None), "cholesky", None)
        if linalg_chol is not None:
            return linalg_chol(mat)
        return torch.potrf(mat, upper=False)

    # compute kernel stuff
    num_data = X.size(0)  # M
    num_func = f.size(1)  # K
    Kmn = kern.K(X, Xnew)
    Kmm = kern.K(X) + Variable(torch.eye(num_data,
                                         out=X.data.new())) * jitter_level
    Lm = _cholesky_lower(Kmm)

    # Compute the projection matrix A
    A = _solve(Lm, Kmn)

    # compute the covariance due to the conditioning
    if full_cov:
        fvar = kern.K(Xnew) - torch.matmul(A.t(), A)
        fvar = fvar.unsqueeze(0).expand(num_func, -1, -1)  # K x N x N
    else:
        fvar = kern.Kdiag(Xnew) - (A**2).sum(0)
        fvar = fvar.unsqueeze(0).expand(num_func, -1)  # K x N
    # fvar is K x N x N or K x N

    # another backsubstitution in the unwhitened case (complete the inverse
    # of the cholesky decomposition)
    if not whiten:
        A = _solve(Lm.t(), A)

    # construct the conditional mean
    fmean = torch.matmul(A.t(), f)

    if q_sqrt is not None:
        if q_sqrt.dim() == 2:
            LTA = A * q_sqrt.t().unsqueeze(2)  # K x M x N
        elif q_sqrt.dim() == 3:
            L = batch_tril(q_sqrt.permute(2, 0, 1))  # K x M x M
            LTA = torch.matmul(L.transpose(-2, -1), A)  # K x M x N
        else:  # pragma: no cover
            raise ValueError("Bad dimension for q_sqrt :{}".format(
                q_sqrt.dim()))
        if full_cov:
            # LTA is 3-D, so use a batched transpose of the last two axes;
            # Tensor.t() only accepts tensors with at most two dimensions.
            fvar = fvar + torch.matmul(LTA.transpose(-2, -1),
                                       LTA)  # K x N x N
        else:
            fvar = fvar + (LTA**2).sum(1)  # K x N
    # Reverse the axis order so the function index K comes last.
    fvar = fvar.permute(*range(fvar.dim() - 1, -1, -1))  # N x K or N x N x K

    return fmean, fvar
def Linear_Time_Iteration(A, B, C, F_initial, mu, epsilon):
    """
    Find the linear time iteration solution to the system of equations
    AX(-1) + BX + CE[X(+1)] + epsilon = 0, with a recursive solution of the
    form X = FX(-1) + Q*epsilon.

    Parameters
    ----------
    A : torch, array_like, dtype=float
        The matrix of coefficients next to endogenous variables entering
        with a lag
    B : torch, array_like, dtype=float
        The matrix of coefficients next to endogenous, contemporanous
        variables
    C : torch, array_like, dtype=float
        The matrix of coefficients next to endogenous variables entering
        with a lead
    F_initial : torch, array_like, dtype=float
        The initial guess for F
    mu : number, dtype=float
        Small positive real number to be multiplied by a conformable
        identity matrix
    epsilon : number, dtype=float
        Threshold value, should be set to a small value like 1e-16

    Returns
    -------
    F : torch, array_like, dtype=float
        The matrix of coefficients next to the endogenous variable in the
        solution
    Q : torch, array_like, dtype=float
        The matrix of coefficients next to the disturbance term in the
        solution

    Notes
    -----
    The loop ends when the larger of the two residuals drops to ``epsilon``
    or below, or once the iteration counter exceeds 1,000,000.
    """
    # Coefficient matrices shifted by mu * I.
    shift = eye(*A.shape) * mu
    Ch = C
    Bh = B + 2 * mm(C, shift)
    Ah = mm(C, matrix_power(shift, 2)) + mm(B, shift) + A

    F = F_initial
    S = zeros(*A.shape)
    metric = 1
    step = 1
    while metric > epsilon:
        if step % 10000 == 0:
            print(step)
        # gesv(rhs, lhs) solves lhs @ X = rhs.
        F = -gesv(Ah, Bh + mm(Ch, F))[0]
        S = -gesv(Ch, Bh + mm(Ah, S))[0]
        residual_F = max(abs(Ah + mm(Bh, F) + mm(Ch, mm(F, F))))
        residual_S = max(abs(mm(Ah, mm(S, S)) + mm(Bh, S) + Ch))
        metric = max(residual_F, residual_S)
        step += 1
        if step > 1000000:
            break

    # Undo the shift on F, then build Q from it.
    F = F + shift
    Q = -inverse(B + mm(C, F))
    return F, Q
def predict(self, pred_x, hyper=None):
    """Return the predictive mean and variance at ``pred_x``.

    Training points are split into those whose first mapped coordinate is
    exactly 0 (called "origin" points below) and all others. A standard
    kernel-regression prediction is made from the "other" points, and then
    a per-prediction correction built from the origin points is added to
    the mean and subtracted from the variance.

    Args:
        pred_x: prediction inputs; one row per prediction point.
        hyper: optional parameter vector. When given, it is loaded into
            ``self.model`` for the duration of the call and the previous
            parameters are restored before returning.

    Returns:
        Tuple ``(mean, variance)`` with the corrections already applied.
    """
    if hyper is not None:
        # Remember the current parameters so they can be restored on exit.
        param_original = self.model.param_to_vec()
        self.model.vec_to_param(hyper)
    # Copy of the kernel whose input map is replaced by id_transform, so it
    # can be called on inputs that have already been passed through the
    # model kernel's own input_map (presumably id_transform is the identity
    # map -- confirm against its definition).
    kernel_on_input_map = deepcopy(self.model.kernel)
    kernel_on_input_map.input_map = id_transform
    train_x_input_map = self.model.kernel.input_map(self.train_x)
    pred_x_input_map = self.model.kernel.input_map(pred_x)

    # "Origin" training points: first mapped coordinate equals 0.
    train_origin_point_mask = train_x_input_map.data[:, 0] == 0
    n_train_origin = torch.sum(train_origin_point_mask)
    n_train_other = self.train_x.size(0) - n_train_origin
    # Descending sort of the mask puts the origin indices first.
    train_point_ind = torch.sort(train_origin_point_mask, 0, descending=True)[1]
    train_origin_ind = train_point_ind[:n_train_origin]
    train_other_ind = train_point_ind[n_train_origin:]
    train_x_other = self.train_x[train_other_ind]
    train_x_origin = self.train_x[train_origin_ind]
    train_x_other_input_map = train_x_input_map[train_other_ind]
    train_y_other = self.train_y[train_other_ind]
    train_y_origin = self.train_y[train_origin_ind]

    # Explicit inverse of (kernel + diagonal likelihood term) on the
    # non-origin points, obtained by solving against the identity.
    eye_mat = Variable(torch.eye(n_train_other)).type_as(self.train_x)
    K_other_noise = kernel_on_input_map(
        train_x_other_input_map) + torch.diag(
            self.model.likelihood(train_x_other))
    K_other_noise_inv, _ = torch.gesv(eye_mat, K_other_noise)

    # Targets with the model mean removed.
    mean_vec_other = train_y_other - self.model.mean(train_x_other)
    mean_vec_origin = train_y_origin - self.model.mean(train_x_origin)

    # Prediction from the non-origin points only.
    k_pred_other = kernel_on_input_map(pred_x_input_map, train_x_other_input_map)
    shared_part = k_pred_other.mm(K_other_noise_inv)
    pred_mean = torch.mm(shared_part, mean_vec_other) + self.model.mean(pred_x)
    pred_var = self.model.kernel.forward_on_identical() - (
        shared_part * k_pred_other).sum(1, keepdim=True)

    # For every prediction point, the same mapped point with its first
    # coordinate set to 0.
    slide_origin = pred_x_input_map.clone()
    slide_origin[:, 0] = 0
    K_other_origin = kernel_on_input_map(train_x_other_input_map, slide_origin)
    Ainv_B = K_other_noise_inv.mm(K_other_origin)

    # Scalars of a closed-form inverse of (identity_part * I +
    # onemat_part * ones-matrix) of size n_train_origin -- this reading is
    # inferred from the variable names and formulas; confirm against the
    # derivation.
    identity_part = self.model.likelihood(self.train_x[:1] * 0)
    onemat_part = self.model.kernel.forward_on_identical() - (
        Ainv_B * K_other_origin).sum(0).view(-1, 1)
    identity_const = 1.0 / identity_part
    onemat_const = -onemat_part / identity_part / (
        identity_part + onemat_part * n_train_origin)

    # Kernel value between each prediction point and its own slid point,
    # evaluated one row at a time.
    k_pred_origin = torch.cat([
        kernel_on_input_map(pred_x_input_map[i:i + 1], slide_origin[i:i + 1])
        for i in range(pred_x.size(0))
    ], 0)
    k_vec = (k_pred_other * Ainv_B.t()).sum(1).view(-1, 1) - k_pred_origin
    y_vec = torch.mm(mean_vec_other.t(), Ainv_B).view(
        -1, 1) - torch.mean(mean_vec_origin)

    # Corrections contributed by the origin points.
    slide_origin_mean_adjustment = (
        identity_const + onemat_const * n_train_origin) * k_vec * y_vec * n_train_origin
    slide_origin_quad_adjustment = (
        identity_const + onemat_const * n_train_origin) * k_vec**2 * n_train_origin

    if hyper is not None:
        # Put the model back the way it was found.
        self.model.vec_to_param(param_original)
    return pred_mean + slide_origin_mean_adjustment, pred_var - slide_origin_quad_adjustment
def b_inv(A):
    """Invert a square matrix, or every matrix of a batch, by solving A X = I.

    Args:
        A: tensor of shape (..., n, n).

    Returns:
        Tensor with the same shape as ``A`` holding the inverse(s).

    ``torch.gesv`` is used when the running PyTorch still provides it;
    releases that removed it (``torch.linalg.solve`` requires >= 1.8) go
    through ``torch.linalg.solve`` instead.
    """
    # Identity with the dtype/device of A, broadcast over leading batch dims.
    identity = A.new_ones(A.size(-1)).diag().expand_as(A)
    if hasattr(torch, "gesv"):
        inverse, _ = torch.gesv(identity, A)
    else:
        inverse = torch.linalg.solve(A, identity)
    return inverse
def bgesv(B, A):
    """Solve ``A[i] @ X[i] = B[i]`` for each pair and stack the solutions.

    Args:
        B: iterable of right-hand-side matrices (first axis is the batch).
        A: iterable of square coefficient matrices, paired with ``B`` by
            ``zip`` (extra items in the longer one are ignored).

    Returns:
        ``torch.stack`` of the per-item solutions along a new leading axis.

    ``torch.gesv`` is used when available; on PyTorch releases that removed
    it the equivalent ``torch.linalg.solve`` (>= 1.8) is called instead.
    """
    if hasattr(torch, "gesv"):
        solutions = [torch.gesv(b, a)[0] for b, a in zip(B, A)]
    else:
        # Note the swapped argument order: linalg.solve(lhs, rhs).
        solutions = [torch.linalg.solve(a, b) for b, a in zip(B, A)]
    return torch.stack(solutions)
# Timing comparison between a closed-form normal-equations solve and
# train_logistic_torch, both on the GPU. X, y and train_logistic_torch
# are defined outside this fragment.
Xt = torch.FloatTensor(X).cuda()
yt = torch.FloatTensor(y).cuda()
# Earlier NumPy / scikit-learn timing loop, currently disabled:
# cur_time = 0
# for i in range(10):
#     #log_reg = LogisticRegression(solver=opt, random_state=123)
#     start = time.time()
#     thetas_np = train_logistic(X, y)
#     #thetas = train_logistic_torch(Xt, yt)
#
#     #log_reg.fit(X, y)
#     cur_time += time.time()-start
# print("Time is {}".format(cur_time/10))

cur_time = 0
# Solves (Xt^T Xt) P = Xt^T. `pre` is not read again in the visible part
# of the script.
pre = torch.gesv(Xt.t(), Xt.t() @ Xt)[0]
cov = Xt.t() @ Xt
for i in range(10):
    #log_reg = LogisticRegression(solver=opt, random_state=123)
    start = time.time()
    # thetas_np = train_logistic(X, y)
    # Normal equations: cov @ theta = Xt^T yt. `theta` keeps the whole
    # (solution, LU) tuple returned by gesv.
    theta = torch.gesv((Xt.t() @ yt).view(-1, 1), cov)
    #log_reg.fit(X, y)
    # NOTE(review): CUDA kernels are launched asynchronously; without a
    # torch.cuda.synchronize() this may time only the launch -- confirm.
    cur_time += time.time() - start
# Average wall-clock time over the 10 runs.
print("Time is {}".format(cur_time / 10))

cur_time = 0
for i in range(10):
    #log_reg = LogisticRegression(solver=opt, random_state=123)
    start = time.time()
    # thetas_np = train_logistic(X, y)
    thetas = train_logistic_torch(Xt, yt)
def _depth_warping(depth_maps_1, depth_maps_2, img_masks, translation_vectors,
                   rotation_matrices, intrinsic_matrices, epsilon):
    """Warp depth map 2 into the pixel grid of view 1.

    The three image-like inputs are permuted from channel-second to
    channel-last layout and masked by ``img_masks``. Every helper tensor is
    created with ``.cuda()``, so the function only runs with a CUDA device.

    Args:
        depth_maps_1, depth_maps_2: 4-D depth maps (channel axis second).
        img_masks: mask multiplied into both depth maps; also thresholded
            at 0.5 and used to build the intersection mask.
        translation_vectors, rotation_matrices, intrinsic_matrices: batched
            camera quantities combined with ``torch.bmm``; the intrinsics
            are treated as 3x3 (they are solved against a 3x3 identity).
        epsilon: replacement value passed to ``torch.where`` for masked-out
            or non-positive depths (presumably a tensor -- confirm with the
            caller).

    Returns:
        ``[warped_depth_maps_2, intersect_masks]``, each viewed as
        (num_batch, 1, height, width).
    """
    # Generate a meshgrid for each depth map to calculate value
    # BxHxWxC
    depth_maps_1 = torch.mul(depth_maps_1, img_masks)
    depth_maps_2 = torch.mul(depth_maps_2, img_masks)

    depth_maps_1 = depth_maps_1.permute(0, 2, 3, 1)
    depth_maps_2 = depth_maps_2.permute(0, 2, 3, 1)
    img_masks = img_masks.permute(0, 2, 3, 1)

    # `channels` is unpacked but not used below.
    num_batch, height, width, channels = depth_maps_1.shape

    y_grid, x_grid = torch.meshgrid([
        torch.arange(start=0, end=height, dtype=torch.float32).cuda(),
        torch.arange(start=0, end=width, dtype=torch.float32).cuda()
    ])

    x_grid = x_grid.view(1, height, width, 1)
    y_grid = y_grid.view(1, height, width, 1)

    ones_grid = torch.ones((1, height, width, 1), dtype=torch.float32).cuda()

    # Batched inverse of the intrinsics, obtained by solving K X = I
    # (replaces the single-matrix intrinsic_matrix.inverse() used earlier).
    eye = torch.eye(3).float().cuda().view(1, 3, 3).expand(
        intrinsic_matrices.shape[0], -1, -1)
    intrinsic_matrices_inverse, _ = torch.gesv(eye, intrinsic_matrices)

    # Transpose used as the inverse of each rotation matrix.
    rotation_matrices_inverse = rotation_matrices.transpose(1, 2)

    # The following is when we have different intrinsic matrices for samples within a batch
    temp_mat = torch.bmm(intrinsic_matrices, rotation_matrices_inverse)
    W = torch.bmm(temp_mat, -translation_vectors)
    M = torch.bmm(temp_mat, intrinsic_matrices_inverse)

    # Homogeneous pixel coordinates (x, y, 1) as column vectors.
    mesh_grid = torch.cat((x_grid, y_grid, ones_grid), dim=-1).view(height, width, 3, 1)
    intermediate_result = torch.matmul(M.view(-1, 1, 1, 3, 3), mesh_grid).view(-1, height, width, 3)

    # Third component of W + depth_1 * (M @ pixel).
    depth_maps_2_calculate = W.view(-1, 3).narrow(
        dim=-1, start=2, length=1).view(-1, 1, 1, 1) + torch.mul(
            depth_maps_1,
            intermediate_result.narrow(dim=-1, start=2, length=1).view(
                -1, height, width, 1))

    # expand operation doesn't allocate new memory (repeat does)
    # Masked-out and non-positive depths become epsilon, which also keeps
    # the divisions below away from zero.
    depth_maps_2_calculate = torch.where(img_masks > 0.5, depth_maps_2_calculate, epsilon)
    depth_maps_2_calculate = torch.where(depth_maps_2_calculate > 0.0, depth_maps_2_calculate, epsilon)

    # This is the source coordinate in coordinate system 2 but ordered in
    # coordinate system 1 in order to warp image 2 to coordinate system 1
    u_2 = (W.view(-1, 3).narrow(dim=-1, start=0, length=1).view(-1, 1, 1, 1) +
           torch.mul(
               depth_maps_1,
               intermediate_result.narrow(dim=-1, start=0, length=1).view(
                   -1, height, width, 1))) / (depth_maps_2_calculate)

    v_2 = (W.view(-1, 3).narrow(dim=-1, start=1, length=1).view(-1, 1, 1, 1) +
           torch.mul(
               depth_maps_1,
               intermediate_result.narrow(dim=-1, start=1, length=1).view(
                   -1, height, width, 1))) / (depth_maps_2_calculate)

    # Same construction with the un-inverted rotation and translation,
    # applied to depth map 2.
    W_2 = torch.bmm(intrinsic_matrices, translation_vectors)
    M_2 = torch.bmm(torch.bmm(intrinsic_matrices, rotation_matrices), intrinsic_matrices_inverse)

    temp = torch.matmul(M_2.view(-1, 1, 1, 3, 3), mesh_grid).view(
        -1, height, width, 3).narrow(dim=-1, start=2, length=1).view(-1, height, width, 1)
    depth_maps_1_calculate = W_2.view(-1, 3).narrow(
        dim=-1, start=2, length=1).view(-1, 1, 1, 1) + torch.mul(
            depth_maps_2, temp)
    depth_maps_1_calculate = torch.mul(img_masks, depth_maps_1_calculate)

    u_2_flat = u_2.view(-1)
    v_2_flat = v_2.view(-1)

    # Sample the re-projected depth at the (u_2, v_2) coordinates.
    warped_depth_maps_2 = _bilinear_interpolate(depth_maps_1_calculate, u_2_flat, v_2_flat).view(
        num_batch, 1, height, width)

    # binarize
    intersect_masks = torch.where(
        _bilinear_interpolate(img_masks, u_2_flat, v_2_flat) * img_masks >= 0.9,
        torch.tensor(1.0).float().cuda(),
        torch.tensor(0.0).float().cuda()).view(num_batch, 1, height, width)

    return [warped_depth_maps_2, intersect_masks]
def chol_solve(B, A):
    """Solve ``A @ X = B`` for a symmetric positive-definite ``A``.

    Args:
        B: right-hand side, shape (n, k).
        A: symmetric positive-definite matrix, shape (n, n).

    Returns:
        The solution ``X`` with the shape of ``B``.

    On PyTorch versions that still ship ``torch.potrf`` and ``torch.gesv``
    the original two-step solve through the upper Cholesky factor is used.
    Both functions were removed from later releases; there the factor comes
    from ``torch.linalg.cholesky`` and is applied with
    ``torch.cholesky_solve``.
    """
    if hasattr(torch, "potrf") and hasattr(torch, "gesv"):
        # potrf returns the upper factor U with A = U^T U:
        # solve U^T s1 = B, then U s2 = s1.
        c = torch.potrf(A)
        s1 = torch.gesv(B, c.transpose(0, 1))[0]
        s2 = torch.gesv(s1, c)[0]
        return s2
    # linalg.cholesky returns the lower factor, which is what
    # cholesky_solve expects by default (upper=False).
    lower = torch.linalg.cholesky(A)
    return torch.cholesky_solve(B, lower)
def get_fantasy_strategy(self, inputs, targets, full_inputs, full_targets, full_output):
    """
    Returns a new PredictionStrategy that incorporates the specified inputs and targets as new training data.

    This method is primarily responsible for updating the mean and covariance caches. To add fantasy data to a
    GP model, use the :meth:`~gpytorch.models.ExactGP.get_fantasy_model` method.

    As a side effect, ``inputs`` and ``targets`` are stored on ``self`` as ``fantasy_inputs`` and
    ``fantasy_targets``.

    Args:
        - :attr:`inputs` (Tensor `m x d` or `b x m x d`): Locations of fantasy observations.
        - :attr:`targets` (Tensor `m` or `b x m`): Labels of fantasy observations.
        - :attr:`full_inputs` (Tensor `n+m x d` or `b x n+m x d`): Training data concatenated with fantasy inputs
        - :attr:`full_targets` (Tensor `n+m` or `b x n+m`): Training labels concatenated with fantasy labels.
        - :attr:`full_output` (:class:`gpytorch.distributions.MultivariateNormal`): Prior called on full_inputs

    Returns:
        - :class:`DefaultPredictionStrategy`
            A `DefaultPredictionStrategy` model with `n + m` training examples, where the `m` fantasy examples have
            been added and all test-time caches have been updated.
    """
    full_mean, full_covar = full_output.mean, full_output.lazy_covariance_matrix

    batch_shape = full_inputs[0].shape[:-2]

    full_mean = full_mean.view(*batch_shape, -1)
    num_train = self.num_train

    # Evaluate fant x train and fant x fant covariance matrices, leave train x train unevaluated.
    fant_fant_covar = full_covar[..., num_train:, num_train:]
    fant_mean = full_mean[..., num_train:]
    # Pass the fantasy block through the likelihood and use the covariance it returns.
    mvn = self.likelihood(MultivariateNormal(fant_mean, fant_fant_covar), inputs)
    fant_fant_covar = mvn.covariance_matrix
    fant_train_covar = delazify(full_covar[..., num_train:, :num_train])

    self.fantasy_inputs = inputs
    self.fantasy_targets = targets

    """
    Compute a new mean cache given the old mean cache.

    We have \\alpha = K^{-1}y, and we want to solve [K U; U' S][a; b] = [y; y_f], where U' is fant_train_covar,
    S is fant_fant_covar, and y_f is (targets - fant_mean)

    To do this, we solve the bordered linear system of equations for [a; b]:
        AQ = U # Q = fant_solve
        [S - U'Q]b = y_f - U'\\alpha ==> b = [S - U'Q]^{-1}(y_f - U'\\alpha)
        a = \\alpha - Qb
    """
    # Get cached K inverse decomp. (or compute if we somehow don't already have the covariance cache)
    K_inverse = self.lik_train_train_covar.root_inv_decomposition()
    fant_solve = K_inverse.matmul(fant_train_covar.transpose(-2, -1))

    # Solve for "b", the lower portion of the *new* \\alpha corresponding to the fantasy points.
    schur_complement = fant_fant_covar - fant_train_covar.matmul(fant_solve)
    small_system_rhs = targets - fant_mean - fant_train_covar.matmul(self.mean_cache)

    # Schur complement of a spd matrix is guaranteed to be positive definite
    if small_system_rhs.requires_grad or schur_complement.requires_grad:
        # TODO: Delete this part of the if statement when PyTorch implements cholesky_solve derivative.
        fant_cache_lower = torch.gesv(small_system_rhs.unsqueeze(-1), schur_complement)[0]
    else:
        # NOTE(review): unlike the gesv branch, the right-hand side is passed here without
        # unsqueeze(-1), yet the code below treats fant_cache_lower as having a trailing
        # column axis -- confirm cholesky_solve accepts and returns the expected shape.
        fant_cache_lower = cholesky_solve(small_system_rhs, torch.cholesky(schur_complement))

    # Get "a", the new upper portion of the cache corresponding to the old training points.
    fant_cache_upper = self.mean_cache.unsqueeze(-1) - fant_solve.matmul(fant_cache_lower)

    fant_cache_upper = fant_cache_upper.squeeze(-1)
    fant_cache_lower = fant_cache_lower.squeeze(-1)

    # New mean cache.
    fant_mean_cache = torch.cat((fant_cache_upper, fant_cache_lower), dim=-1)

    """
    Compute a new covariance cache given the old covariance cache.

    We have access to K \\approx LL' and K^{-1} \\approx R^{-1}R^{-T}, where L and R are low rank matrices
    resulting from Lanczos (see the LOVE paper).

    To update R^{-1}, we first update L:
        [K U; U' S] = [L 0; A B][L' A'; 0 B']
    Solving this matrix equation, we get:
        K = LL' ==> L = L
        U = LA' ==> A = UR^{-1}
        S = AA' + BB' ==> B = cholesky(S - AA')

    Once we've computed Z = [L 0; A B], we have that the new kernel matrix [K U; U' S] \approx ZZ'. Therefore,
    we can form a pseudo-inverse of Z directly to approximate [K U; U' S]^{-1/2}.
    """
    # [K U; U' S] = [L 0; lower_left schur_root]
    # batch_shape is re-derived here from the fantasy/train covariance block.
    batch_shape = fant_train_covar.shape[:-2]

    L_inverse = self.covar_cache
    L = delazify(self.lik_train_train_covar.root_decomposition().root)
    m, n = L.shape[-2:]

    lower_left = fant_train_covar.matmul(L_inverse)
    schur_root = torch.cholesky(fant_fant_covar - lower_left.matmul(lower_left.transpose(-2, -1)))
    # Zero block for the upper-right corner (created without batch dimensions; the slice
    # assignment below broadcasts it).
    upper_right = torch.zeros(m, schur_root.size(-1), device=L.device, dtype=L.dtype)

    # Form new root Z = [L 0; lower_left schur_root]
    num_fant = schur_root.size(-2)
    m, n = L.shape[-2:]
    new_root = torch.zeros(*batch_shape, m + num_fant, n + num_fant, device=L.device, dtype=L.dtype)
    new_root[..., :m, :n] = L
    new_root[..., :m, n:] = upper_right
    new_root[..., m:, :n] = lower_left
    new_root[..., m:, n:] = schur_root

    # Use pseudo-inverse of Z as new inv root
    # TODO: Replace pseudo-inverse calculation with something more stable than normal equations once
    # one of torch.svd, torch.qr, or torch.pinverse works in batch mode.
    cap_mat = new_root.transpose(-2, -1).matmul(new_root)

    # Both branches solve (Z'Z) X = Z' and transpose the result once.
    if cap_mat.requires_grad or new_root.requires_grad:
        # TODO: Delete this part of the if statement when PyTorch implements cholesky_solve derivative.
        new_covar_cache = torch.gesv(new_root.transpose(-2, -1), cap_mat)[0].transpose(-2, -1)
    else:
        new_covar_cache = cholesky_solve(new_root.transpose(-2, -1), torch.cholesky(cap_mat))
        new_covar_cache = new_covar_cache.transpose(-2, -1)

    # Create new DefaultPredictionStrategy object
    new_num_train = full_inputs[0].size(len(batch_shape))
    fant_strat = self.__class__(
        num_train=new_num_train,
        train_inputs=full_inputs,
        train_mean=full_mean,
        train_train_covar=full_covar,
        train_labels=full_targets,
        likelihood=self.likelihood,
        non_batch_train=(len(batch_shape) == 0),
    )
    # Pre-seed the new strategy's memoization cache with the freshly computed caches.
    setattr(fant_strat, "_memoize_cache", {"mean_cache": fant_mean_cache, "covar_cache": new_covar_cache})

    return fant_strat
def b_inv(b_mat):
    """Invert every matrix of a batch by solving ``b_mat @ X = I``.

    Args:
        b_mat: tensor of shape (..., n, n).

    Returns:
        Tensor of the same shape holding the inverse of each matrix.

    The right-hand side is a true identity created on the same device and
    with the same dtype as ``b_mat``. (It used to be filled by
    ``torch.rand`` and moved to CUDA unconditionally, so the result was
    ``inverse @ random`` rather than the inverse.) ``torch.gesv`` is used
    when available; otherwise ``torch.linalg.solve`` (PyTorch >= 1.8).
    """
    identity = b_mat.new_ones(b_mat.size(-1)).diag().expand_as(b_mat)
    if hasattr(torch, "gesv"):
        inverse, _ = torch.gesv(identity, b_mat)
    else:
        inverse = torch.linalg.solve(b_mat, identity)
    return inverse
def forward(self, im, pts_before, pts_after):
    '''
    Deforms image according to movement of pts_before and pts_after

    A smooth displacement surface (one for dx, one for dy) is fitted
    through the control points using the radial basis
    U(r) = r**2 * log(r) plus an affine term, evaluated on a regular grid
    over [-1, 1) and handed to F.grid_sample.

    Args)
      im          torch.Tensor object of size NxCxHxW
      pts_before  torch.Tensor object of size NxTx2 (T is # control pts)
      pts_after   torch.Tensor object of size NxTx2 (T is # control pts)

    Returns)
      Result of F.grid_sample(im, flow_field), where flow_field is the
      NxHxWx2 grid of sampling coordinates.

    When self.normalize is true the control points are taken to be in
    pixel units and are rescaled to [-1, 1] (on copies; the caller's
    tensors are not modified).
    '''
    # check input requirements
    assert (4 == im.dim())
    assert (3 == pts_after.dim())
    assert (3 == pts_before.dim())
    N = im.size()[0]
    assert (N == pts_after.size()[0] and N == pts_before.size()[0])
    assert (2 == pts_after.size()[2] and 2 == pts_before.size()[2])
    T = pts_after.size()[1]
    assert (T == pts_before.size()[1])
    H = im.size()[2]
    W = im.size()[3]

    if self.normalize:
        pts_after = pts_after.clone()
        pts_after[:, :, 0] /= 0.5 * W
        pts_after[:, :, 1] /= 0.5 * H
        pts_after -= 1
        pts_before = pts_before.clone()
        pts_before[:, :, 0] /= 0.5 * W
        pts_before[:, :, 1] /= 0.5 * H
        pts_before -= 1

    def construct_P():
        '''
        Constructs matrix P of size NxTx3 where
          P[n,i,0]  := 1
          P[n,i,1:] := pts_after[n]
        '''
        # Create matrix P with same configuration as 'pts_after'
        P = pts_after.new_zeros((N, T, 3))
        P[:, :, 0] = 1
        P[:, :, 1:] = pts_after
        return P

    def calc_U(pt1, pt2):
        '''
        Calculate distance U between pt1 and pt2

        U(r) := r**2 * log(r)
        where
          r := |pt1 - pt2|_2

        Args)
          pt1 torch.Tensor object, last dim is always 2
          pt2 torch.Tensor object, last dim is always 2
        '''
        assert (2 == pt1.size()[-1])
        assert (2 == pt2.size()[-1])

        r = ((pt1 - pt2)**2).sum(-1).sqrt()

        # Adds 1e-6 for numerical stability: at r == 0 this gives
        # 0 * log(1e-6) == 0 instead of 0 * -inf == nan.
        return (r**2) * torch.log(r + 1e-6)

    def construct_K():
        '''
        Constructs matrix K of size NxTxT where
          K[n,i,j] := U(|pts_after[n,i] - pts_after[n,j]|_2)
        '''
        # Broadcasting (N,T,1,2) against (N,1,T,2) evaluates every pair
        # of control points in a single call.
        return calc_U(pts_after.unsqueeze(2), pts_after.unsqueeze(1))

    def construct_L():
        '''
        Constructs matrix L of size Nx(T+3)x(T+3) where
          L[n] = [[ K[n]    P[n] ]]
                 [[ P[n]^T   0   ]]
        '''
        P = construct_P()
        K = construct_K()

        # Create matrix L with same configuration as 'K'
        L = K.new_zeros((N, T + 3, T + 3))

        # Fill L matrix
        L[:, :T, :T] = K
        L[:, :T, T:(T + 3)] = P
        L[:, T:(T + 3), :T] = P.transpose(1, 2)

        return L

    def construct_uv_grid():
        '''
        Returns H x W x 2 tensor uv with UV coordinate as its elements
        uv[:,:,0] is H x W grid of x values
        uv[:,:,1] is H x W grid of y values
        '''
        u_range = torch.arange(start=-1.0,
                               end=1.0,
                               step=2.0 / W,
                               device=im.device)
        assert (W == u_range.size()[0])
        u = u_range.new_zeros((H, W))
        u[:] = u_range

        v_range = torch.arange(start=-1.0,
                               end=1.0,
                               step=2.0 / H,
                               device=im.device)
        assert (H == v_range.size()[0])
        vt = v_range.new_zeros((W, H))
        vt[:] = v_range
        v = vt.transpose(0, 1)

        return torch.stack([u, v], dim=2)

    L = construct_L()
    VT = pts_before.new_zeros((N, T + 3, 2))
    # Use delta x and delta y as known heights of the surface
    VT[:, :T, :] = pts_before - pts_after

    # Solve Lx = VT
    # x is of shape (N, T+3, 2)
    # x[:,:,0] represents surface parameters for dx surface
    # (dx values as surface height (z))
    # x[:,:,1] represents surface parameters for dy surface
    # (dy values as surface height (z))
    if hasattr(torch, 'gesv'):
        x, _ = torch.gesv(VT, L)
    else:
        # torch.gesv was removed from PyTorch; linalg.solve (>= 1.8)
        # takes the coefficient matrix first.
        x = torch.linalg.solve(L, VT)

    uv = construct_uv_grid()
    uv_batch = uv.repeat((N, 1, 1, 1))

    def calc_dxdy():
        '''
        Calculate surface height for each uv coordinate

        Returns NxHxWx2 tensor
        '''
        # control points of size NxTxHxWx2
        cp = uv.new_zeros((H, W, N, T, 2))
        cp[:, :, :] = pts_after
        cp = cp.permute([2, 3, 0, 1, 4])

        U = calc_U(uv, cp)  # U value matrix of size NxTxHxW
        w, a = x[:, :T, :], x[:, T:, :]  # w is of size NxTx2, a is of size Nx3x2
        w_x, w_y = w[:, :, 0], w[:, :, 1]  # NxT each
        a_x, a_y = a[:, :, 0], a[:, :, 1]  # Nx3 each
        # affine offset + affine slope . uv + radial-basis part
        dx = (a_x[:, 0].repeat((H, W, 1)).permute(2, 0, 1) +
              torch.einsum('nhwd,nd->nhw', uv_batch, a_x[:, 1:]) +
              torch.einsum('nthw,nt->nhw', U, w_x))  # dx values of NxHxW
        dy = (a_y[:, 0].repeat((H, W, 1)).permute(2, 0, 1) +
              torch.einsum('nhwd,nd->nhw', uv_batch, a_y[:, 1:]) +
              torch.einsum('nthw,nt->nhw', U, w_y))  # dy values of NxHxW

        return torch.stack([dx, dy], dim=3)

    dxdy = calc_dxdy()
    flow_field = uv + dxdy

    return F.grid_sample(im, flow_field)
def b_inv(b_mat):
    """Return the inverse of ``b_mat`` (one matrix or a batch of them).

    The inverse is obtained by solving ``b_mat @ X = I`` with an identity
    that shares ``b_mat``'s dtype and device.

    Args:
        b_mat: tensor of shape (..., n, n).

    Returns:
        Tensor shaped like ``b_mat`` containing the inverse(s).

    Uses ``torch.gesv`` where it exists and ``torch.linalg.solve``
    (PyTorch >= 1.8) on releases from which ``gesv`` was removed.
    """
    eye = b_mat.new_ones(b_mat.size(-1)).diag().expand_as(b_mat)
    if hasattr(torch, "gesv"):
        return torch.gesv(eye, b_mat)[0]
    return torch.linalg.solve(b_mat, eye)
def matrix_solve(self, matrix, rhs, adjoint=None):
    """Solve ``matrix @ X = rhs`` and return ``X``.

    Args:
        matrix: square coefficient matrix.
        rhs: right-hand side with as many rows as ``matrix``.
        adjoint: accepted for interface compatibility; it is not used.

    Returns:
        The solution tensor ``X``.

    The interactive ``ipdb`` breakpoint that used to stop every call has
    been removed. ``torch.gesv`` is used where it exists; otherwise
    ``torch.linalg.solve`` (PyTorch >= 1.8).
    """
    if hasattr(torch, "gesv"):
        return torch.gesv(rhs, matrix)[0]
    return torch.linalg.solve(matrix, rhs)
def compute_pseudo_gradient(parameters, lr):
    """Solve ``(I + lr * g g^T) x = g`` for the flattened gradient ``g``.

    Args:
        parameters: iterable of tensors whose ``.grad`` is populated.
        lr: scalar multiplying the outer product ``g g^T``.

    Returns:
        The solution ``x``, moved back to the device of the first
        parameter's gradient (so CUDA gradients still yield a CUDA result,
        and CPU-only hosts no longer fail on an unconditional ``.cuda()``).

    The linear system is assembled and solved on the CPU.
    """
    grads = [p.grad.data.flatten() for p in parameters]
    source_device = grads[0].device
    theta = torch.cat(grads).cpu()

    # Outer product g g^T via broadcasting instead of one Python-level
    # multiply per gradient element.
    H = theta.unsqueeze(1) * theta.unsqueeze(0)
    U = torch.eye(H.size(0), dtype=theta.dtype) + lr * H

    if hasattr(torch, "gesv"):
        pseudo_grad, _ = torch.gesv(theta, U)
    else:
        # torch.gesv was removed from PyTorch; linalg.solve needs >= 1.8.
        pseudo_grad = torch.linalg.solve(U, theta)
    return pseudo_grad.to(source_device)
def forward(self, y_train, phi, sq_lambda, L, m_test):
    """Predict mean and variance at ``m_test`` with a basis-function model.

    Args:
        y_train: training targets; reshaped to (n, 1) below.
        phi: basis functions evaluated at the training inputs, used as an
            (n, m) matrix.
        sq_lambda: (m, dim) tensor; column q is multiplied with
            lengthscale q and used as the sine frequency in dimension q.
        L: per-dimension values indexed as ``L[q]``, added to the test
            inputs and used to scale the sines (presumably domain
            half-widths -- confirm against the caller).
        m_test: (nt, dim) test inputs.

    Returns:
        Tuple ``(f_test, cov_f)``: predicted mean of shape (nt, 1) and one
        variance value per test point.
    """
    # extract hyperparameters
    sigma_f = torch.exp(self.log_sigma_f)
    lengthscale = torch.exp(self.log_lengthscale)
    sigma_n = torch.exp(self.log_sigma_n)

    # number of basis functions
    m = sq_lambda.size(0)

    # input dimension
    dim = sq_lambda.size(1)

    if self.covfunc.type == 'matern':
        # Repeat the lengthscale so that there is one entry per dimension.
        lengthscale = lengthscale.repeat(1, dim).view(dim)

    # See the autograd section for explanation of what happens here.
    n = y_train.size(0)

    # lprod: product of squared lengthscales.
    # omega_sum: sum over dimensions of (lengthscale_q * sq_lambda[:, q])**2.
    lprod = torch.ones(1)
    omega_sum = torch.zeros(m, 1)
    for q in range(dim):
        lprod = lprod.mul(lengthscale[q].pow(2))
        omega_sum = omega_sum.add(lengthscale[q].pow(2) * sq_lambda[:, q].view(m, 1).pow(2))
    # Per-basis-function value; the outer .pow(-1.0) in the matern branch
    # and the variable name suggest this is the inverse of the kernel's
    # spectral weight -- confirm.
    # NOTE(review): for any other covfunc.type, inv_lambda_diag is never
    # assigned and the next statement raises.
    if self.covfunc.type == 'matern':
        inv_lambda_diag = \
            ( math.pow( 2.0, dim ) * math.pow( math.pi, dim/2.0 )
              *math.gamma( self.covfunc.nu + dim/2.0 )
              *math.pow( 2.0*self.covfunc.nu, self.covfunc.nu )
              *( (2.0*self.covfunc.nu + omega_sum).mul(lprod.pow(-0.5)) ).pow(-self.covfunc.nu-dim/2.0)
              .div( math.gamma(self.covfunc.nu)*lprod.pow(self.covfunc.nu) ) ).pow(-1.0) .view(m).mul(sigma_f.pow(-2.0))
    elif self.covfunc.type == 'se':
        inv_lambda_diag = (sigma_f.pow(-2).mul(lprod.pow(-0.5)).mul(
            torch.exp(0.5 * omega_sum))).mul(
                math.pow(2.0 * math.pi, -dim / 2)).view(m)

    # Z = Phi' Phi + sigma_n^2 * diag(inv_lambda_diag)
    Z = phi.t().mm(phi) + torch.diag(inv_lambda_diag).mul(sigma_n.pow(2))

    phi_lam = torch.cat((phi, inv_lambda_diag.sqrt().diag().mul(sigma_n)), 0)  # [Phi; sigma_n*sqrt(Lambda^-1)]

    # Only the triangular factor r of the QR decomposition is used.
    _, r = torch.qr(phi_lam)
    v, _ = torch.gesv(
        phi.t().mm(y_train.view(n, 1)), Z
    )  # X,LU = torch.gesv(B, A); AX=B => v = Z \ (Phi'*y)

    # compute phi_star
    nt = m_test.size(0)
    phi_star = torch.ones(m, nt)
    for q in range(dim):
        phi_star = phi_star.mul(
            torch.sin(sq_lambda[:, q].view(m, 1) *
                      (m_test[:, q].view(1, nt) + L[q])).div(
                          math.sqrt(L[q])))

    # predict
    f_test = phi_star.t().mm(v)
    tmp, _ = torch.trtrs(phi_star, r.t(), upper=False)  # solves r^T*u=phi_star, u=r*x
    tmp, _ = torch.trtrs(tmp, r)  # solves r*x=u
    # Row-wise dot product phi_star' * tmp, scaled by the noise variance.
    cov_f = torch.sum(phi_star.t().mul(tmp.t()), dim=1).mul(sigma_n.pow(2))
    out = (f_test, cov_f)
    return out
def forward(ctx, b, a):
    """Solve ``a @ X = b``; return ``(X, LU)`` with LU non-differentiable.

    ``X`` and ``a`` are stored on ``ctx`` for use by the backward pass.
    """
    # TODO see if one can backprop through LU
    solution, lu_factors = torch.gesv(b, a)
    # No gradient will be propagated through the LU output.
    ctx.mark_non_differentiable(lu_factors)
    ctx.save_for_backward(solution, a)
    return solution, lu_factors
def backward(ctx, grad_output, grad_LU=None):
    """Gradients of the solve ``a @ X = b`` with respect to ``b`` and ``a``.

    ``grad_LU`` is accepted but not used. Returns ``(grad_b, grad_a)``,
    where ``grad_b`` solves ``a^T @ grad_b = grad_output`` and
    ``grad_a = -grad_b @ X^T``.
    """
    solution, coeff = ctx.saved_variables
    # Solve against the transposed coefficient matrix.
    grad_rhs = torch.gesv(grad_output, coeff.t())[0]
    grad_coeff = -torch.mm(grad_rhs, solution.t())
    return grad_rhs, grad_coeff
def b_inv(b_mat, device):
    """Invert every matrix of a batch by solving ``b_mat @ X = I``.

    Args:
        b_mat: tensor of shape (..., n, n).
        device: device the identity right-hand side is moved to; it should
            be the device ``b_mat`` lives on.

    Returns:
        Tensor of the same shape holding the inverse of each matrix.

    The right-hand side is a genuine identity. (It used to be filled by
    ``torch.rand``, which made the result ``inverse @ random``.)
    ``torch.gesv`` is used when available; otherwise ``torch.linalg.solve``
    (PyTorch >= 1.8).
    """
    identity = b_mat.new_ones(b_mat.size(-1)).diag().expand_as(b_mat).to(device)
    if hasattr(torch, "gesv"):
        inverse, _ = torch.gesv(identity, b_mat)
    else:
        inverse = torch.linalg.solve(b_mat, identity)
    return inverse