def sparse_norm(A: SparseTensor, out: Optional[torch.Tensor]) -> torch.Tensor:
    """Row-wise l2 norm of a sparse 2D matrix.

    Parameters
    ----------
    A : SparseTensor
        The 2D matrix. Since we compute row-wise norms, the matrix must be
        in CSR format (for efficiency).
    out : torch.Tensor or None
        A dense tensor with the same number of rows as matrix `A`. Will contain
        the output of the norm operation. If None, a new output tensor is
        allocated on the same device and with the same dtype as `A`.

    Returns
    -------
    out : torch.Tensor
        The same tensor as the input `out` parameter (or the newly allocated
        output tensor if `out` was None).

    Notes
    -----
    This function is currently limited to CPU input tensors.
    """
    if out is None:
        out = torch.empty(A.shape[0], 1, dtype=A.dtype, device=A.device)
    if not A.is_csr:
        raise RuntimeError("Sparse norm can only be applied on CSR tensors.")
    if not check_same_dtype(A, out):
        raise ValueError("All data-types must match.")
    if A.shape[0] != out.shape[0]:
        raise ValueError("Dimension 0 of A must match the length of tensor 'out'.")

    return norm_(A.indexptr, A.data, out)
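# Hedged usage sketch for sparse_norm (illustrative only, not part of the library):
# builds a small CSR matrix via SparseTensor.from_scipy, as done in the tests below,
# and compares the result against a dense row-wise l2 norm. Assumes CPU tensors, as
# noted in the docstring; the helper name is hypothetical.
def example_sparse_norm_usage():
    dense = np.array([[0.0, 3.0, 4.0],
                      [1.0, 0.0, 0.0]], dtype=np.float32)
    A = SparseTensor.from_scipy(scipy.sparse.csr_matrix(dense))
    out = torch.empty(2, 1, dtype=torch.float32)
    sparse_norm(A, out)
    # Row norms: sqrt(3^2 + 4^2) = 5 and sqrt(1^2) = 1
    expected = torch.tensor([[5.0], [1.0]])
    assert torch.allclose(out, expected)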
def _check_mmv_dimensions(X1, X2, v, out):
    # Parameter validation
    if X1.dim() != 2:
        raise ValueError("Matrix X1 must be 2D.")
    if X2.dim() != 2:
        raise ValueError("Matrix X2 must be 2D.")
    if v.dim() == 1:
        v = v.reshape((-1, 1))
    if v.dim() != 2:
        raise ValueError(
            f"v must be a vector or a 2D matrix. Found {len(v.shape)}D.")
    if out is not None and out.shape != (X1.size(0), v.size(1)):
        raise ValueError(
            f"Output dimension is incorrect. "
            f"Expected ({X1.size(0)}, {v.size(1)}) found {out.shape}")
    if v.shape != (X2.size(0), v.size(1)):
        raise ValueError(
            f"Dimensions of matrix v are incorrect: "
            f"Expected ({X2.size(0)}, {v.size(1)}) found {v.shape}")

    if not check_same_dtype(X1, X2, v, out):
        raise TypeError("Data types of input matrices must be equal.")

    return X1, X2, v, out
def incore_fmmv(mat: torch.Tensor,
                vec: torch.Tensor,
                out: Optional[torch.Tensor] = None,
                transpose: bool = False,
                opt: Optional[FalkonOptions] = None) -> torch.Tensor:
    if not check_same_dtype(mat, vec, out):
        raise TypeError("Data types of input matrices must be equal.")
    if not check_same_device(mat, vec, out):
        raise RuntimeError("All input arguments to incore_fmmv must be on the same device")

    if out is None:
        if transpose:
            out_shape = (mat.shape[1], vec.shape[1])
        else:
            out_shape = (mat.shape[0], vec.shape[1])
        out = create_same_stride(out_shape, mat, mat.dtype, device=mat.device,
                                 pin_memory=False)
    out.fill_(0.0)

    if mat.is_cuda:
        s1 = torch.cuda.Stream()
        with torch.cuda.stream(s1):
            if transpose:
                out.addmm_(mat.T, vec, beta=0.0)
            else:
                out.addmm_(mat, vec, beta=0.0)
            s1.synchronize()
    else:
        if transpose:
            out.addmm_(mat.T, vec, beta=0.0)
        else:
            out.addmm_(mat, vec, beta=0.0)
    return out
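# Hedged usage sketch for incore_fmmv (illustrative only): with in-core dense tensors the
# function computes mat @ vec, or mat.T @ vec when transpose=True, so the output should
# match a plain matrix product. Assumes CPU tensors and default options; the helper name
# is hypothetical.
def example_incore_fmmv_usage():
    mat = torch.randn(10, 4)
    vec = torch.randn(4, 3)
    out = incore_fmmv(mat, vec)                      # shape (10, 3)
    assert torch.allclose(out, mat @ vec)

    vec_t = torch.randn(10, 3)
    out_t = incore_fmmv(mat, vec_t, transpose=True)  # shape (4, 3)
    assert torch.allclose(out_t, mat.T @ vec_t)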
def sparse_square_norm(A: SparseTensor, out: torch.Tensor) -> torch.Tensor:
    """Row-wise squared l2 norm of a sparse 2D matrix.

    The operation is equivalent to squaring all elements of the matrix, and summing up
    the rows.

    Parameters
    ----------
    A : SparseTensor
        The 2D matrix. Since we compute row-wise norms, the matrix must be
        in CSR format (for efficiency).
    out : torch.Tensor
        A dense tensor with the same number of rows as matrix `A`. Will contain
        the output of the squared-norm operation.

    Returns
    -------
    out : torch.Tensor
        The same tensor as the input `out` parameter.

    Notes
    -----
    This function is currently limited to CPU input tensors.
    """
    if not A.is_csr:
        raise RuntimeError("Squared norm can only be applied on CSR tensors")
    if not check_same_dtype(A, out):
        raise ValueError("All data-types must match")
    if A.shape[0] != out.shape[0]:
        raise ValueError(
            "Dimension 0 of A must match the length of tensor 'out'")

    return norm_sq(A.indexptr, A.data, out)
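# Hedged check for sparse_square_norm (illustrative only): per the docstring it squares
# every element and sums over rows, so the result should equal (dense ** 2).sum(axis=1).
# Assumes CPU tensors; here `out` must be pre-allocated since this variant does not
# accept None. The helper name is hypothetical.
def example_sparse_square_norm_usage():
    dense = np.array([[0.0, 3.0, 4.0],
                      [1.0, 0.0, 2.0]], dtype=np.float32)
    A = SparseTensor.from_scipy(scipy.sparse.csr_matrix(dense))
    out = torch.empty(2, 1, dtype=torch.float32)
    sparse_square_norm(A, out)
    expected = torch.from_numpy((dense ** 2).sum(axis=1, keepdims=True))  # [[25.], [5.]]
    assert torch.allclose(out, expected)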
def test_check_same_dtype_equal():
    smat = scipy.sparse.csr_matrix(
        np.array([[0, 1], [0, 1]]).astype(np.float32))
    ts = [
        torch.tensor(0, dtype=torch.float32),
        SparseTensor.from_scipy(smat),
        None
    ]
    assert check_same_dtype(*ts) is True
def test_check_same_dtype_notequal():
    smat32 = scipy.sparse.csr_matrix(
        np.array([[0, 1], [0, 1]]).astype(np.float32))
    smat64 = scipy.sparse.csr_matrix(
        np.array([[0, 1], [0, 1]]).astype(np.float64))
    ts = [
        torch.tensor(0, dtype=torch.float32),
        torch.tensor(0, dtype=torch.float64),
        SparseTensor.from_scipy(smat32),
    ]
    assert check_same_dtype(*ts) is False

    ts = [
        torch.tensor(0, dtype=torch.float32),
        SparseTensor.from_scipy(smat32),
        SparseTensor.from_scipy(smat64),
    ]
    assert check_same_dtype(*ts) is False
def sparse_norm(A: SparseTensor, out: Optional[torch.Tensor]) -> torch.Tensor:
    if not A.is_csr:
        raise RuntimeError("Norm can only be applied on CSR tensors")
    if not check_same_dtype(A, out):
        raise ValueError("All data-types must match")
    if A.shape[0] != out.shape[0]:
        raise ValueError(
            "Dimension 0 of A must match the length of tensor 'out'")

    return norm_(A.indexptr, A.data, out)
def _check_mm_dimensions(X1, X2, out):
    # Parameter validation
    if X1.dim() != 2:
        raise ValueError("Matrix X1 must be 2D.")
    if X2.dim() != 2:
        raise ValueError("Matrix X2 must be 2D.")
    N = X1.size(0)
    M = X2.size(0)
    if out is not None and out.shape != (N, M):
        raise ValueError(f"Output dimension is incorrect. "
                         f"Expected ({N}, {M}) found {out.shape}")

    if not check_same_dtype(X1, X2, out):
        raise TypeError("Data types of input matrices must be equal.")

    return X1, X2, out
def _check_dmmv_dimensions(X1, X2, v, w, out):
    # Parameter validation
    if v is None and w is None:
        raise ValueError("One of v and w must be specified to run fdMMV.")

    if X1.dim() != 2:
        raise ValueError("Matrix X1 must be 2D.")
    if X2.dim() != 2:
        raise ValueError("Matrix X2 must be 2D.")
    if v is not None and v.dim() == 1:
        v = v.reshape((-1, 1))
    if v is not None and v.dim() != 2:
        raise ValueError(
            f"v must be a vector or a 2D matrix. Found {len(v.shape)}D.")
    if w is not None and w.dim() == 1:
        w = w.reshape((-1, 1))
    if w is not None and w.dim() != 2:
        raise ValueError(
            f"w must be a vector or a 2D matrix. Found {len(w.shape)}D.")

    # noinspection PyUnresolvedReferences
    T = v.size(1) if v is not None else w.size(1)
    M = X2.size(0)
    if out is not None and out.shape != (M, T):
        raise ValueError(
            f"Output dimension is incorrect. "
            f"Expected ({M}, {T}) found {out.shape}")
    if v is not None and v.shape != (X2.size(0), T):
        raise ValueError(
            f"Dimensions of matrix v are incorrect: "
            f"Expected ({M}, {T}) found {v.shape}")
    if w is not None and w.shape != (X1.size(0), T):
        raise ValueError(
            f"Dimensions of matrix w are incorrect: "
            f"Expected ({X1.size(0)}, {T}) found {w.shape}")

    if not check_same_dtype(X1, X2, v, w, out):
        raise TypeError("Data types of input matrices must be equal.")

    return X1, X2, v, w, out
def _check_fit_inputs(
        self, X: _tensor_type, Y: torch.Tensor, Xts: _tensor_type, Yts: torch.Tensor
) -> Tuple[_tensor_type, torch.Tensor, _tensor_type, torch.Tensor]:
    if X.shape[0] != Y.shape[0]:
        raise ValueError("X and Y must have the same number of "
                         "samples (found %d and %d)" % (X.shape[0], Y.shape[0]))
    if Y.dim() == 1:
        Y = torch.unsqueeze(Y, 1)
    if Y.dim() != 2:
        raise ValueError("Y is expected 1D or 2D. Found %dD." % (Y.dim()))
    if not check_same_dtype(X, Y):
        raise TypeError("X and Y must have the same data-type.")

    # If KeOps is used, data must be C-contiguous.
    if should_use_keops(X, X, self.options):
        X = to_c_contig(X, "X", True)
        Y = to_c_contig(Y, "Y", True)
        Xts = to_c_contig(Xts, "Xts", True)
        Yts = to_c_contig(Yts, "Yts", True)

    return X, Y, Xts, Yts
def fit(self,
        X: torch.Tensor,
        Y: torch.Tensor,
        Xts: Optional[torch.Tensor] = None,
        Yts: Optional[torch.Tensor] = None):
    if X.size(0) != Y.size(0):
        raise ValueError("X and Y must have the same number of "
                         "samples (found %d and %d)" % (X.size(0), Y.size(0)))
    if Y.dim() == 1:
        Y = torch.unsqueeze(Y, 1)
    if Y.dim() != 2:
        raise ValueError("Y is expected 1D or 2D. Found %dD." % (Y.dim()))
    if not check_same_dtype(X, Y):
        raise TypeError("X and Y must have the same data-type.")

    dtype = X.dtype
    self.fit_times_ = []

    t_s = time.time()
    ny_X, ny_Y = self.center_selection.select(X, Y, self.M)
    if self.use_cuda_:
        ny_X = ny_X.pin_memory()

    # beta is the temporary iterative solution
    beta = torch.zeros(ny_X.shape[0], 1, dtype=dtype)
    optim = ConjugateGradient(opt=self.options)
    cback = None
    precond = None
    if self.error_fn is not None and self.error_every is not None:
        def cback(it, x, pc, train_time):
            self.fit_times_.append(train_time)
            if it % self.error_every != 0:
                print("Iteration %3d - Elapsed %.1fs" % (it, self.fit_times_[-1]), flush=True)
                return
            err_str = "training" if Xts is None or Yts is None else "validation"
            coeff = pc.invT(x)
            # Compute error: can be train or test
            if Xts is not None and Yts is not None:
                pred = self._predict(Xts, ny_X, coeff)
                err = self.error_fn(Yts, pred)
                loss = torch.mean(self.loss(Yts, pred)).item()
            else:
                pred = self._predict(X, ny_X, coeff)
                err = self.error_fn(Y, pred)
                loss = torch.mean(self.loss(Y, pred)).item()
            err_name = "error"
            if isinstance(err, tuple) and len(err) == 2:
                err, err_name = err
            print(f"Iteration {it:3d} - Elapsed {self.fit_times_[-1]:.2f}s - "
                  f"{err_str} loss {loss:.4f} - "
                  f"{err_str} {err_name} {err:.4f}", flush=True)

    t_elapsed = 0.0
    for it, penalty in enumerate(self.penalty_list):
        max_iter = self.iter_list[it]
        print("Iteration %d - penalty %e - sub-iterations %d" % (it, penalty, max_iter),
              flush=True)

        with TicToc("Preconditioner", self.options.debug):
            if precond is None:
                precond = falkon.preconditioner.LogisticPreconditioner(
                    self.kernel, self.loss, self.options)
            precond.init(ny_X, ny_Y, beta, penalty, X.shape[0])
        if self.use_cuda_:
            torch.cuda.empty_cache()

        with TicToc("Gradient", self.options.debug):
            # Gradient computation
            knmp_grad, inner_mmv = self.loss.knmp_grad(
                X, ny_X, Y, precond.invT(beta), opt=self.options)
            grad_p = precond.invAt(precond.invTt(knmp_grad).add_(penalty * beta))

        # Preconditioned Hessian-vector product used by the CG solver
        def mmv(sol):
            sol_a = precond.invA(sol)
            knmp_hess = self.loss.knmp_hess(
                X, ny_X, Y, inner_mmv, precond.invT(sol_a), opt=self.options)
            return precond.invAt(precond.invTt(knmp_hess).add_(sol_a.mul_(penalty)))

        with TicToc("Optim", self.options.debug):
            optim_out = optim.solve(X0=None, B=grad_p, mmv=mmv,
                                    max_iter=max_iter, callback=None)
            beta -= precond.invA(optim_out)

        t_elapsed += time.time() - t_s
        if cback is not None:
            cback(it, beta, precond, train_time=t_elapsed)
        t_s = time.time()
    t_elapsed += time.time() - t_s
    if cback is not None:
        cback(len(self.penalty_list), beta, precond, train_time=t_elapsed)

    self.alpha_ = precond.invT(beta)
    self.ny_points_ = ny_X
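# Hedged usage sketch for the logistic-Falkon fit above (illustrative only). The keyword
# names mirror the attributes fit() relies on (kernel, loss, penalty_list, iter_list, M),
# but the exact constructor signature and the module path of LogisticLoss are assumptions
# that may differ across library versions; the helper name is hypothetical.
def example_logistic_falkon_usage():
    import falkon
    from falkon.gsc_losses import LogisticLoss  # assumed module path

    kernel = falkon.kernels.GaussianKernel(sigma=3.0)
    X = torch.randn(500, 10)
    Y = torch.sign(torch.randn(500, 1))     # binary labels in {-1, +1} for the logistic loss
    model = falkon.LogisticFalkon(
        kernel=kernel,
        penalty_list=[1e-4, 1e-6, 1e-8],    # one penalty value per outer Newton step
        iter_list=[3, 3, 8],                # CG sub-iterations for each outer step
        loss=LogisticLoss(kernel),
        M=100)
    model.fit(X, Y)
    preds = model.predict(X)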
def fit(self,
        X: torch.Tensor,
        Y: torch.Tensor,
        Xts: Optional[torch.Tensor] = None,
        Yts: Optional[torch.Tensor] = None):
    """Fits the Falkon KRR model.

    Parameters
    ----------
    X : torch.Tensor (2D)
        The tensor of training data, of shape [num_samples, num_dimensions].
        If X is in Fortran order (i.e. column-contiguous) then we can avoid an
        extra copy of the data.
    Y : torch.Tensor (1D or 2D)
        The tensor of training targets, of shape [num_samples, num_outputs].
        If X and Y represent a classification problem, Y can be encoded as a
        one-hot vector. If Y is in Fortran order (i.e. column-contiguous) then
        we can avoid an extra copy of the data.
    Xts : torch.Tensor (2D) or None
        Tensor of validation data, of shape [num_test_samples, num_dimensions].
        If validation data is provided and `error_fn` was specified when
        creating the model, they will be used to print the validation error
        during the optimization iterations.
        If Xts is in Fortran order (i.e. column-contiguous) then we can avoid
        an extra copy of the data.
    Yts : torch.Tensor (1D or 2D) or None
        Tensor of validation targets, of shape [num_test_samples, num_outputs].
        If validation data is provided and `error_fn` was specified when
        creating the model, they will be used to print the validation error
        during the optimization iterations.
        If Yts is in Fortran order (i.e. column-contiguous) then we can avoid
        an extra copy of the data.

    Returns
    -------
    model : Falkon
        The fitted model.
    """
    if X.size(0) != Y.size(0):
        raise ValueError("X and Y must have the same number of "
                         "samples (found %d and %d)" % (X.size(0), Y.size(0)))
    if Y.dim() == 1:
        Y = torch.unsqueeze(Y, 1)
    if Y.dim() != 2:
        raise ValueError("Y is expected 1D or 2D. Found %dD." % (Y.dim()))
    if not check_same_dtype(X, Y):
        raise TypeError("X and Y must have the same data-type.")

    dtype = X.dtype
    # Decide whether to use CUDA for preconditioning based on M
    _use_cuda_preconditioner = (
        self.use_cuda_ and
        (not self.options.cpu_preconditioner) and
        self.M >= get_min_cuda_preconditioner_size(dtype)
    )
    _use_cuda_mmv = (
        self.use_cuda_ and
        X.shape[0] * X.shape[1] * self.M / self.num_gpus >= get_min_cuda_mmv_size(dtype)
    )

    self.fit_times_ = []
    self.ny_points_ = None
    self.alpha_ = None

    t_s = time.time()
    ny_points = self.center_selection.select(X, None, self.M)
    if self.use_cuda_:
        ny_points = ny_points.pin_memory()

    with TicToc("Calculating Preconditioner of size %d" % (self.M),
                debug=self.options.debug):
        pc_opt: FalkonOptions = dataclasses.replace(
            self.options, use_cpu=not _use_cuda_preconditioner)
        if pc_opt.debug:
            print("Preconditioner will run on %s" %
                  ("CPU" if pc_opt.use_cpu else ("%d GPUs" % self.num_gpus)))
        precond = falkon.preconditioner.FalkonPreconditioner(self.penalty, self.kernel, pc_opt)
        precond.init(ny_points)

    if _use_cuda_mmv:
        # Cache must be emptied to ensure enough memory is visible to the optimizer
        torch.cuda.empty_cache()
        X = X.pin_memory()

    # Decide whether it's worthwhile to pre-compute the k_NM kernel.
    # Given that a single kernel evaluation between two D-dimensional vectors
    # costs D, at each CG iteration we must perform N*M kernel evaluations.
    # Other than the kernel evaluations we must perform two matrix-vector
    # products 2(N*M*T) and a bunch of triangular solves.
    #
    # So if we precompute, each iteration costs 2*(N*M*T); otherwise it also
    # costs N*M*D, but precomputing requires N*M memory.
    # The heuristic is the following:
    #  - If D is large (e.g. > 100) check if RAM is sufficient
    #  - If RAM is sufficient precompute
    #  - Otherwise do not precompute
    Knm = None
    if X.size(1) > 1200:
        necessary_ram = X.size(0) * ny_points.size(0) * sizeof_dtype(dtype)
        k_opt = dataclasses.replace(self.options, use_cpu=True)
        cpu_info = get_device_info(k_opt)
        available_ram = min(k_opt.max_cpu_mem, cpu_info[-1].free_memory) * 0.9
        del k_opt

        if available_ram > necessary_ram:
            if self.options.debug:
                print("%d*%d Kernel matrix will be stored" %
                      (X.size(0), ny_points.size(0)))
            Knm = self.kernel(X, ny_points, opt=self.options)
            # TODO: Maybe we should do the same for Kts, but this complicates
            # checks for fitting in memory
        elif self.options.debug:
            print("Cannot store full kernel matrix: not enough memory "
                  "(have %.2fGB, need %.2fGB)" %
                  (available_ram / 2 ** 30, necessary_ram / 2 ** 30))

    self.fit_times_.append(time.time() - t_s)  # Preparation time

    # Here we define the callback function which will run at the end of
    # conjugate gradient iterations. This function computes and displays
    # the validation error.
    val_cback = None
    if self.error_fn is not None and self.error_every is not None:
        def val_cback(it, beta, train_time):
            self.fit_times_.append(self.fit_times_[0] + train_time)
            if it % self.error_every != 0:
                print("Iteration %3d - Elapsed %.1fs" % (it, self.fit_times_[-1]), flush=True)
                return
            err_str = "training" if Xts is None or Yts is None else "validation"
            alpha = precond.apply(beta)
            # Compute error: can be train or test
            if Xts is not None and Yts is not None:
                pred = self._predict(Xts, ny_points, alpha)
                err = self.error_fn(Yts, pred)
            else:
                pred = self._predict(X, ny_points, alpha)
                err = self.error_fn(Y, pred)
            err_name = "error"
            if isinstance(err, tuple) and len(err) == 2:
                err, err_name = err
            print("Iteration %3d - Elapsed %.1fs - %s %s: %.4f" %
                  (it, self.fit_times_[-1], err_str, err_name, err), flush=True)

    # Start with the falkon algorithm
    with TicToc('Computing Falkon iterations', debug=self.options.debug):
        o_opt: FalkonOptions = dataclasses.replace(self.options, use_cpu=not _use_cuda_mmv)
        if o_opt.debug:
            print("Optimizer will run on %s" %
                  ("CPU" if o_opt.use_cpu else ("%d GPUs" % self.num_gpus)), flush=True)
        optim = falkon.optim.FalkonConjugateGradient(self.kernel, precond, o_opt)
        if Knm is not None:
            beta = optim.solve(
                Knm, None, Y, self.penalty, initial_solution=None,
                max_iter=self.maxiter, callback=val_cback)
        else:
            beta = optim.solve(
                X, ny_points, Y, self.penalty, initial_solution=None,
                max_iter=self.maxiter, callback=val_cback)

        self.alpha_ = precond.apply(beta)
        self.ny_points_ = ny_points
    return self
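# Hedged end-to-end sketch of fit()/predict() for the Falkon model above (illustrative
# only): hyper-parameter values are arbitrary, the constructor keywords mirror the
# attributes used in fit() (kernel, penalty, M, maxiter, options), and the helper name
# is hypothetical.
def example_falkon_fit_usage():
    import falkon
    X = torch.randn(1000, 5)
    Y = X.sum(dim=1, keepdim=True) + 0.1 * torch.randn(1000, 1)   # noisy linear targets
    options = falkon.FalkonOptions(use_cpu=True, debug=False)
    model = falkon.Falkon(
        kernel=falkon.kernels.GaussianKernel(sigma=2.0),
        penalty=1e-6,    # Tikhonov regularization
        M=200,           # number of Nystroem centers
        maxiter=10,      # conjugate-gradient iterations
        options=options)
    model.fit(X, Y)                  # returns the fitted model (self)
    train_preds = model.predict(X)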
def test_check_same_dtype_empty():
    assert check_same_dtype() is True