def tensor_with_random_cores(shape, tt_rank=2, mean=0., stddev=1.):
    r"""Generate a TT-tensor of the given shape with N(mean, stddev^2) cores.

    Args:
      shape: array representing the shape of the future tensor.
      tt_rank: a number or a (d+1)-element array with the desired ranks.
      mean: a number, the mean of the normal distribution used for
        initializing TT-cores.
      stddev: a number, the standard deviation of the normal distribution
        used for initializing TT-cores.

    Returns:
      TensorTrain containing a TT-tensor
    """
    # TODO: good distribution to initialize the training.
    # TODO: support the shape and tt_ranks as torch.Tensor.
    # TODO: support None as a dimension.
    shape = np.array(shape)
    tt_rank = np.array(tt_rank)
    _validate_input_parameters(is_tensor=True, shape=shape, tt_rank=tt_rank)
    num_dims = shape.size
    if tt_rank.size == 1:
        tt_rank = tt_rank * np.ones(num_dims - 1)
        tt_rank = np.insert(tt_rank, 0, 1)
        tt_rank = np.append(tt_rank, 1)
    tt_rank = tt_rank.astype(int)
    tt_cores = [None] * num_dims
    for i in range(num_dims):
        curr_core_shape = (tt_rank[i], shape[i], tt_rank[i + 1])
        # normal_() fills the tensor in place, so start from an empty tensor
        # instead of sampling twice via randn.
        tt_cores[i] = torch.empty(curr_core_shape).normal_(mean=mean, std=stddev)
    return TensorTrain(tt_cores, shape, tt_rank)
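# Usage sketch (added for illustration, not part of the original module): a
# random 3 x 4 x 5 TT-tensor with all internal TT-ranks equal to 2; core i has
# shape (r_i, shape[i], r_{i+1}).
#
#   tt = tensor_with_random_cores([3, 4, 5], tt_rank=2)
#   [tuple(c.shape) for c in tt.tt_cores]  # [(1, 3, 2), (2, 4, 2), (2, 5, 1)]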
def add(tt_a, tt_b):
    """Returns a TensorTrain corresponding to elementwise sum tt_a + tt_b.

    The shapes of tt_a and tt_b should coincide.

    Args:
      tt_a: `TensorTrain`, TT-tensor, or TT-matrix
      tt_b: `TensorTrain`, TT-tensor, or TT-matrix

    Returns:
      a `TensorTrain` object corresponding to the element-wise sum of the
      arguments if both arguments are `TensorTrain`s.

    Raises:
      ValueError if the arguments' shapes do not coincide.
    """
    ndims = tt_a.ndims
    if tt_a.is_tt_matrix() != tt_b.is_tt_matrix():
        raise ValueError('The arguments should be both TT-tensors or both '
                         'TT-matrices.')
    if tt_a.get_raw_shape() != tt_b.get_raw_shape():
        raise ValueError('The arguments should have the same shape.')

    if tt_a.is_tt_matrix():
        tt_cores = _add_matrix_cores(tt_a, tt_b)
    else:
        tt_cores = _add_tensor_cores(tt_a, tt_b)

    # The TT-ranks of the sum are the sums of the ranks, with the boundary
    # ranks staying equal to 1.
    out_ranks = [1]
    static_a_ranks = tt_a.get_tt_ranks()
    static_b_ranks = tt_b.get_tt_ranks()
    for core_idx in range(1, ndims):
        out_ranks.append(static_a_ranks[core_idx] + static_b_ranks[core_idx])
    out_ranks.append(1)
    return TensorTrain(tt_cores, tt_a.get_raw_shape(), out_ranks)
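# Usage sketch (illustrative): summing two TT-tensors of the same shape adds
# the TT-ranks, e.g. two rank-2 tensors give a rank-4 result.
#
#   a = tensor_with_random_cores([3, 4, 5], tt_rank=2)
#   b = tensor_with_random_cores([3, 4, 5], tt_rank=2)
#   add(a, b).get_tt_ranks()  # [1, 4, 4, 1]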
def matrix_zeros(shape):
    r"""Generate a TT-matrix of the given shape with each entry equal to 0.

    Args:
      shape: 2d array, shape[0] is the shape of the matrix row-index,
        shape[1] is the shape of the column index.
        shape[0] and shape[1] should have the same number of elements (d).
        Also supports omitting one of the dimensions for vectors, e.g.
          matrix_zeros([[2, 2, 2], None])
        and
          matrix_zeros([None, [2, 2, 2]])
        will create an 8-element column and row vector respectively.

    Returns:
      TensorTrain containing a TT-matrix of size
      np.prod(shape[0]) x np.prod(shape[1]) with each entry equal to 0
    """
    shape = list(shape)
    # In case shape represents a vector, e.g. [None, [2, 2, 2]]
    if shape[0] is None:
        shape[0] = np.ones(len(shape[1]), dtype=int)
    # In case shape represents a vector, e.g. [[2, 2, 2], None]
    if shape[1] is None:
        shape[1] = np.ones(len(shape[0]), dtype=int)
    shape = np.array(shape)
    _validate_input_parameters(is_tensor=False, shape=shape)
    num_dims = shape[0].size
    tt_rank = np.ones(shape[0].size + 1)
    tt_cores = [None] * num_dims
    for i in range(num_dims):
        curr_core_shape = (1, shape[0][i], shape[1][i], 1)
        tt_cores[i] = torch.zeros(curr_core_shape)
    return TensorTrain(tt_cores, shape, tt_rank)
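# Usage sketch: a zero TT-matrix of size (2*2*2) x (3*3*3) = 8 x 27; every
# core is a (1, 2, 3, 1) block of zeros and all TT-ranks equal 1.
#
#   tt = matrix_zeros([[2, 2, 2], [3, 3, 3]])
#   tuple(tt.tt_cores[0].shape)  # (1, 2, 3, 1)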
def renormalize_tt_cores(tt, epsilon=1e-8):
    """Renormalizes TT-cores to make them of the same Frobenius norm.

    Doesn't change the tensor represented by the `tt` object, but renormalizes
    the TT-cores to make further computations more stable.

    Args:
      tt: `TensorTrain` or `TensorTrainBatch` object
      epsilon: parameter for numerical stability of sqrt

    Returns:
      `TensorTrain` which represents the same tensor as tt, but with all cores
      having equal norm.
    """
    if isinstance(tt, TensorTrain):
        new_cores = []
        running_log_norm = 0.
        core_norms = []
        for core in tt.tt_cores:
            # Clamp from below by epsilon so sqrt stays stable for
            # near-zero cores.
            cur_core_norm = torch.sqrt(torch.clamp(torch.sum(core ** 2),
                                                   min=epsilon))
            core_norms.append(cur_core_norm)
            running_log_norm += torch.log(cur_core_norm).item()
        # fact is the geometric mean of the core norms: rescaling every core
        # to this norm leaves the represented tensor unchanged.
        running_log_norm = running_log_norm / tt.ndims
        fact = np.exp(running_log_norm)
        for i, core in enumerate(tt.tt_cores):
            new_cores.append(core * fact / core_norms[i])
        return TensorTrain(new_cores)
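# Usage sketch: after renormalization all cores share the same Frobenius norm
# (the geometric mean of the original core norms) while the represented tensor
# is unchanged.
#
#   tt = tensor_with_random_cores([3, 4, 5], tt_rank=2)
#   balanced = renormalize_tt_cores(tt)
#   [float(torch.norm(c)) for c in balanced.tt_cores]  # all (nearly) equal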
def multiply(tt_left, right):
    """Returns a TensorTrain corresponding to element-wise product tt_left * right.

    Args:
      tt_left: `TensorTrain` object
      right: `TensorTrain` OR a number.

    Returns:
      a `TensorTrain` object corresponding to the element-wise product of the
      arguments.

    Raises:
      ValueError if the arguments' shapes do not coincide or broadcasting is
      not possible.
    """
    ndims = tt_left.ndims
    if not isinstance(right, TensorTrain):
        # Assume right is a number, not TensorTrain.
        # To squash right uniformly across TT-cores we pull its absolute value
        # and raise it to the power 1/ndims. The first TT-core is multiplied
        # by the sign of right.
        tt_cores = list(tt_left.tt_cores)
        right = torch.tensor(right)
        fact = torch.pow(torch.abs(right), 1.0 / ndims)
        sign = torch.sign(right)
        for i in range(len(tt_cores)):
            tt_cores[i] = fact * tt_cores[i]
        tt_cores[0] = tt_cores[0] * sign
        out_ranks = tt_left.get_tt_ranks()
    else:
        if tt_left.is_tt_matrix() != right.is_tt_matrix():
            raise ValueError('The arguments should be both TT-tensors or both '
                             'TT-matrices.')
        if tt_left.get_raw_shape() != right.get_raw_shape():
            raise ValueError('The arguments should have the same shape.')

        a_ranks = tt_left.get_tt_ranks()
        b_ranks = right.get_tt_ranks()
        shape = tt_left.get_raw_shape()
        is_matrix = tt_left.is_tt_matrix()
        tt_cores = []
        for core_idx in range(ndims):
            a_core = tt_left.tt_cores[core_idx]
            b_core = right.tt_cores[core_idx]
            left_rank = a_ranks[core_idx] * b_ranks[core_idx]
            right_rank = a_ranks[core_idx + 1] * b_ranks[core_idx + 1]
            if is_matrix:
                curr_core = torch.einsum('aijb,cijd->acijbd', [a_core, b_core])
                curr_core = curr_core.reshape((left_rank, shape[0][core_idx],
                                               shape[1][core_idx], right_rank))
            else:
                curr_core = torch.einsum('aib,cid->acibd', [a_core, b_core])
                curr_core = curr_core.reshape((left_rank, shape[0][core_idx],
                                               right_rank))
            tt_cores.append(curr_core)

        combined_ranks = zip(tt_left.get_tt_ranks(), right.get_tt_ranks())
        out_ranks = [a * b for a, b in combined_ranks]
    return TensorTrain(tt_cores, tt_left.get_raw_shape(), out_ranks)
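# Usage sketch: multiplying by a plain number keeps the TT-ranks, while the
# element-wise product of two TT-objects multiplies the ranks.
#
#   tt = tensor_with_random_cores([3, 4, 5], tt_rank=2)
#   multiply(tt, -2.0).get_tt_ranks()  # [1, 2, 2, 1]
#   multiply(tt, tt).get_tt_ranks()    # [1, 4, 4, 1]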
def _orthogonalize_tt_cores_right_to_left(tt):
    """Orthogonalize TT-cores of a TT-object in the right to left order.

    Args:
      tt: TensorTrain or a TensorTrainBatch.

    Returns:
      The same type as the input `tt` (TensorTrain or a TensorTrainBatch).
    """
    # Right to left orthogonalization.
    ndims = tt.ndims
    raw_shape = tt.get_raw_shape()
    tt_ranks = tt.get_tt_ranks()
    prev_rank = tt_ranks[ndims]
    # Copy cores references so we can change the cores.
    tt_cores = list(tt.tt_cores)
    for core_idx in range(ndims - 1, 0, -1):
        curr_core = tt_cores[core_idx]
        # TT-ranks could have changed on the previous iteration, so `tt_ranks`
        # can be outdated for the current TT-rank, but should be valid for the
        # next TT-rank.
        curr_rank = prev_rank
        prev_rank = tt_ranks[core_idx]
        if tt.is_tt_matrix():
            curr_mode_left = raw_shape[0][core_idx]
            curr_mode_right = raw_shape[1][core_idx]
            curr_mode = curr_mode_left * curr_mode_right
        else:
            curr_mode = raw_shape[0][core_idx]

        qr_shape = (prev_rank, curr_mode * curr_rank)
        curr_core = curr_core.reshape(qr_shape)
        # QR of the transposed core: curr_core = triang^T @ q^T, so q^T (which
        # has orthonormal rows) becomes the new right-orthogonal core.
        curr_core, triang = torch.qr(curr_core.t())
        curr_core = curr_core.t()
        triang_shape = triang.shape
        # The TT-rank could have changed: if qr_shape is e.g. 10 x 4, then q
        # would be of size 4 x 4 and triang of size 4 x 10, which means that
        # the rank should be changed to 4.
        prev_rank = triang_shape[0]
        if tt.is_tt_matrix():
            new_core_shape = (prev_rank, curr_mode_left, curr_mode_right,
                              curr_rank)
        else:
            new_core_shape = (prev_rank, curr_mode, curr_rank)
        tt_cores[core_idx] = curr_core.reshape(new_core_shape)

        # Absorb triang^T into the core to the left so the represented tensor
        # does not change.
        prev_core = tt_cores[core_idx - 1].reshape(-1, triang_shape[1])
        tt_cores[core_idx - 1] = torch.mm(prev_core, triang.t())

    if tt.is_tt_matrix():
        first_core_shape = (1, raw_shape[0][0], raw_shape[1][0], prev_rank)
    else:
        first_core_shape = (1, raw_shape[0][0], prev_rank)
    tt_cores[0] = tt_cores[0].reshape(first_core_shape)
    # TODO: infer the tt_ranks.
    return TensorTrain(tt_cores, tt.get_raw_shape())
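# Sanity-check sketch: after right-to-left orthogonalization every core except
# the first has orthonormal rows when reshaped to (r_i, n_i * r_{i+1}).
#
#   ortho = _orthogonalize_tt_cores_right_to_left(tt)
#   c = ortho.tt_cores[1].reshape(ortho.tt_cores[1].shape[0], -1)
#   torch.allclose(c @ c.t(), torch.eye(c.shape[0]), atol=1e-6)  # True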
def tt_tt_matmul(tt_matrix_a, tt_matrix_b):
    """Multiplies two TT-matrices and returns the TT-matrix of the result.

    Args:
      tt_matrix_a: `TensorTrain` or `TensorTrainBatch` object containing
        a TT-matrix (a batch of TT-matrices) of size M x N
      tt_matrix_b: `TensorTrain` or `TensorTrainBatch` object containing
        a TT-matrix (a batch of TT-matrices) of size N x P

    Returns:
      `TensorTrain` object containing a TT-matrix of size M x P if both
      arguments are `TensorTrain`s
      `TensorTrainBatch` if any of the arguments is a `TensorTrainBatch`

    Raises:
      ValueError if the arguments are not TT-matrices or if their sizes are
      not appropriate for a matrix-by-matrix multiplication.
    """
    if not isinstance(tt_matrix_a, TensorTrain) or \
            not isinstance(tt_matrix_b, TensorTrain) or \
            not tt_matrix_a.is_tt_matrix() or \
            not tt_matrix_b.is_tt_matrix():
        raise ValueError('Arguments should be TT-matrices.')

    ndims = tt_matrix_a.ndims
    if tt_matrix_b.ndims != ndims:
        raise ValueError('Arguments should have the same number of dimensions, '
                         'got %d and %d instead.' % (ndims, tt_matrix_b.ndims))

    # Contract the column mode of each core of `a` with the row mode of the
    # matching core of `b`; the resulting TT-ranks are products of the ranks.
    einsum_str = 'aijb,cjkd->acikbd'
    result_cores = []
    a_shape = tt_matrix_a.get_raw_shape()
    a_ranks = tt_matrix_a.get_tt_ranks()
    b_shape = tt_matrix_b.get_raw_shape()
    b_ranks = tt_matrix_b.get_tt_ranks()
    for core_idx in range(ndims):
        a_core = tt_matrix_a.tt_cores[core_idx]
        b_core = tt_matrix_b.tt_cores[core_idx]
        curr_res_core = torch.einsum(einsum_str, [a_core, b_core])

        res_left_rank = a_ranks[core_idx] * b_ranks[core_idx]
        res_right_rank = a_ranks[core_idx + 1] * b_ranks[core_idx + 1]
        left_mode = a_shape[0][core_idx]
        right_mode = b_shape[1][core_idx]

        core_shape = (res_left_rank, left_mode, right_mode, res_right_rank)
        curr_res_core = curr_res_core.reshape(core_shape)
        result_cores.append(curr_res_core)

    res_shape = (tt_matrix_a.get_raw_shape()[0], tt_matrix_b.get_raw_shape()[1])
    static_a_ranks = tt_matrix_a.get_tt_ranks()
    static_b_ranks = tt_matrix_b.get_tt_ranks()
    out_ranks = [a_r * b_r for a_r, b_r in zip(static_a_ranks, static_b_ranks)]
    return TensorTrain(result_cores, res_shape, out_ranks)
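# Usage sketch: multiplying an 8 x 27 TT-matrix by a 27 x 8 one yields an
# 8 x 8 TT-matrix whose TT-ranks are the element-wise products of the
# arguments' ranks.
#
#   a = matrix_with_random_cores([[2, 2, 2], [3, 3, 3]], tt_rank=2)
#   b = matrix_with_random_cores([[3, 3, 3], [2, 2, 2]], tt_rank=2)
#   tt_tt_matmul(a, b).get_tt_ranks()  # [1, 4, 4, 1]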
def tensor_zeros(shape):
    r"""Generate a TT-tensor of the given shape with all entries equal to 0.

    Args:
      shape: array representing the shape of the future tensor

    Returns:
      TensorTrain object containing a TT-tensor
    """
    shape = np.array(shape)
    _validate_input_parameters(is_tensor=True, shape=shape)
    num_dims = shape.size
    tt_rank = np.ones(num_dims + 1)
    tt_cores = num_dims * [None]
    for i in range(num_dims):
        curr_core_shape = (1, shape[i], 1)
        tt_cores[i] = torch.zeros(curr_core_shape)
    return TensorTrain(tt_cores, shape, tt_rank)
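# Usage sketch: a rank-1 zero TT-tensor of shape 3 x 4 x 5.
#
#   tt = tensor_zeros([3, 4, 5])
#   [tuple(c.shape) for c in tt.tt_cores]  # [(1, 3, 1), (1, 4, 1), (1, 5, 1)]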
def forward(self, x):
    TensorTrain_W = TensorTrain(self.W_cores, self.tt_shape, self.tt_rank)
    h = tc.tc_math.matmul(x, TensorTrain_W, activation=self.activation)
    if self.outer:
        if self.activation in activations:
            if self.activation == 'sigmoid':
                h = torch.sigmoid(h)
            elif self.activation == 'tanh':
                h = torch.tanh(h)
            elif self.activation == 'relu':
                h = torch.relu(h)
            elif self.activation == 'linear':
                pass  # identity: leave h unchanged
        else:
            raise ValueError('Unknown activation "%s", only %s and None '
                             'are supported' % (self.activation, activations))
    return h
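# Usage sketch (hypothetical layer name and shapes, for illustration only):
# a layer built with tt_shape [[4, 8], [8, 4]] maps a dense batch of shape
# (batch_size, 32) to (batch_size, 32) through the TT-matrix W.
#
#   layer = TTLinear(tt_shape=[[4, 8], [8, 4]], tt_rank=2,
#                    activation='relu', outer=True)  # TTLinear is assumed
#   y = layer(torch.randn(16, 32))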
def eye(shape):
    r"""Creates an identity TT-matrix.

    Args:
      shape: array which defines the shape of the matrix row and column
        indices.

    Returns:
      TensorTrain containing an identity TT-matrix of size
      np.prod(shape) x np.prod(shape)
    """
    shape = np.array(shape)
    # In this special case shape is in the same format as in the TT-tensor case.
    _validate_input_parameters(is_tensor=True, shape=shape)
    num_dims = shape.size
    tt_ranks = np.ones(num_dims + 1)
    tt_cores = num_dims * [None]
    for i in range(num_dims):
        curr_core_shape = (1, shape[i], shape[i], 1)
        tt_cores[i] = torch.eye(int(shape[i])).reshape(curr_core_shape).type(dtype)
    true_shape = np.vstack([shape, shape])
    return TensorTrain(tt_cores, true_shape, tt_ranks)
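# Usage sketch: an identity TT-matrix on a 2 x 3 x 4 index set, i.e. a
# 24 x 24 identity with all TT-ranks equal to 1.
#
#   tt = eye([2, 3, 4])
#   tt.get_tt_ranks()  # [1, 1, 1, 1]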
def matrix_with_random_cores(shape, tt_rank=2, mean=0., stddev=1.):
    r"""Generate a TT-matrix of the given shape with N(mean, stddev^2) cores.

    Args:
      shape: 2d array, shape[0] is the shape of the matrix row-index,
        shape[1] is the shape of the column index.
        shape[0] and shape[1] should have the same number of elements (d).
        Also supports omitting one of the dimensions for vectors, e.g.
          matrix_with_random_cores([[2, 2, 2], None])
        and
          matrix_with_random_cores([None, [2, 2, 2]])
        will create an 8-element column and row vector respectively.
      tt_rank: a number or a (d+1)-element array with ranks.
      mean: a number, the mean of the normal distribution used for
        initializing TT-cores.
      stddev: a number, the standard deviation of the normal distribution
        used for initializing TT-cores.

    Returns:
      TensorTrain containing a TT-matrix of size
      np.prod(shape[0]) x np.prod(shape[1])
    """
    shape = list(shape)
    # In case shape represents a vector, e.g. [None, [2, 2, 2]]
    if shape[0] is None:
        shape[0] = np.ones(len(shape[1]), dtype=int)
    # In case shape represents a vector, e.g. [[2, 2, 2], None]
    if shape[1] is None:
        shape[1] = np.ones(len(shape[0]), dtype=int)
    shape = np.array(shape)
    tt_rank = np.array(tt_rank)
    _validate_input_parameters(is_tensor=False, shape=shape, tt_rank=tt_rank)

    num_dims = shape[0].size
    if tt_rank.size == 1:
        tt_rank = tt_rank * np.ones(num_dims - 1)
        tt_rank = np.concatenate([[1], tt_rank, [1]])
    tt_rank = tt_rank.astype(int)
    tt_cores = [None] * num_dims
    for i in range(num_dims):
        curr_core_shape = (tt_rank[i], shape[0][i], shape[1][i],
                           tt_rank[i + 1])
        # normal_() fills the tensor in place, so start from an empty tensor
        # instead of sampling twice via randn.
        tt_cores[i] = torch.empty(curr_core_shape).normal_(mean=mean, std=stddev)
    return TensorTrain(tt_cores, shape, tt_rank)
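# Usage sketch: a random 8 x 27 TT-matrix with TT-ranks 2; core i has shape
# (r_i, shape[0][i], shape[1][i], r_{i+1}).
#
#   m = matrix_with_random_cores([[2, 2, 2], [3, 3, 3]], tt_rank=2)
#   tuple(m.tt_cores[1].shape)  # (2, 2, 3, 2)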
def transpose(tt_matrix):
    """Transpose a TT-matrix.

    Args:
      tt_matrix: `TensorTrain` object containing a TT-matrix.

    Returns:
      `TensorTrain` object containing the transposed TT-matrix.

    Raises:
      ValueError if the argument is not a TT-matrix.
    """
    if not isinstance(tt_matrix, TensorTrain) or not tt_matrix.is_tt_matrix():
        raise ValueError('The argument should be a TT-matrix.')

    transposed_tt_cores = []
    for core_idx in range(tt_matrix.ndims):
        curr_core = tt_matrix.tt_cores[core_idx]
        # Swap the row and column mode axes of each 4d core.
        transposed_tt_cores.append(curr_core.permute(0, 2, 1, 3))

    tt_matrix_shape = tt_matrix.get_raw_shape()
    transposed_shape = tt_matrix_shape[1], tt_matrix_shape[0]
    tt_ranks = tt_matrix.get_tt_ranks()
    return TensorTrain(transposed_tt_cores, transposed_shape, tt_ranks)
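# Usage sketch: transposing swaps the row and column mode shapes; the
# TT-ranks are untouched.
#
#   m = matrix_with_random_cores([[2, 2, 2], [3, 3, 3]], tt_rank=2)
#   transpose(m).get_raw_shape()  # row/column mode shapes swapped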
def to_tt_matrix(mat, shape, max_tt_rank=10, epsilon=None):
    """Converts a given matrix or vector to a TT-matrix.

    The matrix dimensions should factorize into d numbers. If e.g. the
    dimensions are prime numbers, it's usually better to pad the matrix with
    zeros until the dimensions factorize into (ideally) 3-8 numbers.

    Args:
      mat: two dimensional torch.Tensor (a matrix).
      shape: two dimensional array (np.array or list of lists). Represents the
        tensor shape of the matrix. E.g. for a (a1 * a2 * a3) x (b1 * b2 * b3)
        matrix `shape` should be ((a1, a2, a3), (b1, b2, b3)).
        `shape[0]` and `shape[1]` should have the same length. For vectors
        you may use ((a1, a2, a3), (1, 1, 1)) or, equivalently,
        ((a1, a2, a3), None).
      max_tt_rank: a number or a list of numbers. If a number, then it defines
        the maximal TT-rank of the result. If a list of numbers, then
        `max_tt_rank` length should be d+1 (where d is the length of
        `shape[0]`) and `max_tt_rank[i]` defines the maximal (i+1)-th TT-rank
        of the result. The following two versions are equivalent:
          `max_tt_rank = r`
        and
          `max_tt_rank = r * np.ones(d+1)`
      epsilon: a floating point number or None. If the TT-ranks are not
        restricted (`max_tt_rank=np.inf`), then the result would be guaranteed
        to be `epsilon`-close to `mat` in terms of relative Frobenius error:
          ||res - mat||_F / ||mat||_F <= epsilon
        If the TT-ranks are restricted, providing a loose `epsilon` may reduce
        the TT-ranks of the result. E.g.
          to_tt_matrix(mat, shape, max_tt_rank=100, epsilon=0.9)
        will probably return you a TT-matrix with TT-ranks close to 1, not
        100. Note that providing a nontrivial (= not equal to None) `epsilon`
        makes the TT-ranks of the result unknown in advance
        (res.get_tt_ranks() will return None).

    Returns:
      `TensorTrain` object containing a TT-matrix.

    Raises:
      ValueError if max_tt_rank is less than 0, if max_tt_rank is not a number
      and not a vector of length d + 1 where d is the number of dimensions
      (rank) of the input tensor, or if epsilon is less than 0.
    """
    mat = torch.as_tensor(mat)
    # In case the shape is immutable.
    shape = list(shape)
    # In case shape represents a vector, e.g. [None, [2, 2, 2]]
    if shape[0] is None:
        shape[0] = np.ones(len(shape[1])).astype(int)
    # In case shape represents a vector, e.g. [[2, 2, 2], None]
    if shape[1] is None:
        shape[1] = np.ones(len(shape[0])).astype(int)

    shape = np.array(shape)
    tens = mat.reshape(tuple(shape.flatten()))
    d = len(shape[0])
    # transpose_idx = 0, d, 1, d+1, ... interleaves the row and column modes
    # so that each TT-core handles one (row mode, column mode) pair.
    transpose_idx = np.arange(2 * d).reshape(2, d).T.flatten()
    transpose_idx = tuple(transpose_idx.astype(int))
    tens = tens.permute(transpose_idx)
    new_shape = np.prod(shape, axis=0)
    tens = tens.reshape(tuple(new_shape))
    tt_tens = to_tt_tensor(tens, max_tt_rank, epsilon)
    tt_cores = []
    static_tt_ranks = tt_tens.get_tt_ranks()
    for core_idx in range(d):
        curr_core = tt_tens.tt_cores[core_idx]
        curr_rank = static_tt_ranks[core_idx]
        next_rank = static_tt_ranks[core_idx + 1]
        curr_core_new_shape = (curr_rank, shape[0, core_idx],
                               shape[1, core_idx], next_rank)
        curr_core = curr_core.reshape(curr_core_new_shape)
        tt_cores.append(curr_core)
    return TensorTrain(tt_cores, shape, tt_tens.get_tt_ranks())
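# Usage sketch: compressing an 8 x 27 dense matrix into TT format with row
# modes (2, 2, 2) and column modes (3, 3, 3).
#
#   dense = torch.randn(8, 27)
#   m = to_tt_matrix(dense, ((2, 2, 2), (3, 3, 3)), max_tt_rank=4)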
def to_tt_tensor(tens, max_tt_rank=10, epsilon=None):
    """Convert a given torch.Tensor to a TT-tensor of the same shape.

    Args:
      tens: torch.Tensor
      max_tt_rank: a number or a list of numbers. If a number, then it defines
        the maximal TT-rank of the result. If a list of numbers, then
        `max_tt_rank` length should be d+1 (where d is the rank of `tens`) and
        `max_tt_rank[i]` defines the maximal (i+1)-th TT-rank of the result.
        The following two versions are equivalent:
          `max_tt_rank = r`
        and
          `max_tt_rank = r * np.ones(d+1)`
      epsilon: a floating point number or None. If the TT-ranks are not
        restricted (`max_tt_rank=np.inf`), then the result would be guaranteed
        to be `epsilon`-close to `tens` in terms of relative Frobenius error:
          ||res - tens||_F / ||tens||_F <= epsilon
        If the TT-ranks are restricted, providing a loose `epsilon` may reduce
        the TT-ranks of the result. E.g.
          to_tt_tensor(tens, max_tt_rank=100, epsilon=0.9)
        will probably return you a TT-tensor with TT-ranks close to 1, not
        100. Note that providing a nontrivial (= not equal to None) `epsilon`
        makes the TT-ranks of the result unknown in advance.

    Returns:
      `TensorTrain` object containing a TT-tensor.

    Raises:
      ValueError if max_tt_rank is less than 0, if max_tt_rank is not a number
      and not a vector of length d + 1 where d is the number of dimensions
      (rank) of the input tensor, or if epsilon is less than 0.
    """
    tens = torch.as_tensor(tens)
    static_shape = tens.shape
    d = len(static_shape)
    max_tt_rank = np.array(max_tt_rank).astype(np.int32)
    if np.any(max_tt_rank < 1):
        raise ValueError('Maximum TT-rank should be greater or equal to 1.')
    if epsilon is not None and epsilon < 0:
        raise ValueError('Epsilon should be non-negative.')
    # TODO: use epsilon to trim the ranks adaptively based on the singular
    # values (it is currently validated but not used for truncation).
    if max_tt_rank.size == 1:
        max_tt_rank = (max_tt_rank * np.ones(d + 1)).astype(np.int32)
    elif max_tt_rank.size != d + 1:
        raise ValueError('max_tt_rank should be a number or a vector of size '
                         '(d+1) where d is the number of dimensions (rank) of '
                         'the tensor.')
    ranks = [1] * (d + 1)
    tt_cores = []
    for core_idx in range(d - 1):
        curr_mode = static_shape[core_idx]
        rows = ranks[core_idx] * curr_mode
        tens = tens.reshape((rows, -1))
        columns = tens.shape[1]
        u, s, v = torch.svd(tens)
        if max_tt_rank[core_idx + 1] == 1:
            ranks[core_idx + 1] = 1
        else:
            ranks[core_idx + 1] = min(max_tt_rank[core_idx + 1], rows, columns)
        u = u[:, 0:ranks[core_idx + 1]]
        s = s[0:ranks[core_idx + 1]]
        v = v[:, 0:ranks[core_idx + 1]]
        core_shape = (ranks[core_idx], curr_mode, ranks[core_idx + 1])
        tt_cores.append(u.reshape(core_shape))
        # Carry the remaining factor s @ v^T over to the next iteration
        # (torch.svd returns v, not v^T).
        tens = torch.mm(torch.diag(s), v.transpose(1, 0))

    last_mode = static_shape[-1]
    core_shape = (ranks[d - 1], last_mode, ranks[d])
    tt_cores.append(tens.reshape(core_shape))
    return TensorTrain(tt_cores, static_shape)
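# Usage sketch: TT-SVD of a dense tensor; with a generous max_tt_rank the
# decomposition is exact up to floating point error.
#
#   dense = torch.randn(3, 4, 5)
#   tt = to_tt_tensor(dense, max_tt_rank=20)
#   tt.get_tt_ranks()  # [1, 3, 5, 1], capped by the unfolding matrix sizes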
def round_tt(tt, max_tt_rank, epsilon):
    """TT-rounding procedure, returns a TT object with smaller TT-ranks.

    Args:
      tt: `TensorTrain` object, TT-tensor or TT-matrix
      max_tt_rank: a number or a list of numbers. If a number, then it defines
        the maximal TT-rank of the result. If a list of numbers, then
        `max_tt_rank` length should be d+1 (where d is the rank of `tt`) and
        `max_tt_rank[i]` defines the maximal (i+1)-th TT-rank of the result.
        The following two versions are equivalent:
          `max_tt_rank = r`
        and
          `max_tt_rank = r * np.ones(d+1)`
      epsilon: a floating point number or None. If the TT-ranks are not
        restricted (`max_tt_rank=np.inf`), then the result would be guaranteed
        to be `epsilon`-close to `tt` in terms of relative Frobenius error:
          ||res - tt||_F / ||tt||_F <= epsilon
        If the TT-ranks are restricted, providing a loose `epsilon` may reduce
        the TT-ranks of the result. E.g.
          round(tt, max_tt_rank=100, epsilon=0.9)
        will probably return you a TT-tensor with TT-ranks close to 1, not
        100. Note that providing a nontrivial (= not equal to None) `epsilon`
        makes the TT-ranks of the result unknown in advance.

    Returns:
      `TensorTrain` object containing a TT-tensor.
    """
    ndims = tt.ndims
    max_tt_rank = np.array(max_tt_rank).astype(np.int32)
    if np.any(max_tt_rank < 1):
        raise ValueError('Maximum TT-rank should be greater or equal to 1.')
    if epsilon is not None and epsilon < 0:
        raise ValueError('Epsilon should be non-negative.')
    if max_tt_rank.size == 1:
        max_tt_rank = (max_tt_rank * np.ones(ndims + 1)).astype(np.int32)
    elif max_tt_rank.size != ndims + 1:
        raise ValueError(
            'max_tt_rank should be a number or a vector of size (d+1) '
            'where d is the number of dimensions (rank) of the tensor.')
    raw_shape = tt.get_raw_shape()

    tt_cores = orthogonalize_tt_cores(tt).tt_cores
    # Copy cores references so we can change the cores.
    tt_cores = list(tt_cores)

    ranks = [1] * (ndims + 1)
    # Right to left SVD compression.
    for core_idx in range(ndims - 1, 0, -1):
        curr_core = tt_cores[core_idx]
        if tt.is_tt_matrix():
            curr_mode_left = raw_shape[0][core_idx]
            curr_mode_right = raw_shape[1][core_idx]
            curr_mode = curr_mode_left * curr_mode_right
        else:
            curr_mode = raw_shape[0][core_idx]

        columns = curr_mode * ranks[core_idx + 1]
        curr_core = curr_core.reshape(-1, columns)
        rows = curr_core.shape[0]
        if max_tt_rank[core_idx] == 1:
            ranks[core_idx] = 1
        else:
            ranks[core_idx] = min(max_tt_rank[core_idx], rows, columns)
        u, s, v = torch.svd(curr_core)
        u = u[:, 0:ranks[core_idx]]
        s = s[0:ranks[core_idx]]
        v = v[:, 0:ranks[core_idx]]
        if tt.is_tt_matrix():
            core_shape = (ranks[core_idx], curr_mode_left, curr_mode_right,
                          ranks[core_idx + 1])
        else:
            core_shape = (ranks[core_idx], curr_mode, ranks[core_idx + 1])
        # torch.svd returns v (not v^T); the truncated v^T becomes the new
        # core and u @ diag(s) is absorbed into the core to the left.
        tt_cores[core_idx] = v.transpose(1, 0).reshape(core_shape)
        prev_core_shape = (-1, rows)
        tt_cores[core_idx - 1] = tt_cores[core_idx - 1].reshape(prev_core_shape)
        tt_cores[core_idx - 1] = torch.mm(tt_cores[core_idx - 1], u)
        tt_cores[core_idx - 1] = torch.mm(tt_cores[core_idx - 1], torch.diag(s))

    if tt.is_tt_matrix():
        core_shape = (ranks[0], raw_shape[0][0], raw_shape[1][0], ranks[1])
    else:
        core_shape = (ranks[0], raw_shape[0][0], ranks[1])
    tt_cores[0] = tt_cores[0].reshape(core_shape)
    return TensorTrain(tt_cores, tt.get_raw_shape())
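# Usage sketch: rounding a rank-4 sum back down; t + t represents 2t, which is
# exactly representable with the original rank 2, so the truncation is exact.
#
#   t = tensor_with_random_cores([3, 4, 5], tt_rank=2)
#   rounded = round_tt(add(t, t), max_tt_rank=2, epsilon=None)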
def _orthogonalize_tt_cores_left_to_right(tt):
    """Orthogonalize TT-cores of a TT-object in the left to right order.

    Args:
      tt: TensorTrain or a TensorTrainBatch.

    Returns:
      The same type as the input `tt` (TensorTrain or a TensorTrainBatch).

    Complexity:
      for a single TT-object: O(d r^3 n), where
        d is the number of TT-cores (tt.ndims);
        r is the largest TT-rank of tt, max(tt.get_tt_ranks());
        n is the size of the axis dimension, e.g. for a tensor of size
          4 x 4 x 4, n is 4; for a 9 x 64 matrix of raw shape
          (3, 3, 3) x (4, 4, 4), n is 12.
    """
    # Left to right orthogonalization.
    ndims = tt.ndims
    raw_shape = tt.get_raw_shape()
    tt_ranks = tt.get_tt_ranks()
    next_rank = int(tt_ranks[0])
    # Copy cores references so we can change the cores.
    tt_cores = list(tt.tt_cores)
    for core_idx in range(ndims - 1):
        curr_core = tt_cores[core_idx]
        # TT-ranks could have changed on the previous iteration, so `tt_ranks`
        # can be outdated for the current TT-rank, but should be valid for the
        # next TT-rank.
        curr_rank = next_rank
        next_rank = tt_ranks[core_idx + 1]
        if tt.is_tt_matrix():
            curr_mode_left = raw_shape[0][core_idx]
            curr_mode_right = raw_shape[1][core_idx]
            curr_mode = curr_mode_left * curr_mode_right
        else:
            curr_mode = raw_shape[0][core_idx]

        qr_shape = (curr_rank * curr_mode, next_rank)
        curr_core = curr_core.reshape(qr_shape)
        curr_core, triang = torch.qr(curr_core)
        triang_shape = triang.shape
        # The TT-rank could have changed: if qr_shape is e.g. 4 x 10, then q
        # would be of size 4 x 4 and r would be 4 x 10, which means that the
        # next rank should be changed to 4.
        next_rank = triang_shape[0]
        if tt.is_tt_matrix():
            new_core_shape = (curr_rank, curr_mode_left, curr_mode_right,
                              next_rank)
        else:
            new_core_shape = (curr_rank, curr_mode, next_rank)
        tt_cores[core_idx] = curr_core.reshape(new_core_shape)

        # Absorb the triangular factor into the next core so the represented
        # tensor does not change.
        next_core = tt_cores[core_idx + 1].reshape(triang_shape[1], -1)
        tt_cores[core_idx + 1] = torch.mm(triang, next_core)

    if tt.is_tt_matrix():
        last_core_shape = (next_rank, raw_shape[0][-1], raw_shape[1][-1], 1)
    else:
        last_core_shape = (next_rank, raw_shape[0][-1], 1)
    tt_cores[-1] = tt_cores[-1].reshape(last_core_shape)
    # TODO: infer the tt_ranks.
    return TensorTrain(tt_cores, tt.get_raw_shape())
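# Sanity-check sketch (mirror of the right-to-left version): after
# left-to-right orthogonalization every core except the last has orthonormal
# columns when reshaped to (r_i * n_i, r_{i+1}).
#
#   ortho = _orthogonalize_tt_cores_left_to_right(tt)
#   c = ortho.tt_cores[0].reshape(-1, ortho.tt_cores[0].shape[-1])
#   torch.allclose(c.t() @ c, torch.eye(c.shape[-1]), atol=1e-6)  # True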