def mps_entanglement_entropy(tensor, boundary):
    """Entanglement entropy of an MPS (in TT form) cut at site `boundary`.

    The core at `boundary` is merged with all the cores to its right, the
    result is flattened into a matrix, and the squared singular values of that
    matrix are used as the spectrum entering the entropy.

    Assumes a traditional and single MPS, that is, a linear pure state in
    single-mode form.

    Parameters
    ----------
    tensor : (TT tensor)
        Data structure
    boundary : (int)
        Qubit at which to partition system.

    Returns
    -------
    tt_mps_entanglement_entropy : order-0 tensor
        ``-sum(p * log2(p))`` over the squared singular values ``p`` that are
        strictly larger than the machine epsilon of their dtype.
    """
    boundary_core = tensor[boundary]
    core_shape = boundary_core.shape
    n_rows = core_shape[0] * core_shape[1]

    # Fold the first two modes of the boundary core together so that it can be
    # used as the first core (leading dimension 1) of the right-hand chain.
    leading_core = tl.reshape(boundary_core, (1, n_rows, core_shape[2]))
    right_block = tt_to_tensor([leading_core] + tensor[boundary + 1:])
    right_matrix = tl.reshape(right_block, (n_rows, -1))

    _, singular_values, _ = tl.partial_svd(right_matrix, min(right_matrix.shape))
    spectrum = singular_values ** 2
    cutoff = tl.eps(spectrum.dtype)
    spectrum = spectrum[spectrum > cutoff]
    return -T.sum(T.log2(spectrum) * spectrum)
def vonneumann_entropy(tensor):
    """Returns the von Neumann entropy (base 2) of a density matrix.

    The input is first reshaped to a square matrix whose side is the integer
    square root of its total number of elements.

    Parameters
    ----------
    tensor : Non-decomposed tensor with indices whose shapes are all a factor
        of two (represent one or more qubits)

    Returns
    -------
    von_neumann_entropy : order-0 tensor

    Notes
    -----
    The value computed is :math:`- \\sum_i p_i \\log_2(p_i)`, where the
    :math:`p_i` are the eigenvalues of the density matrix (the probabilities
    that each state is occupied). Eigenvalues that do not exceed the machine
    epsilon of their dtype are left out of the sum.
    """
    square_dim = int(math.sqrt(prod(tensor.shape)))
    tensor = tl.reshape(tensor, (square_dim, square_dim))
    try:
        eig_vals = T.eigh(tensor)[0]
    except Exception:
        # All density matrices are Hermitian, here real. Symmetrise the matrix
        # in case rounding/transformation errors have occurred, then retry.
        # `Exception` (not a bare `except`) so that KeyboardInterrupt and
        # SystemExit are never swallowed by the retry.
        tensor = (tensor + tl.transpose(tensor)) / 2
        eig_vals = T.eigh(tensor)[0]
    eps = tl.eps(eig_vals.dtype)
    eig_vals = eig_vals[eig_vals > eps]
    return -T.sum(T.log2(eig_vals) * eig_vals)
def cp_vonneumann_entropy(tensor):
    """Von Neumann entropy (base 2) of a density matrix given as a CP tensor.

    The weights of the normalized CP tensor are used directly as the spectrum.

    Parameters
    ----------
    tensor : (CP tensor)
        Data structure

    Returns
    -------
    cp_von_neumann_entropy : order-0 tensor
        ``-sum(w * log2(w))`` over the normalized weights ``w`` that are
        strictly larger than the machine epsilon of their dtype.
    """
    normalized = cp_normalize(tensor)
    spectrum = normalized.weights
    cutoff = tl.eps(spectrum.dtype)
    spectrum = spectrum[spectrum > cutoff]
    return -T.sum(T.log2(spectrum) * spectrum)
def vonneumann_entropy(tensor):
    """Returns the von Neumann entropy (base 2) of a density matrix.

    Parameters
    ----------
    tensor : (matrix)
        Data structure; a 2-mode, square tensor.

    Returns
    -------
    von_neumann_entropy : order-0 tensor
        ``-sum(p * log2(p))`` over the eigenvalues ``p`` of `tensor` that are
        strictly larger than the machine epsilon of their dtype.
    """
    try:
        eig_vals = T.eigh(tensor)[0]
    except Exception:
        # All density matrices are Hermitian, here real. Symmetrise the matrix
        # in case rounding/transformation errors have occurred, then retry.
        # `Exception` (not a bare `except`) so that KeyboardInterrupt and
        # SystemExit are never swallowed by the retry.
        tensor = (tensor + tl.transpose(tensor)) / 2
        eig_vals = T.eigh(tensor)[0]
    eps = tl.eps(eig_vals.dtype)
    eig_vals = eig_vals[eig_vals > eps]
    return -T.sum(T.log2(eig_vals) * eig_vals)
def parafac2(tensor_slices, rank, n_iter_max=100, init='random', svd='numpy_svd', normalize_factors=False,
             tol=1e-8, random_state=None, verbose=False, return_errors=False, n_iter_parafac=5):
    r"""PARAFAC2 decomposition [1]_ of a third order tensor via alternating least squares (ALS)

    Computes a rank-`rank` PARAFAC2 decomposition of the third-order tensor defined by
    `tensor_slices`. The decomposition is on the form :math:`(A [B_i] C)` such that the
    i-th frontal slice, :math:`X_i`, of :math:`X` is given by

    .. math::

        X_i = B_i diag(a_i) C^T,

    where :math:`diag(a_i)` is the diagonal matrix whose nonzero entries are equal to
    the :math:`i`-th row of the :math:`I \times R` factor matrix :math:`A`, :math:`B_i`
    is a :math:`J_i \times R` factor matrix such that the cross product matrix
    :math:`B_{i_1}^T B_{i_1}` is constant for all :math:`i`, and :math:`C` is a
    :math:`K \times R` factor matrix. To compute this decomposition, we reformulate
    the expression for :math:`B_i` such that

    .. math::

        B_i = P_i B,

    where :math:`P_i` is a :math:`J_i \times R` orthogonal matrix and :math:`B` is a
    :math:`R \times R` matrix.

    An alternative formulation of the PARAFAC2 decomposition is that the tensor element
    :math:`X_{ijk}` is given by

    .. math::

        X_{ijk} = \sum_{r=1}^R A_{ir} B_{ijr} C_{kr},

    with the same constraints holding for :math:`B_i` as above.

    Each outer iteration (1) folds the current weights into ``factors[1]`` and
    resets the weights to ones, (2) recomputes the projection matrices,
    (3) projects the slices, and (4) runs `n_iter_parafac` iterations of
    `parafac` on the projected tensor, warm-started from the current factors.

    Parameters
    ----------
    tensor_slices : ndarray or list of ndarrays
        Either a third order tensor or a list of second order tensors that may have
        different number of rows. Note that the second mode factor matrices are allowed
        to change over the first mode, not the third mode as some other implementations
        use (see note below).
    rank : int
        Number of components.
    n_iter_max : int
        Maximum number of (outer) iterations.
    init : {'svd', 'random', CPTensor, Parafac2Tensor}
        Type of factor matrix initialization. Forwarded to `initialize_decomposition`.
    svd : str, default is 'numpy_svd'
        function to use to compute the SVD, acceptable values in tensorly.SVD_FUNS
    normalize_factors : bool (optional)
        If True, aggregate the weights of each factor in a 1D-tensor
        of shape (rank, ), which will contain the norms of the factors. Note that
        there may be some inaccuracies in the component weights.
    tol : float, optional
        (Default: 1e-8) Convergence tolerance. Iterations stop as soon as the
        absolute change of the relative reconstruction error between two
        consecutive iterations is smaller than `tol`. If `tol` is falsy
        (``0`` or ``None``) no reconstruction error is computed and all
        `n_iter_max` iterations are run.
    random_state : {None, int, np.random.RandomState}
    verbose : int, optional
        Level of verbosity; also forwarded to the inner `parafac` call.
    return_errors : bool, optional
        Activate return of iteration errors
    n_iter_parafac: int, optional
        Number of PARAFAC iterations to perform for each PARAFAC2 iteration

    Returns
    -------
    Parafac2Tensor : (weight, factors, projection_matrices)
        * weights : 1D array of shape (rank, )
            all ones if normalize_factors is False (default),
            weights of the (normalized) factors otherwise
        * factors : List of factors of the CP decomposition element `i` is of shape
            (tensor.shape[i], rank)
        * projection_matrices : List of projection matrices used to create evolving
            factors.

    errors : list
        A list of reconstruction errors at each iteration of the algorithms.
        Only returned when `return_errors` is True; empty when `tol` is falsy.

    References
    ----------
    .. [1] Kiers, H.A.L., ten Berge, J.M.F. and Bro, R. (1999),
            PARAFAC2—Part I. A direct fitting algorithm for the PARAFAC2 model.
            J. Chemometrics, 13: 275-294.

    Notes
    -----
    This formulation of the PARAFAC2 decomposition is slightly different from the one in
    [1]_. The difference lies in that here, the second mode changes over the first mode,
    whereas in [1]_, the second mode changes over the third mode. We made this change
    since that means that the function accepts both lists of matrices and a single
    nd-array as input without any reordering of the modes.
    """
    weights, factors, projections = initialize_decomposition(
        tensor_slices, rank, init=init, svd=svd, random_state=random_state)

    rec_errors = []
    # Norm of the whole data set, accumulated slice by slice; used to turn the
    # reconstruction error into a relative error.
    norm_tensor = tl.sqrt(
        sum(tl.norm(tensor_slice, 2)**2 for tensor_slice in tensor_slices))
    svd_fun = _get_svd(svd)

    # Buffer for the projected slices; allocated once and handed back to
    # `_project_tensor_slices` through `out=` at every iteration.
    projected_tensor = tl.zeros([factor.shape[0] for factor in factors],
                                **T.context(factors[0]))

    for iteration in range(n_iter_max):
        if verbose:
            print("Starting iteration", iteration)
        # Absorb the weights into the second factor so that the inner PARAFAC
        # starts from unit weights.
        factors[1] = factors[1] * T.reshape(weights, (1, -1))
        weights = T.ones(weights.shape, **tl.context(tensor_slices[0]))

        projections = _compute_projections(tensor_slices, factors, svd_fun,
                                           out=projections)
        projected_tensor = _project_tensor_slices(tensor_slices, projections,
                                                  out=projected_tensor)
        # NOTE(review): tol=1e-100 presumably disables the inner convergence
        # test so that exactly `n_iter_parafac` iterations are run — confirm.
        _, factors = parafac(projected_tensor, rank, n_iter_max=n_iter_parafac,
                             init=(weights, factors), svd=svd,
                             orthogonalise=False, verbose=verbose,
                             return_errors=False, normalize_factors=False,
                             mask=None, random_state=random_state, tol=1e-100)

        if normalize_factors:
            new_factors = []
            for factor in factors:
                norms = T.norm(factor, axis=0)
                # Columns whose norm is (numerically) zero are divided by 1
                # instead, which avoids a division by zero.
                norms = tl.where(
                    tl.abs(norms) <= tl.eps(factor.dtype),
                    tl.ones(tl.shape(norms), **tl.context(factors[0])), norms)

                weights = weights * norms
                new_factors.append(factor / (tl.reshape(norms, (1, -1))))

            factors = new_factors

        if tol:
            rec_error = _parafac2_reconstruction_error(
                tensor_slices, (weights, factors, projections))
            rec_error /= norm_tensor
            rec_errors.append(rec_error)

            if iteration >= 1:
                if verbose:
                    print('PARAFAC2 reconstruction error={}, variation={}.'.
                          format(rec_errors[-1],
                                 rec_errors[-2] - rec_errors[-1]))

                # Stop when the error has (almost) stopped changing.
                if tol and abs(rec_errors[-2] - rec_errors[-1]) < tol:
                    if verbose:
                        print('converged in {} iterations.'.format(iteration))
                    break
            else:
                if verbose:
                    print('PARAFAC2 reconstruction error={}'.format(
                        rec_errors[-1]))

    parafac2_tensor = Parafac2Tensor((weights, factors, projections))

    if return_errors:
        return parafac2_tensor, rec_errors
    else:
        return parafac2_tensor
def parafac(tensor, rank, n_iter_max=100, init='svd', svd='numpy_svd',
            normalize_factors=False, tol=1e-8, orthogonalise=False,
            random_state=None, verbose=0, return_errors=False,
            non_negative=False, mask=None):
    """CANDECOMP/PARAFAC decomposition via alternating least squares (ALS)

    Computes a rank-`rank` decomposition of `tensor` [1]_ such that,

        ``tensor = [|weights; factors[0], ..., factors[-1] |]``.

    Parameters
    ----------
    tensor : ndarray
    rank : int
        Number of components.
    n_iter_max : int
        Maximum number of iteration
    init : {'svd', 'random'}, optional
        Type of factor matrix initialization. See `initialize_factors`.
    svd : str, default is 'numpy_svd'
        function to use to compute the SVD, acceptable values in tensorly.SVD_FUNS
    normalize_factors : if True, aggregate the weights of each factor in a 1D-tensor
        of shape (rank, ), which will contain the norms of the factors
    tol : float, optional
        (Default: 1e-8) Convergence tolerance. Iterations stop as soon as the
        absolute change of the relative reconstruction error between two
        consecutive iterations is smaller than `tol`. If `tol` is falsy, no
        error is computed and all `n_iter_max` iterations are run.
    orthogonalise : bool or int, optional
        If truthy, every factor whose smallest dimension is at least `rank`
        is replaced by the Q factor of its QR decomposition at the start of
        each iteration for which ``iteration <= orthogonalise``. A truthy
        value that is not an ``int`` is replaced by `n_iter_max`.
    random_state : {None, int, np.random.RandomState}
    verbose : int, optional
        Level of verbosity
    return_errors : bool, optional
        Activate return of iteration errors
    non_negative : bool, optional
        Perform non_negative PARAFAC (multiplicative updates instead of the
        least-squares solve). See :func:`non_negative_parafac`.
    mask : ndarray
        array of booleans with the same shape as ``tensor`` should be 0 where
        the values are missing and 1 everywhere else. Note:  if tensor is
        sparse, then mask should also be sparse with a fill value of 1 (or
        True). Allows for missing values [2]_

    Returns
    -------
    KruskalTensor : (weight, factors)
        * weights : 1D array of shape (rank, )
            all ones if normalize_factors is False (default),
            weights of the (normalized) factors otherwise
        * factors : List of factors of the CP decomposition element `i` is of shape
            (tensor.shape[i], rank)

    errors : list
        A list of reconstruction errors at each iteration of the algorithms.
        Only returned when `return_errors` is True.

    References
    ----------
    .. [1] T.G.Kolda and B.W.Bader, "Tensor Decompositions and Applications",
       SIAM REVIEW, vol. 51, n. 3, pp. 455-500, 2009.

    .. [2] Tomasi, Giorgio, and Rasmus Bro. "PARAFAC and missing values."
            Chemometrics and Intelligent Laboratory Systems 75.2 (2005): 163-180.
    """
    # Lower clipping bound used by the non-negative multiplicative update.
    epsilon = 10e-12

    # NOTE(review): `bool` is a subclass of `int`, so `orthogonalise=True` is
    # NOT expanded to `n_iter_max` here and behaves like `1` — confirm intent.
    if orthogonalise and not isinstance(orthogonalise, int):
        orthogonalise = n_iter_max

    factors = initialize_factors(tensor, rank, init=init, svd=svd,
                                 random_state=random_state,
                                 non_negative=non_negative,
                                 normalize_factors=normalize_factors)
    rec_errors = []
    norm_tensor = tl.norm(tensor, 2)
    weights = tl.ones(rank, **tl.context(tensor))

    for iteration in range(n_iter_max):
        if orthogonalise and iteration <= orthogonalise:
            factors = [tl.qr(f)[0] if min(tl.shape(f)) >= rank else f
                       for f in factors]

        if verbose > 1:
            print("Starting iteration", iteration + 1)
        for mode in range(tl.ndim(tensor)):
            if verbose > 1:
                print("Mode", mode, "of", tl.ndim(tensor))

            if mask is not None:
                # Impute the missing entries with the current reconstruction.
                tensor = tensor * mask + tl.kruskal_to_tensor(
                    (None, factors), mask=1 - mask)

            mttkrp = unfolding_dot_khatri_rao(tensor, (None, factors), mode)

            if non_negative:
                accum = 1
                # khatri_rao(factors).tl.dot(khatri_rao(factors))
                # simplifies to multiplications
                sub_indices = [i for i in range(len(factors)) if i != mode]
                for i, e in enumerate(sub_indices):
                    if i:
                        accum *= tl.dot(tl.transpose(factors[e]), factors[e])
                    else:
                        accum = tl.dot(tl.transpose(factors[e]), factors[e])

                numerator = tl.clip(mttkrp, a_min=epsilon, a_max=None)
                denominator = tl.dot(factors[mode], accum)
                denominator = tl.clip(denominator, a_min=epsilon, a_max=None)
                factor = factors[mode] * numerator / denominator
            else:
                # Hadamard product of the Gram matrices of all other factors.
                # Only the least-squares update reads it, so it is no longer
                # built on the non-negative path.
                pseudo_inverse = tl.tensor(np.ones((rank, rank)),
                                           **tl.context(tensor))
                for i, other in enumerate(factors):
                    if i != mode:
                        pseudo_inverse = pseudo_inverse * tl.dot(
                            tl.conj(tl.transpose(other)), other)

                factor = tl.transpose(
                    tl.solve(tl.conj(tl.transpose(pseudo_inverse)),
                             tl.transpose(mttkrp)))

            if normalize_factors:
                weights = tl.norm(factor, order=2, axis=0)
                # Columns with a (numerically) zero norm are divided by 1.
                weights = tl.where(
                    tl.abs(weights) <= tl.eps(tensor.dtype),
                    tl.ones(tl.shape(weights), **tl.context(factors[0])),
                    weights)
                factor = factor / (tl.reshape(weights, (1, -1)))

            factors[mode] = factor

        if tol:
            # ||tensor - rec||^2 = ||tensor||^2 + ||rec||^2 - 2*<tensor, rec>
            factors_norm = kruskal_norm((weights, factors))

            # mttkrp and factor for the last mode. This is equivalent to the
            # inner product <tensor, factorization>
            iprod = tl.sum(tl.sum(mttkrp * factor, axis=0) * weights)
            rec_error = tl.sqrt(
                tl.abs(norm_tensor**2 + factors_norm**2
                       - 2 * iprod)) / norm_tensor
            rec_errors.append(rec_error)

            if iteration >= 1:
                if verbose:
                    print('reconstruction error={}, variation={}.'.format(
                        rec_errors[-1], rec_errors[-2] - rec_errors[-1]))

                if tol and abs(rec_errors[-2] - rec_errors[-1]) < tol:
                    if verbose:
                        print('converged in {} iterations.'.format(iteration))
                    break
            else:
                if verbose:
                    print('reconstruction error={}'.format(rec_errors[-1]))

    kruskal_tensor = KruskalTensor((weights, factors))

    if return_errors:
        return kruskal_tensor, rec_errors
    else:
        return kruskal_tensor
def make_svd_non_negative(tensor, U, S, V, nntype):
    """Use NNDSVD method to transform SVD results into a non-negative form.

    This method leads to more efficient solving with NNMF [1].

    Only the non-negative counterpart `W` of `U` is built and returned. `V`
    is used solely to decide, for each singular triplet, whether the positive
    or the negative parts of the singular vectors are kept.

    Parameters
    ----------
    tensor : tensor being decomposed
        Only its dtype (for the threshold) and, for ``'nndsvda'``, its mean
        are used.
    U, S, V: SVD factorization results
        Column ``j`` of `U` is paired with ``S[j]`` and row ``j`` of `V`.
    nntype : {'nndsvd', 'nndsvda'}
        ``'nndsvd'`` applies `soft_thresholding` to `W` with the machine
        epsilon of `tensor` as threshold; ``'nndsvda'`` replaces the entries
        of `W` that are below that epsilon with the mean of `tensor`.

    Returns
    -------
    W : tensor with the same shape as `U`

    Raises
    ------
    ValueError
        If `nntype` is neither ``'nndsvd'`` nor ``'nndsvda'``.

    [1]: Boutsidis & Gallopoulos. Pattern Recognition, 41(4): 1350-1362, 2008.
    """
    # NNDSVD initialization
    W = tl.zeros_like(U)

    # The leading singular triplet is non-negative
    # so it can be used as is for initialization.
    W = tl.index_update(W, tl.index[:, 0], tl.sqrt(S[0]) * tl.abs(U[:, 0]))

    for j in range(1, tl.shape(U)[1]):
        x, y = U[:, j], V[j, :]

        # extract positive and negative parts of column vectors
        x_p, y_p = tl.clip(x, a_min=0.0), tl.clip(y, a_min=0.0)
        x_n, y_n = tl.abs(tl.clip(x, a_max=0.0)), tl.abs(tl.clip(y, a_max=0.0))

        # and their norms
        x_p_nrm, y_p_nrm = tl.norm(x_p), tl.norm(y_p)
        x_n_nrm, y_n_nrm = tl.norm(x_n), tl.norm(y_n)

        m_p, m_n = x_p_nrm * y_p_nrm, x_n_nrm * y_n_nrm

        # choose update: keep whichever sign pattern carries more weight
        if m_p > m_n:
            u = x_p / x_p_nrm
            sigma = m_p
        else:
            u = x_n / x_n_nrm
            sigma = m_n

        lbd = tl.sqrt(S[j] * sigma)
        W = tl.index_update(W, tl.index[:, j], lbd * u)

    eps = tl.eps(tensor.dtype)

    if nntype == "nndsvd":
        W = soft_thresholding(W, eps)
    elif nntype == "nndsvda":
        avg = tl.mean(tensor)
        W = tl.where(W < eps, tl.ones(tl.shape(W), **tl.context(W)) * avg, W)
    else:
        raise ValueError(
            'Invalid nntype parameter: got %r instead of one of %r' %
            (nntype, ('nndsvd', 'nndsvda')))

    return W
def non_negative_parafac(tensor, rank, n_iter_max=100, init='svd', svd='numpy_svd',
                         tol=10e-7, random_state=None, verbose=0, normalize_factors=False,
                         return_errors=False, mask=None, cvg_criterion='abs_rec_error',
                         fixed_modes=None):
    """
    Non-negative CP decomposition

    Uses multiplicative updates, see [2]_

    Parameters
    ----------
    tensor : ndarray
    rank   : int
            number of components
    n_iter_max : int
                 maximum number of iteration
    init : {'svd', 'random'}, optional
    svd : str, default is 'numpy_svd'
        function to use to compute the SVD, acceptable values in tensorly.SVD_FUNS
    tol : float, optional
          tolerance: the algorithm stops when the variation in
          the reconstruction error is less than the tolerance.
          If falsy, no error is computed and all iterations are run.
    random_state : {None, int, np.random.RandomState}
    verbose : int, optional
        level of verbosity
    normalize_factors : if True, aggregate the weights of each factor in a 1D-tensor
        of shape (rank, ), which will contain the norms of the factors
    return_errors : bool, optional
        If True, the list of reconstruction errors is returned as well.
    mask : ndarray, optional
        When given, entries where `mask` is 0 are replaced by the current
        reconstruction before each factor update.
    cvg_criterion : {'abs_rec_error', 'rec_error'}, optional
        'abs_rec_error' stops when ``|previous error - current error| < tol``,
        'rec_error' stops when ``previous error - current error < tol``.
    fixed_modes : list, default is None
        A list of modes for which the initial value is not modified.
        The last mode cannot be fixed due to error computation; if it is
        listed, a warning is issued and it is updated anyway. The list passed
        by the caller is never modified.

    Returns
    -------
    CPTensor : (weights, factors)
        factors is a list of non-negative factors of the CP decomposition;
        element `i` is of shape ``(tensor.shape[i], rank)``
    errors : list
        Reconstruction error of each iteration; only returned when
        `return_errors` is True.

    Raises
    ------
    TypeError
        If `cvg_criterion` is not one of the two supported values (raised at
        the second iteration, when the criterion is first evaluated).

    References
    ----------
    .. [2] Amnon Shashua and Tamir Hazan,
       "Non-negative tensor factorization with applications to statistics and computer vision",
       In Proceedings of the International Conference on Machine Learning (ICML),
       pp 792-799, ICML, 2005
    """
    epsilon = tl.eps(tensor.dtype)
    rank = validate_cp_rank(tl.shape(tensor), rank=rank)

    if mask is not None and init == "svd":
        message = "Masking occurs after initialization. Therefore, random initialization is recommended."
        warnings.warn(message, Warning)

    weights, factors = initialize_nn_cp(tensor, rank, init=init, svd=svd,
                                        random_state=random_state,
                                        normalize_factors=normalize_factors)
    rec_errors = []
    norm_tensor = tl.norm(tensor, 2)

    # Work on a private copy: the caller's list must not be altered when the
    # last mode has to be dropped from it.
    fixed_modes = [] if fixed_modes is None else list(fixed_modes)
    last_mode = tl.ndim(tensor) - 1
    if last_mode in fixed_modes:
        warnings.warn(
            'You asked for fixing the last mode, which is not supported while tol is fixed.\n The last mode will not be fixed. Consider using tl.moveaxis()'
        )
        # Filter instead of `remove` so that repeated entries are dropped too.
        fixed_modes = [mode for mode in fixed_modes if mode != last_mode]
    modes_list = [
        mode for mode in range(tl.ndim(tensor)) if mode not in fixed_modes
    ]

    for iteration in range(n_iter_max):
        if verbose > 1:
            print("Starting iteration", iteration + 1)
        for mode in modes_list:
            if verbose > 1:
                print("Mode", mode, "of", tl.ndim(tensor))

            accum = 1
            # khatri_rao(factors).tl.dot(khatri_rao(factors))
            # simplifies to multiplications
            sub_indices = [i for i in range(len(factors)) if i != mode]
            for i, e in enumerate(sub_indices):
                if i:
                    accum *= tl.dot(tl.transpose(factors[e]), factors[e])
                else:
                    accum = tl.dot(tl.transpose(factors[e]), factors[e])

            # Account for the weights on both sides of the Gram product.
            accum = tl.reshape(weights, (-1, 1)) * accum * tl.reshape(weights, (1, -1))

            if mask is not None:
                tensor = tensor * mask + tl.cp_to_tensor(
                    (weights, factors), mask=1 - mask)

            mttkrp = unfolding_dot_khatri_rao(tensor, (weights, factors), mode)

            # Multiplicative update; both terms are clipped from below so the
            # ratio stays positive and finite.
            numerator = tl.clip(mttkrp, a_min=epsilon, a_max=None)
            denominator = tl.dot(factors[mode], accum)
            denominator = tl.clip(denominator, a_min=epsilon, a_max=None)
            factor = factors[mode] * numerator / denominator

            factors[mode] = factor
            # The last updated mode is normalized only after the error has
            # been computed from its un-normalized `factor` and `mttkrp`.
            if normalize_factors and mode != modes_list[-1]:
                weights, factors = cp_normalize((weights, factors))

        if tol:
            # ||tensor - rec||^2 = ||tensor||^2 + ||rec||^2 - 2*<tensor, rec>
            factors_norm = cp_norm((weights, factors))

            # mttkrp and factor for the last mode. This is equivalent to the
            # inner product <tensor, factorization>
            iprod = tl.sum(tl.sum(mttkrp * factor, axis=0))
            rec_error = tl.sqrt(
                tl.abs(norm_tensor**2 + factors_norm**2
                       - 2 * iprod)) / norm_tensor
            rec_errors.append(rec_error)
            if iteration >= 1:
                rec_error_decrease = rec_errors[-2] - rec_errors[-1]

                if verbose:
                    print(
                        "iteration {}, reconstraction error: {}, decrease = {}"
                        .format(iteration, rec_error, rec_error_decrease))

                if cvg_criterion == 'abs_rec_error':
                    stop_flag = abs(rec_error_decrease) < tol
                elif cvg_criterion == 'rec_error':
                    stop_flag = rec_error_decrease < tol
                else:
                    raise TypeError("Unknown convergence criterion")

                if stop_flag:
                    if verbose:
                        print("PARAFAC converged after {} iterations".format(
                            iteration))
                    break
            else:
                if verbose:
                    print('reconstruction error={}'.format(rec_errors[-1]))
        if normalize_factors:
            weights, factors = cp_normalize((weights, factors))

    cp_tensor = CPTensor((weights, factors))

    if return_errors:
        return cp_tensor, rec_errors
    else:
        return cp_tensor
def non_negative_parafac(tensor, rank, n_iter_max=100, init='svd', svd='numpy_svd',
                         tol=10e-7, random_state=None, verbose=0, normalize_factors=False,
                         return_errors=False, mask=None, orthogonalise=False, cvg_criterion='abs_rec_error'):
    """
    Non-negative CP decomposition

    Uses multiplicative updates, see [2]_

    This is the same as parafac(non_negative=True).

    Parameters
    ----------
    tensor : ndarray
    rank   : int
            number of components
    n_iter_max : int
                 maximum number of iteration
    init : {'svd', 'random'}, optional
    svd : str, default is 'numpy_svd'
        function to use to compute the SVD, acceptable values in tensorly.SVD_FUNS
    tol : float, optional
          tolerance: the algorithm stops when the variation in
          the reconstruction error is less than the tolerance.
          If falsy, no error is computed and all iterations are run.
    random_state : {None, int, np.random.RandomState}
    verbose : int, optional
        level of verbosity
    normalize_factors : bool, optional
        if True, each updated factor is divided by its column norms and those
        norms are stored as the weights
    return_errors : bool, optional
        if True, the list of reconstruction errors is returned as well
    mask : ndarray, optional
        when given, entries where `mask` is 0 are replaced by the current
        reconstruction before each factor update
    orthogonalise : bool or int, optional
        if truthy, every factor whose smallest dimension is at least `rank` is
        replaced by the absolute value of the Q factor of its QR decomposition
        at the start of each iteration for which ``iteration <= orthogonalise``
    cvg_criterion : {'abs_rec_error', 'rec_error'}, optional
        'abs_rec_error' stops when ``|previous error - current error| < tol``,
        'rec_error' stops when ``previous error - current error < tol``

    Returns
    -------
    KruskalTensor : (weights, factors)
        factors is a list of positive factors of the CP decomposition;
        element `i` is of shape ``(tensor.shape[i], rank)``
    errors : list
        reconstruction error of each iteration; only returned when
        `return_errors` is True

    References
    ----------
    .. [2] Amnon Shashua and Tamir Hazan,
       "Non-negative tensor factorization with applications to statistics and computer vision",
       In Proceedings of the International Conference on Machine Learning (ICML),
       pp 792-799, ICML, 2005
    """
    # Lower clipping bound for numerator and denominator of the update.
    epsilon = 10e-12

    # NOTE(review): `bool` is a subclass of `int`, so `orthogonalise=True` is
    # not expanded to `n_iter_max` here and behaves like `1` — confirm intent.
    if orthogonalise and not isinstance(orthogonalise, int):
        orthogonalise = n_iter_max

    factors = initialize_factors(tensor, rank, init=init, svd=svd,
                                 random_state=random_state,
                                 non_negative=True,
                                 normalize_factors=normalize_factors)
    rec_errors = []
    norm_tensor = tl.norm(tensor, 2)
    weights = tl.ones(rank, **tl.context(tensor))

    for iteration in range(n_iter_max):
        if orthogonalise and iteration <= orthogonalise:
            for i, f in enumerate(factors):
                if min(tl.shape(f)) >= rank:
                    # abs() keeps the orthogonalised factor non-negative.
                    factors[i] = tl.abs(tl.qr(f)[0])

        if verbose > 1:
            print("Starting iteration", iteration + 1)
        for mode in range(tl.ndim(tensor)):
            if verbose > 1:
                print("Mode", mode, "of", tl.ndim(tensor))

            accum = 1
            # khatri_rao(factors).tl.dot(khatri_rao(factors))
            # simplifies to multiplications
            sub_indices = [i for i in range(len(factors)) if i != mode]
            for i, e in enumerate(sub_indices):
                if i:
                    accum *= tl.dot(tl.transpose(factors[e]), factors[e])
                else:
                    accum = tl.dot(tl.transpose(factors[e]), factors[e])

            if mask is not None:
                # Impute the missing entries with the current reconstruction.
                tensor = tensor*mask + tl.kruskal_to_tensor((None, factors), mask=1-mask)

            mttkrp = unfolding_dot_khatri_rao(tensor, (None, factors), mode)

            # Multiplicative update of the factor for the current mode.
            numerator = tl.clip(mttkrp, a_min=epsilon, a_max=None)
            denominator = tl.dot(factors[mode], accum)
            denominator = tl.clip(denominator, a_min=epsilon, a_max=None)
            factor = factors[mode] * numerator / denominator

            if normalize_factors:
                weights = tl.norm(factor, order=2, axis=0)
                # Columns with a (numerically) zero norm are divided by 1.
                weights = tl.where(tl.abs(weights) <= tl.eps(tensor.dtype),
                                   tl.ones(tl.shape(weights), **tl.context(factors[0])),
                                   weights)
                factor = factor/(tl.reshape(weights, (1, -1)))

            factors[mode] = factor

        if tol:
            # ||tensor - rec||^2 = ||tensor||^2 + ||rec||^2 - 2*<tensor, rec>
            factors_norm = kruskal_norm((weights, factors))

            # mttkrp and factor for the last mode. This is equivalent to the
            # inner product <tensor, factorization>
            iprod = tl.sum(tl.sum(mttkrp*factor, axis=0)*weights)
            rec_error = tl.sqrt(tl.abs(norm_tensor**2 + factors_norm**2 - 2*iprod)) / norm_tensor
            rec_errors.append(rec_error)
            if iteration >= 1:
                rec_error_decrease = rec_errors[-2] - rec_errors[-1]

                if verbose:
                    print("iteration {}, reconstraction error: {}, decrease = {}".format(iteration, rec_error, rec_error_decrease))

                if cvg_criterion == 'abs_rec_error':
                    stop_flag = abs(rec_error_decrease) < tol
                elif cvg_criterion == 'rec_error':
                    stop_flag = rec_error_decrease < tol
                else:
                    raise TypeError("Unknown convergence criterion")

                if stop_flag:
                    if verbose:
                        print("PARAFAC converged after {} iterations".format(iteration))
                    break
            else:
                if verbose:
                    print('reconstruction error={}'.format(rec_errors[-1]))

    kruskal_tensor = KruskalTensor((weights, factors))

    if return_errors:
        return kruskal_tensor, rec_errors
    else:
        return kruskal_tensor
def initialize_factors(tensor, rank, init='svd', svd='numpy_svd', random_state=None,
                       non_negative=False, normalize_factors=False):
    r"""Build the starting factor matrices for `parafac`.

    With ``init == 'random'`` every factor is drawn from `random_state`.
    With ``init == 'svd'`` the factor of mode `m` is made of the first `rank`
    left singular vectors of the `m`-th unfolding of `tensor`; when that mode
    is smaller than `rank`, the missing columns are filled with random values.

    Parameters
    ----------
    tensor : ndarray
    rank : int
    init : {'svd', 'random'}, optional
    svd : str, default is 'numpy_svd'
        function to use to compute the SVD, acceptable values in tensorly.SVD_FUNS
    random_state : {None, int, np.random.RandomState}
    non_negative : bool, default is False
        if True, the absolute value of each factor is returned
    normalize_factors : bool, default is False
        if True, each factor is divided by its column norms (plus the machine
        epsilon of the tensor dtype)

    Returns
    -------
    factors : ndarray list
        List of initialized factors of the CP decomposition where element `i`
        is of shape (tensor.shape[i], rank)

    Raises
    ------
    ValueError
        If `init` is not recognized, or if `svd` is not a key of
        ``tl.SVD_FUNS`` when ``init == 'svd'``.
    """
    rng = check_random_state(random_state)
    eps = tl.eps(tensor.dtype)

    def _finalize(matrix):
        # Optional post-processing shared by both initialization schemes.
        if non_negative:
            matrix = tl.abs(matrix)
        if normalize_factors:
            matrix = matrix / (tl.reshape(tl.norm(matrix, axis=0), (1, -1)) + eps)
        return matrix

    if init == 'random':
        initial = []
        for mode in range(tl.ndim(tensor)):
            sample = rng.random_sample((tensor.shape[mode], rank))
            initial.append(_finalize(tl.tensor(sample, **tl.context(tensor))))
        return initial

    if init != 'svd':
        raise ValueError('Initialization method "{}" not recognized'.format(init))

    try:
        svd_fun = tl.SVD_FUNS[svd]
    except KeyError:
        message = 'Got svd={}. However, for the current backend ({}), the possible choices are {}'.format(
            svd, tl.get_backend(), tl.SVD_FUNS)
        raise ValueError(message)

    initial = []
    for mode in range(tl.ndim(tensor)):
        left_vectors, _, _ = svd_fun(unfold(tensor, mode), n_eigenvecs=rank)

        if tensor.shape[mode] < rank:
            # TODO: this is a hack but it seems to do the job for now
            # Not enough singular vectors: pad with random columns.
            n_missing = rank - tl.shape(tensor)[mode]
            padding = tl.tensor(rng.random_sample((left_vectors.shape[0], n_missing)),
                                **tl.context(tensor))
            left_vectors = tl.concatenate([left_vectors, padding], axis=1)

        initial.append(_finalize(left_vectors[:, :rank]))
    return initial
def parafac(tensor, rank, n_iter_max=100, init='svd', svd='numpy_svd',
            normalize_factors=False, orthogonalise=False,
            tol=1e-8, random_state=None,
            verbose=0, return_errors=False,
            non_negative=False,
            sparsity=None,
            l2_reg=0, mask=None,
            cvg_criterion='abs_rec_error'):
    """CANDECOMP/PARAFAC decomposition via alternating least squares (ALS)

    Computes a rank-`rank` decomposition of `tensor` [1]_ such that,

        ``tensor = [|weights; factors[0], ..., factors[-1] |]``.

    Parameters
    ----------
    tensor : ndarray
    rank : int
        Number of components.
    n_iter_max : int
        Maximum number of iteration
    init : {'svd', 'random'}, optional
        Type of factor matrix initialization. See `initialize_factors`.
    svd : str, default is 'numpy_svd'
        function to use to compute the SVD, acceptable values in tensorly.SVD_FUNS
    normalize_factors : if True, aggregate the weights of each factor in a 1D-tensor
        of shape (rank, ), which will contain the norms of the factors
    orthogonalise : bool or int, optional
        If truthy, every factor whose smallest dimension is at least `rank`
        is replaced by the Q factor of its QR decomposition at the start of
        each iteration for which ``iteration <= orthogonalise``. A truthy
        value that is not an ``int`` is replaced by `n_iter_max`.
    tol : float, optional
        (Default: 1e-8) Tolerance used by `cvg_criterion` on the relative
        reconstruction error. If falsy, no error is computed and all
        `n_iter_max` iterations are run.
    random_state : {None, int, np.random.RandomState}
    verbose : int, optional
        Level of verbosity
    return_errors : bool, optional
        Activate return of iteration errors
    non_negative : bool, optional
        Accepted for backward compatibility; it is not used by this
        implementation.
    sparsity : float or int
        If `sparsity` is not None, we approximate tensor as a sum of
        low_rank_component and sparse_component, where
        low_rank_component = kruskal_to_tensor((weights, factors)).
        `sparsity` denotes desired fraction (float) or number (int) of
        non-zero elements in the sparse_component of the `tensor`.
    l2_reg : float, default is 0
        `l2_reg` times the identity is added to the matrix of the
        least-squares system solved for each factor.
    mask : ndarray
        array of booleans with the same shape as ``tensor`` should be 0 where
        the values are missing and 1 everywhere else. Note:  if tensor is
        sparse, then mask should also be sparse with a fill value of 1 (or
        True). Allows for missing values [2]_
    cvg_criterion : {'abs_rec_error', 'rec_error'}, optional
       Stopping criterion for ALS, works if `tol` is not None.
       If 'rec_error',  ALS stops at current iteration if
       (previous rec_error - current rec_error) < tol.
       If 'abs_rec_error', ALS terminates when
       |previous rec_error - current rec_error| < tol.

    Returns
    -------
    KruskalTensor : (weight, factors)
        * weights : 1D array of shape (rank, )
            all ones if normalize_factors is False (default),
            weights of the (normalized) factors otherwise
        * factors : List of factors of the CP decomposition element `i` is of shape
            (tensor.shape[i], rank)
        * sparse_component : nD array of shape tensor.shape. Returns only if
            `sparsity` is not None; the result is then the tuple
            ``(KruskalTensor, sparse_component)``.

    errors : list
        A list of reconstruction errors at each iteration of the algorithms.
        Only returned when `return_errors` is True.

    Raises
    ------
    TypeError
        If `cvg_criterion` is not one of the two supported values (raised at
        the second iteration, when the criterion is first evaluated).

    References
    ----------
    .. [1] T.G.Kolda and B.W.Bader, "Tensor Decompositions and Applications",
       SIAM REVIEW, vol. 51, n. 3, pp. 455-500, 2009.

    .. [2] Tomasi, Giorgio, and Rasmus Bro. "PARAFAC and missing values."
            Chemometrics and Intelligent Laboratory Systems 75.2 (2005): 163-180.
    """
    # NOTE(review): `bool` is a subclass of `int`, so `orthogonalise=True` is
    # NOT expanded to `n_iter_max` here and behaves like `1` — confirm intent.
    if orthogonalise and not isinstance(orthogonalise, int):
        orthogonalise = n_iter_max

    factors = initialize_factors(tensor, rank, init=init, svd=svd,
                                 random_state=random_state,
                                 normalize_factors=normalize_factors)
    rec_errors = []
    norm_tensor = tl.norm(tensor, 2)
    weights = tl.ones(rank, **tl.context(tensor))
    # Ridge term added to the Gram product before each solve.
    Id = tl.eye(rank, **tl.context(tensor))*l2_reg

    if sparsity:
        # A float is a fraction of the entries, anything else a count.
        if isinstance(sparsity, float):
            sparsity = int(sparsity * np.prod(tensor.shape))
        else:
            sparsity = int(sparsity)

    for iteration in range(n_iter_max):
        if orthogonalise and iteration <= orthogonalise:
            factors = [tl.qr(f)[0] if min(tl.shape(f)) >= rank else f
                       for f in factors]

        if verbose > 1:
            print("Starting iteration", iteration + 1)
        for mode in range(tl.ndim(tensor)):
            if verbose > 1:
                print("Mode", mode, "of", tl.ndim(tensor))

            # Hadamard product of the Gram matrices of all other factors.
            pseudo_inverse = tl.tensor(np.ones((rank, rank)), **tl.context(tensor))
            for i, factor in enumerate(factors):
                if i != mode:
                    pseudo_inverse = pseudo_inverse*tl.dot(tl.conj(tl.transpose(factor)), factor)
            pseudo_inverse += Id

            if mask is not None:
                # Impute the missing entries with the current reconstruction.
                tensor = tensor*mask + tl.kruskal_to_tensor((None, factors), mask=1-mask)

            mttkrp = unfolding_dot_khatri_rao(tensor, (None, factors), mode)
            factor = tl.transpose(tl.solve(tl.conj(tl.transpose(pseudo_inverse)),
                                           tl.transpose(mttkrp)))

            if normalize_factors:
                weights = tl.norm(factor, order=2, axis=0)
                # Columns with a (numerically) zero norm are divided by 1.
                weights = tl.where(tl.abs(weights) <= tl.eps(tensor.dtype),
                                   tl.ones(tl.shape(weights), **tl.context(factors[0])),
                                   weights)
                factor = factor/(tl.reshape(weights, (1, -1)))

            factors[mode] = factor

        if tol:
            if sparsity:
                low_rank_component = kruskal_to_tensor((weights, factors))
                sparse_component = sparsify_tensor(tensor - low_rank_component, sparsity)

                unnorml_rec_error = tl.norm(tensor - low_rank_component - sparse_component, 2)
            else:
                # ||tensor - rec||^2 = ||tensor||^2 + ||rec||^2 - 2*<tensor, rec>
                factors_norm = kruskal_norm((weights, factors))

                # mttkrp and factor for the last mode. This is equivalent to the
                # inner product <tensor, factorization>
                iprod = tl.sum(tl.sum(mttkrp*factor, axis=0)*weights)
                unnorml_rec_error = tl.sqrt(tl.abs(norm_tensor**2 + factors_norm**2 - 2*iprod))

            rec_error = unnorml_rec_error / norm_tensor
            rec_errors.append(rec_error)

            if iteration >= 1:
                rec_error_decrease = rec_errors[-2] - rec_errors[-1]

                if verbose:
                    print("iteration {}, reconstraction error: {}, decrease = {}, unnormalized = {}".format(iteration, rec_error, rec_error_decrease, unnorml_rec_error))

                if cvg_criterion == 'abs_rec_error':
                    stop_flag = abs(rec_error_decrease) < tol
                elif cvg_criterion == 'rec_error':
                    stop_flag = rec_error_decrease < tol
                else:
                    raise TypeError("Unknown convergence criterion")

                if stop_flag:
                    if verbose:
                        print("PARAFAC converged after {} iterations".format(iteration))
                    break
            else:
                if verbose:
                    print('reconstruction error={}'.format(rec_errors[-1]))

    kruskal_tensor = KruskalTensor((weights, factors))

    if sparsity:
        # Recomputed from the final factors so that it is also available when
        # `tol` is falsy and no error was ever evaluated.
        sparse_component = sparsify_tensor(
            tensor - kruskal_to_tensor((weights, factors)), sparsity)
        kruskal_tensor = (kruskal_tensor, sparse_component)

    if return_errors:
        return kruskal_tensor, rec_errors
    else:
        return kruskal_tensor
def hals_nnls(UtM, UtU, V=None, n_iter_max=500, tol=10e-8,
              sparsity_coefficient=None, normalize=False,
              nonzero_rows=False, exact=False):
    r"""Non Negative Least Squares (NNLS)

    Computes an approximate solution of a nonnegative least squares problem
    (NNLS) with an exact block-coordinate descent scheme.
    M is m by n, U is m by r, V is r by n.
    All matrices are nonnegative componentwise.

    This algorithm is defined in [1]_, as an accelerated version of the HALS
    algorithm. It features two accelerations: an early stop stopping
    criterion, and a complexity averaging between precomputations and loops,
    so as to use large precomputations several times.

    This function is made for being used repeatedly inside an outer-loop
    alternating algorithm, for instance for computing nonnegative matrix
    factorization or tensor factorization.

    Parameters
    ----------
    UtM : r-by-n array
        Pre-computed product of the transposed of U and M, used in the
        update rule.
    UtU : r-by-r array
        Pre-computed product of the transposed of U and U, used in the
        update rule.
    V : r-by-n array, optional
        Initial value of V. It is updated through ``tl.index_update``, which
        may modify the given array in place depending on the backend.
        If None, V is initialized with the solution of the unconstrained
        system ``UtU V = UtM``, clipped to be nonnegative and then rescaled
        by ``sum(UtM * V) / sum(UtU * (V V^T))``.
    n_iter_max : positive integer
        Upper bound on the number of sweeps over the rows of V.
        Ignored (replaced by 50000) when `exact` is True.
        Default: 500
    tol : float in [0, 1]
        Early stop criterion: the sweeps stop as soon as
        ``rec_error < tol * rec_error0``, where ``rec_error0`` is the value
        of ``rec_error`` after the first sweep.
        Set small for an almost exact NNLS solution, or larger (e.g. 1e-2)
        for inner loops of a PARAFAC computation.
        Ignored (replaced by 10e-16) when `exact` is True.
        Default: 10e-8
    sparsity_coefficient : float or None
        The coefficient controlling the sparsity level in the objective
        function. If set to None, no sparsity term is subtracted in the
        update.
        Default: None
    normalize : boolean
        If True, each row of V is divided by its norm after being updated.
        A row whose norm is zero is replaced by the constant row with
        entries ``1 / sqrt(n)``.
        Default: False
    nonzero_rows : boolean
        True if the rows of V can't be zero, False if they can be zero.
        When True, a row that became entirely zero is reset to
        ``eps * max(V)``.
        Default: False
    exact : boolean
        If True, the algorithm gives a result with high precision but at a
        high computational cost: `n_iter_max` is set to 50000, `tol` to
        10e-16, and the complexity-based early stop is disabled.
        If False, the algorithm gives an approximate solution.
        Default: False

    Returns
    -------
    V : array
        a r-by-n nonnegative matrix :math:`\approx argmin_{V >= 0} ||M-UV||_F^2`
    rec_error : float
        Sum, over the rows of V, of the squared norm of the update applied
        to each row during the last sweep (0 if no sweep was run).
    iteration : integer
        Zero-based index of the last sweep performed (0 if no sweep was run).
    complexity_ratio : float
        Ratio used by the inexact stopping rule: when `exact` is False the
        sweeps stop once ``iteration > 1 + 0.5 * complexity_ratio``.

    Raises
    ------
    ValueError
        If `nonzero_rows` is True and a diagonal entry of `UtU` is zero.

    Notes
    -----
    We solve the following problem :math:`\min_{V >= 0} ||M-UV||_F^2`

    The matrix V is updated linewise. The update rule for this resolution is:

    .. math::
        \begin{equation}
            V[k,:]_(j+1) = V[k,:]_(j) + (UtM[k,:] - UtU[k,:]\times V_(j))/UtU[k,k]
        \end{equation}

    with j the update iteration. The increment is bounded from below by
    :math:`-V[k,:]_(j)`, so that the updated row stays nonnegative.

    This problem can also be defined by adding a sparsity coefficient,
    enhancing sparsity in the solution [2]_. In this sparse version, the
    update rule becomes:

    .. math::
        \begin{equation}
            V[k,:]_(j+1) = V[k,:]_(j) + (UtM[k,:] - UtU[k,:]\times V_(j) - sparsity_coefficient)/UtU[k,k]
        \end{equation}

    References
    ----------
    .. [1] N. Gillis and F. Glineur, Accelerated Multiplicative Updates and
       Hierarchical ALS Algorithms for Nonnegative Matrix Factorization,
       Neural Computation 24 (4): 1085-1105, 2012.

    .. [2] J. Eggert, and E. Korner. "Sparse coding and NMF."
       2004 IEEE International Joint Conference on
       Neural Networks (IEEE Cat. No. 04CH37541). Vol. 4. IEEE, 2004.

    """
    rank, n_col_M = tl.shape(UtM)

    if V is None:
        # Warm start: unconstrained least-squares solution, projected onto
        # the nonnegative orthant and rescaled.
        V = tl.solve(UtU, UtM)
        V = tl.clip(V, a_min=0, a_max=None)
        scale = tl.sum(UtM * V) / tl.sum(UtU * tl.dot(V, tl.transpose(V)))
        V = V * scale

    if exact:
        n_iter_max = 50000
        tol = 10e-16

    # The shape of V never changes, so the ratio is computed once instead of
    # once per sweep. This also guarantees it is defined if no sweep runs.
    shape_V = tl.shape(V)
    numerator = shape_V[0] * shape_V[1] + shape_V[1] * rank
    denominator = shape_V[0] * rank + shape_V[0]
    complexity_ratio = 1 + (numerator / denominator)

    # Defaults returned when n_iter_max <= 0 (the loop body never executes).
    rec_error = 0
    iteration = 0

    for iteration in range(n_iter_max):
        rec_error = 0
        for k in range(rank):
            if UtU[k, k]:
                # Unconstrained coordinate step for row k, evaluated once.
                step = UtM[k, :] - tl.dot(UtU[k, :], V)
                if sparsity_coefficient is not None:
                    # Modifying the function for sparsification
                    step = step - sparsity_coefficient
                step = step / UtU[k, k]

                # Never step below -V[k, :], so the updated row stays >= 0.
                deltaV = tl.where(step > -V[k, :], step, -V[k, :])
                V = tl.index_update(V, tl.index[k, :], V[k, :] + deltaV)

                rec_error = rec_error + tl.dot(deltaV, tl.transpose(deltaV))

                # Safety procedure, if rows aren't allowed to be zero
                if nonzero_rows and tl.all(V[k, :] == 0):
                    V = tl.index_update(
                        V, tl.index[k, :], tl.eps(V.dtype) * tl.max(V))

            elif nonzero_rows:
                raise ValueError(
                    "Column " + str(k)
                    + " of U is zero with nonzero condition")

            if normalize:
                norm = tl.norm(V[k, :])
                if norm != 0:
                    V = tl.index_update(V, tl.index[k, :], V[k, :] / norm)
                else:
                    # Zero row: fall back to the constant unit-norm row.
                    sqrt_n = 1 / n_col_M ** (1 / 2)
                    V = tl.index_update(V, tl.index[k, :], sqrt_n)

        if iteration == 0:
            rec_error0 = rec_error

        if rec_error < tol * rec_error0:
            break
        # Inexact mode only: cap the number of sweeps by the complexity ratio.
        if not exact and iteration > 1 + 0.5 * complexity_ratio:
            break

    return V, rec_error, iteration, complexity_ratio
def parafac2(
    tensor_slices,
    rank,
    n_iter_max=2000,
    init='random',
    svd='numpy_svd',
    normalize_factors=False,
    tol=1e-8,
    absolute_tol=1e-13,
    nn_modes=None,
    random_state=None,
    verbose=False,
    return_errors=False,
    n_iter_parafac=5,
):
    r"""PARAFAC2 decomposition [1]_ of a third order tensor via alternating least squares (ALS)

    Computes a rank-`rank` PARAFAC2 decomposition of the third-order tensor
    defined by `tensor_slices`. The decomposition is on the form
    :math:`(A [B_i] C)` such that the i-th frontal slice, :math:`X_i`, of
    :math:`X` is given by

    .. math::

        X_i = B_i diag(a_i) C^T,

    where :math:`diag(a_i)` is the diagonal matrix whose nonzero entries are
    equal to the :math:`i`-th row of the :math:`I \times R` factor matrix
    :math:`A`, :math:`B_i` is a :math:`J_i \times R` factor matrix such that
    the cross product matrix :math:`B_i^T B_i` is the same for all
    :math:`i`, and :math:`C` is a :math:`K \times R` factor matrix.
    To compute this decomposition, we reformulate the expression for
    :math:`B_i` such that

    .. math::

        B_i = P_i B,

    where :math:`P_i` is a :math:`J_i \times R` orthogonal matrix and
    :math:`B` is a :math:`R \times R` matrix.

    An alternative formulation of the PARAFAC2 decomposition is that the
    tensor element :math:`X_{ijk}` is given by

    .. math::

        X_{ijk} = \sum_{r=1}^R A_{ir} B_{ijr} C_{kr},

    with the same constraints holding for :math:`B_i` as above.

    Parameters
    ----------
    tensor_slices : ndarray or list of ndarrays
        Either a third order tensor or a list of second order tensors that
        may have different number of rows. Note that the second mode factor
        matrices are allowed to change over the first mode, not the third
        mode as some other implementations use (see note below).
    rank : int
        Number of components.
    n_iter_max : int, optional
        (Default: 2000) Maximum number of iterations.

        .. versionchanged:: 0.6.1

            Previously, the default maximum number of iterations was 100.
    init : {'svd', 'random', CPTensor, Parafac2Tensor}
        Type of factor matrix initialization, forwarded to
        `initialize_decomposition`.
    svd : str, default is 'numpy_svd'
        Function to use to compute the SVD, acceptable values in
        tensorly.SVD_FUNS.
    normalize_factors : bool (optional)
        If True, after every iteration the columns of each factor are scaled
        to unit norm and the norms are accumulated in a 1D-tensor of weights
        of shape (rank, ). Note that there may be some inaccuracies in the
        component weights.
    tol : float, optional
        (Default: 1e-8) Relative reconstruction error decrease tolerance. The
        algorithm is considered to have converged when
        :math:`\left|\| X - \hat{X}_{n-1} \|^2 - \| X - \hat{X}_{n} \|^2\right| < \epsilon \| X - \hat{X}_{n-1} \|^2`.
        That is, when the relative change in sum of squared error is less
        than the tolerance. If `tol` is falsy, no reconstruction error is
        computed and all `n_iter_max` iterations are run.

        .. versionchanged:: 0.6.1

            Previously, the stopping condition was
            :math:`\left|\| X - \hat{X}_{n-1} \| - \| X - \hat{X}_{n} \|\right| < \epsilon`.
    absolute_tol : float, optional
        (Default: 1e-13) Absolute reconstruction error tolerance. The
        algorithm is also considered to have converged when the square of
        the normalised reconstruction error (the reconstruction error divided
        by the norm of the input) drops below :math:`\epsilon_\text{abs}`.
        The absolute tolerance is necessary for stopping the algorithm when
        used on noise-free data that follows the PARAFAC2 constraint.

        If None, then the machine precision times 1000 will be used.
    nn_modes : None, 'all' or array of integers
        (Default: None) Used to specify which modes to impose non-negativity
        constraints on. When set, the inner updates are performed with
        `non_negative_parafac_hals` instead of `parafac`. We cannot impose
        non-negativity constraints on the B-mode (mode 1) with the ALS
        algorithm, so if this mode is among the constrained modes, then a
        warning will be shown (see notes for more info).
    random_state : {None, int, np.random.RandomState}
    verbose : int, optional
        Level of verbosity.
    return_errors : bool, optional
        Activate return of iteration errors.
    n_iter_parafac : int, optional
        Number of PARAFAC iterations to perform for each PARAFAC2 iteration.

    Returns
    -------
    Parafac2Tensor : (weight, factors, projection_matrices)
        * weights : 1D array of shape (rank, )
            all ones if normalize_factors is False (default),
            weights of the (normalized) factors otherwise
        * factors : List of factors of the CP decomposition fitted to the
            projected tensor (:math:`A`, :math:`B` and :math:`C` above).
        * projection_matrices : List of projection matrices used to create
            evolving factors.
    errors : list
        A list of normalised reconstruction errors, one per iteration of the
        algorithm. Only returned when `return_errors` is True.

    References
    ----------
    .. [1] Kiers, H.A.L., ten Berge, J.M.F. and Bro, R. (1999),
            PARAFAC2—Part I. A direct fitting algorithm for the PARAFAC2 model.
            J. Chemometrics, 13: 275-294.

    Notes
    -----
    This formulation of the PARAFAC2 decomposition is slightly different from
    the one in [1]_. The difference lies in that here, the second mode
    changes over the first mode, whereas in [1]_, the second mode changes
    over the third mode. We made this change since that means that the
    function accepts both lists of matrices and a single nd-array as input
    without any reordering of the modes.

    Because of the reformulation above, :math:`B_i = P_i B`, the :math:`B_i`
    matrices cannot be constrained to be non-negative with ALS. If this mode
    is constrained to be non-negative, then :math:`B` will be non-negative,
    but not the orthogonal `P_i` matrices. Consequently, the `B_i` matrices
    are unlikely to be non-negative.
    """
    weights, factors, projections = initialize_decomposition(
        tensor_slices, rank, init=init, svd=svd, random_state=random_state
    )

    # Norm of the whole data set, accumulated slice by slice.
    squared_slice_norms = (
        tl.norm(tensor_slice, 2) ** 2 for tensor_slice in tensor_slices
    )
    norm_tensor = tl.sqrt(sum(squared_slice_norms))
    svd_fun = _get_svd(svd)

    if absolute_tol is None:
        absolute_tol = tl.eps(factors[0].dtype) * 1000

    # Choose the inner CP solver: HALS when non-negativity is requested,
    # the standard parafac implementation otherwise.
    if nn_modes is not None:
        if nn_modes == 'all' or 1 in nn_modes:
            warn(
                "Mode `1` of PARAFAC2 fitted with ALS cannot be constrained "
                "to be truly non-negative. See the documentation for more "
                "info."
            )

        def parafac_updates(X, w, f):
            cp_tensor = non_negative_parafac_hals(
                X,
                rank,
                n_iter_max=n_iter_parafac,
                init=(w, f),
                svd=svd,
                nn_modes=nn_modes,
                verbose=verbose,
                return_errors=False,
                tol=1e-100,
            )
            return cp_tensor[1]
    else:
        def parafac_updates(X, w, f):
            cp_tensor = parafac(
                X,
                rank,
                n_iter_max=n_iter_parafac,
                init=(w, f),
                svd=svd,
                orthogonalise=False,
                verbose=verbose,
                return_errors=False,
                normalize_factors=False,
                mask=None,
                random_state=random_state,
                tol=1e-100,
            )
            return cp_tensor[1]

    projected_shape = [factor.shape[0] for factor in factors]
    projected_tensor = tl.zeros(projected_shape, **T.context(factors[0]))

    rec_errors = []
    for iteration in range(n_iter_max):
        if verbose:
            print("Starting iteration", iteration)

        # Fold the current weights into the second factor and reset them.
        factors[1] = factors[1] * T.reshape(weights, (1, -1))
        weights = T.ones(weights.shape, **tl.context(tensor_slices[0]))

        projections = _compute_projections(
            tensor_slices, factors, svd_fun, out=projections)
        projected_tensor = _project_tensor_slices(
            tensor_slices, projections, out=projected_tensor)
        factors = parafac_updates(projected_tensor, weights, factors)

        if normalize_factors:
            unit_factors = []
            for factor in factors:
                column_norms = T.norm(factor, axis=0)
                # Columns with (numerically) zero norm are left unscaled.
                negligible = tl.abs(column_norms) <= tl.eps(factor.dtype)
                column_norms = tl.where(
                    negligible,
                    tl.ones(tl.shape(column_norms), **tl.context(factors[0])),
                    column_norms,
                )
                weights = weights * column_norms
                unit_factors.append(
                    factor / (tl.reshape(column_norms, (1, -1))))
            factors = unit_factors

        if not tol:
            # No convergence monitoring requested.
            continue

        rec_error = _parafac2_reconstruction_error(
            tensor_slices, (weights, factors, projections))
        rec_error /= norm_tensor
        rec_errors.append(rec_error)

        if iteration < 1:
            # First iteration: nothing to compare against yet.
            if verbose:
                print('PARAFAC2 reconstruction error={}'.format(
                    rec_errors[-1]))
            continue

        previous_error = rec_errors[-2]
        current_error = rec_errors[-1]
        if verbose:
            print('PARAFAC2 reconstruction error={}, variation={}.'.format(
                current_error, previous_error - current_error))

        squared_previous = previous_error ** 2
        squared_current = current_error ** 2
        if (abs(squared_previous - squared_current) < (tol * squared_previous)
                or squared_current < absolute_tol):
            if verbose:
                print('converged in {} iterations.'.format(iteration))
            break

    parafac2_tensor = Parafac2Tensor((weights, factors, projections))
    if return_errors:
        return parafac2_tensor, rec_errors
    return parafac2_tensor