def test_tr_to_tensor():
    # Create ground truth TR factors
    factors = [tl.randn((2, 4, 3)),
               tl.randn((3, 5, 2)),
               tl.randn((2, 6, 2))]

    # Create tensor
    tensor = tl.zeros((4, 5, 6))

    for i in range(4):
        for j in range(5):
            for k in range(6):
                product = tl.dot(tl.dot(factors[0][:, i, :], factors[1][:, j, :]),
                                 factors[2][:, k, :])
                # TODO: add trace to backend instead of this
                tensor = tl.index_update(tensor, tl.index[i, j, k],
                                         tl.sum(product * tl.eye(product.shape[0])))

    # Check that TR factors re-assemble to the original tensor
    assert_array_almost_equal(tensor, tr_to_tensor(factors))
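# The TODO above asks for a backend trace; a minimal sketch of the same entry
# computation using np.trace (assuming plain NumPy arrays rather than backend
# tensors; tr_entry is a hypothetical helper, not part of the library):
import numpy as np

def tr_entry(factors, i, j, k):
    # One entry of a 3rd-order tensor-ring tensor: the trace of the product
    # of the three factor slices. np.trace(product) is equivalent to
    # tl.sum(product * tl.eye(product.shape[0])) used in the test above.
    product = factors[0][:, i, :] @ factors[1][:, j, :] @ factors[2][:, k, :]
    return np.trace(product)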
def maxvol(A):
    """Find the r x r submatrix of maximal volume in A (n x r), n >= r.

    We want to decompose matrix A as

        A = A[:, J] * (A[I, J])^-1 * A[I, :]

    This algorithm finds the submatrix A[I, J] of A with the largest
    determinant. We greedily pick the row of maximal norm and subtract its
    projection from the remaining rows.

    Parameters
    ----------
    A : matrix
        The matrix in which to find the submatrix of maximal volume.

    Returns
    -------
    row_idx : list of int
        The list of rows of A forming the matrix with maximal volume.
    A_inv : matrix
        The inverse of the matrix with maximal volume.

    References
    ----------
    .. [1] S. A. Goreinov, I. V. Oseledets, D. V. Savostyanov, E. E. Tyrtyshnikov,
       N. L. Zamarashkin. "How to find a good submatrix." Matrix Methods: Theory,
       Algorithms and Applications: Dedicated to the Memory of Gene Golub, 2010,
       pp. 247-256.

    .. [2] Ali Çivril, Malik Magdon-Ismail. "On selecting a maximum volume
       sub-matrix of a matrix and related problems." Theoretical Computer Science,
       Volume 410, Issues 47-49, 6 November 2009, Pages 4801-4811.
    """
    (n, r) = tl.shape(A)

    # The indices of the rows of the submatrix
    row_idx = tl.zeros(r)

    # Rest of rows / unselected rows
    rest_of_rows = tl.tensor(list(range(n)), dtype=tl.int64)

    # Find r rows iteratively
    i = 0
    A_new = A
    while i < r:
        mask = list(range(tl.shape(A_new)[0]))
        # Compute the squared norm of each row
        rows_norms = tl.sum(A_new ** 2, axis=1)

        # If there is only one row of A left, just return it.
        # MXNet is not robust about this case.
        if tl.shape(rows_norms) == ():
            row_idx[i] = rest_of_rows
            break

        # If a row is 0, delete it.
        if any(rows_norms == 0):
            zero_idx = tl.argmin(rows_norms, axis=0)
            mask.pop(zero_idx)
            rest_of_rows = rest_of_rows[mask]
            A_new = A_new[mask, :]
            continue

        # Find the row of max norm
        max_row_idx = tl.argmax(rows_norms, axis=0)
        max_row = A[rest_of_rows[max_row_idx], :]

        # Compute the projection of max_row onto the other rows:
        # the coefficient for rows a and b is <a, b> / sqrt(|a| * |b|)
        projection = tl.dot(A_new, tl.transpose(max_row))
        normalization = tl.sqrt(rows_norms[max_row_idx] * rows_norms)
        # Make sure the normalization vector has the same shape as the
        # projection (avoids bugs with MXNet)
        normalization = tl.reshape(normalization, tl.shape(projection))
        projection = projection / normalization

        # Remove the projected component from the remaining rows
        A_new = A_new - A_new * tl.reshape(projection, (tl.shape(A_new)[0], 1))

        # Delete the selected row
        mask.pop(max_row_idx)
        A_new = A_new[mask, :]

        # Update row_idx and rest_of_rows
        row_idx[i] = rest_of_rows[max_row_idx]
        rest_of_rows = rest_of_rows[mask]
        i = i + 1

    row_idx = tl.tensor(row_idx, dtype=tl.int64)
    inverse = tl.solve(A[row_idx, :],
                       tl.eye(tl.shape(A[row_idx, :])[0], **tl.context(A)))
    row_idx = tl.to_numpy(row_idx)

    return row_idx, inverse
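# A quick sanity check of maxvol on a random tall matrix (a sketch assuming the
# NumPy backend; the algorithm is greedy, so the volume is only approximately
# maximal):
import numpy as np
import tensorly as tl

A = tl.tensor(np.random.randn(100, 5))
row_idx, A_inv = maxvol(A)

submatrix = tl.to_numpy(A)[row_idx, :]   # the r x r submatrix found by maxvol
print(row_idx)                           # indices of the selected rows
print(abs(np.linalg.det(submatrix)))     # its volume |det|
# A_inv is the inverse of that submatrix:
print(np.allclose(np.dot(tl.to_numpy(A_inv), submatrix), np.eye(5)))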
def parafac(tensor, rank, n_iter_max=100, init='svd', svd='numpy_svd',
            normalize_factors=False, orthogonalise=False,
            tol=1e-8, random_state=None,
            verbose=0, return_errors=False,
            sparsity=None,
            l2_reg=0, mask=None,
            cvg_criterion='abs_rec_error',
            fixed_modes=None, svd_mask_repeats=5, linesearch=False):
    """CANDECOMP/PARAFAC decomposition via alternating least squares (ALS)

    Computes a rank-`rank` decomposition of `tensor` [1]_ such that::

        tensor = [|weights; factors[0], ..., factors[-1] |].

    Parameters
    ----------
    tensor : ndarray
    rank : int
        Number of components.
    n_iter_max : int
        Maximum number of iterations.
    init : {'svd', 'random'}, optional
        Type of factor matrix initialization. See `initialize_factors`.
    svd : str, default is 'numpy_svd'
        Function to use to compute the SVD; acceptable values are in
        tensorly.SVD_FUNS.
    normalize_factors : bool, optional
        If True, aggregate the norms of the factors in a 1D tensor of shape
        (rank, ), and normalize the factors accordingly.
    tol : float, optional (Default: 1e-8)
        Relative reconstruction error tolerance. The algorithm is considered
        to have converged when the change in reconstruction error is less
        than `tol`.
    random_state : {None, int, np.random.RandomState}
    verbose : int, optional
        Level of verbosity.
    return_errors : bool, optional
        Activate return of iteration errors.
    mask : ndarray
        Array of booleans with the same shape as ``tensor``; should be 0
        where the values are missing and 1 everywhere else. Note: if tensor
        is sparse, then mask should also be sparse with a fill value of 1
        (or True). Allows for missing values [2]_.
    cvg_criterion : {'abs_rec_error', 'rec_error'}, optional
        Stopping criterion for ALS; used when `tol` is not None.
        If 'rec_error', ALS stops at the current iteration if
        ``(previous rec_error - current rec_error) < tol``.
        If 'abs_rec_error', ALS terminates when
        ``|previous rec_error - current rec_error| < tol``.
    sparsity : float or int
        If `sparsity` is not None, we approximate tensor as a sum of
        low_rank_component and sparse_component, where
        low_rank_component = cp_to_tensor((weights, factors)). `sparsity`
        denotes the desired fraction or number of non-zero elements in the
        sparse_component of the `tensor`.
    fixed_modes : list, default is None
        A list of modes for which the initial value is not modified.
        The last mode cannot be fixed due to error computation.
    svd_mask_repeats : int
        If using a tensor with masked values, this initializes using SVD
        multiple times to remove the effect of these missing values on the
        initialization.
    linesearch : bool, default is False
        Whether to perform line search as proposed by Bro [3]_.

    Returns
    -------
    CPTensor : (weight, factors)
        * weights : 1D array of shape (rank, )

          * all ones if normalize_factors is False (default)
          * weights of the (normalized) factors otherwise

        * factors : List of factors of the CP decomposition;
          element `i` is of shape ``(tensor.shape[i], rank)``
        * sparse_component : nD array of shape tensor.shape.
          Returned only if `sparsity` is not None.

    errors : list
        A list of reconstruction errors at each iteration of the algorithm.

    References
    ----------
    .. [1] T. G. Kolda and B. W. Bader, "Tensor Decompositions and
       Applications", SIAM REVIEW, vol. 51, n. 3, pp. 455-500, 2009.

    .. [2] Tomasi, Giorgio, and Rasmus Bro. "PARAFAC and missing values."
       Chemometrics and Intelligent Laboratory Systems 75.2 (2005): 163-180.

    .. [3] R. Bro, "Multi-Way Analysis in the Food Industry: Models,
       Algorithms, and Applications", PhD thesis, University of Amsterdam,
       1998.
    """
    rank = validate_cp_rank(tl.shape(tensor), rank=rank)

    if orthogonalise and not isinstance(orthogonalise, int):
        orthogonalise = n_iter_max

    if linesearch:
        acc_pow = 2.0   # Extrapolate to the iteration^(1/acc_pow) ahead
        acc_fail = 0    # How many times acceleration has failed
        max_fail = 4    # Increase acc_pow by one after max_fail failures

    weights, factors = initialize_cp(tensor, rank, init=init, svd=svd,
                                     random_state=random_state,
                                     normalize_factors=normalize_factors)

    if mask is not None and init == "svd":
        for _ in range(svd_mask_repeats):
            tensor = tensor*mask + tl.cp_to_tensor((weights, factors), mask=1-mask)
            weights, factors = initialize_cp(tensor, rank, init=init, svd=svd,
                                             random_state=random_state,
                                             normalize_factors=normalize_factors)

    rec_errors = []
    norm_tensor = tl.norm(tensor, 2)
    Id = tl.eye(rank, **tl.context(tensor))*l2_reg

    # Use None instead of a mutable default argument for fixed_modes,
    # since the list is mutated below
    if fixed_modes is None:
        fixed_modes = []
    if tl.ndim(tensor)-1 in fixed_modes:
        warnings.warn('You asked for fixing the last mode, which is not supported.\n'
                      'The last mode will not be fixed. Consider using tl.moveaxis()')
        fixed_modes.remove(tl.ndim(tensor)-1)
    modes_list = [mode for mode in range(tl.ndim(tensor)) if mode not in fixed_modes]

    if sparsity:
        sparse_component = tl.zeros_like(tensor)
        if isinstance(sparsity, float):
            sparsity = int(sparsity * np.prod(tensor.shape))
        else:
            sparsity = int(sparsity)

    for iteration in range(n_iter_max):
        if orthogonalise and iteration <= orthogonalise:
            factors = [tl.qr(f)[0] if min(tl.shape(f)) >= rank else f
                       for i, f in enumerate(factors)]

        if linesearch and iteration % 2 == 0:
            factors_last = [tl.copy(f) for f in factors]
            weights_last = tl.copy(weights)

        if verbose > 1:
            print("Starting iteration", iteration + 1)
        for mode in modes_list:
            if verbose > 1:
                print("Mode", mode, "of", tl.ndim(tensor))

            pseudo_inverse = tl.tensor(np.ones((rank, rank)), **tl.context(tensor))
            for i, factor in enumerate(factors):
                if i != mode:
                    pseudo_inverse = pseudo_inverse*tl.dot(tl.conj(tl.transpose(factor)), factor)
            pseudo_inverse += Id

            if not iteration and weights is not None:
                # Take the initialization weights into account
                mttkrp = unfolding_dot_khatri_rao(tensor, (weights, factors), mode)
            else:
                mttkrp = unfolding_dot_khatri_rao(tensor, (None, factors), mode)

            factor = tl.transpose(tl.solve(tl.conj(tl.transpose(pseudo_inverse)),
                                           tl.transpose(mttkrp)))

            if normalize_factors:
                scales = tl.norm(factor, 2, axis=0)
                weights = tl.where(scales == 0,
                                   tl.ones(tl.shape(scales), **tl.context(factor)),
                                   scales)
                factor = factor / tl.reshape(weights, (1, -1))

            factors[mode] = factor

        # Will we be performing a line search iteration?
        if linesearch and iteration % 2 == 0 and iteration > 5:
            line_iter = True
        else:
            line_iter = False

        # Calculate the current unnormalized error if we need it
        if (tol or return_errors) and line_iter is False:
            unnorml_rec_error, tensor, norm_tensor = error_calc(
                tensor, norm_tensor, weights, factors, sparsity, mask, mttkrp)
        else:
            if mask is not None:
                tensor = tensor*mask + tl.cp_to_tensor((weights, factors), mask=1-mask)

        # Start line search if requested.
        if line_iter is True:
            jump = iteration ** (1.0 / acc_pow)

            new_weights = weights_last + (weights - weights_last) * jump
            new_factors = [factors_last[ii] + (factors[ii] - factors_last[ii])*jump
                           for ii in range(tl.ndim(tensor))]

            new_rec_error, new_tensor, new_norm_tensor = error_calc(
                tensor, norm_tensor, new_weights, new_factors, sparsity, mask)

            if (new_rec_error / new_norm_tensor) < rec_errors[-1]:
                factors, weights = new_factors, new_weights
                tensor, norm_tensor = new_tensor, new_norm_tensor
                unnorml_rec_error = new_rec_error
                acc_fail = 0

                if verbose:
                    print("Accepted line search jump of {}.".format(jump))
            else:
                unnorml_rec_error, tensor, norm_tensor = error_calc(
                    tensor, norm_tensor, weights, factors, sparsity, mask, mttkrp)
                acc_fail += 1

                if verbose:
                    print("Line search failed for jump of {}.".format(jump))

                if acc_fail == max_fail:
                    acc_pow += 1.0
                    acc_fail = 0
                    if verbose:
                        print("Reducing acceleration.")

        rec_error = unnorml_rec_error / norm_tensor
        rec_errors.append(rec_error)

        if tol:
            if iteration >= 1:
                rec_error_decrease = rec_errors[-2] - rec_errors[-1]

                if verbose:
                    print("iteration {}, reconstruction error: {}, decrease = {}, unnormalized = {}".format(
                        iteration, rec_error, rec_error_decrease, unnorml_rec_error))

                if cvg_criterion == 'abs_rec_error':
                    stop_flag = abs(rec_error_decrease) < tol
                elif cvg_criterion == 'rec_error':
                    stop_flag = rec_error_decrease < tol
                else:
                    raise TypeError("Unknown convergence criterion")

                if stop_flag:
                    if verbose:
                        print("PARAFAC converged after {} iterations".format(iteration))
                    break
            else:
                if verbose:
                    print('reconstruction error={}'.format(rec_errors[-1]))

    cp_tensor = CPTensor((weights, factors))

    if sparsity:
        sparse_component = sparsify_tensor(tensor - cp_to_tensor((weights, factors)),
                                           sparsity)
        cp_tensor = (cp_tensor, sparse_component)

    if return_errors:
        return cp_tensor, rec_errors
    else:
        return cp_tensor
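# Minimal usage sketch for the parafac above (assumes the NumPy backend and
# tensorly.random.random_cp; everything outside this file is an assumption):
import tensorly as tl
from tensorly.random import random_cp

# Build a synthetic rank-3 tensor and recover its factors
true_cp = random_cp((10, 11, 12), rank=3, full=False, random_state=0)
tensor = tl.cp_to_tensor(true_cp)

cp_tensor, errors = parafac(tensor, rank=3, n_iter_max=200, tol=1e-10,
                            return_errors=True)
weights, factors = cp_tensor
print([tl.shape(f) for f in factors])  # one (dim_i, rank) matrix per mode
print(float(errors[-1]))               # final relative reconstruction error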
# An earlier variant of parafac using the Kruskal-era API
# (KruskalTensor, kruskal_to_tensor).
def parafac(tensor, rank, n_iter_max=100, init='svd', svd='numpy_svd',
            normalize_factors=False, orthogonalise=False,
            tol=1e-8, random_state=None,
            verbose=0, return_errors=False,
            non_negative=False,
            sparsity=None,
            l2_reg=0, mask=None,
            cvg_criterion='abs_rec_error'):
    """CANDECOMP/PARAFAC decomposition via alternating least squares (ALS)

    Computes a rank-`rank` decomposition of `tensor` [1]_ such that
    ``tensor = [|weights; factors[0], ..., factors[-1] |]``.

    Parameters
    ----------
    tensor : ndarray
    rank : int
        Number of components.
    n_iter_max : int
        Maximum number of iterations.
    init : {'svd', 'random'}, optional
        Type of factor matrix initialization. See `initialize_factors`.
    svd : str, default is 'numpy_svd'
        Function to use to compute the SVD; acceptable values are in
        tensorly.SVD_FUNS.
    normalize_factors : bool, optional
        If True, aggregate the norms of the factors in a 1D tensor of shape
        (rank, ), and normalize the factors accordingly.
    tol : float, optional (Default: 1e-8)
        Relative reconstruction error tolerance. The algorithm is considered
        to have converged when the change in reconstruction error is less
        than `tol`.
    random_state : {None, int, np.random.RandomState}
    verbose : int, optional
        Level of verbosity.
    return_errors : bool, optional
        Activate return of iteration errors.
    mask : ndarray
        Array of booleans with the same shape as ``tensor``; should be 0
        where the values are missing and 1 everywhere else. Note: if tensor
        is sparse, then mask should also be sparse with a fill value of 1
        (or True). Allows for missing values [2]_.
    cvg_criterion : {'abs_rec_error', 'rec_error'}, optional
        Stopping criterion for ALS; used when `tol` is not None.
        If 'rec_error', ALS stops at the current iteration if
        ``(previous rec_error - current rec_error) < tol``.
        If 'abs_rec_error', ALS terminates when
        ``|previous rec_error - current rec_error| < tol``.
    sparsity : float or int
        If `sparsity` is not None, we approximate tensor as a sum of
        low_rank_component and sparse_component, where
        low_rank_component = kruskal_to_tensor((weights, factors)).
        `sparsity` denotes the desired fraction or number of non-zero
        elements in the sparse_component of the `tensor`.

    Returns
    -------
    KruskalTensor : (weight, factors)
        * weights : 1D array of shape (rank, ); all ones if
          normalize_factors is False (default), weights of the (normalized)
          factors otherwise
        * factors : List of factors of the CP decomposition;
          element `i` is of shape (tensor.shape[i], rank)
        * sparse_component : nD array of shape tensor.shape.
          Returned only if `sparsity` is not None.

    errors : list
        A list of reconstruction errors at each iteration of the algorithm.

    References
    ----------
    .. [1] T. G. Kolda and B. W. Bader, "Tensor Decompositions and
       Applications", SIAM REVIEW, vol. 51, n. 3, pp. 455-500, 2009.

    .. [2] Tomasi, Giorgio, and Rasmus Bro. "PARAFAC and missing values."
       Chemometrics and Intelligent Laboratory Systems 75.2 (2005): 163-180.
""" epsilon = 10e-12 if orthogonalise and not isinstance(orthogonalise, int): orthogonalise = n_iter_max factors = initialize_factors(tensor, rank, init=init, svd=svd, random_state=random_state, normalize_factors=normalize_factors) rec_errors = [] norm_tensor = tl.norm(tensor, 2) weights = tl.ones(rank, **tl.context(tensor)) Id = tl.eye(rank, **tl.context(tensor))*l2_reg if sparsity: sparse_component = tl.zeros_like(tensor) if isinstance(sparsity, float): sparsity = int(sparsity * np.prod(tensor.shape)) else: sparsity = int(sparsity) for iteration in range(n_iter_max): if orthogonalise and iteration <= orthogonalise: factors = [tl.qr(f)[0] if min(tl.shape(f)) >= rank else f for i, f in enumerate(factors)] if verbose > 1: print("Starting iteration", iteration + 1) for mode in range(tl.ndim(tensor)): if verbose > 1: print("Mode", mode, "of", tl.ndim(tensor)) pseudo_inverse = tl.tensor(np.ones((rank, rank)), **tl.context(tensor)) for i, factor in enumerate(factors): if i != mode: pseudo_inverse = pseudo_inverse*tl.dot(tl.conj(tl.transpose(factor)), factor) pseudo_inverse += Id if mask is not None: tensor = tensor*mask + tl.kruskal_to_tensor((None, factors), mask=1-mask) mttkrp = unfolding_dot_khatri_rao(tensor, (None, factors), mode) factor = tl.transpose(tl.solve(tl.conj(tl.transpose(pseudo_inverse)), tl.transpose(mttkrp))) if normalize_factors: weights = tl.norm(factor, order=2, axis=0) weights = tl.where(tl.abs(weights) <= tl.eps(tensor.dtype), tl.ones(tl.shape(weights), **tl.context(factors[0])), weights) factor = factor/(tl.reshape(weights, (1, -1))) factors[mode] = factor if tol: if sparsity: low_rank_component = kruskal_to_tensor((weights, factors)) sparse_component = sparsify_tensor(tensor - low_rank_component, sparsity) unnorml_rec_error = tl.norm(tensor - low_rank_component - sparse_component, 2) else: # ||tensor - rec||^2 = ||tensor||^2 + ||rec||^2 - 2*<tensor, rec> factors_norm = kruskal_norm((weights, factors)) # mttkrp and factor for the last mode. This is equivalent to the # inner product <tensor, factorization> iprod = tl.sum(tl.sum(mttkrp*factor, axis=0)*weights) unnorml_rec_error = tl.sqrt(tl.abs(norm_tensor**2 + factors_norm**2 - 2*iprod)) rec_error = unnorml_rec_error / norm_tensor rec_errors.append(rec_error) if iteration >= 1: rec_error_decrease = rec_errors[-2] - rec_errors[-1] if verbose: print("iteration {}, reconstraction error: {}, decrease = {}, unnormalized = {}".format(iteration, rec_error, rec_error_decrease, unnorml_rec_error)) if cvg_criterion == 'abs_rec_error': stop_flag = abs(rec_error_decrease) < tol elif cvg_criterion == 'rec_error': stop_flag = rec_error_decrease < tol else: raise TypeError("Unknown convergence criterion") if stop_flag: if verbose: print("PARAFAC converged after {} iterations".format(iteration)) break else: if verbose: print('reconstruction error={}'.format(rec_errors[-1])) kruskal_tensor = KruskalTensor((weights, factors)) if sparsity: sparse_component = sparsify_tensor(tensor -\ kruskal_to_tensor((weights, factors)),\ sparsity) kruskal_tensor = (kruskal_tensor, sparse_component) if return_errors: return kruskal_tensor, rec_errors else: return kruskal_tensor
def admm(UtM, UtU, x, dual_var, n_iter_max=100, n_const=None, order=None,
         non_negative=None, l1_reg=None, l2_reg=None, l2_square_reg=None,
         unimodality=None, normalize=None, simplex=None,
         normalized_sparsity=None, soft_sparsity=None, smoothness=None,
         monotonicity=None, hard_sparsity=None, tol=1e-4):
    """Alternating direction method of multipliers (ADMM) algorithm to
    minimize a quadratic function under convex constraints.

    Parameters
    ----------
    UtM : ndarray
        Pre-computed product of the transpose of U and M.
    UtU : ndarray
        Pre-computed product of the transpose of U and U.
    x : ndarray
        Initial value of x.
    dual_var : ndarray
        Dual variable used to update x.
    n_iter_max : int
        Maximum number of iterations. Default: 100.
    n_const : int
        Default: None.
    order : int
        Default: None.
    non_negative : bool or dictionary
        This constraint clips negative values to 0.
        If True, the non-negativity constraint is applied to all modes.
    l1_reg : float or list or dictionary, optional
    l2_reg : float or list or dictionary, optional
    l2_square_reg : float or list or dictionary, optional
    unimodality : bool or dictionary, optional
        If True, the unimodality constraint is applied to all modes.
    normalize : bool or dictionary, optional
        This constraint divides all values by the maximum value of the
        input array. If True, the normalize constraint is applied to all
        modes.
    simplex : float or list or dictionary, optional
    normalized_sparsity : float or list or dictionary, optional
    soft_sparsity : float or list or dictionary, optional
    smoothness : float or list or dictionary, optional
    monotonicity : bool or dictionary, optional
    hard_sparsity : float or list or dictionary, optional
    tol : float

    Returns
    -------
    x : Updated ndarray
    x_split : Updated ndarray
    dual_var : Updated ndarray

    Notes
    -----
    ADMM solves the convex optimization problem

    .. math:: \\min_{x,\\, x_{split}}\\ f(x_{split}) + g(x)
              \\quad \\text{s.t.} \\quad A x_{split} + B x = c.

    The following updates are iterated to solve the problem:

    .. math::

        \\begin{aligned}
        x_{split} &= \\operatorname{argmin}_{x_{split}}\\ f(x_{split})
                     + (\\rho/2) \\|A x_{split} + B x - c\\|_2^2 \\\\
        x &= \\operatorname{argmin}_{x}\\ g(x)
             + (\\rho/2) \\|A x_{split} + B x - c\\|_2^2 \\\\
        dual\\_var &= dual\\_var + A x_{split} + B x - c
        \\end{aligned}

    where :math:`\\rho` is a constant defined by the user.

    Consider the least squares problem :math:`\\|Ux - M\\|^2 + r(x)`.
    ADMM can be adapted to it as follows:

    .. math::

        \\begin{aligned}
        x_{split} &= (U^T U + \\rho I)^{-1} (U^T M + \\rho (x + dual\\_var)^T) \\\\
        x &= \\operatorname{argmin}_{x}\\ r(x)
             + (\\rho/2) \\|x - x_{split}^T + dual\\_var\\|_2^2 \\\\
        dual\\_var &= dual\\_var + x - x_{split}^T
        \\end{aligned}

    where :math:`r` is the regularization operator. Here, x can be updated
    by applying the proximity operator of :math:`x_{split}^T - dual\\_var`.

    References
    ----------
    .. [1] Huang, Kejun, Nicholas D. Sidiropoulos, and Athanasios P. Liavas.
       "A flexible and efficient algorithmic framework for constrained
       matrix and tensor factorization." IEEE Transactions on Signal
       Processing 64.19 (2016): 5052-5065.
""" rho = tl.trace(UtU) / tl.shape(x)[1] for iteration in range(n_iter_max): x_old = tl.copy(x) x_split = tl.solve(tl.transpose(UtU + rho * tl.eye(tl.shape(UtU)[1])), tl.transpose(UtM + rho * (x + dual_var))) x = proximal_operator(tl.transpose(x_split) - dual_var, non_negative=non_negative, l1_reg=l1_reg, l2_reg=l2_reg, l2_square_reg=l2_square_reg, unimodality=unimodality, normalize=normalize, simplex=simplex, normalized_sparsity=normalized_sparsity, soft_sparsity=soft_sparsity, smoothness=smoothness, monotonicity=monotonicity, hard_sparsity=hard_sparsity, n_const=n_const, order=order) if n_const is None: x = tl.transpose(tl.solve(tl.transpose(UtU), tl.transpose(UtM))) return x, x_split, dual_var dual_var = dual_var + x - tl.transpose(x_split) dual_residual = x - tl.transpose(x_split) primal_residual = x - x_old if tl.norm(dual_residual) < tol * tl.norm(x) and tl.norm( primal_residual) < tol * tl.norm(dual_var): break return x, x_split, dual_var
# fixed_modes, l2_reg, n_iter_max and verbose are used below but not defined
# in this fragment; the parafac defaults are assumed here.
fixed_modes = []
l2_reg = 0
n_iter_max = 100
verbose = 0

modes_list = [mode for mode in range(tl.ndim(tensor)) if mode not in fixed_modes]

rank = 2
normalize_factors = True
linesearch = False
tol = 1e-8
return_errors = False
mask = None
sparsity = None
cvg_criterion = 'abs_rec_error'

weights, factors = initialize_cp(tensor, rank=2, init='svd', svd='numpy_svd')

rec_errors = []
norm_tensor = tl.norm(tensor, 2)
Id = tl.eye(rank, **tl.context(tensor)) * l2_reg
# Not obvious at first sight what this step does: with the default l2_reg=0 it
# yields an all-zero matrix; from the operations below it appears to be a
# regularization term

for iteration in range(n_iter_max):
    if verbose > 1:
        print("Starting iteration", iteration + 1)
    for mode in modes_list:  # update each mode in turn
        if verbose > 1:
            print("Mode", mode, "of", tl.ndim(tensor))

        # All-ones matrix: the third term of the closed-form ALS solution
        pseudo_inverse = tl.tensor(np.ones((rank, rank)), **tl.context(tensor))
        for i, factor in enumerate(factors):
            if i != mode:
                pseudo_inverse = pseudo_inverse * tl.dot(
                    tl.conj(tl.transpose(factor)), factor)
        pseudo_inverse += Id  # so Id appears to act as the regularization term
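        # The walkthrough stops here mid-update; a sketch of the remaining
        # per-mode steps, mirroring the parafac body earlier in this section
        # (assumed continuation, not part of the original fragment):
        mttkrp = unfolding_dot_khatri_rao(tensor, (None, factors), mode)

        # Solve the normal equations: factor @ pseudo_inverse^T = mttkrp
        factor = tl.transpose(tl.solve(tl.conj(tl.transpose(pseudo_inverse)),
                                       tl.transpose(mttkrp)))

        if normalize_factors:
            weights = tl.norm(factor, 2, axis=0)
            factor = factor / tl.reshape(weights, (1, -1))

        factors[mode] = factor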