def __factor_non_negative(self, tensor, factors, mode):
    """Compute a non-negative factor optimization for TCA

    Parameters
    ----------
    tensor : torch.Tensor
        The tensor of activity of N neurons, T timepoints and K trials of shape N, T, K
    factors : list
        List of tensors, each one containing a factor
    mode : int
        Index of the factor to optimize

    Returns
    -------
    tensor
        Multiplicative update ratio by which to scale the factor being optimized
    """
    sub_indices = [i for i in range(self.dimension) if i != mode]
    for i, e in enumerate(sub_indices):
        if i:
            accum = accum * tl.dot(tl.transpose(factors[e]), factors[e])
        else:
            accum = tl.dot(tl.transpose(factors[e]), factors[e])

    numerator = tl.dot(tl.base.unfold(tensor, mode),
                       tl.tenalg.khatri_rao(factors, skip_matrix=mode))
    numerator = tl.clip(numerator, a_min=self.epsilon, a_max=None)
    denominator = tl.dot(factors[mode], accum)
    denominator = tl.clip(denominator, a_min=self.epsilon, a_max=None)
    return (numerator / denominator)
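# Hedged usage sketch (not part of the original class): the ratio returned by
# __factor_non_negative is meant to be applied as a multiplicative update, one
# mode at a time, inside an outer ALS-style loop. `n_iter_max` and the driver
# loop below are assumptions for illustration only.
#
#     for iteration in range(n_iter_max):
#         for mode in range(self.dimension):
#             ratio = self.__factor_non_negative(tensor, factors, mode)
#             factors[mode] = factors[mode] * ratio   # entries stay non-negative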
def test_clip():
    """Test that clip can work with single arguments"""
    X = T.tensor([0.0, -1.0, 1.0])
    X_low = T.tensor([0.0, 0.0, 1.0])
    X_high = T.tensor([0.0, -1.0, 0.0])
    assert_array_equal(tl.clip(X, a_min=0.0), X_low)
    assert_array_equal(tl.clip(X, a_max=0.0), X_high)
def simplex_prox(tensor, parameter):
    """
    Projects the input tensor on the simplex of radius parameter.

    Parameters
    ----------
    tensor : ndarray
    parameter : float

    Returns
    -------
    ndarray

    References
    ----------
    .. [1]: Held, Michael, Philip Wolfe, and Harlan P. Crowder.
            "Validation of subgradient optimization."
            Mathematical programming 6.1 (1974): 62-88.
    """
    _, col = tl.shape(tensor)
    tensor = tl.clip(tensor, 0, tl.max(tensor))
    tensor_sort = tl.sort(tensor, axis=0, descending=True)

    to_change = tl.sum(tl.where(tensor_sort > (tl.cumsum(tensor_sort, axis=0) - parameter),
                                1.0, 0.0), axis=0)
    difference = tl.zeros(col)
    for i in range(col):
        if to_change[i] > 0:
            difference = tl.index_update(difference, tl.index[i],
                                         tl.cumsum(tensor_sort, axis=0)[int(to_change[i] - 1), i])
    difference = (difference - parameter) / to_change
    return tl.clip(tensor - difference, a_min=0)
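# Hedged usage sketch: projecting each column of a small 2-D tensor onto the
# simplex of radius 1. The import path is an assumption (simplex_prox lives in
# tensorly's proximal module in the version this snippet was taken from).
#
#     import tensorly as tl
#     from tensorly.tenalg.proximal import simplex_prox
#
#     X = tl.tensor([[0.4, 2.0],
#                    [0.6, 1.0]])
#     P = simplex_prox(X, 1)
#     # P == [[0.4, 1.0], [0.6, 0.0]]: each column is non-negative and sums to 1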
def test_clip():
    # Test that clip can work with single arguments
    X = T.tensor([0.0, -1.0, 1.0])
    X_low = T.tensor([0.0, 0.0, 1.0])
    X_high = T.tensor([0.0, -1.0, 0.0])
    assert_array_equal(tl.clip(X, a_min=0.0), X_low)
    assert_array_equal(tl.clip(X, a_max=0.0), X_high)

    # More extensive test with a larger random tensor
    rng = tl.check_random_state(0)
    tensor = tl.tensor(rng.random_sample((10, 10, 10)).astype('float32'))

    val1 = np.float32(rng.random_sample())
    val2 = np.float32(rng.random_sample())
    limits = [(min(val1, val2), max(val1, val2)),
              (-1, 2),
              (tl.max(tensor) + 1, None),
              (None, tl.min(tensor) - 1),
              (tl.max(tensor), None),
              (tl.min(tensor), None),
              (None, tl.max(tensor)),
              (None, tl.min(tensor))]

    for min_val, max_val in limits:
        message = (f"Tensor clipped incorrectly with min_val={min_val} and max_val={max_val}. "
                   f"Tensor bounds are ({tl.to_numpy(tl.min(tensor))}, {tl.to_numpy(tl.max(tensor))})")
        if min_val is not None:
            assert tl.all(tl.clip(tensor, min_val, None) >= min_val), message
            assert tl.all(tl.clip(tensor, min_val, max_val) >= min_val), message
        if max_val is not None:
            assert tl.all(tl.clip(tensor, None, max_val) <= max_val), message
            assert tl.all(tl.clip(tensor, min_val, max_val) <= max_val), message
def soft_thresholding(tensor, threshold):
    """Soft-thresholding operator

        sign(tensor) * max[abs(tensor) - threshold, 0]

    Parameters
    ----------
    tensor : ndarray
    threshold : float or ndarray with shape tensor.shape
        * If float the threshold is applied to the whole tensor
        * If ndarray, one threshold is applied per element, 0 values are ignored

    Returns
    -------
    ndarray
        thresholded tensor on which the operator has been applied

    Examples
    --------
    Basic shrinkage

    >>> import tensorly.backend as T
    >>> from tensorly.tenalg.proximal import soft_thresholding
    >>> tensor = tl.tensor([[1, -2, 1.5], [-4, 3, -0.5]])
    >>> soft_thresholding(tensor, 1.1)
    array([[ 0. , -0.9,  0.4],
           [-2.9,  1.9,  0. ]])

    Example with missing values

    >>> mask = tl.tensor([[0, 0, 1], [1, 0, 1]])
    >>> soft_thresholding(tensor, mask*1.1)
    array([[ 1. , -2. ,  0.4],
           [-2.9,  3. ,  0. ]])

    See also
    --------
    inplace_soft_thresholding : Inplace version of the soft-thresholding operator
    svd_thresholding : SVD-thresholding operator
    """
    return tl.sign(tensor) * tl.clip(tl.abs(tensor) - threshold, a_min=0)
def parafac(tensor, rank, n_iter_max=100, init='svd', svd='numpy_svd', normalize_factors=False, tol=1e-8, orthogonalise=False, random_state=None, verbose=0, return_errors=False, non_negative=False, mask=None): """CANDECOMP/PARAFAC decomposition via alternating least squares (ALS) Computes a rank-`rank` decomposition of `tensor` [1]_ such that, ``tensor = [|weights; factors[0], ..., factors[-1] |]``. Parameters ---------- tensor : ndarray rank : int Number of components. n_iter_max : int Maximum number of iteration init : {'svd', 'random'}, optional Type of factor matrix initialization. See `initialize_factors`. svd : str, default is 'numpy_svd' function to use to compute the SVD, acceptable values in tensorly.SVD_FUNS normalize_factors : if True, aggregate the weights of each factor in a 1D-tensor of shape (rank, ), which will contain the norms of the factors tol : float, optional (Default: 1e-6) Relative reconstruction error tolerance. The algorithm is considered to have found the global minimum when the reconstruction error is less than `tol`. random_state : {None, int, np.random.RandomState} verbose : int, optional Level of verbosity return_errors : bool, optional Activate return of iteration errors non_negative : bool, optional Perform non_negative PARAFAC. See :func:`non_negative_parafac`. mask : ndarray array of booleans with the same shape as ``tensor`` should be 0 where the values are missing and 1 everywhere else. Note: if tensor is sparse, then mask should also be sparse with a fill value of 1 (or True). Allows for missing values [2]_ Returns ------- KruskalTensor : (weight, factors) * weights : 1D array of shape (rank, ) all ones if normalize_factors is False (default), weights of the (normalized) factors otherwise * factors : List of factors of the CP decomposition element `i` is of shape (tensor.shape[i], rank) errors : list A list of reconstruction errors at each iteration of the algorithms. References ---------- .. [1] T.G.Kolda and B.W.Bader, "Tensor Decompositions and Applications", SIAM REVIEW, vol. 51, n. 3, pp. 455-500, 2009. .. [2] Tomasi, Giorgio, and Rasmus Bro. "PARAFAC and missing values." Chemometrics and Intelligent Laboratory Systems 75.2 (2005): 163-180. 
""" epsilon = 10e-12 if orthogonalise and not isinstance(orthogonalise, int): orthogonalise = n_iter_max factors = initialize_factors(tensor, rank, init=init, svd=svd, random_state=random_state, non_negative=non_negative, normalize_factors=normalize_factors) rec_errors = [] norm_tensor = tl.norm(tensor, 2) weights = tl.ones(rank, **tl.context(tensor)) for iteration in range(n_iter_max): if orthogonalise and iteration <= orthogonalise: factors = [ tl.qr(f)[0] if min(tl.shape(f)) >= rank else f for i, f in enumerate(factors) ] if verbose > 1: print("Starting iteration", iteration + 1) for mode in range(tl.ndim(tensor)): if verbose > 1: print("Mode", mode, "of", tl.ndim(tensor)) if non_negative: accum = 1 # khatri_rao(factors).tl.dot(khatri_rao(factors)) # simplifies to multiplications sub_indices = [i for i in range(len(factors)) if i != mode] for i, e in enumerate(sub_indices): if i: accum *= tl.dot(tl.transpose(factors[e]), factors[e]) else: accum = tl.dot(tl.transpose(factors[e]), factors[e]) pseudo_inverse = tl.tensor(np.ones((rank, rank)), **tl.context(tensor)) for i, factor in enumerate(factors): if i != mode: pseudo_inverse = pseudo_inverse * tl.dot( tl.conj(tl.transpose(factor)), factor) if mask is not None: tensor = tensor * mask + tl.kruskal_to_tensor( (None, factors), mask=1 - mask) mttkrp = unfolding_dot_khatri_rao(tensor, (None, factors), mode) if non_negative: numerator = tl.clip(mttkrp, a_min=epsilon, a_max=None) denominator = tl.dot(factors[mode], accum) denominator = tl.clip(denominator, a_min=epsilon, a_max=None) factor = factors[mode] * numerator / denominator else: factor = tl.transpose( tl.solve(tl.conj(tl.transpose(pseudo_inverse)), tl.transpose(mttkrp))) if normalize_factors: weights = tl.norm(factor, order=2, axis=0) weights = tl.where( tl.abs(weights) <= tl.eps(tensor.dtype), tl.ones(tl.shape(weights), **tl.context(factors[0])), weights) factor = factor / (tl.reshape(weights, (1, -1))) factors[mode] = factor if tol: # ||tensor - rec||^2 = ||tensor||^2 + ||rec||^2 - 2*<tensor, rec> factors_norm = kruskal_norm((weights, factors)) # mttkrp and factor for the last mode. This is equivalent to the # inner product <tensor, factorization> iprod = tl.sum(tl.sum(mttkrp * factor, axis=0) * weights) rec_error = tl.sqrt( tl.abs(norm_tensor**2 + factors_norm**2 - 2 * iprod)) / norm_tensor rec_errors.append(rec_error) if iteration >= 1: if verbose: print('reconstruction error={}, variation={}.'.format( rec_errors[-1], rec_errors[-2] - rec_errors[-1])) if tol and abs(rec_errors[-2] - rec_errors[-1]) < tol: if verbose: print('converged in {} iterations.'.format(iteration)) break else: if verbose: print('reconstruction error={}'.format(rec_errors[-1])) kruskal_tensor = KruskalTensor((weights, factors)) if return_errors: return kruskal_tensor, rec_errors else: return kruskal_tensor
def make_svd_non_negative(tensor, U, S, V, nntype):
    """ Use NNDSVD method to transform SVD results into a non-negative form. This
    method leads to more efficient solving with NNMF [1].

    Parameters
    ----------
    tensor : tensor being decomposed
    U, S, V: SVD factorization results
    nntype : {'nndsvd', 'nndsvda'}
        Whether to fill small values with 0.0 (nndsvd), or the tensor mean (nndsvda, default).

    [1]: Boutsidis & Gallopoulos. Pattern Recognition, 41(4): 1350-1362, 2008.
    """

    # NNDSVD initialization
    W = tl.zeros_like(U)
    H = tl.zeros_like(V)

    # The leading singular triplet is non-negative
    # so it can be used as is for initialization.
    W = tl.index_update(W, tl.index[:, 0], tl.sqrt(S[0]) * tl.abs(U[:, 0]))
    H = tl.index_update(H, tl.index[0, :], tl.sqrt(S[0]) * tl.abs(V[0, :]))

    for j in range(1, tl.shape(U)[1]):
        x, y = U[:, j], V[j, :]

        # extract positive and negative parts of column vectors
        x_p, y_p = tl.clip(x, a_min=0.0), tl.clip(y, a_min=0.0)
        x_n, y_n = tl.abs(tl.clip(x, a_max=0.0)), tl.abs(tl.clip(y, a_max=0.0))

        # and their norms
        x_p_nrm, y_p_nrm = tl.norm(x_p), tl.norm(y_p)
        x_n_nrm, y_n_nrm = tl.norm(x_n), tl.norm(y_n)

        m_p, m_n = x_p_nrm * y_p_nrm, x_n_nrm * y_n_nrm

        # choose update
        if m_p > m_n:
            u = x_p / x_p_nrm
            v = y_p / y_p_nrm
            sigma = m_p
        else:
            u = x_n / x_n_nrm
            v = y_n / y_n_nrm
            sigma = m_n

        lbd = tl.sqrt(S[j] * sigma)
        W = tl.index_update(W, tl.index[:, j], lbd * u)
        H = tl.index_update(H, tl.index[j, :], lbd * v)

    # After this point we no longer need H
    eps = tl.eps(tensor.dtype)

    if nntype == "nndsvd":
        W = soft_thresholding(W, eps)
    elif nntype == "nndsvda":
        avg = tl.mean(tensor)
        W = tl.where(W < eps, tl.ones(tl.shape(W), **tl.context(W)) * avg, W)
    else:
        raise ValueError('Invalid nntype parameter: got %r instead of one of %r' %
                         (nntype, ('nndsvd', 'nndsvda')))

    return W
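# Hedged sketch of how make_svd_non_negative is typically used: right after an
# SVD of one unfolding, to build a non-negative starting factor (NNDSVD-style
# initialization). numpy's SVD stands in for the backend SVD function here, and
# the shapes/rank are illustrative assumptions.
#
#     import numpy as np
#     import tensorly as tl
#
#     rng = np.random.RandomState(0)
#     tensor = tl.tensor(rng.random_sample((6, 7, 8)))
#     rank = 4
#     unfolded = tl.unfold(tensor, mode=0)
#     U, S, V = np.linalg.svd(tl.to_numpy(unfolded), full_matrices=False)
#     U, S, V = tl.tensor(U[:, :rank]), tl.tensor(S[:rank]), tl.tensor(V[:rank, :])
#     W = make_svd_non_negative(tensor, U, S, V, nntype="nndsvda")
#     # W is a non-negative (6 x rank) matrix usable as the mode-0 initial factor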
def nn_her_Als(tensor, rank, factors=None, it_max=100, tol=1e-7, beta=0.5, eta=1.5, gamma=1.05, gamma_bar=1.01, list_factors=False, error_fast=True, time_rec=False): """ her ALS methode of CP decomposition for non negative case Parameters ---------- tensor : tensor rank : int factors : list of matrices, optional an initial non negative factor matrices. The default is None. it_max : int, optional maximal number of iteration. The default is 100. tol : float, optional error tolerance. The default is 1e-7. beta : float, optional extrapolation parameter. The default is 0.5. eta : float, optional decrease coefficient of beta. The default is 1.5. gamma : float, optional increase coefficient of beta. The default is 1.05. gamma_bar : float, optional increase coeefficient of beta_bar. The default is 1.01. list_factors : boolean, optional If true, then return factor matrices of each iteration. The default is False. error_fast : boolean, optional If true, use err_fast to compute data fitting error, otherwise, use err. The default is True. time_rec : boolean, optional If true, return computation time of each iteration. The default is False. Returns ------- the CP decomposition, number of iteration, error and restart pourcentage. list_fac and list_time are optional. """ beta_bar = 1 N = tl.ndim(tensor) # order of tensor norm_tensor = tl.norm(tensor) # norm of tensor weights = None if time_rec == True: list_time = [] if list_factors == True: list_fac = [] if (factors == None): factors = svd_init_fac(tensor, rank) # Initialization of factor hat matrices by factor matrices factors_hat = factors if list_factors == True: list_fac.append(copy.deepcopy(factors)) it = 0 cpt = 0 F_hat_bf = err(tensor, None, factors) # cost error = [F_hat_bf / norm_tensor] while (error[len(error) - 1] > tol and it < it_max): if time_rec == True: tic = time.time() for n in range(N): V = np.ones((rank, rank)) for i in range(len(factors)): if i != n: V = V * tl.dot(tl.transpose(factors_hat[i]), factors_hat[i]) W = tl.cp_tensor.unfolding_dot_khatri_rao(tensor, (None, factors_hat), n) factor_bf = factors[n] # update fac, _, _, _ = hals_nnls(tl.transpose(W), V, tl.transpose(factors[n])) factors[n] = tl.transpose(fac) # extrapolate factors_hat[n] = tl.clip(factors[n] + beta * (factors[n] - factor_bf), a_min=0.0) if (error_fast == False): F_hat_new = err(tensor, None, factors_hat) # cost update else: F_hat_new = err_fast(norm_tensor, factors[N - 1], V, W) if (F_hat_new > F_hat_bf): factors_hat = factors beta_bar = beta beta = beta / eta cpt = cpt + 1 else: factors = factors_hat beta_bar = min(1, beta_bar * gamma_bar) beta = min(beta_bar, gamma * beta) F_hat_bf = F_hat_new it = it + 1 if list_factors == True: list_fac.append(copy.deepcopy(factors)) error.append(F_hat_new / norm_tensor) if time_rec == True: toc = time.time() list_time.append(toc - tic) if time_rec == True and list_factors == True: return (weights, factors, it, error, cpt / it, list_fac, list_time) if list_factors == True: return (weights, factors, it, error, cpt / it, list_fac) if time_rec == True: return (weights, factors, it, error, cpt / it, list_time) return (weights, factors, it, error, cpt / it)
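# Hedged usage sketch for nn_her_Als (research code: svd_init_fac, err,
# err_fast and hals_nnls are assumed importable alongside it).
#
#     import numpy as np
#     import tensorly as tl
#
#     rng = np.random.RandomState(1)
#     tensor = tl.tensor(np.abs(rng.random_sample((20, 20, 20))))
#     weights, factors, n_it, error, restart_pct = nn_her_Als(tensor, rank=5,
#                                                             it_max=200, tol=1e-8)
#     # error[-1] is the final relative reconstruction error; restart_pct is the
#     # fraction of iterations where the extrapolation step was rejected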
def non_negative_tucker(tensor, rank, n_iter_max=10, init='svd', tol=10e-5, random_state=None, verbose=False, ranks=None): """Non-negative Tucker decomposition Iterative multiplicative update, see [2]_ Parameters ---------- tensor : ``ndarray`` rank : int number of components n_iter_max : int maximum number of iteration init : {'svd', 'random'} random_state : {None, int, np.random.RandomState} Returns ------- core : ndarray positive core of the Tucker decomposition has shape `ranks` factors : ndarray list list of factors of the CP decomposition element `i` is of shape ``(tensor.shape[i], rank)`` References ---------- .. [2] Yong-Deok Kim and Seungjin Choi, "Nonnegative tucker decomposition", IEEE Conference on Computer Vision and Pattern Recognition s(CVPR), pp 1-8, 2007 """ if ranks is not None: message = "'ranks' is depreciated, please use 'rank' instead" warnings.warn(message, DeprecationWarning) rank = ranks if rank is None: rank = [tl.shape(tensor)[mode] for mode in range(tl.ndim(tensor))] elif isinstance(rank, int): n_mode = tl.ndim(tensor) message = "Given only one int for 'rank' for decomposition a tensor of order {}. Using this rank for all modes.".format(n_mode) warnings.warn(message, RuntimeWarning) rank = [rank]*n_mode epsilon = 10e-12 # Initialisation if init == 'svd': core, factors = tucker(tensor, rank) nn_factors = [tl.abs(f) for f in factors] nn_core = tl.abs(core) else: rng = check_random_state(random_state) core = tl.tensor(rng.random_sample(rank) + 0.01, **tl.context(tensor)) # Check this factors = [tl.tensor(rng.random_sample(s), **tl.context(tensor)) for s in zip(tl.shape(tensor), rank)] nn_factors = [tl.abs(f) for f in factors] nn_core = tl.abs(core) norm_tensor = tl.norm(tensor, 2) rec_errors = [] for iteration in range(n_iter_max): for mode in range(tl.ndim(tensor)): B = tucker_to_tensor((nn_core, nn_factors), skip_factor=mode) B = tl.transpose(unfold(B, mode)) numerator = tl.dot(unfold(tensor, mode), B) numerator = tl.clip(numerator, a_min=epsilon, a_max=None) denominator = tl.dot(nn_factors[mode], tl.dot(tl.transpose(B), B)) denominator = tl.clip(denominator, a_min=epsilon, a_max=None) nn_factors[mode] *= numerator / denominator numerator = tucker_to_tensor((tensor, nn_factors), transpose_factors=True) numerator = tl.clip(numerator, a_min=epsilon, a_max=None) for i, f in enumerate(nn_factors): if i: denominator = mode_dot(denominator, tl.dot(tl.transpose(f), f), i) else: denominator = mode_dot(nn_core, tl.dot(tl.transpose(f), f), i) denominator = tl.clip(denominator, a_min=epsilon, a_max=None) nn_core *= numerator / denominator rec_error = tl.norm(tensor - tucker_to_tensor((nn_core, nn_factors)), 2) / norm_tensor rec_errors.append(rec_error) if iteration > 1 and verbose: print('reconstruction error={}, variation={}.'.format( rec_errors[-1], rec_errors[-2] - rec_errors[-1])) if iteration > 1 and abs(rec_errors[-2] - rec_errors[-1]) < tol: if verbose: print('converged in {} iterations.'.format(iteration)) break return nn_core, nn_factors
def non_negative_parafac(tensor, rank, n_iter_max=100, init='svd', svd='numpy_svd', tol=10e-7, random_state=None, verbose=0, normalize_factors=False, return_errors=False, mask=None, orthogonalise=False, cvg_criterion='abs_rec_error'): """ Non-negative CP decomposition Uses multiplicative updates, see [2]_ This is the same as parafac(non_negative=True). Parameters ---------- tensor : ndarray rank : int number of components n_iter_max : int maximum number of iteration init : {'svd', 'random'}, optional svd : str, default is 'numpy_svd' function to use to compute the SVD, acceptable values in tensorly.SVD_FUNS tol : float, optional tolerance: the algorithm stops when the variation in the reconstruction error is less than the tolerance random_state : {None, int, np.random.RandomState} verbose : int, optional level of verbosity Returns ------- factors : ndarray list list of positive factors of the CP decomposition element `i` is of shape ``(tensor.shape[i], rank)`` References ---------- .. [2] Amnon Shashua and Tamir Hazan, "Non-negative tensor factorization with applications to statistics and computer vision", In Proceedings of the International Conference on Machine Learning (ICML), pp 792-799, ICML, 2005 """ epsilon = 10e-12 if orthogonalise and not isinstance(orthogonalise, int): orthogonalise = n_iter_max factors = initialize_factors(tensor, rank, init=init, svd=svd, random_state=random_state, non_negative=True, normalize_factors=normalize_factors) rec_errors = [] norm_tensor = tl.norm(tensor, 2) weights = tl.ones(rank, **tl.context(tensor)) for iteration in range(n_iter_max): if orthogonalise and iteration <= orthogonalise: for i, f in enumerate(factors): if min(tl.shape(f)) >= rank: factors[i] = tl.abs(tl.qr(f)[0]) if verbose > 1: print("Starting iteration", iteration + 1) for mode in range(tl.ndim(tensor)): if verbose > 1: print("Mode", mode, "of", tl.ndim(tensor)) accum = 1 # khatri_rao(factors).tl.dot(khatri_rao(factors)) # simplifies to multiplications sub_indices = [i for i in range(len(factors)) if i != mode] for i, e in enumerate(sub_indices): if i: accum *= tl.dot(tl.transpose(factors[e]), factors[e]) else: accum = tl.dot(tl.transpose(factors[e]), factors[e]) if mask is not None: tensor = tensor*mask + tl.kruskal_to_tensor((None, factors), mask=1-mask) mttkrp = unfolding_dot_khatri_rao(tensor, (None, factors), mode) numerator = tl.clip(mttkrp, a_min=epsilon, a_max=None) denominator = tl.dot(factors[mode], accum) denominator = tl.clip(denominator, a_min=epsilon, a_max=None) factor = factors[mode] * numerator / denominator if normalize_factors: weights = tl.norm(factor, order=2, axis=0) weights = tl.where(tl.abs(weights) <= tl.eps(tensor.dtype), tl.ones(tl.shape(weights), **tl.context(factors[0])), weights) factor = factor/(tl.reshape(weights, (1, -1))) factors[mode] = factor if tol: # ||tensor - rec||^2 = ||tensor||^2 + ||rec||^2 - 2*<tensor, rec> factors_norm = kruskal_norm((weights, factors)) # mttkrp and factor for the last mode. 
This is equivalent to the # inner product <tensor, factorization> iprod = tl.sum(tl.sum(mttkrp*factor, axis=0)*weights) rec_error = tl.sqrt(tl.abs(norm_tensor**2 + factors_norm**2 - 2*iprod)) / norm_tensor rec_errors.append(rec_error) if iteration >= 1: rec_error_decrease = rec_errors[-2] - rec_errors[-1] if verbose: print("iteration {}, reconstraction error: {}, decrease = {}".format(iteration, rec_error, rec_error_decrease)) if cvg_criterion == 'abs_rec_error': stop_flag = abs(rec_error_decrease) < tol elif cvg_criterion == 'rec_error': stop_flag = rec_error_decrease < tol else: raise TypeError("Unknown convergence criterion") if stop_flag: if verbose: print("PARAFAC converged after {} iterations".format(iteration)) break else: if verbose: print('reconstruction error={}'.format(rec_errors[-1])) kruskal_tensor = KruskalTensor((weights, factors)) if return_errors: return kruskal_tensor, rec_errors else: return kruskal_tensor
def hals_nnls(UtM, UtU, V=None, n_iter_max=500, tol=10e-8, sparsity_coefficient=None, normalize=False, nonzero_rows=False, exact=False): """ Non Negative Least Squares (NNLS) Computes an approximate solution of a nonnegative least squares problem (NNLS) with an exact block-coordinate descent scheme. M is m by n, U is m by r, V is r by n. All matrices are nonnegative componentwise. This algorithm is defined in [1], as an accelerated version of the HALS algorithm. It features two accelerations: an early stop stopping criterion, and a complexity averaging between precomputations and loops, so as to use large precomputations several times. This function is made for being used repetively inside an outer-loop alternating algorithm, for instance for computing nonnegative matrix Factorization or tensor factorization. Parameters ---------- UtM: r-by-n array Pre-computed product of the transposed of U and M, used in the update rule UtU: r-by-r array Pre-computed product of the transposed of U and U, used in the update rule V: r-by-n initialization matrix (mutable) Initialized V array By default, is initialized with one non-zero entry per column corresponding to the closest column of U of the corresponding column of M. n_iter_max: Postivie integer Upper bound on the number of iterations Default: 500 tol : float in [0,1] early stop criterion, while err_k > delta*err_0. Set small for almost exact nnls solution, or larger (e.g. 1e-2) for inner loops of a PARAFAC computation. Default: 10e-8 sparsity_coefficient: float or None The coefficient controling the sparisty level in the objective function. If set to None, the problem is solved unconstrained. Default: None nonzero_rows: boolean True if the lines of the V matrix can't be zero, False if they can be zero Default: False exact: If it is True, the algorithm gives a results with high precision but it needs high computational cost. If it is False, the algorithm gives an approximate solution Default: False Returns ------- V: array a r-by-n nonnegative matrix \approx argmin_{V >= 0} ||M-UV||_F^2 rec_error: float number of loops authorized by the error stop criterion iteration: integer final number of update iteration performed complexity_ratio: float number of loops authorized by the stop criterion Notes ----- We solve the following problem :math:`\\min_{V >= 0} ||M-UV||_F^2` The matrix V is updated linewise. The update rule for this resolution is:: .. math:: \\begin{equation} V[k,:]_(j+1) = V[k,:]_(j) + (UtM[k,:] - UtU[k,:]\\times V_(j))/UtU[k,k] \\end{equation} with j the update iteration. This problem can also be defined by adding a sparsity coefficient, enhancing sparsity in the solution [2]. In this sparse version, the update rule becomes:: .. math:: \\begin{equation} V[k,:]_(j+1) = V[k,:]_(j) + (UtM[k,:] - UtU[k,:]\\times V_(j) - sparsity_coefficient)/UtU[k,k] \\end{equation} References ---------- .. [1]: N. Gillis and F. Glineur, Accelerated Multiplicative Updates and Hierarchical ALS Algorithms for Nonnegative Matrix Factorization, Neural Computation 24 (4): 1085-1105, 2012. .. [2] J. Eggert, and E. Korner. "Sparse coding and NMF." 2004 IEEE International Joint Conference on Neural Networks (IEEE Cat. No. 04CH37541). Vol. 4. IEEE, 2004. 
""" rank, n_col_M = tl.shape(UtM) if V is None: # checks if V is empty V = tl.solve(UtU, UtM) V = tl.clip(V, a_min=0, a_max=None) # Scaling scale = tl.sum(UtM * V) / tl.sum(UtU * tl.dot(V, tl.transpose(V))) V = V * scale if exact: n_iter_max = 50000 tol = 10e-16 for iteration in range(n_iter_max): rec_error = 0 for k in range(rank): if UtU[k, k]: if sparsity_coefficient is not None: # Modifying the function for sparsification deltaV = tl.where( (UtM[k, :] - tl.dot(UtU[k, :], V) - sparsity_coefficient) / UtU[k, k] > -V[k, :], (UtM[k, :] - tl.dot(UtU[k, :], V) - sparsity_coefficient) / UtU[k, k], -V[k, :]) V = tl.index_update(V, tl.index[k, :], V[k, :] + deltaV) else: # without sparsity deltaV = tl.where( (UtM[k, :] - tl.dot(UtU[k, :], V)) / UtU[k, k] > -V[k, :], (UtM[k, :] - tl.dot(UtU[k, :], V)) / UtU[k, k], -V[k, :]) V = tl.index_update(V, tl.index[k, :], V[k, :] + deltaV) rec_error = rec_error + tl.dot(deltaV, tl.transpose(deltaV)) # Safety procedure, if columns aren't allow to be zero if nonzero_rows and tl.all(V[k, :] == 0): V[k, :] = tl.eps(V.dtype) * tl.max(V) elif nonzero_rows: raise ValueError("Column " + str(k) + " of U is zero with nonzero condition") if normalize: norm = tl.norm(V[k, :]) if norm != 0: V[k, :] /= norm else: sqrt_n = 1 / n_col_M**(1 / 2) V[k, :] = [sqrt_n for i in range(n_col_M)] if iteration == 0: rec_error0 = rec_error numerator = tl.shape(V)[0] * tl.shape(V)[1] + tl.shape(V)[1] * rank denominator = tl.shape(V)[0] * rank + tl.shape(V)[0] complexity_ratio = 1 + (numerator / denominator) if exact: if rec_error < tol * rec_error0: break else: if rec_error < tol * rec_error0 or iteration > 1 + 0.5 * complexity_ratio: break return V, rec_error, iteration, complexity_ratio
def proximal_operator(tensor, non_negative=None, l1_reg=None, l2_reg=None, l2_square_reg=None, unimodality=None, normalize=None, simplex=None, normalized_sparsity=None, soft_sparsity=None, smoothness=None, monotonicity=None, hard_sparsity=None, n_const=1, order=0): """ Proximal operator solves a convex optimization problem. Let f be a convex proper lower-semicontinuous function, the proximal operator of f is :math:`\\argmin_x(f(x) + 1/2||x - v||_2^2)`. This operator can be used to solve constrained optimization problems as a generalization to projections on convex sets. Therefore, proximal gradients are used for constrained tensor decomposition problems in the literature. Parameters ---------- tensor : ndarray non_negative : bool or dictionary This constraint is clipping negative values to '0'. If it is True non-negative constraint is applied to all modes. l1_reg : float or list or dictionary, optional l2_reg : float or list or dictionary, optional l2_square_reg : float or list or dictionary, optional unimodality : bool or dictionary, optional If it is True unimodality constraint is applied to all modes. normalize : bool or dictionary, optional This constraint divides all the values by maximum value of the input array. If it is True normalize constraint is applied to all modes. simplex : float or list or dictionary, optional normalized_sparsity : float or list or dictionary, optional soft_sparsity : float or list or dictionary, optional smoothness : float or list or dictionary, optional monotonicity : bool or dictionary, optional hard_sparsity : float or list or dictionary, optional n_const : int Number of constraints. If it is None, function returns input tensor. Default : 1 order : int Specifies which constraint to implement if several constraints are selected as input Default : 0 Returns ------- tensor : updated tensor according to the selected constraint, which is the solution of the optimization problem above. If constraint is None, function returns the same tensor. References ---------- .. [1]: Moreau, J. J. (1962). Fonctions convexes duales et points proximaux dans un espace hilbertien. Comptes rendus hebdomadaires des séances de l'Académie des sciences, 255, 2897-2899. .. [2]: Parikh, N., & Boyd, S. (2014). Proximal algorithms. Foundations and Trends in optimization, 1(3), 127-239. 
""" if n_const is None: return tensor constraint, parameter = validate_constraints( non_negative=non_negative, l1_reg=l1_reg, l2_reg=l2_reg, l2_square_reg=l2_square_reg, unimodality=unimodality, normalize=normalize, simplex=simplex, normalized_sparsity=normalized_sparsity, soft_sparsity=soft_sparsity, smoothness=smoothness, monotonicity=monotonicity, hard_sparsity=hard_sparsity, n_const=n_const, order=order) if constraint is None: return tensor elif constraint == 'non_negative': return tl.clip(tensor, 0, tl.max(tensor)) elif constraint == 'l1_reg': return soft_thresholding(tensor, parameter) elif constraint == 'l2_reg': return l2_prox(tensor, parameter) elif constraint == 'l2_square_reg': return l2_square_prox(tensor, parameter) elif constraint == 'unimodality': return unimodality_prox(tensor) elif constraint == 'normalize': return tensor / tl.max(tensor) elif constraint == 'simplex': return simplex_prox(tensor, parameter) elif constraint == 'normalized_sparsity': return normalized_sparsity_prox(tensor, parameter) elif constraint == 'soft_sparsity': return soft_sparsity_prox(tensor, parameter) elif constraint == 'smoothness': return smoothness_prox(tensor, parameter) elif constraint == 'monotonicity': return monotonicity_prox(tensor) elif constraint == 'hard_sparsity': return hard_thresholding(tensor, parameter)
def active_set_nnls(Utm, UtU, x=None, n_iter_max=100, tol=10e-8): """ Active set algorithm for non-negative least square solution. Computes an approximate non-negative solution for Ux=m linear system. Parameters ---------- Utm : vectorized ndarray Pre-computed product of the transposed of U and m UtU : ndarray Pre-computed Kronecker product of the transposed of U and U x : init Default: None n_iter_max : int Maximum number of iteration Default: 100 tol : float Early stopping criterion Returns ------- x : ndarray Notes ----- This function solves following problem: .. math:: \\begin{equation} \\min_{x} ||Ux - m||^2 \\end{equation} According to [1], non-negativity-constrained least square estimation problem becomes: .. math:: \\begin{equation} x' = (Utm) - (UTU)\\times x \\end{equation} Reference ---------- [1] : Bro, R., & De Jong, S. (1997). A fast non‐negativity‐constrained least squares algorithm. Journal of Chemometrics: A Journal of the Chemometrics Society, 11(5), 393-401. """ if tl.get_backend() == 'tensorflow': raise ValueError( "Active set is not supported with the tensorflow backend. Consider using fista method with tensorflow." ) if x is None: x_vec = tl.zeros(tl.shape(UtU)[1], **tl.context(UtU)) else: x_vec = tl.base.tensor_to_vec(x) x_gradient = Utm - tl.dot(UtU, x_vec) passive_set = x_vec > 0 active_set = x_vec <= 0 support_vec = tl.zeros(tl.shape(x_vec), **tl.context(x_vec)) for iteration in range(n_iter_max): if iteration > 0 or tl.all(x_vec == 0): indice = tl.argmax(x_gradient) passive_set = tl.index_update(passive_set, tl.index[indice], True) active_set = tl.index_update(active_set, tl.index[indice], False) # To avoid singularity error when initial x exists try: passive_solution = tl.solve(UtU[passive_set, :][:, passive_set], Utm[passive_set]) indice_list = [] for i in range(tl.shape(support_vec)[0]): if passive_set[i]: indice_list.append(i) support_vec = tl.index_update( support_vec, tl.index[int(i)], passive_solution[len(indice_list) - 1]) else: support_vec = tl.index_update(support_vec, tl.index[int(i)], 0) # Start from zeros if solve is not achieved except: x_vec = tl.zeros(tl.shape(UtU)[1]) support_vec = tl.zeros(tl.shape(x_vec), **tl.context(x_vec)) passive_set = x_vec > 0 active_set = x_vec <= 0 if tl.any(active_set): indice = tl.argmax(x_gradient) passive_set = tl.index_update(passive_set, tl.index[indice], True) active_set = tl.index_update(active_set, tl.index[indice], False) passive_solution = tl.solve(UtU[passive_set, :][:, passive_set], Utm[passive_set]) indice_list = [] for i in range(tl.shape(support_vec)[0]): if passive_set[i]: indice_list.append(i) support_vec = tl.index_update( support_vec, tl.index[int(i)], passive_solution[len(indice_list) - 1]) else: support_vec = tl.index_update(support_vec, tl.index[int(i)], 0) # update support vector if it is necessary if tl.min(support_vec[passive_set]) <= 0: for i in range(len(passive_set)): alpha = tl.min( x_vec[passive_set][support_vec[passive_set] <= 0] / (x_vec[passive_set][support_vec[passive_set] <= 0] - support_vec[passive_set][support_vec[passive_set] <= 0])) update = alpha * (support_vec - x_vec) x_vec = x_vec + update passive_set = x_vec > 0 active_set = x_vec <= 0 passive_solution = tl.solve( UtU[passive_set, :][:, passive_set], Utm[passive_set]) indice_list = [] for i in range(tl.shape(support_vec)[0]): if passive_set[i]: indice_list.append(i) support_vec = tl.index_update( support_vec, tl.index[int(i)], passive_solution[len(indice_list) - 1]) else: support_vec = tl.index_update(support_vec, 
tl.index[int(i)], 0) if tl.any(passive_set) != True or tl.min( support_vec[passive_set]) > 0: break # set x to s x_vec = tl.clip(support_vec, 0, tl.max(support_vec)) # gradient update x_gradient = Utm - tl.dot(UtU, x_vec) if tl.any(active_set) != True or tl.max(x_gradient[active_set]) <= tol: break return x_vec
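# Hedged usage sketch: non-negative solution of U x = m from the precomputed
# quantities Utm = U^T m and UtU = U^T U. (In the decomposition code that calls
# this, UtU is a Kronecker-structured matrix; any positive semi-definite UtU
# works for this small example.)
#
#     import numpy as np
#     import tensorly as tl
#
#     rng = np.random.RandomState(0)
#     U = tl.tensor(np.abs(rng.random_sample((40, 6))))
#     m = tl.tensor(np.abs(rng.random_sample(40)))
#     x = active_set_nnls(tl.dot(tl.transpose(U), m), tl.dot(tl.transpose(U), U))
#     # x >= 0 and approximately minimizes ||U x - m||^2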
def non_negative_parafac(tensor, rank, n_iter_max=100, init='svd', svd='numpy_svd',
                         tol=10e-7, random_state=None, verbose=0):
    """Non-negative CP decomposition

    Uses multiplicative updates, see [2]_

    Parameters
    ----------
    tensor : ndarray
    rank : int
        number of components
    n_iter_max : int
        maximum number of iteration
    init : {'svd', 'random'}, optional
    svd : str, default is 'numpy_svd'
        function to use to compute the SVD, acceptable values in tensorly.SVD_FUNS
    tol : float, optional
        tolerance: the algorithm stops when the variation in
        the reconstruction error is less than the tolerance
    random_state : {None, int, np.random.RandomState}
    verbose : int, optional
        level of verbosity

    Returns
    -------
    factors : ndarray list
        list of positive factors of the CP decomposition
        element `i` is of shape ``(tensor.shape[i], rank)``

    References
    ----------
    .. [2] Amnon Shashua and Tamir Hazan,
       "Non-negative tensor factorization with applications to statistics and computer vision",
       In Proceedings of the International Conference on Machine Learning (ICML),
       pp 792-799, ICML, 2005
    """
    epsilon = 10e-12

    nn_factors = initialize_factors(tensor, rank, init=init, svd=svd,
                                    random_state=random_state, non_negative=True)

    n_factors = len(nn_factors)
    norm_tensor = tl.norm(tensor, 2)
    rec_errors = []

    for iteration in range(n_iter_max):
        for mode in range(tl.ndim(tensor)):
            # khatri_rao(factors).tl.dot(khatri_rao(factors))
            # simplifies to multiplications
            sub_indices = [i for i in range(n_factors) if i != mode]
            for i, e in enumerate(sub_indices):
                if i:
                    accum = accum * tl.dot(tl.transpose(nn_factors[e]), nn_factors[e])
                else:
                    accum = tl.dot(tl.transpose(nn_factors[e]), nn_factors[e])

            numerator = tl.dot(unfold(tensor, mode), khatri_rao(nn_factors, skip_matrix=mode))
            numerator = tl.clip(numerator, a_min=epsilon, a_max=None)
            denominator = tl.dot(nn_factors[mode], accum)
            denominator = tl.clip(denominator, a_min=epsilon, a_max=None)
            nn_factors[mode] = nn_factors[mode] * numerator / denominator

        rec_error = tl.norm(tensor - kruskal_to_tensor(nn_factors), 2) / norm_tensor
        rec_errors.append(rec_error)
        if iteration > 1 and verbose:
            print('reconstruction error={}, variation={}.'.format(
                rec_errors[-1], rec_errors[-2] - rec_errors[-1]))

        if iteration > 1 and abs(rec_errors[-2] - rec_errors[-1]) < tol:
            if verbose:
                print('converged in {} iterations.'.format(iteration))
            break

    return nn_factors
def test_clips_all_negative_tensor_correctly():
    # Regression test for bug found with the pytorch backend
    negative_valued_tensor = tl.zeros((10, 10)) - 0.1
    clipped_tensor = tl.clip(negative_valued_tensor, 0)
    assert tl.all(clipped_tensor == 0)
def non_negative_tucker(tensor, rank, n_iter_max=10, init='svd', tol=10e-5, random_state=None, verbose=False, return_errors=False, normalize_factors=False): """Non-negative Tucker decomposition Iterative multiplicative update, see [2]_ Parameters ---------- tensor : ``ndarray`` rank : None, int or int list size of the core tensor, ``(len(ranks) == tensor.ndim)`` if int, the same rank is used for all modes n_iter_max : int maximum number of iteration init : {'svd', 'random'} random_state : {None, int, np.random.RandomState} verbose : int , optional level of verbosity ranks : None or int list size of the core tensor normalize_factors : if True, aggregates the core which will contain the norms of the factors. Returns ------- core : ndarray positive core of the Tucker decomposition has shape `ranks` factors : ndarray list list of factors of the CP decomposition element `i` is of shape ``(tensor.shape[i], rank)`` References ---------- .. [2] Yong-Deok Kim and Seungjin Choi, "Non-negative tucker decomposition", IEEE Conference on Computer Vision and Pattern Recognition s(CVPR), pp 1-8, 2007 """ rank = validate_tucker_rank(tl.shape(tensor), rank=rank) epsilon = 10e-12 # Initialisation if init == 'svd': core, factors = tucker(tensor, rank) nn_factors = [tl.abs(f) for f in factors] nn_core = tl.abs(core) else: rng = tl.check_random_state(random_state) core = tl.tensor(rng.random_sample(rank) + 0.01, **tl.context(tensor)) # Check this factors = [ tl.tensor(rng.random_sample(s), **tl.context(tensor)) for s in zip(tl.shape(tensor), rank) ] nn_factors = [tl.abs(f) for f in factors] nn_core = tl.abs(core) norm_tensor = tl.norm(tensor, 2) rec_errors = [] for iteration in range(n_iter_max): for mode in range(tl.ndim(tensor)): B = tucker_to_tensor((nn_core, nn_factors), skip_factor=mode) B = tl.transpose(unfold(B, mode)) numerator = tl.dot(unfold(tensor, mode), B) numerator = tl.clip(numerator, a_min=epsilon, a_max=None) denominator = tl.dot(nn_factors[mode], tl.dot(tl.transpose(B), B)) denominator = tl.clip(denominator, a_min=epsilon, a_max=None) nn_factors[mode] *= numerator / denominator numerator = tucker_to_tensor((tensor, nn_factors), transpose_factors=True) numerator = tl.clip(numerator, a_min=epsilon, a_max=None) for i, f in enumerate(nn_factors): if i: denominator = mode_dot(denominator, tl.dot(tl.transpose(f), f), i) else: denominator = mode_dot(nn_core, tl.dot(tl.transpose(f), f), i) denominator = tl.clip(denominator, a_min=epsilon, a_max=None) nn_core *= numerator / denominator rec_error = tl.norm(tensor - tucker_to_tensor( (nn_core, nn_factors)), 2) / norm_tensor rec_errors.append(rec_error) if iteration > 1 and verbose: print('reconstruction error={}, variation={}.'.format( rec_errors[-1], rec_errors[-2] - rec_errors[-1])) if iteration > 1 and abs(rec_errors[-2] - rec_errors[-1]) < tol: if verbose: print('converged in {} iterations.'.format(iteration)) break if normalize_factors: nn_core, nn_factors = tucker_normalize((nn_core, nn_factors)) tensor = TuckerTensor((nn_core, nn_factors)) if return_errors: return tensor, rec_errors else: return tensor
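# Hedged usage sketch for the non_negative_tucker variant above, which returns
# a TuckerTensor (core, factors):
#
#     import numpy as np
#     import tensorly as tl
#
#     rng = np.random.RandomState(0)
#     tensor = tl.tensor(np.abs(rng.random_sample((12, 12, 12))))
#     tucker_tensor = non_negative_tucker(tensor, rank=[4, 4, 4], n_iter_max=200)
#     core, factors = tucker_tensor
#     reconstruction = tl.tucker_to_tensor((core, factors))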
def custom_parafac(tensor, rank, n_iter_max=100, init='svd', svd='numpy_svd', tol=1e-7, orthogonalise=False, random_state=None, verbose=False, return_errors=False, neg_fac=0): """CANDECOMP/PARAFAC decomposition via alternating least squares (ALS) Computes a rank-`rank` decomposition of `tensor` [1]_ such that, ``tensor = [| factors[0], ..., factors[-1] |]``. Parameters ---------- tensor : ndarray rank : int Number of components. n_iter_max : int Maximum number of iteration init : {'svd', 'random'}, optional Type of factor matrix initialization. See `initialize_factors`. svd : str, default is 'numpy_svd' function to use to compute the SVD, acceptable values in tensorly.SVD_FUNS tol : float, optional (Default: 1e-6) Relative reconstruction error tolerance. The algorithm is considered to have found the global minimum when the reconstruction error is less than `tol`. random_state : {None, int, np.random.RandomState} verbose : int, optional Level of verbosity return_errors : bool, optional Activate return of iteration errors neg_fac: int, optional Index of the factor for which negative values are allowed Returns ------- factors : ndarray list List of factors of the CP decomposition element `i` is of shape (tensor.shape[i], rank) errors : list A list of reconstruction errors at each iteration of the algorithms. References ---------- .. [1] tl.G.Kolda and B.W.Bader, "Tensor Decompositions and Applications", SIAM REVIEW, vol. 51, n. 3, pp. 455-500, 2009. """ # if orthogonalise and not isinstance(orthogonalise, int): # orthogonalise = n_iter_max factors = custom_initialize_factors(tensor, rank, neg_fac=neg_fac, init=init, svd=svd, random_state=random_state) rec_errors = [] norm_tensor = tl.norm(tensor, 2) epsilon = 10e-12 n_factors = len(factors) dims = tl.ndim(tensor) for iteration in range(n_iter_max): for mode in range(dims): if mode == neg_fac: pseudo_inverse = tl.tensor(np.ones((rank, rank)), **tl.context(tensor)) for i, factor in enumerate(factors): if i != mode: pseudo_inverse = pseudo_inverse * tl.dot( tl.transpose(factor), factor) factor = tl.dot( tl.base.unfold(tensor, mode), tl.tenalg.khatri_rao(factors, skip_matrix=mode)) factor = tl.transpose( tl.solve(tl.transpose(pseudo_inverse), tl.transpose(factor))) factors[mode] = factor else: sub_indices = [i for i in range(n_factors) if i != mode] for i, e in enumerate(sub_indices): if i: accum = accum * tl.dot(tl.transpose(factors[e]), factors[e]) else: accum = tl.dot(tl.transpose(factors[e]), factors[e]) numerator = tl.dot( tl.base.unfold(tensor, mode), tl.tenalg.khatri_rao(factors, skip_matrix=mode)) numerator = tl.clip(numerator, a_min=epsilon, a_max=None) denominator = tl.dot(factors[mode], accum) denominator = tl.clip(denominator, a_min=epsilon, a_max=None) factors[mode] = factors[mode] * numerator / denominator if iteration % 25 == 0 and iteration > 1: rec_error = tl.norm( tensor - tl.kruskal_tensor.kruskal_to_tensor(factors), 2) / norm_tensor rec_errors.append(rec_error) if iteration % 25 == 1 and iteration > 1: rec_error = tl.norm( tensor - tl.kruskal_tensor.kruskal_to_tensor(factors), 2) / norm_tensor rec_errors.append(rec_error) print('reconstruction error={}, variation={}.'.format( rec_errors[-1], rec_errors[-2] - rec_errors[-1])) if abs(rec_errors[-2] - rec_errors[-1]) < tol: if verbose: print('converged in {} iterations.'.format(iteration)) break if return_errors: return factors, rec_errors else: return factors
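# Hedged usage sketch for custom_parafac, which solves the chosen mode
# (neg_fac) with an unconstrained least-squares step while the remaining modes
# use non-negative multiplicative updates:
#
#     import numpy as np
#     import tensorly as tl
#
#     rng = np.random.RandomState(0)
#     tensor = tl.tensor(rng.random_sample((15, 20, 25)))
#     factors, errors = custom_parafac(tensor, rank=4, neg_fac=2, return_errors=True)
#     # factors[2] may contain negative entries; the other factors stay non-negative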
def parafac(tensor, rank, n_iter_max=100, init='svd', svd='numpy_svd', tol=1e-8, orthogonalise=False, random_state=None, verbose=False, return_errors=False, non_negative=False): """CANDECOMP/PARAFAC decomposition via alternating least squares (ALS) Computes a rank-`rank` decomposition of `tensor` [1]_ such that, ``tensor = [| factors[0], ..., factors[-1] |]``. Parameters ---------- tensor : ndarray rank : int Number of components. n_iter_max : int Maximum number of iteration init : {'svd', 'random'}, optional Type of factor matrix initialization. See `initialize_factors`. svd : str, default is 'numpy_svd' function to use to compute the SVD, acceptable values in tensorly.SVD_FUNS tol : float, optional (Default: 1e-6) Relative reconstruction error tolerance. The algorithm is considered to have found the global minimum when the reconstruction error is less than `tol`. random_state : {None, int, np.random.RandomState} verbose : int, optional Level of verbosity return_errors : bool, optional Activate return of iteration errors non_negative : bool, optional Perform non_negative PARAFAC. See :func:`non_negative_parafac`. Returns ------- factors : ndarray list List of factors of the CP decomposition element `i` is of shape (tensor.shape[i], rank) errors : list A list of reconstruction errors at each iteration of the algorithms. References ---------- .. [1] tl.G.Kolda and B.W.Bader, "Tensor Decompositions and Applications", SIAM REVIEW, vol. 51, n. 3, pp. 455-500, 2009. """ epsilon = 10e-12 if orthogonalise and not isinstance(orthogonalise, int): orthogonalise = n_iter_max factors = initialize_factors(tensor, rank, init=init, svd=svd, random_state=random_state, non_negative=non_negative) rec_errors = [] norm_tensor = tl.norm(tensor, 2) for iteration in range(n_iter_max): if orthogonalise and iteration <= orthogonalise: factor = [tl.qr(factor)[0] for factor in factors] if verbose: print("Starting iteration", iteration) for mode in range(tl.ndim(tensor)): if verbose: print("Mode", mode, "of", tl.ndim(tensor)) if non_negative: accum = 1 # khatri_rao(factors).tl.dot(khatri_rao(factors)) # simplifies to multiplications sub_indices = [i for i in range(len(factors)) if i != mode] for i, e in enumerate(sub_indices): if i: accum *= tl.dot(tl.transpose(factors[e]), factors[e]) else: accum = tl.dot(tl.transpose(factors[e]), factors[e]) pseudo_inverse = tl.tensor(np.ones((rank, rank)), **tl.context(tensor)) for i, factor in enumerate(factors): if i != mode: pseudo_inverse = pseudo_inverse * tl.dot( tl.conj(tl.transpose(factor)), factor) #factor = tl.dot(unfold(tensor, mode), khatri_rao(factors, skip_matrix=mode).conj()) mttkrp = tl.tenalg.unfolding_dot_khatri_rao(tensor, factors, mode) if non_negative: numerator = tl.clip(mttkrp, a_min=epsilon, a_max=None) denominator = tl.dot(factors[mode], accum) denominator = tl.clip(denominator, a_min=epsilon, a_max=None) factor = factors[mode] * numerator / denominator else: factor = tl.transpose( tl.solve(tl.conj(tl.transpose(pseudo_inverse)), tl.transpose(mttkrp))) factors[mode] = factor if tol: # ||tensor - rec||^2 = ||tensor||^2 + ||rec||^2 - 2*<tensor, rec> # This is ||kruskal_to_tensor(factors)||^2 factors_norm = tl.sum( tl.prod( tl.stack([tl.dot(tl.transpose(f), f) for f in factors], 0), 0)) # mttkrp and factor for the last mode. 
This is equivalent to the # inner product <tensor, factorization> iprod = tl.sum(mttkrp * factor) rec_error = tl.sqrt( tl.abs(norm_tensor**2 + factors_norm - 2 * iprod)) / norm_tensor rec_errors.append(rec_error) if iteration >= 1: if verbose: print('reconstruction error={}, variation={}.'.format( rec_errors[-1], rec_errors[-2] - rec_errors[-1])) if tol and abs(rec_errors[-2] - rec_errors[-1]) < tol: if verbose: print('converged in {} iterations.'.format(iteration)) break else: if verbose: print('reconstruction error={}'.format(rec_errors[-1])) if return_errors: return factors, rec_errors else: return factors
def non_negative_parafac(tensor, rank, n_iter_max=100, init='svd', svd='numpy_svd', tol=10e-7, random_state=None, verbose=0, normalize_factors=False, return_errors=False, mask=None, orthogonalise=False, cvg_criterion='abs_rec_error', fixed_modes=[]): """ Non-negative CP decomposition Uses multiplicative updates, see [2]_ This is the same as parafac(non_negative=True). Parameters ---------- tensor : ndarray rank : int number of components n_iter_max : int maximum number of iteration init : {'svd', 'random'}, optional svd : str, default is 'numpy_svd' function to use to compute the SVD, acceptable values in tensorly.SVD_FUNS tol : float, optional tolerance: the algorithm stops when the variation in the reconstruction error is less than the tolerance random_state : {None, int, np.random.RandomState} verbose : int, optional level of verbosity fixed_modes : list, default is [] A list of modes for which the initial value is not modified. The last mode cannot be fixed due to error computation. Returns ------- factors : ndarray list list of positive factors of the CP decomposition element `i` is of shape ``(tensor.shape[i], rank)`` References ---------- .. [2] Amnon Shashua and Tamir Hazan, "Non-negative tensor factorization with applications to statistics and computer vision", In Proceedings of the International Conference on Machine Learning (ICML), pp 792-799, ICML, 2005 """ epsilon = 10e-12 rank = validate_cp_rank(tl.shape(tensor), rank=rank) if mask is not None and init == "svd": message = "Masking occurs after initialization. Therefore, random initialization is recommended." warnings.warn(message, Warning) if orthogonalise and not isinstance(orthogonalise, int): orthogonalise = n_iter_max weights, factors = initialize_cp(tensor, rank, init=init, svd=svd, random_state=random_state, non_negative=True, normalize_factors=normalize_factors) rec_errors = [] norm_tensor = tl.norm(tensor, 2) if tl.ndim(tensor) - 1 in fixed_modes: warnings.warn( 'You asked for fixing the last mode, which is not supported while tol is fixed.\n The last mode will not be fixed. Consider using tl.moveaxis()' ) fixed_modes.remove(tl.ndim(tensor) - 1) modes_list = [ mode for mode in range(tl.ndim(tensor)) if mode not in fixed_modes ] for iteration in range(n_iter_max): if orthogonalise and iteration <= orthogonalise: for i, f in enumerate(factors): if min(tl.shape(f)) >= rank: factors[i] = tl.abs(tl.qr(f)[0]) if verbose > 1: print("Starting iteration", iteration + 1) for mode in modes_list: if verbose > 1: print("Mode", mode, "of", tl.ndim(tensor)) accum = 1 # khatri_rao(factors).tl.dot(khatri_rao(factors)) # simplifies to multiplications sub_indices = [i for i in range(len(factors)) if i != mode] for i, e in enumerate(sub_indices): if i: accum *= tl.dot(tl.transpose(factors[e]), factors[e]) else: accum = tl.dot(tl.transpose(factors[e]), factors[e]) if mask is not None: tensor = tensor * mask + tl.cp_to_tensor( (None, factors), mask=1 - mask) mttkrp = unfolding_dot_khatri_rao(tensor, (None, factors), mode) numerator = tl.clip(mttkrp, a_min=epsilon, a_max=None) denominator = tl.dot(factors[mode], accum) denominator = tl.clip(denominator, a_min=epsilon, a_max=None) factor = factors[mode] * numerator / denominator factors[mode] = factor if normalize_factors: weights, factors = cp_normalize((weights, factors)) if tol: # ||tensor - rec||^2 = ||tensor||^2 + ||rec||^2 - 2*<tensor, rec> factors_norm = cp_norm((weights, factors)) # mttkrp and factor for the last mode. 
This is equivalent to the # inner product <tensor, factorization> iprod = tl.sum(tl.sum(mttkrp * factor, axis=0) * weights) rec_error = tl.sqrt( tl.abs(norm_tensor**2 + factors_norm**2 - 2 * iprod)) / norm_tensor rec_errors.append(rec_error) if iteration >= 1: rec_error_decrease = rec_errors[-2] - rec_errors[-1] if verbose: print( "iteration {}, reconstraction error: {}, decrease = {}" .format(iteration, rec_error, rec_error_decrease)) if cvg_criterion == 'abs_rec_error': stop_flag = abs(rec_error_decrease) < tol elif cvg_criterion == 'rec_error': stop_flag = rec_error_decrease < tol else: raise TypeError("Unknown convergence criterion") if stop_flag: if verbose: print("PARAFAC converged after {} iterations".format( iteration)) break else: if verbose: print('reconstruction error={}'.format(rec_errors[-1])) cp_tensor = CPTensor((weights, factors)) if return_errors: return cp_tensor, rec_errors else: return cp_tensor
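# Hedged usage sketch for the non_negative_parafac variant above, keeping the
# mode-0 factor fixed at its (random) initialization:
#
#     import numpy as np
#     import tensorly as tl
#
#     rng = np.random.RandomState(0)
#     tensor = tl.tensor(np.abs(rng.random_sample((10, 11, 12))))
#     cp_tensor, errors = non_negative_parafac(tensor, rank=3, init='random',
#                                              fixed_modes=[0], return_errors=True)
#     weights, factors = cp_tensor
#     # factors[0] is unchanged from its initialization; errors tracks convergence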
def nn_her_CPRAND(tensor, rank, n_samples, n_samples_err=400, factors=None, exact_err=False, it_max=100, err_it_max=20, tol=1e-7, beta=0.1, eta=3, gamma=1.01, gamma_bar=1.005, list_factors=False, time_rec=False, filter=10): """ herCPRAND for nonnegative CP-decomposition same err sample taking mean value of last filter values Parameters ---------- tensor : tensor rank : int n_samples : int sample size n_samples_err : int, optional sample size used for error estimation. The default is 400. factors : list of matrices, optional an initial factor matrices. The default is None. exact_err : boolean, optional whether use err or err_rand_fast for terminaison criterion. The default is False. it_max : int, optional maximal number of iteration. The default is 100. err_it_max : int, optional maximal of iteration if terminaison critirion is not improved. The default is 20. tol : float, optional error tolerance. The default is 1e-7. beta : float, optional extrapolation parameter. The default is 0.5. eta : float, optional decrease coefficient of beta. The default is 1.5. gamma : float, optional increase coefficient of beta. The default is 1.05. gamma_bar : float, optional increase coeefficient of beta_bar. The default is 1.01. list_factors : boolean, optional If true, then return factor matrices of each iteration. The default is False. time_rec : boolean, optional If true, return computation time of each iteration. The default is False. filter : int, optional The filter size used for the mean value Returns ------- the CP decomposition, number of iteration, error and restart pourcentage. list_fac and list_time are optional. """ beta_bar = 1 N = tl.ndim(tensor) # order of tensor norm_tensor = tl.norm(tensor) # norm of tensor if list_factors == True: list_fac = [] if (time_rec == True): list_time = [] if (factors == None): factors = svd_init_fac(tensor, rank) # Initialization of factor hat matrice by factor matrices factors_hat = factors if list_factors == True: list_fac.append(copy.deepcopy(factors)) list_F_hat_bf = [] weights = None it = 0 err_it = 0 cpt = 0 ######################################## ######### error initialization ######### ######################################## if (exact_err == True): F_hat_bf = err(tensor, weights, factors) else: F_hat_bf, ind_bf = err_rand(tensor, None, factors, n_samples_err) list_F_hat_bf.append(F_hat_bf) rng = tl.check_random_state(None) error = [F_hat_bf / norm_tensor] min_err = error[len(error) - 1] while (min_err > tol and it < it_max and err_it < err_it_max): if time_rec == True: tic = time.time() for n in range(N): Zs, indices = sample_khatri_rao(factors_hat, n_samples, skip_matrix=n, random_state=rng) indices_list = [i.tolist() for i in indices] indices_list.insert(n, slice(None, None, None)) indices_list = tuple(indices_list) V = tl.dot(tl.transpose(Zs), Zs) if (n == 0): sampled_unfolding = tensor[indices_list] else: sampled_unfolding = tl.transpose(tensor[indices_list]) W = tl.dot(sampled_unfolding, Zs) factor_bf = factors[n] # update fac, _, _, _ = hals_nnls(tl.transpose(W), V, tl.transpose(factors[n])) factors[n] = tl.transpose( fac ) # solve needs a squared full rank matrix, if rank>nb_sampls ok # extrapolate factors_hat[n] = tl.clip(factors[n] + beta * (factors[n] - factor_bf), a_min=0.0) ######################################## ######### error update ######### ######################################## if (exact_err == False): F_hat_new, _ = err_rand(tensor, weights, factors, n_samples_err, indices_list=ind_bf) else: F_hat_new = err(tensor, weights, 
factors) list_F_hat_bf.append(F_hat_new) if (F_hat_new > F_hat_bf): factors_hat = factors beta_bar = beta beta = beta / eta cpt = cpt + 1 else: factors = factors_hat beta_bar = min(1, beta_bar * gamma_bar) beta = min(beta_bar, gamma * beta) ######################################## ######### update for next it ######### ######################################## it = it + 1 if (exact_err == False): if it < filter: F_hat_bf = np.mean(list_F_hat_bf) else: F_hat_bf = np.mean(list_F_hat_bf[(len(list_F_hat_bf) - filter):(len(list_F_hat_bf) - 1)]) else: F_hat_bf = F_hat_new if list_factors == True: list_fac.append(copy.deepcopy(factors)) error.append(F_hat_new / norm_tensor) if (error[len(error) - 1] < min_err): min_err = error[len(error) - 1] # err update else: err_it = err_it + 1 if time_rec == True: toc = time.time() list_time.append(toc - tic) if list_factors == True and time_rec == True: return (weights, factors, it, error, cpt / it, list_fac, list_time) if list_factors == True: return (weights, factors, it, error, cpt / it, list_fac) if time_rec == True: return (weights, factors, it, error, cpt / it, list_time) return (weights, factors, it, error, cpt / it)
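# Hedged usage sketch for nn_her_CPRAND (research code; the sample sizes
# n_samples and n_samples_err are the main extra knobs compared with nn_her_Als):
#
#     import numpy as np
#     import tensorly as tl
#
#     rng = np.random.RandomState(2)
#     tensor = tl.tensor(np.abs(rng.random_sample((50, 50, 50))))
#     weights, factors, n_it, error, restart_pct = nn_her_CPRAND(
#         tensor, rank=5, n_samples=200, n_samples_err=400, it_max=100)
#     # error holds estimated relative errors, one entry per iteration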