def cp_als(X, rank, random_state=None, init='randn', **options):
    """Fits CP Decomposition using Alternating Least Squares (ALS).

    Parameters
    ----------
    X : (I_1, ..., I_N) array_like
        A real array with ``X.ndim >= 3``.

    rank : integer
        The `rank` sets the number of components to be computed.

    random_state : integer, ``RandomState``, or ``None``, optional (default ``None``)
        If integer, sets the seed of the random number generator;
        If RandomState instance, random_state is the random number generator;
        If None, use the RandomState instance used by ``numpy.random``.

    init : str, or KTensor, optional (default ``'randn'``)
        Specifies initial guess for KTensor factor matrices.
        If ``'randn'``, Gaussian random numbers are used to initialize.
        If ``'rand'``, uniform random numbers are used to initialize.
        If KTensor instance, a copy is made to initialize the optimization.

    options : dict, specifying fitting options.

        tol : float, optional (default ``tol=1E-5``)
            Stopping tolerance for reconstruction error.

        max_iter : integer, optional (default ``max_iter = 500``)
            Maximum number of iterations to perform before exiting.

        min_iter : integer, optional (default ``min_iter = 1``)
            Minimum number of iterations to perform before exiting.

        max_time : integer, optional (default ``max_time = np.inf``)
            Maximum computational time before exiting.

        verbose : bool, optional (default ``verbose=True``)
            Display progress.

    Returns
    -------
    result : FitResult instance
        Object which holds the fitted results. It provides the factor
        matrices in form of a KTensor, ``result.factors``.

    Notes
    -----
    This implementation uses the Alternating Least Squares method.

    References
    ----------
    Kolda, T. G. & Bader, B. W. "Tensor Decompositions and Applications."
    SIAM Rev. 51 (2009): 455-500.
    http://epubs.siam.org/doi/pdf/10.1137/07070111X

    Comon, Pierre, Xavier Luciani & Andre De Almeida. "Tensor decompositions,
    alternating least squares and other tales." Journal of Chemometrics
    23 (2009): 393-405.
    http://onlinelibrary.wiley.com/doi/10.1002/cem.1236/abstract

    Examples
    --------

    ```
    import tensortools as tt
    I, J, K, R = 20, 20, 20, 4
    X = tt.randn_tensor(I, J, K, rank=R)
    tt.cp_als(X, rank=R)
    ```
    """

    # Check inputs.
    optim_utils._check_cpd_inputs(X, rank)

    # Initialize problem.
    U, normX = optim_utils._get_initial_ktensor(init, X, rank, random_state)
    result = FitResult(U, 'CP_ALS', **options)

    # Main optimization loop.
    while result.still_optimizing:

        # Iterate over each tensor mode.
        for n in range(X.ndim):

            # i) Normalize factors to prevent singularities.
            U.rebalance()

            # ii) Compute the N-1 gram matrices.
            components = [U[j] for j in range(X.ndim) if j != n]
            grams = np.multiply.reduce([u.T.dot(u) for u in components])

            # iii) Compute Khatri-Rao product.
            kr = khatri_rao(components)

            # iv) Form normal equations and solve. (An equivalent
            # Cholesky-based solve is left here for reference.)
            # c = linalg.cho_factor(grams, overwrite_a=False)
            # p = unfold(X, n).dot(kr)
            # U[n] = linalg.cho_solve(c, p.T, overwrite_b=False).T
            U[n] = linalg.solve(grams, unfold(X, n).dot(kr).T).T

        # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # Update the optimization result, checks for convergence.
        # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

        # Compute objective function.
        # grams *= U[-1].T.dot(U[-1])
        # obj = np.sqrt(np.sum(grams) - 2 * np.sum(p * U[-1]) + normX**2) / normX
        obj = linalg.norm(U.full() - X) / normX

        # Update result.
        result.update(obj)

    # Finalize and return the optimization result.
    return result.finalize()
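

# For intuition, each mode-n step in ``cp_als`` solves the least-squares
# subproblem ``min_A || unfold(X, n) - A @ kr.T ||_F`` via its normal
# equations: ``grams`` equals ``kr.T @ kr``, assembled cheaply as an
# elementwise product of the small R x R factor Gram matrices. The sketch
# below replays one such update with plain NumPy for a third-order tensor.
# It is illustrative only: the helper name ``_als_update_sketch`` is
# hypothetical, and its unfolding/Khatri-Rao ordering conventions may differ
# from this library's ``unfold``/``khatri_rao``.
def _als_update_sketch(X, B, C):
    """One ALS update of the mode-0 factor of a third-order tensor X."""
    I, J, K = X.shape
    # Khatri-Rao product of B (J x R) and C (K x R): shape (J*K) x R.
    kr = np.repeat(B, K, axis=0) * np.tile(C, (J, 1))
    # Gram matrix kr.T @ kr, computed from the small R x R factor Grams.
    grams = (B.T @ B) * (C.T @ C)
    # Mode-0 unfolding (I x J*K), then solve the normal equations.
    unf = X.reshape(I, J * K)
    return np.linalg.solve(grams, (unf @ kr).T).T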


def ncp_bcd(X, rank, random_state=None, init='rand', skip_modes=[], **options):
    """
    Fits nonnegative CP Decomposition using the Block Coordinate Descent
    (BCD) method.

    Parameters
    ----------
    X : (I_1, ..., I_N) array_like
        A real array with nonnegative entries and ``X.ndim >= 3``.

    rank : integer
        The `rank` sets the number of components to be computed.

    random_state : integer, RandomState instance or None, optional (default ``None``)
        If integer, random_state is the seed used by the random number generator;
        If RandomState instance, random_state is the random number generator;
        If None, the random number generator is the RandomState instance
        used by ``np.random``.

    init : str, or KTensor, optional (default ``'rand'``)
        Specifies initial guess for KTensor factor matrices.
        If ``'randn'``, Gaussian random numbers are used to initialize.
        If ``'rand'``, uniform random numbers are used to initialize.
        If KTensor instance, a copy is made to initialize the optimization.

    skip_modes : iterable, optional (default ``[]``)
        Specifies modes of the tensor that are not fit. This can be used
        to fix certain factor matrices that have been previously fit.

    options : dict, specifying fitting options.

        tol : float, optional (default ``tol=1E-5``)
            Stopping tolerance for reconstruction error.

        max_iter : integer, optional (default ``max_iter = 500``)
            Maximum number of iterations to perform before exiting.

        min_iter : integer, optional (default ``min_iter = 1``)
            Minimum number of iterations to perform before exiting.

        max_time : integer, optional (default ``max_time = np.inf``)
            Maximum computational time before exiting.

        verbose : bool, optional (default ``verbose=True``)
            Display progress.

    Returns
    -------
    result : FitResult instance
        Object which holds the fitted results. It provides the factor
        matrices in form of a KTensor, ``result.factors``.

    Notes
    -----
    This implementation uses the Block Coordinate Descent method.

    References
    ----------
    Xu, Yangyang, and Wotao Yin. "A block coordinate descent method for
    regularized multiconvex optimization with applications to nonnegative
    tensor factorization and completion." SIAM Journal on Imaging Sciences
    6.3 (2013): 1758-1789.

    Examples
    --------
    """

    # Check inputs.
    optim_utils._check_cpd_inputs(X, rank)

    # Store the number of tensor modes.
    N = X.ndim

    # Initialize problem.
    U, normX = optim_utils._get_initial_ktensor(init, X, rank, random_state)
    result = FitResult(U, 'NCP_BCD', **options)

    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Block coordinate descent
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    Um = U.copy()             # Extrapolations of components
    extraw = 1                # Used for extrapolation weight update
    weights_U = np.ones(N)    # Extrapolation weights
    L = np.ones(N)            # Lipschitz constants
    obj_bcd = 0.5 * normX**2  # Initial objective value

    # Main optimization loop.
    while result.still_optimizing:
        obj_bcd_old = obj_bcd  # Old objective value
        U_old = U.copy()
        extraw_old = extraw

        for n in range(N):

            # Skip modes that are specified as fixed.
            if n in skip_modes:
                continue

            # Select all components, but U_n
            components = [U[j] for j in range(N) if j != n]

            # i) compute the N-1 gram matrices
            grams = np.multiply.reduce([arr.T.dot(arr) for arr in components])

            # Update gradient Lipschitz constant. (Note: `L` must be copied
            # here so that `L0` retains the previous constants.)
            L0 = L.copy()  # Old Lipschitz constants
            L[n] = linalg.norm(grams, 2)

            # ii) Compute Khatri-Rao product
            kr = khatri_rao(components)
            p = unfold(X, n).dot(kr)

            # Compute gradient.
            grad = Um[n].dot(grams) - p

            # Enforce nonnegativity (project onto nonnegative orthant).
            U[n] = np.maximum(0.0, Um[n] - grad / L[n])

        # Compute objective function and update optimization result.
        # grams *= U[N - 1].T.dot(U[N - 1])
        # obj = np.sqrt(np.sum(grams) - 2 * np.sum(U[N - 1] * p) + normX**2) / normX
        obj = linalg.norm(X - U.full()) / normX
        result.update(obj)

        # Correction and extrapolation.
        grams *= U[N - 1].T.dot(U[N - 1])
        obj_bcd = 0.5 * (np.sum(grams) - 2 * np.sum(U[N - 1] * p) + normX**2)

        extraw = (1 + np.sqrt(1 + 4 * extraw_old**2)) / 2.0

        if obj_bcd >= obj_bcd_old:
            # Restore previous extrapolated factors to keep the objective
            # nonincreasing.
            Um = U_old.copy()
        else:
            # Apply extrapolation.
            w = (extraw_old - 1.0) / extraw  # Extrapolation weight
            for n in range(N):
                # Choose the smaller weight for convergence.
                weights_U[n] = min(w, np.sqrt(L0[n] / L[n]))
                Um[n] = U[n] + weights_U[n] * (U[n] - U_old[n])

    # Finalize and return the optimization result.
    return result.finalize()
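

# The inner update in ``ncp_bcd`` is a single projected gradient step taken
# from the extrapolated point ``Um[n]`` with step size ``1 / L[n]``, where
# ``L[n]`` (the spectral norm of ``grams``) is a Lipschitz constant of the
# gradient. The sketch below shows that step in isolation for one factor
# matrix; the helper name ``_bcd_step_sketch`` is hypothetical.
def _bcd_step_sketch(A, grams, p):
    """Projected gradient step for min 0.5*||unfold(X, n) - A @ kr.T||^2, A >= 0."""
    L = np.linalg.norm(grams, 2)           # Lipschitz constant (spectral norm)
    grad = A.dot(grams) - p                # gradient of the smooth objective
    return np.maximum(0.0, A - grad / L)   # project onto nonnegative orthant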


def ncp_hals(
        X, rank, mask=None, random_state=None, init='rand',
        skip_modes=[], negative_modes=[], **options):
    """
    Fits nonnegative CP Decomposition using the Hierarchical Alternating
    Least Squares (HALS) method.

    Parameters
    ----------
    X : (I_1, ..., I_N) array_like
        A real array with nonnegative entries and ``X.ndim >= 3``.

    rank : integer
        The `rank` sets the number of components to be computed.

    mask : (I_1, ..., I_N) array_like, optional (default ``None``)
        A binary tensor with the same shape as ``X``. Entries equal to zero
        are treated as missing data and are imputed during optimization.
        If ``None``, all entries of ``X`` are treated as observed.

    random_state : integer, RandomState instance or None, optional (default ``None``)
        If integer, random_state is the seed used by the random number generator;
        If RandomState instance, random_state is the random number generator;
        If None, the random number generator is the RandomState instance
        used by ``np.random``.

    init : str, or KTensor, optional (default ``'rand'``)
        Specifies initial guess for KTensor factor matrices.
        If ``'randn'``, Gaussian random numbers are used to initialize.
        If ``'rand'``, uniform random numbers are used to initialize.
        If KTensor instance, a copy is made to initialize the optimization.

    skip_modes : iterable, optional (default ``[]``)
        Specifies modes of the tensor that are not fit. This can be used
        to fix certain factor matrices that have been previously fit.

    negative_modes : iterable, optional (default ``[]``)
        Specifies modes of the tensor whose factors are not constrained
        to be nonnegative.

    options : dict, specifying fitting options.

        tol : float, optional (default ``tol=1E-5``)
            Stopping tolerance for reconstruction error.

        max_iter : integer, optional (default ``max_iter = 500``)
            Maximum number of iterations to perform before exiting.

        min_iter : integer, optional (default ``min_iter = 1``)
            Minimum number of iterations to perform before exiting.

        max_time : integer, optional (default ``max_time = np.inf``)
            Maximum computational time before exiting.

        verbose : bool, optional (default ``verbose=True``)
            Display progress.

    Returns
    -------
    result : FitResult instance
        Object which holds the fitted results. It provides the factor
        matrices in form of a KTensor, ``result.factors``.

    Notes
    -----
    This implementation uses the Hierarchical Alternating Least Squares
    method.

    References
    ----------
    Cichocki, Andrzej, and Anh-Huy Phan. "Fast local algorithms for large
    scale nonnegative matrix and tensor factorizations." IEICE Transactions
    on Fundamentals of Electronics, Communications and Computer Sciences
    92.3: 708-721, 2009.

    Examples
    --------
    """

    # Mask missing elements.
    if mask is not None:
        X = np.copy(X)
        X[~mask] = np.mean(X[mask])

    # Check inputs.
    optim_utils._check_cpd_inputs(X, rank)

    # Initialize problem.
    U, normX = optim_utils._get_initial_ktensor(init, X, rank, random_state)
    result = FitResult(U, 'NCP_HALS', **options)

    # Store norm of X for computing objective function.
    normX = np.linalg.norm(X)

    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Iterate the HALS algorithm until convergence or max_iter is reached:
    # i) compute the N-1 gram matrices and multiply
    # ii) Compute Khatri-Rao product
    # iii) Update component U_1, U_2, ... U_N
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    while result.still_optimizing:

        for n in range(X.ndim):

            # Skip modes that are specified as fixed.
            if n in skip_modes:
                continue

            # Select all components, but U_n
            components = [U[j] for j in range(X.ndim) if j != n]

            # i) compute the N-1 gram matrices
            grams = np.multiply.reduce([arr.T @ arr for arr in components])

            # ii) Compute Khatri-Rao product
            kr = khatri_rao(components)
            Xmkr = unfold(X, n).dot(kr)

            # iii) Update component U_n
            _hals_update(U[n], grams, Xmkr, n not in negative_modes)

            # iv) Update masked elements.
            if mask is not None:
                pred = U.full()
                X[~mask] = pred[~mask]

        # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # Update the optimization result, checks for convergence.
        # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        if mask is None:
            # Expand the squared residual using the last mode's gram matrix.
            grams *= U[-1].T @ U[-1]
            residsq = np.sum(grams) - 2 * np.sum(U[-1] * Xmkr) + (normX ** 2)
            result.update(np.sqrt(residsq) / normX)
        else:
            result.update(np.linalg.norm(X - pred) / normX)

    # end optimization loop, return result.
    return result.finalize()
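

# ``_hals_update`` is called above but not shown in this section. In standard
# HALS, each of the R columns of the factor is updated in closed form while
# the other columns are held fixed, followed by a nonnegative clip. A minimal
# sketch under that assumption (hypothetical helper name ``_hals_sweep_sketch``;
# ``grams`` and ``Xmkr`` are as computed in the loop above):
def _hals_sweep_sketch(A, grams, Xmkr, nonneg=True):
    """Column-wise HALS sweep for min || unfold(X, n) - A @ kr.T ||_F."""
    R = A.shape[1]
    for r in range(R):
        # Closed-form update for column r, holding the other columns fixed.
        numer = Xmkr[:, r] - A.dot(grams[:, r]) + A[:, r] * grams[r, r]
        A[:, r] = numer / grams[r, r]
        if nonneg:
            # Project onto the nonnegative orthant.
            A[:, r] = np.maximum(0.0, A[:, r])
    return A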


def mcp_als(X, rank, mask, random_state=None, init='randn', **options):
    """Fits CP Decomposition with missing data using Alternating Least
    Squares (ALS).

    Parameters
    ----------
    X : (I_1, ..., I_N) array_like
        A tensor with ``X.ndim >= 3``.

    rank : integer
        The `rank` sets the number of components to be computed.

    mask : (I_1, ..., I_N) array_like
        A binary tensor with the same shape as ``X``. All entries equal to
        zero correspond to held out or missing data in ``X``. All entries
        equal to one correspond to observed entries in ``X`` and the
        decomposition is fit to these datapoints.

    random_state : integer, ``RandomState``, or ``None``, optional (default ``None``)
        If integer, sets the seed of the random number generator;
        If RandomState instance, random_state is the random number generator;
        If None, use the RandomState instance used by ``numpy.random``.

    init : str, or KTensor, optional (default ``'randn'``)
        Specifies initial guess for KTensor factor matrices.
        If ``'randn'``, Gaussian random numbers are used to initialize.
        If ``'rand'``, uniform random numbers are used to initialize.
        If KTensor instance, a copy is made to initialize the optimization.

    options : dict, specifying fitting options.

        tol : float, optional (default ``tol=1E-5``)
            Stopping tolerance for reconstruction error.

        max_iter : integer, optional (default ``max_iter = 500``)
            Maximum number of iterations to perform before exiting.

        min_iter : integer, optional (default ``min_iter = 1``)
            Minimum number of iterations to perform before exiting.

        max_time : integer, optional (default ``max_time = np.inf``)
            Maximum computational time before exiting.

        verbose : bool, optional (default ``verbose=True``)
            Display progress.

    Returns
    -------
    result : FitResult instance
        Object which holds the fitted results. It provides the factor
        matrices in form of a KTensor, ``result.factors``.

    Notes
    -----
    Fitting CP decompositions with missing data can be exploited to perform
    cross-validation.

    References
    ----------
    Williams, A. H. "Solving Least-Squares Regression with Missing Data."
    http://alexhwilliams.info/itsneuronalblog/2018/02/26/censored-lstsq/
    """

    # Check inputs.
    optim_utils._check_cpd_inputs(X, rank)

    # Initialize problem.
    U, _ = optim_utils._get_initial_ktensor(
        init, X, rank, random_state, scale_norm=False)
    result = FitResult(U, 'MCP_ALS', **options)
    normX = np.linalg.norm(X * mask)

    # Main optimization loop.
    while result.still_optimizing:

        # Iterate over each tensor mode.
        for n in range(X.ndim):

            # i) Normalize factors to prevent singularities.
            U.rebalance()

            # ii) Unfold data and mask along the nth mode.
            unf = unfold(X, n)   # i_n x N
            m = unfold(mask, n)  # i_n x N

            # iii) Form Khatri-Rao product of factor matrices.
            components = [U[j] for j in range(X.ndim) if j != n]
            krt = khatri_rao(components).T  # r x N

            # iv) Broadcasted solve of linear systems.
            # Left hand side of equations, X.shape[n] x R x R
            lhs_stack = np.matmul(m[:, None, :] * krt[None, :, :], krt.T[None, :, :])

            # Right hand side of equations, X.shape[n] x R x 1
            rhs_stack = np.dot(unf * m, krt.T)[:, :, None]

            # v) Update factor.
            U[n] = np.linalg.solve(lhs_stack, rhs_stack).reshape(X.shape[n], rank)

        # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # Update the optimization result, checks for convergence.
        # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

        # Compute objective function on the observed entries only.
        obj = linalg.norm(mask * (U.full() - X)) / normX

        # Update result.
        result.update(obj)

    # Finalize and return the optimization result.
    return result.finalize()
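

# The broadcasted solve in ``mcp_als`` is censored least squares: row i of
# the factor is fit against only its observed entries, i.e., it solves
# ``((krt * m_i) @ krt.T) u_i = (krt * m_i) @ x_i`` for every row at once
# via ``np.linalg.solve`` on a stack of R x R systems. The sketch below
# shows the same computation in isolation; the helper name
# ``_censored_solve_sketch`` is hypothetical.
def _censored_solve_sketch(unf, m, krt):
    """Fit rows of ``unf`` (i_n x N) against ``krt`` (R x N) under mask ``m``."""
    rank = krt.shape[0]
    # Stack of masked normal equations, one R x R system per row of `unf`.
    lhs_stack = np.matmul(m[:, None, :] * krt[None, :, :], krt.T[None, :, :])
    # Masked right-hand sides, shape i_n x R x 1.
    rhs_stack = np.dot(unf * m, krt.T)[:, :, None]
    return np.linalg.solve(lhs_stack, rhs_stack).reshape(unf.shape[0], rank)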