def test_cp_to_tensor():
    """Test for cp_to_tensor."""
    U1 = np.reshape(np.arange(1, 10), (3, 3))
    U2 = np.reshape(np.arange(10, 22), (4, 3))
    U3 = np.reshape(np.arange(22, 28), (2, 3))
    U4 = np.reshape(np.arange(28, 34), (2, 3))
    U = [tl.tensor(t) for t in [U1, U2, U3, U4]]
    true_res = tl.tensor([[[[46754., 51524.], [52748., 58130.]],
                           [[59084., 65114.], [66662., 73466.]],
                           [[71414., 78704.], [80576., 88802.]],
                           [[83744., 92294.], [94490., 104138.]]],
                          [[[113165., 124784.], [127790., 140912.]],
                           [[143522., 158264.], [162080., 178730.]],
                           [[173879., 191744.], [196370., 216548.]],
                           [[204236., 225224.], [230660., 254366.]]],
                          [[[179576., 198044.], [202832., 223694.]],
                           [[227960., 251414.], [257498., 283994.]],
                           [[276344., 304784.], [312164., 344294.]],
                           [[324728., 358154.], [366830., 404594.]]]])
    res = cp_to_tensor((tl.ones(3), U))
    assert_array_equal(res, true_res,
                       err_msg='Khatri-Rao product incorrectly transformed into full tensor.')

    columns = 4
    rows = [3, 4, 2]
    matrices = [tl.tensor(np.arange(k * columns).reshape((k, columns))) for k in rows]
    tensor = cp_to_tensor((tl.ones(columns), matrices))
    for i in range(len(rows)):
        unfolded = unfold(tensor, mode=i)
        U_i = matrices.pop(i)
        reconstructed = tl.dot(U_i, tl.transpose(khatri_rao(matrices)))
        assert_array_almost_equal(reconstructed, unfolded)
        matrices.insert(i, U_i)
def test_initialize_nn_cp():
    """Test that if we initialise with an existing init, then it isn't modified."""
    init = CPTensor(
        [None, [-tl.ones((30, 3)), -tl.ones((20, 3)), -tl.ones((10, 3))]])
    tensor = cp_to_tensor(init)
    initialised_tensor = initialize_nn_cp(tensor, 3, init=init)
    for factor_matrix, init_factor_matrix in zip(init[1], initialised_tensor[1]):
        assert_array_equal(factor_matrix, init_factor_matrix)
    assert_array_equal(tensor, cp_to_tensor(initialised_tensor))
def test_parafac2_normalize_factors():
    rng = check_random_state(1234)
    rank = 2  # rank 2, so we only need to check the minimum and maximum weights
    random_parafac2_tensor = random_parafac2(
        shapes=[(15 + rng.randint(5), 30) for _ in range(25)],
        rank=rank,
        random_state=rng,
    )
    random_parafac2_tensor.factors[0] = random_parafac2_tensor.factors[0] + 0.1
    norms = tl.ones(rank)
    for factor in random_parafac2_tensor.factors:
        norms = norms * tl.norm(factor, axis=0)

    slices = parafac2_to_tensor(random_parafac2_tensor)

    unnormalized_rec = parafac2(slices, rank, random_state=rng,
                                normalize_factors=False)
    assert unnormalized_rec.weights[0] == 1

    normalized_rec = parafac2(slices, rank, random_state=rng,
                              normalize_factors=True, n_iter_max=1000)
    assert tl.max(tl.abs(T.norm(normalized_rec.factors[0], axis=0) - 1)) < 1e-5
    assert abs(tl.max(norms) - tl.max(normalized_rec.weights)) / tl.max(norms) < 1e-2
    assert abs(tl.min(norms) - tl.min(normalized_rec.weights)) / tl.min(norms) < 1e-2
def test_symmetric_parafac_power_iteration(monkeypatch):
    """Test for symmetric Parafac optimized with robust tensor power iterations."""
    rng = tl.check_random_state(1234)
    tol_norm_2 = 10e-1
    tol_max_abs = 10e-1
    size = 5
    rank = 4
    true_factor = tl.tensor(rng.random_sample((size, rank)))
    true_weights = tl.ones(rank)
    tensor = tl.cp_to_tensor((true_weights, [true_factor] * 3))
    weights, factor = symmetric_parafac_power_iteration(tensor, rank=10,
                                                        n_repeat=10, n_iteration=10)
    rec = tl.cp_to_tensor((weights, [factor] * 3))
    error = tl.norm(rec - tensor, 2)
    error /= tl.norm(tensor, 2)
    assert_(error < tol_norm_2, 'norm 2 of reconstruction higher than tol')
    # Test the max abs difference between the reconstruction and the tensor
    assert_(tl.max(tl.abs(rec - tensor)) < tol_max_abs,
            'abs norm of reconstruction error higher than tol')
    assert_class_wrapper_correctly_passes_arguments(
        monkeypatch, symmetric_parafac_power_iteration, SymmetricCP,
        ignore_args={}, rank=3)
def vec2factors(vec, shape, rank, context=None):
    """Wrapper function detailed in Appendix C [1]

    Builds a set of N matrices, where the k-th matrix is shape[k] x rank.

    Parameters
    ----------
    vec : ndarray
        vector of values used to populate the factor matrices
    shape : tensor shape
        shape of the tensor; dictates the number of rows in each matrix
    rank : int
        number of columns in each matrix
        *** rank cannot be greater than the dimension of the smallest mode ***
    context : dict, optional
        backend context (e.g. dtype) passed when building each factor

    Returns
    -------
    M1 : CPTensor
        CPTensor with factor matrices formed from `vec`
    """
    if context is None:
        context = {}
    numFacts = len(shape)
    factors = []
    place = 0
    for i in range(numFacts):
        factor = np.zeros((rank * shape[i]), **context)
        for j in range(shape[i] * rank):
            factor[j] = vec[j + place]
        factor = tl.tensor(factor.reshape((rank, shape[i])), **context)
        factors.append(tl.transpose(factor))
        place += shape[i] * rank
    M1 = CPTensor((tl.ones(rank), factors))
    return M1
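# Minimal usage sketch for vec2factors (illustration only, not part of the
# original module; assumes the NumPy backend so that an empty context dict
# is valid). The vector is laid out factor by factor, each factor stored as
# a row-major (rank, shape[i]) block that is then transposed.
def _demo_vec2factors():
    shape, rank = (4, 3, 2), 2
    vec = np.arange(sum(s * rank for s in shape), dtype=float)
    cp_tensor = vec2factors(vec, shape, rank)
    for factor, dim in zip(cp_tensor.factors, shape):
        assert tl.shape(factor) == (dim, rank)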
def smoothness_prox(tensor, regularizer):
    """Proximal operator for smoothness

    Solves the tridiagonal system (I + regularizer * L) x = tensor, where L
    is the second-difference matrix built below from the tl.diag calls.

    Parameters
    ----------
    tensor : ndarray
    regularizer : float

    Returns
    -------
    ndarray
    """
    diag_matrix = tl.diag(2 * regularizer * tl.ones(tl.shape(tensor)[0]) + 1) + \
        tl.diag(-regularizer * tl.ones(tl.shape(tensor)[0] - 1), k=-1) + \
        tl.diag(-regularizer * tl.ones(tl.shape(tensor)[0] - 1), k=1)
    return tl.solve(diag_matrix, tensor)
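# A minimal check of smoothness_prox (illustration only, NumPy backend
# assumed): since the proximal step solves the tridiagonal system above,
# an oscillating input should come back with reduced amplitude.
def _demo_smoothness_prox():
    t = tl.tensor(np.array([[0.0], [10.0], [0.0], [10.0]]))
    smoothed = smoothness_prox(t, regularizer=1.0)
    assert float(tl.max(tl.abs(smoothed))) < float(tl.max(tl.abs(t)))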
def test_cp_normalize():
    shape = (3, 4, 5)
    rank = 4
    cp_tensor = random_cp(shape, rank)
    weights, factors = cp_normalize(cp_tensor)
    expected_norm = tl.ones(rank)
    for f in factors:
        assert_array_almost_equal(tl.norm(f, axis=0), expected_norm)
    assert_array_almost_equal(cp_to_tensor((weights, factors)),
                              cp_to_tensor(cp_tensor))
def dtd(factors_old, X_old, X_new, rank, n_iter=1, mu=1, verbose=False):
    weights = tl.ones(rank)
    if verbose:
        X = tl.tensor(np.concatenate((X_old, X_new)))
    n_dim = tl.ndim(X_old)
    U = factors_old.copy()

    for i in range(n_iter):
        # temporal mode for A1
        V = tl.tensor(np.ones((rank, rank)))
        for j, factor in enumerate(U):
            if j != 0:
                V = V * tl.dot(tl.transpose(factor), factor)
        mttkrp = unfolding_dot_khatri_rao(X_new, (None, U), 0)
        A1 = tl.transpose(tl.solve(tl.transpose(V), tl.transpose(mttkrp)))

        # non-temporal modes
        for mode in range(1, n_dim):
            U1 = U.copy()
            U1[0] = A1
            V = tl.tensor(np.ones((rank, rank)))
            W = tl.tensor(np.ones((rank, rank)))
            for j, factor in enumerate(U):
                factor_old = factors_old[j]
                if j != mode:
                    W = W * tl.dot(tl.transpose(factor_old), factor)
                    if j == 0:
                        V = V * (mu * tl.dot(tl.transpose(factor), factor)
                                 + tl.dot(tl.transpose(A1), A1))
                    else:
                        V = V * tl.dot(tl.transpose(factor), factor)
            mttkrp0 = mu * tl.dot(factors_old[mode], W)
            mttkrp1 = unfolding_dot_khatri_rao(X_new, (None, U1), mode)
            U[mode] = tl.transpose(
                tl.solve(tl.transpose(V), tl.transpose(mttkrp0 + mttkrp1)))

        # temporal mode for A0
        V = tl.tensor(np.ones((rank, rank)))
        W = tl.tensor(np.ones((rank, rank)))
        for j, factor in enumerate(U):
            factor_old = factors_old[j]
            if j != 0:
                V = V * tl.dot(tl.transpose(factor), factor)
                W = W * tl.dot(tl.transpose(factor_old), factor)
        mttkrp = tl.dot(factors_old[0], W)
        U[0] = tl.transpose(tl.solve(tl.transpose(V), tl.transpose(mttkrp)))

        if verbose:
            U1 = U.copy()
            U1[0] = np.concatenate((U[0], A1))
            X_est = construct_tensor(U1)
            compare_tensors(X, X_est)

    U[0] = np.concatenate((U[0].copy(), A1))
    return KruskalTensor((weights, U))
def krprod(factors, indices_list):
    """Compute the Khatri-Rao product restricted to the given nonzero indices."""
    rank = tl.shape(factors[0])[1]
    nnz = len(indices_list[0])
    nonzeros = tl.ones((nnz, rank), **tl.context(factors[0]))
    for indices, factor in zip(indices_list, factors):
        nonzeros = nonzeros * factor[indices, :]
    return torch.sum(nonzeros, dim=1)
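# Sanity sketch for krprod (illustration only). Because krprod calls
# torch.sum, the PyTorch backend is assumed (tl.set_backend('pytorch')):
# each returned value should equal the corresponding entry of the full
# unit-weight CP reconstruction.
def _demo_krprod():
    rng = np.random.RandomState(0)
    factors = [tl.tensor(rng.random_sample((3, 2))),
               tl.tensor(rng.random_sample((4, 2)))]
    indices_list = [np.array([0, 2]), np.array([1, 3])]
    vals = krprod(factors, indices_list)
    full = tl.cp_to_tensor((tl.ones(2), factors))
    for k in range(2):
        entry = full[indices_list[0][k], indices_list[1][k]]
        assert abs(float(vals[k] - entry)) < 1e-6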
def normalize_factors(factors):
    """Normalizes factors to unit length and returns factor magnitudes

    Turns ``factors = [|U_1, ... U_n|]`` into ``[weights; |V_1, ... V_n|]``,
    where the columns of each `V_k` are normalized to unit Euclidean length
    from the columns of `U_k` with the normalizing constants absorbed into
    `weights`. In the special case of a symmetric tensor, `weights` holds the
    eigenvalues of the tensor.

    Parameters
    ----------
    factors : ndarray list
        list of matrices, all with the same number of columns, i.e.::

            for u in U:
                u[i].shape == (s_i, R)

        where `R` is fixed while `s_i` can vary with `i`

    Returns
    -------
    normalized_factors : list of ndarrays
        list of matrices with the same shape as `factors`
    weights : ndarray
        vector of length `R` holding normalizing constants
    """
    # allocate variables for weights and normalized factors
    rank = factors[0].shape[1]
    weights = tl.ones(rank, **tl.context(factors[0]))
    normalized_factors = []

    # normalize columns of factor matrices
    for factor in factors:
        scales = tl.norm(factor, axis=0)
        weights *= scales
        scales_non_zero = tl.where(scales == 0,
                                   tl.ones(tl.shape(scales), **tl.context(factors[0])),
                                   scales)
        normalized_factors.append(factor / scales_non_zero)
    return normalized_factors, weights
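# Usage sketch (illustration only): after normalization every factor column
# has unit Euclidean norm and the scalings are absorbed into the weights.
def _demo_normalize_factors():
    rng = np.random.RandomState(0)
    factors = [tl.tensor(rng.random_sample((5, 3))),
               tl.tensor(rng.random_sample((4, 3)))]
    normalized, weights = normalize_factors(factors)
    for f in normalized:
        np.testing.assert_allclose(tl.to_numpy(tl.norm(f, axis=0)), np.ones(3))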
def test_cp_to_vec():
    """Test for cp_to_vec."""
    U1 = np.reshape(np.arange(1, 10), (3, 3))
    U2 = np.reshape(np.arange(10, 22), (4, 3))
    U3 = np.reshape(np.arange(22, 28), (2, 3))
    U4 = np.reshape(np.arange(28, 34), (2, 3))
    U = [tl.tensor(t) for t in [U1, U2, U3, U4]]
    cp_tensor = CPTensor((tl.ones(3), U))
    full_tensor = cp_to_tensor(cp_tensor)
    true_res = tensor_to_vec(full_tensor)
    res = cp_to_vec(cp_tensor)
    assert_array_equal(true_res, res,
                       err_msg='khatri_rao product converted incorrectly to vec.')
def test_cp_to_unfolded():
    """Test for cp_to_unfolded.

    !!Assumes that cp_to_tensor and unfold are properly tested and work!!
    """
    U1 = np.reshape(np.arange(1, 10), (3, 3))
    U2 = np.reshape(np.arange(10, 22), (4, 3))
    U3 = np.reshape(np.arange(22, 28), (2, 3))
    U4 = np.reshape(np.arange(28, 34), (2, 3))
    U = [tl.tensor(t) for t in [U1, U2, U3, U4]]
    cp_tensor = CPTensor((tl.ones(3), U))
    full_tensor = cp_to_tensor(cp_tensor)
    for mode in range(4):
        true_res = unfold(full_tensor, mode)
        res = cp_to_unfolded(cp_tensor, mode)
        assert_array_equal(true_res, res,
                           err_msg='khatri_rao product unfolded incorrectly for mode {}.'.format(mode))
def tl_sample_uniform(tensor, nsamp):
    """Uniformly sample `nsamp` indices from `tensor`, along with the
    corresponding values and the weight of the sample.

    Parameters
    ----------
    tensor : ndarray
        Dense tensor
    nsamp : integer
        number of samples

    Returns
    -------
    subs : ndarray
        Subscripts (indices)
    vals : ndarray
        Values
    wgts : ndarray
        Weights
    """
    d = tl.ndim(tensor)
    shp = tl.shape(tensor)
    tsz = 1
    for i in shp:
        tsz *= i

    # generate subscripts
    subSamp = lambda x, y: np.ceil(x * y)
    subs = subSamp(np.random.rand(nsamp, d), shp)
    subs = subs.astype(int) - 1  # adjust for zero-indexing

    # quick check that indices are in bounds
    if tl.min(subs) < 0:
        print("Bad subscripts generated for sampling.")
        sys.exit(1)

    # capture corresponding values for subscripts
    vals = []
    for i in subs:
        index = tuple(i.tolist())
        vals.append(tensor[index])
    vals = tl.reshape(tl.tensor(vals), (len(vals), 1))

    # calculate weights for sample
    wgts = tsz / nsamp * tl.ones((nsamp, 1))
    return subs, vals, wgts
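# Usage sketch (illustration only, NumPy backend assumed): the sampled
# values match the tensor entries at the returned subscripts, and every
# weight equals tensor_size / nsamp.
def _demo_tl_sample_uniform():
    tensor = tl.tensor(np.random.rand(3, 4, 2))
    subs, vals, wgts = tl_sample_uniform(tensor, nsamp=5)
    assert subs.shape == (5, 3)
    assert float(wgts[0]) == 24 / 5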
def test_gcp_fg_est_helper():
    # build up a test-case tensor (X) from a CP tensor (M),
    # keying off Matlab implementation results in debug mode
    rank = 2
    shape = [4, 2, 3]
    ndim = len(shape)
    factors = []
    for i in range(ndim):
        factors.append(tl.tensor(np.arange(rank * shape[i]).reshape(rank, shape[i]).T))
    weights = tl.ones(rank)
    mTen = CPTensor((weights, factors))
    ten = tl.cp_to_tensor(mTen)
    print(tl.shape(ten))
    nsamp = 10
    subs, vals, wgts = tl_sample_uniform(ten, nsamp)

    # set up the test scenario: alt_subs & alt_vals (wgts unaffected)
    ind0 = np.array([0, 1, 2])
    ind1 = np.array([3, 1, 1])
    ind2 = np.array([2, 0, 2])
    ind3 = np.array([0, 0, 1])
    ind4 = np.array([3, 0, 2])
    ind5 = np.array([2, 1, 1])
    ind6 = np.array([3, 1, 0])
    ind7 = np.array([0, 0, 1])
    ind8 = np.array([3, 0, 1])
    ind9 = np.array([0, 0, 1])
    alt_subs = tl.tensor(np.array([ind0, ind1, ind2, ind3, ind4,
                                   ind5, ind6, ind7, ind8, ind9]))
    alt_vals = []
    for i in range(nsamp):
        alt_vals.append(ten[tuple(alt_subs[i])])

    # call gcp_fg_est_helper(); the model values returned ought to match
    # those in alt_vals because of how ten and mTen were constructed
    mvals, z_exp = gcp_fg_est_helper(mTen.factors, alt_subs)
    for k in range(nsamp):
        assert (mvals[k] == alt_vals[k])
def parafac2(tensor_slices, rank, n_iter_max=100, init='random', svd='numpy_svd',
             normalize_factors=False, tol=1e-8, random_state=None, verbose=False,
             return_errors=False, n_iter_parafac=5):
    r"""PARAFAC2 decomposition [1]_ of a third order tensor via alternating least squares (ALS)

    Computes a rank-`rank` PARAFAC2 decomposition of the third-order tensor
    defined by `tensor_slices`. The decomposition is of the form
    :math:`(A [B_i] C)` such that the i-th frontal slice, :math:`X_i`, of
    :math:`X` is given by

    .. math::

        X_i = B_i diag(a_i) C^T,

    where :math:`diag(a_i)` is the diagonal matrix whose nonzero entries are
    equal to the :math:`i`-th row of the :math:`I \times R` factor matrix
    :math:`A`, :math:`B_i` is a :math:`J_i \times R` factor matrix such that
    the cross product matrix :math:`B_i^T B_i` is constant for all :math:`i`,
    and :math:`C` is a :math:`K \times R` factor matrix. To compute this
    decomposition, we reformulate the expression for :math:`B_i` such that

    .. math::

        B_i = P_i B,

    where :math:`P_i` is a :math:`J_i \times R` orthogonal matrix and
    :math:`B` is an :math:`R \times R` matrix.

    An alternative formulation of the PARAFAC2 decomposition is that the
    tensor element :math:`X_{ijk}` is given by

    .. math::

        X_{ijk} = \sum_{r=1}^R A_{ir} B_{ijr} C_{kr},

    with the same constraints holding for :math:`B_i` as above.

    Parameters
    ----------
    tensor_slices : ndarray or list of ndarrays
        Either a third order tensor or a list of second order tensors that may
        have different numbers of rows. Note that the second mode factor
        matrices are allowed to change over the first mode, not the third mode
        as some other implementations use (see note below).
    rank : int
        Number of components.
    n_iter_max : int
        Maximum number of iteration
    init : {'svd', 'random', CPTensor, Parafac2Tensor}
        Type of factor matrix initialization. See `initialize_factors`.
    svd : str, default is 'numpy_svd'
        function to use to compute the SVD, acceptable values in tensorly.SVD_FUNS
    normalize_factors : bool (optional)
        If True, aggregate the weights of each factor in a 1D-tensor of shape
        (rank, ), which will contain the norms of the factors. Note that there
        may be some inaccuracies in the component weights.
    tol : float, optional
        (Default: 1e-8) Relative reconstruction error tolerance. The algorithm
        is considered to have found the global minimum when the reconstruction
        error is less than `tol`.
    random_state : {None, int, np.random.RandomState}
    verbose : int, optional
        Level of verbosity
    return_errors : bool, optional
        Activate return of iteration errors
    n_iter_parafac : int, optional
        Number of PARAFAC iterations to perform for each PARAFAC2 iteration

    Returns
    -------
    Parafac2Tensor : (weight, factors, projection_matrices)
        * weights : 1D array of shape (rank, )
          all ones if normalize_factors is False (default),
          weights of the (normalized) factors otherwise
        * factors : List of factors of the CP decomposition
          element `i` is of shape (tensor.shape[i], rank)
        * projection_matrices : List of projection matrices used to create
          evolving factors.

    errors : list
        A list of reconstruction errors at each iteration of the algorithms.

    References
    ----------
    .. [1] Kiers, H.A.L., ten Berge, J.M.F. and Bro, R. (1999),
       PARAFAC2 - Part I. A direct fitting algorithm for the PARAFAC2 model.
       J. Chemometrics, 13: 275-294.

    Notes
    -----
    This formulation of the PARAFAC2 decomposition is slightly different from
    the one in [1]_. The difference lies in that, here, the second mode changes
    over the first mode, whereas in [1]_ the second mode changes over the third
    mode. We made this change since it means that the function accepts both
    lists of matrices and a single nd-array as input without any reordering of
    the modes.
    """
    weights, factors, projections = initialize_decomposition(
        tensor_slices, rank, init=init, svd=svd, random_state=random_state)

    rec_errors = []
    norm_tensor = tl.sqrt(
        sum(tl.norm(tensor_slice, 2)**2 for tensor_slice in tensor_slices))
    svd_fun = _get_svd(svd)

    projected_tensor = tl.zeros([factor.shape[0] for factor in factors],
                                **T.context(factors[0]))

    for iteration in range(n_iter_max):
        if verbose:
            print("Starting iteration", iteration)
        factors[1] = factors[1] * T.reshape(weights, (1, -1))
        weights = T.ones(weights.shape, **tl.context(tensor_slices[0]))

        projections = _compute_projections(tensor_slices, factors, svd_fun,
                                           out=projections)
        projected_tensor = _project_tensor_slices(tensor_slices, projections,
                                                  out=projected_tensor)
        _, factors = parafac(projected_tensor, rank, n_iter_max=n_iter_parafac,
                             init=(weights, factors), svd=svd,
                             orthogonalise=False, verbose=verbose,
                             return_errors=False, normalize_factors=False,
                             mask=None, random_state=random_state, tol=1e-100)

        if normalize_factors:
            new_factors = []
            for factor in factors:
                norms = T.norm(factor, axis=0)
                norms = tl.where(tl.abs(norms) <= tl.eps(factor.dtype),
                                 tl.ones(tl.shape(norms), **tl.context(factors[0])),
                                 norms)
                weights = weights * norms
                new_factors.append(factor / (tl.reshape(norms, (1, -1))))
            factors = new_factors

        if tol:
            rec_error = _parafac2_reconstruction_error(
                tensor_slices, (weights, factors, projections))
            rec_error /= norm_tensor
            rec_errors.append(rec_error)

            if iteration >= 1:
                if verbose:
                    print('PARAFAC2 reconstruction error={}, variation={}.'.format(
                        rec_errors[-1], rec_errors[-2] - rec_errors[-1]))

                if tol and abs(rec_errors[-2] - rec_errors[-1]) < tol:
                    if verbose:
                        print('converged in {} iterations.'.format(iteration))
                    break
            else:
                if verbose:
                    print('PARAFAC2 reconstruction error={}'.format(rec_errors[-1]))

    parafac2_tensor = Parafac2Tensor((weights, factors, projections))

    if return_errors:
        return parafac2_tensor, rec_errors
    else:
        return parafac2_tensor
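# Usage sketch for parafac2 (illustration only): decompose a list of
# matrices that share their second mode but differ in their first.
def _demo_parafac2():
    rng = np.random.RandomState(0)
    tensor_slices = [tl.tensor(rng.random_sample((j, 6))) for j in (10, 12, 11)]
    weights, factors, projections = parafac2(tensor_slices, rank=2,
                                             n_iter_max=20, random_state=rng)
    assert len(projections) == len(tensor_slices)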
def one_ntd_step(tensor, ranks, in_core, in_factors, norm_tensor,
                 sparsity_coefficients, fixed_modes, normalize, mode_core_norm,
                 alpha=0.5, delta=0.01):
    """One pass of Hierarchical Alternating Least Squares update along all
    modes, and gradient update on the core, which decreases the reconstruction
    error in Nonnegative Tucker Decomposition.

    Updates the factors by solving a least squares problem per mode, as
    described in [1]. Note that the unfolding order is the one described in
    [2], which is different from [1].

    This function is strictly superior to a least squares solver run on the
    matricized problems min_X ||Y - AX||_F^2, since A is structured as a
    Kronecker product of the other factors/core.

    Tensors are manipulated with the tensorly toolbox [3].

    Parameters
    ----------
    tensor : tensorly tensor
        The tensor to decompose (e.g. a spectrogram tensor).
    ranks : list of integers
        Ranks for each factor of the decomposition.
    in_core : tensorly tensor
        Current estimate of the core.
    in_factors : list of array
        Current estimates of the factors of this NTD.
        The value of factor[update_mode] will be updated using a least squares
        update. The values in in_factors are not modified.
    norm_tensor : float
        The Frobenius norm of the input tensor.
    sparsity_coefficients : list of float (one per mode, plus one for the core)
        The sparsity coefficients on each factor and on the core, respectively.
    fixed_modes : list of integers (between 0 and the number of modes, the
        last index standing for the core)
        Has to be set not to update a factor; taken in the order of modes,
        with the core last.
    normalize : list of boolean (one per mode, plus one for the core)
        Whether each factor (and the core) should be normalized.
        The normalization is an l_2 normalization of each of the rank
        components (for the factors, each column is normalized, i.e. each
        atom of the dimension of the current rank).
    mode_core_norm : integer or None
        The mode along which to normalize the core, or None if normalization
        shouldn't be enforced. Only used if the last element of the previous
        "normalize" argument is set to True.
        Indexes of the modes start at 0.
        Default: None
    alpha : positive float
        Ratio between outer computations and inner loops. Typically set to 0.5
        or 1. Set to +inf in the deterministic mode, as it depends on runtime.
        Default: 0.5
    delta : float in [0,1]
        Early stop criterion, while err_k > delta * err_0. Set small for an
        almost exact nnls solution, or larger (e.g. 1e-2) for the inner loops
        of an NTD computation.
        Default: 0.01

    Returns
    -------
    core : tensorly tensor
        The core tensor linking the factors of the decomposition
    factors : list of factors
        An array containing all the factors computed with the NTD
    cost_fct_val :
        The value of the cost function at this step, normalized by the squared
        norm of the original tensor.

    References
    ----------
    [1] Tamara G. Kolda and Brett W. Bader. "Tensor decompositions and
    applications", SIAM review 51.3 (2009), pp. 455-500.

    [2] Jeremy E. Cohen. "About notations in multiway array processing",
    arXiv preprint arXiv:1511.01306, (2015).

    [3] J. Kossaifi et al. "TensorLy: Tensor Learning in Python",
    arXiv preprint (2018)
    """
    # Avoiding errors
    for fixed_value in fixed_modes:
        sparsity_coefficients[fixed_value] = None

    # Copy
    core = in_core.copy()
    factors = in_factors.copy()

    # Generating the mode update sequence
    modes_list = [mode for mode in range(tl.ndim(tensor)) if mode not in fixed_modes]

    for mode in modes_list:
        tic = time.time()

        # UtU
        # First, element-wise products
        # (some computations could be reused, but the gain is small)
        elemprod = factors.copy()
        for i, factor in enumerate(factors):
            if i != mode:
                elemprod[i] = tl.dot(tl.conj(tl.transpose(factor)), factor)

        # Second, the multiway product with core G;
        # this line can be computed with tensor contractions
        temp = tl.tenalg.multi_mode_dot(core, elemprod, skip=mode)
        con_modes = [i for i in range(tl.ndim(tensor)) if i != mode]
        UtU = tl.tenalg.contract(temp, con_modes, core, con_modes)

        # UtM
        # First, the contraction of data with the other factors;
        # again, computable by tensor contractions
        temp = tl.tenalg.multi_mode_dot(tensor, factors, skip=mode, transpose=True)
        MtU = tl.tenalg.contract(temp, con_modes, core, con_modes)
        UtM = tl.transpose(MtU)

        timer = time.time() - tic

        # Call the HALS resolution with nnls, optimizing the current mode
        factors[mode] = tl.transpose(
            nnls.hals_nnls_acc(UtM, UtU, tl.transpose(factors[mode]),
                               maxiter=100, atime=timer, alpha=alpha, delta=delta,
                               sparsity_coefficient=sparsity_coefficients[mode],
                               normalize=normalize[mode])[0])

    # Core update: reuse the computation of temp and elemprod from the last
    # mode update instead of recomputing multi_mode_dot(tensor, factors).
    all_MtX = tl.tenalg.mode_dot(temp, tl.transpose(factors[modes_list[-1]]),
                                 modes_list[-1])
    all_MtM = tl.copy(elemprod)
    all_MtM[modes_list[-1]] = factors[modes_list[-1]].T @ factors[modes_list[-1]]

    # Projected gradient
    gradient_step = 1
    for MtM in all_MtM:
        gradient_step *= 1 / (scipy.sparse.linalg.svds(MtM, k=1)[1][0])
    # Heuristic, to avoid consecutive imprecision
    gradient_step = round(gradient_step, 6)

    cnt = 1
    upd_0 = 0
    upd = 1

    if sparsity_coefficients[-1] is None:
        sparse = 0
    else:
        sparse = sparsity_coefficients[-1]

    # TODO: dynamic stopping criterion
    # Maybe: try fast gradient instead of gradient
    while cnt <= 300 and upd >= delta * upd_0:
        gradient = (- all_MtX
                    + tl.tenalg.multi_mode_dot(core, all_MtM, transpose=False)
                    + sparse * tl.ones(core.shape))

        # Proposition of reformulation for error computations
        delta_core = np.minimum(gradient_step * gradient, core)
        core = core - delta_core
        upd = tl.norm(delta_core)
        if cnt == 1:
            upd_0 = upd
        cnt += 1

    if normalize[-1]:
        unfolded_core = tl.unfold(core, mode_core_norm)
        for idx_mat in range(unfolded_core.shape[0]):
            if tl.norm(unfolded_core[idx_mat]) != 0:
                unfolded_core[idx_mat] = unfolded_core[idx_mat] / tl.norm(unfolded_core[idx_mat], 2)
        core = tl.fold(unfolded_core, mode_core_norm, core.shape)

    # Adding the l1 norm value to the reconstruction error
    sparsity_error = 0
    for index, sparse in enumerate(sparsity_coefficients):
        if sparse:
            if index < len(factors):
                sparsity_error += 2 * (sparse * np.linalg.norm(factors[index], ord=1))
            elif index == len(factors):
                sparsity_error += 2 * (sparse * tl.norm(core, 1))
            else:
                raise NotImplementedError("TODEBUG: Too many sparsity coefficients, should have been raised before.")

    rec_error = (norm_tensor ** 2 - 2 * tl.tenalg.inner(all_MtX, core)
                 + tl.tenalg.inner(tl.tenalg.multi_mode_dot(core, all_MtM, transpose=False), core))
    cost_fct_val = (rec_error + sparsity_error) / (norm_tensor ** 2)

    return core, factors, cost_fct_val
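# Hypothetical call sketch for one_ntd_step (illustration only; the initial
# core/factors and the nnls module providing hals_nnls_acc come from
# elsewhere in this project):
#
#     core, factors, cost = one_ntd_step(
#         tensor, ranks, core_0, factors_0, tl.norm(tensor, 2),
#         sparsity_coefficients=[None] * (tl.ndim(tensor) + 1),
#         fixed_modes=[], normalize=[False] * (tl.ndim(tensor) + 1),
#         mode_core_norm=None)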
def online_tensor_decomposition(dataset, X, X_stream, rank, n_iter=1, ul=-1, ll=-1,
                                verbose=False,
                                methods=['dao', 'dtd', 'ocp', 'fcp']):
    results = {}
    start = time.time()
    (weights, factors_old) = parafac(X_stream[0], rank, init='random')
    init_time = time.time() - start

    for method in methods:
        print('-------------------------------------')
        mem_usage = sys.getsizeof(X_stream[0])
        if method in ['dao', 'dtd']:
            print(f'>> {method}: rank-{rank} n_iter-{n_iter}')
        elif method in ['ocp', 'fcp']:
            print(f'>> {method}: rank-{rank}')
        factors = factors_old
        X_old = X_stream[0]
        n_dim = tl.ndim(X_old)

        if not method in ['dao', 'dtd', 'ocp', 'fcp']:
            raise ValueError('The method does not exist.')

        if method == 'fcp':
            mem_usage = sys.getsizeof(X)
            ktensor = parafac(X, rank, init='random')
            (weights, factors) = ktensor
            mem_usage += sys.getsizeof(factors)
            X_est = construct_tensor(factors)
            err_norm = tl.norm(X - X_est)
            global_rt = time.time() - start
            global_fit = 1 - (err_norm / tl.norm(X))
            print('Global Fitness :', format(global_fit * 100, '.4f'), '%')
            print('Global Running Time :', format(global_rt, '.4f'), 'sec')
            print('Memory Usage :', mem_usage, 'bytes')
            results[method] = [ktensor]
            continue

        ktensors = []
        verbose_list = []
        split_points = []
        refine_points = []
        fitness = []
        running_time = []
        begin = time.time() - init_time
        welford = Welford()
        X_est = construct_tensor(factors)
        err_norm = tl.norm(X_old - X_est)
        welford(err_norm * 1.2)

        if method == 'ocp':
            start = time.time()
            K = get_KhatriRao_except0(factors)
            H = get_Hadamard(factors)
            P = np.empty((n_dim), dtype=object)
            Q = np.empty((n_dim), dtype=object)
            for mode in range(1, n_dim):
                P[mode] = tl.dot(tl.unfold(X_old, mode),
                                 tl.tenalg.khatri_rao((factors[0], K[mode])))
                Q[mode] = H / tl.dot(tl.transpose(factors[mode]), factors[mode])
            mem_usage += sys.getsizeof(K)
            mem_usage += sys.getsizeof(H)
            mem_usage += sys.getsizeof(P)
            mem_usage += sys.getsizeof(Q)

        iter_mem_usage = 0
        for i, X_new in enumerate(X_stream[1:]):
            i_mem = sys.getsizeof(X_new)
            start = time.time()
            if method == 'dao':
                (weights, factors0) = data_adaptive_online_cp(
                    factors.copy(), X_old, X_new, rank, n_iter=n_iter, mu=0.8,
                    verbose=False)
            elif method == 'ocp':
                ((weights, factors0), P0, Q0) = online_cp(
                    factors.copy(), X_old, X_new, rank, P, Q, verbose=False)
            elif method == 'dtd':
                (weights, factors0) = dtd(factors.copy(), X_old, X_new, rank,
                                          n_iter=n_iter, mu=1, verbose=False)
            U = factors0.copy()
            U[0] = U[0][-X_new.shape[0] - 1:-1]
            i_mem += sys.getsizeof(U)
            dX_est = construct_tensor(U)
            err_norm = tl.norm(X_new - dX_est)
            z_score = get_z_score(err_norm, welford.mean, welford.std)

            if method == 'dao' and ul > 0 and z_score > ul:
                # split: close the current ktensor and restart from this batch
                weights = tl.ones(rank)
                ktensors.append(KruskalTensor((weights, factors.copy())))
                split_points.append(i + 1)
                X_old = X_stream[i + 1]
                (weights, factors0) = parafac(X_old, rank, init='random')
                elapsed_time = time.time() - start
                verbose_list.append([i + 1, elapsed_time, err_norm, z_score])
                i_mem += sys.getsizeof(factors0)
                start = time.time()
                X_est = construct_tensor(factors0)
                err_norm = tl.norm(X_old - X_est)
                welford = Welford()
                welford(err_norm * 1.2)
                z_score = get_z_score(err_norm, welford.mean, welford.std)
                factors = factors0.copy()
                welford(err_norm)
                elapsed_time = time.time() - start
                verbose_list.append([i + 1, elapsed_time, err_norm, z_score])
                fitness.append(err_norm / tl.norm(X_new))
                running_time.append(elapsed_time)
                continue
            elif method == 'dao' and ll > 0 and z_score > ll:
                # refine: rerun the update with more iterations
                refine_points.append(i + 1)
                elapsed_time = time.time() - start
                verbose_list.append([i + 1, elapsed_time, err_norm, z_score])
                (weights, factors) = data_adaptive_online_cp(
                    factors, X_old, X_new, rank, n_iter=n_iter * 2, mu=0.5,
                    verbose=False)
                i_mem += sys.getsizeof(factors)
                U = factors.copy()
                U[0] = U[0][-X_new.shape[0] - 1:-1]
                dX_est = construct_tensor(U)
                err_norm = tl.norm(X_new - dX_est)
                welford(err_norm)
            else:
                if method == 'ocp':
                    P = P0
                    Q = Q0
                factors = factors0.copy()
                welford(err_norm)

            elapsed_time = time.time() - start
            verbose_list.append([i + 1, elapsed_time, err_norm, z_score])
            fitness.append(err_norm / tl.norm(X_new))
            running_time.append(elapsed_time)
            X_old = tl.concatenate((X_old, X_new))
            iter_mem_usage = max(iter_mem_usage, i_mem)
            if verbose:
                X_est = construct_tensor(factors)
                compare_tensors(X_old, X_est)

        mem_usage += iter_mem_usage
        weights = tl.ones(rank)
        ktensors.append(KruskalTensor((weights, factors)))
        mem_usage += sys.getsizeof(ktensors)
        global_rt = time.time() - begin

        tensor_est = construct_tensor(ktensors[0][1])
        for (weights, factors) in ktensors[1:]:
            tensor_est = tl.tensor(
                tl.concatenate((tensor_est, construct_tensor(factors))))
        global_error_norm = compare_tensors(X, tensor_est)
        if method == 'dao':
            print(f'SPLIT: {len(split_points)}, REFINE: {len(refine_points)}')

        if method != 'fcp':
            verbose_list = np.asarray(verbose_list, dtype=float)
            fitness = np.asarray(fitness, dtype=float)
            running_time = np.asarray(running_time, dtype=float)
            tot_norm = tl.norm(X)
            local_fit = 1 - np.mean(fitness)
            local_rt = np.mean(running_time)
            global_fit = 1 - (global_error_norm / tot_norm)
            print('Global Fitness :', format(global_fit * 100, '.4f'), '%')
            print('Avg Local Fitness :', format(local_fit * 100, '.4f'), '%')
            print('Global Running Time :', format(global_rt, '.4f'), 'sec')
            print('Avg Local Running Time :', format(local_rt, '.4f'), 'sec')
            print('Memory Usage :', mem_usage, 'bytes')

        results[method] = ktensors
    return results
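# Hypothetical call sketch for online_tensor_decomposition (illustration
# only): X is the full tensor and X_stream a list of batches along the
# temporal (first) mode, e.g. produced by np.array_split; for 'dao' the
# ul/ll thresholds control when the stream is split or refined.
#
#     results = online_tensor_decomposition('demo', X, X_stream, rank=5,
#                                           n_iter=1, ul=10, ll=2,
#                                           methods=['dao', 'dtd'])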
def randomised_parafac(tensor, rank, n_samples, n_iter_max=100, init='random',
                       svd='numpy_svd', tol=10e-9, max_stagnation=20,
                       random_state=None, verbose=1):
    """Randomised CP decomposition via sampled ALS

    Parameters
    ----------
    tensor : ndarray
    rank : int
        number of components
    n_samples : int
        number of samples per ALS step
    n_iter_max : int
        maximum number of iteration
    init : {'svd', 'random'}, optional
    svd : str, default is 'numpy_svd'
        function to use to compute the SVD, acceptable values in tensorly.SVD_FUNS
    tol : float, optional
        tolerance: the algorithm stops when the variation in the reconstruction
        error is less than the tolerance
    max_stagnation : int, optional, default is 20
        if not zero, the maximum allowed number of iterations with no decrease
        in fit
    random_state : {None, int, np.random.RandomState}, default is None
    verbose : int, optional
        level of verbosity

    Returns
    -------
    factors : ndarray list
        list of positive factors of the CP decomposition
        element `i` is of shape ``(tensor.shape[i], rank)``

    References
    ----------
    .. [3] Casey Battaglino, Grey Ballard and Tamara G. Kolda,
       "A Practical Randomized CP Tensor Decomposition",
    """
    rng = check_random_state(random_state)
    factors = initialize_factors(tensor, rank, init=init, svd=svd,
                                 random_state=random_state)
    rec_errors = []
    n_dims = tl.ndim(tensor)
    norm_tensor = tl.norm(tensor, 2)
    min_error = 0

    weights = tl.ones(rank, **tl.context(tensor))
    for iteration in range(n_iter_max):
        for mode in range(n_dims):
            kr_prod, indices_list = sample_khatri_rao(factors, n_samples,
                                                      skip_matrix=mode,
                                                      random_state=rng)
            indices_list = [i.tolist() for i in indices_list]
            # Keep all the elements of the currently considered mode
            indices_list.insert(mode, slice(None, None, None))
            # MXNet will not be happy if this is a list instead of a tuple
            indices_list = tuple(indices_list)
            if mode:
                sampled_unfolding = tensor[indices_list]
            else:
                sampled_unfolding = tl.transpose(tensor[indices_list])
            pseudo_inverse = tl.dot(tl.transpose(kr_prod), kr_prod)
            factor = tl.dot(tl.transpose(kr_prod), sampled_unfolding)
            factor = tl.transpose(tl.solve(pseudo_inverse, factor))
            factors[mode] = factor

        if max_stagnation or tol:
            rec_error = tl.norm(tensor - kruskal_to_tensor((weights, factors)),
                                2) / norm_tensor
            if not min_error or rec_error < min_error:
                min_error = rec_error
                stagnation = -1
            stagnation += 1
            rec_errors.append(rec_error)

            if iteration > 1:
                if verbose:
                    print('reconstruction error={}, variation={}.'.format(
                        rec_errors[-1], rec_errors[-2] - rec_errors[-1]))
                if (tol and abs(rec_errors[-2] - rec_errors[-1]) < tol) or \
                   (stagnation and (stagnation > max_stagnation)):
                    if verbose:
                        print('converged in {} iterations.'.format(iteration))
                    break

    return KruskalTensor((weights, factors))
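# Usage sketch (illustration only): randomised ALS on a small random tensor,
# drawing n_samples rows of the Khatri-Rao product per mode update.
def _demo_randomised_parafac():
    rng = np.random.RandomState(0)
    tensor = tl.tensor(rng.random_sample((8, 7, 6)))
    weights, factors = randomised_parafac(tensor, rank=3, n_samples=20,
                                          random_state=1234, verbose=0)
    assert [tl.shape(f)[0] for f in factors] == [8, 7, 6]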
def sample_khatri_rao(matrices, n_samples, skip_matrix=None,
                      return_sampled_rows=False, random_state=None):
    """Random subsample of the Khatri-Rao product of the given list of matrices

    If only one matrix is given, that matrix is directly returned.

    Parameters
    ----------
    matrices : ndarray list
        list of matrices with the same number of columns, i.e.::

            for i in len(matrices):
                matrices[i].shape = (n_i, m)

    n_samples : int
        number of samples to be taken from the Khatri-Rao product
    skip_matrix : None or int, optional, default is None
        if not None, index of a matrix to skip
    random_state : None, int or numpy.random.RandomState
        if int, used to set the seed of the random number generator
        if numpy.random.RandomState, used to generate random_samples
    return_sampled_rows : bool, default is False
        if True, also returns a list of the rows sampled from the full
        Khatri-Rao product

    Returns
    -------
    sampled_Khatri_Rao : ndarray
        The sampled matricised tensor Khatri-Rao with `n_samples` rows
    indices : tuple list
        a list of indices sampled for each mode
    indices_kr : int list
        list of length `n_samples` containing the sampled row indices
    """
    if random_state is None or not isinstance(random_state, np.random.RandomState):
        rng = check_random_state(random_state)
        warnings.warn(
            'You are creating a new random number generator at each call.\n'
            'If you are calling sample_khatri_rao inside a loop this will be slow:'
            ' best to create a rng outside and pass it as argument (random_state=rng).')
    else:
        rng = random_state

    if skip_matrix is not None:
        matrices = [matrices[i] for i in range(len(matrices)) if i != skip_matrix]

    rank = tl.shape(matrices[0])[1]
    sizes = [tl.shape(m)[0] for m in matrices]

    # For each matrix, randomly choose n_samples indices for which to compute
    # the Khatri-Rao product
    indices_list = [rng.randint(0, tl.shape(m)[0], size=n_samples, dtype=int)
                    for m in matrices]
    if return_sampled_rows:
        # Compute corresponding rows of the full Khatri-Rao product
        indices_kr = np.zeros((n_samples), dtype=int)
        for size, indices in zip(sizes, indices_list):
            indices_kr = indices_kr * size + indices

    # Compute the Khatri-Rao product for the chosen indices
    sampled_kr = tl.ones((n_samples, rank), **tl.context(matrices[0]))
    for indices, matrix in zip(indices_list, matrices):
        sampled_kr = sampled_kr * matrix[indices, :]

    if return_sampled_rows:
        return sampled_kr, indices_list, indices_kr
    else:
        return sampled_kr, indices_list
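# Usage sketch (illustration only): sampled rows agree with the matching
# rows of the full Khatri-Rao product computed with tl.tenalg.khatri_rao.
def _demo_sample_khatri_rao():
    rng = np.random.RandomState(0)
    matrices = [tl.tensor(rng.random_sample((3, 2))),
                tl.tensor(rng.random_sample((4, 2)))]
    sampled, _, rows = sample_khatri_rao(matrices, n_samples=5,
                                         return_sampled_rows=True,
                                         random_state=rng)
    full = tl.tenalg.khatri_rao(matrices)
    np.testing.assert_allclose(tl.to_numpy(sampled), tl.to_numpy(full)[rows])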
def parafac(tensor, rank, n_iter_max=100, init='svd', svd='numpy_svd',
            normalize_factors=False, tol=1e-8, orthogonalise=False,
            random_state=None, verbose=0, return_errors=False,
            non_negative=False, mask=None):
    """CANDECOMP/PARAFAC decomposition via alternating least squares (ALS)

    Computes a rank-`rank` decomposition of `tensor` [1]_ such that,
    ``tensor = [|weights; factors[0], ..., factors[-1] |]``.

    Parameters
    ----------
    tensor : ndarray
    rank : int
        Number of components.
    n_iter_max : int
        Maximum number of iteration
    init : {'svd', 'random'}, optional
        Type of factor matrix initialization. See `initialize_factors`.
    svd : str, default is 'numpy_svd'
        function to use to compute the SVD, acceptable values in tensorly.SVD_FUNS
    normalize_factors : if True, aggregate the weights of each factor in a
        1D-tensor of shape (rank, ), which will contain the norms of the factors
    tol : float, optional (Default: 1e-8)
        Relative reconstruction error tolerance. The algorithm is considered to
        have found the global minimum when the reconstruction error is less
        than `tol`.
    random_state : {None, int, np.random.RandomState}
    verbose : int, optional
        Level of verbosity
    return_errors : bool, optional
        Activate return of iteration errors
    non_negative : bool, optional
        Perform non_negative PARAFAC. See :func:`non_negative_parafac`.
    mask : ndarray
        array of booleans with the same shape as ``tensor`` should be 0 where
        the values are missing and 1 everywhere else. Note: if tensor is
        sparse, then mask should also be sparse with a fill value of 1 (or
        True). Allows for missing values [2]_

    Returns
    -------
    KruskalTensor : (weight, factors)
        * weights : 1D array of shape (rank, )
          all ones if normalize_factors is False (default),
          weights of the (normalized) factors otherwise
        * factors : List of factors of the CP decomposition
          element `i` is of shape (tensor.shape[i], rank)

    errors : list
        A list of reconstruction errors at each iteration of the algorithms.

    References
    ----------
    .. [1] T.G.Kolda and B.W.Bader, "Tensor Decompositions and Applications",
       SIAM REVIEW, vol. 51, n. 3, pp. 455-500, 2009.
    .. [2] Tomasi, Giorgio, and Rasmus Bro. "PARAFAC and missing values."
       Chemometrics and Intelligent Laboratory Systems 75.2 (2005): 163-180.
    """
    epsilon = 10e-12

    if orthogonalise and not isinstance(orthogonalise, int):
        orthogonalise = n_iter_max

    factors = initialize_factors(tensor, rank, init=init, svd=svd,
                                 random_state=random_state,
                                 non_negative=non_negative,
                                 normalize_factors=normalize_factors)
    rec_errors = []
    norm_tensor = tl.norm(tensor, 2)
    weights = tl.ones(rank, **tl.context(tensor))

    for iteration in range(n_iter_max):
        if orthogonalise and iteration <= orthogonalise:
            factors = [tl.qr(f)[0] if min(tl.shape(f)) >= rank else f
                       for i, f in enumerate(factors)]

        if verbose > 1:
            print("Starting iteration", iteration + 1)
        for mode in range(tl.ndim(tensor)):
            if verbose > 1:
                print("Mode", mode, "of", tl.ndim(tensor))
            if non_negative:
                # khatri_rao(factors).T.dot(khatri_rao(factors))
                # simplifies to multiplications
                accum = 1
                sub_indices = [i for i in range(len(factors)) if i != mode]
                for i, e in enumerate(sub_indices):
                    if i:
                        accum *= tl.dot(tl.transpose(factors[e]), factors[e])
                    else:
                        accum = tl.dot(tl.transpose(factors[e]), factors[e])

            pseudo_inverse = tl.tensor(np.ones((rank, rank)), **tl.context(tensor))
            for i, factor in enumerate(factors):
                if i != mode:
                    pseudo_inverse = pseudo_inverse * tl.dot(
                        tl.conj(tl.transpose(factor)), factor)

            if mask is not None:
                tensor = tensor * mask + tl.kruskal_to_tensor((None, factors),
                                                              mask=1 - mask)

            mttkrp = unfolding_dot_khatri_rao(tensor, (None, factors), mode)

            if non_negative:
                numerator = tl.clip(mttkrp, a_min=epsilon, a_max=None)
                denominator = tl.dot(factors[mode], accum)
                denominator = tl.clip(denominator, a_min=epsilon, a_max=None)
                factor = factors[mode] * numerator / denominator
            else:
                factor = tl.transpose(
                    tl.solve(tl.conj(tl.transpose(pseudo_inverse)),
                             tl.transpose(mttkrp)))

            if normalize_factors:
                weights = tl.norm(factor, order=2, axis=0)
                weights = tl.where(tl.abs(weights) <= tl.eps(tensor.dtype),
                                   tl.ones(tl.shape(weights), **tl.context(factors[0])),
                                   weights)
                factor = factor / (tl.reshape(weights, (1, -1)))

            factors[mode] = factor

        if tol:
            # ||tensor - rec||^2 = ||tensor||^2 + ||rec||^2 - 2*<tensor, rec>
            factors_norm = kruskal_norm((weights, factors))

            # mttkrp and factor for the last mode. This is equivalent to the
            # inner product <tensor, factorization>
            iprod = tl.sum(tl.sum(mttkrp * factor, axis=0) * weights)
            rec_error = tl.sqrt(tl.abs(norm_tensor**2 + factors_norm**2
                                       - 2 * iprod)) / norm_tensor
            rec_errors.append(rec_error)

            if iteration >= 1:
                if verbose:
                    print('reconstruction error={}, variation={}.'.format(
                        rec_errors[-1], rec_errors[-2] - rec_errors[-1]))
                if tol and abs(rec_errors[-2] - rec_errors[-1]) < tol:
                    if verbose:
                        print('converged in {} iterations.'.format(iteration))
                    break
            else:
                if verbose:
                    print('reconstruction error={}'.format(rec_errors[-1]))

    kruskal_tensor = KruskalTensor((weights, factors))

    if return_errors:
        return kruskal_tensor, rec_errors
    else:
        return kruskal_tensor
def forward(self, x, y=None):
    relu_latent = []
    pool_latent = []
    bias_latent_cnn = []
    relu_latentpn = []
    mean_latent_cnn = []
    var_latent_cnn = []
    xbias = th.zeros([1, x.shape[1], x.shape[2], x.shape[3]], device=None)

    ############################ conv1 #####################################
    x = self.features[0](x)
    xbias = self.features[0](xbias)
    mean_latent_cnn.append(th.mean(x, dim=(0, 2, 3), keepdim=True))
    var_latent_cnn.append(th.mean((x - th.mean(x, dim=(0, 2, 3), keepdim=True))**2,
                                  dim=(0, 2, 3), keepdim=True))
    ############################ batchnorm1 #################################
    x = self.features[1](x)
    xbias = self.insnorms_cnn[0](xbias)
    bias_latent_cnn.append(self.features[1].bias)
    ############################ relu1 ######################################
    x = self.features[2](x)
    xbias = self.features[2](xbias)
    # relu_latent and relu_latentpn keep track of which pixels are activated
    # in the leaky ReLU
    relu_latent.append(th.gt(x, 0).float() + th.le(x, 0).float() * 0.1)
    relu_latentpn.append(th.gt(xbias, 0).float() + th.le(xbias, 0).float() * 0.1)
    ############################ pool1 ######################################
    # pool_latent records the locations where the original pixel values are
    # greater than the ones after interpolation from a max-pooled output.
    pool_latent.append(th.ge(x - F.interpolate(self.features[3](x),
                                               scale_factor=2, mode='nearest'), 0))
    x = self.features[3](x)          # perform max pooling on the activation
    xbias = self.features[3](xbias)  # perform max pooling on the bias activation

    ############################ conv2 #####################################
    x = self.features[4](x)
    xbias = self.features[4](xbias)
    mean_latent_cnn.append(th.mean(x, dim=(0, 2, 3), keepdim=True))
    var_latent_cnn.append(th.mean((x - th.mean(x, dim=(0, 2, 3), keepdim=True))**2,
                                  dim=(0, 2, 3), keepdim=True))
    ############################ batchnorm2 #################################
    x = self.features[5](x)
    xbias = self.insnorms_cnn[1](xbias)
    bias_latent_cnn.append(self.features[5].bias)
    ############################ relu2 ######################################
    x = self.features[6](x)
    xbias = self.features[6](xbias)
    relu_latent.append(th.gt(x, 0).float() + th.le(x, 0).float() * 0.1)
    relu_latentpn.append(th.gt(xbias, 0).float() + th.le(xbias, 0).float() * 0.1)
    ############################ pool2 ######################################
    pool_latent.append(th.ge(x - F.interpolate(self.features[7](x),
                                               scale_factor=2, mode='nearest'), 0))
    x = self.features[7](x)          # perform max pooling on the activation
    xbias = self.features[7](xbias)  # perform max pooling on the bias activation

    ############################ conv3 #####################################
    x = self.features[8](x)
    xbias = self.features[8](xbias)
    mean_latent_cnn.append(th.mean(x, dim=(0, 2, 3), keepdim=True))
    var_latent_cnn.append(th.mean((x - th.mean(x, dim=(0, 2, 3), keepdim=True))**2,
                                  dim=(0, 2, 3), keepdim=True))
    ############################ batchnorm3 #################################
    x = self.features[9](x)
    xbias = self.insnorms_cnn[2](xbias)
    bias_latent_cnn.append(self.features[9].bias)
    ############################ relu3 ######################################
    x = self.features[10](x)
    xbias = self.features[10](xbias)
    relu_latent.append(th.gt(x, 0).float() + th.le(x, 0).float() * 0.1)
    relu_latentpn.append(th.gt(xbias, 0).float() + th.le(xbias, 0).float() * 0.1)
    ############################ pool3 ######################################
    pool_latent.append(th.ge(x - F.interpolate(self.features[11](x),
                                               scale_factor=2, mode='nearest'), 0))
    x = self.features[11](x)          # perform max pooling on the activation
    xbias = self.features[11](xbias)  # perform max pooling on the bias activation

    relu_latent = relu_latent[::-1]
    pool_latent = pool_latent[::-1]
    bias_latent_cnn = bias_latent_cnn[::-1]
    self.bias_latent_cnn = bias_latent_cnn
    relu_latentpn = relu_latentpn[::-1]
    mean_latent_cnn = mean_latent_cnn[::-1]
    var_latent_cnn = var_latent_cnn[::-1]

    # send the features into the classifier
    trl_w, z = self.trl(x)
    w_t = trl_w.permute(dims=(3, 0, 1, 2))

    # do reconstruction via nrm
    # xhat: the reconstructed image
    # loss_pn: path normalization loss
    # use z to reconstruct instead of argmax of z
    if y is not None:
        one_hot = make_one_hot(y, self.num_class)
    else:
        one_hot = make_one_hot(th.argmax(z.detach(), dim=1), self.num_class)
    xhat, _, loss_pn, loss_neg = self.topdown(
        self.nrm, one_hot, relu_latent, pool_latent, bias_latent_cnn,
        tl.ones([1, z.size()[1]], device=None), relu_latentpn,
        mean_latent_cnn, var_latent_cnn, w_t)
    return [z, xhat, loss_pn, loss_neg]
def make_svd_non_negative(tensor, U, S, V, nntype):
    """Use NNDSVD method to transform SVD results into a non-negative form.

    This method leads to more efficient solving with NNMF [1].

    Parameters
    ----------
    tensor : tensor being decomposed
    U, S, V : SVD factorization results
    nntype : {'nndsvd', 'nndsvda'}
        Whether to fill small values with 0.0 (nndsvd),
        or the tensor mean (nndsvda, default).

    [1]: Boutsidis & Gallopoulos. Pattern Recognition, 41(4): 1350-1362, 2008.
    """
    # NNDSVD initialization
    W = tl.zeros_like(U)
    H = tl.zeros_like(V)

    # The leading singular triplet is non-negative,
    # so it can be used as is for initialization.
    W = tl.index_update(W, tl.index[:, 0], tl.sqrt(S[0]) * tl.abs(U[:, 0]))
    H = tl.index_update(H, tl.index[0, :], tl.sqrt(S[0]) * tl.abs(V[0, :]))

    for j in range(1, tl.shape(U)[1]):
        x, y = U[:, j], V[j, :]

        # extract positive and negative parts of column vectors
        x_p, y_p = tl.clip(x, a_min=0.0), tl.clip(y, a_min=0.0)
        x_n, y_n = tl.abs(tl.clip(x, a_max=0.0)), tl.abs(tl.clip(y, a_max=0.0))

        # and their norms
        x_p_nrm, y_p_nrm = tl.norm(x_p), tl.norm(y_p)
        x_n_nrm, y_n_nrm = tl.norm(x_n), tl.norm(y_n)

        m_p, m_n = x_p_nrm * y_p_nrm, x_n_nrm * y_n_nrm

        # choose update
        if m_p > m_n:
            u = x_p / x_p_nrm
            v = y_p / y_p_nrm
            sigma = m_p
        else:
            u = x_n / x_n_nrm
            v = y_n / y_n_nrm
            sigma = m_n

        lbd = tl.sqrt(S[j] * sigma)
        W = tl.index_update(W, tl.index[:, j], lbd * u)
        H = tl.index_update(H, tl.index[j, :], lbd * v)

    # After this point we no longer need H
    eps = tl.eps(tensor.dtype)

    if nntype == "nndsvd":
        W = soft_thresholding(W, eps)
    elif nntype == "nndsvda":
        avg = tl.mean(tensor)
        W = tl.where(W < eps, tl.ones(tl.shape(W), **tl.context(W)) * avg, W)
    else:
        raise ValueError('Invalid nntype parameter: got %r instead of one of %r' %
                         (nntype, ('nndsvd', 'nndsvda')))

    return W
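# Usage sketch (illustration only; tl.partial_svd is assumed available, as
# in the TensorLy versions this code targets): build a nonnegative W from
# the truncated SVD of a nonnegative matrix.
def _demo_make_svd_non_negative():
    rng = np.random.RandomState(0)
    matrix = tl.tensor(rng.random_sample((6, 5)))
    U, S, V = tl.partial_svd(matrix, n_eigenvecs=3)
    W = make_svd_non_negative(matrix, U, S, V, nntype="nndsvda")
    assert float(tl.min(W)) >= 0.0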
def parafac(tensor, rank, n_iter_max=100, init='svd', svd='numpy_svd',
            normalize_factors=False, orthogonalise=False,
            tol=1e-8, random_state=None,
            verbose=0, return_errors=False,
            sparsity=None,
            l2_reg=0, mask=None,
            cvg_criterion='abs_rec_error',
            fixed_modes=[], svd_mask_repeats=5, linesearch=False):
    """CANDECOMP/PARAFAC decomposition via alternating least squares (ALS)

    Computes a rank-`rank` decomposition of `tensor` [1]_ such that::

        tensor = [|weights; factors[0], ..., factors[-1] |].

    Parameters
    ----------
    tensor : ndarray
    rank : int
        Number of components.
    n_iter_max : int
        Maximum number of iteration
    init : {'svd', 'random'}, optional
        Type of factor matrix initialization. See `initialize_factors`.
    svd : str, default is 'numpy_svd'
        function to use to compute the SVD, acceptable values in tensorly.SVD_FUNS
    normalize_factors : if True, aggregate the weights of each factor in a
        1D-tensor of shape (rank, ), which will contain the norms of the factors
    tol : float, optional (Default: 1e-8)
        Relative reconstruction error tolerance. The algorithm is considered to
        have found the global minimum when the reconstruction error is less
        than `tol`.
    random_state : {None, int, np.random.RandomState}
    verbose : int, optional
        Level of verbosity
    return_errors : bool, optional
        Activate return of iteration errors
    mask : ndarray
        array of booleans with the same shape as ``tensor`` should be 0 where
        the values are missing and 1 everywhere else. Note: if tensor is
        sparse, then mask should also be sparse with a fill value of 1 (or
        True). Allows for missing values [2]_
    cvg_criterion : {'abs_rec_error', 'rec_error'}, optional
        Stopping criterion for ALS, works if `tol` is not None.
        If 'rec_error', ALS stops at current iteration if
        ``(previous rec_error - current rec_error) < tol``.
        If 'abs_rec_error', ALS terminates when
        ``|previous rec_error - current rec_error| < tol``.
    sparsity : float or int
        If `sparsity` is not None, we approximate tensor as a sum of
        low_rank_component and sparse_component, where low_rank_component =
        cp_to_tensor((weights, factors)). `sparsity` denotes desired fraction
        or number of non-zero elements in the sparse_component of the `tensor`.
    fixed_modes : list, default is []
        A list of modes for which the initial value is not modified.
        The last mode cannot be fixed due to error computation.
    svd_mask_repeats : int
        If using a tensor with masked values, this initializes using SVD
        multiple times to remove the effect of these missing values on the
        initialization.
    linesearch : bool, default is False
        Whether to perform line search as proposed by Bro [3].

    Returns
    -------
    CPTensor : (weight, factors)
        * weights : 1D array of shape (rank, )

          * all ones if normalize_factors is False (default)
          * weights of the (normalized) factors otherwise

        * factors : List of factors of the CP decomposition
          element `i` is of shape ``(tensor.shape[i], rank)``
        * sparse_component : nD array of shape tensor.shape.
          Returns only if `sparsity` is not None.

    errors : list
        A list of reconstruction errors at each iteration of the algorithms.

    References
    ----------
    .. [1] T.G.Kolda and B.W.Bader, "Tensor Decompositions and Applications",
       SIAM REVIEW, vol. 51, n. 3, pp. 455-500, 2009.
    .. [2] Tomasi, Giorgio, and Rasmus Bro. "PARAFAC and missing values."
       Chemometrics and Intelligent Laboratory Systems 75.2 (2005): 163-180.
    .. [3] R. Bro, "Multi-Way Analysis in the Food Industry: Models,
       Algorithms, and Applications", PhD., University of Amsterdam, 1998
    """
    rank = validate_cp_rank(tl.shape(tensor), rank=rank)

    if orthogonalise and not isinstance(orthogonalise, int):
        orthogonalise = n_iter_max

    if linesearch:
        acc_pow = 2.0   # Extrapolate to the iteration^(1/acc_pow) ahead
        acc_fail = 0    # How many times acceleration has failed
        max_fail = 4    # Increase acc_pow by one after max_fail failures

    weights, factors = initialize_cp(tensor, rank, init=init, svd=svd,
                                     random_state=random_state,
                                     normalize_factors=normalize_factors)

    if mask is not None and init == "svd":
        for _ in range(svd_mask_repeats):
            tensor = tensor * mask + tl.cp_to_tensor((weights, factors),
                                                     mask=1 - mask)
            weights, factors = initialize_cp(tensor, rank, init=init, svd=svd,
                                             random_state=random_state,
                                             normalize_factors=normalize_factors)

    rec_errors = []
    norm_tensor = tl.norm(tensor, 2)
    Id = tl.eye(rank, **tl.context(tensor)) * l2_reg

    if tl.ndim(tensor) - 1 in fixed_modes:
        warnings.warn('You asked for fixing the last mode, which is not supported.\n'
                      'The last mode will not be fixed. Consider using tl.moveaxis()')
        fixed_modes.remove(tl.ndim(tensor) - 1)
    modes_list = [mode for mode in range(tl.ndim(tensor)) if mode not in fixed_modes]

    if sparsity:
        sparse_component = tl.zeros_like(tensor)
        if isinstance(sparsity, float):
            sparsity = int(sparsity * np.prod(tensor.shape))
        else:
            sparsity = int(sparsity)

    for iteration in range(n_iter_max):
        if orthogonalise and iteration <= orthogonalise:
            factors = [tl.qr(f)[0] if min(tl.shape(f)) >= rank else f
                       for i, f in enumerate(factors)]

        if linesearch and iteration % 2 == 0:
            factors_last = [tl.copy(f) for f in factors]
            weights_last = tl.copy(weights)

        if verbose > 1:
            print("Starting iteration", iteration + 1)
        for mode in modes_list:
            if verbose > 1:
                print("Mode", mode, "of", tl.ndim(tensor))

            pseudo_inverse = tl.tensor(np.ones((rank, rank)), **tl.context(tensor))
            for i, factor in enumerate(factors):
                if i != mode:
                    pseudo_inverse = pseudo_inverse * tl.dot(
                        tl.conj(tl.transpose(factor)), factor)
            pseudo_inverse += Id

            if not iteration and weights is not None:
                # Take into account init weights
                mttkrp = unfolding_dot_khatri_rao(tensor, (weights, factors), mode)
            else:
                mttkrp = unfolding_dot_khatri_rao(tensor, (None, factors), mode)

            factor = tl.transpose(tl.solve(tl.conj(tl.transpose(pseudo_inverse)),
                                           tl.transpose(mttkrp)))

            if normalize_factors:
                scales = tl.norm(factor, 2, axis=0)
                weights = tl.where(scales == 0,
                                   tl.ones(tl.shape(scales), **tl.context(factor)),
                                   scales)
                factor = factor / tl.reshape(weights, (1, -1))

            factors[mode] = factor

        # Will we be performing a line search iteration?
        if linesearch and iteration % 2 == 0 and iteration > 5:
            line_iter = True
        else:
            line_iter = False

        # Calculate the current unnormalized error if we need it
        if (tol or return_errors) and line_iter is False:
            unnorml_rec_error, tensor, norm_tensor = error_calc(
                tensor, norm_tensor, weights, factors, sparsity, mask, mttkrp)
        else:
            if mask is not None:
                tensor = tensor * mask + tl.cp_to_tensor((weights, factors),
                                                         mask=1 - mask)

        # Start line search if requested.
        if line_iter is True:
            jump = iteration ** (1.0 / acc_pow)

            new_weights = weights_last + (weights - weights_last) * jump
            new_factors = [factors_last[ii] + (factors[ii] - factors_last[ii]) * jump
                           for ii in range(tl.ndim(tensor))]

            new_rec_error, new_tensor, new_norm_tensor = error_calc(
                tensor, norm_tensor, new_weights, new_factors, sparsity, mask)

            if (new_rec_error / new_norm_tensor) < rec_errors[-1]:
                factors, weights = new_factors, new_weights
                tensor, norm_tensor = new_tensor, new_norm_tensor
                unnorml_rec_error = new_rec_error
                acc_fail = 0

                if verbose:
                    print("Accepted line search jump of {}.".format(jump))
            else:
                unnorml_rec_error, tensor, norm_tensor = error_calc(
                    tensor, norm_tensor, weights, factors, sparsity, mask, mttkrp)
                acc_fail += 1

                if verbose:
                    print("Line search failed for jump of {}.".format(jump))

                if acc_fail == max_fail:
                    acc_pow += 1.0
                    acc_fail = 0

                    if verbose:
                        print("Reducing acceleration.")

        rec_error = unnorml_rec_error / norm_tensor
        rec_errors.append(rec_error)

        if tol:
            if iteration >= 1:
                rec_error_decrease = rec_errors[-2] - rec_errors[-1]

                if verbose:
                    print("iteration {}, reconstruction error: {}, decrease = {}, unnormalized = {}".format(
                        iteration, rec_error, rec_error_decrease, unnorml_rec_error))

                if cvg_criterion == 'abs_rec_error':
                    stop_flag = abs(rec_error_decrease) < tol
                elif cvg_criterion == 'rec_error':
                    stop_flag = rec_error_decrease < tol
                else:
                    raise TypeError("Unknown convergence criterion")

                if stop_flag:
                    if verbose:
                        print("PARAFAC converged after {} iterations".format(iteration))
                    break
            else:
                if verbose:
                    print('reconstruction error={}'.format(rec_errors[-1]))

    cp_tensor = CPTensor((weights, factors))

    if sparsity:
        sparse_component = sparsify_tensor(
            tensor - cp_to_tensor((weights, factors)), sparsity)
        cp_tensor = (cp_tensor, sparse_component)

    if return_errors:
        return cp_tensor, rec_errors
    else:
        return cp_tensor
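# Usage sketch (illustration only): with linesearch=True an extrapolated
# step is attempted every other iteration once iteration > 5, and accepted
# only if it lowers the reconstruction error.
def _demo_parafac_linesearch():
    rng = np.random.RandomState(0)
    tensor = tl.tensor(rng.random_sample((6, 5, 4)))
    cp_tensor, errors = parafac(tensor, rank=2, linesearch=True,
                                return_errors=True, random_state=rng)
    assert len(errors) >= 1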
def her_CPRAND(tensor, rank, n_samples, factors=None, exact_err=True, it_max=100, err_it_max=20, tol=1e-7, beta=0.1, eta=3, gamma=1.01, gamma_bar=1.005, list_factors=False, time_rec=False): """ herCPRAND for CP-decomposition return also exact error Parameters ---------- tensor : tensor rank : int n_samples : int sample size factors : list of matrices, optional an initial factor matrices. The default is None. exact_err : boolean, optional whether use err or err_rand_fast for terminaison criterion. The default is False. (not useful for this version) it_max : int, optional maximal number of iteration. The default is 100. err_it_max : int, optional maximal of iteration if terminaison critirion is not improved. The default is 20. tol : float, optional error tolerance. The default is 1e-7. beta : float, optional extrapolation parameter. The default is 0.1. eta : float, optional decrease coefficient of beta. The default is 3. gamma : float, optional increase coefficient of beta. The default is 1.01. gamma_bar : float, optional increase coeefficient of beta_bar. The default is 1.005. list_factors : boolean, optional If true, then return factor matrices of each iteration. The default is False. time_rec : boolean, optional If true, return computation time of each iteration. The default is False. Returns ------- the CP decomposition, number of iteration and exact / estimated termination criterion. list_fac and list_time are optional. """ beta_bar = 1 N = tl.ndim(tensor) # order of tensor norm_tensor = tl.norm(tensor) # norm of tensor if list_factors == True: list_fac = [] if (time_rec == True): list_time = [] if (factors == None): factors = svd_init_fac(tensor, rank) # Initialization of factor hat matrice by factor matrices factors_hat = factors if list_factors == True: list_fac.append(copy.deepcopy(factors)) weights = None it = 0 err_it = 0 cpt = 0 ######################################## ######### error initialization ######### ######################################## F_hat_bf, ind_bf = err_rand(tensor, None, factors, n_samples) F_hat_bf_ex = err(tensor, None, factors) # exact cost rng = tl.random.check_random_state(None) error = [F_hat_bf / norm_tensor] error_ex = [F_hat_bf_ex / norm_tensor] min_err = error[len(error) - 1] while (min_err > tol and it < it_max and err_it < err_it_max): if time_rec == True: tic = time.time() factors_hat_bf = factors_hat for n in range(N): Zs, indices = sample_khatri_rao(factors_hat, n_samples, skip_matrix=n, random_state=rng) indices_list = [i.tolist() for i in indices] indices_list.insert(n, slice(None, None, None)) indices_list = tuple(indices_list) V = tl.dot(tl.transpose(Zs), Zs) # J'ai du mal avec la syntaxe tensor[indices_list], # Ca renvoie une matrices et non un tenseur? 
            if n == 0:
                sampled_unfolding = tensor[indices_list]
            else:
                sampled_unfolding = tl.transpose(tensor[indices_list])
            W = tl.dot(sampled_unfolding, Zs)
            factor_bf = factors[n]
            # update
            factors[n] = tl.transpose(tl.solve(V, tl.transpose(W)))
            # solve needs a square, full-rank matrix; V is full rank when n_samples >= rank
            # if (n==N-1): F_hat_new = tl.norm(tl.dot(Zs, tl.transpose(factors[n])) - sampled_unfolding, 2)  # cost update
            # extrapolate
            factors_hat[n] = factors[n] + beta * (factors[n] - factor_bf)

        ########################################
        #########     error update     #########
        ########################################
        matrices = factors_hat_bf[:-1]
        Zs_bf = tl.ones((n_samples, rank), **tl.context(matrices[0]))
        for indices, matrix in zip(indices_list, matrices):
            Zs_bf = Zs_bf * matrix[indices, :]
        V_bf = tl.dot(tl.transpose(Zs_bf), Zs_bf)
        W_bf = tl.dot(tl.transpose(tensor[indices_list]), Zs_bf)
        F_hat_bf, a = err_rand_fast(tensor, factor_bf, V_bf, W_bf,
                                    indices_list, n_samples)
        F_hat_new, _ = err_rand_fast(tensor, factors[N - 1], V, W,
                                     indices_list, n_samples)

        if F_hat_new > F_hat_bf:
            factors_hat = factors
            beta_bar = beta
            beta = beta / eta
            cpt = cpt + 1
        else:
            factors = factors_hat
            beta_bar = min(1, beta_bar * gamma_bar)
            beta = min(beta_bar, gamma * beta)

        ########################################
        #########  update for next it  #########
        ########################################
        it = it + 1
        if list_factors:
            list_fac.append(copy.deepcopy(factors))
        error.append(F_hat_new / norm_tensor)
        if error[-1] < min_err:
            min_err = error[-1]  # err update
        else:
            err_it = err_it + 1
        if time_rec:
            toc = time.time()
            list_time.append(toc - tic)
        error_ex.append(err(tensor, None, factors) / norm_tensor)  # exact cost update

    # weights, factors = tl.cp_normalize((None, factors))
    if list_factors and time_rec:
        return (weights, factors, it, error_ex, error, cpt / it, list_fac, list_time)
    if list_factors:
        return (weights, factors, it, error_ex, error, cpt / it, list_fac)
    if time_rec:
        return (weights, factors, it, error_ex, error, cpt / it, list_time)
    return (weights, factors, it, error_ex, error, cpt / it)
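# A minimal usage sketch for her_CPRAND (not part of the original module). It
# assumes the helpers this function relies on (err, err_rand, svd_init_fac,
# sample_khatri_rao, err_rand_fast) are importable in this module; the random
# tensor, rank and sample size below are illustrative values only.
def _her_cprand_example():
    import numpy as np
    import tensorly as tl
    rng = np.random.RandomState(0)
    tensor = tl.tensor(rng.random_sample((20, 20, 20)))
    # default return: (weights, factors, n_iterations, exact errors,
    #                  estimated errors, restart rate)
    weights, factors, n_it, err_exact, err_est, restart_rate = her_CPRAND(
        tensor, rank=3, n_samples=50, it_max=50)
    return err_exact[-1]  # final exact relative error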
def parafac(tensor, rank, n_iter_max=100, init='svd', svd='numpy_svd',
            normalize_factors=False, orthogonalise=False, tol=1e-8,
            random_state=None, verbose=0, return_errors=False,
            non_negative=False, sparsity=None, l2_reg=0, mask=None,
            cvg_criterion='abs_rec_error'):
    """CANDECOMP/PARAFAC decomposition via alternating least squares (ALS)

    Computes a rank-`rank` decomposition of `tensor` [1]_ such that,

        ``tensor = [|weights; factors[0], ..., factors[-1] |]``.

    Parameters
    ----------
    tensor : ndarray
    rank : int
        Number of components.
    n_iter_max : int
        Maximum number of iterations
    init : {'svd', 'random'}, optional
        Type of factor matrix initialization. See `initialize_factors`.
    svd : str, default is 'numpy_svd'
        function to use to compute the SVD, acceptable values in tensorly.SVD_FUNS
    normalize_factors : bool
        if True, aggregate the weights of each factor in a 1D-tensor of shape
        (rank, ), which will contain the norms of the factors
    orthogonalise : bool or int, optional
        if truthy, orthogonalise the factors (via QR) during the first
        `orthogonalise` iterations (all iterations if True)
    tol : float, optional
        (Default: 1e-8) Relative reconstruction error tolerance. The algorithm
        is considered to have converged when the decrease in reconstruction
        error is less than `tol`; note that ALS only guarantees a local minimum.
    random_state : {None, int, np.random.RandomState}
    verbose : int, optional
        Level of verbosity
    return_errors : bool, optional
        Activate return of iteration errors
    l2_reg : float, optional
        Coefficient of the L2 (ridge) regularisation added to the
        pseudo-inverse in each ALS update
    mask : ndarray
        array of booleans with the same shape as ``tensor``;
        should be 0 where the values are missing and 1 everywhere else.
        Note: if tensor is sparse, then mask should also be sparse with a
        fill value of 1 (or True). Allows for missing values [2]_
    cvg_criterion : {'abs_rec_error', 'rec_error'}, optional
        Stopping criterion for ALS, works if `tol` is not None.
        If 'rec_error', ALS stops at current iteration if
        (previous rec_error - current rec_error) < tol.
        If 'abs_rec_error', ALS terminates when
        |previous rec_error - current rec_error| < tol.
    sparsity : float or int
        If `sparsity` is not None, we approximate tensor as a sum of
        low_rank_component and sparse_component, where low_rank_component =
        kruskal_to_tensor((weights, factors)). `sparsity` denotes the desired
        fraction or number of non-zero elements in the sparse_component of
        the `tensor`.

    Returns
    -------
    KruskalTensor : (weights, factors)
        * weights : 1D array of shape (rank, )
          all ones if normalize_factors is False (default),
          weights of the (normalized) factors otherwise
        * factors : List of factors of the CP decomposition;
          element `i` is of shape (tensor.shape[i], rank)
        * sparse_component : nD array of shape tensor.shape.
          Returned only if `sparsity` is not None.

    errors : list
        A list of reconstruction errors at each iteration of the algorithm.

    References
    ----------
    .. [1] T.G. Kolda and B.W. Bader, "Tensor Decompositions and Applications",
       SIAM REVIEW, vol. 51, n. 3, pp. 455-500, 2009.
    .. [2] Tomasi, Giorgio, and Rasmus Bro. "PARAFAC and missing values."
       Chemometrics and Intelligent Laboratory Systems 75.2 (2005): 163-180.
""" epsilon = 10e-12 if orthogonalise and not isinstance(orthogonalise, int): orthogonalise = n_iter_max factors = initialize_factors(tensor, rank, init=init, svd=svd, random_state=random_state, normalize_factors=normalize_factors) rec_errors = [] norm_tensor = tl.norm(tensor, 2) weights = tl.ones(rank, **tl.context(tensor)) Id = tl.eye(rank, **tl.context(tensor))*l2_reg if sparsity: sparse_component = tl.zeros_like(tensor) if isinstance(sparsity, float): sparsity = int(sparsity * np.prod(tensor.shape)) else: sparsity = int(sparsity) for iteration in range(n_iter_max): if orthogonalise and iteration <= orthogonalise: factors = [tl.qr(f)[0] if min(tl.shape(f)) >= rank else f for i, f in enumerate(factors)] if verbose > 1: print("Starting iteration", iteration + 1) for mode in range(tl.ndim(tensor)): if verbose > 1: print("Mode", mode, "of", tl.ndim(tensor)) pseudo_inverse = tl.tensor(np.ones((rank, rank)), **tl.context(tensor)) for i, factor in enumerate(factors): if i != mode: pseudo_inverse = pseudo_inverse*tl.dot(tl.conj(tl.transpose(factor)), factor) pseudo_inverse += Id if mask is not None: tensor = tensor*mask + tl.kruskal_to_tensor((None, factors), mask=1-mask) mttkrp = unfolding_dot_khatri_rao(tensor, (None, factors), mode) factor = tl.transpose(tl.solve(tl.conj(tl.transpose(pseudo_inverse)), tl.transpose(mttkrp))) if normalize_factors: weights = tl.norm(factor, order=2, axis=0) weights = tl.where(tl.abs(weights) <= tl.eps(tensor.dtype), tl.ones(tl.shape(weights), **tl.context(factors[0])), weights) factor = factor/(tl.reshape(weights, (1, -1))) factors[mode] = factor if tol: if sparsity: low_rank_component = kruskal_to_tensor((weights, factors)) sparse_component = sparsify_tensor(tensor - low_rank_component, sparsity) unnorml_rec_error = tl.norm(tensor - low_rank_component - sparse_component, 2) else: # ||tensor - rec||^2 = ||tensor||^2 + ||rec||^2 - 2*<tensor, rec> factors_norm = kruskal_norm((weights, factors)) # mttkrp and factor for the last mode. This is equivalent to the # inner product <tensor, factorization> iprod = tl.sum(tl.sum(mttkrp*factor, axis=0)*weights) unnorml_rec_error = tl.sqrt(tl.abs(norm_tensor**2 + factors_norm**2 - 2*iprod)) rec_error = unnorml_rec_error / norm_tensor rec_errors.append(rec_error) if iteration >= 1: rec_error_decrease = rec_errors[-2] - rec_errors[-1] if verbose: print("iteration {}, reconstraction error: {}, decrease = {}, unnormalized = {}".format(iteration, rec_error, rec_error_decrease, unnorml_rec_error)) if cvg_criterion == 'abs_rec_error': stop_flag = abs(rec_error_decrease) < tol elif cvg_criterion == 'rec_error': stop_flag = rec_error_decrease < tol else: raise TypeError("Unknown convergence criterion") if stop_flag: if verbose: print("PARAFAC converged after {} iterations".format(iteration)) break else: if verbose: print('reconstruction error={}'.format(rec_errors[-1])) kruskal_tensor = KruskalTensor((weights, factors)) if sparsity: sparse_component = sparsify_tensor(tensor -\ kruskal_to_tensor((weights, factors)),\ sparsity) kruskal_tensor = (kruskal_tensor, sparse_component) if return_errors: return kruskal_tensor, rec_errors else: return kruskal_tensor
def non_negative_parafac(tensor, rank, n_iter_max=100, init='svd', svd='numpy_svd',
                         tol=10e-7, random_state=None, verbose=0,
                         normalize_factors=False, return_errors=False, mask=None,
                         orthogonalise=False, cvg_criterion='abs_rec_error'):
    """Non-negative CP decomposition

    Uses multiplicative updates, see [2]_

    This is the same as parafac(non_negative=True).

    Parameters
    ----------
    tensor : ndarray
    rank : int
        number of components
    n_iter_max : int
        maximum number of iterations
    init : {'svd', 'random'}, optional
    svd : str, default is 'numpy_svd'
        function to use to compute the SVD, acceptable values in tensorly.SVD_FUNS
    tol : float, optional
        tolerance: the algorithm stops when the variation in the
        reconstruction error is less than the tolerance
    random_state : {None, int, np.random.RandomState}
    verbose : int, optional
        level of verbosity
    normalize_factors : bool
        if True, aggregate the weights of each factor in a 1D-tensor of shape
        (rank, ), which will contain the norms of the factors
    return_errors : bool, optional
        Activate return of iteration errors
    mask : ndarray
        array of booleans with the same shape as ``tensor``;
        should be 0 where the values are missing and 1 everywhere else.
    cvg_criterion : {'abs_rec_error', 'rec_error'}, optional
        Stopping criterion for ALS, works if `tol` is not None.

    Returns
    -------
    factors : ndarray list
        list of positive factors of the CP decomposition
        element `i` is of shape ``(tensor.shape[i], rank)``

    References
    ----------
    .. [2] Amnon Shashua and Tamir Hazan, "Non-negative tensor factorization
       with applications to statistics and computer vision", In Proceedings of
       the International Conference on Machine Learning (ICML), pp 792-799,
       ICML, 2005
    """
    epsilon = 10e-12

    if orthogonalise and not isinstance(orthogonalise, int):
        orthogonalise = n_iter_max

    factors = initialize_factors(tensor, rank, init=init, svd=svd,
                                 random_state=random_state, non_negative=True,
                                 normalize_factors=normalize_factors)
    rec_errors = []
    norm_tensor = tl.norm(tensor, 2)
    weights = tl.ones(rank, **tl.context(tensor))

    for iteration in range(n_iter_max):
        if orthogonalise and iteration <= orthogonalise:
            for i, f in enumerate(factors):
                if min(tl.shape(f)) >= rank:
                    factors[i] = tl.abs(tl.qr(f)[0])

        if verbose > 1:
            print("Starting iteration", iteration + 1)
        for mode in range(tl.ndim(tensor)):
            if verbose > 1:
                print("Mode", mode, "of", tl.ndim(tensor))

            accum = 1
            # khatri_rao(factors).T.dot(khatri_rao(factors))
            # simplifies to a Hadamard product of the Gram matrices
            sub_indices = [i for i in range(len(factors)) if i != mode]
            for i, e in enumerate(sub_indices):
                if i:
                    accum *= tl.dot(tl.transpose(factors[e]), factors[e])
                else:
                    accum = tl.dot(tl.transpose(factors[e]), factors[e])

            if mask is not None:
                tensor = tensor * mask + tl.kruskal_to_tensor((None, factors), mask=1 - mask)

            mttkrp = unfolding_dot_khatri_rao(tensor, (None, factors), mode)

            numerator = tl.clip(mttkrp, a_min=epsilon, a_max=None)
            denominator = tl.dot(factors[mode], accum)
            denominator = tl.clip(denominator, a_min=epsilon, a_max=None)
            factor = factors[mode] * numerator / denominator

            if normalize_factors:
                weights = tl.norm(factor, order=2, axis=0)
                weights = tl.where(tl.abs(weights) <= tl.eps(tensor.dtype),
                                   tl.ones(tl.shape(weights), **tl.context(factors[0])),
                                   weights)
                factor = factor / (tl.reshape(weights, (1, -1)))
            factors[mode] = factor

        if tol:
            # ||tensor - rec||^2 = ||tensor||^2 + ||rec||^2 - 2*<tensor, rec>
            factors_norm = kruskal_norm((weights, factors))
            # mttkrp and factor for the last mode. This is equivalent to the
            # inner product <tensor, factorization>
            iprod = tl.sum(tl.sum(mttkrp * factor, axis=0) * weights)
            rec_error = tl.sqrt(tl.abs(norm_tensor**2 + factors_norm**2 - 2 * iprod)) / norm_tensor
            rec_errors.append(rec_error)

            if iteration >= 1:
                rec_error_decrease = rec_errors[-2] - rec_errors[-1]

                if verbose:
                    print("iteration {}, reconstruction error: {}, decrease = {}".format(
                        iteration, rec_error, rec_error_decrease))

                if cvg_criterion == 'abs_rec_error':
                    stop_flag = abs(rec_error_decrease) < tol
                elif cvg_criterion == 'rec_error':
                    stop_flag = rec_error_decrease < tol
                else:
                    raise TypeError("Unknown convergence criterion")

                if stop_flag:
                    if verbose:
                        print("PARAFAC converged after {} iterations".format(iteration))
                    break
            else:
                if verbose:
                    print('reconstruction error={}'.format(rec_errors[-1]))

    kruskal_tensor = KruskalTensor((weights, factors))

    if return_errors:
        return kruskal_tensor, rec_errors
    else:
        return kruskal_tensor
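# A minimal usage sketch for `non_negative_parafac` (not part of the original
# module): the multiplicative updates preserve non-negativity, which the
# assertion below checks. The random tensor is illustrative only.
def _nn_parafac_example():
    import numpy as np
    import tensorly as tl
    tensor = tl.tensor(np.random.random_sample((8, 8, 8)))  # non-negative data
    weights, factors = non_negative_parafac(tensor, rank=3)
    # every factor matrix should be elementwise non-negative
    assert all(tl.min(f) >= 0 for f in factors)
    return weights, factors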
def online_cp(factors_old, X_old, X_new, rank, P, Q, n_iter=1, mu=1,
              verbose=False, transformed=False):
    """Online CP update: fold the new slices `X_new` (stacked along mode 0)
    into an existing decomposition `factors_old` of `X_old`.

    `P` and `Q` cache, per non-temporal mode, the running MTTKRP and Hadamard
    cross-product terms; `mu` is the forgetting factor weighting the old data.
    """
    weights = tl.ones(rank)
    if verbose:
        X = tl.tensor(np.concatenate((X_old, X_new)))
    n_dim = tl.ndim(X_old)
    U = factors_old.copy()
    if not transformed:
        K = get_KhatriRao_except0(factors_old)
    H = get_Hadamard(factors_old[1:])

    for i in range(n_iter):
        # temporal mode for A1
        if not transformed:
            mttkrp = tl.dot(tl.unfold(X_new, 0), tl.tenalg.khatri_rao((U[1], K[1])))
        else:
            # for higher accuracy, lower speed
            mttkrp_parts = []
            for r in range(rank):
                component = tl.tenalg.multi_mode_dot(X_new, [f[:, r] for f in U], skip=0)
                mttkrp_parts.append(component)
            mttkrp = np.stack(mttkrp_parts, axis=1)
        A1 = tl.transpose(tl.solve(tl.transpose(H), tl.transpose(mttkrp)))

        # non-temporal modes
        for mode in range(1, n_dim):
            if not transformed:
                dP = tl.dot(tl.unfold(X_new, mode), tl.tenalg.khatri_rao((A1, K[mode])))
                UTU = tl.dot(tl.transpose(U[mode]), U[mode])
                dQ = tl.dot(tl.transpose(A1), A1) * H / UTU
                U[mode] = tl.transpose(
                    tl.solve(tl.transpose(mu * Q[mode] + dQ),
                             tl.transpose(mu * P[mode] + dP)))
                P[mode] = P[mode] + dP
                Q[mode] = Q[mode] + dQ
            else:
                U1 = U.copy()
                U1[0] = A1
                H_mode = H / tl.dot(tl.transpose(U[mode]), U[mode])
                V = (mu * tl.dot(tl.transpose(U[0]), U[0]) + tl.dot(tl.transpose(A1), A1)) * H_mode
                mttkrp0 = unfolding_dot_khatri_rao(X_old, (None, U), mode)
                mttkrp1 = unfolding_dot_khatri_rao(X_new, (None, U1), mode)
                U[mode] = tl.transpose(
                    tl.solve(tl.transpose(V), tl.transpose(mu * mttkrp0 + mttkrp1)))
                H = H_mode * tl.dot(tl.transpose(U[mode]), U[mode])

        # temporal mode for A0
        if transformed:
            mttkrp = unfolding_dot_khatri_rao(X_old, (None, U), 0)
            U[0] = tl.transpose(tl.solve(tl.transpose(H), tl.transpose(mttkrp)))

    if verbose:
        U1 = U.copy()
        U1[0] = np.concatenate((U[0], A1))
        X_est = construct_tensor(U1)
        compare_tensors(X, X_est)

    U[0] = np.concatenate((U[0], A1))
    return (KruskalTensor((weights, U)), P, Q)
            if not iteration and weights is not None:
                # Take into account init weights
                mttkrp = unfolding_dot_khatri_rao(tensor, (weights, factors), mode)
                # the first two terms of the explicit ALS solution
            else:
                mttkrp = unfolding_dot_khatri_rao(tensor, (None, factors), mode)

            factor = tl.transpose(
                tl.solve(tl.conj(tl.transpose(pseudo_inverse)),
                         tl.transpose(mttkrp)))
            # the many transposes are needed because solve(a, b) solves a x = b for x

            # if column-wise normalization is required
            if normalize_factors:
                scales = tl.norm(factor, 2, axis=0)
                weights = tl.where(scales == 0,
                                   tl.ones(tl.shape(scales), **tl.context(factor)),
                                   scales)
                factor = factor / tl.reshape(weights, (1, -1))
                # be careful with this normalization: the division relies on
                # numpy broadcasting
            factors[mode] = factor

        # Will we be performing a line search iteration?
        line_iter = False

        # Calculate the current unnormalized error if we need it
        if (tol or return_errors) and line_iter is False:
            unnorml_rec_error, tensor, norm_tensor = error_calc(
                tensor, norm_tensor, weights, factors, sparsity, mask, mttkrp)
            # the unnormalized reconstruction error
        else:
def non_negative_parafac_hals(tensor, rank, n_iter_max=100, init="svd",
                              svd='numpy_svd', tol=10e-8, random_state=None,
                              sparsity_coefficients=None, fixed_modes=None,
                              nn_modes='all', exact=False,
                              normalize_factors=False, verbose=False,
                              return_errors=False,
                              cvg_criterion='abs_rec_error'):
    """Non-negative CP decomposition via HALS

    Uses Hierarchical ALS (Alternating Least Squares) which updates each
    factor column-wise (one column at a time while keeping all other columns
    fixed), see [1]_

    Parameters
    ----------
    tensor : ndarray
    rank : int
        number of components
    n_iter_max : int
        maximum number of iterations
    init : {'svd', 'random'}, optional
    svd : str, default is 'numpy_svd'
        function to use to compute the SVD, acceptable values in tensorly.SVD_FUNS
    tol : float, optional
        tolerance: the algorithm stops when the variation in the
        reconstruction error is less than the tolerance
        Default: 10e-8
    random_state : {None, int, np.random.RandomState}
    sparsity_coefficients : array of float (one per mode)
        The sparsity coefficients on each factor.
        If set to None, the algorithm is computed without sparsity
        Default: None
    fixed_modes : array of integers (between 0 and the number of modes)
        Indices of the modes whose factors are kept fixed (not updated)
        Default: None
    nn_modes : None, 'all' or array of integers (between 0 and the number of modes)
        Used to specify which modes to impose non-negativity constraints on.
        If 'all', then non-negativity is imposed on all modes.
        Default: 'all'
    exact : boolean
        If True, the NNLS subproblems are solved to high precision, at a
        higher computational cost. If False, an approximate solution is used.
        Default: False
    normalize_factors : boolean
        if True, aggregate the weights of each factor in a 1D-tensor of shape
        (rank, ), which will contain the norms of the factors
    verbose : boolean
        Indicates whether the algorithm prints the successive reconstruction
        errors or not
        Default: False
    return_errors : boolean
        Indicates whether the algorithm should return all reconstruction
        errors or not
        Default: False
    cvg_criterion : {'abs_rec_error', 'rec_error'}, optional
        Stopping criterion for ALS, works if `tol` is not None.
        If 'rec_error', ALS stops at current iteration if
        ``(previous rec_error - current rec_error) < tol``.
        If 'abs_rec_error', ALS terminates when
        ``|previous rec_error - current rec_error| < tol``.

    Returns
    -------
    factors : ndarray list
        list of positive factors of the CP decomposition
        element `i` is of shape ``(tensor.shape[i], rank)``
    errors : list
        A list of reconstruction errors at each iteration of the algorithm.

    References
    ----------
    .. [1] N. Gillis and F. Glineur, "Accelerated Multiplicative Updates and
       Hierarchical ALS Algorithms for Nonnegative Matrix Factorization",
       Neural Computation 24 (4): 1085-1105, 2012.
""" weights, factors = initialize_nn_cp(tensor, rank, init=init, svd=svd, random_state=random_state, normalize_factors=normalize_factors) norm_tensor = tl.norm(tensor, 2) n_modes = tl.ndim(tensor) if sparsity_coefficients is None or isinstance(sparsity_coefficients, float): sparsity_coefficients = [sparsity_coefficients] * n_modes if fixed_modes is None: fixed_modes = [] if nn_modes == 'all': nn_modes = set(range(n_modes)) elif nn_modes is None: nn_modes = set() # Avoiding errors for fixed_value in fixed_modes: sparsity_coefficients[fixed_value] = None for mode in range(n_modes): if sparsity_coefficients[mode] is not None: warnings.warn( "Sparsity coefficient is ignored in unconstrained modes.") # Generating the mode update sequence modes = [mode for mode in range(n_modes) if mode not in fixed_modes] # initialisation - declare local varaibles rec_errors = [] # Iteratation for iteration in range(n_iter_max): # One pass of least squares on each updated mode for mode in modes: # Computing Hadamard of cross-products pseudo_inverse = tl.tensor(tl.ones((rank, rank)), **tl.context(tensor)) for i, factor in enumerate(factors): if i != mode: pseudo_inverse = pseudo_inverse * tl.dot( tl.transpose(factor), factor) pseudo_inverse = tl.reshape(weights, (-1, 1)) * pseudo_inverse * tl.reshape( weights, (1, -1)) mttkrp = unfolding_dot_khatri_rao(tensor, (weights, factors), mode) if mode in nn_modes: # Call the hals resolution with nnls, optimizing the current mode nn_factor, _, _, _ = hals_nnls( tl.transpose(mttkrp), pseudo_inverse, tl.transpose(factors[mode]), n_iter_max=100, sparsity_coefficient=sparsity_coefficients[mode], exact=exact) factors[mode] = tl.transpose(nn_factor) else: factor = tl.solve(tl.transpose(pseudo_inverse), tl.transpose(mttkrp)) factors[mode] = tl.transpose(factor) if normalize_factors and mode != modes[-1]: weights, factors = cp_normalize((weights, factors)) if tol: factors_norm = cp_norm((weights, factors)) iprod = tl.sum(tl.sum(mttkrp * factors[-1], axis=0)) rec_error = tl.sqrt( tl.abs(norm_tensor**2 + factors_norm**2 - 2 * iprod)) / norm_tensor rec_errors.append(rec_error) if iteration >= 1: rec_error_decrease = rec_errors[-2] - rec_errors[-1] if verbose: print( "iteration {}, reconstruction error: {}, decrease = {}" .format(iteration, rec_error, rec_error_decrease)) if cvg_criterion == 'abs_rec_error': stop_flag = abs(rec_error_decrease) < tol elif cvg_criterion == 'rec_error': stop_flag = rec_error_decrease < tol else: raise TypeError("Unknown convergence criterion") if stop_flag: if verbose: print("PARAFAC converged after {} iterations".format( iteration)) break else: if verbose: print('reconstruction error={}'.format(rec_errors[-1])) if normalize_factors: weights, factors = cp_normalize((weights, factors)) cp_tensor = CPTensor((weights, factors)) if return_errors: return cp_tensor, rec_errors else: return cp_tensor
def data_adaptive_online_cp(factors_old, X_old, X_new, rank, n_iter=1, mu=1,
                            verbose=False):
    """Data-adaptive online CP update.

    Variant of `online_cp` without the P/Q caches: the contribution of the
    historical data `X_old` is re-expressed through the Hadamard accumulators
    `G` and `H` built from `factors_old`, and the temporal factor is updated
    for both the old slices (`U[0]`) and the new ones (`A1`). `mu` is the
    forgetting factor weighting the old data.
    """
    weights = tl.ones(rank)
    if verbose:
        X = tl.tensor(np.concatenate((X_old, X_new)))
    n_dim = tl.ndim(X_old)
    U = factors_old.copy()
    H = get_Hadamard(U[1:])
    G = H
    ATA0 = tl.dot(tl.transpose(U[0]), U[0])
    ATA1 = tl.dot(tl.transpose(U[1]), U[1])

    for i in range(n_iter):
        # temporal mode for A1
        mttkrp_parts = []
        for r in range(rank):
            component = tl.tenalg.multi_mode_dot(X_new, [f[:, r] for f in U], skip=0)
            mttkrp_parts.append(component)
        mttkrp = np.stack(mttkrp_parts, axis=1)
        A1 = tl.transpose(tl.solve(tl.transpose(H), tl.transpose(mttkrp)))
        ATA1 = tl.dot(tl.transpose(A1), A1)

        # non-temporal modes
        for mode in range(1, n_dim):
            U1 = U.copy()
            U1[0] = A1
            G = G / tl.dot(tl.transpose(factors_old[mode]), U[mode])
            W = G * tl.dot(tl.transpose(factors_old[0]), U[0])
            mttkrp0 = mu * tl.dot(factors_old[mode], W)
            mttkrp1 = unfolding_dot_khatri_rao(X_new, (None, U1), mode)
            H = H / tl.dot(tl.transpose(U[mode]), U[mode])
            V = H * (mu * ATA0 + ATA1)
            U[mode] = tl.transpose(
                tl.solve(tl.transpose(V), tl.transpose(mttkrp0 + mttkrp1)))
            G = G * tl.dot(tl.transpose(factors_old[mode]), U[mode])
            H = H * tl.dot(tl.transpose(U[mode]), U[mode])

        # temporal mode for A0
        mttkrp = tl.dot(factors_old[0], G)
        U[0] = tl.transpose(tl.solve(tl.transpose(H), tl.transpose(mttkrp)))
        ATA0 = tl.dot(tl.transpose(U[0]), U[0])

    if verbose:
        U1 = U.copy()
        U1[0] = np.concatenate((U[0], A1))
        X_est = construct_tensor(U1)
        compare_tensors(X, X_est)

    U[0] = np.concatenate((U[0].copy(), A1))
    return KruskalTensor((weights, U))
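# A minimal streaming sketch for `data_adaptive_online_cp` (not part of the
# original module): fit an initial batch with `parafac` above, then fold in
# new slices along the temporal mode (mode 0). It assumes the module helpers
# this function calls (get_Hadamard, unfolding_dot_khatri_rao) are available;
# shapes and rank are illustrative only.
def _data_adaptive_online_cp_example():
    import numpy as np
    import tensorly as tl
    rng = np.random.RandomState(0)
    X_old = tl.tensor(rng.random_sample((30, 8, 8)))   # historical slices
    X_new = tl.tensor(rng.random_sample((5, 8, 8)))    # newly arrived slices
    weights, factors = parafac(X_old, rank=3)          # initial decomposition
    updated = data_adaptive_online_cp(list(factors), X_old, X_new, rank=3)
    return updated  # KruskalTensor whose mode-0 factor now has 35 rows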