def excluded_value_fraction(self):
    '''Returns the fraction of missing/excluded values in the tensor,
    given the values that are masked in tensor.mask

    Returns
    -------
    excluded_fraction : float
        Fraction of missing/excluded values in the tensor.
    '''
    if self.mask is None:
        print("The interaction tensor does not have masked values")
        return 0.0
    else:
        fraction = tl.sum(self.mask) / tl.prod(tl.tensor(self.tensor.shape))
        excluded_fraction = 1.0 - fraction.item()
        return excluded_fraction
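
# A minimal usage sketch for `excluded_value_fraction`, assuming it is a method
# of an object carrying `tensor` and `mask` attributes (the SimpleNamespace
# host below is purely illustrative):
def _demo_excluded_value_fraction():
    import types

    import tensorly as tl

    # 1 marks observed entries, 0 marks masked/excluded ones.
    host = types.SimpleNamespace(tensor=tl.zeros((2, 2)),
                                 mask=tl.tensor([[1.0, 1.0], [1.0, 0.0]]))
    # 3 of the 4 entries are observed, so 1/4 of the values are excluded.
    print(excluded_value_fraction(host))  # 0.25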
def tt_vonneumann_entropy(tensor):
    """Returns the von Neumann entropy of a density matrix (square matrix) in TT tensor form.

    Parameters
    ----------
    tensor : (TT tensor)
        Data structure

    Returns
    -------
    tt_von_neumann_entropy : order-0 tensor
    """
    square_dim = int(tl.sqrt(tl.prod(tl.tensor(tensor.shape))))
    tensor = tl.reshape(tt_to_tensor(tensor), (square_dim, square_dim))
    return vonneumann_entropy(tensor)
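
# A quick check for `tt_vonneumann_entropy` (a sketch, assuming `tensor_train`
# from tensorly.decomposition and the dense `vonneumann_entropy` referenced
# above are importable): the maximally mixed state rho = I/2 has entropy ln(2).
def _demo_tt_vonneumann_entropy():
    import tensorly as tl
    from tensorly.decomposition import tensor_train

    rho = tl.tensor([[0.5, 0.0], [0.0, 0.5]])
    tt_rho = tensor_train(rho, rank=[1, 2, 1])
    print(tt_vonneumann_entropy(tt_rho))  # ~0.6931 = ln(2)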
def sparsify_tensor(tensor, card):
    """Zeros out all elements in the `tensor` except `card` elements with maximum absolute values.

    Parameters
    ----------
    tensor : ndarray
    card : int
        Desired number of non-zero elements in the `tensor`

    Returns
    -------
    ndarray of shape tensor.shape
    """
    if card >= tl.prod(tl.tensor(tensor.shape)):
        return tensor
    bound = tl.sort(tl.abs(tensor), axis=None)[-card]
    return tl.where(tl.abs(tensor) < bound,
                    tl.zeros(tensor.shape, **tl.context(tensor)),
                    tensor)
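
# A small sanity check for `sparsify_tensor` (a sketch, assuming the default
# numpy backend): only the `card` largest-magnitude entries survive.
def _demo_sparsify_tensor():
    import tensorly as tl

    t = tl.tensor([[1.0, -5.0, 2.0],
                   [0.5, 4.0, -0.1]])
    # card=2 keeps only -5.0 and 4.0; everything else is zeroed out.
    print(sparsify_tensor(t, 2))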
def initialize_constrained_parafac(tensor, rank, init='svd', svd='numpy_svd',
                                   random_state=None, non_negative=None,
                                   l1_reg=None, l2_reg=None, l2_square_reg=None,
                                   unimodality=None, normalize=None, simplex=None,
                                   normalized_sparsity=None, soft_sparsity=None,
                                   smoothness=None, monotonicity=None,
                                   hard_sparsity=None):
    r"""Initialize factors used in `constrained_parafac`.

    The type of initialization is set using `init`. If `init == 'random'` then
    initialize factor matrices with uniform distribution using `random_state`.
    If `init == 'svd'` then initialize the `m`th factor matrix using the `rank`
    left singular vectors of the `m`th unfolding of the input tensor. If init is
    a previously initialized `cp tensor`, all the weights are pulled in the last
    factor and then the weights are set to "1" for the output tensor. Lastly,
    factors are updated with the proximal operator according to the selected
    constraint(s), so that they satisfy the imposed constraints (does not apply
    to cptensor initialization).

    Parameters
    ----------
    tensor : ndarray
    rank : int
    random_state : {None, int, np.random.RandomState}
    init : {'svd', 'random', cptensor}, optional
    svd : str, default is 'numpy_svd'
        function to use to compute the SVD, acceptable values in tensorly.SVD_FUNS
    non_negative : bool or dictionary
        This constraint clips negative values to '0'. If it is True, the
        non-negativity constraint is applied to all modes.
    l1_reg : float or list or dictionary, optional
    l2_reg : float or list or dictionary, optional
    l2_square_reg : float or list or dictionary, optional
    unimodality : bool or dictionary, optional
        If it is True, the unimodality constraint is applied to all modes.
    normalize : bool or dictionary, optional
        This constraint divides all the values by the maximum value of the
        input array. If it is True, the normalize constraint is applied to all
        modes.
    simplex : float or list or dictionary, optional
    normalized_sparsity : float or list or dictionary, optional
    soft_sparsity : float or list or dictionary, optional
    smoothness : float or list or dictionary, optional
    monotonicity : bool or dictionary, optional
    hard_sparsity : float or list or dictionary, optional

    Returns
    -------
    factors : CPTensor
        An initial cp tensor.
    """
    n_modes = tl.ndim(tensor)
    rng = tl.check_random_state(random_state)

    if init == 'random':
        weights, factors = random_cp(tl.shape(tensor), rank,
                                     normalise_factors=False,
                                     **tl.context(tensor))
    elif init == 'svd':
        try:
            svd_fun = tl.SVD_FUNS[svd]
        except KeyError:
            message = ('Got svd={}. However, for the current backend ({}), '
                       'the possible choices are {}').format(
                           svd, tl.get_backend(), tl.SVD_FUNS)
            raise ValueError(message)

        factors = []
        for mode in range(tl.ndim(tensor)):
            U, S, _ = svd_fun(unfold(tensor, mode), n_eigenvecs=rank)

            # Put SVD initialization on the same scaling as the tensor in case normalize_factors=False
            if mode == 0:
                idx = min(rank, tl.shape(S)[0])
                U = tl.index_update(U, tl.index[:, :idx], U[:, :idx] * S[:idx])

            if tensor.shape[mode] < rank:
                random_part = tl.tensor(
                    rng.random_sample((U.shape[0],
                                       rank - tl.shape(tensor)[mode])),
                    **tl.context(tensor))
                U = tl.concatenate([U, random_part], axis=1)

            factors.append(U[:, :rank])
    elif isinstance(init, (tuple, list, CPTensor)):
        try:
            weights, factors = CPTensor(init)

            if tl.all(weights == 1):
                weights, factors = CPTensor((None, factors))
            else:
                weights_avg = tl.prod(weights) ** (1.0 / tl.shape(weights)[0])
                for i in range(len(factors)):
                    factors[i] = factors[i] * weights_avg
            kt = CPTensor((None, factors))
            return kt
        except ValueError:
            raise ValueError(
                'If initialization method is a mapping, then it must '
                'be possible to convert it to a CPTensor instance')
    else:
        raise ValueError('Initialization method "{}" not recognized'.format(init))

    for i in range(n_modes):
        factors[i] = proximal_operator(
            factors[i], non_negative=non_negative, l1_reg=l1_reg,
            l2_reg=l2_reg, l2_square_reg=l2_square_reg,
            unimodality=unimodality, normalize=normalize, simplex=simplex,
            normalized_sparsity=normalized_sparsity,
            soft_sparsity=soft_sparsity, smoothness=smoothness,
            monotonicity=monotonicity, hard_sparsity=hard_sparsity,
            n_const=n_modes, order=i)
    kt = CPTensor((None, factors))
    return kt
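
# An illustrative call of `initialize_constrained_parafac` (a sketch, assuming
# the surrounding module provides the `random_cp`, `unfold`, `CPTensor` and
# `proximal_operator` helpers referenced above): SVD initialization with a
# non-negativity constraint applied to every mode.
def _demo_initialize_constrained_parafac():
    import numpy as np
    import tensorly as tl

    tensor = tl.tensor(np.random.rand(4, 5, 6))
    kt = initialize_constrained_parafac(tensor, rank=3, init='svd',
                                        non_negative=True)
    weights, factors = kt
    print([tl.shape(f) for f in factors])  # [(4, 3), (5, 3), (6, 3)]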
def initialize_cp(tensor, rank, init='svd', svd='numpy_svd', random_state=None,
                  normalize_factors=False):
    r"""Initialize factors used in `parafac`.

    The type of initialization is set using `init`. If `init == 'random'` then
    initialize factor matrices with uniform distribution using `random_state`.
    If `init == 'svd'` then initialize the `m`th factor matrix using the `rank`
    left singular vectors of the `m`th unfolding of the input tensor. If init is
    a previously initialized `cp tensor`, all the weights are pulled in the last
    factor and then the weights are set to "1" for the output tensor.

    Parameters
    ----------
    tensor : ndarray
    rank : int
    init : {'svd', 'random', cptensor}, optional
    svd : str, default is 'numpy_svd'
        function to use to compute the SVD, acceptable values in tensorly.SVD_FUNS
    random_state : {None, int, np.random.RandomState}
    normalize_factors : bool, default is False
        if True, the returned factors are normalized and their norms are stored
        in the weights

    Returns
    -------
    factors : CPTensor
        An initial cp tensor.
    """
    rng = tl.check_random_state(random_state)

    if init == 'random':
        kt = random_cp(tl.shape(tensor), rank, normalise_factors=False,
                       random_state=rng, **tl.context(tensor))
    elif init == 'svd':
        try:
            svd_fun = tl.SVD_FUNS[svd]
        except KeyError:
            message = ('Got svd={}. However, for the current backend ({}), '
                       'the possible choices are {}').format(
                           svd, tl.get_backend(), tl.SVD_FUNS)
            raise ValueError(message)

        factors = []
        for mode in range(tl.ndim(tensor)):
            U, S, _ = svd_fun(unfold(tensor, mode), n_eigenvecs=rank)

            # Put SVD initialization on the same scaling as the tensor in case normalize_factors=False
            if mode == 0:
                idx = min(rank, tl.shape(S)[0])
                U = tl.index_update(U, tl.index[:, :idx], U[:, :idx] * S[:idx])

            if tensor.shape[mode] < rank:
                # TODO: this is a hack but it seems to do the job for now
                random_part = tl.tensor(
                    rng.random_sample((U.shape[0],
                                       rank - tl.shape(tensor)[mode])),
                    **tl.context(tensor))
                U = tl.concatenate([U, random_part], axis=1)

            factors.append(U[:, :rank])
        kt = CPTensor((None, factors))
    elif isinstance(init, (tuple, list, CPTensor)):
        # TODO: Test this
        try:
            if normalize_factors is True:
                warnings.warn(
                    'It is not recommended to initialize a tensor with '
                    'normalizing. Consider normalizing the tensor before '
                    'using this function')

            kt = CPTensor(init)
            weights, factors = kt

            if tl.all(weights == 1):
                kt = CPTensor((None, factors))
            else:
                weights_avg = tl.prod(weights) ** (1.0 / tl.shape(weights)[0])
                for i in range(len(factors)):
                    factors[i] = factors[i] * weights_avg
                kt = CPTensor((None, factors))
        except ValueError:
            raise ValueError(
                'If initialization method is a mapping, then it must '
                'be possible to convert it to a CPTensor instance')
    else:
        raise ValueError(
            'Initialization method "{}" not recognized'.format(init))

    if normalize_factors:
        kt = cp_normalize(kt)

    return kt
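
# An illustrative call of `initialize_cp` (a sketch under the same module-level
# imports as above): SVD initialization followed by factor normalization, so
# the returned weights carry the factor norms.
def _demo_initialize_cp():
    import numpy as np
    import tensorly as tl

    tensor = tl.tensor(np.random.rand(4, 5, 6))
    weights, factors = initialize_cp(tensor, rank=3, init='svd',
                                     normalize_factors=True)
    print(tl.shape(weights), [tl.shape(f) for f in factors])
    # (3,) [(4, 3), (5, 3), (6, 3)]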
def parafac(tensor, rank, n_iter_max=100, init='svd', svd='numpy_svd',
            normalize_factors=False, tol=1e-8, orthogonalise=False,
            random_state=None, verbose=False, return_errors=False,
            non_negative=False, mask=None):
    """CANDECOMP/PARAFAC decomposition via alternating least squares (ALS)

    Computes a rank-`rank` decomposition of `tensor` [1]_ such that,
    ``tensor = [|weights; factors[0], ..., factors[-1] |]``.

    Parameters
    ----------
    tensor : ndarray
    rank : int
        Number of components.
    n_iter_max : int
        Maximum number of iterations
    init : {'svd', 'random'}, optional
        Type of factor matrix initialization. See `initialize_factors`.
    svd : str, default is 'numpy_svd'
        function to use to compute the SVD, acceptable values in tensorly.SVD_FUNS
    normalize_factors : bool, optional
        if True, aggregate the weights of each factor in a 1D-tensor of shape
        (rank, ), which will contain the norms of the factors
    tol : float, optional (Default: 1e-8)
        Relative reconstruction error tolerance. The algorithm is considered to
        have converged when the decrease in reconstruction error is less than
        `tol`.
    random_state : {None, int, np.random.RandomState}
    verbose : int, optional
        Level of verbosity
    return_errors : bool, optional
        Activate return of iteration errors
    non_negative : bool, optional
        Perform non_negative PARAFAC. See :func:`non_negative_parafac`.
    mask : ndarray
        array of booleans with the same shape as ``tensor``; should be 0 where
        the values are missing and 1 everywhere else. Note: if tensor is
        sparse, then mask should also be sparse with a fill value of 1 (or
        True). Allows for missing values [2]_

    Returns
    -------
    KruskalTensor : (weight, factors)
        * weights : 1D array of shape (rank, )
            all ones if normalize_factors is False (default),
            weights of the (normalized) factors otherwise
        * factors : List of factors of the CP decomposition; element `i` is of
            shape (tensor.shape[i], rank)
    errors : list
        A list of reconstruction errors at each iteration of the algorithm.

    References
    ----------
    .. [1] T.G. Kolda and B.W. Bader, "Tensor Decompositions and Applications",
       SIAM REVIEW, vol. 51, n. 3, pp. 455-500, 2009.
    .. [2] Tomasi, Giorgio, and Rasmus Bro. "PARAFAC and missing values."
       Chemometrics and Intelligent Laboratory Systems 75.2 (2005): 163-180.
    """
    epsilon = 10e-12

    if orthogonalise and not isinstance(orthogonalise, int):
        orthogonalise = n_iter_max

    factors = initialize_factors(tensor, rank, init=init, svd=svd,
                                 random_state=random_state,
                                 non_negative=non_negative)
    rec_errors = []
    norm_tensor = tl.norm(tensor, 2)
    weights = tl.ones(rank, **tl.context(tensor))

    for iteration in range(n_iter_max):
        if orthogonalise and iteration <= orthogonalise:
            factors = [tl.qr(factor)[0] for factor in factors]

        if verbose:
            print("Starting iteration", iteration)
        for mode in range(tl.ndim(tensor)):
            if verbose:
                print("Mode", mode, "of", tl.ndim(tensor))
            if non_negative:
                accum = 1
                # khatri_rao(factors).tl.dot(khatri_rao(factors))
                # simplifies to multiplications
                sub_indices = [i for i in range(len(factors)) if i != mode]
                for i, e in enumerate(sub_indices):
                    if i:
                        accum *= tl.dot(tl.transpose(factors[e]), factors[e])
                    else:
                        accum = tl.dot(tl.transpose(factors[e]), factors[e])

            pseudo_inverse = tl.tensor(np.ones((rank, rank)),
                                       **tl.context(tensor))
            for i, factor in enumerate(factors):
                if i != mode:
                    pseudo_inverse = pseudo_inverse * tl.dot(
                        tl.conj(tl.transpose(factor)), factor)

            if mask is not None:
                tensor = tensor * mask + tl.kruskal_to_tensor(factors,
                                                              mask=1 - mask)

            mttkrp = unfolding_dot_khatri_rao(tensor, (weights, factors), mode)

            if non_negative:
                numerator = tl.clip(mttkrp, a_min=epsilon, a_max=None)
                denominator = tl.dot(factors[mode], accum)
                denominator = tl.clip(denominator, a_min=epsilon, a_max=None)
                factor = factors[mode] * numerator / denominator
            else:
                factor = tl.transpose(
                    tl.solve(tl.conj(tl.transpose(pseudo_inverse)),
                             tl.transpose(mttkrp)))

            if normalize_factors:
                factor_norm = tl.norm(factor, axis=0)
                weights *= factor_norm
                positive_factor_norms = tl.where(
                    factor_norm == 0,
                    tl.ones(tl.shape(factor_norm), **tl.context(factors[0])),
                    factor_norm)
                factor = factor / positive_factor_norms

            factors[mode] = factor

        if tol:
            # ||tensor - rec||^2 = ||tensor||^2 + ||rec||^2 - 2*<tensor, rec>
            # This is ||kruskal_to_tensor(factors)||^2
            factors_norm = tl.sum(
                tl.prod(
                    tl.stack([tl.dot(tl.transpose(f), f) for f in factors], 0),
                    0))
            # mttkrp and factor for the last mode. This is equivalent to the
            # inner product <tensor, factorization>
            iprod = tl.sum(mttkrp * factor)
            rec_error = tl.sqrt(
                tl.abs(norm_tensor**2 + factors_norm - 2 * iprod)) / norm_tensor
            rec_errors.append(rec_error)

            if iteration >= 1:
                if verbose:
                    print('reconstruction error={}, variation={}.'.format(
                        rec_errors[-1], rec_errors[-2] - rec_errors[-1]))

                if tol and abs(rec_errors[-2] - rec_errors[-1]) < tol:
                    if verbose:
                        print('converged in {} iterations.'.format(iteration))
                    break
            else:
                if verbose:
                    print('reconstruction error={}'.format(rec_errors[-1]))

    kruskal_tensor = KruskalTensor((weights, factors))

    if return_errors:
        return kruskal_tensor, rec_errors
    else:
        return kruskal_tensor
def parafac(tensor, rank, n_iter_max=100, init='svd', svd='numpy_svd', tol=1e-8,
            orthogonalise=False, random_state=None, verbose=False,
            return_errors=False, non_negative=False):
    """CANDECOMP/PARAFAC decomposition via alternating least squares (ALS)

    Computes a rank-`rank` decomposition of `tensor` [1]_ such that,
    ``tensor = [| factors[0], ..., factors[-1] |]``.

    Parameters
    ----------
    tensor : ndarray
    rank : int
        Number of components.
    n_iter_max : int
        Maximum number of iterations
    init : {'svd', 'random'}, optional
        Type of factor matrix initialization. See `initialize_factors`.
    svd : str, default is 'numpy_svd'
        function to use to compute the SVD, acceptable values in tensorly.SVD_FUNS
    tol : float, optional (Default: 1e-8)
        Relative reconstruction error tolerance. The algorithm is considered to
        have converged when the decrease in reconstruction error is less than
        `tol`.
    random_state : {None, int, np.random.RandomState}
    verbose : int, optional
        Level of verbosity
    return_errors : bool, optional
        Activate return of iteration errors
    non_negative : bool, optional
        Perform non_negative PARAFAC. See :func:`non_negative_parafac`.

    Returns
    -------
    factors : ndarray list
        List of factors of the CP decomposition; element `i` is of shape
        (tensor.shape[i], rank)
    errors : list
        A list of reconstruction errors at each iteration of the algorithm.

    References
    ----------
    .. [1] T.G. Kolda and B.W. Bader, "Tensor Decompositions and Applications",
       SIAM REVIEW, vol. 51, n. 3, pp. 455-500, 2009.
    """
    epsilon = 10e-12

    if orthogonalise and not isinstance(orthogonalise, int):
        orthogonalise = n_iter_max

    factors = initialize_factors(tensor, rank, init=init, svd=svd,
                                 random_state=random_state,
                                 non_negative=non_negative)
    rec_errors = []
    norm_tensor = tl.norm(tensor, 2)

    for iteration in range(n_iter_max):
        if orthogonalise and iteration <= orthogonalise:
            factors = [tl.qr(factor)[0] for factor in factors]

        if verbose:
            print("Starting iteration", iteration)
        for mode in range(tl.ndim(tensor)):
            if verbose:
                print("Mode", mode, "of", tl.ndim(tensor))
            if non_negative:
                accum = 1
                # khatri_rao(factors).tl.dot(khatri_rao(factors))
                # simplifies to multiplications
                sub_indices = [i for i in range(len(factors)) if i != mode]
                for i, e in enumerate(sub_indices):
                    if i:
                        accum *= tl.dot(tl.transpose(factors[e]), factors[e])
                    else:
                        accum = tl.dot(tl.transpose(factors[e]), factors[e])

            pseudo_inverse = tl.tensor(np.ones((rank, rank)),
                                       **tl.context(tensor))
            for i, factor in enumerate(factors):
                if i != mode:
                    pseudo_inverse = pseudo_inverse * tl.dot(
                        tl.conj(tl.transpose(factor)), factor)

            # factor = tl.dot(unfold(tensor, mode), khatri_rao(factors, skip_matrix=mode).conj())
            mttkrp = tl.tenalg.unfolding_dot_khatri_rao(tensor, factors, mode)

            if non_negative:
                numerator = tl.clip(mttkrp, a_min=epsilon, a_max=None)
                denominator = tl.dot(factors[mode], accum)
                denominator = tl.clip(denominator, a_min=epsilon, a_max=None)
                factor = factors[mode] * numerator / denominator
            else:
                factor = tl.transpose(
                    tl.solve(tl.conj(tl.transpose(pseudo_inverse)),
                             tl.transpose(mttkrp)))

            factors[mode] = factor

        if tol:
            # ||tensor - rec||^2 = ||tensor||^2 + ||rec||^2 - 2*<tensor, rec>
            # This is ||kruskal_to_tensor(factors)||^2
            factors_norm = tl.sum(
                tl.prod(
                    tl.stack([tl.dot(tl.transpose(f), f) for f in factors], 0),
                    0))
            # mttkrp and factor for the last mode. This is equivalent to the
            # inner product <tensor, factorization>
            iprod = tl.sum(mttkrp * factor)
            rec_error = tl.sqrt(
                tl.abs(norm_tensor**2 + factors_norm - 2 * iprod)) / norm_tensor
            rec_errors.append(rec_error)

            if iteration >= 1:
                if verbose:
                    print('reconstruction error={}, variation={}.'.format(
                        rec_errors[-1], rec_errors[-2] - rec_errors[-1]))

                if tol and abs(rec_errors[-2] - rec_errors[-1]) < tol:
                    if verbose:
                        print('converged in {} iterations.'.format(iteration))
                    break
            else:
                if verbose:
                    print('reconstruction error={}'.format(rec_errors[-1]))

    if return_errors:
        return factors, rec_errors
    else:
        return factors
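
# An illustrative run of this older `parafac` variant, which returns the raw
# factor list rather than a weighted Kruskal tensor (a sketch under the same
# module-level imports as above):
def _demo_parafac_factors():
    import numpy as np
    import tensorly as tl

    tensor = tl.tensor(np.random.rand(6, 7, 8))
    factors, errors = parafac(tensor, rank=3, return_errors=True)
    print([tl.shape(f) for f in factors])  # [(6, 3), (7, 3), (8, 3)]
    print('final error:', errors[-1])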