Example #1
    def excluded_value_fraction(self):
        '''Returns the fraction of missing/excluded values in the tensor,
        i.e. the entries marked as 0 in ``tensor.mask``.

        Returns
        -------
        excluded_fraction : float
            Fraction of missing/excluded values in the tensor.
        '''
        if self.mask is None:
            print("The interaction tensor does not have masked values")
            return 0.0
        else:
            # mask == 1 marks observed entries, so this ratio is the observed fraction
            fraction = tl.sum(self.mask) / tl.prod(tl.tensor(self.tensor.shape))
            excluded_fraction = 1.0 - fraction.item()
            return excluded_fraction
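The arithmetic can be checked in isolation. Below is a minimal NumPy sketch (not part of the class above) with a hypothetical 0/1 mask in which 1 marks an observed entry:

import numpy as np

# Hypothetical binary mask: 1 = observed, 0 = missing/excluded.
mask = np.array([[1, 1, 0],
                 [1, 0, 0]])

observed_fraction = mask.sum() / mask.size   # 3/6 = 0.5
excluded_fraction = 1.0 - observed_fraction
print(excluded_fraction)                     # 0.5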
Example #2
def tt_vonneumann_entropy(tensor):
    """Returns the von Neumann entropy of a density matrix (square matrix) in TT tensor form.

    Parameters
    ----------
    tensor : TT tensor
        The density matrix, given as a TT (tensor-train) decomposition.

    Returns
    -------
    tt_von_neumann_entropy : order-0 tensor
    """
    # The TT tensor must have a square number of entries; recover the (d, d) matrix.
    square_dim = int(tl.sqrt(tl.prod(tl.tensor(tensor.shape))))
    tensor = tl.reshape(tt_to_tensor(tensor), (square_dim, square_dim))

    return vonneumann_entropy(tensor)
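The quantity being computed is the von Neumann entropy S(rho) = -sum_i lambda_i * ln(lambda_i) over the eigenvalues of the density matrix. A minimal dense NumPy sketch of that formula (an illustration, not the `vonneumann_entropy` called above):

import numpy as np

def dense_vonneumann_entropy(rho):
    """-sum(lam * ln(lam)) over the eigenvalues of a Hermitian density matrix."""
    eigvals = np.linalg.eigvalsh(rho)
    eigvals = eigvals[eigvals > 1e-12]  # drop numerically-zero eigenvalues
    return -np.sum(eigvals * np.log(eigvals))

# The maximally mixed 2x2 state has entropy ln(2) ~ 0.6931.
rho = np.eye(2) / 2
print(dense_vonneumann_entropy(rho))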
Example #3
def sparsify_tensor(tensor, card):
    """Zeros out all elements in the `tensor` except `card` elements with maximum absolute values. 
    
    Parameters
    ----------
    tensor : ndarray
    card : int
        Desired number of non-zero elements in the `tensor`
        
    Returns
    -------
    ndarray of shape tensor.shape
    """
    if card >= tl.prod(tl.tensor(tensor.shape)):
        return tensor
    # `bound` is the card-th largest absolute value; entries below it are zeroed.
    bound = tl.sort(tl.abs(tensor), axis=None)[-card]
    
    return tl.where(tl.abs(tensor) < bound, tl.zeros(tensor.shape, **tl.context(tensor)), tensor)
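As a usage sketch (assuming the NumPy backend and `sparsify_tensor` as defined above), keeping `card=2` entries zeroes everything except the two largest-magnitude values:

import numpy as np
import tensorly as tl

x = tl.tensor(np.array([[1.0, -5.0, 2.0],
                        [0.5, 4.0, -0.1]]))
print(sparsify_tensor(x, 2))
# [[ 0. -5.  0.]
#  [ 0.  4.  0.]]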
Example #4
def initialize_constrained_parafac(tensor, rank, init='svd', svd='numpy_svd',
                                   random_state=None, non_negative=None, l1_reg=None,
                                   l2_reg=None, l2_square_reg=None, unimodality=None, normalize=None,
                                   simplex=None, normalized_sparsity=None,
                                   soft_sparsity=None, smoothness=None, monotonicity=None,
                                   hard_sparsity=None):
    r"""Initialize factors used in `constrained_parafac`.

    The type of initialization is set using `init`. If `init == 'random'`, the
    factor matrices are initialized from a uniform distribution using `random_state`.
    If `init == 'svd'`, the `m`-th factor matrix is initialized with the `rank`
    left singular vectors of the `m`-th unfolding of the input tensor. If `init`
    is a previously computed CP tensor, the weights are absorbed into the
    factors and the weights of the output tensor are set to 1. Lastly, the
    factors are updated with the proximal operator for the selected
    constraint(s) so that they satisfy the imposed constraints (this does not
    apply to CP-tensor initialization).

    Parameters
    ----------
    tensor : ndarray
    rank : int
    random_state : {None, int, np.random.RandomState}
    init : {'svd', 'random', cptensor}, optional
    svd : str, default is 'numpy_svd'
        function to use to compute the SVD, acceptable values in tensorly.SVD_FUNS
    non_negative : bool or dictionary
        Clips negative values to 0. If True, the non-negative constraint is applied to all modes.
    l1_reg : float or list or dictionary, optional
    l2_reg : float or list or dictionary, optional
    l2_square_reg : float or list or dictionary, optional
    unimodality : bool or dictionary, optional
        If True, the unimodality constraint is applied to all modes.
    normalize : bool or dictionary, optional
        Divides all values by the maximum value of the input array. If True, the normalize
        constraint is applied to all modes.
    simplex : float or list or dictionary, optional
    normalized_sparsity : float or list or dictionary, optional
    soft_sparsity : float or list or dictionary, optional
    smoothness : float or list or dictionary, optional
    monotonicity : bool or dictionary, optional
    hard_sparsity : float or list or dictionary, optional

    Returns
    -------
    factors : CPTensor
        An initial cp tensor.
    """
    n_modes = tl.ndim(tensor)
    rng = tl.check_random_state(random_state)

    if init == 'random':
        weights, factors = random_cp(tl.shape(tensor), rank, normalise_factors=False, **tl.context(tensor))

    elif init == 'svd':
        try:
            svd_fun = tl.SVD_FUNS[svd]
        except KeyError:
            message = 'Got svd={}. However, for the current backend ({}), the possible choices are {}'.format(
                svd, tl.get_backend(), tl.SVD_FUNS)
            raise ValueError(message)

        factors = []
        for mode in range(tl.ndim(tensor)):
            U, S, _ = svd_fun(unfold(tensor, mode), n_eigenvecs=rank)

            # Put SVD initialization on the same scaling as the tensor in case normalize_factors=False
            if mode == 0:
                idx = min(rank, tl.shape(S)[0])
                U = tl.index_update(U, tl.index[:, :idx], U[:, :idx] * S[:idx])

            if tensor.shape[mode] < rank:
                random_part = tl.tensor(rng.random_sample((U.shape[0], rank - tl.shape(tensor)[mode])),
                                        **tl.context(tensor))
                U = tl.concatenate([U, random_part], axis=1)

            factors.append(U[:, :rank])

    elif isinstance(init, (tuple, list, CPTensor)):
        try:
            weights, factors = CPTensor(init)

            if tl.all(weights == 1):
                weights, factors = CPTensor((None, factors))
            else:
                weights_avg = tl.prod(weights) ** (1.0 / tl.shape(weights)[0])
                for i in range(len(factors)):
                    factors[i] = factors[i] * weights_avg
            kt = CPTensor((None, factors))
            return kt
        except ValueError:
            raise ValueError(
                'If initialization method is a mapping, then it must '
                'be possible to convert it to a CPTensor instance'
            )
    else:
        raise ValueError('Initialization method "{}" not recognized'.format(init))

    for i in range(n_modes):
        factors[i] = proximal_operator(factors[i], non_negative=non_negative, l1_reg=l1_reg,
                                       l2_reg=l2_reg, l2_square_reg=l2_square_reg, unimodality=unimodality,
                                       normalize=normalize, simplex=simplex, normalized_sparsity=normalized_sparsity,
                                       soft_sparsity=soft_sparsity, smoothness=smoothness,
                                       monotonicity=monotonicity, hard_sparsity=hard_sparsity, n_const=n_modes, order=i)
    kt = CPTensor((None, factors))
    return kt
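The weight-handling branch for a user-supplied CP tensor can be read in isolation: when the weights are not all one, their geometric mean is folded into every factor and the output tensor carries unit weights. A NumPy sketch of the same arithmetic (illustrative shapes):

import numpy as np

rank = 3
weights = np.array([2.0, 0.5, 4.0])
factors = [np.random.rand(dim, rank) for dim in (5, 6, 7)]

# Geometric mean of the rank weights, as in the `isinstance(init, ...)` branch above.
weights_avg = np.prod(weights) ** (1.0 / weights.shape[0])
factors = [f * weights_avg for f in factors]
weights = np.ones(rank)  # the returned CP tensor carries unit weights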
Example #5
def initialize_cp(tensor,
                  rank,
                  init='svd',
                  svd='numpy_svd',
                  random_state=None,
                  normalize_factors=False):
    r"""Initialize factors used in `parafac`.

    The type of initialization is set using `init`. If `init == 'random'`, the
    factor matrices are initialized from a uniform distribution using `random_state`.
    If `init == 'svd'`, the `m`-th factor matrix is initialized with the `rank`
    left singular vectors of the `m`-th unfolding of the input tensor. If `init`
    is a previously computed CP tensor, the weights are absorbed into the
    factors and the weights of the output tensor are set to 1.

    Parameters
    ----------
    tensor : ndarray
    rank : int
    init : {'svd', 'random', cptensor}, optional
    svd : str, default is 'numpy_svd'
        function to use to compute the SVD, acceptable values in tensorly.SVD_FUNS
    random_state : {None, int, np.random.RandomState}
    normalize_factors : bool, default is False
        if True, the returned factors are normalized and their norms are stored in the weights

    Returns
    -------
    factors : CPTensor
        An initial cp tensor.

    """
    rng = tl.check_random_state(random_state)

    if init == 'random':
        kt = random_cp(tl.shape(tensor),
                       rank,
                       normalise_factors=False,
                       random_state=rng,
                       **tl.context(tensor))

    elif init == 'svd':
        try:
            svd_fun = tl.SVD_FUNS[svd]
        except KeyError:
            message = 'Got svd={}. However, for the current backend ({}), the possible choices are {}'.format(
                svd, tl.get_backend(), tl.SVD_FUNS)
            raise ValueError(message)

        factors = []
        for mode in range(tl.ndim(tensor)):
            U, S, _ = svd_fun(unfold(tensor, mode), n_eigenvecs=rank)

            # Put SVD initialization on the same scaling as the tensor in case normalize_factors=False
            if mode == 0:
                idx = min(rank, tl.shape(S)[0])
                U = tl.index_update(U, tl.index[:, :idx], U[:, :idx] * S[:idx])

            if tensor.shape[mode] < rank:
                # TODO: this is a hack but it seems to do the job for now
                random_part = tl.tensor(
                    rng.random_sample(
                        (U.shape[0], rank - tl.shape(tensor)[mode])),
                    **tl.context(tensor))
                U = tl.concatenate([U, random_part], axis=1)

            factors.append(U[:, :rank])

        kt = CPTensor((None, factors))

    elif isinstance(init, (tuple, list, CPTensor)):
        # TODO: Test this
        try:
            if normalize_factors is True:
                warnings.warn(
                    'It is not recommended to initialize from a CP tensor with normalization. '
                    'Consider normalizing the tensor before using this function'
                )

            kt = CPTensor(init)
            weights, factors = kt

            if tl.all(weights == 1):
                kt = CPTensor((None, factors))
            else:
                weights_avg = tl.prod(weights)**(1.0 / tl.shape(weights)[0])
                for i in range(len(factors)):
                    factors[i] = factors[i] * weights_avg
                kt = CPTensor((None, factors))
        except ValueError:
            raise ValueError(
                'If initialization method is a mapping, then it must '
                'be possible to convert it to a CPTensor instance')
    else:
        raise ValueError(
            'Initialization method "{}" not recognized'.format(init))

    if normalize_factors:
        kt = cp_normalize(kt)

    return kt
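The `init == 'svd'` branch can be sketched on its own: unfold the tensor along each mode, keep the `rank` left singular vectors, and pad with random columns whenever the mode dimension is smaller than `rank`. A minimal NumPy version (row-major unfolding; the helper name is hypothetical):

import numpy as np

def svd_init_mode(tensor, mode, rank, rng):
    # Mode-`mode` unfolding: move the mode to the front and flatten the rest.
    unfolding = np.moveaxis(tensor, mode, 0).reshape(tensor.shape[mode], -1)
    U, _, _ = np.linalg.svd(unfolding, full_matrices=False)
    if U.shape[1] < rank:
        # Mode dimension smaller than rank: pad with random columns.
        pad = rng.random((U.shape[0], rank - U.shape[1]))
        U = np.concatenate([U, pad], axis=1)
    return U[:, :rank]

rng = np.random.default_rng(0)
T = rng.random((4, 5, 6))
factors = [svd_init_mode(T, m, rank=3, rng=rng) for m in range(T.ndim)]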
Example #6
def parafac(tensor,
            rank,
            n_iter_max=100,
            init='svd',
            svd='numpy_svd',
            normalize_factors=False,
            tol=1e-8,
            orthogonalise=False,
            random_state=None,
            verbose=False,
            return_errors=False,
            non_negative=False,
            mask=None):
    """CANDECOMP/PARAFAC decomposition via alternating least squares (ALS)
    Computes a rank-`rank` decomposition of `tensor` [1]_ such that,

        ``tensor = [|weights; factors[0], ..., factors[-1] |]``.

    Parameters
    ----------
    tensor : ndarray
    rank  : int
        Number of components.
    n_iter_max : int
        Maximum number of iteration
    init : {'svd', 'random'}, optional
        Type of factor matrix initialization. See `initialize_factors`.
    svd : str, default is 'numpy_svd'
        function to use to compute the SVD, acceptable values in tensorly.SVD_FUNS
    normalize_factors : bool, optional
        If True, aggregate the norms of the factors in a 1D tensor of shape
        (rank,), which then holds the weights of the (normalized) factors
    tol : float, optional
        (Default: 1e-8) Relative reconstruction error tolerance. The
        algorithm is considered to have converged when the decrease in
        reconstruction error is less than `tol`.
    random_state : {None, int, np.random.RandomState}
    verbose : int, optional
        Level of verbosity
    return_errors : bool, optional
        Activate return of iteration errors
    non_negative : bool, optional
        Perform non_negative PARAFAC. See :func:`non_negative_parafac`.
    mask : ndarray
        Array of booleans with the same shape as ``tensor``: 0 where values
        are missing and 1 everywhere else. Note: if the tensor is sparse, the
        mask should also be sparse, with a fill value of 1 (or True). Allows
        for missing values [2]_.


    Returns
    -------
    KruskalTensor : (weight, factors)
        * weights : 1D array of shape (rank, )
            all ones if normalize_factors is False (default), 
            weights of the (normalized) factors otherwise
        * factors : List of factors of the CP decomposition; element `i` is of
            shape (tensor.shape[i], rank)

    errors : list
        A list of reconstruction errors at each iteration of the algorithms.

    References
    ----------
    .. [1] T.G. Kolda and B.W. Bader, "Tensor Decompositions and Applications",
       SIAM REVIEW, vol. 51, n. 3, pp. 455-500, 2009.
       
    .. [2] Tomasi, Giorgio, and Rasmus Bro. "PARAFAC and missing values."
       Chemometrics and Intelligent Laboratory Systems 75.2 (2005): 163-180.


    """
    epsilon = 10e-12

    if orthogonalise and not isinstance(orthogonalise, int):
        orthogonalise = n_iter_max

    factors = initialize_factors(tensor,
                                 rank,
                                 init=init,
                                 svd=svd,
                                 random_state=random_state,
                                 non_negative=non_negative)
    rec_errors = []
    norm_tensor = tl.norm(tensor, 2)
    weights = tl.ones(rank, **tl.context(tensor))

    for iteration in range(n_iter_max):
        if orthogonalise and iteration <= orthogonalise:
            # Assign back to `factors`; otherwise the orthogonalised result is discarded.
            factors = [tl.qr(f)[0] for f in factors]

        if verbose:
            print("Starting iteration", iteration)
        for mode in range(tl.ndim(tensor)):
            if verbose:
                print("Mode", mode, "of", tl.ndim(tensor))
            if non_negative:
                accum = 1
                # khatri_rao(factors).tl.dot(khatri_rao(factors))
                # simplifies to multiplications
                sub_indices = [i for i in range(len(factors)) if i != mode]
                for i, e in enumerate(sub_indices):
                    if i:
                        accum *= tl.dot(tl.transpose(factors[e]), factors[e])
                    else:
                        accum = tl.dot(tl.transpose(factors[e]), factors[e])

            pseudo_inverse = tl.tensor(np.ones((rank, rank)),
                                       **tl.context(tensor))
            for i, factor in enumerate(factors):
                if i != mode:
                    pseudo_inverse = pseudo_inverse * tl.dot(
                        tl.conj(tl.transpose(factor)), factor)

            if mask is not None:
                tensor = tensor * mask + tl.kruskal_to_tensor(factors,
                                                              mask=1 - mask)

            mttkrp = unfolding_dot_khatri_rao(tensor, (weights, factors), mode)

            if non_negative:
                numerator = tl.clip(mttkrp, a_min=epsilon, a_max=None)
                denominator = tl.dot(factors[mode], accum)
                denominator = tl.clip(denominator, a_min=epsilon, a_max=None)
                factor = factors[mode] * numerator / denominator
            else:
                factor = tl.transpose(
                    tl.solve(tl.conj(tl.transpose(pseudo_inverse)),
                             tl.transpose(mttkrp)))

            if normalize_factors:
                factor_norm = tl.norm(factor, axis=0)
                weights *= factor_norm
                positive_factor_norms = tl.where(
                    factor_norm == 0,
                    tl.ones(tl.shape(factor_norm), **tl.context(factors[0])),
                    factor_norm)
                factor = factor / positive_factor_norms

            factors[mode] = factor

        if tol:
            # ||tensor - rec||^2 = ||tensor||^2 + ||rec||^2 - 2*<tensor, rec>
            # This is ||kruskal_to_tensor(factors)||^2
            factors_norm = tl.sum(
                tl.prod(
                    tl.stack([tl.dot(tl.transpose(f), f) for f in factors], 0),
                    0))
            # mttkrp and factor for the last mode. This is equivalent to the
            # inner product <tensor, factorization>
            iprod = tl.sum(mttkrp * factor)
            rec_error = tl.sqrt(
                tl.abs(norm_tensor**2 + factors_norm -
                       2 * iprod)) / norm_tensor
            rec_errors.append(rec_error)

            if iteration >= 1:
                if verbose:
                    print('reconstruction error={}, variation={}.'.format(
                        rec_errors[-1], rec_errors[-2] - rec_errors[-1]))

                if tol and abs(rec_errors[-2] - rec_errors[-1]) < tol:
                    if verbose:
                        print('converged in {} iterations.'.format(iteration))
                    break
            else:
                if verbose:
                    print('reconstruction error={}'.format(rec_errors[-1]))

    kruskal_tensor = KruskalTensor((weights, factors))

    if return_errors:
        return kruskal_tensor, rec_errors
    else:
        return kruskal_tensor
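The convergence check above never forms the reconstructed tensor: it uses ||tensor - rec||^2 = ||tensor||^2 + ||rec||^2 - 2*<tensor, rec>, with ||rec||^2 obtained from the factor Gram matrices and <tensor, rec> from the last MTTKRP. The identity is easy to verify in the matrix (2-way) case:

import numpy as np

rng = np.random.default_rng(0)
A, B = rng.random((6, 3)), rng.random((7, 3))
T = rng.random((6, 7))

rec = A @ B.T
direct = np.linalg.norm(T - rec) ** 2

rec_norm_sq = np.sum((A.T @ A) * (B.T @ B))  # ||rec||^2 from Gram matrices
mttkrp = T.T @ A                             # MTTKRP for the last mode
iprod = np.sum(mttkrp * B)                   # <T, rec>
indirect = np.linalg.norm(T) ** 2 + rec_norm_sq - 2 * iprod

assert np.allclose(direct, indirect)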
Example #7
def parafac(tensor,
            rank,
            n_iter_max=100,
            init='svd',
            svd='numpy_svd',
            tol=1e-8,
            orthogonalise=False,
            random_state=None,
            verbose=False,
            return_errors=False,
            non_negative=False):
    """CANDECOMP/PARAFAC decomposition via alternating least squares (ALS)

    Computes a rank-`rank` decomposition of `tensor` [1]_ such that,

        ``tensor = [| factors[0], ..., factors[-1] |]``.

    Parameters
    ----------
    tensor : ndarray
    rank  : int
        Number of components.
    n_iter_max : int
        Maximum number of iteration
    init : {'svd', 'random'}, optional
        Type of factor matrix initialization. See `initialize_factors`.
    svd : str, default is 'numpy_svd'
        function to use to compute the SVD, acceptable values in tensorly.SVD_FUNS
    tol : float, optional
        (Default: 1e-8) Relative reconstruction error tolerance. The
        algorithm is considered to have converged when the decrease in
        reconstruction error is less than `tol`.
    random_state : {None, int, np.random.RandomState}
    verbose : int, optional
        Level of verbosity
    return_errors : bool, optional
        Activate return of iteration errors
    non_negative : bool, optional
        Perform non_negative PARAFAC. See :func:`non_negative_parafac`.

    Returns
    -------
    factors : ndarray list
        List of factors of the CP decomposition; element `i` is of shape
        (tensor.shape[i], rank)
    errors : list
        A list of reconstruction errors at each iteration of the algorithms.

    References
    ----------
    .. [1] T.G. Kolda and B.W. Bader, "Tensor Decompositions and Applications",
       SIAM REVIEW, vol. 51, n. 3, pp. 455-500, 2009.
    """
    epsilon = 10e-12

    if orthogonalise and not isinstance(orthogonalise, int):
        orthogonalise = n_iter_max

    factors = initialize_factors(tensor,
                                 rank,
                                 init=init,
                                 svd=svd,
                                 random_state=random_state,
                                 non_negative=non_negative)
    rec_errors = []
    norm_tensor = tl.norm(tensor, 2)

    for iteration in range(n_iter_max):
        if orthogonalise and iteration <= orthogonalise:
            # Assign back to `factors`; otherwise the orthogonalised result is discarded.
            factors = [tl.qr(f)[0] for f in factors]

        if verbose:
            print("Starting iteration", iteration)
        for mode in range(tl.ndim(tensor)):
            if verbose:
                print("Mode", mode, "of", tl.ndim(tensor))
            if non_negative:
                accum = 1
                # khatri_rao(factors).tl.dot(khatri_rao(factors))
                # simplifies to multiplications
                sub_indices = [i for i in range(len(factors)) if i != mode]
                for i, e in enumerate(sub_indices):
                    if i:
                        accum *= tl.dot(tl.transpose(factors[e]), factors[e])
                    else:
                        accum = tl.dot(tl.transpose(factors[e]), factors[e])

            pseudo_inverse = tl.tensor(np.ones((rank, rank)),
                                       **tl.context(tensor))
            for i, factor in enumerate(factors):
                if i != mode:
                    pseudo_inverse = pseudo_inverse * tl.dot(
                        tl.conj(tl.transpose(factor)), factor)

            #factor = tl.dot(unfold(tensor, mode), khatri_rao(factors, skip_matrix=mode).conj())
            mttkrp = tl.tenalg.unfolding_dot_khatri_rao(tensor, factors, mode)

            if non_negative:
                numerator = tl.clip(mttkrp, a_min=epsilon, a_max=None)
                denominator = tl.dot(factors[mode], accum)
                denominator = tl.clip(denominator, a_min=epsilon, a_max=None)
                factor = factors[mode] * numerator / denominator
            else:
                factor = tl.transpose(
                    tl.solve(tl.conj(tl.transpose(pseudo_inverse)),
                             tl.transpose(mttkrp)))

            factors[mode] = factor

        if tol:
            # ||tensor - rec||^2 = ||tensor||^2 + ||rec||^2 - 2*<tensor, rec>
            # This is ||kruskal_to_tensor(factors)||^2
            factors_norm = tl.sum(
                tl.prod(
                    tl.stack([tl.dot(tl.transpose(f), f) for f in factors], 0),
                    0))
            # mttkrp and factor for the last mode. This is equivalent to the
            # inner product <tensor, factorization>
            iprod = tl.sum(mttkrp * factor)
            rec_error = tl.sqrt(
                tl.abs(norm_tensor**2 + factors_norm -
                       2 * iprod)) / norm_tensor
            rec_errors.append(rec_error)

            if iteration >= 1:
                if verbose:
                    print('reconstruction error={}, variation={}.'.format(
                        rec_errors[-1], rec_errors[-2] - rec_errors[-1]))

                if tol and abs(rec_errors[-2] - rec_errors[-1]) < tol:
                    if verbose:
                        print('converged in {} iterations.'.format(iteration))
                    break
            else:
                if verbose:
                    print('reconstruction error={}'.format(rec_errors[-1]))

    if return_errors:
        return factors, rec_errors
    else:
        return factors
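The per-mode least-squares update solves the normal equations with the Hadamard product of the other factors' Gram matrices standing in for an explicit Khatri-Rao pseudo-inverse. A NumPy sketch of one mode-0 update for a 3-way tensor (row-major unfolding; illustrative shapes):

import numpy as np

rng = np.random.default_rng(1)
I, J, K, R = 4, 5, 6, 3
T = rng.random((I, J, K))
A, B, C = rng.random((I, R)), rng.random((J, R)), rng.random((K, R))

# Khatri-Rao product of the non-updated factors (columnwise Kronecker).
kr = np.einsum('jr,kr->jkr', B, C).reshape(J * K, R)
mttkrp = T.reshape(I, J * K) @ kr        # matricized tensor times Khatri-Rao
gram = (B.T @ B) * (C.T @ C)             # Hadamard product of Gram matrices
A = np.linalg.solve(gram.T, mttkrp.T).T  # the mode-0 factor update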