Example 1
def _project_tensor_slices(tensor_slices, projections, out=None):
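    # Form the projected tensor whose i-th slice is P_i^T X_i: each tensor
    # slice is mapped to a common (rank x num_cols) coordinate system.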
    if out is None:
        rank = projections[0].shape[1]
        num_slices = len(tensor_slices)
        num_cols = tensor_slices[0].shape[1]
        out = T.zeros((num_slices, rank, num_cols),
                      **T.context(tensor_slices[0]))

    for i, (tensor_slice,
            projection) in enumerate(zip(tensor_slices, projections)):
        slice_ = T.dot(T.transpose(projection), tensor_slice)
        out = tl.index_update(out, tl.index[i, :], slice_)
    return out
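
A minimal usage sketch for this helper (hypothetical shapes; assumes the NumPy backend, with `tensorly` imported as `tl` and its backend as `T` as in the snippet above):

import numpy as np

rank, num_cols = 2, 4
# Slices with varying numbers of rows, as PARAFAC2 allows
slices = [np.random.rand(j, num_cols) for j in (5, 6, 7)]
# Projection matrices P_i with orthonormal columns (here via QR)
projections = [np.linalg.qr(np.random.rand(j, rank))[0] for j in (5, 6, 7)]

projected = _project_tensor_slices(slices, projections)
print(projected.shape)  # (3, 2, 4): one rank x num_cols slice per input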
Example 2
def parafac2(
    tensor_slices,
    rank,
    n_iter_max=2000,
    init='random',
    svd='numpy_svd',
    normalize_factors=False,
    tol=1e-8,
    absolute_tol=1e-13,
    nn_modes=None,
    random_state=None,
    verbose=False,
    return_errors=False,
    n_iter_parafac=5,
):
    r"""PARAFAC2 decomposition [1]_ of a third order tensor via alternating least squares (ALS)

    Computes a rank-`rank` PARAFAC2 decomposition of the third-order tensor defined by 
    `tensor_slices`. The decomposition is of the form :math:`(A [B_i] C)` such that the
    i-th frontal slice, :math:`X_i`, of :math:`X` is given by

    .. math::
    
        X_i = B_i diag(a_i) C^T,
    
    where :math:`diag(a_i)` is the diagonal matrix whose nonzero entries are equal to
    the :math:`i`-th row of the :math:`I \times R` factor matrix :math:`A`, :math:`B_i` 
    is a :math:`J_i \times R` factor matrix such that the cross product matrix :math:`B_i^T B_i`
    is constant for all :math:`i`, and :math:`C` is a :math:`K \times R` factor matrix. 
    To compute this decomposition, we reformulate the expression for :math:`B_i` such that

    .. math::

        B_i = P_i B,

    where :math:`P_i` is a :math:`J_i \times R` matrix with orthonormal columns and :math:`B` is an
    :math:`R \times R` matrix.
    

    An alternative formulation of the PARAFAC2 decomposition is that the tensor element
    :math:`X_{ijk}` is given by

    .. math::

        X_{ijk} = \sum_{r=1}^R A_{ir} B_{ijr} C_{kr},
    
    where the same constraints hold for :math:`B_i` as above.
     

    Parameters
    ----------
    tensor_slices : ndarray or list of ndarrays
        Either a third order tensor or a list of second order tensors that may have different numbers of rows.
        Note that the second mode factor matrices are allowed to change over the first mode, not the
        third mode as some other implementations use (see note below).
    rank : int
        Number of components.
    n_iter_max : int, optional
        (Default: 2000) Maximum number of iterations

        .. versionchanged:: 0.6.1

            Previously, the default maximum number of iterations was 100.
    init : {'svd', 'random', CPTensor, Parafac2Tensor}
        Type of factor matrix initialization. See `initialize_factors`.
    svd : str, default is 'numpy_svd'
        Function to use to compute the SVD; acceptable values in tensorly.SVD_FUNS
    normalize_factors : bool (optional)
        If True, aggregate the weights of each factor in a 1D-tensor
        of shape (rank, ), which will contain the norms of the factors. Note that
        there may be some inaccuracies in the component weights.
    tol : float, optional
        (Default: 1e-8) Relative reconstruction error decrease tolerance. The
        algorithm is considered to have converged when
        :math:`\left|\| X - \hat{X}_{n-1} \|^2 - \| X - \hat{X}_{n} \|^2\right| < \epsilon \| X - \hat{X}_{n-1} \|^2`.
        That is, when the relative change in sum of squared error is less
        than the tolerance.

        .. versionchanged:: 0.6.1

            Previously, the stopping condition was
            :math:`\left|\| X - \hat{X}_{n-1} \| - \| X - \hat{X}_{n} \|\right| < \epsilon`.
    absolute_tol : float, optional
        (Default: 1e-13) Absolute reconstruction error tolerance. The algorithm
        is considered to have converged when 
        :math:`\left|\| X - \hat{X}_{n-1} \|^2 - \| X - \hat{X}_{n} \|^2\right| < \epsilon_\text{abs}`.
        That is, when the sum of squared errors is less than the specified tolerance.
        The absolute tolerance is necessary for stopping the algorithm when used on noise-free
        data that follows the PARAFAC2 constraint.

        If None, then 1000 times the machine epsilon of the factor dtype will be used.
    nn_modes: None, 'all' or array of integers
        (Default: None) Used to specify which modes to impose non-negativity constraints on.
        We cannot impose non-negativity constraints on the B-mode (mode 1) with the ALS
        algorithm, so if this mode is among the constrained modes, then a warning will be shown
        (see notes for more info).
    random_state : {None, int, np.random.RandomState}
    verbose : int, optional
        Level of verbosity
    return_errors : bool, optional
        Activate return of iteration errors
    n_iter_parafac : int, optional
        Number of PARAFAC iterations to perform for each PARAFAC2 iteration

    Returns
    -------
    Parafac2Tensor : (weights, factors, projection_matrices)
        * weights : 1D array of shape (rank, )
            all ones if normalize_factors is False (default), 
            weights of the (normalized) factors otherwise
        * factors : List of factors of the CP decomposition; element `i` is of shape
            (tensor.shape[i], rank)
        * projection_matrices : List of projection matrices used to create evolving
            factors.
         
    errors : list
        A list of reconstruction errors at each iteration of the algorithm.

    References
    ----------
    .. [1] Kiers, H.A.L., ten Berge, J.M.F. and Bro, R. (1999), 
            PARAFAC2—Part I. A direct fitting algorithm for the PARAFAC2 model. 
            J. Chemometrics, 13: 275-294.

    Notes
    -----
    This formulation of the PARAFAC2 decomposition is slightly different from the one in [1]_.
    The difference lies in that here, the second mode changes over the first mode, whereas in
    [1]_, the second mode changes over the third mode. We made this change because it means
    that the function accepts both lists of matrices and a single nd-array as input without
    any reordering of the modes.

    Because of the reformulation above, :math:`B_i = P_i B`, the :math:`B_i` matrices
    cannot be constrained to be non-negative with ALS. If this mode is constrained to be
    non-negative, then :math:`B` will be non-negative, but not the orthogonal `P_i` matrices.
    Consequently, the `B_i` matrices are unlikely to be non-negative.
    """
    weights, factors, projections = initialize_decomposition(
        tensor_slices, rank, init=init, svd=svd, random_state=random_state)

    rec_errors = []
    norm_tensor = tl.sqrt(
        sum(tl.norm(tensor_slice, 2)**2 for tensor_slice in tensor_slices))
    svd_fun = _get_svd(svd)

    if absolute_tol is None:
        absolute_tol = tl.eps(factors[0].dtype) * 1000

    # If nn_modes is set, we use HALS, otherwise, we use the standard parafac implementation.
    if nn_modes is None:

        def parafac_updates(X, w, f):
            return parafac(X,
                           rank,
                           n_iter_max=n_iter_parafac,
                           init=(w, f),
                           svd=svd,
                           orthogonalise=False,
                           verbose=verbose,
                           return_errors=False,
                           normalize_factors=False,
                           mask=None,
                           random_state=random_state,
                           tol=1e-100)[1]
    else:
        if nn_modes == 'all' or 1 in nn_modes:
            warn(
                "Mode `1` of PARAFAC2 fitted with ALS cannot be constrained to be truly non-negative. See the documentation for more info."
            )

        def parafac_updates(X, w, f):
            return non_negative_parafac_hals(X,
                                             rank,
                                             n_iter_max=n_iter_parafac,
                                             init=(w, f),
                                             svd=svd,
                                             nn_modes=nn_modes,
                                             verbose=verbose,
                                             return_errors=False,
                                             tol=1e-100)[1]

    projected_tensor = tl.zeros([factor.shape[0] for factor in factors],
                                **T.context(factors[0]))

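    # Each ALS iteration: (1) absorb the weights into the B factor, (2) update
    # the orthogonal projections P_i, (3) project every slice to P_i^T X_i, and
    # (4) run a few CP (PARAFAC) iterations on the projected tensor.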
    for iteration in range(n_iter_max):
        if verbose:
            print("Starting iteration", iteration)
        factors[1] = factors[1] * T.reshape(weights, (1, -1))
        weights = T.ones(weights.shape, **tl.context(tensor_slices[0]))

        projections = _compute_projections(tensor_slices,
                                           factors,
                                           svd_fun,
                                           out=projections)
        projected_tensor = _project_tensor_slices(tensor_slices,
                                                  projections,
                                                  out=projected_tensor)
        factors = parafac_updates(projected_tensor, weights, factors)

        if normalize_factors:
            new_factors = []
            for factor in factors:
                norms = T.norm(factor, axis=0)
                norms = tl.where(
                    tl.abs(norms) <= tl.eps(factor.dtype),
                    tl.ones(tl.shape(norms), **tl.context(factors[0])), norms)

                weights = weights * norms
                new_factors.append(factor / (tl.reshape(norms, (1, -1))))

            factors = new_factors

        if tol:
            rec_error = _parafac2_reconstruction_error(
                tensor_slices, (weights, factors, projections))
            rec_error /= norm_tensor
            rec_errors.append(rec_error)

            if iteration >= 1:
                if verbose:
                    print('PARAFAC2 reconstruction error={}, variation={}.'.
                          format(rec_errors[-1],
                                 rec_errors[-2] - rec_errors[-1]))

                if abs(rec_errors[-2]**2 - rec_errors[-1]**2) < (
                        tol *
                        rec_errors[-2]**2) or rec_errors[-1]**2 < absolute_tol:
                    if verbose:
                        print('converged in {} iterations.'.format(iteration))
                    break
            else:
                if verbose:
                    print('PARAFAC2 reconstruction error={}'.format(
                        rec_errors[-1]))

    parafac2_tensor = Parafac2Tensor((weights, factors, projections))

    if return_errors:
        return parafac2_tensor, rec_errors
    else:
        return parafac2_tensor
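
A minimal usage sketch for `parafac2` (hypothetical data; assumes the NumPy backend and that this function is the one exposed as tensorly.decomposition.parafac2):

import numpy as np
from tensorly.decomposition import parafac2

# Three slices with different numbers of rows (the evolving mode)
slices = [np.random.rand(j, 4) for j in (5, 6, 7)]

(weights, factors, projections), errors = parafac2(
    slices, rank=2, return_errors=True, random_state=0)

A, B, C = factors                    # A: (3, 2), B: (2, 2), C: (4, 2)
B_is = [P @ B for P in projections]  # evolving factors B_i = P_i B, each J_i x 2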
Example 3
def robust_pca(X,
               mask=None,
               tol=10e-7,
               reg_E=1,
               reg_J=1,
               mu_init=10e-5,
               mu_max=10e9,
               learning_rate=1.1,
               n_iter_max=100,
               verbose=1):
    """Robust Tensor PCA via ALM with support for missing values

        Decomposes a tensor `X` into the sum of a low-rank component `D`
        and a sparse component `E`.

    Parameters
    ----------
    X : ndarray
        tensor data of shape (n_samples, N1, ..., NS)
    mask : ndarray
        array of booleans with the same shape as `X`;
        should be 0 where the values are missing and 1 everywhere else
    tol : float
        convergence tolerance: the algorithm stops once the residual norms fall below it
    reg_E : float, optional, default is 1
        regularisation on the sparse part `E`
    reg_J : float, optional, default is 1
        regularisation on the low rank part `D`
    mu_init : float, optional, default is 10e-5
        initial value for mu
    mu_max : float, optional, default is 10e9
        maximal value for mu
    learning_rate : float, optional, default is 1.1
        multiplicative factor by which mu is increased at each iteration
    n_iter_max : int, optional, default is 100
        maximum number of iterations
    verbose : int, default is 1
        level of verbosity

    Returns
    -------
    (D, E)
        Robust decomposition of `X`

    D : `X`-like array
        low-rank part
    E : `X`-like array
        sparse error part

    Notes
    -----
    The problem we solve is, for an input tensor :math:`\\tilde X`:

    .. math::
       :nowrap:

        \\begin{equation*}
        \\begin{aligned}
           & \\min_{\\{J_i\\}, \\tilde D, \\tilde E}
           & & \\sum_{i=1}^N  \\text{reg}_J \\|J_i\\|_* + \\text{reg}_E \\|E\\|_1 \\\\
           & \\text{subject to}
           & & \\tilde X  = \\tilde D + \\tilde E \\\\
           & & & D_{[i]} =  J_i,  \\text{ for each } i \\in \\{1, 2, \\cdots, N\\}\\\\
        \\end{aligned}
        \\end{equation*}

    """
    if mask is None:
        mask = 1

    # Initialise the decompositions
    D = T.zeros_like(X, **T.context(X))  # low rank part
    E = T.zeros_like(X, **T.context(X))  # sparse part
    L_x = T.zeros_like(X, **T.context(
        X))  # Lagrangian multipliers for the X = D + E constraint
    J = [T.zeros_like(X, **T.context(X))
         for _ in range(T.ndim(X))]  # Low-rank modes of X
    L = [T.zeros_like(X, **T.context(X))
         for _ in range(T.ndim(X))]  # Lagrangian multipliers for J

    # Norm of the reconstructions at each iteration
    rec_X = []
    rec_D = []

    mu = mu_init

    for iteration in range(n_iter_max):

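        # Update each low-rank mode estimate J_i by singular value
        # thresholding of the mode-i unfolding of (D + L[i] / mu)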
        for i in range(T.ndim(X)):
            J[i] = fold(
                svd_thresholding(
                    unfold(D, i) + unfold(L[i], i) / mu, reg_J / mu), i,
                X.shape)

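        # Update D as the average of the (X - E + L_x / mu) term and the
        # (J[i] - L[i] / mu) terms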
        D = L_x / mu + X - E
        for i in range(T.ndim(X)):
            D += J[i] - L[i] / mu
        D /= (T.ndim(X) + 1)

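        # Update the sparse part by soft-thresholding the residual; where the
        # mask is 0 (missing entries) the threshold is 0, so E absorbs them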
        E = soft_thresholding(X - D + L_x / mu, mask * reg_E / mu)

        # Update the Lagrangian multipliers
        for i in range(T.ndim(X)):
            L[i] += mu * (D - J[i])

        L_x += mu * (X - D - E)

        mu = min(mu * learning_rate, mu_max)

        # Evolution of the reconstruction errors
        rec_X.append(T.norm(X - D - E, 2))
        rec_D.append(max([T.norm(low_rank - D, 2) for low_rank in J]))

        # Convergence check
        if iteration > 1:
            if max(rec_X[-1], rec_D[-1]) <= tol:
                if verbose:
                    print('\nConverged in {} iterations'.format(iteration))
                break
            elif verbose:
                print("[INFO] iter:", iteration, " error:",
                      max(rec_X[-1], rec_D[-1]).item())

    return D, E
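
A minimal usage sketch for `robust_pca` (synthetic data; assumes the NumPy backend):

import numpy as np

rng = np.random.default_rng(0)
# Rank-1 tensor corrupted by a few large sparse entries
low_rank = np.einsum('i,j,k->ijk', rng.random(10), rng.random(11), rng.random(12))
spikes = rng.random(low_rank.shape) < 0.05
corruption = np.where(spikes, 5 * rng.random(low_rank.shape), 0.0)

D, E = robust_pca(low_rank + corruption, n_iter_max=50, verbose=0)
# D should approximate `low_rank`; E should capture the sparse spikes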