Esempio n. 1
0
def test_clip():
    # Test that clip can work with single arguments
    X = T.tensor([0.0, -1.0, 1.0])
    X_low = T.tensor([0.0, 0.0, 1.0])
    X_high = T.tensor([0.0, -1.0, 0.0])
    assert_array_equal(tl.clip(X, a_min=0.0), X_low)
    assert_array_equal(tl.clip(X, a_max=0.0), X_high)

    # More extensive test with a larger random tensor
    rng = tl.check_random_state(0)
    tensor = tl.tensor(rng.random_sample((10, 10, 10)).astype('float32'))

    val1 = np.float32(rng.random_sample())
    val2 = np.float32(rng.random_sample())
    limits = [(min(val1, val2), max(val1, val2)), (-1, 2),
              (tl.max(tensor) + 1, None), (None, tl.min(tensor) - 1),
              (tl.max(tensor), None), (tl.min(tensor), None),
              (None, tl.max(tensor)), (None, tl.min(tensor))]

    for min_val, max_val in limits:
        message = f"Tensor clipped incorrectly with min_val={min_val} and max_val={max_val}. Tensor bounds are ({tl.to_numpy(tl.min(tensor))}, {tl.to_numpy(tl.max(tensor))}"
        if min_val is not None:
            assert tl.all(tl.clip(tensor, min_val, None) >= min_val), message
            assert tl.all(
                tl.clip(tensor, min_val, max_val) >= min_val), message
        if max_val is not None:
            assert tl.all(tl.clip(tensor, None, max_val) <= max_val), message
            assert tl.all(
                tl.clip(tensor, min_val, max_val) <= max_val), message
Esempio n. 2
0
def test_parafac2_normalize_factors():
    rng = check_random_state(1234)
    rank = 2  # Rank 2 so we only need to test rank of minimum and maximum

    random_parafac2_tensor = random_parafac2(
        shapes=[(15 + rng.randint(5), 30) for _ in range(25)],
        rank=rank,
        random_state=rng,
    )
    random_parafac2_tensor.factors[0] = random_parafac2_tensor.factors[0] + 0.1
    norms = tl.ones(rank)
    for factor in random_parafac2_tensor.factors:
        norms = norms * tl.norm(factor, axis=0)

    slices = parafac2_to_tensor(random_parafac2_tensor)

    unnormalized_rec = parafac2(slices,
                                rank,
                                random_state=rng,
                                normalize_factors=False)
    assert unnormalized_rec.weights[0] == 1

    normalized_rec = parafac2(slices,
                              rank,
                              random_state=rng,
                              normalize_factors=True,
                              n_iter_max=1000)
    assert tl.max(tl.abs(T.norm(normalized_rec.factors[0], axis=0) - 1)) < 1e-5
    assert abs(tl.max(norms) -
               tl.max(normalized_rec.weights)) / tl.max(norms) < 1e-2
    assert abs(tl.min(norms) -
               tl.min(normalized_rec.weights)) / tl.min(norms) < 1e-2
Esempio n. 3
0
def test_tucker():
    """Test for the Tucker decomposition"""
    rng = check_random_state(1234)

    tol_norm_2 = 10e-3
    tol_max_abs = 10e-1
    tensor = tl.tensor(rng.random_sample((3, 4, 3)))
    core, factors = tucker(tensor, rank=None, n_iter_max=200, verbose=True)
    reconstructed_tensor = tucker_to_tensor((core, factors))
    norm_rec = tl.norm(reconstructed_tensor, 2)
    norm_tensor = tl.norm(tensor, 2)
    assert ((norm_rec - norm_tensor) / norm_rec < tol_norm_2)

    # Test the max abs difference between the reconstruction and the tensor
    assert (tl.max(tl.abs(reconstructed_tensor - tensor)) < tol_max_abs)

    # Test the shape of the core and factors
    ranks = [2, 3, 1]
    core, factors = tucker(tensor, rank=ranks, n_iter_max=100, verbose=1)
    for i, rank in enumerate(ranks):
        assert_equal(factors[i].shape, (tensor.shape[i], ranks[i]),
                     err_msg="factors[{}].shape={}, expected {}".format(
                         i, factors[i].shape, (tensor.shape[i], ranks[i])))
        assert_equal(tl.shape(core)[i],
                     rank,
                     err_msg="Core.shape[{}]={}, "
                     "expected {}".format(i, core.shape[i], rank))

    # Random and SVD init should converge to a similar solution
    tol_norm_2 = 10e-1
    tol_max_abs = 10e-1

    core_svd, factors_svd = tucker(tensor,
                                   rank=[3, 4, 3],
                                   n_iter_max=200,
                                   init='svd',
                                   verbose=1)
    core_random, factors_random = tucker(tensor,
                                         rank=[3, 4, 3],
                                         n_iter_max=200,
                                         init='random',
                                         random_state=1234)
    rec_svd = tucker_to_tensor((core_svd, factors_svd))
    rec_random = tucker_to_tensor((core_random, factors_random))
    error = tl.norm(rec_svd - rec_random, 2)
    error /= tl.norm(rec_svd, 2)
    assert_(error < tol_norm_2,
            'norm 2 of difference between svd and random init too high')
    assert_(
        tl.max(tl.abs(rec_svd - rec_random)) < tol_max_abs,
        'abs norm of difference between svd and random init too high')
Esempio n. 4
0
def test_parafac_power_iteration():
    """Test for symmetric Parafac optimized with robust tensor power iterations"""
    rng = check_random_state(1234)
    tol_norm_2 = 10e-1
    tol_max_abs = 10e-1

    shape = (5, 3, 4)
    rank = 4
    tensor = random_cp(shape, rank=rank, full=True, random_state=rng)
    ktensor = parafac_power_iteration(tensor,
                                      rank=10,
                                      n_repeat=10,
                                      n_iteration=10)

    rec = tl.cp_to_tensor(ktensor)
    error = tl.norm(rec - tensor, 2) / tl.norm(tensor, 2)
    assert_(
        error < tol_norm_2,
        f'Norm 2 of reconstruction error={error} higher than tol={tol_norm_2}.'
    )
    error = tl.max(tl.abs(rec - tensor))
    assert_(
        error < tol_max_abs,
        f'Absolute norm of reconstruction error={error} higher than tol={tol_max_abs}.'
    )
Esempio n. 5
0
def simplex_prox(tensor, parameter):
    """
    Projects the input tensor on the simplex of radius parameter.

    Parameters
    ----------
    tensor : ndarray
    parameter : float

    Returns
    -------
    ndarray

    References
    ----------
    .. [1]: Held, Michael, Philip Wolfe, and Harlan P. Crowder.
            "Validation of subgradient optimization."
            Mathematical programming 6.1 (1974): 62-88.
    """
    _, col = tl.shape(tensor)
    tensor = tl.clip(tensor, 0, tl.max(tensor))
    tensor_sort = tl.sort(tensor, axis=0, descending=True)

    to_change = tl.sum(tl.where(
        tensor_sort > (tl.cumsum(tensor_sort, axis=0) - parameter), 1.0, 0.0),
                       axis=0)
    difference = tl.zeros(col)
    for i in range(col):
        if to_change[i] > 0:
            difference = tl.index_update(
                difference, tl.index[i],
                tl.cumsum(tensor_sort, axis=0)[int(to_change[i] - 1), i])
    difference = (difference - parameter) / to_change
    return tl.clip(tensor - difference, a_min=0)
Esempio n. 6
0
def test_symmetric_parafac_power_iteration(monkeypatch):
    """Test for symmetric Parafac optimized with robust tensor power iterations"""
    rng = tl.check_random_state(1234)
    tol_norm_2 = 10e-1
    tol_max_abs = 10e-1

    size = 5
    rank = 4
    true_factor = tl.tensor(rng.random_sample((size, rank)))
    true_weights = tl.ones(rank)
    tensor = tl.cp_to_tensor((true_weights, [true_factor] * 3))
    weights, factor = symmetric_parafac_power_iteration(tensor,
                                                        rank=10,
                                                        n_repeat=10,
                                                        n_iteration=10)

    rec = tl.cp_to_tensor((weights, [factor] * 3))
    error = tl.norm(rec - tensor, 2)
    error /= tl.norm(tensor, 2)
    assert_(error < tol_norm_2, 'norm 2 of reconstruction higher than tol')
    # Test the max abs difference between the reconstruction and the tensor
    assert_(
        tl.max(tl.abs(rec - tensor)) < tol_max_abs,
        'abs norm of reconstruction error higher than tol')
    assert_class_wrapper_correctly_passes_arguments(
        monkeypatch,
        symmetric_parafac_power_iteration,
        SymmetricCP,
        ignore_args={},
        rank=3)
Esempio n. 7
0
def test_parafac2_slice_and_tensor_input():
    rank = 3

    random_parafac2_tensor = random_parafac2(shapes=[(15, 30)
                                                     for _ in range(25)],
                                             rank=rank,
                                             random_state=1234)
    tensor = parafac2_to_tensor(random_parafac2_tensor)
    slices = parafac2_to_slices(random_parafac2_tensor)

    slice_rec = parafac2(slices,
                         rank,
                         random_state=1234,
                         normalize_factors=False,
                         n_iter_max=100)
    slice_rec_tensor = parafac2_to_tensor(slice_rec)

    tensor_rec = parafac2(tensor,
                          rank,
                          random_state=1234,
                          normalize_factors=False,
                          n_iter_max=100)
    tensor_rec_tensor = parafac2_to_tensor(tensor_rec)

    assert tl.max(tl.abs(slice_rec_tensor - tensor_rec_tensor)) < 1e-8
Esempio n. 8
0
def test_partial_tucker():
    """Test for the Partial Tucker decomposition"""
    rng = tl.check_random_state(1234)
    tol_norm_2 = 10e-3
    tol_max_abs = 10e-1
    tensor = tl.tensor(rng.random_sample((3, 4, 3)))
    modes = [1, 2]
    core, factors = partial_tucker(tensor, modes, rank=None, n_iter_max=200, verbose=True)
    reconstructed_tensor = multi_mode_dot(core, factors, modes=modes)
    norm_rec = tl.norm(reconstructed_tensor, 2)
    norm_tensor = tl.norm(tensor, 2)
    assert_((norm_rec - norm_tensor)/norm_rec < tol_norm_2)

    # Test the max abs difference between the reconstruction and the tensor
    assert_(tl.max(tl.abs(norm_rec - norm_tensor)) < tol_max_abs)

    # Test the shape of the core and factors
    ranks = [3, 1]
    core, factors = partial_tucker(tensor, modes=modes, rank=ranks, n_iter_max=100, verbose=1)
    for i, rank in enumerate(ranks):
        assert_equal(factors[i].shape, (tensor.shape[i+1], ranks[i]),
                     err_msg="factors[{}].shape={}, expected {}".format(
                         i, factors[i].shape, (tensor.shape[i+1], ranks[i])))
    assert_equal(core.shape, [tensor.shape[0]]+ranks, err_msg="Core.shape={}, "
                     "expected {}".format(core.shape, [tensor.shape[0]]+ranks))

    # Test random_state fixes the core and the factor matrices
    core1, factors1 = partial_tucker(tensor, modes=modes, rank=ranks, random_state=0)
    core2, factors2 = partial_tucker(tensor, modes=modes, rank=ranks, random_state=0)
    assert_array_equal(core1, core2)
    for factor1, factor2 in zip(factors1, factors2):
        assert_array_equal(factor1, factor2)
Esempio n. 9
0
def test_parafac2_to_tensor():
    rng = check_random_state(1234)
    rank = 3

    I = 25
    J = 15
    K = 30

    weights, factors, projections = random_parafac2(shapes=[(J, K)] * I,
                                                    rank=rank,
                                                    random_state=rng)

    constructed_tensor = parafac2_to_tensor((weights, factors, projections))
    tensor_manual = T.zeros((I, J, K), **T.context(weights))

    for i in range(I):
        Bi = T.dot(projections[i], factors[1])
        for j in range(J):
            for k in range(K):
                for r in range(rank):
                    tensor_manual = tl.index_update(
                        tensor_manual, tl.index[i, j,
                                                k], tensor_manual[i, j, k] +
                        factors[0][i][r] * Bi[j][r] * factors[2][k][r])

    assert_(tl.max(tl.abs(constructed_tensor - tensor_manual)) < 1e-6)
Esempio n. 10
0
def monotonicity_prox(tensor, decreasing=False):
    """
    This function projects each column of the input array on the set of arrays so that
          x[1] <= x[2] <= ... <= x[n] (decreasing=False)
                        or
          x[1] >= x[2] >= ... >= x[n] (decreasing=True)
    is satisfied columnwise.

    Parameters
    ----------
    tensor : ndarray
    decreasing : If it is True, function returns columnwise
                 monotone decreasing tensor. Otherwise, returned array
                 will be monotone increasing.
                 Default: True

    Returns
    -------
    ndarray
          A tensor of which columns' are monotonic.

    References
    ----------
    .. [1]: G. Chierchia, E. Chouzenoux, P. L. Combettes, and J.-C. Pesquet
            "The Proximity Operator Repository. User's guide"
    """
    if tl.ndim(tensor) == 1:
        tensor = tl.reshape(tensor, [tl.shape(tensor)[0], 1])
    elif tl.ndim(tensor) > 2:
        raise ValueError(
            "Monotonicity prox doesn't support an input which has more than 2 dimensions."
        )
    tensor_mon = tl.copy(tensor)
    if decreasing:
        tensor_mon = tl.flip(tensor_mon, axis=0)
    row, column = tl.shape(tensor_mon)
    cum_sum = tl.cumsum(tensor_mon, axis=0)
    for j in range(column):
        assisted_tensor = tl.zeros([row, row])
        for i in range(row):
            if i == 0:
                assisted_tensor = tl.index_update(
                    assisted_tensor, tl.index[i, i:], cum_sum[i:, j] /
                    tl.tensor(tl.arange(row - i) + 1, **tl.context(tensor)))
            else:
                assisted_tensor = tl.index_update(
                    assisted_tensor, tl.index[i, i:],
                    (cum_sum[i:, j] - cum_sum[i - 1, j]) /
                    tl.tensor(tl.arange(row - i) + 1, **tl.context(tensor)))
        tensor_mon = tl.index_update(tensor_mon, tl.index[:, j],
                                     tl.max(assisted_tensor, axis=0))
        for i in reversed(range(row - 1)):
            if tensor_mon[i, j] > tensor_mon[i + 1, j]:
                tensor_mon = tl.index_update(tensor_mon, tl.index[i, j],
                                             tensor_mon[i + 1, j])
    if decreasing:
        tensor_mon = tl.flip(tensor_mon, axis=0)
    return tensor_mon
Esempio n. 11
0
def print_weight(model_path):
    model = load_model(model_path)

    conv_norm = list()
    conv_max = list()
    conv_min = list()

    depconv_norm = list()
    depconv_max = list()
    depconv_min = list()

    for layer in model.layers:
        print(layer.name)
        if layer.get_weights() and type(layer).__name__ != 'DepthwiseConv2D':
            conv, _ = layer.get_weights()
            conv_norm.append(tt.norm(conv))
            conv_max.append(tt.max(conv))
            conv_min.append(tt.min(conv))

#            plt.figure(figsize=(10,10))
#            plt.imshow(my_unfold(conv, -1))
#            plt.colorbar()
#            plt.show()
#            plt.clf()

        elif layer.get_weights() and type(layer).__name__ == 'DepthwiseConv2D':
            w = layer.get_weights()
            depconv_norm.append(tt.norm(w[0]))
            depconv_max.append(tt.max(w[0]))
            depconv_min.append(tt.min(w[0]))


#            plt.figure(figsize=(20,20))
#            plt.imshow(my_unfold(w[0], -1))
#            plt.colorbar()
#            plt.show()
#            plt.clf()

    conv_info = [conv_norm, conv_max, conv_min]
    depconv_info = [depconv_norm, depconv_max, depconv_min]

    return conv_info, depconv_info
Esempio n. 12
0
    def test_R2X(self):
        """Test to ensure R2X for higher components is larger."""
        tensor = tl.tensor(np.random.rand(12, 10, 15))
        arr = [find_R2X(tensor, perform_decomposition(tensor, i)) for i in range(1, 8)]

        for j in range(len(arr) - 1):
            self.assertTrue(arr[j] < arr[j + 1])

        # confirm R2X is >= 0 and <=1
        self.assertGreaterEqual(tl.min(arr), 0)
        self.assertLessEqual(tl.max(arr), 1)
Esempio n. 13
0
def test_pad_by_zeros():
    """Test that if we pad a tensor by zeros, then it doesn't change.

    This failed for TensorFlow at some point.
    """
    rng = check_random_state(1234)
    rank = 3

    I = 25
    J = 15
    K = 30

    weights, factors, projections = random_parafac2(shapes=[(J, K)] * I,
                                                    rank=rank,
                                                    random_state=rng)
    constructed_tensor = parafac2_to_tensor((weights, factors, projections))
    padded_tensor = _pad_by_zeros(constructed_tensor)
    assert_(tl.max(tl.abs(constructed_tensor - padded_tensor)) < 1e-10)
Esempio n. 14
0
def test_partial_tucker():
    """Test for the Partial Tucker decomposition"""
    rng = check_random_state(1234)
    tol_norm_2 = 10e-3
    tol_max_abs = 10e-1
    tensor = tl.tensor(rng.random_sample((3, 4, 3)))
    modes = [1, 2]
    for svd_func in tl.SVD_FUNS:
        if tl.get_backend() == 'tensorflow_graph' and svd_func == 'numpy_svd':
            continue  # TODO(craymichael)

        core, factors = partial_tucker(tensor,
                                       modes,
                                       rank=None,
                                       n_iter_max=200,
                                       svd=svd_func,
                                       verbose=True)
        reconstructed_tensor = multi_mode_dot(core, factors, modes=modes)
        norm_rec = tl.to_numpy(tl.norm(reconstructed_tensor, 2))
        norm_tensor = tl.to_numpy(tl.norm(tensor, 2))
        assert_((norm_rec - norm_tensor) / norm_rec < tol_norm_2)

        # Test the max abs difference between the reconstruction and the tensor
        assert_(
            tl.to_numpy(tl.max(tl.abs(norm_rec - norm_tensor))) < tol_max_abs)

        # Test the shape of the core and factors
        ranks = [3, 1]
        core, factors = partial_tucker(tensor,
                                       modes=modes,
                                       rank=ranks,
                                       n_iter_max=100,
                                       svd=svd_func,
                                       verbose=1)
        for i, rank in enumerate(ranks):
            assert_equal(
                factors[i].shape, (tensor.shape[i + 1], ranks[i]),
                err_msg="factors[{}].shape={}, expected {} (svd=\"{}\")".
                format(i, factors[i].shape, (tensor.shape[i + 1], ranks[i]),
                       svd_func))
        assert_equal(core.shape, [tensor.shape[0]] + ranks,
                     err_msg="Core.shape={}, "
                     "expected {} (svd=\"{}\")".format(
                         core.shape, [tensor.shape[0]] + ranks, svd_func))
Esempio n. 15
0
def gcp(X, R, type='normal', func=None, grad=None, lower=None,\
        opt='lbfgsb', mask=None, maxiters=1000, \
        init='random', printitn=10, state=None, factr=1e7, pgtol=1e-4, \
        fsamp=None, gsamp=None, oversample=1.1, sampler='uniform', \
        fsampler=None, rate=1e-3, decay=0.1, maxfails=1, epciters=1000, \
        festtol=-math.inf, beta1=0.9, beta2=0.999, epsilon=1e-8):
    """Generalized CANDECOMP/PARAFAC (GCP) decomposition via all-at-once optimization (OPT) [1]
    Computes a rank-'R' decomposition of 'tensor' such that::

      tensor = [|weights; factors[0], ..., factors[-1] |].

    GCP-OPT allows the use of a variety of statistically motivated loss functions
    suited to the data held in a tensor (i.e. continuous, discrete, binary, etc)

    Parameters
    ----------
    X : ndarray
        Tensor to factorize
        **COMING SOON**
        Sparse tensor support
    R : int
        Rank of decomposition (Number of components).
    type : str,
        Type of objective function used
        Options include:
            'normal' or 'gaussian'          - Gaussian for real-valued data (DEFAULT)
            'binary' or 'bernoulli-odds'    - Bernoulli w/ odds link for binary data
            'bernoulli-logit'               - Bernoulli w/ logit link for binary data
            'count' or 'poisson'            - Poisson for count data
            'poisson-log'                   - Poisson w/ log link for count data
            'rayleigh'                      - Rayleigh distribution for real-valued data
            'gamma'                         - Gamma distribution for non-negative real-valued data
        **COMING SOON**:
            'huber (DELTA)                  - Similar to Gaussian, for real-valued data
            'negative-binomial (r)'         - Negative binomial for count data
            'beta (BETA)'                   - Beta divergence for non-negative real-valued data
            'user-specified'                - Customized objective function provided by user
    func: lambda function
        User specified custom objective function, eg. lambda x, m: (m-x)**2
    grad: lambda function
        User specified custom gradient function, eg. lambda x, m: 2*(m-x)
    lower: 0 or -inf
        Lower bound for custom objective/gradient
    opt : str
        Optimization method
        Options include:
            'lbfgsb'    - Bound-constrained limited-memory BFGS
            'sgd'       - Stochastic gradient descent (SGD)
            **COMING SOON**
            'adam'      - Momentum-based SGD method
            'adagrad'   - Adaptive gradient algorithm, well suited for sparse data
        If 'tensor' is dense, all 4 options can be used, 'lbfgsb' by default.
        **COMING SOON** - Sparse format support
        If 'tensor' is sparse, only 'sgd', 'adam' and 'adagrad' can be used, 'adam' by
        default.
        Each method has specific parameters, see documentation
    mask : ndarray
        Specifies a mask, 0's for missing/incomplete entries, 1's elsewhere, with
        the same shape as 'tensor'.
        **COMING SOON** - Missing/incomplete data simulation.
    maxiters : int
        Maximum number of outer iterations, 1000 by default.
    init : {'random', 'svd', cptensor}
        Initialization for factor matrices, 'random' by default.
        Options include:
            'random'    - random initialization from a uniform distribution on [0,1)
            'svd'       - initialize the `m`th factor matrix using the `rank` left
                          singular vectors of the `m`th unfolding of the input tensor.
            cptensor    - initialization provided by user.  NOTE: weights are pulled
                          in the last factor and then the weights are set to "1" for
                          the output tensor.
        Initializations all result in a cptensor where the weights are one.
    printitn : int
        Print every n iterations; 0 for no printing, 10 by default.
    state : {None, int, np.random.RandomState}
        Seed for reproducable random number generation
    factr : float
        (L-BFGS-B parameter)
        Tolerance on the change of objective values. Defaults to 1e7.
    pgtol : float
        (L-BFGS-B parameter)
        Projected gradient tolerance.  Defaults to 1e-5
    sampler : {uniform, stratified, semi-stratified}
        Type of sampling to use for stochastic gradient (SGD/ADAM/ADAGRAD).
        Defaults to 'uniform' for dense tensors.
        Defaults to 'stratified' for sparse tensors.
        Options include:
            'uniform'           - Uniform random sampling
            **COMING SOON**
            'stratified'        - Stratified sampling, targets sparse data. Zero and
                                  nonzero values sampled separately.
            'semi-stratified'   - Similar to stratified sampling, but is more
                                  computationally efficient (See papers referenced).
    gsamp : int
        Number of samples for stochastic gradient (SGD/ADAM/ADAGRAD parameter).
        Generally set to be O(sum(shape)*R).
        **COMING SOON**
        For stratified or semi-stratified, this may be two numbers:
            - the number of nnz samples
            - the number of zero samples.
        If only one number is specified, then this value is used for both nnzs and
        zeros (total number of samples is 2x specified value in this case).
    fsampler : {'uniform', 'stratified', custom}
        Type of sampling for estimating objective function (SGD/ADAM/ADAGRAD parameter).
        Options include:
            'uniform'       - Uniform random sampling
            **COMING SOON**
            'stratified'    - Stratified sampling, targets sparse data. Zero and
                              nonzero values sampled separately.
            custom          - User-defined sampler (lambda function). Custom option
                              is primarily useful in reusing sampled elements across
                              multiple tests.
    fsamp : int
        (SGD/ADAM/ADAGRAD parameter)
        Number of samples to estimate objective function.
        This should generally be somewhat large since we want this sample to generate a
        reliable estimate of the true function value.
    oversample : float
        (Stratified sampling parameter)
        Factor to oversample when implicitly sampling zeros in the sparse case.
        Defaults to 1.1. Only adjust for very small tensors.
    rate : float
        (SGD/ADAM parameter)
        Initial learning rate. Defaults to 1e-3.
    decay : float
        (SGD/ADAM parameter)
        Amount to decrease learning rate when progress stagnates, i.e. no change in
        objective function between epochs.  Defaults to 0.1.
    maxfails : int
        (SGD/ADAM parameter)
        Number of times to decrease the learning rate.
        Defaults to 1, may be set to zero.
    epciters : int
        (SGD/ADAM parameter)
        Iterations per epoch. Defaults to 1000.
    festtol : float
        (SGD/ADAM parameter)
        Quit estimation of function if it goes below this level.
        Defaults to -inf.
    beta1 : float
        (ADAM parameter)    - generally doesn't need to be changed
        Defaults to 0.9
    beta2 : float
        (ADAM parameter)    - generally doesn't need to be changed
        Defaults to 0.999
    epsilon : float
        (ADAM parameter)    - generally doesn't need to be changed
        Defaults to 1e-8

    Returns
    -------
    Mfin : CPTensor
        Canonical polyadic decomposition of input tensor X

    Reference
    ---------
    [1] D. Hong, T. G. Kolda, J. A. Duersch, Generalized Canonical
        Polyadic Tensor Decomposition, SIAM Review, 62:133-163, 2020,
        https://doi.org/10.1137/18M1203626
    [2] T. G. Kolda, D. Hong, Stochastic Gradients for Large-Scale Tensor
        Decomposition. SIAM J. Mathematics of Data Science, 2:1066-1095,
        2020, https://doi.org/10.1137/19m1266265

    """
    # Timer - Setup (outside optimization)
    start_setup0 = time.perf_counter()

    # Initial setup
    nd = tl.ndim(X)
    sz = tl.shape(X)
    tsz = X.size
    X_context = tl.context(X)
    vecsz = 0
    for i in range(nd):
        # tsz *= sz[i]
        vecsz += sz[i]
    vecsz *= R
    W = mask

    # Random set-up
    if state is not None:
        state = tl.check_random_state(state)

    # capture stats(nnzs, zeros, missing)
    nnonnzeros = 0
    X = tl.tensor_to_vec(X)
    for i in X:
        if i > 0:
            nnonnzeros += 1
    X = tl.reshape(X, sz)
    nzeros = tsz - nnonnzeros
    nmissing = 0
    if W is not None:
        W = tl.tensor_to_vec(W)
        for i in range(tl.shape(W)[0]):
            if W[i] > 0: nmissing += 1  # TODO: is this right??
        W = tl.reshape(W, sz)

    # Dictionary for storing important information regarding the decomposition problem
    info = {}
    info['tsz'] = tsz
    info[
        'nmissing'] = 0  # TODO: revisit once missing value functionality incorporated
    info['nnonnzeros'] = nnonnzeros
    info[
        'nzeros'] = nzeros  # TODO: revisit once missing value functionality incorporated

    # Set up function, gradient, and bounds
    fh, gh, lb = validate_type(type, X)
    info['type'] = type
    info['fh'] = fh
    info['gh'] = gh
    info['lb'] = lb

    # initialize CP-tensor and make a copy to work with so as to have the starting guess
    M0 = initialize_cp(X, R, init=init, random_state=state)
    wghts0 = tl.copy(M0[0])
    fcts0 = []
    for i in range(nd):
        f = tl.copy(M0[1][i])
        fcts0.append(f)
    M = CPTensor((wghts0, fcts0))

    # Lambda weights are assumed to be all ones throughout, check initial guess satisfies assumption
    if not tl.all(M[0]):
        print("Initialization of CP tensor has failed (lambda weight(s) != 1.")
        sys.exit(1)

    # check optimization method
    if validate_opt(opt):
        print("Choose optimization method from: {lbfgsb, sgd}")
        sys.exit(1)
    use_stoc = False
    if opt != 'lbfgsb':
        use_stoc = True
    info['opt'] = opt

    # set up for stochastic optimization (e.g. sgd, adam, adagrad)
    if use_stoc:
        # set up fsampler, gsampler ---> uniform sampling only for now
        # TODO : add stratified, semi-stratified and user-specified sampling options
        if not sampler == "uniform":
            print(
                "Only uniform sampling currently supported for stochastic optimization."
            )
            sys.exit(1)
        fsampler_type = sampler
        gsampler_type = sampler

        # setup fsampler
        f_samp = fsamp
        if f_samp == None:
            upper = np.maximum(math.ceil(tsz / 10), 10 ^ 6)
            f_samp = np.minimum(upper, tsz)

        # set up lambda function/function handle for uniform sampling
        fsampler = lambda: tl_sample_uniform(X, f_samp)
        fsampler_str = "{} with {} samples".format(fsampler_type, f_samp)

        # setup gsampler
        g_samp = gsamp
        if g_samp == None:
            upper = np.maximum(1000, math.ceil(10 * tsz / maxiters))
            g_samp = np.minimum(upper, tsz)

        # setup lambda function/function handle for uniform sampling
        gsampler = lambda: tl_sample_uniform(X, g_samp)
        gsampler_str = "{} with {} samples".format(gsampler_type, g_samp)

        # capture the info
        info['fsampler'] = fsampler_str
        info['gsampler'] = gsampler_str
        info['fsamp'] = f_samp
        info['gsamp'] = g_samp

    time_setup0 = time.perf_counter() - start_setup0

    # Welcome message
    if printitn > 0:
        print("GCP-OPT-{} (Generalized CP Tensor Decomposition)".format(opt))
        print("------------------------------------------------")
        print("Tensor size:\t\t\t\t{} ({} total entries)".format(sz, tsz))
        if nmissing > 0:
            print("Missing entries: {} ({})".format(nmissing,
                                                    100 * nmissing / tsz))
        print("Generalized function type:\t{}".format(type))
        print("Objective function:\t\t\t{}".format(
            inspect.getsource(fh).strip()))
        print("Gradient function:\t\t\t{}".format(
            inspect.getsource(gh).strip()))
        print("Lower bound of factors:\t\t{}".format(lb))
        print("Optimization method:\t\t{}".format(opt))
        if use_stoc:
            print("Max iterations (epochs): {}".format(maxiters))
            print("Iterations per epoch: {}".format(epciters))
            print("Learning rate / decay / maxfails: {} {} {}".format(
                rate, decay, maxfails))
            print("Function Sampler: {}".format(fsampler_str))
            print("Gradient Sampler: {}".format(gsampler_str))
        else:
            print("Max iterations:\t\t\t\t{}".format(maxiters))
            print("Projected gradient tol:\t\t{}\n".format(pgtol))

    # Make like a zombie and start decomposing
    Mfin = None
    # L-BFGS-B optimization
    if opt == 'lbfgsb':
        # Timer - Setup (inside optimization)
        start_setup1 = time.perf_counter()

        # set up bounds for l-bfgs-b if lb = 0
        bounds = None
        if lb == 0:
            lb = tl.zeros(tsz)
            ub = math.inf * tl.ones(tsz)
        fcn = lambda x: tl_gcp_fg(vec2factors(x, sz, R, X_context), X, fh, gh)
        m = factors2vec(M[1])

        # capture params for l-bfgs-b
        lbfgsb_params = {}
        lbfgsb_params['x0'] = factors2vec(M0.factors)
        lbfgsb_params['printEvery'] = printitn
        lbfgsb_params['maxIts'] = maxiters
        lbfgsb_params['maxTotalIts'] = maxiters * 10
        lbfgsb_params['factr'] = factr
        lbfgsb_params['pgtol'] = pgtol

        time_setup1 = time.perf_counter() - start_setup1

        if printitn > 0:
            print("Begin main loop")

        # Timer - Main operation
        start_main = time.perf_counter()
        x, f, info_dict = fmin_l_bfgs_b(fcn, m, approx_grad=False, bounds=None, \
                                        pgtol=pgtol, factr=factr, maxiter=maxiters)
        time_main = time.perf_counter() - start_main

        # capture info
        info['fcn'] = fcn
        info['lbfgsbopts'] = lbfgsb_params
        info['lbfgsbout'] = info_dict
        info['finalf'] = f

        if printitn > 0:
            print("\nFinal objective: {}".format(f))
            print("Setup time: {}".format(time_setup0 + time_setup1))
            print("Main loop time: {}".format(time_main))
            print("Outer iterations:"
                  )  # TODO: access this value (see manpage for fmin_l_bfgs_b)
            print("Total iterations: {}".format(info_dict['nit']))
            print("L-BFGS-B exit message: {} ({})".format(
                info_dict['task'], info_dict['warnflag']))
        Mfin = vec2factors(x, sz, R, X_context)

    # Stochastic optimization
    else:
        # Timer - Setup (inside optimization)
        start_setup1 = time.perf_counter()
        if opt == "adam" or opt == "adagrad":
            print("{} not currently supported".format(opt))
            sys.exit(1)
        # prepare for sgd
        # initialize moments
        m = []
        v = []

        # Extract samples for estimating function value (i.e. call fsampler), these never change
        fsubs, fvals, fwgts = fsampler()

        # Compute initial estimated function value
        fest = tl_gcp_fg_est(M, fh, gh, fsubs, fvals, fwgts, True, False,
                             False, False)

        # Set up loop variables
        nfails = 0
        titers = 0

        M_weights = tl.copy(M[0])
        M_factors = []
        for k in range(nd):
            M_factors.append(tl.copy(M[1][k]))
        Msave = CPTensor(
            (M_weights, M_factors))  # save a copy of the initial model
        msave = m
        vsave = v
        fest_prev = fest[0]

        # Tracing the progress in function value by epoch
        fest_trace = tl.zeros(maxiters + 1)
        step_trace = tl.zeros(maxiters + 1)
        time_trace = tl.zeros(maxiters + 1)
        fest_trace[0] = fest[0]

        # Print status
        if printitn > 0:
            print("Begin main loop")
            print("Initial f-est: {}".format(fest[0]))

        time_setup1 = time.perf_counter() - start_setup1
        start_main = time.perf_counter()
        time_trace[0] = time.perf_counter() - start_setup0

        # Main loop - outer iteration
        for nepoch in range(maxiters):
            step = (decay**nfails) * rate
            # Main loop - inner iteration
            for iter in range(epciters):
                # Tracking iterations
                titers = titers + 1

                # Select subset for stochastic gradient (i.e. call gsampler)
                gsubs, gvals, gwts = gsampler()

                # Compute gradients for each mode
                Gest = tl_gcp_fg_est(M, fh, gh, gsubs, gvals, gwts, False,
                                     True, False, False)

                # Check for inf gradient
                for g in Gest[0]:
                    g_max = tl.max(g)
                    g_min = tl.min(g)
                    if math.isinf(g_max) or math.isinf(g_min):
                        print(
                            "Infinite gradient encountered! (epoch = {}, iter = {})"
                            .format(nepoch, iter))

                # TODO : add functionality for ADAM and ADAGRAD optimization
                # Take gradient step
                for k in range(nd):
                    M.factors[k] = M.factors[k] - step * Gest[0][k]

            # Estimate objective (i.e. call tl_gcp_fg_est)
            fest = tl_gcp_fg_est(M, fh, gh, fsubs, fvals, fwgts, True, False,
                                 False, False)

            # Save trace (fest & step)
            fest_trace[nepoch + 1] = fest[0]
            step_trace[nepoch + 1] = step

            # Check convergence condition
            failed_epoch = False
            if fest[0] > fest_prev:
                failed_epoch = True
            if failed_epoch:
                nfails += 1
            festtol_test = False
            if fest[0] < festtol:
                festtol_test = True

            # Reporting
            if printitn > 0 and (nepoch % printitn == 0 or failed_epoch
                                 or festtol_test):
                print("Epoch {}: f-est = {}, step = {}".format(
                    nepoch, fest[0], step),
                      end='')
                if failed_epoch:
                    print(
                        ", nfails = {} (resetting to solution from last epoch)"
                        .format(nfails))
                print("")

            # Rectify failed epoch or save current solution
            if failed_epoch:
                M = Msave
                m = msave
                v = vsave
                fest[0] = fest_prev
                titers = titers - epciters
            else:
                Msave = CPTensor((tl.copy(M.weights), tl.copy(M.factors)))
                msave = m
                vsave = v
                fest_prev = fest[0]

            time_trace[nepoch] = time.perf_counter() - start_setup0

            if (nfails > maxfails) or festtol_test:
                break
        Mfin = M
        time_main = time.perf_counter() - start_main

        # capture info
        info['fest_trace'] = fest_trace
        info['step_trace'] = step_trace
        info['time_trace'] = time_trace
        info['nepoch'] = nepoch

        # Report end of main loop
        if printitn > 0:
            print("End Main Loop")
            print("")
            print("Final f-east: {}".format(fest[0]))
            print("Setup time: {0:0.6f}".format(time_setup0 + time_setup1))
            print("Main loop time: {0:0.6f}".format(time_main))
            print("Total iterations: {}".format(nepoch * epciters))
    # Wrap up / capture remaining info
    info['mainTime'] = time_main
    info['setupTime0'] = time_setup0
    info['setupTime1'] = time_setup1
    info['setupTime'] = time_setup0 + time_setup1

    return Mfin
Esempio n. 16
0
def test_non_negative_parafac_hals():
    """Test for non-negative PARAFAC HALS
    TODO: more rigorous test
    """
    tol_norm_2 = 10e-1
    tol_max_abs = 1
    rng = tl.check_random_state(1234)
    tensor = tl.tensor(rng.random_sample((3, 3, 3)) + 1)
    res = parafac(tensor, rank=3, n_iter_max=120)
    nn_res = non_negative_parafac_hals(tensor,
                                       rank=3,
                                       n_iter_max=100,
                                       tol=10e-4,
                                       init='svd',
                                       verbose=0)

    # Make sure all components are positive
    _, nn_factors = nn_res
    for factor in nn_factors:
        assert_(tl.all(factor >= 0))

    reconstructed_tensor = tl.cp_to_tensor(res)
    nn_reconstructed_tensor = tl.cp_to_tensor(nn_res)
    error = tl.norm(reconstructed_tensor - nn_reconstructed_tensor, 2)
    error /= tl.norm(reconstructed_tensor, 2)
    assert_(error < tol_norm_2, 'norm 2 of reconstruction higher than tol')

    # Test the max abs difference between the reconstruction and the tensor
    assert_(
        tl.max(tl.abs(reconstructed_tensor - nn_reconstructed_tensor)) <
        tol_max_abs, 'abs norm of reconstruction error higher than tol')

    # Test fixing mode 0 or 1 with given init
    fixed_tensor = random_cp((3, 3, 3), rank=2)
    for factor in fixed_tensor[1]:
        factor = tl.abs(factor)
    rec_svd_fixed_mode_0 = non_negative_parafac_hals(tensor,
                                                     rank=2,
                                                     n_iter_max=2,
                                                     init=fixed_tensor,
                                                     fixed_modes=[0])
    rec_svd_fixed_mode_1 = non_negative_parafac_hals(tensor,
                                                     rank=2,
                                                     n_iter_max=2,
                                                     init=fixed_tensor,
                                                     fixed_modes=[1])
    # Check if modified after 2 iterations
    assert_array_equal(
        rec_svd_fixed_mode_0.factors[0],
        fixed_tensor.factors[0],
        err_msg='Fixed mode 0 was modified in candecomp_parafac')
    assert_array_equal(
        rec_svd_fixed_mode_1.factors[1],
        fixed_tensor.factors[1],
        err_msg='Fixed mode 1 was modified in candecomp_parafac')

    res_svd = non_negative_parafac_hals(tensor,
                                        rank=3,
                                        n_iter_max=100,
                                        tol=10e-4,
                                        init='svd')
    res_random = non_negative_parafac_hals(tensor,
                                           rank=3,
                                           n_iter_max=100,
                                           tol=10e-4,
                                           init='random',
                                           verbose=0)
    rec_svd = tl.cp_to_tensor(res_svd)
    rec_random = tl.cp_to_tensor(res_random)
    error = tl.norm(rec_svd - rec_random, 2)
    error /= tl.norm(rec_svd, 2)
    assert_(error < tol_norm_2,
            'norm 2 of difference between svd and random init too high')
    assert_(
        tl.max(tl.abs(rec_svd - rec_random)) < tol_max_abs,
        'abs norm of difference between svd and random init too high')

    # Regression test: used wrong variable for convergence checking
    # Used mttkrp*factor instead of mttkrp*factors[-1], which resulted in
    # error when mode 2 was not constrained and erroneous convergence checking
    # when mode 2 was constrained.
    tensor = tl.tensor(rng.random_sample((3, 3, 3)) + 1)
    nn_estimate, errs = non_negative_parafac_hals(tensor,
                                                  rank=2,
                                                  n_iter_max=2,
                                                  tol=1e-10,
                                                  init='svd',
                                                  verbose=0,
                                                  nn_modes={
                                                      0,
                                                  },
                                                  return_errors=True)
Esempio n. 17
0
def hals_nnls(UtM,
              UtU,
              V=None,
              n_iter_max=500,
              tol=10e-8,
              sparsity_coefficient=None,
              normalize=False,
              nonzero_rows=False,
              exact=False):
    """
    Non Negative Least Squares (NNLS)

    Computes an approximate solution of a nonnegative least
    squares problem (NNLS) with an exact block-coordinate descent scheme.
    M is m by n, U is m by r, V is r by n.
    All matrices are nonnegative componentwise.

    This algorithm is defined in [1], as an accelerated version of the HALS algorithm.

    It features two accelerations: an early stop stopping criterion, and a
    complexity averaging between precomputations and loops, so as to use large
    precomputations several times.

    This function is made for being used repetively inside an
    outer-loop alternating algorithm, for instance for computing nonnegative
    matrix Factorization or tensor factorization.

    Parameters
    ----------
    UtM: r-by-n array
        Pre-computed product of the transposed of U and M, used in the update rule
    UtU: r-by-r array
        Pre-computed product of the transposed of U and U, used in the update rule
    V: r-by-n initialization matrix (mutable)
        Initialized V array
        By default, is initialized with one non-zero entry per column
        corresponding to the closest column of U of the corresponding column of M.
    n_iter_max: Postivie integer
        Upper bound on the number of iterations
        Default: 500
    tol : float in [0,1]
        early stop criterion, while err_k > delta*err_0. Set small for
        almost exact nnls solution, or larger (e.g. 1e-2) for inner loops
        of a PARAFAC computation.
        Default: 10e-8
    sparsity_coefficient: float or None
        The coefficient controling the sparisty level in the objective function.
        If set to None, the problem is solved unconstrained.
        Default: None
    nonzero_rows: boolean
        True if the lines of the V matrix can't be zero,
        False if they can be zero
        Default: False
    exact: If it is True, the algorithm gives a results with high precision but it needs high computational cost.
        If it is False, the algorithm gives an approximate solution
        Default: False

    Returns
    -------
    V: array
        a r-by-n nonnegative matrix \approx argmin_{V >= 0} ||M-UV||_F^2
    rec_error: float
        number of loops authorized by the error stop criterion
    iteration: integer
        final number of update iteration performed
    complexity_ratio: float
        number of loops authorized by the stop criterion

    Notes
    -----
    We solve the following problem :math:`\\min_{V >= 0} ||M-UV||_F^2`

    The matrix V is updated linewise. The update rule for this resolution is::

    .. math::
        \\begin{equation}
            V[k,:]_(j+1) = V[k,:]_(j) + (UtM[k,:] - UtU[k,:]\\times V_(j))/UtU[k,k]
        \\end{equation}

    with j the update iteration.

    This problem can also be defined by adding a sparsity coefficient,
    enhancing sparsity in the solution [2]. In this sparse version, the update rule becomes::

    .. math::
        \\begin{equation}
            V[k,:]_(j+1) = V[k,:]_(j) + (UtM[k,:] - UtU[k,:]\\times V_(j) - sparsity_coefficient)/UtU[k,k]
        \\end{equation}

    References
    ----------
    .. [1]: N. Gillis and F. Glineur, Accelerated Multiplicative Updates and
       Hierarchical ALS Algorithms for Nonnegative Matrix Factorization,
       Neural Computation 24 (4): 1085-1105, 2012.

    .. [2] J. Eggert, and E. Korner. "Sparse coding and NMF."
       2004 IEEE International Joint Conference on Neural Networks
       (IEEE Cat. No. 04CH37541). Vol. 4. IEEE, 2004.

    """

    rank, n_col_M = tl.shape(UtM)
    if V is None:  # checks if V is empty
        V = tl.solve(UtU, UtM)

        V = tl.clip(V, a_min=0, a_max=None)
        # Scaling
        scale = tl.sum(UtM * V) / tl.sum(UtU * tl.dot(V, tl.transpose(V)))
        V = V * scale

    if exact:
        n_iter_max = 50000
        tol = 10e-16
    for iteration in range(n_iter_max):
        rec_error = 0
        for k in range(rank):

            if UtU[k, k]:
                if sparsity_coefficient is not None:  # Modifying the function for sparsification

                    deltaV = tl.where(
                        (UtM[k, :] - tl.dot(UtU[k, :], V) -
                         sparsity_coefficient) / UtU[k, k] > -V[k, :],
                        (UtM[k, :] - tl.dot(UtU[k, :], V) -
                         sparsity_coefficient) / UtU[k, k], -V[k, :])
                    V = tl.index_update(V, tl.index[k, :], V[k, :] + deltaV)

                else:  # without sparsity

                    deltaV = tl.where(
                        (UtM[k, :] - tl.dot(UtU[k, :], V)) / UtU[k, k] >
                        -V[k, :],
                        (UtM[k, :] - tl.dot(UtU[k, :], V)) / UtU[k, k],
                        -V[k, :])
                    V = tl.index_update(V, tl.index[k, :], V[k, :] + deltaV)

                rec_error = rec_error + tl.dot(deltaV, tl.transpose(deltaV))

                # Safety procedure, if columns aren't allow to be zero
                if nonzero_rows and tl.all(V[k, :] == 0):
                    V[k, :] = tl.eps(V.dtype) * tl.max(V)

            elif nonzero_rows:
                raise ValueError("Column " + str(k) +
                                 " of U is zero with nonzero condition")

            if normalize:
                norm = tl.norm(V[k, :])
                if norm != 0:
                    V[k, :] /= norm
                else:
                    sqrt_n = 1 / n_col_M**(1 / 2)
                    V[k, :] = [sqrt_n for i in range(n_col_M)]
        if iteration == 0:
            rec_error0 = rec_error

        numerator = tl.shape(V)[0] * tl.shape(V)[1] + tl.shape(V)[1] * rank
        denominator = tl.shape(V)[0] * rank + tl.shape(V)[0]
        complexity_ratio = 1 + (numerator / denominator)
        if exact:
            if rec_error < tol * rec_error0:
                break
        else:
            if rec_error < tol * rec_error0 or iteration > 1 + 0.5 * complexity_ratio:
                break
    return V, rec_error, iteration, complexity_ratio
Esempio n. 18
0
def unimodality_prox(tensor):
    """
    This function projects each column of the input array on the set of arrays so that
          x[1] <= x[2] <= x[j] >= x[j+1]... >= x[n]
    is satisfied columnwise.

    Parameters
    ----------
    tensor : ndarray

    Returns
    -------
    ndarray
         A tensor of which columns' distribution are unimodal.

    References
    ----------
    .. [1]: Bro, R., & Sidiropoulos, N. D. (1998). Least squares algorithms under
            unimodality and non‐negativity constraints. Journal of Chemometrics:
            A Journal of the Chemometrics Society, 12(4), 223-247.
    """
    if tl.ndim(tensor) == 1:
        tensor = tl.vec_to_tensor(tensor, [tl.shape(tensor)[0], 1])
    elif tl.ndim(tensor) > 2:
        raise ValueError(
            "Unimodality prox doesn't support an input which has more than 2 dimensions."
        )

    tensor_unimodal = tl.copy(tensor)
    monotone_increasing = tl.tensor(monotonicity_prox(tensor),
                                    **tl.context(tensor))
    monotone_decreasing = tl.tensor(monotonicity_prox(tensor, decreasing=True),
                                    **tl.context(tensor))
    # Next line finds mutual peak points
    values = tl.tensor(
        tl.to_numpy((tensor - monotone_decreasing >= 0)) * tl.to_numpy(
            (tensor - monotone_increasing >= 0)), **tl.context(tensor))

    sum_inc = tl.where(values == 1,
                       tl.cumsum(tl.abs(tensor - monotone_increasing), axis=0),
                       tl.tensor(0, **tl.context(tensor)))
    sum_inc = tl.where(values == 1,
                       sum_inc - tl.abs(tensor - monotone_increasing),
                       tl.tensor(0, **tl.context(tensor)))
    sum_dec = tl.where(
        tl.flip(values, axis=0) == 1,
        tl.cumsum(tl.abs(
            tl.flip(tensor, axis=0) - tl.flip(monotone_decreasing, axis=0)),
                  axis=0), tl.tensor(0, **tl.context(tensor)))
    sum_dec = tl.where(
        tl.flip(values, axis=0) == 1, sum_dec -
        tl.abs(tl.flip(tensor, axis=0) - tl.flip(monotone_decreasing, axis=0)),
        tl.tensor(0, **tl.context(tensor)))

    difference = tl.where(values == 1, sum_inc + tl.flip(sum_dec, axis=0),
                          tl.max(sum_inc + tl.flip(sum_dec, axis=0)))
    min_indice = tl.argmin(tl.tensor(difference), axis=0)

    for i in range(len(min_indice)):
        tensor_unimodal = tl.index_update(
            tensor_unimodal, tl.index[:int(min_indice[i]), i],
            monotone_increasing[:int(min_indice[i]), i])
        tensor_unimodal = tl.index_update(
            tensor_unimodal, tl.index[int(min_indice[i] + 1):, i],
            monotone_decreasing[int(min_indice[i] + 1):, i])
    return tensor_unimodal
Esempio n. 19
0
def proximal_operator(tensor,
                      non_negative=None,
                      l1_reg=None,
                      l2_reg=None,
                      l2_square_reg=None,
                      unimodality=None,
                      normalize=None,
                      simplex=None,
                      normalized_sparsity=None,
                      soft_sparsity=None,
                      smoothness=None,
                      monotonicity=None,
                      hard_sparsity=None,
                      n_const=1,
                      order=0):
    """
    Proximal operator solves a convex optimization problem. Let f be a
    convex proper lower-semicontinuous function, the proximal operator of f is :math:`\\argmin_x(f(x) + 1/2||x - v||_2^2)`.
    This operator can be used to solve constrained optimization problems as a generalization to projections on convex sets.
    Therefore, proximal gradients are used for constrained tensor decomposition problems in the literature.

    Parameters
    ----------
    tensor : ndarray
    non_negative : bool or dictionary
        This constraint is clipping negative values to '0'. If it is True non-negative constraint is applied to all modes.
    l1_reg : float or list or dictionary, optional
    l2_reg : float or list or dictionary, optional
    l2_square_reg : float or list or dictionary, optional
    unimodality : bool or dictionary, optional
        If it is True unimodality constraint is applied to all modes.
    normalize : bool or dictionary, optional
        This constraint divides all the values by maximum value of the input array. If it is True normalize constraint
        is applied to all modes.
    simplex : float or list or dictionary, optional
    normalized_sparsity : float or list or dictionary, optional
    soft_sparsity : float or list or dictionary, optional
    smoothness : float or list or dictionary, optional
    monotonicity : bool or dictionary, optional
    hard_sparsity : float or list or dictionary, optional
    n_const : int
        Number of constraints. If it is None, function returns input tensor.
        Default : 1
    order : int
        Specifies which constraint to implement if several constraints are selected as input
        Default : 0
    Returns
    -------
    tensor : updated tensor according to the selected constraint, which is the solution of the optimization problem above.
             If constraint is None, function returns the same tensor.

    References
    ----------
    .. [1]: Moreau, J. J. (1962). Fonctions convexes duales et points proximaux dans un espace hilbertien.
            Comptes rendus hebdomadaires des séances de l'Académie des sciences, 255, 2897-2899.
    .. [2]: Parikh, N., & Boyd, S. (2014). Proximal algorithms.
            Foundations and Trends in optimization, 1(3), 127-239.
    """
    if n_const is None:
        return tensor
    constraint, parameter = validate_constraints(
        non_negative=non_negative,
        l1_reg=l1_reg,
        l2_reg=l2_reg,
        l2_square_reg=l2_square_reg,
        unimodality=unimodality,
        normalize=normalize,
        simplex=simplex,
        normalized_sparsity=normalized_sparsity,
        soft_sparsity=soft_sparsity,
        smoothness=smoothness,
        monotonicity=monotonicity,
        hard_sparsity=hard_sparsity,
        n_const=n_const,
        order=order)
    if constraint is None:
        return tensor
    elif constraint == 'non_negative':
        return tl.clip(tensor, 0, tl.max(tensor))
    elif constraint == 'l1_reg':
        return soft_thresholding(tensor, parameter)
    elif constraint == 'l2_reg':
        return l2_prox(tensor, parameter)
    elif constraint == 'l2_square_reg':
        return l2_square_prox(tensor, parameter)
    elif constraint == 'unimodality':
        return unimodality_prox(tensor)
    elif constraint == 'normalize':
        return tensor / tl.max(tensor)
    elif constraint == 'simplex':
        return simplex_prox(tensor, parameter)
    elif constraint == 'normalized_sparsity':
        return normalized_sparsity_prox(tensor, parameter)
    elif constraint == 'soft_sparsity':
        return soft_sparsity_prox(tensor, parameter)
    elif constraint == 'smoothness':
        return smoothness_prox(tensor, parameter)
    elif constraint == 'monotonicity':
        return monotonicity_prox(tensor)
    elif constraint == 'hard_sparsity':
        return hard_thresholding(tensor, parameter)
Esempio n. 20
0
def active_set_nnls(Utm, UtU, x=None, n_iter_max=100, tol=10e-8):
    """
     Active set algorithm for non-negative least square solution.

     Computes an approximate non-negative solution for Ux=m linear system.

     Parameters
     ----------
     Utm : vectorized ndarray
        Pre-computed product of the transposed of U and m
     UtU : ndarray
        Pre-computed Kronecker product of the transposed of U and U
     x : init
        Default: None
     n_iter_max : int
         Maximum number of iteration
         Default: 100
     tol : float
         Early stopping criterion

     Returns
     -------
     x : ndarray

     Notes
     -----
     This function solves following problem:
     .. math::
        \\begin{equation}
             \\min_{x} ||Ux - m||^2
        \\end{equation}

     According to [1], non-negativity-constrained least square estimation problem becomes:
     .. math::
        \\begin{equation}
             x' = (Utm) - (UTU)\\times x
        \\end{equation}

     Reference
     ----------
     [1] : Bro, R., & De Jong, S. (1997). A fast non‐negativity‐constrained
           least squares algorithm. Journal of Chemometrics: A Journal of
           the Chemometrics Society, 11(5), 393-401.
     """
    if tl.get_backend() == 'tensorflow':
        raise ValueError(
            "Active set is not supported with the tensorflow backend. Consider using fista method with tensorflow."
        )

    if x is None:
        x_vec = tl.zeros(tl.shape(UtU)[1], **tl.context(UtU))
    else:
        x_vec = tl.base.tensor_to_vec(x)

    x_gradient = Utm - tl.dot(UtU, x_vec)
    passive_set = x_vec > 0
    active_set = x_vec <= 0
    support_vec = tl.zeros(tl.shape(x_vec), **tl.context(x_vec))

    for iteration in range(n_iter_max):

        if iteration > 0 or tl.all(x_vec == 0):
            indice = tl.argmax(x_gradient)
            passive_set = tl.index_update(passive_set, tl.index[indice], True)
            active_set = tl.index_update(active_set, tl.index[indice], False)
        # To avoid singularity error when initial x exists
        try:
            passive_solution = tl.solve(UtU[passive_set, :][:, passive_set],
                                        Utm[passive_set])
            indice_list = []
            for i in range(tl.shape(support_vec)[0]):
                if passive_set[i]:
                    indice_list.append(i)
                    support_vec = tl.index_update(
                        support_vec, tl.index[int(i)],
                        passive_solution[len(indice_list) - 1])
                else:
                    support_vec = tl.index_update(support_vec,
                                                  tl.index[int(i)], 0)
        # Start from zeros if solve is not achieved
        except:
            x_vec = tl.zeros(tl.shape(UtU)[1])
            support_vec = tl.zeros(tl.shape(x_vec), **tl.context(x_vec))
            passive_set = x_vec > 0
            active_set = x_vec <= 0
            if tl.any(active_set):
                indice = tl.argmax(x_gradient)
                passive_set = tl.index_update(passive_set, tl.index[indice],
                                              True)
                active_set = tl.index_update(active_set, tl.index[indice],
                                             False)
            passive_solution = tl.solve(UtU[passive_set, :][:, passive_set],
                                        Utm[passive_set])
            indice_list = []
            for i in range(tl.shape(support_vec)[0]):
                if passive_set[i]:
                    indice_list.append(i)
                    support_vec = tl.index_update(
                        support_vec, tl.index[int(i)],
                        passive_solution[len(indice_list) - 1])
                else:
                    support_vec = tl.index_update(support_vec,
                                                  tl.index[int(i)], 0)

        # update support vector if it is necessary
        if tl.min(support_vec[passive_set]) <= 0:
            for i in range(len(passive_set)):
                alpha = tl.min(
                    x_vec[passive_set][support_vec[passive_set] <= 0] /
                    (x_vec[passive_set][support_vec[passive_set] <= 0] -
                     support_vec[passive_set][support_vec[passive_set] <= 0]))
                update = alpha * (support_vec - x_vec)
                x_vec = x_vec + update
                passive_set = x_vec > 0
                active_set = x_vec <= 0
                passive_solution = tl.solve(
                    UtU[passive_set, :][:, passive_set], Utm[passive_set])
                indice_list = []
                for i in range(tl.shape(support_vec)[0]):
                    if passive_set[i]:
                        indice_list.append(i)
                        support_vec = tl.index_update(
                            support_vec, tl.index[int(i)],
                            passive_solution[len(indice_list) - 1])
                    else:
                        support_vec = tl.index_update(support_vec,
                                                      tl.index[int(i)], 0)

                if tl.any(passive_set) != True or tl.min(
                        support_vec[passive_set]) > 0:
                    break
        # set x to s
        x_vec = tl.clip(support_vec, 0, tl.max(support_vec))

        # gradient update
        x_gradient = Utm - tl.dot(UtU, x_vec)

        if tl.any(active_set) != True or tl.max(x_gradient[active_set]) <= tol:
            break

    return x_vec