    def test_smooth_1D_l2(self):

        from parsimony.functions import CombinedFunction
        import parsimony.functions as functions
        import parsimony.functions.nesterov.grouptv as grouptv
        import parsimony.datasets.simulate.l1_l2_grouptvmu as l1_l2_grouptvmu
        import parsimony.utils.weights as weights

        np.random.seed(1337)

        n, p = 10, 15
        shape = (1, 1, p)

        l = 0.0
        k = 0.1  # Must have some regularisation for all variables.
        g = 0.9

        start_vector = weights.RandomUniformWeights(normalise=True)
        beta = start_vector.get_weights(p)

        rects = [[(0, 5)], [(4, 10)], [(13, 15)]]
                              # 0 [ 5 ] 0
                              # 1 [ 5 ] 0
                              # 2 [ 5 ] 0
                              # 3 [ 5 ] 0
                              # 4 [ 4 ] 0 / 1
        beta[:5, :] = 5.0     # 5 [ 3 ] 1
        beta[4, :] = 4.0      # 6 [ 3 ] 1
        beta[5:10, :] = 3.0   # 7 [ 3 ] 1
        beta[13:15, :] = 7.0  # 8 [ 3 ] 1
                              # 9 [ 3 ] 1
                              # 0 [ x ] -
                              # 1 [ x ] -
                              # 2 [ x ] -
                              # 3 [ 7 ] 2
                              # 4 [ 7 ] 2
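        # Indices 10-12 belong to no rectangle ("x" above), so the group TV
        # penalty never touches them; the ridge term (k > 0) is what keeps
        # them regularised.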
        alpha = 1.0
        Sigma = alpha * np.eye(p, p) \
              + (1.0 - alpha) * np.random.randn(p, p)
        mean = np.zeros(p)
        M = np.random.multivariate_normal(mean, Sigma, n)
        e = np.random.randn(n, 1)

        snr = 100.0

        A = grouptv.linear_operator_from_rects(rects, shape)
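        # A is the linear operator of the (Nesterov-smoothed) group TV
        # penalty, built from the possibly overlapping rectangles above.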
        mu_min = 5e-8
        X, y, beta_star = l1_l2_grouptvmu.load(l=l, k=k, g=g, beta=beta,
                                               M=M, e=e, A=A, mu=mu_min,
                                               snr=snr)

        eps = 1e-5
        max_iter = 12000

        beta_start = start_vector.get_weights(p)

        mus = [5e-2, 5e-4, 5e-6, 5e-8]
        fista = FISTA(eps=eps, max_iter=max_iter // len(mus))

        beta_parsimony = beta_start
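        # Continuation: solve a sequence of successively less smoothed
        # problems (decreasing mu), warm-starting FISTA from the previous
        # solution; the total iteration budget is split evenly over the mus.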
        for mu in mus:
            function = CombinedFunction()
            function.add_loss(functions.losses.LinearRegression(X, y,
                                                                mean=False))
            function.add_penalty(grouptv.GroupTotalVariation(l=g,
                                                             A=A, mu=mu,
                                                             penalty_start=0))

            function.add_penalty(functions.penalties.L2Squared(l=k,
                                                             penalty_start=0))

            beta_parsimony = fista.run(function, beta_parsimony)

        berr = np.linalg.norm(beta_parsimony - beta_star)
#        print "berr:", berr
        assert berr < 5e-2

        f_parsimony = function.f(beta_parsimony)
        f_star = function.f(beta_star)
        ferr = abs(f_parsimony - f_star)
#        print "ferr:", ferr
        assert ferr < 5e-5

    def test_smooth_2D_l1(self):

        from parsimony.functions import CombinedFunction
        import parsimony.functions as functions
        import parsimony.functions.nesterov.grouptv as grouptv
        import parsimony.datasets.simulate.l1_l2_grouptvmu as l1_l2_grouptvmu
        import parsimony.utils.weights as weights

        np.random.seed(1337)

        n, p = 10, 18
        shape = (1, 3, 6)

        l = 0.618
        k = 0.0
        g = 1.618

        start_vector = weights.ZerosWeights()
        beta = start_vector.get_weights(p)

        rects = [[(0, 1), (0, 3)], [(1, 2), (3, 6)]]

        beta = np.reshape(beta, shape[1:])
        beta[0:2, 0:4] = 1.0
        beta[1:3, 3:6] = 2.0
        beta[1, 3] = 1.5
        beta = np.reshape(beta, (p, 1))

        alpha = 1.0
        Sigma = alpha * np.eye(p, p) \
              + (1.0 - alpha) * np.random.randn(p, p)
        mean = np.zeros(p)
        M = np.random.multivariate_normal(mean, Sigma, n)
        e = np.random.randn(n, 1)

        snr = 100.0

        A = grouptv.linear_operator_from_rects(rects, shape)
        mu_min = 5e-8
        X, y, beta_star = l1_l2_grouptvmu.load(l=l, k=k, g=g, beta=beta,
                                               M=M, e=e, A=A, mu=mu_min,
                                               snr=snr)

        eps = 1e-5
        max_iter = 10000

        beta_start = start_vector.get_weights(p)

        mus = [5e-2, 5e-4, 5e-6, 5e-8]
        fista = FISTA(eps=eps, max_iter=max_iter // len(mus))

        beta_parsimony = beta_start
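        # Same continuation over decreasing mus as in test_smooth_1D_l2.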
        for mu in mus:
            function = CombinedFunction()
            function.add_loss(functions.losses.LinearRegression(X, y,
                                                                mean=False))
            function.add_penalty(grouptv.GroupTotalVariation(l=g,
                                                             A=A, mu=mu,
                                                             penalty_start=0))

            function.add_prox(functions.penalties.L1(l=l, penalty_start=0))

            beta_parsimony = fista.run(function, beta_parsimony)

        berr = np.linalg.norm(beta_parsimony - beta_star)
#        print "berr:", berr
        assert berr < 5e-2

        f_parsimony = function.f(beta_parsimony)
        f_star = function.f(beta_star)
        ferr = abs(f_parsimony - f_star)
#        print "ferr:", ferr
        assert ferr < 5e-5

    def test_overlapping_smooth(self):

        import numpy as np
        from parsimony.functions import CombinedFunction
        import parsimony.functions as functions
        import parsimony.functions.nesterov.gl as gl
        import parsimony.datasets.simulate.l1_l2_glmu as l1_l2_glmu
        import parsimony.utils.weights as weights

        np.random.seed(314)

        # Note that p must be divisible by 3!
        n, p = 25, 30
        groups = [list(range(0, 2 * int(p / 3))), list(range(int(p / 3), p))]
        group_weights = [1.5, 0.5]
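        # The two groups overlap on the middle third of the variables,
        # which is the case GroupLassoOverlap is designed to handle.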

        A = gl.linear_operator_from_groups(p,
                                           groups=groups,
                                           weights=group_weights)

        l = 0.0
        k = 0.0
        g = 0.9

        start_vector = weights.RandomUniformWeights(normalise=True)
        beta = start_vector.get_weights(p)

        alpha = 1.0
        Sigma = alpha * np.eye(p, p) \
              + (1.0 - alpha) * np.random.randn(p, p)
        mean = np.zeros(p)
        M = np.random.multivariate_normal(mean, Sigma, n)
        e = np.random.randn(n, 1)

        snr = 100.0

        mu_min = 5e-8
        X, y, beta_star = l1_l2_glmu.load(l,
                                          k,
                                          g,
                                          beta,
                                          M,
                                          e,
                                          A,
                                          mu=mu_min,
                                          snr=snr)

        eps = 1e-8
        max_iter = 15000

        beta_start = start_vector.get_weights(p)

        mus = [5e-0, 5e-2, 5e-4, 5e-6, 5e-8]
        fista = FISTA(eps=eps, max_iter=max_iter // len(mus))

        beta_parsimony = beta_start
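        # Same continuation over decreasing mus as in the group TV tests above.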
        for mu in mus:
            function = CombinedFunction()
            function.add_loss(
                functions.losses.LinearRegression(X, y, mean=False))
            function.add_penalty(
                gl.GroupLassoOverlap(l=g, A=A, mu=mu, penalty_start=0))

            beta_parsimony = fista.run(function, beta_parsimony)

        berr = np.linalg.norm(beta_parsimony - beta_star)
        # print(berr)
        assert berr < 5e-2

        f_parsimony = function.f(beta_parsimony)
        f_star = function.f(beta_star)
        # print(abs(f_parsimony - f_star))
        assert abs(f_parsimony - f_star) < 5e-6


class MultiblockFISTA(bases.ExplicitAlgorithm,
                      bases.IterativeAlgorithm,
                      bases.InformationAlgorithm):
    """ The projected gradient algorithm with alternating minimisations in a
    multiblock setting.

    Parameters
    ----------
    info : List or tuple of utils.consts.Info. What, if any, extra run
            information should be stored. Default is an empty list, which means
            that no run information is computed nor returned.

    eps : Positive float. Tolerance for the stopping criterion.

    outer_iter : Non-negative integer. Maximum allowed number of outer loop
            iterations.

    max_iter : Non-negative integer. Maximum allowed number of iterations.

    min_iter : Non-negative integer less than or equal to max_iter. Minimum
            number of iterations that must be performed. Default is 1.
    """
    INTERFACES = [multiblock_properties.MultiblockFunction,
                  multiblock_properties.MultiblockGradient,
                  multiblock_properties.MultiblockProjectionOperator,
                  multiblock_properties.MultiblockStepSize]

    INFO_PROVIDED = [Info.ok,
                     Info.num_iter,
                     Info.time,
                     Info.fvalue,
                     Info.converged]

    def __init__(self, info=[], outer_iter=20,
                 eps=consts.TOLERANCE,
                 max_iter=consts.MAX_ITER, min_iter=1):

        super(MultiblockFISTA, self).__init__(info=info,
                                              max_iter=max_iter,
                                              min_iter=min_iter)

        self.outer_iter = outer_iter
        self.eps = float(eps)

        # Copy the allowed info keys for FISTA.
        self.fista_info = list()
        for nfo in self.info_copy():
            if nfo in FISTA.INFO_PROVIDED:
                self.fista_info.append(nfo)
        if Info.converged not in self.fista_info:
            self.fista_info.append(Info.converged)

    @bases.force_reset
    @bases.check_compatibility
    def run(self, function, w):

        if self.info_requested(Info.ok):
            self.info_set(Info.ok, False)
        if self.info_requested(Info.time):
            t = [0.0]
        if self.info_requested(Info.fvalue):
            f = [function.f(w)]
        if self.info_requested(Info.converged):
            self.info_set(Info.converged, False)

        self.algorithm = FISTA(info=self.fista_info,
                               eps=self.eps,
                               max_iter=self.max_iter,
                               min_iter=self.min_iter)

        print "len(w):", len(w)
        print "max_iter:", self.algorithm.max_iter

        num_iter = [0] * len(w)
        for it in range(1, self.outer_iter + 1):

            all_converged = True

            for i in range(len(w)):
                print("it: %d, i: %d" % (it, i))

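                # Minimise the multiblock function over block i, keeping all
                # of the other blocks fixed at their current values.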
                func = mb_losses.MultiblockFunctionWrapper(function, w, i)
                w[i] = self.algorithm.run(func, w[i])

                num_iter[i] += self.algorithm.num_iter

                if self.algorithm.info_requested(Info.time):
                    tval = self.algorithm.info_get(Info.time)
                if self.algorithm.info_requested(Info.fvalue):
                    fval = self.algorithm.info_get(Info.fvalue)

                if self.info_requested(Info.time):
                    t = t + tval
                if self.info_requested(Info.fvalue):
                    f = f + fval

                print "l0 :", maths.norm0(w[i]), \
                    ", l1 :", maths.norm1(w[i]), \
                    ", l2²:", maths.norm(w[i]) ** 2.0

            if self.algorithm.info_requested(Info.fvalue):
                print "f:", fval[-1]

            for i in range(len(w)):

                # Take one ISTA step for use in the stopping criterion.
                step = function.step(w, i)
                w_tilde = function.prox(w[:i] +
                                        [w[i] - step * function.grad(w, i)] +
                                        w[i + 1:], i, step)

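                # Block i is treated as converged when one ISTA step moves
                # w[i] by at most step * eps.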
                if maths.norm(w[i] - w_tilde) > step * self.eps:
                    all_converged = False
                    break

            if all_converged:
                print "All converged!"

                if self.info_requested(Info.converged):
                    self.info_set(Info.converged, True)

                break

        if self.info_requested(Info.num_iter):
            self.info_set(Info.num_iter, num_iter)
        if self.info_requested(Info.time):
            self.info_set(Info.time, t)
        if self.info_requested(Info.fvalue):
            self.info_set(Info.fvalue, f)
        if self.info_requested(Info.ok):
            self.info_set(Info.ok, True)

        return w