import math

import numpy as np
import numpy.linalg as LA

# Note: `logger` (a logging handle), `Stopwatch` (a timing utility), and
# `solve_sub_problem` (used by gl_Alm_dual) are assumed to be provided
# elsewhere in this project.


def gl_ProxGD_primal(x0: np.ndarray, A: np.ndarray, b: np.ndarray, mu_0, opts: dict):
    default_opts = {
        "maxit": 2500,  # 最大迭代次数
        "thres": 1e-3,  # 判断小量是否被认为 0 的阈值
        "step_type": "line_search",  # 步长衰减的类型(见辅助函数)
        "alpha0": 2e-3,  # 步长的初始值
        "ftol": 1e-6,  # 停机准则,当目标函数历史最优值的变化小于该值时认为满足
        "stable_len_threshold": 70,
        "line_search_attenuation_coeffi": 0.9,
        "maxit_line_search_iter": 5,
    }
    # The second dictionary's values overwrite those from the first.
    opts = {**default_opts, **opts}
    sparsity_func = lambda x: np.sum(np.abs(x) > 1e-6 * np.max(np.abs(x))) / x.size

    def real_obj_func(x: np.ndarray):
        fro_term = 0.5 * np.sum((A @ x - b) ** 2)
        regular_term = np.sum(LA.norm(x, axis=1).reshape(-1, 1))
        return fro_term + mu_0 * regular_term

    out = {
        "fvec": None,  # 每一步迭代的 LASSO 问题目标函数值
        "grad_hist": None,  # 可微部分梯度范数的历史值
        "f_hist": None,  # 目标函数的历史值
        "f_hist_best": None,  # 目标函数每一步迭代对应的历史最优值
        "tt": None,  # 运行时间
        "flag": None  # 标记是否收敛
    }

    maxit, ftol, alpha0 = opts["maxit"], opts["ftol"], opts["alpha0"]
    stable_len_threshold = opts["stable_len_threshold"]
    thres = opts["thres"]
    step_type = opts['step_type']
    aten_coeffi = opts['line_search_attenuation_coeffi']
    max_line_search_iter = opts['maxit_line_search_iter']

    logger.debug("alpha0= {:10E}".format(alpha0))
    f_hist, f_hist_best, sparsity_hist = [], [], []
    f_best = np.inf

    x = np.copy(x0)
    stopwatch = Stopwatch()
    stopwatch.start()
    k = 0
    for mu in [100 * mu_0, 10 * mu_0, mu_0]:
        logger.debug("new mu= {:10E}".format(mu))

        # min f(x) = g(x) + h(x)
        # g(x) = 0.5 * |Ax-b|_F^2
        # h(x) = mu * |x|_{1,2}

        def g(x: np.ndarray):
            return 0.5 * np.sum((A @ x - b) ** 2)

        grad_g = None

        def prox_th(x: np.ndarray, t):
            """ Proximal operator of t * mu * h(x).
            """
            t_mu = t * mu
            row_norms = LA.norm(x, axis=1).reshape(-1, 1)
            rv = x * np.clip(row_norms - t_mu, a_min=0, a_max=None) / ((row_norms < thres) + row_norms)
            return rv

        def Gt(x: np.ndarray, t):
            return (x - prox_th(x - t * grad_g, t)) / t

        inner_iter = 0

        def set_step(step_type: str):
            iter_hat = max(inner_iter, 1000) - 999
            if step_type == 'fixed':
                return alpha0
            elif step_type == 'diminishing':
                return alpha0 / np.sqrt(iter_hat)
            elif step_type == 'diminishing2':
                return alpha0 / iter_hat
            elif step_type == 'line_search':
                g_x = g(x)

                def stop_condition(x, t):
                    gt_x = Gt(x, t)
                    return (g(x - t * gt_x)
                            <= g_x - t * np.sum(grad_g * gt_x) + 0.5 * t * np.sum(gt_x ** 2))

                alpha = alpha0
                for i in range(max_line_search_iter):
                    if stop_condition(x, alpha):
                        break
                    alpha *= aten_coeffi
                return alpha
            else:
                logger.error("Unsupported type.")

        stable_len = 0

        while inner_iter < maxit:
            # Record current objective value
            f_now = real_obj_func(x)
            f_hist.append(f_now)

            f_best = min(f_best, f_now)
            f_hist_best.append(f_best)

            sparsity_hist.append(sparsity_func(x))

            k += 1
            inner_iter += 1

            if (k > 1
                    and abs(f_hist[k - 1] - f_hist[k - 2]) / abs(f_hist[k - 2]) < ftol
                    and abs(sparsity_hist[k - 1] - sparsity_hist[k - 2]) / abs(sparsity_hist[k - 2]) < ftol):
                stable_len += 1
            else:
                stable_len = 0
            if stable_len > stable_len_threshold:
                break

            x[np.abs(x) < thres] = 0

            grad_g = A.T @ (A @ x - b)
            alpha_k = set_step(step_type)
            # logger.debug("alpha_k: {}".format(alpha_k))
            x = prox_th(x - alpha_k * grad_g, alpha_k)

            if k % 100 == 0:
                logger.debug(
                    'iter= {:5}, objective= {:10E}, sparsity= {:3f}'.format(k, f_now.item(), sparsity_func(x)))

    elapsed_time = stopwatch.elapsed(time_format=Stopwatch.TimeFormat.kMicroSecond) / 1e6
    out = {
        "tt": elapsed_time,
        "fval": real_obj_func(x),
        "f_hist": f_hist,
        "f_hist_best": f_hist_best
    }

    return x, k, out
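
# A minimal, self-contained sanity check (not part of the original solvers) of
# the row-wise soft-thresholding that prox_th above implements: rows whose l2
# norm is below t*mu are zeroed, larger rows are shrunk toward zero by t*mu.
# The helper name and the toy data are our own assumptions for illustration.
def _demo_group_soft_threshold():
    import numpy as np
    import numpy.linalg as LA

    def group_soft_threshold(x, t_mu, thres=1e-3):
        row_norms = LA.norm(x, axis=1).reshape(-1, 1)
        # The (row_norms < thres) term guards against division by zero.
        return x * np.clip(row_norms - t_mu, a_min=0, a_max=None) / (
            (row_norms < thres) + row_norms)

    x = np.array([[3.0, 4.0],   # row norm 5.0 -> shrunk to norm 4.0
                  [0.1, 0.0]])  # row norm 0.1 < t_mu -> zeroed
    y = group_soft_threshold(x, t_mu=1.0)
    assert np.isclose(LA.norm(y[0]), 4.0)
    assert np.allclose(y[1], 0.0)
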
def gl_Alm_dual(x0: np.ndarray, A: np.ndarray, b: np.ndarray, mu, opts: dict):
    default_opts = {
        "maxit": 100,  # 最大迭代次数
        "thres": 1e-3,  # 判断小量是否被认为 0 的阈值
        "tau": (1 + math.sqrt(5)) * 0.5,
        "rho": 1e2,
        "converge_len": 20,
    }

    # The second dictionary's values overwrite those from the first.
    opts = {**default_opts, **opts}

    def sparsity_func(x: np.ndarray):
        return np.sum(np.abs(x) > 1e-6 * np.max(np.abs(x))) / x.size

    def real_obj_func(x: np.ndarray):
        fro_term = 0.5 * np.sum((A @ x - b)**2)
        regular_term = np.sum(LA.norm(x, axis=1).reshape(-1, 1))
        return fro_term + mu * regular_term

    out = {
        "fvec": None,  # 每一步迭代的 LASSO 问题目标函数值
        "f_hist": None,  # 目标函数的历史值
        "f_hist_best": None,  # 目标函数每一步迭代对应的历史最优值
        "tt": None,  # 运行时间
    }

    def projection_functor(x: np.ndarray):
        # Project each row of x onto the l2-ball of radius mu.
        row_norms = LA.norm(x, axis=1, ord=2).reshape(-1, 1)
        return mu * x / np.clip(row_norms, a_min=mu, a_max=None)

    maxit, thres = opts["maxit"], opts["thres"]
    rho, tau = opts['rho'], opts['tau']
    converge_len = opts['converge_len']

    f_hist, f_hist_best, sparsity_hist = [], [], []
    f_best = np.inf

    x_k = np.copy(x0)
    z_k = np.zeros_like(b)
    u_k = np.zeros_like(x_k)

    stopwatch = Stopwatch()
    stopwatch.start()

    L = LA.cholesky(np.identity(A.shape[0]) + rho * A @ A.T)

    k = 0
    length = 0

    while k < maxit:
        k += 1

        # solve_sub_problem is assumed to be provided elsewhere in this
        # project; it solves the ALM subproblem for the auxiliary variable u.
        u = solve_sub_problem(b, rho, A, x_k, L, mu)
        z = LA.solve(L.T, LA.solve(L, A @ (x_k - rho * u) - b))
        x = x_k - tau * rho * (u + A.T @ z)

        r_k = u + A.T @ z  # primal feasibility residual
        s_k = A @ (u_k - u)  # dual feasibility residual

        z_k, u_k, x_k = z, u, x
        f_now = real_obj_func(x_k)
        f_hist.append(f_now)

        f_best = min(f_best, f_now)
        f_hist_best.append(f_best)

        sparsity_now = sparsity_func(x_k)

        sparsity_hist.append(sparsity_now)

        logger.debug(
            'iter= {:5}, objective= {:10E}, sparsity= {:3f}'.format(
                k, f_now.item(), sparsity_now.item()))

        r_k_norm = LA.norm(r_k, ord=2)
        s_k_norm = LA.norm(s_k, ord=2)
        if r_k_norm < thres and s_k_norm < thres:
            length += 1
        else:
            length = 0

        if length >= converge_len:
            break

    elapsed_time = stopwatch.elapsed(
        time_format=Stopwatch.TimeFormat.kMicroSecond) / 1e6
    out = {
        "tt": elapsed_time,
        "fval": real_obj_func(x_k),
        "f_hist": f_hist,
        "f_hist_best": f_hist_best
    }

    return x_k, k, out
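
# A small illustrative check (toy data; helper name is ours) of the row-wise
# projection computed by projection_functor above: rows with l2 norm at most
# mu are left unchanged, while longer rows are rescaled to norm exactly mu.
def _demo_dual_ball_projection(mu=2.0):
    import numpy as np
    import numpy.linalg as LA

    def project_rows(x, mu):
        row_norms = LA.norm(x, axis=1, ord=2).reshape(-1, 1)
        return mu * x / np.clip(row_norms, a_min=mu, a_max=None)

    x = np.array([[1.0, 0.0],   # norm 1.0 <= mu: unchanged
                  [6.0, 8.0]])  # norm 10.0 > mu: rescaled to norm mu
    p = project_rows(x, mu)
    assert np.allclose(p[0], x[0])
    assert np.isclose(LA.norm(p[1]), mu)
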
def gl_SGD_primal(x0: np.ndarray, A: np.ndarray, b: np.ndarray, mu_0, opts: dict):
    default_opts = {
        "maxit": 2100,  # 内循环最大迭代次数
        "thres": 1e-3,  # 判断小量是否被认为 0 的阈值
        "step_type": "diminishing",  # 步长衰减的类型(见辅助函数)
        "alpha0": 1e-3,  # 步长的初始值
        "ftol": 1e-5,  # 停机准则,当目标函数历史最优值的变化小于该值时认为满足
        "stable_len_threshold": 100,
        "continuous_subgradient_flag": False,
    }
    # The second dictionary's values overwrite those from the first.
    opts = {**default_opts, **opts}
    sparsity_func = lambda x: np.sum(np.abs(x) > 1e-6 * np.max(np.abs(x))) / x.size
    out = {
        "fvec": None,  # 每一步迭代的 LASSO 问题目标函数值
        "grad_hist": None,  # 可微部分梯度范数的历史值
        "f_hist": None,  # 目标函数的历史值
        "f_hist_best": None,  # 目标函数每一步迭代对应的历史最优值
        "tt": None,  # 运行时间
        "flag": None  # 标记是否收敛
    }

    maxit, ftol, alpha0 = opts["maxit"], opts["ftol"], opts["alpha0"]
    stable_len_threshold = opts["stable_len_threshold"]
    thres = opts["thres"]

    if opts["continuous_subgradient_flag"]:
        L = np.max(LA.eigvals(A.T @ A))
        alpha0 = 1. / L.real

    logger.debug("alpha0= {:10E}".format(alpha0))
    f_hist, f_hist_best = [], []
    f_best = np.inf

    x = np.copy(x0)
    stopwatch = Stopwatch()
    stopwatch.start()
    k = 0
    stable_len = 0
    for mu in [100 * mu_0, 10 * mu_0, mu_0]:
        logger.debug("new mu= {:10E}".format(mu))

        def obj_func(x: np.ndarray):
            fro_term = 0.5 * np.sum((A @ x - b) ** 2)
            regular_term = np.sum(LA.norm(x, axis=1).reshape(-1, 1))
            return fro_term + mu * regular_term

        def subgrad(x: np.ndarray):
            fro_term_grad = A.T @ (A @ x - b)
            regular_term_norm = LA.norm(x, axis=1).reshape(-1, 1)
            regular_term_grad = x / ((regular_term_norm < thres) + regular_term_norm)
            grad = fro_term_grad + mu * regular_term_grad
            return grad

        inn_iter = 0

        def set_step(step_type):
            iter_hat = max(inn_iter, 1000) - 999
            if step_type == 'fixed' or mu > mu_0:
                # Use a fixed step during the continuation phases (mu > mu_0).
                return alpha0
            elif step_type == 'diminishing':
                return alpha0 / np.sqrt(iter_hat)
            elif step_type == 'diminishing2':
                return alpha0 / iter_hat
            else:
                logger.error("Unsupported type.")

        while inn_iter < maxit:
            # Record current objective value
            f_now = obj_func(x)
            f_hist.append(f_now)

            f_best = min(f_best, f_now)
            f_hist_best.append(f_best)
            k += 1
            inn_iter += 1

            if k > 1 and abs(f_hist[k - 1] - f_hist[k - 2]) / abs(f_hist[k - 2]) < ftol:
                stable_len += 1
            else:
                stable_len = 0
            # if stable_len > stable_len_threshold:
            #     break

            x[np.abs(x) < thres] = 0
            sub_g = subgrad(x)
            alpha = set_step(opts["step_type"])
            x = x - alpha * sub_g

            if k % 100 == 0:
                logger.debug('iter= {:5}, objective= {:10E}, sparsity= {:3f}'.format(k, f_now.item(), sparsity_func(x)))

    elapsed_time = stopwatch.elapsed(time_format=Stopwatch.TimeFormat.kMicroSecond) / 1e6
    out = {
        "tt": elapsed_time,
        "fval": obj_func(x),
        "f_hist": f_hist,
        "f_hist_best": f_hist_best
    }

    return x, k, out
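
# Illustration (helper name is ours; the arithmetic mirrors set_step above) of
# the iter_hat trick: iter_hat = max(k, 1000) - 999 keeps the step at alpha0
# for the first 1000 inner iterations, and only afterwards lets the
# 'diminishing' schedule alpha0 / sqrt(iter_hat) decay.
def _demo_diminishing_schedule(alpha0=1e-3):
    import numpy as np

    def step(k):
        iter_hat = max(k, 1000) - 999
        return alpha0 / np.sqrt(iter_hat)

    # First 1000 iterations: iter_hat == 1, so the step stays at alpha0.
    assert step(1) == step(500) == step(1000) == alpha0
    # Afterwards the step decays: at k = 2000, iter_hat == 1001.
    assert np.isclose(step(2000), alpha0 / np.sqrt(1001))
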
def gl_Admm_primal(x0: np.ndarray, A: np.ndarray, b: np.ndarray, mu,
                   opts: dict):
    default_opts = {
        "maxit": 100,  # 最大迭代次数
        "thres": 1e-3,  # 判断小量是否被认为 0 的阈值
        "tau": (1 + math.sqrt(5)) * 0.5,
        "rho": 1e-2,
        "eta_0": 100,
        "converge_len": 10,
        "converge_thres": 1e-5,
        "step_type": "fixed",
    }

    # The second dictionary's values overwrite those from the first.
    opts = {**default_opts, **opts}
    sparsity_func = lambda x: np.sum(np.abs(x) > 1e-6 * np.max(np.abs(x))) / x.size

    def real_obj_func(x: np.ndarray):
        fro_term = 0.5 * np.sum((A @ x - b)**2)
        regular_term = np.sum(LA.norm(x, axis=1).reshape(-1, 1))
        return fro_term + mu * regular_term

    out = {
        "fvec": None,  # 每一步迭代的 LASSO 问题目标函数值
        "f_hist": None,  # 目标函数的历史值
        "f_hist_best": None,  # 目标函数每一步迭代对应的历史最优值
        "tt": None,  # 运行时间
    }

    maxit, thres = opts["maxit"], opts["thres"]
    rho, tau, eta_0 = opts['rho'], opts['tau'], opts['eta_0']
    converge_len = opts['converge_len']
    converge_thres = opts['converge_thres']
    step_type = opts['step_type']

    f_hist, f_hist_best, sparsity_hist = [], [], []
    f_best = np.inf

    def prox_tf(x: np.ndarray, t):
        """ Proximal operator of t * mu * ||.||_{1,2} (row-wise soft-thresholding).
        """
        t_mu = t * mu
        row_norms = LA.norm(x, axis=1).reshape(-1, 1)
        rv = x * np.clip(row_norms - t_mu, a_min=0, a_max=None) / (
            (row_norms < thres) + row_norms)
        return rv

    x_k = np.copy(x0)
    y_k = np.copy(x_k)  # copies avoid aliasing x_k
    z_k = np.copy(x_k)

    stopwatch = Stopwatch()
    stopwatch.start()

    k = 0

    L = LA.cholesky(rho * np.identity(A.shape[1]) + A.T @ A)
    AT_b = A.T @ b

    length = 0

    def set_step(step_type: str):
        if step_type == 'fixed':
            return eta_0
        elif step_type == 'diminishing':
            return eta_0 / np.sqrt(k)
        elif step_type == 'diminishing2':
            return eta_0 / k
        else:
            logger.error("Unsupported type.")

    while k < maxit:
        k += 1
        eta = set_step(step_type)
        y = LA.solve(L.T, LA.solve(L, AT_b - z_k + rho * x_k))
        # x = prox_tf(y + z_k / rho, 1/rho)
        x = prox_tf(x_k - eta * rho * (x_k - y - z_k / rho), eta)
        z = z_k - tau * rho * (x - y)

        r_k = x - y  # primal feasibility residual
        s_k = y - y_k  # dual feasibility residual

        x_k, y_k, z_k = x, y, z

        f_now = real_obj_func(x_k)
        f_hist.append(f_now)

        f_best = min(f_best, f_now)
        f_hist_best.append(f_best)

        sparsity_hist.append(sparsity_func(x_k))

        logger.debug(
            'iter= {:5}, objective= {:10E}, sparsity= {:3f}'.format(
                k, f_now.item(), sparsity_func(x_k)))

        r_k_norm = LA.norm(r_k, ord=2)
        s_k_norm = LA.norm(s_k, ord=2)
        if r_k_norm < thres and s_k_norm < thres:
            length += 1
        else:
            length = 0

        if length >= converge_len:
            break

    elapsed_time = stopwatch.elapsed(
        time_format=Stopwatch.TimeFormat.kMicroSecond) / 1e6
    out = {
        "tt": elapsed_time,
        "fval": real_obj_func(x_k),
        "f_hist": f_hist,
        "f_hist_best": f_hist_best
    }

    return x_k, k, out
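
# A brief, self-contained sketch (toy sizes; names are ours) of the cached
# Cholesky factorization used in the y-update above: rho*I + A^T A is factored
# once as L @ L.T outside the loop, so each iteration only needs two
# back-substitutions. numpy's LA.solve does not exploit the triangular
# structure (scipy.linalg.solve_triangular would), but the algebra is the same.
def _demo_cached_cholesky(seed=0):
    import numpy as np
    import numpy.linalg as LA

    rng = np.random.default_rng(seed)
    A = rng.standard_normal((20, 8))
    rho = 1e-2
    M = rho * np.identity(A.shape[1]) + A.T @ A
    L = LA.cholesky(M)  # factor once, reuse every iteration

    rhs = rng.standard_normal((8, 3))
    y = LA.solve(L.T, LA.solve(L, rhs))  # solves M @ y = rhs
    assert np.allclose(M @ y, rhs)
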
def gl_FGD_primal(x0: np.ndarray, A: np.ndarray, b: np.ndarray, mu_0,
                  opts: dict):
    default_opts = {
        "maxit": 1500,  # 最大迭代次数
        "thres": 1e-3,  # 判断小量是否被认为 0 的阈值
        "step_type": "line_search",  # 步长衰减的类型(见辅助函数)
        "alpha0": 1e-3,  # 步长的初始值
        "ftol": 1e-6,  # 停机准则,当目标函数历史最优值的变化小于该值时认为满足
        "stable_len_threshold": 70,
        "line_search_attenuation_coeffi": 0.98,
        "maxit_line_search_iter": 5,
        "delta": 1e-6  # 光滑化参数
    }
    # The second dictionary's values overwrite those from the first.
    opts = {**default_opts, **opts}
    sparsity_func = lambda x: np.sum(np.abs(x) > 1e-6 * np.max(np.abs(x))) / x.size

    def real_obj_func(x: np.ndarray):
        fro_term = 0.5 * np.sum((A @ x - b)**2)
        regular_term = np.sum(LA.norm(x, axis=1).reshape(-1, 1))
        return fro_term + mu_0 * regular_term

    out = {
        "fvec": None,  # 每一步迭代的 LASSO 问题目标函数值
        "grad_hist": None,  # 可微部分梯度范数的历史值
        "f_hist": None,  # 目标函数的历史值
        "f_hist_best": None,  # 目标函数每一步迭代对应的历史最优值
        "tt": None,  # 运行时间
        "flag": None  # 标记是否收敛
    }

    maxit, ftol, alpha0 = opts["maxit"], opts["ftol"], opts["alpha0"]
    stable_len_threshold = opts["stable_len_threshold"]
    thres = opts["thres"]
    step_type = opts['step_type']
    aten_coeffi = opts['line_search_attenuation_coeffi']
    max_line_search_iter = opts['maxit_line_search_iter']
    delta = opts["delta"]

    logger.debug("alpha0= {:10E}".format(alpha0))
    f_hist, f_hist_best, sparsity_hist = [], [], []
    v_hist, t_hist = [], []
    f_best = np.inf

    x_k = np.copy(x0)

    stopwatch = Stopwatch()
    stopwatch.start()
    k = 0
    for mu in [100 * mu_0, 10 * mu_0, mu_0]:
        logger.debug("new mu= {:10E}".format(mu))

        # min f(x) = g(x) + h(x)
        # g(x) = 0.5 * |Ax-b|_F^2 + mu * smoothed |x|_{1,2}
        # h(x) = 0

        def g_func(x: np.ndarray):
            fro_term = 0.5 * np.sum((A @ x - b)**2)
            regular_term = np.sum(
                np.sqrt(np.sum(x**2, axis=1).reshape(-1, 1) + delta * delta) -
                delta)
            return fro_term + mu * regular_term

        def grad_g_func(x: np.ndarray):
            fro_term_grad = A.T @ (A @ x - b)
            regular_term_grad = x / np.sqrt(
                np.sum(x**2, axis=1).reshape(-1, 1) + delta * delta)
            return fro_term_grad + mu * regular_term_grad

        v_k = np.copy(x_k)
        t_k = alpha0

        def prox_th(x: np.ndarray, t):
            """ Proximal operator of t * mu * h(x).
            """
            return x

        inner_iter = 0

        def set_step(step_type: str):
            iter_hat = max(inner_iter, 1000) - 999
            if step_type == 'fixed':
                return alpha0
            elif step_type == 'diminishing':
                return alpha0 / np.sqrt(iter_hat)
            elif step_type == 'diminishing2':
                return alpha0 / iter_hat
            elif step_type == 'line_search':

                t = t_k
                g_y = g_func(y)

                def stop_condition(t):
                    x = prox_th(y - t * grad_g_y, t)
                    g_x = g_func(x)
                    return g_x <= g_y + np.sum(grad_g_y * (x - y)) + np.sum(
                        (x - y)**2) / (2 * t)

                for i in range(max_line_search_iter):
                    if stop_condition(t):
                        break
                    t *= aten_coeffi
                return t

            else:
                logger.error("Unsupported type.")

        stable_len = 0

        while inner_iter < maxit:
            # Record current objective value
            f_now = real_obj_func(x_k)
            f_hist.append(f_now)

            f_best = min(f_best, f_now)
            f_hist_best.append(f_best)

            sparsity_hist.append(sparsity_func(x_k))

            v_hist.append(v_k)

            t_hist.append(t_k)

            k += 1
            inner_iter += 1

            if (k > 1
                    and abs(f_hist[k - 1] - f_hist[k - 2]) / abs(f_hist[k - 2])
                    < ftol
                    and abs(sparsity_hist[k - 1] - sparsity_hist[k - 2]) /
                    abs(sparsity_hist[k - 2]) < ftol):
                stable_len += 1
            else:
                stable_len = 0
            if stable_len > stable_len_threshold:
                break

            x_k[np.abs(x_k) < thres] = 0

            theta = 2 / (inner_iter + 1)
            y = (1 - theta) * x_k + theta * v_k
            grad_g_y = grad_g_func(y)

            t = set_step(step_type)
            x = prox_th(y - t * grad_g_y, t)
            v = x_k + (x - x_k) / theta

            x_k, v_k, t_k = x, v, t

            if k % 100 == 0:
                logger.debug(
                    'iter= {:5}, objective= {:10E}, sparsity= {:3f}'.format(
                        k, f_now.item(), sparsity_func(x)))

    elapsed_time = stopwatch.elapsed(
        time_format=Stopwatch.TimeFormat.kMicroSecond) / 1e6
    out = {
        "tt": elapsed_time,
        "fval": real_obj_func(x),
        "f_hist": f_hist,
        "f_hist_best": f_hist_best
    }

    return x, k, out
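
# A short numerical check (toy data; helper name is ours) of the smoothing
# used in g_func/grad_g_func above: sqrt(||x_i||^2 + delta^2) - delta
# approaches ||x_i|| as delta -> 0, and its gradient
# x_i / sqrt(||x_i||^2 + delta^2) stays well defined even at x_i = 0,
# which is the point of smoothing the group norm.
def _demo_smoothed_group_norm(delta=1e-6):
    import numpy as np
    import numpy.linalg as LA

    x = np.array([[3.0, 4.0], [0.0, 0.0]])
    row_sq = np.sum(x**2, axis=1).reshape(-1, 1)

    smoothed = np.sqrt(row_sq + delta * delta) - delta
    exact = LA.norm(x, axis=1).reshape(-1, 1)
    assert np.allclose(smoothed, exact, atol=1e-5)

    grad = x / np.sqrt(row_sq + delta * delta)
    assert np.all(np.isfinite(grad))  # no division by zero at the zero row
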