Example #1
A bisection line search for the Frank-Wolfe step size: it brackets the zero of the directional derivative along the segment from x to the atom s, then bisects it to the requested accuracy. (Throughout these examples, np is numpy, and dot_product and norm are helper routines assumed to come from the surrounding module.)
def get_alpha(self, fw_state, problem):
    extra_param_s = problem.param_func(fw_state['s'])
    extra_param = problem.param
    # largest admissible step along the segment from x to s
    beta_max = self.adjust_beta(problem,
                                fw_state,
                                extra_param,
                                extra_param_s,
                                case='line_search')
    # gradient evaluated at the convex combination (1 - beta) * x + beta * s
    grad_beta = lambda beta: problem.grad(
        (1 - beta) * fw_state['x'] + beta * fw_state['s'],
        (1 - beta) * extra_param + beta * extra_param_s)
    t_lb = 0
    delta_x = -fw_state['delta_x']  # the direction s - x
    ub = dot_product(grad_beta(beta_max), delta_x)
    t_ub = beta_max
    t = t_ub
    # expand the bracket while the directional derivative is still negative
    while (t_ub < 1) and (ub < 0):
        t_ub = 1 - (1 - t_ub) / 2
        ub = dot_product(grad_beta(t_ub), delta_x)
    # bisect on the sign of the directional derivative
    while t_ub - t_lb > self.accuracy:
        t = (t_lb + t_ub) / 2
        val = dot_product(grad_beta(t), delta_x)
        if val > 0:
            t_ub = t
        else:
            t_lb = t
    return t
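A minimal self-contained sketch of the same bisection pattern on a toy quadratic (the function, the point, and the direction below are illustrative stand-ins, not part of the original code):

import numpy as np

def bisect_step(x, d, grad, t_max=1.0, accuracy=1e-6):
    # directional derivative phi'(t) = <grad f(x + t*d), d>
    dphi = lambda t: np.dot(grad(x + t * d), d)
    t_lb, t_ub = 0.0, t_max
    while t_ub - t_lb > accuracy:
        t = (t_lb + t_ub) / 2
        if dphi(t) > 0:   # past the minimizer: shrink from above
            t_ub = t
        else:             # before the minimizer: shrink from below
            t_lb = t
    return (t_lb + t_ub) / 2

x = np.array([2.0, -1.0])
t = bisect_step(x, d=-x, grad=lambda z: z)  # f(z) = 0.5*||z||^2, grad f = z
print(round(t, 4))                          # ~1.0, the minimizer along d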
Example #2
Power iteration on the Hessian to estimate its largest eigenvalue, which serves as a Lipschitz constant for the gradient.
def estimate_lipschitz(problem, ndim):
    # starting direction(s) for power iteration on the Hessian
    if ndim == 1:
        dirr = np.ones(problem.n)
    elif ndim == 2:
        dirr = np.eye(problem.n)
    else:
        raise ValueError('ndim must be 1 or 2')
    # power iteration: dirr converges to the dominant eigenvector
    for _ in range(15):
        Dir = problem.hess_mult_vec(dirr)
        dirr = Dir / norm(Dir)
    # the Rayleigh quotient approximates the largest eigenvalue of the Hessian
    Hd = problem.hess_mult_vec(dirr)
    dHd = dot_product(dirr, Hd)
    L = dHd / dot_product(dirr, dirr)
    return L
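A hypothetical call (SimpleQuadratic and the dot_product alias are illustrative stand-ins, not part of the original code):

import numpy as np
from numpy.linalg import norm
dot_product = np.dot  # stand-in for the module's helper

class SimpleQuadratic:
    # f(x) = 0.5 * x' A x, so the Hessian is the constant matrix A
    def __init__(self, A):
        self.A = A
        self.n = A.shape[0]
    def hess_mult_vec(self, v):
        return self.A @ v

problem = SimpleQuadratic(np.diag([1.0, 3.0, 10.0]))  # top eigenvalue is 10
print(estimate_lipschitz(problem, ndim=1))            # ~10.0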
Example #3
Barzilai-Borwein-style Lipschitz estimates built from the last change in the iterate and the gradient.
def estimate_lipschitz_bb(x, x_old, grad, grad_old, bb_type=2):
    # Barzilai-Borwein-style curvature estimate from the last step
    s = x - x_old        # change in the iterate
    y = grad - grad_old  # change in the gradient
    if bb_type == 2:
        est = norm(y) / norm(s)
    elif bb_type == 3:
        est = abs(dot_product(y, s)) / norm(s)
    else:
        est = np.sqrt(norm(y)) / norm(s)
    return est
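A quick numeric check on a function with a known gradient Lipschitz constant (the values are illustrative):

import numpy as np
x_old, x = np.zeros(2), np.array([1.0, 1.0])
grad_old, grad = 5 * x_old, 5 * x   # gradient of f(x) = 2.5 * ||x||^2, so L = 5
print(estimate_lipschitz_bb(x, x_old, grad, grad_old, bb_type=2))  # 5.0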
Example #4
A step-size policy for self-concordant Frank-Wolfe: a damped step computed from the self-concordance parameter Mf, with the gap bound h and radius r contracted by exp(-alpha / 2) after every step.
def get_alpha(self, fw_state, problem):
    # initialize the gap bound h and the radius r on the first iteration
    if fw_state['k'] == 1:
        self.h = fw_state['Gap']
        self.r = np.sqrt(6 * self.h / problem.sigma_f)
    fw_state['r'] = self.r
    s = problem.llo_oracle(fw_state, problem)
    delta_x = fw_state['x'] - s
    fw_state['s'] = s
    fw_state['delta_x'] = delta_x
    fw_state['Gap'] = dot_product(fw_state['grad'], fw_state['delta_x'])
    # local norm of the FW direction, scaled by the self-concordance parameter
    e = np.sqrt(problem.hess_mult(delta_x)) * problem.Mf / 2
    # damped step size, capped at 1
    alpha = min(self.h * problem.Mf**2 / (4 * e**2), 1) * (1 / (1 + e))
    # contract the gap bound and the radius for the next iteration
    self.h = self.h * np.exp(-alpha / 2)
    self.r = self.r * np.sqrt(np.exp(-alpha / 2))
    return alpha
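The damped step depends only on the scalars h (the running gap bound), Mf, and e (the local norm of the direction), so the rule can be checked in isolation (the numbers below are illustrative):

import numpy as np
h, Mf, e = 0.5, 2.0, 1.5
alpha = min(h * Mf**2 / (4 * e**2), 1) * (1 / (1 + e))
print(alpha)                   # ~0.0889, the damped step
print(h * np.exp(-alpha / 2))  # contracted gap bound for the next iteration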
Example #5
A backtracking line search: the local Lipschitz estimate L is grown by the factor tau until a quadratic upper model majorizes the objective along the Frank-Wolfe segment.
def get_alpha(self, fw_state, problem):
    extra_param_s = problem.param_func(fw_state['s'])
    extra_param = problem.param
    beta_max = self.adjust_beta(problem, fw_state, extra_param,
                                extra_param_s)
    # objective evaluated at the convex combination (1 - beta) * x + beta * s
    func_beta = lambda beta: problem.val(
        (1 - beta) * fw_state['x'] + beta * fw_state['s'],
        (1 - beta) * extra_param + beta * extra_param_s)
    delta_x = -fw_state['delta_x']  # the direction s - x
    fx = fw_state['f']
    L = self.nu * fw_state['L']     # optimistic first guess (nu < 1)
    qx = dot_product(fw_state['grad'], delta_x)  # directional derivative
    qqx = L / 2 * norm(delta_x)**2               # quadratic coefficient
    t = min(-qx / (L * norm(delta_x)**2), beta_max)
    # increase L until the quadratic model majorizes the objective at t
    while func_beta(t) > fx + t * qx + t**2 * qqx:
        L = self.tau * L
        qqx = qqx * self.tau
        t = min(-qx / (2 * qqx), beta_max)
    fw_state['L'] = L
    return t
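A self-contained sketch of the same backtracking pattern on a toy quadratic (all names are illustrative; nu < 1 first shrinks the Lipschitz guess and tau > 1 grows it back until the quadratic model majorizes the objective at the trial step):

import numpy as np

def backtracking_step(f, x, d, grad_x, L=1.0, nu=0.5, tau=2.0, t_max=1.0):
    qx = np.dot(grad_x, d)   # directional derivative, negative for descent
    dd = np.dot(d, d)
    L = nu * L               # optimistic first guess
    t = min(-qx / (L * dd), t_max)
    fx = f(x)
    # grow L until the quadratic model majorizes f at the trial step
    while f(x + t * d) > fx + t * qx + (L / 2) * t**2 * dd:
        L = tau * L
        t = min(-qx / (L * dd), t_max)
    return t, L

f = lambda z: 0.5 * np.dot(z, z)
x = np.array([2.0, -1.0])
print(backtracking_step(f, x, d=-x, grad_x=x))  # (1.0, 1.0) on this quadratic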
Example #6
The quadratic model minimized by the FISTA subsolver of Example #7.
def func(x_2):
    # local quadratic model around the current iterate x; x, problem, and
    # scopt_state come from the enclosing scope (see fista in Example #7)
    return 0.5 * problem.hess_mult(x_2 - x) + dot_product(
        scopt_state['grad'], x_2 - x)
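Written out, this is the second-order model Q(x_2) = 0.5 * (x_2 - x)^T H (x_2 - x) + g^T (x_2 - x), with H the Hessian at x and g the stored gradient; minimizing Q over the feasible set is exactly the subproblem that the fista routine in Example #7 solves.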
Example #7
FISTA on the projected quadratic subproblem, with either power-iteration or backtracking Lipschitz estimation and an optional monotone ('mfista') variant.
def fista(problem,
          scopt_state,
          max_iter=1000,
          tol=1e-5,
          fista_type='mfista',
          Lest='backtracking',
          print_fista=False):
    x = scopt_state['x']

    def func(x_2):
        # local quadratic model around x (the same model as Example #6)
        return 0.5 * problem.hess_mult(x_2 - x) + dot_product(
            scopt_state['grad'], x_2 - x)

    def grad_func(x_2):
        # gradient of the quadratic model
        return problem.hess_mult_vec(x_2 - x) + scopt_state['grad']

    y = x.copy()
    if Lest == 'estimate':
        L = estimate_lipschitz(problem, ndim=x.ndim)
    elif Lest == 'backtracking':
        L = 1
    else:
        raise ValueError("Lest must be 'estimate' or 'backtracking'")
    x_cur = y.copy()
    f_cur = func(x_cur)
    t = 1
    beta = 2
    for k in range(1, max_iter + 1):
        grad_y = grad_func(y)
        f_y = func(y)
        if Lest == 'estimate':
            x_tmp = y - 1 / L * grad_y
            z = problem.projection(x_tmp)
            f_z = func(z)
            diff_yz = z - y
        elif Lest == 'backtracking':
            # dummy values that force one pass through the loop below
            z = y
            L = L / beta
            diff_yz = z - y
            f_z = f_y + 1
        # grow L until the quadratic upper bound holds and the value does
        # not increase
        while (f_z > f_y + dot_product(grad_y, diff_yz) +
               (L / 2) * norm(diff_yz)**2) or (f_z > f_y):
            L = L * beta
            x_tmp = y - 1 / L * grad_y
            z = problem.projection(x_tmp)
            f_z = func(z)
            diff_yz = z - y
            if L > 1e+20:
                # L has blown up: fall back to a plain projection of y
                z = problem.projection(y)
                f_z = func(z)
                diff_yz = z - y
                L = L / beta
                break
        f_nxt = f_z
        # monotone FISTA: reject the candidate if the model value increased
        if (f_nxt > f_cur) and (fista_type == 'mfista'):
            x_nxt = x_cur
            f_nxt = f_cur
        else:
            x_nxt = z
        zdiff = z - x_cur
        ndiff = norm(zdiff)
        if (ndiff < tol) and (k > 1):
            if print_fista:
                print('Fista err = %3.3e; Subiter = %3d; subproblem converged!'
                      % (ndiff, k))
            break
        xdiff = x_nxt - x_cur
        # Nesterov momentum coefficient and extrapolation
        t_nxt = 0.5 * (1 + np.sqrt(1 + 4 * (t**2)))
        y = x_nxt + (t - 1) / t_nxt * xdiff + t / t_nxt * (z - x_nxt)
        t = t_nxt
        x_cur = x_nxt
        f_cur = f_nxt
    return x_nxt
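A hypothetical end-to-end call; BoxQuadratic is an illustrative stand-in, and dot_product / norm are assumed to be the numpy helpers used throughout the module:

import numpy as np
from numpy.linalg import norm
dot_product = np.dot  # stand-in for the module's helper

class BoxQuadratic:
    # quadratic model data with Hessian A, feasible set [0, 1]^n
    def __init__(self, A):
        self.A = A
    def hess_mult(self, v):
        return v @ self.A @ v
    def hess_mult_vec(self, v):
        return self.A @ v
    def projection(self, x):
        return np.clip(x, 0.0, 1.0)

scopt_state = {'x': np.array([1.0, 1.0]), 'grad': np.array([0.5, -0.2])}
x_star = fista(BoxQuadratic(np.diag([2.0, 1.0])), scopt_state,
               max_iter=200, tol=1e-8)
print(x_star)  # minimizer of the local quadratic model over the box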
Example #8
A proximal-gradient driver: Barzilai-Borwein Lipschitz estimates, optional backtracking, and a damped step size derived from the self-concordance parameter Mf.
def run_prox_grad(
    problem,
    x_0=None,
    max_iter=1000,
    eps=1e-10,
    bb_type=3,
    backtracking=True,
    btk_iters=100,
    print_every=10,
):

    if x_0 is None:
        x = problem.generate_start_point()
    else:
        x = x_0
    x_old = 0     # scalar zeros broadcast against the first BB estimate
    grad_old = 0
    alpha_hist = []
    f_hist = []
    time_hist = []
    err_hist = []
    int_start = time.time()
    time_hist.append(0)
    Mf = problem.Mf  # self-concordance parameter used in the step-size rule
    for k in range(1, max_iter + 1):
        start = time.time()
        f = problem.val(x)
        grad = problem.grad(x)
        Lips_cur = estimate_lipschitz_bb(x,
                                         x_old,
                                         grad,
                                         grad_old,
                                         bb_type=bb_type)
        # projected gradient step with step size 1 / Lips_cur
        x_nxt = problem.projection(x - 1 / Lips_cur * grad)
        diffx = x_nxt - x
        nrm_dx = norm(diffx)
        lam_k = np.sqrt(Lips_cur * dot_product(diffx, diffx))
        beta_k = Mf * norm(diffx)
        if backtracking:
            # halve the Lipschitz estimate until it is consistent with lam_k
            for _ in range(btk_iters):
                if Lips_cur <= (lam_k * lam_k) / (nrm_dx * nrm_dx):
                    break
                Lips_cur = Lips_cur / 2
                x_nxt = problem.projection(x - 1 / Lips_cur * grad)
            # recompute the step quantities for the accepted Lips_cur
            diffx = x_nxt - x
            nrm_dx = norm(diffx)
            lam_k = np.sqrt(Lips_cur * dot_product(diffx, diffx))
            beta_k = Mf * norm(diffx)
        # damped step size from the self-concordance bound
        alpha = min(beta_k / (lam_k * (lam_k + beta_k)), 1.)
        alpha_hist.append(alpha)
        x_old = x
        grad_old = grad
        x = x + alpha * diffx
        end = time.time()
        f_hist.append(f)
        rdiff = nrm_dx / max(1.0, norm(x))
        err_hist.append(rdiff)
        time_hist.append(end - start)

        if (rdiff <= eps) and (k > 1):
            print('Convergence achieved!')
            print('iter = %4d, stepsize = %3.3e, rdiff = %3.3e, value = %g' %
                  (k, alpha, rdiff, f))
            break

        if (k % print_every == 0) or (k == 1):
            print('iter = %4d, stepsize = %3.3e, rdiff = %3.3e, f = %g' %
                  (k, alpha, rdiff, f))
    int_end = time.time()
    if k >= max_iter:
        f_hist.append(f)
        print('Exceeded the maximum number of iterations')
    print('Total time: %g s' % (int_end - int_start))
    return x, alpha_hist, f_hist, time_hist
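A hypothetical driver call on a stub problem (StubProblem is an illustrative stand-in; Mf and nu are the attributes the function reads, and norm, dot_product, time, and estimate_lipschitz_bb are assumed to be in scope as above):

import numpy as np

class StubProblem:
    Mf = 2.0   # self-concordance parameter used by the step-size rule
    nu = 3.0
    def generate_start_point(self):
        return np.array([0.8, 0.2])
    def val(self, x):
        return 0.5 * float(x @ x)
    def grad(self, x):
        return x
    def projection(self, x):
        return np.clip(x, 0.0, 1.0)

x, alpha_hist, f_hist, time_hist = run_prox_grad(StubProblem(), max_iter=50,
                                                 eps=1e-8, print_every=10)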
Example #9
The main Frank-Wolfe driver: it queries the linear oracle, delegates the step size to the chosen policy, and tracks upper and lower bounds on the optimal value along with a relative-change stopping criterion.
def run_frank_wolfe(problem,
                    x_0=None,
                    alpha_policy='standard',
                    max_iter=1000,
                    eps=1e-10,
                    print_every=10):
    policy = POLICY_DICT[alpha_policy]
    fw_state = {}
    fw_state['L'] = 1
    lower_bound = float("-inf")
    upper_bound = float("inf")
    real_Gap = upper_bound - lower_bound
    criterion = 1e10 * eps

    if x_0 is None:
        x = problem.generate_start_point()
    else:
        x = x_0

    alpha_hist = []
    Gap_hist = []
    f_hist = []
    time_hist = [0]
    int_start = time.time()

    for k in range(1, max_iter + 1):
        fw_state['k'] = k
        start_time = time.time()
        f = problem.val(x)

        # compute the gradient and query the linear minimization oracle
        grad = problem.grad(x)
        fw_state['f'] = f
        fw_state['grad'] = grad
        fw_state['x'] = x
        fw_state['s'] = problem.linear_oracle(grad)
        fw_state['delta_x'] = x - fw_state['s']
        fw_state['Gap'] = dot_product(grad, fw_state['delta_x'])

        alpha = policy.get_alpha(fw_state, problem)

        x_nxt = x + alpha * (fw_state['s'] - x)
        time_hist.append(time.time() - start_time)
        x_last = x.copy()
        alpha_hist.append(alpha)
        Gap_hist.append(fw_state['Gap'])
        f_hist.append(f)
        x = x_nxt
        # track the best value seen (upper bound) and the FW dual lower bound
        if f < upper_bound:
            upper_bound = f
            x_best = x.copy()
        lower_bound = max(lower_bound, f - fw_state['Gap'])
        if (lower_bound - upper_bound) / abs(lower_bound) > 1e-3:
            print(
                f'upper_bound={upper_bound:.2e}, lower_bound={lower_bound:.2e}'
            )
            sys.exit('Lower bound exceeds upper bound')
        real_Gap = upper_bound - lower_bound
        criterion = min(criterion, norm(x - x_last) / max(1, norm(x_last)))

        if k % print_every == 0 or k == 1:
            print(
                f'iter={k}, stepsize={alpha:.2e}, criterion={criterion:.2e},'
                f' upper_bound={upper_bound:.2e}, lower_bound={lower_bound:.2e},'
                f' real_Gap={real_Gap:.2e}, f_val={f}')

        if (criterion <= eps and
                (upper_bound - lower_bound) / np.abs(lower_bound) <= eps):
            f_hist.append(f)
            f = problem.val(x_best)
            print('Convergence achieved!')
            print(f'iter = {k}, stepsize = {alpha}, crit = {criterion}, '
                  f'upper_bound={upper_bound}, lower_bound={lower_bound}, '
                  f'real_Gap={real_Gap}')
            return x_best, alpha_hist, Gap_hist, f_hist, time_hist

    f_hist.append(f)
    int_end = time.time()
    print('Total time: %g s' % (int_end - int_start))
    return x_best, alpha_hist, Gap_hist, f_hist, time_hist
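A hypothetical call that minimizes a quadratic over the probability simplex (SimplexQuadratic is an illustrative stand-in; 'standard' is the signature's default policy key, and POLICY_DICT, norm, and dot_product are assumed to come from the surrounding module):

import numpy as np

class SimplexQuadratic:
    # f(x) = 0.5 * ||x - b||^2 over the simplex {x >= 0, sum(x) = 1}
    def __init__(self, b):
        self.b = b
        self.n = b.size
    def generate_start_point(self):
        return np.ones(self.n) / self.n
    def val(self, x):
        return 0.5 * float((x - self.b) @ (x - self.b))
    def grad(self, x):
        return x - self.b
    def linear_oracle(self, grad):
        s = np.zeros(self.n)
        s[np.argmin(grad)] = 1.0   # best vertex for the current gradient
        return s

problem = SimplexQuadratic(np.array([0.2, 0.3, 0.5]))
x_best, alpha_hist, Gap_hist, f_hist, time_hist = run_frank_wolfe(
    problem, alpha_policy='standard', max_iter=500, eps=1e-8)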