# Assumed imports for the functions below (module paths follow the usual
# scitbx/cctbx layout; "bfgs" is assumed to be the module providing
# h_update, b_update, hg_two_loop_recursion, memory_element and h0_scaling;
# adjust the paths to the local source tree if they differ):
from __future__ import division
from scitbx.array_family import flex
from scitbx.linalg import eigensystem
from scitbx import matrix
from scitbx.math import bfgs
from libtbx.test_utils import approx_equal
from itertools import count
import sys

def exercise_two_loop_recursion(fgh):
  x0 = flex.double([3.0, -4.0])
  g0 = fgh.g(x0)
  h0 = flex.double([[1,0],[0,1]])
  memory = []
  hg0 = bfgs.hg_two_loop_recursion(memory, hk0=h0, gk=g0)
  assert approx_equal(hg0, h0.matrix_multiply(g0))
  #
  x1 = x0 - 1/3 * hg0
  g1 = fgh.g(x1)
  h1 = bfgs.h_update(hk=h0, sk=x1-x0, yk=g1-g0)
  memory.append(bfgs.memory_element(s=x1-x0, y=g1-g0))
  hg1 = bfgs.hg_two_loop_recursion(memory, hk0=h0, gk=g1)
  assert approx_equal(hg1, h1.matrix_multiply(g1))
  #
  x2 = x1 - 1/5 * hg1
  g2 = fgh.g(x2)
  h2 = bfgs.h_update(hk=h1, sk=x2-x1, yk=g2-g1)
  memory.append(bfgs.memory_element(s=x2-x1, y=g2-g1))
  hg2 = bfgs.hg_two_loop_recursion(memory, hk0=h0, gk=g2)
  assert approx_equal(hg2, h2.matrix_multiply(g2))
  #
  x3 = x2 - 3/8 * hg2
  g3 = fgh.g(x3)
  h3 = bfgs.h_update(hk=h2, sk=x3-x2, yk=g3-g2)
  memory.append(bfgs.memory_element(s=x3-x2, y=g3-g2))
  hg3 = bfgs.hg_two_loop_recursion(memory, hk0=h0, gk=g3)
  assert approx_equal(hg3, h3.matrix_multiply(g3))
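# For reference, a minimal sketch of the standard L-BFGS two-loop recursion
# (Nocedal & Wright, Numerical Optimization, Algorithm 7.4) that
# hg_two_loop_recursion evaluates above, written in plain Python with a
# scalar initial inverse Hessian h0 for clarity (the call sites above also
# pass diagonal or full matrices). memory holds (s, y, rho) tuples with
# rho = 1 / dot(y, s). This is an illustrative sketch, not the scitbx code.
def two_loop_recursion_sketch(memory, h0, g):
  dot = lambda a, b: sum(ai*bi for ai, bi in zip(a, b))
  q = list(g)
  alphas = []
  for s, y, rho in reversed(memory):  # newest correction pair first
    alpha = rho * dot(s, q)
    q = [qi - alpha*yi for qi, yi in zip(q, y)]
    alphas.append(alpha)
  r = [h0 * qi for qi in q]  # apply the initial inverse Hessian
  for (s, y, rho), alpha in zip(memory, reversed(alphas)):  # oldest first
    beta = rho * dot(y, r)
    r = [ri + (alpha - beta)*si for ri, si in zip(r, s)]
  return r  # approximates H_k * g without ever forming H_k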
def lbfgs_emulation(O, memory_range=5):
  assert len(O.xfgc_infos) == 1
  for O.i_step in xrange(O.params.iteration_limit):
    if (O.i_step == 0):
      dests = -O.grads
      stp = 1 / O.grads.norm()
    else:
      active_infos = O.get_active_infos()
      assert len(active_infos) > 1
      if (memory_range is not None):
        active_infos = active_infos[-(memory_range+1):]
      memory = O.build_bfgs_memory(active_infos=active_infos)
      assert memory is not None
      k_1 = active_infos[-1]
      k_2 = active_infos[-2]
      gamma = bfgs.h0_scaling(
        sk=k_1.x-k_2.x,
        yk=k_1.grads-k_2.grads)
      hk0 = flex.double(O.x.size(), gamma)
      dests = -bfgs.hg_two_loop_recursion(
        memory=memory, hk0=hk0, gk=O.grads)
      stp = 1
    stp = O.line_search(dests, stp=stp)
    assert stp is not None
    O.update_fgc(is_iterate=True)
    print "%4d: %s" % (O.i_step+1, O.format_rms_info())
    sys.stdout.flush()
    if (O.grads_mean_sq < O.params.grads_mean_sq_threshold):
      O.termination_remark = ""
      break
  else:
    O.termination_remark = " (iteration limit reached)"
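# The gamma used above to seed hk0 is presumably the standard scaling of
# the initial inverse Hessian, gamma_k = (s_k^T y_k) / (y_k^T y_k)
# (Nocedal & Wright, eq. 7.20); a minimal sketch under that assumption:
def h0_scaling_sketch(sk, yk):
  sy = sum(s*y for s, y in zip(sk, yk))  # s_k^T y_k
  yy = sum(y*y for y in yk)              # y_k^T y_k
  assert yy != 0
  return sy / yy  # positive whenever the curvature condition s^T y > 0 holds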
def callback_after_step(O, minimizer):
  xk = O.prev_x
  xl = O.x
  gk = O.fgh.g(xk)
  gl = O.fgh.g(xl)
  def check(bk, hk):
    hl = bfgs.h_update(hk, xl-xk, gl-gk)
    bl = bfgs.b_update(bk, xl-xk, gl-gk)
    assert approx_equal(matrix.sqr(hl).inverse(), bl)
    es = eigensystem.real_symmetric(bl)
    assert es.values().all_gt(0)
    assert bfgs.h0_scaling(sk=xl-xk, yk=gl-gk) > 0
  #
  bk = flex.double([[1,0],[0,1]])
  hk = bk
  check(bk, hk)
  #
  bk = O.fgh.h(xk)
  es = eigensystem.real_symmetric(bk)
  if (es.values().all_gt(0)):
    hk = bk.deep_copy()
    hk.matrix_inversion_in_place()
    check(bk, hk)
  #
  h0 = flex.double([[0.9,0.1],[-0.2,0.8]])
  hg_tlr = bfgs.hg_two_loop_recursion(memory=O.memory, hk0=h0, gk=gl)
  h_upd = h0
  for m in O.memory:
    h_upd = bfgs.h_update(hk=h_upd, sk=m.s, yk=m.y)
  hg_upd = h_upd.matrix_multiply(gl)
  assert approx_equal(hg_tlr, hg_upd)
  #
  O.memory.append(bfgs.memory_element(s=xl-xk, y=gl-gk))
  O.prev_x = O.x.deep_copy()
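# For reference, sketches of the rank-two updates that h_update and b_update
# presumably implement (Nocedal & Wright, eqs. 6.17 and 6.19), written with
# numpy for clarity; with rho = 1 / (y^T s):
#   H_{k+1} = (I - rho s y^T) H_k (I - rho y s^T) + rho s s^T
#   B_{k+1} = B_k - (B_k s s^T B_k) / (s^T B_k s) + (y y^T) / (y^T s)
# These are illustrative sketches, not the flex-based implementations.
import numpy

def h_update_sketch(h, s, y):  # inverse-Hessian update
  rho = 1.0 / numpy.dot(y, s)
  v = numpy.identity(len(s)) - rho * numpy.outer(s, y)
  return v.dot(h).dot(v.T) + rho * numpy.outer(s, s)

def b_update_sketch(b, s, y):  # direct Hessian update
  bs = b.dot(s)
  return b - numpy.outer(bs, bs) / s.dot(bs) + numpy.outer(y, y) / y.dot(s)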
def compute_step_just_grads(O):
  inp_i_info = len(O.xfgc_infos) - 1
  inp_info = O.xfgc_infos[-1]
  limits = flex.double()
  for ix,dsl,g in zip(count(), O.dynamic_shift_limits, inp_info.grads):
    limits.append(dsl.pair(x=O.x[ix]).get(grad=g))
  assert limits.all_gt(0)
  def get_pseudo_curvs():
    ag_max = flex.max(flex.abs(inp_info.grads))
    assert ag_max != 0
    dests = (-inp_info.grads/ag_max) * (limits/2)
    assert flex.abs(dests).all_le(limits/2*(1+1e-6))
    assert (dests > 0).all_eq(inp_info.grads < 0)
    O.pseudo_curvs_i_info = inp_i_info
    return dests
  if (O.pseudo_curvs is None):
    dests = get_pseudo_curvs()
  else:
    active_infos = O.get_active_infos(O.pseudo_curvs_i_info)
    assert len(active_infos) > 1
    memory = O.build_bfgs_memory(active_infos=active_infos)
    if (memory is None):
      O.pseudo_curvs = None
      dests = get_pseudo_curvs()
    else:
      hk0 = 1 / O.pseudo_curvs
      dests = -bfgs.hg_two_loop_recursion(
        memory=memory, hk0=hk0, gk=inp_info.grads)
      madl = flex.max(flex.abs(dests / limits))
      if (madl > 1):
        print "madl:", madl
        dests *= (1/madl)
      assert flex.abs(dests).all_le(limits*(1+1e-6))
  dest_adj = O.line_search(dests, stpmax=2.0)
  print "dest_adj:", dest_adj
  if (dest_adj is not None):
    dests *= dest_adj
  elif (O.pseudo_curvs is not None):
    O.pseudo_curvs = None
    dests = get_pseudo_curvs()
    dest_adj = O.line_search(dests, stpmax=2.0)
    if (dest_adj is not None):
      dests *= dest_adj
  if (O.pseudo_curvs is None):
    assert (dests > 0).all_eq(inp_info.grads < 0)
    assert flex.abs(dests).all_le(limits*(1+1e-6))
    O.pseudo_curvs = -inp_info.grads / dests
    assert O.pseudo_curvs.all_gt(0)
  O.x = inp_info.x + dests
  O.update_fgc(is_iterate=True)
  O.aq_sel_size = None
  O.aq_n_used = None
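# Note on the pseudo curvatures set above: for a one-dimensional quadratic
# with curvature c, the Newton step from gradient g is dest = -g/c, hence
# c = -g/dest. compute_step_just_grads inverts its damped gradient steps
# this way and recycles the estimates as the diagonal initial inverse
# Hessian hk0 = 1/O.pseudo_curvs in the two-loop recursion. A minimal
# sketch of the estimate:
def pseudo_curvs_sketch(grads, dests):
  curvs = [-g/d for g, d in zip(grads, dests)]  # elementwise c = -g/dest
  assert min(curvs) > 0  # holds because each dest opposes its gradient
  return curvs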
def update_dests_using_bfgs_formula(O, dests):
  O.aq_sel_size = -2
  O.aq_n_used = -2
  if (O.params.bfgs_estimate_limit_factor <= 0):
    return
  aq_sel = flex.size_t()
  aq_sel_size_start = 0
  iinfo_active = []
  for iinfo in xrange(len(O.xfgc_infos)-1,-1,-1):
    info = O.xfgc_infos[iinfo]
    if (info.is_iterate):
      if (aq_sel_size_start == 0):
        aq_sel = info.approx_quads
        aq_sel_size_start = aq_sel.size()
        if (aq_sel_size_start < 2):
          return
      else:
        next_aq_sel = aq_sel.intersection(other=info.approx_quads)
        if (    next_aq_sel.size() < aq_sel_size_start * 0.9
            and len(iinfo_active) > 1):
          break
        aq_sel = next_aq_sel
      iinfo_active.append(iinfo)
  iinfo_active.sort()
  O.aq_sel_size = aq_sel.size()
  if (len(iinfo_active) < 2 or O.aq_sel_size < 2):
    return
  O.aq_n_used = -1
  assert iinfo_active[-1] == len(O.xfgc_infos)-1
  curvs = O.xfgc_infos[iinfo_active[-1]].curvs.select(aq_sel)
  assert curvs.all_gt(0)
  hk0 = 1 / curvs
  memory = []
  for iinfo in iinfo_active[:-1]:
    k = O.xfgc_infos[iinfo]
    l = O.xfgc_infos[iinfo+1]
    xk = k.x.select(aq_sel)
    xl = l.x.select(aq_sel)
    gk = k.grads.select(aq_sel)
    gl = l.grads.select(aq_sel)
    m = bfgs.memory_element(s=xl-xk, y=gl-gk)
    gks = gk.dot(m.s)
    gls = gl.dot(m.s)
    wolfe_curv_cond = (gls >= 0.9 * gks)
      # Nocedal & Wright (1999) Equation 3.7b,
      # reformulated using sk instead of pk
    if (not wolfe_curv_cond):
      return
    if (m.rho is None):
      print "Warning: rho <= 0"
      return
    memory.append(m)
  aq_dests = -bfgs.hg_two_loop_recursion(
    memory=memory, hk0=hk0, gk=O.xfgc_infos[-1].grads.select(aq_sel))
  O.aq_n_used = 0
  for aq_dest,ix in zip(aq_dests, aq_sel):
    dsl = O.dynamic_shift_limits[ix]
    limit = dsl.pair(x=O.x[ix]).get(grad=O.grads[ix])
    if (abs(aq_dest) <= O.params.bfgs_estimate_limit_factor * limit):
      dests[ix] = aq_dest
      O.aq_n_used += 1
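# Note on the curvature test above: the weak Wolfe condition
# grad f(x_k + alpha_k p_k)^T p_k >= c2 * grad f(x_k)^T p_k with c2 = 0.9
# (Nocedal & Wright 1999, eq. 3.7b) is homogeneous in p_k, so multiplying
# through by alpha_k > 0 and substituting s_k = alpha_k p_k gives the
# equivalent form used above, gl.dot(s) >= 0.9 * gk.dot(s). A sketch:
def wolfe_curv_cond_sketch(gk, gl, s, c2=0.9):
  dot = lambda a, b: sum(ai*bi for ai, bi in zip(a, b))
  return dot(gl, s) >= c2 * dot(gk, s)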