Example #1
    def test_nested_concatenate(self):
        aa = ch.arange(3)
        bb = ch.arange(4)
        cc = ch.arange(5)

        result = ch.concatenate((ch.concatenate((aa, bb)), cc))
        self.assertTrue(result.m0 is aa)
        self.assertTrue(result.m1 is bb)
        self.assertTrue(result.m2 is cc)

        self.assertTrue(result.dr_wrt(aa).nnz > 0)
        self.assertTrue(result.dr_wrt(bb).nnz > 0)
        self.assertTrue(result.dr_wrt(cc).nnz > 0)
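For reference, a minimal sketch of what the flattened result evaluates to (assuming chumpy is importable as ch):

import numpy as np
import chumpy as ch

aa, bb, cc = ch.arange(3), ch.arange(4), ch.arange(5)
result = ch.concatenate((ch.concatenate((aa, bb)), cc))
# The evaluated residual equals a flat numpy concatenation of the inputs.
assert np.array_equal(result.r, np.r_[np.arange(3), np.arange(4), np.arange(5)])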
Example #2
def setup_objective(obj,
                    free_variables,
                    on_step=None,
                    disp=True,
                    make_dense=False):
    '''
    obj here can be a list of ch objects or a dict of label: ch objects. Either way, the ch
    objects will be merged into one objective using a ChInputsStacked. The labels are just used
    for printing out values per objective with each iteration. If make_dense is True, the
    resulting object will return a dense Jacobian.
    '''
    # Validate free variables
    num_unique_ids = len(
        np.unique(np.array([id(freevar) for freevar in free_variables])))
    if num_unique_ids != len(free_variables):
        raise Exception(
            'The "free_variables" param contains duplicate variables.')
    # Extract labels
    labels = {}
    if isinstance(obj, (list, tuple)):
        obj = ch.concatenate([f.ravel() for f in obj])
    elif isinstance(obj, dict):
        labels = obj
        obj = ch.concatenate([f.ravel() for f in obj.values()])
    # build objective
    x = np.concatenate([freevar.r.ravel() for freevar in free_variables])
    obj = ChInputsStacked(obj=obj,
                          free_variables=free_variables,
                          x=x,
                          make_dense=make_dense)

    # build callback
    def callback():
        if on_step is not None:
            on_step(obj)
        if disp:
            report_line = ['%.2e' % (np.sum(obj.r**2), )]
            for label, objective in sorted(labels.items(), key=lambda x: x[0]):
                report_line.append('%s: %.2e' %
                                   (label, np.sum(objective.r**2)))
            report_line = " | ".join(report_line) + '\n'
            sys.stderr.write(report_line)

    return obj, callback
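A hypothetical usage sketch (variable names and values are illustrative, not from the source; assumes chumpy imported as ch):

import chumpy as ch

x = ch.array([10.0, 10.0])
obj, callback = setup_objective({'data': x - 3.0, 'reg': 1e-2 * x},
                                free_variables=[x])
# obj stacks both residual blocks into one ChInputsStacked; callback()
# writes the total and per-label squared errors to stderr.
callback()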
Example #3
def slogdet(*args):
    n = len(args)
    if n == 1:
        r2 = LogAbsDet(x=args[0])
        r1 = SignLogAbsDet(r2)
        return r1, r2
    else:
        r2 = [LogAbsDet(x=arg) for arg in args]
        r1 = [SignLogAbsDet(r) for r in r2]
        r2 = ch.concatenate(r2)
        return r1, r2
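For a single square matrix the returned pair plays the role of numpy's slogdet, but as differentiable ch objects; a sketch under that assumption:

import numpy as np
import chumpy as ch

m = ch.array(np.random.randn(3, 3))
sign, logabsdet = slogdet(m)
expected_sign, expected_log = np.linalg.slogdet(m.r)
# sign.r and logabsdet.r should agree with numpy's values.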
Example #4
def minimize(fun, x0, method='dogleg', bounds=None, constraints=(), tol=None, callback=None, options=None):

    if method == 'dogleg':
        if options is None: options = {}
        return _minimize_dogleg(fun, free_variables=x0, on_step=callback, **options)

    if isinstance(fun, (list, tuple)):
        fun = ch.concatenate([f.ravel() for f in fun])
    if isinstance(fun, dict):
        fun = ch.concatenate([f.ravel() for f in fun.values()])
    obj = fun
    free_variables = x0


    from ch import SumOfSquares

    hessp = None
    hess = None
    if obj.size == 1:
        obj_scalar = obj
    else:
        obj_scalar = SumOfSquares(obj)
    
        def hessp(vs, p, obj, obj_scalar, free_variables):
            changevars(vs, obj, obj_scalar, free_variables)
            if not hasattr(hessp, 'vs'):
                hessp.vs = vs * 0 + 1e16
            if np.max(np.abs(vs - hessp.vs)) > 0:
                J = ns_jacfunc(vs, obj, obj_scalar, free_variables)
                hessp.J = J
                hessp.H = 2. * J.T.dot(J)
                hessp.vs = vs
            return np.array(hessp.H.dot(p)).ravel()
            #return 2*np.array(hessp.J.T.dot(hessp.J.dot(p))).ravel()

        if method.lower() != 'newton-cg':
            def hess(vs, obj, obj_scalar, free_variables):
                changevars(vs, obj, obj_scalar, free_variables)
                if not hasattr(hessp, 'vs'):
                    hessp.vs = vs * 0 + 1e16
                if np.max(np.abs(vs - hessp.vs)) > 0:
                    J = ns_jacfunc(vs, obj, obj_scalar, free_variables)
                    hessp.H = 2. * J.T.dot(J)
                    hessp.vs = vs  # keep the cache key current so H is reused
                return hessp.H
        
    def changevars(vs, obj, obj_scalar, free_variables):
        cur = 0
        changed = False
        for idx, freevar in enumerate(free_variables):
            sz = freevar.r.size
            newvals = vs[cur:cur+sz].copy().reshape(free_variables[idx].shape)
            if np.max(np.abs(newvals-free_variables[idx]).ravel()) > 0:
                free_variables[idx][:] = newvals
                changed = True

            cur += sz
            
        methods_without_callback = ('anneal', 'powell', 'cobyla', 'slsqp')
        if callback is not None and changed and method.lower() in methods_without_callback:
            callback(None)

        return changed
    
    def residuals(vs, obj, obj_scalar, free_variables):
        changevars(vs, obj, obj_scalar, free_variables)
        residuals = obj_scalar.r.ravel()[0]
        return residuals

    def scalar_jacfunc(vs, obj, obj_scalar, free_variables):
        if not hasattr(scalar_jacfunc, 'vs'):
            scalar_jacfunc.vs = vs*0+1e16
        if np.max(np.abs(vs-scalar_jacfunc.vs)) == 0:
            return scalar_jacfunc.J
            
        changevars(vs, obj, obj_scalar, free_variables)
        
        if True: # faster, at least on some problems
            result = np.concatenate([np.array(obj_scalar.lop(wrt, np.array([[1]]))).ravel() for wrt in free_variables])            
        else:
            jacs = [obj_scalar.dr_wrt(wrt) for wrt in free_variables]
            for idx, jac in enumerate(jacs):
                if sp.issparse(jac):
                    jacs[idx] = jacs[idx].todense()
            result = np.concatenate([jac.ravel() for jac in jacs])

        scalar_jacfunc.J = result
        scalar_jacfunc.vs = vs
        return result.ravel()
        
    def ns_jacfunc(vs, obj, obj_scalar, free_variables):
        if not hasattr(ns_jacfunc, 'vs'):
            ns_jacfunc.vs = vs*0+1e16
        if np.max(np.abs(vs-ns_jacfunc.vs)) == 0:
            return ns_jacfunc.J
            
        changevars(vs, obj, obj_scalar, free_variables)
        jacs = [obj.dr_wrt(wrt) for wrt in free_variables]
        result = hstack(jacs)

        ns_jacfunc.J = result
        ns_jacfunc.vs = vs
        return result

        
    x1 = scipy.optimize.minimize(
        method=method,
        fun=residuals,
        callback=callback,
        x0=np.concatenate([free_variable.r.ravel() for free_variable in free_variables]),
        jac=scalar_jacfunc,
        hessp=hessp, hess=hess, args=(obj, obj_scalar, free_variables),
        bounds=bounds, constraints=constraints, tol=tol, options=options).x

    changevars(x1, obj, obj_scalar, free_variables)
    return free_variables
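A minimal usage sketch (illustrative least-squares problem, not from the source; assumes chumpy imported as ch):

import chumpy as ch

x = ch.array([10.0, 10.0])
residuals = x - ch.array([1.0, 2.0])
minimize(residuals, x0=[x], method='dogleg')
# After the call, x.r should be close to [1., 2.].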
Example #5
def _minimize_dogleg(obj, free_variables, on_step=None,
                     maxiter=200, max_fevals=np.inf, sparse_solver='spsolve',
                     disp=False, show_residuals=None, e_1=1e-15, e_2=1e-15, e_3=0., delta_0=None):

    """"Nonlinear optimization using Powell's dogleg method.

    See Lourakis et al, 2005, ICCV '05, "Is Levenberg-Marquardt
    the Most Efficient Optimization for Implementing Bundle
    Adjustment?":
    http://www.ics.forth.gr/cvrl/publications/conferences/0201-P0401-lourakis-levenberg.pdf
    """

    if show_residuals is not None:
        import warnings
        warnings.warn('minimize_dogleg: show_residuals param is deprecated; pass a dict instead.')

    labels = {}
    if isinstance(obj, (list, tuple)):
        obj = ch.concatenate([f.ravel() for f in obj])
    elif isinstance(obj, dict):
        labels = obj
        obj = ch.concatenate([f.ravel() for f in obj.values()])


    niters = maxiter
    verbose = disp
    num_unique_ids = len(np.unique(np.array([id(freevar) for freevar in free_variables])))
    if num_unique_ids != len(free_variables):
        raise Exception('The "free_variables" param contains duplicate variables.')
        
    obj = ChInputsStacked(obj=obj, free_variables=free_variables, x=np.concatenate([freevar.r.ravel() for freevar in free_variables]))

    def call_cb():
        if on_step is not None:
            on_step(obj)

        report_line = ""
        if len(labels) > 0:
            report_line += '%.2e | ' % (np.sum(obj.r**2),)
        for label in sorted(labels.keys()):
            objective = labels[label]
            report_line += '%s: %.2e | ' % (label, np.sum(objective.r**2))
        if len(labels) > 0:
            report_line += '\n'
        sys.stderr.write(report_line)

    call_cb()

    # pif = print-if-verbose.
    # can't use "print" because it's a statement, not a fn
    pif = lambda x: sys.stdout.write(x + '\n') if verbose else 0

    if callable(sparse_solver):
        solve = sparse_solver
    elif isinstance(sparse_solver, str) and sparse_solver in _solver_fns.keys():
        solve = _solver_fns[sparse_solver]
    else:
        raise Exception('sparse_solver argument must be either a string in the set (%s) or have the api of scipy.sparse.linalg.spsolve.' % ' '.join(_solver_fns.keys()))

    # optimization parms
    k_max = niters
    fevals = 0

    k = 0
    delta = delta_0
    p = col(obj.x.r) 

    fevals += 1
    
    tm = time.time()
    pif('computing Jacobian...')
    J = obj.J

    if sp.issparse(J):
        assert(J.nnz > 0)
    pif('Jacobian (%dx%d) computed in %.2fs' % (J.shape[0], J.shape[1], time.time() - tm))
    
    if J.shape[1] != p.size:
        import pdb; pdb.set_trace()
    assert(J.shape[1] == p.size)
    
    tm = time.time()
    pif('updating A and g...')
    A = J.T.dot(J)    
    r = col(obj.r.copy())
    
    g = col(J.T.dot(-r))
    pif('A and g updated in %.2fs' % (time.time() - tm))
    
    stop = norm(g, np.inf) < e_1
    while (not stop) and (k < k_max) and (fevals < max_fevals):
        k += 1
        pif('beginning iteration %d' % (k,))
        d_sd = col(sqnorm(g) / sqnorm(J.dot(g)) * g)
        GNcomputed = False

        while True:
            # if the Cauchy point is outside the trust region,
            # take that direction but only to the edge of the trust region
            if delta is not None and norm(d_sd) >= delta:
                pif('PROGRESS: Using stunted cauchy')
                d_dl = np.array(col(delta/norm(d_sd) * d_sd))
            else:
                if not GNcomputed:
                    tm = time.time()
                    if scipy.sparse.issparse(A):
                        A.eliminate_zeros()
                        pif('sparse solve...sparsity infill is %.3f%% (hessian %dx%d), J infill %.3f%%' % (
                            100. * A.nnz / (A.shape[0] * A.shape[1]),
                            A.shape[0],
                            A.shape[1],
                            100. * J.nnz / (J.shape[0] * J.shape[1])))

                        d_gn = col(solve(A, g)) if g.size>1 else np.atleast_1d(g.ravel()[0]/A[0,0])
                        pif('sparse solve...done in %.2fs' % (time.time() - tm))
                    else:
                        pif('dense solve...')
                        try:
                            d_gn = col(np.linalg.solve(A, g))
                        except Exception:
                            d_gn = col(np.linalg.lstsq(A, g)[0])
                        pif('dense solve...done in %.2fs' % (time.time() - tm))
                    GNcomputed = True

                # if the gauss-newton solution is within the trust region, use it
                if delta is None or norm(d_gn) <= delta:
                    pif('PROGRESS: Using gauss-newton solution')
                    d_dl = np.array(d_gn)
                    if delta is None:
                        delta = norm(d_gn)

                else: # between cauchy step and gauss-newton step
                    pif('PROGRESS: between cauchy and gauss-newton')

                    # compute beta multiplier
                    delta_sq = delta**2
                    pnow = (
                        (d_gn-d_sd).T.dot(d_gn-d_sd)*delta_sq
                        + d_gn.T.dot(d_sd)**2
                        - sqnorm(d_gn) * (sqnorm(d_sd)))
                    B = delta_sq - sqnorm(d_sd)
                    B /= ((d_gn-d_sd).T.dot(d_sd) + math.sqrt(pnow))

                    # apply step
                    d_dl = np.array(d_sd + float(B) * (d_gn - d_sd))

                    #assert(math.fabs(norm(d_dl) - delta) < 1e-12)
            if norm(d_dl) <= e_2*norm(p):
                pif('stopping because of small step size (norm_dl < %.2e)' % (e_2*norm(p)))
                stop = True
            else:
                p_new = p + d_dl

                tm_residuals = time.time()
                obj.x = p_new
                fevals += 1

                r_trial = obj.r.copy()
                tm_residuals = time.time() - tm_residuals

                # rho is the ratio of...
                # (improvement in SSE) / (predicted improvement in SSE)  
                
                # slower
                #rho = norm(e_p)**2 - norm(e_p_trial)**2
                #rho = rho / (L(d_dl*0, e_p, J) - L(d_dl, e_p, J))              
                
                # faster
                sqnorm_ep = sqnorm(r)
                rho = sqnorm_ep - norm(r_trial)**2
                
                if rho > 0:
                    rho /= predicted_improvement(d_dl, -r, J, sqnorm_ep, A, g)
                    
                improvement_occurred = rho > 0

                # if the objective function improved, update input parameter estimate.
                # Note that the obj.x already has the new parms,
                # and we should not set them again to the same (or we'll bust the cache)                
                if improvement_occurred:
                    p = col(p_new)
                    call_cb()

                    if (sqnorm_ep - norm(r_trial)**2) / sqnorm_ep < e_3:
                        stop = True
                        pif('stopping because improvement < %.1e%%' % (100*e_3,))


                else:  # Put the old parms back
                    obj.x = ch.Ch(p)
                    obj.on_changed('x') # copies from flat vector to free variables

                # if the objective function improved and we're not done,
                # get ready for the next iteration
                if improvement_occurred and not stop:
                    tm_jac = time.time()
                    pif('computing Jacobian...')
                    J = obj.J.copy()
                    tm_jac = time.time() - tm_jac
                    pif('Jacobian (%dx%d) computed in %.2fs' % (J.shape[0], J.shape[1], tm_jac))

                    pif('Residuals+Jac computed in %.2fs' % (tm_jac + tm_residuals,))

                    tm = time.time()
                    pif('updating A and g...')
                    A = J.T.dot(J)
                    r = col(r_trial)
                    g = col(J.T.dot(-r))
                    pif('A and g updated in %.2fs' % (time.time() - tm))
                    
                    if norm(g, np.inf) < e_1:
                        stop = True
                        pif('stopping because norm(g, np.inf) < %.2e' % (e_1))

                # update our trust region
                delta = updateRadius(rho, delta, d_dl)
                
                if delta <= e_2*norm(p):
                    stop = True
                    pif('stopping because trust region is too small')

            # the following "collect" is very expensive.
            # please contact matt if you find situations where it actually helps things.
            #import gc; gc.collect()
            if stop or improvement_occurred or (fevals >= max_fevals):
                break
        if k >= k_max:
            pif('stopping because max number of user-specified iterations (%d) has been met' % (k_max,))
        elif fevals >= max_fevals:
            pif('stopping because max number of user-specified func evals (%d) has been met' % (max_fevals,))

    return obj.free_variables
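The step selection in the inner loop above reduces to the classic dogleg rule. A standalone numpy sketch of that rule (illustrative only; d_sd is the Cauchy step, d_gn the Gauss-Newton step):

import numpy as np

def dogleg_step(d_sd, d_gn, delta):
    # Cauchy point outside the trust region: truncate steepest descent.
    if np.linalg.norm(d_sd) >= delta:
        return delta / np.linalg.norm(d_sd) * d_sd
    # Gauss-Newton step inside the trust region: take it whole.
    if np.linalg.norm(d_gn) <= delta:
        return d_gn
    # Otherwise walk from the Cauchy point toward the Gauss-Newton point
    # until the boundary is hit (positive root of a scalar quadratic).
    diff = d_gn - d_sd
    a = diff.dot(diff)
    b = 2.0 * d_sd.dot(diff)
    c = d_sd.dot(d_sd) - delta ** 2
    beta = (-b + np.sqrt(b * b - 4.0 * a * c)) / (2.0 * a)
    return d_sd + beta * diff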
Example #6
def minimize(fun,
             x0,
             method='dogleg',
             bounds=None,
             constraints=(),
             tol=None,
             callback=None,
             options=None):

    if method == 'dogleg':
        if options is None: options = {}
        return minimize_dogleg(fun,
                               free_variables=x0,
                               on_step=callback,
                               **options)

    if isinstance(fun, (list, tuple)):
        fun = ch.concatenate([f.ravel() for f in fun])
    if isinstance(fun, dict):
        fun = ch.concatenate([f.ravel() for f in fun.values()])
    obj = fun
    free_variables = x0

    from ch import SumOfSquares

    hessp = None
    hess = None
    if obj.size == 1:
        obj_scalar = obj
    else:
        obj_scalar = SumOfSquares(obj)

        def hessp(vs, p, obj, obj_scalar, free_variables):
            changevars(vs, obj, obj_scalar, free_variables)
            if not hasattr(hessp, 'vs'):
                hessp.vs = vs * 0 + 1e16
            if np.max(np.abs(vs - hessp.vs)) > 0:

                J = ns_jacfunc(vs, obj, obj_scalar, free_variables)
                hessp.J = J
                hessp.H = 2. * J.T.dot(J)
                hessp.vs = vs
            return np.array(hessp.H.dot(p)).ravel()
            #return 2*np.array(hessp.J.T.dot(hessp.J.dot(p))).ravel()

        if method.lower() != 'newton-cg':

            def hess(vs, obj, obj_scalar, free_variables):
                changevars(vs, obj, obj_scalar, free_variables)
                if not hasattr(hessp, 'vs'):
                    hessp.vs = vs * 0 + 1e16
                if np.max(np.abs(vs - hessp.vs)) > 0:
                    J = ns_jacfunc(vs, obj, obj_scalar, free_variables)
                    hessp.H = 2. * J.T.dot(J)
                    hessp.vs = vs  # keep the cache key current so H is reused
                return hessp.H

    def changevars(vs, obj, obj_scalar, free_variables):
        cur = 0
        changed = False
        for idx, freevar in enumerate(free_variables):
            sz = freevar.r.size
            newvals = vs[cur:cur + sz].copy().reshape(
                free_variables[idx].shape)
            if np.max(np.abs(newvals - free_variables[idx]).ravel()) > 0:
                free_variables[idx][:] = newvals
                changed = True

            cur += sz

        methods_without_callback = ('anneal', 'powell', 'cobyla', 'slsqp')
        if callback is not None and changed and method.lower(
        ) in methods_without_callback:
            callback(None)

        return changed

    def residuals(vs, obj, obj_scalar, free_variables):
        changevars(vs, obj, obj_scalar, free_variables)
        residuals = obj_scalar.r.ravel()[0]
        return residuals

    def scalar_jacfunc(vs, obj, obj_scalar, free_variables):
        if not hasattr(scalar_jacfunc, 'vs'):
            scalar_jacfunc.vs = vs * 0 + 1e16
        if np.max(np.abs(vs - scalar_jacfunc.vs)) == 0:
            return scalar_jacfunc.J

        changevars(vs, obj, obj_scalar, free_variables)

        if True:  # faster, at least on some problems
            result = np.concatenate([
                np.array(obj_scalar.lop(wrt, np.array([[1]]))).ravel()
                for wrt in free_variables
            ])
        else:
            jacs = [obj_scalar.dr_wrt(wrt) for wrt in free_variables]
            for idx, jac in enumerate(jacs):
                if sp.issparse(jac):
                    jacs[idx] = jacs[idx].todense()
            result = np.concatenate([jac.ravel() for jac in jacs])

        scalar_jacfunc.J = result
        scalar_jacfunc.vs = vs
        return result.ravel()

    def ns_jacfunc(vs, obj, obj_scalar, free_variables):
        if not hasattr(ns_jacfunc, 'vs'):
            ns_jacfunc.vs = vs * 0 + 1e16
        if np.max(np.abs(vs - ns_jacfunc.vs)) == 0:
            return ns_jacfunc.J

        changevars(vs, obj, obj_scalar, free_variables)
        jacs = [obj.dr_wrt(wrt) for wrt in free_variables]
        result = hstack(jacs)

        ns_jacfunc.J = result
        ns_jacfunc.vs = vs
        return result

    x1 = scipy.optimize.minimize(method=method,
                                 fun=residuals,
                                 callback=callback,
                                 x0=np.concatenate([
                                     free_variable.r.ravel()
                                     for free_variable in free_variables
                                 ]),
                                 jac=scalar_jacfunc,
                                 hessp=hessp,
                                 hess=hess,
                                 args=(obj, obj_scalar, free_variables),
                                 bounds=bounds,
                                 constraints=constraints,
                                 tol=tol,
                                 options=options).x

    changevars(x1, obj, obj_scalar, free_variables)
    return free_variables
Example #7
def _minimize_dogleg(obj, free_variables, on_step=None,
                     maxiter=200, max_fevals=np.inf, sparse_solver='spsolve',
                     disp=False, show_residuals=None, e_1=1e-15, e_2=1e-15, e_3=0., delta_0=None):

    """"Nonlinear optimization using Powell's dogleg method.

    See Lourakis et al, 2005, ICCV '05, "Is Levenberg-Marquardt
    the Most Efficient Optimization for Implementing Bundle
    Adjustment?":
    http://www.ics.forth.gr/cvrl/publications/conferences/0201-P0401-lourakis-levenberg.pdf
    """

    import warnings
    if show_residuals is not None:
        warnings.warn('minimize_dogleg: show_residuals param is deprecated; pass a dict instead.')

    labels = {}
    if isinstance(obj, (list, tuple)):
        obj = ch.concatenate([f.ravel() for f in obj])
    elif isinstance(obj, dict):
        labels = obj
        obj = ch.concatenate([f.ravel() for f in obj.values()])


    niters = maxiter
    verbose = disp
    num_unique_ids = len(np.unique(np.array([id(freevar) for freevar in free_variables])))
    if num_unique_ids != len(free_variables):
        raise Exception('The "free_variables" param contains duplicate variables.')
        
    obj = ChInputsStacked(obj=obj, free_variables=free_variables, x=np.concatenate([freevar.r.ravel() for freevar in free_variables]))

    def call_cb():
        if on_step is not None:
            on_step(obj)

        report_line = ""
        if len(labels) > 0:
            report_line += '%.2e | ' % (np.sum(obj.r**2),)
        for label in sorted(labels.keys()):
            objective = labels[label]
            report_line += '%s: %.2e | ' % (label, np.sum(objective.r**2))
        if len(labels) > 0:
            report_line += '\n'
        sys.stderr.write(report_line)

    call_cb()

    # pif = print-if-verbose.
    # can't use "print" because it's a statement, not a fn
    pif = lambda x: sys.stdout.write(x + '\n') if verbose else 0

    if callable(sparse_solver):
        solve = sparse_solver
    elif isinstance(sparse_solver, str) and sparse_solver in _solver_fns.keys():
        solve = _solver_fns[sparse_solver]
    else:
        raise Exception('sparse_solver argument must be either a string in the set (%s) or have the api of scipy.sparse.linalg.spsolve.' % ' '.join(_solver_fns.keys()))

    # optimization parms
    k_max = niters
    fevals = 0

    k = 0
    delta = delta_0
    p = col(obj.x.r) 

    fevals += 1
    
    tm = time.time()
    pif('computing Jacobian...')
    J = obj.J

    if sp.issparse(J):
        assert(J.nnz > 0)
    pif('Jacobian (%dx%d) computed in %.2fs' % (J.shape[0], J.shape[1], time.time() - tm))
    
    if J.shape[1] != p.size:
        import pdb; pdb.set_trace()
    assert(J.shape[1] == p.size)
    
    tm = time.time()
    pif('updating A and g...')
    A = J.T.dot(J)    
    r = col(obj.r.copy())
    
    g = col(J.T.dot(-r))
    pif('A and g updated in %.2fs' % (time.time() - tm))
    
    stop = norm(g, np.inf) < e_1
    while (not stop) and (k < k_max) and (fevals < max_fevals):
        k += 1
        pif('beginning iteration %d' % (k,))
        d_sd = col(sqnorm(g) / sqnorm(J.dot(g)) * g)
        GNcomputed = False

        while True:
            # if the Cauchy point is outside the trust region,
            # take that direction but only to the edge of the trust region
            if delta is not None and norm(d_sd) >= delta:
                pif('PROGRESS: Using stunted cauchy')
                d_dl = np.array(col(delta/norm(d_sd) * d_sd))
            else:
                if not GNcomputed:
                    tm = time.time()
                    if scipy.sparse.issparse(A):
                        A.eliminate_zeros()
                        pif('sparse solve...sparsity infill is %.3f%% (hessian %dx%d), J infill %.3f%%' % (
                            100. * A.nnz / (A.shape[0] * A.shape[1]),
                            A.shape[0],
                            A.shape[1],
                            100. * J.nnz / (J.shape[0] * J.shape[1])))
                            
                        if g.size > 1:             
                            d_gn = col(solve(A, g))
                            if np.any(np.isnan(d_gn)) or np.any(np.isinf(d_gn)):
                                from scipy.sparse.linalg import lsqr
                                d_gn = col(lsqr(A, g)[0])
                        else:
                            d_gn = np.atleast_1d(g.ravel()[0]/A[0,0])
                        pif('sparse solve...done in %.2fs' % (time.time() - tm))
                    else:
                        pif('dense solve...')
                        try:
                            d_gn = col(np.linalg.solve(A, g))
                        except Exception:
                            d_gn = col(np.linalg.lstsq(A, g)[0])
                        pif('dense solve...done in %.2fs' % (time.time() - tm))
                    GNcomputed = True

                # if the gauss-newton solution is within the trust region, use it
                if delta is None or norm(d_gn) <= delta:
                    pif('PROGRESS: Using gauss-newton solution')
                    d_dl = np.array(d_gn)
                    if delta is None:
                        delta = norm(d_gn)

                else: # between cauchy step and gauss-newton step
                    pif('PROGRESS: between cauchy and gauss-newton')

                    # compute beta multiplier
                    delta_sq = delta**2
                    pnow = (
                        (d_gn-d_sd).T.dot(d_gn-d_sd)*delta_sq
                        + d_gn.T.dot(d_sd)**2
                        - sqnorm(d_gn) * (sqnorm(d_sd)))
                    B = delta_sq - sqnorm(d_sd)
                    B /= ((d_gn-d_sd).T.dot(d_sd) + math.sqrt(pnow))

                    # apply step
                    d_dl = np.array(d_sd + float(B) * (d_gn - d_sd))

                    #assert(math.fabs(norm(d_dl) - delta) < 1e-12)
            if norm(d_dl) <= e_2*norm(p):
                pif('stopping because of small step size (norm_dl < %.2e)' % (e_2*norm(p)))
                stop = True
            else:
                p_new = p + d_dl

                tm_residuals = time.time()
                obj.x = p_new
                fevals += 1

                r_trial = obj.r.copy()
                tm_residuals = time.time() - tm_residuals

                # rho is the ratio of...
                # (improvement in SSE) / (predicted improvement in SSE)  
                
                # slower
                #rho = norm(e_p)**2 - norm(e_p_trial)**2
                #rho = rho / (L(d_dl*0, e_p, J) - L(d_dl, e_p, J))              
                
                # faster
                sqnorm_ep = sqnorm(r)
                rho = sqnorm_ep - norm(r_trial)**2
                
                with warnings.catch_warnings():
                    warnings.filterwarnings('ignore', category=RuntimeWarning)
                    if rho > 0:
                        rho /= predicted_improvement(d_dl, -r, J, sqnorm_ep, A, g)
                    
                improvement_occurred = rho > 0

                # if the objective function improved, update input parameter estimate.
                # Note that the obj.x already has the new parms,
                # and we should not set them again to the same (or we'll bust the cache)                
                if improvement_occurred:
                    p = col(p_new)
                    call_cb()

                    if (sqnorm_ep - norm(r_trial)**2) / sqnorm_ep < e_3:
                        stop = True
                        pif('stopping because improvement < %.1e%%' % (100*e_3,))


                else:  # Put the old parms back
                    obj.x = ch.Ch(p)
                    obj.on_changed('x') # copies from flat vector to free variables

                # if the objective function improved and we're not done,
                # get ready for the next iteration
                if improvement_occurred and not stop:
                    tm_jac = time.time()
                    pif('computing Jacobian...')
                    J = obj.J.copy()
                    tm_jac = time.time() - tm_jac
                    pif('Jacobian (%dx%d) computed in %.2fs' % (J.shape[0], J.shape[1], tm_jac))

                    pif('Residuals+Jac computed in %.2fs' % (tm_jac + tm_residuals,))

                    tm = time.time()
                    pif('updating A and g...')
                    A = J.T.dot(J)
                    r = col(r_trial)
                    g = col(J.T.dot(-r))
                    pif('A and g updated in %.2fs' % (time.time() - tm))
                    
                    if norm(g, np.inf) < e_1:
                        stop = True
                        pif('stopping because norm(g, np.inf) < %.2e' % (e_1))

                # update our trust region
                delta = updateRadius(rho, delta, d_dl)
                
                if delta <= e_2*norm(p):
                    stop = True
                    pif('stopping because trust region is too small')

            # the following "collect" is very expensive.
            # please contact matt if you find situations where it actually helps things.
            #import gc; gc.collect()
            if stop or improvement_occurred or (fevals >= max_fevals):
                break
        if k >= k_max:
            pif('stopping because max number of user-specified iterations (%d) has been met' % (k_max,))
        elif fevals >= max_fevals:
            pif('stopping because max number of user-specified func evals (%d) has been met' % (max_fevals,))

    return obj.free_variables
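updateRadius is referenced but not shown in either listing. A common trust-region update, given here purely as an assumption following standard practice (thresholds and factors are hypothetical):

import numpy as np

def update_radius(rho, delta, d_dl, lo=0.25, hi=0.75):
    # Shrink when the quadratic model predicted the improvement poorly,
    # grow when it predicted well, otherwise keep the radius.
    step_norm = np.linalg.norm(d_dl)
    if rho < lo:
        return step_norm / 4.0
    if rho > hi:
        return max(delta, 2.0 * step_norm)
    return delta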